Python csv reader for row in reader gives syntax error - python

New to Django/Python. I need to write an import script for a CSV file to seed some data (not using fixtures, did that already as that is JSON based and not CSV).
This works:
import csv
from datetime import datetime
from django.utils.timezone import make_aware
from django.core.management.base import BaseCommand
from chatterbox.models import Organisation, Course, Student
class Command(BaseCommand):
def handle(self, **options):
CSV_PATH = './students_AEKI.csv'
Student.objects.filter(organisation__name__exact="AEKI").delete()
with open(CSV_PATH) as file:
file.readline() # skip the header
csv_reader = csv.reader(file, delimiter=',')
org = Organisation.objects.filter(name="AEKI")
for row in csv_reader:
_, Student.objects.get_or_create(
first_name=row[0],
last_name=row[1],
email=row[2],
organisation=org[0],
enrolled=row[4],
last_booking=row[5],
credits_total=row[6],
credits_balance=row[7],
)
This does NOT work:
import csv
from datetime import datetime
from django.utils.timezone import make_aware
from django.core.management.base import BaseCommand
from chatterbox.models import Organisation, Course, Student
class Command(BaseCommand):
def handle(self, **options):
CSV_PATH = './students_AEKI.csv'
Student.objects.filter(organisation__name__exact="AEKI").delete()
with open(CSV_PATH) as file:
file.readline() # skip the header
csv_reader = csv.reader(file, delimiter=',')
org = Organisation.objects.filter(name="AEKI")
for row in csv_reader:
enrolled_utc = make_aware(datetime.strptime(row[4], '%Y-%m-%d'))
last_booking_utc = make_aware(datetime.strptime((row[5], '%Y-%m-%d'))
_, Student.objects.get_or_create(
first_name=row[0],
last_name=row[1],
email=row[2],
organisation=org[0],
enrolled=enrolled_utc,
last_booking=last_booking_utc,
credits_total=row[6],
credits_balance=row[7],
)
Syntax error at the "_".
I need to do some manipulation (e.g. adding a timezone to date fields) on the data before creating it in the table. So what is wrong with the 2nd version?

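The syntax error is reported at the "_", but the real cause is on the line before it: that line has an extra opening bracket, so its parentheses never close and Python only notices when it reaches the next line. The leading "_, " is also unnecessary, so remove it.
This is the line with the extra bracket:
last_booking_utc = make_aware(datetime.strptime((row[5], '%Y-%m-%d'))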
From
for row in csv_reader:
enrolled_utc = make_aware(datetime.strptime(row[4], '%Y-%m-%d'))
last_booking_utc = make_aware(datetime.strptime((row[5], '%Y-%m-%d'))
_, Student.objects.get_or_create(
first_name=row[0],
last_name=row[1],
email=row[2],
organisation=org[0],
enrolled=enrolled_utc,
last_booking=last_booking_utc,
credits_total=row[6],
credits_balance=row[7],
)
To
# Corrected loop: the parentheses are balanced and the stray "_, " is gone.
for row in csv_reader:
    # Parse the date-only columns and make the resulting naive datetimes
    # timezone-aware before they are stored.
    enrolled_utc = make_aware(datetime.strptime(row[4], '%Y-%m-%d'))
    last_booking_utc = make_aware(datetime.strptime(row[5], '%Y-%m-%d'))
    # get_or_create() returns an (object, created) tuple; it is ignored here.
    Student.objects.get_or_create(
        first_name=row[0],
        last_name=row[1],
        email=row[2],
        organisation=org[0],
        enrolled=enrolled_utc,
        last_booking=last_booking_utc,
        credits_total=row[6],
        credits_balance=row[7],
    )

Related

How to build specific format with open()?

Here's my code:
import glob
import itertools
import sys, os
import six
import csv
import numpy as np
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdftypes import resolve1
os.chdir("PATH/pdf")
extension = 'pdf'
all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
valeur = []
n = 1
for i in all_filenames:
fp = open(i, "rb")
parser = PDFParser(fp)
doc = PDFDocument(parser)
fields = resolve1(doc.catalog["AcroForm"])["Fields"]
for i in fields:
field = resolve1(i)
name, value = field.get("T"), field.get("V")
filehehe = "{0}:{1}".format(name,value)
values = resolve1(value)
names = resolve1(name)
valeur.append(values)
n = n+1
with open('test.csv','wb') as f:
for i in valeur:
f.write(i)
The goal here is to pick up some information from PDF files. Here's the output:
As you can see, the format is not pretty. I'm not very familiar with open(), so I'm kind of stuck.
I would like to have a distinct row for each PDF, with each piece of information in its own cell. Something like this:
Try to store the data from each PDF file in a separate list, and append that list to the valeur list you already have.
Use the csv module, as @martineau rightly suggested.
You can try the code below.
import csv

# One inner list per PDF, so that every PDF becomes its own CSV row.
valeur = []
# your code (the imports, os.chdir(...) and all_filenames from the question)

for pdf_name in all_filenames:
    temp_list = []  # form-field values of the current PDF only
    # The context manager closes the PDF even if parsing raises.
    with open(pdf_name, "rb") as fp:
        parser = PDFParser(fp)
        doc = PDFDocument(parser)
        fields = resolve1(doc.catalog["AcroForm"])["Fields"]
        # A distinct loop variable avoids clobbering the outer file name.
        for raw_field in fields:
            field = resolve1(raw_field)
            temp_list.append(resolve1(field.get("V")))
    valeur.append(temp_list)

# Finally, when you have the required data, write it to a CSV file.
# newline='' lets the csv module control line endings itself.
with open('mycsv.csv', 'w', newline='') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    wr.writerows(valeur)
With this, the output would be like this

How to delete Row while printing into excel format

I have some simple code written in Python that writes to an Excel (CSV) file. I found that an additional empty row gets added each time a row is written. How can I skip these empty rows and write the data one row after the other?
import csv
from datetime import datetime
import time
filename = 'testformat.csv'
fields = ['SLNo', 'Date', 'Time', 'RData', 'BData', 'GData', 'IRData']
date_format = datetime.now().strftime('%Y/%m/%d')
current_time = datetime.now().strftime('%I:%M:%S,%f')
def main():
with open(filename, 'w') as csvfile:
csvwriter = csv.writer(csvfile)
csvwriter.writerow(fields)
for i in range(30):
csvwriter.writerow([i, date_format, current_time])
if __name__ == '__main__':
main()
What you need is already here : https://stackoverflow.com/a/3191811/18081892
You have to use :
with open(filename, 'w', newline='', encoding='utf-8') as csvfile:

Using with open from other file

I have a Python file where I want to put functions that I don't want to have in my main code, such as opening CSV files, which is the case here. The problem is that when I call the function that uses "with open", this error appears:
"ValueError: I/O operation on closed file."
From what I found, this happens because of the indentation.
main.py:
from external_tools import read_csv
import unittest
class ClassTest(unittest.TestCase):
def test_a(self):
csv_file = "file.csv"
csv_reader = read_csv(csv_file)
for line in csv_reader:
print(line[0])
if __name__ == '__main__':
unittest.main()
external_tools.py
import datetime
import csv
def epoch_converter():
    """Print a fixed sample epoch timestamp and the datetime it converts to."""
    epoch_time = 40246871
    # fromtimestamp() with no tz argument yields a naive local-time datetime.
    date_time = datetime.datetime.fromtimestamp(epoch_time)
    print("Given epoch time: ", epoch_time)
    print("Converted Datetime: ", date_time)
def read_csv(csv_file):
with open(csv_file, 'r') as csv_file:
csv_reader = csv.reader(csv_file)
print(csv_reader)
return csv_reader
Is there any way to do it in this way or do I have to use open with in my main?
Here's how to do what I suggested in my comment, which was to turn read_csv into a context manager itself using contextlib.contextmanager as a function decorator.
main.py
from external_tools import read_csv
import unittest
class ClassTest(unittest.TestCase):
    """Shows how to consume read_csv() as a context manager."""

    def test_a(self):
        """Print every row of the CSV file while it is still open."""
        csv_file = "testfile.csv"
        # Iterate inside the with-block: the reader's underlying file is
        # closed as soon as the block exits.
        with read_csv(csv_file) as csv_reader:
            for row in csv_reader:
                print(row)


if __name__ == '__main__':
    unittest.main()
external_tools.py
import datetime
from contextlib import contextmanager
import csv
def epoch_converter():
    """Print a fixed sample epoch timestamp and the datetime it converts to."""
    epoch_time = 40246871
    # fromtimestamp() with no tz argument yields a naive local-time datetime.
    date_time = datetime.datetime.fromtimestamp(epoch_time)
    print("Given epoch time: ", epoch_time)
    print("Converted Datetime: ", date_time)
@contextmanager
def read_csv(csv_file):
    """Open *csv_file* and yield a ``csv.reader`` over it.

    Intended for use in a ``with`` statement: the file stays open for the
    duration of the ``with`` body and is closed when the body exits, so the
    reader must be consumed inside the block.

    Args:
        csv_file: Path of the CSV file to read.

    Yields:
        A ``csv.reader`` bound to the open file.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(csv_file, 'r', newline='') as handle:
        csv_reader = csv.reader(handle)
        yield csv_reader
    # Reached only after the caller's with-block finished without raising.
    print('csv file closed')

How to write csv file in html?

I have read the CSV file, but my problem is: how do I read a CSV file and save it as a table in table.html?
import csv
html_about = ''
names = []
with open('filo.csv') as data_file:
csv_data = csv.reader(data_file)
for line in csv_data:
names.append(f'{line[0]}')
html_output = '\n<ul>'
for name in names:
html_output += f'\n\t<li>{name}</li>'
html_output += '\n</ul>'
from prettytable import PrettyTable
x = PrettyTable(line[0])
html_code = x.get_html_string()
html_file = open('table.html','w')
html_file = html_file.write(html_code)
I suggest you use the pandas library:
it has pd.read_csv to load the file and DataFrame.to_html to write it out as an HTML table.
Usage should look like this; let me know if this works for you:
import pandas as pd

# Load the CSV into a DataFrame, then render it as an HTML table.
df = pd.read_csv('filo.csv')
# to_html() accepts an open text file as its output buffer.
with open('table.html', 'w') as html_file:
    df.to_html(html_file)

How can I convert a string type column in a class to datetime in parse.com?

I have a column filled with dates. I imported all of the data using the csv importer. (parse.com)
I have tried reformatting the dates to match the formatting used in parse's other datetime fields. Here's the python I used to reformat the initial dates. It runs in a folder which contains my monthly logs as csv files. The date for changing is in the fifth column (ie - row[4]).
import os
import csv
import datetime
import time
from os import listdir
mypath = os.getcwd()
def find_csv_filenames(path_to_dir, suffix=".csv"):
    """Return the names of the entries in *path_to_dir* ending with *suffix*.

    Args:
        path_to_dir: Directory to list.
        suffix: Ending a name must have to be kept; defaults to ".csv".

    Returns:
        A list of bare entry names (not full paths), in the order
        ``listdir`` reports them.
    """
    return [name for name in listdir(path_to_dir) if name.endswith(suffix)]
# For every CSV file found in mypath: skip the header row, rewrite the date in
# the fifth column, blank the third column, and print the row comma-joined.
for each in find_csv_filenames(mypath):
with open(each,'rb') as csvfile:
spamreader = csv.reader(csvfile, delimiter=',', quotechar='"')
next(spamreader, None) # skip the headers
for row in spamreader:
# twilio formatting "yyyy-MM-dd hh:mm:ss PDT" / or sometimes PST
# [:-4] drops the last four characters (the space plus the zone abbreviation) before parsing
date_object = datetime.datetime.strptime(row[4][:-4], "%Y-%m-%d %H:%M:%S")
# parse formatting"yyyy'-'MM'-'dd'T'HH':'mm':'ss.SSS'Z'"
# NOTE(review): "%M" is the minutes directive, so ".1%MZ" emits ".1" followed by the minutes rather than real milliseconds - confirm this is intended
# NOTE(review): the zone abbreviation is discarded without any offset conversion, yet the output ends in "Z" - confirm
row[4] = date_object.strftime("%Y-%m-%dT%H:%M:%S.1%MZ")
row[2] = ''
print ', '.join(row)

Categories