Splitting a hash and writing it to CSV files in Python - python

I am trying to split a hash (dict) and write it to different CSV files, but it throws an error.
# Split the rows of `filehandler` into numbered CSV files holding `row_limit` rows each.
# NOTE(review): indentation was lost in this paste, so the nesting must be inferred from the statements.
def split(self,filehandler, delimiter=',', row_limit=1000,
output_name_template='output_%s.csv', output_path='.', keep_headers=True):
#reader = csv.reader(filehandler, delimiter=delimiter)
# The first output file is output_path joined with the template filled in with piece number 1.
current_piece = 1
current_out_path = os.path.join(
output_path,
output_name_template % current_piece
)
current_out_writer = csv.writer(open(current_out_path, 'w'), delimiter=',')
current_limit = row_limit
if keep_headers:
# The first item of filehandler is taken as the header row; this needs an object with a .next() method.
headers = filehandler.next()
current_out_writer.writerow(headers)
for i, row in enumerate(filehandler):
# Once the running row count passes the current limit, switch to the next numbered output file.
if i + 1 > current_limit:
current_piece += 1
current_limit = row_limit * current_piece
current_out_path = os.path.join(
output_path,
output_name_template % current_piece
)
current_out_writer = csv.writer(open(current_out_path, 'w'), delimiter=',')
if keep_headers:
# NOTE(review): this reads self.headers, while the header row above was stored in the local `headers`.
current_out_writer.writerow(self.headers)
current_out_writer.writerow(row)
# The call passes self.merged_data as the row source.
split(self.merged_data)
first error
headers = filehandler.next()
AttributeError: 'dict' object has no attribute 'next'
Second error
for i, row in enumerate(filehandler):
TypeError: 'Merge_project' object is not iterable
How can I fix these errors, split the hash, and write it out to different CSV files?

Related

Issue with Request--downloading an empty image

My requests are downloading the images, but not properly: the images are empty. I know I am close to getting it correct, but I am not sure what I am missing. Any help would be greatly appreciated!
# Read testData.csv, download every URL found in parentheses in column 7 of each row,
# and append a summary row (selected columns plus the saved image paths) to outfile.csv.
# NOTE(review): indentation was lost in this paste, so the nesting must be inferred from the statements.
fname ='testData.csv'
# Command string only; nothing in this snippet executes it.
s = "dos2unix {}".format(fname)
# Matches a double-quoted run; not referenced again in this snippet.
inner_quote_grabber = re.compile(r'("[^"]*")', flags=re.M)
# Captures the text between a pair of parentheses.
parenth_grabber = re.compile(r'\(([^)]*)\)', flags=re.M)
new_rows = []
# Column indexes copied unchanged from each input row into the output row.
matter = [0, 3,4]
file_counter = 0
file_prefix = 'images/'
file_out = 'outfile.csv'
with open(fname, 'r') as c:
reader = csv.reader(c, delimiter=',')
for row in reader:
t_row = []
#print( len(row), row)
for i in matter:
t_row.append(row[i])
last_row = []
print(row[7])
# Each parenthesised group in column 7 is used as the URL to fetch.
for group in parenth_grabber.findall(row[7]):
print('grabbing ', group)
file_counter += 1
click_clack = file_prefix + 'file_' + str(file_counter) +'.jpg'
print('group:', group)
# NOTE(review): req.raw is read below, but this request is made without stream=True; confirm against the requests documentation.
req = requests.get(group)
print('status_code', req.status_code)
last_row.append(click_clack)
with open(click_clack, 'wb') as f:
req.raw.decode_content = True
shutil.copyfileobj(req.raw, f)
#f.write(text)
# NOTE(review): file_counter is incremented a second time here.
file_counter += 1
t_row.append(', '.join(last_row))
# Opened in append mode, so rows accumulate in outfile.csv.
with open(file_out, 'a') as f:
writer = csv.writer(f)
writer.writerow(t_row)

python csv splitter not functioning

I am trying to make a .csv splitter.
It should take x number of lines from the original CSV file and create new CSV files containing that number of lines each.
import os
import csv
fileDir = ('C:\\somedir\\')
fName = input('Write the file name here, without extention: ')
lineNo = int(input('Number of lines pr csv file: '))
fNameF = (fName + '.csv')
filehandler = (fileDir + fNameF)
# Split the CSV rows read from `filehandler` into numbered files of `row_limit` rows each.
# NOTE(review): indentation was lost in this paste, so the nesting must be inferred from the statements.
# NOTE(review): the default arguments capture the module-level lineNo and fileDir values.
def split(filehandler, delimiter=',', row_limit=lineNo,
output_name_template='fName_%s.csv', output_path=fileDir, keep_headers=True):
# NOTE(review): the module-level `filehandler` defined before this function is a path string, not an open file.
reader = csv.reader(filehandler, delimiter=delimiter)
current_piece = 1
# NOTE(review): only the file name is joined here; output_path is not used.
current_out_path = os.path.join(
output_name_template % current_piece
)
current_out_writer = csv.writer(open(current_out_path, 'w'))
current_limit = row_limit
if keep_headers:
# The first row is kept so it can be repeated at the top of every piece.
headers = reader.next()
current_out_writer.writerow(headers)
for i, row in enumerate(reader):
# Once the running row count passes the current limit, switch to the next numbered output file.
if i + 1 > current_limit:
current_piece += 1
current_limit = row_limit * current_piece
current_out_path = os.path.join(
output_name_template % current_piece
)
current_out_writer = csv.writer(open(current_out_path, 'w'))
if keep_headers:
current_out_writer.writerow(headers)
current_out_writer.writerow(row)
It runs the script, but nothing happens, could someone help me?
I think you have not called the function split() at all. You also set all the values as default parameters in the function definition; the version below passes them explicitly when calling split(). Check whether you get output with the following code.
import os
import csv
fileDir = ('C:\\somedir\\')
fName = input('Write the file name here, without extention: ')
lineNo = int(input('Number of lines pr csv file: '))
fNameF = (fName + '.csv')
filehandler = (fileDir + fNameF)
def split(filehandler, delimiter, row_limit,
          output_name_template, output_path, keep_headers):
    """Split a CSV source into numbered files of ``row_limit`` data rows each.

    Written for Python 3 (``next()`` and ``open(..., newline='')``).

    Args:
        filehandler: Path of the CSV file to read, or an already opened
            text file / iterable of lines. A path is opened and closed
            here; an object passed in by the caller is left open.
        delimiter: Field delimiter of the input file.
        row_limit: Maximum number of data rows written to each piece.
        output_name_template: ``%``-style template that receives the
            piece number, e.g. ``'part_%s.csv'``.
        output_path: Directory in which the pieces are created.
        keep_headers: When true, the first input row is treated as a
            header and repeated at the top of every piece.
    """
    def open_piece(piece):
        # Pieces are created inside output_path, not the working directory.
        piece_path = os.path.join(output_path, output_name_template % piece)
        # newline='' lets the csv module control line endings itself.
        return open(piece_path, 'w', newline='')

    # csv.reader needs an iterable of lines; a bare path string would be
    # iterated character by character, so open it first.
    opened_here = isinstance(filehandler, (str, bytes, os.PathLike))
    source = open(filehandler, newline='') if opened_here else filehandler
    current_out_file = None
    try:
        reader = csv.reader(source, delimiter=delimiter)
        current_piece = 1
        current_out_file = open_piece(current_piece)
        current_out_writer = csv.writer(current_out_file)

        # next(reader, None) tolerates a completely empty input file.
        headers = next(reader, None) if keep_headers else None
        if headers is not None:
            current_out_writer.writerow(headers)

        for i, row in enumerate(reader):
            if i + 1 > row_limit * current_piece:
                # The current piece is full: close it and start the next.
                current_out_file.close()
                current_piece += 1
                current_out_file = open_piece(current_piece)
                current_out_writer = csv.writer(current_out_file)
                if headers is not None:
                    current_out_writer.writerow(headers)
            current_out_writer.writerow(row)
    finally:
        if current_out_file is not None:
            current_out_file.close()
        if opened_here:
            source.close()
split(filehandler,',', lineNo,'fName_%s.csv', fileDir, True)

Convert CSV to excel using xlsxwriter and save it to django Model

I don't have any problem converting this CSV or downloading it; my problem is saving it to a Django model FileField.
The minimized sample code:
# Download a CSV report from S3, convert it to an xlsx workbook and attach it to a Doc record.
# NOTE(review): indentation was lost in this paste, so the nesting must be inferred from the statements.
def download_convert_reports_s3_temp():
# Build a URL for the report key in the bucket; expires_in=600 is passed to generate_url.
def get_report_url():
bucket_name = 'temp_bucket'
conn = boto.connect_s3(AWS_ACCESS_KEY_ID,
AWS_SECRET_ACCESS_KEY)
bucket = conn.get_bucket(bucket_name)
key = bucket.get_key('TEMP_2017-01-10.csv')
return key.generate_url(expires_in=600)
# Fetch the Doc owned by the first User.
def get_doc():
return Doc.objects.get(owner=User.objects.first())
# Copy the response body into a NamedTemporaryFile and wrap it in File.
def get_file(file):
file_temp = NamedTemporaryFile(delete=True)
file_temp.write(file.content)
file_temp.flush()
return File(file_temp)
def convert_csv_to_xlsx():
request = requests.get(get_report_url())
csvfile = get_file(request)
from django.conf import settings
# NOTE(review): the '%s' string below is used with .format(), `file` is not defined in this function,
# and the parenthesis opened by Workbook( is not closed after the options dict.
excelFile = xlsxwriter.Workbook('report.xlsx', {
'strings_to_numbers': True,
'default_date_format': 'yy/mm/dd',
'tmpdir': settings.MEDIA_ROOT +
'/documents/%s'.format(file.name.rsplit('.')[0] + '.xlsx')
}
# NOTE(review): this rebinds excelFile to the model's file field, discarding the Workbook created above.
excelFile = get_doc().file
worksheet = excelFile.add_worksheet()
worksheet.write('A1', 'data')
worksheet.write('B1', 'data')
worksheet.write('C1', 'data')
worksheet.write('D1', 'data')
worksheet.write('E1', 'data')
# Start from the first cell. Rows and columns are zero indexed.
row = 1
col = 0
# NOTE(review): csvfile is the File object returned by get_file(), yet it is passed to open() like a path.
with open(csvfile, 'rb') as f:
content = csv.reader(f)
# Iterate over the data and write it out row by row.
for row_data in content:
for data in row_data:
worksheet.write(row, col, data)
col += 1
row += 1
col = 0
f.close()
excelFile.close()
return convert_csv_to_xlsx()
The problem is that I really don't know how to save this Excel file to doc.file.
I tried Django's FieldFile.save and got:
---> 19 read = property(lambda self: self.file.
AttributeError: 'Workbook' object has no attribute 'read'
Any suggestion Thanks
def convert_csv_to_xlsx():
    """Convert the CSV from ``get_file()`` to xlsx and store it on the Doc.

    The workbook is first written to a temporary path under
    ``settings.MEDIA_ROOT``, then handed to ``doc.file.save()`` and finally
    removed from disk. Only the first five columns of each CSV row are
    written, under the five fixed header titles.
    """
    csvfile = get_file()
    from django.conf import settings

    # Build the temporary workbook path once; it is needed for writing,
    # re-opening and cleanup.
    xlsx_path = settings.MEDIA_ROOT + '/documents%s' % (
        csvfile.name.rsplit('.')[0] + '.xlsx')

    excelFile = xlsxwriter.Workbook(filename=xlsx_path)
    bold = excelFile.add_format({'bold': 1, 'align': 'left', 'bg_color': 'red', 'color': 'white'})
    worksheet = excelFile.add_worksheet()
    worksheet.set_column(0, 4, width=15)
    worksheet.write('A1', 'Sender MSISDN', bold)
    worksheet.write('B1', 'Reciever MSISDN', bold)
    worksheet.write('C1', 'Amount', bold)
    worksheet.write('D1', 'Transaction ID', bold)
    worksheet.write('E1', 'Datetime', bold)

    # Data starts on the second sheet row (index 1), below the headers.
    # Rows and columns are zero indexed.
    for row, row_data in enumerate(csv.reader(csvfile), start=1):
        # Columns beyond the fifth are ignored.
        for col, data in enumerate(row_data[:5]):
            worksheet.write(row, col, data)
    csvfile.close()
    # The workbook must be closed before its file is read back below.
    excelFile.close()

    doc = get_doc()
    now = datetime.now()
    try:
        # xlsx is a binary format: read it back in 'rb' mode, and close the
        # handle once the field has stored its copy.
        with open(xlsx_path, 'rb') as xlsx_handle:
            doc.file.save(
                name='RECHARGE_%d-%s-%s.xlsx' % (now.year,
                                                 validate_date(now.month),
                                                 validate_date(now.day)
                                                 ),
                content=File(xlsx_handle)
            )
    finally:
        # Remove the temporary workbook even if saving to the model fails.
        os.remove(xlsx_path)

Python: numpy, how to len of csv

I have 2 files:
res_my_inp.csv : http://ge.tt/8Fa4d512/v/0?c
res_my_out.csv : http://ge.tt/8Fa4d512/v/1?c
I want to get matrix from res_my_inp.csv and array from res_my_out.csv (get only 1 column)
My code:
import csv
in_filepath = os.path.abspath( "res_my_inp.csv" )
out_filepath = os.path.abspath( "res_my_out.csv" )
# Read a tab-delimited CSV file and return its content according to `mode`:
#   'input'  -> a list of rows, built on a list that starts out as [[]]
#   'output' -> a list with int(row[1]) for every row, built on an empty list
# In both cases the element at index 0 is popped before returning.
# NOTE(review): indentation was lost in this paste, so the nesting must be inferred from the statements.
# NOTE(review): `data` is not assigned when mode is neither 'input' nor 'output'; the file is never closed.
def ReadCsv( fileName, mode ):
in_file = open( fileName )
reader = csv.reader( in_file, delimiter = '\t', quotechar = '"' )
if ( mode == 'input' ):
# The list already holds one empty list before any row is appended.
data = [[]]
for row in reader:
data.append( row )
elif( mode == 'output' ):
# Only the second field (index 1) of each row is kept, converted to int.
column = 1
data = []
for row in reader:
data.append( int( row[column] ) )
# Drop the first collected element.
data.pop(0)
return data
X = np.asarray( ReadCsv( in_filepath, 'input' ), 'int' )
Y = np.asarray( ReadCsv( out_filepath, 'output' ), 'int' )
print(len(X))
print(len(Y))
Why len(X) = 100, but len(Y) = 99 ?
The length discrepancy is because this line:
data = [[]]
is different from this line:
data = []
In the 'input' branch you've got an empty list at the start of your data list that isn't present in the 'output' branch.

Comparing two CSV files and print the difference

I'm trying to compare two CSV files: CSVFileOne and CSVFileTwo.
My desired output is to print only the rows of CSVFileOne that do not exist in CSVFileTwo.
My code:
# Load both CSV files into lists of dicts (header row skipped), then compare their 'bstring' fields.
# NOTE(review): indentation was lost in this paste, so the nesting must be inferred from the statements.
input_file = CSVFileOne
ABGSOne = []
with open(input_file, encoding='UTF-8') as fone:
rowsOne = csv.reader(fone,delimiter=",",lineterminator="\n")
# Skip the header row.
next(rowsOne, None)
for rowOne in rowsOne:
abbgone = {}
# NOTE(review): the loop variable is rowOne, but the fields are read from `row`.
abbgone['astringOne'] = row[0]
abbgone['bstringOne'] = row[1]
abbgone['cstringOne'] = row[2]
abbgone['dstringOne'] = row[3]
ABGSOne.append(abbgone)
input_fileTwo = CSVFileTwo
ABGSTwo = []
with open(input_fileTwo, encoding='UTF-8') as ftwo:
rowsTwo = csv.reader(ftwo,delimiter=",",lineterminator="\n")
# Skip the header row.
next(rowsTwo, None)
for rowTwo in rowsTwo:
abbgtwo = {}
# NOTE(review): the loop variable is rowTwo, but the fields are read from `row`.
abbgtwo['astringTwo'] = row[0]
abbgtwo['bstringTwo'] = row[1]
abbgtwo['cstringTwo'] = row[2]
abbgtwo['dstringTwo'] = row[3]
# NOTE(review): appends to ABGSOne (not ABGSTwo) and uses the name abbgTwo, which differs in case from abbgtwo.
ABGSOne.append(abbgTwo)
for abbgone in ABGSOne:
# NOTE(review): abbgtwo here is whatever dict the second loop built last.
if abbgone['bstringOne'] == abbgtwo['bstringTwo']:
# NOTE(review): the quotes and the parenthesis on the next line are unbalanced.
print('abbgone['bstringOne']
Try this out:
# Write to Desired.csv every line of CSVFileOne.csv that does not occur in
# CSVFileTwo.csv, keeping the order of CSVFileOne.csv.
with open('CSVFileOne.csv', 'r') as t1, open('CSVFileTwo.csv', 'r') as t2:
    # Line endings are stripped so that a final line without a trailing
    # newline still compares equal to the same line in the other file.
    fileone = [line.rstrip('\r\n') for line in t1]
    # A set makes each membership test O(1) instead of scanning a list.
    filetwo = {line.rstrip('\r\n') for line in t2}

with open('Desired.csv', 'w') as outFile:
    for line in fileone:
        if line not in filetwo:
            outFile.write(line + '\n')

Categories