Split CSV file using Python shows not all data in Excel - python

I am trying to dump the values in my Django database to a csv, then write the contents of the csv to an Excel spreadsheet which looks like a table (one value per cell), so that my users can export a spreadsheet of all records in the database from Django admin. Right now when I export the file, I get this (only one random value out of many and not formatted correctly):
What am I doing wrong? Not sure if I am using list comprehensions wrong, reading the file incorrectly, or if there is something wrong with my for loop. Please help!
def dump_table_to_csv(db_table, io):
with connection.cursor() as cursor:
cursor.execute("SELECT * FROM %s" % db_table, [])
row = cursor.fetchall()
writer = csv.writer(io)
writer.writerow([i[0] for i in cursor.description])
writer.writerow(row)
with open('/Users/nicoletorek/emarshal/myfile.csv', 'w') as f:
dump_table_to_csv(Attorney._meta.db_table, f)
with open('/Users/nicoletorek/emarshal/myfile.csv', 'r') as f:
db_list = f.read()
split_db_list = db_list.split(',')
output = BytesIO()
workbook = xlsxwriter.Workbook(output)
worksheet_s = workbook.add_worksheet("Summary")
header = workbook.add_format({
'bg_color': '#F7F7F7',
'color': 'black',
'align': 'center',
'valign': 'top',
'border': 1
})
row = 0
col = 0
for x in split_db_list:
worksheet_s.write(row + 1, col + 1, x, header)

The immediate problem with your sample code, as Jean-Francois points out, is that you aren't incrementing your counters in the loop. Also you may also find it more readable to use xlsxwriter.write_row() instead of xlsxwriter.write(). At the moment a secondary complication is you aren't preserving row information when you read in your data from the CSV.
If your data looks like this:
row_data = [[r1c1, r1c2], [r2c1, r2c2], ... ]
You can then use:
for index, row in enumerate(row_data):
worksheet_s.write_row(index, 0, row)
That said, I assume you are interested in the .xlsx because you want control over formatting. If the goal is to just to generate the .xlsx and there is no need for the intermediate .csv, why not just create the .xlsx file directly? This can be accomplished nicely in a view:
import io
from django.http import HttpResponse
def dump_attorneys_to_xlsx(request):
output = io.BytesIO()
workbook = xlsxwriter.Workbook(output, {'in_memory': True})
worksheet = workbook.add_worksheet('Summary')
attorneys = Attorney.objects.all().values()
# Write header
worksheet.write_row(0, 0, attorneys[0].keys())
# Write data
for row_index, row_dict in enumerate(attorneys, start=1):
worksheet.write_row(row_index, 0, row_dict.values())
workbook.close()
output.seek(0)
response = HttpResponse(output.read(), content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
response['Content-Disposition'] = 'attachment; filename=summary.xlsx'
return response

Your CSV file could be read in and written as follows:
import csv
workbook = xlsxwriter.Workbook('output.xlsx')
worksheet_s = workbook.add_worksheet("Summary")
with open(r'\Users\nicoletorek\emarshal\myfile.csv', 'rb') as f_input:
csv_input = csv.reader(f_input)
for row_index, row_data in enumerate(csv_input):
worksheet_s.write_row(row_index, 0, row_data)
workbook.close()
This uses the csv library to ensure the rows are correctly read in, and the write_row function to allow the whole row to be written using a single call. The enumerate() function is used to provide a running row_index value.

Related

How do I automate the repetitive action of applying filters in CSV in Python?

In a nutshell what I want is to be able to:
select a file thats csv,or anything excel
From that file
apply a filter on the the selections needed
save that applied filter view to a new file xls that just adds "for review" to the file name to the current folder
`
# Import the necessary libraries
import openpyxl
import os
import time
import csv
# Define the path to the CSV file
csv_file = "C:/Users/USERNAME//Desktop/filename.csv"
# Open the CSV file and create a reader object
with open(csv_file, "r") as f:
reader = csv.reader(f)
# Iterate over the rows in the reader
rows = []
for row in reader:
rows.append(row)
# Open the Excel document and create a new worksheet
wb = openpyxl.Workbook()
ws = wb.active
# Write the rows from the CSV file to the Excel worksheet
for row in rows:
ws.append(row)
# Apply filters to the top row
ws.auto_filter.ref = "A1:Z1"
# Filter column A by "Network Upload Egress" and "Removable Media Egress"
ws.auto_filter.add_filter_column(0, \["Network Upload Egress", "Removable Media Egress"\])
# Save a copy of the Excel document with "Evidence Review" added to the file name
new_file = os.path.splitext(excel_file)\[0\] + " Evidence Review.xlsx"
# Display a loading animation while the process is running
print("Processing...")
for i in range(10):
time.sleep(0.5)
print(".", end="")
# Save the copy of the Excel document
try:
wb.save(new_file)
print("\\nProcess complete!")
except IOError:
print("Error saving the copy of the Excel document. Make sure you have permission to save files to the specified location.")`
try the below code.
import csv
def filter_csv(input_file, output_file, filter_column, filter_value):
with open(input_file, 'r') as in_file, open(output_file, 'w', newline='') as out_file:
# Create CSV reader and writer
reader = csv.reader(in_file)
writer = csv.writer(out_file)
# Find the index of the filter column
header = next(reader)
filter_column_index = header.index(filter_column)
# Write the header row to the output file
writer.writerow(header)
# Iterate through the rows in the input file
for row in reader:
# If the filter value is in the filter column, write the row to the output file
if row[filter_column_index] == filter_value:
writer.writerow(row)
Example usage
filter_csv('input.csv', 'output.csv', 'State', 'CA')

How can I convert Cell of Openpyxl from Text to Number format?

I wrote a code to convert a text file into excel file using Openpyxl extension of Python.
Although the value are setting properly into the column but they are showing as a text instead of number. Although I tried to convert, seems like it is not working.
Can anyone please correct the code?
import csv
import openpyxl
import openpyxl as oxl
input_file = r'C:\Python\Test.txt'
output_file = r'C:\Python\Test.xlsx'
wb = oxl.Workbook()
ws = wb.active
ws.number_format = 'General'
ws.title = "Waveform"
#ws = wb.create_sheet(title='Waveform')
with open(input_file, 'r') as data:
reader = csv.reader(data, delimiter='\t')
for row in reader:
ws.append(row)
for row in range(2, ws.max_row+1):
ws["{}{}".format("A", row)].number_format = 'General'
ws["{}{}".format("B", row)].number_format = 'General'
wb.save(output_file)
Here is the output excel file
the read data from txt file will be in string. So, as suggested by jezza, you need to convert list to float. You don't need the 'number_format` lines you have. Updated code is here. Note that the conversion map assumes all data can be converted to float (no text). The try/catch will basically skip the row if there is text on any row
import csv
#import openpyxl
import openpyxl as oxl
input_file = r'C:\Python\Test.txt'
output_file = r'C:\Python\Test.xlsx'
wb = oxl.Workbook()
ws = wb.active
#ws.number_format = 'General'
ws.title = "Waveform"
#ws = wb.create_sheet(title='Waveform')
with open(input_file, 'r') as data:
reader = csv.reader(data, delimiter='\t')
for row in reader:
try:
row = list(map(float, row))
ws.append(row)
except:
print("Skipping row ", row)
pass
#for row in range(2, ws.max_row+1):
# ws["{}{}".format("A", row)].number_format = 'General'
# ws["{}{}".format("B", row)].number_format = 'General'
wb.save(output_file)
Output

Export Oracle database column headers with column names into csv

I am making a program that fetches column names and dumps the data into csv format.
Now everything is working just fine and data is being dumped into csv, the problem is,
I am not able to fetch headers into csv. If I open the exported csv file into excel, only data shows up not the column headers. How do I do that?
Here's my code:
import cx_Oracle
import csv
dsn_tns = cx_Oracle.makedsn(--Details--)
conn = cx_Oracle.connect(--Details--)
d = conn.cursor()
csv_file = open("profile.csv", "w")
writer = csv.writer(csv_file, delimiter=',', lineterminator="\n", quoting=csv.QUOTE_NONNUMERIC)
d.execute("""
select * from all_tab_columns where OWNER = 'ABBAS'
""")
tables_tu = d.fetchall()
for row in tables_tu:
writer.writerow(row)
conn.close()
csv_file.close()
What code do I use to export headers too in csv?
Place this just above your for loop:
writer.writerow(i[0] for i in d.description)
Because d.description is a read-only attribute containing 7-tuples that look like:
(name,
type_code,
display_size,
internal_size,
precision,
scale,
null_ok)

Add data to new column and first row in CSV file

I have a python function that creates a CSV file using a Postgresql copy statement. I need to add a new column to this spreadsheet called 'UAL' with an example value in the first row of say 30,000, but without editing the copy statement. This is the current code:
copy_sql = 'COPY (
SELECT
e.name AS "Employee Name",
e.title AS "Job Title"
e.gross AS "Total Pay",
e.total AS "Total Pay & Benefits",
e.year AS "Year",
e.notes AS "Notes",
j.name AS "Agency",
e.status AS "Status"
FROM employee_employee e
INNER JOIN jurisdiction_jurisdiction j on e.jurisdiction_id = j.id
WHERE
e.year = 2011 AND
j.id = 4479
ORDER BY "Agency" ASC, "Total Pay & Benefits" DESC
)'
with open(path, 'w') as csvfile:
self.cursor.copy_expert(copy_sql, csvfile)
What I am trying to do is use something like csv.writer to add content like this:
with open(path, 'w') as csvfile:
self.cursor.copy_expert(copy_sql, csvfile)
writer = csv.writer(csvfile)
writer.writerow('test123')
But this is adding the text to the last row. I am also unsure how to add a new header column. Any advice?
adding a header is easy: write the header before the call to copy_expert.
with open(path, 'w') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["my","super","header"])
self.cursor.copy_expert(copy_sql, csvfile)
But adding a column cannot be done without re-reading the file again and add your info on each row, so the above solution doesn't help much.
If the file isn't too big and fits in memory, you could write the sql output to a "fake" file:
import io
fakefile = io.StringIO()
self.cursor.copy_expert(copy_sql, fakefile)
now rewind the file and parse it as csv, add the extra column when writing it back
import csv
fakefile.seek(0)
with open(path, 'w', newline="") as csvfile:
writer = csv.writer(csvfile)
reader = csv.reader(fakefile) # works if copy_expert uses "," as separator, else change it
writer.writerow(["my","super","header","UAL"])
for row in reader:
writer.writerow(row+[30000])
or instead of the inner loop:
writer.writerows(row+[30000] for row in reader)
And if the file is too big, write it in a temp file, and proceed the same way (less performant)

Reading contents of excel file in python webapp2

I have two files namely sample.csv and sample.xlsx, all those files are stored in blobstore.I am able to read the records of csv file(which is in the blobstore) using the following code
blobReader = blobstore.BlobReader(blob_key)
inputFile = BlobIterator(blobReader)
if inputFile is None:
values = None
else:
try:
stringReader = csv.reader(inputFile)
data = []
columnHeaders = []
for rowIndex, row in enumerate(stringReader):
if(rowIndex == 0):
columnHeaders = row
else:
data.append(row)
values = {'columnHeaders' : columnHeaders, 'data' : data}
except:
values = None
self.response.write(values)
The output of the above code of a sample.csv file is
{'columnHeaders': ['First Name', 'Last Name', 'Email', 'Mobile'], 'data': [['fx1', 'lx2', 'flx1x2#xxx.com', 'xxx-xxx-xxxx'], ['fy1', 'ly2', 'fly1y2#yyy.com', 'yyy-yyy-yyyy'], ['fz1', 'lz2', 'flz1z2#zzz.com', 'zzz-zzz-zzzz']]}
Using the xlrd package, i am able to read the excel file contents, but in this i have to specify the exact file location
book = xlrd.open_workbook('D:/sample.xlsx')
first_sheet = book.sheet_by_index(0)
self.response.write(first_sheet.row_values(0))
cell = first_sheet.cell(0,0)
self.response.write(cell.value)
Is there any way to read the excel file contents from the blobstore, i have tried it with the following code
blobReader = blobstore.BlobReader(blobKey)
uploadedFile = BlobIterator(blobReader)
book = xlrd.open_workbook(file_contents=uploadedFile)
(or)
book = xlrd.open_workbook(file_contents=blobReader)
But it throws some error TypeError: 'BlobReader' object has no attribute 'getitem'.
Any ideas? Thanks..
Looking into the doc for open_workbook in the xlrd package doc, it seems that when you pass "file_contents", it's expecting a string.
Then you need to look into turning a Blob into a String, which can be done with BlobReader.read(), which gives you a string of the read data.

Categories