Python - How to convert CSV to XLSX? - python

I have code that converts CSV to XLSX. The only problem is that after the conversion some numeric columns are stored as text, which makes SQL unable to convert them from nvarchar to float.
Code:
import csv, os
from glob import glob
from xlsxwriter.workbook import Workbook
import pandas as pd
import numpy as np
# Convert the semicolon-delimited CSV into an .xlsx workbook, then rename its
# sheet. The output path is defined once so that the xlsxwriter step and the
# openpyxl step always refer to the same file (mixing 'FILE.xlsx' and
# "file.xlsx" breaks on case-sensitive filesystems).
output_path = 'FILE.xlsx'
for csvfile in glob('FILE.CSV'):
    # strings_to_numbers asks xlsxwriter to store strings that float() can
    # parse as numbers.
    # NOTE(review): values float() rejects (e.g. a decimal comma such as
    # "1,5") presumably stay text -- check the number format used in the CSV.
    workbook = Workbook(output_path, {'strings_to_numbers': True, 'constant_memory': True})
    worksheet = workbook.add_worksheet()
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(csvfile, 'r', newline='') as f:
        r = csv.reader(f, delimiter=';')
        # constant_memory mode expects rows to be written in increasing
        # order, which this row-by-row loop does.
        for row_index, row in enumerate(r):
            for col_index, data in enumerate(row):
                worksheet.write(row_index, col_index, data)
    workbook.close()
import openpyxl
# Re-open the generated workbook only to rename its single sheet.
ss = openpyxl.load_workbook(output_path)
ss_sheet = ss['Sheet1']
ss_sheet.title = 'plan1'
ss.save(output_path)
print("-------------------------------------------")
print(" .CSV to .XLSX Conversion Successful")
print("-------------------------------------------")

Related

How can I convert Cell of Openpyxl from Text to Number format?

I wrote some code that converts a text file into an Excel file using the openpyxl package for Python.
The values are written into the correct columns, but they show up as text instead of numbers. I tried to convert them, but it does not seem to work.
Can anyone please correct the code?
import csv
import openpyxl
import openpyxl as oxl
input_file = r'C:\Python\Test.txt'
output_file = r'C:\Python\Test.xlsx'
wb = oxl.Workbook()
ws = wb.active
ws.number_format = 'General'
ws.title = "Waveform"
#ws = wb.create_sheet(title='Waveform')
# Copy the tab-separated text file into the sheet, one sheet row per line.
with open(input_file, 'r') as data:
    reader = csv.reader(data, delimiter='\t')
    for row in reader:
        # csv.reader yields every field as str, so the cells receive text.
        ws.append(row)
# Set the display format of columns A and B from row 2 downwards; this only
# changes formatting, not the type of the values already stored.
for row in range(2, ws.max_row+1):
    ws["{}{}".format("A", row)].number_format = 'General'
    ws["{}{}".format("B", row)].number_format = 'General'
wb.save(output_file)
Here is the output excel file
The data read from the txt file is all strings. So, as suggested by jezza, you need to convert each row to floats. You don't need the `number_format` lines you have. The updated code is below. Note that the `map` conversion assumes all data can be converted to float (no text); the try/except will simply skip any row that contains text.
import csv
#import openpyxl
import openpyxl as oxl
input_file = r'C:\Python\Test.txt'
output_file = r'C:\Python\Test.xlsx'
wb = oxl.Workbook()
ws = wb.active
ws.title = "Waveform"
# newline='' lets the csv module handle line endings itself.
with open(input_file, 'r', newline='') as data:
    reader = csv.reader(data, delimiter='\t')
    for row in reader:
        # Only the float conversion is guarded: a row with non-numeric text
        # is reported and skipped, while any other error still surfaces.
        try:
            numbers = [float(field) for field in row]
        except ValueError:
            print("Skipping row ", row)
            continue
        ws.append(numbers)
# No number_format tweaks are needed: the cells now hold real floats.
wb.save(output_file)
Output

How to convert multiple CSV files to XLSX in Python?

I'm trying to convert all CSV files within a directory into one XLSX file, with each CSV file becoming a separate worksheet.
The code below works except when I provide the input path in this Line
"for filename in glob.glob(InputPath + "*.csv"):"
I get this error - InvalidWorksheetName: Invalid Excel character '[]:*?/' in sheetname
Does anyone have a suggestion how I can get around this? Full code is below - Thanks!
import xlsxwriter
import glob
import csv
InputPath = r"C:\\Users\\.spyder-py3\\"
workbook = xlsxwriter.Workbook(r"C:\\Users\\.spyder-py3\\Output\\compiled.xlsx")
for filename in glob.glob(InputPath + "\*.csv"):
    # The sheet name is everything before the first '.' of the full path.
    ws = workbook.add_worksheet(str(filename.split('.')[0]))
    spamReader = csv.reader(open(filename, 'r'), delimiter=',',quotechar='"')
    row_count = 0
    print(filename)
    # Copy the CSV cell by cell into the new worksheet.
    for row in spamReader:
        for col in range(len(row)):
            ws.write(row_count,col,row[col])
        row_count +=1
workbook.close()
Try this:
import xlsxwriter
import glob
import csv
import os

# Characters Excel does not allow in a worksheet name.
INVALID_SHEET_CHARS = '[]:*?/\\'

InputPath = r"C:\Users\.spyder-py3"
workbook = xlsxwriter.Workbook(r"C:\Users\.spyder-py3\Output\compiled.xlsx")
for filename in glob.glob(InputPath + r"\*.csv"):
    # Build the sheet name from the file's base name only: the full path
    # contains ':' and '\', which is what triggers InvalidWorksheetName.
    stem = os.path.splitext(os.path.basename(filename))[0]
    sheet_name = ''.join('_' if ch in INVALID_SHEET_CHARS else ch for ch in stem)
    # Excel limits sheet names to 31 characters. Names must also be unique,
    # so files whose names only differ after character 31 would collide.
    ws = workbook.add_worksheet(sheet_name[:31])
    print(filename)
    # The with-block closes the CSV file once it has been copied.
    with open(filename, 'r', newline='') as csv_file:
        spamReader = csv.reader(csv_file, delimiter=',', quotechar='"')
        for row_count, row in enumerate(spamReader):
            # write_row() writes the whole list starting at column 0.
            ws.write_row(row_count, 0, row)
workbook.close()

Python Exporting a file via pyexcel (csv to xlsx)

I have written a small script using pyexcel to convert all CSV files in my folder to XLSX. But I want each exported file to keep the same name as its source file (instead of file1.xlsx). Can you help, please?
from pyexcel.cookbook import merge_all_to_a_book
import pyexcel.ext.xlsx
import glob
import os
os.chdir("/Users/vanicek/Desktop/csv2xlsx")
i = 0
for file in glob.glob("*.csv"):
    # Find the first fileN.xlsx name that is not taken yet.
    while os.path.exists("file%s.xlsx" % i):
        i += 1
    # NOTE(review): the original indentation was lost; this call is assumed to
    # sit inside the for loop. It merges ALL CSV files into one workbook.
    merge_all_to_a_book(glob.glob("*.csv"), "file%s.xlsx" % i)
print("Exported.")
import os
import glob
import csv
from xlsxwriter.workbook import Workbook
# Convert every .csv file in the current directory into an .xlsx file that
# keeps the same base name.
for csvfile in glob.glob(os.path.join('.', '*.csv')):
    # splitext() swaps the extension without assuming it is 4 characters long.
    workbook = Workbook(os.path.splitext(csvfile)[0] + '.xlsx')
    worksheet = workbook.add_worksheet()
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(csvfile, 'rt', encoding='utf8', newline='') as f:
        reader = csv.reader(f)
        for r, row in enumerate(reader):
            for c, col in enumerate(row):
                worksheet.write(r, c, col)
    # Closing the workbook is what actually writes the .xlsx file to disk.
    workbook.close()

Writing large data to a excel column cell with looping

I am parsing a .ts file and extracting strings from it. I want to write each string to an Excel column. Can anyone help me?
from xml.dom import minidom
import sys, xlsxwriter
import pandas as pd
import numpy as np
# Python 2 only: reload() as a builtin and sys.setdefaultencoding() do not
# exist in Python 3.
reload(sys)
sys.setdefaultencoding('utf-8')
# Parse english.ts into a DOM document once, at module load.
doc = minidom.parse("english.ts")
# main(): walk every <message> element, print the text of its first <source>
# child and export it to 'new.xlsx' through pandas.
def main():
writer = pd.ExcelWriter('new.xlsx', engine='xlsxwriter')
messages = doc.getElementsByTagName("message")
for message in messages:
# Text content of the first <source> element of this message.
source = message.getElementsByTagName("source")[0]
ori_string = source.firstChild.data
print ori_string
# One-row DataFrame that holds only the current string.
df = pd.DataFrame({'TString': [ori_string]})
# NOTE(review): a second ExcelWriter for 'new.xlsx' is created here. The
# indentation of this listing was lost; if these lines run inside the loop,
# each pass presumably overwrites the file written by the previous one -- confirm.
writer = pd.ExcelWriter('new.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1')
writer.save()
if __name__ == '__main__':
main()
I am not getting an error and "ori_string" is printed, but nothing is written to the Excel sheet.
I just want to write the strings to an Excel column. How do I iterate over rows, and how do I give the row and column number inside the loop?
from xml.dom import minidom
import sys, xlsxwriter
# Python 2 only: forces the interpreter's default string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
doc = minidom.parse("english.ts")


def main():
    """Write the text of each <message>'s first <source> to column A of data3.xlsx.

    Message i goes to row i, so an empty <source/> leaves its cell blank
    instead of shifting the rows that follow.
    """
    messages = doc.getElementsByTagName("message")
    workbook = xlsxwriter.Workbook('data3.xlsx')
    worksheet = workbook.add_worksheet()
    for row, message in enumerate(messages):
        source = message.getElementsByTagName("source")[0]
        # An empty <source/> has no child node; treat it as an empty string
        # rather than failing on None.data.
        text_node = source.firstChild
        ori_string = text_node.data if text_node is not None else ''
        print(ori_string)
        worksheet.write(row, 0, ori_string)
    # The file is only written to disk when the workbook is closed.
    workbook.close()


if __name__ == '__main__':
    main()
Now it's working. Thank you @DavidG for your suggestion.

Unable to open Excel file created in the Python code

I converted a few .csv files to .xls using Python code, but now when I try to open the files I get an error: "COUNT_DIST2.xls cannot be accessed. The file may be corrupt, located on a server that is not responding, or read only".
I have created many other files in this project, both .csv and .xls, and none of them give me this problem, so I think I am doing something wrong somewhere in my code. I am very sure this is not a network error or an Office error, because I am able to open all the other documents.
Attached is the code:
import pandas as pd
import numpy as np
from xlrd import open_workbook
from xlwt import Workbook
from xlutils.copy import copy
from openpyxl import load_workbook
import matplotlib.pyplot as plt
import xlwt
import os
# Directory that is scanned for COUNT16_DISTRIBUTION<n>.csv files.
# NOTE(review): in Python 3 a non-raw 'C:\Users...' literal is a syntax error
# (\U starts a unicode escape) -- presumably this ran on Python 2; confirm.
path = ('C:\Users\PETERemote\PycharmProjects\untitled\distributions')
data = []
count =1
count2 = 0
# Advance `count` each time a directory entry ends with the suffix built from
# the current value of `count`.
for files in os.listdir(path):
if files.endswith("COUNT16_DISTRIBUTION" + str(count*1) + ".csv"):
count += 1
count2 = count-2
print(count2)
#print(count2 = count)
count3=1
# NOTE(review): file_name is built once from count3 == 1 and is not reassigned
# in the lines shown, so every pass of the while loop would read the same
# CSV file -- confirm.
file_name = "COUNT16_DISTRIBUTION" + str(count3*1) + ".csv"
while (count3<=count2):
with open(file_name) as f:
for line in f:
# Split each line on single spaces and drop the empty fragments.
data.append([word for word in line.split(" ") if word])
wb = xlwt.Workbook()
# NOTE(review): the .xls target is opened in text mode ('w') and the handle is
# passed to wb.save() below. .xls is a binary format, so this presumably needs
# 'wb' (or passing the file name straight to wb.save) and is the likely cause
# of the "file may be corrupt" error -- confirm.
output_file = open("COUNT16_DIST" + str(count3 * 1) + ".xls", 'w')
count3 += 1
sheet = wb.add_sheet("Sheet1")
# Copy the collected words into the sheet, one word per cell.
for row_index in range(len(data)):
for col_index in range(len(data[row_index])):
sheet.write(row_index, col_index, data[row_index][col_index])
wb.save(output_file)
data = []
output_file.close()
Here's an example using xlsxwriter:
import os
import glob
import csv
from xlsxwriter.workbook import Workbook
# Convert every .csv file in the current directory into an .xlsx file that
# keeps the same base name.
for csvfile in glob.glob(os.path.join('.', '*.csv')):
    # splitext() swaps the extension without assuming it is 4 characters long.
    workbook = Workbook(os.path.splitext(csvfile)[0] + '.xlsx')
    worksheet = workbook.add_worksheet()
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(csvfile, 'rt', encoding='utf8', newline='') as f:
        reader = csv.reader(f)
        for r, row in enumerate(reader):
            for c, col in enumerate(row):
                worksheet.write(r, c, col)
    # Closing the workbook is what actually writes the .xlsx file to disk.
    workbook.close()
FYI, there is also a package called openpyxl that can read/write Excel
xlsx/xlsm files; I have tested it and it works fine.

Categories