Conversion of a .csv file to an .xls file - python

I'm looking to automate the conversion of a .csv file to an .xls file.
I try this in Python:
import os
import csv
import xlsxwriter
f = open('U:\\INSEE\\Data.csv','r')
reader = csv.reader(f, delimiter=';')
# Création d'un nouveau fichier
fichier = xlsxwriter.Workbook('U:\\INSEE\\fichier.xlsx')
Feuil = fichier.add_worksheet('Feuil')
i=0
for row in reader:
Feuil.write(i,0,row)
i=i+1
reader.close()
fichier.close()
or this :
import os
csv_file = 'C:/Users/../fichier.csv'
excel_file = os.path.splitext(csv_file)[0] + '.xlsx'
All this does not work. Can you help me please?

I would use pandas since it makes this incredibly easy.
import pandas as pd

# The source file is semicolon-delimited, hence sep=';'.
csv_file = pd.read_csv('U:\\INSEE\\Data.csv', sep=';')
# index=False stops pandas from writing its row index as an extra first
# column, so the sheet contains exactly the columns of the CSV.
csv_file.to_excel('U:\\INSEE\\fichier.xlsx', index=False)

Related

How to convert multiple CSV files to XLSX in Python?

I'm trying to convert all CSV files within a directory into one XLSX file, with each CSV file becoming a separate worksheet.
The code below works except when I provide the input path in this Line
"for filename in glob.glob(InputPath + "*.csv"):"
I get this error - InvalidWorksheetName: Invalid Excel character '[]:*?/' in sheetname
Does anyone have a suggestion how I can get around this? Full code is below - Thanks!
import xlsxwriter
import glob
import csv
InputPath = r"C:\\Users\\.spyder-py3\\"
workbook = xlsxwriter.Workbook(r"C:\\Users\\.spyder-py3\\Output\\compiled.xlsx")
for filename in glob.glob(InputPath + "\*.csv"):
ws = workbook.add_worksheet(str(filename.split('.')[0]))
spamReader = csv.reader(open(filename, 'r'), delimiter=',',quotechar='"')
row_count = 0
print(filename)
for row in spamReader:
for col in range(len(row)):
ws.write(row_count,col,row[col])
row_count +=1
workbook.close()
Try this:
import xlsxwriter
import glob
import csv
import os

InputPath = r"C:\Users\.spyder-py3"
workbook = xlsxwriter.Workbook(r"C:\Users\.spyder-py3\Output\compiled.xlsx")
for filename in glob.glob(InputPath + r"\*.csv"):
    # glob returns the full path. Splitting that on '.' leaves drive and
    # directory characters (':' and '\') in the name, which Excel rejects
    # with InvalidWorksheetName. Use only the file's base name without its
    # extension, capped at Excel's 31-character sheet-name limit.
    sheet_name = os.path.splitext(os.path.basename(filename))[0][:31]
    ws = workbook.add_worksheet(sheet_name)
    print(filename)
    # newline='' is what the csv module expects for files it reads; the
    # with-block closes the file once its rows have been copied.
    with open(filename, 'r', newline='') as csv_handle:
        spamReader = csv.reader(csv_handle, delimiter=',', quotechar='"')
        for row_count, row in enumerate(spamReader):
            for col, value in enumerate(row):
                ws.write(row_count, col, value)
# The workbook is only written to disk when it is closed.
workbook.close()

Python - How to convert CSV to XLSX?

I have code to convert CSV to XLSX. The only problem is that when I do the conversion, some numeric columns are stored as text, and that makes SQL unable to convert them from nvarchar to float.
Code:
import csv, os
from glob import glob
from xlsxwriter.workbook import Workbook
import pandas as pd
import numpy as np
for csvfile in glob('FILE.CSV'):
name = os.path.basename(csvfile).split('.')[-2]
workbook = Workbook('FILE.xlsx', {'strings_to_numbers': True, 'constant_memory': True})
worksheet = workbook.add_worksheet()
with open(csvfile, 'r') as f:
r = csv.reader(f, delimiter=';')
for row_index, row in enumerate(r):
for col_index, data in enumerate(row):
worksheet.write(row_index, col_index, data)
currency_format = workbook.add_format({'num_format': '$#,##0.00'})
workbook.close()
import openpyxl
ss = openpyxl.load_workbook("file.xlsx")
# printing the sheet names
ss_sheet = ss['Sheet1']
ss_sheet.title = 'plan1'
ss.save("file.xlsx")
print("-------------------------------------------")
print(" .CSV to .XLSX Conversion Successful")
print("-------------------------------------------")

How to read from a csv file in zip folder and save data from csv file in database?

import glob
import os
import csv
import zipfile
from io import StringIO
for name in glob.glob('C:/Users/RAMESH SANTHA/Downloads/download-NIFTY 50-01012020.zip'):
base = os.path.basename(name)
filename = os.path.splitext(base)[0]
datadirectory = 'C:/Users/RAMESH SANTHA/Downloads/'
dataFile = filename
archive = '.'.join([dataFile, 'zip'])
fullpath = ''.join([datadirectory, archive])
csv_file = '.'.join([dataFile, 'csv']) #all fixed
filehandle = open(fullpath, 'rb')
zfile = zipfile.ZipFile(filehandle)
data = StringIO.StringIO(zfile.read(csv_file))
reader = csv.reader(data)
for row in reader:
print (row)
I tried the code above to read data from a zip archive that contains a CSV file and print its rows, but got this error:
data = StringIO.StringIO(zfile.read(csv_file))
AttributeError: type object '_io.StringIO' has no attribute 'StringIO'
There is no StringIO.StringIO() in Python 3; the class is io.StringIO():
import io
data = io.StringIO(...)
With the import you already have (from io import StringIO), you call it directly, without the io. prefix:
from io import StringIO
data = StringIO(...)
BTW: I think you overcomplicated the code by using glob and join(). You can also pass the filename directly to ZipFile, without calling open() first:
import os
import csv
import zipfile
import io

zip_fullname = 'C:/Users/RAMESH SANTHA/Downloads/download-NIFTY 50-01012020.zip'
zip_file = os.path.basename(zip_fullname)
# Swap only the extension; str.replace('.zip', '.csv') would also rewrite
# a '.zip' that happens to occur earlier in the name.
csv_file = os.path.splitext(zip_file)[0] + '.csv'
print(zip_file)  # download-NIFTY 50-01012020.zip
print(csv_file)  # download-NIFTY 50-01012020.csv

# The with-block closes the archive once the member has been read.
with zipfile.ZipFile(zip_fullname) as zfile:
    # ZipFile.read() returns bytes, so decode before handing it to csv.
    data = io.StringIO(zfile.read(csv_file).decode('utf-8'))

reader = csv.reader(data)
for row in reader:
    print(row)
But with pandas it should be even simpler
import pandas as pd
df = pd.read_csv('C:/Users/RAMESH SANTHA/Downloads/download-NIFTY 50-01012020.zip')
print(df)
Looking at the script, you are getting an error when opening the CSV file from the zip file. Below is Python 3 code that works for me with a zip file containing a few CSVs. The directory to extract to should exist before you run the script.
import zipfile
import csv,os
import codecs
import glob

path_to_zip_file = '/tmp/test1.zip'  # Assuming this file exists. This path is from a Mac, but it should work on Windows as well.
directory_to_extract_to = '/tmp/extract/'  # Assuming this directory already exists

# Unpack every member of the archive into the target directory.
with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
    zip_ref.extractall(directory_to_extract_to)

# glob already returns each match prefixed with the directory, so the
# result can be opened as-is. Joining the directory onto it a second time
# would double the prefix whenever the directory is a relative path.
for path in glob.glob(os.path.join(directory_to_extract_to, '*.csv')):
    with open(path, 'rb') as f:
        # The file is opened in binary mode, so decode it lazily as UTF-8.
        # To get each row as a plain list instead of a dict keyed by the
        # header row, use csv.reader(codecs.iterdecode(f, 'utf-8')).
        dictReader = csv.DictReader(codecs.iterdecode(f, 'utf-8'))
        for row in dictReader:
            print(row)

Python Exporting a file via pyexcel (csv to xlsx)

I have made a little code using pyexcel to convert all files in my folder from csv to xlsx. But I want to export it with the same name (instead of file1.xlsx) as it was for each file in the folder. Can you help please?
from pyexcel.cookbook import merge_all_to_a_book
import pyexcel.ext.xlsx
import glob
import os
os.chdir(“/Users/vanicek/Desktop/csv2xlsx” )
i = 0
for file in glob.glob(“*.csv”):
while os.path.exists(“file%s.xlsx” % i):
i+=1
merge_all_to_a_book(glob.glob(“*.csv”), “file%s.xlsx” % i)
print “Exported.”
import os
import glob
import csv
from xlsxwriter.workbook import Workbook

# Convert every CSV in the current directory into an .xlsx file that keeps
# the CSV's base name.
for csvfile in glob.glob(os.path.join('.', '*.csv')):
    # splitext removes the extension whatever its length or case.
    workbook = Workbook(os.path.splitext(csvfile)[0] + '.xlsx')
    worksheet = workbook.add_worksheet()
    # newline='' lets the csv module handle line breaks inside quoted fields.
    with open(csvfile, 'rt', encoding='utf8', newline='') as f:
        reader = csv.reader(f)
        for r, row in enumerate(reader):
            for c, col in enumerate(row):
                worksheet.write(r, c, col)
    # Each workbook is written to disk when it is closed, once per CSV.
    workbook.close()

Unable to open Excel file created in the Python code

I converted a few .csv files to .xls using Python code, but now when I try to open the files I get an error: "COUNT_DIST2.xls cannot be accessed. The file may be corrupt, located on a server that is not responding, or read only".
I have created many other files in this project, both .csv and .xls, but none of them give me this problem. I think I may be doing something wrong somewhere in my code. I am very sure this is not a network error or an Office error, because I am able to open all the other documents.
Attached is the code:
import pandas as pd
import numpy as np
from xlrd import open_workbook
from xlwt import Workbook
from xlutils.copy import copy
from openpyxl import load_workbook
import matplotlib.pyplot as plt
import xlwt
import os
path = ('C:\Users\PETERemote\PycharmProjects\untitled\distributions')
data = []
count =1
count2 = 0
for files in os.listdir(path):
if files.endswith("COUNT16_DISTRIBUTION" + str(count*1) + ".csv"):
count += 1
count2 = count-2
print(count2)
#print(count2 = count)
count3=1
file_name = "COUNT16_DISTRIBUTION" + str(count3*1) + ".csv"
while (count3<=count2):
with open(file_name) as f:
for line in f:
data.append([word for word in line.split(" ") if word])
wb = xlwt.Workbook()
output_file = open("COUNT16_DIST" + str(count3 * 1) + ".xls", 'w')
count3 += 1
sheet = wb.add_sheet("Sheet1")
for row_index in range(len(data)):
for col_index in range(len(data[row_index])):
sheet.write(row_index, col_index, data[row_index][col_index])
wb.save(output_file)
data = []
output_file.close()
Here's an example using xlsxwriter:
import os
import glob
import csv
from xlsxwriter.workbook import Workbook

# Convert every CSV in the current directory into an .xlsx file that keeps
# the CSV's base name.
for csvfile in glob.glob(os.path.join('.', '*.csv')):
    # splitext removes the extension whatever its length or case.
    workbook = Workbook(os.path.splitext(csvfile)[0] + '.xlsx')
    worksheet = workbook.add_worksheet()
    # newline='' lets the csv module handle line breaks inside quoted fields.
    with open(csvfile, 'rt', encoding='utf8', newline='') as f:
        reader = csv.reader(f)
        for r, row in enumerate(reader):
            for c, col in enumerate(row):
                worksheet.write(r, c, col)
    # Each workbook is written to disk when it is closed, once per CSV.
    workbook.close()
FYI, there is also a package called openpyxl that can read/write Excel
xlsx/xlsm files; I have tested it and it works fine.

Categories