I converted a few .csv files using python code to .xls but now when i try to open the files it gives me an error : "COUNT_DIST2.xls cannot be accessed. The file may be corrupt, located on a server that is not responding , or read only".
I have created many other files in this projects which are both .csv and .xls but none of them are giving me this problem. I think in my code somewhere maybe i am doing something wrong. I am very sure this is not a network error or office error because i am able to open all the other documents.
Attached is the code:
import pandas as pd
import numpy as np
from xlrd import open_workbook
from xlwt import Workbook
from xlutils.copy import copy
from openpyxl import load_workbook
import matplotlib.pyplot as plt
import xlwt
import os
path = ('C:\Users\PETERemote\PycharmProjects\untitled\distributions')
data = []
count =1
count2 = 0
for files in os.listdir(path):
if files.endswith("COUNT16_DISTRIBUTION" + str(count*1) + ".csv"):
count += 1
count2 = count-2
print(count2)
#print(count2 = count)
count3=1
file_name = "COUNT16_DISTRIBUTION" + str(count3*1) + ".csv"
while (count3<=count2):
with open(file_name) as f:
for line in f:
data.append([word for word in line.split(" ") if word])
wb = xlwt.Workbook()
output_file = open("COUNT16_DIST" + str(count3 * 1) + ".xls", 'w')
count3 += 1
sheet = wb.add_sheet("Sheet1")
for row_index in range(len(data)):
for col_index in range(len(data[row_index])):
sheet.write(row_index, col_index, data[row_index][col_index])
wb.save(output_file)
data = []
output_file.close()
Here's an example using xlsxwriter:
import os
import glob
import csv
from xlsxwriter.workbook import Workbook
for csvfile in glob.glob(os.path.join('.', '*.csv')):
workbook = Workbook(csvfile[:-4] + '.xlsx')
worksheet = workbook.add_worksheet()
with open(csvfile, 'rt', encoding='utf8') as f:
reader = csv.reader(f)
for r, row in enumerate(reader):
for c, col in enumerate(row):
worksheet.write(r, c, col)
workbook.close()
FYI, there is also a package called openpyxl, that can read/write Excel
xlsx/xlsm files which I have tested it and it's working fine.
Related
I have 3 excel files currently in my working directory. All 3 files has name that ends with "_Updated.xlsx". I wanted to transform the files such that all empty rows in each of the files get deleted. I have created function for it, but the only issue is I cannot save all transformed file using below code. Not sure what is wrong ? The reason for creating new file is I would like to save my raw files.
Python code
import openpyxl
import os
from openpyxl import load_workbook,Workbook
import glob
from pathlib import Path
Excel_file_path="/Excel"
for file in Path(Excel_file_path).glob('*_Updated.xlsx'):
wb=load_workbook(file)
wb_modified = False
for sheet in wb.worksheets:
max_row_in_sheet = sheet.max_row
max_col_in_sheet = sheet.max_column
sheet_modified = False
if max_row_in_sheet > 1:
first_nonempty_row = nonempty_row() # Function to find nonempty row
sheet_modified = del_rows_before(first_nonempty_row) #Function to delete nonempty row
wb_modified = wb_modified or sheet_modified
if wb_modified:
for workbook in workbooks:
for sheet in wb.worksheets:
new_wb = Workbook()
ws = new_wb.active
for row_data in sheet.iter_rows():
for row_cell in row_data:
ws[row_cell.coordinate].value = row_cell.value
new_wb.save("/Excel/"+sheet.title+"_Transformed.xlsx")
In case, if any one is still looking for answer to my above question. Below is the code that worked for me.
import openpyxl
import os
from openpyxl import load_workbook
import glob
from pathlib import Path
Excel_file_path="/Excel"
for file in Path(Excel_file_path).glob('*_Updated.xlsx'):
wb=load_workbook(file)
wb_modified = False
for sheet in wb.worksheets:
max_row_in_sheet = sheet.max_row
max_col_in_sheet = sheet.max_column
sheet_modified = False
if max_row_in_sheet > 1:
first_nonempty_row = get_first_nonempty_row() # Function to find nonempty row
sheet_modified = del_rows_before(first_nonempty_row) #Function to delete nonempty roW
file_name = os.path.basename(file)
wb.save("Excel/"+file_name[:-5]+"_Transformed.xlsx")
wb.close()
I'm trying to convert all CSV files within a directory into one XLXS file with each csv file becoming a separate worksheet.
The code below works except when I provide the input path in this Line
"for filename in glob.glob(InputPath + "*.csv"):"
I get this error - InvalidWorksheetName: Invalid Excel character '[]:*?/' in sheetname
Does anyone have a suggestion how I can get around this? Full code is below - Thanks!
import xlsxwriter
import glob
import csv
InputPath = r"C:\\Users\\.spyder-py3\\"
workbook = xlsxwriter.Workbook(r"C:\\Users\\.spyder-py3\\Output\\compiled.xlsx")
for filename in glob.glob(InputPath + "\*.csv"):
ws = workbook.add_worksheet(str(filename.split('.')[0]))
spamReader = csv.reader(open(filename, 'r'), delimiter=',',quotechar='"')
row_count = 0
print(filename)
for row in spamReader:
for col in range(len(row)):
ws.write(row_count,col,row[col])
row_count +=1
workbook.close()
Try this:
import xlsxwriter
import glob
import csv
InputPath = r"C:\Users\.spyder-py3"
workbook = xlsxwriter.Workbook(r"C:\Users\.spyder-py3\Output\compiled.xlsx")
for filename in glob.glob(InputPath + r"\*.csv"):
ws = workbook.add_worksheet(str(filename.split('.')[0]))
spamReader = csv.reader(open(filename, 'r'), delimiter=',',quotechar='"')
row_count = 0
print(filename)
for row in spamReader:
for col in range(len(row)):
ws.write(row_count,col,row[col])
row_count +=1
workbook.close()
import os
import pandas as pd
from glob import glob
import pathlib
import fileinput
import sys
import xlsxwriter
def csv_folder_input(folder):
path = sys.path[0]
path = path + "/" + folder
os.chdir(path)
counter = 1
for filename in os.listdir(path):
if filename.endswith(".csv"):
with open(filename, 'r') as csvfile:
df = pd.DataFrame(csvfile)
with pd.ExcelWriter('output.xlsx') as writer:
df.to_excel(writer, sheet_name='sheet '+str(counter), index=False)
writer.save()
counter = counter + 1
Currently it overrides each excel file sheet, but I want each CSV file to make a new sheet on excel
def csv_folder_input(folder):
path = sys.path[0]
path = os.path.join(path,folder)
os.chdir(path)
counter=1
writer = pd.ExcelWriter('output.xlsx')
for filename in os.listdir(path):
if filename.endswith(".csv"):
print(filename)
with open(filename, 'r') as csvfile:
counter=counter+1
print(counter)
df = pd.read_csv(csvfile)
df.to_excel(writer,sheet_name=os.path.splitext(filename)[0]+'_'+str(counter),index=False)
writer.save()
writer.close()
I have just modified your function. Please note that to read your Dataframe into a csv,
pd.read_csv() is the function.
You have used pd.DataFrame(csv_file), which I believe is the incorrect way to read it.
You will find your output.xlsx in the same path as your folder.
It re-writes the existing excel file because the ExcelWriter is defined inside the loop. You need to create an excel only once by defining it outside the loop and should add sheets to them using the loop. The below code worked for me
def csv_folder_input(folder):
path = sys.path[0]
path = path + "/" + folder
os.chdir(path)
counter = 1
with pd.ExcelWriter('output.xlsx') as writer:
for filename in os.listdir(path):
if filename.endswith(".csv"):
with open(filename, 'r') as csvfile:
df = pd.DataFrame(csvfile)
df.to_excel(writer, sheet_name=filename, index=False)
print(f"Added sheet to excel: {filename}")
counter = counter + 1
writer.save()
writer.close()
I have made a little code using pyexcel to convert all files in my folder from csv to xlsx. But I want to export it with the same name (instead of file1.xlsx) as it was for each file in the folder. Can you help please?
from pyexcel.cookbook import merge_all_to_a_book
import pyexcel.ext.xlsx
import glob
import os
os.chdir(“/Users/vanicek/Desktop/csv2xlsx” )
i = 0
for file in glob.glob(“*.csv”):
while os.path.exists(“file%s.xlsx” % i):
i+=1
merge_all_to_a_book(glob.glob(“*.csv”), “file%s.xlsx” % i)
print “Exported.”
import os
import glob
import csv
from xlsxwriter.workbook import Workbook
for csvfile in glob.glob(os.path.join('.', '*.csv')):
workbook = Workbook(csvfile[:-4] + '.xlsx')
worksheet = workbook.add_worksheet()
with open(csvfile, 'rt', encoding='utf8') as f:
reader = csv.reader(f)
for r, row in enumerate(reader):
for c, col in enumerate(row):
worksheet.write(r, c, col)
workbook.close()
is there a way to create multi excel files with xlsxwriter?
from itertools import chain
import glob ,csv, sys, os
openSoundingFile = 'D:/apera/Workspace/Sounding2/*.txt'
for filename in glob.glob(openSoundingFile):
newName = filename
spamReader = csv.reader(open(filename, 'rb'), delimiter=';',quotechar='"')
workbook = xlsxwriter.Workbook('D:/apera/Workspace/Sounding2/' + newName[:-4] + '.xlsx'
sheet = workbook.add_worksheet('Original data')
for rowx, row in enumerate(spamReader):
for colx, value in enumerate(row):
sheet.write(rowx, colx, value)
workbook.close()
so i wanna to save all the txt files to exel files. I think the problem is here
workbook = xlsxwriter.Workbook('D:/apera/Workspace/Sounding2/' + newName[:-4] + '.xlsx'
if i'm not using + newName[:-4] + it will work but only write 1 excel files. Is there a way to do it?
The error showed that you combined the path to the files 2 times on top of each other:
'D:/apera/Workspace/Sounding2/bla.txtD:/apera/Workspace/Sounding2/'
This does it for me:
import xlsxwriter
import glob ,csv
openSoundingFile = 'D:/apera/Workspace/Sounding2/*.txt'
for filename in glob.glob(openSoundingFile):
spamReader = csv.reader(open(filename, 'rb'), delimiter=';',quotechar='"')
# Note that filename is the full path already! Just [:-4] to remove .txt
workbook = xlsxwriter.Workbook(filename[:-4] + '.xlsx')
sheet = workbook.add_worksheet('Original data')
for rowx, row in enumerate(spamReader):
for colx, value in enumerate(row):
sheet.write(rowx, colx, value)
workbook.close()