Python/Excel - Merge .xlsx workbooks from file into .csv - python

I am attempting to compile multiple .xlsx workbooks from a folder into a single .csv file.
The loop I've created is only capturing the first workbook from the folder.
How can I alter this to capture all workbooks in the folder? The data is only on Sheet1 in every workbook.
import os
import xlrd
import csv
# Walk `rootdir`, open each file found with xlrd and write the rows of its
# first sheet to a CSV file.
# NOTE(review): the indentation of this listing was lost; the nesting shown
# here is reconstructed from the statement order and should be confirmed.
rootdir = r'C:\Users\username\Desktop\Mults'
filenames = []
for subdir, dir, files in os.walk(rootdir):
    for file in files:
        filenames.append(os.path.join(subdir, file))
        wb = xlrd.open_workbook(os.path.join(subdir, file))
        sh = wb.sheet_by_index(0)
        # NOTE(review): presumably this `with` sits inside the loop, in which
        # case the CSV is reopened in 'wb' (truncate) mode for every workbook
        # and only one workbook's rows survive in the output.
        with open('acit_multsTEST.csv','wb') as f:
            c = csv.writer(f)
            for r in range(sh.nrows):
                c.writerow(sh.row_values(r))
I appreciate any help!
Thank you!

So you have to do the following.
Get a list of all the workbooks
Open a main csv to append all your data to
Iterate through your list of workbooks
Append each sheet to your csv
import glob
import os
import xlrd
import csv
# Merge the first sheet of every .xlsx workbook found directly in ROOTDIR
# into a single CSV file.
# NOTE: xlrd 2.0 and later only read legacy .xls files; reading .xlsx needs
# xlrd < 2.0 (or another reader such as openpyxl).
ROOTDIR = r'C:\Users\username\Desktop\Mults'
wb_pattern = os.path.join(ROOTDIR, '*.xlsx')
# glob.glob() already returns paths that include ROOTDIR, so they can be
# opened as-is. Sorting makes the order of the merged rows deterministic.
workbooks = sorted(glob.glob(wb_pattern))

# The output file is opened exactly once, before the loop, so every workbook
# appends to it instead of overwriting the previous one. newline='' is what
# the csv module expects for files it writes (Python 3).
with open('out.csv', 'w', newline='') as outcsv:
    writer = csv.writer(outcsv)
    for book_path in workbooks:
        book = xlrd.open_workbook(book_path)
        sheet = book.sheet_by_index(0)
        for row_num in range(sheet.nrows):
            # row_values() returns the cell values of one row as a list.
            writer.writerow(sheet.row_values(row_num))

Related

How to convert multiple CSV files to XLSX in Python?

I'm trying to convert all CSV files within a directory into one XLSX file, with each CSV file becoming a separate worksheet.
The code below works, except when I provide the input path in this line:
"for filename in glob.glob(InputPath + "*.csv"):"
Then I get this error - InvalidWorksheetName: Invalid Excel character '[]:*?/' in sheetname
Does anyone have a suggestion for how I can get around this? The full code is below - thanks!
import xlsxwriter
import glob
import csv
# Copy every CSV file matched under InputPath into its own worksheet of one
# xlsxwriter workbook.
# NOTE(review): the indentation of this listing was lost; the nesting shown
# here is reconstructed from the statement order.
# These are raw strings, so each doubled backslash stays as two characters.
InputPath = r"C:\\Users\\.spyder-py3\\"
workbook = xlsxwriter.Workbook(r"C:\\Users\\.spyder-py3\\Output\\compiled.xlsx")
for filename in glob.glob(InputPath + "\*.csv"):
    # `filename` is the matched path including InputPath, so the text before
    # its first '.' still contains drive and directory characters when it is
    # used as the worksheet name.
    ws = workbook.add_worksheet(str(filename.split('.')[0]))
    spamReader = csv.reader(open(filename, 'r'), delimiter=',',quotechar='"')
    row_count = 0
    print(filename)
    for row in spamReader:
        for col in range(len(row)):
            ws.write(row_count,col,row[col])
        row_count +=1
workbook.close()
Try this:
import xlsxwriter
import glob
import csv
# Copy every CSV file in InputPath into its own worksheet of one workbook.
import os  # needed for base-name handling; the snippet's imports omit it

InputPath = r"C:\Users\.spyder-py3"
workbook = xlsxwriter.Workbook(r"C:\Users\.spyder-py3\Output\compiled.xlsx")
for filename in glob.glob(os.path.join(InputPath, "*.csv")):
    # glob returns the full path. Excel rejects sheet names containing any of
    # []:*?/\ and names longer than 31 characters, so the name is built from
    # the file's base name only (no directory, no extension), with any
    # forbidden character replaced and the result truncated.
    sheet_name = os.path.splitext(os.path.basename(filename))[0]
    for forbidden in '[]:*?/\\':
        sheet_name = sheet_name.replace(forbidden, '_')
    ws = workbook.add_worksheet(sheet_name[:31])
    print(filename)
    # The context manager closes the CSV file once it has been copied.
    with open(filename, 'r', newline='') as csv_file:
        spamReader = csv.reader(csv_file, delimiter=',', quotechar='"')
        for row_count, row in enumerate(spamReader):
            for col, value in enumerate(row):
                ws.write(row_count, col, value)
workbook.close()

How can I ask the user for a folder path, then write the file name and header row of every Excel file in that folder into a new Excel file in Python?

This is the code I have been working on, but I am not sure how to merge the results into one new Excel file containing each file name and its header.
import os
import xlrd
# Prompt for a folder and a workbook, then print the workbook's first-row
# values and the folder's file names.
# NOTE(review): the indentation of this listing was lost; the nesting shown
# here is one plausible reconstruction and should be confirmed.
Folder_path = input ("Enter the file path :")
def listDir(dir):
    # Names of the entries in the folder that was passed in.
    fileNames = os.listdir(dir)
    # A single workbook path is requested separately from the folder.
    loc = input ("Enter the path of file + filename :")
    wb = xlrd.open_workbook(loc)
    sheet = wb.sheet_by_index(0)
    # For row 0 and column 0
    sheet.cell_value(0, 0)
    # Print every cell of row 0 (the header row).
    for i in range(sheet.ncols):
        print(sheet.cell_value(0, i))
    # `i` keeps its last value from the loop above, so each file name is
    # printed together with the last header cell of the one opened workbook.
    for filename in fileNames:
        print(filename + sheet.cell_value(0, i))
if __name__ == '__main__':
    listDir(Folder_path)
With this code I can get the header values and the file names, but I want the output laid out as shown in the picture: a new Excel file that lists each file name together with the header of that particular file.
The output should be as follows:
https://i.stack.imgur.com/7bXoE.png
After I get the file names, I want to read the header of each file and write it to a new Excel file that shows each file name and its header.
This is one way of doing it:
import os
from pathlib import Path
import xlrd
import pandas as pd
def listDir(inputdir):
    """Collect the header row of every Excel workbook in *inputdir*.

    Args:
        inputdir: Folder to scan (a ``str`` or ``pathlib.Path``). Only its
            direct entries are examined.

    Returns:
        A list with one entry per workbook, in sorted file-name order. Each
        entry is a list whose first item is the file name, followed by the
        values of row 0 of the workbook's first sheet.
    """
    allheaders = []
    for filename in sorted(os.listdir(inputdir)):
        # Build the path from the argument, not from a module-level global,
        # so the function works for whichever folder it is given.
        loc = os.path.join(inputdir, filename)
        # Skip sub-folders and non-Excel files instead of handing them to xlrd.
        if not os.path.isfile(loc):
            continue
        if not filename.lower().endswith(('.xls', '.xlsx')):
            continue
        wb = xlrd.open_workbook(loc)
        sheet = wb.sheet_by_index(0)
        headers = [filename]
        # An empty sheet has ncols == 0, so no cell is read in that case.
        headers.extend(sheet.cell_value(0, i) for i in range(sheet.ncols))
        allheaders.append(headers)
    return allheaders
# Driver: ask for the folder, collect one header list per file and write the
# result to ListOfHeaders.xlsx.
Enter_path =input("Enter the file path :")
# Module-level name; the listDir shown with this snippet reads it.
Folder_path = Path(Enter_path)
allheaders=listDir(Folder_path)
# One DataFrame row per file: the file name first, then its header cells.
df=pd.DataFrame(allheaders)
# header=False / index=False: write the data without column labels or row index.
df.to_excel("ListOfHeaders.xlsx",header=False, index=False)

How do I create a new sheet in excel for every csv file I have in my folder

import os
import pandas as pd
from glob import glob
import pathlib
import fileinput
import sys
import xlsxwriter
# NOTE(review): the indentation of this listing was lost; the nesting shown
# here is reconstructed from the statement order and should be confirmed.
def csv_folder_input(folder):
    """Write each CSV file in *folder* to a sheet of output.xlsx.

    *folder* is appended to sys.path[0] and the process changes into that
    directory before listing it.
    """
    path = sys.path[0]
    path = path + "/" + folder
    os.chdir(path)
    counter = 1
    for filename in os.listdir(path):
        if filename.endswith(".csv"):
            with open(filename, 'r') as csvfile:
                # The DataFrame is built from the open file object itself.
                df = pd.DataFrame(csvfile)
            # NOTE(review): presumably this writer is created once per CSV
            # file, i.e. inside the loop, which matches the overwrite
            # behaviour the question describes.
            with pd.ExcelWriter('output.xlsx') as writer:
                df.to_excel(writer, sheet_name='sheet '+str(counter), index=False)
                writer.save()
            counter = counter + 1
Currently it overwrites the sheet in the Excel file on every iteration, but I want each CSV file to become a new sheet in the Excel file.
def csv_folder_input(folder):
    """Write every CSV file in *folder* to its own sheet of output.xlsx.

    Args:
        folder: Folder name, joined onto ``sys.path[0]`` (normally the
            directory of the running script).

    Side effects:
        Changes the working directory to the resolved folder, prints each
        file name and counter value, and creates ``output.xlsx`` inside that
        folder. Nothing is written when the folder contains no ``.csv`` file.
    """
    path = os.path.join(sys.path[0], folder)
    os.chdir(path)
    csv_names = [name for name in os.listdir(path) if name.endswith(".csv")]
    if not csv_names:
        # A workbook needs at least one sheet, so there is nothing to save.
        return
    counter = 1
    # A single writer is created outside the loop so that all sheets land in
    # the same workbook. The context manager saves and closes it exactly
    # once, even on error; the separate save()/close() calls are gone
    # (ExcelWriter.save() no longer exists in pandas 2.x).
    with pd.ExcelWriter('output.xlsx') as writer:
        for filename in csv_names:
            print(filename)
            # Incremented before use, so the first sheet suffix is 2.
            counter = counter + 1
            print(counter)
            df = pd.read_csv(filename)
            sheet_name = os.path.splitext(filename)[0] + '_' + str(counter)
            df.to_excel(writer, sheet_name=sheet_name, index=False)
I have just modified your function. Please note that pd.read_csv() is the
function to use for reading a CSV file into a DataFrame.
You have used pd.DataFrame(csv_file), which I believe is the incorrect way to read it.
You will find output.xlsx inside the folder you passed in.
It overwrites the existing Excel file because the ExcelWriter is created inside the loop. You need to create the writer only once, outside the loop, and then add sheets to it inside the loop. The code below worked for me:
def csv_folder_input(folder):
    """Write every CSV file in *folder* to its own sheet of output.xlsx.

    Args:
        folder: Folder name, joined onto ``sys.path[0]`` (normally the
            directory of the running script).

    Side effects:
        Changes the working directory to the resolved folder, prints one line
        per sheet added, and creates ``output.xlsx`` inside that folder.
        Nothing is written when the folder contains no ``.csv`` file.
    """
    path = os.path.join(sys.path[0], folder)
    os.chdir(path)
    csv_names = [name for name in os.listdir(path) if name.endswith(".csv")]
    if not csv_names:
        # A workbook needs at least one sheet, so there is nothing to save.
        return
    # The writer is created once, outside the loop, so each CSV adds a sheet
    # instead of replacing the workbook. Leaving the `with` block saves and
    # closes the file, so no explicit save()/close() call is needed.
    with pd.ExcelWriter('output.xlsx') as writer:
        for filename in csv_names:
            # read_csv parses the file into columns; building a DataFrame
            # from the file object would give one column of raw text lines.
            df = pd.read_csv(filename)
            df.to_excel(writer, sheet_name=filename, index=False)
            print(f"Added sheet to excel: {filename}")

Unable to open Excel file created in Python code

I converted a few .csv files to .xls using Python code, but now when I try to open the files I get an error: "COUNT_DIST2.xls cannot be accessed. The file may be corrupt, located on a server that is not responding , or read only".
I have created many other files in this project, both .csv and .xls, and none of them give me this problem, so I think I am doing something wrong somewhere in my code. I am very sure this is not a network error or an Office error, because I am able to open all the other documents.
Attached is the code:
import pandas as pd
import numpy as np
from xlrd import open_workbook
from xlwt import Workbook
from xlutils.copy import copy
from openpyxl import load_workbook
import matplotlib.pyplot as plt
import xlwt
import os
# Count the COUNT16_DISTRIBUTION<n>.csv files in `path`, then convert them to
# COUNT16_DIST<n>.xls workbooks with xlwt.
# NOTE(review): the indentation of this listing was lost; the nesting shown
# here is one plausible reconstruction and should be confirmed.
# NOTE(review): this is not a raw string; under Python 3 the `\U` sequence is
# read as the start of a unicode escape.
path = ('C:\Users\PETERemote\PycharmProjects\untitled\distributions')
data = []
count =1
count2 = 0
for files in os.listdir(path):
    # `count` advances only when the entry ends with the next expected name.
    if files.endswith("COUNT16_DISTRIBUTION" + str(count*1) + ".csv"):
        count += 1
count2 = count-2
print(count2)
#print(count2 = count)
count3=1
# The input name is built once, before the loop starts.
file_name = "COUNT16_DISTRIBUTION" + str(count3*1) + ".csv"
while (count3<=count2):
    with open(file_name) as f:
        for line in f:
            # Split on single spaces and drop the empty strings.
            data.append([word for word in line.split(" ") if word])
    wb = xlwt.Workbook()
    # NOTE(review): the output is opened in text mode ('w') and then handed
    # to wb.save(); confirm whether xlwt needs a binary stream or a file
    # name here - this may be why the result cannot be opened.
    output_file = open("COUNT16_DIST" + str(count3 * 1) + ".xls", 'w')
    count3 += 1
    sheet = wb.add_sheet("Sheet1")
    for row_index in range(len(data)):
        for col_index in range(len(data[row_index])):
            sheet.write(row_index, col_index, data[row_index][col_index])
    wb.save(output_file)
    # Reset the buffer for the next file.
    data = []
    output_file.close()
Here's an example using xlsxwriter:
import os
import glob
import csv
from xlsxwriter.workbook import Workbook
# Convert each *.csv file in the current directory into an .xlsx workbook of
# the same base name, copying every cell across as text.
csv_paths = glob.glob(os.path.join('.', '*.csv'))
for csv_path in csv_paths:
    # Swap the four-character ".csv" suffix for ".xlsx".
    xlsx_path = csv_path[:-4] + '.xlsx'
    book = Workbook(xlsx_path)
    target_sheet = book.add_worksheet()
    with open(csv_path, 'rt', encoding='utf8') as handle:
        for row_idx, cells in enumerate(csv.reader(handle)):
            for col_idx, cell in enumerate(cells):
                target_sheet.write(row_idx, col_idx, cell)
    # Closing the workbook is what writes the .xlsx file.
    book.close()
FYI, there is also a package called openpyxl that can read and write Excel
xlsx/xlsm files. I have tested it and it works fine.

write multiple excel files with xlsxwriter python

Is there a way to create multiple Excel files with xlsxwriter?
from itertools import chain
import glob ,csv, sys, os
# Convert every .txt file matched by the glob pattern into an .xlsx workbook.
# NOTE(review): the indentation of this listing was lost; the nesting shown
# here is reconstructed from the statement order.
openSoundingFile = 'D:/apera/Workspace/Sounding2/*.txt'
for filename in glob.glob(openSoundingFile):
    # `filename` is the matched path, directory included.
    newName = filename
    spamReader = csv.reader(open(filename, 'rb'), delimiter=';',quotechar='"')
    # The directory is prepended again here although `newName` already holds
    # it; as posted, this call is also missing its closing parenthesis.
    workbook = xlsxwriter.Workbook('D:/apera/Workspace/Sounding2/' + newName[:-4] + '.xlsx'
    sheet = workbook.add_worksheet('Original data')
    for rowx, row in enumerate(spamReader):
        for colx, value in enumerate(row):
            sheet.write(rowx, colx, value)
    workbook.close()
So I want to save all the .txt files as Excel files. I think the problem is here:
workbook = xlsxwriter.Workbook('D:/apera/Workspace/Sounding2/' + newName[:-4] + '.xlsx'
If I don't use + newName[:-4] + it works, but it only writes one Excel file. Is there a way to do this?
The error shows that you added the directory path twice, once on top of the other:
'D:/apera/Workspace/Sounding2/bla.txtD:/apera/Workspace/Sounding2/'
This works for me:
import xlsxwriter
import glob ,csv
# Convert every ';'-delimited .txt file matched by the pattern into an .xlsx
# workbook stored next to it.
openSoundingFile = 'D:/apera/Workspace/Sounding2/*.txt'
for filename in glob.glob(openSoundingFile):
    # Note that filename is the full path already! Just [:-4] to remove .txt
    workbook = xlsxwriter.Workbook(filename[:-4] + '.xlsx')
    sheet = workbook.add_worksheet('Original data')
    # On Python 3 csv.reader needs a text-mode file (opened with newline=''),
    # not 'rb'. The context manager closes the input once it has been copied.
    with open(filename, 'r', newline='') as txt_file:
        spamReader = csv.reader(txt_file, delimiter=';', quotechar='"')
        for rowx, row in enumerate(spamReader):
            for colx, value in enumerate(row):
                sheet.write(rowx, colx, value)
    # Closing the workbook is what writes the .xlsx file.
    workbook.close()

Categories