So, i'm trying to create a way to convert a tsv file to csv, but already with tab "\t" and delimiter= ' ',
there is a way of doing that?
from xlsxwriter.workbook import Workbook
tsv_file = '1.tsv'
xlsx_file = '1.xlsx'
workbook = Workbook(xlsx_file)
worksheet = workbook.add_worksheet()
tsv_reader = csv.reader(open(tsv_file,'rt'),delimiter="\t")
for row, data in enumerate(tsv_reader):
worksheet.write_row(row, 0, data)
workbook.close()`
Related
I wrote a code to convert a text file into excel file using Openpyxl extension of Python.
Although the value are setting properly into the column but they are showing as a text instead of number. Although I tried to convert, seems like it is not working.
Can anyone please correct the code?
import csv
import openpyxl
import openpyxl as oxl
input_file = r'C:\Python\Test.txt'
output_file = r'C:\Python\Test.xlsx'
wb = oxl.Workbook()
ws = wb.active
ws.number_format = 'General'
ws.title = "Waveform"
#ws = wb.create_sheet(title='Waveform')
with open(input_file, 'r') as data:
reader = csv.reader(data, delimiter='\t')
for row in reader:
ws.append(row)
for row in range(2, ws.max_row+1):
ws["{}{}".format("A", row)].number_format = 'General'
ws["{}{}".format("B", row)].number_format = 'General'
wb.save(output_file)
Here is the output excel file
the read data from txt file will be in string. So, as suggested by jezza, you need to convert list to float. You don't need the 'number_format` lines you have. Updated code is here. Note that the conversion map assumes all data can be converted to float (no text). The try/catch will basically skip the row if there is text on any row
import csv
#import openpyxl
import openpyxl as oxl
input_file = r'C:\Python\Test.txt'
output_file = r'C:\Python\Test.xlsx'
wb = oxl.Workbook()
ws = wb.active
#ws.number_format = 'General'
ws.title = "Waveform"
#ws = wb.create_sheet(title='Waveform')
with open(input_file, 'r') as data:
reader = csv.reader(data, delimiter='\t')
for row in reader:
try:
row = list(map(float, row))
ws.append(row)
except:
print("Skipping row ", row)
pass
#for row in range(2, ws.max_row+1):
# ws["{}{}".format("A", row)].number_format = 'General'
# ws["{}{}".format("B", row)].number_format = 'General'
wb.save(output_file)
Output
I have a code to convert CSV to XLXS the only problem is that when I do the conversion some numeric columns are stored as text. And that makes SQL unable to convert from nvarchar to float.
Code:
import csv, os
from glob import glob
from xlsxwriter.workbook import Workbook
import pandas as pd
import numpy as np
for csvfile in glob('FILE.CSV'):
name = os.path.basename(csvfile).split('.')[-2]
workbook = Workbook('FILE.xlsx', {'strings_to_numbers': True, 'constant_memory': True})
worksheet = workbook.add_worksheet()
with open(csvfile, 'r') as f:
r = csv.reader(f, delimiter=';')
for row_index, row in enumerate(r):
for col_index, data in enumerate(row):
worksheet.write(row_index, col_index, data)
currency_format = workbook.add_format({'num_format': '$#,##0.00'})
workbook.close()
import openpyxl
ss = openpyxl.load_workbook("file.xlsx")
# printing the sheet names
ss_sheet = ss['Sheet1']
ss_sheet.title = 'plan1'
ss.save("file.xlsx")
print("-------------------------------------------")
print(" .CSV to .XLSX Conversion Successful")
print("-------------------------------------------")
I have dictionary of dataframes.
dd = {
'table': pd.DataFrame({'Name':['Banana'], 'color':['Yellow'], 'type':'Fruit'}),
'another_table':pd.DataFrame({'city':['Atlanta'],'state':['Georgia'], 'Country':['United States']}),
'and_another_table':pd.DataFrame({'firstname':['John'], 'middlename':['Patrick'], 'lastnme':['Snow']}),
}
I would like to create an Excel file which contains Excel Table objects created from these dataframes. Each Table needs to be on a separate Tab/Sheet and Table names should match dataframe names.
Is this possible to do with Python?
So far I was only able to export data to Excel normally without converting to tables using xlsxwriter
writer = pd.ExcelWriter('Results.xlsx', engine='xlsxwriter')
for sheet, frame in dd.items():
frame.to_excel(writer, sheet_name = sheet)
writer.save()
For writing multiple sheets from Pandas, use the openpyxl library. In addition, to prevent overwriting, set the workbook sheets before each update.
Try this code:
import pandas as pd
import openpyxl
dd = {
'table': pd.DataFrame({'Name':['Banana'], 'color':['Yellow'], 'type':'Fruit'}),
'another_table':pd.DataFrame({'city':['Atlanta'],'state':['Georgia'], 'Country':['United States']}),
'and_another_table':pd.DataFrame({'firstname':['John'], 'middlename':['Patrick'], 'lastnme':['Snow']}),
}
filename = 'Results.xlsx' # must exist
wb = openpyxl.load_workbook(filename)
writer = pd.ExcelWriter(filename, engine='openpyxl')
for sheet, frame in dd.items():
writer.sheets = dict((ws.title, ws) for ws in wb.worksheets) # need this to prevent overwrite
frame.to_excel(writer, index=False, sheet_name = sheet)
writer.save()
# convert data to tables
wb = openpyxl.load_workbook(filename)
for ws in wb.worksheets:
mxrow = ws.max_row
mxcol = ws.max_column
tab = openpyxl.worksheet.table.Table(displayName=ws.title, ref="A1:" + ws.cell(mxrow,mxcol).coordinate)
ws.add_table(tab)
wb.save(filename)
Output
As of now i can read EXCEL file's all sheet.
e.msgbox("select Excel File")
updated_deleted_xls = e.fileopenbox()
book = xlrd.open_workbook(updated_deleted_xls, formatting_info=True)
openfile = e.fileopenbox()
for sheet in book.sheets():
for row in range(sheet.nrows):
for col in range(sheet.ncols):
thecell = sheet.cell(row, 0)
xfx = sheet.cell_xf_index(row, 0)
xf = book.xf_list[xfx]
If you open your editor from the desktop or command line, you would have to specify the file path while trying to read the file:
import pandas as pd
df = pd.read_excel(r'File path', sheet_name='Sheet name')
Alternatively, if you open your editor in the file's directory, then you could read directly using the panda library
import pandas as pd
df = pd.read_excel('KPMG_VI_New_raw_data_update_final.xlsx', sheet_name='Title Sheet')
df1 = pd.read_excel('KPMG_VI_New_raw_data_update_final.xlsx',sheet_name='Transactions')
df2 = pd.read_excel('KPMG_VI_New_raw_data_update_final.xlsx', sheet_name='NewCustomerList')
df3 = pd.read_excel('KPMG_VI_New_raw_data_update_final.xlsx', sheet_name='CustomerDemographic')
df4 = pd.read_excel('KPMG_VI_New_raw_data_update_final.xlsx', sheet_name='CustomerAddress')
Maybe Pandaswould be helpful ( the go-to package for data) :
import pandas as pd
df = pd.read_excel('filname.xls', sheet = 0)
Edit: Since a lot of time has passed and pandas matured the arguemnts have change. So for pandas >1.0.0
import pandas as pd
df = pd.read_excel('filname.xls', sheet_name = 0)
You can use book.sheet_by_name() to read specific sheets by their name from xls file.
for name, sheet_name in zip(filename, sheetnumber):
book = xlrd.open_workbook(name)
sheet = book.sheet_by_name(sheet_name)
for row in range(sheet.nrows):
for column in range(sheet.ncols):
thecell = sheet.cell(row, 0)
xfx = sheet.cell_xf_index(row, 0)
xf = book.xf_list[xfx]
filename is the path to your xls file. Specify the sheet number you need to read in sheetnumber.
Alternatively, you could use book.sheet_by_index() and pass argument to return a specific sheet.
From docs:
sheet_by_index(sheetx)
Parameters: sheetx – Sheet index in range(nsheets)
For example:
first_sheet = book.sheet_by_index(0) # returns the first sheet.
You can use either book.sheet_by_name() or book.get_sheet()
Example using get_sheet()
book = xlrd.open_workbook(updated_deleted_xls, formatting_info=True)
sheet = book.get_sheet(0) #Gets the first sheet.
Example using sheet_by_name()
book = xlrd.open_workbook(updated_deleted_xls, formatting_info=True)
sheet_names = book.sheet_names()
xl_sheet = xl_workbook.sheet_by_name(sheet_names[0])
MoreInfo on getting sheet by sheet_by_name
is there a way to create multi excel files with xlsxwriter?
from itertools import chain
import glob ,csv, sys, os
openSoundingFile = 'D:/apera/Workspace/Sounding2/*.txt'
for filename in glob.glob(openSoundingFile):
newName = filename
spamReader = csv.reader(open(filename, 'rb'), delimiter=';',quotechar='"')
workbook = xlsxwriter.Workbook('D:/apera/Workspace/Sounding2/' + newName[:-4] + '.xlsx'
sheet = workbook.add_worksheet('Original data')
for rowx, row in enumerate(spamReader):
for colx, value in enumerate(row):
sheet.write(rowx, colx, value)
workbook.close()
so i wanna to save all the txt files to exel files. I think the problem is here
workbook = xlsxwriter.Workbook('D:/apera/Workspace/Sounding2/' + newName[:-4] + '.xlsx'
if i'm not using + newName[:-4] + it will work but only write 1 excel files. Is there a way to do it?
The error showed that you combined the path to the files 2 times on top of each other:
'D:/apera/Workspace/Sounding2/bla.txtD:/apera/Workspace/Sounding2/'
This does it for me:
import xlsxwriter
import glob ,csv
openSoundingFile = 'D:/apera/Workspace/Sounding2/*.txt'
for filename in glob.glob(openSoundingFile):
spamReader = csv.reader(open(filename, 'rb'), delimiter=';',quotechar='"')
# Note that filename is the full path already! Just [:-4] to remove .txt
workbook = xlsxwriter.Workbook(filename[:-4] + '.xlsx')
sheet = workbook.add_worksheet('Original data')
for rowx, row in enumerate(spamReader):
for colx, value in enumerate(row):
sheet.write(rowx, colx, value)
workbook.close()