Data displayed wrong when exported to excel - python

So I have this code:
# Question script: builds every combination of the three input lists and
# writes the combinations to an .xlsx file with xlsxwriter.
import itertools
import xlsxwriter
from pprint import pprint
inputdata = [
['900x200', '200x500', '232x232'],
['242423', '232d2s', '2sdad'],
['AA', 'BB'],
]
# Cartesian product: one tuple per combination, one item taken from each list.
result = list(itertools.product(*inputdata))
pprint(result)
workbook = xlsxwriter.Workbook('Documents/files/arrays.xlsx')
worksheet = workbook.add_worksheet()
row = 0
# NOTE: the enumerate() index is used as the column and write_column() is
# called, so every combination ends up in its own column. This is the
# "rotated" layout the question asks about.
for col, data in enumerate(result):
worksheet.write_column(row, col, data)
workbook.close()
The first part generates the combinations, which are then saved to an Excel file.
My issue is that items are displayed like this:
= = = = = = = =
= = = = = = = =
= = = = = = = =
And I want them to be like:
= = =
= = =
= = =
= = =
= = =
= = =
= = =
Basically just "rotate" as the data doesn't make much sense currently.

You're just not using the write function for the output you want.
Change worksheet.write_column(row, col, data) to worksheet.write_row(row, col, data)
Basically your new script should be:
import itertools
import xlsxwriter
from pprint import pprint

# Three option lists; every combination of one item from each list is exported.
inputdata = [
    ['900x200', '200x500', '232x232'],
    ['242423', '232d2s', '2sdad'],
    ['AA', 'BB'],
]
result = list(itertools.product(*inputdata))
pprint(result)

workbook = xlsxwriter.Workbook('arrays.xlsx')
worksheet = workbook.add_worksheet()
col = 0
# The enumerate() index is now the row and write_row() is used, so each
# combination occupies one spreadsheet row starting at column 0.
for row, data in enumerate(result):
    worksheet.write_row(row, col, data)
workbook.close()
This produces the output you want

Related

writing Excel file while using for loop

I am trying to write data to an Excel file, during a for loop.
But what I am getting is a single line containing the last data received by the loop.
I have tried a couple of different methods but came short..
Two of my attempts are listed below.
Any Ideas ?
def write_excel(x):
# Writes one record to 'ID_Num.xlsx'. Email, second_mail, Num and Date are
# read from names defined outside this function.
# NOTE: a new Workbook is created for the same file name on every call, so a
# caller that invokes this in a loop only keeps the data from the last call.
workbook = xlsxwriter.Workbook('ID_Num.xlsx')
worksheet = workbook.add_worksheet()
df = pd.DataFrame(
{'ID':[x],
'mail_one':[Email],
'second_mail':[second_mail],
'Num':[Num],
'date':[Date]})
# Despite its name, row_num is passed in the column position of write() and
# write_row(); it advances once per DataFrame column.
row_num = 0
for key, value in df.items():
worksheet.write(0, row_num, key)
worksheet.write_row(1, row_num, value)
row_num += 1
workbook.close()
#df = pd.DataFrame(
# {'ID':[x],
# 'mail_one':[Email],
# 'second_mail':[second_mail],
# 'Num':[Num],
# 'date':[Date]})
# writer = ExcelWriter('ID_Num.xlsx')
# df.to_excel(writer,'ID_Num',index=False)
# writer.save()
if __name__ == "__main__":
    # List, Email, second_mail, Num and Date are expected to be defined
    # elsewhere in the script; they are not shown in this snippet.
    for x in List:
        # The dictionary must be created under the same name that is filled
        # in below (the original created `my_dic` but wrote to `my_dict`,
        # which raises NameError on the first assignment).
        my_dict = {}
        my_dict["ID"] = x
        my_dict["mail_one"] = Email
        my_dict["second_mail"] = second_mail
        my_dict["Num"] = str(Num)
        my_dict["date"] = Date
        print(my_dict)
        write_excel(x)
I don't have xlsxwriter so I cannot test. The documentation says that it cannot modify an existing file so I suspect that every iteration of for x in List: you are over-writing your file (workbook = xlsxwriter.Workbook('ID_Num.xlsx')).
You can make multiple files with these changes:
def write_excel(x,i):
workbook = xlsxwriter.Workbook(f'ID_Num{i}.xlsx')
...
# and
for i,x in enumerate(List):
...
write_excel(x,i)
Or you could accumulate multiple dictionaries and pass all of them to your function
data = []
for x in List:
my_dic = {}
...
data.append(my_dic)
write_excel(data)
Changing the function to iterate over those dicts; making a new sheet for each one
def write_excel(data):
workbook = xlsxwriter.Workbook('ID_Num.xlsx')
for sht in data:
worksheet = workbook.add_worksheet()
df = pd.DataFrame(...
row_num = 0
for key, value in df.items():
worksheet.write(...
worksheet.write_row(...
row_num += 1
workbook.close()

How do you use Pandas to copy a cell value into a table in Word?

I'm trying to copy cell values from an Excel file into a table in a Word document, but I keep getting an error when I assign a cell value from the Excel file to a table cell. I've tried a couple of things, but each attempt fails with an error.
Attempt 1:
from docx import Document
from docx.shared import Pt
from openpyxl import load_workbook
import pandas as pd
# Attempt 1: read the cells with openpyxl and copy them into a new Word table
# for each of the first `tries` worksheet rows.
document = Document()
workbook = load_workbook(filename = r"C:\Users\Desktop\excel_file.xlsx")
sheet = workbook['Sheet2']
tries = 5
for i in range(1, tries + 1):
table = document.add_table(rows = 4, cols = 2, style = 'Table Grid')
table.cell(0,0).text = "ACCT #"
table.cell(1,0).text = "NAME"
# NOTE: the raw cell value is assigned to .text here. The question states that
# column 1 holds numbers, and this is the line the reported TypeError points to.
table.cell(0,1).text = sheet.cell(row = i, column = 1).value
table.cell(1,1).text = sheet.cell(row = i, column = 2).value
This gives me
TypeError: 'int' object is not iterable
And it references this line:
table.cell(0,1).text = sheet.cell(row = i, column = 1).value
Also, the data in the Excel file has numbers in the first column and names in the second column, just so you know. Not sure if this matters.
Attempt 2: Using Pandas
# Attempt 2: same table layout, but the values are read through pandas.
document = Document()
# sheet_name=None is used together with df["Sheet2"] below, i.e. the result is
# indexed by sheet name; header=None keeps the first spreadsheet row as data.
df = pd.read_excel(r"C:\Users\Desktop\excel_file.xlsx",sheet_name = None, header = None)
data = df["Sheet2"]
tries = 5
# i runs from 1 to tries, so positional row 0 of `data` is never read.
for i in range(1, tries + 1):
table = document.add_table(rows = 4, cols = 2, style = 'Table Grid')
table.cell(0,0).text = "ACCT #"
table.cell(1,0).text = "NAME"
# NOTE: the value from iloc is assigned to .text without conversion; this is
# the line the reported 'numpy.int64' TypeError points to.
table.cell(0,1).text = data.iloc[i, 0]
table.cell(1,1).text = data.iloc[i, 1]
And this gives me: TypeError: 'numpy.int64' object is not iterable
and it references: table.cell(0,1).text = data.iloc[i, 0]
I'm not sure what I'm doing wrong or how to fix it at this point. Any help would be much appreciated!
This worked for me,
from docx import Document
from docx.shared import Pt
from openpyxl import load_workbook
import pandas as pd
def addTable(doc, df):
    """Append a paragraph, a table holding the contents of *df*, and a page break to *doc*.

    The table gets one header row plus one row per DataFrame row. The header
    texts are fixed ("Account" and "Name"), so *df* is expected to have at
    least two columns. Every value is converted with ``str()`` before it is
    assigned to a cell's ``text``.

    Args:
        doc: document object providing ``add_paragraph``, ``add_table`` and
            ``add_page_break`` (a python-docx ``Document`` in the example).
        df: pandas DataFrame whose values fill the table body.
    """
    doc.add_paragraph('A plain paragraph having some ')
    tabrows = df.shape[0]
    tabcols = df.shape[1]
    # One extra row for the header line.
    table = doc.add_table(rows=tabrows + 1, cols=tabcols, style='Table Grid')
    table.cell(0, 0).text = "Account"
    table.cell(0, 1).text = 'Name'
    for r in range(tabrows):
        for c in range(tabcols):
            # str() is the actual fix for the question: .text only accepts text.
            table.cell(r + 1, c).text = str(df.iloc[r, c])
    doc.add_page_break()
def main():
    """Build a five-row sample DataFrame, render it into a new document and save it as 'tstdoc.docx'."""
    dfin = pd.DataFrame({
        'Numbers': [1, 2, 3, 4, 5],
        'Words': ['Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon'],
    })
    doc = Document()
    addTable(doc, dfin)
    doc.save('tstdoc.docx')


if __name__ == "__main__":
    main()

How to import values from excel with pandas into tkinter faster?

guys! How are you? I have this code below and I'm having this trouble with the insertData function. This function is used to enter values from an excel spreadsheet into the tkinter treeview using Pandas. It's almost all right, but it's too slow and I don't know how to fix it. If anyone could help me, I'd be very happy.
Thanks in advance!
from tkinter import *
import ttk
import openpyxl
import pandas as pd
nuScreen = Tk()
nuScreen.title("ContrasinSystem - Usuário Comum")
nuScreen.iconbitmap("logocontransin.ico")
book = openpyxl.load_workbook('Registros.xlsx')
sheet = book.sheetnames
sh = book.active.cell
#Contador de Linhas:
wb2 = openpyxl.load_workbook('Registros.xlsx')
sheet2 = wb2.worksheets[0]
rowCount = sheet2.max_row
v = []
#Design:
# Builds the window's widgets. The nested columnsName() callback is bound to
# the "Buscar" button created at the end and fills the treeview from the sheet
# selected in the combobox.
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the statements below cannot be confirmed from here.
class main():
def __init__(self,tk):
# Copy the workbook's sheet names (module-level `sheet`) into the module-level
# list `v`, which later feeds the combobox values.
for x in range (0,len(sheet)):
v.append(sheet[int(x)])
# 'Registros.xlsx' is loaded again here; it is already loaded twice at module level.
self.wb2 = openpyxl.load_workbook('Registros.xlsx')
self.sheet2 = self.wb2.worksheets[0]
self.row_count = self.sheet2.max_row
self.column_count = self.sheet2.max_column
self.nuFrame = Frame(nuScreen, width = 1500, height = 450)
self.nuFrame.pack()
self.img = PhotoImage(file="logocontransin.png")
self.w = Label(self.nuFrame, image = self.img)
# Extra reference to the image stored on the label widget.
self.w.img = self.img
self.w.place(x=65,y=150)
self.srchlbl = ttk.Label(self.nuFrame, text = "Buscar:")
self.srchlbl.place(x=25,y=75)
self.srchetr = ttk.Entry(self.nuFrame, width = 30)
self.srchetr.place(x=75,y=75)
self.treeview = ttk.Treeview(self.nuFrame)
self.treeview.place(x=300,y=75, width = 1100)
# Plain list (not self.dataVector) that insertData() appends to; it is never
# cleared anywhere in this listing.
dataVector = []
def columnsName():
def Header():
# Reads only the header (nrows=0) of the selected sheet and uses the column
# labels as the treeview's columns and headings.
self.columnVector = []
self.dataVector = []
teste = []
self.treeview.column("#0", width = 20)
self.columnHeader = pd.read_excel(r'Registros.xlsx', str(self.cmb.get()), header_only = True, nrows=0).columns
for a in self.columnHeader:
self.columnVector.append(a)
self.treeview.configure(columns = self.columnVector)
for b in self.columnHeader:
self.treeview.heading(str(b), text = str(b))
def insertData():
# NOTE: this is the slow part the question asks about. The loop runs rowCount
# times and pd.read_excel() appears to be called inside it, i.e. the whole
# sheet is re-read once per worksheet row.
for m in range(rowCount):
self.dataValues = pd.read_excel(r'Registros.xlsx',str(self.cmb.get()), skip_blank_lines=True, skiprows=0)
# NOTE(review): iterating a DataFrame directly yields its column labels, not
# its rows -- confirm that this is what should be inserted.
for l in self.dataValues:
dataVector.append(l)
self.treeview.insert("", "end",values = dataVector)
print(self.dataValues)
Header()
insertData()
self.cmbLbl = ttk.Label(self.nuFrame, text = "Assunto:")
self.cmbLbl.place(x=1200, y=325)
self.cmb = ttk.Combobox(self.nuFrame, values = v)
self.cmb.place(x=1250,y=325)
self.btncmb = ttk.Button(self.nuFrame, text = "Buscar", command = columnsName)
self.btncmb.place(x=1320,y=375)
nuScreen.geometry("1500x450")
main(nuScreen)
nuScreen.mainloop()
How many times do you want to open an Excel file for reading?
def insertData():
for m in range(rowCount):
self.dataValues = pd.read_excel(r'Registros.xlsx',str(self.cmb.get()),
skip_blank_lines=True, skiprows=0)
for l in self.dataValues:
dataVector.append(l)
self.treeview.insert("", "end",values = dataVector)
print(self.dataValues)
Should you instead omit the first loop?
def insertData():
    # Read the sheet selected in the combobox exactly once; re-reading the
    # workbook for every row is what made the original so slow.
    # (skip_blank_lines is a read_csv option, not a read_excel one, and
    # skiprows=0 is the default, so both are dropped.)
    self.dataValues = pd.read_excel(r'Registros.xlsx', sheet_name=str(self.cmb.get()))
    # Iterating a DataFrame directly yields its column labels; itertuples()
    # yields one plain tuple per data row, which is what the treeview needs.
    for record in self.dataValues.itertuples(index=False, name=None):
        self.treeview.insert("", "end", values=record)
    print(self.dataValues)

Extra Comma in specific data field using pandas

I am combining very large data sets using Python, and the script itself works fine. However, one specific field may or may not contain a comma inside it: the ["NAME"] field. Does anyone know how to remove that comma? FYI, this is how the data is collected, so the comma cannot be removed at collection time.
I have tried to implement a sep=r',(?!\s)' look ahead and that screws my data up even more
THANKS!
import csv
import shutil
import os
import pandas as pd
from os import path
def combinecsv(source_folder):
    """Merge every ``.csv`` file in *source_folder* into ``output/output.csv``.

    Each CSV directly inside *source_folder* is read with ``pd.read_csv`` and
    the frames are stacked. A reduced set of columns is then written to
    ``<source_folder>/output/output.csv``, with the literal string 'None'
    blanked out, PERIOD reformatted and DIM_BOW + DIM_STERN summed into LEN.

    Args:
        source_folder: directory containing the input ``.csv`` files.

    Raises:
        FileNotFoundError: if *source_folder* contains no ``.csv`` file.
    """
    frames = []
    for anyfile in os.listdir(source_folder):
        if anyfile.lower().endswith(".csv"):
            file_path = path.join(source_folder, anyfile)
            print("opening file path: {}".format(file_path))
            frames.append(pd.read_csv(file_path))
    if not frames:
        raise FileNotFoundError("no .csv files found in {}".format(source_folder))

    # DataFrame.append was removed in pandas 2.0 and copied the accumulated
    # frame on every call; collecting the frames and concatenating once is
    # both supported and linear.
    master_df = pd.concat(frames, ignore_index=True)

    new_df = pd.DataFrame()
    new_df["MSG_TYPE"] = master_df["MSG_TYPE"]
    new_df["MMSI"] = master_df["MMSI"]
    new_df["NAME"] = master_df["NAME"].apply(check_for_none)
    new_df["LAT_AVG"] = master_df["LAT_AVG"]
    new_df["LON_AVG"] = master_df["LON_AVG"]
    new_df["PERIOD"] = master_df["PERIOD"].apply(convert_period)
    new_df["SPEED_KNOTS"] = master_df["SPEED_KNOTS"].apply(check_for_none)
    new_df["COG_DEG"] = master_df["COG_DEG"].apply(check_for_none)
    new_df["SHIP_AND_CARGO_TYPE"] = master_df["SHIP_AND_CARGO_TYPE"].apply(check_for_none)
    new_df["DRAUGHT"] = master_df["DRAUGHT"].apply(check_for_none)
    # axis=1 hands the lambda one row at a time (two columns are needed here).
    new_df["LEN"] = master_df.apply(
        lambda row: combine_bowstern(row["DIM_BOW"], row["DIM_STERN"]), axis=1
    )

    new_folder = path.join(source_folder, "output")
    os.makedirs(new_folder, exist_ok=True)
    new_csvpath = path.join(new_folder, "output.csv")
    print("saving csv to {}".format(new_csvpath))
    new_df.to_csv(new_csvpath, index=False, quoting=csv.QUOTE_NONNUMERIC)
def check_for_none(df):
    """Return an empty string for the literal text 'None', otherwise return *df* unchanged.

    Despite its name, *df* is a single cell value, not a DataFrame.
    """
    if df == 'None':
        return ""
    return df
def convert_period(period):
    """Reformat a 'YYYY-MM-DD HH:MM:SS...' timestamp string as 'DD/MM/YY HH:MM'.

    The pieces are taken by fixed character positions, so *period* must be
    laid out exactly like '2018-08-01 00:00:00.000'.
    """
    year = str(period[2:4])      # two-digit year
    month = str(period[5:7])
    day = str(period[8:10])
    clock = str(period[11:16])   # HH:MM
    return "{}/{}/{} {}".format(day, month, year, clock)
def combine_bowstern(bow, stern):
    """Return the sum of *bow* and *stern* as an int.

    A value equal to the literal string "None" counts as 0; anything else is
    converted with ``int()``.
    """
    bow_int = 0
    stern_int = 0
    if bow != "None":
        bow_int = int(bow)
    if stern != "None":
        stern_int = int(stern)
    return bow_int + stern_int
if __name__ == "__main__":
    # Folder that holds the input .csv files; the result is written to an
    # "output" sub-folder inside it.
    source_folder = r'C:\Users\MTTA Standalone\Desktop\Code\csvcombine'
    combinecsv(source_folder)
Here is a sample of the data set: a header line, one row without the extra comma, and one row with it (in the NAME field):
MSG_TYPE,MMSI,NAME,IMO_NUMBER,CALL_SIGN,LAT_AVG,LON_AVG,PERIOD,SPEED_KNOTS,COG_DEG,HEADING_DEG,NAV_STATUS,NAV_SENSOR,SHIP_AND_CARGO_TYPE,DRAUGHT,DIM_BOW,DIM_STERN,DIM_PORT,DIM_STARBOARD,MMSI_COUNTRY_CD,RECEIVER
1,249830000,ZIM LUANDA,9403229,9HA2029,37.825850,-74.340755,2018-08-01 00:00:00.000,11.5,196.4,198,0,1,71,10.9,197,63,21,11,MT,D05MN-HR-CHIBS1
1,256819000,IOLCOS, DESTINY,9486049,9HA2936,36.833089,-75.672449,2018-08-01 00:00:00.000,9.7,93.1,95,0,1,70,14.4,199,30,13,24,MT,D05MN-NC-MAMBS1

Looping Through Excel Files using Openpyxl getting Worksheet object is not callable error

I am currently trying to loop through different excel files in a folder and amalgamate them into one main file. However, when trying to implement this loop I am getting the error Worksheet object is not callable.
Here is the code (apologies for my poor naming conventions):
import openpyxl
import openpyxl.formula
import pandas
import os
from openpyxl import Workbook
from openpyxl.styles import Alignment
from openpyxl.utils import get_column_letter
from openpyxl import load_workbook
from openpyxl import worksheet
#
# Declaration of openpyxl variables
#
# Creates the output workbook with the sheets "Internals", "Externals" and
# "All Formatted", then copies the 'Input INT' / 'Input EXT' sheets of every
# .xlsx file found under directoryPath into it.
directoryPath = "C:\\Users\\Desktop\\ExcelForReverse\\"
productionPath = "C:\\Users\\Desktop\\TransformedReverse.xlsx"
book_empty = Workbook()
book_empty.save(productionPath)
book_empty.create_sheet("Internals")
book_empty.create_sheet("Externals")
book_empty.create_sheet("All Formatted")
# Drop the default sheet that a new Workbook starts with.
del_sheet = book_empty["Sheet"]
book_empty.remove_sheet(del_sheet)
book_empty.save(productionPath)
sheet_externals_con = book_empty["Externals"]
sheet_internals_con = book_empty["Internals"]
os.chdir(directoryPath)
folder_list = os.listdir(directoryPath)
# attempt == 1 marks the first workbook processed (its header row is copied);
# rowvar_* hold the next free output row, starting below the header in row 1.
attempt = 1
rowvar_externals = 2
rowvar_internals = 2
for folders, sub_folders, file in os.walk(directoryPath):
for name in file:
if name.endswith(".xlsx"):
# os.path.join() with a single argument returns it unchanged, so the file is
# opened relative to the current directory set by os.chdir() above.
filename = os.path.join(name)
print(filename)
wb_controller = load_workbook(filename, data_only=True)
sheet_controller_internals = wb_controller['Input INT']
sheet_controller_externals = wb_controller['Input EXT']
numOfRows_externals = sheet_controller_externals.max_row
numOfCols_externals = sheet_controller_externals.max_column
numOfRows_internals = sheet_controller_internals.max_row
numOfCols_internals = sheet_controller_internals.max_column
# Header: source row 3 is copied to output row 1, for the first workbook only.
if attempt == 1:
for i in range(1,numOfCols_externals+1):
sheet_externals_con.cell(row = 1, column = i).value = sheet_controller_externals.cell(row = 3, column = i).value
# Data rows start at source row 4.
for r in range(4,numOfRows_externals+1):
for i in range(1,numOfCols_externals+1):
# NOTE: the right-hand side calls the worksheet object itself instead of its
# .cell(...) method; this is the line the reported TypeError points to.
sheet_externals_con.cell(row = rowvar_externals, column = i).value = sheet_controller_externals(row = r, column = i).value
rowvar_externals = rowvar_externals + 1
if attempt == 1:
for i in range(1,numOfCols_internals+1):
sheet_internals_con.cell(row = 1, column = i).value = sheet_controller_internals.cell(row = 3, column = i).value
for r in range(4,numOfRows_internals+1):
for i in range(1,numOfCols_internals+1):
# NOTE: same pattern as the externals copy above -- the worksheet is called
# directly rather than through .cell(...).
sheet_internals_con.cell(row = rowvar_internals, column = i).value = sheet_controller_internals(row = r, column = i).value
rowvar_internals = rowvar_internals + 1
attempt = attempt + 1
print("Worked")
book_empty.save(productionPath)
The Error Message:
File line 48, in
sheet_externals_con.cell(row = rowvar_externals, column = i).value = sheet_controller_externals(row = r, column = i).value
builtins.TypeError: 'Worksheet' object is not callable
Should the line not read:
sheet_externals_con.cell(row = rowvar_externals, column = i).value = sheet_controller_externals.cell(row = r, column = i).value
Where
sheet_controller_externals -> sheet_controller_externals.cell

Categories