Resize columns in Excel document with Python - python

I am currently working with creating an Excel document in Python. I create the excel document but I'm not sure what is wrong with the code that it is not resizing the columns correctly. Does anyone have any ideas?
def writerow(self, vals):
ws = self.workbook.active
this_row = self.numrows
this_col = 1
for v in vals:
cell = ws.cell(row = this_row, column = this_col)
cell.value = v
if ws.column_dimensions[get_column_letter(this_col)] < len(str(v)):
ws.column_dimensions[get_column_letter(this_col)] = len(str(v))
this_col += 1
self.numrows += 1
self.worksheet = ws

I found what I needed for what I am working on.
I needed to add ".width" to the areas where I was checking or assigning column widths.
def writerow(self, vals):
ws = self.workbook.active
this_row = self.numrows
this_col = 1
for v in vals:
cell = ws.cell(row = this_row, column = this_col)
cell.value = v
print "Column Width:"
print ws.column_dimensions[get_column_letter(this_col)].width
if ws.column_dimensions[get_column_letter(this_col)].width < len(str(v)):
ws.column_dimensions[get_column_letter(this_col)].width = len(str(v))
this_col += 1
self.numrows += 1
self.worksheet = ws

Related

How to speed up this search script?

IHello,
I have created a python script which aims to complete an excel file (wb) thanks to the first column of this file composed of many references (about 4000). To complete this excel, my script must search each reference (so use a for loop of list references from reading wb file) in two other excel files transformed into dataframe (df_mbom and df_ebom) and fill the specific cells of wb according to the presence or not of the references in df_mbom and df_ebom. If the reference is found, it is necessary to compare the level of the reference and the following line and fill wb accordingly. The created script works very well and it does the job very well.
But the only problem I have is that it takes more than 6 hours to search and fill wb for 1000 references so to process the 4000 references, it would take almost 24 hours! Do you have any suggestions to speed up this program?
Here is the code used:
from multiprocessing.dummy import Pool
def finding_complete(elt):
elt = str(elt)
pos = mylist_ref.index(elt)
print(pos)
item = r'^' + elt + '$'
df_findings = df_mbom[df_mbom['Article'].str.contains(item, case=True, regex=True)]
if df_findings.shape[0] == 0 :
active_sheet.cell(row = 4+pos, column = 19).value = "NOK"
active_sheet.cell(row = 4+pos, column = 18).value = "NOK"
else :
active_sheet.cell(row = 4+pos, column = 19).value = "OK"
boolean_f = df_findings.drop_duplicates(subset = ['Article'],keep = 'first')
ind = boolean_f.index.to_list()
idx = ind[0]
item1 = df_mbom['Niveau'][idx]
item2 = df_mbom['Niveau'][idx + 1]
if item2 > item1 :
active_sheet.cell(row = 4+pos, column = 18).value = "OK"
else :
active_sheet.cell(row = 4+pos, column = 18).value = "NOK"
df_findings2 = df_ebom[df_ebom['Article'].str.contains(item, case=True, regex=True)]
pos = mylist_ref.index(elt)
if df_findings2.shape[0] == 0 :
active_sheet.cell(row = 4+pos, column = 17).value = "NOK"
else :
boolean_f = df_findings2.drop_duplicates(subset = ['Article'],keep = 'first')
ind = boolean_f.index.to_list()
idx = ind[0]
item1 = df_ebom['Niveau'][idx]
item2 = df_ebom['Niveau'][idx + 1]
if item2 > item1 :
active_sheet.cell(row = 4+pos, column = 17).value = "OK"
else :
active_sheet.cell(row = 4+pos, column = 17).value = "NOK"
if __name__ == '__main__':
start = time.time()
path = '100446099_mbom.xlsx'
df_mbom = pd.read_excel(path, sheet_name=0, header=0)
path = '100446099_ebom.xlsx'
df_ebom = pd.read_excel(path, sheet_name=0, header=0)
location = 'DOC#6TERNORrev0.xlsx'
wb = openpyxl.load_workbook(filename=location) #, data_only=True"
active_sheet = wb["DOC#6 toutes regions"]
#Get cell value and put it in a list
mylist_ref = []
for row in active_sheet.iter_rows(min_row=4, max_row=active_sheet.max_row, min_col=2, max_col=2):
for cell in row:
if cell.value == None :
pass
else:
mylist_ref.append(cell.value)
print("Number of references :")
print(len(mylist_ref))
print(" ")
with Pool() as pool: #os.cpu_count())
pool.map(finding_complete,mylist_ref) # correspond à for elt in mylist_ref: do finding_complete
wb.save(location)
wb.close()
final = time.time()
timer = final - start
print(round(timer, 1))
Thanks in advance for your time.
convert the Excel file to json, procces the json, then write it to Excel.

writing Excel file while using for loop

I am trying to write data to an Excel file, during a for loop.
But what I am getting is a single line containing the last data received by the loop.
I have tried a couple of different methods but came short..
2 tries are list below
Any Ideas ?
def write_excel(x):
workbook = xlsxwriter.Workbook('ID_Num.xlsx')
worksheet = workbook.add_worksheet()
df = pd.DataFrame(
{'ID':[x],
'mail_one':[Email],
'second_mail':[second_mail],
'Num':[Num],
'date':[Date]})
row_num = 0
for key, value in df.items():
worksheet.write(0, row_num, key)
worksheet.write_row(1, row_num, value)
row_num += 1
workbook.close()
#df = pd.DataFrame(
# {'ID':[x],
# 'mail_one':[Email],
# 'second_mail':[second_mail],
# 'Num':[Num],
# 'date':[Date]})
# writer = ExcelWriter('ID_Num.xlsx')
# df.to_excel(writer,'ID_Num',index=False)
# writer.save()
if __name__ == "__main__":
for x in List:
my_dic = {}
my_dict["ID"] = x
my_dict["mail_one"] = Email
my_dict["second_mail"] = second_mail
my_dict["Num"] = str(Num)
my_dict["date"] = Date
print(my_dict)
write_excel(x)
I don't have xlsxwriter so I cannot test. The documentation says that it cannot modify an existing file so I suspect that every iteration of for x in List: you are over-writing your file (workbook = xlsxwriter.Workbook('ID_Num.xlsx')).
You can make multiple files with these changes:
def write_excel(x,i):
workbook = xlsxwriter.Workbook(f'ID_Num{i}.xlsx')
...
# and
for i,x in enumerate(List):
...
write_excel(x,i)
Or you could accumulate multiple dictionaries and pass all of them to your function
data = []
for x in List:
my_dic = {}
...
data.append(my_dic)
write_excel(data)
Changing the function to iterate over those dicts; making a new sheet for each one
def write_excel(data):
workbook = xlsxwriter.Workbook('ID_Num.xlsx')
for sht in data:
worksheet = workbook.add_worksheet()
df = pd.DataFrame(...
row_num = 0
for key, value in df.items():
worksheet.write(...
worksheet.write_row(...
row_num += 1
workbook.close()

Problem extracting content from text files using Python

I am trying to capture the data here in the second table (Field crops) titled "Prices Received, United States,July 2010, with Comparisons". I am using Panda dataframes to capture the table from the text file and then I will output it to a CSV file.
My code is as follows
def find_no_line_start_table(table_title,splited_data):
found_no_lines = []
for index, line in enumerate(splited_data):
if table_title in line:
found_no_lines.append(index)
return found_no_lines
def get_start_data_table(table_start, splited_data):
for index, row in enumerate(splited_data[table_start:]):
if 'Dollars' in row:
return table_start + index
def get_end_table(start_table_data, splited_data ):
for index, row in enumerate(splited_data[start_table_data:]):
if END_TABLE_LINE in row:
return start_table_data + index
def row(l):
l = l.split()
number_columns = 6
if len(l) >= number_columns:
data_row = [''] * number_columns
first_column_done = False
index = 0
for w in l:
if not first_column_done:
data_row[0] = ' '.join([data_row[0], w])
if ':' in w:
first_column_done = True
else:
index += 1
data_row[index] = w
return data_row
def take_table(txt_data):
comodity = []
q = []
w = []
e = []
t = []
p = []
for r in table:
data_row = row(r)
if data_row:
col_1, col_2, col_3, col_4, col_5, col_6 = data_row
comodity.append(col_1)
q.append(col_2)
w.append(col_3)
e.append(col_4)
t.append(col_5)
p.append(col_6)
table_data = {'comodity': comodity, 'q': q,
'w': w, 'e': e, 't': t}
return table_data
And, then I am doing this:
import requests
import pandas as pd
txt_data = requests.get("https://downloads.usda.library.cornell.edu/usda-esmis/files/c821gj76b/6w924d00c/9z903130m/AgriPric-07-30-2010.txt").text
splited_data = txt_data.split('\n')
table_title = 'Prices Received, United States'
END_TABLE_LINE = '-------------------------------------------'
_, table_start,_ = find_no_line_start_table(table_title,splited_data)
start_line = get_start_data_table(table_start, splited_data)
end_line = get_end_table(start_line, splited_data)
table = splited_data[start_line : end_line]
dict_table = take_table(txt_data)
pd.DataFrame(dict_table)
c = pd.DataFrame(dict_table)
IndexError: list assignment index out of range
However, I am getting an error here. Can anyone help me figure out what I am doing wrong?
Cause of error:
data_row is a list of 6 elements.
number_columns = 6
# ...
data_row = [''] * number_columns # [''] * 6
and index will increment with each iteration where first_column_done = True. But first_column_done will be True when : is encountered in a word, i.e
if ':' in w:
first_column_done = True
hence, for each iteration after first_column_done turns True, index will increment until it gets more than 6 which is the bound of list data_row.
def row(l):
l = l.split()
number_columns = 6
if len(l) >= number_columns:
data_row = [''] * number_columns
first_column_done = False
index = 0
for w in l:
if not first_column_done:
data_row[0] = ' '.join([data_row[0], w])
if ':' in w:
first_column_done = True
else:
index += 1
data_row[index] = w # error pos.
In other words, U get this error for each line that contains a number of words greater than 6 - index after the first occurence of : within a word in that line.
Fix:
Use split(':') and list comprehension as well as python tertiary operator.
def row(l):
row = [ col.strip() for col in l.split(':') ]
row[2:] = row[2].split()
return [ row[i] if i < len(row) else '' for i in range(6) ]

Problems porting code from Python 2.7 to 3.6

I have a fragment of code which loads data from a .csv file.
It's written for Python 2.7 but in Python 3.6 does not work.
def load_new_data(self):
full = list()
with open(self.filename, 'rb') as csv_in:
myreader2 = csv.reader(csv_in, delimiter=';')
count = 0
for row in myreader2:
if count == 0:
headers = row[1:]
count += 1
elif count == 1:
count += 1
else:
current_row = row[1:-1]
full.append(current_row)
count += 1
new_df = pd.DataFrame.from_records(full, columns=headers)
new_df = new_df.iloc[1:, :80]
self.fill_in_blanks(new_df)
new_df = dp.remove_inc_variables(new_df, .1)
print '\t Removing incomplete variables.'
for i in new_df.columns:
try:
new_df.loc[:, i] = new_df.loc[:, i].astype(float)
except:
pass
return new_df
the error I get is:
212
213 count = 0
--> 214 for row in myreader2:
215 if count == 0:
216 headers = row[1:]
Error: iterator should return strings, not bytes (did you open the file in
text mode?)
I did try changing the 'rb' to 'r' and 'rt' and even deleting it, as other posts here suggest, but with no success...
try this
def load_new_data(self):
full = list()
with open(self.filename, 'r') as csv_in:
myreader2 = csv.reader(csv_in, delimiter=';')
count = 0
for row in myreader2:
if count == 0:
headers = row[1:]
count += 1
elif count == 1:
count += 1
else:
current_row = row[1:-1]
full.append(current_row)
count += 1
new_df = pd.DataFrame.from_records(full, columns=headers)
new_df = new_df.iloc[1:, :80]
self.fill_in_blanks(new_df)
new_df = dp.remove_inc_variables(new_df, .1)
print ('\t Removing incomplete variables.')
for i in new_df.columns:
try:
new_df.loc[:, i] = new_df.loc[:, i].astype(float)
except:
pass
return new_df
You should try codecs, for open file. Be careful this file encoding.
Sample:
def load_new_data(self):
with codecs.open(self.filename, 'rb', encoding="cp1251") as csv_in: # cp1251 replace for your encoding!
myreader2 = csv.reader(csv_in, delimiter=';')
headers = next(myreader2)[1:]
next(myreader2)
full = [row[1:] for row in myreader2]
new_df = pd.DataFrame.from_records(full, columns=headers)
new_df = new_df.iloc[1:, :80]
self.fill_in_blanks(new_df)
new_df = dp.remove_inc_variables(new_df, .1)
print('\t Removing incomplete variables.')
for i in new_df.columns:
try:
new_df.loc[:, i] = new_df.loc[:, i].astype(float)
except:
pass
return new_df

PyQt: QTableWidget to .xls file

So, I have a QTableWidget that I want to save it to an .xls file using the xlwt module...
Here's the code:
def savefile(self):
filename = unicode(QtGui.QFileDialog.getSaveFileName(self, 'Save File', '', ".xls(*.xls)"))
wbk = xlwt.Workbook()
self.sheet = wbk.add_sheet("sheet")
self.row = 0
self.col = 0
self.add2(self.row, self.col)
wbk.save(filename)
def add2(self, row, col):
for i in range(self.tableWidget.columnCount()):
for x in range(self.tableWidget.rowCount()):
try:
teext = str(self.tableWidget.item(row, col).text())
self.sheet.write(row, col, teext)
row += 1
except AttributeError:
pass
col += 1
But that writes out only the text from cell 0,0 and nothing else...
I think that I have made some serious mistake...
Update:
def savefile(self):
filename = unicode(QtGui.QFileDialog.getSaveFileName(self, 'Save File', '', ".xls(*.xls)"))
wbk = xlwt.Workbook()
self.sheet = wbk.add_sheet("sheet", cell_overwrite_ok=True)
self.add2()
wbk.save(filename)
def add2(self):
row = 0
col = 0
for i in range(self.tableWidget.columnCount()):
for x in range(self.tableWidget.rowCount()):
try:
teext = str(self.tableWidget.item(row, col).text())
self.sheet.write(row, col, teext)
row += 1
except AttributeError:
row += 1
row = 0
col += 1
Solved the problem...
You might also find it more concise and easier to use the output of the range (or xrange) as the indexes for your tableWidget.item call rather than worrying about incrementing your own counters. You might be using the sheet itself in other places in code, but if you're not, it would save you some memory to not assign the sheet to be an attribute variable of your class:
def savefile(self):
filename = unicode(QtGui.QFileDialog.getSaveFileName(self, 'Save File', '', ".xls(*.xls)"))
wbk = xlwt.Workbook()
sheet = wbk.add_sheet("sheet", cell_overwrite_ok=True)
self.add2(sheet)
wbk.save(filename)
def add2(self, sheet):
for currentColumn in range(self.tableWidget.columnCount()):
for currentRow in range(self.tableWidget.rowCount()):
try:
teext = str(self.tableWidget.item(currentRow, currentColumn).text()
sheet.write(currentRow, currentColumn, teext)
except AttributeError:
pass
Because you are using the range command, the currentColumn variable will increment from 0 to columnCount() and currentRow will increment from 0 to currentRow()

Categories