AttributeError - 'Div' object has no attribute 'set_index' - python

I am trying to run this code:
def parse_data(contents, filename):
    content_type, content_string = contents.split(',')
    decoded = base64.b64decode(content_string)
    try:
        if 'csv' in filename:
            # Assume that the user uploaded a CSV or TXT file
            df = pd.read_csv(
                io.StringIO(decoded.decode('utf-8')))
        elif 'xls' in filename:
            # Assume that the user uploaded an excel file
            df = pd.read_excel(io.BytesIO(decoded))
        elif 'txt' or 'tsv' in filename:
            # Assume that the user uploaded an excel file
            df = pd.read_csv(
                io.StringIO(decoded.decode('utf-8')), delimiter=r'\s+')
    except Exception as e:
        print(e)
        return html.Div([
            'There was an error processing this file.'
        ])
    return df
def update_graph(contents, filename):
    fig = {
        'layout': go.Layout(
            plot_bgcolor=colors["graphBackground"],
            paper_bgcolor=colors["graphBackground"])
    }
    if contents:
        contents = contents[0]
        filename = filename[0]
        df = parse_data(contents, filename)
        df = df.set_index(df.columns[0])
        fig['data'] = df.iplot(asFigure=True, kind='scatter', mode='lines+markers', size=1)
    return fig
And I get this error:
Traceback (most recent call last):
  File "/Users/.../PycharmProjects/pythonProject2/main.py", line 93, in update_graph
    df = df.set_index(df.columns[0])
AttributeError: 'Div' object has no attribute 'set_index'
Any ideas what might be wrong? Thanks a lot!

The whole problem is in your parse_data(). If it can't read the file, it runs return html.Div(...), so df = parse_data(...) gives you df = html.Div(...); you never check whether you really got data in df, and df.set_index() then means html.Div().set_index().
It may be better to return None and check for it after df = parse_data():
def parse_data(contents, filename):
    # ... code ...
    try:
        # ... code ...
    except Exception as e:
        print(e)
        return None
    return df
and later
df = parse_data(contents, filename)
if df is None:
    html.Div(['There was an error processing this file.'])
else:
    df = df.set_index(df.columns[0])
    fig['data'] = df.iplot(asFigure=True, kind='scatter', mode='lines+markers', size=1)
But this can still be a problem for fig['data'] when the file can't be read.
I can't test your code, but maybe it should assign the Div to fig['data']:
if df is None:
    fig['data'] = html.Div(['There was an error processing this file.'])
else:
    df = df.set_index(df.columns[0])
    fig['data'] = df.iplot(asFigure=True, kind='scatter', mode='lines+markers', size=1)
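An alternative guard, if parse_data() keeps returning the Div on failure, is an isinstance check before touching any DataFrame method (a minimal sketch, not tested against the full app):
df = parse_data(contents, filename)
if not isinstance(df, pd.DataFrame):
    # parse_data() returned the error Div instead of a DataFrame
    return fig
df = df.set_index(df.columns[0])
fig['data'] = df.iplot(asFigure=True, kind='scatter', mode='lines+markers', size=1)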

Related

Try/Except in for loop failing

I am trying to catch some exceptions when processing files from an AWS S3 bucket. I know that the processing normally works just fine, as the error I get is expected and generated by myself by altering the column names of one file. The bucket contains several files that should process normally, while the one file I altered should throw an exception. My goal is to append the filename to a list if it is not processed, print the exception with the logging module, and continue processing the rest of the files. This is my code:
for item in settings.keys:
    try:
        response = settings.client.get_object(Bucket=settings.source_bucket, Key=item)
        tmp = pd.read_csv(io.BytesIO(response['Body'].read()), encoding='unicode_escape', sep=None, engine='python')
        tmp['account_number'] = item.split('/')[4][:-4]
        tmp.columns = tmp.columns.str.strip()
        tmp.columns = tmp.columns.map(settings._config['balances']['columns'])
        df = pd.concat([df, tmp], ignore_index=False)
    except:
        settings.unprocessed.append(item)
        logger.exception(f'{item} Not Processed')
Before I altered the one file, everything processed like it should. By using try/except, I want to catch the exception if a file contains errors and still process the rest of the files. However, after I altered the one file, every single file in the bucket threw an exception, and nothing was processed. Does anyone have any input as to why this happens?
2023-01-25 14:59:56 - ERROR - xxxx.csv Not Processed
Traceback (most recent call last):
  File "C:\Users\xxxx\Desktop\xxxx\Python\xxxx\xxxx\xxxx.py", line 19, in balances
    df = pd.concat([df, tmp], ignore_index=False)
  File "C:\Users\xxxx\Desktop\xxxx\Python\xxxx\xxxx\xxxx\venv\lib\site-packages\pandas\util\_decorators.py", line 311, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\xxxx\Desktop\xxxx\Python\xxxx\xxxx\xxxx\venv\lib\site-packages\pandas\core\reshape\concat.py", line 360, in concat
    return op.get_result()
  File "C:\Users\xxxx\Desktop\xxxx\Python\xxxx\xxxx\xxxx\venv\lib\site-packages\pandas\core\reshape\concat.py", line 591, in get_result
    indexers[ax] = obj_labels.get_indexer(new_labels)
  File "C:\Users\xxxx\Desktop\xxxx\Python\xxxx\xxxx\xxxx\venv\lib\site-packages\pandas\core\indexes\base.py", line 3721, in get_indexer
    raise InvalidIndexError(self._requires_unique_msg)
pandas.errors.InvalidIndexError: Reindexing only valid with uniquely valued Index objects
    raise ValueError("cannot reindex on an axis with duplicate labels")
ValueError: cannot reindex on an axis with duplicate labels
UPDATE:
I did the same for some other files, and this works as expected. The error I generated is caught and printed to the console, and the filename is appended to a list. This is the working code that does as expected:
for item in settings.keys:
    try:
        tmp = pd.DataFrame()
        response = settings.client.get_object(Bucket=settings.source_bucket, Key=item)
        if item.endswith('.csv'):
            tmp = pd.read_csv(io.BytesIO(response['Body'].read()), encoding='unicode_escape', sep=None, engine='python')
        elif item.endswith('.xlsx'):
            tmp = pd.read_excel(io.BytesIO(response['Body'].read()))
        tmp['file'] = item.split('/')[4]
        tmp.columns = tmp.columns.map(settings._config['account statements']['columns'])
        tmp['row'] = tmp.index + 2
        tmp.columns = tmp.columns.astype(str)
        tmp.rename(columns=lambda x: x.strip())
        for col in tmp.columns:
            if col.startswith('Beløp'):
                settings.statement_currencies[item.split('/')[-1:][0]] = col[-3:]
                tmp[col] = tmp[col].astype(str)
                tmp[col] = tmp[col].str.replace(',', '.')
                tmp[col] = tmp[col].astype(float)
                tmp['direction'] = np.where(tmp[col] > 0, 'Incoming', 'Outgoing')
        df = pd.concat([df, tmp], ignore_index=False)
    except:
        settings.unprocessed.append(item)
        logger.exception(f'{item} Not Processed')
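For what it's worth, the traceback points at pd.concat rather than the file read. A minimal sketch that reproduces the same error, under the assumption (not verifiable from the post alone) that the altered file's column mapping left duplicate labels in df:
import pandas as pd

df = pd.DataFrame([[1, 2]], columns=['a', 'a'])   # duplicate column labels
tmp = pd.DataFrame([[3, 4]], columns=['a', 'b'])  # a normal frame

# Raises InvalidIndexError: once df carries duplicate labels,
# every later concat against it fails, not just the one bad file.
pd.concat([df, tmp], ignore_index=False)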

How to merge excel file without losing its format

So here is a sample of my excel layout:
But after merging it has two headers and loses the layout.
Here is my code:
import pandas as pd
import glob

path = r"C:/Users//"
fname = glob.glob(path + "/*.xlsx")

result_DFs1 = pd.DataFrame()
result_DFs2 = pd.DataFrame()

for i in fname:
    try:
        df1 = pd.read_excel(i, sheet_name="Test1")
        result_DFs1 = pd.concat([result_DFs1, df1])
    except:
        pass

for i in fname:
    try:
        df2 = pd.read_excel(i, sheet_name="Test2")
        result_DFs2 = pd.concat([result_DFs2, df2])
    except:
        pass

with pd.ExcelWriter('pandas_to_excel.xlsx') as writer:
    result_DFs1.to_excel(writer, sheet_name='Test1')
    result_DFs2.to_excel(writer, sheet_name='Test2')
Is there a way I can have just one header, without losing the Excel layout format?
You can keep track of your sheets and only include headers for the first one. Something like:
first = True
for i in fname:
    try:
        if first:
            df1 = pd.read_excel(i, sheet_name="Test1", skiprows=0, header=0)
            first = False
        else:
            df1 = pd.read_excel(i, sheet_name="Test1", skiprows=1, header=None)
            # reuse the first file's header so the columns line up in concat
            df1.columns = result_DFs1.columns
        result_DFs1 = pd.concat([result_DFs1, df1])
    except:
        pass
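The same pattern applies to the second loop for Test2. And if the DataFrame index showing up as an extra first column in the output is also unwanted, index=False on to_excel drops it; a small sketch of the writing step under that assumption:
with pd.ExcelWriter('pandas_to_excel.xlsx') as writer:
    result_DFs1.to_excel(writer, sheet_name='Test1', index=False)
    result_DFs2.to_excel(writer, sheet_name='Test2', index=False)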

How to fix the error (-2146827864, 'OLE error 0x800a01a8', None, None) in xlwings

I am using xlwings to create and save workbooks as separate Excel files, but once the workbooks have been created and I have to overwrite them, I get this error: "error: (-2146827864, 'OLE error 0x800a01a8', None, None)"
try:
    excel_app = xw.App(visible=False)
    wb = excel_app.books.open(EXCEL_FILE)
    wb.app.visible = False
    os.chdir(file_path)
    print("Excel File Loaded")
    for i in range(len(books)):
        print("Working on sheet :", books[i])
        answ = os.path.isfile(f'{files[i]}.xlsx')
        if answ:
            wb_new = xw.Book(f'{files[i]}.xlsx')
            wb_new.sheets.add()
            final_sheet = wb_new.sheets[1]
            wb_new.sheets[sname[i]].delete()
            final_sheet.name = sname[i]  # for name
            sht = wb.sheets[books[i]]
            values = sht.used_range.copy()
            final_sheet.range("A1").paste("values_and_number_formats")
            wb_new.save()
            print("Done!")
            wb_new.close()
        else:
            wb_new = xw.Book()
            final_sheet = wb_new.sheets[0]
            final_sheet.name = sname[i]  # for name
            sht = wb.sheets[books[i]]
            values = sht.used_range.copy()
            final_sheet.range("A1").paste("values_and_number_formats")
            # wb_new.sheets[0].delete()
            wb_new.save(f'{files[i]}.xlsx')
            print("Done!")
            wb_new.close()
finally:
    excel_app.quit()
    print("Finished!!!")

How to handle except errors to be saved to a list

I have a routine where I have to read from an Excel file that has a column of links, to fetch the individual .xlsx/.xls files that people upload into the form containing some information.
My problem is that people do not always upload the correct file format, so I had to create exceptions to handle that. I save the links that raised an exception in a list, but I don't know which exception blocked each one. Here's my code:
erros = []
for i in links:
    try:
        name = i[50:]
        df = pd.read_excel(i, header=1, usecols=col_names, encoding='utf-8')  # usecols = names)
        file_name = r"%s\%s" % (pasta_sol, name)
        writer = pd.ExcelWriter(file_name, engine='xlsxwriter')
        df.to_excel(writer, header=True, index=True)
        writer.close()
    except (TypeError, IndexError, ValueError, XLRDError, BadZipFile, urllib.error.URLError) as e:
        erros.append(i)
Is there a way to record, for each file that raised an exception, which exception it was? It could be a list or a new df that looks like this:
erros = [['http://abs.company.pdf', 'TypeError'], ['http://abs.company.xls', 'XLRDError']]
or a df.
*There are thousands of files to read per day.
Thanks
This isn't exactly what you wanted, but it's close enough. Hope it helps.
errors = []
for i in links:
    try:
        name = i[50:]
        df = pd.read_excel(i, header=1, usecols=col_names, encoding='utf-8')  # usecols = names)
        file_name = r"%s\%s" % (pasta_sol, name)
        writer = pd.ExcelWriter(file_name, engine='xlsxwriter')
        df.to_excel(writer, header=True, index=True)
        writer.close()
    except (TypeError, IndexError, ValueError, XLRDError, BadZipFile, urllib.error.URLError) as e:
        # append the link itself; file_name may not be set yet if read_excel failed
        errors.append([i, e.args[0]])

print(errors)  # doesn't print the error name, but the description of the error, e.g. "division by zero"
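If it's the exception class name you want in the list (as in the desired output above), type(e).__name__ gives exactly that; a minimal variant of the except block:
except (TypeError, IndexError, ValueError, XLRDError, BadZipFile, urllib.error.URLError) as e:
    # type(e).__name__ is the class name, e.g. 'XLRDError'; str(e) is the description
    errors.append([i, type(e).__name__])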

iterate over multiple files in my directory

Currently I am grabbing an Excel file from a folder with Python just fine, in the code below, and pushing its data to a web form via Selenium.
However, I am trying to modify this so it continues through a directory over multiple files (there will be many Excel files in my 'directory' or 'folder').
main.py
from data.find_pending_records import FindPendingRecords
from vital.vital_entry import VitalEntry

if __name__ == "__main__":
    try:
        # Instantiates FindPendingRecords then gets records to process
        PENDING_RECORDS = FindPendingRecords().get_excel_data()
        # Reads excel to map data from excel to vital
        MAP_DATA = FindPendingRecords().get_mapping_data()
        # Configures Driver for vital
        VITAL_ENTRY = VitalEntry()
        # Start chrome and navigate to vital website
        VITAL_ENTRY.instantiate_chrome()
        # Begin processing Records
        VITAL_ENTRY.process_records(PENDING_RECORDS, MAP_DATA)
        print("All done, Bill")
    except Exception as exc:
        print(exc)
config.py
FILE_LOCATION = r"C:\Zip\2019.02.12 Data Docs.zip"
UNZIP_LOCATION = r"C:\Zip\Pending"
VITAL_URL = 'http://boringdatabasewebsite:8080/Horrible'
HEADLESS = False
PROCESSORS = 4
MAPPING_DOC = ".//map/mapping.xlsx"
find_pending_records.py
"""Module used to find records that need to be inserted into Horrible website"""
from zipfile import ZipFile
import math
import pandas
import config
class FindPendingRecords:
"""Class used to find records that need to be inserted into Site"""
#classmethod
def find_file(cls):
""""Finds the excel file to process"""
archive = ZipFile(config.FILE_LOCATION)
for file in archive.filelist:
if file.filename.__contains__('Horrible Data Log '):
return archive.extract(file.filename, config.UNZIP_LOCATION)
return FileNotFoundError
def get_excel_data(self):
"""Places excel data into pandas dataframe"""
excel_data = pandas.read_excel(self.find_file())
columns = pandas.DataFrame(columns=excel_data.columns.tolist())
excel_data = pandas.concat([excel_data, columns])
excel_data.columns = excel_data.columns.str.strip()
excel_data.columns = excel_data.columns.str.replace("/", "_")
excel_data.columns = excel_data.columns.str.replace(" ", "_")
num_valid_records = 0
for row in excel_data.itertuples():
person = row.PERSON
if person in ("", " ", None) or math.isnan(mrn):
print(f"Invalid record: {row}")
excel_data = excel_data.drop(excel_data.index[row.Index])
else:
num_valid_records += 1
print(f"Processing #{num_valid_records} records")
return self.clean_data_frame(excel_data)
def clean_data_frame(self, data_frame):
"""Cleans up dataframes"""
for col in data_frame.columns:
if "date" in col.lower():
data_frame[col] = pandas.to_datetime(data_frame[col],
errors='coerce', infer_datetime_format=True)
data_frame[col] = data_frame[col].dt.date
data_frame['PERSON'] = data_frame['PERSON'].astype(int).astype(str)
return data_frame
def get_mapping_data(self):
map_data = pandas.read_excel(config.MAPPING_DOC, sheet_name='main')
columns = pandas.DataFrame(columns=map_data.columns.tolist())
return pandas.concat([map_data, columns])
One way is as below (pseudocode):
class FindPendingRecords:
    @classmethod
    def find_file(cls):
        return ["file1", "file2", "file3"]

    def __init__(self):
        self.files = self.find_file()

    def get_excel_data(self):
        for excel_data in self.files:
            # process your excel_data
            yield excel_data
Your main should be:
if __name__ == "__main__":
    try:
        for PENDING_RECORDS in FindPendingRecords().get_excel_data():
            # Do operations on PENDING_RECORDS
            print(PENDING_RECORDS)
        print("All done, Bill")
    except Exception as exc:
        print(exc)
Your find_file method will be:
@classmethod
def find_file(cls):
    """Finds the excel files to process"""
    all_files = list()
    archive = ZipFile(config.FILE_LOCATION)
    for file in archive.filelist:
        if file.filename.__contains__('Horrible Data Log '):
            all_files.append(archive.extract(file.filename, config.UNZIP_LOCATION))
    return all_files
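To tie it together, get_excel_data would then read and clean each extracted path before yielding it; a sketch reusing the question's own helpers, assuming the original column cleanup stays as it was:
def get_excel_data(self):
    """Yields one cleaned dataframe per extracted excel file"""
    for path in self.files:
        excel_data = pandas.read_excel(path)
        # ... same column renaming and row validation as before ...
        yield self.clean_data_frame(excel_data)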
