I'm really new to this, so I followed a GeeksforGeeks tutorial. I have some experience in Python, but I couldn't figure out what the problem is.
This is the code.
# Import following modules
import urllib.request
import pandas as pd
from pushbullet import PushBullet
# Get Access Token from pushbullet.com
Access_token = "#.########################"

# Local text file that receives the downloaded chat export
Text_file = "All_Chats.txt"


def _split_stamp(line):
    """Split a ``date, time - rest`` line into ``(date, time, rest)``.

    Returns ``None`` when the line does not start with such a stamp.
    The date is expected to begin with a digit; that is an assumption
    about the export format -- adjust if your export differs.
    """
    header, dash, body = line.partition(" - ")
    date, comma, tim = header.partition(",")
    if not dash or not comma or not date[:1].isdigit():
        return None
    return date.strip(), tim.strip(), body.rstrip("\r\n")


def parse_chat_lines(lines):
    """Turn raw export lines into ``[date, time, name, message]`` rows.

    This replaces the original line 53,
    ``message = line.split(":")[2][:-0]``, which raised
    ``IndexError: list index out of range`` for every line containing
    fewer than two colons, and which produced an empty message otherwise
    because ``[:-0]`` is the same slice as ``[:0]``.

    * A line without a ``date, time - `` stamp is treated as the
      continuation of the previous message and appended to it.
    * A stamped line without ``name: `` (a notice rather than a message)
      is skipped.
    * Colons inside the message text are preserved.
    * Unlike the original, date, time and name are stripped of
      surrounding whitespace.
    """
    chat_list = []
    for line in lines:
        stamp = _split_stamp(line)
        if stamp is None:
            # Continuation of a multi-line message (ignored if nothing
            # has been parsed yet).
            if chat_list:
                chat_list[-1][3] += "\n" + line.rstrip("\r\n")
            continue
        date, tim, body = stamp
        name, colon, message = body.partition(": ")
        if not colon:
            continue
        chat_list.append([date, tim, name, message])
    return chat_list


def main():
    """Download the latest pushed file and save its chats to BackUp.xlsx."""
    # Authentication
    pb = PushBullet(Access_token)
    # All pushes created by you; the latest push is the first item
    all_pushes = pb.get_pushes()
    latest_one = all_pushes[0]
    # Fetch the latest file URL link and store the file locally
    url = latest_one['file_url']
    urllib.request.urlretrieve(url, Text_file)
    # Read all the data line-by-line
    with open(Text_file, mode='r', encoding='utf8') as f:
        data = f.readlines()
    # The first item is excluded, as in the original script (it was
    # described there as garbage data).
    chat_list = parse_chat_lines(data[1:])
    # Create a dataframe and store all the data in an excel file
    df = pd.DataFrame(chat_list,
                      columns=['Date', 'Time', 'Name', 'Message'])
    df.to_excel("BackUp.xlsx", index=False)


if __name__ == "__main__":
    main()
This is the error message I am getting.
Traceback (most recent call last):
File "d:\#adress to the file location", line 53, in <module>
message = line.split(":")[2][:-0]
IndexError: list index out of range
I have added a note at line 53 of the script. I am just getting started, so please excuse any silly mistakes and point out anything that looks wrong. I just want to figure this out.
Thanks in advance.🥲
Related
I am trying to extract text from PDFs, compare it, and finally save the differences as an Excel file. But when I run the code (given below), I get an error. I have provided the whole traceback.
`
import pdfminer
import pandas as pd
from time import sleep
from tqdm import tqdm
from itertools import chain
import slate
# List of pdf files to process
pdf_files = ['file1.pdf', 'file2.pdf']


def _escape(text):
    """Return *text* with backslash escapes for non-ASCII/control characters.

    Presumably this keeps characters the Excel writer rejects out of the
    sheet -- TODO confirm.
    """
    return text.encode('unicode_escape').decode('utf-8')


def build_differences_frame(pdf1_lines, pdf2_lines):
    """Build a two-column frame of the lines found in only one of the inputs.

    Each row holds one differing line, placed in ``pdf1_text`` or
    ``pdf2_text`` according to where it came from; the other cell is an
    empty string. Rows are sorted so the output is reproducible.

    The rows are collected in a list and the frame is built once,
    because ``DataFrame.append`` no longer exists in pandas 2.0 and was
    quadratic when called in a loop.
    """
    in_pdf1 = set(pdf1_lines)
    in_pdf2 = set(pdf2_lines)
    rows = [
        {
            'pdf1_text': _escape(difference) if difference in in_pdf1 else '',
            'pdf2_text': _escape(difference) if difference in in_pdf2 else '',
        }
        for difference in sorted(in_pdf1.symmetric_difference(in_pdf2))
    ]
    return pd.DataFrame(rows, columns=['pdf1_text', 'pdf2_text'])


def extract_first_item_lines(pdf_file):
    """Return item 0 of ``slate.PDF`` for *pdf_file*, split into lines.

    Item 0 is presumably the first page's text -- TODO confirm.
    """
    with open(pdf_file, 'rb') as pdf_now:
        # Extract text using slate
        text = slate.PDF(pdf_now)
        return text[0].split('\n')


def write_differences():
    """Compare the PDFs in ``pdf_files`` and write differences.xlsx.

    The first file feeds the ``pdf1_text`` side; every other file feeds
    the ``pdf2_text`` side.

    NOTE(review): the reported ``TypeError: %d format`` is raised inside
    pdfminer while slate parses the PDF, so this rewrite does not remove
    it; it looks like a library/version problem -- confirm separately.
    """
    pdf1_text = []
    pdf2_text = []
    for pdf_file in tqdm(pdf_files):
        lines = extract_first_item_lines(pdf_file)
        if pdf_file == pdf_files[0]:
            pdf1_text.extend(lines)
        else:
            pdf2_text.extend(lines)
        # NOTE(review): pause kept from the original script; its purpose
        # is not visible here -- remove if it is not needed.
        sleep(20)
    differences_df = build_differences_frame(pdf1_text, pdf2_text)
    # Write the dataframe to an excel sheet
    differences_df.to_excel('differences.xlsx', index=False, engine='openpyxl')


if __name__ == "__main__":
    write_differences()
import openpyxl
import re
def drop_short_rows(df, columns=("pdf1_text", "pdf2_text"), min_words=10):
    """Return *df* without the rows that contain a too-short text cell.

    A row is dropped when any of *columns* holds a string with fewer
    than *min_words* whitespace-separated words. Cells that are not
    strings (for example the empty cells that come back from Excel as
    NaN) never cause a row to be dropped.

    Counting is done per cell instead of through the ``.str`` accessor,
    which raises on a column that contains no strings at all, and no
    temporary ``*_word_count`` columns are added to the frame.
    """
    keep = pd.Series(True, index=df.index, dtype=bool)
    for column in columns:
        too_short = df[column].map(
            lambda cell: isinstance(cell, str) and len(cell.split()) < min_words
        ).astype(bool)
        keep &= ~too_short
    return df[keep]


def filter_differences():
    """Rewrite differences.xlsx keeping only rows with long enough text."""
    # Load the Excel file into a dataframe
    df = pd.read_excel("differences.xlsx")
    df = drop_short_rows(df)
    # Save the modified dataframe back to the same Excel file
    df.to_excel("differences.xlsx", index=False)


if __name__ == "__main__":
    filter_differences()
This is my code, and below is the error which I am getting. Listing the whole traceback below -
Traceback (most recent call last):
File "c:\Users\lmohandas\stuff\1801pdfs\slatetrial.py", line 22, in <module>
text = slate.PDF(pdf_now)
File "C:\Users\lmohandas\AppData\Local\Programs\Python\Python310\lib\site-packages\slate\classes.py", line 61, in __init__
self.doc = PDFDocument(self.parser, password)
File "C:\Users\lmohandas\AppData\Local\Programs\Python\Python310\lib\site-packages\pdfminer\pdfdocument.py", line 558, in __init__
self.read_xref_from(parser, pos, self.xrefs)
File "C:\Users\lmohandas\AppData\Local\Programs\Python\Python310\lib\site-packages\pdfminer\pdfdocument.py", line 789, in read_xref_from
xref.load(parser)
File "C:\Users\lmohandas\AppData\Local\Programs\Python\Python310\lib\site-packages\pdfminer\pdfdocument.py", line 242, in load
self.data = stream.get_data()
File "C:\Users\lmohandas\AppData\Local\Programs\Python\Python310\lib\site-packages\pdfminer\pdftypes.py", line 292, in get_data
self.decode()
File "C:\Users\lmohandas\AppData\Local\Programs\Python\Python310\lib\site-packages\pdfminer\pdftypes.py", line 283, in decode
data = apply_png_predictor(pred, colors, columns, bitspercomponent, data)
File "C:\Users\lmohandas\AppData\Local\Programs\Python\Python310\lib\site-packages\pdfminer\utils.py", line 46, in apply_png_predictor
raise ValueError("Unsupported predictor value: %d"%ft)
TypeError: %d format: a real number is required, not bytes
I am using Streamlit with Python to allow the user to upload multiple files and then show the content as a dataframe.
But first I need to check whether each file is a CSV or an XLS file, and display its type and name.
The problem is that when it comes to checking the file type, it crashes and displays the error below:
AttributeError: 'list' object has no attribute 'name'
Traceback:
File "F:\AIenv\lib\site-packages\streamlit\script_runner.py", line 333, in _run_script
exec(code, module.__dict__)
File "f:\AIenv\streamlit\app2.py", line 766, in <module>
main()
File "f:\AIenv\streamlit\app2.py", line 300, in main
file_details = {"filename":data.name,
Note: if I upload a single file, the script runs with no error.
code:
import streamlit as st
import pandas as pd
# Streamlit entry point as posted in the question (indentation was lost in
# the paste): offers a multi-file or a single-file uploader in the sidebar
# and then writes the uploaded file's name and type.
def main():
if st.sidebar.checkbox("Multiple Files"):
# With accept_multiple_files=True the result is iterated as a collection
# of files below; the reported error confirms it is a list.
data = st.sidebar.file_uploader('Multiple Excel files', type=["csv","xlsx","xls"],
accept_multiple_files=True)
for file in data:
file.seek(0)
elif st.sidebar.checkbox("Single File"):
# Single-file mode: the check below treats None as "nothing uploaded".
data = st.sidebar.file_uploader("Upload Dataset",type=["csv","xlsx","xls"])
# NOTE(review): if neither checkbox is ticked, no branch assigns `data`
# before this test -- confirm against the original indentation.
if data is not None:
# display the name and the type of the file
# NOTE: in the multiple-files branch `data` is a list, so `data.name`
# is the attribute access that raises the reported AttributeError.
file_details = {"filename":data.name,
"filetype":data.type
}
st.write(file_details)
if __name__=='__main__':
main()
You are trying to access the name and type of a list of files when "Multiple Files" is checked. I suggest unifying your 'data' structure by always making it a list. Then you have to iterate over it:
import streamlit as st
import pandas as pd
def main():
    """Show the name and type of every file uploaded through the sidebar.

    The uploads are always handled as a list: the multi-file uploader
    already provides one, and a single upload is wrapped in a
    one-element list. The list starts out empty so that the display
    loop is also safe when neither checkbox is ticked (previously
    ``data`` was unbound in that case).
    """
    uploads = []
    if st.sidebar.checkbox("Multiple Files"):
        # `or []` guards against a None result when nothing is uploaded.
        uploads = st.sidebar.file_uploader('Multiple Excel files', type=["csv","xlsx","xls"],
                                           accept_multiple_files=True) or []
        for file in uploads:
            # Rewind each upload, as the original code did.
            file.seek(0)
    elif st.sidebar.checkbox("Single File"):
        single = st.sidebar.file_uploader("Upload Dataset",type=["csv","xlsx","xls"])
        if single is not None:
            uploads = [single]

    for file in uploads:
        # display the name and the type of the file
        file_details = {"filename":file.name,
                        "filetype":file.type
                        }
        st.write(file_details)


if __name__=='__main__':
    main()
data in this case should be a list type (you can check with type(data)).
What you could do is change:
if data is not None:
# display the name and the type of the file
file_details = {"filename":data.name,
"filetype":data.type
}
st.write(file_details)
To:
if data:
    # Emit one entry per uploaded file. Handing st.write a generator
    # expression does not produce one line per file (older Streamlit
    # versions display the generator object itself).
    for file in data:
        st.write("filename {} | filetype: {}".format(file.name, file.type))
I am trying to loop through a JSON file and add data for all the bike stations listed into the dictionary instead of only the final bikestation, which is 502. So if my code was working the way I want it to, The resulting dictionary would have JSON data for all 5 bikestations. I am very new to this and any help is appreciated. here is my code so far:
import json
import urllib.request, urllib.parse, urllib.error
import datetime
import pymongo
# Question code as posted (indentation was lost in the paste): requests the
# two most recent records for each station id from the Chicago data endpoint
# and dumps the decoded JSON to bike_data.txt.
stations = (123, 258, 290, 501, 502)
chicagoBikesURL = "https://data.cityofchicago.org/resource/eq45-8inv.json?"
for station in stations:
# Query parameters for this station: filter by id, newest first, two rows.
paramD = dict()
paramD["id"] = station
paramD["$order"] = "timeStamp DESC"
paramD["$limit"] = 2
params = urllib.parse.urlencode(paramD)
print(chicagoBikesURL+params)
document = urllib.request.urlopen(chicagoBikesURL+params)
# get all of the text from the document
text = document.read().decode()
if document.getcode() != 200 :
print("Error code=",document.getcode(), chicagoBikesURL+params)
# Fall back to an empty JSON object when the status is not 200.
text = "{}"
# Load the JSON text from the URL into a dictionary using the json library
js = json.loads(text)
# Output first Record
print("\nFirst BikeStation")
# assumes the response is a non-empty JSON array -- TODO confirm
print(js[0])
# Write JSON data to a file
# NOTE: mode "w" truncates bike_data.txt on every open, so each write
# replaces what an earlier station wrote.
fdumps = open('bike_data.txt', "w")
fdumps.write(json.dumps(js).strip())
# NOTE: bikeStation_list is only assigned on the last line of this snippet,
# i.e. after this first use.
bikeStation_list.append(js)
# Make sure you close the file otherwise data may not be saved
fdumps.close()
#Process JSON Data
bikeStation_list = []`enter code here`
Opening the file in "w" mode always creates a new, empty file, so each iteration overwrites the previous one and you only see the final item.
To append to the file, use:
fdumps = open('bike_data.txt', "a")
I have been assigned a task in which I have to read an Excel document and store its data in an XML file. I have written some Python code for that, but it gives me an error when I write the XML file.
#!/usr/bin/python
import xlrd
import xml.etree.ElementTree as ET
# Question code as posted (indentation was lost in the paste): reads the first
# sheet of a workbook into a list of dicts and writes fields to data.xml.
workbook = xlrd.open_workbook('anuja.xls')
# This second call rebinds `workbook`, so only 'anuja.xlsx' is used below.
workbook = xlrd.open_workbook('anuja.xlsx', on_demand = True)
worksheet = workbook.sheet_by_index(0)
first_row = [] # Header
for col in range(worksheet.ncols):
first_row.append( worksheet.cell_value(0,col) )
# transform the workbook to a list of dictionaries
data =[]
for row in range(1, worksheet.nrows):
# One dict per data row, keyed by the header cell of each column.
elm = {}
for col in range(worksheet.ncols):
elm[first_row[col]]=worksheet.cell_value(row,col)
data.append(elm)
for set1 in data :
# NOTE: the file is opened with mode 'w' here and no close() is visible.
f = open('data.xml', 'w')
# NOTE: set1 is one of the dicts built above, whose keys are header cell
# values; set1[0] is therefore a lookup of the key 0, which is the
# reported KeyError.
f.write("<Progress>%s</Progress>" % (set1[0]))
f.write("<P>%s</P>" % (set1[1]))
f.write("<Major>%s</Major>" % (set1[2]))
f.write("<pop>%s</pop>" % (set1[3]))
f.write("<Key>%s</Key>" % (set1[4]))
f.write("<Summary>%s</Summary>" % (set1[5]))
Error is
Traceback (most recent call last):
File "./read.py", line 23, in <module>
f.write("<Progress>%s</Progress>" % (set1[0]))
KeyError: 0
So the error message actually tells you that the key 0, which you look up when writing to the XML file, does not exist in your dictionary.
Some more tips:
You open the XML file on every iteration of your loop; in 'w' mode each open truncates the file, so only the last row would remain.
There are easier ways to create XML files, check out this article https://pythonadventures.wordpress.com/2011/04/04/write-xml-to-file/
You should check out a python debugger, it will make it easy for you to investigate e.g. what your data loop looks from the inside. I like ipdb most https://pypi.python.org/pypi/ipdb
All, I am just getting started with Python, and I thought this might be a good time to see if it can help me automate a lot of repetitive tasks I have to complete.
I am using a script I found on GitHub that searches and replaces text and then writes a new file named output.txt. It works fine, but since I have lots of these files, I need to be able to give them different names based on the text in the final modified document.
To make this a little more difficult, the name of the file is based on the text I will be modifying the document with.
So, basically, after I run this script, I have a file named output.txt that sits at C:\Program Files (x86)\Python35-32\Scripts\Text_Find_and_Replace\Result. I would like to rename this modified file based on the text in a particular line of the file: the plain text on line 35 of output.txt should become the new file name.
I have figured out how to read the line within the file using
import linecache
line = linecache.getline("readme.txt", 1)
line
>>> line
'This is Python version 3.5.1\n'
I just need to figure out how to rename the file based on the variable "line".
Any ideas?
#!/usr/bin/python
import os
import sys
import string
import re
# Paths are raw strings so the backslashes are never read as escape
# sequences; the path values themselves are unchanged.
# NOTE(review): three of the paths contain "Python35- 32" (with a space)
# while OUTPUT_PATH contains "Python35-32" -- confirm which is correct.

## information/replacingvalues.txt this is the text of the values you want in your final document
INFORMATION_PATH = r"C:\Program Files (x86)\Python35- 32\Scripts\Text_Find_and_Replace\information/replacingvalues.txt"
# Text_Find_and_Replace\Result\output.txt This is the dir and the sum or final document
OUTPUT_PATH = r"C:\Program Files (x86)\Python35-32\Scripts\Text_Find_and_Replace\Result\output.txt"
# Field is the file or words you will be replacing
FIELD_PATH = r"C:\Program Files (x86)\Python35- 32\Scripts\Text_Find_and_Replace\Field/values.txt"
# modified code for autohot key
# Text_Find_and_Replace\Test/remedy line 1.ahk is the original doc you want modified
# (the script originally read Test/input.txt instead)
INPUT_PATH = r"C:\Program Files (x86)\Python35- 32\Scripts\Text_Find_and_Replace\Test/remedy line 1.ahk"


def build_replacements(information_lines, field_lines):
    """Pair each field (text to find) with its replacement value.

    Line *i* of *field_lines* is replaced by line *i* of
    *information_lines*. Fields are stripped of surrounding whitespace;
    values only lose their line ending. Blank fields are skipped, since
    an empty search string would match everywhere.

    Raises:
        ValueError: if the two inputs have a different number of lines.
    """
    values = [line.rstrip("\r\n") for line in information_lines]
    keys = [line.strip() for line in field_lines]
    if len(keys) != len(values):
        raise ValueError("replacing values and values have different numbers")
    return {key: value for key, value in zip(keys, values) if key}


def apply_replacements(text, replacements):
    """Return *text* with every key of *replacements* replaced by its value.

    Keys are matched literally (regex metacharacters are escaped), and
    longer keys are tried first so that a key which is a prefix of
    another key cannot shadow it. With nothing to replace, *text* is
    returned unchanged.
    """
    ordered = sorted((key for key in replacements if key), key=len, reverse=True)
    if not ordered:
        return text
    robj = re.compile('|'.join(re.escape(key) for key in ordered))
    # A function replacement keeps backslashes in the values literal.
    return robj.sub(lambda m: replacements[m.group(0)], text)


def main():
    """Read fields, values and the input document; write the result."""
    with open(INFORMATION_PATH, 'r') as information, \
            open(FIELD_PATH, 'r') as field:
        try:
            dictionary = build_replacements(information, field)
        except ValueError as err:
            print(err)
            # Non-zero status so callers can detect the mismatch.
            sys.exit(1)
    with open(INPUT_PATH, 'r') as myfile:
        inline = myfile.read()
    result = apply_replacements(inline, dictionary)
    # The output file is only opened once there is something to write,
    # and `with` closes it (the original `output.close;` never called
    # close).
    with open(OUTPUT_PATH, 'w') as output:
        output.write(result)


if __name__ == "__main__":
    main()
I figured out how...
import os
import linecache
def rename_to_line(filename, line_source, line_number):
    """Rename *filename* to the text on line *line_number* of *line_source*.

    The line is read with ``linecache`` and stripped of surrounding
    whitespace (including the line ending) before it is used as the new
    name. As in the original snippet, the new name is passed to
    ``os.rename`` as-is, so a bare file name is resolved against the
    current working directory.

    Returns:
        The new file name.

    Raises:
        ValueError: if the requested line is blank or cannot be read
            (``linecache.getline`` returns an empty string in both
            cases), instead of calling ``os.rename`` with an empty name.
    """
    # Drop cached copies so the file's current contents are read.
    linecache.clearcache()
    try:
        newfilename = linecache.getline(line_source, line_number).strip()
    finally:
        linecache.clearcache()
    if not newfilename:
        raise ValueError(
            "line {} of {!r} is empty or missing".format(line_number, line_source)
        )
    os.rename(filename, newfilename)
    return newfilename


if __name__ == "__main__":
    # Raw strings: same paths as before, without relying on "\p" not
    # being an escape sequence.
    rename_to_line(r"C:\python 3.5/output.ahk",
                   r"C:\python 3.5/remedy line 1.txt", 37)