Program crashes during reading text file - python

def process_file(self):
    """Read every line of the file named by ``self.file_name`` (excerpt as posted)."""
    error_flag = 0  # initialised here; not used in the lines shown
    line_count = 0  # initialised here; not used in the lines shown
    log_file = self.file_name
    # Remove leading/trailing whitespace from the stored name before opening it.
    pure_name = log_file.strip()
    # print('Before opening file ',pure_name)
    # NOTE(review): no encoding= is passed, so the text is decoded with the
    # platform default codec; the handle is also not closed in the lines shown.
    logfile_in = open(pure_name, 'r') # Read file
    lines = logfile_in.readlines()
    # print('After reading file enteries ', pure_name)
Error Message
Traceback (most recent call last):
File "C:\Users\admin\PycharmProjects\BackupLogCheck\main.py", line 49, in <module>
backupLogs.process_file()
File "C:\Users\admin\PycharmProjects\BackupLogCheck\main.py", line 20, in process_file
lines = logfile_in.readlines()
File "C:\Users\admin\AppData\Local\Programs\Python\Python39\lib\encodings\cp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x90 in position 350: character maps to <undefined>
Process finished with exit code 1
Line 49 is where I call the method above, and I have traced the crash to the point where the file is read. I have checked the file and it appears to contain only text, but I don't know whether it includes some characters that Python cannot decode while reading the entries. I am running on Windows 10.
I am new to Python; any suggestions on how to find and correct the issue?

Try passing the file name as a string:
# Read the whole file into a list of lines; the with-block closes the
# handle as soon as the read is done instead of leaving it open.
with open('pure_name', 'r') as logfile_in:  # Read file
    lines = logfile_in.readlines()
print(lines)
output
['test line one\n', 'test line two']
or
# Read the whole file into a list of lines; the with-block closes the
# handle as soon as the read is done instead of leaving it open.
with open('pure_name', 'r') as logfile_in:  # Read file
    lines = logfile_in.readlines()
# Print the lines one at a time.
for line in lines:
    print(line)
output
test line one
test line two

Related

UnicodeDecodeError when I want to convert dbf files to csv

I wrote a little program to convert dbf files to csv, and it works on about 80% of the files.
When I try to convert a whole folder, I get an error on the last 2 dbf files.
UnicodeDecodeError: 'ascii' codec can't decode byte 0x88 in position 14: ordinal not in range(128)
This is my program:
import csv
from dbfread import DBF
from tkinter import filedialog
from tkinter import *
import os, sys
import glob
def dbf_to_csv(dbf_table_pth):#Input a dbf, output a csv, same name, same path, except extension
    """Write the records of a DBF table to a CSV file.

    Args:
        dbf_table_pth: Path of the DBF file. Its last four characters are
            replaced by ".csv" to form the output path.

    Returns:
        The path of the CSV file that was written.
    """
    csv_fn = dbf_table_pth[:-4]+ ".csv" #Set the csv file name
    # NOTE(review): DBF() is called without an encoding= argument; presumably
    # dbfread then falls back to a default codec (the posted traceback reports
    # 'ascii') -- confirm against the dbfread documentation.
    table = DBF(dbf_table_pth)# table variable is a DBF object
    with open(csv_fn, 'w',encoding = 'utf-8', newline = '') as f:# create a csv file, fill it with dbf content
        writer = csv.writer(f)
        writer.writerow(table.field_names)# write the column name
        # Iterating the table yields one record per row; only its values are written.
        for record in table:# write the rows
            writer.writerow(list(record.values()))
    return csv_fn# return the csv name
listOfFiles = []
def get_filenames():
    """Ask the user to choose a directory and return the chosen path.

    Despite the function name and the printed prompt, the dialog opened is
    ``filedialog.askdirectory()``, so a directory (not a file) is selected.
    """
    # Create the Tk root and withdraw it before the dialog is opened.
    Tk().withdraw()
    print("Initializing Dialogue... \\nPlease select a file.")
    tk_filenames = filedialog.askdirectory()
    tempDir = tk_filenames
    return tempDir
# Driver: convert every *.dbf file found in the directory the user picked.
choosen_dir = get_filenames()
# Change into the chosen directory so the relative glob pattern is matched there.
os.chdir(choosen_dir)
for file in glob.glob("*.dbf"):
    listOfFiles.append(file)
# Convert the collected files one by one.
for file in listOfFiles:
    dbf_to_csv(file)
Here is the traceback:
Traceback (most recent call last):
File "C:\Pliki po Dawidzie\Converter\main.py", line 35, in <module>
dbf_to_csv(file)
File "C:\Pliki po Dawidzie\Converter\main.py", line 15, in dbf_to_csv
for record in table:# write the rows
File "C:\Users\maciej.olech\PycharmProjects\pythonProject\venv\lib\site-packages\dbfread\dbf.py", line 314, in _iter_records
items = [(field.name,
File "C:\Users\maciej.olech\PycharmProjects\pythonProject\venv\lib\site-packages\dbfread\dbf.py", line 315, in <listcomp>
parse(field, read(field.length))) \
File "C:\Users\maciej.olech\PycharmProjects\pythonProject\venv\lib\site-packages\dbfread\field_parser.py", line 79, in parse
return func(field, data)
File "C:\Users\maciej.olech\PycharmProjects\pythonProject\venv\lib\site-packages\dbfread\field_parser.py", line 87, in parseC
return self.decode_text(data.rstrip(b'\0 '))
File "C:\Users\maciej.olech\PycharmProjects\pythonProject\venv\lib\site-packages\dbfread\field_parser.py", line 45, in decode_text
return decode_text(text, self.encoding, errors=self.char_decode_errors)
UnicodeDecodeError: 'ascii' codec can't decode byte 0x88 in position 14: ordinal not in range(128)
Thanks for any help.

Converting JSON file to CSV file

I am trying to convert a JSON file into a CSV file. My code is down below. However, I keep getting this error:
Traceback (most recent call last):
File "C:\Users\...\PythonParse.py", line 42, in <module>
writer.writerow(data)
File "C:\Documents and Settings\...\Python37\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode characters in position 38409-38412: character maps to <undefined>
import json
import gzip
import csv
# Copy the 'asin' and 'title' of each JSON line into a two-column CSV file.
# NOTE(review): the output file is opened without encoding= or newline=, so
# rows are encoded with the platform default codec (the posted traceback shows
# cp1252); the handle is also not closed in the lines shown.
outfile = open("VideoGamesMeta.csv","w")
writer = csv.writer(outfile)
data = []
items = []
names = []
checkItems = False;  # not used in the lines shown
checkUsers = False;  # not used in the lines shown
numItems = []  # not used in the lines shown
numUsers = []  # not used in the lines shown
# The input is read as UTF-8 and parsed as one JSON document per line.
for line in open("meta_Video_Games.json","r",encoding="utf-8"):
    results = (json.loads(line))
    # Only records that have both a 'title' and an 'asin' are written.
    if 'title' in results:
        if 'asin' in results:
            name = results['title']
            item = results['asin']
            data = [item,name]
            writer.writerow(data)
            items.append(item)
            names.append(name)

Trouble scanning list for duplicates

Hey, so I want to scan this text file of emails: if the same email appears two or more times I want it printed, and if an email is on the list only once I don't want it printed.
It worked for a different text file, but now it fails with a traceback.
#note make sure found.txt and list.txt are in the 'include' for pycharm
from collections import Counter
# Count each non-blank, lower-cased line of the file and print those seen more than once.
print("Welcome DADDY")
# NOTE(review): no encoding= is passed to open(); the posted traceback shows the
# file being decoded as UTF-8 and failing on a byte that is not valid UTF-8.
with open('myheritage-1-million.txt') as f:
    c=Counter(c.strip().lower() for c in f if c.strip()) #for case-insensitive search
# Iterating the Counter yields its keys; c[line] is how often that line occurred.
for line in c:
    if c[line] > 1:
        print(line)
ERROR:
rs/dcaputo/PycharmProjects/searchtoolforrhys/venv/include/search.py
Welcome DADDY
Traceback (most recent call last):
File "/Users/dcaputo/PycharmProjects/searchtoolforrhys/venv/include/search.py", line 5, in <module>
c = Counter(c.strip().lower() for c in f if c.strip()) #for case-insensitive search
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/collections/__init__.py", line 566, in __init__
self.update(*args, **kwds)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/collections/__init__.py", line 653, in update
_count_elements(self, iterable)
File "/Users/dcaputo/PycharmProjects/searchtoolforrhys/venv/include/search.py", line 5, in <genexpr>
c = Counter(c.strip().lower() for c in f if c.strip()) #for case-insensitive search
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/codecs.py", line 322, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc5 in position 2668: invalid continuation byte
Process finished with exit code 1
Expected output: a list of all emails that show up 2 times in that whole text file.
The key is the error message at the end:
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc5 in position 2668: invalid continuation byte
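This error can occur when trying to read a non-text file as text. Your file could be corrupted somehow and have some data (at position 2668) in it that can't be read as text. It can also mean that the file is ordinary text saved in an encoding other than UTF-8 (for example Latin-1 or cp1252); in that case, pass the matching encoding= argument to open().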

Python Unicode decode error- Not able to run script even after suggested correction

I am using Python 2.7.9 to create an Excel sheet from tab-delimited text files; however, I am getting a problem while running this Python script:
#!/usr/bin/env python
# encoding=utf8
import xlwt
import os
import sys
# Python 2 script: write each tab-delimited .txt file into its own sheet of one workbook.
# NOTE(review): reload(sys) + sys.setdefaultencoding() exists only on Python 2
# and changes the interpreter-wide default codec.
reload(sys)
sys.setdefaultencoding('utf8')
# NOTE(review): no encoding= is passed to Workbook(); the posted traceback shows
# xlwt decoding the cell strings with the 'ascii' codec when saving.
wb = xlwt.Workbook()
path = "/home/Final_analysis/"
#print(os.listdir())
lis = os.listdir(path)
sheetnumber = 1  # incremented per sheet below but not otherwise read in the lines shown
for x in lis:
    # NOTE(review): os.listdir(path) returns bare names, and isfile(x)/open(x)
    # use them without joining `path` -- presumably the script is run from
    # inside that directory; confirm.
    if os.path.isfile(x)==True:
        extension = os.path.splitext(x)
        print(extension[1])
        if extension[1] == '.txt':
            #print("Yes")
            # The sheet is named after the file name without its extension.
            ws = wb.add_sheet(extension[0])
            row = 0
            column = 0
            a = open(x)
            # Read line by line until readline() returns an empty string (end of file).
            while True:
                a1 = a.readline()
                if len(a1)==0:
                    break
                data = a1.split("\t")
                # One cell per tab-separated field, left to right.
                for z in data:
                    ws.write(row,column,z)
                    column += 1
                # Start the next input line in the first column of the next row.
                column = 0
                row += 1
            sheetnumber+=1
        else:
            pass
wb.save("Ronic.xls")
I am getting the following error:
Traceback (most recent call last):
File "home/Final_analysis/combine_excel_v2.py", line 39, in <module>
wb.save("Ronic.xls")
File "/usr/local/lib/python2.7/site-packages/xlwt/Workbook.py", line 710, in save
doc.save(filename_or_stream, self.get_biff_data())
File "/usr/local/lib/python2.7/site-packages/xlwt/Workbook.py", line 674, in get_biff_data
shared_str_table = self.__sst_rec()
File "/usr/local/lib/python2.7/site-packages/xlwt/Workbook.py", line 636, in __sst_rec
return self.__sst.get_biff_record()
File "/usr/local/lib/python2.7/site-packages/xlwt/BIFFRecords.py", line 77, in get_biff_record
self._add_to_sst(s)
File "/usr/local/lib/python2.7/site-packages/xlwt/BIFFRecords.py", line 92, in _add_to_sst
u_str = upack2(s, self.encoding)
File "/usr/local/lib/python2.7/site-packages/xlwt/UnicodeUtils.py", line 50, in upack2
us = unicode(s, encoding)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 83: ordinal not in range(128)
I have tried the answer given in the thread How to fix: "UnicodeDecodeError: 'ascii' codec can't decode byte",
but it didn't work.
The problem is at the wb.save() call.
Setting the encoding at the top of your program is to handle non-ascii characters in your code, not your data. sys.setdefaultencoding('utf8') is not intended to be used in ordinary programs and does more harm than good.
To fix the problem, tell xlwt about the encoding to use.
Change this line:
wb = xlwt.Workbook()
to this:
wb = xlwt.Workbook(encoding="UTF-8")

ip2location python library error

I am trying to use a file to read ip addresses and then find out corresponding location of that address
import IP2Location;
# Python 2 script as posted: look up each line of 'test_ip' in the IP2Location database.
IP2LocObj = IP2Location.IP2Location();
IP2LocObj.open("data/IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-TIMEZONE-ISP-DOMAIN-NETSPEED-AREACODE-WEATHER-MOBILE-ELEVATION-USAGETYPE.BIN");
#t=open('output.txt','w');
t=open('test_ip','r');
Line=t.readline();
# NOTE(review): as posted, the loop is spelled `While line:` while the variable
# assigned above is `Line`; Python keywords and names are case-sensitive.
# NOTE(review): readline() keeps the trailing newline, so `Line` is passed to
# get_all() with its line ending still attached.
While line:
    rec = IP2LocObj.get_all(Line);
    Line=t.readline();
    print rec.country_short
The error occurs here:
Traceback (most recent call last):
File "myprogram.py", line 8, in <module>
rec = IP2LocObj.get_all(t);
File "/home/networkgroup/Downloads/IP2Location-Python-master/IP2Location.py", line 219, in get_all
return self._get_record(addr)
File "/home/networkgroup/Downloads/IP2Location-Python-master/IP2Location.py", line 364, in _get_record
ipv = self._parse_addr(ip)
File "/home/networkgroup/Downloads/IP2Location-Python-master/IP2Location.py", line 357, in _parse_addr
socket.inet_pton(socket.AF_INET, addr)
TypeError: inet_pton() argument 2 must be string, not file
This code is giving an error. You can check out the sample code here http://www.ip2location.com/developers/python
Please try the updated Python code below.
import IP2Location;
# Open the IP2Location BIN database once, then look up every address listed
# in the text file (one address per line).
IP2LocObj = IP2Location.IP2Location()
IP2LocObj.open("IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-TIMEZONE-ISP-DOMAIN-NETSPEED-AREACODE-WEATHER-MOBILE-ELEVATION-USAGETYPE-SAMPLE.BIN")  # This is sample database
with open('test_ip.txt') as f:  # file containing ip addresses
    for line_terminated in f:
        # get_all() must receive the address string itself, not the file
        # object and not a line that still carries its line ending.
        line = line_terminated.rstrip('\r\n')  # strip newline
        if line:  # non-blank lines
            # print(x) with a single argument behaves the same on Python 2 and 3.
            print(line)
            rec = IP2LocObj.get_all(line)
            print(rec.country_short)

Categories