Converting JSON file to CSV file - python

I am trying to convert a JSON file into a CSV file. My code is down below. However, I keep getting this error:
Traceback (most recent call last):
File "C:\Users\...\PythonParse.py", line 42, in <module>
writer.writerow(data)
File "C:\Documents and Settings\...\Python37\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode characters in position 38409-38412: character maps to <undefined>
import json
import gzip
import csv
outfile = open("VideoGamesMeta.csv","w")
writer = csv.writer(outfile)
data = []
items = []
names = []
checkItems = False;
checkUsers = False;
numItems = []
numUsers = []
for line in open("meta_Video_Games.json","r",encoding="utf-8"):
results = (json.loads(line))
if 'title' in results:
if 'asin' in results:
name = results['title']
item = results['asin']
data = [item,name]
writer.writerow(data)
items.append(item)
names.append(name)

Related

Program crashes during reading text file

def process_file(self):
error_flag = 0
line_count = 0
log_file = self.file_name
pure_name = log_file.strip()
# print('Before opening file ',pure_name)
logfile_in = open(pure_name, 'r') # Read file
lines = logfile_in.readlines()
# print('After reading file enteries ', pure_name)
Error Message
Traceback (most recent call last):
File "C:\Users\admin\PycharmProjects\BackupLogCheck\main.py", line 49, in <module>
backupLogs.process_file()
File "C:\Users\admin\PycharmProjects\BackupLogCheck\main.py", line 20, in process_file
lines = logfile_in.readlines()
File "C:\Users\admin\AppData\Local\Programs\Python\Python39\lib\encodings\cp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x90 in position 350: character maps to <undefined>
Process finished with exit code 1
Line 49 is where I call above method. But I have traced that it crashes at reading the file. I have checked the file; it has just text in it. I don't know if there are some characters which it doesn't like on reading entries. I am running on Windows 10.
I am new to Python, any suggestion how to find/correct the issue?
Try the file name in string format
logfile_in = open('pure_name', 'r') # Read file
lines = logfile_in.readlines()
print(lines)
output
['test line one\n', 'test line two']
or
logfile_in = open('pure_name', 'r') # Read file
lines = logfile_in.readlines()
for line in lines:
print(line)
output
test line one
test line two

UnicodeDecodeError when I want to convert dbf files to csv

I wrote a little program to convert dbf files to csv and it works on 80 % files.
When I try to convert whole folder I get error on last 2 dbf files.
UnicodeDecodeError: 'ascii' codec can't decode byte 0x88 in position 14: ordinal not in range(128)
This is my program:
import csv
from dbfread import DBF
from tkinter import filedialog
from tkinter import *
import os, sys
import glob
def dbf_to_csv(dbf_table_pth):#Input a dbf, output a csv, same name, same path, except extension
csv_fn = dbf_table_pth[:-4]+ ".csv" #Set the csv file name
table = DBF(dbf_table_pth)# table variable is a DBF object
with open(csv_fn, 'w',encoding = 'utf-8', newline = '') as f:# create a csv file, fill it with dbf content
writer = csv.writer(f)
writer.writerow(table.field_names)# write the column name
for record in table:# write the rows
writer.writerow(list(record.values()))
return csv_fn# return the csv name
listOfFiles = []
def get_filenames():
Tk().withdraw()
print("Initializing Dialogue... \\nPlease select a file.")
tk_filenames = filedialog.askdirectory()
tempDir = tk_filenames
return tempDir
choosen_dir = get_filenames()
os.chdir(choosen_dir)
for file in glob.glob("*.dbf"):
listOfFiles.append(file)
for file in listOfFiles:
dbf_to_csv(file)
Here I paste Trackback:
Traceback (most recent call last):
File "C:\Pliki po Dawidzie\Converter\main.py", line 35, in <module>
dbf_to_csv(file)
File "C:\Pliki po Dawidzie\Converter\main.py", line 15, in dbf_to_csv
for record in table:# write the rows
File "C:\Users\maciej.olech\PycharmProjects\pythonProject\venv\lib\site-packages\dbfread\dbf.py", line 314, in _iter_records
items = [(field.name,
File "C:\Users\maciej.olech\PycharmProjects\pythonProject\venv\lib\site-packages\dbfread\dbf.py", line 315, in <listcomp>
parse(field, read(field.length))) \
File "C:\Users\maciej.olech\PycharmProjects\pythonProject\venv\lib\site-packages\dbfread\field_parser.py", line 79, in parse
return func(field, data)
File "C:\Users\maciej.olech\PycharmProjects\pythonProject\venv\lib\site-packages\dbfread\field_parser.py", line 87, in parseC
return self.decode_text(data.rstrip(b'\0 '))
File "C:\Users\maciej.olech\PycharmProjects\pythonProject\venv\lib\site-packages\dbfread\field_parser.py", line 45, in decode_text
return decode_text(text, self.encoding, errors=self.char_decode_errors)
UnicodeDecodeError: 'ascii' codec can't decode byte 0x88 in position 14: ordinal not in range(128)
Thanks for any help help.

how can I import this file I am getting the error

I am importing csv file for cleaning purpose but pycharm showing me this error
I have tried encoding format but it didn't work
import csv
txt1 = ""
txt2 = ""
i = 0
with open('data.csv',encoding='cp1252') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
i += 10
print(i)
txt1 = str(row['posts'])
print(txt1)
#print(row['type'], row['posts'])
My Traceback:
> Traceback (most recent call last):
> File "C:/Users/Administrator/PycharmProjects/mosh/clean.py", line 7, in <module>
> for row in reader:
> File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\csv.py",
> line 112, in __next__
> row = next(self.reader)
> File `enter code here`"C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\encodings\cp1252.py",
> line 23, in decode
> return codecs.charmap_decode(input,self.errors,decoding_table)[0]
> UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 2409: character maps to <undefined>
>
> Process finished with exit code 1

jieba.analyse: 'generator' object has no attribute 'decode'

I have to encode that json file by utf-8 and use a generator to get content. when I tried to run it, there is an AttributeError:
Traceback (most recent call last):
File "F:\Files\python\yiyouhome\WordSeg\json_load.py", line 25, in <module>
tags = jieba.analyse.extract_tags(content_seg,topK = top_K, withWeight = False, allowPOS = allow_pos)
File "C:\Users\ThinkPad\AppData\Local\Programs\Python\Python36\lib\site-packages\jieba\analyse\tfidf.py", line 94, in extract_tags
for w in words:
File "C:\Users\ThinkPad\AppData\Local\Programs\Python\Python36\lib\site-packages\jieba\posseg\__init__.py", line 249, in cut
for w in self.__cut_internal(sentence, HMM=HMM):
File "C:\Users\ThinkPad\AppData\Local\Programs\Python\Python36\lib\site-packages\jieba\posseg\__init__.py", line 217, in __cut_internal
sentence = strdecode(sentence)
File "C:\Users\ThinkPad\AppData\Local\Programs\Python\Python36\lib\site-packages\jieba\_compat.py", line 37, in strdecode
sentence = sentence.decode('utf-8')
AttributeError: 'generator' object has no attribute 'decode'
Why does this happen?
At first:
Traceback (most recent call last):
File "F:\Files\python\yiyouhome\WordSeg\json_load.py", line 10, in <module>
json_data = open('spider_raw.json',encoding = 'gbk').read() #,encoding = 'utf-8'
UnicodeDecodeError: 'gbk' codec can't decode byte 0xa3 in position 74: illegal multibyte sequence
So i add encoding = 'utf-8' to fix it.
Here is my code:
import json
import jieba.analyse
import jieba.posseg as pseg
json_data = open('spider_raw.json',encoding = 'utf-8').read()
data = json.loads(json_data)
top_K = 20
allow_pos = ('nr',)
def getcontent(spiderlist):
for k,v in spiderlist.items():
for item in v['talk_mutidetails']:
yield(item['cotent'])
#def getcontenttopic(spiderlist):
item = getcontent(data)
content_seg = pseg.cut(item)
tags = jieba.analyse.extract_tags(content_seg,topK = top_K, withWeight = False, allowPOS = allow_pos)
for t in tags:
print(t)

CSV Error: Line contains NULL byte

I am trying to read a csv file. Here is my code:
import csv
openFile = open('example.csv')
fileRead = csv.reader(openFile)
fileRead = list(fileRead)
print fileRead[0]
I am getting this error:
Traceback (most recent call last):
File "C:/Users/bob/PycharmProjects/Random/random.py", line 5, in <module>
fileRead = list(fileRead)
_csv.Error: line contains NULL byte

Categories