Reading scraped json file into python - python

I'm trying to read a JSON file I've scraped from a website into Python, with the eventual aim of turning it into a csv file for later statistical analysis.
An example of the file I want to read is:
globals.jsonpCallback('/feed/match/1-1-8vOt3JMq-1-2-yj0a9.dat', {"s":1,"d":{"bt":1,"sc":2,"st":{"notice":"finished","status-name":"Finished","result-name":"<p id=\"event-status\" class=\"result\"><span class=\"bold\" id=\"event-status-finished\">Final result 3:1<\/span><\/p>"},"oddsdata":{"back":{"E-1-2-0-0-0":{"handicapType":0,"handicapValue":"0.00","odds":{"49":{"0":1.32,"2":7,"1":4.8},"46":{"0":1.33,"2":7.3,"1":4.5},"9":{"0":1.25,"2":12,"1":5},"16":{"0":1.29,"2":11,"1":5},"164":{"0":1.31,"2":7.59,"1":4.5},"18":{"0":1.24,"2":11.6,"1":5.25},"23":{"0":1.27,"2":11.23,"1":4.75},"26":{"0":1.3,"2":10.5,"1":5},"5":{"0":1.23,"2":12,"1":5.75},"44":{"0":1.14,"2":4,"1":2.52},"1":{"0":1.3,"2":8.5,"1":4.4},"8":{"0":1.25,"2":9,"1":5},"3":{"0":1.25,"2":11,"1":4.5},"31":{"0":1.25,"2":9,"1":4.5},"60":{"0":1.29,"2":8.5,"1":4.5},"15":{"0":1.28,"2":10,"1":5.4},"34":{"0":1.25,"2":10,"1":5},"111":{"0":1.25,"2":10,"1":4.84},"68":{"0":1.26,"2":9.75,"1":4.84},"105":{"0":1.28,"2":8,"1":4.5},"71":{"0":1.3,"2":9,"1":5},"28":{"0":1.25,"2":12,"1":5.5},"33":{"0":1.3,"2":9.25,"1":4.84},"14":{"0":1.22,"2":10.93,"1":5.05},"21":{"0":1.33,"2":7.5,"1":4.33},"30":{"0":1.22,"2":8.5,"1":5.5},"24":{"0":1.3,"2":9,"1":4.95},"32":{"0":1.25,"2":9.5,"1":4.9},"2":{"0":1.25,"2":10,"1":5},"90":{"0":1.28,"2":10.25,"1":4.8},"56":{"0":1.3,"2":9.8,"1":4.8},"41":{"0":1.3,"2":8.9,"1":4.5},"128":{"0":1.27,"2":12,"1":5.5},"75":{"0":1.2,"2":10.5,"1":5.2}},"mixedParameterId":0,"OutcomeID":{"0":"s3nfxv464x0x1o7d5","2":"s3nfxv464x0x1o7d6","1":"s3nfxv498x0x0"},"opening_odds":{"49":{"0":null,"2":null,"1":null},"46":{"0":null,"2":null,"1":null},"9":{"0":null,"2":null,"1":null},"16":{"0":1.22,"2":null,"1":null},"164":{"0":null,"2":null,"1":null},"18":{"0":1.31,"2":9.8,"1":5.65},"23":{"0":null,"2":4.75,"1":11.23},"26":{"0":1.25,"2":10,"1":5.25},"5":{"0":1.22,"2":12.5,"1":5.5},"44":{"0":null,"2":null,"1":null},"1":{"0":null,"2":null,"1":null},"8":{"0":null,"2":null,"1":null},"3":{"0":null,"2":null,"1":null},"31":{"0":null,"2":nul
l,"1":null},"60":{"0":null,"2":null,"1":null},"15":{"0":1.25,"2":null,"1":4.5},"34":{"0":null,"2":null,"1":null},"111":{"0":null,"2":null,"1":null},"68":{"0":1.27,"2":9.15,"1":4.65},"105":{"0":null,"2":null,"1":null},"71":{"0":1.29,"2":8,"1":4.5},"28":{"0":1.22,"2":10,"1":5},"33":{"0":1.4,"2":6.65,"1":4.25},"14":{"0":1.32,"2":7.8,"1":4.42},"21":{"0":null,"2":null,"1":null},"30":{"0":null,"2":null,"1":null},"24":{"0":null,"2":null,"1":null},"32":{"0":1.3,"2":8.5,"1":4.75},"2":{"0":1.35,"2":7.75,"1":4.3},"90":{"0":null,"2":null,"1":null},"56":{"0":null,"2":11,"1":4.59},"41":{"0":null,"2":null,"1":null},"128":{"0":1.25,"2":null,"1":5},"75":{"0":1.25,"2":8.9,"1":4.8}},"opening_change_time":{"49":{"0":false,"2":false,"1":false},"46":{"0":false,"2":false,"1":false},"9":{"0":false,"2":false,"1":false},"16":{"0":false,"2":false,"1":false},"164":{"0":false,"2":false,"1":false},"18":{"0":false,"2":false,"1":false},"23":{"0":false,"2":false,"1":false},"26":{"0":false,"2":false,"1":false},"5":{"0":false,"2":false,"1":false},"44":{"0":false,"2":false,"1":false},"1":{"0":false,"2":false,"1":false},"8":{"0":false,"2":false,"1":false},"3":{"0":false,"2":false,"1":false},"31":{"0":false,"2":false,"1":false},"60":{"0":false,"2":false,"1":false},"15":{"0":false,"2":false,"1":false},"34":{"0":false,"2":false,"1":false},"111":{"0":false,"2":false,"1":false},"68":{"0":false,"2":false,"1":false},"105":{"0":false,"2":false,"1":false},"71":{"0":false,"2":false,"1":false},"28":{"0":false,"2":false,"1":false},"33":{"0":false,"2":false,"1":false},"14":{"0":false,"2":false,"1":false},"21":{"0":false,"2":false,"1":false},"30":{"0":false,"2":false,"1":false},"24":{"0":false,"2":false,"1":false},"32":{"0":false,"2":false,"1":false},"2":{"0":false,"2":false,"1":false},"90":{"0":false,"2":false,"1":false},"56":{"0":false,"2":false,"1":false},"41":{"0":false,"2":false,"1":false},"128":{"0":false,"2":false,"1":false},"75":{"0":false,"2":false,"1":false}},"opening_volume":{"49":{"0":null,"2":null,"1":n
ull},"46":{"0":null,"2":null,"1":null},"9":{"0":null,"2":null,"1":null},"16":{"0":null,"2":null,"1":null},"164":{"0":null,"2":null,"1":null},"18":{"0":null,"2":null,"1":null},"23":{"0":null,"2":null,"1":null},"26":{"0":null,"2":null,"1":null},"5":{"0":null,"2":null,"1":null},"44":{"0":null,"2":null,"1":null},"1":{"0":null,"2":null,"1":null},"8":{"0":null,"2":null,"1":null},"3":{"0":null,"2":null,"1":null},"31":{"0":null,"2":null,"1":null},"60":{"0":null,"2":null,"1":null},"15":{"0":null,"2":null,"1":null},"34":{"0":null,"2":null,"1":null},"111":{"0":null,"2":null,"1":null},"68":{"0":null,"2":null,"1":null},"105":{"0":null,"2":null,"1":null},"71":{"0":null,"2":null,"1":null},"28":{"0":null,"2":null,"1":null},"33":{"0":null,"2":null,"1":null},"14":{"0":null,"2":null,"1":null},"21":{"0":null,"2":null,"1":null},"30":{"0":null,"2":null,"1":null},"24":{"0":null,"2":null,"1":null},"32":{"0":null,"2":null,"1":null},"2":{"0":null,"2":null,"1":null},"90":{"0":null,"2":null,"1":null},"56":{"0":null,"2":null,"1":null},"41":{"0":null,"2":null,"1":null},"128":{"0":null,"2":null,"1":null},"75":{"0":null,"2":null,"1":null}},"volume":[],"change_time":{"49":{"0":1192344925,"2":1192344925,"1":1192344925},"46":{"0":1192356640,"2":1192356640,"1":1192356640},"9":{"0":1192374117,"2":1192374117,"1":1192374117},"16":{"0":1192643462,"2":1192643462,"1":1192643462},"164":{"0":1192356640,"2":1192356640,"1":1192356640},"18":{"0":1192643462,"2":1192642538,"1":1192643462},"23":{"0":1192528549,"2":1192533516,"1":1192533516},"26":{"0":1192643462,"2":1192527200,"1":1192643462},"5":{"0":1192617648,"2":1192617648,"1":1192617648},"44":{"0":1192054308,"2":1192054308,"1":1192054308},"1":{"0":1192383076,"2":1192383076,"1":1192383076},"8":{"0":1192364254,"2":1192364254,"1":1192364254},"3":{"0":1192430433,"2":1192430433,"1":1192430433},"31":{"0":1192365159,"2":1192365159,"1":1192365159},"60":{"0":1192389374,"2":1192389374,"1":1192389374},"15":{"0":1192610985,"2":1192310256,"1":1192610985},"34":{"0":119236695
9,"2":1192366959,"1":1192366959},"111":{"0":1192487538,"2":1192487538,"1":1192487538},"68":{"0":1192473161,"2":1192473161,"1":1192473161},"105":{"0":1192354376,"2":1192354376,"1":1192354376},"71":{"0":1192610985,"2":1192610985,"1":1192610985},"28":{"0":1192570418,"2":1192570418,"1":1192570418},"33":{"0":1192397461,"2":1192397461,"1":1192397461},"14":{"0":1192641649,"2":1192641649,"1":1192641649},"21":{"0":1192362474,"2":1192362474,"1":1192362474},"30":{"0":1192356171,"2":1192356171,"1":1192356171},"24":{"0":1192419956,"2":1192419956,"1":1192419956},"32":{"0":1192635347,"2":1192635347,"1":1192635347},"2":{"0":1192449725,"2":1192449725,"1":1192449725},"90":{"0":1192426261,"2":1192426261,"1":1192426261},"56":{"0":1192366052,"2":1192621850,"1":1192621850},"41":{"0":1192436180,"2":1192436180,"1":1192436180},"128":{"0":1192624523,"2":1192441597,"1":1192624523},"75":{"0":1192437084,"2":1192437084,"1":1192437084}},"st":{"49":[3,60],"46":[3,60],"9":[3,60],"16":[3,60],"164":[3,60],"18":[3,60],"23":[3,60],"26":[3,60],"5":[3,60],"44":[3,60],"1":[3,60],"8":[3,60],"3":[3,60],"31":[3,60],"60":[3,60],"15":[3,60],"34":[3,60],"111":[3,60],"68":[3,60],"105":[3,60],"71":[3,60],"28":[3,60],"33":[3,60],"14":[3,60],"21":[3,60],"30":[3,60],"24":[3,60],"32":[3,60],"2":[3,60],"90":[3,60],"56":[3,60],"41":[3,60],"128":[3,60],"75":[3,60]},"bs":[],"act":{"49":true,"46":true,"9":true,"16":true,"164":true,"18":true,"23":true,"26":true,"5":true,"44":true,"1":true,"8":true,"3":true,"31":true,"60":true,"15":true,"34":false,"111":true,"68":true,"105":true,"71":true,"28":true,"33":true,"14":true,"21":true,"30":false,"24":true,"32":true,"2":true,"90":true,"56":true,"41":true,"128":true,"75":true},"actEx":{"44":{"0":true,"2":true,"1":true}}}},"lay":[]},"history":{"back":null,"lay":null},"hcl":true,"time-base":1192644000,"nav":{"1":{"2":["1","2","3","5","8","9","14","15","16","18","21","23","24","26","28","30","31","32","33","34","41","44","46","49","56","60","68","71","75","90","105","111","128","164"]}
,"4":{"2":["2","5","9","23","46","49","164"]}},"hasLiveOdds":false,"brokenParser":["30","34","103","154"],"hash":"c9fff233b9f89d1da10e81a9fdae7c14"},"refresh":16});
My current code looks like this (previous copy and paste job was inexplicably wrong):
import json
from pprint import pprint
# Read the whole scraped file into memory as one string.
# NOTE(review): the file object returned by open() is never explicitly closed.
json_data = open('/home/readejj/Documents/data/1-1-jFEwehyT-1-2-yjaf9.dat').read()
# Drop the JSONP wrapper: keep only the text from the first '{' through the
# last '}' so that just the object literal remains.
json_stuff = json_data[json_data.find('{'):json_data.rfind('}')+1]
# NOTE(review): this replaces every double quote with a single quote. JSON
# strings and property names must be double-quoted, so after this line the
# text is no longer valid JSON and json.loads() rejects it.
json_stuff = json_stuff.replace("\"","'")
# Debug output: the type, the length and characters 1..1999 of the extracted text.
print "json_stuff",type(json_stuff), len(json_stuff), json_stuff[1:2000]
# Decode the extracted text into Python objects and pretty-print the result.
d = json.loads(json_stuff)
pprint(d)
This is adapted from the question "Reading JSON from a file?"
The error I'm getting is:
Traceback (most recent call last):
File "read_json.py", line 16, in <module>
d = json.loads(json_stuff)
File "/usr/lib/python2.7/json/__init__.py", line 326, in loads
return _default_decoder.decode(s)
File "/usr/lib/python2.7/json/decoder.py", line 366, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/lib/python2.7/json/decoder.py", line 382, in raw_decode
obj, end = self.scan_once(s, idx)
ValueError: Expecting property name: line 1 column 1 (char 1)
If anyone can help that would be greatly appreciated - my apologies if it's a very basic error, I am not an experienced programmer.
UPDATE: It's been pointed out my replace line is bad - if removed, I get a different error:
Traceback (most recent call last):
File "read_oddsportal_json.py", line 19, in <module>
d = json.loads(json_stuff)
File "/usr/lib/python2.7/json/__init__.py", line 326, in loads
return _default_decoder.decode(s)
File "/usr/lib/python2.7/json/decoder.py", line 366, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/lib/python2.7/json/decoder.py", line 382, in raw_decode
obj, end = self.scan_once(s, idx)
ValueError: Expecting , delimiter: line 1 column 99 (char 99)

This line:
json_stuff = json_stuff.replace("\"","'")
is bad. Delete it.
The JSON specification requires strings to be delimited by double quotes ("), not single quotes.

Related

Decode double quotes in a json string to convert to json Python

I am facing the following problem. I have a string like this:
mystr = '[\\x22https://dosprn.co.il/\\x22, \\x22DOSPrn - הדפסה בעברית ב-DOS מדפסת Windows DOSPrn. \\\\\\x22Tools\\\\\\x22 Create Bootable USB Drive\\x22]'
I am trying to convert this string to JSON. However, this is not possible:
json.loads(mystr)
Traceback (most recent call last):
File "/Users/Tokyonight/PycharmProjects/WebCeoDev/venv/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-57-6f4efa0d20c6>", line 1, in <module>
json.loads(mystr)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 2 (char 1)
import html, json
# Escape html objects. And unescape unicode.
mystr = '[\\x22https://dosprn.co.il/\\x22, \\x22DOSPrn - הדפסה בעברית ב-DOS מדפסת Windows DOSPrn. \\\\\\x22Tools\\\\\\x22 Create Bootable USB Drive\\x22]'
# Step 1: encode to ASCII bytes; the 'xmlcharrefreplace' error handler turns
# every non-ASCII character into an XML numeric character reference (&#NNNN;).
# Step 2: decode those bytes with 'unicode_escape', which interprets backslash
# escapes in the text, so \x22 becomes a real double quote.
mystr = mystr.encode('ascii', 'xmlcharrefreplace').decode('unicode_escape')
# The text is now a double-quoted JSON array and can be parsed.
mystr_to_json = json.loads(mystr)
print(mystr_to_json)
#['https://dosprn.co.il/', 'DOSPrn - הדפסה בעברית ב-DOS מדפסת Windows DOSPrn. "Tools" Create Bootable USB Drive']
# Convert the character references produced in step 1 back into the original
# characters. Only element 1 is processed; element 0 is a plain ASCII URL.
mystr_to_json[1] = html.unescape(mystr_to_json[1])
print(mystr_to_json)
# ['https://dosprn.co.il/', 'DOSPrn - הדפסה בעברית ב-DOS מדפסת Windows DOSPrn. "Tools" Create Bootable USB Drive']
It works, but I think it's too cumbersome. I'm pretty sure there is an easier way to do this.
Does anyone have any ideas on how to do it better?
Thanks.

Why do I get an error with the json original codes?

How to fix this error?
I have a very long code, within which I am using:
import json
folder = json.loads(file_content[0][:-1])
and
f.write(json.dumps(saveable_folder))
from json library. However, I get the following error:
File"C:\Users\user1\Anaconda3\envs\virtualenvironment\lib\json\__init__.py", line 357, in loads
return _default_decoder.decode(s)
File "C:\Users\user1\Anaconda3\envs\virtualenvironment\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Users\user1\Anaconda3\envs\virtualenvironment\lib\json\decoder.py", line 353, in raw_decode
obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 1 column 85 (char 84)
Is it because I need to change something in the json library? Or is it because I have a virtual environment in Anaconda but am using VS Code?
I am completely lost.
Thanks!

I have a problem with json variable setting

I have this code here:
import json
# Read pass_file.txt and decode its entire contents as one JSON document.
# json.loads() raises a decode error when the file does not contain valid
# JSON (for example a bare string written without surrounding double quotes).
with open("pass_file.txt", "r") as file:
    password = json.loads(file.read())
It raises this error:
Traceback (most recent call last):
File "testdoc.py", line 9, in <module>
print(json.loads(file.read()))
File "C:\Program Files\Python37\lib\json\__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "C:\Program Files\Python37\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Program Files\Python37\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
I don't know why this is happening, because I have the same code in another file, just with a different variable name and file name, and it works fine. I did notice another question about a similar error, but it didn't answer my question.
Thanks in advance :)
What is the content of your pass_file.txt? The Python code uses json.loads, so it expects JSON-formatted content in pass_file.txt.
For example, for a string, the content of this file would be "hello world" (including the double quotes).
If you don't include the quotes, JSON parsing will fail.

Not able to import json from commandline for Python

I am currently trying to import JSON input that is passed to a Python script as a command-line argument, and to save its values in a list. I am having issues with my code; both the code and the error I get are shown below. Any help is much appreciated.
import sys
import json
def lookup1():
    """Decode the JSON object passed as the first command-line argument.

    Prints the decoded object, then prints a two-element list holding the
    values stored under its ``proxy`` and ``OS`` keys. Returns ``None``.

    Raises:
        IndexError: if no command-line argument was supplied.
        ValueError: if the argument is not valid JSON (for example when
            keys are written with single quotes instead of double quotes).
        KeyError: if the decoded object lacks ``proxy`` or ``OS``.
    """
    jsonData = json.loads(sys.argv[1])
    # A parenthesized single value prints identically on Python 2 and 3.
    print(jsonData)
    jsonList = [jsonData['proxy'], jsonData['OS']]
    print(jsonList)
lookup1()
The error is given below:
$ python dynamicMapper.py '{'proxy':1,'OS':2}'
Traceback (most recent call last):
File "dynamicMapper.py", line 9, in <module>
lookup1()
File "dynamicMapper.py", line 4, in lookup1
jsonData = json.loads(sys.argv[1])
File "/usr/lib/python2.7/json/__init__.py", line 338, in loads
return _default_decoder.decode(s)
File "/usr/lib/python2.7/json/decoder.py", line 366, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/lib/python2.7/json/decoder.py", line 382, in raw_decode
obj, end = self.scan_once(s, idx)
ValueError: Expecting property name: line 1 column 2 (char 1)
The command-line argument that I give is python dynamicMapper.py '{'proxy':1,'OS':2}'
I am not able to find out what is causing this error or whether my approach is right.
The script is working fine, you just need to call it the right way:
python dynamicMapper.py '{"proxy":1,"OS":2}'
{u'OS': 2, u'proxy': 1}
[1, 2]
In JSON, strings are quoted with double quotes, not single quotes. You also need to quote the whole string passed to the script so that the shell treats it as a single argument.

Can't read JSON file in Python

I tried to read data from a JSON file, but I encountered a weird error and have no idea what it means. I tried googling it, but it didn't help. I got the following error:
Traceback (most recent call last):
File "items_uploader.py", line 40, in <module>
main()
File "items_uploader.py", line 16, in main
LoadItemsData(settings['items_filename'])
File "items_uploader.py", line 36, in LoadItemsData
data = json.load(json_data)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/__init__.py", line 278, in load
**kw)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/__init__.py", line 326, in loads
return _default_decoder.decode(s)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.py", line 366, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.py", line 382, in raw_decode
obj, end = self.scan_once(s, idx)
ValueError: Expecting , delimiter: line 8 column 397 (char 3064)
The code itself is quite simple:
import socket
import MySQLdb
from ConfigParser import SafeConfigParser
import json
from pprint import pprint
def main():
    """Script entry point: read the settings, then load the items file.

    The file name is taken from the ``items_filename`` entry of the
    settings dictionary. The value returned by ``LoadItemsData`` is not
    kept; the call only checks that the file can be read and parsed.
    """
    settings = GetSettings()
    LoadItemsData(settings['items_filename'])
def GetSettings():
    """Read the script settings and return them as a dictionary.

    Returns:
        A dict with a single key, ``items_filename``, whose value is the
        ``items_filename`` option of the ``files`` section.
    """
    settings = {}
    parser = SafeConfigParser()
    # NOTE(review): the file has a .yaml extension but is read with a
    # config-file parser - confirm it really uses section/option syntax.
    parser.read('settings.yaml')
    settings['items_filename'] = parser.get('files', 'items_filename')
    return settings
def LoadItemsData(filename):
    """Load and return the JSON document stored in *filename*.

    Args:
        filename: path of the JSON file to read.

    Returns:
        The decoded Python object (dict, list, ...).

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if the file content is not valid JSON, e.g. because of
            a trailing comma or any other stray character.
    """
    # The context manager closes the file even when decoding fails.
    with open(filename) as json_data:
        return json.load(json_data)
# Run main() only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
Any help would be appreciated!
Make sure your JSON data is in a valid format; a single extra character will break the Python parser. To test your JSON data, run it through a JSON validator (the original answer linked to one here) and make sure it is displayed in a correct format.
For example, if I had
JSON_data ='{"c":[{"xy":{"xstart":0,"xend":5,"ystart":1,"yend":5},"names":["D","T","O","H","L","C",],"co":["rgb(0,0,128)"]}],"Values":{"D":["11/30/2012"],"T":["09:44:00"],"O":["5848.40"],"H":["5848.40"],"L":["5847.45"],"C":["5848.40"]}}'
The trailing comma after "C" (here ["D","T","O","H","L","C",]) will cause a parse error. So make sure that your data is in the correct format and contains no unnecessary characters.
Hope this helps.

Categories