Reading scraped json file into python - python
I'm trying to read a JSON file I've scraped from a website into Python, with the eventual aim of turning it into a csv file for later statistical analysis.
An example of the file I want to read is:
globals.jsonpCallback('/feed/match/1-1-8vOt3JMq-1-2-yj0a9.dat', {"s":1,"d":{"bt":1,"sc":2,"st":{"notice":"finished","status-name":"Finished","result-name":"<p id=\"event-status\" class=\"result\"><span class=\"bold\" id=\"event-status-finished\">Final result 3:1<\/span><\/p>"},"oddsdata":{"back":{"E-1-2-0-0-0":{"handicapType":0,"handicapValue":"0.00","odds":{"49":{"0":1.32,"2":7,"1":4.8},"46":{"0":1.33,"2":7.3,"1":4.5},"9":{"0":1.25,"2":12,"1":5},"16":{"0":1.29,"2":11,"1":5},"164":{"0":1.31,"2":7.59,"1":4.5},"18":{"0":1.24,"2":11.6,"1":5.25},"23":{"0":1.27,"2":11.23,"1":4.75},"26":{"0":1.3,"2":10.5,"1":5},"5":{"0":1.23,"2":12,"1":5.75},"44":{"0":1.14,"2":4,"1":2.52},"1":{"0":1.3,"2":8.5,"1":4.4},"8":{"0":1.25,"2":9,"1":5},"3":{"0":1.25,"2":11,"1":4.5},"31":{"0":1.25,"2":9,"1":4.5},"60":{"0":1.29,"2":8.5,"1":4.5},"15":{"0":1.28,"2":10,"1":5.4},"34":{"0":1.25,"2":10,"1":5},"111":{"0":1.25,"2":10,"1":4.84},"68":{"0":1.26,"2":9.75,"1":4.84},"105":{"0":1.28,"2":8,"1":4.5},"71":{"0":1.3,"2":9,"1":5},"28":{"0":1.25,"2":12,"1":5.5},"33":{"0":1.3,"2":9.25,"1":4.84},"14":{"0":1.22,"2":10.93,"1":5.05},"21":{"0":1.33,"2":7.5,"1":4.33},"30":{"0":1.22,"2":8.5,"1":5.5},"24":{"0":1.3,"2":9,"1":4.95},"32":{"0":1.25,"2":9.5,"1":4.9},"2":{"0":1.25,"2":10,"1":5},"90":{"0":1.28,"2":10.25,"1":4.8},"56":{"0":1.3,"2":9.8,"1":4.8},"41":{"0":1.3,"2":8.9,"1":4.5},"128":{"0":1.27,"2":12,"1":5.5},"75":{"0":1.2,"2":10.5,"1":5.2}},"mixedParameterId":0,"OutcomeID":{"0":"s3nfxv464x0x1o7d5","2":"s3nfxv464x0x1o7d6","1":"s3nfxv498x0x0"},"opening_odds":{"49":{"0":null,"2":null,"1":null},"46":{"0":null,"2":null,"1":null},"9":{"0":null,"2":null,"1":null},"16":{"0":1.22,"2":null,"1":null},"164":{"0":null,"2":null,"1":null},"18":{"0":1.31,"2":9.8,"1":5.65},"23":{"0":null,"2":4.75,"1":11.23},"26":{"0":1.25,"2":10,"1":5.25},"5":{"0":1.22,"2":12.5,"1":5.5},"44":{"0":null,"2":null,"1":null},"1":{"0":null,"2":null,"1":null},"8":{"0":null,"2":null,"1":null},"3":{"0":null,"2":null,"1":null},"31":{"0":null,"2":nul
l,"1":null},"60":{"0":null,"2":null,"1":null},"15":{"0":1.25,"2":null,"1":4.5},"34":{"0":null,"2":null,"1":null},"111":{"0":null,"2":null,"1":null},"68":{"0":1.27,"2":9.15,"1":4.65},"105":{"0":null,"2":null,"1":null},"71":{"0":1.29,"2":8,"1":4.5},"28":{"0":1.22,"2":10,"1":5},"33":{"0":1.4,"2":6.65,"1":4.25},"14":{"0":1.32,"2":7.8,"1":4.42},"21":{"0":null,"2":null,"1":null},"30":{"0":null,"2":null,"1":null},"24":{"0":null,"2":null,"1":null},"32":{"0":1.3,"2":8.5,"1":4.75},"2":{"0":1.35,"2":7.75,"1":4.3},"90":{"0":null,"2":null,"1":null},"56":{"0":null,"2":11,"1":4.59},"41":{"0":null,"2":null,"1":null},"128":{"0":1.25,"2":null,"1":5},"75":{"0":1.25,"2":8.9,"1":4.8}},"opening_change_time":{"49":{"0":false,"2":false,"1":false},"46":{"0":false,"2":false,"1":false},"9":{"0":false,"2":false,"1":false},"16":{"0":false,"2":false,"1":false},"164":{"0":false,"2":false,"1":false},"18":{"0":false,"2":false,"1":false},"23":{"0":false,"2":false,"1":false},"26":{"0":false,"2":false,"1":false},"5":{"0":false,"2":false,"1":false},"44":{"0":false,"2":false,"1":false},"1":{"0":false,"2":false,"1":false},"8":{"0":false,"2":false,"1":false},"3":{"0":false,"2":false,"1":false},"31":{"0":false,"2":false,"1":false},"60":{"0":false,"2":false,"1":false},"15":{"0":false,"2":false,"1":false},"34":{"0":false,"2":false,"1":false},"111":{"0":false,"2":false,"1":false},"68":{"0":false,"2":false,"1":false},"105":{"0":false,"2":false,"1":false},"71":{"0":false,"2":false,"1":false},"28":{"0":false,"2":false,"1":false},"33":{"0":false,"2":false,"1":false},"14":{"0":false,"2":false,"1":false},"21":{"0":false,"2":false,"1":false},"30":{"0":false,"2":false,"1":false},"24":{"0":false,"2":false,"1":false},"32":{"0":false,"2":false,"1":false},"2":{"0":false,"2":false,"1":false},"90":{"0":false,"2":false,"1":false},"56":{"0":false,"2":false,"1":false},"41":{"0":false,"2":false,"1":false},"128":{"0":false,"2":false,"1":false},"75":{"0":false,"2":false,"1":false}},"opening_volume":{"49":{"0":null,"2":null,"1":n
ull},"46":{"0":null,"2":null,"1":null},"9":{"0":null,"2":null,"1":null},"16":{"0":null,"2":null,"1":null},"164":{"0":null,"2":null,"1":null},"18":{"0":null,"2":null,"1":null},"23":{"0":null,"2":null,"1":null},"26":{"0":null,"2":null,"1":null},"5":{"0":null,"2":null,"1":null},"44":{"0":null,"2":null,"1":null},"1":{"0":null,"2":null,"1":null},"8":{"0":null,"2":null,"1":null},"3":{"0":null,"2":null,"1":null},"31":{"0":null,"2":null,"1":null},"60":{"0":null,"2":null,"1":null},"15":{"0":null,"2":null,"1":null},"34":{"0":null,"2":null,"1":null},"111":{"0":null,"2":null,"1":null},"68":{"0":null,"2":null,"1":null},"105":{"0":null,"2":null,"1":null},"71":{"0":null,"2":null,"1":null},"28":{"0":null,"2":null,"1":null},"33":{"0":null,"2":null,"1":null},"14":{"0":null,"2":null,"1":null},"21":{"0":null,"2":null,"1":null},"30":{"0":null,"2":null,"1":null},"24":{"0":null,"2":null,"1":null},"32":{"0":null,"2":null,"1":null},"2":{"0":null,"2":null,"1":null},"90":{"0":null,"2":null,"1":null},"56":{"0":null,"2":null,"1":null},"41":{"0":null,"2":null,"1":null},"128":{"0":null,"2":null,"1":null},"75":{"0":null,"2":null,"1":null}},"volume":[],"change_time":{"49":{"0":1192344925,"2":1192344925,"1":1192344925},"46":{"0":1192356640,"2":1192356640,"1":1192356640},"9":{"0":1192374117,"2":1192374117,"1":1192374117},"16":{"0":1192643462,"2":1192643462,"1":1192643462},"164":{"0":1192356640,"2":1192356640,"1":1192356640},"18":{"0":1192643462,"2":1192642538,"1":1192643462},"23":{"0":1192528549,"2":1192533516,"1":1192533516},"26":{"0":1192643462,"2":1192527200,"1":1192643462},"5":{"0":1192617648,"2":1192617648,"1":1192617648},"44":{"0":1192054308,"2":1192054308,"1":1192054308},"1":{"0":1192383076,"2":1192383076,"1":1192383076},"8":{"0":1192364254,"2":1192364254,"1":1192364254},"3":{"0":1192430433,"2":1192430433,"1":1192430433},"31":{"0":1192365159,"2":1192365159,"1":1192365159},"60":{"0":1192389374,"2":1192389374,"1":1192389374},"15":{"0":1192610985,"2":1192310256,"1":1192610985},"34":{"0":119236695
9,"2":1192366959,"1":1192366959},"111":{"0":1192487538,"2":1192487538,"1":1192487538},"68":{"0":1192473161,"2":1192473161,"1":1192473161},"105":{"0":1192354376,"2":1192354376,"1":1192354376},"71":{"0":1192610985,"2":1192610985,"1":1192610985},"28":{"0":1192570418,"2":1192570418,"1":1192570418},"33":{"0":1192397461,"2":1192397461,"1":1192397461},"14":{"0":1192641649,"2":1192641649,"1":1192641649},"21":{"0":1192362474,"2":1192362474,"1":1192362474},"30":{"0":1192356171,"2":1192356171,"1":1192356171},"24":{"0":1192419956,"2":1192419956,"1":1192419956},"32":{"0":1192635347,"2":1192635347,"1":1192635347},"2":{"0":1192449725,"2":1192449725,"1":1192449725},"90":{"0":1192426261,"2":1192426261,"1":1192426261},"56":{"0":1192366052,"2":1192621850,"1":1192621850},"41":{"0":1192436180,"2":1192436180,"1":1192436180},"128":{"0":1192624523,"2":1192441597,"1":1192624523},"75":{"0":1192437084,"2":1192437084,"1":1192437084}},"st":{"49":[3,60],"46":[3,60],"9":[3,60],"16":[3,60],"164":[3,60],"18":[3,60],"23":[3,60],"26":[3,60],"5":[3,60],"44":[3,60],"1":[3,60],"8":[3,60],"3":[3,60],"31":[3,60],"60":[3,60],"15":[3,60],"34":[3,60],"111":[3,60],"68":[3,60],"105":[3,60],"71":[3,60],"28":[3,60],"33":[3,60],"14":[3,60],"21":[3,60],"30":[3,60],"24":[3,60],"32":[3,60],"2":[3,60],"90":[3,60],"56":[3,60],"41":[3,60],"128":[3,60],"75":[3,60]},"bs":[],"act":{"49":true,"46":true,"9":true,"16":true,"164":true,"18":true,"23":true,"26":true,"5":true,"44":true,"1":true,"8":true,"3":true,"31":true,"60":true,"15":true,"34":false,"111":true,"68":true,"105":true,"71":true,"28":true,"33":true,"14":true,"21":true,"30":false,"24":true,"32":true,"2":true,"90":true,"56":true,"41":true,"128":true,"75":true},"actEx":{"44":{"0":true,"2":true,"1":true}}}},"lay":[]},"history":{"back":null,"lay":null},"hcl":true,"time-base":1192644000,"nav":{"1":{"2":["1","2","3","5","8","9","14","15","16","18","21","23","24","26","28","30","31","32","33","34","41","44","46","49","56","60","68","71","75","90","105","111","128","164"]}
,"4":{"2":["2","5","9","23","46","49","164"]}},"hasLiveOdds":false,"brokenParser":["30","34","103","154"],"hash":"c9fff233b9f89d1da10e81a9fdae7c14"},"refresh":16});
My current code looks like this (previous copy and paste job was inexplicably wrong):
import json
from pprint import pprint
# Read the entire scraped file into a single string (Python 2 script).
json_data = open('/home/readejj/Documents/data/1-1-jFEwehyT-1-2-yjaf9.dat').read()
# Keep only the text from the first '{' through the last '}' -- presumably to
# drop a globals.jsonpCallback('...', ...); wrapper like the one in the example.
json_stuff = json_data[json_data.find('{'):json_data.rfind('}')+1]
# NOTE(review): this swaps every double quote for a single quote. JSON requires
# double-quoted strings, so json.loads() below then fails with
# "Expecting property name" (see the first traceback).
json_stuff = json_stuff.replace("\"","'")
# Debug output: the type, the length, and characters 1..1999 of the extracted text.
print "json_stuff",type(json_stuff), len(json_stuff), json_stuff[1:2000]
# Parse the extracted text as JSON and pretty-print the resulting object.
d = json.loads(json_stuff)
pprint(d)
This is adapted from: Reading JSON from a file?
The error I'm getting is:
Traceback (most recent call last):
File "read_json.py", line 16, in <module>
d = json.loads(json_stuff)
File "/usr/lib/python2.7/json/__init__.py", line 326, in loads
return _default_decoder.decode(s)
File "/usr/lib/python2.7/json/decoder.py", line 366, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/lib/python2.7/json/decoder.py", line 382, in raw_decode
obj, end = self.scan_once(s, idx)
ValueError: Expecting property name: line 1 column 1 (char 1)
If anyone can help that would be greatly appreciated - my apologies if it's a very basic error, I am not an experienced programmer.
UPDATE: It's been pointed out my replace line is bad - if removed, I get a different error:
Traceback (most recent call last):
File "read_oddsportal_json.py", line 19, in <module>
d = json.loads(json_stuff)
File "/usr/lib/python2.7/json/__init__.py", line 326, in loads
return _default_decoder.decode(s)
File "/usr/lib/python2.7/json/decoder.py", line 366, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/lib/python2.7/json/decoder.py", line 382, in raw_decode
obj, end = self.scan_once(s, idx)
ValueError: Expecting , delimiter: line 1 column 99 (char 99)
This line:
json_stuff = json_stuff.replace("\"","'")
is bad. Delete it.
The JSON spec requires that strings are double-quote (") delimited, not single-quote.
Related
Decode double quotes in a json string to convert to json Python
Faced the following problem, I have a following string like this: mystr = '[\\x22https://dosprn.co.il/\\x22, \\x22DOSPrn - הדפסה בעברית ב-DOS מדפסת Windows DOSPrn. \\\\\\x22Tools\\\\\\x22 Create Bootable USB Drive\\x22]' I am trying to convert this string to JSON. However, this is not possible: json.loads(mystr) Traceback (most recent call last): File "/Users/Tokyonight/PycharmProjects/WebCeoDev/venv/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "<ipython-input-57-6f4efa0d20c6>", line 1, in <module> json.loads(mystr) File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/__init__.py", line 348, in loads return _default_decoder.decode(s) File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/decoder.py", line 337, in decode obj, end = self.raw_decode(s, idx=_w(s, 0).end()) File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/decoder.py", line 355, in raw_decode raise JSONDecodeError("Expecting value", s, err.value) from None json.decoder.JSONDecodeError: Expecting value: line 1 column 2 (char 1) import html, json # Escape html objects. And unescape unicode. mystr = '[\\x22https://dosprn.co.il/\\x22, \\x22DOSPrn - הדפסה בעברית ב-DOS מדפסת Windows DOSPrn. \\\\\\x22Tools\\\\\\x22 Create Bootable USB Drive\\x22]' mystr = mystr.encode('ascii', 'xmlcharrefreplace').decode('unicode_escape') mystr_to_json = json.loads(mystr) print(mystr_to_json) #['https://dosprn.co.il/', 'DOSPrn - הדפסה בעברית ב-DOS מדפסת Windows DOSPrn. "Tools" Create Bootable USB Drive'] mystr_to_json[1] = html.unescape(mystr_to_json[1]) print(mystr_to_json) # ['https://dosprn.co.il/', 'DOSPrn - הדפסה בעברית ב-DOS מדפסת Windows DOSPrn. "Tools" Create Bootable USB Drive'] It works. But I think it's too cumbersome. I'm pretty sure there is an easier way to do this. Anyone have any ideas how to do it better? Thanks.
Why do I get an error with the json original codes?
How to fix this error? I have a very long code, within which I am using: import json folder = json.loads(file_content[0][:-1]) and f.write(json.dumps(saveable_folder)) from json library. However, I get the following error: File"C:\Users\user1\Anaconda3\envs\virtualenvironment\lib\json\__init__.py", line 357, in loads return _default_decoder.decode(s) File "C:\Users\user1\Anaconda3\envs\virtualenvironment\lib\json\decoder.py", line 337, in decode obj, end = self.raw_decode(s, idx=_w(s, 0).end()) File "C:\Users\user1\Anaconda3\envs\virtualenvironment\lib\json\decoder.py", line 353, in raw_decode obj, end = self.scan_once(s, idx) json.decoder.JSONDecodeError: Expecting ',' delimiter: line 1 column 85 (char 84) Is it because I need to change something in json library? or because I have a virtual environment in Anaconda, but I am using VScode? I am completely lost. Thanks!
I have a problem with json variable setting
I have this code here: import json with open("pass_file.txt", "r") as file: password = json.loads(file.read()) It raises this error: Traceback (most recent call last): File "testdoc.py", line 9, in <module> print(json.loads(file.read())) File "C:\Program Files\Python37\lib\json\__init__.py", line 348, in loads return _default_decoder.decode(s) File "C:\Program Files\Python37\lib\json\decoder.py", line 337, in decode obj, end = self.raw_decode(s, idx=_w(s, 0).end()) File "C:\Program Files\Python37\lib\json\decoder.py", line 355, in raw_decode raise JSONDecodeError("Expecting value", s, err.value) from None json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0) I don't know why this is happening, because I have the same code in another file, just with a different variable name and file name, and it works fine. I did notice another question about a similar error, but it didn't answer my question. Thanks in advance :)
What is the content of your pass_file.txt? The Python code uses json.loads, so it expects JSON-formatted content in pass_file.txt. For example, for a string, the content of this file would be "hello world". If you don't include the quotes, the JSON parsing will fail.
Not able to import json from commandline for Python
I am currently trying to import a JSON input that is passed to Python through a command-line argument, and I am trying to save the different values from the JSON to a list. I am having issues with my code, and have included both the code and the error I get below. Any help much appreciated. import sys import json def lookup1 (): jsonData = json.loads(sys.argv[1]) print jsonData jsonList = [jsonData['proxy'],jsonData['OS']] print jsonList lookup1() The error is given below: $ python dynamicMapper.py '{'proxy':1,'OS':2}' Traceback (most recent call last): File "dynamicMapper.py", line 9, in <module> lookup1() File "dynamicMapper.py", line 4, in lookup1 jsonData = json.loads(sys.argv[1]) File "/usr/lib/python2.7/json/__init__.py", line 338, in loads return _default_decoder.decode(s) File "/usr/lib/python2.7/json/decoder.py", line 366, in decode obj, end = self.raw_decode(s, idx=_w(s, 0).end()) File "/usr/lib/python2.7/json/decoder.py", line 382, in raw_decode obj, end = self.scan_once(s, idx) ValueError: Expecting property name: line 1 column 2 (char 1) The command-line argument that I give is python dynamicMapper.py '{'proxy':1,'OS':2}' I am not able to find out what is causing this error or whether my approach is right.
The script is working fine; you just need to call it the right way: python dynamicMapper.py '{"proxy":1,"OS":2}' {u'OS': 2, u'proxy': 1} [1, 2] In JSON, strings are quoted with double quotes, not single quotes. You also need to quote the whole string passed to the script so that the shell treats it as a single argument.
Can't read JSON file in Python
I tried to read data from a JSON file, but I encountered weird error and have no idea what it means. I tried googling it, but it didn't help. I got the following error: Traceback (most recent call last): File "items_uploader.py", line 40, in <module> main() File "items_uploader.py", line 16, in main LoadItemsData(settings['items_filename']) File "items_uploader.py", line 36, in LoadItemsData data = json.load(json_data) File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/__init__.py", line 278, in load **kw) File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/__init__.py", line 326, in loads return _default_decoder.decode(s) File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.py", line 366, in decode obj, end = self.raw_decode(s, idx=_w(s, 0).end()) File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.py", line 382, in raw_decode obj, end = self.scan_once(s, idx) ValueError: Expecting , delimiter: line 8 column 397 (char 3064) The code itself is quite simple: import socket import MySQLdb from ConfigParser import SafeConfigParser import json from pprint import pprint def main(): settings = GetSettings() LoadItemsData(settings['items_filename']) return def GetSettings(): settings = {} parser = SafeConfigParser() parser.read('settings.yaml') settings['items_filename'] = parser.get('files', 'items_filename') return settings def LoadItemsData(filename): json_data=open(filename) data = json.load(json_data) return data if __name__ == '__main__': main() Any help would be appreciated!
Make sure your JSON data is in a valid format, one extra character will mess up the python parser. To test your JSON data go here, make sure you can see it in a correct format. For example, if I had JSON_data ='{"c":[{"xy":{"xstart":0,"xend":5,"ystart":1,"yend":5},"names":["D","T","O","H","L","C",],"co":["rgb(0,0,128)"]}],"Values":{"D":["11/30/2012"],"T":["09:44:00"],"O":["5848.40"],"H":["5848.40"],"L":["5847.45"],"C":["5848.40"]}}' The , after C (here ["D","T","O","H","L","C",]) will show an error. So make sure that your data is in correct format and there are no unnecessary characters. Hope this helps.