How to deal with json data in python? - python

# This code searches the query from googlecustomsearch api and returns data in json format
import pprint
import json
from googleapiclient.discovery import build
my_api_key = "**************************************"
my_cse_id = "*************************************"
def google_search(search_term, api_key, cse_id, **kwargs):
    """Run a Google Custom Search query and return its result items.

    Args:
        search_term: Query string, sent to the API as ``q``.
        api_key: Developer key handed to ``build``.
        cse_id: Custom search engine id, sent to the API as ``cx``.
        **kwargs: Extra request parameters forwarded unchanged to
            ``cse().list`` (for example ``num=10``).

    Returns:
        The list stored under the response's ``'items'`` key, or an empty
        list when the response has no such key.
    """
    service = build("customsearch", "v1", developerKey=api_key)
    res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()
    # A query without matches yields a response with no 'items' key, so fall
    # back to an empty list instead of raising KeyError.
    return res.get('items', [])
results = google_search(
'Roshan Patel', my_api_key, my_cse_id, num=10)
for result in results:
pprint.pprint(result)
result_dict = json.loads(result)
print result_dict['formattedUrl']
This is the output i am getting:
I want to get only the URL parts, e.g. u'formattedUrl', and store them in a list. How can I do that?
New error
Traceback (most recent call last):
File "<ipython-input-38-eb898c8de239>", line 1, in <module>
runfile('C:/Users/abc/untitled9.py', wdir='C:/Users/abc')
File "C:\Users\abc\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 880, in runfile
execfile(filename, namespace)
File "C:\Users\abc\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 87, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "C:/Users/abc/untitled9.py", line 28, in <module>
result_dict = json.loads(result)
File "C:\Users\abc\Anaconda2\lib\json\__init__.py", line 339, in loads
return _default_decoder.decode(s)
File "C:\Users\abc\Anaconda2\lib\json\decoder.py", line 364, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
TypeError: expected string or buffer

You first need to convert the results to a Python object, such as a dict; to do that, use the json module:
result_dict = json.loads(result)
Now you can filter your dict:
result_dict['formattedUrl']

result is not JSON, but an actual Python dictionary, so just:
print result[u'formattedUrl']

Related

citybikes: JSON to Dataframe

I am using python-citybikes (https://pypi.org/project/python-citybikes/) to retrieve some data.
However, I can't figure out a way to export the data.
import citybikes
import pandas as pd
client = citybikes.Client()
GlasgowNextBike = citybikes.Network(client, uid='nextbike-glasgow')
list(GlasgowNextBike.stations)
Stations = list(GlasgowNextBike.stations)
pd.read_json(Stations)
I am getting
Traceback (most recent call last):
File "<ipython-input-15-5a1904def0e8>", line 1, in <module>
pd.read_json(Stations)
File "/Users/noor/opt/anaconda3/lib/python3.7/site-packages/pandas/util/_decorators.py", line 214, in wrapper
return func(*args, **kwargs)
File "/Users/noor/opt/anaconda3/lib/python3.7/site-packages/pandas/io/json/_json.py", line 585, in read_json
path_or_buf, encoding=encoding, compression=compression
File "/Users/noor/opt/anaconda3/lib/python3.7/site-packages/pandas/io/common.py", line 200, in get_filepath_or_buffer
raise ValueError(msg)
ValueError: Invalid file path or buffer object type: <class 'list'>
My question is :
How can I export/save the results as a JSON or CSV file?
Try using the json module, like so:
import json
import citybikes
client = citybikes.Client()
GlasgowNextBike = citybikes.Network(client, uid='nextbike-glasgow')
# Dump the network's data to a JSON file; the with-block closes the file
# even if serialisation fails part-way.
with open('GlasgowNextBike.json', 'w') as f:
    json.dump(GlasgowNextBike.data, f, indent=2)

remove function wrapper with str.replace() in Python

I have some annoying elements in a JSON file that go something like:
"DateTime" : Date(-62135596800000),
"ReceivedDateTime" : Date(-62135596800000)
where deserialising this using json.load() results in an error because Date() is not recognised as valid JSON.
Traceback (most recent call last):
File "json_parse.py", line 10, in <module>
data = json.load(data_file)
File "C:\Python27\lib\json\__init__.py", line 291, in load
**kw)
File "C:\Python27\lib\json\__init__.py", line 339, in loads
return _default_decoder.decode(s)
File "C:\Python27\lib\json\decoder.py", line 364, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Python27\lib\json\decoder.py", line 382, in raw_decode
raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded
so the easiest thing to do is to remove the Date() wrapper before parsing. I can then convert the values to proper datetime objects afterwards.
I can do simple things with str.replace such as:
data.replace("Date(","")
but obviously I am not removing the trailing bracket.
How might I go about doing this?
Cheers.
The more readable way would be to use re library and create regex:
import re
text = '''"DateTime" : Date(-62135596800000),
"ReceivedDateTime" : Date(-62135596800000)'''
# Raw string so the backslashes reach the regex engine untouched. [^)]+ stops
# at the first closing parenthesis, so several Date(...) wrappers on one line
# are matched one by one instead of being swallowed by a greedy ".+".
pattern = re.compile(r"Date\(([^)]+)\)")
# Replace every Date(<value>) with just <value> in a single pass.
text2 = pattern.sub(r"\1", text)
I wrote this code for you, it should solve the problem.
a = '''"DateTime" : Date(-62135596800000),
"ReceivedDateTime" : Date(-62135596800000)'''
# Unwrap one Date(...) per pass: cut out the first ")" that follows the first
# "Date(", then drop that "Date(" prefix itself.
while "Date(" in a:
    value_start = a.index("Date(") + len("Date(")
    closing = value_start + a[value_start:].index(")")
    without_paren = a[:closing] + a[closing + 1:]
    a = without_paren.replace("Date(", "", 1)

Issue in reading JSON file in python

>>> import json
>>> d2 = json.loads(open("t.json").read())
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib64/python2.6/json/__init__.py", line 307, in loads
return _default_decoder.decode(s)
File "/usr/lib64/python2.6/json/decoder.py", line 319, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/lib64/python2.6/json/decoder.py", line 336, in raw_decode
obj, end = self._scanner.iterscan(s, **kw).next()
File "/usr/lib64/python2.6/json/scanner.py", line 55, in iterscan
rval, next_pos = action(m, context)
File "/usr/lib64/python2.6/json/decoder.py", line 185, in JSONObject
raise ValueError(errmsg("Expecting object", s, end))
ValueError: Expecting object: line 1 column 11 (char 11)
[ RHEL - ~/testing ]$ cat t.json
{"us": u"OFF", "val": u"5"}
Here is what I have in my JSON file and when I try to read it using open and json.load and json.loads it fails.
After using json.load
>>> import json
>>> d2 = json.load(open("t.json"))
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib64/python2.6/json/__init__.py", line 267, in load
parse_constant=parse_constant, **kw)
File "/usr/lib64/python2.6/json/__init__.py", line 307, in loads
return _default_decoder.decode(s)
File "/usr/lib64/python2.6/json/decoder.py", line 319, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/lib64/python2.6/json/decoder.py", line 336, in raw_decode
obj, end = self._scanner.iterscan(s, **kw).next()
File "/usr/lib64/python2.6/json/scanner.py", line 55, in iterscan
rval, next_pos = action(m, context)
File "/usr/lib64/python2.6/json/decoder.py", line 185, in JSONObject
raise ValueError(errmsg("Expecting object", s, end))
ValueError: Expecting object: line 1 column 11 (char 11)
>>>
You are using the wrong function. Use json.load() (no s!) to load data from an open file object:
d2 = json.load(open("t.json"))
The json.loads() function expects you to pass in a string, not a file object. You'd have to read your file in that case, returning the read data:
d2 = json.loads(open("t.json").read())
Next, you have invalid JSON in that file:
{"us": u"OFF", "val": u"5"}
# ^ ^
JSON is not Python; those u prefixes are not supported nor needed. You'll need to remove those from the file before it'll load.
If you have an API producing that format, it is not giving you JSON. It could be that it is producing a (strange form of) Python syntax instead; Python itself would produce {'us': u'OFF', 'val': u'5'} (single quotes). You can have Python interpret that as Python literals with ast.literal_eval():
import ast
# Parse the file's text as a Python literal rather than as JSON;
# literal_eval only accepts literals and never executes code.
with open('t.json') as fileobj:
    d2 = ast.literal_eval(fileobj.read())
but it could be that the format is broken in other ways we cannot determine from a single isolated sample. It could be using true and false for boolean values, like in JSON, for example.
Better to have the API fixed rather than try to work around this brokenness.
You are using the json.loads method. More documentation here. This method is used for string arguments only. Luckily, there is a similarly named json.load method documented here. This one can be used directly on a file object.
d2 = json.load(open("t.json"))
Your issue is that the JSON is not valid.
It looks like it is a Python dictionary. u'string' is a Python 2 unicode string.
If you remove the u from your strings, it works fine.
>>> import json
>>> json.load(open('i.json'))
{u'val': u'5', u'us': u'OFF'}
Here is the json file:
$ cat i.json
{"us": "OFF", "val": "5"}

ValueError: Invalid \escape while running query

I am trying to query DBpedia using SPARQLWrapper in Python (v3.3). This is my query:
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?slot WHERE {
<http://dbpedia.org/resource/Week> <http://www.w3.org/2002/07/owl#sameAs> ?slot
}
It results in an error from the SPARQLWrapper package:
ValueError: Invalid \escape: line 118 column 74 (char 11126)
Code:
query = "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT ?slot WHERE{{ {subject} {predicate} {object} }} "
query = query.format(subject=subject, predicate=predicate, object= objectfield)
self.sparql.setQuery(query)
self.sparql.setReturnFormat(JSON)
results = self.sparql.query().convert() # Error thrown at this line
Error :
Traceback (most recent call last):
File "getUriLiteralAgainstPredicate.py", line 84, in <module>
sys.exit(main())
File "getUriLiteralAgainstPredicate.py", line 61, in main
entity,predicateURI,result = p.getObject(dataAtURI,predicates, each["entity"])
File "getUriLiteralAgainstPredicate.py", line 30, in getObject
result = self.run_sparql("<"+subjectURI+">","<"+predicateURI+">","?slot")
File "getUriLiteralAgainstPredicate.py", line 24, in run_sparql
results = self.sparql.query().convert()
File "/Library/Frameworks/Python.framework/Versions/3.3/lib/python3.3/site-packages/SPARQLWrapper-1.5.2-py3.3.egg/SPARQLWrapper/Wrapper.py", line 539, in convert
return self._convertJSON()
File "/Library/Frameworks/Python.framework/Versions/3.3/lib/python3.3/site-packages/SPARQLWrapper-1.5.2-py3.3.egg/SPARQLWrapper/Wrapper.py", line 476, in _convertJSON
return jsonlayer.decode(self.response.read().decode("utf-8"))
File "/Library/Frameworks/Python.framework/Versions/3.3/lib/python3.3/site-packages/SPARQLWrapper-1.5.2-py3.3.egg/SPARQLWrapper/jsonlayer.py", line 76, in decode
return _decode(string)
File "/Library/Frameworks/Python.framework/Versions/3.3/lib/python3.3/site-packages/SPARQLWrapper-1.5.2-py3.3.egg/SPARQLWrapper/jsonlayer.py", line 147, in <lambda>
_decode = lambda string, loads=json.loads: loads(string)
File "/Library/Frameworks/Python.framework/Versions/3.3/lib/python3.3/json/__init__.py", line 319, in loads
return _default_decoder.decode(s)
File "/Library/Frameworks/Python.framework/Versions/3.3/lib/python3.3/json/decoder.py", line 352, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/Library/Frameworks/Python.framework/Versions/3.3/lib/python3.3/json/decoder.py", line 368, in raw_decode
obj, end = self.scan_once(s, idx)
ValueError: Invalid \escape: line 118 column 74 (char 11126)
The problem is, that dbpedia output has this line:
{ "slot": { "type": "uri", "value": "http://got.dbpedia.org/resource/\U00010345\U00010339\U0001033A\U00010349" }},
Notice the escapes that start with \U (capital U). These are not valid JSON, so Python's json module rejects them. The problem is therefore on the DBpedia side and cannot be fixed inside SPARQLWrapper.
But… You can handle it yourself like this:
results = self.sparql.query()
body = results.response.read()
fixed_body = body.decode("unicode_escape")
from SPARQLWrapper.Wrapper import jsonlayer
results = jsonlayer.decode(fixed_body)
Try python-cjson.
The approach above can also be written as follows:
import cjson
results = self.sparql.query()
body = results.response.read()
results = cjson.decode(body)

Python read multiline JSON

I have been trying to use JSON to store settings for a program. I can't seem to get Python 2.6 's JSON Decoder to decode multi-line JSON strings...
Here is example input:
.settings file:
"""
{\
'user':'username',\
'password':'passwd',\
}\
"""
I have tried a couple other syntaxes for this file, which I will specify below (with the traceback they cause).
My python code for reading the file in is
import json
settings_text = open(".settings", "r").read()
settings = json.loads(settings_text)
The Traceback for this is:
Traceback (most recent call last):
File "json_test.py", line 4, in <module>
print json.loads(text)
File "/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/json/__init__.py", line 307, in loads
return _default_decoder.decode(s)
File "/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/json/decoder.py", line 322, in decode
raise ValueError(errmsg("Extra data", s, end, len(s)))
ValueError: Extra data: line 1 column 2 - line 7 column 1 (char 2 - 41)
I assume the "Extra data" is the triple-quote.
Here are the other syntaxes I have tried for the .settings file, with their respective Tracebacks:
"{\
'user':'username',\
'pass':'passwd'\
}"
Traceback (most recent call last):
File "json_test.py", line 4, in <module>
print json.loads(text)
File "/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/json/__init__.py", line 307, in loads
return _default_decoder.decode(s)
File "/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/json/decoder.py", line 319, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/json/decoder.py", line 336, in raw_decode
obj, end = self._scanner.iterscan(s, **kw).next()
File "/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/json/scanner.py", line 55, in iterscan
rval, next_pos = action(m, context)
File "/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/json/decoder.py", line 155, in JSONString
return scanstring(match.string, match.end(), encoding, strict)
ValueError: Invalid \escape: line 1 column 2 (char 2)
'{\
"user":"username",\
"pass":"passwd",\
}'
Traceback (most recent call last):
File "json_test.py", line 4, in <module>
print json.loads(text)
File "/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/json/__init__.py", line 307, in loads
return _default_decoder.decode(s)
File "/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/json/decoder.py", line 319, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/json/decoder.py", line 338, in raw_decode
raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded
If I put the settings all on one line, it decodes fine.
Get rid of all of the backslashes and all of the "Pythonic" quoting in the settings file. Works fine if the file is just:
{
"user":"username",
"password":"passwd"
}
Note also that JSON strings are quoted with double quotes, not single quotes. See JSON spec here:
http://www.json.org/
>>> s = """
{
"user":"username",
"password":"passwd"
}
"""
>>> json.loads(s)
{'password': 'passwd', 'user': 'username'}
json doesn't consider \ to be a line-continuation character.
Try to use eval(s)
s = """
{\
'user':'username',\
'password':'passwd',\
}\
"""
# Inside this Python string literal each backslash-newline is a line
# continuation, so the dict text ends up on a single line.
# NOTE(review): eval() executes arbitrary code; for settings read from a file
# prefer ast.literal_eval(), which only accepts literals.
qq = eval(s)
qq
# {'password': 'passwd', 'user': 'username'}
type(qq)
# dict

Categories