I'm not sure why this happens; I get a length of zero from my JSON file.
0
It is supposed to be like this:
1000
I'm afraid the comma thing after each json object cause this issue. (My current json format)
{ A:"A"},{ B:"B"},...
The correct way is like this
{ A:"A"} { B:"B"},...
So how can I calculate the total length without removing the commas?
My code
import json

# Count the JSON objects stored one-per-line in the file, skipping any
# line that fails to parse (e.g. because of trailing commas between objects).
githubusers_data_path = 'githubusers.json'
githubusers_data = []
with open(githubusers_data_path, "r") as githubusers_file:
    for line in githubusers_file:
        try:
            data = json.loads(line)
        except json.JSONDecodeError:
            # Malformed line -- skip it rather than abort the whole count.
            continue
        githubusers_data.append(data)
print(len(githubusers_data))
Sample
{
"login": "datomnurdin"
}, {
"login": "ejamesc"
},...
I think you're getting an exception that you're suppressing with try-except, because of the commas.
One solution would be to convert your file to a string first, stick a '[' and ']' around the string to convert it into a valid json format, then use json.loads to convert the string.
import json

# The file holds comma-separated objects; surrounding the whole content
# with '[' and ']' turns it into one valid JSON array we can parse at once.
githubusers_data_path = 'githubusers.json'
with open(githubusers_data_path, "r") as githubusers_file:
    githubusers_string = githubusers_file.read()
githubusers_data = json.loads('[' + githubusers_string + ']')
print(len(githubusers_data))
there is an exception in your code:
# NOTE(review): code quoted from the question to show the failure mode.
# It is Python 2 syntax ("except Exception, e", "print e") kept verbatim.
import json
githubusers_data_path = 'githubusers.json'
githubusers_data = []
githubusers_file = open(githubusers_data_path, "r")
# Iterating the handle line-by-line while also calling json.load() on the
# same handle mixes iteration with a read method on one file object.
for line in githubusers_file:
try:
# json.load() consumes the rest of the already-advanced file object,
# so it sees a partial/empty stream and raises on every iteration.
data = json.load(githubusers_file) # exception from here
githubusers_data.append(data)
except Exception, e:
print e
print len(githubusers_data) # so githubusers_data is always []
Mixing iteration and read methods would lose data
Related
What I am trying to accomplish:
Use contents of a text file to search for values in JSON file.
My text file contains one entry per line. These will match values in the JSON file.
thing1
thing2
thing3
I would like to iterate this list and return matches from the JSON file
{
"ssb": [
{
"uid": 27,
"appid": "thing1",
"title": "Title of thing",
"created_time": "2009-11-17T01:32:28+00:00",
"published_time": "2009-11-17T01:32:28+00:00",
"updated_time": "2022-11-14T17:26:23+00:00",
}
]
}
import json

# Read the app names to upgrade, one per line (lines keep their '\n').
upgrade_apps = open("apps_to_upgrade.txt", "r")
ua = upgrade_apps.readlines()
upgrade_apps.close()
ua3 = "thing1"

# Testing results
print(ua)
print(type(ua))
for atu in ua:
    print(atu)
# ^ this returns the expected text from file

# Load the JSON dump once; the redundant f.close() inside the original
# with-block has been dropped (the context manager already closes f).
with open('dump.json') as f:
    data = json.load(f)

# Report the first record whose appid matches the hard-coded name.
jsonResult = data['ssb']
for i in jsonResult:
    if i['appid'] == ua3:
        print(i['uid'], i['appid'])
        break
I have also tried including a for loop before the first for loop. I get back only the last entry in the text file.
#all of the above codeblock plus
# NOTE(review): attempt quoted from the question.  readlines() keeps the
# trailing '\n' on every line, so atu is e.g. "thing1\n" and never equals
# an appid; stripping the lines (or building a set of them) fixes this.
for atu in ua:
# Re-opening and re-parsing dump.json on every outer iteration is also
# wasteful; loading it once before the loop is enough.
with open('dump.json') as f:
data = json.load(f)
f.close()
jsonResult = data['ssb']
for i in jsonResult:
if i['appid'] == atu:
print(i['uid'],i['appid'])
break
Remove the newlines from the lines in ua, and turn it into a set. Then you can test if i['appid'] is in the set.
import json

# Build a set of app names, stripped of trailing newlines, for O(1) lookup.
# (The original was missing the ':' after the with-statement.)
with open("apps_to_upgrade.txt", "r") as upgrade_apps:
    ua = set(line.strip() for line in upgrade_apps)

with open('dump.json') as f:
    data = json.load(f)

# Print every record whose appid appears in the upgrade list; the original
# 'break' stopped after the first hit, but all matches are wanted.
jsonResult = data['ssb']
for i in jsonResult:
    if i['appid'] in ua:
        print(i['uid'], i['appid'])
I am attempting to copy the image field from one group of json files and replace the image field in another group of JSON files.
JSON file structure that I am pulling from:
[
{
"\ufeffFile Name": "16.png",
"image": "https://arweave.net/v8OOGgKmlAZF56bpGQRv1nM691gu2FMlrNK3KY-3HZk"
}
]
JSON file structure that I need to replace:
{
"name": "name",
"symbol": "KK",
"description": "description",
"seller_fee_basis_points": 1000,
"image": "image.png",
"external_url": "https://www.image.net",
"edition": 16,
}
Code:
import json
import os
def getFileImage(fp):
    """Return the 'image' value from the JSON file at *fp*, or None.

    The source files wrap their single object in a JSON array, so the
    parsed value is a list -- unwrap it before looking up the key (the
    original indexed the list with a string key, raised TypeError, and
    the bare except silently turned that into None).
    """
    with open(fp, 'r') as ff:
        try:
            data = json.load(ff)
            if isinstance(data, list):
                # e.g. [{"\ufeffFile Name": ..., "image": ...}]
                data = data[0]
            return data['image']
        except (json.JSONDecodeError, KeyError, IndexError, TypeError):
            return None
def ReadAndReplace(filename, image):
    """Load *filename*, overwrite its 'image' field with *image*, and
    return the result as pretty-printed JSON text (None on failure).

    The original caught every exception and printed an f-string with no
    placeholder; catch only the failures this code can produce and name
    the offending file in the message.
    """
    with open(filename, 'r') as ff:
        try:
            data = json.load(ff)
            data['image'] = image
            return json.dumps(data, indent=4)
        except (json.JSONDecodeError, TypeError):
            print(f'Could not process file {filename}')
            return None
def Save(filename, data):
    """Write the string *data* to *filename*, replacing any existing file."""
    with open(filename, 'w') as out:
        out.write(data)
def EnsureOutput():
    """Create the 'output' folder; abort unless 'input' and 'toReplace' exist."""
    if not os.path.exists('output'):
        os.mkdir('output')
    missing = not os.path.exists('input') or not os.path.exists('toReplace')
    if missing:
        print('Please create "input" and "toReplace" folders')
        exit()
def main():
    """Copy the 'image' field from each input/<name>.json into the
    matching toReplace/<name>.json, writing the results to output/."""
    EnsureOutput()
    for i in os.listdir('toReplace'):
        filepath1 = os.path.join('input', i)
        filepath2 = os.path.join('toReplace', i)
        outpath = os.path.join('output', i)
        if not os.path.exists(filepath1):
            print(f'No input file for toReplace "{i}" was found')
            continue
        image = getFileImage(filepath1)
        data = ReadAndReplace(filepath2, image)
        # ReadAndReplace returns None on failure; the original passed that
        # straight to Save(), which crashed on ff.write(None).
        if data is not None:
            Save(outpath, data)


# Guard so importing this module doesn't immediately process the folders.
if __name__ == '__main__':
    main()
When I run this code the image field updates to NULL instead of the link I need.
The input you are loading is a JSON array and hence parsed as a python list, which you can obviously not access by keys as you are trying to do.
You first need to access the object within the array and then you may access this object with keys.
See the following:
import json

# Renamed from 'input' -- that name shadows the built-in input().
# Note the payload is a JSON *array* containing a single object.
raw_json = """
[
{
"\ufeffFile Name": "16.png",
"image": "https://arweave.net/v8OOGgKmlAZF56bpGQRv1nM691gu2FMlrNK3KY-3HZk"
}
]
"""
input_parsed = json.loads(raw_json)
print(type(input_parsed))
print(input_parsed[0]["image"])
Expected output:
<class 'list'>
https://arweave.net/v8OOGgKmlAZF56bpGQRv1nM691gu2FMlrNK3KY-3HZk
For your ReadAndReplace() function this would look like this:
def read_and_replace(filename, image):
    """Replace the 'image' field of the single object inside the JSON
    array stored in *filename*; return the updated JSON text or None.

    Reports the reason on failure instead of returning None silently
    (the original's f-string had no placeholder and a wrong-length list
    produced no message at all).
    """
    with open(filename, 'r') as ff:
        try:
            # data is a list wrapping exactly one object
            data = json.load(ff)
            if len(data) == 1:
                data[0]['image'] = image
                return json.dumps(data, indent=4)
            print(f'Expected exactly one object in {filename}, found {len(data)}')
        except (json.JSONDecodeError, KeyError, TypeError):
            print(f'Could not process file {filename}')
        return None
I have thousands of very large JSON files that I need to process on specific elements. To avoid memory overload I am using a Python library called ijson, which works fine when I am processing only a single element from the JSON file, but when I try to process multiple elements at once it throws
IncompleteJSONError: parse error: premature EOF
Partial JSON:
{
"info": {
"added": 1631536344.112968,
"started": 1631537322.81162,
"duration": 14,
"ended": 1631537337.342377
},
"network": {
"domains": [
{
"ip": "231.90.255.25",
"domain": "dns.msfcsi.com"
},
{
"ip": "12.23.25.44",
"domain": "teo.microsoft.com"
},
{
"ip": "87.101.90.42",
"domain": "www.msf.com"
}
]
}
}
Working Code: (Multiple file open)
# Gather the "info" fields plus a domain count from every JSON file.
# Each file is opened twice so that each ijson query starts at offset 0.
my_file_list = [f for f in glob.glob("data/jsons/*.json")]
final_result = []
for filename in my_file_list:
    row = {}
    # First pass: pull the top-level "info" object.
    with open(filename, 'r') as f:
        for o in ijson.items(f, 'info'):
            row['added'] = float(o.get('added'))
            row['started'] = float(o.get('started'))
            row['duration'] = o.get('duration')
            row['ended'] = float(o.get('ended'))
    # Second pass: count the entries under network.domains.
    with open(filename, 'r') as f:
        row['domain_count'] = sum(1 for _ in ijson.items(f, 'network.domains.item'))
Failure Code: (Single file open)
# NOTE(review): code quoted from the question to show the failing variant.
my_file_list = [f for f in glob.glob("data/jsons/*.json")]
final_result = []
for filename in my_file_list:
row = {}
with open(filename, 'r') as f:
info = ijson.items(f, 'info')
# Consuming this query reads the underlying file object to EOF...
for o in info:
row['added']= float(o.get('added'))
row['started']= float(o.get('started'))
row['duration']= o.get('duration')
row['ended']= float(o.get('ended'))
# ...so this second query begins at EOF and ijson reports
# "premature EOF" (an f.seek(0) before it would avoid that).
domains = ijson.items(f, 'network.domains.item')
domain_count = 0
for domain in domains:
domain_count+=1
row['domain_count'] = domain_count
I'm not sure if this is the reason (see Using python ijson to read a large json file with multiple json objects) — that ijson is not able to work on multiple JSON elements at once.
Also, let me know any other python package or any sample example that can handle large size json without memory issues.
I think this is happening because you've finished reading your IO stream from the file, you're at the end already, and already asking for another query.
What you can do is to reset the cursor to the 0 position before the second query:
f.seek(0)
In a comment I said that you should try json-stream as well, but this is not an ijson or json-stream bug, it's a TextIO feature.
This is the equivalent of you opening the file a second time.
If you don't want to do this, then maybe you should look at iterating through every portion of the JSON, and then deciding for each object whether it has info or network.domains.item.
While the answer above is correct, you can do better: if you know the structure of your JSON file and can rely on it, you can use this to your advantage and read the file only once.
ijson has an event interception mechanism, and the example there is very similar to what you want to achieve. In your case you want to get the info values, then iterate over the network.domains.item entries and count them. This should do:
# Single pass over the file: intercept the parse events for the "info"
# fields, then hand the same (still-open) event stream to ijson.items
# to count the domain entries.
row = {}
info_fields = {
    'info.added': 'added',
    'info.started': 'started',
    'info.duration': 'duration',
    'info.ended': 'ended',
}
with open(filename, 'r') as f:
    parse_events = ijson.parse(f, use_float=True)
    for prefix, event, value in parse_events:
        if prefix in info_fields:
            row[info_fields[prefix]] = value
        elif prefix == 'info' and event == 'end_map':
            # All info fields seen; stop scanning and switch to counting.
            break
    row['domain_count'] = sum(1 for _ in ijson.items(parse_events, 'network.domains.item'))
Note how:
ijson.items is fed with the result of ijson.parse.
use_float=True saves you from having to convert the values to float yourself.
The counting can be done by sum()-ing 1 for each item coming from ijson.items so you don't have to loop yourself manually.
I want to make a keystore of values in JSON. Everything should work through the arguments entered into the console. That is, the data is first written to a file, and then must be read from there.
Input: python storage.py --key key_name --value value_name
Output: python storage.py --key key_name
A function with arguments and a function with data entry work. But I had a problem with the file read function. I need to print the key by its value, or values if there are several.
The recorded JSON looks something like this:
{"key": "Pepe", "value": "Pepeyaya"}{"key": "PepeHug", "value": "KekeHug"}{"key": "Pepega", "value": "Kekega"}{"key": "Pepe", "value": "Keke"}
I tried reading the file like this:
data = json.loads(f.read())
But the error is exactly the same
In other similar topics I saw that the "dictionaries" in JSON are written to the list. I tried something like this:
data = json.loads([f.read()])
Result:
TypeError: the JSON object must be str, bytes or bytearray, not list
Also:
data = json.load([f])
Result:
AttributeError: 'list' object has no attribute 'read'
I tried to change the recording function, but I can't write everything to a pre-created dictionary, everything is written to the right of it. Something like this:
[]{"key": "Pepe", "value": "Pepeyaya"}{"key": "PepeHug", "value": "KekeHug"}{"key": "Pepega", "value": "Kekega"}{"key": "Pepe", "value": "Keke"}
Code:
import os
import tempfile
import json
import sys
def create_json(path):
    """Append an empty JSON list to *path* (creates the file if absent)."""
    with open(path, mode='a', encoding='utf-8') as out:
        json.dump([], out)
def add(key, value, path):
    """Append a {'key': ..., 'value': ...} record to the JSON list in *path*.

    The original opened the file in append mode and json.dump-ed each
    object back-to-back, producing `{...}{...}{...}` -- not valid JSON.
    Instead, load the existing list (or start fresh), append the record,
    and rewrite the file as a single valid JSON array.
    """
    try:
        with open(path, encoding='utf-8') as f:
            entries = json.load(f)
        if not isinstance(entries, list):
            entries = []
    except (FileNotFoundError, json.JSONDecodeError):
        entries = []
    entries.append({'key': key, 'value': value})
    with open(path, mode='w', encoding='utf-8') as f:
        json.dump(entries, f)
def read(a_key, path):
    """Print and return every value stored under *a_key* in the JSON
    list at *path*.

    The original iterated each record's .items() and compared the dict
    field names ('key'/'value') against the requested key, so it never
    matched a stored key; it also built a read_result string that was
    never returned, and printed the raw data as a debug leftover.
    """
    matches = []
    with open(path) as f:
        data = json.load(f)
    for entry in data:
        # Each record looks like {'key': ..., 'value': ...}; compare the
        # *stored* key name with the one requested.
        if entry.get('key') == a_key:
            print(entry['value'])
            matches.append(entry['value'])
    return matches
def main():
    """Dispatch on command-line flags: '--key K' reads, '--key K --value V' adds.

    Bounds-checks sys.argv first -- the original raised IndexError when
    run with no arguments at all.
    """
    storage_path = os.path.join(tempfile.gettempdir(), 'storage.json')
    if len(sys.argv) < 3 or sys.argv[1] != "--key":
        print("Введите верные аргументы")
        return
    arg_key = sys.argv[2]
    if len(sys.argv) <= 3:
        read(arg_key, storage_path)
    elif len(sys.argv) >= 5 and sys.argv[3] == "--value":
        add(arg_key, sys.argv[4], storage_path)
    else:
        print("Введите верные аргументы")


if __name__ == '__main__':
    main()
In general, the attached code now produces this error:
json.decoder.JSONDecodeError: Extra data: line 1 column 39 (char 38)
I need on request:
python storage.py --key Pepe
Get Pepe and PepeYaya values
This is a basic storage method; it is a poor fit for large JSON files, but it's an example that shows how you can get the job done.
import os
import sys
import json

# storage.py key_name value
key = sys.argv[1]
value = sys.argv[2]

# Single source of truth for the store location -- the original defined
# data_path but then hard-coded "data.json" in both open() calls.
data_path = "data.json"

# Load the existing store if present, otherwise start empty.
if os.path.isfile(data_path):
    with open(data_path) as target:
        json_data = json.load(target)
else:
    json_data = {}

json_data[key] = value

with open(data_path, "w") as target:
    json.dump(json_data, target)
In your case the problem is caused by the append flag used when you open the file. If you wanted to append a new object by hand, you would need to delete the trailing '}' of the JSON, write a ',' followed by the new object, and then add the closing '}' again.
I have a JSON file containing various objects each containing elements. With my python script, I only keep the objects I want, and then put the elements I want in a list. But the element has a prefix, which I'd like to suppress form the list.
The post-script JSON looks like that:
{
"ip_prefix": "184.72.128.0/17",
"region": "us-east-1",
"service": "EC2"
}
The "IP/mask" is what I'd like to keep. The List looks like that:
'"ip_prefix": "23.20.0.0/14",'
So what can I do to only keep "23.20.0.0/14" in the list?
Here is the code:
# Write every prefix object matching the requested service to destfile,
# then re-read that file and collect the lines containing "ip_prefix".
with open(jsonsourcefile) as json_data:  # close the handle (original leaked it)
    data = json.load(json_data)
print(destfile)

for prefix_obj in data['prefixes']:
    # Keep only the objects mentioning the requested service.
    if servicerequired in json.dumps(prefix_obj):
        with open(destfile, 'a') as out:
            out.write(json.dumps(prefix_obj, sort_keys=True, indent=4))

with open(destfile, 'r') as reads:
    liste = list()
    for line in reads:
        if "ip_prefix" in line:
            liste.append(line.strip())
print(liste)
Thanks,
dersoi
OK, so I've gone through your JSON object.
import json
import urllib.request

# Fetch the AWS IP-range document and print every ip_prefix entry.
# (urllib2 and the print statement are Python 2 only; the rest of this
# page already uses Python 3 syntax, so modernize accordingly.)
url = 'https://ip-ranges.amazonaws.com/ip-ranges.json'
with urllib.request.urlopen(url) as res:
    j = json.load(res)

print(j['prefixes'][0]['ip_prefix'])
prefixes = j['prefixes']
for i in prefixes:
    print(i['ip_prefix'])
the result:
>>>
23.20.0.0/14
23.20.0.0/14
27.0.0.0/22
43.250.192.0/24
43.250.193.0/24
46.51.128.0/18
46.51.192.0/20
46.51.216.0/21
46.51.224.0/19
etc...
So now you want all into one txt file right?
So you do this:
import json
import urllib.request

# Download the AWS ranges and write one ip_prefix per line to a file.
url = 'https://ip-ranges.amazonaws.com/ip-ranges.json'
with urllib.request.urlopen(url) as res:
    j = json.load(res)

prefixes = j['prefixes']
destfile = 'destfile.txt'
with open(destfile, 'w') as f:
    for i in prefixes:
        f.write(i['ip_prefix'])
        f.write('\n')
# No explicit close needed: the with-block closes the file.  The original
# ended with a bare "f.close" which never even called the method.
Best regards,
Rizzit
I've refactored your code, try this out:
import json

# json.load(fh) parses straight from the file object; no need for
# loads(read()).  print is a function in Python 3.
with open('sample.json', 'r') as data:
    json_data = json.load(data)
print(json_data.get('ip_prefix'))
# Output: "184.72.128.0/17"
You can rewrite the second open block as:
# Parse destfile once and pull out every ip_prefix in a single expression.
with open(destfile, 'r') as reads:
    liste = [entry['ip_prefix'] for entry in json.load(reads)]
Although, I don't think you need to write to an intermediate file anyway, you could combine both blocks.