Related
I have a large JSON file that contains image annotation data. I am iterating through one of the keys below.:
import json
# Opening JSON file
f = open('annotations.json')
# returns JSON object as
# a dictionary
data = json.load(f)
# Iterating through the json
# list
for i in data['annotations']:
if i['segmentation'] == [[]]:
print(i['segmentation'])
del i
#print(i['segmentation'])
# Closing file
f.close()
Printing the returned dictionaries, they look like this:
{"iscrowd":0,"image_id":32,"bbox":[],"segmentation":[[]],"category_id":2,"id":339,"area":0}
I am trying to remove the following above lines in the annotations key that contain no data for segmentation. I am able to extract these lines, I am just not sure how to remove them without breaking the format of the file.
{"iscrowd":0,"image_id":32,"bbox":[],"segmentation":[[]],"category_id":2,"id":339,"area":0}
,{"iscrowd":0,"image_id":32,"bbox":[],"segmentation":[[]],"category_id":2,"id":340,"area":0}
,{"iscrowd":0,"image_id":32,"bbox":[],"segmentation":[[]],"category_id":2,"id":341,"area":0}
,{"iscrowd":0,"image_id":32,"bbox":[],"segmentation":[[]],"category_id":2,"id":342,"area":0},
...
Here is what finally got it working for me:
import json
# Opening JSON file
f = open('annotations.json')
# returns JSON object as
# a dictionary
data = json.load(f)
# Closing file
f.close()
# Iterating through the json
# list
count = 0
for key in data['annotations']:
count +=1
if key['segmentation'] == [[]]:
print(key['segmentation'])
data["annotations"].pop(count)
if key['bbox'] == []:
data["annotations"].pop(count)
#print(i['segmentation'])
with open("newannotations.json", "w") as json_file:
json.dump(data, json_file)
The function json.loads() returns a python dictionary, which you can then modify as you'd like. Similarly json.dumps() can be used to write a json file from a python dictionary.
In order to remove an entry from a dictionary, you can use the dictionary pop() method. Assuming in the above you want to delete each entry referred to with the key i (as per the del i) if the entry in data["annotations"][i]["segmentation"] ==[[]], one could do it approximately as follows:
import json
# Opening JSON file
f = open('annotations.json')
# returns JSON object as
# a dictionary
data = json.load(f)
# Closing file
f.close()
# Iterating through the json
# list
for key in data['annotations']:
if data["annotations"][key]['segmentation'] == [[]]:
print(data["annotations"][key]['segmentation'])
data["annotations"].pop(key)
#print(i['segmentation'])
with open("newannotations.json", "w") as json_file:
json.dump(data, json_file)
Is this what you wanted to do?
I'm trying to merge both json files but I'm trying to append timestamp from file2 to corresponding frame number in file1.please guide.
JSON_FILE1
{"frameNumber":1,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":true,"bbox":{"top":157,"left":581,"height":390,"width":297},"classifications":[]}]}
{"frameNumber":2,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.36,"width":297.16},"classifications":[]}]}
{"frameNumber":3,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.72,"width":297.32},"classifications":[]}]}
{"frameNumber":4,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.08,"width":297.48},"classifications":[]}]}
{"frameNumber":5,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.44,"width":297.64},"classifications":[]}]}
JSON_FILE2
{
"frame1": "0:0:0:66",
"frame2": "0:0:0:100",
"frame3": "0:0:0:133",
"frame4": "0:0:0:166",
"frame5": "0:0:0:200"
}
expected output:
{"frameNumber":1,"frame1": "0:0:0:66",,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":true,"bbox":{"top":157,"left":581,"height":390,"width":297},"classifications":[]}]}
{"frameNumber":2, "frame2": "0:0:0:10,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.36,"width":297.16},"classifications":[]}]}
{"frameNumber":3,"frame3": "0:0:0:133,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.72,"width":297.32},"classifications":[]}]}
{"frameNumber":4,"frame4": "0:0:0:166","classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.08,"width":297.48},"classifications":[]}]}
{"frameNumber":5,"frame5": "0:0:0:200","classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.44,"width":297.64},"classification
I tried this way but I am unable to achieve.
import json
import glob
result = []
for f in glob.glob("*.json"):
with open(f,"rb") as infile:
result.append(json.load(infile))
with open("merged_file.json","wb") as outfile:
json.dump(result,outfile)
A correct .json needs a pair of [] and than you could json.load it, iterate over ever line and do the same like below but anyway:
The easiest solution is turn every line in a dict, if the framenumber matches add the timestamp and write it back.
def fuse(file1, file2, nTargetPath):
with open(nTargetPath, "wb") as tTargetFile:
with open(file1, "rb") as tSourceFileA:
for tLineA in tSourceFileA.readlines():
tDictA = json.loads(tLineA) #loads dict from a string
tKey = "frame"+tDictA["frameNumber"] #searching the correct entry but why not name this timestampX
with open(file2, "rb") as tSourceFileB:
for tLineB in tSourceFileB.readlines():
tDictB = json.loads(tLineB )
if tKey in tDictB:
tDictA[tKey] = tDictB[tKey]
break #cause there is only one timestamp
tTargetFile.write(json.dumps(tDictA)+'\n')
This code cann easily updated by improve the file accessing for example when you know the key for the timestamp in file2 is everytime in the same row as in file1 and so on.
As was pointed out, one file is ndjson and the other file is json. You need to implement some logic to add the json to the ndjson
# https://pypi.org/project/ndjson/
# pip install ndjson
import ndjson
import json
with open('path/to/file/im_a_ndjson.ndjson') as infile:
ndjson_object = ndjson.load(infile)
with open('path/to/file/json_file2.json') as infile:
dict_object = json.load(infile)
print(type(ndjson_object[0]['frameNumber']))
# output: <class 'int'>
for key in dict_object:
# int needed as you can see above
framenumber = int(key.strip('frame'))
# find the matching ndjson object
for ndjs in ndjson_object:
if ndjs['frameNumber'] == framenumber:
# add the key/value pair
ndjs[key] = dict_object[key]
# we can break as we've found it
break
with open('path/to/file/new_ndjson.ndjson', 'w') as outfile:
ndjson.dump(ndjson_object, outfile)
I get stuck when I try to create a new dictionary with the result from each file.
Basically I have a bunch of files which I'm reading it using glob and json, so I managed to get the value from each file and it's working fine, it's displaying all files content with the different informations which is the expected and it's good.
But now I'm looking about how to create a new dictionary new_dictonary = {} #in my code using the variable I've got get_hostname, get_fs_type, get_uptime without overwrite the new dictionary, below is my code.
import json
import glob
test_inventory = glob.glob('inventory/facts_am4/*')
new_dictonary = {}
for files in test_inventory:
with open(files, 'r') as jsonfile:
myfile = json.load(jsonfile)
get_hostname = myfile['ansible_facts']['facter_networking']['fqdn']
get_fs_type = myfile['ansible_facts']['facter_filesystems']
get_uptime = myfile['ansible_facts']['facter_system_uptime']['uptime']
print('Hostname: ' + get_hostname)
print('FS Type:' + get_fs_type)
print('Uptime:' + get_uptime)
#Here I need something which you grab the variables and create a new dictionary.
#Without overwrite.
I really tried a lot of stuffs, I'm learning Python and I came here to kindly request you help.
You can either:
make a list of dictionaries, and add a new one to it for each file, or
make a nested dictionary, where each "info-dict" is keyed by the filename.
Using a list:
data_list = []
for filename in test_inventory:
with open(filename, 'r') as file_obj:
# read the data
data = {'Hostname': get_hostname,
'FS Type': get_fs_type,
'Uptime': get_uptime}
data_list.append(data)
# Now data_list has a list of all your data, accessible as data_list[0], [1], etc..
Using a dictionary:
data_dict = {}
for filename in test_inventory:
with open(filename, 'r') as file_obj:
# as above
data_dict[filename] = data
# Now data_dict has each file's data accessible as data_dict[filename]
I want to create a single file combining multiple python dictionaries about costal weather conditions (tides, wind, etc) that can be updated day to day. The data comes from multiple APIs and websites, each of which I convert to a python dictionary, and merge into a single dictionary using the following line of code:
OneDayWeather_data = {'Willydata' : Willydata, 'Bureau of Meteorology' : BoMdata, 'WeatherZone' : WZdata}
My goal is to sample the sites each day; and update a single file with each day's weather and forecast across the sites. I am thinking the best way to do this is to create a new top level to the hierarchy using the date. So it would be like something like:
Weather_data['18/07/2017']['Willy']['Winds']
Weather_data['18/07/2017']['BoMdata']['Winds']
For each day, I would then add a new top level entry for the new day's data, i.e.
AllWeatherData['19/07/2017']['Willy']['Winds']
I have tried this using a variety of methods suggested from stack overflow (Full disclosure: I'm pretty new to Python). For example,
# write the initial file
with open('test.json', 'w') as f:
json.dump(OneDayWeather_data, f)
# open the initial file and attempt to append
with open('test.json','r+') as f:
dic = dict(json.load(f))
dic.update(OneDayWeather_data)
json.dump(dic, f)
# reopen the appended file
with open('test.json', 'r') as f2:
json_object = json.load(f2)
...but I keep getting errors (in this case: ValueError(errmsg("Extra data", s, end, len(s))) when I try to reopen). Hoping someone w some expertise can weigh in on how to approach this problem.
Thanks!
You are actually appending the update dict to the existing dict
# write the initial file
import json
OneDayWeather_data = {'a':'b'}
with open('test.json', 'w') as f:
json.dump(OneDayWeather_data, f)
OneDayWeather_data = {'c':'d'}
# open the initial file and attempt to append
with open('test.json','r+') as f:
dic = dict(json.load(f))
dic.update(OneDayWeather_data)
json.dump(dic, f)
# reopen the appended file
with open('test.json', 'r') as f2:
json_object = json.load(f2)
At this stage, your test.json looks like
{"a": "b"}{"a": "b", "c": "d"}
You may separate read/update/write
with open('test.json','r') as f:
dic = dict(json.load(f))
dic.update(OneDayWeather_data)
with open('test.json', 'w') as f:
json.dump(dic, f)
Similar answer can be found in How to append in a json file in Python?
I'm trying to create a function that would add entries to a json file. Eventually, I want a file that looks like
[{"name" = "name1", "url" = "url1"}, {"name" = "name2", "url" = "url2"}]
etc. This is what I have:
def add(args):
with open(DATA_FILENAME, mode='r', encoding='utf-8') as feedsjson:
feeds = json.load(feedsjson)
with open(DATA_FILENAME, mode='w', encoding='utf-8') as feedsjson:
entry = {}
entry['name'] = args.name
entry['url'] = args.url
json.dump(entry, feedsjson)
This does create an entry such as {"name"="some name", "url"="some url"}. But, if I use this add function again, with different name and url, the first one gets overwritten. What do I need to do to get a second (third...) entry appended to the first one?
EDIT: The first answers and comments to this question have pointed out the obvious fact that I am not using feeds in the write block. I don't see how to do that, though. For example, the following apparently will not do:
with open(DATA_FILENAME, mode='a+', encoding='utf-8') as feedsjson:
feeds = json.load(feedsjson)
entry = {}
entry['name'] = args.name
entry['url'] = args.url
json.dump(entry, feeds)
json might not be the best choice for on-disk formats; The trouble it has with appending data is a good example of why this might be. Specifically, json objects have a syntax that means the whole object must be read and parsed in order to understand any part of it.
Fortunately, there are lots of other options. A particularly simple one is CSV; which is supported well by python's standard library. The biggest downside is that it only works well for text; it requires additional action on the part of the programmer to convert the values to numbers or other formats, if needed.
Another option which does not have this limitation is to use a sqlite database, which also has built-in support in python. This would probably be a bigger departure from the code you already have, but it more naturally supports the 'modify a little bit' model you are apparently trying to build.
You probably want to use a JSON list instead of a dictionary as the toplevel element.
So, initialize the file with an empty list:
with open(DATA_FILENAME, mode='w', encoding='utf-8') as f:
json.dump([], f)
Then, you can append new entries to this list:
with open(DATA_FILENAME, mode='w', encoding='utf-8') as feedsjson:
entry = {'name': args.name, 'url': args.url}
feeds.append(entry)
json.dump(feeds, feedsjson)
Note that this will be slow to execute because you will rewrite the full contents of the file every time you call add. If you are calling it in a loop, consider adding all the feeds to a list in advance, then writing the list out in one go.
Append entry to the file contents if file exists, otherwise append the entry to an empty list and write in in the file:
a = []
if not os.path.isfile(fname):
a.append(entry)
with open(fname, mode='w') as f:
f.write(json.dumps(a, indent=2))
else:
with open(fname) as feedsjson:
feeds = json.load(feedsjson)
feeds.append(entry)
with open(fname, mode='w') as f:
f.write(json.dumps(feeds, indent=2))
Using a instead of w should let you update the file instead of creating a new one/overwriting everything in the existing file.
See this answer for a difference in the modes.
One possible solution is do the concatenation manually, here is some useful
code:
import json
def append_to_json(_dict,path):
with open(path, 'ab+') as f:
f.seek(0,2) #Go to the end of file
if f.tell() == 0 : #Check if file is empty
f.write(json.dumps([_dict]).encode()) #If empty, write an array
else :
f.seek(-1,2)
f.truncate() #Remove the last character, open the array
f.write(' , '.encode()) #Write the separator
f.write(json.dumps(_dict).encode()) #Dump the dictionary
f.write(']'.encode()) #Close the array
You should be careful when editing the file outside the script not add any spacing at the end.
this, work for me :
with open('file.json', 'a') as outfile:
outfile.write(json.dumps(data))
outfile.write(",")
outfile.close()
I have some code which is similar, but does not rewrite the entire contents each time. This is meant to run periodically and append a JSON entry at the end of an array.
If the file doesn't exist yet, it creates it and dumps the JSON into an array. If the file has already been created, it goes to the end, replaces the ] with a , drops the new JSON object in, and then closes it up again with another ]
# Append JSON object to output file JSON array
fname = "somefile.txt"
if os.path.isfile(fname):
# File exists
with open(fname, 'a+') as outfile:
outfile.seek(-1, os.SEEK_END)
outfile.truncate()
outfile.write(',')
json.dump(data_dict, outfile)
outfile.write(']')
else:
# Create file
with open(fname, 'w') as outfile:
array = []
array.append(data_dict)
json.dump(array, outfile)
You aren't ever writing anything to do with the data you read in. Do you want to be adding the data structure in feeds to the new one you're creating?
Or perhaps you want to open the file in append mode open(filename, 'a') and then add your string, by writing the string produced by json.dumps instead of using json.dump - but nneonneo points out that this would be invalid json.
import jsonlines
object1 = {
"name": "name1",
"url": "url1"
}
object2 = {
"name": "name2",
"url": "url2"
}
# filename.jsonl is the name of the file
with jsonlines.open("filename.jsonl", "a") as writer: # for writing
writer.write(object1)
writer.write(object2)
with jsonlines.open('filename.jsonl') as reader: # for reading
for obj in reader:
print(obj)
visit for more info https://jsonlines.readthedocs.io/en/latest/
You can simply import the data from the source file, read it, and save what you want to append to a variable. Then open the destination file, assign the list data inside to a new variable (presumably this will all be valid JSON), then use the 'append' function on this list variable and append the first variable to it. Viola, you have appended to the JSON list. Now just overwrite your destination file with the newly appended list (as JSON).
The 'a' mode in your 'open' function will not work here because it will just tack everything on to the end of the file, which will make it non-valid JSON format.
let's say you have the following dicts
d1 = {'a': 'apple'}
d2 = {'b': 'banana'}
d3 = {'c': 'carrot'}
you can turn this into a combined json like this:
master_json = str(json.dumps(d1))[:-1]+', '+str(json.dumps(d2))[1:-1]+', '+str(json.dumps(d3))[1:]
therefore, code to append to a json file will look like below:
dict_list = [d1, d2, d3]
for i, d in enumerate(d_list):
if i == 0:
#first dict
start = str(json.dumps(d))[:-1]
with open(str_file_name, mode='w') as f:
f.write(start)
else:
with open(str_file_name, mode='a') as f:
if i != (len(dict_list) - 1):
#middle dicts
mid = ','+str(json.dumps(d))[1:-1]
f.write(mid)
else:
#last dict
end = ','+str(json.dumps(d))[1:]
f.write(end)