Deleting specific JSON lines while iterating through a key in Python - python

I have a large JSON file that contains image annotation data. I am iterating through one of its keys, as shown below:
import json

# Parse the annotation file into Python objects; the file is closed as soon
# as the `with` block ends.
with open('annotations.json') as f:
    data = json.load(f)

# Walk over every annotation and report the ones whose segmentation is empty.
for i in data['annotations']:
    if i['segmentation'] != [[]]:
        continue
    print(i['segmentation'])
    # `del i` only unbinds the loop variable; the entry itself stays inside
    # data['annotations'].
    del i
Printing the returned dictionaries, they look like this:
{"iscrowd":0,"image_id":32,"bbox":[],"segmentation":[[]],"category_id":2,"id":339,"area":0}
I am trying to remove entries like the one above from the annotations key, i.e. the ones that contain no segmentation data. I am able to find these entries; I am just not sure how to remove them without breaking the format of the file.
{"iscrowd":0,"image_id":32,"bbox":[],"segmentation":[[]],"category_id":2,"id":339,"area":0}
,{"iscrowd":0,"image_id":32,"bbox":[],"segmentation":[[]],"category_id":2,"id":340,"area":0}
,{"iscrowd":0,"image_id":32,"bbox":[],"segmentation":[[]],"category_id":2,"id":341,"area":0}
,{"iscrowd":0,"image_id":32,"bbox":[],"segmentation":[[]],"category_id":2,"id":342,"area":0},
...
Here is what finally got it working for me:
import json

# Load the annotation file; the context manager closes it even if parsing
# fails.
with open('annotations.json') as f:
    data = json.load(f)

# Build a new list instead of popping from data['annotations'] while looping
# over it: removing items mid-iteration shifts the remaining ones, so entries
# get skipped and an index-based pop() removes the wrong annotation.
kept = []
for annotation in data['annotations']:
    if annotation['segmentation'] == [[]]:
        # Empty segmentation: report it and drop the annotation.
        print(annotation['segmentation'])
        continue
    if annotation['bbox'] == []:
        # Empty bounding box: drop the annotation as well.
        continue
    kept.append(annotation)
data['annotations'] = kept

# Write the filtered structure to a new file so the input stays untouched.
with open("newannotations.json", "w") as json_file:
    json.dump(data, json_file)

The function json.load() parses a JSON file into Python objects (here a dictionary), which you can then modify as you'd like; json.loads() does the same for a string. Similarly, json.dump() can be used to write a JSON file from a Python dictionary.
In order to remove an entry from a dictionary, you can use the dictionary pop() method. Assuming in the above you want to delete each entry referred to with the key i (as per the del i) if the entry in data["annotations"][i]["segmentation"] ==[[]], one could do it approximately as follows:
import json

# Load the annotation file; the context manager closes it even if parsing
# fails.
with open('annotations.json') as f:
    data = json.load(f)

# data['annotations'] is a list of dicts, so each loop item already is the
# annotation (it cannot be used as an index into the list). Rebuilding the
# list also avoids removing items from it while it is being iterated.
data['annotations'] = [
    annotation
    for annotation in data['annotations']
    if annotation['segmentation'] != [[]]
]

# Write the filtered structure to a new file so the input stays untouched.
with open("newannotations.json", "w") as json_file:
    json.dump(data, json_file)
Is this what you wanted to do?

Related

How to retrieve "values" against specific "key" in nested dictionary

I have a json file that looks like this JSON_FILE:
It contains a nested dictionary. I want to retrieve the key annotations (which appears once in the file), specifically all the values stored under the key image_id (which appears many times in the file), and store them in a separate file. How do I do this in Python?
I have been able to resolve this
import json

# Parse the whole file, then keep only the list stored under "annotations".
with open("myfile.json") as source:
    annotations = json.load(source)["annotations"]

# Write the image_id of every annotation on its own line.
myfile = open('data.txt', 'w')
for annotation in annotations:
    myfile.write("%s\n" % annotation['image_id'])
myfile.close()

Merging 2 json files

I'm trying to merge two JSON files: I want to append the timestamp from file2 to the corresponding frame number in file1. Please advise.
JSON_FILE1
{"frameNumber":1,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":true,"bbox":{"top":157,"left":581,"height":390,"width":297},"classifications":[]}]}
{"frameNumber":2,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.36,"width":297.16},"classifications":[]}]}
{"frameNumber":3,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.72,"width":297.32},"classifications":[]}]}
{"frameNumber":4,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.08,"width":297.48},"classifications":[]}]}
{"frameNumber":5,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.44,"width":297.64},"classifications":[]}]}
JSON_FILE2
{
"frame1": "0:0:0:66",
"frame2": "0:0:0:100",
"frame3": "0:0:0:133",
"frame4": "0:0:0:166",
"frame5": "0:0:0:200"
}
expected output:
{"frameNumber":1,"frame1": "0:0:0:66","classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":true,"bbox":{"top":157,"left":581,"height":390,"width":297},"classifications":[]}]}
{"frameNumber":2,"frame2": "0:0:0:100","classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.36,"width":297.16},"classifications":[]}]}
{"frameNumber":3,"frame3": "0:0:0:133","classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.72,"width":297.32},"classifications":[]}]}
{"frameNumber":4,"frame4": "0:0:0:166","classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.08,"width":297.48},"classifications":[]}]}
{"frameNumber":5,"frame5": "0:0:0:200","classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.44,"width":297.64},"classifications":[]}]}
I tried it this way, but I was unable to achieve the expected output.
import json
import glob

# Parse every *.json file in the current directory and collect the results.
result = []
for path in glob.glob("*.json"):
    with open(path, "rb") as infile:
        parsed = json.load(infile)
    result.append(parsed)

# Dump everything that was collected as a single JSON array.
# NOTE(review): json.dump produces str, so the binary "wb" mode is presumably
# one reason this attempt fails on Python 3 - confirm.
with open("merged_file.json", "wb") as outfile:
    json.dump(result, outfile)
A valid .json file would need the objects wrapped in a pair of [] (and separated by commas); then you could json.load it, iterate over every entry and do the same as below. But anyway:
The easiest solution is to turn every line into a dict, add the timestamp if the frame number matches, and write it back.
def fuse(file1, file2, nTargetPath):
    """Copy the JSON lines of file1 to nTargetPath, adding matching timestamps.

    Args:
        file1: Path of a file with one JSON object per line; every object
            must contain a "frameNumber" entry.
        file2: Path of a file holding the timestamps, keyed "frame<N>". It may
            be a single JSON object (possibly spread over several lines) or
            one JSON object per line.
        nTargetPath: Path of the output file, written with one JSON object
            per line.

    Raises:
        ValueError: If a line of file1 or the content of file2 is not valid
            JSON (json.JSONDecodeError is a ValueError subclass).
        KeyError: If an object in file1 has no "frameNumber" entry.
    """
    import json  # local import: this snippet has no import block of its own

    # Read the timestamps once instead of re-opening file2 for every line.
    with open(file2, "r") as tSourceFileB:
        tRawB = tSourceFileB.read()
    try:
        # Usual case: the whole file is one JSON object.
        tTimestamps = json.loads(tRawB)
    except json.JSONDecodeError:
        # Fallback: one JSON object per line, merged into a single mapping.
        tTimestamps = {}
        for tLineB in tRawB.splitlines():
            if tLineB.strip():
                tTimestamps.update(json.loads(tLineB))

    # Text mode on both sides: json.dumps returns str, not bytes.
    with open(file1, "r") as tSourceFileA, open(nTargetPath, "w") as tTargetFile:
        for tLineA in tSourceFileA:
            if not tLineA.strip():
                continue  # tolerate blank lines such as a trailing newline
            tDictA = json.loads(tLineA)
            # frameNumber is an int in the data, so convert before joining.
            tKey = "frame" + str(tDictA["frameNumber"])
            if tKey in tTimestamps:
                tDictA[tKey] = tTimestamps[tKey]
            tTargetFile.write(json.dumps(tDictA) + "\n")
This code can easily be improved by optimising the file access, for example when you know that the key for the timestamp in file2 is always in the same row as in file1, and so on.
As was pointed out, one file is ndjson and the other file is json. You need to implement some logic to add the json to the ndjson
# https://pypi.org/project/ndjson/
# pip install ndjson
import ndjson
import json

# Load both inputs: the frame records (ndjson) and the timestamp mapping.
with open('path/to/file/im_a_ndjson.ndjson') as infile:
    ndjson_object = ndjson.load(infile)
with open('path/to/file/json_file2.json') as infile:
    dict_object = json.load(infile)

print(type(ndjson_object[0]['frameNumber']))
# output: <class 'int'>

for key, timestamp in dict_object.items():
    # frameNumber is an int (see the output above), so convert the key suffix.
    # str.strip('frame') drops the characters f, r, a, m, e from both ends,
    # which leaves the digits of a key such as "frame12".
    framenumber = int(key.strip('frame'))
    # Take the first record whose frameNumber matches, if there is one.
    match = next(
        (ndjs for ndjs in ndjson_object if ndjs['frameNumber'] == framenumber),
        None,
    )
    if match is not None:
        # Add the key/value pair to the matching record.
        match[key] = timestamp

with open('path/to/file/new_ndjson.ndjson', 'w') as outfile:
    ndjson.dump(ndjson_object, outfile)

Accessing items in a dump of dictionary objects in Python

I have a strange dataset from our customer. It is a .json file but inside it looks like below
{"a":"aaa","b":"bbb","text":"hello"}
{"a":"aaa","b":"bbb","text":"hi"}
{"a":"aaa","b":"bbb","text":"hihi"}
As you notice, this is just a dump of dictionary objects. It is neither a list (no [] and no comma separator between objects) nor proper JSON, although the file extension is .json. So I am really confused about how to read this file.
All I care about is reading all the text keys from each of the dictionary objects.
This "strange dataset" is actually an existing format that builds upon JSON, called JSONL.
As @user655321 said, you can parse each line. Here's a more complete example that makes the complete dataset available as a list of dicts named dataset:
import json

# Every line of the file is a complete JSON document; parse them one by one
# and collect the resulting dicts.
with open("my_file.json") as handle:
    dataset = [json.loads(row) for row in handle]
In [51]: [json.loads(i)["text"] for i in open("file.json").readlines()]
Out[51]: ['hello', 'hi', 'hihi']
Use list comprehension, it's easier
You can read it line by line and convert the lines to JSON objects and extract the needed data text in your case.
You can do something as follows:
import json

# Open the file through a context manager so the handle is closed
# deterministically instead of being left to the garbage collector.
with open("file.txt") as source:
    for line in source:
        # Skip blank lines (e.g. a trailing empty line): json.loads raises on
        # an empty document.
        if not line.strip():
            continue
        dictionary = json.loads(line)
        print(dictionary["text"])
Since it's not a single JSON file, you can read in the input line by line and deserialize them independently:
import json

# Deserialize the file one line at a time; each line is its own JSON object.
with open('my_file.json') as fh:
    for json_obj in map(json.loads, fh):
        keys = json_obj.keys()  # eg, 'a', 'b', 'text'
        text_val = json_obj['text']  # eg, 'hello', 'hi', or 'hihi'
How about splitting the content by \n then using json to load each dictionary? something like:
import json

# `your_file` is a placeholder for the path of the input file.
with open(your_file) as f:
    data = f.read()

# splitlines() rather than split(): split() without arguments breaks on every
# whitespace character, so an object such as {"text": "hello world"} would be
# cut into fragments that are not valid JSON. Blank lines are skipped.
my_dicts = [json.loads(line) for line in data.splitlines() if line.strip()]
import ast

# NOTE: ast.literal_eval parses Python literals, not JSON. Lines that contain
# the JSON tokens true, false or null are therefore reported as errors;
# json.loads is the right tool for such data.
with open('my_file.json') as fh:
    for line in fh:
        try:
            dict_data = ast.literal_eval(line)
        except (SyntaxError, ValueError):
            dict_data = None
        # Explicit type check instead of `assert`, which is stripped when
        # Python runs with -O.
        if not isinstance(dict_data, dict):
            print('ERROR - {} is not a dictionary'.format(line))
            continue
        ### Process Dictionary Data here or append to list to convert to list of dicts

Creating runtime variable in python to fetch data from dictionary object

I have created a dictionary object by parsing a JSON file in Python. Let's assume the data is as follows:
# Sample data: three plants, each mapped to a dictionary of its properties.
# The key order matches the order in which the entries were originally added.
plants = {
    "radish": {"color": "red", "length": 4},
    "apple": {"smell": "sweet", "season": "winter"},
    "carrot": {"use": "medicine", "juice": "sour"},
}
This could be a very long dictionary object
But at runtime, I need only a few values to be stored in a comma-delimited CSV file. The list of desired properties is in a file.
e.g
radish.color
carrot.juice
So, how would I create a Python app in which I can create dynamic variables such as the ones below to get data from the JSON object and create a CSV file?
at runtime i need variable
plants[radish][color]
plants[carrot][juice]
Thank you to all who help
Sanjay
Consider parsing the text file line by line to retrieve file contents. In the read, split the line by period which denotes the keys of dictionaries. From there, use such a list of keys to retrieve dictionary values. Then, iteratively output values to csv, conditioned by number of items:
Txt file
radish.color
carrot.juice
Python code
import csv

plants = {}
plants["radish"] = {"color":"red", "length":4}
plants["apple"] = {"smell":"sweet", "season":"winter"}
plants["carrot"] = {"use":"medicine", "juice":"sour"}

# Each line of Input.txt is a dotted path such as "radish.color"; split it
# into the list of keys to follow.
data = []
with open("Input.txt", "r") as f:
    for line in f:
        data.append(line.strip().split("."))

with open("Output.csv", "w") as w:
    writer = csv.writer(w, lineterminator = '\n')
    for item in data:
        # A path needs at least two keys (name and property); blank lines and
        # bare names are skipped.
        if len(item) < 2:
            continue
        # Follow the keys one level at a time, so paths of any depth work
        # instead of only two or three keys.
        value = plants
        for key in item:
            value = value[key]
        # One column per key, followed by the value that was found.
        writer.writerow(item + [value])
Output csv
radish,color,red
carrot,juice,sour
(Note: the deeper the nest, the more columns will output conflicting with key/value pairs across columns -maybe output different structured csv files like one-level files/second-level files)

Extracting value data from multiple JSON strings in a single file

I know I am missing the obvious here but I have the following PYTHON code in which I am trying to-
Take a specified JSON file containing multiple strings as an input.
Start at the line 1 and look for the key value of "content_text"
Add the key value to a new dictionary and write said dictionary to a new file
Repeat 1-3 on additional JSON files
import json
# Reads JsonFileName line by line, parses each line as JSON and collects the
# 'content_text' / 'content_type' values of every line into a list of dicts.
# NOTE: the bare `print` statements below are Python 2 syntax.
def OpenJsonFileAndPullData (JsonFileName, JsonOutputFileName):
# NOTE(review): output_file is opened here but never closed in the visible code.
output_file=open(JsonOutputFileName, 'w')
result = []
with open(JsonFileName, 'r') as InputFile:
for line in InputFile:
# The parsed line is bound to `Item` (capital I) ...
Item=json.loads(line)
my_dict={}
# ... but the following lines read `item` (lowercase), which is a different name.
print item
my_dict['Post Content']=item.get('content_text')
my_dict['Type of Post']=item.get('content_type')
print my_dict
result.append(my_dict)
# NOTE(review): json.dumps returns a string and does not write to a file;
# json.dump(result, output_file) is presumably what was intended - confirm.
json.dumps(result, output_file)
OpenJsonFileAndPullData ('MyInput.json', 'MyOutput.txt')
However, when run I receive this error:
AttributeError: 'str' object has no attribute 'get'
Python is case-sensitive.
Item = json.loads(line) # variable "Item"
my_dict['Post Content'] = item.get('content_text') # another variable "item"
By the way, why don't you load whole file as json at once?

Categories