Merging 2 json files

Merging 2 json files - python

I'm trying to merge both json files but I'm trying to append timestamp from file2 to corresponding frame number in file1.please guide.
JSON_FILE1
{"frameNumber":1,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":true,"bbox":{"top":157,"left":581,"height":390,"width":297},"classifications":[]}]}
{"frameNumber":2,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.36,"width":297.16},"classifications":[]}]}
{"frameNumber":3,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.72,"width":297.32},"classifications":[]}]}
{"frameNumber":4,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.08,"width":297.48},"classifications":[]}]}
{"frameNumber":5,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.44,"width":297.64},"classifications":[]}]}
JSON_FILE2
{
"frame1": "0:0:0:66",
"frame2": "0:0:0:100",
"frame3": "0:0:0:133",
"frame4": "0:0:0:166",
"frame5": "0:0:0:200"
}
expected output:
{"frameNumber":1,"frame1": "0:0:0:66",,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":true,"bbox":{"top":157,"left":581,"height":390,"width":297},"classifications":[]}]}
{"frameNumber":2, "frame2": "0:0:0:10,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.36,"width":297.16},"classifications":[]}]}
{"frameNumber":3,"frame3": "0:0:0:133,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.72,"width":297.32},"classifications":[]}]}
{"frameNumber":4,"frame4": "0:0:0:166","classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.08,"width":297.48},"classifications":[]}]}
{"frameNumber":5,"frame5": "0:0:0:200","classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.44,"width":297.64},"classification
I tried this way but I am unable to achieve.
import json
import glob
result = []
for f in glob.glob("*.json"):
with open(f,"rb") as infile:
result.append(json.load(infile))
with open("merged_file.json","wb") as outfile:
json.dump(result,outfile)

A correct .json needs a pair of [] and than you could json.load it, iterate over ever line and do the same like below but anyway:
The easiest solution is turn every line in a dict, if the framenumber matches add the timestamp and write it back.
def fuse(file1, file2, nTargetPath):
with open(nTargetPath, "wb") as tTargetFile:
with open(file1, "rb") as tSourceFileA:
for tLineA in tSourceFileA.readlines():
tDictA = json.loads(tLineA) #loads dict from a string
tKey = "frame"+tDictA["frameNumber"] #searching the correct entry but why not name this timestampX
with open(file2, "rb") as tSourceFileB:
for tLineB in tSourceFileB.readlines():
tDictB = json.loads(tLineB )
if tKey in tDictB:
tDictA[tKey] = tDictB[tKey]
break #cause there is only one timestamp
tTargetFile.write(json.dumps(tDictA)+'\n')
This code cann easily updated by improve the file accessing for example when you know the key for the timestamp in file2 is everytime in the same row as in file1 and so on.

As was pointed out, one file is ndjson and the other file is json. You need to implement some logic to add the json to the ndjson
# https://pypi.org/project/ndjson/
# pip install ndjson
import ndjson
import json
with open('path/to/file/im_a_ndjson.ndjson') as infile:
ndjson_object = ndjson.load(infile)
with open('path/to/file/json_file2.json') as infile:
dict_object = json.load(infile)
print(type(ndjson_object[0]['frameNumber']))
# output: <class 'int'>
for key in dict_object:
# int needed as you can see above
framenumber = int(key.strip('frame'))
# find the matching ndjson object
for ndjs in ndjson_object:
if ndjs['frameNumber'] == framenumber:
# add the key/value pair
ndjs[key] = dict_object[key]
# we can break as we've found it
break
with open('path/to/file/new_ndjson.ndjson', 'w') as outfile:
ndjson.dump(ndjson_object, outfile)

Related

Deleting specific JSON lines while iterating thorugh key in Python

I have a large JSON file that contains image annotation data. I am iterating through one of the keys below.:
import json
# Opening JSON file
f = open('annotations.json')
# returns JSON object as
# a dictionary
data = json.load(f)
# Iterating through the json
# list
for i in data['annotations']:
if i['segmentation'] == [[]]:
print(i['segmentation'])
del i
#print(i['segmentation'])
# Closing file
f.close()
Printing the returned dictionaries, they look like this:
{"iscrowd":0,"image_id":32,"bbox":[],"segmentation":[[]],"category_id":2,"id":339,"area":0}
I am trying to remove the following above lines in the annotations key that contain no data for segmentation. I am able to extract these lines, I am just not sure how to remove them without breaking the format of the file.
{"iscrowd":0,"image_id":32,"bbox":[],"segmentation":[[]],"category_id":2,"id":339,"area":0}
,{"iscrowd":0,"image_id":32,"bbox":[],"segmentation":[[]],"category_id":2,"id":340,"area":0}
,{"iscrowd":0,"image_id":32,"bbox":[],"segmentation":[[]],"category_id":2,"id":341,"area":0}
,{"iscrowd":0,"image_id":32,"bbox":[],"segmentation":[[]],"category_id":2,"id":342,"area":0},
...
Here is what finally got it working for me:
import json
# Opening JSON file
f = open('annotations.json')
# returns JSON object as
# a dictionary
data = json.load(f)
# Closing file
f.close()
# Iterating through the json
# list
count = 0
for key in data['annotations']:
count +=1
if key['segmentation'] == [[]]:
print(key['segmentation'])
data["annotations"].pop(count)
if key['bbox'] == []:
data["annotations"].pop(count)
#print(i['segmentation'])
with open("newannotations.json", "w") as json_file:
json.dump(data, json_file)

The function json.loads() returns a python dictionary, which you can then modify as you'd like. Similarly json.dumps() can be used to write a json file from a python dictionary.
In order to remove an entry from a dictionary, you can use the dictionary pop() method. Assuming in the above you want to delete each entry referred to with the key i (as per the del i) if the entry in data["annotations"][i]["segmentation"] ==[[]], one could do it approximately as follows:
import json
# Opening JSON file
f = open('annotations.json')
# returns JSON object as
# a dictionary
data = json.load(f)
# Closing file
f.close()
# Iterating through the json
# list
for key in data['annotations']:
if data["annotations"][key]['segmentation'] == [[]]:
print(data["annotations"][key]['segmentation'])
data["annotations"].pop(key)
#print(i['segmentation'])
with open("newannotations.json", "w") as json_file:
json.dump(data, json_file)
Is this what you wanted to do?

How to retrieve "values" against specific "key" in nested dictionary

I have a json file that looks like this JSON_FILE:
It contains nested dictionary. I want to retrieve the key annotations(appears one time in file). Specifically all the values against key image_id(appears many times in file) and store it in a separate file. How do I do it in PYTHON

I have been able to resolve this
import json
with open("myfile.json") as f:
data_retreived= json.load(f)
a=data_retreived["annotations"]
myfile = open('data.txt', 'w')
for f in a:
myfile.write("%s\n" % f['image_id'])
#print(f['image_id'])
myfile.close()

How to dump Python data into already made JSON folder?

I started to learn more about json and dumping data into a json file with python. My question is:
I made two json files not with python code (with open("randomFile.json") but I made the files 'with hand' and I stored one value to each file and loaded those values into separate variables.. Now I modified the variables(values) from json file inside a program and now I want to store or replace the old values with the new modified values inside these already made json files.. I hope this makes sense. Here is a simple example code..
import json
f1 = open('number1.json')
number1 = json.load(f1)
f2 = open('number2.json')
number2 = json.load(f2)
number1 += 10
number2 += 5
So variables number1 and number2 got the values from json file now I want to store these new values into the same json file..

Suppose you have the following valid json file called number1.json:
{
"field": 1
}
Then, you can update it with the following code. Just do the same thing for number2.json:
import json
import os
file = 'number1.json'
with open(file, 'r+') as f:
data = json.load(f)
data['field'] += 10
os.remove(file)
with open(file, 'w') as f:
json.dump(data, f, indent=4)
EDIT: As mentioned in the comments, in this simple case, you can also modify without removing the file:
import json
import os
file = 'number1.json'
with open(file, 'r+') as f:
data = json.load(f)
data['field'] += 10
f.seek(0)
json.dump(data, f, indent=4)

How to set Chrome flags using terminal in Mac? [duplicate]

I'm trying to create a function that would add entries to a json file. Eventually, I want a file that looks like
[{"name" = "name1", "url" = "url1"}, {"name" = "name2", "url" = "url2"}]
etc. This is what I have:
def add(args):
with open(DATA_FILENAME, mode='r', encoding='utf-8') as feedsjson:
feeds = json.load(feedsjson)
with open(DATA_FILENAME, mode='w', encoding='utf-8') as feedsjson:
entry = {}
entry['name'] = args.name
entry['url'] = args.url
json.dump(entry, feedsjson)
This does create an entry such as {"name"="some name", "url"="some url"}. But, if I use this add function again, with different name and url, the first one gets overwritten. What do I need to do to get a second (third...) entry appended to the first one?
EDIT: The first answers and comments to this question have pointed out the obvious fact that I am not using feeds in the write block. I don't see how to do that, though. For example, the following apparently will not do:
with open(DATA_FILENAME, mode='a+', encoding='utf-8') as feedsjson:
feeds = json.load(feedsjson)
entry = {}
entry['name'] = args.name
entry['url'] = args.url
json.dump(entry, feeds)

json might not be the best choice for on-disk formats; The trouble it has with appending data is a good example of why this might be. Specifically, json objects have a syntax that means the whole object must be read and parsed in order to understand any part of it.
Fortunately, there are lots of other options. A particularly simple one is CSV; which is supported well by python's standard library. The biggest downside is that it only works well for text; it requires additional action on the part of the programmer to convert the values to numbers or other formats, if needed.
Another option which does not have this limitation is to use a sqlite database, which also has built-in support in python. This would probably be a bigger departure from the code you already have, but it more naturally supports the 'modify a little bit' model you are apparently trying to build.

You probably want to use a JSON list instead of a dictionary as the toplevel element.
So, initialize the file with an empty list:
with open(DATA_FILENAME, mode='w', encoding='utf-8') as f:
json.dump([], f)
Then, you can append new entries to this list:
with open(DATA_FILENAME, mode='w', encoding='utf-8') as feedsjson:
entry = {'name': args.name, 'url': args.url}
feeds.append(entry)
json.dump(feeds, feedsjson)
Note that this will be slow to execute because you will rewrite the full contents of the file every time you call add. If you are calling it in a loop, consider adding all the feeds to a list in advance, then writing the list out in one go.

Append entry to the file contents if file exists, otherwise append the entry to an empty list and write in in the file:
a = []
if not os.path.isfile(fname):
a.append(entry)
with open(fname, mode='w') as f:
f.write(json.dumps(a, indent=2))
else:
with open(fname) as feedsjson:
feeds = json.load(feedsjson)
feeds.append(entry)
with open(fname, mode='w') as f:
f.write(json.dumps(feeds, indent=2))

Using a instead of w should let you update the file instead of creating a new one/overwriting everything in the existing file.
See this answer for a difference in the modes.

One possible solution is do the concatenation manually, here is some useful
code:
import json
def append_to_json(_dict,path):
with open(path, 'ab+') as f:
f.seek(0,2) #Go to the end of file
if f.tell() == 0 : #Check if file is empty
f.write(json.dumps([_dict]).encode()) #If empty, write an array
else :
f.seek(-1,2)
f.truncate() #Remove the last character, open the array
f.write(' , '.encode()) #Write the separator
f.write(json.dumps(_dict).encode()) #Dump the dictionary
f.write(']'.encode()) #Close the array
You should be careful when editing the file outside the script not add any spacing at the end.

this, work for me :
with open('file.json', 'a') as outfile:
outfile.write(json.dumps(data))
outfile.write(",")
outfile.close()

I have some code which is similar, but does not rewrite the entire contents each time. This is meant to run periodically and append a JSON entry at the end of an array.
If the file doesn't exist yet, it creates it and dumps the JSON into an array. If the file has already been created, it goes to the end, replaces the ] with a , drops the new JSON object in, and then closes it up again with another ]
# Append JSON object to output file JSON array
fname = "somefile.txt"
if os.path.isfile(fname):
# File exists
with open(fname, 'a+') as outfile:
outfile.seek(-1, os.SEEK_END)
outfile.truncate()
outfile.write(',')
json.dump(data_dict, outfile)
outfile.write(']')
else:
# Create file
with open(fname, 'w') as outfile:
array = []
array.append(data_dict)
json.dump(array, outfile)

You aren't ever writing anything to do with the data you read in. Do you want to be adding the data structure in feeds to the new one you're creating?
Or perhaps you want to open the file in append mode open(filename, 'a') and then add your string, by writing the string produced by json.dumps instead of using json.dump - but nneonneo points out that this would be invalid json.

import jsonlines
object1 = {
"name": "name1",
"url": "url1"
}
object2 = {
"name": "name2",
"url": "url2"
}
# filename.jsonl is the name of the file
with jsonlines.open("filename.jsonl", "a") as writer: # for writing
writer.write(object1)
writer.write(object2)
with jsonlines.open('filename.jsonl') as reader: # for reading
for obj in reader:
print(obj)
visit for more info https://jsonlines.readthedocs.io/en/latest/

You can simply import the data from the source file, read it, and save what you want to append to a variable. Then open the destination file, assign the list data inside to a new variable (presumably this will all be valid JSON), then use the 'append' function on this list variable and append the first variable to it. Viola, you have appended to the JSON list. Now just overwrite your destination file with the newly appended list (as JSON).
The 'a' mode in your 'open' function will not work here because it will just tack everything on to the end of the file, which will make it non-valid JSON format.

let's say you have the following dicts
d1 = {'a': 'apple'}
d2 = {'b': 'banana'}
d3 = {'c': 'carrot'}
you can turn this into a combined json like this:
master_json = str(json.dumps(d1))[:-1]+', '+str(json.dumps(d2))[1:-1]+', '+str(json.dumps(d3))[1:]
therefore, code to append to a json file will look like below:
dict_list = [d1, d2, d3]
for i, d in enumerate(d_list):
if i == 0:
#first dict
start = str(json.dumps(d))[:-1]
with open(str_file_name, mode='w') as f:
f.write(start)
else:
with open(str_file_name, mode='a') as f:
if i != (len(dict_list) - 1):
#middle dicts
mid = ','+str(json.dumps(d))[1:-1]
f.write(mid)
else:
#last dict
end = ','+str(json.dumps(d))[1:]
f.write(end)

How to save a dictionary to a file?

I have problem with changing a dict value and saving the dict to a text file (the format must be same), I only want to change the member_phone field.
My text file is the following format:
memberID:member_name:member_email:member_phone
and I split the text file with:
mdict={}
for line in file:
x=line.split(':')
a=x[0]
b=x[1]
c=x[2]
d=x[3]
e=b+':'+c+':'+d
mdict[a]=e
When I try change the member_phone stored in d, the value has changed not flow by the key,
def change(mdict,b,c,d,e):
a=input('ID')
if a in mdict:
d= str(input('phone'))
mdict[a]=b+':'+c+':'+d
else:
print('not')
and how to save the dict to a text file with same format?

Python has the pickle module just for this kind of thing.
These functions are all that you need for saving and loading almost any object:
import pickle
with open('saved_dictionary.pkl', 'wb') as f:
pickle.dump(dictionary, f)
with open('saved_dictionary.pkl', 'rb') as f:
loaded_dict = pickle.load(f)
In order to save collections of Python there is the shelve module.

Pickle is probably the best option, but in case anyone wonders how to save and load a dictionary to a file using NumPy:
import numpy as np
# Save
dictionary = {'hello':'world'}
np.save('my_file.npy', dictionary)
# Load
read_dictionary = np.load('my_file.npy',allow_pickle='TRUE').item()
print(read_dictionary['hello']) # displays "world"
FYI: NPY file viewer

We can also use the json module in the case when dictionaries or some other data can be easily mapped to JSON format.
import json
# Serialize data into file:
json.dump( data, open( "file_name.json", 'w' ) )
# Read data from file:
data = json.load( open( "file_name.json" ) )
This solution brings many benefits, eg works for Python 2.x and Python 3.x in an unchanged form and in addition, data saved in JSON format can be easily transferred between many different platforms or programs. This data are also human-readable.

Save and load dict to file:
def save_dict_to_file(dic):
f = open('dict.txt','w')
f.write(str(dic))
f.close()
def load_dict_from_file():
f = open('dict.txt','r')
data=f.read()
f.close()
return eval(data)

As Pickle has some security concerns and is slow (source), I would go for JSON, as it is fast, built-in, human-readable, and interchangeable:
import json
data = {'another_dict': {'a': 0, 'b': 1}, 'a_list': [0, 1, 2, 3]}
# e.g. file = './data.json'
with open(file, 'w') as f:
json.dump(data, f)
Reading is similar easy:
with open(file, 'r') as f:
data = json.load(f)
This is similar to this answer, but implements the file handling correctly.
If the performance improvement is still not enough, I highly recommend orjson, fast, correct JSON library for Python build upon Rust.

I'm not sure what your first question is, but if you want to save a dictionary to file you should use the json library. Look up the documentation of the loads and puts functions.

I would suggest saving your data using the JSON format instead of pickle format as JSON's files are human-readable which makes your debugging easier since your data is small. JSON files are also used by other programs to read and write data. You can read more about it here
You'll need to install the JSON module, you can do so with pip:
pip install json
# To save the dictionary into a file:
json.dump( data, open( "myfile.json", 'w' ) )
This creates a json file with the name myfile.
# To read data from file:
data = json.load( open( "myfile.json" ) )
This reads and stores the myfile.json data in a data object.

For a dictionary of strings such as the one you're dealing with, it could be done using only Python's built-in text processing capabilities.
(Note this wouldn't work if the values are something else.)
with open('members.txt') as file:
mdict={}
for line in file:
a, b, c, d = line.strip().split(':')
mdict[a] = b + ':' + c + ':' + d
a = input('ID: ')
if a not in mdict:
print('ID {} not found'.format(a))
else:
b, c, d = mdict[a].split(':')
d = input('phone: ')
mdict[a] = b + ':' + c + ':' + d # update entry
with open('members.txt', 'w') as file: # rewrite file
for id, values in mdict.items():
file.write(':'.join([id] + values.split(':')) + '\n')

I like using the pretty print module to store the dict in a very user-friendly readable form:
import pprint
def store_dict(fname, dic):
with open(fname, "w") as f:
f.write(pprint.pformat(dic, indent=4, sort_dicts=False))
# note some of the defaults are: indent=1, sort_dicts=True
Then, when recovering, read in the text file and eval() it to turn the string back into a dict:
def load_file(fname):
try:
with open(fname, "r") as f:
dic = eval(f.read())
except:
dic = {}
return dic

Unless you really want to keep the dictionary, I think the best solution is to use the csv Python module to read the file.
Then, you get rows of data and you can change member_phone or whatever you want ;
finally, you can use the csv module again to save the file in the same format
as you opened it.
Code for reading:
import csv
with open("my_input_file.txt", "r") as f:
reader = csv.reader(f, delimiter=":")
lines = list(reader)
Code for writing:
with open("my_output_file.txt", "w") as f:
writer = csv.writer(f, delimiter=":")
writer.writerows(lines)
Of course, you need to adapt your change() function:
def change(lines):
a = input('ID')
for line in lines:
if line[0] == a:
d=str(input("phone"))
line[3]=d
break
else:
print "not"

I haven't timed it but I bet h5 is faster than pickle; the filesize with compression is almost certainly smaller.
import deepdish as dd
dd.io.save(filename, {'dict1': dict1, 'dict2': dict2}, compression=('blosc', 9))

file_name = open("data.json", "w")
json.dump(test_response, file_name)
file_name.close()
or use context manager, which is better:
with open("data.json", "w") as file_name:
json.dump(test_response, file_name)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Merging 2 json files - python

Related

Deleting specific JSON lines while iterating thorugh key in Python

How to retrieve "values" against specific "key" in nested dictionary

How to dump Python data into already made JSON folder?

How to set Chrome flags using terminal in Mac? [duplicate]

How to save a dictionary to a file?

Categories

Resources