I am working on a project where I need to use US States Zip Code Data. I want to merge two geojson files while preserving the data in those files. geojson-merge https://github.com/mapbox/geojson-merge does this but I am hoping for a python based solution.
Each state has a separate *.json file. For example:
mt_montana_zip_codes_geo.min.json
nd_north_dakota_zip_codes_geo.min.json
import json

# Load each state's zip-code GeoJSON file.  Raw strings keep the
# Windows backslashes literal — note they must be SINGLE backslashes
# inside a raw string (the original mt path had r"C:\\..." which
# produced a doubled backslash, unlike the nd path).
nd_boundary_file = r"C:\Data_ZipCodes_States\State-zip-code-GeoJSON-master" \
                   r"\nd_north_dakota_zip_codes_geo.min.json"
with open(nd_boundary_file, 'r') as f:
    nd_zipcode_boundary = json.load(f)

mt_boundary_file = r"C:\Data_ZipCodes_States\State-zip-code-GeoJSON-master" \
                   r"\mt_montana_zip_codes_geo.min.json"
with open(mt_boundary_file, 'r') as f:
    mt_zipcode_boundary = json.load(f)

# Both files are FeatureCollections sharing the same top-level keys
# ('type', 'features'), so plain dict merges clobber one another:
#This overwrote the mt_zipcode_boundary with the nd_zipcode_boundary into merged
#merged = {**mt_zipcode_boundary, **nd_zipcode_boundary}
#produced a file with two json objects one 'mt' and the other 'nd'
data = {'mt': mt_zipcode_boundary, 'nd': nd_zipcode_boundary}
#Also overwrote mt_zipcode_boundary
mt_zipcode_boundary.update(nd_zipcode_boundary)
How would I write code to combine these two geojson files into a single file?
What about something like this?
import json

# Merge any number of GeoJSON FeatureCollections into one, preserving
# every feature's geometry and properties.  Generalized from the
# two-file version: add more paths to SOURCE_FILES as needed.
SOURCE_FILES = [
    "mt_montana_zip_codes_geo.min.json",
    "nd_north_dakota_zip_codes_geo.min.json",
]

fc = {
    'type': 'FeatureCollection',
    'features': []
}

for path in SOURCE_FILES:
    with open(path) as json_file:
        obj = json.load(json_file)
    # Concatenating the 'features' lists is all a GeoJSON merge needs;
    # no per-feature data is touched.
    fc['features'].extend(obj['features'])

with open("merged.json", "w") as outfile:
    json.dump(fc, outfile)
Related
I am able to do this with two files, but I am not sure whether there is a library that can find the common data across multiple json files.
import json

# Parse both documents (each is expected to be a JSON list).
with open("data/file1.json", "r") as f1:
    file1 = json.loads(f1.read())
with open("data/file2.json", "r") as f2:
    file2 = json.loads(f2.read())

# Report every element of file2 that also occurs in file1; each match
# is appended to file1 again (so file1 ends up with a duplicate).
for item in file2:
    if item not in file1:
        continue
    print(f"Found common: {item}")
    file1.append(item)

print(f"New file1: {file1}")
Shared Items
def shared_items(dict_1, dict_2):
    """Return the key/value pairs that appear identically in both dicts."""
    common = {}
    for key, value in dict_1.items():
        # Keep a pair only when the second dict has the same key AND
        # the same value for it.
        if key in dict_2 and dict_2[key] == value:
            common[key] = value
    return common
Your Code
import json

# Parse both input documents.
with open("data/file1.json", "r") as f1:
    file1 = json.loads(f1.read())
with open("data/file2.json", "r") as f2:
    file2 = json.loads(f2.read())

# Pretty-print the shared key/value pairs and persist them.
shared = json.dumps(shared_items(file1, file2), indent=2, ensure_ascii=False, sort_keys=False)
with open("data/shared.json", "w", encoding="utf-8") as file:
    file.write(shared)
print(f"{shared}\n\n\tdata/shared.json Saved!")
Differences
DeepDiff
This is where deepdiff comes in handy. Deepdiff is a powerful python library for comparing two dictionaries. What makes it powerful is that, during the comparison, deepdiff does not consider the order in which the elements inside the dictionaries appear. Hence, it solves the problem of order-insensitive comparison.
Let’s see deepdiff in action
# Load both snapshots to compare.
from json import loads

with open("OLD.json", "r") as file:
    old_json = loads(file.read())  # baseline document
with open("NEW.json", "r") as file:
    new_json = loads(file.read())  # updated document

# DeepDiff compares the two structures; ignore_order=True means list
# element order is not counted as a difference.
from deepdiff import DeepDiff
differences = DeepDiff(old_json, new_json, ignore_order=True)
## an empty diff means the two documents are equivalent.

# rich renders the diff with readable colors/indentation.
from rich import print
print(differences)
Simple solution
import json

# Open both inputs and parse them as dictionaries.  'with' guarantees
# the handles are closed even if parsing raises (the original opened
# the files and never closed them).
with open("test.json", "r") as file1, open("test1.json", "r") as file2:
    object1 = json.load(file1)
    object2 = json.load(file2)

final_dict = {}  # collects the common elements

# Keys present in BOTH files; set intersection is O(1) per lookup.
commonKeys = set(object1.keys()).intersection(object2.keys())
for key in commonKeys:
    if object1[key] == object2[key]:  # key AND value match in both files
        final_dict[key] = object1[key]

# Persist the common elements; 'with' flushes and closes the output.
with open("output.json", "w") as outputfile:
    json.dump(final_dict, outputfile)  # saving final_dict to a json file
How to create a null json file and append each details to the json file in the following format
[
{"name":"alan","job":"clerk"},
{"name":"bob","job":"engineer"}
]
Code
import json

# Start with an empty JSON array on disk.
with open("test.json", mode='w', encoding='utf-8') as f:
    json.dump([], f)

test_data = ['{"name":"alan","job":"clerk"}','{"name":"bob","job":"engineer"}']

# A JSON file cannot be appended to in place: read the whole array,
# add the new record, and rewrite the file.  (The original opened the
# file in 'w' mode on every pass — clobbering earlier records — and
# dumped the raw string, double-encoding each one.)
for i in test_data:
    with open("test.json", mode='r', encoding='utf-8') as fileobj:
        records = json.load(fileobj)
    records.append(json.loads(i))  # parse the string into a real object
    with open("test.json", mode='w', encoding='utf-8') as fileobj:
        json.dump(records, fileobj)
How this can be efficiently done
You can't modify the json content like that. You'll need to modify the data structure and then completely rewrite the json file. You might be able to just read the data from the json file at startup, and write it back at shutdown.
import json
def store_my_data(data, filename='test.json'):
    """Serialize *data* to *filename* as JSON, replacing any previous contents."""
    with open(filename, mode='w', encoding='utf-8') as handle:
        handle.write(json.dumps(data))
def load_my_data(filename='test.json'):
    """Read *filename* and return the JSON document it contains."""
    with open(filename, mode='r', encoding='utf-8') as handle:
        return json.loads(handle.read())
# NOTE(review): everything below the bare raise is unreachable as
# written — it is illustrative demo code only.
raise Exception # skipping some steps here

test_data = [
    {"name": "alan", "job": "clerk"},
    {"name": "bob", "job": "engineer"}
]
item_one = test_data[0]
item_two = test_data[1]

# You already know how to store data in a json file.
store_my_data(test_data)

# Suppose you don't have any data at the start.
current_data = []
store_my_data(current_data)

# Later, you want to add to the data.
# You will have to change your data in memory,
# then completely rewrite the file.
current_data.append(item_one)
current_data.append(item_two)
store_my_data(current_data)
I'm building a real-time Twitter sentiment analysis web using Python. I want the results of the analysis to be stored in a json format file to be used as historical data for each search that was carried out. How can I overwrite all the search data in one file?
The data was originally stored in the Pandas dataframe on a temporary basis, so I converted it to an array in json
headings = ("Tweet", "Sentimen")
data = list(zip(tweets['tweet_text'], sentiment))
df = pd.DataFrame(data, columns=['Tweet', 'Sentimen'])
# orient='records' writes a JSON array of {"Tweet": ..., "Sentimen": ...} objects.
df.to_json(r'Export_DataFrame6.json', orient='records', indent=4)

# Read the exported records back.  'with' closes the handle — the
# original opened for reading and never closed it before reopening
# the same path for writing.
with open("Export_DataFrame6.json", "r") as a_file:
    json_object = json.load(a_file)

# Mutate the first record in memory...
d = json_object[0]
d['Tweet'] = "Testing"
d['Sentimen'] = "Negative"

# ...then rewrite the whole file with the updated list.
with open("Export_DataFrame6.json", "w") as a_file:
    json.dump(json_object, a_file)
update code:
headings = ("Tweet", "Sentimen")
data = list(zip(tweets['tweet_text'], sentiment))
df = pd.DataFrame(data, columns=['Tweet', 'Sentimen'])
df.to_json(r'Export_DataFrame6.json', orient='records', indent=4)

with open("Export_DataFrame6.json", "r") as a_file:
    json_object = json.load(a_file)

d = json_object[0]
# json.dump cannot serialize pandas Series objects — that is the
# reported "Object of type Series is not JSON serializable" error.
# Convert to plain lists before storing them in the dict.
d['Tweet'] = list(tweets['tweet_text'])
d['Sentimen'] = list(sentiment)

with open("Export_DataFrame6.json", "w") as a_file:
    json.dump(json_object, a_file)
error: TypeError: Object of type Series is not JSON serializable
I think you are getting stuck on this line:
d = json_object[0]
because you think you have a json object but don't, and when you try to use [0] you get the error. Without seeing the json file, it's a rough guess.
can you open your file this way?
# NOTE(review): this reads raw bytes and decodes them — json_object is
# a str here, not parsed JSON; json.loads is still required afterwards.
with open('Export_DataFrame6.json', 'rb') as f:
    json_object = f.read().decode('utf-8')
and then try
d = json_object[0]
or
d = json.loads(json_object)[0]
I have an unpickle function which returns a dict as:
def unpickle(file):
    """Load and return the object pickled in *file* (opened in binary mode)."""
    with open(file, 'rb') as fo:
        # Renamed the local from 'dict', which shadowed the builtin.
        data = pickle.load(fo, encoding='bytes')
    return data
and a function which reads pickled object with fieldnames (Don't know if this is the correct definiton):
def do_sth():
    """Load the pickled dataset and pull out the filename and label lists."""
    all_data = unpickle('mypickle.pickle')
    # Requires the pickle to be a dict with these exact keys; a pickle
    # saved as a list raises KeyError here (the asker's symptom).
    image_filenames = all_data["Filenames"]
    conditions = all_data["Labels"]
I have two lists as Filenames = ['001.png','002.png'] and Labels = ['0','1'] for brevity, that I need to pickle and save under mypickle.pickle so I can call them under the do_sth function. Till now what I did is:
# Saves the two lists as a LIST, so string-key lookups like
# all_data["Filenames"] fail with KeyError.
data = [Filenames,Labels]
with open("mypickle.pickle", "wb") as f:
    pickle.dump(data, f)
and
# Zips the two lists into {filename: label} pairs — still no
# "Filenames"/"Labels" keys, hence the same KeyError.
data = dict(zip(file_paths, labels))
with open("mypickle.pickle", "wb") as f:
    pickle.dump(data, f)
But I'm getting KeyError :'Filenames'. Which structure shall I use to save these 2 lists so they may work properly.
Thanks.
Change your function to this
def do_sth():
    # The pickle holds a list, so index positionally instead of by key.
    all_data = unpickle('mypickle.pickle')
    image_filenames = all_data[0]  # first element: the filenames list
    conditions = all_data[1]  # second element: the labels list
Explanation
You saved pickle as list. When you load the pickle it is still a list.
or
Actually save it as a dict
# Pickle a dict keyed exactly as do_sth expects ("Filenames"/"Labels").
data = {"Filenames": Filenames, "Labels": Labels}
with open("mypickle.pickle", "wb") as f:
    pickle.dump(data, f)
Hi I am trying to take the data from a json file and insert and id then perform POST REST.
my file data.json has:
{
'name':'myname'
}
and I would like to add an id so that the json data looks like:
{
'id': 134,
'name': 'myname'
}
So I tried:
import json

# 'with' closes the handle (the original opened the file and never
# closed it).  NOTE(review): the data.json shown above uses single
# quotes, which is NOT valid JSON — json.loads will raise until the
# file uses double-quoted strings.
with open("data.json", "r") as f:
    data = f.read()
jsonObj = json.loads(data)
I can't get the json file to load.
What should I do so that I can convert the json file into json object and add another id value.
Set item using data['id'] = ....
import json

# In-place read-modify-write: 'r+' opens the file for both reading
# and writing without truncating it first.
with open('data.json', 'r+') as f:
    data = json.load(f)
    data['id'] = 134 # <--- add `id` value.
    f.seek(0) # <--- should reset file position to the beginning.
    json.dump(data, f, indent=4)
    # Truncate is essential: if the new JSON is shorter than the old
    # file, stale trailing bytes would otherwise survive.
    f.truncate() # remove remaining part
falsetru's solution is nice, but has a little bug:
Suppose original 'id' length was larger than 5 characters. When we then dump with the new 'id' (134 with only 3 characters) the length of the string being written from position 0 in file is shorter than the original length. Extra chars (such as '}') left in file from the original content.
I solved that by replacing the original file.
import json
import os

filename = 'data.json'

# Pull the current document into memory.
with open(filename, 'r') as f:
    data = json.load(f)

data['id'] = 134 # <--- add `id` value.

# Delete the old file first so the rewrite starts from an empty file —
# no stale bytes from a longer original can survive.
os.remove(filename)
with open(filename, 'w') as f:
    json.dump(data, f, indent=4)
I would like to present a modified version of Vadim's solution. It helps to deal with asynchronous requests to write/modify json file. I know it wasn't a part of the original question but might be helpful for others.
In case of asynchronous file modification os.remove(filename) will raise FileNotFoundError if requests emerge frequently. To overcome this problem you can create temporary file with modified content and then rename it simultaneously replacing old version. This solution works fine both for synchronous and asynchronous cases.
import os, json, uuid

filename = 'data.json'
with open(filename, 'r') as f:
    data = json.load(f)

data['id'] = 134 # <--- add `id` value.
# add, remove, modify content

# create randomly named temporary file to avoid
# interference with other thread/asynchronous request
tempfile = os.path.join(os.path.dirname(filename), str(uuid.uuid4()))
with open(tempfile, 'w') as f:
    json.dump(data, f, indent=4)

# os.replace overwrites the destination atomically on both POSIX and
# Windows; os.rename raises FileExistsError on Windows when the
# target already exists, defeating the purpose of this pattern.
os.replace(tempfile, filename)
There are really quite a number of ways to do this, and all of the above are in one way or another valid approaches... Let me add a straightforward proposition. So, assuming your current existing json file looks like this:
{
"name":"myname"
}
And you want to bring in this new json content (adding key "id")
{
"id": "134",
"name": "myname"
}
My approach has always been to keep the code extremely readable with easily traceable logic. So first, we read the entire existing json file into memory, assuming you are very well aware of your json's existing key(s).
import json

# first, get the absolute path to json file
PATH_TO_JSON = 'data.json' # assuming same directory (but you can work your magic here with os.)

# read existing json to memory. you do this to preserve whatever existing data.
with open(PATH_TO_JSON,'r') as jsonfile:
    json_content = json.load(jsonfile) # this is now in memory! you can use it outside 'open'
Next, we use the 'with open()' syntax again, this time with the 'w' option. 'w' is a write mode which lets us edit and write new information to the file. Here's the catch that works for us: any existing file with the same target name will be erased automatically.
So what we can do now, is simply write to the same filename with the new data
# add the id key-value pair (rmbr that it already has the "name" key value)
json_content["id"] = "134"
# 'w' mode truncates the file, so the dump below fully replaces it.
with open(PATH_TO_JSON,'w') as jsonfile:
    json.dump(json_content, jsonfile, indent=4) # you decide the indentation level
And there you go!
data.json should be good to go for a good old POST request.
try this script:
# Read the current document.
with open("data.json") as f:
    data = json.load(f)

data["id"] = 134

# Use a context manager for the write too — the original passed an
# anonymous open(...) handle to json.dump and never closed it, so the
# write could stay unflushed.
with open("data.json", "w") as f:
    json.dump(data, f, indent = 4)
the result is:
{
"name":"myname",
"id":134
}
If only the arrangement of keys is different, you can solve the problem by converting "data" to a list, arranging it as you wish, then rebuilding the dict and saving the file, like this:
index_add = 0

with open("data.json") as f:
    data = json.load(f)

# Rebuild the dict with the new pair inserted at position index_add;
# dicts preserve insertion order in Python 3.7+.
data_li = [[k, v] for k, v in data.items()]
data_li.insert(index_add, ["id", 134])
data = dict(data_li)  # simpler than re-indexing pair by pair

# Context manager closes the output (the original leaked the handle
# returned by the inline open()).
with open("data.json", "w") as f:
    json.dump(data, f, indent = 4)
the result is:
{
"id":134,
"name":"myname"
}
you can add if condition in order not to repeat the key, just change it, like that:
index_add = 0
n_k = "id"
n_v = 134

with open("data.json") as f:
    data = json.load(f)

if n_k in data:
    # Key already present: just replace its value in place.
    data[n_k] = n_v
else:
    # Key missing: rebuild the dict with the pair inserted at
    # index_add (insertion order is preserved in Python 3.7+).
    data_li = [[k, v] for k, v in data.items()]
    data_li.insert(index_add, [n_k, n_v])
    data = dict(data_li)  # simpler than re-indexing pair by pair

# Context manager closes the output (the original leaked the handle
# returned by the inline open()).
with open("data.json", "w") as f:
    json.dump(data, f, indent = 4)
This implementation should suffice:
with open(jsonfile, 'r') as file:
    data = json.load(file)

# The key must be the string "id" — the original wrote data[id],
# which silently uses the *builtin* id function as the dict key and
# then makes json.dump fail (function keys are not serializable).
data["id"] = value

with open(jsonfile, 'w') as file:
    json.dump(data, file)
using context manager for the opening of the jsonfile.
data holds the updated object and dumped into the overwritten jsonfile in 'w' mode.
Not exactly your solution but might help some people solving this issue with keys.
I have a list of files in a folder, and I need to make JSON out of it with keys.
After many hours of trying the solution is simple.
Solution:
async def return_file_names():
    """Return the files under ./tmp/ as {"responseObj": [{"Key": i, "Value": name}, ...]}."""
    dir_list = os.listdir("./tmp/")
    # enumerate gives each entry its own position in one pass.
    # The original's dir_list.index(value) was O(n) per file and
    # returns the FIRST match, so duplicate names would share a key.
    json_dict = {"responseObj": [{"Key": index, "Value": value} for index, value in enumerate(dir_list)]}
    print(json_dict)
    return(json_dict)
Response look like this:
{
"responseObj": [
{
"Key": 0,
"Value": "bottom_mask.GBS"
},
{
"Key": 1,
"Value": "bottom_copper.GBL"
},
{
"Key": 2,
"Value": "copper.GTL"
},
{
"Key": 3,
"Value": "soldermask.GTS"
},
{
"Key": 4,
"Value": "ncdrill.DRD"
},
{
"Key": 5,
"Value": "silkscreen.GTO"
}
]
}