Save filename as variable's value in Python - python

I want to save my Python data to a JSON file, but I need to name the JSON file after the title.
Code:
data = {
    "Title": title.text,
    "Registration": doctor.text,
    "Keywords": list2,
    "Article": list
}
# title.text == "banana"
with open('title.text.json', 'w', encoding='UTF-8') as f:
    json.dump(data, f, ensure_ascii=False)
The result I expected: the file saved as banana.json.
Edit: it works with this:
with open('%s.json' % title_tag.text, 'w', encoding='UTF-8') as f:
    json.dump(data, f, ensure_ascii=False)

You can use the following code to achieve this:
with open(title.text + '.json', 'w', encoding='UTF-8') as f:
    json.dump(data, f, ensure_ascii=False)
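For what it's worth, on Python 3.6+ an f-string does the same job and reads a little cleaner; a minimal sketch, assuming title.text holds the string "banana":
import json

# assuming title.text == "banana"; the f-string builds the filename dynamically
filename = f"{title.text}.json"
with open(filename, 'w', encoding='UTF-8') as f:
    json.dump(data, f, ensure_ascii=False)  # writes banana.json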

Related

create and append data in json format to json file - python

How do I create an empty JSON file and append each record to it in the following format?
[
    {"name": "alan", "job": "clerk"},
    {"name": "bob", "job": "engineer"}
]
Code:
import json

with open("test.json", mode='w', encoding='utf-8') as f:
    json.dump([], f)

test_data = ['{"name":"alan","job":"clerk"}', '{"name":"bob","job":"engineer"}']
for i in test_data:
    with open("test.json", mode='w', encoding='utf-8') as fileobj:
        json.dump(i, fileobj)
How can this be done efficiently?
You can't modify the JSON content like that. You'll need to modify the data structure and then completely rewrite the JSON file. You might be able to just read the data from the JSON file at startup and write it back at shutdown.
import json

def store_my_data(data, filename='test.json'):
    """Write data to a json file."""
    with open(filename, mode='w', encoding='utf-8') as f:
        json.dump(data, f)

def load_my_data(filename='test.json'):
    """Load data from a json file."""
    with open(filename, mode='r', encoding='utf-8') as f:
        return json.load(f)

# (skipping some steps here)

test_data = [
    {"name": "alan", "job": "clerk"},
    {"name": "bob", "job": "engineer"}
]
item_one = test_data[0]
item_two = test_data[1]

# You already know how to store data in a json file.
store_my_data(test_data)

# Suppose you don't have any data at the start.
current_data = []
store_my_data(current_data)

# Later, you want to add to the data.
# You will have to change the data in memory,
# then completely rewrite the file.
current_data.append(item_one)
current_data.append(item_two)
store_my_data(current_data)

Converting a dictionary to json having persian characters

Here is some code of mine; I'm trying to convert a dictionary containing Persian characters to JSON, but I get question marks instead of the characters. My dictionary looks like this:
bycommunity = {"0": [{"60357": "این یک پیام است"}]}
with open('data.json', 'wb') as f:
    f.write(json.dumps(bycommunity).encode("utf-8"))
The result is:
{"0": [{"60357": "?????? ??? ??? ???? ???????? ??????"}]}
data = {"0": [{"60357": "این یک پیام است"}]}
with open('data.json', 'w') as f:
json.dump(data, f, ensure_ascii=False)
and also check this Answer for more details
with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
jsonf.write(json.dumps(data, ensure_ascii=False, indent=4))
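The key detail is that json.dumps escapes every non-ASCII character to \uXXXX sequences by default; ensure_ascii=False keeps the raw characters, which is why the file must then be opened with a UTF-8 encoding. A minimal sketch of the difference:
import json

data = {"0": [{"60357": "این یک پیام است"}]}
print(json.dumps(data))                      # {"0": [{"60357": "\u0627\u06cc\u0646 ..."}]}
print(json.dumps(data, ensure_ascii=False))  # {"0": [{"60357": "این یک پیام است"}]}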

Compare two large files and combine matching information

I have two fairly hefty files: a JSON file (185,000 lines) and a CSV (650,000 lines). I need to iterate through each dict in the JSON file, then within that iterate through each part in part_numbers, and look the part up in the CSV to get the three-letter code from the row where that part is found.
For some reason I'm having a hard time doing this properly. The first version of my script was way too slow, so I'm trying to speed it up.
JSON Example:
[
    {"category": "Dryer Parts", "part_numbers": ["ABC", "DEF", "GHI", "JKL", "MNO", "PQR"], "parent_category": "Dryers"},
    {"category": "Washer Parts", "part_numbers": ["ABC", "DEF", "GHI", "JKL", "MNO", "PQR"], "parent_category": "Washers"},
    {"category": "Sink Parts", "part_numbers": ["ABC", "DEF", "GHI", "JKL", "MNO", "PQR"], "parent_category": "Sinks"},
    {"category": "Other Parts", "part_numbers": ["ABC", "DEF", "GHI", "JKL", "MNO", "PQR"], "parent_category": "Others"}
]
The CSV:
WCI|ABC
WPL|DEF
BSH|GHI
WCI|JKL
The end dict would look like below:
{"category": "Other Parts",
"part_numbers": ["WCIABC","WPLDEF","BSHGHI","JKLWCI"...]}
Here's an example of what I've made so far, although it returns IndexError: list index out of range at if (part.rstrip() == row[1])::
import csv
import json
from multiprocessing import Pool

def find_part(item):
    data = {
        'parent_category': item['parent_category'],
        'category': item['category'],
        'part_numbers': []
    }
    for part in item['part_numbers']:
        for row in reader:
            if part.rstrip() == row[1]:
                data['part_numbers'].append(row[0] + row[1])
    with open('output.json', 'a') as outfile:
        outfile.write(' ')
        json.dump(data, outfile)
        outfile.write(',\n')

if __name__ == '__main__':
    catparts = json.load(open('catparts.json', 'r'))
    partfile = open('partfile.csv', 'r')
    reader = csv.reader(partfile, delimiter='|')
    with open('output.json', 'w+') as outfile:
        outfile.write('[\n')
    p = Pool(50)
    p.map(find_part, catparts)
    with open('output.json', 'a') as outfile:
        outfile.write('\n]')
As I said in a comment, your code (now) gives me a NameError: name 'reader' is not defined in the find_part() function. The fix is to move the creation of the csv.reader into the function. I also changed how the file is opened to use a with context manager and a newline='' argument; this also solves the problem of a bunch of separate tasks all trying to read the same CSV file at the same time.
Your approach is very inefficient because it reads the entire 'partfile.csv' file for every part in item['part_numbers']. Nevertheless, the following seems to work:
import csv
import json
from multiprocessing import Pool

def find_part(item):
    data = {
        'parent_category': item['parent_category'],
        'category': item['category'],
        'part_numbers': []
    }
    for part in item['part_numbers']:
        with open('partfile.csv', newline='') as partfile:  # open csv in Py 3.x
            for row in csv.reader(partfile, delimiter='|'):
                if part.rstrip() == row[1]:
                    data['part_numbers'].append(row[0] + row[1])
    with open('output.json', 'a') as outfile:
        outfile.write(' ')
        json.dump(data, outfile)
        outfile.write(',\n')

if __name__ == '__main__':
    catparts = json.load(open('catparts.json', 'r'))
    with open('output.json', 'w+') as outfile:
        outfile.write('[\n')
    p = Pool(50)
    p.map(find_part, catparts)
    with open('output.json', 'a') as outfile:
        outfile.write(']')
Here's a significantly more efficient version that only reads the entire 'partfile.csv' file once per subprocess:
import csv
import json
from multiprocessing import Pool

def find_part(item):
    data = {
        'parent_category': item['parent_category'],
        'category': item['category'],
        'part_numbers': []
    }
    with open('partfile.csv', newline='') as partfile:  # open csv for reading in Py 3.x
        partlist = [row for row in csv.reader(partfile, delimiter='|')]
    for part in item['part_numbers']:
        part = part.rstrip()
        for row in partlist:
            if row[1] == part:
                data['part_numbers'].append(row[0] + row[1])
    with open('output.json', 'a') as outfile:
        outfile.write(' ')
        json.dump(data, outfile)
        outfile.write(',\n')

if __name__ == '__main__':
    catparts = json.load(open('catparts.json', 'r'))
    with open('output.json', 'w+') as outfile:
        outfile.write('[\n')
    p = Pool(50)
    p.map(find_part, catparts)
    with open('output.json', 'a') as outfile:
        outfile.write(']')
While you could read the 'partfile.csv' data into memory in the main task and pass it as an argument to the find_part() subtasks, doing so would just mean that the data would have to be pickled and unpickled for every process. You would need to run some timing tests to determine whether that would be faster than using the csv module to explicitly read it, as shown above.
Also note that it would be more efficient to preprocess the data loaded from the 'catparts.json' file and strip the trailing whitespace from every part number before submitting tasks to the Pool, because then you wouldn't need to do the part = part.rstrip() in find_part() over and over. Again, I don't know whether doing so would be worth the effort; only timing tests can determine the answer.
I think I found it. Your CSV reader is like many other file-access methods: you read the file sequentially until you hit EOF. When you try to do the same for the second part, the file is already at EOF, and the first read attempt returns a null result, which has no second element.
If you want to access all of the records again, you need to reset the file bookmark. The easiest way is to seek back to byte 0 with
partfile.seek(0)
Another way is to close and reopen the file.
Does that get you moving?
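In other words, the inner loop needs to rewind the file before each pass; a minimal sketch of that fix applied to the original loop:
for part in item['part_numbers']:
    partfile.seek(0)  # rewind so csv.reader starts from the top again
    for row in csv.reader(partfile, delimiter='|'):
        if part.rstrip() == row[1]:
            data['part_numbers'].append(row[0] + row[1])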
This should work as long as all the part numbers exist in the csv.
import json

# read part codes into a dictionary keyed by part number
with open('partfile.csv') as fp:
    partcodes = {}
    for line in fp:
        code, number = line.strip().split('|')
        partcodes[number] = code

with open('catparts.json') as fp:
    catparts = json.load(fp)

# prepend each part's code to its number
for cat in catparts:
    cat['part_numbers'] = [partcodes[n] + n for n in cat['part_numbers']]

# output
with open('output.json', 'w') as fp:
    json.dump(catparts, fp)
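If some part numbers might be missing from the CSV, dict.get avoids the KeyError; a sketch that keeps unmatched parts unprefixed (an assumption about the desired behavior):
# keep unmatched part numbers as-is instead of raising KeyError
for cat in catparts:
    cat['part_numbers'] = [partcodes.get(n, '') + n for n in cat['part_numbers']]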

Writing to a JSON file and updating said file

I have the following code that will write to a JSON file:
import json

def write_data_to_table(word, hash):
    data = {word: hash}
    with open("rainbow_table\\rainbow.json", "a+") as table:
        table.write(json.dumps(data))
What I want to do is open the JSON file, add another line to it, and close it. How can I do this without messing with the file?
As of right now when I run the code I get the following:
write_data_to_table("test1", "0123456789")
write_data_to_table("test2", "00123456789")
write_data_to_table("test3", "000123456789")
#<= {"test1": "0123456789"}{"test2": "00123456789"}{"test3": "000123456789"}
How can I update the file without completely screwing with it?
My expected output would probably be something along the lines of:
{
    "test1": "0123456789",
    "test2": "00123456789",
    "test3": "000123456789"
}
You can read the JSON data with:
parsed_json = json.loads(json_string)
You can now manipulate it as a regular dictionary and add data with:
parsed_json.update({'test4': '0000123456789'})
Then you can write the data back to a file using:
with open('data.txt', 'w') as outfile:
    json.dump(parsed_json, outfile)
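Putting those three steps together as one read-modify-rewrite round trip, a minimal sketch using the question's own function (assuming rainbow.json holds at most one JSON object):
import json

def write_data_to_table(word, hash):
    # load the existing object (or start fresh), update in memory, rewrite the file
    try:
        with open("rainbow_table\\rainbow.json") as table:
            data = json.load(table)
    except (FileNotFoundError, ValueError):  # missing or empty file
        data = {}
    data[word] = hash
    with open("rainbow_table\\rainbow.json", "w") as table:
        json.dump(data, table, indent=4)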
If you are sure the closing "}" is the last byte in the file, you can do this:
>>> f = open('test.json', 'a+')
>>> json.dump({"foo": "bar"}, f)  # create the file
>>> f.seek(0)
>>> f.read()
'{"foo": "bar"}'
>>> f.seek(-1, 2)
>>> f.write(',\n' + json.dumps({"spam": "bacon"})[1:])
>>> f.seek(0)
>>> print(f.read())
{"foo": "bar",
"spam": "bacon"}
(Note that in Python 3 a nonzero end-relative seek like f.seek(-1, 2) is only allowed on files opened in binary mode, so this trick as written only works in Python 2.)
Since your data is not hierarchical, you should consider a flat format like "TSV".
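For example, a minimal sketch of the rainbow table kept as tab-separated lines (the .tsv filename is just an assumption), which makes appending a single record trivial:
def write_data_to_table(word, hash):
    # append one word<TAB>hash record per line; no rewrite of earlier lines needed
    with open("rainbow_table\\rainbow.tsv", "a", encoding="utf-8") as table:
        table.write(f"{word}\t{hash}\n")

def load_table():
    # rebuild the dictionary from the flat file
    with open("rainbow_table\\rainbow.tsv", encoding="utf-8") as table:
        return dict(line.rstrip("\n").split("\t") for line in table)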

Python read JSON file and modify

Hi, I am trying to take the data from a JSON file, insert an id, and then perform a REST POST.
My file data.json has:
{
    'name': 'myname'
}
and I would like to add an id so that the JSON data looks like:
{
    'id': 134,
    'name': 'myname'
}
So I tried:
import json

f = open("data.json", "r")
data = f.read()
jsonObj = json.loads(data)
I can't get the JSON file to load. What should I do to convert the file into a JSON object and add another id value?
Set item using data['id'] = ....
import json

with open('data.json', 'r+') as f:
    data = json.load(f)
    data['id'] = 134  # <--- add `id` value.
    f.seek(0)         # <--- reset file position to the beginning.
    json.dump(data, f, indent=4)
    f.truncate()      # remove the remaining part
falsetru's solution is nice, but has a little bug:
Suppose the original 'id' value was longer than 5 characters. When we then dump the new 'id' (134, with only 3 characters), the string written from position 0 in the file is shorter than the original. Extra characters (such as '}') are left in the file from the original content.
I solved that by replacing the original file.
import json
import os

filename = 'data.json'
with open(filename, 'r') as f:
    data = json.load(f)
    data['id'] = 134  # <--- add `id` value.

os.remove(filename)
with open(filename, 'w') as f:
    json.dump(data, f, indent=4)
I would like to present a modified version of Vadim's solution. It helps to deal with asynchronous requests that write to or modify the JSON file. I know it wasn't part of the original question, but it might be helpful for others.
In the case of asynchronous file modification, os.remove(filename) will raise FileNotFoundError if requests come in frequently. To overcome this problem, you can create a temporary file with the modified content and then rename it, simultaneously replacing the old version. This solution works fine for both synchronous and asynchronous cases.
import os, json, uuid

filename = 'data.json'
with open(filename, 'r') as f:
    data = json.load(f)
    data['id'] = 134  # <--- add, remove, or modify content

# create a randomly named temporary file to avoid
# interference with other threads/asynchronous requests
tempfile = os.path.join(os.path.dirname(filename), str(uuid.uuid4()))
with open(tempfile, 'w') as f:
    json.dump(data, f, indent=4)

# rename the temporary file, replacing the old file
os.rename(tempfile, filename)
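One caveat on the last line: os.rename raises FileExistsError on Windows when the destination already exists, so os.replace (Python 3.3+) is the safer cross-platform choice for this overwrite-by-rename pattern:
# atomically replace the old file even if it exists (also works on Windows)
os.replace(tempfile, filename)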
There are really quite a number of ways to do this, and all of the above are in one way or another valid approaches... Let me add a straightforward proposition. So assuming your current existing JSON file looks like this:
{
    "name": "myname"
}
and you want to bring in this new JSON content (adding the key "id"):
{
    "id": "134",
    "name": "myname"
}
My approach has always been to keep the code extremely readable with easily traceable logic. So first, we read the entire existing json file into memory, assuming you are very well aware of your json's existing key(s).
import json

# first, get the path to the json file
PATH_TO_JSON = 'data.json'  # assuming same directory (but you can work your magic here with os.)

# read the existing json into memory, to preserve the existing data
with open(PATH_TO_JSON, 'r') as jsonfile:
    json_content = json.load(jsonfile)  # this is now in memory! you can use it outside 'open'
Next, we use the with open() syntax again, this time with the 'w' option. 'w' is write mode, which lets us edit and write new information to the file. Here's the catch that works for us: any existing file with the same target name will be erased automatically.
So what we can do now is simply write to the same filename with the new data:
# add the id key-value pair (remember it already has the "name" key-value)
json_content["id"] = "134"

with open(PATH_TO_JSON, 'w') as jsonfile:
    json.dump(json_content, jsonfile, indent=4)  # you decide the indentation level
And there you go! data.json should be good to go for a good old POST request.
Try this script:
import json

with open("data.json") as f:
    data = json.load(f)
data["id"] = 134
json.dump(data, open("data.json", "w"), indent=4)
The result is:
{
    "name": "myname",
    "id": 134
}
Only the arrangement is different. You can solve that by converting the data to a list of pairs, inserting the new pair where you want it, then rebuilding the dict and saving the file, like this:
import json

index_add = 0
with open("data.json") as f:
    data = json.load(f)
data_li = [[k, v] for k, v in data.items()]
data_li.insert(index_add, ["id", 134])
data = {k: v for k, v in data_li}
json.dump(data, open("data.json", "w"), indent=4)
The result is:
{
    "id": 134,
    "name": "myname"
}
You can add an if condition so as not to repeat the key, but just change its value, like this:
import json

index_add = 0
n_k = "id"
n_v = 134
with open("data.json") as f:
    data = json.load(f)
if n_k in data:
    data[n_k] = n_v
else:
    data_li = [[k, v] for k, v in data.items()]
    data_li.insert(index_add, [n_k, n_v])
    data = {k: v for k, v in data_li}
json.dump(data, open("data.json", "w"), indent=4)
This implementation should suffice:
with open(jsonfile, 'r') as file:
    data = json.load(file)
data['id'] = value
with open(jsonfile, 'w') as file:
    json.dump(data, file)
This uses a context manager to open the jsonfile; data holds the updated object, which is dumped into the overwritten jsonfile in 'w' mode.
Not exactly your solution, but it might help some people solving this issue with keys.
I have a list of files in a folder, and I need to make JSON out of it with keys.
After many hours of trying, the solution is simple.
Solution:
import os

async def return_file_names():
    dir_list = os.listdir("./tmp/")
    json_dict = {"responseObj": [{"Key": dir_list.index(value), "Value": value} for value in dir_list]}
    print(json_dict)
    return json_dict
The response looks like this:
{
    "responseObj": [
        {"Key": 0, "Value": "bottom_mask.GBS"},
        {"Key": 1, "Value": "bottom_copper.GBL"},
        {"Key": 2, "Value": "copper.GTL"},
        {"Key": 3, "Value": "soldermask.GTS"},
        {"Key": 4, "Value": "ncdrill.DRD"},
        {"Key": 5, "Value": "silkscreen.GTO"}
    ]
}
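One caveat with dir_list.index(value): it rescans the list from the start for every element, which makes the comprehension quadratic. enumerate does the same job in a single pass; a minimal sketch:
import os

def return_file_names():
    dir_list = os.listdir("./tmp/")
    # enumerate yields (index, value) pairs in one pass, no rescanning
    return {"responseObj": [{"Key": i, "Value": v} for i, v in enumerate(dir_list)]}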
