How to open multiple JSON files and save them in multiple variables - Python

I have some JSON files. The naming convention is dataset_ML-Model_params.json, for example House_Lasso_params.json, House_RF_params.json, Bike_Lasso_params.json, and Bike_RF_params.json.
All of these files contain tuning hyperparameters in dict format. I can open one file using the code below:
filename = f"{args.dataset}_Lasso_params.json"
outfile = HT_OUT / filename
with open(outfile, "r") as file:
    d_loaded = json.load(file)
Then I pass the values to the model:
Lasso(**d_loaded, precompute=True)
And again for another file:
filename = f"{args.dataset}_RF_params.json"
outfile = HT_OUT / filename
with open(outfile, "r") as file:
    rf_loaded = json.load(file)
RF(**rf_loaded)
Here, args.dataset contains the dataset name. How can I load these two files and save them in different variables, so that later I can pass each variable to the model? Like this:
# After opening and saving the json file in different variable
Lasso(**lasso_params, precompute=True)
RF(**rf_params)

Make a list of all the models:
MODEL_NAMES = ["Lasso", "RF"]
Make another dictionary to save the params for each model:
models_params = {}
for model_name in MODEL_NAMES:
    filename = f"{args.dataset}_{model_name}_params.json"
    outfile = HT_OUT / filename
    with open(outfile, "r") as file:
        d_loaded = json.load(file)
    models_params[model_name] = d_loaded
Later, use get(key) to access the params you need:
Lasso(**(models_params.get('Lasso')), precompute=True)
RF(**(models_params.get('RF')))
You can also check the params
print(Lasso(**(models_params.get('Lasso')), precompute=True).get_params())

You could use another dict that contains the params you need.
For example,
model_params = {'lasso_params': smth_here, 'rf_params': smth_here}
Then you can get the needed values with
**model_params['lasso_params']
To get all files matching that wildcard (dataset_ML-Model_params.json) you could use the glob module:
from glob import glob
glob('*_params.json')  # returns ['House_Lasso_params.json', 'House_RF_params.json', ...]
And then just read them one by one.
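For example, here is a minimal sketch tying the glob idea to the dict from the first answer; it assumes HT_OUT is a pathlib.Path (as the question's HT_OUT / filename suggests) and that the dataset name itself contains no underscores:
import json

models_params = {}
# e.g. matches House_Lasso_params.json and House_RF_params.json
for path in HT_OUT.glob(f"{args.dataset}_*_params.json"):
    # "House_Lasso_params.json" -> ["House", "Lasso", "params.json"] -> "Lasso"
    model_name = path.name.split("_")[1]
    with open(path, "r") as file:
        models_params[model_name] = json.load(file)

Lasso(**models_params["Lasso"], precompute=True)
RF(**models_params["RF"])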

How to use elements in list by order

My goal is to convert multiple CSV files in a folder into JSON.
First, I needed to list my CSV files:
for file in os.listdir("C:/Users/folder_to_csv"):
    filename = os.fsdecode(file)
    if filename.endswith(".csv"):
        # check if csv files are listed correctly
        print(os.path.join("C:/Users/folder_to_csv", filename))
With this, I was able to list the CSV files in that folder.
Result:
C:/Users/folder_to_csv\file_1.csv
C:/Users/folder_to_csv\file_2.csv
C:/Users/folder_to_csv\file_3.csv
Then, I wanted to convert all of the CSV files in 'csvlist' to jsonObj; however, for some reason, my code only uses the first file (C:/Users/folder_to_csv\file_1.csv).
This is what I have tried so far:
import json
import csv
import requests
import threading
import os

for file in os.listdir("C:/Users/folder_to_csv"):
    filename = os.fsdecode(file)
    if filename.endswith(".csv"):
        csvlist = os.path.join("C:/Users/folder_to_csv", filename)

data = {}

def main():
    # loop csv list so my codes can read all csv files
    length = len(csvlist)
    for i in range(length):
        i += 1
        path = csvlist
        # switch csv to json
        with open(path, mode='r') as f:
            reader = csv.DictReader(f)
            processdata = [row for row in reader]
            dlist = processdata
            jsonObj = json.dumps(dlist)
            print(jsonObj)

main()
In the initial loop, you keep redefining the csvlist variable. I suppose you want it to be a list? Then just create an empty list up front and append to it instead of redefining it:
csvlist = []
...
csvlist.append(os.path.join("C:/Users/folder_to_csv", filename))
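A minimal sketch with that fix applied, iterating over the collected paths instead of indexing (the folder path is the one from the question; everything else follows the original code):
import csv
import json
import os

folder = "C:/Users/folder_to_csv"

# Build a list of full paths instead of overwriting a single string
csvlist = []
for file in os.listdir(folder):
    filename = os.fsdecode(file)
    if filename.endswith(".csv"):
        csvlist.append(os.path.join(folder, filename))

# Iterate over the paths themselves rather than over range(len(csvlist))
for path in csvlist:
    with open(path, mode='r') as f:
        reader = csv.DictReader(f)
        jsonObj = json.dumps(list(reader))
        print(jsonObj)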

Write list to file using Pickle

I am reading a set of files into a list and writing the list to a single file using pickle.dump. But some invalid characters are added at the beginning of the new file, like \80]q\00(XK\00\00\00. How can I remove these?
if(gyro):
    lines = []
    for fil in gyro:
        file_name = os.path.join(file_path, fil)
        file_data = open(file_name)
        lines.extend(file_data.readlines())
        file_data.close()
        os.remove(file_name)
    uncompressed_filename = "raw_gyro.txt"
    uncompressed_filename = os.path.join(file_path, uncompressed_filename)
    with open(uncompressed_filename, "wb") as fp3:
        print("Created", uncompressed_filename)
        pickle.dump(lines, fp3)
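For what it's worth, those leading bytes are not corruption: pickle.dump writes pickle's binary serialization format, so the file is not meant to be opened as plain text. A minimal sketch of the two usual options, assuming file_path and lines are the variables from the code above (the .pkl filename is made up for illustration):
import os
import pickle

# Option 1: keep pickle; write and read the file back with pickle, not as text
pkl_path = os.path.join(file_path, "raw_gyro.pkl")
with open(pkl_path, "wb") as fp:
    pickle.dump(lines, fp)
with open(pkl_path, "rb") as fp:
    restored_lines = pickle.load(fp)

# Option 2: if a plain, human-readable file is wanted, skip pickle and write the text directly
with open(os.path.join(file_path, "raw_gyro.txt"), "w") as fp:
    fp.writelines(lines)  # readlines() keeps the trailing newlines, so no extra '\n' is needed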

How to assign the elements of a list as file names in python?

I am trying to assign the elements of a list as names for some files that live in a directory. So far I have created a function that recovers the name of each file from a directory and returns them in a list:
def retrive(directory_path):
    path_names = []
    for filename in sorted(glob.glob(os.path.join(directory_path, '*.pdf'))):
        retrieved_files = filename.split('/')[-1]
        path_names.append(retrieved_files)
    print(path_names)
The above function returns the name of each file in a list. Then I write the files into another directory as follows:
path = os.path.join(new_dir_path, "list%d.txt" % i)
#This is the path of each new file:
#print(path)
with codecs.open(path, "w", encoding='utf8') as filename:
    for item in [a_list]:
        filename.write(item+"\n")
Finally, my question is: how can I assign each element of path_names as the name of each file? Something like this line:
path = os.path.join(new_dir_path, "list%d.txt" % i)
I also tried to use the format() function; however, I still can't assign the correct name to each file.
Here's the full script:
def transform_directoy(input_directory, output_directory):
    import codecs, glob, os
    from tika import parser
    all_texts = []
    for filename in sorted(glob.glob(os.path.join(input_directory, '*.pdf'))):
        parsed = parser.from_file(filename)
        texts = parsed['content']
        all_texts.append(texts)
    for i, a_list in enumerate(all_texts):
        new_dir_path = output_directory
        #print(new_dir_path)
        path = os.path.join(new_dir_path, "list%d.txt" % i)
        with codecs.open(path, "w", encoding='utf8') as filename:
            for item in [a_list]:
                filename.write(item+"\n")
The desired output will consist of the actual names of each processed file.
You’re almost there:
for path_name in path_names:
    path = os.path.join(new_dir_path, "list%s.txt" % path_name)
    #This is the path of each new file:
    #print(path)
    with codecs.open(path, "w", encoding='utf8') as f:
        for item in [a_list]:
            f.write(item+"\n")
Update based on the updated code sample. You are using two separate loops here, and that is not ideal unless you are doing processing in between them. Since I am going to keep that structure, we have to associate each block of content with the original filename. The best structure for that is a dict, and in case order is important, we use an OrderedDict.
Now, when we loop over the filename/content pairs in the OrderedDict, we want to change the extension of the file to match the new file type. Luckily, Python has some nice utilities for file/path manipulation in the os.path module: os.path.basename can be used to strip the directory from a file, and os.path.splitext will strip the extension from a filename. We use both of those to get just the filename without the extension and then append .txt to designate the new file type. Putting it all together, we get:
def transform_directoy(input_directory, output_directory):
    import codecs, glob, os
    from collections import OrderedDict
    from tika import parser
    all_texts = OrderedDict()
    for filename in sorted(glob.glob(os.path.join(input_directory, '*.pdf'))):
        parsed = parser.from_file(filename)
        filename = os.path.basename(filename)
        texts = parsed['content']
        all_texts[filename] = texts
    for i, (original_filename, a_list) in enumerate(all_texts.items()):
        new_filename, _ = os.path.splitext(original_filename)
        new_filename += '.txt'
        new_dir_path = output_directory
        #print(new_dir_path)
        path = os.path.join(new_dir_path, new_filename)
        # Print out the name of the file we are processing
        print('Transforming %s => %s' % (original_filename, path,))
        with codecs.open(path, "w", encoding='utf8') as filename:
            for item in [a_list]:
                filename.write(item+"\n")
Second update: OP asked how I would write this code if this was all that there was, so here goes:
# move imports to top of file: PEP 8
import codecs, glob, os
from tika import parser
def transform_directoy(input_directory, output_directory):
    for filename in sorted(glob.glob(os.path.join(input_directory, '*.pdf'))):
        parsed = parser.from_file(filename)
        parsed_content = parsed['content']
        original_filename = os.path.basename(filename)
        new_filename, _ = os.path.splitext(original_filename)
        new_filename += '.txt'
        path = os.path.join(output_directory, new_filename)
        # Print out the name of the file we are processing
        print('Transforming %s => %s' % (original_filename, path,))
        # no need for a second loop since we can piggy back off the first loop
        with codecs.open(path, "w", encoding='utf8') as filename:
            # No need for a for loop here since our list only has one item
            filename.write(parsed_content)
            filename.write("\n")
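A hypothetical call, just to show the expected arguments (the directory names are made up for illustration):
# Reads every PDF in ./pdfs and writes a .txt with the same base name into ./texts
transform_directoy('pdfs', 'texts')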

Modifying JSON key values in Python

I am trying to load a JSON file, change specific key values, and then save the updated entries to a new file. This JSON file has many entries with the same format. This is my furthest attempt before coming here; however, it does not save the new values.
What am I missing?
#!/usr/bin/python
import simplejson as json
import names
in_file = open('Names.json', 'r')
out_file = open('Names_new.json','w')
data_file = in_file.read()
data = json.loads(data_file)
for x in data:
    nickname = x['nickname']
    newname = names.get_first_name()
    nickname = newname
out_file.write(json.dumps(data))
out_file.close()
The problem is that you didn't change x['nickname'] when you wanted to assign newname to it. Instead, you only modified the variable nickname.
Try assigning to x['nickname'] directly:
for x in data:
    x['nickname'] = names.get_first_name()
You are just dumping the old JSON data into a new file without modifying its contents.
Instead, you should update the data with newname before dumping it:
#!/usr/bin/python
import simplejson as json
import names
in_file = open('Names.json', 'r')
out_file = open('Names_new.json','w')
data_file = in_file.read()
data = json.loads(data_file)
for x in data:
    newname = names.get_first_name()
    x['nickname'] = newname
out_file.write(json.dumps(data))
out_file.close()
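For reference, the same fix written with context managers and the standard json module, so the files are closed automatically; this is just a sketch of an equivalent approach, not part of the original answer:
#!/usr/bin/python
import json
import names

with open('Names.json', 'r') as in_file:
    data = json.load(in_file)

# Replace each entry's nickname in place
for x in data:
    x['nickname'] = names.get_first_name()

with open('Names_new.json', 'w') as out_file:
    json.dump(data, out_file)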

Save file without first and last double quotes

I am trying to save my data to a file. My problem is that the saved file contains double quotes at the beginning and end of each line. I have tried many ways to solve it, from str.replace() and strip to csv, json, and pickle; however, the problem persists and I am stuck. I will detail my problem below.
First, I have a file called angles.txt like this:
{'left_w0': -2.6978887076110842, 'left_w1': -1.3257428944152834, 'left_w2': -1.7533400385498048, 'left_e0': 0.03566505327758789, 'left_e1': 0.6948932961181641, 'left_s0': -1.1665923878540039, 'left_s1': -0.6726505747192383}
{'left_w0': -2.6967382220214846, 'left_w1': -0.8440729275695802, 'left_w2': -1.7541070289428713, 'left_e0': 0.036048548474121096, 'left_e1': 0.16682041049194338, 'left_s0': -0.7731263162109375, 'left_s1': -0.7056311616210938}
I read the text file line by line and transfer it to a dict variable called data. Here is the reading code:
def read_data_from_file(file_name):
    data = dict()
    f = open(file_name, 'r')
    for index_line in range(1, number_lines + 1):
        data[index_line] = eval(f.readline())
    f.close()
    return data
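As an aside (my note, not part of the original question): eval will execute whatever is in the file, and since each line is a plain dict literal, ast.literal_eval is a safer drop-in. A sketch that also avoids the undefined number_lines by iterating over the file directly:
import ast

def read_data_from_file(file_name):
    data = dict()
    with open(file_name, 'r') as f:
        # enumerate from 1 so the keys match the original 1-based line indices
        for index_line, line in enumerate(f, start=1):
            data[index_line] = ast.literal_eval(line)
    return data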
Then I changed something in the data, something like data[index_line]['left_w0'] = data[index_line]['left_w0'] + 0.0006. After that I wrote my data into another text file. Here is the code:
def write_data_to_file(data, file_name):
    f = open(file_name, 'wb')
    data_convert = dict()
    for index_line in range(1, number_lines):
        data_convert[index_line] = repr(data[index_line])
        data_convert[index_line] = data_convert[index_line].replace('"', '')  # I also used strip
        json.dump(data_convert[index_line], f)
        f.write('\n')
    f.close()
The result I received in the new file is:
"{'left_w0': -2.6978887076110842, 'left_w1': -1.3257428944152834, 'left_w2': -1.7533400385498048, 'left_e0': 0.03566505327758789, 'left_e1': 0.6948932961 181641, 'left_s0': -1.1665923878540039, 'left_s1': -0.6726505747192383}"
"{'left_w0': -2.6967382220214846, 'left_w1': -0.8440729275695802, 'left_w2': -1.7541070289428713, 'left_e0': 0.036048548474121096, 'left_e1': 0.166820410 49194338, 'left_s0': -0.7731263162109375, 'left_s1': -0.7056311616210938}"
I cannot remove the double quotes.
The quotes appear because you convert each dict to a string with repr() and then json.dump that string, so JSON writes it as a quoted string literal. You could simplify your code by removing the unnecessary transformations and dumping the data structure itself:
import json
def write_data_to_file(data, filename):
    with open(filename, 'w') as file:
        json.dump(data, file)

def read_data_from_file(filename):
    with open(filename) as file:
        return json.load(file)
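A quick usage sketch of these helpers. One caveat (my note, not from the original answer): json.load will not parse the original angles.txt, because that file contains Python dict literals with single quotes, and a JSON round trip turns the integer line-number keys into strings:
# data as built by read_data_from_file in the question: {1: {...}, 2: {...}}
data = {1: {'left_w0': -2.6978887076110842}, 2: {'left_w0': -2.6967382220214846}}
data[1]['left_w0'] += 0.0006

write_data_to_file(data, 'angles_new.json')
reloaded = read_data_from_file('angles_new.json')
print(reloaded['1']['left_w0'])  # note the string key after the JSON round trip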
