I am downloading a file with boto3 from AWS S3, it's a basic JSON file.
{
"Counter": 0,
"NumOfReset": 0,
"Highest": 0
}
I can open the JSON file, but when I go to dump it back to the same file after changing some values, I get IOError: [Errno 9] Bad file descriptor.
with open("/tmp/data.json", "rw") as fh:
data = json.load(fh)
i = data["Counter"]
i = i + 1
if i >= data["Highest"]:
data["Highest"] = i
json.dump(data, fh)
fh.close()
Am I just using the wrong file mode or am I doing this incorrectly?
Two things. Its r+ not rw, and if you want to overwrite the previous data, you need to return to the beginning of the file, using fh.seek(0). Otherwise, the changed JSON string would be appended.
with open("/tmp/data.json", "r+") as fh:
data = json.load(fh)
i = data["Counter"]
i = i + 1
if i >= data["Highest"]:
data["Highest"] = i
fh.seek(0)
json.dump(data, fh)
fh.close()
But that may overwrite the data only partially. So closing and re-opening the file with w is probably a better idea.
with open("/tmp/data.json", "r") as fh:
data = json.load(fh)
i = data["Counter"]
i = i + 1
if i >= data["Highest"]:
data["Highest"] = i
with open("/tmp/data.json", "w") as fh:
json.dump(data, fh)
fh.close()
No need to fh.close(), that's what with .. as is for.
Related
I have this ".txt" file image so I want to convert it to a JSON file using python
I've tried a lot of solutions but It didn't work because of the format of the file.
can anyone help me, please!
can I convert it so it will be easy to manipulate it?
This is my file
Teste: 89
IGUAL
{
"3C:67:8C:E7:F5:C8": ["b''", "-83"],
"64:23:15:3D:25:FC": ["b'HUAWEI-B311-25FC'", "-83"],
"98:00:6A:1D:6F:CA": ["b'WE'", "-83"],
"64:23:15:3D:25:FF": ["b''", "-83"],
"D4:6B:A6:C7:36:24": ["b'Wudi'", "-51"],
"00:1E:2A:1B:A5:74": ["b'NETGEAR'", "-54"],
"3C:67:8C:63:70:54": ["b'Vodafone_ADSL_2018'", "-33"],
"90:F6:52:67:EA:EE": ["b'Akram'", "-80"],
"04:C0:6F:1F:07:40": ["b'memo'", "-60"],
"80:7D:14:5F:A7:FC": ["b'WIFI 1'", "-49"]
}
and this is the code I tried
import json
filename = 'data_strength/dbm-2021-11-21_12-11-47.963190.txt'
dict1 = {}
with open(filename) as fh:
for line in fh:
command, description = line.strip().split(None, 10)
dict1[command] = description.strip()
out_file = open('test1.json', "w")
json.dump(dict1, out_file, indent=4, sort_key=False)
out_file.close()
The JSON structure in your file starts at the first occurrence of a left brace. Therefore, you can just do this:
import json
INPUT = 'igual.txt'
OUTPUT = 'igual.json'
with open(INPUT) as igual:
contents = igual.read()
if (idx := contents.find('{')) >= 0:
d = json.loads(contents[idx:])
with open(OUTPUT, 'w') as jout:
json.dump(d, jout, indent=4)
My program takes a csv file as input and writes it as an output file in json format. On the final line, I use the print command to output the contents of the json format file to the screen. However, it does not print out the json file contents and I don't understand why.
Here is my code that I have so far:
import csv
import json
def jsonformat(infile,outfile):
contents = {}
csvfile = open(infile, 'r')
reader = csvfile.read()
for m in reader:
key = m['No']
contents[key] = m
jsonfile = open(outfile, 'w')
jsonfile.write(json.dumps(contents))
csvfile.close()
jsonfile.close()
return jsonfile
infile = 'orders.csv'
outfile = 'orders.json'
output = jsonformat(infile,outfile)
print(output)
Your function returns the jsonfile variable, which is a file.
Try adding this:
jsonfile.close()
with open(outfile, 'r') as file:
return file.read()
Your function returns a file handle to the file jsonfile that you then print. Instead, return the contents that you wrote to that file. Since you opened the file in w mode, any previous contents are removed before writing the new contents, so the contents of your file are going to be whatever you just wrote to it.
In your function, do:
def jsonformat(infile,outfile):
...
# Instead of this:
# jsonfile.write(json.dumps(contents))
# do this:
json_contents = json.dumps(contents, indent=4) # indent=4 to pretty-print
jsonfile.write(json_contents)
...
return json_contents
Aside from that, you aren't reading the CSV file the correct way. If your file has a header, you can use csv.DictReader to read each row as a dictionary. Then, you'll be able to use for m in reader: key = m['No']. Change reader = csvfile.read() to reader = csv.DictReader(csvfile)
As of now, reader is a string that contains all the contents of your file. for m in reader makes m each character in this string, and you cannot access the "No" key on a character.
a_file = open("sample.json", "r")
a_json = json.load(a_file)
pretty_json = json.dumps(a_json, indent=4)
a_file.close()
print(pretty_json)
Using this sample to print the contents of your json file. Have a good day.
I'm reading the file in my HDFS using Python language.
Each file has a header and I'm trying to merge the files. However, the header in each file also gets merged.
Is there a way to skip the header from second file?
hadoop = sc._jvm.org.apache.hadoop
conf = hadoop.conf.Configuration()
fs = hadoop.fs.FileSystem.get(conf)
src_dir = "/mnt/test/"
out_stream = fs.create(hadoop.fs.Path(dst_file), overwrite)
files = []
for f in fs.listStatus(hadoop.fs.Path(src_dir)):
if f.isFile():
files.append(f.getPath())
for file in files:
in_stream = fs.open(file)
hadoop.io.IOUtils.copyBytes(in_stream, out_stream, conf, False)
Currently I have solved the problem with below logic, however would like to know if there is any better and efficient solution? appreciate your help
for idx,file in enumerate(files):
if debug:
print("Appending file {} into {}".format(file, dst_file))
# remove header from the second file
if idx>0:
file_str = ""
with open('/'+str(file).replace(':',''),'r+') as f:
for idx,line in enumerate(f):
if idx>0:
file_str = file_str + line
with open('/'+str(file).replace(':',''), "w+") as f:
f.write(file_str)
in_stream = fs.open(file) # InputStream object and copy the stream
try:
hadoop.io.IOUtils.copyBytes(in_stream, out_stream, conf, False) # False means don't close out_stream
finally:
in_stream.close()
What you are doing now is appending repeatedly to a string. This is a fairly slow process. Why not write directly to the output file as you are reading?
for file_idx, file in enumerate(files):
with open(...) as out_f, open(...) as in_f:
for line_num, line in enumerate(in_f):
if file_idx == 0 or line_num > 0:
f_out.write(line)
If you can load the file all at once, you can also skip the first line by using readline followed by readlines:
for file_idx, file in enumerate(files):
with open(...) as out_f, open(...) as in_f:
if file_idx != 0:
f_in.readline()
f_out.writelines(f_in.readlines())
Problem was the huge data number, and I have to do it with my personal laptop with 12GB RAM. I tried a loop with 1M. lines every round, and used csv.writer. But csv.writer wrote like 1M. lines every two hours. So, any other ways worth to try?
lines = 10000000
for i in range(0, 330):
list_str = []
with open(file, 'r') as f:
line_flag = 0
for _ in range(i*lines):
next(f)
for line in f:
line_flag = line_flag + 1
data = json.loads(line)['name']
if data != former_str:
list_str.append(data)
former_str = data
if line_flag == lines:
break
with open(self.path + 'data_range\\names.csv', 'a', newline='') as writeFile:
writer = csv.writer(writeFile, delimiter='\n')
writer.writerow(list_str)
writeFile.close()
another version
def read_large_file(f):
block_size = 200000000
block = []
for line in f:
block.append(line[:-1])
if len(block) == block_size:
yield block
block = []
if block:
yield block
def split_files():
with open(write_file, 'r') as f:
i = 0
for block in read_large_file(f):
print(i)
file_name = write_name + str(i) + '.csv'
with open(file_name, 'w', newline='') as f_:
writer = csv.writer(f_, delimiter='\n')
writer.writerow(block)
i += 1
This was after it read a block and writing ... I wonder how come the rate of data trasmission was keeping about 0.
It should be as simple as this:
import json
import csv
with open(read_file, 'rt') as r, open(write_file, 'wt', newline='') as w:
writer = csv.writer(w)
for line in r:
writer.writerow([json.loads(line)['name']])
I tried the loop inside the file, but I always get me a Error, I guessed we cannot write the data into another file while opening the file?
You totally can write data in one file while reading another. I can't tell you more about your error until you post what it said, though.
There was a bit in your code about former_str which is not covered under "extract one column", so I did not write anything about it.
Would something like this work?
Essentially using a generator to avoid reading the entire file in memory, and writing the data one line at a time.
import jsonlines # pip install jsonlines
from typing import Generator
def gen_lines(file_path: str, col_name: str) -> Generator[str]:
with jsonline.open(file_path) as f:
for obj in f:
yield obj[col_name]
# Here you can also change to writing a jsonline again
with open(output_file, "w") as out:
for item in gen_lines(your_file_path, col_name_to_extract):
out.write(f"{item}\n")
I'm using python code to read from many csv files and set encoding to utf8.I meet the problem when I read the file I can read all lines but when I write it, it can write only 1 line. Please help me to check my code as below:
def convert_files(files, ascii, to="utf-8"):
for name in files:
#print ("Convert {0} from {1} to {2}").format(name, ascii, to)
with open(name) as f:
print(name)
count = 0
lineno = 0
#this point I want to write the below text into my each new file at the first line
#file_source.write('id;nom;prenom;nom_pere;nom_mere;prenom_pere;prenom_mere;civilite (1=homme 2=f);date_naissance;arrondissement;adresse;ville;code_postal;pays;telephone;email;civilite_demandeur (1=homme 2=f);nom_demandeur;prenom_demandeur;qualite_demandeur;type_acte;nombre_actes\n')
for line in f.readlines():
lineno +=1
if lineno == 1 :
continue
file_source = open(name, mode='w', encoding='utf-8', errors='ignore')
#pass
#print (line)
# start write data to to new file with encode
file_source.write(line)
#file_source.close
#print unicode(line, "cp866").encode("utf-8")
csv_files = find_csv_filenames('./csv', ".csv")
convert_files(csv_files, "cp866")
You're reopening the file during every iteration.
for line in f.readlines():
lineno +=1
if lineno == 1 :
continue
#move the following line outside of the for block
file_source = open(name, mode='w', encoding='utf-8', errors='ignore')
If all you need is to change the character encoding of the files then it doesn't matter that they are csv files unless the conversion may change what characters are interpreted as delimiter, quotechar, etc:
def convert(filename, from_encoding, to_encoding):
with open(filename, newline='', encoding=from_encoding) as file:
data = file.read().encode(to_encoding)
with open(filename, 'wb') as outfile:
outfile.write(data)
for path in csv_files:
convert(path, "cp866", "utf-8")
Add errors parameter to change how encoding/decoding errors are handled.
If files may be large then you could convert data incrementally:
import os
from shutil import copyfileobj
from tempfile import NamedTemporaryFile
def convert(filename, from_encoding, to_encoding):
with open(filename, newline='', encoding=from_encoding) as file:
with NamedTemporaryFile('w', encoding=to_encoding, newline='',
dir=os.path.dirname(filename)) as tmpfile:
copyfileobj(file, tmpfile)
tmpfile.delete = False
os.replace(tmpfile.name, filename) # rename tmpfile -> filename
for path in csv_files:
convert(path, "cp866", "utf-8")
You can do this
def convert_files(files, ascii, to="utf-8"):
for name in files:
with open(name, 'r+') as f:
data = ''.join(f.readlines())
data.decode(ascii).encode(to)
f.seek(0)
f.write(data)
f.truncate()