I want to convert base64 to json - python

I want to convert from base64 to json. Existing files are .jpg, and the purpose is to load multiple files in one folder and make them into one json.
import base64
import json
import os
directory = os.listdir('C:/users/user/desktop/k-means/image')
os.chdir('C:/users/user/desktop/k-means/image')
data={}
for file in directory:
open_file = open(file,'rb')
image_read = open_file.read()
image_64_encode = base64.encodestring(image_read)
data[""] = image_64_encode.decode('ascii')
with open('words.json', 'w', encoding="utf-8") as make_file:
print(json.dumps(data))
The desired output is as follows. How do I modify my code?
{"data":
"AAAAAGHLd/f39/clOAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", "label": 5}
{"data": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", "label": 0}

You could do something like this:
import base64
import json
import os
from pprint import pprint
directory = os.listdir('C:/users/user/desktop/k-means/image')
os.chdir('C:/users/user/desktop/k-means/image')
data={}
for file in directory:
base = os.path.basename(file)
data["label"] = base
open_file = open(file,'rb')
image_read = open_file.read()
image_64_encode = base64.encodebytes(image_read)
data["data"] = image_64_encode.decode('ascii')
final_data = json.dumps(data)
final_data = json.loads(final_data)
pprint(final_data)
#output: {"data": "/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAIBAQIBAQICAgICAgICAwUDAwMDAwYEBAMFB...", "label": "Capture.JPG"}
Note: encodestring is a deprecated alias since Python3.1, I've used encodebytes instead.
To write this final_data to a json file, you must enclose the loop inside the open function for words.json:
with open('words.json', 'w') as make_file:
for file in directory:
base = os.path.basename(file)
data["label"] = base
open_file = open(file,'rb')
image_read = open_file.read()
image_64_encode = base64.encodebytes(image_read)
data["data"] = image_64_encode.decode('ascii')
final_data = json.dumps(data)
make_file.write(final_data)

Related

python change the value of a specific line

I have a 1000 json files, I need to change the value of a specific line with numeric sequence in all files.
An example
the specific line is - "name": "carl 00",
I need it to be like following
File 1
"name": "carl 1",
File 1
"name": "carl 2",
File 3
"name": "carl 3",
What is the right script to achieve the above using python
This should do the trick. But you're not very clear about how the data is stored in the actual json file. I listed two different approaches. The first is to parse the json file into a python dict then manipulate the data and then turn it back into a character string and then save it. The second is what I think you mean by "line". You can split the file's character string into a list then change the line you want, and remake the full string again, then save it.
This also assumes your json files are in the same folder as the python script.
import os
import json
my_files = [name1, name2, name3, ...] # ['file_name.json', ...]
folder_path = os.path.dirname(__file__)
for i, name in enumerate(my_files):
path = f'{folder_path}/{name}'
with open(path, 'r') as f:
json_text = f.read()
# if you know the key(s) in the json file...
json_dict = json.loads(json_text)
json_dict['name'] = json_dict['name'].replace('00', str(i))
new_json_str = json.dumps(json_dict)
# if you know the line number in the file...
line_list = json_text.split('\n')
line_list[line_number - 1] = line_list[line_number - 1].replace('00', str(i))
new_json_str = '\n'.join(line_list)
with open(path, 'w') as f:
f.write(new_json_str)
Based on your edit, this is what you want:
import os
import json
my_files = [f'{i}.json' for i in range(1, 1001)]
folder_path = os.path.dirname(__file__) # put this .py file in same folder as json files
for i, name in enumerate(my_files):
path = f'{folder_path}/{name}'
with open(path, 'r') as f:
json_text = f.read()
json_dict = json.loads(json_text)
json_dict['name'] = f'carl {i}'
# include these lines if you want "symbol" and "subtitle" changed
json_dict['symbol'] = f'carl {i}'
json_dict['subtitle'] = f'carl {i}'
new_json_str = json.dumps(json_dict)
with open(path, 'w') as f:
f.write(new_json_str)
Without knowing more, the below loop will accomplish the posts requirements.
name = 'carl'
for i in range(0,1001):
print(f'name: {name} {i}')

json dump not updating the file

I wanted to store some value in a json file
json file gets readed but not getting writed
import json
import os
filepath = os.path.abspath(__file__).replace("test.py", "test.json")
data = json.load(open(filepath, "r"))
out_file = open("test.json", "w")
a = input()
data["cool"] = a
print(data)
json.dump(data,out_file, indent = 6)
out_file.close()

Is there anyway I can make the output easier to read, Reading Data from 1000+ JSON files

My code
import os
import json
path_to_json = './validatedto'
json_files = [file for file in os.listdir(path_to_json) if file.endswith('.json')]
json_data = []
for index, data in enumerate(json_files):
f = '{0}/{1}'.format(path_to_json,data)
with open(f) as file:
d = json.loads(file.read())
print(d)
json_data.append(d)
output_path = os.path.join('.','output.json')
with open (output_path, 'w') as f:
json.dump(json_data, f)
Output
Use pathlib.Path instead of os
import json
import pprint
from pathlib import Path
base_path = Path('validatedto')
json_files = list(base_path.rglob('*.json'))
json_data = []
for index, path in enumerate(json_files):
d = json.loads(path.read_text())
pprint.pprint(d)
json_data.append(d)
output_path = Path('.') / 'output.json'
output_path.write_text(json.dumps(json_data, indent=4))

remove \xa0 while writing csv file

I have below code to write a list items as csv file. But while doing that, I see special character  is occurring in the output csv file. For testing in the local, I have defined a list with character '\xao' included in the list items and tested various ways to remove that and replace with space. But, I still get the special character in the csv output. Can anyone help?
import csv
from flask import make_response
import StringIO
csv_list = [['hfhf\xa0 fsdg','dsf'],['fsdgs fsdfs','fsdfsd'],['dsf\xa0 sf','asfg']]
def download_csv_summary(csv_list):
si = StringIO.StringIO()
cw = csv.writer(si)
filename = 'Test'
cw.writerows(csv_list)
output = make_response(si.getvalue())
output.headers['Content-Disposition'] = \
'attachment; filename={filename}.csv'.format(filename=filename)
output.headers['Content-Type'] = 'text/csv'
return output
The writerows line needs to be changed to replace the \xa0s:
import csv
from flask import make_response
import StringIO
csv_list = [
['hfhf\xa0 fsdg','dsf'],
['fsdgs fsdfs','fsdfsd'],
['dsf\xa0 sf','asfg']
]
def download_csv_summary(csv_list):
si = StringIO.StringIO()
cw = csv.writer(si)
filename = 'Test'
cw.writerows([[str(x).replace('\xa0', '') for x in l] for l in csv_list])
output = make_response(si.getvalue())
output.headers['Content-Disposition'] = \
'attachment; filename={filename}.csv'.format(filename=filename)
output.headers['Content-Type'] = 'text/csv'
return output

Boto3, read gzip from s3 and print content

I'm trying to read a gzip file from S3 - the "native" format f the file is a csv. Ultimately, after uncompressing the file, I'd like to be able to "see" the content so I can read the number of lines in the csv and keep count of it.
My "basic" attempts are here - still just trying to print the contents of the file. This attempt just tells me that there is no such file or directory...
I know I'm also probably erroneously thinking the unzipped csv file will be in json format - but that's the next "issue" once I get to read the unzipped contents...
[Errno 2] No such file or directory: 'SMSUsageReports/eu-west-1/2018/01/02/001.csv.gz'
import gzip
import boto3
import json
s3 = boto3.resource('s3')
s3_client = boto3.client('s3')
bucket = s3.Bucket('snssmsreports')
for obj in bucket.objects.filter(Prefix='SMSUsageReports/eu-west-1/2018/01/02'):
json_object = s3_client.get_object(Bucket=bucket.name, Key=obj.key)
file_name = obj.key
obj = bucket.Object(file_name)
file_body = obj.get()["Body"].read()
# gzip stuff here
f=gzip.open(file_name,'rb')
file_content=f.read()
#print file_content
#jsonFileReader = json_object['Body'].read()
jsonDict = json.loads(file_content)
#table = dynamodb.Table('SNS')
#table.put_item(Item=jsonDict)
print('{0}:{1}'.format(bucket.name, obj.key))
print(jsonDict)
OK, So I updated my code as follow:
import zipfile
import gzip
import boto3
import io
import json
import pandas as pd
s3 = boto3.resource('s3')
s3_client = boto3.client('s3')
bucket = s3.Bucket('snssmsreports')
for obj in bucket.objects.filter(Prefix='SMSUsageReports/eu-west-1/2018/01/02'):
json_object = s3_client.get_object(Bucket=bucket.name, Key=obj.key)
file_name = obj.key
obj = bucket.Object(file_name)
s3_client.download_file(bucket.name, file_name, '../../tmp/file.gz')
gzip_name = '../../tmp/file.gz'
# gzip stuff here
with gzip.open(gzip_name,'rb') as f:
file_content=f.read()
str_file = str(file_content)
csvfile = open('../../tmp/testfile.csv','w')
csvfile.write(str_file)
csvfile.close()
#table = dynamodb.Table('SNS')
#table.put_item(Item=jsonDict)
#pandas csv reader
df1 = pd.read_csv('../../tmp/testfile.csv')
print(df1)
#print('{0}:{1}'.format(bucket.name, obj.key))
#print(file_content)
#table = dynamodb.Table('SNS')
#table.put_item(Item=jsonDict)
This does not throw any errors anymore, but the output only has one row and 135 columns, so panda is not liking the actual content of the csv, or my conversion to str() is not the right way to do it?
OK, issue was the opening of the file for write - to write bytes I had to open file as wb...
csvfile = open('../../tmp/testfile.csv','wb')
csvfile.write(file_content)
csvfile.close()

Categories