invalid syntax error in list comprehension - python

I am trying to execute the code below, but it throws an error, whereas the same code runs fine in a Jupyter notebook. I am not sure what's going wrong. The Python version is 2 on both platforms. This code takes a JSON file as input, picks the 'Data' key, and writes all values under it into a CSV file.
Command line:
Python version: 2.6.6
$ python Parser.py /data/csdb/stage/fundapiresponse.json /data/csp53/csdb/stage/fundresponse.csv
File "Parser.py", line 27
flat_data = [{k:v for j in i for k, v in j.items()} for i in zip(*[element['Data'] for key in element])]
^
SyntaxError: invalid syntax
Code:
#################################################
# importing libraries
#################################################
import csv
import json
import collections
import sys
#################################################
# Reading input and output file from command line
#################################################
infile = sys.argv[1]
outfile = sys.argv[2]
print infile
print outfile
#################################################
# Read JSON and build CSV layout
#################################################
with open(infile, 'r') as f:
    data = json.load(f)

with open(outfile, 'w') as f:
    for element in data:
        flat_data = [{k:v for j in i for k, v in j.items()} for i in zip(*[element['Data'] for key in element])]
        csvwriter = DictWriter(f, flat_data[0].keys(), lineterminator='\n')
        csvwriter.writerows(flat_data)
Jupyter notebook:
Python 2
#################################################
# importing libraries
#################################################
import csv
import json
import collections
import sys
#################################################
# Reading input and output file from command line
#################################################
infile = 'fundapiresponse.json'
outfile = 'fundresponse.csv'
print infile
print outfile
#################################################
# Read JSON and build CSV layout
#################################################
with open(infile, 'r') as f:
    data = json.load(f)

with open(outfile, 'w') as f:
    for element in data:
        flat_data = [{k:v for j in i for k, v in j.items()} for i in zip(*[element['Data'] for key in element])]
        csvwriter = DictWriter(f, flat_data[0].keys(), lineterminator='\n')
        csvwriter.writerows(flat_data)

Dict comprehensions (PEP 274) with curly brackets and colon notation were only introduced in Python 2.7. Before that, you had to use the dict constructor with an appropriate list or generator of pairs:
dict((k, v) for j in i for k, v in j.iteritems()) # items works, too
See also Alternative to dict comprehension prior to Python 2.7.
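Applied to the line from the question, a Python 2.6-compatible version might look like this (a sketch, untested against your actual JSON):
flat_data = [dict((k, v) for j in i for k, v in j.items())
             for i in zip(*[element['Data'] for key in element])]
Note that the next error after this one will be a NameError: the code imports csv but calls DictWriter unqualified, so it needs to be csv.DictWriter(...).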

Related

Merging 2 json files

I'm trying to merge both JSON files, appending the timestamp from file2 to the corresponding frame number in file1. Please guide me.
JSON_FILE1
{"frameNumber":1,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":true,"bbox":{"top":157,"left":581,"height":390,"width":297},"classifications":[]}]}
{"frameNumber":2,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.36,"width":297.16},"classifications":[]}]}
{"frameNumber":3,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.72,"width":297.32},"classifications":[]}]}
{"frameNumber":4,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.08,"width":297.48},"classifications":[]}]}
{"frameNumber":5,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.44,"width":297.64},"classifications":[]}]}
JSON_FILE2
{
"frame1": "0:0:0:66",
"frame2": "0:0:0:100",
"frame3": "0:0:0:133",
"frame4": "0:0:0:166",
"frame5": "0:0:0:200"
}
expected output:
{"frameNumber":1,"frame1": "0:0:0:66",,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":true,"bbox":{"top":157,"left":581,"height":390,"width":297},"classifications":[]}]}
{"frameNumber":2, "frame2": "0:0:0:10,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.36,"width":297.16},"classifications":[]}]}
{"frameNumber":3,"frame3": "0:0:0:133,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.72,"width":297.32},"classifications":[]}]}
{"frameNumber":4,"frame4": "0:0:0:166","classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.08,"width":297.48},"classifications":[]}]}
{"frameNumber":5,"frame5": "0:0:0:200","classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.44,"width":297.64},"classification
I tried the following, but I was unable to achieve this:
import json
import glob

result = []
for f in glob.glob("*.json"):
    with open(f, "rb") as infile:
        result.append(json.load(infile))

with open("merged_file.json", "wb") as outfile:
    json.dump(result, outfile)
A correct .json file would need a surrounding pair of [] before you could json.load it, iterate over every line, and do the same as below, but anyway: the easiest solution is to turn every line into a dict, add the timestamp if the frame number matches, and write it back.
import json

def fuse(file1, file2, nTargetPath):
    with open(nTargetPath, "w") as tTargetFile:
        with open(file1, "r") as tSourceFileA:
            for tLineA in tSourceFileA.readlines():
                tDictA = json.loads(tLineA)  # loads dict from a string
                # str() is needed here: frameNumber is an int in the JSON
                tKey = "frame" + str(tDictA["frameNumber"])  # searching for the correct entry, but why not name this timestampX
                with open(file2, "r") as tSourceFileB:
                    for tLineB in tSourceFileB.readlines():
                        tDictB = json.loads(tLineB)
                        if tKey in tDictB:
                            tDictA[tKey] = tDictB[tKey]
                            break  # because there is only one timestamp
                tTargetFile.write(json.dumps(tDictA) + '\n')
This code can easily be improved, for example by optimizing the file access when you know the key for the timestamp in file2 is always in the same row as in file1, and so on.
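As a sketch of that improvement (function and variable names here are illustrative, and it assumes file2 fits in memory as one valid JSON object): load the timestamps with a single json.load instead of re-parsing file2 line by line for every record of file1, which also sidesteps the fact that individual lines of a pretty-printed JSON file are not valid JSON on their own.
import json

def fuse_fast(file1, file2, target_path):
    # Read all timestamps once: file2 is a single JSON object.
    with open(file2, "r") as f:
        timestamps = json.load(f)
    with open(file1, "r") as source, open(target_path, "w") as target:
        for line in source:
            record = json.loads(line)  # one JSON object per line (ndjson)
            key = "frame" + str(record["frameNumber"])
            if key in timestamps:
                record[key] = timestamps[key]
            target.write(json.dumps(record) + "\n")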
As was pointed out, one file is ndjson (newline-delimited JSON) and the other is regular JSON, so you need to implement some logic to merge the JSON into the ndjson.
# https://pypi.org/project/ndjson/
# pip install ndjson
import ndjson
import json

with open('path/to/file/im_a_ndjson.ndjson') as infile:
    ndjson_object = ndjson.load(infile)

with open('path/to/file/json_file2.json') as infile:
    dict_object = json.load(infile)

print(type(ndjson_object[0]['frameNumber']))
# output: <class 'int'>

for key in dict_object:
    # int needed as you can see above
    framenumber = int(key.strip('frame'))
    # find the matching ndjson object
    for ndjs in ndjson_object:
        if ndjs['frameNumber'] == framenumber:
            # add the key/value pair
            ndjs[key] = dict_object[key]
            # we can break as we've found it
            break

with open('path/to/file/new_ndjson.ndjson', 'w') as outfile:
    ndjson.dump(ndjson_object, outfile)

Appending JSONs together without modifying them

I have multiple JSON files which look like this:
foo.json:
{"Email":["abc#abc.com","aac#abc.com","asd#ac.com"]}
bar.json:
{"Work": ["Microsoft","Google","Yahoo"]}
I want to append all of these together without modifying them. I am using this to append them:
import json
import glob
import sys
import os
read_files = glob.glob("*.json")
output_list = []
for f in read_files:
with open(f, "rb") as infile:
output_list.append(json.load(infile))
with open("merged_file.json", "wb") as outfile:
json.dump(output_list, outfile)
The output of this is:
[{"Email":["abc#abc.com","aac#abc.com","asd#ac.com"]},{"Work": ["Microsoft","Google","Yahoo"]}]
This makes the output look different from the input. How can I avoid the "[ ]" at the start and the end, and keep all keys in the same JSON object?
The desired output is:
{"Email":["abc#abc.com","aac#abc.com","asd#ac.com"],"Work": ["Microsoft","Google","Yahoo"]}
You are building a list, yet your expected output shows you want a JSON object (a dictionary in Python). Provided your input files have unique keys, you need to build a new dictionary:
import glob
import json

output = {}
for f in glob.glob("*.json"):
    with open(f, "rb") as infile:
        entry = json.load(infile)
        output.update(entry)

with open("merged_file.json", "w") as outfile:  # text mode: json.dump writes str
    json.dump(output, outfile)
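If two input files could share a key, dict.update silently keeps only the last value. A small defensive sketch (assuming you would rather fail loudly on collisions than lose data):
import glob
import json

output = {}
for f in glob.glob("*.json"):
    with open(f, "rb") as infile:
        entry = json.load(infile)
    duplicates = set(entry) & set(output)
    if duplicates:
        # Assumption: overlapping keys across files indicate a data problem.
        raise ValueError("duplicate keys across files: %r" % duplicates)
    output.update(entry)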

Creating a Dictionary to Write to a file

I am trying to change the format in which a file is written. The file is written in a generic file format. I am putting headers into the file and writing it so that it can be read by another program in the future, simply by using a pandas dictionary or an xarray DataArray. In order to do this, I am trying to make columns in the file that are more separated than what I have now. I have the following code:
def cvrtpa(fle, separation_character, (labels)):
    import numpy as np
    import pandas as pd
    labels[-1] = labels[-1] + '\n'
    flabels = labels
    finlabel = np.array([flabel + separation_character for flabel in flabels])
    infile = open(fle, 'r')
    templines = infile.readlines()
    vardict = {}  # dict version
    for i in finlabel:
        for j in range(len(templines)):
            split = templines[j]
            x = split.split()
            vardict.setdefault(finlabel[i], []).append(x)
    infile.close()
    outfile = open(fle, 'w')
    outfile.write(finlabel)
    outfile.write(temp)
    outfile.close()
    mfile = open(fle, 'r')
    data = mfile.readlines()
    return data

fl = '.../Summer 2016/Data/Test/Test'
label = ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Precipitation']
xx = cvrtpa(fl, '', label)
I am not overly familiar with dictionaries and so it has been a bit difficult to come up with the code I have. I know there may be inconsistencies/errors.
import json

# Create some random data structure
animals = zip(['dogs', 'cats', 'mice'], [124156, 858532, 812885])
data = {k: {v: {k: [v, v, v, k, v, {k: [k, k, k, k]}]}} for k, v in animals}

# Create a new file, dump the data to it
with open('filename.ext', 'w+') as file:
    json.dump(data, file, indent=4, sort_keys=False)

# Open the same file, load it back as a new variable
with open('filename.ext') as file:
    new_dictionary = json.load(file)

# Make some changes to the dict
new_dictionary['new_key'] = 'hello python'

# Open the file back up again and rewrite the new data
with open('filename.ext', 'w+') as file:
    json.dump(new_dictionary, file, indent=4)

How to save a dictionary to a file?

I have a problem with changing a dict value and saving the dict to a text file (the format must stay the same); I only want to change the member_phone field.
My text file is the following format:
memberID:member_name:member_email:member_phone
and I split the text file with:
mdict = {}
for line in file:
    x = line.split(':')
    a = x[0]
    b = x[1]
    c = x[2]
    d = x[3]
    e = b + ':' + c + ':' + d
    mdict[a] = e
When I try to change the member_phone stored in d, the value changes without following the key:
def change(mdict, b, c, d, e):
    a = input('ID')
    if a in mdict:
        d = str(input('phone'))
        mdict[a] = b + ':' + c + ':' + d
    else:
        print('not')
And how do I save the dict back to a text file in the same format?
Python has the pickle module just for this kind of thing.
These functions are all that you need for saving and loading almost any object:
import pickle

with open('saved_dictionary.pkl', 'wb') as f:
    pickle.dump(dictionary, f)

with open('saved_dictionary.pkl', 'rb') as f:
    loaded_dict = pickle.load(f)
For saving collections of Python objects, there is also the shelve module.
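A minimal shelve sketch (the filename is illustrative); a shelf behaves like a persistent dict whose values are pickled individually:
import shelve

# Save: assign entries as if the shelf were a dict.
with shelve.open('saved_data') as shelf:
    shelf['dictionary'] = {'hello': 'world'}

# Load: open the same shelf and read the entry back.
with shelve.open('saved_data') as shelf:
    loaded_dict = shelf['dictionary']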
Pickle is probably the best option, but in case anyone wonders how to save and load a dictionary to a file using NumPy:
import numpy as np

# Save
dictionary = {'hello': 'world'}
np.save('my_file.npy', dictionary)

# Load
read_dictionary = np.load('my_file.npy', allow_pickle=True).item()
print(read_dictionary['hello'])  # displays "world"
FYI: NPY file viewer
We can also use the json module when dictionaries or other data can easily be mapped to the JSON format.
import json
# Serialize data into file:
json.dump( data, open( "file_name.json", 'w' ) )
# Read data from file:
data = json.load( open( "file_name.json" ) )
This solution brings many benefits, e.g. it works unchanged for Python 2.x and Python 3.x, and data saved in JSON format can easily be transferred between many different platforms and programs. The data are also human-readable.
Save and load a dict to a file:
def save_dict_to_file(dic):
    f = open('dict.txt', 'w')
    f.write(str(dic))
    f.close()

def load_dict_from_file():
    f = open('dict.txt', 'r')
    data = f.read()
    f.close()
    return eval(data)
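One caveat: eval() will execute whatever happens to be in the file. If the file only ever contains a dict literal, a safer loader (a drop-in variant, not part of the original answer) is ast.literal_eval, which accepts Python literals only:
import ast

def load_dict_from_file():
    with open('dict.txt', 'r') as f:
        # literal_eval parses literals but refuses arbitrary expressions
        return ast.literal_eval(f.read())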
As Pickle has some security concerns and is slow (source), I would go for JSON, as it is fast, built-in, human-readable, and interchangeable:
import json

data = {'another_dict': {'a': 0, 'b': 1}, 'a_list': [0, 1, 2, 3]}

# e.g. file = './data.json'
with open(file, 'w') as f:
    json.dump(data, f)
Reading is similarly easy:
with open(file, 'r') as f:
    data = json.load(f)
This is similar to this answer, but implements the file handling correctly.
If the performance improvement is still not enough, I highly recommend orjson, a fast, correct JSON library for Python built in Rust.
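A minimal orjson sketch, assuming it is installed (pip install orjson); note that orjson.dumps returns bytes, so the file is opened in binary mode:
import orjson

data = {'another_dict': {'a': 0, 'b': 1}, 'a_list': [0, 1, 2, 3]}

with open('./data.json', 'wb') as f:
    f.write(orjson.dumps(data))  # dumps returns bytes, not str

with open('./data.json', 'rb') as f:
    data = orjson.loads(f.read())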
I'm not sure what your first question is, but if you want to save a dictionary to a file you should use the json library. Look up the documentation of the dump and load functions.
I would suggest saving your data in JSON format instead of pickle format, as JSON files are human-readable, which makes debugging easier since your data is small. JSON files are also used by other programs to read and write data. You can read more about it here.
The json module is part of Python's standard library, so there is nothing to install; just import json.
# To save the dictionary into a file:
json.dump( data, open( "myfile.json", 'w' ) )
This creates a JSON file with the name myfile.json.
# To read data from file:
data = json.load( open( "myfile.json" ) )
This reads and stores the myfile.json data in a data object.
For a dictionary of strings such as the one you're dealing with, it could be done using only Python's built-in text processing capabilities.
(Note this wouldn't work if the values are something else.)
with open('members.txt') as file:
    mdict = {}
    for line in file:
        a, b, c, d = line.strip().split(':')
        mdict[a] = b + ':' + c + ':' + d

a = input('ID: ')
if a not in mdict:
    print('ID {} not found'.format(a))
else:
    b, c, d = mdict[a].split(':')
    d = input('phone: ')
    mdict[a] = b + ':' + c + ':' + d  # update entry

with open('members.txt', 'w') as file:  # rewrite file
    for id, values in mdict.items():
        file.write(':'.join([id] + values.split(':')) + '\n')
I like using the pretty print module to store the dict in a very user-friendly readable form:
import pprint

def store_dict(fname, dic):
    with open(fname, "w") as f:
        f.write(pprint.pformat(dic, indent=4, sort_dicts=False))
# note some of the defaults are: indent=1, sort_dicts=True
Then, when recovering, read in the text file and eval() it to turn the string back into a dict:
def load_file(fname):
    try:
        with open(fname, "r") as f:
            dic = eval(f.read())
    except:
        dic = {}
    return dic
Unless you really want to keep the dictionary, I think the best solution is to use the csv Python module to read the file. Then, you get rows of data and you can change member_phone or whatever you want; finally, you can use the csv module again to save the file in the same format as you opened it.
Code for reading:
import csv

with open("my_input_file.txt", "r") as f:
    reader = csv.reader(f, delimiter=":")
    lines = list(reader)
Code for writing:
with open("my_output_file.txt", "w") as f:
    writer = csv.writer(f, delimiter=":")
    writer.writerows(lines)
Of course, you need to adapt your change() function:
def change(lines):
    a = input('ID')
    for line in lines:
        if line[0] == a:
            d = str(input("phone"))
            line[3] = d
            break
    else:
        print("not")
I haven't timed it but I bet h5 is faster than pickle; the filesize with compression is almost certainly smaller.
import deepdish as dd
dd.io.save(filename, {'dict1': dict1, 'dict2': dict2}, compression=('blosc', 9))
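Loading it back should be the mirror image (assuming deepdish's dd.io.load, which pairs with dd.io.save):
loaded = dd.io.load(filename)  # e.g. loaded['dict1'], loaded['dict2']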
file_name = open("data.json", "w")
json.dump(test_response, file_name)
file_name.close()
or use context manager, which is better:
with open("data.json", "w") as file_name:
json.dump(test_response, file_name)

Reading File into Python Dictionary and then Writing to File

I have a multi-row, multi-column text file (it has a header, if that's important). I want to read all the data into a Python dictionary and then just write it out again to a file. Ultimately I want to create two dictionaries from two files, join them on the key, and then print out the joined version, but I can't even get this part right. This is what I've got:
import sys
import csv
from collections import defaultdict

usage = "usage: python Newer.py <project_file> <table_file> <outfile>"
if len(sys.argv) != 4:
    print usage
    sys.exit(0)

project = open(sys.argv[1], "rb")
table = open(sys.argv[2], "rb")
outfile = open(sys.argv[3], "w")

projectdict = defaultdict(list)
for line in project:
    parts = line.strip().split("\t")
    first = parts[1]
    projectdict[first].append(line)

for key in projectdict:
    outfile.write(" ".join(projectdict[first]) + "\n")
And what I get from it is a text file with the same entry from the text file repeated over and over again.
The issue is that you're writing the value at key "first" every time in the second loop, regardless of what the actual key is. I believe this will fix your problem.
for key in projectdict:
    outfile.write(" ".join(projectdict[key]) + "\n")
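For the eventual goal of joining two files on the key, a sketch in the same style (it assumes the table file also has its key in column 1, like the project file):
# Build a second dictionary from the table file, keyed the same way.
tabledict = defaultdict(list)
for line in table:
    parts = line.strip().split("\t")
    tabledict[parts[1]].append(line.strip())

# Join on the shared key and write one line per key.
for key in projectdict:
    if key in tabledict:
        row = [l.strip() for l in projectdict[key]] + tabledict[key]
        outfile.write(" ".join(row) + "\n")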
