Converting objects in file to json in python - python

I have a file with multiple objects like this:
{
name: (sindey, crosby)
game: "Hockey"
type: athlete
},
{
name: (wayne, gretzky)
game: "Ice Hockey"
type: athlete
}
...and I'd like to convert them to JSON format and output this:
[
{
"name": "(sindey, crosby)",
"game": "Hockey",
"type": "athlete"
},
{
"name": "(wayne, gretzky)",
"game": "Ice Hockey",
"type": "athlete"
}
]
If the input was in this format,
name: (sidney, crosby) | game:"Hockey" | type:athlete
name: (wayne, gretzky) | game:"Ice Hockey" | type:athlete
I could implement using json dump with list and dict and it gives me the desired output
import json


def parse_pipe_log(lines):
    """Parse 'key: value | key: value' lines into a list of dicts.

    Each line becomes one record. Fields are separated by ' | ' and each
    field is split on the FIRST ':' only, so values may contain colons.
    Leading whitespace in values is preserved, as in the original.
    """
    records = []
    for line in lines:
        fields = line.split(' | ')
        records.append(dict(field.split(':', 1) for field in fields))
    return records


if __name__ == "__main__":
    # 'with' guarantees the handles are closed even if parsing fails
    # (the original leaked the read handle).
    with open("log.file", "r") as f:
        splitcontent = f.read().splitlines()
    with open("json_log.json", "w") as out:
        out.write(json.dumps(parse_pipe_log(splitcontent), indent=4, sort_keys=False))
How can I reformat this code to convert my input to JSON format?

With slight changes to the answer given by @Sarah Messer. Changes involved:
lines without the ':' separator are skipped
Try this
import json


def parse_braced_log(lines):
    """Parse brace-delimited records ('{' ... '}') into a list of dicts.

    Any line containing a curly brace is treated as a record boundary.
    Lines without a ':' separator are skipped (the stated intent of this
    variant); values are split on the first ':' only, and a trailing ','
    is stripped from values.
    """
    records = []
    record = {}
    for line in lines:
        if '{' in line or '}' in line:
            # Record boundary: flush the record built so far.
            # (The original tested the same two characters twice.)
            if record:
                records.append(record)
                record = {}
            continue
        # partition() never raises: lines lacking ':' yield an empty
        # separator and are skipped instead of crashing split().
        key, sep, val = line.partition(':')
        if not sep:
            continue
        if val.endswith(','):
            # strip a trailing comma
            val = val[:-1]
        record[key] = val
    if record:
        # Flush the last record even if the input lacks a closing '}'.
        records.append(record)
    return records


if __name__ == "__main__":
    with open("log.file", "r") as f:
        splitcontent = f.read().splitlines()
    with open("json_log.json", 'w') as file:
        file.write(json.dumps(parse_braced_log(splitcontent), indent=4, sort_keys=False))

Something like this will probably work for most cases - you just have to handle the lines with curly braces separately from the lines with data:
import json


def log_to_records(splitcontent):
    """Convert brace-delimited 'key: value' lines to a list of dicts.

    Curly-brace lines delimit records. Values are split on the first ':'
    only and a trailing ',' is stripped. Blank lines and lines with no
    ':' are skipped (the original raised ValueError on them).
    """
    d = []
    appendage = {}
    for line in splitcontent:
        if ('}' in line) or ('{' in line):
            # Append a just-created record and start a new one
            if appendage:
                d.append(appendage)
                appendage = {}
        elif ':' in line:
            key, val = line.split(':', 1)
            if val.endswith(','):
                # strip a trailing comma
                val = val[:-1]
            appendage[key] = val
        # else: no separator on this line -- skip it rather than crash
    if appendage:
        # Flush a trailing record when the input lacks a final '}'.
        d.append(appendage)
    return d


if __name__ == "__main__":
    with open("log.file", "r") as f:
        splitcontent = f.read().splitlines()
    with open("json_log.json", 'w') as file:
        file.write(json.dumps(log_to_records(splitcontent), indent=4, sort_keys=False))
I might also have some typos in there...

Related

Converting text file to dictionary then to JSON object using Python - But I need to split data from the text file

I am trying to convert txt file data to a Python dictionary before dumping it to JSON and then writing JSON data into avro. My text file contains unwanted data that I need to remove, I only need field names and field types:
# NOTE(review): indentation was lost when this snippet was pasted; the
# layout below reflects the author's intent, not runnable structure.
mappings = ['name', 'type']
with open('xxx.txt', 'r') as fn:
dict1 = {}
names = []
types = []
# BUG (the error quoted below): the handle is bound to `fn`, but this
# line reads from an undefined name `file` -> NameError.
for line in file.readlines():
names_lines = line.split()[0] # index 0 = names
names.append(names_lines)
types_lines = line.split()[1] # index 1 = types
types.append(types_lines)
id = 1
# NOTE(review): even after fixing `file` -> `fn`, readlines() above would
# already have consumed the handle, so this second pass reads nothing --
# confirm against the full program.
for d in fn:
desc = list(d.strip().split(' ', 1))
name = desc[0]
i = 0
dict2 = {}
while i < len(mappings):
dict2[mappings[i]] = desc[i]
i = i + 1
dict1[name] = dict2
id = id + 1
print(dict1)
1- My Error is:
name 'file' is not defined. Did you mean: 'filter'?
replace file with fn:
for line in fn.readlines():
From what you described you wanted in the comments, try this:
def extract_name_type(lines):
    """Return [{'name': ..., 'type': ...}] from whitespace-separated lines.

    Only the first two fields of each line are kept; any extra fields are
    discarded by the starred unpacking. Lines with fewer than two fields
    raise ValueError, matching the original behavior.
    """
    records = []
    for line in lines:
        name, type_, *_ = line.split()
        records.append({"name": name, "type": type_})
    return records


if __name__ == "__main__":
    # A file handle iterates line by line, so it can be passed directly.
    with open('xxx.txt', 'r') as f:
        print(extract_name_type(f))

How would I insert a number into a pre-existing script and repeat it 3544 times

I have a script for a video game modification that has to be repeated 3544 times.
country_event = {
id = 5893X
title = "Aquire $PROVINCENAME$"
desc = "$COUNTRY$ is surrendering a province to us."
picture = "administration"
trigger = {
has_country_flag = won_acquire_prov
NOT = { has_country_flag = prov_treaty }
any_country = {
limit = {
truce_with = THIS
NOT = {
X = {
owned_by = THIS
}
}
}
}
}
option = {
name = "Take $PROVINCENAME$"
X = {
secede_province = THIS
}
}
option = {
name = "We don't want $PROVINCENAME$"
}
option = {
name = "End Province arbitration"
set_country_flag = prov_treaty
}
}
The goal is to replace "X" with a number ranging from 1 to 3544 for each number there will be another "country_event". But I am not sure how to do this.
I have tried using a python script:
# Reads path-like entries from provids.txt and writes one country_event
# block per province id into 0_provs_event.txt.
# NOTE(review): files are opened without `with`; if an exception is raised
# mid-loop the handles are never closed.
infile = open("provids.txt", "r")
outfile = open("0_provs_event.txt","w")
for line in infile:
# Only lines long enough and whose sixth character is '=' are treated
# as entries of interest.
if len(line) > 5:
if line[5] == "=":
# Second '/'-separated component, text before its first '.',
# e.g. "x/123.txt..." -> "123".
prov = line.split("/")[1].split(".")[0]
if prov != "REB":
#^^^ Code does not change file if above is removed
# BUG (why "it does nothing"): `tag` is never defined anywhere in
# this script, so reaching this line raises NameError -- confirm it
# isn't defined in surrounding code.
newline = "country_event = { id = 5893" + prov + "title = Aquire $PROVINCENAME$ desc = $COUNTRY$ is surrendering a province to us. picture = administration trigger = { has_country_flag = won_aquire_prov NOT = { has_country_flag = prov_treaty } any_country = { limit = { truce_with = THIS NOT = {" + tag + "= { owned_by = THIS } } } } } option = { name = Take $PROVINCENAME$ " + prov + " = { secede_province = THIS } } option = { name = We don't want $PROVINCENAME$ } option = { name = End Province arbitration set_country_flag = prov_treaty } } \n"
outfile.write(newline)
infile.close()
outfile.close()
I haven't been able to get it to work. It just makes the file and does nothing else.
The main issue is how you're reading in infile
When you call open, python will open the file as '_io.TextIOWrapper' which is a class that contains metadata about the file among other things. If you want the text of the file as a list of strings, where each string is a line of the file (which appears to be what you want), you then need to read the file. This can be done using infile.read() if you want the whole this as a single string, or infile.readlines() if you want each line to be an element of a list
My preferred way of reading in files in python is as follows:
# Template (not runnable as-is): illustrates the read-process-write
# pattern. The loop body below is a placeholder comment and must be
# replaced with real code before this executes.
# Setup file paths, and something to store your output which will be written to outfile_path
infile_path = "path_to_infile"
outfile_path = "path_to_outfile"
outfile = []
with open(infile_path, 'r') as f: # Open infile_path into f as a readable object
infile = f.readlines() # Read through f and place each line as an element in a list
for line in infile:
# do stuff, append lines to outfile
outfile = "\n".join(outfile) # Turn the list outfile into a string with character returns between each line
with open(outfile_path, 'w') as f: # Open outfile_path into f as a writable object
f.write(outfile) # Write out the string outfile to f
The advantage of the with open version is you don't need to remember to close the file afterwards, that automatically happens after the with block (after the unindent in the code)

Removing the enumerator from json file using Python

The following code generates a json file, however, I need to get rid of the enumerator element in the output (i.e. "1":, "2":, etc.). It looks like a dumb question but I'm totally confused!
The output looks like this:
{
"1": {
"found": "Alaska",
"resolved as": " alaska",
"gazetteer": " {com.novetta.clavin.gazetteer.LazyAncestryGeoName#59b62d}",
"position": " 795",
"confidence": " 1.000000",
"fuzzy": " false",
"lon": " -150.00028",
"lat": " 64.00028"
}, ...
And here is the code:
# Builds one dict per input line, stored under the numeric string key
# `sno` ("1", "2", ...) -- that key is the "enumerator" the question
# wants to remove. Indentation was flattened in the paste.
import json
filename = 'output.txt'
dict1 = {}
fields = ['found', 'resolved as', 'gazetteer', 'position', 'confidence', 'fuzzy', 'lon', 'lat' ]
with open(filename) as fh:
l = 1
for line in fh:
# reading line by line from the text file
description = list( line.strip().split(','))
# for output see below
print(description)
# for automatic creation of id for each employee
sno = '' + str(l)
# loop variable
i = 0
# intermediate dictionary
dict2 = {}
while i<len(fields):
# creating dictionary for each employee
dict2[fields[i]]= description[i]
i = i + 1
# appending the record of each employee to
# the main dictionary
dict1[sno]= dict2
l = l + 1
# creating json file
# NOTE(review): opened without `with`; close() is skipped if json.dump raises.
out_file = open("test2.json", "w")
json.dump(dict1, out_file, indent = 4)
out_file.close()
To get rid of the enumerator, there should only be one dictionary. Here is the new code which converts a text file to json:
import json


def line_to_record(line, fields):
    """Map the comma-separated values of one line onto *fields*, in order.

    Replaces the original triple-nested loop, which mutated the value
    list while iterating it and mispaired fields when two values were
    equal (list.remove deletes the first match). zip() pairs each field
    with the value in the same position directly.
    """
    return dict(zip(fields, line.strip().split(',')))


if __name__ == "__main__":
    filename = 'output.txt'
    fields = ['found', 'resolved as', 'gazetteer', 'position', 'confidence', 'fuzzy',
              'lon', 'lat']
    dic1 = {}
    with open(filename) as fh:
        for line in fh:
            # reading line by line from the text file; each line rebuilds
            # the single (non-enumerated) record, as in the original.
            dic1 = line_to_record(line, fields)
    json_string = json.dumps(dic1)
    print(json_string)
Now, the output in the json file looks like this (without the enumerator (i.e. key):
{"found": "Alaska", "resolved as": " alaska", "gazetteer": " {com.novetta.clavin.gazetteer.LazyAncestryGeoName#59b62d}", "position": " 56332", "confidence": " 1.000000", "fuzzy": " false", "lon": " -150.00028", "lat": " 64.00028"}

Python3 - Nested dict to JSON

I am trying to convert multiple .txt file to "table-like" data (with columns and rows). Each .txt file should be considered as a new column.
Consider below content of the .txt file:
File1.txt
Hi there
How are you doing?
What is your name?
File2.txt
Hi
Great!
Oliver, what's yours?
I have created a simple method, that accepts the file and and integer (the file number from another method):
def txtFileToJson(text_file, column):
# Collect the lines of text_file under the key `column` and append the
# result to output.txt as JSON. Relies on `re`, `json` and `defaultdict`
# being imported at module level. Indentation lost in the paste.
data = defaultdict(list)
i = int(1)
with open(text_file) as f:
data[column].append(column)
for line in f:
i = i + 1
for line in re.split(r'[\n\r]+', line):
# BUG (why only the last line appears in the output): plain
# assignment replaces the whole value on every iteration
# instead of appending to a list.
data[column] = line
# 'a+' append mode: each call adds another top-level JSON document,
# which is why the output file has multiple JSON roots.
with open("output.txt", 'a+') as f:
f.write(json.dumps(data))
So above method will run two times (one time for each file, and append the data).
This is the output.txt file after I have run my script:
{"1": "What is your name?"}{"2": "Oliver, what's yours?"}
As you can see, I can only get it to create a new JSON object for each file I have, and then it only adds the entire last line.
[{
"1": [{
"1": "Hi there",
"2": "How are you doing?",
"3": "\n"
"4": "What is your name?"
},
"2": [{
"1": "Hi"
"2": "Great!",
"3": "\n",
"4": "Oliver, what's yours?"
},
}]
Update:
OK, so I played around a bit and got a bit closer:
# Fragment from the question's update: `column`, `text_file` and
# `out_file` are defined in surrounding (unshown) code.
myDict = {str(column): []}
i = int(1)
with open(text_file) as f:
for line in f:
# data[column].append(column)
# NOTE(review): re.split always returns a non-empty list, so `if match`
# is always true and no line is ever filtered out.
match = re.split(r'[\n\r]+', line)
if match:
myDict[str(column)].append({str(i): line})
i = i + 1
# 'a+' append mode writes one JSON array per call -- hence the multiple
# JSON root elements the asker observes.
with open(out_file, 'a+') as f:
f.write(json.dumps(myDict[str(column)]))
That gives me below output:
[{"1": "Hi there\n"}, {"2": "How are you doing?\n"}, {"3": "\n"}, {"4": "What is your name?"}]
[{"1": "Hi\n"}, {"2": "Great!\n"}, {"3": "\n"}, {"4": "Oliver, what's yours?"}]
But as you can see, now I have multiple JSON root elements.
Solution
Thanks to jonyfries, I did this:
# Driver from the accepted approach. Assumes `images` (a dict of file
# paths) and an initial integer `column` are defined earlier -- confirm
# against the full program.
data = defaultdict(list)
for path in images.values():
column = column + 1
data[str(column)] = txtFileToJson(path, column)
# Saves the combined dict once, after all files are processed.
saveJsonFile(path, data)
And then added a new method to save the final combined list:
def saveJsonFile(text_file, data):
    """Append *data* as JSON to 'table_data.txt' in *text_file*'s directory.

    Note: 'a+' append mode means repeated calls concatenate JSON
    documents into the same file. The original also computed a basename
    and rebound text_file; both values were never used, so that dead
    code is removed here.
    """
    dir_name = os.path.dirname(text_file) + "/"
    out_file = dir_name + 'table_data.txt'
    with open(out_file, 'a+') as f:
        f.write(json.dumps(data))
You're creating a new dictionary within the function itself. So each time you pass a text file in it will create a new dictionary.
The easiest solution seems to be returning the dictionary created and add it to an existing dictionary.
def txtFileToJson(text_file, column):
    """Read *text_file* and return {str(column): [{line_no: line}, ...]}.

    Each raw line (newline included) is stored under a 1-based string
    index. The per-column list is also appended to the file named by the
    module-level `out_file` -- NOTE(review): `out_file` is not defined in
    this snippet and must exist in the caller's module.
    """
    key = str(column)
    myDict = {key: []}
    i = 1
    with open(text_file) as f:
        for line in f:
            # re.split on a line is always a non-empty list, so every
            # line is kept; this mirrors the original's `if match` test.
            match = re.split(r'[\n\r]+', line)
            if match:
                myDict[key].append({str(i): line})
                i = i + 1
    with open(out_file, 'a+') as f:
        f.write(json.dumps(myDict[key]))
    return myDict
# Usage sketch: merge the per-file dicts under string keys. The names
# `text_file`, `other_text_file`, `column` and `other_column` come from
# surrounding (unshown) code; `defaultdict` is imported at module level.
data = defaultdict(list)
data["1"] = txtFileToJson(text_file, column)
data["2"] = txtFileToJson(other_text_file, other_column)
def read(text_file):
    """Return {'row_1': ..., 'row_2': ...} mapping row labels to lines.

    Rows are numbered from 1 in file order; the trailing newline is
    stripped from each line.
    """
    data = {}
    with open(text_file) as f:
        for i, line in enumerate(f, start=1):
            data['row_%d' % i] = line.rstrip('\n')
    return data
# Driver for read(): keys are the integer indexes 0 and 1 supplied by
# enumerate() (default start). `out_file` must be a path defined in
# surrounding code; `json` is imported at module level.
res = {}
for i, fname in enumerate([r'File1.txt', r'File2.txt']):
res[i] = read(fname)
with open(out_file, 'w') as f:
json.dump(res, f)
First, if I understand correctly, you are trying to get as output a dictionary of dictionaries. However, what I understand to be your desired output seems to enclose the whole thing within a list. Furthermore, you have unbalanced open and closed list brackets within the dictionaries, which I will ignore, as I will the enclosing list.
I think you need something like:
#!python3
import json
import re
def processTxtFile(text_file, n, data):
    """Store {index: text} pieces of *text_file* into data[str(n)].

    Pieces are produced by splitting each raw line on runs of newline
    characters and numbered from 1. Note: splitting "text\\n" also yields
    a trailing '' piece, so every newline-terminated line contributes an
    empty entry as well -- behavior kept from the original answer.
    """
    d = {}
    with open(text_file) as f:
        i = 0
        for raw in f:
            for piece in re.split(r'[\n\r]+', raw):
                i = i + 1
                d[str(i)] = piece
    data[str(n)] = d
# Driver: builds {"1": {...}, "2": {...}} from the two input files and
# writes it as a single JSON document (one root element), which is what
# the asker wanted.
data = dict()
processTxtFile('File1.txt', 1, data)
processTxtFile('File2.txt', 2, data)
with open("output.txt", 'wt') as f:
f.write(json.dumps(data))
If you really need the nested dictionaries to be enclosed within a list, then replace
data[str(n)] = d
with:
data[str(n)] = [d]

Python IndexError - Need help sorting key and Value

I need help sorting my key-value pair. My output is in this url http://pastebin.com/ckKAtP5y.
However, what I've been trying to do is.
{
"courses": [
{
"professors": [
{
"first_name": "Zvezdelina",
"last_name": "Stankova",
"professor_url": "http://www.ratemyprofessors.com/ShowRatings.jsp?tid=375269",
"helpfullness": 4.3,
"clarity": 4.3,
"overall_rating": 4.3
}],
"course_name": "CHEM 1",
"course_mentioned_times": 37
},
{
"professors": [
{
"first_name": "Alan",
"last_name": "Shabel",
"professor_url": "http://www.ratemyprofessors.com/ShowRatings.jsp?tid=1309831",
"helpfullness": 3.9,
"clarity": 3.5,
"overall_rating": 3.7
}],
"course_name": "CHEMISTRY 231",
"course_mentioned_times": 50
}
]
So what I want to do is I want to compare 'CHEM' and 'CHEMISTRY' in "course_name" and just get me the most 'course_mentioned_times' and remove the other one. In this case I'd want CHEMISTRY 231 because it's mentioned 50 times.
Here's what I've been helped with so far.
# NOTE(review): indentation was flattened in the paste; read structurally.
if __name__ == "__main__":
import json
#'output_info.json is http://pastebin.com/ckKAtP5y
with open('output_info.json') as data_file:
data = json.load(data_file)
# NOTE(review): this aliases the same object -- it is NOT a copy, so
# data['courses'] and temp_data['courses'] are the same list.
temp_data = data
greater = []
len1 = len(data['courses'])
len2 = len1
# O(n^2) pairwise comparison: courses whose names share the first three
# characters are compared, and the index of the MORE-mentioned one of
# each pair is recorded in `greater` (kept); the rest will be dropped.
for i in range(0,len1):
for j in range(0, len2):
if i==j:
continue
if data['courses'][i]['course_name'][0] == temp_data['courses'][j]['course_name'][0]:
if data['courses'][i]['course_name'][1] == temp_data['courses'][j]['course_name'][1]:
if data['courses'][i]['course_name'][2] == temp_data['courses'][j]['course_name'][2]:
if data['courses'][i]['course_mentioned_times']> temp_data['courses'][j]['course_mentioned_times']:
greater.append(i)
else:
greater.append(j)
# De-duplicate the collected indexes, preserving order.
final = []
for i in greater:
if i not in final:
final.append(i)
list_order = []
for i in range(0,len(data['courses'])):
list_order.append(i)
# new_final = indexes NOT in `final`, i.e. the entries to delete.
new_final = []
for i in list_order:
if i not in final:
new_final.append(i)
# BUG (source of the IndexError quoted below): every pop() shifts all
# later indexes down by one, but each index here is adjusted by at most
# one regardless of how many pops came before it, so stale indexes can
# run past the end of the shrinking list.
for i in new_final:
if i!=new_final[0]:
i=i-1
data['courses'].pop(i)
# Writing the new json data back to data.json file.
with open('data.json', 'w') as f:
json.dump(data, f)
This code gives me an IndexError
data['courses'].pop(i)
IndexError: pop index out of range
After a lot of back and forth in question's comments:
#coding:utf-8
# NOTE(review): this answer is Python 2 (bare `print` statements) and the
# paste lost its indentation; it will not run unmodified under Python 3.
import json
filename = 'data.json'
with open(filename, 'r') as f:
data = json.load(f)
courses = data.get('courses', None)
if courses:
# Key = first three characters of the first word of each course name,
# e.g. "CHEM 1" and "CHEMISTRY 231" both map to "CHE".
keys = sorted(set([course.get('course_name', None).strip().split()[0][0:3] for course in courses]))
results = {'courses': {}}
# Group every course under its three-letter key.
for key in keys:
results['courses'][key] = []
temp = {}
for course in courses:
course_name = course.get('course_name', None)
professors = course.get('professors', None)
if course_name.strip().split()[0][0:3] == key:
course_mentioned_times = course.get('course_mentioned_times')
temp[course_name] = {'course_mentioned_times':course_mentioned_times, 'professors': professors}
results['courses'][key] = temp
else:
raise Exception('No courses could be found on {}'.format(filename))
def get_most_mentioned(name):
# Return every course under key `name` whose mention count equals the
# maximum for that key (ties are all returned).
name = name[0:3]
data = results.get('courses', None).get(name)
max_mentioned_times = max(map(lambda m: data.get(m, None).get('course_mentioned_times'), data.keys()))
most_mentioned = []
for course_name, values in data.items():
course_mentioned_times = values.get('course_mentioned_times', None)
if course_mentioned_times == max_mentioned_times:
most_mentioned.append({'course_name': course_name, 'course_mentioned_times': course_mentioned_times, \
'professors': values.get('professors')})
return most_mentioned
# Report section: pretty-print the winner(s) for every key.
print "Course with most mentioned times:"
print "---------------------------------"
for key in keys:
print "[*] For Key '{}':".format(key)
for item in get_most_mentioned(key):
course_name = item.get('course_name', None)
print " Course Name: {}".format(course_name)
print " Mentioned Times: {}\n".format(item.get('course_mentioned_times'))
print " Professors:\n"
for i, professor in enumerate(item.get('professors', None), start=1):
print " {}) Full name: {} {}".format(i, professor.get('first_name'), professor.get('last_name'))
print " URL: {}".format(professor.get('professor_url'))
print " Helpfullness: {}".format(professor.get('helpfullness'))
print " Clarity: {}".format(professor.get('clarity'))
print " Overall_rating: {}".format(professor.get('overall_rating'))
print ""
print ""
import json
import collections


def best_courses(courses):
    """Return one course per 3-letter name prefix: the most-mentioned one.

    Courses are grouped by the first three characters of the first word
    of 'course_name' (upper-cased), and the course with the highest
    'course_mentioned_times' wins each group. Group order follows first
    appearance in the input.
    """
    courses_by_prefix = collections.defaultdict(list)
    for course in courses:
        prefix = course['course_name'].split(' ', 2)[0].upper()[:3]
        courses_by_prefix[prefix].append(course)
    # Only the grouped lists are needed here, so iterate values, not items.
    return [max(courselist, key=lambda c: c['course_mentioned_times'])
            for courselist in courses_by_prefix.values()]


if __name__ == "__main__":
    with open('output_info.json') as data_file:
        data = json.load(data_file)
    print(best_courses(data['courses']))

Categories