Python IndexError - Need help sorting key and value

I need help sorting my key-value pairs. My current output is at http://pastebin.com/ckKAtP5y.
What I'm trying to produce is:
{
    "courses": [
        {
            "professors": [
                {
                    "first_name": "Zvezdelina",
                    "last_name": "Stankova",
                    "professor_url": "http://www.ratemyprofessors.com/ShowRatings.jsp?tid=375269",
                    "helpfullness": 4.3,
                    "clarity": 4.3,
                    "overall_rating": 4.3
                }
            ],
            "course_name": "CHEM 1",
            "course_mentioned_times": 37
        },
        {
            "professors": [
                {
                    "first_name": "Alan",
                    "last_name": "Shabel",
                    "professor_url": "http://www.ratemyprofessors.com/ShowRatings.jsp?tid=1309831",
                    "helpfullness": 3.9,
                    "clarity": 3.5,
                    "overall_rating": 3.7
                }
            ],
            "course_name": "CHEMISTRY 231",
            "course_mentioned_times": 50
        }
    ]
}
What I want to do is compare 'CHEM' and 'CHEMISTRY' in "course_name", keep only the entry with the highest 'course_mentioned_times', and remove the other one. In this case I'd want CHEMISTRY 231 because it's mentioned 50 times.
Here's what I've been helped with so far:
if __name__ == "__main__":
    import json
    # 'output_info.json' is http://pastebin.com/ckKAtP5y
    with open('output_info.json') as data_file:
        data = json.load(data_file)
    temp_data = data
    greater = []
    len1 = len(data['courses'])
    len2 = len1
    for i in range(0, len1):
        for j in range(0, len2):
            if i == j:
                continue
            if data['courses'][i]['course_name'][0] == temp_data['courses'][j]['course_name'][0]:
                if data['courses'][i]['course_name'][1] == temp_data['courses'][j]['course_name'][1]:
                    if data['courses'][i]['course_name'][2] == temp_data['courses'][j]['course_name'][2]:
                        if data['courses'][i]['course_mentioned_times'] > temp_data['courses'][j]['course_mentioned_times']:
                            greater.append(i)
                        else:
                            greater.append(j)
    final = []
    for i in greater:
        if i not in final:
            final.append(i)
    list_order = []
    for i in range(0, len(data['courses'])):
        list_order.append(i)
    new_final = []
    for i in list_order:
        if i not in final:
            new_final.append(i)
    for i in new_final:
        if i != new_final[0]:
            i = i - 1
        data['courses'].pop(i)
    # Writing the new json data back to data.json file.
    with open('data.json', 'w') as f:
        json.dump(data, f)
This code gives me an IndexError:
data['courses'].pop(i)
IndexError: pop index out of range
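The IndexError happens because every pop(i) shifts the remaining elements one position left, so indices computed against the original list eventually point past the end. A safer pattern (a minimal sketch reusing the new_final list built above) is to rebuild the list instead of popping in place; note that new_final also contains courses whose prefix never matched any other course, so those would be removed as well:

# rebuild the course list, skipping the indices marked for removal
to_remove = set(new_final)
data['courses'] = [course for idx, course in enumerate(data['courses'])
                   if idx not in to_remove]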

After a lot of back and forth in the question's comments:
# coding: utf-8
import json

filename = 'data.json'
with open(filename, 'r') as f:
    data = json.load(f)

courses = data.get('courses', None)
if courses:
    keys = sorted(set([course.get('course_name', None).strip().split()[0][0:3] for course in courses]))
    results = {'courses': {}}
    for key in keys:
        results['courses'][key] = []
        temp = {}
        for course in courses:
            course_name = course.get('course_name', None)
            professors = course.get('professors', None)
            if course_name.strip().split()[0][0:3] == key:
                course_mentioned_times = course.get('course_mentioned_times')
                temp[course_name] = {'course_mentioned_times': course_mentioned_times, 'professors': professors}
                results['courses'][key] = temp
else:
    raise Exception('No courses could be found on {}'.format(filename))

def get_most_mentioned(name):
    name = name[0:3]
    data = results.get('courses', None).get(name)
    max_mentioned_times = max(map(lambda m: data.get(m, None).get('course_mentioned_times'), data.keys()))
    most_mentioned = []
    for course_name, values in data.items():
        course_mentioned_times = values.get('course_mentioned_times', None)
        if course_mentioned_times == max_mentioned_times:
            most_mentioned.append({'course_name': course_name,
                                   'course_mentioned_times': course_mentioned_times,
                                   'professors': values.get('professors')})
    return most_mentioned

print "Course with most mentioned times:"
print "---------------------------------"
for key in keys:
    print "[*] For Key '{}':".format(key)
    for item in get_most_mentioned(key):
        course_name = item.get('course_name', None)
        print "    Course Name: {}".format(course_name)
        print "    Mentioned Times: {}\n".format(item.get('course_mentioned_times'))
        print "    Professors:\n"
        for i, professor in enumerate(item.get('professors', None), start=1):
            print "    {}) Full name: {} {}".format(i, professor.get('first_name'), professor.get('last_name'))
            print "       URL: {}".format(professor.get('professor_url'))
            print "       Helpfullness: {}".format(professor.get('helpfullness'))
            print "       Clarity: {}".format(professor.get('clarity'))
            print "       Overall_rating: {}".format(professor.get('overall_rating'))
        print ""
    print ""

import json
import collections

with open('output_info.json') as data_file:
    data = json.load(data_file)

courses = data['courses']
courses_by_prefix = collections.defaultdict(list)
for course in courses:
    prefix = course['course_name'].split(' ', 2)[0].upper()[:3]
    courses_by_prefix[prefix].append(course)

results = []
for prefix, courselist in courses_by_prefix.items():
    mosttimes = max(courselist, key=lambda c: c['course_mentioned_times'])
    results.append(mosttimes)

print(results)
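If the filtered list then needs to go back to disk in the same shape as the input, a short follow-up (a sketch assuming the data and results names from the answer above) is:

# replace the original course list with the per-prefix winners and save
data['courses'] = results
with open('data.json', 'w') as f:
    json.dump(data, f, indent=4)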

Related

Converting objects in file to json in python

I have a file with multiple objects like this:
{
name: (sindey, crosby)
game: "Hockey"
type: athlete
},
{
name: (wayne, gretzky)
game: "Ice Hockey"
type: athlete
}
...and I'd like to convert them to JSON format and output this:
[
    {
        "name": "(sindey, crosby)",
        "game": "Hockey",
        "type": "athlete"
    },
    {
        "name": "(wayne, gretzky)",
        "game": "Ice Hockey",
        "type": "athlete"
    }
]
If the input was in this format,
name: (sidney, crosby) | game:"Hockey" | type:athlete
name: (wayne, gretzky) | game:"Ice Hockey" | type:athlete
I could implement it using json.dump with a list and dict, and it would give me the desired output:
import json

f = open("log.file", "r")
content = f.read()
splitcontent = content.splitlines()

d = []
for v in splitcontent:
    l = v.split(' | ')
    d.append(dict(s.split(':', 1) for s in l))

with open("json_log.json", 'w') as file:
    file.write(json.dumps(d, indent=4, sort_keys=False))
How can I reformat this code to convert my input to JSON format?
With slight changes to the answer given by @Sarah Messer (the change: lines without the : separator are skipped), try this:
import json

f = open("log.file", "r")
content = f.read()
splitcontent = content.splitlines()

d = []
appendage = {}
for line in splitcontent:
    if ('}' in line) or ('{' in line):
        # Append a just-created record and start a new one
        if appendage:
            d.append(appendage)
            appendage = {}
        continue
    key, val = line.split(':')
    if val.endswith(','):
        # strip a trailing comma
        val = val[:-1]
    print(val)
    # if val == "":
    #     pass
    # else:
    appendage[key] = val

with open("json_log.json", 'w') as file:
    file.write(json.dumps(d, indent=4, sort_keys=False))
Something like this will probably work for most cases - you just have to handle the lines with curly braces separately from the lines with data:
import json

f = open("log.file", "r")
content = f.read()
splitcontent = content.splitlines()

d = []
appendage = {}
for line in splitcontent:
    if ('}' in line) or ('{' in line):
        # Append a just-created record and start a new one
        if appendage:
            d.append(appendage)
            appendage = {}
    else:
        key, val = line.split(':', 1)
        if val.endswith(','):
            # strip a trailing comma
            val = val[:-1]
        appendage[key] = val

with open("json_log.json", 'w') as file:
    file.write(json.dumps(d, indent=4, sort_keys=False))
I might also have some typos in there...
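One thing worth noting: with the code above, the values keep their leading spaces and literal quotes (e.g. " \"Hockey\""). If the output should match the desired JSON exactly, the key and value can be cleaned as they are parsed; a minimal sketch of the two lines that would change, under the same loop:

# strip surrounding whitespace, a trailing comma and literal quotes
key, val = line.split(':', 1)
appendage[key.strip()] = val.strip().rstrip(',').strip('"')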

Return a list of words from a text file with Python

I'm working on a Python project. I want to return a list of names from a text file, starting with one name that I know.
My text file looks like this:
ALPHA;n10;Output
ALPHA;n11;Input
ALPHA;n12;Input
BETA;n10;Input
BETA;n14;Input
CHARLIE;n10;Input
CHARLIE;n13;Output
DELTA;n13;Output
DELTA;n12;Input
Let's say I start from the name ALPHA, which I know is an Output.
First I have to find the number linked to this name, which is n10.
Then I want to return all the names that have the number n10 as an Input.
So in the end I want the list ["BETA", "CHARLIE"].
For the moment I have written the following function:
file = "path of the texte file"
name = "ALPHA"
liste_new_name = []
def search_new_name(liste):
file_txt = open(file, "r")
contenu = file_txt.readline()
file_txt.close()
if contenu.split(";")[0] == name and ";Output" in contenu:
num = contenu.split(";")[1]
if num in contenu and ";Input" in contenu:
liste.append(contenu.split(";")[0]
return liste
print(liste)
else:
print("No new name found")
else:
print("No num found")
search_new_name(liste_new_name)
My problem is that I get "No num found", but from the example above I know I should get a list.
I would parse the file into a dictionary. This will make searching much easier and will allow you to do multiple searches without having to re-read the file:
def parse_file(path):
    data = {}
    with open(path, 'r') as in_file:
        for line in in_file:
            try:
                name, n, direction = line.strip().split(';')
                if name not in data:
                    data[name] = {"Input": [], "Output": []}
                data[name][direction].append(n)
            except KeyError:
                print(f"Error with: {line}")
            except ValueError:
                pass
    return data
This will return a dictionary like:
{
    'ALPHA': {'Input': ['n11', 'n12'], 'Output': ['n10']},
    'BETA': {'Input': ['n10', 'n14'], 'Output': []},
    'CHARLIE': {'Input': ['n10'], 'Output': ['n13']},
    'DELTA': {'Input': ['n12'], 'Output': ['n13']}
}
With that searches can be done with a simple list comprehension:
def search_new_name(name, data):
    if name not in data:
        return None
    return [key for key, value in data.items()
            if any(x in data[key]["Input"] for x in data[name]["Output"])]
Sample usage:
data = parse_file(r"C:\foo\bar.txt")
print(search_new_name("ALPHA", data))
Output:
['BETA', 'CHARLIE']
You will have to read all the lines and create a dictionary with the 'number' and 'type' combination as the key; that will solve the problem.
file = "path of the texte file"
name = "ALPHA"
liste_new_name = []
def search_new_name(name):
name_map = {} ## dict to save all the info
search_key = False
file_txt = open(file, "r")
all_lines = file_txt.readlines()
for contenu in all_lines:
[l_name,l_num,l_type] = contenu.split(";")
key = l_num + "_" + l_type ## use num and type combination as a key
if l_name == name and l_type == "Output":
search_key = l_num+"_"+l_type
if key in name_map:
name_map[key] = name_map[key].append(l_name)
else:
name_map[key] = [l_name]
if search_key is False:
print("Num not found")
return []
else:
search_num = search_key.split('_')[0]
if search_num+'_Input' in name_map:
return name_map[search_num+'_Input']
else:
## return empty list if no input found
return []
search_new_name(name)
I tried to continue with my idea using two functions, like this:
file = "path of the text file"
name = "ALPHA"
new_l_name = []
num = []
def search_num(num):
file_txt = open(file, "r")
contenu = file_txt.readline()
while contenu:
contenu = fichier_txt.readline()
if contenu.split(";")[0] == name and ";Output" in contenu:
num.append(contenu.split(";")[1]
return num
else:
print("No num found")
file_txt.close()
search_num(num)
def search_new_name(liste):
file_txt = open(file, "r")
contenu = file_txt.readline()
while contenu:
contenu = file_txt.readline()
if contenu.split(";")[1] == num[0] and ";Input" in contenu:
new_name = contenu.split(";")[0]
liste.append(new_name)
print("the list of new name : {}".format(liste))
return liste
else:
print("No new name found")
search_new_name(new_l_name)
Finally, I do get the num we search for, but the returned list only contains the first new name found in the text file, not the others. It returns ["BETA"] and not ["BETA", "CHARLIE"] as we want.
If someone has an idea, thanks.
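For what it's worth, the reason only ["BETA"] comes back is that return liste sits inside the while loop, so search_new_name exits on the first matching line. A minimal sketch of the fix, reusing the same names and returning only after the whole file has been scanned:

def search_new_name(liste):
    with open(file, "r") as file_txt:
        for contenu in file_txt:
            parts = contenu.strip().split(";")
            # collect every name whose number matches and which is an Input
            if parts[1] == num[0] and parts[2] == "Input":
                liste.append(parts[0])
    return liste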

Removing the enumerator from json file using Python

The following code generates a json file; however, I need to get rid of the enumerator element in the output (i.e. "1":, "2":, etc.). It looks like a dumb question, but I'm totally confused!
The output looks like this:
{
    "1": {
        "found": "Alaska",
        "resolved as": " alaska",
        "gazetteer": " {com.novetta.clavin.gazetteer.LazyAncestryGeoName@59b62d}",
        "position": " 795",
        "confidence": " 1.000000",
        "fuzzy": " false",
        "lon": " -150.00028",
        "lat": " 64.00028"
    }, ...
And here is the code:
import json

filename = 'output.txt'
dict1 = {}
fields = ['found', 'resolved as', 'gazetteer', 'position', 'confidence', 'fuzzy', 'lon', 'lat']

with open(filename) as fh:
    l = 1
    for line in fh:
        # reading line by line from the text file
        description = list(line.strip().split(','))
        # for output see below
        print(description)
        # for automatic creation of an id for each record
        sno = '' + str(l)
        # loop variable
        i = 0
        # intermediate dictionary
        dict2 = {}
        while i < len(fields):
            # creating a dictionary for each record
            dict2[fields[i]] = description[i]
            i = i + 1
        # appending each record to the main dictionary
        dict1[sno] = dict2
        l = l + 1

# creating json file
out_file = open("test2.json", "w")
json.dump(dict1, out_file, indent=4)
out_file.close()
To get rid of the enumerator, there should only be one dictionary. Here is the new code, which converts a text file to json:
import json

filename = 'output.txt'
fields = ['found', 'resolved as', 'gazetteer', 'position', 'confidence', 'fuzzy',
          'lon', 'lat']
dic1 = {}

with open(filename) as fh:
    for line in fh:
        # reading line by line from the text file
        description = list(line.strip().split(','))
        print(description)
        for lines in description:
            for key in fields:
                for value in description:
                    dic1[key] = value
                    description.remove(value)
                    break

# print(str(dic1))
json_string = json.dumps(dic1)
print(json_string)
Now the output in the json file looks like this (without the enumerator key):
{"found": "Alaska", "resolved as": " alaska", "gazetteer": " {com.novetta.clavin.gazetteer.LazyAncestryGeoName#59b62d}", "position": " 56332", "confidence": " 1.000000", "fuzzy": " false", "lon": " -150.00028", "lat": " 64.00028"}

Python implementation with github

How can I fix the following bug, where comments from commits that have been merged are not downloaded? I think the problem lies in moving from REST to GraphQL, but I don't know exactly how to fix it.
The following script is used to get all quantified activities from a set of GitHub repositories. The
list of repos to be searched is found in the repos.lst file.
Authentication to GitHub is stored in a separate file called github.token,
containing the username and password/access token on two separate lines.
Quantified activities include merged pull-requests, closed issues (except for
those explicitly removed from the list for not being constructive) and comments.
Pull-requests:
Only the closed pull-requests are listed, and their merge status determined,
finally the merged pull-requests are stored in a JSON file, with entries
containing the reference number, the repository, the title, the author and the
pull-request URL.
Issues:
Only the closed issues are listed, the pull-requests (which are treated also as
issues by the GitHub API) removed from them, issues blacklisted in the
blacklist.lst file are also removed from the list, finally the remaining
issues are stored in a JSON file, with entries containing the reference number,
the repository, the title, the author and the issue URL.
Comments:
Comments from the commits, from the issues and from the pull-requests are all
listed and stored in JSON file with entries containing the author, the comment
ID, the repository, the comment content and the comment's URL. Issues comments
and pull-request comments will also contain the reference number of the issue
respectively the pull-request, with issues additionally having also the
original author of the issue, while the commit comments will contain the SHA1 of
the commit.
#!/usr/bin/env python3

# requests library is used to access GitHub API
import requests
import json
import re
import sys
import os.path
import bisect
import getpass
from itertools import chain

import repos

year = repos.get_year()
path = '../../{}/rezultate/'.format(year)

# read the list of repositories
repo_name = re.compile(r'([\w-]+)/([\w-]+)')
repos = [('etc-so', 'syllabus')]
with open(path + 'data/repos.lst', 'r') as f:
    for s in f:
        s = s.strip()
        # ignore empty lines and comments
        if s and not s.startswith('#'):
            m = repo_name.fullmatch(s)
            if not m:
                print("Invalid repo name: " + s)
            else:
                repos.append((m.group(1), m.group(2)))
# read the list of students
students = []
with open(path + 'data/students.json', 'r') as f:
    students = [x['user'] for x in json.load(f)]
if not students:
    print("No students to check for")
    sys.exit(1)

# get the access token
if os.path.exists('github.token'):
    with open('github.token', 'r') as f:
        auth = (f.readline().strip(), f.readline().strip())
else:
    auth = (input("Enter GitHub username: "), getpass.getpass('Password: '))

# construct a labels list, so it can be added to the comments as well
issue_labels = {}

# get the persistent PR, issue and comment lists
if os.path.exists(path + 'data/pulls.json'):
    with open(path + 'data/pulls.json', 'r') as f:
        saved_pulls = dict((x['id'], (x['week'], x['value'], x['author'])
                            + ((x['multi'],) if 'multi' in x else ()))
                           for x in json.load(f))
else:
    saved_pulls = {}

if os.path.exists(path + 'data/issues.json'):
    with open(path + 'data/issues.json', 'r') as f:
        saved_issues = dict((x['id'], (x['week'], x['value'])) for x in json.load(f))
else:
    saved_issues = {}

if os.path.exists(path + 'data/comments.json'):
    with open(path + 'data/comments.json', 'r') as f:
        saved_comments = dict((x['id'], (x['week'], x['value'])) for x in json.load(f))
else:
    saved_comments = {}

current_week = 0
# if there was already some activity then move ahead to the last week
if saved_pulls or saved_issues or saved_comments:
    current_week = max(x[0] for x in chain(saved_pulls.values(),
                                           saved_issues.values(),
                                           saved_comments.values()))

# if there is a command line argument use it as the current week
if len(sys.argv) == 2:
    if sys.argv[1].isdigit():
        current_week = int(sys.argv[1])
    # -n increments the current week if it can be obtained from the activity
    elif sys.argv[1] == '-n' and current_week != 0:
        current_week += 1
        print("Switching to week {}".format(current_week))
    elif sys.argv[1] == '?':
        print("Current week is {}".format(current_week))
        sys.exit(0)
    else:
        print("Invalid command line parameter")
        sys.exit(1)
elif len(sys.argv) > 2:
    print("Too many parameters")
    sys.exit(1)

# if no current week was obtained, start with week 1
if not current_week:
    current_week = 1
api_url = "https://api.github.com/"
get_url = "repos/{}/{}/{}"
c_params = {
    'state': 'closed',      # get closed pull-requests/issues
    'sort': 'created',
    'direction': 'asc',     # sort in ascending order by creation time
}

pr = []
pr_times = []
gi = []
comments = []
c_times = []
authors = {}
issue_comment_re = re.compile(r'.*/(pull|issues)/([0-9]+)#.*')
link_rel = re.compile(r'<([^>]+)>; rel="([\w]+)"(?:, )?')
request_count = 0

def github_get(get_code, **params):
    # fetch all pages of a list endpoint for the current repo
    global request_count
    res = []
    url = api_url + get_url.format(org, repo, get_code)
    while url:
        r = requests.get(url, auth=auth, params=params)
        if r.status_code != 200:
            print('Status code {} received'.format(r.status_code))
            print(r.content)
            sys.exit(1)
        # follow the 'next' links from the Link header for pagination
        links = dict((m[1], m[0]) for m in link_rel.findall(r.headers.get('link', '')))
        url = links.get('next')
        res.extend(r.json())
        request_count = r.headers['X-RateLimit-Remaining']
    return res
label_values = {
    'bug': 2,
    'enhancement': 2,
    'challenge': 2,
    'help wanted': 1,
    'invalid': -1,
    'duplicate': -2
}

def label_value(labels):
    # check predefined values
    values = [label_values[x] for x in labels if x in label_values]
    values += [x.count('★') for x in labels]
    if values:
        if min(values) < 0:
            return min(values)
        value = max(values)
    else:
        value = 0
    # add all white stars
    value += sum(x.count('☆') for x in labels)
    return value
def issue_value(value, issue_id=None):
    if issue_id:
        old_value = saved_issues.get(issue_id)
        if old_value and old_value[1] != value:
            print("Value change detected in issue {}".format(issue_id))
    return value

def pull_value(label, pull_id=None, ref=0):
    if label:
        value = label_value(label)
    else:
        value = None
        print("No label for pull-request {}".format(ref))
    if pull_id:
        old_value = saved_pulls.get(pull_id)
        if old_value and old_value[1] is not None and old_value[1] != value:
            print("Value change detected in pull-request {}".format(ref))
            if value is None:
                value = old_value[1]
    return value
ptotal = 0
itotal = 0
ctotal = 0

# pass through the repos
for org, repo in repos:
    print('Processing repo {}:'.format(repo), end=' ', flush=True)

    # get all the issues, do this first as it also includes the pull-requests
    # for which only here we can determine the labels
    issues = github_get('issues', state='all', sort='created', direction='asc')
    icount = 0
    for i in issues:
        ref = int(i['number'])
        author = i['user']['login']
        authors[(repo, ref)] = author
        label = [x['name'] for x in i['labels']]
        issue_labels[(repo, ref)] = label
        if 'pull_request' not in i and author in students:
            gi.append(
                {
                    'id': i['id'],
                    'repo': repo,
                    'ref': ref,
                    'title': i['title'],
                    'url': i['html_url'],
                    'author': author,
                    'label': label,
                    'week': saved_issues.get(i['id'], [current_week])[0],
                    'value': issue_value(label_value(label), i['id'])
                })
            icount += 1
    # get the merged pull-requests
    pulls = github_get('pulls', state='closed')
    pcount = 0
    # print(r.headers)
    for p in pulls:
        ref = int(p['number'])
        author = p['user']['login']
        label = issue_labels.get((repo, ref), [])
        if p['merged_at'] and author in students and (not label or label_value(label) >= 0):
            i = bisect.bisect_right(pr_times, p['merged_at'])
            pr_times.insert(i, p['merged_at'])
            # check authors of included commits
            if p['id'] in saved_pulls:
                author = saved_pulls[p['id']][2]
            else:
                pcommits = github_get('pulls/{}/commits'.format(ref))
                pc_authors = [author]
                for a in pcommits:
                    al = a['author'].get('login') if a['author'] else None
                    cl = a['committer'].get('login') if a['committer'] else None
                    if al == cl or cl == 'web-flow' or cl == 'kcs':
                        aa = al
                    else:
                        aa = ':'.join(x for x in [al, cl] if x)
                    if aa and aa not in pc_authors and aa != 'kcs':
                        pc_authors.append(aa)
                if len(pc_authors) != 1:
                    author = pc_authors
            pr.insert(i, {
                'id': p['id'],
                'repo': repo,
                'ref': ref,
                'title': p['title'],
                'url': p['html_url'],
                'label': label,
                'author': author,
                'week': saved_pulls.get(p['id'], [current_week])[0],
                'value': pull_value(label, p['id'], ref)
            })
            if len(saved_pulls.get(p['id'], [])) > 3:
                pr[i]['multi'] = saved_pulls[p['id']][3]
            pcount += 1
    # and now for the comments:
    # this is more troublesome as constructive comments must be selected
    # manually, so we are keeping a persistent JSON file for the comments
    # holding a valid tag; newly downloaded comments have this tag unset
    # and they can be validated afterwards manually (or by script)
    ccount = 0
    # 3 types of comments exist on GitHub: issue comments, review comments
    # and commit comments, all have to be handled separately
    ccomments = github_get('comments', per_page=50, sort='created')
    # with open('all_ccomments.json', 'w') as f:
    #     json.dump(ccomments, f, indent=4, sort_keys=True)
    for c in ccomments:
        author = c['user']['login']
        if author in students:
            i = bisect.bisect_right(c_times, c['created_at'])
            c_times.insert(i, c['created_at'])
            comments.insert(i,
                {
                    'id': c['id'],
                    'repo': repo,
                    'commit': c['commit_id'],
                    'msg': c['body'],
                    'url': c['html_url'],
                    'author': author,
                    'week': saved_comments.get(c['id'], [current_week])[0],
                    'value': saved_comments[c['id']][1] if c['id'] in saved_comments else None
                })
            ccount += 1
    icomments = github_get('issues/comments', per_page=50, sort='created')
    for c in icomments:
        author = c['user']['login']
        if author in students:
            url = c['html_url']
            m = issue_comment_re.fullmatch(url)
            if not m:
                print("Problem parsing issue url " + url)
                sys.exit(1)
            ref = int(m.group(2))
            i = bisect.bisect_right(c_times, c['created_at'])
            c_times.insert(i, c['created_at'])
            comments.insert(i,
                {
                    'id': c['id'],
                    'repo': repo,
                    'issue': ref,
                    'msg': c['body'],
                    'url': url,
                    'author': author,
                    'issue_author': authors[(repo, ref)],
                    'week': saved_comments.get(c['id'], [current_week])[0],
                    'value': saved_comments[c['id']][1] if c['id'] in saved_comments else None
                })
            if m.group(1) == 'issues' and (repo, ref) in issue_labels:
                comments[i]['label'] = issue_labels[(repo, ref)]
            ccount += 1
    pcomments = github_get('pulls/comments', per_page=50, sort='created')
    for c in pcomments:
        author = c['user']['login']
        if author in students:
            ref = int(c['pull_request_url'].rsplit('/', 1)[1])
            i = bisect.bisect_right(c_times, c['created_at'])
            c_times.insert(i, c['created_at'])
            comments.insert(i,
                {
                    'id': c['id'],
                    'repo': repo,
                    'pull': ref,
                    'msg': c['body'],
                    'url': c['html_url'],
                    'author': author,
                    'week': saved_comments.get(c['id'], [current_week])[0],
                    'value': saved_comments[c['id']][1] if c['id'] in saved_comments else None
                })
            ccount += 1

    print('found {} merged pull-requests, {} issues and {} comments'.format(pcount, icount, ccount))
    ptotal += pcount
    itotal += icount
    ctotal += ccount
with open(path + 'data/pulls.json', 'w') as f:
    json.dump(pr, f, indent=4, sort_keys=True, ensure_ascii=False)
with open(path + 'data/issues.json', 'w') as f:
    json.dump(gi, f, indent=4, sort_keys=True, ensure_ascii=False)
with open(path + 'data/comments.json', 'w') as f:
    json.dump(comments, f, indent=4, sort_keys=True, ensure_ascii=False)

print("Total: {} merged pull-requests, {} issues and {} comments [{} contributions]".format(
    ptotal, itotal, ctotal, ptotal + itotal + ctotal))
print("Remaining request count: {}".format(request_count))

repo_key = dict((r[1], i) for i, r in enumerate(repos))

def sort_repos(x):
    '''Sort the repos in a blacklist with (repo, ref) structure in the order
    in which they appear in the repos list.
    If a repo is not in the list, put it afterwards.
    '''
    if x[0] in repo_key:
        return (repo_key[x[0]], x[1])
    else:
        return (len(repos),) + x
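On the REST-to-GraphQL part of the question: the script above only uses the REST v3 endpoints, and commit comments should normally appear under repos/{owner}/{repo}/comments. If moving to GraphQL is the goal, a minimal sketch of a v4 query for comments on merged pull-requests is below; the query shape follows GitHub's public GraphQL schema, token is a hypothetical placeholder for a personal access token, and pagination via pageInfo/after is omitted:

import requests

query = '''
query ($owner: String!, $name: String!) {
  repository(owner: $owner, name: $name) {
    pullRequests(states: MERGED, first: 50) {
      nodes {
        number
        comments(first: 50) {
          nodes { author { login } body url }
        }
      }
    }
  }
}
'''

token = 'YOUR_TOKEN'  # hypothetical placeholder, not from the original script
r = requests.post('https://api.github.com/graphql',
                  json={'query': query,
                        'variables': {'owner': 'etc-so', 'name': 'syllabus'}},
                  headers={'Authorization': 'bearer ' + token})
print(r.json())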

How to create json file having array in Python

I want to create a json file like
{
    "a": ["12", "34", "23", ...],
    "b": ["13", "14", "45", ...],
    ...
}
The keys should come from the list:
lis = ['a','b',...]
and the values from the SQL query "select id from " + i, where I iterate through the list with "i". This query simply returns the id column.
Here is the sample code:
lis = ['a', 'b', 'c']
len_obj = len(lis)
with open("Dataset.json", 'w') as file:
    for i in lis:
        file.write(i)
        obj_query = i + '_query'
        obj_query = sf.query("select id from " + i)
        jsondata = json.loads(json.dumps(obj_query['records']))
        length = len(jsondata)
        i = {}
        k = 0
        for j in range(length):
            obj_id = jsondata[j]['Id']
            # print("id " + obj_id)
            if k == 0:
                ids = "\"" + obj_id + "\""
                k = 1
            else:
                ids = ids + ",\"" + obj_id + "\""
        if count != len_obj - 1:
            file.write(ids)
        else:
            file.write(ids)
        count += 1
    file.write("}")
The final output should look like:
{
    "a": ["12", "23", ...],
    "b": ["234", "456", ...]
}
This is my first post and also my first program, so please guide me through this. Please forgive the indentation of the program, as I am not able to reproduce it here properly.
You should be able to condense the whole thing down to just this:
import json

tables = ["a", "b", "c", "d"]
data = {}
for t in tables:
    results = sf.query("select id from %s" % t)["records"]
    data[t] = [r["id"] for r in results]

with open("Dataset.json", "w") as f:
    json.dump(data, f)
You can simply create a dictionary containing the values you are after and then convert it to json using json.dumps
import json

data = {}
data['a'] = ["12", "34", "23"]
data['b'] = ["13", "14", "45"]
json_data = json.dumps(data)
print json_data
@Jaco
lis = ['a', 'b', 'c']
with open("Dataset.json", 'w') as file:
    for i in lis:
        obj_query = i + '_query'
        obj_query = sf.query("select id from " + i)
        jsondata = json.loads(json.dumps(obj_query['records']))
        length = len(jsondata)
        # create dict
        data1 = {}
        k = 0
        for j in range(length):
            obj_id = jsondata[j]['Id']
            # print("id " + obj_id)
            if k == 0:
                ids = obj_id
                k = 1
            else:
                ids = ids + "," + obj_id
        data1[i] = [ids]
        json_data = json.dumps(data1)
        file.write(json_data)
The response I got is:
{"a":["12,23,34.."]}{"b":["23,45,..."]}{...}
