How to convert string to valid json or yaml - python

I have a large script that parses js with a dataframe entry, but to shorten the question, I put what I need in a separate variable.
My variable contains the following value
value = "{from:[3,4],to:[7,4],color:2},{from:[3,6],to:[10,6],color:3}"
I apply the following script and get data like this
value = "{from:[3,4],to:[7,4],color:2},{from:[3,6],to:[10,6],color:3}"
def parse_json(value):
arr = value.split("},")
arr = [x+"}" for x in arr]
arr[-1] = arr[-1][:-1]
return json.dumps({str(i):add_quotation_marks(x) for i, x in enumerate(arr)})
def add_quotation_marks(value):
words = re.findall(r'(\w+:)', value)
for word in words:
value = value.replace(word[:-1], f'"{word[:-1]}"')
return json.loads(value)
print(parse_json(value))
{"0": {"from": [3, 4], "to": [7, 4], "color": 2}, "1": {"from": [3, 6], "to": [10, 6], "color": 3}}
The script executes correctly, but I need to get a slightly different result.
This is what the result I want to get looks like:
{
"0": {
"from": {
"0": "3",
"1": "4"
},
"to": {
"0": "7",
"1": "4"
},
"color": "2"
},
"1": {
"from": {
"0": "3",
"1": "6"
},
"to": {
"0": "10",
"1": "6"
},
"color": "3"
}
}
This is valid json and valid yaml. Please tell me how can I do this

I'd suggest a regex approach in this case:
res = []
# iterates over each "{from:...,to:...,color:...}" group separately
for obj in re.findall(r'\{([^}]+)}', value):
item = {}
# iterates over each "...:..." key-value separately
for k, v in re.findall(r'(\w+):(\[[^]]+]|\d+)', obj):
if v.startswith('['):
v = v.strip('[]').split(',')
item[k] = v
res.append(item)
This produces this output in res:
[{'from': ['3', '4'], 'to': ['7', '4'], 'color': '2'}, {'from': ['3', '6'], 'to': ['10', '6'], 'color': '3'}]
Since your values can contain commas, trying to split on commas or other markers is fairly tricky, and using these regexes to match your desired values instead is more stable.

Here's the code that converts the the value to your desired output.
import json5 # pip install json5
value = "{from:[3,4],to:[7,4],color:2},{from:[3,6],to:[10,6],color:3}"
def convert(str_value):
str_value = f"[{str_value}]" # added [] to make it a valid json
parsed_value = json5.loads(str_value) # convert to python object
output = {} # create empty dict
# Loop through the list of dicts. For each item, create a new dict
# with the index as the key and the value as the value. If the value
# is a list, convert it to a dict with the index as the key and the
# value as the value. If the value is not a list, just add it to the dict.
for i, d in enumerate(parsed_value):
output[i] = {}
for k, v in d.items():
output[i][k] = {j: v[j] for j in range(len(v))} if isinstance(v, list) else v
return output
print(json5.dumps(convert(value)))
Output
{
"0": {
"from": {
"1": 4
},
"to": {
"0": 7,
"1": 4
},
"color": 2
},
"1": {
"from": {
"0": 3,
"1": 6
},
"to": {
"0": 10,
"1": 6
},
"color": 3
}
}
json5 package allows you to convert a javascrip object to a python dictionary so you dont have to do split("},{").
Then added [ and ] to make the string a valid json.
Then load the string using json5.loads()
Now you can loop through the dictionary and convert it to desired output format.

Related

JSON array sort by value - Python

I need to sort and create a new array based on the value of the JSON. I need to filter repositories under each team and store repositories into a different array.
Input array:
{
"repo_list": [
{
"repo_name": "MaticCorporation/Sample-Repo-1",
"team_name": "AFIN",
"tlt_member": "Sample-TLT-Member-1",
"matix.properties": "Valid"
},
{
"repo_name": "MaticCorporation/Sample-Repo-2",
"team_name": "AFIN",
"tlt_member": "Sample-TLT-Member-1",
"matix.properties": "Valid"
},
{
"repo_name": "MaticCorporation/Sample-Repo-3",
"team_name": "-",
"tlt_member": "Sample-TLT-Member-2",
"matix.properties": "Invalid"
},
{
"repo_name": "MaticCorporation/Sample-Repo-4",
"team_name": "RETIX",
"tlt_member": "-",
"matix.properties": "Invalid"
},
{
"repo_name": "MaticCorporation/Sample-Repo-5",
"team_name": "-",
"tlt_member": "-",
"matix.properties": "No"
}
]
}
Output:
{
"repo_by_team": [
{
"team": "AFIN",
"repo_count": 2,
"repo_list": [
"MaticCorporation/Sample-Repo-1",
"MaticCorporation/Sample-Repo-2"
]
},
{
"team": "RETIX",
"repo_count": 1,
"repo_list": [
"MaticCorporation/Sample-Repo-4"
]
}
]
}
I've implemented the solution to filter and store all team names into an array, but I'm having difficulty how to get the result like output array.
Here is my code for extracting team names:
def get_team_names(repo_list):
repos=valid_repos(repo_list)
team_name=[item.get('team') for item in repos]
return team_name
You can use a dict[str, list[str]] to map between a team and its repositories, and you can use the json module to transform data between Python dictionaries and a JSON representation.
import json
with open('input.json') as input_file, open('output.json', 'w') as output_file:
repo_data = json.load(input_file)['repo_list']
team_repos = {}
for repo in repo_data:
if repo['team_name'] != '-':
if repo['team_name'] not in team_repos:
team_repos[repo['team_name']] = []
team_repos[repo['team_name']].append(repo['repo_name'])
result = []
for team, repo_list in team_repos.items():
result.append({
"team": team,
"repo_count": len(repo_list),
"repo_list": repo_list
})
json.dump({'repo_by_team': result}, output_file, indent=4)
The following is functional. The function may perform slowly on large input, but it uses no more than the necessary amount of space. It does, however, accept and return a Python dictionary. To convert to and from a dictionary use the Python json module.
def sort_by_team(repo_list: dict) -> dict:
ans = {"repo_by_team": []}
for repo in repo_list:
if repo["team_name"] != "-" and repo["team_name"] not in [r["team"] for r in ans["repo_by_team"]]:
ans["repo_by_team"].append({"team": repo["team_name"], "repo_count": 1, "repo_list": [repo["repo_name"]]})
else:
for r in ans["repo_by_team"]:
if r["team"] != repo["team_name"]:
continue
r["repo_count"] += 1
r["repo_list"].append(repo["repo_name"])
break
return ans

Statistics on a list of dictionaries considering multiples keys

I have a list of dicts:
input = [{'name':'A', 'Status':'Passed','id':'x1'},
{'name':'A', 'Status':'Passed','id':'x2'},
{'name':'A','Status':'Failed','id':'x3'},
{'name':'B', 'Status':'Passed','id':'x4'},
{'name':'B', 'Status':'Passed','id':'x5'}]
I want an output like :
output = [{'name':'A', 'Passed':'2', 'Failed':'1', 'Total':'3', '%Pass':'66%'},
{'name':'B', 'Passed':'2', 'Failed':'0', 'Total':'2', '%Pass':'100%'},
{'name':'Total', 'Passed':'4', 'Failed':'1', 'Total':'5', '%Pass':'80%'}]\
i started retrieving the different names by using a lookup :
lookup = {(d["name"]): d for d in input [::-1]}
names= [e for e in lookup.values()]
names= names[::-1]
and after using the list comprehension something like :\
for name in names :
name_passed = sum(["Passed" and "name" for d in input if 'Status' in d and name in d])
name_faled = sum(["Failed" and "name" for d in input if 'Status' in d and name in d])\
But i am not sure if there is a smartest way ? a simple loop and comparing dict values will be more simple!?
Assuming your input entries will always be grouped according to the "name" key-value pair:
entries = [
{"name": "A", "Status": "Passed", "id": "x1"},
{"name": "A", "Status": "Passed", "id": "x2"},
{"name": "A", "Status": "Failed", "id": "x3"},
{"name": "B", "Status": "Passed", "id": "x4"},
{"name": "B", "Status": "Passed", "id": "x5"}
]
def to_grouped(entries):
from itertools import groupby
from operator import itemgetter
for key, group_iter in groupby(entries, key=itemgetter("name")):
group = list(group_iter)
total = len(group)
passed = sum(1 for entry in group if entry["Status"] == "Passed")
failed = total - passed
perc_pass = (100 // total) * passed
yield {
"name": key,
"Passed": str(passed),
"Failed": str(failed),
"Total": str(total),
"%Pass": f"{perc_pass:.0f}%"
}
print(list(to_grouped(entries)))
Output:
[{'name': 'A', 'Passed': '2', 'Failed': '1', 'Total': '3', '%Pass': '66%'}, {'name': 'B', 'Passed': '2', 'Failed': '0', 'Total': '2', '%Pass': '100%'}]
This will not create the final entry you're looking for, which sums the statistics of all other entries. Though, that shouldn't be too hard to do.

How to combine a dict to a json file as an object with same index in Python?

The question may be confusing I know however, I don't know how to ask this properly.
Let me explain the issue. I have a json file like this:
{
"0": "MyItem",
"1": "AnotherItem"
}
Then I am generating a dictionary with the same context above. Like this.
{
"UniqueId": "52355",
"AnotherUniqueId": "234235"
}
They have same length. What I want to do is I want to parse this dictionary to this json file at the same index as an object like:
{
{"0": "MyItem", "UniqueId": "52355"}
{"1": "AnotherItem", "AnotherUniqueId": "234235"}
}
How to achieve this ?
it takes item of each dict and combines them with { **dict1, **dict2 }
then stores each dict as key-value pairs of final dicts.
n = {
"0": "MyItem",
"1": "AnotherItem"
}
m = {
"UniqueId": "52355",
"AnotherUniqueId": "234235"
}
c = {}
for i, keys in enumerate(zip(n, m)):
a, b = keys
c[i] = { **{a:n[a]} , **{b:m[b]} }
print(c)
output :
{
0: {'0': 'MyItem', 'UniqueId': '52355'},
1: {'1': 'AnotherItem', 'AnotherUniqueId': '234235'}
}
Your dictionaries in the final dictionary need to be accompanied by some sort of key since a dictionary is a key-value pair, it wouldn't make sense to not have a key for a value. The output you should go after is this for example
{
0: {"0": "MyItem", "UniqueId": "52355"},
1: {"1": "AnotherItem", "AnotherUniqueId": "234235"}
}
Here's my solution
b = {
"UniqueId": "52355",
"AnotherUniqueId": "234235"
}
a = {
"0": "MyItem",
"1": "AnotherItem"
}
# Assuming a and b are of the same length
c = {} # will contain the final dictionaries
index = 0
for i,j in zip(a,b):
temp = {}
temp[i]=a[i]
temp[j]=b[j]
c[index] = temp
index+=1
print(c)

compare two list of dicts in python 2

I want to compare two lists of dictionaries in python, I have a list sent from the frontend and a query result stored in the same function, so all I want is to compare both of the lists with the key barcode and if they matches I want to append the name from the second dictionary to the first one
for example:
data_from_frontend = [
{ barcode: '1', name_en: 'milk' },
{ barcode: '2', name_en: 'water' },
{ barcode: '3', name_en: 'cheese' },
{ barcode: '10', name_en: 'pepsi' },
]
result_from_query = [
{ barcode: '1', name: 'PID012343' },
{ barcode: '2', name: 'PID123454' },
{ barcode: '10', name: 'PID123432' },
]
I want to compare both of the lists and by the barcode and if they match I want to merge the the pair of both variables to a new one + adding the one the doesn't match to another list, so the outcome would be two new variables with the [matched + name] and not_found, how can I achieve that ?
Here is what i've tried
equal = []
not_equal = []
no_barcode = []
x = [ { "age": "22" }, { "name": "John Doe" }, { "name": "Jane Doe" }, { "name": "Doctor" }, { "name": "Engineer" } ]
y = [ { "name": "Engineer" }, { "name": "Jane Doe" }, { "name": "Doctor" } ]
x_sort = sorted(x, key=lambda k: ("name" not in k, k.get("name", None)))
y_sort = sorted(y, key=lambda k: ("name" not in k, k.get("name", None)))
print(y_sort)
for x_val in x_sort:
if "name" not in x_val.keys():
no_barcode.append(x_val)
else:
for y_val in y_sort:
if x_val["name"] == y_val["name"]:
equal.append(x_val)
mapped = map(lambda k: k["name"], y_sort)
if x_val["name"] not in mapped:
not_equal.append(x_val)
print('equal')
print(equal)
print('not equal')
print(not_equal)
First of all you should fix your dict keys and enclose them into quotes.
Then you can use generator expression to find items, for example:
print('initial dict:')
pprint.pprint(data_from_frontend)
for item in result_from_query:
item_found = next((i for i in data_from_frontend if i['barcode'] == item['barcode']), False)
if item_found:
item_found['name'] = item['name']
print('dict after search:')
pprint.pprint(data_from_frontend)
will produce:
initial dict:
[{'barcode': '1', 'name_en': 'milk'},
{'barcode': '2', 'name_en': 'water'},
{'barcode': '3', 'name_en': 'cheese'},
{'barcode': '10', 'name_en': 'pepsi'}]
dict after search:
[{'barcode': '1', 'name': 'PID012343', 'name_en': 'milk'},
{'barcode': '2', 'name': 'PID123454', 'name_en': 'water'},
{'barcode': '3', 'name_en': 'cheese'},
{'barcode': '10', 'name': 'PID123432', 'name_en': 'pepsi'}]
Using False in generator will avoid error when searching by barcode value not existing in target dict.
P.S. dont forget to import pprint if you want to use it
P.P.S. and sure you can create new dict instead of modifying existing one, using same logic
Your barcode matching results can be get like this.
barcode = 'barcode'
name_en = 'name_en'
name = 'name'
matching_result = data_from_frontend[:] #get a copy of front end data to use as the output
for i in range(len(data_from_frontend)):
for j in range(len(result_from_query)):
if(data_from_frontend[i][barcode] == result_from_query[j][barcode]):
matching_result[i][name] = result_from_query[j][name]
break
print(matching_result)

Python - replace all keys in nested dict according to list

I have a JSON file where all the keys are numbers.
These numbers are the index of the correct key in a list of keys I have.
Example JSON file (excuse any odd formatting, this is a simplified version of the actual file):
{
"0": {
"1": [{
"2": 0,
"3": {
"4": "string"
},
"4": {
"5": 2,
"6": 1
}
}]
}
}
Example list:
[a, b, c, d, e, f, g]
Is there a way to then replace the keys with their counterparts from the list? (as in, the key '0' would become 'a', '1' would become 'b' and so on.)
I can get it working for just the parent keys, but not any of the nested keys.
Thanks for any help.
You can write a recursive function that transforms its argument, and then its nested values.
Use isinstance to handle dict and list arguments appropriately. Other values are considered "leaf nodes" and are returned unmodified.
import pprint
obj = {
"0": {
"1": [{
"2": 0,
"3": {
"4": "string"
},
"4": {
"5": 2,
"6": 1
}
}]
}
}
keys = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
def transform(obj):
if isinstance(obj, list):
return [transform(element) for element in obj]
elif isinstance(obj, dict):
return {keys[int(key)]: transform(value) for key, value in obj.items()}
else:
return obj
print('Before:')
pprint.pprint(obj)
other = transform(obj)
print('After:')
pprint.pprint(other)

Categories