I have been working on a project that involves parsing a CSV file in order to turn all the data into a very specifically formatted JSON following a complex schema. I have to custom make this program as the required complexity of the JSON makes existing converters fail. I am mostly there, I have run into one final roadblock though:
I have nested dictionaries, and occasionally there must be a list within those, this list will contain further dictionaries. This is fine, I have been able to complete that, BUT now I need to find a way to add more nested dictionaries within those. Below is a simplified breakdown of the concept.
the CSV will look something like this, where the # before a tag indicates it's a list
x.a, x.b.z, x.b.y, x.#c.z.nest1, x.#c.z.nest2, x.#c.yy, x.d, x.e.z, x.e.y
ab, cd, ef, gh, ij, kl, mn, op, qr
this should result in the following JSON
{
"x": {
"a": "ab",
"b": {
"z": "cd",
"y": "ef"
},
"c": [
{
"z": {
"nest1": "gh",
"nest2": "ij"
}
},
{
"yy": "kl"
}
],
"d": "mn",
"e": {
"z": "op",
"y": "qr"
}
}
}
This is one issue that I haven't been able to solve, my current code can only do one dictionary after the list item, not further. I also need to be able to somehow do the following within a list of dictionaries:
"c": [
{
"z": {
"nest1": "gh"
},
"zz": {
"nest2": "ij"
}
},
{
"yy": "kl"
}
i.e. somehow add multiple nested dictionaries within the dictionary in the list. The problem with this occurs within the fact that these aren't reference-able by name, so I do not know how I could potentially indicate to do that within the CSV format.
Here is the code I have that works up to the first dictionary nested within a list:
import json
import pandas as pd
from os.path import exists
# df1 = pd.read_csv("excelTestFacilities.csv", header = 1, sep=",", keep_default_na=False, engine="python")
# df2 = pd.read_csv("excelTestFacilityContacts.csv", header = 1, sep=",", keep_default_na=False, engine="python")
# df = pd.merge(df1, df2, how = 'inner')
# header=1: the second line of the file (row index 1) supplies the column names.
# sep=", " is longer than one character, so pandas treats it as a regular
# expression, which is why the python engine is requested.
# keep_default_na=False keeps empty cells as "" instead of converting them to NaN.
df = pd.read_csv("csvTestFile.csv", header = 1, sep=", ", keep_default_na=False, engine="python")
#print(df) # uncomment to see the transformation
# One dictionary per CSV row, keyed by the (dotted) column name.
json_data = df.to_dict(orient="records")
#print(json_data)
def unflatten_dic(dic):
    """
    Build nested dictionaries from one flat CSV record.

    Every key of ``dic`` is a dotted path.  A path segment that starts
    with ``#`` names a list.  If any parent segment is a list, the final
    ``{key: value}`` pair is appended to the current container as a new
    dictionary; otherwise the value is stored under the last segment.

    Only a list segment in the last parent position is supported: any
    further parent after a list segment raises ``AttributeError``
    because a list has no ``get`` method.
    """
    result = {}
    for path, value in list(dic.items()):
        *parents, leaf = path.split('.')
        has_list = any(segment[0] == '#' for segment in parents)
        cursor = result
        for segment in parents:
            if segment[0] == '#':
                name, fresh = segment[1:], []
            else:
                name, fresh = segment, {}
            child = cursor.get(name, fresh)
            cursor[name] = child
            cursor = child
        if has_list:
            cursor.append({leaf: value})
        else:
            cursor[leaf] = value
    return result
def merge_lists(dic):
    """
    De-duplicate list values and zip parallel lists into records, in place.

    For every value stored in ``dic``:

    * a dict whose values are all lists of the same length is replaced
      by a list of dicts, one per distinct combination of aligned items;
    * any other dict is processed recursively;
    * a list is replaced by a copy without repeated items.

    First-seen order is preserved in both cases.  All items must be
    hashable.  The function returns ``None``; ``dic`` itself is modified.
    """
    for name, value in list(dic.items()):
        if isinstance(value, dict):
            columns = list(value.values())
            # Note: an empty dict passes this check vacuously and becomes [].
            if all(isinstance(col, list) and len(col) == len(columns[0]) for col in columns):
                # dict.fromkeys drops duplicates while keeping insertion order,
                # unlike set(), whose iteration order is arbitrary.
                rows = dict.fromkeys(zip(*columns))
                dic[name] = [dict(zip(value, row)) for row in rows]
            else:
                merge_lists(value)
        elif isinstance(value, list):
            dic[name] = list(dict.fromkeys(value))
def _is_blank(item):
    """Return True for the two values treated as blank: "" and {}."""
    return item == "" or item == {}


def clean_blanks(value):
    """
    Recursively remove blank entries from dictionaries and lists.

    An entry is blank when it is the empty string ``""`` or an empty
    dictionary ``{}``.  Children are cleaned first, so a dictionary that
    only held blanks is itself removed from its parent.  Every other
    value (``None``, ``0``, empty lists, ...) is kept.

    Returns a new dictionary or list; the input is not modified.
    Values that are neither lists nor dictionaries are returned as is.
    """
    if isinstance(value, list):
        cleaned = (clean_blanks(item) for item in value)
        return [item for item in cleaned if not _is_blank(item)]
    if isinstance(value, dict):
        cleaned = ((key, clean_blanks(val)) for key, val in value.items())
        return {key: val for key, val in cleaned if not _is_blank(val)}
    return value
def add_to_dict(section_added_to, section_to_add, value, reportNum):
    """
    Store ``value`` in one report of the module-level ``end_list``.

    section_added_to: name of an existing sub-section of the report to
        write into (for example facility); pass '' to write at the top
        level of the report.
    section_to_add: key under which ``value`` is stored.
    value: the item to store.
    reportNum: index of the report inside ``end_list``, starting at 0.
    """
    target = end_list[reportNum]
    if section_added_to != '':
        target = target[section_added_to]
    target[section_to_add] = value
def read_add_vals(filename_prefix, added_to, section):
    """
    Attach the rows of per-report CSV files to the reports in ``end_list``.

    For report number i (0-based) the file ``<filename_prefix><i+1>.csv``
    is read if it exists; each of its rows is unflattened and the
    resulting list is stored through ``add_to_dict`` under ``section``
    (inside ``added_to`` when that is not '').  Reports without a
    matching file are left untouched.
    """
    for index in range(len(end_list)):
        path = filename_prefix + str(index + 1) + ".csv"
        if exists(path):
            frame = pd.read_csv(path, header = 1, sep=",", keep_default_na=False, engine="python")
            rows = [unflatten_dic(record) for record in frame.to_dict(orient="records")]
            add_to_dict(added_to, section, rows, index)
# One unflattened report per CSV row; add_to_dict/read_add_vals look this
# name up at module level.
end_list = [unflatten_dic(record) for record in json_data]
#read_add_vals('excelTestPermitsFac', 'facility', 'permits');
json_data = clean_blanks(end_list)
final_json = {"year":2021, "version":"2022-02-14", "reports":json_data}
print(json.dumps(final_json, indent=4))
Some parts of this code are involved in other components of the overall end JSON, but I am mainly concerned with how to change unflatten_dic().
Here is my current working code for changing unflatten_dic(), even though it doesn't work...
def list_get(list, list_item):
    """
    Return the value stored under ``list_item`` in the first entry of
    ``list`` that contains it, or an empty dict when no entry does.
    """
    holders = (entry for entry in list if list_item in entry)
    first = next(holders, None)
    if first is None:
        return {}
    return first.get(list_item, {})
def check_in_list(list, list_item):
    """
    Return the position of the first entry of ``list`` that contains
    ``list_item``, or -1 when no entry does.
    """
    for position, entry in enumerate(list):
        if list_item in entry:
            return position
    return -1
def _dict_holding(entries, name):
    """Return the dict in ``entries`` that has key ``name``; append a new empty one if none does."""
    for entry in entries:
        if isinstance(entry, dict) and name in entry:
            return entry
    entry = {}
    entries.append(entry)
    return entry


def unflatten_dic(dic):
    """
    Unflattens a CSV record into a set of nested dictionaries and lists.

    Every key of ``dic`` is a dotted path; a segment starting with ``#``
    names a list.  A value whose last parent is a list is appended to it
    as a new ``{key: value}`` dictionary.  A named segment that follows a
    list segment is stored in the list entry that already holds that
    name, or in a new entry if there is none, so ``x.#c.z.nest1`` and
    ``x.#c.z.nest2`` end up in the same ``{"z": {...}}`` dictionary.

    Returns the nested structure as a new dictionary.
    """
    ini = {}
    for k, v in dic.items():
        *parents, key = k.split('.')
        node = ini
        for parent in parents:
            is_list = parent.startswith('#')
            name = parent[1:] if is_list else parent
            if isinstance(node, list):
                # Step into the list entry that owns this name before descending.
                node = _dict_holding(node, name)
            node = node.setdefault(name, [] if is_list else {})
        if isinstance(node, list):
            node.append({key: v})
        else:
            node[key] = v
    return ini
Any, even small, amount of help would be greatly appreciated.
It is easiest to use recursion and collections.defaultdict to group child entries on their parents (each entry is separated by the . in the csv data):
from collections import defaultdict
def to_dict(vals, is_list = 0):
    """
    Recursively group path rows into nested dictionaries and lists.

    ``vals`` is an iterable of rows ``[segment, ..., value]``.  Rows are
    grouped on their first segment; a group whose first row has only the
    value left becomes that value, any other group is built recursively.
    A segment starting with ``#`` marks its children as a list, and the
    marker is removed from the key.  With ``is_list`` true the result is
    a list of single-key dictionaries instead of one dictionary.
    """
    grouped = defaultdict(list)
    for head, *tail in vals:
        grouped[head].append(tail)

    def plain_name(segment):
        return segment[1:] if segment[0] == '#' else segment

    def build(segment, tails):
        first = tails[0]
        if len(first) == 1:
            return first[0]
        return to_dict(tails, segment[0] == '#')

    if is_list:
        return [{plain_name(seg): build(seg, tails)} for seg, tails in grouped.items()]
    return {plain_name(seg): build(seg, tails) for seg, tails in grouped.items()}
import csv, json
# newline='' is what the csv module expects for files it parses itself.
# skipinitialspace=True drops the blank after each comma ("x.a, x.b.z"),
# so header names and values are not stored with a leading space.
with open('filename.csv', newline='') as f:
    data = list(csv.reader(f, skipinitialspace=True))
# Lines come in (header, values) pairs; a trailing header line without
# values is ignored instead of raising IndexError.
r = [
    a.split('.') + [b]
    for header, values in zip(data[::2], data[1::2])
    for a, b in zip(header, values)
]
print(json.dumps(to_dict(r), indent=4))
Output:
{
"x": {
"a": "ab",
"b": {
"z": "cd",
"y": "ef"
},
"c": [
{
"z": {
"nest1": "gh",
"nest2": "ij"
}
},
{
"yy": "kl"
}
],
"d": "mn",
"e": {
"z": "op",
"y": "qr"
}
}
}
I managed to get it working in what seems to be all scenarios. Here is the code that I made for the unflatten_dic() function.
def unflatten_dic(dic):
    """
    Unflattens a CSV record into a set of nested dictionaries and lists.

    Every key of ``dic`` is a dotted path:

    * a segment starting with ``#`` names a list;
    * a decimal segment directly under a list selects the dictionary at
      that index, creating it (and any skipped indices, as empty
      dictionaries) when needed;
    * any other segment names a dictionary.  A decimal segment under a
      dictionary is treated as an ordinary key.

    A value whose last parent is a list is appended to it as a new
    ``{key: value}`` dictionary; otherwise it is stored under its key.

    Raises ``AttributeError`` when a named segment directly follows a
    list segment without an index in between.
    """
    ini = {}
    for k, v in dic.items():
        *parents, key = k.split('.')
        node = ini
        for parent in parents:
            if isinstance(node, list) and parent.isdecimal():
                index = int(parent)
                # Pad so that indices may appear out of order or with gaps.
                while len(node) <= index:
                    node.append({})
                node = node[index]
            elif parent.startswith('#'):
                node = node.setdefault(parent[1:], [])
            else:
                node = node.setdefault(parent, {})
        if isinstance(node, list):
            node.append({key: v})
        else:
            node[key] = v
    return ini
I haven't run into an issue thus far, this is based on the following rules for the CSV:
# before any name results in that item being a list
if the section immediately after a list in the CSV is a number, that will create multiple dictionaries within the list. Here is an example
x.a, x.b.z, x.b.y, x.#c.0.zz, x.#c.1.zz, x.#c.2.zz, x.d, x.e.z, x.e.y, x.#c.1.yy.l, x.#c.1.yy.#m.q, x.#c.1.yy.#m.r
ab, cd, ef, gh, , kl, mn, op, qr, st, uv, wx
12, 34, 56, 78, 90, 09, , 65, 43, 21, , 92
This will result in the following JSON after formatting
"reports": [
{
"x": {
"a": "ab",
"b": {
"z": "cd",
"y": "ef"
},
"c": [
{
"zz": "gh"
},
{
"yy": {
"l": "st",
"m": [
{
"q": "uv"
},
{
"r": "wx"
}
]
}
},
{
"zz": "kl"
}
],
"d": "mn",
"e": {
"z": "op",
"y": "qr"
}
}
},
{
"x": {
"a": "12",
"b": {
"z": "34",
"y": "56"
},
"c": [
{
"zz": "78"
},
{
"zz": "90",
"yy": {
"l": "21",
"m": [
{
"r": "92"
}
]
}
},
{
"zz": "09"
}
],
"e": {
"z": "65",
"y": "43"
}
}
}
]
Related
I'm writing some Python code (data extraction from a ConLL-U format file) and I want my data to be stored in a .json file. I'd like to achieve an output's format like the following (x are keys, y are values):
{
"lemma": {"x": "y","x": [{"x":"y"}], "x": "y", "x": [{"x":"y"}], "x": "" },
"lemma1":{"x": "y", "x": [{"x":"y"}], "x": "y", "x": [{"x":"y"}], "x": "y" }...
}
Last section of my code (it's probably quite inefficient, but for now I'm just interested in formatting the JSON output):
token_info= {}
...
sentences = []
tokens = []
idn_dep_dict = {}
for line in lines:
    if line == '\n':
        # A blank line closes the current sentence.
        sentences.append(tokens)
        tokens = []
        continue
    fields = line.strip().split('\t')
    # Only rows whose first field is a plain integer id are tokens; rows
    # too short to hold field 6 are skipped instead of raising IndexError.
    if len(fields) < 7 or not fields[0].isdigit():
        continue
    idn = fields[0]
    lemma = fields[1]
    pos_pair = (fields[3], fields[4])
    feats = fields[5]
    dep = fields[6]
    tokens.append((idn, lemma, pos_pair, feats, dep))
    idn_dep_dict[idn] = [dep]
# Keep the last sentence when the input does not end with a blank line.
if tokens:
    sentences.append(tokens)

for sentence in sentences:
    # POS pair of every token in this sentence, keyed by token id.
    heads = {token_id: token_pos for token_id, _, token_pos, _, _ in sentence}
    for idn, lemma, pos_pair, feats, dep in sentence:
        # POS pair of the head token; 'root' for head id '0'; None when
        # the head id is not present in the sentence.
        head = heads.get(dep, 'root' if dep == '0' else None)
        # x1 is taken from this token's own pos_pair, not from whatever
        # line the parsing loop happened to read last.
        token_info[lemma] = {
            'x1': [pos_pair[0]],
            'x2': [{'0': pos_pair}],
            'x3': [{'0': head}],
            'x4': feats,
        }

# Write the JSON data to a file
with open('token_info.json', 'w', encoding='utf-8') as f:
    json.dump(token_info, f, ensure_ascii=False, indent = 2, separators=(',', ': '))
The current code generates a newline after each [,] or {,} or comma in the JSON file. I'd like to have each lemma = {corresponding dictionary} on its own line. Is it possible? Thank you all in advance.
Serialize one level of the dictionary structure manually like this.
import json
token_info = json.loads('''
{
"lemma": {"x": "y","x2": [{"x":"y"}], "x3": "y", "x4": [{"x":"y"}], "x5": "" },
"lemma1":{"x": "y", "x2": [{"x":"y"}], "x3": "y", "x4": [{"x":"y"}], "x5": "y" }
}
''')


def _entry(name, payload):
    """Serialize one top-level key and its value on a single line."""
    return (json.dumps(name, ensure_ascii=False)
            + ': '
            + json.dumps(payload, ensure_ascii=False, separators=(',', ': ')))


# Only the outermost level is laid out by hand; everything below it is
# left to json.dumps so each entry stays on one line.
lines = [_entry(name, payload) for name, payload in token_info.items()]
src = '{\n ' + ',\n '.join(lines) + '\n}'
print(src)
This will output the following.
{
"lemma": {"x": "y","x2": [{"x": "y"}],"x3": "y","x4": [{"x": "y"}],"x5": ""},
"lemma1": {"x": "y","x2": [{"x": "y"}],"x3": "y","x4": [{"x": "y"}],"x5": "y"}
}
Having the following dict, where some of the values can be list of dictionaries:
{
"A": [
{
"B": {
"C": "D",
"X": "CHANGE ME"
}
},
{
"E": "F"
}
],
"G": {
"Y": "CHANGE ME"
}
}
I would like to recursively iterate over the items and change the pairs of key values where the value is "CHANGE ME", so the result would be:
{
"A": [
{
"B": {
"C": "D",
"X.CHANGED": "CHANGED"
}
},
{
"E": "F"
}
],
"G": {
"Y.CHANGED": "CHANGED"
}
}
Solutions I've found were not handling a case where the value is a list, for example:
import collections
def nested_dict_iter(nested):
    """
    Yield ``(key, value)`` for every non-mapping value in ``nested``.

    Values that are mappings are descended into recursively; only their
    leaves are yielded, with the innermost key.  Lists are yielded as a
    whole and are not descended into.
    """
    # collections.Mapping was removed from the collections namespace;
    # the abstract base class lives in collections.abc.
    from collections.abc import Mapping

    for key, value in nested.items():
        if isinstance(value, Mapping):
            yield from nested_dict_iter(value)
        else:
            yield key, value
How can I achieve my goal?
Using recursion
Ex:
def update(data):
    """
    Rename every key whose value is 'CHANGE ME', in place.

    Walks ``data`` (a dict or a list) recursively.  Each dict entry whose
    value equals 'CHANGE ME' is replaced by an entry ``"<key>.CHANGED"``
    with the value 'CHANGED'.  List items that are neither dicts nor
    lists (strings, numbers, ...) are left untouched, since they have no
    key to rename.

    Returns ``data`` itself.
    """
    if isinstance(data, list):
        for item in data:
            if isinstance(item, (dict, list)):
                update(item)
        return data
    # Iterate over a snapshot: keys are deleted and added while walking.
    for k, v in list(data.items()):
        if isinstance(v, (dict, list)):
            update(v)
        elif v == 'CHANGE ME':
            del data[k]
            data[f"{k}.CHANGED"] = 'CHANGED'
    return data
print(update(data))
Output:
{
'A':[{'B': {'C': 'D', 'X.CHANGED': 'CHANGED'}}, {'E': 'F'}],
'G':{'Y.CHANGED': 'CHANGED'}
}
Note: I have not tested all corner cases
Background
For some background, I'm trying to create a tool that converts worksheets into API calls using Python 3.5
For the conversion of the table cells to the schema needed for the API call, I've started down the path of using javascript like syntax for the headers used in the spreadsheet. e.g:
Worksheet Header (string)
dict.list[0].id
Python Dictionary
{
"dict":
"list": [
{"id": "my cell value"}
]
}
It's also possible that the header schema could have nested arrays/dicts:
one.two[0].three[0].four.five[0].six
And I also need to append to the object after it has been created as I go through each header.
What I've tried
add_branch
Based on https://stackoverflow.com/a/47276490/2903486 I am able to get nested dictionaries setup using values like one.two.three.four and I'm able to append to the existing dictionary as I go through the rows but I've been unable to add in support for arrays:
def add_branch(tree, vector, value):
    """
    Store ``value`` in ``tree`` at the nested path given by ``vector``.

    ``vector`` is a non-empty sequence of keys.  Intermediate
    dictionaries are created when missing and reused when present.
    Returns ``tree``, which is modified in place.
    """
    head, rest = vector[0], vector[1:]
    if len(vector) == 1:
        tree[head] = value
        return tree
    subtree = tree[head] if head in tree else {}
    tree[head] = add_branch(subtree, rest, value)
    return tree
file = Worksheet(filePath, sheet).readRow()
rowList = []
for row in file:
rowObj = {}
for colName, rowValue in row.items():
rowObj.update(add_branch(rowObj, colName.split("."), rowValue))
rowList.append(rowObj)
return rowList
My own version of add_branch
import re, json
def branch(tree, vector, value):
"""
Used to convert JS style notation (e.g dict.another.array[0].id) to a python object
Originally based on https://stackoverflow.com/a/47276490/2903486

tree is the dictionary being filled, vector the list of remaining path
segments and value the cell value to store. Returns tree.
"""
# Convert Boolean
if isinstance(value, str):
value = value.strip()
if value.lower() in ['true', 'false']:
value = True if value.lower() == "true" else False
# Convert JSON
# The bare except means any value json.loads cannot parse is kept unchanged.
try:
value = json.loads(value)
except:
pass
key = vector[0]
# A segment such as "array[0]" carries an index in square brackets.
arr = re.search('\[([0-9]+)\]', key)
if arr:
# Strip the "[n]" suffix from the key; the index text left in arr is not
# used again below.
arr = arr.group(0)
key = key.replace(arr, '')
arr = arr.replace('[', '').replace(']', '')
# newArray is assigned here but never read.
newArray = False
if key not in tree:
tree[key] = []
tree[key].append(value \
if len(vector) == 1 \
else branch({} if key in tree else {},
vector[1:],
value))
else:
# Look for an existing element that already holds the next path segment.
isInArray = False
for x in tree[key]:
if x.get(vector[1:][0], False):
isInArray = x[vector[1:][0]]
# NOTE(review): both branches below append a brand new element built from
# an empty dict, so an existing element is never extended.
if isInArray:
tree[key].append(value \
if len(vector) == 1 \
else branch({} if key in tree else {},
vector[1:],
value))
else:
tree[key].append(value \
if len(vector) == 1 \
else branch({} if key in tree else {},
vector[1:],
value))
# A leaf that is the only element of its array is split on commas.
if len(vector) == 1 and len(tree[key]) == 1:
tree[key] = value.split(",")
else:
# Plain segment: store the leaf, or recurse into the existing (or a new) dict.
tree[key] = value \
if len(vector) == 1 \
else branch(tree[key] if key in tree else {},
vector[1:],
value)
return tree
What still needs help
My branch solution actually works pretty well now after adding in some things, but I'm wondering if I'm doing something wrong/messy here, or if there's a better way to handle the part where I'm editing nested arrays (my attempt started in the if isInArray section of the code).
I'd expect these two headers to edit the last array, but instead I end up creating a duplicate dictionary on the first array:
file = [{
"one.array[0].dict.arrOne[0]": "1,2,3",
"one.array[0].dict.arrTwo[0]": "4,5,6"
}]
rowList = []
for row in file:
rowObj = {}
for colName, rowValue in row.items():
rowObj.update(add_branch(rowObj, colName.split("."), rowValue))
rowList.append(rowObj)
return rowList
Outputs:
[
{
"one": {
"array": [
{
"dict": {
"arrOne": [
"1",
"2",
"3"
]
}
},
{
"dict": {
"arrTwo": [
"4",
"5",
"6"
]
}
}
]
}
}
]
Instead of:
[
{
"one": {
"array": [
{
"dict": {
"arrOne": [
"1",
"2",
"3"
],
"arrTwo": [
"4",
"5",
"6"
]
}
}
]
}
}
]
So I'm not sure if there are any caveats in this solution, but this appears to work for some of the use cases i'm throwing at it:
import json, re
def build_job():
# Builds one nested object per row of the hard-coded sample below and
# returns them as a list.
def branch(tree, vector, value):
# Stores value in tree at the path given by the list of segments in
# vector and returns tree.
# Originally based on https://stackoverflow.com/a/47276490/2903486
# Convert Boolean
if isinstance(value, str):
value = value.strip()
if value.lower() in ['true', 'false']:
value = True if value.lower() == "true" else False
# Convert JSON
# The bare except means any value json.loads cannot parse is kept unchanged.
try:
value = json.loads(value)
except:
pass
key = vector[0]
arr = re.search('\[([0-9]+)\]', key)
if arr:
# Get the index of the array, and remove it from the key name
arr = arr.group(0)
key = key.replace(arr,'')
arr = int(arr.replace('[','').replace(']',''))
if key not in tree:
# If we don't have an array already, turn the dict from the previous
# recursion into an array and append to it
# The first element is appended without consulting the index in arr.
tree[key] = []
tree[key].append(value \
if len(vector) == 1 \
else branch({} if key in tree else {},
vector[1:],
value))
else:
# Check to see if we are inside of an existing array here
isInArray = False
for i in range(len(tree[key])):
if tree[key][i].get(vector[1:][0], False):
isInArray = tree[key][i][vector[1:][0]]
if isInArray and arr < len(tree[key]) \
and isinstance(tree[key][arr], list):
# Respond accordingly by appending or updating the value
tree[key][arr].append(value \
if len(vector) == 1 \
else branch(tree[key] if key in tree else {},
vector[1:],
value))
else:
# Make sure we have an index to attach the requested array to
while arr >= len(tree[key]):
tree[key].append({})
# update the existing array with a dict
tree[key][arr].update(value \
if len(vector) == 1 \
else branch(tree[key][arr] if key in tree else {},
vector[1:],
value))
# Turn comma delimited values to lists
if len(vector) == 1 and len(tree[key]) == 1:
tree[key] = value.split(",")
else:
# Add dictionaries together
tree.update({key: value \
if len(vector) == 1 \
else branch(tree[key] if key in tree else {},
vector[1:],
value)})
return tree
# Sample input: one row, keyed by JS-style header paths.
file = [{
"one.array[0].dict.dont-worry-about-me": "some value",
"one.array[0].dict.arrOne[0]": "1,2,3",
"one.array[0].dict.arrTwo[1]": "4,5,6",
"one.array[1].x.y[0].z[0].id": "789"
}]
rowList = []
for row in file:
rowObj = {}
for colName, rowValue in row.items():
rowObj.update(branch(rowObj, colName.split("."), rowValue))
rowList.append(rowObj)
return rowList
print(json.dumps(build_job(), indent=4))
Result:
[
{
"one": {
"array": [
{
"dict": {
"dont-worry-about-me": "some value",
"arrOne": [
"1",
"2",
"3"
],
"arrTwo": [
"4",
"5",
"6"
]
}
},
{
"x": {
"y": [
{
"z": [
{
"id": 789
}
]
}
]
}
}
]
}
}
]
I have a list of lists containing key and value like so:
[
['mounts:device', '/dev/sda3'],
['mounts:fstype:[0]', 'ext1'],
['mounts:fstype:[1]', 'ext3']
]
Well I can easily change the list to this
(Lists aren't separated by ':')
[
['mounts:device', '/dev/sda3'],
['mounts:fstype[0]', 'ext1'],
['mounts:fstype[1]', 'ext3']
]
Whatever suits better for this problem:
Problem is to create a dictionary:
{
'mounts': {
'device': '/dev/sda3',
'fstype': [
'ext1',
'ext3'
]
}
It should also be possible to have lists in lists for example:
['mounts:test:lala:fstype[0][0]', 'abc']
or
['mounts:test:lala:fstype:[0]:[0]', 'abc']
This is what I have so far:
def unflatten(pair_list):
    """
    Turn ``[path, value]`` pairs into nested dictionaries.

    Each path is split on ':'; every segment but the last names a nested
    dictionary (created on first use) and the last one receives the value.
    """
    root = {}
    for pair in pair_list:
        *parents, leaf = pair[0].split(':')
        node = root
        for name in parents:
            node = node.setdefault(name, {})
        node[leaf] = pair[1]
    return root
Based on this answer https://stackoverflow.com/a/18648007/5413035 but as requested I need recursivness and lists in the mix
Thanks in advance
Here is a solution using a tree of dict:
import collections
def tree():
    """Return a defaultdict whose missing keys are themselves trees."""
    return collections.defaultdict(tree)


def unflatten(pair_list):
    """
    Turn ``[path, value]`` pairs into a tree of defaultdicts.

    Each path is split on ':'.  A segment starting with '[' (such as
    "[0]") is converted to the integer between its first and last
    character; any other segment is used as a string key.
    """
    def as_key(part):
        if part[0] == "[":
            return int(part[1:-1])
        return part

    root = tree()
    for mount, path in pair_list:
        *ancestors, leaf = mount.split(":")
        node = root
        for part in ancestors:
            node = node[as_key(part)]
        node[as_key(leaf)] = path
    return root
With the following input:
pair_list = [
['mounts:device', '/dev/sda3'],
['mounts:fstype:[0]', 'ext1'],
['mounts:fstype:[1]', 'ext3'],
['mounts:test:lala:fstype:[0]:[0]', 'abc']
]
You'll get:
{
"mounts": {
"fstype": {
"0": "ext1",
"1": "ext3"
},
"test": {
"lala": {
"fstype": {
"0": {
"0": "abc"
}
}
}
},
"device": "/dev/sda3"
}
}
Then you can use the recursive function make_list below to turn the integer indexes into lists.
def make_list(root):
    """
    Convert every mapping whose keys are all integers into a list.

    ``root`` is walked recursively.  A non-empty mapping with only
    integer keys becomes a list as long as its largest key plus one;
    positions without a key are filled with ``None``.  Any other mapping
    becomes a plain dict with converted values.  Values that are not
    mappings (strings, numbers, ``None``, ...) are returned unchanged.
    """
    if not isinstance(root, dict):
        return root
    keys = list(root.keys())
    # "keys and" keeps an empty mapping a dict instead of calling max([]).
    if keys and all(isinstance(k, int) for k in keys):
        values = [None] * (max(keys) + 1)
        for k in keys:
            values[k] = make_list(root[k])
        return values
    return {k: make_list(v) for k, v in root.items()}
Here is the result with the pair_list:
flat = unflatten(pair_list)
flat = make_list(flat)
You'll get:
{'mounts': {'device': '/dev/sda3',
'fstype': ['ext1', 'ext3'],
'test': {'lala': {'fstype': [['abc']]}}}}
Is it fine?
input1=[
    ['mounts:device', '/dev/sda3'],
    ['mounts:fstype:[0]', 'ext1'],
    ['mounts:fstype:[1]', 'ext3']
]
input2={x[1]:x[0].split(':')[1] for x in input1}
input3=['ext3', 'ext1', '/dev/sda3']
input4=['fstype', 'fstype', 'device']
# Group the values under their key.
res={}
for x,y in zip(input3, input4):
    res.setdefault(y,[]).append(x)
# Unwrap single-item groups key by key.  Pairing res.keys() with a
# separately reordered list of values only matched by accident of dict
# ordering and mislabels the entries when insertion order is kept.
result={key: group[0] if len(group)==1 else group for key, group in res.items()}
print(result)
Output :
{'device': '/dev/sda3', 'fstype': ['ext3', 'ext1']}
I try to generate JSON file from python dictionary data type.
Here is the segment of python code involved in this issue before I dump it to Json format :
channelSeg = {}
channelSeg["ch"] = None
channelSeg["chdata"] = []
for e in channelPkg:
print e
attr = e.split(':')
if attr[0] == "ch":
channel = attr[1].split(',')
channelSeg["ch"] = int(channel[0])
Heading
I am doing this to init dictionary index then later I could append more data in my for loop like this:
channelSeg["ch"] = None
channelSeg["chdata"] = []
but I really want to do is without assign them any data just
channelSeg["ch"]
channelSeg["chdata"]
but python doesn't like me to do that .
So after dump operation , I got repetitive Json data like this(part of it)
"datapkg": [
{
"dataseg": [
{
"ch": 0,
"chdata": [
{
"euler": {
"y": "-19.32",
"x": "93.84",
"z": "-134.14"
}
},
{
"areal": {
"y": "57",
"x": "-242",
"z": "-210"
}
}
]
},
{
"ch": 1,
"chdata": [
{
"areal": {
"y": "-63",
"x": "-30",
"z": "10"
}
}
]
},
{
"ch": null,
"chdata": []
}
],
"t": "174464",
"n": "9884"
},
I always have redundant :
{
"ch": null,
"chdata": []
}
This makes the JSON data package unclean. Is there any way to remove this piece of redundant data?
Many thanks for any advices
===========v2==============
After considering Edward's answer, I found I could only solve the channelSeg["ch"] = None part, but I don't know how to deal with the other redundant list. That is because I didn't post enough code, so I am pasting more complete code here, and I am still looking for solutions.
My code after modify :
for elem in sensorPkg:
channelPkg = elem.split('&') # channelPkg contain each channel's reading
# each channel need a dictonary to store data
channelSeg = {}
# channelSeg["ch"] = None
channelSeg["chdata"] = []
for e in channelPkg:
attr = e.split(':')
if attr[0] == "ch":
new_channel = {
'ch': int((attr[1].split(','))[0])
#channelSeg["ch"] = int(channel[0])
}
channelSeg["chdata"].append(new_channel)
# store channel numbers
elif attr[0] == "euler":
# create euler package
numbers = attr[1].split(',')
eulerSeg = {}
d = {}
d["x"] = numbers[0]
d["y"] = numbers[1]
d["z"] = numbers[2]
eulerSeg["euler"] = d
# append to channel segement
channelSeg["chdata"].append(eulerSeg)
elif attr[0] == "areal": # real accelrometer readings
# create areal package
numbers = attr[1].split(',')
arealSeg = {}
d = {}
d["x"] = numbers[0]
d["y"] = numbers[1]
d["z"] = numbers[2]
arealSeg["areal"] = d
# append to channel segement
channelSeg["chdata"].append(arealSeg)
#and so on
and here is the outcome
{
"dataseg": [
{
"chdata": [
{
"ch": 0
},
{
"euler": {
"y": "6.51",
"x": "73.16",
"z": "-133.69"
}
},
{
"areal": {
"y": "516",
"x": "-330",
"z": "-7"
}
}
]
},
{
"chdata": [
{
"ch": 1
},
{
"euler": {
"y": "24.86",
"x": "4.30",
"z": "-71.39"
}
},
{
"areal": {
"y": "120",
"x": "316",
"z": "273"
}
}
]
},
{
"chdata": [
{
"ch": 2
},
{
"euler": {
"y": "62.32",
"x": "-60.34",
"z": "-120.82"
}
},
{
"areal": {
"y": "440",
"x": "-611",
"z": "816"
}
}
]
},
{
"chdata": []
}
],
"t": "14275",
"n": "794"
},
which
{
"chdata": []
}
Still there
In the data structure that you're working with, I notice that 'dataseg' is a list of channels. Now, you don't need to initialize each channel before adding it to dataseg. First initialize dataseg as an empty list, then, while iterating over your entries in channelPkg, you can create new channel dicts using the information read from channelPkg, and append them immediately:
dataseg = []
for e in channelPkg:
    attr = e.split(':')
    if attr[0] == "ch":
        new_channel = {
            # split() returns a list; the channel number is its first item.
            'ch': int(attr[1].split(',')[0]),
            'data': []
        }
        dataseg.append(new_channel)
Hope that helps -- I'm not sure what the context of your question is exactly, so comment if this doesn't solve your problem.
Edit
I think that your problem is that the very last channelPkg is empty. So, for e in channelPkg: is equivalent to for e in [], and as a result, the last iteration of the outer loop appends just the initialized values (nothing inside for e in channelPkg executes).
Try adding two lines to test if the sensorPkg has a ch property (I'm assuming that all valid sensorPkgs have a ch property):
for elem in sensorPkg:
channelPkg = elem.split('&')
# Add this to prevent appending an empty channel
if 'ch' not in [e.split(':')[0] for e in channelPkg]:
break
channelSeg = {}
channelSeg["chdata"] = []
for e in channelPkg:
# ... etc
Try using a comprehension with a filter condition:
# The filter goes after the "for" clause; a list is built (not a set) so
# the result can be serialized by json.
channelSeg["chdata"] = [ch.split(',')[0] for ch in e.split(':') if ch]