how to add dictionary object name to json object - python

I have 3 python dictionaries as below:
gender = {'Female': 241, 'Male': 240}
marital_status = {'Divorced': 245, 'Engaged': 243, 'Married': 244, 'Partnered': 246, 'Single': 242}
family_type = {'Extended': 234, 'Joint': 235, 'Nuclear': 233, 'Single Parent': 236}
I add them to a list:
lst = [gender, marital_status, family_type]
I then create a JSON string, which I need to save as a JSON file using pd.to_json, with:
jf = json.dumps(lst, indent = 4)
When we look at jf object:
print(jf)
[
{
"Female": 241,
"Male": 240
},
{
"Divorced": 245,
"Engaged": 243,
"Married": 244,
"Partnered": 246,
"Single": 242
},
{
"Extended": 234,
"Joint": 235,
"Nuclear": 233,
"Single Parent": 236
}
]
Is there a way to make the dictionary name as key and get output as below:
{
"gender": {
"Female": 241,
"Male": 240
},
"marital_status": {
"Divorced": 245,
"Engaged": 243,
"Married": 244,
"Partnered": 246,
"Single": 242
},
"family_type": {
"Extended": 234,
"Joint": 235,
"Nuclear": 233,
"Single Parent": 236
}
}

You'll have to do this manually by creating a dictionary and mapping the name to the sub_dictionary yourself.
my_data = {'gender': gender, 'marital_status':marital_status, 'family_type': family_type}
Edit: example of adding to an outfile using json.dump
with open('myfile.json', 'w') as writer:
    # json.dump serializes my_data directly into the open file handle;
    # the handle name must match the one bound by the with-statement.
    json.dump(my_data, writer)

As per your requirement, you can do it like this by replacing the line that defines lst:
dict_req = {"gender":gender, "marital_status":marital_status, "family_type":family_type}

Related

Getting AttributeError while calling RandomForest()

I have been trying to do hyperopt tuning using the following models, but I keep getting this traceback. I have tried changing the parameters and adding different code for n_estimators, but to no avail. I am not able to solve it with any of the solutions that are available online.
# Defining Search Space
space = hp.choice('classifiers', [
{
'model': LogisticRegression(),
'params': {
'model__penalty': hp.choice('lr.penalty', ['l2']),
'model__C': hp.choice('lr.C', np.arange(0.005,1.0,0.01))
}
},
{
'model': BernoulliNB(),
'params': {}
},
{
'model': tree.DecisionTreeClassifier(),
'params': {
'model__max_depth' : hp.choice('tree.max_depth',
range(5, 30, 1)),
}
},
{
'model': xgb.XGBClassifier(),
'params': {
'model__max_depth' : hp.choice('xgb.max_depth',
range(5, 30, 1)),
'model__learning_rate': hp.loguniform ('learning_rate', 0.01, 0.5),
'model__gamma': hp.loguniform('xbg.gamma', 0.0, 2.0),
'model__random_state' : 42
}
},
# {
# 'model': GradientBoostingClassifier(),
# 'params': {
# 'model__n_estimators': hp.uniformint('n_estimators', 100, 500),
# 'model__max_depth': hp.uniformint('max_depth', 2, 20),
# 'model__random_state' : 42
# }
# },
{
'model': RandomForestClassifier(),
'params': {
'model__n_estimators' : hp.randint('rf.n_estimators_', [100, 200, 300, 400]),
'model__max_depth': hp.uniformint('rf.max_depth', 2, 20),
'model__min_samples_split':hp.uniformint('rf.min_samples_split', 2, 10),
'model__bootstrap': hp.choice('rf.bootstrap', [True, False]),
'model__max_features': hp.choice('rf.max_features', ['auto', 'sqrt']),
'model__random_state' : np.random.RandomState(42)
}
}
])
Traceback (most recent call last):
File "<input>", line 4, in <module>
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/hyperopt/pyll_utils.py", line 18, in wrapper
return f(label, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/hyperopt/pyll_utils.py", line 72, in hp_choice
return scope.switch(ch, *options)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/hyperopt/pyll/base.py", line 188, in __call__
return self.symbol_table._new_apply(
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/hyperopt/pyll/base.py", line 61, in _new_apply
pos_args = [as_apply(a) for a in args]
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/hyperopt/pyll/base.py", line 61, in <listcomp>
pos_args = [as_apply(a) for a in args]
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/hyperopt/pyll/base.py", line 211, in as_apply
named_args = [(k, as_apply(v)) for (k, v) in items]
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/hyperopt/pyll/base.py", line 211, in <listcomp>
named_args = [(k, as_apply(v)) for (k, v) in items]
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/hyperopt/pyll/base.py", line 217, in as_apply
rval = Literal(obj)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/hyperopt/pyll/base.py", line 534, in __init__
o_len = len(obj)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/sklearn/ensemble/_base.py", line 195, in __len__
return len(self.estimators_)
AttributeError: 'RandomForestClassifier' object has no attribute 'estimators_'
I have tried everything at this point and would appreciate any/all help. Thank you!

How to extract items inside JSON one by one with regex condition

I use the Google Vision API in my project. The OCR result is returned as a JSON file that represents all the items the API recognized, with their coordinates. I want to add a feature that runs through the whole JSON to find the item I want and then stores the coordinates and the description in an array/list.
This is the returned JSON format:
{
"textAnnotations": [
{
"description": "a",
"boundingPoly": {
"vertices": [
{
"x": 235,
"y": 409
},
{
"x": 247,
"y": 408
},
{
"x": 250,
"y": 456
},
{
"x": 238,
"y": 457
}
]
}
},
{
"description": "b",
"boundingPoly": {
"vertices": [
{
"x": 235,
"y": 409
},
{
"x": 247,
"y": 408
},
{
"x": 250,
"y": 456
},
{
"x": 238,
"y": 457
}
]
}
},{c...},{d...},{e...}
],
"fullTextAnnotation": {
"pages": "not important",
"text": "a\nb\nc\nd\ne\n"
}
}
My aim is to find two items and calculate whether they are parallel. For example, I want to find out whether b, c, d, or e is parallel with a, and I have already stored the coordinates of a in a list with this method:
def getJson():
    """Load and parse ``json_file.json`` from the current directory.

    Returns:
        The decoded JSON document, or ``None`` if the file could not be
        read or parsed. In that case the error and its traceback are
        printed instead of being raised.
    """
    try:
        # The context manager closes the file even if reading or parsing fails.
        with open('json_file.json', 'r', encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        # Best-effort loader: report the problem and return None explicitly.
        print(e)
        print(traceback.format_exc())
        return None
def get_keywords_coordinates(origin_data):
    """Return the flattened bounding-box coordinates of the "a" annotation.

    Args:
        origin_data: Parsed OCR response with a ``textAnnotations`` list
            whose items carry ``description`` and
            ``boundingPoly['vertices']`` entries.

    Returns:
        ``[x0, y0, x1, y1, ...]`` for the first annotation whose
        description is ``"a"``, or an empty list when none matches.
    """
    matches = [
        node for node in origin_data['textAnnotations']
        if node['description'] == "a"
    ]
    if not matches:
        return []
    # A comprehension's loop variable is not visible outside the
    # comprehension, so the matched node has to be selected explicitly.
    keyword_node = matches[0]
    keyword_coords = []
    for vertex in keyword_node['boundingPoly']['vertices']:
        keyword_coords.append(vertex['x'])
        keyword_coords.append(vertex['y'])
    return keyword_coords
where keyword_coords is the list that contains the coordinates, which looks like this:
keyword_coords[235, 409, 247, 408, 250, 456, 238, 457]
I will pass it and another keyword's coordinates into a function to do that calculation, but I have no idea how to get the coordinates of b, c, d, and e one by one. (a, b, c, d and e are just examples; in the real situation the item names cannot be hard-coded, so I may let the program find the keywords with some regex.)
How should I deal with this?
I don't know exactly what you want to do, but it doesn't need regex - a normal for-loop is enough to work with the items one by one.
First I would change get_keywords_coordinates to get all items and coordinates
def get_keywords_coordinates(data):
    """Pair every annotation's description with its flattened vertices.

    Returns a list of ``(description, [x0, y0, x1, y1, ...])`` tuples, one
    per entry of ``data['textAnnotations']``, in their original order.
    """
    return [
        (
            annotation["description"],
            [
                value
                for vertex in annotation["boundingPoly"]['vertices']
                for value in (vertex['x'], vertex['y'])
            ],
        )
        for annotation in data['textAnnotations']
    ]
results = get_keywords_coordinates(data)
print('--- coords ---')
print(results)
Result:
--- coords ---
[
('a', [235, 409, 247, 408, 250, 456, 238, 457]),
('b', [335, 409, 347, 408, 350, 456, 338, 457]),
('c', [435, 409, 447, 408, 450, 456, 438, 457])
]
Then I would pick a selected item (e.g. the first item, a) and create a list without this item:
selected = results[0]
#rest = results[1:]
rest = results.copy() # more useful if I would selected item with different index
rest.remove(selected) # more useful if I would selected item with different index
print('--- items ---')
print('selected:', selected)
print('rest :', rest)
print('---')
Result:
--- items ---
selected: ('a', [235, 409, 247, 408, 250, 456, 238, 457])
rest : [('b', [335, 409, 347, 408, 350, 456, 338, 457]), ('c', [435, 409, 447, 408, 450, 456, 438, 457])]
And I could use for-loop to compare selected item with other items - one by one
for item in rest:
print('compare', selected[0], 'with', item[0])
print(selected[0], selected[1])
print(item[0], item[1])
Result:
compare a with b
a [235, 409, 247, 408, 250, 456, 238, 457]
b [335, 409, 347, 408, 350, 456, 338, 457]
compare a with c
a [235, 409, 247, 408, 250, 456, 238, 457]
c [435, 409, 447, 408, 450, 456, 438, 457]
Full example:
data = {
"textAnnotations": [
{
"description": "a",
"boundingPoly": {
"vertices": [
{
"x": 235,
"y": 409
},
{
"x": 247,
"y": 408
},
{
"x": 250,
"y": 456
},
{
"x": 238,
"y": 457
}
]
}
},
{
"description": "b",
"boundingPoly": {
"vertices": [
{
"x": 335,
"y": 409
},
{
"x": 347,
"y": 408
},
{
"x": 350,
"y": 456
},
{
"x": 338,
"y": 457
}
]
}
},
{
"description": "c",
"boundingPoly": {
"vertices": [
{
"x": 435,
"y": 409
},
{
"x": 447,
"y": 408
},
{
"x": 450,
"y": 456
},
{
"x": 438,
"y": 457
}
]
}
},
],
"fullTextAnnotation": {
"pages": "not important",
"text": "a\nb\nc\nd\ne\n"
}
}
def get_keywords_coordinates(data):
    """Build ``(description, flat_coords)`` tuples for all annotations.

    ``flat_coords`` lists each vertex's x then y, in vertex order, for the
    corresponding entry of ``data['textAnnotations']``.
    """
    pairs = []
    for annotation in data['textAnnotations']:
        label = annotation["description"]
        flat = []
        for vertex in annotation["boundingPoly"]['vertices']:
            flat.extend((vertex['x'], vertex['y']))
        pairs.append((label, flat))
    return pairs
results = get_keywords_coordinates(data)
print('--- coords ---')
print(results)
selected = results[0]
#rest = results[1:]
rest = results.copy()
rest.remove(selected)
print('--- keywords ---')
print('selected:', selected)
print('rest :', rest)
print('---')
for item in rest:
print('compare', selected[0], 'with', item[0])
print(selected[0], selected[1])
print(item[0], item[1])

Iterate through nested JSON in Python

js = {
"status": "ok",
"meta": {
"count": 1
},
"data": {
"542250529": [
{
"all": {
"spotted": 438,
"battles_on_stunning_vehicles": 0,
"avg_damage_blocked": 39.4,
"capture_points": 40,
"explosion_hits": 0,
"piercings": 3519,
"xp": 376586,
"survived_battles": 136,
"dropped_capture_points": 382,
"damage_dealt": 783555,
"hits_percents": 74,
"draws": 2,
"battles": 290,
"damage_received": 330011,
"frags": 584,
"stun_number": 0,
"direct_hits_received": 1164,
"stun_assisted_damage": 0,
"hits": 4320,
"battle_avg_xp": 1299,
"wins": 202,
"losses": 86,
"piercings_received": 1004,
"no_damage_direct_hits_received": 103,
"shots": 5857,
"explosion_hits_received": 135,
"tanking_factor": 0.04
}
}
]
}
}
Let us name this json "js" as a variable, this variable will be in a for-loop.
To understand better what I'm doing here, I'm trying to collect data from a game.
This game has hundreds of different tanks; each tank has a tank_id, which I can post to the game server, and the server responds with the performance data as "js".
for tank_id: json = requests.post(tank_id) etc...
and fetch all these values to my database as shown in the screenshot.
my python code for it:
def api_get():
for property in js['data']['542250529']['all']:
spotted = property['spotted']
battles_on_stunning_vehicles = property['battles_on_stunning_vehicles']
# etc
# ...
insert_to_db(spotted, battles_on_stunning_vehicles, etc....)
the exception is:
for property in js['data']['542250529']['all']:
TypeError: list indices must be integers or slices, not str
and when:
print(js['data']['542250529'])
i get the rest of the js as a string, and i can't iterate... can't be used a valid json string, also what's inside js['data']['542250529'] is a list containing only the item 'all'..., any help would be appreciated
You just missed [0] to get the first item in a list:
def api_get():
    """Read the statistics out of the module-level ``js`` response."""
    # js['data']['542250529'] is a list of entries; each entry keeps its
    # statistics in a dict under the 'all' key.
    for entry in js['data']['542250529']:
        stats = entry['all']
        # Index the stats dict directly: iterating it would yield only the
        # key names (strings), which cannot be subscripted by field name.
        spotted = stats['spotted']
        # ...
Look carefully at the data structure in the source JSON.
There is a list containing the dictionary with a key of all. So you need to use js['data']['542250529'][0]['all'] not js['data']['542250529']['all']. Then you can use .items() to get the key-value pairs.
See below.
js = {
"status": "ok",
"meta": {
"count": 1
},
"data": {
"542250529": [
{
"all": {
"spotted": 438,
"battles_on_stunning_vehicles": 0,
"avg_damage_blocked": 39.4,
"capture_points": 40,
"explosion_hits": 0,
"piercings": 3519,
"xp": 376586,
"survived_battles": 136,
"dropped_capture_points": 382,
"damage_dealt": 783555,
"hits_percents": 74,
"draws": 2,
"battles": 290,
"damage_received": 330011,
"frags": 584,
"stun_number": 0,
"direct_hits_received": 1164,
"stun_assisted_damage": 0,
"hits": 4320,
"battle_avg_xp": 1299,
"wins": 202,
"losses": 86,
"piercings_received": 1004,
"no_damage_direct_hits_received": 103,
"shots": 5857,
"explosion_hits_received": 135,
"tanking_factor": 0.04
}
}
]
}
}
for key, val in js['data']['542250529'][0]['all'].items():
print("key:", key, " val:", val)
#Or this way
for key in js['data']['542250529'][0]['all']:
print("key:", key, " val:", js['data']['542250529'][0]['all'][key])

python, zenoss old dictionary new dictionary

I have a dictionary with the following structure as:
Data2 = {
'1.1.1': {'unitReturnAirTemperature': 224, 'unitAirTemperature': 224, 'unitHumidity': 430, 'unitReturnAirHumidity': 431},
'1.1': {'unitName': 'Unit-01'},
'1.2': {'unitName': 'Unit-02'},
'1.2.1': {'unitReturnAirTemperature': 215, 'unitAirTemperature': 224, 'unitHumidity': 431, 'unitReturnAirHumidity': 399}
}
And I would like to get the following:
Data3 = {
'1.1.1': {'unitReturnAirTemperature': 224, 'unitAirTemperature': 224, 'unitHumidity': 430, 'unitReturnAirHumidity': 431, 'unitName': 'Unit-01'},
'1.2.1': {'unitReturnAirTemperature': 215, 'unitAirTemperature': 224, 'unitHumidity': 431, 'unitReturnAirHumidity': 399, 'unitName': 'Unit-02'}
}
The new dictionary (Data3) should be based on the data from Data2 dictionary.

Python - Encoding Kamenicky (CP-895)

I need to read bytes from a file and compare them with a dictionary (to convert from Kamenicky to CP1250). This code throws the error "TypeError: string indices must be integers, not str". Please do not mention the "hardcoded" paths, as they are here for testing purposes only. Can you please help me and tell me what is wrong in this Python code?
def Convert(file):
kamenicky = bytes( {128 : 185,
129 : 252,
130 : 233,
131 : 239,
132 : 228,
133 : 192,
134 : 141,
135 : 232,
136 : 236,
137 : 189,
138 : 188,
139 : 237,
140 : 190,
141 : 229,
142 : 181,
143 : 178,
144 : 186,
145 : 158,
146 : 142,
147 : 244,
148 : 246,
149 : 211,
150 : 249,
151 : 218,
152 : 253,
153 : 214,
154 : 220,
155 : 138,
156 : 188,
157 : 221,
158 : 216,
159 : 157,
160 : 225,
161 : 237,
162 : 243,
163 : 250,
164 : 242,
165 : 210,
166 : 217,
167 : 212,
168 : 154,
169 : 248,
170 : 224})
out = ""
with open("test.csv", 'rb') as f:
for byte in f.read():
if byte in kamenicky:
out += kamenicky[byte]
else:
out += byte
w = open("new.csv", 'wb')
w.write(out)
w.close()
Almost same code WORKING in C#:
class Kamenicky
{
Dictionary<byte, byte> kamenicky = new Dictionary<byte, byte> {
{128, 185},
{129, 252},
{130, 233},
{131, 239},
{132, 228},
{133, 192},
{134, 141},
{135, 232},
{136, 236},
{137, 189},
{138, 188},
{139, 237},
{140, 190},
{141, 229},
{142, 181},
{143, 178},
{144, 186},
{145, 158},
{146, 142},
{147, 244},
{148, 246},
{149, 211},
{150, 249},
{151, 218},
{152, 253},
{153, 214},
{154, 220},
{155, 138},
{156, 188},
{157, 221},
{158, 216},
{159, 157},
{160, 225},
{161, 237},
{162, 243},
{163, 250},
{164, 242},
{165, 210},
{166, 217},
{167, 212},
{168, 154},
{169, 248},
{170, 224}
};
public void KamenickyToCP1250(string file)
{
List<Byte> temp = new List<byte>();
byte[] ByteFile = File.ReadAllBytes(file);
foreach (byte BYTE in ByteFile)
{
if (kamenicky.ContainsKey(BYTE)) { temp.Add(kamenicky[BYTE]); continue; }
temp.Add(BYTE);
}
File.Delete(file);
File.WriteAllBytes(file, temp.ToArray());
}
}
You need to convert byte from a single-character string to an integer. One way to do it is as follows:
with open("test.csv", 'rb') as f:
for ch in f.read():
byte = ord(ch)
...

Categories