append multiple JSON files together and output 1 Avro file using Python - python
I have a use case where I am required to append multiple JSON files and then convert them into 1 single Avro file. I have written the code below, which appends the JSON files together and then converts them into an Avro file. But the issue I am having is that the JSON file gets appended, but the entire JSON is enclosed in [] brackets, and so I get an error while converting it into an Avro file. I am trying to figure out how I can get rid of the [] from the first and the last line in the JSON file. Any help is appreciated.
The error I am getting is (a snippet of the error; the full error is too long to paste): avro.io.AvroTypeException: The datum [{'event_type': 'uplink'.....}] is not an example of the schema
My code:
Laird.py
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
from avro import schema, datafile, io
import json
from datetime import date
import glob
data = []
for f in glob.glob("*.txt"):
with open(f,) as infile:
data.append(json.load(infile))
# json.dumps(data)
with open("laird.json",'w') as outfile:
json.dump(data, outfile)
def json_to_avro():
fo = open("laird.json", "r")
data = fo.readlines()
final_header = []
final_rec = []
for header in data[0:1]:
header = header.strip("\n")
header = header.split(",")
final_header = header
for rec in data[1:]:
rec = rec.strip("\n")
rec = rec.split(" ")
rec = ' '.join(rec).split()
final_rec = rec
final_dict = dict(zip(final_header,final_rec))
# print(final_dict)
json_dumps = json.dumps(final_dict, ensure_ascii=False)
# print(json_dumps)
schema = avro.schema.parse(open("laird.avsc", "rb").read())
# print(schema)
writer = DataFileWriter(open("laird.avro", "wb"), DatumWriter(), schema)
with open("laird.json") as fp:
contents = json.load(fp)
print(contents)
writer.append(contents)
writer.close()
json_to_avro()
#Script to read/convert AVRO file to JSON
reader = DataFileReader(open("laird.avro", "rb"), DatumReader())
for user in reader:
print(user)
reader.close()
Schema: laird.avsc
{
"name": "MyClass",
"type": "record",
"namespace": "com.acme.avro",
"fields": [
{
"name": "event_type",
"type": "string"
},
{
"name": "event_data",
"type": {
"name": "event_data",
"type": "record",
"fields": [
{
"name": "device_id",
"type": "string"
},
{
"name": "user_id",
"type": "string"
},
{
"name": "payload",
"type": {
"type": "array",
"items": {
"name": "payload_record",
"type": "record",
"fields": [
{
"name": "name",
"type": "string"
},
{
"name": "sensor_id",
"type": "string"
},
{
"name": "type",
"type": "string"
},
{
"name": "unit",
"type": "string"
},
{
"name": "value",
"type": "float"
},
{
"name": "channel",
"type": "int"
},
{
"name": "timestamp",
"type": "long"
}
]
}
}
},
{
"name": "client_id",
"type": "string"
},
{
"name": "hardware_id",
"type": "string"
},
{
"name": "timestamp",
"type": "long"
},
{
"name": "application_id",
"type": "string"
},
{
"name": "device_type_id",
"type": "string"
}
]
}
},
{
"name": "company",
"type": {
"name": "company",
"type": "record",
"fields": [
{
"name": "id",
"type": "int"
},
{
"name": "address",
"type": "string"
},
{
"name": "city",
"type": "string"
},
{
"name": "country",
"type": "string"
},
{
"name": "created_at",
"type": "string"
},
{
"name": "industry",
"type": "string"
},
{
"name": "latitude",
"type": "float"
},
{
"name": "longitude",
"type": "float"
},
{
"name": "name",
"type": "string"
},
{
"name": "state",
"type": "string"
},
{
"name": "status",
"type": "int"
},
{
"name": "timezone",
"type": "string"
},
{
"name": "updated_at",
"type": "string"
},
{
"name": "user_id",
"type": "string"
},
{
"name": "zip",
"type": "string"
}
]
}
},
{
"name": "location",
"type": {
"name": "location",
"type": "record",
"fields": [
{
"name": "id",
"type": "int"
},
{
"name": "address",
"type": "string"
},
{
"name": "city",
"type": "string"
},
{
"name": "country",
"type": "string"
},
{
"name": "created_at",
"type": "string"
},
{
"name": "industry",
"type": "string"
},
{
"name": "latitude",
"type": "float"
},
{
"name": "longitude",
"type": "float"
},
{
"name": "name",
"type": "string"
},
{
"name": "state",
"type": "string"
},
{
"name": "status",
"type": "int"
},
{
"name": "timezone",
"type": "string"
},
{
"name": "updated_at",
"type": "string"
},
{
"name": "user_id",
"type": "string"
},
{
"name": "zip",
"type": "string"
},
{
"name": "company_id",
"type": "int"
}
]
}
},
{
"name": "device_type",
"type": {
"name": "device_type",
"type": "record",
"fields": [
{
"name": "id",
"type": "string"
},
{
"name": "application_id",
"type": "string"
},
{
"name": "category",
"type": "string"
},
{
"name": "codec",
"type": "string"
},
{
"name": "data_type",
"type": "string"
},
{
"name": "description",
"type": "string"
},
{
"name": "manufacturer",
"type": "string"
},
{
"name": "model",
"type": "string"
},
{
"name": "name",
"type": "string"
},
{
"name": "parent_constraint",
"type": "string"
},
{
"name": "proxy_handler",
"type": "string"
},
{
"name": "subcategory",
"type": "string"
},
{
"name": "transport_protocol",
"type": "string"
},
{
"name": "version",
"type": "string"
},
{
"name": "created_at",
"type": "string"
},
{
"name": "updated_at",
"type": "string"
}
]
}
},
{
"name": "device",
"type": {
"name": "device",
"type": "record",
"fields": [
{
"name": "id",
"type": "int"
},
{
"name": "thing_name",
"type": "string"
},
{
"name": "created_at",
"type": "string"
},
{
"name": "updated_at",
"type": "string"
},
{
"name": "status",
"type": "int"
}
]
}
}
]
}
Generated JSON File: laird.json
[{"event_type": "uplink", "event_data": {"device_id": "42934500-fcfb-11ea-9f13-d1d0271289a6", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "payload": [{"name": "Humidity", "sensor_id": "42abaf00-fcfb-11ea-9c71-c517ac227ea5", "type": "rel_hum", "unit": "p", "value": 94.29, "channel": 4, "timestamp": 1605007797789}, {"name": "Temperature", "sensor_id": "42b0df20-fcfb-11ea-bf5c-d11ce3dbc1cb", "type": "temp", "unit": "c", "value": 21.64, "channel": 3, "timestamp": 1605007797789}, {"name": "Battery", "sensor_id": "42a98c20-fcfb-11ea-b4dd-cd2887a335f7", "type": "batt", "unit": "p", "value": 100, "channel": 5, "timestamp": 1605007797789}, {"name": "Local Backup", "sensor_id": "42b01bd0-fcfb-11ea-9f13-d1d0271289a6", "type": "digital_sensor", "unit": "d", "value": 1, "channel": 400, "timestamp": 1605007797789}, {"name": "RSSI", "sensor_id": "42b39e40-fcfb-11ea-bf5c-d11ce3dbc1cb", "type": "rssi", "unit": "dbm", "value": -53, "channel": 100, "timestamp": 1605007797789}, {"name": "SNR", "sensor_id": "", "type": "snr", "unit": "db", "value": 10.2, "channel": 101, "timestamp": 1605007797789}], "client_id": "b8468c50-baf0-11ea-a5e9-89c3b09de43a", "hardware_id": "0025ca0a0000e232", "timestamp": 1605007797789, "application_id": "shipcomwireless", "device_type_id": "70776630-e15e-11ea-a8c9-05cd631755a5"}, "company": {"id": 7696, "address": "9240 Kirby Dr", "city": "Houston", "country": "United States", "created_at": "2020-09-11T18:44:50Z", "industry": "[\"Health Care\"]", "latitude": 29.671324, "longitude": -95.415535, "name": "Harris Health System - Production", "state": "TX", "status": 0, "timezone": "America/Chicago", "updated_at": "2020-09-15T03:34:58Z", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "zip": "77054"}, "location": {"id": 9153, "address": "9240 Kirby Dr", "city": "Houston", "country": "United States", "created_at": "2020-09-18T02:08:03Z", "industry": "[\"Health Care\"]", "latitude": 29.671324, "longitude": -95.415535, "name": "HHS Van Sensors", "state": 
"TX", "status": 0, "timezone": "America/Chicago", "updated_at": "2020-09-18T02:08:03Z", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "zip": "77054", "company_id": 7696}, "device_type": {"id": "70776630-e15e-11ea-a8c9-05cd631755a5", "application_id": "", "category": "module", "codec": "lorawan.laird.rs1xx-backup", "data_type": "", "description": "Temp Sensor", "manufacturer": "Laird", "model": "RS1xx", "name": "Laird Temp & Humidity with Local Backup", "parent_constraint": "NOT_ALLOWED", "proxy_handler": "PrometheusClient", "subcategory": "lora", "transport_protocol": "lorawan", "version": "", "created_at": "2020-08-18T14:23:51Z", "updated_at": "2020-08-18T18:16:37Z"}, "device": {"id": 269231, "thing_name": "Van 18-1775 (Ambient)", "created_at": "2020-09-22T17:44:27Z", "updated_at": "2020-09-25T22:39:57Z", "status": 0}}, {"event_type": "uplink", "event_data": {"device_id": "7de32cf0-f9d2-11ea-b4dd-cd2887a335f7", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "payload": [{"name": "Humidity", "sensor_id": "7dfbbe00-f9d2-11ea-9c71-c517ac227ea5", "type": "rel_hum", "unit": "p", "value": 0, "channel": 4, "timestamp": 1604697684139}, {"name": "Temperature", "sensor_id": "7dfb48d0-f9d2-11ea-9c71-c517ac227ea5", "type": "temp", "unit": "c", "value": -27.22, "channel": 3, "timestamp": 1604697684139}, {"name": "Battery", "sensor_id": "7dfa5e70-f9d2-11ea-bf5c-d11ce3dbc1cb", "type": "batt", "unit": "p", "value": 100, "channel": 5, "timestamp": 1604697684139}, {"name": "Local Backup", "sensor_id": "7dfb96f0-f9d2-11ea-b4dd-cd2887a335f7", "type": "digital_sensor", "unit": "d", "value": 1, "channel": 400, "timestamp": 1604697684139}, {"name": "RSSI", "sensor_id": "7dfc5a40-f9d2-11ea-b4dd-cd2887a335f7", "type": "rssi", "unit": "dbm", "value": -7, "channel": 100, "timestamp": 1604697684139}, {"name": "SNR", "sensor_id": "", "type": "snr", "unit": "db", "value": 10, "channel": 101, "timestamp": 1604697684139}], "client_id": "b8468c50-baf0-11ea-a5e9-89c3b09de43a", 
"hardware_id": "0025ca0a0000be6a", "timestamp": 1604697684139, "application_id": "shipcomwireless", "device_type_id": "70776630-e15e-11ea-a8c9-05cd631755a5"}, "company": {"id": 7696, "address": "9240 Kirby Dr", "city": "Houston", "country": "United States", "created_at": "2020-09-11T18:44:50Z", "industry": "[\"Health Care\"]", "latitude": 29.671324, "longitude": -95.415535, "name": "Harris Health System - Production", "state": "TX", "status": 0, "timezone": "America/Chicago", "updated_at": "2020-09-15T03:34:58Z", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "zip": "77054"}, "location": {"id": 9080, "address": "9240 Kirby Dr", "city": "Houston", "country": "United States", "created_at": "2020-09-11T18:46:07Z", "industry": "[\"Health Care\"]", "latitude": 29.671324, "longitude": -95.415535, "name": "HHS Cooler Sensors", "state": "TX", "status": 0, "timezone": "America/Chicago", "updated_at": "2020-09-18T14:17:28Z", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "zip": "77054", "company_id": 7696}, "device_type": {"id": "70776630-e15e-11ea-a8c9-05cd631755a5", "application_id": "", "category": "module", "codec": "lorawan.laird.rs1xx-backup", "data_type": "", "description": "Temp Sensor", "manufacturer": "Laird", "model": "RS1xx", "name": "Laird Temp & Humidity with Local Backup", "parent_constraint": "NOT_ALLOWED", "proxy_handler": "PrometheusClient", "subcategory": "lora", "transport_protocol": "lorawan", "version": "", "created_at": "2020-08-18T14:23:51Z", "updated_at": "2020-08-18T18:16:37Z"}, "device": {"id": 268369, "thing_name": "Cooler F-0201-AH", "created_at": "2020-09-18T17:15:04Z", "updated_at": "2020-09-25T22:39:57Z", "status": 0}}, {"event_type": "uplink", "event_data": {"device_id": "1c5c66f0-fcfb-11ea-8ae3-2ffdc909c57b", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "payload": [{"name": "Humidity", "sensor_id": "1c7a4f30-fcfb-11ea-8ae3-2ffdc909c57b", "type": "rel_hum", "unit": "p", "value": 81.22, "channel": 4, "timestamp": 1605148608302}, 
{"name": "Temperature", "sensor_id": "1c793dc0-fcfb-11ea-bf5c-d11ce3dbc1cb", "type": "temp", "unit": "c", "value": 24.47, "channel": 3, "timestamp": 1605148608302}, {"name": "Battery", "sensor_id": "1c76a5b0-fcfb-11ea-bf5c-d11ce3dbc1cb", "type": "batt", "unit": "p", "value": 100, "channel": 5, "timestamp": 1605148608302}, {"name": "Local Backup", "sensor_id": "1c73e690-fcfb-11ea-9c71-c517ac227ea5", "type": "digital_sensor", "unit": "d", "value": 1, "channel": 400, "timestamp": 1605148608302}, {"name": "RSSI", "sensor_id": "1c780540-fcfb-11ea-b4dd-cd2887a335f7", "type": "rssi", "unit": "dbm", "value": -14, "channel": 100, "timestamp": 1605148608302}, {"name": "SNR", "sensor_id": "", "type": "snr", "unit": "db", "value": 8.8, "channel": 101, "timestamp": 1605148608302}], "client_id": "b8468c50-baf0-11ea-a5e9-89c3b09de43a", "hardware_id": "0025ca0a0000e1e3", "timestamp": 1605148608302, "application_id": "shipcomwireless", "device_type_id": "70776630-e15e-11ea-a8c9-05cd631755a5"}, "company": {"id": 7696, "address": "9240 Kirby Dr", "city": "Houston", "country": "United States", "created_at": "2020-09-11T18:44:50Z", "industry": "[\"Health Care\"]", "latitude": 29.671324, "longitude": -95.415535, "name": "Harris Health System - Production", "state": "TX", "status": 0, "timezone": "America/Chicago", "updated_at": "2020-09-15T03:34:58Z", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "zip": "77054"}, "location": {"id": 9153, "address": "9240 Kirby Dr", "city": "Houston", "country": "United States", "created_at": "2020-09-18T02:08:03Z", "industry": "[\"Health Care\"]", "latitude": 29.671324, "longitude": -95.415535, "name": "HHS Van Sensors", "state": "TX", "status": 0, "timezone": "America/Chicago", "updated_at": "2020-09-18T02:08:03Z", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "zip": "77054", "company_id": 7696}, "device_type": {"id": "70776630-e15e-11ea-a8c9-05cd631755a5", "application_id": "", "category": "module", "codec": "lorawan.laird.rs1xx-backup", 
"data_type": "", "description": "Temp Sensor", "manufacturer": "Laird", "model": "RS1xx", "name": "Laird Temp & Humidity with Local Backup", "parent_constraint": "NOT_ALLOWED", "proxy_handler": "PrometheusClient", "subcategory": "lora", "transport_protocol": "lorawan", "version": "", "created_at": "2020-08-18T14:23:51Z", "updated_at": "2020-08-18T18:16:37Z"}, "device": {"id": 269213, "thing_name": "Van 19-1800 (Ambient)", "created_at": "2020-09-22T17:43:23Z", "updated_at": "2020-09-25T22:39:56Z", "status": 0}}, {"event_type": "uplink", "event_data": {"device_id": "851fd480-f70e-11ea-9f13-d1d0271289a6", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "payload": [{"name": "Humidity", "sensor_id": "85411820-f70e-11ea-8ae3-2ffdc909c57b", "type": "rel_hum", "unit": "p", "value": 49.52, "channel": 4, "timestamp": 1604558153188}, {"name": "Temperature", "sensor_id": "853f9180-f70e-11ea-9f13-d1d0271289a6", "type": "temp", "unit": "c", "value": 20.52, "channel": 3, "timestamp": 1604558153188}, {"name": "Battery", "sensor_id": "85429ec0-f70e-11ea-9621-a51b22d5dc1d", "type": "batt", "unit": "p", "value": 100, "channel": 5, "timestamp": 1604558153188}, {"name": "Local Backup", "sensor_id": "853f4360-f70e-11ea-9f13-d1d0271289a6", "type": "digital_sensor", "unit": "d", "value": 1, "channel": 400, "timestamp": 1604558153188}, {"name": "RSSI", "sensor_id": "8543b030-f70e-11ea-8ae3-2ffdc909c57b", "type": "rssi", "unit": "dbm", "value": -91, "channel": 100, "timestamp": 1604558153188}, {"name": "SNR", "sensor_id": "", "type": "snr", "unit": "db", "value": 8.5, "channel": 101, "timestamp": 1604558153188}], "client_id": "b8468c50-baf0-11ea-a5e9-89c3b09de43a", "hardware_id": "0025ca0a0000be5b", "timestamp": 1604558153188, "application_id": "shipcomwireless", "device_type_id": "70776630-e15e-11ea-a8c9-05cd631755a5"}, "company": {"id": 7696, "address": "9240 Kirby Dr", "city": "Houston", "country": "United States", "created_at": "2020-09-11T18:44:50Z", "industry": "[\"Health Care\"]", 
"latitude": 29.671324, "longitude": -95.415535, "name": "Harris Health System - Production", "state": "TX", "status": 0, "timezone": "America/Chicago", "updated_at": "2020-09-15T03:34:58Z", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "zip": "77054"}, "location": {"id": 9080, "address": "9240 Kirby Dr", "city": "Houston", "country": "United States", "created_at": "2020-09-11T18:46:07Z", "industry": "[\"Health Care\"]", "latitude": 29.671324, "longitude": -95.415535, "name": "HHS Cooler Sensors", "state": "TX", "status": 0, "timezone": "America/Chicago", "updated_at": "2020-09-18T14:17:28Z", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "zip": "77054", "company_id": 7696}, "device_type": {"id": "70776630-e15e-11ea-a8c9-05cd631755a5", "application_id": "", "category": "module", "codec": "lorawan.laird.rs1xx-backup", "data_type": "", "description": "Temp Sensor", "manufacturer": "Laird", "model": "RS1xx", "name": "Laird Temp & Humidity with Local Backup", "parent_constraint": "NOT_ALLOWED", "proxy_handler": "PrometheusClient", "subcategory": "lora", "transport_protocol": "lorawan", "version": "", "created_at": "2020-08-18T14:23:51Z", "updated_at": "2020-08-18T18:16:37Z"}, "device": {"id": 265040, "thing_name": "Cooler R-0306-PHAR", "created_at": "2020-09-15T04:47:12Z", "updated_at": "2020-09-25T22:39:54Z", "status": 0}}]
contents is a list of records but the writer.append expects a single record, so you iterate over your records and append them one by one.
You just need to change:
writer.append(contents)
to:
for record in contents:
writer.append(record)
Related
Explode json without pandas
I have a JSON object: { "data": { "geography": [ { "id": "1", "state": "USA", "properties": [ { "code": "CMD-01", "value": "34" }, { "code": "CMD-02", "value": "24" } ] }, { "id": "2", "state": "Canada", "properties": [ { "code": "CMD-04", "value": "50" }, { "code": "CMD-05", "value": "60" } ] } ] } } I want to get the result as a new JSON, but without using pandas (and all those explode, flatten and normalize functions...). Is there any option to get this structure without using pandas or having an Out of memory issue? The output should be: { "id": "1", "state": "USA", "code": "CMD-01", "value": "34" }, { "id": "1", "state": "USA", "code": "CMD-02", "value": "24", }, { "id": "2", "state": "Canada", "code": "CMD-04", "value": "50" }, { "id": "2", "state": "Canada", "code": "CMD-05", "value": "60" },
You can simply loop over the list associated with "geography" and build new dictionaries that you will add to a newly created list: dict_in = { "data": { "geography": [ { "id": "1", "state": "USA", "properties": [ { "code": "CMD-01", "value": "34" }, { "code": "CMD-02", "value": "24" } ] }, { "id": "2", "state": "Canada", "properties": [ { "code": "CMD-04", "value": "50" }, { "code": "CMD-05", "value": "60" } ] } ] } } import json rec_out = [] for obj in dict_in["data"]["geography"]: for prop in obj["properties"]: dict_out = { "id": obj["id"], "state": obj["state"] } dict_out.update(prop) rec_out.append(dict_out) print(json.dumps(rec_out, indent=4)) Output: [ { "id": "1", "state": "USA", "code": "CMD-01", "value": "34" }, { "id": "1", "state": "USA", "code": "CMD-02", "value": "24" }, { "id": "2", "state": "Canada", "code": "CMD-04", "value": "50" }, { "id": "2", "state": "Canada", "code": "CMD-05", "value": "60" } ]
What is the best way for me to iterate over this dataset to return all matching values from another key value pair if I match a separate key?
I want to be able to search through this list (see bottom of post) of dicts (I think that is what this particular arrangement is called) to search for an ['address'] that matches '0xd2'. If that match is found, I want to return/print all the corresponding ['id']s. So in this case I would like to return: 632, 315, 432, 100 I'm able to extract individual values like this: none = None print(my_dict['result'][2]["id"]) 432 I'm struggling with how to get a loop to do this properly. { "total": 4, "page": 0, "page_size": 100, "result": [ { "address": "0xd2", "id": "632", "amount": "1", "name": "Avengers", "group": "Marvel", "uri": "https://google.com/", "metadata": null, "synced_at": "2022-05-26T22:52:34.113Z", "last_sync": "2022-05-26T22:52:34.113Z" }, { "address": "0xd2", "id": "315", "amount": "1", "name": "Avengers", "group": "Marvel", "uri": "https://google.com/", "metadata": null, "synced_at": "2022-05-26T22:52:34.113Z", "last_sync": "2022-05-26T22:52:34.113Z" }, { "address": "0xd2", "id": "432", "amount": "1", "name": "Avengers", "group": "Marvel", "uri": "https://google.com/", "metadata": null, "synced_at": "2022-05-26T22:52:34.113Z", "last_sync": "2022-05-26T22:52:34.113Z" }, { "address": "0x44", "id": "100", "amount": "1", "name": "Suicide Squad", "group": "DC", "uri": "https://google.com/", "metadata": null, "synced_at": "2022-05-26T22:52:34.113Z", "last_sync": "2022-05-26T22:52:34.113Z" } ], "status": "SYNCED" }
Welcome to StackOverflow. You can try list comprehension: [res["id"] for res in my_dict["result"] if res["address"] == "0xd2"] If you'd like to use a for loop: l = [] for res in my_dict["result"]: if res["address"] == "0xd2": l.append(res["id"])
You can use a list comprehension. import json json_string = """{ "total": 4, "page": 0, "page_size": 100, "result": [ { "address": "0xd2", "id": "632", "amount": "1", "name": "Avengers", "group": "Marvel", "uri": "https://google.com/", "metadata": null, "synced_at": "2022-05-26T22:52:34.113Z", "last_sync": "2022-05-26T22:52:34.113Z" }, { "address": "0xd2", "id": "315", "amount": "1", "name": "Avengers", "group": "Marvel", "uri": "https://google.com/", "metadata": null, "synced_at": "2022-05-26T22:52:34.113Z", "last_sync": "2022-05-26T22:52:34.113Z" }, { "address": "0xd2", "id": "432", "amount": "1", "name": "Avengers", "group": "Marvel", "uri": "https://google.com/", "metadata": null, "synced_at": "2022-05-26T22:52:34.113Z", "last_sync": "2022-05-26T22:52:34.113Z" }, { "address": "0x44", "id": "100", "amount": "1", "name": "Suicide Squad", "group": "DC", "uri": "https://google.com/", "metadata": null, "synced_at": "2022-05-26T22:52:34.113Z", "last_sync": "2022-05-26T22:52:34.113Z" } ], "status": "SYNCED" }""" json_dict = json.loads(json_string) result = [elem['id'] for elem in json_dict['result'] if elem['address'] == '0xd2'] print(result) Output: ['632', '315', '432']
This would store the associated ids in the list: ids=[] for r in dataset.get('result'): if r.get('address')=='0xd2': ids.append(r.get('id'))
Why am I receiving an error when attempting to parse JSON object within for loop?
Everything with my script runs fine until I try to run it through a for loop. Specifically, when I attempt to index a specific array within the object. Before I get to the The script is intended to grab the delivery date for each tracking number in my list. This is my script: import requests import json TrackList = ['1Z3X756E0310496105','1ZX0373R0303581450','1ZX0373R0103574417'] url = 'https://onlinetools.ups.com/rest/Track' para1 = '...beginning of JSON request string...' para2 = '...end of JSON request string...' for TrackNum in TrackList: parameters = para1+TrackNum+para2 resp = requests.post(url = url, data = parameters, verify=False) data = json.loads(resp.text) DelDate = data['TrackResponse']['Shipment']['Package'][0]['Activity'][0]['Date'] print(DelDate) JSON API Response (if needed): { "TrackResponse": { "Response": { "ResponseStatus": { "Code": "1", "Description": "Success" }, "TransactionReference": { "CustomerContext": "Analytics Inquiry" } }, "Shipment": { "InquiryNumber": { "Code": "01", "Description": "ShipmentIdentificationNumber", "Value": "1ZX0373R0103574417" }, "Package": { "Activity": [ { "ActivityLocation": { "Address": { "City": "OKLAHOMA CITY", "CountryCode": "US", "PostalCode": "73128", "StateProvinceCode": "OK" }, "Code": "M3", "Description": "Front Desk", "SignedForByName": "CUMMINGS" }, "Date": "20190520", "Status": { "Code": "9E", "Description": "Delivered", "Type": "D" }, "Time": "091513" }, { "ActivityLocation": { "Address": { "City": "Oklahoma City", "CountryCode": "US", "StateProvinceCode": "OK" }, "Description": "Front Desk" }, "Date": "20190520", "Status": { "Code": "OT", "Description": "Out For Delivery Today", "Type": "I" }, "Time": "085943" }, { "ActivityLocation": { "Address": { "City": "Oklahoma City", "CountryCode": "US", "StateProvinceCode": "OK" }, "Description": "Front Desk" }, "Date": "20190520", "Status": { "Code": "DS", "Description": "Destination Scan", "Type": "I" }, "Time": "011819" }, { "ActivityLocation": { 
"Address": { "City": "Oklahoma City", "CountryCode": "US", "StateProvinceCode": "OK" }, "Description": "Front Desk" }, "Date": "20190519", "Status": { "Code": "AR", "Description": "Arrival Scan", "Type": "I" }, "Time": "235100" }, { "ActivityLocation": { "Address": { "City": "DFW Airport", "CountryCode": "US", "StateProvinceCode": "TX" }, "Description": "Front Desk" }, "Date": "20190519", "Status": { "Code": "DP", "Description": "Departure Scan", "Type": "I" }, "Time": "195500" }, { "ActivityLocation": { "Address": { "City": "DFW Airport", "CountryCode": "US", "StateProvinceCode": "TX" }, "Description": "Front Desk" }, "Date": "20190517", "Status": { "Code": "OR", "Description": "Origin Scan", "Type": "I" }, "Time": "192938" }, { "ActivityLocation": { "Address": { "CountryCode": "US" }, "Description": "Front Desk" }, "Date": "20190517", "Status": { "Code": "MP", "Description": "Order Processed: Ready for UPS", "Type": "M" }, "Time": "184621" } ], "PackageWeight": { "UnitOfMeasurement": { "Code": "LBS" }, "Weight": "2.00" }, "ReferenceNumber": [ { "Code": "01", "Value": "8472745558" }, { "Code": "01", "Value": "5637807:1007379402:BN81-17077A:1" }, { "Code": "01", "Value": "5637807" } ], "TrackingNumber": "1ZX0373R0103574417" }, "PickupDate": "20190517", "Service": { "Code": "001", "Description": "UPS Next Day Air" }, "ShipmentAddress": [ { "Address": { "AddressLine": "S 600 ROYAL LN", "City": "COPPELL", "CountryCode": "US", "PostalCode": "750193827", "StateProvinceCode": "TX" }, "Type": { "Code": "01", "Description": "Shipper Address" } }, { "Address": { "City": "OKLAHOMA CITY", "CountryCode": "US", "PostalCode": "73128", "StateProvinceCode": "OK" }, "Type": { "Code": "02", "Description": "ShipTo Address" } } ], "ShipmentWeight": { "UnitOfMeasurement": { "Code": "LBS" }, "Weight": "2.00" }, "ShipperNumber": "X0373R" } } } Below is the error I receive: Traceback (most recent call last): File "/Users/***/Library/Preferences/PyCharmCE2019.1/scratches/UPS_API.py", line 
15, in <module> DelDate = data['TrackResponse']['Shipment']['Package'][0]['Activity'][0]['Date'] KeyError: 0
You're trying to index "Package" at index 0, but it's an object not an array. So you should be accessing ['Package']['Activity']
just take away the [0] because there is no [1] or [2]
Parsing JSON with Python to get specific value
I am trying to parse JSON with Python. I am trying to get the value of "login" which is michael for "type" which is "CreateEvent". Here's my JSON: [ { "id": "7", "type": "PushEvent", "actor": { "id": 5, "login": "michael", "display_login": "michael", "gravatar_id": "", "url": "https://ec2", "avatar_url": "https://ec2" }, "repo": { "id": 2, "name": "myorganization/puppet", "url": "https://ec2" }, "payload": { "push_id": 5, "size": 1, "distinct_size": 1, "ref": "refs/heads/dev", "head": "5584d504f971", "before": "e485f37ce935775846f33b", "commits": [ { "sha": "5584cd504f971", "author": { "email": "michael.conte#gmail.ca", "name": "michael" }, "message": "Create dev.pp", "distinct": true, "url": "https://ec2" } ] }, "public": true, "created_at": "2018-02-20T16:15:57Z", "org": { "id": 6, "login": "myorganization", "gravatar_id": "", "url": "https://ec2", "avatar_url": "https://ec2" } }, { "id": "6", "type": "CreateEvent", "actor": { "id": 5, "login": "michael", "display_login": "michael", "gravatar_id": "", "url": "https://ec2", "avatar_url": "https://ec2" }, "repo": { "id": 2, "name": "myorganization/puppet", "url": "https://ec2" }, "payload": { "ref": "dev", "ref_type": "branch", "master_branch": "master", "description": null, "pusher_type": "user" }, "public": true, "created_at": "2018-02-20T16:15:44Z", "org": { "id": 6, "login": "myorganization", "gravatar_id": "", "url": "https://ec2", "avatar_url": "https://ec2" } }, { "id": "5", "type": "PushEvent", "actor": { "id": 5, "login": "michael", "display_login": "michael", "gravatar_id": "", "url": "https://ec2", "avatar_url": "https://ec2" }, "repo": { "id": 2, "name": "myorganization/puppet", "url": "https://ec2" }, "payload": { "push_id": 3, "size": 1, "distinct_size": 1, "ref": "refs/heads/master", "head": "e485f84b875846f33b", "before": "f8bb87b952bfb4", "commits": [ { "sha": "e485f37ce6f33b", "author": { "email": "michael.conte#gmail.ca", "name": "michael" }, "message": "Create hello.pp", "distinct": true, "url": 
"https://ec2" } ] }, "public": true, "created_at": "2018-02-20T15:48:42Z", "org": { "id": 6, "login": "myorganization", "gravatar_id": "", "url": "https://ec2", "avatar_url": "https://ec2" } }, { "id": "4", "type": "CreateEvent", "actor": { "id": 5, "login": "michael", "display_login": "michael", "gravatar_id": "", "url": "https://ec2", "avatar_url": "https://ec2?" }, "repo": { "id": 2, "name": "myorganization/puppet", "url": "https://ec2" }, "payload": { "ref": "master", "ref_type": "branch", "master_branch": "master", "description": null, "pusher_type": "user" }, "public": true, "created_at": "2018-02-20T15:48:21Z", "org": { "id": 6, "login": "myorganization", "gravatar_id": "", "url": "https://ec2", "avatar_url": "https://ec2" } }, { "id": "3", "type": "CreateEvent", "actor": { "id": 5, "login": "michael", "display_login": "michael", "gravatar_id": "", "url": "https://ec2", "avatar_url": "https://ec2" }, "repo": { "id": 2, "name": "myorganization/puppet", "url": "https://ec2" }, "payload": { "ref": null, "ref_type": "repository", "master_branch": "master", "description": null, "pusher_type": "user" }, "public": true, "created_at": "2018-02-20T15:48:05Z", "org": { "id": 6, "login": "myorganization", "gravatar_id": "", "url": "https://ec2", "avatar_url": "https://ec2" } } ] Here's my code: response = requests.get(url, headers=headers, verify=False) name = response.json() fname = (name['type']['actor']['login']) print(fname) When I run the above code, I get a type error. TypeError: list indices must be integers or slices, not str. What am I doing wrong? I am using Python3 for my code.
Try fname = name[0]['payload']['commits'][0]['author']['name'] The name Michael you are trying to get, is inside the dictionary named author, which is inside a single item list, which is inside the commits dictionary, which is inside the payload dictionary, which is inside a single item list. Check out the docs for more info on collection types: http://python-textbok.readthedocs.io/en/1.0/Collections.html
Fulltext Search in arangodb using AQL and python
i have stored the data in arangodb in the following format: {"data": [ { "content": "maindb", "type": "string", "name": "db_name", "key": "1745085839" }, { "type": "id", "name": "rel", "content": "1745085840", "key": "1745085839" }, { "content": "user", "type": "string", "name": "rel_name", "key": "1745085840" }, { "type": "id", "name": "tuple", "content": "174508584001", "key": "1745085840" }, { "type": "id", "name": "tuple", "content": "174508584002", "key": "1745085840" }, { "type": "id", "name": "tuple", "content": "174508584003", "key": "1745085840" }, { "type": "id", "name": "tuple", "content": "174508584004", "key": "1745085840" }, { "type": "id", "name": "tuple", "content": "174508584005", "key": "1745085840" }, { "type": "id", "name": "tuple", "content": "174508584006", "key": "1745085840" }, { "type": "id", "name": "tuple", "content": "174508584007", "key": "1745085840" }, { "content": "dspclient", "type": "varchar", "name": "username", "key": "174508584001" }, { "content": "12345", "type": "varchar", "name": "password", "key": "174508584001" }, { "content": "12345", "type": "varchar", "name": "cpassword", "key": "174508584001" }, { "content": "n", "type": "varchar", "name": "PostgreSQL", "key": "174508584001" }, { "content": "n", "name": "IBMDB2", "type": "varchar", "key": "174508584001" }, { "content": "n", "name": "MySQL", "type": "varchar", "key": "174508584001" }, { "content": "n", "type": "varchar", "name": "SQLServer", "key": "174508584001" }, { "content": "n", "name": "Hadoop", "type": "varchar", "key": "174508584001" }, { "content": "None", "name": "dir1", "type": "varchar", "key": "174508584001" }, { "content": "None", "name": "dir2", "type": "varchar", "key": "174508584001" }, { "content": "None", "name": "dir3", "type": "varchar", "key": "174508584001" }, { "content": "None", "name": "dir4", "type": "varchar", "key": "174508584001" }, { "type": "inet", "name": "ipaddr", "content": "1921680103", "key": "174508584001" }, { "content": "y", 
"name": "status", "type": "varchar", "key": "174508584001" }, { "content": "None", "type": "varchar", "name": "logintime", "key": "174508584001" }, { "content": "None", "type": "varchar", "name": "logindate", "key": "174508584001" }, { "content": "None", "type": "varchar", "name": "logouttime", "key": "174508584001" }, { "content": "client", "type": "varchar", "name": "user_type", "key": "174508584001" }, { "content": "royal", "type": "varchar", "name": "username", "key": "174508584002" }, { "content": "12345", "type": "varchar", "name": "password", "key": "174508584002" }, { "content": "12345", "type": "varchar", "name": "cpassword", "key": "174508584002" }, { "content": "n", "type": "varchar", "name": "PostgreSQL", "key": "174508584002" }, { "content": "n", "name": "IBMDB2", "type": "varchar", "key": "174508584002" }, { "content": "n", "name": "MySQL", "type": "varchar", "key": "174508584002" }, { "content": "n", "type": "varchar", "name": "SQLServer", "key": "174508584002" }, { "content": "n", "name": "Hadoop", "type": "varchar", "key": "174508584002" }, { "content": "None", "name": "dir1", "type": "varchar", "key": "174508584002" }, { "content": "None", "name": "dir2", "type": "varchar", "key": "174508584002" }, { "content": "None", "name": "dir3", "type": "varchar", "key": "174508584002" }, { "content": "None", "name": "dir4", "type": "varchar", "key": "174508584002" }, { "type": "inet", "name": "ipaddr", "content": "1921680105", "key": "174508584002" }, { "content": "y", "name": "status", "type": "varchar", "key": "174508584002" }, { "content": "190835899000", "type": "varchar", "name": "logintime", "key": "174508584002" }, { "content": "20151002", "type": "varchar", "name": "logindate", "key": "174508584002" }, { "content": "None", "type": "varchar", "name": "logouttime", "key": "174508584002" }, { "content": "client", "type": "varchar", "name": "user_type", "key": "174508584002" }, { "content": "abc", "type": "varchar", "name": "username", "key": 
"174508584003" }, { "content": "12345", "type": "varchar", "name": "password", "key": "174508584003" }, { "content": "12345", "type": "varchar", "name": "cpassword", "key": "174508584003" }, { "content": "n", "type": "varchar", "name": "PostgreSQL", "key": "174508584003" }, { "content": "n", "name": "IBMDB2", "type": "varchar", "key": "174508584003" }]} In order to perform a fulltext search, I have created an index on the content attribute by using this syntax from a Python script: c.DSP.ensureFulltextIndex("content"); Here, c is the database, and DSP is the collection name. Now, I am trying to perform a search operation on the above data set by using the syntax: FOR doc IN FULLTEXT(DSP, "content", "username") RETURN doc Then, an error occurs: [1571] in function 'FULLTEXT()': no suitable fulltext index found for fulltext query on 'DSP' (while executing) Please tell me the problem, and also tell me what the syntax will be when I try this query with a Python script. Thanks...
Working with the 10-minute tutorial and the driver documentation, I got it working like this: from pyArango.connection import * c = Connection() db = c.createDatabase(name = "testdb") DSP= db.createCollection(name = "DSP") DSP.ensureFulltextIndex(fields=["content"]) doc = DSP.createDocument({"content": "test bla"}) doc.save() print db.AQLQuery('''FOR doc IN FULLTEXT(DSP, "content", "bla") RETURN doc''', 10) Resulting in: [{u'_key': u'1241175138503', u'content': u'test bla', u'_rev': u'1241175138503', u'_id': u'DSP/1241175138503'}] I've used arangosh to revalidate the steps from the Python prompt: arangosh> db._useDatabase("testdb") arangosh [testdb]> db.DSP.getIndexes() [ { "id" : "DSP/0", "type" : "primary", "fields" : [ "_key" ], "selectivityEstimate" : 1, "unique" : true, "sparse" : false }, { "id" : "DSP/1241140928711", "type" : "hash", "fields" : [ "content" ], "selectivityEstimate" : 1, "unique" : false, "sparse" : true }, { "id" : "DSP/1241142960327", "type" : "fulltext", "fields" : [ "content" ], "unique" : false, "sparse" : true, "minLength" : 2 } ] arangosh [testdb]> db.testdb.toArray() [ { "content" : "test bla", "_id" : "DSP/1241175138503", "_rev" : "1241175138503", "_key" : "1241175138503" } ] db._query('FOR doc IN FULLTEXT(DSP, "content", "bla") RETURN doc')