Explode json without pandas - python

I have a JSON object:
{
"data": {
"geography": [
{
"id": "1",
"state": "USA",
"properties": [
{
"code": "CMD-01",
"value": "34"
},
{
"code": "CMD-02",
"value": "24"
}
]
},
{
"id": "2",
"state": "Canada",
"properties": [
{
"code": "CMD-04",
"value": "50"
},
{
"code": "CMD-05",
"value": "60"
}
]
}
]
}
}
I want to get the result as a new JSON, but without using pandas (and all of its explode, flatten and normalize functions...). Is there any way to get this structure without using pandas and without running into an out-of-memory issue?
The output should be:
{ "id": "1",
"state": "USA",
"code": "CMD-01",
"value": "34"
},
{ "id": "1",
"state": "USA",
"code": "CMD-02",
"value": "24",
},
{ "id": "2",
"state": "Canada",
"code": "CMD-04",
"value": "50"
},
{ "id": "2",
"state": "Canada",
"code": "CMD-05",
"value": "60"
},

You can simply loop over the list under "geography" and build new dictionaries, appending them to a newly created list:
dict_in = {
"data": {
"geography": [
{
"id": "1",
"state": "USA",
"properties": [
{
"code": "CMD-01",
"value": "34"
},
{
"code": "CMD-02",
"value": "24"
}
]
},
{
"id": "2",
"state": "Canada",
"properties": [
{
"code": "CMD-04",
"value": "50"
},
{
"code": "CMD-05",
"value": "60"
}
]
}
]
}
}
import json

rec_out = []
for obj in dict_in["data"]["geography"]:
    for prop in obj["properties"]:
        # Start from the parent fields, then merge in the property's fields
        dict_out = {
            "id": obj["id"],
            "state": obj["state"]
        }
        dict_out.update(prop)
        rec_out.append(dict_out)

print(json.dumps(rec_out, indent=4))
Output:
[
{
"id": "1",
"state": "USA",
"code": "CMD-01",
"value": "34"
},
{
"id": "1",
"state": "USA",
"code": "CMD-02",
"value": "24"
},
{
"id": "2",
"state": "Canada",
"code": "CMD-04",
"value": "50"
},
{
"id": "2",
"state": "Canada",
"code": "CMD-05",
"value": "60"
}
]
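If the input is too large to hold the flattened list in memory, the same two loops also work as a generator that yields one record at a time; below is a minimal sketch under that assumption (out.jsonl is just an example path, and for a file too big to json.load in the first place you would additionally need a streaming parser such as ijson):

import json

def flatten_geography(data):
    # Yield one flat record per property instead of building the full list
    for obj in data["data"]["geography"]:
        for prop in obj["properties"]:
            yield {"id": obj["id"], "state": obj["state"], **prop}

# Writing one JSON object per line (JSON Lines) keeps memory use constant
with open("out.jsonl", "w") as f:
    for rec in flatten_geography(dict_in):
        f.write(json.dumps(rec) + "\n")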

Related

extract aliases from wikidata dump using python

I am trying to extract certain fields about Wikidata items from the Wikidata dump, but I have a problem with the aliases field for a certain language. My code is based on the code at the following URL: how_to_use_a_wikidata_dump. I made my modifications, but the aliases field returns an empty value:
for record in wikidata(args.dumpfile):
    print('i = ' + str(i) + ' item ' + record['id'] + ' started!' + '\n')
    item_id = pydash.get(record, 'id')
    item_type = pydash.get(record, 'claims.P31[0].mainsnak.datavalue.value.id')
    arabic_label = pydash.get(record, 'labels.ar.value')
    english_label = pydash.get(record, 'labels.en.value')
    arabic_aliases = pydash.get(record, 'aliases.ar.value')
    english_aliases = pydash.get(record, 'aliases.en.value')
    arabic_desc = pydash.get(record, 'descriptions.ar.value')
    english_desc = pydash.get(record, 'descriptions.en.value')
    main_category = pydash.get(record, 'claims.P910[0].mainsnak.datavalue.value.id')
    arwiki = pydash.get(record, 'sitelinks.arwiki.title')
    arwikiquote = pydash.get(record, 'sitelinks.arwikiquote.title')
    enwiki = pydash.get(record, 'sitelinks.enwiki.title')
    enwikiquote = pydash.get(record, 'sitelinks.enwikiquote.title')
The JSON format for a Wikidata item is documented here:
JSON Format
Example JSON record:
{
"pageid": 186,
"ns": 0,
"title": "Q60",
"lastrevid": 199780882,
"modified": "2020-02-27T14:37:20Z",
"id": "Q60",
"type": "item",
"aliases": {
"en": [
{
"language": "en",
"value": "NYC"
},
{
"language": "en",
"value": "New York"
}
],
"fr": [
{
"language": "fr",
"value": "New York City"
},
{
"language": "fr",
"value": "NYC"
}
],
"zh-mo": [
{
"language": "zh-mo",
"value": "\u7d10\u7d04\u5e02"
}
]
},
"labels": {
"en": {
"language": "en",
"value": "New York City"
},
"ar": {
"language": "ar",
"value": "\u0645\u062f\u064a\u0646\u0629 \u0646\u064a\u0648 \u064a\u0648\u0631\u0643"
},
"fr": {
"language": "fr",
"value": "New York City"
},
"my": {
"language": "my",
"value": "\u1014\u101a\u1030\u1038\u101a\u1031\u102c\u1000\u103a\u1019\u103c\u102d\u102f\u1037"
},
"ps": {
"language": "ps",
"value": "\u0646\u064a\u0648\u064a\u0627\u0631\u06a9"
}
},
"descriptions": {
"en": {
"language": "en",
"value": "largest city in New York and the United States of America"
},
"it": {
"language": "it",
"value": "citt\u00e0 degli Stati Uniti d'America"
},
"pl": {
"language": "pl",
"value": "miasto w Stanach Zjednoczonych"
},
"ro": {
"language": "ro",
"value": "ora\u015ful cel mai mare din SUA"
}
},
"claims": {
"P1151": [
{
"id": "Q60$6f832804-4c3f-6185-38bd-ca00b8517765",
"mainsnak": {
"snaktype": "value",
"property": "P1151",
"datatype": "wikibase-item",
"datavalue": {
"value": {
"entity-type": "item",
"id": "Q6342720",
"numeric-id": 6342720
},
"type": "wikibase-entityid"
}
},
"type": "statement",
"rank": "normal"
}
],
"P625": [
{
"id": "q60$f00c56de-4bac-e259-b146-254897432868",
"mainsnak": {
"snaktype": "value",
"property": "P625",
"datatype": "globe-coordinate",
"datavalue": {
"value": {
"latitude": 40.67,
"longitude": -73.94,
"altitude": null,
"precision": 0.00027777777777778,
"globe": "http://www.wikidata.org/entity/Q2"
},
"type": "globecoordinate"
}
},
"type": "statement",
"rank": "normal",
"references": [
{
"hash": "7eb64cf9621d34c54fd4bd040ed4b61a88c4a1a0",
"snaks": {
"P143": [
{
"snaktype": "value",
"property": "P143",
"datatype": "wikibase-item",
"datavalue": {
"value": {
"entity-type": "item",
"id": "Q328",
"numeric-id": 328
},
"type": "wikibase-entityid"
}
}
]
},
"snaks-order": [
"P143"
]
}
]
}
],
"P150": [
{
"id": "Q60$bdddaa06-4e4b-f369-8954-2bb010aaa057",
"mainsnak": {
"snaktype": "value",
"property": "P150",
"datatype": "wikibase-item",
"datavalue": {
"value": {
"entity-type": "item",
"id": "Q11299",
"numeric-id": 11299
},
"type": "wikibase-entityid"
}
},
"type": "statement",
"rank": "normal"
},
{
"id": "Q60$0e484d5b-41a5-1594-7ae1-c3768c6206f6",
"mainsnak": {
"snaktype": "value",
"property": "P150",
"datatype": "wikibase-item",
"datavalue": {
"value": {
"entity-type": "item",
"id": "Q18419",
"numeric-id": 18419
},
"type": "wikibase-entityid"
}
},
"type": "statement",
"rank": "normal"
},
{
"id": "Q60$e5000a60-42fc-2aba-f16d-bade1d2e8a58",
"mainsnak": {
"snaktype": "value",
"property": "P150",
"datatype": "wikibase-item",
"datavalue": {
"value": {
"entity-type": "item",
"id": "Q18424",
"numeric-id": 18424
},
"type": "wikibase-entityid"
}
},
"type": "statement",
"rank": "normal"
},
{
"id": "Q60$4d90d6f4-4ab8-26bd-f2a5-4ac2a6eb48cd",
"mainsnak": {
"snaktype": "value",
"property": "P150",
"datatype": "wikibase-item",
"datavalue": {
"value": {
"entity-type": "item",
"id": "Q18426",
"numeric-id": 18426
},
"type": "wikibase-entityid"
}
},
"type": "statement",
"rank": "normal"
},
{
"id": "Q60$ede49e3c-44f6-75a3-eb74-6a89886e30c9",
"mainsnak": {
"snaktype": "value",
"property": "P150",
"datatype": "wikibase-item",
"datavalue": {
"value": {
"entity-type": "item",
"id": "Q18432",
"numeric-id": 18432
},
"type": "wikibase-entityid"
}
},
"type": "statement",
"rank": "normal"
}
],
"P6": [
{
"id": "Q60$5cc8fc79-4807-9800-dbea-fe9c20ab273b",
"mainsnak": {
"snaktype": "value",
"property": "P6",
"datatype": "wikibase-item",
"datavalue": {
"value": {
"entity-type": "item",
"id": "Q4911497",
"numeric-id": 4911497
},
"type": "wikibase-entityid"
}
},
"qualifiers": {
"P580": [
{
"hash": "c53f3ca845b789e543ed45e3e1ecd1dd950e30dc",
"snaktype": "value",
"property": "P580",
"datatype": "time",
"datavalue": {
"value": {
"time": "+00000002014-01-01T00:00:00Z",
"timezone": 0,
"before": 0,
"after": 0,
"precision": 11,
"calendarmodel": "http://www.wikidata.org/entity/Q1985727"
},
"type": "time"
}
}
]
},
"qualifiers-order": [
"P580"
],
"type": "statement",
"rank": "preferred"
},
{
"id": "q60$cad4e313-4b5e-e089-08b9-3b1c7998e762",
"mainsnak": {
"snaktype": "value",
"property": "P6",
"datatype": "wikibase-item",
"datavalue": {
"value": {
"entity-type": "item",
"id": "Q607",
"numeric-id": 607
},
"type": "wikibase-entityid"
}
},
"qualifiers": {
"P580": [
{
"hash": "47c515b79f80e24e03375b327f2ac85184765d5b",
"snaktype": "value",
"property": "P580",
"datatype": "time",
"datavalue": {
"value": {
"time": "+00000002002-01-01T00:00:00Z",
"timezone": 0,
"before": 0,
"after": 0,
"precision": 11,
"calendarmodel": "http://www.wikidata.org/entity/Q1985727"
},
"type": "time"
}
}
],
"P582": [
{
"hash": "1f463f78538c49ef6adf3a9b18e211af7195240a",
"snaktype": "value",
"property": "P582",
"datatype": "time",
"datavalue": {
"value": {
"time": "+00000002013-12-31T00:00:00Z",
"timezone": 0,
"before": 0,
"after": 0,
"precision": 11,
"calendarmodel": "http://www.wikidata.org/entity/Q1985727"
},
"type": "time"
}
}
]
},
"qualifiers-order": [
"P580",
"P582"
]
}
],
"P856": [
{
"id": "Q60$4e3e7a42-4ec4-b7c3-7570-b103eb2bc1ac",
"mainsnak": {
"snaktype": "value",
"property": "P856",
"datatype": "url",
"datavalue": {
"value": "http://nyc.gov/",
"type": "string"
}
},
"type": "statement",
"rank": "normal"
}
]
},
"sitelinks": {
"afwiki": {
"site": "afwiki",
"title": "New York Stad",
"badges": []
},
"dewiki": {
"site": "dewiki",
"title": "New York City",
"badges": [
"Q17437798"
]
},
"dewikinews": {
"site": "dewikinews",
"title": "Kategorie:New York",
"badges": []
},
"elwiki": {
"site": "elwiki",
"title": "\u039d\u03ad\u03b1 \u03a5\u03cc\u03c1\u03ba\u03b7",
"badges": []
},
"enwiki": {
"site": "enwiki",
"title": "New York City",
"badges": []
},
"zhwikivoyage": {
"site": "zhwikivoyage",
"title": "\u7d10\u7d04",
"badges": []
},
"zuwiki": {
"site": "zuwiki",
"title": "New York (idolobha)",
"badges": []
}
}
}
The result of this code is:
english_aliases = pydash.get(record, 'aliases.en')
print(type(english_aliases))
print(english_aliases)
<class 'list'>
[{'language': 'en', 'value': 'Kingdom of Belgium'}, {'language': 'en', 'value': 'BEL'}, {'language': 'en', 'value': 'be'}, {'language': 'en', 'value': '🇧🇪'}, {'language': 'en', 'value': 'BE'}]
The answer is:
english_aliases = set()
if pydash.has(record, 'aliases.en'):
    for itm in pydash.get(record, 'aliases.en'):
        english_aliases.add(itm['value'])
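As a small variant, passing a default to pydash.get avoids the separate has() check; a sketch assuming the same record structure:

# Fall back to an empty list when the language key is absent
english_aliases = {itm['value'] for itm in pydash.get(record, 'aliases.en', [])}
arabic_aliases = {itm['value'] for itm in pydash.get(record, 'aliases.ar', [])}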

How to use S3 Select for Nested Parquet Objects

I have dumped data into a parquet file.
When I use
SELECT * FROM s3object s LIMIT 1
it gives me the following result.
{
"name": "John",
"age": "45",
"country": "USA",
"experience": [{
"company": {
"name": "ABC",
"years": "10",
"position": "Manager"
}
},
{
"company": {
"name": "BBC",
"years": "2",
"position": "Assistant"
}
}
]
}
I want to filter the result where company.name = "ABC",
so the output should look like the following:
{
"name": "John",
"age": "45",
"country": "USA",
"experience": [{
"company": {
"name": "ABC",
"years": "10",
"position": "Manager"
}
}
]
}
or this
{
"name": "John",
"age": "45",
"country": "USA",
"experience.company.name": "ABC",
"experience.company.years": "10",
"experience.company.position": "Manager"
}
Any support is highly appreciated.
Thanks.
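As far as I know, S3 Select's SQL dialect has only limited support for unnesting arrays inside Parquet rows, so one workaround is to select the whole row and filter the nested list client-side. A minimal Python sketch under that assumption, using the sample record above:

# record is the dict parsed from the S3 Select output shown above
record = {
    "name": "John",
    "age": "45",
    "country": "USA",
    "experience": [
        {"company": {"name": "ABC", "years": "10", "position": "Manager"}},
        {"company": {"name": "BBC", "years": "2", "position": "Assistant"}},
    ],
}

def filter_experience(rec, company_name):
    # Keep only the experience entries whose company name matches
    kept = [e for e in rec.get("experience", [])
            if e.get("company", {}).get("name") == company_name]
    return {**rec, "experience": kept}

print(filter_experience(record, "ABC"))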

Why am I receiving an error when attempting to parse JSON object within for loop?

Everything in my script runs fine until I try to run it through a for loop, specifically when I attempt to index a specific array within the object. The script is intended to grab the delivery date for each tracking number in my list.
This is my script:
import requests
import json
TrackList = ['1Z3X756E0310496105','1ZX0373R0303581450','1ZX0373R0103574417']
url = 'https://onlinetools.ups.com/rest/Track'
para1 = '...beginning of JSON request string...'
para2 = '...end of JSON request string...'
for TrackNum in TrackList:
    parameters = para1 + TrackNum + para2
    resp = requests.post(url=url, data=parameters, verify=False)
    data = json.loads(resp.text)
    DelDate = data['TrackResponse']['Shipment']['Package'][0]['Activity'][0]['Date']
    print(DelDate)
JSON API Response (if needed):
{
"TrackResponse": {
"Response": {
"ResponseStatus": {
"Code": "1",
"Description": "Success"
},
"TransactionReference": {
"CustomerContext": "Analytics Inquiry"
}
},
"Shipment": {
"InquiryNumber": {
"Code": "01",
"Description": "ShipmentIdentificationNumber",
"Value": "1ZX0373R0103574417"
},
"Package": {
"Activity": [
{
"ActivityLocation": {
"Address": {
"City": "OKLAHOMA CITY",
"CountryCode": "US",
"PostalCode": "73128",
"StateProvinceCode": "OK"
},
"Code": "M3",
"Description": "Front Desk",
"SignedForByName": "CUMMINGS"
},
"Date": "20190520",
"Status": {
"Code": "9E",
"Description": "Delivered",
"Type": "D"
},
"Time": "091513"
},
{
"ActivityLocation": {
"Address": {
"City": "Oklahoma City",
"CountryCode": "US",
"StateProvinceCode": "OK"
},
"Description": "Front Desk"
},
"Date": "20190520",
"Status": {
"Code": "OT",
"Description": "Out For Delivery Today",
"Type": "I"
},
"Time": "085943"
},
{
"ActivityLocation": {
"Address": {
"City": "Oklahoma City",
"CountryCode": "US",
"StateProvinceCode": "OK"
},
"Description": "Front Desk"
},
"Date": "20190520",
"Status": {
"Code": "DS",
"Description": "Destination Scan",
"Type": "I"
},
"Time": "011819"
},
{
"ActivityLocation": {
"Address": {
"City": "Oklahoma City",
"CountryCode": "US",
"StateProvinceCode": "OK"
},
"Description": "Front Desk"
},
"Date": "20190519",
"Status": {
"Code": "AR",
"Description": "Arrival Scan",
"Type": "I"
},
"Time": "235100"
},
{
"ActivityLocation": {
"Address": {
"City": "DFW Airport",
"CountryCode": "US",
"StateProvinceCode": "TX"
},
"Description": "Front Desk"
},
"Date": "20190519",
"Status": {
"Code": "DP",
"Description": "Departure Scan",
"Type": "I"
},
"Time": "195500"
},
{
"ActivityLocation": {
"Address": {
"City": "DFW Airport",
"CountryCode": "US",
"StateProvinceCode": "TX"
},
"Description": "Front Desk"
},
"Date": "20190517",
"Status": {
"Code": "OR",
"Description": "Origin Scan",
"Type": "I"
},
"Time": "192938"
},
{
"ActivityLocation": {
"Address": {
"CountryCode": "US"
},
"Description": "Front Desk"
},
"Date": "20190517",
"Status": {
"Code": "MP",
"Description": "Order Processed: Ready for UPS",
"Type": "M"
},
"Time": "184621"
}
],
"PackageWeight": {
"UnitOfMeasurement": {
"Code": "LBS"
},
"Weight": "2.00"
},
"ReferenceNumber": [
{
"Code": "01",
"Value": "8472745558"
},
{
"Code": "01",
"Value": "5637807:1007379402:BN81-17077A:1"
},
{
"Code": "01",
"Value": "5637807"
}
],
"TrackingNumber": "1ZX0373R0103574417"
},
"PickupDate": "20190517",
"Service": {
"Code": "001",
"Description": "UPS Next Day Air"
},
"ShipmentAddress": [
{
"Address": {
"AddressLine": "S 600 ROYAL LN",
"City": "COPPELL",
"CountryCode": "US",
"PostalCode": "750193827",
"StateProvinceCode": "TX"
},
"Type": {
"Code": "01",
"Description": "Shipper Address"
}
},
{
"Address": {
"City": "OKLAHOMA CITY",
"CountryCode": "US",
"PostalCode": "73128",
"StateProvinceCode": "OK"
},
"Type": {
"Code": "02",
"Description": "ShipTo Address"
}
}
],
"ShipmentWeight": {
"UnitOfMeasurement": {
"Code": "LBS"
},
"Weight": "2.00"
},
"ShipperNumber": "X0373R"
}
}
}
Below is the error I receive:
Traceback (most recent call last):
  File "/Users/***/Library/Preferences/PyCharmCE2019.1/scratches/UPS_API.py", line 15, in <module>
    DelDate = data['TrackResponse']['Shipment']['Package'][0]['Activity'][0]['Date']
KeyError: 0
You're trying to index "Package" at index 0, but it's an object, not an array, so you should be accessing ['Package']['Activity'] instead. Just take away the [0], because there is no [1] or [2].
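If some shipments might return "Package" as a list of objects rather than a single object (an assumption about the UPS API, not something shown in the question), you can normalize both shapes before indexing; a defensive sketch:

def first_activity_date(data):
    package = data['TrackResponse']['Shipment']['Package']
    # Wrap a single-package object in a list so both shapes index the same way
    if isinstance(package, dict):
        package = [package]
    return package[0]['Activity'][0]['Date']

Inside the loop you would then write DelDate = first_activity_date(data) in place of the failing line.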

Build relation between elasticsearch aggregators - Nested groupings

I need to create nested groupings between fields.
Let us consider the example given below,
Documents:
{
"keyword": "abc",
"country": "IN",
"state": "TN",
"city": "Chennai"
},
{
"keyword": "abc",
"country": "IN",
"state": "TN",
"city": "Trichy"
},
{
"keyword": "abc",
"country": "IN",
"state": "KL",
"city": "TVM"
},
{
"keyword": "abc",
"country": "US",
"state": "Cal",
"city": "California"
}
Required output(Something like this):
{
"country": "IN",
"TN": [
"Chennai",
"Trichy"
],
"KL": [
"TVM"
]
},
{
"country": "US",
"Cal": [
"California"
]
}
Query used:
{
"from": 0,
"size": 1,
"aggs": {
"country": {
"terms": {
"field": "country.keyword",
"size": 50000
}
},
"state": {
"terms": {
"field": "state.keyword",
"size": 50000
}
},
"city": {
"terms": {
"field": "city.keyword",
"size": 50000
}
}
},
"query": {
"query_string": {
"query": "(keyword:abc) "
}
}
}
For this query I got a separate bucket for each of city, state and country.
But what I need is for city to be grouped under state, and state to be grouped under country.
Thanks in advance.
The following query with nested aggregations should work for you:
{
"query": {
"query_string": {
"query": "(keyword:abc)"
}
},
"size": 0,
"aggs": {
"country_agg": {
"terms": {
"field": "country.keyword",
"size": 10
},
"aggs": {
"state_agg": {
"terms": {
"field": "state.keyword",
"size": 10
},
"aggs": {
"city_agg": {
"terms": {
"field": "city.keyword",
"size": 10
}
}
}
}
}
}
}
}
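To turn the nested buckets into the output shape you described, you can walk the response in Python; a sketch assuming the elasticsearch-py client, where query is the request above as a Python dict and my-index is a placeholder index name:

from elasticsearch import Elasticsearch

es = Elasticsearch()
resp = es.search(index="my-index", body=query)

results = []
for country in resp["aggregations"]["country_agg"]["buckets"]:
    entry = {"country": country["key"]}
    # Each country bucket carries its state sub-buckets, and each state
    # bucket its city sub-buckets, which maps onto the requested structure
    for state in country["state_agg"]["buckets"]:
        entry[state["key"]] = [c["key"] for c in state["city_agg"]["buckets"]]
    results.append(entry)
print(results)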

Fulltext Search in arangodb using AQL and python

I have stored the data in ArangoDB in the following format:
{"data": [
{
"content": "maindb",
"type": "string",
"name": "db_name",
"key": "1745085839"
},
{
"type": "id",
"name": "rel",
"content": "1745085840",
"key": "1745085839"
},
{
"content": "user",
"type": "string",
"name": "rel_name",
"key": "1745085840"
},
{
"type": "id",
"name": "tuple",
"content": "174508584001",
"key": "1745085840"
},
{
"type": "id",
"name": "tuple",
"content": "174508584002",
"key": "1745085840"
},
{
"type": "id",
"name": "tuple",
"content": "174508584003",
"key": "1745085840"
},
{
"type": "id",
"name": "tuple",
"content": "174508584004",
"key": "1745085840"
},
{
"type": "id",
"name": "tuple",
"content": "174508584005",
"key": "1745085840"
},
{
"type": "id",
"name": "tuple",
"content": "174508584006",
"key": "1745085840"
},
{
"type": "id",
"name": "tuple",
"content": "174508584007",
"key": "1745085840"
},
{
"content": "dspclient",
"type": "varchar",
"name": "username",
"key": "174508584001"
},
{
"content": "12345",
"type": "varchar",
"name": "password",
"key": "174508584001"
},
{
"content": "12345",
"type": "varchar",
"name": "cpassword",
"key": "174508584001"
},
{
"content": "n",
"type": "varchar",
"name": "PostgreSQL",
"key": "174508584001"
},
{
"content": "n",
"name": "IBMDB2",
"type": "varchar",
"key": "174508584001"
},
{
"content": "n",
"name": "MySQL",
"type": "varchar",
"key": "174508584001"
},
{
"content": "n",
"type": "varchar",
"name": "SQLServer",
"key": "174508584001"
},
{
"content": "n",
"name": "Hadoop",
"type": "varchar",
"key": "174508584001"
},
{
"content": "None",
"name": "dir1",
"type": "varchar",
"key": "174508584001"
},
{
"content": "None",
"name": "dir2",
"type": "varchar",
"key": "174508584001"
},
{
"content": "None",
"name": "dir3",
"type": "varchar",
"key": "174508584001"
},
{
"content": "None",
"name": "dir4",
"type": "varchar",
"key": "174508584001"
},
{
"type": "inet",
"name": "ipaddr",
"content": "1921680103",
"key": "174508584001"
},
{
"content": "y",
"name": "status",
"type": "varchar",
"key": "174508584001"
},
{
"content": "None",
"type": "varchar",
"name": "logintime",
"key": "174508584001"
},
{
"content": "None",
"type": "varchar",
"name": "logindate",
"key": "174508584001"
},
{
"content": "None",
"type": "varchar",
"name": "logouttime",
"key": "174508584001"
},
{
"content": "client",
"type": "varchar",
"name": "user_type",
"key": "174508584001"
},
{
"content": "royal",
"type": "varchar",
"name": "username",
"key": "174508584002"
},
{
"content": "12345",
"type": "varchar",
"name": "password",
"key": "174508584002"
},
{
"content": "12345",
"type": "varchar",
"name": "cpassword",
"key": "174508584002"
},
{
"content": "n",
"type": "varchar",
"name": "PostgreSQL",
"key": "174508584002"
},
{
"content": "n",
"name": "IBMDB2",
"type": "varchar",
"key": "174508584002"
},
{
"content": "n",
"name": "MySQL",
"type": "varchar",
"key": "174508584002"
},
{
"content": "n",
"type": "varchar",
"name": "SQLServer",
"key": "174508584002"
},
{
"content": "n",
"name": "Hadoop",
"type": "varchar",
"key": "174508584002"
},
{
"content": "None",
"name": "dir1",
"type": "varchar",
"key": "174508584002"
},
{
"content": "None",
"name": "dir2",
"type": "varchar",
"key": "174508584002"
},
{
"content": "None",
"name": "dir3",
"type": "varchar",
"key": "174508584002"
},
{
"content": "None",
"name": "dir4",
"type": "varchar",
"key": "174508584002"
},
{
"type": "inet",
"name": "ipaddr",
"content": "1921680105",
"key": "174508584002"
},
{
"content": "y",
"name": "status",
"type": "varchar",
"key": "174508584002"
},
{
"content": "190835899000",
"type": "varchar",
"name": "logintime",
"key": "174508584002"
},
{
"content": "20151002",
"type": "varchar",
"name": "logindate",
"key": "174508584002"
},
{
"content": "None",
"type": "varchar",
"name": "logouttime",
"key": "174508584002"
},
{
"content": "client",
"type": "varchar",
"name": "user_type",
"key": "174508584002"
},
{
"content": "abc",
"type": "varchar",
"name": "username",
"key": "174508584003"
},
{
"content": "12345",
"type": "varchar",
"name": "password",
"key": "174508584003"
},
{
"content": "12345",
"type": "varchar",
"name": "cpassword",
"key": "174508584003"
},
{
"content": "n",
"type": "varchar",
"name": "PostgreSQL",
"key": "174508584003"
},
{
"content": "n",
"name": "IBMDB2",
"type": "varchar",
"key": "174508584003"
}]}
In order to perform a fulltext search, I have created an index on the content attribute using the following syntax in a Python script:
c.DSP.ensureFulltextIndex("content");
where c is the database and DSP is the collection name. Now I am trying to perform a search operation on the above data set using the syntax:
FOR doc IN FULLTEXT(DSP, "content", "username") RETURN doc
Then an error occurs:
[1571] in function 'FULLTEXT()': no suitable fulltext index found for fulltext query on 'DSP' (while executing)
Please tell me what the problem is, and also what the syntax should be when I run this query from a Python script.
Thanks...
Working with the 10-minute tutorial and the driver documentation, I got it working like this:
from pyArango.connection import *

c = Connection()
db = c.createDatabase(name="testdb")
DSP = db.createCollection(name="DSP")
DSP.ensureFulltextIndex(fields=["content"])
doc = DSP.createDocument({"content": "test bla"})
doc.save()
print(db.AQLQuery('''FOR doc IN FULLTEXT(DSP, "content", "bla") RETURN doc''', 10))
Resulting in:
[{u'_key': u'1241175138503', u'content': u'test bla', u'_rev': u'1241175138503', u'_id': u'DSP/1241175138503'}]
I've used arangosh to revalidate the steps from the python prompt:
arangosh> db._useDatabase("testdb")
arangosh [testdb]> db.DSP.getIndexes()
[
{
"id" : "DSP/0",
"type" : "primary",
"fields" : [
"_key"
],
"selectivityEstimate" : 1,
"unique" : true,
"sparse" : false
},
{
"id" : "DSP/1241140928711",
"type" : "hash",
"fields" : [
"content"
],
"selectivityEstimate" : 1,
"unique" : false,
"sparse" : true
},
{
"id" : "DSP/1241142960327",
"type" : "fulltext",
"fields" : [
"content"
],
"unique" : false,
"sparse" : true,
"minLength" : 2
}
]
arangosh [testdb]> db.DSP.toArray()
[
{
"content" : "test bla",
"_id" : "DSP/1241175138503",
"_rev" : "1241175138503",
"_key" : "1241175138503"
}
]
arangosh [testdb]> db._query('FOR doc IN FULLTEXT(DSP, "content", "bla") RETURN doc')
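One more caveat, based on the data shown in the question: everything sits in a single document under a "data" array, so a fulltext index on "content" finds no top-level "content" attribute to index there, and FULLTEXT matches the values of the indexed attribute rather than attribute names. A sketch of one way to restructure, reusing the testdb/DSP names from above (dataset stands for the question's JSON object):

from pyArango.connection import Connection

c = Connection()
db = c["testdb"]
DSP = db["DSP"]

# Store each entry of the "data" array as its own document, so the fulltext
# index on "content" sees a top-level "content" attribute in every document
for entry in dataset["data"]:
    doc = DSP.createDocument(entry)
    doc.save()

# Search for a word that actually appears as a content value ("dspclient"),
# not an attribute name like "username"
print(db.AQLQuery('FOR doc IN FULLTEXT(DSP, "content", "dspclient") RETURN doc', 10))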
