avro-python3 doesn't provide schema evolution?

I am trying to recreate a schema evolution case (backward compatibility) with avro-python3.
I have two schemas:
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
schema_v1 = avro.schema.Parse("""
{
"type": "record",
"namespace": "com.example",
"name": "CustomerV1",
"fields": [
{ "name": "first_name", "type": "string", "doc": "First Name of Customer" },
{ "name": "last_name", "type": "string", "doc": "Last Name of Customer" },
{ "name": "age", "type": "int", "doc": "Age at the time of registration" },
{ "name": "height", "type": "float", "doc": "Height at the time of registration in cm" },
{ "name": "weight", "type": "float", "doc": "Weight at the time of registration in kg" },
{ "name": "automated_email", "type": "boolean", "default": true, "doc": "Field indicating if the user is enrolled in marketing emails" }
]
}
""")
schema_v2 = avro.schema.Parse("""
{
"type": "record",
"namespace": "com.example",
"name": "CustomerV2",
"fields": [
{ "name": "first_name", "type": "string", "doc": "First Name of Customer" },
{ "name": "last_name", "type": "string", "doc": "Last Name of Customer" },
{ "name": "age", "type": "int", "doc": "Age at the time of registration" },
{ "name": "height", "type": "float", "doc": "Height at the time of registration in cm" },
{ "name": "weight", "type": "float", "doc": "Weight at the time of registration in kg" },
{ "name": "phone_number", "type": ["null", "string"], "default": null, "doc": "optional phone number"},
{ "name": "email", "type": "string", "default": "missing#example.com", "doc": "email address"}
]
}
""")
The second schema doesn't have the automated_email field, but it has two additional fields: phone_number and email.
According to Avro schema evolution rules, if I write an Avro record with schema_v1:
writer = DataFileWriter(open("customer_v1.avro", "wb"), DatumWriter(), schema_v1)
writer.append({
    "first_name": "John",
    "last_name": "Doe",
    "age": 34,
    "height": 178.0,
    "weight": 75.0,
    "automated_email": True
})
writer.close()
... I should be able to read it with schema_v2, provided there are default values for the missing fields:
reader = DataFileReader(open("customer_v1.avro", "rb"), DatumReader(reader_schema=schema_v2))
for field in reader:
    print(field)
reader.close()
But I get the following error:
SchemaResolutionException: Schemas do not match.
I know this works in Java; this is an example from a video course.
Is there a way to make it work in Python?

fastavro, an alternative Python implementation, handles this just fine.
The code to write with the first schema:
s1 = {
"type": "record",
"namespace": "com.example",
"name": "CustomerV1",
"fields": [
{"name": "first_name", "type": "string", "doc": "First Name of Customer"},
{"name": "last_name", "type": "string", "doc": "Last Name of Customer"},
{"name": "age", "type": "int", "doc": "Age at the time of registration"},
{
"name": "height",
"type": "float",
"doc": "Height at the time of registration in cm",
},
{
"name": "weight",
"type": "float",
"doc": "Weight at the time of registration in kg",
},
{
"name": "automated_email",
"type": "boolean",
"default": True,
"doc": "Field indicating if the user is enrolled in marketing emails",
},
],
}
record = {
"first_name": "John",
"last_name": "Doe",
"age": 34,
"height": 178.0,
"weight": 75.0,
"automated_email": True,
}
import fastavro
with open("test.avro", "wb") as fp:
fastavro.writer(fp, fastavro.parse_schema(s1), [record])
And to read with the second schema:
s2 = {
"type": "record",
"namespace": "com.example",
"name": "CustomerV2",
"fields": [
{"name": "first_name", "type": "string", "doc": "First Name of Customer"},
{"name": "last_name", "type": "string", "doc": "Last Name of Customer"},
{"name": "age", "type": "int", "doc": "Age at the time of registration"},
{
"name": "height",
"type": "float",
"doc": "Height at the time of registration in cm",
},
{
"name": "weight",
"type": "float",
"doc": "Weight at the time of registration in kg",
},
{
"name": "phone_number",
"type": ["null", "string"],
"default": None,
"doc": "optional phone number",
},
{
"name": "email",
"type": "string",
"default": "missing#example.com",
"doc": "email address",
},
],
}
import fastavro
with open("test.avro", "rb") as fp:
for record in fastavro.reader(fp, fastavro.parse_schema(s2)):
print(record)
The output has the new fields as expected:
{'first_name': 'John', 'last_name': 'Doe', 'age': 34, 'height': 178.0, 'weight': 75.0, 'phone_number': None, 'email': 'missing#example.com'}

If you change the record name in the second schema from CustomerV2 back to CustomerV1, it works with avro-python3 version 1.10.0; the reader appears to require matching record names before it will resolve the schemas.
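For reference, a minimal sketch of that workaround: the evolved schema keeps the record name CustomerV1 (only the field list changes), so avro-python3 can match it against the writer schema stored in the file.
import avro.schema
from avro.datafile import DataFileReader
from avro.io import DatumReader

# Field list of schema_v2, record name of schema_v1.
schema_v2_compat = avro.schema.Parse("""
{
  "type": "record",
  "namespace": "com.example",
  "name": "CustomerV1",
  "fields": [
    { "name": "first_name", "type": "string" },
    { "name": "last_name", "type": "string" },
    { "name": "age", "type": "int" },
    { "name": "height", "type": "float" },
    { "name": "weight", "type": "float" },
    { "name": "phone_number", "type": ["null", "string"], "default": null },
    { "name": "email", "type": "string", "default": "missing#example.com" }
  ]
}
""")

reader = DataFileReader(open("customer_v1.avro", "rb"),
                        DatumReader(reader_schema=schema_v2_compat))
for record in reader:
    print(record)  # automated_email is dropped; phone_number/email come from defaults
reader.close()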

Validating Avro schema that is referencing another schema

I am using the Python 3 avro_validator library.
The schema I want to validate references other schemas in separate avro files. The files are in the same folder. How do I compile all the referenced schemas using the library?
Python code as follows:
from avro_validator.schema import Schema
schema_file = 'basketEvent.avsc'
schema = Schema(schema_file)
parsed_schema = schema.parse()
data_to_validate = {"test": "test"}
parsed_schema.validate(data_to_validate)
The error I get back:
ValueError: Error parsing the field [contentBasket]: The type [ContentBasket] is not recognized by Avro
Example Avro files below:
basketEvent.avsc
{
"type": "record",
"name": "BasketEvent",
"doc": "Indicates that a user action has taken place with a basket",
"fields": [
{
"default": "basket",
"doc": "Restricts this event to having type = basket",
"name": "event",
"type": {
"name": "BasketEventType",
"symbols": ["basket"],
"type": "enum"
}
},
{
"default": "create",
"doc": "What is being done with the basket. Note: create / delete / update will always follow a product event",
"name": "action",
"type": {
"name": "BasketEventAction",
"symbols": ["create","delete","update","view"],
"type": "enum"
}
},
{
"default": "ContentBasket",
"doc": "The set of values that are specific to a Basket event",
"name": "contentBasket",
"type": "ContentBasket"
},
{
"default": "ProductDetail",
"doc": "The set of values that are specific to a Product event",
"name": "productDetail",
"type": "ProductDetail"
},
{
"default": "Timestamp",
"doc": "The time stamp for the event being sent",
"name": "timestamp",
"type": "Timestamp"
}
]
}
contentBasket.avsc
{
"name": "ContentBasket",
"type": "record",
"doc": "The set of values that are specific to a Basket event",
"fields": [
{
"default": [],
"doc": "A range of details about product / basket availability",
"name": "availability",
"type": {
"type": "array",
"items": "Availability"
}
},
{
"default": [],
"doc": "A range of care pland applicable to the basket",
"name": "carePlan",
"type": {
"type": "array",
"items": "CarePlan"
}
},
{
"default": "Category",
"name": "category",
"type": "Category"
},
{
"default": "",
"doc": "Unique identfier for this basket",
"name": "id",
"type": "string"
},
{
"default": "Price",
"doc": "Overall pricing info about the basket as a whole - individual product pricings will be dealt with at a product level",
"name": "price",
"type": "Price"
}
]
}
availability.avsc
{
"name": "Availability",
"type": "record",
"doc": "A range of values relating to the availability of a product",
"fields": [
{
"default": [],
"doc": "A list of offers associated with the overall basket - product level offers will be dealt with on an individual product basis",
"name": "shipping",
"type": {
"type": "array",
"items": "Shipping"
}
},
{
"default": "",
"doc": "The status of the product",
"name": "stockStatus",
"type": {
"name": "StockStatus",
"symbols": ["in stock","out of stock",""],
"type": "enum"
}
},
{
"default": "",
"doc": "The ID for the store when the stock can be collected, if relevant",
"name": "storeId",
"type": "string"
},
{
"default": "",
"doc": "The status of the product",
"name": "type",
"type": {
"name": "AvailabilityType",
"symbols": ["collection","shipping",""],
"type": "enum"
}
}
]
}
maxDate.avsc
{
"type": "record",
"name": "MaxDate",
"doc": "Indicates the timestamp for latest day a delivery should be made",
"fields": [
{
"default": "Timestamp",
"doc": "The time stamp for the delivery",
"name": "timestamp",
"type": "Timestamp"
}
]
}
minDate.avsc
{
"type": "record",
"name": "MinDate",
"doc": "Indicates the timestamp for earliest day a delivery should be made",
"fields": [
{
"default": "Timestamp",
"doc": "The time stamp for the delivery",
"name": "timestamp",
"type": "Timestamp"
}
]
}
shipping.avsc
{
"name": "Shipping",
"type": "record",
"doc": "A range of values relating to shipping a product for delivery",
"fields": [
{
"default": "MaxDate",
"name": "maxDate",
"type": "MaxDate"
},
{
"default": "MinDate",
"name": "minDate",
"type": "minDate"
},
{
"default": 0,
"doc": "Revenue generated from shipping - note, once a specific shipping object is selected, the more detailed revenye data sits within the one of object in pricing - this is more just to define if shipping is free or not",
"name": "revenue",
"type": "int"
},
{
"default": "",
"doc": "The shipping supplier",
"name": "supplier",
"type": "string"
}
]
}
timestamp.avsc
{
"name": "Timestamp",
"type": "record",
"doc": "Timestamp for the action taking place",
"fields": [
{
"default": 0,
"name": "timestampMs",
"type": "long"
},
{
"default": "",
"doc": "Timestamp converted to a string in ISO format",
"name": "isoTimestamp",
"type": "string"
}
]
}
I'm not sure if that library supports what you are trying to do, but fastavro should.
If you put the first schema in a file called BasketEvent.avsc and the second schema in a file called ContentBasket.avsc then you can do the following:
from fastavro.schema import load_schema
from fastavro import validate
schema = load_schema("BasketEvent.avsc")
validate({"test": "test"}, schema)
Note that when I tried this I got an error of fastavro._schema_common.UnknownType: Availability, because it seems there are other referenced schemas that you haven't posted here.

How to write a json web response to a csv file in python?

Here is the schema of the JSON output that I am trying to parse; I want to write specific fields from it (for example: CVE ID, description, ...) into a CSV file.
{
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "JSON Schema for NVD Vulnerability Data Feed version 1.1",
"id": "https://scap.nist.gov/schema/nvd/feed/1.1/nvd_cve_feed_json_1.1.schema",
"definitions": {
"def_cpe_name": {
"description": "CPE name",
"type": "object",
"properties": {
"cpe22Uri": {
"type": "string"
},
"cpe23Uri": {
"type": "string"
},
"lastModifiedDate": {
"type": "string"
}
},
"required": [
"cpe23Uri"
]
},
"def_cpe_match": {
"description": "CPE match string or range",
"type": "object",
"properties": {
"vulnerable": {
"type": "boolean"
},
"cpe22Uri": {
"type": "string"
},
"cpe23Uri": {
"type": "string"
},
"versionStartExcluding": {
"type": "string"
},
"versionStartIncluding": {
"type": "string"
},
"versionEndExcluding": {
"type": "string"
},
"versionEndIncluding": {
"type": "string"
},
"cpe_name": {
"type": "array",
"items": {
"$ref": "#/definitions/def_cpe_name"
}
}
},
"required": [
"vulnerable",
"cpe23Uri"
]
},
"def_node": {
"description": "Defines a node or sub-node in an NVD applicability statement.",
"properties": {
"operator": {"type": "string"},
"negate": {"type": "boolean"},
"children": {
"type": "array",
"items": {"$ref": "#/definitions/def_node"}
},
"cpe_match": {
"type": "array",
"items": {"$ref": "#/definitions/def_cpe_match"}
}
}
},
"def_configurations": {
"description": "Defines the set of product configurations for a NVD applicability statement.",
"properties": {
"CVE_data_version": {"type": "string"},
"nodes": {
"type": "array",
"items": {"$ref": "#/definitions/def_node"}
}
},
"required": [
"CVE_data_version"
]
},
"def_subscore": {
"description": "CVSS subscore.",
"type": "number",
"minimum": 0,
"maximum": 10
},
"def_impact": {
"description": "Impact scores for a vulnerability as found on NVD.",
"type": "object",
"properties": {
"baseMetricV3": {
"description": "CVSS V3.x score.",
"type": "object",
"properties": {
"cvssV3": {"$ref": "cvss-v3.x.json"},
"exploitabilityScore": {"$ref": "#/definitions/def_subscore"},
"impactScore": {"$ref": "#/definitions/def_subscore"}
}
},
"baseMetricV2": {
"description": "CVSS V2.0 score.",
"type": "object",
"properties": {
"cvssV2": {"$ref": "cvss-v2.0.json"},
"severity": {"type": "string"},
"exploitabilityScore": {"$ref": "#/definitions/def_subscore"},
"impactScore": {"$ref": "#/definitions/def_subscore"},
"acInsufInfo": {"type": "boolean"},
"obtainAllPrivilege": {"type": "boolean"},
"obtainUserPrivilege": {"type": "boolean"},
"obtainOtherPrivilege": {"type": "boolean"},
"userInteractionRequired": {"type": "boolean"}
}
}
}
},
"def_cve_item": {
"description": "Defines a vulnerability in the NVD data feed.",
"properties": {
"cve": {"$ref": "CVE_JSON_4.0_min_1.1.schema"},
"configurations": {"$ref": "#/definitions/def_configurations"},
"impact": {"$ref": "#/definitions/def_impact"},
"publishedDate": {"type": "string"},
"lastModifiedDate": {"type": "string"}
},
"required": ["cve"]
}
},
"type": "object",
"properties": {
"CVE_data_type": {"type": "string"},
"CVE_data_format": {"type": "string"},
"CVE_data_version": {"type": "string"},
"CVE_data_numberOfCVEs": {
"description": "NVD adds number of CVE in this feed",
"type": "string"
},
"CVE_data_timestamp": {
"description": "NVD adds feed date timestamp",
"type": "string"
},
"CVE_Items": {
"description": "NVD feed array of CVE",
"type": "array",
"items": {"$ref": "#/definitions/def_cve_item"}
}
},
"required": [
"CVE_data_type",
"CVE_data_format",
"CVE_data_version",
"CVE_Items"
]
}
# -*- coding: utf-8 -*-
"""
Created on Thu Dec 3 17:08:51 2020
#author: Rajat Varshney
"""
import requests, json
api_url = 'https://services.nvd.nist.gov/rest/json/cve/1.0/'
cveid = input('Enter CVE ID: ')
api_call = requests.get(api_url+cveid)
print(api_call.content)
with open('cve details.txt', 'w') as outfile:
    json.dump(api_call.content, outfile)
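A minimal sketch of the CSV step, assuming the NVD 1.0 response shape: the REST endpoint wraps the feed in a result object, and the CVE ID and description live under cve.CVE_data_meta.ID and cve.description.description_data[0].value in the referenced CVE_JSON_4.0 sub-schema, which is not shown above, so treat those paths as assumptions.
import csv
import requests

api_url = 'https://services.nvd.nist.gov/rest/json/cve/1.0/'
cveid = input('Enter CVE ID: ')
response = requests.get(api_url + cveid).json()  # parse the body instead of dumping raw bytes
payload = response.get('result', response)  # REST responses wrap the feed in a 'result' object

with open('cve_details.csv', 'w', newline='') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['cve_id', 'description', 'publishedDate', 'lastModifiedDate'])
    for item in payload.get('CVE_Items', []):
        cve_id = item['cve']['CVE_data_meta']['ID']
        description = item['cve']['description']['description_data'][0]['value']
        writer.writerow([cve_id, description,
                         item.get('publishedDate', ''),
                         item.get('lastModifiedDate', '')])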

append multiple json files together and output 1 Avro file using Python

I have a use case where I am required to append multiple JSON files and then convert them into a single Avro file. I have written the code below, which appends the JSON files together and then converts them into an Avro file. The issue I am having is that the appended JSON ends up enclosed in [] brackets, so I get an error while converting it to Avro. I am trying to figure out how to get rid of the [] on the first and last lines of the JSON file. Any help is appreciated.
The error I am getting (a snippet; the full error is too long to paste): avro.io.AvroTypeException: The datum [{'event_type': 'uplink'.....}] is not an example of the schema
My code:
Laird.py
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
from avro import schema, datafile, io
import json
from datetime import date
import glob
data = []
for f in glob.glob("*.txt"):
    with open(f) as infile:
        data.append(json.load(infile))
# json.dumps(data)
with open("laird.json", 'w') as outfile:
    json.dump(data, outfile)

def json_to_avro():
    fo = open("laird.json", "r")
    data = fo.readlines()
    final_header = []
    final_rec = []
    for header in data[0:1]:
        header = header.strip("\n")
        header = header.split(",")
        final_header = header
    for rec in data[1:]:
        rec = rec.strip("\n")
        rec = rec.split(" ")
        rec = ' '.join(rec).split()
        final_rec = rec
    final_dict = dict(zip(final_header, final_rec))
    # print(final_dict)
    json_dumps = json.dumps(final_dict, ensure_ascii=False)
    # print(json_dumps)
    schema = avro.schema.parse(open("laird.avsc", "rb").read())
    # print(schema)
    writer = DataFileWriter(open("laird.avro", "wb"), DatumWriter(), schema)
    with open("laird.json") as fp:
        contents = json.load(fp)
    print(contents)
    writer.append(contents)
    writer.close()

json_to_avro()
#Script to read/convert AVRO file to JSON
reader = DataFileReader(open("laird.avro", "rb"), DatumReader())
for user in reader:
    print(user)
reader.close()
Schema: laird.avsc
{
"name": "MyClass",
"type": "record",
"namespace": "com.acme.avro",
"fields": [
{
"name": "event_type",
"type": "string"
},
{
"name": "event_data",
"type": {
"name": "event_data",
"type": "record",
"fields": [
{
"name": "device_id",
"type": "string"
},
{
"name": "user_id",
"type": "string"
},
{
"name": "payload",
"type": {
"type": "array",
"items": {
"name": "payload_record",
"type": "record",
"fields": [
{
"name": "name",
"type": "string"
},
{
"name": "sensor_id",
"type": "string"
},
{
"name": "type",
"type": "string"
},
{
"name": "unit",
"type": "string"
},
{
"name": "value",
"type": "float"
},
{
"name": "channel",
"type": "int"
},
{
"name": "timestamp",
"type": "long"
}
]
}
}
},
{
"name": "client_id",
"type": "string"
},
{
"name": "hardware_id",
"type": "string"
},
{
"name": "timestamp",
"type": "long"
},
{
"name": "application_id",
"type": "string"
},
{
"name": "device_type_id",
"type": "string"
}
]
}
},
{
"name": "company",
"type": {
"name": "company",
"type": "record",
"fields": [
{
"name": "id",
"type": "int"
},
{
"name": "address",
"type": "string"
},
{
"name": "city",
"type": "string"
},
{
"name": "country",
"type": "string"
},
{
"name": "created_at",
"type": "string"
},
{
"name": "industry",
"type": "string"
},
{
"name": "latitude",
"type": "float"
},
{
"name": "longitude",
"type": "float"
},
{
"name": "name",
"type": "string"
},
{
"name": "state",
"type": "string"
},
{
"name": "status",
"type": "int"
},
{
"name": "timezone",
"type": "string"
},
{
"name": "updated_at",
"type": "string"
},
{
"name": "user_id",
"type": "string"
},
{
"name": "zip",
"type": "string"
}
]
}
},
{
"name": "location",
"type": {
"name": "location",
"type": "record",
"fields": [
{
"name": "id",
"type": "int"
},
{
"name": "address",
"type": "string"
},
{
"name": "city",
"type": "string"
},
{
"name": "country",
"type": "string"
},
{
"name": "created_at",
"type": "string"
},
{
"name": "industry",
"type": "string"
},
{
"name": "latitude",
"type": "float"
},
{
"name": "longitude",
"type": "float"
},
{
"name": "name",
"type": "string"
},
{
"name": "state",
"type": "string"
},
{
"name": "status",
"type": "int"
},
{
"name": "timezone",
"type": "string"
},
{
"name": "updated_at",
"type": "string"
},
{
"name": "user_id",
"type": "string"
},
{
"name": "zip",
"type": "string"
},
{
"name": "company_id",
"type": "int"
}
]
}
},
{
"name": "device_type",
"type": {
"name": "device_type",
"type": "record",
"fields": [
{
"name": "id",
"type": "string"
},
{
"name": "application_id",
"type": "string"
},
{
"name": "category",
"type": "string"
},
{
"name": "codec",
"type": "string"
},
{
"name": "data_type",
"type": "string"
},
{
"name": "description",
"type": "string"
},
{
"name": "manufacturer",
"type": "string"
},
{
"name": "model",
"type": "string"
},
{
"name": "name",
"type": "string"
},
{
"name": "parent_constraint",
"type": "string"
},
{
"name": "proxy_handler",
"type": "string"
},
{
"name": "subcategory",
"type": "string"
},
{
"name": "transport_protocol",
"type": "string"
},
{
"name": "version",
"type": "string"
},
{
"name": "created_at",
"type": "string"
},
{
"name": "updated_at",
"type": "string"
}
]
}
},
{
"name": "device",
"type": {
"name": "device",
"type": "record",
"fields": [
{
"name": "id",
"type": "int"
},
{
"name": "thing_name",
"type": "string"
},
{
"name": "created_at",
"type": "string"
},
{
"name": "updated_at",
"type": "string"
},
{
"name": "status",
"type": "int"
}
]
}
}
]
}
Generated JSON File: laird.json
[{"event_type": "uplink", "event_data": {"device_id": "42934500-fcfb-11ea-9f13-d1d0271289a6", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "payload": [{"name": "Humidity", "sensor_id": "42abaf00-fcfb-11ea-9c71-c517ac227ea5", "type": "rel_hum", "unit": "p", "value": 94.29, "channel": 4, "timestamp": 1605007797789}, {"name": "Temperature", "sensor_id": "42b0df20-fcfb-11ea-bf5c-d11ce3dbc1cb", "type": "temp", "unit": "c", "value": 21.64, "channel": 3, "timestamp": 1605007797789}, {"name": "Battery", "sensor_id": "42a98c20-fcfb-11ea-b4dd-cd2887a335f7", "type": "batt", "unit": "p", "value": 100, "channel": 5, "timestamp": 1605007797789}, {"name": "Local Backup", "sensor_id": "42b01bd0-fcfb-11ea-9f13-d1d0271289a6", "type": "digital_sensor", "unit": "d", "value": 1, "channel": 400, "timestamp": 1605007797789}, {"name": "RSSI", "sensor_id": "42b39e40-fcfb-11ea-bf5c-d11ce3dbc1cb", "type": "rssi", "unit": "dbm", "value": -53, "channel": 100, "timestamp": 1605007797789}, {"name": "SNR", "sensor_id": "", "type": "snr", "unit": "db", "value": 10.2, "channel": 101, "timestamp": 1605007797789}], "client_id": "b8468c50-baf0-11ea-a5e9-89c3b09de43a", "hardware_id": "0025ca0a0000e232", "timestamp": 1605007797789, "application_id": "shipcomwireless", "device_type_id": "70776630-e15e-11ea-a8c9-05cd631755a5"}, "company": {"id": 7696, "address": "9240 Kirby Dr", "city": "Houston", "country": "United States", "created_at": "2020-09-11T18:44:50Z", "industry": "[\"Health Care\"]", "latitude": 29.671324, "longitude": -95.415535, "name": "Harris Health System - Production", "state": "TX", "status": 0, "timezone": "America/Chicago", "updated_at": "2020-09-15T03:34:58Z", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "zip": "77054"}, "location": {"id": 9153, "address": "9240 Kirby Dr", "city": "Houston", "country": "United States", "created_at": "2020-09-18T02:08:03Z", "industry": "[\"Health Care\"]", "latitude": 29.671324, "longitude": -95.415535, "name": "HHS Van Sensors", "state": "TX", "status": 0, "timezone": "America/Chicago", "updated_at": "2020-09-18T02:08:03Z", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "zip": "77054", "company_id": 7696}, "device_type": {"id": "70776630-e15e-11ea-a8c9-05cd631755a5", "application_id": "", "category": "module", "codec": "lorawan.laird.rs1xx-backup", "data_type": "", "description": "Temp Sensor", "manufacturer": "Laird", "model": "RS1xx", "name": "Laird Temp & Humidity with Local Backup", "parent_constraint": "NOT_ALLOWED", "proxy_handler": "PrometheusClient", "subcategory": "lora", "transport_protocol": "lorawan", "version": "", "created_at": "2020-08-18T14:23:51Z", "updated_at": "2020-08-18T18:16:37Z"}, "device": {"id": 269231, "thing_name": "Van 18-1775 (Ambient)", "created_at": "2020-09-22T17:44:27Z", "updated_at": "2020-09-25T22:39:57Z", "status": 0}}, {"event_type": "uplink", "event_data": {"device_id": "7de32cf0-f9d2-11ea-b4dd-cd2887a335f7", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "payload": [{"name": "Humidity", "sensor_id": "7dfbbe00-f9d2-11ea-9c71-c517ac227ea5", "type": "rel_hum", "unit": "p", "value": 0, "channel": 4, "timestamp": 1604697684139}, {"name": "Temperature", "sensor_id": "7dfb48d0-f9d2-11ea-9c71-c517ac227ea5", "type": "temp", "unit": "c", "value": -27.22, "channel": 3, "timestamp": 1604697684139}, {"name": "Battery", "sensor_id": "7dfa5e70-f9d2-11ea-bf5c-d11ce3dbc1cb", "type": "batt", "unit": "p", "value": 100, "channel": 5, "timestamp": 1604697684139}, {"name": "Local Backup", "sensor_id": 
"7dfb96f0-f9d2-11ea-b4dd-cd2887a335f7", "type": "digital_sensor", "unit": "d", "value": 1, "channel": 400, "timestamp": 1604697684139}, {"name": "RSSI", "sensor_id": "7dfc5a40-f9d2-11ea-b4dd-cd2887a335f7", "type": "rssi", "unit": "dbm", "value": -7, "channel": 100, "timestamp": 1604697684139}, {"name": "SNR", "sensor_id": "", "type": "snr", "unit": "db", "value": 10, "channel": 101, "timestamp": 1604697684139}], "client_id": "b8468c50-baf0-11ea-a5e9-89c3b09de43a", "hardware_id": "0025ca0a0000be6a", "timestamp": 1604697684139, "application_id": "shipcomwireless", "device_type_id": "70776630-e15e-11ea-a8c9-05cd631755a5"}, "company": {"id": 7696, "address": "9240 Kirby Dr", "city": "Houston", "country": "United States", "created_at": "2020-09-11T18:44:50Z", "industry": "[\"Health Care\"]", "latitude": 29.671324, "longitude": -95.415535, "name": "Harris Health System - Production", "state": "TX", "status": 0, "timezone": "America/Chicago", "updated_at": "2020-09-15T03:34:58Z", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "zip": "77054"}, "location": {"id": 9080, "address": "9240 Kirby Dr", "city": "Houston", "country": "United States", "created_at": "2020-09-11T18:46:07Z", "industry": "[\"Health Care\"]", "latitude": 29.671324, "longitude": -95.415535, "name": "HHS Cooler Sensors", "state": "TX", "status": 0, "timezone": "America/Chicago", "updated_at": "2020-09-18T14:17:28Z", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "zip": "77054", "company_id": 7696}, "device_type": {"id": "70776630-e15e-11ea-a8c9-05cd631755a5", "application_id": "", "category": "module", "codec": "lorawan.laird.rs1xx-backup", "data_type": "", "description": "Temp Sensor", "manufacturer": "Laird", "model": "RS1xx", "name": "Laird Temp & Humidity with Local Backup", "parent_constraint": "NOT_ALLOWED", "proxy_handler": "PrometheusClient", "subcategory": "lora", "transport_protocol": "lorawan", "version": "", "created_at": "2020-08-18T14:23:51Z", "updated_at": "2020-08-18T18:16:37Z"}, "device": {"id": 268369, "thing_name": "Cooler F-0201-AH", "created_at": "2020-09-18T17:15:04Z", "updated_at": "2020-09-25T22:39:57Z", "status": 0}}, {"event_type": "uplink", "event_data": {"device_id": "1c5c66f0-fcfb-11ea-8ae3-2ffdc909c57b", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "payload": [{"name": "Humidity", "sensor_id": "1c7a4f30-fcfb-11ea-8ae3-2ffdc909c57b", "type": "rel_hum", "unit": "p", "value": 81.22, "channel": 4, "timestamp": 1605148608302}, {"name": "Temperature", "sensor_id": "1c793dc0-fcfb-11ea-bf5c-d11ce3dbc1cb", "type": "temp", "unit": "c", "value": 24.47, "channel": 3, "timestamp": 1605148608302}, {"name": "Battery", "sensor_id": "1c76a5b0-fcfb-11ea-bf5c-d11ce3dbc1cb", "type": "batt", "unit": "p", "value": 100, "channel": 5, "timestamp": 1605148608302}, {"name": "Local Backup", "sensor_id": "1c73e690-fcfb-11ea-9c71-c517ac227ea5", "type": "digital_sensor", "unit": "d", "value": 1, "channel": 400, "timestamp": 1605148608302}, {"name": "RSSI", "sensor_id": "1c780540-fcfb-11ea-b4dd-cd2887a335f7", "type": "rssi", "unit": "dbm", "value": -14, "channel": 100, "timestamp": 1605148608302}, {"name": "SNR", "sensor_id": "", "type": "snr", "unit": "db", "value": 8.8, "channel": 101, "timestamp": 1605148608302}], "client_id": "b8468c50-baf0-11ea-a5e9-89c3b09de43a", "hardware_id": "0025ca0a0000e1e3", "timestamp": 1605148608302, "application_id": "shipcomwireless", "device_type_id": "70776630-e15e-11ea-a8c9-05cd631755a5"}, "company": {"id": 7696, "address": "9240 Kirby Dr", "city": "Houston", "country": "United 
States", "created_at": "2020-09-11T18:44:50Z", "industry": "[\"Health Care\"]", "latitude": 29.671324, "longitude": -95.415535, "name": "Harris Health System - Production", "state": "TX", "status": 0, "timezone": "America/Chicago", "updated_at": "2020-09-15T03:34:58Z", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "zip": "77054"}, "location": {"id": 9153, "address": "9240 Kirby Dr", "city": "Houston", "country": "United States", "created_at": "2020-09-18T02:08:03Z", "industry": "[\"Health Care\"]", "latitude": 29.671324, "longitude": -95.415535, "name": "HHS Van Sensors", "state": "TX", "status": 0, "timezone": "America/Chicago", "updated_at": "2020-09-18T02:08:03Z", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "zip": "77054", "company_id": 7696}, "device_type": {"id": "70776630-e15e-11ea-a8c9-05cd631755a5", "application_id": "", "category": "module", "codec": "lorawan.laird.rs1xx-backup", "data_type": "", "description": "Temp Sensor", "manufacturer": "Laird", "model": "RS1xx", "name": "Laird Temp & Humidity with Local Backup", "parent_constraint": "NOT_ALLOWED", "proxy_handler": "PrometheusClient", "subcategory": "lora", "transport_protocol": "lorawan", "version": "", "created_at": "2020-08-18T14:23:51Z", "updated_at": "2020-08-18T18:16:37Z"}, "device": {"id": 269213, "thing_name": "Van 19-1800 (Ambient)", "created_at": "2020-09-22T17:43:23Z", "updated_at": "2020-09-25T22:39:56Z", "status": 0}}, {"event_type": "uplink", "event_data": {"device_id": "851fd480-f70e-11ea-9f13-d1d0271289a6", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "payload": [{"name": "Humidity", "sensor_id": "85411820-f70e-11ea-8ae3-2ffdc909c57b", "type": "rel_hum", "unit": "p", "value": 49.52, "channel": 4, "timestamp": 1604558153188}, {"name": "Temperature", "sensor_id": "853f9180-f70e-11ea-9f13-d1d0271289a6", "type": "temp", "unit": "c", "value": 20.52, "channel": 3, "timestamp": 1604558153188}, {"name": "Battery", "sensor_id": "85429ec0-f70e-11ea-9621-a51b22d5dc1d", "type": "batt", "unit": "p", "value": 100, "channel": 5, "timestamp": 1604558153188}, {"name": "Local Backup", "sensor_id": "853f4360-f70e-11ea-9f13-d1d0271289a6", "type": "digital_sensor", "unit": "d", "value": 1, "channel": 400, "timestamp": 1604558153188}, {"name": "RSSI", "sensor_id": "8543b030-f70e-11ea-8ae3-2ffdc909c57b", "type": "rssi", "unit": "dbm", "value": -91, "channel": 100, "timestamp": 1604558153188}, {"name": "SNR", "sensor_id": "", "type": "snr", "unit": "db", "value": 8.5, "channel": 101, "timestamp": 1604558153188}], "client_id": "b8468c50-baf0-11ea-a5e9-89c3b09de43a", "hardware_id": "0025ca0a0000be5b", "timestamp": 1604558153188, "application_id": "shipcomwireless", "device_type_id": "70776630-e15e-11ea-a8c9-05cd631755a5"}, "company": {"id": 7696, "address": "9240 Kirby Dr", "city": "Houston", "country": "United States", "created_at": "2020-09-11T18:44:50Z", "industry": "[\"Health Care\"]", "latitude": 29.671324, "longitude": -95.415535, "name": "Harris Health System - Production", "state": "TX", "status": 0, "timezone": "America/Chicago", "updated_at": "2020-09-15T03:34:58Z", "user_id": "a5d78945-9f24-48a1-9107-5bee62bf007a", "zip": "77054"}, "location": {"id": 9080, "address": "9240 Kirby Dr", "city": "Houston", "country": "United States", "created_at": "2020-09-11T18:46:07Z", "industry": "[\"Health Care\"]", "latitude": 29.671324, "longitude": -95.415535, "name": "HHS Cooler Sensors", "state": "TX", "status": 0, "timezone": "America/Chicago", "updated_at": "2020-09-18T14:17:28Z", "user_id": 
"a5d78945-9f24-48a1-9107-5bee62bf007a", "zip": "77054", "company_id": 7696}, "device_type": {"id": "70776630-e15e-11ea-a8c9-05cd631755a5", "application_id": "", "category": "module", "codec": "lorawan.laird.rs1xx-backup", "data_type": "", "description": "Temp Sensor", "manufacturer": "Laird", "model": "RS1xx", "name": "Laird Temp & Humidity with Local Backup", "parent_constraint": "NOT_ALLOWED", "proxy_handler": "PrometheusClient", "subcategory": "lora", "transport_protocol": "lorawan", "version": "", "created_at": "2020-08-18T14:23:51Z", "updated_at": "2020-08-18T18:16:37Z"}, "device": {"id": 265040, "thing_name": "Cooler R-0306-PHAR", "created_at": "2020-09-15T04:47:12Z", "updated_at": "2020-09-25T22:39:54Z", "status": 0}}]
contents is a list of records, but writer.append expects a single record, so you need to iterate over the records and append them one by one.
You just need to change:
writer.append(contents)
to:
for record in contents:
    writer.append(record)
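In context, the tail end of json_to_avro would then look like this (a sketch of the fix applied to the code above):
writer = DataFileWriter(open("laird.avro", "wb"), DatumWriter(), schema)
with open("laird.json") as fp:
    contents = json.load(fp)  # a list of record dicts
for record in contents:
    writer.append(record)  # append each record individually
writer.close()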

JSON schema testing

I am developing a JSON schema and I am trying to test whether files validate against it properly. Still new to the whole JSON schema world (since today), apologies if my terminology is not correct.
I have different types of files, which differ with regard to their biomaterial_type. Each of them should be tested against "#/definitions/basic", some of them against "#/definitions/donor", and they all have unique fields to test for.
Here is a (shortened) example containing one biomaterial_type:
{
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"basic": {
"type": "object",
"description": "Objects shared across all samples",
"properties": {
"sample_ontology_uri" : {
"type": "array", "minItems": 1,
"items": {
"type": "string",
"format": "uri",
"description": "(Ontology: EFO) links to sample ontology information."}},
"disease_ontology_uri" : {
"type": "array", "minItems": 1,
"items": {
"type": "string",
"format": "uri",
"description": "(Ontology: NCIM)"}},
"disease" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "Free form field "}},
"biomaterial_provider" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "The name of the company, laboratory or person that provided the biological material."}},
"biomaterial_type" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "The type of the biosample used (Cell Line, Primary Cell, Primary Cell Culture, Primary Tissue)",
"enum":["Cell Line", "Primary Cell", "Primary Cell Culture", "Primary Tissue"]}},
"treatment" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "Any artificial modification (differentiation, activation, genome editing, etc)."}},
"biological_replicates": {
"type": "array",
"items": {
"type": "string",
"description": "List of biological replicate sample accessions"}}
},
"required": ["sample_ontology_curie", "disease_ontology_curie", "disease", "biomaterial_provider", "biomaterial_type", "treatment", "biological_replicates"]
},
"donor": {
"type": "object",
"description": "Additional set of properties for samples coming from a donor.",
"properties": {
"donor_id" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "An identifying designation for the donor that provided the cells/tissues."}},
"donor_age" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"description": "The age of the donor that provided the cells/tissues. NA if not available. If over 90 years enter as 90+. If entering a range of ages use the format “{age}-{age}”.",
"oneOf": [
{ "type": "number" },
{ "type": "string", "enum": ["90+", "NA"] },
{ "type": "string", "format": "uri" }
]
}},
"donor_age_unit" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "The unit of measurement used to describe the age of the sample (year, month, week, day)",
"enum": ["year", "month", "week", "day"]}},
"donor_life_stage": {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "The stage or phase of the donor when the sample was taken (embryonic, fetal, postnatal, newborn, child, adult, unknown)",
"enum": ["embryonic", "fetal", "postnatal", "newborn", "child", "adult", "unknown"]}},
"donor_health_status" : {
"type": "array", "minItems": 1, "maxItems": 1, "items": {
"type": "string",
"description": "The health status of the donor that provided the primary cell. NA if not available."}},
"donor_health_status_ontology_uri" : {
"type": "array", "minItems": 1,
"items": {
"type": "string",
"format": "uri",
"description": "(Ontology: NCIM) "}},
"donor_sex" : {"type": "array", "minItems": 1, "maxItems": 1, "items": {"type": "string", "enum": ["Male", "Female", "Unknown", "Mixed"], "description": "'Male', 'Female', 'Unknown', or 'Mixed' for pooled samples."}},
"donor_ethnicity" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "The ethnicity of the donor that provided the primary cell. NA if not available. If dealing with small/vulnerable populations consider identifiability issues."}}
},
"required": ["donor_id", "donor_age", "donor_age_unit", "donor_life_stage", "donor_health_status_uri", "donor_health_status", "donor_sex", "donor_ethnicity"]
}
},
"type" : "object",
"if":
{"properties":
{ "biomaterial_type": {"const": "Primary Tissue"}},
"required": ["biomaterial_type"] },
"then": {
"allOf": [
{ "$ref": "#/definitions/donor" },
{
"properties": {
"tissue_type" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "The type of tissue."}},
"tissue_depot" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "Details about the anatomical location from which the primary tissue was collected."}},
"collection_method" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "The protocol for collecting the primary tissue."}}
},
"required": ["tissue_type", "tissue_depot", "collection_method"]
}
]
}
}
Additional biomaterial_types will be added via additional if-conditions.
Here is an example JSON:
{
"SAMPLE_SET": {
"SAMPLE": [
{
"TITLE": "Homo sapiens male embryo (108 days) small intestine tissue",
"SAMPLE_NAME": {
"TAXON_ID": "9606",
"SCIENTIFIC_NAME": "Homo sapiens",
"COMMON_NAME": "human"
},
"SAMPLE_ATTRIBUTES": {
"SAMPLE_ATTRIBUTE": [
{
"TAG": "SAMPLE_ONTOLOGY_URI",
"VALUE": "http://purl.obolibrary.org/obo/UBERON:0002108"
},
{
"TAG": "DISEASE_ONTOLOGY_URI",
"VALUE": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=C115935"
},
{
"TAG": "DISEASE",
"VALUE": "Healthy"
},
{
"TAG": "BIOMATERIAL_PROVIDER",
"VALUE": "Ian Glass at Congenital Defects Lab, University of Washington"
},
{
"TAG": "BIOMATERIAL_TYPE",
"VALUE": "Primary Tissue"
},
{
"TAG": "TISSUE_TYPE",
"VALUE": "small intestine"
},
{
"TAG": "TISSUE_DEPOT",
"VALUE": "Ian Glass at Congenital Defects Lab, University of Washington"
},
{
"TAG": "COLLECTION_METHOD",
"VALUE": "unknown"
},
{
"TAG": "DONOR_ID",
"VALUE": "ENCDO119ASK"
},
{
"TAG": "DONOR_AGE",
"VALUE": "NA"
},
{
"TAG": "DONOR_AGE_UNIT",
"VALUE": "day"
},
{
"TAG": "DONOR_LIFE_STAGE",
"VALUE": "embryonic"
},
{
"TAG": "DONOR_HEALTH_STATUS_ONTOLOGY_URI",
"VALUE": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=C115935"
},
{
"TAG": "DONOR_HEALTH_STATUS",
"VALUE": "Healthy"
},
{
"TAG": "DONOR_SEX",
"VALUE": "Male"
},
{
"TAG": "DONOR_ETHNICITY",
"VALUE": "NA"
}
]
},
"_accession": "ENCBS054KUO",
"_center_name": "ENCODE"
}
]
}
}
I am trying to test whether the schema makes sense using jsonschema with Python:
import json
import jsonschema
from jsonschema import validate
data = ''
schema = ''
with open('data.json', 'r') as file:
    data = file.read()
with open('schema.json', 'r') as file:
    schema = file.read()
try:
    jsonschema.validate(json.loads(data), json.loads(schema))
    print('ok')
except jsonschema.ValidationError as e:
    print(e.message)
except jsonschema.SchemaError as e:
    print(e)
I always get "ok", even if I provide json data with errors.
Is the problem with my Python script or with my schema?
Thanks for any pointers.
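One thing worth checking (a sketch, assuming the schema above is saved as schema.json): the top-level if condition only fires when a biomaterial_type property is present, but in the example document everything is nested under SAMPLE_SET/SAMPLE/SAMPLE_ATTRIBUTES as TAG/VALUE pairs, so the then branch never applies and, because unknown properties are ignored by default, everything validates. A probe object shaped the way the schema expects should fail:
import json
import jsonschema

with open('schema.json', 'r') as file:
    schema = json.load(file)

# Declares Primary Tissue but omits the required tissue_* and donor_* fields.
# If this raises ValidationError, the if/then works, and the real problem is
# that the nested SAMPLE_SET data never matches the if condition.
probe = {"biomaterial_type": "Primary Tissue"}
try:
    jsonschema.validate(probe, schema)
    print('probe unexpectedly ok')
except jsonschema.ValidationError as e:
    print('probe failed as expected:', e.message)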

Fulltext Search in arangodb using AQL and python

I have stored the data in ArangoDB in the following format:
{"data": [
{
"content": "maindb",
"type": "string",
"name": "db_name",
"key": "1745085839"
},
{
"type": "id",
"name": "rel",
"content": "1745085840",
"key": "1745085839"
},
{
"content": "user",
"type": "string",
"name": "rel_name",
"key": "1745085840"
},
{
"type": "id",
"name": "tuple",
"content": "174508584001",
"key": "1745085840"
},
{
"type": "id",
"name": "tuple",
"content": "174508584002",
"key": "1745085840"
},
{
"type": "id",
"name": "tuple",
"content": "174508584003",
"key": "1745085840"
},
{
"type": "id",
"name": "tuple",
"content": "174508584004",
"key": "1745085840"
},
{
"type": "id",
"name": "tuple",
"content": "174508584005",
"key": "1745085840"
},
{
"type": "id",
"name": "tuple",
"content": "174508584006",
"key": "1745085840"
},
{
"type": "id",
"name": "tuple",
"content": "174508584007",
"key": "1745085840"
},
{
"content": "dspclient",
"type": "varchar",
"name": "username",
"key": "174508584001"
},
{
"content": "12345",
"type": "varchar",
"name": "password",
"key": "174508584001"
},
{
"content": "12345",
"type": "varchar",
"name": "cpassword",
"key": "174508584001"
},
{
"content": "n",
"type": "varchar",
"name": "PostgreSQL",
"key": "174508584001"
},
{
"content": "n",
"name": "IBMDB2",
"type": "varchar",
"key": "174508584001"
},
{
"content": "n",
"name": "MySQL",
"type": "varchar",
"key": "174508584001"
},
{
"content": "n",
"type": "varchar",
"name": "SQLServer",
"key": "174508584001"
},
{
"content": "n",
"name": "Hadoop",
"type": "varchar",
"key": "174508584001"
},
{
"content": "None",
"name": "dir1",
"type": "varchar",
"key": "174508584001"
},
{
"content": "None",
"name": "dir2",
"type": "varchar",
"key": "174508584001"
},
{
"content": "None",
"name": "dir3",
"type": "varchar",
"key": "174508584001"
},
{
"content": "None",
"name": "dir4",
"type": "varchar",
"key": "174508584001"
},
{
"type": "inet",
"name": "ipaddr",
"content": "1921680103",
"key": "174508584001"
},
{
"content": "y",
"name": "status",
"type": "varchar",
"key": "174508584001"
},
{
"content": "None",
"type": "varchar",
"name": "logintime",
"key": "174508584001"
},
{
"content": "None",
"type": "varchar",
"name": "logindate",
"key": "174508584001"
},
{
"content": "None",
"type": "varchar",
"name": "logouttime",
"key": "174508584001"
},
{
"content": "client",
"type": "varchar",
"name": "user_type",
"key": "174508584001"
},
{
"content": "royal",
"type": "varchar",
"name": "username",
"key": "174508584002"
},
{
"content": "12345",
"type": "varchar",
"name": "password",
"key": "174508584002"
},
{
"content": "12345",
"type": "varchar",
"name": "cpassword",
"key": "174508584002"
},
{
"content": "n",
"type": "varchar",
"name": "PostgreSQL",
"key": "174508584002"
},
{
"content": "n",
"name": "IBMDB2",
"type": "varchar",
"key": "174508584002"
},
{
"content": "n",
"name": "MySQL",
"type": "varchar",
"key": "174508584002"
},
{
"content": "n",
"type": "varchar",
"name": "SQLServer",
"key": "174508584002"
},
{
"content": "n",
"name": "Hadoop",
"type": "varchar",
"key": "174508584002"
},
{
"content": "None",
"name": "dir1",
"type": "varchar",
"key": "174508584002"
},
{
"content": "None",
"name": "dir2",
"type": "varchar",
"key": "174508584002"
},
{
"content": "None",
"name": "dir3",
"type": "varchar",
"key": "174508584002"
},
{
"content": "None",
"name": "dir4",
"type": "varchar",
"key": "174508584002"
},
{
"type": "inet",
"name": "ipaddr",
"content": "1921680105",
"key": "174508584002"
},
{
"content": "y",
"name": "status",
"type": "varchar",
"key": "174508584002"
},
{
"content": "190835899000",
"type": "varchar",
"name": "logintime",
"key": "174508584002"
},
{
"content": "20151002",
"type": "varchar",
"name": "logindate",
"key": "174508584002"
},
{
"content": "None",
"type": "varchar",
"name": "logouttime",
"key": "174508584002"
},
{
"content": "client",
"type": "varchar",
"name": "user_type",
"key": "174508584002"
},
{
"content": "abc",
"type": "varchar",
"name": "username",
"key": "174508584003"
},
{
"content": "12345",
"type": "varchar",
"name": "password",
"key": "174508584003"
},
{
"content": "12345",
"type": "varchar",
"name": "cpassword",
"key": "174508584003"
},
{
"content": "n",
"type": "varchar",
"name": "PostgreSQL",
"key": "174508584003"
},
{
"content": "n",
"name": "IBMDB2",
"type": "varchar",
"key": "174508584003"
}]}
In order to perform a fulltext search, I have created an index on the content attribute using the following call from a Python script:
c.DSP.ensureFulltextIndex("content");
Here, c is the database and DSP is the collection name. Now I am trying to perform a search operation on the above data set using this query:
FOR doc IN FULLTEXT(DSP, "content", "username") RETURN doc
Then an error occurs:
[1571] in function 'FULLTEXT()': no suitable fulltext index found for fulltext query on 'DSP' (while executing)
Please tell me the problem, and also what the syntax should be when I run this query from a Python script.
Thanks...
Working with the 10 minutes tutorial and the driver documentation, I got it working like this:
from pyArango.connection import *
c = Connection()
db = c.createDatabase(name = "testdb")
DSP= db.createCollection(name = "DSP")
DSP.ensureFulltextIndex(fields=["content"])
doc = DSP.createDocument({"content": "test bla"})
doc.save()
print db.AQLQuery('''FOR doc IN FULLTEXT(DSP, "content", "bla") RETURN doc''', 10)
Resulting in:
[{u'_key': u'1241175138503', u'content': u'test bla', u'_rev': u'1241175138503', u'_id': u'DSP/1241175138503'}]
I've used arangosh to revalidate the steps from the Python prompt:
arangosh> db._useDatabase("testdb")
arangosh [testdb]> db.DSP.getIndexes()
[
{
"id" : "DSP/0",
"type" : "primary",
"fields" : [
"_key"
],
"selectivityEstimate" : 1,
"unique" : true,
"sparse" : false
},
{
"id" : "DSP/1241140928711",
"type" : "hash",
"fields" : [
"content"
],
"selectivityEstimate" : 1,
"unique" : false,
"sparse" : true
},
{
"id" : "DSP/1241142960327",
"type" : "fulltext",
"fields" : [
"content"
],
"unique" : false,
"sparse" : true,
"minLength" : 2
}
]
arangosh [testdb]> db.DSP.toArray()
[
{
"content" : "test bla",
"_id" : "DSP/1241175138503",
"_rev" : "1241175138503",
"_key" : "1241175138503"
}
]
db._query('FOR doc IN FULLTEXT(DSP, "content", "bla") RETURN doc')
