I'm building a Python class that will work with the DevExtreme OData store.
Here is my ODataAPI class:
from sqlalchemy import text
from sqlalchemy.orm import load_only
from casservices import db
import models

class ODataAPI:
    def __init__(self, model):
        self.model = model

    def get(self, cfg):
        """
        config options
        $orderby = string of "param_name desc" - if desc is missing, sort asc
        $top = int limit amount of results
        $skip = int number of items to skip ahead
        $filter = e.g. (substringof('needle',name)) or (role eq 'needle') or (substringof('needle',email)) or (job eq 'needle') or (office eq 'needle')
        $select = csv of entities to return
        """
        q = db.session.query(self.model)
        if cfg.get('$select') is not None:
            splt = cfg.get('$select').split(",")
            q = q.options(load_only(*splt))  # options() returns a new query; re-assign it
        if cfg.get('$filter') is not None:
            # NEED CODE HERE TO PARSE $filter
            pass
        if cfg.get('$orderby') is not None:
            splt = cfg.get('$orderby').split(" ")
            order_direction = "ASC"
            if len(splt) == 2 and splt[1] == 'desc':
                order_direction = "DESC"
            order_string = "%s.%s %s" % (self.model.__tablename__, splt[0], order_direction)
            q = q.order_by(text(order_string))  # raw SQL strings must be wrapped in text()
        if cfg.get('$top') is not None:
            q = q.limit(cfg.get('$top'))
        if cfg.get('$skip') is not None:
            q = q.offset(cfg.get('$skip'))
        items = q.all()
        total_items = db.session.query(self.model).count()
        data = {
            "d": {
                "__count": total_items,
                "results": [i.as_dict() for i in items]
            }
        }
        return data
How do I parse the following string into something I can use to filter my result set?
The GET parameter comes in like this:
$filter=(substringof('needle',name)) or (role eq 'needle') or (substringof('needle',email)) or (job eq 'needle') or (office eq 'needle')
I have come across a helpful OData filter parser, and it works with SQLAlchemy:
Example function I made with Flask-SQLAlchemy:
def get_countries(filter, page_number, per_page):
    # OData filter
    query = apply_odata_query(Country.query, filter)
    return paginate(query, page_number, per_page)
To call the function, you just need to pass in the filter string along with the pagination parameters:
countries = get_countries("code eq 'ZWE'", 1, 10)
You can find the library here: https://github.com/gorilla-co/odata-query. The library is also extendable.
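Wiring it into the ODataAPI class above could look roughly like this (a sketch, untested against your models; note that odata-query parses the OData v4 filter grammar, so older v2-style substringof(...) filters from DevExtreme may need the v4 contains(...) form):

from odata_query.sqlalchemy import apply_odata_query  # pip install odata-query

# inside ODataAPI.get(), replacing the $filter placeholder:
if cfg.get('$filter') is not None:
    # returns a new query with the parsed $filter predicate applied
    q = apply_odata_query(q, cfg.get('$filter'))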
What I get from the API:
"name":"reports"
"col_type":"array<struct<imageUrl:string,reportedBy:string>>"
So in the Hive schema I got:
reports array<struct<imageUrl:string,reportedBy:string>>
Note: I got the Hive array schema as a string from the API.
My target:
bigquery.SchemaField("reports", "RECORD", mode="NULLABLE",
    fields=(
        bigquery.SchemaField('imageUrl', 'STRING'),
        bigquery.SchemaField('reportedBy', 'STRING')
    )
)
Note: I would like to create universal code that can handle any number of struct fields inside the array.
Any tips are welcome.
I tried creating a script that parses your input, which is reports array<struct<imageUrl:string,reportedBy:string>>. It converts your input to a dictionary that can be used as the schema when creating a table. The main idea of the approach is that instead of using SchemaField(), you create a dictionary, which is much easier than constructing SchemaField() objects with parameters from your example input.
NOTE: The script is only tested against your input, and it can parse more fields if they are added inside struct<.
import re
from google.cloud import bigquery

def is_even(number):
    if (number % 2) == 0:
        return True
    else:
        return False

def clean_string(str_value):
    return re.sub(r'[\W_]+', '', str_value)

def convert_to_bqdict(api_string):
    """
    This only works for a struct with multiple fields.
    This could give you an idea on constructing a schema dict for BigQuery.
    """
    num_even = True
    main_dict = {}
    struct_dict = {}
    field_arr = []
    schema_arr = []

    # Hard coded this since not sure what the string will look like if there are more inputs
    init_struct = api_string.split(' ')
    main_dict["name"] = init_struct[0]
    main_dict["type"] = "RECORD"
    main_dict["mode"] = "NULLABLE"

    cont_struct = init_struct[1].split('<')
    num_elem = len(cont_struct)

    # parse fields inside of struct<
    for i in range(0, num_elem):
        num_even = is_even(i)
        # fields are seen on even indices
        if num_even and i != 0:
            temp = list(filter(None, cont_struct[i].split(',')))  # remove blank elements
            for elem in temp:
                fields = list(filter(None, elem.split(':')))
                struct_dict["name"] = clean_string(fields[0])
                # "type" works for STRING as of the moment; refer to
                # https://cloud.google.com/bigquery/docs/schemas#standard_sql_data_types
                # for the accepted data types
                struct_dict["type"] = clean_string(fields[1]).upper()
                struct_dict["mode"] = "NULLABLE"
                field_arr.append(struct_dict)
                struct_dict = {}

    main_dict["fields"] = field_arr  # assign dict to array of fields
    schema_arr.append(main_dict)
    return schema_arr

sample = "reports array<struct<imageUrl:string,reportedBy:string,newfield:bool>>"
bq_dict = convert_to_bqdict(sample)

client = bigquery.Client()
project = client.project
dataset_ref = bigquery.DatasetReference(project, '20211228')
table_ref = dataset_ref.table("20220203")
table = bigquery.Table(table_ref, schema=bq_dict)
table = client.create_table(table)
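In case your installed google-cloud-bigquery version does not accept plain dicts for the schema argument, the dicts can be converted first with SchemaField.from_api_repr; a small sketch, assuming the dict keys follow the API representation ("name", "type", "mode", "fields"):

# convert the dicts to SchemaField objects before creating the table
schema = [bigquery.SchemaField.from_api_repr(d) for d in bq_dict]
table = bigquery.Table(table_ref, schema=schema)
table = client.create_table(table)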
I was trying to figure out a way to get simple_salesforce to just give me all the field names in a list. I want to create a SOQL query that pretty much does the same thing as a SELECT * does in SQL.
for obj in objects:
    fields = [x["name"] for x in sf[obj].describe()["fields"]]
Thanks.
A list of field names in an object can be achieved as follows:
def getObjectFields(obj):
    fields = getattr(sf, obj).describe()['fields']
    flist = [i['name'] for i in fields]
    return flist

getObjectFields('Contact')
Your query to get the effect of SELECT * would then look something like this:
sf.query_all('SELECT {} FROM Contact LIMIT 10'.format(','.join(getObjectFields('Contact'))))
On a related note:
In case it is helpful, a dictionary of label/name pairs can be achieved as follows:
def getObjectFieldsDict(obj):
    fields = getattr(sf, obj).describe()['fields']
    fdict = {}
    for i in fields:
        fdict[i['label']] = i['name']
    return fdict

getObjectFieldsDict('Contact')
I find this can be useful for figuring out the names of fields whose labels do not follow the standard format (e.g. a "My Favorite Website" field label for the "Favorite_Website__c" field name).
This method will return a query string with all fields for the object passed in (well, all the fields the user has access to). Note that this snippet is Apex, not Python:
public static String getFullObjectQuery(String sObjectName){
    Schema.SObjectType convertType = Schema.getGlobalDescribe().get(sObjectName);
    Map<String, Schema.SObjectField> fieldMap = convertType.getDescribe().Fields.getMap();
    Set<String> fields = fieldMap.keySet();
    String query = 'SELECT ';
    for(String field : fields){
        Schema.DescribeFieldResult dfr = fieldMap.get(field).getDescribe();
        if(dfr.isAccessible()){
            query += field + ',';
        }
    }
    query = query.substring(0, query.length() - 1); // drop the trailing comma
    query += ' FROM ' + sObjectName;
    return query;
}
#!/usr/bin/env python3
import argparse
import os

import simple_salesforce

parser = argparse.ArgumentParser()
parser.add_argument('--sandbox', action='store_true',
                    help='Use a sandbox')
parser.add_argument('sfobject', nargs='+', action='store',
                    help=('Salesforce object to query (e.g. Contact)'))
args = parser.parse_args()

sf = simple_salesforce.Salesforce(
    username=os.getenv('USERNAME'),
    password=os.getenv('PASSWORD'),
    security_token=os.getenv('SECURITY_TOKEN'),
    sandbox=args.sandbox)

for sfobject in args.sfobject:
    print(sfobject)
    fields = [x['name'] for x in getattr(sf, sfobject).describe()['fields']]
    print(fields)
I am trying to extract raw data from a text file, and after processing the raw data, I want to export it to another text file. Below is the Python code I have written for this process. I am using the "petl" package in Python 3 for this purpose. 'locations.txt' is the raw data file.
import glob, os
from petl import *

class ETL():
    def __init__(self, input):
        self.list = input

    def parse_P(self):
        personid = []  # default to empty so callers can iterate safely
        for term in self.list:
            if term.startswith('P'):
                personids = term[1:]
                personid = personids.split(',')
        return personid

    def return_location(self):
        location = None
        for term in self.list:
            if term.startswith('L'):
                location = term[1:]
        return location

    def return_location_id(self, location):
        location = self.return_location()
        locationid = None

    def return_country_id(self):
        countryid = None
        for term in self.list:
            if term.startswith('C'):
                countryid = term[1:]
        return countryid

    def return_region_id(self):
        regionid = None
        for term in self.list:
            if term.startswith('R'):
                regionid = term[1:]
        return regionid

    def return_city_id(self):
        cityid = None
        for term in self.list:
            if term.startswith('I'):
                cityid = term[1:]
        return cityid

print(os.getcwd())
os.chdir(r"D:\ETL-IntroductionProject")  # raw string avoids backslash escapes
print(os.getcwd())

final_location = [['L', 'P', 'C', 'R', 'I']]
new_location = fromtext('locations.txt', encoding='Latin-1')

stored_list = []
for identifier in new_location:
    if identifier[0].startswith('L'):
        identifier = identifier[0]
        info_list = identifier.split('_')
        stored_list.append(info_list)

for lst in stored_list:
    tabling = ETL(lst)
    location = tabling.return_location()
    country = tabling.return_country_id()
    city = tabling.return_city_id()
    region = tabling.return_region_id()
    person_list = tabling.parse_P()
    for person in person_list:
        table_new = [location, person, country, region, city]
        final_location.append(table_new)

totext(final_location, 'l1.txt')
However, when I use the "totext" function of petl, it throws an "AssertionError":
AssertionError: template is required
I am unable to understand what the fault is. Can someone please explain the problem I am facing and what I should be doing?
The template parameter to the totext function is not optional; there is no default format for how the rows are written in this case, so you must provide a template. Check the docs for totext for an example: https://petl.readthedocs.io/en/latest/io.html#text-files
The template describes the format of each row it writes out, using the field headers to name the values; you can optionally pass in a prologue to write a header too. A basic template in your case would be:
table_new_template = "{L} {P} {C} {R} {I}\n"
totext(final_location, 'l1.txt', template=table_new_template)
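petl also accepts a prologue (and epilogue) for writing text around the rows; a small sketch with an illustrative header line:

# same call, but also writing a header line before the templated rows
totext(final_location, 'l1.txt',
       template="{L} {P} {C} {R} {I}\n",
       prologue="L P C R I\n")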
I have a table with equipment, and each piece of equipment has dates for each level of maintenance. The user can select the maintenance level, so I should adjust my SQLAlchemy query for each combination of maintenance levels chosen. For example:
SELECT * WHERE (equipment IN []) AND m_level1 = DATE AND m_level2 = DATE ...
So it is possible to have combinations for each if condition, depending on the checkboxes. I used multiple strings to reach my goal, but I want to improve the query using SQLAlchemy.
I assume you are using the ORM.
In that case, the filter function returns a query object. You can conditionally build up the query by doing something like:
query = Session.query(schema.Object).filter_by(attribute=value)
if condition:
    query = query.filter_by(condition_attr=condition_val)
if another_condition:
    query = query.filter_by(another=another_val)

# then finally execute it
results = query.all()
The signature filter(*criterion) means you can also build up a tuple or list of conditions and unpack it as the arguments; @Wolph has details here:
SQLAlchemy dynamic filter_by
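A quick sketch of that unpacking form (User and session are illustrative names, not from the question):

criterion = (
    User.name == 'foo',
    User.age > 25,
)
query = session.query(User).filter(*criterion)  # each element becomes a separate AND-ed criterion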
If we speak of SQLAlchemy Core, there is another way:

from sqlalchemy import and_

filters = [table.c.col1 == filter1, table.c.col2 > filter2]
query = table.select().where(and_(*filters))
If you're trying to filter based on incoming form criteria:

form = request.form.to_dict()
filters = []
for col in form:
    # build a binary expression (MODEL.col == value) for each form field
    binary_expression = (getattr(MODEL, col) == form[col])
    filters.append(binary_expression)
query = table.select().where(and_(*filters))

where MODEL is your SQLAlchemy model.
Another solution to this question, handled in a more secure way since it verifies that the field to be filtered actually exists on the model.
To add an operator to the value you want to filter on, without adding a new parameter to the query, you can prefix the operator to the value, e.g. ?foo=>1, ?foo=<1, ?foo=>=1, ?foo=<=1, ?foo=!1, ?foo=1, and finally "between", which would look like this: ?foo=a,b.
from sqlalchemy.orm import class_mapper
import re

# input parameters
filter_by = {
    "column1": "!1",  # not equal to
    "column2": "1",   # equal to
    "column3": ">1",  # greater than, etc...
}

def computed_operator(column, v):
    if re.match(r"^!", v):
        """__ne__"""
        val = re.sub(r"!", "", v)
        return column.__ne__(val)
    if re.match(r">(?!=)", v):
        """__gt__"""
        val = re.sub(r">(?!=)", "", v)
        return column.__gt__(val)
    if re.match(r"<(?!=)", v):
        """__lt__"""
        val = re.sub(r"<(?!=)", "", v)
        return column.__lt__(val)
    if re.match(r">=", v):
        """__ge__"""
        val = re.sub(r">=", "", v)
        return column.__ge__(val)
    if re.match(r"<=", v):
        """__le__"""
        val = re.sub(r"<=", "", v)
        return column.__le__(val)
    if re.match(r"(\w*),(\w*)", v):
        """between"""
        a, b = re.split(r",", v)
        return column.between(a, b)
    """ default __eq__ """
    return column.__eq__(v)

query = Table.query
filters = []
for k, v in filter_by.items():
    mapper = class_mapper(Table)
    if not hasattr(mapper.columns, k):
        continue
    filters.append(computed_operator(mapper.columns[k], "{}".format(v)))
query = query.filter(*filters)
query.all()
Here is a solution that works for both AND and OR...
Just replace or_ with and_ in the code if you need that case:
from sqlalchemy import select, or_, and_

my_filters = set()  # <-- use a set to contain only unique values and avoid duplicates
if condition_1:
    my_filters.add(MySQLClass.id == some_id)
if condition_2:
    my_filters.add(MySQLClass.name == some_name)
fetched = db_session.execute(select(MySQLClass).where(or_(*my_filters))).scalars().all()
I'm having trouble resolving Haystack queries using SQ objects. If I perform the same query using the Django ORM and its Q objects, everything works fine.
I can't figure out what I'm doing wrong here, since the Haystack documentation states that SQ objects are similar to Q ones. Any help is much appreciated. Thanks!
Here's the code I have:
class PublicationSearch(object):
    def __init__(self, search_data):
        self.__dict__.update(search_data)

    def search_all_words(self, sq):
        if self.all_words:
            words = self.all_words.split()
            title_sq = SQ()
            full_text_sq = SQ()
            for word in words:
                title_sq = title_sq | SQ(title__icontains=word)
                full_text_sq = full_text_sq | SQ(full_text__icontains=word)
            keyword_sq = title_sq | full_text_sq
            sq = sq & keyword_sq
        return sq


class AdvancedPublicationForm(AdvancedPublicationBaseForm):
    def search(self):
        cleaned_data = super(AdvancedPublicationForm, self).clean()
        # if no query word was submitted, return an empty sqs
        if not any(cleaned_data.itervalues()):
            return self.no_query_found()
        results = self.build_results(cleaned_data)
        return results

    def build_results(self, search_data):
        sq = SQ()
        results = None
        searcher = PublicationSearch(search_data)
        for key in search_data.iterkeys():
            dispatch = getattr(searcher, 'search_%s' % key)
            sq = dispatch(sq)
        if sq and len(sq):
            results = SearchQuerySet().models(Publication).add(sq)
        else:
            results = []
        return results
The query for a sample of two words looks like this:
(AND: (OR: (AND: ), ('title__icontains', u'casamento'), ('title__icontains', u'civil'), (AND: ), ('full_text__icontains', u'casamento'), ('full_text__icontains', u'civil')))
And the error returned:
Failed to query Elasticsearch using '( OR title:(casamento) OR title:(civil) OR OR full_text:(casamento) OR full_text:(civil))'
I managed to find the way: refactoring it to look like the below fixed it.
title_sq.add(SQ(title__icontains=word), SQ.OR)
full_text_sq.add(SQ(full_text__icontains=word), SQ.OR)
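For context, here is a sketch of search_all_words with those lines swapped in (based on the PublicationSearch class above). Using add() with an explicit SQ.OR connector avoids OR-ing against an initially empty SQ(), which is what produced the stray (AND: ) groups in the broken query:

def search_all_words(self, sq):
    if self.all_words:
        words = self.all_words.split()
        title_sq = SQ()
        full_text_sq = SQ()
        for word in words:
            # add() appends to the existing node with an OR connector
            # instead of combining with an initially-empty SQ()
            title_sq.add(SQ(title__icontains=word), SQ.OR)
            full_text_sq.add(SQ(full_text__icontains=word), SQ.OR)
        keyword_sq = title_sq | full_text_sq
        sq = sq & keyword_sq
    return sq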