I have a SQLAlchemy class that represents a table with columns FileID and SettlementDate. I want to create a hybrid property that says whether a given instance has the maximum FileID for its SettlementDate, and an associated expression to use when querying. I've successfully got the property working, but am struggling with the expression. Here's the existing model:
class Hdr(model.Base):
    id = Column('ID', Integer, primary_key=True)
    file_id = Column('FileID', BIGINT, ForeignKey('FileRegister.Files.ID'))
    settlement_date = Column('SettlementDate', Date)

    @hybrid_property
    def is_latest(self):
        subquery = (
            object_session(self)
            .query(func.max(Hdr.file_id).label('file_id'))
            .group_by(Hdr.settlement_date)
            .subquery()
        )
        return (
            object_session(self)
            .query(func.count(Hdr.file_id).cast(Boolean))
            .filter(subquery.c.file_id == self.file_id)
            .scalar()
        )
I'd like to think I can do something along the lines of:
subquery = (
    select((func.max(Hdr.file_id).label('file_id'), ))
    .group_by(Hdr.settlement_date)
    .alias('a')
)
s = select(
    case(
        whens=[
            (Hdr.file_id.in_(subquery), 1)
        ],
        else_=0
    )
)
But this raises an error: "Boolean value of this clause is not defined".
Any help would be greatly appreciated!
Traceback follows:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
c:\Users\\venvs\insight\lib\site-packages\sqlalchemy-1.3.0b3-py3.7-win-amd64.egg\sqlalchemy\sql\selectable.py in __init__(self, columns, whereclause, from_obj, distinct, having, correlate, prefixes, suffixes, **kwargs)
2889 try:
-> 2890 cols_present = bool(columns)
2891 except TypeError:
c:\Users\\venvs\insight\lib\site-packages\sqlalchemy-1.3.0b3-py3.7-win-amd64.egg\sqlalchemy\sql\elements.py in __bool__(self)
515 def __bool__(self):
--> 516 raise TypeError("Boolean value of this clause is not defined")
517
TypeError: Boolean value of this clause is not defined
During handling of the above exception, another exception occurred:
ArgumentError Traceback (most recent call last)
<ipython-input-20-4946a4bf7faa> in <module>
10 (Hdr.file_id.in_(subquery), 1)
11 ],
---> 12 else_=0
13 )
14 )
<string> in select(columns, whereclause, from_obj, distinct, having, correlate, prefixes, suffixes, **kwargs)
<string> in __init__(self, columns, whereclause, from_obj, distinct, having, correlate, prefixes, suffixes, **kwargs)
c:\Users\\venvs\insight\lib\site-packages\sqlalchemy-1.3.0b3-py3.7-win-amd64.egg\sqlalchemy\util\deprecations.py in warned(fn, *args, **kwargs)
128 )
129
--> 130 return fn(*args, **kwargs)
131
132 doc = fn.__doc__ is not None and fn.__doc__ or ""
c:\Users\\venvs\insight\lib\site-packages\sqlalchemy-1.3.0b3-py3.7-win-amd64.egg\sqlalchemy\sql\selectable.py in __init__(self, columns, whereclause, from_obj, distinct, having, correlate, prefixes, suffixes, **kwargs)
2891 except TypeError:
2892 raise exc.ArgumentError(
-> 2893 "columns argument to select() must "
2894 "be a Python list or other iterable"
2895 )
ArgumentError: columns argument to select() must be a Python list or other iterable
The problem is
s = select(case(...))
The first argument to select() should be a sequence of column elements or FROM clause objects. SQLAlchemy checks whether the passed sequence is empty by calling bool(columns), which a ClauseElement such as case() does not allow, hence the error. The solution is simply to wrap it in a sequence, as you have already done when creating the subquery:
s = select([case(...)])
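As a quick check, the wrapped form builds and compiles without the error (a sketch reusing Hdr and subquery from the question):

s = select([
    case(
        whens=[(Hdr.file_id.in_(subquery), 1)],
        else_=0,
    )
])
print(s)  # SELECT CASE WHEN (... IN (SELECT ...)) THEN :param_1 ELSE :param_2 END ...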
In the hybrid property's "Python side", instead of counting whether the maximum file_id of any settlement_date happens to match the instance's, you could filter by the instance's settlement_date and compare against the maximum:
class Hdr(model.Base):

    @hybrid_property
    def is_latest(self):
        max_file_id = (
            object_session(self)
            .query(func.max(Hdr.file_id))
            .filter(Hdr.settlement_date == self.settlement_date)
            .scalar()
        )
        return max_file_id == self.file_id
In the expression you don't need to wrap the boolean expression in a scalar subquery; return the boolean expression itself:
    @is_latest.expression
    def is_latest(cls):
        hdr_alias = aliased(Hdr)
        subquery = (
            select([func.max(hdr_alias.file_id)])
            .group_by(hdr_alias.settlement_date)
        )
        return cls.file_id.in_(subquery)
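With both halves in place, the hybrid can be used on instances and in queries alike; a minimal sketch, assuming a session bound to these models:

# Instance level: evaluated in Python via object_session()
hdr = session.query(Hdr).first()
print(hdr.is_latest)

# Class level: renders as file_id IN (SELECT max(file_id) ... GROUP BY settlement_date)
latest = session.query(Hdr).filter(Hdr.is_latest).all()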
Code:
def is_english(nlp, text):
    '''Detect whether a text is English'''
    return nlp(text)._.language['language'] == 'en'

def extract_english_text(nlp, messages):
    '''Extract English text'''
    return [text for text in messages if is_english(nlp, text)]

english_messages = extract_english_text(nlp, message)
Error:
TypeError Traceback (most recent call last)
<ipython-input-31-adf8c0033ef3> in <module>()
10
11
---> 12 english_messages = extract_english_text(nlp, message)
3 frames
/usr/local/lib/python3.7/dist-packages/spacy/language.py in __call__(self, text, disable, component_cfg)
425 DOCS: https://spacy.io/api/language#call
426 """
--> 427 if len(text) > self.max_length:
428 raise ValueError(
429 Errors.E088.format(length=len(text), max_length=self.max_length)
TypeError: object of type 'float' has no len()
Some of the texts passed into line 427 are of type float. There are two ways you could solve this:
You could find some way to gather only input of type str.
You could write code to check for other data types and either convert them to type str or raise an error using try/except. For that matter, you could do both, depending on the situation:
if not isinstance(text, str):
    try:
        text = str(text)
    except Exception:
        raise ValueError(f'Data type {type(text)} cannot be converted to a string.')
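For example, the check could be folded into the extraction helper so that non-string items never reach nlp(); a minimal sketch built on the is_english()/extract_english_text() functions from the question:

def extract_english_text(nlp, messages):
    '''Extract English text, skipping items that are not strings (e.g. NaN floats)'''
    english = []
    for text in messages:
        if not isinstance(text, str):
            continue  # or: text = str(text), if coercion makes sense for your data
        if is_english(nlp, text):
            english.append(text)
    return english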
I am trying to have SQLAlchemy automatically create a materialized view using sqlalchemy_utils.view.create_materialized_view(). That works, but I'd also like it to automatically create the index for it as well. sqlalchemy_utils provides a mechanism for that, but it requires an sa.Index, which in turn requires a Column object. The Column objects get created from a selectable, so they don't exist on the model yet to pass in at index creation.
class MVCustomerSearch(Base):
    __table__ = create_materialized_view(
        "mv_customer_search",
        select(
            [
                TSubStation.ixSubStation.label("ixSubStation"),
                TLocation.ixLocation.label("ixLocation"),
                TCustomer.ixCustomer.label("ixCustomer"),
                (
                    func.to_tsvector("english", func.coalesce(TLocation.sLocation, ""))
                    + func.to_tsvector(
                        "english", func.coalesce(TCustomer.sCustomer, "")
                    )
                    + func.to_tsvector(
                        "english", func.coalesce(TSubStation.sSubStation, "")
                    )
                ).label("tsv"),
            ],
        ).select_from(
            join(
                TCustomer, TLocation, TCustomer.ixCustomer == TLocation.ixCustomer
            ).join(TSubStation, TSubStation.ixLocation == TLocation.ixLocation)
        ),
        metadata,
        indexes=(
            db.Index(
                "idx_fts_customer_search",
                # This needs to be a Column and won't work.
                "MVCustomerSearch.tsv",
                postgresql_using="gin",
            )
        ),
    )

    customer = db.relationship(
        "TCustomer",
        uselist=False,
        primaryjoin="TCustomer.ixCustomer==MVCustomerSearch.ixCustomer",
        foreign_keys="TCustomer.ixCustomer",
    )
    location = db.relationship(
        "TLocation",
        uselist=False,
        primaryjoin="TLocation.ixLocation==MVCustomerSearch.ixLocation",
        foreign_keys="TLocation.ixLocation",
    )
    substation = db.relationship(
        "TSubStation",
        uselist=False,
        primaryjoin="TSubStation.ixSubStation==MVCustomerSearch.ixSubStation",
        foreign_keys="TSubStation.ixSubStation",
    )
If I create the index manually after the model exists, it works:
In [31]: from .models.dbviews import MVCustomerSearch
In [32]: idx = db.Index(
...: "idx_fts_customer_search",
...: MVCustomerSearch.tsv,
...: postgresql_using="gin",
...: )
In [33]: print(CreateIndex(idx).compile(dialect=postgresql.dialect()))
CREATE INDEX idx_fts_customer_search ON mv_customer_search USING gin (tsv)
How can I pass a Column object to create that index before the column even exists? SQLAlchemy has a mechanism to use strings when building relationships, but that doesn't work for Index.
Edit: added the traceback from trying with a string:
In [34]: idx = db.Index(
...: "idx_fts_customer_search",
...: "MVCustomerSearch.tsv",
...: postgresql_using="gin",
...: )
In [35]: print(CreateIndex(idx).compile(dialect=postgresql.dialect()))
---------------------------------------------------------------------------
CompileError Traceback (most recent call last)
<ipython-input-35-00025f58e3f9> in <module>
----> 1 print(CreateIndex(idx).compile(dialect=postgresql.dialect()))
<string> in <lambda>(self, bind, dialect, **kw)
~\AppData\Local\pypoetry\Cache\virtualenvs\pce-testsheets-g8TS-oaq-py3.8\Lib\site-packages\sqlalchemy\sql\elements.py in compile(self, default, bind, dialect, **kw)
479 else:
480 dialect = default.StrCompileDialect()
--> 481 return self._compiler(dialect, bind=bind, **kw)
482
483 def _compiler(self, dialect, **kw):
~\AppData\Local\pypoetry\Cache\virtualenvs\pce-testsheets-g8TS-oaq-py3.8\Lib\site-packages\sqlalchemy\sql\ddl.py in _compiler(self, dialect, **kw)
27 Dialect."""
28
---> 29 return dialect.ddl_compiler(dialect, self, **kw)
30
31
~\AppData\Local\pypoetry\Cache\virtualenvs\pce-testsheets-g8TS-oaq-py3.8\Lib\site-packages\sqlalchemy\sql\compiler.py in __init__(self, dialect, statement, bind, schema_translate_map, compile_kwargs)
320 if self.can_execute:
321 self.execution_options = statement._execution_options
--> 322 self.string = self.process(self.statement, **compile_kwargs)
323
324 #util.deprecated(
~\AppData\Local\pypoetry\Cache\virtualenvs\pce-testsheets-g8TS-oaq-py3.8\Lib\site-packages\sqlalchemy\sql\compiler.py in process(self, obj, **kwargs)
350
351 def process(self, obj, **kwargs):
--> 352 return obj._compiler_dispatch(self, **kwargs)
353
354 def __str__(self):
~\AppData\Local\pypoetry\Cache\virtualenvs\pce-testsheets-g8TS-oaq-py3.8\Lib\site-packages\sqlalchemy\sql\visitors.py in _compiler_dispatch(self, visitor, **kw)
94 )
95 else:
---> 96 return meth(self, **kw)
97
98 else:
~\AppData\Local\pypoetry\Cache\virtualenvs\pce-testsheets-g8TS-oaq-py3.8\Lib\site-packages\sqlalchemy\dialects\postgresql\base.py in visit_create_index(self, create)
2079 preparer = self.preparer
2080 index = create.element
-> 2081 self._verify_index_table(index)
2082 text = "CREATE "
2083 if index.unique:
~\AppData\Local\pypoetry\Cache\virtualenvs\pce-testsheets-g8TS-oaq-py3.8\Lib\site-packages\sqlalchemy\sql\compiler.py in _verify_index_table(self, index)
2993 def _verify_index_table(self, index):
2994 if index.table is None:
-> 2995 raise exc.CompileError(
2996 "Index '%s' is not associated " "with any table." % index.name
2997 )
CompileError: Index 'idx_fts_customer_search' is not associated with any table.
It's possible to define an index outside of a table definition, and of course outside of create_materialized_view(), because that function is only a wrapper around creating a Table. If Index() is called with table-bound columns outside of the table definition, it is still associated with that table. So, much as you've already tried:
class MVCustomerSearch(Base):
    __table__ = create_materialized_view(
        "mv_customer_search",
        select(...),
        metadata,
    )

Index(
    "idx_fts_customer_search",
    MVCustomerSearch.tsv,
    postgresql_using="gin",
)
or (a bit more explicit, in my opinion):
MVCustomerSearchTable = create_materialized_view(
    "mv_customer_search",
    select(...),
    metadata,
)

Index(
    "idx_fts_customer_search",
    MVCustomerSearchTable.c.tsv,
    postgresql_using="gin",
)

class MVCustomerSearch(Base):
    __table__ = MVCustomerSearchTable
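If you want to confirm the generated DDL before wiring this into the application, the same compile check used in the question should work against the view's Table (a sketch assuming the second variant, where the Table is kept in MVCustomerSearchTable):

from sqlalchemy.dialects import postgresql
from sqlalchemy.schema import CreateIndex

for idx in MVCustomerSearchTable.indexes:
    print(CreateIndex(idx).compile(dialect=postgresql.dialect()))
# CREATE INDEX idx_fts_customer_search ON mv_customer_search USING gin (tsv)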
Something weird happens when I use super() to extend another class. This is the code:
from depot.io.local import LocalFileStorage

class ScriptLocalFileStorage2(LocalFileStorage):

    def create(self, fileid, *args, **kwargs):
        new_file_id = fileid
        content, filename, content_type = self.fileinfo(*args, **kwargs)
        super().__save_file(new_file_id, content, filename, content_type)
        return new_file_id
This is the error:
In [1]: from depot.manager import DepotManager
...: DepotManager.configure('scripts2', {'depot.backend': 'app.utils.ScriptLocalFileStorage2', 'depot.storage_path': '/Users/jason/PycharmProjects/sw-edge/app/static/scri
...: pts'})
...: depot = DepotManager.get('scripts2')
...: fileid = depot.create("123fsfl1fdfd3232sfsdfdsff", open('/Users/yinhezhixing/Downloads/simple.txt','rb'))
<app.utils.ScriptLocalFileStorage2 object at 0x113d4aeb8>
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-1-8438648c61e5> in <module>
2 DepotManager.configure('scripts2', {'depot.backend': 'app.utils.ScriptLocalFileStorage2', 'depot.storage_path': '/Users/jason/PycharmProjects/sw-edge/app/static/scripts'})
3 depot = DepotManager.get('scripts2')
----> 4 fileid = depot.create("123fsfl1fdfd3232sfsdfdsff", open('/Users/yinhezhixing/Downloads/simple.txt','rb'))
~/PycharmProjects/sw-edge/app/utils.py in create(self, fileid, *args, **kwargs)
254 new_file_id = fileid
255 content, filename, content_type = self.fileinfo(*args, **kwargs)
--> 256 super().__save_file(new_file_id, content, filename, content_type)
257 return new_file_id
AttributeError: 'super' object has no attribute '_ScriptLocalFileStorage2__save_file'
I expected it to create a file with fileid "123fsfl1fdfd3232sfsdfdsff", but instead I get an error about an unknown attribute, _ScriptLocalFileStorage2__save_file.
The double-underscore method __save_file() is subject to name mangling because it is a 'private' method (see the Python documentation on private variables). Because of the name mangling, the attribute you are actually looking up is _ScriptLocalFileStorage2__save_file, which the parent class never defined.
In general you really should avoid calling private methods; is there a public method you can use instead?
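To make the mangling concrete, here is a tiny standalone illustration (hypothetical Parent/Child classes, not the depot API):

class Parent:
    def __save_file(self):  # stored as Parent._Parent__save_file
        return "saved"

class Child(Parent):
    def create(self):
        # __save_file here is rewritten by the compiler to
        # _Child__save_file, a name Parent never defined
        return super().__save_file()

try:
    Child().create()
except AttributeError as exc:
    print(exc)  # 'super' object has no attribute '_Child__save_file'

# The mangled parent name does resolve, but calling it defeats the point
# of the name being private:
print(Parent()._Parent__save_file())  # saved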
I have to insert approx. 30,000 rows daily into my Postgres database.
My table has 4 columns, namely:
id (pkey), category, createddate, updatedon.
My requirement is to update the updatedon and category columns with today's date and the new category if the id is already present, else insert a new row with createddate and updatedon being the same.
I found Ilja Everilä's answer (https://stackoverflow.com/a/44865375/5665430) for batch update:
insert_statement = sqlalchemy.dialects.postgresql.insert(id_tag)

upsert_statement = insert_statement.on_conflict_do_update(
    constraint='id',
    set_={"createddate": insert_statement.excluded.createddate}
)

insert_values = df.to_dict(orient='records')
conn.execute(upsert_statement, insert_values)
It's throwing an AttributeError:
Traceback (most recent call last):
File "<ipython-input-60-4c5e5e0daf14>", line 5, in <module>
set_= dict(createddate = insert_statement.excluded.createddate)
File "/home/bluepi/anaconda2/lib/python2.7/site-packages/sqlalchemy/util/langhelpers.py", line 764, in __get__
obj.__dict__[self.__name__] = result = self.fget(obj)
File "/home/bluepi/anaconda2/lib/python2.7/site-packages/sqlalchemy/dialects/postgresql/dml.py", line 43, in excluded
return alias(self.table, name='excluded').columns
File "/home/bluepi/anaconda2/lib/python2.7/site-packages/sqlalchemy/sql/selectable.py", line 161, in alias
return _interpret_as_from(selectable).alias(name=name, flat=flat)
AttributeError: 'TextClause' object has no attribute 'alias'
I have tried updating one row at a time as shown at http://docs.sqlalchemy.org/en/latest/dialects/postgresql.html#postgresql-insert-on-conflict, but I am getting the same error.
Please help me understand where I am going wrong; thanks in advance.
From your comment
id_tag is nothing but the name of my table in postgres
one could deduce that id_tag is bound to a string. If you'd provided a Minimal, Complete, and Verifiable example, there'd have been a lot less guesswork. As it turns out, postgresql.dml.insert() automatically wraps passed strings in a text() construct, and the result when trying to use Insert.excluded is:
In [2]: postgresql.insert('fail').excluded
~/sqlalchemy/lib/sqlalchemy/sql/selectable.py:43: SAWarning: Textual SQL FROM expression 'fail' should be explicitly declared as text('fail'), or use table('fail') for more specificity (this warning may be suppressed after 10 occurrences)
{"expr": util.ellipses_string(element)})
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-2-f176aac8b913> in <module>()
----> 1 postgresql.insert('fail').excluded
~/sqlalchemy/lib/sqlalchemy/util/langhelpers.py in __get__(self, obj, cls)
765 if obj is None:
766 return self
--> 767 obj.__dict__[self.__name__] = result = self.fget(obj)
768 return result
769
~/sqlalchemy/lib/sqlalchemy/dialects/postgresql/dml.py in excluded(self)
41
42 """
---> 43 return alias(self.table, name='excluded').columns
44
45 #_generative
~/sqlalchemy/lib/sqlalchemy/sql/selectable.py in alias(selectable, name, flat)
159
160 """
--> 161 return _interpret_as_from(selectable).alias(name=name, flat=flat)
162
163
AttributeError: 'TextClause' object has no attribute 'alias'
So, instead of passing a string containing the name of your table to postgresql.dml.insert(), pass it an actual Table object, or a lightweight table() construct that has been populated with column() objects.
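A rough sketch of what that could look like with a lightweight table() construct (the table name "id_tag" is a placeholder, the column names are taken from the question, and conn/df are assumed to be the same connection and DataFrame as before):

from sqlalchemy import column, table
from sqlalchemy.dialects import postgresql

id_tag = table(
    "id_tag",  # placeholder: use your actual table name here
    column("id"),
    column("category"),
    column("createddate"),
    column("updatedon"),
)

insert_statement = postgresql.insert(id_tag)
upsert_statement = insert_statement.on_conflict_do_update(
    index_elements=["id"],  # conflict target: the primary key column
    set_={
        "category": insert_statement.excluded.category,
        "updatedon": insert_statement.excluded.updatedon,
    },
)
conn.execute(upsert_statement, df.to_dict(orient="records"))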
I want to filter some database objects by a concatenated string.
The normal SQL query would be:
SELECT concat(firstName, ' ', name) FROM person WHERE CONCAT(firstName, ' ', name) LIKE "a%";
In the model, I have created a manager called PersonObjects:
class PersonObjects(Manager):
    attrs = {
        'fullName': "CONCAT(firstName, ' ', name)"
    }

    def get_query_set(self):
        return super(PersonObjects, self).get_query_set().extra(
            select=self.attrs)
I also configured this in my model:
objects = managers.PersonObjects()
Now accessing fullName works for single objects:
>>> p = models.Person.objects.get(pk=4)
>>> p.fullName
u'Fred Borminski'
But it does not work in a filter:
>>> p = models.Person.objects.filter(fullName__startswith='Alexei')
Traceback (most recent call last):
File "<console>", line 1, in <module>
File "/usr/lib/python2.7/site-packages/django/db/models/manager.py", line 141, in filter
return self.get_query_set().filter(*args, **kwargs)
File "/usr/lib/python2.7/site-packages/django/db/models/query.py", line 550, in filter
return self._filter_or_exclude(False, *args, **kwargs)
File "/usr/lib/python2.7/site-packages/django/db/models/query.py", line 568, in _filter_or_exclude
clone.query.add_q(Q(*args, **kwargs))
File "/usr/lib/python2.7/site-packages/django/db/models/sql/query.py", line 1128, in add_q
can_reuse=used_aliases)
File "/usr/lib/python2.7/site-packages/django/db/models/sql/query.py", line 1026, in add_filter
negate=negate, process_extras=process_extras)
File "/usr/lib/python2.7/site-packages/django/db/models/sql/query.py", line 1191, in setup_joins
"Choices are: %s" % (name, ", ".join(names)))
FieldError: Cannot resolve keyword 'fullName' into field. Choices are: firstName, gender, name, (...)
Is this a bug or a feature? How can I fix this?
Thanks.
It's not a bug. filter() only inspects model definitions, so it doesn't recognize fullName as a field, because it isn't one declared on the model; it's an extra attribute added to the query.
You can add fullName to the WHERE clause using extra():
Person.objects.extra(where=["fullName LIKE %s"], params=["Alexei%"])
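On Django 1.8 and later, a less raw-SQL-dependent alternative is to annotate with Concat and filter on the annotation (a sketch using the firstName/name fields from the question):

from django.db.models import Value
from django.db.models.functions import Concat

people = (
    Person.objects
    .annotate(fullName=Concat('firstName', Value(' '), 'name'))
    .filter(fullName__startswith='Alexei')
)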
I solved this by implementing a custom Aggregate function.
In this case I needed to concatenate individual fields into a street address to be able to filter/search for matches.
The following aggregate function allows you to specify a field and one or more other columns and performs a SQL CONCAT_WS.
Edit 3 Aug 2015:
A better implementation with details gleaned from https://stackoverflow.com/a/19529861/3230522. The previous implementation would fail if the queryset was used in a subquery. The table names are now correct, although I note that this just works for concatenation of columns from the same table.
from django.db.models import Aggregate
from django.db.models.sql.aggregates import Aggregate as SQLAggregate

class SqlAggregate(SQLAggregate):
    sql_function = 'CONCAT_WS'
    sql_template = u'%(function)s(" ", %(field)s, %(columns_to_concatenate)s)'

    def as_sql(self, qn, connection):
        self.extra['columns_to_concatenate'] = ', '.join(
            ['.'.join([qn(self.col[0]), qn(c.strip())])
             for c in self.extra['with_columns'].split(',')])
        return super(SqlAggregate, self).as_sql(qn, connection)

class Concatenate(Aggregate):
    sql = SqlAggregate

    def __init__(self, expression, **extra):
        super(Concatenate, self).__init__(
            expression,
            **extra)

    def add_to_query(self, query, alias, col, source, is_summary):
        aggregate = self.sql(col,
                             source=source,
                             is_summary=is_summary,
                             **self.extra)
        query.aggregates[alias] = aggregate
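Usage would then presumably go through annotate(), since the class implements add_to_query(); a hedged sketch with the firstName/name fields from the earlier question (with_columns is the extra keyword consumed by as_sql() above):

people = Person.objects.annotate(
    fullName=Concatenate('firstName', with_columns='name')
)
for person in people:
    print(person.fullName)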
The proposed solution worked great with PostgreSQL and JSONB fields in the code below. Only records that have the 'partner' key under the 'key' JSONB field are returned:
query_partner = "select key->>'partner' from accounting_subaccount " \
"where accounting_subaccount.id = subaccount_id and key ? 'partner'"
qs = queryset.extra(select={'partner': query_partner}, where=["key ? 'partner'"])