Maintain order when using sqlalchemy WHERE-clause and IN operator - python

Consider the following database table:
ID ticker description
1 GDBR30 30YR
2 GDBR10 10YR
3 GDBR5 5YR
4 GDBR2 2YR
It can be replicated with this piece of code:
from sqlalchemy import (
Column,
Integer,
MetaData,
String,
Table,
create_engine,
insert,
select,
)
engine = create_engine("sqlite+pysqlite:///:memory:", echo=True, future=True)
metadata = MetaData()
# Creating the table
tickers = Table(
"tickers",
metadata,
Column("id", Integer, primary_key=True, autoincrement=True),
Column("ticker", String, nullable=False),
Column("description", String(), nullable=False),
)
metadata.create_all(engine)
# Populating the table
with engine.connect() as conn:
result = conn.execute(
insert(tickers),
[
{"ticker": "GDBR30", "description": "30YR"},
{"ticker": "GDBR10", "description": "10YR"},
{"ticker": "GDBR5", "description": "5YR"},
{"ticker": "GDBR2", "description": "2YR"},
],
)
conn.commit()
I need to filter tickers for some values:
search_list = ["GDBR10", "GDBR5", "GDBR30"]
records = conn.execute(
select(tickers.c.description).where((tickers.c.ticker).in_(search_list))
)
print(records.fetchall())
# Result
# [('30YR',), ('10YR',), ('5YR',)]
However, I need the resulting list of tuples ordered in the way search_list has been ordered. That is, I need the following result:
print(records.fetchall())
# Expected result
# [('10YR',), ('5YR',), ('30YR',)]
Using SQLite, you could create a cte with two columns (id and ticker). Applying the following code will lead to the expected result (see Maintain order when using SQLite WHERE-clause and IN operator). Unfortunately, I am not able to transfer the SQLite solution to sqlalchemy.
WITH cte(id, ticker) AS (VALUES (1, 'GDBR10'), (2, 'GDBR5'), (3, 'GDBR30'))
SELECT t.*
FROM tbl t INNER JOIN cte c
ON c.ticker = t.ticker
ORDER BY c.id
Suppose, I have search_list_tuple as folllows, how am I suppose to code the sqlalchemy query?
search_list_tuple = [(1, 'GDBR10'), (2, 'GDBR5'), (3, 'GDBR30')]

Below works and is actually equivalent to the VALUES (...) on sqlite albeit somewhat more verbose:
# construct the CTE
sub_queries = [
select(literal(i).label("id"), literal(v).label("ticker"))
for i, v in enumerate(search_list)
]
cte = union_all(*sub_queries).cte("cte")
# desired query
records = conn.execute(
select(tickers.c.description)
.join(cte, cte.c.ticker == tickers.c.ticker)
.order_by(cte.c.id)
)
print(records.fetchall())
# [('10YR',), ('5YR',), ('30YR',)]
Below is using the values() contruct, but unfortunately the resulting query fails on SQLite, but it works perfectly on postgresql:
cte = select(
values(
column("id", Integer), column("ticker", String), name="subq"
).data(list(zip(range(len(search_list)), search_list)))
).cte("cte")
qq = (
select(tickers.c.description)
.join(cte, cte.c.ticker == tickers.c.ticker)
.order_by(cte.c.id)
)
records = conn.execute(qq)
print(records.fetchall())

Related

How to upsert in mySQL so it will work with sqlite3?

I need to test a python flask app that uses mySQL to run its' queries using sqlalchemy, with sqlite3.
I've encountered an exception when trying to test an upsert function using an ON DUPLICATE clause:
(sqlite3.OperationalError) near "DUPLICATE": syntax error
After a brief search for a solution, I've found that the correct syntax for sqlite to execute upsert queries is ON CONFLICT(id) DO UPDATE SET ..., I've tried it but mySQL doesn't recognize this syntax.
What can I do? How can I do an upsert query so sqlite3 and mySQL will both execute it properly?
Example:
employees table:
id
name
1
Jeff Bezos
2
Bill Gates
INSERT INTO employees(id,name)
VALUES(1, 'Donald Trump')
ON DUPLICATE KEY UPDATE name = VALUES(name);
Should update the table to be:
id
name
1
Donald Trump
2
Bill Gates
Thanks in advance!
How can I do an upsert query so sqlite3 and mySQL will both execute it properly?
You can achieve the same result by attempting an UPDATE, and if no match is found then do an INSERT. The following code uses SQLAlchemy Core constructs, which provide further protection from the subtle differences between MySQL and SQLite . For example, if your table had a column named "order" then SQLAlchemy would emit this DDL for MySQL …
CREATE TABLE employees (
id INTEGER NOT NULL,
name VARCHAR(50),
`order` INTEGER,
PRIMARY KEY (id)
)
… and this DDL for SQLite
CREATE TABLE employees (
id INTEGER NOT NULL,
name VARCHAR(50),
"order" INTEGER,
PRIMARY KEY (id)
)
import logging
import sqlalchemy as sa
# pick one
connection_url = "mysql+mysqldb://scott:tiger#localhost:3307/mydb"
# connection_url = "sqlite://"
engine = sa.create_engine(connection_url)
def _dump_table():
with engine.begin() as conn:
print(conn.exec_driver_sql("SELECT * FROM employees").all())
def _setup_example():
employees = sa.Table(
"employees",
sa.MetaData(),
sa.Column("id", sa.Integer, primary_key=True, autoincrement=False),
sa.Column("name", sa.String(50)),
)
employees.drop(engine, checkfirst=True)
employees.create(engine)
# create initial example data
with engine.begin() as conn:
conn.execute(
employees.insert(),
[{"id": 1, "name": "Jeff Bezos"}, {"id": 2, "name": "Bill Gates"}],
)
def upsert_employee(id_, name):
employees = sa.Table("employees", sa.MetaData(), autoload_with=engine)
with engine.begin() as conn:
result = conn.execute(
employees.update().where(employees.c.id == id_), {"name": name}
)
logging.debug(f" {result.rowcount} row(s) updated.")
if result.rowcount == 0:
result = conn.execute(
employees.insert(), {"id": id_, "name": name}
)
logging.debug(f" {result.rowcount} row(s) inserted.")
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
_setup_example()
_dump_table()
"""
[(1, 'Jeff Bezos'), (2, 'Bill Gates')]
"""
upsert_employee(3, "Donald Trump")
"""
DEBUG:root: 0 row(s) updated.
DEBUG:root: 1 row(s) inserted.
"""
_dump_table()
"""
[(1, 'Jeff Bezos'), (2, 'Bill Gates'), (3, 'Donald Trump')]
"""
upsert_employee(1, "Elon Musk")
"""
DEBUG:root: 1 row(s) updated.
"""
_dump_table()
"""
[(1, 'Elon Musk'), (2, 'Bill Gates'), (3, 'Donald Trump')]
"""

How i can get data from DB with columns.names(keys)?

I am using SQLAlchemy to pull data from my database. More specifically, I use the db.select method. So I manage to pull out only the values from the columns or only the names of the columns, but I need to pull out in the format NAME: VALUE. Help how to do this?
connection = engine.connect()
metadata = db.MetaData()
report = db.Table('report', metadata, autoload=True, autoload_with=engine)
query = db.select([report])
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
With SQLAlchemy 1.4+ we can use .mappings() to return results in a dictionary-like format:
import sqlalchemy as sa
# …
t = sa.Table(
"t",
sa.MetaData(),
sa.Column("id", sa.Integer, primary_key=True, autoincrement=False),
sa.Column("txt", sa.String),
)
t.create(engine)
# insert some sample data
with engine.begin() as conn:
conn.exec_driver_sql(
"INSERT INTO t (id, txt) VALUES (1, 'foo'), (2, 'bar')"
)
# test code
with engine.begin() as conn:
results = conn.execute(select(t)).mappings().fetchall()
pprint(results)
# [{'id': 1, 'txt': 'foo'}, {'id': 2, 'txt': 'bar'}]
As the docs state, ResultProxy.fetchall() returns a list of RowProxy objects. These behave like namedtuples, but can also be used like dictionaries:
>>> ResultSet[0]['column_name']
column_value
For more info, see https://docs.sqlalchemy.org/en/13/core/tutorial.html#coretutorial-selecting

understanding insert, on conflict (upsert) in SQLAlchemy

I'm trying to understand what the set_ means in SQLAlchemy's on_conflict_do_update method. i have the following Table:
Table(
"test",
metadata,
Column("id", Integer, primary_key=True),
Column("firstname", String(100)),
Column("lastname", String(100)),
)
and what insert some like this (if i wrote it in psql)
INSERT INTO test (id, firstname, lastname) VALUES (1, 'John', 'Doe)
ON CONFLICT (id) DO UPDATE SET firstname = EXCLUDED.firstname, lastname = EXCLUDED.lastname
I did some due diligence and saw people write in the set_ like this:
import sqlalchemy.dialects import postgresql
insert_stmt = postgresql.insert(target).values([{'id':1,'firstname':'John','lastname':'Doe'}])
primary_keys = [key.name for key in inspect(target).primary_key]
update_dict = {c.name: c for c in insert_stmt.excluded if not c.primary_key}
stmt = insert_stmt.on_conflict_do_update(index_elements = primary_keys , set_ = update_dict)
engine.execute(stmt)
Is the update_dict just looking at the EXCLUDED values (the ones I want to update with) that I set in my insert_stmt? If I str(update_dict) I get an dictionary of specific information regarding the column {'firstname': Column('firstname', VARCHAR(length=100), table=<excluded>), 'lastname': Column('lastname', VARCHAR(length=100), table=<excluded>)}, is the method above the only way to retrieve the data? Can you write it out manually?

SQLAlchemy insert from select raises bindparam error

Using SQLAlchemy on PostgreSQL, I try to improve performance at insertion (about 100k egdes to insert), executing "nested inserts" in a single query for one edge and its nodes.
Using Insert.from_select, I get following error and I don't really understand why.
CompileError: bindparam() name 'name' is reserved for automatic usage in the VALUES or SET clause of this insert/update statement. Please use a name other than column name when using bindparam() with insert() or update() (for example, 'b_name').
from sqlalchemy import *
metadata = MetaData()
node = Table('node', metadata,
Column('id', Integer, primary_key=True),
Column('name', String),
)
edge = Table('edge', metadata,
Column('id', Integer, primary_key=True),
Column('name', String),
Column('source_id', Integer(), ForeignKey(node.c.id)),
Column('target_id', Integer(), ForeignKey(node.c.id)),
)
engine = create_engine('postgres://postgres:postgres#db:5432')
metadata.create_all(engine)
e1_source = insert(node).values(name='e1_source').returning(node.c.id).cte('source')
e1_target = insert(node).values(name='e1_target').returning(node.c.id).cte('target')
e1 = insert(edge).from_select(
['source_id', 'target_id', 'name'], # bindparam error
# ['source_id', 'target_id', 'b_name'], # key error
# [edge.c.source_id, edge.c.target_id, edge.c.name], # bindparam error
select([
e1_source.c.id,
e1_target.c.id,
literal('e1'),
])
)
engine.execute(e1)
EDIT: Below, the SQL query I was expected to produces. I remain open to any suggestions to achieve my purpose though.
CREATE TABLE node (
id SERIAL PRIMARY KEY,
name VARCHAR
);
CREATE TABLE edge (
id SERIAL PRIMARY KEY,
source_id INTEGER REFERENCES node (id),
target_id INTEGER REFERENCES node (id),
name VARCHAR
);
WITH source AS (
INSERT INTO node (name)
VALUES ('e1_source')
RETURNING id
), target as (
INSERT INTO node (name)
VALUES ('e1_target')
RETURNING id
)
INSERT INTO edge (source_id, target_id, name)
SELECT source.id, target.id, 'e1'
FROM source, target;
I have finally figured out where bindparam was implicitly used by SQLAlchemy to solve my issue: in the node queries and not the edge query as I was first thinking.
But I am still not sure if this is the proper way to perform nested insert queries with SQLAlchemy and if it will improve execution time.
e1_source = insert(node).values(name=bindparam('source_name')).returning(node.c.id).cte('source')
e1_target = insert(node).values(name=bindparam('target_name')).returning(node.c.id).cte('target')
e1 = insert(edge).from_select(
['source_id', 'target_id', 'name'],
select([
e1_source.c.id,
e1_target.c.id,
literal('e1'),
])
)
engine.execute(e1, {
'source_name': 'e1_source',
'target_name': 'e1_target',
})

SQLAlchemy convert outer join ORM query to Core

I'm having problems with SQLAlchemy's select_from statement when using the core component. I try to construct an outer join query which currently looks like:
query = select([b1.c.id, b1.c.num, n1.c.name, n1.c.num, ...]
).where(and_(
... some conditions ...
)
).select_from(
???.outerjoin(
n1,
and_(
... some conditions ...
)
).select_from(... more outer joins similar to the above ...)
According to the docs, the structure should look like this:
table1 = table('t1', column('a'))
table2 = table('t2', column('b'))
s = select([table1.c.a]).\
select_from(
table1.join(table2, table1.c.a==table2.c.b)
)
My problem is that I don't have a table1 object in this case, as the select ... part consists of columns and not a single table (see question marks in my query). I've tried using n1.outerjoin(n1..., but that caused an exception (Exception: (ProgrammingError) table name "n1" specified more than once).
The above snippet is derived from a working session-based (ORM) query, which I try to convert (with limited success).
b = Table('b', metadata,
Column('id', Integer, Sequence('seq_b_id')),
Column('num', Integer, nullable=False),
Column('active', Boolean, default=False),
)
n = Table('n', metadata,
Column('b_id', Integer, nullable=False),
Column('num', Integer, nullable=False),
Column('active', Boolean, default=False),
)
p = Table('p', metadata,
Column('b_id', Integer, nullable=False),
Column('num', Integer, nullable=False),
Column('active', Boolean, default=False),
)
n1 = aliased(n, name='n1')
n2 = aliased(n, name='n2')
b1 = aliased(b, name='b1')
b2 = aliased(b, name='b2')
p1 = aliased(p, name='p1')
p2 = aliased(p, name='p2')
result = sess.query(b1.id, b1.num, n1.c.name, n1.c.num, p1.par, p1.num).filter(
b1.active==False,
b1.num==sess.query(func.max(b2.num)).filter(
b2.id==b1.id
)
).outerjoin(
n1,
and_(
n1.c.b_id==b1.id,
n1.c.num<=num,
n1.c.active==False,
n1.c.num==sess.query(func.max(n2.num)).filter(
n2.id==n1.c.id
)
)
).outerjoin(
p1,
and_(
p1.b_id==b1.id,
p1.num<=num,
p1.active==False,
p1.num==sess.query(func.max(p2.num)).filter(
p2.id==p1.id
)
)
).order_by(b1.id)
How do I go about converting this ORM query into a plain Core query?
Update:
I was able to narrow down the problem. It seems that a combination of two select_from calls causes the problem.
customer = Table('customer', metadata,
Column('id', Integer),
Column('name', String(50)),
)
order = Table('order', metadata,
Column('id', Integer),
Column('customer_id', Integer),
Column('order_num', Integer),
)
address = Table('address', metadata,
Column('id', Integer),
Column('customer_id', Integer),
Column('city', String(50)),
)
metadata.create_all(db)
customer1 = aliased(customer, name='customer1')
order1 = aliased(order, name='order1')
address1 = aliased(address, name='address1')
columns = [
customer1.c.id, customer.c.name,
order1.c.id, order1.c.order_num,
address1.c.id, address1.c.city
]
query = select(columns)
query = query.select_from(
customer1.outerjoin(
order1,
and_(
order1.c.customer_id==customer1.c.id,
)
)
)
query = query.select_from(
customer1.outerjoin(
address1,
and_(
customer1.c.id==address1.c.customer_id
)
)
)
result = connection.execute(query)
for r in result.fetchall():
print r
The above code causes the following exception:
ProgrammingError: (ProgrammingError) table name "customer1" specified more than once
'SELECT customer1.id, customer.name, order1.id, order1.order_num, address1.id, address1.city \nFROM customer, customer AS customer1 LEFT OUTER JOIN "order" AS order1 ON order1.customer_id = customer1.id, customer AS customer1 LEFT OUTER JOIN address AS address1 ON customer1.id = address1.customer_id' {}
If I was a bit more experienced in using SQLAlchemy, I would say this could be a bug...
I finally managed to solved the problem. Instead of cascading select_from, additional joins need to be chained to the actual join. The above query would read:
query = select(columns)
query = query.select_from(
customer1.outerjoin(
order1,
and_(
order1.c.customer_id==customer1.c.id,
)
).outerjoin(
address1,
and_(
customer1.c.id==address1.c.customer_id
)
)
)

Categories