How to upsert in MySQL so it will work with sqlite3? - python

I need to test a Python Flask app that uses MySQL (via SQLAlchemy) to run its queries, using sqlite3.
I've encountered an exception when trying to test an upsert function that uses an ON DUPLICATE clause:
(sqlite3.OperationalError) near "DUPLICATE": syntax error
After a brief search for a solution, I found that the correct SQLite syntax for upsert queries is ON CONFLICT(id) DO UPDATE SET .... I tried it, but MySQL doesn't recognize that syntax.
What can I do? How can I write an upsert query that both sqlite3 and MySQL will execute properly?
Example:
employees table:

 id | name
----+------------
  1 | Jeff Bezos
  2 | Bill Gates
INSERT INTO employees(id,name)
VALUES(1, 'Donald Trump')
ON DUPLICATE KEY UPDATE name = VALUES(name);
Should update the table to be:

 id | name
----+--------------
  1 | Donald Trump
  2 | Bill Gates
Thanks in advance!

How can I do an upsert query so sqlite3 and mySQL will both execute it properly?
You can achieve the same result by attempting an UPDATE and, if no rows match, performing an INSERT. The following code uses SQLAlchemy Core constructs, which provide further protection from the subtle differences between MySQL and SQLite. For example, if your table had a column named "order", SQLAlchemy would emit this DDL for MySQL …
CREATE TABLE employees (
    id INTEGER NOT NULL,
    name VARCHAR(50),
    `order` INTEGER,
    PRIMARY KEY (id)
)
… and this DDL for SQLite
CREATE TABLE employees (
    id INTEGER NOT NULL,
    name VARCHAR(50),
    "order" INTEGER,
    PRIMARY KEY (id)
)
import logging

import sqlalchemy as sa

# pick one
connection_url = "mysql+mysqldb://scott:tiger@localhost:3307/mydb"
# connection_url = "sqlite://"

engine = sa.create_engine(connection_url)


def _dump_table():
    with engine.begin() as conn:
        print(conn.exec_driver_sql("SELECT * FROM employees").all())


def _setup_example():
    employees = sa.Table(
        "employees",
        sa.MetaData(),
        sa.Column("id", sa.Integer, primary_key=True, autoincrement=False),
        sa.Column("name", sa.String(50)),
    )
    employees.drop(engine, checkfirst=True)
    employees.create(engine)
    # create initial example data
    with engine.begin() as conn:
        conn.execute(
            employees.insert(),
            [{"id": 1, "name": "Jeff Bezos"}, {"id": 2, "name": "Bill Gates"}],
        )


def upsert_employee(id_, name):
    employees = sa.Table("employees", sa.MetaData(), autoload_with=engine)
    with engine.begin() as conn:
        # try the UPDATE first …
        result = conn.execute(
            employees.update().where(employees.c.id == id_), {"name": name}
        )
        logging.debug(f" {result.rowcount} row(s) updated.")
        # … and INSERT only if no row matched
        if result.rowcount == 0:
            result = conn.execute(
                employees.insert(), {"id": id_, "name": name}
            )
            logging.debug(f" {result.rowcount} row(s) inserted.")


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    _setup_example()
    _dump_table()
    """
    [(1, 'Jeff Bezos'), (2, 'Bill Gates')]
    """
    upsert_employee(3, "Donald Trump")
    """
    DEBUG:root: 0 row(s) updated.
    DEBUG:root: 1 row(s) inserted.
    """
    _dump_table()
    """
    [(1, 'Jeff Bezos'), (2, 'Bill Gates'), (3, 'Donald Trump')]
    """
    upsert_employee(1, "Elon Musk")
    """
    DEBUG:root: 1 row(s) updated.
    """
    _dump_table()
    """
    [(1, 'Elon Musk'), (2, 'Bill Gates'), (3, 'Donald Trump')]
    """

Related

Maintain order when using sqlalchemy WHERE-clause and IN operator

Consider the following database table:
 ID | ticker | description
----+--------+-------------
  1 | GDBR30 | 30YR
  2 | GDBR10 | 10YR
  3 | GDBR5  | 5YR
  4 | GDBR2  | 2YR
It can be replicated with this piece of code:
from sqlalchemy import (
    Column,
    Integer,
    MetaData,
    String,
    Table,
    create_engine,
    insert,
    select,
)

engine = create_engine("sqlite+pysqlite:///:memory:", echo=True, future=True)
metadata = MetaData()

# Creating the table
tickers = Table(
    "tickers",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("ticker", String, nullable=False),
    Column("description", String(), nullable=False),
)
metadata.create_all(engine)

# Populating the table
with engine.connect() as conn:
    result = conn.execute(
        insert(tickers),
        [
            {"ticker": "GDBR30", "description": "30YR"},
            {"ticker": "GDBR10", "description": "10YR"},
            {"ticker": "GDBR5", "description": "5YR"},
            {"ticker": "GDBR2", "description": "2YR"},
        ],
    )
    conn.commit()
I need to filter tickers for some values:
search_list = ["GDBR10", "GDBR5", "GDBR30"]

records = conn.execute(
    select(tickers.c.description).where(tickers.c.ticker.in_(search_list))
)
print(records.fetchall())
# Result
# [('30YR',), ('10YR',), ('5YR',)]
However, I need the resulting list of tuples ordered in the way search_list has been ordered. That is, I need the following result:
print(records.fetchall())
# Expected result
# [('10YR',), ('5YR',), ('30YR',)]
Using SQLite, you could create a CTE with two columns (id and ticker); applying the following code leads to the expected result (see Maintain order when using SQLite WHERE-clause and IN operator). Unfortunately, I am not able to transfer the SQLite solution to SQLAlchemy.
WITH cte(id, ticker) AS (VALUES (1, 'GDBR10'), (2, 'GDBR5'), (3, 'GDBR30'))
SELECT t.*
FROM tbl t INNER JOIN cte c
ON c.ticker = t.ticker
ORDER BY c.id
Suppose I have search_list_tuple as follows; how am I supposed to code the SQLAlchemy query?
search_list_tuple = [(1, 'GDBR10'), (2, 'GDBR5'), (3, 'GDBR30')]
The following works and is actually equivalent to the VALUES (...) CTE on SQLite, albeit somewhat more verbose (literal and union_all are imported from sqlalchemy):
from sqlalchemy import literal, union_all

# construct the CTE: one SELECT of literals per search term, joined with UNION ALL
sub_queries = [
    select(literal(i).label("id"), literal(v).label("ticker"))
    for i, v in enumerate(search_list)
]
cte = union_all(*sub_queries).cte("cte")

# desired query
records = conn.execute(
    select(tickers.c.description)
    .join(cte, cte.c.ticker == tickers.c.ticker)
    .order_by(cte.c.id)
)
print(records.fetchall())
# [('10YR',), ('5YR',), ('30YR',)]
Below is a version using the values() construct; unfortunately the resulting query fails on SQLite, but it works perfectly on PostgreSQL (column and values are also imported from sqlalchemy):
from sqlalchemy import column, values

cte = select(
    values(
        column("id", Integer), column("ticker", String), name="subq"
    ).data(list(zip(range(len(search_list)), search_list)))
).cte("cte")
qq = (
    select(tickers.c.description)
    .join(cte, cte.c.ticker == tickers.c.ticker)
    .order_by(cte.c.id)
)
records = conn.execute(qq)
print(records.fetchall())
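As a portable alternative (my addition, not from the original answers), you could skip the CTE entirely and order by a CASE expression built from search_list. A sketch, assuming SQLAlchemy 1.4+, where case() accepts a mapping when given a value= argument:
from sqlalchemy import case

# map each ticker to its position in search_list and order by that position
ordering = case(
    {ticker: idx for idx, ticker in enumerate(search_list)},
    value=tickers.c.ticker,
)
records = conn.execute(
    select(tickers.c.description)
    .where(tickers.c.ticker.in_(search_list))
    .order_by(ordering)
)
print(records.fetchall())
# expected: [('10YR',), ('5YR',), ('30YR',)]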

SQLAlchemy: how to work with month intervals using timedelta

I have a table in PostgreSQL with an interval field. There is a possibility that someone may want to store something like INTERVAL '1 MONTH' in this table. In my Python application, I have a timedelta object which is substituted into a parametrised query:
with sqla_engine.connect() as conn:
    # 'params' contains parametrised SQL where one of the fields is a timedelta object
    return conn.execute(text(query).execution_options(autocommit=autocommit), params)
I want to replace my timedelta object with something that is translated as INTERVAL '1 MONTH' by SQLAlchemy Engine. Is that possible?
And, in reverse, how can I read interval '1 month' value from PostgreSQL into something usable in Python?
I want to replace my timedelta object with something that is translated as INTERVAL '1 MONTH' by SQLAlchemy Engine. Is that possible?
PostgreSQL accepts string values for interval columns so this works:
from sqlalchemy import (
    create_engine,
    Table,
    MetaData,
    Column,
    Integer,
    text,
    Interval,
)

engine = create_engine(
    "postgresql://scott:tiger@192.168.0.199/test", echo=True
)

tbl = Table(
    "tbl",
    MetaData(),
    Column("id", Integer, primary_key=True, autoincrement=False),
    Column("intrvl", Interval()),
)
tbl.drop(engine, checkfirst=True)
tbl.create(engine)
"""SQL emitted:
CREATE TABLE tbl (
    id INTEGER NOT NULL,
    intrvl INTERVAL,
    PRIMARY KEY (id)
)
"""

with engine.begin() as conn:
    conn.execute(tbl.insert(), {"id": 1, "intrvl": "1 MONTH"})
"""SQL emitted:
2021-03-29 17:32:27,427 INFO sqlalchemy.engine.Engine INSERT INTO tbl (id, intrvl) VALUES (%(id)s, %(intrvl)s)
2021-03-29 17:32:27,428 INFO sqlalchemy.engine.Engine [generated in 0.00032s] {'id': 1, 'intrvl': '1 MONTH'}
"""
and if we query the table from psql we can see that the value has been stored:
gord@gord-dv7-xubuntu0:~$ psql -h 192.168.0.199 test scott
Password for user scott:
psql (12.6 (Ubuntu 12.6-1.pgdg18.04+1), server 12.3 (Debian 12.3-1.pgdg100+1))
Type "help" for help.

test=# select * from tbl;
 id | intrvl
----+--------
  1 | 1 mon
(1 row)
And, in reverse, how can I read interval '1 month' value from PostgreSQL into something usable in Python?
psycopg2 will return the value as a timedelta, but timedelta does not support months=, so it just assumes 30 days:
results = conn.execute(text("SELECT * FROM tbl")).fetchall()
print(results)
# [(1, datetime.timedelta(days=30))]
Update:
Is there a way to make a correct transformation, e.g. translate it into the string value "1 month"?
Your SQL query could ask for cast(intrvl as varchar(50)) as intrvl_str to get back a string, and if you wanted to make that automatic you could define intrvl_str as a Computed (generated) column in the table:
from sqlalchemy import Computed, String

tbl = Table(
    "tbl",
    MetaData(),
    Column("id", Integer, primary_key=True, autoincrement=False),
    Column("intrvl", Interval()),
    Column("intrvl_str", String(50), Computed("cast (intrvl as varchar(50))")),
)
tbl.drop(engine, checkfirst=True)
tbl.create(engine)
"""SQL emitted:
CREATE TABLE tbl (
id INTEGER NOT NULL,
intrvl INTERVAL,
intrvl_str VARCHAR(50) GENERATED ALWAYS AS (cast (intrvl as varchar(50))) STORED,
PRIMARY KEY (id)
)
"""
with engine.begin() as conn:
    conn.execute(tbl.insert(), {"id": 1, "intrvl": "1 MONTH"})
    """SQL emitted: (same as before)
    2021-03-29 17:32:27,427 INFO sqlalchemy.engine.Engine INSERT INTO tbl (id, intrvl) VALUES (%(id)s, %(intrvl)s)
    2021-03-29 17:32:27,428 INFO sqlalchemy.engine.Engine [generated in 0.00032s] {'id': 1, 'intrvl': '1 MONTH'}
    """
    results = conn.execute(text("SELECT * FROM tbl")).fetchall()
    print(results)
    # [(1, datetime.timedelta(days=30), '1 mon')]
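If you need true month semantics on the Python side, one option (a sketch of my own, assuming python-dateutil is installed; the helper name is hypothetical, not part of the original answer) is to parse the cast string into a dateutil relativedelta:
import re
from dateutil.relativedelta import relativedelta

def pg_interval_to_relativedelta(s: str) -> relativedelta:
    # handles the year/mon/day components of PostgreSQL's text output,
    # e.g. '1 mon' or '2 years 3 mons'; time-of-day parts are ignored here
    units = {"year": "years", "mon": "months", "day": "days"}
    kwargs = {}
    for qty, unit in re.findall(r"(-?\d+)\s+(year|mon|day)s?", s):
        kwargs[units[unit]] = int(qty)
    return relativedelta(**kwargs)

print(pg_interval_to_relativedelta("1 mon"))  # relativedelta(months=+1)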

Bulk Saving and Updating while returning IDs

So I'm using SQLAlchemy for a project I'm working on. I've got an issue where I will eventually have thousands of records that need to be saved every hour. These records may be inserted or updated. I've been using bulk_save_objects for this and it's worked great. However, now I have to introduce a history for these records, which means I need the IDs returned so I can link each entry to an entry in a history table. I know about using return_defaults, and that works. However, it makes bulk_save_objects insert and update one row at a time instead of in bulk, which defeats the purpose. Is there another option where I can bulk insert and update at the same time, but retain the IDs?
The desired result can be achieved using a technique similar to the one described in the answer here: upload the rows to a temporary table, then perform an UPDATE followed by an INSERT that returns the inserted ID values. For SQL Server, that would be an OUTPUT clause on the INSERT statement:
import pandas as pd
from pprint import pprint
import sqlalchemy as sa

# engine = sa.create_engine(...)  # a SQL Server (mssql+pyodbc) engine is assumed

main_table = "team"

# <set up test environment>
with engine.begin() as conn:
    conn.execute(sa.text(f"DROP TABLE IF EXISTS [{main_table}]"))
    conn.execute(
        sa.text(
            f"""
            CREATE TABLE [dbo].[{main_table}](
                [id] [int] IDENTITY(1,1) NOT NULL,
                [prov] [varchar](2) NOT NULL,
                [city] [varchar](50) NOT NULL,
                [name] [varchar](50) NOT NULL,
                [comments] [varchar](max) NULL,
                CONSTRAINT [PK_team] PRIMARY KEY CLUSTERED
                (
                    [id] ASC
                )
            )
            """
        )
    )
    conn.execute(
        sa.text(
            f"""
            CREATE UNIQUE NONCLUSTERED INDEX [UX_team_prov_city] ON [dbo].[{main_table}]
            (
                [prov] ASC,
                [city] ASC
            )
            """
        )
    )
    conn.execute(
        sa.text(
            f"""
            INSERT INTO [{main_table}] ([prov], [city], [name])
            VALUES ('AB', 'Calgary', 'Flames')
            """
        )
    )

# <data for upsert>
df = pd.DataFrame(
    [
        ("AB", "Calgary", "Flames", "hard-working, handsome lads"),
        ("AB", "Edmonton", "Oilers", "ruffians and scalawags"),
    ],
    columns=["prov", "city", "name", "comments"],
)

# <perform upsert, returning IDs>
temp_table = "#so65525098"
with engine.begin() as conn:
    df.to_sql(temp_table, conn, index=False, if_exists="replace")
    conn.execute(
        sa.text(
            f"""
            UPDATE main SET main.name = temp.name,
                main.comments = temp.comments
            FROM [{main_table}] main INNER JOIN [{temp_table}] temp
                ON main.prov = temp.prov AND main.city = temp.city
            """
        )
    )
    inserted = conn.execute(
        sa.text(
            f"""
            INSERT INTO [{main_table}] (prov, city, name, comments)
            OUTPUT INSERTED.prov, INSERTED.city, INSERTED.id
            SELECT prov, city, name, comments FROM [{temp_table}] temp
            WHERE NOT EXISTS (
                SELECT * FROM [{main_table}] main
                WHERE main.prov = temp.prov AND main.city = temp.city
            )
            """
        )
    ).fetchall()
    print(inserted)
    """console output:
    [('AB', 'Edmonton', 2)]
    """

# <check results>
with engine.begin() as conn:
    pprint(conn.execute(sa.text(f"SELECT * FROM {main_table}")).fetchall())
    """console output:
    [(1, 'AB', 'Calgary', 'Flames', 'hard-working, handsome lads'),
     (2, 'AB', 'Edmonton', 'Oilers', 'ruffians and scalawags')]
    """

How to automap the result set of a custom SQL query in SQLAlchemy

I'd like to run raw SQL queries through SQLAlchemy and have the resulting rows use python types which are automatically mapped from the database type. This AutoMap functionality is available for tables in the database. Is it available for any arbitrary resultset?
As an example, we build a small SQLite database:
import sqlite3
con = sqlite3.connect('test.db')
cur = con.cursor()
cur.execute("CREATE TABLE Trainer (id INTEGER PRIMARY KEY, first_name VARCHAR(50), last_name VARCHAR(50), dob DATE, tiger_skill FLOAT);")
cur.execute("INSERT INTO Trainer VALUES (1, 'Joe', 'Exotic', '1963-03-05', 0.6)")
cur.execute("INSERT INTO Trainer VALUES (2, 'Carole', 'Baskin', '1961-06-06', 0.3)")
cur.close()
con.commit()
con.close()
And using SQLAlchemy, I query the newly created database "test.db":
from sqlalchemy import create_engine
engine = create_engine("sqlite:///test.db")
connection = engine.connect()
CUSTOM_SQL_QUERY = "SELECT count(*) as total_trainers, min(dob) as first_dob from Trainer"
result = connection.execute(CUSTOM_SQL_QUERY)
for r in result:
    print(r)
# output: (2, '1961-06-06')
Notice that the second column in the result set is a Python string, not a Python datetime.date object. Is there a way for SQLAlchemy to automap an arbitrary result set? Or is this automap reflection capability limited to actual tables in the database?
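One way to get typed results from a textual query (a sketch I am adding, assuming SQLAlchemy 1.4+; not from the original question) is to declare the expected column types on a text() construct so SQLAlchemy applies its result processors, e.g. string -> datetime.date on SQLite:
import sqlalchemy as sa

engine = sa.create_engine("sqlite:///test.db")
stmt = sa.text(
    "SELECT count(*) AS total_trainers, min(dob) AS first_dob FROM Trainer"
).columns(total_trainers=sa.Integer, first_dob=sa.Date)
with engine.connect() as conn:
    for r in conn.execute(stmt):
        print(r)
        # expected: (2, datetime.date(1961, 6, 6))
This still requires declaring the types yourself per query; it is not automatic inference over an arbitrary result set.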

How can I let the id be auto-generated in my database?

Here is my code:
import sqlite3
def insert(fields=(), values=()):
    connection = sqlite3.connect('database.db')
    # g.db is the database connection
    cur = connection.cursor()
    query = 'INSERT INTO this_database (%s) VALUES (%s)' % (
        ', '.join(fields),
        ', '.join(['?'] * len(values))
    )
    cur.execute(query, values)
    connection.commit()
    id = cur.lastrowid
    cur.close()
    print(id)
Test example:
insert(
    fields=("id", "file_name", "url", "time", "type", "description"),
    values=(2, "file1", "wwww.test.com", "1", "photo", "my first database test"),
)
I don't want to supply the id manually; I want it to be incremented automatically. How can I do that?
You have an INTEGER PRIMARY KEY column, which, if you leave it out when inserting items, automatically increments:
INSERT INTO this_database(file_name, url, time, type, description)
VALUES (?,?,?,?,?)
Since id is omitted, every time you insert a row using the above statement, it is automatically assigned a number by SQLite.
See the SQLite documentation explaining this.
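For the test example above, the call would then simply omit the id (a sketch, assuming this_database was created with id INTEGER PRIMARY KEY):
insert(
    fields=("file_name", "url", "time", "type", "description"),
    values=("file1", "wwww.test.com", "1", "photo", "my first database test"),
)
# the print(id) inside insert() now shows the rowid SQLite assigned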
