How to check inserting row on duplicity based on multiple attributes? - python

I'm working in SQLAlchemy. Is it possible to set equality of two rows so that if a row is being inserted and there already exists a row with the same 2 columns, let's say 'creation_date' and 'destination_from', then the second row won't be inserted?
I don't want to create a PRIMARY KEY on those columns.
I suppose that manually checking whether there is already a row with those columns is inefficient.
# -*- coding: utf-8 -*-
from sqlalchemy import create_engine, ForeignKey, UniqueConstraint
from sqlalchemy import Column
import sqlalchemy
from sqlalchemy.ext.declarative import declarative_base
import datetime

engine = create_engine('sqlite:///db.db', echo=False)
Base = declarative_base()
s = sqlalchemy.orm.Session(engine)


class Flight(Base):
    """One flight offer.

    Duplicate rows are rejected at the database level: the pair
    (creation_date, destination_from) must be unique, without making
    those columns the primary key.
    """
    __tablename__ = 'flights'
    # UNIQUE constraint instead of a composite PRIMARY KEY — inserting a
    # second row with the same pair raises IntegrityError on commit.
    __table_args__ = (
        UniqueConstraint('creation_date', 'destination_from'),
    )
    id = Column(sqlalchemy.Integer, primary_key=True)
    destination_from = Column(sqlalchemy.String)
    destination_to = Column(sqlalchemy.String)
    creation_date = Column(sqlalchemy.Date)
    start_date = Column(sqlalchemy.Date)
    return_date = Column(sqlalchemy.Date)
    price = Column(sqlalchemy.Float)


Base.metadata.create_all(engine)


def insert_into_flights(**kwargs):
    """Insert one Flight built from keyword arguments and commit.

    Raises sqlalchemy.exc.IntegrityError if a row with the same
    (creation_date, destination_from) pair already exists.
    """
    s.add(Flight(**kwargs))
    s.commit()

You don't want to create a PRIMARY KEY, but can't you create a UNIQUE CONSTRAINT? On a declarative model you attach it through __table_args__:
__table_args__ = (UniqueConstraint('creation_date', 'destination_from'),)

Related

SQLalchemy with column names starting and ending with underscores

Set the RDBMS_URI env var to a connection string like postgresql://username:password@host/database, then on Python 3.9 with PostgreSQL 15 and SQLAlchemy 1.4 run:
from os import environ

from sqlalchemy import Boolean, Column, Identity, Integer
from sqlalchemy import create_engine
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class Tbl(Base):
    __tablename__ = 'Tbl'
    # Demonstrates the question: attributes named with leading AND trailing
    # double underscores are skipped by the declarative mapper, so this
    # column never makes it into the created table.
    __has_error__ = Column(Boolean)
    id = Column(Integer, primary_key=True, server_default=Identity())


engine = create_engine(environ["RDBMS_URI"])
Base.metadata.create_all(engine)
Checking the database:
=> \d "Tbl"
Table "public.Tbl"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+----------------------------------
id | integer | | not null | generated by default as identity
Indexes:
"Tbl_pkey" PRIMARY KEY, btree (id)
How do I force the column names with double underscore to work?
I believe that the declarative machinery explicitly excludes attributes whose names start with a double underscore from the mapping process (based on this and this). Consequently your __has_error__ column is not created in the target table.
There are at least two possible workarounds. Firstly, you could give the model attribute a different name, for example:
_has_error = Column('__has_error__', BOOLEAN)
This will create the database column __has_error__, accessed through Tbl._has_error*.
If you want the model's attribute to be __has_error__, then you can achieve this by using an imperative mapping.
import sqlalchemy as sa
from sqlalchemy import orm

mapper_registry = orm.registry()

# Define the table directly; Table columns are not filtered the way
# declarative class attributes are, so '__has_error__' survives here.
tbl = sa.Table(
    'tbl',
    mapper_registry.metadata,
    sa.Column('__has_error__', sa.Boolean),
    sa.Column('id', sa.Integer, primary_key=True, server_default=sa.Identity()),
)


class Tbl:
    pass


mapper_registry.map_imperatively(Tbl, tbl)
# NOTE(review): 'engine' is assumed to be defined earlier in the session
# (see the snippet above) — it is not created in this fragment.
mapper_registry.metadata.create_all(engine)
* I tried using a synonym to map __has_error__ to _has_error but it didn't seem to work. It probably gets excluded in the mapper as well, but I didn't investigate further.

SQLAlchemy + Pandas: saving array of strings to Postgres saves them as array of chars

I am trying to save an array of strings to Postgres but when I check, the array of strings is saved as an array of chars. Example using sqlalchemy for my database engine
df = pd.read_csv('data.csv')
df.to_sql('tablename', dtypes={'array_col':sqlalchemy.dialects.postgresql.Array(sqlalchemy.dialects.postgresql.text)})
when I query for 'array_col', I'm expecting this:
['one','two']
What I get is this
['','o','n','e','','t','w','o']
I think you need to convert the string from the csv into an array first using converters argument in read_csv() before calling to_sql.
This example assumes they are stored separated by commas themselves. I used hobbies as the name of my array column. Once the value was converted to a list I did not seem to pass dtype to to_sql but if you still need that then you would use dtype={'hobbies': ARRAY(TEXT)} in this example. I think ARRAY and TEXT are the correct types, not to be confused with array or text. I define my column array type in my sqlalchemy model below.
import sys
from io import StringIO

from sqlalchemy import (
    create_engine,
    Integer,
    String,
)
from sqlalchemy.schema import (
    Column,
)
from sqlalchemy.sql import select
from sqlalchemy.orm import declarative_base
import pandas as pd
from sqlalchemy.dialects.postgresql import ARRAY, TEXT

Base = declarative_base()

username, password, db = sys.argv[1:4]
# '@' (not '#') separates the credentials from the host part of the DSN;
# an empty host falls back to the local Unix socket.
engine = create_engine(f"postgresql+psycopg2://{username}:{password}@/{db}", echo=False)


class User(Base):
    """User with a real text[] array column for hobbies."""
    __tablename__ = "users"
    id = Column(Integer, primary_key=True)
    name = Column(String(8), index=True)
    # ARRAY(TEXT) (uppercase, postgresql dialect types) gives text[],
    # not a single text column of joined characters.
    hobbies = Column(ARRAY(TEXT))


Base.metadata.create_all(engine)

csv_content = '''"name","hobbies"
1,"User 1","running,jumping"
2,"User 2","sitting,sleeping"
'''

with engine.begin() as conn:
    def convert_to_array(v):
        """Split a comma-separated CSV cell into a list of non-empty strings."""
        return [s.strip() for s in v.split(',') if s.strip()]

    # Convert the cell to a Python list BEFORE to_sql, via read_csv converters;
    # otherwise pandas hands the driver a plain string and Postgres stores
    # it as an array of single characters.
    content = StringIO(csv_content)
    df = pd.read_csv(content, converters={'hobbies': convert_to_array})
    df.to_sql('users', schema="public", con=conn, if_exists='append', index_label='id')

with engine.begin() as conn:
    for user in conn.execute(select(User)).all():
        print(user.id, user.name, user.hobbies)
        print("|".join(user.hobbies))
        print(type(user.hobbies))

Exporting a pandas df to sqlite leads to duplicate datasets instead of one updated dataset

I'm uploading a pandas dataframe from a csv file into a sqlite database via SQLAlchemy.
The initial filling is working just fine, but when I rerun the following code, the same data is exported again and the database contains two identical datasets.
How can I change the code, so that only new or changed data is uploaded into the database?
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, DateTime
from sqlalchemy.orm import sessionmaker
from datetime import datetime
import pandas as pd

# Engine/connection setup; the urlquote note from the original applies when
# the password contains special characters such as "/".
engine = create_engine('sqlite:///historical_data3.db')
conn = engine.connect()
Base = declarative_base()


class Timeseries_Values(Base):
    """Declarative model for the Timeseries_Values table."""
    __tablename__ = 'Timeseries_Values'
    #id = Column(Integer)
    Date = Column(DateTime, primary_key=True)
    ProductID = Column(Integer, primary_key=True)
    Value = Column(Numeric)

    #property
    def __repr__(self):
        return "(Date='%s', ProductID='%s', Value='%s')" % (self.Date, self.ProductID, self.Value)


fileToRead = r'V:\PYTHON\ProjectDatabase\HistoricalDATA_V13.csv'
tableToWriteTo = 'Timeseries_Values'

# Read with ';' separator and ',' decimals; 'records' orientation matches the
# list-of-dicts shape executemany-style inserts expect.
df = pd.read_csv(fileToRead, sep=';', decimal=',', parse_dates=['Date'], dayfirst=True)
listToWrite = df.to_dict(orient='records')

# Reflect the target table from the live database.
metadata = sqlalchemy.schema.MetaData(bind=engine, reflect=True)
table = sqlalchemy.Table(tableToWriteTo, metadata, autoload=True)

# Session open -> bulk insert -> commit -> close.
Session = sessionmaker(bind=engine)
session = Session()
conn.execute(table.insert(), listToWrite)
session.commit()
session.close()
This is working now, I 've added the df.to_sql code:
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, DateTime
from sqlalchemy.orm import sessionmaker
from datetime import datetime
import pandas as pd

# Set up of the engine to connect to the database
# the urlquote is used for passing the password which might contain special characters such as "/"
engine = create_engine('sqlite:///historical_data3.db')
conn = engine.connect()
Base = declarative_base()


class Timeseries_Values(Base):
    """Declarative model for the Timeseries_Values table."""
    __tablename__ = 'Timeseries_Values'
    #id = Column(Integer)
    Date = Column(DateTime, primary_key=True)
    ProductID = Column(Integer, primary_key=True)
    Value = Column(Numeric)


fileToRead = r'V:\PYTHON\ProjectDatabase\HistoricalDATA_V13.csv'
tableToWriteTo = 'Timeseries_Values'

# Panda to create a dataframe with ; as separator.
df = pd.read_csv(fileToRead, sep=';', decimal=',', parse_dates=['Date'], dayfirst=True)

# if_exists='replace' drops and recreates the table, so rerunning the script
# never duplicates the dataset — to_sql alone performs the whole upload.
df.to_sql(name='Timeseries_Values', con=conn, if_exists='replace')
# NOTE: the original version followed to_sql with a second bulk
# conn.execute(table.insert(), listToWrite) of the SAME rows, which wrote
# every record twice; that redundant insert (and the reflection/session
# plumbing that only served it) has been removed.

How do I alter two different column headers of a pre-existing database table in sqlalchemy?

I am using sqlalchemy to reflect the columns of a table in a mysql database into a python script. This is a database I have inherited and some of the column headers for the table have spaces in eg "Chromosome Position". A couple of the column headers also are strings which start with a digit eg "1st time".
I would like to alter these headers so that spaces are replaced with underscores and there are no digits at the beginning of the column header string, e.g. "1st time" becomes "firsttime". I followed the advice given in sqlalchemy - reflecting tables and columns with spaces, which partially solved my problem.
from sqlalchemy import create_engine, Column, event, MetaData
from sqlalchemy.ext.declarative import declarative_base, DeferredReflection
from sqlalchemy.orm import sessionmaker, Session
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.schema import Table
from twisted.python import reflect

Base = automap_base()
# '@' separates credentials from host in the URL (the pasted code had it
# mangled to '#', which is not a valid DSN).
engine = create_engine('mysql://username:password@localhost/variants_database', echo=False)


# The decorator must actually be applied ('@', not a '#' comment) or the
# hook never registers and reflected column names are left untouched.
@event.listens_for(Table, "column_reflect")
def reflect_col(inspector, table, column_info):
    """Rewrite each reflected column's mapped key: spaces -> underscores."""
    column_info['key'] = column_info['name'].replace(' ', '_')


metadata = MetaData()
session = Session(engine)


class Variants(Base):
    __table__ = Table("variants", Base.metadata, autoload=True, autoload_with=engine)


Base.prepare(engine, reflect=True)
session = Session(engine)

a = session.query(Variants).filter(Variants.Gene == "AGL").first()
print(a.Chromosome_Position)
This allows me to return the values in a.Chromosome_Position. Likewise if I change the method reflect_col to:
@event.listens_for(Table, "column_reflect")
def reflect_col(inspector, table, column_info):
column_info['key'] = column_info['name'].replace('1st time', 'firsttime')
a = session.query(Variants).filter(Variants.Gene == "AGL").first()
print a.firsttime
This also allow me to return the values in a.firsttime. However I am not able to alter both attributes of the column headers at the same time so changing the method to:
@event.listens_for(Table, "column_reflect")
def reflect_col(inspector, table, column_info):
column_info['key'] = column_info['name'].replace(' ', '_')
column_info['key'] = column_info['name'].replace('1st time', 'secondcheck')
will only modify the last call to column_info which in this case is the column '1st time'. So I can return the values of a.firsttime but not a.Chromosome_Position. How do I change both column name features in the same reflection event?
It seems that you are overwriting the first value after the second replacement. I hope chaining the .replace works:
@event.listens_for(Table, "column_reflect")
def reflect_col(inspector, table, column_info):
column_info['key'] = column_info['name'].replace(' ', '_').replace('1st_time', 'secondcheck')
[EDIT]: You have to also make sure that the changes wouldn't clash.
Because in this example the first change replaces spaces with underscore, you have to adapt the second replacement, as it's already called 1st_time when the second replace is called.

Insert and update with core SQLAlchemy

I have a database that I don't have metadata or orm classes for (the database already exists).
I managed to get the select stuff working by:
from sqlalchemy.sql.expression import ColumnClause
from sqlalchemy.sql import table, column, select, update, insert
from sqlalchemy.ext.declarative import *
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine
import pyodbc

db = create_engine('mssql+pyodbc://pytest')
Session = sessionmaker(bind=db)
session = Session()

# Build the projection as lightweight column clauses (no metadata needed).
cols = [
    column("field1"),
    column("field2"),
    column("field3"),
]

s = select(cols)
s.append_from('table')
s.append_whereclause("field1 = 'abc'")
s = s.limit(10)

result = session.execute(s)
out = result.fetchall()
print(out)
So far so good.
The only way I can get an update/insert working is by executing a raw query like:
session.execute(<Some sql>)
I would like to make it so I can make a class out of that like:
u = Update("table")
u.Set("file1","some value")
u.Where(<some conditon>)
seasion.execute(u)
Tried (this is just one of the approaches I tried):
i = insert("table")
v = i.values([{"name":"name1"}, {"name":"name2"}])
u = update("table")
u = u.values({"name": "test1"})
I can't get that to execute on:
session.execute(i)
or
session.execute(u)
Any suggestion how to construct an insert or update without writing ORM models?
As you can see from the SQLAlchemy Overview documentation, sqlalchemy is build with two layers: ORM and Core. Currently you are using only some constructs of the Core and building everything manually.
In order to use Core you should let SQLAlchemy know some meta information about your database in order for it to operate on it. Assuming you have a table mytable with columns field1, field2, field3 and a defined primary key, the code below should perform all the tasks you need:
# MetaData and Table were used below but never imported in the original
# snippet; they come from the top-level sqlalchemy package.
from sqlalchemy import MetaData, Table
from sqlalchemy.sql import table, column, select, update, insert

# define meta information — autoload reflects columns/keys from the live
# database (NOTE(review): 'engine' and 'session' are assumed to exist from
# the asker's surrounding code).
metadata = MetaData(bind=engine)
mytable = Table('mytable', metadata, autoload=True)

# select
s = mytable.select()  # or:
#s = select([mytable]) # or (if only certain columns):
#s = select([mytable.c.field1, mytable.c.field2, mytable.c.field3])
s = s.where(mytable.c.field1 == 'abc')
result = session.execute(s)
out = result.fetchall()
print(out)

# insert
i = insert(mytable)
i = i.values({"field1": "value1", "field2": "value2"})
session.execute(i)

# update
u = update(mytable)
u = u.values({"field3": "new_value"})
u = u.where(mytable.c.id == 33)
session.execute(u)

Categories