ORM Model class for Existing table in DB - python

I need to query an existing table in a Microsoft SQL Server database. I tried the code below, but it is not ORM-based and I don't know how to write an ORM query. Can someone please help me?
Below is my code; can someone help me convert it into an ORM-based model class?
from sqlalchemy import create_engine, select, MetaData, Table, and_
import json, boto3, os
from tests import test_dir

filename = os.path.join(test_dir, 'config.json')
with open(filename, 'r') as secret_data:
    data = json.load(secret_data)

try:
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=data['region_name']
    )
    get_secret_value_response = client.get_secret_value(
        SecretId=data['secret_name']
    )
    response = json.loads(get_secret_value_response['SecretString'])
except Exception as e:
    raise e

SERVER = response['server']
DATABASE = response['database']
DRIVER = 'SQL Server'
USERNAME = response['username']
PASSWORD = response['password'].strip()

DB_URL = f"mssql://{USERNAME}:{PASSWORD}@{SERVER}/{DATABASE}?driver={DRIVER}"
engine = create_engine(DB_URL)

metadata = MetaData(bind=None)
table = Table(
    'Alias_Drug_List',
    metadata,
    autoload=True,
    autoload_with=engine
)

newDrug = select([table.columns.Alias, table.columns.NDC]).where(and_(table.columns.NDC == 'New Drug Cycle'))
activeDrug = select([table.columns.Alias])

connection = engine.connect()
new_drug = connection.execute(newDrug).fetchall()
active_drug = connection.execute(activeDrug).fetchall()
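For reference, one way to get an ORM mapping for an existing table is to reflect it with automap. The sketch below reuses the engine from the code above and assumes the Alias_Drug_List table has a primary key (automap requires one) and the Alias and NDC columns shown in the question; it is a sketch, not a tested implementation.

# A minimal sketch: reflect the existing table and query it through the ORM.
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import sessionmaker

Base = automap_base()
Base.prepare(engine, reflect=True)             # reflect the tables in the database

# Mapped class generated for the existing table (needs a primary key)
AliasDrugList = Base.classes.Alias_Drug_List

Session = sessionmaker(bind=engine)
orm_session = Session()

new_drug = (
    orm_session.query(AliasDrugList.Alias, AliasDrugList.NDC)
    .filter(AliasDrugList.NDC == 'New Drug Cycle')
    .all()
)
active_drug = orm_session.query(AliasDrugList.Alias).all()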

Related

perform upsert operation on postgres like pandas to_sql function using python

Before asking this question, I read many links about the UPSERT operation on Postgres:
PostgreSQL Upsert Using INSERT ON CONFLICT statement
Anyway to Upsert database using PostgreSQL in Python
But my question is different, since the functionality I need is different. What I want is to implement something like the pandas to_sql function, which has the following features:
Automatically creates table
Keeps the data types of each column
The only drawback of to_sql is that it doesn't perform an UPSERT on Postgres. Is there any way to implement the expected functionality (automatically create the table based on the columns, perform an UPSERT, and keep the data types) by passing a dataframe to it?
Previously implemented code using Pandas to_sql function:
class PostgreSQL:
    def __init__(self):
        postgres_config = config_dict[Consts.POSTGRES.value]
        self.host = postgres_config[Consts.HOST.value]
        self.port = postgres_config[Consts.PORT.value]
        self.db_name = postgres_config[Consts.DB_NAME.value]
        self.username = postgres_config[Consts.USERNAME.value]
        self.password = postgres_config[Consts.PASSWORD.value]

    def get_connection(self) -> object:
        url_schema = Consts.POSTGRES_URL_SCHEMA.value.format(
            self.username, self.password, self.host, self.port, self.db_name
        )
        try:
            engine = create_engine(url_schema)
            return engine
        except Exception as e:
            logger.error('Make sure you have provided correct credentials for the DB connection.')
            raise e

    def save_df_to_db(self, df: object, table_name: str) -> None:
        df.to_sql(table_name, con=self.get_connection(), if_exists='append')
I have written a fairly generic piece of code that performs an UPSERT, which is not officially supported by to_sql on Postgres (as of December 2021), using a Pandas dataframe in an efficient way.
The following code updates rows when the primary key already exists; otherwise it creates the table (in case the table name doesn't exist) and adds the new records to it.
Code:
import os

import numpy as np
import pandas as pd
from sqlalchemy import create_engine, Table
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.ext.automap import automap_base


class PostgreSQL:
    def __init__(self):
        postgres_config = config_dict[Consts.POSTGRES.value]
        self.host = postgres_config[Consts.HOST.value]
        self.port = postgres_config[Consts.PORT.value]
        self.db_name = postgres_config[Consts.DB_NAME.value]
        self.username = postgres_config[Consts.USERNAME.value]
        self.password = postgres_config[Consts.PASSWORD.value]

    def get_connection(self) -> object:
        url_schema = 'postgresql://{}:{}@{}:{}/{}'.format(
            self.username, self.password, self.host, self.port, self.db_name
        )
        try:
            engine = create_engine(url_schema)
            return engine
        except Exception as e:
            logger.error('Make sure you have provided correct credentials for the DB connection.')
            raise e

    def run_query(self, query: str) -> list:
        engine = self.get_connection()
        return engine.execute(query).fetchall()

    def save_df_to_db(self, df: object, table_name: str) -> None:
        root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
        engine = self.get_connection()
        add_primary_key_query = get_query(root_dir, Directories.COMMON.value, 'add_primary_key.sql', table_name)
        table_existence_query = get_query(root_dir, Directories.COMMON.value, 'table_existence.sql', table_name)
        if not engine.execute(table_existence_query).first()[0]:  # if table does not exist
            logger.info('Create table automatically and from scratch!')
            df.to_sql(table_name, con=self.get_connection(), if_exists='append')
            engine.execute(add_primary_key_query)
        else:
            try:
                # Normalise pandas NaT values to None so they map to SQL NULL
                df = df.replace("NaT", None)
                df = df.replace(pd.NaT, None)
                df = df.replace({pd.NaT: None})
                df_dict = df.to_dict('records')
            except AttributeError as e:
                logger.error('Empty Dataframe!')
                raise e
            with engine.connect() as connection:
                logger.info('Table already exists!')
                base = automap_base()
                base.prepare(engine, reflect=True)
                target_table = Table(table_name, base.metadata,
                                     autoload=True, autoload_with=engine)
                # Insert in chunks of 1000 rows, updating on primary-key conflicts
                chunks = [df_dict[i:i + 1000] for i in range(0, len(df_dict), 1000)]
                for chunk in chunks:
                    stmt = insert(target_table).values(chunk)
                    update_dict = {c.name: c for c in stmt.excluded if not c.primary_key}
                    connection.execute(stmt.on_conflict_do_update(
                        constraint=f'{table_name}_pkey',
                        set_=update_dict)
                    )
                logger.info('Saving data is successfully done.')
Table existence query:
SELECT EXISTS (
SELECT FROM information_schema.tables
WHERE table_schema = 'public'
AND table_name = '{}'
);
Add primary key query:
ALTER TABLE {} add primary key (id);
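A minimal usage sketch, assuming the config_dict, Consts, logger and SQL-file helpers above are already set up; the table name and columns here are made up for illustration:

import pandas as pd

df = pd.DataFrame(
    [{'id': 1, 'name': 'aspirin'}, {'id': 2, 'name': 'ibuprofen'}]
)

pg = PostgreSQL()
# The first call creates the table and adds the primary key; later calls
# with the same ids update the existing rows instead of failing.
pg.save_df_to_db(df, table_name='drugs')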

SqlAlchemy & pyMysql connection pooling on a lambda with multiple DB connections

So the issue is that I have multiple databases that I want to use with the same database pool in SQLAlchemy. This resides on a Lambda, and the pool is created upon initiation of the Lambda. I want the subsequent DB connections to use the existing pool.
What works just fine is the initial pool connection bpConnect and any subsequent queries to that connection.
What DOESN'T work is the companyConnect connection. I get the following error:
sqlalchemy.exc.StatementError: (builtins.AttributeError) 'XRaySession' object has no attribute 'cursor'
I have these for my connections:
# Pooling
import sqlalchemy.pool as pool

#################### Engines ###################################################

def bpGetConnection():
    engine_endpoint = f"mysql+pymysql://{os.environ['DB_USERNAME']}:{os.environ['DB_PASSWORD']}@{os.environ['DB_HOST']}:{str(os.environ['DB_PORT'])}/{os.environ['database']}"
    engine = create_engine(engine_endpoint, echo_pool=True)
    session = XRaySessionMaker(bind=engine, autoflush=True, autocommit=False)
    db = session()
    return db

bpPool = pool.StaticPool(bpGetConnection)

def companyGetConnection(database):
    engine_endpoint = f"mysql+pymysql://{os.environ['DB_USERNAME']}:{os.environ['DB_PASSWORD']}@{os.environ['DB_HOST']}:{str(os.environ['DB_PORT'])}/{database}"
    compEngine = create_engine(engine_endpoint, pool=bpPool)
    session = XRaySessionMaker(bind=compEngine, autoflush=True, autocommit=False)
    db = session()
    return db

#################### POOLING #############################################

def bpConnect():
    conn = bpPool.connect()
    return conn

def companyConnect(database):
    conn = companyGetConnection(database)
    return conn

#################################################################
They are called in this example:
from connections import companyConnect, bpConnect
from models import Company, Customers

def getCustomers(companyID):
    db = bpConnect()
    myQuery = db.query(Company).filter(Company.id == companyID).one()

    compDB = companyConnect(myQuery.database)
    customers = compDB.query(Customers).all()
    return customers
I figured out how to do it with dynamic pools on a lambda:
import os

import sqlalchemy.pool as pool
from sqlalchemy import create_engine
from sqlalchemy.orm import scoped_session
# XRaySessionMaker comes from the AWS X-Ray SDK for Python (aws_xray_sdk)

class DBRegistry(object):
    _db = {}

    def get(self, url, **kwargs):
        if url not in self._db:
            engine = create_engine(url, **kwargs)
            Session = XRaySessionMaker(bind=engine, autoflush=True, autocommit=False)
            session = scoped_session(Session)
            self._db[url] = session
        return self._db[url]

compDB = DBRegistry()

def bpGetConnection():
    engine_endpoint = f"mysql+pymysql://{os.environ['DB_USERNAME']}:{os.environ['DB_PASSWORD']}@{os.environ['DB_HOST']}:{str(os.environ['DB_PORT'])}/{os.environ['database']}?charset=utf8"
    engine = create_engine(engine_endpoint)
    session = XRaySessionMaker(bind=engine, autoflush=True, autocommit=False)
    db = session()
    return db

bpPool = pool.QueuePool(bpGetConnection, pool_size=500, timeout=11)

def bpConnect():
    conn = bpPool.connect()
    return conn

def companyConnect(database):
    engine_endpoint = f"mysql+pymysql://{os.environ['DB_USERNAME']}:{os.environ['DB_PASSWORD']}@{os.environ['DB_HOST']}:{str(os.environ['DB_PORT'])}/{database}?charset=utf8"
    conn = compDB.get(engine_endpoint, poolclass=pool.QueuePool)
    return conn
So basically it uses one pool for the constant connection needed on the main database and another pool for whichever company database is needed at the moment. When a connection to one of those company databases is needed, it checks whether a pool for that database already exists in the registry of pools; if not, it creates one and registers it.
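With that registry in place, a company lookup can reuse the cached scoped session per database URL. The sketch below uses a hypothetical getCompanyCustomers helper and the Customers model from the question:

from connections import companyConnect
from models import Customers

def getCompanyCustomers(database_name):
    # companyConnect returns the scoped session cached in DBRegistry for this
    # database URL, creating the engine and session only on the first call.
    compDB = companyConnect(database_name)
    return compDB.query(Customers).all()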

Query binary data using sqlalchemy with PostgreSQL

I have a simple database storing an attachment as a blob.
CREATE TABLE public.attachment
(
    id integer NOT NULL,
    attachdata oid,
    CONSTRAINT attachment_pkey PRIMARY KEY (id)
)

-- Import a file
INSERT INTO attachment (id, attachdata) VALUES (1, lo_import('C:\temp\blob_import.txt'))

-- Export back as file.
SELECT lo_export(attachdata, 'C:\temp\blob_export_postgres.txt') FROM attachment WHERE id = 1
I'm able to read this file back using psycopg2 directly.
from psycopg2 import connect
con = connect(dbname="blobtest", user="postgres", password="postgres", host="localhost")
cur = con.cursor()
cur.execute("SELECT attachdata FROM attachment WHERE id = 1")
oid = cur.fetchone()[0]
obj = con.lobject(oid)
obj.export('C:\\temp\\blob_export_psycopg.txt')
When I try the same using sqlalchemy, the attachdata is a bytestring of zeros.
I've tested the following code with types like BLOB, LargeBinary and BINARY.
The size of the attachdata bytestring seems to be the OID's value.
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, Binary
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

Base = declarative_base()
Session = sessionmaker()
engine = create_engine('postgresql://postgres:postgres@localhost:5432/blobtest', echo=True)
Base.metadata.create_all(engine)
Session.configure(bind=engine)

class Attachment(Base):
    __tablename__ = "attachment"
    id = Column(Integer, primary_key=True)
    attachdata = Column(Binary)

session = Session()
attachment = session.query(Attachment).get(1)
with open('C:\\temp\\blob_export_sqlalchemy.txt', 'wb') as f:
    f.write(attachment.attachdata)
I've searched the sqlalchemy documentation and various sources and couldn't find a solution for how to export the binary data using sqlalchemy.
I had the same problem. There seems to be no way to get the large object data via the ORM. So I combined the ORM and the psycopg2 engine like this:
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy.dialects.postgresql import OID

Base = declarative_base()
session_factory = sessionmaker()
engine = create_engine('postgresql+psycopg2://postgres:postgres@localhost:5432/postgres', echo=True)
session_factory.configure(bind=engine)
Session = scoped_session(session_factory)

class Attachment(Base):
    __tablename__ = "attachment"
    id = Column(Integer, primary_key=True)
    oid = Column(OID)

    @classmethod
    def insert_file(cls, filename):
        conn = engine.raw_connection()
        # Create a new large object and copy the file contents into it
        l_obj = conn.lobject(0, 'wb', 0)
        with open(filename, 'rb') as f:
            l_obj.write(f.read())
        conn.commit()
        conn.close()
        session = Session()
        attachment = cls(oid=l_obj.oid)
        session.add(attachment)
        session.commit()
        return attachment.id

    @classmethod
    def get_file(cls, attachment_id, filename):
        session = Session()
        attachment = session.query(Attachment).get(attachment_id)
        conn = engine.raw_connection()
        # Open the stored large object and stream it out to a file
        l_obj = conn.lobject(attachment.oid, 'rb')
        with open(filename, 'wb') as f:
            f.write(l_obj.read())
        conn.close()

Base.metadata.create_all(engine)

if __name__ == '__main__':
    my_id = Attachment.insert_file(r'C:\path\to\file')
    Attachment.get_file(my_id, r'C:\path\to\file_out')
Not very elegant but it seems to work.
Update:
I am using events now
from sqlalchemy import create_engine, event
from sqlalchemy import Column, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, scoped_session, object_session
from sqlalchemy.dialects.postgresql import OID

Base = declarative_base()
session_factory = sessionmaker()
engine = create_engine('postgresql+psycopg2://postgres:postgres@localhost:5432/postgres', echo=True)
session_factory.configure(bind=engine)
Session = scoped_session(session_factory)

class Data(Base):
    __tablename__ = "attachment"
    id = Column(Integer, primary_key=True)
    oid = Column(OID)

Base.metadata.create_all(engine)

@event.listens_for(Data, 'after_delete')
def remove_large_object_after_delete(_, connection, target):
    # Unlink the large object when its row is deleted
    raw_connection = connection.connection
    l_obj = raw_connection.lobject(target.oid, 'n')
    l_obj.unlink()
    raw_connection.commit()

@event.listens_for(Data, 'before_insert')
def add_large_object_before_insert(_, connection, target):
    # Create the large object and store its OID on the row
    raw_connection = connection.connection
    l_obj = raw_connection.lobject(0, 'wb', 0)
    target.oid = l_obj.oid
    l_obj.write(target.ldata)
    raw_connection.commit()

@event.listens_for(Data, 'load')
def inject_large_object_after_load(target, _):
    # Read the large object back into the instance when the row is loaded
    session = object_session(target)
    conn = session.get_bind().raw_connection()
    l_obj = conn.lobject(target.oid, 'rb')
    target.ldata = l_obj.read()

if __name__ == '__main__':
    session = Session()

    # Put
    data = Data()
    data.ldata = b'your large data'
    session.add(data)
    session.commit()
    id = data.id

    # Get
    data2 = session.query(Data).get(id)
    print(data.ldata)  # Your large data is here

    # Delete
    session.delete(data)
    session.delete(data2)
    session.commit()

    session.flush()
    session.close()
Works well so far.
I don't understand why Postgres large objects are so neglected these days. I use them a ton. Or rather, I want to, but it's challenging, especially with asyncio...

Autoflush error and filter_by() query giving unexpected result

My goal is to read data from an Excel sheet and create a database on a SQL server. I am trying to write sample code using SQLAlchemy, which I am new to. The code I have so far is:
import time
from sqlalchemy import create_engine, Column, Integer, Date, String, Table, MetaData, table
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

engine = create_engine('sqlite:///:memory:', echo=False)
Base = declarative_base()

class blc(Base):
    __tablename__ = 'BLC_Databse'
    date = Column(String, primary_key=True)
    RES = Column(String)
    BTTLCOLUMN = Column(String)
    CS_HR = Column(Integer)

Base.metadata.create_all(engine)

sample = blc(date=time.strftime("%m/%d/%y"), RES='BDY_21', BTTLCOLUMN='2075', CS_HR=563)

Session = sessionmaker(bind=engine)
session = Session()

sample2 = blc(date=time.strftime("%m/%d/%y"), RES='BDY_21', BTTLCOLUMN='2076', CS_HR=375)
session.add(sample2)
session.commit()

with session.no_autoflush:
    result = session.query(blc).filter_by(RES='BDY_21').first()
print(result)
When I perform a filter query (which I am assuming is similar to a WHERE clause in SQL), it gives <__main__.blc object at 0x00705770> instead of the data I expect.
Eventually, I plan to put the insert in a loop that reads data from an Excel sheet.
Result is an object that references the class blc. To get the desired column, I had to do result.ColName.
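For example, with the model defined above:

result = session.query(blc).filter_by(RES='BDY_21').first()
print(result.RES, result.BTTLCOLUMN, result.CS_HR)  # prints the column values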

Converting SQL commands to Python's ORM

How would you convert the following code to a Python ORM, for example with SQLAlchemy?
# 1 Putting data into Postgres
import os, pg, sys, re, psycopg2

# conn = psycopg2.connect("dbname='tkk' host='localhost' port='5432' user='noa' password='123'")
conn = psycopg2.connect("dbname=tk user=naa password=123")
cur = conn.cursor()
cur.execute("""INSERT INTO courses (course_nro)
               VALUES ( %(course_nro)s )""", dict(course_nro='abcd'))
conn.commit()

# 2 Fetching
cur.execute("SELECT * FROM courses")
print(cur.fetchall())
Examples of the two corresponding constructs in SQLAlchemy:
insert
sqlalchemy.sql.expression.insert(table, values=None, inline=False, **kwargs)
select
sqlalchemy.sql.expression.select(columns=None, whereclause=None, from_obj=[], **kwargs)
After the initial declarations, you can do something like this:
o = Course(course_nro='abcd')
session.add(o)
session.commit()
and
print(session.query(Course).all())
The declarations could look something like this:
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# create an engine, and a base class
engine = create_engine('postgresql://naa:123@localhost/tk')
DeclarativeBase = declarative_base(bind=engine)
metadata = DeclarativeBase.metadata

# create a session
Session = sessionmaker(bind=engine)
session = Session()

# declare the models
class Course(DeclarativeBase):
    __tablename__ = 'courses'
    # the declarative mapping needs a primary key column
    course_nro = Column('course_nro', CHAR(12), primary_key=True)
This declarative method is just one way of using sqlalchemy.
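For comparison, roughly the same insert and select can be written with SQLAlchemy 1.x-style Core expressions, without going through a Session. This is a sketch that reuses the engine and the Course model declared above via Course.__table__:

# Core-style sketch: the same INSERT and SELECT as expression constructs.
courses = Course.__table__

with engine.connect() as conn:
    conn.execute(courses.insert().values(course_nro='abcd'))
    rows = conn.execute(select([courses])).fetchall()
    print(rows)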
Even though this is old, more examples can't hurt, right? I thought I'd demonstrate how to do this with PyORMish.
from pyormish import Model

class Course(Model):
    _TABLE_NAME = 'courses'
    _PRIMARY_FIELD = 'id'  # or whatever your primary field is
    _SELECT_FIELDS = ('id', 'course_nro')
    _COMMIT_FIELDS = ('course_nro',)

Model.db_config = dict(
    DB_TYPE='postgres',
    DB_CONN_STRING='postgre://naa:123@localhost/tk'
)
To create:
new_course = Course().create(course_nro='abcd')
To select:
# return the first row WHERE course_nro='abcd'
new_course = Course().get_by_fields(course_nro='abcd')
