Query binary data using sqlalchemy with PostgreSQL - python

I have a simple database storing an attachment as a blob.
CREATE TABLE public.attachment
(
    id integer NOT NULL,
    attachdata oid,
    CONSTRAINT attachment_pkey PRIMARY KEY (id)
)
-- Import a file
INSERT INTO attachment (id, attachdata) VALUES (1, lo_import('C:\temp\blob_import.txt'))
-- Export back as file.
SELECT lo_export(attachdata, 'C:\temp\blob_export_postgres.txt') FROM attachment WHERE id = 1
I'm able to read this file back using psycopg2 directly.
from psycopg2 import connect
con = connect(dbname="blobtest", user="postgres", password="postgres", host="localhost")
cur = con.cursor()
cur.execute("SELECT attachdata FROM attachment WHERE id = 1")
oid = cur.fetchone()[0]
obj = con.lobject(oid)
obj.export('C:\\temp\\blob_export_psycopg.txt')
When I try the same using sqlalchemy, the attachdata is a bytestring of zeros.
I've tested the following code with types like BLOB, LargeBinary and BINARY.
The size of the attachdata bytestring seems to be the OID's value.
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, Binary
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

Base = declarative_base()
Session = sessionmaker()
engine = create_engine('postgresql://postgres:postgres@localhost:5432/blobtest', echo=True)
Base.metadata.create_all(engine)
Session.configure(bind=engine)

class Attachment(Base):
    __tablename__ = "attachment"
    id = Column(Integer, primary_key=True)
    attachdata = Column(Binary)

session = Session()
attachment = session.query(Attachment).get(1)
with open('C:\\temp\\blob_export_sqlalchemy.txt', 'wb') as f:
    f.write(attachment.attachdata)
I've searched the SQLAlchemy documentation and various other sources, but couldn't find a solution for how to export the binary data using SQLAlchemy.

I had the same problem. There seems to be no way to get the large object data via the ORM. So I combined the ORM and the psycopg2 engine like this:
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy.dialects.postgresql import OID

Base = declarative_base()
session_factory = sessionmaker()
engine = create_engine('postgresql+psycopg2://postgres:postgres@localhost:5432/postgres', echo=True)
Base.metadata.create_all(engine)
session_factory.configure(bind=engine)
Session = scoped_session(session_factory)

class Attachment(Base):
    __tablename__ = "attachment"
    id = Column(Integer, primary_key=True)
    oid = Column(OID)

    @classmethod
    def insert_file(cls, filename):
        conn = engine.raw_connection()
        l_obj = conn.lobject(0, 'wb', 0)
        with open(filename, 'rb') as f:
            l_obj.write(f.read())
        conn.commit()
        conn.close()
        session = Session()
        attachment = cls(oid=l_obj.oid)
        session.add(attachment)
        session.commit()
        return attachment.id

    @classmethod
    def get_file(cls, attachment_id, filename):
        session = Session()
        attachment = session.query(Attachment).get(attachment_id)
        conn = engine.raw_connection()
        l_obj = conn.lobject(attachment.oid, 'rb')
        with open(filename, 'wb') as f:
            f.write(l_obj.read())
        conn.close()

if __name__ == '__main__':
    my_id = Attachment.insert_file(r'C:\path\to\file')
    Attachment.get_file(my_id, r'C:\path\to\file_out')
Not very elegant but it seems to work.
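If all you need is the payload itself, a leaner route is to let the server dereference the large object for you. This is only a sketch, assuming PostgreSQL 9.4+ (which provides the server-side lo_get() function) and SQLAlchemy 1.4+ parameter style; read_attachment is a hypothetical helper, not part of the code above:
from sqlalchemy import text

def read_attachment(attachment_id):
    # lo_get(oid) returns the large object's contents as bytea in one query
    with engine.connect() as conn:
        row = conn.execute(
            text("SELECT lo_get(oid) FROM attachment WHERE id = :id"),
            {"id": attachment_id},
        ).first()
        # psycopg2 hands back a memoryview for bytea, so convert to bytes
        return bytes(row[0]) if row else None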
Update:
I am using events now:
from sqlalchemy import create_engine, event
from sqlalchemy import Column, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, scoped_session, object_session
from sqlalchemy.dialects.postgresql import OID

Base = declarative_base()
session_factory = sessionmaker()
engine = create_engine('postgresql+psycopg2://postgres:postgres@localhost:5432/postgres', echo=True)
Base.metadata.create_all(engine)
session_factory.configure(bind=engine)
Session = scoped_session(session_factory)

class Data(Base):
    __tablename__ = "attachment"
    id = Column(Integer, primary_key=True)
    oid = Column(OID)

@event.listens_for(Data, 'after_delete')
def remove_large_object_after_delete(_, connection, target):
    raw_connection = connection.connection
    l_obj = raw_connection.lobject(target.oid, 'n')
    l_obj.unlink()
    raw_connection.commit()

@event.listens_for(Data, 'before_insert')
def add_large_object_before_insert(_, connection, target):
    raw_connection = connection.connection
    l_obj = raw_connection.lobject(0, 'wb', 0)
    target.oid = l_obj.oid
    l_obj.write(target.ldata)
    raw_connection.commit()

@event.listens_for(Data, 'load')
def inject_large_object_after_load(target, _):
    session = object_session(target)
    conn = session.get_bind().raw_connection()
    l_obj = conn.lobject(target.oid, 'rb')
    target.ldata = l_obj.read()

if __name__ == '__main__':
    session = Session()
    # Put
    data = Data()
    data.ldata = b'your large data'
    session.add(data)
    session.commit()
    id = data.id
    # Get
    data2 = session.query(Data).get(id)
    print(data2.ldata)  # Your large data is here
    # Delete
    session.delete(data)
    session.delete(data2)
    session.commit()
    session.flush()
    session.close()
Works well so far.
I don't understand why Postgres large objects are so neglected these days. I use them a ton. Or rather, I'd like to, but it's challenging, especially with asyncio...

Related

ORM Model class for Existing table in DB

I need an ORM model class for an existing table in a Microsoft SQL Server database. What I tried seems wrong; it is not ORM-based, and I don't know how to write an ORM query. Can someone please help me?
Below is my code. Can someone help me convert it into an ORM-based model class?
from sqlalchemy import create_engine, select, MetaData, Table, and_
import json, boto3, os
from tests import test_dir

filename = os.path.join(test_dir, 'config.json')
with open(filename, 'r') as secret_data:
    data = json.load(secret_data)

try:
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=data['region_name']
    )
    get_secret_value_response = client.get_secret_value(
        SecretId=data['secret_name']
    )
    response = json.loads(get_secret_value_response['SecretString'])
except Exception as e:
    raise e

SERVER = response['server']
DATABASE = response['database']
DRIVER = 'SQL Server'
USERNAME = response['username']
PASSWORD = response['password'].strip()

DB_URL = f"mssql://{USERNAME}:{PASSWORD}@{SERVER}/{DATABASE}?driver={DRIVER}"
engine = create_engine(DB_URL)
metadata = MetaData(bind=None)
table = Table(
    'Alias_Drug_List',
    metadata,
    autoload=True,
    autoload_with=engine
)
newDrug = select([table.columns.Alias, table.columns.NDC]).where(and_(table.columns.NDC == 'New Drug Cycle'))
activeDrug = select([table.columns.Alias])
connection = engine.connect()
new_drug = connection.execute(newDrug).fetchall()
active_drug = connection.execute(activeDrug).fetchall()
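As a rough sketch of the ORM route (not tested against this schema): SQLAlchemy's automap extension can reflect the existing Alias_Drug_List table into a mapped class, assuming the table has a primary key, which automap requires. The class and attribute names below are assumptions derived from the columns used above:
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session

AutoBase = automap_base()
AutoBase.prepare(engine, reflect=True)            # reflect tables and build mapped classes
AliasDrugList = AutoBase.classes.Alias_Drug_List  # class named after the table

session = Session(engine)
# ORM equivalent of the newDrug select above
new_drug = (
    session.query(AliasDrugList.Alias, AliasDrugList.NDC)
    .filter(AliasDrugList.NDC == 'New Drug Cycle')
    .all()
)
# ORM equivalent of the activeDrug select above
active_drug = session.query(AliasDrugList.Alias).all()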

Exporting a pandas df to sqlite leads to duplicate datasets instead of one updated dataset

I'm uploading a pandas dataframe from a CSV file into a SQLite database via SQLAlchemy.
The initial filling works just fine, but when I rerun the following code, the same data is exported again and the database contains two identical datasets.
How can I change the code so that only new or changed data is uploaded to the database?
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, DateTime
from sqlalchemy.orm import sessionmaker
from datetime import datetime
import pandas as pd

# Set up of the engine to connect to the database
# the urlquote is used for passing the password which might contain special characters such as "/"
engine = create_engine('sqlite:///historical_data3.db')
conn = engine.connect()
Base = declarative_base()

# Declaration of the class in order to write into the database. This structure is standard and should align with SQLAlchemy's doc.
class Timeseries_Values(Base):
    __tablename__ = 'Timeseries_Values'
    # id = Column(Integer)
    Date = Column(DateTime, primary_key=True)
    ProductID = Column(Integer, primary_key=True)
    Value = Column(Numeric)

    def __repr__(self):
        return "(Date='%s', ProductID='%s', Value='%s')" % (self.Date, self.ProductID, self.Value)

fileToRead = r'V:\PYTHON\ProjectDatabase\HistoricalDATA_V13.csv'
tableToWriteTo = 'Timeseries_Values'

# Pandas creates a dataframe with ; as separator.
df = pd.read_csv(fileToRead, sep=';', decimal=',', parse_dates=['Date'], dayfirst=True)

# orient='records' is the key here: it matches the format the docs describe for bulk inserts.
listToWrite = df.to_dict(orient='records')

metadata = sqlalchemy.schema.MetaData(bind=engine, reflect=True)
table = sqlalchemy.Table(tableToWriteTo, metadata, autoload=True)

# Open the session
Session = sessionmaker(bind=engine)
session = Session()

# Insert the dataframe into the database in one bulk
conn.execute(table.insert(), listToWrite)

# Commit the changes
session.commit()

# Close the session
session.close()
This is working now; I've added the df.to_sql code:
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, DateTime
from sqlalchemy.orm import sessionmaker
from datetime import datetime
import pandas as pd

# Set up of the engine to connect to the database
# the urlquote is used for passing the password which might contain special characters such as "/"
engine = create_engine('sqlite:///historical_data3.db')
conn = engine.connect()
Base = declarative_base()

# Declaration of the class in order to write into the database. This structure is standard and should align with SQLAlchemy's doc.
class Timeseries_Values(Base):
    __tablename__ = 'Timeseries_Values'
    # id = Column(Integer)
    Date = Column(DateTime, primary_key=True)
    ProductID = Column(Integer, primary_key=True)
    Value = Column(Numeric)

fileToRead = r'V:\PYTHON\ProjectDatabase\HistoricalDATA_V13.csv'
tableToWriteTo = 'Timeseries_Values'

# Pandas creates a dataframe with ; as separator.
df = pd.read_csv(fileToRead, sep=';', decimal=',', parse_dates=['Date'], dayfirst=True)

# orient='records' is the key here: it matches the format the docs describe for bulk inserts.
listToWrite = df.to_dict(orient='records')

df.to_sql(name='Timeseries_Values', con=conn, if_exists='replace')

metadata = sqlalchemy.schema.MetaData(bind=engine, reflect=True)
table = sqlalchemy.Table(tableToWriteTo, metadata, autoload=True)

# Open the session
Session = sessionmaker(bind=engine)
session = Session()

# Insert the dataframe into the database in one bulk
conn.execute(table.insert(), listToWrite)

# Commit the changes
session.commit()

# Close the session
session.close()
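A note on the design choice: if_exists='replace' simply drops and rewrites the whole table on every run. If the intent is really to add only new or changed rows, one possible sketch (an assumption, not the asker's solution; it needs SQLAlchemy 1.4+ and SQLite 3.24+, and relies on the Date/ProductID primary key declared above) is a dialect-specific upsert instead of the plain insert:
from sqlalchemy.dialects.sqlite import insert as sqlite_insert

stmt = sqlite_insert(table)
stmt = stmt.on_conflict_do_update(
    index_elements=['Date', 'ProductID'],    # the composite primary key
    set_={'Value': stmt.excluded.Value},     # overwrite Value for existing keys
)
with engine.begin() as connection:
    connection.execute(stmt, listToWrite)    # inserts new rows, updates changed ones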

Autoflush error and filter_by() query giving unexpected result

My goal is to read data from an Excel sheet and create a database on a SQL server. I am trying to write some sample code using SQLAlchemy, and I am new to it. The code that I have so far is:
import time
from sqlalchemy import create_engine, Column, Integer, Date, String, Table, MetaData, table
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

engine = create_engine('sqlite:///:memory:', echo=False)
Base = declarative_base()

class blc(Base):
    __tablename__ = 'BLC_Databse'
    date = Column(String, primary_key=True)
    RES = Column(String)
    BTTLCOLUMN = Column(String)
    CS_HR = Column(Integer)

Base.metadata.create_all(engine)
sample = blc(date=time.strftime("%m/%d/%y"), RES='BDY_21', BTTLCOLUMN='2075', CS_HR=563)
Session = sessionmaker(bind=engine)
session = Session()
sample2 = blc(date=time.strftime("%m/%d/%y"), RES='BDY_21', BTTLCOLUMN='2076', CS_HR=375)
session.add(sample2)
session.commit()
with session.no_autoflush:
    result = session.query(blc).filter_by(RES='BDY_21').first()
print(result)
When I perform a filter query (which I assume is similar to a WHERE clause in SQL), it prints <__main__.blc object at 0x00705770> instead of the values I expected.
Eventually, I plan to run the insert in a loop that reads data from an Excel sheet.
result is an instance of the blc class, so printing it just shows the default object repr. To get the desired column, I had to access it as an attribute, e.g. result.ColName.
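A short sketch of what that looks like, with the column names taken from the blc model above:
result = session.query(blc).filter_by(RES='BDY_21').first()
if result is not None:
    # Mapped columns are plain attributes on the returned instance
    print(result.date, result.RES, result.BTTLCOLUMN, result.CS_HR)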

mysql Compress() with sqlalchemy

table:
id(integer primary key)
data(blob)
I use mysql and sqlalchemy.
To insert data I use:
o = Demo()
o.data = mydata
session.add(o)
session.commit()
I would like to insert into the table like this:
INSERT INTO table(data) VALUES(COMPRESS(mydata))
How can I do this using sqlalchemy?
you can assign a SQL function to the attribute:
from sqlalchemy import func
object.data = func.compress(mydata)
session.add(object)
session.commit()
Here's an example using a more DB-agnostic lower() function:
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()

class A(Base):
    __tablename__ = "a"
    id = Column(Integer, primary_key=True)
    data = Column(String)
e = create_engine('sqlite://', echo=True)
Base.metadata.create_all(e)
s = Session(e)
a1 = A()
a1.data = func.lower("SomeData")
s.add(a1)
s.commit()
assert a1.data == "somedata"
You can make it automatic with @validates:
from sqlalchemy.orm import validates

class MyClass(Base):
    # ...
    data = Column(BLOB)

    @validates("data")
    def _set_data(self, key, value):
        return func.compress(value)
If you want it readable in Python before the flush, you'd need to memoize it locally and use a descriptor to access it.
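A minimal sketch of that idea, reusing the Base from the example above; plain_data, _plain_data and the table name are hypothetical names I'm introducing here, and this is one assumed way to memoize, not an established recipe:
from sqlalchemy import Column, Integer, BLOB, func
from sqlalchemy.orm import validates

class MyClass(Base):
    __tablename__ = 'my_table'              # assumed table name
    id = Column(Integer, primary_key=True)
    data = Column(BLOB)

    @validates("data")
    def _set_data(self, key, value):
        self._plain_data = value            # keep the readable copy locally
        return func.compress(value)         # this SQL expression is what gets flushed

    @property
    def plain_data(self):
        # readable before the flush; after a reload, falls back to the stored column
        return getattr(self, '_plain_data', self.data)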

Converting SQL commands to Python's ORM

How would you convert the following code to a Python ORM such as SQLAlchemy?
#1 Putting data to Pg
import os, pg, sys, re, psycopg2
#conn = psycopg2.connect("dbname='tkk' host='localhost' port='5432' user='noa' password='123'")
conn = psycopg2.connect("dbname=tk user=naa password=123")
cur = conn.cursor()
cur.execute("""INSERT INTO courses (course_nro)
VALUES ( %(course_nro)s )""", dict(course_nro='abcd'))
conn.commit()
#2 Fetching
cur.execute("SELECT * FROM courses")
print cur.fetchall()
Examples of the two commands in SQLAlchemy:
insert
sqlalchemy.sql.expression.insert(table, values=None, inline=False, **kwargs)
select
sqlalchemy.sql.expression.select(columns=None, whereclause=None, from_obj=[], **kwargs)
After the initial declarations, you can do something like this:
o = Course(course_nro='abcd')
session.add(o)
session.commit()
and
print session.query(Course).all()
The declarations could look something like this:
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# create an engine, and a base class
engine = create_engine('postgresql://naa:123@localhost/tk')
DeclarativeBase = declarative_base(bind=engine)
metadata = DeclarativeBase.metadata

# create a session
Session = sessionmaker(bind=engine)
session = Session()

# declare the models
class Course(DeclarativeBase):
    __tablename__ = 'courses'
    course_nro = Column('course_nro', CHAR(12), primary_key=True)  # a mapped class needs a primary key
This declarative method is just one way of using SQLAlchemy.
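For completeness, here is a sketch of the same two operations using the Core insert() and select() constructs whose signatures appear above, with no ORM session; this assumes SQLAlchemy 1.4+ calling conventions and the engine defined above:
from sqlalchemy import Table, MetaData, insert, select

metadata = MetaData()
courses = Table('courses', metadata, autoload_with=engine)  # reflect the existing table

with engine.begin() as conn:
    # 1 Putting data to Pg
    conn.execute(insert(courses).values(course_nro='abcd'))
    # 2 Fetching
    print(conn.execute(select(courses)).fetchall())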
Even though this is old, more examples can't hurt, right? I thought I'd demonstrate how to do this with PyORMish.
from pyormish import Model

class Course(Model):
    _TABLE_NAME = 'courses'
    _PRIMARY_FIELD = 'id' # or whatever your primary field is
    _SELECT_FIELDS = ('id', 'course_nro')
    _COMMIT_FIELDS = ('course_nro',)

Model.db_config = dict(
    DB_TYPE='postgres',
    DB_CONN_STRING='postgre://naa:123@localhost/tk'
)
To create:
new_course = Course().create(course_nro='abcd')
To select:
# return the first row WHERE course_nro='abcd'
new_course = Course().get_by_fields(course_nro='abcd')