Patch unwanted column attribute - python

I have an object
class Summary():
__tablename__ = 'employeenames'
name= Column('employeeName', String(128, collation='utf8_bin'))
date = Column('dateJoined', Date)
I want to patch Summary with a mock object
class Summary():
__tablename__ = 'employeenames'
name= Column('employeeName', String)
date = Column('dateJoined', Date)
or just patch the name field to name = Column('employeeName', String)
The reason I'm doing this is that I'm doing my tests in sqlite and some queries that are only for Mysql are interfering with my tests.

I think it would be difficult to mock the column. However you could instead conditionally compile the String type for Sqlite, removing the collation.
import sqlalchemy as sa
from sqlalchemy import orm
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.types import String


# BUG FIX: this was written as "#compiles(String, 'sqlite')", which is a
# comment, so the Sqlite-specific compiler was never registered. It must be
# the "@compiles" decorator for the override to take effect.
@compiles(String, 'sqlite')
def compile_varchar(element, compiler, **kw):
    """Compile String columns for Sqlite with any collation stripped.

    MySQL-only collations such as utf8_bin are meaningless to Sqlite, so the
    collation is cleared before delegating to the stock VARCHAR compiler.
    ``type_expression`` is supplied by SQLAlchemy when compiling DDL, so this
    override applies during CREATE TABLE.
    """
    type_expression = kw['type_expression']
    type_expression.type.collation = None
    return compiler.visit_VARCHAR(element, **kw)


Base = orm.declarative_base()


class Summary(Base):
    """Minimal model demonstrating the dialect-conditional compilation."""

    __tablename__ = 'employeenames'

    id = sa.Column(sa.Integer, primary_key=True)
    name = sa.Column('employeeName', sa.String(128, collation='utf8_bin'))
    date = sa.Column('dateJoined', sa.Date)


# Emit the DDL against both backends: MySQL keeps the collation,
# Sqlite drops it via the @compiles hook above.
urls = ['mysql:///test', 'sqlite://']
for url in urls:
    engine = sa.create_engine(url, echo=True, future=True)
    Base.metadata.drop_all(engine)
    Base.metadata.create_all(engine)
This script produces the expected output for MySQL:
CREATE TABLE employeenames (
id INTEGER NOT NULL AUTO_INCREMENT,
`employeeName` VARCHAR(128) COLLATE utf8_bin,
`dateJoined` DATE,
PRIMARY KEY (id)
)
but removes the collation for Sqlite:
CREATE TABLE employeenames (
id INTEGER NOT NULL,
"employeeName" VARCHAR(128),
"dateJoined" DATE,
PRIMARY KEY (id)
)

Related

How can I create a table using sqlalchemy and set it to ignore duplicates on a unique index?

I'm creating a table using SQLAlchemy. The table includes a unique index, and I would like the table to be set up to ignore attempts to insert a row that is a duplicate of an existing index value. This is what my code looks like:
import os
import sqlalchemy
from sqlalchemy import (Column, String, Integer, Float, DateTime, Index)
from sqlalchemy.ext.declarative import declarative_base

db_user = 'USERNAME'
db_password = 'PASSWORD'
address = '000.000.0.0'
port = '1111'
database = 'my_db'

# BUG FIX: the original URL separated the password from the host with "#",
# which is not a valid database URL; the credentials block ends with "@"
# (user:password@host:port/database).
engine = sqlalchemy.create_engine(
    f"mariadb+mariadbconnector://{db_user}:{db_password}@{address}:{port}/{database}")

Base = declarative_base()


class Pet(Base):
    """A pet row; (breed, birthday) pairs are enforced unique via an index."""

    __tablename__ = 'pets'

    id = Column(Integer, primary_key=True)
    name = Column(String(8))
    weight = Column(Float)
    # NOTE(review): Float looks odd for a breed name — confirm; String seems intended.
    breed = Column(Float)
    birthday = Column(DateTime)

    # Composite unique index over breed + birthday.
    __table_args__ = (Index('breed_bd', "breed", "birthday", unique=True), )


Base.metadata.create_all(engine)

Session = sqlalchemy.orm.sessionmaker()
Session.configure(bind=engine)
session = Session()
I've seen that in straight sql, you can do things like
CREATE TABLE dbo.foo (bar int PRIMARY KEY WITH (IGNORE_DUP_KEY = ON))
or
CREATE UNIQUE INDEX UNQ_CustomerMemo ON CustomerMemo (MemoID, CustomerID)
WITH (IGNORE_DUP_KEY = ON);
I'm wondering what I should change/add in my code to accomplish something similar.

Sqlalchemy: how to define constraint newid() for Guid column (mssql)

I want to create a column (Id) of type uniqueidentifier in sqlalchemy in a table called Staging.Transactions. Also, I want the column to automatically generate new guids for inserts.
What I want to accomplish is the following (expressed in sql)
ALTER TABLE [Staging].[Transactions] ADD DEFAULT (newid()) FOR [Id]
GO
The code in sqlalchemy is currently:
from sqlalchemy import Column, Float, Date
import uuid
from database.base import base
from sqlalchemy_utils import UUIDType


class Transactions(base):
    """Transaction row in the Staging schema, keyed by a client-side GUID."""

    __tablename__ = 'Transactions'
    __table_args__ = {'schema': 'Staging'}

    # The GUID is generated in Python (uuid4), not by the database server.
    Id = Column(UUIDType, primary_key=True, default=uuid.uuid4)
    AccountId = Column(UUIDType)
    TransactionAmount = Column(Float)
    TransactionDate = Column(Date)

    def __init__(self, account_id, transaction_amount, transaction_date):
        """Build a row, assigning a fresh uuid4 as the primary key."""
        self.Id = uuid.uuid4()
        self.AccountId = account_id
        self.TransactionAmount = transaction_amount
        self.TransactionDate = transaction_date
When I create the schema from the python code it does not generate the constraint that I want in SQL - that is - to auto generate new guids/uniqueidentifiers for the column [Id].
If I try to make a manual insert I get error message: "Cannot insert the value NULL into column 'Id', table 'my_database.Staging.Transactions'; column does not allow nulls. INSERT fails."
Would appreciate tips on how I can change the python/sqlalchemy code to fix this.
I've found two ways:
1)
Do not use uuid.uuid4() in init of your table class, keep it simple:
class Transactions(base):
__tablename__ = 'Transactions'
__table_args__ = {'schema': 'Staging'}
Id = Column(String, primary_key=True)
...
def __init__(self, Id, account_id, transaction_amount, transaction_date):
self.Id = Id
...
Instead use it in the creation of a new record:
import uuid
...
new_transac = Transactions(Id = uuid.uuid4(),
...
)
db.session.add(new_transac)
db.session.commit()
Here, db is my SQLAlchemy(app)
2)
Without uuid, you can use raw SQL to do the job (see SQLAlchemy : how can I execute a raw INSERT sql query in a Postgres database?).
Well... session.execute is a Sqlalchemy solution...
In your case, should be something like this:
# NOTE(review): building SQL by string concatenation from form data is
# vulnerable to SQL injection — prefer bound parameters. Kept as raw SQL here
# only so that NEWID() runs server-side.
table = "[Staging].[Transactions]"
columns = ["[Id]", "[AccountId]", "[TransactionAmount]", "[TransactionDate]"]
# BUG FIX: the original closing of the VALUES tuple had mismatched
# quote/parenthesis ( ...data))"] ) and did not parse; the ")" must be
# concatenated as a string literal.
values = ["(NEWID(), '" + str(form.AccountId.data) + "', " +
          str(form.TransactionAmount.data) + ", " +
          str(form.TransactionDate.data) + ")"]
s_cols = ', '.join(columns)
s_vals = ', '.join(values)
insSQL = db.session.execute(f"INSERT INTO {table} ({s_cols}) VALUES {s_vals}")
print(insSQL)  # to see if SQL command is OK
db.session.commit()
You have to check where single quotes are really needed.

How to set primary key auto increment in SqlAlchemy orm

I tried to use the SqlAlchemy orm to build the api to insert the values into database from uploaded excel files. When I tested the code it kept showing the error:
TypeError: __init__() missing 1 required positional argument: 'id'
I've updated the id key to primary key, auto increment, unique and unsigned in my local MySql data base. I believe the system cannot insert the primary key automatically because it works if I assign the value to id manually
transaction_obj = Transaction(id=1, name="David", date="2018-03-03",
product="fruit", quantity=20, amount=12.55)
Here is model.py
from sqlalchemy import Table, MetaData, Column, Integer, String, DATE, DECIMAL, ForeignKey, DateTime
from sqlalchemy.orm import mapper

metadata = MetaData()

customers = Table('customers', metadata,
                  Column('id', Integer, primary_key=True),
                  Column('name', String(20)),
                  Column('phone', String(20)),
                  Column('address', String(45)),
                  Column('source_from', String(45))
                  )


class Customers(object):
    """Plain class classically mapped onto the customers table."""

    # id is omitted: the Integer primary key auto-increments on insert.
    def __init__(self, name, phone, address, source_from):
        self.name = name
        self.phone = phone
        self.address = address
        self.source_from = source_from

    def __repr__(self):
        return "<Customer(name='%s', phone='%s', address='%s', " \
               "source_from='%s')" % (self.name, self.phone, self.address,
                                      self.source_from)


mapper(Customers, customers)

transaction = Table('transaction', metadata,
                    Column('id', Integer, primary_key=True),
                    Column('name', String(20)),
                    Column('date', DateTime),
                    Column('product', String(20)),
                    Column('quantity', Integer),
                    Column('amount', DECIMAL(2))
                    )


class Transaction(object):
    """Plain class classically mapped onto the transaction table."""

    # BUG FIX: this method was named "__index__", so the class defined no
    # custom constructor at all — which is why instantiating Transaction
    # without an explicit id raised "TypeError: __init__() missing 1 required
    # positional argument: 'id'". Naming it __init__ (and leaving id out so
    # the primary key auto-increments) fixes the reported error.
    def __init__(self, name, date, product, quantity, amount):
        self.name = name
        self.date = date
        self.product = product
        self.quantity = quantity
        self.amount = amount

    def __repr__(self):
        return "<Transaction(name='%s', date='%s', product='%s'," \
               "quantity='%s', amount='%s')>" % (self.name, self.date,
                                                 self.product, self.quantity,
                                                 self.amount)


mapper(Transaction, transaction)
Here is my test coding: test.py
import json
import os

from sqlalchemy import create_engine
# BUG FIX: sessionmaker was called but never imported.
from sqlalchemy.orm import sessionmaker

import pandas as pd

import config
# BUG FIX: Transaction was referenced but never imported from the model module.
from model import Transaction

conn = config.conn_str


def tran_test():
    """Insert one hard-coded Transaction row; id is left to auto-increment."""
    engine = create_engine(conn)
    Session_class = sessionmaker(bind=engine)
    # BUG FIX: the original did "Session = Session_class" and then called
    # .add/.commit on the sessionmaker class itself; a session instance must
    # be created by calling the factory.
    session = Session_class()
    # generate the object for the data we would like to insert
    transaction_obj = Transaction(name="David", date="2018-03-03",
                                  product="fruit", quantity=20, amount=12.55)
    session.add(transaction_obj)
    session.commit()


def test_uploaded_file(file):
    """Load the uploaded excel file and report its structure (returns None)."""
    df = pd.read_excel(file)
    return df.info()


if __name__ == '__main__':
    tran_test()
I'm using SQLAlchemy==1.2.10, PyMySQL==0.9.2.
I'm doubting if I'm using the wrong format in model.py. Please advise. Thx.
While I'm not sure about the pattern you are using, (manually mapping to your table classes) I think you would have a much easier time making use of declarative_base which does this for you.
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
Then make sure your models inherit Base
from sqlalchemy import (
Column,
Integer,
String
)
class Customers(Base):
    """Declarative customer model; inheriting Base maps it automatically."""

    __tablename__ = 'customer'

    id = Column(Integer, primary_key=True)  # Auto-increment should be default
    name = Column(String(20))
    # Etc.

    def __repr__(self):
        return ("<Customer(name='%s', phone='%s', address='%s', "
                "source_from='%s')" % (self.name, self.phone,
                                       self.address, self.source_from))
And finally use Base to create your table:
Base.metadata.create_all(engine)
Here is a good reference to basic declarative use cases. It gets a little more complicated depending on how you are scaffolding your app but its a great starting point:
http://docs.sqlalchemy.org/en/latest/orm/extensions/declarative/basic_use.html

How to create a pg_trgm index using SQLAlchemy for Scrapy?

I am using Scrapy to scrape data from a web forum. I am storing this data in a PostgreSQL database using SQLAlchemy. The table and columns create fine, however, I am not able to have SQLAlchemy create an index on one of the columns. I am trying to create a trigram index (pg_trgm) using gin.
The Postgresql code that would create this index is:
CREATE INDEX description_idx ON table USING gin (description gin_trgm_ops);
The SQLAlchemy code I have added to my models.py file is:
desc_idx = Index('description_idx', text("description gin_trgm_ops"), postgresql_using='gin')
I have added this line to my models.py but when I check in postgresql, the index was never created.
Below are my full models.py and pipelines.py files. Am I going about this all wrong??
Any help would be greatly appreciated!!
models.py:
from sqlalchemy import create_engine, Column, Integer, String, DateTime, Index, text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.engine.url import URL

import settings

DeclarativeBase = declarative_base()


def db_connect():
    """Build an engine from the DATABASE settings dict."""
    return create_engine(URL(**settings.DATABASE))


def create_forum_table(engine):
    """Emit CREATE TABLE for every model registered on DeclarativeBase."""
    DeclarativeBase.metadata.create_all(engine)


class forumDB(DeclarativeBase):
    """Scraped forum post: title plus optional description."""

    __tablename__ = "table"

    id = Column(Integer, primary_key=True)
    title = Column('title', String)
    desc = Column('description', String, nullable=True)
    # NOTE(review): assigned as a plain class attribute, this Index is never
    # attached to the Table, so create_all does not emit it — which is the
    # behaviour the surrounding question is asking about.
    desc_idx = Index('description_idx', text("description gin_trgm_ops"), postgresql_using='gin')
pipelines.py
from scrapy.exceptions import DropItem
from sqlalchemy.orm import sessionmaker
from models import forumDB, db_connect, create_forum_table
class ScrapeforumToDB(object):
    """Scrapy pipeline that persists scraped forum items via SQLAlchemy."""

    def __init__(self):
        """Connect, ensure the table exists, and keep a session factory."""
        engine = db_connect()
        create_forum_table(engine)
        self.Session = sessionmaker(bind=engine)

    def process_item(self, item, spider):
        """Write one item to the database; roll back and re-raise on failure."""
        db_session = self.Session()
        record = forumDB(**item)
        try:
            db_session.add(record)
            db_session.commit()
        except:
            # Undo the partial transaction, then propagate the error to Scrapy.
            db_session.rollback()
            raise
        finally:
            db_session.close()
        return item
The proper way to reference an Operator Class in SQLAlchemy (such as gin_trgm_ops) is to use the postgresql_ops parameter. This will also allow tools like alembic to understand how use it when auto-generating migrations.
Index('description_idx',
'description', postgresql_using='gin',
postgresql_ops={
'description': 'gin_trgm_ops',
})
Since the Index definition uses text expression it has no references to the Table "table", which has been implicitly created by the declarative class forumDB. Compare that to using a Column as expression, or some derivative of it, like this:
Index('some_index_idx', forumDB.title)
In the above definition the index will know about the table and the other way around.
What this means in your case is that the Table "table" has no idea that such an index exists. Adding it as an attribute of the declarative class is the wrong way to do it. It should be passed to the implicitly created Table instance. The attribute __table_args__ is just for that:
class forumDB(DeclarativeBase):
    """Forum post model; __table_args__ attaches the trigram index to the Table."""

    __tablename__ = "table"

    # Note: This used to use `text('description gin_trgm_ops')` instead of the
    # `postgresql_ops` parameter, which should be used. Declaring the operator
    # class via postgresql_ops keeps the index understood by tools like alembic.
    __table_args__ = (
        Index('description_idx', "description",
              postgresql_ops={"description": "gin_trgm_ops"},
              postgresql_using='gin'),
    )

    id = Column(Integer, primary_key=True)
    title = Column('title', String)
    desc = Column('description', String, nullable=True)
With the modification in place, a call to create_forum_table(engine) resulted in:
> \d "table"
Table "public.table"
Column | Type | Modifiers
-------------+-------------------+----------------------------------------------------
id | integer | not null default nextval('table_id_seq'::regclass)
title | character varying |
description | character varying |
Indexes:
"table_pkey" PRIMARY KEY, btree (id)
"description_idx" gin (description gin_trgm_ops)

sqlalchemy and auto increments with postgresql

I created a table with a primary key and a sequence, but via the debugger and later looking at the table design, the sequence isn't applied, just created.
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, Boolean, Sequence
from sqlalchemy.orm import mapper, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
import json


class Bookmarks(object):
    pass


# ----------------------------------------------------------------------
# NOTE(review): the original URL read 'iser:p#host' — "#" is not a valid
# password/host separator; "@" is presumably what was intended (same
# user:password@host pattern as any SQLAlchemy URL). Confirm credentials.
engine = create_engine('postgresql://iser:p@host/sconf', echo=True)

Base = declarative_base()


class Tramo(Base):
    """Declarative model whose id draws from an explicit named sequence."""

    __tablename__ = 'tramos'
    __mapper_args__ = {'column_prefix': 'tramos'}

    id = Column(Integer, Sequence('seq_tramos_id', start=1, increment=1), primary_key=True)
    nombre = Column(String)
    tramo_data = Column(String)
    estado = Column(Boolean, default=True)

    def __init__(self, nombre, tramo_data):
        self.nombre = nombre
        self.tramo_data = tramo_data

    def __repr__(self):
        # BUG FIX: the original lacked parentheses around the format
        # arguments, so only self.id was fed to '%' (raising TypeError for
        # the remaining %s placeholders) and a tuple was returned.
        return '[id:%d][nombre:%s][tramo:%s]' % (self.id, self.nombre, self.tramo_data)


Session = sessionmaker(bind=engine)
session = Session()

tabla = Tramo.__table__
metadata = Base.metadata
metadata.create_all(engine)
the table is just created like this
CREATE TABLE tramos (
id INTEGER NOT NULL,
nombre VARCHAR,
tramo_data VARCHAR,
estado BOOLEAN,
PRIMARY KEY (id)
)
I was hoping to see the declaration of the default nextval of the sequence,
but it isn't there.
I also used the __mapper_args__ but looks like it's been ignored.
Am I missing something?
I realize this is an old thread, but I stumbled on it with the same problem and was unable to find a solution anywhere else.
After some experimenting I was able to solve this with the following code:
# One named Sequence object, shared by the column definition and its default.
TABLE_ID = Sequence('table_id_seq', start=1000)


class Table(Base):
    """Model whose id column is fed by table_id_seq on the server side."""

    __tablename__ = 'table'

    # server_default=TABLE_ID.next_value() makes the database itself apply
    # nextval('table_id_seq') when no id is supplied.
    id = Column(Integer, TABLE_ID, primary_key=True, server_default=TABLE_ID.next_value())
This way the sequence is created and is used as the default value for column id, with the same behavior as if created implicitly by SQLAlchemy.
I ran into a similar issue with composite multi-column primary keys. The SERIAL is only implicitly applied to a single column primary key. However, this behaviour can be controlled via the autoincrement argument (defaults to "auto"):
id = Column(Integer, primary_key=True, autoincrement=True)
You specified an explicit Sequence() object with name. If you were to omit that, then SERIAL would be added to the id primary key specification:
CREATE TABLE tramos (
id SERIAL NOT NULL,
nombre VARCHAR,
tramo_data VARCHAR,
estado BOOLEAN,
PRIMARY KEY (id)
)
A DEFAULT is only generated if the column is not a primary key.
When inserting, SQLAlchemy will issue a select nextval(..) as needed to create a next value. See the PostgreSQL documentation for details.

Categories