Get key from IntegrityError exception - python

I'm trying to save a list of custom objects with sqlalchemy:
session.bulk_save_objects(listOfObjects)
And obtaining IntegrityError:
psycopg2.IntegrityError: duplicate key value violates unique constraint "ppoi_ukey"
And:
Key ("id")=(42555) already exists.
Is there any way to get the key Id (42555) as an integer, in order to roll back, extract this key from the list, and re insert the list again without it?

I tried something which I think might help you.
from sqlalchemy import Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import IntegrityError
Base = declarative_base()
#Create a dummy model
class Person(Base):
__tablename__ = 'person'
id = Column(Integer, primary_key=True)
name = Column(String(250), nullable=False)
def __repr__(self):
return "{ id="+str(self.id)+", name="+self.name+ " }"
# Create an engine that stores data in the local directory's
# sqlalchemy_example.db file.
engine = create_engine('sqlite:///sqlalchemy_example.db')
# Create all tables
Base.metadata.create_all(engine)
DBSession = sessionmaker(bind=engine)
session = DBSession()
list_of_objects = []
new_person = Person(id=1,name='new person')
list_of_objects.append(new_person)
new_person2 = Person(id=2,name='new person2')
list_of_objects.append(new_person2)
# Violating unique constraint purposely
new_person3 = Person(id=1,name='new person')
list_of_objects.append(new_person3)
print(list_of_objects)
for person in list_of_objects:
# with session.no_autoflush:
try:
session.add(person)
session.commit()
except IntegrityError as e:
session.rollback()
#print(e) #prints whole exception
print(e.params) #print lists of param to the query
print(e.params[0]) #assuming the first param is unique id which is violating constraint you can get it from here and use it as you wanted ..
except Exception as ex:
pass
person = session.query(Person).all()
print(len(person)) #returns 2

Related

null value for primary key when trying to insert list of dictionaries

I'm trying to do a large number of inserts with one call, and the way someone here recommended was by giving .insert a list of dictionaries. This is using SQLAlchemy Core.
As an example:
try:
engine = db.create_engine(f"postgres://user:pass#myip/addressbook", connect_args={'connect_timeout': 5})
connection = engine.connect()
metadata = db.MetaData()
except exc.OperationalError:
print_error(f":: Could not connect to myip!")
sys.exit()
table_addressbook = db.Table('addressbook', metadata, autoload=True, autoload_with=engine)
list = []
list.append({'firstname': "John", 'lastname': "Doe"})
list.append({'firstname': "Jane", 'lastname': "Doe"})
query = db.insert(table_addressbook).values(list)
connection.execute(query)
But I'm getting an error saying the column id violates a non-null constraint. This is because insert normally auto-generates the primary-key id. How do I use this method but specify that id should be auto-generated? Or is there a different method I should use?
edit
Table name is addressbook.
Column id is type integer with default sequence 'untitled_table_id_seq', constraints are PRIMARY_KEY. This was autogenerated by Postico for Mac, but I've always been able to insert without including id and it auto increments from the last inserted ID.
Columns firstname and lastname are type text, no default, no constraints.
Without any information on your model and/or connection it is a bit difficult to answer your question. Please find below a piece of code which uses insert without throwing non-null constraint errors. Hopefully it helps you.
from sqlalchemy import create_engine, Column, Integer, String, Table
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import insert
engine = create_engine('sqlite:///:memory:', echo=True)
Base = declarative_base()
class User(Base):
__tablename__ = 'users'
id = Column(Integer, primary_key=True)
firstname = Column(String)
lastname = Column(String)
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
Session.configure(bind=engine) # once engine is available
session = Session()
new_users = []
new_users.append({'firstname': "John", 'lastname': "Doe"})
new_users.append({'firstname': "Jane", 'lastname': "Doe"})
i = insert(User).values(new_users)
session.execute(i)
PS: most of this is coming from the tutorial on: https://docs.sqlalchemy.org/en/13/orm/tutorial.html
from sqlalchemy import Column
from sqlalchemy import create_engine
from sqlalchemy import Integer
from sqlalchemy import String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session
engine = create_engine('sqlite:///:memory:', echo=True)
Base = declarative_base()
# Example Model definition for the illustration
class Customer(Base):
__tablename__ = "customer"
id = Column(Integer, primary_key=True)
name = Column(String(255))
description = Column(String(255))
Base.metadata.create_all(engine)
######################################################
# Bulk insert using dictionaries.
######################################################
# Insert test records into `customer`table.
def bulk_insert_customers(n):
session = Session(bind=engine)
session.bulk_insert_mappings(
Customer,
[
dict(
name="customer name %d" % i,
description="customer description %d" % i,
)
for i in range(n)
],
)
session.commit()
Refer these for more examples of how to do bulk inserts in different ways:
https://docs.sqlalchemy.org/en/13/_modules/examples/performance/bulk_inserts.html

How to create a pg_trgm index using SQLAlchemy for Scrapy?

I am using Scrapy to scrape data from a web forum. I am storing this data in a PostgreSQL database using SQLAlchemy. The table and columns create fine, however, I am not able to have SQLAlchemy create an index on one of the columns. I am trying to create a trigram index (pg_trgm) using gin.
The Postgresql code that would create this index is:
CREATE INDEX description_idx ON table USING gin (description gin_trgm_ops);
The SQLAlchemy code I have added to my models.py file is:
desc_idx = Index('description_idx', text("description gin_trgm_ops"), postgresql_using='gin')
I have added this line to my models.py but when I check in postgresql, the index was never created.
Below are my full models.py and pipelines.py files. Am I going about this all wrong??
Any help would be greatly appreciated!!
models.py:
from sqlalchemy import create_engine, Column, Integer, String, DateTime, Index, text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.engine.url import URL
import settings
DeclarativeBase = declarative_base()
def db_connect():
return create_engine(URL(**settings.DATABASE))
def create_forum_table(engine):
DeclarativeBase.metadata.create_all(engine)
class forumDB(DeclarativeBase):
__tablename__ = "table"
id = Column(Integer, primary_key=True)
title = Column('title', String)
desc = Column('description', String, nullable=True)
desc_idx = Index('description_idx', text("description gin_trgm_ops"), postgresql_using='gin')
pipelines.py
from scrapy.exceptions import DropItem
from sqlalchemy.orm import sessionmaker
from models import forumDB, db_connect, create_forum_table
class ScrapeforumToDB(object):
def __init__(self):
engine = db_connect()
create_forum_table(engine)
self.Session = sessionmaker(bind=engine)
def process_item(self, item, spider):
session = self.Session()
forumitem = forumDB(**item)
try:
session.add(forumitem)
session.commit()
except:
session.rollback()
raise
finally:
session.close()
return item
The proper way to reference an Operator Class in SQLAlchemy (such as gin_trgm_ops) is to use the postgresql_ops parameter. This will also allow tools like alembic to understand how use it when auto-generating migrations.
Index('description_idx',
'description', postgresql_using='gin',
postgresql_ops={
'description': 'gin_trgm_ops',
})
Since the Index definition uses text expression it has no references to the Table "table", which has been implicitly created by the declarative class forumDB. Compare that to using a Column as expression, or some derivative of it, like this:
Index('some_index_idx', forumDB.title)
In the above definition the index will know about the table and the other way around.
What this means in your case is that the Table "table" has no idea that such an index exists. Adding it as an attribute of the declarative class is the wrong way to do it. It should be passed to the implicitly created Table instance. The attribute __table_args__ is just for that:
class forumDB(DeclarativeBase):
__tablename__ = "table"
# Note: This used to use `text('description gin_trgm_ops')` instead of the
# `postgresql_ops` parameter, which should be used.
__table_args__ = (
Index('description_idx', "description",
postgresql_ops={"description": "gin_trgm_ops"},
postgresql_using='gin'),
)
id = Column(Integer, primary_key=True)
title = Column('title', String)
desc = Column('description', String, nullable=True)
With the modification in place, a call to create_forum_table(engine) resulted in:
> \d "table"
Table "public.table"
Column | Type | Modifiers
-------------+-------------------+----------------------------------------------------
id | integer | not null default nextval('table_id_seq'::regclass)
title | character varying |
description | character varying |
Indexes:
"table_pkey" PRIMARY KEY, btree (id)
"description_idx" gin (description gin_trgm_ops)

sqlalchemy: column_prefix causes issues accessing model attributes

I went searching w/o result in a way to get the integer value or the boolean value from an object model created via sqlalchemy,
I mean i can add it and it works flawless but i cant get the integer value or the boolean value all i get when i tried to print it is the object name:
from sqlalchemy import create_engine, MetaData, Table, Column,Integer,String,Boolean,Sequence
from sqlalchemy.orm import mapper, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
import json
class Bookmarks(object):
pass
#----------------------------------------------------------------------
engine = create_engine('postgresql://u:p#localghost/asd', echo=True)
Base = declarative_base()
class Tramo(Base):
__tablename__ = 'tramos'
__mapper_args__ = {'column_prefix':'tramos'}
id = Column(Integer, primary_key=True)
nombre = Column(String)
tramo_data = Column(String)
estado = Column(Boolean,default=True)
def __init__(self,nombre,tramo_data):
self.nombre=nombre
self.tramo_data=tramo_data
def __repr__(self):
return "[id:%s][nombre:%s][tramo:%s]" % (getattr(self, 'id'), self.nombre,self.tramo_data)
Session = sessionmaker(bind=engine)
session = Session()
tabla = Tramo.__table__
metadata = Base.metadata
metadata.create_all(engine)
b=Tramo('tramo1','adadas')
session.add(b)
session.commit()
print b
print b.id
its prints
[id:tramos.id][nombre:tramo1][tramo:adadas]
tramos.id
i cant get to print the id value, looks like the object column is in there but it doesn't return the value ot the property
i even use
session.refresh(b)
after the add but the result is the same.
According to the documentation Naming All Columns with a Prefix:
...prefix to the mapped attribute names relative to the
(table) column name ...
Since you define the mapped attributes in your class, I do not think it does what you desire.
Solution-1: remove the 'column_prefix':'tramos' from your __mapper_args__
Solution-2: print b.tramosid will print its id. You would need to change the __repr__ accordingly:
def __repr__(self):
return "[id:%s][nombre:%s][tramo:%s]" % (getattr(self, 'tramosid'), self.nombre, self.tramo_data)

SQLAlchemy - don't enforce foreign key constraint on a relationship

I have a Test model/table and a TestAuditLog model/table, using SQLAlchemy and SQL Server 2008. The relationship between the two is Test.id == TestAuditLog.entityId, with one test having many audit logs. TestAuditLog is intended to keep a history of changes to rows in the Test table. I want to track when a Test is deleted, also, but I'm having trouble with this. In SQL Server Management Studio, I set the FK_TEST_AUDIT_LOG_TEST relationship's "Enforce Foreign Key Constraint" property to "No", thinking that would allow a TestAuditLog row to exist with an entityId that no longer connects to any Test.id because the Test has been deleted. However, when I try to create a TestAuditLog with SQLAlchemy and then delete the Test, I get an error:
(IntegrityError) ('23000', "[23000] [Microsoft][ODBC SQL Server Driver][SQL Server]Cannot insert the value NULL into column 'AL_TEST_ID', table 'TEST_AUDIT_LOG'; column does not allow nulls. UPDATE fails. (515) (SQLExecDirectW); [01000] [Microsoft][ODBC SQL Server Driver][SQL Server]The statement has been terminated. (3621)") u'UPDATE [TEST_AUDIT_LOG] SET [AL_TEST_ID]=? WHERE [TEST_AUDIT_LOG].[AL_ID] = ?' (None, 8)
I think because of the foreign-key relationship between Test and TestAuditLog, after I delete the Test row, SQLAlchemy is trying to update all that test's audit logs to have a NULL entityId. I don't want it to do this; I want SQLAlchemy to leave the audit logs alone. How can I tell SQLAlchemy to allow audit logs to exist whose entityId does not connect with any Test.id?
I tried just removing the ForeignKey from my tables, but I'd like to still be able to say myTest.audits and get all of a test's audit logs, and SQLAlchemy complained about not knowing how to join Test and TestAuditLog. When I then specified a primaryjoin on the relationship, it grumbled about not having a ForeignKey or ForeignKeyConstraint with the columns.
Here are my models:
class TestAuditLog(Base, Common):
__tablename__ = u'TEST_AUDIT_LOG'
entityId = Column(u'AL_TEST_ID', INTEGER(), ForeignKey(u'TEST.TS_TEST_ID'),
nullable=False)
...
class Test(Base, Common):
__tablename__ = u'TEST'
id = Column(u'TS_TEST_ID', INTEGER(), primary_key=True, nullable=False)
audits = relationship(TestAuditLog, backref="test")
...
And here's how I'm trying to delete a test while keeping its audit logs, their entityId intact:
test = Session.query(Test).first()
Session.begin()
try:
Session.add(TestAuditLog(entityId=test.id))
Session.flush()
Session.delete(test)
Session.commit()
except:
Session.rollback()
raise
You can solve this by:
POINT-1: not having a ForeignKey neither on the RDBMS level nor on the SA level
POINT-2: explicitly specify join conditions for the relationship
POINT-3: mark relationship cascades to rely on passive_deletes flag
Fully working code snippet below should give you an idea (points are highlighted in the code):
from sqlalchemy import create_engine, Column, Integer, String, ForeignKey
from sqlalchemy.orm import scoped_session, sessionmaker, relationship
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
engine = create_engine('sqlite:///:memory:', echo=False)
Session = sessionmaker(bind=engine)
class TestAuditLog(Base):
__tablename__ = 'TEST_AUDIT_LOG'
id = Column(Integer, primary_key=True)
comment = Column(String)
entityId = Column('TEST_AUDIT_LOG', Integer, nullable=False,
# POINT-1
#ForeignKey('TEST.TS_TEST_ID', ondelete="CASCADE"),
)
def __init__(self, comment):
self.comment = comment
def __repr__(self):
return "<TestAuditLog(id=%s entityId=%s, comment=%s)>" % (self.id, self.entityId, self.comment)
class Test(Base):
__tablename__ = 'TEST'
id = Column('TS_TEST_ID', Integer, primary_key=True)
name = Column(String)
audits = relationship(TestAuditLog, backref='test',
# POINT-2
primaryjoin="Test.id==TestAuditLog.entityId",
foreign_keys=[TestAuditLog.__table__.c.TEST_AUDIT_LOG],
# POINT-3
passive_deletes='all',
)
def __init__(self, name):
self.name = name
def __repr__(self):
return "<Test(id=%s, name=%s)>" % (self.id, self.name)
Base.metadata.create_all(engine)
###################
## tests
session = Session()
# create test data
tests = [Test("test-" + str(i)) for i in range(3)]
_cnt = 0
for _t in tests:
for __ in range(2):
_t.audits.append(TestAuditLog("comment-" + str(_cnt)))
_cnt += 1
session.add_all(tests)
session.commit()
session.expunge_all()
print '-'*80
# check test data, delete one Test
t1 = session.query(Test).get(1)
print "t: ", t1
print "t.a: ", t1.audits
session.delete(t1)
session.commit()
session.expunge_all()
print '-'*80
# check that audits are still in the DB for deleted Test
t1 = session.query(Test).get(1)
assert t1 is None
_q = session.query(TestAuditLog).filter(TestAuditLog.entityId == 1)
_r = _q.all()
assert len(_r) == 2
for _a in _r:
print _a
Another option would be to duplicate the column used in the FK, and make the FK column nullable with ON CASCADE SET NULL option. In this way you can still check the audit trail of deleted objects using this column.

How can I select all rows with sqlalchemy?

I am trying to get all rows from a table.
In controller I have:
meta.Session.query(User).all()
The result is [, ], but I have 2 rows in this table.
I use this model for the table:
import hashlib
import sqlalchemy as sa
from sqlalchemy import orm
from allsun.model import meta
t_user = sa.Table("users",meta.metadata,autoload=True)
class Duplicat(Exception):
pass
class LoginExistsException(Exception):
pass
class EmailExistsException(Exception):
pass
And next, in the same file:
class User(object):
def loginExists(self):
try:
meta.Session
.query(User)
.filter(User.login==self.login)
.one()
except orm.exc.NoResultFound:
pass
else:
raise LoginExistsException()
def emailExists(self):
try:
meta
.Session
.query(User)
.filter(User.email==self.email)
.one()
except orm.exc.NoResultFound:
pass
else:
raise EmailExistsException()
def save(self):
meta.Session.begin()
meta.Session.save(self)
try:
meta.Session.commit()
except sa.exc.IntegrityError:
raise Duplicat()
orm.mapper(User, t_user)
You can easily import your model and run this:
from models import User
# User is the name of table that has a column name
users = User.query.all()
for user in users:
print user.name
I use the following snippet to view all the rows in a table. Use a query to find all the rows. The returned objects are the class instances. They can be used to view/edit the values as required:
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine, Sequence
from sqlalchemy import String, Integer, Float, Boolean, Column
from sqlalchemy.orm import sessionmaker
Base = declarative_base()
class MyTable(Base):
__tablename__ = 'MyTable'
id = Column(Integer, Sequence('user_id_seq'), primary_key=True)
some_col = Column(String(500))
def __init__(self, some_col):
self.some_col = some_col
engine = create_engine('sqlite:///sqllight.db', echo=True)
Session = sessionmaker(bind=engine)
session = Session()
for class_instance in session.query(MyTable).all():
print(vars(class_instance))
session.close()

Categories