Preventing duplicate entries with SQLAlchemy in a preexisting SQLite table - python

I have a preexisting SQLite table which I am accessing with SQLAlchemy. I realized that a number of duplicate 'case' numbers exist. If I understand correctly, you cannot add a unique constraint to a table after it has been created in SQLite. After removing the dups using:
-- Keep the row with the lowest id for each "case" value and delete the rest.
-- The outer and inner statements must refer to the same table.
DELETE FROM judgements
WHERE id NOT IN
(
    SELECT MIN(id)
    FROM judgements
    GROUP BY "case"
);
I've decided to use sqlalchemy to prevent addition of additional dups. I'm working with scrapy and have a pipeline element that looks like:
class DynamicSQLlitePipeline(object):
    """Pipeline element that inserts each item into a SQLite table via SQLAlchemy."""

    def __init__(self,table_name):
        # The database file is data.db under settings.SETTINGS_PATH.
        db_path = "sqlite:///"+settings.SETTINGS_PATH+"\\data.db"
        _engine = create_engine(db_path)
        _connection = _engine.connect()
        _metadata = MetaData()
        # "case" is declared unique here; the remaining columns are elided.
        _stack_items = Table(table_name, _metadata,
                             Column("id", Integer, primary_key=True),
                             Column("case", Text , unique=True),
                             ....)
        # NOTE(review): create_all() presumably skips tables that already
        # exist, so the unique flag above may never reach a preexisting
        # table -- confirm against the SQLAlchemy documentation.
        _metadata.create_all(_engine)
        self.connection = _connection
        self.stack_items = _stack_items

    def process_item(self, item, spider):
        """Insert ``item`` and return it; an IntegrityError is reported as a dup."""
        try:
            ins_query = self.stack_items.insert().values(
                case=item['case'],
                ....
            )
            self.connection.execute(ins_query)
        except IntegrityError:
            print('THIS IS A DUP')
        return item
The only change I've made is to add unique=True to the 'case' column. However, on testing, dups are still being added. How can I get this working?

The code snippet below works on my side with python version 2.7 and sqlalchemy version 1.0.9 and sqlite version 3.15.2.
from sqlalchemy import create_engine, MetaData, Column, Integer, Table, Text
from sqlalchemy.exc import IntegrityError
class DynamicSQLlitePipeline(object):
    """Insert items into a SQLite table and report duplicate ``case`` values.

    The table has an integer ``id`` primary key and a ``case`` text column
    declared ``unique=True``; a violation surfaces as ``IntegrityError``.
    """

    def __init__(self, table_name):
        """Connect to ``data.db`` and define (and create) ``table_name``."""
        db_path = "sqlite:///data.db"
        _engine = create_engine(db_path)
        _connection = _engine.connect()
        _metadata = MetaData()
        _stack_items = Table(
            table_name, _metadata,
            Column("id", Integer, primary_key=True),
            Column("case", Text, unique=True),
        )
        _metadata.create_all(_engine)
        self.connection = _connection
        self.stack_items = _stack_items

    def process_item(self, item):
        """Insert ``item['case']`` and return ``item`` unchanged.

        A duplicate ``case`` is reported on stdout instead of raising.
        """
        # Building the statement cannot raise IntegrityError, so it stays
        # outside the try block.
        ins_query = self.stack_items.insert().values(case=item['case'])
        try:
            # An explicit transaction commits on success and rolls back on
            # failure, so the connection is reusable after a duplicate and
            # the row is persisted even where autocommit is not available.
            with self.connection.begin():
                self.connection.execute(ins_query)
        except IntegrityError:
            print('THIS IS A DUP')
        return item
if __name__ == '__main__':
    # Running this script twice inserts the same case twice; the second run
    # hits the unique constraint and prints the duplicate message.
    d = DynamicSQLlitePipeline("pipeline")
    item = {
        'case': 'sdjwaichjkneirjpewjcmelkdfpoewrjlkxncdsd'
    }
    # print() with a single argument behaves the same on Python 2 and 3.
    print(d.process_item(item))
And the output for the second run would be like :
THIS IS A DUP
{'case': 'sdjwaichjkneirjpewjcmelkdfpoewrjlkxncdsd'}
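I do not see much difference between your code and mine, so the only difference might be the library versions. One more thing to check: create_all() only creates tables that do not exist yet, so adding unique=True to the Table definition does not alter a preexisting table. For an existing table, create the constraint in the database itself, e.g. with CREATE UNIQUE INDEX ... ON mytable ("case").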

Related

sqlalchemy.exc.InvalidRequestError: Table 'thetab' is already defined for this MetaData instance

In one of my GET call, I need to pass to the query function a declarative class for which the name is supposed to change dynamically:
async def get_computers_both(db: Session, table_name: str, q: str = "", skip: int = 0, limit: str = "100"):
    """Query the table named ``table_name`` for rows whose computername contains ``q``.

    ``skip`` is accepted but not used in this body.
    """
    # A new declarative class is defined on every call, so the same table
    # name is registered on Base's metadata each time the endpoint is hit.
    class Computer(Base):
        __tablename__ = table_name
        # NOTE(review): a plain class attribute is presumably ignored by
        # declarative; table options normally go in
        # __table_args__ = {'extend_existing': True} -- confirm.
        extend_existing = True
        id = Column(Integer, primary_key=True, index=True)
        # NOTE(review): no `computername` column is declared here although
        # the filter below uses it.
    limit = str(limit)
    return db.query(Computer).filter(
        or_(
            Computer.computername.contains(q),
        )
    ).limit(limit).all()
Problem: every time I do this GET call (i.e. I load the corresponding webpage), I get:
sqlalchemy.exc.InvalidRequestError: Table 'thetab' is already defined for this MetaData instance. Specify 'extend_existing=True' to redefine options and columns on an existing Table object.
I have tried adding "extend_existing=True" when I create the Table object in the GET call:
#app.get("/computers/{db_name}/{table_name}", response_class=HTMLResponse)
async def read_computers(request: Request, db: Session = Depends(get_custom_db)):
Where get_custom_db contains :
computers = sqlalchemy.Table(
    table_name,
    metadata,
    sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True),
    sqlalchemy.Column("computername", sqlalchemy.String),
    # Keyword arguments must come after the positional Column arguments;
    # placing extend_existing before them is a SyntaxError.
    extend_existing=True,
)
But still the same...
Many thanks!

SQL-Alchemy: having problems creating relationships without ForeignKeys

I'm trying to create relationships without foreign key constraints in the database,
quite similar to this post:
sqlalchemy: create relations but without foreign key constraint in db?
However, I'm trying to do it with classical mapping,
and I can't figure out what I'm doing wrong.
from sqlalchemy import (
Table,
MetaData,
Column,
String,
)
from sqlalchemy.orm import mapper, relationship
from uuid import uuid4
class InspectionRecord:
    """Plain domain class holding an ``equipment`` value."""

    def __init__(self, equipment):
        self.equipment = equipment


class InspectedItem:
    """Plain domain class holding an ``item`` value."""

    def __init__(self, item):
        self.item = item


metadata = MetaData()
# inspection_id carries no ForeignKey here; the link to inspection_records
# is meant to exist only at the mapper level.
inspected_items = Table(
    'inspected_items',
    metadata,
    Column('inspection_id', String(50)),
    Column('inspected_item_id', String(50), primary_key=True),
    Column('item', String(50))
)
inspection_records = Table(
    'inspection_records',
    metadata,
    Column('inspection_id', String(50), primary_key=True, default=uuid4),
    Column('equipment', String(50))
)
def start_mappers():
    """Classically map both classes and relate records to their inspected item."""
    inspected_items_mapper = mapper(InspectedItem, inspected_items)
    # NOTE(review): the string primaryjoin uses the table variable names;
    # string expressions are presumably resolved against mapped class
    # names, which would explain the "failed to locate a name" error --
    # confirm.
    inspection_records_mapper = mapper(InspectionRecord, inspection_records, properties={
        "inspected_items": relationship(inspected_items_mapper,
                                        primaryjoin='foreign(inspected_items.inspection_id) == inspection_records.inspection_id',
                                        uselist=False)}
    ) # this is the part where I'm having difficulties with
if __name__ == '__main__':
    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker
    engine = create_engine('sqlite:///foo.db')
    # Recreate the schema from scratch on every run.
    metadata.drop_all(bind=engine)
    metadata.create_all(engine)
    start_mappers()
    Session = sessionmaker(bind=engine)
    session = Session()
    inspection_record = InspectionRecord(equipment='equipment_01')
    # The record is added to the session; no commit is issued in this snippet.
    session.add(inspection_record)
After so many attempts, even with additional tinkering, I only get
this error:
sqlalchemy.exc.InvalidRequestError: When initializing mapper mapped class InspectionRecord->inspection_records, expression '[InspectedItem.inspection_id]' failed to locate a name ("name 'InspectedItem' is not defined")
Any help would be really really really appreciated :)
I got this working
by changing mapper to mapper_registry.map_imperatively and referring to the mapped classes in the relationship:
def start_mappers():
    """Map both classes imperatively and relate them without a DB foreign key.

    ``mapper_registry`` is expected to be a module-level
    ``sqlalchemy.orm.registry()`` instance.
    """
    mapper_registry.map_imperatively(InspectedItem, inspected_items)
    mapper_registry.map_imperatively(
        InspectionRecord,
        inspection_records,
        properties={
            "inspected_items": relationship(
                InspectedItem,
                # The string refers to the mapped classes; foreign() marks
                # the column to treat as the foreign-key side, because the
                # table itself declares no ForeignKey.
                primaryjoin='foreign(InspectedItem.inspection_id) == InspectionRecord.inspection_id',
                uselist=False,
            )
        },
    )

Executing a sqlalchemy exists query

I'm having trouble understanding how to execute a query to check and see if a matching record already exists in sqlalchemy. Most of the examples I can find online seem to reference "session" and "query" objects that I don't have.
Here's a short complete program that illustrates my problem:
1. sets up in-memory sqlite db with "person" table.
2. inserts two records into the person table.
3. check if a particular record exists in the table. This is where it barfs.
from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData
from sqlalchemy.sql.expression import exists
# In-memory SQLite database with a single "person" table.
engine = create_engine('sqlite:///:memory:', echo=False)
metadata = MetaData()
person = Table('person', metadata,
               Column('id', Integer, primary_key=True),
               Column('name', String(255), nullable=False))
metadata.create_all(engine)
conn = engine.connect()
# Insert two rows using the same insert construct.
s = person.insert()
conn.execute(s, name="Alice")
conn.execute(s, name="Bob")
print("I can see the names in the table:")
s = person.select()
result = conn.execute(s)
print(result.fetchall())
print('This query looks like it should check to see if a matching record exists:')
s = person.select().where(person.c.name == "Bob")
# exists() wraps the select in an EXISTS clause.
s = exists(s)
print(s)
print("But it doesn't run...")
# This is the call that raises ObjectNotExecutableError.
result = conn.execute(s)
The output of this program is:
I can see the names in the table:
[(1, 'Alice'), (2, 'Bob')]
This query looks like it should check to see if a matching record exists:
EXISTS (SELECT person.id, person.name
FROM person
WHERE person.name = :name_1)
But it doesn't run...
Traceback (most recent call last):
File "/project_path/db_test/db_test_env/exists_example.py", line 30, in <module>
result = conn.execute(s)
File "/project_path/db_test/db_test_env/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 945, in execute
return meth(self, multiparams, params)
File "/project_path/db_test/db_test_env/lib/python3.6/site-packages/sqlalchemy/sql/elements.py", line 265, in _execute_on_connection
raise exc.ObjectNotExecutableError(self)
sqlalchemy.exc.ObjectNotExecutableError: Not an executable object: <sqlalchemy.sql.selectable.Exists object at 0x105797438>
exists(s) only builds the EXISTS clause, which is not an executable statement on its own. All you need to do to get your code to work is to generate a select from it:
s = exists(s).select()
Here's your full example:
from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData
from sqlalchemy.sql.expression import exists
# Throwaway in-memory database holding a single "person" table.
engine = create_engine('sqlite:///:memory:', echo=False)
metadata = MetaData()
person = Table(
    'person',
    metadata,
    Column('id', Integer, primary_key=True),
    Column('name', String(255), nullable=False),
)
metadata.create_all(engine)
conn = engine.connect()

# Seed two rows through the same insert construct.
s = person.insert()
conn.execute(s, name="Alice")
conn.execute(s, name="Bob")

# Dump the whole table.
print("I can see the names in the table:")
s = person.select()
result = conn.execute(s)
print(result.fetchall())

# Wrap the EXISTS clause in a SELECT so that it becomes executable.
print('This query looks like it should check to see if a matching record exists:')
s = exists(
    person.select().where(person.c.name == "Bob")
).select()
print(s)
print("And it runs fine...")
result = conn.execute(s)
print(result.fetchall())
exists is used in SQL subqueries. If you had a table posts containing blog post with an author_id, mapping back to people, you might use a query like the following to find people who had made a blog post:
select * from people where exists (select author_id from posts where author_id = people.id);
You can't have an EXISTS as the outermost statement in an SQL query; it's an operator to use in SQL boolean clauses.
So, SQLAlchemy is not letting you execute that query because it's not well-formed.
If you want to see if a row exists, just construct a select statement with a where clause and see how many rows the query returns.
Try this instead:
...
s = person.select().where(person.c.name == "Bob")
s = select(exists(s))
print(s)
...
Unless someone suggests a better answer, here's what I've come up with that works. Having the DB count the matching records and send just the count to the python app.
from sqlalchemy import select, func # more imports not in my example code above
# Let the database count the matching rows and send back only the number.
s = (
    select([func.count(1)])
    .select_from(person)
    .where(person.c.name == "Bob")
)
print(s)
# scalar() returns the first column of the first row, i.e. the count.
record_count = conn.execute(s).scalar()
print("Matching records: ", record_count)
Example output:
SELECT count(:count_2) AS count_1
FROM person
WHERE person.name = :name_1
Matching records: 1

Updating row in SqlAlchemy ORM

I am trying to obtain a row from the DB, modify that row and save it again,
all using SQLAlchemy.
My code
from sqlalchemy import Column, DateTime, Integer, String, Table, MetaData
from sqlalchemy.orm import mapper
from sqlalchemy import create_engine, orm
metadata = MetaData()
product = Table('product', metadata,
                Column('id', Integer, primary_key=True),
                Column('name', String(1024), nullable=False, unique=True),
                )


class Product(object):
    """Plain class classically mapped onto the product table below."""

    def __init__(self, id, name):
        self.id = id
        self.name = name


mapper(Product, product)
# db_path is not defined in this snippet.
db = create_engine('sqlite:////' + db_path)
sm = orm.sessionmaker(bind=db, autoflush=True, autocommit=True, expire_on_commit=True)
session = orm.scoped_session(sm)
# A textual statement is executed here, so the result rows are RowProxy
# objects rather than Product instances (see the error on prod.name below).
result = session.execute("select * from product where id = :id", {'id': 1}, mapper=Product)
prod = result.fetchone() #there are many products in db so query is ok
prod.name = 'test' #<- here I got AttributeError: 'RowProxy' object has no attribute 'name'
session .add(prod)
session .flush()
Unfortunately it does not work, because I am trying to modify a RowProxy object. How can I do what I want (load, change and save (update) a row) the SQLAlchemy ORM way?
I assume that your intention is to use Object-Relational API.
So to update row in db you'll need to do this by loading mapped object from the table record and updating object's property.
Please see code example below.
Please note I've added example code for creating new mapped object and creating first record in table also there is commented out code at the end for deleting the record.
from sqlalchemy import Column, DateTime, Integer, String, Table, MetaData
from sqlalchemy.orm import mapper
from sqlalchemy import create_engine, orm
metadata = MetaData()
product = Table(
    'product', metadata,
    Column('id', Integer, primary_key=True),
    Column('name', String(1024), nullable=False, unique=True),
)


class Product(object):
    """Plain class classically mapped onto the ``product`` table."""

    def __init__(self, id, name):
        self.id = id
        self.name = name

    def __repr__(self):
        # Use the class's __name__; ``self.__class__.name`` would pick up
        # the mapped ``name`` column attribute instead of the class name.
        return "%s(%r,%r)" % (self.__class__.__name__, self.id, self.name)


mapper(Product, product)
db = create_engine('sqlite:////temp/test123.db')
metadata.create_all(db)
sm = orm.sessionmaker(bind=db, autoflush=True, autocommit=True, expire_on_commit=True)
session = orm.scoped_session(sm)

# Create the Product record only if it is not in the table yet.
# print() with a single argument behaves the same on Python 2 and 3.
if session.query(Product).filter(Product.id==1).count()==0:
    new_prod = Product("1","Product1")
    print("Creating new product: %r" % new_prod)
    session.add(new_prod)
    session.flush()
else:
    print("product with id 1 already exists: %r" % session.query(Product).filter(Product.id==1).one())

# Load the mapped object, change an attribute and flush the session so the
# change is written back as an UPDATE.
print("loading Product with id=1")
prod = session.query(Product).filter(Product.id==1).one()
print("current name: %s" % prod.name)
prod.name = "new name"
print(prod)
prod.name = 'test'
session.add(prod)
session.flush()
print(prod)
#session.delete(prod)
#session.flush()
PS SQLAlchemy also provides SQL Expression API that allows to work with table records directly without creating mapped objects. In my practice we are using Object-Relation API in most of the applications, sometimes we use SQL Expressions API when we need to perform low level db operations efficiently such as inserting or updating thousands of records with one query.
Direct links to SQLAlchemy documentation:
Object Relational Tutorial
SQL Expression Language Tutorial

How to deal with the sqlalchemy's DB table blocking policy?

Can someone explain how I can avoid the application freezing when I have, for example, a list of entities and the ability to move to detail pages?
So, I open the list and one SQLAlchemy session starts; then I open one detail page and another session starts, then another one, and the application freezes because one session blocks another.
I cannot use one session for the whole application, because then I can't tell whether something was edited on a form just by checking the session's dirty, new and deleted attributes, and application state handling turns into fragile, unreadable code.
Do I need to implement some another kind of session handling policy?
Do I need to tune sqlalchemy mapping or sql server?
Here is the minimal working example:
from sqlalchemy import MetaData, Table, Column, FetchedValue, ForeignKey, create_engine
from sqlalchemy.types import BigInteger, String
from sqlalchemy.orm import mapper, relationship, sessionmaker, Session
class Ref(object):
    """Plain class that RefTableMapper.map_table() maps onto the 'Ref' table."""
    # Class-level placeholders named after the table's columns.
    id = None
    name = None
    id_parent = None
class TableMapper(object):
    """Holds a Table together with the type it is meant to be mapped to."""

    def __init__(self, metadata, mapped_type):
        # `metadata` is accepted but not used by this base class.
        self._table = None
        self._mapped_type = mapped_type

    def get_table(self):
        """Return the stored table (None until set_table() has been called)."""
        return self._table

    def set_table(self, table):
        """Store ``table`` after asserting that it is a Table instance."""
        assert isinstance(table, Table)
        self._table = table
class RefTableMapper(TableMapper):
    """Defines the self-referencing 'Ref' table and maps the Ref class onto it."""

    def __init__(self, metadata):
        TableMapper.__init__(self, metadata, Ref)
        # id_parent is a foreign key back to Ref.id.
        self.set_table(Table('Ref', metadata,
                             Column('id', BigInteger,
                                    primary_key = True, nullable = False),
                             Column('name', String),
                             Column('id_parent', BigInteger,
                                    ForeignKey('Ref.id'))
                             ))

    def map_table(self):
        """Map Ref onto the table with a 'parent' relationship; return the table."""
        # uselist=False makes 'parent' a single object rather than a list;
        # remote_side names the id column as the remote side of the join.
        r_parent = relationship(Ref,
                                uselist = False,
                                remote_side = [self._table.c.id],
                                primaryjoin = (
                                    self._table.c.id_parent == self._table.c.id))
        mapper(Ref, self._table,
               properties = {'parent': r_parent})
        return self._table
class Mapper(object):
    """Creates the engine, metadata and session factory, and maps the Ref table."""

    def __init__(self, url, echo = False):
        self._engine = create_engine(url, echo = echo)
        self._metadata = MetaData(self._engine)
        self._Session = sessionmaker(bind = self._engine, autoflush = False)
        # The returned table is not used afterwards; the call is made for
        # its mapping side effect.
        ref_t = RefTableMapper(self._metadata).map_table()

    def create_session(self):
        """Return a new session from the session factory."""
        return self._Session()
if __name__ == '__main__':
    # NOTE(review): the '#' in the URL is presumably a mangled '@' -- confirm.
    mapp = Mapper(r'mssql://username:pwd#Server\SQLEXPRESS/DBName', True)
    # First session: load every Ref row.
    s = mapp.create_session()
    rr = s.query(Ref).all()
    # Second session: rr is the list returned by .all() above.
    s1 = mapp.create_session()
    merged = s1.merge(rr)
    merged.flush()
    # Third session: the same query as in the first session.
    s2 = mapp.create_session()
    rr1 = s2.query(Ref).all() #application freezes!
SQL Server's default isolation mode locks entire tables very aggressively. (The above example looks as though you are emitting an UPDATE and then emitting a SELECT in a different transaction while the previous transaction is still pending, though session.merge() does not accept a list and the contents of the table aren't specified above, so it's difficult to say.)
Anyway, it's typical practice to enable multi-version concurrency control (SQL server calls it "row versioning") so that it has reasonable ability to lock individual rows against each other instead of full tables:
ALTER DATABASE MyDatabase SET ALLOW_SNAPSHOT_ISOLATION ON
ALTER DATABASE MyDatabase SET READ_COMMITTED_SNAPSHOT ON
Detail on this is available at http://msdn.microsoft.com/en-us/library/ms175095.aspx .

Categories