SQLAlchemy bulk insert statement in Postgres database throws AttributeError - python

I am trying to bulk-insert rows into a Postgres database with Python SQLAlchemy using an insert statement. I need to use an insert statement instead of bulk_insert_mappings because I want to silently ignore failed insertions of duplicate entries. (This requirement was not apparent before; I have added it now.)
The table is created as it should be. However, even a very simple insert operation via the statement API throws this error:
AttributeError: '_NoResultMetaData' object has no attribute '_indexes_for_keys'
Minimal Verifiable Example:
import os

import sqlalchemy
from sqlalchemy import (
    Column,
    INTEGER,
    TEXT
)
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

Base = declarative_base()

class Test(Base):
    __tablename__ = 'test'
    id = Column(INTEGER, primary_key=True)
    data = Column(TEXT)

engine = sqlalchemy.create_engine(os.environ['DATABASE_CONNECTION'])
Session = sessionmaker(engine)
Base.metadata.create_all(engine, Base.metadata.tables.values(), checkfirst=True)

connection = engine.connect()

buffer = [
    {
        'data': "First test"
    },
    {
        'data': "Second test"
    }
]

insert_statement = insert(Test).values(buffer)
# Using an insert statement instead of bulk_insert_mappings so I can do nothing when adding duplicate entries
insert_or_do_nothing = insert_statement.on_conflict_do_nothing(index_elements=[Test.id])
orm_statement = sqlalchemy.select(Test).from_statement(insert_or_do_nothing)

with Session() as session:
    session.execute(orm_statement).scalars()

connection.close()
Full stacktrace:
Traceback (most recent call last):
  File "/project/path/test.py", line 41, in <module>
    session.execute(orm_statement).scalars()
  File "/venv/path/sqlalchemy/orm/session.py", line 1715, in execute
    result = compile_state_cls.orm_setup_cursor_result(
  File "/venv/path/sqlalchemy/orm/context.py", line 354, in orm_setup_cursor_result
    return loading.instances(result, querycontext)
  File "/venv/path/sqlalchemy/orm/loading.py", line 89, in instances
    cursor.close()
  File "/venv/path/sqlalchemy/util/langhelpers.py", line 70, in __exit__
    compat.raise_(
  File "/venv/path/sqlalchemy/util/compat.py", line 208, in raise_
    raise exception
  File "/venv/path/sqlalchemy/orm/loading.py", line 69, in instances
    *[
  File "/venv/path/sqlalchemy/orm/loading.py", line 70, in <listcomp>
    query_entity.row_processor(context, cursor)
  File "/venv/path/sqlalchemy/orm/context.py", line 2627, in row_processor
    _instance = loading._instance_processor(
  File "/venv/path/sqlalchemy/orm/loading.py", line 715, in _instance_processor
    primary_key_getter = result._tuple_getter(pk_cols)
  File "/venv/path/sqlalchemy/engine/result.py", line 934, in _tuple_getter
    return self._metadata._row_as_tuple_getter(keys)
  File "/venv/path/sqlalchemy/engine/result.py", line 106, in _row_as_tuple_getter
    indexes = self._indexes_for_keys(keys)
AttributeError: '_NoResultMetaData' object has no attribute '_indexes_for_keys'
Am I misusing the statement interface? The ORM statement looks fine:
INSERT INTO test (data) VALUES (:data_m0), (:data_m1)
I am using:
PostgreSQL 14.4
psycopg2-binary 2.9.3
SQLAlchemy 1.4.39

Looking at the docs, you could try session.bulk_insert_mappings():
buffer = [
    {
        'data': "First test"
    },
    {
        'data': "Second test"
    }
]

with Session() as session:
    session.bulk_insert_mappings(Test, buffer)

I found a solution that uses the insert statement: avoid the ORM statements. For some reason, plain statements do the job, whilst ORM ones throw the AttributeError.
This is confusing, as the official documentation calls for ORM statements:
# THIS APPROACH DID NOT WORK FOR ME
stmt = stmt.on_conflict_do_update(
    index_elements=[User.name], set_=dict(fullname=stmt.excluded.fullname)
).returning(User)
orm_stmt = (
    select(User)
    .from_statement(stmt)
    .execution_options(populate_existing=True)
)
for user in session.execute(
    orm_stmt,
).scalars():
    print("inserted or updated: %s" % user)
But if you omit the ORM statement part, all is good:
# THIS WORKS
insert_statement = insert(Test).values(buffer)
insert_or_do_nothing = insert_statement.on_conflict_do_nothing(index_elements=[Test.id])

with Session() as session:
    session.execute(insert_or_do_nothing)
    session.commit()
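If you also need to see which rows actually made it in, here is a minimal sketch that stays on the plain-statement path (assuming the same Test model, buffer, and Session factory as above; treat the exact combination of on_conflict_do_nothing() with returning() as something to verify against your SQLAlchemy version):
insert_statement = insert(Test).values(buffer)
# RETURNING reports only the rows that were really inserted;
# rows skipped by ON CONFLICT DO NOTHING are absent from the result.
returning_statement = insert_statement.on_conflict_do_nothing(
    index_elements=[Test.id]
).returning(Test.id, Test.data)

with Session() as session:
    inserted_rows = session.execute(returning_statement).all()
    session.commit()
print(inserted_rows)  # e.g. [(1, 'First test'), (2, 'Second test')]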

Related

Why BINARY usage in SQLAlchemy with Python3 cause a TypeError: 'string argument without an encoding'

I have read a lot of similar questions, but none of them clearly answers my issue.
I'm using the sqlalchemy-utils EncryptedType on a MySQL table column.
The table creation and the insert work fine, but when I run a query I receive:
Traceback (most recent call last):
  File "workspace/bin/test.py", line 127, in <module>
    result = session.query(Tester).all()
  File "workspace\ERP\venv\lib\site-packages\sqlalchemy\orm\query.py", line 3244, in all
    return list(self)
  File "workspace\venv\lib\site-packages\sqlalchemy\orm\loading.py", line 101, in instances
    cursor.close()
  File "workspace\venv\lib\site-packages\sqlalchemy\util\langhelpers.py", line 69, in __exit__
    exc_value, with_traceback=exc_tb,
  File "workspace\venv\lib\site-packages\sqlalchemy\util\compat.py", line 178, in raise_
    raise exception
  File "workspace\venv\lib\site-packages\sqlalchemy\orm\loading.py", line 81, in instances
    rows = [proc(row) for row in fetch]
  File "workspace\venv\lib\site-packages\sqlalchemy\orm\loading.py", line 81, in <listcomp>
    rows = [proc(row) for row in fetch]
  File "workspace\venv\lib\site-packages\sqlalchemy\orm\loading.py", line 642, in _instance
    populators,
  File "workspace\venv\lib\site-packages\sqlalchemy\orm\loading.py", line 779, in _populate_partial
    dict_[key] = getter(row)
  File "workspace\venv\lib\site-packages\sqlalchemy\engine\result.py", line 107, in __getitem__
    return processor(self._row[index])
  File "workspace\venv\lib\site-packages\sqlalchemy\sql\sqltypes.py", line 944, in process
    value = bytes(value)
TypeError: string argument without an encoding
I found out that this error occurs only with Python 3, not with Python 2.
The problem is with the SQLAlchemy binary type, because I get the same error with Binary, VARBINARY, and BLOB columns.
Since bytes() in Python 3 needs an encoding for strings, I changed the code of sqlalchemy\sql\sqltypes.py on line 944 to value = bytes(value, 'utf-8') and all works well. So my questions are:
Why do I need to change the SQLAlchemy code? Is SQLAlchemy fully usable with Python 3? Is it safe to change the code of a package?
Here is a code sample to try:
from sqlalchemy import MetaData, Integer, Column, Table, Binary, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, scoped_session

DB_CONFIG = {
    'user': 'user_test',
    'password': 'PSW_test',
    'host': '127.0.0.1',
    'database': 'db_test',
}

if __name__ == '__main__':
    Base = declarative_base()
    engine = create_engine("mysql+mysqlconnector://%(user)s:%(password)s@%(host)s/%(database)s" % DB_CONFIG,
                           echo=False)
    Base.metadata.bind = engine
    db_sessionmaker = sessionmaker(bind=engine)
    Session = scoped_session(db_sessionmaker)

    # create the table
    meta = MetaData()
    tests = Table(
        'test', meta,
        Column('id', Integer, primary_key=True),
        Column('attr', Binary)
    )
    meta.create_all(engine)

    class Test(Base):
        __tablename__ = 'test'
        id = Column(Integer, primary_key=True)
        attr = Column(Binary)

    new_test = Test(attr='try'.encode('utf-8'))
    session = Session()
    session.add(new_test)
    session.commit()

    result = session.query(Test).all()
    for a in result:
        print(a, a.id, a.attr)
    Session.remove()
Thanks to the hint provided by Ilja Everilä, I was able to find a solution. Maybe not the best solution, but it is working now.
I think the root cause is that my DB-API driver automatically converts bytes to str during the query, so I just disabled this behavior by adding a parameter to create_engine:
engine = create_engine("mysql+mysqlconnector://%(user)s:%(password)s@%(host)s/%(database)s" % DB_CONFIG, connect_args={'use_unicode': False})
The consequence is that String columns are returned from queries as bytes, not str, and you have to decode them manually.
Surely there is a better solution.
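A less invasive alternative might be to wrap the binary column in a SQLAlchemy TypeDecorator that re-encodes any str the driver hands back, instead of patching the library or disabling use_unicode globally. A sketch only, assuming Python 3 and not tested against mysql-connector; CoercingBinary is a hypothetical name:
from sqlalchemy import LargeBinary
from sqlalchemy.types import TypeDecorator

class CoercingBinary(TypeDecorator):
    """Binary column type that tolerates drivers returning str instead of bytes."""
    impl = LargeBinary

    def process_result_value(self, value, dialect):
        # mysql-connector may decode BLOB values to str; coerce back to bytes.
        if isinstance(value, str):
            return value.encode('utf-8')
        return value
You would then declare the column as Column('attr', CoercingBinary()) in place of Column('attr', Binary).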
There does seem to be something wrong with the MySQL connector. Just switch from mysql-connector-python to mysqlclient. I had the same problem and it helped me.
Instead of mysql+mysqlconnector://<user>:<password>@<host>[:<port>]/<dbname> you'll have mysql+mysqldb://<user>:<password>@<host>[:<port>]/<dbname>
The SQLAlchemy docs suggest using mysqlclient (a fork of MySQL-Python) over MySQL Connector:
The MySQL Connector/Python DBAPI has had many issues since its release, some of which may remain unresolved, and the mysqlconnector dialect is not tested as part of SQLAlchemy’s continuous integration. The recommended MySQL dialects are mysqlclient and PyMySQL.

Reading attributes of a detached sqlalchemy object raises DetachedInstanceError

I am using short-lived SQLAlchemy sessions to add objects to a SQLite database. A few objects outlive the session in a read-only, detached state. Unfortunately, accessing attributes of a detached object throws an exception if the session has been closed. Here is a simplified code example:
from sqlalchemy import Column, String, Integer, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

Base = declarative_base()

class Foo(Base):
    __tablename__ = 'foo'
    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=False)

engine = create_engine('sqlite:///foo.db', echo=False)
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)

session = Session()
f = Foo(name='foo1')
print('state=transient : name=', f.name)
session.add(f)
print('state=pending : name=', f.name)
session.commit()
session.close()
print('state=detached : name=', f.name)
# output
state=transient : name= foo1
state=pending : name= foo1
Traceback (most recent call last):
  File "scratch_46.py", line 23, in <module>
    print('state=detached : name=', f.name)
  File "lib/python3.7/site-packages/sqlalchemy/orm/attributes.py", line 282, in __get__
    return self.impl.get(instance_state(instance), dict_)
  File "lib/python3.7/site-packages/sqlalchemy/orm/attributes.py", line 705, in get
    value = state._load_expired(state, passive)
  File "lib/python3.7/site-packages/sqlalchemy/orm/state.py", line 660, in _load_expired
    self.manager.deferred_scalar_loader(self, toload)
  File "lib/python3.7/site-packages/sqlalchemy/orm/loading.py", line 913, in load_scalar_attributes
    "attribute refresh operation cannot proceed" % (state_str(state))
sqlalchemy.orm.exc.DetachedInstanceError: Instance <Foo at 0x7f266c758ac8> is not bound to a Session; attribute refresh operation cannot proceed (Background on this error at: http://sqlalche.me/e/bhk3)
Oddly enough, the error is not thrown if I do either of these:
- Read the name attribute between the commit and close calls, while the object is in the persistent state.
- Expunge the object from the session and leave the session open. Still detached state, but I can access the attribute.
Should I be able to read attributes of a detached object? Is there a way to have objects safely outlive transient sessions? I read the suggested link in the output, but it talks mostly about parent/child relationships.
I created an issue on the SQLAlchemy repo because I thought this was a bug at first, but now I am not so certain.
I dug into this more and found that I was getting bitten by the default value of expire_on_commit being True on the session. When that is on, the commit call expires the object, which forces SQLAlchemy to reload it the next time an attribute is read. If that read is delayed until after the session is closed, the attributes can't be fetched.
I don't really want this auto-expiration, since I know my objects are in a good state and are read-only after being saved. Setting expire_on_commit to False in the sessionmaker resolves the issue.
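For reference, a minimal sketch of that fix against the example above (expire_on_commit is a documented sessionmaker parameter):
# Sessions from this factory no longer expire instances on commit,
# so attributes loaded before close() stay readable on detached objects.
Session = sessionmaker(bind=engine, expire_on_commit=False)

session = Session()
f = Foo(name='foo1')
session.add(f)
session.commit()
session.close()
print('state=detached : name=', f.name)  # no DetachedInstanceError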

Iterate a pymongo cursor from MLab

Why won't the cursor iterate? I feel sure there should be an easy solution.
I have tried multiple Stack Overflow answers and the documentation for MongoDB:
https://docs.mongodb.com/getting-started/python/query/
The code is below:
from pymongo import MongoClient

# Connect to Mongo Client
client = MongoClient('mongodb://the_username:the_password@ds047124.mlab.com:47124/politicians_from_theage')
db = client.politicians_from_theage  # define database used

# Define Collection
collection = db.posts
print collection
Result:
Collection(Database(MongoClient(host=['ds047124.mlab.com:47124'], document_class=dict, tz_aware=False, connect=True), u'politicians_from_theage'), u'posts')
Then the cursor will print its location:
# Define Cursor
my_cursor = collection.find()
print my_cursor
Result:
<pymongo.cursor.Cursor object at 0x0000000003247518>
Then trying to iterate over the cursor produces a timeout:
# Perform query
cursor = db.posts.find()

# Iterate the cursor and print the documents.
for document in cursor:
    print(document)  # No luck
Traceback error on iteration:
Traceback (most recent call last):
  File "C:\PythonC\PythonWebScraping\17_MongoInterface\mongoget.py", line 18, in <module>
    for result_object in my_cursor:
  File "C:\Python27\lib\site-packages\pymongo\cursor.py", line 1090, in next
    if len(self.__data) or self._refresh():
  File "C:\Python27\lib\site-packages\pymongo\cursor.py", line 1012, in _refresh
    self.__read_concern))
  File "C:\Python27\lib\site-packages\pymongo\cursor.py", line 850, in __send_message
    **kwargs)
  File "C:\Python27\lib\site-packages\pymongo\mongo_client.py", line 827, in _send_message_with_response
    server = topology.select_server(selector)
  File "C:\Python27\lib\site-packages\pymongo\topology.py", line 210, in select_server
    address))
  File "C:\Python27\lib\site-packages\pymongo\topology.py", line 186, in select_servers
    self._error_message(selector))
pymongo.errors.ServerSelectionTimeoutError: ds047124.mlab.com:47124: timed out
I have tried iterating over 'cursor', 'my_cursor', and 'collection', each of which produces the same server-timeout traceback.
Any help/insight would be greatly appreciated
This may help you: toArray() is a MongoDB shell/JavaScript method, not PyMongo. The Python equivalent is to materialize the cursor into a list first:
# Perform query, materializing the cursor into a list of documents
result = list(db.posts.find())

# Iterate the list and print the documents.
for document in result:
    print(document)
Let me know if it works.
Found the answer. I was focusing on the cursor itself rather than loading the documents from it and converting them from BSON into a list of JSON strings.
Final code is below (with the URI credentials replaced):
import json
from datetime import date, timedelta
from pymongo import MongoClient
from bson import json_util

# Connect to Mongo Client
client = MongoClient('mongodb://user:pword@ds047124.mlab.com:47124/politicians_from_theage')
db = client.politicians_from_theage  # define database used
print db

# Define Collection
collection = db.posts
print collection  # prints Collection(Database(MongoClient(host=['ds047124.mlab.com:47124']...

cursor = collection.find()
print cursor

# Obtain json
json_docs = []
for doc in cursor:
    json_doc = json.dumps(doc, default=json_util.default)
    json_docs.append(json_doc)
print json_docs  # json result

# List comprehension version
#json_docs = [json.dumps(doc, default=json_util.default) for doc in cursor]

# To get back from json again as a string list
docs = [json.loads(j_doc, object_hook=json_util.object_hook) for j_doc in json_docs]
print docs

print 'kitty terminates program'
Try this:
cursor = db.posts.find()
for document in list(cursor):
    print(document)
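Worth noting for anyone landing here with the same traceback: the original error is a pymongo.errors.ServerSelectionTimeoutError, which signals a connectivity failure rather than a cursor problem, so materializing the cursor only changes when the failure surfaces. A quick way to check connectivity up front (a sketch; serverSelectionTimeoutMS is a standard MongoClient option, and the URI here is a placeholder):
from pymongo import MongoClient
from pymongo.errors import ServerSelectionTimeoutError

client = MongoClient('mongodb://user:pword@host:47124/dbname',
                     serverSelectionTimeoutMS=5000)  # fail fast after 5 s
try:
    client.admin.command('ping')  # forces server selection immediately
except ServerSelectionTimeoutError as exc:
    print('cannot reach the server:', exc)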

error to use django cursor to save escaped characters

I have a URL that I want to save into a MySQL database using the cursor tool offered by Django, but I keep getting the "not enough arguments for format string" error because the URL contains percent-escaped (non-ASCII) characters. The testing code is fairly short:
test.py
import os
import runconfig #configuration file
os.environ['DJANGO_SETTINGS_MODULE'] = runconfig.django_settings_module
from django.db import connection,transaction
c = connection.cursor()
url = "http://www.academicjournals.org/ijps/PDF/pdf2011/18mar/G%C3%B3mez-Berb%C3%ADs et al.pdf"
dbquery = "INSERT INTO main_crawl_document SET url="+url
c.execute(dbquery)
transaction.commit_unless_managed()
The full error message is:
Traceback (most recent call last):
  File "./test.py", line 14, in <module>
    c.execute(dbquery)
  File "/usr/local/lib/python2.6/site-packages/django/db/backends/util.py", line 38, in execute
    sql = self.db.ops.last_executed_query(self.cursor, sql, params)
  File "/usr/local/lib/python2.6/site-packages/django/db/backends/__init__.py", line 505, in last_executed_query
    return smart_unicode(sql) % u_params
TypeError: not enough arguments for format string
Can anybody help me?
You're opening yourself up to possible SQL injection. Instead, use c.execute() properly with a parameterized query (the MySQL backend uses %s placeholders, not ?):
url = "http://www.academicjournals.org/ijps/PDF/pdf2011/18mar/G%C3%B3mez-Berb%C3%ADs et al.pdf"
dbquery = "INSERT INTO main_crawl_document SET url=%s"
c.execute(dbquery, (url,))
transaction.commit_unless_managed()
The .execute method should accept an iterable of parameters to use for escaping, assuming it's the normal DB-API method (which it should be with Django).
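As a side note on the original failure: Django's MySQL backend ultimately applies Python's % operator to the SQL string (see last_executed_query in the traceback), so the percent-escapes in the URL are read as format directives. A tiny plain-Python illustration of the trap (exactly which exception fires depends on the first %-sequence encountered):
sql = "INSERT INTO main_crawl_document SET url=" + "G%C3%B3mez.pdf"
try:
    sql % ()  # %C3 / %B3 look like format directives to Python
except (TypeError, ValueError) as exc:
    print(exc)
# Passing the URL as a query parameter keeps it out of any format string;
# a literal % in hand-written SQL would have to be doubled as %%.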

MySQLdb InterfaceError

I have a very weird problem with MySQLdb (the MySQL module for Python).
I have a file with queries for inserting records into tables. If I call the functions from that file, it works just fine; but when I call one of the functions from another file, it throws:
_mysql_exceptions.InterfaceError: (0, '')
I really don't get what I'm doing wrong here.
I call the function from buildDB.py:
import create
create.newFormat("HD", 0,0,0)
The function newFormat(...) is in create.py (imported):
from Database import Database

db = Database()

def newFormat(name, width=0, height=0, fps=0):
    format_query = "INSERT INTO Format (form_name, form_width, form_height, form_fps) VALUES ('"+name+"',"+str(width)+","+str(height)+","+str(fps)+");"
    db.execute(format_query)
And the Database class is the following:
import MySQLdb
from MySQLdb.constants import FIELD_TYPE

class Database():
    def __init__(self):
        server = "localhost"
        login = "seq"
        password = "seqmanager"
        database = "Sequence"
        my_conv = { FIELD_TYPE.LONG: int }
        self.conn = MySQLdb.connection(host=server, user=login, passwd=password, db=database, conv=my_conv)
        # self.cursor = self.conn.cursor()

    def close(self):
        self.conn.close()

    def execute(self, query):
        self.conn.query(query)
(I have included only the relevant code.)
Traceback:
Z:\sequenceManager\mysql>python buildDB.py
D:\ProgramFiles\Python26\lib\site-packages\MySQLdb\__init__.py:34: DeprecationWarning: the sets module is deprecated
  from sets import ImmutableSet
INSERT INTO Format (form_name, form_width, form_height, form_fps) VALUES ('HD',0,0,0);
Traceback (most recent call last):
  File "buildDB.py", line 182, in <module>
    create.newFormat("HD")
  File "Z:\sequenceManager\mysql\create.py", line 52, in newFormat
    db.execute(format_query)
  File "Z:\sequenceManager\mysql\Database.py", line 19, in execute
    self.conn.query(query)
_mysql_exceptions.InterfaceError: (0, '')
The warning has never been a problem before, so I don't think it's related.
I got this error when I was trying to use a closed connection.
Problem resolved: I was initializing the database twice. Sorry if you lost time reading this!
I couldn't get your setup to work; it gives me the same error all the time. However, the way you connect to and query the database seems to be non-standard.
I had better luck with this setup:
conn = MySQLdb.Connection(user="user", passwd="******",
                          db="somedb", host="localhost")
cur = conn.cursor()
cur.execute("insert into Format values (%s,%s,%s,%s);", ("hd",0,0,0))
This way you can take advantage of the DB module's input escaping, which is a must to mitigate SQL injection attacks.
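One follow-up on that setup: with MySQLdb the DB-API connection does not autocommit by default, so (at least on transactional tables such as InnoDB) the INSERT above only becomes permanent once you commit. A minimal sketch continuing from the cursor above:
cur.execute("insert into Format values (%s,%s,%s,%s);", ("hd", 0, 0, 0))
conn.commit()   # persist the row; otherwise it is rolled back when the connection closes
cur.close()
conn.close()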
