I'm retrieving a json file from online that I want to insert into my database using peewee. The problem is that some of the rows may already exist in my database. The solution should be to either ignore or replace the duplicate rows.
The InsertQuery function supports adding multiple rows, but I cannot figure out how to either suppress errors that the instance already exists or to replace the existing instance.
Starting with an empty database test, I run the following code
from peewee import *
from peewee import InsertQuery
database = MySQLDatabase('test', **{'password': 'researchServer', 'user': 'root'})
class BaseModel(Model):
class Meta:
database = database
class Image(BaseModel):
url = CharField(unique=True)
database.connect()
database.create_tables([Image])
images= [{'url': 'one'}, {'url':'two'}]
try:
image_entry = InsertQuery(Image, rows=images)
image_entry.execute()
except:
print 'error'
This produces no errors and successfully adds 'one' and 'two' to my table.
If I then run,
images= [{'url':'three'}, {'url': 'one'}, {'url':'four'}]
try:
image_entry = InsertQuery(Image, rows=images)
image_entry.execute()
except:
print 'error'
The execute function throws an error and neither 'three' or 'four' get added to the database.
I suppose one solution would be to check each row before adding it to the database, but this seems like it would be more inefficient.
You can use on_conflict() or upsert() on your InsertQuery.
on_conflict() will add an SQL ON CONFLICT clause with the given argument but only works with SQLite. upsert() basically turns the query into a REPLACE INTO on MySQL. You still need to call execute() after.
Image.insert_many(images).upsert(True).execute()
peewee doc upsert
I haven't been able to find a solution in peewee, but here's one that I wrote for SQLAlchemy.
from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey
from sqlalchemy import create_engine
#Make the table
metadata = MetaData()
image = Table('image', metadata,
Column('url', String(250), primary_key=True))
db = create_engine('mysql://root:researchServer#localhost/test3')
metadata.create_all(db)
conn = engine.connect()
#Insert the first set of rows
images = [{'url': 'one'}, {'url': 'two'}]
inserter = db.image.insert()
conn.execute(inserter, images)
#Insert the second set of rows with some duplicates
images = [{'url': 'three'}, {'url': 'one'}, {'url':'four'}]
try:
inserter = db.image.insert().prefix_with("IGNORE")
conn.execute(inserter, images)
except:
print 'error'
The key is using the 'prefix_with' method to add the 'ignore' to the SQL expression. I was greatly helped by SQLAlchemy INSERT IGNORE
Related
I would like to switch on the fast_executemany option for the pyODBC driver while using SQLAlchemy to insert rows to a table. By default it is of and the code runs really slow... Could anyone suggest how to do this?
Edits:
I am using pyODBC 4.0.21 and SQLAlchemy 1.1.13 and a simplified sample of the code I am using are presented below.
import sqlalchemy as sa
def InsertIntoDB(self, tablename, colnames, data, create = False):
"""
Inserts data into given db table
Args:
tablename - name of db table with dbname
colnames - column names to insert to
data - a list of tuples, a tuple per row
"""
# reflect table into a sqlalchemy object
meta = sa.MetaData(bind=self.engine)
reflected_table = sa.Table(tablename, meta, autoload=True)
# prepare an input object for sa.connection.execute
execute_inp = []
for i in data:
execute_inp.append(dict(zip(colnames, i)))
# Insert values
self.connection.execute(reflected_table.insert(),execute_inp)
Try this for pyodbc
crsr = cnxn.cursor()
crsr.fast_executemany = True
Starting with version 1.3, SQLAlchemy has directly supported fast_executemany, e.g.,
engine = create_engine(connection_uri, fast_executemany=True)
I am trying to query one of my tables in my Postgres database using SqlAlchemy in Python 3. It runs the query fine but as I go through each row in the result that SqlAlchemy returns, I try to use the attribute 'text' (one of my column names). I receive this error:
'str' object has no attribute 'text'
I have printed the attribute like so:
for row in result:
print(row.text)
This does not give the error. The code that produces the error is below. However, to give my environment:
I have two servers running. One is for my database the other is for my python server.
Database Server:
Postgres v9.6 - On Amazon's RDS
Server with Python
Linux 3.13.0-65-generic x86_64 - On an Amazon EC2 Instance
SqlAlchemy v1.1.5
Python v3.4.3
Flask 0.11.1
Files related:
import sqlalchemy as sa
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
import re
from nltk import sent_tokenize
class DocumentProcess:
def __init__(self):
...
Engine = sa.create_engine(
CONFIG.POSTGRES_URL,
client_encoding='utf8',
pool_size=20,
max_overflow=0
)
# initialize SQLAlchemy
Base = automap_base()
# reflect the tables
Base.prepare(Engine, reflect=True)
# Define all needed tables
self.Document = Base.classes.documents
self.session = Session(Engine)
...
def process_documents(self):
try:
offset = 5
limit = 50
###### This is the query in question ##########
result = self.session.query(self.Document) \
.order_by(self.Document.id) \
.offset(offset) \
.limit(limit)
for row in result:
# The print statement below does print out the text
print(row.text)
# when passing document.text to sent_tokenize, it
# gives the following error:
# 'str' object has no attribute 'text'
snippets = sent_tokenize(row.text.strip('\n')) # I have removed strip, but the same problem
except Exception as e:
logging.info(format(e))
raise e
This is my model for Document, in my PostgreSQL database:
class Document(db.Model):
__tablename__ = "documents"
id = db.Column(db.Integer, primary_key=True)
text = db.Column(db.Text)
tweet = db.Column(db.JSON)
keywords = db.Column(db.ARRAY(db.String), nullable=True)
def to_dict(self):
return dict(
id=self.id,
text=self.text,
tweet=self.tweet,
keywords=self.keywords
)
def json(self):
return jsonify(self.to_dict())
def __repr__(self):
return "<%s %r>" % (self.__class__, self.to_dict())
Things I have tried
Before, I did not have order_by in the Document query. This was working before. However, even removing order_by does not fix it anymore.
Used a SELECT statement and went through the result manually, but still the same result
What I haven't tried
I am wondering if its because I named the column 'text'. I noticed that when I write this query out in postgres, it highlights it as a reserved word. I'm confused why my query worked before, but now it doesn't work. Could this be the issue?
Any thoughts on this issue would be much appreciated.
It turns out that text is a reserved word in PostgreSQL. I renamed the column name and refactored my code to match. This solved the issue.
You are likely to get this error in PostgreSQL if you are creating a foreign table and one of the column datatype is text. Change it to character varying() and the error disappears!
I have csv file that include some information about computer as like ostype, ram, cpu value and ı have sql database that already has same information and ı want to updated that database table with by python script. database table and csv file has uniqe "id" parameters.
import csv
with open("Hypersanal.csv") as csvfile:
readCSV = csv.reader(csvfile, delimiter=';')
for row in readCSV:
print row
Depending on what type of database, there will be some slight adjustments to make to the code.
For this example, I'll use SQLAlchemy with the pymysql driver. To find out what the first part of the connection String should be (depends on the kind of database you want to connect to), check SQLAlchemy Doc about Dialects.
First, we import the necessary modules
from sqlalchemy import *
from sqlalchemy.orm import create_session
from sqlalchemy.ext.declarative import declarative_base
Then, we create the connection string
dialect_part = "mysql+pymysql://"
# username is the usernmae we'll use to connect to the db, and password the corresponding password
# server_name is the name fo the server where the db is. It INCLUDES the port number (eg : 'localhost:9080')
# database is the name of the db on the server we'll work on
connection_string = dialect_part+username+":"+password+"#"+server_name+"/"+database
Some more setups needed for SQLAlchemy :
Base = declarative_base()
engine = create_engine(connection_string)
metadata = MetaData(bind=engine)
Now, we have a link to the db, but need some more work before being able to do anything to it.
We create a class corresponding to the table of the db we'll hit. This class will 'autofill' according to how the table is in the db. You can also fill it manually.
class TableWellHit(Base):
__table__ = Table(name_of_the_table, metadata, autoload=True)
Now, to be able to interact with the table, we need to create a session :
session = create_session(bind=engine)
Now, we need to begin the session, and we'll be set.
Your code will now be used.
import csv
with open("Hypersanal.csv") as csvfile:
readCSV = csv.reader(csvfile, delimiter=';')
for row in readCSV:
# print row
# I chose to push each value from the db one by one
# If you're sure there won't be any duplicates for the primary key, you can put the session.begin() before the for loop
session.begin()
# I create an element for the db
new_element = TableWellHit(field_in_table=row[field])
An example for this, imagine you have required fiels 'username' and 'password' in the table, and row contains a dictionnary containing 'user' and 'pass' as keys.
The elements will be created by : TableWellHit(username=row['user],password=row['pass'])
# I add the element to the table
# I choose to merge instead of add, so as to prevent duplicates, one more time
session.merge(new_element)
# Now, we commit our changes to the db
# This also closes the session
# if you put the session.begin() outside of the loop, do the same for the session.commit()
session.commit()
Hope this answers your question, and if he does not, just let me know so I can correct my answer.
edit :
For MSSQL :
- Install pymssql (pip install pymssql)
The connection_string should be of the following form, according to this SQLAlchemy page : mssql+pymssql://<username>:<password>#<freetds_name>/?charset=utf8
Using merge allows you to create or update a value, depending on whether or not it already exists.
I want to implement a function that gives information about all the tables (and their column names) that are present in a database (not only those created with SQLAlchemy). While reading the documentation it seems to me that this is done via reflection but I didn't manage to get something working. Any suggestions or examples on how to do this?
start with an engine:
from sqlalchemy import create_engine
engine = create_engine("postgresql://u:p#host/database")
quick path to all table /column names, use an inspector:
from sqlalchemy import inspect
inspector = inspect(engine)
for table_name in inspector.get_table_names():
for column in inspector.get_columns(table_name):
print("Column: %s" % column['name'])
docs: http://docs.sqlalchemy.org/en/rel_0_9/core/reflection.html?highlight=inspector#fine-grained-reflection-with-inspector
alternatively, use MetaData / Tables:
from sqlalchemy import MetaData
m = MetaData()
m.reflect(engine)
for table in m.tables.values():
print(table.name)
for column in table.c:
print(column.name)
docs: http://docs.sqlalchemy.org/en/rel_0_9/core/reflection.html#reflecting-all-tables-at-once
First set up the sqlalchemy engine.
from sqlalchemy import create_engine, inspect, text
from sqlalchemy.engine import url
connect_url = url.URL(
'oracle',
username='db_username',
password='db_password',
host='db_host',
port='db_port',
query=dict(service_name='db_service_name'))
engine = create_engine(connect_url)
try:
engine.connect()
except Exception as error:
print(error)
return
Like others have mentioned, you can use the inspect method to get the table names.
But in my case, the list of tables returned by the inspect method was incomplete.
So, I found out another way to find table names by using pure SQL queries in sqlalchemy.
query = text("SELECT table_name FROM all_tables where owner = '%s'"%str('db_username'))
table_name_data = self.session.execute(query).fetchall()
Just for sake of completeness of answer, here's the code to fetch table names by inspect method (if it works good in your case).
inspector = inspect(engine)
table_names = inspector.get_table_names()
Hey I created a small module that helps easily reflecting all tables in a database you connect to with SQLAlchemy, give it a look: EZAlchemy
from EZAlchemy.ezalchemy import EZAlchemy
DB = EZAlchemy(
db_user='username',
db_password='pezzword',
db_hostname='127.0.0.1',
db_database='mydatabase',
d_n_d='mysql' # stands for dialect+driver
)
# this function loads all tables in the database to the class instance DB
DB.connect()
# List all associations to DB, you will see all the tables in that database
dir(DB)
I'm proposing another solution as I was not satisfied by any of the previous in the case of postgres which uses schemas. I hacked this solution together by looking into the pandas source code.
from sqlalchemy import MetaData, create_engine
from typing import List
def list_tables(pg_uri: str, schema: str) -> List[str]:
with create_engine(pg_uri).connect() as conn:
meta = MetaData(conn, schema=schema)
meta.reflect(views=True)
return list(meta.tables.keys())
In order to get a list of all tables in your schema, you need to form your postgres database uri pg_uri (e.g. "postgresql://u:p#host/database" as in the zzzeek's answer) as well as the schema's name schema. So if we use the example uri as well as the typical schema public we would get all the tables and views with:
list_tables("postgresql://u:p#host/database", "public")
While reflection/inspection is useful, I had trouble getting the data out of the database. I found sqlsoup to be much more user-friendly. You create the engine using sqlalchemy and pass that engine to sqlsoup.SQlSoup. ie:
import sqlsoup
def create_engine():
from sqlalchemy import create_engine
return create_engine(f"mysql+mysqlconnector://{database_username}:{database_pw}#{database_host}/{database_name}")
def test_sqlsoup():
engine = create_engine()
db = sqlsoup.SQLSoup(engine)
# Note: database must have a table called 'users' for this example
users = db.users.all()
print(users)
if __name__ == "__main__":
test_sqlsoup()
If you're familiar with sqlalchemy then you're familiar with sqlsoup. I've used this to extract data from a wordpress database.
I made a table using SQLAlchemy and forgot to add a column. I basically want to do this:
users.addColumn('user_id', ForeignKey('users.user_id'))
What's the syntax for this? I couldn't find it in the docs.
I have the same problem, and a thought of using migration library only for this trivial thing makes me
tremble. Anyway, this is my attempt so far:
def add_column(engine, table_name, column):
column_name = column.compile(dialect=engine.dialect)
column_type = column.type.compile(engine.dialect)
engine.execute('ALTER TABLE %s ADD COLUMN %s %s' % (table_name, column_name, column_type))
column = Column('new_column_name', String(100), primary_key=True)
add_column(engine, table_name, column)
Still, I don't know how to insert primary_key=True into raw SQL request.
This is referred to as database migration (SQLAlchemy doesn't support migration out of the box). You can look at using sqlalchemy-migrate to help in these kinds of situations, or you can just ALTER TABLE through your chosen database's command line utility,
See this section of the SQLAlchemy documentation: http://docs.sqlalchemy.org/en/latest/core/metadata.html#altering-schemas-through-migrations
Alembic is the latest software to offer this type of functionality and is made by the same author as SQLAlchemy.
I have a database called "ncaaf.db" built with sqlite3 and a table called "games". So I would CD into the same directory on my linux command prompt and do
sqlite3 ncaaf.db
alter table games add column q4 type float
and that is all it takes! Just make sure you update your definitions in your sqlalchemy code.
from sqlalchemy import create_engine
engine = create_engine('sqlite:///db.sqlite3')
engine.execute('alter table table_name add column column_name String')
I had the same problem, I ended up just writing my own function in raw sql. If you are using SQLITE3 this might be useful.
Then if you add the column to your class definition at the same time it seems to do the trick.
import sqlite3
def add_column(database_name, table_name, column_name, data_type):
connection = sqlite3.connect(database_name)
cursor = connection.cursor()
if data_type == "Integer":
data_type_formatted = "INTEGER"
elif data_type == "String":
data_type_formatted = "VARCHAR(100)"
base_command = ("ALTER TABLE '{table_name}' ADD column '{column_name}' '{data_type}'")
sql_command = base_command.format(table_name=table_name, column_name=column_name, data_type=data_type_formatted)
cursor.execute(sql_command)
connection.commit()
connection.close()
I've recently had this same issue so I took a point from AlexP in an earlier answer. The problem was in getting the new column into my program's metadata. Using sqlAlchemy's append_column functionality had some unexpected downstream effects ('str' object has no attribute 'dialect impl'). I corrected this by adding the column with DDL (MySQL database in this case) and then reflecting the table back from the DB into my metadata.
Code is as roughly as follows (modified slightly from what I have in order to reduce it to its minimal essence. I apologize for any mistakes - if there, they should be minor)...
try:
# Use back quotes as a protection against SQL Injection Attacks. Can we do more?
common.qry_engine.execute('ALTER TABLE %s ADD COLUMN %s %s' %
('`' + self.tbl.schema + '`.`' + self.tbl.name + '`',
'`' + self.outputs[new_col] + '`', 'VARCHAR(50)'))
except exc.SQLAlchemyError as msg:
raise GRError(desc='Unable to physically add derived column to table. Contact support.',
data=str(self.outputs), other_info=str(msg))
try: # Refresh the metadata to show the new column
self.tbl = sqlalchemy.Table(self.tbl.name, self.tbl.metadata, extend_existing=True, autoload=True)
except exc.SQLAlchemyError as msg:
raise GRError(desc='Unable to establish metadata for new column. Contact support.',
data=str(self.outputs), other_info=str(msg))
Yes you can
Install sqlalchemy-migrate (pip install sqlalchemy-migrate) and use it in your script to call Table and Column create() method:
from sqlalchemy import String, MetaData, create_engine
from migrate.versioning.schema import Table, Column
db_engine = create_engine(app.config.get('SQLALCHEMY_DATABASE_URI'))
db_meta = MetaData(bind=db_engine)
table = Table('tabel_name' , db_meta)
col = Column('new_column_name', String(20), default='foo')
col.create(table)
Just continuing the simple way proposed by chasmani, little improvement
'''
# simple migration
# columns to add:
# last_status_change = Column(BigInteger, default=None)
# last_complete_phase = Column(String, default=None)
# complete_percentage = Column(DECIMAL, default=0.0)
'''
import sqlite3
from config import APP_STATUS_DB
from sqlalchemy import types
def add_column(database_name: str, table_name: str, column_name: str, data_type: types, default=None):
ret = False
if default is not None:
try:
float(default)
ddl = ("ALTER TABLE '{table_name}' ADD column '{column_name}' '{data_type}' DEFAULT {default}")
except:
ddl = ("ALTER TABLE '{table_name}' ADD column '{column_name}' '{data_type}' DEFAULT '{default}'")
else:
ddl = ("ALTER TABLE '{table_name}' ADD column '{column_name}' '{data_type}'")
sql_command = ddl.format(table_name=table_name, column_name=column_name, data_type=data_type.__name__,
default=default)
try:
connection = sqlite3.connect(database_name)
cursor = connection.cursor()
cursor.execute(sql_command)
connection.commit()
connection.close()
ret = True
except Exception as e:
print(e)
ret = False
return ret
add_column(APP_STATUS_DB, 'procedures', 'last_status_change', types.BigInteger)
add_column(APP_STATUS_DB, 'procedures', 'last_complete_phase', types.String)
add_column(APP_STATUS_DB, 'procedures', 'complete_percentage', types.DECIMAL, 0.0)
If using docker:
go to the terminal of the container holding your DB
get into the db: psql -U usr [YOUR_DB_NAME]
now you can alter tables using raw SQL: alter table [TABLE_NAME] add column [COLUMN_NAME] [TYPE]
Note you will need to have mounted your DB for the changes to persist between builds.
Adding the column "manually" (not using python or SQLAlchemy) is perhaps the easiest?
Same problem over here. What I will do is iterating over the db and add each entry to a new database with the extra column, then delete the old db and rename the new to this one.