I tried to update multiple rows (approx. 350000) with a single query by implementing the following function:
def update_items(rows_to_update):
    sql_query = """UPDATE contact as t SET
                   name = e.name
                   FROM (VALUES %s) AS e(id, name)
                   WHERE e.id = t.id;"""
    conn = get_db_connection()
    cur = conn.cursor()
    psycopg2.extras.execute_values(
        cur, sql_query, rows_to_update, template=None, page_size=100
    )
Running the function above updated only 31 records. I then tried to update the rows one by one with the following function:
def update_items_row_by_row(rows_to_update):
    sql_query = """UPDATE contact SET name = %s WHERE id = %s"""
    conn = get_db_connection()
    with tqdm(total=len(rows_to_update)) as pbar:
        for id, name in rows_to_update:
            cur = conn.cursor()
            # execute the UPDATE statement
            cur.execute(sql_query, (name, id))
            # commit the change to the database
            conn.commit()
            cur.close()
            pbar.update(1)
The latter updates all the records, but it is very slow (estimated to finish in about 9 hours). Does anyone know the most efficient way to update multiple records?
By splitting the list into chunks of size equal to page_size, it worked well:
def update_items(rows_to_update):
    sql_query = """UPDATE contact as t SET
                   name = data.name
                   FROM (VALUES %s) AS data (id, name)
                   WHERE t.id = data.id"""
    conn = get_db_connection()
    cur = conn.cursor()
    n = 100
    with tqdm(total=len(rows_to_update)) as pbar:
        for i in range(0, len(rows_to_update), n):
            psycopg2.extras.execute_values(
                cur, sql_query, rows_to_update[i:i + n], template=None, page_size=n
            )
            conn.commit()
            pbar.update(cur.rowcount)
    cur.close()
    conn.close()
The problem with your original function appears to be that you forgot to call commit. When you execute an INSERT/UPDATE query with psycopg2, a transaction is opened but not finalized until commit() is called. See my edits to your function below.
def update_items(rows_to_update):
    sql_query = """UPDATE contact as t SET
                   name = e.name
                   FROM (VALUES %s) AS e(id, name)
                   WHERE e.id = t.id;"""
    conn = get_db_connection()
    cur = conn.cursor()
    psycopg2.extras.execute_values(cur, sql_query, rows_to_update)
    ## solution below ##
    conn.commit()  # <- we MUST commit for the updated data to be persisted
    cur.close()
    conn.close()
    return "success :)"
If you don't want to call conn.commit() each time, you can enable autocommit on the connection:
conn = get_db_connection()
conn.set_session(autocommit=True)
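As a side note (my addition, not part of the original answer), psycopg2 connections can also be used as context managers: the with block commits the transaction on success and rolls back on an exception, while the connection itself stays open. A minimal sketch, assuming the same get_db_connection helper and sql_query as above:

conn = get_db_connection()
with conn:                      # commits on success, rolls back on exception
    with conn.cursor() as cur:  # the cursor is closed when this block exits
        psycopg2.extras.execute_values(cur, sql_query, rows_to_update)
conn.close()                    # the with block does NOT close the connection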
I have a function that connects to a database and writes data to it. The function accepts a list of dictionaries with the data:
conn = psycopg2.connect(
    database="db_database",
    user='user',
    password='password',
    host='127.0.0.1',
)
conn.autocommit = True
cursor = conn.cursor()

get_data = "SELECT * FROM customer_new WHERE login_id = %s;"  # availability check
cursor.execute(get_data, (values_list['login id'],))
last_item = cursor.fetchone()
if last_item == None:
    '''If there are NO matches by login_id in the database, then add the row to the database'''
    sql = ("""INSERT INTO customer_new (login, telegram_id, orders,
              sum_orders, average_check, balance, group_customer, zamena, reg_date, bot_or_site, login_id)
              VALUES (%(login)s, %(telegram_id)s, %(orders)s, %(sum_orders)s, %(average_check)s, %(balance)s, %(group)s, %(zamena)s, %(reg_date)s, %(bot_or_site)s, %(login id)s);""")
    cursor.execute(sql, values_list)
else:
    '''If there are matches in the database, then get the row id and update the data in it'''
    item_id_last = last_item[0]
    sql = ("""UPDATE customer_new SET (login, telegram_id, orders,
              sum_orders, average_check, balance, group_customer, zamena, reg_date, bot_or_site, login_id)
              VALUES (%(login)s, %(telegram_id)s, %(orders)s, %(sum_orders)s, %(average_check)s, %(balance)s, %(group)s, %(zamena)s, %(reg_date)s, %(bot_or_site)s, %(login id)s)""")
    cursor.execute(sql, [values_list, item_id_last])
conn.close()
I need to write the received data to the database; if the data is already in the database, it needs to be updated instead. How do I implement this correctly?
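One common way to express this in PostgreSQL is a single INSERT ... ON CONFLICT DO UPDATE statement instead of the SELECT-then-INSERT/UPDATE pair. The sketch below is only an illustration; it assumes customer_new has a UNIQUE constraint (or primary key) on login_id, PostgreSQL 9.5 or newer, and the same values_list dict and placeholder keys as the code above:

# Sketch: upsert in one statement; assumes a UNIQUE constraint on login_id.
upsert_sql = """INSERT INTO customer_new (login, telegram_id, orders, sum_orders,
                    average_check, balance, group_customer, zamena, reg_date,
                    bot_or_site, login_id)
                VALUES (%(login)s, %(telegram_id)s, %(orders)s, %(sum_orders)s,
                    %(average_check)s, %(balance)s, %(group)s, %(zamena)s,
                    %(reg_date)s, %(bot_or_site)s, %(login id)s)
                ON CONFLICT (login_id) DO UPDATE SET
                    login = EXCLUDED.login,
                    telegram_id = EXCLUDED.telegram_id,
                    orders = EXCLUDED.orders,
                    sum_orders = EXCLUDED.sum_orders,
                    average_check = EXCLUDED.average_check,
                    balance = EXCLUDED.balance,
                    group_customer = EXCLUDED.group_customer,
                    zamena = EXCLUDED.zamena,
                    reg_date = EXCLUDED.reg_date,
                    bot_or_site = EXCLUDED.bot_or_site;"""
cursor.execute(upsert_sql, values_list)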
I run this code but it doesn't commit anything.
def them_mon(self):
    ten_mon = ['Tin học', 'Toán', 'Nhạc', 'Mỹ thuật', 'Sinh', 'Lý', 'Văn', 'Thể dục', 'Sử', 'Địa', 'GDCD', 'TTH', 'AVTH', 'KHKT']
    len_tm = len(ten_mon)
    i = 0
    while i < len_tm:
        ten = ten_mon[i]
        #print(ten)
        sql = "INSERT INTO bang_diem(TEN_MON) VALUES(?)"
        self.conn.execute(sql, (ten,))
        i += 1
    self.conn.commit()
No record is added to bang_diem at all.
You have to execute with a cursor object, not the connection object:
import sqlite3

# Creates or opens a DB
db = sqlite3.connect('data.db')
# Get a cursor object
cursor = db.cursor()
cursor.execute("INSERT INTO table_name (column1, column2) VALUES (?, ?)", (column1, column2))
db.commit()
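As a side note (my addition, not part of the answer), the while loop in the question could also be replaced by a single executemany call; a minimal sketch, assuming self.conn is an open sqlite3 connection and the ten_mon list from the question:

# Sketch: insert all subject names in one call instead of a manual while loop.
cur = self.conn.cursor()
cur.executemany("INSERT INTO bang_diem(TEN_MON) VALUES (?)",
                [(ten,) for ten in ten_mon])
self.conn.commit()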
I'm using Postgres to log orders. I have one function that inserts a row for each order and another function that updates the order once it has gone through and been confirmed. As of now I'm using INSERT to insert an order and RETURNING to get the row_id back. I then use UPDATE with the row_id that was returned by the INSERT to update the order. How can I do this in bulk? I see that psycopg2 has an executemany function, but according to the documentation, it can't return anything. Is there any way to do this?
def initial_log(self, orders):
    with self.conn.cursor() as cur:
        row_ids = []
        for order in orders:
            cur.execute('''INSERT INTO orders (order_num, order_amount)
                           VALUES (%s, %s) RETURNING order_id;''',
                        (order.num, order.amount))
            row_id = cur.fetchone()[0]
            row_ids.append(row_id)
        self.conn.commit()
    return row_ids
def update_log(self, row_ids, updated_orders):
    with self.conn.cursor() as cur:
        for row_id, order in zip(row_ids, updated_orders):
            status_list = order.messages
            encoded_status = encode_status(status_list)
            cur.execute('''UPDATE orders SET (final_order_amount, order_ack,
                                              order_time, status) =
                           (%s, %s, current_timestamp, %s) WHERE order_id = %s''',
                        (order.final_amount, order.ack_num, encoded_status, row_id))
        self.conn.commit()
def initial_log(self, orders):
    insert = '''
        insert into orders (order_num, order_amount)
        values {}
        returning order_id;
    '''.format(','.join(['%s'] * len(orders)))
    t = [(order.num, order.amount) for order in orders]
    cur = self.conn.cursor()
    # print(cur.mogrify(insert, t))
    cur.execute(insert, t)
    rs = cur.fetchall()
    self.conn.commit()
    row_ids = [row[0] for row in rs]
    return row_ids

def update_log(self, row_ids, updated_orders):
    update = '''
        update orders o
        set (final_order_amount, order_ack, order_time, status) =
            (s.final_order_amount, s.order_ack, current_timestamp, s.status)
        from (values
            {}
        ) s (final_order_amount, order_ack, status, order_id)
        where o.order_id = s.order_id
    '''.format(','.join(['%s'] * len(updated_orders)))
    t = [
        (order.final_amount, order.ack_num, encode_status(order.messages), row_id)
        for row_id, order in zip(row_ids, updated_orders)
    ]
    cur = self.conn.cursor()
    # print(cur.mogrify(update, t))
    cur.execute(update, t)
    self.conn.commit()
Uncomment the cur.mogrify line to check what is actually being sent to the server.
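As a side note (my addition, not part of the original answer), psycopg2 2.8 and newer can do the same without building the VALUES list by hand: psycopg2.extras.execute_values accepts fetch=True and returns the rows produced by a RETURNING clause. A minimal sketch of initial_log using it, under that version assumption:

import psycopg2.extras

def initial_log(self, orders):
    # Sketch: execute_values expands the single %s into the VALUES list and,
    # with fetch=True, returns the rows from RETURNING (requires psycopg2 >= 2.8).
    insert = 'insert into orders (order_num, order_amount) values %s returning order_id;'
    t = [(order.num, order.amount) for order in orders]
    cur = self.conn.cursor()
    rs = psycopg2.extras.execute_values(cur, insert, t, fetch=True)
    self.conn.commit()
    return [row[0] for row in rs]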
I have a small problem with this class which handles my DB. It keeps saying:
cursor.execute(sql)
ValueError: operation parameter must be str
I tried lots of things but nothing works the way I want. I looked over https://docs.python.org/3.4/library/sqlite3.html and I'm sure I'm doing the same things.
import sqlite3

class Database():
    def __init__(self):
        try:
            self.db = sqlite3.connect('../database.sqlite')
            self.cur = self.db.cursor()
            self.cur.execute('pragma foreign_keys="1"')
        except sqlite3.Error as e:
            raise e

    def select(self, sql):
        cursor = self.db.cursor()
        cursor.execute(sql)
        records = cursor.fetchall()
        cursor.close()
        return records

    def insert(self, sql):
        cursor = self.db.cursor()
        cursor.execute(sql)
        newID = cursor.lastrowid
        self.db.commit()
        cursor.close()
        return newID

    def execute(self, sql):
        """ execute any SQL statement but no return value given """
        cursor = self.db.cursor()
        cursor.execute(sql)
        self.db.commit()
        cursor.close()

if __name__ == '__main__':
    db = Database()
    #sql = "SELECT skuref, titre_prod FROM product"
    t = ("888888",)
    sql = "UPDATE product SET created = 1 WHERE skuref = ?", t
    db.execute(sql)
If someone can help me I would be grateful. Later I want to pass something like this in the main program inside a for loop:
lastpost = record[0]
if created = True
    sql = "UPDATE product SET created = 1 WHERE skuref = ?",(lastpost,)
    db.execute(sql)
sql is a tuple containing the SQL statement and its parameters.
Change the method as follows, so that the statement and the parameters are passed to cursor.execute() separately instead of as one tuple:
def execute(self, sql):
""" execute any SQL statement but no return value given """
cursor = self.db.cursor()
cursor.execute(*sql) # <------
self.db.commit()
cursor.close()
With your statement
sql = "UPDATE product SET created = 1 WHERE skuref = ?",(lastpost,)
you have created a tuple like
("UPDATE product SET created = 1 WHERE skuref = ?", (lastpost,))
You have to pass the arguments as separate parameters to the execute() function. Also, your if statement is broken: the colon is missing, it uses = instead of ==, and the explicit comparison with True is not necessary.
Try this:
lastpost = record[0]
if created:
    sql = "UPDATE product SET created = 1 WHERE skuref = ?"
    db.execute(sql, (lastpost,))
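For that call to work, the class's execute() method also has to accept the parameters; a minimal sketch of such an adjusted method (my assumption, not part of the original class):

def execute(self, sql, params=()):
    """ execute any SQL statement, optionally with parameters; no return value """
    cursor = self.db.cursor()
    cursor.execute(sql, params)  # params defaults to an empty tuple
    self.db.commit()
    cursor.close()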
Issue: I can't figure out how to run a query in the correct way so that it returns a mapped dictionary. The query will use counts from multiple tables.
I am using psycopg2 with a PostgreSQL database, and I will be using the results to create a report on day-to-day deltas of these counts.
Given that, can someone provide an example of how to execute multiple queries and return a dictionary that I can use for comparison purposes? Thanks! I imagine a for loop is needed somewhere in here.
tables = ['table1', 'table2']

def db_query():
    query = "select count(*) from (a_table) where error_string != '';"
    conn = psycopg2.connect(database=db, user=user, password=password, host=host)
    cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
    cur.execute(query, tables)
    output = cur.fetchall()
    conn.close()
    return output
I haven't used PostgreSQL, so you might also want to check this out as a reference: How to store count values in python.
That being said, rearrange your code into something like the following. Make conn global so you don't have to open more than one connection, and make sure you also close cur:
conn = None

def driverFunc():
    global conn
    try:
        conn = psycopg2.connect(database=db, user=user, password=password, host=host)
        tables = ['table1', 'table2']
        countDict = {}
        for thisTable in tables:
            db_query(thisTable, countDict)
        return countDict
    finally:
        if conn is not None:
            conn.close()

def db_query(tableName, countDict):
    # Beware of SQL injection with the following line:
    query = "select count(*) from " + tableName + " where error_string != '';"
    cur = None
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute(query)
        countDict[tableName] = int(cur.fetchone()[0])  # fetchone() returns a row; take the count column
    finally:
        if cur is not None:
            cur.close()
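With the counts collected into a dict, the day-to-day delta report could then look roughly like this (a sketch only; yesterdayCounts is an assumed name for however the previous run's counts were saved, e.g. loaded from a file):

todayCounts = driverFunc()

# yesterdayCounts is a hypothetical, previously saved snapshot of the same counts.
yesterdayCounts = {'table1': 100, 'table2': 50}

deltas = {table: todayCounts[table] - yesterdayCounts.get(table, 0)
          for table in todayCounts}
print(deltas)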