How to use the asyncio module to migrate a database from SQLite to Postgres? - python

I have a script to migrate a database from SQLite to Postgres. My original script works, but when I try to use asyncio to speed up the program, the new code actually runs a few seconds slower than the original. The transfer speed of the tables is very slow. Can anyone suggest where I went wrong?
My original code:
import psycopg2, sqlite3, sys
import time

start_time = time.time()

sqdb="D://Python//SqliteToPostgreFull//testmydb6.db" #folder contain sqlite db
sqlike="table"
pgdb="testmydb7" #postgres db
pguser="postgres"
pgpswd="1234"
pghost="127.0.0.1"
pgport="5432"

consq=sqlite3.connect(sqdb)
cursq=consq.cursor()

tabnames=[]
print()

cursq.execute('SELECT name FROM sqlite_master WHERE type="table" AND name LIKE "%table%";')
tabgrab = cursq.fetchall()
for item in tabgrab:
    tabnames.append(item[0])
print(tabgrab)

for table in tabnames:
    print(table)
    cursq.execute("SELECT sql FROM sqlite_master WHERE type='table' AND name = ?;", (table,))
    create = cursq.fetchone()[0]
    cursq.execute("SELECT * FROM %s;" %table)
    rows=cursq.fetchall()
    colcount=len(rows[0])
    pholder='%s,'*colcount
    newholder=pholder[:-1]

    try:
        conpg = psycopg2.connect(database=pgdb, user=pguser, password=pgpswd,
                                 host=pghost, port=pgport)
        curpg = conpg.cursor()
        curpg.execute("DROP TABLE IF EXISTS %s;" %table)
        create = create.replace("AUTOINCREMENT", "")
        curpg.execute(create)
        curpg.executemany("INSERT INTO %s VALUES (%s);" % (table, newholder), rows)
        conpg.commit()
        if conpg:
            conpg.close()
    except psycopg2.DatabaseError as e:
        print('Error %s' % e)
        sys.exit(1)
    finally:
        print("Complete")

consq.close()
duration = time.time() - start_time
print(f"Duration {duration} seconds")
My code with the asyncio module:
import psycopg2, sqlite3, sys
import time
import asyncio

sqdb="D://Python//SqliteToPostgreFull//testmydb6.db"
sqlike="table"
pgdb="testmydb9"
pguser="postgres"
pgpswd="1234"
pghost="127.0.0.1"
pgport="5432"

consq=sqlite3.connect(sqdb)
cursq=consq.cursor()

tabnames=[]
print()

cursq.execute('''SELECT name FROM sqlite_master WHERE type="table" AND name LIKE "'''+sqlike+'''%";''')
tabgrab = cursq.fetchall()
for item in tabgrab:
    tabnames.append(item[0])
print(tabgrab)

async def copyTable(table):
    cursq.execute("SELECT sql FROM sqlite_master WHERE type='table' AND name = ?;", (table,))
    create = cursq.fetchone()[0]
    cursq.execute("SELECT * FROM %s;" %table)
    rows=cursq.fetchall()
    colcount=len(rows[0])
    pholder='%s,'*colcount
    newholder=pholder[:-1]

    try:
        conpg = psycopg2.connect(database=pgdb, user=pguser, password=pgpswd,
                                 host=pghost, port=pgport)
        curpg = conpg.cursor()
        curpg.execute("DROP TABLE IF EXISTS %s;" %table)
        create = create.replace("AUTOINCREMENT", "")
        curpg.execute(create)
        curpg.executemany("INSERT INTO %s VALUES (%s);" % (table, newholder), rows)
        conpg.commit()
        if conpg:
            conpg.close()
    except psycopg2.DatabaseError as e:
        print('Error %s' % e)
        sys.exit(1)
    finally:
        print("Complete")

async def main():
    for table in tabnames:
        a = loop.create_task(copyTable(table,))
        await asyncio.wait([a])

if __name__ == "__main__":
    start_time = time.time()
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
    loop.close()
    duration = time.time() - start_time
    print(f"Duration {duration} seconds")

If both databases are located on the same computer, asyncio won't speed up the process: there's no network overhead to parallelize. Quite the opposite: the overhead of using coroutines will make the program a bit slower.
Please read this answer for a detailed explanation.
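For context: the sqlite3 and psycopg2 calls inside copyTable are blocking, so wrapping them in async def alone gives no concurrency at all. If you still want to experiment, the blocking calls can be pushed onto worker threads; here is a minimal sketch, assuming the copyTable function and tabnames list from the question (each worker thread would also need its own database connections):

import asyncio
from concurrent.futures import ThreadPoolExecutor

async def migrate_all(tabnames, copyTable):
    loop = asyncio.get_running_loop()
    # Push each blocking copyTable call onto a worker thread so the tasks
    # can overlap; awaiting a coroutine that only does blocking DB calls
    # would still run the tables strictly one after another.
    with ThreadPoolExecutor(max_workers=4) as pool:
        tasks = [loop.run_in_executor(pool, copyTable, table) for table in tabnames]
        await asyncio.gather(*tasks)

# asyncio.run(migrate_all(tabnames, copyTable))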

Related

executing a sql query using python

I'm trying to create a small Python app to extract data from a specific table of a database.
The extracted rows have to be between the CREATION_DATETIME values specified by the user.
Here's the code:
startdate = input("Prosze podac poczatek przedzialu czasowego (format RRRR-MM-DD GG:MM:SS): ")
enddate = input("Prosze podac koniec przedzialu czasowego (format RRRR-MM-DD GG:MM:SS): ")
query = "SELECT * FROM BRDB.RFX_IKW_MODIFY_EXEC_ORDER_CANCEL_LOG WHERE CREATION_DATETIME between '%s' and '%s' ORDER BY CREATION_DATETIME DESC;"
tuple1 = (startdate, enddate)
cursor.execute(*query, (tuple1,))
records = cursor.fetchall()
print("Total number of rows in table: ", cursor.rowcount)
print(records)
I'm not much of a developer and I'm stuck on the error "TypeError: CMySQLCursorPrepared.execute() takes from 2 to 4 positional arguments but 104 were given" (the count varies depending on how I try to modify the code).
Could you guys help me out with specifying that query correctly?
Thank you in advance.
I've tried various tutorials about parameterized queries, but with no luck.
You're starring the query, making it an iterable of the characters making up the string, which probably isn't what you meant (i.e., you should remove the * operator). In addition, tuple1 is already a tuple, so you shouldn't enclose it inside another tuple:
cursor.execute(query, tuple1)  # remove the *, and pass tuple1 directly
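Note also that the %s placeholders must not be wrapped in quotes inside the SQL string, otherwise the driver treats them as literal text and reports that not all parameters were used. A minimal sketch of the corrected call, assuming the connection, startdate and enddate from the question:

cursor = connection.cursor(prepared=True)
query = ("SELECT * FROM BRDB.RFX_IKW_MODIFY_EXEC_ORDER_CANCEL_LOG "
         "WHERE CREATION_DATETIME BETWEEN %s AND %s "
         "ORDER BY CREATION_DATETIME DESC")
# The driver binds the two values itself; no quotes around the placeholders.
cursor.execute(query, (startdate, enddate))
records = cursor.fetchall()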
Here is the full code:
import mysql.connector
from mysql.connector import Error

try:
    print("Laczenie z baza danych....")
    connection = mysql.connector.connect(host='',
                                         port='',
                                         database='',
                                         user='',
                                         password='')
    if connection.is_connected():
        db_Info = connection.get_server_info()
        print("Wersja servera MySQL:", db_Info)
        cursor = connection.cursor(prepared=True)
        cursor.execute("select database();")
        record = cursor.fetchone()
        print("Pomyslnie polaczono z baza danych: ", record)
except Error as e:
    print("Blad polaczenia!", e)
    quit()

try:
    startdate = input("Prosze podac poczatek przedzialu czasowego (format RRRR-MM-DD GG:MM:SS): ")
    enddate = input("Prosze podac koniec przedzialu czasowego (format RRRR-MM-DD GG:MM:SS): ")
    query = "SELECT * FROM BRDB.RFX_IKW_MODIFY_EXEC_ORDER_CANCEL_LOG WHERE CREATION_DATETIME between '%s' and '%s' ORDER BY CREATION_DATETIME DESC;"
    tuple1 = (startdate, enddate,)
    cursor.execute(query, tuple1)
    records = cursor.fetchall()
    print("Fetching each row using column name")
    for row in records:
        message_id = row["MESSAGE_ID"]
        executable_order_id = row["EXECUTABLE_ORDER_ID"]
        creation_datetime = row["CREATION_DATETIME"]
        message_type = row["MESSAGE_TYPE"]
        message_status = row["MESSAGE_STATUS"]
        print(message_id, executable_order_id, creation_datetime, message_status)
except mysql.connector.Error as e:
    print("Error reading data from MySQL table", e)
finally:
    if connection.is_connected():
        cursor.close()
        connection.close()
        print("MySQL connection is closed")

Sqlite3 Programming Error -> SQLite objects created in a thread can only be used in that same thread

I'm programming a website like YouTube for my friend.
But I always get this error when I go to http://localhost:2389/watch.v=f4efc9de771d4aba85ee0a88bbce08b9
This is the server code:
@app.route('/watch.v=<VideoId>')
def watch(VideoId):
    return render_template(
        "video.html",
        VideoName=video.load_from_id(VideoId),
        VideoId=VideoId
    )
and this is the database helper:
from sqlite3 import OperationalError
from qrcode import *
from pathlib import Path
import sqlite3
import os
import uuid

DEFAULT_PATH = (os.getcwd() + "\\api\database.db")
connection = sqlite3.connect(DEFAULT_PATH)
cursor = connection.cursor()
cur = cursor

def generateID():
    return uuid.uuid4().hex

class video:
    def db():
        try:
            connection = sqlite3.connect(DEFAULT_PATH)
            cursor = connection.cursor()
            cursor.execute("CREATE TABLE video (name, videoID);")
            connection.commit()
            print("Creating Database in:", DEFAULT_PATH.upper())
            print("[+] Database successfull created!")
        except OperationalError:
            print("[*] Database allready exists!")

    def load_from_id(id):
        cursor.execute(f'SELECT name from video WHERE videoID="{id}"')
        v = cursor.fetchall()
        return str(v).replace("[", "").replace("]", "").replace("'", "").replace("(", "").replace(")", "").strip(",")

    class new:
        def newVideo(name):
            i = generateID()
            NewUserData = "INSERT INTO video (name, videoID) VALUES (?, ?)"
            cursor.execute(NewUserData, (name, i))
            connection.commit()
            print(f"[+] Video successfull ceated ({name}, {i}).")

if __name__ == "__main__":
    #video.db()
    #video.new.new("Test_01")
    n = video.load_from_id("f4efc9de771d4aba85ee0a88bbce08b9")
    print(n)
And this is the error:
Traceback (most recent call last):
  File "C:\Users\admin\OneDrive\Desktop\youtube\server.py"
    cursor.execute(f'SELECT name from video WHERE videoID="{id}"')
sqlite3.ProgrammingError: SQLite objects created in a thread can only be used in that same thread.
The object was created in thread id 15232 and this is thread id 13568.
I hope someone can help me.
You need to create the cursor in the same method, that is, inside load_from_id(id).
Your code would look like:
def load_from_id(id):
    cursor = connection.cursor()
    cursor.execute(f'SELECT name from video WHERE videoID="{id}"')
    [...]
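The underlying problem is that the module-level connection and cursor are created in the main thread, while Flask handles each request in a worker thread. A minimal sketch of a per-request connection, assuming the DEFAULT_PATH from the helper module above (alternatively, sqlite3.connect(DEFAULT_PATH, check_same_thread=False) allows cross-thread use, but then access must be serialized by the caller):

import sqlite3

def load_from_id(video_id):
    # Create the connection and cursor inside the request-handling thread,
    # so the sqlite3 objects are used by the thread that created them.
    connection = sqlite3.connect(DEFAULT_PATH)
    try:
        cursor = connection.cursor()
        cursor.execute("SELECT name FROM video WHERE videoID = ?", (video_id,))
        row = cursor.fetchone()
        return row[0] if row else None
    finally:
        connection.close()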
I found the problem.
I have to do this:
def load_from_id(id):
    try:
        cursor = connection.cursor()
        data = f'SELECT name from video WHERE videoID="{id}"'
        cursor.execute(data)
        v = cursor.fetchall()
        return str(v).replace("[", "").replace("]", "").replace("'", "").replace("(", "").replace(")", "").strip(",")
    except sqlite3.ProgrammingError as pe:
        print(pe)

MySQL: I don't understand why this is happening?

import sqlite3
import traceback
from time import sleep
import mysql.connector

def check_user(user_id):
    conn = mysql.connector.connect(host='localhost', database='online', user='root1', password='rootRRR111_')
    cur = conn.cursor()
    cur.execute('CREATE TABLE IF NOT EXISTS online(id INT, last_online_date TEXT)')
    conn.commit()
    select = "SELECT * FROM online WHERE id = %s LIMIT 0, 1"
    result = cur.execute(select, (user_id,))
    if result is None:
        insert = ('INSERT INTO online (id, last_online_date) VALUES (%s, %s)')
        cur.reset()
        cur.execute(insert, (user_id, online_time))
        conn.commit()

def update_online_status(user_id, online_time):
    conn = mysql.connector.connect(host='localhost', database='online', user='root1', password='rootRRR111_')
    cursor = conn.cursor()
    select = 'SELECT last_online_date FROM online WHERE id = %s'
    result = cursor.execute(select, (user_id,))
    old_online = result
    online_time = f'{old_online},{online_time}'
    cursor.reset()
    cursor.execute('UPDATE online SET last_online_date = %s WHERE id = %s', (online_time, user_id))
    conn.commit()

app = Client("my_account")
app.start()

while True:
    try:
        with open('ids.ini', 'r') as file:
            users = file.read().splitlines()
        for user in users:
            result = app.get_users(user)
            user_id = result['id']
            if result['status'] == 'offline':
                unix_timestamp = float(result['last_online_date'])
                local_timezone = tzlocal.get_localzone()
                local_time = datetime.fromtimestamp(unix_timestamp, local_timezone)
                online_time = local_time.strftime("%Y/%m/%d %H:%M:%S")
            elif result['status'] == 'online':
                now = datetime.now()
                online_time = now.strftime("%Y/%m/%d %H:%M:%S")
            check_user(user_id)
            update_online_status(user_id, online_time)
        # sleep(300)
    except Exception:
        traceback.print_exc()
        continue

app.stop()
I am writing a program that reads the online status of a user in Telegram.
Instead of updating the existing row for a user, a huge number of identical rows appear in the database.
Example: a table full of repeated rows.
When I try to fix something, I get a lot of errors, such as:
mysql.connector.errors.ProgrammingError: Not all parameters were used in the SQL statement
mysql.connector.errors.InternalError: Unread result found
and others. Please help!
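For context on those errors: in mysql.connector, cursor.execute() itself returns None, so "result is None" in check_user is always true and a new row is inserted on every pass; the SELECT result also has to be consumed with fetchone()/fetchall() before the next statement, otherwise "Unread result found" is raised. A minimal sketch of an existence check along those lines, assuming the same connection settings as in check_user above and passing online_time in explicitly:

import mysql.connector

def check_user(user_id, online_time):
    conn = mysql.connector.connect(host='localhost', database='online',
                                   user='root1', password='rootRRR111_')
    cur = conn.cursor()
    cur.execute("SELECT id FROM online WHERE id = %s LIMIT 1", (user_id,))
    row = cur.fetchone()   # consume the result before issuing the next statement
    if row is None:        # only insert when the user really isn't there yet
        cur.execute("INSERT INTO online (id, last_online_date) VALUES (%s, %s)",
                    (user_id, online_time))
        conn.commit()
    cur.close()
    conn.close()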

How can I use concurrency to migrate a database in Python?

I have a script used to migrate data from SQLite to Postgres. I just use a for loop to transfer the tables one by one. Now I want to experiment with transferring multiple tables concurrently, using threads, multiprocessing, or asyncio, to speed up the program and compare the runtimes between those approaches.
How would I do one of those?
Here is my script:
import psycopg2, sqlite3, sys
import time
import multiprocessing

sqdb="C://Users//duongnb//Desktop//Python//SqliteToPostgreFull//testmydb6.db"
sqlike="table"
pgdb="testmydb11"
pguser="postgres"
pgpswd="1234"
pghost="127.0.0.1"
pgport="5432"

consq=sqlite3.connect(sqdb)
cursq=consq.cursor()

tabnames=[]
print()

cursq.execute('SELECT name FROM sqlite_master WHERE type="table" AND name LIKE "%table%";')
tabgrab = cursq.fetchall()
for item in tabgrab:
    tabnames.append(item[0])
print(tabgrab)

def copyTable(table):
    print(table)
    cursq.execute("SELECT sql FROM sqlite_master WHERE type='table' AND name = ?;", (table,))
    create = cursq.fetchone()[0]
    cursq.execute("SELECT * FROM %s;" %table)
    rows=cursq.fetchall()
    colcount=len(rows[0])
    pholder='%s,'*colcount
    newholder=pholder[:-1]

    try:
        conpg = psycopg2.connect(database=pgdb, user=pguser, password=pgpswd,
                                 host=pghost, port=pgport)
        curpg = conpg.cursor()
        curpg.execute("DROP TABLE IF EXISTS %s;" %table)
        create = create.replace("AUTOINCREMENT", "")
        curpg.execute(create)
        curpg.executemany("INSERT INTO %s VALUES (%s);" % (table, newholder), rows)
        conpg.commit()
        if conpg:
            conpg.close()
    except psycopg2.DatabaseError as e:
        print('Error %s' % e)
        sys.exit(1)
    finally:
        print("Complete")

consq.close()

if __name__ == "__main__":
    start_time = time.time()
    for table in tabnames:
        p = multiprocessing.Process(target = copyTable, args = (table))
        p.start()
    for table in tabnames:
        p.join()
    print("All processes finished.")
    duration = time.time() - start_time
    print(f"Duration {duration} seconds")
You should put the body of the for table in tabnames loop into a function, say copyTable. Then you can use the multiprocessing package to parallelize your code. It should look something like this:
processes = []
for table in tabnames:
    p = multiprocessing.Process(target=copyTable, args=(table,))
    p.start()
    processes.append(p)
for p in processes:
    p.join()
print("All processes finished.")
But you can speed up your code even more if you use COPY (https://www.postgresql.org/docs/current/sql-copy.html) instead of many INSERT commands.
Instead of the multiprocessing module, you can also use the threading module, which works quite similarly; then you get threads instead of processes. Because of the global interpreter lock, I would expect worse performance with that.
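A minimal sketch of what the COPY approach could look like with psycopg2, assuming the rows already fetched from SQLite inside copyTable (the data is streamed through an in-memory CSV buffer; NULL values may need extra handling depending on the data):

import csv
import io

def copy_rows(curpg, table, rows):
    # Serialize the fetched SQLite rows into an in-memory CSV buffer
    # and load them with a single COPY instead of many INSERTs.
    buf = io.StringIO()
    csv.writer(buf).writerows(rows)
    buf.seek(0)
    curpg.copy_expert("COPY %s FROM STDIN WITH (FORMAT csv)" % table, buf)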

Insert Google Analytics API data to postgresql [python]

I want to store bulk data in PostgreSQL.
The data I got is from the Google Analytics API. The data is about pageviews, and here is my code:
data = '[["20151201","path","title",345], ["20151202","path","title",321], ["20151203","path","title",214]]'

def storeJson( jsonFile, tableName ):
    conn = psycopg2.connect( host=hostname, user=username, password=password, dbname=database )
    try:
        cur = conn.cursor()
        # Here is the problem:
        cur.executemany( "INSERT INTO " + tableName + " VALUES(%s)", [jsonFile])
        conn.commit()
    except psycopg2.DatabaseError as e:
        if conn:
            conn.rollback()
        print("Error %s" %e)
        exit()
    finally:
        if conn:
            cur.close()
            conn.close()

def main():
    storeJson(data, "daily_pageviews")

if __name__ == '__main__':
    main()
With the code above, I got an error message like this:
json.decoder.JSONDecodeError: Expecting ':' delimiter: line 1 column 12 (char 11)
Can someone enlighten me? Thanks, guys!
Finally, here is what happened: first, my data isn't in JSON format, it's a list-of-lists format. Here is the solution I got from my friend, using SQLAlchemy:
from sqlalchemy.engine import create_engine
from sqlalchemy.schema import MetaData, Table

engine = create_engine('postgresql://db_username:db_password@ip/dbname')
metadata = MetaData()
metadata.bind = engine

def storeJson( jsonFile, tableName ):
    table = Table(tableName, metadata, autoload=True)
    #import ipdb; ipdb.set_trace()

    def to_dicts(rows):
        for row in rows:
            data = {}
            for i, column in enumerate(table.columns):
                data[column.name] = row[i]
            yield data

    params = list(to_dicts(jsonFile))
    engine.execute(table.insert(), params)
    return
This assumes the values in the jsonFile lists are ordered exactly like the columns of the table in the db.
Note: you can install SQLAlchemy using pip:
python -m pip install sqlalchemy --user
As for how to get data from Google Analytics, you can visit its site: https://developers.google.com/analytics/devguides/reporting/core/v3/quickstart/service-py
jsonFile is a string in your case. You need to load it with json.loads():
import json
data = json.loads(jsonFile)
cur.executemany("INSERT INTO " + tableName + " VALUES(%s, %s, %s, %s)", data)
Note that there are 4 placeholders in the query, one for each item in every sublist.
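For completeness, a minimal sketch of storeJson with that fix applied, assuming the hostname/username/password/database variables from the question and a daily_pageviews table with exactly four columns:

import json
import psycopg2

def storeJson(jsonFile, tableName):
    rows = json.loads(jsonFile)  # parse the JSON string into a list of lists
    conn = psycopg2.connect(host=hostname, user=username,
                            password=password, dbname=database)
    try:
        cur = conn.cursor()
        # One %s placeholder per column of the target table.
        cur.executemany("INSERT INTO " + tableName + " VALUES (%s, %s, %s, %s)", rows)
        conn.commit()
    finally:
        cur.close()
        conn.close()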
