I have the following Postgres query where I am fetching data from table1 with rows ~25 million and would like to write the output of the below query into multiple files.
query = """ WITH sequence AS (
SELECT
a,
b,
c
FROM table1 )
select * from sequence;"""
Below is the python script to fetch the complete dataset. How can I modify the script to fetch it to multiple files (eg. each file has 10000 rows)
#IMPORT LIBRARIES ########################
import psycopg2
from pandas import DataFrame
#CREATE DATABASE CONNECTION ########################
connect_str = "dbname='x' user='x' host='x' " "password='x' port = x"
conn = psycopg2.connect(connect_str)
cur = conn.cursor()
conn.autocommit = True
cur.execute(query)
df = DataFrame(cur.fetchall())
Thanks
Here are 3 methods that may help
use psycopg2 named cursor cursor.itersize = 2000
snippet
with conn.cursor(name='fetch_large_result') as cursor:
cursor.itersize = 20000
query = "SELECT * FROM ..."
cursor.execute(query)
for row in cursor:
....
use psycopg2 named cursor fetchmany(size=2000)
snippet
conn = psycopg2.connect(conn_url)
cursor = conn.cursor(name='fetch_large_result')
cursor.execute('SELECT * FROM <large_table>')
while True:
# consume result over a series of iterations
# with each iteration fetching 2000 records
records = cursor.fetchmany(size=2000)
if not records:
break
for r in records:
....
cursor.close() # cleanup
conn.close()
Finally you could define the a SCROLL CURSOR
Define a SCROLL CURSOR
snippet
BEGIN MY_WORK;
-- Set up a cursor:
DECLARE scroll_cursor_bd SCROLL CURSOR FOR SELECT * FROM My_Table;
-- Fetch the first 5 rows in the cursor scroll_cursor_bd:
FETCH FORWARD 5 FROM scroll_cursor_bd;
CLOSE scroll_cursor_bd;
COMMIT MY_WORK;
Please note Not naming the cursor in psycopg2 will cause the cursor to be client side as opposed to server side.
Related
# Create a database or connect to one
conn = sqlite3.connect("tasks_database.db")
# Create the cursor of the database
c = conn.cursor()
# Create Table
c.execute("""CREATE TABLE IF NOT EXISTS tasks (
task_name text
)""")
def query_func():
# Create database or connect to one
connect = sqlite3.connect("tasks_database.db")
# Create Cursor
cursor = connect.cursor()
# Query the database
cursor.execute("SELECT *, oid FROM tasks ")
records = cursor.fetchall()
print(records)
def final_crtb_func():
print("Submit Button Called")
crtb_func()
# Create a database or connect to one
conn = sqlite3.connect("tasks_database.db")
# Create Cursor
c = conn.cursor()
# Insert to Table
c.execute("INSERT INTO tasks VALUES (:task_name)",
{
"task_name": inbox_entry.get()
}
)
# Commit Changes
conn.commit()
# Close Connection
conn.close()
# Create Cursor
c = conn.cursor()
conn.commit()
conn.close()
I called these functions but when I say print records it prints a blank list, although It should have the value of an entry. Any Help?
Note: I made sure that I clicked the button that had the command of the final_crtb_fuc
I am trying to calculate the mode value of each row and store the value in the judge = judge column, however it updates only the first record and leaves the loop
ps: Analisador is my table and resultado_2 is my db
import sqlite3
import statistics
conn = sqlite3.connect("resultado_2.db")
cursor = conn.cursor()
data = cursor.execute("SELECT Bow, FastText, Glove, Wordvec, Python, juiz, id FROM Analisador")
for x in data:
list = [x[0],x[1],x[2],x[3],x[4],x[5],x[6]]
mode = statistics.mode(list)
try:
cursor.execute(f"UPDATE Analisador SET juiz={mode} where id={row[6]}") #row[6] == id
conn.commit()
except:
print("Error")
conn.close()
You have to fetch your records after SQL is executed:
cursor.execute("SELECT Bow, FastText, Glove, Wordvec, Python, juiz, id FROM Analisador")
data = cursor.fetchall()
That type of SQL query is different from UPDATE (that you're using in your code too) which doesn't need additional step after SQL is executed.
I am trying to query MS SQL Server for a table.column, then insert this output into a sqlite table.
This example has one numeric column in the SQL Server source table.
I think I've almost got it by scouring the other answers.
Please let me know what I am missing.
import sqlite3
import pyodbc
#def connect_msss():
ODBC_Prod = ODBC_Prod
SQLSN = SQLSN
SQLpass = SQLpass
conn_str = ('DSN='+ODBC_Prod+';UID='+SQLSN+';PWD='+SQLpass)
conn = pyodbc.connect(conn_str)
#def connect_sqlite():
sl3Conn = sqlite3.connect('server_test.db')
c = sl3Conn.cursor()
c.execute('CREATE TABLE IF NOT EXISTS mrn_test (PTMRN NUMERIC)')
#def query_msss():
cur = conn.cursor()
cur.execute("SELECT TOP 50 PTMRN FROM dbo.atl_1234_mrntest")
rows = cur.fetchall()
for row in rows:
c.execute("INSERT INTO mrn_test VALUES (?)", row)
conn.commit()
#connect_msss()
#connect_sqlite()
#query_msss()
Error 1:
c.execute('CREATE TABLE IF NOT EXISTS mrn_test
(PTMRN NUMERIC)')
Out[117]: <sqlite3.Cursor at 0x2d1a742fc70>
Error 2:
cur = conn.cursor() cur.execute("SELECT TOP 50 PTMRN FROM
dbo.atl_1234_mrntest")
Out[118]: <pyodbc.Cursor at 0x2d1a731b990>
You're not committing the executed changes on the sqlite connection, after the c.execute step you're committing the MySQL DB connection. I think you need to replace conn.commit() at the end with sl3Conn.commit().
I am working with a SQL Database on Python. After making the connection, I want to use the output of one query in another query.
Example: query1 gives me a list of all tables in a schema. I want to use each table name from query1 in my query2.
query2 = "SELECT TOP 200 * FROM db.schema.table ORDER BY ID"
I want to use this query for each of the table in the output of query1.
Can someone help me with the Python code for it?
Here is a working example on how to do what you are looking to do. I didn't look up the schemes for the tablelist, but you can simply substitute the SQL code to do so. I just 'faked it' by unioning a statement of 2 tables. There are plenty of other answer on that SQL code and I don't want to clutter this answer:
How do I get list of all tables in a database using TSQL?
It looks like the key part you may have been missing was the join step to build the second SQL statement. This should be enough of a starting point to craft exactly what you are looking for.
import pypyodbc
def main():
table_list = get_table_list()
for table in table_list:
print_table(table)
def print_table(table):
thesql = " ".join(["SELECT TOP 10 businessentityid FROM", table])
connection = get_connection()
cursor = connection.cursor()
cursor.execute(thesql)
for row in cursor:
print (row["businessentityid"])
cursor.close()
connection.close()
def get_table_list():
table_list = []
thesql = ("""
SELECT 'Sales.SalesPerson' AS thetable
UNION
SELECT 'Person.BusinessEntity' thetable
""")
connection = get_connection()
cursor = connection.cursor()
cursor.execute(thesql)
for row in cursor:
table_list.append(row["thetable"])
cursor.close()
connection.close()
return table_list
def get_connection():
'''setup connection depending on which db we are going to write to in which environment'''
connection = pypyodbc.connect(
"Driver={SQL Server};"
"Server=YOURSERVER;"
"Database=AdventureWorks2014;"
"Trusted_Connection=yes"
)
return connection
main ()
Using python and MySQLdb, how can I check if there are any records in a mysql table (innodb)?
Just select a single row. If you get nothing back, it's empty! (Example from the MySQLdb site)
import MySQLdb
db = MySQLdb.connect(passwd="moonpie", db="thangs")
results = db.query("""SELECT * from mytable limit 1""")
if not results:
print "This table is empty!"
Something like
import MySQLdb
db = MySQLdb.connect("host", "user", "password", "dbname")
cursor = db.cursor()
sql = """SELECT count(*) as tot FROM simpletable"""
cursor.execute(sql)
data = cursor.fetchone()
db.close()
print data
will print the number or records in the simpletable table.
You can then test if to see if it is bigger than zero.