How to properly insert data into a postgres table? - python

I'm using the following code to insert a record into a PostgreSQL table, but it's not working. I don't get any errors, yet no records appear in the table. I do commit at the end of my code as well. What could it possibly be? It's frustrating, since I'm not getting any syntax error in my code.
Excuse my lack of knowledge, as I am fairly new to Python.
Here is the code:
import csv
import psycopg2
import psycopg2.extras
import config  # local module holding the DB_* settings

connection = psycopg2.connect(host=config.DB_HOST, database=config.DB_NAME,
                              user=config.DB_USER, password=config.DB_PASS,
                              port=config.DB_PORT)
cursor = connection.cursor(cursor_factory=psycopg2.extras.DictCursor)

cursor.execute("select * from stock where is_etf = TRUE")
etfs = cursor.fetchall()

dates = ['2022-08-23', '2022-08-24']

for current_date in dates:
    for etf in etfs:
        print(etf['symbol'])
        with open(f"new/{current_date}/{etf['symbol']}.csv") as f:
            reader = csv.reader(f)
            next(reader)
            for row in reader:
                if len(row) == 8:
                    ticker = row[3]
                    if ticker:
                        shares = row[5]
                        weight = row[7]
                        cursor.execute("""
                            SELECT * FROM stock WHERE symbol = %s
                        """, (ticker,))
                        stock = cursor.fetchone()
                        if stock:
                            cursor.execute("""
                                INSERT INTO etf_holding (etf_id, holding_id, dt, shares, weight)
                                VALUES (%s, %s, %s, %s, %s)
                            """, (etf['id'], stock['id'], current_date, shares, weight))

connection.commit()
As a result, the etf_holding table remains unpopulated (the original post showed a screenshot of the empty table here).
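For what it's worth, a quick way to narrow this down is to check cursor.rowcount after each INSERT and confirm which database the connection actually points at; inserts that vanish silently are often going to a different database than the one being inspected. A hedged debugging sketch (my addition, reusing the names from the code above):

# Debugging sketch; assumes the same connection/cursor as above
cursor.execute("""
    INSERT INTO etf_holding (etf_id, holding_id, dt, shares, weight)
    VALUES (%s, %s, %s, %s, %s)
""", (etf['id'], stock['id'], current_date, shares, weight))
print("rows inserted:", cursor.rowcount)  # should print 1 for every insert

# Confirm which server/database this connection actually targets
print(connection.get_dsn_parameters())

# Count what this same connection sees before the commit
cursor.execute("SELECT count(*) FROM etf_holding")
print("visible to this connection:", cursor.fetchone()[0])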

Related

Python DataFrame to MYSQL: TypeError: not enough arguments for format string

I've been playing with this for 14 hours (I am a beginner).
Data is pulled from one database table, used to search Yahoo for all the data on that ticker, and then the result is meant to be uploaded.
I originally had it as a pandas DataFrame but got an "ambiguous" error, so I have now wrapped it in a list again. New error. I'm racking my brains :( However, it does work if I leave the data blank.
from __future__ import print_function
import datetime
import warnings

import MySQLdb as mdb
import numpy as np
import pandas as pd
import requests
import yfinance as yf
# Obtain a database connection to the MySQL instance
con = mdb.connect("localhost", "sec_user", "", "securities_master")

def obtain_list_of_db_tickers():
    """
    Obtains a list of the ticker symbols in the database.
    """
    with con:
        cur = con.cursor()
        cur.execute("SELECT id, ticker FROM symbol")
        data = cur.fetchall()
        print(data)
        return [(d[0], d[1]) for d in data]

def get_daily_historic_data_yahoo(ticker):
    blow = yf.download(ticker)
    data = []
    data.append(yf.download(ticker).reset_index())
    return data

def insert_daily_data_into_db(data_vendor_id, symbol_id, daily_data):
    '''
    Takes a list of tuples of daily data and adds it to the MySQL database.
    Appends the vendor ID and symbol ID to the data.
    daily_data: List of tuples of the OHLC data (with adj_close and volume)
    '''
    # Create the time now
    now = datetime.datetime.utcnow()
    # Amend the data to include the vendor ID and symbol ID
    df = pd.DataFrame(data=daily_data[0])
    df.insert(0, 'data_vendor_id', data_vendor_id)
    df.insert(1, 'symbol_id', symbol_id)
    df.insert(3, 'created_date', now)
    df.insert(4, 'last_updated_date', now)
    daily_data = []
    daily_data.append(df)
    # Connect to the MySQL instance
    db_host = 'localhost'
    db_user = ''
    db_pass = ''
    db_name = 'securities_master'
    con = mdb.connect("localhost", "sec_user", "", "securities_master"
                      # host=db_host, user=db_user, passwd=db_pass, db=db_name
                      )
    try:
        mdb.connect
        # If connection is not successful
    except:
        print("Can't connect to database")
        return 0
    # If connection is successful
    print("Connected")
    final_str = """INSERT INTO daily_price (data_vendor_id, symbol_id, price_date, created_date,
                   last_updated_date, open_price, high_price, low_price, close_price, volume, adj_close_price)
                   VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
    with con:
        cur = con.cursor()
        cur.executemany(final_str, daily_data)
        con.commit()

if __name__ == "__main__":
    # This ignores the warnings regarding Data Truncation
    # from the Yahoo precision to Decimal(19,4) datatypes
    warnings.filterwarnings('ignore')
    # Loop over the tickers and insert the daily historical
    # data into the database
    tickers = obtain_list_of_db_tickers()
    lentickers = len(tickers)
    for i, t in enumerate(tickers):
        print(
            "Adding data for %s: %s out of %s" %
            (t[1], i+1, lentickers)
        )
        yf_data = get_daily_historic_data_yahoo(t[1])
        insert_daily_data_into_db('1', t[0], yf_data)
    print("Successfully added Yahoo Finance pricing data to DB.")
Errors
Traceback (most recent call last):
  File "/home/quant/price_retrieval.py", line 106, in <module>
    insert_daily_data_into_db('1', t[0], yf_data)
  File "/home/quant/price_retrieval.py", line 88, in insert_daily_data_into_db
    cur.executemany(final_str, daily_data)
  File "/home/quant/.local/lib/python3.8/site-packages/MySQLdb/cursors.py", line 230, in executemany
    return self._do_execute_many(
  File "/home/quant/.local/lib/python3.8/site-packages/MySQLdb/cursors.py", line 255, in _do_execute_many
    v = values % escape(next(args), conn)
TypeError: not enough arguments for format string
I'm no data scientist, so there's probably a more elegant way to fix this directly with pandas. But the way I usually work with MySQL (and really any SQL driver) is to give it lists of Python tuples.
If you parse each row of the pandas DataFrame with for row in df.itertuples(): and craft each tuple carefully, making sure the types match the SQL table, all should work ;)
Example:
def insert_daily_data_into_db(data_vendor_id, symbol_id, daily_data):
    '''
    Takes a list of tuples of daily data and adds it to the MySQL database.
    Appends the vendor ID and symbol ID to the data.
    daily_data: List of tuples of the OHLC data (with adj_close and volume)
    '''
    # Create the time now
    now = datetime.datetime.utcnow()
    df = pd.DataFrame(data=daily_data[0])
    daily_data = []
    created_date = now
    last_updated_date = now
    for row in df.itertuples():
        _index = row[0]  # discard
        date = row[1]
        open = row[2]
        high = row[3]
        low = row[4]
        close = row[5]
        adj_close_price = row[6]
        volume = row[7]
        daily_data.append((int(data_vendor_id), symbol_id, date, created_date,
                           last_updated_date, open, high, low, close, volume,
                           adj_close_price))
    # Connect to the MySQL instance
    con = mdb.connect(host="localhost", user="user", password="yourpassword",
                      db="yourdbname", port=3306)
    final_str = """
        INSERT INTO daily_price (data_vendor_id, symbol_id, price_date, created_date,
        last_updated_date, open_price, high_price, low_price, close_price, volume, adj_close_price)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    with con:
        cur = con.cursor()
        cur.executemany(final_str, daily_data)
        con.commit()
I've tried not to tamper with your existing code too much; just enough to make it work.
I think what was happening is that you were technically passing a list of pandas DataFrames with only a single DataFrame in the list. What executemany wants instead is a list of tuples, with 11 fields to unpack per tuple.
Maybe you meant to pass the DataFrame directly, i.e. not contained inside a list, but I still don't think that would be right, because 1) there's an "Index" column in the DataFrame, which would give erroneous results, and 2) you'd need to call some methods on the DataFrame to retrieve only the values (not the column headers) and transform them into the correct list of tuples. It's probably very doable, but I will leave that to you to find out.
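For completeness, a minimal sketch of that DataFrame-to-tuples transformation (my addition, assuming the same Date/Open/High/Low/Close/Adj Close/Volume column order as above):

# Hypothetical sketch: plain tuples straight from the DataFrame, index dropped
rows = list(df.itertuples(index=False, name=None))
daily_data = [
    (int(data_vendor_id), symbol_id, date, now, now,
     open_, high, low, close, volume, adj_close)
    for date, open_, high, low, close, adj_close, volume in rows
]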
I am also assuming your table schema is something like this:
CREATE TABLE IF NOT EXISTS daily_price (
    data_vendor_id INT,
    symbol_id INT,
    price_date DATETIME,
    created_date DATETIME,
    last_updated_date TIMESTAMP,
    open_price VARCHAR(256),
    high_price VARCHAR(256),
    low_price VARCHAR(256),
    close_price VARCHAR(256),
    volume INT,
    adj_close_price VARCHAR(256)
);

cursor.fetchall() returns only one row in Python with PostgreSQL

I am trying to create a training app in Python to work with a database of movies, adding movie details via a text menu that prompts the user for all fields (movie name, actors, company, etc.). I am using PostgreSQL as the database and import psycopg2 in Python.
From the user input I collect data that I then want to store in my database tables 'movies' and 'actors'. One movie has several actors. I have this code:
def insert_movie(name, actors, company, year):
    connection = psycopg2.connect(user='postgres', password='postgres', database='movie')
    cursor = connection.cursor()
    # RETURNING lets fetchone()/fetchall() read the generated ids
    query1 = "INSERT INTO movies (name, company, year) VALUES (%s, %s, %s) RETURNING id;"
    cursor.execute(query1, (name, company, year))
    movie_id = cursor.fetchone()[0]
    print(movie_id)
    query2 = 'INSERT INTO actors (last_name, first_name, actor_ordinal) VALUES (%s, %s, %s) RETURNING id;'
    for actor in actors:
        cursor.execute(query2, tuple(actor))
    rows = cursor.fetchall()
    actor_id1 = [row[0] for row in rows]
    actor_id2 = [row[1] for row in rows]
    print(actor_id1)
    print(actor_id2)
    connection.commit()
    connection.close()
This works great for printing movie_id after query1. However, for printing actor_id2 I get IndexError: list index out of range.
If I keep only actor_id1 after query2, like this:
query2 = 'INSERT INTO actors (last_name, first_name, actor_ordinal) VALUES (%s, %s, %s) RETURNING id;'
for actor in actors:
    cursor.execute(query2, tuple(actor))
rows = cursor.fetchall()
actor_id1 = [row[0] for row in rows]
print(actor_id1)
I get the following result:
movie_id --> 112
actor2_id --> 155
The problem is that I cannot retrieve actor1_id, which is 154, with this code.
Can anyone help with using fetchall correctly here?
OK, I have found the answer. The fetch should be done inside the loop, once per executed statement, not after the whole loop for all rows at once, because each execute replaces the previous statement's result:
query2 = 'INSERT INTO actors (last_name, first_name, actor_ordinal) VALUES (%s, %s, %s) RETURNING id;'
actor_ids = []
for actor in actors:
    cursor.execute(query2, tuple(actor))
    actor_id = cursor.fetchone()[0]
    actor_ids.append(actor_id)
print(actor_ids)
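A side note (my addition, not part of the original answer): if the per-row round trips ever matter, psycopg2.extras.execute_values can insert all actors in one statement and, with fetch=True, still return every generated id. A sketch assuming the same actors list of (last_name, first_name, actor_ordinal) tuples:

from psycopg2.extras import execute_values

# One multi-row INSERT; fetch=True returns the RETURNING rows
rows = execute_values(
    cursor,
    "INSERT INTO actors (last_name, first_name, actor_ordinal) VALUES %s RETURNING id",
    [tuple(actor) for actor in actors],
    fetch=True,
)
actor_ids = [row[0] for row in rows]
print(actor_ids)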

How to insert CSV file data into MySQL using Python efficiently?

I have a CSV input file with approx. 4 million records.
The insert has been running for 2+ hours and still has not finished.
The database is still empty.
Any suggestions on how to actually insert the values (using INSERT INTO), and faster, like breaking the insert into chunks?
I'm pretty new to Python.
csv file example
43293,cancelled,1,0.0,
1049007,cancelled,1,0.0,
438255,live,1,0.0,classA
1007255,xpto,1,0.0,
python script
def csv_to_DB(xing_csv_input, db_opts):
    print("Inserting csv file {} to database {}".format(xing_csv_input, db_opts['host']))
    conn = pymysql.connect(**db_opts)
    cur = conn.cursor()
    try:
        with open(xing_csv_input, newline='') as csvfile:
            csv_data = csv.reader(csvfile, delimiter=',', quotechar='"')
            for row in csv_data:
                insert_str = "INSERT INTO table_x (ID, desc, desc_version, val, class) VALUES (%s, %s, %s, %s, %s)"
                cur.execute(insert_str, row)
        conn.commit()
    finally:
        conn.close()
UPDATE:
Thanks for all the input.
As suggested, I tried a counter to insert in batches of 100, on a smaller CSV data set (1000 lines).
The problem now is that only 100 records are inserted, even though the counter cycles through 100 ten times.
code change:
def csv_to_DB(xing_csv_input, db_opts):
    print("Inserting csv file {} to database {}".format(xing_csv_input, db_opts['host']))
    conn = pymysql.connect(**db_opts)
    cur = conn.cursor()
    count = 0
    try:
        with open(xing_csv_input, newline='') as csvfile:
            csv_data = csv.reader(csvfile, delimiter=',', quotechar='"')
            for row in csv_data:
                count += 1
                print(count)
                insert_str = "INSERT INTO table_x (ID, desc, desc_version, val, class) VALUES (%s, %s, %s, %s, %s)"
                if count >= 100:
                    cur.execute(insert_str, row)
                    print("count100")
                    conn.commit()
                    count = 0
                if not row:
                    cur.execute(insert_str, row)
                    conn.commit()
    finally:
        conn.close()
There are many ways to optimise this insert. Here are some ideas:
1. You have a for loop over the entire dataset; you can do a commit() every 100 rows or so.
2. You can insert many rows with one INSERT statement.
3. You can combine the two and make a multi-row insert every 100 rows of your CSV.
4. If Python is not a requirement, you can do it directly in MySQL, as explained here; see the LOAD DATA sketch after the example below. (If you must use Python, you can still prepare that statement in Python and avoid looping through the file manually.)
Examples:
For number 2 in the list, the code will have the following structure:
def csv_to_DB(xing_csv_input, db_opts):
    print("Inserting csv file {} to database {}".format(xing_csv_input, db_opts['host']))
    conn = pymysql.connect(**db_opts)
    cur = conn.cursor()
    insert_str = "INSERT INTO table_x (ID, desc, desc_version, val, class) VALUES "
    template = '(%s, %s, %s, %s, %s)'
    try:
        with open(xing_csv_input, newline='') as csvfile:
            csv_data = csv.reader(csvfile, delimiter=',', quotechar='"')
            to_insert = []
            for row in csv_data:
                to_insert.append(tuple(row))
                if len(to_insert) == 100:
                    # One multi-row INSERT per 100 CSV rows; placeholders keep it parameterized
                    query = insert_str + ', '.join([template] * len(to_insert))
                    cur.execute(query, [value for r in to_insert for value in r])
                    conn.commit()
                    to_insert = []
            if to_insert:  # flush the remaining (< 100) rows
                query = insert_str + ', '.join([template] * len(to_insert))
                cur.execute(query, [value for r in to_insert for value in r])
                conn.commit()
    finally:
        conn.close()
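For idea 4 in the list above, a sketch of the direct MySQL route (my addition; it assumes the same table, that the CSV sits next to the script as xing.csv, and that LOCAL INFILE is enabled on both client and server):

import pymysql

conn = pymysql.connect(**db_opts, local_infile=True)  # db_opts as in the question
try:
    with conn.cursor() as cur:
        # Let MySQL parse the CSV itself; `desc` is a reserved word, hence the backticks
        cur.execute("""
            LOAD DATA LOCAL INFILE 'xing.csv'
            INTO TABLE table_x
            FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
            LINES TERMINATED BY '\\n'
            (ID, `desc`, desc_version, val, class)
        """)
    conn.commit()
finally:
    conn.close()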
Here, try this snippet using executemany() and let me know if it works:
with open(xing_csv_input, newline='') as csvfile:
    csv_data = list(csv.reader(csvfile, delimiter=',', quotechar='"'))

query = "INSERT INTO table_x (ID, desc, desc_version, val, class) VALUES (%s, %s, %s, %s, %s)"
try:
    cur.executemany(query, csv_data)
    conn.commit()
except Exception:
    conn.rollback()
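With roughly 4 million records, reading the whole file into memory may hurt; a chunked variant (my sketch, same assumed table and connection, backticks because desc is reserved in MySQL) streams the reader instead:

import csv
from itertools import islice

query = "INSERT INTO table_x (ID, `desc`, desc_version, val, class) VALUES (%s, %s, %s, %s, %s)"
with open(xing_csv_input, newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='"')
    while True:
        chunk = list(islice(reader, 10000))  # 10k rows at a time
        if not chunk:
            break
        cur.executemany(query, chunk)  # pymysql rewrites this into multi-row INSERTs
        conn.commit()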

How to insert rows of information into different rows in a mysql database?

I don't know how to insert a list of values into a MySQL database.
I'm trying to insert rows of data into a database, but it simply inserts the last row three times. The list is named "t" and it is a tuple.
Data:
11/04/19,17:33,33.4,55
11/04/19,17:34,22.9,57
11/04/19,17:35,11.9,81
Code:
import mysql.connector

sql = mysql.connector.connect(
    host=' ',
    user=' ',
    password=' ',
    db=" "
)
cursor = sql.cursor()
f = open(r"C:\Cumulus\data\Apr19log.txt", "r")
while True:
    s = f.readline()
    list = []
    if (s != ""):
        t = s.split(',')
        for item in t:
            list.append(item)
    else:
        break
sqllist = """INSERT INTO station_fenelon (variable, date,
             time, outside_temp, outside_humidity)
             VALUES (%s, %s, %s, %s, %s)"""
record = [(1, t[0], t[1], t[2], t[3]),
          (2, t[0], t[1], t[2], t[3]),
          (3, t[0], t[1], t[2], t[3])]
cursor.executemany(sqllist, record)
sql.commit()
I want to create three rows in the database from this list of information, but only the last row of information shows up in the database.
Try this.
import mysql.connector

sql = mysql.connector.connect(host='', user='', password='', db='')
cursor = sql.cursor()
with open(r"C:\Cumulus\data\Apr19log.txt") as f:
    st = [line.strip().split(',') for line in f if line.strip()]
sqllist = """INSERT INTO station_fenelon (variable, date, time, outside_temp, outside_humidity)
             VALUES (%s, %s, %s, %s, %s)"""
record = [(i + 1, j[0], j[1], j[2], j[3]) for i, j in enumerate(st)]
cursor.executemany(sqllist, record)
sql.commit()

MySQL not accepting executemany() INSERT, running Python from Excel (datanitro)

I HAVE ADDED MY OWN ANSWER THAT WORKS, BUT I AM OPEN TO IMPROVEMENTS
After seeing a project at datanitro, I took on getting a connection to MySQL (they use SQLite) and was able to import a small test table into Excel from MySQL.
Inserting new, updated data from the Excel sheet was the next task, and so far I can get one row to work, like so...
import MySQLdb

db = MySQLdb.connect("xxx", "xxx", "xxx", "xxx")
c = db.cursor()
c.execute("""INSERT INTO users (id, username, password, userid, fname, lname)
             VALUES (%s, %s, %s, %s, %s, %s);""",
          (Cell(5,1).value, Cell(5,2).value, Cell(5,3).value,
           Cell(5,4).value, Cell(5,5).value, Cell(5,6).value))
db.commit()
db.close()
...but attempts at multiple rows fail. I suspect an issue in how I traverse the rows in Excel. Here is what I have so far...
import MySQLdb

db = MySQLdb.connect(host="xxx.com", user="xxx", passwd="xxx", db="xxx")
c = db.cursor()
c.execute("select * from users")
usersss = c.fetchall()

updates = []
row = 2  # starting row
while True:
    data = tuple(CellRange((row,1),(row,6)).value)
    if data[0]:
        if data not in usersss:  # new record
            updates.append(data)
        row += 1
    else:  # end of table
        break

c.executemany("""INSERT INTO users (id, username, password, userid, fname, lname)
                 VALUES (%s, %s, %s, %s, %s, %s)""", updates)
db.commit()
db.close()
...as of now, I don't get any errors, but my new row (id 3) is not added. This is what my table looks like in Excel...
The database holds the same structure, minus id 3. There has to be a simpler way to traverse the rows and pull the unique content for INSERT, but after 6 hours of trying different things (and 2 new Python books) I am going to ask for help.
If I run either...
print '[%s]' % ', '.join(map(str, updates))
or
print updates
my result is
[]
So this is likely not passing any data to MySQL in the first place.
LATEST UPDATE AND WORKING SCRIPT
Not exactly what I want, but this has worked for me...
c = db.cursor()
row = 2
while Cell(row,1).value is not None:
    c.execute("""INSERT IGNORE INTO users (id, username, password, userid, fname, lname)
                 VALUES (%s, %s, %s, %s, %s, %s);""",
              (CellRange((row,1),(row,6)).value))
    row = row + 1
Here is your problem:
while True:
    if data[0]:
        ...
    else:
        break
Your first id is 0, so in the first iteration of the loop data[0] is falsy and your loop exits without ever adding any data. What you probably meant is:
while True:
    if data[0] is not None:
        ...
    else:
        break
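A short illustration of the difference (my addition, with made-up row data):

data = (0, "jsmith", "secret", 100, "John", "Smith")  # hypothetical first row, id 0
print(bool(data[0]))         # False: 0 is falsy, so `if data[0]:` exits immediately
print(data[0] is not None)   # True: only a genuinely empty cell should end the loop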
I ended up finding a solution that gives me an INSERT for new rows and allows an UPDATE of those that have changed. Not exactly a Python selection based on a single query, but it will do.
import MySQLdb

db = MySQLdb.connect("xxx", "xxx", "xxx", "xxx")
c = db.cursor()
row = 2
while Cell(row,1).value is not None:
    c.execute("""INSERT INTO users (id, username, password, userid, fname, lname)
                 VALUES (%s, %s, %s, %s, %s, %s)
                 ON DUPLICATE KEY UPDATE
                 id=VALUES(id), username=VALUES(username), password=VALUES(password),
                 userid=VALUES(userid), fname=VALUES(fname), lname=VALUES(lname);""",
              (CellRange((row,1),(row,6)).value))
    row = row + 1
db.commit()
db.close()
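If the row-by-row execute() ever becomes a bottleneck, the same upsert also works batched through executemany(); a sketch under the same assumptions (datanitro's Cell/CellRange, MySQLdb, the users table above):

# Hypothetical batched variant of the loop above
rows = []
r = 2
while Cell(r, 1).value is not None:
    rows.append(tuple(CellRange((r, 1), (r, 6)).value))
    r += 1

c.executemany("""INSERT INTO users (id, username, password, userid, fname, lname)
                 VALUES (%s, %s, %s, %s, %s, %s)
                 ON DUPLICATE KEY UPDATE
                 username=VALUES(username), password=VALUES(password),
                 userid=VALUES(userid), fname=VALUES(fname), lname=VALUES(lname)""", rows)
db.commit()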
