I am trying to dynamically generate MySQL insert/update queries given a csv file.
I have a csv file hobbies.csv:
id,name,hobby
"1","rick","coding"
"2","mike","programming"
"3","tim","debugging"
I then have 2 functions: 1 to generate the queries, 1 to update the database:
generate_sql.py
from connect_to_database import read_db_config
from config_parser import read_csv_files
from update_db import insert_records
import csv
def generate_mysql_queries():
csv_file_list, table_list, temp_val, temp_key, temp_table, reader, header, data, data_list = ([] for i in range(9))
val_param = '%s'
query = ''
total_queries = 0
db = read_db_config(filename='config.ini', section='mysql')
csv_file_dict = read_csv_files(filename='config.ini', section='data')
for key, value in csv_file_dict.items():
temp_val = [value]
temp_key = [key]
csv_file_list.append(temp_val)
table_list.append(temp_key)
for index, files in enumerate(csv_file_list):
with open("".join(files), 'r') as f:
reader = csv.reader(f)
header.append(next(reader))
data.append([row for row in reader])
for d in range(len(data[index])):
val_param_subs = ','.join((val_param,) * len(data[index][d]))
total_queries += 1
query = """INSERT INTO """ + str(db['database']) + """.""" + """""".join('{0}'.format(t) for t in table_list[index]) + \
"""(""" + """, """.join('{0}'.format(h) for h in header[index]) + """) VALUES (%s)""" % val_param_subs + \
""" ON DUPLICATE KEY UPDATE """ + """=%s, """.join(header[index]) + """=%s"""
data_list.append(data[index][d])
insert_records(query, data_list)
I then pass the query and data to insert_records() in update_db.py:
from mysql.connector import MySQLConnection, Error
from connect_to_database import read_db_config
def insert_records(query, data):
query_string = query
data_tuple = tuple(data)
try:
db_config = read_db_config(filename='config.ini', section='mysql')
conn = MySQLConnection(**db_config)
cursor = conn.cursor()
cursor.executemany(query, data_tuple)
print("\tExecuted!")
conn.commit()
except Error as e:
print('\n\tError:', e)
print("\n\tNot Executed!")
finally:
cursor.close()
conn.close()
The data passed into cursor.executemany(query, data_string) looks like the following (query is a string and data_tuple is a tuple):
query: INSERT INTO test.hobbies(id, name, hobby) VALUES (%s,%s,%s) ON DUPLICATE KEY UPDATE id=%s, name=%s, hobby=%s
data_tuple: (['1', 'rick', 'coding'], ['2', 'mike', 'programming'], ['3', 'tim', 'debugging'])
Given these two parameters, I get the following error:
Error: 1064 (42000): You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '%s, name=%s, hobby=%s' at line 1
I've tried passing in the same string non-dynamically by just sending the full string without the '%s' parameters and it works fine. What am I missing? Any help is much appreciated.
Probably is the use of the triple double quotes in python. When you use this
query = """INSERT INTO """ + str(db['database']) + """.""" + """""".join('{0}'.format(t) for t in table_list[index]) + \
"""(""" + """, """.join('{0}'.format(h) for h in header[index]) + """) VALUES (%s)""" % val_param_subs + \
""" ON DUPLICATE KEY UPDATE """ + """=%s, """.join(header[index]) + """=%s"""
You're saying to python that everything is a string including %s.
Related
I'm using Python 2 and have the following code:
with conn.cursor() as cursor:
info("Updating {} records".format(len(records_to_update)))
for record in records_to_update:
query = "UPDATE my_table SET "
params_setters = []
# Process all fields except wsid when updating
for index, header in enumerate(DB_COLUMNS_IN_ORDER[1:]):
if record[index] is not None:
params_setters.append("{} = '{}' ".format(header, record[index]))
query += " , ".join(params_setters)
query += " WHERE id = '{}'".format(record[0])
cursor.execute(query)
How can I use query params for escaping here and not have to do it manually in places like:
params_setters.append("{} = '{}' ".format(header, record[index]))
If I understand your question, you want to use a prepared statement. If you are using a driver where %s is used to represent a query parameter (SQLite uses ?), then:
with conn.cursor() as cursor:
info("Updating {} records".format(len(records_to_update)))
params = []
for record in records_to_update:
query = "UPDATE my_table SET "
params_setters = []
# Process all fields except wsid when updating
for index, header in enumerate(DB_COLUMNS_IN_ORDER[1:]):
if record[index] is not None:
params_setters.append("{} = %s ".format(header))
params.append(record[index])
query += " , ".join(params_setters)
query += " WHERE id = %s"
params.append(record[0])
cursor.execute(query, params)
I am pretty new in python developing. I have a long python script what "clone" a database and add additional stored functions and procedures. Clone means copy only the schema of DB.These steps work fine.
My question is about pymysql insert exection:
I have to copy some table contents into the new DB. I don't get any sql error. If I debug or print the created INSERT INTO command is correct (I've tested it in an sql editor/handler). The insert execution is correct becuse the result contain the exact row number...but all rows are missing from destination table in dest.DB...
(Ofcourse DB_* variables have been definied!)
import pymysql
liveDbConn = pymysql.connect(DB_HOST, DB_USER, DB_PWD, LIVE_DB_NAME)
testDbConn = pymysql.connect(DB_HOST, DB_USER, DB_PWD, TEST_DB_NAME)
tablesForCopy = ['role', 'permission']
for table in tablesForCopy:
with liveDbConn.cursor() as liveCursor:
# Get name of columns
liveCursor.execute("DESCRIBE `%s`;" % (table))
columns = '';
for column in liveCursor.fetchall():
columns += '`' + column[0] + '`,'
columns = columns.strip(',')
# Get and convert values
values = ''
liveCursor.execute("SELECT * FROM `%s`;" % (table))
for result in liveCursor.fetchall():
data = []
for item in result:
if type(item)==type(None):
data.append('NULL')
elif type(item)==type('str'):
data.append("'"+item+"'")
elif type(item)==type(datetime.datetime.now()):
data.append("'"+str(item)+"'")
else: # for numeric values
data.append(str(item))
v = '(' + ', '.join(data) + ')'
values += v + ', '
values = values.strip(', ')
print("### table: %s" % (table))
testDbCursor = testDbConn.cursor()
testDbCursor.execute("INSERT INTO `" + TEST_DB_NAME + "`.`" + table + "` (" + columns + ") VALUES " + values + ";")
print("Result: {}".format(testDbCursor._result.message))
liveDbConn.close()
testDbConn.close()
Result is:
### table: role
Result: b"'Records: 16 Duplicates: 0 Warnings: 0"
### table: permission
Result: b'(Records: 222 Duplicates: 0 Warnings: 0'
What am I doing wrong? Thanks!
You have 2 main issues here:
You don't use conn.commit() (which would be either be liveDbConn.commit() or testDbConn.commit() here). Changes to the database will not be reflected without committing those changes. Note that all changes need committing but SELECT, for example, does not.
Your query is open to SQL Injection. This is a serious problem.
Table names cannot be parameterized, so there's not much we can do about that, but you'll want to parameterize your values. I've made multiple corrections to the code in relation to type checking as well as parameterization.
for table in tablesForCopy:
with liveDbConn.cursor() as liveCursor:
liveCursor.execute("SELECT * FROM `%s`;" % (table))
name_of_columns = [item[0] for item in liveCursor.description]
insert_list = []
for result in liveCursor.fetchall():
data = []
for item in result:
if item is None: # test identity against the None singleton
data.append('NULL')
elif isinstance(item, str): # Use isinstance to check type
data.append(item)
elif isinstance(item, datetime.datetime):
data.append(item.strftime('%Y-%m-%d %H:%M:%S'))
else: # for numeric values
data.append(str(item))
insert_list.append(data)
testDbCursor = testDbConn.cursor()
placeholders = ', '.join(['`%s`' for item in insert_list[0]])
testDbCursor.executemany("INSERT INTO `{}.{}` ({}) VALUES ({})".format(
TEST_DB_NAME,
table,
name_of_columns,
placeholders),
insert_list)
testDbConn.commit()
From this github thread, I notice that executemany does not work as expected in psycopg2; it instead sends each entry as a single query. You'll need to use execute_batch:
from psycopg2.extras import execute_batch
execute_batch(testDbCursor,
"INSERT INTO `{}.{}` ({}) VALUES ({})".format(TEST_DB_NAME,
table,
name_of_columns,
placeholders),
insert_list)
testDbConn.commit()
How to insert data into table using python pymsql
Find my solution below
import pymysql
import datetime
# Create a connection object
dbServerName = "127.0.0.1"
port = 8889
dbUser = "root"
dbPassword = ""
dbName = "blog_flask"
# charSet = "utf8mb4"
conn = pymysql.connect(host=dbServerName, user=dbUser, password=dbPassword,db=dbName, port= port)
try:
# Create a cursor object
cursor = conn.cursor()
# Insert rows into the MySQL Table
now = datetime.datetime.utcnow()
my_datetime = now.strftime('%Y-%m-%d %H:%M:%S')
cursor.execute('INSERT INTO posts (post_id, post_title, post_content, \
filename,post_time) VALUES (%s,%s,%s,%s,%s)',(5,'title2','description2','filename2',my_datetime))
conn.commit()
except Exception as e:
print("Exeception occured:{}".format(e))
finally:
conn.close()
I am moving data from MySQL to MSSQL - however I have a problem with insert into statement when I have ' in value.
for export i have used code below:
import pymssql
import mysql.connector
conn = pymssql.connect(host='XXX', user='XXX',
password='XXX', database='XXX')
sqlcursor = conn.cursor()
cnx = mysql.connector.connect(user='root',password='XXX',
database='XXX')
cursor = cnx.cursor()
sql= "SELECT Max(ID) FROM XXX;"
cursor.execute(sql)
row=cursor.fetchall()
maxID = str(row)
maxID = maxID.replace("[(", "")
maxID = maxID.replace(",)]", "")
AMAX = int(maxID)
LC = 1
while LC <= AMAX:
LCC = str(LC)
sql= "SELECT * FROM XX where ID ='"+ LCC +"'"
cursor.execute(sql)
result = cursor.fetchall()
data = str(result)
data = data.replace("[(","")
data = data.replace(")]","")
data = data.replace("None","NULL")
#print(row)
si = "insert into [XXX].[dbo].[XXX] select " + data
#print(si)
#sys.exit("stop")
try:
sqlcursor.execute(si)
conn.commit()
except Exception:
print("-----------------------")
print(si)
LC = LC + 1
print('Import done | total count:', LC)
It is working fine until I have ' in one of my values:
'N', '0000000000', **"test string'S nice company"**
I would like to avoid spiting the data into columns and then checking if there is ' in the data - as my table has about 500 fields.
Is there a smart way of replacing ' with ''?
Answer:
Added SET QUOTED_IDENTIFIER OFF to insert statement:
si = "SET QUOTED_IDENTIFIER OFF insert into [TechAdv].[dbo].[aem_data_copy]
select " + data
In MSSQL, you can SET QUOTED_IDENTIFIER OFF, then you can use double quotes to escape a singe quote, or use two single quotes to escape one quote.
I want to store bulk data to postgresql.
The data I got are from google analytics [API]. The data is about pageviews and here is my code:
data = '[["20151201","path","title",345], ["20151202","path","title",321], ["20151203","path","title",214]]'
def storeJson( jsonFile, tableName ):
conn = psycopg2.connect( host=hostname, user=username, password=password, dbname=database )
try:
cur = conn.cursor()
# Here is the problem:
cur.executemany( "INSERT INTO " + tableName + " VALUES(%s)", [jsonFile])
conn.commit()
except psycopg2.DatabaseError as e:
if conn:
conn.rollback()
print("Error %s" %e)
exit()
finally:
if conn:
cur.close()
conn.close()
def main()
storeJson(data, "daily_pageviews")
if __name__ == '__main__':
main()
with the code above, i got error message like this:
json.decoder.JSONDecodeError: Expecting ':' delimiter: line 1 column 12 (char 11)
Can someone enlighten me? Thanks guys!
Finally, here is the case: First, my data isn't json format but it is list of list format. Here is the solution I got from my friend using sqlalchemy:
from sqlalchemy.engine import create_engine
from sqlalchemy.schema import MetaData, Table
engine = create_engine('postgresql://db_username:db_password#ip/dbname')
metadata = MetaData()
metadata.bind = engine
def storeJson( jsonFile, tableName ):
table = Table(tableName, metadata, autoload=True)
#import ipdb; ipdb.set_trace()
def to_dicts(rows):
for row in rows:
data = {}
for i, column in enumerate(table.columns):
data[column.name] = row[i]
yield data
params = list(to_dicts(jsonFile))
engine.execute(table.insert(), params)
return
Assuming value of jsonFile list ordered exactly like the table on db.
Note: You could install sqlalchemy using pip
python -m pip install sqlalchemy --user
As for "how to" get data from google analytics, you could visit it's site: https://developers.google.com/analytics/devguides/reporting/core/v3/quickstart/service-py
jsonFile is a string in your case. You need to load it with json.loads():
import json
data = json.loads(jsonFile)
cur.executemany("INSERT INTO " + tableName + " VALUES(%s, %s, %s, %s)", data)
Note that I have 4 placeholders in the query - each for every item in every sublist.
I am trying to write a csv file into a table in SQL Server database using python. I am facing errors when I pass the parameters , but I don't face any error when I do it manually. Here is the code I am executing.
cur=cnxn.cursor() # Get the cursor
csv_data = csv.reader(file(Samplefile.csv')) # Read the csv
for rows in csv_data: # Iterate through csv
cur.execute("INSERT INTO MyTable(Col1,Col2,Col3,Col4) VALUES (?,?,?,?)",rows)
cnxn.commit()
Error:
pyodbc.DataError: ('22001', '[22001] [Microsoft][ODBC SQL Server Driver][SQL Server]String or binary data would be truncated. (8152) (SQLExecDirectW); [01000] [Microsoft][ODBC SQL Server Driver][SQL Server]The statement has been terminated. (3621)')
However when I insert the values manually. It works fine
cur.execute("INSERT INTO MyTable(Col1,Col2,Col3,Col4) VALUES (?,?,?,?)",'A','B','C','D')
I have ensured that the TABLE is there in the database, data types are consistent with the data I am passing. Connection and cursor are also correct. The data type of rows is "list"
Consider building the query dynamically to ensure the number of placeholders matches your table and CSV file format. Then it's just a matter of ensuring your table and CSV file are correct, instead of checking that you typed enough ? placeholders in your code.
The following example assumes
CSV file contains column names in the first line
Connection is already built
File name is test.csv
Table name is MyTable
Python 3
...
with open ('test.csv', 'r') as f:
reader = csv.reader(f)
columns = next(reader)
query = 'insert into MyTable({0}) values ({1})'
query = query.format(','.join(columns), ','.join('?' * len(columns)))
cursor = connection.cursor()
for data in reader:
cursor.execute(query, data)
cursor.commit()
If column names are not included in the file:
...
with open ('test.csv', 'r') as f:
reader = csv.reader(f)
data = next(reader)
query = 'insert into MyTable values ({0})'
query = query.format(','.join('?' * len(data)))
cursor = connection.cursor()
cursor.execute(query, data)
for data in reader:
cursor.execute(query, data)
cursor.commit()
I modified the code written above by Brian as follows since the one posted above wouldn't work on the delimited files that I was trying to upload. The line row.pop() can also be ignored as it was necessary only for the set of files that I was trying to upload.
import csv
def upload_table(path, filename, delim, cursor):
"""
Function to upload flat file to sqlserver
"""
tbl = filename.split('.')[0]
cnt = 0
with open (path + filename, 'r') as f:
reader = csv.reader(f, delimiter=delim)
for row in reader:
row.pop() # can be commented out
row = ['NULL' if val == '' else val for val in row]
row = [x.replace("'", "''") for x in row]
out = "'" + "', '".join(str(item) for item in row) + "'"
out = out.replace("'NULL'", 'NULL')
query = "INSERT INTO " + tbl + " VALUES (" + out + ")"
cursor.execute(query)
cnt = cnt + 1
if cnt % 10000 == 0:
cursor.commit()
cursor.commit()
print("Uploaded " + str(cnt) + " rows into table " + tbl + ".")
You can pass the columns as arguments. For example:
for rows in csv_data: # Iterate through csv
cur.execute("INSERT INTO MyTable(Col1,Col2,Col3,Col4) VALUES (?,?,?,?)", *rows)
If you are using MySqlHook in airflow , if cursor.execute() with params throw san error
TypeError: not all arguments converted during string formatting
use %s instead of ?
with open('/usr/local/airflow/files/ifsc_details.csv','r') as csv_file:
csv_reader = csv.reader(csv_file)
columns = next(csv_reader)
query = '''insert into ifsc_details({0}) values({1});'''
query = query.format(','.join(columns), ','.join(['%s'] * len(columns)))
mysql = MySqlHook(mysql_conn_id='local_mysql')
conn = mysql.get_conn()
cursor = conn.cursor()
for data in csv_reader:
cursor.execute(query, data)
cursor.commit()
I got it sorted out. The error was due to the size restriction restriction of table. It changed the column capacity like from col1 varchar(10) to col1 varchar(35) etc. Now it's working fine.
Here is the script and hope this works for you:
import pandas as pd
import pyodbc as pc
connection_string = "Driver=SQL Server;Server=localhost;Database={0};Trusted_Connection=Yes;"
cnxn = pc.connect(connection_string.format("DataBaseNameHere"), autocommit=True)
cur=cnxn.cursor()
df= pd.read_csv("your_filepath_and_filename_here.csv").fillna('')
query = 'insert into TableName({0}) values ({1})'
query = query.format(','.join(df.columns), ','.join('?' * len(df1.columns)))
cur.fast_executemany = True
cur.executemany(query, df.values.tolist())
cnxn.close()
You can also import data into SQL by using either:
The SQL Server Import and Export Wizard
SQL Server Integration Services (SSIS)
The OPENROWSET function
More details can be found on this webpage:
https://learn.microsoft.com/en-us/sql/relational-databases/import-export/import-data-from-excel-to-sql?view=sql-server-2017