How to insert into a table with the character ' in a field (pymssql) - python

I'm trying to populate my db from a csv file using python.
Below is the code I use to populate my sales table:
import csv
import pymssql as psql

conn = psql.connect('localhost:8888', 'SA', 'superSecret', 'videogame')
cursor = conn.cursor()
cursor.execute("""
IF OBJECT_ID('sales', 'U') IS NOT NULL
    DROP TABLE sales
CREATE TABLE sales
(
    Id int,
    Name varchar(250),
    Platform varchar(250),
    Year int,
    Genre varchar(250),
    Publisher varchar(250),
    NA_Sales float,
    EU_Sales float,
    JP_Sales float,
    Other_Sales float,
    Global_Sales float
)
""")
conn.commit()

with open('./sales.csv', 'r') as f:
    reader = csv.reader(f)
    for row in reader:
        row[1] = row[1].replace("'", "")
        row[5] = row[5].replace("'", "")
        data = tuple(row)
        query = 'insert into sales values {0}'.format(data).replace("N/A", "0")
        print(query)
        cursor.execute(query)
conn.commit()
conn.close()
However, some of my data contains an apostrophe (') in the name column (e.g. Assassin's Creed). This causes an error, as below:
insert into sales values ('129', "Assassin's Creed III", 'PS3', '2012', 'Action', 'Ubisoft', '2.64', '2.56', '0.16', '1.14', '6.5')
Traceback (most recent call last):
File "pymssql.pyx", line 447, in pymssql.Cursor.execute (pymssql.c:7119)
File "_mssql.pyx", line 1011, in _mssql.MSSQLConnection.execute_query (_mssql.c:11586)
File "_mssql.pyx", line 1042, in _mssql.MSSQLConnection.execute_query (_mssql.c:11466)
File "_mssql.pyx", line 1175, in _mssql.MSSQLConnection.format_and_run_query (_mssql.c:12746)
File "_mssql.pyx", line 1586, in _mssql.check_cancel_and_raise (_mssql.c:16880)
File "_mssql.pyx", line 1630, in _mssql.maybe_raise_MSSQLDatabaseException (_mssql.c:17524)
_mssql.MSSQLDatabaseException: (207, b"Invalid column name 'Assassin's Creed III'.DB-Lib error message 20018, severity 16:\nGeneral SQL Server error: Check messages from the SQL Server\n")
Is there any workaround for this other than manually updating the row (e.g. row[1] = row[1].replace("'", ""))?
Thanks!!

You could use a proper parameterized query, like this:
row = ["Assassin's", "N/A", 9] # test data as list (e.g., from CSV)
data = tuple("0" if x=="N/A" else x for x in row)
print(data) # ("Assassin's", '0', 9)
placeholders = ','.join(['%s' for i in range(len(data))])
query = 'INSERT INTO sales VALUES ({0})'.format(placeholders)
print(query) # INSERT INTO sales VALUES (%s,%s,%s)
cursor.execute(query, data)
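A minimal sketch applying this to the question's CSV loop (assuming the connection settings, file path, and "N/A"-to-0 convention from the question):
import csv
import pymssql as psql

conn = psql.connect('localhost:8888', 'SA', 'superSecret', 'videogame')
cursor = conn.cursor()

with open('./sales.csv', 'r') as f:
    reader = csv.reader(f)
    for row in reader:
        # Apostrophes need no special handling: the driver escapes parameters.
        data = tuple("0" if x == "N/A" else x for x in row)
        placeholders = ','.join(['%s'] * len(data))
        cursor.execute('INSERT INTO sales VALUES ({0})'.format(placeholders), data)

conn.commit()
conn.close()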

You can escape the ' by doubling it to '', which is how SQL Server escapes a single quote inside a string literal; that stops the crash whilst preserving the apostrophe in your data. (Note that replace("'", "\'") is a no-op in Python, because "\'" is the same string as "'". Also, formatting the tuple directly still fails: Python's repr switches to double quotes for a string that contains an apostrophe, which is exactly what produced the Invalid column name error above. Build the VALUES list with explicit single quotes instead.)
with open('./sales.csv', 'r') as f:
    reader = csv.reader(f)
    for row in reader:
        row[1] = row[1].replace("'", "''")
        row[5] = row[5].replace("'", "''")
        values = ", ".join("'{0}'".format(x) for x in row)
        query = 'insert into sales values ({0})'.format(values).replace("N/A", "0")
        print(query)
        cursor.execute(query)
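For illustration (a quick check of my own), the doubled quote survives the round trip:
name = "Assassin's Creed III"
escaped = name.replace("'", "''")
print("'{0}'".format(escaped))  # 'Assassin''s Creed III' -- SQL Server reads a single '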

Related

Inserting csv into MySQL database with python library mysql.connector

I am having trouble inserting CSV data into a MySQL table with mysql.connector.
The code I use looks like this:
import mysql.connector
import csv

andreport = 'testowa.csv'
cnx = mysql.connector.connect(
    user='xxxxx',
    password='xxxxx',
    host='xxxxxx',
    database='xxxxx')
cursor = cnx.cursor()

with open(andreport, 'r') as csv_data:
    for row in csv_data:
        cursor.execute(
            "INSERT INTO flex(date, Store, Vendor, Shelf)"
            "VALUES({},{},{},{})", row)

cnx.commit()
cursor.close()
cnx.close()
print("Done")
The error I get :
C:\Users\Iw4n\PycharmProjects\Learning\venv\Scripts\python.exe C:/Users/Iw4n/PycharmProjects/Learning/Orange_android_MySQL_insertion.py
Traceback (most recent call last):
File "C:/Users/Iw4n/PycharmProjects/Learning/Orange_android_MySQL_insertion.py", line 15, in <module>
cursor.execute(
File "C:\Users\Iw4n\PycharmProjects\Learning\venv\lib\site-packages\mysql\connector\cursor.py", line 551, in execute
self._handle_result(self._connection.cmd_query(stmt))
File "C:\Users\Iw4n\PycharmProjects\Learning\venv\lib\site-packages\mysql\connector\connection.py", line 490, in cmd_query
result = self._handle_result(self._send_cmd(ServerCmd.QUERY, query))
File "C:\Users\Iw4n\PycharmProjects\Learning\venv\lib\site-packages\mysql\connector\connection.py", line 395, in _handle_result
raise errors.get_exception(packet)
mysql.connector.errors.ProgrammingError: 1064 (42000): You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '},{},{},{})' at line 1
When I wrapped the {} in '', as many rows as there were in the CSV were inserted into the database, but as the literal text {},{}.
The same story goes for %s: if I use it I get the same error as above, and when it's wrapped in '', the literal %s is inserted into the database.
I also found advice to add an f in front of "INSERT~, but it did not help.
Can anyone give me a suggestion on how to overcome this and correctly insert the data into MySQL?
Final code that works as intended:
import mysql.connector
import csv

andreport = 'testowa.csv'
cnx = mysql.connector.connect(
    user='xxxxx',
    password='xxxxx',
    host='xxxxx',
    database='xxxxx')
cursor = cnx.cursor()

with open(andreport, mode='r') as csv_data:
    reader = csv.reader(csv_data, delimiter=';')
    csv_data_list = list(reader)
    for row in csv_data_list:
        cursor.execute("""
            INSERT INTO flex(
                date, Agency, MediaSource, Campaign)
            VALUES(%s,%s,%s,%s)""",
            (row[0], row[1], row[2], row[3]))

cnx.commit()
cursor.close()
cnx.close()
print("Done")
I'm guessing the problem is that you passed one argument (row) instead of four. So try this:
cursor.execute("""
    INSERT INTO flex(date, Store, Vendor, Shelf)
    VALUES(%s,%s,%s,%s)""", (row[0], row[1], row[2], row[3]))
Looking at the documentation for the MySQLCursor.execute() method, it seems like adding some %s placeholders to your insert statement might fix this?
import mysql.connector
import csv

andreport = 'testowa.csv'
cnx = mysql.connector.connect(
    user='xxxxx',
    password='xxxxx',
    host='xxxxxx',
    database='xxxxx')
cursor = cnx.cursor()

insert_statement = (
    "INSERT INTO flex(date, Store, Vendor, Shelf) "
    "VALUES (%s, %s, %s, %s)"
)

with open(andreport, mode='r') as csv_data:
    reader = csv.reader(csv_data, delimiter=';')
    csv_data_list = list(reader)
    for row in csv_data_list:
        cursor.execute(insert_statement, row)

cnx.commit()
cursor.close()
cnx.close()
print("Done")
Let me know if this gets you anywhere, or if you see a new error!
Edit: updated CSV reading to convert to a list.
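A possible refinement (my note, not part of the original answers): since the rows are already collected in csv_data_list, mysql.connector can send them all in one batch with executemany, reusing insert_statement from above:
# Sketch: batch insert with executemany instead of a per-row loop.
cursor.executemany(insert_statement, csv_data_list)
cnx.commit()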

mariaDB: column count doesn't match value count at row 1

I don't get what the problem is here. I want to build a web scraper that scrapes Amazon and writes the price and the name into a database. But for some reason, it tells me that the columns and values are not matching. I do have one additional column in my database called "timestamp" where I automatically put in the time, but that is handled by the database. I am using MariaDB; a friend said I can use the MySQL API for MariaDB as well.
P.S. preis = price, coming from Germany, switching between English and German sometimes, just in case anyone is wondering.
import requests, time, csv, pymysql
from bs4 import BeautifulSoup as bs

#URL = input("URL")
URL = "https://www.amazon.de/gp/product/B075FTXF15/ref=crt_ewc_img_bw_3?ie=UTF8&psc=1&smid=A24FLB4J0NZBNT"

def SOUPIT(tempURL):
    URL = tempURL
    page = requests.get(URL, headers={"User-Agent": "Defined"})
    soup = bs(page.content, "html.parser")
    raw_price = soup.find(id="priceblock_ourprice").get_text()
    price = raw_price[:-2]
    raw_name = soup.find(id="productTitle").get_text()
    name = raw_name.strip()
    for i in range(0, len(name) - 1):
        if name[i] == "(":
            name = name[:i]
            break
    data = [name, price, time.strftime("%H:%M:%S"), time.strftime("%d.%m.%Y")]
    return data

data = SOUPIT(URL)
while True:
    data = SOUPIT(URL)
    db = pymysql.connect("localhost", "root", "root", "test")
    cursor = db.cursor()
    if data == None:
        break
        print("break")
    else:
        name = data[0]
        preis = data[1]
        sql = """INSERT INTO amazon_preise (Name, Preis) VALUES ('{}',{})""".format(name, preis)
        cursor.execute(sql)
        db.commit()
        print("success")
        print(data)
    time.sleep(60)
error message:
Traceback (most recent call last):
File "amazonscraper_advanced.py", line 43, in <module>
cursor.execute(sql)
File "C:\Users\...\AppData\Local\Programs\Python\Python36\lib\site-packages\pymysql\cursors.py", line 170, in execute
result = self._query(query)
File "C:\Users\...\AppData\Local\Programs\Python\Python36\lib\site-packages\pymysql\cursors.py", line 328, in _query
conn.query(q)
File "C:\Users\...\AppData\Local\Programs\Python\Python36\lib\site-packages\pymysql\connections.py", line 517, in query
self._affected_rows = self._read_query_result(unbuffered=unbuffered)
File "C:\Users\...\AppData\Local\Programs\Python\Python36\lib\site-packages\pymysql\connections.py", line 732, in _read_query_result
result.read()
File "C:\Users\...\AppData\Local\Programs\Python\Python36\lib\site-packages\pymysql\connections.py", line 1075, in read
first_packet = self.connection._read_packet()
File "C:\Users\...\AppData\Local\Programs\Python\Python36\lib\site-packages\pymysql\connections.py", line 684, in _read_packet
packet.check_error()
File "C:\Users\...\AppData\Local\Programs\Python\Python36\lib\site-packages\pymysql\protocol.py", line 220, in check_error
err.raise_mysql_exception(self._data)
File "C:\Users\...\AppData\Local\Programs\Python\Python36\lib\site-packages\pymysql\err.py", line 109, in raise_mysql_exception
raise errorclass(errno, errval)
pymysql.err.InternalError: (1136, "Column count doesn't match value count at row 1")
The problem is caused, at least partially, by using string formatting to insert values into an SQL statement.
Here is the scraped data:
>>> data = ['Sweatshirt Alien VS. Predator Z100088', '32,99', '14:08:43', '08.09.2019']
>>> name, preis, *_ = data
Let's create the SQL statement
>>> sql = """INSERT INTO amazon_preise (Name, Preis) VALUES ('{}',{})""".format(name,preis)
And display it:
>>> sql
"INSERT INTO amazon_preise (Name, Preis) VALUES ('Sweatshirt Alien VS. Predator Z100088',32,99)"
Observe that the VALUES clause contains three comma-separated values; this is because the web page displays currency in the German style, that is, with a comma separating the cents from the euros. When interpolated into the SQL statement, preis becomes two values instead of one.
The right way to fix this is to convert preis from a string to a float or decimal, and to use parameter substitution instead of string formatting to interpolate the values:
>>> fpreis = float(preis.replace(',', '.'))
>>> sql = """INSERT INTO amazon_preise (Name, Preis) VALUES (%s, %s)"""
>>> cursor.execute(sql, (name, fpreis))
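Folded back into the question's loop, the corrected body might look like this (a sketch; data, cursor, and db are the question's own variables):
# Sketch of the corrected loop body: parse the German-style price,
# then let the driver substitute the parameters.
name = data[0]
preis = float(data[1].replace(',', '.'))  # '32,99' -> 32.99
sql = "INSERT INTO amazon_preise (Name, Preis) VALUES (%s, %s)"
cursor.execute(sql, (name, preis))
db.commit()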

Writing a csv file into SQL Server database using python

I am trying to write a csv file into a table in a SQL Server database using python. I am facing errors when I pass the parameters, but I don't face any error when I do it manually. Here is the code I am executing:
cur = cnxn.cursor()  # Get the cursor
csv_data = csv.reader(file('Samplefile.csv'))  # Read the csv
for rows in csv_data:  # Iterate through csv
    cur.execute("INSERT INTO MyTable(Col1,Col2,Col3,Col4) VALUES (?,?,?,?)", rows)
cnxn.commit()
Error:
pyodbc.DataError: ('22001', '[22001] [Microsoft][ODBC SQL Server Driver][SQL Server]String or binary data would be truncated. (8152) (SQLExecDirectW); [01000] [Microsoft][ODBC SQL Server Driver][SQL Server]The statement has been terminated. (3621)')
However, when I insert the values manually, it works fine:
cur.execute("INSERT INTO MyTable(Col1,Col2,Col3,Col4) VALUES (?,?,?,?)",'A','B','C','D')
I have ensured that the table exists in the database and that the data types are consistent with the data I am passing. The connection and cursor are also correct. The data type of rows is "list".
Consider building the query dynamically to ensure the number of placeholders matches your table and CSV file format. Then it's just a matter of ensuring your table and CSV file are correct, instead of checking that you typed enough ? placeholders in your code.
The following example assumes
CSV file contains column names in the first line
Connection is already built
File name is test.csv
Table name is MyTable
Python 3
...
with open('test.csv', 'r') as f:
    reader = csv.reader(f)
    columns = next(reader)
    query = 'insert into MyTable({0}) values ({1})'
    query = query.format(','.join(columns), ','.join('?' * len(columns)))
    cursor = connection.cursor()
    for data in reader:
        cursor.execute(query, data)
    cursor.commit()
If column names are not included in the file:
...
with open('test.csv', 'r') as f:
    reader = csv.reader(f)
    data = next(reader)
    query = 'insert into MyTable values ({0})'
    query = query.format(','.join('?' * len(data)))
    cursor = connection.cursor()
    cursor.execute(query, data)
    for data in reader:
        cursor.execute(query, data)
    cursor.commit()
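If the file is large, the same parameterized query also works with executemany, which pyodbc can accelerate with fast_executemany (a sketch, assuming the reader and query from the example above and pyodbc 4.0.19 or newer):
# Sketch: batch the remaining rows in one call instead of row-by-row.
cursor.fast_executemany = True
cursor.executemany(query, list(reader))
cursor.commit()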
I modified the code written above by Brian as follows since the one posted above wouldn't work on the delimited files that I was trying to upload. The line row.pop() can also be ignored as it was necessary only for the set of files that I was trying to upload.
import csv

def upload_table(path, filename, delim, cursor):
    """
    Function to upload flat file to sqlserver
    """
    tbl = filename.split('.')[0]
    cnt = 0
    with open(path + filename, 'r') as f:
        reader = csv.reader(f, delimiter=delim)
        for row in reader:
            row.pop()  # can be commented out
            row = ['NULL' if val == '' else val for val in row]
            row = [x.replace("'", "''") for x in row]
            out = "'" + "', '".join(str(item) for item in row) + "'"
            out = out.replace("'NULL'", 'NULL')
            query = "INSERT INTO " + tbl + " VALUES (" + out + ")"
            cursor.execute(query)
            cnt = cnt + 1
            if cnt % 10000 == 0:
                cursor.commit()
    cursor.commit()
    print("Uploaded " + str(cnt) + " rows into table " + tbl + ".")
You can pass the columns as arguments. For example:
for rows in csv_data:  # Iterate through csv
    cur.execute("INSERT INTO MyTable(Col1,Col2,Col3,Col4) VALUES (?,?,?,?)", *rows)
If you are using MySqlHook in Airflow, and cursor.execute() with params throws the error
TypeError: not all arguments converted during string formatting
use %s instead of ?:
with open('/usr/local/airflow/files/ifsc_details.csv', 'r') as csv_file:
    csv_reader = csv.reader(csv_file)
    columns = next(csv_reader)
    query = '''insert into ifsc_details({0}) values({1});'''
    query = query.format(','.join(columns), ','.join(['%s'] * len(columns)))
    mysql = MySqlHook(mysql_conn_id='local_mysql')
    conn = mysql.get_conn()
    cursor = conn.cursor()
    for data in csv_reader:
        cursor.execute(query, data)
    conn.commit()  # commit on the connection; MySQLdb cursors have no commit()
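As an aside (my addition): the placeholder token is a property of the driver, not of SQL. Each DB-API module advertises its own in its paramstyle attribute, which explains the ? vs %s differences across the answers on this page:
# Sketch: every DB-API 2.0 driver exposes its placeholder style.
import pyodbc, pymssql, pymysql
print(pyodbc.paramstyle)   # 'qmark'    -> use ?
print(pymssql.paramstyle)  # 'pyformat' -> use %s
print(pymysql.paramstyle)  # 'pyformat' -> use %s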
I got it sorted out. The error was due to the size restriction of the table columns. I changed the column capacity, e.g. from col1 varchar(10) to col1 varchar(35), etc. Now it's working fine.
Here is the script; I hope this works for you:
import pandas as pd
import pyodbc as pc

connection_string = "Driver=SQL Server;Server=localhost;Database={0};Trusted_Connection=Yes;"
cnxn = pc.connect(connection_string.format("DataBaseNameHere"), autocommit=True)
cur = cnxn.cursor()

df = pd.read_csv("your_filepath_and_filename_here.csv").fillna('')
query = 'insert into TableName({0}) values ({1})'
query = query.format(','.join(df.columns), ','.join('?' * len(df.columns)))

cur.fast_executemany = True
cur.executemany(query, df.values.tolist())
cnxn.close()
You can also import data into SQL by using either:
The SQL Server Import and Export Wizard
SQL Server Integration Services (SSIS)
The OPENROWSET function
More details can be found on this webpage:
https://learn.microsoft.com/en-us/sql/relational-databases/import-export/import-data-from-excel-to-sql?view=sql-server-2017

Mysql import from script

I am using CSV parsing to import CSV files into MySQL. I have a few tables in MySQL, and I want to import a CSV file into all the tables at once, so I used the script below. The country table imports fine, but the state table does not; it shows me the following error:
csv_spliter.py:74: Warning: Incorrect integer value: ''country_id'' for column 'id' at row 1
row)
csv_spliter.py:74: Warning: Incorrect integer value: ''0'' for column 'id' at row 1
row)
csv_spliter.py:74: Warning: Incorrect integer value: ''1'' for column 'id' at row 1
row)
csv_spliter.py:74: Warning: Incorrect integer value: ''2'' for column 'id' at row 1
row)
csv_spliter.py:74: Warning: Incorrect integer value: ''3'' for column 'id' at row 1
row)
Done
Traceback (most recent call last):
File "csv_spliter.py", line 89, in <module>
row)
File "/usr/lib/python2.7/dist-packages/MySQLdb/cursors.py", line 174, in execute
self.errorhandler(self, exc, value)
File "/usr/lib/python2.7/dist-packages/MySQLdb/connections.py", line 36, in defaulterrorhandler
raise errorclass, errorvalue
_mysql_exceptions.IntegrityError: (1452, 'Cannot add or update a child row: a foreign key constraint fails (`lcm`.`state`, CONSTRAINT `state_ibfk_1` FOREIGN KEY (`country`) REFERENCES `country` (`id`) ON DELETE CASCADE)')
you#you-desktop:~/Desktop$
# initialize with empty lists
name, cities, countries, states = [], [], [], []

with open('ind.csv', 'rb') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    reader.next()  # skip header
    for row in reader:
        name.append(row[0])
        cities.append(row[2])
        states.append(row[3])
        countries.append(row[4])

cl = list(set(countries))
sl = list(set(states))
citl = list(set(cities))
inf1 = list(set(name))

with open('countries.csv', 'w') as cfile:
    writer = csv.writer(cfile, delimiter=',')
    writer.writerow(['country_id', 'name'])
    for i, x in enumerate(cl):
        writer.writerow([i, x])

with open('state.csv', 'w') as cfile:
    writer = csv.writer(cfile, delimiter=',')
    writer.writerow(['state_id', 'country_id', 'state'])
    for i, x in enumerate(sl):
        writer.writerow([i, x, cl.index(countries[states.index(x)])])

with open('cities.csv', 'w') as cfile:
    writer = csv.writer(cfile, delimiter=',')
    writer.writerow(['city_id', 'city', 'st_id', 'country_id'])
    for i, x in enumerate(citl):
        writer.writerow([i, x, sl.index(states[cities.index(x)]),
                         cl.index(countries[cities.index(x)])])

with open('inf123.csv', 'w') as cfile:
    writer = csv.writer(cfile, delimiter=',')
    writer.writerow(['Name_id', 'Name', 'city_id', 'st_id', 'country_id'])
    for i, x in enumerate(inf1):
        writer.writerow([i, x,
                         citl.index(cities[name.index(x)]),
                         sl.index(states[name.index(x)]),
                         cl.index(countries[name.index(x)])])
import MySQLdb
import csv

mydb = MySQLdb.connect(host="localhost",  # the host
                       user="root",       # username
                       passwd="root",     # password
                       db="abcm")         # name of the database
cursor = mydb.cursor()

csv_data = csv.reader(file('countries.csv'))
for row in csv_data:
    cursor.execute('INSERT INTO country(id, name) '
                   'VALUES("%s", "%s")',
                   row)
# commit the changes to the database.
mydb.commit()
cursor.close()
print "Done"

cursor = mydb.cursor()
csv_data = csv.reader(file('state.csv'))
for row in csv_data:
    cursor.execute('INSERT INTO state(id, country, name) '
                   'VALUES("%s", "%s", "%s")',
                   row)
# commit the changes to the database.
mydb.commit()
cursor.close()
print "Done"
Can anyone give me some advice on how I can do this?
Are you converting your strings to integers before dumping them in the database?
I think that's where the Incorrect integer value error messages are coming from.
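A note from me on what the warnings show: the ''country_id'' and ''0'' values indicate that the header row is being inserted as data, and that the quotes around "%s" turn every parameter into a quoted string literal. A sketch of the state insert without either issue (keeping the file layout produced above):
import MySQLdb
import csv

# Sketch: skip the header row and leave %s unquoted so the driver
# does the escaping; cast the ids to int so MySQL gets real integers.
mydb = MySQLdb.connect(host="localhost", user="root", passwd="root", db="abcm")
cursor = mydb.cursor()
with open('state.csv') as f:
    csv_data = csv.reader(f)
    next(csv_data)  # skip the header row instead of inserting it
    for row in csv_data:
        # rows were written above as (state_id, state name, country id)
        cursor.execute('INSERT INTO state(id, name, country) VALUES (%s, %s, %s)',
                       (int(row[0]), row[1], int(row[2])))
mydb.commit()
cursor.close()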

sqlite3.OperationalError: no such column. But that's not a column

I seem to be getting a rather odd error, and for the life of me I can't figure out what's wrong. On a piece of SQLite code, I'm getting this error:
Traceback (most recent call last):
File "test.py", line 38, in <module>
populateTables()
File "test.py", line 20, in populateTables
curs.execute("SELECT * FROM tracks WHERE ISRC = " + line[8])
sqlite3.OperationalError: no such column: USTCZ0993316
The odd part is that USTCZ0993316 is a piece of data that I want to compare against. I don't know why SQLite seems to think it is a column. Here is a much smaller version that gives the same issue.
import sqlite3
import csv

def tableSetup(name):
    if name == "tracks":
        curs.execute("CREATE TABLE tracks(id INT UNIQUE, name TINYTEXT, album_id INT, client_id INT, acr_record_num INT, ISRC TINYTEXT UNIQUE, track_length TINYTEXT, client_share FLOAT)")

def populateTables():
    tracks_csv = csv.reader(open('tables/tracks.csv', 'rU'), delimiter=";", quotechar='"')
    tracks_csv.next()  # skip header
    for line in tracks_csv:
        curs.execute("SELECT * FROM tracks WHERE id = " + line[0])
        if not curs.fetchall():
            if "\"" in line[1]:
                line[1] = line[1].replace("\"", "'")
            curs.execute("INSERT INTO tracks VALUES(" + line[0] + ",\"" + line[1] + "\"," + line[2] + "," + line[3] + "," + line[4] + ",\"" + line[5] + "\",\"" + line[7] + "\"," + line[12] + ")")
    override_csv = csv.reader(open('tables/artist_override.csv', 'rU'), delimiter=",", quotechar='"')
    override_csv.next()  # skip header
    for line in override_csv:
        curs.execute("SELECT * FROM tracks WHERE ISRC = " + line[8])
        print curs.fetchone()

# Set required table names
tables = ["tracks"]
testOut = open('tables/testOut.txt', 'w')
conn = sqlite3.connect('tables/test.db')
curs = conn.cursor()

# Create tables if they don't already exist
curs.execute("SELECT name FROM sqlite_master WHERE type='table'")
tableResults = curs.fetchall()
print
for table in tables:
    if not any(table == result[0] for result in tableResults):
        tableSetup(table)

populateTables()
conn.commit()
curs.close()
If the value doesn't have quotes around it, SQLite treats it as a column name or a number. Use a parameterized query instead:
curs.execute("SELECT * FROM tracks WHERE ISRC = ?", (line[8],))
[Edit: "%s" isn't proper syntax. It should be "?". Also, if you're going to be condescending, at least be right about it.]
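Applying the same pattern to the rest of populateTables removes all of the manual quoting; a sketch of the other two statements, keeping the question's column indices:
curs.execute("SELECT * FROM tracks WHERE id = ?", (line[0],))

curs.execute("INSERT INTO tracks VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
             (line[0], line[1], line[2], line[3], line[4],
              line[5], line[7], line[12]))
With parameters, the replace("\"", "'") workaround also becomes unnecessary, since the value is never spliced into the SQL text.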
