Import Data from .csv file into mysql using python - python

I am trying to import data from two columns of a .csv file (time hh:mm, float). I created a database and a table in mysql.
import mysql.connector
import csv
mydb = mysql.connector.connect(host='127.0.0.1',
user= 'xxx',
passwd='xxx',
db='pv_datenbank')
cursor = mydb.cursor()
# get rid of the '' at the beginning of the .csv file
s = open('Sonneneinstrahlung.csv', mode='r', encoding='utf-8-sig').read()
open('Sonneneinstrahlung.csv', mode='w', encoding='utf-8').write(s)
print(s)
with open('Sonneneinstrahlung.csv') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=';')
sql = """INSERT INTO einstrahlung ('Uhrzeit', 'Einstrahlungsdaten') VALUES (%s, %s)"""
for row in csv_reader:
print(row)
print(cursor.rowcount, "was inserted.")
cursor.executemany(sql, csv_reader)
#cursor.execute(sql, row, multi=True)
mydb.commit()
mydb.close()
If I run the program with executemany(), result is the following:
['01:00', '1']
'-1 was inserted.'
and after this I do get the error code: Not all parameters were used again.
When I try the same thing with the execute() operator, no error is shown, but the data is not inserted in the table of my database.
Here you can see the input data:

executemany takes a statement and a sequence of sets of parameters.
Try this:
with open('Sonneneinstrahlung.csv') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=';')
sql = """INSERT INTO einstrahlung (Uhrzeit, Einstrahlungsdaten) VALUES (%s, %s)"""
cursor.executemany(sql, csv_reader)
mydb.commit()

Related

importing single .csv into mysql with python

when running this code i am getting a Error while connecting to MySQL Not all parameters were used in the SQL statement
I have tried also to ingest these with another technique
import mysql.connector as msql
from mysql.connector import Error
import pandas as pd
empdata = pd.read_csv('path_to_file', index_col=False, delimiter = ',')
empdata.head()
try:
conn = msql.connect(host='localhost', user='test345',
password='test123')
if conn.is_connected():
cursor = conn.cursor()
cursor.execute("CREATE DATABASE timetheft")
print("Database is created")
except Error as e:
print("Error while connecting to MySQL", e)
try:
conn = msql.connect(host='localhost', database='timetheft', user='test345', password='test123')
if conn.is_connected():
cursor = conn.cursor()
cursor.execute("select database();")
record = cursor.fetchone()
print("You're connected to database: ", record)
cursor.execute('DROP TABLE IF EXISTS company;')
print('Creating table....')
create_contracts_table = """
CREATE TABLE company ( ID VARCHAR(40) PRIMARY KEY,
Company_Name VARCHAR(40),
Country VARCHAR(40),
City VARCHAR(40),
Email VARCHAR(40),
Industry VARCHAR(30),
Employees VARCHAR(30)
);
"""
cursor.execute(create_company_table)
print("Table is created....")
for i,row in empdata.iterrows():
sql = "INSERT INTO timetheft.company VALUES (%S, %S, %S, %S, %S,%S,%S,%S)"
cursor.execute(sql, tuple(row))
print("Record inserted")
# the connection is not auto committed by default, so we must commit to save our changes
conn.commit()
except Error as e:
print("Error while connecting to MySQL", e)
second technique I tried
LOAD DATA LOCAL INFILE 'path_to_file'
INTO TABLE copmany
FIELDS TERMINATED BY ';'
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 LINES;
worked better but many errors. only 20% of rows ingested.
Finally here is an excerpt from the .csv (data is consistent throughout all 1K rows)
"ID";"Company_Name";"Country";"City";"Email";"Industry";"Employees"
217520699;"Enim Corp.";"Germany";"Bamberg";"posuere#diamvel.edu";"Internet";"51-100"
352428999;"Lacus Vestibulum Consulting";"Germany";"Villingen-Schwenningen";"egestas#lacusEtiambibendum.org";"Food Production";"100-500"
371718299;"Dictum Ultricies Ltd";"Germany";"Anklam";"convallis.erat#sempercursus.co.uk";"Primary/Secondary Education";"100-500"
676789799;"A Consulting";"Germany";"Andernach";"massa#etrisusQuisque.ca";"Government Relations";"100-500"
718526699;"Odio LLP";"Germany";"Eisenhüttenstadt";"Quisque.varius#euismod.org";"E-Learning";"11-50"
I fixed these issues to get the code to work:
make the number of placeholders in the insert statement equal to the number of columns
the placeholders should be lower-case '%s'
the cell delimiter appears to be a semi-colon, not a comma.
For simply reading a csv with ~1000 rows Pandas is overkill (and iterrows seems not to behave as you expect). I've used the csv module from the standard library instead.
import csv
...
sql = "INSERT INTO company VALUES (%s, %s, %s, %s, %s, %s, %s)"
with open("67359903.csv", "r", newline="") as f:
reader = csv.reader(f, delimiter=";")
# Skip the header row.
next(reader)
# For large files it may be more efficient to commit
# rows in batches.
cursor.executemany(sql, reader)
conn.commit()
If using the csv module is not convenient, the dataframe's itertuples method may be used to iterate over the data:
empdata = pd.read_csv('67359903.csv', index_col=False, delimiter=';')
for tuple_ in empdata.itertuples(index=False):
cursor.execute(sql, tuple_)
conn.commit()
Or the dataframe can be dumped to the database directly.
import sqlalchemy as sa
engine = sa.create_engine('mysql+mysqlconnector:///test')
empdata.to_sql('company', engine, index=False, if_exists='replace')

Unable to import CSV file data to mysql table

I am using the code below-
import mysql.connector
import csv
mydb = mysql.connector.connect(host='xxxxx', user='xxxx', passwd='xxxxxx')
cursor = mydb.cursor()
cursor.execute("SHOW DATABASES")
l = cursor.fetchall()
cursor.execute("USE DB ")
with open('details.csv','rt')as f:
csv_data = csv.reader(f)
for row in csv_data:
cursor.execute ('INSERT INTO student (id,name,age,course) VALUES (%s,%s,%s,%s)',row)
cursor.close()
mydb.commit()
mydb.close()
I am getting the following error-
"ProgrammingError: Not all parameters were used in the SQL statement"
python 3.x,windows7,Mysql 2012 version.
for row in csv_data:
listval = []
for col in row:
listval.append(col)
cursor.execute ('INSERT INTO student (id,name,age,course) VALUES (%s,%s,%s,%s) ', listval)
This will work :))
In this case, the main problem is the string format. I did't know what object is row, in your error it is a list, so using each element will solve the issue.
for row in csv_data:
cursor.execute ('INSERT INTO student (id,name,age,course) VALUES (%s,%s,%s,%s)',
row[0], row[1], row[2], row[3])

How to insert a CSV file data into MYSQL using Python efficiently?

I have a CSV input file with aprox. 4 million records.
The insert is running since +2hours and still has not finished.
The Database is still empty.
Any suggestions on how to to actually insert the values (using insert into) and faster, like breaking the insert in chunks?
I'm pretty new to python.
csv file example
43293,cancelled,1,0.0,
1049007,cancelled,1,0.0,
438255,live,1,0.0,classA
1007255,xpto,1,0.0,
python script
def csv_to_DB(xing_csv_input, db_opts):
print("Inserting csv file {} to database {}".format(xing_csv_input, db_opts['host']))
conn = pymysql.connect(**db_opts)
cur = conn.cursor()
try:
with open(xing_csv_input, newline='') as csvfile:
csv_data = csv.reader(csvfile, delimiter=',', quotechar='"')
for row in csv_data:
insert_str = "INSERT INTO table_x (ID, desc, desc_version, val, class) VALUES (%s, %s, %s, %s, %s)"
cur.execute(insert_str, row)
conn.commit()
finally:
conn.close()
UPDATE:
Thanks for all the inputs.
As suggested, I tried a counter to insert in batches of 100 and a smaller csv data set (1000 lines).
The problem now is only 100 records are inserted, although the counter passes 10 x 100 several times.
code change:
def csv_to_DB(xing_csv_input, db_opts):
print("Inserting csv file {} to database {}".format(xing_csv_input, db_opts['host']))
conn = pymysql.connect(**db_opts)
cur = conn.cursor()
count = 0
try:
with open(xing_csv_input, newline='') as csvfile:
csv_data = csv.reader(csvfile, delimiter=',', quotechar='"')
for row in csv_data:
count += 1
print(count)
insert_str = "INSERT INTO table_x (ID, desc, desc_version, val, class) VALUES (%s, %s, %s, %s, %s)"
if count >= 100:
cur.execute(insert_str, row)
print("count100")
conn.commit()
count = 0
if not row:
cur.execute(insert_str, row)
conn.commit()
finally:
conn.close()
There are many ways to optimise this insert. Here are some ideas:
You have a for loop over the entire dataset. You can do a commit() every 100 or so
You can insert many rows into one insert
you can combine the two and make a multi-row insert every 100 rows on your CSV
If python is not a requirement for you can do it directly using MySQL as it's explained here. (If you must do it using python, you can still prepare that statement in python and avoid looping through the file manually).
Examples:
for number 2 in the list, the code will have the following structure:
def csv_to_DB(xing_csv_input, db_opts):
print("Inserting csv file {} to database {}".format(xing_csv_input, db_opts['host']))
conn = pymysql.connect(**db_opts)
cur = conn.cursor()
try:
with open(xing_csv_input, newline='') as csvfile:
csv_data = csv.reader(csvfile, delimiter=',', quotechar='"')
to_insert = []
insert_str = "INSERT INTO table_x (ID, desc, desc_version, val, class) VALUES "
template = '(%s, %s, %s, %s, %s)'
count = 0
for row in csv_data:
count += 1
to_insert.append(tuple(row))
if count % 100 == 0:
query = insert_str + '\n'.join([template % r for r in to_insert])
cur.execute(query)
to_insert = []
conn.commit()
query = insert_str + '\n'.join(template % to_insert)
cur.execute(query)
conn.commit()
finally:
conn.close()
Here. Try this snippet and let me know if it worked using executemany().
with open(xing_csv_input, newline='') as csvfile:
csv_data = tuple(csv.reader(csvfile, delimiter=',', quotechar='"'))
csv_data = (row for row in csv_data)
query = "INSERT INTO table_x (ID, desc, desc_version, val, class) VALUES (%s, %s, %s, %s, %s)"
try:
cur.executemany(query, csv_data)
conn.commit()
except:
conn.rollback()

sql queries and python

I am trying use sqlite3 and python3 on the CSV file to extract the booking_id table for some specific data. But I am getting a KeyError which means the requested table is not in the dictionary. I don't get it.
import csv
import sqlite3
con = sqlite3.connect(":memory:")
cur = con.cursor()
cur.execute("CREATE TABLE t (booking_id,customer_id,source,status,checkin,checkout,oyo_rooms,hotel_id,amount,discount,date,PRIMARY KEY(booking_id))")
with open('TableA.csv', 'r') as fin:
dr = csv.DictReader(fin, delimiter='\t')
to_db = [(i['booking_id'], i['customer_id'], i['source'], i['status'], i['checkin'], i['checkout'],
i['oyo_rooms'], i['hotel_id'], i['amount'], i['discount'], i['date']) for i in dr]
cur.executemany(
"INSERT INTO t (booking_id,customer_id,source,status,checkin,checkout,oyo_rooms,hotel_id,amount,discount,date) VALUES (?, ?,?, ?, ?,?, ?,?, ?,?);", to_db)
con.commit()
con.close()
#error message
KeyError: 'booking_id'
This is the csv file - https://pastebin.com/xbgFryhZ
The key error you're getting means your csv file doesn't have that column. Post the first few lines from the csv file so we can check it out.
EDIT:
Now that you added the CSV file we can see that it is separated by commas, not tabs.
Change
dr = csv.DictReader(fin, delimiter='\t')
to
dr = csv.DictReader(fin, delimiter=',')

Writing a csv file into SQL Server database using python

I am trying to write a csv file into a table in SQL Server database using python. I am facing errors when I pass the parameters , but I don't face any error when I do it manually. Here is the code I am executing.
cur=cnxn.cursor() # Get the cursor
csv_data = csv.reader(file(Samplefile.csv')) # Read the csv
for rows in csv_data: # Iterate through csv
cur.execute("INSERT INTO MyTable(Col1,Col2,Col3,Col4) VALUES (?,?,?,?)",rows)
cnxn.commit()
Error:
pyodbc.DataError: ('22001', '[22001] [Microsoft][ODBC SQL Server Driver][SQL Server]String or binary data would be truncated. (8152) (SQLExecDirectW); [01000] [Microsoft][ODBC SQL Server Driver][SQL Server]The statement has been terminated. (3621)')
However when I insert the values manually. It works fine
cur.execute("INSERT INTO MyTable(Col1,Col2,Col3,Col4) VALUES (?,?,?,?)",'A','B','C','D')
I have ensured that the TABLE is there in the database, data types are consistent with the data I am passing. Connection and cursor are also correct. The data type of rows is "list"
Consider building the query dynamically to ensure the number of placeholders matches your table and CSV file format. Then it's just a matter of ensuring your table and CSV file are correct, instead of checking that you typed enough ? placeholders in your code.
The following example assumes
CSV file contains column names in the first line
Connection is already built
File name is test.csv
Table name is MyTable
Python 3
...
with open ('test.csv', 'r') as f:
reader = csv.reader(f)
columns = next(reader)
query = 'insert into MyTable({0}) values ({1})'
query = query.format(','.join(columns), ','.join('?' * len(columns)))
cursor = connection.cursor()
for data in reader:
cursor.execute(query, data)
cursor.commit()
If column names are not included in the file:
...
with open ('test.csv', 'r') as f:
reader = csv.reader(f)
data = next(reader)
query = 'insert into MyTable values ({0})'
query = query.format(','.join('?' * len(data)))
cursor = connection.cursor()
cursor.execute(query, data)
for data in reader:
cursor.execute(query, data)
cursor.commit()
I modified the code written above by Brian as follows since the one posted above wouldn't work on the delimited files that I was trying to upload. The line row.pop() can also be ignored as it was necessary only for the set of files that I was trying to upload.
import csv
def upload_table(path, filename, delim, cursor):
"""
Function to upload flat file to sqlserver
"""
tbl = filename.split('.')[0]
cnt = 0
with open (path + filename, 'r') as f:
reader = csv.reader(f, delimiter=delim)
for row in reader:
row.pop() # can be commented out
row = ['NULL' if val == '' else val for val in row]
row = [x.replace("'", "''") for x in row]
out = "'" + "', '".join(str(item) for item in row) + "'"
out = out.replace("'NULL'", 'NULL')
query = "INSERT INTO " + tbl + " VALUES (" + out + ")"
cursor.execute(query)
cnt = cnt + 1
if cnt % 10000 == 0:
cursor.commit()
cursor.commit()
print("Uploaded " + str(cnt) + " rows into table " + tbl + ".")
You can pass the columns as arguments. For example:
for rows in csv_data: # Iterate through csv
cur.execute("INSERT INTO MyTable(Col1,Col2,Col3,Col4) VALUES (?,?,?,?)", *rows)
If you are using MySqlHook in airflow , if cursor.execute() with params throw san error
TypeError: not all arguments converted during string formatting
use %s instead of ?
with open('/usr/local/airflow/files/ifsc_details.csv','r') as csv_file:
csv_reader = csv.reader(csv_file)
columns = next(csv_reader)
query = '''insert into ifsc_details({0}) values({1});'''
query = query.format(','.join(columns), ','.join(['%s'] * len(columns)))
mysql = MySqlHook(mysql_conn_id='local_mysql')
conn = mysql.get_conn()
cursor = conn.cursor()
for data in csv_reader:
cursor.execute(query, data)
cursor.commit()
I got it sorted out. The error was due to the size restriction restriction of table. It changed the column capacity like from col1 varchar(10) to col1 varchar(35) etc. Now it's working fine.
Here is the script and hope this works for you:
import pandas as pd
import pyodbc as pc
connection_string = "Driver=SQL Server;Server=localhost;Database={0};Trusted_Connection=Yes;"
cnxn = pc.connect(connection_string.format("DataBaseNameHere"), autocommit=True)
cur=cnxn.cursor()
df= pd.read_csv("your_filepath_and_filename_here.csv").fillna('')
query = 'insert into TableName({0}) values ({1})'
query = query.format(','.join(df.columns), ','.join('?' * len(df1.columns)))
cur.fast_executemany = True
cur.executemany(query, df.values.tolist())
cnxn.close()
You can also import data into SQL by using either:
The SQL Server Import and Export Wizard
SQL Server Integration Services (SSIS)
The OPENROWSET function
More details can be found on this webpage:
https://learn.microsoft.com/en-us/sql/relational-databases/import-export/import-data-from-excel-to-sql?view=sql-server-2017

Categories