When running this code I am getting the error: Error while connecting to MySQL Not all parameters were used in the SQL statement
I have also tried to ingest the data with another technique (shown further below).
import mysql.connector as msql
from mysql.connector import Error
import pandas as pd

empdata = pd.read_csv('path_to_file', index_col=False, delimiter=',')
empdata.head()

try:
    conn = msql.connect(host='localhost', user='test345',
                        password='test123')
    if conn.is_connected():
        cursor = conn.cursor()
        cursor.execute("CREATE DATABASE timetheft")
        print("Database is created")
except Error as e:
    print("Error while connecting to MySQL", e)

try:
    conn = msql.connect(host='localhost', database='timetheft', user='test345', password='test123')
    if conn.is_connected():
        cursor = conn.cursor()
        cursor.execute("select database();")
        record = cursor.fetchone()
        print("You're connected to database: ", record)
        cursor.execute('DROP TABLE IF EXISTS company;')
        print('Creating table....')
        create_company_table = """
        CREATE TABLE company ( ID VARCHAR(40) PRIMARY KEY,
                               Company_Name VARCHAR(40),
                               Country VARCHAR(40),
                               City VARCHAR(40),
                               Email VARCHAR(40),
                               Industry VARCHAR(30),
                               Employees VARCHAR(30)
        );
        """
        cursor.execute(create_company_table)
        print("Table is created....")
        for i, row in empdata.iterrows():
            sql = "INSERT INTO timetheft.company VALUES (%S, %S, %S, %S, %S,%S,%S,%S)"
            cursor.execute(sql, tuple(row))
            print("Record inserted")
        # the connection is not auto committed by default, so we must commit to save our changes
        conn.commit()
except Error as e:
    print("Error while connecting to MySQL", e)
The second technique I tried:
LOAD DATA LOCAL INFILE 'path_to_file'
INTO TABLE company
FIELDS TERMINATED BY ';'
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 LINES;
This worked better, but with many errors; only about 20% of the rows were ingested.
Finally, here is an excerpt from the .csv (the data is consistent throughout all 1K rows):
"ID";"Company_Name";"Country";"City";"Email";"Industry";"Employees"
217520699;"Enim Corp.";"Germany";"Bamberg";"posuere@diamvel.edu";"Internet";"51-100"
352428999;"Lacus Vestibulum Consulting";"Germany";"Villingen-Schwenningen";"egestas@lacusEtiambibendum.org";"Food Production";"100-500"
371718299;"Dictum Ultricies Ltd";"Germany";"Anklam";"convallis.erat@sempercursus.co.uk";"Primary/Secondary Education";"100-500"
676789799;"A Consulting";"Germany";"Andernach";"massa@etrisusQuisque.ca";"Government Relations";"100-500"
718526699;"Odio LLP";"Germany";"Eisenhüttenstadt";"Quisque.varius@euismod.org";"E-Learning";"11-50"
I fixed these issues to get the code to work:
- make the number of placeholders in the INSERT statement equal to the number of columns (seven, not eight)
- the placeholders should be lower-case %s
- the cell delimiter appears to be a semicolon, not a comma
For simply reading a csv with ~1000 rows, Pandas is overkill (and iterrows does not behave as you seem to expect). I've used the csv module from the standard library instead.
import csv
...
sql = "INSERT INTO company VALUES (%s, %s, %s, %s, %s, %s, %s)"
with open("67359903.csv", "r", newline="") as f:
    reader = csv.reader(f, delimiter=";")
    # Skip the header row.
    next(reader)
    # For large files it may be more efficient to commit
    # rows in batches.
    cursor.executemany(sql, reader)
conn.commit()
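For reference, here is a minimal sketch of that batching idea (not from the original answer), reusing the conn and cursor from above; BATCH_SIZE is an arbitrary value and itertools.islice comes from the standard library:

import csv
import itertools

BATCH_SIZE = 500  # assumed batch size; tune for your data

sql = "INSERT INTO company VALUES (%s, %s, %s, %s, %s, %s, %s)"
with open("67359903.csv", "r", newline="") as f:
    reader = csv.reader(f, delimiter=";")
    next(reader)  # skip the header row
    while True:
        # Take up to BATCH_SIZE rows from the reader.
        batch = list(itertools.islice(reader, BATCH_SIZE))
        if not batch:
            break
        cursor.executemany(sql, batch)
        conn.commit()  # commit once per batch rather than once per row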
If using the csv module is not convenient, the dataframe's itertuples method may be used to iterate over the data:
empdata = pd.read_csv('67359903.csv', index_col=False, delimiter=';')
for tuple_ in empdata.itertuples(index=False):
    cursor.execute(sql, tuple_)
conn.commit()
Or the dataframe can be dumped to the database directly.
import sqlalchemy as sa
engine = sa.create_engine('mysql+mysqlconnector:///test')
empdata.to_sql('company', engine, index=False, if_exists='replace')
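Note that the engine URL in the snippet above omits credentials and host; assuming the user, password and database names from the question, a fuller URL would look like this sketch:

import sqlalchemy as sa

# credentials and database name taken from the question; adjust as needed
engine = sa.create_engine('mysql+mysqlconnector://test345:test123@localhost/timetheft')
empdata.to_sql('company', engine, index=False, if_exists='replace')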
Related
In console.log I am getting the correct argument value, but when I try to use the argument value in the INSERT statement it is passed as 0. As a whole, the code runs without errors, but for assgn_id it inserts 0 instead of the actual value.
import pandas as pd
import sys

print("parameters from nodejs", str(sys.argv[1]))
df = pd.read_csv("./userSetupData.csv")
df.head()

import mysql.connector as msql
from mysql.connector import Error

try:
    conn = msql.connect(host='localhost', database='pythonTest', user='root', password='0000', auth_plugin='mysql_native_password')
    if conn.is_connected():
        cursor = conn.cursor()
        cursor.execute("select database();")
        record = cursor.fetchone()
        assgn_id = str(sys.argv[1])
        print('Checking the parameter value', assgn_id)
        for i, row in df.iterrows():
            #here %S means string values
            sql = "INSERT INTO pythonTest.usr_stg VALUES (%s,%s,%s,%s,%s,assgn_id)"
            cursor.execute(sql, tuple(row))
            print("Record inserted")
        # the connection is not auto committed by default, so we must commit to save our changes
        conn.commit()
except Error as e:
    print("Error while connecting to MySQL", e)
sql is a plain string, so assgn_id inside it is treated as literal text, not as a Python variable.
You would have to use an f-string with {assgn_id} to put the value into this string:
sql = f"INSERT INTO pythonTest.usr_stg VALUES (%s,%s,%s,%s,%s, {assgn_id})"
Or, better, put assgn_id in the tuple of parameters and use a sixth %s in the query; unlike the f-string, this also avoids SQL-injection problems:
sql = "INSERT INTO pythonTest.usr_stg VALUES (%s,%s,%s,%s,%s, %s)"
cursor.execute(sql, tuple(row.to_list() + [assgn_id]) )
I am new to Python and having some basic problems. I am trying to insert into an existing MySQL table (holding) from a csv file (test.csv), skip the header row, and then return the number of rows inserted. Both the csv file and the table have identical column headings.
The code below is not inserting the data from the csv file, but just inserting the values literally (which are my column headings; I thought I needed to declare these in the values). So this method is wrong, as it only adds the values manually and does not insert data from the csv file. Can someone tell me what I'm doing wrong, please?
This is the code:
import csv
import mysql.connector

try:
    mydb = mysql.connector.connect(
        host="localhost", port=5306,
        user="XX",
        passwd="XX",
        database="python_test",
        auth_plugin='mysql_native_password'
    )
    # This skips the first row of the CSV file.
    with open(r'c:\TELS\Uploaded\test.csv') as f:
        reader = csv.reader(f)
        #next(reader) # skip header
        data = [r for r in reader]
    data.pop(0)  # remove header
    mycur = mydb.cursor()
    query = "INSERT INTO hold (`Name`, `Address`, `Age`, `DOB`) VALUES (%s, %s, %s, %S)"
    values = ("Name", "Address", "Age", "DOB")
    mycur.execute(query, values)
    mydb.commit()
    print(mycur.rowcount, "records inserted.")
    # close the connection to the database.
    mycur.close()
The content of your values variable is what will be inserted, so you need to map it to your data variable.
You could either do it one row at a time...
query = "INSERT INTO hold (`Name`, `Address`, `Age`, `DOB`) VALUES (%s, %s, %s, %s)"
# also notice how I converted your last %S into %s
for row in data:
    mycur.execute(query, row)
mydb.commit()
mycur.close()
Or use the executemany() function:
query = "INSERT INTO hold (`Name`, `Address`, `Age`, `DOB`) VALUES (%s, %s, %s, %s)"
mycur.executemany(query, data)
mydb.commit()
print(mycur.rowcount, "records inserted.")
mycur.close()
I am trying to import data from Excel to MySQL; below is my code. The problem is that it only writes the last row from my Excel sheet to the MySQL db, and I want it to import all the rows from the sheet.
import pymysql
import xlrd

book = xlrd.open_workbook('C:\SqlExcel\Backup.xlsx')
sheet = book.sheet_by_index(0)

# Connect to the database
connection = pymysql.connect(host='localhost',
                             user='root',
                             password='',
                             db='test')

cursor = connection.cursor()
query = """INSERT INTO report_table (FirstName, LastName) VALUES (%s, %s)"""

for r in range(1, sheet.nrows):
    fname = sheet.cell(r,1).value
    lname = sheet.cell(r,2).value

values = (fname, lname)
cursor.execute(query, values)

connection.commit()
cursor.close()
connection.close()
Your code is currently only storing the last pair and writing that to the database. You need to build values and execute the query inside the loop, so each pair is written to the database separately.
You can amend your code to this:
import pymysql
import xlrd

book = xlrd.open_workbook('C:\SqlExcel\Backup.xlsx')
sheet = book.sheet_by_index(0)

# Connect to the database
connection = pymysql.connect(host='localhost',
                             user='root',
                             password='',
                             db='test',
                             autocommit=True)

cursor = connection.cursor()
query = """INSERT INTO report_table (FirstName, LastName) VALUES (%s, %s)"""

# loop over each row
for r in range(1, sheet.nrows):
    # extract each cell
    fname = sheet.cell(r,1).value
    lname = sheet.cell(r,2).value
    # extract cells into pair
    values = fname, lname
    # write pair to db
    cursor.execute(query, values)

# close everything
cursor.close()
connection.close()
Note: You can set autocommit=True in the connect phase. PyMySQL disables autocommit by default, so this way you don't have to call connection.commit() after your query.
Your values variable has to be inside the for loop, like this:
import pymysql
import xlrd

book = xlrd.open_workbook('C:\SqlExcel\Backup.xlsx')
sheet = book.sheet_by_index(0)

# Connect to the database
connection = pymysql.connect(host='localhost',
                             user='root',
                             password='',
                             db='test')

cursor = connection.cursor()
query = """INSERT INTO report_table (FirstName, LastName) VALUES (%s, %s)"""

for r in range(1, sheet.nrows):
    fname = sheet.cell(r,1).value
    lname = sheet.cell(r,2).value
    values = (fname, lname)
    cursor.execute(query, values)

connection.commit()
cursor.close()
connection.close()
Sorry, I don't know much about databases, nor about pymysql, but assuming all the rest is correct I guess it could work like this:
...
cursor = connection.cursor()
query = """INSERT INTO report_table (FirstName, LastName) VALUES (%s, %s)"""

for r in range(1, sheet.nrows):
    fname = sheet.cell(r,1).value
    lname = sheet.cell(r,2).value
    values = (fname, lname)
    cursor.execute(query, values)

connection.commit()
cursor.close()
connection.close()
Is this something you will do on a regular basis? I see the script you're writing but I am not sure if this is something you need to run over and over again or if you are just importing the data into MySQL once.
If this is a one shot deal, you can try this.
Open the spreadsheet and SELECT ALL then COPY all your data. Paste it into a text document and save the text document (let's say the text document will be in c:\temp\exceldata.txt). You can then load it all into the table with one command:
LOAD DATA INFILE 'c:/temp/exceldata.txt'
INTO TABLE report_table
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\r\n'
IGNORE 1 LINES;
I am making a few assumptions here:
The spreadsheet has only two columns and they are in the same order as the fields in your table.
You do NOT need to clear out the table before the load. If you do, issue the command TRUNCATE TABLE report_table; before the load.
Note, I chose a tab delimited format because I prefer it. You could save the file as a .CSV file and adjust the command as follows:
LOAD DATA INFILE 'c:/temp/exceldata.csv'
INTO TABLE report_table
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 LINES;
The "optionally enclosed by" is there because Excel will put quotes around text data with a comma in it.
If you need to do this on a regular basis, you can still use the CSV method by writing an excel script that saves the file to a .CSV copy whenever the spreadsheet is saved. I have done that too.
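If you'd rather automate that conversion step in Python instead of an Excel script, here is a rough sketch (my addition, not the original answer; it assumes pandas with an xlsx-capable engine such as openpyxl is installed, and reuses the paths from above):

import pandas as pd

# Read the first worksheet and write it out as CSV for LOAD DATA to pick up.
df = pd.read_excel('C:/SqlExcel/Backup.xlsx', sheet_name=0)
df.to_csv('c:/temp/exceldata.csv', index=False)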
I have never written python but this is how I do it in PHP.
HTH
This code worked for me after taking help from the above suggestions. The error was one of indentation; now it's working :)
import pymysql
import xlrd

book = xlrd.open_workbook('C:\SqlExcel\Backup.xlsx')
sheet = book.sheet_by_index(0)

# Connect to the database
connection = pymysql.connect(host='localhost',
                             user='root',
                             password='',
                             db='test',
                             autocommit=True)

cursor = connection.cursor()
query = """INSERT INTO report_table (FirstName, LastName) VALUES (%s, %s)"""

for r in range(1, sheet.nrows):
    fname = sheet.cell(r,1).value
    lname = sheet.cell(r,2).value
    values = (fname, lname)
    cursor.execute(query, values)

cursor.close()
connection.close()
I am trying to insert info from a pandas DataFrame into a database table by using a function that I wrote:
import psycopg2

def insert(table_name="", name="", genere="", year=1, impd_rating=float(1)):
    conn = psycopg2.connect("dbname='database1' user='postgres' password='postgres333' host='localhost' port=5433")
    cur = conn.cursor()
    cur.execute("INSERT INTO %s VALUES %s,%s,%s,%s" % (table_name, name, genere, year, impd_rating))
    conn.commit()
    conn.close()
When I try to use this function like this:
b = 0
for row in DF['id']:
    insert(impd_rating=float(DF['idbm_rating'][b]),
           year=int(DF['year'][b]),
           name=str(DF['name'][b]),
           genere=str(DF['genere'][b]),
           table_name='test_movies')
    b = b + 1
I get the following error:
PS D:\tito\scripts\database training> python .\postgres_script.py
Traceback (most recent call last):
  File ".\postgres_script.py", line 56, in <module>
    insert(impd_rating=float(DF['idbm_rating'][b]), year=int(DF['year'][b]), name=str(DF['name'][b]), genere=str(DF['genere'][b]), table_name='test_movies')
  File ".\postgres_script.py", line 15, in insert
    cur.execute("INSERT INTO %s VALUES %s,%s,%s,%s" % (table_name, name, genere, year, impd_rating))
psycopg2.ProgrammingError: syntax error at or near "Avatar"
LINE 1: INSERT INTO test_movies VALUES Avatar,action,2009,7.9
I also tried changing the string-formatting method from %s to .format(), but I got the same error.
The error message is explicit: the SQL command is wrong at Avatar: INSERT INTO test_movies VALUES Avatar,action,2009,7.9. Values must be enclosed in parentheses, and character strings must be quoted, so the correct SQL is:
INSERT INTO test_movies VALUES ('Avatar','action',2009,7.9)
But building a full SQL command by concatenating parameters is bad practice (*); only the table name should be merged directly into the command, because it is not a SQL parameter. The correct way is a parameterized query (psycopg2 uses %s placeholders, so the ones meant for the driver have to be doubled when the table name is merged in with %):
cur.execute("INSERT INTO %s VALUES (%%s,%%s,%%s,%%s)" % (table_name,), (name, genere, year, impd_rating))
(*) This has been the cause of numerous SQL injection flaws, because if one of the parameters contains a semicolon (;), whatever comes after it could be interpreted as a new command.
Pandas has a DataFrame method for this, to_sql:
# Only needs to be executed once.
conn=psycopg2.connect("dbname='database1' user='postgres' password='postgres333' host='localhost' port=5433 ")
df.to_sql('test_movies', con=conn, if_exists='append', index=False)
This should hopefully get you going in the right direction.
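One caveat (my note, not the original answer's): current pandas versions expect an SQLAlchemy connectable (or a sqlite3 connection) rather than a raw psycopg2 connection, so the call would look more like this sketch, with connection details taken from the question:

import sqlalchemy as sa

# Connection details taken from the question; adjust as needed.
engine = sa.create_engine('postgresql+psycopg2://postgres:postgres333@localhost:5433/database1')
df.to_sql('test_movies', con=engine, if_exists='append', index=False)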
In your original query
INSERT INTO %s VALUES %s,%s,%s,%s
there is an SQL problem: you need parentheses around the values, i.e. it should be VALUES (%s, %s, %s, %s). On top of that, the table name cannot be merged in as a parameter, or it would be escaped as a string, which is not what you want.
You can use the psycopg 2.7 sql module to merge the table name to the query, with placeholders for the values:
from psycopg2 import sql

query = sql.SQL("INSERT INTO {} VALUES (%s, %s, %s, %s)").format(
    sql.Identifier('test_movies'))
cur.execute(query, ('Avatar', 'action', 2009, 7.9))
This securely merges both the table name and the arguments into the query.
Hello mohamed mahrous,
First install the psycopg2 package to access the PostgreSQL database.
Try the code below:
import psycopg2

conn = psycopg2.connect("dbname='database1' user='postgres' password='postgres333' host='localhost' port=5433")
cur = conn.cursor()

def insert(table_name, name, genere, year, impd_rating):
    query = "INSERT INTO "+table_name+" (name,genere,year,impd_rating) VALUES (%s,%s,%s,%s)"
    try:
        print(query)
        cur.execute(query, (name, genere, year, impd_rating))
    except Exception as e:
        print("Not executed...", e)
    conn.commit()

b = 0
for row in DF['id']:
    insert(impd_rating=float(DF['idbm_rating'][b]), year=int(DF['year'][b]), name=str(DF['name'][b]), genere=str(DF['genere'][b]), table_name='test_movies')
    b = b + 1
conn.close()
Example:
import psycopg2

conn = psycopg2.connect("dbname='database1' user='postgres' password='postgres333' host='localhost' port=5433")
cur = conn.cursor()

def insert(table_name, name, genere, year, impd_rating):
    query = "INSERT INTO "+table_name+" (name,genere,year,impd_rating) VALUES (%s,%s,%s,%s)"
    try:
        print(query)
        cur.execute(query, (name, genere, year, impd_rating))
    except Exception as e:
        print("Not executed", e)
    conn.commit()

b = 0
for row in DF['id']:
    insert(impd_rating="7.0", year="2017", name="Er Ceo Vora Mayur", genere="etc", table_name="test_movies")
    b = b + 1
conn.close()
I hope my answer is helpful. If you have any questions, please comment.
I found a solution for my issue by using sqlalchemy and the pandas to_sql method.
Thanks for the help, everyone!
import sqlalchemy
from sqlalchemy import *
import pandas as pd

def connect(user, password, db, host='localhost', port=5433):
    '''Returns a connection and a metadata object'''
    # We connect with the help of the PostgreSQL URL, e.g.
    # postgresql://federer:grandestslam@localhost:5432/tennis
    url = 'postgresql://{}:{}@{}:{}/{}'
    url = url.format(user, password, host, port, db)
    # The return value of create_engine() is our connection object
    con = sqlalchemy.create_engine(url, client_encoding='utf8')
    # We then bind the connection to MetaData()
    meta = sqlalchemy.MetaData(bind=con, reflect=True)
    return con, meta

con, meta = connect('postgres', 'postgres333', 'database1')

movies = Table('test', meta,
               Column('id', Integer, primary_key=True),
               Column('name', String),
               Column('genere', String),
               Column('year', Integer),
               Column('idbm_rating', REAL))

meta.create_all(con)

DF = pd.read_csv('new_movies.txt', sep=' ', engine='python')
DF.columns = ('id', 'name', 'genere', 'year', 'idbm_rating')
DF.to_sql('movies', con=con, if_exists='append', index=False)
I want to export specific columns from one database to another using Python, but it's not working:
# Display all Non-Duplicate data
import sqlite3
import csv

conn = sqlite3.connect('data.db')
# STEP 2 : create a small data file with only three fields account_id, product_id and unit_quantity
cursor = conn.execute("SELECT field1,field12,field14 FROM database")
for row in cursor:
    print(row[0:11])
print("Operation done successfully")
conn.close()
Create a second connection and insert directly:
conn = sqlite3.connect('data.db')
cursor = conn.execute("SELECT field1,field12,field14 FROM database")

export = sqlite3.connect('exported.db')
# get each result row and insert it using ? placeholders,
# so sqlite3 quotes and escapes the values itself
for values in cursor.fetchall():
    export.execute('INSERT INTO tablename (field1,field12,field14) VALUES (?, ?, ?)', values)
export.commit()
export.close()
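As a follow-up, the loop can be collapsed with executemany; a minimal sketch under the same assumptions (tablename and the field names are the answer's placeholders):

import sqlite3

conn = sqlite3.connect('data.db')
export = sqlite3.connect('exported.db')
rows = conn.execute("SELECT field1,field12,field14 FROM database").fetchall()
# One call inserts every fetched row, binding each to the ? placeholders.
export.executemany('INSERT INTO tablename (field1,field12,field14) VALUES (?, ?, ?)', rows)
export.commit()
export.close()
conn.close()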