Exporting specific MySQL data from a table to Excel using Python

I'm fairly new to mysql-connector-python. I'm in the middle of learning how to use python to make my job a little easier. Could you please help?
I have a database with one table. Let's call it 'table1'. The table is broken down with columns like this:
sent_time | delivered_time | id1_active | id2_active | id3_active | id1_inactive | id2_inactive | id3_inactive | location_active | location_inactive ... (lots more)
Let's say that these are two or more customers delivering goods to and from each other. Each customer has three IDs.
Right now I want to see if I can at least print out id1_active and id2_active for all of them. Ideally, I want to print the entire rows in which a specific ID appears in any of the columns. Can you look below and see what I'm doing wrong, and maybe help me figure out a solution for my ideal goal?
I keep getting this error:
line 18, in <module>
c.writerow([id1_active, id2_active])
TypeError: 'str' does not support the buffer interface
This is what I have thus far…
I created a ‘config.ini’ file to make my life a bit easier
[mysql]
host = localhost
database = db_name
user = root
password = blahblah
I created a ‘python_mysql_dbconfig.py’
from configparser import ConfigParser

def read_db_config(filename='config.ini', section='mysql'):
    """ Read database configuration file and return a dictionary object
    :param filename: name of the configuration file
    :param section: section of database configuration
    :return: a dictionary of database parameters
    """
    # create parser and read ini configuration file
    parser = ConfigParser()
    parser.read(filename)

    # get section, default to mysql
    db = {}
    if parser.has_section(section):
        items = parser.items(section)
        for item in items:
            db[item[0]] = item[1]
    else:
        raise Exception('{0} not found in the {1} file'.format(section, filename))

    return db
Then I created ‘export.py’
from mysql.connector import MySQLConnection, Error
from python_mysql_dbconfig import read_db_config
import csv

filename = open('test.csv', 'wb')
c = csv.writer(filename)

db_config = read_db_config()
conn = MySQLConnection(**db_config)
cursor = conn.cursor()

query = ("SELECT id1_active, id2_active from table1")
cursor.execute(query)

for id1_active, id2_active in cursor:
    c.writerow([id1_active, id2_active])

cursor.close()
filename.close()
cnn.close()
Could you tell me what I’m doing wrong?

You need to fetch the data and loop over the rows instead of the cursor. In Python 3 the output file also has to be opened in text mode ('w' with newline='') rather than 'wb', otherwise csv.writer raises the buffer-interface TypeError you are seeing:
from mysql.connector import MySQLConnection, Error
from python_mysql_dbconfig import read_db_config
import csv

filename = open('test.csv', 'w', newline='')
c = csv.writer(filename)

db_config = read_db_config()
conn = MySQLConnection(**db_config)
cursor = conn.cursor()

query = ("SELECT id1_active, id2_active from table1")
cursor.execute(query)

# You first need to fetch the data
data = cursor.fetchall()
for item in data:
    c.writerow(item)

cursor.close()
filename.close()
conn.close()

I started figuring things out using xlsxwriter. I was able to export all the data from the table, but now I cannot get my code to pull the data that falls between two datetimes... it keeps telling me
Traceback (most recent call last):
File "C:\Python34\timerange.py", line 36, in <module>
start_date = datetime.datetime(userIn,timeShape)
AttributeError: type object 'datetime.datetime' has no attribute 'datetime'
Please, can you tell me what I'm doing wrong?
# Establish a MySQL connection
from mysql.connector import MySQLConnection, Error
from python_mysql_dbconfig import read_db_config

db_config = read_db_config()
conn = MySQLConnection(**db_config)

import xlsxwriter
from xlsxwriter.workbook import Workbook
import datetime
from datetime import datetime

cursor = conn.cursor()

# creates the workbook
workbook = xlsxwriter.Workbook('imhere.xlsx')
worksheet = workbook.add_worksheet()

# formatting definitions
bold = workbook.add_format({'bold': True})
date_format = workbook.add_format({'num_format': 'yyyy-mm-dd hh:mm:ss'})
timeShape = '%Y-%m-%d %H:%M:%S'

query = ("SELECT sent_time, delivered_time, customer_name, id1_active, id2_active, "
         "id3_active, id1_inactive, id2_inactive, id3_inactive, location_active, "
         "location_inactive FROM table1 "
         "WHERE sent_time BETWEEN %s AND %s")

userIn = input("Type Start Date (YYYY-MM-DD hh:mm:ss):")
userEnd = input("Type End Date (YYYY-MM-DD hh:mm:ss):")

start_date = datetime.datetime(userIn, timeShape)
end_date = datetime.datetime(userEnd, timeShape)

# Execute sql Query
cursor.execute(query, (start_date, end_date))
#result = cursor.fetchall()

# sets up the header row
worksheet.write('A1', 'sent_time', bold)
worksheet.write('B1', 'delivered_time', bold)
worksheet.write('C1', 'customer_name', bold)
worksheet.write('D1', 'id1_active', bold)
worksheet.write('E1', 'id2_active', bold)
worksheet.write('F1', 'id3_active', bold)
worksheet.write('G1', 'id1_inactive', bold)
worksheet.write('H1', 'id2_inactive', bold)
worksheet.write('I1', 'id3_inactive', bold)
worksheet.write('J1', 'location_active', bold)
worksheet.write('K1', 'location_inactive', bold)
worksheet.autofilter('A1:K1')

print("sent_time", "delivered_time", "customer_name", "id1_active", "id2_active", "id3_active", "id1_inactive", "id2_inactive", "id3_inactive", "location_active", "location_inactive")
for row in cursor:
    print(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9], row[10])

# Create a For loop to iterate through each row in the XLS file, starting at row 2 to skip the headers
for r, row in enumerate(cursor, start=1):  # where you want to start printing results inside workbook
    for c, col in enumerate(row):
        worksheet.write_datetime(r, 0, row[0], date_format)
        worksheet.write_datetime(r, 1, row[1], date_format)
        worksheet.write(r, 2, row[2])
        worksheet.write(r, 3, row[3])
        worksheet.write(r, 4, row[4])
        worksheet.write(r, 5, row[5])
        worksheet.write(r, 6, row[6])
        worksheet.write(r, 7, row[7])
        worksheet.write(r, 8, row[8])
        worksheet.write(r, 9, row[9])
        worksheet.write(r, 10, row[10])

# close out everything and save
cursor.close()
workbook.close()
conn.close()

# print number of rows and bye-bye message
print("- - - - - - - - - - - - -")
rows = len(result)
print("I just imported " + str(rows) + " rows from MySQL!")
print("")
print("Good to Go!!!")
print("")

Related

python query on Postgres

I am trying to get the fields of each row returned by a stored procedure in a Postgres DB, and I keep getting "tuple index out of range". I am basically copying and pasting the code from a tutorial website and still getting the same error.
When I do only row[0] it prints out the entire record.
ConfRoom = ("ConfRoom1",612,1589540397,1589540425,/var/lib/freeswitch/recordings/10.91.50.217/archive/2020/May/15/7b4def4e-0494-439f-8540-1f339e3ec375,1a4652e7-61fc-4fb8-b564-19adec09ec0e)
tuple index out of range
#!/usr/bin/python3
import psycopg2
from config import config

def connect():
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # read connection parameters
        params = config()

        # connect to the PostgreSQL server
        conn = psycopg2.connect(**params)

        # create a cursor
        cur = conn.cursor()

        # execute a statement
        postgreSQL_select_Query = 'SELECT "public"."fn_get_recordings"()'
        cur.execute(postgreSQL_select_Query)
        conf_query = cur.fetchall()
        print(conf_query)
        for row in conf_query:
            print("ConfRoom = ", row[0])
            print("ConfDescription = ", row[1])
            print("StartEpoch = ", row[2])
            print("EndEpoch = ", row[3])
            print("Location = ", row[4])
            print("MeetingID = ", row[5], "\n")
Thanks
select * from fn_get_recordings() worked.
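That lines up with the error: SELECT "public"."fn_get_recordings"() returns the whole composite record as a single column, so row[0] holds the entire record and row[1] is out of range. Selecting FROM the set-returning function expands it into separate columns; a short sketch against the same cursor:

# each row now has one element per column of the function's result
cur.execute('SELECT * FROM "public"."fn_get_recordings"()')
for row in cur.fetchall():
    print("ConfRoom = ", row[0])
    print("MeetingID = ", row[5], "\n")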

Incorrect date value when loading xlsx file to table using pymysql and xlrd

(Very) beginner python user here. I'm trying to load an xlsx file into a MySQL table using xlrd and pymysql python libraries and I'm getting an error:
pymysql.err.InternalError: (1292, "Incorrect date value: '43500' for column 'invoice_date' at row 1")
The datatype for invoice_date for my table is DATE. The format for this field in my xlsx file is also Date. Things work fine if I change the table datatype to varchar, but I'd prefer to have the data load into my table as a date instead of converting after the fact. Any ideas as to why I'm getting this error? It appears that xlrd or pymysql is reading '2/4/2019' in my xlsx file as '43500' and MySQL is rejecting it due to a datatype mismatch.
import xlrd
import pymysql as MySQLdb

# Open workbook and define first sheet
book = xlrd.open_workbook("2019_Complete.xlsx")
sheet = book.sheet_by_index(0)

# MySQL connection
database = MySQLdb.connect(host="localhost", user="root", passwd="password", db="vendor")

# Get cursor, which is used to traverse the database, line by line
cursor = database.cursor()

# INSERT INTO SQL query
query = """insert into table values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""

# Create a For loop to iterate through each row in the XLS file, starting at row 2 to skip the headers
for r in range(1, sheet.nrows):
    lp = sheet.cell(r,0).value
    pallet_lp = sheet.cell(r,1).value
    bol = sheet.cell(r,2).value
    invoice_date = sheet.cell(r,3).value
    date_received = sheet.cell(r,4).value
    date_repaired = sheet.cell(r,5).value
    time_in_repair = sheet.cell(r,6).value
    date_shipped = sheet.cell(r,7).value
    serial_number = sheet.cell(r,8).value
    upc = sheet.cell(r,9).value
    product_type = sheet.cell(r,10).value
    product_description = sheet.cell(r,11).value
    repair_code = sheet.cell(r,12).value
    condition = sheet.cell(r,13).value
    repair_cost = sheet.cell(r,14).value
    parts_cost = sheet.cell(r,15).value
    total_cost = sheet.cell(r,16).value
    repair_notes = sheet.cell(r,17).value
    repair_cap = sheet.cell(r,18).value
    complaint = sheet.cell(r,19).value
    delta = sheet.cell(r,20).value

    # Assign values from each row
    values = (lp, pallet_lp, bol, invoice_date, date_received, date_repaired, time_in_repair, date_shipped, serial_number, upc, product_type, product_description, repair_code, condition, repair_cost, parts_cost, total_cost, repair_notes, repair_cap, complaint, delta)

    # Execute sql Query
    cursor.execute(query, values)

# Close the cursor
cursor.close()

# Commit the transaction
database.commit()

# Close the database connection
database.close()

# Print results
print("")
columns = str(sheet.ncols)
rows = str(sheet.nrows)
print("I just imported " + columns + " columns and " + rows + " rows to MySQL!")
You can see this answer for a more detailed explanation, but basically Excel treats dates as a number relative to 1899-12-31, and so to convert your date value to an actual date you need to convert that number into an ISO format date which MySQL will accept. You can do that using date.fromordinal and date.isoformat. For example:
from datetime import date

dval = 43500
d = date.fromordinal(dval + 693594)
print(d.isoformat())
Output:
2019-02-04
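Applied to the loading script above, the conversion goes where the date cells are read; xlrd's xldate_as_datetime helper does the same job using the workbook's own date system. A sketch for the invoice_date cell (the other date columns would be handled the same way):

import xlrd
from datetime import date

book = xlrd.open_workbook("2019_Complete.xlsx")
sheet = book.sheet_by_index(0)

raw = sheet.cell(1, 3).value    # e.g. 43500.0 for 2019-02-04
invoice_date = date.fromordinal(int(raw) + 693594).isoformat()

# equivalent, using xlrd's helper and the workbook's date mode
invoice_date = xlrd.xldate_as_datetime(raw, book.datemode).date().isoformat()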

SQLAlchemy - How can I extract a table from an sqlite file?

I want to extract table information from an sqlite file.
I could list all the table names following this page, and tried to extract a table's contents using the query method on the session instance, but I got the following error.
sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) no such column: ComponentSizes [SQL: 'SELECT ComponentSizes']
Does anyone know how I should revise the following code in order to extract a table by specifying its name?
class read():
    def __init__(self, path):
        engine = create_engine("sqlite:///" + sqlFile)
        inspector = inspect(engine)
        for table_name in inspector.get_table_names():
            for column in inspector.get_columns(table_name):
                #print("Column: %s" % column['name'])
                print(table_name + " : " + column['name'])
        Session = sessionmaker(bind=engine)
        self.session = Session()

    def getTable(self, name):
        table = self.session.query(name).all()
        return table

if __name__ == '__main__':
    test = read(sqlFile)
    test.getTable('ComponentSizes')
The error you are getting is suggestive of what is going wrong. Your code translates into the SQL SELECT ComponentSizes, which is incomplete. It's not clear what your end goal is. If you want to extract the contents of a table into CSV, you could do this:
import sqlite3
import csv
con = sqlite3.connect('mydatabase.db')
outfile = open('mydump.csv', 'wb')
outcsv = csv.writer(outfile)
cursor = con.execute('select * from ComponentSizes')
# dump column titles (optional)
outcsv.writerow(x[0] for x in cursor.description)
# dump rows
outcsv.writerows(cursor.fetchall())
outfile.close()
Else, if you want contents of the table into a pandas df for further analysis, you could choose to do this:
import sqlite3
import pandas as pd
# Create your connection.
cnx = sqlite3.connect('file.db')
df = pd.read_sql_query("SELECT * FROM ComponentSizes", cnx)
Hope it helps. Happy coding!
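To stay within SQLAlchemy itself (session.query('ComponentSizes') fails because the string is treated as a column expression, hence SELECT ComponentSizes), the table can be reflected by name and selected from. A sketch, assuming SQLAlchemy 1.4+ and the same sqlite file:

from sqlalchemy import create_engine, MetaData, Table, select

engine = create_engine("sqlite:///" + sqlFile)

# reflect the table definition from the database by name
component_sizes = Table('ComponentSizes', MetaData(), autoload_with=engine)

with engine.connect() as conn:
    rows = conn.execute(select(component_sizes)).fetchall()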

Python throwing an error - String indices must be integers

I'm working on my Python script (Python 2.6) for the XBMC media application.
I have a problem with my script: I'm trying to pull the data from the sqlite3 database, but I'm getting the error TypeError: string indices must be integers.
The error points at this line:
programming = channelMap[row['channel']], row["title"], row["start_date"], row["stop_date"]
Here is the full code:
import xbmc
import xbmcgui
import xbmcaddon
import os
import urllib2
import StringIO
import sqlite3
from sqlite3 import dbapi2 as database
from xml.etree import ElementTree
import xml.etree.ElementTree as ET
from UserDict import DictMixin
import datetime
import time

class MyClass(xbmcgui.WindowXML):
    def onAction(self, action):
        # DOWNLOAD THE XML SOURCE HERE
        url = ADDON.getSetting('allchannels.url')
        req = urllib2.Request(url)
        response = urllib2.urlopen(req)
        data = response.read()
        response.close()
        profilePath = xbmc.translatePath(os.path.join('special://userdata/addon_data/script.tvguide', ''))
        if os.path.exists(profilePath):
            profilePath = profilePath + 'source.db'
            con = database.connect(profilePath)
            cur = con.cursor()
            cur.execute('CREATE TABLE programs(channel TEXT, title TEXT, start_date TIMESTAMP, stop_date TIMESTAMP, description TEXT)')
            con.commit()
            con.close
        tv_elem = ElementTree.parse(StringIO.StringIO(data)).getroot()
        profilePath = xbmc.translatePath(os.path.join('special://userdata/addon_data/script.tvguide', ''))
        profilePath = profilePath + 'source.db'
        con = sqlite3.connect(profilePath)
        cur = con.cursor()
        channels = OrderedDict()

        # Get the loaded data
        for channel in tv_elem.findall('channel'):
            channel_name = channel.find('display-name').text
            for program in channel.findall('programme'):
                title = program.find('title').text
                start_time = program.get("start")
                stop_time = program.get("stop")
                cur.execute("INSERT INTO programs(channel, title, start_date, stop_date)" + " VALUES(?, ?, ?, ?)", [channel_name, title, start_time, stop_time])
                con.commit()
        print 'Channels store into database are now successfully!'

        cur.execute('SELECT channel, title, start_date, stop_date FROM programs')
        programList = list()
        channelMap = dict()
        results = cur.fetchall()
        cur.close
        for channel_result in results:
            for row in channel_result:
                programming = channelMap[row['channel']], row["title"], row["start_date"], row["stop_date"]
                print(programming)
I keep getting the same error in my XBMC log.
EDIT: When I try this:
programList = list()
channelMap = dict()
for c in channels:
    if c.id:
        channelMap[c.id] = c
strCh = '(\'' + '\',\''.join(channelMap.keys()) + '\')'
cur.execute('SELECT * FROM programs WHERE channel')
for row in cur:
    programming = program(channelMap[row['channel']], row["title"], row["start_date"], row["stop_date"])
    programList.append(programming)
    print(programming)
Here is the error on the xbmc log:
- NOTE: IGNORING THIS CAN LEAD TO MEMORY LEAKS!
Error Type: <type 'exceptions.TypeError'>
Error Contents: tuple indices must be integers, not str
Traceback (most recent call last):
File "C:\Users\user\AppData\Roaming\XBMC\addons\script.tvguide\test.py", line 1679, in onAction
programming = program(channelMap[row['channel']], row["title"], row["start_date"], row["stop_date"])
TypeError: tuple indices must be integers, not str
-->End of Python script error report<--
You are looping over each row in the result, then over each column. The columns are strings:
for channel_result in results:
    for row in channel_result:
So channel_result is a row (a tuple by default), then you loop over that with for row in channel_result. This makes each row object a single column value.
You appear to expect row to be a dictionary instead; that is not the case here. You could just print the row directly; the columns are listed in the same order as the original SELECT:
for row in results:
    programming = (channelMap[row[0]],) + row[1:]
If you really wanted a dictionary for each row, you'll have to tell sqlite3 to do so by setting the row_factory attribute on the connection:
def dict_factory(cursor, row):
    d = {}
    for idx, col in enumerate(cursor.description):
        d[col[0]] = row[idx]
    return d

con = sqlite3.connect(profilePath)
con.row_factory = dict_factory
after which you use the one loop:
for row in results:
and row will be a dictionary with keys corresponding to the column names.
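sqlite3 also ships a built-in row factory, sqlite3.Row, which gives the same name-based access without writing dict_factory; a short sketch:

import sqlite3

con = sqlite3.connect(profilePath)   # profilePath as in the script above
con.row_factory = sqlite3.Row
cur = con.cursor()
cur.execute('SELECT channel, title, start_date, stop_date FROM programs')
for row in cur.fetchall():
    print(row['channel'], row['title'], row['start_date'], row['stop_date'])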
If I see it right, the error is pretty precise. Your rows are plain tuples, so you need to use integers for the indices in that line. Python simply doesn't know what to do with row['channel'], because there's no index 'channel' defined for row.
The easiest is probably to print the value of row there; then you should be able to debug it.
Here are the docs: https://docs.python.org/2/library/sqlite3.html#sqlite3.Cursor

Writing a csv file into SQL Server database using python

I am trying to write a csv file into a table in a SQL Server database using Python. I am facing errors when I pass the parameters, but I don't face any error when I do it manually. Here is the code I am executing.
cur = cnxn.cursor() # Get the cursor
csv_data = csv.reader(file('Samplefile.csv')) # Read the csv
for rows in csv_data: # Iterate through csv
    cur.execute("INSERT INTO MyTable(Col1,Col2,Col3,Col4) VALUES (?,?,?,?)", rows)
cnxn.commit()
Error:
pyodbc.DataError: ('22001', '[22001] [Microsoft][ODBC SQL Server Driver][SQL Server]String or binary data would be truncated. (8152) (SQLExecDirectW); [01000] [Microsoft][ODBC SQL Server Driver][SQL Server]The statement has been terminated. (3621)')
However when I insert the values manually. It works fine
cur.execute("INSERT INTO MyTable(Col1,Col2,Col3,Col4) VALUES (?,?,?,?)",'A','B','C','D')
I have ensured that the TABLE is there in the database, data types are consistent with the data I am passing. Connection and cursor are also correct. The data type of rows is "list"
Consider building the query dynamically to ensure the number of placeholders matches your table and CSV file format. Then it's just a matter of ensuring your table and CSV file are correct, instead of checking that you typed enough ? placeholders in your code.
The following example assumes
CSV file contains column names in the first line
Connection is already built
File name is test.csv
Table name is MyTable
Python 3
...
with open('test.csv', 'r') as f:
    reader = csv.reader(f)
    columns = next(reader)
    query = 'insert into MyTable({0}) values ({1})'
    query = query.format(','.join(columns), ','.join('?' * len(columns)))
    cursor = connection.cursor()
    for data in reader:
        cursor.execute(query, data)
    cursor.commit()
If column names are not included in the file:
...
with open ('test.csv', 'r') as f:
reader = csv.reader(f)
data = next(reader)
query = 'insert into MyTable values ({0})'
query = query.format(','.join('?' * len(data)))
cursor = connection.cursor()
cursor.execute(query, data)
for data in reader:
cursor.execute(query, data)
cursor.commit()
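With a large CSV the same dynamically built query can also be handed to executemany, which pyodbc can batch; a sketch under the same assumptions as above:

import csv

with open('test.csv', 'r') as f:
    reader = csv.reader(f)
    columns = next(reader)
    query = 'insert into MyTable({0}) values ({1})'
    query = query.format(','.join(columns), ','.join('?' * len(columns)))
    cursor = connection.cursor()
    cursor.fast_executemany = True   # available in recent pyodbc versions
    cursor.executemany(query, list(reader))
    cursor.commit()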
I modified the code written above by Brian as follows since the one posted above wouldn't work on the delimited files that I was trying to upload. The line row.pop() can also be ignored as it was necessary only for the set of files that I was trying to upload.
import csv

def upload_table(path, filename, delim, cursor):
    """
    Function to upload flat file to sqlserver
    """
    tbl = filename.split('.')[0]
    cnt = 0
    with open(path + filename, 'r') as f:
        reader = csv.reader(f, delimiter=delim)
        for row in reader:
            row.pop()  # can be commented out
            row = ['NULL' if val == '' else val for val in row]
            row = [x.replace("'", "''") for x in row]
            out = "'" + "', '".join(str(item) for item in row) + "'"
            out = out.replace("'NULL'", 'NULL')
            query = "INSERT INTO " + tbl + " VALUES (" + out + ")"
            cursor.execute(query)
            cnt = cnt + 1
            if cnt % 10000 == 0:
                cursor.commit()
        cursor.commit()
    print("Uploaded " + str(cnt) + " rows into table " + tbl + ".")
You can pass the columns as arguments. For example:
for rows in csv_data: # Iterate through csv
    cur.execute("INSERT INTO MyTable(Col1,Col2,Col3,Col4) VALUES (?,?,?,?)", *rows)
If you are using MySqlHook in Airflow and cursor.execute() with params throws the error
TypeError: not all arguments converted during string formatting
use %s instead of ?
with open('/usr/local/airflow/files/ifsc_details.csv', 'r') as csv_file:
    csv_reader = csv.reader(csv_file)
    columns = next(csv_reader)
    query = '''insert into ifsc_details({0}) values({1});'''
    query = query.format(','.join(columns), ','.join(['%s'] * len(columns)))
    mysql = MySqlHook(mysql_conn_id='local_mysql')
    conn = mysql.get_conn()
    cursor = conn.cursor()
    for data in csv_reader:
        cursor.execute(query, data)
    cursor.commit()
I got it sorted out. The error was due to the column size restrictions of the table. I changed the column capacity, e.g. from col1 varchar(10) to col1 varchar(35), etc. Now it's working fine.
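For reference, widening a column like that is a one-line ALTER; a sketch of the T-SQL run through the same pyodbc cursor (MyTable and Col1 stand in for the real table and column names):

cur.execute("ALTER TABLE MyTable ALTER COLUMN Col1 VARCHAR(35)")
cnxn.commit()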
Here is the script and hope this works for you:
import pandas as pd
import pyodbc as pc
connection_string = "Driver=SQL Server;Server=localhost;Database={0};Trusted_Connection=Yes;"
cnxn = pc.connect(connection_string.format("DataBaseNameHere"), autocommit=True)
cur=cnxn.cursor()
df= pd.read_csv("your_filepath_and_filename_here.csv").fillna('')
query = 'insert into TableName({0}) values ({1})'
query = query.format(','.join(df.columns), ','.join('?' * len(df.columns)))
cur.fast_executemany = True
cur.executemany(query, df.values.tolist())
cnxn.close()
You can also import data into SQL by using either:
The SQL Server Import and Export Wizard
SQL Server Integration Services (SSIS)
The OPENROWSET function
More details can be found on this webpage:
https://learn.microsoft.com/en-us/sql/relational-databases/import-export/import-data-from-excel-to-sql?view=sql-server-2017
