MS Access database locked by Python - python

I'm writing a dataframe to an MS Access database using Python. It works, but I can't seem to close the connection (i.e. the database is locked by Python).
Here's my code:
# imports
import urllib
import pyodbc
import pandas as pd
from sqlalchemy import create_engine
# connection
connection_string = (
r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};'
r'DBQ=M:\Larry\Access\test2.accdb;'
r'ExtendedAnsiSQL=1;'
)
connection_uri = f"access+pyodbc:///?odbc_connect={urllib.parse.quote_plus(connection_string)}"
engine = create_engine(connection_uri)
# create dataframe
d = {'name':['daryl','other daryl'],'team':['a','b']}
df = pd.DataFrame(d)
# write to database
df.to_sql('test',engine,if_exists='replace',index=False)
I tried adding this after the create engine line:
conn = engine.connect()
and this after the dataframe is written to the database:
conn.close()
I don't get an error from these additions but the database is still locked. How do I close the connection so that the lock is removed?

Since you are maintaining a connection instance of the database with engine object, after operations, consider Engine.dispose() to release the resource:
...
# write to database
df.to_sql('test', engine, if_exists='replace', index=False)
engine.dispose()

Related

Error when trying to write data to a Datamart database using pyodbc [duplicate]

I am attempting to write a Python script that can take Excel sheets and import them into my SQL Server Express (with Windows Authentication) database as tables. To do this, I am using pandas to read the Excel files into a pandas DataFrame, I then hope to use pandas.to_sql() to import the data into my database. To use this function, however, I need to use sqlalchemy.create_engine().
I am able to connect to my database using pyodbc alone, and run test queries. This conection is done with the followng code:
def create_connection(server_name, database_name):
config = dict(server=server_name, database= database_name)
conn_str = ('SERVER={server};DATABASE={database};TRUSTED_CONNECTION=yes')
return pyodbc.connect(r'DRIVER={ODBC Driver 13 for SQL Server};' + conn_str.format(**config))
...
server = '<MY_SERVER_NAME>\SQLEXPRESS'
db = '<MY_DATABASE_NAME>
connection = create_connection(server, db)
cursor = connection.cursor()
cursor.execute('CREATE VIEW test_view AS SELECT * FROM existing_table')
cursor.commit()
However, this isn't much use as I can't use pandas.to_sql() - to do so I need an engine from sqlalchemy.create_engine(), but I am struggling to figure out how to use my same details in my create_connection() function above to successfully create an engine and connect to the database.
I have tried many, many combinations along the lines of:
engine = create_engine("mssql+pyodbc://#C<MY_SERVER_NAME>\SQLEXPRESS/<MY_DATABASE_NAME>?driver={ODBC Driver 13 for SQL Server}?trusted_connection=yes")
conn = engine.connect().connection
or
engine = create_engine("mssql+pyodbc://#C<MY_SERVER_NAME>\SQLEXPRESS/<MY_DATABASE_NAME>?trusted_connection=yes")
conn = engine.connect().connection
A Pass through exact Pyodbc string works for me:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
connection_string = (
r"Driver=ODBC Driver 17 for SQL Server;"
r"Server=(local)\SQLEXPRESS;"
r"Database=myDb;"
r"Trusted_Connection=yes;"
)
connection_url = URL.create(
"mssql+pyodbc",
query={"odbc_connect": connection_string}
)
engine = create_engine(connection_url)
df = pd.DataFrame([(1, "foo")], columns=["id", "txt"])
pd.to_sql("test_table", engine, if_exists="replace", index=False)

Error when creating sqlalchemy engine: Driver keyword syntax error (IM012) [duplicate]

I am attempting to write a Python script that can take Excel sheets and import them into my SQL Server Express (with Windows Authentication) database as tables. To do this, I am using pandas to read the Excel files into a pandas DataFrame, I then hope to use pandas.to_sql() to import the data into my database. To use this function, however, I need to use sqlalchemy.create_engine().
I am able to connect to my database using pyodbc alone, and run test queries. This conection is done with the followng code:
def create_connection(server_name, database_name):
config = dict(server=server_name, database= database_name)
conn_str = ('SERVER={server};DATABASE={database};TRUSTED_CONNECTION=yes')
return pyodbc.connect(r'DRIVER={ODBC Driver 13 for SQL Server};' + conn_str.format(**config))
...
server = '<MY_SERVER_NAME>\SQLEXPRESS'
db = '<MY_DATABASE_NAME>
connection = create_connection(server, db)
cursor = connection.cursor()
cursor.execute('CREATE VIEW test_view AS SELECT * FROM existing_table')
cursor.commit()
However, this isn't much use as I can't use pandas.to_sql() - to do so I need an engine from sqlalchemy.create_engine(), but I am struggling to figure out how to use my same details in my create_connection() function above to successfully create an engine and connect to the database.
I have tried many, many combinations along the lines of:
engine = create_engine("mssql+pyodbc://#C<MY_SERVER_NAME>\SQLEXPRESS/<MY_DATABASE_NAME>?driver={ODBC Driver 13 for SQL Server}?trusted_connection=yes")
conn = engine.connect().connection
or
engine = create_engine("mssql+pyodbc://#C<MY_SERVER_NAME>\SQLEXPRESS/<MY_DATABASE_NAME>?trusted_connection=yes")
conn = engine.connect().connection
A Pass through exact Pyodbc string works for me:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
connection_string = (
r"Driver=ODBC Driver 17 for SQL Server;"
r"Server=(local)\SQLEXPRESS;"
r"Database=myDb;"
r"Trusted_Connection=yes;"
)
connection_url = URL.create(
"mssql+pyodbc",
query={"odbc_connect": connection_string}
)
engine = create_engine(connection_url)
df = pd.DataFrame([(1, "foo")], columns=["id", "txt"])
pd.to_sql("test_table", engine, if_exists="replace", index=False)

pyodbc.connect() works, but not sqlalchemy.create_engine().connect()

I am attempting to write a Python script that can take Excel sheets and import them into my SQL Server Express (with Windows Authentication) database as tables. To do this, I am using pandas to read the Excel files into a pandas DataFrame, I then hope to use pandas.to_sql() to import the data into my database. To use this function, however, I need to use sqlalchemy.create_engine().
I am able to connect to my database using pyodbc alone, and run test queries. This conection is done with the followng code:
def create_connection(server_name, database_name):
config = dict(server=server_name, database= database_name)
conn_str = ('SERVER={server};DATABASE={database};TRUSTED_CONNECTION=yes')
return pyodbc.connect(r'DRIVER={ODBC Driver 13 for SQL Server};' + conn_str.format(**config))
...
server = '<MY_SERVER_NAME>\SQLEXPRESS'
db = '<MY_DATABASE_NAME>
connection = create_connection(server, db)
cursor = connection.cursor()
cursor.execute('CREATE VIEW test_view AS SELECT * FROM existing_table')
cursor.commit()
However, this isn't much use as I can't use pandas.to_sql() - to do so I need an engine from sqlalchemy.create_engine(), but I am struggling to figure out how to use my same details in my create_connection() function above to successfully create an engine and connect to the database.
I have tried many, many combinations along the lines of:
engine = create_engine("mssql+pyodbc://#C<MY_SERVER_NAME>\SQLEXPRESS/<MY_DATABASE_NAME>?driver={ODBC Driver 13 for SQL Server}?trusted_connection=yes")
conn = engine.connect().connection
or
engine = create_engine("mssql+pyodbc://#C<MY_SERVER_NAME>\SQLEXPRESS/<MY_DATABASE_NAME>?trusted_connection=yes")
conn = engine.connect().connection
A Pass through exact Pyodbc string works for me:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
connection_string = (
r"Driver=ODBC Driver 17 for SQL Server;"
r"Server=(local)\SQLEXPRESS;"
r"Database=myDb;"
r"Trusted_Connection=yes;"
)
connection_url = URL.create(
"mssql+pyodbc",
query={"odbc_connect": connection_string}
)
engine = create_engine(connection_url)
df = pd.DataFrame([(1, "foo")], columns=["id", "txt"])
pd.to_sql("test_table", engine, if_exists="replace", index=False)

Python pandas to_sql 'append'

I am trying to send monthly data to a MySQL database using Python's pandas to_sql command. My program runs one month of data at a time and I want to append the new data onto the existing database. However, Python gives me an error:
_mysql_exceptions.OperationalError: (1050, "Table 'cps_basic_tabulation' already exists")
Here is my code for connecting and exporting:
conn = MySQLdb.connect(host = config.get('db', 'host'),
user = config.get('db', 'user'),
passwd = config.get('db', 'password'),
db = 'cps_raw')
combined.to_sql(name = "cps_raw.cps_basic_tabulation",
con = conn,
flavor = 'mysql',
if_exists = 'append')
I have also tried using:
from sqlalchemy import create_engine
Replacing conn = MySQLdb.connect... with:
engine = mysql+mysqldb://<user>:<password>#<host>[:<port>]/<dbname>
conn = engine.connect().connection
Any ideas on why I cannot append to a database?
Thanks!
Starting from pandas 0.14, you have to provide directly the sqlalchemy engine, and not the connection object:
engine = create_engine("mysql+mysqldb://<user>:<password>#<host>[:<port>]/<dbname>")
combined.to_sql("cps_raw.cps_basic_tabulation", engine, if_exists='append')
Since I had the same error message and stumbled across this post I leave this here for others to find.
I found two ways to solve the duplicated table creation although I lack the insight as to why this solves it:
Either pass the database name in the url when creating a connection
or pass the database name as a schema in pd.to_sql.
Doing both does not hurt. Also, a few years later it is (again?) possible to pass the pure connection to pandas. My guess would be that in the previous answer by joris the first of my solution cases might have implicitly solved the problem.
```
#create connection to MySQL DB via sqlalchemy & pymysql
user = credentials['user']
password = credentials['password']
port = credentials['port']
host = credentials['hostname']
dialect = 'mysql'
driver = 'pymysql'
db_name = 'test_db'
# setup SQLAlchemy
from sqlalchemy import create_engine
cnx = f'{dialect}+{driver}://{user}:{password}#{host}:{port}/'
engine = create_engine(cnx)
# create database
with engine.begin() as con:
con.execute(f"CREATE DATABASE {db_name}")
############################################################
# either pass the db_name vvvv - HERE- vvvv after creating a database
cnx = f'{dialect}+{driver}://{user}:{password}#{host}:{port}/{db_name}'
############################################################
engine = create_engine(cnx)
table = 'test_table'
col = 'test_col'
with engine.begin() as con:
# this would work here instead of creating a new engine with a new link
# con.execute(f"USE {db_name}")
con.execute(f"CREATE TABLE {table} ({col} CHAR(1));")
# insert into database
import pandas as pd
df = pd.DataFrame({col : ['a','b','c']})
with engine.begin() as con:
# this has no effect here
# con.execute(f"USE {db_name}")
df.to_sql(
name= table,
if_exists='append',
# passing con = cnx here would equally work
con=con,
############################################################
# or pass it as a schema vvvv - HERE - vvvv
#schema=db_name,
############################################################
index=False
)```
Tested with python version 3.8.13, sqlalchemy 1.4.32 and pandas 1.4.2.
Same problem might have appeared here and here.

Get data from pandas into a SQL server with PYODBC

I am trying to understand how python could pull data from an FTP server into pandas then move this into SQL server. My code here is very rudimentary to say the least and I am looking for any advice or help at all. I have tried to load the data from the FTP server first which works fine.... If I then remove this code and change it to a select from ms sql server it is fine so the connection string works, but the insertion into the SQL server seems to be causing problems.
import pyodbc
import pandas
from ftplib import FTP
from StringIO import StringIO
import csv
ftp = FTP ('ftp.xyz.com','user','pass' )
ftp.set_pasv(True)
r = StringIO()
ftp.retrbinary('filname.csv', r.write)
pandas.read_table (r.getvalue(), delimiter=',')
connStr = ('DRIVER={SQL Server Native Client 10.0};SERVER=localhost;DATABASE=TESTFEED;UID=sa;PWD=pass')
conn = pyodbc.connect(connStr)
cursor = conn.cursor()
cursor.execute("INSERT INTO dbo.tblImport(Startdt, Enddt, x,y,z,)" "VALUES (x,x,x,x,x,x,x,x,x,x.x,x)")
cursor.close()
conn.commit()
conn.close()
print"Script has successfully run!"
When I remove the ftp code this runs perfectly, but I do not understand how to make the next jump to get this into Microsoft SQL server, or even if it is possible without saving into a file first.
For the 'write to sql server' part, you can use the convenient to_sql method of pandas (so no need to iterate over the rows and do the insert manually). See the docs on interacting with SQL databases with pandas: http://pandas.pydata.org/pandas-docs/stable/io.html#io-sql
You will need at least pandas 0.14 to have this working, and you also need sqlalchemy installed. An example, assuming df is the DataFrame you got from read_table:
import sqlalchemy
import pyodbc
engine = sqlalchemy.create_engine("mssql+pyodbc://<username>:<password>#<dsnname>")
# write the DataFrame to a table in the sql database
df.to_sql("table_name", engine)
See also the documentation page of to_sql.
More info on how to create the connection engine with sqlalchemy for sql server with pyobdc, you can find here:http://docs.sqlalchemy.org/en/rel_1_1/dialects/mssql.html#dialect-mssql-pyodbc-connect
But if your goal is to just get the csv data into the SQL database, you could also consider doing this directly from SQL. See eg Import CSV file into SQL Server
Python3 version using a LocalDB SQL instance:
from sqlalchemy import create_engine
import urllib
import pyodbc
import pandas as pd
df = pd.read_csv("./data.csv")
quoted = urllib.parse.quote_plus("DRIVER={SQL Server Native Client 11.0};SERVER=(localDb)\ProjectsV14;DATABASE=database")
engine = create_engine('mssql+pyodbc:///?odbc_connect={}'.format(quoted))
df.to_sql('TargetTable', schema='dbo', con = engine)
result = engine.execute('SELECT COUNT(*) FROM [dbo].[TargetTable]')
result.fetchall()
Yes, the bcp utility seems to be the best solution for most cases.
If you want to stay within Python, the following code should work.
from sqlalchemy import create_engine
import urllib
import pyodbc
quoted = urllib.parse.quote_plus("DRIVER={SQL Server};SERVER=YOUR\ServerName;DATABASE=YOur_Database")
engine = create_engine('mssql+pyodbc:///?odbc_connect={}'.format(quoted))
df.to_sql('Table_Name', schema='dbo', con = engine, chunksize=200, method='multi', index=False, if_exists='replace')
Don't avoid method='multi', because it significantly reduces the task execution time.
Sometimes you may encounter the following error.
ProgrammingError: ('42000', '[42000] [Microsoft][ODBC SQL Server
Driver][SQL Server]The incoming request has too many parameters. The
server supports a maximum of 2100 parameters. Reduce the number of
parameters and resend the request. (8003) (SQLExecDirectW)')
In such a case, determine the number of columns in your dataframe: df.shape[1]. Divide the maximum supported number of parameters by this value and use the result's floor as a chunk size.
I found that using bcp utility (https://learn.microsoft.com/en-us/sql/tools/bcp-utility) works best when you have a large dataset. I have 2.7 million rows that inserts at 80K rows/sec. You can store your data frame as csv file (use tabs for separator if your data doesn't have tabs and utf8 encoding). With bcp, I've used format "-c" and it works without issues so far.
This worked for me on Python 3.5.2:
import sqlalchemy as sa
import urllib
import pyodbc
conn= urllib.parse.quote_plus('DRIVER={ODBC Driver 17 for SQL Server};SERVER='+server+';DATABASE='+database+';UID='+username+';PWD='+ password)
engine = sa.create_engine('mssql+pyodbc:///?odbc_connect={}'.format(conn))
frame.to_sql("myTable", engine, schema='dbo', if_exists='append', index=False, index_label='myField')
"As the Connection represents an open resource against the database, we want to always limit the scope of our use of this object to a specific context, and the best way to do that is by using Python context manager form, also known as the with statement."
https://docs.sqlalchemy.org/en/14/tutorial/dbapi_transactions.html
The example would then be
from sqlalchemy import create_engine
import urllib
import pyodbc
connection_string = (
"Driver={SQL Server Native Client 11.0};"
"Server=myserver;"
"UID=myuser;"
"PWD=mypwd;"
"Database=mydb;"
)
quoted = urllib.parse.quote_plus(connection_string)
engine = create_engine(f'mssql+pyodbc:///?odbc_connect={quoted}')
with engine.connect() as cnn:
df.to_sql('mytable',con=cnn, if_exists='replace', index=False)
Following is what worked for me using sqlalchemy. Pay attention to the last part ?driver=SQL+Server'.
import sqlalchemy
import pyodbc
engine = sqlalchemy.create_engine('mssql+pyodbc://MyUser:MyPWD#dataserver.sandbox.myserver/MY_DB?driver=SQL+Server')
dt.to_sql("PatientResultTest", engine,if_exists='append')
The SQL table needs an index column at the beginning to store the index value of dataframe.
# using class function
import pandas as pd
import pyodbc
import sqlalchemy
import urllib
class data_frame_to_sql():
def__init__(self,dataFrame,sql_table_name):
self.dataFrame=dataFrame
self.sql_table_name=sql_table_name
def conversion(self):
params = urllib.parse.quote_plus("DRIVER={SQL Server};"
"SERVER=######;"
"DATABASE=####;"
"UID=#####;"
"PWD=###;")
try:
engine = sqlalchemy.create_engine("mssql+pyodbc:///?odbc_connect={}".format(params))
return f"Table '{self.sql_table_name}' added sucsessfully in database" ,self.dataFrame.to_sql(self.sql_table_name, engine)
except Exception as e :
e=str(e).replace(".","")
print(f"{e} in Database." )
data={"BusinessEntityID":["1","2","3"],"FirstName":["raj","abhi","amir"],"LastName":["kapoor","bachn","khhan"]}
df = pd.DataFrame(data, columns= ['BusinessEntityID','FirstName','LastName'])
ab=data_frame_to_sql(df,"ab").conversion()
print(ab)
It's not necessary to use sqlamchemy, one could create a connection with pyodbc directly to use it with pandas, as below: `with pyodbc.connect('DRIVER={ODBC Driver 18 for SQL Server};SERVER='+server
+';DATABASE='+database+';UID='+username+';PWD='+ password) as newconn:
df = pd.read_sql(,newconn)
`

Categories