I want to create a class in Python that establishes a connection to SnowFlake. I have a user.txt file that specifies an account, warehouse, database, schema, and my user. Here is the code I have so far:
import pandas as pd
import snowflake.connector
import os
from getpass import getpass
import sfcommon.sfdb as sf
class Database:
    """Context-managed wrapper around a Snowflake connection.

    Prompts once for the password, exports it via the SFPASSWORD
    environment variable (which ``sfcommon.sfdb.get_connect`` is assumed
    to read -- confirm against that helper), and exposes thin
    cursor/connection helpers.
    """

    def __init__(self):
        # Chained assignment: sets the env var for get_connect() and keeps
        # the value on self.environ for backward compatibility.
        self.environ = os.environ['SFPASSWORD'] = getpass(prompt='Password: ', stream=None)
        self._conn = sf.get_connect()
        self._cursor = self._conn.cursor()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Commit-and-close when the ``with`` block is left.
        self.close()

    @property  # BUG FIX: was '#property', leaving these as plain methods,
    def connection(self):  # so self.connection.commit() raised AttributeError.
        return self._conn

    @property
    def cursor(self):
        return self._cursor

    def commit(self):
        self.connection.commit()

    def close(self, commit=True):
        """Close the connection, committing pending work by default."""
        if commit:
            self.commit()
        self.connection.close()

    def execute(self, sql, params=None):
        """Execute *sql* with optional bind *params* (no fetch)."""
        self.cursor.execute(sql, params or ())

    def fetchall(self):
        return self.cursor.fetchall()

    def fetchone(self):
        return self.cursor.fetchone()

    def query(self, sql, params=None):
        """Execute *sql* and return every resulting row."""
        self.cursor.execute(sql, params or ())
        return self.fetchall()
sql = "select * from test_database"
After running this class, I would hope to run this block of code to retrieve all the rows in my database in a pandas dataframe:
with Database() as test:
resultSet = pd.read_sql(sql, conn)
I have managed to create a connection to Snowflake but have been trying to get it into a class for easier readability.
Also, I have this block of code that I'm not sure how to integrate into my Database class
%reload_ext sql_magic
%config SQL.conn_name = 'conn'
You can use the code below to load the query results into a pandas DataFrame.
---------------------------------------------------------------------------------
import snowflake.connector
import pandas as pd
# creates a connection
def openConn():
    """Build and return a fresh Snowflake connection.

    Fill in the account/credential fields for your environment before use.
    """
    connection = snowflake.connector.connect(
        account='',
        user='',
        password='',
        database='',
        schema='public',
        warehouse='',
        role='',
    )
    return connection
def main():
    """Run a sample query and print the result set as a DataFrame."""
    connection = openConn()
    cursor = connection.cursor()
    cursor.execute("""<query>""")
    # Materialise the fetched rows into a pandas DataFrame.
    frame = pd.DataFrame(cursor.fetchall())
    print(frame)


if __name__ == "__main__":
    main()
------------------------------------------------------------------------------
Related
I was trying to create a wrapper class that used enter/exit to allow for a context manager to close a postgres db connection.
class ClosingConnection:
    """Context manager that opens a schema-scoped postgres connection.

    BUG FIX: the original ``__exit__`` closed the connection without
    committing, so psycopg2 rolled back every pending write on close --
    reads appeared to work but upserts were silently discarded.  It also
    swallowed OperationalError in ``__enter__`` and fell through to return
    None, which surfaced later as a confusing AttributeError; connection
    failures now propagate.
    """

    def __init__(self, schema_name: str) -> None:
        """
        :param schema_name: the db schema (i.e. the tenant)
        """
        super().__init__()
        self.schema_name = schema_name

    def __enter__(self):
        self.conn = psycopg2.connect(
            host=os.environ["DB_HOST"],
            port=os.environ["DB_PORT"],
            database=os.environ["DB_NAME"],
            user=os.environ["DB_USERNAME"],
            password=password,
            options=f"-c search_path={self.schema_name}",
            cursor_factory=psycopg2.extras.DictCursor,
        )
        return self.conn

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Commit pending writes, then close; psycopg2 discards any open
        # transaction on close, so the commit must come first.
        self.conn.commit()
        self.conn.close()
Usage
def get_db(schema_name: str):
    # Factory returning the context manager; ``with get_db(...) as db`` binds
    # the raw psycopg2 connection that ClosingConnection.__enter__ returns.
    return ClosingConnection(schema_name)

# Example usage: ``db`` is the psycopg2 connection, not the wrapper object.
with get_db("test") as db:
    cursor = db.cursor()
    cursor.execute("some sql")
    rv = cursor.fetchall()
    cursor.close()
This works fine for retrieving data, but if I do an upsert, only the first upsert is applied. For the remaining ones, rv is returned correctly, but the actual database is never updated.
In contrast if I get rid of the closing class, this works fine
Usage
def get_db(schema_name: str):
    """Open and return a postgres connection scoped to *schema_name*.

    BUG FIX: the original referenced ``self.conn`` and ``self.schema_name``
    inside a plain function (there is no ``self``), which raises NameError,
    and its ``except ... pass`` fell through to return None.  Use locals /
    the parameter, and let OperationalError propagate.
    """
    conn = psycopg2.connect(
        host=os.environ["DB_HOST"],
        port=os.environ["DB_PORT"],
        database=os.environ["DB_NAME"],
        user=os.environ["DB_USERNAME"],
        password=password,
        options=f"-c search_path={schema_name}",
        cursor_factory=psycopg2.extras.DictCursor,
    )
    return conn
# BUG FIX: the original assigned the connection to ``conn`` but then called
# ``db.cursor()`` -- NameError.  Use one name throughout.
conn = get_db("test")
try:
    cursor = conn.cursor()
    cursor.execute("some sql")
    rv = cursor.fetchall()
    cursor.close()
finally:
    conn.close()
For curiosity here is the upsert command I'm using (I've also tried the older with upsert version)
INSERT INTO settings_account (setting_name, setting_value)
VALUES (%(setting_name)s, %(setting_value)s)
ON CONFLICT (setting_name)
DO
UPDATE SET setting_value=EXCLUDED.setting_value
RETURNING *
Why is the wrapper class causing the db writes to fail? Is the idea of wrapper class flawed?
Thanks to the comment above, I was able to get this working with:
class ClosingConnection:
    """Context manager yielding a schema-scoped postgres connection.

    ``__exit__`` finishes the transaction before closing: commit on a clean
    exit, rollback when the ``with`` body raised, so half-done work is never
    silently committed.
    """

    def __init__(self, schema_name: str) -> None:
        """
        :param schema_name: the db schema (i.e. the tenant)
        """
        super().__init__()
        self.schema_name = schema_name

    def __enter__(self):
        # BUG FIX: the original caught OperationalError and fell through to
        # return None, which made the ``with`` target None and later broke
        # __exit__ with AttributeError.  Let connection failures propagate.
        self.conn = psycopg2.connect(
            host=os.environ["DB_HOST"],
            port=os.environ["DB_PORT"],
            database=os.environ["DB_NAME"],
            user=os.environ["DB_USERNAME"],
            password=password,
            options=f"-c search_path={self.schema_name}",
            cursor_factory=psycopg2.extras.DictCursor,
        )
        return self.conn

    def __exit__(self, exc_type, exc_val, exc_tb):
        # BUG FIX: commit unconditionally meant an exception mid-batch still
        # committed the partial work; roll back instead when the body raised.
        try:
            if exc_type is None:
                self.conn.commit()
            else:
                self.conn.rollback()
        finally:
            self.conn.close()
def get_db(schema_name: str):
    # Factory for the context manager above.
    return ClosingConnection(schema_name)

# Example usage: the outer ``with`` commits and closes the connection on
# exit; the inner ``with`` closes the cursor.
with get_db("test") as db:
    with db.cursor() as cursor:
        cursor.execute("some sql")
        rv = cursor.fetchall()
I've got class to connect to my Database.
import psycopg2, psycopg2.extensions
from parseini import config
import pandas as pd, pandas.io.sql as sqlio
class MyDatabase:
    """Small psycopg2 + pandas helper configured from an ini file."""

    def __init__(self, name='mydb.ini'):
        self.params = config(filename=name)
        self.my_connection = psycopg2.connect(**self.params)
        self.my_cursor = self.my_connection.cursor()

    def fetch_all_as_df(self, sql_statement):
        """Run *sql_statement* and return the result set as a DataFrame."""
        return sqlio.read_sql_query(sql_statement, self.my_connection)

    def df_to_sql(self, df):
        """Write *df* to the hard-coded table 'sometable'."""
        table = 'sometable'
        return sqlio.to_sql(df, table, self.my_connection)

    def __del__(self):
        # BUG FIX: the original closed cursor then connection unconditionally;
        # if __init__ failed partway (e.g. bad config file) the attributes do
        # not exist and __del__ itself raised during garbage collection.
        # Close whatever exists, silently -- consistent with the retry-enabled
        # variant of this class.
        for resource in (getattr(self, 'my_cursor', None),
                         getattr(self, 'my_connection', None)):
            try:
                if resource is not None:
                    resource.close()
            except Exception:
                pass
How could I reconnect to database and handle psycopg2.OperationalError in my case?
You could make a decorator that tries to reconnect when psycopg2.InterfaceError or psycopg2.OperationalError are raised.
That's just an example how it could work and probably needs adjustments:
import time
from functools import wraps
import psycopg2, psycopg2.extensions
def retry(fn):
    """Decorator: retry *fn* after reconnecting when the DB link drops.

    On InterfaceError/OperationalError the wrapper sleeps, reconnects and
    tries again, up to ``cls._reconnectTries`` attempts.
    """
    @wraps(fn)  # BUG FIX: was '#wraps', which disabled the decorator
    def wrapper(*args, **kw):
        cls = args[0]
        last_error = None
        for attempt in range(cls._reconnectTries):
            print(attempt, cls._reconnectTries)
            try:
                return fn(*args, **kw)
            except (psycopg2.InterfaceError, psycopg2.OperationalError) as e:
                last_error = e
                print("\nDatabase Connection [InterfaceError or OperationalError]")
                print("Idle for %s seconds" % (cls._reconnectIdle))
                time.sleep(cls._reconnectIdle)
                cls._connect()
        # BUG FIX: the original fell off the end of the loop and silently
        # returned None after the final failure; surface the last error.
        raise last_error
    return wrapper
class MyDatabase:
    """DB helper whose read/write methods transparently reconnect via @retry."""

    _reconnectTries = 5
    _reconnectIdle = 2  # wait seconds before retrying

    def __init__(self, name='mydb.ini'):
        self.my_connection = None
        self.my_cursor = None
        self.params = config(filename=name)
        self._connect()

    def _connect(self):
        # NOTE: _connect itself is not retried -- the initial connect is
        # assumed to succeed (see the note below the example).
        self.my_connection = psycopg2.connect(**self.params)
        self.my_cursor = self.my_connection.cursor()

    @retry  # BUG FIX: was '#retry', which commented the decorator out
    def fetch_all_as_df(self, sql_statement):
        return sqlio.read_sql_query(sql_statement, self.my_connection)

    @retry
    def dummy(self):
        self.my_cursor.execute('select 1+2 as result')
        return self.my_cursor.fetchone()

    @retry
    def df_to_sql(self, df):
        table = 'sometable'
        return sqlio.to_sql(df, table, self.my_connection)

    def __del__(self):
        # Maybe there is a connection but no cursor, whatever close silently!
        for c in (self.my_cursor, self.my_connection):
            try:
                c.close()
            except Exception:  # c may be None or already closed -- ignore
                pass
# Demo: open the DB, then (manually) shut the database down during the
# sleep to exercise the reconnect path inside dummy().
db = MyDatabase()
time.sleep(30)  # some time to shutdown the database
print(db.dummy())
Output:
Database Connection [InterfaceError or OperationalError]
Idle for 2 seconds
Database Connection [InterfaceError or OperationalError]
Idle for 2 seconds
Database Connection [InterfaceError or OperationalError]
Idle for 2 seconds
Database Connection [InterfaceError or OperationalError]
Idle for 2 seconds
(3,)
Note: _connect itself is not decorated, so this code assumes an initial connect always works!
I'm new at Python and I'm trying to build a start project to get into this language.
I created a SQLite3 DB and managed to make transactions with it.
Everything works fine.
I wanted to get deeper in Python so I've been searching and discovered Decorators and Context Manager and I was trying to implement these concepts on my Query Execution's functions. However, I'm stuck with a problem.
I've created a class that handles the open and close connection tasks.
DB_ContextManager.py class:
class DB_ContextManager():
    """Open a sqlite3 connection on entry and close it again on exit."""

    def __init__(self, db_connection):
        # Database path / connection string; the connection itself is only
        # opened lazily inside __enter__.
        self.db_connection = db_connection

    def __enter__(self):
        connection = sqlite3.connect(self.db_connection)
        self.conn = connection
        return connection

    def __exit__(self, exc_type, exc_val, exc_tb):  # obligatory params
        self.conn.close()
And also created ConnectionDB.py which is responsible for executing queries.
from Database.DB_ContextManager import DB_ContextManager as DB_CM
# Handles SELECT queries
def ExecuteSelectQuery(self, pQuery):
    # Run *pQuery* inside the connection context manager and return the
    # fetched rows.  NOTE(review): pQuery is executed verbatim -- callers
    # must not pass untrusted strings (SQL injection risk).
    try:
        with DB_CM(db_connection_string) as conn:
            cur = conn.cursor()
            cur.execute(pQuery)
            result = cur.fetchall()
            return result
    except Exception as e:
        # Log through the project logger, then re-raise as the project's
        # database exception type.
        LH.Handler(log_folder, 'ConnectionDB', 'Queries', 'ExecuteSelectQuery', e)
        raise DE.ConnectionDB_Exception()
# Handles INSERTs, UPDATEs, DELETEs queries
def ExecuteNonQuery(self, pQuery):
    # NOTE(review): no commit() is issued here and DB_CM.__exit__ only
    # closes the connection, so these writes are presumably discarded when
    # the connection closes -- confirm against sqlite3's default
    # transaction handling.
    try:
        with DB_CM(db_connection_string) as conn:
            cur = conn.cursor()
            cur.execute(pQuery)
    except Exception as e:
        # NOTE(review): the log tag says 'ExecuteSelectNonQuery' but this
        # function is ExecuteNonQuery -- likely a copy/paste slip.
        LH.Handler(log_folder, 'ConnectionDB', 'Queries', 'ExecuteSelectNonQuery', e)
        raise DE.ConnectionDB_Exception()
As you can see
with DB_CM(db_connection_string) as conn:
cur = conn.cursor()
cur.execute(pQuery)
is repeated in each function
To avoid this situation, I'd like to create a Decorator function that encapsulates this piece of code.
My problem is that the cursor 'dies' inside the ContextManager and, for example, ExecuteSelectQuery needs the cursor to fetch the return data after the query was executed.
I know it's a small project and thinking so long term in future may not be necessary. But, remember, it's a start project and I'm learning to apply new concepts.
SOLUTION
As #blhsing suggested, I return the connection object instead of the cursor in the ContextManager.
Also I handles commit() and rollback() in it.
So, summarizing:
ConnectionDB.py
def ExecuteSelectQuery(self, pQuery):
    """Run *pQuery* through the context manager and return all rows."""
    with DB_CM(db_connection_string, pQuery) as cursor:
        return cursor.fetchall()
def ExecuteSelectNonQuery(self, pQuery):
    """Run a non-SELECT *pQuery*; DB_CM executes and commits it on entry."""
    with DB_CM(db_connection_string, pQuery):
        pass
and ConnectionDB.py
class DB_ContextManager():
    """Context manager that connects, runs *pQuery* and yields the cursor.

    The query is executed and committed in __enter__, so the ``with`` body
    only needs to fetch results (or do nothing for non-queries).
    """

    def __init__(self, db_connection, pQuery):
        self.db_connection = db_connection
        self.query = pQuery

    def __enter__(self):
        # BUG FIX: the original touched self.conn in the except block even
        # when sqlite3.connect() itself failed, turning the real error into
        # an AttributeError; it also leaked the connection on failure
        # because __exit__ never runs when __enter__ raises.
        self.conn = None
        try:
            self.conn = sqlite3.connect(self.db_connection)
            cur = self.conn.cursor()
            cur.execute(self.query)
            self.conn.commit()
            return cur
        except Exception as e:
            LH.Handler(log_folder, 'DB_ContextManager', 'DB_ContextManager', '__enter__', e)
            if self.conn is not None:
                self.conn.rollback()
                self.conn.close()
            raise DE.ConnectionDB_Exception()

    def __exit__(self, exc_type, exc_val, exc_tb):  # obligatory params
        self.conn.close()
You can make the context manager return the cursor instead of the connection object:
class DB_CM():
    """Context manager yielding a cursor that has already run the query.

    BUG FIX: the original executed a free variable ``pQuery`` that is not
    defined anywhere in the class, so __enter__ only worked if a global of
    that name happened to exist.  The query is now passed in explicitly;
    the old one-argument call still falls back to the global for
    compatibility.
    """

    def __init__(self, db_connection, pQuery=None):
        self.db_connection = db_connection
        self.query = pQuery

    def __enter__(self):
        self.conn = sqlite3.connect(self.db_connection)
        cur = self.conn.cursor()
        # Fall back to the module-level pQuery if none was supplied
        # (preserves the original behaviour for legacy callers).
        cur.execute(self.query if self.query is not None else pQuery)
        return cur

    def __exit__(self, exc_type, exc_val, exc_tb):  # obligatory params
        self.conn.close()
so that the try block of ExecuteSelectQuery can be revised as:
with DB_CM(db_connection_string) as cur:
result = cur.fetchall()
return result
and the try block of ExecuteNonQuery can be simply:
with DB_CM(db_connection_string):
pass
I wrote a method in a class that tries to read some data from a database. I've also decorated this method with a decorator which checks if the connection to the database was open, and in case it was not, restarts it.
class HiveConnection(object):
    """Wrapper around a Hive/Impala connection with reconnect-on-read."""

    def __init__(self, host, user, password):
        """Instantiate a HiveConnector object."""
        self.host = host
        self.user = user
        self.password = password
        self.port = 10000
        self.auth_mechanism = 'PLAIN'
        self._connection = self._connect()

    def _connect(self):
        """Start the connection to database."""
        try:
            return connect(host=self.host, port=self.port,
                           user=self.user, password=self.password,
                           auth_mechanism=self.auth_mechanism)
        except TTransportException:
            # BUG FIX: the original recursed without returning, so after one
            # failed attempt the caller always received None.  Propagate the
            # retried connection instead.  NOTE(review): still unbounded on
            # persistent failure -- consider a retry limit.
            print('Failed attempt to connect')
            return self._connect()

    def _disconnect(self):
        """Close connection to database."""
        self._connection.close()
        # BUG FIX: drop the stale handle so hadoop_connection_handler can
        # detect the dead connection and reconnect (a closed connection
        # object is still truthy).
        self._connection = None

    def hadoop_connection_handler(function):
        """Start a database connection if not already open."""
        @wraps(function)  # BUG FIX: was '#wraps'
        def wrapper(inst, *args, **kwargs):
            if not inst._connection:
                # BUG FIX: the original discarded _connect()'s return value,
                # so _connection was never refreshed.
                inst._connection = inst._connect()
            return function(inst, *args, **kwargs)
        return wrapper

    @hadoop_connection_handler  # BUG FIX: was '#hadoop_connection_handler'
    def read(self, query):
        """Execute a query to pull the data.

        Args:
            query: [str] Query to pull the data.
        Returns:
            A list of namedtuple (`Record`).
        """
        columns = self._columns(query)
        cursor = self._connection.cursor()
        cursor.execute(query)
        Record = namedtuple("Record", columns)
        # BUG FIX: materialise before returning; the original handed back a
        # lazy map object, not the documented list.
        data = list(map(Record._make, cursor.fetchall()))
        cursor.close()
        return data
Now I want to write a unit test to make sure this actually works.
from unittest.mock import patch, MagicMock
from nose.tools import assert_equal, raises
from services.db_connections import HiveConnection
class TestHiveConnection:
    """Integration test suite for HiveConnection class."""

    def setUp(self):
        # Integration-style fixture: opens a real connection in every test.
        self.hive = HiveConnection(user='username', password='password', host='myhost.net')

    def test_reconnect(self):
        """If the connection drops, the object should be able to establish a
        new connection.
        """
        query = 'SELECT * FROM database.table 1'
        # NOTE(review): this assertion fails as written.  _disconnect() only
        # calls close() on the connection object and leaves the (presumably
        # still truthy) closed handle in self.hive._connection, so the
        # decorator's `if not inst._connection` guard never fires and the
        # mocked _connect is never invoked.  Either set
        # self.hive._connection = None after disconnecting, or change
        # _disconnect to clear the attribute itself.
        self.hive._connect = MagicMock()
        self.hive._disconnect()
        self.hive.read(query)
        assert_equal(self.hive._connect.called, True)
The above test always fails: self.hive._connect.called is in fact False. I think this is because the _connect() method is called from within the decorator. How should I change the test to account for that?
i was planning to change my project to multiprocesses so i can use more resources,here's my database module code
import pymysql
import threading
class tdb:
    """Thin pymysql wrapper that defers commit+close to a background thread."""

    def __init__(self):
        # Running total of rows affected by execute(); end() only bothers
        # committing when this is non-zero.
        self.totalEffected = 0

    def start(self):
        """Open the MySQL connection."""
        self.conn = pymysql.connect(host='xxxx', port=3306, user='root', passwd='xxxx', db='xxxx', charset='utf8')

    def select(self, sql, args=None):
        """Run a SELECT and return every fetched row."""
        cursor = self.conn.cursor()
        cursor.execute(sql, args)
        rows = cursor.fetchall()
        cursor.close()
        return rows

    def execute(self, sql, args=None):
        """Run a write statement and accumulate the affected-row count."""
        cursor = self.conn.cursor()
        affected = cursor.execute(sql, args)
        cursor.close()
        self.totalEffected += affected
        return affected

    def __commitCallback(self, result):
        print('commit result:', result)
        self.conn.close()

    def errorc(self, *args):
        print('error')

    def end(self):
        """Finish up: commit in a background thread if anything was written."""
        if self.totalEffected != 0:
            # Hand the (potentially slow) commit to a thread so that end()
            # can return immediately.
            worker = threading.Thread(target=self.t)
            worker.start()
        else:
            self.conn.close()

    def t(self):
        # Thread target: flush the pending transaction, then close.
        self.conn.commit()
        self.conn.close()
The only operation that really needs special handling is conn.commit(). I currently run it in a thread so the call can return immediately. I once tried Pool.apply_async(), but its callback never fired, so I want to know how to have the other process notify me when the commit finishes, rather than blocking while I wait to receive the result.