How to create a SQLite Table with a JSON column using SQLAlchemy? - python

According to this answer, SQLite has supported JSON data since version 3.9. I use version 3.24 in combination with SQLAlchemy (1.2.8) and Python 3.6, but I cannot create any tables containing JSON columns.
What am I missing or doing wrong? A minimal (not) working example is given below:
import sqlalchemy as sa
import os
import tempfile
metadata = sa.MetaData()
foo = sa.Table(
'foo',
metadata,
sa.Column('bar', sa.JSON)
)
tmp_dir = tempfile.mkdtemp()
dbname = os.path.join(tmp_dir, 'foo.db')
engine = sa.create_engine('sqlite:////' + dbname)
metadata.bind = engine
metadata.create_all()
This fails giving the following error:
sqlalchemy.exc.CompileError: (in table 'foo', column 'bar'): Compiler <sqlalchemy.dialects.sqlite.base.SQLiteTypeCompiler object at 0x7f1eae1dab70> can't render element of type <class 'sqlalchemy.sql.sqltypes.JSON'>
Thanks!

Use a TEXT column. SQLite has a JSON extension with some functions for working with JSON data, but no dedicated JSON type.

Related

AttributeError: 'pyodbc.Cursor' object has no attribute 'dialect'

I am trying to create a table in a database. My connection code is shown below:
# pyodbc connection connect to server
conn = pyodbc.connect(
"driver={SQL Server};server=xxxxxxxxxxx; database=master; trusted_connection=true",
autocommit=True, Trusted_Connection='Yes')
crsr = conn.cursor()
# connect db (connect to database name) using SQL-Alchemy
engine = create_engine(
'mssql+pyodbc://xxxxxxxxxxx/master?driver=SQL+Server+Native+Client+11.0')
connection = engine.connect()
It's just a pyodbc connection,
and this is the error I found:
Traceback (most recent call last):
File "C:/Users/haroo501/PycharmProjects/ToolUpdated/app.py", line 22, in <module>
dfeed_gsm_relation_m.push_dfeed_gsm_relation_sql()
File "C:\Users\haroo501\PycharmProjects\ToolUpdated\meta_data\dfeed_gsm_relation_m.py", line 31, in push_dfeed_gsm_relation_sql
if connec.crsr.dialect.has_table(connec.crsr, DATAF_GSM_RELATION):
AttributeError: 'pyodbc.Cursor' object has no attribute 'dialect'
and this is the code that creates the table in the database using MetaData():
from sqlalchemy import MetaData, Table, Column, Integer, String, Date, Float
from database import connec
import sqlalchemy as db
import pandas as pd
import numpy as np
from txt_to_csv import convert_to_csv
import os
def push_dfeed_gsm_relation_sql():
# Create a ditionary for all gsm_relations_mnm relation excel file
dataf_gsm_relation_col_dict = {
'cell_name': 'Cellname',
'n_cell_name': 'Ncellname',
'technology': 'Technology',
}
# table name in database 'df_gsm_relation'
DATAF_GSM_RELATION = 'df_gsm_relation'
# Create a list for dataf_gsm_relation_cols and put the dictionary in the list
dataf_gsm_relation_cols = list(dataf_gsm_relation_col_dict.keys())
dataf_gsm_relation_cols_meta = MetaData()
dataf_gsm_relation_relations = Table(
DATAF_GSM_RELATION, dataf_gsm_relation_cols_meta,
Column('id', Integer, primary_key=True),
Column(dataf_gsm_relation_cols[0], Integer),
Column(dataf_gsm_relation_cols[1], Integer),
Column(dataf_gsm_relation_cols[2], String),
)
if connec.crsr.dialect.has_table(connec.crsr, DATAF_GSM_RELATION):
dataf_gsm_relation_relations.drop(connec.crsr)
dataf_gsm_relation_cols_meta.create_all(connec.crsr)
dataf_gsm_rel_txt = 'gsmrelation_mnm.txt'
dataf_gsm_txt_df = pd.read_csv(dataf_gsm_rel_txt, sep=';')
dataf_gsm_rel_df_column_index = list(dataf_gsm_txt_df.columns)
dataf_gsm_txt_df.reset_index(inplace=True)
dataf_gsm_txt_df.drop(columns=dataf_gsm_txt_df.columns[-1], inplace=True)
dataf_gsm_rel_df_column_index = dict(zip(list(dataf_gsm_txt_df.columns), dataf_gsm_rel_df_column_index))
dataf_gsm_txt_df.rename(columns=dataf_gsm_rel_df_column_index, inplace=True)
dataf_gsm_txt_df.to_excel('gsmrelation_mnm.xlsx', 'Sheet1', index=False)
dataf_gsm_rel_excel = 'gsmrelation_mnm.csv'
dataf_gsm_rel_df = pd.read_csv(os.path.join(os.path.dirname(__file__), dataf_gsm_rel_excel), dtype={
dataf_gsm_relation_col_dict[dataf_gsm_relation_cols[0]]: int,
dataf_gsm_relation_col_dict[dataf_gsm_relation_cols[1]]: int,
dataf_gsm_relation_col_dict[dataf_gsm_relation_cols[2]]: str,
})
dataf_gsm_relations_table_query = db.insert(dataf_gsm_relation_relations)
dataf_gsm_relations_values_list = []
dataf_gsm_relations_row_count = 1
for i in dataf_gsm_rel_df.index:
dataf_gsm_relations_row = dataf_gsm_rel_df.loc[i]
dataf_gsm_rel_df_record = {'id': dataf_gsm_relations_row_count}
for col in dataf_gsm_relation_col_dict.keys():
if col == dataf_gsm_relation_cols[0] or col == dataf_gsm_relation_cols[1]:
dataf_gsm_rel_df_record[col] = int(dataf_gsm_relations_row[dataf_gsm_relation_col_dict[col]])
else:
dataf_gsm_rel_df_record[col] = dataf_gsm_relations_row[dataf_gsm_relation_col_dict[col]]
dataf_gsm_relations_values_list.append(dataf_gsm_rel_df_record)
dataf_gsm_relations_row_count += 1
ResultProxy_dataf_gsm_relations = connec.crsr.execute(dataf_gsm_relations_table_query,
dataf_gsm_relations_values_list)
The problem is in this part:
if connec.crsr.dialect.has_table(connec.crsr, DATAF_GSM_RELATION):
dataf_gsm_relation_relations.drop(connec.crsr)
dataf_gsm_relation_cols_meta.create_all(connec.crsr)
I know that dialect belongs to the engine created with from sqlalchemy import create_engine, which was my old connection; I have since switched to a new connection created with import pyodbc.
So how can I solve this case using pyodbc module?
Edited
Another way to approach this would be to know how to CREATE and DROP a table in an existing database using SQLAlchemy,
and this is the related code example:
from database import connec
def create_db():
create_bd_query = "CREATE DATABASE MyNewDatabase"
connec.crsr.execute(create_bd_query)
def delete_database():
delete_bd_query = "DROP DATABASE MyNewDatabase"
connec.crsr.execute(delete_bd_query)
You cannot just import a completely different module and expect it to behave the same :)
Dialects are what SQLalchemy uses to communicate to different drivers.
In this instance Pyodbc IS the driver so it has no need for a dialect.
From SQLAlchemy:
Dialects
The dialect is the system SQLAlchemy uses to communicate with various types of DBAPI implementations and databases. The sections that follow contain reference documentation and notes specific to the usage of each backend, as well as notes for the various DBAPIs.
All dialects require that an appropriate DBAPI driver is installed.
Included Dialects
PostgreSQL
MySQL
SQLite
Oracle
Microsoft SQL Server
Microsoft SQL Server
Support for the Microsoft SQL Server database.
DBAPI Support
The following dialect/DBAPI options are available. Please refer to individual
DBAPI sections for connect information.
PyODBC
mxODBC
pymssql
zxJDBC for Jython
adodbapi
Judging from the error and by looking at the PyODBC Wiki Documentation
I think this line:
if connec.crsr.dialect.has_table(connec.crsr, DATAF_GSM_RELATION):
should read:
# Does table 'DATAF_GSM_RELATION' exist?
if connec.crsr.tables(table=DATAF_GSM_RELATION).fetchone():
...

How to reflect an oracle database with BINARY_DOUBLE type columns

I tried to reflect an existing oracle database into sqlalchemy metadata:
from sqlalchemy import create_engine
from sqlalchemy import MetaData
from sqlalchemy import Table
db_uri = 'oracle://USER:PASS@MYDBTNSNAME'
engine = create_engine(db_uri)
# create a MetaData instance
metadata = MetaData()
# reflect db schema to MetaData
metadata.reflect(bind=engine)
This returns the following:
SAWarning: Did not recognize type 'BINARY_DOUBLE' of column 'column_1'(coltype, colname))
I have tried to import native types and also the types from dialect oracle using
from sqlalchemy.types import *
from sqlalchemy.dialects.oracle import *
but it seems it does not recognize BINARY_DOUBLE type
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-13-b69d481f6a4e> in <module>()
1 from sqlalchemy.types import *
----> 2 from sqlalchemy.dialects.oracle import *
AttributeError: module 'sqlalchemy.dialects.oracle' has no attribute 'BINARY_DOUBLE'
I am using SQLAlchemy, version '1.2.1'
Have you tried overriding the default reflection mapping for the column in your table? Like so:
from sqlalchemy.dialects.oracle.base import BINARY_DOUBLE
group_table = sa.Table('groups', metadata,
sa.Column('your_column', BINARY_DOUBLE(asdecimal=True)),
autoload=True,
include_columns=[
'your_column',
'...'
],
)
Or just importing BINARY_DOUBLE from sqlalchemy.dialects.oracle.base
I'd ask that in a comment, but I can't do so as I just joined.
As described in sqlalchemy changelog, this functionality has been included in version 1.2.8:
[oracle] [bug] Added reflection capabilities for the oracle.BINARY_FLOAT, oracle.BINARY_DOUBLE datatypes
I have checked it using 1.2.18 version and now reflection works.
You're trying to use a column type that is not well supported by sqlalchemy / DB driver. So avoid that type.
Create a view as (roughly) SELECT * FROM base_table, with the minor tweak of casting binary double columns to a more convenient numeric type. Then reflect the view.

Specifying pyODBC options (fast_executemany = True in particular) using SQLAlchemy

I would like to switch on the fast_executemany option for the pyODBC driver while using SQLAlchemy to insert rows into a table. By default it is off and the code runs really slowly... Could anyone suggest how to do this?
Edits:
I am using pyODBC 4.0.21 and SQLAlchemy 1.1.13, and a simplified sample of the code I am using is presented below.
import sqlalchemy as sa
def InsertIntoDB(self, tablename, colnames, data, create = False):
"""
Inserts data into given db table
Args:
tablename - name of db table with dbname
colnames - column names to insert to
data - a list of tuples, a tuple per row
"""
# reflect table into a sqlalchemy object
meta = sa.MetaData(bind=self.engine)
reflected_table = sa.Table(tablename, meta, autoload=True)
# prepare an input object for sa.connection.execute
execute_inp = []
for i in data:
execute_inp.append(dict(zip(colnames, i)))
# Insert values
self.connection.execute(reflected_table.insert(),execute_inp)
Try this for pyodbc
crsr = cnxn.cursor()
crsr.fast_executemany = True
Starting with version 1.3, SQLAlchemy has directly supported fast_executemany, e.g.,
engine = create_engine(connection_uri, fast_executemany=True)

sqlalchemy add index to existing sqlite3 database

I have created a database with pandas :
import numpy as np
import sqlite3
import pandas as pd
import sqlite3
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
df = pd.DataFrame(np.random.normal(0, 1, (10, 2)), columns=['A', 'B'])
path = 'sqlite:////home/username/Desktop/example.db'
engine = create_engine(path, echo=False)
df.to_sql('flows', engine, if_exists='append', index=False)
# This is only to show I am able to read the database
df_l = pd.read_sql("SELECT * FROM flows WHERE A>0 AND B<0", engine)
Now I would like to add one or more indexes to the database.
In this case I would like to first create an index on column A only, and then an index on both columns.
How can I do that?
If possible I would like a solution that uses only SqlAlchemy so that it is independent from the choice of the database.
You should use reflection to get hold of the table that pandas created for you.
With reference to:
SQLAlchemy Reflecting Database Objects
A Table object can be instructed to load information about itself from
the corresponding database schema object already existing within the
database. This process is called reflection. In the most simple case
you need only specify the table name, a MetaData object, and the
autoload=True flag. If the MetaData is not persistently bound, also
add the autoload_with argument:
you could try this:
meta = sqlalchemy.MetaData()
meta.reflect(bind=engine)
flows = meta.tables['flows']
# alternative of retrieving the table from meta:
#flows = sqlalchemy.Table('flows', meta, autoload=True, autoload_with=engine)
my_index = sqlalchemy.Index('flows_idx', flows.columns.get('A'))
my_index.create(bind=engine)
# lets confirm it is there
inspector = reflection.Inspector.from_engine(engine)
print(inspector.get_indexes('flows'))
This seems to work for me. You will have to define the variables psql_URI, table, and col yourself. Here I assume that the table name / column name may be in (partial) uppercase but you want the name of the index to be lowercase.
Derived from the answer here: https://stackoverflow.com/a/72976667/3406189
import sqlalchemy
from sqlalchemy.orm import Session
engine_psql = sqlalchemy.create_engine(psql_URI)
autocommit_engine = engine_psql.execution_options(isolation_level="AUTOCOMMIT")
with Session(autocommit_engine) as session:
session.execute(
f'CREATE INDEX IF NOT EXISTS idx_{table.lower()}_{col.lower()} ON sdi_ai."{table}" ("{col}");'
)

List database tables with SQLAlchemy

I want to implement a function that gives information about all the tables (and their column names) that are present in a database (not only those created with SQLAlchemy). While reading the documentation it seems to me that this is done via reflection but I didn't manage to get something working. Any suggestions or examples on how to do this?
start with an engine:
from sqlalchemy import create_engine
engine = create_engine("postgresql://u:p@host/database")
quick path to all table /column names, use an inspector:
from sqlalchemy import inspect
inspector = inspect(engine)
for table_name in inspector.get_table_names():
for column in inspector.get_columns(table_name):
print("Column: %s" % column['name'])
docs: http://docs.sqlalchemy.org/en/rel_0_9/core/reflection.html?highlight=inspector#fine-grained-reflection-with-inspector
alternatively, use MetaData / Tables:
from sqlalchemy import MetaData
m = MetaData()
m.reflect(engine)
for table in m.tables.values():
print(table.name)
for column in table.c:
print(column.name)
docs: http://docs.sqlalchemy.org/en/rel_0_9/core/reflection.html#reflecting-all-tables-at-once
First set up the sqlalchemy engine.
from sqlalchemy import create_engine, inspect, text
from sqlalchemy.engine import url
connect_url = url.URL(
'oracle',
username='db_username',
password='db_password',
host='db_host',
port='db_port',
query=dict(service_name='db_service_name'))
engine = create_engine(connect_url)
try:
engine.connect()
except Exception as error:
print(error)
return
Like others have mentioned, you can use the inspect method to get the table names.
But in my case, the list of tables returned by the inspect method was incomplete.
So, I found out another way to find table names by using pure SQL queries in sqlalchemy.
query = text("SELECT table_name FROM all_tables where owner = '%s'"%str('db_username'))
table_name_data = self.session.execute(query).fetchall()
Just for the sake of completeness, here's the code to fetch table names with the inspect method (if it works well in your case).
inspector = inspect(engine)
table_names = inspector.get_table_names()
Hey I created a small module that helps easily reflecting all tables in a database you connect to with SQLAlchemy, give it a look: EZAlchemy
from EZAlchemy.ezalchemy import EZAlchemy
DB = EZAlchemy(
db_user='username',
db_password='pezzword',
db_hostname='127.0.0.1',
db_database='mydatabase',
d_n_d='mysql' # stands for dialect+driver
)
# this function loads all tables in the database to the class instance DB
DB.connect()
# List all associations to DB, you will see all the tables in that database
dir(DB)
I'm proposing another solution as I was not satisfied by any of the previous in the case of postgres which uses schemas. I hacked this solution together by looking into the pandas source code.
from sqlalchemy import MetaData, create_engine
from typing import List
def list_tables(pg_uri: str, schema: str) -> List[str]:
with create_engine(pg_uri).connect() as conn:
meta = MetaData(conn, schema=schema)
meta.reflect(views=True)
return list(meta.tables.keys())
In order to get a list of all tables in your schema, you need to form your postgres database URI pg_uri (e.g. "postgresql://u:p@host/database", as in zzzeek's answer) as well as the schema's name, schema. So if we use the example URI together with the typical schema public, we would get all the tables and views with:
list_tables("postgresql://u:p@host/database", "public")
While reflection/inspection is useful, I had trouble getting the data out of the database. I found sqlsoup to be much more user-friendly. You create the engine using sqlalchemy and pass that engine to sqlsoup.SQLSoup, i.e.:
import sqlsoup
def create_engine():
from sqlalchemy import create_engine
return create_engine(f"mysql+mysqlconnector://{database_username}:{database_pw}@{database_host}/{database_name}")
def test_sqlsoup():
engine = create_engine()
db = sqlsoup.SQLSoup(engine)
# Note: database must have a table called 'users' for this example
users = db.users.all()
print(users)
if __name__ == "__main__":
test_sqlsoup()
If you're familiar with sqlalchemy then you're familiar with sqlsoup. I've used this to extract data from a wordpress database.

Categories