Extract data from json file python3 - python

How can I get all the data in my image database into the database ?
My code :
import re
import json
import sqlite3
connection = sqlite3.connect('example.db')
cursor = connection.cursor()
print ("Opened database successfully");
with open('tem.txt', encoding='utf-8-sig') as json_file:
data = json.load(json_file)
for p in data:
data[p] = re.sub("<[^>]+>", "", str(data[p]))
print("%s: %s" % (p, data[p]))
I use SQLite:

Use pandas to read the JSON into a DataFrame and then write it to the database.
I have added some sample code.
from sqlalchemy import create_engine
import pandas as pd

# Raw string so the backslash in the Windows-style path is kept literally
# instead of being parsed as an escape sequence.
df = pd.read_json(r'path\data.json')

# create_engine is imported by name above; the bare name `sqlalchemy` is never
# bound in this snippet, so `sqlalchemy.create_engine` would raise NameError.
engine = create_engine('sqlite:///my.db', echo=False)

# Append the rows to `mytable`.
df.to_sql('mytable', con=engine, if_exists='append')
The advantage of this is that with pandas you can make changes in the data very easily.

Related

How to read a .db file in Python?

I have an Excel file and want to store its contents in a .db file. I have done that through SQLite. Now I want to read my .db file through Python, which I am unable to do because the code I have used reports that the data is empty.
Below is the code:
df=pd.read_excel('filename.xlsx')
db='xyzDB'
conn=sqlite3.connect(db + '.sqlite')
c=conn.cursor()
table_list = [a for a in c.execute("SELECT name FROM sqlite_master WHERE type = 'Sheet1'")]
print(tablelist)
#another method
chunksize = 10000
for chunk in pd.read_excel('filename.xlsx', chunksize=chunksize):
chunk.columns = chunk.columns.str.replace(' ', '_') #replacing
chunk.to_sql(name='Sheet1', con=conn)
names = list(map(lambda x: x[0], c.description)) #Returns the column names
print(names)
for row in c:
print(row)
Note: I found these two pieces of code on the net and do not understand them. I would appreciate it if you could guide me.
Try something like this ...
import pandas as pd
import sqlite3 as sq

# Read the CSV into a DataFrame.
df = pd.read_csv('addresses.csv')

sql_data = 'addresses.sqlite'
conn = sq.connect(sql_data)
try:
    # Write the DataFrame to the database, replacing any existing table.
    df.to_sql('addresses', conn, if_exists='replace', index=False)
    conn.commit()
    # Read the table back from the database to confirm what was stored.
    print(pd.read_sql('select * from addresses', conn))
finally:
    # Close the connection even if the write or the read-back fails.
    conn.close()

Create an excel file from BytesIO using python

I am using pandas library to store excel into bytesIO memory. Later, I am storing this bytesIO object into SQL Server as below-
df = pandas.DataFrame(data1, columns=['col1', 'col2', 'col3'])
output = BytesIO()
writer = pandas.ExcelWriter(output,engine='xlsxwriter')
df.to_excel(writer)
writer.save()
output.seek(0)
workbook = output.read()
#store into table
Query = '''
INSERT INTO [TABLE]([file]) VALUES(?)
'''
values = (workbook)
cursor = conn.cursor()
cursor.execute(Query, values)
cursor.close()
conn.commit()
#Create excel file.
Query1 = "select [file] from [TABLE] where [id] = 1"
result = conn.cursor().execute(Query1).fetchall()
print(result[0])
Now I want to pull the BytesIO object back from the table, create an Excel file from it, and store it locally. How do I do it?
Finally, I found a solution. Below are the steps performed:
Take the DataFrame, convert it to Excel, and store it in memory as a BytesIO object.
Store the BytesIO object's contents in a database column of type varbinary(max).
Pull the stored bytes back and create an Excel file locally.
Python Code:
# Get the required data into a DataFrame.
# NOTE(review): `data1` and `conn` are not defined in this snippet; they are
# assumed to be provided by the surrounding program.
df = pandas.DataFrame(data1, columns=['col1', 'col2', 'col3'])

# Convert the DataFrame to an Excel workbook held in memory.
output = BytesIO()
# Leaving the `with` block finalizes the workbook. (ExcelWriter.save() was
# removed in pandas 2.0, so the context manager is the portable spelling.)
with pandas.ExcelWriter(output, engine='xlsxwriter') as writer:
    df.to_excel(writer)
# getvalue() returns the whole buffer regardless of the stream position.
workbook = output.getvalue()

# Store the workbook bytes in the database table.
Query = '''
INSERT INTO [TABLE]([file]) VALUES(?)
'''
# The trailing comma makes this a one-element tuple; `(workbook)` without it
# is just the bytes object itself, not a parameter sequence.
values = (workbook,)
cursor = conn.cursor()
cursor.execute(Query, values)
cursor.close()
conn.commit()

# Retrieve the stored workbook bytes from the database.
Query1 = "select [file] from [TABLE] where [id] = 1"
result = conn.cursor().execute(Query1).fetchall()

# The column already holds a complete .xlsx file, so write the bytes out
# unchanged. Re-parsing with read_excel and re-exporting would add an extra
# index column and drop any formatting.
with open("outputFile.xlsx", "wb") as excel_file:
    excel_file.write(result[0][0])

SQL query output to .csv

I am running a SQL query from a Python API and want to collect the data in a structured CSV format (column-wise data under their headers).
This is the code so far I have.
sql = "SELECT id,author From researches WHERE id < 20 "
cursor.execute(sql)
data = cursor.fetchall()
print (data)
with open('metadata.csv', 'w', newline='') as f_handle:
writer = csv.writer(f_handle)
header = ['id', 'author']
writer.writerow(header)
for row in data:
writer.writerow(row)
Now the data is printed on the console but does not end up in the .csv file. This is what I am getting as output:
What am I missing?
Here is a simple example of what you are trying to do:
import sqlite3 as db
import csv
from contextlib import closing

# Run your query, the result is stored as `data`.
# closing() is used because a sqlite3 connection's own `with` block only
# manages the transaction; it does not close the connection.
with closing(db.connect('vehicles.db')) as conn:
    cur = conn.cursor()
    sql = "SELECT make, style, color, plate FROM vehicle_vehicle"
    cur.execute(sql)
    data = cur.fetchall()

# Create the csv file
with open('vehicle.csv', 'w', newline='') as f_handle:
    writer = csv.writer(f_handle)
    # Add the header/column names
    header = ['make', 'style', 'color', 'plate']
    writer.writerow(header)
    # Write every fetched row in a single call
    writer.writerows(data)
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from urllib.parse import quote_plus
params = quote_plus(r'Driver={SQL Server};Server=server_name; Database=DB_name;Trusted_Connection=yes;')
engine = create_engine("mssql+pyodbc:///?odbc_connect=%s" % params)
sql_string = '''SELECT id,author From researches WHERE id < 20 '''
final_data_fetch = pd.read_sql_query(sql_string, engine)
final_data_fetch.to_csv('file_name.csv')
Hope this helps!
with mysql - export csv with mysqlclient library - utf8
import csv
import MySQLdb as mariadb
import sys

# Name of the table to export; also used as the base name of the CSV file.
tablelue = "extracted_table"

try:
    conn = mariadb.connect(
        host="127.0.0.1",
        port=3306,
        user="me",
        password="mypasswd",
        database="mydb")
    try:
        cur = conn.cursor()
        # The first field of each "show columns" row is the column name.
        cur.execute("show columns from " + tablelue)
        header = [column[0] for column in cur.fetchall()]
        cur.execute("SELECT * FROM " + tablelue)
        wdata = cur.fetchall()
    finally:
        # Release the connection even when one of the queries fails.
        conn.close()

    # Create the csv file
    fichecrit = tablelue + ".csv"
    with open(fichecrit, 'w', newline='', encoding="utf8") as f_handle:
        writer = csv.writer(f_handle, delimiter=";")
        # Add the header/column names
        writer.writerow(header)
        # Write all data rows in a single call
        writer.writerows(wdata)
except Exception as e:
    print(f"Error: {e}")
    # Non-zero status so callers and shell scripts can detect the failure.
    sys.exit(1)
You can dump all results to the csv file without looping:
data = cursor.fetchall()
...
writer.writerows(data)

Pandas DF columns to SQLite tables

I have a database that I created in SQLite from a dictionary of pandas DataFrames, which were in turn created from a folder of CSV files. When I import the database into SQLite, each of the table names ends in ".csv". How can I strip this?
Here is the code:
import os
import glob
import pandas as pd
files = glob.glob(os.path.join("staging" + "/*.csv"))
print(files)
# Create an empty dictionary to hold the dataframes from csvs
dict_ = {}
# Write the files into the dictionary
for file in files:
fname = os.path.basename(file)
dict_[fname] = pd.read_csv(file, header = 0, dtype = str, encoding = 'cp1252').fillna('')
And for the sqlite DB:
# Create the SQL Lite database
import sqlite3
conn = sqlite3.connect("medicare_hospital_compare.db")
# Convert the dict_[file]'s to SQL tables
for key, df in dict_.items():
df.to_sql(key, conn, flavor = None, schema = None, if_exists = 'replace',
index = True, index_label = None, chunksize = None, dtype = None)

How to open and convert sqlite database to pandas dataframe

I have downloaded some data as a SQLite database (data.db) and I want to open this database in Python and then convert it into a pandas DataFrame.
This is what I have done so far:
import sqlite3
import pandas
dat = sqlite3.connect('data.db') #connected to database with out error
pandas.DataFrame.from_records(dat, index=None, exclude=None, columns=None, coerce_float=False, nrows=None)
But it's throwing this error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line 980, in from_records
coerce_float=coerce_float)
File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line 5353, in _to_arrays
if not len(data):
TypeError: object of type 'sqlite3.Connection' has no len()
How to convert sqlite database to pandas dataframe
Although sqlite3 is part of the Python Standard Library and is a nice and easy interface to SQLite databases, the Pandas tutorial states:
Note In order to use read_sql_table(), you must have the SQLAlchemy
optional dependency installed.
But Pandas still supports sqlite3 access if you want to avoid installing SQLAlchemy:
import sqlite3
import pandas as pd
# Create your connection.
cnx = sqlite3.connect('file.db')
df = pd.read_sql_query("SELECT * FROM table_name", cnx)
As stated here, but you need to know the name of the used table in advance.
The line
data = sqlite3.connect('data.db')
opens a connection to the database. No records have been queried at this point, so you have to execute a query afterward and pass its result to the pandas DataFrame constructor.
It should look similar to this
import sqlite3
import pandas as pd
dat = sqlite3.connect('data.db')
query = dat.execute("SELECT * From <TABLENAME>")
cols = [column[0] for column in query.description]
results= pd.DataFrame.from_records(data = query.fetchall(), columns = cols)
I am not very familiar with SQL commands, so you should check the correctness of the query. <TABLENAME> should be the name of the table in your database.
Parsing a sqlite .db into a dictionary of dataframes without knowing the table names:
def read_sqlite(dbfile):
    """Load every table of a SQLite database file into a dict of DataFrames.

    Args:
        dbfile: Path of the SQLite database file to read.

    Returns:
        A dict mapping each table name listed in ``sqlite_master`` to a
        DataFrame holding all rows of that table.
    """
    import sqlite3
    from contextlib import closing
    from pandas import read_sql_query

    # A sqlite3 connection used directly in `with` only wraps a transaction;
    # closing() makes sure the connection itself is released.
    with closing(sqlite3.connect(dbfile)) as dbcon:
        tables = list(read_sql_query(
            "SELECT name FROM sqlite_master WHERE type='table';", dbcon)['name'])
        out = {}
        for tbl in tables:
            # Quote the identifier so names containing spaces, keywords or
            # quotes do not break (or alter) the statement.
            quoted = '"' + tbl.replace('"', '""') + '"'
            out[tbl] = read_sql_query(f"SELECT * from {quoted}", dbcon)
    return out
Search sqlalchemy, engine and database name in google (sqlite in this case):
import pandas as pd
import sqlalchemy
db_name = "data.db"
table_name = "LITTLE_BOBBY_TABLES"
engine = sqlalchemy.create_engine("sqlite:///%s" % db_name, execution_options={"sqlite_raw_colnames": True})
df = pd.read_sql_table(table_name, engine)
I wrote a piece of code up that saves tables in a database file such as .sqlite or .db and creates an excel file out of it with each table as a sheet or makes individual tables into csvs.
Note: You don't need to know the table names in advance!
import os, fnmatch
import sqlite3
import pandas as pd
#creates a directory (and any missing parents) without failing if it already exists
def create_dir(dir):
    """Create ``dir`` if needed and report on stdout what happened.

    Args:
        dir: Path of the directory to create.

    Returns:
        The ``dir`` argument, unchanged.
    """
    # Attempt the creation directly instead of testing os.path.exists first,
    # so nothing can create the path between the check and the call.
    try:
        os.makedirs(dir)
    except FileExistsError:
        print("Directory already existed : ", dir)
    else:
        print("Created Directory : ", dir)
    return dir
#finds files under a directory tree whose names match a shell-style wildcard pattern (not a regex)
def find(pattern, path):
    """Return the paths of all files below ``path`` whose name matches ``pattern``.

    Args:
        pattern: ``fnmatch`` wildcard pattern (e.g. ``"*.sqlite"``) tested
            against each file name, not against the full path.
        path: Root directory that is walked recursively.

    Returns:
        A list of matching file paths, each joined with the directory it was
        found in; empty when nothing matches.
    """
    result = []
    for root, _dirs, files in os.walk(path):
        result.extend(
            os.path.join(root, name) for name in fnmatch.filter(files, pattern))
    return result
#convert sqlite databases(.db,.sqlite) to pandas dataframe(excel with each table as a different sheet or individual csv sheets)
def save_db(dbpath=None,excel_path=None,csv_path=None,extension="*.sqlite",csvs=True,excels=True):
    """Export every table of a SQLite database to an Excel workbook and/or CSV files.

    Args:
        dbpath: Path of the database file. When None, the current working
            directory is searched recursively for files matching
            ``extension`` and the first match is used.
        excel_path: Destination workbook. Defaults to
            ``Excel_Multiple_Sheets.xlsx`` inside the output folder.
        csv_path: Folder receiving one ``<table>.csv`` per table. Defaults to
            the output folder.
        extension: Wildcard pattern used to locate the database when
            ``dbpath`` is None.
        csvs: Write one CSV file per table.
        excels: Write one workbook with one sheet per table.

    Returns:
        0 on success; -1 when both ``csvs`` and ``excels`` are false, or when
        no database file could be located.
    """
    import importlib.util
    from contextlib import ExitStack, closing

    if not excels and not csvs:
        print("Atleast one of the parameters need to be true: csvs or excels")
        return -1

    #little code to find files by extension
    if dbpath is None:
        files = find(extension, os.getcwd())
        if not files:
            # Previously this case crashed with an IndexError.
            print("No database file matching", extension, "found under", os.getcwd())
            return -1
        if len(files) > 1:
            print("Multiple files found! Selecting the first one found!")
            print("To locate your file, set dbpath=<yourpath>")
        dbpath = files[0]
    print("Reading database file from location :",dbpath)

    #path handling
    external_folder, base_name = os.path.split(os.path.abspath(dbpath))
    file_name = os.path.splitext(base_name)[0] #name without the extension
    main_path = os.path.join(external_folder, "Saved_Dataframes_" + file_name)
    create_dir(main_path)
    if excel_path is None:
        excel_path = os.path.join(main_path, "Excel_Multiple_Sheets.xlsx")
    if csv_path is None:
        csv_path = main_path

    # ExitStack releases the writer, cursor and connection (in that order)
    # even if an export step raises.
    with ExitStack() as stack:
        db = stack.enter_context(closing(sqlite3.connect(dbpath)))
        cursor = stack.enter_context(closing(db.cursor()))
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = [row[0] for row in cursor.fetchall()]
        print(len(tables),"Tables found :")

        writer = None
        if excels and tables:
            # Prefer xlsxwriter when it is installed; otherwise let pandas
            # choose its default engine. The module name is lowercase, and
            # "!pip install" is notebook-only syntax, so nothing is
            # installed from here.
            engine = 'xlsxwriter' if importlib.util.find_spec('xlsxwriter') else None
            # Closing the writer on exit saves the workbook; this replaces
            # writer.save(), which was removed in pandas 2.0.
            writer = stack.enter_context(pd.ExcelWriter(excel_path, engine=engine))

        for i, table_name in enumerate(tables, start=1):
            # Quote the identifier so table names with spaces or keywords work.
            quoted = '"' + table_name.replace('"', '""') + '"'
            table = pd.read_sql_query("SELECT * from %s" % quoted, db)
            if writer is not None:
                print("Parsing Excel Sheet ",i," : ",table_name)
                table.to_excel(writer, sheet_name=table_name, index=False)
            if csvs:
                print("Parsing CSV File ",i," : ",table_name)
                table.to_csv(os.path.join(csv_path,table_name + '.csv'), index_label='index')
    return 0
save_db();
If data.db is your SQLite database and table_name is one of its tables, then you can do:
import pandas as pd
df = pd.read_sql_table('table_name', 'sqlite:///data.db')
No other imports are needed, but the SQLAlchemy package must be installed for read_sql_table() to work.
I have stored my data in database.sqlite; the table name is Reviews.
import sqlite3
import pandas as pd  # required: the snippet calls pd.read_sql_query

con = sqlite3.connect("database.sqlite")
try:
    data = pd.read_sql_query("SELECT * FROM Reviews", con)
finally:
    # Release the database file whether or not the query succeeds.
    con.close()
print(data)

Categories