Python: iterate through a list of dictionaries and save to DB

I'm new to Python and trying to save raw POST data into MySQL.
I want to iterate over each element in the JSON that is posted and save all the data to the DB.
JSON list of objects (30 objects, each with 11 columns):
[
    {
        "col1": 7878,
        "col2": "c004979d3969a86a8fdcda2f92eb39e3",
        "col3": "b000yht23",
        ...
        "col11": 2
    },
    {
        "col1": 7878,
        "col2": "c004979d3969a86a8fdcda2f92eb39e3",
        "col3": "b000yht23",
        ...
        "col11": 43
    },
    # up to 30 objects
    ...
]
'json_test' table desc:
CREATE TABLE json_test (
  `col1` varchar(250) NOT NULL,
  `col2` varchar(250) NOT NULL,
  `col3` varchar(250) NOT NULL,
  `col4` varchar(250) NOT NULL,
  `col5` varchar(250) NOT NULL,
  `col6` varchar(250) NOT NULL,
  `col7` varchar(250) NOT NULL,
  `col8` varchar(250) NOT NULL,
  `col9` varchar(250) NOT NULL,
  `col10` varchar(250) NOT NULL,
  `col11` varchar(200) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
UPDATED to save data to DB:
My py code looks like:
from flask import Flask, abort, request
import json
import pymysql

app = Flask(__name__)

@app.route('/foo', methods=['GET', 'POST'])
def foo():
    jsonobject = request.json
    if not jsonobject:
        abort(400)
    # load - converts JSON source text to a Python value
    #readable_json = json.dumps(jsonobject)
    #UPDATED with column_names
    k = 0
    for i in jsonobject:
        # Connect to the database
        conn = pymysql.connect(host='10.20.3.4', port=3306, user='root', passwd='', db='python_db')
        try:
            with conn.cursor() as cursor:
                column_names = ['col1', 'col2', 'col3', ... 'col11']
                column_names_str = ', '.join(column_names)
                binds_str = ', '.join('%s' for _ in range(len(column_names)))
                sql = ("INSERT INTO `json_test` ({column_names})"
                       " VALUES({binds})"
                       .format(column_names=column_names_str, binds=binds_str))
                for data_dict in jsonobject:
                    values = [data_dict[column_name]
                              for column_name in column_names]
                    cursor.execute(sql, values)
                    print("Insert successful!")
            #UPDATED
            k += 1
            conn.commit()
        finally:
            conn.close()
    return "Insert successful"
    #return json.dumps(jsonobject)

if __name__ == '__main__':
    app.run(host='10.22.1.168', debug=True, port=7845)
UPDATED code result:
Only the last record seems to be inserting

Replace this mess
#UPDATED with column_names
k = 0
for i in jsonobject:
    # Connect to the database
    conn = pymysql.connect(host='10.20.3.4', port=3306, user='root', passwd='', db='python_db')
    try:
        with conn.cursor() as cursor:
            column_names = ['col1', 'col2', 'col3', ... 'col11']
            column_names_str = ', '.join(column_names)
            binds_str = ', '.join('%s' for _ in range(len(column_names)))
            sql = ("INSERT INTO `json_test` ({column_names})"
                   " VALUES({binds})"
                   .format(column_names=column_names_str, binds=binds_str))
            for data_dict in jsonobject:
                values = [data_dict[column_name]
                          for column_name in column_names]
                cursor.execute(sql, values)
                print("Insert successful!")
        #UPDATED
        k += 1
        conn.commit()
    finally:
        conn.close()
return "Insert successful"
with
# Connect once, outside the loop
conn = pymysql.connect(host='10.20.3.4', port=3306, user='root', passwd='', db='python_db')
try:
    with conn.cursor() as cursor:
        columns_names = ['col1', 'col2', 'col3', 'col4', 'col5', 'col6',
                         'col7', 'col8', 'col9', 'col10', 'col11']
        columns_names_str = ', '.join(columns_names)
        binds_str = ', '.join('%s' for _ in range(len(columns_names)))
        sql = ("INSERT INTO json_test ({columns_names}) "
               "VALUES ({binds})"
               .format(columns_names=columns_names_str,
                       binds=binds_str))
        for data_dict in jsonobject:
            values = [data_dict[column_name]
                      for column_name in columns_names]
            cursor.execute(sql, values)
            print("Insert successful!")
    conn.commit()
finally:
    conn.close()
Summation:
- The k object is redundant.
- The name i is unclear and makes it look like some kind of index when it is not: it is a dict object.
- We don't need to create a connection for each object in jsonobject, because that is an expensive operation.
- We don't need to create the sql object on each iteration either, since it remains unchanged.
- Storing the column names in a list/tuple saves us from writing them twice: once in the query and once in the values extraction.
- Creating the binds string (%s, %s, ...) dynamically, based on the number of columns, saves us from typos when we've missed or added too many bind placeholders (a related executemany sketch follows below).
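As an optional extra, not part of the answer above: once the statement and the rows are separated like this, pymysql's DB-API cursor also offers executemany, which hands the whole list to the driver in one call. A minimal sketch, assuming jsonobject is the parsed request.json list and the same connection settings as in the question:

import pymysql

column_names = ['col1', 'col2', 'col3', 'col4', 'col5', 'col6',
                'col7', 'col8', 'col9', 'col10', 'col11']
sql = "INSERT INTO json_test ({cols}) VALUES ({binds})".format(
    cols=', '.join(column_names),
    binds=', '.join(['%s'] * len(column_names)))

conn = pymysql.connect(host='10.20.3.4', port=3306, user='root',
                       passwd='', db='python_db')
try:
    with conn.cursor() as cursor:
        # Build one tuple of values per JSON object, in column order.
        rows = [tuple(obj[name] for name in column_names) for obj in jsonobject]
        # One call executes the INSERT for every row.
        cursor.executemany(sql, rows)
    conn.commit()
finally:
    conn.close()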

json.dumps does the opposite of what you claim: it converts a Python object into a JSON string (json.loads is the one that parses JSON text into a Python value).
The result of request.json is already a Python data structure, so you don't need to do anything else with it.
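A quick illustration of the two directions (my own example, not from the original post):

import json

data = [{"col1": 7878, "col11": 2}]

text = json.dumps(data)     # Python object -> JSON string
print(type(text))           # <class 'str'>

parsed = json.loads(text)   # JSON string -> Python object
print(type(parsed))         # <class 'list'>
print(parsed[0]["col1"])    # 7878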

Related

My pipeline is duplicating the same table in postgresql

The following function is supposed to create a table in PostgreSQL, but if I execute it with the same name in the SQL script and in df.to_sql('spotify', ...) it freezes and does nothing, and when I change the name in either of the two statements it creates 2 different tables in my DB. Something is telling me I should not be creating 2 connections (conn and engine), but I'm not sure about this.
I want to be able to create my own table and then append my df to it...
I'd highly appreciate some feedback.
def extraction():
    TOKEN = 'BQC1Cas7Nj6T61Gkq7ufKa2e6MKjNXjembypav0wsMuEVATyZSZRbgELPXR1i12Qzz8doLck1cueDIn-uqp0EcvyYeVHnFIEGb4MkCjgmIl8975UIDkCvP9WTBzUDHok1RmuQw6ySeHMkREuY-KtWm367yopkyBWQYuR28It'
    # We need headers to send the information along with our request, so this should be part of our request.
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Authorization": "Bearer {token}".format(token=TOKEN)
    }
    r = requests.get("https://api.spotify.com/v1/me/player/recently-played", headers=headers)
    response = r.json()
    if 'error' in response:
        print('The TOKEN is either wrong or has expired')
    else:
        # If my response went smoothly, then we proceed to extract and loop through my .json dictionary and get the values from it.
        my_song_list = []
        global df
        for song in response['items']:
            artist_id = song['track']['artists'][0]['id']
            artist_name = song['track']['artists'][0]['name']
            artist_link = song['track']['artists'][0]['external_urls']['spotify']
            album_id = song['track']['album']['id']
            album_name = song['track']['album']['name']
            album_link = song['track']['album']['external_urls']['spotify']
            song_id = song['track']['id']
            song_name = song['track']['name']
            song_link = song['track']['external_urls']['spotify']
            duration_ms = song['track']['duration_ms']
            popularity = song['track']['popularity']
            disc_number = song['track']['disc_number']
            played_at = song['played_at'].split(".")[0]
            song_dic = {'artist_id': artist_id,
                        'artist_name': artist_name,
                        'artist_link': artist_link,
                        'album_id': album_id,
                        'album_name': album_name,
                        'album_link': album_link,
                        'song_id': song_id,
                        'song_name': song_name,
                        'song_link': song_link,
                        'duration_ms': duration_ms,
                        'popularity': popularity,
                        'disc_number': disc_number,
                        'played_at': played_at
                        }
            my_song_list.append(song_dic)  # now, in order to convert my DICTIONARY to a DATAFRAME, I should consider appending it to a LIST first.
        df = pd.DataFrame(my_song_list)  # now that all my songs are in a LIST datatype, I can convert it to a dataframe.
        # This is a basic transformation performed in my dataframe:
        # Re-ordering columns in my df
        df = df[["artist_id", "artist_link", "album_id", "album_name", "album_link", "song_id", "song_name", "song_link", "duration_ms", "popularity", "disc_number", "played_at"]]
        # Creating two columns (date, time) by splitting the played_at column.
        df[['date', 'time']] = df['played_at'].str.split('T', expand=True)
        # Right now, played_at, date & time are objects, so we need to change these to timestamp.
        df['date'] = pd.to_datetime(df['date'])
        df['time'] = pd.to_datetime(df['time'])
        df['played_at'] = pd.to_datetime(df['played_at'])
        df['played_at'] = df['played_at'].dt.tz_localize('US/Central')
        return df
def loading():
    # psycopg2 is only used when connecting to a PostgreSQL database, so we first make contact by setting some basic info.
    conn = psycopg2.connect(host='127.0.0.1', port='5432', dbname='Athenas', user='postgres', password='cis15a')
    # Creating a cursor to display my PostgreSQL version
    cur = conn.cursor()
    print('=============================================================')
    print('Connected to Athenas')
    print('PostgreSQL database version:')
    print("=============================================================")
    cur.execute('SELECT version()')
    db_version = cur.fetchone()
    print(db_version)
    # Creating the "Spotify_API" table in PostgreSQL using the psycopg2 library.
    table_py = """
    CREATE TABLE IF NOT EXISTS spotify(
        unique_identifier SERIAL PRIMARY KEY,
        artist_link VARCHAR(255) NOT NULL,
        album_id VARCHAR(255) NOT NULL,
        album_name VARCHAR(255) NULL,
        album_link VARCHAR(255) NOT NULL,
        song_id VARCHAR(255) NOT NULL,
        song_name VARCHAR(255) NOT NULL,
        song_link VARCHAR(255) NOT NULL,
        duration INT NOT NULL,
        popularity INT NULL,
        disc_number INT NULL,
        played_at TIMESTAMP NOT NULL,
        date DATE NOT NULL,
        time TIME NOT NULL
    )
    """
    # Executing my "table" using my cur variable.
    try:
        cur.execute(table_py)
        print("=============================================================")
        print("=============================================================")
        print("=============================================================")
        print("All good")
        print("=============================================================")
        print("=============================================================")
        print("=============================================================")
    except Exception as e:
        print("An error occurred when initializing the database")
        print("=============================================================")
        print("=============================================================")
        print("=============================================================")
    # In order to load my existing dataframe to the table we previously created using the psycopg2 library,
    # we now need to create an engine using SQLAlchemy and APPEND my dataframe to the spotify_API table.
    engine = sa.create_engine('postgresql://postgres:cis15a@localhost:5432/Athenas')
    df.to_sql('spotify', con=engine, index=False, if_exists='append')
    print("=============================================================")
    print("=============================================================")
    print('The ETL ran successfully')
    print("=============================================================")
    print("=============================================================")
    cur.close()
    conn.commit()
You execute the CREATE TABLE statement twice:
cur.execute(table_py)
if cur.execute(table_py) == True:
What you wanted to do was:
try:
    cur.execute(table_py)
    print("All good")
except Exception as e:
    print("An error occurred when initializing the database")

Cannot code auto increment id in SQLite Flask

I have just created a new column, Id, in DB Browser for SQLite. I am not sure how I am supposed to code this portion in App.py. Should I use id = request.form['id']?
App.py
@app.route('/addrec', methods=['POST', 'GET'])
def addrec():
    if request.method == 'POST':
        id =
        use = session['user'].get("name")
        ema = session['user'].get("preferred_username")
        type = request.form['type']
        uploadre = request.form['uploadre']
        amt = request.form['amt']
        description = request.form['description']
        if request.form.get("price"):
            price_checked = "Yes"
        else:
            price_checked = "No"
        conn = sql.connect(db_path)
        c = conn.cursor()
        c.execute(
            "INSERT INTO SubmitClaim VALUES (?,?,?,?,?,?,?,?)",
            (id, use, ema, type, uploadre, amt, price_checked, description))
        conn.commit()
        c.execute("SELECT * FROM SubmitClaim")
        print(c.fetchall())
        conn.close()
    return render_template('base.html', user=session["user"], version=msal.__version__)
This is my table in DB Browser for SQLite:
CREATE TABLE "SubmitClaim" (
"id" INTEGER,
"Name" TEXT NOT NULL,
"Email" TEXT NOT NULL,
"ClaimType" TEXT NOT NULL,
"UploadReceipt" TEXT NOT NULL,
"ClaimAmount" INTEGER NOT NULL,
"checkbox" TEXT NOT NULL,
"ClaimDescription" TEXT NOT NULL,
PRIMARY KEY("id")
)
Here's how you set up an integer primary key column in sqlite and then insert and select from it:
import sqlite3

conn = sqlite3.connect('test.db')
conn.execute('''
    CREATE TABLE SubmitClaim (
        Id INTEGER PRIMARY KEY NOT NULL,
        Name NVARCHAR NOT NULL,
        Email NVARCHAR NOT NULL,
        ClaimType NVARCHAR NOT NULL,
        UploadReceipt NVARCHAR NOT NULL,
        ClaimAmount INTEGER NOT NULL,
        Checkbox NVARCHAR NOT NULL,
        ClaimDescription NVARCHAR NOT NULL
    )
''')
conn.commit()

conn.execute("INSERT INTO SubmitClaim (Name, Email, ClaimType, UploadReceipt, ClaimAmount, Checkbox, ClaimDescription) VALUES ('Foo Bar', 'foo@bar.com', 'A', 'Blah', 10, 'Checked', 'Description goes here')")
conn.commit()

cursor = conn.execute('SELECT * FROM SubmitClaim')
for row in cursor:
    print(row)
And here's a colab notebook demonstration: https://colab.research.google.com/drive/1OhV9lWSBxLpOv45bNKmtRx9H0j6BZ-S3?usp=sharing
So your code sample above becomes:
@app.route('/addrec', methods=['POST', 'GET'])
def addrec():
    if request.method == 'POST':
        use = session['user'].get("name")
        ema = session['user'].get("preferred_username")
        type = request.form['type']
        uploadre = request.form['uploadre']
        amt = request.form['amt']
        description = request.form['description']
        if request.form.get("price"):
            price_checked = "Yes"
        else:
            price_checked = "No"
        conn = sql.connect(db_path)
        conn.execute('''
            INSERT INTO SubmitClaim
            (Name, Email, ClaimType, UploadReceipt, ClaimAmount, Checkbox, ClaimDescription)
            VALUES (?,?,?,?,?,?,?)''',
            (use, ema, type, uploadre, amt, price_checked, description))
        conn.commit()
        c = conn.execute("SELECT * FROM SubmitClaim")
        print(c.fetchall())
        conn.close()
    return render_template('base.html', user=session["user"], version=msal.__version__)

How can I let the id be auto generated in my database?

Here is my code:
import sqlite3

def insert(fields=(), values=()):
    connection = sqlite3.connect('database.db')
    # g.db is the database connection
    cur = connection.cursor()
    query = 'INSERT INTO this_database (%s) VALUES (%s)' % (
        ', '.join(fields),
        ', '.join(['?'] * len(values))
    )
    cur.execute(query, values)
    connection.commit()
    id = cur.lastrowid
    cur.close()
    print(id)
test example:
insert(fields = ("id", "file_name", "url", "time", "type", "description"), values = (2, "file1", "wwww.test.com", "1", "photo", "my first database test"))
I don't want to give the id manually.
I want it to be incremented by 1 automatically.
How can I do that?
You have an INTEGER PRIMARY KEY column, which, if you leave it out when inserting items, automatically increments:
INSERT INTO this_database(file_name, url, time, type, description)
VALUES (?,?,?,?,?)
Since id is omitted, every time you insert a value using the above statement, it's automatically assigned a number by sqlite.
See the SQLite documentation explaining this: https://www.sqlite.org/autoinc.html
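To connect that back to the insert() helper from the question, here is a minimal sketch (same table and column names assumed) that simply leaves id out of the field list and reads back the value SQLite assigned:

import sqlite3

def insert(fields=(), values=()):
    connection = sqlite3.connect('database.db')
    cur = connection.cursor()
    query = 'INSERT INTO this_database (%s) VALUES (%s)' % (
        ', '.join(fields),
        ', '.join(['?'] * len(values))
    )
    cur.execute(query, values)
    connection.commit()
    new_id = cur.lastrowid  # the id SQLite assigned to the new row
    cur.close()
    return new_id

# id is omitted, so SQLite picks the next value automatically.
print(insert(fields=("file_name", "url", "time", "type", "description"),
             values=("file1", "wwww.test.com", "1", "photo", "my first database test")))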

python MySQL update specific column fetchall()

I'm new to Python and I want to update every record that has count 0 in the database. I have tried a lot but can't find anything that helps.
for row in cur.fetchall():
    if row[3] == 0:
        cur.execute("UPDATE tble SET count = 1 WHERE name = %s" % row[1])
Assuming your table has this structure:
CREATE TABLE `test` (
  `sno` int(11) NOT NULL,
  `name` varchar(50) NOT NULL,
  `count` int(11) NOT NULL,
  `dtCreated` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP
);
Here is the simple code:
import pymysql

conn = pymysql.connect(host='localhost', unix_socket='', user='USER', passwd='PASSWORD', db='DATABASENAME')
cur = conn.cursor()
cur.execute("SELECT * FROM test")
for r in cur:
    curr = conn.cursor()
    sql = """UPDATE test SET count = 1 WHERE name = '%s'""" % r[1]
    # print(sql)
    try:
        # Execute the SQL command
        curr.execute(sql)
        # Commit your changes in the database
        conn.commit()
    except:
        # Rollback in case there is any error
        conn.rollback()
    curr.close()
cur.close()
conn.close()
Also, since you mentioned that you are new to Python: remember to commit whenever you run INSERT, UPDATE or DELETE queries.
Hope it helps.
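A side note, not from the answer above: pymysql can also bind the value for you instead of interpolating it into the SQL string with %, which avoids quoting problems; the commit rule stays the same. A minimal sketch against the same test table:

import pymysql

conn = pymysql.connect(host='localhost', user='USER', passwd='PASSWORD', db='DATABASENAME')
try:
    with conn.cursor() as cur:
        # %s is a bind placeholder here; pymysql escapes the value itself.
        cur.execute("UPDATE test SET count = 1 WHERE name = %s", ("some name",))
    conn.commit()  # without the commit the UPDATE is discarded when the connection closes
finally:
    conn.close()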

How to get columns from a query in python?

I have this query in a Python program:
I should create a multidimensional array (if that is possible), or four arrays from this query, one for every column of the query.
Can you suggest an elegant way to solve it?
conn = # connection to the server
cursor = conn.cursor()
query = ("select id, name, phone, city from guest")
cursor.execute(query)
results = cursor.fetchall
for i in results:
    print i
cursor.close()
conn.close()
Not elegant, but it may help to unravel the mysterious Python Connector cursor class. It transfers the list of tuples returned by the query (see Copperfield's comment) into a list (phoneList) of dictionaries (entries) with the details of each entry in the database, which might be easier to work with in your Python script:
# ref: https://dev.mysql.com/doc/connector-python/en/connector-python-api-mysqlcursor.html
import mysql.connector

db = 'test'
table = 'phonebook'
phoneList = []

drop_table = ("DROP TABLE IF EXISTS {};").format(table)

# By default, the starting value for AUTO_INCREMENT is 1, and it will increment by 1 for each new record.
# To let the AUTO_INCREMENT sequence start with another value, use the following SQL statement:
# ALTER TABLE phonebook AUTO_INCREMENT=100;
create_table = ("CREATE TABLE {} ("
                "id int NOT NULL AUTO_INCREMENT,"
                "name varchar(30) NOT NULL,"
                "phone varchar(30) NOT NULL,"
                "city varchar(30) NOT NULL,"
                "PRIMARY KEY (id))"
                " ENGINE=InnoDB DEFAULT CHARSET=latin1;").format(table)

Names = {'Bill': {'phone': '55123123', 'city': 'Melbourne'},
         'Mary': {'phone': '77111123', 'city': 'Sydney'},
         'Sue': {'phone': '55888123', 'city': 'Melbourne'},
         'Harry': {'phone': '77777123', 'city': 'Sydney'},
         'Fred': {'phone': '88123444', 'city': 'Yongala'},
         'Peter': {'phone': '55999123', 'city': 'Melbourne'}}

cnx = mysql.connector.connect(user='mysqluser', password='xxxx', host='127.0.0.1', database=db)
cursor = cnx.cursor(dictionary=True)  # key to using **row format

cursor.execute(drop_table)
cursor.execute(create_table)

# populate db
for name, detail in dict.items(Names):
    sql = ("INSERT INTO {} (name,phone,city) VALUES ('{}','{}','{}')".format(table, name, detail['phone'], detail['city']))
    cursor.execute(sql)

sql = ("SELECT id,name,phone,city FROM {}".format(table))
cursor.execute(sql)

for row in cursor:
    print("{id} {name} {phone} {city}".format(**row))
    phoneList.append(row)

print phoneList[0]['name'], phoneList[0]['city']
print phoneList[3]['name'], phoneList[3]['phone']

for entries in phoneList:  # list of dictionaries
    print entries['name'], entries

for entries in phoneList:
    for k, v in dict.items(entries):
        print k, v
    print "\n"

cnx.close()
