JSON string: replace single quote with two single quotes in Python

I'm using Python to grab a JSON response. The JSON string has fields that contain a single quote inside the data. I'm trying to replace a single escaped quote ' with two single quotes '' so that I can insert it into SQL Server. The problem is, when I use:
json_str = json_string.replace("\'","''")
it changes it to ''.
How can I massage this data so that I can get it into SQL Server?
Code snippet:
import pyodbc
import json
import requests
import csv
import datetime as DT
from datetime import datetime
import pytz
from pytz import timezone

# Dates / Times
today = DT.date.today()
week_ago = today - DT.timedelta(days=2)

# Settings
auth = ''
view_tickets = []

# List tickets from a View
print(f'Getting tickets...')
url = f'https:... created>{week_ago}'
headers = {'Content-Type': 'application/json'}

while url:
    response = requests.get(url, auth=auth, headers=headers)
    page_data = response.json()
    # convert json to string
    json_string = json.dumps(page_data)
    # remove backslashes that screw up SQL import
    json_string = json_string.encode().decode('unicode_escape')
    #json_str = json_string.replace("\'","''")

    # SQL EXPRESS INSERT Process
    conn = pyodbc.connect('Driver={SQL Server};Server=NY-.\SQLEXPRESS;Database=Metrics;Trusted_Connection=yes;')
    conn.timeout = 60
    conn.autocommit = True

    # Call SQL and trap Error if raised
    try:
        cursor = conn.cursor()
        # SQL string
        sql_declare = "DECLARE @json NVARCHAR(MAX) = N'" + json_str + "'"
        sql_stat = "INSERT INTO Metrics.dbo.Tickets SELECT * FROM OPENJSON(@json, '$.results') WITH (id INTEGER '$.id', subject VARCHAR(255) '$.subject')"
        sql = sql_declare + " " + sql_stat
        cursor.execute(sql)
        print('Connected to database')
    except pyodbc.Error as err:
        print('Error !!!!! %s' % err)
    except:
        print('something else failed')

    conn.close()
    print('closed db connection')
I have tried string.replace but that is not working.
Sample String:
'{"results": [{"subject": "Golf International de Longwy & Golf de Thionville Rive Droite - Add course request", "raw_subject": "Golf International de Longwy & Golf de Thionville Rive Droite - Add course request", "description": "Name GILBERT RAVAGLI\nEmail xxxxx\nPhone 00352781695321 france\n\nMessage\n\nBonjour J'ai acheter une montre Les deux golf qui sont dans mon secteur n'apparaissent pas sur la montre Le golf internationnal de Longwy et le golf de Thionville rive droite France Pouvez vous les cartographier que la montre puisse me servir? Cordialement"}], "facets": null, "next_page": null, "previous_page": null, "count": 72}'
SQL Code:
DECLARE @json NVARCHAR(MAX)
SET @json = N'{"results": [{"subject": "Golf International de Longwy & Golf de Thionville Rive Droite - Add course request", "raw_subject": "Golf International de Longwy & Golf de Thionville Rive Droite - Add course request", "description": "Name GILBERT RAVAGLI\nEmail xxxxx\nPhone 00352781695321 france\n\nMessage\n\nBonjour J'ai acheter une montre IZZO SWAMI Les deux golf qui sont dans mon secteur n'apparaissent pas sur la montre Le golf internationnal de Longwy et le golf de Thionville rive droite France Pouvez vous les cartographier que la montre puisse me servir? Cordialement"}], "facets": null, "next_page": null, "previous_page": null, "count": 72}'
SELECT * FROM OPENJSON (@json)
WITH (
    subject VARCHAR(250) '$.subject'
)

Shouldn't all of this:
sql_declare = "DECLARE @json NVARCHAR(MAX) = N'" + json_str + "'"
sql_stat = "INSERT ... FROM OPENJSON(@json, '$.results') ..."
sql = sql_declare + " " + sql_stat
cursor.execute(sql)
Just be:
sql = "INSERT ... FROM OPENJSON(?, '$.results') ...);"
cursor.execute(sql, json_string)
Now you're passing in a parameter and don't have to deal with local variables in T-SQL, string delimiters, single quotes, etc.
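A minimal sketch of that parameterized version, reusing the table and WITH clause from the question (so embedded single quotes like J'ai no longer need any doubling):
sql = ("INSERT INTO Metrics.dbo.Tickets "
       "SELECT * FROM OPENJSON(?, '$.results') "
       "WITH (id INTEGER '$.id', subject VARCHAR(255) '$.subject')")
# pyodbc binds json_string to the ? placeholder and handles all quoting
cursor.execute(sql, json_string)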

Related

More efficient way to manipulate large dataframe

It's my first real Python script, so feel free to make comments in order to improve my code.
The purpose of this script is to extract 2 Oracle tables with Python, store them in dataframes and then join them with pandas.
But for queries returning more than 500k rows it feels slow. Do you know why?
import pandas as pd
from datetime import date
from sqlalchemy import create_engine
import cx_Oracle, time
import pandas as pd
import config

## Timer variable
start = time.time()

## Command-line user input
year = input('Saisir une annee : ')
month = input('Saisir le mois, au fomat MM : ')
societe_var = input('SA (APPLE,PEACH,BANANA,ANANAS,ALL) : ')

## SAs and the BU lists corresponding to each SA
sa_list = ['APPLE','PEACH','BANANA','ANANAS']
bu_list_APPLE = ['006111','1311402','1311403','1311404','1340115','13411106','1311407','1111','6115910','1166157','6811207','8311345','1111','1188100','8118101','8811102','8810113','8811104','8118105','8811106','8811107','8118108','1111']
bu_list_PEACH = ['131400','310254']
bu_list_BANANA = ['0151100','1110073','1007115','1311335','1113340','1311341','1113342','1331143']
bu_list_ANANAS = ['1211345','13111345','11113395','73111345']

# Points to the right BU list for the SA that was entered
bu_list_map = {
    'APPLE': bu_list_APPLE,
    'PEACH': bu_list_PEACH,
    'BANANA': bu_list_BANANA,
    'ANANAS': bu_list_ANANAS
}

if societe_var == 'ALL':
    print('non codé pour le moment')
elif societe_var in sa_list:
    bu_list = bu_list_map.get(societe_var)
    sa_var = societe_var
    i = 1
    for bu in bu_list:
        start_bu = time.time()

        ## Load the SQL query and substitute the variables for gla_va_parametre -- EPOST
        query1 = open('gla_va_parametre - VAR.sql', "r").read()
        query1 = query1.replace('#ANNEE', "'" + year + "'").replace('%MOIS%', "'" + month + "'").replace('%SA%', "'" + societe_var + "'").replace('%BUGL%', "'" + bu + "'").replace('%DIVISION%', '"C__en__PS_S1_D_OP_UNIT13".OPERATING_UNIT')

        ## Load the SQL query and substitute the variables for cle-gla_tva -- FPOST
        query2 = open('cle-gla_tva - VAR.sql', "r").read()
        query2 = query2.replace('#ANNEE', "'" + year + "'").replace('%MOIS%', "'" + month + "'").replace('%SA%', "'" + societe_var + "'").replace('%BUGL%', "'" + bu + "'").replace('%DIVISION%', 'OPERATING_UNIT')

        # Connection parameters
        connection_EPOST = cx_Oracle.connect(user=config.user_EPOST, password=config.password_EPOST, dsn=config.host_EPOST, )
        connection_FPOST = cx_Oracle.connect(user=config.user_FPOST, password=config.password_FPOST, dsn=config.host_FPOST, )

        ## Fetch the EPOST side
        with connection_EPOST:
            # Declare an empty list
            dfl = []
            # Declare an empty DataFrame
            dfs = pd.DataFrame()
            z = 1
            # Start chunking
            for chunk in pd.read_sql(query1, con=connection_EPOST, chunksize=25000):
                # Append each chunk of the SQL result set to the list
                dfl.append(chunk)
                print('chunk num : ' + str(z))
                z = z + 1
            # Concatenate the chunks from the list into a DataFrame
            dfs = pd.concat(dfl, ignore_index=True)
            print('param récupéré')

        ## Fetch the FPOST side
        with connection_FPOST:
            # Declare an empty list
            df2 = []
            # Declare an empty DataFrame
            dfs2 = pd.DataFrame()
            # Start chunking
            for chunk in pd.read_sql(query2, con=connection_FPOST, chunksize=10000):
                # Append each chunk of the SQL result set to the list
                df2.append(chunk)
            # Concatenate the chunks from the list into a DataFrame
            dfs2 = pd.concat(df2, ignore_index=True)
            print('clé récupéré')

        print('Début de la jointure')
        jointure = pd.merge(dfs, dfs2, how='left', left_on=['Code_BU_GL','Code_division','Code_ecriture','Date_comptable','Code_ligne_ecriture','UNPOST_SEQ'], right_on=['BUSINESS_UNIT','OPERATING_UNIT','JOURNAL_ID','JOURNAL_DATE','JOURNAL_LINE','UNPOST_SEQ']).drop(columns=['BUSINESS_UNIT','OPERATING_UNIT','JOURNAL_ID','JOURNAL_DATE','JOURNAL_LINE'])
        jointure.to_csv('out\gla_va_'+year+month+"_"+societe_var+"_"+bu+"_"+date.today().strftime("%Y%m%d")+'.csv', index=False, sep='|')
        print('Fichier ' + str(i) + "/" + str(len(bu_list)) + ' généré en : ' + str(time.time() - start_bu) + ' secondes')
        i = i + 1

print("L'extraction du périmètre de la SA " + societe_var + " s'est effectué en :" + str((time.time() - start)/60) + " min" )

Issue with SQLite query between 2 dates

I'm working on a Python project which uses an SQLite3 database.
I created the database with only one table, called "Message", like this:
connexion = sqlite3.connect(BDD)
c = connexion.cursor()
c.execute(f"""CREATE TABLE IF NOT EXISTS {arg}(
    Id INTEGER PRIMARY KEY AUTOINCREMENT,
    Jour_Heure_Reception text,
    Jour_Heure_Reponse text,
    Theme text,
    Motif text,
    Risque_incident_client text,
    Transfert_sans_action text,
    Matricule text,
    Origine text)""")
connexion.commit()
connexion.close()
My goal is to search between 2 dates in the Jour_Heure_Reponse column, and return the number of entries by matricule.
To do that, I use this SQLite query:
def nbr(arg):
    "query the mail statistics database and return the number of messages per user over the period arg"
    # Build the list of users who have entries in the database over the period
    connexion = sqlite3.connect(BDD)
    c = connexion.cursor()
    date_selection = str((datetime.now() - timedelta(arg)).strftime('%d/%m/%Y'))
    yesterday = str(datetime.now().strftime('%d/%m/%Y'))
    c.execute(f"""
        select Matricule from Message Where
        Jour_Heure_Reception >= "{date_selection}"
        and Jour_Heure_Reponse < "{yesterday}" """)
    agents = c.fetchall()
    liste_agents = []
    for i in agents:
        if not i[0] in liste_agents:
            liste_agents.append(i[0])
    c.close()

    # Count the entries for each matricule in the list built above
    connexion = sqlite3.connect(BDD)
    c = connexion.cursor()
    liste_affichage = []
    for i in liste_agents:
        c.execute(f"""SELECT * FROM Message where
            Matricule = "{i}" and
            Jour_Heure_Reception >= "{date_selection}" and
            Jour_Heure_Reponse < "{yesterday}" """)
        test = c.fetchall()
        print(i)
        for i in test:
            print(i[1])
        data_list = [str(i), str(len(test))]
        liste_affichage.append(data_list)
    c.close()
The problem is this:
When I call my nbr function, it returns nothing if arg is not 1, and even with arg = 1 the result is not logical.
For example, calling nbr(1) returns this (I only print the dates):
02/06/2020
02/06/2020
02/06/2020
02/06/2020
02/06/2020
02/06/2020
02/06/2020
03/06/2020
03/06/2020
03/06/2020
03/06/2020
02/07/2020
02/07/2020
02/07/2020
03/07/2020
03/08/2020
02/09/2020
03/09/2020
03/09/2020
09/08/2020
09/08/2020
09/08/2020
09/08/2020
09/08/2020
01/10/2020
02/10/2020
02/10/2020
As you can see, the timedelta is not respected.
Since the column type is text, I send the dates as str after a strftime() conversion.
Where am I going wrong?
SQLite does not have a dedicated date type. It supports storing dates in a text field in ISO-8601 format. You will have to convert your date format so that the comparison works. For the format details and documentation see this:
https://www.sqlite.org/lang_datefunc.html
You should use %Y-%m-%d as the format string.
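A minimal sketch of that change, assuming Jour_Heure_Reception and Jour_Heure_Reponse are also stored as %Y-%m-%d text (and collapsing the per-matricule loop into a single GROUP BY, which is optional):
date_selection = (datetime.now() - timedelta(arg)).strftime('%Y-%m-%d')
today = datetime.now().strftime('%Y-%m-%d')
# ISO-8601 strings sort lexicographically in date order, so the text
# comparison below behaves like a real date comparison
c.execute("""SELECT Matricule, COUNT(*) FROM Message
             WHERE Jour_Heure_Reception >= ?
               AND Jour_Heure_Reponse < ?
             GROUP BY Matricule""", (date_selection, today))
liste_affichage = [[m, str(n)] for m, n in c.fetchall()]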

Flask response character set

I'm developing my API and currently I store one of my fields in Spanish. The database is Postgres 9.5 (the field type is varchar):
File Encoding : utf-8
Text field:
"text" varchar(65536) COLLATE "default"
When I return the value of the text field, I use flask_sqlalchemy to get my data.
from datetime import datetime
from sqlalchemy import inspect


class Serializer(object):
    """
    Serialize information from database to a JSON Dictionary
    """

    def serialize(self):
        return {c: getattr(self, c) for c in inspect(self).attrs.keys()}

    @staticmethod
    def serialize_list(l):
        return [m.serialize() for m in l]


class AutoSerialize(object):
    """
    Mixin for retrieving public fields of model in json-compatible format
    """

    __public__ = None

    def get_public(self, exclude=(), extra=()):
        "Returns model's PUBLIC data for jsonify"
        data = {}
        keys = self._sa_instance_state.attrs.items()
        public = self.__public__ + extra if self.__public__ else extra
        for k, field in keys:
            if public and k not in public: continue
            if k in exclude: continue
            value = self._serialize(field.value)
            if value:
                data[k] = value
        return data

    @classmethod
    def _serialize(cls, value, follow_fk=False):
        if type(value) in (datetime,):
            ret = value.isoformat()
        elif hasattr(value, '__iter__'):
            ret = []
            for v in value:
                ret.append(cls._serialize(v))
        elif AutoSerialize in value.__class__.__bases__:
            ret = value.get_public()
        else:
            ret = value
        return ret
My field in my Model is defined as follows and my class inherits Serializer and AutoSerialize:
description = Column(String(65536), nullable=True)
This is how I return my values to the API client:
articles = Model.Bits.query.order_by(Model.Bits.publishedAt.desc()).limit(10).all()
if articles:
    log.info('api() | %d Articles found ' % len(articles))
    response = []
    values = ['author', 'title', 'description', 'url', 'urlToImage', 'publishedAt']
    response = [{value: getattr(d, value) for value in values} for d in articles]
    return jsonify(articles=response, status='ok', source='my_source', sortBy='latest')
My response looks like this using curl:
{
"author": "Bros Lopez",
"description": "Spotify quiere ser m\u00e1s competitivo en su servicio de recomendaciones de contenido frente a marcas como Apple Music y Pandora. La empresa anunci\u00f3 la compra de la startup francesa Niland, la cual utiliza un sistema de inteligencia artificial para b\u00fasqueda y recomendaciones de contenido. Con 50 millones de usuarios activos Spotify quiere ser m\u00e1s rentable, pues a pesar de que el a\u00f1o pasado gener\u00f3 $3.1 mmdd en ventas, su margen bruto fue del 14%, pagando cerca de 2.7 mmdd en sellos discogr\u00e1ficos y editoriales. Por su parte, Pandora, unos de los principales competidores de Spotify, podr\u00eda ser adquirida por la empresa de radiodifusi\u00f3n SiriusXM, quien el a\u00f1o pasado le hizo una propuesta de compra por $3.4 mmdd. More Info",
"publishedAt": "Fri, 19 May 2017 20:00:00 GMT",
"title": "\u00bfPandora o Spotify?",
"url": "http://www.cnbc.com/2017/05/18/spotify-buys-niland-french-ai-music-startup.html",
"urlToImage": "https://ci4.googleusercontent.com/proxy/TWmEZRwlpPQrjs4HGZGx2041GryyquO7CjSR0oVBK-JUy4Xv3qHSiDow056iW8DV059chC93zFeXc4GVHKnzPpweUy-JzamK-l9pkW-Hgl1PnOun5s4XsE7K2NXBJljp-1Ltf5jyjfcn4j63Hv68FdFuqsw5UNTFBKkFug0=s0-d-e1-ft#https://gallery.mailchimp.com/f82949535ab2aab4bafde98f6/images/1f0dc47c-358b-4625-8744-105ffccfed98.jpg"
}
Is the encoding correct? I tried a different client and the characters are displayed correctly, so I'm not sure whether it is up to the client or the server to display the info properly.
It is the client's job to parse such characters, which curl is obviously not doing out of the box. Depending on the OS/shell/encoding you are using, you may be able to pipe the response through some other command that parses those characters, or use a similar approach.
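For example, the \u00e1-style sequences are plain JSON escapes, so any JSON-aware client restores the original characters. A quick check in Python (an illustrative snippet, not part of the Flask app):
import json

# the raw response body contains the escape \u00bf, not the character itself
body = '{"title": "\\u00bfPandora o Spotify?"}'
print(json.loads(body)["title"])  # ¿Pandora o Spotify?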

A query in SQLite3 with Python

I'm testing a query with Python and sqlite3. The first method works fine, but
the second is not working. It seems to be about the type of the variable containing the records from the DB:
import sqlite3

def insertar():
    db1 = sqlite3.connect('tabla.db')
    print("Estas en la funcion insertar")
    nombre1 = raw_input("Escribe el titulo de la novela: ")
    autor1 = raw_input("Escribe el autor de la novela: ")
    year1 = str(input("Digita el any de la novela: "))
    consulta = db1.cursor()
    strConsulta = "insert into tabla(nombre, autor, year) values\
        ('"+nombre1+"','"+autor1+"','"+year1+"')"
    print(strConsulta)
    consulta.execute(strConsulta)
    consulta.close()
    db1.commit()
    db1.close()

def consultar():
    db2 = sqlite3.connect("tabla.db")
    print("Estas en la funcion insertar")
    db2row_factory = sqlite3.Row
    consulta = db2.cursor()
    consulta.execute("select * from tabla")
    filas = consulta.fetchall()
    lista = []
    for fila in filas:
        s = {}
        s['nombre'] = fila['nombre']
        s['autor'] = fila['autor']
        s['year'] = str(fila['year'])
        lista.append(s)
    consulta.close()
    db2.close()
    return(lista)

#consultar()

def menu():
    Opcion = input("\nIngresa la opcion deseada\n1.Inserta un valor en la tabla\n2.Consultar los valores de la tabla\n")
    if Opcion == 1:
        insertar()
        menu()
    elif Opcion == 2:
        ListaNovelas = consultar()
        for novela in ListaNovelas:
            print(novela['nombre'], novela['autor'], novela['year'])
        menu()

menu()
I get this error while testing the second method consultar().
$ python file.py
Ingresa la opcion deseada
1.Inserta un valor en la tabla
2.Consultar los valores de la tabla
2
Estas en la funcion insertar
Traceback (most recent call last):
File "insertar.py", line 56, in <module>
menu()
File "insertar.py", line 51, in menu
ListaNovelas = consultar()
File "insertar.py", line 33, in consultar
s['nombre'] = fila['nombre']
TypeError: tuple indices must be integers, not str
db2row_factory = sqlite3.Row
This is the problematic line. Instead, you meant to set the row_factory attribute on the db2 connection instance:
db2.row_factory = sqlite3.Row
Then all the fetched rows will be sqlite3.Row instances with dictionary-like access to field values.
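A minimal sketch of the corrected consultar(), reusing the rest of the question's code unchanged:
def consultar():
    db2 = sqlite3.connect("tabla.db")
    db2.row_factory = sqlite3.Row   # set on the connection, before creating the cursor
    consulta = db2.cursor()
    consulta.execute("select * from tabla")
    lista = []
    for fila in consulta.fetchall():
        # fila is now a sqlite3.Row, so string keys work
        lista.append({'nombre': fila['nombre'], 'autor': fila['autor'], 'year': str(fila['year'])})
    consulta.close()
    db2.close()
    return lista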

Python, database results are saved with ('result')

I select some URLs from my database using some simple code:
def crearArchivo(self):
    # Connect to the database
    db = MySQLdb.connect("localhost", "root", "", "proyectoacademias")
    cursor = db.cursor()
    # Select the list of values that have not been processed yet
    sql = "SELECT url_paper_web FROM acw_papers_web WHERE finalizado = 0"
    cursor.execute(sql)
    # Fetch all the data
    datos = cursor.fetchall()
    # Create the file
    archivo = open('urls.txt', 'w')
    archivo.close()
    # Open it
    archivo = open('urls.txt', 'w')
    # Set a counter to keep track of the row number
    contador = 0
    # Write each row to the file and, if it is not the last one, add a newline
    for columna in datos:
        contador += 1
        archivo.write(str(columna))
        if contador != len(datos):
            archivo.write("\n")
    # Close everything
    archivo.close()
    db.close()
This method places those links in a file... the problem is that instead of being saved like http://google.com, the links are saved like ('http://google.com')
Each columna variable is actually a row, not a single column. Simply select the first value in that row:
archivo.write(columna[0])
That is because columna is a tuple. It looks like you want to write the first (and, in this case, only) element:
archivo.write(columna[0])
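For illustration (hypothetical values, not from the question's database), fetchall() returns a tuple per row even when only one column was selected:
datos = (('http://google.com',), ('http://example.com',))  # shape of cursor.fetchall()
for columna in datos:
    print(str(columna))   # ('http://google.com',)  <- the tuple's repr, which is what ends up in the file
    print(columna[0])     # http://google.com       <- just the first column's value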
