Loop through a table in SQL and update a row every time - Python

I'm having lots of trouble trying to figure out how I can update several rows in a SQLite database.
Effectively, I take each location from a database I gathered and run it through Google Maps to get the latitude and longitude. In general it's working, but the loop fails!
It runs once, gets the first row that meets the criteria, and finishes, and I can't figure out why it doesn't keep going! Can anyone help? The script is below:
# coding=utf-8
import urllib
import sqlite3
import json

conn = sqlite3.connect('ArchDailyProjects.sqlite')
cur = conn.cursor()

# Google Prep
ServiceUrl = "https://maps.googleapis.com/maps/api/geocode/json?"
FimDoURL = "&key=????????????????????????????????"  # I have the key right, this part works fine

#cur.execute('SELECT * FROM Lugares')
#print type(cur)
#print cur
#row = cur.fetchone()
for row in cur.execute('SELECT * FROM LugareS'):
    print 'Entramos no While'
    Loc_id = str(row[0])
    Loc_Name = str(row[1])
    Loc_Lat = row[2]
    print Loc_Name
    if Loc_Lat is None:
        print Loc_Name
        print Loc_Lat
        print "Buscando " + Loc_Name + " no Google Maps"
        try:
            Url = ServiceUrl + urllib.urlencode({"sensor": "false", "address": Loc_Name}) + FimDoURL
            Uh = urllib.urlopen(Url)
            Dados = Uh.read()
            try:
                js = json.loads(str(Dados))
            except:
                js = None
        except:
            continue
        if "status" not in js or js["status"] != "OK":
            print "===== Beeehhhh!!! Não conseguimos encontrar essa cidade===="
            print Dados
            continue
        else:
            Loc_FormatedAdress = js["results"][0]["formatted_address"]
            Loc_Lat = js["results"][0]["geometry"]["location"]["lat"]
            Loc_Lon = js["results"][0]["geometry"]["location"]["lng"]
            print Dados
            print 'Endereço Google: ', Loc_FormatedAdress
            print 'Latitude: ', Loc_Lat
            print 'Longitude: ', Loc_Lon
            cur.execute('''UPDATE Lugares SET Latitude = ?, Longitude = ?, GoogleLoc = ? WHERE id = ?''',
                        (Loc_Lat, Loc_Lon, Loc_FormatedAdress, Loc_id))
            #row = cur.fetchone()
    else:
        #row = cur.fetchone()
        continue

conn.commit()
Thank you guys!

If the table is large, you may not want to load the entire result into memory with fetchall, but instead read one row at a time and update entries as you go. You can do this by creating two cursors.
import sqlite3 as sq3

conn = sq3.connect(db_name)
cur = conn.cursor()
cur2 = conn.cursor()

for row in cur.execute('SELECT * FROM Table'):
    cur2.execute('''UPDATE Table SET variable = ? WHERE id = ?''', (variable, id))
works fine.
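Applied to the table from the question, a minimal sketch of the two-cursor pattern could look like the following. geocode() is a hypothetical helper standing in for the urllib/Google Maps lookup in the original script, and the column names id, Name and Latitude are assumed from the question's UPDATE statement:

import sqlite3

conn = sqlite3.connect('ArchDailyProjects.sqlite')
cur = conn.cursor()    # iterates over the SELECT
cur2 = conn.cursor()   # runs the UPDATEs, so the iteration is never disturbed

for loc_id, name, lat in cur.execute('SELECT id, Name, Latitude FROM Lugares'):
    if lat is not None:
        continue
    lat, lon, address = geocode(name)  # hypothetical helper wrapping the Google Maps call
    cur2.execute('UPDATE Lugares SET Latitude = ?, Longitude = ?, GoogleLoc = ? WHERE id = ?',
                 (lat, lon, address, loc_id))

conn.commit()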

for row in cur.execute('SELECT * FROM LugareS'):
    ...
    cur.execute('''UPDATE Lugares SET Latitude= ?, Longitude=?, GoogleLoc=? WHERE id= ?
You are executing a different query on the same cursor object; the UPDATE does not have any result rows.
Simply read all the data before looping over it:
cur.execute('SELECT id, Name FROM Lugares WHERE Latitude IS NULL')
empty_rows = cur.fetchall()
for row in empty_rows:
    ...
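For completeness, a minimal sketch of this fetchall fix in context, again assuming a hypothetical geocode() helper that wraps the urllib/Google Maps code from the question:

cur.execute('SELECT id, Name FROM Lugares WHERE Latitude IS NULL')
empty_rows = cur.fetchall()  # materialise the result set before reusing the cursor

for loc_id, name in empty_rows:
    result = geocode(name)   # hypothetical helper; returns (lat, lon, formatted_address) or None
    if result is None:
        continue
    lat, lon, address = result
    # the cursor is free for other statements now that we iterate over a plain list
    cur.execute('UPDATE Lugares SET Latitude = ?, Longitude = ?, GoogleLoc = ? WHERE id = ?',
                (lat, lon, address, loc_id))

conn.commit()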

Related

Getting row counts from Redshift during unload process and counting rows loaded in S3

My Python code is below; it unloads data from Redshift to an Amazon S3 bucket. I am trying to get the row count from Redshift and from the S3 bucket to ensure that all the data is loaded. Additionally, I would also like to get the last upload date from the S3 bucket so that I know when the last unload was performed. Kindly suggest the code with an explanation.
Thanks in advance for your time and efforts!
import csv
import redshift_connector
import sys

CSV_FILE = "Tables.csv"
CSV_DELIMITER = ';'
S3_DEST_PATH = "s3://..../"
DB_HOST = "MY HOST"
DB_PORT = 1234
DB_DB = "MYDB"
DB_USER = "MY_READ"
DB_PASSWORD = "MY_PSWD"
IAM_ROLE = "arn:aws:iam::/redshift-role/unload data", "arn:aws::iam::/write in bucket"

def get_tables(path):
    tables = []
    with open(path, 'r') as file:
        csv_reader = csv.reader(file, delimiter=CSV_DELIMITER)
        header = next(csv_reader)
        if header is not None:
            for row in csv_reader:
                tables.append(row)
    return tables

def unload(conn, tables, s3_path):
    cur = conn.cursor()
    for table in tables:
        print(f">{table[0]}.{table[1]}")
        try:
            query = f'''unload('select * from {table[0]}.{table[1]}') to '{s3_path}/{table[1]}/'
                iam_role '{IAM_ROLE}'
                CSV
                PARALLEL FALSE
                CLEANPATH;'''
            print(f"loading in progress")
            cur.execute(query)
            print(f"Done.")
        except Exception as e:
            print("Failed to load")
            print(str(e))
            sys.exit(1)
    cur.close()

def main():
    try:
        conn = redshift_connector.connect(
            host=DB_HOST,
            port=DB_PORT,
            database=DB_DB,
            user=DB_USER,
            password=DB_PASSWORD
        )
        tables = get_tables(CSV_FILE)
        unload(conn, tables, S3_DEST_PATH)
        conn.close()
    except Exception as e:
        print(e)
        sys.exit(1)
Update code based on SO User's comment
tables = ['schema1.tablename', 'schema2.table2']
conn = redshift_connector.connect(
    host='my_host',
    port="my_port",
    database='my_db',
    user="user",
    password='password')
cur = conn.cursor()
cur.execute('select count(*) from {','.join("'"+y+"'" for y in tables)}')
results = cur.fetchall()
print("The table {} contained".format(tables[0]), *result[0], "rows" + "\n")  # Printing row counts along with table names
cur.close()
conn.close()
2nd Update:
tables = ['schema1.tablename', 'schema2.table2']
conn = redshift_connector.connect(
    host='my_host',
    port="my_port",
    database='my_db',
    user="user",
    password='password')
cur = conn.cursor()
for table in tables:
    cur.execute(f'select count(*) from {table};')
    results = cur.fetchone()
    for row in result:
        print("The table {} contained".format(tables[0]), result[0], "rows" + "\n")  # Printing row counts along with table names
The simple query to get the number of rows is
query = "select count(*) from {table_name}"
For Redshift, all you need to do is
cur.execute(query)
row_count = cur.fetchall()
Using boto3, you can use a similar SQL query to fetch S3 row count as well, as elucidated in this answer.
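For the S3 side, a minimal sketch with boto3; the bucket name and prefix below are placeholders for whatever S3_DEST_PATH points at, and it assumes the unloaded files are uncompressed CSV. list_objects_v2 gives the LastModified timestamps, so the newest one is the last upload date, and S3 Select counts the rows in each unloaded file:

import boto3

s3 = boto3.client("s3")
BUCKET = "my-bucket"           # placeholder: bucket from S3_DEST_PATH
PREFIX = "unload/tablename/"   # placeholder: prefix written by the UNLOAD command

def s3_row_count_and_last_upload(bucket, prefix):
    objects = s3.list_objects_v2(Bucket=bucket, Prefix=prefix).get("Contents", [])
    last_upload = max(obj["LastModified"] for obj in objects) if objects else None
    total_rows = 0
    for obj in objects:
        # S3 Select runs a small SQL query against each unloaded CSV file
        resp = s3.select_object_content(
            Bucket=bucket,
            Key=obj["Key"],
            ExpressionType="SQL",
            Expression="select count(*) from s3object",
            InputSerialization={"CSV": {"FileHeaderInfo": "NONE"}, "CompressionType": "NONE"},
            OutputSerialization={"CSV": {}},
        )
        for event in resp["Payload"]:
            if "Records" in event:
                total_rows += int(event["Records"]["Payload"].decode().strip())
    return total_rows, last_upload

rows, last_upload = s3_row_count_and_last_upload(BUCKET, PREFIX)
print(f"S3 rows: {rows}, last upload: {last_upload}")

You can then compare this count against the Redshift counts below to confirm the unload is complete.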
Edit:
Corrected your updated approach a little:
cur = conn.cursor()
for table in tables:
    cur.execute(f'select count(*) from {table};')
    result = cur.fetchone()
    count = result[0] if result else 0
    print(f"The table {table} contained {count} rows.\n")

Updating results from a mysql-connector fetchall

I'm trying to select certain records from the civicrm_address table and update the geocode columns. I use fetchall to retrieve the rows and then, within the same loop, I try to update them with the results of the geocoder API, passing the civicrm_address.id value in the update_sql statement.
The rowcount after the attempted update and commit is always -1, so I assume it failed for some reason, but I have yet to figure out why.
import geocoder
import mysql.connector

mydb = mysql.connector.connect(
    [redacted]
)
mycursor = mydb.cursor(dictionary=True)
update_cursor = mydb.cursor()

sql = """
    select
        a.id
        , street_address
        , city
        , abbreviation
    from
        civicrm_address a
        , civicrm_state_province b
    where
        location_type_id = 6
        and
        a.state_province_id = b.id
        and
        street_address is not null
        and
        city is not null
    limit 5
"""

mycursor.execute(sql)
rows = mycursor.fetchall()
print(mycursor.rowcount, "records selected")

for row in rows:
    address_id = int(row["id"])
    street_address = str(row["street_address"])
    city = str(row["city"])
    state = str(row["abbreviation"])
    myaddress = street_address + " " + city + ", " + state
    g = geocoder.arcgis(myaddress)
    d = g.json
    latitude = d["lat"]
    longitude = d["lng"]
    update_sql = """
        begin work;
        update
            civicrm_address
        set
            geo_code_1 = %s
            , geo_code_2 = %s
        where
            id = %s
    """
    var = (latitude, longitude, address_id)
    print(var)
    update_cursor.execute(update_sql, var, multi=True)
    mydb.commit()
    print(update_cursor.rowcount)

mycursor.close()
update_cursor.close()
mydb.close()
Here is a simpler script. I have executed the update_sql statement directly in MySQL Workbench and it succeeds, but it does not work from Python:
import geocoder
import mysql.connector

try:
    mydb = mysql.connector.connect(
        [redacted]
    )
    mycursor = mydb.cursor(dictionary=True)
    update_cursor = mydb.cursor()
    update_sql = """
        begin work;
        update
            civicrm_address
        set
            geo_code_1 = 37.3445
            , geo_code_2 = -118.5366074
        where
            id = 65450;
    """
    update_cursor.execute(update_sql, multi=True)
    mydb.commit()
    print(update_cursor.rowcount, "row(s) were updated")
except mysql.connector.Error as error:
    print("Failed to update record to database: {}".format(error))
    mydb.rollback()
finally:
    # closing database connection.
    if mydb.is_connected():
        mydb.close()
I have it working now. At first I removed the "begin work" statement but kept multi=True, and it still didn't work. Once I also removed multi=True, it worked.
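A minimal sketch of that working version: a single UPDATE with no "begin work" and no multi=True, followed by an explicit commit (connection details redacted as in the scripts above; the id and coordinates are the values from the test script):

import mysql.connector

mydb = mysql.connector.connect(
    [redacted]
)
update_cursor = mydb.cursor()

update_sql = """
    update civicrm_address
    set geo_code_1 = %s, geo_code_2 = %s
    where id = %s
"""
update_cursor.execute(update_sql, (37.3445, -118.5366074, 65450))
mydb.commit()
print(update_cursor.rowcount, "row(s) were updated")

update_cursor.close()
mydb.close()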

Python - how to issue SQL insert into statement with ' in value

I am moving data from MySQL to MSSQL; however, I have a problem with the insert into statement when there is a ' in a value.
For the export I have used the code below:
import pymssql
import mysql.connector

conn = pymssql.connect(host='XXX', user='XXX',
                       password='XXX', database='XXX')
sqlcursor = conn.cursor()

cnx = mysql.connector.connect(user='root', password='XXX',
                              database='XXX')
cursor = cnx.cursor()

sql = "SELECT Max(ID) FROM XXX;"
cursor.execute(sql)
row = cursor.fetchall()
maxID = str(row)
maxID = maxID.replace("[(", "")
maxID = maxID.replace(",)]", "")
AMAX = int(maxID)

LC = 1
while LC <= AMAX:
    LCC = str(LC)
    sql = "SELECT * FROM XX where ID ='" + LCC + "'"
    cursor.execute(sql)
    result = cursor.fetchall()
    data = str(result)
    data = data.replace("[(", "")
    data = data.replace(")]", "")
    data = data.replace("None", "NULL")
    #print(row)
    si = "insert into [XXX].[dbo].[XXX] select " + data
    #print(si)
    #sys.exit("stop")
    try:
        sqlcursor.execute(si)
        conn.commit()
    except Exception:
        print("-----------------------")
        print(si)
    LC = LC + 1

print('Import done | total count:', LC)
It works fine until there is a ' in one of my values:
'N', '0000000000', "test string'S nice company"
I would like to avoid splitting the data into columns and then checking whether there is a ' in the data, as my table has about 500 fields.
Is there a smart way of replacing ' with ''?
Answer:
Added SET QUOTED_IDENTIFIER OFF to the insert statement:
si = "SET QUOTED_IDENTIFIER OFF insert into [TechAdv].[dbo].[aem_data_copy] select " + data
In MSSQL, you can SET QUOTED_IDENTIFIER OFF; then you can use double quotes to escape a single quote, or use two single quotes to escape one quote.
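An alternative sketch, not from the answer above: instead of building the INSERT string by hand, pass the fetched rows as parameters, which sidesteps the quoting problem entirely because the driver escapes each value itself. The table names are the same redacted placeholders as in the question, and it assumes the source and target tables have the same column order:

import pymssql
import mysql.connector

cnx = mysql.connector.connect(user='root', password='XXX', database='XXX')
conn = pymssql.connect(host='XXX', user='XXX', password='XXX', database='XXX')
cursor = cnx.cursor()
sqlcursor = conn.cursor()

cursor.execute("SELECT * FROM XX")
rows = cursor.fetchall()

# one %s placeholder per source column; pymssql quotes/escapes the values itself
placeholders = ", ".join(["%s"] * len(cursor.description))
si = "insert into [XXX].[dbo].[XXX] values (" + placeholders + ")"
sqlcursor.executemany(si, rows)
conn.commit()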

How to change the cursor to the next row using pyodbc in Python

I am trying to fetch records at a regular interval from a database table that keeps growing. I am using Python and its pyodbc package to fetch the records. While fetching, how can I point the cursor to the row after the one that was read/fetched last, so that every fetch only returns the newly inserted records?
To explain more:
my table has 100 records and they are fetched;
after an interval the table has 200 records and I want to fetch rows 101 to 200, and so on.
Is there a way to do this with a pyodbc cursor? Any other suggestion would be very helpful.
Below is the code I am trying:
#!/usr/bin/python
import pyodbc
import csv
import time

conn_str = (
    "DRIVER={PostgreSQL Unicode};"
    "DATABASE=postgres;"
    "UID=userid;"
    "PWD=database;"
    "SERVER=localhost;"
    "PORT=5432;"
)
conn = pyodbc.connect(conn_str)
cursor = conn.cursor()

def fetch_table(**kwargs):
    qry = kwargs['qrystr']
    try:
        #cursor = conn.cursor()
        cursor.execute(qry)
        all_rows = cursor.fetchall()
        rowcnt = cursor.rowcount
        rownum = cursor.description
        #return (rowcnt, rownum)
        return all_rows
    except pyodbc.ProgrammingError as e:
        print("Exception occurred as:", type(e), e)

def poll_db():
    for i in [1, 2]:
        stmt = "select * from my_database_table"
        rows = fetch_table(qrystr=stmt)
        print("***** For i = ", i, "******")
        for r in rows:
            print("ROW-> ", r)
        time.sleep(10)

poll_db()
conn.close()
I don't think you can use pyodbc, or any other ODBC package, to find "new" rows. But if there is a timestamp column in your database, or if you can add one (some databases can populate it automatically with the insertion time, so you don't have to change your insert queries), then you can change your query to select only the rows whose timestamp is greater than the previous timestamp, and keep updating the prev_timestamp variable on each iteration.
import datetime

def poll_db():
    prev_timestamp = ""
    for i in [1, 2]:
        if prev_timestamp == "":
            stmt = "select * from my_database_table"
        else:
            # convert your timestamp str to match the database's format
            stmt = "select * from my_database_table where timestamp > '" + str(prev_timestamp) + "'"
        rows = fetch_table(qrystr=stmt)
        prev_timestamp = datetime.datetime.now()
        print("***** For i = ", i, "******")
        for r in rows:
            print("ROW-> ", r)
        time.sleep(10)
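Alternatively, a minimal sketch under a different assumption: if the table has an auto-incrementing id column, you can remember the largest id already fetched and ask only for later rows. This reuses the cursor and the time import from the question's script; the column name id and its position as the first column are assumptions:

def poll_db_by_id():
    last_id = 0
    for i in [1, 2]:
        # pyodbc uses ? placeholders, so the last seen id is passed as a parameter
        stmt = "select * from my_database_table where id > ? order by id"
        cursor.execute(stmt, (last_id,))
        rows = cursor.fetchall()
        print("***** For i = ", i, "******")
        for r in rows:
            print("ROW-> ", r)
            last_id = r[0]  # assumes id is the first column in the table
        time.sleep(10)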

Python not saving data correctly when I use pypyodbc

I wrote a simple script that parses a CSV file and inserts the data into SQL Server.
The very strange issue is that some variables are lost if I use them in an if condition.
This is the script:
# DB connection
conn = pypyodbc.connect('DRIVER={SQL Server};SERVER=xxx.xxx.xxx.xxx;DATABASE=SCAN;UID=user;PWD=password')
cursor = conn.cursor()

def main():
    reader = csv.reader(file(filename, "rb"), delimiter=';')
    for row in reader:
        ip = row[0]
        host = row[1]
        domain = row[2]
        # get Operating System ID
        os_id = getOperatingSystem(row[3])
        manufacturer = row[4]
        model = row[5]
        # get computer_manufacturer ID
        computer_manufacturer = getManufacturer(manufacturer, computer_model)
        arch = getArch(row[6])
        values = [ip, host, domain, os_id, manufacturer, arch]
        hostIP = getHostIP(ip)
        print "hostIP: " + str(hostIP)
        if hostIP == 0:
            print values
            # insert values in DB
            cursor.execute(
                """
                INSERT INTO dbo.hosts (ip, host, domain, os_id, manufacturer, arch_id)
                VALUES (?, ?, ?, ?, ?, ?)
                """, values)
            cursor.commit()

# return host IP ID
def getHostIP(hostIP):
    cursor.execute("SELECT id FROM mytable WHERE ip = ?", [hostIP])
    row = cursor.fetchone()
    if row is not None:
        return row[0]
    return 0

# return ID of Computer Manufacturer
def getComputerManufacturer(manufacturer, computer_model):
    cursor.execute("SELECT id FROM manufacturer WHERE manufacturer = ? AND computer_model = ?", [manufacturer, computer_model])
    row = cursor.fetchone()
    if row is not None:
        return row[0]
    else:
        return setComputerManufacturer(manufacturer, computer_model)
If I comment out the cursor.execute and cursor.commit lines, the print values line correctly shows the data; otherwise it shows only the same CSV line.
Can you give me a little help?
Thanks
