INSERT Error Python MySQL Connector: Failed executing the operation

I am scraping a Shopify store using the products.json page. I'm attempting to insert the scraped products into my MySQL DB using the Python connector, but I'm hitting the error below:
Something went wrong: Failed executing the operation; b'Name'
Code is below:
import json
import requests
import pandas as pd
import mysql.connector
import ScraperConfig as conf

class myScraper():
    def __init__(self, baseurl):
        self.baseurl = baseurl

    def downloadjson(self, page):
        r = requests.get(self.baseurl + f'products.json?limit=250&page={page}', timeout=5)
        if r.status_code != 200:
            print('Bad status code', r.status_code)
        if len(r.json()['products']) > 0:
            data = r.json()['products']
            return data
        else:
            return

    def parsejson(self, jsondata):
        products = []
        for prod in jsondata:
            vendor = prod['vendor']
            name = prod['title']
            handle = prod['handle']
            createdDateTime = prod['created_at']
            description = prod['body_html']
            productType = prod['product_type']
            for images in prod['images']:
                vendorProductId = images['product_id']
                try:
                    imageURL = images['src']
                except:
                    imageURL = 'None'
            for variant in prod['variants']:
                item = {
                    'name': name,
                    'handle': handle,
                    'description': description,
                    'productVariantId': variant['id'],
                    'createdDateTime': createdDateTime,
                    'productType': productType,
                    'vendorProductId': vendorProductId,
                    'imageURL': imageURL,
                    'price': variant['price'],
                    'salePrice': variant['compare_at_price'],
                    'available': variant['available'],
                    'updatedDateTime': variant['updated_at'],
                    'vendor': vendor
                }
                products.append(item)
        return products

def main():
    scrape = myScraper('https://www.someshopifysite.com/')
    results = []
    for page in range(1, 2):
        data = scrape.downloadjson(page)
        print('Getting page: ', page)
        try:
            results.append(scrape.parsejson(data))
        except:
            print(f'Completed, total pages = {page - 1}')
            break
    return results

if __name__ == '__main__':
    db = mysql.connector.connect(
        user=conf.user,
        host=conf.host,
        passwd=conf.passwd,
        database=conf.database)
    cursor = db.cursor()
    products = main()
    totals = [item for i in products for item in i]
    for p in totals:
        sql = """INSERT INTO `table` (`Name`, `Handle`, `Descritpion`, `VariantId`, `CreatedDateTime`, `ProductType`, `VendorProductId`, `ImageURL`, `Price`, `SalePrice`, `Available`, `UpdatedDateTime`, `Vendor`)
                 VALUES (%(`Name`)s, %(`Handle`)s, %(`Descritpion`)s, %(`VariantId`)s, %(`CreatedDateTime`)s, %(`ProductType`)s, %(`VendorProductId`)s, %(`ImageURL`)s, %(`Price`)s, %(`SalePrice`)s, %(`Available`)s, %(`UpdatedDateTime`)s, %(`Vendor`)s)"""
    try:
        cursor.executemany(sql, totals)
        print('Committed to DB')
    except mysql.connector.Error as err:
        print("Something went wrong: {}".format(err))
    db.commit()

Remove the backticks from the following, and from all similar parts of the query:
%(`Name`)s
In general I'd remove backticks except when quoting column names that collide with reserved keywords.
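For example, the statement with the backticks stripped from the placeholders might look like this (a sketch; note that the %(...)s names must exactly match the keys of the dictionaries built in parsejson, which are lowercase, so they are lowercased here as well):

sql = """INSERT INTO `table`
         (`Name`, `Handle`, `Descritpion`, `VariantId`, `CreatedDateTime`, `ProductType`, `VendorProductId`,
          `ImageURL`, `Price`, `SalePrice`, `Available`, `UpdatedDateTime`, `Vendor`)
         VALUES (%(name)s, %(handle)s, %(description)s, %(productVariantId)s, %(createdDateTime)s,
                 %(productType)s, %(vendorProductId)s, %(imageURL)s, %(price)s, %(salePrice)s,
                 %(available)s, %(updatedDateTime)s, %(vendor)s)"""

# executemany expands the statement once per dictionary in totals
cursor.executemany(sql, totals)
db.commit()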

Related

How to retrieve Redis database values in order of key using Python

I have a Python application where a client retrieves CSV data row by row from a server using a gRPC stream. Data from each row is added to a dictionary, which in turn is saved to a Redis database. When I attempt to retrieve the data from the Redis database in a separate Flask application, the data does not come out in order and is duplicated much of the time. How can I retrieve the data in order of the key, without duplicates?
Client
def run():
    # Average number of comments metric
    average_num_comments = 0
    response_count = 0
    comment_count = 0
    try:
        conn = redis.StrictRedis(host='redis', port=6379)
        conn.flushdb()
    except Exception as ex:
        print('Error:', ex)
    while True:
        with grpc.insecure_channel('redditserver:50051') as channel:
            stub = route_guide_pb2_grpc.RouteGuideStub(channel)
            responses = stub.SendRedditPost(route_guide_pb2.PostRequestReddit(response='Recieved'))
            # Single post with most letters in title
            lg_post_title = ''
            for response in responses:
                response_count += 1
                comment_count = int(response.num_comments) + comment_count
                average_num_comments = avg(response_count, comment_count)
                if (len(response.title) > len(lg_post_title)):
                    lg_post_title = response.title
                redisdict = {"Largest Post Title": lg_post_title, "Comment Count": comment_count, "Average No. Comments": average_num_comments}
                try:
                    conn = redis.StrictRedis(host='redis', port=6379)
                    conn.hmset(response_count, redisdict)
                except Exception as ex:
                    print('Error:', ex)
                time.sleep(2)
Flask Application
@app.route('/')
def get_page():
    data = ''
    try:
        conn = redis.StrictRedis(host='redis', port=6379, decode_responses=True)
        for key in conn.scan_iter():
            value = conn.hgetall(key)
            data = value
            time.sleep(2)
            print("KEY: " + key, file=sys.stderr)
            print(data, file=sys.stderr)
    except Exception as ex:
        data = 'Error:' + str(ex)
    return render_template('index.html', x=data)
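Since the client uses the numeric response_count as the hash key, one approach (a sketch, assuming the same connection settings) is to collect the keys, de-duplicate them, and sort them numerically before reading, so each hash is read exactly once and in insertion order:

conn = redis.StrictRedis(host='redis', port=6379, decode_responses=True)

# SCAN returns keys in arbitrary order and may return a key more than once,
# so de-duplicate with a set, then sort the numeric key strings as integers.
keys = sorted(set(conn.scan_iter()), key=int)
rows = [conn.hgetall(key) for key in keys]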

Python KeyError: 'destinationAccount'

I have data with the following structure from a website I'm scraping:
destinationAccount:
    ownerBuilding:
        label:
        _id:
    vban:
    _id:
When I try to read this key with
vban = str(transaction["destinationAccount"]["vban"])
it gives me KeyError: 'destinationAccount'.
Does anyone have an idea why this comes up? When I run my code, it copies everything I need into the MySQL database, but, as I said, the KeyError pops up and the interval
sched = BlockingScheduler()
sched.add_job(start, 'interval', seconds=5)
sched.start()
isn't working, because it stops running after the error appears. When I comment out the line vban = str(transaction["destinationAccount"]["vban"]), no error comes up. I have checked more than 10 times now; the structure is there on the website, as shown at the top. Any solution would be amazing.
def getData():
    databaseConn = dbConnect()
    cursor = databaseConn.cursor()
    for x in range(3):
        x = x * 25
        transactions = json.loads(makeRequest("URL.bla/transactions?offset=" + str(x), authToken, True).text)
        for transaction in transactions:
            person = ""
            try:
                person = transaction["destinationAccount"]["ownerCharacter"]["name"]
            except:
                try:
                    person = transaction["destinationAccount"]["ownerFactory"]["label"]
                except:
                    try:
                        person = transaction["destinationAccount"]["ownerBuilding"]["label"]
                    except:
                        person = str("unbekannt")
            reference = ""
            try:
                reference = str(translateTable[transaction["reference"]])
            except:
                reference = str(transaction["reference"])
            vban = str(transaction["destinationAccount"]["vban"])
            amount = str(transaction["amount"])
            taxAmount = str(transaction["taxAmount"])
            gesamt = (float(amount) + float(taxAmount))
            created = parse(str(transaction["created"]))
            date = str(created.date())
            time = str(created.time()).split(".")[0]
            sql = "INSERT INTO finanzen (transaktion, date, time, sendto, vban, amount, tax, gesamt, text) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
            val = (str(transaction["uuid"]), date, time, str(person), vban, amount, taxAmount, gesamt, reference)
            try:
                cursor.execute(sql, val)
                databaseConn.commit()
            except:
                print("Fehler Datenbank")
    dbClose(databaseConn, cursor)
Print result:
{'_id': 'CENSORED',
 'uuid': 'CENSORED',
 'amount': 11.8421,
 'taxAmount': 3.1479,
 'type': 'digital',
 'created': 'Date',
 'reference': 'CENSORED',
 'sourceAccount': {'_id': 'CENSORED',
                   'ownerCharacter': {'_id': 'CENSORED', 'name': 'NAME'},
                   'vban': 'NUMBER'},
 'destinationAccount': {'_id': 'CENSORED',
                        'vban': 'NUMBER',
                        'ownerBuilding': {'_id': 'CENSORED', 'label': 'Eclipse Towers'}}}
It's difficult to say without seeing the full list, but I suspect some of the items are missing the key. Have you tried checking whether the key exists? Using your example:
transaction = {
    "_id": "CENSORED",
    "uuid": "CENSORED",
    "amount": 11.8421,
    "taxAmount": 3.1479,
    "type": "digital",
    "created": "Date",
    "reference": "CENSORED",
    "sourceAccount": {
        "_id": "CENSORED",
        "ownerCharacter": {
            "_id": "CENSORED",
            "name": "NAME"
        },
        "vban": "NUMBER"
    },
    "destinationAccount": {
        "_id": "CENSORED",
        "ownerBuilding": {
            "_id": "CENSORED",
            "label": "Eclipse Towers"
        }
    }
}

if 'vban' in transaction['destinationAccount']:
    vban = str(transaction["destinationAccount"]["vban"])
else:
    vban = "none"
Thanks to @Johnny John Boy for the hint.
vban = ""
try:
vban = str(transaction["destinationAccount"]["vban"])
except:
try:
vban = str(transaction["sourceAccount"]["vban"])
except:
vban = str("Unbekannt")
This is the solution that fixed the KeyError, because there was a second case: some transactions only carry the vban on sourceAccount. Now it works as it should, without any error.
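As a side note, the nested try/except chain can be flattened with dict.get, which returns a default instead of raising; a minimal sketch using the keys from the posted data:

dest = transaction.get("destinationAccount", {})
src = transaction.get("sourceAccount", {})

# Prefer the destination vban, fall back to the source, then to a placeholder.
vban = str(dest.get("vban") or src.get("vban") or "Unbekannt")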

main() takes 0 positional arguments but 2 were given

I have the following code
import datetime
import json
import urllib.request

from bs4 import BeautifulSoup as soup  # assuming soup() refers to BeautifulSoup
from google.cloud import bigquery

client = bigquery.Client()
dataset_id = 'dataset'  # replace with your dataset ID
table_id = 'table'  # replace with your table ID
table_ref = client.dataset(dataset_id).table(table_id)
table = client.get_table(table_ref)  # API request
rows_to_insert = []

bq = bigquery.Client(project='project-id')
query = """SELECT Url FROM `project-id.dataset.urltable`"""
query_job = bq.query(query)
data = query_job.result()
rows = list(data)

def main():
    for row in rows:
        URL = urllib.request.urlopen(row[0])
        soup_page = soup(URL, features="lxml")
        try:
            data = json.loads(soup_page.find_all('script', type='application/ld+json')[1].text)
        except:
            data = 'unknown'
        try:
            price_ruw = data['offers']['price']
            shopprice = price_ruw.replace(',', '.')
        except:
            shopprice = 0
        try:
            ean = data['gtin13']
            ean = str(ean)
        except:
            ean = 'unknown'
        try:
            title_ruw1 = data['name']
            title_ruw = title_ruw1
            tile_trim = title_ruw[:750]
            title = tile_trim.replace("'", "")
        except:
            title = "unknown"
        try:
            reviews = data['aggregateRating']['reviewCount']
        except:
            reviews = 0
        try:
            score = (float(data['aggregateRating']['ratingValue']) * 2)
        except:
            score = 0
        datenow = (datetime.datetime.now())
        shoplink = row[0]
        rows_to_insert.append([shoplink, ean, title, reviews, score, shopprice, datenow])
    client.insert_rows(table, rows_to_insert)  # API request

main()
Testing this code in Google Cloud Platform gives:
Error: function crashed. Details:
main() takes 0 positional arguments but 2 were given
However, when deploying this code it does not give an error; only scheduling it does not work, since it keeps giving the error above.
For deploying I use the following command (which works):
gcloud functions deploy <function> --entry-point main \
    --runtime python37 --trigger-resource <name> \
    --trigger-event google.pubsub.topic.publish --timeout 540s
It's not clear how you're triggering this function, but it seems to be a "background function", which means it needs to take two arguments, even if they're unused:
def main(data, context):
    ...
See https://cloud.google.com/functions/docs/concepts/events-triggers for more information.
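Applied to the posted code, a minimal sketch might wrap the existing loop so the same file still runs locally (scrape_urls is a hypothetical helper name; data and context are filled in by the platform):

def scrape_urls():
    # The original body of main(): loop over rows, scrape each Url,
    # append to rows_to_insert, then call client.insert_rows(...).
    ...

def main(data, context):
    # Entry point for the Pub/Sub-triggered Cloud Function; both
    # arguments are supplied by the trigger and are unused here.
    scrape_urls()

if __name__ == '__main__':
    main(None, None)  # local testing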

process pool executor hangs using map

I'm having an issue using Python 3 and concurrent.futures.ProcessPoolExecutor with the map function.
My code is this:
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
import psycopg2
import psycopg2.extensions
import psycopg2.extras
from asq import query
import select
import concurrent.futures
import asyncio

class UpdateElastic:
    def __init__(self):
        conn = psycopg2.connect(
            "dbname=db user=mad password=hat host=blah",
            async_=True
        )
        self.wait(conn)
        cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        cur.execute("SELECT * FROM table",)
        self.wait(cur.connection)
        self.report_files = cur.fetchall()
        cur.execute("SELECT * FROM othertable",)
        self.wait(cur.connection)
        self.payment_events = cur.fetchall()
        cur.close()
        conn.close()
        self.esconn = Elasticsearch([{'host': 'elasticsearch.example.com', 'port': 1234}])
        # pass

    def wait(self, conn):
        while 1:
            state = conn.poll()
            if state == psycopg2.extensions.POLL_OK:
                break
            elif state == psycopg2.extensions.POLL_WRITE:
                select.select([], [conn.fileno()], [])
            elif state == psycopg2.extensions.POLL_READ:
                select.select([conn.fileno()], [], [])
            else:
                raise psycopg2.OperationalError("poll() returned %s" % state)

    def get_es_indices(self):
        indices = self.esconn.indices.get_alias("digital-sales-csv*")
        return list(indices.keys())

    def update_documents(self, index, scroll_id=None):
        print(index)
        # return index
        # documents = _get_es_documents(conn, index)
        # print(documents['_scroll_id'])
        # scroll_id = documents['_scroll_id']
        # for document in documents['hits']['hits']:
        #     ids = {
        #         "report_id": document['_source']['report_id'],
        #         "payment_id": document['_source']['payment_id'],
        #         "document_id": document['_id']
        #     }
        #     asyncio.run(_update_es_document(conn, index, report_files, payment_events, ids))
        # update_documents(index, conn, report_files, payment_events, scroll_id)

def main():
    print('main called')
    print('instantiating UpdateElastic')
    us = UpdateElastic()
    print('UpdateElastic instantiated')
    print('setting up ProcessPoolExecutor')
    blah = ['abc', 'def', 'ghi']
    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
        print('calling executor.map')
        executor.map(us.update_documents, blah, timeout=10)

if __name__ == "__main__":
    main()
With this code, all I'm expecting it to do is print out the values of the array that I've passed, so:
'abc'
'def'
'ghi'
However, after printing 'calling executor.map', it hangs.
When I change my constructor to:
class UpdateElastic:
    def __init__(self):
        # conn = psycopg2.connect(
        #     "dbname=db user=mad password=hat host=blah",
        #     async_=True
        # )
        # self.wait(conn)
        # cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        # cur.execute("SELECT * FROM table",)
        # self.wait(cur.connection)
        # self.report_files = cur.fetchall()
        # cur.execute("SELECT * FROM othertable",)
        # self.wait(cur.connection)
        # self.payment_events = cur.fetchall()
        # cur.close()
        # conn.close()
        # self.esconn = Elasticsearch([{'host':'elasticsearch.example.com','port':1234}])
        pass
(containing only a pass in the constructor), it actually prints out the values of the array, as expected.
I'm running this on python 3.7.3, on OSX Mojave 10.14.2.
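One plausible cause: executor.map has to pickle the callable, and a bound method like us.update_documents drags the entire UpdateElastic instance, including its Elasticsearch client, into the pickle for each worker; clients holding sockets and locks generally can't be pickled, and the pool can hang or break instead of reporting that cleanly, which would also explain why the empty constructor works. A sketch of one way to test that theory, keeping the heavyweight state out of what gets shipped to the workers:

import concurrent.futures

def update_documents(index):
    # Runs in a worker process. Create any DB or Elasticsearch clients
    # here, inside the worker, instead of inheriting them via pickling.
    print(index)
    return index

def main():
    blah = ['abc', 'def', 'ghi']
    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
        # Iterating the results forces exceptions raised during
        # submission or inside the workers to surface here.
        for result in executor.map(update_documents, blah):
            print('got', result)

if __name__ == '__main__':
    main()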

Insert into Odoo db with a specific id using cursor.commit and psycopg2

I'm trying to migrate some models from OpenERP 7 to Odoo 8 by code. I want to insert objects into the new table while maintaining their original id numbers, but the inserted records don't keep them. I want to insert each new object including its id number.
My code:
import openerp
from openerp import api, modules
from openerp.cli import Command
import psycopg2

class ImportCategory(Command):
    """Import categories from source DB"""

    def process_item(self, model, data):
        if not data:
            return
        # Model structure
        model.create({
            'id': data['id'],
            'parent_id': None,
            'type': data['type'],
            'name': data['name']
        })

    def run(self, cmdargs):
        # Connection to the source database
        src_db = psycopg2.connect(
            host="127.0.0.1", port="5432",
            database="db_name", user="db_user", password="db_password")
        src_cr = src_db.cursor()
        try:
            # Query to retrieve source model data
            src_cr.execute("""
                SELECT c.id, c.parent_id, c.name, c.type
                FROM product_category c
                ORDER BY c.id;
            """)
        except psycopg2.Error as e:
            print e.pgerror
        openerp.tools.config.parse_config(cmdargs)
        dbname = openerp.tools.config['db_name']
        r = modules.registry.RegistryManager.get(dbname)
        cr = r.cursor()
        with api.Environment.manage():
            env = api.Environment(cr, 1, {})
            # Define target model
            product_category = env['product.category']
            id_ptr = None
            c_data = {}
            while True:
                r = src_cr.fetchone()
                if not r:
                    self.process_item(product_category, c_data)
                    break
                if id_ptr != r[0]:
                    self.process_item(product_category, c_data)
                    id_ptr = r[0]
                    c_data = {
                        'id': r[0],
                        'parent_id': r[1],
                        'name': r[2],
                        'type': r[3]
                    }
            cr.commit()
How do I do that?
The only way I could find was to use reference attributes in other objects to relate them in the new database. That is, create relations over location code, client code, order number... and once the records are created in the target database, look them up and use the new ID.
def run(self, cmdargs):
    # Connection to the source database
    src_db = psycopg2.connect(
        host="localhost", port="5433",
        database="bitnami_openerp", user="bn_openerp", password="bffbcc4a")
    src_cr = src_db.cursor()
    try:
        # Query to retrieve source model data
        src_cr.execute("""
            SELECT fy.id, fy.company_id, fy.create_date, fy.name,
                   p.id, p.code, p.company_id, p.create_date, p.date_start, p.date_stop, p.special, p.state,
                   c.id, c.name
            FROM res_company c, account_fiscalyear fy, account_period p
            WHERE p.fiscalyear_id = fy.id AND c.id = fy.company_id AND p.company_id = fy.company_id
            ORDER BY fy.id;
        """)
    except psycopg2.Error as e:
        print e.pgerror
    openerp.tools.config.parse_config(cmdargs)
    dbname = openerp.tools.config['db_name']
    r = modules.registry.RegistryManager.get(dbname)
    cr = r.cursor()
    with api.Environment.manage():
        env = api.Environment(cr, 1, {})
        # Define target model
        account_fiscalyear = env['account.fiscalyear']
        id_fy_ptr = None
        fy_data = {}
        res_company = env['res.company']
        while True:
            r = src_cr.fetchone()
            if not r:
                self.process_fiscalyear(account_fiscalyear, fy_data)
                break
            company = res_company.search([('name', 'like', r[13])])
            print "Company id: {} | Company name: {}".format(company.id, company.name)
The previous code is only an extract from the whole source code.
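As a side note on the original problem: Odoo's ORM fills the id column from the table's sequence and ignores an id passed to create(), which is why the original numbers are lost. A sketch of a common workaround for a one-off migration, assuming raw SQL on the target cursor is acceptable: insert with an explicit id, then advance the sequence past the highest imported id so later ORM creates don't collide (insert_with_id is a hypothetical helper, not Odoo API):

def insert_with_id(cr, data):
    # Bypasses the ORM so the id column keeps the value
    # copied from the source database.
    cr.execute("""
        INSERT INTO product_category (id, parent_id, type, name)
        VALUES (%s, %s, %s, %s)
    """, (data['id'], data['parent_id'], data['type'], data['name']))

# After the import, keep the sequence ahead of the copied ids.
cr.execute("SELECT setval('product_category_id_seq', (SELECT MAX(id) FROM product_category))")
cr.commit()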
