process pool executor hangs using map - python

I'm having an issue using Python 3 with concurrent.futures.ProcessPoolExecutor and its map function.
My code is this:
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
import psycopg2
import psycopg2.extensions
import psycopg2.extras
from asq import query
import select
import concurrent.futures
import asyncio
class UpdateElastic:
    def __init__(self):
        conn = psycopg2.connect(
            "dbname=db user=mad password=hat host=blah",
            async_=True
        )
        self.wait(conn)
        cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        cur.execute("SELECT * FROM table")
        self.wait(cur.connection)
        self.report_files = cur.fetchall()
        cur.execute("SELECT * FROM othertable")
        self.wait(cur.connection)
        self.payment_events = cur.fetchall()
        cur.close()
        conn.close()
        self.esconn = Elasticsearch([{'host': 'elasticsearch.example.com', 'port': 1234}])
        # pass
    def wait(self, conn):
        while 1:
            state = conn.poll()
            if state == psycopg2.extensions.POLL_OK:
                break
            elif state == psycopg2.extensions.POLL_WRITE:
                select.select([], [conn.fileno()], [])
            elif state == psycopg2.extensions.POLL_READ:
                select.select([conn.fileno()], [], [])
            else:
                raise psycopg2.OperationalError("poll() returned %s" % state)
    def get_es_indices(self):
        indices = self.esconn.indices.get_alias("digital-sales-csv*")
        return list(indices.keys())

    def update_documents(self, index, scroll_id=None):
        print(index)
        # return index
        # documents = _get_es_documents(conn, index)
        # print(documents['_scroll_id'])
        # scroll_id = documents['_scroll_id']
        # for document in documents['hits']['hits']:
        #     ids = {
        #         "report_id": document['_source']['report_id'],
        #         "payment_id": document['_source']['payment_id'],
        #         "document_id": document['_id']
        #     }
        #     asyncio.run(_update_es_document(conn, index, report_files, payment_events, ids))
        # update_documents(index, conn, report_files, payment_events, scroll_id)
def main():
    print('main called')
    print('instantiating UpdateElastic')
    us = UpdateElastic()
    print('UpdateElastic instantiated')
    print('setting up ProcessPoolExecutor')
    blah = ['abc', 'def', 'ghi']
    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
        print('calling executor.map')
        executor.map(us.update_documents, blah, timeout=10)

if __name__ == "__main__":
    main()
With this code, all I'm expecting it to do is print out the values of the array that I've passed, so:
'abc'
'def'
'ghi'
However, after printing "calling executor.map", it hangs.
When I change my constructor to:
class UpdateElastic:
    def __init__(self):
        # conn = psycopg2.connect(
        #     "dbname=db user=mad password=hat host=blah",
        #     async_=True
        # )
        # self.wait(conn)
        # cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        # cur.execute("SELECT * FROM table")
        # self.wait(cur.connection)
        # self.report_files = cur.fetchall()
        # cur.execute("SELECT * FROM othertable")
        # self.wait(cur.connection)
        # self.payment_events = cur.fetchall()
        # cur.close()
        # conn.close()
        # self.esconn = Elasticsearch([{'host': 'elasticsearch.example.com', 'port': 1234}])
        pass
(containing only a pass in the constructor), it will actually print out the values of the array, as expected.
I'm running this on Python 3.7.3, on macOS Mojave 10.14.2.
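
No answer was posted, but one plausible cause, offered here only as a hypothesis: executor.map has to pickle its callable, and us.update_documents is a bound method, so every task tries to pickle the whole UpdateElastic instance, including the live Elasticsearch client; on Python 3.7 the pool also forks the parent process on macOS, and forking a process that already holds open network connections is a known source of deadlocks. A minimal sketch of the usual workaround, moving the work into a module-level function that opens its own connections (the names are the asker's, the fix itself is an assumption):

def update_documents(index, scroll_id=None):
    # Create connections inside the worker so nothing unpicklable
    # has to cross the process boundary.
    esconn = Elasticsearch([{'host': 'elasticsearch.example.com', 'port': 1234}])
    print(index)

def main():
    blah = ['abc', 'def', 'ghi']
    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
        # Iterate the results: map() only raises worker errors when the
        # iterator is consumed, so failures can otherwise look like a hang.
        for _ in executor.map(update_documents, blah, timeout=10):
            pass

Passing mp_context=multiprocessing.get_context('spawn') to the executor (available since Python 3.7) is another way to sidestep fork-related deadlocks.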

Related

luigi.Task.complete = True when no output is created

Luigi is considering the task as finished even when the object is empty and no output is created:
import luigi
import luigi.contrib.azureblob as ab
import luigi.format as fm
import pandas as pd
import sqlalchemy

DB_ACCT = '*-*-*'    # database credentials
STGE_ACCT = '-*-*-'  # storage credentials

class MyTask(luigi.Task):
    ref_date = luigi.DateParameter()
    # finished = False

    def run(self):
        eng = sqlalchemy.create_engine(DB_ACCT)
        query = f"""SELECT * FROM tbl WHERE date_column = DATE '{self.ref_date}'"""
        with eng.connect() as conn, conn.begin():
            df = pd.read_sql(query, conn)
        with self.output().open('w') as f:
            df.to_parquet(f)

    def output(self):
        path_blob = f'{self.ref_date}.parquet'
        return ab.AzureBlobTarget('container-name', path_blob, STGE_ACCT, fm.Nop)
Is this problematic in terms of luigi's expected behavior?
Appreciate any help.
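
No answer was posted, but for context: luigi's default Task.complete() simply checks that every target returned by output() exists, so once a blob has been written, even an empty one, the task looks done. One defensive sketch, assuming the empty result set is the real problem, is to fail run() before the target is ever opened:

    def run(self):
        eng = sqlalchemy.create_engine(DB_ACCT)
        query = f"""SELECT * FROM tbl WHERE date_column = DATE '{self.ref_date}'"""
        with eng.connect() as conn, conn.begin():
            df = pd.read_sql(query, conn)
        if df.empty:
            # Raise before output() is opened: no blob is created, so
            # complete() stays False and the task is retried later.
            raise ValueError(f'no rows for {self.ref_date}')
        with self.output().open('w') as f:
            df.to_parquet(f)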

'Session' object has no attribute 'session'

I am receiving an error. This is my code block (simplified, but it still demonstrates the error):
import neo4j
import sys
import uuid
from neo4j import GraphDatabase

def create_population_point(tx, _point, _uuid, _tl, _tr, _ll, _lr, _band):
    print("Add a record block A")
    tx.run("CREATE (n:Population_Point "
           "{point:$point, uuid:$uuid, TL:$tl, TR:$tr, BL:$bl, BR:$br, Band_1:$band})",
           point=_point, uuid=_uuid, tl=_tl, tr=_tr, ll=_ll, lr=_lr, band=_band)

def main():
    uri = "neo4j://localhost:7687"
    username = "neo4j"
    password = "P#ssword2"
    databaseConnection = GraphDatabase.driver(uri, auth=(username, password))
    databaseSession = databaseConnection.session()
    print("Connection established")
    print("Variables assigned values")
    _point = "D007_S001_T001"
    _uuid = uuid.uuid4()
    _tl = "28.27291"
    _tr = "-81.65765"
    _ll = "28.27291"
    _lr = "-81.65765"
    _band = "455"
    print("Ready to execute")
    with databaseSession.session() as session:
        result = session.write_transaction(create_population_point, _point, _uuid, _tl, _tr,
                                           _ll, _lr, _band)
    databaseConnection.close()
    print("Connection closed")

if __name__ == "__main__":
    main()
This is the line that is throwing the error:
with databaseSession.session() as session:
I'm running Python 3.10.4.
First you create
databaseSession = databaseConnection.session()
then you use
with databaseSession.session() as session:
so in effect you are calling
databaseConnection.session().session()
and this is wrong.

You could use databaseSession directly:
result = databaseSession.write_transaction(...)
or use databaseConnection instead of databaseSession:
with databaseConnection.session() as session:
    result = session.write_transaction(...)
(and remove databaseSession = databaseConnection.session(), because then you no longer use it)
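
Put together, the second option would look like this (a sketch using the asker's names):

databaseConnection = GraphDatabase.driver(uri, auth=(username, password))
# Sessions come from the driver; calling .session() on a Session object
# is what raised: 'Session' object has no attribute 'session'.
with databaseConnection.session() as session:
    result = session.write_transaction(
        create_population_point, _point, _uuid, _tl, _tr, _ll, _lr, _band
    )
databaseConnection.close()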

INSERT Error Python MySQL Connector: Failed executing the operation

I am scraping a shopify store using the products.json page, and attempting to insert the scraped products into my MySQL DB using the Python connector, but I'm hitting the error below:
Something went wrong: Failed executing the operation; b'Name'
Code is below:
import json
import requests
import pandas as pd
import mysql.connector
import ScraperConfig as conf

class myScraper():
    def __init__(self, baseurl):
        self.baseurl = baseurl

    def downloadjson(self, page):
        r = requests.get(self.baseurl + f'products.json?limit=250&page={page}', timeout=5)
        if r.status_code != 200:
            print('Bad status code', r.status_code)
        if len(r.json()['products']) > 0:
            data = r.json()['products']
            return data
        else:
            return
    def parsejson(self, jsondata):
        products = []
        for prod in jsondata:
            vendor = prod['vendor']
            name = prod['title']
            handle = prod['handle']
            createdDateTime = prod['created_at']
            description = prod['body_html']
            productType = prod['product_type']
            for images in prod['images']:
                vendorProductId = images['product_id']
                try:
                    imageURL = images['src']
                except:
                    imageURL = 'None'
            for variant in prod['variants']:
                item = {
                    'name': name,
                    'handle': handle,
                    'description': description,
                    'productVariantId': variant['id'],
                    'createdDateTime': createdDateTime,
                    'productType': productType,
                    'vendorProductId': vendorProductId,
                    'imageURL': imageURL,
                    'price': variant['price'],
                    'salePrice': variant['compare_at_price'],
                    'available': variant['available'],
                    'updatedDateTime': variant['updated_at'],
                    'vendor': vendor
                }
                products.append(item)
        return products
def main():
    scrape = myScraper('https://www.someshopifysite.com/')
    results = []
    for page in range(1, 2):
        data = scrape.downloadjson(page)
        print('Getting page: ', page)
        try:
            results.append(scrape.parsejson(data))
        except:
            print(f'Completed, total pages = {page - 1}')
            break
    return results

if __name__ == '__main__':
    db = mysql.connector.connect(
        user=conf.user,
        host=conf.host,
        passwd=conf.passwd,
        database=conf.database)
    cursor = db.cursor()
    products = main()
    totals = [item for i in products for item in i]
    sql = """INSERT INTO `table` (`Name`, `Handle`, `Descritpion`, `VariantId`, `CreatedDateTime`, `ProductType`, `VendorProductId`, `ImageURL`, `Price`, `SalePrice`, `Available`, `UpdatedDateTime`, `Vendor`)
             VALUES (%(`Name`)s, %(`Handle`)s, %(`Descritpion`)s, %(`VariantId`)s, %(`CreatedDateTime`)s, %(`ProductType`)s, %(`VendorProductId`)s, %(`ImageURL`)s, %(`Price`)s, %(`SalePrice`)s, %(`Available`)s, %(`UpdatedDateTime`)s, %(`Vendor`)s)"""
    try:
        cursor.executemany(sql, totals)
        print('Committed to DB')
    except mysql.connector.Error as err:
        print("Something went wrong: {}".format(err))
    db.commit()
Remove the backticks from the placeholders, here and in all similar parts of the query:
%(`Name`)s
In general I'd avoid backticks except for quoting column names that collide with reserved keywords.
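
A sketch of the corrected statement; note that named placeholders are looked up as keys in each dict passed to executemany(), so they also have to match the (lowercase) keys built in parsejson():

sql = """INSERT INTO `table`
    (`Name`, `Handle`, `Descritpion`, `VariantId`, `CreatedDateTime`,
     `ProductType`, `VendorProductId`, `ImageURL`, `Price`, `SalePrice`,
     `Available`, `UpdatedDateTime`, `Vendor`)
    VALUES (%(name)s, %(handle)s, %(description)s, %(productVariantId)s,
            %(createdDateTime)s, %(productType)s, %(vendorProductId)s,
            %(imageURL)s, %(price)s, %(salePrice)s, %(available)s,
            %(updatedDateTime)s, %(vendor)s)"""
cursor.executemany(sql, totals)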

Error: SQLite objects can only be used in the same thread

I'm working on a project very similar to this one: GitHub
I have a class:
import sqlite3

class DBfunctions:
    def __init__(self, dbname='../example.db'):
        self.dbname = dbname
        self.conn = sqlite3.connect(dbname)

    def search_db(self, telegram_id):
        telegram_id = (telegram_id,)
        sql = 'SELECT * FROM user WHERE id = ?;'
        row = self.conn.execute(sql, telegram_id)
        return row

    def newuser_db(self, tele_id, name, nick):
        par = (tele_id, name, nick, 0)
        sql = 'INSERT INTO user VALUES(?,?,?,?);'
        self.conn.execute(sql, par)
        self.conn.commit()
Then I have the main project:
from telegram import Update
from telegram.ext import CallbackContext, CommandHandler
from file import DBfunctions

db = DBfunctions()

def start(update: Update, context: CallbackContext):  # before edit: somethingtodo
    flag = db.search_db(update.effective_user.id)  # here the problems start
    if flag is None:
        db.newuser_db(update.effective_user.id, update.effective_user.first_name, update.effective_user.username)
        update.message.reply_text(
            'Hi!',
            reply_markup=markup,
        )
    else:
        update.message.reply_text(
            'Hey! Welcome back!',
            reply_markup=markup,
        )

def main():
    db.setup()  # this function creates the tables if they don't exist yet
    dispatcher.add_handler(CommandHandler('start', start))
    # other functions, but nothing correlated

if __name__ == '__main__':
    main()
And then the error appears:
File "filefolder/file.py", line 29, in search_db
    row = self.conn.execute(sql,telegram_id)
sqlite3.ProgrammingError: SQLite objects created in a thread can only be used in that same thread. The object was created in thread id 15004 and this is thread id 11036.
I can't figure out how to fix it, and I don't understand what is different from the project I found on GitHub (linked above).
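
No answer was posted, but the message itself points at the cause: python-telegram-bot runs handlers in worker threads, while the sqlite3 connection is created once in the main thread at import time. A minimal sketch of one common fix, opening a short-lived connection per call so each thread only ever uses a connection it created itself (the table layout is the asker's):

import sqlite3

class DBfunctions:
    def __init__(self, dbname='../example.db'):
        self.dbname = dbname

    def search_db(self, telegram_id):
        # Each call opens its own connection, so the connection is always
        # used in the thread that created it.
        with sqlite3.connect(self.dbname) as conn:
            sql = 'SELECT * FROM user WHERE id = ?;'
            return conn.execute(sql, (telegram_id,)).fetchone()

Alternatively, sqlite3.connect(..., check_same_thread=False) disables the check, but then you must serialize access to the shared connection yourself.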

global name 'cx' is not defined

I want to call an Oracle function that returns an object by using cx_Oracle's cursor.callfunc(), but it is not working.
Here you can see my code:
import cx_Oracle
import json
import web

urls = (
    "/", "index",
    "/grid", "grid",
)
app = web.application(urls, globals(), web.profiler)
web.config.debug = True
connection = cx_Oracle.Connection("TEST_3D/limo1013#10.40.33.160:1521/sdetest")
typeObj = connection.gettype("MDSYS.SDO_GEOMETRY")

class index:
    def GET(self):
        return "hallo moritz "

class grid:
    def GET(self):
        web.header('Access-Control-Allow-Origin', '*')
        web.header('Access-Control-Allow-Credentials', 'true')
        web.header('Content-Type', 'application/json')
        cursor = connection.cursor()
        cursor.arraysize = 10000  # default = 50
        cursor.execute("""SELECT a.id AS building_nr, c.Geometry AS geometry, d.Classname
                          FROM building a, THEMATIC_SURFACE b, SURFACE_GEOMETRY c, OBJECTCLASS d
                          WHERE a.grid_id_400 = 4158 AND a.id = b.BUILDING_ID
                          AND b.LOD2_MULTI_SURFACE_ID = c.ROOT_ID
                          AND c.GEOMETRY IS NOT NULL AND b.OBJECTCLASS_ID = d.ID""")
        obj = cursor.fetchone()
        obj = obj[1]
        print obj
        cursor.callfunc("SDO2GEOJSON", cx.Oracle.OBJECT, [obj])

# Run the app
if __name__ == "__main__":
    app.run(web.profiler)
Error message:
at /grid
global name 'cx' is not defined
But I am sure that cx_Oracle is installed correctly. Furthermore, I use import cx_Oracle at the beginning, and that works.
What is wrong?
Simple typo. In the line
cursor.callfunc("SDO2GEOJSON", cx.Oracle.OBJECT, [obj])
you should use cx_Oracle.OBJECT instead of cx.Oracle.OBJECT.
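
That is, assuming SDO2GEOJSON is the asker's own stored function and the return value is wanted:

geojson = cursor.callfunc("SDO2GEOJSON", cx_Oracle.OBJECT, [obj])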
