I'm having an issue using Python 3's concurrent.futures.ProcessPoolExecutor and its map function.
My code is this:
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
import psycopg2
import psycopg2.extensions
import psycopg2.extras
from asq import query
import select
import concurrent.futures
import asyncio
class UpdateElastic:
    def __init__(self):
        conn = psycopg2.connect(
            "dbname=db user=mad password=hat host=blah",
            async_=True
        )
        self.wait(conn)
        cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        cur.execute("SELECT * FROM table")
        self.wait(cur.connection)
        self.report_files = cur.fetchall()
        cur.execute("SELECT * FROM othertable")
        self.wait(cur.connection)
        self.payment_events = cur.fetchall()
        cur.close()
        conn.close()
        self.esconn = Elasticsearch([{'host': 'elasticsearch.example.com', 'port': 1234}])
        # pass

    def wait(self, conn):
        # Standard psycopg2 asynchronous wait loop: poll until the connection is ready.
        while 1:
            state = conn.poll()
            if state == psycopg2.extensions.POLL_OK:
                break
            elif state == psycopg2.extensions.POLL_WRITE:
                select.select([], [conn.fileno()], [])
            elif state == psycopg2.extensions.POLL_READ:
                select.select([conn.fileno()], [], [])
            else:
                raise psycopg2.OperationalError("poll() returned %s" % state)

    def get_es_indices(self):
        indices = self.esconn.indices.get_alias("digital-sales-csv*")
        return list(indices.keys())

    def update_documents(self, index, scroll_id=None):
        print(index)
        # return index
        # documents = _get_es_documents(conn, index)
        # print(documents['_scroll_id'])
        # scroll_id = documents['_scroll_id']
        # for document in documents['hits']['hits']:
        #     ids = {
        #         "report_id": document['_source']['report_id'],
        #         "payment_id": document['_source']['payment_id'],
        #         "document_id": document['_id']
        #     }
        #     asyncio.run(_update_es_document(conn, index, report_files, payment_events, ids))
        # update_documents(index, conn, report_files, payment_events, scroll_id)

def main():
    print('main called')
    print('instantiating UpdateElastic')
    us = UpdateElastic()
    print('UpdateElastic instantiated')
    print('setting up ProcessPoolExecutor')
    blah = ['abc', 'def', 'ghi']
    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
        print('calling executor.map')
        executor.map(us.update_documents, blah, timeout=10)

if __name__ == "__main__":
    main()
With this code, all I'm expecting it to do is print out the values of the array that I've passed, so:
'abc'
'def'
'ghi'
However, after printing "calling executor.map", it hangs.
When I change my constructor to:
class UpdateElastic:
    def __init__(self):
        # conn = psycopg2.connect(
        #     "dbname=db user=mad password=hat host=blah",
        #     async_=True
        # )
        # self.wait(conn)
        # cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        # cur.execute("SELECT * FROM table")
        # self.wait(cur.connection)
        # self.report_files = cur.fetchall()
        # cur.execute("SELECT * FROM othertable")
        # self.wait(cur.connection)
        # self.payment_events = cur.fetchall()
        # cur.close()
        # conn.close()
        # self.esconn = Elasticsearch([{'host': 'elasticsearch.example.com', 'port': 1234}])
        pass
(containing only a pass in the constructor), it actually prints out the values of the array, as expected.
I'm running this on Python 3.7.3, on macOS Mojave 10.14.2.
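I can't pin down the root cause from the snippet alone, but one commonly suggested restructuring for this pattern is to avoid carrying live connection objects across the fork: build the heavyweight clients inside the worker function and pass only plain data through map. A minimal sketch (the module-level worker is my own illustrative restructuring, not a confirmed fix):

import concurrent.futures

from elasticsearch import Elasticsearch

def update_documents(index):
    # Build the client in the child process, so nothing created by the
    # parent (sockets, locks) has to be inherited or pickled.
    esconn = Elasticsearch([{'host': 'elasticsearch.example.com', 'port': 1234}])
    print(index)

def main():
    blah = ['abc', 'def', 'ghi']
    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
        # Consuming the iterator also re-raises any exception from a worker,
        # which executor.map otherwise defers until the results are read.
        for _ in executor.map(update_documents, blah, timeout=10):
            pass

if __name__ == "__main__":
    main()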
Luigi considers the task finished even when the DataFrame is empty and no output is created:
import luigi
import luigi.contrib.azureblob as ab
import luigi.format as fm
import pandas as pd
import sqlalchemy

DB_ACCT = '*-*-*'    # database credentials
STGE_ACCT = '-*-*-'  # storage credentials

class MyTask(luigi.Task):
    ref_date = luigi.DateParameter()
    # finished = False

    def run(self):
        eng = sqlalchemy.create_engine(DB_ACCT)
        query = f"""SELECT * FROM tbl WHERE date_column = DATE '{self.ref_date}'"""
        with eng.connect() as conn, conn.begin():
            df = pd.read_sql(query, conn)
        with self.output().open('w') as f:
            df.to_parquet(f)

    def output(self):
        path_blob = f'{self.ref_date}.parquet'
        return ab.AzureBlobTarget('container-name', path_blob, STGE_ACCT, fm.Nop)
Is this problematic in terms of luigi's expected behavior?
Appreciate any help.
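For reference, Luigi decides whether a task is done via Task.complete(), which by default just checks output().exists(); once the (even empty) parquet blob is written, the task counts as finished. A sketch of one workaround for MyTask.run, assuming you would rather fail the run than persist an empty frame:

    def run(self):
        eng = sqlalchemy.create_engine(DB_ACCT)
        query = f"""SELECT * FROM tbl WHERE date_column = DATE '{self.ref_date}'"""
        with eng.connect() as conn, conn.begin():
            df = pd.read_sql(query, conn)
        if df.empty:
            # No rows: don't create the blob, so output().exists() stays False
            # and Luigi will not consider the task complete.
            raise ValueError(f'no rows for {self.ref_date}')
        with self.output().open('w') as f:
            df.to_parquet(f)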
I am receiving an error from the neo4j Python driver.
This is my code block (simplified, but it still demonstrates the error):
import neo4j
import sys
import uuid
from neo4j import GraphDatabase

def create_population_point(tx, _point, _uuid, _tl, _tr, _ll, _lr, _band):
    print("Add a record block A")
    tx.run("CREATE (n:Population_Point "
           "{point:$point, uuid:$uuid, TL:$tl, TR:$tr, BL:$bl, BR:$br, Band_1:$band})",
           point=_point, uuid=_uuid, tl=_tl, tr=_tr, bl=_ll, br=_lr, band=_band)

def main():
    uri = "neo4j://localhost:7687"
    username = "neo4j"
    password = "P#ssword2"
    databaseConnection = GraphDatabase.driver(uri, auth=(username, password))
    databaseSession = databaseConnection.session()
    print("Connection established")

    print("Variables assigned values")
    _point = "D007_S001_T001"
    _uuid = uuid.uuid4()
    _tl = "28.27291"
    _tr = "-81.65765"
    _ll = "28.27291"
    _lr = "-81.65765"
    _band = "455"
    print("Ready to execute")

    with databaseSession.session() as session:
        result = session.write_transaction(create_population_point, _point, _uuid,
                                           _tl, _tr, _ll, _lr, _band)

    databaseConnection.close()
    print("Connection closed")

if __name__ == "__main__":
    main()
This is the line that throws the error:
with databaseSession.session() as session:
I'm running Python 3.10.4.
First you create
databaseSession = databaseConnection.session()
and next you use
with databaseSession.session() as session:
so effectively you are calling
databaseConnection.session().session()
and this is wrong.
You could use databaseSession directly:
result = databaseSession.write_transaction(..)
or use databaseConnection instead of databaseSession in
with databaseConnection.session() as session:
    result = session.write_transaction(..)
(and remove databaseSession = databaseConnection.session(), because then you don't use it).
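Put together, a sketch of a corrected main() (keeping the names and values from the question) might look like:

def main():
    uri = "neo4j://localhost:7687"
    databaseConnection = GraphDatabase.driver(uri, auth=("neo4j", "P#ssword2"))
    print("Connection established")

    _point = "D007_S001_T001"
    _uuid = uuid.uuid4()
    _tl, _tr, _ll, _lr, _band = "28.27291", "-81.65765", "28.27291", "-81.65765", "455"

    # Open the session straight from the driver; the context manager closes it.
    with databaseConnection.session() as session:
        session.write_transaction(create_population_point, _point, _uuid,
                                  _tl, _tr, _ll, _lr, _band)

    databaseConnection.close()
    print("Connection closed")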
I am scraping a Shopify store using the products.json page and attempting to insert the scraped products into my MySQL DB using the Python connector, but I'm hitting the error below:
Something went wrong: Failed executing the operation; b'Name'
Code is below:
import json
import requests  # used by downloadjson; missing from the original imports
import pandas as pd
import mysql.connector
import ScraperConfig as conf

class myScraper():
    def __init__(self, baseurl):
        self.baseurl = baseurl

    def downloadjson(self, page):
        r = requests.get(self.baseurl + f'products.json?limit=250&page={page}', timeout=5)
        if r.status_code != 200:
            print('Bad status code', r.status_code)
        if len(r.json()['products']) > 0:
            data = r.json()['products']
            return data
        else:
            return

    def parsejson(self, jsondata):
        products = []
        for prod in jsondata:
            vendor = prod['vendor']
            name = prod['title']
            handle = prod['handle']
            createdDateTime = prod['created_at']
            description = prod['body_html']
            productType = prod['product_type']
            for images in prod['images']:
                vendorProductId = images['product_id']
                try:
                    imageURL = images['src']
                except:
                    imageURL = 'None'
            for variant in prod['variants']:
                item = {
                    'name': name,
                    'handle': handle,
                    'description': description,
                    'productVariantId': variant['id'],
                    'createdDateTime': createdDateTime,
                    'productType': productType,
                    'vendorProductId': vendorProductId,
                    'imageURL': imageURL,
                    'price': variant['price'],
                    'salePrice': variant['compare_at_price'],
                    'available': variant['available'],
                    'updatedDateTime': variant['updated_at'],
                    'vendor': vendor
                }
                products.append(item)
        return products

def main():
    scrape = myScraper('https://www.someshopifysite.com/')
    results = []
    for page in range(1, 2):
        data = scrape.downloadjson(page)
        print('Getting page: ', page)
        try:
            results.append(scrape.parsejson(data))
        except:
            print(f'Completed, total pages = {page - 1}')
            break
    return results

if __name__ == '__main__':
    db = mysql.connector.connect(
        user=conf.user,
        host=conf.host,
        passwd=conf.passwd,
        database=conf.database)
    cursor = db.cursor()

    products = main()
    totals = [item for i in products for item in i]

    sql = """INSERT INTO `table` (`Name`, `Handle`, `Descritpion`, `VariantId`, `CreatedDateTime`, `ProductType`, `VendorProductId`, `ImageURL`, `Price`, `SalePrice`, `Available`, `UpdatedDateTime`, `Vendor`)
             VALUES (%(`Name`)s, %(`Handle`)s, %(`Descritpion`)s, %(`VariantId`)s, %(`CreatedDateTime`)s, %(`ProductType`)s, %(`VendorProductId`)s, %(`ImageURL`)s, %(`Price`)s, %(`SalePrice`)s, %(`Available`)s, %(`UpdatedDateTime`)s, %(`Vendor`)s)"""
    try:
        cursor.executemany(sql, totals)
        print('Committed to DB')
    except mysql.connector.Error as err:
        print("Something went wrong: {}".format(err))
    db.commit()
Remove the backticks from the following and from all similar parts of the query:
%(`Name`)s
In general I'd avoid backticks except for quoting column names that collide with reserved keywords.
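For completeness, a sketch of the fixed statement: the named placeholders must be bare names, and they must match the dictionary keys built in parsejson() (which are lowercase/camelCase, e.g. name rather than Name):

# Backticks stay on the column identifiers; the %(...)s placeholder names
# are bare and must match the dict keys produced by parsejson().
sql = """INSERT INTO `table`
         (`Name`, `Handle`, `Descritpion`, `VariantId`, `CreatedDateTime`,
          `ProductType`, `VendorProductId`, `ImageURL`, `Price`, `SalePrice`,
          `Available`, `UpdatedDateTime`, `Vendor`)
         VALUES (%(name)s, %(handle)s, %(description)s, %(productVariantId)s,
                 %(createdDateTime)s, %(productType)s, %(vendorProductId)s,
                 %(imageURL)s, %(price)s, %(salePrice)s, %(available)s,
                 %(updatedDateTime)s, %(vendor)s)"""
cursor.executemany(sql, totals)
db.commit()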
I'm working on a project very similar to this one: GitHub
I have a class:
import sqlite3

class DBfunctions:
    def __init__(self, dbname='../example.db'):
        self.dbname = dbname
        self.conn = sqlite3.connect(dbname)

    def search_db(self, telegram_id):
        telegram_id = (telegram_id,)
        sql = 'SELECT * FROM user WHERE id = ?;'
        row = self.conn.execute(sql, telegram_id)
        return row

    def newuser_db(self, tele_id, name, nick):
        par = (tele_id, name, nick, 0)
        sql = 'INSERT INTO user VALUES(?,?,?,?);'
        self.conn.execute(sql, par)
        self.conn.commit()
Then I have the main project:
from file import DBfunctions

db = DBfunctions()

def start(update: Update, context: CallbackContext):  # before edit: somethingtodo
    flag = db.search_db(update.effective_user.id)  # here problems start
    if flag == None:
        db.newuser_db(update.effective_user.id, update.effective_user.first_name, update.effective_user.username)
        update.message.reply_text(
            'Hi!',
            reply_markup=markup,
        )
    else:
        update.message.reply_text(
            'Hey! Welcome back!',
            reply_markup=markup,
        )

def main():
    db.setup()  # this function creates the tables if they don't exist yet
    dispatcher.add_handler(CommandHandler('start', start))
    # other functions, but nothing correlated

if __name__ == '__main__':
    main()
And then the error appears:
File "filefolder/file.py", line 29, in search_db
row = self.conn.execute(sql,telegram_id)
sqlite3.ProgrammingError: SQLite objects created in a thread can only be used in that same thread. The object was created in thread id 15004 and this is thread id 11036.
I can't figure out what I can do to fix it, and I don't understand what is different from the project I found on GitHub (linked above).
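For context, python-telegram-bot runs handlers on worker threads, while the sqlite3 connection here is created once in the main thread when DBfunctions() is instantiated, which is exactly the situation the error describes. A sketch of one common workaround, passing check_same_thread=False and serializing access with a lock (the lock is my own assumption about how you'd guard the shared connection):

import sqlite3
import threading

class DBfunctions:
    def __init__(self, dbname='../example.db'):
        self.dbname = dbname
        # Allow the connection to be used from the bot's worker threads;
        # the lock serializes access, since the connection is not thread-safe.
        self.conn = sqlite3.connect(dbname, check_same_thread=False)
        self.lock = threading.Lock()

    def search_db(self, telegram_id):
        with self.lock:
            # fetchone() returns None when there is no matching row, which is
            # what the `if flag == None` check in start() expects.
            return self.conn.execute(
                'SELECT * FROM user WHERE id = ?;', (telegram_id,)
            ).fetchone()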
I want to call an Oracle function that returns an object, using cx_Oracle's cursor.callfunc(). But this is not working.
Here you can see my code:
import cx_Oracle
import json
import web

urls = (
    "/", "index",
    "/grid", "grid",
)
app = web.application(urls, globals(), web.profiler)
web.config.debug = True

connection = cx_Oracle.Connection("TEST_3D/limo1013#10.40.33.160:1521/sdetest")
typeObj = connection.gettype("MDSYS.SDO_GEOMETRY")

class index:
    def GET(self):
        return "hallo moritz "

class grid:
    def GET(self):
        web.header('Access-Control-Allow-Origin', '*')
        web.header('Access-Control-Allow-Credentials', 'true')
        web.header('Content-Type', 'application/json')
        cursor = connection.cursor()
        cursor.arraysize = 10000  # default = 50
        cursor.execute("""SELECT a.id AS building_nr, c.Geometry AS geometry, d.Classname
                          FROM building a, THEMATIC_SURFACE b, SURFACE_GEOMETRY c, OBJECTCLASS d
                          WHERE a.grid_id_400 = 4158 AND a.id = b.BUILDING_ID
                          AND b.LOD2_MULTI_SURFACE_ID = c.ROOT_ID
                          AND c.GEOMETRY IS NOT NULL AND b.OBJECTCLASS_ID = d.ID""")
        obj = cursor.fetchone()
        obj = obj[1]
        print(obj)
        cursor.callfunc("SDO2GEOJSON", cx.Oracle.OBJECT, [obj])

# Run the app
if __name__ == "__main__":
    app.run(web.profiler)
Error message:
at /grid
global name 'cx' is not defined
But I am sure that cx_Oracle is installed correctly. Furthermore, I use import cx_Oracle at the beginning and that works.
What is wrong?
Simple typo. In the line
cursor.callfunc("SDO2GEOJSON", cx.Oracle.OBJECT, [obj])
you should use cx_Oracle.OBJECT.
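That is, the corrected call (assuming the surrounding code from the question) would be:

# The module is named cx_Oracle; writing cx.Oracle.OBJECT is parsed as an
# attribute lookup on an undefined name `cx`, hence the NameError.
result = cursor.callfunc("SDO2GEOJSON", cx_Oracle.OBJECT, [obj])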