The following class Downloader is supposed to query a SQL database multiple times and store the results in a list of pandas.DataFrame objects.
I would like to use multiprocessing to speed up the retrieval, however I get the error
line 53, in run_queries
dfs_queries = p.map(run_query, queries)
File "/usr/lib/python2.7/multiprocessing/pool.py", line 251, in map
return self.map_async(func, iterable, chunksize).get()
File "/usr/lib/python2.7/multiprocessing/pool.py", line 567, in get
raise self._value
PicklingError: Can't pickle <type 'function'>: attribute lookup __builtin__.function failed
I have looked into this question which suggests that the pyodbc connection and cursor objects can not be pickled. Is there a way to still use the pool.map(f, arglist) from multiprocessing when f is reliant on a sql connection?
import pyodbc
from multiprocessing import Pool as ThreadPool
import pandas as pd
class Downloader(object):
def _connect(self, path_db_config):
# ... Loads a config file from which it gets dsn, user and password ... #
con_string = 'DSN=%s;UID=%s;PWD=%s;' % (dsn, user, password)
return pyodbc.connect(con_string)
def run_queries(self):
queries = [# List of sql queries #]
p = ThreadPool(len(queries))
def run_query(query):
cnxn = self._connect(PATH_DB_CONFIG)
df = pd.read_sql(query, cnxn)
return df
return p.map(run_query, queries)
Thanks for the help!!
Related
I'm trying so hard to get my socket server python script to loop every so often to check for updates in a mysql table.
The code works on the first time no problem. on the second loop and every loop after it throws errors.
Things i've tried:
try/catch (for multiple loops to see if ANY work)
threading
conn.close()
cursor.close() (not cursor.commit() any changes so this through
errors of course)
However, I can put the code in a stand alone file and spam running the file and it works perfectly.
It doesn't seem to like running the sql code in the same process or file (which i thought threading fixed but i guess i was wrong?)
Here is the error: (note the first line is the output i'm trying to print in a loop for testing)
(17, 'Default2', 1, 'uploads/2/projects/5e045c87109820.19290695.blend', '')
Exception in thread Thread-1:
Traceback (most recent call last):
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.7_3.7.1776.0_x64__qbz5n2kfra8p0\lib\threading.py", line 926, in _bootstrap_inner
self.run()
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.7_3.7.1776.0_x64__qbz5n2kfra8p0\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "D:\xampp\htdocs\urender\serverfiles\test.py", line 7, in func
fqueue = queuedb.checkQueue()
File "D:\xampp\htdocs\urender\serverfiles\queuedb.py", line 7, in checkQueue
cursor = conn.cursor()
File "C:\Users\hackn\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\mysql\connector\connection.py", line 806, in cursor
self.handle_unread_result()
File "C:\Users\hackn\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\mysql\connector\connection.py", line 1059, in handle_unread_result
raise errors.InternalError("Unread result found")
mysql.connector.errors.InternalError: Unread result found
[Finished in 6.727s]
Here is the basic code from the test.py:
import queuedb
from threading import Thread
import time
def func():
time.sleep(5)
fqueue = queuedb.checkQueue()
return fqueue
func()
fqueue = queuedb.checkQueue()
print(fqueue)
Thread(target=func).start()
This is from my queuedb.py:
from dbconnect import dbconnect
import sys
def checkQueue():
sql = "SELECT * FROM renderqueue WHERE renderer=''"
conn = dbconnect.conn
cursor = conn.cursor()
cursor.execute(sql)
result = cursor.fetchone()
return result
And this is the dbconnect.py:
import mysql.connector
import sys
from xml.dom import minidom
def parseXML():
try:
xmlpath = "urender/serverfiles/dbvariables.xml"
mydoc = minidom.parse(xmlpath)
items = mydoc.getElementsByTagName('item')
dbserver = items[0].attributes['dbserver'].value
dbuser = items[1].attributes['dbuser'].value
dbpass = items[2].attributes['dbpass'].value
dbname = items[3].attributes['dbname'].value
return dbserver, dbuser, dbpass, dbname
except:
print("Something went wrong with the XML DATA")
sys.exit()
dbserver = parseXML()[0]
dbuser = parseXML()[1]
dbpass = parseXML()[2]
dbname = parseXML()[3]
class dbconnect:
conn = mysql.connector.connect(host=dbserver, user=dbuser, passwd=dbpass, database=dbname)
I'm sorry for such a long post but I hope i've explained the problem well enough and given an adequate amount of info.
hckm101,
As indicated by the exception, there are unread rows associated with your cursor.
To solve this, you have two solutions :
Use a buffered cursor, replacing your code with
conn.cursor(buffered=True)
Or, retrieve every result associated to your cursor using a for loop with something like : for row in cursor: dosomething(row)
For more information, there is plenty of documentation available online (here)
So I am trying to retrieve data from database with MySQLdb in pandas dataframe.
import MySQLdb as mysqldb
import MySQLdb.cursors
import pandas as pd
def connection():
db = mysqldb.connect(
host="123.456.7.890",
user="user",
passwd="password",
db="database",
port=12345,
cursorclass=MySQLdb.cursors.DictCursor
)
return db
mysql = connection()
def testing():
cur = mysql.cursor()
query = cur.execute("select * from table1")
result = cur.fetchall()
cur.close()
result_df = pd.DataFrame(result)
return result_df
When I print the 'testing' function, I get an error:
Traceback (most recent call last):
File "C:/Users/xx/PycharmProjects/practice/python.py", line 97, in <module>
print(testing())
File "C:/Users/xx/PycharmProjects/practice/python.py", line 94, in testing
result_df = pd.DataFrame(result)
File "C:\Users\xx\PycharmProjects\practice\venv\lib\site-packages\pandas\core\frame.py", line 422, in __init__
raise ValueError('DataFrame constructor not properly called!')
ValueError: DataFrame constructor not properly called!
I put cursorclass to MySQLdb.cursors.DictCursor to get the data in dictionary form but instead it seems like I'm getting them in tuple. Currently using python 3.7.
It works when I use pymysql but seems quite slow.
Why won't the cursor iterate? I feel sure there should be an easy solution.
I have tried multiple Stack Overflow answers and the documentation for Mongodb
https://docs.mongodb.com/getting-started/python/query/
The code is as per below:
from pymongo import MongoClient
#Connect to Mongo Client
client = MongoClient('mongodb://the_username:the_password#ds047124.mlab.com:47124/politicians_from_theage')
db = client.politicians_from_theage #define database used
# Define Collection
collection = db.posts
print collection
Result:
Collection(Database(MongoClient(host=['ds047124.mlab.com:47124'], document_class=dict, tz_aware=False, connect=True), u'politicians_from_theage'), u'posts')
Then the cursor will print its location:
# Define Cursor
my_cursor = collection.find()
print my_cursor
Result:
<pymongo.cursor.Cursor object at 0x0000000003247518>
Then to try and iterate over the cursor provides a timeout:
# Perform query
cursor = db.posts.find()
#Iterate the cursor and print the documents.
for document in cursor:
print(document) #No Luck
Traceback Error or Iteration:
Traceback (most recent call last):
File "C:\PythonC\PythonWebScraping\17_MongoInterface\mongoget.py", line 18, in <module>
for result_object in my_cursor:
File "C:\Python27\lib\site-packages\pymongo\cursor.py", line 1090, in next
if len(self.__data) or self._refresh():
File "C:\Python27\lib\site-packages\pymongo\cursor.py", line 1012, in _refresh
self.__read_concern))
File "C:\Python27\lib\site-packages\pymongo\cursor.py", line 850, in __send_message
**kwargs)
File "C:\Python27\lib\site-packages\pymongo\mongo_client.py", line 827, in _send_message_with_response
server = topology.select_server(selector)
File "C:\Python27\lib\site-packages\pymongo\topology.py", line 210, in select_server
address))
File "C:\Python27\lib\site-packages\pymongo\topology.py", line 186, in select_servers
self._error_message(selector))
pymongo.errors.ServerSelectionTimeoutError: ds047124.mlab.com:47124: timed out
I have tried iterating on 'cursor', 'my_cursor' and 'collection', each of which provides a traceback error of server timeout.
Any help/insight would be greatly appreciated
This may help you:-
# Perform query
cursor = db.posts.find().toAray(function(err, result){
#Iterate the cursor and print the documents.
for document in result:
print(document);
}) //Will give you array of objects.
Let me know if it works.
Found the answer, I was focusing on the cursor rather than loading the object from the cursor from JSON to a list of JSON.
Final code is below (removing the URI)
import json
from datetime import date, timedelta
from pymongo import MongoClient
from bson import json_util
#Connect to Mongo Client
client = MongoClient('mongodb://user:pword#ds047124.mlab.com:47124/politicians_from_theage')
db = client.politicians_from_theage #define database used
print db
# Define Collection
collection = db.posts
print collection # print Collection(Database(MongoClient(host=['ds047124.mlab.com:47124']...
cursor = collection.find()
print cursor
# Obtain json
json_docs = []
for doc in cursor:
json_doc = json.dumps(doc, default=json_util.default)
json_docs.append(json_doc)
print json_docs #json result
# List Comprehension version
#json_docs = [json.dumps(doc, default=json_util.default) for doc in cursor]
#To get back from json again as string list
docs = [json.loads(j_doc, object_hook=json_util.object_hook) for j_doc in json_docs]
print docs
print 'kitty terminates program'
Try this:
cursor = db.posts.find()
for document in list(cursor):
print(document)
I have an interesting problem. I am mocking urllib2.urlopen with the python mock library as follows:
def mock_url_open_conn_for_json_feed():
json_str = """
{"actions":[{"causes":[{"shortDescription":"Started by user anonymous","userId":null,"userName":"anonymous"}]}],"artifacts":[],"building":false,"description":null,"duration":54,"estimatedDuration":54,
"fullDisplayName":"test3#1",
"id":"2012-08-24_14-10-34","keepLog":false,"number":1,"result":"SUCCESS","timestamp":1345842634000,
"url":"http://localhost:8080/job/test3/1/","builtOn":"","changeSet":{"items":[],"kind":null},"culprits":[]}
"""
return StringIO(json_str)
def test_case_foo(self):
io = mock_url_open_conn_for_json_feed()
io.seek(0)
mylib.urllib2.urlopen = Mock(return_value=io)
test_obj.do_your_thing()
def test_case_foo_bar(self)
io = mock_url_open_conn_for_json_feed()
io.seek(0)
mylib.urllib2.urlopen = Mock(return_value=io)
test_obj.param = xyz
test_obj.do_your_thing()
class ObjUnderTest():
def do_your_thing(self):
conn = urllib2.urlopen(url)
simplejson.load(conn)
the first unit test "test_case_foo" runs without a problem. But simplejson.load closes the StringIO, so "test_case_foo_bar" calls on do_your_thing() and it tries to simplejson.load the same StringIO object (even though I return the constructor of StringIO), and it's already been closed. I get the following error:
json = simplejson.load(conn)
File "/Users/sam/Library/Python/2.7/lib/python/site-packages/simplejson/__init__.py", line 391, in load
return loads(fp.read(),
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/StringIO.py", line 127, in read
_complain_ifclosed(self.closed)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/StringIO.py", line 40, in _complain_ifclosed
raise ValueError, "I/O operation on closed file"
ValueError: I/O operation on closed file
I have two questions:
1) Why is the StringIO constructor not returning a new object?
2) Is there a work around for this? Or a better way to achieve what I'm trying to achieve?
I have a very weird problem with mysqldb (mysql module for python).
I have a file with queries for inserting records in tables. If I call the functions from the file, it works just fine; but when trying to call one of the functions from another file it throws me a
_mysql_exception.InterfaceError: (0, '')
I really don't get what I'm doing wrong here..
I call the function from buildDB.py :
import create
create.newFormat("HD", 0,0,0)
The function newFormat(..) is in create.py (imported) :
from Database import Database
db = Database()
def newFormat(name, width=0, height=0, fps=0):
format_query = "INSERT INTO Format (form_name, form_width, form_height, form_fps) VALUES ('"+name+"',"+str(width)+","+str(height)+","+str(fps)+");"
db.execute(format_query)
And the class Database is the following :
import MySQLdb
from MySQLdb.constants import FIELD_TYPE
class Database():
def __init__(self):
server = "localhost"
login = "seq"
password = "seqmanager"
database = "Sequence"
my_conv = { FIELD_TYPE.LONG: int }
self.conn = MySQLdb.connection(host=server, user=login, passwd=password, db=database, conv=my_conv)
# self.cursor = self.conn.cursor()
def close(self):
self.conn.close()
def execute(self, query):
self.conn.query(query)
(I put only relevant code)
Traceback :
Z:\sequenceManager\mysql>python buildDB.py
D:\ProgramFiles\Python26\lib\site-packages\MySQLdb\__init__.py:34: DeprecationWa
rning: the sets module is deprecated
from sets import ImmutableSet
INSERT INTO Format (form_name, form_width, form_height, form_fps) VALUES ('HD',0
,0,0);
Traceback (most recent call last):
File "buildDB.py", line 182, in <module>
create.newFormat("HD")
File "Z:\sequenceManager\mysql\create.py", line 52, in newFormat
db.execute(format_query)
File "Z:\sequenceManager\mysql\Database.py", line 19, in execute
self.conn.query(query)
_mysql_exceptions.InterfaceError: (0, '')
The warning has never been a problem before so I don't think it's related.
I got this error when I was trying to use a closed connection.
Problem resolved.. I was initializing the database twice.. Sorry if you lost your time reading this !
I couldn't get your setup to work. I gives me the same error all the time. However the way you connect to and make queries to the db with the query seems to be "non-standard".
I had better luck with this setup:
conn = MySQLdb.Connection(user="user", passwd="******",
db="somedb", host="localhost")
cur = conn.cursor()
cur.execute("insert into Format values (%s,%s,%s,%s);", ("hd",0,0,0))
This way you can take advantage of the db modules input escaping which is a must to mitigate sql injection attacks.