How to create a file when working with threads in python? - python

Have a look at this code :
import threading
import time
# Starts function_that_writes on a background thread, then counts 0..9999 on
# the calling thread, printing one line per second.
def my_inline_function(number):
#do some stuff
# NOTE(review): args receives the bare value `number` here, not a tuple. The
# TypeError quoted further down ("argument after * must be an iterable, not
# int") is raised when the thread tries to unpack it, so the target never runs.
download_thread = threading.Thread(target=function_that_writes, args=number)
download_thread.start()
#continue doing stuff
i = 0
while(i < 10000):
# Python 2 print statement.
print str(i) + " : Main thread"
time.sleep(1)
i = i + 1
def function_that_writes(number):
    """Write one "<i> : child thread" record per second to dummy.txt.

    Counts from ``number`` up to 9999. The file is created (or truncated)
    even when ``number`` is already 10000 or more, in which case nothing is
    written.

    Args:
        number: First counter value to write.
    """
    i = number
    # The context manager closes the file even if a write raises.
    with open("dummy.txt", 'w') as out:
        while i < 10000:
            out.write(str(i) + " : child thread")
            time.sleep(1)
            # The original never advanced the counter, so the loop could not
            # terminate and kept writing the same record.
            i = i + 1
my_inline_function(5)
function_that_writes(5)
Why does my_inline_function(), which starts a thread, not create a file?
But when I am calling a function_that_writes(...) directly, which is not running in a thread, it is able to create a file.
Why am I getting this behaviour?

You need to supply your argument as a tuple args=(number,):
download_thread = threading.Thread(target=function_that_writes, args=(number,))
The exception is pretty clear here:
Exception in thread Thread-1:
Traceback (most recent call last):
File "/Users/mike/anaconda/lib/python2.7/threading.py", line 801, in __bootstrap_inner
self.run()
File "/Users/mike/anaconda/lib/python2.7/threading.py", line 754, in run
self.__target(*self.__args, **self.__kwargs)
TypeError: function_that_writes() argument after * must be an iterable, not int

Related

How to recover the return value of a function passed to multiprocessing.Process?

I used this question to get started, and it works just fine: How can I recover the return value of a function passed to multiprocessing.Process?
But in my case I would like to write a small tool that connects to many computers and gathers some statistics; each stat would be gathered within its own Process to make it snappy. But as soon as I try to wrap the multiprocessing code in a class representing a machine, it fails.
Here is my code
import multiprocessing
import pprint
def run_task(command):
    """Run *command* and return everything it wrote to standard output.

    Args:
        command: Argument list handed to ``subprocess.Popen``; it is executed
            directly, without a shell.

    Returns:
        The captured standard output as text, with newlines normalised.
    """
    # Imported locally because the snippet's import list does not include
    # subprocess; without it the call below fails with NameError.
    import subprocess

    # The context manager closes the pipe and reaps the child even if
    # communicate() raises.
    with subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        universal_newlines=True,
        shell=False,
    ) as proc:
        return proc.communicate()[0]
MACHINE_NAME = "cptr_name"
A_STAT = "some_stats_A"
B_STAT = "some_stats_B"
# Collects statistics for one machine by running each gatherer method in its
# own process; results are written into a Manager-backed dict.
class MachineStatsGatherer():
# Stores the machine name and creates the shared dict, seeded with that name.
def __init__(self, machineName):
self.machineName = machineName
manager = multiprocessing.Manager()
self.localStats = manager.dict() # shared resource for the sub-processes to write into
self.localStats[MACHINE_NAME] = machineName
# Runs both gatherers via runInParallel, then prints the shared dict.
def gatherStats(self):
self.runInParallel(
self.GatherSomeStatsA,
self.GatherSomeStatsB,
)
self.printStats()
# Pretty-prints the shared stats dict.
def printStats(self):
pprint.pprint(self.localStats)
# Starts one multiprocessing.Process per callable in fns, then joins them all.
def runInParallel(self, *fns):
processes = []
for fn in fns:
# NOTE(review): (self.localStats) is a parenthesised expression, not a
# one-element tuple. The process unpacks args with *, so the target is handed
# the dict's key string(s) instead of the dict, which is consistent with the
# "'str' object does not support item assignment" error quoted below.
process = multiprocessing.Process(target=fn, args=(self.localStats))
processes.append(process)
process.start()
for process in processes:
process.join()
# Stores under A_STAT whether the (stubbed) command output contains "Windows".
def GatherSomeStatsA(self, returnStats):
# do some remote command, simplified here for the sake of debugging
result = "Windows"
returnStats[A_STAT] = result.find("Windows") != -1
# Stores under B_STAT whether the (stubbed) command output contains "Windows".
def GatherSomeStatsB(self, returnStats):
# do some remote command, simplified here for the sake of debugging
result = "Windows"
returnStats[B_STAT] = result.find("Windows") != -1
def main():
    """Gather and print the statistics for a single hard-coded machine."""
    MachineStatsGatherer("SOMEMACHINENAME").gatherStats()
if __name__ == '__main__':
main()
And here is the error message
Traceback (most recent call last):
File "C:\Users\mesirard\AppData\Local\Programs\Python\Python37\lib\multiprocessing\process.py", line 297, in _bootstrap
self.run()
File "C:\Users\mesirard\AppData\Local\Programs\Python\Python37\lib\multiprocessing\process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "d:\workdir\trunks6\Tools\VTKAppTester\Utils\NXMachineMonitorShared.py", line 45, in GatherSomeStatsA
returnStats[A_STAT] = result.find("Windows") != -1
TypeError: 'str' object does not support item assignment
Process Process-3:
Traceback (most recent call last):
File "C:\Users\mesirard\AppData\Local\Programs\Python\Python37\lib\multiprocessing\process.py", line 297, in _bootstrap
self.run()
File "C:\Users\mesirard\AppData\Local\Programs\Python\Python37\lib\multiprocessing\process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "d:\workdir\trunks6\Tools\VTKAppTester\Utils\NXMachineMonitorShared.py", line 50, in GatherSomeStatsB
returnStats[B_STAT] = result.find("Windows") != -1
TypeError: 'str' object does not support item assignment
The issue is coming from this line
process = multiprocessing.Process(target=fn, args=(self.localStats))
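It should have an extra comma at the end of args, like so (without it the parentheses are only grouping, so args is the dict itself rather than a one-element tuple):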
process = multiprocessing.Process(target=fn, args=(self.localStats,))

How to download single file using multiple threads using python requests library

I tried the code below after switching it from urllib2 to the requests library, but it throws an error.
I ran it in PyCharm and got the traceback shown below.
I am not able to install the urllib2 module,
so I need to download a single file with multiple threads using
the requests library.
With multi-threading, a file can be downloaded in chunks simultaneously from different threads.
Error:
Exception in thread Thread-1:
Traceback (most recent call last):
File "C:\Users\suresh_ram\AppData\Local\Programs\Python\Python38\lib\threading.py", line 932, in
_bootstrap_inner
self.run()
File "C:\Users\suresh_ram\AppData\Local\Programs\Python\Python38\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:/Users/suresh_ram/PycharmProjects/DownloadManager/multithreaded_downloader.py", line 37, in downloadChunk
dataDict[idx] = open(req,"wb").write(req.content)
TypeError: expected str, bytes or os.PathLike object, not Response
Exception in thread Thread-3:
Traceback (most recent call last):
File "C:\Users\suresh_ram\AppData\Local\Programs\Python\Python38\lib\threading.py", line 932, in _bootstrap_inner
self.run()
File "C:\Users\suresh_ram\AppData\Local\Programs\Python\Python38\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:/Users/suresh_ram/PycharmProjects/DownloadManager/multithreaded_downloader.py", line 37, in downloadChunk
dataDict[idx] = open(req,"wb").write(req.content)
TypeError: expected str, bytes or os.PathLike object, not Response
Exception in thread Thread-2:
Traceback (most recent call last):
File "C:\Users\suresh_ram\AppData\Local\Programs\Python\Python38\lib\threading.py", line 932, in _bootstrap_inner
self.run()
File "C:\Users\suresh_ram\AppData\Local\Programs\Python\Python38\lib\threading.py", line 870, in run
import threading
import time
URL = "http://www.nasa.gov/images/content/607800main_kepler1200_1600-1200.jpg"
def buildRange(value, numsplits):
    """Split ``value`` bytes into contiguous, inclusive byte ranges.

    The ranges cover offsets ``0 .. value - 1`` exactly once, in order, and
    are formatted as ``"start-end"`` strings suitable for an HTTP ``Range``
    header. Integer arithmetic is used so that rounding can neither skip a
    byte between two ranges nor run past the last byte.

    Args:
        value: Total size in bytes.
        numsplits: Desired number of ranges; capped at ``value`` so that no
            range is empty.

    Returns:
        A list of ``"start-end"`` strings; empty when ``value`` or
        ``numsplits`` is not positive.
    """
    if value <= 0 or numsplits <= 0:
        return []
    numsplits = min(numsplits, value)
    # bounds[i] is the first byte of range i; bounds[-1] == value is the
    # exclusive end of the last range.
    bounds = [i * value // numsplits for i in range(numsplits + 1)]
    return [
        '%s-%s' % (start, stop - 1)
        for start, stop in zip(bounds, bounds[1:])
    ]
# Downloads `url` in `splitBy` chunks, one thread per chunk, and reassembles
# the chunks into a local file named after the last path segment of the URL.
# NOTE(review): `requests` and `os` are used below but are not imported in the
# visible part of this snippet.
def main(url=None, splitBy=3):
start_time = time.time()
if not url:
print("Please Enter some url to begin download.")
return
fileName = url.split('/')[-1]
# HEAD request to read the Content-Length without fetching the body.
sizeInBytes = requests.head(url, headers={'Accept-Encoding': 'identity'}).headers.get('content-length', None)
print("%s bytes to download." % sizeInBytes)
if not sizeInBytes:
print("Size cannot be determined.")
return
dataDict = {}
# split total num bytes into ranges
ranges = buildRange(int(sizeInBytes), splitBy)
# Thread target: meant to fetch one byte range and store it under its index.
def downloadChunk(idx, irange):
# NOTE(review): the GET is issued here without a Range header; the next line
# sets 'Range' on the headers of the response that already came back, so it
# cannot restrict what was downloaded.
req = requests.get(url)
req.headers['Range'] = 'bytes={}'.format(irange)
# NOTE(review): this is the line the quoted traceback points at: open() is
# given the Response object instead of a file name. The value stored would
# also be the return of write(), not the chunk data.
dataDict[idx] = open(req,"wb").write(req.content)
# create one downloading thread per chunk
downloaders = [
threading.Thread(
target=downloadChunk,
args=(idx, irange),
)
for idx,irange in enumerate(ranges)
]
# start threads, let run in parallel, wait for all to finish
for th in downloaders:
th.start()
for th in downloaders:
th.join()
print ('done: got {} chunks, total {} bytes'.format(
len(dataDict), sum( (
len(chunk) for chunk in dataDict.values()
) )
))
print( "--- %s seconds ---" % str(time.time() - start_time))
if os.path.exists(fileName):
os.remove(fileName)
# reassemble file in correct order
# NOTE(review): the file is opened in text mode ('w') although the chunks are
# expected to be downloaded bytes, and dict.iteritems() exists only in
# Python 2 while the traceback shows Python 3.8.
with open(fileName, 'w') as fh:
for _idx,chunk in sorted(dataDict.iteritems()):
fh.write(chunk)
print ("Finished Writing file %s" % fileName)
print ('file size {} bytes'.format(os.path.getsize(fileName)))
# Script entry point: download the sample PDF with the default chunk count.
# (A stray markdown fence that was fused onto the call line has been removed;
# it made the line a syntax error.)
if __name__ == '__main__':
    main("https://bugs.python.org/file47781/Tutorial_EDIT.pdf")

Python multiprocessing Deadlock using Queue

I have a python program like below.
from multiprocessing import Lock, Process, Queue, current_process
import time
lock = Lock()
# Worker loop: takes tasks from tasks_to_accomplish until it looks empty and
# reports each one on tasks_that_are_done, sleeping one second per task.
def do_job(tasks_to_accomplish, tasks_that_are_done):
# NOTE(review): empty() and get() are two separate steps. Another worker can
# take the last task in between, leaving this get() blocked; the quoted
# traceback shows a worker interrupted inside tasks_to_accomplish.get().
while not tasks_to_accomplish.empty():
task = tasks_to_accomplish.get()
print(task)
# The module-level lock is held only around the put() on the results queue.
lock.acquire()
tasks_that_are_done.put(task + ' is done by ' + current_process().name)
lock.release()
time.sleep(1)
return True
# Queues ten task strings, runs four worker processes over them, waits for the
# workers, then prints every entry found on the results queue.
def main():
number_of_task = 10
number_of_processes = 4
tasks_to_accomplish = Queue()
tasks_that_are_done = Queue()
processes = []
# All tasks are queued before any worker is started.
for i in range(number_of_task):
tasks_to_accomplish.put("Task no " + str(i))
# creating processes
for w in range(number_of_processes):
p = Process(target=do_job, args=(tasks_to_accomplish, tasks_that_are_done))
processes.append(p)
p.start()
# completing process
# NOTE(review): the workers are joined before tasks_that_are_done is drained.
# The multiprocessing docs warn that joining a process that still has queued
# items to flush can block; confirm whether that contributes to the hang.
for p in processes:
p.join()
# print the output
while not tasks_that_are_done.empty():
print(tasks_that_are_done.get())
return True
if __name__ == '__main__':
main()
Sometimes the program runs perfectly, but sometimes it gets stuck and doesn't complete. When I interrupt it manually, it produces the following error.
$ python3 multiprocessing_example.py
Task no 0
Task no 1
Task no 2
Task no 3
Task no 4
Task no 5
Task no 6
Task no 7
Task no 8
Task no 9
^CProcess Process-1:
Traceback (most recent call last):
File "multiprocessing_example.py", line 47, in <module>
main()
File "multiprocessing_example.py", line 37, in main
p.join()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 121, in join
res = self._popen.wait(timeout)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/popen_fork.py", line 51, in wait
return self.poll(os.WNOHANG if timeout == 0.0 else 0)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/popen_fork.py", line 29, in poll
pid, sts = os.waitpid(self.pid, flag)
KeyboardInterrupt
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
self.run()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "multiprocessing_example.py", line 9, in do_job
task = tasks_to_accomplish.get()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/queues.py", line 94, in get
res = self._recv_bytes()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
buf = self._recv_bytes(maxlength)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
buf = self._recv(4)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
chunk = read(handle, remaining)
KeyboardInterrupt
Can someone tell me what is the issue with the program? I am using python 3.6.
Note: Lock is not needed around a Queue.
lock.acquire()
tasks_that_are_done.put(task + ' is done by ' + current_process().name)
lock.release()
Queue
The Queue class in this module implements all the required locking semantics.
Question: ... what is the issue with the program?
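You are calling Queue.empty() and then Queue.get() as two separate steps.
This can deadlock, leaving the parent stuck in join(): there is no guarantee that the queue is still non-empty by the time get() is reached,
because another worker may have taken the last task in between, and then this worker blocks in get() forever.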
Deadlock prone:
while not tasks_to_accomplish.empty():
task = tasks_to_accomplish.get()
Instead of the empty()/get() pair, use for instance:
import queue

# Take tasks until the queue reports Empty. A single non-blocking get replaces
# the separate empty()/get() steps, so a worker can never block on a queue
# that another worker has just drained.
while True:
    try:
        task = tasks_to_accomplish.get_nowait()
    except queue.Empty:
        break
    else:
        # Handle task here
        ...
        # No task_done() call: multiprocessing.Queue, which the question uses,
        # does not provide it (only JoinableQueue does), so calling it would
        # raise AttributeError.

Threading gives typeError

I'm busy with a project for my studies and I keep getting this error:
Exception in thread Thread-62:
Traceback (most recent call last):
File "/usr/lib/python2.7/threading.py", line 801, in __bootstrap_inner
self.run()
File "/usr/lib/python2.7/threading.py", line 754, in run
self.__target(*self.__args, **self.__kwargs)
TypeError: 'long' object is not callable
The function that produces this error is:
teller = 0
# Stores one DNS observation under a fresh "_id<N>" key via r_serv (presumably
# a Redis client, not shown here) unless dnsname is in the configured ignore
# setting, then tries to fill in a "vt" field on a background thread.
def toRedis(dstip, srcip, dnsname):
# teller is the module-level counter used to build the record key.
global teller
ignoreDom = config.getSetting('setup', 'ignore')
if dnsname in ignoreDom:
pass
else:
teller += 1
answer = {"_id": teller, "destination": dstip, "source": srcip, "name": dnsname}
r_serv.hmset("_id" + str(teller), answer)
# NOTE(review): r_serv.hset(...) is called right here, on the calling thread,
# and its return value is passed as target. The thread then tries to call that
# value, which matches the quoted "'long' object is not callable" error.
t = threading.Thread(target=r_serv.hset("_id" + str(teller),
"vt", VTHandler(r_serv.hget("_id" + str(teller), "source"))))
t.daemon = True
t.start()
# Python 2 print statement.
print r_serv.hgetall("_id" + str(teller))
I'm pretty sure it comes from the thread, as that is what the traceback shows. But I can't figure out what is going wrong; it seems just fine to me. For the first few moments it doesn't give me an error, but after 20 seconds or so the error keeps popping up, even though the script keeps running while these errors are printed.
t = threading.Thread(target=r_serv.hset("_id" + str(teller),
"vt", VTHandler(r_serv.hget("_id" + str(teller), "source"))))
You are calling the r_serv.hset function and then assigning its return value to the target kwarg (which is then being called and raising the exception), instead of assigning the function itself to the target kwarg.
What you should be doing is:
t = threading.Thread(target=r_serv.hset, args=("_id" + str(teller),
"vt", VTHandler(r_serv.hget("_id" + str(teller), "source"))))

Python unclear error for multiprocessing

I am testing Python multiprocessing and wrote a simple program:
from multiprocessing import Process
from time import sleep
# Child-process entry point: greets `name`, then prints a counter once per
# second and stops when the counter reaches 10.
def f(name):
print 'hello', name
x=1
while True:
x+=1
sleep(1)
print 'subprocess '+str(x)
if x==10:
# NOTE(review): per the answer below, quit() is meant for the interactive
# interpreter; sys.exit() or a plain return is suggested instead. This call
# appears to be what triggers the quoted TypeError in _bootstrap.
quit()
if __name__ == '__main__':
p = Process(target=f, args=('bob',))
p.start()
x=1
while True:
x+=1
sleep(0.1)
print 'main process '+str(x)
if x==100:
quit()
It works, but I get a small error:
Traceback (most recent call last):
File "ttt.py", line 17, in <module>
p.start()
File "/usr/lib64/python2.6/multiprocessing/process.py", line 104, in start
self._popen = Popen(self)
File "/usr/lib64/python2.6/multiprocessing/forking.py", line 99, in __init__
code = process_obj._bootstrap()
File "/usr/lib64/python2.6/multiprocessing/process.py", line 242, in _bootstrap
sys.stderr.write(e.args[0] + '\n')
TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'
Use sys.exit() instead of quit(). The latter is meant to be used only in the interactive interpreter.
As Kevin noted, you can use return in f to exit the function normally. This would be perhaps more appropriate.

Categories