The problem is that when the script finishes, it never prints "Process finished with exit code 1". The print("exiting") call at the end does run, though. What's the problem?
from gevent import monkey
import gevent
import os

monkey.patch_all()

threads_count = 2

# search_term, start() and write_excel() are defined elsewhere in the script.
urls_dic = {}
for filename in search_term:
    if os.path.isfile(filename):
        urls_dic[filename] = []
        f = open(filename, mode='r')
        for line in f:
            urls_dic[filename].append(line.strip())
        f.close()

# Split every URL list into threads_count interleaved chunks, one per greenlet.
new_urls = []
for i in range(threads_count):
    new_urls.insert(i, {})
    for key in urls_dic:
        new_urls[i][key] = []
        for url in range(i, len(urls_dic[key]), threads_count):
            new_urls[i][key].append(urls_dic[key][url])

threads = [gevent.spawn(start, new_urls[i], i + 1) for i in range(threads_count)]
gevent.joinall(threads)

for job in threads:
    print("job=", job)
    for key in job.value:
        write_excel(job.value[key], key)

print("exiting")
exit(1)
I'm using Python 3 and mcstatus.
My source code is:
from mcstatus import JavaServer
import concurrent.futures
import json

input_file_name = r"dockerformcstatus\dockerMyScript\pyscript\input.txt"

def scaner(data):
    print("Processing data: ", data)
    global serverJsonData
    global timeout
    server = JavaServer.lookup(data, timeout)
    print(server)
    try:
        serverJson = server.status().raw
        print(serverJson)
        serverJsonData.append(serverJson)
    except:
        print("can't connect")

if __name__ == "__main__":
    serverJsonData = []
    timeout = 5
    data = None
    with open(input_file_name, "r") as f:
        data = f.readlines()
    # print(data)
    with concurrent.futures.ProcessPoolExecutor() as executor:
        executor.map(scaner, data)
    # scaner("168.119.4.46:25619")
    with open("serverdata.json", "w") as f:
        f.write(json.dumps(serverJsonData))
The problem is that if I use the with concurrent.futures.ProcessPoolExecutor() as executor: block and executor.map(scaner, data), nothing gets written into the list serverJsonData, but if I just call scaner("168.119.4.46:25619") directly, it works.
I tried to debug it but couldn't find the reason for this "bug".
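The sketch below (not the original script) assumes a placeholder scan() function and a hypothetical address list; it illustrates one way around the fact that each worker process gets its own copy of module globals, so appends made in a child never reach the parent: collect the workers' return values from executor.map instead of sharing a list.

import concurrent.futures

def scan(address):
    # Placeholder for the real lookup; here it just returns a dict.
    return {"host": address}

if __name__ == "__main__":
    addresses = ["168.119.4.46:25619", "example.org:25565"]  # hypothetical input
    results = []
    with concurrent.futures.ProcessPoolExecutor() as executor:
        # map() hands back each worker's return value in input order,
        # so the parent collects results without any global list.
        for result in executor.map(scan, addresses):
            if result is not None:
                results.append(result)
    print(results)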
I'm trying to write a submitter for a job scheduler. As I don't know when the jobs arrive or how long they will run, I use multiprocessing to spawn one process per job with subprocess, detached, so I can move on to the next job. So far this works pretty well, but I'd like to get the return code after the jobs finish. Is that possible? I tried several subprocess variations, but the ones that return the RC blocked the process for the runtime of the job.
#!/usr/bin/python3
# coding=utf-8
import time
import multiprocessing
import subprocess

JobsList = []

def SubmitJob(jobname):
    """ Submit the next requested job """
    print(f"Starting job {jobname}...")
    JobDir = "/home/xxxxx/Jobs/"
    JobMem = "{}{}.sh".format(JobDir, jobname)
    SysoutFile = "./Sysout/{}.out".format(jobname)
    fh = open(SysoutFile, 'w')
    kwargs = {}
    kwargs.update(start_new_session=True)
    p = subprocess.Popen(JobMem, shell=False, stdout=fh, **kwargs)
    pid = p.pid
    print(f"Job {jobname} pid {pid} submitted...")

def PrepareSubmit():
    """ Create and start one process per job """
    jobs = []
    for Job in JobsList:
        process = multiprocessing.Process(target=SubmitJob, args=(Job,))
        jobs.append(process)
        JobsList.remove(Job)
    for j in jobs:
        j.start()
    for j in jobs:
        j.join()
    print("All jobs submitted...")

def main():
    """ Check queue for new job requests """
    number_of_lines = 0
    jobs_list = []
    while 1:
        job_queue = open("/home/xxxxx/Development/Python/#Projects/Scheduler/jobs.que", 'r')
        lines = job_queue.readlines()
        if len(lines) > number_of_lines:
            jobs_list.append(lines[len(lines)-1])
            NewJob = lines[len(lines)-1][:-1]
            JobsList.append(NewJob)
            PrepareSubmit()
            number_of_lines = number_of_lines + 1
        time.sleep(1)

if __name__ == "__main__":
    main()
The while loop in main() is for testing purposes only.
Can someone tell me if that is possible, and how? Thanks in advance.
This is the code that does give me a return code, but it doesn't send a job until the previous one has finished. So a long-running job delays the submission of the following jobs, which is what I called blocking.
def Submit(job):
    """ Submit the next requested job """
    print(f"Starting job {job}...")
    JobDir = "/home/uwe/Jobs/"
    JobMem = "{}{}.sh".format(JobDir, job)
    SysoutFile = "./Sysout/{}.out".format(job)
    fh = open(SysoutFile, 'w')
    kwargs = {}
    kwargs.update(start_new_session=True)
    p = subprocess.Popen(JobMem, shell=False, stdout=fh, **kwargs)
    pid = p.pid
    while p.poll() == None:
        a = p.poll()
        print(a)
        time.sleep(1)
    else:
        rc = p.returncode
        print(f"PID: {pid} rc: {rc}")

def main():
    JobsList = ['JOB90501', 'JOB00001', 'JOB00002', 'JOB00003']
    for Job in JobsList:
        Submit(Job)
Roy, this is my current code after your last hint:
ProcessList = {}  # maps each Popen object to its pid

def SubmitJob(jobname):
    """ Submit the next requested job """
    JobDir = "/home/uwe/Jobs/"
    JobMem = "{}{}.sh".format(JobDir, jobname)
    SysoutFile = "./Sysout/{}.out".format(jobname)
    fh = open(SysoutFile, 'w')
    kwargs = {}
    kwargs.update(start_new_session=True)
    p = subprocess.Popen(JobMem, shell=False, stdout=fh, **kwargs)
    ProcessList[p] = p.pid
    print(f"Started job {jobname} - PID: {p.pid}")

def main():
    c_JobsList = ['JOB00001', 'JOB00002', 'JOB00003']
    for Job in c_JobsList:
        SubmitJob(Job)
    for p, pid in ProcessList.items():
        RcFile = "./Sysout/{}.rc".format(pid)
        f = open(RcFile, 'w')
        while p.poll() == None:
            a = p.poll()
            time.sleep(1)
        else:
            rc = p.returncode
            f.writelines(str(rc))
            print(f"PID: {pid} rc: {rc}")
        f.close()
and the output:
Started job JOB00001 - PID: 5426
Started job JOB00002 - PID: 5427
Started job JOB00003 - PID: 5429
PID: 5426 rc: 0
PID: 5427 rc: 0
PID: 5429 rc: 8
Edit (the original answer is kept below for future reference)
The natural means for this purpose is Popen.poll, but apparently it doesn't work in some cases (see https://lists.gt.net/python/bugs/633489). The solution I'd like to propose is using Popen.wait with a very short timeout, as in the following code sample:
import subprocess
import time

p = subprocess.Popen(["/bin/sleep", "3"])
print(f"Created process {p.pid}")
count = 0
while True:
    try:
        ret = p.wait(.001)  # wait for 1 ms
        print(f"Got a return code {ret}")
        break
    except subprocess.TimeoutExpired as e:
        print("..", end="")
        time.sleep(.5)
        print(f"Still waiting, count is {count}")
        count += 1
print("Done!")
The output I'm getting is:
Created process 30040
..Still waiting, count is 0
..Still waiting, count is 1
..Still waiting, count is 2
..Still waiting, count is 3
..Still waiting, count is 4
..Still waiting, count is 5
Got a return code 0
Done
Original idea - Popen.poll
The method you should be using is Popen.poll (documentation). It returns the exit status of the process, or None if it's still running.
To use it, you'll have to keep the Popen objects you get back from subprocess.Popen and poll them later on.
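A minimal sketch of that idea (the /bin/sleep calls stand in for the real jobs): keep every Popen object and poll the whole set periodically until each one reports an exit status.

import subprocess
import time

# Three dummy children; in the scheduler these would be the submitted job scripts.
procs = [subprocess.Popen(["/bin/sleep", str(n)]) for n in (1, 2, 3)]
pending = {p.pid: p for p in procs}

while pending:
    for pid, p in list(pending.items()):
        rc = p.poll()  # None while running, the exit status once finished
        if rc is not None:
            print(f"PID {pid} finished with rc {rc}")
            del pending[pid]
    time.sleep(0.5)  # avoid busy-waiting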
How can I track the duration of the function write_file() in the main process?
Task: create a condition such that if the function has been executing for more than 10 seconds, it is restarted.
from multiprocessing import Pool

def write_file(file: str):
    f = open(file, 'w')
    for item in range(0, 1500000):
        f.write("%s\n" % item)
    f.close()

if __name__ == '__main__':
    list_files = ['1.txt', '2.txt', '3.txt']
    with Pool(3) as p:
        p.map(write_file, list_files)
I found the attempt to amend Pool here to be overcomplicated.
The Pool class keeps its workers alive until the whole work queue is done, and therefore needs complex machinery to control them.
Instead, if your 10-second requirement is not very strict, you can use the following code:
from multiprocessing import Process
import time

pdict = {}
for fname in list_files:
    p = Process(target=write_file, args=(fname,))
    pdict[fname] = p
    p.start()

while pdict:
    to_del = []
    time.sleep(10)
    for pname in pdict:
        if pdict[pname].exitcode == None or pdict[pname].is_alive():
            pdict[pname].terminate()  # kill the old process; that should also release the file resource
            pdict[pname] = Process(target=write_file, args=(pname,))
            pdict[pname].start()  # simply create a new one and start it
        else:
            to_del.append(pname)
    for pname in to_del:
        del pdict[pname]
I have a very simple Python script using gevent.pool to download URLs (see below). The script runs fine for a couple of days and then locks up. I noticed that the memory usage is very high at that time. Am I using gevent incorrectly?
import sys
from gevent import monkey
monkey.patch_all()
import urllib2
from gevent.pool import Pool

inputFile = open(sys.argv[1], 'r')
urls = []
counter = 0
for line in inputFile:
    counter += 1
    urls.append(line.strip())
inputFile.close()

outputDirectory = sys.argv[2]

def fetch(url):
    try:
        body = urllib2.urlopen("http://" + url, None, 5).read()
        if len(body) > 0:
            outputFile = open(outputDirectory + "/" + url, 'w')
            outputFile.write(body)
            outputFile.close()
            print "Success", url
    except:
        pass

pool = Pool(int(sys.argv[3]))
pool.map(fetch, urls)
body = urllib2.urlopen("http://" + url, None, 5).read()
The line above reads the entire content into memory as a single string. To prevent that, change fetch() as follows:
def fetch(url):
    try:
        u = urllib2.urlopen("http://" + url, None, 5)
        try:
            with open(outputDirectory + "/" + url, 'w') as outputFile:
                while True:
                    chunk = u.read(65536)
                    if not chunk:
                        break
                    outputFile.write(chunk)
        finally:
            u.close()
        print "Success", url
    except:
        print "Fail", url
In my script, a loop downloads and saves files (with curl). But the loop iterates too quickly, so the download and save operations don't have time to complete, and as a result the files come out broken.
def get_images_thread(table):
    class LoopThread(threading.Thread):
        def run(self):
            global db
            c = db.cursor()
            c.execute(""" SELECT * FROM js_stones ORDER BY stone_id LIMIT 1
                      """)
            ec = EasyCurl(table)
            while(1):
                stone = c.fetchone()
                if stone == None:
                    break
                img_fname = stone[2]
                print img_fname
                url = "http://www.jstone.it/" + img_fname
                fname = url.strip("/").split("/")[-1].strip()
                ec.perform(url, filename="D:\\Var\\Python\\Jstone\\downloadeble_pictures\\" + fname,
                           progress=ec.textprogress)
This is an excerpt from the examples for the PycURL library:
# Make a queue with (url, filename) tuples
queue = Queue.Queue()
for url in urls:
    url = url.strip()
    if not url or url[0] == "#":
        continue
    filename = "doc_%03d.dat" % (len(queue.queue) + 1)
    queue.put((url, filename))

# Check args
assert queue.queue, "no URLs given"
num_urls = len(queue.queue)
num_conn = min(num_conn, num_urls)
assert 1 <= num_conn <= 10000, "invalid number of concurrent connections"
print "PycURL %s (compiled against 0x%x)" % (pycurl.version, pycurl.COMPILE_LIBCURL_VERSION_NUM)
print "----- Getting", num_urls, "URLs using", num_conn, "connections -----"

class WorkerThread(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        while 1:
            try:
                url, filename = self.queue.get_nowait()
            except Queue.Empty:
                raise SystemExit
            fp = open(filename, "wb")
            curl = pycurl.Curl()
            curl.setopt(pycurl.URL, url)
            curl.setopt(pycurl.FOLLOWLOCATION, 1)
            curl.setopt(pycurl.MAXREDIRS, 5)
            curl.setopt(pycurl.CONNECTTIMEOUT, 30)
            curl.setopt(pycurl.TIMEOUT, 300)
            curl.setopt(pycurl.NOSIGNAL, 1)
            curl.setopt(pycurl.WRITEDATA, fp)
            try:
                curl.perform()
            except:
                import traceback
                traceback.print_exc(file=sys.stderr)
                sys.stderr.flush()
            curl.close()
            fp.close()
            sys.stdout.write(".")
            sys.stdout.flush()

# Start a bunch of threads
threads = []
for dummy in range(num_conn):
    t = WorkerThread(queue)
    t.start()
    threads.append(t)

# Wait for all threads to finish
for thread in threads:
    thread.join()
If you're asking what I think you're asking,
from time import sleep
sleep(1)
should "solve"(It's hacky to the max!) your problem. Docs here. I would check that that really is your problem, though. It seems catastrophically unlikely that pausing for a few seconds would stop files from downloading brokenly. Some more detail would be nice too.
os.waitpid() might also help.
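For completeness, a small sketch of the os.waitpid() route (POSIX only; the /bin/sleep child is just a stand-in for whatever subprocess you launch): with os.WNOHANG the call returns (0, 0) while the child is still running and (pid, status) once it has exited, so you can check on it without blocking.

import os
import subprocess
import time

child = subprocess.Popen(["/bin/sleep", "2"])  # stand-in for a real download subprocess
while True:
    pid, status = os.waitpid(child.pid, os.WNOHANG)  # non-blocking check
    if pid == 0:
        # Child has not exited yet.
        print("still running...")
        time.sleep(0.5)
    else:
        print("exit code:", os.WEXITSTATUS(status))
        break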