I want to write a Python script that will read all the output of other scripts and save it to a log file. Here is my simple test, which doesn't work.
#parent.py
import subprocess
import sys

def writer():
    i = 10
    while i:
        print(i)
        i -= 1

print('Start of parent process \n')
process = subprocess.Popen(['python3', 'reader.py'], stdin=sys.stdout)
print('Start of writer')
writer()
#reader.py
def reader():
    count = 10
    log = open('log', 'w')
    while count:
        print(f'count = {count}')
        i = input()
        print(i)
        log.write(i)
        count -= 1
    log.close()

print('Second process started')
reader()
print('Second process finish')
I need to set the input of the child process, but without sending it over some extra channel. I already have a few scripts that use a simple echo for printing, and my reader should log that output.
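For reference, a minimal sketch (not the original code) of two common ways to wire this up with subprocess pipes; other_script.py is a hypothetical stand-in for one of the existing echo scripts:

import subprocess

# Option 1: give reader.py a pipe for stdin and write to it from the parent.
proc = subprocess.Popen(['python3', 'reader.py'],
                        stdin=subprocess.PIPE, text=True)
for i in range(10, 0, -1):
    proc.stdin.write(f"{i}\n")   # each line becomes one input() in reader.py
proc.stdin.close()
proc.wait()

# Option 2: connect the stdout of another script straight to reader.py's stdin.
other = subprocess.Popen(['python3', 'other_script.py'], stdout=subprocess.PIPE)
reader = subprocess.Popen(['python3', 'reader.py'], stdin=other.stdout)
other.stdout.close()   # let reader.py own the read end of the pipe
reader.wait()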
I know I should use .join(), and I am already using it, but here is the thing: I start a round of threads (about 100) to perform some action, and after they complete, I start another 100 threads.
The context is that I am trying to check whether certain ports on my PC are open, using threads. I start 100 threads, each checking a different value, and each writes its response to a txt file. The problem is that some of the ports are not being written to the file, while others are. When I run the code below to scan the ports from 3000 to 4000, I want my file to have 1000 lines, each saying whether a port is open or closed, but when I run it, it has about 930. Sometimes more, sometimes fewer, but never 1000 lines. Check below this code for another thing I tried.
import socket
import sys
import threading
import time

def check_range_ports(ip_host, initial_port, final_port):
    threads = []
    count = initial_port
    loop = 0
    number_of_threads = 100
    while count < final_port:
        if count + number_of_threads > final_port:
            number_of_threads = final_port - count + 1
        for i in range(count, count + number_of_threads):
            t = threading.Thread(target=check_port, args=(ip_host, i))
            t.daemon = True
            threads.append(t)
        for i in range(number_of_threads):
            threads[i].start()
        for i in range(number_of_threads):
            threads[i].join()
        count += number_of_threads
        loop += 1
        threads = []

def check_port(ip_host, port):
    try:
        time.sleep(0.5)
        my_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
        my_socket.settimeout(5)
        result = my_socket.connect_ex((ip_host, port))
        with open("./ports.txt", "a+", encoding="utf-8") as f:
            if result == 0:
                f.write(f"Port {port} is open.\n")
            else:
                f.write(f"Port {port} is closed.\n")
        my_socket.close()
    except socket.timeout:
        print("Timeout on socket!")
        sys.exit()
    except socket.gaierror:
        print("Error on the host!")
        sys.exit()
    except KeyboardInterrupt:
        print("Exiting program!")
        sys.exit()
Here is another thing I tried. I created 10 threads, and each of these threads created 100 more subthreads, and each of these subthreads would write a line in a file. It works better than the previous attempt, but I can't get exactly 1000 lines, which is what I am aiming for.
Is what I'm thinking of doing doable? If yes, how can I achieve it?
def start_threads(ip_host, initial_port, final_port):
    threads = []
    initial_port = 3000
    final_port = 4000
    number_of_ports_to_be_scanned = final_port - initial_port
    ip_host = 'XXX.XXX.X.XX'
    number_of_threads = 0
    if number_of_ports_to_be_scanned / 100 != 0:
        number_of_threads = int(number_of_ports_to_be_scanned / 100) + 1
    else:
        number_of_threads = number_of_ports_to_be_scanned / 100
    count = 0
    for i in range(number_of_threads):
        # if initial_port + count > final_port:
        #     number_of_threads = final_port - number_of_ports_to_be_scanned + 1
        t = threading.Thread(
            target=check_testing_port,
            args=(ip_host, initial_port + count, final_port)
        )
        # t.daemon = True
        t.start()
        threads.append(t)
        count += 100
    # for i in range(number_of_threads):
    #     threads[i].start()
    for i in range(number_of_threads):
        threads[i].join()

def check_testing_port(ip_host, port, final_port):
    sub_threads = []
    number_of_sub_threads = 100
    print(port)
    if port + 100 > final_port:
        number_of_sub_threads = port - final_port
    for i in range(port, port + number_of_sub_threads):
        t = threading.Thread(target=check_port, args=(ip_host, i))
        # t.daemon = True
        t.start()
        sub_threads.append(t)
    # for i in range(number_of_sub_threads):
    #     sub_threads[i].start()
    for i in range(number_of_sub_threads):
        sub_threads[i].join()

def check_port(ip_host, port):
    with open("./testing_ports.txt", "a", encoding="utf-8") as f:
        f.write(f"Port {port}" + "\n")
In check_port you wrote

    with open("ports.txt", "a+") as f:
        f.write(...)

That is insane. In the sense that it is a critical section and you're not holding a lock. Acquire a mutex before messing with the file. Or write thread-specific files, which are subsequently combined into a single file. Better yet, tell all threads to write to a single Queue, and have just one thread read enqueued results and append them to a text file.
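A minimal sketch of the mutex idea (write_result is a hypothetical helper name; one module-level lock is assumed to be shared by every worker thread, and the rest of check_port stays as in the question):

import threading

file_lock = threading.Lock()  # one lock shared by every scanning thread

def write_result(port, is_open):
    # Only one thread at a time may append to the shared file.
    with file_lock:
        with open("./ports.txt", "a", encoding="utf-8") as f:
            f.write(f"Port {port} is {'open' if is_open else 'closed'}.\n")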
You need to properly synchronise the access to the shared buffer you are writing to (the output file) concurrently. Only one thread at a time must write to the output file, otherwise you'll get a data race leading to the data corruption you observed.
You can ensure that only one thread is writing to the shared file by using a mutex, a queue or any other suitable concurrency primitive. Here is an example using a queue:
import threading
import time
from queue import Queue

# Sentinel object to signal end of writing
__END = object()

def check_range_ports(ip_host: str):
    threads: list[threading.Thread] = []
    number_of_threads = 100
    queue = Queue()

    # Start the compute threads
    for i in range(number_of_threads):
        t = threading.Thread(target=check_port, args=(ip_host, i, queue))
        t.start()
        threads.append(t)

    # Start the writer thread
    tw = threading.Thread(target=writer, args=("output.txt", queue))
    tw.start()

    # Wait for all compute threads to finish
    for i in range(number_of_threads):
        threads[i].join()

    # Signal to the writer loop to end
    queue.put(__END)

    # Wait for the writer thread to finish
    tw.join()

def check_port(ip_host: str, port: int, queue: Queue):
    time.sleep(0.5)
    # Enqueue the result to the synchronisation queue
    queue.put((ip_host, port))

def writer(filename: str, queue: Queue):
    with open(filename, "w") as f:
        while True:
            item = queue.get()
            # End the write loop if there is no more item to process
            if item is __END:
                break
            # This write operation is sequential and thread-safe
            ip_host, port = item
            f.write(f"Port {port} of host {ip_host} is open.\n")

def main():
    check_range_ports("127.0.0.1")

if __name__ == "__main__":
    main()
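If the real socket check from the question is kept, check_port might look like the sketch below (an assumption about how the two pieces would be combined, not part of the answer above); the writer thread would then simply call f.write(item) for each dequeued string.

import socket
from queue import Queue

def check_port(ip_host: str, port: int, queue: Queue):
    # Same connect_ex probe as in the question, but the result is enqueued
    # instead of being written to the file from many threads at once.
    my_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
    my_socket.settimeout(5)
    result = my_socket.connect_ex((ip_host, port))
    my_socket.close()
    state = "open" if result == 0 else "closed"
    queue.put(f"Port {port} is {state}.\n")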
I have a filelist that contains a list of input files for a process called "Analysis".
I have written a Python script that reads this filelist and starts the process "Analysis" for each line in the file.
It also checks whether the count of running processes is less than or equal to 3. If the count is less than 3, it reads the next line and starts the next process. If the count is already 3, it waits for any one of the processes to end before the next one starts.
Basically, the idea is to run 3 processes at a time.
This is my code.
#!/usr/bin/python
import os
import psutil
import getpass
from time import time, sleep

def app_call():
    with open('file_list', 'r') as mf:
        for line in mf:
            print(line)
            cmd = "analysis " + line.strip()
            print(cmd)
            os.system(cmd)

def start_3_process():
    while True:
        sleep(60 - time() % 60)
        count = sum(1 for proc in psutil.process_iter() if proc.name() == 'analysis' and proc.username() == getpass.getuser())
        print(count)
        if count < 3:
            print("no of process is less than 3")
            app_call()
        else:
            print("no of process is " + str(count))

start_3_process()
The problem here is that when the process count falls below 3, the script reads file_list again from line 1 to line 5, so the process calls start over from line 1 of the file through line 5.
The filelist has 5 lines like
input1
input2
input3
input4
input5
How can I make my code robust, so that once the 5 lines have been read from the file, the read is not repeated?
This code runs on Python 2.x.
I think the best way to do this is to put all the input in a Queue and have the threads get items from it. That way the reads won't be repeated.
# -*- coding: UTF-8 -*-
import time
import Queue
import threading

class myThread(threading.Thread):
    def __init__(self, name, q):
        threading.Thread.__init__(self)
        self.name = name
        self.q = q

    def run(self):
        while 1:
            try:
                things_to_be_done(self.name, self.q)
            except Queue.Empty:
                break

def things_to_be_done(name, q):
    q_obj = q.get(timeout=1)
    time.sleep(1)
    print(name)

def main(link_list, time):
    thread_list = [str(n) for n in range(time)]
    length = len(link_list)
    queue = Queue.Queue(length)
    threads = []
    for q_obj in link_list:
        queue.put(q_obj)
    for name in thread_list:
        thread = myThread(name, queue)
        thread.start()
        threads.append(thread)

if __name__ == '__main__':
    times = 3
    link_list = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    main(link_list, times)
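As an alternative sketch (an assumption, not part of the answer above): a multiprocessing.Pool with 3 workers reads the file list once and keeps at most 3 analysis commands running at a time; this also works on Python 2.x, assuming the analysis command is on PATH.

import os
from multiprocessing import Pool

def run_analysis(line):
    # Each worker runs one analysis command and returns when it finishes.
    os.system("analysis " + line)

if __name__ == '__main__':
    with open('file_list') as mf:
        lines = [line.strip() for line in mf if line.strip()]
    pool = Pool(3)                 # at most 3 concurrent analysis processes
    pool.map(run_analysis, lines)  # each line is handed out exactly once
    pool.close()
    pool.join()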
I wrote (copied and adapted) a Python program to do some backups. It basically reads the files from a source location and copies them or creates hard links in a destination. It is multi-threaded and that part works OK. The problem is that the loop that is called in the threads never ends. The relevant code is below. I read that it might be one of the threads raising an error and hanging, but I don't know how to check that. The program reads from a file queue, processes the files, and then is supposed to end. It reads and processes the files until the queue is empty and then just hangs. Any suggestions on what to try or what to look at? Hopefully I pasted enough code to help.
print ("beginning backup")
backup(path_to_source, path_to_full, dest_path, backup_type)
print ("Backup finished")
# --- clean full backup
if backup_type == "Full":
print ("Cleaning up full backup - deleting deleted files")
clean_backup()
print ("Backup has been cleaned. Done")
#--------------------------------------------------
def backup(path_to_source, path_to_full, dest_path, backup_type):
threadWorkerCopyInc(source_files)
print ("All Done") #(never executes)
def threadWorkerCopyInc(fileNameList):
global num_threads
#fileQueue = queue.queue()
for i in range(num_threads):
t = threading.Thread(target=IncCopyWorker)
t.daemon = True
t.start()
#threads.append(t)
for fileName in fileNameList:
fileQueue.put(fileName)
fileQueue.join()
#for i in range(len(threads)):
# fileQueue.put('None')
#for t in threads:
# t.join()
print('done with threadworkercopyinc') #(never executes)
def IncCopyWorker():
print ('Starting IncCopyWorker. ') #executes
while True:
filename = fileQueue.get()
if filename == 'None':
print("\nFilename is none") #executes when fileQueue.qsize() = 0
with threadlock: processedfiles +=1
print ("Files to go: %d processed files: %d newfiles: %d hardlinks: %d not copied: %d dfsr: %d " %(fileQueue.qsize(), processedfiles, newfiles, hardlinks, notcopied, dfsr), end = '\r')
is_dfsr = filename.upper().find("DFSR")
if (is_dfsr == -1):
#do main processing here
else:
with threadlock: dfsr+=1
fileQueue.task_done()
print ("done with while true loop") #never Executes
So it looks like my problem was that I included continue statements in my try/except blocks to try to speed up execution. When there is a continue or break statement, the loop never ends. When I took out the continue statements, it finished the way it was supposed to. I don't know the reason, just the solution.
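A likely explanation (an assumption, since the full loop body is not shown): if a continue runs before fileQueue.task_done(), the queue's unfinished-task count never drops to zero and fileQueue.join() blocks forever. A small sketch of a worker that marks every item done even when it skips it:

def IncCopyWorker():
    while True:
        filename = fileQueue.get()
        try:
            if filename == 'None':
                continue              # skip the sentinel...
            # ... main processing here ...
        finally:
            fileQueue.task_done()     # ...but still mark the item as done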
I've been struggling with this program for some time.
I wrote this program to learn how to run the same program on different CPUs at the same time and to reduce processing time.
By itself, the program is not complex; it just detects whether there are screenshots in different directories, but as I said before, my final goal is not to detect those files, but to learn multiprocessing.
To make it easier to test, instead of reading files or user input, I just set those variables, so you won't have to type anything in. It'll be faster to test this way.
Here's the code:
from multiprocessing import Array, Lock, Pool

def DetectSCREENSHOT(file, counter, total):
    # To check if this function is ever accessed
    print 'Entered in DetectSCREENSHOT function'
    if file.startswith('Screenshot') == False:
        print ' (%s %%) ERROR: the image is not a screenshot ' % (round((float(counter)/total)*100, 1))
        return 0
    else:
        print ' (%s %%) SUCCESS: the image is a screenshot' % (round((float(counter)/total)*100, 1))
        return 1

def DetectPNG(file_list, counter_success_errors, lock):
    # To check if this function is ever accessed
    print 'Entered in DetectPNG function'
    for i in range(len(file_list)):
        file = file_list[i]
        # If the file is an .png image:
        if file.endswith('.png'):
            lock.acquire()
            counter_success_errors[0] = counter_success_errors[0] + 1
            lock.release()
            if DetectSCREENSHOT(file, counter_success_errors[0], len(file_list)) == 1:
                lock.acquire()
                counter_success_errors[1] = counter_success_errors[1] + 1
                lock.release()
            else:
                lock.acquire()
                counter_success_errors[2] = counter_success_errors[2] + 1
                lock.release()

def Main():
    # file_list = # List of lists of files in different directories
    file_list = [['A.png', 'B.png', 'C.txt', 'Screenshot_1.png'], ['D.txt', 'Screenshot_2.png', 'F.png', 'Screenshot_3.png']]

    # Array where the first cell is a counter, the second one, the number of successes
    # and the third one, the number of errors.
    counter_success_errors = Array('i', 3)
    lock = Lock()

    counter_success_errors[0] = 0
    counter_success_errors[1] = 0
    counter_success_errors[2] = 0

    # Number of CPUs to be used (will set the number of different processes to be run)
    # CPUs = raw_input('Number of CPUs >> ')
    CPUs = 2
    pool = Pool(processes=CPUs)
    for i in range(CPUs):
        pool.apply_async(DetectPNG, [file_list[i], counter_success_errors, lock])
    pool.close()
    pool.join()

    success = int(counter_success_errors[1])
    errors = int(counter_success_errors[2])

    print(' %s %s %s successfully. %s %s.' % ('Detected', success, 'screenshot' if success == 1 else 'screenshots', errors, 'error' if errors == 1 else 'errors'))

#################################################

if __name__ == "__main__":
    Main()
When I execute it, I don't get any errors, but it looks like it never even enters the DetectPNG and DetectSCREENSHOT functions.
What's broken in the code?
Output: Detected 0 screenshots successfully. 0 errors.
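One debugging aid that may explain the silent failure (an assumption about the cause, not a confirmed diagnosis): Pool.apply_async only surfaces a worker-side or pickling exception when .get() is called on the AsyncResult it returns, so keeping those results and calling .get() should show the real error instead of quietly printing the final summary.

# Inside Main(), in place of the plain apply_async loop:
results = []
for i in range(CPUs):
    # Keep the AsyncResult so any exception can be re-raised here.
    results.append(pool.apply_async(DetectPNG, [file_list[i], counter_success_errors, lock]))
pool.close()
pool.join()
for r in results:
    r.get()  # re-raises errors such as failing to pickle the Lock/Array arguments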
Let's say I have the code:
if __name__ == "__main__":
j = 0
while True:
j = j + 1
print j
time.sleep(1)
If I press Ctrl+C or close the program window in the middle of a run, is there any way to persist the state of the process before it dies, and then recover the process from the exact point where it died? If not in Python, is there another language that has this functionality?
Ctrl+C raises KeyboardInterrupt. You can put your code inside a try/except block and handle the KeyboardInterrupt before exiting.
import sys
import time

if __name__ == "__main__":
    j = 0
    try:
        while True:
            j = j + 1
            print(j)
            time.sleep(1)
    except KeyboardInterrupt:
        with open('myprog.dat', 'w') as f:
            f.write('j = {}'.format(j))
        sys.exit("Data saved!")
You could also check if the data file exists, and read from the data file to initialize the value of j.
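A small sketch of that restore step (assuming the same myprog.dat file format used above):

import os

j = 0
if os.path.exists('myprog.dat'):
    with open('myprog.dat') as f:
        # The file holds a line like "j = 42"; recover the number after '='.
        j = int(f.read().split('=')[1])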