Threading with Queue and simultaneous print in Python - python

I'm trying to complete an exercise about the threading module. In my example, I just want to create workers which simply print filenames.
import optparse
import os
import queue
import threading
def main():
    """Start the worker pool, queue every file found, and wait until all are handled."""
    options, paths = parse_options()
    pending = get_files(paths)
    jobs = queue.Queue()
    # Workers are started first; they block on the empty queue until it is fed.
    for number in range(1, options.count + 1):
        label = ""
        if options.debug:
            label = "{0}: ".format(number)
        thread = Worker(jobs, label)
        thread.daemon = True
        thread.start()
    for path in pending:
        jobs.put(path)
    # Returns once task_done() has been called for every queued path.
    jobs.join()
class Worker(threading.Thread):
    """Thread that takes file names off a shared queue and prints each one.

    Args:
        worker_queue: queue the file names are read from; ``task_done()``
            is called on it once per retrieved item.
        threadnum: text printed in front of every file name (may be empty).
    """

    # One lock for all workers, so only one of them writes a line at a time.
    _print_lock = threading.Lock()

    def __init__(self, worker_queue, threadnum):
        super().__init__()
        self.worker_queue = worker_queue
        self.threadnum = threadnum
        self.result = []

    def run(self):
        """Process queue items forever; the thread is meant to run as a daemon."""
        while True:
            # Fetch outside the try block: task_done() must only be called
            # for an item that was actually retrieved.
            file = self.worker_queue.get()
            try:
                self.process(file)
            finally:
                self.worker_queue.task_done()

    def process(self, file):
        """Print the prefix and *file* as one complete line."""
        import sys

        # print() writes the text and the trailing newline separately, which
        # lets lines from different threads run together. Build the whole
        # line first and write it in one call while holding the lock.
        line = "{0}{1}\n".format(self.threadnum, file)
        with self._print_lock:
            sys.stdout.write(line)
            sys.stdout.flush()
def parse_options():
    """Parse the command line and return ``(opts, args)``.

    Exits through ``parser.error`` when the thread count is outside 1..20.
    """
    parser = optparse.OptionParser(
        usage="xmlsummary.py [options] [path] outputs a summary of the XML files in path; path defaults to .")
    option_table = (
        (("-t", "--threads"),
         dict(dest="count", default=7, type="int",
              help="the number of threads to use (1..20) [default %default]")),
        (("-v", "--verbose"),
         dict(default=False, action="store_true",
              help="show verbose information if requested, [default %default]")),
        (("-d", "--debug"),
         dict(dest="debug", default=False, action="store_true",
              help="show debug information such as thread id, [default, %default]")),
    )
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)
    opts, args = parser.parse_args()
    if opts.count < 1 or opts.count > 20:
        parser.error("threads must be in following range (1..20)")
    return opts, args
def get_files(args):
    """Return the files named in *args*, expanding every non-file entry recursively."""
    found = []
    for entry in args:
        if not os.path.isfile(entry):
            # Anything that is not a regular file is walked as a directory tree.
            for folder, _subdirs, names in os.walk(entry):
                found.extend(os.path.join(folder, name) for name in names)
            continue
        found.append(entry)
    return found
# Run only when executed as a script, so importing this module does not
# start the worker pool.
if __name__ == "__main__":
    main()
This code returns me with -d option (which will include thread ID in output):
1: C:\P\1.jpg2: C:\P\2.jpg3: C:\P\3chapter.bat4: C:\P\423.txt5: C:\P\a.txt6: C:\P\bike.dat7: C:\P\binary1.dat
The first question is:
Do all threads print on one line because they all share a single sys.stdout?
I changed the print command to the following:
def process(self, file):
    # print() already appends a newline of its own, so the explicit "\n"
    # in the format string produces an extra empty line after each entry.
    print("{0}{1}\n".format(self.threadnum, file))
and now this I have following results:
1: C:\P\1.jpg
2: C:\P\2.jpg
3: C:\P\3chapter.bat
4: C:\P\423.txt
5: C:\P\a.txt
6: C:\P\bike.dat
7: C:\P\binary1.dat
1: C:\P\dckm.txt
2: C:\P\dlkcm.txt
3: C:\P\email.html
The second question is:
how to remove empty lines from the output?

You are on the right track with sys.stdout. A simple solution to both problems would be a function like this:
def tprint(msg):
    """Write ``str(msg)`` plus a newline to stdout as one string, then flush."""
    line = "{!s}\n".format(msg)
    out = sys.stdout
    out.write(line)
    out.flush()
and use it instead of print, which writes the text and the trailing newline to sys.stdout separately.

Related

How to retrieve data from a file in a different folder

I have a Python script to scan the internet with the following structure, which I'm trying to adapt to read a list of IPs instead of a range of IPs
/ip-ranges
range.txt
/script
loader.py
scanner.py
the scanner.py is the following
# Import modules
from .inspection import Request, InspectPaths, InspectContent, PortIsOpen, GetTitle
from ipaddress import ip_address
from threading import Thread
# Scan result
class __Result:
    """Scan result record holding a name, a type, a path and a title."""

    def __init__(self, name, atype, path, title):
        # The "atype" argument is stored under the attribute name "type".
        self.name, self.type = name, atype
        self.path, self.title = path, title
# Return IPs in IPv4 range, inclusive.
import ipaddress
def process(iptext):
    """Print the network *iptext* belongs to, or "INVALID" if that fails."""
    try:
        network = ipaddress.ip_interface(iptext).network
        print(network)
    except Exception:
        print("INVALID")
# Hand every line of the address list to process(), with all whitespace removed.
with open('ipaddresses.txt', 'r') as address_file:
    for raw_line in address_file:
        process("".join(raw_line.split()))
# Scan IP address range
def ScanRange(ranges):
    """Check each address produced by IPsRange(ranges) in its own thread, then wait for all."""
    workers = []
    for ip in IPsRange(ranges):
        worker = Thread(target=__СheckAddrThreaded, args=(ip,))
        workers.append(worker)
        worker.start()
    # Block until every check has finished.
    for worker in workers:
        worker.join()
The loader.py is the following
# Import modules
from os import listdir
from sys import exit
# Select IP ranges from directory
def SelectIPRanges():
    """Let the user pick a file from ``ip-ranges/`` and return ``(lines, file_name)``.

    Exits with an error message when the answer is not a number or does not
    index an existing entry.
    """
    directory = "ip-ranges/"
    candidates = listdir(directory)
    for number, entry in enumerate(candidates, start=1):
        print(f" [{number}] - {entry}")
    try:
        choice = int(input("\n [?] Please select country to scan --> "))
        chosen = directory + candidates[choice - 1]
    except ValueError:
        exit(" [!] ERROR: Please enter a numerical value!")
    except IndexError:
        exit(f" [!] ERROR: Please enter value from 1 to {len(candidates)}!")
    # Only reached when a file was selected; exit() raises SystemExit.
    with open(chosen, "r") as ranges_file:
        lines = ranges_file.readlines()
    return lines, chosen.split("/")[-1]
The changes that I have made on the scanner.py was on this part
# Return IPs in IPv4 range, inclusive.
import ipaddress
def process(iptext):
    """Print the network of *iptext*; print "INVALID" when it cannot be determined."""
    try:
        interface = ipaddress.ip_interface(iptext)
        print(interface.network)
    except Exception:
        print("INVALID")
    return None
# Each line is stripped of every whitespace character before being processed.
with open('ipaddresses.txt', 'r') as source:
    for entry in source:
        compact = "".join(entry.split())
        process(compact)
the original scanner.py is
# Import modules
from .inspection import Request, InspectPaths, InspectContent, PortIsOpen, GetTitle
from ipaddress import ip_address
from threading import Thread
# Scan result
class __Result:
    """Scan result: stores the name, type, path and title it is created with."""

    def __init__(self, name, atype, path, title):
        self.title = title
        self.path = path
        self.type = atype  # attribute is called "type", argument "atype"
        self.name = name
# Return IPs in IPv4 range, inclusive.
def IPsRange(start='', end=''):
    """Return every IP address from *start* to *end*, both ends included.

    Args:
        start: first address, a ``"first-last"`` string (for example a line
            read from a ranges file), or a single address on its own.
        end: last address; may be omitted when *start* carries the range
            or is a single address.

    Returns:
        List of exploded address strings; empty when both arguments are empty.

    Raises:
        ValueError: if a bound is not a valid IP address.
    """
    if not start and not end:
        return []
    if not end:
        if "-" in start:
            start, end = start.split("-")
        else:
            # A lone address is treated as a range of exactly one.
            end = start
    # strip() removes the newline and any padding left over from file input.
    first = int(ip_address(start.strip()))
    last = int(ip_address(end.strip()))
    # +1 because range() excludes its stop value but the range is inclusive.
    return [ip_address(value).exploded for value in range(first, last + 1)]
# Scan IP address range
def ScanRange(ranges):
    """Run one checker thread per address of IPsRange(ranges) and join them all."""
    pending = []
    for target_ip in IPsRange(ranges):
        checker = Thread(
            target=__СheckAddrThreaded,
            args=(target_ip,),
        )
        pending.append(checker)
        checker.start()
    for checker in pending:
        checker.join()
At the moment I'm having a hard time linking loader.py and scanner.py. There are some changes that I know I need to make, but I can't figure out exactly what. I would appreciate some guidance here.
Thank you all
So you load all possible ips with loader, and process them with scanner correct?
Right. I see. You suggest to import loader.py in scanner.py, but in this case I'd rather make a class that holds related functionality:
# Import modules
from .inspection import Request, InspectPaths, InspectContent, PortIsOpen, GetTitle
from ipaddress import ip_address
from threading import Thread
import ipaddress
from os import listdir
from sys import exit
# Scan result
class IP_Scanner():
    """Load IP entries from a file in ``ip-ranges/`` and validate them in threads.

    Attributes set by the constructor: ``name``, ``type``, ``path``,
    ``title`` (stored as given), ``range`` (raw lines loaded by
    ``set_range``) and ``processed_range`` (entries that parsed).
    """

    def __init__(self, name='', atype='', path='', title=''):
        self.name = name
        self.type = atype
        self.path = path
        self.title = title
        # Initialised here so scan_range() is safe before set_range().
        self.range = []
        self.processed_range = []

    def process(self, ip):
        """Validate one entry and remember it when it parses.

        Returns:
            None when the entry was accepted, 0 when it was rejected.
        """
        # Normalise before the try block so the error message below can
        # always refer to the value that was tested.
        line = "".join(str(ip).split())
        try:
            ipaddress.ip_interface(line)
        except ValueError:
            print("INVALID: " + line)
            return 0
        self.processed_range.append(line)

    def set_range(self):
        """Ask the user for a file in ``ip-ranges/`` and load its lines.

        Returns:
            Tuple of (list of lines, file name without directory).
        """
        path = "ip-ranges/"
        files = listdir(path)
        for i, f in enumerate(files):
            print(f" [{i + 1}] - {f}")
        try:
            file = path + files[int(input("\n [?] Please select country to scan --> ")) - 1]
        except ValueError:
            exit(" [!] ERROR: Please enter a numerical value!")
        except IndexError:
            exit(f" [!] ERROR: Please enter value from 1 to {len(files)}!")
        else:
            with open(file, "r") as ranges_file:
                self.range = ranges_file.readlines()
            return self.range, file.split("/")[-1]

    def scan_range(self):
        """Validate every loaded entry, one thread per entry, and wait for all."""
        if not self.range:
            return
        threads = []
        for address in self.range:
            # Must be the bound method; a bare "process" is not defined here.
            t = Thread(target=self.process, args=(address,))
            threads.append(t)
            t.start()
        for thread in threads:
            thread.join()
if __name__ == '__main__':
    x = IP_Scanner()
    x.set_range()
    x.scan_range()
    # processed_range is a list; concatenating it to a str raises TypeError,
    # so it is formatted into the message instead.
    print(f'Success on: {x.processed_range}')
Many other ways to do this, but this encapsulates the related info into an object so you can run many instances of it in parallel.
For imports from a different folder with init.py (or package), see: https://realpython.com/absolute-vs-relative-python-imports/

Python subprocess with real-time input and multiple consoles

The main issue
In a nutshell: I want two consoles for my program. One for active user input. And the other one for pure log output. (Working code including the accepted answer is in the question's text below, under section "Edit-3". And under section "Edit-1" and section "Edit-2" are functioning workarounds.)
For this I have a main command line Python script, which is supposed to open an additional console for log output only. For this I intend to redirect the log output, which would be printed on the main script's console, to the stdin of the second console, which I start as a subprocess. (I use subprocess, because I didn't find any other way to open a second console.)
The problem is, that it seems that I'm able to send to the stdin of this second console - however, nothing gets printed on this second console.
Following is the code I used for experimenting (with Python 3.4 on PyDev under Windows 10). The function writing(input, pipe, process) contains the part, where the generated string is copied to the as pipe passed stdin, of the via subprocess opened console. The function writing(...) is run via the class writetest(Thread). (I left some code, which I commented out.)
import os
import sys
import io
import time
import threading
from cmd import Cmd
from queue import Queue
from subprocess import Popen, PIPE, CREATE_NEW_CONSOLE
REPETITIONS = 3
# Position of "The class" (Edit-2)
# Position of "The class" (Edit-1)
class generatetest(threading.Thread):
    """Thread that runs generating() against the queue it was given."""

    def __init__(self, queue):
        super().__init__()
        self.output = queue

    def run(self):
        print('run generatetest')
        generating(REPETITIONS, self.output)
        print('generatetest done')

    def getout(self):
        """Return the queue passed to the constructor."""
        return self.output
class writetest(threading.Thread):
    """Thread that runs writing() with a queue, a pipe and a process.

    Args:
        input: queue to read from; a new ``Queue`` when omitted.
        pipe: stream handed to ``writing``; ``PIPE`` when omitted.
        process: target process; when omitted a new ``cmd.exe`` console
            is started.
    """

    def __init__(self, input=None, pipe=None, process=None):
        threading.Thread.__init__(self)
        self.input = Queue() if input is None else input
        self.pipe = PIPE if pipe is None else pipe
        if process is None:
            # Raw string keeps the backslashes literal; Popen is the name
            # actually imported (the subprocess module itself is not).
            self.process = Popen(r'C:\Windows\System32\cmd.exe', universal_newlines=True,
                                 creationflags=CREATE_NEW_CONSOLE)
        else:
            # Use the argument, not whatever a global named "proc" holds.
            self.process = process

    def run(self):
        print('run writetest')
        writing(self.input, self.pipe, self.process)
        print('writetest done')
# Position of "The function" (Edit-2)
# Position of "The function" (Edit-1)
def generating(maxint, outline):
    """Put 0 .. maxint-1 on *outline*, pausing one second before each item."""
    print('def generating')
    for value in range(maxint):
        time.sleep(1)
        outline.put_nowait(value)
def writing(input, pipe, process):
    """Forward queued items to *pipe*, one per line, until the queue runs dry.

    Args:
        input: queue to read from; waiting for an item times out after
            ``REPETITIONS`` seconds, which ends the loop.
        pipe: writable text stream that receives each item.
        process: accepted for the caller's convenience; not used here.
    """
    from queue import Empty

    print('def writing')
    while True:
        try:
            print('try')
            string = str(input.get(True, REPETITIONS)) + "\n"
            # Write to the stream that was passed in. Rebinding the local
            # name to a fresh io.StringIO would send the text nowhere.
            pipe.write(string)
            pipe.flush()
            time.sleep(1)
        except Empty:
            # Nothing arrived within the timeout: the producer is done.
            print('except')
            break
        except (OSError, ValueError):
            # The stream is closed or not writable; stop instead of spinning.
            print('except')
            break
        finally:
            print('finally')
data_queue = Queue()
data_pipe = sys.stdin
# printer = sys.stdout
# data_pipe = os.pipe()[1]
# The code of 'C:\\Users\\Public\\Documents\\test\\test-cmd.py'
# can be found in the question's text further below under "More code"
# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognised escape sequences being passed through unchanged.
exe = r'C:\Python34\python.exe'
# exe = 'C:\Windows\System32\cmd.exe'
arg = r'C:\Users\Public\Documents\test\test-cmd.py'
arguments = [exe, arg]
# proc = Popen(arguments, universal_newlines=True, creationflags=CREATE_NEW_CONSOLE)
proc = Popen(arguments, stdin=data_pipe, stdout=PIPE, stderr=PIPE,
             universal_newlines=True, creationflags=CREATE_NEW_CONSOLE)
# Position of "The call" (Edit-2 & Edit-1) - file init (proxyfile)
# Position of "The call" (Edit-2) - thread = sockettest()
# Position of "The call" (Edit-1) - thread0 = logtest()
thread1 = generatetest(data_queue)
thread2 = writetest(data_queue, data_pipe, proc)
# time.sleep(5)
# Position of "The call" (Edit-2) - thread.start()
# Position of "The call" (Edit-1) - thread0.start()
thread1.start()
thread2.start()
# Position of "The call" (Edit-2) - thread.join()
# Position of "The call" (Edit-1) - thread.join()
# join() with a timeout returns after that many seconds even if the
# thread is still running.
thread1.join(REPETITIONS * REPETITIONS)
thread2.join(REPETITIONS * REPETITIONS)
# data_queue.join()
# receiver = proc.communicate(stdin, 5)
# print('OUT:' + receiver[0])
# print('ERR:' + receiver[1])
print("1st part finished")
A slightly different approach
The following additional code snippet works in regard to extracting the stdout from the subprocess. However, the previously sent stdin still isn't print on the second console. Also, the second console is closed immediately.
# Raw string: the backslash in the interpreter path must stay literal.
proc2 = Popen([r'C:\Python34\python.exe', '-i'],
              stdin=PIPE,
              stdout=PIPE,
              stderr=PIPE,
              creationflags=CREATE_NEW_CONSOLE)
# The pipes are binary, so the commands are sent as bytes and flushed
# immediately to get them past the buffer.
proc2.stdin.write(b'2+2\n')
proc2.stdin.flush()
print(proc2.stdout.readline())
proc2.stdin.write(b'len("foobar")\n')
proc2.stdin.flush()
print(proc2.stdout.readline())
time.sleep(1)
proc2.stdin.close()
proc2.terminate()
proc2.wait(timeout=0.2)
print("Exiting Main Thread")
More info
As soon as I use one of the parameters stdin=data_pipe, stdout=PIPE, stderr=PIPE for starting the subprocess, the resulting second console isn't active and doesn't accept keyboard input (which isn't desired, though might be helpful information here).
The subprocess method communicate() can't be used for this as it waits for the process to end.
More code
Finally the code for the file, which is for the second console.
C:\Users\Public\Documents\test\test-cmd.py
from cmd import Cmd
from time import sleep
from datetime import datetime
INTRO = 'command line'
PROMPT = '> '
class CommandLine(Cmd):
    """Custom console"""

    def __init__(self, intro=INTRO, prompt=PROMPT):
        super().__init__()
        self.running = False
        # The intro text doubles as the header of the help listing.
        self.intro = self.doc_header = intro
        self.prompt = prompt

    def do_dummy(self, args):
        """Runs a dummy method."""
        print("Do the dummy.")
        self.running = True
        # Prints a timestamp every five seconds while the flag stays set.
        while self.running:
            print(datetime.now())
            sleep(5)

    def do_stop(self, args):
        """Stops the dummy method."""
        print("Stop the dummy, if you can.")
        self.running = False

    def do_exit(self, args):
        """Exits this console."""
        print("Do console exit.")
        exit()
if __name__ == '__main__':
cl = CommandLine()
cl.prompt = PROMPT
cl.cmdloop(INTRO)
Thoughts
So far I'm even not certain if the Windows command line interface offers the capability to accept other input than the one from the keyboard (instead of the desired stdin pipe or similar). Though, with it having some sort of passive mode, I expect it.
Why is this not working?
Edit-1: Workaround via file (proof of concept)
Using a file as workaround in order display it's new content, as suggested in the answer of Working multiple consoles in python, is working in general. However, since the log file will grow up to many GB, it isn't a practical solution in this case. It would at least require file splitting and the proper handling of it.
The class:
class logtest(threading.Thread):
    """Thread that calls logging() with the file it was given."""

    def __init__(self, file):
        super().__init__()
        self.file = file

    def run(self):
        print('run logtest')
        logging(self.file)
        print('logtest done')
The function:
def logging(file):
    """Make sure *file* exists, then start the script on it in a new console."""
    pexe = r'C:\Python34\python.exe '
    script = r'C:\Users\Public\Documents\test\test-004.py'
    filek, filev = '--file', file
    # Opening in append mode creates the file without touching its content.
    with open(filev, 'a'):
        pass
    time.sleep(1)
    summary = script + ' ' + filek + ' ' + filev
    print('LOG START (outer): ' + summary)
    proc = Popen([pexe, script, filek, filev], universal_newlines=True,
                 creationflags=CREATE_NEW_CONSOLE)
    print('LOG FINISH (outer): ' + summary)
    time.sleep(2)
The call:
# The file tempdata is filled with several strings of "0\n1\n2\n"
# Looking like this:
# 0
# 1
# 2
# 0
# 1
# 2
proxyfile = r'C:\Users\Public\Documents\test\tempdata'
# Create the file if it does not exist yet; existing content is kept.
with open(proxyfile, 'a'):
    pass
time.sleep(1)
thread0 = logtest(proxyfile)
thread0.start()
thread0.join(REPETITIONS * REPETITIONS)
The tail script ("test-004.py"):
As Windows doesn't offer the tail command, I used the following script instead (base on the answer for How to implement a pythonic equivalent of tail -F?), which worked for this. The additional, yet kind of unnecessary class CommandLine(Cmd) was initially an attempt to keep the second console open (because the script file argument was missing). Though, it also proved itself as useful for keeping the console fluently printing the new log file content. Otherwise the output wasn't deterministic/predictable.
import time
import sys
import os
import threading
from cmd import Cmd
from argparse import ArgumentParser
def main(args):
    """Parse ``--file``, then print every line of that file containing a watched word."""
    parser = ArgumentParser(description="Parse arguments.")
    parser.add_argument("-f", "--file", type=str, default='', required=False)
    arguments = parser.parse_args(args)
    if not arguments.file:
        print('LOG PRE-START (inner): file argument not found. Creating new default entry.')
        arguments.file = r'C:\Users\Public\Documents\test\tempdata'
    here = os.path.abspath(os.path.dirname(__file__))
    print('LOG START (inner): ' + here + ' ' + arguments.file)
    # Append mode creates the file when it is missing.
    with open(arguments.file, 'a'):
        pass
    time.sleep(1)
    console = CommandLine(arguments.file, ['word'])
    console.prompt = ''
    # The command loop runs in the background while this thread watches the file.
    threading.Thread(target=console.cmdloop, args=('', )).start()
    print("\n")
    for hit_word, hit_sentence in console.watch():
        print("Found %r in line: %r" % (hit_word, hit_sentence))
    print('LOG FINISH (inner): ' + os.path.abspath(os.path.dirname(__file__)) + ' ' + arguments.file)
class CommandLine(Cmd):
    """Custom console"""

    def __init__(self, fn, words):
        Cmd.__init__(self)
        self.fn = fn
        self.words = words

    def watch(self):
        """Follow the file, echo each new line, and yield matches.

        Yields:
            ``(word, line)`` for every watched word contained in a new line.
            The generator never finishes on its own; close it to release
            the file.
        """
        # The with block closes the file when the generator is closed.
        with open(self.fn, 'r') as fp:
            while True:
                time.sleep(0.05)
                new = fp.readline()
                if not new:
                    # readline() returns '' until the file grows; wait a
                    # little longer instead of printing empty lines.
                    time.sleep(0.5)
                    continue
                # The line already carries its newline, so none is added.
                print(new, end='', flush=True)
                for word in self.words:
                    if word in new:
                        yield (word, new)
if __name__ == '__main__':
print('LOG START (inner - as main).')
main(sys.argv[1:])
Edit-1: More thoughts
Three workarounds, which I didn't try yet and might work are sockets (also suggested in this answer Working multiple consoles in python), getting a process object via the process ID for more control, and using the ctypes library for directly accessing the Windows console API, allowing to set the screen buffer, as the console can have multiple buffers, but only one active buffer (stated in the remarks of the documentation for the CreateConsoleScreenBuffer function).
However, using sockets might be the easiest one. And at least the size of the log doesn't matter this way. Though, connection problems might be a problem here.
Edit-2: Workaround via sockets (proof of concept)
Using sockets as a workaround in order to display new log entries, as was also suggested in the answer to Working multiple consoles in python, is working in general, too. Though, this seems to be too much effort for something which should simply be sent to the process of the receiving console.
The class:
class sockettest(threading.Thread):
    """Thread that calls socketing() with the host, port and file it was given."""

    def __init__(self, host, port, file):
        super().__init__()
        self.host, self.port, self.file = host, port, file

    def run(self):
        print('run sockettest')
        socketing(self.host, self.port, self.file)
        print('sockettest done')
The function:
def socketing(host, port, file):
    """Make sure *file* exists, then start the socket script in a new console.

    Args:
        host: address passed to the script as ``--address``.
        port: port passed to the script as ``--port``.
        file: path passed to the script as ``--file``; created if missing.
    """
    # Raw strings: in a normal literal the "\t" of "\test-005.py" is a TAB
    # character, which silently corrupts the script path.
    pexe = r'C:\Python34\python.exe'
    script = r'C:\Users\Public\Documents\test\test-005.py'
    command = [pexe, script,
               '--address', str(host),
               '--port', str(port),
               '--file', file]
    # Append mode creates the file without changing existing content.
    with open(file, 'a'):
        pass
    time.sleep(1)
    described = ' '.join(command)
    print('HOST START (outer): ' + described)
    Popen(command, universal_newlines=True, creationflags=CREATE_NEW_CONSOLE)
    print('HOST FINISH (outer): ' + described)
    time.sleep(2)
The call:
# The file tempdata is filled with several strings of "0\n1\n2\n"
# Looking like this:
# 0
# 1
# 2
# 0
# 1
# 2
proxyfile = r'C:\Users\Public\Documents\test\tempdata'
# Touch the file so it exists before the second console starts reading it.
with open(proxyfile, 'a'):
    pass
time.sleep(1)
thread = sockettest('127.0.0.1', 8888, proxyfile)
thread.start()
thread.join(REPETITIONS * REPETITIONS)
The socket script ("test-005.py"):
The following script is based on Python: Socket programming server-client application using threads. Here I just kept the class CommandLine(Cmd) as log entry generator. At this point it shouldn't be a problem to put the client into the main script, which calls the second console, and then feed the queue with real log entries instead of (new) file lines. (The server is the printer.)
import socket
import sys
import threading
import time
from cmd import Cmd
from argparse import ArgumentParser
from queue import Queue
BUFFER_SIZE = 5120
class CommandLine(Cmd):
    """Custom console"""

    def __init__(self, fn, words, queue):
        super().__init__()
        self.fn, self.words, self.queue = fn, words, queue

    def watch(self):
        """Poll the file forever and put every readline() result on the queue."""
        with open(self.fn, 'r') as source:
            while True:
                time.sleep(0.05)
                # readline() gives '' until a new line is appended; that
                # empty string is queued as well.
                self.queue.put_nowait(source.readline())
def main(args):
    """Start the print server, the forwarding client and the file watcher.

    Args:
        args: command-line arguments without the program name; understands
            ``--address``, ``--port`` and ``--file``.
    """
    parser = ArgumentParser(description="Parse arguments.")
    parser.add_argument("-a", "--address", type=str, default='127.0.0.1', required=False)
    parser.add_argument("-p", "--port", type=str, default='8888', required=False)
    parser.add_argument("-f", "--file", type=str, default='', required=False)
    arguments = parser.parse_args(args)
    if not arguments.address:
        print('HOST PRE-START (inner): host argument not found. Creating new default entry.')
        # The option is stored as "address"; that is the attribute read below.
        arguments.address = '127.0.0.1'
    if not arguments.port:
        print('HOST PRE-START (inner): port argument not found. Creating new default entry.')
        arguments.port = '8888'
    if not arguments.file:
        print('HOST PRE-START (inner): file argument not found. Creating new default entry.')
        arguments.file = 'C:\\Users\\Public\\Documents\\test\\tempdata'
    file_queue = Queue()
    print('HOST START (inner): ' + ' ' + arguments.address + ':' + arguments.port + ' --file ' + arguments.file)
    # Start server
    thread = threading.Thread(target=start_server, args=(arguments.address, arguments.port, ))
    thread.start()
    # Give the server a moment to start listening before the client connects.
    time.sleep(1)
    # Start client
    thread = threading.Thread(target=start_client, args=(arguments.address, arguments.port, file_queue, ))
    thread.start()
    # Start file reader; append mode creates the file when it is missing.
    with open(arguments.file, 'a'):
        pass
    time.sleep(1)
    words = ['word']
    console = CommandLine(arguments.file, words, file_queue)
    console.prompt = ''
    thread = threading.Thread(target=console.cmdloop, args=('', ))
    thread.start()
    print("\n")
    for hit_word, hit_sentence in console.watch():
        print("Found %r in line: %r" % (hit_word, hit_sentence))
    print('HOST FINISH (inner): ' + ' ' + arguments.address + ':' + arguments.port)
def start_client(host, port, queue):
    """Connect to the server and send every queued entry as one line.

    Args:
        host: server address.
        port: server port (int or numeric string).
        queue: source of entries; an entry whose text is ``quit`` ends the
            session instead of being sent.
    """
    port = int(port)
    soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        soc.connect((host, port))
    except OSError:
        print("Client connection error" + str(sys.exc_info()))
        soc.close()
        sys.exit()
    print("Enter 'quit' to exit")
    message = ""
    try:
        # Every message ends in "\n", so compare the stripped text;
        # otherwise the loop could never end.
        while message.strip() != 'quit':
            time.sleep(0.05)
            if message:
                soc.sendall(message.encode("utf8"))
                # Wait for the server's acknowledgement before sending more.
                soc.recv(BUFFER_SIZE)
            message = ""
            if not queue.empty():
                message = str(queue.get_nowait()) + "\n"
        soc.send(b'--quit--')
    finally:
        soc.close()
def start_server(host, port):
    """Listen on *host*:*port* and serve each connection in its own thread.

    Exits via ``sys.exit()`` when the address cannot be bound. Otherwise
    accepts connections until an exception ends the loop, then closes the
    listening socket.
    """
    import traceback

    port = int(port)  # arbitrary non-privileged port
    soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # SO_REUSEADDR flag tells the kernel to reuse a local socket in
    # TIME_WAIT state, without waiting for its natural timeout to expire
    soc.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    print("Socket created")
    try:
        soc.bind((host, port))
    except OSError:
        print("Bind failed. Error : " + str(sys.exc_info()))
        soc.close()
        sys.exit()
    soc.listen(5)  # queue up to 5 requests
    print("Socket now listening")
    try:
        # infinite loop - do not reset for every request
        while True:
            connection, address = soc.accept()
            ip, peer_port = str(address[0]), str(address[1])
            print("Connected with " + ip + ":" + peer_port)
            try:
                threading.Thread(target=client_thread, args=(connection, ip, peer_port)).start()
            except RuntimeError:
                # Raised by Thread.start() when no new thread can be started.
                print("Thread did not start.")
                traceback.print_exc()
    finally:
        # Runs when the accept loop is left through an exception.
        soc.close()
def client_thread(connection, ip, port, max_buffer_size=BUFFER_SIZE):
    """Print what one client sends and acknowledge each message with "-"."""
    while True:
        client_input = receive_input(connection, max_buffer_size)
        if "--QUIT--" in client_input:
            print("Client is requesting to quit")
            connection.close()
            print("Connection " + ip + ":" + port + " closed")
            break
        if client_input != "":
            print("{}".format(client_input))
        # Empty and non-empty input are acknowledged the same way.
        connection.sendall("-".encode("utf8"))
def receive_input(connection, max_buffer_size):
    """Read one chunk from *connection* and return it processed.

    Args:
        connection: connected socket to read from.
        max_buffer_size: maximum number of bytes to read.

    Returns:
        The decoded chunk, stripped of trailing whitespace, after
        ``process_input``.
    """
    client_input = connection.recv(max_buffer_size)
    # len() is the payload size; sys.getsizeof() would also count the
    # bytes object's own overhead and warn about inputs that fit.
    client_input_size = len(client_input)
    if client_input_size > max_buffer_size:
        print("The input size is greater than expected {}".format(client_input_size))
    decoded_input = client_input.decode("utf8").rstrip()  # decode and strip end of line
    return process_input(decoded_input)
def process_input(input_str):
    """Return the text form of *input_str* in upper case."""
    text = str(input_str)
    return text.upper()
if __name__ == '__main__':
print('HOST START (inner - as main).')
main(sys.argv[1:])
Edit-2: Furthermore thoughts
Having direct control of the subprocess' console input pipe/buffer would be the preferable solution to this problem. For this is the bounty of 500 Reputation.
Unfortunately I'm running out of time. Therefore I might use one of those workarounds for now and replace them with the proper solution later. Or maybe I have to use the nuclear option, just one console, where the ongoing log output is paused during any user keyboard input, and printed afterwards. Of course this might lead to buffer problems, when the user decides to type something just half the way.
Edit-3: Code including the accepted answer (one file)
With the answer from James Kent I get the desired behavior, when I start a script with the code via the Windows command line (cmd) or PowerShell. However, when I start this same script via Eclipse/PyDev with "Python run", then the output is always printed on the main Eclipse/PyDev console, while the second console of the subprocess remains empty and stays inactive. Though, I guess this is another system/environment speciality and a different issue.
from sys import argv, stdin, stdout
from threading import Thread
from cmd import Cmd
from time import sleep
from datetime import datetime
from subprocess import Popen, PIPE, CREATE_NEW_CONSOLE
INTRO = 'command line'
PROMPT = '> '
class CommandLine(Cmd):
    """Custom console"""

    def __init__(self, subprocess, intro=INTRO, prompt=PROMPT):
        super().__init__()
        self.subprocess = subprocess
        # The intro text is reused as the header of the help listing.
        self.intro = self.doc_header = intro
        self.prompt = prompt
        self.running = False

    def do_date(self, args):
        """Prints the current date and time."""
        print(datetime.now())
        sleep(1)

    def do_exit(self, args):
        """Exits this command line application."""
        print("Exit by user command.")
        child = self.subprocess
        if child is not None:
            # Try a normal termination first and fall back to kill().
            try:
                child.terminate()
            except:
                child.kill()
        exit()
class Console():
    """Second console: the parent starts it and writes to it, the child echoes stdin."""

    def __init__(self):
        if '-r' in argv:
            # Child side: copy stdin to stdout one character at a time.
            # This loop has no exit, so the constructor never returns here.
            while True:
                data = stdin.read(1)
                if data:
                    stdout.write(data)
                else:
                    # break
                    sleep(1)
        # Parent side: start this same file again with -r in a new console.
        self.p = Popen(
            ['python.exe', __file__, '-r'],
            stdin=PIPE,
            creationflags=CREATE_NEW_CONSOLE
        )

    def write(self, data):
        """Send *data*, UTF-8 encoded, to the child and flush right away."""
        pipe = self.p.stdin
        pipe.write(data.encode('utf8'))
        pipe.flush()

    def getSubprocess(self):
        """Return the child process, or None if it is falsy."""
        return self.p if self.p else None
class Feeder(Thread):
    """Thread that runs feeding() on the console it was given."""

    def __init__(self, console):
        super().__init__()
        self.console = console

    def run(self):
        feeding(self.console)
def feeding(console):
    """Write "test 0" .. "test 99" to *console*, one line per second."""
    for number in range(100):
        line = 'test %i\n' % number
        console.write(line)
        sleep(1)
if __name__ == '__main__':
    p = Console()
    if '-r' not in argv:
        thread = Feeder(p)
        # The daemon attribute replaces the deprecated setDaemon(); a daemon
        # thread does not keep the program alive after the command loop ends.
        thread.daemon = True
        thread.start()
        cl = CommandLine(subprocess=p.getSubprocess())
        # Read commands through stdin.readline() instead of input().
        cl.use_rawinput = False
        cl.prompt = PROMPT
        cl.cmdloop('\nCommand line is waiting for user input (e.g. help).')
Edit-3: Honorable mentions
In the questions's text above I have mentioned using the ctypes library for directly accessing the Windows console API as another workround (under "Edit-1: More thoughts"). Or using just one console in a way, that the input prompt always stays at the bottom as nuclear option to this entire problem. (under "Edit-2: Furthermore thoughts")
For using the ctypes library I would have oriented myself on the following answer to Change console font in Windows. And for using just one console I would have tried the following answer to Keep console input line below output. I think both of these answers may offer potential merit regarding this problem and maybe they are helpful to others who come across this post. Also, if I find the time, I will try whether they work somehow.
The issue you're up against is the architecture of the console subsystem on Windows, the console window that you normally see is not hosted by cmd.exe but instead by conhost.exe, a child process of a conhost window can only connect to a single conhost instance meaning you're limited to a single window per process.
This then leads on to having an extra process for each console window you wish to have, then in order to look at displaying anything in that window you need to look at how stdin and stdout are normally handled, in that they are written and read from by the conhost instance, except if you turn stdin into a pipe (so you can write to the process) it no longer comes from conhost but instead from your parent process and as such conhost has no visibility of it. This means that anything written to stdin is only read by the child process so is not displayed by conhost.
As far as I know there isn't a way to share the pipe like that.
As a side effect if you make stdin a pipe then all keyboard input sent to the new console window goes nowhere, as stdin is not connected to that window.
For an output only function this means you can spawn a new process that communicates with the parent via a pipe to stdin and echos everything to stdout.
Here's an attempt:
#!python3
import sys, subprocess, time
class Console():
    """Output-only second console fed through a pipe to the child's stdin."""

    def __init__(self):
        if '-r' in sys.argv:
            # Child side: echo stdin to stdout until the pipe is closed.
            while True:
                data = sys.stdin.read(1)
                if not data:
                    break
                sys.stdout.write(data)
        else:
            # Parent side: start this file again with -r in a new console.
            self.p = subprocess.Popen(
                ['python.exe', __file__, '-r'],
                stdin=subprocess.PIPE,
                creationflags=subprocess.CREATE_NEW_CONSOLE,
            )

    def write(self, data):
        """Encode *data* as UTF-8, write it to the child and flush."""
        encoded = data.encode('utf8')
        self.p.stdin.write(encoded)
        self.p.stdin.flush()
if (__name__ == '__main__'):
p = Console()
if '-r' not in sys.argv:
for i in range(0, 100):
p.write('test %i\n' % i)
time.sleep(1)
So a nice simple pipe between two processes and echoing the input back to the output if its the subprocess, I used a -r to signify whether the instance is a process but there are other ways depending on how you implement it.
Several things to note:
the flush after writing to stdin is needed as python normally uses buffering.
the way this approach is written is aimed at being in its own module hence the use of __file__
due to the use of __file__ this approach may need modification if frozen using cx_Freeze or similar.
EDIT 1
for a version that can be frozen with cx_Freeze:
Console.py
import sys, subprocess
class Console():
    """Output-only console; ``ischild`` selects the echoing child or the parent side."""

    def __init__(self, ischild=True):
        if ischild:
            # Child side: copy stdin to stdout until end of input.
            while True:
                data = sys.stdin.read(1)
                if not data:
                    break
                sys.stdout.write(data)
            return
        # Parent side: a frozen build starts Console.exe, otherwise this
        # file is run with the current interpreter.
        if hasattr(sys, 'frozen'):
            args = ['Console.exe']
        else:
            args = [sys.executable, __file__]
        self.p = subprocess.Popen(
            args,
            stdin=subprocess.PIPE,
            creationflags=subprocess.CREATE_NEW_CONSOLE,
        )

    def write(self, data):
        """Send *data* to the child as UTF-8 and flush immediately."""
        child_stdin = self.p.stdin
        child_stdin.write(data.encode('utf8'))
        child_stdin.flush()
if (__name__ == '__main__'):
p = Console()
test.py
from Console import Console
import sys, time
if (__name__ == '__main__'):
p = Console(False)
for i in range(0, 100):
p.write('test %i\n' % i)
time.sleep(1)
setup.py
from cx_Freeze import setup, Executable
# Freeze both scripts; each executable is built with base=None.
setup(
    name='Console-test',
    executables=[
        Executable('Console.py', base=None),
        Executable('test.py', base=None),
    ],
)
EDIT 2
New version that should work under dev tools like IDLE
Console.py
#!python3
import ctypes, sys, subprocess
Kernel32 = ctypes.windll.Kernel32
class Console():
    """Parent/child helper that shows the parent's output in a new console.

    With ``ischild=False`` the constructor launches the child and keeps a
    pipe to its stdin. With ``ischild=True`` the constructor tries to
    allocate a console, then echoes stdin to stdout until the pipe is
    closed, so it only returns at end of input.
    """

    def __init__(self, ischild=True):
        # Stays None in the child, which has no subprocess to write to.
        self.p = None
        if ischild:
            # try to allocate a new console
            result = Kernel32.AllocConsole()
            if result > 0:
                # if we succeed, open a handle to the console output
                sys.stdout = open('CONOUT$', mode='w')
        else:
            # if frozen we assume it's named Console.exe
            # note that when frozen 'Win32GUI' must be used as a base
            if hasattr(sys, 'frozen'):
                args = ['Console.exe']
            else:
                # otherwise we use the console-free version of python
                args = ['pythonw.exe', __file__]
            self.p = subprocess.Popen(
                args,
                stdin=subprocess.PIPE
            )
            return
        while True:
            data = sys.stdin.read(1)
            if not data:
                # Empty read: the parent closed its end of the pipe.
                break
            sys.stdout.write(data)
            # stdout may be the freshly opened, buffered CONOUT$ handle;
            # flush so every character is displayed as it arrives.
            sys.stdout.flush()

    def write(self, data):
        """Send the string *data* to the child console, UTF-8 encoded.

        :raises RuntimeError: when called on a child-side instance.
        """
        if self.p is None:
            raise RuntimeError('write() is only available in the parent process')
        self.p.stdin.write(data.encode('utf8'))
        # The pipe is buffered; flush so the text shows up immediately.
        self.p.stdin.flush()
if __name__ == '__main__':
    # Run directly, this module is the child: echo stdin until it closes.
    p = Console()
test.py
from Console import Console
import sys, time
if __name__ == '__main__':
    # Parent side: spawn the console child, then feed it one line per second.
    p = Console(False)
    for n in range(100):
        p.write('test %i\n' % n)
        time.sleep(1)
setup.py
from cx_Freeze import setup, Executable
# Console.py is frozen with the 'Win32GUI' base, test.py with base=None.
setup(
    name='Console-test',
    executables=[
        Executable('Console.py', base='Win32GUI'),
        Executable('test.py', base=None),
    ],
)
This could be made more robust, i.e. always checking for an existing console and detaching it if found before creating a new console, and possibly better error handling.
Since you are on Windows, you can use the win32console module to open a second console, or multiple consoles, for your thread or subprocess output. This is the simplest and easiest way that works if you are on Windows.
Here is a sample code:
import win32console
import multiprocessing
def subprocess(queue):
    """Child-process entry point: print queued messages in a new console.

    :param queue: queue the main script puts its messages on
    """
    # Detach from the console shared with the main script ...
    win32console.FreeConsole()
    # ... and create a new one for this process.
    win32console.AllocConsole()
    while True:
        # Blocks until the main script passes something through the queue.
        message = queue.get()
        print(message)
if __name__ == "__main__":
    queue = multiprocessing.Queue()
    worker = multiprocessing.Process(target=subprocess, args=[queue])
    worker.start()
    while True:
        print("Hello World in main console")
        # Sends the string below to the subprocess, which prints it in its
        # own console.
        queue.put("Hello work in sub process console")
        # ... plus whatever else you want to do in your main process
You can also do this with threading. You have to use queue module if you want the queue functionality as threading module doesn't have queue
Here is the win32console module documentation

Python Arguments, Required 1 Argument

I am working on a zip file password cracker, and I need a brute-force option. I have the basic code for it written out, but I am not sure of how to use an argument to make it work. How can I write an argument to make this work?
import optparse
import zipfile
import argparse
from threading import Thread
parser = argparse.ArgumentParser()
parser.add_argument("-b", "--brute")
def extract_zip(zFile, password):
    """Try *password* against every member of the open archive *zFile*.

    :param zFile: an open ``zipfile.ZipFile``
    :param password: candidate password as ``str``
    :returns: True (after printing the password) when every member could be
        read completely with it, False when the password was rejected.
    """
    import zlib  # local import: only needed for the exception type below

    password_encoded = password.encode('utf-8')
    try:
        # Hand the password to each open() call instead of setpassword():
        # the archive object is shared, and setpassword() would let
        # concurrent attempts overwrite each other's candidate.
        for member in zFile.infolist():
            with zFile.open(member, pwd=password_encoded) as stream:
                # Read to the end so the CRC is checked; a wrong password
                # can occasionally pass the initial header check.
                while stream.read(65536):
                    pass
    except (RuntimeError, zipfile.BadZipFile, zlib.error):
        # RuntimeError: bad password; the others: garbage after decryption.
        return False
    print("[+] Password Found: " + password + '\n')
    return True
def Main():
    """Parse the command line and run a dictionary and/or brute-force attack.

    ``-f`` names the zip file and is required. ``-d`` names a dictionary
    file with one candidate password per line. ``-b`` gives the maximum
    password length to brute-force. At least one of ``-d`` / ``-b`` must be
    given.
    """
    from itertools import product  # local import: not imported at file level

    parser = optparse.OptionParser(
        "usage: %prog -f <zipfile> [-d <dictionary>] [-b <max length>]")
    parser.add_option('-f', dest='zname', type='string',
                      help='specify zip file')
    parser.add_option('-d', dest='dname', type='string',
                      help='specify dictionary file')
    parser.add_option('-b', dest='bname', type='int',
                      help='brute force all passwords up to this length')
    options, _ = parser.parse_args()
    if options.zname is None:
        parser.error('a zip file (-f) is required')
    if options.dname is None and options.bname is None:
        parser.error('give a dictionary (-d) and/or a brute force length (-b)')

    # Candidates are tried one after another: one thread per candidate is
    # unbounded, and all threads would share the same archive object.
    with zipfile.ZipFile(options.zname) as zFile:
        if options.dname is not None:
            with open(options.dname) as passFile:
                for line in passFile:
                    extract_zip(zFile, line.strip('\n'))
        if options.bname is not None:
            characters = '1234567890qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM'
            # +1 so that the requested maximum length itself is included.
            for length in range(1, options.bname + 1):
                for candidate in product(characters, repeat=length):
                    extract_zip(zFile, ''.join(candidate))
# Only run the cracker when executed as a script, not when imported.
if __name__ == '__main__':
    Main()

strace a python function

Is it possible to strace a python function for opened files, and differentiate if they were opened by python or a subprocess?
# Sketch of the desired API: the two lists are meant to be filled with the
# files opened by Python itself and by its subprocesses.
read_python, read_external = [], []
#strace_read(read_python, read_external)
def test():
    file = open("foo.txt", "r")
    subprocess.call(["cat", "bar.txt"])
for file in read_python:
    print("python: ", file)
for file in read_external:
    print("external: ", file)
So the output is as:
>>> python: foo.txt
>>> external: bar.txt
I'm most interested in using a decorator. Differentiating isn't a priority.
Conceptually, my best guess is to replace instances of load_function(open) with wrappers ... actually, I have no idea, there are too many ways to access open.
I'd solve it in a much simpler way but with similar result. Instead of figuring out how to enable strace on a single function:
Create decorator like this: (untested)
-
def strace_mark(f):
    """Decorator that brackets each call to *f* with marker open() calls.

    Before and after the call it tries to open ``function-<name>-start`` and
    ``function-<name>-end`` for reading, so the attempts can be found in the
    output of ``strace -e file``. The end marker is emitted even when *f*
    raises.

    :param f: function to wrap
    :returns: wrapper with the same call signature and return value as *f*
    """
    import functools  # local import keeps the snippet self-contained

    def touch(stage):
        # The open() attempt itself is the marker; whether the file exists
        # does not matter.
        try:
            open('function-%s-%s' % (f.__name__, stage), 'r').close()
        except OSError:
            pass

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        touch('start')
        try:
            return f(*args, **kwargs)
        finally:
            touch('end')

    return wrapper
Run the whole app under strace -e file.
Get only the parts between calls open(function-something-start) and open(function-something-end).
If you do strace -f, you get the python/external separation for free. Just look at what pid calls the function.
This is the solution I used:
#!/usr/bin/env python3
import multiprocessing
import selectors
import os
import array
import fcntl
import termios
import subprocess
import decorator
import locale
import io
import codecs
import re
import collections
def strace(function):
    """Decorator that runs *function* while strace is attached to this process.

    A helper process is started which attaches ``strace -f -e open`` to the
    current pid, collects its output while *function* runs, and sends back
    the ``(pid, filename)`` pairs of the open() calls it saw.

    :param function: the function to trace
    :returns: the decorated function; calling it returns a ``StraceReturn``
        namedtuple ``(return_data, pid, strace_data)``
    """
    import signal  # local import: used for SIGKILL below, not imported at file level

    StraceReturn = collections.namedtuple("StraceReturn", ["return_data", "pid", "strace_data"])

    def strace_filter(stracefile, pid, exclude_system=False):
        """Yield ``(pid, filename)`` for every open() line in *stracefile*.

        *pid* is used for lines that carry no ``[pid N]`` prefix. With
        *exclude_system* set, files under the listed system directories
        are skipped.
        """
        system = ( "/bin"
                 , "/boot"
                 , "/dev"
                 , "/etc"
                 , "/lib"
                 , "/proc"
                 , "/root"
                 , "/run"
                 , "/sbin"
                 , "/srv"
                 , "/sys"
                 , "/tmp"
                 , "/usr"
                 , "/var"
                 )
        encoding = locale.getpreferredencoding(False)
        for line in stracefile:
            # NOTE(review): only matches the open() syscall; lines for
            # other open variants would not match this pattern.
            match = re.search(r'^(?:\[pid\s+(\d+)\]\s+)?open\(\"((?:\\x[0-9a-f]{2})+)\",', line, re.IGNORECASE)
            if match:
                p, f = match.groups(pid)
                # strace -xx prints the name as \xNN escapes; turn them
                # back into bytes, then decode to text.
                f = codecs.escape_decode(f.encode("ascii"))[0].decode(encoding)
                if exclude_system and f.startswith(system):
                    continue
                yield (p, f)

    def strace_reader(conn_parent, conn_child, barrier, pid):
        """Helper-process body: run strace on *pid* and report open() calls."""
        conn_parent.close()
        encoding = locale.getpreferredencoding(False)
        strace_args = ["strace", "-e", "open", "-f", "-s", "512", "-xx", "-p", str(pid)]
        process_data = io.StringIO()
        process = subprocess.Popen\
            ( strace_args
            , stdout = subprocess.DEVNULL
            , stderr = subprocess.PIPE
            , universal_newlines = True
            )
        selector = selectors.DefaultSelector()
        selector.register(process.stderr, selectors.EVENT_READ)
        # Wait for strace's first output before releasing the traced
        # process through the barrier.
        selector.select()
        barrier.wait()
        selector.register(conn_child, selectors.EVENT_READ)
        while len(selector.get_map()):
            events = selector.select()
            for key, mask in events:
                if key.fd == conn_child.fileno():
                    # The traced function has finished: stop strace.
                    conn_child.recv()
                    selector.unregister(key.fd)
                    process.terminate()
                    try:
                        process.wait(5)
                    except subprocess.TimeoutExpired:
                        # Popen.wait() signals a timeout with
                        # TimeoutExpired, not the builtin TimeoutError.
                        process.kill()
                        process.wait()
                else:
                    ioctl_buffer = array.array("i", [0])
                    try:
                        fcntl.ioctl(key.fd, termios.FIONREAD, ioctl_buffer)
                    except OSError:
                        read_bytes = 1024
                    else:
                        read_bytes = max(1024, ioctl_buffer[0])
                    data = os.read(key.fd, read_bytes)
                    if data:
                        # Store all data; simpler, but not as
                        # memory-efficient as filtering each chunk as it
                        # arrives and carrying over the incomplete last
                        # line.
                        process_data.write(data.decode(encoding))
                    else:
                        # End of file on strace's stderr.
                        selector.unregister(key.fd)
        selector.close()
        process_data.seek(0, io.SEEK_SET)
        for pidfile in strace_filter(process_data, pid):
            conn_child.send(pidfile)
        conn_child.close()

    def strace_wrapper(function, *args, **kw):
        """Call *function* under strace and bundle its result with the trace."""
        strace_data = list()
        barrier = multiprocessing.Barrier(2)
        conn_parent, conn_child = multiprocessing.Pipe(duplex = True)
        process = multiprocessing.Process\
            ( target=strace_reader
            , args=(conn_parent, conn_child, barrier, os.getpid())
            )
        process.start()
        conn_child.close()
        barrier.wait()
        try:
            # Forward the caller's arguments to the traced function.
            function_return = function(*args, **kw)
        finally:
            # Always stop and reap the reader, even if the function raised.
            conn_parent.send(None)
            while True:
                try:
                    strace_data.append(conn_parent.recv())
                except EOFError:
                    break
            process.join(5)
            if process.is_alive():
                process.terminate()
                process.join(5)
                if process.is_alive():
                    os.kill(process.pid, signal.SIGKILL)
                    process.join()
            conn_parent.close()
        return StraceReturn(function_return, os.getpid(), strace_data)

    return decorator.decorator(strace_wrapper, function)
@strace
def test():
    """Demo: open one file in Python and one through a shell subprocess."""
    print("Entering test()")
    process = subprocess.Popen("cat +μυρτιὲς.txt", shell=True)
    f = open("test\"test", "r")
    f.close()
    process.wait()
    print("Exiting test()")
    return 5
# With the decorator applied this prints the StraceReturn tuple.
print(test())
Note that any information strace generates after the termination event will be collected. To avoid that, use a while not signaled loop, and terminate the subprocess after the loop (the FIONREAD ioctl is a holdover from this case; I didn't see any reason to remove it).
In hindsight, the decorator could have been greatly simplified had I used a temporary file, rather than multiprocessing/pipe.
A child process is forked to then fork strace - in other words, strace is tracing its grandparent. Some linux distributions only allow strace to trace its children. I'm not sure how to work around this restriction - having the main program continue executing in the child fork (while the parent execs strace) is probably a bad idea - the program will trade PIDs like a hot potato if the decorated functions are used too often.

multiprocessing - execute external command and wait before proceeding

I am using Linux. I have an external executable called "combine" and a loop of 20 iterations.
Per each iteration, "combine" needs to be called with an argument that depends on the i-th iteration. Example:
arguments = " "
for i in range(1,20):
arguments += str(i) + "_image.jpg "
# begin of pseudo-code
execute: "./combine" + arguments # in parallel using all cores
# pseudo-code continues
wait_for_all_previous_process_to_terminate
execute: "./merge_resized_images" # use all cores - possible for one single command?
How do I achieve this using the multiprocessing module in Python?
You can use subprocess.Popen to launch the external commands asynchronously, and store each Popen object returned in a list. Once you've launched all the processes, just iterate over them and wait for each to finish using popen_object.wait.
import shlex
import subprocess
from subprocess import Popen

# As in the question, the argument string grows by one image name per
# iteration, starting from a single space.
arguments = " "
processes = []
# NOTE: range(1, 20) yields 1..19, the same bounds as the question's loop.
for i in range(1, 20):
    arguments += str(i) + "_image.jpg "
    # Popen returns immediately, so all commands run concurrently.
    processes.append(Popen(shlex.split("./combine" + arguments)))
# Wait for every launched process before starting the merge step.
for p in processes:
    p.wait()
subprocess.call("./merge_resized_images")
However, this will launch twenty concurrent processes, which is probably going to hurt performance.
To avoid that, you can use a ThreadPool to limit yourself to some lower number of concurrent processes (multiprocessing.cpu_count is a good number), and then use pool.join to wait for them all to finish.
import multiprocessing
import subprocess
import shlex
from multiprocessing.pool import ThreadPool
def call_proc(cmd):
    """Run *cmd* and return its captured ``(stdout, stderr)`` as bytes.

    This runs in a separate thread. The command string is split with
    ``shlex.split``; no shell is involved.
    """
    # subprocess.call(shlex.split(cmd)) would also block until cmd
    # finishes, but would not capture the output.
    p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    return (out, err)
# One worker thread per CPU core limits how many commands run at once.
pool = ThreadPool(multiprocessing.cpu_count())
results = []
# As in the question, the argument string grows by one image name per
# iteration, starting from a single space.
arguments = " "
for i in range(1, 20):
    arguments += str(i) + "_image.jpg "
    results.append(pool.apply_async(call_proc, ("./combine" + arguments,)))
# Close the pool and wait for each running task to complete
pool.close()
pool.join()
for result in results:
    out, err = result.get()
    print("out: {} err: {}".format(out, err))
subprocess.call("./merge_resized_images")
Each thread will release the GIL while waiting for the subprocess to complete, so they'll all run in parallel.
My solution to this problem is to create and manage a list of subprocesses. Pay special attention to startencoder and manageprocs. That is where the actual work is being started and managed.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# Author: R.F. Smith
# $Date: 2014-02-15 14:44:31 +0100 $
#
# To the extent possible under law, Roland Smith has waived all copyright and
# related or neighboring rights to vid2mkv.py. This work is published from the
# Netherlands. See http://creativecommons.org/publicdomain/zero/1.0/
"""Convert all video files given on the command line to Theora/Vorbis streams
in a Matroska container."""
from __future__ import print_function, division
__version__ = '$Revision: a42ef58 $'[11:-2]
import os
import sys
import subprocess
from multiprocessing import cpu_count
from time import sleep
def warn(s):
    """Print a warning message to standard error.

    :param s: Message string
    """
    s = ' '.join(['Warning:', s])
    print(s, file=sys.stderr)
def checkfor(args, rv=0):
    """Make sure that a program necessary for using this script is
    available.

    Prints a message and exits with status 1 when the program cannot be
    started or returns something other than *rv*.

    :param args: String or list of strings of commands. A single string may
        not contain spaces.
    :param rv: Expected return value from evoking the command.
    :raises ValueError: if *args* is a single string containing a space.
    """
    if isinstance(args, str):
        if ' ' in args:
            raise ValueError('no spaces in single command allowed')
        args = [args]
    try:
        with open(os.devnull, 'w') as bb:
            rc = subprocess.call(args, stdout=bb, stderr=bb)
    except OSError as oops:
        outs = "Required program '{}' not found: {}."
        print(outs.format(args[0], oops.strerror))
        sys.exit(1)
    if rc != rv:
        # The program ran but misbehaved; report that instead of a
        # "not found: None" message from an OSError without details.
        outs = "Required program '{}' returned {} instead of {}."
        print(outs.format(args[0], rc, rv))
        sys.exit(1)
def startencoder(fname):
    """Use ffmpeg to convert a video file to Theora/Vorbis
    streams in a Matroska container.

    :param fname: Name of the file to convert.
    :returns: a 3-tuple of a Process, input path and output path. The
        process is None when the file was skipped or ffmpeg could not be
        started.
    """
    basename, ext = os.path.splitext(fname)
    known = ['.mp4', '.avi', '.wmv', '.flv', '.mpg', '.mpeg', '.mov', '.ogv']
    if ext.lower() not in known:
        warn("File {} has unknown extension, ignoring it.".format(fname))
        return (None, fname, None)
    ofn = basename + '.mkv'
    args = ['ffmpeg', '-i', fname, '-c:v', 'libtheora', '-q:v', '6', '-c:a',
            'libvorbis', '-q:a', '3', '-sn', ofn]
    # Pre-set so a failed launch returns None instead of hitting an
    # unbound local at the return statement.
    p = None
    with open(os.devnull, 'w') as bitbucket:
        try:
            p = subprocess.Popen(args, stdout=bitbucket, stderr=bitbucket)
        except (OSError, ValueError):
            warn("Starting conversion of {} failed.".format(fname))
        else:
            print("Conversion of {} to {} started.".format(fname, ofn))
    return (p, fname, ofn)
def manageprocs(proclist, delay=0.5):
    """Check a list of subprocesses tuples for processes that have ended and
    remove them from the list.

    :param proclist: a list of (process, input filename, output filename)
        tuples. It is modified in place.
    :param delay: seconds to sleep after the check.
    """
    print('# of conversions running: {}\r'.format(len(proclist)), end='')
    sys.stdout.flush()
    # Iterate over a snapshot: removing items from the list being iterated
    # makes the loop skip the entry that follows each removal.
    for p in list(proclist):
        pr, ifn, ofn = p
        if pr is None:
            # Placeholder for a file that never got an encoder process.
            proclist.remove(p)
        elif pr.poll() is not None:
            print('Conversion of {} to {} finished.'.format(ifn, ofn))
            proclist.remove(p)
    sleep(delay)
def main(argv):
    """Main program.

    Without file arguments, print version and usage and exit. Otherwise
    start one encoder per file, keeping at most ``cpu_count()`` running.

    :param argv: command line arguments
    """
    if len(argv) == 1:
        binary = os.path.basename(argv[0])
        banner = ("{} version {}".format(binary, __version__),
                  "Usage: {} [file ...]".format(binary))
        for text in banner:
            print(text, file=sys.stderr)
        sys.exit(0)
    checkfor(['ffmpeg', '-version'])
    limit = cpu_count()
    running = []
    for path in argv[1:]:
        # All slots busy: poll until a conversion finishes.
        while len(running) == limit:
            manageprocs(running)
        running.append(startencoder(path))
    # Everything has been started; wait for the remaining conversions.
    while running:
        manageprocs(running)
# Script entry point; main() receives the full argument vector.
if __name__ == '__main__':
    main(sys.argv)

Categories