I'm trying to parse an HTTP request line (e.g. GET / HTTP/1.1\r\n), which is easy with socket.makefile().readline() (BaseHTTPRequestHandler uses it), like:
print sock.makefile().readline()
unfortunately, as the documentation says, when using makefile() the socket must be in blocking mode (it can not have a timeout); how can I implement a readline()-like function that does the same without using makefile() file object interface and not reading more than needed (as it'd discard data I will need after)?
a pretty inefficient example:
request_line = ""
while not request_line.endswith('\n'):
request_line += sock.recv(1)
print request_line
Four and a half years later, I would suggest asyncio's Streams for this, but here's how you might do it properly using BytesIO
Note that this implementation "shrinks" the in-memory BytesIO object each time a line is detected. If you didn't care about that, this could be a lot fewer lines.
import socket
import time
from io import BytesIO
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect(('localhost', 1234))
sock.setblocking(False)
def handle_line(line):
# or, print("Line Received:", line.decode().rstrip())
print(f"Line Received: {line.decode().rstrip()!r}")
with BytesIO() as buffer:
while True:
try:
resp = sock.recv(100) # Read in some number of bytes -- balance this
except BlockingIOError:
print("sleeping") # Do whatever you want here, this just
time.sleep(2) # illustrates that it's nonblocking
else:
buffer.write(resp) # Write to the BytesIO object
buffer.seek(0) # Set the file pointer to the SoF
start_index = 0 # Count the number of characters processed
for line in buffer:
start_index += len(line)
handle_line(line) # Do something with your line
""" If we received any newline-terminated lines, this will be nonzero.
In that case, we read the remaining bytes into memory, truncate
the BytesIO object, reset the file pointer and re-write the
remaining bytes back into it. This will advance the file pointer
appropriately. If start_index is zero, the buffer doesn't contain
any newline-terminated lines, so we set the file pointer to the
end of the file to not overwrite bytes.
"""
if start_index:
buffer.seek(start_index)
remaining = buffer.read()
buffer.truncate(0)
buffer.seek(0)
buffer.write(remaining)
else:
buffer.seek(0, 2)
(The original answer was so bad that it wasn't worth keeping (I promise), but should be available in the edit history).
SocketStreamReader
Here is a (buffered) line-reader that does not use asyncio. It can be used as a "synchronous" socket-based replacement for asyncio.StreamReader.
import socket
from asyncio import IncompleteReadError # only import the exception class
class SocketStreamReader:
def __init__(self, sock: socket.socket):
self._sock = sock
self._recv_buffer = bytearray()
def read(self, num_bytes: int = -1) -> bytes:
raise NotImplementedError
def readexactly(self, num_bytes: int) -> bytes:
buf = bytearray(num_bytes)
pos = 0
while pos < num_bytes:
n = self._recv_into(memoryview(buf)[pos:])
if n == 0:
raise IncompleteReadError(bytes(buf[:pos]), num_bytes)
pos += n
return bytes(buf)
def readline(self) -> bytes:
return self.readuntil(b"\n")
def readuntil(self, separator: bytes = b"\n") -> bytes:
if len(separator) != 1:
raise ValueError("Only separators of length 1 are supported.")
chunk = bytearray(4096)
start = 0
buf = bytearray(len(self._recv_buffer))
bytes_read = self._recv_into(memoryview(buf))
assert bytes_read == len(buf)
while True:
idx = buf.find(separator, start)
if idx != -1:
break
start = len(self._recv_buffer)
bytes_read = self._recv_into(memoryview(chunk))
buf += memoryview(chunk)[:bytes_read]
result = bytes(buf[: idx + 1])
self._recv_buffer = b"".join(
(memoryview(buf)[idx + 1 :], self._recv_buffer)
)
return result
def _recv_into(self, view: memoryview) -> int:
bytes_read = min(len(view), len(self._recv_buffer))
view[:bytes_read] = self._recv_buffer[:bytes_read]
self._recv_buffer = self._recv_buffer[bytes_read:]
if bytes_read == len(view):
return bytes_read
bytes_read += self._sock.recv_into(view[bytes_read:])
return bytes_read
Usage:
reader = SocketStreamReader(sock)
line = reader.readline()
Here is my solution written in Python 3. In the example I use io.BytesIO.read() instead of socket.recv() but the idea is the same
CHUNK_SIZE = 16 # you can set it larger or smaller
buffer = bytearray()
while True:
chunk = stream.read(CHUNK_SIZE)
buffer.extend(chunk)
if b'\n' in chunk or not chunk:
break
firstline = buffer[:buffer.find(b'\n')]
However, the rest of the message is partially in the buffer and partially waiting in the socket. You can either keep writing the content into the buffer and read from the buffer to have the entire request in one piece (it should be fine unless you parse a huge requests)
or you can wrap it with a generator and read it part by part
def reader(buffer, stream):
yield buffer[buffer.find(b'\n') + 1:]
while True:
chunk = stream.read(2048)
if not chunk: break
yield chunk
Related
Explanation
I'm currently trying to control a smart power strip using a python script. To accomplish this, I'm using a TCP connection with the socket module. Around 75% of the time, I get the response/data I was looking for and everything works perfectly. However, around 25% of the time, the response is cut off at the exact same length, 1024 bytes. This doesn't make any sense to me, as my buffer size is actually set to 2048 bytes. The speed at which I wait in between using recv() doesn't seem to effect/cause this either. Altough TCP is a stream of bytes, is it still possible that this could have to do with packet fragmentation?
Code
Main Code
ip='192.168.0.62'
port=9999
sock_tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock_tcp.connect((ip, port))
sock_tcp.send(encrypt('{"system":{"get_sysinfo":{}}}'))
data = sock_tcp.recv(2048)
sock_tcp.close()
print len(data) #On succesful runs output is 1221, on unsuccesful runs it is 1024
rec = decrypt(data[4:])
print str(rec) #See output below
Encrypt Function
def encrypt(string):
key = 171
result = pack('>I', len(string))
for i in string:
a = key ^ ord(i)
key = a
result += chr(a)
return result
Decrypt Function
def decrypt(string):
key = 171
result = ""
for i in string:
a = key ^ ord(i)
key = ord(i)
result += chr(a)
return result
Output
The string itself that I recieve. It's most likeley not relevant, but I thought I would include it anyway. This is value of the variable rec.
Desired and regular output
Full desired output
{"system":{"get_sysinfo":{"sw_ver":"1.0.6 Build 180627
Rel.081000","hw_ver":"1.0","model":"HS300(US)","deviceId":"80067B24A755F99C4D6C1807455E09F91AB7B2AA","oemId":"5C9E6254BEBAED63B2B6102966D24C17","hwId":"34C41AA028022D0CCEA5E678E8547C54","rssi":-60,"longitude_i":-1222955,"latitude_i":379078,"alias":"TP-LINK_Power
Strip_4F01","mic_type":"IOT.SMARTPLUGSWITCH","feature":"TIM:ENE","mac":"B0:BE:76:12:4F:01","updating":0,"led_off":0,"children":[{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA00","state":0,"alias":"CezHeat","on_time":0,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA01","state":1,"alias":"CezUVB","on_time":191208,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA02","state":1,"alias":"CyanHeat","on_time":191208,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA03","state":1,"alias":"ZanderHeat","on_time":191208,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA04","state":1,"alias":"CairoHeat","on_time":191208,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA05","state":1,"alias":"KodaMister","on_time":191208,"next_action":{"type":-1}}],"child_num":6,"err_code":0}}}
Abnormal and rarer output
Cut off output
{"system":{"get_sysinfo":{"sw_ver":"1.0.6 Build 180627
Rel.081000","hw_ver":"1.0","model":"HS300(US)","deviceId":"80067B24A755F99C4D6C1807455E09F91AB7B2AA","oemId":"5C9E6254BEBAED63B2B6102966D24C17","hwId":"34C41AA028022D0CCEA5E678E8547C54","rssi":-59,"longitude_i":-1222955,"latitude_i":379078,"alias":"TP-LINK_Power
Strip_4F01","mic_type":"IOT.SMARTPLUGSWITCH","feature":"TIM:ENE","mac":"B0:BE:76:12:4F:01","updating":0,"led_off":0,"children":[{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA00","state":0,"alias":"CezHeat","on_time":0,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA01","state":1,"alias":"CezUVB","on_time":191207,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA02","state":1,"alias":"CyanHeat","on_time":191207,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA03","state":1,"alias":"ZanderHeat","on_time":191207,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA04","state":1,"alias":"CairoHeat","on
Conclusion
If anyone could provide me with a solution or explanation as to why the output/stream gets cut off, it would be much appreciated. I used a lot of the code from this open source module. I'm also looking to understand more of how this all works, so if you could explain a bit more I would really appreciate it.
As per the documentation, the bufsize argument only specifies the maximum amount of data to be read:
socket.recv(bufsize[, flags])
Receive data from the socket. The return
value is a bytes object representing the data received. The maximum
amount of data to be received at once is specified by bufsize. See the
Unix manual page recv(2) for the meaning of the optional argument
flags; it defaults to zero.
To ensure full data transfer a function like this can be used, which waits for the end of the socket connection (indicated by and empty string returned from recv):
def recv_all(connection):
"""
Function for all data
:param connection: socket connection
:return: received data
"""
data = list()
while True:
data.append(connection.recv(2048))
if not data[-1]:
return b''.join(data)
Another example that might fit your application better could be to wait for a fixed message size (1221 as indicated by your question):
def recv_message(connection):
data = list()
transferred_bytes= 0
while transferred_bytes < 1221:
data.append(connection.recv(min(1221-transferred_bytes, 2048)))
if not data[-1]:
raise RuntimeError("socket connection broken")
transferred_bytes += len(data[-1])
return b''.join(data)
This is only a complement to SimonF's answer. The cause of the problem is indeed that TCP is a stream protocol, so packets can be fragmented or re-assembled at any state: sender TCP/IP stack, network equipments, receiver TCP/IP stack - I include the user layer library in the TCP/IP stack here for simplification.
That is the reason why, you should always use a higher level protocol above TCP to be able to split the stream in sensible messages. Here you could note that the end of a message is '}}}', so you could concatenate the input in a buffer until you find that pattern:
def recv_until(c, guard):
"""Receive data from a socket until guard if found on input"""
guard_sz = len(guard) - 1
data = b''
sz = 0
while True:
buffer = c.recv(1024) # read by chuncks of size 1024 (change value to your needs)
got = len(buffer)
data += buffer # concatenate in buffer
ix = data.find(guard, sz - guard_sz if sz > guard_sz else 0) # is guard found?
if ix != -1:
return (data[:ix + guard_sz + 1], # return the message, and what could be behind it
data[ix + guard_sz + 1:])
sz += got
The trick is to considere guard_sz byte from the last chunk, in the case where the guard could be split in two chunks.
Marco, please use recv_into(buffer[, nbytes[, flags]]) method for the socket.
My example for TCP-microserver:
import socket
import struct
def readReliably(s,n):
buf = bytearray(n)
view = memoryview(buf)
sz = 0
while sz < n:
k = s.recv_into(view[sz:],n-sz)
sz += k
# print 'readReliably()',sz
return sz,buf
def writeReliably(s,buf,n):
sz = 0
while sz < n:
k = s.send(buf[sz:],n-sz)
sz += k
# obj = s.makefile(mode='w')
# obj.flush()
# print 'writeReliably()',sz
return sz
# Client
host = "127.0.0.1"
port = 23456
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(10)
s.connect((host,port))
# Request
buf = struct.pack("4B",*[0x01,0x02,0x03,0x04])
io.writeReliably(s,buf,4)
# Response
sz,buf = io.readReliably(s,4)
a = struct.unpack("4B",buf)
print repr(a)
# Server
s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
#s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
#s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
#s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
s.bind((host,port))
s.listen(10) # unaccepted connections
while True:
sk,skfrom = s.accept()
sz,buf = io.readReliably(sk,4)
a = struct.unpack("4B",buf)
print repr(a)
# ...
io.writeReliably(sk,struct.pack("4B",*[0x01,0x02,0x03,0x04]))
Client side:
def send_file_to_hashed(data, tcpsock):
time.sleep(1)
f = data
flag = 0
i=0
tcpsock.send(hashlib.sha256(f.read()).hexdigest())
f.seek(0)
time.sleep(1)
l = f.read(BUFFER_SIZE-64)
while True:
while (l):
tcpsock.send(hashlib.sha256(l).hexdigest() + l)
time.sleep(1)
hashok = tcpsock.recv(6)
if hashok == "HASHOK":
l = f.read(BUFFER_SIZE-64)
flag = 1
if hashok == "BROKEN":
flag = 0
if not l:
time.sleep(1)
tcpsock.send("DONE")
break
return (tcpsock,flag)
def upload(filename):
flag = 0
while(flag == 0):
with open(os.getcwd()+'\\data\\'+ filename +'.csv', 'rU') as UL:
tuplol = send_file_to_hashed(UL ,send_to_sock(filename +".csv",send_to("upload",TCP_IP,TCP_PORT)))
(sock,flagn) = tuplol
flag = flagn
time.sleep(2)
sock.close()
Server Side:
elif(message == "upload"):
message = rec_OK(self.sock)
fis = os.getcwd()+'/data/'+ time.strftime("%H:%M_%d_%m_%Y") + "_" + message
f = open(fis , 'w')
latest = open(os.getcwd()+'/data/' + message , 'w')
time.sleep(1)
filehash = rec_OK(self.sock)
print("filehash:" + filehash)
while True:
time.sleep(1)
rawdata = self.sock.recv(BUFFER_SIZE)
log.write("rawdata :" + rawdata + "\n")
data = rawdata[64:]
dhash = rawdata[:64]
log.write("chash: " + dhash + "\n")
log.write("shash: " + hashlib.sha256(data).hexdigest() + "\n")
if dhash == hashlib.sha256(data).hexdigest():
f.write(data)
latest.write(data)
self.sock.send("HASHOK")
log.write("HASHOK\n" )
print"HASHOK"
else:
self.sock.send("HASHNO")
print "HASHNO"
log.write("HASHNO\n")
if rawdata == "DONE":
f.close()
f = open(fis , 'r')
if (hashlib.sha256(f.read()).hexdigest() == filehash):
print "ULDONE"
log.write("ULDONE")
f.close()
latest.close()
break
else:
self.sock.send("BROKEN")
print hashlib.sha256(f.read()).hexdigest()
log.write("BROKEN")
print filehash
print "BROKEN UL"
f.close()
So the data upload is working fine in all tests that i ran from my computer, even worked fine while uploading data over my mobile connection and still sometimes people say it takes a long time and they kill it after a few minutes. the data is there on their computers but not on the server. I don't know what is happening please help!
First of all: this is unrelated to sha.
Streaming over the network is unpredictable. This line
rawdata = self.sock.recv(BUFFER_SIZE)
doesn't guarantee that you read BUFFER_SIZE bytes. You may have read only 1 byte in the worst case scenario. Therefore your server side is completely broken because of the assumption that rawdata contains whole message. It is even worse. If the client sends command and hash fast you may get e.g. rawdata == 'DONEa2daf78c44(...) which is a mixed output.
The "hanging" part just follows from that. Trace your code and see what happens when the server receives partial/broken messages ( I already did that in my imagination :P ).
Streaming over the network is almost never as easy as calling sock.send on one side and sock.recv on the other side. You need some buffering/framing protocol. For example you can implement this simple protocol: always interpret first two bytes as the size of incoming message, like this:
client (pseudocode)
# convert len of msg into two-byte array
# I am assuming the max size of msg is 65536
buf = bytearray([len(msg) & 255, len(msg) >> 8])
sock.sendall(buf)
sock.sendall(msg)
server (pseudocode)
size = to_int(sock.recv(1))
size += to_int(sock.recv(1)) << 8
# You need two calls to recv since recv(2) can return 1 byte.
# (well, you can try recv(2) with `if` here to avoid additional
# syscall, not sure if worth it)
buffer = bytearray()
while size > 0:
tmp = sock.recv(size)
buffer += tmp
size -= len(tmp)
Now you have properly read data in buffer variable which you can work with.
WARNING: the pseudocode for the server is simplified. For example you need to check for empty recv() result everywhere (including where size is calculated). This is the case when the client disconnects.
So unfortunately there's a lot of work in front of you. You have to rewrite whole sending and receving code.
What is the best approach to process a socket connection where I need var data to end with a line break \n?
I'm using the code below but sometimes the tcp packets get chunked and it takes a long time to match data.endswith("\n").
I've also tried other approaches, like saving the last line if it doesn't end with \n and append it to dataon the next loop. but this also doesn't work because multiple packets get chunked and the 1st and 2nd part don't match.
I've no control over the other end, it basically sends multiple lines that end in \r\n.
Any suggestion will be welcome, as I don't have much knowledge on socket connections.
def receive_bar_updates(s):
global all_bars
data = ''
buffer_size = 4096
while True:
data += s.recv(buffer_size)
if not data.endswith("\n"):
continue
lines = data.split("\n")
lines = filter(None, lines)
for line in lines:
if line.startswith("BH") or line.startswith("BC"):
symbol = str(line.split(",")[1])
all_bars[symbol].append(line)
y = Thread(target=proccess_bars, kwargs={'symbol': symbol})
y.start()
data = ""
Example of "normal" data:
line1\r\n
line2\r\n
line3\r\n
Example of chunked data:
line1\r\n
line2\r\n
lin
If you have a raw input that you want to process as line, the io module is your friend because it will do the low level assembling of packets in lines.
You could use:
class SocketIO(io.RawIOBase):
def __init__(self, sock):
self.sock = sock
def read(self, sz=-1):
if (sz == -1): sz=0x7FFFFFFF
return self.sock.recv(sz)
def seekable(self):
return False
It is more robust than endswith('\n') because if one packet contains an embedded newline ('ab\ncd'), the io module will correctly process it. Your code could become:
def receive_bar_updates(s):
global all_bars
data = ''
buffer_size = 4096
fd = SocketIO(s) # fd can be used as an input file object
for line in fd:
if should_be_rejected_by_filter(line): continue # do not know what filter does...
if line.startswith("BH") or line.startswith("BC"):
symbol = str(line.split(",")[1])
all_bars[symbol].append(line)
y = Thread(target=proccess_bars, kwargs={'symbol': symbol})
y.start()
Use socket.socket.makefile() to wrap the socket in a class that implenents Text I/O. It handles buffering, converting between bytes and strings, and lets you iterate over lines. Remember to flush any writes.
Example:
#!/usr/bin/env python3
import socket, threading, time
def client(addr):
with socket.create_connection(addr) as conn:
conn.sendall(b'aaa')
time.sleep(1)
conn.sendall(b'bbb\n')
time.sleep(1)
conn.sendall(b'cccddd\n')
time.sleep(1)
conn.sendall(b'eeefff')
time.sleep(1)
conn.sendall(b'\n')
conn.shutdown(socket.SHUT_WR)
response = conn.recv(1024)
print('client got %r' % (response,))
def main():
with socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0) as listen_socket:
listen_socket.bind(('localhost', 0))
listen_socket.listen(1)
addr = listen_socket.getsockname()
threading.Thread(target=client, args=(addr,)).start()
conn, _addr = listen_socket.accept()
conn_file = conn.makefile(mode='rw', encoding='utf-8')
for request in conn_file:
print('server got %r' % (request,))
conn_file.write('response1\n')
conn_file.flush()
if __name__ == '__main__':
main()
$ ./example.py
server got 'aaabbb\n'
server got 'cccddd\n'
server got 'eeefff\n'
client got b'response1\n'
$
Are you accepting different connections? Or is it one stream of data, split up by \r\n's?
When accepting multiple connections you'd wait for a connection with s.accept() and then process all its data. When you have all of the packet, process its data, and wait for the next connection.
What you do then depends on what the structure of each packet would be.
(Example: https://wiki.python.org/moin/TcpCommunication)
If instead you are consuming a stream of data, you should probably process each 'line' you find in a separate thread, while you keep consuming on another.
Edit:
So, if I have your situation correct; one connection, the data being a string broken up by \r\n, ending with a \n. The data however does not correspond to what you are expecting, instead looping infinitely waiting for a \n.
The socket interface, as I understand it, ends with an empty data result. So the last buffer might have ended with a \n, but then just continued getting None objects, trying to find another \n.
Instead, try adding this:
if not data:
break
Full code:
def receive_bar_updates(s):
global all_bars
data = ''
buffer_size = 4096
while True:
data += s.recv(buffer_size)
if not data:
break
if not data.endswith("\n"):
continue
lines = data.split("\n")
lines = filter(None, lines)
for line in lines:
if line.startswith("BH") or line.startswith("BC"):
symbol = str(line.split(",")[1])
all_bars[symbol].append(line)
y = Thread(target=proccess_bars, kwargs={'symbol': symbol})
y.start()
data = ""
Edit2: Oops, wrong code
I have not tested this code, but it should work:
def receive_bar_updates(s):
global all_bars
data = ''
buf = ''
buffer_size = 4096
while True:
if not "\r\n" in data: # skip recv if we already have another line buffered.
data += s.recv(buffer_size)
if not "\r\n" in data:
continue
i = data.rfind("\r\n")
data, buf = data[:i+2], data[i+2:]
lines = data.split("\r\n")
lines = filter(None, lines)
for line in lines:
if line.startswith("BH") or line.startswith("BC"):
symbol = str(line.split(",")[1])
all_bars[symbol].append(line)
y = Thread(target=proccess_bars, kwargs={'symbol': symbol})
y.start()
data = buf
Edit: Forgot to mention, i only modified the code for receiving the data, i have no idea what the rest of the function (starting with lines = data.split("\n")) is for.
Edit 2: Now uses "\r\n" for linebreaks instead of "\n".
Edit 3: Fixed an issue.
You basically seem to want to read lines from the socket. Maybe you're better off not using low level recv calls but just use sock.makefile() and treat the result as a regular file where you can read lines from: from line in sfile: ...
That leaves the delay/chunk issue. This is likely to be caused by Nagle's algorithm on the sending side. Try disabling that:
sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
I want to send chunks of bytes from server to client and then save those chunks in a list.
I do this thing with 20 threads for sending, and 20 threads for recieving.
In each chunk i insert at the start two bytes represents the number of the chunk ID that should be indicator for the client to know in ehach index of the list, the chunk shoukd be placed.
Client:
global chunks
chunks = ['' for _ in xrange(WORKERS)] # That list will hold the chunks before writing it to file.
threads_list = []
for _ in xrange(0, WORKERS):
worker = threading.Thread(target=job, kwargs={"server_tunnel": server_tunnel})
worker.start()
threads_list.append(worker)
for is_end in threads_list:
is_end.join() # Stop the all program till the all file has been sent
Client-job function (threading):
def job(server_tunnel):
download = ''
global chunks
while not download == "DIE THREAD!":
curr_bytes = server_tunnel.recv(SPEED) # 7B 8192 4096
chunks[int(curr_bytes[0:2])] += curr_bytes[2:]
And the server:
# Making a list of chunks and send the chunks by workers
chunk = curr_file_size / WORKERS
chunks = []
with open(path, 'rb') as curr_file:
for _ in xrange(0, WORKERS):
chunks.append(curr_file.read(chunk))
chunks[WORKERS - 1] += curr_file.read(curr_file_size % WORKERS)
threads_list = []
for i in xrange(0, WORKERS):
worker = threading.Thread(target=self.job, kwargs={"curr_file_size": curr_file_size,
"curr_file": curr_file,
"client_socket": client_socket,
"chunk": chunks[i],
"worker": i})
Server-job (threading):
def job(self, curr_file_size, curr_file, client_socket, chunk, worker):
sent = 0
length_chunk = len(chunk)
while sent < length_chunk:
try:
client_socket.sendall(str(worker).zfill(2) + chunk[sent:sent + SPEED]) # 7a ,8192 4096
sent += SPEED
except socket.error:
print 'One link died. E2'
self.clients.remove(self.chosen)
client_socket.close()
self.chosen = ''
client_socket.send("DIE THREAD!")
Now..
I get this error while trying to send a file from the server to the client:
chunks[int(curr_bytes[0:2])] += curr_bytes[2:] ValueError: invalid
literal for int() with base 10: '{\xa2'
chunks[int(curr_bytes[0:2])] += curr_bytes[2:] ValueError: invalid
literal for int() with base 10: 'DI' etc..
Any suggestions?
I'm working on a server, and all of the data is line based. I want to be able to raise an exception when a line exceeds a given length without reading any more data than I have to. For example, client X sends a line that's 16KB long even though the line-length limit is 1024 bytes. After reading more than 1024 bytes, I want to stop reading additional data, close the socket and raise an exception. I've looked through the docs and some of the source code, and I don't see a way to do this without rewriting the _readline method. Is there an easier way that I'm overlooking?
EDIT: Comments made me realize I need to add more information. I know I could write the logic to do this without much work, but I was hoping to use builtins to take advantage of efficient buffering with memoryview rather than implementing it myself again or going with the naive approach of reading chunks, joing and splitting as needed without a memoryview.
I don't really like accepting answers that don't really answer the question, so here's the approach I actually ended up taking, and I'll just mark it community wiki or unanswered later if no one has a better solution:
#!/usr/bin/env python3
class TheThing(object):
def __init__(self, connection, maxlinelen=8192):
self.connection = connection
self.lines = self._iterlines()
self.maxlinelen = maxlinelen
def _iterlines(self):
"""
Yield lines from class member socket object.
"""
buffered = b''
while True:
received = self.connection.recv(4096)
if not received:
if buffered:
raise Exception("Unexpected EOF.")
yield received
continue
elif buffered:
received = buffered + received
if b'\n' in received:
for line in received.splitlines(True):
if line.endswith(b'\n'):
if len(line) > self.maxlinelen:
raise LineTooLong("Line size: %i" % len(line))
yield line
else:
buffered = line
else:
buffered += received
if len(buffered) > self.maxlinelen:
raise LineTooLong("Too much data in internal buffer.")
def _readline(self):
"""
Return next available line from member socket object.
"""
return next(self.lines)
I haven't bothered comparing the code to be certain, but I'm doing fewer concatenations and splits, so I think mine may be more efficient.
I realize that your edit is clarifying that what you want is a builtin approach to achieving your goal. But I am not aware of anything existing that will help you in that fine grained control over the readline approach. But I thought I might just include an example that does do a coded approach with a generator and a split... Just for fun.
Reference this other question/answer for a nice generator that reads lines:
https://stackoverflow.com/a/822788/496445
Based on that reader:
server.py
import socket
MAXLINE = 100
def linesplit(sock, maxline=0):
buf = sock.recv(16)
done = False
while not done:
# mid line check
if maxline and len(buf) > maxline:
yield buf, True
if "\n" in buf:
(line, buf) = buf.split("\n", 1)
err = maxline and len(line) > maxline
yield line+"\n", err
else:
more = sock.recv(16)
if not more:
done = True
else:
buf = buf+more
if buf:
err = maxline and len(buf) > maxline
yield buf, err
HOST = ''
PORT = 50007
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind((HOST, PORT))
s.listen(1)
conn, addr = s.accept()
print 'Connected by', addr
for line, err in linesplit(conn, MAXLINE):
if err:
print "Error: Line greater than allowed length %d (got %d)" \
% (MAXLINE, len(line))
break
else:
print "Received data:", line.strip()
conn.close()
client.py
import socket
import time
import random
HOST = ''
PORT = 50007
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect((HOST, PORT))
while True:
val = 'x'*random.randint(1, 50)
if random.random() > .5:
val += "\n"
s.sendall(val)
time.sleep(.1)
s.close()
output
Connected by ('127.0.0.1', 57912)
Received data: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
Received data: xxxxxxxxxxxxxxxxxxxxxxxxxxxx
Received data: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
...
Received data: xxxxxxxxxxx
Received data: xxxxxxxxxxxxxxxxxxxxxxxxxxxxx
Error: Line greater than allowed length 100 (got 102)
The server reads over the data it receives and constantly checks the length of the line once it assembles one. If at any time the line exceeds the amount specified, it returns an error code. I threw this together kind of fast so I am sure the checks could be cleaned up a bit more, and the read buffer amount can be changed to address how quickly you want to detect the long lines before consuming too much data. In the output example above, I only got 2 more bytes than is allowed, and it stopped.
The client just sends random length data, with a 50/50 change of a newline.