Capture UDP packets in Python without loss

I'm trying to capture a fast stream of UDP packets without missing any.
Packet size is known and fixed at 1472 bytes.
I'm generating the UDP transmissions with another Python application, and the first two bytes of each packet carry an incrementing counter so I can check whether any packets were lost during capture.
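For illustration only (this is my sketch, not the poster's actual generator, and the big-endian byte order is an assumption), such a 2-byte counter scheme can be packed and checked with struct:

import struct

# Sender side (illustrative): prepend a 16-bit wrap-around counter to each payload.
def make_packet(counter, payload):
    return struct.pack('>H', counter & 0xFFFF) + payload

# Receiver side (illustrative): compare consecutive counters to count missed packets.
def count_lost(prev_counter, data):
    (counter,) = struct.unpack('>H', data[:2])
    lost = (counter - prev_counter - 1) % 0x10000  # handles the 16-bit wrap-around
    return counter, lost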
Approach 1: read packet -> write to file
Packets are being written to file one at a time as they come in.
Result: 100 to 200 packets lost out of 10,000 received
import socket
import capture_verify

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
address = ("192.168.252.147", 23850)
sock.bind(address)

previous_first_val = 0
out_file = open("out_bin.bin", "wb")
packet_count = 1E5

while packet_count:
    data, addr = sock.recvfrom(2048)
    out_file.write(data)
    packet_count -= 1

capture_verify.verify_file()
Approach 2: read packets to memory buffer, write to file once 10K captured
Packets are stored in a pre-allocated buffer, then written to file once all packets have been received.
Result: 7 to 15 packets lost out of 10,000 received
import socket
import constants as cs
import capture_verify

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
address = ("192.168.252.147", 23850)
sock.bind(address)

out_file = open("out_bin.bin", "wb")
packet_count = 1E5
bytes_to_read = int(cs.packet_size * packet_count)
in_buf = bytearray(bytes_to_read)
view = memoryview(in_buf)

while packet_count:
    # nbytes = sock.recv_into(view, bytes_to_read)
    sock.recvfrom_into(view, cs.packet_size)
    packet_count -= 1
    nbytes = cs.packet_size
    view = view[nbytes:]

out_file.write(in_buf)
capture_verify.verify_file()
Is there anything I can do to speed up the code and ensure no packet loss?
*UDP is a requirement and I can't throttle down the transmission speed. Currently using Python 3.7 on Windows 7.
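One mitigation worth trying (an assumption on my part, not something the poster reported testing): losses like these typically happen when the OS receive buffer overflows between recvfrom calls, so requesting a much larger SO_RCVBUF lets bursts queue in the kernel instead of being dropped. A minimal sketch; the 4 MB figure is arbitrary and the OS may cap it:

import socket

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
# Request a large kernel receive buffer; 4 MB is an arbitrary example value.
sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 4 * 1024 * 1024)
# Verify what was actually granted (the OS may silently clamp the request).
print(sock.getsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF))
sock.bind(("192.168.252.147", 23850))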

Related

How to develop a robust UDP Client in python?

I have to develop a UDP client in Python. Its purpose is to receive packets on a port, process each one (this requires a map lookup), and then publish the processed data to a Kafka topic. More than 2000 packets arrive per second.
I have tried the code shown below, but there are packet losses.
import json
import socket
import config  # assumed to define KAFKA_BOOTSTRAP_SERVER (not shown in the question)
from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers=config.KAFKA_BOOTSTRAP_SERVER,
                         value_serializer=lambda m: json.dumps(m).encode('ascii'),
                         security_protocol='SSL')

client_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
client_socket.settimeout(1.0)
addr = ("0.0.0.0", 5000)
client_socket.bind(addr)

while True:
    data, server = client_socket.recvfrom(1024)
    d_1 = some_logic()  # the poster's processing step (not shown)
    producer.send("XYZ", d_1)
Please suggest an approach, with a small code snippet, to perform this activity with no or minimal packet loss.
Thanks in advance.
Using this code:
sender.py
import socket
import tqdm  # pip install tqdm

# example data from https://opensource.adobe.com/Spry/samples/data_region/JSONDataSetSample.html
data = '\
[{"id":"0001","type":"donut","name":"Cake","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"},{"id":"1002","type":"Chocolate"},{"id":"1003","type":"Blueberry"},{"id":"1004","type":"Devil\'s Food"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5005","type":"Sugar"},{"id":"5007","type":"Powdered Sugar"},{"id":"5006","type":"Chocolate with Sprinkles"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]},{"id":"0002","type":"donut","name":"Raised","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5005","type":"Sugar"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]},{"id":"0003","type":"donut","name":"Old Fashioned","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"},{"id":"1002","type":"Chocolate"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]}]\
'.encode("ascii")

assert len(data) == 1011, len(data)  # close to the 1000 you average in your case

sender_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sender_socket.settimeout(1.0)  # 1 second is laaarge
addr = ("127.0.0.1", 6410)
sender_socket.connect(addr)

progress_bar = tqdm.tqdm(unit_scale=True)
while True:
    bytes_sent = sender_socket.send(data)
    assert bytes_sent == 1011, bytes_sent
    progress_bar.update(1)
receiver.py
import json
import socket
import tqdm  # pip install tqdm

receiver_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
receiver_socket.settimeout(5.0)
addr = ("127.0.0.1", 6410)
receiver_socket.bind(addr)

progress_bar = tqdm.tqdm(unit_scale=True)
while True:
    data_bytes, from_address = receiver_socket.recvfrom(1024)
    data = json.loads(data_bytes)
    progress_bar.update(1)
(using tqdm for easy speed monitoring)
I get around ~80K it/s on my computer, which is roughly 80 times more than in your case.
Try it yourself and see how much you get. Then add d_1 = some_logic() and measure again. Then add producer.send("XYZ", d_1) and measure again.
This will give you a pretty good picture of what is slowing you down. Then ask another question about that specific problem, ideally with a Minimal Reproducible Example.
Edit:
Indeed, the sender saturates the receiver, so packets get dropped: the receiver's throughput is lower than the sender's because of the processing time. So here is an alternative:
steady_sender.py
import socket
import time
import tqdm  # pip install tqdm

# example data from https://opensource.adobe.com/Spry/samples/data_region/JSONDataSetSample.html
data = '\
[{"id":"0001","type":"donut","name":"Cake","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"},{"id":"1002","type":"Chocolate"},{"id":"1003","type":"Blueberry"},{"id":"1004","type":"Devil\'s Food"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5005","type":"Sugar"},{"id":"5007","type":"Powdered Sugar"},{"id":"5006","type":"Chocolate with Sprinkles"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]},{"id":"0002","type":"donut","name":"Raised","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5005","type":"Sugar"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]},{"id":"0003","type":"donut","name":"Old Fashioned","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"},{"id":"1002","type":"Chocolate"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]}]\
'.encode("ascii")

assert len(data) == 1011, len(data)  # close to the 1000 you average in your case

sender_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sender_socket.settimeout(1.0)  # 1 second is laaarge
addr = ("127.0.0.1", 6410)
sender_socket.connect(addr)

progress_bar = tqdm.tqdm(unit_scale=True)
while True:
    start_time = time.time()
    bytes_sent = sender_socket.send(data)
    assert bytes_sent == 1011, bytes_sent
    progress_bar.update(1)
    current_time = time.time()
    # sleep until the next millisecond; clamp at 0 so a slow iteration
    # doesn't pass a negative value to time.sleep() (which raises ValueError)
    remaining_time = max(0.0, 0.001 - (current_time - start_time))
    time.sleep(remaining_time)
It tries to send one packet every millisecond. It stays around ~900 packets/s for me, because the code is too simple (falling asleep takes time too!).
This way, the receiver processes fast enough that no packet gets dropped (UDP drops silently when buffers fill).
But here is another version where the sender is bursty: it sends 1000 packets, then sleeps until the next second.
bursty_sender.py
import socket
import time
import tqdm  # pip install tqdm

# example data from https://opensource.adobe.com/Spry/samples/data_region/JSONDataSetSample.html
data = '\
[{"id":"0001","type":"donut","name":"Cake","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"},{"id":"1002","type":"Chocolate"},{"id":"1003","type":"Blueberry"},{"id":"1004","type":"Devil\'s Food"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5005","type":"Sugar"},{"id":"5007","type":"Powdered Sugar"},{"id":"5006","type":"Chocolate with Sprinkles"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]},{"id":"0002","type":"donut","name":"Raised","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5005","type":"Sugar"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]},{"id":"0003","type":"donut","name":"Old Fashioned","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"},{"id":"1002","type":"Chocolate"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]}]\
'.encode("ascii")

assert len(data) == 1011, len(data)  # close to the 1000 you average in your case

sender_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sender_socket.settimeout(1.0)  # 1 second is laaarge
addr = ("127.0.0.1", 6410)
sender_socket.connect(addr)

progress_bar = tqdm.tqdm(unit_scale=True)
while True:
    start_time = time.time()
    bytes_sent = sender_socket.send(data)
    assert bytes_sent == 1011, bytes_sent
    progress_bar.update(1)
    if progress_bar.n % 1000 == 0:
        current_time = time.time()
        remaining_time = 1.0 - (current_time - start_time)  # sleep out the rest of the second
        time.sleep(remaining_time)
It sends on average ~990 packets per second (losing less time getting in and out of sleep). But the receiver only handles ~280 per second; the rest get dropped because the burst fills the receiver's buffer.
If I send bursts at 400/s, I process ~160/s.
You can monitor the drops with your OS's network-statistics tools; Python itself cannot see them.
If you don't want to drop, another solution is to use a queue: have one thread simply read from the socket and add to the queue, while another reads from the queue and processes. But then you have to ensure that the queue does not grow too large.
I'm able to handle bursts of 50 with my current system config, nearly 100, but not 150.
Here is an example with the queue:
queued_receiver.py
import json
import queue
import socket
import threading
import tqdm  # pip install tqdm

messages_queue = queue.Queue(maxsize=-1)  # infinite

received_packets_bar = tqdm.tqdm(position=0, desc="received", unit_scale=True)
queue_size_bar = tqdm.tqdm(position=1, desc="queue size", unit_scale=True)
processed_packets_bar = tqdm.tqdm(position=2, desc="processed", unit_scale=True)

def read_from_the_socket_into_the_queue():
    receiver_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    receiver_socket.settimeout(5.0)
    addr = ("127.0.0.1", 6410)
    receiver_socket.bind(addr)
    while True:
        data_bytes, from_address = receiver_socket.recvfrom(1024)
        # no processing at all here! we want to ensure the packet gets read, so that we are not dropping
        messages_queue.put_nowait(data_bytes)
        queue_size_bar.update(1)
        received_packets_bar.update(1)

def read_from_the_queue_and_process():
    while True:
        data_bytes = messages_queue.get(block=True, timeout=None)  # wait until a message is available
        data = json.loads(data_bytes)
        queue_size_bar.update(-1)
        processed_packets_bar.update(1)
        sum(range(10**5))  # slow computation, adjust

socket_thread = threading.Thread(target=read_from_the_socket_into_the_queue)
process_thread = threading.Thread(target=read_from_the_queue_and_process)
socket_thread.start()
process_thread.start()
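If the queue must not grow unboundedly (as noted above), a variation (my sketch, not part of the original answer) is to give it a maxsize and drop deliberately when it is full, so memory stays bounded and the drops become visible and countable:

import queue

messages_queue = queue.Queue(maxsize=10000)  # assumed bound; tune to your burst size
dropped = 0

def enqueue_or_drop(data_bytes):  # hypothetical helper name
    global dropped
    try:
        messages_queue.put_nowait(data_bytes)
    except queue.Full:
        # Deliberate drop: we lose the packet, but we count it
        # instead of letting the queue exhaust memory.
        dropped += 1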

Ending TCP connection when no data is sent from equipment

For some background: We currently have a piece of equipment in house which we use to measure the height of an object. It will scan the object, compare it with a reference image and return a pattern match percentage, and if that percentage is above some specified threshold, it will take the height of the object. We use Non-Procedural Ethernet to connect to the sensor through a python socket, and the data is sent by the sensor. The code below showcases how I connect to the sensor:
import socket
import time
import pandas as pd

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)

try:
    s.connect(("192.168.1.20", 8500))
    s.settimeout(30)
    data = []
    print('Recording data...')
    while True:
        msg = s.recv(496)
        d = msg.decode('utf-8').split(',')
        data.append(d)
except socket.timeout:
    pass  # recv gave up after 30 s without data
finally:
    s.close()

out = []
for d in data:
    out.append({'height': float(d[0]),
                'reference_pctg': float(d[1].split('\r')[0])})

csv = pd.DataFrame(data=out)
csv.to_csv('./data/' + sheet + '.csv', index=False)  # `sheet` is defined elsewhere in the original script
print(csv)
Currently, the socket lasts 30 seconds and then times out. The issue is that we cannot use the controller to close the connection when the data is done being sent. Is there any way to make the socket close when the sensor hasn't sent any data for a specified time?
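One way to do this (a minimal sketch, assuming a few seconds of silence means the measurement is finished): settimeout applies to each blocking recv call, so a short timeout doubles as an inactivity detector, and its expiry can be treated as end-of-data rather than an error. The 5-second window below is an assumption; adjust it to the sensor's pace.

import socket

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect(("192.168.1.20", 8500))
s.settimeout(5)  # assumed inactivity window
data = []
try:
    while True:
        msg = s.recv(496)
        if not msg:      # the peer closed the connection cleanly
            break
        data.append(msg.decode('utf-8').split(','))
except socket.timeout:
    pass                 # no data for 5 seconds: treat the capture as complete
finally:
    s.close()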

Python socket module: Recv() data response cut off

Explanation
I'm currently trying to control a smart power strip using a Python script. To accomplish this, I'm using a TCP connection with the socket module. Around 75% of the time I get the response/data I was looking for and everything works perfectly. However, around 25% of the time the response is cut off at exactly the same length, 1024 bytes. This doesn't make any sense to me, as my buffer size is actually set to 2048 bytes. The delay I wait between recv() calls doesn't seem to affect or cause this either. Although TCP is a stream of bytes, is it still possible that this has to do with packet fragmentation?
Code
Main Code
import socket
from struct import pack  # used by encrypt() below

ip = '192.168.0.62'
port = 9999

sock_tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock_tcp.connect((ip, port))
sock_tcp.send(encrypt('{"system":{"get_sysinfo":{}}}'))
data = sock_tcp.recv(2048)
sock_tcp.close()

print len(data)  # on successful runs output is 1221, on unsuccessful runs it is 1024
rec = decrypt(data[4:])
print str(rec)  # see output below
Encrypt Function
def encrypt(string):
    key = 171
    result = pack('>I', len(string))
    for i in string:
        a = key ^ ord(i)
        key = a
        result += chr(a)
    return result
Decrypt Function
def decrypt(string):
    key = 171
    result = ""
    for i in string:
        a = key ^ ord(i)
        key = ord(i)
        result += chr(a)
    return result
Output
The string itself that I receive. It's most likely not relevant, but I thought I would include it anyway. This is the value of the variable rec.
Desired and regular output
Full desired output
{"system":{"get_sysinfo":{"sw_ver":"1.0.6 Build 180627
Rel.081000","hw_ver":"1.0","model":"HS300(US)","deviceId":"80067B24A755F99C4D6C1807455E09F91AB7B2AA","oemId":"5C9E6254BEBAED63B2B6102966D24C17","hwId":"34C41AA028022D0CCEA5E678E8547C54","rssi":-60,"longitude_i":-1222955,"latitude_i":379078,"alias":"TP-LINK_Power
Strip_4F01","mic_type":"IOT.SMARTPLUGSWITCH","feature":"TIM:ENE","mac":"B0:BE:76:12:4F:01","updating":0,"led_off":0,"children":[{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA00","state":0,"alias":"CezHeat","on_time":0,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA01","state":1,"alias":"CezUVB","on_time":191208,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA02","state":1,"alias":"CyanHeat","on_time":191208,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA03","state":1,"alias":"ZanderHeat","on_time":191208,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA04","state":1,"alias":"CairoHeat","on_time":191208,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA05","state":1,"alias":"KodaMister","on_time":191208,"next_action":{"type":-1}}],"child_num":6,"err_code":0}}}
Abnormal and rarer output
Cut off output
{"system":{"get_sysinfo":{"sw_ver":"1.0.6 Build 180627
Rel.081000","hw_ver":"1.0","model":"HS300(US)","deviceId":"80067B24A755F99C4D6C1807455E09F91AB7B2AA","oemId":"5C9E6254BEBAED63B2B6102966D24C17","hwId":"34C41AA028022D0CCEA5E678E8547C54","rssi":-59,"longitude_i":-1222955,"latitude_i":379078,"alias":"TP-LINK_Power
Strip_4F01","mic_type":"IOT.SMARTPLUGSWITCH","feature":"TIM:ENE","mac":"B0:BE:76:12:4F:01","updating":0,"led_off":0,"children":[{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA00","state":0,"alias":"CezHeat","on_time":0,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA01","state":1,"alias":"CezUVB","on_time":191207,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA02","state":1,"alias":"CyanHeat","on_time":191207,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA03","state":1,"alias":"ZanderHeat","on_time":191207,"next_action":{"type":-1}},{"id":"80067B24A755F99C4D6C1807455E09F91AB7B2AA04","state":1,"alias":"CairoHeat","on
Conclusion
If anyone could provide me with a solution or explanation as to why the output/stream gets cut off, it would be much appreciated. I used a lot of the code from this open source module. I'm also looking to understand more of how this all works, so if you could explain a bit more I would really appreciate it.
As per the documentation, the bufsize argument only specifies the maximum amount of data to be read:

socket.recv(bufsize[, flags])
Receive data from the socket. The return value is a bytes object representing the data received. The maximum amount of data to be received at once is specified by bufsize. See the Unix manual page recv(2) for the meaning of the optional argument flags; it defaults to zero.
To ensure full data transfer, a function like this can be used; it waits for the end of the socket connection (indicated by an empty string returned from recv):
def recv_all(connection):
    """
    Receive data until the peer closes the connection
    :param connection: socket connection
    :return: received data
    """
    data = list()
    while True:
        data.append(connection.recv(2048))
        if not data[-1]:  # empty bytes: the peer closed the connection
            return b''.join(data)
Another example that might fit your application better is to wait for a fixed message size (1221 bytes, as indicated by your question):
def recv_message(connection):
    data = list()
    transferred_bytes = 0
    while transferred_bytes < 1221:
        data.append(connection.recv(min(1221 - transferred_bytes, 2048)))
        if not data[-1]:
            raise RuntimeError("socket connection broken")
        transferred_bytes += len(data[-1])
    return b''.join(data)
This is only a complement to SimonF's answer. The cause of the problem is indeed that TCP is a stream protocol, so packets can be fragmented or re-assembled at any stage: the sender's TCP/IP stack, network equipment, or the receiver's TCP/IP stack (I include the user-level library in the TCP/IP stack here for simplification).
That is the reason why you should always use a higher-level protocol on top of TCP so you can split the stream into sensible messages. Here you can note that the end of a message is '}}}', so you could concatenate the input in a buffer until you find that pattern:
def recv_until(c, guard):
    """Receive data from a socket until guard is found in the input"""
    guard_sz = len(guard) - 1
    data = b''
    sz = 0
    while True:
        buffer = c.recv(1024)  # read in chunks of size 1024 (change value to your needs)
        got = len(buffer)
        data += buffer  # concatenate into the buffer
        ix = data.find(guard, sz - guard_sz if sz > guard_sz else 0)  # is guard found?
        if ix != -1:
            return (data[:ix + guard_sz + 1],   # return the message,
                    data[ix + guard_sz + 1:])   # and what could be behind it
        sz += got
The trick is to re-examine the last guard_sz bytes of the previous chunk, in case the guard was split across two chunks.
Marco, please use the recv_into(buffer[, nbytes[, flags]]) method of the socket.
My example for a TCP micro-server:
import socket
import struct

def readReliably(s, n):
    buf = bytearray(n)
    view = memoryview(buf)
    sz = 0
    while sz < n:
        k = s.recv_into(view[sz:], n - sz)
        sz += k
    # print 'readReliably()', sz
    return sz, buf

def writeReliably(s, buf, n):
    sz = 0
    while sz < n:
        k = s.send(buf[sz:n])  # send() takes (data[, flags]); do not pass a length here
        sz += k
    # obj = s.makefile(mode='w')
    # obj.flush()
    # print 'writeReliably()', sz
    return sz

# Client
host = "127.0.0.1"
port = 23456
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(10)
s.connect((host, port))

# Request
buf = struct.pack("4B", *[0x01, 0x02, 0x03, 0x04])
writeReliably(s, buf, 4)

# Response
sz, buf = readReliably(s, 4)
a = struct.unpack("4B", buf)
print repr(a)

# Server
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
#s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
#s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
#s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
s.bind((host, port))
s.listen(10)  # backlog of unaccepted connections
while True:
    sk, skfrom = s.accept()
    sz, buf = readReliably(sk, 4)
    a = struct.unpack("4B", buf)
    print repr(a)
    # ...
    writeReliably(sk, struct.pack("4B", *[0x01, 0x02, 0x03, 0x04]), 4)

Sending messages via TCP sockets

I am writing code where the server sends from 1 to 256 kbytes to the client. The client receives the message and must return it to the server. The process must be repeated 1000 times. The message is read from a file. The server sends, the client picks it up and sends it back, but at a certain moment it stops. I'd like to know what's wrong with my code.
Server:
import socket

hostA = '127.0.0.1'
portA = 50031
udp = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
udp.bind((hostA, portA))
dest = ('127.0.0.1', 50008)

arquivo = 'texto.txt'
arq = open(arquivo, 'r')
arq = arq.read()

for i in range(0, 9):
    dado = arq[0:(1024 * (2**i))]
    for j in range(0, 1000):
        for k in range(0, (len(dado) / 1024)):
            x = dado[k:k+1024]
            udp.sendto(x, dest)
        for k in range(0, (len(dado) / 1024)):
            msg, cliente = udp.recvfrom(1024)

udp.close()
Client:
import socket

hostB = '127.0.0.1'
portB = 50008
udp = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
udp.bind((hostB, portB))
orig = ('127.0.0.1', 50031)

dado = ""
for i in range(0, 9):
    for j in range(0, 1000):
        for l in range(0, ((1024 * (2**i)) / 1024)):
            msg, cliente = udp.recvfrom(1024)
            dado += msg
        for k in range(0, ((1024 * (2**i)) / 1024)):
            x = dado[k:k+1024]
            udp.sendto(x, orig)

udp.close()
Your question asks about "TCP sockets", but you aren't using TCP.
udp = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
udp.bind((hostA, portA))
You are using UDP. Unlike TCP, UDP does not detect lost packets, retransmit, or re-order data.
Your protocol has no tolerance for packet loss. If a single packet from either the server to the client or the client to the server is lost, each side will wait for the other forever.
You have other issues too. Imagine two datagrams sent by the client arriving at the server in the wrong order: the data will be corrupted.
If you want to build on top of UDP rather than TCP, you have to implement yourself everything TCP provides that you need. If you need lost-datagram detection and retransmission, implement it yourself or use TCP. Ditto for transmit pacing, handling out-of-order reception, and so on.
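For illustration (a sketch of mine, not from the answer): the same echo exchange over TCP, where the kernel already provides ordering and retransmission; the address and buffer size are assumptions:

import socket

# Echo server over TCP: the kernel handles loss, ordering and retransmission.
srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
srv.bind(('127.0.0.1', 50031))
srv.listen(1)
conn, addr = srv.accept()
while True:
    chunk = conn.recv(4096)
    if not chunk:        # client closed its side: transfer finished
        break
    conn.sendall(chunk)  # sendall loops until every byte is handed to the kernel
conn.close()
srv.close()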

Python TCP Socket losing data in recv [acting weird]

I wrote a simplistic socket client for reading data in Python 3.4.
The problem I'm having is that when the server sends a small amount of data (around 1000 bytes), it reads it perfectly, but when a large chunk of data (around 9500 bytes) is being handled, it only gives me a small chunk (around 1100 bytes). I can't figure out why it behaves so erratically when handling large amounts of data. I know that my data is not larger than the ssize_t maximum of 32767.
It works perfectly when handling small data and behaves completely differently when handling a large amount. I know that this is not a problem in the TCP server, because I tested it with a PHP TCP client and it worked perfectly when handling the large amount of data.
Any help is greatly appreciated.
import socket
import json

# Written in Python 3.4.
class SocketClient:

    def __init__(self, host, port, format, timeout=None):
        # Constructor
        self.host = host
        self.port = port
        self.format = format
        self.timeout = timeout
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    def send(self, firstname, lastname, parameters=[], format='json'):
        if self.socket is not None:
            self.socket.connect((self.host, self.port))
            data = {}
            data['firstname'] = firstname
            data['lastname'] = lastname
            data['parameters'] = parameters
            data['format'] = format
            self.socket.send(bytes(json.dumps(data), "utf-8"))
            result = self.socket.recv(32767)
            result = result.decode()
            return result

    def shutdown(self):
        if self.socket is not None:
            self.socket.shutdown(socket.SHUT_RDWR)
            self.socket.close()

if __name__ == "__main__":
    client = SocketClient("127.0.0.1", 8080, 'json')
    response = client.send('foo', 'bar', ['foobar'])
    print(response)
    client.shutdown()
TCP is a streaming protocol. Data is delivered in chunks of bytes whose length is determined by many factors. One is that internal buffers are limited to some thousands of bytes, so you typically cannot read 32767 bytes at once.
The only guarantee with recv is that you get at least 1 byte and at most the number of bytes you ask for. Your code has to cope with this, which means you have to make more than one recv call until you have the number of bytes you need. That means, on the other side, that a protocol without end-of-message indicators or an encoded length is badly broken. In your case: you would have to parse the JSON byte stream until a valid JSON expression has arrived. But what about 1245.6? Is it finished after receiving 1, or 12, or ...?
To repair your protocol, simply send some length information with your JSON data.
For sending you should use sendall instead of send.
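A minimal sketch of such length-prefixed framing (the function names are mine, not part of the answer): every message travels as a 4-byte big-endian length followed by the payload, so the receiver always knows exactly how many bytes to wait for:

import struct

def send_message(sock, payload):
    # Prefix the payload with its length, then push everything out with sendall.
    sock.sendall(struct.pack('>I', len(payload)) + payload)

def recv_exactly(sock, n):
    # Loop because recv may return fewer bytes than requested.
    data = b''
    while len(data) < n:
        chunk = sock.recv(n - len(data))
        if not chunk:
            raise ConnectionError("socket closed mid-message")
        data += chunk
    return data

def recv_message(sock):
    (length,) = struct.unpack('>I', recv_exactly(sock, 4))
    return recv_exactly(sock, length)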
You can use the recv_into(buffer[, nbytes[, flags]]) method:

def readReliably(s, n):
    buf = bytearray(n)
    view = memoryview(buf)
    sz = 0
    while sz < n:
        k = s.recv_into(view[sz:], n - sz)
        sz += k
    # print 'readReliably()', sz
    return sz, buf

def writeReliably(s, buf, n):
    sz = 0
    while sz < n:
        k = s.send(buf[sz:n])  # send() takes (data[, flags]); do not pass a length here
        sz += k
    # obj = s.makefile(mode='w')
    # obj.flush()
    # print 'writeReliably()', sz
    return sz
See the full example at: https://stackoverflow.com/a/55446223/966789
while True:
    sk, skfrom = s.accept()
    sz, buf = readReliably(sk, 4)
    a = struct.unpack("4B", buf)
    print repr(a)
    # ...
    writeReliably(sk, struct.pack("4B", *[0x01, 0x02, 0x03, 0x04]), 4)
That is how recv works in more than one language... https://docs.python.org/3.4/library/socket.html#socket.socket.recv
