I have to develop a UDP client in Python. Its purpose is to receive packets on a port, process each one (which requires a map lookup), and then publish the processed data to a Kafka topic. More than 2000 packets are received per second.
I have tried a code which is as shown below. But there are packet losses.
import socket
from kafka import KafkaProducer
# Kafka producer: every message is JSON-serialised, ASCII-encoded and sent over SSL.
# NOTE(review): `config` and `json` are not imported in the lines shown here.
producer = KafkaProducer(bootstrap_servers=config.KAFKA_BOOTSTRAP_SERVER,
value_serializer=lambda m: json.dumps(m).encode('ascii'),security_protocol='SSL')
# UDP socket bound to port 5000 on all local interfaces.
client_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
# With a 1 s timeout, recvfrom() raises socket.timeout whenever no datagram
# arrives within a second; nothing below catches it.
client_socket.settimeout(1.0)
addr = ("0.0.0.0", 5000)
client_socket.bind(addr)
# Receive, process and publish run one after another in a single loop, so
# nothing is read from the socket while a packet is being processed/published.
while True:
# Reads at most 1024 bytes of one datagram.
data, server = client_socket.recvfrom(1024)
# NOTE(review): some_logic() is not defined here and is not given `data`;
# presumably a placeholder for the map lookup mentioned in the question.
d_1 = some_logic()
producer.send("XYZ",d_1)
Please suggest an approach, with a small code snippet, to perform this activity with no or minimal packet loss.
Thanks in advance.
Using this code :
sender.py
import socket
import tqdm # pip install
# example data from https://opensource.adobe.com/Spry/samples/data_region/JSONDataSetSample.html
data = '\
[{"id":"0001","type":"donut","name":"Cake","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"},{"id":"1002","type":"Chocolate"},{"id":"1003","type":"Blueberry"},{"id":"1004","type":"Devil\'s Food"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5005","type":"Sugar"},{"id":"5007","type":"Powdered Sugar"},{"id":"5006","type":"Chocolate with Sprinkles"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]},{"id":"0002","type":"donut","name":"Raised","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5005","type":"Sugar"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]},{"id":"0003","type":"donut","name":"Old Fashioned","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"},{"id":"1002","type":"Chocolate"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]}]\
'.encode("ascii")
assert len(data) == 1011, len(data) # close to the 1000 you average in your case
# Open a UDP socket and pin its destination with connect(), so that plain
# send() can be used in the loop below.
addr = ("127.0.0.1", 6410)
sender_socket = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM)
sender_socket.settimeout(1.0)  # 1 second is laaarge
sender_socket.connect(addr)

# One tick per datagram sent; tqdm displays the resulting rate.
progress_bar = tqdm.tqdm(unit_scale=True)

# Send the same payload back to back, forever, as fast as the socket accepts it.
while True:
    bytes_sent = sender_socket.send(data)
    assert bytes_sent == 1011, bytes_sent
    progress_bar.update()
receiver.py
import json
import socket
import tqdm # pip install
# Bind a UDP socket to the loopback address/port that the sender targets.
addr = ("127.0.0.1", 6410)
receiver_socket = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM)
receiver_socket.settimeout(5.0)
receiver_socket.bind(addr)

# One tick per datagram received and decoded.
progress_bar = tqdm.tqdm(unit_scale=True)

# Receive, decode, count: the JSON parsing stands in for per-packet work.
while True:
    data_bytes, from_address = receiver_socket.recvfrom(1024)
    data = json.loads(data_bytes)
    progress_bar.update()
(using tqdm for easy speed monitoring)
I am around ~80 K it/s on my computer, which is roughly 80 times more than your case.
Try it yourself, see how much you get. Then add d_1 = some_logic() and measure again. Then add producer.send("XYZ",d_1) and measure again.
This will give you a pretty good picture of what is slowing you. Then ask another question on the specific problem. Better if you produce a Minimal Reproducible Example
Edit:
Indeed, the sender saturates the receiver, such that packets get dropped. It's because the receiver throughput is lower than the sender (because of the processing time), so here is an alternative :
steady_sender.py
import socket
import time
import tqdm # pip install
# example data from https://opensource.adobe.com/Spry/samples/data_region/JSONDataSetSample.html
data = '\
[{"id":"0001","type":"donut","name":"Cake","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"},{"id":"1002","type":"Chocolate"},{"id":"1003","type":"Blueberry"},{"id":"1004","type":"Devil\'s Food"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5005","type":"Sugar"},{"id":"5007","type":"Powdered Sugar"},{"id":"5006","type":"Chocolate with Sprinkles"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]},{"id":"0002","type":"donut","name":"Raised","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5005","type":"Sugar"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]},{"id":"0003","type":"donut","name":"Old Fashioned","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"},{"id":"1002","type":"Chocolate"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]}]\
'.encode("ascii")
assert len(data) == 1011, len(data) # close to the 1000 you average in your case
# Connected UDP socket: connect() fixes the destination so send() can be used.
sender_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sender_socket.settimeout(1.0)  # 1 second is laaarge
addr = ("127.0.0.1", 6410)
sender_socket.connect(addr)
progress_bar = tqdm.tqdm(unit_scale=True)

SEND_PERIOD = 0.001  # target: one packet every millisecond

while True:
    # A monotonic clock cannot jump backwards/forwards the way the wall clock
    # can, so the measured interval is always meaningful.
    start_time = time.monotonic()
    bytes_sent = sender_socket.send(data)
    assert bytes_sent == 1011, bytes_sent
    progress_bar.update(1)
    remaining_time = SEND_PERIOD - (time.monotonic() - start_time)  # until next millisecond
    # time.sleep() raises ValueError for a negative duration, which happens as
    # soon as one send takes longer than the period: just skip the sleep then.
    if remaining_time > 0:
        time.sleep(remaining_time)
It tries to send one packet every millisecond. It stays around ~900 packets/s for me, because the code is too simple (falling asleep takes time too !).
This way, the receiver processes packets fast enough that none get dropped (UDP silently discards incoming packets once the receiver's buffer is full).
But here is another version, where the sender is bursty : it sends 1000 packet then goes to sleep until the next second.
bursty_sender.py
import socket
import time
import tqdm # pip install
# example data from https://opensource.adobe.com/Spry/samples/data_region/JSONDataSetSample.html
data = '\
[{"id":"0001","type":"donut","name":"Cake","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"},{"id":"1002","type":"Chocolate"},{"id":"1003","type":"Blueberry"},{"id":"1004","type":"Devil\'s Food"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5005","type":"Sugar"},{"id":"5007","type":"Powdered Sugar"},{"id":"5006","type":"Chocolate with Sprinkles"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]},{"id":"0002","type":"donut","name":"Raised","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5005","type":"Sugar"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]},{"id":"0003","type":"donut","name":"Old Fashioned","ppu":0.55,"batters":{"batter":[{"id":"1001","type":"Regular"},{"id":"1002","type":"Chocolate"}]},"topping":[{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}]}]\
'.encode("ascii")
assert len(data) == 1011, len(data) # close to the 1000 you average in your case
# Connected UDP socket: connect() fixes the destination so send() can be used.
sender_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sender_socket.settimeout(1.0)  # 1 second is laaarge
addr = ("127.0.0.1", 6410)
sender_socket.connect(addr)
progress_bar = tqdm.tqdm(unit_scale=True)

BURST_SIZE = 1000    # packets sent back to back in one burst
BURST_PERIOD = 1.0   # seconds from the start of one burst to the start of the next

while True:
    # Timestamp the START of the burst. Taking it before every single packet
    # would make the sleep below last almost a full period on top of the burst.
    burst_start = time.monotonic()
    for _ in range(BURST_SIZE):
        bytes_sent = sender_socket.send(data)
        assert bytes_sent == 1011, bytes_sent
        progress_bar.update(1)
    remaining_time = BURST_PERIOD - (time.monotonic() - burst_start)  # until next second
    # time.sleep() raises ValueError for a negative duration (burst took
    # longer than the period): start the next burst immediately instead.
    if remaining_time > 0:
        time.sleep(remaining_time)
It sends on average ~990 packets per second (losing less time to getting in and out of sleep). But the receiver only handles ~280 per second, the rest got dropped because the burst filled the receiver's buffer.
If I'm sending bursts at 400/s I process ~160/s.
You can monitor the drop using your OS's tool for monitoring network packet drop, Python can't.
If you don't want to drop packets, another solution is to use a queue: have one thread simply read from the socket and add each packet to the queue, while another thread reads from the queue and processes the packets. But then you have to ensure that the queue does not grow too large.
I'm able to handle bursts of 50 with my current system config, nearly 100, but not 150.
Here is an example with the queue :
queued_receiver.py
import json
import queue
import socket
import threading
import tqdm # pip install
# Hand-off buffer between the socket-reading thread and the processing thread.
# A maxsize <= 0 makes the queue unbounded, so put_nowait() never fails but
# memory use grows for as long as the processing side lags behind.
messages_queue = queue.Queue(maxsize=-1) # infinite
# Three tqdm bars at positions 0, 1 and 2: packets read from the socket,
# packets currently waiting in the queue, and packets fully processed.
received_packets_bar = tqdm.tqdm(position=0, desc="received", unit_scale=True)
queue_size_bar = tqdm.tqdm(position=1, desc="queue size", unit_scale=True)
processed_packets_bar = tqdm.tqdm(position=2, desc="processed", unit_scale=True)
def read_from_the_socket_into_the_queue():
    """Read UDP datagrams forever and push the raw bytes onto ``messages_queue``.

    Nothing is decoded or processed here: the loop goes back to ``recvfrom()``
    as quickly as possible so the OS receive buffer does not fill up.
    """
    receiver_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    receiver_socket.settimeout(5.0)
    addr = ("127.0.0.1", 6410)
    receiver_socket.bind(addr)
    try:
        while True:
            try:
                data_bytes, from_address = receiver_socket.recvfrom(1024)
            except socket.timeout:
                # A quiet sender is not an error. Without this handler the
                # thread would die after 5 idle seconds and later packets
                # would never be read.
                continue
            # no processing at all here ! we want to ensure the packet gets read, so that we are not dropping
            messages_queue.put_nowait(data_bytes)
            queue_size_bar.update(1)
            received_packets_bar.update(1)
    finally:
        # Release the port if the loop is ever left through an exception.
        receiver_socket.close()
def read_from_the_queue_and_process():
    """Consume queued packets forever: decode each as JSON, then do the (simulated) slow work."""
    while True:
        # get() with its defaults blocks, without timeout, until a message is available
        raw_packet = messages_queue.get()
        data = json.loads(raw_packet)
        # one packet has left the queue, one more packet has been handled
        queue_size_bar.update(-1)
        processed_packets_bar.update(1)
        sum(range(100000))  # slow computation, adjust
# Run the socket reader and the processor concurrently, each in its own thread.
socket_thread = threading.Thread(target=read_from_the_socket_into_the_queue)
process_thread = threading.Thread(target=read_from_the_queue_and_process)
for worker in (socket_thread, process_thread):
    worker.start()
Working on learning socket programming and I am having a strange issue crop up between my two codes depending on what IP I try to run them through.
Server:
import socket
import time
import datetime
import filecmp
# TCP server side of the file-transfer experiment: accepts one client, then
# receives up to 100 chunks, writing each one to 'receive1.txt' and timing it.
# NOTE(review): indentation was lost in this listing; the extent of the while
# loop (in particular whether f.close() is inside it) cannot be recovered.
HOST = 'localhost'
PORT = 9100
# n counts iterations (1-based); x counts successful file comparisons.
n = 1
x = 0
average_list = []
print('I am ready for any client side request \n')
file_comparison = "send.txt"
s=socket.socket(socket.AF_INET,socket.SOCK_STREAM)
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
s.bind((HOST,PORT))
s.listen(1)
# accept() is called exactly once, before the loop: only the first client
# connection is ever served.
conn, addr = s.accept()
while n <= 100:
# Reads at most 1024 bytes from that single connection.
data = conn.recv(1024)
file = 'receive1.txt';
print('I am starting receiving file', file,'for the',n,'th time')
# The timer starts after recv() has returned, so it covers only the open,
# the write and the print below.
a = datetime.datetime.now()
# 'wb' truncates the file, so it only ever holds the latest chunk.
f = open(file, 'wb')
f.write(data)
print('I am finishing receiving file', file,'for the',n,'th time')
b = datetime.datetime.now()
rawtime = b - a
# NOTE(review): timedelta * 1000 is still a timedelta (1000 times longer),
# not a number of milliseconds.
millidelta = rawtime * 1000
average_list.append(millidelta)
real_average = ((sum(average_list, datetime.timedelta(0,0))) / n)
print('The time used in milliseconds to receive',file,'for the',n,'th time','is:',millidelta,'milliseconds')
print('The average time to receive',file,'in milliseconds is:',real_average)
# NOTE(review): x is incremented when the two files are EQUAL, yet it is
# reported as "Total errors" at the end. The comparison also appears before
# f.close(), so the written data may not have been flushed yet - confirm.
if filecmp.cmp(file,file_comparison,shallow=False):
x = x+1
n=n + 1
f.close()
conn.close()
s.close()
print('I am done \n')
print('Total errors: ',x,'out of',n-1 )
Client:
import socket
import datetime
import time
import filecmp
#initializing host, port, filename, total time and number of times to send the file
# TCP client side of the file-transfer experiment (version from the question).
# NOTE(review): indentation was lost in this listing; going by the discussion
# that follows it, the socket is created and connected inside the while loop
# and s.close() is only reached after the loop.
host = 'localhost'
port = 9100
fileName = "send.txt"
n = 1
average_list = []
file_to_send = open(fileName,'rb')
while n <= 100:
# Each pass reads the NEXT 1024 bytes; the file is never rewound, so once it
# is exhausted read() returns b'' and nothing more is sent.
data = file_to_send.read(1024)
# A brand-new socket and connection per pass.
s=socket.socket()
s.connect((host,port))
s.sendall(data)
#reading the next 1024 bits
print('I am connecting to server side:',host,'\n')
print('I am sending file',fileName,'for the',n,'th time')
# NOTE(review): both timestamps are taken after sendall() has returned, so
# the measured interval covers only the print in between.
a = datetime.datetime.now()
print('I am finishing sending file',fileName,'for the',n,'th time')
b = datetime.datetime.now()
rawtime = b - a
# NOTE(review): timedelta * 1000 is still a timedelta, not milliseconds.
millidelta = rawtime * 1000
average_list.append(millidelta)
real_average = ((sum(average_list, datetime.timedelta(0,0))) / n)
print('The time used in milliseconds to send',fileName,'for the',n,'th time','is:',millidelta,'milliseconds')
print('The average time to send',fileName,'in milliseconds is:',real_average)
n = n + 1
file_to_send.close()
s.close()
print('I am done')
In this current iteration, my client-side code simply runs through the loop, trying to send the data of a .txt file to a server that isn't receiving anything. If I change 'localhost' to my actual IP address, I instead get the server-side code cycling through its while loop, while the client side gives up after 2 iterations with:
ConnectionRefusedError: [WinError 10061] No connection could be made because the target machine actively refused it
with the error citing line 15, "s.connect((host,port))", as the cause of the issue. Ultimately I'm stuck, since switching the host between what I assumed were two correct values gives me drastically different results, with neither working as intended.
From the other times I have seen this error, I think it is telling us that the port the socket is trying to connect to is still connected to another socket.
My diagnosis of why that happens is that s.close() is not inside the while loop, so the code keeps creating a new socket and then tries to connect on the same port.
Edit: I got a chance to run it on my side and it works for me if I pull the whole making and binding of a socket out of the loop like this:
import socket
import datetime
import time
import filecmp
#initializing host, port, filename, total time and number of times to send the file
host = 'localhost'
port = 9100
fileName = "send.txt"
n = 1
average_list = []

# `with` closes the file and the socket even if connect()/sendall() raises.
with open(fileName, 'rb') as file_to_send, socket.socket() as s:
    # One connection for the whole run, created once outside the loop.
    s.connect((host, port))
    while n <= 100:
        # reading the next 1024 bytes (b'' once the file is exhausted)
        data = file_to_send.read(1024)
        print('I am connecting to server side:',host,'\n')
        print('I am sending file',fileName,'for the',n,'th time')
        # Time the send itself: the timestamps must bracket sendall().
        a = datetime.datetime.now()
        s.sendall(data)
        b = datetime.datetime.now()
        print('I am finishing sending file',fileName,'for the',n,'th time')
        rawtime = b - a
        # total_seconds() * 1000 is a real millisecond count; multiplying the
        # timedelta itself by 1000 only yields a longer timedelta.
        millidelta = rawtime.total_seconds() * 1000
        average_list.append(millidelta)
        real_average = sum(average_list) / n
        print('The time used in milliseconds to send',fileName,'for the',n,'th time','is:',millidelta,'milliseconds')
        print('The average time to send',fileName,'in milliseconds is:',real_average)
        n = n + 1
This definitely works for me and sends the file 100 times and it gets received 100 times but I don't know if in your use case you need it to be a hundred new sockets instead of one socket sending 100 files that get successfully received.
Running some production code I faced the following problem:
When sending HTTP requests to some server, server immediately closes the connection after sending response, which, for some reason, results in data loss.
Analyzing TCP dumps i can see that conversation goes as this:
client request
server ack
server push
server fin, ack (sent after ~0.000020 secs after previous push)
As a result, my code can't get the data sent by the server (I'm guessing that, because of the small delay after the push, the POLLHUP event might arrive before POLLIN?).
Trying to mimic the problem I've written the following code:
(It mimics the client behaviour on my side)
client:
import time
import socket
from errno import EAGAIN
from select import poll, POLLIN, POLLPRI, POLLERR, POLLHUP, POLLNVAL
def main(buf=""):
# Non-blocking, poll()-driven TCP client (Python 2 syntax): sends `buf` to
# 127.0.0.1:8877 and returns everything received once the socket is closed.
# NOTE(review): indentation was lost in this listing, so the nesting described
# in the comments below is inferred and should be confirmed.
client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
client.setblocking(False)
client.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
client.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
polling_object = poll()
polling_object.register(client, POLLPRI | POLLIN | POLLERR | POLLHUP)
# in_buf accumulates received data; sock_closed ends the main loop.
in_buf = ""
sock_closed = False
try:
client.connect(("127.0.0.1", 8877))
# Every connect() error is swallowed here, including real failures.
except socket.error, e:
pass
while True and not sock_closed:
# Timeout 0: poll() returns immediately with whatever events are pending.
events = polling_object.poll(0)
for _, e in events:
# Readable events are tested first; the close-type events sit in the elif.
if e & (POLLIN | POLLPRI):
# Drain the socket until recv() would block (EAGAIN) or the peer has closed.
while True:
try:
data = client.recv(1024)
if data:
in_buf += data
# An empty string from recv() means the peer closed the connection.
elif data == "":
client.close()
sock_closed = True
break
# NOTE(review): this rebinds `e`, the event mask of the enclosing for loop.
except socket.error, e:
if e.args[0] == EAGAIN:
break
else:
raise
elif e & (POLLERR|POLLHUP|POLLNVAL):
client.close()
sock_closed = True
# Send whatever is still pending; keep the unsent tail for the next pass.
if buf and not sock_closed:
try:
b_sent = client.send(buf)
if b_sent == len(buf):
buf = ""
else:
buf = buf[b_sent:]
except socket.error, e:
# EAGAIN only means "try again later"; any other error closes the socket.
if e.args[0] != EAGAIN:
client.close()
sock_closed = True
time.sleep(0.5)
if sock_closed:
return in_buf
# Script entry point: the payload is the first command-line argument, or
# 'hello' when none is given; the data received back is printed (Python 2 print).
if __name__ == '__main__':
import sys
if len(sys.argv) > 1:
buf = sys.argv[1]
else:
buf = 'hello'
print main(buf)
server
import datetime
import time
import socket
def main():
# One-shot TCP test server (Python 2 syntax): accepts a single client on
# 127.0.0.1:8877, waits for its first data, replies with a small JSON body
# and closes right away. Returns three time.time() stamps:
# (after accept, after sendall, after client.close()).
# NOTE(review): indentation was lost in this listing; whether the print sits
# inside or after the receive loop cannot be recovered.
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server.bind(("127.0.0.1", 8877))
server.listen(0)
client, _ = server.accept()
t1 = time.time()
data = ""
# Loops until the first non-empty recv(). If the client closes without
# sending anything, recv() keeps returning "" and this never terminates.
while not data:
data += client.recv(1024)
print "recv data %s" % data
client.sendall('{"ok": 1}')
t2 = time.time()
# Closed immediately after the reply, to mimic the production server.
client.close()
t3 = time.time()
server.close()
return t1, t2, t3
# Script entry point: runs the server once and reports when the connection was
# accepted, how long sending took, and how long the close took (Python 2 print).
if __name__ == '__main__':
# c_r = connection received, d_s = data sent, c_c = connection closed.
c_r, d_s, c_c = main()
print "Connection received at ", datetime.datetime.fromtimestamp(c_r)
print "All Data sent after %.12f secs" % (d_s - c_r)
print "Connection closed after %.12f secs" % (c_c - d_s)
Running this code doesn't help me reproduce the problem, because my client can still get the data from the socket buffer, which is fairly obvious from just following the code. The only difference is that in the TCP dump it goes like this:
client request
server ack
server push
client ack
server fin, ack
I'm wondering: is there a way to send FIN, ACK right after the push without "letting" the client send its ACK? Can it be done using Python?
I want to send strings from a text file to my local port, but I have to open a connection and close it for each string. Therefore, the data flow is very slow (about one string every two seconds). How can I make it faster?
# Serves one randomly chosen line of `filename` per accepted connection
# (Python 2 syntax). `s`, `liste` and `filename` are defined outside this excerpt.
# NOTE(review): indentation was lost in this listing; with `while 1` and no
# break, the final print is presumably unreachable - confirm.
while 1:
# Blocks until a client connects; a new connection is needed for every string.
conn, addr = s.accept()
line_number = random.randint(1,2261074)
liste.append(line_number)
line = linecache.getline(filename,line_number)
# Only the second space-separated field of the line is sent.
sendit = line.split(" ")[1]
print type(sendit)
print "sending: " + sendit
# send() may transmit fewer bytes than requested; its return value is ignored.
conn.send(sendit)
conn.close()
print('End Of Stream.')
The answer suggests sending 10 messages to Spark on each connection, separating each message by 1 second, then closing the connection.
It might be better to keep the connection open and use a non-blocking socket at the server end.
The server code below keeps the connection open, sending messages in batches on a non-blocking socket, with an idle delay between each batch.
This can be used to test how fast Spark can receive messages. I've set it to send in batches of 50 messages, then wait 1 second before sending the next 50.
Spark receives all messages OK on my machine, even if I set the idle delay to zero.
You can experiment and adjust as needed for your application.
Server code:
import socket
import time
import select
def do_send(sock, msg, timeout):
    """Send all of *msg* on a non-blocking socket, waiting for writability.

    Args:
        sock: connected socket, expected to be in non-blocking mode.
        msg: payload to send.
        timeout: seconds to wait, per select() call, for the socket to
            accept more data.

    Returns:
        True once the whole message has been handed to the socket. False if,
        before any byte was sent, the socket reported an exceptional
        condition or did not become writable within *timeout*.

    Raises:
        socket.timeout: the socket stalled after only part of the message had
            been sent. Returning False there would invite the caller to
            resend the whole message and corrupt the stream. socket.timeout
            is a socket.error, so ``except socket.error`` handlers catch it.
    """
    remaining = msg
    sent_any = False
    while True:
        _, writable, errored = select.select([], [sock], [sock], timeout)
        if errored or not writable:
            if sent_any:
                raise socket.timeout("socket stalled in the middle of a message")
            return False
        # On a non-blocking socket send() may accept only part of the data;
        # its return value says how much, and the rest must be sent again.
        sent = sock.send(remaining)
        if sent >= len(remaining):
            return True
        sent_any = sent_any or sent > 0
        remaining = remaining[sent:]
# Test server (Python 2 syntax): accepts one client on localhost:9999 and keeps
# the connection open, sending 'Hello World <n>' lines in batches with an idle
# pause between batches.
# NOTE(review): indentation was lost in this listing; in particular, whether
# `count += 1` is conditional on a successful do_send() cannot be recovered.
host = 'localhost'
port = 9999
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
s.bind((host, port))
s.listen(1)
print "waiting for client"
# Blocks until the single client connects.
conn, addr = s.accept()
print "client connected"
# Non-blocking from here on; do_send() waits for writability via select().
conn.setblocking(0)
# batchSize messages per batch, then `idle` seconds of pause.
batchSize = 50
idle = 1.
# count numbers the messages; running is the main-loop flag.
count = 0
running = 1
while running:
try:
# sc counts the messages successfully sent in the current batch.
sc = 0
while (sc < batchSize):
# Each attempt waits up to 1.0 s for the socket to become writable.
if do_send (conn, 'Hello World ' + repr(count) + '\n', 1.0):
sc += 1
count += 1
print "sent " + repr(batchSize) + ", waiting " + repr(idle) + " seconds"
time.sleep(idle)
# Any socket error closes the connection and stops the loop.
except socket.error:
conn.close()
running = 0
print "done"
Simple Spark code:
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
# Local Spark context (master "local[2]", application name "test") with a
# streaming context that uses a batch duration of 1.
sc = SparkContext(master="local[2]", appName="test")
ssc = StreamingContext(sc, batchDuration=1)

# Read text from the TCP server on localhost:9999, split it on newlines and
# print each batch.
ststr = ssc.socketTextStream(hostname="localhost", port=9999)
lines = ststr.flatMap(lambda text: text.split('\n'))
lines.pprint()

# Start the stream and block until it terminates.
ssc.start()
ssc.awaitTermination()
Hope this may be useful.
I've been struggling to get my traceroute up and running, and I was hoping for a bit of help. I'm running this with Python 2.7 on a Linux VM. Below is my source code (please ignore the spacing on the first line; I had a hard time figuring out SO's code formatting tool, but the indentations are correct in my local copy.)
def main(dest_name):
# Traceroute: sends one empty UDP probe per TTL value (1, 2, 3, ...) towards
# dest_name and waits on a raw ICMP socket for a reply, printing each hop and
# its round-trip time until the destination answers or maxHops is exceeded.
# NOTE(review): indentation was lost in this listing, so the nesting described
# in the comments below is inferred and should be confirmed.
dest_addr = socket.gethostbyname(dest_name)
# Define UDP and ICMP
udp = socket.getprotobyname('udp')
icmp = socket.getprotobyname('icmp')
# `timer` is the TTL of the next probe, i.e. the current hop number.
timer = 1
port = 54321
maxHops = 40
totalRTT = 0
while True:
# Create sender and receiver. Sender uses UDP, receiver uses ICMP
sender = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, udp)
# Assign the current TTL to the sender (it is incremented at the end of each pass)
sender.setsockopt(socket.SOL_IP, socket.IP_TTL, timer)
receiver = socket.socket(socket.AF_INET, socket.SOCK_RAW, icmp)
# A hop that never answers costs the full 15 s before the loop moves on.
receiver.settimeout(15.0)
# Bind the receiver, then send an empty probe to the destination
receiver.bind(("", port))
sender.sendto("", (dest_name, port))
# Ensures that not receiving won't stall the program
# receiver.setblocking(0)
addr = None
name = None
count = 0
try:
# Keep track of RTT (the clock starts after the probe has been sent)
startTime = time.time()
# Read up to 512 bytes; only the sender's address is kept, the payload is discarded
(_,addr) = receiver.recvfrom(512)
addr = addr[0]
# Try to get site name
try:
name = socket.gethostbyaddr(addr)[0]
except socket.error:
name = addr
# Socket errors are swallowed here; addr then stays None and the hop is
# printed as "*"
except socket.error as exc:
pass
# Close both sockets
finally:
sender.close()
receiver.close()
endTime = time.time()
# Record RTT, total RTT, convert to ms
RTT = (endTime - startTime) * 1000
totalRTT += RTT
if addr is not None:
host = "%s (%s)" % (name, addr)
else:
host = "*"
print("%d\t%s" % (timer, host))
print(" %f" % RTT + " ms")
timer += 1
# Stop once the destination itself replied or the hop limit is exceeded.
# NOTE(review): timer has already been incremented, so the printed
# "Hop count" is one more than the last hop shown.
if addr == dest_addr or timer > maxHops:
print("Total RTT: %f\n" % totalRTT)
print("Hop count: %d\n" % timer)
break
# Script entry point: trace the route to a fixed host.
if __name__ == "__main__":
main('www.google.com')
My output looks something like this:
1 129.22.144.2 (129.22.144.2)
3.091097 ms
2 10.2.0.98 (10.2.0.98)
4.683971 ms
3 10.2.3.169 (10.2.3.169)
6.258011 ms
4 *
15015.315056 ms
5 *
15015.240908 ms
It continues to time out until my max hop count is reached. Does anyone have suggestions?
Thanks!
Did some research. The issue ended up being the port number I was using. When writing a traceroute, use port 33434.