Network traffic monitor with pcapy in Python

I have written a simple network traffic monitor to get the transfer rate in B/s and/or the total data transferred (in B). However, when I test it by transferring a file over FTP (using Total Commander), I cannot make it report the total transfer equal to the size of the file. It always gives a much lower size than the actual one.
I am not sure if I am doing something wrong.
The BPF filter I set is
dst <IP of ftp server pc>
Below is my source code:
import threading
import sys
import pcapy
import time
import logging as logger


class NetMonitor(threading.Thread):

    _timeout = 1

    @classmethod
    def get_net_interfaces(cls):
        return pcapy.findalldevs()

    def __init__(self, device, bpf_filter):
        threading.Thread.__init__(self)
        self.active = True
        self._net_monitor = pcapy.open_live(device, 65535, 0, 1000)  #self.timeout * 1000)
        self._net_monitor.setfilter(bpf_filter)
        #self.dumper = self.net_monitor.dump_open("pkt_dump.txt")
        self._current_bytes_rate = 0
        self.total_transfer = 0  # total number of Bytes transfered
        #<--- this is to calc average transfer B/s
        self._tmp_bytes_per_sec_sum = 0  # sums up B/s values from each dispatch iteration (eventually used to calc average value)
        self._inc = 0  # number of dispatch iterations (eventually used to calc average B/s value)
        #--->
        self._dispatch_bytes_sum = 0  # sums up packets size for one dispatch call

    def __handle_packet(self, header, data):
        # method is called for each packet by dispatch call (pcapy)
        self._dispatch_bytes_sum += len(data)  #header.getlen() #len(data)
        #logger.debug("h: ({}, {}, {}), d:{}".format(header.getlen(), header.getcaplen(), header.getts(), len(data)))
        #self.dumper.dump(header, data)

    def update(self):
        self._dispatch_bytes_sum = 0
        # process packets
        packets_nr = self._net_monitor.dispatch(-1, self.__handle_packet)
        self.total_transfer += self._dispatch_bytes_sum
        self._inc += 1
        self._current_bytes_rate = self._dispatch_bytes_sum  # add single dispatch B/s -> timeout is 1 s
        self._tmp_bytes_per_sec_sum += self._current_bytes_rate
        logger.debug('inc:{}, current rate: {} B/s, avg rate: {} B/s, total:{} B'.format(self._inc, self.current_rate, self.avg_rate, self.total_transfer))
        return self._current_bytes_rate, packets_nr

    def get_avg_bytes_rate(self):
        if self._inc:
            return self._tmp_bytes_per_sec_sum / self._inc
        else:
            return 0

    def get_current_bytes_rate(self):
        return self._current_bytes_rate

    def run(self):
        while self.active:
            self.update()
            time.sleep(self._timeout)

    # average B/s rate
    avg_rate = property(get_avg_bytes_rate)
    # current B/s rate
    current_rate = property(get_current_bytes_rate)


if __name__ == '__main__':
    filter = ' '.join(sys.argv[2:])
    print filter

    #nm0 = NetMonitor(pcapy.findalldevs()[0], filter)
    nm1 = NetMonitor(pcapy.findalldevs()[1], filter)
    nm1.start()

    start_time = time.time()
    while time.time() - start_time < int(sys.argv[1]):
        print "current {} B/s, avg {} B/s, total transfer {} B".format(nm1.current_rate, nm1.avg_rate, nm1.total_transfer)
        time.sleep(1)

    nm1.active = False
    nm1.join()
    print "++++++ total: {}, avg: {}".format(nm1.total_transfer, nm1.avg_rate)
Any advice is much appreciated.
Cheers.

Use a filter to capture only the useful TCP stream, ftp-data:
port ftp-data
I also suggest capturing in promiscuous mode and capturing only the packet headers (you don't need the full payload to know the length):
open_live(device, 4096, True, 100)
In your handler, it is correct to use header.getlen() rather than len(data).
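Put together, a minimal sketch of those suggestions might look like this (the device index and the exact snaplen/timeout values are illustrative, not taken from the question):

import pcapy

# open the capture: small snaplen (headers are enough), promiscuous mode, 100 ms timeout
device = pcapy.findalldevs()[0]
monitor = pcapy.open_live(device, 4096, True, 100)
monitor.setfilter('port ftp-data')  # only the FTP data connection

total = {'bytes': 0}

def handle_packet(header, data):
    # header.getlen() is the on-the-wire length of the packet,
    # independent of how many bytes were actually captured
    total['bytes'] += header.getlen()

monitor.dispatch(-1, handle_packet)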

Related

Read multiple DS18B20 temperature sensors faster using Raspberry Pi

My custom sensor dashboard requests new readings every second.
This worked well, until I hooked up 3 DS18B20 temperature sensors (1-wire protocol, so all on 1 pin), which each take 750ms to provide new data.
This is the class I currently use to read the temperature of each sensor:
# ds18b20.py
# written by Roger Woollett

import os
import glob
import time


class DS18B20:
    # much of this code is lifted from Adafruit web site
    # This class can be used to access one or more DS18B20 temperature sensors
    # It uses OS supplied drivers and one wire support must be enabled
    # To do this add the line
    # dtoverlay=w1-gpio
    # to the end of /boot/config.txt
    #
    # The DS18B20 has three pins, looking at the flat side with the pins pointing
    # down pin 1 is on the left
    # connect pin 1 to GPIO ground
    # connect pin 2 to GPIO 4 *and* GPIO 3.3V via a 4k8 (4800 ohm) pullup resistor
    # connect pin 3 to GPIO 3.3V
    # You can connect more than one sensor to the same set of pins
    # Only one pullup resistor is required

    def __init__(self):
        # Load required kernel modules
        os.system('modprobe w1-gpio')
        os.system('modprobe w1-therm')

        # Find file names for the sensor(s)
        base_dir = '/sys/bus/w1/devices/'
        device_folder = glob.glob(base_dir + '28*')
        self._num_devices = len(device_folder)
        self._device_file = list()
        i = 0
        while i < self._num_devices:
            self._device_file.append(device_folder[i] + '/w1_slave')
            i += 1

    def _read_temp(self, index):
        # Issue one read to one sensor
        # You should not call this directly
        # First check if this index exists
        if index >= len(self._device_file):
            return False
        f = open(self._device_file[index], 'r')
        data = f.read()
        f.close()
        return data

    def tempC(self, index=0):
        # Call this to get the temperature in degrees C
        # detected by a sensor
        data = self._read_temp(index)
        retries = 0
        # Check for error
        if data == False:
            return None
        while (not "YES" in data) and (retries > 0):
            # Read failed so try again
            time.sleep(0.1)
            #print('Read Failed', retries)
            data = self._read_temp(index)
            retries -= 1
        if (retries == 0) and (not "YES" in data):
            return None
        (discard, sep, reading) = data.partition(' t=')
        if reading == 85000:
            # 85ºC is the boot temperature of the sensor, so ignore that value
            return None
        temperature = float(reading) / 1000.0
        return temperature

    def device_count(self):
        # Call this to see how many sensors have been detected
        return self._num_devices
I already tried returning the previous temperature reading if the current one isn't finished yet, but this didn't reduce the time it took to read a sensor, so I guess the only way is to do things asynchronously.
I could reduce the precision to reduce the time it takes per reading, but ideally I would read all of the sensors simultaneously on separate threads.
How can I best implement this? Or are there other ways to improve the reading speed of multiple DS18B20 sensors?
Thanks for any insights!
You're facing some limitations introduced by the Linux kernel driver. If you were interacting with the OneWire protocol directly, you would only have a single 750ms read cycle for all three sensors, rather than (3 * 750ms). When speaking the 1-wire protocol directly, you can issue a single "convert temperature" command to all devices on the bus, as described here, and then read all the sensors.
The Linux driver explicitly doesn't support this mode of operation:
If none of the devices are parasite powered it would be possible to convert all the devices at the same time and then go back to read individual sensors. That isn’t currently supported. The driver also doesn’t support reduced precision (which would also reduce the conversion time) when reading values.
That means you're stuck with a 750ms per device read cycle. Your best option is probably placing the sensor reading code in a separate thread, e.g.:
import glob
import threading
import time


# Note that we're inheriting from threading.Thread here;
# see https://docs.python.org/3/library/threading.html
# for more information.
class DS18B20(threading.Thread):
    default_base_dir = "/sys/bus/w1/devices/"

    def __init__(self, base_dir=None):
        super().__init__()
        self._base_dir = base_dir if base_dir else self.default_base_dir
        self.daemon = True
        self.discover()

    def discover(self):
        device_folder = glob.glob(self._base_dir + "28*")
        self._num_devices = len(device_folder)
        self._device_file: list[str] = []
        for i in range(self._num_devices):
            self._device_file.append(device_folder[i] + "/w1_slave")
        self._values: list[float | None] = [None] * self._num_devices
        self._times: list[float] = [0.0] * self._num_devices

    def run(self):
        """Thread entrypoint: read sensors in a loop.

        Calling DS18B20.start() will cause this method to run in
        a separate thread.
        """
        while True:
            for dev in range(self._num_devices):
                self._read_temp(dev)

            # Adjust this value as you see fit, noting that you will never
            # read actual sensor values more often than 750ms * self._num_devices.
            time.sleep(1)

    def _read_temp(self, index):
        for i in range(3):
            with open(self._device_file[index], "r") as f:
                data = f.read()

            if "YES" not in data:
                time.sleep(0.1)
                continue

            discard, sep, reading = data.partition(" t=")
            temp = float(reading) / 1000.0
            self._values[index] = temp
            self._times[index] = time.time()
            break
        else:
            print(f"failed to read device {index}")

    def tempC(self, index=0):
        return self._values[index]

    def device_count(self):
        """Return the number of discovered devices"""
        return self._num_devices
Because this is a thread, you need to .start() it first, so your
code would look something like:
d = DS18B20()
d.start()

while True:
    for i in range(d.device_count()):
        print(f'dev {i}: {d.tempC(i)}')
    time.sleep(0.5)
You can call the tempC method as often as you want, because it just
returns a value from the _values list. The actual update
frequency is controlled by the loop in the run method (and the
minimum cycle time imposed by the sensors).
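If you also want to know how old a cached reading is, the _times list that _read_temp already fills in can be exposed with a small extra method (a hypothetical addition, not part of the answer above):

# hypothetical method to add to the DS18B20 class above (uses the existing _times list)
def reading_age(self, index=0):
    """Seconds since sensor `index` was last read successfully, or None if never."""
    last = self._times[index]
    return time.time() - last if last else None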

Multiprocessing function not writing to file or printing

I'm working on a Raspberry Pi (3 B+) making a data collection device and I'm
trying to spawn a process to record the data coming in and write it to a file. I have a function for the writing that works fine when I call it directly.
When I call it using the multiprocess approach however, nothing seems to happen. I can see in task monitors in Linux that the process does in fact get spawned but no file gets written, and when I try to pass a flag to it to shut down it doesn't work, meaning I end up terminating the process and nothing seems to have happened.
I've been over this every which way and can't see what I'm doing wrong; does anyone else? In case it's relevant, these are functions inside a parent class, and one of the functions is meant to spawn another as a thread.
Code I'm using:
from datetime import datetime, timedelta
import csv
from drivers.IMU_SEN0 import IMU_SEN0
import multiprocessing, os


class IMU_data_logger:
    _output_filename = ''
    _csv_headers = []
    _accelerometer_headers = ['Accelerometer X', 'Accelerometer Y', 'Accelerometer Z']
    _gyroscope_headers = ['Gyroscope X', 'Gyroscope Y', 'Gyroscope Z']
    _magnetometer_headers = ['Bearing']
    _log_accelerometer = False
    _log_gyroscope = False
    _log_magnetometer = False
    IMU = None
    _writer = []
    _run_underway = False
    _process = []
    _stop_value = 0

    def __init__(self, output_filename='/home/pi/blah.csv', log_accelerometer=True, log_gyroscope=True, log_magnetometer=True):
        """data logging device
        NOTE! Multiple instances of this class should not use the same IMU devices simultaneously!"""
        self._output_filename = output_filename
        self._log_accelerometer = log_accelerometer
        self._log_gyroscope = log_gyroscope
        self._log_magnetometer = log_magnetometer

    def __del__(self):
        # TODO Update this
        if self._run_underway:  # If there's still a run underway, end it first
            self.end_recording()

    def _set_up(self):
        self.IMU = IMU_SEN0(self._log_accelerometer, self._log_gyroscope, self._log_magnetometer)
        self._set_up_headers()

    def _set_up_headers(self):
        """Set up the headers of the CSV file based on the header substrings at top and the input flags on what will be measured"""
        self._csv_headers = []
        if self._log_accelerometer is not None:
            self._csv_headers += self._accelerometer_headers
        if self._log_gyroscope is not None:
            self._csv_headers += self._gyroscope_headers
        if self._log_magnetometer is not None:
            self._csv_headers += self._magnetometer_headers

    def _record_data(self, frequency, stop_value):
        self._set_up()  # Run setup in thread
        """Record data function, which takes a recording frequency, in hertz, as an input"""
        previous_read_time = datetime.now() - timedelta(1, 0, 0)
        self._run_underway = True  # Note that a run is now going
        Period = 1 / frequency  # Period, in seconds, of a recording based on the input frequency
        print("Writing output data to", self._output_filename)
        with open(self._output_filename, 'w', newline='') as outcsv:
            self._writer = csv.writer(outcsv)
            self._writer.writerow(self._csv_headers)  # Write headers to file
            while stop_value.value == 0:  # While a run continues
                if datetime.now() - previous_read_time >= timedelta(0, 1, 0):  # If we've waited a period, collect the data; otherwise keep looping
                    print("run underway value", self._run_underway)
                if datetime.now() - previous_read_time >= timedelta(0, Period, 0):  # If we've waited a period, collect the data; otherwise keep looping
                    previous_read_time = datetime.now()  # Update previous readtime
                    next_row = []
                    if self._log_accelerometer:
                        # Get values in m/s^2
                        axes = self.IMU.read_accelerometer_values()
                        next_row += [axes['x'], axes['y'], axes['z']]
                    if self._log_gyroscope:
                        # Read gyro values
                        gyro = self.IMU.read_gyroscope_values()
                        next_row += [gyro['x'], gyro['y'], gyro['z']]
                    if self._log_magnetometer:
                        # Read magnetometer value
                        b = self.IMU.read_magnetometer_bearing()
                        next_row += b
                    self._writer.writerow(next_row)
        # Close the csv when done
        outcsv.close()

    def start_recording(self, frequency_in_hz):
        # Create recording process
        self._stop_value = multiprocessing.Value('i', 0)
        self._process = multiprocessing.Process(target=self._record_data, args=(frequency_in_hz, self._stop_value))
        # Start recording process
        self._process.start()
        print(datetime.now().strftime("%H:%M:%S.%f"), "Data logging process spawned")
        print("Logging Accelerometer:", self._log_accelerometer)
        print("Logging Gyroscope:", self._log_gyroscope)
        print("Logging Magnetometer:", self._log_magnetometer)
        print("ID of data logging process: {}".format(self._process.pid))

    def end_recording(self, terminate_wait=2):
        """Function to end the recording multithread that's been spawned.
        Args: terminate_wait: This is the time, in seconds, to wait after attempting to shut down the process before terminating it."""
        # Get process id
        id = self._process.pid
        # Set stop event for process
        self._stop_value.value = 1
        self._process.join(terminate_wait)  # Wait two seconds for the process to terminate
        if self._process.is_alive():  # If it's still alive after waiting
            self._process.terminate()
            print(datetime.now().strftime("%H:%M:%S.%f"), "Process", id, "needed to be terminated.")
        else:
            print(datetime.now().strftime("%H:%M:%S.%f"), "Process", id, "successfully ended itself.")
====================================================================
ANSWER: For anyone following up here, it turns out the problem was my use of the VS Code debugger which apparently doesn't work with multiprocessing and was somehow preventing the success of the spawned process. Many thanks to Tomasz Swider below for helping me work through issues and, eventually, find my idiocy. The help was very deeply appreciated!!
I can see a few things wrong in your code:
First, stop_value == 0 will not work, since multiprocessing.Value('i', 0) != 0; change that line to
while stop_value.value == 0
Second, you never update previous_read_time, so it will write readings as fast as it can and you will run out of disk space quickly.
Third, use time.sleep(): what you are doing is called busy looping, and it wastes CPU cycles needlessly.
Fourth, terminating with self._stop_value = 1 probably will not work; there must be another way to set that value, probably self._stop_value.value = 1.
Here is a piece of example code, based on the code you provided, that works just fine:
import csv
import multiprocessing
import time
from datetime import datetime, timedelta
from random import randint


class IMU(object):
    @staticmethod
    def read_accelerometer_values():
        return dict(x=randint(0, 100), y=randint(0, 100), z=randint(0, 10))


class Foo(object):
    def __init__(self, output_filename):
        self._output_filename = output_filename
        self._csv_headers = ['xxxx', 'y', 'z']
        self._log_accelerometer = True
        self.IMU = IMU()

    def _record_data(self, frequency, stop_value):
        # self._set_up()  # Run setup functions for the data collection device and store it in the self.IMU variable
        """Record data function, which takes a recording frequency, in hertz, as an input"""
        previous_read_time = datetime.now() - timedelta(1, 0, 0)
        self._run_underway = True  # Note that a run is now going
        Period = 1 / frequency  # Period, in seconds, of a recording based on the input frequency
        print("Writing output data to", self._output_filename)
        with open(self._output_filename, 'w', newline='') as outcsv:
            self._writer = csv.writer(outcsv)
            self._writer.writerow(self._csv_headers)  # Write headers to file
            while stop_value.value == 0:  # While a run continues
                if datetime.now() - previous_read_time >= timedelta(0, 1, 0):  # If we've waited a period, collect the data; otherwise keep looping
                    print("run underway value", self._run_underway)
                if datetime.now() - previous_read_time >= timedelta(0, Period, 0):  # If we've waited a period, collect the data; otherwise keep looping
                    next_row = []
                    if self._log_accelerometer:
                        # Get values in m/s^2
                        axes = self.IMU.read_accelerometer_values()
                        next_row += [axes['x'], axes['y'], axes['z']]
                    previous_read_time = datetime.now()
                    self._writer.writerow(next_row)
        # Close the csv when done
        outcsv.close()

    def start_recording(self, frequency_in_hz):
        # Create recording process
        self._stop_value = multiprocessing.Value('i', 0)
        self._process = multiprocessing.Process(target=self._record_data, args=(frequency_in_hz, self._stop_value))
        # Start recording process
        self._process.start()
        print(datetime.now().strftime("%H:%M:%S.%f"), "Data logging process spawned")
        print("ID of data logging process: {}".format(self._process.pid))

    def end_recording(self, terminate_wait=2):
        """Function to end the recording multithread that's been spawned.
        Args: terminate_wait: This is the time, in seconds, to wait after attempting to shut down the process before terminating it."""
        # Get process id
        id = self._process.pid
        # Set stop event for process
        self._stop_value.value = 1
        self._process.join(terminate_wait)  # Wait two seconds for the process to terminate
        if self._process.is_alive():  # If it's still alive after waiting
            self._process.terminate()
            print(datetime.now().strftime("%H:%M:%S.%f"), "Process", id, "needed to be terminated.")
        else:
            print(datetime.now().strftime("%H:%M:%S.%f"), "Process", id, "successfully ended itself.")


if __name__ == '__main__':
    foo = Foo('/tmp/foometer.csv')
    foo.start_recording(20)
    time.sleep(5)
    print('Ending recording')
    foo.end_recording()

Profiling Serial communication using timeit

I need to communicate with an embedded system over RS232. For this I want to profile the time it takes to send a response to each command.
I've tested this code using two methods: datetime.now() and timeit()
Method #1
def resp_time(n, msg):
    """Given number of tries - n and bytearray list"""
    msg = bytearray(msg)
    cnt = 0
    timer = 0
    while cnt < n:
        time.sleep(INTERVAL)
        a = datetime.datetime.now()
        ser.flush()
        ser.write(msg)
        line = []
        for count in ser.read():
            line.append(count)
            if count == '\xFF':
                # print line
                break
        b = datetime.datetime.now()
        c = b - a
        # print c.total_seconds()*1000
        timer = timer + c.total_seconds() * 1000
        cnt = cnt + 1
    return timer / n


ser = serial.Serial(COMPORT, BAUDRATE, serial.EIGHTBITS, serial.PARITY_NONE, serial.STOPBITS_ONE, timeout=16)
if ser.isOpen():
    print "Serial port opened at: Baud:", COMPORT, BAUDRATE

cmd = read_file()
# returns a list of commands [msg1,msg2....]
n = 100
for index in cmd:
    timer = resp_time(n, index)
    print "Time in msecs over %d runs: %f " % (n, timer)
Method #2
def com_loop(msg):
    msg = bytearray(msg)
    time.sleep(INTERVAL)
    ser.flush()
    ser.write(msg)
    line = []
    for count in ser.read():
        line.append(count)
        if count == '\xFF':
            break


if __name__ == '__main__':
    import timeit
    ser = serial.Serial(COMPORT, BAUDRATE, serial.EIGHTBITS, serial.PARITY_NONE, serial.STOPBITS_ONE, timeout=16)
    if ser.isOpen():
        print "Serial port opened at: Baud:", COMPORT, BAUDRATE

    cmd = read_file()
    # returns a list of commands [msg1,msg2....]
    n = 100
    for index in cmd:
        t = timeit.timeit("com_loop(index)", "from __main__ import com_loop;index=%s;" % index, number=n)
        print t / 100
With datetime I get 2 ms to execute a command, and with timeit I get 200 ms for the same command.
I suspect I'm not calling timeit() properly; can someone point me in the right direction?
I'd assume 200 µs is closer to the truth, considering your COM port will be running at something like 115200 baud; assuming messages are 8 bytes long, transmitting one message would take about 9/115200 s ~= 10/100000 = 1/10,000 = 100 µs on the serial line alone. Being faster than that will be pretty impossible.
Python is definitely not the language of choice to do timing characterization at these scales. You will need to get a logic analyzer, or work very close to the serial controller (which I hope is directly attached to your PC's I/O controller and not some USB device, because that would introduce latencies of the same order of magnitude, at least). If you're talking about microseconds, the limiting factor in measurement is usually the random time it takes for your PC to react to an interrupt, the OS to run the interrupt service routine, the scheduler to continue your userland process, and then for Python to start up with its levels and levels of indirection. You're basically measuring the size of single grains of sand by holding a banana next to them.
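For reference, here is the per-byte arithmetic behind that estimate, assuming 115200 baud and a standard 8N1 frame (both assumptions for illustration, not values taken from the question):

# 8N1 frame: 1 start bit + 8 data bits + 1 stop bit = 10 bits per byte on the wire
baud = 115200
bits_per_byte = 10
byte_time_us = bits_per_byte * 1e6 / baud
print("one byte takes ~%.0f us on the wire" % byte_time_us)  # ~87 us at 115200 baud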

Redis: # of channels degrading latency. How to prevent degradation?

pub.py
import redis
import datetime
import time
import json
import sys
import threading
import gevent
from gevent import monkey
monkey.patch_all()


def main(chan):
    redis_host = '10.235.13.29'
    r = redis.client.StrictRedis(host=redis_host, port=6379)
    while True:
        def getpkg():
            package = {'time': time.time(),
                       'signature': 'content'
                       }
            return package

        #test 2: complex data
        now = json.dumps(getpkg())

        # send it
        r.publish(chan, now)
        print 'Sending {0}'.format(now)
        print 'data type is %s' % type(now)
        time.sleep(1)


def zerg_rush(n):
    for x in range(n):
        t = threading.Thread(target=main, args=(x,))
        t.setDaemon(True)
        t.start()


if __name__ == '__main__':
    num_of_chan = 10
    zerg_rush(num_of_chan)
    cnt = 0
    stop_cnt = 21
    while True:
        print 'Waiting'
        cnt += 1
        if cnt == stop_cnt:
            sys.exit(0)
        time.sleep(30)
sub.py
import redis
import threading
import time
import json
import gevent
from gevent import monkey
monkey.patch_all()


def callback(ind):
    redis_host = '10.235.13.29'
    r = redis.client.StrictRedis(host=redis_host, port=6379)
    sub = r.pubsub()
    sub.subscribe(str(ind))
    start = False
    avg = 0
    tot = 0
    sum = 0
    while True:
        for m in sub.listen():
            if not start:
                start = True
                continue
            got_time = time.time()
            decoded = json.loads(m['data'])
            sent_time = float(decoded['time'])
            dur = got_time - sent_time
            tot += 1
            sum += dur
            avg = sum / tot
            print decoded  #'Recieved: {0}'.format(m['data'])
            file_name = 'logs/sub_%s' % ind
            f = open(file_name, 'a')
            f.write('processing no. %s' % tot)
            f.write('it took %s' % dur)
            f.write('current avg: %s\n' % avg)
            f.close()


def zerg_rush(n):
    for x in range(n):
        t = threading.Thread(target=callback, args=(x,))
        t.setDaemon(True)
        t.start()


def main():
    num_of_chan = 10
    zerg_rush(num_of_chan)
    while True:
        print 'Waiting'
        time.sleep(30)


if __name__ == '__main__':
    main()
I am testing Redis pub/sub to replace the use of rsh to communicate with remote boxes.
One of the things I have tested is how the number of channels affects the latency of publish and pubsub.listen().
Test: one publisher and one subscriber per channel (the publisher publishes every second). I incremented the number of channels and observed the latency (the duration from the moment the publisher publishes a message to the moment the subscriber gets the message via listen).
num of chan    avg latency in seconds
10             0.004453
50             0.005246
100            0.0155
200            0.0221
300            0.0621
Note: tested on a 2-CPU + 4GB RAM + 1-NIC RHEL 6.4 VM.
What can I do to maintain low latency with a high number of channels?
Redis is single-threaded, so adding more CPUs won't help. Maybe more RAM? If so, how much more?
Is there anything I can do code-wise, or is the bottleneck in Redis itself?
Maybe the limitation comes from the way my test code is written with threading?
EDIT:
Redis Cluster vs ZeroMQ in Pub/Sub, for horizontally scaled distributed systems
The accepted answer says: "You want to minimize latency, I guess. The number of channels is irrelevant. The key factors are the number of publishers and number of subscribers, message size, number of messages per second per publisher, number of messages received by each subscriber, roughly. ZeroMQ can do several million small messages per second from one node to another; your bottleneck will be the network long before it's the software. Most high-volume pubsub architectures therefore use something like PGM multicast, which ZeroMQ supports."
From my testing, I don't know if this is true (the claim that the number of channels is irrelevant).
For example, I ran a test:
1) One channel. 100 publishers publishing to the channel with 1 subscriber listening, each publisher publishing once per second. Latency was 0.00965 seconds.
2) The same test, except with 1000 publishers. Latency was 0.00808 seconds.
Now compare that with my channel testing:
300 channels with 1 pub and 1 sub each resulted in 0.0621 seconds, and that is only 600 connections, which is fewer than in the tests above, yet the latency is significantly worse.

Benchmarking tool using twisted

I am trying to write a web benchmarking tool based on Twisted. Twisted is a fantastic asynchronous framework for web applications. Because I have only been using this framework for two weeks, I have run into a problem. Here it is:
When I compare this benchmarking tool with ApacheBench at the same concurrency, the results differ greatly. Here is the result of my tool:
python pyab.py 50000 50 http://xx.com/a.txt
speed:1063(q/s), worker:50, interval:7, req_made:7493, req_done:7443, req_error:0
And here is the result of ApacheBench:
ab -c 50 -n 50000 http://xx.com/a.txt
Server Software: nginx/1.4.1
Server Hostname: 203.90.245.26
Server Port: 8080
Document Path: /a.txt
Document Length: 6 bytes
Concurrency Level: 50
Time taken for tests: 6.089937 seconds
Complete requests: 50000
Failed requests: 0
Write errors: 0
Total transferred: 12501750 bytes
HTML transferred: 300042 bytes
Requests per second: 8210.27 [#/sec] (mean)
Time per request: 6.090 [ms] (mean)
Time per request: 0.122 [ms] (mean, across all concurrent requests)
Transfer rate: 2004.62 [Kbytes/sec] received
Connection Times (ms)
min mean[+/-sd] median max
Connect: 0 0 0.8 0 4
Processing: 1 5 3.4 5 110
Waiting: 0 2 3.6 2 109
Total: 1 5 3.5 5 110
Percentage of the requests served within a certain time (ms)
50% 5
66% 6
75% 6
80% 6
90% 7
95% 7
98% 8
99% 8
100% 110 (longest request)
On the same URL and concurrency, ApacheBench gets up to 8000 req/sec, while pyab manages only about 1000 req/sec.
Here is my code (pyab.py):
from twisted.internet import reactor, threads
from twisted.internet.protocol import Protocol
from twisted.internet.defer import Deferred
from twisted.web.client import Agent
from twisted.web.client import HTTPConnectionPool
from twisted.web.http_headers import Headers
from twisted.python import log

import time, os, stat, logging, sys
from collections import Counter

logging.basicConfig(
    #filename= "/%s/log/%s.%s" % (RUN_DIR,RUN_MODULE,RUN_TIME),
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.WARNING,
    #level=logging.DEBUG,
    stream=sys.stdout
)

#log.startLogging(sys.stdout)
observer = log.PythonLoggingObserver()
observer.start()


class IgnoreBody(Protocol):
    def __init__(self, deferred, tl):
        self.deferred = deferred
        self.tl = tl

    def dataReceived(self, bytes):
        pass

    def connectionLost(self, reason):
        self.deferred.callback(None)


class Pyab:
    def __init__(self, n=50000, concurrency=100, url='http://203.90.245.26:8080/a.txt'):
        self.n = n
        self.url = url
        self.pool = HTTPConnectionPool(reactor, persistent=True)
        self.pool.maxPersistentPerHost = concurrency
        self.agent = Agent(reactor, connectTimeout=5, pool=self.pool)
        #self.agent = Agent(reactor, connectTimeout = 5)

        self.time_start = time.time()
        self.max_worker = concurrency

        self.cnt = Counter({
            'worker': 0,
            'req_made': 0,
            'req_done': 0,
            'req_error': 0,
        })

    def monitor(self):
        interval = int(time.time() - self.time_start)
        speed = 0
        if interval != 0:
            speed = int(self.cnt['req_done'] / interval)
        log.msg("speed:%d(q/s), worker:%d, interval:%d, req_made:%d, req_done:%d, req_error:%d"
                % (speed, self.cnt['worker'], interval, self.cnt['req_made'], self.cnt['req_done'], self.cnt['req_error']), logLevel=logging.WARNING)
        reactor.callLater(1, lambda: self.monitor())

    def start(self):
        self.keeprunning = True
        self.monitor()
        self.readMore()

    def stop(self):
        self.keeprunning = False

    def readMore(self):
        while self.cnt['worker'] < self.max_worker and self.cnt['req_done'] < self.n:
            self.make_request()

        if self.keeprunning and self.cnt['req_done'] < self.n:
            reactor.callLater(0.0001, lambda: self.readMore())
        else:
            reactor.stop()

    def make_request(self):
        d = self.agent.request(
            'GET',
            #'http://examplexx.com/',
            #'http://example.com/',
            #'http://xa.xingcloud.com/v4/qvo/WDCXWD7500AADS-00M2B0_WD-WCAV5E38536685366?update0=ref0%2Ccor&update1=nation%2Ccn&action0=visit&_ts=1376397973636',
            #'http://203.90.245.26:8080/a.txt',
            self.url,
            Headers({'User-Agent': ['Twisted Web Client Example']}),
            None)
        self.cnt['worker'] += 1
        self.cnt['req_made'] += 1

        def cbResponse(resp):
            self.cnt['worker'] -= 1
            self.cnt['req_done'] += 1
            log.msg('response received')
            finished = Deferred()
            resp.deliverBody(IgnoreBody(finished, self))
            return finished

        def cbError(error):
            self.cnt['worker'] -= 1
            self.cnt['req_error'] += 1
            log.msg(error, logLevel=logging.ERROR)

        d.addCallback(cbResponse)
        d.addErrback(cbError)


if __name__ == '__main__':
    if len(sys.argv) < 4:
        print "Usage: %s <n> <concurrency> <url>" % (sys.argv[0])
        sys.exit()

    ab = Pyab(n=int(sys.argv[1]), concurrency=int(sys.argv[2]), url=sys.argv[3])
    ab.start()
    reactor.run()
Is there anything wrong with my code? Thanks!
When I last used it, ab was known to have dozens of serious bugs. Sometimes that would cause it to report massively inflated results. Sometimes it would report negative results. Sometimes it would crash. I'd try another tool, like httperf, as a sanity check.
However, if your server is actually that fast, then you might have another issue.
Even if ab has been fixed, you're talking here about a C program versus a Python program running on CPython. 8x slower than C in Python is not actually all that bad, so I don't expect there is actually anything wrong with your program, except that it doesn't make use of spawnProcess and multi-core concurrency.
For starters, see if you get any better results on PyPy.
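As a rough illustration of the multi-core point: the answer mentions Twisted's spawnProcess, but for brevity the sketch below just launches one pyab.py worker per core with subprocess and splits the load between them. It assumes pyab.py keeps the <n> <concurrency> <url> command line shown in the question; you would still have to sum the per-worker results yourself.

# run_parallel.py - sketch: one pyab.py process per CPU core, assuming the
# CLI from the question (python pyab.py <n> <concurrency> <url>)
import multiprocessing
import subprocess
import sys

def run_parallel(total_requests, concurrency, url):
    cores = multiprocessing.cpu_count()
    per_proc_n = total_requests // cores
    per_proc_c = max(1, concurrency // cores)
    procs = [
        subprocess.Popen([sys.executable, "pyab.py",
                          str(per_proc_n), str(per_proc_c), url])
        for _ in range(cores)
    ]
    for p in procs:
        p.wait()

if __name__ == "__main__":
    run_parallel(50000, 50, "http://xx.com/a.txt")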
