I was looking for a fuzzing library and I happened to see "boofuzz"
though there are no examples of how to use the library for http fuzzing.
This is the only code I see in their github page, but they say it was taken from sulley (an old fuzzing library):
import sys
sys.path.insert(0, '../')
from boofuzz.primitives import String, Static, Delim
class Group(object):
blocks = []
def __init__(self, name, definition=None):
self.name = name
if definition:
self.definition = definition
def add_definition(self, definition):
assert isinstance(definition, (list, tuple)), "Definition must be a list or a tuple!"
self.definition = definition
def render(self):
return "".join([x.value for x in self.definition])
def exhaust(self):
for item in self.definition:
while item.mutate():
current_value = item.value
self.log_send(current_value)
recv_data = self.send_buffer(current_value)
self.log_recv(recv_data)
def __repr__(self):
return '<%s [%s items]>' % (self.__class__.__name__, len(self.definition))
# noinspection PyMethodMayBeStatic
def send_buffer(self, current_value):
return "Sent %s!" % current_value
def log_send(self, current_value):
pass
def log_recv(self, recv_data):
pass
s_static = Static
s_delim = Delim
s_string = String
CloseHeader = Group(
"HTTP Close Header",
definition=[
# GET / HTTP/1.1\r\n
s_static("GET / HTTP/1.1\r\n"),
# Connection: close
s_static("Connection"), s_delim(":"), s_delim(" "), s_string("close"),
s_static("\r\n\r\n")
]
)
OpenHeader = Group(
"HTTP Open Header",
definition=[
# GET / HTTP/1.1\r\n
Static("GET / HTTP/1.1\r\n"),
# Connection: close
Static("Connection"), Delim(":"), Delim(" "), String("open"),
Static("\r\n\r\n")
]
)
# CloseHeader = Group("HTTP Close Header")
# CloseHeader.add_definition([
# # GET / HTTP/1.1\r\n
# s_static("GET / HTTP/1.1\r\n"),
# # Connection: close
# s_static("Connection"), s_delim(":"), s_delim(" "), s_string("close"),
# s_static("\r\n\r\n")
# ])
Why would they post it, if it's another's library code? And is there a good explanation of how to work with the boofuzz library?
If you Google "http protocol format", the first result right now is this HTTP tutorial. If you read a few pages there, you can get a pretty good description of the protocol format. Based on that, I wrote the following fuzz script, source code here:
#!/usr/bin/env python
# Designed for use with boofuzz v0.0.9
from boofuzz import *
def main():
session = Session(
target=Target(
connection=SocketConnection("127.0.0.1", 80, proto='tcp')
),
)
s_initialize(name="Request")
with s_block("Request-Line"):
s_group("Method", ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'CONNECT', 'OPTIONS', 'TRACE'])
s_delim(" ", name='space-1')
s_string("/index.html", name='Request-URI')
s_delim(" ", name='space-2')
s_string('HTTP/1.1', name='HTTP-Version')
s_static("\r\n", name="Request-Line-CRLF")
s_static("\r\n", "Request-CRLF")
session.connect(s_get("Request"))
session.fuzz()
if __name__ == "__main__":
main()
Although I got tripped up for a while because I only had one CRLF. After checking RFC 2616 (Section 5), it's pretty clear that this example should end with two CRLFs.
Request = Request-Line ; Section 5.1
*(( general-header ; Section 4.5
| request-header ; Section 5.3
| entity-header ) CRLF) ; Section 7.1
CRLF
[ message-body ] ; Section 4.3
[...]
Request-Line = Method SP Request-URI SP HTTP-Version CRLF
Obviously, this fuzz script doesn't come close to covering the whole protocol. Just a few things that could be added:
HTTP Headers (there are a lot)
Specialized formats for each HTTP Method
A message body (e.g. on POST)
Some way to choose valid URIs for the particular target server
Report warnings based on server response (could get noisy, but server errors do tend to indicate... errors)
Related
I am trying to format the output of the logging to have the levelname on the right side of the terminal always. I currently have a script that looks like:
import logging, os, time
fn = 'FN'
start = time.time()
def getTerminalSize():
import os
env = os.environ
def ioctl_GWINSZ(fd):
try:
import fcntl, termios, struct, os
cr = struct.unpack('hh', fcntl.ioctl(fd, termios.TIOCGWINSZ,
'1234'))
except:
return
return cr
cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
if not cr:
try:
fd = os.open(os.ctermid(), os.O_RDONLY)
cr = ioctl_GWINSZ(fd)
os.close(fd)
except:
pass
if not cr:
cr = (env.get('LINES', 25), env.get('COLUMNS', 80))
return int(cr[1]), int(cr[0])
(width, _) = getTerminalSize()
level_width = 8
message_width = width - level_width - 4
FORMAT = '%(message)-{len1:{width1}d}s [%(levelname){len2:{width2}d}s]'.format(
len1 = message_width,_
len2 = level_width,_
width1 = len(str(message_width)),_
width2 = len(str(level_width)))
logging.basicConfig(format=FORMAT, level="DEBUG")
logging.debug("Debug Message")
logging.info("Info Message")
logging.warning("Warning Message")
logging.error("Error Message")
logging.critical("Critical Message")
logging.info("Starting File: " + os.path.basename(fn) + "\n-----------------------------------------")
logging.info("\tTo read data: %s"%(time.time() - start))
The output looks like:
Debug Message [ DEBUG]
Info Message [ INFO]
Warning Message [ WARNING]
Error Message [ ERROR]
Critical Message [CRITICAL]
Starting File: Channel209.Raw32
----------------------------------------- [ INFO]
To read data: 0.281999826431 [
INFO]
I would like the output to look something like this instead and can't quite figure it out:
Debug Message [ DEBUG]
Info Message [ INFO]
Warning Message [ WARNING]
Error Message [ ERROR]
Critical Message [CRITICAL]
Starting File: Channel209.Raw32
----------------------------------------- [ INFO]
To read data: 0.281999826431 [ INFO]
As #Carpetsmoker said, to do what I truly desired required creating a new formatter class overwriting the default.
The following class worked well for this process:
import logging
import textwrap
import itertools
'''
MyFormatter class
Adapted from: https://stackoverflow.com/questions/6847862/how-to-change-the-format-of-logged-messages-temporarily-in-python
https://stackoverflow.com/questions/3096402/python-string-formatter-for-paragraphs
Authors: Vinay Sajip, unutbu
'''
class MyFormatter(logging.Formatter):
#This function overwrites logging.Formatter.format
#We conver the msg into the overall format we want to see
def format(self,record):
widths=[getTerminalSize()[0] - 12 ,10]
form='{row[0]:<{width[0]}} {row[1]:<{width[1]}}'
#Instead of formatting...rewrite message as desired here
record.msg = self.Create_Columns(form,widths,[record.msg],["[%8s]"%record.levelname])
#Return basic formatter
return super(MyFormatter,self).format(record)
def Create_Columns(self,format_str,widths,*columns):
'''
format_str describes the format of the report.
{row[i]} is replaced by data from the ith element of columns.
widths is expected to be a list of integers.
{width[i]} is replaced by the ith element of the list widths.
All the power of Python's string format spec is available for you to use
in format_str. You can use it to define fill characters, alignment, width, type, etc.
formatter takes an arbitrary number of arguments.
Every argument after format_str and widths should be a list of strings.
Each list contains the data for one column of the report.
formatter returns the report as one big string.
'''
result=[]
for row in zip(*columns):
#Create a indents for each row...
sub = []
#Loop through
for r in row:
#Expand tabs to spaces to make our lives easier
r = r.expandtabs()
#Find the leading spaces and create indend character
if r.find(" ") == 0:
i = 0
for letters in r:
if not letters == " ":
break
i += 1
sub.append(" "*i)
else:
sub.append("")
#Actually wrap and creat the string to return...stolen from internet
lines=[textwrap.wrap(elt, width=num, subsequent_indent=ind) for elt,num,ind in zip(row,widths,sub)]
for line in itertools.izip_longest(*lines,fillvalue=''):
result.append(format_str.format(width=widths,row=line))
return '\n'.join(result)
It relies on getting the terminal size in some function called getTerminalSize. I used Harco Kuppens' Method that I will not repost here.
An example driver program is as follows, where MyFormatter and getTerminalSize are located in Colorer:
import logging
import Colorer
logger = logging.getLogger()
logger_handler = logging.StreamHandler()
logger.addHandler(logger_handler)
logger_handler.setFormatter(Colorer.MyFormatter("%(message)s"))
logger.setLevel("DEBUG")
logging.debug("\t\tTHIS IS A REALY long DEBUG Message that works and wraps around great........")
logging.info(" THIS IS A REALY long INFO Message that works and wraps around great........")
logging.warning("THIS IS A REALY long WARNING Message that works and wraps around great........")
logging.error("\tTHIS IS A REALY long ERROR Message that works and wraps around great........")
logging.critical("THIS IS A REALY long CRITICAL Message that works and wraps around great........")
Where the output is (commented for readability):
# THIS IS A REALY long DEBUG Message that works and [ DEBUG]
# wraps around great........
# THIS IS A REALY long INFO Message that works and wraps around [ INFO]
# great........
# THIS IS A REALY long WARNING Message that works and wraps around [ WARNING]
# great........
# THIS IS A REALY long ERROR Message that works and wraps [ ERROR]
# around great........
# THIS IS A REALY long CRITICAL Message that works and wraps around [CRITICAL]
# great........
I modified the last lines to look like:
logging.info("Starting File: %s" % os.path.basename(fn))
logging.info("%s" % ('-' * 15))
logging.info(" To read data: %s" % (time.time() - start))
Your error was using a newline (\n) and tab (\t) character.
Or, if you must keep the newline (which seems rather odd to me), you could manually add the spaces, like so:
logging.info("Starting File: %s\n%s%s" % (
os.path.basename(fn),
('-' * 15),
' ' * (width - 15 - 12)))
Other notes
You should create a Minimal, Complete, Tested and Readable. Your code wasn't working, I needed to modify a few things just to get the example running. See your messages edit history for what I had to edit.
Since Python 3.3, there's os.get_terminal_size. If that isn't available, then doing subprocess.call(['tput cols'], shell=True) seems a whole lot simpler to me...
I wrote a python script to fetch all of my gmail. I have hundreds of thousands of old emails, of which about 10,000 were unread.
After successfully fetching all of my email, I find that gmail has marked all the fetched emails as "read". This is disastrous for me since I need to check all unread emails only.
How can I recover the information about which emails were unread? I dumped each mail object into files, the core of my code is shown below:
m = imaplib.IMAP4_SSL("imap.gmail.com")
m.login(user,pwd)
m.select("[Gmail]/All Mail")
resp, items = m.uid('search', None, 'ALL')
uids = items[0].split()
for uid in uids:
resp, data = m.uid('fetch', uid, "(RFC822)")
email_body = data[0][1]
mail = email.message_from_string(email_body)
dumbobj(uid, mail)
I am hoping there is either an option to undo this in gmail, or a member inside the stored mail objects reflecting the seen-state information.
For anyone looking to prevent this headache, consider this answer here. This does not work for me, however, since the damage has already been done.
Edit:
I have written the following function to recursively "grep" all strings in an object, and applied it to a dumped email object using the following keywords:
regex = "(?i)((marked)|(seen)|(unread)|(read)|(flag)|(delivered)|(status)|(sate))"
So far, no results (only an unrelated "Delivered-To"). Which other keywords could I try?
def grep_object (obj, regex , cycle = set(), matched = set()):
import re
if id(obj) in cycle:
return
cycle.update([id(obj)])
if isinstance(obj, basestring):
if re.search(regex, obj):
matched.update([obj])
def grep_dict (adict ):
try:
[ [ grep_object(a, regex, cycle, matched ) for a in ab ] for ab in adict.iteritems() ]
except:pass
grep_dict(obj)
try:grep_dict(obj.__dict__)
except:pass
try:
[ grep_object(elm, regex, cycle, matched ) for elm in obj ]
except: pass
return matched
grep_object(mail_object, regex)
I'm having a similar problem (not with gmail), and the biggest problem for me was to make a reproducible test case; and I finally managed to produce one (see below).
In terms of the Seen flag, I now gather it goes like this:
If a message is new/unseen, IMAP fetch for \Seen flag will return empty (i.e. it will not be present, as related to the email message).
If you do IMAP select on a mailbox (INBOX), you get a "flag" UNSEEN which contains a list of ids (or uids) of emails in that folder that are new (do not have the \Seen flag)
In my test case, if you fetch say headers for a message with BODY.PEEK, then \Seen on a message is not set; if you fetch them with BODY, then \Seen is set
In my test case, also fetching (RFC822) doesn't set \Seen (unlike your case with Gmail)
In the test case, I try to do pprint.pprint(inspect.getmembers(mail)) (in lieu of your dumpobj(uid, mail)) - but only after I'm certain \Seen has been set. The output I get is posted in mail_object_inspect.txt - and as far as I can see, there is no mention of 'new/read/seen' etc. in none of the readable fields; furthermore mail.as_string() prints:
'From: jesse#example.com\nTo: user#example.com\nSubject: This is a test message!\n\nHello. I am executive assistant to the director of\nBear Stearns, a failed investment Bank. I have\naccess to USD6,000,000. ...\n'
Even worse, there is no mention of "fields" anywhere in the imaplib code (below filenames are printed if they do not contain case-insensitive "field" anywhere):
$ grep -L -i field /usr/lib/python{2.7,3.2}/imaplib.py
/usr/lib/python2.7/imaplib.py
/usr/lib/python3.2/imaplib.py
... so I guess that information was not saved with your dumps.
Here is a bit on reconstructing the test case. The hardest was to find a small IMAP server, that can be quickly ran with some arbitrary users and emails, but without having to install a ton of stuff on your system. Finally I found one: trivial-server.pl, the example file of Perl's Net::IMAP::Server; tested on Ubuntu 11.04.
The test case is pasted in this gist, with two files (with many comments) that I'll try to post abridged:
trivial-serverB.pl - Perl (v5.10.1) Net::IMAP::Server server (has a terminal output paste at end of file with a telnet client session)
testimap.py - Python 2.7/3.2 imaplib
client (has a terminal output paste at end of file, of itself operating with the server)
trivial-serverB.pl
First, make sure you have Net::IMAP::Server - note, it has many dependencies, so the below command may take a while to install:
sudo perl -MCPAN -e 'install Net::IMAP::Server'
Then, in the directory where you got trivial-serverB.pl, create a subdirectory with SSL certificates:
mkdir certs
openssl req \
-x509 -nodes -days 365 \
-subj '/C=US/ST=Oregon/L=Portland/CN=localhost' \
-newkey rsa:1024 -keyout certs/server-key.pem -out certs/server-cert.pem
Finally run the server with administrative properties:
sudo perl trivial-serverB.pl
Note that the trivial-serverB.pl has a hack which will let a client to connect without SSL. Here is trivial-serverB.pl:
#!/usr/bin/perl
use v5.10.1;
use feature qw(say);
use Net::IMAP::Server;
package Demo::IMAP::Hack;
$INC{'Demo/IMAP/Hack.pm'} = 1;
sub capabilityb {
my $self = shift;
print STDERR "Capabilitin'\n";
my $base = $self->server->capability;
my #words = split " ", $base;
#words = grep {$_ ne "STARTTLS"} #words
if $self->is_encrypted;
unless ($self->auth) {
my $auth = $self->auth || $self->server->auth_class->new;
my #auth = $auth->sasl_provides;
# hack:
#unless ($self->is_encrypted) {
# # Lack of encrpytion makes us turn off all plaintext auth
# push #words, "LOGINDISABLED";
# #auth = grep {$_ ne "PLAIN"} #auth;
#}
push #words, map {"AUTH=$_"} #auth;
}
return join(" ", #words);
}
package Demo::IMAP::Auth;
$INC{'Demo/IMAP/Auth.pm'} = 1;
use base 'Net::IMAP::Server::DefaultAuth';
sub auth_plain {
my ( $self, $user, $pass ) = #_;
# XXX DO AUTH CHECK
$self->user($user);
return 1;
}
package Demo::IMAP::Model;
$INC{'Demo/IMAP/Model.pm'} = 1;
use base 'Net::IMAP::Server::DefaultModel';
sub init {
my $self = shift;
$self->root( Demo::IMAP::Mailbox->new() );
$self->root->add_child( name => "INBOX" );
}
###########################################
package Demo::IMAP::Mailbox;
use base qw/Net::IMAP::Server::Mailbox/;
use Data::Dumper;
my $data = <<'EOF';
From: jesse#example.com
To: user#example.com
Subject: This is a test message!
Hello. I am executive assistant to the director of
Bear Stearns, a failed investment Bank. I have
access to USD6,000,000. ...
EOF
my $msg = Net::IMAP::Server::Message->new($data);
sub load_data {
my $self = shift;
$self->add_message($msg);
}
my %ports = ( port => 143, ssl_port => 993 );
$ports{$_} *= 10 for grep {$> > 0} keys %ports;
$myserv = Net::IMAP::Server->new(
auth_class => "Demo::IMAP::Auth",
model_class => "Demo::IMAP::Model",
user => 'nobody',
log_level => 3, # at least 3 to output 'CONNECT TCP Peer: ...' message; 4 to output IMAP commands too
%ports,
);
# apparently, this overload MUST be after the new?! here:
{
no strict 'refs';
*Net::IMAP::Server::Connection::capability = \&Demo::IMAP::Hack::capabilityb;
}
# https://stackoverflow.com/questions/27206371/printing-addresses-of-perl-object-methods
say " -", $myserv->can('validate'), " -", $myserv->can('capability'), " -", \&Net::IMAP::Server::Connection::capability, " -", \&Demo::IMAP::Hack::capabilityb;
$myserv->run();
testimap.py
With the server above running in one terminal, in another terminal you can just do:
python testimap.py
The code will simply read fields and content from the one (and only) message the server above presents, and will eventually restore (remove) the \Seen field.
import sys
if sys.version_info[0] < 3: # python 2.7
def uttc(x):
return x
else: # python 3+
def uttc(x):
return x.decode("utf-8")
import imaplib
import email
import pprint,inspect
imap_user = 'nobody'
imap_password = 'whatever'
imap_server = 'localhost'
conn = imaplib.IMAP4(imap_server)
conn.debug = 3
try:
(retcode, capabilities) = conn.login(imap_user, imap_password)
except:
print(sys.exc_info()[1])
sys.exit(1)
# not conn.select(readonly=1), else we cannot modify the \Seen flag later
conn.select() # Select inbox or default namespace
(retcode, messages) = conn.search(None, '(UNSEEN)')
if retcode == 'OK':
for num in uttc(messages[0]).split(' '):
if not(num):
print("No messages available: num is `{0}`!".format(num))
break
print('Processing message: {0}'.format(num))
typ, data = conn.fetch(num,'(FLAGS)')
isSeen = ( "Seen" in uttc(data[0]) )
print('Got flags: {2}: {0} .. {1}'.format(typ,data, # NEW: OK .. ['1 (FLAGS ())']
"Seen" if isSeen else "NEW"))
print('Peeking headers, message: {0} '.format(num))
typ, data = conn.fetch(num,'(BODY.PEEK[HEADER])')
pprint.pprint(data)
typ, data = conn.fetch(num,'(FLAGS)')
isSeen = ( "Seen" in uttc(data[0]) )
print('Got flags: {2}: {0} .. {1}'.format(typ,data, # NEW: OK .. ['1 (FLAGS ())']
"Seen" if isSeen else "NEW"))
print('Get RFC822 body, message: {0} '.format(num))
typ, data = conn.fetch(num,'(RFC822)')
mail = email.message_from_string(uttc(data[0][1]))
#pprint.pprint(inspect.getmembers(mail))
typ, data = conn.fetch(num,'(FLAGS)')
isSeen = ( "Seen" in uttc(data[0]) )
print('Got flags: {2}: {0} .. {1}'.format(typ,data, # NEW: OK .. ['1 (FLAGS ())']
"Seen" if isSeen else "NEW"))
print('Get headers, message: {0} '.format(num))
typ, data = conn.fetch(num,'(BODY[HEADER])') # note, FLAGS (\\Seen) is now in data, even if not explicitly requested!
pprint.pprint(data)
print('Get RFC822 body, message: {0} '.format(num))
typ, data = conn.fetch(num,'(RFC822)')
mail = email.message_from_string(uttc(data[0][1]))
pprint.pprint(inspect.getmembers(mail)) # this is in mail_object_inspect.txt
pprint.pprint(mail.as_string())
typ, data = conn.fetch(num,'(FLAGS)')
isSeen = ( "Seen" in uttc(data[0]) )
print('Got flags: {2}: {0} .. {1}'.format(typ,data, # Seen: OK .. ['1 (FLAGS (\\Seen))']
"Seen" if isSeen else "NEW"))
conn.select() # select again, to see flags server side
# * OK [UNSEEN 0] # no more unseen messages (if there was only one msg in folder)
print('Restoring flag to unseen/new, message: {0} '.format(num))
ret, data = conn.store(num,'-FLAGS','\\Seen')
if ret == 'OK':
print("Set back to unseen; Got OK: {0}{1}{2}".format(data,'\n',30*'-'))
print(mail)
typ, data = conn.fetch(num,'(FLAGS)')
isSeen = ( "Seen" in uttc(data[0]) )
print('Got flags: {2}: {0} .. {1}'.format(typ,data, # NEW: OK .. [b'1 (FLAGS ())']
"Seen" if isSeen else "NEW"))
conn.close()
References
How do I mock an IMAP server in Python, despite extreme laziness?
Get only NEW Emails imaplib and python
Undoing "marked as read" status of emails fetched with imaplib
http://www.skytale.net/blog/archives/23-Manual-IMAP.html
IMAP FETCH Subject
https://mail.python.org/pipermail/python-list/2009-March/527020.html
http://www.thecodingforums.com/threads/re-imaplib-fetch-message-flags.673872/
I'm using this wonderful resource to grab tweets from twitter.
Twitter streams the tweets, and this script grabs them and displays them in the terminal.
I apologize for the beginners question, but how would I limit the number of tweets or the total size of all tweets that are received using this script?
I understand that I can write these to a file and watch the size of the file, but is there a way to do count the bytes received or number of tweets received?
The script is below as well as in the link above.
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2012 Gustav ArngÄrden
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import time
import pycurl
import urllib
import json
import oauth2 as oauth
API_ENDPOINT_URL = 'https://stream.twitter.com/1.1/statuses/filter.json'
USER_AGENT = 'TwitterStream 1.0' # This can be anything really
# You need to replace these with your own values
OAUTH_KEYS = {'consumer_key': <Consumer key>,
'consumer_secret': <Consumer secret>,
'access_token_key': <Token key>,
'access_token_secret': <Token secret>}
# These values are posted when setting up the connection
POST_PARAMS = {'include_entities': 0,
'stall_warning': 'true',
'track': 'iphone,ipad,ipod'}
class TwitterStream:
def __init__(self, timeout=False):
self.oauth_token = oauth.Token(key=OAUTH_KEYS['access_token_key'], secret=OAUTH_KEYS['access_token_secret'])
self.oauth_consumer = oauth.Consumer(key=OAUTH_KEYS['consumer_key'], secret=OAUTH_KEYS['consumer_secret'])
self.conn = None
self.buffer = ''
self.timeout = timeout
self.setup_connection()
def setup_connection(self):
""" Create persistant HTTP connection to Streaming API endpoint using cURL.
"""
if self.conn:
self.conn.close()
self.buffer = ''
self.conn = pycurl.Curl()
# Restart connection if less than 1 byte/s is received during "timeout" seconds
if isinstance(self.timeout, int):
self.conn.setopt(pycurl.LOW_SPEED_LIMIT, 1)
self.conn.setopt(pycurl.LOW_SPEED_TIME, self.timeout)
self.conn.setopt(pycurl.URL, API_ENDPOINT_URL)
self.conn.setopt(pycurl.USERAGENT, USER_AGENT)
# Using gzip is optional but saves us bandwidth.
self.conn.setopt(pycurl.ENCODING, 'deflate, gzip')
self.conn.setopt(pycurl.POST, 1)
self.conn.setopt(pycurl.POSTFIELDS, urllib.urlencode(POST_PARAMS))
self.conn.setopt(pycurl.HTTPHEADER, ['Host: stream.twitter.com',
'Authorization: %s' % self.get_oauth_header()])
# self.handle_tweet is the method that are called when new tweets arrive
self.conn.setopt(pycurl.WRITEFUNCTION, self.handle_tweet)
def get_oauth_header(self):
""" Create and return OAuth header.
"""
params = {'oauth_version': '1.0',
'oauth_nonce': oauth.generate_nonce(),
'oauth_timestamp': int(time.time())}
req = oauth.Request(method='POST', parameters=params, url='%s?%s' % (API_ENDPOINT_URL,
urllib.urlencode(POST_PARAMS)))
req.sign_request(oauth.SignatureMethod_HMAC_SHA1(), self.oauth_consumer, self.oauth_token)
return req.to_header()['Authorization'].encode('utf-8')
def start(self):
""" Start listening to Streaming endpoint.
Handle exceptions according to Twitter's recommendations.
"""
backoff_network_error = 0.25
backoff_http_error = 5
backoff_rate_limit = 60
while True:
self.setup_connection()
try:
self.conn.perform()
except:
# Network error, use linear back off up to 16 seconds
print 'Network error: %s' % self.conn.errstr()
print 'Waiting %s seconds before trying again' % backoff_network_error
time.sleep(backoff_network_error)
backoff_network_error = min(backoff_network_error + 1, 16)
continue
# HTTP Error
sc = self.conn.getinfo(pycurl.HTTP_CODE)
if sc == 420:
# Rate limit, use exponential back off starting with 1 minute and double each attempt
print 'Rate limit, waiting %s seconds' % backoff_rate_limit
time.sleep(backoff_rate_limit)
backoff_rate_limit *= 2
else:
# HTTP error, use exponential back off up to 320 seconds
print 'HTTP error %s, %s' % (sc, self.conn.errstr())
print 'Waiting %s seconds' % backoff_http_error
time.sleep(backoff_http_error)
backoff_http_error = min(backoff_http_error * 2, 320)
def handle_tweet(self, data):
""" This method is called when data is received through Streaming endpoint.
"""
self.buffer += data
if data.endswith('\r\n') and self.buffer.strip():
# complete message received
message = json.loads(self.buffer)
self.buffer = ''
msg = ''
if message.get('limit'):
print 'Rate limiting caused us to miss %s tweets' % (message['limit'].get('track'))
elif message.get('disconnect'):
raise Exception('Got disconnect: %s' % message['disconnect'].get('reason'))
elif message.get('warning'):
print 'Got warning: %s' % message['warning'].get('message')
else:
print 'Got tweet with text: %s' % message.get('text')
if __name__ == '__main__':
ts = TwitterStream()
ts.setup_connection()
ts.start()
Why not a counter in your __init__:
def __init__(self, timeout=False):
...
self.tweetsSoFar = 0
def handleTweet(self, data):
...
else:
self.tweetsSoFar += 1 # we've seen another tweet!
print 'Got tweet with text: %s' % message.get('text')
def start(self):
...
while self.tweetsSoFar < 50: # stop at 50 tweets
...
Hope this helps
I'm not too familiar with the API, but assuming each complete message represents a tweet, then what you can do is to first add a counter in the class:
self.tweets = 0
Then increment it in handle_tweet after a full message is received:
if data.endswith('\r\n') and self.buffer.strip():
# complete message received
message = json.loads(self.buffer)
...
...
else:
print 'Got tweet with text: %s' % message.get('text')
self.tweets += 1
if self.tweets == SOME_LIMIT:
self.conn.close() # or exit()
I'd like to write a small Bluetooth server application to my Nokia phone in PyS60. It needs to be able to send response to the client's request and be able to push data to the client as well.
option 1:
if I use socket.recv(1024), the program waits until something is received, therefore the server can't push data to the client. The Python for S60 implementation is missing the socket.settimeout() method, so I couldn't write a proper non-blocking code.
oprion 2:
The socket.makefile() approach was looking good, but couldn't make it work. When I replaced the conn.recv(1024) to fd = socket.makefile() fd.readline(), it didn't read a thing.
option 3:
Looked into the select() function, but had no luck with it. When I changed the conn.recv() to the r,w,e = select.select([conn],[],[]) like it's been suggested the client doesn't even connect. It hangs at "Waiting for the client...". Strange...
I know that there are pretty nice server implementations and asynchronous API-s as well, but I only need a really basic stuff here. Thanks in advance!
here's what I have:
sock = btsocket.socket(btsocket.AF_BT, btsocket.SOCK_STREAM)
channel = btsocket.bt_rfcomm_get_available_server_channel(sock)
sock.bind(("", channel))
sock.listen(1)
btsocket.bt_advertise_service(u"name", sock, True, btsocket.RFCOMM)
print "Waiting for the client..."
conn, client_mac = sock.accept()
print "connected: " + client_mac
while True:
try:
data = conn.recv(1024)
if len(data) != 0:
print "received [%s]" % data
if data.startswith("something"): conn.send("something\r\n")
else:
conn.send("some other data \r\n")
except:
pass
It's obviously blocking, so the "some other data" is never sent, but it's the best I've got so far. At least I can send something in reply to the client.
Found the solution finally!
The select function wasn't working with the btsocket module of the newer PyS60 ports.
Someone wrote a new_btsocket (available here) with a working select function.
Here is a simple example based on an echo server
#!/usr/bin/python
import socket
import select
server = socket.socket( socket.AF_INET, socket.SOCK_STREAM )
server.bind( ('localhost', 12556) )
server.listen( 5 )
toread = [server]
running = 1
# we will shut down when all clients disconenct
while running:
rready,wready,err = select.select( toread, [], [] )
for s in rready:
if s == server:
# accepting the socket, which the OS passes off to another
# socket so we can go back to selecting. We'll append this
# new socket to the read list we select on next pass
client, address = server.accept()
toread.append( client ) # select on this socket next time
else:
# Not the server's socket, so we'll read
data = s.recv( 1024 )
if data:
print "Received %s" % ( data )
else:
print "Client disconnected"
s.close()
# remove socket so we don't watch an invalid
# descriptor, decrement client count
toread.remove( s )
running = len(toread) - 1
# clean up
server.close()
That said, I still find socketserver cleaner and easier. Implement handle_request and call serve_forever
Here's an Epoll Server Implementation (non-blocking)
http://pastebin.com/vP6KPTwH (same thing as below, felt this might be easier to copy)
use python epollserver.py to start the server.
Test it using wget localhost:8888
import sys
import socket, select
import fcntl
import email.parser
import StringIO
import datetime
"""
See:
http://docs.python.org/library/socket.html
"""
__author__ = ['Caleb Burns', 'Ben DeMott']
def main(argv=None):
EOL1 = '\n\n'
EOL2 = '\n\r\n'
response = 'HTTP/1.0 200 OK\r\nDate: Mon, 1 Jan 1996 01:01:01 GMT\r\n'
response += 'Content-Type: text/plain\r\nContent-Length: 13\r\n\r\n'
response += 'Hello, world!'
serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# Tell the server socket file descriptor to destroy itself when this program ends.
socketFlags = fcntl.fcntl(serversocket.fileno(), fcntl.F_GETFD)
socketFlags |= fcntl.FD_CLOEXEC
fcntl.fcntl(serversocket.fileno(), fcntl.F_SETFD, socketFlags)
serversocket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
serversocket.bind(('0.0.0.0', 8888))
serversocket.listen(1)
# Use asynchronous sockets.
serversocket.setblocking(0)
# Allow a queue of up to 128 requests (connections).
serversocket.listen(128)
# Listen to socket events on the server socket defined by the above bind() call.
epoll = select.epoll()
epoll.register(serversocket.fileno(), select.EPOLLIN)
print "Epoll Server Started..."
try:
#The connection dictionary maps file descriptors (integers) to their corresponding network connection objects.
connections = {}
requests = {}
responses = {}
while True:
# Ask epoll if any sockets have events and wait up to 1 second if no events are present.
events = epoll.poll(1)
# fileno is a file desctiptor.
# event is the event code (type).
for fileno, event in events:
# Check for a read event on the socket because a new connection may be present.
if fileno == serversocket.fileno():
# connection is a new socket object.
# address is client IP address. The format of address depends on the address family of the socket (i.e., AF_INET).
connection, address = serversocket.accept()
# Set new socket-connection to non-blocking mode.
connection.setblocking(0)
# Listen for read events on the new socket-connection.
epoll.register(connection.fileno(), select.EPOLLIN)
connections[connection.fileno()] = connection
requests[connection.fileno()] = b''
responses[connection.fileno()] = response
# If a read event occured, then read the new data sent from the client.
elif event & select.EPOLLIN:
requests[fileno] += connections[fileno].recv(1024)
# Once we're done reading, stop listening for read events and start listening for EPOLLOUT events (this will tell us when we can start sending data back to the client).
if EOL1 in requests[fileno] or EOL2 in requests[fileno]:
epoll.modify(fileno, select.EPOLLOUT)
# Print request data to the console.
epoll.modify(fileno, select.EPOLLOUT)
data = requests[fileno]
eol = data.find("\r\n") #this is the end of the FIRST line
start_line = data[:eol] #get the contents of the first line (which is the protocol information)
# method is POST|GET, etc
method, uri, http_version = start_line.split(" ")
# re-used facebooks httputil library (works well to normalize and parse headers)
headers = HTTPHeaders.parse(data[eol:])
print "\nCLIENT: FD:%s %s: '%s' %s" % (fileno, method, uri, datetime.datetime.now())
# If the client is ready to receive data, sent it out response.
elif event & select.EPOLLOUT:
# Send response a single bit at a time until the complete response is sent.
# NOTE: This is where we are going to use sendfile().
byteswritten = connections[fileno].send(responses[fileno])
responses[fileno] = responses[fileno][byteswritten:]
if len(responses[fileno]) == 0:
# Tell the socket we are no longer interested in read/write events.
epoll.modify(fileno, 0)
# Tell the client we are done sending data and it can close the connection. (good form)
connections[fileno].shutdown(socket.SHUT_RDWR)
# EPOLLHUP (hang-up) events mean the client has disconnected so clean-up/close the socket.
elif event & select.EPOLLHUP:
epoll.unregister(fileno)
connections[fileno].close()
del connections[fileno]
finally:
# Close remaining open socket upon program completion.
epoll.unregister(serversocket.fileno())
epoll.close()
serversocket.close()
#!/usr/bin/env python
#
# Copyright 2009 Facebook
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""HTTP utility code shared by clients and servers."""
class HTTPHeaders(dict):
"""A dictionary that maintains Http-Header-Case for all keys.
Supports multiple values per key via a pair of new methods,
add() and get_list(). The regular dictionary interface returns a single
value per key, with multiple values joined by a comma.
>>> h = HTTPHeaders({"content-type": "text/html"})
>>> h.keys()
['Content-Type']
>>> h["Content-Type"]
'text/html'
>>> h.add("Set-Cookie", "A=B")
>>> h.add("Set-Cookie", "C=D")
>>> h["set-cookie"]
'A=B,C=D'
>>> h.get_list("set-cookie")
['A=B', 'C=D']
>>> for (k,v) in sorted(h.get_all()):
... print '%s: %s' % (k,v)
...
Content-Type: text/html
Set-Cookie: A=B
Set-Cookie: C=D
"""
def __init__(self, *args, **kwargs):
# Don't pass args or kwargs to dict.__init__, as it will bypass
# our __setitem__
dict.__init__(self)
self._as_list = {}
self.update(*args, **kwargs)
# new public methods
def add(self, name, value):
"""Adds a new value for the given key."""
norm_name = HTTPHeaders._normalize_name(name)
if norm_name in self:
# bypass our override of __setitem__ since it modifies _as_list
dict.__setitem__(self, norm_name, self[norm_name] + ',' + value)
self._as_list[norm_name].append(value)
else:
self[norm_name] = value
def get_list(self, name):
"""Returns all values for the given header as a list."""
norm_name = HTTPHeaders._normalize_name(name)
return self._as_list.get(norm_name, [])
def get_all(self):
"""Returns an iterable of all (name, value) pairs.
If a header has multiple values, multiple pairs will be
returned with the same name.
"""
for name, list in self._as_list.iteritems():
for value in list:
yield (name, value)
def items(self):
return [{key: value[0]} for key, value in self._as_list.iteritems()]
def get_content_type(self):
return dict.get(self, HTTPHeaders._normalize_name('content-type'), None)
def parse_line(self, line):
"""Updates the dictionary with a single header line.
>>> h = HTTPHeaders()
>>> h.parse_line("Content-Type: text/html")
>>> h.get('content-type')
'text/html'
"""
name, value = line.split(":", 1)
self.add(name, value.strip())
#classmethod
def parse(cls, headers):
"""Returns a dictionary from HTTP header text.
>>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
>>> sorted(h.iteritems())
[('Content-Length', '42'), ('Content-Type', 'text/html')]
"""
h = cls()
for line in headers.splitlines():
if line:
h.parse_line(line)
return h
# dict implementation overrides
def __setitem__(self, name, value):
norm_name = HTTPHeaders._normalize_name(name)
dict.__setitem__(self, norm_name, value)
self._as_list[norm_name] = [value]
def __getitem__(self, name):
return dict.__getitem__(self, HTTPHeaders._normalize_name(name))
def __delitem__(self, name):
norm_name = HTTPHeaders._normalize_name(name)
dict.__delitem__(self, norm_name)
del self._as_list[norm_name]
def get(self, name, default=None):
return dict.get(self, HTTPHeaders._normalize_name(name), default)
def update(self, *args, **kwargs):
# dict.update bypasses our __setitem__
for k, v in dict(*args, **kwargs).iteritems():
self[k] = v
#staticmethod
def _normalize_name(name):
"""Converts a name to Http-Header-Case.
>>> HTTPHeaders._normalize_name("coNtent-TYPE")
'Content-Type'
"""
return "-".join([w.capitalize() for w in name.split("-")])
if(__name__ == '__main__'):
sys.exit(main(sys.argv))
I am a Python re-newbie. I would like advice on handling program parameters which are in a file in json format. Currently, I am doing something like what is shown below, however, it seems too wordy, and the idea of typing the same literal string multiple times (sometimes with dashes and sometimes with underscores) seems juvenile - error prone - stinky... :-) (I do have many more parameters!)
#!/usr/bin/env python
import sys
import os
import json ## for control file parsing
# control parameters
mpi_nodes = 1
cluster_size = None
initial_cutoff = None
# ...
#process the arguments
if len(sys.argv) != 2:
raise Exception(
"""Usage:
run_foo <controls.json>
Where:
<control.json> is a dictionary of run parameters
"""
)
# We expect a .json file with our parameters
controlsFileName = sys.argv[1]
err = ""
err += "" #validateFileArgument(controlsFileName, exists=True)
# read in the control parameters from the .json file
try:
controls = json.load(open(controlsFileName, "r"))
except:
err += "Could not process the file '" + controlsFileName + "'!\n"
# check each control parameter. The first one is optional
if "mpi-nodes" in controls:
mpi_nodes = controls["mpi-nodes"]
else:
mpi_nodes = controls["mpi-nodes"] = 1
if "cluster-size" in controls:
cluster_size = controls["cluster-size"]
else:
err += "Missing control definition for \"cluster-size\".\n"
if "initial-cutoff" in controls:
initial_cutoff = controls["initial-cutoff"]
else:
err += "Missing control definition for \"initial-cutoff\".\n"
# ...
# Quit if any of these things were not true
if len(err) > 0:
print err
exit()
#...
This works, but it seems like there must be a better way. I am stuck with the requirements to use a json file and to use the hyphenated parameter names. Any ideas?
I was looking for something with more static binding. Perhaps this is as good as it gets.
Usually, we do things like this.
def get_parameters( some_file_name ):
source= json.loads( some_file_name )
return dict(
mpi_nodes= source.get('mpi-nodes',1),
cluster_size= source['cluster-size'],
initial_cutoff = source['initial-cutoff'],
)
controlsFileName= sys.argv[1]
try:
params = get_parameters( controlsFileName )
except IOError:
print "Could not process the file '{0}'!".format( controlsFileName )
sys.exit( 1 )
except KeyError, e:
print "Missing control definition for '{0}'.".format( e.message )
sys.exit( 2 )
A the end params['mpi_nodes'] has the value of mpi_nodes
If you want a simple variable, you do this. mpi_nodes = params['mpi_nodes']
If you want a namedtuple, change get_parameters like this
def get_parameters( some_file_name ):
Parameters= namedtuple( 'Parameters', 'mpi_nodes, cluster_size, initial_cutoff' )
return Parameters( source.get('mpi-nodes',1),
source['cluster-size'],
source['initial-cutoff'],
)
I don't know if you'd find that better or not.
the argparse library is nice, it can handle most of the argument parsing and validation for you as well as printing pretty help screens
[1] http://docs.python.org/dev/library/argparse.html
I will knock up a quick demo showing how you'd want to use it this arvo.
Assuming you have many more parameters to process, something like this could work:
def underscore(s):
return s.replace('-','_')
# parameters with default values
for name, default in (("mpi-nodes", 1),):
globals()[underscore(name)] = controls.get(name, default)
# mandatory parameters
for name in ("cluster-size", "initial-cutoff"):
try:
globals()[underscore(name)] = controls[name]
except KeyError:
err += "Missing control definition for %r" % name
Instead of manipulating globals, you can also make this more explicit:
def underscore(s):
return s.replace('-','_')
settings = {}
# parameters with default values
for name, default in (("mpi-nodes", 1),):
settings[underscore(name)] = controls.get(name, default)
# mandatory parameters
for name in ("cluster-size", "initial-cutoff"):
try:
settings[underscore(name)] = controls[name]
except KeyError:
err += "Missing control definition for %r" % name
# print out err if necessary
mpi_nodes = settings['mpi_nodes']
cluster_size = settings['cluster_size']
initial_cutoff = settings['initial_cutoff']
I learned something from all of these responses - thanks! I would like to get feedback on my approach which incorporates something from each suggestion. In addition to the conditions imposed by the client, I want something:
1) that is fairly obvious to use and to debug
2) that is easy to maintain and modify
I decided to incorporate str.replace, namedtuple, and globals(), creating a ControlParameters namedtuple in the globals() namespace.
#!/usr/bin/env python
import sys
import os
import collections
import json
def get_parameters(parameters_file_name ):
"""
Access all of the control parameters from the json filename given. A
variable of type namedtuple named "ControlParameters" is injected
into the global namespace. Parameter validation is not performed. Both
the names and the defaults, if any, are defined herein. Parameters not
found in the json file will get values of None.
Parameter usage example: ControlParameters.cluster_size
"""
parameterValues = json.load(open(parameters_file_name, "r"))
Parameters = collections.namedtuple( 'Parameters',
"""
mpi_nodes
cluster_size
initial_cutoff
truncation_length
"""
)
parameters = Parameters(
parameterValues.get(Parameters._fields[0].replace('_', '-'), 1),
parameterValues.get(Parameters._fields[1].replace('_', '-')),
parameterValues.get(Parameters._fields[2].replace('_', '-')),
parameterValues.get(Parameters._fields[3].replace('_', '-'))
)
globals()["ControlParameters"] = parameters
#process the program argument(s)
err = ""
if len(sys.argv) != 2:
raise Exception(
"""Usage:
foo <control.json>
Where:
<control.json> is a dictionary of run parameters
"""
)
# We expect a .json file with our parameters
parameters_file_name = sys.argv[1]
err += "" #validateFileArgument(parameters_file_name, exists=True)
if err == "":
get_parameters(parameters_file_name)
cp_dict = ControlParameters._asdict()
for name in ControlParameters._fields:
if cp_dict[name] == None:
err += "Missing control parameter '%s'\r\n" % name
print err
print "Done"