Uploading to GCS from Twisted - python

I need to put files into Google Cloud Storage from a Twisted application.
I had been using Amazon S3 with txAWS, but now that I'm using GCS I'm not sure anything exists that will let me do this.
Is it possible to use txAWS with GCS? It sounds like an odd question, but boto's S3Connection can be pointed at GCS, so maybe there's a way to do the same with txAWS?

I would suggest using the Twisted Web client with the GCS JSON API. Here's an example of listing the contents of a bucket:
import json

from twisted.internet import reactor
from twisted.internet.defer import Deferred
from twisted.internet.protocol import Protocol
from twisted.web.client import Agent, ResponseDone
from twisted.web.http_headers import Headers

GCS_BASE_URL = 'https://www.googleapis.com/storage/v1beta1'
GCS_API_KEY = '<your-api-key>'
GCS_BUCKET = '<your-bucket>'

class ResponseAccumulate(Protocol):
    def __init__(self, finished):
        self.finished = finished
        self.fullbuffer = ''

    def dataReceived(self, bytes):
        print 'Received %d bytes.' % len(bytes)
        self.fullbuffer += bytes

    def connectionLost(self, reason):
        # reason is a Failure; on normal completion it wraps ResponseDone
        if reason.check(ResponseDone):
            parsed = json.loads(self.fullbuffer)
            print 'Bucket contents:'
            for item in parsed['items']:
                print '  ', item['id']
        else:
            print 'Finished receiving body:', reason.getErrorMessage()
        self.finished.callback(None)

agent = Agent(reactor)

d = agent.request(
    'GET',
    '%s/b/%s/o?key=%s' % (GCS_BASE_URL, GCS_BUCKET, GCS_API_KEY),
    Headers({'User-Agent': ['Twisted Web Client Example']}),
    None)

def cbResponse(response):
    print 'Response received', response.code
    finished = Deferred()
    response.deliverBody(ResponseAccumulate(finished))
    return finished
d.addCallback(cbResponse)

def cbShutdown(ignored):
    reactor.stop()
d.addBoth(cbShutdown)

reactor.run()
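For the upload itself you can use the same Agent with a body producer. Below is a minimal sketch, not a verified implementation: the GCS_UPLOAD_URL pattern, the GCS_OAUTH_TOKEN placeholder, and the uploadObject helper are assumptions; writes to GCS generally need real OAuth credentials rather than just an API key, so check the endpoint and auth against the GCS docs for your API version.

from StringIO import StringIO
from twisted.web.client import Agent, FileBodyProducer
from twisted.web.http_headers import Headers

# Assumed media-upload endpoint; verify the exact path for your API version.
GCS_UPLOAD_URL = 'https://www.googleapis.com/upload/storage/v1beta1'
GCS_OAUTH_TOKEN = '<your-oauth-token>'

def uploadObject(agent, bucket, objectName, data):
    # Stream the in-memory payload as the request body.
    body = FileBodyProducer(StringIO(data))
    return agent.request(
        'POST',
        '%s/b/%s/o?uploadType=media&name=%s' % (GCS_UPLOAD_URL, bucket, objectName),
        Headers({'User-Agent': ['Twisted Web Client Example'],
                 'Authorization': ['Bearer %s' % GCS_OAUTH_TOKEN],
                 'Content-Type': ['application/octet-stream']}),
        body)

The returned Deferred fires with the response, which you can hand to the same kind of body-collecting protocol as in the listing example above.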

Related

Twisted HTTP Client access peer IP (server)

How can I access remote peer IP in this Twisted HTTP Client example? (From Twisted Docs)
Working with this example:
from sys import argv
from pprint import pformat

from twisted.internet.task import react
from twisted.web.client import Agent, readBody
from twisted.web.http_headers import Headers

def cbRequest(response):
    #print 'Response version:', response.version
    #print 'Response code:', response.code
    #print 'Response phrase:', response.phrase
    #print 'Response headers:'
    #print pformat(list(response.headers.getAllRawHeaders()))
    poweredby = response.headers.getRawHeaders("X-Powered-By")
    server = response.headers.getRawHeaders("Server")
    print poweredby
    print server
    d = readBody(response)
    d.addCallback(cbBody)
    return d

def cbBody(body):
    print 'Response body:'
    #print body

def main(reactor, url=b"http://www.example.com/"):
    agent = Agent(reactor)
    d = agent.request(
        'GET', url,
        Headers({'User-Agent': ['Twisted Web Client Example']}),
        None)
    d.addCallback(cbRequest)
    return d

react(main, argv[1:])
After searching on the Internet and SO, I found that it can be read from:
self.xmlstream.transport.getHandle().getpeername()
or
self.transport.getPeer()
However, I don't know which class "self" refers to, or where to put it in the example code.
Any help? Tips? Ideas?
Thanks,
It is possible to get the address, though you have to hack through some layers of abstraction and touch a private attribute:
from __future__ import print_function

from twisted.web.client import Agent
from twisted.internet.task import react
from twisted.internet.protocol import Protocol
from twisted.internet.defer import Deferred

class ReadAddress(Protocol):
    def __init__(self):
        self.result = Deferred()

    def connectionMade(self):
        # _producer is a private attribute of the transport wrapper
        self.result.callback(self.transport._producer.getPeer())

def readAddress(response):
    p = ReadAddress()
    response.deliverBody(p)
    return p.result

@react
def main(reactor):
    a = Agent(reactor)
    d = a.request(b"GET", b"http://www.google.com/")
    d.addCallback(readAddress)
    d.addCallback(print)
    return d
Ideally, there would be a simpler (public!) interface to retrieve information like this. It would be excellent if you could file a feature request in the Twisted tracker.

Writing a Twisted Client to send looping GET request to multiple API calls and record response

I haven't done Twisted programming in a while, so I'm trying to get back into it for a new project. I'm attempting to set up a Twisted client that can take a list of servers as an argument; for each server it sends an API GET call and writes the return message to a file. This API GET call should be repeated every 60 seconds.
I've done it successfully with a single server using Twisted's agent class:
from StringIO import StringIO
from twisted.internet import reactor
from twisted.internet.protocol import Protocol
from twisted.web.client import Agent
from twisted.web.http_headers import Headers
from twisted.internet.defer import Deferred
import datetime
from datetime import timedelta
import time
import optparse

count = 1
filename = "test.csv"

class server_response(Protocol):
    def __init__(self, finished):
        print "init server response"
        self.finished = finished
        self.remaining = 1024 * 10

    def dataReceived(self, bytes):
        if self.remaining:
            display = bytes[:self.remaining]
            print 'Some data received:'
            print display
            with open(filename, "a") as myfile:
                myfile.write(display)
            self.remaining -= len(display)

    def connectionLost(self, reason):
        print 'Finished receiving body:', reason.getErrorMessage()
        self.finished.callback(None)

def capture_response(response):
    print "Capturing response"
    finished = Deferred()
    response.deliverBody(server_response(finished))
    print "Done capturing:", finished
    return finished

def responseFail(err):
    print "error:", err
    reactor.stop()

def cl(ignored):
    print "sending req"
    agent = Agent(reactor)
    headers = {
        'authorization': [<snipped>],
        'cache-control': [<snipped>],
        'postman-token': [<snipped>]
    }
    URL = <snipped>
    print URL
    a = agent.request(
        'GET',
        URL,
        Headers(headers),
        None)
    a.addCallback(capture_response)
    reactor.callLater(60, cl, None)
    #a.addBoth(cbShutdown, count)

def cbShutdown(ignored, count):
    print "reactor stop"
    reactor.stop()

def parse_args():
    usage = """usage: %prog [options] [hostname]:port ...
Run it like this:
python test.py hostname1:instanceName1 hostname2:instancename2 ...
"""
    parser = optparse.OptionParser(usage)
    _, addresses = parser.parse_args()
    if not addresses:
        print parser.format_help()
        parser.exit()

    def parse_address(addr):
        if ':' not in addr:
            hostName = '127.0.0.1'
            instanceName = addr
        else:
            hostName, instanceName = addr.split(':', 1)
        return hostName, instanceName

    return map(parse_address, addresses)

if __name__ == '__main__':
    d = Deferred()
    d.addCallbacks(cl, responseFail)
    reactor.callWhenRunning(d.callback, None)
    reactor.run()
However, I'm having a tough time figuring out how to have multiple agents sending calls. With this, I'm relying on the reactor.callLater(60, cl, None) at the end of cl() to create the call loop. So how do I create multiple agent protocols (server_response(Protocol)) and continue to loop through the GET for each of them once my reactor is started?
Look what the cat dragged in!
So how do I create multiple call agent
Use treq. You rarely want to get tangled up with the Agent class.
This API GET call should be repeated every 60 seconds
Use LoopingCall instead of callLater; in this case it's easier and you'll run into fewer problems later.
import treq
from twisted.internet import task, reactor

filename = 'test.csv'

def writeToFile(content):
    with open(filename, 'ab') as f:
        f.write(content)

def everyMinute(*urls):
    for url in urls:
        d = treq.get(url)
        d.addCallback(treq.content)
        d.addCallback(writeToFile)

#----- Main -----#
sites = [
    'https://www.google.com',
    'https://www.amazon.com',
    'https://www.facebook.com']
repeating = task.LoopingCall(everyMinute, *sites)
repeating.start(60)
reactor.run()
It starts in the everyMinute() function, which runs every 60 seconds. Within that function, each endpoint is queried, and once the contents of a response become available, the treq.content function takes the response and returns its body. Finally, the contents are written to a file.
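One detail worth adding, which is not in the snippet above: an errback per request keeps a single unreachable site from becoming an unhandled error. A sketch, with logFailure as an illustrative name:

def logFailure(failure, url):
    # keep the loop alive if one request fails
    print 'request to %s failed: %s' % (url, failure.getErrorMessage())

def everyMinute(*urls):
    for url in urls:
        d = treq.get(url)
        d.addCallback(treq.content)
        d.addCallback(writeToFile)
        d.addErrback(logFailure, url)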
PS
Are you scraping or trying to extract something from those sites? If you are, Scrapy might be a good option for you.

Too long response (self.transport.write) is formed in Twisted

I'm trying to program the simplest client-server application with SSL, and I have some issues with it:
1) How can I decrease the time spent on JSON serialization?
2) Is there something better than LineReceiver for communication between the server and the client? Or can I increase the length of received lines?
Source code:
a) ServerSSL
import server

from twisted.internet.protocol import Factory
from twisted.internet import reactor
from OpenSSL import SSL

class ServerSSL(object):
    def getContext(self):
        ctx = SSL.Context(SSL.SSLv23_METHOD)
        ctx.use_certificate_file('server_cert.pem')
        ctx.use_privatekey_file('server_key.pem')
        return ctx

if __name__ == '__main__':
    factory = Factory()
    factory.protocol = server.Server
    reactor.listenSSL(8000, factory, ServerSSL())
    reactor.run()
b) Server
from json import dumps, loads

import sqlalchemy
from sqlalchemy.orm import sessionmaker
from db.create_db import Users

from twisted.internet.protocol import Protocol, Factory
from twisted.internet import reactor

engine = sqlalchemy.create_engine('postgresql://user:test@localhost/csan', pool_size=20, max_overflow=0)

class Server(Protocol):
    def __init__(self):
        self.Session = sessionmaker(bind=engine)

    def __del__(self):
        self.session.close()

    def authorization(self, data):
        """
        Checking user with DB
        """
        session = self.Session()
        result = session.execute(sqlalchemy.select([Users]).where(Users.name == data['user']))
        result = result.fetchone()
        if result is None:
            data['error'] = 404
        else:
            if result['name'] == data['user']:
                # correct users info --> real user
                if result['password'] == data['pswd']:
                    data['auth'] = 1
                # incorrect password --> fake user
                else:
                    data['error'] = 403
        session.close()
        return data

    def dataReceived(self, data):
        """
        Processing request from user and send response
        """
        new_data = loads(data)
        if new_data['cmd'] == 'AUTH':
            response = self.authorization(new_data)
            self.transport.write(str(dumps(new_data)))

if __name__ == '__main__':
    f = Factory()
    f.protocol = Server
    reactor.listenTCP(8000, f)
    reactor.run()
c) client_console
from json import dumps, loads

from twisted.internet.protocol import ClientFactory
from twisted.protocols.basic import LineReceiver
from twisted.internet import ssl, reactor

class ServerClientSSL(LineReceiver):
    """
    Basic client for talking with the server under SSL
    """
    def connectionMade(self):
        """
        Send auth request to serverSSL.py
        """
        login = raw_input('Login:')
        password = raw_input('Password:')
        hash_password = str(hash(password))
        data = dumps({'cmd': 'AUTH', 'user': login, 'pswd': hash_password, 'auth': 0, 'error': 0})
        self.sendLine(str(data))

    def connectionLost(self, reason):
        """
        Tells the client why the connection was closed
        """
        print 'connection lost (protocol)'

    def lineReceived(self, data):
        """
        Processing responses from serverSSL.py and sending new requests there
        """
        new_data = loads(data)
        if new_data['cmd'] == 'BBYE':
            self.transport.loseConnection()
        else:
            print new_data

class ServerClientSSLFactory(ClientFactory):
    protocol = ServerClientSSL

    def clientConnectionFailed(self, connector, reason):
        print 'connection failed:', reason.getErrorMessage()
        reactor.stop()

    def clientConnectionLost(self, connector, reason):
        print 'connection lost:', reason.getErrorMessage()
        reactor.stop()

if __name__ == '__main__':
    import sys
    if len(sys.argv) < 3:
        print 'Usage: python client_console.py [IP] [PORT]'
    else:
        ip = sys.argv[1]
        port = sys.argv[2]
        factory = ServerClientSSLFactory()
        reactor.connectSSL(ip, int(port), factory, ssl.ClientContextFactory())
        reactor.run()
class ServerSSL(object):
...
Don't write your own context factory. Use twisted.internet.ssl.CertificateOptions instead. It has fewer problems than what you have here.
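A minimal sketch of what that looks like, assuming the same PEM files and server module as above (PrivateCertificate.loadPEM wants the private key and the certificate together):

from twisted.internet import reactor, ssl
from twisted.internet.protocol import Factory
import server

with open('server_key.pem') as keyFile, open('server_cert.pem') as certFile:
    cert = ssl.PrivateCertificate.loadPEM(keyFile.read() + certFile.read())

factory = Factory()
factory.protocol = server.Server
# cert.options() returns a CertificateOptions context factory
reactor.listenSSL(8000, factory, cert.options())
reactor.run()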
def __del__(self):
    self.session.close()
First rule of __del__: don't use __del__. Adding this method doesn't give you automatic session cleanup. Instead, it almost certainly guarantees your session will never be cleaned up. Protocols have a method that gets called when they're done - it's called connectionLost. Use that instead.
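For example, something along these lines (a sketch; it assumes you want one session per connection rather than one per request):

    def connectionMade(self):
        self.session = self.Session()

    def connectionLost(self, reason):
        # called when the client goes away, cleanly or not
        self.session.close()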
result = session.execute(sqlalchemy.select([Users]).where(Users.name == data['user']))
result = result.fetchone()
Twisted is a single-threaded multi-tasking system. These statements block on network I/O and database operations. While they're running your server isn't doing anything else.
Use twisted.enterprise.adbapi or twext.enterprise.adbapi2 or alchimia to perform database interactions asynchronously instead.
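A rough sketch with twisted.enterprise.adbapi, assuming psycopg2 and the same credentials as the blocking version; the raw SQL query and the column handling are illustrative, not a drop-in replacement for the SQLAlchemy model:

from twisted.enterprise import adbapi

dbpool = adbapi.ConnectionPool('psycopg2', database='csan',
                               user='user', password='test', host='localhost')

def checkUser(rows, data):
    if not rows:
        data['error'] = 404
    elif rows[0][1] == data['pswd']:
        data['auth'] = 1
    else:
        data['error'] = 403
    return data

def authorization(data):
    # runQuery runs in a thread pool and returns a Deferred,
    # so the reactor keeps serving other clients in the meantime
    d = dbpool.runQuery("SELECT name, password FROM users WHERE name = %s",
                        (data['user'],))
    d.addCallback(checkUser, data)
    return d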
class ServerClientSSL(LineReceiver):
...
There are lots of protocols better than LineReceiver. The simplest improvement you can make is to switch to Int32StringReceiver. A more substantial improvement would be to switch to twisted.protocols.amp.
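For example, a sketch of the server side with Int32StringReceiver, which prefixes every message with its length so there is no line-length limit to hit:

from twisted.protocols.basic import Int32StringReceiver

class Server(Int32StringReceiver):
    def stringReceived(self, data):
        # one complete length-prefixed message per call,
        # however it was fragmented on the wire
        new_data = loads(data)
        if new_data['cmd'] == 'AUTH':
            response = self.authorization(new_data)
            self.sendString(dumps(response))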
1) How can I decrease the time spent on JSON serialization?
Use a faster JSON library. After you fix the blocking database code in your application, though, I doubt you'll still need a faster JSON library.
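If you do want to try one, the usual trick is a drop-in import, for example (assuming ujson or simplejson is installed):

try:
    import ujson as json  # or simplejson
except ImportError:
    import json

# json.dumps / json.loads keep the same call sites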
2) Is there something better than LineReceiver for communication between the server and the client? Or can I increase the length of received lines?
You can raise LineReceiver.MAX_LENGTH. After you switch to Int32StringReceiver or AMP you won't need this anymore, though.
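Raising it is just a class attribute override (the default is 16384 bytes); BigLineClient here is a hypothetical name:

class BigLineClient(LineReceiver):
    # allow lines up to 1 MiB instead of the 16 KiB default
    MAX_LENGTH = 1024 * 1024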

loop http client python , twisted

I have a simple client which sends a request to a server and receives a response:
from StringIO import StringIO

from twisted.internet import reactor
from twisted.internet.protocol import Protocol
from twisted.web.client import Agent
from twisted.web.http_headers import Headers
from twisted.internet.defer import Deferred
from twisted.web.client import FileBodyProducer

import log, time

class server_response(Protocol):
    def __init__(self, finished):
        self.finished = finished
        self.remaining = 1024 * 10

    def dataReceived(self, bytes):
        if self.remaining:
            reply = bytes[:self.remaining]
            print "reply from server:", reply
            log.info(reply)

    def connectionLost(self, reason):
        #print 'Finished receiving body:', reason.getErrorMessage()
        self.finished.callback(None)

def capture_response(response):
    finished = Deferred()
    response.deliverBody(server_response(finished))
    return finished

def cl():
    xml_str = "<xml>"
    agent = Agent(reactor)
    body = FileBodyProducer(StringIO(xml_str))
    d = agent.request(
        'PUT',
        'http://localhost:8080/',
        Headers({'User-Agent': ['Replication'],
                 'Content-Type': ['text/x-greeting']}),
        body)
    d.addCallback(capture_response)

    def cbShutdown(ignored):
        reactor.stop()
    d.addBoth(cbShutdown)

    reactor.run()

if __name__ == '__main__':
    count = 1
    while (count < 5):
        print count
        cl()
        time.sleep(2)
        count = count + 1
Here in main, I am trying to send the request to the server by invoking cl() several times in a while loop, but I am getting an error. My assumption is that I have not stopped the client properly, so the reactor is not starting again. How do I solve this problem?
Unfortunately, the Twisted reactor cannot be restarted. Once you have done reactor.stop(), you cannot do reactor.run() again.
Instead, you need to do something like chaining the runs, so that the callback for one run finishing causes the next run to be started, or scheduling the runs with reactor.callLater().
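A sketch of the callLater approach, assuming cl() is changed to return the Deferred from agent.request() instead of calling reactor.run() and reactor.stop() itself; run() and MAX_RUNS are illustrative names:

from twisted.internet import reactor

MAX_RUNS = 5

def run(count=1):
    d = cl()  # must return the request Deferred, not run the reactor
    def scheduleNext(ignored):
        if count < MAX_RUNS:
            reactor.callLater(2, run, count + 1)  # next request in 2 seconds
        else:
            reactor.stop()  # stop only once, after the last run
    d.addBoth(scheduleNext)

reactor.callWhenRunning(run)
reactor.run()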

Problem with Twisted python - sending binary data

What I'm trying to do is fairly simple: send a file from client to server. First, the client sends information about the file (its size). Then it sends the actual file.
This is what I've done so far:
Server.py
from twisted.internet import reactor, protocol
from twisted.protocols.basic import LineReceiver
import pickle
import sys

class Echo(LineReceiver):
    def connectionMade(self):
        self.factory.clients.append(self)
        self.setRawMode()

    def connectionLost(self, reason):
        self.factory.clients.remove(self)

    def lineReceived(self, data):
        print "line", data

    def rawDataReceived(self, data):
        try:
            obj = pickle.loads(data)
            print obj
        except:
            print data
        #self.transport.write("wa2")

def main():
    """This runs the protocol on port 8000"""
    factory = protocol.ServerFactory()
    factory.protocol = Echo
    factory.clients = []
    reactor.listenTCP(8000, factory)
    reactor.run()

# this only runs if the module was *not* imported
if __name__ == '__main__':
    main()
Client.py
import pickle
from twisted.internet import reactor, protocol
import time
import os.path
from twisted.protocols.basic import LineReceiver

class EchoClient(LineReceiver):
    def connectionMade(self):
        file = "some file that is a couple of megs"
        filesize = os.path.getsize(file)
        self.sendLine(pickle.dumps({"size": filesize}))
        f = open(file, "rb")
        contents = f.read()
        print contents[:20]
        self.sendLine(contents[:20])
        f.close()
        # self.sendLine("hej")
        # self.sendLine("wa")

    def connectionLost(self, reason):
        print "connection lost"

class EchoFactory(protocol.ClientFactory):
    protocol = EchoClient

    def clientConnectionFailed(self, connector, reason):
        print "Connection failed - goodbye!"
        reactor.stop()

    def clientConnectionLost(self, connector, reason):
        print "Connection lost - goodbye!"
        reactor.stop()

# this connects the protocol to a server running on port 8000
def main():
    f = EchoFactory()
    reactor.connectTCP("localhost", 8000, f)
    reactor.run()

# this only runs if the module was *not* imported
if __name__ == '__main__':
    main()
The server will only output the deserialized object:
{'size': 183574528L}
How come? What happened to the 20 chars from the file I wanted to send?
If I use the "hej" and "wa" sends instead, I get them both (in the same message, not twice).
Somebody?
You've set your server to raw mode with setRawMode(), so the callback rawDataReceived is being called with the incoming data (not lineReceived). If you print the data you receive in rawDataReceived, you see everything including the file content, but because you pass it to pickle for deserialization, everything after the first pickled object is ignored (pickle.loads stops at the end of the pickled dict).
Either change the way you send data to the server (I would suggest the netstring format), or put the content inside the pickled object and send it in one call:
self.sendLine(pickle.dumps({"size":filesize, 'content': contents[:20]}))
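If you go the netstring route instead, both sides can switch to NetstringReceiver, which frames each message so the pickled header and the file chunks arrive as separate, complete strings. A sketch of the server side (the client would call self.sendString(...) instead of self.sendLine(...)):

from twisted.protocols.basic import NetstringReceiver
import pickle

class Echo(NetstringReceiver):
    def stringReceived(self, data):
        # each sendString() on the client arrives here as one complete string
        try:
            print pickle.loads(data)
        except Exception:
            print "raw chunk of %d bytes" % len(data)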
