I am new to Bottle. I have searched for an answer but could not find one. Is there any way to load a page without a redirect?
The code is like this:
import requests
from bottle import get, run, template

@get('/list/item')
def listItems():
    r = requests.get('my/url/which/works/')
    return r.content

if __name__ == '__main__':
    run(host='localhost', port=8080)
and the webpage is empty. I have also tried return r.text, return template(r.content), return str(r.content), return str(r.content.decode('utf-8')), and
nf = urllib.urlopen('my/url/which/works')
return nf.read()
None of them returns the page I want.
However, if I write return r.content[0], it works: the page shows the first character, which is '<'. But if I write return r.content[0:100], it returns an empty page again.
If I run the request from the command line, this is what it returns:
>>> import requests
>>> r = requests.get('my/url/which/works/')
>>>
>>> r.content
'<?xml version="1.0" encoding="utf-8" ?> \n<body copyright="...>\n</body>\n'
Can anyone help with this? Thank you very much.
Your question doesn't specify what you expect to see when your code works, but this may help you:
import requests
from bottle import get, run, template, response

@get('/list/item')
def listItems():
    r = requests.get('my/url/which/works/')
    response.content_type = 'text/plain'  # added this
    return r.content
...
The only change is that your web server will now set the content type of the response to text/plain, which should make your browser render the page.
Longer term, if (as I'm inferring from your question) you intend to return XML responses, then I suggest either installing a browser plugin (here's an example) or, better yet, using curl or wget to see the precise response. This will help you debug your server's responses.
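If you'd rather check from Python itself, a minimal sketch along these lines shows exactly what your app sends back; the URL is a hypothetical placeholder for wherever your route is served:

import requests

# Hypothetical URL; substitute the address your bottle app serves.
r = requests.get('http://localhost:8080/list/item')
print(r.status_code)                  # e.g. 200
print(r.headers.get('Content-Type')) # what the browser uses to decide how to render
print(r.text[:200])                  # the start of the body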
I use the Google Safe Browsing API, so I tested this simple code:
from safebrowsinglookup import SafebrowsinglookupClient

class TestMe:
    def testMe(self):
        self.key = 'my_Valid_Key_Here'
        self.client = SafebrowsinglookupClient(self.key)
        self.response = self.client.lookup('http://www.google.com')
        print(self.response)

if __name__ == "__main__":
    TM = TestMe()
    TM.testMe()
I always get this, whatever website I test:
{'website_I_tried','error'}
Note that I had to change some lines in the source code after I installed this API because it was written in Python 2 and I am using Python 3.4.1. How can I resolve this problem?
Update:
To understand why the above problem occurred, I ran this code:
from safebrowsinglookup import SafebrowsinglookupClient

class TestMe:
    def testMe(self):
        self.key = 'my_key_here'
        self.client = SafebrowsinglookupClient(self.key, debug=1)
        urls = ['http://www.google.com/', 'http://addonrock.ru/Debugger.js/']
        self.results = self.client.lookup(*urls)
        print(self.results['http://www.google.com/'])

if __name__ == "__main__":
    TM = TestMe()
    TM.testMe()
Now, I got this message:
BODY:
2
http://www.google.com/
http://addonrock.ru/Debugger.js/
URL: https://sb-ssl.google.com/safebrowsing/api/lookup?client=python&apikey=ABQIAAAAAU6Oj8JFgQpt0AXtnVwBYxQYl9AeQCxMD6irIIDtWpux_GHGQQ&appver=0.1&pver=3.0
Unexpected server response
name 'urllib2' is not defined
error
error
The library doesn't support Python 3.x.
In this case, you can either make it support Python 3 (there is also an open pull request for Python 3 compatibility) or make the request to the Google Safe Browsing API manually.
Here's an example using requests:
import requests

key = 'your key here'
URL = "https://sb-ssl.google.com/safebrowsing/api/lookup?client=api&apikey={key}&appver=1.0&pver=3.0&url={url}"

def is_safe(key, url):
    response = requests.get(URL.format(key=key, url=url))
    return response.text != 'malware'

print(is_safe(key, 'http://addonrock.ru/Debugger.js/'))  # prints False
print(is_safe(key, 'http://google.com'))  # prints True
Just the same, but without third-party packages (using urllib.request):
from urllib.request import urlopen

key = 'your key here'
URL = "https://sb-ssl.google.com/safebrowsing/api/lookup?client=python&apikey={key}&appver=1.0&pver=3.0&url={url}"

def is_safe(key, url):
    response = urlopen(URL.format(key=key, url=url)).read().decode("utf8")
    return response != 'malware'

print(is_safe(key, 'http://addonrock.ru/Debugger.js/'))  # prints False
print(is_safe(key, 'http://google.com'))  # prints True
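One caveat on both sketches: the URL being looked up is interpolated into the query string unescaped, so characters such as & or = inside it would corrupt the request. A hedged tweak of the urllib.request version, percent-encoding the parameter with the standard library's quote_plus:

from urllib.parse import quote_plus

def is_safe(key, url):
    # quote_plus percent-encodes the URL so it survives as a single query parameter
    response = urlopen(URL.format(key=key, url=quote_plus(url))).read().decode("utf8")
    return response != 'malware'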
I know I can use the shortcuts module to make it easier, but just to see if I could do it manually, I tried to create and return a response object myself and could not get it to work:
import urllib2

def djangoview(request):
    data = '<byte string>'
    open('body.txt', 'wb').write(data)
    headers = {'Content-Type': 'something', 'Accept': 'somethingelse'}
    newresponse = urllib2.Request('file:body.txt', None, headers)
    return HttpResponse(newresponse)
I don't understand what you are trying to do. It's the contract of a view that it returns an instance of django.http.HttpResponse - you are simply not allowed to return anything else. Doing so is not a shortcut, it's a necessity.
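For illustration only, a minimal sketch of what the view could return instead: build the django.http.HttpResponse by hand, with the body and header values taken from the placeholders in the question:

from django.http import HttpResponse

def djangoview(request):
    data = b'<byte string>'  # placeholder body from the question
    response = HttpResponse(data, content_type='something')
    response['Accept'] = 'somethingelse'  # extra headers are set via item assignment
    return response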
Friends, I'm trying to rewrite one of my little tools. Basically, it takes input from the user, and if that input doesn't contain the "base url", a function constructs the input into a valid URL for the other parts of the program to work on.
If I write it so the program only accepts a valid URL as input, it works; however, if I pass a bare string and construct the URL, urllib2.urlopen() fails, and I have no idea why, as the returned value is exactly the same str value...
import urllib2
import re

class XunLeiKuaiChuan:
    kuaichuanBaseAddress = 'http://kuaichuan.xunlei.com/d/'
    regexQuery = 'file_name=\"(.*?)\"\sfile_url=\"(.*?)\sfile_size=\"(.*?)\"'
    agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)'

    def buildLink(self, aLink):
        if aLink == '':
            return
        if 'xunlei.com' not in aLink:
            aLink = self.kuaichuanBaseAddress + aLink
        return aLink

    def decodeLink(self, url):
        url = self.buildLink(url)  # it returns the correct url for the value provided
        print 'in decodeLink ' + url
        urlReq = urllib2.Request(url)
        urlReq.add_header('User-agent', self.agent)
        pageContent = urllib2.urlopen(urlReq).read()
        realLinks = re.findall(self.regexQuery, pageContent)
        return realLinks

test = XunLeiKuaiChuan()
link = 'y7L1AwKuOwDeCClS528'
link2 = 'http://kuai.xunlei.com/d/y7L1AwKuOwDeCClS528'
s = test.decodeLink(link2)
print s
When I call it with link2 it functions as expected, but it fails when I use link. Can someone tell me what I am missing here? My "old version" works, but it only accepts a full URL; this unknown behavior is killing me here... Thank you.
By the way, if it returns an empty list even with a full URL, just open the URL and enter the captcha on the page. They do this to prevent some kind of 'attacks'...
Never mind - I got the hostname in the code wrong.
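For reference, the mismatch is visible in the code above: the working link2 lives on kuai.xunlei.com, while the class hard-codes kuaichuan.xunlei.com as the base, so every constructed URL points at the wrong host. The one-line fix:

# link2 (which works) is on kuai.xunlei.com, so the base address must match:
kuaichuanBaseAddress = 'http://kuai.xunlei.com/d/'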
Is there an easy way to cache things when using urllib2 that I am overlooking, or do I have to roll my own?
If you don't mind working at a slightly lower level, httplib2 (https://github.com/httplib2/httplib2) is an excellent HTTP library that includes caching functionality.
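A minimal sketch of httplib2's built-in caching (the cache directory name is arbitrary):

import httplib2

# Responses are stored on disk under ".cache" and reused
# according to the cache-control headers the server sends.
h = httplib2.Http(".cache")
response, content = h.request("http://www.google.com/")
print response.fromcache  # True when a repeated request was served from the cache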
You could use a decorator function such as:
class cache(object):
    def __init__(self, fun):
        self.fun = fun
        self.cache = {}

    def __call__(self, *args, **kwargs):
        key = str(args) + str(kwargs)
        try:
            return self.cache[key]
        except KeyError:
            self.cache[key] = rval = self.fun(*args, **kwargs)
            return rval
        except TypeError:  # in case key isn't a valid key - don't cache
            return self.fun(*args, **kwargs)
and define a function along the lines of:
import urllib

@cache
def get_url_src(url):
    return urllib.urlopen(url).read()
This assumes you're not paying attention to HTTP cache controls, but just want to cache the page for the duration of the application.
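A short usage sketch under that assumption - the second call never touches the network, because the decorator hands back the entry stored in its dict:

src = get_url_src('http://www.google.com/')        # fetched over the network
src_again = get_url_src('http://www.google.com/')  # returned from the in-memory cache
assert src is src_again  # the very same cached object comes back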
This ActiveState Python recipe might be helpful:
http://code.activestate.com/recipes/491261/
I've always been torn between using httplib2, which does a solid job of handling HTTP caching and authentication, and urllib2, which is in the stdlib, has an extensible interface, and supports HTTP Proxy servers.
The ActiveState recipe starts to add caching support to urllib2, but only in a very primitive fashion. It fails to allow for extensibility in storage mechanisms, hard-coding the file-system-backed storage. It also does not honor HTTP cache headers.
In an attempt to bring together the best features of httplib2 caching and urllib2 extensibility, I've adapted the ActiveState recipe to implement most of the same caching functionality as is found in httplib2. The module is in jaraco.net as jaraco.net.http.caching. The link points to the module as it exists at the time of this writing. While that module is currently part of the larger jaraco.net package, it has no intra-package dependencies, so feel free to pull the module out and use it in your own projects.
Alternatively, if you have Python 2.6 or later, you can easy_install jaraco.net>=1.3 and then utilize the CachingHandler with something like the code in caching.quick_test().
"""Quick test/example of CacheHandler"""
import logging
import urllib2
from httplib2 import FileCache
from jaraco.net.http.caching import CacheHandler
logging.basicConfig(level=logging.DEBUG)
store = FileCache(".cache")
opener = urllib2.build_opener(CacheHandler(store))
urllib2.install_opener(opener)
response = opener.open("http://www.google.com/")
print response.headers
print "Response:", response.read()[:100], '...\n'
response.reload(store)
print response.headers
print "After reload:", response.read()[:100], '...\n'
Note that jaraco.net.http.caching does not provide a specification for the backing store for the cache, but instead follows the interface used by httplib2. For this reason, the httplib2.FileCache can be used directly with urllib2 and the CacheHandler. Also, other backing caches designed for httplib2 should be usable by the CacheHandler.
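As a sketch of that interface - assuming the get/set/delete methods httplib2 expects of a cache object - a hypothetical in-memory store could be dropped in where FileCache is used above:

class MemoryCache(object):
    """Hypothetical in-memory stand-in for httplib2.FileCache."""
    def __init__(self):
        self._store = {}

    def get(self, key):
        return self._store.get(key)

    def set(self, key, value):
        self._store[key] = value

    def delete(self, key):
        self._store.pop(key, None)

# opener = urllib2.build_opener(CacheHandler(MemoryCache()))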
I was looking for something similar, and came across "Recipe 491261: Caching and throttling for urllib2", which danivo posted. The problem is that I really dislike the caching code (lots of duplication, lots of manual joining of file paths instead of using os.path.join, use of staticmethods, not very PEP 8-ish, and other things that I try to avoid).
The code is a bit nicer (in my opinion, anyway) and is functionally much the same, with a few additions - mainly the "recache" method (example usage can be seen here, or in the if __name__ == "__main__": section at the end of the code).
The latest version can be found at http://github.com/dbr/tvdb_api/blob/master/cache.py, and I'll paste it here for posterity (with my application specific headers removed):
#!/usr/bin/env python
"""
urllib2 caching handler
Modified from http://code.activestate.com/recipes/491261/ by dbr
"""
import os
import time
import httplib
import urllib2
import StringIO
from hashlib import md5
def calculate_cache_path(cache_location, url):
    """Returns the [cache_location]/[hash_of_url].headers and .body paths
    for a URL.
    """
    thumb = md5(url).hexdigest()
    header = os.path.join(cache_location, thumb + ".headers")
    body = os.path.join(cache_location, thumb + ".body")
    return header, body
def check_cache_time(path, max_age):
    """Checks if a file has been created/modified within the last [max_age]
    seconds. False means the file is too old (or doesn't exist); True means
    it is up-to-date and valid."""
    if not os.path.isfile(path):
        return False
    cache_modified_time = os.stat(path).st_mtime
    time_now = time.time()
    if cache_modified_time < time_now - max_age:
        # Cache is old
        return False
    else:
        return True

def exists_in_cache(cache_location, url, max_age):
    """Returns True if header AND body cache files exist (and are up-to-date)"""
    hpath, bpath = calculate_cache_path(cache_location, url)
    if os.path.exists(hpath) and os.path.exists(bpath):
        return (
            check_cache_time(hpath, max_age)
            and check_cache_time(bpath, max_age)
        )
    else:
        # File does not exist
        return False
def store_in_cache(cache_location, url, response):
    """Tries to store the response in the cache. Returns True on success,
    False if the cache files could not be written.
    (The original returned these inverted, which wrongly set the
    x-local-cache flag on a failed write.)"""
    hpath, bpath = calculate_cache_path(cache_location, url)
    try:
        outf = open(hpath, "w")
        headers = str(response.info())
        outf.write(headers)
        outf.close()

        outf = open(bpath, "w")
        outf.write(response.read())
        outf.close()
    except IOError:
        return False
    else:
        return True
class CacheHandler(urllib2.BaseHandler):
    """Stores responses in a persistent on-disk cache.

    If a subsequent GET request is made for the same URL, the stored
    response is returned, saving time, resources and bandwidth.
    """
    def __init__(self, cache_location, max_age=21600):
        """The location of the cache directory"""
        self.max_age = max_age
        self.cache_location = cache_location
        if not os.path.exists(self.cache_location):
            os.mkdir(self.cache_location)

    def default_open(self, request):
        """Handles GET requests; if the response is cached, it returns it.
        """
        if request.get_method() != "GET":  # "is not" compared identity, not equality
            return None  # let the next handler try to handle the request

        if exists_in_cache(
            self.cache_location, request.get_full_url(), self.max_age
        ):
            return CachedResponse(
                self.cache_location,
                request.get_full_url(),
                set_cache_header=True
            )
        else:
            return None
    def http_response(self, request, response):
        """Gets an HTTP response; if it was a GET request and the status code
        starts with 2 (200 OK etc.), it caches it and returns a CachedResponse.
        """
        if (request.get_method() == "GET"
                and str(response.code).startswith("2")):
            if 'x-local-cache' not in response.info():
                # Response is not cached
                set_cache_header = store_in_cache(
                    self.cache_location,
                    request.get_full_url(),
                    response
                )
            else:
                set_cache_header = True

            return CachedResponse(
                self.cache_location,
                request.get_full_url(),
                set_cache_header=set_cache_header
            )
        else:
            return response
class CachedResponse(StringIO.StringIO):
    """An urllib2.response-like object for cached responses.

    To determine if a response is cached or coming directly from
    the network, check the x-local-cache header rather than the object type.
    """
    def __init__(self, cache_location, url, set_cache_header=True):
        self.cache_location = cache_location
        hpath, bpath = calculate_cache_path(cache_location, url)

        StringIO.StringIO.__init__(self, file(bpath).read())

        self.url = url
        self.code = 200
        self.msg = "OK"
        headerbuf = file(hpath).read()
        if set_cache_header:
            headerbuf += "x-local-cache: %s\r\n" % (bpath)
        self.headers = httplib.HTTPMessage(StringIO.StringIO(headerbuf))

    def info(self):
        """Returns the headers"""
        return self.headers

    def geturl(self):
        """Returns the original URL"""
        return self.url

    def recache(self):
        new_response = urllib2.urlopen(self.url)
        set_cache_header = store_in_cache(
            self.cache_location,
            new_response.url,
            new_response
        )
        CachedResponse.__init__(self, self.cache_location, self.url, set_cache_header)
if __name__ == "__main__":
    def main():
        """Quick test/example of CacheHandler"""
        opener = urllib2.build_opener(CacheHandler("/tmp/"))
        response = opener.open("http://google.com")
        print response.headers
        print "Response:", response.read()

        response.recache()
        print response.headers
        print "After recache:", response.read()

    main()
This article on the Yahoo Developer Network - http://developer.yahoo.com/python/python-caching.html - describes how to cache HTTP calls made through urllib to either memory or disk.
@dbr: you may also need to add caching of HTTPS responses, with:
def https_response(self, request, response):
    return self.http_response(request, response)