I have the following code:
protocol = "http"
if self.protocol == PROTOCOL_HTTPS:
protocol = "https"
if self.session is None:
self.session = Session()
self.session.get(protocol+'://'+self.ip)
url = protocol+"://"+self.ip+requestURL
response = None
if requestType == GET_METHOD:
response = self.session.get(url, headers=(header),stream=False)
elif requestType == POST_METHOD:
response = self.session.post(url, payload, headers=(header), stream=False)
This code works, but it opens too many connections to the device. I want only one connection to be opened in the session. I tried the following, but it doesn't seem to work; it is still creating more than one connection.
adapter = requests.adapters.HTTPAdapter(pool_connections=1, pool_maxsize=1)
self.session.mount('http://', adapter)
url = protocol+"://"+self.ip+requestURL
resp = self.session.get(url)
What am I doing wrong?
How do I ensure that only one connection is opened in the session?
You don't need to use an HTTPAdapter for one connection.
Just try:
>>> import requests
>>> s = requests.Session()
>>> s.get('http://httpbin.org/get')
See http://docs.python-requests.org/en/latest/api/#requests.Session: a Session keeps the underlying connection alive and reuses it for subsequent requests to the same host.
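If you do want to cap the pool explicitly, mount the adapter for both schemes you use; the snippet in the question only mounts it for http://, so https requests still go through a default adapter with its own pool. A minimal sketch, with a hypothetical URL standing in for the device:

import requests

session = requests.Session()
# One pooled connection; block instead of opening extra connections under load.
adapter = requests.adapters.HTTPAdapter(pool_connections=1, pool_maxsize=1, pool_block=True)
session.mount('http://', adapter)
session.mount('https://', adapter)

resp = session.get('https://example.org')  # hypothetical device URL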
I'd like to check for new posts at a set interval (using apscheduler) on a site that requires login, and get notified of them through a Telegram bot.
import requests
from bs4 import BeautifulSoup
import os
import telegram
import sys
from apscheduler.schedulers.blocking import BlockingScheduler

BASE_DIR = os.path.dirname(os.path.abspath(__file__))

def scraping():
    headers = {'User-Agent': 'Mozilla/5.0'}
    LOGIN_URL = 'Login page url'
    LOGIN_DATA = {
        "user_id": "id",
        "password": "pw",
        "keep_signed": "Y"
    }

    with requests.Session() as s:
        login_req = s.post(LOGIN_URL, data=LOGIN_DATA, headers=headers)
        url = "address"
        req = s.get(url, headers=headers)
        html = req.text

    soup = BeautifulSoup(html, 'html.parser')
    title = soup.select('#css values')
    latest_title = title[0].text

    token = "certain value"
    bot = telegram.Bot(token=token)
    chat_id = 'id'

    with open(os.path.join(BASE_DIR, 'latest.txt'), 'r+') as f_read:
        before = f_read.readline()
        if before != latest_title:
            bot.sendMessage(chat_id=chat_id, text=latest_title)

    with open(os.path.join(BASE_DIR, 'latest.txt'), 'w+') as f_write:
        f_write.write(latest_title)

scheduler = BlockingScheduler()
scheduler.add_job(scraping, 'interval', seconds=30)
scheduler.start()
With this code, the login happens on every interval, which is inefficient.
How can I check the posts repeatedly while keeping the session alive with only one login?
I've had a similar issue before, and solved it by storing the session as a pickled object in redis.
When you try to log in, get the pickled session, unpickle it, and then try to use it. If it is no longer a valid session (for example, the API times out your login session), create a new session.
Something along these lines might work:
import pickle
import time

import redis

redis_client = redis.Redis(host='localhost', port=6379, db=0)
conn = None

def connect():
    global conn
    if conn is None:
        conn = ...  # your login code here
        redis_client.set(
            "connection", pickle.dumps(conn)  # your session here
        )

connection = redis_client.get("connection")
conn = pickle.loads(connection) if connection else None
connect()

# Make sure the connection is actually usable, reconnecting if it is not.
timeout = time.time() + 60 * 3  # 3 mins from now
while True:
    try:
        connected = ...  # code to check if you are connected, e.g. get a URL
        if not connected:
            raise AssertionError()
        break
    except (AssertionError, ConnectionResetError) as e:
        if time.time() <= timeout:
            time.sleep(30)  # wait 30 sec before retrying
            conn = None     # drop the stale session and recreate the login
            connect()
            continue
        elif time.time() > timeout:
            raise ValueError("Connection failed after timeout.")
        else:
            raise e
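Applied to the scraping job from the question above, a minimal sketch, assuming the hypothetical LOGIN_URL / LOGIN_DATA placeholders from that question and a local Redis; the expiry check (401/403) is an assumption and depends on how the site rejects stale sessions:

import pickle

import redis
import requests

redis_client = redis.Redis(host='localhost', port=6379, db=0)

LOGIN_URL = 'Login page url'   # placeholder from the question
LOGIN_DATA = {"user_id": "id", "password": "pw", "keep_signed": "Y"}

def get_session():
    # Reuse the pickled session if one is cached, otherwise log in and cache it.
    raw = redis_client.get("scraper_session")
    if raw:
        return pickle.loads(raw)
    s = requests.Session()
    s.post(LOGIN_URL, data=LOGIN_DATA)
    redis_client.set("scraper_session", pickle.dumps(s))
    return s

def scraping():
    s = get_session()
    resp = s.get("address")                # placeholder URL from the question
    if resp.status_code in (401, 403):     # assumed signal that the login expired
        redis_client.delete("scraper_session")
        s = get_session()
        resp = s.get("address")
    # ... parse resp.text with BeautifulSoup as in the question ...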
I am making HTTP requests using the requests library in Python, but I need the IP address of the server that responded to the HTTP request, and I'm trying to avoid making two calls (and possibly getting a different IP address from the one that responded).
Is that possible? Does any python HTTP library allow me to do that?
PS: I also need to make HTTPS requests and use an authenticated proxy.
Update 1:
Example:
import requests
proxies = {
    "http": "http://user:password@10.10.1.10:3128",
    "https": "http://user:password@10.10.1.10:1080",
}
response = requests.get("http://example.org", proxies=proxies)
response.ip  # This doesn't exist; this is just what I would like to do
Then, I would like a method or property on the response that tells me which IP address requests connected to. In other libraries, I was able to do that by finding the sock object and calling getpeername().
It turns out that it's rather involved.
Here's a monkey-patch while using requests version 1.2.3:
It wraps the _make_request method on HTTPConnectionPool to store the result of socket.getpeername() on the HTTPResponse instance.
For me, on Python 2.7.3, this instance was available on response.raw._original_response.
from requests.packages.urllib3.connectionpool import HTTPConnectionPool

def _make_request(self, conn, method, url, **kwargs):
    response = self._old_make_request(conn, method, url, **kwargs)
    sock = getattr(conn, 'sock', False)
    if sock:
        setattr(response, 'peer', sock.getpeername())
    else:
        setattr(response, 'peer', None)
    return response

HTTPConnectionPool._old_make_request = HTTPConnectionPool._make_request
HTTPConnectionPool._make_request = _make_request
import requests
r = requests.get('http://www.google.com')
print r.raw._original_response.peer
Yields:
('2a00:1450:4009:809::1017', 80, 0, 0)
Ah, if there's a proxy involved or the response is chunked, the HTTPConnectionPool._make_request isn't called.
So here's a new version patching httplib.getresponse instead:
import httplib

def getresponse(self, *args, **kwargs):
    response = self._old_getresponse(*args, **kwargs)
    if self.sock:
        response.peer = self.sock.getpeername()
    else:
        response.peer = None
    return response

httplib.HTTPConnection._old_getresponse = httplib.HTTPConnection.getresponse
httplib.HTTPConnection.getresponse = getresponse
import requests

def check_peer(resp):
    orig_resp = resp.raw._original_response
    if hasattr(orig_resp, 'peer'):
        return getattr(orig_resp, 'peer')
Running:
>>> r1 = requests.get('http://www.google.com')
>>> check_peer(r1)
('2a00:1450:4009:808::101f', 80, 0, 0)
>>> r2 = requests.get('https://www.google.com')
>>> check_peer(r2)
('2a00:1450:4009:808::101f', 443, 0, 0)
>>> r3 = requests.get('http://wheezyweb.readthedocs.org/en/latest/tutorial.html#what-you-ll-build')
>>> check_peer(r3)
('162.209.99.68', 80)
Also checked running with proxies set; proxy address is returned.
Update 2016/01/19
est offers an alternative that doesn't need the monkey-patch:
rsp = requests.get('http://google.com', stream=True)
# grab the IP while you can, before you consume the body!!!!!!!!
print rsp.raw._fp.fp._sock.getpeername()
# consume the body, which calls the read(), after that fileno is no longer available.
print rsp.content
Update 2016/05/19
From the comments, copying here for visibility, Richard Kenneth Niescior offers the following that is confirmed working with requests 2.10.0 and Python 3.
rsp=requests.get(..., stream=True)
rsp.raw._connection.sock.getpeername()
Update 2019/02/22
Python3 with requests version 2.19.1.
resp=requests.get(..., stream=True)
resp.raw._connection.sock.socket.getsockname()
Update 2020/01/31
Python3.8 with requests 2.22.0
resp = requests.get('https://www.google.com', stream=True)
resp.raw._connection.sock.getsockname()
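For convenience, the stream=True trick can be wrapped in a small helper. This is only a sketch: it reads private attributes of requests/urllib3, so it may break between versions:

import requests

def get_with_peer(url, **kwargs):
    # stream=True keeps the underlying connection available until the body is consumed
    resp = requests.get(url, stream=True, **kwargs)
    try:
        resp.peer = resp.raw._connection.sock.getpeername()  # (ip, port) of the server
    except AttributeError:
        resp.peer = None  # the private internals differ in this requests/urllib3 version
    _ = resp.content  # now it is safe to consume the body
    return resp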
Try:
import requests
proxies = {
    "http": "http://user:password@10.10.1.10:3128",
    "https": "http://user:password@10.10.1.10:1080",
}
response = requests.get('http://jsonip.com', proxies=proxies)
ip = response.json()['ip']
print('Your public IP is:', ip)
How can I set a proxy for the latest urllib in Python 3?
I am doing the following:
from urllib import request as urlrequest
ask = urlrequest.Request(url)  # note that Request has a capital R here, unlike previous versions
open = urlrequest.urlopen(ask)
open.read()
I tried adding the proxy as follows:
ask = urlrequest.Request.set_proxy(ask, proxies, 'http')
However, I don't know how correct that is, since I am getting the following error:
    336     def set_proxy(self, host, type):
--> 337         if self.type == 'https' and not self._tunnel_host:
    338             self._tunnel_host = self.host
    339         else:

AttributeError: 'NoneType' object has no attribute 'type'
You should be calling set_proxy() on an instance of class Request, not on the class itself:
from urllib import request as urlrequest
proxy_host = 'localhost:1234' # host and port of your proxy
url = 'http://www.httpbin.org/ip'
req = urlrequest.Request(url)
req.set_proxy(proxy_host, 'http')
response = urlrequest.urlopen(req)
print(response.read().decode('utf8'))
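If the target URL is https, set the proxy for the 'https' scheme instead; urllib then tunnels through the proxy with CONNECT. A sketch with the same hypothetical proxy host:

from urllib import request as urlrequest

proxy_host = 'localhost:1234'  # hypothetical proxy host and port
req = urlrequest.Request('https://www.httpbin.org/ip')
req.set_proxy(proxy_host, 'https')
print(urlrequest.urlopen(req).read().decode('utf8'))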
I needed to disable the proxy in our company environment, because I wanted to access a server on localhost. I could not disable the proxy server with the approach from @mhawke (I tried passing {}, None and [] as proxies).
This worked for me (can also be used for setting a specific proxy, see comment in code).
import urllib.request as request
# disable the proxy by passing an empty dictionary
proxy_handler = request.ProxyHandler({})
# alternatively you could set a proxy for http with
# proxy_handler = request.ProxyHandler({'http': 'http://www.example.com:3128/'})
opener = request.build_opener(proxy_handler)
url = 'http://www.example.org'
# open the website with the opener
req = opener.open(url)
data = req.read().decode('utf8')
print(data)
urllib will automatically detect proxies set up in the environment, so one can just set the HTTP_PROXY variable, either in your environment, e.g. for Bash:
export HTTP_PROXY=http://proxy_url:proxy_port
or using Python e.g.
import os
os.environ['HTTP_PROXY'] = 'http://proxy_url:proxy_port'
Note from the urllib docs: "HTTP_PROXY[environment variable] will be ignored if a variable REQUEST_METHOD is set; see the documentation on getproxies()"
import urllib.request

def set_http_proxy(proxy):
    if proxy is None:  # Use system default setting
        proxy_support = urllib.request.ProxyHandler()
    elif proxy == '':  # Don't use any proxy
        proxy_support = urllib.request.ProxyHandler({})
    else:  # Use the given proxy
        proxy_support = urllib.request.ProxyHandler({'http': proxy, 'https': proxy})
    opener = urllib.request.build_opener(proxy_support)
    urllib.request.install_opener(opener)

proxy = 'user:pass@ip:port'
set_http_proxy(proxy)

url = 'https://www.httpbin.org/ip'
request = urllib.request.Request(url)
response = urllib.request.urlopen(request)
html = response.read()
html
I'm a little confused about the requests module, especially proxies.
From documentation:
PROXIES
Dictionary mapping protocol to the URL of the proxy (e.g. {‘http’:
‘foo.bar:3128’}) to be used on each Request.
Can there be more than one proxy of the same type in the dictionary? I mean, is it possible to put a list of proxies there and have the requests module try them and use only those which are working?
Or can there be only one proxy address, for example, for http?
Using the proxies parameter is limited by the very nature of a python dictionary (i.e. each key must be unique).
import requests

url = 'http://google.com'
proxies = {'https': '84.22.41.1:3128',
           'http': '185.26.183.14:80',
           'http': '178.33.230.114:3128'}

if __name__ == '__main__':
    print url
    print proxies
    response = requests.get(url, proxies=proxies)
    if response.status_code == 200:
        print response.text
    else:
        print 'Response ERROR', response.status_code
outputs
http://google.com
{'http': '178.33.230.114:3128', 'https': '84.22.41.1:3128'}
<!doctype html><html itemscope="" itemtype="http://schema.org/WebPage" lang="en"><head><meta content="Search the world's information, including webpages, images, videos and more. Google has many special features to help you find exactly what you're looking for."
...more html...
As you can see, the value of the http protocol key in the proxies dictionary corresponds to the last encountered in its assignment (i.e. 178.33.230.114:3128). Try swapping the http entries around.
So, the answer is no, you cannot specify multiple proxies for the same protocol using a simple dictionary.
I have tried using an iterable as a value, which would make sense to me
proxies = {'https': '84.22.41.1:3128',
           'http': ('178.33.230.114:3128', '185.26.183.14:80', )}
but no luck; it produces an error.
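A workaround within that limitation is to pick one proxy per request yourself and fall back to the next one when it fails. A sketch using the same example proxies (the timeout and exception handling are assumptions, not part of the original answer):

import random

import requests

proxy_pool = ['178.33.230.114:3128', '185.26.183.14:80']

def get_via_pool(url, **kwargs):
    # Try the proxies in random order and return the first successful response.
    for proxy in random.sample(proxy_pool, len(proxy_pool)):
        try:
            return requests.get(url, proxies={'http': proxy, 'https': proxy},
                                timeout=10, **kwargs)
        except (requests.exceptions.ProxyError, requests.exceptions.ConnectTimeout):
            continue
    raise RuntimeError('No working proxy in the pool')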
Well, actually you can; I've done this with a few lines of code and it works pretty well.
import requests

class Client:
    def __init__(self):
        self._session = requests.Session()
        self.proxies = None

    def set_proxy_pool(self, proxies, auth=None, https=True):
        """Randomly choose a proxy for every GET/POST request

        :param proxies: list of proxies, like ["ip1:port1", "ip2:port2"]
        :param auth: if proxy needs auth
        :param https: default is True, pass False if you don't need https proxy
        """
        from random import choice

        if https:
            self.proxies = [{'http': p, 'https': p} for p in proxies]
        else:
            self.proxies = [{'http': p} for p in proxies]

        def get_with_random_proxy(url, **kwargs):
            proxy = choice(self.proxies)
            kwargs['proxies'] = proxy
            if auth:
                kwargs['auth'] = auth
            return self._session.original_get(url, **kwargs)

        def post_with_random_proxy(url, *args, **kwargs):
            proxy = choice(self.proxies)
            kwargs['proxies'] = proxy
            if auth:
                kwargs['auth'] = auth
            return self._session.original_post(url, *args, **kwargs)

        self._session.original_get = self._session.get
        self._session.get = get_with_random_proxy
        self._session.original_post = self._session.post
        self._session.post = post_with_random_proxy

    def remove_proxy_pool(self):
        self.proxies = None
        self._session.get = self._session.original_get
        self._session.post = self._session.original_post
        del self._session.original_get
        del self._session.original_post

    # You can define whatever operations using self._session
I use it like this:
client = Client()
client.set_proxy_pool(['112.25.41.136', '180.97.29.57'])
It's simple, but actually works for me.
I'm looking for help. My Django server has an instant-messaging feature implemented with django-socketio. If I run the server with the command 'runserver_socketio', there are no problems.
But now I want to run the server with 'runfcgi', and that stops socketio from working. So I want the socketio server to handle the requests relayed by the fcgi server. Can that work?
The following is my code:
def push_msg(msg):
    params = urllib.urlencode({"msg": str(msg)})
    '''headers = {"Content-type":"text/html;charset=utf8"}
    conn = httplib.HTTPConnection("http://127.0.0.1:8000")
    print conn
    conn.request("POST", "/push_msg/", data=params, headers=headers)
    response = conn.getresponse()
    print response'''
    h = httplib2.http()
    print h
    resp, content = h.request("http://127.0.0.1:8000/push_msg/", method="POST", body=params)
url(r'^push_msg/$', 'chat.events.on_message')
chat.events.on_message:
def on_message(request):
    msg = request.POST.get('msg')
    msg = eval(msg)
    try:
        print 'handle messages'
        from_id = int(msg['from_id'])
        to_id = int(msg['to_id'])
        user_to = UserProfile.objects.get(id=msg['to_id'])
        django_socketio.broadcast_channel(msg, user_to.channel)
        if msg.get('type', '') == 'chat':
            ct = Chat.objects.send_msg(from_id=from_id, to_id=to_id, content=data['content'], type=1)
            ct.read = 1
            ct.save()
    except:
        pass
    return HttpResponse("success")
I have tried many times, but it doesn't work. Why?
1) Of course Django can make a request to another server.
I don't have much experience with django-socketio.
One more suggestion: instead of httplib, you can use a more advanced library such as httplib2 or requests. Apart from that, Django-Piston is dedicated to REST requests, so you could also try that.
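For example, the push_msg helper from the question could be written with requests; this is just a sketch against the hypothetical endpoint from the question:

import requests

def push_msg(msg):
    # POST the message to the push endpoint handled by chat.events.on_message
    resp = requests.post("http://127.0.0.1:8000/push_msg/", data={"msg": str(msg)})
    return resp.status_code == 200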