The server I'm trying to logon and download a file from is using Basic Auth as I can confirm from Chrome Dev Tools and some tests. So I write code like below, bad example of OOP perhaps, but should make sense.
class Utils(object):
def __init__(self, username, password):
self.username = username
self.password = password
self.top_level_url = 'http://test.com/'
password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
password_mgr.add_password(None, self.top_level_url, self.username, self.password)
basic_auth_handler = urllib2.HTTPBasicAuthHandler(password_mgr)
opener = urllib2.build_opener(basi_auth_handler)
urllib2.install_opener(opener)
def download(self, filename):
url = self.top_level_url + filename
req = urllib2.Request(url)
try:
response = urllib2.urlopen(req)
return response
except urllib2.HTTPError as e:
print e.headers
raise
Strange things happen, when I initialize a Utils object and download the file repeatedly:
u = Utils('username', 'password')
index = 0
while 1:
resp = u.download('file.txt')
index += 1
time.sleep(1)
The scripts works for the first 5 times of download, but at the 6th time, it would raise HTTPError 401. But if I change the code, add the post header to include 'Authorization: Basic ***' instead of using HTTPBasicAuthHandler, it works every time... So is this something wrong with my code or the server part setup?
Related
I am working on a small project that gets the following of a given user's Instagram. I have this working flawlessly as a script using a function, however I plan to make this into an actual program so I decided to write a class. I believe I am using "self" correctly in all the right places, but I am failing to see why I am getting this name error. Here is my code:
# Library imports
import requests
import json
import time
# Class decleration
class NodesCursor:
# Class variables
# Login url
LOGIN_URL = 'https://www.instagram.com/accounts/login/ajax/'
# Referer url
REFERER_URL = 'https://www.instagram.com/accounts/login/'
# User agent
USER_AGENT = 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'
# Class constructor
def __init__(self, USERNAME, PASSWD):
# Login username
self.USERNAME = USERNAME
# Login password
self.PASSWD = PASSWD
# Creating a session
self.session = requests.Session()
# Get request to login url
self.req = session.get(LOGIN_URL)
# Setting user agent for session header
self.session.headers = {'user-agent': USER_AGENT}
# Setting referer url for session header
self.session.headers.update({'Referer': REFERER_URL})
# Updating session header with x-csrftoken cookie
self.session.headers.update({'x-csrftoken': self.req.cookies['csrftoken']})
# Login data for website
self.login_data = {'username': self.USERNAME, 'password': self.PASSWD}
# Login with a post requests
self.login = session.post(LOGIN_URL, data=self.login_data, allow_redirects=True)
# Updating the session with x-csrftoken cookie
self.session.headers.update({'x-csrftoken': self.login.cookies['csrftoken']})
# Function to parse following
def parse(self):
# An array of usernames
usernames = []
# Variable to handle continuous scrolling
has_next_page = True
# Variable to handle continuous scrolling
end_cursor = None
# Loop to handle the scrolling to get the needed data
while has_next_page == True:
# Sleep for 30 seconds to not get rate limited
#time.sleep(30)
# Query url
queryUrl = "https://www.instagram.com/graphql/query/"
# Parameters for the get request
payload = {"query_hash":"9335e35a1b280f082a47b98c5aa10fa4", "id":"8164444379","first":24, "after": end_cursor}
# Variable for GETting all of the user's following
following = self.session.get(queryUrl, params=payload).json()
# Parsing the node to check to see what has_next_page equals to
has_next_page = following['data']['user']['edge_follow']['page_info']['has_next_page']
# Parse all user followings until there are no more
if has_next_page == True or has_next_page == False:
# Parsing end cursor id
end_cursor = following['data']['user']['edge_follow']['page_info']['end_cursor']
# Sleep for 30 seconds to not get rate limited
time.sleep(30)
# Parsing to get to username node
userList = following['data']['user']['edge_follow']
# Loop to interate through all of the names
for eachName in userList['edges']:
# Add each name to the array
usernames.append(eachName['node']['username'])
# Print the array of usernames, along with the length
print(usernames)
print(len(usernames))
if __name__ == '__main__':
checkFollowing = NodesCursor('username', 'password')
checkFollowing().parse()
Error:
Traceback (most recent call last):
File "test.py", line 115, in <module>
turboOne = NodesCursor('moola.ig', 'yeet1234')
File "test.py", line 42, in __init__
self.req = session.get(LOGIN_URL)
NameError: name 'session' is not defined
Though as I stated earlier that I think I am using "self" correctly, it is possible that is where my error is coming from but I'm unsure. Any help is greatly appreciated.
You’re missing the self. when accessing session:
# Creating a session
self.session = requests.Session()
# Get request to login url
self.req = self.session.get(LOGIN_URL)
To fix to error with LOGIN_URL:
self.req = self.session.get(NodesCursor.LOGIN_URL)
Try replacing
self.req = session.get(LOGIN_URL)
With
self.req = self.session.get(LOGIN_URL)
I'm using urllib.request in python to try and download some build information from Teamcity. This request used to work without username and password, however a recent security change means I must use a username and password. So I have changed tried each of the two solutions below:
Attempt 1)
url = 'http://<domain>/httpAuth/app/rest/buildTypes/<buildlabel>/builds/running:false?count=1&start=0'
# create a password manager
password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
# Add the username and password.
top_level_url = "http://<domain>/httpAuth/app/rest/buildTypes/id:<buildlabel>/builds/running:false?count=1&start=0"
password_mgr.add_password(None, top_level_url, username, password)
handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
# create "opener" (OpenerDirector instance)
opener = urllib.request.build_opener(handler)
# use the opener to fetch a URL
opener.open(url)
Attempt 2
url = 'http://<username>:<password>#<domain>/httpAuth/app/rest/buildTypes/id:buildlabel/builds/running:false?count=1&start=0'
rest_api = urllib.request.urlopen(url)
Both of these return "HTTP Error 401: Unauthorized". However if I was to print 'url' and copy this output into a browser the link works perfectly. But when used through python I get the above error.
I use something very similar in another Perl script and this works perfectly also.
* SOLVED BELOW *
Solved this using.
credentials(url, username, password)
rest_api = urllib2.urlopen(url)
latest_build_info = rest_api.read()
latest_build_info = latest_build_info.decode("UTF-8")
# Then parse this xml for the information I want.
def credentials(self, url, username, password):
p = urllib2.HTTPPasswordMgrWithDefaultRealm()
p.add_password(None, url, username, password)
handler = urllib2.HTTPBasicAuthHandler(p)
opener = urllib2.build_opener(handler)
urllib2.install_opener(opener)
As a side note, I then want to download a file..
credentials(url, username, password)
urllib2.urlretrieve(url, downloaded_file)
Where Url is:
http://<teamcityServer>/repository/download/<build Label>/<BuildID>:id/Filename.zip
I have VMware setup for testing. I create one user abc/abc123 to access the Org url "http://localhost/cloud/org/MyOrg". I want to access the RestAPI of the VCloud. I tried with RestClient plugin in firefox. Its working fine.
Now I tried with python code.
url = 'https://localhost/api/sessions/'
req = urllib2.Request(url)
base64string = base64.encodestring('%s:%s' % ('abc#MyOrg', 'abc123'))[:-1]
authheader = "Basic %s" % base64string
req.add_header("Authorization", authheader)
req.add_header("Accept", 'application/*+xml;version=1.5')
f = urllib2.urlopen(req)
data = f.read()
print(data)
This is the code i get from stackoverflow. But for my example its give "urllib2.HTTPError: HTTP Error 403: Forbidden" Error.
I also tried HTTP authentication for the same.
After doing some googling I found the solution from the post https://stackoverflow.com/a/6348729/243031. I change the code for my usability. I am posting the answer because if some one has same error then he will get the answer directly.
My change code is:
import urllib2
import base64
# make a string with the request type in it:
method = "POST"
# create a handler. you can specify different handlers here (file uploads etc)
# but we go for the default
handler = urllib2.HTTPSHandler()
# create an openerdirector instance
opener = urllib2.build_opener(handler)
# build a request
url = 'https://localhost/api/sessions'
request = urllib2.Request(url)
# add any other information you want
base64string = base64.encodestring('%s:%s' % ('abc#MyOrg', 'abc123'))[:-1]
authheader = "Basic %s" % base64string
request.add_header("Authorization", authheader)
request.add_header("Accept",'application/*+xml;version=1.5')
# overload the get method function with a small anonymous function...
request.get_method = lambda: method
# try it; don't forget to catch the result
try:
connection = opener.open(request)
except urllib2.HTTPError,e:
connection = e
# check. Substitute with appropriate HTTP code.
if connection.code == 200:
data = connection.read()
print "Data :", data
else:
print "ERRROR", connection.code
Hope this will help some one who want to send POST request without the data.
I am fairly new to web programing but for the sake of it, I am trying to login to google account not using standard code but as a python application, but it is impossible to do so
has anyone tried to this before? can anyone help?
I made a python class that handle google login and the is able to get any google service page that requires the user to be logged in:
class SessionGoogle:
def __init__(self, url_login, url_auth, login, pwd):
self.ses = requests.session()
login_html = self.ses.get(url_login)
soup_login = BeautifulSoup(login_html.content).find('form').find_all('input')
my_dict = {}
for u in soup_login:
if u.has_attr('value'):
my_dict[u['name']] = u['value']
# override the inputs without login and pwd:
my_dict['Email'] = login
my_dict['Passwd'] = pwd
self.ses.post(url_auth, data=my_dict)
def get(self, URL):
return self.ses.get(URL).text
The idea is to go to the login page GALX hidden input value and send it back to google + login and password. It requires modules requests and beautifulSoup
Example of use:
url_login = "https://accounts.google.com/ServiceLogin"
url_auth = "https://accounts.google.com/ServiceLoginAuth"
session = SessionGoogle(url_login, url_auth, "myGoogleLogin", "myPassword")
print session.get("http://plus.google.com")
Hope this helps
Although probably not exactly what you were looking for here I found some code from a similar post that did run from me.
import urllib2
def get_unread_msgs(user, passwd):
auth_handler = urllib2.HTTPBasicAuthHandler()
auth_handler.add_password(
realm='New mail feed',
uri='https://mail.google.com',
user='%s#gmail.com' % user,
passwd=passwd
)
opener = urllib2.build_opener(auth_handler)
urllib2.install_opener(opener)
feed = urllib2.urlopen('https://mail.google.com/mail/feed/atom')
return feed.read()
print get_unread_msgs("put-username-here","put-password-here")
reference:
How to auto log into gmail atom feed with Python?
2020 update for python 3:
import urllib.request
def unread_messages(user, passwd):
auth_handler = urllib.request.HTTPBasicAuthHandler()
auth_handler.add_password(
realm='New mail feed',
uri='https://mail.google.com',
user='%s#gmail.com' % user,
passwd=passwd
)
opener = urllib.request.build_opener(auth_handler)
urllib.request.install_opener(opener)
feed = urllib.request.urlopen('https://mail.google.com/mail/feed/atom')
return feed.read()
print(unread_messages('username', 'password'))
You can use urllib, urllib2 and cookielib libraries of python to login.
import urllib, urllib2, cookielib
def test_login():
username = '' # Gmail Address
password = '' # Gmail Password
cookie_jar = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
login_dict = urllib.urlencode({'username' : username, 'password' :password})
opener.open('https://accounts.google.com/ServiceLogin', login_dict)
response = opener.open('https://plus.google.com/explore')
print response.read()
if __name__ == '__main__':
test_login()
I need to write some python ftp code that uses a ftp proxy. The proxy doesn't require authentication but the ftp server I am connecting to does. I have the following code but I am getting a "I/O error(ftp error): 501 USER format: proxy-user:auth-method#destination. Closing connection." error. My code is:
import urllib2
proxies = {'ftp':'ftp://proxy_server:21'}
ftp_server = ' ftp.somecompany.com '
ftp_port='21'
username = 'aaaa'
password = 'secretPW'
password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm( )
top_level_url = ftp_server
password_mgr.add_password(None , top_level_url, username, password)
proxy_support = urllib2.ProxyHandler(proxies )
handler = urllib2.HTTPBasicAuthHandler(password_mgr )
opener = urllib2.build_opener(proxy_support )
opener = urllib2.build_opener(handler )
a_url = 'ftp://' + ftp_server + ':' + ftp_port + '/'
print a_url
try:
data = opener.open(a_url )
print data
except IOError, (errno, strerror):
print "I/O error(%s): %s" % (errno, strerror)
I would be grateful for any assistance I can get.
I use the following code block which seems similar except i include the protocol in the top_level_url I use (and of course it's http).
You might also try calling install_opener after each build_opener call and then using urllib2.urlopen
auth_handler = urllib2.HTTPBasicAuthHandler()
auth_handler.add_password(realm='RESTRICTED ACCESS',
uri='http://website.com',
user='username',
passwd='password')
opener = urllib2.build_opener(auth_handler)
urllib2.install_opener(opener)
urllib2.urlopen('http://website.com/....')
I think you need to change this:
opener = urllib2.build_opener(proxy_support )
opener = urllib2.build_opener(handler )
to this:
opener = urllib2.build_opener([proxy_support, handler])
That gives you one opener that has both your authentication and your proxy support. You only need to use install_opener if you want the custom opener to be used whenever urllib2.urlopen is called.