Is there a way to detect url changes with Python

Is there a way to detect url changes with Python - python

I made a site phishing searcher using Python. Here is the code that i use
output = []
for i in range (100):
for subdomain_count in [1, 2, 3, 4]:
webtypo = random.choice(typo) + '.odoo.com'
http = random.choice(HTTP)
data = random.sample(web, k=subdomain_count) + [webtypo]
delims = (random.choices(delimiters, k=subdomain_count)
address = ''.join([a+b for a, b in zip(data, delims)])
weburl = http + address
output.append(weburl)
exist=[]
for c in output:
try:
request = requests.get(c)
if request.status_code == 200:
exist.append(c)
print('Exist')
elif request.status_code == 204:
print('user does not exist')
except:
print('Not Exist')
When i check the Request URL, the link changes to https://www.odoo.com/typo?domain=minecraftnet.odoo.com&autodbname=edcandroidbni123&hosting=eu142a, is there a way to detect in odoo if a link of a website changes it would print out web does not exist, but if there's a site that uses the odoo.com tld it would print out exist.

Yes, you can use the response.url parameter to get the final URL after any redirects.
response = requests.get(c)
final_url = response.url
Note this only handles 3xx redirects, not javascript redirects. Requests will never execute javascript.

Related

Urllib2 grab web-page element then reverse it

I need to visit http://www.chiquitooenterprise.com/ reverse the string and access the website using this URL: http://www.chiquitooenterprise.com/password?code=REVERSEDSTRING
How can i do this using urllib2 and Python?
link = "http://www.chiquitooenterprise.com/password"
request = urllib2.Request("http://www.chiquitooenterprise.com/password")
contents = urllib2.urlopen(request).read()
revString = request[::-1]
answer = "http://www.chiquitooenterprise.com/password?code=" + revString
response = urllib2.urlopen(answer)
response = response.read()
print(response)```

link = "http://www.chiquitooenterprise.com/password"
result = requests.get("http://www.chiquitooenterprise.com/password")
contents = result.text
revString = contents[::-1]
answer = f"http://www.chiquitooenterprise.com/password?code={revString}"
response = requests.get(answer)
response = response.text
print(response)

While loop page iteration for Meetup API not working

I'm trying to iterate through the pages of this Meetup API but I am receiving an error:
url = 'https://api.meetup.com/2/groups?offset=1&format=json&category_id=34&photo-host=public&page=100&radius=200.0&fields=&order=id&desc=false&sig_id=243750775&sig=768bcf78d9c73937fcf2f5d41fe6070424f8d0e3'
while url:
data = requests.get(url).json()
url2 = data['meta'].get('next')
data2 = pd.io.json.json_normalize(data['results'])
print(data2)
However, when I write it as;
while url:
data = requests.get(url).json()
print(data)
url2 = data['meta'].get('next')
data2 = pd.io.json.json_normalize(data['results'])
It comes out as a list that keeps iterating it's self but I don't know if it's looping through the same page or not.
I also need to use this ["offset"] += 1 somehow but fon't know where to place it

there is also a parameter page that you can use in your api call.
page = 1
url = '<base_url>&page=%d'
while page < 590:
new_url = url % page
# fetch new_url and do your magic
....
page += 1

Error when using Requests with python 3.5 recursively (GooglePlaces API)

I have been having a problem where I try to send a get request and if there is a next page token in the result it will then take that link and execute another request recursively until there is no next page token in the result.
The first request works fine but when there is a next page token in the response and it tries to execute the new request the result is an Invalid ReSponse but if I take the link that was given from the result and use it in postman or on my browser everything is fine.
I'm assuming it has something to requests running on different threads at the same time.
The second response from request using Python:
{'html_attributions': [], 'status': 'INVALID_REQUEST', 'results': []}
Here is what I have:
import requests
def getPlaces(location,radius,type, APIKEY):
url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json?location="+location+"&radius="+radius+"&type="+type+"&key="+APIKEY
print('Getting results for type ' + type + '...')
r = requests.get(url)
response = r.json()
results = []
if response['status'] == 'ZERO_RESULTS':
print("Did not find results for the type "+type)
else:
print("Results for type "+type)
for result in response['results']:
results.append(result)
print(result)
print('Printing results')
print(results)
if 'next_page_token' in response:
print("There is a next page")
page_token = response['next_page_token']
print(page_token)
next_results = getNextPlace(page_token,APIKEY)
print(next_results)
results.append(next_results)
return results
# Get the rest of the results
def getNextPlace(page_token,APIKEY):
print('...')
next_url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?location='+location+'&radius='+radius+'&type='+type+'&pagetoken=' + page_token + '&key=' + APIKEY
print(next_url)
r = requests.get(next_url)
response = r.json()
results = []
print(response)
if response['status'] == 'ZERO_RESULTS':
print("Did not find results")
elif response['status'] == 'INVALID_REQUEST':
print('Invalid response')
else:
for next_result in response['results']:
results.append(next_result)
print(next_result)
if 'next_page_token' in response:
new_page_token = response['next_page_token']
getNext = getNextPlace(new_page_token,APIKEY)
results.append(getNext)
return results

Figured out the issue!
Google API doesn't allow consecutive requests to its API if the last request was within ~2 seconds.
What I did have I just had the program sleep for 3 seconds and the sent the request.
Now everything is working fine

What you are trying to do can be seen in one function like:
def getPlaces(location,radius,API,i,type):
url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json?location="+location+"&radius="+radius+"&key="+API+"&types="+type
r = requests.get(url)
response = r.json()
results = []
for result in response['results']:
results.append(result)
l=[]
while True:
if 'next_page_token' in response:
page_token = response['next_page_token']
l.append(page_token)
next_url = url+'&pagetoken='+l[i]
i=i+1
time.sleep(3)
r = requests.get(next_url)
response = r.json()
for next_result in response['results']:
results.append(next_result)
else:
break
return results

Your code print "invalid response" because response['status'] == 'INVALID_REQUEST', so it is google api service think your url request is invalid.
As this document says, the parameter location, radius, type and key is required, and the pagetoken is optional. So your second request url is invalid because it does not have the all required key.
Maybe you should try change the url to :
next_url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?location='+location+"&radius="+radius+"&type="+type+"&key="+APIKEY + "&pagetoken=" + page_token

How to request a career page url within the domain using python

import requests
s = ['jobs','careers','opportunities']
u = ['yahoo.com','statestreet.com']
f = []
for i in u:
for j in s:
w = "http://{}/{}".format(i,j)
print w
r = requests.get(w)
print r.status_code
if(r.status_code == 200):
f.append(w)
break
print f
This code is working for most websites but not for websites like www.surveymonkey.com.

You don't state what isn't working, or what you expect the code to do. However, the indentation in the final if statement in your code seems incorrect.
Correcting that (and editing the variable names to be more descriptive as follows) at least runs as expected for me:
import requests
paths = ['jobs', 'careers', 'opportunities']
domains = ['yahoo.com', 'statestreet.com', 'surveymonkey.com']
results = []
for domain in domains:
for path in paths:
url = "http://{}/{}".format(domain, path)
print url
r = requests.get(url)
print r.status_code
if r.status_code == requests.codes.OK:
results.append(url)
break
print results

Http Redirection code 3XX in python requests

I am trying to capture http status code 3XX/302 for a redirection url. But I cannot get it because it gives 200 status code.
Here is the code:
import requests
r = requests.get('http://goo.gl/NZek5')
print r.status_code
I suppose this should issue either 301 or 302 because it redirects to another page. I had tried few redirecting urls (for e.g. http://fb.com ) but again it is issuing the 200. What should be done to capture the redirection code properly?

requests handles redirects for you, see redirection and history.
Set allow_redirects=False if you don't want requests to handle redirections, or you can inspect the redirection responses contained in the r.history list.
Demo:
>>> import requests
>>> url = 'https://httpbin.org/redirect-to'
>>> params = {"status_code": 301, "url": "https://stackoverflow.com/q/22150023"}
>>> r = requests.get(url, params=params)
>>> r.history
[<Response [301]>, <Response [302]>]
>>> r.history[0].status_code
301
>>> r.history[0].headers['Location']
'https://stackoverflow.com/q/22150023'
>>> r.url
'https://stackoverflow.com/questions/22150023/http-redirection-code-3xx-in-python-requests'
>>> r = requests.get(url, params=params, allow_redirects=False)
>>> r.status_code
301
>>> r.url
'https://httpbin.org/redirect-to?status_code=301&url=https%3A%2F%2Fstackoverflow.com%2Fq%2F22150023'
So if allow_redirects is True, the redirects have been followed and the final response returned is the final page after following redirects. If allow_redirects is False, the first response is returned, even if it is a redirect.

requests.get allows for an optional keyword argument allow_redirects which defaults to True. Setting allow_redirects to False will disable automatically following redirects, as follows:
In [1]: import requests
In [2]: r = requests.get('http://goo.gl/NZek5', allow_redirects=False)
In [3]: print r.status_code
301

This solution will identify the redirect and display the history of redirects, and it will handle common errors. This will ask you for your URL in the console.
import requests
def init():
console = input("Type the URL: ")
get_status_code_from_request_url(console)
def get_status_code_from_request_url(url, do_restart=True):
try:
r = requests.get(url)
if len(r.history) < 1:
print("Status Code: " + str(r.status_code))
else:
print("Status Code: 301. Below are the redirects")
h = r.history
i = 0
for resp in h:
print(" " + str(i) + " - URL " + resp.url + " \n")
i += 1
if do_restart:
init()
except requests.exceptions.MissingSchema:
print("You forgot the protocol. http://, https://, ftp://")
except requests.exceptions.ConnectionError:
print("Sorry, but I couldn't connect. There was a connection problem.")
except requests.exceptions.Timeout:
print("Sorry, but I couldn't connect. I timed out.")
except requests.exceptions.TooManyRedirects:
print("There were too many redirects. I can't count that high.")
init()

Anyone have the php version of this code?
r = requests.get(url)
if len(r.history) < 1:
print("Status Code: " + str(r.status_code))
else:
print("Status Code: 301. Below are the redirects")
h = r.history
i = 0
for resp in h:
print(" " + str(i) + " - URL " + resp.url + " \n")
i += 1
if do_restart:

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Is there a way to detect url changes with Python - python

Yes, you can use the response.url parameter to get the final URL after any redirects. response = requests.get(c) final_url = response.url Note this only handles 3xx redirects, not javascript redirects. Requests will never execute javascript.

Related

Urllib2 grab web-page element then reverse it

While loop page iteration for Meetup API not working

Error when using Requests with python 3.5 recursively (GooglePlaces API)

How to request a career page url within the domain using python

Http Redirection code 3XX in python requests

Categories

Resources