Unshorten a link generated by an Android app with Python

I'm trying to unshorten a URL generated by the Banggood Android app. The link is
https://banggood.app.link/ifPZZ5jS98
The code works great with all bit.ly URLs, but with this URL it doesn't work. Any ideas?
My code:
import requests
import warnings
from requests.packages.urllib3.exceptions import InsecureRequestWarning

def unshort_link(url):
    # Silence the warning that verify=False would otherwise raise
    warnings.simplefilter('ignore', InsecureRequestWarning)
    session = requests.Session()
    # Follow all redirects and keep the final URL
    resp = session.head(url, allow_redirects=True, verify=False)
    return resp.url

unshorted_url = unshort_link("https://banggood.app.link/ifPZZ5jS98")
print(unshorted_url)
# expected result: https://www.banggood.com/LANGFEITE-L8S-2019-Version-15Ah-48_1V-800W+2-Dual-Motor-Folding-Electric-Scooter-Color-Display-DC-Brushless-Motor-45km-or-h-Top-Speed-40km-Range-EU-Plug-p-1486764.html?akmClientCountry=IT&channel=googleshopping&utm_source=googleshopping&utm_medium=cpc_bgs&utm_campaign=xibei-ssc-it-all-0303_prdshare_copy&utm_content=xibei&_branch_match_id=762227340915062987
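banggood.app.link is a Branch deep link rather than a plain shortener, and such endpoints often refuse bare HEAD requests or serve a JavaScript redirect to non-browser clients. A minimal sketch of a fallback, assuming the service still issues ordinary HTTP redirects to web clients (the User-Agent value is illustrative):
import requests

BROWSER_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}

def unshort_link_fallback(url):
    session = requests.Session()
    resp = session.head(url, allow_redirects=True, headers=BROWSER_HEADERS)
    if resp.status_code >= 400 or resp.url == url:
        # HEAD was rejected or did not redirect; retry with a full GET
        resp = session.get(url, allow_redirects=True, headers=BROWSER_HEADERS)
    return resp.url

print(unshort_link_fallback("https://banggood.app.link/ifPZZ5jS98"))
If the final hop is a JavaScript redirect, no amount of redirect-following will help; the target URL then has to be extracted from the response body instead.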

Related

Unshorten an Amazon EU link using Python

I'm trying to unshorten Amazon links using Python, with the pattern "https://amzn.eu/XXXX".
It seems the URL is not recognized!
If the URL is in the format "https://amzn.to/XXXXX" it works!
The problem appears only with amzn.eu.
This is my code. Any suggestions?
import re
import warnings
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

def formaturl(url):
    # Prepend a scheme if the URL does not already carry one
    if not re.match('(?:http|ftp|https)://', url):
        return 'http://{}'.format(url)
    return url

def unshort_link(url):
    url = formaturl(url)
    warnings.simplefilter('ignore', InsecureRequestWarning)
    session = requests.Session()
    resp = session.head(url, allow_redirects=True, verify=False)
    return resp.url

not_working_link = 'https://amzn.eu/d/fb1IYWl'
# working_link = 'https://amzn.to/3A0milQ'
unshorted_url = unshort_link(not_working_link)
print(unshorted_url)
A HEAD request doesn't work on this link; it returns a 404. With a GET, however, it works as expected:
resp = requests.get('https://amzn.eu/d/fb1IYWl')
resp.url
# 'https://www.amazon.it/dp/B00HVFQF3I/ref=cm_sw_r_apa_i_9GRWP18TK8S32ZPVJVM7_0?_encoding=UTF8&psc=1'
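Since HEAD resolves amzn.to links but not amzn.eu ones, the two cases can be folded into one helper that falls back to GET whenever HEAD comes back with an error status. A sketch (the function name is illustrative):
import requests

def unshort_link_safe(url):
    session = requests.Session()
    resp = session.head(url, allow_redirects=True)
    if resp.status_code >= 400:
        # Some shorteners (e.g. amzn.eu) 404 on HEAD but redirect on GET
        resp = session.get(url, allow_redirects=True)
    return resp.url

print(unshort_link_safe('https://amzn.eu/d/fb1IYWl'))
print(unshort_link_safe('https://amzn.to/3A0milQ'))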

Python - Retrieve and use a cookie to download a file

Trying to download the following file:
https://e4ftl01.cr.usgs.gov/MOLA/MYD14A2.006/2017.10.24/MYD14A2.A2017297.h19v01.006.2017310142443.hdf
I first need to sign into the following site before doing so:
https://urs.earthdata.nasa.gov
After reviewing my browser's web console, I believe it's using a cookie to allow me to download the file. How can I do this using Python? I found out how to retrieve the cookies:
import os, requests
username = 'user'
password = 'pwd'
url = 'https://urs.earthdata.nasa.gov'
r = requests.get(url, auth=(username,password))
cookies = r.cookies
How can I then use this to download the HDF file? I've tried the following but always receive a 401 error.
url2 = "https://e4ftl01.cr.usgs.gov/MOLA/MYD14A2.006/2017.10.24/MYD14A2.A2017297.h19v01.006.2017310142443.hdf"
r2 = requests.get(url2, cookies=r.cookies)
Have you tried simple basic authentication:
from requests.auth import HTTPBasicAuth
url2='https://e4ftl01.cr.usgs.gov/MOLA/MYD14A2.006/2017.10.24/MYD14A2.A2017297.h19v01.006.2017310142443.hdf'
requests.get(url2, auth=HTTPBasicAuth('user', 'pass'))
or read this example
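A common complication with NASA Earthdata is that the data host redirects to urs.earthdata.nasa.gov for login and back again, and Requests strips the Authorization header whenever a redirect crosses to a different host, which is one way to end up with a 401 despite valid credentials. A sketch of the workaround, based on NASA's published recipe: a Session subclass that keeps the header alive across the Earthdata redirect hops.
import requests

class SessionWithHeaderRedirection(requests.Session):
    # Keep the Authorization header when bouncing to/from the Earthdata login host
    AUTH_HOST = 'urs.earthdata.nasa.gov'

    def __init__(self, username, password):
        super().__init__()
        self.auth = (username, password)

    def rebuild_auth(self, prepared_request, response):
        headers = prepared_request.headers
        url = prepared_request.url
        if 'Authorization' in headers:
            original = requests.utils.urlparse(response.request.url).hostname
            redirect = requests.utils.urlparse(url).hostname
            # Drop credentials only when the redirect leaves the trusted hosts
            if original != redirect and redirect != self.AUTH_HOST and original != self.AUTH_HOST:
                del headers['Authorization']

session = SessionWithHeaderRedirection('user', 'pwd')
url2 = 'https://e4ftl01.cr.usgs.gov/MOLA/MYD14A2.006/2017.10.24/MYD14A2.A2017297.h19v01.006.2017310142443.hdf'
r2 = session.get(url2)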
To download a file using the Requests library with the browser's cookies, you can use the following function:
import os
import shutil
import browser_cookie3
import requests

# Load the cookies stored by the Brave browser
cj = browser_cookie3.brave()

def download_file(url, root_des_path='./'):
    local_filename = url.split('/')[-1]
    local_filename = os.path.join(root_des_path, local_filename)
    # Stream the response straight to disk instead of buffering it in memory
    with requests.get(url, cookies=cj, stream=True) as r:
        with open(local_filename, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
    return local_filename

link = 'https://e4ftl01.cr.usgs.gov/MOLA/MYD14A2.006/2017.10.24/MYD14A2.A2017297.h19v01.006.2017310142443.hdf'
a = download_file(link)
In this example, cj holds the cookies of the Brave browser (you can use Firefox or Chrome instead); these cookies are then passed to Requests to download the file.
Note that you need to install the browser-cookie3 library:
pip install browser-cookie3
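One caveat: shutil.copyfileobj(r.raw, f) copies the raw socket stream, so if the server compresses the response (Content-Encoding: gzip), the file lands on disk still compressed. Asking urllib3 to decode the stream avoids that; as a drop-in replacement for the with block above:
    with requests.get(url, cookies=cj, stream=True) as r:
        r.raw.decode_content = True  # transparently decompress gzip/deflate
        with open(local_filename, 'wb') as f:
            shutil.copyfileobj(r.raw, f)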

Python requests post login not working for this site

I've tried everything to log in to this site with sessions and Python Requests, but it keeps redirecting me to the login page when I try to access the protected URL (status_code = 302).
import requests
from lxml import html

url = "https://beatyourcourse.com/school_required#"
protected_url = "https://beatyourcourse.com/flyering"

session = requests.Session()
response = session.get(url)
tree = html.fromstring(response.text)
# The Rails login form carries a CSRF token in a hidden input
token = list(set(tree.xpath("//input[@name='authenticity_token']/@value")))[0]

payload = {
    'user[email]': '****',
    'user[password]': '****',
    'authenticity_token': token
}

response = session.post(url, data=payload)  # logging in
response = session.get(protected_url)  # visiting the protected URL
print(response.url)  # prints "https://beatyourcourse.com/school_required#" (redirected to login page)
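One thing worth checking, offered as a guess rather than a confirmed fix: the POST above targets the page URL, but the form may submit somewhere else (Rails apps conventionally post to a path like /users/sign_in). Reading the action attribute off the form itself makes the POST land wherever the form actually points; a sketch that extends the snippet above (the XPath assumes the login form is the one carrying the token):
from urllib.parse import urljoin

# Derive the POST target from the form's own action attribute
form = tree.xpath("//form[.//input[@name='authenticity_token']]")[0]
login_url = urljoin(url, form.get('action') or url)

response = session.post(login_url, data=payload)
print(response.status_code, response.url)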

How to log in to a website using BeautifulSoup in Python

I am very new to Python and am trying to extract data from a site.
I am stuck on the very first step: logging into the site.
This is what I have tried:
# Importing libs
import http.cookiejar
import requests
import urllib3
from bs4 import BeautifulSoup

jar = http.cookiejar.CookieJar(policy=None)
pool = urllib3.PoolManager()  # renamed so it no longer shadows the http module

# Setting account details
acc_pwd = {'user_username': 'userABC',
           'user_password': 'ABC123'}

# Enter URL (Requests needs an explicit scheme)
quote_page = 'http://example.com'

response = pool.request('GET', quote_page)
soup = BeautifulSoup(response.data, 'html.parser')
print("Data %s" % soup)

r = requests.get(quote_page, cookies=jar)
r = requests.post(quote_page, cookies=jar, data=acc_pwd)
print("##############")
print("RData %s" % r.text)
It only takes me back to the login page.
I am not sure whether I am entering the details properly.
This generally works for me:
import requests
from requests_ntlm import HttpNtlmAuth

base_url = ''
r = requests.get(base_url, auth=HttpNtlmAuth('domain\\username', 'password'))
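NTLM only helps when the site sits behind Windows-integrated authentication. For an ordinary HTML login form, the usual pattern is a requests.Session that fetches the form first and then posts the credentials back, so cookies persist across both requests. A minimal sketch, reusing the field names from the question (the login URL is an assumption):
import requests
from bs4 import BeautifulSoup

login_page = 'http://example.com/login'  # assumed login URL

with requests.Session() as session:
    # Fetch the login page first so the session picks up its cookies
    resp = session.get(login_page)
    soup = BeautifulSoup(resp.text, 'html.parser')

    payload = {'user_username': 'userABC',
               'user_password': 'ABC123'}
    # Carry over any hidden inputs (CSRF tokens and the like)
    for hidden in soup.find_all('input', type='hidden'):
        if hidden.get('name'):
            payload[hidden['name']] = hidden.get('value', '')

    resp = session.post(login_page, data=payload)
    print(resp.status_code, resp.url)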

Logging into a website using Python

This question has been addressed in various shapes and flavors, but I have not been able to apply any of the solutions I read online.
I would like to use Python to log into the site: https://app.ninchanese.com/login
and then reach the page: https://app.ninchanese.com/leaderboard/global/1
I have tried various things without success...
Using the POST method:
import requests

oURL = 'https://app.ninchanese.com/login'
oCredentials = dict(email='myemail@hotmail.com', password='mypassword')
oSession = requests.session()
oResponse = oSession.post(oURL, data=oCredentials)
oResponse2 = oSession.get('https://app.ninchanese.com/leaderboard/global/1')
Using the authentication argument of the Requests package:
import requests

oSession = requests.session()
oResponse = oSession.get('https://app.ninchanese.com/login', auth=('myemail@hotmail.com', 'mypassword'))
oResponse2 = oSession.get('https://app.ninchanese.com/leaderboard/global/1')
Whenever I print oResponse2, I can see that I'm still on the login page, so I am guessing the authentication did not work.
Could you please advise how to achieve this?
You have to send the csrf_token along with your login request:
import requests
import bs4

URL = 'https://app.ninchanese.com/login'
credentials = dict(email='myemail@hotmail.com', password='mypassword')

session = requests.session()
# Fetch the login page and pull the CSRF token out of its hidden input
response = session.get(URL)
html = bs4.BeautifulSoup(response.text, 'html.parser')
credentials['csrf_token'] = html.find('input', {'name': 'csrf_token'})['value']

# Log in with the token included, then visit the protected page
response = session.post(URL, data=credentials)
response2 = session.get('https://app.ninchanese.com/leaderboard/global/1')
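A quick way to confirm the login worked is to check where the second request ends up:
print(response2.url)          # should be the leaderboard URL, not /login
print(response2.status_code)  # 200 on success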
