Upload a file to tinyupload.com using urllib.request - Python

I would like to write a program to upload files to http://www.tinyupload.com/, so I looked for a way to submit the upload form.
I have written this code to upload a file:
import urllib.request
import urllib.parse
import http.cookiejar
import re

# Use cookies
cookie = http.cookiejar.CookieJar()
urllib.request.install_opener(
    urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookie)))

# A function to download a URL (GET)
def urldownload(url):
    try:
        page = urllib.request.urlopen(url)
        return page.read().decode('iso-8859-2')
    except urllib.error.HTTPError:
        return False

# Get the URL from the upload form
def geturl(html):
    regex = re.compile('<form action="(.*?)" name="upload_form"', re.S)
    url = regex.findall(html)[0]
    return str(url)

# Get the session id from the URL
def getsessionid(url):
    return url[-26:]

# Upload a file
def upload(file):
    url = geturl(urldownload('http://s000.tinyupload.com/index.php'))
    sessionid = getsessionid(url)
    f = open(file).read()
    data = {'MAX_FILE_SIZE': '52428800',
            'uploaded_file': f,
            'file_description': 'File: %s' % (file),
            'sessionid': sessionid}
    data = urllib.parse.urlencode(data)
    result = urllib.request.urlopen(url, data.encode('iso-8859-2'))
    return result.read().decode('iso-8859-2')
    #return str(result.info())
The result should be the page that contains the download link, but instead I get the form back again. What is wrong?

There is a simpler way to upload the file: use the requests library.
import requests
import re

session = requests.Session()
index_url = 'http://s000.tinyupload.com/index.php'
upload_url = 'http://s000.tinyupload.com/cgi-bin/upload.cgi?sid='

index_request = session.get(index_url)
PHPSESSID = index_request.cookies['PHPSESSID']

files = {'file': open('bitcoin.pdf', 'rb')}
r = requests.post(upload_url + PHPSESSID, files=files)

# Print the "File upload finished" page
print(r.text)

# Print the download link
print(re.search('http://s000\.tinyupload\.com/\?file_id=[^<]+', r.text).group(0))
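As for why the original urllib.request version gets the form back: urlencoding the raw file bytes does not produce the multipart/form-data body that an upload form like this expects, so the server never sees a file. If you want to stay with the standard library, below is a minimal sketch of building a multipart body by hand; the field names are taken from the code above, while the helper name, the boundary handling and the octet-stream content type are assumptions, not something tinyupload documents.

import urllib.request
import uuid

def upload_multipart(path, action_url, sessionid):
    # Sketch: build a multipart/form-data body by hand and POST it with
    # urllib.request. Field names mirror the form fields used above.
    boundary = uuid.uuid4().hex
    with open(path, 'rb') as fh:
        file_bytes = fh.read()

    parts = []
    for name, value in (('MAX_FILE_SIZE', '52428800'),
                        ('file_description', 'File: %s' % path),
                        ('sessionid', sessionid)):
        parts.append('--%s\r\n'
                     'Content-Disposition: form-data; name="%s"\r\n\r\n'
                     '%s\r\n' % (boundary, name, value))
    body = ''.join(parts).encode('utf-8')
    body += ('--%s\r\n'
             'Content-Disposition: form-data; name="uploaded_file"; filename="%s"\r\n'
             'Content-Type: application/octet-stream\r\n\r\n'
             % (boundary, path)).encode('utf-8')
    body += file_bytes + ('\r\n--%s--\r\n' % boundary).encode('utf-8')

    request = urllib.request.Request(action_url, data=body)
    request.add_header('Content-Type',
                       'multipart/form-data; boundary=%s' % boundary)
    return urllib.request.urlopen(request).read().decode('iso-8859-2')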

Related

Unshorten an Amazon EU link using Python

I'm trying to unshorten an Amazon link using Python, with links of the form "https://amzn.eu/XXXX".
It seems the URL is not recognized!
If the URL is in the format "https://amzn.to/XXXXX" it works!
The problem only appears with amzn.eu.
This is my code. Any suggestions?
import os, pathlib, re, requests, time, warnings
from requests.packages.urllib3.exceptions import InsecureRequestWarning

def formaturl(url):
    if not re.match('(?:http|ftp|https)://', url):
        return 'http://{}'.format(url)
    return url

def unshort_link(url):
    url = formaturl(url)
    warnings.simplefilter('ignore', InsecureRequestWarning)
    session = requests.Session()
    resp = session.head(url, allow_redirects=True, verify=False)
    unshort_url = resp.url
    return unshort_url

not_working_link = 'https://amzn.eu/d/fb1IYWl'
#working_link = 'https://amzn.to/3A0milQ'
unshorted_url = unshort_link(not_working_link)
print(unshorted_url)
The HEAD request doesn't work on this link; it returns a 404.
However, with a GET it'll work as expected:
resp = requests.get('https://amzn.eu/d/fb1IYWl')
resp.url
# 'https://www.amazon.it/dp/B00HVFQF3I/ref=cm_sw_r_apa_i_9GRWP18TK8S32ZPVJVM7_0?_encoding=UTF8&psc=1'
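If you want to keep the cheaper HEAD request for hosts that accept it, a small variant of unshort_link can fall back to GET when HEAD fails; the status-code check and the timeout value below are assumptions, not part of the answer above.

import requests

def unshort_link(url, timeout=10):
    # Try HEAD first; fall back to GET for hosts (like amzn.eu) that reject it.
    session = requests.Session()
    resp = session.head(url, allow_redirects=True, timeout=timeout)
    if resp.status_code >= 400:
        resp = session.get(url, allow_redirects=True, timeout=timeout)
    return resp.url

print(unshort_link('https://amzn.eu/d/fb1IYWl'))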

Simple download.file() in R is not working with requests.get

I am attempting to convert R code to Python code. There is one line that I am having trouble with (code snippet 1).
I have tried all variations of requests, wget, urllib.request, and so on, and the Python code creates a blank file with none of the contents.
(1)
downloader = download.file(url = 'https://www.equibase.com/premium/eqbLateChangeXMLDownload.cfm',
                           destfile = 'C:/Users/bnewell/Desktop/test.xml', quiet = TRUE) # download XML file from site
unfiltered = xmlToList(xmlParse(download_file))
(2)
import requests
URL = 'https://www.equibase.com/premium/eqbLateChangeXMLDownload.cfm'
response = requests.head(URL, allow_redirects=True)
import requests, shutil

URL = 'https://www.equibase.com/premium/eqbLateChangeXMLDownload.cfm'
page = requests.get(URL, stream=True, allow_redirects=True,
                    headers={'user-agent': 'MyPC'})
with open("File.xml", "wb") as f:
    page.raw.decode_content = True
    shutil.copyfileobj(page.raw, f)
Manually adding a user-agent header makes the file download, for a reason I'm not sure about.
I use shutil to write the raw response to disk; it could be replaced by page.iter_content, as sketched below.
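For reference, a sketch of the same download written with page.iter_content instead of shutil; the chunk size is an arbitrary choice.

import requests

URL = 'https://www.equibase.com/premium/eqbLateChangeXMLDownload.cfm'
page = requests.get(URL, stream=True, allow_redirects=True,
                    headers={'user-agent': 'MyPC'})
with open("File.xml", "wb") as f:
    # Stream the response body in chunks rather than copying page.raw.
    for chunk in page.iter_content(chunk_size=8192):
        f.write(chunk)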
Try to actually GET the request:
import requests
URL = 'https://www.equibase.com/premium/eqbLateChangeXMLDownload.cfm'
response = requests.get(URL, allow_redirects=True)
Then you can access what you are downloading with response.raw, response.text, response.content etc.
For more details see the actual docs
Try something like this instead:
import os
import requests

url = "https://......"

r = requests.get(url, stream=True, allow_redirects=True)
if r.status_code != 200:
    print("Download failed:", r.status_code, r.headers, r.text)

file_path = r"C:\data\...."
with open(file_path, 'wb') as f:
    for chunk in r.iter_content(chunk_size=1024 * 8):
        if chunk:
            f.write(chunk)
            f.flush()
            os.fsync(f.fileno())

urllib.error.HTTPError: HTTP Error 400: Bad Request from trying to get a set of images

I was trying to get a set of images for a dataset to train tiny-yolo on, so I followed this tutorial:
https://www.youtube.com/watch?v=Lg4T9iJkwhE&index=5&list=PLX-LrBk6h3wSGvuTnxB2Kj358XfctL4BM
The first file that was written in the tutorial goes like this:
import os
import urllib.request as ulib
from bs4 import BeautifulSoup as Soup
import json

url_a = 'https://www.google.com/search?ei=1m7NWePfFYaGmQG51q7IBg&hl=en&q={}'
url_b = '\&tbm=isch&ved=0ahUKEwjjovnD7sjWAhUGQyYKHTmrC2kQuT0I7gEoAQ&start={}'
url_c = '\&yv=2&vet=10ahUKEwjjovnD7sjWAhUGQyYKHTmrC2kQuT0I7gEoAQ.1m7NWePfFYaGmQG51q7IBg'
url_d = '\.i&ijn=1&asearch=ichunk&async=_id:rg_s,_pms:s'
url_base = ''.join((url_a, url_b, url_c, url_d))
headers = {'User-Agent': 'Chrome/67.0.3396.99 Safari/537.36'}

def get_links(search_name):
    search_name = search_name.replace(' ', '+')
    url = url_base.format(search_name, 0)
    request = ulib.Request(url, None, headers)
    json_string = ulib.urlopen(request).read()
    page = json.loads(json_string)
    new_soup = Soup(page[1][1], 'lxml')
    images = new_soup.find_all('img')
    links = [image['src'] for image in images]
    return links

def save_images(links, search_name):
    directory = search_name.replace(' ', '_')
    if not os.path.isdir(directory):
        os.mkdir(directory)
    for i, link in enumerate(links):
        savepath = os.path.join(directory, '{:06}.png'.format(i))
        ulib.urlretrieve(link, savepath)

if __name__ == '__main__':
    search_name = 'my search query'
    links = get_links(search_name)
    save_images(links, search_name)
The only thing I changed was the headers variable, since my User-Agent is different from the one in the tutorial.
To my surprise, the script returned this error:
urllib.error.HTTPError: HTTP Error 400: Bad Request
Can anyone tell me what's wrong?
The error code itself explains the issue: you are trying to hit a URL which doesn't exist.
Please correct your URL.
URL:- https://www.google.com/search?ei=1m7NWePfFYaGmQG51q7IBg&hl=en&q=my+search+query\\&tbm=isch&ved=0ahUKEwjjovnD7sjWAhUGQyYKHTmrC2kQuT0I7gEoAQ&start=0\\&yv=2&vet=10ahUKEwjjovnD7sjWAhUGQyYKHTmrC2kQuT0I7gEoAQ.1m7NWePfFYaGmQG51q7IBg\\.i&ijn=1&asearch=ichunk&async=_id:rg_s,_pms:s%27
Try it in a browser and see what you get.
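My guess is that the 400 comes from the literal backslashes in url_b, url_c and url_d ('\&', '\.'), which end up in the request URL as the \\ sequences visible above. A sketch of the template with the stray backslashes removed, otherwise identical to the tutorial's code:

url_a = 'https://www.google.com/search?ei=1m7NWePfFYaGmQG51q7IBg&hl=en&q={}'
url_b = '&tbm=isch&ved=0ahUKEwjjovnD7sjWAhUGQyYKHTmrC2kQuT0I7gEoAQ&start={}'
url_c = '&yv=2&vet=10ahUKEwjjovnD7sjWAhUGQyYKHTmrC2kQuT0I7gEoAQ.1m7NWePfFYaGmQG51q7IBg'
url_d = '.i&ijn=1&asearch=ichunk&async=_id:rg_s,_pms:s'
url_base = ''.join((url_a, url_b, url_c, url_d))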

Python - Retrieve and use a cookie to download a file

Trying to download the following file:
https://e4ftl01.cr.usgs.gov/MOLA/MYD14A2.006/2017.10.24/MYD14A2.A2017297.h19v01.006.2017310142443.hdf
I first need to sign into the following site before doing so:
https://urs.earthdata.nasa.gov
After reviewing my browser's web console, I believe it uses a cookie to allow me to download the file. How can I do this using Python? I found out how to retrieve the cookies:
import os, requests
username = 'user'
password = 'pwd'
url = 'https://urs.earthdata.nasa.gov'
r = requests.get(url, auth=(username,password))
cookies = r.cookies
How can I then use this to download the HDF file? I've tried the following but always receive a 401 error.
url2 = "https://e4ftl01.cr.usgs.gov/MOLA/MYD14A2.006/2017.10.24/MYD14A2.A2017297.h19v01.006.2017310142443.hdf"
r2 = requests.get(url2, cookies=r.cookies)
Have you tried simple basic authentication:
from requests.auth import HTTPBasicAuth
url2='https://e4ftl01.cr.usgs.gov/MOLA/MYD14A2.006/2017.10.24/MYD14A2.A2017297.h19v01.006.2017310142443.hdf'
requests.get(url2, auth=HTTPBasicAuth('user', 'pass'))
or read this example
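Another approach that is often used for Earthdata-protected downloads is to let a requests.Session carry the credentials through the redirect to urs.earthdata.nasa.gov, so the cookie set during authentication is reused for the file request itself. A minimal sketch, assuming the username, password and URL from the question; the chunk size and output filename are arbitrary choices:

import requests

username = 'user'
password = 'pwd'
url2 = ('https://e4ftl01.cr.usgs.gov/MOLA/MYD14A2.006/2017.10.24/'
        'MYD14A2.A2017297.h19v01.006.2017310142443.hdf')

# The session keeps the cookies collected while following the redirect
# chain through urs.earthdata.nasa.gov, so the final request is authorised.
with requests.Session() as session:
    session.auth = (username, password)
    r = session.get(url2, allow_redirects=True, stream=True)
    r.raise_for_status()
    with open('MYD14A2.A2017297.h19v01.006.2017310142443.hdf', 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024 * 1024):
            f.write(chunk)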
To download a file using the requests library with the browser's cookies, you can use the following function:
import browser_cookie3
import requests
import shutil
import os

cj = browser_cookie3.brave()

def download_file(url, root_des_path='./'):
    local_filename = url.split('/')[-1]
    local_filename = os.path.join(root_des_path, local_filename)
    # r = requests.get(link, cookies=cj)
    with requests.get(url, cookies=cj, stream=True) as r:
        with open(local_filename, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
    return local_filename

link = 'https://e4ftl01.cr.usgs.gov/MOLA/MYD14A2.006/2017.10.24/MYD14A2.A2017297.h19v01.006.2017310142443.hdf'
a = download_file(link)
In this example, cj holds the cookies of the Brave browser (you can also use Firefox or Chrome). These cookies are then passed to requests to download the file.
Note that you need to install the browser_cookie3 library:
pip install browser-cookie3

Error using urllib2 to get torrent content from a url

Python code:
import urllib2
import requests
info_hash = '00001BD2C9F364C7DCB759DEC6BE02F913C96F72'
url = 'http://torrage.com/torrent/%s.torrent' % info_hash
print url
data = urllib2.urlopen(url).read() # this data is err
# data = requests.get(url).content # this data is ok
f = open('%s.torrent' % info_hash, 'wb')
f.write(data)
f.close()
I can't get the right torrent content with this code, but I do get the right torrent from the URL in a browser.
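Without the response details it is hard to say for sure, but one common difference is that requests sends browser-like headers and transparently decodes gzip-compressed bodies, while urllib2 does neither. A hedged sketch of the urllib2 call with those two differences handled:

import gzip
import io
import urllib2

info_hash = '00001BD2C9F364C7DCB759DEC6BE02F913C96F72'
url = 'http://torrage.com/torrent/%s.torrent' % info_hash

# Send a browser-like User-Agent, then decode the body manually if the
# server returned it gzip-compressed (urllib2 does not do this for us).
request = urllib2.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
response = urllib2.urlopen(request)
data = response.read()
if response.info().get('Content-Encoding') == 'gzip':
    data = gzip.GzipFile(fileobj=io.BytesIO(data)).read()

with open('%s.torrent' % info_hash, 'wb') as f:
    f.write(data)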
