urllib2 does not download pdf file - python

I am using the following code to download my files:
def downloadfile(url):  # function to download a file
    file_name = filename_parse(url)
    #print "***********************"
    #print "File download started:"
    #stime = time.time()
    u = urllib2.urlopen(url)
    f = open(file_name, 'wb')
    file_size = getfilesize(u)
    print "Downloading: %s Bytes: %s \n" % (file_name, file_size)
    file_size_dl = 0
    block_sz = 512
    progressbar(u, block_sz, file_size_dl, f, file_size)
    f.close()
The thing is, it can download any file (exe, txt, and others) except .pdf files. How can I make it download PDFs?

I know this is an old question, but for all of those who stumble upon it and are trying to download a PDF file using Python 2 and urllib2, here is the code:
import urllib2

url = 'http://mensenhandel.nl/files/pdftest2.pdf'
print "Download started..."
f = urllib2.urlopen(url)
data = f.read()
with open("test.pdf", "wb") as code:
    code.write(data)
print "Download completed..."
Just modify the URL to your needs...
Source: http://www.blog.pythonlibrary.org/2012/06/07/python-101-how-to-download-a-file/
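If the PDF is large, a chunked read keeps memory use low. Here is a minimal sketch of the same approach with a read loop (the 8192-byte block size is an arbitrary choice):
import urllib2

url = 'http://mensenhandel.nl/files/pdftest2.pdf'
response = urllib2.urlopen(url)
with open("test.pdf", "wb") as out:
    while True:
        block = response.read(8192)  # read up to 8 KB at a time
        if not block:
            break
        out.write(block)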

Related

Naming downloaded files using names from another file

I have created a script to download multiple images. I have another file (linkVars.py) in which there are URLs of the images to download. The script imports the linkVars.py file, then reads one URL at a time, downloads that image from the URL, and writes it into a file named {file_name}.jpg.
Below is the code corresponding to the explanation above:
import linksVars as lV # file with urls
def download_url(url):
    # Creating a function
    print(f"\nDownloading from: ", url)
    file_name_start_pos = url.rfind("=") + 1 # naming image by using text in url
    name_from_url = url[file_name_start_pos:]
    file_name = name_from_url
    r = requests.get(url, stream=True)
    if r.status_code == requests.codes.ok:
        # Opening the image file to write data in it
        with open(f'{file_name}.jpg', 'wb') as f:
            for data in r:
                f.write(data)
Now, I have multiple names written in name_file.txt (an external file). As I download each image, I want the file_name in {file_name}.jpg to be taken from one name in name_file.txt. Then, as the code starts to download the next file, the next name in name_file.txt should be assigned to {file_name}.jpg. If someone could help me, I would be grateful!
Below is the complete code:
import requests
import linksVars as lV

def download_url(url):
    print(f"\nDownloading from: ", url)
    file_name_start_pos = url.rfind("=") + 1
    name_from_url = url[file_name_start_pos:]
    file_name = name_from_url
    r = requests.get(url, stream=True)
    if r.status_code == requests.codes.ok:
        with open(f'{file_name}.jpg', 'wb') as f:
            for data in r:
                f.write(data)

links = lV.List1
try:
    for listLinks in links:
        download_url(listLinks)
except(KeyboardInterrupt):
    print("\n\n===> Script ended by USER! <===")
Try this:
import requests
import linksVars as lV  # Importing file with URLs stored in variables
import nameVars as nV   # Importing file with names stored in variables

links = lV.List1   # List1 is the list of URLs stored in variables
names = nV.Name1   # Name1 is the list of names stored in variables

# This function will download an image from the URL and name it from Name1
def download_url(url, names):
    print(f"\nDownloading from: ", url)
    file_name_start_pos = url.rfind("v=") + 1  # find "v=" in the given URL
    name_from_url = url[file_name_start_pos:]
    file_name = names
    r = requests.get(url, stream=True)
    if r.status_code == requests.codes.ok:
        with open(f'{file_name}.jpg', 'wb') as f:  # downloaded file is opened and named
            for data in r:
                f.write(data)

try:
    for listLinks, listNames in zip(links, names):  # the for loop iterates over both lists in step
        download_url(listLinks, listNames)
except(KeyboardInterrupt):
    print("\n\n===> Script ended by USER! <===")

How to save an image from a URL into a specific folder

Hi, I want to make a folder and save an image into that folder. Here is what I did, but I get an error:
URL = 'https://storage.labelbox.com/ckh4cqkd7y8r30721vlr7m5he%2F7bc29750-7fcc-dbac-7683-1b67125d7afd-Y62Y62.jpg?Expires=1604685570096&KeyName=labelbox-assets-key-1&Signature=sEhvkl3MUnT2iiuFZsvwFLqzAE8'
path1 = "/Users/naghmeh/Documents/medical/jadid/train/" + str(filename)
try:
    os.mkdir(path1)
except OSError:
    print ("Creation of the directory %s failed" % path1)
else:
    print ("Successfully created the directory %s " % path1)

src_fname, ext = os.path.splitext(filename)
save_fname = os.path.join(path1, os.path.basename(src_fname) + str(filename))
img_data = requests.get(URL).content
with open('ImPath', 'wb') as handler:
    mask = handler.write(img_data)
    mask.save(save_fname)
Error:
'int' object has no attribute 'save'
Using the requests package:
import requests

image_url = "https://storage.labelbox.com/ckh4cqkd7y8r30721vlr7m5he%2F7bc29750-7fcc-dbac-7683-1b67125d7afd-Y62Y62.jpg?Expires=1604685570096&KeyName=labelbox-assets-key-1&Signature=sEhvkl3MUnT2iiuFZsvwFLqzAE8"
path = "/Users/naghmeh/Documents/medical/jadid/train/asd.jpg"

request = requests.get(image_url, stream=True)
with open(path, "wb+") as file:
    for c in request:
        file.write(c)
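Since the question also asks about creating the folder first, here is a minimal sketch that builds the directory before writing; the folder path suffix and file name are placeholders, not values from the original post:
import os
import requests

image_url = "https://storage.labelbox.com/..."  # use the full signed URL from the question
folder = "/Users/naghmeh/Documents/medical/jadid/train/my_images"  # hypothetical target folder
os.makedirs(folder, exist_ok=True)  # create the folder (and parents) if it does not exist

file_path = os.path.join(folder, "Y62Y62.jpg")  # hypothetical file name
response = requests.get(image_url, stream=True)
with open(file_path, "wb") as f:
    for chunk in response.iter_content(chunk_size=8192):
        f.write(chunk)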

Response binary data to download

I am trying to take a file from an input field, save it temporarily to disk, and reply with a response that lets the user re-download the same file.
In order to do this, I've read that I need to reply to the browser with a Content-Type: application/octet-stream and a Content-Disposition: attachment; "filename=myfile.extension".
I can store and listen to my music file in the /tmp folder so I know that the input part of it works.
This is my code in Pyramid:
@view_config(route_name='process')
def process_file(request):
    input_file = request.POST['file'].file
    input_file.seek(0)
    file_path = os.path.join('/tmp', '%s.mp3' % uuid.uuid4())
    with open(file_path, 'wb') as output_file:
        shutil.copyfileobj(input_file, output_file)
    print(f"Wrote: {file_path}")
    filename = file_path.split('/')[-1]
    print(filename)
    f = open(file_path, 'rb')
    return Response(body_file=f, charset='UTF-8', content_type='application/octet-stream', content_disposition=f'attachment; "filename={filename}"')
These are my response headers and response body (screenshots not reproduced here).
However Chrome/Firefox do not start the download of my binary file. What am I doing wrong?
UPDATE
I also tried with FileResponse from Pyramid without success; I still do not get the download popup.
@view_config(route_name='process')
def process_file(request):
    input_file = request.POST['file'].file
    input_file.seek(0)
    file_path = os.path.join('/tmp', '%s.mp3' % uuid.uuid4())
    with open(file_path, 'wb') as output_file:
        shutil.copyfileobj(input_file, output_file)
    print(f"Wrote: {file_path}")
    return FileResponse(file_path, request=request)
Apparently I was approaching this the wrong way. I need to return a plain Response('OK') when the file is uploaded through /process, and then make a second request to get the file back: building a separate /download endpoint that returns a FileResponse-style object fixed the issue.
Example:
@view_config(route_name='process')
def process_file(request):
    input_file = request.POST['file'].file
    db = request.POST['volume']
    input_file.seek(0)
    filename = '%s.mp3' % uuid.uuid4()
    file_path = os.path.join('/tmp', filename)
    with open(file_path, 'wb') as output_file:
        shutil.copyfileobj(input_file, output_file)
    if boost_track(file_path, filename, db):
        return Response(json_body={'filename': filename})

@view_config(route_name='download')
def download_file(request):
    filename = request.GET['filename']
    file_path = os.path.join('/tmp', filename)
    f = open(file_path, 'rb')
    return Response(body_file=f, charset='UTF-8', content_type='application/download', content_disposition=f'attachment; filename="{filename}"')
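For completeness, both views above assume their routes are registered with Pyramid's configurator. A minimal sketch of that wiring (the URL patterns are assumptions, not taken from the original post) might be:
from pyramid.config import Configurator

def main(global_config, **settings):
    config = Configurator(settings=settings)
    config.add_route('process', '/process')    # upload endpoint (assumed pattern)
    config.add_route('download', '/download')  # download endpoint (assumed pattern)
    config.scan()  # registers the @view_config-decorated views
    return config.make_wsgi_app()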

Not able to download files from FTP

I am trying to download files from my FTP server using a Python script. However, the files I get are 0 KB in size, and I can't understand exactly where I am going wrong. I am searching for files by a particular string in the filename and then downloading all the files containing that string from a given directory on my FTP server.
Here is my code:
# Libraries
import re
import os
import ftplib
import ntpath

ftp = ftplib.FTP("192.168.1.786:22")
ftp.login("Marshmellow", "YourPasswordHere")
##ftp.dir("feed_1")
files = []
## F = open('Files.txt','a')
try:
    files = ftp.nlst("feed_1")
    for fname in files:
        res = re.findall("2018-07-25", fname)
        if res:
            # Open the file for writing in binary mode
            print 'Opening local file ' + ntpath.basename(fname)
            file = open(ntpath.basename(fname), 'wb')
            # Download the file a chunk at a time
            # Each chunk is sent to handleDownload
            # We append the chunk to the file and then print a '.' for progress
            # RETR is an FTP command
            print 'Getting ' + ntpath.basename(fname)
            try:
                ftp.retrbinary('RETR ' + ntpath.basename(fname), file.write)
            except:
                pass
            # Clean up time
            print 'Closing file ' + ntpath.basename(fname)
            file.close()
            print (fname)
            ## F.write(fname + '\n')
        if not res:
            continue
except ftplib.error_perm, resp:
    if str(resp) == "550 No files found":
        print "No files in this directory"
        pass
    else:
        raise
## F.close()
Please help me out if anyone knows what's wrong with this.
try:
    ftp.cwd("feed_1")
    files = ftp.nlst()
    for fname in files:
        res = re.findall("2018-07-25", fname)
        if res:
            # Open the file for writing in binary mode
            print 'Opening local file ' + ntpath.basename(fname)
            file = open(ntpath.basename(fname), 'wb')
I've just set the current working directory using ftp.cwd("feed_1"), instead of doing it the wrong way as I did earlier with files = ftp.nlst("feed_1").
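Putting that fix together, a minimal sketch of the corrected download loop (the host, credentials, and directory are placeholders based on the question) could look like this:
import re
import ftplib
import ntpath

ftp = ftplib.FTP("192.168.1.100")             # placeholder host; FTP() takes a hostname only
ftp.login("Marshmellow", "YourPasswordHere")  # placeholder credentials
ftp.cwd("feed_1")                             # change into the remote directory first

for fname in ftp.nlst():                      # names are now relative to feed_1
    if re.findall("2018-07-25", fname):
        local_name = ntpath.basename(fname)
        print 'Getting ' + local_name
        with open(local_name, 'wb') as f:
            ftp.retrbinary('RETR ' + fname, f.write)

ftp.quit()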

Python: How to download a zip file

I'm attempting to download a zip file using this code:
o = urllib2.build_opener( urllib2.HTTPCookieProcessor() )
#login
p = urllib.urlencode( { usernameField: usernameVal, passField: passVal } )
f = o.open(authUrl, p )
data = f.read()
print data
f.close()
#download file
f = o.open(remoteFileUrl)
localFile = open(localFile, "wb")
localFile.write(f.read())
f.close()
I am getting some binary data, but the size of the file I "downloaded" is too small and is not a valid zip file. Am I not retrieving the zip file properly? The HTTP response header for f = o.open(remoteFileUrl) is shown below. I don't know if special processing is needed to handle this response:
HTTP/1.1 200 OK
Server: Apache-Coyote/1.1
Pragma: private
Cache-Control: must-revalidate
Expires: Tue, 31 Dec 1997 23:59:59 GMT
Content-Disposition: inline; filename="files.zip";
Content-Type: application/zip
Transfer-Encoding: chunked
f.read() doesn't necessarily read the whole file, but just a packet of it (which might be the whole file if it's small, but won't be for a large file).
You need to loop over the packets like this:
while 1:
    packet = f.read()
    if not packet:
        break
    localFile.write(packet)
f.close()
f.read() returns an empty packet to signify that you've read the whole file.
If you don't mind reading the whole zip file into memory, the fastest way to read and write it is as follows:
data = f.readlines()
with open(localFile, 'wb') as output:
    output.writelines(data)
Otherwise, to read and write in chunks as you get them over the network, do
with open(localFile, "wb") as output:
    chunk = f.read()
    while chunk:
        output.write(chunk)
        chunk = f.read()
This is a little less neat, but avoids keeping the whole file in memory at once. Hope it helps.
Here is a more robust solution using urllib2 that downloads the file in chunks and prints the status of the download:
import os
import urllib2
import math

def downloadChunks(url):
    """Helper to download large files.
    The only arg is a url. The file goes to a temp directory
    and is downloaded in chunks, printing the percentage
    downloaded as it goes.
    """
    baseFile = os.path.basename(url)

    # build a unique path under the temp directory
    os.umask(0002)
    temp_path = "/tmp/"
    try:
        file = os.path.join(temp_path, baseFile)
        req = urllib2.urlopen(url)
        total_size = int(req.info().getheader('Content-Length').strip())
        downloaded = 0
        CHUNK = 256 * 10240
        with open(file, 'wb') as fp:
            while True:
                chunk = req.read(CHUNK)
                downloaded += len(chunk)
                # float division so the percentage is meaningful under Python 2
                print math.floor((downloaded / float(total_size)) * 100)
                if not chunk:
                    break
                fp.write(chunk)
    except urllib2.HTTPError, e:
        print "HTTP Error:", e.code, url
        return False
    except urllib2.URLError, e:
        print "URL Error:", e.reason, url
        return False

    return file
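A usage example for the helper above (the URL is a placeholder):
# Returns the local path on success, or False on error
local_path = downloadChunks("http://example.com/files.zip")
if local_path:
    print "Saved to", local_path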
Try this:
#download file
f = o.open(remoteFileUrl)

response = ""
while 1:
    data = f.read()
    if not data:
        break
    response += data

with open(localFile, "wb") as local_file:
    local_file.write(response)
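Alternatively, if you would rather not manage the read loop yourself, the standard library's shutil.copyfileobj can stream the response to disk in fixed-size blocks. A minimal sketch reusing the opener o, remoteFileUrl, and localFile from the question:
import shutil

f = o.open(remoteFileUrl)
with open(localFile, "wb") as local_file:
    # copy from the response to the file in 16 KB blocks
    shutil.copyfileobj(f, local_file, 16 * 1024)
f.close()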
