I am trying to download a map image in Python with the urllib module, but it always fails.
I tried urllib.urlopen() with several parameter variants,
and also urllib.urlretrieve(),
but neither works. And when I view the source of the page, I can't find the image file. Here is the image: https://maps.googleapis.com/maps/api/staticmap?center=31.0456,121.3997&zoom=12&size=320x385&sensor=false
Source code:
#-------------------------- PARSE IP ADDRESS -------------------------------
import re
import urllib

try:
    mysite = urllib.urlopen('http://ip-api.com/line')
except urllib.HTTPError, e:
    print "Cannot retrieve URL: HTTP Error Code", e.code
except urllib.URLError, e:
    print "Cannot retrieve URL: " + e.reason[1]

list_of_params = mysite.read()
print list_of_params
ip_arr = list_of_params.splitlines()

#--------------------- HERE IS FIND MAP IMAGE --------------------------------------
try:
    map_page = urllib.urlopen('http://ip-api.com')
except urllib.HTTPError, e:
    print "Cannot retrieve URL: HTTP Error Code", e.code
except urllib.URLError, e:
    print "Cannot retrieve URL: " + e.reason[1]

#f = open("data.html", "w")
#f.write(str(mysite.read()))
#f.close()

#looking for this in the page
pattern = re.findall(re.compile("url\(\'(https://maps\.googleapis\.com/maps/api/staticmap\?center=.*)\'"), map_page.read())
map_img_url = pattern[0].replace('&amp;', '&')

#------------------- DOWNLOAD MAP IMAGE And SAVE IT ------------------------
#file_name = map_img_url.rsplit('/',1)[1]
try:
    get_map_img = urllib.urlretrieve(map_img_url, "staticmap.png")
except urllib.HTTPError, e:
    print "Cannot retrieve URL: HTTP Error Code", e.code
except urllib.URLError, e:
    print "Cannot retrieve URL: " + e.reason[1]

i = open("pict.png", "w")
i.write(get_map_img.read())
i.close()
print "End of file"
This downloads the image in one go with the requests library:

import requests

f = open('static.png', 'wb')
f.write(requests.get('https://maps.googleapis.com/maps/api/staticmap?center=31.0456,121.3997&zoom=12&size=320x385&sensor=false').content)
f.close()
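If you want failures to be loud rather than silently writing an error page to disk, requests can raise on bad HTTP statuses before the file is written (a minimal sketch; the URL is the one from the question):

import requests

url = ('https://maps.googleapis.com/maps/api/staticmap'
       '?center=31.0456,121.3997&zoom=12&size=320x385&sensor=false')
resp = requests.get(url)
resp.raise_for_status()  # raises requests.exceptions.HTTPError on 4xx/5xx responses
with open('static.png', 'wb') as f:
    f.write(resp.content)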
Why are you parsing the map URL? Construct it yourself:
import json, urllib

query = ''  # IP to get coordinates of, leave empty for current IP
geo = urllib.urlopen('http://ip-api.com/json/%s?fields=240' % query)
result = json.load(geo)
if result['zip']:
    zoom = 13
elif result['city']:
    zoom = 12
else:
    zoom = 6
map_img_url = "https://maps.googleapis.com/maps/api/staticmap?center=%s,%s&zoom=%i&size=320x385&sensor=false" % (result['lat'], result['lon'], zoom)
get_map_img = urllib.urlretrieve(map_img_url, "staticmap.png")
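For reference, the same approach on Python 3 needs only urllib.request (a sketch, assuming the same ip-api response fields as above):

import json
from urllib.request import urlopen, urlretrieve

query = ''  # IP to look up; leave empty for the current IP
geo = urlopen('http://ip-api.com/json/%s?fields=240' % query)
result = json.loads(geo.read().decode('utf-8'))

if result.get('zip'):
    zoom = 13
elif result.get('city'):
    zoom = 12
else:
    zoom = 6
map_img_url = ("https://maps.googleapis.com/maps/api/staticmap"
               "?center=%s,%s&zoom=%i&size=320x385&sensor=false"
               % (result['lat'], result['lon'], zoom))
urlretrieve(map_img_url, "staticmap.png")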
I wrote a script to download 20,000 images from Pixabay using the Pixabay API, but it fails after roughly the 600th image with the following error:

File "c:/Users/Dell/Desktop/python-pixabay-master/python-pixabay-master/main.py", line 26, in <module>
    pretty="true"
File "c:\Users\Dell\Desktop\python-pixabay-master\python-pixabay-master\pixabay.py", line 144, in search
    raise ValueError(resp.text)
ValueError: [ERROR 400] "page" is out of valid range.
Code:
from pixabay import Image, Video
import pprint
import requests
import shutil

API_KEY = 'myAPIkeys'
image = Image(API_KEY)
j = 1
for n in range(1, 100):
    ims = image.search(
        q="education",
        lang="en",
        image_type="all",
        orientation="all",
        category="education",
        min_width=0,
        min_height=0,
        colors="",
        editors_choice="false",
        safesearch="false",
        order="popular",
        page=n,
        per_page=200,
        callback="",
        pretty="true"
    )
    #hits=ims['total']
    #print(hits)
    #print(ims)
    #pp=pprint.PrettyPrinter(indent=4)
    for i in range(0, 200):
        payload = ims['hits'][i]['largeImageURL']
        resp = requests.get(payload, stream=True)
        local_file = open(str(j) + "local_image.jpg", 'wb')
        resp.raw.decode_content = True
        shutil.copyfileobj(resp.raw, local_file)
        del resp
        print(str(j) + "URL of image: {}".format(payload))
        j = j + 1
        #urllib.request.urlretrieve(payload,i)
        #pp.pprint(ims)
@Pakshadow Hi, I have tried your API key in Postman. The API has a page parameter; in your case all the matching images fit in the first 3 pages (3 pages × 200 per_page = 600 images), so requesting page 4 makes Pixabay return the "page" is out of valid range error.
You can try it with this Postman collection: https://www.getpostman.com/collections/2823a8aad5ea81b55342
Import it and check.
You can handle this error with an exception:
from pixabay import Image
import requests
import shutil

API_KEY = 'API_KEY'
image = Image(API_KEY)
j = 1
for n in range(1, 100):
    try:
        ims = image.search(
            q="education",
            lang="en",
            image_type="all",
            orientation="all",
            category="education",
            min_width=0,
            min_height=0,
            colors="",
            editors_choice="false",
            safesearch="false",
            order="popular",
            page=n,
            per_page=200,
            callback="",
            pretty="true"
        )
        for i in range(0, 200):
            payload = ims['hits'][i]['largeImageURL']
            resp = requests.get(payload, stream=True)
            local_file = open(str(j) + "local_image.jpg", 'wb')
            resp.raw.decode_content = True
            shutil.copyfileobj(resp.raw, local_file)
            del resp
            print(str(j) + "URL of image: {}".format(payload))
            j = j + 1
            # urllib.request.urlretrieve(payload,i)
            # pp.pprint(ims)
    except Exception as e:
        print(e)
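Alternatively, you can stop paging before the API complains by computing the last valid page from the result count. A sketch, assuming the response's 'total' field (the commented-out ims['total'] in the question) reflects the number of matches; iterating over ims['hits'] directly also avoids the IndexError that range(0, 200) would raise on a short last page:

import math

per_page = 200
first = image.search(q="education", category="education", page=1, per_page=per_page)
total = first['total']                       # match count reported by Pixabay
last_page = max(1, math.ceil(total / per_page))

for n in range(1, last_page + 1):
    hits = image.search(q="education", category="education",
                        page=n, per_page=per_page)['hits']
    for hit in hits:                         # the last page may hold fewer than per_page hits
        payload = hit['largeImageURL']
        # ... download as above ...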
I want to download MODIS data from an HTTP site using the Python script given on the LAADS DAAC site, but while running the script I get an error:

error: argument -s/--source is required

even though I have given the source path. Following is the original script:
#!/usr/bin/env python

# script supports either python2 or python3
#
# Attempts to do HTTP Gets with urllib2(py2) urllib.request(py3) or subprocess
# if tlsv1.1+ isn't supported by the python ssl module
#
# Will download csv or json depending on which python module is available
#

from __future__ import (division, print_function, absolute_import, unicode_literals)

import argparse
import os
import os.path
import shutil
import sys

try:
    from StringIO import StringIO   # python2
except ImportError:
    from io import StringIO         # python3

################################################################################

USERAGENT = 'tis/download.py_1.0--' + sys.version.replace('\n','').replace('\r','')

def geturl(url, token=None, out=None):
    headers = { 'user-agent' : USERAGENT }
    if not token is None:
        headers['Authorization'] = 'Bearer ' + token
    try:
        import ssl
        CTX = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
        if sys.version_info.major == 2:
            import urllib2
            try:
                fh = urllib2.urlopen(urllib2.Request(url, headers=headers), context=CTX)
                if out is None:
                    return fh.read()
                else:
                    shutil.copyfileobj(fh, out)
            except urllib2.HTTPError as e:
                print('HTTP GET error code: %d' % e.code, file=sys.stderr)
                print('HTTP GET error message: %s' % e.message, file=sys.stderr)
            except urllib2.URLError as e:
                print('Failed to make request: %s' % e.reason, file=sys.stderr)
            return None
        else:
            from urllib.request import urlopen, Request
            from urllib.error import URLError, HTTPError   # exceptions live in urllib.error on python3
            try:
                fh = urlopen(Request(url, headers=headers), context=CTX)
                if out is None:
                    return fh.read().decode('utf-8')
                else:
                    shutil.copyfileobj(fh, out)
            except HTTPError as e:
                print('HTTP GET error code: %d' % e.code, file=sys.stderr)
                print('HTTP GET error message: %s' % e.reason, file=sys.stderr)
            except URLError as e:
                print('Failed to make request: %s' % e.reason, file=sys.stderr)
            return None
    except AttributeError:
        # OS X Python 2 and 3 don't support tlsv1.1+ therefore... curl
        import subprocess
        try:
            args = ['curl', '--fail', '-sS', '-L', '--get', url]
            for (k,v) in headers.items():
                args.extend(['-H', ': '.join([k, v])])
            if out is None:
                # python3's subprocess.check_output returns stdout as a byte string
                result = subprocess.check_output(args)
                return result.decode('utf-8') if isinstance(result, bytes) else result
            else:
                subprocess.call(args, stdout=out)
        except subprocess.CalledProcessError as e:
            print('curl GET error message: %s' % (e.message if hasattr(e, 'message') else e.output), file=sys.stderr)
        return None

################################################################################

DESC = "This script will recursively download all files if they don't exist from a LAADS URL and store them to the specified path"

def sync(src, dest, tok):
    '''synchronize src url with dest directory'''
    try:
        import csv
        files = [ f for f in csv.DictReader(StringIO(geturl('%s.csv' % src, tok)), skipinitialspace=True) ]
    except ImportError:
        import json
        files = json.loads(geturl(src + '.json', tok))

    # use os.path since python 2/3 both support it while pathlib is 3.4+
    for f in files:
        # currently we use filesize of 0 to indicate directory
        filesize = int(f['size'])
        path = os.path.join(dest, f['name'])
        url = src + '/' + f['name']
        if filesize == 0:
            try:
                print('creating dir:', path)
                os.mkdir(path)
                sync(src + '/' + f['name'], path, tok)
            except IOError as e:
                print("mkdir `%s': %s" % (e.filename, e.strerror), file=sys.stderr)
                sys.exit(-1)
        else:
            try:
                if not os.path.exists(path):
                    print('downloading: ', path)
                    with open(path, 'w+b') as fh:
                        geturl(url, tok, fh)
                else:
                    print('skipping: ', path)
            except IOError as e:
                print("open `%s': %s" % (e.filename, e.strerror), file=sys.stderr)
                sys.exit(-1)

    return 0

def _main(argv):
    parser = argparse.ArgumentParser(prog=argv[0], description=DESC)
    parser.add_argument('-s', '--source', dest='source', metavar='URL', help='Recursively download files at URL', required=True)
    parser.add_argument('-d', '--destination', dest='destination', metavar='DIR', help='Store directory structure in DIR', required=True)
    parser.add_argument('-t', '--token', dest='token', metavar='TOK', help='Use app token TOK to authenticate', required=True)
    args = parser.parse_args(argv[1:])
    if not os.path.exists(args.destination):
        os.makedirs(args.destination)
    return sync(args.source, args.destination, args.token)

if __name__ == '__main__':
    try:
        sys.exit(_main(sys.argv))
    except KeyboardInterrupt:
        sys.exit(-1)
I supplied the source, destination & token by editing the add_argument calls in _main as follows:
parser.add_argument('-s', '--source', dest='archive/orders/501235044/', metavar='https://ladsweb.modaps.eosdis.nasa.gov/', help='Recursively download files at URL', required=True)
parser.add_argument('-d', '--destination', dest='MODIS_data', metavar='D:\', help='Store directory structure in DIR', required=True)
parser.add_argument('-t', '--token', dest='token', metavar='BA52319C-4DCA-11E8-8D99-C71EAE849760', help='Use app token TOK to authenticate', required=True)
but I still got the error: argument -s/--source is required.
I think the best way to deal with this is to leave the script itself unmodified, save it as, say, modis.py, and then supply the arguments on the command line, for example:

python modis.py -s https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/6/MCD19A2/2019/240/MCD19A2.A2019240.h10v04.006.2019242041719.hdf -d <destination-path/test.hdf> -t <MY-TOKEN>

I got this .hdf as an example from here, choosing the desired product and then going through the headers at the top (TIME, LOCATION, FILES) to get to the .hdf file.
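If you would rather drive it from Python than from the shell, you can also skip argparse and call the script's sync() function directly (a sketch; the order URL, directory, and token below are placeholders, not real values):

import os
import modis  # the LAADS script above, saved as modis.py with the original add_argument lines intact

src = 'https://ladsweb.modaps.eosdis.nasa.gov/archive/orders/501235044'  # placeholder order URL
dest = 'MODIS_data'
token = '<MY-TOKEN>'  # app token from your LAADS DAAC profile

if not os.path.exists(dest):
    os.makedirs(dest)
modis.sync(src, dest, token)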
I wrote a hiscore checker for a game that I play: you enter a list of usernames in a .txt file and it outputs the results in found.txt.
However, if the page responds with a 404 it throws an error instead of returning "0" and continuing with the list.
Example of the script:
#!/usr/bin/python

import urllib2

def get_total(username):
    try:
        req = urllib2.Request('http://services.runescape.com/m=hiscore/index_lite.ws?player=' + username)
        res = urllib2.urlopen(req).read()
        parts = res.split(',')
        return parts[1]
    except urllib2.HTTPError, e:
        if e.code == 404:
            return "0"
    except:
        return "err"

filename = "check.txt"
accs = []
handler = open(filename)
for entry in handler.read().split('\n'):
    if "No Displayname" not in entry:
        accs.append(entry)
handler.close()

for account in accs:
    display_name = account.split(':')[len(account.split(':')) - 1]
    total = get_total(display_name)
    if "err" not in total:
        rStr = account + ' - ' + total
        handler = open('tried.txt', 'a')
        handler.write(rStr + '\n')
        handler.close()
        if total != "0" and total != "49":
            handler = open('found.txt', 'a')
            handler.write(rStr + '\n')
            handler.close()
        print rStr
    else:
        print "Error searching"
        accs.append(account)

print "Done"
Here is the HTTPError exception handler that doesn't seem to be working:

except urllib2.HTTPError, e:
    if e.code == 404:
        return "0"
except:
    return "err"
The error response (a traceback, not reproduced here) only occurs with users that return a 404 response from the request; any other request works fine, so I assume the issue is within the 404 response exception. I believe the problem may be that the 404 is a custom page which you get redirected to, so the original page is "example.com/index.php" but the 404 page is "example.com/error.php"? Not sure how to fix it.
For testing purposes, format to use is,
ID:USER:DISPLAY
which is placed into check.txt
It seems that total can end up being None. In that case you can't check that it has 'err' in it. To fix the crash, try changing that line to:
if total is not None and "err" not in total:
To be more specific, get_total is returning None, which means that either
parts[1] is None, or
except urllib2.HTTPError, e: is executed but e.code is not 404.
In the latter case None is returned implicitly: the exception is caught, but you only handle the very specific 404 case and ignore all other cases.
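Putting the two observations together, here is one way to rework get_total so that every path returns a string (a sketch, using the same endpoint and Python 2 idioms as the question):

def get_total(username):
    try:
        req = urllib2.Request('http://services.runescape.com/m=hiscore/index_lite.ws?player=' + username)
        res = urllib2.urlopen(req).read()
        return res.split(',')[1]
    except urllib2.HTTPError, e:
        # a 404 means the player was not found on the hiscores
        return "0" if e.code == 404 else "err"
    except Exception:
        return "err"

Since the function can no longer return None, the if "err" not in total check cannot crash.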
I'm a complete beginner in Python, trying to get a script to work, but I'm a little at a loss as to where it goes wrong. From reading other posts it seems result is used before it has been defined, and the script doesn't know how to deal with that.
I'm running Python 2.7.11 on El Capitan.
python ytc.py -v YQHsXMglC9A
[*] Retrieving video ID: YQHsXMglC9A
[*] Thumbnails retrieved. Now submitting to TinEye.
[*] Searching TinEye for: https://i.ytimg.com/vi/YQHsXMglC9A/default.jpg
Traceback (most recent call last):
  File "ytc.py", line 72, in <module>
    if result.total_results:
NameError: name 'result' is not defined
This is the script:
import argparse
import requests
import json
from pytineye import TinEyeAPIRequest

tineye = TinEyeAPIRequest('http://api.tineye.com/rest/','PUBLICKEY','PRIVATEKEY')
youtube_key = "MY-API"

ap = argparse.ArgumentParser()
ap.add_argument("-v","--videoID", required=True,help="The videoID of the YouTube video. For example: https://www.youtube.com/watch?v=VIDEOID")
args = vars(ap.parse_args())
video_id = args['videoID']

#
# Retrieve the video details based on videoID
#
def youtube_video_details(video_id):
    api_url = "https://www.googleapis.com/youtube/v3/videos?part=snippet%2CrecordingDetails&"
    api_url += "id=%s&" % video_id
    api_url += "key=%s" % youtube_key
    response = requests.get(api_url)
    if response.status_code == 200:
        results = json.loads(response.content)
        return results
    return None

print "[*] Retrieving video ID: %s" % video_id
video_data = youtube_video_details(video_id)
thumbnails = video_data['items'][0]['snippet']['thumbnails']

print "[*] Thumbnails retrieved. Now submitting to TinEye."

url_list = []

# add the thumbnails from the API to the list
for thumbnail in thumbnails:
    url_list.append(thumbnails[thumbnail]['url'])

# build the manual URLs
for count in range(4):
    url = "http://img.youtube.com/vi/%s/%d.jpg" % (video_id,count)
    url_list.append(url)

results = []

# now walk over the list of URLs and search TinEye
for url in url_list:
    print "[*] Searching TinEye for: %s" % url
    try:
        result = tineye.search_url(url)
    except:
        pass
    if result.total_results:
        results.extend(result.matches)

result_urls = []
dates = {}

for match in results:
    for link in match.backlinks:
        if link.url not in result_urls:
            result_urls.append(link.url)
            dates[link.crawl_date] = link.url

print
print "[*] Discovered %d unique URLs with image matches." % len(result_urls)

for url in result_urls:
    print url

oldest_date = sorted(dates.keys())

print
print "[*] Oldest match was crawled on %s at %s" % (str(oldest_date[0]),dates[oldest_date[0]])
If the try fails, it executes the except block, which contains only pass and no assignment to the variable result; so when if result.total_results runs, you are referencing a name that does not exist.
This should be a quick fix:

try:
    result = tineye.search_url(url)
except NameError:
    print 'Nothing Found !'
    break
if result.total_results:
    results.extend(result.matches)
The error is clear: the result variable is used while it's not defined, which happens whenever the try/except catches a failure.
Fix it by moving the check into the try block:

for url in url_list:
    print "[*] Searching TinEye for: %s" % url
    try:
        result = tineye.search_url(url)
        if result.total_results:
            results.extend(result.matches)
    except:
        pass
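A variant of the same fix that logs the failure instead of silently passing, so bad URLs or API errors stay visible (a sketch; the bare Exception stands in for whatever the pytineye client actually raises):

for url in url_list:
    print "[*] Searching TinEye for: %s" % url
    try:
        result = tineye.search_url(url)
    except Exception as e:
        print "[!] TinEye lookup failed for %s: %s" % (url, e)
        continue  # skip this URL; never reuse a result from a previous iteration
    if result.total_results:
        results.extend(result.matches)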
I need to pass a query to a function. It should be really simple! Here is the code:
def get_file_ID(q):
    dump = drive_service.files().list(q=q, fields = 'items(mimeType,id,title,downloadUrl)').execute()
    fileItems = dump['items']
    for item in fileItems:
        try:
            if item['mimeType'] == "application/octet-stream":
                return item['id']
        except KeyError:
            print "No item of the required type, or item has been deleted"
    return None

def lets_go_get(file_Id):
    f = drive_service.files().get(fileId=file_Id).execute()
    resp, content = drive_service._http.request(f.get('id'))
    if resp.status == 200:
        #print 'Status: %s' % resp
        return content
    else:
        print 'An error occurred: %s' % resp
        return None

text = raw_input('Enter title search text: ')
query = "title contains '" + text + "'"
selectedFile = "'" + get_file_ID(query) + "'"
print lets_go_get(selectedFile)
I get the following error:

HttpError: <HttpError 404 when requesting https://www.googleapis.com/drive/v2/files/%270BxDfMkL6x0wjMS03Nz.....Tk%27?alt=json returned "File not found: '0BxDfMkL6x0wjMS03.....MTU5NjQ0ZjdhOTk'">

But when I substitute the last call to lets_go_get with the direct fileID, like this:

print lets_go_get('0BxDfMkL6x0wjMS03.....MTU5NjQ0ZjdhOTk')

then it works. I can't see any difference between the two. Can anyone explain what I'm missing? Many thanks.
Use the downloadUrl, not the file ID, as the URL for downloading the contents:

f = drive_service.files().get(fileId=file_Id).execute()
resp, content = drive_service._http.request(f.get('downloadUrl'))

The docs have a working sample on the "Python" tab: https://developers.google.com/drive/v2/reference/files/get
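Note also that the failing URL contains %27 (URL-encoded quote characters): selectedFile wraps the ID in literal quotes, which belong inside the query string but not around the ID itself. A sketch of the corrected calling code under that reading:

text = raw_input('Enter title search text: ')
query = "title contains '" + text + "'"   # quotes belong inside the q parameter only
file_id = get_file_ID(query)              # pass the bare ID, with no added quotes
if file_id is not None:
    print lets_go_get(file_id)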