Python Pixabay API

I wrote a script to download 20,000 images from Pixabay using the Pixabay API, but after roughly the 600th image it fails with the following error:
"File "c:/Users/Dell/Desktop/python-pixabay-master/python-pixabay-master/main.py", line 26, in
pretty="true"
File "c:\Users\Dell\Desktop\python-pixabay-master\python-pixabay-master\pixabay.py", line 144, in search
raise ValueError(resp.text)
ValueError: [ERROR 400] "page" is out of valid range."
Code:
from pixabay import Image, Video
import pprint
import requests
import shutil

API_KEY = 'myAPIkeys'
image = Image(API_KEY)

j = 1
for n in range(1, 100):
    ims = image.search(
        q="education",
        lang="en",
        image_type="all",
        orientation="all",
        category="education",
        min_width=0,
        min_height=0,
        colors="",
        editors_choice="false",
        safesearch="false",
        order="popular",
        page=n,
        per_page=200,
        callback="",
        pretty="true"
    )
    # hits = ims['total']
    # print(hits)
    # print(ims)
    # pp = pprint.PrettyPrinter(indent=4)
    for i in range(0, 200):
        payload = ims['hits'][i]['largeImageURL']
        resp = requests.get(payload, stream=True)
        local_file = open(str(j) + "local_image.jpg", 'wb')
        resp.raw.decode_content = True
        shutil.copyfileobj(resp.raw, local_file)
        del resp
        print(str(j) + "URL of image: {}".format(payload))
        j = j + 1
        # urllib.request.urlretrieve(payload, i)
        # pp.pprint(ims)

@Pakshadow Hi, I have tried your API key in Postman. You can see that the API takes a page parameter. In your case all of the images are covered by page 3, so on page 4 Pixabay returns the "page" is out of valid range error.
You can try it with this Postman collection: https://www.getpostman.com/collections/2823a8aad5ea81b55342
Import it and check.
You can handle this error with an exception:
from pixabay import Image
import requests
import shutil

API_KEY = 'API_KEY'
image = Image(API_KEY)

j = 1
for n in range(1, 100):
    try:
        ims = image.search(
            q="education",
            lang="en",
            image_type="all",
            orientation="all",
            category="education",
            min_width=0,
            min_height=0,
            colors="",
            editors_choice="false",
            safesearch="false",
            order="popular",
            page=n,
            per_page=200,
            callback="",
            pretty="true"
        )
        for i in range(0, 200):
            payload = ims['hits'][i]['largeImageURL']
            resp = requests.get(payload, stream=True)
            local_file = open(str(j) + "local_image.jpg", 'wb')
            resp.raw.decode_content = True
            shutil.copyfileobj(resp.raw, local_file)
            del resp
            print(str(j) + "URL of image: {}".format(payload))
            j = j + 1
            # urllib.request.urlretrieve(payload, i)
            # pp.pprint(ims)
    except Exception as e:
        print(e)
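Besides catching the exception, you can avoid requesting invalid pages altogether by reading totalHits from the first response and stopping there. A minimal sketch, assuming search() returns the parsed JSON shown above and that totalHits reflects how many results the API will actually serve for this query (per the Pixabay docs it is capped per query, which is why page 4 fails at 200 results per page):
from pixabay import Image
import math

API_KEY = 'API_KEY'
image = Image(API_KEY)
per_page = 200

# First request: page 1 also tells us how many hits are reachable.
first = image.search(q="education", category="education", page=1, per_page=per_page)
total_hits = first.get('totalHits', 0)
last_page = max(1, int(math.ceil(total_hits / float(per_page))))

for n in range(1, last_page + 1):
    ims = image.search(q="education", category="education", page=n, per_page=per_page)
    for hit in ims['hits']:          # iterate over what was actually returned,
        print(hit['largeImageURL'])  # not a fixed range(0, 200)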

Related

OpenCV put text on raw files

I need help putting a timestamp on images taken by an IP camera. I made a version of the code that saves the image as a local file and then lets OpenCV write on it; now I'd like to write on the image without saving it first, but I'm stuck.
This is the code:
import requests
import time
import shutil
import os
from requests.auth import HTTPBasicAuth
import cv2
from datetime import datetime
from datetime import date
import numpy as np

url = 'http://192.168.0.138/image.jpg?cidx=366981845'
user = '****'
psw = '****'
path = 'ipCameraScreen.png'
font = cv2.FONT_HERSHEY_SIMPLEX
position = (20, 30)
color = (255, 255, 255)
font_size = 0.5
font_stroke = 1

while True:
    response = requests.get(url, auth=HTTPBasicAuth(user, psw), stream=True)
    resp_raw = response.raw
    if response.status_code == 200:
        # DEFINE TIMESTAMP
        current_date = str(date.today())
        current_time = datetime.now().strftime("%H:%M:%S")
        timestamp = (current_date + " | " + current_time)
        with open(path, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
        img = np.asarray(bytearray(resp_raw.read()), dtype="uint8")
        img = cv2.imdecode(img, cv2.IMREAD_COLOR)
        cv2.putText(img, timestamp, position, font, font_size, color, font_stroke)
        cv2.imwrite(path, img)
        print("Screen saved, path:", path)
        time.sleep(3)
    else:
        print("Connection failed")
        time.sleep(3)
    if os.path.exists(path):
        os.remove(path)
        print("image removed")
    time.sleep(2)
The output is:
cv2.error: OpenCV(4.5.2) C:\Users\runneradmin\AppData\Local\Temp\pip-req-build-m8us58q4\opencv\modules\imgcodecs\src\loadsave.cpp:736: error: (-215:Assertion failed) !buf.empty() in function 'cv::imdecode_'
Please help me, and sorry for my English.
Your error states that when you try to decode the array into img using imdecode(), the buffer is empty.
This means that in the previous line:
img = np.asarray(bytearray(resp_raw.read()), dtype="uint8")
resp_raw.read() gives you an empty array.
I would analyze the GET request you perform with a tool like Postman, or just print the contents of resp_raw, because it does not seem to return an image.
I solved it: the raw stream of the first response had already been consumed (shutil.copyfileobj had read it to the end), so reading it again returned nothing. I just re-did the request to define resp_raw, i.e. this:
response = requests.get(url, auth=HTTPBasicAuth(user, psw), stream=True)
resp_raw = requests.get(url, auth=HTTPBasicAuth(user, psw), stream=True).raw
instead of this:
response = requests.get(url, auth=HTTPBasicAuth(user, psw), stream=True)
resp_raw = response.raw
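Note that the second request downloads the same image twice. You can avoid that by decoding the bytes of the first response directly, instead of streaming them to disk and reading the raw stream again. A minimal sketch under the same setup (url, user, psw, path and the timestamp/font variables are assumed to be defined as in the question):
import cv2
import numpy as np
import requests
from requests.auth import HTTPBasicAuth

response = requests.get(url, auth=HTTPBasicAuth(user, psw))
if response.status_code == 200:
    # response.content holds the full JPEG bytes; decode them in memory.
    img = cv2.imdecode(np.frombuffer(response.content, dtype=np.uint8), cv2.IMREAD_COLOR)
    cv2.putText(img, timestamp, position, font, font_size, color, font_stroke)
    cv2.imwrite(path, img)  # only the annotated image is written to disk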

Python download image from URL efficiently

I have a list of image URLs stored in a Pandas DataFrame. I want to download all of these images and store them locally.
The code I use to do so is:
import os
import requests

def load(df, output_folder):
    print("Ready to load " + str(len(df.index)) + " images.")
    for i, row in df.iterrows():
        print("Image " + str(i))
        save_image_from_url(row["image_url"], os.path.join(output_folder, row["image_name"]))

''' From a given URL, download the image and store it at the given path '''
def save_image_from_url(url, output_path):
    image = requests.get(url)
    with open(output_path, 'wb') as f:
        f.write(image.content)
The problem is that the process is very slow (from 0.5 to 4 seconds per image). Is there a way to do it faster?
The obvious way is to parallelize the downloads; there is a clear example in the concurrent.futures docs.
For your case, try this approach:
import concurrent.futures
import os
import requests

def save_image_from_url(url, output_folder):
    image = requests.get(url.image_url)
    output_path = os.path.join(output_folder, url.image_name)
    with open(output_path, "wb") as f:
        f.write(image.content)

def load(df, output_folder):
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        future_to_url = {
            executor.submit(save_image_from_url, url, output_folder): url
            for _, url in df.iterrows()
        }
        for future in concurrent.futures.as_completed(future_to_url):
            url = future_to_url[future]
            try:
                future.result()
            except Exception as exc:
                print("%r generated an exception: %s" % (url, exc))
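For reference, a minimal usage sketch (the column names image_url and image_name match the question; the URLs and the output folder here are made-up examples):
import pandas as pd

df = pd.DataFrame({
    "image_url": ["https://example.com/a.jpg", "https://example.com/b.jpg"],
    "image_name": ["a.jpg", "b.jpg"],
})
load(df, "downloads")  # the output folder must already exist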

Change a while True Python script to run only once

I'm new to Python and I want this code to run only once and then stop, not every 30 seconds,
because I want to run multiple scripts like this with different access tokens, every 5 seconds, from the command line.
When I tried this code, it never jumps to the second one because of the while True loop:
import requests
import time

api_url = "https://graph.facebook.com/v2.9/"
access_token = "access token"
graph_url = "site url"
post_data = {'id': graph_url, 'scrape': True, 'access_token': access_token}

# Beware of rate limiting if trying to increase frequency.
refresh_rate = 30  # refresh rate in seconds

while True:
    try:
        resp = requests.post(api_url, data=post_data)
        if resp.status_code == 200:
            contents = resp.json()
            print(contents['title'])
        else:
            error = "Warning: Status Code {}\n{}\n".format(
                resp.status_code, resp.content)
            print(error)
            raise RuntimeWarning(error)
    except Exception as e:
        f = open("open_graph_refresher.log", "a")
        f.write("{} : {}".format(type(e), e))
        f.close()
        print(e)
    time.sleep(refresh_rate)
From what I understood, you're trying to execute this piece of code for multiple access tokens. To keep it simple, put all your access tokens and URLs in lists and use the following code. It assumes that you know all your access tokens in advance.
import requests
import time

def scrape_facebook(api_url, access_token, graph_url):
    """Scrapes the given access token."""
    post_data = {'id': graph_url, 'scrape': True, 'access_token': access_token}
    try:
        resp = requests.post(api_url, data=post_data)
        if resp.status_code == 200:
            contents = resp.json()
            print(contents['title'])
        else:
            error = "Warning: Status Code {}\n{}\n".format(
                resp.status_code, resp.content)
            print(error)
            raise RuntimeWarning(error)
    except Exception as e:
        f = open(access_token + "_" + "open_graph_refresher.log", "a")
        f.write("{} : {}".format(type(e), e))
        f.close()
        print(e)

access_token = ['a', 'b', 'c']
graph_url = ['sss', 'xxx', 'ppp']
api_url = "https://graph.facebook.com/v2.9/"

for n in range(len(graph_url)):
    scrape_facebook(api_url, access_token[n], graph_url[n])
    time.sleep(5)
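If you prefer one token per run, so that each invocation does its work once and exits (which is what the question asks for), you can read the token and URL from the command line instead of hard-coding lists. A minimal sketch, assuming scrape_facebook is defined as above and the script is saved as, say, scraper.py (the file name and argument order are my own choice):
import sys

if __name__ == "__main__":
    # usage: python scraper.py <access_token> <graph_url>
    token, url = sys.argv[1], sys.argv[2]
    scrape_facebook("https://graph.facebook.com/v2.9/", token, url)
    # The script exits here; a shell loop with `sleep 5` can then start
    # the next token without any while True loop in Python.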

Download map image with Python

I am trying to download a map image in Python with the urllib module, but it always fails.
I tried urllib.urlopen() with some parameter variants,
and also urllib.urlretrieve(),
but it doesn't work. And when I look at the source behind the image URL, I don't find an image file. Here is the image: https://maps.googleapis.com/maps/api/staticmap?center=31.0456,121.3997&zoom=12&size=320x385&sensor=false
Source code:
#-------------------------- PARSE IP ADDRESS -------------------------------
import re
import urllib

try:
    mysite = urllib.urlopen('http://ip-api.com/line')
except urllib.HTTPError, e:
    print "Cannot retrieve URL: HTTP Error Code", e.code
except urllib.URLError, e:
    print "Cannot retrieve URL: " + e.reason[1]

list_of_params = mysite.read()
print list_of_params
ip_arr = list_of_params.splitlines()

#--------------------- HERE IS FIND MAP IMAGE --------------------------------------
try:
    map_page = urllib.urlopen('http://ip-api.com')
except urllib.HTTPError, e:
    print "Cannot retrieve URL: HTTP Error Code", e.code
except urllib.URLError, e:
    print "Cannot retrieve URL: " + e.reason[1]

#f = open("data.html", "w")
#f.write(str(mysite.read()))
#f.close()

#looking for this in page
pattern = re.findall(re.compile("url\(\'(https://maps\.googleapis\.com/maps/api/staticmap\?center=.*)\'"), page_get_map.read())
map_img_url = pattern[0].replace('&amp;', '&')

#------------------- DOWNLOAD MAP IMAGE And SAVE IT ------------------------
#file_name = map_img_url.rsplit('/',1)[1]
try:
    get_map_img = urllib.urlretrieve(map_img_url, "staticmap.png")
except urllib.HTTPError, e:
    print "Cannot retrieve URL: HTTP Error Code", e.code
except urllib.URLError, e:
    print "Cannot retrieve URL: " + e.reason[1]

i = open("pict.png", "w")
i.write(get_map_img.read())
i.close()
print "End of file"
import requests

f = open('static.png', 'wb')
f.write(requests.get('https://maps.googleapis.com/maps/api/staticmap?center=31.0456,121.3997&zoom=12&size=320x385&sensor=false').content)
f.close()
Why are you parsing the map URL? Construct it yourself:
import json, urllib

query = ''  # IP to get coordinates of, leave empty for current IP
geo = urllib.urlopen('http://ip-api.com/json/%s?fields=240' % query)
result = json.load(geo)

if result['zip']:
    zoom = 13
elif result['city']:
    zoom = 12
else:
    zoom = 6

map_img_url = "https://maps.googleapis.com/maps/api/staticmap?center=%s,%s&zoom=%i&size=320x385&sensor=false" % (result['lat'], result['lon'], zoom)
get_map_img = urllib.urlretrieve(map_img_url, "staticmap.png")
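Both snippets above are Python 2. A rough Python 3 sketch of the same idea, assuming ip-api.com still returns the same JSON fields (note that current Google Static Maps usage generally requires an API key, which is omitted here):
import json
import urllib.request

with urllib.request.urlopen('http://ip-api.com/json/?fields=240') as geo:
    result = json.load(geo)

if result['zip']:
    zoom = 13
elif result['city']:
    zoom = 12
else:
    zoom = 6

map_img_url = (
    "https://maps.googleapis.com/maps/api/staticmap"
    "?center=%s,%s&zoom=%i&size=320x385&sensor=false"
    % (result['lat'], result['lon'], zoom)
)
urllib.request.urlretrieve(map_img_url, "staticmap.png")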

Delete lines from a txt file after read

I'm trying to create a script which makes requests to random URLs from a txt file:
import urllib2

with open('urls.txt') as urls:
    for url in urls:
        try:
            r = urllib2.urlopen(url)
        except urllib2.URLError as e:
            r = e
        if r.code in (200, 401):
            print '[{}]: '.format(url), "Up!"
        elif r.code == 404:
            print '[{}]: '.format(url), "Not Found!"
But I want the script to erase a URL from the file when it returns 404 Not Found. Each URL is on its own line, so basically I want to erase every URL that gives 404 Not Found. How do I do that?
You could write to a second file:
import urllib2

with open('urls.txt', 'r') as urls, open('urls2.txt', 'w') as urls2:
    for url in urls:
        try:
            r = urllib2.urlopen(url)
        except urllib2.URLError as e:
            r = e
        if r.code in (200, 401):
            print '[{}]: '.format(url), "Up!"
            urls2.write(url + '\n')
        elif r.code == 404:
            print '[{}]: '.format(url), "Not Found!"
In order to delete lines from a file, you have to rewrite the entire contents of the file. The safest way to do that is to write out a new file in the same directory and then rename it over the old file. I'd modify your code like this:
import os
import sys
import tempfile
import urllib2

good_urls = set()

with open('urls.txt') as urls:
    for url in urls:
        try:
            r = urllib2.urlopen(url)
        except urllib2.URLError as e:
            r = e
        if r.code in (200, 401):
            sys.stdout.write('[{}]: Up!\n'.format(url))
            good_urls.add(url)
        elif r.code == 404:
            sys.stdout.write('[{}]: Not found!\n'.format(url))
        else:
            sys.stdout.write('[{}]: Unexpected response code {}\n'.format(url, r.code))

tmp = None
try:
    tmp = tempfile.NamedTemporaryFile(mode='w', suffix='.txt', dir='.', delete=False)
    for url in sorted(good_urls):
        tmp.write(url + "\n")
    tmp.close()
    os.rename(tmp.name, 'urls.txt')
    tmp = None
finally:
    if tmp is not None:
        os.unlink(tmp.name)
You may want to add a good_urls.add(url) to the else clause in the first loop. If anyone knows a tidier way to do what I did with try-finally there at the end, I'd like to hear about it.
