Please help me fix this script.
import urllib
import re
import os
import pprint
import requests
import bs4

def make_catalog():
    try:
        os.mkdir('GRAB')
    except FileExistsError:
        print('FileExistsError')
    except PermissionError:
        print('PermissionError')
    except Exception:
        print(Exception)

def change_catalog():
    try:
        os.chdir('GRAB')
    except PermissionError:
        print('PermissionError')
    except Exception:
        print(Exception)

def download_image(path, name):
    #urllib.URLopener().retrieve(prefix + path, name)
    img = urllib.request.urlopen(prefix + path).read()
    try:
        f = open(name, "wb")
        if f:
            print('open!!!')
        if f.write(img):
            print('write!!!')
    except OSError:
        print('OSError')
    except Exception:
        print(Exception)
    finally:
        f.close()

beginIndex = 5794
endIndex = 5800
prefix = "http://www.inpic.ru"
rep_chars = ['\\', '/', ':', '*', '?', '"', '<', '>', '|', '-', ' ']

make_catalog()
change_catalog()

for i in range(beginIndex, endIndex):
    req = requests.get(prefix + '/image/' + str(i))
    if req.status_code == requests.codes.ok:
        #print(i, '\t', req.status_code, '\t', req, end='\n')
        soup = bs4.BeautifulSoup(req.content)
        #print(soup.prettify())
        name = soup.find("td", {"class": "post_title"}).contents[1].contents
        #author = soup.find("div", {"class": "date_author"}).contents[1].contents
        print('NAME: ', name[0])
        #print(author[0])
        #name[0] = re.sub('[\\\\/:*?"<>|-]', '_', name[0])
        for char in rep_chars:
            name[0] = name[0].replace(char, '_')
        print('newNAME: ', name[0])
        mainImagePath = soup.find("img", {"class": "image last"})["src"]
        mainImageExt = mainImagePath.split('.')[-1]
        manyImages = soup.findAll("img", {"class": "image"})
        print('MAINUMAGE: ', mainImagePath)
        print('MAINIMAGE EXT: ', mainImageExt)
        print('MANYIMAGE: \n')
        pprint.pprint(manyImages)
        if len(manyImages) > 1:
            print('CATALOG MAKE')
            try:
                os.mkdir(name[0])
            #except FileExistsError:
                #print('FileExistsError')
            except PermissionError:
                print('PermissionError')
            except Exception:
                print(Exception)
            os.chdir(name[0])
            #download_image(mainImagePath, str(name[0]) + '_0.' + mainImageExt)
            i = 0
            for name in manyImages:
                #print(name['src'], end='------------\n')
                download_image(name['src'], str(name['src']))
                i = i + 1
            os.chdir('../')
        else:
            print('IMAGE MAKE')
            download_image(mainImagePath, str(name[0]) + '.' + mainImageExt)
            print('mainImagePath', mainImagePath)
            print('name', str(name[0]) + '.' + mainImageExt)
    print('==================================')
The problem is that when downloading the images from a group page such as
http://www.inpic.ru/image/5797/
the script produces the following error message:
Traceback (most recent call last):
File "C:\VINT\OPENSERVER\OpenServer\domains\localhost\python\parse_html\1\q.py", line 98, in <module>
download_image(name['src'], str(name['src']))
File "C:\VINT\OPENSERVER\OpenServer\domains\localhost\python\parse_html\1\q.py", line 46, in download_image
f.close()
UnboundLocalError: local variable 'f' referenced before assignment
You are trying to close a file that failed to open. f was never assigned to because the open() call raised an exception.
Instead of closing the file object in the finally handler, use it as a context manager:
def download_image(path, name):
    #urllib.URLopener().retrieve(prefix + path, name)
    img = urllib.request.urlopen(prefix + path).read()
    try:
        with open(name, "wb") as f:
            print('open!!!')
            f.write(img)
            print('write!!!')
    except OSError:
        print('OSError')
    except Exception:
        print(Exception)
Here the with statement will ensure that f is closed for you if it was opened successfully, whatever happens.
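The urlopen call can also fail (for example with urllib.error.URLError for a bad path or a network problem), so it may be worth guarding it as well. A minimal sketch of download_image along those lines, reusing the prefix global from the original script:

import urllib.request
import urllib.error

def download_image(path, name):
    try:
        img = urllib.request.urlopen(prefix + path).read()  # may raise URLError/HTTPError
        with open(name, "wb") as f:  # closed automatically, even on error
            f.write(img)
    except urllib.error.URLError as e:
        print('download failed:', e)
    except OSError as e:
        print('could not write the file:', e)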
Hello, I am running a script where we get data from certain websites and put it in a database using Scrapy. The script runs well, but the log text file keeps showing "DEBUG: Starting new HTTPS connection (1): 1.rome.api.flipkart.com:443".
What could the real problem be here? Any help is appreciated. Here is the code (a short note on that log line follows after it):
import os
import sys, getopt
import time
import datetime
import pytz
import mysql.connector
import configparser
import shutil
import time
import concurrent.futures
import pandas as pd

currentdir = os.path.dirname(os.path.realpath(__file__))
parentdir = os.path.dirname(currentdir)
sys.path.append(parentdir)

from datetime import datetime
from scrapy.utils.project import get_project_settings
from scrapy.crawler import CrawlerProcess
from multiprocessing import Process
from db.db_action import DBAction
from utils import utils
from concurrent.futures import ThreadPoolExecutor, as_completed

tz = pytz.timezone("Asia/Kolkata")

crawl_inputs = dict()
crawl_inputs["env"] = "prod"
crawl_inputs["marketplace"] = "Amazon"
crawl_inputs["site"] = "amz_kw"
crawl_inputs["db_name"] = "asian_paints"
crawl_inputs["pf_id"] = "1"
crawl_inputs["location_search"] = "0"
crawl_inputs["limit"] = ""
crawl_inputs["page"] = 1
crawl_inputs["kw_snapshot"] = 0
crawl_inputs["pdp_snapshot"] = 0
crawl_inputs["quick_search"] = 1
crawl_inputs["client_id"] = 1241
crawl_inputs["client"] = "asian_paints"
crawl_inputs["start_time"] = datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")

db_action = DBAction()
actual_count = 0
result_count = 0
archived_path = "C:/archives/"
connection = None
cursor = None

# Directory Create
try:
    if not os.path.exists(archived_path):
        os.makedirs(archived_path)
    if not os.path.exists(archived_path + datetime.now(tz).strftime("%Y%m%d")+"/"+ crawl_inputs["site"]):
        os.makedirs(archived_path + datetime.now(tz).strftime("%Y%m%d")+"/"+ crawl_inputs["site"])
    if not os.path.exists("C:/var/logs/" + crawl_inputs["site"]):
        os.makedirs("C:/var/logs/" + crawl_inputs["site"])
    shutil.move("C:/var/logs/" + crawl_inputs["site"], archived_path + datetime.now(tz).strftime("%Y%m%d"), copy_function = shutil.copytree)
except Exception as e:
    print(e)
    print("File creation error: {0}:{1}".format(e.errno, e.strerror))

try:
    if os.name == "nt":
        log_path = "C:/var/logs"
        base_dir = log_path+"/"+crawl_inputs["site"]
        if not os.path.exists(base_dir):
            os.makedirs(base_dir)
    else:
        log_path = "/var/logs/"
        base_dir = log_path+"/"+crawl_inputs["site"]
        if not os.path.exists(base_dir):
            os.makedirs(base_dir)
    directories = ["output", "run_log", "webpages"]
    for directory in directories:
        if not os.path.exists(base_dir+"/"+directory):
            os.makedirs(base_dir+"/"+directory)
except OSError as oserr:
    print("OS error occurred trying to open. Aborting.. Error{0}:{1}".format(oserr.errno, oserr.strerror))
    sys.exit(1)
except IOError as ioerr:
    print("I/O Error{0}: {1}".format(ioerr.errno, ioerr.strerror))
    sys.exit(1)
except FileNotFoundError as fnfe:
    print("File not found. Aborting.. Error: {0}:{1}".format(fnfe.errno, fnfe.strerror))
    sys.exit(1)
except Exception as e:
    print("File creation Error. Aborting.. Error: {0}:{1}".format(e.errno, e.strerror))
    sys.exit(1)

crawl_inputs = db_action.get_kw_platform_inputs(crawl_inputs)
print(f"Total Executing Inputs : {len(crawl_inputs['inputs'])}")
print("Crawl ID: {0}".format(crawl_inputs["crawl_id"]))

def start(input):
    pf_id = str(input["pf_id"])
    keyword = str(input["keyword"])
    brand_id = str(input["brand_id"])
    brand_name = str(input["brand_name"])
    keyword_id = str(input["keyword_id"])
    location_id = str(input["location_id"])
    location = str(input["location"])
    pincode = str(input["pincode"])
    location_search = str(crawl_inputs["location_search"])
    env = str(crawl_inputs["env"])
    db_name = str(crawl_inputs["db_name"])
    crawl_id = str(crawl_inputs["crawl_id"])
    site = str(crawl_inputs["site"])
    page = str(crawl_inputs["page"])
    command = 'python ' +currentdir+ '/main_kw.py --env="' +env+ '" --db_name="' +db_name+ '" --crawl_id="' +crawl_id+ '" --site="' +site+ '" --pf_id="'+pf_id+ '" --brand_id="' +brand_id+ '" --brand_name="' +brand_name+ '" --keyword_id="' +keyword_id+ '" --keyword="' +keyword+ '" --location_id="' +location_id+ '" --location="' +location+ '" --pincode="' +str(pincode)+ '" --page="' +page+ '" --location_search="' +location_search+ '"'
    print("Executing Input :{0}".format(command))
    os.system(command)

def runner():
    threads = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        for input in crawl_inputs.get("inputs"):
            print(f"Input: {input}")
            task = executor.submit(start, input)
            threads.append(task)
        for task in concurrent.futures.as_completed(threads):
            print(task.result())

runner()
time.sleep(5)
crawl_inputs["finish_time"] = datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
connection = db_action.db_connection(db_name=crawl_inputs["db_name"], env=crawl_inputs["env"])
cursor = connection.cursor()

try:
    cursor.execute("update `amazon_crawl_kw` set `status` = 0 where `crawl_id` != " +str(crawl_inputs["crawl_id"])+ ";")
    cursor.execute("select count(kw_crawl_data_id) as product_count from `amazon_crawl_kw` where status = 1 and pf_id = "+str(crawl_inputs["pf_id"])+ " and crawl_id=" +str(crawl_inputs["crawl_id"])+ ";")
    row = cursor.fetchone()
    print("row value: "+ str(row))
    result_count = row["product_count"]
    print("Crawled row count : " + str(result_count))

    try:
        sql2 = 'UPDATE rb_crawl SET status=1, end_time = "' +str(crawl_inputs["finish_time"])+ '" ,no_of_sku_parsed='+ str(result_count)+ ' WHERE crawl_id=' + str(crawl_inputs["crawl_id"])
        cursor.execute(sql2)
    except Exception as e:
        print("The following exception occured while updating : "+ str(e))

    sql3 = 'UPDATE rb_platform SET kw_crawl_data_date = "'+ str(crawl_inputs["start_time"]) + '", kw_crawl_data_id = ' +str(crawl_inputs["crawl_id"])+ ' WHERE pf_id = ' + str(crawl_inputs["pf_id"])
    cursor.execute(sql3)
    connection.commit()
    connection.close()
    print("Updated rb_platform successfully")
except Exception as e:
    print("Updating crawl id failed with exception as :" + str(e))

# try:
#     items_count = result_count
#     subject = crawl_inputs["site"] +" crawling completed"
#     body = "Hi Team,<br><br>" +crawl_inputs["site"]+ " crawling successfully completed for the plateform " +crawl_inputs["marketplace"]+ "...<br>Platform Id: " +str(crawl_inputs["pf_id"])+ "<br>Crawl Id: " +str(crawl_inputs["crawl_id"])+ "<br>Total crawled items: " +str(items_count)+ " <br>Total Actual Items: " + str(actual_count) +" <br>Please QC the data value..<br><br>Thanks<br>Trailytics Team"
#     utils.send_mail("no-reply#trailytics.com", "vijay.kothawar#trailytics.com;ashish.rawat#trailytics.com;anirudh.varshney#trailytics.com;ashutosh.shukla#trailytics.com", subject, body)
#     print("Crawling process has been completed")
# except Exception as e:
#     print("Mail Sending error:" + str(e))

print("Finish")
I want to get the "profile_pic_id" from the JSON list.
I can get the 'follower_count', 'following_count' and the 'username'.
import requests
import json
import re

pk = input("")

def getEndpoint(idUser):
    info=[]
    idUser=idUser.replace('\"','')
    endPoint='https://i.instagram.com/api/v1/users/idUser/info/'
    res=requests.get(endPoint.replace('idUser',idUser))
    try:
        full_name=json.dumps(res.json()['user']['full_name']['profile_pic_url'])
        try:
            fullName=re.sub('[^a-zA-Z \n]', ' ',full_name).lower().replace(',', ' ').replace('\n', ' ').replace('\r', ' ')
            fullName=" ".join(fullName.split())
            info.append(fullName)
        except Exception as e:
            print(e)
            info.append('')
        followersCount=json.dumps(res.json()['user']['follower_count'])
        followingCount=json.dumps(res.json()['user']['following_count'])
        followingCount=json.dumps(res.json()['user']['profile_pic_url'])
        username=json.dumps(res.json()['user']['username']).replace('\"','')
        info.append(username)
        info.append(followersCount)
        info.append(followingCount)
        info.append(profile_pic_url)
        return info
    except Exception as e:
        print(e)
        return None

print(getEndpoint(pk))
I expect the output to be followers, following and profile_pic_url, but the actual output is followers and following only.
'''
import requests
import json
import re

# 'https://www.instagram.com/web/search/topsearch/?query={query}' to find out the pk
print("Colocar tu PK:")
pk = input("")

def getEndpoint(idUser):
    info=[]
    idUser=idUser.replace('\"','')
    endPoint='https://i.instagram.com/api/v1/users/idUser/info/'
    res=requests.get(endPoint.replace('idUser',idUser))
    try:
        full_name=json.dumps(res.json()['user']['full_name'])
        try:
            fullName=re.sub('[^a-zA-Z \n]', ' ',full_name).lower().replace(',', ' ').replace('\n', ' ').replace('\r', ' ')
            fullName=" ".join(fullName.split())
            info.append(fullName)
        except Exception as e:
            print(e)
            info.append('')
        followersCount=json.dumps(res.json()['user']['follower_count'])
        followingCount=json.dumps(res.json()['user']['following_count'])
        profile_pic_url=json.dumps(res.json()['user']['profile_pic_url'])
        username=json.dumps(res.json()['user']['username']).replace('\"','')
        info.append(username)
        info.append(followersCount)
        info.append(followingCount)
        info.append(profile_pic_url)
        return info
    except Exception as e:
        print(e)
        return None

print(getEndpoint(pk))
'''
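A small simplification worth noting (a sketch, not from the original post): res.json() can be parsed once into a dict and the fields read directly, which avoids wrapping every value in json.dumps and the extra quoting that comes with it:

import requests

def getEndpoint(idUser):
    idUser = idUser.replace('"', '')
    endPoint = 'https://i.instagram.com/api/v1/users/{}/info/'.format(idUser)
    res = requests.get(endPoint)
    try:
        user = res.json()['user']  # parse the response body once
        return [
            user['full_name'],
            user['username'],
            user['follower_count'],
            user['following_count'],
            user['profile_pic_url'],
        ]
    except Exception as e:
        print(e)
        return None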
I am building a class to download files asynchronously. However, I am facing a weird bug.
import pandas as pd
import requests
from requests_futures.sessions import FuturesSession
import os
import pathlib

class AsyncDownloader:
    """Download files asynchronously"""
    __urls = set()
    __dest_path = None
    __user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0'
    __read_timeout = 60
    __connection_timeout = 30

    def setSourceCSV(self, source_path, column_name):
        self.source_path = source_path
        self.column_name = column_name
        try:
            my_csv = pd.read_csv(source_path, usecols=[self.column_name], chunksize=10)
        except ValueError:
            print("The column name doesn't exist")
            return
        else:
            # No exception whatsoever
            for chunk in my_csv:
                AsyncDownloader.__urls.update(set(getattr(chunk, self.column_name)))

    def setDestinationPath(self, dest_path):
        if dest_path.endswith('/'):
            dest_path = dest_path[:-1]
        self.dest_path = dest_path
        # Make directory if not exist
        # TODO Add exception in case we can't create the directory
        pathlib.Path(self.dest_path).mkdir(parents=True, exist_ok=True)
        if os.access(self.dest_path, os.W_OK):
            AsyncDownloader.__dest_path = pathlib.Path(self.dest_path).resolve()

    def setUserAgent(self, useragent):
        self.useragent = useragent
        AsyncDownloader.__user_agent = self.useragent

    def setConnectionTimeout(self, ctimeout_secs):
        self.timeout_secs = ctimeout_secs
        AsyncDownloader.__connection_timeout = self.timeout_secs

    def setReadTimeout(self, rtimeout_secs):
        self.timeout_secs = rtimeout_secs
        AsyncDownloader.__read_timeout = self.timeout_secs

    def download(self):
        try:
            session = FuturesSession(max_workers=10)
            session.headers.update({'user-agent': AsyncDownloader.__user_agent})
            session.request(AsyncDownloader.__connection_timeout,
                            AsyncDownloader.__connection_timeout)
            results = []
            for url in AsyncDownloader.__urls:
                results.append(session.get(url))
            for result in results:
                response = result.result()
                filename = os.path.basename(response.url)
                if AsyncDownloader.__dest_path is None:
                    AsyncDownloader.__dest_path = pathlib.Path(filename)
                else:
                    AsyncDownloader.__dest_path = pathlib.Path(str(AsyncDownloader.__dest_path) + os.path.sep + filename).resolve()
                # save file in directory
                print(AsyncDownloader.__dest_path)  # Shows correct path
                with open(AsyncDownloader.__dest_path, 'wb') as fd:
                    for chunk in response.iter_content(chunk_size=128):
                        fd.write(chunk)
        except requests.exceptions.HTTPError as errh:
            print("Http Error:", errh)
        except requests.exceptions.ConnectionError as errc:
            print("Error Connecting:", errc)
        except requests.exceptions.Timeout as errt:
            print("Timeout Error:", errt)
        except requests.exceptions.RequestException as err:
            print("OOps: Something Else", err)
        else:
            return

    def printURLs(self):
        print(AsyncDownloader.__urls)
The print shows the correct path, which is
C:\Users\XYZ\PycharmProjects\AsyncDownloaderTest\images\Spring-Landscape-HD-Wallpapers-25912.jpg
However, open sees the wrong path:
with open(AsyncDownloader.__dest_path, 'wb') as fd:
FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\XYZ\\PycharmProjects\\AsyncDownloaderTest\\images\\Spring-Landscape-HD-Wallpapers-25912.jpg\\FUE7XiFApEqWZQ85wYcAfM.jpg'
I think the indentation is OK, so I wonder what's wrong.
Change:
AsyncDownloader.__dest_path = pathlib.Path(str(AsyncDownloader.__dest_path)
+ os.path.sep + filename).resolve()
to:
AsyncDownloader.__dest_path = pathlib.Path(
os.path.split(str(AsyncDownloader.__dest_path))[0] + os.path.sep + filename).resolve()
This appends the new file name to the destination directory instead of to the full path of the previous file.
Change the following line
AsyncDownloader.__dest_path = pathlib.Path(str(AsyncDownloader.__dest_path)
+ os.path.sep + filename).resolve()
to:
AsyncDownloader.__dest_path = pathlib.Path(os.path.join(os.path.dirname(AsyncDownloader.__dest_path), filename)).resolve()
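Both fixes address the symptom; the underlying issue is that the class attribute AsyncDownloader.__dest_path is reassigned inside the loop, so each iteration appends the next filename to the previous file's full path. A minimal sketch of the loop body inside download() that builds a per-file path in a local variable instead (only names from the original class are assumed):

for result in results:
    response = result.result()
    filename = os.path.basename(response.url)
    # Build the target path locally instead of reassigning the shared class attribute.
    if AsyncDownloader.__dest_path is None:
        target = pathlib.Path(filename)
    else:
        target = pathlib.Path(AsyncDownloader.__dest_path) / filename
    print(target)
    with open(target, 'wb') as fd:
        for chunk in response.iter_content(chunk_size=128):
            fd.write(chunk)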
I keep on getting the same error:
File "backup.py", line 26
logging.error("Unable to create backup.zip")
IndentationError: unindent does not match any outer indentation level
This is my script:
import sys
import os
import logging
logging.basicConfig(filename='file_ex.log', level = logging.DEBUG)
logging.info("checking to see if the backup.zip exists")
if os.path.exists("backup.zip"):
logging.info("It exists!")
try:
zip_file = zipfile.ZipFile('backup.zip','a')
except:
err = sys.exc_info()
logging.error("Unable to open backup.zip in append mode")
logging.error("Error Num: " + str(err[1].args[0]))
logging.error("Error Msg: " = err[1].args[1])
sys.exit()
else:
logging.info("Creating backup.zip")
try:
zip_file = zipfile.ZipFile('backup.zip', 'w')
except:
err = sys.exc_info()
logging.error("Unable to create backup.zip")
logging.error("Error Num: " + str(err[1].args[0]))
logging.error("Error Msg: " + err[1].args[1])
sys.exit()
else:
logging.info("Creating backup.zip")
try:
zip_file = zipfile.ZipFile('backup.zip', 'w')
except:
err = sys.exc_info()
logging.error("Unable to create backup.zip")
logging.error("Error Num: " + str(err[1].args[0]))
logging.error("Error Msg: " + err[1].args[1])
logging.info("adding test.txt to backup.zip")
try:
zip_file.write('test.txt', 'test.txt', zipfile.ZIP_DEFLATED)
except:
err = sys.exc_info()
logging.error("Unable to open backup.zip in append mode")
logging.error("Error Num: " + str(err[1].args[0]))
logging.error("Error Msg: " = err[1].args[1])
zip_file.close()
You have errors on lines 17 and 48: logging.error("Error Msg: " = err[1].args[1]) should use + instead of =.
Try to fix your indentation.
It is hard to see from your example exactly where your try/except statements belong, but here is a snippet with what I believe is the correct indentation, based on what I think you are trying to do.
import sys
import os
import logging
import zipfile  # needed for zipfile.ZipFile below

logging.basicConfig(filename='file_ex.log', level=logging.DEBUG)
logging.info("checking to see if the backup.zip exists")

if os.path.exists("backup.zip"):
    logging.info("It exists!")
    try:
        zip_file = zipfile.ZipFile('backup.zip', 'a')
    except:
        err = sys.exc_info()
        logging.error("Unable to open backup.zip in append mode")
        logging.error("Error Num: " + str(err[1].args[0]))
        logging.error("Error Msg: " + err[1].args[1])
        sys.exit()
    else:
        logging.info("Creating backup.zip")
        try:
            zip_file = zipfile.ZipFile('backup.zip', 'w')
        except:
            err = sys.exc_info()
            logging.error("Unable to create backup.zip")
            logging.error("Error Num: " + str(err[1].args[0]))
            logging.error("Error Msg: " + err[1].args[1])
            sys.exit()
else:
    logging.info("Creating backup.zip")
    try:
        zip_file = zipfile.ZipFile('backup.zip', 'w')
    except:
        err = sys.exc_info()
        logging.error("Unable to create backup.zip")
        logging.error("Error Num: " + str(err[1].args[0]))
        logging.error("Error Msg: " + err[1].args[1])

logging.info("adding test.txt to backup.zip")
try:
    zip_file.write('test.txt', 'test.txt', zipfile.ZIP_DEFLATED)
except:
    err = sys.exc_info()
    logging.error("Unable to open backup.zip in append mode")
    logging.error("Error Num: " + str(err[1].args[0]))
    logging.error("Error Msg: " + err[1].args[1])

zip_file.close()
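As a side note, a sketch of an alternative layout (not from the original answer) that lets a with statement close the archive and catches specific exceptions instead of using a bare except:

import logging
import os
import zipfile

logging.basicConfig(filename='file_ex.log', level=logging.DEBUG)

mode = 'a' if os.path.exists('backup.zip') else 'w'
try:
    # The with statement closes the archive even if write() raises.
    with zipfile.ZipFile('backup.zip', mode) as zip_file:
        zip_file.write('test.txt', 'test.txt', zipfile.ZIP_DEFLATED)
except (OSError, zipfile.BadZipFile) as err:
    logging.error("Unable to update backup.zip: %s", err)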
My Facebook app lets the user upload an image and select what it shows (eyes, nose, mouth or another body part), and then combines three images chosen at random by category. That works fine, and the code looks OK and readable, though not very advanced:
class CyberFazeHandler(BaseHandler):
    def get_random_image(self, category):
        fileinfos = FileInfo.all().filter("category =", category)
        return fileinfos[random.randint(0, fileinfos.count()-1)]

    def get(self):
        eyes_image = self.get_random_image(category="eyes")
        nose_image = self.get_random_image(category="nose")
        mouth_image = self.get_random_image(category="mouth")
        eyes_data = None
        try:
            eyes_data = blobstore.fetch_data(eyes_image.blob.key(), 0, 50000)
        except Exception, e:
            self.set_message(type=u'error',
                             content=u'Could not find eyes data for file '+str(eyes_image.key().id())+' (' + unicode(e) + u')')
        eyes_img = None
        try:
            eyes_img = images.Image(image_data=eyes_data)
...now I just fetch three random images and then combine them in the template:
<img src="{{eyes_url}}"><br>
<img src="{{nose_url}}"><br>
<img src="{{mouth_url}}">
Could this be improved by sending a composite image that combines the three images into one? The advantage would be that everything in the image loads at the same time, and that the result is already saved if the same randomization comes up again. What do you think?
Thank you (the application is apps.facebook.com/cyberfaze, which you may inspect; I did it for fun and learning).
The entire class is
class CyberFazeHandler(BaseHandler):
    def get_random_image(self, category):
        fileinfos = FileInfo.all().filter("category =", category)
        return fileinfos[random.randint(0, fileinfos.count()-1)]  #optimize

    def get(self):
        eyes_image = self.get_random_image(category="eyes")
        nose_image = self.get_random_image(category="nose")
        mouth_image = self.get_random_image(category="mouth")

        eyes_data = None
        try:
            eyes_data = blobstore.fetch_data(eyes_image.blob.key(), 0, 50000)
        except Exception, e:
            self.set_message(type=u'error',
                             content=u'Could not find eyes data for file '+str(eyes_image.key().id())+' (' + unicode(e) + u')')
        eyes_img = None
        try:
            eyes_img = images.Image(image_data=eyes_data)
        except Exception, e:
            self.set_message(type=u'error',
                             content=u'Could not find eyes img for file '+str(eyes_image.key().id())+' (' + unicode(e) + u')')

        nose_data = None
        try:
            nose_data = blobstore.fetch_data(nose_image.blob.key(), 0, 50000)
        except Exception, e:
            self.set_message(type=u'error',
                             content=u'Could not find nose data for file '+str(nose_image.key().id())+' (' + unicode(e) + u')')
        nose_img = None
        try:
            nose_img = images.Image(image_data=nose_data)
        except Exception, e:
            self.set_message(type=u'error',
                             content=u'Could not find nose img for file '+str(nose_image.key().id())+' (' + unicode(e) + u')')

        mouth_data = None
        try:
            mouth_data = blobstore.fetch_data(mouth_image.blob.key(), 0, 50000)
        except Exception, e:
            self.set_message(type=u'error',
                             content=u'Could not find mouth data for file '+str(eyes_image.key().id())+' (' + unicode(e) + u')')
        mouth_img = None
        try:
            mouth_img = images.Image(image_data=mouth_data)
        except Exception, e:
            self.set_message(type=u'error',
                             content=u'Could not find mouth img for file '+str(mouth_image.key().id())+' (' + unicode(e) + u')')

        minimum = min(int(eyes_img.width), int(nose_img.width), int(mouth_img.width))

        eyes_url = images.get_serving_url(str(eyes_image.blob.key()), size=minimum)
        nose_url = images.get_serving_url(str(nose_image.blob.key()), size=minimum)
        mouth_url = images.get_serving_url(str(mouth_image.blob.key()), size=minimum)

        self.render(u'cyberfaze', minimum=minimum, eyes_image=eyes_image, eyes_url=eyes_url, nose_image=nose_image, nose_url=nose_url, mouth_image=mouth_image, mouth_url=mouth_url, form_url = blobstore.create_upload_url('/upload'),)
After the rewrite it works as suggested:
class CyberFazeHandler(BaseHandler):
    def get_random_image(self, category):
        q = FileInfo.all()
        q.filter('category =', category)
        q.filter('randomvalue >=', random.random())
        return q.get()

    def get_random_image_legacy(self, category):
        fileinfos = FileInfo.all().filter('category =', category)
        return fileinfos[random.randint(0, fileinfos.count() - 1)]

    def get(self):
        eyes_image = self.get_random_image(category='eyes')
        if not eyes_image:
            logging.debug("getting eyes failed, trying legacy method")
            eyes_image = self.get_random_image_legacy(category='eyes')
        nose_image = self.get_random_image(category='nose')
        if not nose_image:
            nose_image = self.get_random_image_legacy(category='nose')
        mouth_image = self.get_random_image(category='mouth')
        if not mouth_image:
            mouth_image = self.get_random_image_legacy(category='mouth')

        eyes_data = None
        try:
            eyes_data = blobstore.fetch_data(eyes_image.blob.key(), 0, 50000)
        except Exception, e:
            self.set_message(type=u'error',
                             content=u'Could not find eyes data for file '
                             + str(eyes_image.key().id()) + ' ('
                             + unicode(e) + u')')
        eyes_img = None
        try:
            eyes_img = images.Image(image_data=eyes_data)
        except Exception, e:
            self.set_message(type=u'error',
                             content=u'Could not find eyes img for file '
                             + str(eyes_image.key().id()) + ' ('
                             + unicode(e) + u')')

        nose_data = None
        try:
            nose_data = blobstore.fetch_data(nose_image.blob.key(), 0, 50000)
        except Exception, e:
            self.set_message(type=u'error',
                             content=u'Could not find nose data for file '
                             + str(nose_image.key().id()) + ' ('
                             + unicode(e) + u')')
        nose_img = None
        try:
            nose_img = images.Image(image_data=nose_data)
        except Exception, e:
            self.set_message(type=u'error',
                             content=u'Could not find nose img for file '
                             + str(nose_image.key().id()) + ' ('
                             + unicode(e) + u')')

        mouth_data = None
        try:
            mouth_data = blobstore.fetch_data(mouth_image.blob.key(), 0, 50000)
        except Exception, e:
            self.set_message(type=u'error',
                             content=u'Could not find mouth data for file '
                             + str(eyes_image.key().id()) + ' ('
                             + unicode(e) + u')')
        mouth_img = None
        try:
            mouth_img = images.Image(image_data=mouth_data)
        except Exception, e:
            self.set_message(type=u'error',
                             content=u'Could not find mouth img for file '
                             + str(mouth_image.key().id()) + ' ('
                             + unicode(e) + u')')

        minimum = min(int(eyes_img.width), int(nose_img.width), int(mouth_img.width))

        eyes_url = images.get_serving_url(str(eyes_image.blob.key()), size=minimum)
        nose_url = images.get_serving_url(str(nose_image.blob.key()), size=minimum)
        mouth_url = images.get_serving_url(str(mouth_image.blob.key()), size=minimum)

        self.render(
            u'cyberfaze',
            minimum=minimum,
            eyes_image=eyes_image,
            eyes_url=eyes_url,
            nose_image=nose_image,
            nose_url=nose_url,
            mouth_image=mouth_image,
            mouth_url=mouth_url,
            form_url=blobstore.create_upload_url('/upload'),
        )
Which is more efficient depends on how it'll be used. If the user will be loading a lot of these mashups, it makes more sense to send them as separate images, because there will be fewer images for the browser to cache (a+b+c images instead of a*b*c).
Your code has a much more egregious performance issue, however:
def get_random_image(self, category):
    fileinfos = FileInfo.all().filter("category =", category)
    return fileinfos[random.randint(0, fileinfos.count()-1)]
Every time you call this function, it will perform a count operation, which is O(n) with the number of FileInfo entities, then perform an offset query, which is O(n) with the offset. This is extremely slow and inefficient, and will get more so as you increase the number of images.
If you expect the set of images to be small (less than a few thousand) and fairly constant, simply store them in code, which will be faster than any other option. If the set is larger, or changes at runtime, assign a random value between 0 and 1 to each entity, and use a query like this to retrieve a randomly selected one:
q = FileInfo.all()
q.filter('category =', category)
q.filter('random >=', random.random())
return q.get()
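For completeness, a sketch of how that random value might be stored when entities are written, using the old App Engine db API and assuming a FloatProperty named random to match the query above (the rewrite earlier in the question uses randomvalue, so the names would need to agree):

import random
from google.appengine.ext import db

class FileInfo(db.Model):
    category = db.StringProperty()
    random = db.FloatProperty()  # sampled once per entity, in [0, 1)
    # ... blob reference and any other existing properties ...

# When an image is uploaded, store the random value along with the entity:
info = FileInfo(category='eyes')
info.random = random.random()
info.put()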