I am coding a music player in KivyMD. I have a class that extracts the song metadata, including the image data in bytes, and writes the bytes to an image file. I must do this for each song and assign the image to a FitImage widget. However, the image displayed for each song happens to be the image for the last song in the list. I need a way to fix this.
The extractor class:
from mutagen.id3 import ID3


class Infos:
    def __init__(self, song):
        self.song = song
        self.data = None

    def extract_details(self):
        # skipped unnecessary details here
        # but it returns the title, year, genre, song_length
        return title, year, genre, song_length

    def get_image_data(self):
        tag = None
        try:
            tag = ID3(self.song)
        except:
            # does not start with an ID3 tag
            pass
        if tag is not None:
            if "APIC:" in tag.keys():
                data = tag['APIC:']
                self.data = data.data
            else:
                data_file = open("assets/images/default.png", 'rb')
                self.data = data_file.read()
                data_file.close()
        else:
            # print("No tag")
            data_file = open("assets/images/default.png", 'rb')
            self.data = data_file.read()
            data_file.close()
        return self.data

    def write_image(self):
        try:
            image = open("assets/images/album_art.png", "wb")
            image.write(self.get_image_data())
            image.close()
            # print("saved")
        except Exception as e:
            print(e)
In the function responsible for updating the FitImage with the extracted image:
def populate_music_tab(self, content_cls):
    """Function for adding music in a GridLayout."""
    music_list = os.listdir("assets/sample-songs")
    item_holder = content_cls.ids.song_holder  # MDGridLayout
    for song in music_list:
        info = Infos("assets/sample-songs/" + song)
        title, yr, gnr, length = info.extract_details()  # get the song details
        info.write_image()  # write the image first before loading
        image_src = "assets/images/album_art.png"
        item = CustomListItem()  # MDCard with FitImage, Labels and Buttons
        item.ids.song_name = title
        item.ids.year = yr
        item.ids.song_length = length
        item.ids.image_left = image_src  # FitImage
        item_holder.add_widget(item)
The code above does this:
[image: current output]
What I want to achieve is that each item displays its own unique image from the song's metadata:
[image: desired output]
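One way to avoid every item pointing at the same file is to write each song's art to its own path. The sketch below is only an illustration of that idea; the per-song filename scheme and the write_image(out_path) variant are assumptions, not part of the code above:
def write_image(self, out_path):
    # hypothetical variant of Infos.write_image that takes the destination path
    try:
        with open(out_path, "wb") as image:
            image.write(self.get_image_data())
    except Exception as e:
        print(e)

# in populate_music_tab, give every song its own art file:
for song in music_list:
    info = Infos("assets/sample-songs/" + song)
    image_src = "assets/images/" + os.path.splitext(song)[0] + ".png"
    info.write_image(image_src)  # each FitImage now points at a distinct file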
Related
I made a YouTube video download manager. It downloads a video, but I am facing one issue: when I download the same video again, it doesn't download it. How can I make it download again under a new name, e.g. keep pic.png and save the new copy as pic1.png?
def Download(self):
    video_url = self.lineEdit.text()
    save_location = self.lineEdit_2.text()
    if video_url == '' or save_location == '':
        QMessageBox.warning(self, "Data Error", "Provide a Valid Video URL or save Location")
    else:
        video = pafy.new(video_url)
        video_stream = video.streams
        video_quality = self.comboBox.currentIndex()
        download = video_stream[video_quality].download(filepath=save_location, callback=self.Handel_Progress, )
Ok, this one is interesting.
The real problem begins here.
download = video_stream[video_quality].download(filepath=save_location, callback=self.Handel_Progress, )
Here you are calling the download function of the video_stream object, which takes filepath as an argument for the file location but not for the filename, because, obviously, the file is saved under its actual name.
Root cause of your problem:
If you look into the definition of the download function, you will find that if a file with the same name already exists, it does not download the file at all.
Now comes the part where you make sure it downloads, no matter what:
There are two things you need to do:
1. Check whether a file with the same name exists, and if it does, add 1 at the end of the filename just before the extension. So if abc.mp4 exists, save abc1.mp4.
[I will tell you how to handle the scenario where abc.mp4, abc1.mp4 and so on already exist, but for now, let's get back to the problem.]
2. Pass the new file name (abc1.mp4) to the download method.
The following piece of code handles both.
I have added comments for your understanding.
import os
import re
import pafy
from pafy.util import xenc


# this function is used by pafy to generate the file name while saving,
# so I'm using the same function to get the file name which I will use to check
# if the file exists or not
# DO NOT CHANGE IT
def generate_filename(title, extension):
    max_length = 251
    """ Generate filename. """
    ok = re.compile(r'[^/]')
    if os.name == "nt":
        ok = re.compile(r'[^\\/:*?"<>|]')
    filename = "".join(x if ok.match(x) else "_" for x in title)
    if max_length:
        max_length = max_length + 1 + len(extension)
        if len(filename) > max_length:
            filename = filename[:max_length - 3] + '...'
    filename += "." + extension
    return xenc(filename)


def get_file_name_for_saving(save_location, full_name):
    file_path_with_name = os.path.join(save_location, full_name)
    # file exists, add 1 at the end, otherwise return the filename as it is
    if os.path.exists(file_path_with_name):
        split = file_path_with_name.split(".")
        file_path_with_name = ".".join(split[:-1]) + "1." + split[-1]
    return file_path_with_name


def Download(self):
    video_url = self.lineEdit.text()
    save_location = self.lineEdit_2.text()
    if video_url == '' or save_location == '':
        QMessageBox.warning(self, "Data Error", "Provide a Valid Video URL or save Location")
    else:
        # video file
        video = pafy.new(video_url)
        # available video streams
        video_stream = video.streams
        video_quality = self.comboBox.currentIndex()
        # video title/name
        video_name = video.title
        # take the extension of the file from the video stream
        extension = video_stream[video_quality].extension
        # full name with extension
        full_name = generate_filename(video_name, extension)
        final_path_with_file_name = get_file_name_for_saving(save_location, full_name)
        download = video_stream[video_quality].download(filepath=final_path_with_file_name,
                                                        callback=self.Handel_Progress, )
Let me know if you face any issues.
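As a side note, the answer defers the case where abc.mp4, abc1.mp4 and so on already exist. A small sketch of that extension (my own addition, not part of the answer above) could keep incrementing the suffix until a free name is found:
def get_file_name_for_saving(save_location, full_name):
    file_path_with_name = os.path.join(save_location, full_name)
    base, ext = os.path.splitext(file_path_with_name)
    counter = 1
    # keep bumping the number until no file with that name exists
    while os.path.exists(file_path_with_name):
        file_path_with_name = "{0}{1}{2}".format(base, counter, ext)
        counter += 1
    return file_path_with_name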
I am trying to do a mass extraction of GPS EXIF data; my code is below:
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS


def get_exif_data(image):
    exif_data = {}
    info = image._getexif()
    if info:
        for tag, value in info.items():
            decoded = TAGS.get(tag, tag)
            if decoded == "GPSInfo":
                gps_data = {}
                for t in value:
                    sub_decoded = GPSTAGS.get(t, t)
                    gps_data[sub_decoded] = value[t]
                exif_data[decoded] = gps_data
            else:
                exif_data[decoded] = value
    return exif_data


def _get_if_exist(data, key):
    if key in data:
        return data[key]
    else:
        pass


def get_lat_lon(exif_data):
    gps_info = exif_data["GPSInfo"]
    lat = None
    lon = None
    if "GPSInfo" in exif_data:
        gps_info = exif_data["GPSInfo"]
        gps_latitude = _get_if_exist(gps_info, "GPSLatitude")
        gps_latitude_ref = _get_if_exist(gps_info, "GPSLatitudeRef")
        gps_longitude = _get_if_exist(gps_info, "GPSLongitude")
        gps_longitude_ref = _get_if_exist(gps_info, "GPSLongitudeRef")
        if gps_latitude and gps_latitude_ref and gps_longitude and gps_longitude_ref:
            lat = _convert_to_degrees(gps_latitude)
            if gps_latitude_ref != "N":
                lat = 0 - lat
            lon = _convert_to_degrees(gps_longitude)
            if gps_longitude_ref != "E":
                lon = 0 - lon
    return lat, lon
Code source
Which is run like:
if __name__ == "__main__":
    image = Image.open("photo directory")
    exif_data = get_exif_data(image)
    print(get_lat_lon(exif_data))
This works fine for one photo, so I've used glob to iterate over all the photos in a folder:
import glob

file_names = []
for name in glob.glob(photo directory):
    file_names.append(name)

for item in file_names:
    if __name__ == "__main__":
        image = Image.open(item)
        exif_data = get_exif_data(image)
        print(get_lat_lon(exif_data))
    else:
        pass
Which works fine, as long as every photo in the folder is a) an image and b) has GPS data. I have tried adding a pass in the _get_if_exist function as well as in my file iteration; however, neither seems to have had any impact and I'm still receiving KeyError: 'GPSInfo'.
Any ideas on how I can ignore photos with no data or different file types?
A possible approach would be to write a small helper function that first checks whether the file is actually an image file and, as a second step, checks whether the image contains EXIF data.
def is_metadata_image(filename):
    try:
        image = Image.open(filename)
        return 'exif' in image.info
    except OSError:
        return False
I found that PIL does not always work with .png files that do contain EXIF information when using _getexif(). So instead I check for the key exif in the info dictionary of the image.
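For illustration, here is how the helper could be wired into the glob loop from the question. The directory pattern is the question's placeholder, and the extra GPSInfo check (to skip photos that have EXIF but no GPS block) is my assumption:
import glob

for item in glob.glob(photo directory):
    if not is_metadata_image(item):
        # not an image, or an image without EXIF data
        continue
    exif_data = get_exif_data(Image.open(item))
    if "GPSInfo" not in exif_data:
        # image has EXIF but no GPS block
        continue
    print(get_lat_lon(exif_data))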
I've tried this source code.
You simply need to remove
gps_info = exif_data["GPSInfo"]
from the first line of the get_lat_lon(exif_data) function; it works well for me.
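For reference, after that removal the function simply starts with the guarded lookup (this is just the question's code minus the offending line):
def get_lat_lon(exif_data):
    lat = None
    lon = None
    if "GPSInfo" in exif_data:
        gps_info = exif_data["GPSInfo"]
        ...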
I am trying to create an image from a URL and save it in my Django model. The first part works fine, but I do not know how to associate the generated file with my object.
This is my function to generate the image file:
def get_remote_image(image_url, merchant_product_path):
    im = None
    name = ''
    r = requests.get(image_url, stream=True)
    if r.status_code == 200:
        name = urlparse(image_url).path.split('/')[-1]
        full_path = os.path.join(settings.MEDIA_ROOT, merchant_product_path)
        if not os.path.exists(full_path):
            os.makedirs(full_path)
        im = Image.open(r.raw)
        if im.mode != "RGB":
            im = im.convert("RGB")
        im.thumbnail((500, 500), Image.ANTIALIAS)
        im.save(full_path + name, 'JPEG')
    return {'im': im, 'name': name}
And now, the part that associates this file with my object:
i = get_remote_image(row['pict'], m.get_products_media_path())
obj, created = ProductLine.objects.update_or_create(
    ...
    ...
    ...
)
if i['im'] is not None:
    try:
        obj.main_picture.save(
            i['name'],
            ContentFile(i['im']),
            save=True)
    except TypeError:
        continue
This code works, but unfortunately, my pictures are created in the correct folder and the objects are created/updated, yet each one ends up with no picture file :(
Can someone tell me what's wrong?
I've finally found a solution:
def get_remote_image(image_url):
    im = None
    name = ''
    r = requests.get(image_url, stream=True)
    if r.status_code == 200:
        name = urlparse(image_url).path.split('/')[-1]
        i = Image.open(r.raw)
        buffer = BytesIO()
        if i.mode != "RGB":
            i = i.convert("RGB")
        i.thumbnail((500, 500), Image.ANTIALIAS)
        i.save(buffer, format='JPEG')
        im = InMemoryUploadedFile(
            buffer,
            None,
            name,
            'image/jpeg',
            buffer.tell(),
            None)
    return {'im': im, 'name': name}
and then:
obj, created = ProductLine.objects.update_or_create(
    ...
    ...
    ...
)
i = get_remote_image(row['pict'])
obj.main_picture.save(
    os.path.join(m.get_products_image_path(), i['name']),
    i['im'],
    save=True)
Hope this will help some other users in this situation.
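For completeness, the solution above relies on imports along these lines. This is my reconstruction from the names used, so treat it as an assumption (on Python 2 the urlparse import would come from the urlparse module instead):
import os
from io import BytesIO
from urllib.parse import urlparse

import requests
from PIL import Image
from django.core.files.uploadedfile import InMemoryUploadedFile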
With a model like:
class ProductLine(models.Model):
    name = models.CharField(max_length=250, null=True)
    image = models.ImageField(null=True)
You can directly link the picture on your computer using its path instead of its binary content.
obj, created = ProductLine.objects.update_or_create(...)
obj.image.name = "/path/to/the/file"
obj.save()
I have created a web scraper to extract information of research papers that are published in a digital library (sample document).
Basically I'm extracting the title, abstract and list of references for each paper and storing them in text files. This process is repeated for all the referenced papers also.
I have used a queue to store the document IDs.
I need to extract such information from at least 5000 papers, but the program is too slow and takes about 3 hours to go through 250-300 papers.
What are the possible ways of improving the speed of this scraper?
Here is the code:
# _*_ coding:utf-8 _*_
import urllib2
import json
import Queue

crawled = []
fo = open("paper.txt", "w")


class Paper(object):
    def __init__(self, paper_id):
        self.paper_id = paper_id
        self.title, self.abstract = self.fetch_data()

    def fetch_data(self):
        base_url = "http://ieeexplore.ieee.org/rest/document/{0}/{1}"
        data_url = base_url.format(self.paper_id, "abstract")
        response = urllib2.urlopen(data_url)
        html = response.readlines()
        data = json.loads("\n".join(html))
        title = data["title"]
        abstract = data["abstract"]
        return title, abstract

    def fetch_ieee_references(self):
        base_url = "http://ieeexplore.ieee.org/rest/document/{0}/{1}"
        data_url = base_url.format(self.paper_id, "references")
        response = urllib2.urlopen(data_url)
        html = response.readlines()
        data = json.loads("\n".join(html))
        references = []
        try:
            for ref in data["references"]:
                try:
                    ref_link = ref["links"]["documentLink"]
                    ref_paper_id = ref_link.split("/")[-1]
                    references.append(Paper(ref_paper_id))
                except:
                    pass
        except:
            pass
        return references

    def extract_paper(self):
        try:
            print "Paper ID"
            print self.paper_id
            fname = str(self.paper_id)
            fname = fname + ".txt"
            fcon = open(fname, "w")
            print
            print "Title"
            print self.title
            print >>fcon, self.title
            print "Abstract"
            print self.abstract
            print >>fcon, self.abstract
            print "References"
            for ref in self.fetch_ieee_references():
                print ref.paper_id, ref.title
                print >>fo, self.paper_id, ref.paper_id
        except:
            pass


def new_func():
    n_id = 6639344
    q = Queue.Queue()
    q.put_nowait(n_id)
    crawled.append(n_id)
    while not q.empty():
        p_id = q.get_nowait()
        paper = Paper(p_id)
        paper.extract_paper()
        for ref in paper.fetch_ieee_references():
            if ref.paper_id not in crawled:
                crawled.append(ref.paper_id)
                q.put_nowait(ref.paper_id)

new_func()
As already mentioned by other users, it mostly depends on the speed of the HTTP requests, so you are dependent on the site's server. To speed things up you can divide the papers between multiple processes.
Also, I don't see why you read the HTML lines and then use json.loads; you can just call json.load on the response, which will speed things up a little bit.
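A rough sketch of both suggestions, kept in the question's Python 2 style. The pool here is a thread pool (enough for I/O-bound requests, standing in for the separate processes mentioned above), and the IDs and pool size are made up for the example:
import json
import urllib2
from multiprocessing.dummy import Pool  # thread pool with the multiprocessing API

BASE_URL = "http://ieeexplore.ieee.org/rest/document/{0}/{1}"

def fetch_abstract(paper_id):
    # json.load reads the response directly, no readlines()/join round-trip
    response = urllib2.urlopen(BASE_URL.format(paper_id, "abstract"))
    data = json.load(response)
    return paper_id, data.get("title"), data.get("abstract")

pool = Pool(8)  # 8 concurrent requests; tune to what the server tolerates
for paper_id, title, abstract in pool.map(fetch_abstract, [6639344, 6639345]):
    print paper_id, title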
I keep getting an error, but I don't see it.
I am new to programming, so if you explain the code to me, please don't assume I know too much.
#!/usr/bin/env python
# Name:
# Student number:
'''
This script crawls the IMDB top 250 movies.
'''
# Python standard library imports
import os
import sys
import csv
import codecs
import cStringIO
import errno

# Third party library imports:
import pattern
from pattern.web import URL, DOM

# --------------------------------------------------------------------------
# Constants:
TOP_250_URL = 'http://www.imdb.com/chart/top'
OUTPUT_CSV = 'top250movies.csv'
SCRIPT_DIR = os.path.split(os.path.realpath(__file__))[0]
BACKUP_DIR = os.path.join(SCRIPT_DIR, 'HTML_BACKUPS')

# --------------------------------------------------------------------------
# Unicode reading/writing functionality for the Python CSV module, taken
# from the Python.org csv module documentation (very slightly adapted).
# Source: http://docs.python.org/2/library/csv.html (retrieved 2014-03-09).

class UTF8Recoder(object):
    """
    Iterator that reads an encoded stream and reencodes the input to UTF-8
    """
    def __init__(self, f, encoding):
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):
        return self.reader.next().encode("utf-8")


class UnicodeReader(object):
    """
    A CSV reader which will iterate over lines in the CSV file "f",
    which is encoded in the given encoding.
    """
    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        f = UTF8Recoder(f, encoding)
        self.reader = csv.reader(f, dialect=dialect, **kwds)

    def next(self):
        row = self.reader.next()
        return [unicode(s, "utf-8") for s in row]

    def __iter__(self):
        return self


class UnicodeWriter(object):
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.
    """
    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # Redirect output to a queue
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        self.writer.writerow([s.encode("utf-8") for s in row])
        # Fetch UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... and reencode it into the target encoding
        data = self.encoder.encode(data)
        # write to the target stream
        self.stream.write(data)
        # empty queue
        self.queue.truncate(0)

    def writerows(self, rows):
        for row in rows:
            self.writerow(row)


# --------------------------------------------------------------------------
# Utility functions (no need to edit):
def create_dir(directory):
    '''
    Create directory if needed.
    Args:
        directory: string, path of directory to be made
    Note: the backup directory is used to save the HTML of the pages you
        crawl.
    '''
    try:
        os.makedirs(directory)
    except OSError as e:
        if e.errno == errno.EEXIST:
            # Backup directory already exists, no problem for this script,
            # just ignore the exception and carry on.
            pass
        else:
            # All errors other than an already exising backup directory
            # are not handled, so the exception is re-raised and the
            # script will crash here.
            raise


def save_csv(filename, rows):
    '''
    Save CSV file with the top 250 most popular movies on IMDB.
    Args:
        filename: string filename for the CSV file
        rows: list of rows to be saved (250 movies in this exercise)
    '''
    with open(filename, 'wb') as f:
        writer = UnicodeWriter(f)  # implicitly UTF-8
        writer.writerow([
            'title', 'runtime', 'genre(s)', 'director(s)', 'writer(s)',
            'actor(s)', 'rating(s)', 'number of rating(s)'
        ])
        writer.writerows(rows)


def make_backup(filename, html):
    '''
    Save HTML to file.
    Args:
        filename: absolute path of file to save
        html: (unicode) string of the html file
    '''
    with open(filename, 'wb') as f:
        f.write(html)


def main():
    '''
    Crawl the IMDB top 250 movies, save CSV with their information.
    Note:
        This function also makes backups of the HTML files in a sub-directory
        called HTML_BACKUPS (those will be used in grading).
    '''
    # Create a directory to store copies of all the relevant HTML files (those
    # will be used in testing).
    print 'Setting up backup dir if needed ...'
    create_dir(BACKUP_DIR)

    # Make backup of the IMDB top 250 movies page
    print 'Access top 250 page, making backup ...'
    top_250_url = URL(TOP_250_URL)
    top_250_html = top_250_url.download(cached=True)
    make_backup(os.path.join(BACKUP_DIR, 'index.html'), top_250_html)

    # extract the top 250 movies
    print 'Scraping top 250 page ...'
    url_strings = scrape_top_250(top_250_url)

    # grab all relevant information from the 250 movie web pages
    rows = []
    for i, url in enumerate(url_strings):  # Enumerate, a great Python trick!
        print 'Scraping movie %d ...' % i
        # Grab web page
        movie_html = URL(url).download(cached=True)
        # Extract relevant information for each movie
        movie_dom = DOM(movie_html)
        rows.append(scrape_movie_page(movie_dom))
        # Save one of the IMDB's movie pages (for testing)
        if i == 83:
            html_file = os.path.join(BACKUP_DIR, 'movie-%03d.html' % i)
            make_backup(html_file, movie_html)

    # Save a CSV file with the relevant information for the top 250 movies.
    print 'Saving CSV ...'
    save_csv(os.path.join(SCRIPT_DIR, 'top250movies.csv'), rows)
The function below should return the webpage links of the top 250 movies:
# --------------------------------------------------------------------------
# Functions to adapt or provide implementations for:

def scrape_top_250(url):
    '''
    Scrape the IMDB top 250 movies index page.
    Args:
        url: pattern.web.URL instance pointing to the top 250 index page
    Returns:
        A list of strings, where each string is the URL to a movie's page on
        IMDB, note that these URLS must be absolute (i.e. include the http
        part, the domain part and the path part).
    '''
    movie_urls = []
    table_rows = dom.by_id('main').by_tag('table')[1].by_tag('tr')
    for tr in table_rows[1:]:
        a = tr.by_tag('a')[0]
        movie_urls.append(clean_unicode(abs_url(a.attributes.get('href', ''), url.string)))

    # YOUR SCRAPING CODE GOES HERE, ALL YOU ARE LOOKING FOR ARE THE ABSOLUTE
    # URLS TO EACH MOVIE'S IMDB PAGE, ADD THOSE TO THE LIST movie_urls.

    # return the list of URLs of each movie's page on IMDB
    return movie_urls

# print scrape_top_250(url)
And finally, this function should return specific contents:
def scrape_movie_page(dom):
    '''
    Scrape the IMDB page for a single movie
    Args:
        dom: pattern.web.DOM instance representing the page of 1 single
            movie.
    Returns:
        A list of strings representing the following (in order): title, year,
        duration, genre(s) (semicolon separated if several), director(s)
        (semicolon separated if several), writer(s) (semicolon separated if
        several), actor(s) (semicolon separated if several), rating, number
        of ratings.
    '''
    # YOUR SCRAPING CODE GOES HERE:
    for p in movie_urls:
        p_url = URL(p)
        p_dom = DOM(p_url.download(cached=True))

        title = clean_unicode(p_dom.by_class('header')[0].content)
        title = plaintext(strip_between('<span', '</span>', title))

        runtime = clean_unicode(p_dom.by_class('infobar')[0].by_tag('time')[0].content)
        duration = runtime

        genres = []
        for genre in p_dom.by_class('infobar')[0].by_tag('a')[:-1]:
            genres.append(clean_unicode(genre.content))

        directors = []
        writers = []
        actors = []

        text_blocks = p_dom.by_class('txt-block')[:3]
        for t in text_blocks:
            spans = t.by_tag('span')
            for s in spans:
                if s.attributes.get('itemprop') == 'director':
                    director = s.by_tag('span')[0].by_tag('a')[0].content
                    directors.append(clean_unicode(director))
                if s.attributes.get('itemprop') == 'writer':
                    p_writer = s.by_tag('span')[0].by_tag('a')[0].content
                    writers.append(clean_unicode(p_writer))
                if s.attributes.get('itemprop') == 'actors':
                    actor = s.by_tag('span')[0].by_tag('a')[0].content
                    actors.append(clean_unicode(actor))

        rating = []
        ratings_count = []
        spans = p_dom.by_class('star-box-details')[0].by_tag('span')
        for s in spans:
            if s.attributes.get('itemprop') == 'ratingValue':
                rating = clean_unicode(s.content)
            if s.attributes.get('itemprop') == 'ratingCount':
                ratings_count = clean_unicode(s.content)

        # format the strings from lists
        genres = concat_strings(genres)
        directors = concat_strings(directors)
        writers = concat_strings(writers)
        actors = concat_strings(actors)

    # Return everything of interest for this movie (all strings as specified
    # in the docstring of this function).
    return title, duration, genres, directors, writers, actors, rating, \
        n_ratings


if __name__ == '__main__':
    main()  # call into the program

    # If you want to test the functions you wrote, you can do that here:
    # ...
It's just that (in the original revision) you forgot to indent the body of the function scrape_movie_page. The for loop is in module scope.
The most common cause of this error is not properly indenting the body of the function, but sometimes the code looks correctly indented and still throws the same error. In my experience that comes from mismatched indentation: if, within the same block, some lines are indented with tabs and others with spaces, the code looks fine visually but still raises an indentation error.
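To make both failure modes concrete (this example is mine, not from the question's code): a function body that is not indented stops Python immediately, and mixed tabs and spaces can be surfaced with the interpreter's -tt flag on Python 2.
# IndentationError: expected an indented block
def greet(name):
print "Hello", name

# If the body *looks* indented but mixes tabs and spaces, run the script as
#   python -tt script.py
# so Python 2 reports the inconsistent indentation as an error instead of
# silently grouping the lines the wrong way.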