Use custom scrapy imagePipeline to download images and overwrite existing images

Use custom scrapy imagePipeline to download images and overwrite existing images - python

I am practising using scrapy to crop image with a custom imagePipeline.
I am using this code:
class MyImagesPipeline(ImagesPipeline):
    """Images pipeline that re-encodes downloads as JPEG and trims full-size images."""

    def get_media_requests(self, item, info):
        """Yield one download request for every URL in ``item['image_urls']``."""
        for image_url in item['image_urls']:
            yield Request(image_url)

    def convert_image(self, image, size=None):
        """Normalise ``image`` to RGB, resize or crop it, and encode it as JPEG.

        :param image: the PIL image to process.
        :param size: optional ``(width, height)`` bound; when given, a
            thumbnail copy is produced instead of the cropped full image.
        :return: ``(image, buf)`` where ``buf`` holds the JPEG-encoded data.
        :raises ImageException: if the image cannot be saved as JPEG.
        """
        if image.format == 'PNG' and image.mode == 'RGBA':
            # Flatten transparent PNGs onto a white canvas before dropping alpha.
            background = Image.new('RGBA', image.size, (255, 255, 255))
            background.paste(image, image)
            image = background.convert('RGB')
        elif image.mode != 'RGB':
            image = image.convert('RGB')

        if size:
            # thumbnail() works in place, so operate on a copy.
            image = image.copy()
            image.thumbnail(size, Image.ANTIALIAS)
        else:
            # cut water image TODO use defined image replace Not cut
            # (presumably removes a watermark strip: the bottom 25 pixels are
            # dropped from images taller than 120 pixels)
            x, y = image.size
            if y > 120:
                image = image.crop((0, 0, x, y - 25))

        # NOTE(review): on Python 3 this must be a bytes buffer (io.BytesIO);
        # confirm which StringIO the module imports.
        buf = StringIO()
        try:
            image.save(buf, 'JPEG')
        except Exception as ex:  # "as" form is valid on Python 2.6+ and 3
            raise ImageException("Cannot process image. Error: %s" % ex)
        return image, buf
It works well, but there is one problem.
If images already exist in the output folder
when I run the spider,
the newly downloaded images do not replace the existing ones.
How can I get it to overwrite the existing images?

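There is an expiration setting (`IMAGES_EXPIRES`), which defaults to 90 days. Images downloaded more recently than that are not downloaded again, so existing files are kept; setting it to 0 makes the pipeline fetch the images again and overwrite them.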

Related

img2pdf AlphaChannelError: what is the best way to remove alphachannel

I have set of images from which I create pdf by the following code
# Render all images into one PDF and keep the raw bytes of the result.
with io.BytesIO() as tmp_io:
    pdf_document = img2pdf.convert(img_file_paths)
    tmp_io.write(pdf_document)
    result_bytes = tmp_io.getvalue()
One of files contains alpha channel and I got
raise AlphaChannelError("Refusing to work on images with alpha channel")
What is the simplest way to remove alpha channel and save to pdf rgb channels?

Here is a somewhat ugly solution of my own:
def remove_alpha_from_image(image_path):
    """Flatten an image onto a white background and store it as a temporary PNG.

    Images without transparency are re-saved unchanged (CMYK is converted to
    RGB because PNG cannot store it).

    :param image_path: path of the image to process.
    :return: path of the written file inside ``TMP_DIR``. The file name is
        the MD5 hex digest of ``image_path``; it keeps the historical
        ``.pdf`` suffix although the content is PNG data.
    """
    os.makedirs(TMP_DIR, exist_ok=True)
    name = md5(str(image_path).encode("utf-8")).hexdigest()  # noqa: S303
    path = os.path.join(TMP_DIR, f"{name}.pdf")

    # Everything happens inside the context manager: closing a PIL image
    # releases its pixel data, so it must be saved before the block ends.
    with Image.open(image_path) as im:
        im.load()
        has_alpha = im.mode in ("RGBA", "LA") or (
            im.mode == "P" and "transparency" in im.info
        )
        if has_alpha:
            rgba = im.convert("RGBA")
            flattened = Image.new("RGB", rgba.size, (255, 255, 255))
            flattened.paste(rgba, mask=rgba.split()[3])  # band 3 is the alpha channel
        elif im.mode == "CMYK":
            # The fourth band of CMYK is K, not alpha; PNG cannot store CMYK.
            flattened = im.convert("RGB")
        else:
            flattened = im
        flattened.save(path, "PNG", resolution=100.0)
    return path
# Try the lossless conversion first; if any image carries an alpha channel,
# flatten every image and convert the flattened copies instead.
with io.BytesIO() as tmp_io:
    try:
        pdf_document = img2pdf.convert(file_paths)
    except img2pdf.AlphaChannelError:
        flattened_paths = [remove_alpha_from_image(path) for path in file_paths]
        pdf_document = img2pdf.convert(flattened_paths)
    tmp_io.write(pdf_document)
    result_bytes = tmp_io.getvalue()

Here's a utility I put together - only tested in a single app so not sure how general it is, but should be turnkey. Tested in python 3.9
def image2pdf(image: bytes or str, allow_lossy=True, **rgba_to_kwds) -> bytes:
    """
    Convert an image to PDF, optionally falling back to a lossy conversion.

    :param image: if the image has no alpha channel, any valid input to
        ``img2pdf.convert``. If it has one, it must be a ``str`` (path to the
        image) or the ``bytes`` of the image.
    :param allow_lossy: if ``img2pdf.convert`` fails with ``AlphaChannelError``,
        strip the alpha channel via ``_rgba_to`` and retry.
    :param rgba_to_kwds: keyword arguments forwarded to ``_rgba_to``
        (``to``, ``intermediate``).
    :return: bytes of the PDF. To save to disk::

        pdf_bytes = image2pdf(some_image)
        with open('converted.pdf', 'wb') as f:
            f.write(pdf_bytes)
    :raises img2pdf.AlphaChannelError: if the image has an alpha channel and
        ``allow_lossy`` is false.
    """
    try:
        return img2pdf.convert(image)
    except img2pdf.AlphaChannelError:
        if not allow_lossy:
            # Bare raise keeps the original traceback intact.
            raise
        # The extra keywords configure the alpha removal, not img2pdf itself.
        rgb_bytes = _rgba_to(image, **rgba_to_kwds)
        return img2pdf.convert(rgb_bytes)
def _rgba_to(image: bytes or str, to='RGB', intermediate='PNG') -> bytes:
logging.warning(f"Image has alpha channel... downsampling (newtype={to}, intermediate={intermediate}) and converting")
# Image is a filepath
if isinstance(image, str):
img = Image.open(image)
converted: Image = img.convert(to)
# Image is a bytestream
elif isinstance(image, bytes):
buffered = io.BytesIO(image)
img = Image.open(buffered)
converted: Image = img.convert(to)
else:
raise Exception(f"rgba downsampling only supported for images of type str (ie. filepath) or bytes - got {type(image)}")
buf = io.BytesIO()
converted.save(buf, format=intermediate)
byte_im = buf.getvalue()
return byte_im
def test_convert_png_image_with_alphachannel_to_pdf():
    """Check that an RGBA PNG is converted to a PDF document."""
    img_path = "some-rgba-image.png"
    pdf_bytes = image2pdf(img_path)
    # A PDF document starts with the "%PDF" signature.
    assert pdf_bytes.startswith(b"%PDF")
    # Written to disk so the result can be inspected manually;
    # remove these lines if the file is not wanted.
    with open('converted.pdf', "wb") as f:
        f.write(pdf_bytes)

Downloading Images using BS4

I have been trying to download images from a site using bs4. The images are not JPEG or PNG, so I think bs4 is unable to find them, although I could be wrong about that.
Here's my code
#--- IMAGE --- NOT WORKING
#Finds the image URL with the name of the product
#done
# Look up the first <img> element whose class attribute is "image_container".
image = soup.find('img', attrs={'class':"image_container"})
try:
image = image.get("src")
except AttributeError:
# Reached when `image` has no .get attribute (presumably soup.find returned None);
# a sentinel string marks the miss.
print("NO image FOUND")
image = "NO image FOUND"
if(image != "NO image FOUND"): #if the image is found
try:
# Cut the URL at the first "?" and prefix the scheme; both steps are skipped
# when the URL contains no "?".
pos = image.index("?")
image = "http:" + image[:pos]
except ValueError:
pass
pathImg += name[:nameLength] # Truncates to 5 characters and adds to pathImg file
if(generateFiles):
download(image, pathImg) # Downloads image
self.image = image # Exporting var to class global var
Heres an image of where the source is on the page
Source Code for the image container

Image Conversion - Cannot write mode RGBA as JPEG

I'm trying to resize & reduce quality of image before upload in project. Here's what I tried,
def save(self):
    """Downscale the uploaded image to 240x240 and re-encode it as JPEG before saving."""
    im = Image.open(self.image)
    # resize() expects the target size as a single (width, height) tuple.
    im = im.resize((240, 240))
    # NOTE: the image mode is not converted here, so inputs whose mode JPEG
    # cannot store (e.g. RGBA or P) make the save below fail.
    output = BytesIO()
    im.save(output, format='JPEG', quality=95)
    # Report the size of the encoded payload, not of the BytesIO object itself.
    size = len(output.getvalue())
    output.seek(0)
    self.image = InMemoryUploadedFile(output, 'ImageField', "%s.jpg" % self.image.name.split('.')[0], 'image/jpeg', size, None)
    super(Model, self).save()
It's working fine if I upload a jpg image but if I upload a png or any other image type, it's not working it's raising errors like cannot write mode RGBA as JPEG & cannot write mode P as JPEG etc.
How can we fix that? Thank You!

If your image.mode is "P" or "RGBA" and you want to save the image as JPEG, you first need to convert it to another mode, because JPEG does not support these modes:
# JPEG can only store a few modes; anything else (RGBA, P, LA, ...) has to be
# converted before saving.
if im.mode not in ("L", "RGB", "CMYK"):
    im = im.convert("RGB")
https://github.com/python-pillow/Pillow/issues/2609

Summary timop and 2:
background
JPG not support alpha = transparency
RGBA, P has alpha = transparency
RGBA= Red Green Blue Alpha
result
cannot write mode RGBA as JPEG
cannot write mode P as JPEG
solution
before save to JPG, discard alpha = transparency
such as: convert Image to RGB
then save to JPG
your code
# Drop the alpha/palette information first; JPEG cannot store it.
# (An image's mode is never "JPEG", so the save must not depend on that test.)
if im.mode in ["RGBA", "P"]:
    im = im.convert("RGB")
im.save(output, format='JPEG', quality=95)
More for you:
About resizing and reducing the quality of an image: I have implemented a function for you (and others) to refer to:
from PIL import Image, ImageDraw
cfgDefaultImageResample = Image.BICUBIC # Image.LANCZOS
def resizeImage(inputImage,
                newSize,
                resample=cfgDefaultImageResample,
                outputFormat=None,
                outputImageFile=None
                ):
    """
    Resize the input image so that it fits within ``newSize``.

    Resizing here normally means shrinking: the image is reduced with
    ``Image.thumbnail``, which modifies the image in place.

    :param inputImage: image file object (fp) / filename / binary bytes /
        an already opened ``PIL.Image.Image``
    :param newSize: (width, height)
    :param resample: PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC, or PIL.Image.LANCZOS
        https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.thumbnail
    :param outputFormat: PNG/JPEG/BMP/GIF/TIFF/WebP/..., more refer:
        https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html
        if omitted, the format of the input image is used
    :param outputImageFile: output image file filename
    :return:
        if ``outputImageFile`` is given: the resized image is written to that
        file and ``None`` is returned
        otherwise: the resized image as binary bytes
    :raises TypeError: if ``inputImage`` is of none of the supported types
    """
    inputImageLen = None  # only known when the input is raw bytes
    openableImage = None
    if isinstance(inputImage, str):
        openableImage = inputImage
    elif CommonUtils.isFileObject(inputImage):
        openableImage = inputImage
    elif isinstance(inputImage, bytes):
        inputImageLen = len(inputImage)
        openableImage = io.BytesIO(inputImage)

    if openableImage:
        imageFile = Image.open(openableImage)
    elif isinstance(inputImage, Image.Image):
        imageFile = inputImage
    else:
        raise TypeError("unsupported inputImage type: %s" % type(inputImage))

    try:
        # <PIL.PngImagePlugin.PngImageFile image mode=RGBA size=3543x3543 at 0x1065F7A20>
        imageFile.thumbnail(newSize, resample)

        if outputImageFile:
            # save to file
            imageFile.save(outputImageFile)
            return None

        # save and return binary bytes
        imageOutput = io.BytesIO()
        outputImageFormat = outputFormat or imageFile.format or None
        imageFile.save(imageOutput, outputImageFormat)
    finally:
        # Release the image even when resizing or saving fails.
        imageFile.close()

    compressedImageBytes = imageOutput.getvalue()
    if inputImageLen is not None:
        # The ratio can only be reported when the original byte length is known.
        compressedImageLen = len(compressedImageBytes)
        compressRatio = float(compressedImageLen) / float(inputImageLen)
        print("%s -> %s, resize ratio: %d%%" % (inputImageLen, compressedImageLen, int(compressRatio * 100)))
    return compressedImageBytes
The latest code can be found here:
https://github.com/crifan/crifanLibPython/blob/master/crifanLib/crifanMultimedia.py

Resizing uploaded files in django using PIL

I am using PIL to resize an uploaded file using this method:
def resize_uploaded_image(buf):
    """Read an uploaded file and return it resized so its longest side is 240.

    :param buf: object whose ``read()`` returns the encoded image data.
    :return: the resized PIL image object (not a file-like object).
    """
    raw_data = buf.read()
    source = Image.open(StringIO.StringIO(raw_data))
    original_width, original_height = source.size
    target_width, target_height = scale_dimensions(
        original_width, original_height, longest_side=240
    )
    return source.resize((target_width, target_height))
I then use this method to get the resizedImage in my main view method:
# Uploaded avatar file taken from the request.
image = request.FILES['avatar']
resizedImage = resize_uploaded_image(image)
# NOTE(review): File() is handed whatever resize_uploaded_image returns; the
# reported AttributeError ("read") suggests it must be file-like — confirm.
content = django.core.files.File(resizedImage)
acc = Account.objects.get(account=request.user)
# Store the content on the account's avatar field under the uploaded file's name.
acc.avatar.save(image.name, content)
However, this gives me the 'read' error.
Trace:
Exception Type: AttributeError at /myapp/editAvatar Exception Value:
read
Any idea how to fix this? I have been at it for hours!
Thanks!
Nikunj

Here's how you can take a file-like object, manipulate it as an image in PIL, then turn it back into a file-like object:
def resize_uploaded_image(buf):
    """Resize an uploaded image and return it as a file-like object.

    :param buf: file-like object containing the uploaded image.
    :return: in-memory binary stream holding the resized image encoded as
        PNG, positioned at the start.
    """
    # io.BytesIO is the binary in-memory buffer on both Python 2.6+ and 3;
    # the StringIO module exists on Python 2 only.
    from io import BytesIO

    image = Image.open(buf)
    (width, height) = image.size
    (width, height) = scale_dimensions(width, height, longest_side=240)
    resizedImage = image.resize((width, height))
    # Turn back into file-like object
    resizedImageFile = BytesIO()
    resizedImage.save(resizedImageFile, 'PNG', optimize=True)
    resizedImageFile.seek(0)  # So that the next read starts at the beginning
    return resizedImageFile
Note that there's already a handy thumbnail() method for PIL images. This is a variant of the thumbnail code I use in my own project:
def resize_uploaded_image(buf):
    """Shrink an uploaded image to fit within 240x240 and return it as a file.

    :param buf: file-like object containing the uploaded image.
    :return: in-memory binary stream holding the thumbnail encoded as PNG,
        positioned at the start.
    """
    from io import BytesIO
    from PIL import Image

    image = Image.open(buf)
    maxSize = (240, 240)
    # ANTIALIAS was renamed to LANCZOS; fall back for old PIL releases.
    resample = getattr(Image, "LANCZOS", None)
    if resample is None:
        resample = Image.ANTIALIAS
    # thumbnail() resizes the image in place and returns None, so keep
    # working with `image` itself.
    image.thumbnail(maxSize, resample)
    # Turn back into file-like object
    resizedImageFile = BytesIO()
    image.save(resizedImageFile, 'PNG', optimize=True)
    resizedImageFile.seek(0)  # So that the next read starts at the beginning
    return resizedImageFile

It would be better to save the uploaded image as it is and then resize it in the template when you display it. That way you can resize images at runtime. sorl-thumbnail is a Django app that you can use for resizing images in templates; it is easy to use, and you can use it in a view too. Here are examples for this app.

PIL - Convert GIF Frames to JPG

I tried to convert an gif to single images with Python Image Library,
but it results in weird frames
The Input gif is:
Source Image http://longcat.de/gif_example.gif
In my first try, i tried to convert the image with Image.new to an
RGB image, with 255,255,255 as white background - like in any other
example i've found on the internet:
def processImage(infile):
    """Save every frame of ``infile`` as ``foo<N>.jpg`` on a white background.

    First attempt: each frame is pasted as-is onto a white RGB canvas.
    Exits the process with status 1 if the file cannot be opened.
    """
    try:
        im = Image.open(infile)
    except IOError:
        # %-formatting prints the same text on Python 2 and Python 3.
        print("Cant load %s" % infile)
        sys.exit(1)
    i = 0
    try:
        while 1:
            background = Image.new("RGB", im.size, (255, 255, 255))
            background.paste(im)
            background.save('foo' + str(i) + '.jpg', 'JPEG', quality=80)
            i += 1
            # seek() raises EOFError after the last frame, which ends the loop.
            im.seek(im.tell() + 1)
    except EOFError:
        pass  # end of sequence
but it results in weird output files:
Example #1 http://longcat.de/gif_example1.jpg
My second try was, to convert the gif in an RGBA first, and then use
its transparency mask, to make the transparent pieces white:
def processImage(infile):
    """Save every frame of ``infile`` as ``foo<N>.jpg`` on a white background.

    Second attempt: each frame is converted to RGBA and pasted onto a white
    RGB canvas using its own alpha band as the mask.
    Exits the process with status 1 if the file cannot be opened.
    """
    try:
        im = Image.open(infile)
    except IOError:
        # %-formatting prints the same text on Python 2 and Python 3.
        print("Cant load %s" % infile)
        sys.exit(1)
    i = 0
    try:
        while 1:
            im2 = im.convert('RGBA')
            im2.load()
            background = Image.new("RGB", im2.size, (255, 255, 255))
            background.paste(im2, mask=im2.split()[3])  # band 3 is alpha
            background.save('foo' + str(i) + '.jpg', 'JPEG', quality=80)
            i += 1
            # seek() raises EOFError after the last frame, which ends the loop.
            im.seek(im.tell() + 1)
    except EOFError:
        pass  # end of sequence
which results in an output like this:
Example #2 http://longcat.de/gif_example2.jpg
The advantage over the first try was, that the first frame looks pretty good
But as you can see, the rest is broken
What should i try next?
Edit:
I think i came a lot closer to the solution
Example #3 http://longcat.de/gif_example3.png
I had to use the palette of the first image for the other images,
and merge it with the previous frame (for gif animations which use
diff-images)
def processImage(infile):
    """Save every frame of ``infile`` as ``foo<N>.png``, merged with the previous frame.

    Third attempt: the palette of the first frame is re-applied to each
    frame, and each frame is pasted on top of the previously written one.
    Exits the process with status 1 if the file cannot be opened.
    """
    try:
        im = Image.open(infile)
    except IOError:
        # %-formatting prints the same text on Python 2 and Python 3.
        print("Cant load %s" % infile)
        sys.exit(1)
    i = 0
    size = im.size
    lastframe = im.convert('RGBA')
    mypalette = im.getpalette()
    try:
        while 1:
            im2 = im.copy()
            im2.putpalette(mypalette)
            background = Image.new("RGB", size, (255, 255, 255))
            background.paste(lastframe)
            background.paste(im2)
            background.save('foo' + str(i) + '.png', 'PNG', quality=80)
            lastframe = background
            i += 1
            # seek() raises EOFError after the last frame, which ends the loop.
            im.seek(im.tell() + 1)
    except EOFError:
        pass  # end of sequence
But i actually dont know, why my transparency is black, instead of white
Even if i modify the palette (change the transparency channel to white)
or use the transparency mask, the background is still black

First of all, JPEG doesn't support transparency! But that's not the only problem. As you move to the next frame of the GIF, the palette information is lost (a problem with PIL?), so PIL is unable to convert the frame to RGBA correctly (hence the first frame is okay-ish, but all the others are broken). The workaround is to add the palette back in for every frame. That is what you were doing in your last code example, but your trouble was that you were saving as RGB, not RGBA, so you had no alpha (transparency) channel. You were also doing a few unnecessary things. Anyhow, here are the PNGs with transparency and the corrected code; I hope it's of some use :)
import Image
import sys
def processImage(infile):
    """Save every frame of ``infile`` as an RGBA PNG named ``foo<N>.png``.

    The palette of the first frame is re-applied before each frame is
    pasted into a new RGBA image.
    Exits the process with status 1 if the file cannot be opened.
    """
    try:
        im = Image.open(infile)
    except IOError:
        # %-formatting prints the same text on Python 2 and Python 3.
        print("Cant load %s" % infile)
        sys.exit(1)
    i = 0
    mypalette = im.getpalette()
    try:
        while 1:
            im.putpalette(mypalette)
            new_im = Image.new("RGBA", im.size)
            new_im.paste(im)
            new_im.save('foo' + str(i) + '.png')
            i += 1
            # seek() raises EOFError after the last frame, which ends the loop.
            im.seek(im.tell() + 1)
    except EOFError:
        pass  # end of sequence
processImage('gif_example.gif')

When you view an image in an image viewer, transparent areas tend to be displayed as black, even when they are fully transparent. One way to be sure that your image is truly transparent is to paste it over another image: the 'emoticon' should be visible without hiding the rest of the other image. Try:
# Paste the foreground over the background at the top-left corner, passing the
# foreground itself as the mask argument.
# NOTE(review): the mask presumably needs an alpha channel to have any effect;
# 'foo.jpg' probably stands for one of the PNG frames saved above — confirm.
background = Image.open('someimage.jpg') #an existing image
foreground = Image.open('foo.jpg') #one of the above images
background.paste(foreground, (0,0), foreground)
background.save('trial.jpg') #the composite image
Theoretically, if you open 'trial.jpg' in the image viewer and the content of the initial image is preserved and on top of it lies the foo image then you'll know for sure if it's just the image viewer and your images are fine...

source here
# Open the GIF, convert it to RGB and save the result as a JPEG in one chained call.
# NOTE(review): presumably only the first frame is written — confirm for animated GIFs.
Image.open('image.gif').convert('RGB').save('image.jpg')

This works for me. The following example shows converting image.gif to 8 jpg format images with white background.
from PIL import Image
from PIL import GifImagePlugin
def gif2jpg(file_name: str, num_key_frames: int, trans_color: tuple):
    """
    Convert a GIF to ``num_key_frames`` images in JPEG format.

    Frames are sampled at evenly spaced indices and written to ``0.jpg``,
    ``1.jpg``, ... in the current directory.

    :param file_name: gif file name
    :param num_key_frames: number of result images
    :param trans_color: RGB colour that replaces fully transparent pixels
    :return: None
    """
    with Image.open(file_name) as im:
        for i in range(num_key_frames):
            im.seek(im.n_frames // num_key_frames * i)
            rgba = im.convert("RGBA")
            # Binary mask: 0 where alpha == 0 (keep trans_color), 255 elsewhere
            # (copy the pixel's RGB value unchanged).
            opaque_mask = rgba.split()[3].point(lambda alpha: 255 if alpha else 0)
            frame = Image.new("RGB", im.size, tuple(trans_color))
            frame.paste(rgba, mask=opaque_mask)
            frame.save('{}.jpg'.format(i))
gif2jpg("image.gif", 8, (255, 255, 255)) # convert image.gif to 8 jpg images with white background

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Use custom scrapy imagePipeline to download images and overwrite existing images - python

There is an expiration setting, it is by default 90 days.

Related

img2pdf AlphaChannelError: what is the best way to remove alphachannel

Downloading Images using BS4

Image Conversion - Cannot write mode RGBA as JPEG

Resizing uploaded files in django using PIL

PIL - Convert GIF Frames to JPG

Categories

Resources