Python, ignore files with no Exif data

Python, ignore files with no Exif data - python

I am trying to do a mass extraction of gps exif data, my code below:
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS
def get_exif_data(image):
exif_data = {}
info = image._getexif()
if info:
for tag, value in info.items():
decoded = TAGS.get(tag, tag)
if decoded == "GPSInfo":
gps_data = {}
for t in value:
sub_decoded = GPSTAGS.get(t, t)
gps_data[sub_decoded] = value[t]
exif_data[decoded] = gps_data
else:
exif_data[decoded] = value
return exif_data
def _get_if_exist(data, key):
if key in data:
return data[key]
else:
pass
def get_lat_lon(exif_data):
gps_info = exif_data["GPSInfo"]
lat = None
lon = None
if "GPSInfo" in exif_data:
gps_info = exif_data["GPSInfo"]
gps_latitude = _get_if_exist(gps_info, "GPSLatitude")
gps_latitude_ref = _get_if_exist(gps_info, "GPSLatitudeRef")
gps_longitude = _get_if_exist(gps_info, "GPSLongitude")
gps_longitude_ref = _get_if_exist(gps_info, "GPSLongitudeRef")
if gps_latitude and gps_latitude_ref and gps_longitude and gps_longitude_ref:
lat = _convert_to_degrees(gps_latitude)
if gps_latitude_ref != "N":
lat = 0 - lat
lon = _convert_to_degrees(gps_longitude)
if gps_longitude_ref != "E":
lon = 0 - lon
return lat, lon
Code source
Which is run like:
if __name__ == "__main__":
image = Image.open("photo directory")
exif_data = get_exif_data(image)
print(get_lat_lon(exif_data)
This works fine for one photo, so I've used glob to iterate over all photos in a file:
import glob
file_names = []
for name in glob.glob(photo directory):
file_names.append(name)
for item in file_names:
if __name__ == "__main__":
image = Image.open(item)
exif_data = get_exif_data(image)
print(get_lat_lon(exif_data))
else:
pass
Which works fine, as long as every photo in the file is a) an image and b) has gps data. I have tried adding a pass in the _get_if_exist function as well as my file iteration, however, neither same to have had any impact and I'm still receiving KeyError: 'GPSInfo'
Any ideas on how I can ignore photos with no data or different file types?

A possible approach would be writing a small helper function that first checks, if the file is actually an image file and as a second step checks if the image contains EXIF data.
def is_metadata_image(filename):
try:
image = Image.open(filename)
return 'exif' in image.info
except OSError:
return False
I found that PIL does not work every time with .png files that do contain EXIF information when using _getexif(). So instead I check for the key exif in the info dictionary of an image.

I've tried this source code.
Simply you need to remove
gps_info = exif_data["GPSInfo"]
from the first line of get_lat_lon(exif_data) function, it works well for me.

Related

How to load an image written several times at runtime in kivymd

I am coding a music player in kivymd , I have a class that extracts the song metadata including the image data in bytes and write the bytes to an image file. I must do this for each song and assign the image to a FitImage class. However, the image displayed for each songs happens to be the image for the last song in the list. I need a way to fix this
The extractor class
from mutagen.id3 import ID3
class Infos:
def __init__(song):
self.song = song
self.data = None
def extract_details(self):
# skipped unnecessary details here
#But it returns the title, year, genre, song_length
return title,year,genre,song_length
def get_image_data(self):
tag = None
try:
tag = ID3(self.song)
except:
# does not start with an ID3 tag
pass
if tag is not None:
if "APIC:" in tag.keys():
data = tag['APIC:']
self.data = data.data
else:
data_file = open("assets/images/default.png", 'rb')
self.data = data_file.read()
data_file.close()
else:
# print("No tag")
data_file = open("assets/images/default.png", 'rb')
self.data = data_file.read()
data_file.close()
return self.data
def write_image(self):
try:
image = open("assets/images/album_art.png", "wb")
image.write(self.get_image_data())
image.close()
#print("saved")
except Exception as e:
print(e)
In the function resposible for updating FitImage with the exctracted image
def populate_music_tab(self, content_cls):
"""fuction for adding music in a gridlayout"""
music_list = os.listdir("assets/sample-songs")
item_holder = content_cls.ids.song_holder #MDGridLayout
for song in music_list:
info = Infos("assets/sample/songs/"+song)
title, yr, gnr, length = info.extract_details() # get the song details
info.write_image() # write the image first before loading
image_src = "assets/images/album_art.png"
item = CustomListItem() # MDCard with FitImage,Labels and Buttons
item.ids.song_name = title
item.ids.year = yr
item.ids.song_length = length
item.ids.image_left = image_src # FitImage
holder.add_widget(item)
The code above does this
image output
This is what I want to achieve that each item displays its own unique image present in the song metadata
desired image output

How can I access images from powerpoint (python-pptx)

I'm having a hard time trying to access/save images using the python-pptx library. So, if the image is of shape type PICTURE (that's shape.shape_type == MSO_SHAPE_TYPE.PICTURE) I can access/save the image easily using the 'blob' attribute. Here is the code:
import argparse
import os
from PIL import Image
import pptx
from pptx.enum.shapes import MSO_SHAPE_TYPE
from pptx import Presentation
from mdutils.mdutils import MdUtils
from mdutils import Html
def main():
parser = argparse.ArgumentParser()
parser.add_argument('ppt_name', type=str, help='add the name of the PowerPoint file(NOTE: the folder must be in the same directory as the prorgram file')
args = parser.parse_args()
pptx_name = args.ppt_name
pptx_name_formatted = pptx_name.split('.')[0]
prs = Presentation(pptx_name)
path = '{}_converted'.format(pptx_name_formatted)
if not os.path.exists(path):
os.mkdir(path)
images_folder = '{}_images'.format(pptx_name_formatted)
images_path = os.path.join(path, images_folder)
if not os.path.exists(images_path):
os.mkdir(images_path)
ppt_dict = {} #Keys: slide numbers, values: slide content
texts = []
slide_count = 0
picture_count = 0
for slide in prs.slides:
texts = []
slide_count += 1
for shape in slide.shapes:
if shape.has_text_frame:
if '\n' in shape.text:
splitted = shape.text.split('\n')
for word in splitted:
if word != '':
texts.append(word)
elif shape.text == '':
continue
else:
texts.append(shape.text)
elif shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
with open('{}/image{}_slide{}.png'.format(images_path, picture_count, slide_count), 'wb') as f:
f.write(shape.image.blob)
picture_count += 1
ppt_dict[slide_count] = texts
ppt_content = ''
for k,v in ppt_dict.items():
ppt_content = ppt_content + ' - Slide number {}\n'.format(k)
for a in v:
ppt_content = ppt_content + '\t - {}\n'.format(a)
mdFile = MdUtils(file_name='{}/{}'.format(path,path)) #second argument isn't path, it just shares the path name.
mdFile.write(ppt_content)
mdFile.create_md_file()
if __name__ == "__main__":
main()
The problem is when the picture is of shape type 'auto shape' , I tried a lot of approaches but to no avail. When I do run the following code for a shape that I know is a picture:
if shape.shape_type == MSO_SHAPE_TYPE.AUTO_SHAPE:
print(shape.auto_shape_type)
print(shape.fill.type)
#indented because it's in a for loop
It outputs RECTANGLE for shape.auto_shape_type
and PICTURE for shape.fill.type
But what I want now is to save the picture (maybe by writing the the binary image bytestream of the image). Can someone help?

The "link" to the image (part, having the blob) is in the fill definition. Using that you can get to the image.
Print out the XML for the surroundings of the fill definition with shape.fill._xPr.xml. That will give you a look at what you need to navigate to. Good chance it will look something like "rId9" with some particular other number where the "9" placeholder is in that example. Probably in the neighborhood of something like "blipfill". The image is used as the "fill" of the shape, so that's what's going on here.
Then get the slide part with something like slide._part and use its .related_parts "dict" to look up the image "fill" part using the relationship-id (the string like "rId9").
image_part = slide._part.related_parts["rId9"]
The ImagePart implementation is here:
https://github.com/scanny/python-pptx/blob/master/pptx/parts/image.py#L21
and it gives access to the image and a lot of details about it as well.
You'll have to retrieve the "rId9"-like string using lxml calls, something roughly like:
rIds = shape.fill._xPr.xpath(".//#embed")
rId = rIds[0]
You'll need to do a little research on XPath to work out the right expression, based on the XML you print out in the earlier step. There's a lot out there on XPath, including here on SO, this is one resource to get started: http://www.rpbourret.com/xml/XPathIn5.htm
If you can't work it out, post the XML you printed out and we can get you to the next step.

Here is my approach, thanks to scanny.
for slide in prs.slides:
slide_count += 1
slide_parts = list(slide._part.related_parts.keys())
for part in slide_parts:
image_part = slide._part.related_parts[part]
if type(image_part) == pptx.parts.image.ImagePart or pptx.opc.package.Part:
file_startswith = image_part.blob[0:1]
if file_startswith == b'\x89' or file_startswith == b'\xff' or file_startswith == b'\x47':
with open('{}/image{}_slide{}.png'.format(images_path, picture_count, slide_count), 'wb') as f:
f.write(image_part.blob)
picture_count += 1
the if condition to check for PNG, JPEG or GIF is there because pptx.opc.package.Part isn't always an image.
Actually, I think since I'm checking for the beginning of image_part.blob, I don't think I need to include say if type(image_part) == pptx.parts.image.ImagePart or pptx.opc.package.Part:
But as long as it's working...

Python dict to csv

I have written a script to find image size and aspect ratio of all images in a directory along with their corresponding filepaths, I want to print dict values to csv file with following headers width,height,aspect-ratio and filepath
import os
import json
from PIL import Image
folder_images = "/home/user/Desktop/images"
size_images = dict()
def yocd(a,b):
if(b==0):
return a
else:
return yocd(b,a%b)
for dirpath, _, filenames in os.walk(folder_images):
for path_image in filenames:
if path_image.endswith(".png") or path_image.endswith('.jpg') or path_image.endswith('.JPG') or path_image.endswith('.jpeg'):
image = os.path.abspath(os.path.join(dirpath, path_image))
""" ImageFile.LOAD_TRUNCATED_IMAGES = True """
try:
with Image.open(image) as img:
img.LOAD_TRUNCATED_IMAGES = True
img.verify()
print('Valid image')
except Exception:
print('Invalid image')
img = False
if img is not False:
width, heigth = img.size
divisor = yocd(width, heigth)
w = str(int(width / divisor))
h = str(int(heigth / divisor))
aspectratio = w+':'+h
size_images[image] = {'width': width, 'heigth': heigth,'aspect-ratio':aspectratio,'filepath': image}
for k, v in size_images.items():
print(k, '-->', v)
with open('/home/user/Documents/imagesize.txt', 'w') as file:
file.write(json.dumps(size_images))```

You can add a (properly constructed) dict directly to a pandas.DataFrame. Then, DataFrames have a .to_csv() function.
Here are the docs:
Pandas: Create a DataFrame
Pandas: Write to CSV

Without dependencies (but you may have to tweak the formatting)
csv_sep = ';' # choose here wich field separatar you want
with open('your_csv', 'w') as f:
# header
f.write("width"+csv_sep"+height"+csv_sep"+aspect-ratio"+csv_sep+"filepath\n")
# data
for img in size_images:
fields = [img['width'], img['height'], img['aspect-ratio'], img['filepath']]
f.write(csv_sep.join(fields)+'\n')

How to encrypt images with python

Preface: this is required for a class, I know ECB should not be used.
I am trying to encrypt images using AES and then display the images
Steps needed:
Read the image,
Convert to byte object,
Pad the bytes,
Encrypt the bytes,
Convert back to image object,
Save as image file
This is my code right now:
from PIL import Image
from Crypto.Cipher import AES
from Crypto import Random
img = Image.open("photo.jpg")
img.tobytes()
key = '0123456789abcdef'
mode = AES.MODE_ECB
encryptor = AES.new(key, mode)
img.frombytes("RGB")
At this point I am stuck. I am getting a "not enough image data" error on the line "img.frombytes("RGB"), and am also stuck at the part to pad the bytes

So I needed a way to transfer files in the form of images (don't ask me why), if you just want to transfer text you can maybe create a txt file.
This is probably not exactly the best solution to the question as you probably want a way to hide data inside an existing image but I would like anyway to share the code in case it will help someone sometime somewhere.
Basically this will create an image with a size dependent on the file size and will put a sequence of 3 bytes in one pixel (RGB)
So I wrote a small folder2ImageEncoder.py
(it will encrypt all the data that is located in a folder named "files" by default)
from PIL import Image
from pathlib import Path
encryptedImagesFolder = 'encryptedImagesFolder'
Path(f"./{encryptedImagesFolder}").mkdir(parents=True, exist_ok=True)
newLine = b'\new\n\rL'
def encode_data_to_image(data: bytes, imagePath: str):
data += b'FINISH_OF_DATA'
data = str(
{
'path': imagePath,
'data': data
}
)
data = data.encode()
n = int((len(data)/3)**0.5) + 1
print(n, len(data))
img = Image.new('RGB', (n, n))
# data = img.getdata()
encryptedPixelsList = []
pixel = []
if len(data) % 3 != 0:
data += (3 - (len(data) % 3)) * b'\x00'
for i, Byte in enumerate(data):
if i % 3 == 2:
pixel.append(Byte)
encryptedPixelsList.append(tuple(pixel))
pixel = []
else:
pixel.append(Byte)
for _ in range(len(encryptedPixelsList), n**2):
encryptedPixelsList.append((0, 0, 0))
img.putdata(encryptedPixelsList)
imagePath = imagePath.replace('\\', '_')
img.save(f'./{encryptedImagesFolder}/{imagePath}.png')
# img.show()
def encode_folder(folder: Path):
for file in folder.iterdir():
if not file.is_dir():
with open(str(file), 'rb') as rb:
data = rb.readlines()
encode_data_to_image(
data=newLine.join(data),
imagePath=str(file))
else:
encode_folder(folder=file)
if __name__ == '__main__':
# ./files is the name of the folder you want to encrypt
encode_folder(folder=Path(r'./files'))
and a Image2FilesDecoder.py
this will iterate through the encrypted images folder and will retrieve its former form (run this in another folder with the encrypted images folder so it won't override the original files folder)
from PIL import Image
from pathlib import Path
newLine = b'\new\n\rL'
def decode_encrypted_images(folder: Path):
for pic in folder.iterdir():
img = Image.open(str(pic))
data = img.getdata()
totalData = []
for pixel in data:
totalData.extend(list(pixel))
decryptedData = bytes(totalData)
try:
decryptedData = eval(
decryptedData[:decryptedData.rfind(b'}')+1].decode())
except:
decryptedData.replace(b'\\', '')
decryptedData = eval(
decryptedData[:decryptedData.rfind(b'}')+1].decode())
decryptedData['data'] = decryptedData['data'][:-14]
filePathObj = Path(decryptedData['path'])
Path(filePathObj.parent).mkdir(
parents=True, exist_ok=True)
writeBytes = decryptedData['data'].split(newLine)
with open(str(filePathObj), 'wb') as wb:
wb.writelines(writeBytes)
if __name__ == '__main__':
decode_encrypted_images(folder=Path(
r".\encryptedImagesFolder"))

(OpenCV) put date in filename

I want to save VideoCapture in some condition,
so I wrote following code:
date = datetime.now()
detector = dlib.get_frontal_face_detector()
cap = cv2.VideoCapture(1) #I use second camera
global count, no_face
total_number = 0
count = 0
no_face = 0
num_bad_posture = 0
not_detected_posture = 0
while True:
ret, frame = cap.read()
frame = cv2.resize(frame,None,fx=sf,fy=sf, interpolation=cv2.INTER_AREA)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
dets = detector(gray,1)
if not dets:
print('no face')
no_face += 1
if no_face > 15:
print('no face!! for real!!')
now = str(date.now())
not_detected = cv2.resize(frame,None,fx=5,fy=5,interpolation = cv2.INTER_CUBIC)
not_detected_posture += 1
print(type(now))
cv2.imwrite('./images/non_detected/non_detected({0},{1}).jpg'. format(not_detected_posture,now),not_detected)
no_face=0
for i, d in enumerate(dets):
no_face = 0
w = d.width()
h = d.height()
x = d.left()
y = d.top()
If I run this code, the file is not saved.
Also if I delete date.now() and just put num_detected, it is saved properly
I have no idea what is wrong with the file name(because it's type is str and other strs are saved properly.
Also if I do
print(type(now),now)
it appears
I need help.

I would use a more limited set of characters for the filenames, avoiding things like spaces, colons, parentheses. Use a custom datetime format to generate a timestamp in a form you want.
Example:
from datetime import datetime
from os import path
BASE_DIR = './images/non_detected'
PREFIX = 'non_detected'
EXTENSION = 'jpg'
file_name_format = "{:s}-{:d}-{:%Y%m%d_%H%M%S}.{:s}"
date = datetime.now()
not_detected_posture = 0
file_name = file_name_format.format(PREFIX, not_detected_posture, date, EXTENSION)
file_path = path.normpath(path.join(BASE_DIR, file_name))
# ---------
print("File name = '%s'" % file_name)
print("File path = '%s'" % file_path)
Output:
File name = 'non_detected-0-20170819_152459.jpg'
File path = 'images/non_detected/non_detected-0-20170819_152459.jpg'
Simple, unambiguous and portable.
Perhaps one more improvement. If you have a rough idea how many images you will generate, you can zero-pad the number, so that the filenames are sorted in order. For example, if you know there will be at most 10000, then you can do
file_name_format = "{:s}-{:04d}-{:%Y%m%d_%H%M%S}.{:s}"
to you get a file name like
File name = 'non_detected-0000-20170819_152459.jpg'

The problem could be with the ':' or the '.' in the filename due to the inclusion of the datetime. I am not sure which one, because you can have filenames that include those and you can write such filenames from within python AFAIK. So, it might be an issue specific to cv2.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python, ignore files with no Exif data - python

I've tried this source code. Simply you need to remove gps_info = exif_data["GPSInfo"] from the first line of get_lat_lon(exif_data) function, it works well for me.

Related

How to load an image written several times at runtime in kivymd

How can I access images from powerpoint (python-pptx)

Python dict to csv

How to encrypt images with python

(OpenCV) put date in filename

Categories

Resources