As most of you know, I publish plenty of images on this blog, and I ensure that all of them are described. The biggest challenge I have in posting photographs on this blog is captioning them. I have to get images described manually before I put them up here. Once I have taken my photographs, I sort my pictures by location, which is possible because of the geotagging my phone does. I then send them to people who were on the trip, who describe the images. I have been searching for solutions that describe images automatically. I was thrilled to learn that WordPress had a plugin that used the Microsoft Cognitive Services API to automatically describe images. The describer plugin, however, did not give me location information, so I rolled my own code in Python. I have created a utility that queries Google for the location and the Microsoft Cognitive Services API for image descriptions, and writes them to a text file. I also tried to embed the descriptions in EXIF tags, but that did not work and I cannot tell why.
References
You will need an API key from the below link.
Microsoft Cognitive Services API
The WordPress plugin that uses the Microsoft Cognitive Services API to automatically describe images when uploading
Notes
- You will need to keep your Cognitive Services API key alive by describing images at least once every 90 days (I think).
- Do account for Google’s usage limits for the geotagging API.
- In the code, do adjust where the image files you want described live, as well as where you want the log file to be stored.
- Do ensure you add your API key before you run the code.
import glob
from PIL import Image
from PIL.ExifTags import TAGS
from PIL.ExifTags import TAGS, GPSTAGS
import piexif
import requests
import json
import geocoder
def _get_if_exist(data, key):
if key in data:
return data[key]
return None
def get_exif_data(fn):
    """Return the EXIF data of the image file *fn* as a dict keyed by tag name.

    Numeric tag ids are translated through ``TAGS``; ids with no known name
    are kept as-is. The ``GPSInfo`` entry is itself converted into a dict
    whose keys are translated through ``GPSTAGS``.

    Args:
        fn: Path of the image file to read.

    Returns:
        A dict of decoded EXIF entries; empty when the image carries no EXIF
        data.
    """
    exif_data = {}
    # The context manager releases the underlying file handle, which the
    # bare Image.open() call would otherwise keep open.
    with Image.open(fn) as image:
        # Not every Pillow image class provides _getexif(); treat a missing
        # method the same as "no EXIF data" instead of raising.
        read_exif = getattr(image, "_getexif", None)
        info = read_exif() if read_exif is not None else None

    if not info:
        return exif_data

    for tag, value in info.items():
        decoded = TAGS.get(tag, tag)
        if decoded == "GPSInfo":
            # Translate the nested GPS tag ids into their names as well.
            exif_data[decoded] = {GPSTAGS.get(t, t): value[t] for t in value}
        else:
            exif_data[decoded] = value
    return exif_data
def _convert_to_degrees(value):
"""Helper function to convert the GPS coordinates stored in the EXIF to degrees in float format"""
d0 = value[0][0]
d1 = value[0][1]
d = float(d0) / float(d1)
m0 = value[1][0]
m1 = value[1][1]
m = float(m0) / float(m1)
s0 = value[2][0]
s1 = value[2][1]
s = float(s0) / float(s1)
return d + (m / 60.0) + (s / 3600.0)
def get_lat_lon(exif_data):
    """Return ``(latitude, longitude)`` in signed decimal degrees.

    Args:
        exif_data: Dict as produced by ``get_exif_data``; GPS values are read
            from its ``"GPSInfo"`` entry.

    Returns:
        A ``(lat, lon)`` tuple of floats, or ``(None, None)`` when the GPS
        block or any of the four required GPS fields is missing or empty.
        Latitude is negated unless its reference is ``"N"``; longitude is
        negated unless its reference is ``"E"``.
    """
    if "GPSInfo" not in exif_data:
        return None, None

    gps_info = exif_data["GPSInfo"]
    gps_latitude = _get_if_exist(gps_info, "GPSLatitude")
    gps_latitude_ref = _get_if_exist(gps_info, "GPSLatitudeRef")
    gps_longitude = _get_if_exist(gps_info, "GPSLongitude")
    gps_longitude_ref = _get_if_exist(gps_info, "GPSLongitudeRef")

    # All four fields are needed to produce a signed coordinate pair.
    if not (gps_latitude and gps_latitude_ref
            and gps_longitude and gps_longitude_ref):
        return None, None

    lat = _convert_to_degrees(gps_latitude)
    if gps_latitude_ref != "N":
        lat = 0 - lat

    lon = _convert_to_degrees(gps_longitude)
    if gps_longitude_ref != "E":
        lon = 0 - lon

    return lat, lon
def getPlaceName(fn):
    """Return the reverse-geocoded address for the image file *fn*.

    The coordinates are read from the image's EXIF GPS data and passed to
    ``geocoder.google`` with ``method='reverse'``.

    Args:
        fn: Path of the image file.

    Returns:
        The ``address`` attribute of the geocoder result, or ``None`` when
        the image has no usable GPS coordinates.
    """
    lat, lon = get_lat_lon(get_exif_data(fn))
    # Without coordinates there is nothing to look up; skip the network call
    # (and the geocoding quota it would consume).
    if lat is None or lon is None:
        return None
    g = geocoder.google((lat, lon), method='reverse')
    return g.address
def getImageDescription(fn, api_key='myKey',
                        endpoint='https://api.projectoxford.ai/vision/v1.0/describe',
                        timeout=30):
    """Return the first caption the image-description service gives for *fn*.

    The image is uploaded as multipart form data with
    ``visualFeatures=Description`` and the key is sent in the
    ``Ocp-Apim-Subscription-Key`` header.

    Args:
        fn: Path of the image file to describe.
        api_key: Subscription key for the service. The default is the
            placeholder from the original script and must be replaced.
        endpoint: URL of the "describe" operation.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``text`` of the first entry in the response's
        ``description.captions`` list.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        ValueError: If the response contains no captions.
    """
    payload = {'visualFeatures': 'Description'}
    headers = {'Ocp-Apim-Subscription-Key': api_key}

    # Open the image in a context manager so the handle is closed once the
    # upload has finished.
    with open(fn, 'rb') as image_file:
        r = requests.post(endpoint, params=payload,
                          files={'file': image_file}, headers=headers,
                          timeout=timeout)

    # Surface HTTP failures (bad key, quota exceeded, ...) directly instead
    # of as a KeyError while digging through an error payload.
    r.raise_for_status()

    data = json.loads(r.text)
    captions = data.get('description', {}).get('captions') or []
    if not captions:
        raise ValueError("no captions returned for file: " + fn)
    return captions[0]['text']
def tagFile(fn, ds):
    """Store *ds* in the EXIF ``ImageDescription`` tag of the JPEG file *fn*.

    The original code assigned to ``exif_dict['Description''Comment']``.
    Adjacent string literals are concatenated, so that created a top-level
    ``'DescriptionComment'`` key, which is not one of the IFD groups piexif
    serialises. The text is now written into the ``"0th"`` IFD under the
    ``ImageDescription`` tag.

    Args:
        fn: Path of the JPEG file to modify in place.
        ds: Description text (``str`` or ``bytes``).
    """
    # Load straight from the file so images without an existing EXIF block
    # do not fail on a missing img.info["exif"] entry.
    exif_dict = piexif.load(fn)

    # Pass bytes so that non-Latin-1 characters (e.g. in place names) do not
    # break serialisation; the text is stored UTF-8 encoded.
    description = ds if isinstance(ds, bytes) else ds.encode("utf-8")
    exif_dict["0th"][piexif.ImageIFD.ImageDescription] = description

    exif_bytes = piexif.dump(exif_dict)
    # piexif.insert rewrites only the EXIF segment of the file, so the extra
    # img.save() (a lossy JPEG re-encode) from the original is not needed.
    piexif.insert(exif_bytes, fn)
def createLog(dl, log_path='imageDescriberLog.txt'):
    """Append the line *dl* to the log file.

    Args:
        dl: Text to log; a trailing newline is added.
        log_path: File to append to. Defaults to ``imageDescriberLog.txt``
            in the current working directory.
    """
    # Explicit UTF-8 so place names with non-ASCII characters can be written
    # regardless of the platform's default encoding.
    with open(log_path, 'a', encoding='utf-8') as f:
        f.write(dl + "\n")
# Glob pattern selecting the images to process. A raw string keeps the
# backslash literal without relying on the invalid "\*" escape sequence.
path = r"\*.jpg"

# For every matching image: look up its location and description, log both,
# then try to embed the combined text in the file's EXIF data.
for fname in glob.glob(path):
    print("processing:" + fname)
    createLog("processing:" + fname)

    # Reset per file so that a failed lookup can never reuse the value
    # obtained for the previous image (or hit an undefined name on the
    # first one).
    imageLocation = None
    try:
        imageLocation = getPlaceName(fname)
    except Exception as exc:
        createLog("error in getting location name for file: " + fname
                  + " (" + repr(exc) + ")")

    imageDescription = None
    try:
        imageDescription = getImageDescription(fname)
    except Exception as exc:
        createLog("error in getting description of file: " + fname
                  + " (" + repr(exc) + ")")

    # Either lookup may have produced nothing; substitute a placeholder
    # rather than failing on str + None.
    imgString = ("Description: " + (imageDescription or "unknown") + "\n"
                 + "location: " + (imageLocation or "unknown"))
    createLog(imgString)

    # Nothing worth embedding when both lookups failed.
    if not (imageDescription or imageLocation):
        continue

    try:
        tagFile(fname, imgString)
    except Exception as exc:
        createLog("error in writing exif tag to file: " + fname
                  + " (" + repr(exc) + ")")