from collections import OrderedDict
import json
import os.path
import urllib
# cached property is only available in Python 3.8+
try:
from functools import cached_property
except ImportError:
cached_property = None
from attrdict import AttrMap
from django.conf import settings
from django.db import models
from django.urls import reverse
from django.templatetags.static import static
from django.utils.html import format_html
from jsonfield import JSONField
from piffle import iiif
import rdflib
from rdflib.namespace import DC
import requests
from requests.exceptions import ConnectionError
# use cached property if python3.8 or greater; fallback to regular property
c_property_if38 = cached_property or property
[docs]def get_iiif_url(url):
'''Wrapper around :meth:`requests.get` to support conditionally
adding an auth token based on the domain of the request url and
any **AUTH_TOKENS** configured in django settings.'''
request_options = {}
AUTH_TOKENS = getattr(settings, 'DJIFFY_AUTH_TOKENS', None)
if AUTH_TOKENS:
domain = urllib.parse.urlparse(url).netloc
if domain in AUTH_TOKENS:
request_options['params'] = {'auth_token': AUTH_TOKENS[domain]}
return requests.get(url, **request_options)
[docs]class IIIFException(Exception):
'''Custom exception for IIIF/djiffy specific errors'''
pass
[docs]class Manifest(models.Model):
'''Minimal db model representation of an IIIF presentation manifest'''
#: label
label = models.TextField()
#: short id extracted from URI
short_id = models.CharField(max_length=255, unique=True)
#: URI
uri = models.URLField()
#: iiif presentation metadata for display
metadata = JSONField(load_kwargs={'object_pairs_hook': OrderedDict})
#: date local manifest cache was created
created = models.DateField(auto_now_add=True)
#: date local manifest cache was last modified
last_modified = models.DateField(auto_now=True)
#: extra data provided via a 'seeAlso' reference
extra_data = JSONField(load_kwargs={'object_pairs_hook': OrderedDict},
default=OrderedDict)
class Meta:
verbose_name = 'IIIF Manifest'
# add custom permissions; change and delete provided by django
permissions = (
('view_canvas', 'Can view %s' % verbose_name),
)
# todo: metadata? thumbnail references
# - should we cache the actual manifest file?
# TODO: thumbnail doesn't have to be a IIIF image! Support thumbnail url?
def __str__(self):
return self.label or self.short_id
@property
def thumbnail(self):
'''thumbnail url for associated canvas'''
return self.canvases.filter(thumbnail=True).first()
[docs] def get_absolute_url(self):
''''url for this manifest within the django site'''
return reverse('djiffy:manifest', args=[self.short_id])
[docs] def admin_thumbnail(self):
'''thumbnail for convenience display in admin interface'''
if self.thumbnail:
return self.thumbnail.admin_thumbnail()
admin_thumbnail.short_description = 'Thumbnail'
@c_property_if38
def logo(self):
'''manifest logo, if there is one'''
return self.extra_data.get('logo', None)
@c_property_if38
def attribution(self):
'''manifest attribution, if there is one'''
return self.extra_data.get('attribution', None)
@c_property_if38
def license(self):
'''manifest license, if there is one'''
return self.extra_data.get('license', None)
@c_property_if38
def license_uri(self):
'''manifest license as :class:`rdflib.URIRef`, if there is a license'''
license = self.license
if license:
# CC uri is also http rather than https
if urllib.parse.urlparse(license).hostname == 'creativecommons.org':
# remove language from url if present
url_parts = license.rstrip("/").split('/')
# url looks like https://creativecommons.org/publicdomain/mark/1.0/deed.de
# if the last part is a language code, remove it
if url_parts[-1].startswith("deed."):
url_parts = url_parts[:-1]
license = "%s/" % '/'.join(url_parts) # URI requires trailing slash
return rdflib.URIRef(license)
@c_property_if38
def rights_statement_id(self):
'''short id for rightstatement.org license'''
# rightstatement uri is http, not https
if self.license and urllib.parse.urlparse(self.license).hostname == 'rightsstatements.org':
return self.license.rstrip(' /').split('/')[-2]
@c_property_if38
def creativecommons_id(self):
'''short id for creative commons license'''
if self.license and urllib.parse.urlparse(self.license).hostname == 'creativecommons.org':
if "publicdomain/zero/" in self.license:
return "cc-zero"
if "publicdomain/mark/" in self.license:
return "publicdomain"
@c_property_if38
def license_image(self):
'''license image, if we can generate one'''
if self.rights_statement_id:
return static("img/rightsstatements_org/%s.svg" % self.rights_statement_id)
if self.creativecommons_id:
return static("img/creativecommons/%s.svg" % self.creativecommons_id)
_rights_graph = None
# TODO: should use django current language if possible
[docs] def license_label(self, lang='en'):
'''Get the text label for the rights license. Uses local
value from edm rights if available; otherwise uses
data for the URI to get the preferred label or title.'''
# Some manifests have a seeAlso data contains an "edm_rights"
# section with a label for the rights statement.
# Use that if available (NOTE: ignores specified language)
# NOTE: possibly PUL specific, but shouldn't hurt to look locally first
for data in self.extra_data.values():
if 'edm_rights' in data and 'pref_label' in data['edm_rights']:
return data['edm_rights']['pref_label']
# if license/rights label is not available locally, get via uri
if self._rights_graph is None:
# if license is defined and a url
if self.license and urllib.parse.urlparse(self.license).scheme in ['http', 'https']:
self._rights_graph = rdflib.Graph()
url_hostname = urllib.parse.urlparse(self.license).hostname
try:
# rights statement org does content-negotiation for json-jd,
# but rdflib doesn't handle that automatically
if url_hostname == 'rightsstatements.org':
resp = requests.get(self.license,
headers={'Accept': 'application/json'},
allow_redirects=False)
if resp.status_code == requests.codes.see_other:
self._rights_graph.parse(resp.headers['location'], format='json-ld')
# creative commons doesn't support content negotiation,
# but you can add rdf to the end of the url
elif url_hostname == 'creativecommons.org':
# license uri removes language if present and adds trailing slash
self._rights_graph.parse("%srdf" % self.license_uri)
except Exception:
# possible to get an exception when parsing the
# rdf, maybe on the request; don't choke if we do!
# NOTE: using generic Exception here becuase unfortunately
# that is what rdflib raises when it can't parse RDF
pass
# get the preferred label for this license in the requested language;
# returns a list of label, value; use the first value
if self._rights_graph:
license_uri = self.license_uri
preflabel = self._rights_graph.preferredLabel(license_uri,
lang=lang)
if preflabel:
# convert rdflib Literal to string
return str(preflabel[0][1])
# otherwise, get dc title
# iterate over all titles and return one with a matching language code
for title in self._rights_graph.objects(subject=license_uri, predicate=DC.title):
if title.language == lang:
return str(title)
[docs]class IIIFImage(iiif.IIIFImageClient):
'''Subclass of :class:`piffle.iiif.IIIFImageClient`, for generating
IIIF Image URIs for manifest canvas images.'''
#: long edge size for single page display
single_page_size = 1000
#: long edge size for thumbnail
thumbnail_size = 300
#: long edge size for mini thumbnail
mini_thumbnail_size = 100
thumbnail_format = getattr(settings, 'DJIFFY_THUMBNAIL_FORMAT', 'png')
[docs] def thumbnail(self):
'''thumbnail'''
return self.size(height=self.thumbnail_size, width=self.thumbnail_size,
exact=True).format(self.thumbnail_format)
[docs] def mini_thumbnail(self):
'''mini thumbnail'''
return self.size(height=self.mini_thumbnail_size,
width=self.mini_thumbnail_size, exact=True) \
.format(self.thumbnail_format)
[docs] def page_size(self):
'''page size for display: :attr:`SINGLE_PAGE_SIZE` on the long edge'''
return self.size(height=self.single_page_size,
width=self.single_page_size, exact=True)
[docs]class Canvas(models.Model):
'''Minimal db model representation of a canvas from an IIIF manifest'''
#: label
label = models.TextField()
#: short id extracted from URI
short_id = models.CharField(max_length=255)
#: URI
uri = models.URLField()
#: URL of IIIF image for this canvas
iiif_image_id = models.URLField()
#: :class:`Manifest` this canvas vbelongs to
manifest = models.ForeignKey(Manifest, related_name='canvases',
on_delete=models.CASCADE)
#: boolean flag to indicate if this canvas should be used as thumbnail
thumbnail = models.BooleanField(default=False)
#: order of this canvas within associated manifest primary sequence
order = models.PositiveIntegerField()
# (for now only stores a single sequence, so just store order on the page)
# format? size? (ocr text eventually?)
#: extra data not otherwise given its own field, serialized as json
extra_data = JSONField(load_kwargs={'object_pairs_hook': OrderedDict},
default=OrderedDict)
class Meta:
ordering = ["manifest", "order"]
verbose_name = 'IIIF Canvas'
verbose_name_plural = 'IIIF Canvases'
unique_together = ("short_id", "manifest")
# add custom permissions; change and delete provided by django
permissions = (
('view_manifest', 'Can view %s' % verbose_name),
)
def __str__(self):
return '%s %d (%s)%s' % (self.manifest, self.order + 1, self.label,
'*' if self.thumbnail else '')
@property
def image(self):
'''Associated IIIF image for this canvas as :class:`IIIFImage`'''
# NOTE: piffle iiif image wants service & id split out.
# Should update to handle iiif image ids as provided in manifests
# for now, split into service and image id. (is this reliable?)
return IIIFImage(*self.iiif_image_id.rsplit('/', 1))
@property
def plain_text_url(self):
'''Return plain text url for a canvas if one exists'''
rendering = self.extra_data.get('rendering', None)
if rendering:
# handle both cases where this is a list and where it is just
# a dictionary, to be safe
if isinstance(rendering, list):
for item in rendering:
# iterate over the list and return the first plain text url
# we find
if 'format' in item and item['format'] == 'text/plain':
return item['@id']
else:
# otherwise, if it's a dictionary, check if it's plaintext and
# return
if 'format' in rendering \
and rendering['format'] == 'text/plain':
return rendering['@id']
# finally return None if no plain text is available or no rendering
return None
@property
def width(self):
return self.extra_data.get('width', None)
@property
def height(self):
return self.extra_data.get('height', None)
[docs] def get_absolute_url(self):
''''url for this canvas within the django site'''
return reverse('djiffy:canvas', args=[self.manifest.short_id, self.short_id])
[docs] def next(self):
'''Next canvas after this one in sequence (within manifest
primary sequence). Returns an empty queryset if there is no next
canvas.'''
return Canvas.objects.filter(manifest=self.manifest, order__gt=self.order) \
.first()
[docs] def prev(self):
'''Previous canvas before this one in sequence
(within manifest primary sequence). Returns an empty queryset
if there is no next canvas.'''
return Canvas.objects.filter(manifest=self.manifest, order__lt=self.order) \
.last()
[docs] def admin_thumbnail(self):
'''thumbnail for convenience display in admin interface'''
return format_html('<img src="{}" />', self.image.mini_thumbnail())
admin_thumbnail.short_description = 'Thumbnail'
[docs]class IIIFPresentation(AttrMap):
''':class:`attrdict.AttrMap` subclass for read access to IIIF Presentation
content'''
# TODO: document sample use, e.g. @ fields
at_fields = ['type', 'id', 'context']
[docs] @classmethod
def from_file(cls, path):
'''Iniitialize :class:`IIIFPresentation` from a file.'''
with open(path) as manifest:
data = json.loads(manifest.read())
return cls(data)
[docs] @classmethod
def from_url(cls, uri):
'''Iniitialize :class:`IIIFPresentation` from a URL.
:raises: :class:`IIIFException` if URL is not retrieved successfully,
if the response is not JSON content, or if the JSON cannot be parsed.
'''
try:
response = get_iiif_url(uri)
if response.status_code == requests.codes.ok:
try:
return cls(response.json())
except json.decoder.JSONDecodeError as err:
# if json fails, two possibilities:
# - we didn't actually get json (e.g. redirect for auth)
if 'application/json' not in response.headers['content-type']:
raise IIIFException('No JSON found at %s' % uri)
# - there is something wrong with the json
raise IIIFException('Error parsing JSON for %s: %s' %
(uri, err))
raise IIIFException('Error retrieving manifest at %s: %s %s' %
(uri, response.status_code, response.reason))
except ConnectionError:
# could not reach URL to get a status code in the first place
raise IIIFException('Error connecting to manifest at %s' % uri)
[docs] @classmethod
def is_url(cls, url):
'''Utility method to check if a path is a url or file'''
return urllib.parse.urlparse(url).scheme != ""
[docs] @classmethod
def from_file_or_url(cls, path):
'''Iniitialize :class:`IIIFPresentation` from a file or a url.'''
if os.path.isfile(path):
return cls.from_file(path)
elif cls.is_url(path):
return cls.from_url(path)
else:
raise IIIFException('File not found: %s' % path)
[docs] @classmethod
def short_id(cls, uri):
'''Generate a short id from full manifest/canvas uri identifiers
for use in local urls. Logic is based on the recommended
url pattern from the IIIF Presentation 2.0 specification.'''
# shortening should work reliably for uris that follow
# recommended url patterns from the spec
# http://iiif.io/api/presentation/2.0/#a-summary-of-recommended-uri-patterns
# manifest: {scheme}://{host}/{prefix}/{identifier}/manifest
# canvas: {scheme}://{host}/{prefix}/{identifier}/canvas/{name}
# remove trailing /manifest at the end of the url, if present
if uri.endswith('/manifest'):
uri = uri[:-len('/manifest')]
# split on slashes and return the last portion
return uri.split('/')[-1]
def __getattr__(self, key):
"""
Access an item as an attribute.
"""
# override getattr to allow use of keys with leading @,
# which are otherwise not detected as present and not valid
at_key = self._handle_at_keys(key)
if key not in self or \
(key not in self.at_fields and at_key not in self) or \
not self._valid_name(key):
raise AttributeError(
"'{cls}' instance has no attribute '{name}'".format(
cls=self.__class__.__name__, name=key
)
)
return self._build(self[key])
def _handle_at_keys(self, key):
if key in self.at_fields:
key = '@%s' % key
return key
def __getitem__(self, key):
"""
Access a value associated with a key.
"""
return self._mapping[self._handle_at_keys(key)]
def __setitem__(self, key, value):
"""
Add a key-value pair to the instance.
"""
self._mapping[self._handle_at_keys(key)] = value
def __delitem__(self, key):
"""
Delete a key-value pair
"""
del self._mapping[self._handle_at_keys(key)]
@property
def first_label(self):
# label can be a string or list of strings
if isinstance(self.label, str):
return self.label
else:
return self.label[0]