from collections import OrderedDict
from django.conf import settings
from djiffy.models import (
Manifest,
Canvas,
IIIFPresentation,
IIIFException,
get_iiif_url,
)
[docs]class ManifestImporter(object):
"""Manifest importer. Intended for use with Django manage commands.
:param stdout: optional stdout, if status output is desired
:param stderr: optional stderr, if error output is desired
:param style: optional django command style object, for styled output
"""
stdout = None
stderr = None
style = None
# verbosity level?
check_import_supported = getattr(settings, "DJIFFY_IMPORT_CHECK_SUPPORTED", True)
# TODO: should have better reporting on what was done
def __init__(self, stdout=None, stderr=None, style=None, update=False):
self.stdout = stdout
self.stderr = stderr
self.style = style
self.update = update
[docs] def output(self, msg):
"""Output a message if stdout is configured (used to support output
via manage command)"""
if self.stdout:
self.stdout.write(msg)
[docs] def error_msg(self, msg):
"""Output an error message if stderr is configured (used to support output
via manage command)."""
if self.stderr:
if self.style:
msg = self.style.ERROR(msg)
self.stderr.write(msg)
[docs] def import_paths(self, paths):
"""Import a list of paths - file or url, collection or manifest.
Returns a list of imported manifests (includes previously
imported objects for the requested URIs, if already in the database)."""
imported = []
for path in paths:
try:
manifest = IIIFPresentation.from_file_or_url(path)
except IIIFException as err:
self.error_msg(str(err))
continue
if manifest.type == "sc:Collection":
collection_imported = self.import_collection(manifest)
if collection_imported:
imported.extend(collection_imported)
if manifest.type == "sc:Manifest":
imported_manifest = self.import_manifest(manifest, path)
if imported_manifest:
imported.append(imported_manifest)
return imported
[docs] def import_supported(self, manifest):
"""Check if import is supported (currently limited to paged or individuals,
left-to-right content)."""
# if import check is disabled, bypass checks and return true
if not self.check_import_supported:
return True
view_hint = getattr(manifest, "viewingHint", None)
view_direction = getattr(manifest, "viewingDirection", None)
if (view_hint and manifest.viewingHint in ["paged", "individuals", None]) or (
view_direction
and manifest.viewingDirection in ["left-to-right", "right-to-left"]
):
return True
else:
self.error_msg(
"Currently import only supports paged or individuals, left-to-right manifests; skipping %s (hint: %s, direction: %s)"
% (manifest.id, view_hint, view_direction)
)
return False
[docs] def import_manifest(self, manifest, path):
"""Process a single IIIF manifest and create
:class:`~djiffy.models.Manifest` and
:class:`~djiffy.models.Canvas` objects.
:param manifest: :class:`~djiffy.models.IIIFPresentation`
:param path: file or url import path
"""
self.output("Importing %s" % path)
# flag to indicate if we are updating an existing record
update_existing = False
# check if manifest with uri identifier has already been imported
db_manifest = Manifest.objects.filter(uri=manifest.id).first()
if db_manifest:
# TODO: would be nice to compare last-modified or etag
# and see if we actually need to update..
# NOTE: not updating for now; may want to add later
if self.update:
update_existing = True
else:
self.error_msg(
"%s has already been imported; use --update to request update"
% path
)
return db_manifest
# check if the type of manifest is supported
if not self.import_supported(manifest):
return
# make sure the manifest has sequences defined
# (workaround for a bug in Plum)
try:
getattr(manifest, "sequences")
except AttributeError:
self.error_msg("%s has no sequences; skipping" % path)
return
# create a new manifest if not updating a previous import
if not update_existing:
db_manifest = Manifest()
# label can be either a list/tuple or a bare string; handle both
# TODO: generalize this and move into model classes
if isinstance(manifest.label, str):
db_manifest.label = manifest.label
else:
if len(manifest.label) == 1:
db_manifest.label = manifest.label[0]
else:
db_manifest.label = "; ".join(manifest.label)
# set uri & short id if creating a new record
if not update_existing:
db_manifest.uri = manifest.id
db_manifest.short_id = IIIFPresentation.short_id(manifest.id)
# convert metadata into a more usable format
if hasattr(manifest, "metadata"):
metadata = OrderedDict(
[(item["label"], item["value"]) for item in manifest.metadata]
)
# handle single values as well as lists
for key, value in metadata.items():
if not isinstance(value, list):
metadata[key] = [value]
db_manifest.metadata = metadata
# if manifest has any seeAlso links, store the urls;
# if format is JSON, fetch it and store in the extra data
# NOTE: primary reason for this is to store the ARK identifier
# if there is one, since that will be more permanent than
# the manifest id; extra data may also include important
# rights information
if hasattr(manifest, "seeAlso"):
links = []
db_manifest.extra_data = OrderedDict()
# collect seeAlso links and formats, whether they
# appear as a single element or a list
# single link, not in a list
if isinstance(manifest.seeAlso, str):
# link with no format
links.append((manifest.seeAlso, ""))
elif hasattr(manifest.seeAlso, "format"):
links.append((manifest.seeAlso.id, manifest.seeAlso.format))
# list of seeAlso links
else:
for see_also in manifest.seeAlso:
links.append((see_also.id, see_also.format))
# process all the seeAlso links and add to extra data
for url, fmt in links:
db_manifest.extra_data[url] = {}
if fmt == "application/ld+json":
# TODO: error handling on the request?
response = get_iiif_url(url)
db_manifest.extra_data[url] = response.json()
# also check for logo, license, and attribution and add to extra data
for field in ["logo", "license", "attribution"]:
if hasattr(manifest, field):
db_manifest.extra_data[field] = getattr(manifest, field)
db_manifest.save()
thumbnail_id = None
if hasattr(manifest, "thumbnail"):
# if available as IIIF image, use that
if hasattr(manifest.thumbnail, "service"):
thumbnail_id = manifest.thumbnail.service.id
# otherwise, id is a path to an image
else:
thumbnail_id = manifest.thumbnail.id
# for now, only worry about the first sequence
# create a db canvas element for each canvas
for order, canvas in enumerate(manifest.sequences[0].canvases):
# when updating an existing manifest, look for existing canvas
if update_existing:
db_canvas = db_manifest.canvases.filter(uri=canvas.id).first()
if not update_existing or not db_canvas:
# otherwise, create a new canvas (new import or updating
# a manifest where this canvas did not previously exist)
db_canvas = Canvas(manifest=db_manifest)
# set order and label
db_canvas.order = order
db_canvas.label = canvas.label
# keep canvas id to obscure image id if necessary for security
db_canvas.uri = canvas.id
# get short id (extensible for subclasses)
db_canvas.short_id = self.canvas_short_id(canvas)
# only support single image per canvas for now
db_canvas.iiif_image_id = canvas.images[0].resource.service.id
# check if this page is the thumbnail image
if thumbnail_id is not None and db_canvas.iiif_image_id == thumbnail_id:
db_canvas.thumbnail = True
# include other fields as extra_data for now
for field in ["rendering", "width", "height"]:
if hasattr(canvas, field):
db_canvas.extra_data[field] = getattr(canvas, field)
db_canvas.save()
# if updating, check for previously imported canvases that are no
# longer preseent
if update_existing:
# get a list of all ids in the db
all_ids = db_manifest.canvases.all().values_list("uri", flat=True)
# get all ids in the current manifest
current_ids = [canvas.id for canvas in manifest.sequences[0].canvases]
# identify outdated ids in the database but not the manifest
outdated_ids = set(all_ids).difference(set(current_ids))
if outdated_ids:
outdated_canvases = db_manifest.canvases.filter(uri__in=outdated_ids)
if outdated_canvases:
self.output(
"Updating %s; removing %d canvases no longer included"
% (manifest.id, len(outdated_canvases))
)
outdated_canvases.delete()
# return the manifest db object that was created
return db_manifest
[docs] def import_collection(self, manifest):
"""Process a single IIIF collection and import
all supported manifests referenced in the collection.
:gedram manifest: :class:`~djiffy.models.IIIFPresentation`
"""
if manifest.type == "sc:Collection":
# import all manifests in the collection
imported = []
for brief_manifest in manifest.manifests:
# check if content is supported
if hasattr(brief_manifest, "viewingHint") or hasattr(
brief_manifest, "viewingDirection"
):
if not self.import_supported(brief_manifest):
continue
self.output(
'Importing "%s" %s'
% (brief_manifest.first_label, brief_manifest.id)
)
try:
manifest = IIIFPresentation.from_file_or_url(brief_manifest.id)
except IIIFException as err:
manifest = None
self.error_msg(str(err))
if manifest:
db_manifest = self.import_manifest(manifest, brief_manifest.id)
imported.append(db_manifest)
return imported
[docs] def canvas_short_id(self, canvas):
"""Method for generating short id from canvas; default is
:meth:`djiffy.models.IIIFPresentation.short_id`.
"""
return IIIFPresentation.short_id(canvas.id)