Source code for derrida.books.management.commands.import_digitaleds
'''
Manage command to import digitized book content via IIIF. It takes
both files and URLs, and supports both IIIF Collections and single
Manifests. If a collection is specified, all supported manifests in the
system will be loaded. If a manifest is already loaded, it will be
skipped unless the ``--update`` option is specified. For convenience, you
can use the preset path "PUL" to load the Princeton University Libraries
collection of Derrida materials.
Example use::
python manage.py import_digitaleds https://plum.princeton.edu/collections/p4j03fz143/manifest
python manage.py import_digitaleds https://plum.princeton.edu/concern/scanned_resources/pb2775t87z/manifest
python manage.py import_digitaleds manifest1.json manifest2.json
python manage.py import_digitaleds PUL
When a local identifier is present in manifest metadata, it will be used
to link the cached manifest in the django database with the appropriate
:class:`derrida.books.models.Instance`.
'''
from collections import defaultdict
from django.core.management.base import BaseCommand
from django.core.exceptions import ObjectDoesNotExist
from django.template.defaultfilters import slugify
from djiffy.importer import ManifestImporter
from derrida.books.models import Instance
class DerridaManifestImporter(ManifestImporter):
    '''Extends :class:`djiffy.importer.ManifestImporter` to add additional
    logic for associating the imported :class:`djiffy.models.Manifest`
    with an existing :class:`derrida.books.models.Instance`.

    Counts of what was processed are kept in :attr:`stats` under the keys
    ``urls`` (paths handed to :meth:`import_manifest`), ``manifests``
    (manifests returned by the parent importer) and ``nomatch`` (manifests
    that could not be linked to exactly one instance).
    '''

    # Class-level fallback retained for backward compatibility with code
    # that reads ``DerridaManifestImporter.stats``; every instance gets its
    # own counter in __init__ so totals do not leak between importers.
    stats = defaultdict(int)

    def __init__(self, *args, **kwargs):
        '''Initialize the parent importer (all arguments are passed
        through unchanged) and start a fresh per-instance stats counter.'''
        super(DerridaManifestImporter, self).__init__(*args, **kwargs)
        self.stats = defaultdict(int)

    def canvas_short_id(self, canvas):
        '''Override default short id logic, because that would result in
        a uuid for PUL figgy content; instead, generate a slug based on the
        image label.  Canvas model requires short id + manifest unique
        together, but that won't be a problem with project image
        label naming conventions.'''
        return slugify(canvas.label)

    def import_manifest(self, manifest, path):
        '''Import a single manifest via the parent importer, then try to
        associate the resulting database manifest with a library
        :class:`~derrida.books.models.Instance` by finding aid URL.

        :param manifest: manifest to import; passed through unchanged to
            the parent implementation
        :param path: path or URL of the manifest; passed through unchanged
            to the parent implementation
        :returns: the database manifest returned by the parent method, or
            ``None`` when the parent returned nothing (error, or manifest
            already imported)
        '''
        # parent method returns newly created db manifest
        # or None if there was an error or manifest was already imported
        self.stats['urls'] += 1
        db_manifest = super(DerridaManifestImporter, self) \
            .import_manifest(manifest, path)
        if not db_manifest:
            return

        short_id = db_manifest.short_id
        self.stats['manifests'] += 1

        # if updating an existing db manifest that already has an
        # associated instance, bail out
        if self.update:
            try:
                # accessing the related object raises if there is none
                db_manifest.instance
            except ObjectDoesNotExist:
                pass
            else:
                return db_manifest

        self.output('Imported %s "%s"' % (short_id, db_manifest.label))

        # Attempt to find the corresponding Derrida library instance object
        # for this digital edition and associate them.
        # - manifests from plum include a url identifier as a "seeAlso"
        #   link; for archival items, this is the finding aid url.
        findingaid_url = next(
            (url for url in db_manifest.extra_data
             if 'findingaids.princeton.edu' in url),
            None)

        if not findingaid_url:
            self.error_msg('No finding aid URL found for %s' % short_id)
            # no url means no association, so count it as unmatched just
            # like the other failure cases below
            self.stats['nomatch'] += 1
            return db_manifest

        # clean up the finding aid url for matching against the
        # finding aid urls in project data:
        # remove https since local urls use http
        findingaid_url = findingaid_url.replace('https://', '')
        # remove trailing .xml?scope=record for matching purposes
        findingaid_url = findingaid_url.split('.xml')[0]

        items = Instance.objects.filter(uri__contains=findingaid_url)
        # count once instead of issuing a query for every comparison
        match_count = items.count()

        # only associate if one and only one match is found
        if match_count == 1:
            instance = items.first()
            instance.digital_edition = db_manifest
            instance.save()
            self.output(' Associated %s with "%s"' % (short_id, instance))
        elif match_count > 1:
            self.error_msg(' Found %d matching instances for %s' %
                           (match_count, findingaid_url))
            self.stats['nomatch'] += 1
        else:
            self.error_msg('No matching instance for %s (%s)' %
                           (short_id, findingaid_url))
            self.stats['nomatch'] += 1

        return db_manifest
class Command(BaseCommand):
    '''Import digital editions and associate with Derrida work instances'''
    # NOTE: inside a class body ``__doc__`` is the class docstring above,
    # so that text doubles as the command's help output.
    help = __doc__

    # shorthand names mapped to the manifest URIs they stand for
    manifest_uris = {
        'PUL': 'https://figgy.princeton.edu/collections/7081b751-abb6-4f62-9c38-ff1fda0f9d30/manifest'
    }

    def add_arguments(self, parser):
        '''Register the positional ``path`` argument (one or more values)
        and the optional ``--update`` flag on the argument parser.'''
        parser.add_argument(
            'path',
            nargs='+',
            help='''One or more IIIF Collections or Manifests as file or URL.
Use 'PUL' to import PUL Derrida materials.''')
        parser.add_argument(
            '--update',
            action='store_true',
            help='Update previously imported manifests')

    def handle(self, *args, **kwargs):
        '''Expand shorthand paths, run the importer over every path, and
        then write out a summary of the importer's stats.'''
        # a known shorthand is swapped for its URI; anything else is
        # passed along untouched
        resolved_paths = [
            self.manifest_uris.get(entry, entry) for entry in kwargs['path']
        ]

        importer = DerridaManifestImporter(
            stdout=self.stdout,
            stderr=self.stderr,
            style=self.style,
            update=kwargs['update'])
        importer.import_paths(resolved_paths)

        self.summarize(importer.stats)

    def summarize(self, stats):
        '''Write a brief report of what was done to stdout.

        :param stats: mapping with ``urls``, ``manifests`` and ``nomatch``
            counts; the last two are only reported when non-zero
        '''
        write = self.stdout.write
        write('\nURLs processed: %(urls)d' % stats)

        # lines that are only worth printing when their count is non-zero
        optional_lines = (
            ('manifests', 'Manifests imported or updated: %(manifests)d'),
            ('nomatch',
             'Manifests not matched to library work instances: %(nomatch)d'),
        )
        for key, template in optional_lines:
            if stats[key]:
                write(template % stats)