# Source code for derrida.interventions.management.commands.intervention_data

'''
Manage command to export intervention data for use by others.

Generates a CSV and JSON file with details for all interventions
documented in the database.

Takes an optional argument to specify the output directory. Otherwise,
files are created in the current directory.

'''


import codecs
from collections import OrderedDict
import csv
import json
import os.path

from derrida.books.management.commands import reference_data
from derrida.interventions.models import Intervention


class Command(reference_data.Command):
    '''Export intervention data from the database as CSV and JSON'''
    help = __doc__

    # NOTE: extending the reference_data manage command to inherit its
    # dictionary-flattening helper (called as ``flatten_dict`` in
    # :meth:`handle`); there is more overlap and these scripts
    # could probably be generalized further for re-use

    #: fields for CSV output; used as the header row and as the
    #: fieldnames for the :class:`csv.DictWriter`
    csv_fields = [
        'id', 'book id', 'book title', 'book type', 'page', 'tags', 'text content',
        'text language', 'text language code', 'text translation',
        'quote content', 'quote language', 'quote language code', 'annotator'
    ]

    #: base filename, for CSV and JSON output
    base_filename = 'interventions'

    def add_arguments(self, parser):
        '''Register the optional ``-d`` / ``--directory`` argument.'''
        parser.add_argument(
            '-d', '--directory',
            help='Specify the directory where files should be generated')

    def handle(self, *args, **kwargs):
        '''Export all interventions to ``<base_filename>.json`` and
        ``<base_filename>.csv``.

        If a ``directory`` option is given, it is prepended to
        :attr:`base_filename` (on the instance) before the files are
        created; otherwise the files are written relative to the
        current directory.
        '''

        # use .get so that calling handle() directly without options
        # does not fail with a KeyError
        directory = kwargs.get('directory')
        if directory:
            self.base_filename = os.path.join(directory, self.base_filename)

        # aggregate intervention data to be exported for use in generating
        # CSV and JSON output
        data = [self.intervention_data(intervention)
                for intervention in Intervention.objects.all()]

        # list of dictionaries can be output as is for JSON export
        json_path = '{}.json'.format(self.base_filename)
        with open(json_path, 'w', encoding='utf-8') as jsonfile:
            json.dump(data, jsonfile, indent=2)

        # generate CSV export
        # - explicit utf-8 encoding so the byte order mark and any
        #   non-ascii content are written correctly regardless of the
        #   platform default encoding
        # - newline='' as required by the csv module, so the writer
        #   controls line endings itself
        csv_path = '{}.csv'.format(self.base_filename)
        with open(csv_path, 'w', encoding='utf-8', newline='') as csvfile:
            # write utf-8 byte order mark at the beginning of the file
            csvfile.write(codecs.BOM_UTF8.decode())

            csvwriter = csv.DictWriter(csvfile, fieldnames=self.csv_fields)
            csvwriter.writeheader()

            # nested dictionaries are flattened by the inherited helper
            # before being written as CSV rows
            for intervention in data:
                csvwriter.writerow(self.flatten_dict(intervention))

    def intervention_data(self, intervention):
        '''Generate a dictionary of data to export for a single
        :class:`~derrida.interventions.models.Intervention` object.

        The result always contains ``id``, ``book``, ``page`` and ``tags``;
        ``text``, ``quote`` and ``annotator`` are only included when the
        intervention has a value for them.
        '''

        # NOTE: using OrderedDict to ensure JSON output follows logical
        # order in Python < 3.6, where dict order is not guaranteed

        # every intervention *should* be associated with a book,
        # but possible that some are not; use empty strings in that case
        work_instance = intervention.work_instance
        if work_instance:
            book_info = OrderedDict([
                ('id', work_instance.get_uri()),
                ('title', work_instance.display_title()),
                ('type', work_instance.item_type)
            ])
        else:
            book_info = OrderedDict([('id', ''), ('title', ''), ('type', '')])

        data = OrderedDict([
            ('id', intervention.get_uri()),
            ('book', book_info),
            # an intervention may not be associated with a canvas;
            # use an empty page label in that case
            ('page', intervention.canvas.label if intervention.canvas else ''),
            ('tags', [tag.name for tag in intervention.tags.all()])
        ])

        # only include text and quote information if we have content
        if intervention.text:
            text_info = OrderedDict([('content', intervention.text)])
            if intervention.text_language:
                text_info['language'] = intervention.text_language.name
                text_info['language code'] = intervention.text_language.code
            if intervention.text_translation:
                text_info['translation'] = intervention.text_translation

            data['text'] = text_info

        if intervention.quote:
            quote_info = OrderedDict([('content', intervention.quote)])
            if intervention.quote_language:
                quote_info['language'] = intervention.quote_language.name
                quote_info['language code'] = intervention.quote_language.code
            data['quote'] = quote_info

        # annotator is only present when an author is set
        if intervention.author:
            data['annotator'] = intervention.author.authorized_name

        return data
# Source code for derrida.interventions.management.commands.intervention_data
#
# Derrida
#
# Navigation