# Source code for derrida.interventions.management.commands.intervention_data

'''
Manage command to export intervention data for use by others.

Generates a CSV and JSON file with details for all interventions
documented in the database.

Takes an optional argument to specify the output directory. Otherwise,
files are created in the current directory.

'''


import codecs
from collections import OrderedDict
import csv
import json
import os.path

from derrida.books.management.commands import reference_data
from derrida.interventions.models import Intervention


class Command(reference_data.Command):
    '''Export intervention data from the database as CSV and JSON'''
    # NOTE: inside the class body ``__doc__`` resolves to the class
    # docstring above, so that text is what gets used as the help message
    help = __doc__

    # NOTE: extending reference_data manage command to inherit
    # flatten_dict method; there is more overlap and these scripts
    # could probably be generalized further for re-use

    #: fields for CSV output
    csv_fields = [
        'id', 'book id', 'book title', 'book type', 'page', 'tags',
        'text content', 'text language', 'text language code',
        'text translation', 'quote content', 'quote language',
        'quote language code', 'annotator'
    ]

    #: base filename, for CSV and JSON output
    base_filename = 'interventions'

    def add_arguments(self, parser):
        '''Register the optional ``-d``/``--directory`` argument that
        selects where the export files are written.'''
        parser.add_argument(
            '-d', '--directory',
            help='Specify the directory where files should be generated')

    def handle(self, *args, **kwargs):
        '''Export all interventions to ``<base_filename>.json`` and
        ``<base_filename>.csv``.

        :param kwargs: command options; if ``directory`` is set, the
            files are created inside that directory, otherwise they are
            created relative to the current directory.
        '''
        # build the output path locally rather than overwriting
        # self.base_filename, so repeated calls on the same instance
        # do not keep prepending the directory
        base_path = self.base_filename
        directory = kwargs.get('directory')
        if directory:
            base_path = os.path.join(directory, base_path)

        # aggregate intervention data to be exported for use in generating
        # CSV and JSON output
        data = [self.intervention_data(intervention)
                for intervention in Intervention.objects.all()]

        # list of dictionaries can be output as is for JSON export
        with open('{}.json'.format(base_path), 'w',
                  encoding='utf-8') as jsonfile:
            json.dump(data, jsonfile, indent=2)

        # generate CSV export; explicit utf-8 so that the byte order mark
        # and any non-ascii content are written correctly regardless of
        # the locale default, and newline='' as required by the csv module
        # so that row terminators are not translated
        with open('{}.csv'.format(base_path), 'w', encoding='utf-8',
                  newline='') as csvfile:
            # write utf-8 byte order mark at the beginning of the file
            csvfile.write(codecs.BOM_UTF8.decode())
            csvwriter = csv.DictWriter(csvfile, fieldnames=self.csv_fields)
            csvwriter.writeheader()
            for intervention in data:
                csvwriter.writerow(self.flatten_dict(intervention))

    def intervention_data(self, intervention):
        '''Generate a dictionary of data to export for a single
        :class:`~derrida.interventions.models.Intervention` object.

        :param intervention: the intervention to export
        :returns: :class:`~collections.OrderedDict` with ``id``, ``book``,
            ``page`` and ``tags``, plus ``text``, ``quote`` and
            ``annotator`` only when the intervention has those values
        '''
        work_instance = intervention.work_instance
        canvas = intervention.canvas

        # NOTE: using OrderedDict to ensure JSON output follows logical
        # order in Python < 3.6, where dict order is not guaranteed
        data = OrderedDict([
            ('id', intervention.get_uri()),
            # every intervention *should* be associated with a book,
            # but possible that some are not
            ('book', OrderedDict([
                ('id', work_instance.get_uri() if work_instance else ''),
                ('title', work_instance.display_title()
                    if work_instance else ''),
                ('type', work_instance.item_type if work_instance else '')
            ])),
            # canvas object *should* have a label, but possible it does not
            ('page', canvas.label if canvas else ''),
            ('tags', [tag.name for tag in intervention.tags.all()])
        ])

        # only include text and quote information if we have content
        if intervention.text:
            text_info = OrderedDict({
                'content': intervention.text
            })
            if intervention.text_language:
                text_info['language'] = intervention.text_language.name
                text_info['language code'] = intervention.text_language.code
            if intervention.text_translation:
                text_info['translation'] = intervention.text_translation
            data['text'] = text_info

        if intervention.quote:
            quote_info = OrderedDict({
                'content': intervention.quote
            })
            if intervention.quote_language:
                quote_info['language'] = intervention.quote_language.name
                quote_info['language code'] = intervention.quote_language.code
            data['quote'] = quote_info

        # annotator is only present when the intervention has an author
        if intervention.author:
            data['annotator'] = intervention.author.authorized_name

        return data