diff --git a/django/website/chn_spreadsheet/importer.py b/django/website/chn_spreadsheet/importer.py index ca2931a83c68d5787d834a8933035ea1ae48a00e..f47c3c5b2031eb00e7de7bee2bc00474b53876ba 100644 --- a/django/website/chn_spreadsheet/importer.py +++ b/django/website/chn_spreadsheet/importer.py @@ -65,7 +65,8 @@ class Importer(object): columns = [] if first_row: col_map = self.get_columns_map(profile_columns) - for label in first_row: + + for label in first_row[:len(col_map)]: try: columns.append(col_map[label]) except: @@ -98,15 +99,18 @@ class Importer(object): objects = [] for i, row in enumerate(rows, 2 if first_row else 1): try: - objects.append(self.process_row(row, columns)) + values = self.normalize_row(row) + + if any(values): + objects.append(self.process_row(values, columns)) + except SheetImportException as e: raise type(e), type(e)(e.message + 'in row %d ' % i), sys.exc_info()[2] return objects - def process_row(self, row, columns): - values = self.normalize_row(row) + def process_row(self, values, columns): return reduce( lambda object_dict, converter: converter.add_to(object_dict), [CellConverter(val, col) for val, col in zip(values, columns)], @@ -160,6 +164,9 @@ class CellConverter(object): raise SheetImportException(message), None, sys.exc_info()[2] def convert_date(self): + if self.value is None: + return None + if isinstance(self.value, basestring): date_time = self.parse_date() else: diff --git a/django/website/chn_spreadsheet/migrations/0004_add_rapidpro_config.py b/django/website/chn_spreadsheet/migrations/0004_add_rapidpro_config.py new file mode 100644 index 0000000000000000000000000000000000000000..570d7c43e03f759b6c6e5ba33d101b70a4dbf980 --- /dev/null +++ b/django/website/chn_spreadsheet/migrations/0004_add_rapidpro_config.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations + + +RAPIDPRO_CONFIG = { + "label": "rapidpro", + "name": "RapidPro", + "format": "excel", + "type": "message", + "columns": [ + { + "name": "Phone", + "type": "ignore", + "field": "ignore" + }, + { + "name": "Name", + "type": "ignore", + "field": "ignore" + }, + { + "name": "Groups", + "type": "ignore", + "field": "ignore" + }, + { + "name": "Last Seen", + "type": "date", + "field": "timestamp", + "date_format": "%m/%d/%y %H:%M:%S" + }, + { + "name": "Rumors (Text) - DEY Say sample flow", + "type": "text", + "field": "body" + + }, + { + "name": "Channel", + "type": "ignore", + "field": "ignore" + } + ], + "skip_header": 1 +} + + +def add_rapidpro_config(apps, schema_editor): + Profile = apps.get_model('chn_spreadsheet', 'SheetProfile') + Profile.objects.create(label='rapidpro', profile=RAPIDPRO_CONFIG) + + +class Migration(migrations.Migration): + + dependencies = [ + ('chn_spreadsheet', '0003_update_geopoll_config'), + ] + + operations = [ + migrations.RunPython(add_rapidpro_config) + ] diff --git a/django/website/chn_spreadsheet/tests/__init__.py b/django/website/chn_spreadsheet/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/django/website/chn_spreadsheet/tests/cell_converter_tests.py b/django/website/chn_spreadsheet/tests/cell_converter_tests.py new file mode 100644 index 0000000000000000000000000000000000000000..4d15984df4197cc22fc6ad20250ed4058c72b7bc --- /dev/null +++ b/django/website/chn_spreadsheet/tests/cell_converter_tests.py @@ -0,0 +1,87 @@ +import datetime +import pytest +import pytz + +from ..importer import ( + CellConverter, + SheetImportException +) + +from django.utils.translation import ugettext as _ + + +def test_successful_runs_of_parse_date(): + dates = ( + ('05/01/2015', '%d/%m/%Y'), + ('5.1.2015', '%d.%m.%Y'), + ('5/1/15', '%d/%m/%y'), + ('05-01-2015', '%d-%m-%Y'), + (datetime.datetime(2015, 1, 5, 0, 0), None) + ) + expected = pytz.utc.localize(datetime.datetime(2015, 1, 5)) + for date, date_format in dates: + converter = CellConverter(date, + {'type': 'date', + 'field': '', + 'date_format': date_format}) + + assert converter.convert_value() == expected + + +def test_exception_raised_on_faulty_dates(): + bad_date = '05x01-2015' + with pytest.raises(SheetImportException): + converter = CellConverter(bad_date, + {'type': 'date', + 'field': '', + 'date_format': '%m-%d-%Y'}) + converter.convert_value() + + +def test_convert_value_raises_on_unknown_type(): + value = 'Short message' + type = 'location' + + converter = CellConverter(value, {'type': type, 'field': ''}) + with pytest.raises(SheetImportException) as excinfo: + converter.convert_value() + assert excinfo.value.message == _(u"Unknown data type 'location' ") + + +def test_convert_value_raises_on_malformed_value(): + value = 'not_integer' + type = 'integer' + + converter = CellConverter(value, {'type': type, 'field': ''}) + + with pytest.raises(SheetImportException) as excinfo: + converter.convert_value() + + messages = excinfo.value.message.split('\n') + assert _(u"Can not process value 'not_integer' of type 'integer' ") in messages + + +def test_convert_value_raises_on_date_without_format(): + value = '1.5.2015' + + converter = CellConverter(value, { + 'type': 'date', + 'field': 'created'}) + + with pytest.raises(SheetImportException) as excinfo: + converter.convert_value() + + messages = excinfo.value.message.split('\n') + assert _(u"Date format not specified for 'created' ") in messages + + +def test_date_can_be_empty(): + value = None + + converter = CellConverter(value, { + 'type': 'date', + 'field': 'created'}) + + date = converter.convert_value() + + assert date is None diff --git a/django/website/chn_spreadsheet/tests/geopoll_tests.py b/django/website/chn_spreadsheet/tests/geopoll_tests.py new file mode 100644 index 0000000000000000000000000000000000000000..eb8a6dd7d8e9ff89ad115918b6bc5e45b9de97a5 --- /dev/null +++ b/django/website/chn_spreadsheet/tests/geopoll_tests.py @@ -0,0 +1,29 @@ +import datetime +from os import path +import pytest +import pytz + +import transport + +from importer_tests import importer + +TEST_BASE_DIR = path.abspath(path.dirname(__file__)) +TEST_DIR = path.join(TEST_BASE_DIR, 'test_files') + + +@pytest.mark.django_db +def test_items_imported(importer): + assert len(transport.items.list()) == 0 + + file_path = path.join(TEST_DIR, 'sample_geopoll.xlsx') + f = open(file_path, 'rb') + + num_saved = importer.store_spreadsheet('geopoll', f) + assert num_saved > 0 + + items = transport.items.list() + assert len(items) == num_saved + + assert items[0]['body'] == "What is the cuse of ebola?" + assert items[0]['timestamp'] == pytz.utc.localize( + datetime.datetime(2015, 5, 1)) diff --git a/django/website/chn_spreadsheet/tests.py b/django/website/chn_spreadsheet/tests/importer_tests.py similarity index 69% rename from django/website/chn_spreadsheet/tests.py rename to django/website/chn_spreadsheet/tests/importer_tests.py index 56923d939b9043fb3fe51790783e816f0e3cde45..defcbc03b1474dadba3a66fed6bf3cc9de9aca46 100644 --- a/django/website/chn_spreadsheet/tests.py +++ b/django/website/chn_spreadsheet/tests/importer_tests.py @@ -6,14 +6,11 @@ import pytz from django.utils.translation import ugettext as _ -from .importer import ( - CellConverter, Importer, +from ..importer import ( + Importer, SheetProfile, SheetImportException ) -from data_layer.models import Message - - TEST_BASE_DIR = path.abspath(path.dirname(__file__)) TEST_DIR = path.join(TEST_BASE_DIR, 'test_files') @@ -118,6 +115,15 @@ def test_order_columns_with_first_row_return_first_row_order(importer): assert ordered == [cleaned[1], cleaned[0]] +def test_order_columns_ignores_extra_columns_in_first_row(importer): + cleaned = _make_columns_row(COLUMN_LIST) + first_row = ['Message', 'Province', 'None', 'None', 'None'] + + ordered = importer.order_columns(COLUMN_LIST, first_row) + + assert ordered == [cleaned[1], cleaned[0]] + + def test_get_fields_and_types(importer): fields, types = importer.get_fields_and_types(COLUMN_LIST) expected_types = ['location', 'text'] @@ -127,34 +133,6 @@ def test_get_fields_and_types(importer): assert types == expected_types -def test_successful_runs_of_parse_date(importer): - dates = ( - ('05/01/2015', '%d/%m/%Y'), - ('5.1.2015', '%d.%m.%Y'), - ('5/1/15', '%d/%m/%y'), - ('05-01-2015', '%d-%m-%Y'), - (datetime.datetime(2015, 1, 5, 0, 0), None) - ) - expected = pytz.utc.localize(datetime.datetime(2015, 1, 5)) - for date, date_format in dates: - converter = CellConverter(date, - {'type': 'date', - 'field': '', - 'date_format': date_format}) - - assert converter.convert_value() == expected - - -def test_exception_raised_on_faulty_dates(importer): - bad_date = '05x01-2015' - with pytest.raises(SheetImportException): - converter = CellConverter(bad_date, - {'type': 'date', - 'field': '', - 'date_format': '%m-%d-%Y'}) - converter.convert_value() - - def test_process_row(importer): row = ['Short message', '5', '10.4', '1.5.2015', 'Something else'] @@ -199,43 +177,6 @@ def test_process_row(importer): } -def test_convert_value_raises_on_unknown_type(importer): - value = 'Short message' - type = 'location' - - converter = CellConverter(value, {'type': type, 'field': ''}) - with pytest.raises(SheetImportException) as excinfo: - converter.convert_value() - assert excinfo.value.message == _(u"Unknown data type 'location' ") - - -def test_convert_value_raises_on_malformed_value(importer): - value = 'not_integer' - type = 'integer' - - converter = CellConverter(value, {'type': type, 'field': ''}) - - with pytest.raises(SheetImportException) as excinfo: - converter.convert_value() - - messages = excinfo.value.message.split('\n') - assert _(u"Can not process value 'not_integer' of type 'integer' ") in messages - - -def test_convert_value_raises_on_date_without_format(importer): - value = '1.5.2015' - - converter = CellConverter(value, { - 'type': 'date', - 'field': 'created'}) - - with pytest.raises(SheetImportException) as excinfo: - converter.convert_value() - - messages = excinfo.value.message.split('\n') - assert _(u"Date format not specified for 'created' ") in messages - - def test_normalize_row_differences(importer): class Cell(object): def __init__(self, value): @@ -308,19 +249,54 @@ def test_process_rows_displays_line_number_on_error(importer): assert len(excinfo.traceback) > 2, "Was expecting traceback of more than 2 lines" -@pytest.mark.django_db -def test_items_imported(importer): - assert Message.objects.count() == 0 +def test_process_rows_ignores_empty_lines(importer): + class Cell(object): + def __init__(self, value): + self.value = value - file_path = path.join(TEST_DIR, 'sample_geopoll.xlsx') - f = open(file_path, 'rb') + def _rows_generator(): + rows = [ + ('Province', 'Message'), + ('London', 'Short message'), + ('', ''), + (None, None), + (Cell(''), Cell('')), + (Cell(None), Cell(None)), + ('Cambridge', 'What?'), + ] - num_saved = importer.store_spreadsheet('geopoll', f) - assert num_saved > 0 + for row in rows: + yield row - items = Message.objects.all() - assert len(items) > 0 + column_list = [ + { + 'name': 'Province', + 'type': 'text', + 'field': 'location', + }, + { + 'name': 'Message', + 'type': 'text', + 'field': 'body', + }, + ] + + columns = [d.copy() for d in column_list] + rows = _rows_generator() + + with_header = True + + objects = importer.process_rows(rows, columns, with_header) - assert items[0].body == "What is the cuse of ebola?" - assert items[0].timestamp == pytz.utc.localize( - datetime.datetime(2015, 5, 1)) + expected_objects = [ + { + 'location': 'London', + 'body': 'Short message' + }, + { + 'location': 'Cambridge', + 'body': 'What?' + }, + ] + + assert objects == expected_objects diff --git a/django/website/chn_spreadsheet/tests/rapidpro_tests.py b/django/website/chn_spreadsheet/tests/rapidpro_tests.py new file mode 100644 index 0000000000000000000000000000000000000000..bd9518039d243c5777479d878d244129846d3089 --- /dev/null +++ b/django/website/chn_spreadsheet/tests/rapidpro_tests.py @@ -0,0 +1,29 @@ +import datetime +from os import path +import pytest +import pytz + +import transport + +from importer_tests import importer + +TEST_BASE_DIR = path.abspath(path.dirname(__file__)) +TEST_DIR = path.join(TEST_BASE_DIR, 'test_files') + + +@pytest.mark.django_db +def test_items_imported(importer): + assert len(transport.items.list()) == 0 + + file_path = path.join(TEST_DIR, 'sample_rapidpro.xlsx') + f = open(file_path, 'rb') + + num_saved = importer.store_spreadsheet('rapidpro', f) + assert num_saved > 0 + + items = transport.items.list() + assert len(items) == num_saved + + assert items[0]['body'] == "That there is a special budget to give money to the family of each dead in Liberia since the Ebola outbreak." + assert items[0]['timestamp'] == pytz.utc.localize( + datetime.datetime(2015, 4, 19, 21, 35, 20)) diff --git a/django/website/chn_spreadsheet/test_files/sample_excel.xlsx b/django/website/chn_spreadsheet/tests/test_files/sample_excel.xlsx similarity index 100% rename from django/website/chn_spreadsheet/test_files/sample_excel.xlsx rename to django/website/chn_spreadsheet/tests/test_files/sample_excel.xlsx diff --git a/django/website/chn_spreadsheet/test_files/sample_geopoll.xlsx b/django/website/chn_spreadsheet/tests/test_files/sample_geopoll.xlsx similarity index 100% rename from django/website/chn_spreadsheet/test_files/sample_geopoll.xlsx rename to django/website/chn_spreadsheet/tests/test_files/sample_geopoll.xlsx diff --git a/django/website/chn_spreadsheet/tests/test_files/sample_rapidpro.xlsx b/django/website/chn_spreadsheet/tests/test_files/sample_rapidpro.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c90d98f088008c0f8e95ca83d75d0fdeb885c8dc Binary files /dev/null and b/django/website/chn_spreadsheet/tests/test_files/sample_rapidpro.xlsx differ