# Copyright (C) 2013-2015 The Meme Factory, Inc.  http://www.meme.com/
#
#    This file is part of Gombe-MI.
#
#    Gombe-MI is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with Gombe-MI.  If not, see <http://www.gnu.org/licenses/>.
#
# Upload group comp info into the db.
#
# Karl O. Pinc <kop@meme.com>
#

# Write python 3 compatible code.
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import
from __future__ import division

from pyramid.view import view_config
from pyramid.renderers import render_to_response

import logging

from gmi_pyramid.lib.upload import (
    UploadEngine
    , Error
    , DataLineError
    , textualize
    , DataLineProcessor
    , UploadDoubleFileForm
    , TabularFileUploadHandler
    , UploadData
    , is_checked
    )

import gmi_pyramid.lib.constants as gmi

# The columns that are not chimpids.
COLS = ('researcher',
        'date',
        'mother',
        'page',
        'time_on_sheet',
        'time_converted_mil',
        'time_type',
        'entered',
        'gcstatus',
        )
# The columns that don't need to be there.
OPTCOLS = ('time_on_sheet',
           'entered',
           )

log = logging.getLogger(__name__)

# Setup errors
class MissingColError(Error):
    def __init__(self, e, descr='', detail=''):
        super(MissingColError, self).__init__(e, descr, detail)

class DupChimpidError(Error):
    def __init__(self, e, descr='', detail=''):
        super(DupChimpidError, self).__init__(e, descr, detail)

class NoChimpidError(Error):
    def __init__(self, e, descr='', detail=''):
        super(NoChimpidError, self).__init__(e, descr, detail)

class NoBaboonColError(Error):
    def __init__(self, e, descr='', detail=''):
        super(NoBaboonColError, self).__init__(e, descr, detail)

# Data errors
class DataError(DataLineError):
    '''
    Class from which is derived all the exceptions raised while processing data
    '''
    def __init__(self, udl, e, descr='', detail=''):
        '''
        udl     The UploadDataLine instance being processed
        e       Error messsage
        descr   Further error detail
        detail  Structured html with detail
        '''
        super(DataError, self).__init__(udl.lineno, e, descr, detail, udl.raw)

class NoSuchResearcherError(DataError):
    def __init__(self, udl, e, descr='', detail=''):
        super(NoSuchResearcherError, self).__init__(udl, e, descr, detail)

class NoSuchMarkError(DataError):
    def __init__(self, udl, e, descr='', detail=''):
        super(NoSuchMarkError, self).__init__(udl, e, descr, detail)

class EmptycolFullError(DataError):
    def __init__(self, udl, e, descr='', detail=''):
        super(EmptycolFullError, self).__init__(udl, e, descr, detail)

class GCStatusMismatchError(DataError):
    def __init__(self, udl, e, descr='', detail=''):
        super(GCStatusMismatchError, self).__init__(udl, e, descr, detail)

class NoSuchFollowError(DataError):
    def __init__(self, udl, e, descr='', detail=''):
        super(NoSuchFollowError, self).__init__(udl, e, descr, detail)

class BaboonSwellingError(DataError):
    def __init__(self, udl, e, descr='', detail=''):
        super(BaboonSwellingError, self).__init__(udl, e, descr, detail)

class NonContiguousFollowError(DataError):
    def __init__(self, udl, e, descr='', detail=''):
        super(NonContiguousFollowError, self).__init__(udl, e, descr, detail)

class FollowExistsError(DataError):
    def __init__(self, udl, e, descr='', detail=''):
        super(FollowExistsError, self).__init__(udl, e, descr, detail)


class SaveGCLine(DataLineProcessor):
    def __init__(self, ue, uh, colmap, chimpids, baboon_col):
        '''
        ue             UploadEngine instance
        uh             UploadHandler instance
        colmap         Dict mapping column headings to column numbers
        chimpids       Dict mapping chimpids to column numbers
        baboon_col     Index into data column of baboon presence

        cur            psycopg2 cursor
        have_emptycol  Boolean, is there a column with no header
        emtpycol       Index into data column of no header
        '''
        super(SaveGCLine, self).__init__(ue, uh)
        self.have_emptycol = uh.have_emptycol
        self.emptycol      = uh.emptycol
        self.colmap        = colmap
        self.chimpids      = chimpids
        self.baboon_col    = baboon_col

    def eat(self, udl):
        '''
        Upload a line of data into the db.

        udl  An UploadDataLine instance
        '''

        def colval(hdg):
            '''Get the column value for "regular" column with heading h.'''
            return cols[self.colmap[hdg]]

        def chimpval(hdg):
            '''Get the column value for "chimp" column with heading h.'''
            return cols[self.chimpids[hdg]]

        def resolve_mark(mark):
            '''
            Look up the mark

            Return (present, uncertain, swelling, origin) tuple.
            '''
            self.cur.execute(
                'SELECT gc_trans.present '
                '     , gc_trans.uncertain '
                '     , gc_trans.swelling '
                '     , gc_trans.origin '
                '  FROM gc_trans '
                '  WHERE gc_trans.mark IS NOT DISTINCT FROM %s;',
                (mark,))
            row = self.cur.fetchone();

            if not row:
                raise NoSuchMarkError(
                    udl,
                    'Mark not found in GC_TRANS.Mark',
                    'Mark on uploaded sheet = ({0})'.format(textualize(mark)))

            return row


        # Process the data row.
        cols = udl.tuples

        if self.have_emptycol and cols[self.emptycol] != '':
            raise EmptycolFullError(udl,
                                    'The column with no heading has a value',
                                    'Value = (' + cols[self.emptycol] + ')')

        #
        # Resolve the researcher to a person id.
        #
        self.cur.execute('SELECT people.person '
                    '  FROM people '
                    '  WHERE LOWER(people.descr) = LOWER(%s);',
                    (colval('researcher'),))
        row = self.cur.fetchone()
        if not row:
            raise NoSuchResearcherError(
                udl,
                'Researcher not found',
                'No row on PEOPLE with Descr = ('
                + textualize(colval('researcher')) + ')')
        person = row[0]

        # Find the sheet and followid to which the raw_groups row
        # should relate.
        sheet_page = colval('page')
        if sheet_page == '':
            # The db can have a NULL page.
            sheet_page = None
        if sheet_page != None:
            sheet_page = int(sheet_page)

        sheet_date = colval('date')
        db_followid, db_sheetid, db_ffid, db_gcstatus = self.uh.ff_break(
            udl,
            sheet_date,
            sheet_page,
            person,
            colval('mother'))

        # Figure out whether baboons were censused
        present, uncertain, swelling, origin = resolve_mark(
            cols[self.baboon_col])

        if swelling != None:
            raise BaboonSwellingError(
                udl,
                'Baboons cannot be marked with swelling',
                'Mark on uploaded sheet = ({0}): '.format(
                    textualize(chimpval(gmi.ANIMID_BABOON))) +
                'The db has no provision for this')

        if uncertain:
            # (DB validation rules ensure uncertain only comes back when
            # present.)
            baboons = None
        else:
            baboons = present

        # Done with most validation.   Update the db.

        # Always check for NULL gcstatus in the db, even 
        # when we know that we're on the same fieldfollow
        # because the line that updated the gcstatus in the db
        # may have rolled back for another reason.

        sheet_gcstatus = colval('gcstatus')
        if db_gcstatus == None:
            # Make sure the fieldfollow has a gcstatus.
            self.cur.execute('''UPDATE fieldfollows
                                 SET gcstatus = %s
                                 WHERE fieldfollows.ffid = %s;''',
                             (sheet_gcstatus, db_ffid))
        else:
            if not self.uh.seen_follow(db_followid):
                # There's a gc in the db but we didn't put it there.
                raise FollowExistsError(
                    udl,
                    'This follow already has group composition data',
                    ("A follow's group compositon must be uploaded in"
                     " a single file or automatic additions to the"
                     " group composition will be erronous"))
            if db_gcstatus != sheet_gcstatus:
                raise GCStatusMismatchError(
                    udl,
                    'GCStatus mis-match',
                    'Value (FIELDFOLLOWS.GCStatus) = (' +
                    textualize(db_gcstatus) +
                    '): Value (Uploaded gcstatus) = (' +
                    textualize(sheet_gcstatus) +
                    ("): The fieldfollow's FIELDFOLLOWS.GCStatus value "
                     'does not match the uploaded gcstatus value'))

        # Make the raw_groups row
        self.cur.execute('INSERT INTO raw_groups (time, sheetid, baboons) '
                    '  VALUES (%s, %s, %s);',
                    (colval('time_converted_mil'), db_sheetid, baboons))

        # Get the rgid
        self.cur.execute("SELECT CURRVAL('raw_groups_rgid_seq'::REGCLASS);")
        rgid = self.cur.fetchone()[0]

        present_cids = []
        for cid in self.chimpids:
            present, uncertain, swelling, origin = resolve_mark(
                chimpval(cid))
                
            if present:
                # Make the raw_groupmembers row
                self.cur.execute(
                    'INSERT INTO raw_groupmembers '
                    '    (rgid, chimpid, origin, uncertain, swelling) '
                    '  VALUES (%s, %s, %s, %s, %s);',
                    (rgid, cid, origin, uncertain, swelling))

                present_cids.append(cid)

        self.uh.follow_break(db_followid, udl, rgid, sheet_date, present_cids)


class GCUploadHandler(TabularFileUploadHandler):
    '''
    Attributes:
      request       A pyramid request instance
      uf            A GCUploadForm instance
      session       A pyramid session instance
      have_emptycol
      emptycol
      ue
      cur

      _last_ff_key
      _last_ff_break_result
      _last_followid
      _follow_gc  List of (udl, rgid, date, chimpid-list) tuples where:
                    udl  UploadDataLine instance of gc
                    rgid rgid of a gc
                    date is the date of the follow (from the sheet)
                    chimpid-list is a list of chimpids present in the gc
      _follows    List of follows seen.
    '''

    def __init__(self, request):
        '''
        request A pyramid request instance
        '''
        super(GCUploadHandler, self).__init__(request)
        self._last_ff_key = None
        self._last_followid = None
        self._follow_gc = []
        self._follows = []

    def make_form(self):
        '''
        Make the upload form needed by this handler.
        '''
        return UploadDoubleFileForm(self)

    def val_input(self):
        '''
        Validate input needed beyond that required to connect to the db.

        Make sure we don't double-upload a file.

        Returns:
          A list of Error instances
        '''
        errors = super(GCUploadHandler, self).val_input()
        self.double_validator(errors)
        
        return errors

    def write(self, result, errors):
        '''Add double validation key into form.'''
        response = super(GCUploadHandler,self).write(result,errors)
        self.write_double_key(response)
        return response

    def get_data(self):
        '''
        Return an UploadData instance, with flags set as desired.
        '''
        uf = self.uf
        self.data = UploadData(uf['localfh'],
                               uf['upload_fmt'],
                               False,
                               None)

    def seen_follow(self, followid):
        return followid in self._follows

    def factory(self, ue):
        '''Make a db loader function from an UploadEngine.

        Input:

        Side Effects:
        Yes, lots.
        '''

        self.ue = ue
        self.cur = ue.cur
        data = ue.data

        # Map 'normal' headings to column numbers (0 relative).
        colmap = {}
        headers = data.headers.tuples
        hcnt = len(headers)
        for col in COLS:
            colno = 0
            for hdg in headers:
                if col == hdg:
                    colmap[col] = colno
                    break
                colno += 1

            if colno >= hcnt and col not in OPTCOLS:
                if hcnt == 1:
                    detail = ("Does the file's data format match"
                              " the data format chosen on the upload form?")
                else:
                    detail = ''
                raise MissingColError(
                    'Missing column',
                    ('The (' + col +
                     ') column is missing from the uploaded file'),
                    detail)

        # Get all the chimpids by removing the 'normal' headings.
        # Map the chimpids to their respective columns.
        baboon_col = None
        chimpids = {}
        colno = 0
        for hdg in headers:
            if hdg in chimpids:
                raise DupChimpidError(
                    'Duplicate chimpid column in header line',
                    'The (' + hdg + ') chimpid is duplicated')
            if hdg not in COLS:
                if hdg == gmi.ANIMID_BABOON:
                    baboon_col = colno
                else:
                    chimpids[hdg] = colno
            colno += 1

        if baboon_col == None:
            raise NoBaboonColError(
                'There is no column for baboon presence',
                ('The (' + gmi.ANIMID_BABOON +
                 ') column is missing from the uploaded file'))

        # There's sometimes one column with no header.  Get rid of it.
        if '' in chimpids:
            self.emptycol = chimpids['']
            self.have_emptycol = True
            del chimpids['']
        else:
            self.have_emptycol = False

        # Be sure there's at least one chimp
        if not chimpids:
            raise NoChimpidError('No chimpid columns in heading line')

        return SaveGCLine(ue, self, colmap, chimpids, baboon_col)

    def cleanup(self):
        '''Finish after processing all lines.'''
        super(GCUploadHandler, self).cleanup()
        if self._last_followid != None:
            self.finish_follow()

    def finish_follow(self):
        '''
        Process change of follow.
        '''
        for gc in self._follow_gc:
            udl, rgid, sheet_date, present_cids = gc
            def flthunk():
                for cid in present_cids:
                    # Automatically add infants if the individual
                    # is a (adoptive?) mom
                    self.cur.execute(
                        'SELECT mom_in_gc(%s, %s, %s);',
                        (cid, rgid, sheet_date))
                # Automatically add (real or adoptive) moms of infants
                    self.cur.execute(
                        'SELECT infants_in_gc(%s, %s, %s);',
                        (cid, rgid, sheet_date))
                # Automatically add social partners.
                    self.cur.execute(
                        'SELECT add_social_partners(%s, %s);',
                        (cid, rgid))

            self.ue.eat_old_line(udl, flthunk)

    def follow_break(self, followid, udl, rgid, date, added):
        '''
        Detect and process change of follow.

        followid  The followid of the group comp
        udl       The UploadDataLine instance
        rgid      The rgid just added to the db
        date      The date of the follow
        added     List of chimpids in the rgid's group comp

        This does all the "automatic updating" of stuff, which has
        to wait until all gc data for the follow is in the db to
        work properly.  (Otherwise the first gc for the day, when
        inserted first, will encompass the entire day because there's
        no other gc.  So all social partners for the day will "belong"
        to the first gc and automatic social partner adding will happen
        for the whole day.  Subsequent gc's don't undo previous automatic
        adds.)
        '''
        if followid == self._last_followid:
            self._follow_gc.append((udl, rgid, date, added))
            return

        self.finish_follow()

        if self.seen_follow(followid):
            raise NonContiguousFollowError(
                udl,
                'Line out of order',
                ('The uploaded group comp lines for a follow'
                 ' must be contiguous'))
        self._follows.append(followid)

        self._last_followid = followid
        self._follow_gc = [(udl, rgid, date, added)]


    def ff_break(self,
                 udl,
                 sheet_date,
                 sheet_page,
                 person,
                 sheet_mother):
        '''
        Handle change of fieldfollow.

        Input:
          udl            An UploadDataLine instance of the current line
          sheet_date     Follow date on sheet
          sheet_page     Page number on sheet
          person         PEOPLE.Person of the sheet's researcher
          sheet_mother   Chimpid of the mother in the follow as written
                            on sheet

        Returns:
        tuple of the database content:
          followid  follows.followid
          sheetid   sheets.sheetid
          ffid      fieldfollows.ffid
          gcstatus  fieldfollows.gcstatus
        '''

        # Match fieldfollow on sheet_date, sheet_page,
        # person, and sheet_mother.
        ff_key = (sheet_date, sheet_page, person, sheet_mother, gmi.MOM, )
        if ff_key == self._last_ff_key:
            # No need to break
            return self._last_ff_key_result
        
        self.cur.execute(
            '''SELECT follows.followid
                    , sheets.sheetid
                    , fieldfollows.ffid
                    , fieldfollows.gcstatus
                 FROM follows
                        JOIN intervals
                             ON (intervals.followid = follows.followid)
                        JOIN sheets
                             ON (sheets.sheetid = intervals.sheetid)
                        JOIN fieldfollows
                             ON (fieldfollows.ffid = sheets.ffid)
                        JOIN followparts
                             ON (followparts.followid = follows.followid)
                        JOIN followroles
                             ON (followroles.role = followparts.role)
                 WHERE follows.date = %s
                       AND sheets.page IS NOT DISTINCT FROM %s
                       AND fieldfollows.collector = %s
                       AND followparts.chimpid = %s
                       AND followroles.function = %s;''',
            ff_key)

        self._last_ff_key_result = self.cur.fetchone()
        if not self._last_ff_key_result:
            raise NoSuchFollowError(
                udl,
                'Follow not found in the db',
                ('Follow search matching on date = ({0})'
                 ', researcher = ({1})'
                 ', page=({2})'
                 ", and mother's chimpid({3})"
                 ": Note that mother's chimpid must match"
                 ", the mother's animid is not compared").format(
                    textualize(sheet_date),
                    person,
                    textualize(sheet_page),
                    textualize(sheet_mother)))

        self._last_ff_key = ff_key
        return self._last_ff_key_result


@view_config(route_name='upload_gc'
             , renderer='gmi_pyramid:templates/upload_gc.mak')
def upload_gc_view(request):

    response = UploadEngine(GCUploadHandler(request)).run()
    if response['db_changed']:
        if is_checked(response['csv_checked']):
            upload_fmt = 'CSV'
        else:
            upload_fmt = 'TAB'
        log.info('Successful upload: File ({filename}): Type {format}:'
                 ' Lines {lines}: DB {db}: By user {user}'
                 .format(filename=response['filename'],
                         format=upload_fmt,
                         lines=response['lines'],
                         db=response['db'],
                         user=response['user']))
    return response