# Copyright (C) 2013, 2014, 2015, 2018, 2020, 2021 The Meme Factory, Inc.
#               http://www.karlpinc.com/

# This file is part of PGWUI_Core.
#
# This program is free software: you can redistribute it and/or
# modify it under the terms of the GNU Affero General Public License
# as published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public
# License along with this program.  If not, see
# <http://www.gnu.org/licenses/>.
#

# Karl O. Pinc <kop@karlpinc.com>

'''Python source code for pgwui_core module

More general description of the module.
'''

# The main objects, and their subclasses, are:
#   LoadedForm
#   DBHandler  (generally referred to as an "upload handler", at present)
#   DBConnector (UploadEngine)
#
# See their documentation below.

# Write python 3 compatible code.
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import
from __future__ import division

from csv import reader as csv_reader
import collections.abc
import ast
import attr
import markupsafe
import hashlib

import io

from . import exceptions as core_ex

# We are not really using wtforms.  We use it to (barely)
# interact with the html and post request but really
# we define our own classes to handle working memory
# and interacting with the session.
from wtforms import (
    Form,
    BooleanField,
    StringField,
    RadioField,
    PasswordField,
    FileField)

import psycopg2
import psycopg2.extensions

from pgwui_core.constants import (
    CHECKED,
    UNCHECKED,
    CSV,
    TAB,
    CSV_VALUE,
    TAB_VALUE,
)


# Setup default values for forms.

@attr.s
class UserInitialPost():
    '''Default values for the form used to authenticate.

    Attributes:
      db        Default database name
      user      Default user name
      password  Default password
    '''
    db = attr.ib(default='')
    user = attr.ib(default='')
    password = attr.ib(default='')

    def build(self, settings=None):
        '''Fill in the defaults that come from the configuration.

        Input:
          settings  Dict-like settings; the 'default_db' value of its
                    'pgwui' entry becomes the default db.  When there
                    are no settings, no 'pgwui' entry, or no
                    'default_db' the default db is the empty string.

        Returns:
          self
        '''
        # No mutable default argument; a missing configuration means
        # "no default db" rather than a KeyError.
        if settings is None:
            settings = {}
        pgwui_settings = settings.get('pgwui') or {}
        self.db = pgwui_settings.get('default_db', '')
        return self


@attr.s
class UploadFileInitialPost(UserInitialPost):
    '''Default values for a file upload form.

    Adds the file upload defaults to those of UserInitialPost.
    '''
    # File format initially selected
    upload_fmt = attr.ib(default=CSV)
    # Trimming is initially turned on
    trim_upload = attr.ib(default=True)
    # Literal column headings are initially turned off
    literal_col_headings = attr.ib(default=False)
    # No file is initially chosen
    datafile = attr.ib(default='')


@attr.s
class UploadNullFileInitialPost(UploadFileInitialPost):
    '''Default values for a file upload form that has NULL controls.

    Adds the NULL related defaults to those of UploadFileInitialPost.
    '''
    # Uploading of NULL values is initially turned on
    upload_null = attr.ib(default=True)
    # The initial representation of NULL is the empty string
    null_rep = attr.ib(default='')


@attr.s
class UploadTableInitialPostMixin():
    '''Mixin adding a default for the form's table value.'''
    # No table is initially named
    table = attr.ib(default='')


@attr.s
class UploadTableInitialPost(UploadNullFileInitialPost,
                             UploadTableInitialPostMixin):
    '''Default values for a form uploading a file into a table.

    Combines the defaults of UploadNullFileInitialPost with the table
    default of UploadTableInitialPostMixin; adds nothing of its own.
    '''
    pass


# The wtforms that suck data out of the html.

class UserWTForm(Form):
    '''The wtform used to connect to the db to authenticate.

    Holds only the credentials; the db is not part of this form.
    '''
    # We don't actually use the labels, wanting the template to
    # look (and render) like html, but I'll define them anyway
    # just to keep my hand in.
    user = StringField('User:')
    password = PasswordField('Password:')


class AuthWTForm(UserWTForm):
    '''The wtform used to connect to any db and authenticate.

    Adds the db field to the credentials of UserWTForm.
    '''
    # We don't actually use the labels, wanting the template to
    # look (and render) like html, but I'll define them anyway
    # just to keep my hand in.
    db = StringField('Database:')


class UploadFileWTForm(AuthWTForm):
    '''The wtform used for uploading files.

    Adds the upload format, the file itself, and the trimming and
    column heading options to AuthWTForm.
    '''
    # We don't actually use the labels, wanting the template to
    # look (and render) like html, but I'll define them anyway
    # just to keep my hand in.
    # NOTE(review): WTForms documents choices as (value, label) pairs
    # but here the human readable text comes first -- confirm whether
    # the order matters given that the labels are not used.
    upload_fmt = RadioField('Upload Format:',
                            choices=[('Upload CSV Data:', CSV),
                                     ('Upload tab delimited Data:', TAB)])
    datafile = FileField('File with CSV or Tab delimited Data:')
    trim_upload = BooleanField('Trim Leading/Trailing Spaces:')
    literal_col_headings = BooleanField('Literal Uploaded Column Headings:')


class UploadNullFileWTForm(UploadFileWTForm):
    '''The wtform used for uploading files that may contain NULL.

    Adds the NULL related fields to UploadFileWTForm.
    '''
    # We don't actually use the labels, wanting the template to
    # look (and render) like html, but I'll define them anyway
    # just to keep my hand in.
    upload_null = BooleanField('Upload NULL Values:')
    null_rep = StringField('NULL Representation:')


class UploadTableWTForm(UploadNullFileWTForm):
    '''The wtform used for uploading arbitrary data into tables.

    Adds the table field to UploadNullFileWTForm.
    '''
    table = StringField('Table or View:')


@attr.s
class LoadedForm(collections.abc.MutableMapping):
    '''
    Abstract class representing an upload form.

    Responsible for getting information into and out of
    html forms.

    The user API is that it acts like a dict, but with extra methods.

    Attributes:
      fc_default  Form class used when build() is not given one
      ip_default  Initial post used when build() is not given one
      uh          The UploadHandler instance using the form
      ivals       The built initial post, the source of form defaults

      _store  Where the real dict is kept
      _form   Instantiated html form object (WTForms)
      _fc     Class handling html form
    '''
    fc_default = attr.ib(default=None)
    ip_default = attr.ib(default=None)
    uh = attr.ib(default=None)
    _store = attr.ib(factory=dict)
    _fc = attr.ib(default=None)
    _form = attr.ib(default=None)
    ivals = attr.ib(default=None)

    def build(self, uh, fc=None, ip=None, data=None, **kwargs):
        '''Form initialization

        Input:
          uh        The UploadHandler instance using the form
          fc        Class handling the html form; when None the
                    fc_default attribute is used
          ip        The instantiated initial post; when None the
                    ip_default attribute is used
          data      Initial content of the dict; copied, never modified
          **kwargs  Further initial content, overriding data

        Returns:
          self
        '''
        self.uh = uh
        # data defaults to None, not a shared mutable dict.
        store = dict(data) if data else {}
        store.update(kwargs)
        self._store = store
        self._fc = (self.fc_default if fc is None else fc)
        ip_used = (self.ip_default if ip is None else ip)
        self.ivals = ip_used.build(self.uh.request.registry.settings)
        return self

    def __iter__(self):
        return iter(self._store)

    def __len__(self):
        return len(self._store)

    def __getitem__(self, key):
        return self._store[key]

    def __setitem__(self, key, value):
        self._store[key] = value

    def __delitem__(self, key):
        del self._store[key]

    def read(self):
        '''
        In the children this loads form from pyramid self.uh.request
        object and self._form and the session.

        In this case we instantiate _form and give it some defaults

        Side effects:
          Sets self._form; it is made from the POSTed data when there
          is some and otherwise from the initial values in self.ivals.
        '''
        post = self.uh.request.POST
        if post:
            self._form = self._fc(formdata=post)
        else:
            self._form = self._fc(obj=self.ivals)

    def write(self, response, errors):
        '''
        Produces the dict pyramid will use to render the form.

        Input:
          response    Dict of results from connection execution
          errors      List of errors from connection execution

        Returns:
          The response dict given, with errors stored under 'errors'.
        '''
        response['errors'] = errors
        return response


@attr.s
class CredsLoadedForm(LoadedForm):
    '''
    Acts like a dict, but with extra methods.
    Manages credentials (but not db) needed to authenticate.

    Attributes:
      uh      The UploadHandler instance using the form
      user    The username used to login
      _form   Instantiated html form object (WTForms)

    Methods:
      read()  Load form from pyramid request object.
    '''
    fc_default = attr.ib(default=UserWTForm)
    ip_default = attr.ib(factory=UserInitialPost)
    user = attr.ib(default=None)
    password = attr.ib(default=None)
    action = attr.ib(default=None)

    def session_put(self, key, value):
        '''
        Store a value in the session.

        Input:
          key    The session key
          value  The value to keep under the key

        Returns:

        Side effects:
          Modifies session

        A subclass may override this to keep data out of the session.
        '''
        self.uh.session[key] = value

    def read(self):
        '''
        Read form data from the client

        The user and password come from the POST when POSTed, in
        which case they are also put in the session, and otherwise
        from the session.  The action comes only from the POST.
        '''
        # The parent sets up self._form
        super().read()

        # Only the user and password are kept in the session.  All
        # the other form variables must be re-posted.
        request = self.uh.request
        post = request.POST
        session = request.session

        # The defaults in self._form are ignored for the user and
        # password; we need to know whether to go to the session.
        if 'password' in post:
            self['password'] = post['password']
            self.session_put('password', self['password'])
        elif 'password' in session:
            self['password'] = session['password']
        # (When the password is in neither place it is left unset.)

        if 'user' in post:
            self['user'] = post['user']
            self.session_put('user', self['user'])
        elif 'user' in session:
            self['user'] = session['user']
        else:
            self['user'] = ''

        # Other, hidden, POST variables
        self['action'] = post['action'] if 'action' in post else ''

    def write(self, result, errors):
        '''
        Produces the dict pyramid will use to render the form.

        The session's 'havecreds' value decides whether the user is
        rendered or whether the credentials are blanked, in both the
        response and the session.
        '''
        response = super().write(result, errors)
        havecreds = self.uh.session.get('havecreds', False)
        response['havecreds'] = havecreds
        if not havecreds:
            # Either the credentials are not known to be good or they
            # are known to be bad.  Keep them out of the session.
            response['user'] = ''
            response['password'] = ''
            self.session_put('user', '')
            self.session_put('password', '')
            return response
        response['user'] = self['user']
        return response


@attr.s
class AuthLoadedForm(CredsLoadedForm):
    '''
    Acts like a dict, but with extra methods.
    Manages form data needed to authenticate, including db to authenticate
    in.

    Attributes:
      uh      The UploadHandler instance using the form
      user    The username used to login
      db      The db to login to
      _form   Instantiated html form object (WTForms)

    '''
    db = attr.ib(default=None)

    def read(self):
        '''
        Read form data from the client

        Adds the form's db value to what the parent reads.
        '''
        super().read()

        # Keep the db handy, as a dict entry
        db_field = self._form.db
        self['db'] = db_field.data

    def write(self, result, errors):
        '''
        Produces the dict pyramid will use to render the form.

        Adds 'db' to the parent's response.
        '''
        rendered = super().write(result, errors)
        rendered['db'] = self['db']
        return rendered


@attr.s
class UploadFileForm(AuthLoadedForm):
    '''
    Acts like a dict, but with extra methods.

    Manages the form data of a file upload.

    Attributes:
      uh      The UploadHandler instance using the form

    Methods:
      read()  Load form from pyramid request object.
    '''
    fc_default = attr.ib(default=UploadFileWTForm)
    ip_default = attr.ib(factory=UploadFileInitialPost)
    upload_fmt = attr.ib(default=None)
    trim_upload = attr.ib(default=None)
    literal_col_headings = attr.ib(default=None)
    filename = attr.ib(default=None)
    localfh = attr.ib(default=None)

    def read(self):
        '''
        Read form data from the client

        Besides the plain form values this picks up, when there is an
        action and a datafile, the uploaded file's name and file
        handle.  Both are the empty string otherwise.
        '''
        super().read()

        # Our own form values
        form = self._form
        self['upload_fmt'] = form.upload_fmt.data
        self['trim_upload'] = form.trim_upload.data
        self['literal_col_headings'] = form.literal_col_headings.data

        # The uploaded file, if any
        self['filename'] = ''
        self['localfh'] = ''
        if self['action'] and form.datafile.data != '':
            post = self.uh.request.POST
            if hasattr(post['datafile'], 'filename'):
                self['filename'] = post['datafile'].filename
            if hasattr(post['datafile'], 'file'):
                self['localfh'] = post['datafile'].file

    def write(self, result, errors):
        '''
        Produces the dict pyramid will use to render the form.

        Booleans and the radio button choice are rendered as
        CHECKED or UNCHECKED.
        '''
        # Exactly one of the two format radio buttons is checked
        is_csv = self['upload_fmt'] == CSV
        csv_checked = CHECKED if is_csv else UNCHECKED
        tab_checked = UNCHECKED if is_csv else CHECKED

        trim_upload_checked = (
            CHECKED if self['trim_upload'] else UNCHECKED)
        literal_col_headings_checked = (
            CHECKED if self['literal_col_headings'] else UNCHECKED)

        response = super().write(result, errors)
        response['filename'] = self['filename']
        response['trim_upload'] = trim_upload_checked
        response['csv_value'] = CSV_VALUE
        response['tab_value'] = TAB_VALUE
        response['csv_checked'] = csv_checked
        response['tab_checked'] = tab_checked
        response['literal_col_headings'] = literal_col_headings_checked
        return response


class UploadFormBaseMixin():
    '''
    Mixins add attributes to self, and to response.

    This is the base of the form mixins; it ends the chain of
    write_response() calls made with super().
    '''
    def write_response(self, response):
        '''Return the response unchanged.

        Input:
          response  The dict pyramid will use to render the form
        '''
        return response


@attr.s
class UploadDoubleFileFormMixin(UploadFormBaseMixin):
    '''
    Adds a last_key attribute to self, from POST

    Acts like a dict, but with extra methods.

    Attributes:
      uh      The UploadHandler instance using the form

    Methods:
      read()  Load form from pyramid request object.
    '''
    last_key = attr.ib(default=None)

    def read(self):
        '''
        Read form data from the client

        The 'last_key' entry is the POSTed value, or the empty
        string when nothing was POSTed.
        '''
        super().read()

        post = self.uh.request.POST
        self['last_key'] = post['last_key'] if 'last_key' in post else ''

    def write_response(self, response):
        '''
        Adds 'last_key' to the dict pyramid will use to render the
        form, then lets the next mixin add to it.
        '''
        response['last_key'] = self['last_key']
        return super().write_response(response)


@attr.s
class UploadDoubleFileForm(UploadDoubleFileFormMixin, UploadFileForm):
    '''
    Acts like a dict, but with extra methods.

    A file upload form with the additions of
    UploadDoubleFileFormMixin.

    Attributes:
      uh      The UploadHandler instance using the form

    Methods:
      read()  Load form from pyramid request object.
    '''
    def read(self):
        '''
        Read form data from the client

        All the work is done by the parents.
        '''
        super().read()

    def write(self, result, errors):
        '''
        Produces the dict pyramid will use to render the form.

        The parents' write() makes the response, to which the mixins
        then add by way of write_response().
        '''
        rendered = super().write(result, errors)
        return super().write_response(rendered)


@attr.s
class UploadNullMixin(UploadFormBaseMixin):
    '''
    Acts like a dict, but with extra methods.

    Adds the form's NULL handling values, upload_null and null_rep.

    Attributes:
      uh      The UploadHandler instance using the form

    Methods:
      read()  Load form from pyramid request object.
    '''
    def read(self):
        '''
        Read form data from the client

        Adds 'upload_null' and 'null_rep', taken from the form.
        '''
        super().read()
        form = self._form
        self['upload_null'] = form.upload_null.data
        self['null_rep'] = form.null_rep.data

    def write_response(self, response):
        '''
        Adds 'upload_null', as CHECKED or UNCHECKED, and 'null_rep'
        to the dict pyramid will use to render the form, then lets
        the next mixin add to it.
        '''
        response['upload_null'] = (
            CHECKED if self['upload_null'] else UNCHECKED)
        response['null_rep'] = self['null_rep']
        return super().write_response(response)


@attr.s
class UploadTableForm(UploadNullMixin, UploadFileForm):
    '''
    Acts like a dict, but with extra methods.

    A file upload form having NULL handling and a table.

    Attributes:
      uh      The UploadHandler instance using the form

    Methods:
      read()  Load form from pyramid request object.
    '''
    fc_default = attr.ib(default=UploadTableWTForm)
    ip_default = attr.ib(factory=UploadTableInitialPost)

    def read(self):
        '''
        Read form data from the client

        Adds 'table', taken from the form, to what the parents read.
        '''
        # The parents go first
        super().read()
        # Then our own data
        table_field = self._form.table
        self['table'] = table_field.data

    def write(self, result, errors):
        '''
        Produces the dict pyramid will use to render the form.

        Adds 'table' to the parents' response and then has the
        mixins add to it by way of write_response().
        '''
        rendered = super().write(result, errors)
        rendered['table'] = self['table']
        return super().write_response(rendered)


@attr.s
class UploadDoubleTableForm(UploadDoubleFileFormMixin, UploadTableForm):
    '''
    Acts like a dict, but with extra methods.

    A table upload form with the additions of
    UploadDoubleFileFormMixin.

    Attributes:
      uh      The UploadHandler instance using the form

    Methods:
      read()  Load form from pyramid request object.
    '''
    def read(self):
        '''
        Read form data from the client

        All the work is done by the parents.
        '''
        super().read()

    def write(self, result, errors):
        '''
        Produces the dict pyramid will use to render the form.

        The parents' write() makes the response, to which the mixins
        then add by way of write_response().
        '''
        rendered = super().write(result, errors)
        return super().write_response(rendered)


# Utility functions

def textualize(st):
    '''
    Return the text 'NULL', pg's representation of NULL, when st is
    None.  Anything else is returned unchanged.
    '''
    if st is None:
        return 'NULL'
    return st


def is_checked(val):
    '''Is the value something a html input entity recognizes as checked?

    Input:
      val  The value to test

    Returns:
      The result of comparing val for equality with CHECKED
    '''
    return val == CHECKED


# Some functions for logging

def escape_eol(string):
    r'''Change every newline character into the two characters \n.

    This keeps a log message on a single line.
    '''
    pieces = string.split('\n')
    return r'\n'.join(pieces)


def format_exception(ex):
    '''Return an exception formatted as suffix text for a log message.

    Input:
      ex  The exception

    Returns:
      ': Error is (...)' describing the exception, with newlines
      escaped in the descriptive parts, or the empty string when
      there is nothing to say.  Only psycopg2.DatabaseError and
      UploadError exceptions are described.
    '''
    if isinstance(ex, psycopg2.DatabaseError):
        diag = ex.diag
        # The diagnostic attributes are None when the server did not
        # supply them, so test the values, not for their existence.
        parts = []
        primary = getattr(diag, 'message_primary', None)
        if primary is not None:
            parts.append(primary)
        detail = getattr(diag, 'message_detail', None)
        if detail is not None:
            parts.append('detail={0}'.format(escape_eol(detail)))
        hint = getattr(diag, 'message_hint', None)
        if hint is not None:
            parts.append('hint={0}'.format(escape_eol(hint)))
        msg = ', '.join(parts)
    elif isinstance(ex, core_ex.UploadError):
        msg = ex.e
        # Empty (or missing) parts are left out
        if ex.descr:
            msg += ' {0}'.format(escape_eol(ex.descr))
        if ex.detail:
            msg += ' {0}'.format(escape_eol(ex.detail))
    else:
        msg = ''
    if msg != '':
        msg = ': Error is ({0})'.format(msg)
    return msg


# Upload processing

class SQLCommand:
    '''
    An SQL command that returns nothing

    Attributes:
      stmt  The statement, formatted for psycopg2 substitution
      args  Tuple of arguments used to substitute when executed.
      ec    None, or what makes the exception raised on failure
    '''
    def __init__(self, stmt, args, ec=None):
        '''
        stmt   The statement, formatted for psycopg2 substitution
        args   Tuple of arguments used to substitute when executed.
        ec(ex) Produces the exception to raise an instance of on failure
                Input:
                  ex  The exception raised by psycopg2
        '''
        super().__init__()
        self.stmt, self.args, self.ec = stmt, args, ec

    def execute(self, cur):
        '''
        Execute the sql statement.

        Input:
          cur  A psycopg2 cursor

        Side effects:
          Does something in the db.
          Can raise a psycopg2 error, or what ec() makes of one
        '''
        try:
            cur.execute(self.stmt, self.args)
        except psycopg2.DatabaseError as ex:
            # Translate the error only when there is a translator
            if self.ec is not None:
                raise self.ec(ex)
            raise ex


class LogSQLCommand(SQLCommand):
    '''An SQL command that logs success or failure.'''
    def __init__(self, stmt, args, ec=None,
                 log_success=None, log_failure=None):
        '''
        stmt  The statement, formatted for psycopg2 substitution
        args  Tuple of arguments used to substitute when executed.
        ec(ex) Produces the exception to raise an instance of on failure
                Input:
                  ex  The exception raised by psycopg2
        log_success()    Called, if supplied, after successful execution
        log_failure(ex)  Called, if supplied, with the exception raised
                         when execution fails
        '''
        super().__init__(stmt, args, ec)
        self.log_success, self.log_failure = log_success, log_failure

    def execute(self, cur):
        '''
        Execute the sql statement, logging the outcome.

        Input:
          cur  A psycopg2 cursor

        Side effects:
          Does something in the db.
          Can raise a psycopg2 error; it is re-raised after logging
        '''
        try:
            super().execute(cur)
        except (core_ex.UploadError, psycopg2.DatabaseError) as ex:
            if self.log_failure:
                self.log_failure(ex)
            raise

        # Only reached when nothing was raised
        if self.log_success:
            self.log_success()


class UploadLine:
    '''
    Representation of a generic uploaded line

    Responsible for getting data out of uploaded lines.

    Once out, the following attributes provide the user API:
      raw    The "raw" line as a string
      tuples A sequence, the line split into columns
    '''
    def __init__(self, line, stol, mapper):
        '''
        line        Uploaded line
        stol(st)    Parse string st and convert into list
        mapper(st)  Maps st onto desired python representation
                    (Used for trimming whitespace)
        '''
        super().__init__()
        self.raw = line
        # Split into columns, then map each column
        columns = stol(line)
        self.tuples = list(map(mapper, columns))


def doublequote(st):
    '''
    Return the string wrapped in double quotes, with each double
    quote it contains doubled as sql requires of an identifier.

    Note that this does not work for unicode identifiers.
    '''
    escaped = ''.join('""' if ch == '"' else ch for ch in st)
    return '"' + escaped + '"'


class UploadHeaders(UploadLine):
    '''Uploaded heading line

    Derived from UploadLine

    raw    The "raw" line as a string
    tuples As a sequence
    sql    As a sql command fragment string; the column headings as
           double quoted identifiers separated by commas
    '''

    def __init__(self, line, stol, mapper):
        '''
        line        Uploaded heading line
        stol(st)    Parse string st and convert into list
        mapper(st)  Maps st onto desired python representation
                    (Used for trimming whitespace)

        Raises NoHeadersError when the mapped line is empty.
        '''
        if mapper(line) == '':
            raise core_ex.NoHeadersError(
                'No column headings found on first line',
                'The first line is ({0})'.format(line))

        super().__init__(line, stol, mapper)
        # doublequote() supplies the surrounding quotes; adding more
        # would produce ""name"", which is not a quoted identifier.
        self.sql = ', '.join(doublequote(st) for st in self.tuples)


class UploadDataLine(UploadLine):
    '''Uploaded data line

    Derived from UploadLine

    raw    The "raw" line as a string
    tuples As a sequence
    lineno The line number
    '''
    def __init__(self, line, lineno, stol, mapper):
        '''
        line        Uploaded line
        lineno      Line number
        stol(st)    Parse string st and convert into list
        mapper(st)  Maps st onto desired python representation
        '''
        # The parent parses the line; we only remember where it was
        super().__init__(line, stol, mapper)
        self.lineno = lineno


class DBData(object):
    '''Data that is to modify the db.

    It is iterable, consisting of thunks which, when called, deliver
    the next object to go into the db.

    Abstract class; subclasses must supply _thunk().

    lineno     Number of lines processed (including header line)

    '''
    def __init__(self):
        self.lineno = 0

    def __iter__(self):
        '''Deliver the thunks, counting each in lineno as it goes.'''
        for thunk in self._thunk():
            self.lineno += 1
            yield thunk

    def _thunk(self):
        '''
        Return an iterable of thunks, each of which, when called,
        delivers the next object to go into the db.
        '''
        raise NotImplementedError()


class SQLData(DBData):
    '''
    SQL statements returning no data that execute in the db.

    Attributes:
      stmts  List of SQLCommand instances
    '''
    def __init__(self, stmts):
        '''
        stmts  List of SQLCommand instances
        '''
        super().__init__()
        self.stmts = stmts

    def _thunk(self):
        '''
        Deliver, for each statement in turn, a thunk which returns
        the statement when called.
        '''
        for stmt in self.stmts:
            # Bind stmt now so that a thunk called after the loop has
            # moved on still returns its own statement.
            yield lambda stmt=stmt: stmt


class UploadData(DBData):
    '''Uploaded data file

    It is iterable, consisting of thunks which make an UploadDataLine
    instance.

    headers    UploadHeaders instance
    cols       Number of columns
    lineno     Number of lines processed (including header line)

    '''
    def create_mapper(self, do_trim, null_data, null_rep):
        '''Set self._mapper, the function applied to each data element

        do_trim(st)  Returns st, trimmed when trimming is wanted
        null_data    (boolean) Uploaded data contains nulls
        null_rep     Uploaded string representation of null
        '''
        if null_data:
            # Trim the representation like the data is trimmed, so
            # that like is compared with like.
            null_rep = do_trim(null_rep)

            def mapper(st):
                st = do_trim(st)
                # psycopg2 maps None to NULL
                return None if st == null_rep else st
            self._mapper = mapper
        else:
            self._mapper = do_trim

    def create_parser(self, file_fmt, eol):
        '''Set self._parser, which reads a raw line into a list

        file_fmt  File format: CSV or TAB
        eol       The end-of-line sequence found on the heading line;
                  the empty string when none was recognized
        '''
        if file_fmt == CSV:
            self._parser = lambda st: self._extend(st,
                                                   next(csv_reader((st,))))
        else:    # Tab delimited format
            neg_eollen = -len(eol)

            def tab_parser(line):
                # With no eol neg_eollen is 0 and there is nothing to
                # strip from the end of the line.
                if neg_eollen and line[neg_eollen:] == eol:
                    line = line[:neg_eollen]
                return self._extend(line, line.split('\t'))
            self._parser = tab_parser

    def open_fileo(self, fileo):
        '''Setup the buffered text stream for self._fileo
        '''
        # Read lines as text with line endings intact.
        # newline='' required by csv.reader()
        self._fileo = io.TextIOWrapper(fileo, newline='')

    def close_fileo(self):
        '''Close self._fileo
        '''
        self._fileo.close()

    def __init__(self, fileo, file_fmt, null_data, null_rep, trim=True):
        '''
        fileo       Uploaded file object
        file_fmt    File format: CSV or TAB
        null_data   (boolean) Uploaded data contains nulls
        null_rep    Uploaded string representation of null
        trim        (boolean) Trim leading and trailing whitespace?

        Reads the first line of the file, the column headings.

        Raises NoDataError when the file is empty, CantDecodeError
        when the first line cannot be decoded as text, and whatever
        UploadHeaders raises.
        '''

        def get_headers():
            '''Read the heading line

            Returns the tuple (eol, headers): the end-of-line
            sequence of the heading line and an UploadHeaders.
            '''
            try:
                line = next(self._fileo)
            except StopIteration:
                raise core_ex.NoDataError('Uploaded file contains no data')
            except UnicodeError as ex:
                raise core_ex.CantDecodeError(
                    'Not a text file',
                    ("The file's content is not recognized as Unicode text, "
                     f'the error is: {ex}')) from ex
            else:
                self.lineno += 1
                # Intuit the eol sequence
                if line[-2:] == '\r\n':
                    eol = '\r\n'
                    line = line[:-2]
                elif line[-1:] == '\n':
                    eol = '\n'
                    line = line[:-1]
                elif line[-1:] == '\r':
                    eol = '\r'
                    line = line[:-1]
                else:
                    # Give up
                    eol = ''

                # The headings are split but, unlike the data lines,
                # neither checked against nor padded to a column count.
                if file_fmt == CSV:
                    def func(st):
                        return next(csv_reader((st,)))
                else:
                    def func(st):
                        return st.split('\t')
                return (eol, UploadHeaders(line, func, do_trim))

        super().__init__()

        if trim:
            def do_trim(st):
                return st.strip()
        else:
            def do_trim(st):
                return st

        self.open_fileo(fileo)

        # Get column headings
        eol, self.headers = get_headers()
        self.cols = len(self.headers.tuples)

        self.create_parser(file_fmt, eol)
        self.create_mapper(do_trim, null_data, null_rep)

    def _thunk(self):
        '''
        Deliver, for each remaining line of the uploaded file, a thunk
        which, when called, returns the line's UploadDataLine.

        The file is closed after the last line is delivered.
        '''
        for line in self._fileo:
            # Bind line now so that a thunk called after the loop has
            # moved on still parses its own line.  self.lineno is read
            # when the thunk is called.
            yield lambda line=line: UploadDataLine(line,
                                                   self.lineno,
                                                   self._parser,
                                                   self._mapper)
        self.close_fileo()

    def _extend(self, line, seq):
        '''Pad a short list of elements with empty strings.

        line  line as string
        seq   line as sequence

        If there's too many elements, raise an error.

        NOTE(review): The padding stops one short of the number of
        column headings (the range begins at len(seq) + 1), so a
        short line is returned with cols - 1 elements.  Presumably
        this leaves short lines to be reported later as having too
        few columns -- confirm before changing.
        '''
        if len(seq) > self.cols:
            raise core_ex.TooManyColsError(self.lineno,
                                           'Line has too many columns',
                                           'More columns than column headings',
                                           data=line)
        return seq + ['' for i in range(len(seq) + 1, self.cols)]


@attr.s
class ParameterExecutor():
    '''Execute a parameterized psycopg2 statement
    Must be mixed in with a DataLineProcessor.

    (The cursor used, self.cur, is not supplied by this class.)
    '''
    def param_execute(self, insert_stmt, udl):
        '''
        Execute a statement using an uploaded line as its parameters.

        Input:
          insert_stmt  The statement, formatted for psycopg2
                       substitution
          udl          An UploadDataLine instance

        Side effects:
          Does something in the db.
          Raises TooFewColsError on an IndexError and an
          EncodingError, or a MultiDataLineError of them, on a
          UnicodeEncodeError.
        '''
        try:
            self.cur.execute(insert_stmt, udl.tuples)
        except IndexError as exp:
            raise core_ex.TooFewColsError(
                udl.lineno,
                'Line has too few columns',
                'Fewer columns than column headings',
                f'The IndexError from psycopg2 is: ({exp})',
                data=udl.raw) from exp
        except UnicodeEncodeError as exp:
            self.raise_encoding_error(exp, udl)

    def raise_encoding_error(self, exp, udl):
        '''
        Raise the error(s) describing the columns which cannot be
        encoded in the connection's encoding.

        Input:
          exp  The UnicodeEncodeError which was raised
          udl  The UploadDataLine instance which could not be encoded

        Always raises; a MultiDataLineError when the offending
        columns are found and otherwise an EncodingError reporting exp.
        '''
        errors = []
        enc = psycopg2.extensions.encodings[self.cur.connection.encoding]
        for cnt, col in enumerate(udl.tuples, start=1):
            if col is None:
                # A NULL; there is nothing to encode
                continue
            try:
                col.encode(encoding=enc)
            except UnicodeEncodeError as detailed_exp:
                errors.append(core_ex.EncodingError(
                    udl.lineno,
                    ("Data cannot be represented in the database's character"
                     " encoding"),
                    (f'The data ({col}) in column'
                     f' {cnt} contains an un-representable bit sequence;'
                     ' the reported error is:'),
                    str(detailed_exp),
                    data=udl.raw))
        if errors:
            raise core_ex.MultiDataLineError(errors)
        raise core_ex.EncodingError(
            udl.lineno,
            ("Data cannot be represented in the database's character"
             " encoding"),
            ('Cannot discover which column contains an un-representable'
             ' bit sequence, the reported error is:'),
            str(exp),
            data=udl.raw)


class DataLineProcessor(object):
    '''
    A processor supplied uploaded lines (UploadDataLine instances)
    by an UploadEngine.  The lines are expected to be put into a pg db.

    Abstract class not expected to be instantiated.

    Attributes:
      ue     UploadEngine instance
      uh     UploadHandler instance
      cur    psycopg2 cursor

    Methods:
      eat(udl)  Given an UploadDataLine instance put the line in the db.
    '''
    def __init__(self, ue, uh):
        '''
        ue        UploadEngine instance
        uh        UploadHandler instance
        '''
        super().__init__()
        self.ue = ue
        self.uh = uh
        # The cursor is that of the upload engine
        self.cur = ue.cur

    def eat(self, udl):
        '''
        Update the db with the content of an uploaded line

        udl  An UploadDataLine instance

        Must be supplied by the subclass.
        '''
        raise NotImplementedError


class NoOpProcessor(DataLineProcessor):
    '''A processor that discards its input.'''
    def __init__(self, ue, uh):
        '''
        ue             UploadEngine instance
        uh             UploadHandler instance
        '''
        super().__init__(ue, uh)

    def eat(self, udl):
        '''
        Does nothing with the line it is given.

        udl  A DataLine instance.
        '''
        return None


class ExecuteSQL(DataLineProcessor):
    '''A processor that executes the sql commands it is given.'''
    def __init__(self, ue, uh):
        '''
        ue             UploadEngine instance
        uh             UploadHandler instance
        '''
        super().__init__(ue, uh)

    def eat(self, sqlc):
        '''
        Executes an sql command in the db, using our cursor.

        sqlc  An SQLCommand instance (a command and its args)
        '''
        cursor = self.cur
        sqlc.execute(cursor)


@attr.s
class DBHandler():
    '''
    Abstract class of a handler that modifies the db.

    The main logic goes here, including getting the data
    into and out of the db.

    Responsible for:
      Telling the form (LoadedForm) to get user data
      Retrieval of data file or generation of other data to go in the db
        (Generally involving an UploadData instance, and hence an UploadLine
        instance.)
      Validation of user supplied data; error raising:
        The html form data
        Whatever other data is uploaded or otherwise passed in
      Pre-processing to prepare for getting data into the db
        (Pre-processing is referred to as "setup" presently.)
      Getting the individual bits of data into and out of the db
        (by way of having a factory that produces a DataLineProcessor)
      Post-processing to wrap up after the data is in the db
        (Post-processing is referred to as "cleanup" presently.)
      Rendering the html output

    The get_data, factory, and cleanup methods have two possible ways
    to report errors.  They can raise a single exception or raise the
    "special" exception MultiError.

    Attributes:
      request       A pyramid request instance
      uf            An UploadForm instance; None until init() is called
      data          Something (beyond what's on the form) that goes into db
    '''
    request = attr.ib()
    uf = attr.ib(default=None)
    data = attr.ib(default=None)

    def init(self):
        '''
        Finish initialization by building the handler's form.

        Returns:
          The handler itself, so the call can be chained onto
          construction.
        '''
        self.uf = self.make_form()
        return self

    def make_form(self):
        '''
        Return an instantiation of the upload form needed
        by the upload handler.

        Abstract; must be supplied by a subclass.
        '''
        raise NotImplementedError

    def get_data(self):
        '''
        Put something that will go into the db into the 'data' attribute.

        Abstract; must be supplied by a subclass.
        '''
        raise NotImplementedError

    def val_input(self):
        '''
        Validate input needed beyond that required to connect to the db.

        Returns:
          A list of exceptions; this base implementation finds none
          and returns a new, empty, list.

        Note that this occurs after read() is called.

        This is expected to be replaced by its subclass.
        '''
        return []

    def read(self):
        '''Read the html form into the upload form instance'''
        form = self.uf
        form.read()

    def write(self, result, errors):
        '''
        Return dict needed to render an html form

        Delegates to the upload form's write().

        Input:
          result  Db connection result dict
          errors  List of Error instances

        Returns:
          Dict pyramid will use to render the resulting form
          Reserved keys:
            errors   A list of core_ex.UploadError exceptions.
        '''
        form = self.uf
        return form.write(result, errors)

    def cleanup(self):
        '''
        Called after all lines are processed to do any final
        updates to the db.

        May raise a single exception or may save multiple exceptions
        and raise MultiError.

        This base implementation does nothing.
        '''
        return None

    def render(self, errors, response):
        '''
        Return to pyramid what's needed to render the appropriate html
        page.

        Input:
          errors      List of Error instances
          response    Db connection result dict

        Returns:
          Dict pyramid will use to render the resulting form
        '''
        return self.write(response, errors)


@attr.s
class SessionDBHandler(DBHandler):
    '''
    A DBHandler that supports sessions.

    Attributes:
      uf            An UploadForm instance
      session       A pyramid session instance; None until init() is called
    '''
    session = attr.ib(default=None)

    def init(self):
        '''
        Initialize session db handler

        Takes the session from the request before running the
        parent's initialization.

        Returns:
          Whatever the parent's init() returns.
        '''
        self.session = self.request.session
        return super().init()

    def write(self, result, errors):
        '''
        Add csrf token into html form to protect sessions.

        A new token is requested from the session on every call.

        Input:
          result  Db connection result dict
          errors  List of Error instances

        Returns:
          Dict pyramid will use to render the resulting form
          Reserved keys:
            errors      A list of core_ex.UploadError exceptions.
            csrf_token  Token for detecting CSRF.
        '''
        rendering = super().write(result, errors)
        rendering['csrf_token'] = self.session.new_csrf_token()
        return rendering


@attr.s
class UploadHandler(SessionDBHandler):
    '''
    Handler for uploading a file.

    Attributes:
      request       A pyramid request instance
      uf            An UploadForm instance
      data          (optional) A DBData instance
    '''
    def factory(self, ue):
        '''
        Takes an UploadEngine instance
        Returns a DataLineProcessor instance
        May raise a single exception or MultiError.

        Abstract; must be supplied by a subclass.
        '''
        raise NotImplementedError

    def val_input(self):
        '''
        Validate input needed beyond that required to connect to the db.

        Adds a NoFileError to the parent's findings when the form's
        filename is the empty string.

        Returns:
          A list of Error instances
        '''
        errors = super().val_input()
        if self.uf['filename'] == '':
            errors.append(core_ex.NoFileError('No file supplied'))
        return errors

    def double_validator(self, errors):
        '''Utility function that can optionally be called by
        a val_input() function.  It checks that the same file
        has not been uploaded twice in succession to the same
        db.

        `errors`
        List of errors.  Appended to.
        '''
        uf = self.uf
        same_as_last = self.make_double_key() == uf['last_key']
        if not same_as_last:
            return

        # The filename comes from the client, so escape it before it
        # is placed in a message.
        safe_name = markupsafe.escape(uf['filename'])
        errors.append(core_ex.DuplicateUploadError(
            'File just uploaded to this db',
            'File named ({0}) just uploaded'.format(safe_name),
            '<p>Try again to force upload.</p>'))

    def hash_sequence(self, seq):
        '''Produce a hash of the elements of a sequence.

        Each element (a string) is prefixed with the hex digest of
        everything hashed so far before being fed to the hash.

        Returns:
          The md5 hex digest, a string.
        '''
        digest = hashlib.md5()
        for element in seq:
            chained = digest.hexdigest() + element
            digest.update(chained.encode('utf-8'))
        return digest.hexdigest()

    def make_double_key(self):
        '''Generate unique value which identifies a double upload.

        The value is a hash of the form's db and filename.
        '''
        form = self.uf
        identifying = (form['db'], form['filename'])
        return self.hash_sequence(identifying)

    def write_double_key(self, response):
        '''Utility function.  Optionally called from within write()
        to save a key which is later tested for to determine if
        the same file is being uploaded twice in a row.

        This function keys by filename and db.

        The key is stored in the form and submitted in POST.
        This way if the session times out due to inactivity
        the double upload is still detected.

        Since the data is delivered to the client and the
        client will likely see it anyway we use a hash not
        for security but just to provide a convenient single
        value that can be tested.

        Input:
          response   Dict used by pyramid to render html form.

        Side effects:
          Modifies response.  Adds 'last_key' entry used by form to store key.
        '''
        key = self.make_double_key()
        response['last_key'] = key

    def write(self, result, errors):
        '''
        Add lines processed and number of errors into html form
        shown the user.

        Input:
          result  Db connection result dict
          errors  List of Error instances

        Returns:
          Dict pyramid will use to render the resulting form
          Reserved keys:
            errors      A list of core_ex.UploadError exceptions.
            csrf_token  Token for detecting CSRF.
            lines       Number of lines processed.  Present only when
                        the handler has data.
            e_cnt       Number of errors.
            db_changed  Boolean. Whether the db was changed.
        '''
        response = super().write(result, errors)

        data = self.data
        if data is not None:
            response['lines'] = data.lineno

        response['e_cnt'] = len(errors)

        # The db changed only when nothing went wrong and the form
        # was actually submitted.
        error_free = not response['errors']
        response['db_changed'] = error_free and self.uf['action'] != ''
        return response


@attr.s
class TabularFileUploadHandler(UploadHandler):
    '''
    Handler for uploading a file with rows and columns and column headings.

    Attributes:
      request       A pyramid request instance
      uf            An UploadForm instance
      data          An UploadData instance
    '''
    def cleanup(self):
        '''Finish after processing all lines.

        Raises:
          core_ex.DataLineError  When no data lines were processed.
        '''
        # An `ue` attribute is not declared by the handler classes; it
        # exists only when a subclass has stored the engine (e.g. in
        # its factory()).  Use the engine's data when there is an
        # engine, otherwise fall back to the handler's own data rather
        # than failing with an AttributeError.
        engine = getattr(self, 'ue', None)
        data = engine.data if engine is not None else self.data

        if data.lineno == 0:
            raise core_ex.DataLineError(
                1,
                'File contains no data',
                ('No lines found after '
                 'initial line of column headings'))


class DBConnector(object):
    '''Abstract class supporting db connection and execution of a
    processor function that alters the db content.

    Responsible for:
      Connecting to the db
      Transaction management (by way of call_alter_db(), which
        subclasses supply)
      Running the DataLineProcessor instances produced by the
        upload handler's factory
      Collecting all errors raised during the processing
      Telling the upload handler to cleanup.  This is done after
        all upload processing and before commit or rollback
        (and before the connection is closed).

    Attributes:
        uh        An UploadHandler instance.
        data      The upload handler's data (set by alter_db())
        cur       A psycopg2 cursor instance (set by alter_db())
        db        Name of db to connect to
        user      User to connect to db
        password  Password to connect to db
        action    Upload form's hidden action variable
                  '' means not submitted, anything else means take action

    Methods:
        run()     Get a DataLineProcessor instance from the upload handler's
                  factory and feed it by iterating over data.
    '''

    def __init__(self, uh):
        '''
        uh         An UploadHandler instance
        '''
        super(DBConnector, self).__init__()

        # Configuration and response management.
        self.uh = uh

    def call_alter_db(self, func, conn):
        '''
        Call a database modification function with a db connection.

        Abstract; must be supplied by a subclass.  The signature
        matches how call_with_connection() invokes it.

        Input:
          func    func(conn) alters the db and returns a list of
                  Error instances
          conn    A database connection

        Returns:
          A list of Error instances

        Side Effects:
          Alters db
        '''

        raise NotImplementedError

    def no_connection_response(self):
        '''
        The result dict to use when call_with_connection is not called.

        Returns:
          Dict pyramid will use to render the resulting form.
          Reserved keys:
            havecreds   Boolean. Supplied credentials can connect to db.
                        Taken from the session; False when the session
                        has no such value.
        '''
        return {'havecreds': self.uh.session.get('havecreds', False)}

    def nodberror_factory(self):
        '''Return the error reported when no db name is supplied.'''
        return core_ex.NoDBError('No database name supplied')

    def nousererror_factory(self):
        '''Return the error reported when no user name is supplied.'''
        return core_ex.NoUserError(
            'No user name supplied as login credentials')

    def authfailerror_factory(self):
        '''Return the error reported when the db connection fails.'''
        return core_ex.AuthFailError(
            'Unable to login',
            'Is the database, user, and password correct?')

    def dryrunerror_factory(self):
        '''Return the error reported when a dry run is rolled back.'''
        return core_ex.DryRunError('Configured for "dry_run":'
                                   ' Transaction deliberately rolled back')

    def upload_data(self, processor, data, errors):
        '''Put a DBData object into the db.

        Abstract; must be supplied by a subclass.  The signature
        matches how alter_db() invokes it.

        Input:
           processor A DataLineProcessor object.
           data      A DBData object.
           errors    A list of errors

        Side Effects:
          Alters db content
          Add errors to `errors`.
        '''
        raise NotImplementedError

    def alter_db(self, conn):
        '''
        Alter the contents of the db.

        Gets the data and a processor from the upload handler,
        feeds the data to the processor by way of upload_data(),
        and then has the upload handler cleanup.

        Input:
          conn    A database connection

        Returns:
          A list of Error instances

        Side Effects:
          Alters db
          Sets the `data` and `cur` attributes
        '''

        errors = []
        try:
            self.uh.get_data()
            self.data = self.uh.data
            self.cur = conn.cursor()
            # (Cannot call uh until after self is fully
            # initialized, including self.cur.)
            processor = self.uh.factory(self)
        except core_ex.MultiError as ex:
            errors.extend(ex.errors)
        except core_ex.PGWUIError as ex:
            errors.append(ex)
        except psycopg2.DatabaseError as ex:
            errors.append(core_ex.DBSetupError(ex))
        else:
            try:
                self.upload_data(processor, self.data, errors)
                # Let upload handler finish
                try:
                    self.uh.cleanup()
                except core_ex.MultiError as ex:
                    errors.extend(ex.errors)
                except core_ex.UploadError as ex:
                    errors.append(ex)
            finally:
                self.cur.close()
        return errors

    def call_with_connection(self, func):
        '''
        Validate input, connect to the db, and do something with
        the connection.

        func(conn)  Call this function with the connection.
                    func(conn) must return a list of core_ex.Error instances

        Returns:
          (errors, response)
          errors       List of Error instances
          response     Dict pyramid will use to render the resulting form.
                       This implementation returns it empty; whether
                       the credentials can connect is recorded in the
                       session, not in the dict.

        Side effects:
          Calls func(conn), by way of call_alter_db(), when the input
          is valid and the connection succeeds.
          Stores 'havecreds' in the upload handler's session.
        '''
        errors = []
        havecreds = False
        response = {}

        if self.db == '':
            errors.append(self.nodberror_factory())

        if self.user == '':
            errors.append(self.nousererror_factory())

        newes = self.uh.val_input()
        if newes:
            # Only the handler's own validation failed when `errors`
            # is still empty here; a db and user were supplied.
            if not errors:
                havecreds = True
            errors.extend(newes)

        if not errors:
            registry = self.uh.request.registry
            try:
                conn = psycopg2.connect(
                    database=self.db,
                    user=self.user,
                    password=self.password,
                    host=registry.settings['pgwui'].get('pg_host'),
                    port=registry.settings['pgwui'].get('pg_port'))
            except psycopg2.OperationalError:
                errors = [self.authfailerror_factory()]
                havecreds = False
            else:
                havecreds = True
                errors = self.call_alter_db(func, conn)

        self.uh.session.update({'havecreds': havecreds})
        return (errors, response)

    def read_uh(self):
        '''Read data into the upload handler.'''
        self.uh.read()

    def read_db(self):
        '''Read the db from the upload handler.'''
        self.db = self.uh.uf['db']

    def read_user(self):
        '''Read the username from the upload handler.'''
        self.user = self.uh.uf['user']

    def read_password(self):
        '''Read the password from the upload handler.

        The password is the empty string when the form has none.
        '''
        if 'password' in self.uh.uf:
            self.password = self.uh.uf['password']
        else:
            self.password = ''

    def read_action(self):
        '''Read the action from the upload handler.'''
        self.action = self.uh.uf['action']

    def read(self):
        '''Allows for custom engines to map what the handler
        has to what they need.
        '''
        self.read_uh()
        self.read_db()
        self.read_user()
        self.read_password()
        self.read_action()

    def run(self):
        '''Call the alter_db function with the connection to alter the db,
        and have the upload handler render the result.

        The db is touched only when the form's action is 'u'.

        Input:

        Returns:
          Whatever the upload handler's render() returns when given:
            errors   List of core_ex.PGWUIError instantiations
            response   Dict containing connection result info

        Side effects:
          Maintains session
          (Whatever the processor() call does)
        '''
        self.read()

        if self.action == 'u':
            errors, response = self.call_with_connection(self.alter_db)
        else:
            errors = []
            response = self.no_connection_response()

        return self.uh.render(errors, response)


class NoTransactionEngine(DBConnector):
    '''An engine that does not run things in a transaction.

    It also does not do CSRF checking.

    Attributes:
        uh        An UploadHandler instance.
        data      An UploadData instance of the uploaded data
        cur       A psycopg2 cursor instance
        db        Name of db to connect to
        user      User to connect to db
        password  Password to connect to db
        action    Upload form's hidden action variable
                  '' means not submitted, anything else means take action

    Methods:
        run()     Get a DataLineProcessor instance from the upload handler's
                  factory and feed it by iterating over data.
    '''
    def __init__(self, uh):
        '''
        uh         An UploadHandler instance
        '''
        super(NoTransactionEngine, self).__init__(uh)

    def call_alter_db(self, func, conn):
        '''
        Call a database modification function with a db connection,
        turning off the automatic wrapping of the activity in
        a transaction.

        func(conn)  Call this function with the connection.
                    func(conn) must return a list of core_ex.PGWUIError
                    instances

        Returns:
          errors       List of core_ex.PGWUIError instances
        Side effects:
          Calls func(conn)
          Closes the connection, even when an exception is raised
        '''
        try:
            conn.set_session(autocommit=True)
            return func(conn)
        finally:
            # Do not leak the connection when something unexpected
            # is raised.
            conn.close()

    def upload_data(self, processor, data, errors):
        '''Put a DBData object into the db, without a wrapping transaction.

        Iterating over `data` yields callables; calling one produces
        the line that is fed to the processor.  Errors in one line do
        not stop the processing of later lines.

        Input:
           processor A DataLineProcessor object.
           data      A DBData object.
           errors    A list of errors

        Side Effects:
          Alters db content
          Add errors to `errors`.
        '''
        for thunk in data:
            try:
                udl = thunk()
            except core_ex.DataLineError as ex:
                errors.append(ex)
            else:
                try:
                    processor.eat(udl)
                except psycopg2.DatabaseError as ex:
                    errors.append(core_ex.DBDataLineError(udl, ex))
                except (core_ex.DataLineError, core_ex.DBError) as ex:
                    errors.append(ex)
                except core_ex.MultiDataLineError as ex:
                    errors.extend(ex.errors)


class UnsafeUploadEngine(DBConnector):
    '''Abstract class supporting repeatedly running a processor to put
       uploaded data into db.

    Attributes:
        uh        An UploadHandler instance.
        data      An UploadData instance of the uploaded data
        cur       A psycopg2 cursor instance
        db        Name of db to connect to
        user      User to connect to db
        password  Password to connect to db
        action    Upload form's hidden action variable
                  '' means not submitted, anything else means take action

    Methods:
        run()     Get a DataLineProcessor instance from the upload handler's
                  factory and feed it by iterating over data.
        eat_old_line(udl, thunk)
                  Trap errors raised by the db while running thunk.
                  Report any errors as due to the udl UploadDataLine
                  instance.
    '''
    def __init__(self, uh):
        '''
        uh         An UploadHandler instance
        '''
        super(UnsafeUploadEngine, self).__init__(uh)

    def call_alter_db(self, func, conn):
        '''
        Call a database modification function with a connection.

        The work is rolled back when func(conn) reports errors or
        when the 'dry_run' setting evaluates true.  Otherwise it is
        committed.

        func(conn)  Call this function with the connection.
                    func(conn) must return a list of core_ex.PGWUIError
                    instances

        Returns:
          errors       List of core_ex.PGWUIError instances
        Side effects:
          Calls func(conn)
          Commits or rolls back
          Closes the connection, even when an exception is raised
        '''
        try:
            errors = func(conn)

            if errors:
                conn.rollback()
            elif ast.literal_eval(
                    self.uh.request.registry.settings['pgwui']['dry_run']):
                conn.rollback()
                errors.append(self.dryrunerror_factory())
            else:
                try:
                    conn.commit()
                except psycopg2.DatabaseError as ex:
                    errors.append(core_ex.DBCommitError(ex))
        finally:
            # Do not leak the connection when something unexpected
            # is raised.
            conn.close()
        return errors

    def eat_old_line(self, udl, thunk):
        '''
        Run thunk and report any errors raised by the db as due to
        the UploadDataLine instance udl.

        Returns:
          The result of running thunk.

        Raises:
          core_ex.DBDataLineError  When thunk raises a
                                   psycopg2.DatabaseError
        '''
        try:
            result = thunk()
        except psycopg2.DatabaseError as ex:
            raise core_ex.DBDataLineError(udl, ex) from ex
        else:
            return result

    def upload_data(self, processor, data, errors):
        '''Put a DBData object into the db.

        Iterating over `data` yields callables; calling one produces
        the line that is fed to the processor.  Each line is processed
        within its own savepoint, so a failed line is undone and later
        lines can still be processed.

        Input:
           processor A DataLineProcessor object.
           data      A DBData object.
           errors    A list of errors

        Side Effects:
          Alters db content
          Add errors to `errors`.
        '''
        for thunk in data:
            try:
                udl = thunk()
            except core_ex.DataLineError as ex:
                errors.append(ex)
            else:
                self.cur.execute(
                    'SAVEPOINT line_savepoint;')
                try:
                    processor.eat(udl)
                except psycopg2.DatabaseError as ex:
                    self.cur.execute(
                        'ROLLBACK TO line_savepoint;')
                    errors.append(core_ex.DBDataLineError(udl, ex))
                except (core_ex.DataLineError, core_ex.DBError) as ex:
                    self.cur.execute(
                        'ROLLBACK TO line_savepoint;')
                    errors.append(ex)
                except core_ex.MultiDataLineError as ex:
                    self.cur.execute(
                        'ROLLBACK TO line_savepoint;')
                    errors.extend(ex.errors)
                else:
                    self.cur.execute(
                        'RELEASE line_savepoint;')


class UploadEngine(UnsafeUploadEngine):
    '''Abstract class supporting repeatedly running a processor
    to alter db content.  Adds cross-site request forgery checking
    to the UnsafeUploadEngine class.

    Attributes:
        uh        An UploadHandler instance.
        cur       A psycopg2 cursor instance
        db        Name of db to connect to
        user      User to connect to db
        password  Password to connect to db
        action    Upload form's hidden action variable
                  '' means not submitted, anything else means take action
        csrf_token The client's csrf_token; None when none was posted

    Methods:
        run()     Get a DataLineProcessor instance from the upload handler's
                  factory and feed it by iterating over data.
    '''

    def __init__(self, uh):
        '''
        uh         An UploadHandler instance
        '''
        super().__init__(uh)

    def csrferror_factory(self):
        '''Return the error reported when the CSRF check fails.'''
        summary = 'Your request failed and you are now logged out'
        description = ('This is a security measure. '
                       'Some possible causes are:')
        causes = ('<ul>'
                  '<li>Cookies are off in the browser</li> '
                  '<li>Reload of an expired page</li> '
                  '  <ul>'
                  '  <li>Use of the Back button to (re)submit'
                  '      an old page</li>'
                  '  <li>An attacker has crafted a page which resubmits'
                  '      one of your old page submissions</li>'
                  '  </ul> '
                  '<li>Server restart</li> '
                  '<li>Login session expired due to inactivity</li>'
                  '</ul>')
        return core_ex.CSRFError(summary, description, causes)

    def read_csrf_token(self):
        '''Save the csrf token posted by the client, or None
        when the POST has no token.
        '''
        post = self.uh.request.POST
        self.csrf_token = (post['csrf_token']
                           if 'csrf_token' in post
                           else None)

    def read(self):
        '''Read what the parent reads and, additionally, the
        csrf token.
        '''
        super().read()
        self.read_csrf_token()

    def call_with_connection(self, func):
        '''
        Check the client's csrf token against the session's and, when
        they match, connect to the db by way of the parent class.

        When the tokens differ the session is invalidated and
        the db is not contacted.

        func(conn)  Passed, unaltered, to the parent's
                    call_with_connection().

        Returns:
          (errors, response)
          errors     List of core_ex.PGWUIError instances
          response     Dict pyramid will use to render the resulting form.
                       Reserved keys:
                         havecreds   Boolean. Supplied credentials can
                                     connect to db.  (Added here only
                                     when the csrf check fails.)
                         session_expired  Boolean.  Session expired.
        Side effects:
          Invalidates the session or calls the parent's
          call_with_connection()
        '''
        session = self.uh.session

        expected = session.get_csrf_token()
        if expected != self.csrf_token:
            # Token mismatch; drop the session and report the failure.
            session.invalidate()
            response = self.no_connection_response()
            response['session_expired'] = True
            return ([self.csrferror_factory()], response)

        errors, response = super().call_with_connection(func)
        response['session_expired'] = False
        return (errors, response)
