// Copyright (C) 2015,2017 Internet Systems Consortium, Inc. ("ISC")
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef VERSIONED_CSV_FILE_H
#define VERSIONED_CSV_FILE_H

#include <util/csv_file.h>

namespace isc {
namespace util {

/// @brief Exception thrown when an error occurs during CSV file processing.
class VersionedCSVFileError : public Exception {
public:
    /// @brief Constructor
    ///
    /// All three arguments are forwarded unchanged to the isc::Exception
    /// base class constructor.
    ///
    /// @param file Passed through as the first base class argument.
    /// @param line Passed through as the second base class argument.
    /// @param what Passed through as the third base class argument.
    VersionedCSVFileError(const char* file, size_t line, const char* what)
        : isc::Exception(file, line, what) {
    }
};

/// @brief Describes a single column of a versioned CSV file schema.
///
/// Simple holder for the column's name, the schema version in which the
/// column first appeared, and the value to use when the column is not
/// present in a data row.
class VersionedColumn {
public:
    /// @brief Constructor
    ///
    /// @param name Name of the column.
    /// @param version Text representation of the schema version in which
    /// this column first appeared.
    /// @param default_value The value the column should be assigned if it
    /// is not present in a data row. It defaults to an empty string, "".
    VersionedColumn(const std::string& name,
                    const std::string& version,
                    const std::string& default_value = "")
        : name_(name),
          version_(version),
          default_value_(default_value) {
    }

    /// @brief Destructor
    virtual ~VersionedColumn() {
    }

    /// @brief Name of the column.
    std::string name_;

    /// @brief Text representation of the schema version in which
    /// this column first appeared.
    std::string version_;

    /// @brief Value the column should be assigned if it is not present
    /// in a data row.
    std::string default_value_;
};

/// @brief Defines a shared pointer (boost::shared_ptr) to a VersionedColumn.
///
/// This is the element type of the column list held by VersionedCSVFile and
/// the type returned by VersionedCSVFile::getVersionedColumn().
typedef boost::shared_ptr<VersionedColumn> VersionedColumnPtr;

/// @brief Implements a CSV file that supports multiple versions of
/// the file's "schema".  This allows files with older schemas to be
/// upgraded to newer schemas as they are being read.  The file's schema
/// is defined through a list of column descriptors, or @ref
/// isc::util::VersionedColumn(s). Each descriptor contains metadata describing
/// the column, consisting of the column's name, the version label in which
/// the column was added to the schema, and a default value to be used if the
/// column is missing from the file.  Note that the column descriptors are
/// defined in the order they occur in the file, when reading a row from left
/// to right.  This also assumes that as the schema evolves to a new version,
/// all new columns are added at the end of the row.  In other words, the
/// order of the columns reflects not only the order in which they occur
/// in a row but also the order they were added to the schema.  Conceptually,
/// the entire list of columns defined constitutes the current schema.  Earlier
/// schema versions are therefore subsets of this list.   Creating the schema
/// is done by calling VersionedCSVFile::addColumn() for each column.  Note
/// that the schema must be defined prior to opening the file.
///
/// The first row of the file is always the header row and is a comma-separated
/// list of the names of the columns in the file.  This row is used when
/// opening the file via @ref VersionedCSVFile::open(), to identify its schema
/// version so that it may be read correctly.  This is done by comparing
/// the columns found in the header to the columns defined in the schema. The
/// columns must match both by name and the order in which they occur.
///
/// -# If there are fewer columns in the header than in the schema, the file
/// is presumed to be an earlier schema version and will be upgraded as it is
/// read.  There is an ability to mark a specific column as being the minimum
/// column which must be present, see @ref VersionedCSVFile::setMinimumValidColumns().
/// If the header columns do not match up to this
/// minimum column, the file is presumed to be too old to upgrade and the
/// open will fail.  A valid, upgradable file will have an input schema
/// state of VersionedCSVFile::NEEDS_UPGRADE.
///
/// -# If there is a mismatch between a found column name and the column name
/// defined for that position in the row, the file is presumed to be invalid
/// and the open will fail.
///
/// -# If the content of the header matches exactly the columns defined in
/// the schema, the file is considered to match the schema exactly and the
/// input schema state will be VersionedCSVFile::CURRENT.
///
/// -# If there are columns in the header beyond all of the columns defined in
/// the schema (i.e. the schema is a subset of the header), then the file
/// is presumed to be from a newer version of Kea and can be downgraded. The
/// input schema state of the file will be set to
/// VersionedCSVFile::NEEDS_DOWNGRADE.
///
/// After successfully opening a file, rows are read one at a time via
/// @ref VersionedCSVFile::next() and handled according to the input schema
/// state.   Each data row is expected to have at least the same number of
/// columns as were found in the header. Any row which has fewer values is
/// discarded as invalid.  Similarly, any row which is found to have more
/// values than were found in the header is discarded as invalid.
///
/// When upgrading a row, the value for each missing column is filled in
/// with the default value specified by that column's descriptor.  When
/// downgrading a row, extraneous values are dropped from the row.
///
/// It is important to note that upgrading or downgrading a file does NOT
/// alter the physical file itself.  Rather the conversion occurs after the
/// raw data has been read but before it is passed to the caller.
///
/// Also note that there is currently no support for writing out a file in
/// anything other than the current schema.
class VersionedCSVFile : public CSVFile {
public:

    /// @brief Possible input file schema states.
    /// Used to categorize the input file's schema, relative to the defined
    /// schema.
    enum InputSchemaState {
        /// Input header matches the defined schema exactly.
        CURRENT,
        /// Input header has fewer columns than the defined schema; the
        /// file is from an earlier schema version.
        NEEDS_UPGRADE,
        /// Input header has columns beyond those in the defined schema; the
        /// file is presumed to be from a newer schema version.
        NEEDS_DOWNGRADE
    };

    /// @brief Constructor.
    ///
    /// @param filename CSV file name.
    VersionedCSVFile(const std::string& filename);

    /// @brief Destructor
    virtual ~VersionedCSVFile();

    /// @brief Adds metadata for a single column to the schema.
    ///
    /// This method appends a new column description to the file's schema.
    /// Note this does not cause anything to be written to the physical file.
    /// The name of the column will be placed in the CSV header when a new
    /// file is created by calling the @c recreate or @c open function.
    ///
    /// @param col_name Name of the column.
    /// @param version  Text representation of the schema version in which
    /// this column first appeared.
    /// @param default_value Value the missing column should be given during
    /// an upgrade.  It defaults to an empty string, "".
    ///
    /// @throw CSVFileError if a column with the specified name exists.
    void addColumn(const std::string& col_name, const std::string& version,
                   const std::string& default_value = "");

    /// @brief Sets the minimum number of valid columns based on a given column
    ///
    /// @param column_name Name of the column which positionally represents
    /// the minimum columns which must be present in a file for it to be
    /// considered valid.
    void setMinimumValidColumns(const std::string& column_name);

    /// @brief Returns the minimum number of columns which must be present
    /// for the file to be considered valid.
    size_t getMinimumValidColumns() const;

    /// @brief Returns the number of columns found in the input header
    size_t getInputHeaderCount() const;

    /// @brief Returns the number of valid columns found in the header
    /// For newly created files this will always match the number of defined
    /// columns (i.e. getColumnCount()).  For existing files, this will be
    /// the number of columns in the header that match the defined columns.
    /// When this number is less than getColumnCount() it means the input file
    /// is from an earlier schema.  This value is zero until the file has
    /// been opened.
    size_t getValidColumnCount() const;

    /// @brief Opens existing file or creates a new one.
    ///
    /// This function will try to open an existing file if this file has size
    /// greater than 0. If the file doesn't exist or has size of 0, the
    /// file is recreated. If the existing file has been opened, the header
    /// is parsed and validated against the schema.
    /// By default, the data pointer in the file is set to the beginning of
    /// the first data row. In order to retrieve the row contents the @c next
    /// function should be called. If the @c seek_to_end parameter is set to
    /// true, the file will be opened and the internal pointer will be set
    /// to the end of file.
    ///
    /// @param seek_to_end A boolean value which indicates if the input and
    /// output file pointer should be set at the end of file.
    ///
    /// @throw VersionedCSVFileError if schema has not been defined;
    /// CSVFileError when an IO operation fails, or header fails to validate.
    virtual void open(const bool seek_to_end = false);

    /// @brief Creates a new CSV file.
    ///
    /// The file creation will fail if there are no columns specified.
    /// Otherwise, this function will write the header to the file.
    /// In order to write rows to the opened file, the @c append function
    /// should be called.
    ///
    /// @throw VersionedCSVFileError if schema has not been defined;
    /// CSVFileError if an IO operation fails.
    virtual void recreate();

    /// @brief Reads next row from the file.
    ///
    /// This function will return the @c CSVRow object representing a
    /// parsed row if parsing is successful. If the end of file has been
    /// reached, the empty row is returned (a row containing no values).
    ///
    /// 1. If the row has fewer values than were found in the header it is
    /// discarded as invalid.
    ///
    /// 2. If the row is found to have more values than are defined in the
    /// schema it is discarded as invalid.
    ///
    /// When a valid row has fewer than the defined number of columns, the
    /// value for each missing column is filled in with the default value
    /// specified by that column's descriptor.
    ///
    /// @param [out] row Object receiving the parsed CSV row.
    ///
    /// @return true if row has been read and validated; false if validation
    /// failed.
    bool next(CSVRow& row);

    /// @brief Returns the schema version of the physical file
    ///
    /// @return text version of the schema found or string "undefined" if the
    /// file has not been opened
    std::string getInputSchemaVersion() const;

    /// @brief Returns the text version of the current schema supported by
    /// the file's metadata
    ///
    /// @return text version info assigned to the last column in the list of
    /// defined columns, or the string "undefined" if no columns have been
    /// defined.
    std::string getSchemaVersion() const;

    /// @brief Fetch the column descriptor for a given index
    ///
    /// @param index index within the list of columns of the desired column
    /// @return a pointer to the VersionedColumn at the given index
    /// @throw OutOfRange exception if the index is invalid
    const VersionedColumnPtr& getVersionedColumn(const size_t index) const;

    /// @brief Fetches the state of the input file's schema
    ///
    /// Reflects the state of the input file's schema relative to the
    /// defined schema as an enum, InputSchemaState.
    ///
    /// @return VersionedCSVFile::CURRENT if the input file schema matches
    /// the defined schema, NEEDS_UPGRADE if the input file schema is older,
    /// and NEEDS_DOWNGRADE if it is newer
    enum InputSchemaState getInputSchemaState() const;

    /// @brief Returns true if the input file schema state is not CURRENT
    bool needsConversion() const;

protected:

    /// @brief Validates the header of a VersionedCSVFile
    ///
    /// This function is called internally when reading in an existing
    /// file.  It parses the header row of the file, comparing each value
    /// in succession against the defined list of columns.  If the header
    /// contains too few matching columns (i.e. less than @c
    /// minimum_valid_columns_) or too many (more than the number of defined
    /// columns), the file is presumed to be either too old, too new, or too
    /// corrupt to process.  Otherwise it retains the number of valid columns
    /// found and deems the header valid.
    ///
    /// @param header A row holding a header.
    /// @return true if header matches the columns; false otherwise.
    virtual bool validateHeader(const CSVRow& header);

    /// @brief Convenience method for adding an error message
    ///
    /// Constructs an error message indicating that the number of columns
    /// in a given row is wrong and why, then adds it to readMsg.
    ///
    /// @param row The row in error
    /// @param reason An explanation as to why the row column count is wrong
    void columnCountError(const CSVRow& row, const std::string& reason);

private:
    /// @brief Holds the collection of column descriptors
    std::vector<VersionedColumnPtr> columns_;

    /// @brief Number of valid columns present in input file. If this is less
    /// than the number of columns defined, this implies the input file is
    /// from an earlier version of the code.
    size_t valid_column_count_;

    /// @brief Minimum number of valid columns an input file must contain.
    /// If an input file does not meet this number it cannot be upgraded.
    size_t minimum_valid_columns_;

    /// @brief The number of columns found in the input header row
    /// This value represents the number of columns present in the header,
    /// valid or otherwise.
    size_t input_header_count_;

    /// @brief The state of the input schema in relation to the current schema
    enum InputSchemaState input_schema_state_;
};


} // namespace isc::util
} // namespace isc

#endif // VERSIONED_CSV_FILE_H