// Copyright (C) 2014-2020 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef CSV_FILE_H #define CSV_FILE_H #include #include #include #include #include #include #include namespace isc { namespace util { /// @brief Exception thrown when an error occurs during CSV file processing. class CSVFileError : public Exception { public: CSVFileError(const char* file, size_t line, const char* what) : isc::Exception(file, line, what) { }; }; /// @brief Represents a single row of the CSV file. /// /// The object of this type can create the string holding a collection of the /// comma separated values, representing a row of the CSV file. It allows the /// selection of any character as a separator for the values. The default /// separator is the comma symbol. /// /// The @c CSVRow object can be constructed in two different ways. The first /// option is that the caller creates an object holding empty values /// and then adds values one by one. Note that it is possible to either add /// a string or a number. The number is converted to the appropriate text /// representation. When all the values are added, the text representation of /// the row can be obtained by calling @c CSVRow::render function or output /// stream operator. /// /// The @c CSVRow object can be also constructed by parsing a row of a CSV /// file. In this case, the separator has to be known in advance and passed to /// the class constructor. The constructor will call the @c CSVRow::parse /// function internally to tokenize the CSV row and create the collection of /// values. The class accessors can be then used to retrieve individual values. /// /// This class is meant to be used by the @c CSVFile class to manipulate /// individual rows of the CSV file. class CSVRow { public: /// @brief Constructor, creates the raw to be used for output. /// /// Creates CSV row with empty values. The values should be /// later set using the @c CSVRow::writeAt functions. When the /// @c CSVRow::render is called, the text representation of the /// row will be created using a separator character specified /// as an argument of this constructor. /// /// This constructor is exception-free. /// /// @param cols Number of values in the row. /// @param separator Character used as a separator between values in the /// text representation of the row. CSVRow(const size_t cols = 0, const char separator = ','); /// @brief Constructor, parses a single row of the CSV file. /// /// This constructor should be used to parse a single row of the CSV /// file. The separator being used for the particular row needs to /// be known in advance and specified as an argument of the constructor /// if other than the default separator is used in the row being parsed. /// An example string to be parsed by this function looks as follows: /// "foo,bar,foo-bar". /// /// This constructor is exception-free. /// /// @param text Text representation of the CSV row. /// @param separator Character being used as a separator in a parsed file. CSVRow(const std::string& text, const char separator = ','); /// @brief Returns number of values in a CSV row. size_t getValuesCount() const { return (values_.size()); } /// @brief Parse the CSV file row. /// /// This function parses a string containing CSV values and assigns them /// to the @c values_ private container. These values can be retrieved /// from the container by calling @c CSVRow::readAt function. /// /// This function is exception-free. /// /// @param line String holding a row of comma separated values. void parse(const std::string& line); /// @brief Retrieves a value from the internal container. /// /// @param at Index of the value in the container. The values are indexed /// from 0, where 0 corresponds to the left-most value in the CSV file row. /// /// @return Value at specified index in the text form. /// /// @throw CSVFileError if the index is out of range. The number of elements /// being held by the container can be obtained using /// @c CSVRow::getValuesCount. std::string readAt(const size_t at) const; /// @brief Retrieves a value from the internal container, free of escaped /// characters. /// /// Returns a copy of the internal container value at the given index /// which has had all escaped characters replaced with their unescaped /// values. Escaped characters embedded using the following format: /// /// This function fetches the value at the given index and passes it /// into CSVRow::unescapeCharacters which replaces any escaped special /// characters with their unescaped form. /// /// @param at Index of the value in the container. The values are indexed /// from 0, where 0 corresponds to the left-most value in the CSV file row. /// /// @return Value at specified index in the text form. /// /// @throw CSVFileError if the index is out of range. The number of elements /// being held by the container can be obtained using /// @c CSVRow::getValuesCount. std::string readAtEscaped(const size_t at) const; /// @brief Trims a given number of elements from the end of a row /// /// @param count number of elements to trim /// /// @throw CSVFileError if the number to trim is larger than /// then the number of elements void trim(const size_t count); /// @brief Retrieves a value from the internal container. /// /// This method is reads a value from the internal container and converts /// this value to the type specified as a template parameter. Internally /// it uses @c boost::lexical_cast. /// /// @param at Index of the value in the container. The values are indexed /// from 0, where 0 corresponds to the left-most value in the CSV file row. /// @tparam T type of the value to convert to. /// /// @return Converted value. /// /// @throw CSVFileError if the index is out of range or if the /// @c boost::bad_lexical_cast is thrown by the @c boost::lexical_cast. template T readAndConvertAt(const size_t at) const { T cast_value; try { cast_value = boost::lexical_cast(readAt(at).c_str()); } catch (const boost::bad_lexical_cast& ex) { isc_throw(CSVFileError, ex.what()); } return (cast_value); } /// @brief Creates a text representation of the CSV file row. /// /// This function iterates over all values currently held in the internal /// @c values_ container and appends them to a string. The values are /// separated using the separator character specified in the constructor. /// /// This function is exception free. /// /// @return Text representation of the CSV file row. std::string render() const; /// @brief Replaces the value at specified index. /// /// This function is used to set values to be rendered using /// @c CSVRow::render function. /// /// @param at Index of the value to be replaced. /// @param value Value to be written given as string. /// /// @throw CSVFileError if index is out of range. void writeAt(const size_t at, const char* value); /// @brief Replaces the value at specified index. /// /// This function is used to set values to be rendered using /// @c CSVRow::render function. /// /// @param at Index of the value to be replaced. /// @param value Value to be written given as string. /// /// @throw CSVFileError if index is out of range. void writeAt(const size_t at, const std::string& value) { writeAt(at, value.c_str()); } /// @brief Replaces the value at the specified index with a value that has /// had special characters escaped /// /// This function first calls @c CSVRow::escapeCharacters to replace /// special characters with their escaped form. It then sets the value /// to be rendered using @c CSVRow::render function. /// /// @param at Index of the value to be replaced. /// @param value Value to be written given as string. /// /// @throw CSVFileError if index is out of range. void writeAtEscaped(const size_t at, const std::string& value); /// @brief Appends the value as a new column. /// /// @param value Value to be written. /// @tparam T Type of the value being written. template void append(const T value) { try { values_.push_back(boost::lexical_cast(value)); } catch (const boost::bad_lexical_cast& ex) { isc_throw(CSVFileError, "unable to stringify the value to be " "appended to the CSV file row."); } } /// @brief Replaces the value at specified index. /// /// This function is used to set values to be rendered using /// @c CSVRow::render function. /// /// @param at Index of the value to be replaced. /// @param value Value to be written - typically a number. /// @tparam T Type of the value being written. /// /// @throw CSVFileError if index is out of range. template void writeAt(const size_t at, const T value) { checkIndex(at); try { values_[at] = boost::lexical_cast(value); } catch (const boost::bad_lexical_cast& ex) { isc_throw(CSVFileError, "unable to stringify the value to be" " written in the CSV file row at position '" << at << "'"); } } /// @brief Equality operator. /// /// Two CSV rows are equal when their string representation is equal. This /// includes the order of fields, separator etc. /// /// @param other Object to compare to. bool operator==(const CSVRow& other) const { return (render() == other.render()); } /// @brief Unequality operator. /// /// Two CSV rows are unequal when their string representation is unequal. /// This includes the order of fields, separator etc. /// /// @param other Object to compare to. bool operator!=(const CSVRow& other) const { return (render() != other.render()); } /// @brief Returns a copy of a string with special characters escaped /// /// @param orig_str string which may contain characters that require /// escaping. /// @param characters list of characters which require escaping. /// /// The escaped characters will use the following format: /// /// @verbatim /// &#x{xx} /// @endverbatim /// /// where {xx} is the two digit hexadecimal ASCII value of the character /// escaped. A comma, for example is: /// /// &\#x2c /// /// @return A copy of the original string with special characters escaped. static std::string escapeCharacters(const std::string& orig_str, const std::string& characters); /// @brief Returns a copy of a string with special characters unescaped /// /// This function reverses the escaping of characters done by @c /// CSVRow::escapeCharacters. /// /// @param escaped_str string which may contain escaped characters. /// /// @return A string free of escaped characters static std::string unescapeCharacters(const std::string& escaped_str); private: /// @brief Check if the specified index of the value is in range. /// /// This function is used internally by other functions. /// /// @param at Value index. /// @throw CSVFileError if specified index is not in range. void checkIndex(const size_t at) const; /// @brief Separator character specified in the constructor. /// /// @note Separator is held as a string object (one character long), /// because the boost::is_any_of algorithm requires a string, not a /// char value. If we held the separator as a char, we would need to /// convert it to string on every call to @c CSVRow::parse. std::string separator_; /// @brief Internal container holding values that belong to the row. std::vector values_; /// @brief Prefix used to escape special characters. static const std::string escape_tag; }; /// @brief Overrides standard output stream operator for @c CSVRow object. /// /// The resulting string of characters is the same as the one returned by /// @c CSVRow::render function. /// /// @param os Output stream. /// @param row Object representing a CSV file row. std::ostream& operator<<(std::ostream& os, const CSVRow& row); /// @brief Provides input/output access to CSV files. /// /// This class provides basic methods to access (parse) and create CSV files. /// The file is identified by its name qualified with the absolute path. /// The name of the file is passed to the constructor. Constructor doesn't /// open/create a file, but simply records a file name specified by a caller. /// /// There are two functions that can be used to open a file: /// - @c open - opens an existing file; if the file doesn't exist it creates it, /// - @c recreate - removes existing file and creates a new one. /// /// When the file is opened its header file is parsed and column names are /// identified. At this point it is already possible to get the list of the /// column names using appropriate accessors. The data rows are not parsed /// at this time. The row parsing is triggered by calling @c next function. /// The result of parsing a row is stored in the @c CSVRow object passed as /// a parameter. /// /// When the new file is created (when @c recreate is called), the CSV header is /// immediately written into it. The header consists of the column names /// specified with the @c addColumn function. The subsequent rows are written /// into this file by calling @c append. class CSVFile { public: /// @brief Constructor. /// /// @param filename CSV file name. CSVFile(const std::string& filename); /// @brief Destructor virtual ~CSVFile(); /// @brief Adds new column name. /// /// This column adds a new column but doesn't write it to the file yet. /// The name of the column will be placed in the CSV header when new file /// is created by calling @c recreate or @c open function. /// /// @param col_name Name of the column. /// /// @throw CSVFileError if a column with the specified name exists. void addColumn(const std::string& col_name); /// @brief Writes the CSV row into the file. /// /// @param row Object representing a CSV file row. /// /// @throw CSVFileError When error occurred during IO operation or if the /// size of the row doesn't match the number of columns. void append(const CSVRow& row) const; /// @brief Closes the CSV file. void close(); /// @brief Checks if the CSV file exists and can be opened for reading. /// /// This method doesn't check if the existing file has a correct file /// format. /// /// @return true if file exists, false otherwise. bool exists() const; /// @brief Flushes a file. void flush() const; /// @brief Returns the number of columns in the file. size_t getColumnCount() const { return (cols_.size()); } /// @brief Returns the path to the CSV file. std::string getFilename() const { return (filename_); } /// @brief Returns the description of the last error returned by the /// @c CSVFile::next function. /// /// @return Description of the last error during row validation. std::string getReadMsg() const { return (read_msg_); } /// @brief Returns the index of the column having specified name. /// /// This function is exception safe. /// /// @param col_name Name of the column. /// @return Index of the column. /// @throw OutOfRange if column with such name doesn't exist. size_t getColumnIndex(const std::string& col_name) const; /// @brief Returns the name of the column. /// /// @param col_index Index of the column. /// /// @return Name of the column. /// @throw CSVFileError if the specified index is out of range. std::string getColumnName(const size_t col_index) const; /// @brief Reads next row from CSV file. /// /// This function will return the @c CSVRow object representing a /// parsed row if parsing is successful. If the end of file has been /// reached, the empty row is returned (a row containing no values). /// /// @param [out] row Object receiving the parsed CSV file. /// @param skip_validation Do not perform validation. /// /// @return true if row has been read and validated; false if validation /// failed. bool next(CSVRow& row, const bool skip_validation = false); /// @brief Opens existing file or creates a new one. /// /// This function will try to open existing file if this file has size /// greater than 0. If the file doesn't exist or has size of 0, the /// file is recreated. If the existing file has been opened, the header /// is parsed and column names are initialized in the @c CSVFile object. /// By default, the data pointer in the file is set to the beginning of /// the first row. In order to retrieve the row contents the @c next /// function should be called. If a @c seek_to_end parameter is set to /// true, the file will be opened and the internal pointer will be set /// to the end of file. /// /// @param seek_to_end A boolean value which indicates if the input and /// output file pointer should be set at the end of file. /// /// @throw CSVFileError when IO operation fails. virtual void open(const bool seek_to_end = false); /// @brief Creates a new CSV file. /// /// The file creation will fail if there are no columns specified. /// Otherwise, this function will write the header to the file. /// In order to write rows to opened file, the @c append function /// should be called. virtual void recreate(); /// @brief Sets error message after row validation. /// /// The @c CSVFile::validate function is responsible for setting the /// error message after validation of the row read from the CSV file. /// It will use this function to set this message. Note, that the /// @c validate function can set a message after successful validation /// too. Such message could say "success", or something similar. /// /// @param read_msg Error message to be set. void setReadMsg(const std::string& read_msg) { read_msg_ = read_msg; } /// @brief Represents empty row. static CSVRow EMPTY_ROW() { static CSVRow row(0); return (row); } protected: /// @brief Adds a column regardless if the file is open or not. /// /// This function adds as new column to the collection. It is meant to be /// called internally by the methods of the base class and derived classes. /// It must not be used in the public scope. The @c CSVFile::addColumn /// must be used in the public scope instead, because it prevents addition /// of the new column when the file is open. /// /// @param col_name Name of the column. /// /// @throw CSVFileError if a column with the specified name exists. void addColumnInternal(const std::string& col_name); /// @brief Validate the row read from a file. /// /// This function implements a basic validation for the row read from the /// CSV file. It is virtual so as it may be customized in derived classes. /// /// This default implementation checks that the number of values in the /// row corresponds to the number of columns specified for this file. /// /// If row validation fails, the error message is noted and can be retrieved /// using @c CSVFile::getReadMsg. The function which overrides this /// base implementation is responsible for setting the error message using /// @c CSVFile::setReadMsg. /// /// @param row A row to be validated. /// /// @return true if the column is valid; false otherwise. virtual bool validate(const CSVRow& row); protected: /// @brief This function validates the header of the CSV file. /// /// If there are any columns added to the @c CSVFile object, it will /// compare that they exactly match (including order) the header read /// from the file. /// /// This function is called internally by @ref CSVFile::open. Derived classes /// may add extra validation steps. /// /// @param header A row holding a header. /// @return true if header matches the columns; false otherwise. virtual bool validateHeader(const CSVRow& header); private: /// @brief Sanity check if stream is open. /// /// Checks if the file stream is open so as IO operations can be performed /// on it. This is internally called by the public class members to prevent /// them from performing IO operations on invalid stream and using NULL /// pointer to a stream. The @c clear() method is called on the stream /// after the status has been checked. /// /// @throw CSVFileError if stream is closed or pointer to it is NULL. void checkStreamStatusAndReset(const std::string& operation) const; /// @brief Returns size of the CSV file. std::streampos size() const; /// @brief CSV file name. std::string filename_; /// @brief Holds a pointer to the file stream. boost::shared_ptr fs_; /// @brief Holds CSV file columns. std::vector cols_; /// @brief Holds last error during row reading or validation. std::string read_msg_; }; } // namespace isc::util } // namespace isc #endif // CSV_FILE_H