diff options
author | Marcin Siodelski <marcin@isc.org> | 2014-03-14 15:37:57 +0100 |
---|---|---|
committer | Marcin Siodelski <marcin@isc.org> | 2014-03-14 15:37:57 +0100 |
commit | 6199f3a5ea37a65a2405e571a5b189dc127af138 (patch) | |
tree | 4f4f89383f9f5de8b3eff9afe97264663f9363ba /src/lib/util | |
parent | [3360] Implemented base class for read/write in CSV file. (diff) | |
download | kea-6199f3a5ea37a65a2405e571a5b189dc127af138.tar.xz kea-6199f3a5ea37a65a2405e571a5b189dc127af138.zip |
[3360] Added CSV file validation.
Diffstat (limited to 'src/lib/util')
-rw-r--r-- | src/lib/util/csv_file.cc | 144 | ||||
-rw-r--r-- | src/lib/util/csv_file.h | 77 | ||||
-rw-r--r-- | src/lib/util/tests/csv_file_unittest.cc | 147 |
3 files changed, 334 insertions, 34 deletions
diff --git a/src/lib/util/csv_file.cc b/src/lib/util/csv_file.cc index 10ce9e5cde..6fc44ca4b6 100644 --- a/src/lib/util/csv_file.cc +++ b/src/lib/util/csv_file.cc @@ -124,7 +124,8 @@ CSVRow::checkIndex(const size_t at) const { } CSVFile::CSVFile(const std::string& filename) - : primary_separator_(','), filename_(filename), fs_() { + : primary_separator_(','), filename_(filename), fs_(), cols_(0), + read_msg_() { } CSVFile::~CSVFile() { @@ -142,6 +143,12 @@ CSVFile::close() { } void +CSVFile::flush() const { + checkStreamStatus("flush"); + fs_->flush(); +} + +void CSVFile::addColumn(const std::string& col_name) { if (getColumnIndex(col_name) >= 0) { isc_throw(CSVFileError, "attempt to add duplicate column '" @@ -152,10 +159,10 @@ CSVFile::addColumn(const std::string& col_name) { void CSVFile::append(const CSVRow& row) const { - if (!fs_) { - isc_throw(CSVFileError, "unable to write a row to the CSV file '" - << filename_ << "', which is closed"); - } + checkStreamStatus("append"); + + // If a stream is in invalid state, reset the state. + fs_->clear(); if (row.getValuesCount() != getColumnCount()) { isc_throw(CSVFileError, "number of values in the CSV row '" @@ -176,6 +183,19 @@ CSVFile::append(const CSVRow& row) const { } } +void +CSVFile::checkStreamStatus(const std::string& operation) const { + if (!fs_) { + isc_throw(CSVFileError, "NULL stream pointer when performing '" + << operation << "' on file '" << filename_ << "'"); + + } else if (!fs_->is_open()) { + isc_throw(CSVFileError, "closed stream when performing '" + << operation << "' on file '" << filename_ << "'"); + + } +} + std::ifstream::pos_type CSVFile::size() const { std::ifstream fs(filename_.c_str()); @@ -186,11 +206,16 @@ CSVFile::size() const { fs.close(); return (0); } - // Seek to the end of file and see where we are. This is a size of - // the file. - fs.seekg(0, std::ifstream::end); - std::ifstream::pos_type pos = fs.tellg(); - fs.close(); + std::ifstream::pos_type pos; + try { + // Seek to the end of file and see where we are. This is a size of + // the file. + fs.seekg(0, std::ifstream::end); + pos = fs.tellg(); + fs.close(); + } catch (const std::exception& ex) { + return (0); + } return (pos); } @@ -214,8 +239,25 @@ CSVFile::getColumnName(const size_t col_index) const { return (cols_[col_index]); } -void -CSVFile::next(CSVRow& row) { +bool +CSVFile::next(CSVRow& row, const bool skip_validation) { + // Set somethings as row validation error. Although, we haven't started + // actual row validation we should get rid of any previously recorded + // errors so as the caller doesn't interpret them as the current one. + setReadMsg("validation not started"); + + try { + // Check that stream is "ready" for any IO operations. + checkStreamStatus("get next row"); + + } catch (isc::Exception& ex) { + setReadMsg(ex.what()); + return (false); + } + + // If a stream is in invalid state, reset the state. + fs_->clear(); + // Get exactly one line of the file. std::string line; std::getline(*fs_, line); @@ -223,10 +265,20 @@ CSVFile::next(CSVRow& row) { // return an empty row. if (line.empty() && fs_->eof()) { row = EMPTY_ROW(); - return; + return (true); + + } else if (!fs_->good()) { + // If we hit an IO error, communicate it to the caller but do NOT close + // the stream. Caller may try again. + setReadMsg("error reading a row from CSV file '" + + std::string(filename_) + "'"); + return (false); } // If we read anything, parse it. row.parse(line.c_str()); + + // And check if it is correct. + return (skip_validation ? true : validate(row)); } void @@ -248,14 +300,34 @@ CSVFile::open() { // Make sure we are on the beginning of the file, so as we can parse // the header. fs_->seekg(0); + if (!fs_->good()) { + close(); + isc_throw(CSVFileError, "unable to set read pointer in the file '" + << filename_ << "'"); + } - // Get the header. - std::string line; - std::getline(*fs_, line); - CSVRow header(line.c_str(), primary_separator_); - // Initialize columns. - for (size_t i = 0; i < header.getValuesCount(); ++i) { - addColumn(header.readAt(i)); + // Read the header. + CSVRow header; + if (!next(header, true)) { + close(); + isc_throw(CSVFileError, "failed to read and parse header of the" + " CSV file '" << filename_ << "': " + << getReadMsg()); + } + + // Check the header against the columns specified for the CSV file. + if (!validateHeader(header)) { + close(); + isc_throw(CSVFileError, "invalid header '" << header + << "' in CSV file '" << filename_ << "'"); + } + + // Everything is good, so if we haven't added any columns yet, + // add them. + if (getColumnCount() == 0) { + for (size_t i = 0; i < header.getValuesCount(); ++i) { + addColumn(header.readAt(i)); + } } } } @@ -291,5 +363,37 @@ CSVFile::recreate() { } +bool +CSVFile::validate(const CSVRow& row) { + setReadMsg("success"); + bool ok = (row.getValuesCount() == getColumnCount()); + if (!ok) { + std::ostringstream s; + s << "the size of the row '" << row << "' doesn't match the number of" + " columns '" << getColumnCount() << "' of the CSV file '" + << filename_ << "'"; + setReadMsg(s.str()); + } + return (ok); } + +bool +CSVFile::validateHeader(const CSVRow& header) { + if (getColumnCount() == 0) { + return (true); + } + + if (getColumnCount() != header.getValuesCount()) { + return (false); + } + + for (int i = 0; i < getColumnCount(); ++i) { + if (getColumnName(i) != header.readAt(i)) { + return (false); + } + } + return (true); } + +} // end of isc::util namespace +} // end of isc namespace diff --git a/src/lib/util/csv_file.h b/src/lib/util/csv_file.h index 52267f89b1..bf2bdb23b8 100644 --- a/src/lib/util/csv_file.h +++ b/src/lib/util/csv_file.h @@ -73,7 +73,7 @@ public: /// @param cols Number of values in the row. /// @param separator Character being a separator between values in the /// text representation of the row. - CSVRow(const size_t cols, const char separator = ','); + CSVRow(const size_t cols = 0, const char separator = ','); /// @brief Constructor, parses a single row of the CSV file. /// @@ -270,11 +270,22 @@ public: /// @brief Closes the CSV file. void close(); + /// @brief Flushes a file. + void flush() const; + /// @brief Returns the number of columns in the file. size_t getColumnCount() const { return (cols_.size()); } + /// @brief Returns the description of the last error returned by the + /// @c CSVFile::next function. + /// + /// @return Description of the last error during row validation. + std::string getReadMsg() const { + return (read_msg_); + } + /// @brief Returns the index of the column having specified name. /// /// This function is exception safe. @@ -299,7 +310,11 @@ public: /// reached, the empty row is returned (a row containing no values). /// /// @param [out] row Object receiving the parsed CSV file. - void next(CSVRow& row); + /// @param skip_validation Do not perform validation. + /// + /// @return true if row has been read and validated; false if validation + /// failed. + bool next(CSVRow& row, const bool skip_validation = false); /// @brief Opens existing file or creates a new one. /// @@ -322,14 +337,69 @@ public: /// should be called. void recreate(); + /// @brief Sets error message after row validation. + /// + /// The @c CSVFile::validate function is responsible for setting the + /// error message after validation of the row read from the CSV file. + /// It will use this function to set this message. Note, that the + /// @c validate function can set a message after successful validation + /// too. Such message could say "success", or something similar. + /// + /// @param val_msg Error message to be set. + void setReadMsg(const std::string& read_msg) { + read_msg_ = read_msg; + } + /// @brief Represents empty row. static CSVRow EMPTY_ROW() { static CSVRow row(0); return (row); } +protected: + + /// @brief Validate the row read from a file. + /// + /// This function implements a basic validation for the row read from the + /// CSV file. It is virtual so as it may be customized in derived classes. + /// + /// This default implementation checks that the number of values in the + /// row corresponds to the number of columns specified for this file. + /// + /// If row validation fails, the error message is noted and can be retrieved + /// using @c CSVFile::getReadMsg. The function which overrides this + /// base implementation is responsible for setting the error message using + /// @c CSVFile::setReadMsg. + /// + /// @param row A row to be validated. + /// + /// @return true if the column is valid; false otherwise. + virtual bool validate(const CSVRow& row); + private: + /// @brief This function validates the header of the CSV file. + /// + /// If there are any columns added to the @c CSVFile object, it will + /// compare that they exactly match (including order) the header read + /// from the file. + /// + /// This function is called internally by @CSVFile::open. + /// + /// @param header A row holding a header. + /// @return true if header matches the columns; false otherwise. + bool validateHeader(const CSVRow& header); + + /// @brief Sanity check if stream is open. + /// + /// Checks if the file stream is open so as IO operations can be performed + /// on it. This is internally called by the public class members to prevent + /// them from performing IO operations on invalid stream and using NULL + /// pointer to a stream. + /// + /// @throw CSVFileError if stream is closed or pointer to it is NULL. + void checkStreamStatus(const std::string& operation) const; + /// @brief Returns size of the CSV file. std::ifstream::pos_type size() const; @@ -344,6 +414,9 @@ private: /// @brief Holds CSV file columns. std::vector<std::string> cols_; + + /// @brief Holds last error during row reading or validation. + std::string read_msg_; }; } // namespace isc::util diff --git a/src/lib/util/tests/csv_file_unittest.cc b/src/lib/util/tests/csv_file_unittest.cc index b22ecaa5a5..9e0dca29c2 100644 --- a/src/lib/util/tests/csv_file_unittest.cc +++ b/src/lib/util/tests/csv_file_unittest.cc @@ -136,6 +136,7 @@ public: /// @param contents Contents of the file. void writeFile(const std::string& contents) const; + /// @brief Absolute path to the file used in the tests. std::string testfile_; }; @@ -190,13 +191,16 @@ CSVFileTest::writeFile(const std::string& contents) const { } } -// This test checks that the file can be opened and its content -// parsed correctly. It also checks that empty row is returned -// when EOF is reached. -TEST_F(CSVFileTest, open) { +// This test checks that the file can be opened, its whole content is +// parsed correctly and data may be appended. It also checks that empty +// row is returned when EOF is reached. +TEST_F(CSVFileTest, openReadAllWrite) { + // Create a new CSV file that contains a header and two data rows. writeFile("animal,age,color\n" "cat,10,white\n" "lion,15,yellow\n"); + + // Open this file and check that the header is parsed. boost::scoped_ptr<CSVFile> csv(new CSVFile(testfile_)); ASSERT_NO_THROW(csv->open()); ASSERT_EQ(3, csv->getColumnCount()); @@ -204,33 +208,65 @@ TEST_F(CSVFileTest, open) { EXPECT_EQ("age", csv->getColumnName(1)); EXPECT_EQ("color", csv->getColumnName(2)); - CSVRow row(0); - ASSERT_NO_THROW(csv->next(row)); + // Read first row. + CSVRow row; + ASSERT_TRUE(csv->next(row)); ASSERT_EQ(3, row.getValuesCount()); EXPECT_EQ("cat", row.readAt(0)); EXPECT_EQ("10", row.readAt(1)); EXPECT_EQ("white", row.readAt(2)); - ASSERT_NO_THROW(csv->next(row)); + // Read second row. + ASSERT_TRUE(csv->next(row)); ASSERT_EQ(3, row.getValuesCount()); EXPECT_EQ("lion", row.readAt(0)); EXPECT_EQ("15", row.readAt(1)); EXPECT_EQ("yellow", row.readAt(2)); - ASSERT_NO_THROW(csv->next(row)); + // There is no 3rd row, so the empty one should be returned. + ASSERT_TRUE(csv->next(row)); + EXPECT_EQ(CSVFile::EMPTY_ROW(), row); + + // It should be fine to read again, but again empty row should be returned. + ASSERT_TRUE(csv->next(row)); EXPECT_EQ(CSVFile::EMPTY_ROW(), row); + + // Now, let's try to append something to this file. + CSVRow row_write(3); + row_write.writeAt(0, "dog"); + row_write.writeAt(1, 2); + row_write.writeAt(2, "blue"); + ASSERT_NO_THROW(csv->append(row_write)); + + // Close the file. + ASSERT_NO_THROW(csv->flush()); + csv->close(); + + // Check the the file contents are correct. + EXPECT_EQ("animal,age,color\n" + "cat,10,white\n" + "lion,15,yellow\n" + "dog,2,blue\n", + readFile()); + + // Any attempt to read from the file or write to it should now fail. + EXPECT_FALSE(csv->next(row)); + EXPECT_THROW(csv->append(row_write), CSVFileError); } -// This test checks that a file can be used both for reading -// and writing. When content is appended to the end of file, -// an attempt to read results in empty row returned. -TEST_F(CSVFileTest, openReadWrite) { +// This test checks that contents may be appended to a file which hasn't +// been fully parsed/read. +TEST_F(CSVFileTest, openReadPartialWrite) { + // Create a CSV file with two rows in it. writeFile("animal,age,color\n" "cat,10,white\n" "lion,15,yellow\n"); + + // Open this file. boost::scoped_ptr<CSVFile> csv(new CSVFile(testfile_)); ASSERT_NO_THROW(csv->open()); + // Read the first row. CSVRow row0(0); ASSERT_NO_THROW(csv->next(row0)); ASSERT_EQ(3, row0.getValuesCount()); @@ -238,15 +274,31 @@ TEST_F(CSVFileTest, openReadWrite) { EXPECT_EQ("10", row0.readAt(1)); EXPECT_EQ("white", row0.readAt(2)); + // There is still second row to be read. But, it should be possible to + // skip reading it and append new row to the end of file. CSVRow row_write(3); row_write.writeAt(0, "dog"); row_write.writeAt(1, 2); row_write.writeAt(2, "blue"); ASSERT_NO_THROW(csv->append(row_write)); + // At this point, the file pointer is at the end of file, so reading + // should return empty row. CSVRow row1(0); ASSERT_NO_THROW(csv->next(row1)); EXPECT_EQ(CSVFile::EMPTY_ROW(), row1); + + // Close the file. + ASSERT_NO_THROW(csv->flush()); + csv->close(); + + // Check that there are two initial lines and one new there. + EXPECT_EQ("animal,age,color\n" + "cat,10,white\n" + "lion,15,yellow\n" + "dog,2,blue\n", + readFile()); + } // This test checks that the new CSV file is created and header @@ -274,11 +326,82 @@ TEST_F(CSVFileTest, recreate) { row1.writeAt(2, 2); ASSERT_NO_THROW(csv->append(row1)); + ASSERT_NO_THROW(csv->flush()); + csv->close(); + EXPECT_EQ("animal,color,age,comments\n" "dog,grey,3,nice one\n" "cat,black,2,\n", readFile()); } +// This test checks that the error is reported when the size of the row being +// read doesn't match the number of columns of the CSV file. +TEST_F(CSVFileTest, validate) { + // Create CSV file with 2 invalid rows in it: one too long, one too short. + // Apart from that, there are two valid columns that should be read + // successfuly. + writeFile("animal,age,color\n" + "cat,10,white\n" + "lion,15,yellow,black\n" + "dog,3,green\n" + "elephant,11\n"); + + boost::scoped_ptr<CSVFile> csv(new CSVFile(testfile_)); + ASSERT_NO_THROW(csv->open()); + // First row is correct. + CSVRow row0; + ASSERT_TRUE(csv->next(row0)); + EXPECT_EQ("cat", row0.readAt(0)); + EXPECT_EQ("10", row0.readAt(1)); + EXPECT_EQ("white", row0.readAt(2)); + EXPECT_EQ("success", csv->getReadMsg()); + // This row is too long. + CSVRow row1; + EXPECT_FALSE(csv->next(row1)); + EXPECT_NE("success", csv->getReadMsg()); + // This row is correct. + CSVRow row2; + ASSERT_TRUE(csv->next(row2)); + EXPECT_EQ("dog", row2.readAt(0)); + EXPECT_EQ("3", row2.readAt(1)); + EXPECT_EQ("green", row2.readAt(2)); + EXPECT_EQ("success", csv->getReadMsg()); + // This row is too short. + CSVRow row3; + EXPECT_FALSE(csv->next(row3)); + EXPECT_NE("success", csv->getReadMsg()); +} + +// Test test checks that exception is thrown when the header of the CSV file +// parsed, doesn't match the columns specified. +TEST_F(CSVFileTest, validateHeader) { + // Create CSV file with 3 columns. + writeFile("animal,age,color\n" + "cat,10,white\n" + "lion,15,yellow,black\n"); + + // Invalid order of columns. + boost::scoped_ptr<CSVFile> csv(new CSVFile(testfile_)); + csv->addColumn("color"); + csv->addColumn("animal"); + csv->addColumn("age"); + EXPECT_THROW(csv->open(), CSVFileError); + + // Too many columns. + csv.reset(new CSVFile(testfile_)); + csv->addColumn("animal"); + csv->addColumn("age"); + csv->addColumn("color"); + csv->addColumn("notes"); + EXPECT_THROW(csv->open(), CSVFileError); + + // Too few columns. + csv.reset(new CSVFile(testfile_)); + csv->addColumn("animal"); + csv->addColumn("age"); + EXPECT_THROW(csv->open(), CSVFileError); +} + } // end of anonymous namespace |