summaryrefslogtreecommitdiffstats
path: root/src/lib/util
diff options
context:
space:
mode:
authorMarcin Siodelski <marcin@isc.org>2014-03-14 15:37:57 +0100
committerMarcin Siodelski <marcin@isc.org>2014-03-14 15:37:57 +0100
commit6199f3a5ea37a65a2405e571a5b189dc127af138 (patch)
tree4f4f89383f9f5de8b3eff9afe97264663f9363ba /src/lib/util
parent[3360] Implemented base class for read/write in CSV file. (diff)
downloadkea-6199f3a5ea37a65a2405e571a5b189dc127af138.tar.xz
kea-6199f3a5ea37a65a2405e571a5b189dc127af138.zip
[3360] Added CSV file validation.
Diffstat (limited to 'src/lib/util')
-rw-r--r--src/lib/util/csv_file.cc144
-rw-r--r--src/lib/util/csv_file.h77
-rw-r--r--src/lib/util/tests/csv_file_unittest.cc147
3 files changed, 334 insertions, 34 deletions
diff --git a/src/lib/util/csv_file.cc b/src/lib/util/csv_file.cc
index 10ce9e5cde..6fc44ca4b6 100644
--- a/src/lib/util/csv_file.cc
+++ b/src/lib/util/csv_file.cc
@@ -124,7 +124,8 @@ CSVRow::checkIndex(const size_t at) const {
}
CSVFile::CSVFile(const std::string& filename)
- : primary_separator_(','), filename_(filename), fs_() {
+ : primary_separator_(','), filename_(filename), fs_(), cols_(0),
+ read_msg_() {
}
CSVFile::~CSVFile() {
@@ -142,6 +143,12 @@ CSVFile::close() {
}
void
+CSVFile::flush() const {
+ checkStreamStatus("flush");
+ fs_->flush();
+}
+
+void
CSVFile::addColumn(const std::string& col_name) {
if (getColumnIndex(col_name) >= 0) {
isc_throw(CSVFileError, "attempt to add duplicate column '"
@@ -152,10 +159,10 @@ CSVFile::addColumn(const std::string& col_name) {
void
CSVFile::append(const CSVRow& row) const {
- if (!fs_) {
- isc_throw(CSVFileError, "unable to write a row to the CSV file '"
- << filename_ << "', which is closed");
- }
+ checkStreamStatus("append");
+
+ // If a stream is in invalid state, reset the state.
+ fs_->clear();
if (row.getValuesCount() != getColumnCount()) {
isc_throw(CSVFileError, "number of values in the CSV row '"
@@ -176,6 +183,19 @@ CSVFile::append(const CSVRow& row) const {
}
}
+void
+CSVFile::checkStreamStatus(const std::string& operation) const {
+ if (!fs_) {
+ isc_throw(CSVFileError, "NULL stream pointer when performing '"
+ << operation << "' on file '" << filename_ << "'");
+
+ } else if (!fs_->is_open()) {
+ isc_throw(CSVFileError, "closed stream when performing '"
+ << operation << "' on file '" << filename_ << "'");
+
+ }
+}
+
std::ifstream::pos_type
CSVFile::size() const {
std::ifstream fs(filename_.c_str());
@@ -186,11 +206,16 @@ CSVFile::size() const {
fs.close();
return (0);
}
- // Seek to the end of file and see where we are. This is a size of
- // the file.
- fs.seekg(0, std::ifstream::end);
- std::ifstream::pos_type pos = fs.tellg();
- fs.close();
+ std::ifstream::pos_type pos;
+ try {
+ // Seek to the end of file and see where we are. This is a size of
+ // the file.
+ fs.seekg(0, std::ifstream::end);
+ pos = fs.tellg();
+ fs.close();
+ } catch (const std::exception& ex) {
+ return (0);
+ }
return (pos);
}
@@ -214,8 +239,25 @@ CSVFile::getColumnName(const size_t col_index) const {
return (cols_[col_index]);
}
-void
-CSVFile::next(CSVRow& row) {
+bool
+CSVFile::next(CSVRow& row, const bool skip_validation) {
+ // Set something as the row validation error. Although we haven't started
+ // actual row validation, we should get rid of any previously recorded
+ // errors so that the caller doesn't interpret them as the current one.
+ setReadMsg("validation not started");
+
+ try {
+ // Check that stream is "ready" for any IO operations.
+ checkStreamStatus("get next row");
+
+ } catch (isc::Exception& ex) {
+ setReadMsg(ex.what());
+ return (false);
+ }
+
+ // If a stream is in invalid state, reset the state.
+ fs_->clear();
+
// Get exactly one line of the file.
std::string line;
std::getline(*fs_, line);
@@ -223,10 +265,20 @@ CSVFile::next(CSVRow& row) {
// return an empty row.
if (line.empty() && fs_->eof()) {
row = EMPTY_ROW();
- return;
+ return (true);
+
+ } else if (!fs_->good()) {
+ // If we hit an IO error, communicate it to the caller but do NOT close
+ // the stream. Caller may try again.
+ setReadMsg("error reading a row from CSV file '"
+ + std::string(filename_) + "'");
+ return (false);
}
// If we read anything, parse it.
row.parse(line.c_str());
+
+ // And check if it is correct.
+ return (skip_validation ? true : validate(row));
}
void
@@ -248,14 +300,34 @@ CSVFile::open() {
// Make sure we are on the beginning of the file, so as we can parse
// the header.
fs_->seekg(0);
+ if (!fs_->good()) {
+ close();
+ isc_throw(CSVFileError, "unable to set read pointer in the file '"
+ << filename_ << "'");
+ }
- // Get the header.
- std::string line;
- std::getline(*fs_, line);
- CSVRow header(line.c_str(), primary_separator_);
- // Initialize columns.
- for (size_t i = 0; i < header.getValuesCount(); ++i) {
- addColumn(header.readAt(i));
+ // Read the header.
+ CSVRow header;
+ if (!next(header, true)) {
+ close();
+ isc_throw(CSVFileError, "failed to read and parse header of the"
+ " CSV file '" << filename_ << "': "
+ << getReadMsg());
+ }
+
+ // Check the header against the columns specified for the CSV file.
+ if (!validateHeader(header)) {
+ close();
+ isc_throw(CSVFileError, "invalid header '" << header
+ << "' in CSV file '" << filename_ << "'");
+ }
+
+ // Everything is good, so if we haven't added any columns yet,
+ // add them.
+ if (getColumnCount() == 0) {
+ for (size_t i = 0; i < header.getValuesCount(); ++i) {
+ addColumn(header.readAt(i));
+ }
}
}
}
@@ -291,5 +363,37 @@ CSVFile::recreate() {
}
+bool
+CSVFile::validate(const CSVRow& row) {
+ setReadMsg("success");
+ bool ok = (row.getValuesCount() == getColumnCount());
+ if (!ok) {
+ std::ostringstream s;
+ s << "the size of the row '" << row << "' doesn't match the number of"
+ " columns '" << getColumnCount() << "' of the CSV file '"
+ << filename_ << "'";
+ setReadMsg(s.str());
+ }
+ return (ok);
}
+
+bool
+CSVFile::validateHeader(const CSVRow& header) {
+ if (getColumnCount() == 0) {
+ return (true);
+ }
+
+ if (getColumnCount() != header.getValuesCount()) {
+ return (false);
+ }
+
+ for (int i = 0; i < getColumnCount(); ++i) {
+ if (getColumnName(i) != header.readAt(i)) {
+ return (false);
+ }
+ }
+ return (true);
}
+
+} // end of isc::util namespace
+} // end of isc namespace
diff --git a/src/lib/util/csv_file.h b/src/lib/util/csv_file.h
index 52267f89b1..bf2bdb23b8 100644
--- a/src/lib/util/csv_file.h
+++ b/src/lib/util/csv_file.h
@@ -73,7 +73,7 @@ public:
/// @param cols Number of values in the row.
/// @param separator Character being a separator between values in the
/// text representation of the row.
- CSVRow(const size_t cols, const char separator = ',');
+ CSVRow(const size_t cols = 0, const char separator = ',');
/// @brief Constructor, parses a single row of the CSV file.
///
@@ -270,11 +270,22 @@ public:
/// @brief Closes the CSV file.
void close();
+ /// @brief Flushes a file.
+ void flush() const;
+
/// @brief Returns the number of columns in the file.
size_t getColumnCount() const {
return (cols_.size());
}
+ /// @brief Returns the description of the last error returned by the
+ /// @c CSVFile::next function.
+ ///
+ /// @return Description of the last error during row validation.
+ std::string getReadMsg() const {
+ return (read_msg_);
+ }
+
/// @brief Returns the index of the column having specified name.
///
/// This function is exception safe.
@@ -299,7 +310,11 @@ public:
/// reached, the empty row is returned (a row containing no values).
///
/// @param [out] row Object receiving the parsed CSV file.
- void next(CSVRow& row);
+ /// @param skip_validation Do not perform validation.
+ ///
+ /// @return true if the row has been read and (unless skipped) validated;
+ /// false if reading or validation failed.
+ bool next(CSVRow& row, const bool skip_validation = false);
/// @brief Opens existing file or creates a new one.
///
@@ -322,14 +337,69 @@ public:
/// should be called.
void recreate();
+ /// @brief Sets error message after row validation.
+ ///
+ /// The @c CSVFile::validate function is responsible for setting the
+ /// error message after validation of the row read from the CSV file.
+ /// It will use this function to set this message. Note, that the
+ /// @c validate function can set a message after successful validation
+ /// too. Such message could say "success", or something similar.
+ ///
+ /// @param read_msg Error message to be set.
+ void setReadMsg(const std::string& read_msg) {
+ read_msg_ = read_msg;
+ }
+
/// @brief Represents empty row.
static CSVRow EMPTY_ROW() {
static CSVRow row(0);
return (row);
}
+protected:
+
+ /// @brief Validate the row read from a file.
+ ///
+ /// This function implements a basic validation for the row read from the
+ /// CSV file. It is virtual so as it may be customized in derived classes.
+ ///
+ /// This default implementation checks that the number of values in the
+ /// row corresponds to the number of columns specified for this file.
+ ///
+ /// If row validation fails, the error message is noted and can be retrieved
+ /// using @c CSVFile::getReadMsg. The function which overrides this
+ /// base implementation is responsible for setting the error message using
+ /// @c CSVFile::setReadMsg.
+ ///
+ /// @param row A row to be validated.
+ ///
+ /// @return true if the row is valid; false otherwise.
+ virtual bool validate(const CSVRow& row);
+
private:
+ /// @brief This function validates the header of the CSV file.
+ ///
+ /// If there are any columns added to the @c CSVFile object, it will
+ /// compare that they exactly match (including order) the header read
+ /// from the file.
+ ///
+ /// This function is called internally by @c CSVFile::open.
+ ///
+ /// @param header A row holding a header.
+ /// @return true if header matches the columns; false otherwise.
+ bool validateHeader(const CSVRow& header);
+
+ /// @brief Sanity check if stream is open.
+ ///
+ /// Checks if the file stream is open so as IO operations can be performed
+ /// on it. This is internally called by the public class members to prevent
+ /// them from performing IO operations on invalid stream and using NULL
+ /// pointer to a stream.
+ ///
+ /// @throw CSVFileError if stream is closed or pointer to it is NULL.
+ void checkStreamStatus(const std::string& operation) const;
+
/// @brief Returns size of the CSV file.
std::ifstream::pos_type size() const;
@@ -344,6 +414,9 @@ private:
/// @brief Holds CSV file columns.
std::vector<std::string> cols_;
+
+ /// @brief Holds last error during row reading or validation.
+ std::string read_msg_;
};
} // namespace isc::util
diff --git a/src/lib/util/tests/csv_file_unittest.cc b/src/lib/util/tests/csv_file_unittest.cc
index b22ecaa5a5..9e0dca29c2 100644
--- a/src/lib/util/tests/csv_file_unittest.cc
+++ b/src/lib/util/tests/csv_file_unittest.cc
@@ -136,6 +136,7 @@ public:
/// @param contents Contents of the file.
void writeFile(const std::string& contents) const;
+ /// @brief Absolute path to the file used in the tests.
std::string testfile_;
};
@@ -190,13 +191,16 @@ CSVFileTest::writeFile(const std::string& contents) const {
}
}
-// This test checks that the file can be opened and its content
-// parsed correctly. It also checks that empty row is returned
-// when EOF is reached.
-TEST_F(CSVFileTest, open) {
+// This test checks that the file can be opened, its whole content is
+// parsed correctly and data may be appended. It also checks that empty
+// row is returned when EOF is reached.
+TEST_F(CSVFileTest, openReadAllWrite) {
+ // Create a new CSV file that contains a header and two data rows.
writeFile("animal,age,color\n"
"cat,10,white\n"
"lion,15,yellow\n");
+
+ // Open this file and check that the header is parsed.
boost::scoped_ptr<CSVFile> csv(new CSVFile(testfile_));
ASSERT_NO_THROW(csv->open());
ASSERT_EQ(3, csv->getColumnCount());
@@ -204,33 +208,65 @@ TEST_F(CSVFileTest, open) {
EXPECT_EQ("age", csv->getColumnName(1));
EXPECT_EQ("color", csv->getColumnName(2));
- CSVRow row(0);
- ASSERT_NO_THROW(csv->next(row));
+ // Read first row.
+ CSVRow row;
+ ASSERT_TRUE(csv->next(row));
ASSERT_EQ(3, row.getValuesCount());
EXPECT_EQ("cat", row.readAt(0));
EXPECT_EQ("10", row.readAt(1));
EXPECT_EQ("white", row.readAt(2));
- ASSERT_NO_THROW(csv->next(row));
+ // Read second row.
+ ASSERT_TRUE(csv->next(row));
ASSERT_EQ(3, row.getValuesCount());
EXPECT_EQ("lion", row.readAt(0));
EXPECT_EQ("15", row.readAt(1));
EXPECT_EQ("yellow", row.readAt(2));
- ASSERT_NO_THROW(csv->next(row));
+ // There is no 3rd row, so the empty one should be returned.
+ ASSERT_TRUE(csv->next(row));
+ EXPECT_EQ(CSVFile::EMPTY_ROW(), row);
+
+ // It should be fine to read again, but again empty row should be returned.
+ ASSERT_TRUE(csv->next(row));
EXPECT_EQ(CSVFile::EMPTY_ROW(), row);
+
+ // Now, let's try to append something to this file.
+ CSVRow row_write(3);
+ row_write.writeAt(0, "dog");
+ row_write.writeAt(1, 2);
+ row_write.writeAt(2, "blue");
+ ASSERT_NO_THROW(csv->append(row_write));
+
+ // Close the file.
+ ASSERT_NO_THROW(csv->flush());
+ csv->close();
+
+ // Check that the file contents are correct.
+ EXPECT_EQ("animal,age,color\n"
+ "cat,10,white\n"
+ "lion,15,yellow\n"
+ "dog,2,blue\n",
+ readFile());
+
+ // Any attempt to read from the file or write to it should now fail.
+ EXPECT_FALSE(csv->next(row));
+ EXPECT_THROW(csv->append(row_write), CSVFileError);
}
-// This test checks that a file can be used both for reading
-// and writing. When content is appended to the end of file,
-// an attempt to read results in empty row returned.
-TEST_F(CSVFileTest, openReadWrite) {
+// This test checks that contents may be appended to a file which hasn't
+// been fully parsed/read.
+TEST_F(CSVFileTest, openReadPartialWrite) {
+ // Create a CSV file with two rows in it.
writeFile("animal,age,color\n"
"cat,10,white\n"
"lion,15,yellow\n");
+
+ // Open this file.
boost::scoped_ptr<CSVFile> csv(new CSVFile(testfile_));
ASSERT_NO_THROW(csv->open());
+ // Read the first row.
CSVRow row0(0);
ASSERT_NO_THROW(csv->next(row0));
ASSERT_EQ(3, row0.getValuesCount());
@@ -238,15 +274,31 @@ TEST_F(CSVFileTest, openReadWrite) {
EXPECT_EQ("10", row0.readAt(1));
EXPECT_EQ("white", row0.readAt(2));
+ // There is still second row to be read. But, it should be possible to
+ // skip reading it and append new row to the end of file.
CSVRow row_write(3);
row_write.writeAt(0, "dog");
row_write.writeAt(1, 2);
row_write.writeAt(2, "blue");
ASSERT_NO_THROW(csv->append(row_write));
+ // At this point, the file pointer is at the end of file, so reading
+ // should return empty row.
CSVRow row1(0);
ASSERT_NO_THROW(csv->next(row1));
EXPECT_EQ(CSVFile::EMPTY_ROW(), row1);
+
+ // Close the file.
+ ASSERT_NO_THROW(csv->flush());
+ csv->close();
+
+ // Check that the two initial rows are still there, followed by the new one.
+ EXPECT_EQ("animal,age,color\n"
+ "cat,10,white\n"
+ "lion,15,yellow\n"
+ "dog,2,blue\n",
+ readFile());
+
}
// This test checks that the new CSV file is created and header
@@ -274,11 +326,82 @@ TEST_F(CSVFileTest, recreate) {
row1.writeAt(2, 2);
ASSERT_NO_THROW(csv->append(row1));
+ ASSERT_NO_THROW(csv->flush());
+ csv->close();
+
EXPECT_EQ("animal,color,age,comments\n"
"dog,grey,3,nice one\n"
"cat,black,2,\n",
readFile());
}
+// This test checks that the error is reported when the size of the row being
+// read doesn't match the number of columns of the CSV file.
+TEST_F(CSVFileTest, validate) {
+ // Create CSV file with 2 invalid rows in it: one too long, one too short.
+ // Apart from that, there are two valid rows that should be read
+ // successfully.
+ writeFile("animal,age,color\n"
+ "cat,10,white\n"
+ "lion,15,yellow,black\n"
+ "dog,3,green\n"
+ "elephant,11\n");
+
+ boost::scoped_ptr<CSVFile> csv(new CSVFile(testfile_));
+ ASSERT_NO_THROW(csv->open());
+ // First row is correct.
+ CSVRow row0;
+ ASSERT_TRUE(csv->next(row0));
+ EXPECT_EQ("cat", row0.readAt(0));
+ EXPECT_EQ("10", row0.readAt(1));
+ EXPECT_EQ("white", row0.readAt(2));
+ EXPECT_EQ("success", csv->getReadMsg());
+ // This row is too long.
+ CSVRow row1;
+ EXPECT_FALSE(csv->next(row1));
+ EXPECT_NE("success", csv->getReadMsg());
+ // This row is correct.
+ CSVRow row2;
+ ASSERT_TRUE(csv->next(row2));
+ EXPECT_EQ("dog", row2.readAt(0));
+ EXPECT_EQ("3", row2.readAt(1));
+ EXPECT_EQ("green", row2.readAt(2));
+ EXPECT_EQ("success", csv->getReadMsg());
+ // This row is too short.
+ CSVRow row3;
+ EXPECT_FALSE(csv->next(row3));
+ EXPECT_NE("success", csv->getReadMsg());
+}
+
+// This test checks that an exception is thrown when the header of the parsed
+// CSV file doesn't match the columns specified.
+TEST_F(CSVFileTest, validateHeader) {
+ // Create CSV file with 3 columns.
+ writeFile("animal,age,color\n"
+ "cat,10,white\n"
+ "lion,15,yellow,black\n");
+
+ // Invalid order of columns.
+ boost::scoped_ptr<CSVFile> csv(new CSVFile(testfile_));
+ csv->addColumn("color");
+ csv->addColumn("animal");
+ csv->addColumn("age");
+ EXPECT_THROW(csv->open(), CSVFileError);
+
+ // Too many columns.
+ csv.reset(new CSVFile(testfile_));
+ csv->addColumn("animal");
+ csv->addColumn("age");
+ csv->addColumn("color");
+ csv->addColumn("notes");
+ EXPECT_THROW(csv->open(), CSVFileError);
+
+ // Too few columns.
+ csv.reset(new CSVFile(testfile_));
+ csv->addColumn("animal");
+ csv->addColumn("age");
+ EXPECT_THROW(csv->open(), CSVFileError);
+}
+
} // end of anonymous namespace