// src/lib/util/versioned_csv_file.cc

// Copyright (C) 2015-2016 Internet Systems Consortium, Inc. ("ISC")
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

#include <util/versioned_csv_file.h>

namespace isc {
namespace util {

// Constructs the object for the given file name.  No versioned columns are
// defined yet, all column counters start at zero and the input schema state
// is initially CURRENT.
VersionedCSVFile::VersionedCSVFile(const std::string& filename)
    : CSVFile(filename),
      columns_(0),
      valid_column_count_(0),
      minimum_valid_columns_(0),
      input_header_count_(0),
      input_schema_state_(CURRENT) {
}

// Destructor.  This class adds no cleanup of its own.
VersionedCSVFile::~VersionedCSVFile() {}

void
VersionedCSVFile::addColumn(const std::string& name,
                            const std::string& version,
                            const std::string& default_value) {
    CSVFile::addColumn(name);
    columns_.push_back(VersionedColumnPtr(new VersionedColumn(name, version,
                                                              default_value)));
}

void
VersionedCSVFile::setMinimumValidColumns(const std::string& column_name) {
    try {
        int index = getColumnIndex(column_name);
        minimum_valid_columns_ = index + 1;

    } catch (...) {
        isc_throw(VersionedCSVFileError,
                  "setMinimumValidColumns: " << column_name << " is not "
                  "defined");
    }
}

// Returns the minimum number of valid columns set by
// setMinimumValidColumns() (zero if it was never set).
size_t
VersionedCSVFile::getMinimumValidColumns() const {
    return (minimum_valid_columns_);
}

// Returns the number of valid columns recorded by the last successful
// header validation or by recreate().
size_t
VersionedCSVFile::getValidColumnCount() const {
    return (valid_column_count_);
}

// Returns the number of columns recorded for the input header, as set by
// validateHeader() or recreate().
size_t
VersionedCSVFile::getInputHeaderCount() const {
    return (input_header_count_);
}

// Opens the file through the base class.  Throws VersionedCSVFileError if
// no columns have been defined, since there is then no schema to work with.
void
VersionedCSVFile::open(const bool seek_to_end) {
    const bool schema_defined = (getColumnCount() > 0);
    if (!schema_defined) {
        isc_throw(VersionedCSVFileError,
                  "no schema has been defined, cannot open CSV file :"
                  << getFilename());
    }

    CSVFile::open(seek_to_end);
}

// Recreates the file through the base class.  Throws VersionedCSVFileError
// if no columns have been defined.
void
VersionedCSVFile::recreate() {
    const bool schema_defined = (getColumnCount() > 0);
    if (!schema_defined) {
        isc_throw(VersionedCSVFileError,
                  "no schema has been defined, cannot create CSV file :"
                  << getFilename());
    }

    CSVFile::recreate();

    // A freshly created file always matches the defined schema, so both
    // counters equal the number of defined columns.
    const size_t defined_columns = getColumnCount();
    valid_column_count_ = defined_columns;
    input_header_count_ = defined_columns;
}

// Returns the schema state of the input file (CURRENT, NEEDS_UPGRADE or
// NEEDS_DOWNGRADE).
VersionedCSVFile::InputSchemaState
VersionedCSVFile::getInputSchemaState() const {
    return (input_schema_state_);
}

// Returns true when the input schema state is anything other than CURRENT.
bool
VersionedCSVFile::needsConversion() const {
    const bool is_current = (input_schema_state_ == CURRENT);
    return (!is_current);
}

// Returns the version of the last valid column found in the input file, or
// "undefined" when no valid columns have been recorded.
std::string
VersionedCSVFile::getInputSchemaVersion() const {
    const size_t valid_columns = getValidColumnCount();
    if (valid_columns == 0) {
        return ("undefined");
    }

    return (getVersionedColumn(valid_columns - 1)->version_);
}

// Returns the version of the last defined column, or "undefined" when no
// columns have been defined.
std::string
VersionedCSVFile::getSchemaVersion() const {
    const size_t defined_columns = getColumnCount();
    if (defined_columns == 0) {
        return ("undefined");
    }

    return (getVersionedColumn(defined_columns - 1)->version_);
}

// Returns the versioned column stored at the given zero-based index.
// Throws isc::OutOfRange if the index is not below the column count.
const VersionedColumnPtr&
VersionedCSVFile::getVersionedColumn(const size_t index) const {
    const size_t defined_columns = getColumnCount();
    if (index >= defined_columns) {
        isc_throw(isc::OutOfRange, "versioned column index " << index
                  << " out of range;  CSV file : " << getFilename()
                  << " only has " << defined_columns << " columns ");
    }

    return (columns_[index]);
}

// Reads the next row from the file and reconciles its column count with
// the defined schema.
//
// The read message is reset to "success" and the base class is used to read
// the row with its own row validation skipped.  An empty row (EMPTY_ROW) is
// returned as valid without further checks.  Otherwise the number of values
// in the row is checked according to the input schema state:
//  - CURRENT: the row must have exactly as many values as defined columns.
//  - NEEDS_UPGRADE: the row must have at least the valid column count and
//    at most the defined column count; missing trailing values are filled
//    in with the defaults of the corresponding columns.
//  - NEEDS_DOWNGRADE: the row must have at least the defined column count
//    and at most the input header count; the extra values are trimmed.
//
// Returns true if the row is empty or valid (after any conversion), false
// otherwise.  On failure the read message describes the problem.
bool
VersionedCSVFile::next(CSVRow& row) {
    setReadMsg("success");
    // Use the base class to physically read the row, but skip its row
    // validation
    CSVFile::next(row, true);
    if (row == CSVFile::EMPTY_ROW()) {
        return(true);
    }

    bool row_valid = true;
    switch(getInputSchemaState()) {
        case CURRENT:
            // All rows must match the current schema
            if (row.getValuesCount() != getColumnCount()) {
                columnCountError(row, "must match current schema");
                row_valid = false;
            }
            break;

        case NEEDS_UPGRADE:
            // The input header met the minimum column count but
            // is less than the current schema so:
            // Rows must not be shorter than the valid column count
            // and not longer than the current schema
            if (row.getValuesCount() < getValidColumnCount()) {
                columnCountError(row, "too few columns to upgrade");
                row_valid = false;
            } else if (row.getValuesCount() > getColumnCount()) {
                columnCountError(row, "too many columns to upgrade");
                row_valid = false;
            } else {
                // Add any missing values
                for (size_t index = row.getValuesCount();
                     index < getColumnCount(); ++index) {
                    row.append(columns_[index]->default_value_);
                }
            }
            break;

        case NEEDS_DOWNGRADE:
            // The input header exceeded current schema so:
            // Rows may be as long as input header but not shorter than
            // the current schema.  A row outside of that range cannot be
            // converted and must be reported as invalid, just like in the
            // other schema states; otherwise the caller would receive an
            // untrimmed row flagged as valid.
            if (row.getValuesCount() < getColumnCount()) {
                columnCountError(row, "too few columns to downgrade");
                row_valid = false;
            } else if (row.getValuesCount() > getInputHeaderCount()) {
                columnCountError(row, "too many columns to downgrade");
                row_valid = false;
            } else {
                // Toss any extra columns
                row.trim(row.getValuesCount() - getColumnCount());
            }
            break;
    }

    return (row_valid);
}

// Builds a message describing a row with an unexpected number of values
// (the count, the row itself, the file name and the supplied reason) and
// stores it as the read message.
void
VersionedCSVFile::columnCountError(const CSVRow& row,
                                  const std::string& reason) {
    std::ostringstream msg;
    msg << "Invalid number of columns: " << row.getValuesCount()
        << " in row: '" << row
        << "', file: '" << getFilename()
        << "' : " << reason;

    setReadMsg(msg.str());
}

// Validates the header row of the input file against the defined schema
// and determines whether the file needs to be upgraded or downgraded.
//
// Throws VersionedCSVFileError if no columns have been defined.  Returns
// false, with the read message describing the problem, if a header column
// does not match the defined column in the same position or if fewer than
// the minimum number of valid columns were found.  Returns true otherwise.
bool
VersionedCSVFile::validateHeader(const CSVRow& header) {
    const size_t defined_columns = getColumnCount();
    if (defined_columns == 0) {
        isc_throw(VersionedCSVFileError,
                  "cannot validate header, no schema has been defined");
    }

    input_header_count_ = header.getValuesCount();
    const size_t header_columns = getInputHeaderCount();

    // Walk the positions present in both the header and the schema,
    // comparing each header value with the defined column name.  Stop at
    // the first mismatch.
    size_t matched = 0;
    while ((matched < header_columns) && (matched < defined_columns)) {
        if (getColumnName(matched) != header.readAt(matched)) {
            std::ostringstream msg;
            msg << " - header contains an invalid column: '"
                << header.readAt(matched) << "'";
            setReadMsg(msg.str());
            return (false);
        }
        ++matched;
    }

    // With too few valid columns the file cannot be converted.  It's too
    // old, too corrupt, or not a Kea file.
    const size_t required_columns = getMinimumValidColumns();
    if (matched < required_columns) {
        std::ostringstream msg;
        msg << " - header has only " << matched << " valid column(s), "
            << "it must have at least " << required_columns;
        setReadMsg(msg.str());
        return (false);
    }

    // Remember how many valid columns were found.  When this is less than
    // the number of defined columns, the file is an older version and the
    // value is needed to validate and upgrade data rows.
    valid_column_count_ = matched;

    if (getValidColumnCount() < defined_columns) {
        input_schema_state_ = NEEDS_UPGRADE;
    } else if (header_columns > defined_columns) {
        // The header has more values than there are defined columns.  The
        // extra ones are dropped, which allows a downgrade to be attempted.
        input_schema_state_ = NEEDS_DOWNGRADE;
        const size_t extra_columns = header_columns - defined_columns;
        std::ostringstream msg;
        msg << " - header has " << extra_columns
            << " extra column(s), these will be ignored";
        setReadMsg(msg.str());
    }

    return (true);
}

} // end of isc::util namespace
} // end of isc namespace