diff options
author | JINMEI Tatuya <jinmei@isc.org> | 2012-03-06 23:42:58 +0100 |
---|---|---|
committer | JINMEI Tatuya <jinmei@isc.org> | 2012-03-06 23:42:58 +0100 |
commit | 1296fe25ed708996f9aca9d3ae063dd80d444d9d (patch) | |
tree | 8ae17e21e17d8eb112bb4311c787d4e848b8dcba /src/lib | |
parent | [1603] updated Makefile for newly added name_internal.h (diff) | |
download | kea-1296fe25ed708996f9aca9d3ae063dd80d444d9d.tar.xz kea-1296fe25ed708996f9aca9d3ae063dd80d444d9d.zip |
[1603] revised the MessageRenderer w/ a hash table and LabelSequence.
a benchmark indicated it's several times faster than the previous
implementation.
Diffstat (limited to 'src/lib')
-rw-r--r-- | src/lib/dns/messagerenderer.cc | 316 |
1 files changed, 147 insertions, 169 deletions
diff --git a/src/lib/dns/messagerenderer.cc b/src/lib/dns/messagerenderer.cc index bf4795acaf..1019d96baf 100644 --- a/src/lib/dns/messagerenderer.cc +++ b/src/lib/dns/messagerenderer.cc @@ -15,153 +15,98 @@ #include <exceptions/exceptions.h> #include <util/buffer.h> #include <dns/name.h> +#include <dns/labelsequence.h> #include <dns/messagerenderer.h> -#include <cctype> #include <cassert> -#include <set> +#include <vector> +using namespace std; using namespace isc::util; namespace isc { namespace dns { -namespace { // hide internal-only names from the public namespaces /// -/// \brief The \c NameCompressNode class represents a pointer to a name -/// rendered in the internal buffer for the \c MessageRendererImpl object. -/// -/// A \c MessageRendererImpl object maintains a set of the \c NameCompressNode -/// objects, and searches the set for the position of the longest match -/// (ancestor) name against each new name to be rendered into the buffer. -struct NameCompressNode { - NameCompressNode(const MessageRenderer& renderer, - const OutputBuffer& buffer, const size_t pos, - const size_t len) : - renderer_(renderer), buffer_(buffer), pos_(pos), len_(len) {} - /// The renderer that performs name compression using the node. - /// This is kept in each node to detect the compression mode - /// (case-sensitive or not) in the comparison functor (\c NameCompare). - const MessageRenderer& renderer_; - /// The buffer in which the corresponding name is rendered. - const OutputBuffer& buffer_; - /// The position (offset from the beginning) in the buffer where the - /// name starts. - uint16_t pos_; - /// The length of the corresponding name. - uint16_t len_; -}; - +/// \brief The \c MessageRendererImpl class is the actual implementation of +/// \c MessageRenderer. /// -/// \brief The \c NameCompare class is a functor that gives ordering among -/// \c NameCompressNode objects stored in \c MessageRendererImpl::nodeset_. +/// The implementation is hidden from applications. We can refer to specific +/// members of this class only within the implementation source file. /// -/// Its only public method as a functor, \c operator(), gives the ordering -/// between two \c NameCompressNode objects in terms of equivalence, that is, -/// returns whether one is "less than" the other. -/// For our purpose we only need to distinguish two different names, so the -/// ordering is different from the canonical DNS name order used in DNSSEC; -/// basically, it gives the case-insensitive ordering of the two names as their -/// textual representation. -struct NameCompare : public std::binary_function<NameCompressNode, - NameCompressNode, - bool> { - /// - /// Returns true if n1 < n2 as a result of case-insensitive comparison; - /// otherwise return false. - /// - /// The name corresponding to \c n1 or \c n2 may be compressed, in which - /// case we must follow the compression pointer in the associated buffer. - /// The helper private method \c nextPosition() gives the position in the - /// buffer for the next character, taking into account compression. - /// - bool operator()(const NameCompressNode& n1, - const NameCompressNode& n2) const +/// It internally holds a hash table for LabelSequence objects corresponding +/// to portions of names rendered in this renderer with their offset from +/// the beginning to the entire rendered data. It's used to handle name +/// compression. +struct MessageRenderer::MessageRendererImpl { + // The size of hash buckets + static const size_t BUCKETS = 64; + // Number of hash entries per bucket for which space is preallocated and + // keep reserved for subsequent rendering, inneding to provide better + // performance. + static const size_t RESERVED_ITEMS = 16; + static const uint16_t NO_OFFSET = 65535; // used as a marker of 'not found' + + // Structure used as hash entries + struct OffsetItem { + OffsetItem(const LabelSequence& labels_param, uint16_t offset_param) : + labels(labels_param), offset(offset_param) + {} + LabelSequence labels; + uint16_t offset; + }; + + MessageRendererImpl() : + msglength_limit_(512), truncated_(false), + compress_mode_(MessageRenderer::CASE_INSENSITIVE) { - if (n1.len_ < n2.len_) { - return (true); - } else if (n1.len_ > n2.len_) { - return (false); + // Reserve some spaces for hash and name placeholders. + for (size_t i = 0; i < BUCKETS; ++i) { + table_[i].reserve(RESERVED_ITEMS); } + names_.reserve(BUCKETS); + } - const bool case_sensitive = - (n1.renderer_.getCompressMode() == MessageRenderer::CASE_SENSITIVE); - - uint16_t pos1 = n1.pos_; - uint16_t pos2 = n2.pos_; - uint16_t l1 = 0; - uint16_t l2 = 0; - for (uint16_t i = 0; i < n1.len_; i++, pos1++, pos2++) { - pos1 = nextPosition(n1.buffer_, pos1, l1); - pos2 = nextPosition(n2.buffer_, pos2, l2); - if (case_sensitive) { - if (n1.buffer_[pos1] < n2.buffer_[pos2]) { - return (true); - } else if (n1.buffer_[pos1] > n2.buffer_[pos2]) { - return (false); - } - } else { - if (tolower(n1.buffer_[pos1]) < tolower(n2.buffer_[pos2])) { - return (true); - } else if (tolower(n1.buffer_[pos1]) > - tolower(n2.buffer_[pos2])) { - return (false); - } - } + // A helper structure to find the hash entry whose labelsequence is + // equal to the search key ("target"). + struct SequenceComp { + SequenceComp(const LabelSequence& target, bool case_sensitive) : + target_(target), case_sensitive_(case_sensitive) + {} + bool operator()(const OffsetItem& item) const { + return (item.labels.equals(target_, case_sensitive_)); } - - return (false); + private: + const LabelSequence& target_; + bool case_sensitive_; + }; + + uint16_t findOffset(const LabelSequence& sequence) const { + const bool case_sensitive = (compress_mode_ == + MessageRenderer::CASE_SENSITIVE); + const size_t bucket = (sequence.getHash(case_sensitive) % BUCKETS); + + // Find a matching entry, if any. We use some heuristics here: often + // the same name appers consecutively (like repeating the same owner + // name for a single RRset), so in case there's a collision in the + // bucket it will be more likely to find it in the tail side of the + // bucket. + vector<OffsetItem>::const_reverse_iterator found = + find_if(table_[bucket].rbegin(), table_[bucket].rend(), + SequenceComp(sequence, case_sensitive)); + if (found != table_[bucket].rend()) { + return (found->offset); + } + return (NO_OFFSET); } -private: - uint16_t nextPosition(const OutputBuffer& buffer, - uint16_t pos, uint16_t& llen) const - { - if (llen == 0) { - size_t i = 0; - - while ((buffer[pos] & Name::COMPRESS_POINTER_MARK8) == - Name::COMPRESS_POINTER_MARK8) { - pos = (buffer[pos] & ~Name::COMPRESS_POINTER_MARK8) * - 256 + buffer[pos + 1]; - - // This loop should stop as long as the buffer has been - // constructed validly and the search/insert argument is based - // on a valid name, which is an assumption for this class. - // But we'll abort if a bug could cause an infinite loop. - i += 2; - assert(i < Name::MAX_WIRE); - } - llen = buffer[pos]; - } else { - --llen; - } - return (pos); + void addOffset(const LabelSequence& sequence, uint16_t offset) { + const bool case_sensitive = (compress_mode_ == + MessageRenderer::CASE_SENSITIVE); + const size_t bucket = (sequence.getHash(case_sensitive) % BUCKETS); + table_[bucket].push_back(OffsetItem(sequence, offset)); } -}; -} -/// -/// \brief The \c MessageRendererImpl class is the actual implementation of -/// \c MessageRenderer. -/// -/// The implementation is hidden from applications. We can refer to specific -/// members of this class only within the implementation source file. -/// -struct MessageRenderer::MessageRendererImpl { - /// \brief Constructor from an output buffer. - /// - MessageRendererImpl() : - nbuffer_(Name::MAX_WIRE), msglength_limit_(512), - truncated_(false), compress_mode_(MessageRenderer::CASE_INSENSITIVE) - {} - /// A local working buffer to convert each given name into wire format. - /// This could be a local variable of the \c writeName() method, but - /// we keep it in the class so that we can reuse it and avoid construction - /// overhead. - OutputBuffer nbuffer_; - /// A set of compression pointers. - std::set<NameCompressNode, NameCompare> nodeset_; /// The maximum length of rendered data that can fit without /// truncation. uint16_t msglength_limit_; @@ -170,6 +115,11 @@ struct MessageRenderer::MessageRendererImpl { bool truncated_; /// The name compression mode. CompressMode compress_mode_; + + // The hash table for the (LabelSequence * offset) entries + vector<OffsetItem> table_[BUCKETS]; + // Placeholder for names referenced from the stored LabelSequences + vector<Name> names_; }; MessageRenderer::MessageRenderer() : @@ -184,11 +134,27 @@ MessageRenderer::~MessageRenderer() { void MessageRenderer::clear() { AbstractMessageRenderer::clear(); - impl_->nbuffer_.clear(); - impl_->nodeset_.clear(); impl_->msglength_limit_ = 512; impl_->truncated_ = false; impl_->compress_mode_ = CASE_INSENSITIVE; + + // Clear the hash table and name placeholders. We reserve the minimum + // space for possible subsequent use of the renderer. + for (size_t i = 0; i < MessageRendererImpl::BUCKETS; ++i) { + if (impl_->table_[i].size() > MessageRendererImpl::RESERVED_ITEMS) { + impl_->table_[i].reserve(MessageRendererImpl::RESERVED_ITEMS); + vector<MessageRendererImpl::OffsetItem>(impl_->table_[i].begin(), + impl_->table_[i].end()). + swap(impl_->table_[i]); + } + impl_->table_[i].clear(); + } + if (impl_->names_.size() > MessageRendererImpl::BUCKETS) { + impl_->names_.reserve(MessageRendererImpl::BUCKETS); + vector<Name>(impl_->names_.begin(), impl_->names_.end()). + swap(impl_->names_); + } + impl_->names_.clear(); } size_t @@ -223,54 +189,66 @@ MessageRenderer::setCompressMode(const CompressMode mode) { void MessageRenderer::writeName(const Name& name, const bool compress) { - impl_->nbuffer_.clear(); - name.toWire(impl_->nbuffer_); - - unsigned int i; - std::set<NameCompressNode, NameCompare>::const_iterator notfound = - impl_->nodeset_.end(); - std::set<NameCompressNode, NameCompare>::const_iterator n = notfound; - - // Find the longest ancestor name in the rendered set that matches the - // given name. - for (i = 0; i < impl_->nbuffer_.getLength(); i += impl_->nbuffer_[i] + 1) { - // skip the trailing null label - if (impl_->nbuffer_[i] == 0) { - continue; + LabelSequence sequence(name); + const size_t nlabels = sequence.getLabelCount(); + size_t data_len; + const char* data; + + // Find the offset in the offset table whose name gives the longest + // match against the name to be rendered. + size_t nlabels_uncomp; + uint16_t ptr_offset = MessageRendererImpl::NO_OFFSET; + for (nlabels_uncomp = 0; nlabels_uncomp < nlabels; ++nlabels_uncomp) { + if (sequence.getDataLength() == 1) { // trailing dot. + ++nlabels_uncomp; + break; } - n = impl_->nodeset_.find(NameCompressNode(*this, impl_->nbuffer_, i, - impl_->nbuffer_.getLength() - - i)); - if (n != notfound) { + ptr_offset = impl_->findOffset(sequence); + if (ptr_offset != MessageRendererImpl::NO_OFFSET) { break; } + sequence.stripLeft(1); } - // Record the current offset before extending the buffer. - const size_t offset = getLength(); - // Write uncompress part... - writeData(impl_->nbuffer_.getData(), - compress ? i : impl_->nbuffer_.getLength()); - if (compress && n != notfound) { - // ...and compression pointer if available. - uint16_t pointer = (*n).pos_; - pointer |= Name::COMPRESS_POINTER_MARK16; - writeUint16(pointer); + // Record the current offset before updating the offset table + size_t offset = getLength(); + // Write uncompress part: + if (nlabels_uncomp > 0 || !compress) { + LabelSequence uncomp_sequence(name); + if (compress && nlabels > nlabels_uncomp) { + // If there's compressed part, strip off that part. + uncomp_sequence.stripRight(nlabels - nlabels_uncomp); + } + data = uncomp_sequence.getData(&data_len); + writeData(data, data_len); + } + // And write compression pointer if available: + if (compress && ptr_offset != MessageRendererImpl::NO_OFFSET) { + ptr_offset |= Name::COMPRESS_POINTER_MARK16; + writeUint16(ptr_offset); } - // Finally, add to the set the newly rendered name and its ancestors that - // have not been in the set. - for (unsigned int j = 0; j < i; j += impl_->nbuffer_[j] + 1) { - if (impl_->nbuffer_[j] == 0) { - continue; - } - if (offset + j > Name::MAX_COMPRESS_POINTER) { - break; + // Finally, add the newly rendered name and its ancestors that + // have not been in the set. We need to make our copy of name and generate + // sequence(s) from the copied name because it's not guaranteed that + // the caller keeps the name valid after this call. + if (nlabels_uncomp > 0) { + impl_->names_.push_back(name); + LabelSequence saved_sequence(impl_->names_.back()); + size_t seqlen = saved_sequence.getDataLength(); + while (nlabels_uncomp-- > 0) { + if (seqlen == 1) { // root name doesn't need to be stored. + break; + } + if (offset > Name::MAX_COMPRESS_POINTER) { + break; + } + impl_->addOffset(saved_sequence, offset); + saved_sequence.stripLeft(1); + const size_t new_seqlen = saved_sequence.getDataLength(); + offset += (seqlen - new_seqlen); + seqlen = new_seqlen; } - impl_->nodeset_.insert(NameCompressNode(*this, getBuffer(), - offset + j, - impl_->nbuffer_.getLength() - - j)); } } |