summaryrefslogtreecommitdiffstats
path: root/src/lib
diff options
context:
space:
mode:
authorJINMEI Tatuya <jinmei@isc.org>2012-03-06 23:42:58 +0100
committerJINMEI Tatuya <jinmei@isc.org>2012-03-06 23:42:58 +0100
commit1296fe25ed708996f9aca9d3ae063dd80d444d9d (patch)
tree8ae17e21e17d8eb112bb4311c787d4e848b8dcba /src/lib
parent[1603] updated Makefile for newly added name_internal.h (diff)
downloadkea-1296fe25ed708996f9aca9d3ae063dd80d444d9d.tar.xz
kea-1296fe25ed708996f9aca9d3ae063dd80d444d9d.zip
[1603] revised the MessageRenderer w/ a hash table and LabelSequence.
a benchmark indicated it's several times faster than the previous implementation.
Diffstat (limited to 'src/lib')
-rw-r--r--src/lib/dns/messagerenderer.cc316
1 files changed, 147 insertions, 169 deletions
diff --git a/src/lib/dns/messagerenderer.cc b/src/lib/dns/messagerenderer.cc
index bf4795acaf..1019d96baf 100644
--- a/src/lib/dns/messagerenderer.cc
+++ b/src/lib/dns/messagerenderer.cc
@@ -15,153 +15,98 @@
#include <exceptions/exceptions.h>
#include <util/buffer.h>
#include <dns/name.h>
+#include <dns/labelsequence.h>
#include <dns/messagerenderer.h>
-#include <cctype>
#include <cassert>
-#include <set>
+#include <vector>
+using namespace std;
using namespace isc::util;
namespace isc {
namespace dns {
-namespace { // hide internal-only names from the public namespaces
///
-/// \brief The \c NameCompressNode class represents a pointer to a name
-/// rendered in the internal buffer for the \c MessageRendererImpl object.
-///
-/// A \c MessageRendererImpl object maintains a set of the \c NameCompressNode
-/// objects, and searches the set for the position of the longest match
-/// (ancestor) name against each new name to be rendered into the buffer.
-struct NameCompressNode {
- NameCompressNode(const MessageRenderer& renderer,
- const OutputBuffer& buffer, const size_t pos,
- const size_t len) :
- renderer_(renderer), buffer_(buffer), pos_(pos), len_(len) {}
- /// The renderer that performs name compression using the node.
- /// This is kept in each node to detect the compression mode
- /// (case-sensitive or not) in the comparison functor (\c NameCompare).
- const MessageRenderer& renderer_;
- /// The buffer in which the corresponding name is rendered.
- const OutputBuffer& buffer_;
- /// The position (offset from the beginning) in the buffer where the
- /// name starts.
- uint16_t pos_;
- /// The length of the corresponding name.
- uint16_t len_;
-};
-
+/// \brief The \c MessageRendererImpl class is the actual implementation of
+/// \c MessageRenderer.
///
-/// \brief The \c NameCompare class is a functor that gives ordering among
-/// \c NameCompressNode objects stored in \c MessageRendererImpl::nodeset_.
+/// The implementation is hidden from applications. We can refer to specific
+/// members of this class only within the implementation source file.
///
-/// Its only public method as a functor, \c operator(), gives the ordering
-/// between two \c NameCompressNode objects in terms of equivalence, that is,
-/// returns whether one is "less than" the other.
-/// For our purpose we only need to distinguish two different names, so the
-/// ordering is different from the canonical DNS name order used in DNSSEC;
-/// basically, it gives the case-insensitive ordering of the two names as their
-/// textual representation.
-struct NameCompare : public std::binary_function<NameCompressNode,
- NameCompressNode,
- bool> {
- ///
- /// Returns true if n1 < n2 as a result of case-insensitive comparison;
- /// otherwise return false.
- ///
- /// The name corresponding to \c n1 or \c n2 may be compressed, in which
- /// case we must follow the compression pointer in the associated buffer.
- /// The helper private method \c nextPosition() gives the position in the
- /// buffer for the next character, taking into account compression.
- ///
- bool operator()(const NameCompressNode& n1,
- const NameCompressNode& n2) const
+/// It internally holds a hash table for LabelSequence objects corresponding
+/// to portions of names rendered in this renderer with their offset from
+/// the beginning to the entire rendered data. It's used to handle name
+/// compression.
+struct MessageRenderer::MessageRendererImpl {
+ // The size of hash buckets
+ static const size_t BUCKETS = 64;
+ // Number of hash entries per bucket for which space is preallocated and
+ // keep reserved for subsequent rendering, inneding to provide better
+ // performance.
+ static const size_t RESERVED_ITEMS = 16;
+ static const uint16_t NO_OFFSET = 65535; // used as a marker of 'not found'
+
+ // Structure used as hash entries
+ struct OffsetItem {
+ OffsetItem(const LabelSequence& labels_param, uint16_t offset_param) :
+ labels(labels_param), offset(offset_param)
+ {}
+ LabelSequence labels;
+ uint16_t offset;
+ };
+
+ MessageRendererImpl() :
+ msglength_limit_(512), truncated_(false),
+ compress_mode_(MessageRenderer::CASE_INSENSITIVE)
{
- if (n1.len_ < n2.len_) {
- return (true);
- } else if (n1.len_ > n2.len_) {
- return (false);
+ // Reserve some spaces for hash and name placeholders.
+ for (size_t i = 0; i < BUCKETS; ++i) {
+ table_[i].reserve(RESERVED_ITEMS);
}
+ names_.reserve(BUCKETS);
+ }
- const bool case_sensitive =
- (n1.renderer_.getCompressMode() == MessageRenderer::CASE_SENSITIVE);
-
- uint16_t pos1 = n1.pos_;
- uint16_t pos2 = n2.pos_;
- uint16_t l1 = 0;
- uint16_t l2 = 0;
- for (uint16_t i = 0; i < n1.len_; i++, pos1++, pos2++) {
- pos1 = nextPosition(n1.buffer_, pos1, l1);
- pos2 = nextPosition(n2.buffer_, pos2, l2);
- if (case_sensitive) {
- if (n1.buffer_[pos1] < n2.buffer_[pos2]) {
- return (true);
- } else if (n1.buffer_[pos1] > n2.buffer_[pos2]) {
- return (false);
- }
- } else {
- if (tolower(n1.buffer_[pos1]) < tolower(n2.buffer_[pos2])) {
- return (true);
- } else if (tolower(n1.buffer_[pos1]) >
- tolower(n2.buffer_[pos2])) {
- return (false);
- }
- }
+ // A helper structure to find the hash entry whose labelsequence is
+ // equal to the search key ("target").
+ struct SequenceComp {
+ SequenceComp(const LabelSequence& target, bool case_sensitive) :
+ target_(target), case_sensitive_(case_sensitive)
+ {}
+ bool operator()(const OffsetItem& item) const {
+ return (item.labels.equals(target_, case_sensitive_));
}
-
- return (false);
+ private:
+ const LabelSequence& target_;
+ bool case_sensitive_;
+ };
+
+ uint16_t findOffset(const LabelSequence& sequence) const {
+ const bool case_sensitive = (compress_mode_ ==
+ MessageRenderer::CASE_SENSITIVE);
+ const size_t bucket = (sequence.getHash(case_sensitive) % BUCKETS);
+
+ // Find a matching entry, if any. We use some heuristics here: often
+ // the same name appers consecutively (like repeating the same owner
+ // name for a single RRset), so in case there's a collision in the
+ // bucket it will be more likely to find it in the tail side of the
+ // bucket.
+ vector<OffsetItem>::const_reverse_iterator found =
+ find_if(table_[bucket].rbegin(), table_[bucket].rend(),
+ SequenceComp(sequence, case_sensitive));
+ if (found != table_[bucket].rend()) {
+ return (found->offset);
+ }
+ return (NO_OFFSET);
}
-private:
- uint16_t nextPosition(const OutputBuffer& buffer,
- uint16_t pos, uint16_t& llen) const
- {
- if (llen == 0) {
- size_t i = 0;
-
- while ((buffer[pos] & Name::COMPRESS_POINTER_MARK8) ==
- Name::COMPRESS_POINTER_MARK8) {
- pos = (buffer[pos] & ~Name::COMPRESS_POINTER_MARK8) *
- 256 + buffer[pos + 1];
-
- // This loop should stop as long as the buffer has been
- // constructed validly and the search/insert argument is based
- // on a valid name, which is an assumption for this class.
- // But we'll abort if a bug could cause an infinite loop.
- i += 2;
- assert(i < Name::MAX_WIRE);
- }
- llen = buffer[pos];
- } else {
- --llen;
- }
- return (pos);
+ void addOffset(const LabelSequence& sequence, uint16_t offset) {
+ const bool case_sensitive = (compress_mode_ ==
+ MessageRenderer::CASE_SENSITIVE);
+ const size_t bucket = (sequence.getHash(case_sensitive) % BUCKETS);
+ table_[bucket].push_back(OffsetItem(sequence, offset));
}
-};
-}
-///
-/// \brief The \c MessageRendererImpl class is the actual implementation of
-/// \c MessageRenderer.
-///
-/// The implementation is hidden from applications. We can refer to specific
-/// members of this class only within the implementation source file.
-///
-struct MessageRenderer::MessageRendererImpl {
- /// \brief Constructor from an output buffer.
- ///
- MessageRendererImpl() :
- nbuffer_(Name::MAX_WIRE), msglength_limit_(512),
- truncated_(false), compress_mode_(MessageRenderer::CASE_INSENSITIVE)
- {}
- /// A local working buffer to convert each given name into wire format.
- /// This could be a local variable of the \c writeName() method, but
- /// we keep it in the class so that we can reuse it and avoid construction
- /// overhead.
- OutputBuffer nbuffer_;
- /// A set of compression pointers.
- std::set<NameCompressNode, NameCompare> nodeset_;
/// The maximum length of rendered data that can fit without
/// truncation.
uint16_t msglength_limit_;
@@ -170,6 +115,11 @@ struct MessageRenderer::MessageRendererImpl {
bool truncated_;
/// The name compression mode.
CompressMode compress_mode_;
+
+ // The hash table for the (LabelSequence * offset) entries
+ vector<OffsetItem> table_[BUCKETS];
+ // Placeholder for names referenced from the stored LabelSequences
+ vector<Name> names_;
};
MessageRenderer::MessageRenderer() :
@@ -184,11 +134,27 @@ MessageRenderer::~MessageRenderer() {
void
MessageRenderer::clear() {
AbstractMessageRenderer::clear();
- impl_->nbuffer_.clear();
- impl_->nodeset_.clear();
impl_->msglength_limit_ = 512;
impl_->truncated_ = false;
impl_->compress_mode_ = CASE_INSENSITIVE;
+
+ // Clear the hash table and name placeholders. We reserve the minimum
+ // space for possible subsequent use of the renderer.
+ for (size_t i = 0; i < MessageRendererImpl::BUCKETS; ++i) {
+ if (impl_->table_[i].size() > MessageRendererImpl::RESERVED_ITEMS) {
+ impl_->table_[i].reserve(MessageRendererImpl::RESERVED_ITEMS);
+ vector<MessageRendererImpl::OffsetItem>(impl_->table_[i].begin(),
+ impl_->table_[i].end()).
+ swap(impl_->table_[i]);
+ }
+ impl_->table_[i].clear();
+ }
+ if (impl_->names_.size() > MessageRendererImpl::BUCKETS) {
+ impl_->names_.reserve(MessageRendererImpl::BUCKETS);
+ vector<Name>(impl_->names_.begin(), impl_->names_.end()).
+ swap(impl_->names_);
+ }
+ impl_->names_.clear();
}
size_t
@@ -223,54 +189,66 @@ MessageRenderer::setCompressMode(const CompressMode mode) {
void
MessageRenderer::writeName(const Name& name, const bool compress) {
- impl_->nbuffer_.clear();
- name.toWire(impl_->nbuffer_);
-
- unsigned int i;
- std::set<NameCompressNode, NameCompare>::const_iterator notfound =
- impl_->nodeset_.end();
- std::set<NameCompressNode, NameCompare>::const_iterator n = notfound;
-
- // Find the longest ancestor name in the rendered set that matches the
- // given name.
- for (i = 0; i < impl_->nbuffer_.getLength(); i += impl_->nbuffer_[i] + 1) {
- // skip the trailing null label
- if (impl_->nbuffer_[i] == 0) {
- continue;
+ LabelSequence sequence(name);
+ const size_t nlabels = sequence.getLabelCount();
+ size_t data_len;
+ const char* data;
+
+ // Find the offset in the offset table whose name gives the longest
+ // match against the name to be rendered.
+ size_t nlabels_uncomp;
+ uint16_t ptr_offset = MessageRendererImpl::NO_OFFSET;
+ for (nlabels_uncomp = 0; nlabels_uncomp < nlabels; ++nlabels_uncomp) {
+ if (sequence.getDataLength() == 1) { // trailing dot.
+ ++nlabels_uncomp;
+ break;
}
- n = impl_->nodeset_.find(NameCompressNode(*this, impl_->nbuffer_, i,
- impl_->nbuffer_.getLength() -
- i));
- if (n != notfound) {
+ ptr_offset = impl_->findOffset(sequence);
+ if (ptr_offset != MessageRendererImpl::NO_OFFSET) {
break;
}
+ sequence.stripLeft(1);
}
- // Record the current offset before extending the buffer.
- const size_t offset = getLength();
- // Write uncompress part...
- writeData(impl_->nbuffer_.getData(),
- compress ? i : impl_->nbuffer_.getLength());
- if (compress && n != notfound) {
- // ...and compression pointer if available.
- uint16_t pointer = (*n).pos_;
- pointer |= Name::COMPRESS_POINTER_MARK16;
- writeUint16(pointer);
+ // Record the current offset before updating the offset table
+ size_t offset = getLength();
+ // Write uncompress part:
+ if (nlabels_uncomp > 0 || !compress) {
+ LabelSequence uncomp_sequence(name);
+ if (compress && nlabels > nlabels_uncomp) {
+ // If there's compressed part, strip off that part.
+ uncomp_sequence.stripRight(nlabels - nlabels_uncomp);
+ }
+ data = uncomp_sequence.getData(&data_len);
+ writeData(data, data_len);
+ }
+ // And write compression pointer if available:
+ if (compress && ptr_offset != MessageRendererImpl::NO_OFFSET) {
+ ptr_offset |= Name::COMPRESS_POINTER_MARK16;
+ writeUint16(ptr_offset);
}
- // Finally, add to the set the newly rendered name and its ancestors that
- // have not been in the set.
- for (unsigned int j = 0; j < i; j += impl_->nbuffer_[j] + 1) {
- if (impl_->nbuffer_[j] == 0) {
- continue;
- }
- if (offset + j > Name::MAX_COMPRESS_POINTER) {
- break;
+ // Finally, add the newly rendered name and its ancestors that
+ // have not been in the set. We need to make our copy of name and generate
+ // sequence(s) from the copied name because it's not guaranteed that
+ // the caller keeps the name valid after this call.
+ if (nlabels_uncomp > 0) {
+ impl_->names_.push_back(name);
+ LabelSequence saved_sequence(impl_->names_.back());
+ size_t seqlen = saved_sequence.getDataLength();
+ while (nlabels_uncomp-- > 0) {
+ if (seqlen == 1) { // root name doesn't need to be stored.
+ break;
+ }
+ if (offset > Name::MAX_COMPRESS_POINTER) {
+ break;
+ }
+ impl_->addOffset(saved_sequence, offset);
+ saved_sequence.stripLeft(1);
+ const size_t new_seqlen = saved_sequence.getDataLength();
+ offset += (seqlen - new_seqlen);
+ seqlen = new_seqlen;
}
- impl_->nodeset_.insert(NameCompressNode(*this, getBuffer(),
- offset + j,
- impl_->nbuffer_.getLength() -
- j));
}
}