diff options
author | Andrei Pavel <andrei@isc.org> | 2024-03-04 10:49:45 +0100 |
---|---|---|
committer | Andrei Pavel <andrei@isc.org> | 2024-03-21 17:30:04 +0100 |
commit | 260ad292e87491476c7f34c111d76ef612f151e4 (patch) | |
tree | a340a68aa10a3509f1efbfb64cc2fa84a11f3e4a /src | |
parent | [#3210] refactor io utilities (diff) | |
download | kea-260ad292e87491476c7f34c111d76ef612f151e4.tar.xz kea-260ad292e87491476c7f34c111d76ef612f151e4.zip |
[#3210] refactor string utilities
- Rename util/strutil.h to util/str.h to avoid redundancy.
- Simplify trim function.
- Remove unused functions.
- Get rid of the regex conditional compilation that helped Kea build
with ancient compilers. Lack of proper regex functionality now results
in a failure in configure.ac.
Diffstat (limited to 'src')
49 files changed, 988 insertions, 1345 deletions
diff --git a/src/bin/dhcp4/dhcp4_srv.cc b/src/bin/dhcp4/dhcp4_srv.cc index 7c75edde28..cead6efbfb 100644 --- a/src/bin/dhcp4/dhcp4_srv.cc +++ b/src/bin/dhcp4/dhcp4_srv.cc @@ -49,7 +49,7 @@ #include <hooks/hooks_log.h> #include <hooks/hooks_manager.h> #include <stats/stats_mgr.h> -#include <util/strutil.h> +#include <util/str.h> #include <log/logger.h> #include <cryptolink/cryptolink.h> #include <process/cfgrpt/config_report.h> diff --git a/src/bin/dhcp4/json_config_parser.cc b/src/bin/dhcp4/json_config_parser.cc index 6164c3c7d9..0b5c90c6a5 100644 --- a/src/bin/dhcp4/json_config_parser.cc +++ b/src/bin/dhcp4/json_config_parser.cc @@ -42,7 +42,7 @@ #include <process/config_ctl_parser.h> #include <util/encode/encode.h> #include <util/multi_threading_mgr.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/algorithm/string.hpp> #include <boost/lexical_cast.hpp> diff --git a/src/bin/dhcp6/json_config_parser.cc b/src/bin/dhcp6/json_config_parser.cc index 8fd4e15743..62d8df4239 100644 --- a/src/bin/dhcp6/json_config_parser.cc +++ b/src/bin/dhcp6/json_config_parser.cc @@ -45,7 +45,7 @@ #include <process/config_ctl_parser.h> #include <util/encode/encode.h> #include <util/multi_threading_mgr.h> -#include <util/strutil.h> +#include <util/str.h> #include <util/triplet.h> #include <boost/algorithm/string.hpp> diff --git a/src/hooks/dhcp/flex_option/flex_option.cc b/src/hooks/dhcp/flex_option/flex_option.cc index f32d429d0d..5313216ad2 100644 --- a/src/hooks/dhcp/flex_option/flex_option.cc +++ b/src/hooks/dhcp/flex_option/flex_option.cc @@ -8,7 +8,7 @@ #include <flex_option.h> #include <flex_option_log.h> -#include <util/strutil.h> +#include <util/str.h> #include <cc/simple_parser.h> #include <dhcp/dhcp4.h> #include <dhcp/libdhcp++.h> diff --git a/src/hooks/dhcp/flex_option/flex_option.h b/src/hooks/dhcp/flex_option/flex_option.h index 0bdbdce568..1d60a85920 100644 --- a/src/hooks/dhcp/flex_option/flex_option.h +++ b/src/hooks/dhcp/flex_option/flex_option.h @@ 
-17,7 +17,7 @@ #include <dhcp/std_option_defs.h> #include <eval/evaluate.h> #include <eval/token.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/algorithm/string/split.hpp> #include <boost/algorithm/string/classification.hpp> diff --git a/src/hooks/dhcp/high_availability/ha_config.cc b/src/hooks/dhcp/high_availability/ha_config.cc index a8d70e27e1..89d3f8a0a1 100644 --- a/src/hooks/dhcp/high_availability/ha_config.cc +++ b/src/hooks/dhcp/high_availability/ha_config.cc @@ -14,7 +14,7 @@ #include <dhcpsrv/network.h> #include <exceptions/exceptions.h> #include <util/multi_threading_mgr.h> -#include <util/strutil.h> +#include <util/str.h> #include <ha_log.h> #include <ha_config.h> #include <ha_service_states.h> diff --git a/src/hooks/dhcp/lease_cmds/lease_cmds.cc b/src/hooks/dhcp/lease_cmds/lease_cmds.cc index 534d6f5516..7c6ebf0579 100644 --- a/src/hooks/dhcp/lease_cmds/lease_cmds.cc +++ b/src/hooks/dhcp/lease_cmds/lease_cmds.cc @@ -29,7 +29,7 @@ #include <stats/stats_mgr.h> #include <util/encode/encode.h> #include <util/multi_threading_mgr.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/scoped_ptr.hpp> #include <boost/algorithm/string.hpp> diff --git a/src/lib/cc/server_tag.cc b/src/lib/cc/server_tag.cc index 80e5184ec5..94e4ce0f84 100644 --- a/src/lib/cc/server_tag.cc +++ b/src/lib/cc/server_tag.cc @@ -8,7 +8,7 @@ #include <cc/server_tag.h> #include <exceptions/exceptions.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/algorithm/string.hpp> namespace isc { diff --git a/src/lib/database/database_connection.cc b/src/lib/database/database_connection.cc index fca514d7da..607c706376 100644 --- a/src/lib/database/database_connection.cc +++ b/src/lib/database/database_connection.cc @@ -12,7 +12,7 @@ #include <database/db_log.h> #include <database/db_messages.h> #include <exceptions/exceptions.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/algorithm/string.hpp> #include <vector> diff --git 
a/src/lib/dhcp/classify.cc b/src/lib/dhcp/classify.cc index 6803afb363..f42a9e3710 100644 --- a/src/lib/dhcp/classify.cc +++ b/src/lib/dhcp/classify.cc @@ -8,7 +8,7 @@ #include <cc/data.h> #include <dhcp/classify.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/algorithm/string/classification.hpp> #include <boost/algorithm/string/constants.hpp> diff --git a/src/lib/dhcp/duid.h b/src/lib/dhcp/duid.h index 2ec1b163d2..2239c232c2 100644 --- a/src/lib/dhcp/duid.h +++ b/src/lib/dhcp/duid.h @@ -8,10 +8,14 @@ #define DUID_H #include <asiolink/io_address.h> -#include <util/strutil.h> -#include <boost/shared_ptr.hpp> +#include <util/str.h> + +#include <cstdint> +#include <iomanip> #include <vector> -#include <stdint.h> + +#include <boost/shared_ptr.hpp> + #include <unistd.h> namespace isc { diff --git a/src/lib/dhcp/duid_factory.cc b/src/lib/dhcp/duid_factory.cc index 43c33638dc..ea1ad8ca77 100644 --- a/src/lib/dhcp/duid_factory.cc +++ b/src/lib/dhcp/duid_factory.cc @@ -11,7 +11,7 @@ #include <exceptions/exceptions.h> #include <util/io.h> #include <util/range_utilities.h> -#include <util/strutil.h> +#include <util/str.h> #include <ctime> #include <fstream> #include <stdlib.h> diff --git a/src/lib/dhcp/hwaddr.cc b/src/lib/dhcp/hwaddr.cc index 9f51fb891e..a1b3680257 100644 --- a/src/lib/dhcp/hwaddr.cc +++ b/src/lib/dhcp/hwaddr.cc @@ -9,7 +9,7 @@ #include <dhcp/hwaddr.h> #include <dhcp/dhcp4.h> #include <exceptions/exceptions.h> -#include <util/strutil.h> +#include <util/str.h> #include <iomanip> #include <sstream> #include <vector> diff --git a/src/lib/dhcp/option4_client_fqdn.cc b/src/lib/dhcp/option4_client_fqdn.cc index 17b804b57b..ae2d5b62d9 100644 --- a/src/lib/dhcp/option4_client_fqdn.cc +++ b/src/lib/dhcp/option4_client_fqdn.cc @@ -11,7 +11,7 @@ #include <dns/labelsequence.h> #include <util/buffer.h> #include <util/io.h> -#include <util/strutil.h> +#include <util/str.h> #include <sstream> namespace isc { diff --git a/src/lib/dhcp/option4_dnr.h 
b/src/lib/dhcp/option4_dnr.h index 7c498ceab1..02893d4598 100644 --- a/src/lib/dhcp/option4_dnr.h +++ b/src/lib/dhcp/option4_dnr.h @@ -16,7 +16,7 @@ #include <dhcp/option_data_types.h> #include <dns/name.h> #include <util/encode/utf8.h> -#include <util/strutil.h> +#include <util/str.h> #include <map> #include <set> diff --git a/src/lib/dhcp/option6_client_fqdn.cc b/src/lib/dhcp/option6_client_fqdn.cc index dd28599fa4..ba4d056425 100644 --- a/src/lib/dhcp/option6_client_fqdn.cc +++ b/src/lib/dhcp/option6_client_fqdn.cc @@ -11,7 +11,7 @@ #include <dns/labelsequence.h> #include <util/buffer.h> #include <util/io.h> -#include <util/strutil.h> +#include <util/str.h> #include <sstream> namespace isc { diff --git a/src/lib/dhcp/option_classless_static_route.cc b/src/lib/dhcp/option_classless_static_route.cc index cb4ed0cf13..6d95df9749 100644 --- a/src/lib/dhcp/option_classless_static_route.cc +++ b/src/lib/dhcp/option_classless_static_route.cc @@ -7,7 +7,7 @@ #include <config.h> #include <asiolink/io_error.h> -#include <util/strutil.h> +#include <util/str.h> #include <option_classless_static_route.h> diff --git a/src/lib/dhcp/option_data_types.cc b/src/lib/dhcp/option_data_types.cc index 0308d8c6a3..6be31e8452 100644 --- a/src/lib/dhcp/option_data_types.cc +++ b/src/lib/dhcp/option_data_types.cc @@ -9,7 +9,7 @@ #include <dhcp/option_data_types.h> #include <dns/labelsequence.h> #include <dns/name.h> -#include <util/strutil.h> +#include <util/str.h> #include <util/encode/encode.h> #include <algorithm> #include <limits> diff --git a/src/lib/dhcp/option_definition.cc b/src/lib/dhcp/option_definition.cc index 54d569b660..0c9cac9120 100644 --- a/src/lib/dhcp/option_definition.cc +++ b/src/lib/dhcp/option_definition.cc @@ -31,7 +31,7 @@ #include <util/encode/encode.h> #include <dns/labelsequence.h> #include <dns/name.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/algorithm/string/classification.hpp> #include <boost/algorithm/string/predicate.hpp> #include 
<boost/algorithm/string/replace.hpp> diff --git a/src/lib/dhcp/option_string.cc b/src/lib/dhcp/option_string.cc index 5f1d0d4d69..90918d9985 100644 --- a/src/lib/dhcp/option_string.cc +++ b/src/lib/dhcp/option_string.cc @@ -7,7 +7,7 @@ #include <config.h> #include <dhcp/option_string.h> -#include <util/strutil.h> +#include <util/str.h> #include <sstream> namespace isc { diff --git a/src/lib/dhcp/tests/option_classless_static_route_unittest.cc b/src/lib/dhcp/tests/option_classless_static_route_unittest.cc index 0b99b26104..8eb2775d4a 100644 --- a/src/lib/dhcp/tests/option_classless_static_route_unittest.cc +++ b/src/lib/dhcp/tests/option_classless_static_route_unittest.cc @@ -7,7 +7,7 @@ #include <config.h> #include <dhcp/option_classless_static_route.h> -#include <util/strutil.h> +#include <util/str.h> #include <gtest/gtest.h> diff --git a/src/lib/dhcpsrv/cfg_duid.cc b/src/lib/dhcpsrv/cfg_duid.cc index 24be637ab7..831772d429 100644 --- a/src/lib/dhcpsrv/cfg_duid.cc +++ b/src/lib/dhcpsrv/cfg_duid.cc @@ -9,7 +9,7 @@ #include <dhcp/duid_factory.h> #include <dhcpsrv/cfg_duid.h> #include <util/encode/encode.h> -#include <util/strutil.h> +#include <util/str.h> #include <iostream> #include <string> #include <string.h> diff --git a/src/lib/dhcpsrv/cfg_iface.cc b/src/lib/dhcpsrv/cfg_iface.cc index 78e61ba23d..3991e22396 100644 --- a/src/lib/dhcpsrv/cfg_iface.cc +++ b/src/lib/dhcpsrv/cfg_iface.cc @@ -11,7 +11,7 @@ #include <dhcpsrv/timer_mgr.h> #include <util/reconnect_ctl.h> #include <util/multi_threading_mgr.h> -#include <util/strutil.h> +#include <util/str.h> #include <algorithm> #include <functional> diff --git a/src/lib/dhcpsrv/host.cc b/src/lib/dhcpsrv/host.cc index 4390dba322..5d45192ee4 100644 --- a/src/lib/dhcpsrv/host.cc +++ b/src/lib/dhcpsrv/host.cc @@ -14,7 +14,7 @@ #include <exceptions/exceptions.h> #include <util/encode/encode.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/foreach.hpp> #include <sstream> diff --git 
a/src/lib/dhcpsrv/lease.cc b/src/lib/dhcpsrv/lease.cc index 9dad4083d1..651b9f58b7 100644 --- a/src/lib/dhcpsrv/lease.cc +++ b/src/lib/dhcpsrv/lease.cc @@ -10,7 +10,7 @@ #include <asiolink/addr_utilities.h> #include <dhcpsrv/lease.h> #include <util/pointer_util.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/algorithm/string.hpp> #include <boost/scoped_ptr.hpp> #include <sstream> diff --git a/src/lib/dhcpsrv/parsers/base_network_parser.cc b/src/lib/dhcpsrv/parsers/base_network_parser.cc index 60717c295b..c70c44d36a 100644 --- a/src/lib/dhcpsrv/parsers/base_network_parser.cc +++ b/src/lib/dhcpsrv/parsers/base_network_parser.cc @@ -9,7 +9,7 @@ #include <dhcpsrv/dhcpsrv_log.h> #include <dhcpsrv/parsers/base_network_parser.h> #include <util/optional.h> -#include <util/strutil.h> +#include <util/str.h> using namespace isc::data; using namespace isc::util; diff --git a/src/lib/dhcpsrv/parsers/dhcp_parsers.cc b/src/lib/dhcpsrv/parsers/dhcp_parsers.cc index 78077b5986..5fc899f6f1 100644 --- a/src/lib/dhcpsrv/parsers/dhcp_parsers.cc +++ b/src/lib/dhcpsrv/parsers/dhcp_parsers.cc @@ -19,7 +19,7 @@ #include <dhcpsrv/parsers/simple_parser6.h> #include <dhcpsrv/cfg_mac_source.h> #include <util/encode/encode.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/algorithm/string.hpp> #include <boost/foreach.hpp> diff --git a/src/lib/dhcpsrv/parsers/option_data_parser.cc b/src/lib/dhcpsrv/parsers/option_data_parser.cc index 8ec49a0309..cd544d5833 100644 --- a/src/lib/dhcpsrv/parsers/option_data_parser.cc +++ b/src/lib/dhcpsrv/parsers/option_data_parser.cc @@ -16,7 +16,7 @@ #include <dhcpsrv/parsers/simple_parser4.h> #include <dhcpsrv/parsers/simple_parser6.h> #include <util/encode/encode.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/make_shared.hpp> #include <limits> #include <vector> diff --git a/src/lib/dhcpsrv/srv_config.cc b/src/lib/dhcpsrv/srv_config.cc index 763e678e9a..30a6ad8124 100644 --- 
a/src/lib/dhcpsrv/srv_config.cc +++ b/src/lib/dhcpsrv/srv_config.cc @@ -28,7 +28,7 @@ #include <log/logger_specification.h> #include <dhcp/pkt.h> #include <stats/stats_mgr.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/make_shared.hpp> diff --git a/src/lib/dhcpsrv/srv_config.h b/src/lib/dhcpsrv/srv_config.h index 8489249242..c329eb6421 100644 --- a/src/lib/dhcpsrv/srv_config.h +++ b/src/lib/dhcpsrv/srv_config.h @@ -31,7 +31,7 @@ #include <cc/user_context.h> #include <cc/simple_parser.h> #include <util/optional.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/shared_ptr.hpp> #include <vector> diff --git a/src/lib/dhcpsrv/tests/d2_client_unittest.cc b/src/lib/dhcpsrv/tests/d2_client_unittest.cc index 98efdadced..6bab3abeed 100644 --- a/src/lib/dhcpsrv/tests/d2_client_unittest.cc +++ b/src/lib/dhcpsrv/tests/d2_client_unittest.cc @@ -10,7 +10,7 @@ #include <dhcpsrv/d2_client_mgr.h> #include <testutils/test_to_element.h> #include <exceptions/exceptions.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/algorithm/string.hpp> #include <gtest/gtest.h> diff --git a/src/lib/hooks/hooks_parser.cc b/src/lib/hooks/hooks_parser.cc index 32296214dc..56029896fe 100644 --- a/src/lib/hooks/hooks_parser.cc +++ b/src/lib/hooks/hooks_parser.cc @@ -10,7 +10,7 @@ #include <cc/dhcp_config_error.h> #include <hooks/hooks_parser.h> #include <boost/algorithm/string.hpp> -#include <util/strutil.h> +#include <util/str.h> #include <vector> using namespace std; diff --git a/src/lib/http/basic_auth_config.cc b/src/lib/http/basic_auth_config.cc index 1e6a727094..84c0c548ec 100644 --- a/src/lib/http/basic_auth_config.cc +++ b/src/lib/http/basic_auth_config.cc @@ -9,7 +9,7 @@ #include <http/auth_log.h> #include <http/basic_auth_config.h> #include <util/filesystem.h> -#include <util/strutil.h> +#include <util/str.h> using namespace isc; using namespace isc::data; diff --git a/src/lib/http/http_header.cc b/src/lib/http/http_header.cc index 
a543e664b2..555eb3caa8 100644 --- a/src/lib/http/http_header.cc +++ b/src/lib/http/http_header.cc @@ -8,7 +8,7 @@ #include <exceptions/exceptions.h> #include <http/http_header.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/lexical_cast.hpp> namespace isc { diff --git a/src/lib/log/compiler/message.cc b/src/lib/log/compiler/message.cc index 91a582f958..81f408742b 100644 --- a/src/lib/log/compiler/message.cc +++ b/src/lib/log/compiler/message.cc @@ -22,7 +22,7 @@ #include <exceptions/exceptions.h> #include <util/filesystem.h> -#include <util/strutil.h> +#include <util/str.h> #include <log/log_messages.h> #include <log/message_dictionary.h> diff --git a/src/lib/log/logger.cc b/src/lib/log/logger.cc index e036a096a8..e8e0f74a98 100644 --- a/src/lib/log/logger.cc +++ b/src/lib/log/logger.cc @@ -16,7 +16,7 @@ #include <log/message_dictionary.h> #include <log/message_types.h> -#include <util/strutil.h> +#include <util/str.h> using namespace std; diff --git a/src/lib/log/logger_impl.cc b/src/lib/log/logger_impl.cc index d5ca88fd3a..b5e9c8dc8e 100644 --- a/src/lib/log/logger_impl.cc +++ b/src/lib/log/logger_impl.cc @@ -38,7 +38,7 @@ #include <log/interprocess/interprocess_sync_file.h> #include <log/interprocess/interprocess_sync_null.h> -#include <util/strutil.h> +#include <util/str.h> // Note: as log4cplus and the Kea logger have many concepts in common, and // thus many similar names, to disambiguate types we don't "use" the log4cplus diff --git a/src/lib/log/message_reader.cc b/src/lib/log/message_reader.cc index 2b48608733..bf797d9ffa 100644 --- a/src/lib/log/message_reader.cc +++ b/src/lib/log/message_reader.cc @@ -16,7 +16,7 @@ #include <log/log_messages.h> #include <log/message_exception.h> #include <log/message_reader.h> -#include <util/strutil.h> +#include <util/str.h> using namespace std; diff --git a/src/lib/log/tests/logger_example.cc b/src/lib/log/tests/logger_example.cc index ff3d512296..86e9b76431 100644 --- 
a/src/lib/log/tests/logger_example.cc +++ b/src/lib/log/tests/logger_example.cc @@ -24,7 +24,7 @@ #include <string> #include <vector> -#include <util/strutil.h> +#include <util/str.h> #include <log/logger.h> #include <log/logger_level.h> diff --git a/src/lib/process/d_cfg_mgr.cc b/src/lib/process/d_cfg_mgr.cc index 24ead16d3b..72bc4dea56 100644 --- a/src/lib/process/d_cfg_mgr.cc +++ b/src/lib/process/d_cfg_mgr.cc @@ -13,7 +13,7 @@ #include <process/daemon.h> #include <process/redact_config.h> #include <util/encode/encode.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/lexical_cast.hpp> #include <boost/algorithm/string.hpp> diff --git a/src/lib/tcp/tcp_connection.cc b/src/lib/tcp/tcp_connection.cc index 0445b35a90..a3fa375bfd 100644 --- a/src/lib/tcp/tcp_connection.cc +++ b/src/lib/tcp/tcp_connection.cc @@ -11,7 +11,7 @@ #include <tcp/tcp_connection_pool.h> #include <tcp/tcp_log.h> #include <tcp/tcp_messages.h> -#include <util/strutil.h> +#include <util/str.h> #include <boost/make_shared.hpp> #include <iomanip> diff --git a/src/lib/tcp/tcp_stream_msg.cc b/src/lib/tcp/tcp_stream_msg.cc index 89fd5e8247..64332af840 100644 --- a/src/lib/tcp/tcp_stream_msg.cc +++ b/src/lib/tcp/tcp_stream_msg.cc @@ -7,7 +7,7 @@ #include <config.h> #include <tcp/tcp_stream_msg.h> -#include <util/strutil.h> +#include <util/str.h> #include <iomanip> #include <sstream> diff --git a/src/lib/util/Makefile.am b/src/lib/util/Makefile.am index 828e0fe7ed..90cb5e7118 100644 --- a/src/lib/util/Makefile.am +++ b/src/lib/util/Makefile.am @@ -32,7 +32,7 @@ libkea_util_la_SOURCES += staged_value.h libkea_util_la_SOURCES += state_model.cc state_model.h libkea_util_la_SOURCES += stopwatch.cc stopwatch.h libkea_util_la_SOURCES += stopwatch_impl.cc stopwatch_impl.h -libkea_util_la_SOURCES += strutil.h strutil.cc +libkea_util_la_SOURCES += str.h str.cc libkea_util_la_SOURCES += thread_pool.h libkea_util_la_SOURCES += triplet.h libkea_util_la_SOURCES += unlock_guard.h @@ -77,7 +77,7 @@ 
libkea_util_include_HEADERS = \ state_model.h \ stopwatch.h \ stopwatch_impl.h \ - strutil.h \ + str.h \ thread_pool.h \ triplet.h \ unlock_guard.h \ diff --git a/src/lib/util/str.cc b/src/lib/util/str.cc new file mode 100644 index 0000000000..9c3a3b857a --- /dev/null +++ b/src/lib/util/str.cc @@ -0,0 +1,345 @@ +// Copyright (C) 2011-2024 Internet Systems Consortium, Inc. ("ISC") +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include <config.h> + +#include <util/encode/encode.h> +#include <util/str.h> + +#include <cstddef> +#include <cstdint> +#include <exception> +#include <iomanip> +#include <regex> +#include <sstream> +#include <string> +#include <vector> + +#include <boost/algorithm/string/classification.hpp> +#include <boost/algorithm/string/constants.hpp> +#include <boost/algorithm/string/split.hpp> + +using namespace std; + +namespace isc { +namespace util { +namespace str { + +string +trim(const string& input) { + if (input.empty()) { + return string(); + } + static const char* blanks = " \t\n"; + + // Search for first non-blank character in the string. + size_t const first(input.find_first_not_of(blanks)); + if (first == string::npos) { + return string(); + } + + // String not all blanks, so look for last character. + size_t const last(input.find_last_not_of(blanks)); + + // Extract the trimmed substring. 
+ return input.substr(first, (last - first + 1)); +} + +vector<string> +tokens(const string& text, const string& delim, bool escape) { + vector<string> result; + string token; + bool in_token = false; + bool escaped = false; + for (auto const& c : text) { + if (delim.find(c) != string::npos) { + // Current character is a delimiter + if (!in_token) { + // Two or more delimiters, eat them + } else if (escaped) { + // Escaped delimiter in a token: reset escaped and keep it + escaped = false; + token.push_back(c); + } else { + // End of the current token: save it if not empty + if (!token.empty()) { + result.push_back(token); + } + // Reset state + in_token = false; + token.clear(); + } + } else if (escape && (c == '\\')) { + // Current character is the escape character + if (!in_token) { + // The escape character is the first character of a new token + in_token = true; + } + if (escaped) { + // Escaped escape: reset escaped and keep one character + escaped = false; + token.push_back(c); + } else { + // Remember to keep the next character + escaped = true; + } + } else { + // Not a delimiter nor an escape + if (!in_token) { + // First character of a new token + in_token = true; + } + if (escaped) { + // Escaped common character: as escape was false + escaped = false; + token.push_back('\\'); + token.push_back(c); + } else { + // The common case: keep it + token.push_back(c); + } + } + } + // End of input: close and save the current token if not empty + if (escaped) { + // Pending escape + token.push_back('\\'); + } + if (!token.empty()) { + result.push_back(token); + } + + return (result); +} + +char +toUpper(char const chr) { + return (toupper(chr)); +} + +void +uppercase(string& text) { + transform(text.begin(), text.end(), text.begin(), toUpper); +} + +char +toLower(char const chr) { + return (tolower(static_cast<int>(chr))); +} + +void +lowercase(string& text) { + transform(text.begin(), text.end(), text.begin(), toLower); +} + +vector<uint8_t> 
+quotedStringToBinary(const string& quoted_string) { + vector<uint8_t> binary; + // Remove whitespace before and after the quotes. + string trimmed_string = trim(quoted_string); + + // We require two quote characters, so the length of the string must be + // equal to 2 at minimum, and it must start and end with quotes. + if ((trimmed_string.length() > 1) && + ((trimmed_string[0] == '\'') && (trimmed_string[trimmed_string.length() - 1] == '\''))) { + // Remove quotes and trim the text inside the quotes. + trimmed_string = trim(trimmed_string.substr(1, trimmed_string.length() - 2)); + // Copy string contents into the vector. + binary.assign(trimmed_string.begin(), trimmed_string.end()); + } + // Return resulting vector or empty vector. + return (binary); +} + +void +decodeColonSeparatedHexString(const string& hex_string, vector<uint8_t>& binary) { + decodeSeparatedHexString(hex_string, ":", binary); +} + +void +decodeSeparatedHexString(const string& hex_string, const string& sep, vector<uint8_t>& binary) { + vector<string> split_text; + boost::split(split_text, hex_string, boost::is_any_of(sep), + boost::algorithm::token_compress_off); + + vector<uint8_t> binary_vec; + for (size_t i = 0; i < split_text.size(); ++i) { + // If there are multiple tokens and the current one is empty, it + // means that two consecutive colons were specified. This is not + // allowed. + if ((split_text.size() > 1) && split_text[i].empty()) { + isc_throw(BadValue, "two consecutive separators ('" + << sep << "') specified in a decoded string '" << hex_string + << "'"); + + // Between a colon we expect at most two characters. + } else if (split_text[i].size() > 2) { + isc_throw(BadValue, "invalid format of the decoded string" + << " '" << hex_string << "'"); + + } else if (!split_text[i].empty()) { + stringstream s; + s << "0x"; + + for (unsigned int j = 0; j < split_text[i].length(); ++j) { + // Check if we're dealing with hexadecimal digit. 
+ if (!isxdigit(split_text[i][j])) { + isc_throw(BadValue, "'" << split_text[i][j] + << "' is not a valid hexadecimal digit in" + << " decoded string '" << hex_string << "'"); + } + s << split_text[i][j]; + } + + // The stream should now have one or two hexadecimal digits. + // Let's convert it to a number and store in a temporary + // vector. + unsigned int binary_value; + s >> hex >> binary_value; + + binary_vec.push_back(static_cast<uint8_t>(binary_value)); + } + } + + // All ok, replace the data in the output vector with a result. + binary.swap(binary_vec); +} + +void +decodeFormattedHexString(const string& hex_string, vector<uint8_t>& binary) { + // If there is at least one colon we assume that the string + // comprises octets separated by colons (e.g. MAC address notation). + if (hex_string.find(':') != string::npos) { + decodeSeparatedHexString(hex_string, ":", binary); + } else if (hex_string.find(' ') != string::npos) { + decodeSeparatedHexString(hex_string, " ", binary); + } else { + ostringstream s; + + // If we have odd number of digits we'll have to prepend '0'. + if (hex_string.length() % 2 != 0) { + s << "0"; + } + + // It is ok to use '0x' prefix in a string. + if ((hex_string.length() > 2) && (hex_string.substr(0, 2) == "0x")) { + // Exclude '0x' from the decoded string. + s << hex_string.substr(2); + + } else { + // No '0x', so decode the whole string. + s << hex_string; + } + + try { + // Decode the hex string. + encode::decodeHex(s.str(), binary); + + } catch (...) { + isc_throw(BadValue, "'" << hex_string + << "' is not a valid" + " string of hexadecimal digits"); + } + } +} + +class StringSanitizerImpl { +public: + /// @brief Constructor. 
+ StringSanitizerImpl(const string& char_set, const string& char_replacement) + : char_set_(char_set), char_replacement_(char_replacement) { + if (char_set.size() > StringSanitizer::MAX_DATA_SIZE) { + isc_throw(BadValue, "char set size: '" << char_set.size() << "' exceeds max size: '" + << StringSanitizer::MAX_DATA_SIZE << "'"); + } + + if (char_replacement.size() > StringSanitizer::MAX_DATA_SIZE) { + isc_throw(BadValue, "char replacement size: '" + << char_replacement.size() << "' exceeds max size: '" + << StringSanitizer::MAX_DATA_SIZE << "'"); + } + try { + scrub_exp_ = regex(char_set, regex::extended); + } catch (const exception& ex) { + isc_throw(BadValue, "invalid regex: '" << char_set_ << "', " << ex.what()); + } + } + + string scrub(const string& original) { + stringstream result; + try { + regex_replace(ostream_iterator<char>(result), original.begin(), original.end(), + scrub_exp_, char_replacement_); + } catch (const exception& ex) { + isc_throw(BadValue, "replacing '" << char_set_ << "' with '" << char_replacement_ + << "' in '" << original << "' failed: ," + << ex.what()); + } + + return (result.str()); + } + +private: + /// @brief The char set data for regex. + string char_set_; + + /// @brief The char replacement data for regex. + string char_replacement_; + + regex scrub_exp_; +}; + +// @note The regex engine is implemented using recursion and can cause +// stack overflow if the input data is too large. An arbitrary size of +// 4096 should be enough for all cases. 
+const uint32_t StringSanitizer::MAX_DATA_SIZE = 4096; + +StringSanitizer::StringSanitizer(const string& char_set, const string& char_replacement) + : impl_(new StringSanitizerImpl(char_set, char_replacement)) { +} + +string +StringSanitizer::scrub(const string& original) { + return (impl_->scrub(original)); +} + +bool +isPrintable(const string& content) { + for (char const ch : content) { + if (isprint(ch) == 0) { + return (false); + } + } + return (true); +} + +bool +isPrintable(const vector<uint8_t>& content) { + for (uint8_t const ch : content) { + if (isprint(ch) == 0) { + return (false); + } + } + return (true); +} + +string +dumpAsHex(const uint8_t* data, size_t length) { + stringstream output; + for (unsigned int i = 0; i < length; i++) { + if (i) { + output << ":"; + } + + output << setfill('0') << setw(2) << hex << static_cast<unsigned short>(data[i]); + } + + return (output.str()); +} + +} // namespace str +} // namespace util +} // namespace isc diff --git a/src/lib/util/strutil.h b/src/lib/util/str.h index 8f3cd13ae8..1e5d4c405f 100644 --- a/src/lib/util/strutil.h +++ b/src/lib/util/str.h @@ -4,19 +4,20 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-#ifndef STRUTIL_H -#define STRUTIL_H +#ifndef KEA_UTIL_STR_H +#define KEA_UTIL_STR_H + +#include <exceptions/exceptions.h> #include <algorithm> -#include <cctype> -#include <stdint.h> -#include <string> -#include <iomanip> +#include <cstddef> +#include <cstdint> +#include <memory> #include <sstream> +#include <string> #include <vector> -#include <exceptions/exceptions.h> + #include <boost/lexical_cast.hpp> -#include <boost/shared_ptr.hpp> namespace isc { namespace util { @@ -27,59 +28,52 @@ namespace str { /// /// @brief A standard string util exception that is thrown if getToken or /// numToToken are called with bad input data -/// class StringTokenError : public Exception { public: - StringTokenError(const char* file, size_t line, const char* what) : - isc::Exception(file, line, what) {} + StringTokenError(const char* file, size_t line, const char* what) + : isc::Exception(file, line, what) { + } }; -/// @brief Normalize Backslash -/// -/// Only relevant to Windows, this replaces all "\" in a string with "/" -/// and returns the result. On other systems it is a no-op. Note -/// that Windows does recognize file names with the "\" replaced by "/" -/// (at least in system calls, if not the command line). -/// -/// @param name Name to be substituted -void normalizeSlash(std::string& name); - -/// @brief Trim Leading and Trailing Spaces +/// @brief Trim leading and trailing spaces. /// /// Returns a copy of the input string but with any leading or trailing spaces /// or tabs removed. /// -/// @param instring Input string to modify +/// @param input Input string to modify. /// -/// @return String with leading and trailing spaces removed -std::string trim(const std::string& instring); +/// @return String with leading and trailing spaces removed. 
+std::string +trim(const std::string& input); /// @brief Finds the "trimmed" end of a buffer /// /// Works backward from the end of the buffer, looking for the first /// character not equal to the trim value, and returns an iterator -/// pointing that that position. +/// pointing to that position. /// /// @param begin - Forward iterator pointing to the beginning of the -/// buffer to trim +/// buffer to trim. /// @param end - Forward iterator pointing to the untrimmed end of -/// the buffer to trim +/// the buffer to trim. /// @param trim_val - byte value to trim off /// /// @return Iterator pointing the first character from the end of the -/// buffer not equal to the trim value -template<typename Iterator> +/// buffer not equal to the trim value. +template <typename Iterator> Iterator -seekTrimmed(Iterator begin, Iterator end, uint8_t trim_val) { - for (; end != begin && *(end - 1) == trim_val; --end); +seekTrimmed(Iterator const& begin, Iterator end, uint8_t const trim_val) { + while (end != begin && *(end - 1) == trim_val) { + --end; + } return (end); } -/// @brief Split String into Tokens +/// @brief Split string into tokens. /// /// Splits a string into tokens (the tokens being delimited by one or more of -/// the delimiter characters) and returns the tokens in a vector array. Note -/// that adjacent delimiters are considered to be a single delimiter. +/// the delimiter characters) and returns the tokens in a vector. +/// Adjacent delimiters are considered to be a single delimiter. /// /// Special cases are: /// -# The empty string is considered to be zero tokens. @@ -102,120 +96,46 @@ seekTrimmed(Iterator begin, Iterator end, uint8_t trim_val) { /// @param escape Use backslash to escape delimiter characters /// /// @return Vector of tokens. 
-std::vector<std::string> tokens(const std::string& text, - const std::string& delim = std::string(" \t\n"), - bool escape = false); +std::vector<std::string> +tokens(const std::string& text, const std::string& delim = " \t\n", bool escape = false); -/// @brief Uppercase Character +/// @brief Convert character to uppercase. /// -/// Used in uppercase() to pass as an argument to std::transform(). The -/// function std::toupper() can't be used as it takes an "int" as its argument; +/// Used in uppercase() to pass as a parameter to std::transform(). The +/// function std::toupper() can't be used as it takes an "int" as its parameter; /// this confuses the template expansion mechanism because dereferencing a /// string::iterator returns a char. /// /// @param chr Character to be upper-cased. /// -/// @return Uppercase version of the argument -inline char toUpper(char chr) { - return (static_cast<char>(std::toupper(static_cast<int>(chr)))); -} +/// @return Uppercase version of the input character. +char +toUpper(char const chr); -/// @brief Uppercase String -/// -/// A convenience function to uppercase a string. +/// @brief Convert string to uppercase. /// /// @param text String to be upper-cased. -inline void uppercase(std::string& text) { - std::transform(text.begin(), text.end(), text.begin(), - isc::util::str::toUpper); -} +void +uppercase(std::string& text); -/// @brief Lowercase Character +/// @brief Convert character to lowercase. /// -/// Used in lowercase() to pass as an argument to std::transform(). The -/// function std::tolower() can't be used as it takes an "int" as its argument; +/// Used in lowercase() to pass as a parameter to std::transform(). The +/// function std::tolower() can't be used as it takes an "int" as its parameter; /// this confuses the template expansion mechanism because dereferencing a /// string::iterator returns a char. /// /// @param chr Character to be lower-cased. 
/// -/// @return Lowercase version of the argument -inline char toLower(char chr) { - return (static_cast<char>(std::tolower(static_cast<int>(chr)))); -} +/// @return Lowercase version of the input character. +char +toLower(char const chr); -/// @brief Lowercase String -/// -/// A convenience function to lowercase a string +/// @brief Convert string to lowercase. /// /// @param text String to be lower-cased. -inline void lowercase(std::string& text) { - std::transform(text.begin(), text.end(), text.begin(), - isc::util::str::toLower); -} - -/// @brief Apply Formatting -/// -/// Given a printf-style format string containing only "%s" place holders -/// (others are ignored) and a vector of strings, this produces a single string -/// with the placeholders replaced. -/// -/// @param format Format string -/// @param args Vector of argument strings -/// -/// @return Resultant string -std::string format(const std::string& format, - const std::vector<std::string>& args); - - -/// @brief Returns one token from the given stringstream -/// -/// Using the >> operator, with basic error checking -/// -/// @throw StringTokenError if the token cannot be read from the stream -/// -/// @param iss stringstream to read one token from -/// -/// @return the first token read from the stringstream -std::string getToken(std::istringstream& iss); - -/// @brief Converts a string token to an *unsigned* integer. -/// -/// The value is converted using a lexical cast, with error and bounds -/// checking. -/// -/// NumType is a *signed* integral type (e.g. int32_t) that is sufficiently -/// wide to store resulting integers. -/// -/// BitSize is the maximum number of bits that the resulting integer can take. -/// This function first checks whether the given token can be converted to -/// an integer of NumType type. It then confirms the conversion result is -/// within the valid range, i.e., [0, 2^BitSize - 1]. 
The second check is -/// necessary because lexical_cast<T> where T is an unsigned integer type -/// doesn't correctly reject negative numbers when compiled with SunStudio. -/// -/// @throw StringTokenError if the value is out of range, or if it -/// could not be converted -/// -/// @param num_token the string token to convert -/// -/// @return the converted value, of type NumType -template <typename NumType, int BitSize> -NumType -tokenToNum(const std::string& num_token) { - NumType num; - try { - num = boost::lexical_cast<NumType>(num_token); - } catch (const boost::bad_lexical_cast&) { - isc_throw(StringTokenError, "Invalid SRV numeric parameter: " << - num_token); - } - if (num < 0 || num >= (static_cast<NumType>(1) << BitSize)) { - isc_throw(StringTokenError, "Numeric SRV parameter out of range: " << - num); - } - return (num); -} +void +lowercase(std::string& text); /// @brief Converts a string in quotes into vector. /// @@ -263,16 +183,12 @@ decodeSeparatedHexString(const std::string& hex_string, /// @brief Converts a string of hexadecimal digits with colons into /// a vector. /// -/// Convenience method which calls @c decodeSeparatedHexString() passing -/// in a colon for the separator. - /// @param hex_string Input string. /// @param binary Vector receiving converted string into binary. /// /// @throw isc::BadValue if the format of the input string is invalid. void -decodeColonSeparatedHexString(const std::string& hex_string, - std::vector<uint8_t>& binary); +decodeColonSeparatedHexString(const std::string& hex_string, std::vector<uint8_t>& binary); /// @brief Converts a formatted string of hexadecimal digits into /// a vector. @@ -293,24 +209,17 @@ decodeColonSeparatedHexString(const std::string& hex_string, /// /// @throw isc::BadValue if the format of the input string is invalid. 
void -decodeFormattedHexString(const std::string& hex_string, - std::vector<uint8_t>& binary); +decodeFormattedHexString(const std::string& hex_string, std::vector<uint8_t>& binary); /// @brief Forward declaration to the @c StringSanitizer implementation. class StringSanitizerImpl; /// @brief Type representing the pointer to the @c StringSanitizerImpl. -typedef boost::shared_ptr<StringSanitizerImpl> StringSanitizerImplPtr; +using StringSanitizerImplPtr = std::shared_ptr<StringSanitizerImpl>; -/// @brief Implements a regular expression based string scrubber -/// -/// The implementation uses C++11 regex IF the environment supports it -/// (tested in configure.ac). If not it falls back to C lib regcomp/regexec. -/// Older compilers, such as pre Gnu g++ 4.9.0, provided only experimental -/// implementations of regex which are recognized as buggy. +/// @brief Implements a regular expression based string scrubber. class StringSanitizer { public: - /// @brief Constructor. /// /// Compiles the given character set into a regular expression, and @@ -324,23 +233,17 @@ public: /// @param char_replacement string of one or more characters to use as the /// replacement for invalid characters. /// - /// @throw BadValue if given an invalid regular expression - StringSanitizer(const std::string& char_set, - const std::string& char_replacement); - - /// @brief Destructor. - /// - /// Destroys the implementation instance. - ~StringSanitizer(); + /// @throw BadValue if given an invalid regular expression. + StringSanitizer(const std::string& char_set, const std::string& char_replacement); - /// Returns a scrubbed copy of a given string + /// @brief Returns a scrubbed copy of a given string. /// /// Replaces all occurrences of characters described by the regular /// expression with the character replacement. /// - /// @param original the string to scrub + /// @param original The string to be scrubbed. 
/// - /// @throw Unexpected if an error occurs during scrubbing + /// @throw Unexpected if an error occurs during scrubbing. std::string scrub(const std::string& original); /// @brief The maximum size for regex parameters. @@ -356,48 +259,34 @@ private: }; /// @brief Type representing the pointer to the @c StringSanitizer. -typedef boost::shared_ptr<StringSanitizer> StringSanitizerPtr; +using StringSanitizerPtr = std::unique_ptr<StringSanitizer>; -/// @brief Check if a string is printable +/// @brief Check if a string is printable. /// -/// @param content String to check for printable characters +/// @param content String to check for printable characters. /// -/// @return True if empty or contains only printable characters, False otherwise -inline bool -isPrintable(const std::string& content) { - for (auto const& ch : content) { - if (isprint(static_cast<int>(ch)) == 0) { - return (false); - } - } - return (true); -} +/// @return True if empty or contains only printable characters, False otherwise. +bool +isPrintable(const std::string& content); -/// @brief Check if a byte vector is printable +/// @brief Check if a byte vector is printable. /// -/// @param content Vector to check for printable characters +/// @param content Vector to check for printable characters. /// -/// @return True if empty or contains only printable characters, False otherwise -inline bool -isPrintable(const std::vector<uint8_t>& content) { - for (auto const& ch : content) { - if (isprint(static_cast<int>(ch)) == 0) { - return (false); - } - } - return (true); -} - +/// @return True if empty or contains only printable characters, False otherwise. +bool +isPrintable(const std::vector<uint8_t>& content); -/// @brief Dumps a buffer of bytes as a string of hexadecimal digits +/// @brief Dumps a buffer of bytes as a string of hexadecimal digits. /// -/// @param data pointer to the data to dump -/// @param length number of bytes to dump. 
Caller should ensure the length +/// @param data Pointer to the data to dump. +/// @param length Number of bytes to dump. Caller should ensure the length /// does not exceed the buffer. -std::string dumpAsHex(const uint8_t* data, size_t length); +std::string +dumpAsHex(const uint8_t* data, size_t length); -} // namespace str -} // namespace util -} // namespace isc +} // namespace str +} // namespace util +} // namespace isc -#endif // STRUTIL_H +#endif // KEA_UTIL_STR_H diff --git a/src/lib/util/strutil.cc b/src/lib/util/strutil.cc deleted file mode 100644 index 7c3b2e65e2..0000000000 --- a/src/lib/util/strutil.cc +++ /dev/null @@ -1,467 +0,0 @@ -// Copyright (C) 2011-2024 Internet Systems Consortium, Inc. ("ISC") -// -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include <config.h> - -#include <util/encode/encode.h> -#include <util/strutil.h> - -#include <boost/algorithm/string/classification.hpp> -#include <boost/algorithm/string/constants.hpp> -#include <boost/algorithm/string/split.hpp> - -#include <numeric> -#include <iostream> -#include <sstream> - -// Early versions of C++11 regex were buggy, use it if we -// can otherwise, we fall back to regcomp/regexec. 
For more info see: -// https://stackoverflow.com/questions/12530406/is-gcc-4-8-or-earlier-buggy-about-regular-expressions -#ifdef USE_REGEX -#include <regex> -#else -#include <sys/types.h> -#include <regex.h> -#endif - -#include <string.h> - -using namespace std; - -namespace isc { -namespace util { -namespace str { - -// Normalize slashes - -void -normalizeSlash(std::string& name) { - if (!name.empty()) { - size_t pos = 0; - while ((pos = name.find('\\', pos)) != std::string::npos) { - name[pos] = '/'; - } - } -} - -// Trim String - -string -trim(const string& instring) { - string retstring = ""; - if (!instring.empty()) { - static const char* blanks = " \t\n"; - - // Search for first non-blank character in the string - size_t first = instring.find_first_not_of(blanks); - if (first != string::npos) { - - // String not all blanks, so look for last character - size_t last = instring.find_last_not_of(blanks); - - // Extract the trimmed substring - retstring = instring.substr(first, (last - first + 1)); - } - } - - return (retstring); -} - -// Tokenize string. As noted in the header, this is locally written to avoid -// another dependency on a Boost library. 
- -vector<string> -tokens(const std::string& text, const std::string& delim, bool escape) { - vector<string> result; - string token; - bool in_token = false; - bool escaped = false; - for (auto const& c : text) { - if (delim.find(c) != string::npos) { - // Current character is a delimiter - if (!in_token) { - // Two or more delimiters, eat them - } else if (escaped) { - // Escaped delimiter in a token: reset escaped and keep it - escaped = false; - token.push_back(c); - } else { - // End of the current token: save it if not empty - if (!token.empty()) { - result.push_back(token); - } - // Reset state - in_token = false; - token.clear(); - } - } else if (escape && (c == '\\')) { - // Current character is the escape character - if (!in_token) { - // The escape character is the first character of a new token - in_token = true; - } - if (escaped) { - // Escaped escape: reset escaped and keep one character - escaped = false; - token.push_back(c); - } else { - // Remember to keep the next character - escaped = true; - } - } else { - // Not a delimiter nor an escape - if (!in_token) { - // First character of a new token - in_token = true; - } - if (escaped) { - // Escaped common character: as escape was false - escaped = false; - token.push_back('\\'); - token.push_back(c); - } else { - // The common case: keep it - token.push_back(c); - } - } - } - // End of input: close and save the current token if not empty - if (escaped) { - // Pending escape - token.push_back('\\'); - } - if (!token.empty()) { - result.push_back(token); - } - - return (result); -} - -// Local function to pass to accumulate() for summing up string lengths. - -namespace { - -size_t -lengthSum(string::size_type curlen, const string& cur_string) { - return (curlen + cur_string.size()); -} - -} - -// Provide printf-style formatting. - -std::string -format(const std::string& format, const std::vector<std::string>& args) { - - static const string flag = "%s"; - - // Initialize return string. 
To speed things up, we'll reserve an - // appropriate amount of space - current string size, plus length of all - // the argument strings, less two characters for each argument (the %s in - // the format string is being replaced). - string result; - size_t length = accumulate(args.begin(), args.end(), format.size(), - lengthSum) - (args.size() * flag.size()); - result.reserve(length); - - // Iterate through replacing all tokens - result = format; - size_t tokenpos = 0; // Position of last token replaced - std::vector<std::string>::size_type i = 0; // Index into argument array - - while ((i < args.size()) && (tokenpos != string::npos)) { - tokenpos = result.find(flag, tokenpos); - if (tokenpos != string::npos) { - result.replace(tokenpos, flag.size(), args[i++]); - } - } - - return (result); -} - -std::string -getToken(std::istringstream& iss) { - string token; - iss >> token; - if (iss.bad() || iss.fail()) { - isc_throw(StringTokenError, "could not read token from string"); - } - return (token); -} - -std::vector<uint8_t> -quotedStringToBinary(const std::string& quoted_string) { - std::vector<uint8_t> binary; - // Remove whitespace before and after the quotes. - std::string trimmed_string = trim(quoted_string); - - // We require two quote characters, so the length of the string must be - // equal to 2 at minimum, and it must start and end with quotes. - if ((trimmed_string.length() > 1) && ((trimmed_string[0] == '\'') && - (trimmed_string[trimmed_string.length()-1] == '\''))) { - // Remove quotes and trim the text inside the quotes. - trimmed_string = trim(trimmed_string.substr(1, trimmed_string.length() - 2)); - // Copy string contents into the vector. - binary.assign(trimmed_string.begin(), trimmed_string.end()); - } - // Return resulting vector or empty vector. 
- return (binary); -} - -void -decodeColonSeparatedHexString(const std::string& hex_string, - std::vector<uint8_t>& binary) { - decodeSeparatedHexString(hex_string, ":", binary); -} - -void -decodeSeparatedHexString(const std::string& hex_string, const std::string& sep, - std::vector<uint8_t>& binary) { - std::vector<std::string> split_text; - boost::split(split_text, hex_string, boost::is_any_of(sep), - boost::algorithm::token_compress_off); - - std::vector<uint8_t> binary_vec; - for (size_t i = 0; i < split_text.size(); ++i) { - - // If there are multiple tokens and the current one is empty, it - // means that two consecutive colons were specified. This is not - // allowed. - if ((split_text.size() > 1) && split_text[i].empty()) { - isc_throw(isc::BadValue, "two consecutive separators ('" << sep << "') specified in" - " a decoded string '" << hex_string << "'"); - - // Between a colon we expect at most two characters. - } else if (split_text[i].size() > 2) { - isc_throw(isc::BadValue, "invalid format of the decoded string" - << " '" << hex_string << "'"); - - } else if (!split_text[i].empty()) { - std::stringstream s; - s << "0x"; - - for (unsigned int j = 0; j < split_text[i].length(); ++j) { - // Check if we're dealing with hexadecimal digit. - if (!isxdigit(split_text[i][j])) { - isc_throw(isc::BadValue, "'" << split_text[i][j] - << "' is not a valid hexadecimal digit in" - << " decoded string '" << hex_string << "'"); - } - s << split_text[i][j]; - } - - // The stream should now have one or two hexadecimal digits. - // Let's convert it to a number and store in a temporary - // vector. - unsigned int binary_value; - s >> std::hex >> binary_value; - - binary_vec.push_back(static_cast<uint8_t>(binary_value)); - } - - } - - // All ok, replace the data in the output vector with a result. 
- binary.swap(binary_vec); -} - - -void -decodeFormattedHexString(const std::string& hex_string, - std::vector<uint8_t>& binary) { - // If there is at least one colon we assume that the string - // comprises octets separated by colons (e.g. MAC address notation). - if (hex_string.find(':') != std::string::npos) { - decodeSeparatedHexString(hex_string, ":", binary); - } else if (hex_string.find(' ') != std::string::npos) { - decodeSeparatedHexString(hex_string, " ", binary); - } else { - std::ostringstream s; - - // If we have odd number of digits we'll have to prepend '0'. - if (hex_string.length() % 2 != 0) { - s << "0"; - } - - // It is ok to use '0x' prefix in a string. - if ((hex_string.length() > 2) && (hex_string.substr(0, 2) == "0x")) { - // Exclude '0x' from the decoded string. - s << hex_string.substr(2); - - } else { - // No '0x', so decode the whole string. - s << hex_string; - } - - try { - // Decode the hex string. - encode::decodeHex(s.str(), binary); - - } catch (...) { - isc_throw(isc::BadValue, "'" << hex_string << "' is not a valid" - " string of hexadecimal digits"); - } - } -} - -class StringSanitizerImpl { -public: - /// @brief Constructor. 
- StringSanitizerImpl(const std::string& char_set, const std::string& char_replacement) - : char_set_(char_set), char_replacement_(char_replacement) { - if (char_set.size() > StringSanitizer::MAX_DATA_SIZE) { - isc_throw(isc::BadValue, "char set size: '" << char_set.size() - << "' exceeds max size: '" - << StringSanitizer::MAX_DATA_SIZE << "'"); - } - - if (char_replacement.size() > StringSanitizer::MAX_DATA_SIZE) { - isc_throw(isc::BadValue, "char replacement size: '" - << char_replacement.size() << "' exceeds max size: '" - << StringSanitizer::MAX_DATA_SIZE << "'"); - } -#ifdef USE_REGEX - try { - scrub_exp_ = std::regex(char_set, std::regex::extended); - } catch (const std::exception& ex) { - isc_throw(isc::BadValue, "invalid regex: '" - << char_set_ << "', " << ex.what()); - } -#else - int ec = regcomp(&scrub_exp_, char_set_.c_str(), REG_EXTENDED); - if (ec) { - char errbuf[512] = ""; - static_cast<void>(regerror(ec, &scrub_exp_, errbuf, sizeof(errbuf))); - regfree(&scrub_exp_); - isc_throw(isc::BadValue, "invalid regex: '" << char_set_ << "', " << errbuf); - } -#endif - } - - /// @brief Destructor. - ~StringSanitizerImpl() { -#ifndef USE_REGEX - regfree(&scrub_exp_); -#endif - } - - std::string scrub(const std::string& original) { -#ifdef USE_REGEX - std::stringstream result; - try { - std::regex_replace(std::ostream_iterator<char>(result), - original.begin(), original.end(), - scrub_exp_, char_replacement_); - } catch (const std::exception& ex) { - isc_throw(isc::BadValue, "replacing '" << char_set_ << "' with '" - << char_replacement_ << "' in '" << original << "' failed: ," - << ex.what()); - } - - return (result.str()); -#else - // In order to handle embedded nuls, we have to process in nul-terminated - // chunks. We iterate over the original data, doing pattern replacement - // on each chunk. 
- const char* orig_data = original.data(); - const char* dead_end = orig_data + original.size(); - const char* start_from = orig_data; - stringstream result; - - while (start_from < dead_end) { - // Iterate over original string, match by match. - regmatch_t matches[2]; // n matches + 1 - const char* end_at = start_from + strlen(start_from); - - while (start_from < end_at) { - // Look for the next match - if (regexec(&scrub_exp_, start_from, 1, matches, 0) == REG_NOMATCH) { - // No matches, so add in the remainder - result << start_from; - start_from = end_at + 1; - break; - } - - // Shouldn't happen, but one never knows eh? - if (matches[0].rm_so == -1) { - isc_throw(isc::Unexpected, "matched but so is -1?"); - } - - // Add everything from starting point up to the current match - const char* match_at = start_from + matches[0].rm_so; - while (start_from < match_at) { - result << *start_from; - ++start_from; - } - - // Add in the replacement - result << char_replacement_; - - // Move past the match. - ++start_from; - } - - // if we have an embedded nul, replace it and continue - if (start_from < dead_end) { - // Add in the replacement - result << char_replacement_; - start_from = end_at + 1; - } - } - - return (result.str()); -#endif - } - -private: - /// @brief The char set data for regex. - std::string char_set_; - - /// @brief The char replacement data for regex. - std::string char_replacement_; - -#ifdef USE_REGEX - regex scrub_exp_; -#else - regex_t scrub_exp_; -#endif -}; - -// @note The regex engine is implemented using recursion and can cause -// stack overflow if the input data is too large. An arbitrary size of -// 4096 should be enough for all cases. 
-const uint32_t StringSanitizer::MAX_DATA_SIZE = 4096; - -StringSanitizer::StringSanitizer(const std::string& char_set, - const std::string& char_replacement) - : impl_(new StringSanitizerImpl(char_set, char_replacement)) { -} - -StringSanitizer::~StringSanitizer() { -} - -std::string -StringSanitizer::scrub(const std::string& original) { - return (impl_->scrub(original)); -} - -std::string dumpAsHex(const uint8_t* data, size_t length) { - std::stringstream output; - for (unsigned int i = 0; i < length; i++) { - if (i) { - output << ":"; - } - - output << std::setfill('0') << std::setw(2) << std::hex - << static_cast<unsigned short>(data[i]); - } - - return (output.str()); -} - -} // namespace str -} // namespace util -} // namespace isc diff --git a/src/lib/util/tests/str_unittests.cc b/src/lib/util/tests/str_unittests.cc new file mode 100644 index 0000000000..accaf218bd --- /dev/null +++ b/src/lib/util/tests/str_unittests.cc @@ -0,0 +1,514 @@ +// Copyright (C) 2011-2024 Internet Systems Consortium, Inc. ("ISC") +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include <config.h> + +#include <testutils/gtest_utils.h> +#include <util/encode/encode.h> +#include <util/str.h> + +#include <cstdint> +#include <exception> +#include <sstream> +#include <string> +#include <vector> + +#include <gtest/gtest.h> + +using namespace isc; +using namespace isc::util; +using namespace isc::util::encode; +using namespace isc::util::str; +using namespace std; + +namespace { + +/// @brief Fixture used to test StringSanitizer. +struct StringUtilTest : ::testing::Test { + /// @brief Pass string through scrub and check the result. + /// + /// @param original The string to sanitize. + /// @param char_set The regular expression string describing invalid characters. 
+ /// @param char_replacement - character(s) which replace invalid + /// characters + /// @param expected - expected sanitized string + void checkScrub(const string& original, + const string& char_set, + const string& char_replacement, + const string& expected) { + StringSanitizerPtr ss; + string sanitized; + + try { + ss.reset(new StringSanitizer(char_set, char_replacement)); + } catch (const exception& ex) { + ADD_FAILURE() << "Could not construct sanitizer:" << ex.what(); + return; + } + + try { + sanitized = ss->scrub(original); + } catch (const exception& ex) { + ADD_FAILURE() << "Could not scrub string:" << ex.what(); + return; + } + + EXPECT_EQ(sanitized, expected); + } + + /// @brief Check that hex strings with colons can be decoded. + /// + /// @param input Input string to be decoded. + /// @param reference The expected result. + void checkColonSeparated(const string& input, const string& reference) { + // Create a reference vector. + vector<uint8_t> reference_vector; + ASSERT_NO_THROW_LOG(decodeHex(reference, reference_vector)); + + // Fill the output vector with some garbage to make sure that + // the data is erased when a string is decoded successfully. + vector<uint8_t> decoded(1, 10); + ASSERT_NO_THROW_LOG(decodeColonSeparatedHexString(input, decoded)); + + // Get the string representation of the decoded data for logging + // purposes. + string encoded; + ASSERT_NO_THROW_LOG(encoded = encodeHex(decoded)); + + // Check if the decoded data matches the reference. + EXPECT_EQ(decoded, reference_vector) << "decoded data don't match the reference, input='" + << input << "', reference='" << reference + << "'" + ", decoded='" + << encoded << "'"; + } + + /// @brief Check that formatted hex strings can be decoded. + /// + /// @param input Input string to be decoded. + /// @param reference The expected result. + void checkFormatted(const string& input, const string& reference) { + // Create a reference vector. 
+ vector<uint8_t> reference_vector; + ASSERT_NO_THROW_LOG(decodeHex(reference, reference_vector)); + + // Fill the output vector with some garbage to make sure that + // the data is erased when a string is decoded successfully. + vector<uint8_t> decoded(1, 10); + ASSERT_NO_THROW_LOG(decodeFormattedHexString(input, decoded)); + + // Get the string representation of the decoded data for logging + // purposes. + string encoded; + ASSERT_NO_THROW_LOG(encoded = encodeHex(decoded)); + + // Check if the decoded data matches the reference. + EXPECT_EQ(decoded, reference_vector) + << "decoded data don't match the reference, input='" << input << "', reference='" + << reference << "', decoded='" << encoded << "'"; + } + + /// @brief Convenience function which calls quotedStringToBinary + /// and converts returned vector back to string. + /// + /// @param s Input string. + /// + /// @return String holding a copy of a vector returned by the + /// quotedStringToBinary. + string checkQuoted(const string& s) { + vector<uint8_t> vec = quotedStringToBinary(s); + string s2(vec.begin(), vec.end()); + return (s2); + } +}; + +// Check that leading and trailing space trimming works. +TEST_F(StringUtilTest, Trim) { + // Empty and full string. + EXPECT_EQ("", trim("")); + EXPECT_EQ("abcxyz", trim("abcxyz")); + + // Trim right-most blanks + EXPECT_EQ("ABC", trim("ABC ")); + EXPECT_EQ("ABC", trim("ABC\t\t \n\t")); + + // Left-most blank trimming + EXPECT_EQ("XYZ", trim(" XYZ")); + EXPECT_EQ("XYZ", trim("\t\t \tXYZ")); + + // Right and left, with embedded spaces + EXPECT_EQ("MN \t OP", trim("\t\tMN \t OP \t")); +} + +// Check tokenization. 
+TEST_F(StringUtilTest, Tokens) { + vector<string> result; + + // Default delimiters + + // Degenerate cases + result = tokens(""); // Empty string + EXPECT_EQ(0, result.size()); + + result = tokens(" \n "); // String is all delimiters + EXPECT_EQ(0, result.size()); + + result = tokens("abc"); // String has no delimiters + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("abc"), result[0]); + + // String containing leading and/or trailing delimiters, no embedded ones. + result = tokens("\txyz"); // One leading delimiter + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("xyz"), result[0]); + + result = tokens("\t \nxyz"); // Multiple leading delimiters + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("xyz"), result[0]); + + result = tokens("xyz\n"); // One trailing delimiter + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("xyz"), result[0]); + + result = tokens("xyz \t"); // Multiple trailing + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("xyz"), result[0]); + + result = tokens("\t xyz \n"); // Leading and trailing + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("xyz"), result[0]); + + // Embedded delimiters + result = tokens("abc\ndef"); // 2 tokens, one separator + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("abc"), result[0]); + EXPECT_EQ(string("def"), result[1]); + + result = tokens("abc\t\t\ndef"); // 2 tokens, 3 separators + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("abc"), result[0]); + EXPECT_EQ(string("def"), result[1]); + + result = tokens("abc\n \tdef\t\tghi"); + ASSERT_EQ(3, result.size()); // Multiple tokens, many delims + EXPECT_EQ(string("abc"), result[0]); + EXPECT_EQ(string("def"), result[1]); + EXPECT_EQ(string("ghi"), result[2]); + + // Embedded and non-embedded delimiters + + result = tokens("\t\t \nabc\n \tdef\t\tghi \n\n"); + ASSERT_EQ(3, result.size()); // Multiple tokens, many delims + EXPECT_EQ(string("abc"), result[0]); + EXPECT_EQ(string("def"), result[1]); + EXPECT_EQ(string("ghi"), result[2]); + + // Non-default 
delimiter + result = tokens("alpha/beta/ /gamma//delta/epsilon/", "/"); + ASSERT_EQ(6, result.size()); + EXPECT_EQ(string("alpha"), result[0]); + EXPECT_EQ(string("beta"), result[1]); + EXPECT_EQ(string(" "), result[2]); + EXPECT_EQ(string("gamma"), result[3]); + EXPECT_EQ(string("delta"), result[4]); + EXPECT_EQ(string("epsilon"), result[5]); + + // Non-default delimiters (plural) + result = tokens("+*--alpha*beta+ -gamma**delta+epsilon-+**", "*+-"); + ASSERT_EQ(6, result.size()); + EXPECT_EQ(string("alpha"), result[0]); + EXPECT_EQ(string("beta"), result[1]); + EXPECT_EQ(string(" "), result[2]); + EXPECT_EQ(string("gamma"), result[3]); + EXPECT_EQ(string("delta"), result[4]); + EXPECT_EQ(string("epsilon"), result[5]); + + // Escaped delimiter + result = tokens("foo\\,bar", ",", true); + EXPECT_EQ(1, result.size()); + EXPECT_EQ(string("foo,bar"), result[0]); + + // Escaped escape + result = tokens("foo\\\\,bar", ",", true); + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("foo\\"), result[0]); + EXPECT_EQ(string("bar"), result[1]); + + // Double escapes + result = tokens("foo\\\\\\\\,\\bar", ",", true); + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("foo\\\\"), result[0]); + EXPECT_EQ(string("\\bar"), result[1]); + + // Escaped standard character + result = tokens("fo\\o,bar", ",", true); + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("fo\\o"), result[0]); + EXPECT_EQ(string("bar"), result[1]); + + // Escape at the end + result = tokens("foo,bar\\", ",", true); + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("foo"), result[0]); + EXPECT_EQ(string("bar\\"), result[1]); + + // Escape opening a token + result = tokens("foo,\\,,bar", ",", true); + ASSERT_EQ(3, result.size()); + EXPECT_EQ(string("foo"), result[0]); + EXPECT_EQ(string(","), result[1]); + EXPECT_EQ(string("bar"), result[2]); +} + +// Check changing of case. 
+TEST_F(StringUtilTest, ChangeCase) { + string mixed("abcDEFghiJKLmno123[]{=+--+]}"); + string upper("ABCDEFGHIJKLMNO123[]{=+--+]}"); + string lower("abcdefghijklmno123[]{=+--+]}"); + + string test = mixed; + lowercase(test); + EXPECT_EQ(lower, test); + + test = mixed; + uppercase(test); + EXPECT_EQ(upper, test); +} + +TEST_F(StringUtilTest, quotedStringToBinary) { + // No opening or closing quote should result in empty string. + EXPECT_TRUE(quotedStringToBinary("'").empty()); + EXPECT_TRUE(quotedStringToBinary("").empty()); + EXPECT_TRUE(quotedStringToBinary(" ").empty()); + EXPECT_TRUE(quotedStringToBinary("'circuit id").empty()); + EXPECT_TRUE(quotedStringToBinary("circuit id'").empty()); + + // If there is only opening and closing quote, an empty + // vector should be returned. + EXPECT_TRUE(quotedStringToBinary("''").empty()); + + // Both opening and ending quote is present. + EXPECT_EQ("circuit id", checkQuoted("'circuit id'")); + EXPECT_EQ("remote id", checkQuoted(" ' remote id'")); + EXPECT_EQ("duid", checkQuoted(" ' duid'")); + EXPECT_EQ("duid", checkQuoted("'duid ' ")); + EXPECT_EQ("remote'id", checkQuoted(" ' remote'id '")); + EXPECT_EQ("remote id'", checkQuoted("'remote id''")); + EXPECT_EQ("'remote id", checkQuoted("''remote id'")); + + // Multiple quotes. + EXPECT_EQ("'", checkQuoted("'''")); + EXPECT_EQ("''", checkQuoted("''''")); +} + +TEST_F(StringUtilTest, decodeColonSeparatedHexString) { + // Test valid strings. + checkColonSeparated("A1:02:C3:d4:e5:F6", "A102C3D4E5F6"); + checkColonSeparated("A:02:3:d:E5:F6", "0A02030DE5F6"); + checkColonSeparated("A:B:C:D", "0A0B0C0D"); + checkColonSeparated("1", "01"); + checkColonSeparated("1e", "1E"); + checkColonSeparated("", ""); + + // Test invalid strings. + vector<uint8_t> decoded; + // Whitespaces. + EXPECT_THROW_MSG(decodeColonSeparatedHexString(" ", decoded), BadValue, + "invalid format of the decoded string ' '"); + // Whitespace before digits. 
+ EXPECT_THROW_MSG(decodeColonSeparatedHexString(" A1", decoded), BadValue, + "invalid format of the decoded string ' A1'"); + // Two consecutive colons. + EXPECT_THROW_MSG(decodeColonSeparatedHexString("A::01", decoded), BadValue, + "two consecutive separators (':') specified in a decoded string 'A::01'"); + // Three consecutive colons. + EXPECT_THROW_MSG(decodeColonSeparatedHexString("A:::01", decoded), BadValue, + "two consecutive separators (':') specified in a decoded string 'A:::01'"); + // Whitespace within a string. + EXPECT_THROW_MSG(decodeColonSeparatedHexString("A :01", decoded), BadValue, + "' ' is not a valid hexadecimal digit in decoded string 'A :01'"); + // Terminating colon. + EXPECT_THROW_MSG(decodeColonSeparatedHexString("0A:01:", decoded), BadValue, + "two consecutive separators (':') specified in a decoded string '0A:01:'"); + // Opening colon. + EXPECT_THROW_MSG(decodeColonSeparatedHexString(":0A:01", decoded), BadValue, + "two consecutive separators (':') specified in a decoded string ':0A:01'"); + // Three digits before the colon. + EXPECT_THROW_MSG(decodeColonSeparatedHexString("0A1:B1", decoded), BadValue, + "invalid format of the decoded string '0A1:B1'"); +} + +TEST_F(StringUtilTest, decodeFormattedHexString) { + // Colon separated. + checkFormatted("1:A7:B5:4:23", "01A7B50423"); + // Space separated. + checkFormatted("1 A7 B5 4 23", "01A7B50423"); + // No colons, even number of digits. + checkFormatted("17a534", "17A534"); + // Odd number of digits. + checkFormatted("A3A6f78", "0A3A6F78"); + // '0x' prefix. + checkFormatted("0xA3A6f78", "0A3A6F78"); + // '0x' prefix with a special value of 0. + checkFormatted("0x0", "00"); + // Empty string. + checkFormatted("", ""); + + vector<uint8_t> decoded; + // Dangling colon. + EXPECT_THROW_MSG(decodeFormattedHexString("0a:", decoded), BadValue, + "two consecutive separators (':') specified in a decoded string '0a:'"); + // Dangling space. 
+ EXPECT_THROW_MSG(decodeFormattedHexString("0a ", decoded), BadValue, + "two consecutive separators (' ') specified in a decoded string '0a '"); + // '0x' prefix and spaces. + EXPECT_THROW_MSG(decodeFormattedHexString("0x01 02", decoded), BadValue, + "invalid format of the decoded string '0x01 02'"); + // '0x' prefix and colons. + EXPECT_THROW_MSG(decodeFormattedHexString("0x01:02", decoded), BadValue, + "invalid format of the decoded string '0x01:02'"); + // colon and spaces mixed + EXPECT_THROW_MSG(decodeFormattedHexString("01:02 03", decoded), BadValue, + "invalid format of the decoded string '01:02 03'"); + // Missing colon. + EXPECT_THROW_MSG(decodeFormattedHexString("01:0203", decoded), BadValue, + "invalid format of the decoded string '01:0203'"); + // Missing space. + EXPECT_THROW_MSG(decodeFormattedHexString("01 0203", decoded), BadValue, + "invalid format of the decoded string '01 0203'"); + // Invalid prefix. + EXPECT_THROW_MSG(decodeFormattedHexString("x0102", decoded), BadValue, + "'x0102' is not a valid string of hexadecimal digits"); + // Invalid prefix again. + EXPECT_THROW_MSG(decodeFormattedHexString("1x0102", decoded), BadValue, + "'1x0102' is not a valid string of hexadecimal digits"); +} + +// Verifies StringSantizer class +TEST_F(StringUtilTest, stringSanitizer) { + // Bad regular expression should throw. 
+ StringSanitizerPtr ss; + ASSERT_THROW_MSG(ss.reset(new StringSanitizer("[bogus-regex", "")), BadValue, + "invalid regex: '[bogus-regex', Invalid range in bracket expression."); + + string good_data(StringSanitizer::MAX_DATA_SIZE, '0'); + string bad_data(StringSanitizer::MAX_DATA_SIZE + 1, '0'); + + ASSERT_NO_THROW_LOG(ss.reset(new StringSanitizer(good_data, good_data))); + + ASSERT_THROW_MSG(ss.reset(new StringSanitizer(bad_data, "")), BadValue, + "char set size: '4097' exceeds max size: '4096'"); + ASSERT_THROW_MSG(ss.reset(new StringSanitizer("", bad_data)), BadValue, + "char replacement size: '4097' exceeds max size: '4096'"); + + // List of invalid chars should work: (b,c,2 are invalid) + checkScrub("abc.123", "[b-c2]", "*", "a**.1*3"); + // Inverted list of valid chars should work: (b,c,2 are valid) + checkScrub("abc.123", "[^b-c2]", "*", "*bc**2*"); + + // A string of all valid chars should return an identical string. + checkScrub("-_A--B__Cabc34567_-", "[^A-Ca-c3-7_-]", "x", "-_A--B__Cabc34567_-"); + + // Replacing with a character should work. + checkScrub("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "*", "A*b*c*JoE3*_x*B*Y*e"); + + // Removing (i.e.replacing with an "empty" string) should work. + checkScrub("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "", "AbcJoE3_xBYe"); + + // More than one non-matching in a row should work. + checkScrub("%%A%%B%%C%%", "[^A-Za-z0-9_]", "x", "xxAxxBxxCxx"); + + // Removing more than one non-matching in a row should work. + checkScrub("%%A%%B%%C%%", "[^A-Za-z0-9_]", "", "ABC"); + + // Replacing with a string should work. + checkScrub("%%A%%B%%C%%", "[^A-Za-z0-9_]", "xyz", "xyzxyzAxyzxyzBxyzxyzCxyzxyz"); + + // Dots as valid chars work. 
+ checkScrub("abc.123", "[^A-Za-z0-9_.]", "*", "abc.123"); + + string withNulls("\000ab\000c.12\0003", 10); + checkScrub(withNulls, "[^A-Za-z0-9_.]", "*", "*ab*c.12*3"); +} + +// Verifies templated buffer iterator seekTrimmed() function +TEST_F(StringUtilTest, seekTrimmed) { + // Empty buffer should be fine. + vector<uint8_t> buffer; + auto begin = buffer.end(); + auto end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 0)); + EXPECT_EQ(0, distance(begin, end)); + + // Buffer of only trim values, should be fine. + buffer = {1, 1}; + begin = buffer.begin(); + end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 1)); + EXPECT_EQ(0, distance(begin, end)); + + // One trailing null should trim off. + buffer = {'o', 'n', 'e', 0}; + begin = buffer.begin(); + end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 0)); + EXPECT_EQ(3, distance(begin, end)); + + // More than one trailing null should trim off. + buffer = {'t', 'h', 'r', 'e', 'e', 0, 0, 0}; + begin = buffer.begin(); + end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 0)); + EXPECT_EQ(5, distance(begin, end)); + + // Embedded null should be left in place. + buffer = {'e', 'm', 0, 'b', 'e', 'd'}; + begin = buffer.begin(); + end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 0)); + EXPECT_EQ(6, distance(begin, end)); + + // Leading null should be left in place. + buffer = {0, 'l', 'e', 'a', 'd', 'i', 'n', 'g'}; + begin = buffer.begin(); + end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 0)); + EXPECT_EQ(8, distance(begin, end)); +} + +// Verifies isPrintable predicate on strings. +TEST_F(StringUtilTest, stringIsPrintable) { + string content; + + // Empty is printable. + EXPECT_TRUE(isPrintable(content)); + + // Check Abcd. + content = "Abcd"; + EXPECT_TRUE(isPrintable(content)); + + // Add a control character (not printable). 
+ content += "\a"; + EXPECT_FALSE(isPrintable(content)); +} + +// Verifies isPrintable predicate on byte vectors. +TEST_F(StringUtilTest, vectorIsPrintable) { + vector<uint8_t> content; + + // Empty is printable. + EXPECT_TRUE(isPrintable(content)); + + // Check Abcd. + content = {0x41, 0x62, 0x63, 0x64}; + EXPECT_TRUE(isPrintable(content)); + + // Add a control character (not printable). + content.push_back('\a'); + EXPECT_FALSE(isPrintable(content)); +} + +} // namespace diff --git a/src/lib/util/tests/strutil_unittest.cc b/src/lib/util/tests/strutil_unittest.cc deleted file mode 100644 index 5372ba0c85..0000000000 --- a/src/lib/util/tests/strutil_unittest.cc +++ /dev/null @@ -1,642 +0,0 @@ -// Copyright (C) 2011-2024 Internet Systems Consortium, Inc. ("ISC") -// -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include <config.h> - -#include <exceptions/exceptions.h> -#include <util/strutil.h> -#include <util/encode/encode.h> - -#include <gtest/gtest.h> - -#include <stdint.h> -#include <string> - -using namespace isc; -using namespace isc::util; -using namespace isc::util::str; -using namespace std; - -namespace { - -// Check for slash replacement - -TEST(StringUtilTest, Slash) { - - string instring = ""; - isc::util::str::normalizeSlash(instring); - EXPECT_EQ("", instring); - - instring = "C:\\A\\B\\C.D"; - isc::util::str::normalizeSlash(instring); - EXPECT_EQ("C:/A/B/C.D", instring); - - instring = "// \\ //"; - isc::util::str::normalizeSlash(instring); - EXPECT_EQ("// / //", instring); -} - -// Check that leading and trailing space trimming works - -TEST(StringUtilTest, Trim) { - - // Empty and full string. 
- EXPECT_EQ("", isc::util::str::trim("")); - EXPECT_EQ("abcxyz", isc::util::str::trim("abcxyz")); - - // Trim right-most blanks - EXPECT_EQ("ABC", isc::util::str::trim("ABC ")); - EXPECT_EQ("ABC", isc::util::str::trim("ABC\t\t \n\t")); - - // Left-most blank trimming - EXPECT_EQ("XYZ", isc::util::str::trim(" XYZ")); - EXPECT_EQ("XYZ", isc::util::str::trim("\t\t \tXYZ")); - - // Right and left, with embedded spaces - EXPECT_EQ("MN \t OP", isc::util::str::trim("\t\tMN \t OP \t")); -} - -// Check tokenization. Note that ASSERT_EQ is used to check the size of the -// returned vector; if not as expected, the following references may be invalid -// so should not be used. - -TEST(StringUtilTest, Tokens) { - vector<string> result; - - // Default delimiters - - // Degenerate cases - result = isc::util::str::tokens(""); // Empty string - EXPECT_EQ(0, result.size()); - - result = isc::util::str::tokens(" \n "); // String is all delimiters - EXPECT_EQ(0, result.size()); - - result = isc::util::str::tokens("abc"); // String has no delimiters - ASSERT_EQ(1, result.size()); - EXPECT_EQ(string("abc"), result[0]); - - // String containing leading and/or trailing delimiters, no embedded ones. 
- result = isc::util::str::tokens("\txyz"); // One leading delimiter - ASSERT_EQ(1, result.size()); - EXPECT_EQ(string("xyz"), result[0]); - - result = isc::util::str::tokens("\t \nxyz"); // Multiple leading delimiters - ASSERT_EQ(1, result.size()); - EXPECT_EQ(string("xyz"), result[0]); - - result = isc::util::str::tokens("xyz\n"); // One trailing delimiter - ASSERT_EQ(1, result.size()); - EXPECT_EQ(string("xyz"), result[0]); - - result = isc::util::str::tokens("xyz \t"); // Multiple trailing - ASSERT_EQ(1, result.size()); - EXPECT_EQ(string("xyz"), result[0]); - - result = isc::util::str::tokens("\t xyz \n"); // Leading and trailing - ASSERT_EQ(1, result.size()); - EXPECT_EQ(string("xyz"), result[0]); - - // Embedded delimiters - result = isc::util::str::tokens("abc\ndef"); // 2 tokens, one separator - ASSERT_EQ(2, result.size()); - EXPECT_EQ(string("abc"), result[0]); - EXPECT_EQ(string("def"), result[1]); - - result = isc::util::str::tokens("abc\t\t\ndef"); // 2 tokens, 3 separators - ASSERT_EQ(2, result.size()); - EXPECT_EQ(string("abc"), result[0]); - EXPECT_EQ(string("def"), result[1]); - - result = isc::util::str::tokens("abc\n \tdef\t\tghi"); - ASSERT_EQ(3, result.size()); // Multiple tokens, many delims - EXPECT_EQ(string("abc"), result[0]); - EXPECT_EQ(string("def"), result[1]); - EXPECT_EQ(string("ghi"), result[2]); - - // Embedded and non-embedded delimiters - - result = isc::util::str::tokens("\t\t \nabc\n \tdef\t\tghi \n\n"); - ASSERT_EQ(3, result.size()); // Multiple tokens, many delims - EXPECT_EQ(string("abc"), result[0]); - EXPECT_EQ(string("def"), result[1]); - EXPECT_EQ(string("ghi"), result[2]); - - // Non-default delimiter - result = isc::util::str::tokens("alpha/beta/ /gamma//delta/epsilon/", "/"); - ASSERT_EQ(6, result.size()); - EXPECT_EQ(string("alpha"), result[0]); - EXPECT_EQ(string("beta"), result[1]); - EXPECT_EQ(string(" "), result[2]); - EXPECT_EQ(string("gamma"), result[3]); - EXPECT_EQ(string("delta"), result[4]); - 
EXPECT_EQ(string("epsilon"), result[5]); - - // Non-default delimiters (plural) - result = isc::util::str::tokens("+*--alpha*beta+ -gamma**delta+epsilon-+**", - "*+-"); - ASSERT_EQ(6, result.size()); - EXPECT_EQ(string("alpha"), result[0]); - EXPECT_EQ(string("beta"), result[1]); - EXPECT_EQ(string(" "), result[2]); - EXPECT_EQ(string("gamma"), result[3]); - EXPECT_EQ(string("delta"), result[4]); - EXPECT_EQ(string("epsilon"), result[5]); - - // Escaped delimiter - result = isc::util::str::tokens("foo\\,bar", ",", true); - EXPECT_EQ(1, result.size()); - EXPECT_EQ(string("foo,bar"), result[0]); - - // Escaped escape - result = isc::util::str::tokens("foo\\\\,bar", ",", true); - ASSERT_EQ(2, result.size()); - EXPECT_EQ(string("foo\\"), result[0]); - EXPECT_EQ(string("bar"), result[1]); - - // Double escapes - result = isc::util::str::tokens("foo\\\\\\\\,\\bar", ",", true); - ASSERT_EQ(2, result.size()); - EXPECT_EQ(string("foo\\\\"), result[0]); - EXPECT_EQ(string("\\bar"), result[1]); - - // Escaped standard character - result = isc::util::str::tokens("fo\\o,bar", ",", true); - ASSERT_EQ(2, result.size()); - EXPECT_EQ(string("fo\\o"), result[0]); - EXPECT_EQ(string("bar"), result[1]); - - // Escape at the end - result = isc::util::str::tokens("foo,bar\\", ",", true); - ASSERT_EQ(2, result.size()); - EXPECT_EQ(string("foo"), result[0]); - EXPECT_EQ(string("bar\\"), result[1]); - - // Escape opening a token - result = isc::util::str::tokens("foo,\\,,bar", ",", true); - ASSERT_EQ(3, result.size()); - EXPECT_EQ(string("foo"), result[0]); - EXPECT_EQ(string(","), result[1]); - EXPECT_EQ(string("bar"), result[2]); -} - -// Changing case - -TEST(StringUtilTest, ChangeCase) { - string mixed("abcDEFghiJKLmno123[]{=+--+]}"); - string upper("ABCDEFGHIJKLMNO123[]{=+--+]}"); - string lower("abcdefghijklmno123[]{=+--+]}"); - - string test = mixed; - isc::util::str::lowercase(test); - EXPECT_EQ(lower, test); - - test = mixed; - isc::util::str::uppercase(test); - EXPECT_EQ(upper, 
test); -} - -// Formatting - -TEST(StringUtilTest, Formatting) { - - vector<string> args; - args.push_back("arg1"); - args.push_back("arg2"); - args.push_back("arg3"); - - string format1 = "This is a string with no tokens"; - EXPECT_EQ(format1, isc::util::str::format(format1, args)); - - string format2 = ""; // Empty string - EXPECT_EQ(format2, isc::util::str::format(format2, args)); - - string format3 = " "; // Empty string - EXPECT_EQ(format3, isc::util::str::format(format3, args)); - - string format4 = "String with %d non-string tokens %lf"; - EXPECT_EQ(format4, isc::util::str::format(format4, args)); - - string format5 = "String with %s correct %s number of tokens %s"; - string result5 = "String with arg1 correct arg2 number of tokens arg3"; - EXPECT_EQ(result5, isc::util::str::format(format5, args)); - - string format6 = "String with %s too %s few tokens"; - string result6 = "String with arg1 too arg2 few tokens"; - EXPECT_EQ(result6, isc::util::str::format(format6, args)); - - string format7 = "String with %s too %s many %s tokens %s !"; - string result7 = "String with arg1 too arg2 many arg3 tokens %s !"; - EXPECT_EQ(result7, isc::util::str::format(format7, args)); - - string format8 = "String with embedded%s%s%stokens"; - string result8 = "String with embeddedarg1arg2arg3tokens"; - EXPECT_EQ(result8, isc::util::str::format(format8, args)); - - // Handle an empty vector - args.clear(); - string format9 = "%s %s"; - EXPECT_EQ(format9, isc::util::str::format(format9, args)); -} - -TEST(StringUtilTest, getToken) { - string s("a b c"); - istringstream ss(s); - EXPECT_EQ("a", isc::util::str::getToken(ss)); - EXPECT_EQ("b", isc::util::str::getToken(ss)); - EXPECT_EQ("c", isc::util::str::getToken(ss)); - EXPECT_THROW(isc::util::str::getToken(ss), isc::util::str::StringTokenError); -} - -int32_t tokenToNumCall_32_16(const string& token) { - return isc::util::str::tokenToNum<int32_t, 16>(token); -} - -int16_t tokenToNumCall_16_8(const string& token) { - return 
isc::util::str::tokenToNum<int16_t, 8>(token); -} - -TEST(StringUtilTest, tokenToNum) { - uint32_t num32 = tokenToNumCall_32_16("0"); - EXPECT_EQ(0, num32); - num32 = tokenToNumCall_32_16("123"); - EXPECT_EQ(123, num32); - num32 = tokenToNumCall_32_16("65535"); - EXPECT_EQ(65535, num32); - - EXPECT_THROW(tokenToNumCall_32_16(""), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_32_16("a"), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_32_16("-1"), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_32_16("65536"), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_32_16("1234567890"), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_32_16("-1234567890"), - isc::util::str::StringTokenError); - - uint16_t num16 = tokenToNumCall_16_8("123"); - EXPECT_EQ(123, num16); - num16 = tokenToNumCall_16_8("0"); - EXPECT_EQ(0, num16); - num16 = tokenToNumCall_16_8("255"); - EXPECT_EQ(255, num16); - - EXPECT_THROW(tokenToNumCall_16_8(""), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_16_8("a"), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_16_8("-1"), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_16_8("256"), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_16_8("1234567890"), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_16_8("-1234567890"), - isc::util::str::StringTokenError); - -} - -/// @brief Convenience function which calls quotedStringToBinary -/// and converts returned vector back to string. -/// -/// @param s Input string. -/// @return String holding a copy of a vector returned by the -/// quotedStringToBinary. -std::string testQuoted(const std::string& s) { - std::vector<uint8_t> vec = str::quotedStringToBinary(s); - std::string s2(vec.begin(), vec.end()); - return (s2); -} - -TEST(StringUtilTest, quotedStringToBinary) { - // No opening or closing quote should result in empty string. 
- EXPECT_TRUE(str::quotedStringToBinary("'").empty()); - EXPECT_TRUE(str::quotedStringToBinary("").empty()); - EXPECT_TRUE(str::quotedStringToBinary(" ").empty()); - EXPECT_TRUE(str::quotedStringToBinary("'circuit id").empty()); - EXPECT_TRUE(str::quotedStringToBinary("circuit id'").empty()); - - // If there is only opening and closing quote, an empty - // vector should be returned. - EXPECT_TRUE(str::quotedStringToBinary("''").empty()); - - // Both opening and ending quote is present. - EXPECT_EQ("circuit id", testQuoted("'circuit id'")); - EXPECT_EQ("remote id", testQuoted(" ' remote id'")); - EXPECT_EQ("duid", testQuoted(" ' duid'")); - EXPECT_EQ("duid", testQuoted("'duid ' ")); - EXPECT_EQ("remote'id", testQuoted(" ' remote'id '")); - EXPECT_EQ("remote id'", testQuoted("'remote id''")); - EXPECT_EQ("'remote id", testQuoted("''remote id'")); - - // Multiple quotes. - EXPECT_EQ("'", testQuoted("'''")); - EXPECT_EQ("''", testQuoted("''''")); -} - -/// @brief Test that hex string with colons can be decoded. -/// -/// @param input Input string to be decoded. -/// @param reference A string without colons representing the -/// decoded data. -void testColonSeparated(const std::string& input, - const std::string& reference) { - // Create a reference vector. - std::vector<uint8_t> reference_vector; - ASSERT_NO_THROW(encode::decodeHex(reference, reference_vector)); - - // Fill the output vector with some garbage to make sure that - // the data is erased when a string is decoded successfully. - std::vector<uint8_t> decoded(1, 10); - ASSERT_NO_THROW(decodeColonSeparatedHexString(input, decoded)); - - // Get the string representation of the decoded data for logging - // purposes. - std::string encoded; - ASSERT_NO_THROW(encoded = encode::encodeHex(decoded)); - - // Check if the decoded data matches the reference. 
- EXPECT_TRUE(decoded == reference_vector) - << "decoded data don't match the reference, input='" - << input << "', reference='" << reference << "'" - ", decoded='" << encoded << "'"; -} - -TEST(StringUtilTest, decodeColonSeparatedHexString) { - // Test valid strings. - testColonSeparated("A1:02:C3:d4:e5:F6", "A102C3D4E5F6"); - testColonSeparated("A:02:3:d:E5:F6", "0A02030DE5F6"); - testColonSeparated("A:B:C:D", "0A0B0C0D"); - testColonSeparated("1", "01"); - testColonSeparated("1e", "1E"); - testColonSeparated("", ""); - - // Test invalid strings. - std::vector<uint8_t> decoded; - // Whitespaces. - EXPECT_THROW(decodeColonSeparatedHexString(" ", decoded), - isc::BadValue); - // Whitespace before digits. - EXPECT_THROW(decodeColonSeparatedHexString(" A1", decoded), - isc::BadValue); - // Two consecutive colons. - EXPECT_THROW(decodeColonSeparatedHexString("A::01", decoded), - isc::BadValue); - // Three consecutive colons. - EXPECT_THROW(decodeColonSeparatedHexString("A:::01", decoded), - isc::BadValue); - // Whitespace within a string. - EXPECT_THROW(decodeColonSeparatedHexString("A :01", decoded), - isc::BadValue); - // Terminating colon. - EXPECT_THROW(decodeColonSeparatedHexString("0A:01:", decoded), - isc::BadValue); - // Opening colon. - EXPECT_THROW(decodeColonSeparatedHexString(":0A:01", decoded), - isc::BadValue); - // Three digits before the colon. - EXPECT_THROW(decodeColonSeparatedHexString("0A1:B1", decoded), - isc::BadValue); -} - -void testFormatted(const std::string& input, - const std::string& reference) { - // Create a reference vector. - std::vector<uint8_t> reference_vector; - ASSERT_NO_THROW(encode::decodeHex(reference, reference_vector)); - - // Fill the output vector with some garbage to make sure that - // the data is erased when a string is decoded successfully. 
- std::vector<uint8_t> decoded(1, 10); - ASSERT_NO_THROW(decodeFormattedHexString(input, decoded)); - - // Get the string representation of the decoded data for logging - // purposes. - std::string encoded; - ASSERT_NO_THROW(encoded = encode::encodeHex(decoded)); - - // Check if the decoded data matches the reference. - EXPECT_TRUE(decoded == reference_vector) - << "decoded data don't match the reference, input='" - << input << "', reference='" << reference << "'" - ", decoded='" << encoded << "'"; -} - -TEST(StringUtilTest, decodeFormattedHexString) { - // Colon separated. - testFormatted("1:A7:B5:4:23", "01A7B50423"); - // Space separated. - testFormatted("1 A7 B5 4 23", "01A7B50423"); - // No colons, even number of digits. - testFormatted("17a534", "17A534"); - // Odd number of digits. - testFormatted("A3A6f78", "0A3A6F78"); - // '0x' prefix. - testFormatted("0xA3A6f78", "0A3A6F78"); - // '0x' prefix with a special value of 0. - testFormatted("0x0", "00"); - // Empty string. - testFormatted("", ""); - - std::vector<uint8_t> decoded; - // Dangling colon. - EXPECT_THROW(decodeFormattedHexString("0a:", decoded), - isc::BadValue); - // Dangling space. - EXPECT_THROW(decodeFormattedHexString("0a ", decoded), - isc::BadValue); - // '0x' prefix and spaces. - EXPECT_THROW(decodeFormattedHexString("0x01 02", decoded), - isc::BadValue); - // '0x' prefix and colons. - EXPECT_THROW(decodeFormattedHexString("0x01:02", decoded), - isc::BadValue); - // colon and spaces mixed - EXPECT_THROW(decodeFormattedHexString("01:02 03", decoded), - isc::BadValue); - // Missing colon. - EXPECT_THROW(decodeFormattedHexString("01:0203", decoded), - isc::BadValue); - // Missing space. - EXPECT_THROW(decodeFormattedHexString("01 0203", decoded), - isc::BadValue); - // Invalid prefix. - EXPECT_THROW(decodeFormattedHexString("x0102", decoded), - isc::BadValue); - // Invalid prefix again. 
- EXPECT_THROW(decodeFormattedHexString("1x0102", decoded), - isc::BadValue); -} - -/// @brief Function used to test StringSantitizer -/// @param original - string to sanitize -/// @param char_set - regular expression string describing invalid -/// characters -/// @param char_replacement - character(s) which replace invalid -/// characters -/// @param expected - expected sanitized string -void sanitizeStringTest( - const std::string& original, - const std::string& char_set, - const std::string& char_replacement, - const std::string& expected) { - - StringSanitizerPtr ss; - std::string sanitized; - - try { - ss.reset(new StringSanitizer(char_set, char_replacement)); - } catch (const std::exception& ex) { - ADD_FAILURE() << "Could not construct sanitizer:" << ex.what(); - return; - } - - try { - sanitized = ss->scrub(original); - } catch (const std::exception& ex) { - ADD_FAILURE() << "Could not scrub string:" << ex.what(); - return; - } - - EXPECT_EQ(sanitized, expected); -} - -// Verifies StringSantizer class -TEST(StringUtilTest, stringSanitizer) { - // Bad regular expression should throw. - StringSanitizerPtr ss; - ASSERT_THROW(ss.reset(new StringSanitizer("[bogus-regex","")), BadValue); - - std::string good_data(StringSanitizer::MAX_DATA_SIZE, '0'); - std::string bad_data(StringSanitizer::MAX_DATA_SIZE + 1, '0'); - - ASSERT_NO_THROW(ss.reset(new StringSanitizer(good_data, good_data))); - - ASSERT_THROW(ss.reset(new StringSanitizer(bad_data, "")), BadValue); - ASSERT_THROW(ss.reset(new StringSanitizer("", bad_data)), BadValue); - - // List of invalid chars should work: (b,c,2 are invalid) - sanitizeStringTest("abc.123", "[b-c2]", "*", - "a**.1*3"); - // Inverted list of valid chars should work: (b,c,2 are valid) - sanitizeStringTest("abc.123", "[^b-c2]", "*", - "*bc**2*"); - - // A string of all valid chars should return an identical string. 
- sanitizeStringTest("-_A--B__Cabc34567_-", "[^A-Ca-c3-7_-]", "x", - "-_A--B__Cabc34567_-"); - - // Replacing with a character should work. - sanitizeStringTest("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "*", - "A*b*c*JoE3*_x*B*Y*e"); - - // Removing (i.e.replacing with an "empty" string) should work. - sanitizeStringTest("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "", - "AbcJoE3_xBYe"); - - // More than one non-matching in a row should work. - sanitizeStringTest("%%A%%B%%C%%", "[^A-Za-z0-9_]", "x", - "xxAxxBxxCxx"); - - // Removing more than one non-matching in a row should work. - sanitizeStringTest("%%A%%B%%C%%", "[^A-Za-z0-9_]", "", - "ABC"); - - // Replacing with a string should work. - sanitizeStringTest("%%A%%B%%C%%", "[^A-Za-z0-9_]", "xyz", - "xyzxyzAxyzxyzBxyzxyzCxyzxyz"); - - // Dots as valid chars work. - sanitizeStringTest("abc.123", "[^A-Za-z0-9_.]", "*", - "abc.123"); - - std::string withNulls("\000ab\000c.12\0003",10); - sanitizeStringTest(withNulls, "[^A-Za-z0-9_.]", "*", - "*ab*c.12*3"); -} - -// Verifies templated buffer iterator seekTrimmed() function -TEST(StringUtilTest, seekTrimmed) { - - // Empty buffer should be fine. - std::vector<uint8_t> buffer; - auto begin = buffer.end(); - auto end = buffer.end(); - ASSERT_NO_THROW(end = seekTrimmed(begin, end, 0)); - EXPECT_EQ(0, std::distance(begin, end)); - - // Buffer of only trim values, should be fine. - buffer = { 1, 1 }; - begin = buffer.begin(); - end = buffer.end(); - ASSERT_NO_THROW(end = seekTrimmed(begin, end, 1)); - EXPECT_EQ(0, std::distance(begin, end)); - - // One trailing null should trim off. - buffer = {'o', 'n', 'e', 0 }; - begin = buffer.begin(); - end = buffer.end(); - ASSERT_NO_THROW(end = seekTrimmed(begin, end, 0)); - EXPECT_EQ(3, std::distance(begin, end)); - - // More than one trailing null should trim off. 
- buffer = { 't', 'h', 'r', 'e', 'e', 0, 0, 0 }; - begin = buffer.begin(); - end = buffer.end(); - ASSERT_NO_THROW(end = seekTrimmed(begin, end, 0)); - EXPECT_EQ(5, std::distance(begin, end)); - - // Embedded null should be left in place. - buffer = { 'e', 'm', 0, 'b', 'e', 'd' }; - begin = buffer.begin(); - end = buffer.end(); - ASSERT_NO_THROW(end = seekTrimmed(begin, end, 0)); - EXPECT_EQ(6, std::distance(begin, end)); - - // Leading null should be left in place. - buffer = { 0, 'l', 'e', 'a', 'd', 'i', 'n', 'g' }; - begin = buffer.begin(); - end = buffer.end(); - ASSERT_NO_THROW(end = seekTrimmed(begin, end, 0)); - EXPECT_EQ(8, std::distance(begin, end)); -} - -// Verifies isPrintable predicate on strings. -TEST(StringUtilTest, stringIsPrintable) { - string content; - - // Empty is printable. - EXPECT_TRUE(isPrintable(content)); - - // Check Abcd. - content = "Abcd"; - EXPECT_TRUE(isPrintable(content)); - - // Add a control character (not printable). - content += "\a"; - EXPECT_FALSE(isPrintable(content)); -} - -// Verifies isPrintable predicate on byte vectors. -TEST(StringUtilTest, vectorIsPrintable) { - vector<uint8_t> content; - - // Empty is printable. - EXPECT_TRUE(isPrintable(content)); - - // Check Abcd. - content = { 0x41, 0x62, 0x63, 0x64 }; - EXPECT_TRUE(isPrintable(content)); - - // Add a control character (not printable). - content.push_back('\a'); - EXPECT_FALSE(isPrintable(content)); -} - -} // end of anonymous namespace diff --git a/src/lib/yang/adaptor_host.cc b/src/lib/yang/adaptor_host.cc index 78f427e651..931696f214 100644 --- a/src/lib/yang/adaptor_host.cc +++ b/src/lib/yang/adaptor_host.cc @@ -7,7 +7,7 @@ #include <config.h> #include <util/encode/encode.h> -#include <util/strutil.h> +#include <util/str.h> #include <yang/adaptor_host.h> #include <iomanip> |