1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
|
// Copyright (C) 2011-2024 Internet Systems Consortium, Inc. ("ISC")
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
/// @file
/// Access to interface information on Linux is via netlink, a socket-based
/// method for transferring information between the kernel and user processes.
///
/// For detailed information about netlink interface, please refer to
/// http://en.wikipedia.org/wiki/Netlink and RFC3549. Comments in the
/// detectIfaces() method (towards the end of this file) provide an overview
/// on how the netlink interface is used here.
///
/// Note that this interface is very robust and allows many operations:
/// add/get/set/delete links, addresses, routes, queuing, manipulation of
/// traffic classes, manipulation of neighbourhood tables and even the ability
/// to do something with address labels. Getting a list of interfaces with the
/// addresses configured on them is just a small subset of all possible actions.
#include <config.h>
#if defined(OS_LINUX)
#include <asiolink/io_address.h>
#include <dhcp/iface_mgr.h>
#include <dhcp/iface_mgr_error_handler.h>
#include <dhcp/pkt_filter_inet.h>
#include <dhcp/pkt_filter_lpf.h>
#include <exceptions/exceptions.h>
#include <util/io/sockaddr_util.h>
#include <boost/array.hpp>
#include <boost/static_assert.hpp>
#include <fcntl.h>
#include <stdint.h>
#include <net/if.h>
#include <linux/rtnetlink.h>
using namespace std;
using namespace isc;
using namespace isc::asiolink;
using namespace isc::dhcp;
using namespace isc::util::io::internal;
BOOST_STATIC_ASSERT(IFLA_MAX>=IFA_MAX);
namespace {
/// @brief This class offers utility methods for netlink connection.
///
/// The object owns the netlink socket descriptor: it is opened by
/// rtnl_open_socket() and closed by rtnl_close_socket() or, at the latest,
/// by the destructor. Because of this ownership the class is not copyable
/// (a copy would close the same descriptor a second time).
///
/// See IfaceMgr::detectIfaces() (Linux implementation, towards the end of this
/// file) for example usage.
class Netlink
{
public:

    /// @brief Holds pointers to netlink messages.
    ///
    /// netlink (a Linux interface for getting information about network
    /// interfaces) uses memory aliasing. Linux kernel returns a memory
    /// blob that should be interpreted as series of nlmessages. There
    /// are different nlmsg structures defined with varying size. They
    /// have one thing common - initial fields are laid out in the same
    /// way as nlmsghdr. Therefore different messages can be represented
    /// as nlmsghdr followed by a variable number of bytes that are
    /// message-specific. The only reasonable way to represent this in
    /// C++ is to use vector of pointers to nlmsghdr (the common structure).
    typedef vector<nlmsghdr*> NetlinkMessages;

    /// @brief Holds pointers to interface or address attributes.
    ///
    /// Note that to get address info, a shorter (IFA_MAX rather than IFLA_MAX)
    /// table could be used, but we will use the bigger one anyway to
    /// make the code reusable.
    ///
    /// rtattr is a generic structure, similar to sockaddr. It is defined
    /// in linux/rtnetlink.h and shown here for documentation purposes only:
    ///
    /// struct rtattr {
    ///     unsigned short rta_len;
    ///     unsigned short rta_type;
    /// };
    typedef boost::array<struct rtattr*, IFLA_MAX + 1> RTattribPtrs;

    /// @brief Constructor. No socket is opened yet (fd_ is -1).
    Netlink() : fd_(-1), seq_(0), dump_(0) {
        memset(&local_, 0, sizeof(struct sockaddr_nl));
        memset(&peer_, 0, sizeof(struct sockaddr_nl));
    }

    /// @brief Destructor. Closes the netlink socket if it is still open.
    ~Netlink() {
        rtnl_close_socket();
    }

    /// @brief Copying is disabled because the object owns the descriptor.
    Netlink(const Netlink&) = delete;

    /// @brief Assignment is disabled because the object owns the descriptor.
    Netlink& operator=(const Netlink&) = delete;

    /// @brief Opens and binds the netlink socket.
    void rtnl_open_socket();

    /// @brief Sends a dump request of the given type for the given family.
    void rtnl_send_request(int family, int type);

    /// @brief Stores a heap-allocated copy of msg in storage.
    void rtnl_store_reply(NetlinkMessages& storage, const nlmsghdr* msg);

    /// @brief Fills table with pointers to the attributes found in a list.
    void parse_rtattr(RTattribPtrs& table, rtattr* rta, int len);

    /// @brief Adds the IPv4/IPv6 addresses found in addr_info to iface.
    void ipaddrs_get(Iface& iface, NetlinkMessages& addr_info);

    /// @brief Receives the reply to the last request and stores it in info.
    void rtnl_process_reply(NetlinkMessages& info);

    /// @brief Frees the message copies held in messages and empties it.
    void release_list(NetlinkMessages& messages);

    /// @brief Closes the netlink socket (no-op if it is not open).
    void rtnl_close_socket();

private:
    int fd_;            // Netlink file descriptor (-1 when not open)
    sockaddr_nl local_; // Local address (filled in by rtnl_open_socket())
    sockaddr_nl peer_;  // Remote address (only zero-initialized here)
    uint32_t seq_;      // Counter used for generating unique sequence numbers
    uint32_t dump_;     // Sequence number of the request whose reply is awaited
};
/// @brief Size, in bytes, requested for the send buffer of the netlink socket.
static const size_t SNDBUF_SIZE = 32768;

/// @brief Size, in bytes, requested for the receive buffer of the netlink
/// socket. The local buffer that replies are read into has the same size.
static const size_t RCVBUF_SIZE = 32768;
/// @brief Opens netlink socket and initializes handle structure.
///
/// Creates a NETLINK_ROUTE socket, marks it close-on-exec, requests the
/// send and receive buffer sizes, binds the socket and finally reads back
/// the address assigned to it into local_.
///
/// If a step after the socket creation fails, the descriptor stays stored
/// in fd_ and is closed by rtnl_close_socket() or by the destructor.
///
/// @throw isc::Unexpected Thrown if socket configuration fails.
void Netlink::rtnl_open_socket() {

    fd_ = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
    if (fd_ < 0) {
        isc_throw(Unexpected, "Failed to create NETLINK socket.");
    }

    if (fcntl(fd_, F_SETFD, FD_CLOEXEC) < 0) {
        isc_throw(Unexpected, "Failed to set close-on-exec in NETLINK socket.");
    }

    // SO_SNDBUF and SO_RCVBUF take an int. Handing the kernel a pointer to
    // a size_t makes it read only the first sizeof(int) bytes of the value,
    // which are all zero on 64-bit big-endian hosts. Convert explicitly.
    const int sndbuf_size = static_cast<int>(SNDBUF_SIZE);
    if (setsockopt(fd_, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(sndbuf_size)) < 0) {
        isc_throw(Unexpected, "Failed to set send buffer in NETLINK socket.");
    }

    const int rcvbuf_size = static_cast<int>(RCVBUF_SIZE);
    if (setsockopt(fd_, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(rcvbuf_size)) < 0) {
        isc_throw(Unexpected, "Failed to set receive buffer in NETLINK socket.");
    }

    local_.nl_family = AF_NETLINK;
    local_.nl_groups = 0;

    if (::bind(fd_, convertSockAddr(&local_), sizeof(local_)) < 0) {
        isc_throw(Unexpected, "Failed to bind netlink socket.");
    }

    // Read the bound address back; its nl_pid is later used to recognize
    // the replies addressed to this socket.
    socklen_t addr_len = sizeof(local_);
    if (getsockname(fd_, convertSockAddr(&local_), &addr_len) < 0) {
        isc_throw(Unexpected, "Getsockname for netlink socket failed.");
    }

    // just 2 sanity checks and we are done
    if ( (addr_len != sizeof(local_)) ||
         (local_.nl_family != AF_NETLINK) ) {
        isc_throw(Unexpected, "getsockname() returned unexpected data for netlink socket.");
    }
}
/// @brief Closes netlink communication socket
///
/// Does nothing when no socket is open. After the call fd_ is always -1,
/// so calling the method repeatedly is harmless.
void Netlink::rtnl_close_socket() {
    if (fd_ == -1) {
        // Nothing to close; fd_ already holds the "not open" marker.
        return;
    }

    close(fd_);
    fd_ = -1;
}
/// @brief Sends request over NETLINK socket.
///
/// @param family requested information family.
/// @param type request type (RTM_GETLINK or RTM_GETADDR).
void Netlink::rtnl_send_request(int family, int type) {
struct Req {
nlmsghdr netlink_header;
rtgenmsg generic;
};
Req req; // we need this type named for offsetof() used in assert
struct sockaddr_nl nladdr;
// do a sanity check. Verify that Req structure is aligned properly
BOOST_STATIC_ASSERT(sizeof(nlmsghdr) == offsetof(Req, generic));
memset(&nladdr, 0, sizeof(nladdr));
nladdr.nl_family = AF_NETLINK;
// According to netlink(7) manpage, mlmsg_seq must be set to a sequence
// number and is used to track messages. That is just a value that is
// opaque to kernel, and user-space code is supposed to use it to match
// incoming responses to sent requests. That is not really useful as we
// send a single request and get a single response at a time. However, we
// obey the man page suggestion and just set this to monotonically
// increasing numbers.
seq_++;
// This will be used to finding correct response (responses
// sent by kernel are supposed to have the same sequence number
// as the request we sent).
dump_ = seq_;
memset(&req, 0, sizeof(req));
req.netlink_header.nlmsg_len = sizeof(req);
req.netlink_header.nlmsg_type = type;
req.netlink_header.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
req.netlink_header.nlmsg_pid = 0;
req.netlink_header.nlmsg_seq = seq_;
req.generic.rtgen_family = family;
int status = sendto(fd_, static_cast<void*>(&req), sizeof(req), 0,
static_cast<struct sockaddr*>(static_cast<void*>(&nladdr)),
sizeof(nladdr));
if (status<0) {
isc_throw(Unexpected, "Failed to send " << sizeof(nladdr)
<< " bytes over netlink socket.");
}
}
/// @brief Appends a copy of a netlink message to a storage.
///
/// The message (header plus the message-specific bytes that follow it,
/// nlmsg_len bytes in total) is duplicated into a freshly allocated char
/// array and a pointer to that duplicate is appended to the storage.
///
/// @param storage A vector that holds pointers to netlink messages. The caller
/// is responsible for freeing the pointed-to messages.
/// @param msg A netlink message to be added.
void Netlink::rtnl_store_reply(NetlinkMessages& storage, const struct nlmsghdr *msg) {
    // An nlmsghdr cannot be allocated directly: it is only the common
    // prefix of messages of many types and lengths, so a raw byte buffer
    // of the full message length is used instead.
    const uint32_t total_len = msg->nlmsg_len;
    char* raw = new char[total_len];
    memcpy(raw, msg, total_len);

    // Only the pointer is stored; the vector does not own the buffer.
    storage.push_back(reinterpret_cast<struct nlmsghdr*>(raw));
}
/// @brief Parses rtattr message.
///
/// Some netlink messages represent address information. Such messages
/// are a concatenated collection of rtattr structures. This function
/// walks that list and records, for every attribute type that fits in the
/// table, a pointer to the attribute in the slot indexed by its type.
/// Slots of attribute types that do not occur are set to NULL.
///
/// @param table pointers to the rtattr structures will be stored here
/// @param rta Pointer to first rtattr object
/// @param len Length (in bytes) of concatenated rtattr list.
///
/// @throw isc::Unexpected Thrown if bytes remain after the last attribute
/// that passed the RTA_OK check.
void Netlink::parse_rtattr(RTattribPtrs& table, struct rtattr* rta, int len) {
    // Forget whatever a previous call left in the table.
    std::fill(table.begin(), table.end(), static_cast<struct rtattr*>(NULL));

    // RTA_OK and RTA_NEXT() are macros defined in linux/rtnetlink.h.
    // RTA_OK tells whether the structure pointed to by rta passes the
    // sanity checks for the remaining length; RTA_NEXT() moves on to the
    // structure that immediately follows and decreases len accordingly.
    for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
        const unsigned short attr_type = rta->rta_type;
        if (attr_type < table.size()) {
            table[attr_type] = rta;
        }
    }

    if (len != 0) {
        isc_throw(Unexpected, "Failed to parse RTATTR in netlink message.");
    }
}
/// @brief Parses addr_info and appends appropriate addresses to Iface object.
///
/// Netlink is a fine, but convoluted interface. It returns a concatenated
/// collection of netlink messages. Some of those messages convey information
/// about addresses. Those messages are in fact appropriate header followed
/// by concatenated lists of rtattr structures that define various pieces
/// of address information.
///
/// Only messages that refer to the index of the given interface and carry
/// an AF_INET or AF_INET6 address are used. The address is taken from the
/// IFA_ADDRESS attribute or, if that one is absent, from IFA_LOCAL. A message
/// that has neither attribute, or whose attribute is too short to hold an
/// address of its family, is skipped.
///
/// @param iface interface representation (addresses will be added here)
/// @param addr_info collection of parsed netlink messages
void Netlink::ipaddrs_get(Iface& iface, NetlinkMessages& addr_info) {
    uint8_t addr[V6ADDRESS_LEN];
    RTattribPtrs rta_tb;

    for (auto const& msg : addr_info) {
        ifaddrmsg* ifa = static_cast<ifaddrmsg*>(NLMSG_DATA(msg));

        // These are not the addresses you are looking for
        if (ifa->ifa_index != iface.getIndex()) {
            continue;
        }

        if ((ifa->ifa_family != AF_INET6) && (ifa->ifa_family != AF_INET)) {
            continue;
        }

        // parse_rtattr() resets the whole table before filling it in, so
        // no separate clearing is needed here.
        parse_rtattr(rta_tb, IFA_RTA(ifa), msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifa)));

        // Prefer IFA_ADDRESS and fall back to IFA_LOCAL when it is missing.
        rtattr* addr_attr = rta_tb[IFA_ADDRESS];
        if (!addr_attr) {
            addr_attr = rta_tb[IFA_LOCAL];
        }
        if (!addr_attr) {
            // The message carries no address at all; there is nothing to
            // copy and RTA_DATA() must not be applied to a NULL pointer.
            continue;
        }

        // Make sure the attribute really holds as many bytes as are going
        // to be copied out of it.
        const size_t addr_len =
            (ifa->ifa_family == AF_INET) ? V4ADDRESS_LEN : V6ADDRESS_LEN;
        if (static_cast<size_t>(RTA_PAYLOAD(addr_attr)) < addr_len) {
            continue;
        }

        memcpy(addr, RTA_DATA(addr_attr), addr_len);
        IOAddress a = IOAddress::fromBytes(ifa->ifa_family, addr);
        iface.addAddress(a);

        /// TODO: Read lifetimes of configured IPv6 addresses
    }
}
/// @brief Processes reply received over netlink socket.
///
/// Datagrams are read from the socket until an NLMSG_DONE message that
/// belongs to the awaited reply is seen. Each datagram is a collection of
/// concatenated netlink messages; every message that belongs to the awaited
/// reply is copied to newly allocated memory and a pointer to the copy is
/// stored in the "info" container. Messages that come from a sender with a
/// non-zero nl_pid, are not addressed to this socket or carry a different
/// sequence number are skipped.
///
/// @param info received netlink messages will be stored here. It is the
/// caller's responsibility to release the memory associated with the
/// messages by calling the release_list() method.
///
/// @throw isc::Unexpected Thrown on a receive error, on end of file, when
/// the kernel reports an error, when a datagram was truncated or when a
/// datagram ends with bytes that do not form a netlink message.
void Netlink::rtnl_process_reply(NetlinkMessages& info) {
    // Buffer the datagrams are read into, and its scatter/gather descriptor.
    char buf[RCVBUF_SIZE];
    iovec iov;
    iov.iov_base = buf;
    iov.iov_len = sizeof(buf);

    // Address of the sender of the datagram, filled in by recvmsg().
    sockaddr_nl nladdr;

    msghdr msg;
    memset(&msg, 0, sizeof(msghdr));
    msg.msg_name = &nladdr;
    msg.msg_namelen = sizeof(nladdr);
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;

    for (;;) {
        // Number of bytes received; the NLMSG_NEXT() macro decreases it as
        // the messages are consumed.
        int status = recvmsg(fd_, &msg, 0);

        if (status < 0) {
            if (errno == EINTR) {
                // Interrupted by a signal, simply try again.
                continue;
            }
            isc_throw(Unexpected, "Error " << errno
                      << " while processing reply from netlink socket.");
        }

        if (status == 0) {
            isc_throw(Unexpected, "EOF while reading netlink socket.");
        }

        for (nlmsghdr* header = static_cast<nlmsghdr*>(static_cast<void*>(buf));
             NLMSG_OK(header, status);
             header = NLMSG_NEXT(header, status)) {

            // A message is ours only if it comes from a sender with nl_pid
            // equal to zero, is addressed to our socket and carries the
            // sequence number of the request we sent. Anything else is
            // ignored and the next message is examined.
            const bool awaited = (nladdr.nl_pid == 0) &&
                (header->nlmsg_pid == local_.nl_pid) &&
                (header->nlmsg_seq == dump_);
            if (!awaited) {
                continue;
            }

            if (header->nlmsg_type == NLMSG_DONE) {
                // End of the reply.
                return;
            }

            if (header->nlmsg_type == NLMSG_ERROR) {
                if (header->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
                    // We are really out of luck here. We can't even say what is
                    // wrong as error message is truncated. D'oh.
                    isc_throw(Unexpected, "Netlink reply read failed.");
                }
                nlmsgerr* err = static_cast<nlmsgerr*>(NLMSG_DATA(header));
                isc_throw(Unexpected, "Netlink reply read error " << -err->error);
            }

            // store the data
            rtnl_store_reply(info, header);
        }

        if (msg.msg_flags & MSG_TRUNC) {
            isc_throw(Unexpected, "Message received over netlink truncated.");
        }

        if (status != 0) {
            isc_throw(Unexpected, "Trailing garbage of " << status << " bytes received over netlink.");
        }
    }
}
/// @brief Releases the stored copies of netlink messages.
///
/// Frees every message the container points to and then empties the
/// container, so calling the method again on the same container is safe.
///
/// @param messages Set of messages to be freed.
void Netlink::release_list(NetlinkMessages& messages) {
    // let's free local copies of stored messages
    for (auto const& msg : messages) {
        // rtnl_store_reply() allocates every copy with new char[], so the
        // memory has to be released through a char pointer as well: using
        // delete[] on a pointer of a different type is undefined behavior.
        delete[] reinterpret_cast<char*>(msg);
    }

    // and get rid of the message pointers as well
    messages.clear();
}
} // end of anonymous namespace
namespace isc {
namespace dhcp {
/// @brief Detect available interfaces on Linux systems.
///
/// Uses the socket-based netlink protocol to retrieve the list of interfaces
/// from the Linux kernel.
void IfaceMgr::detectIfaces(bool update_only) {
if (detect_callback_) {
if (!detect_callback_(update_only)) {
return;
}
}
// Copies of netlink messages about links will be stored here.
Netlink::NetlinkMessages link_info;
// Copies of netlink messages about addresses will be stored here.
Netlink::NetlinkMessages addr_info;
// Socket descriptors and other rtnl-related parameters.
Netlink nl;
// Table with pointers to address attributes.
Netlink::RTattribPtrs attribs_table;
std::fill(attribs_table.begin(), attribs_table.end(),
static_cast<struct rtattr*>(NULL));
// Open socket
nl.rtnl_open_socket();
// Now we have open functional socket, let's use it!
// Ask for list of network interfaces...
nl.rtnl_send_request(AF_PACKET, RTM_GETLINK);
// Get reply and store it in link_info list:
// response is received as with any other socket - just a series
// of bytes. They are representing collection of netlink messages
// concatenated together. rtnl_process_reply will parse this
// buffer, copy each message to a newly allocated memory and
// store pointers to it in link_info. This allocated memory will
// be released later. See release_info(link_info) below.
nl.rtnl_process_reply(link_info);
// Now ask for list of addresses (AF_UNSPEC = of any family)
// Let's repeat, but this time ask for any addresses.
// That includes IPv4, IPv6 and any other address families that
// are happen to be supported by this system.
nl.rtnl_send_request(AF_UNSPEC, RTM_GETADDR);
// Get reply and store it in addr_info list.
// Again, we will allocate new memory and store messages in
// addr_info. It will be released later using release_info(addr_info).
nl.rtnl_process_reply(addr_info);
// Now build list with interface names
for (auto const& msg : link_info) {
// Required to display information about interface
struct ifinfomsg* interface_info = static_cast<ifinfomsg*>(NLMSG_DATA(msg));
int len = msg->nlmsg_len;
len -= NLMSG_LENGTH(sizeof(*interface_info));
nl.parse_rtattr(attribs_table, IFLA_RTA(interface_info), len);
// valgrind reports *possible* memory leak in the line below, but it is
// bogus. Nevertheless, the whole interface definition has been split
// into three separate steps for easier debugging.
const char* tmp = static_cast<const char*>(RTA_DATA(attribs_table[IFLA_IFNAME]));
string iface_name(tmp); // <--- bogus valgrind warning here
// This is guaranteed both by the if_nametoindex() implementation
// and by kernel dev_new_index() code. In fact 0 is impossible too...
if (interface_info->ifi_index < 0) {
isc_throw(OutOfRange, "negative interface index");
}
IfacePtr iface;
bool created = true;
if (update_only) {
iface = getIface(iface_name);
if (iface) {
created = false;
}
}
if (!iface) {
iface.reset(new Iface(iface_name, interface_info->ifi_index));
}
iface->setHWType(interface_info->ifi_type);
iface->setFlags(interface_info->ifi_flags);
// Does interface have LL_ADDR?
if (attribs_table[IFLA_ADDRESS]) {
iface->setMac(static_cast<const uint8_t*>(RTA_DATA(attribs_table[IFLA_ADDRESS])),
RTA_PAYLOAD(attribs_table[IFLA_ADDRESS]));
} else {
// Tunnels can have no LL_ADDR. RTA_PAYLOAD doesn't check it and
// try to dereference it in this manner
}
nl.ipaddrs_get(*iface, addr_info);
// addInterface can now throw so protect against memory leaks.
try {
if (created) {
addInterface(iface);
}
} catch (...) {
nl.release_list(link_info);
nl.release_list(addr_info);
throw;
}
}
nl.release_list(link_info);
nl.release_list(addr_info);
}
/// @brief Stores the interface flags and decodes them into flag_*_ fields.
///
/// The raw value is kept in flags_. Each of the flag_*_ fields is set from
/// the corresponding IFF_* bit of the value. This implementation is
/// OS-specific as bits have different meaning on different OSes.
///
/// @param flags flags bitfield read from OS
void Iface::setFlags(uint64_t flags) {
    // Decode the individual bits of interest...
    flag_broadcast_ = flags & IFF_BROADCAST;
    flag_multicast_ = flags & IFF_MULTICAST;
    flag_running_ = flags & IFF_RUNNING;
    flag_up_ = flags & IFF_UP;
    flag_loopback_ = flags & IFF_LOOPBACK;

    // ...and remember the complete, undecoded value too.
    flags_ = flags;
}
/// @brief Installs the packet filter that matches the requested capability.
///
/// @param direct_response_desired when true a PktFilterLPF instance is
/// installed, otherwise a PktFilterInet instance is installed.
void
IfaceMgr::setMatchingPacketFilter(const bool direct_response_desired) {
    if (!direct_response_desired) {
        setPacketFilter(PktFilterPtr(new PktFilterInet()));
        return;
    }

    setPacketFilter(PktFilterPtr(new PktFilterLPF()));
}
/// @brief Opens a socket on the given address and, for a multicast-capable
/// interface, a second socket bound to the multicast address.
///
/// This is an all-or-nothing operation: if the second socket cannot be
/// opened, the first one is removed from the interface again.
///
/// @param iface interface on which the sockets are opened
/// @param addr address the first socket is opened on (the messages below
/// refer to it as the link-local socket)
/// @param port port the sockets are opened on
/// @param error_handler handler handed over to IFACEMGR_ERROR when opening
/// of a socket fails
///
/// @return true if all required sockets have been opened, false if a
/// failure was reported through IFACEMGR_ERROR and control came back here.
bool
IfaceMgr::openMulticastSocket(Iface& iface,
                              const isc::asiolink::IOAddress& addr,
                              const uint16_t port,
                              IfaceMgrErrorMsgCallback error_handler) {
    // Descriptor of the socket bound to the link-local address. It is kept
    // because that socket has to be closed again should the attempt to
    // open and bind the multicast socket fail.
    int link_local_sock;
    try {
        link_local_sock = openSocket(iface.getName(), addr, port,
                                     iface.flag_multicast_);

    } catch (const Exception& ex) {
        IFACEMGR_ERROR(SocketConfigError, error_handler, IfacePtr(),
                       "Failed to open link-local socket on "
                       "interface " << iface.getName() << ": "
                       << ex.what());
        return (false);
    }

    /// @todo The DHCPv6 requires multicast so we may want to think
    /// whether we want to open the socket on a multicast-incapable
    /// interface or not. For now, we prefer to be liberal and allow
    /// it for some odd use cases which may utilize non-multicast
    /// interfaces. Perhaps a warning should be emitted if the
    /// interface is not a multicast one.
    if (!iface.flag_multicast_) {
        // No multicast socket is needed, so we are done.
        return (true);
    }

    // In order to receive multicast traffic another socket is opened
    // and bound to the multicast address.
    try {
        openSocket(iface.getName(),
                   IOAddress(ALL_DHCP_RELAY_AGENTS_AND_SERVERS),
                   port);

    } catch (const Exception& ex) {
        // The multicast socket could not be opened and bound. Remove the
        // socket bound to the link-local address as well - this is the
        // everything or nothing strategy.
        iface.delSocket(link_local_sock);
        IFACEMGR_ERROR(SocketConfigError, error_handler, IfacePtr(),
                       "Failed to open multicast socket on"
                       " interface " << iface.getName()
                       << ", reason: " << ex.what());
        return (false);
    }

    // Both sockets have opened successfully.
    return (true);
}
/// @brief Opens an IPv6 socket using the current IPv6 packet filter and
/// registers it with the interface.
///
/// @param iface interface the socket is opened on and added to
/// @param addr address passed on to the packet filter
/// @param port port passed on to the packet filter
/// @param join_multicast multicast flag passed on to the packet filter
///
/// @return descriptor of the opened socket (sockfd_ of the socket
/// information returned by the packet filter).
int
IfaceMgr::openSocket6(Iface& iface, const IOAddress& addr, uint16_t port,
                      const bool join_multicast) {
    // The packet filter is assumed not to be NULL, because its modifier
    // checks it.
    SocketInfo opened = packet_filter6_->openSocket(iface, addr, port,
                                                    join_multicast);

    // Let the interface keep track of the new socket.
    iface.addSocket(opened);

    return (opened.sockfd_);
}
} // end of isc::dhcp namespace
} // end of isc namespace
#endif // if defined(OS_LINUX)
|