1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
|
#!@PYTHON@
# Copyright (C) 2010,2011 Internet Systems Consortium.
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SYSTEMS CONSORTIUM
# DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
# INTERNET SYSTEMS CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT,
# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
# FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
# WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
"""
This file implements the Boss of Bind (BoB, or bob) program.
Its purpose is to start up the BIND 10 system, and then manage the
processes, by starting and stopping processes, plus restarting
processes that exit.
To start the system, it first runs the c-channel program (msgq), then
connects to that. It then runs the configuration manager, and reads
its own configuration. Then it proceeds to starting other modules.
The Python subprocess module is used for starting processes, but
because this is not efficient for managing groups of processes,
SIGCHLD signals are caught and processed using the signal module.
Most of the logic is contained in the BoB class. However, since Python
requires that signal processing happen in the main thread, we do
signal handling outside of that class, in the code running for
__main__.
"""
import sys; sys.path.append ('@@PYTHONPATH@@')
import os
# If B10_FROM_SOURCE is set in the environment, we use data files
# from a directory relative to that, otherwise we use the ones
# installed on the system
if "B10_FROM_SOURCE" in os.environ:
SPECFILE_LOCATION = os.environ["B10_FROM_SOURCE"] + "/src/bin/bind10/bob.spec"
else:
PREFIX = "@prefix@"
DATAROOTDIR = "@datarootdir@"
SPECFILE_LOCATION = "@datadir@/@PACKAGE@/bob.spec".replace("${datarootdir}", DATAROOTDIR).replace("${prefix}", PREFIX)
import subprocess
import signal
import re
import errno
import time
import select
import random
import socket
from optparse import OptionParser, OptionValueError
import io
import pwd
import posix
import isc.cc
import isc.util.process
import isc.net.parse
import isc.log
from bind10_messages import *
import isc.bind10.sockcreator
isc.log.init("b10-boss")
logger = isc.log.Logger("boss")
# Pending system-wide debug level definitions, the ones we
# use here are hardcoded for now
DBG_PROCESS = 10
DBG_COMMANDS = 30
# Assign this process some longer name
isc.util.process.rename(sys.argv[0])
# This is the version that gets displayed to the user.
# The VERSION string consists of the module name, the module version
# number, and the overall BIND 10 version number (set in configure.ac).
VERSION = "bind10 20110223 (BIND 10 @PACKAGE_VERSION@)"
# This is for bind10.boottime of stats module
_BASETIME = time.gmtime()
class RestartSchedule:
"""
Keeps state when restarting something (in this case, a process).
When a process dies unexpectedly, we need to restart it. However, if
it fails to restart for some reason, then we should not simply keep
restarting it at high speed.
A more sophisticated algorithm can be developed, but for now we choose
a simple set of rules:
* If a process was been running for >=10 seconds, we restart it
right away.
* If a process was running for <10 seconds, we wait until 10 seconds
after it was started.
To avoid programs getting into lockstep, we use a normal distribution
to avoid being restarted at exactly 10 seconds."""
def __init__(self, restart_frequency=10.0):
self.restart_frequency = restart_frequency
self.run_start_time = None
self.run_stop_time = None
self.restart_time = None
def set_run_start_time(self, when=None):
if when is None:
when = time.time()
self.run_start_time = when
sigma = self.restart_frequency * 0.05
self.restart_time = when + random.normalvariate(self.restart_frequency,
sigma)
def set_run_stop_time(self, when=None):
"""We don't actually do anything with stop time now, but it
might be useful for future algorithms."""
if when is None:
when = time.time()
self.run_stop_time = when
def get_restart_time(self, when=None):
if when is None:
when = time.time()
return max(when, self.restart_time)
class ProcessInfoError(Exception): pass
class ProcessInfo:
"""Information about a process"""
dev_null = open(os.devnull, "w")
def __init__(self, name, args, env={}, dev_null_stdout=False,
dev_null_stderr=False, uid=None, username=None):
self.name = name
self.args = args
self.env = env
self.dev_null_stdout = dev_null_stdout
self.dev_null_stderr = dev_null_stderr
self.restart_schedule = RestartSchedule()
self.uid = uid
self.username = username
self.process = None
self.pid = None
def _preexec_work(self):
"""Function used before running a program that needs to run as a
different user."""
# First, put us into a separate process group so we don't get
# SIGINT signals on Ctrl-C (the boss will shut everthing down by
# other means).
os.setpgrp()
# Second, set the user ID if one has been specified
if self.uid is not None:
try:
posix.setuid(self.uid)
except OSError as e:
if e.errno == errno.EPERM:
# if we failed to change user due to permission report that
raise ProcessInfoError("Unable to change to user %s (uid %d)" % (self.username, self.uid))
else:
# otherwise simply re-raise whatever error we found
raise
def _spawn(self):
if self.dev_null_stdout:
spawn_stdout = self.dev_null
else:
spawn_stdout = None
if self.dev_null_stderr:
spawn_stderr = self.dev_null
else:
spawn_stderr = None
# Environment variables for the child process will be a copy of those
# of the boss process with any additional specific variables given
# on construction (self.env).
spawn_env = os.environ
spawn_env.update(self.env)
if 'B10_FROM_SOURCE' not in os.environ:
spawn_env['PATH'] = "@@LIBEXECDIR@@:" + spawn_env['PATH']
self.process = subprocess.Popen(self.args,
stdin=subprocess.PIPE,
stdout=spawn_stdout,
stderr=spawn_stderr,
close_fds=True,
env=spawn_env,
preexec_fn=self._preexec_work)
self.pid = self.process.pid
self.restart_schedule.set_run_start_time()
# spawn() and respawn() are the same for now, but in the future they
# may have different functionality
def spawn(self):
self._spawn()
def respawn(self):
self._spawn()
class CChannelConnectError(Exception): pass
class BoB:
"""Boss of BIND class."""
def __init__(self, msgq_socket_file=None, data_path=None,
config_filename=None, nocache=False, verbose=False, setuid=None,
username=None, cmdctl_port=None, brittle=False):
"""
Initialize the Boss of BIND. This is a singleton (only one can run).
The msgq_socket_file specifies the UNIX domain socket file that the
msgq process listens on. If verbose is True, then the boss reports
what it is doing.
Data path and config filename are passed trough to config manager
(if provided) and specify the config file to be used.
The cmdctl_port is passed to cmdctl and specify on which port it
should listen.
"""
self.cc_session = None
self.ccs = None
self.cfg_start_auth = True
self.cfg_start_resolver = False
self.cfg_start_dhcp6 = False
self.cfg_start_dhcp4 = False
self.started_auth_family = False
self.started_resolver_family = False
self.curproc = None
self.dead_processes = {}
self.msgq_socket_file = msgq_socket_file
self.nocache = nocache
self.processes = {}
self.expected_shutdowns = {}
self.runnable = False
self.uid = setuid
self.username = username
self.verbose = verbose
self.data_path = data_path
self.config_filename = config_filename
self.cmdctl_port = cmdctl_port
self.brittle = brittle
self.sockcreator = None
def config_handler(self, new_config):
# If this is initial update, don't do anything now, leave it to startup
if not self.runnable:
return
# Now we declare few functions used only internally here. Besides the
# benefit of not polluting the name space, they are closures, so we
# don't need to pass some variables
def start_stop(name, started, start, stop):
if not'start_' + name in new_config:
return
if new_config['start_' + name]:
if not started:
if self.uid is not None:
logger.info(BIND10_START_AS_NON_ROOT, name)
start()
else:
stop()
# These four functions are passed to start_stop (smells like functional
# programming little bit)
def resolver_on():
self.start_resolver(self.c_channel_env)
self.started_resolver_family = True
def resolver_off():
self.stop_resolver()
self.started_resolver_family = False
def auth_on():
self.start_auth(self.c_channel_env)
self.start_xfrout(self.c_channel_env)
self.start_xfrin(self.c_channel_env)
self.start_zonemgr(self.c_channel_env)
self.started_auth_family = True
def auth_off():
self.stop_zonemgr()
self.stop_xfrin()
self.stop_xfrout()
self.stop_auth()
self.started_auth_family = False
# The real code of the config handler function follows here
logger.debug(DBG_COMMANDS, BIND10_RECEIVED_NEW_CONFIGURATION,
new_config)
start_stop('resolver', self.started_resolver_family, resolver_on,
resolver_off)
start_stop('auth', self.started_auth_family, auth_on, auth_off)
answer = isc.config.ccsession.create_answer(0)
return answer
def get_processes(self):
pids = list(self.processes.keys())
pids.sort()
process_list = [ ]
for pid in pids:
process_list.append([pid, self.processes[pid].name])
return process_list
def _get_stats_data(self):
return { "stats_data": {
'bind10.boot_time': time.strftime('%Y-%m-%dT%H:%M:%SZ', _BASETIME)
}}
def command_handler(self, command, args):
logger.debug(DBG_COMMANDS, BIND10_RECEIVED_COMMAND, command)
answer = isc.config.ccsession.create_answer(1, "command not implemented")
if type(command) != str:
answer = isc.config.ccsession.create_answer(1, "bad command")
else:
if command == "shutdown":
self.runnable = False
answer = isc.config.ccsession.create_answer(0)
elif command == "getstats":
answer = isc.config.ccsession.create_answer(0, self._get_stats_data())
elif command == "sendstats":
# send statistics data to the stats daemon immediately
cmd = isc.config.ccsession.create_command(
'set', self._get_stats_data())
seq = self.cc_session.group_sendmsg(cmd, 'Stats')
# Consume the answer, in case it becomes a orphan message.
try:
self.cc_session.group_recvmsg(False, seq)
except isc.cc.session.SessionTimeout:
pass
answer = isc.config.ccsession.create_answer(0)
elif command == "ping":
answer = isc.config.ccsession.create_answer(0, "pong")
elif command == "show_processes":
answer = isc.config.ccsession. \
create_answer(0, self.get_processes())
else:
answer = isc.config.ccsession.create_answer(1,
"Unknown command")
return answer
def start_creator(self):
self.curproc = 'b10-sockcreator'
self.sockcreator = isc.bind10.sockcreator.Creator("@@LIBEXECDIR@@:" +
os.environ['PATH'])
def stop_creator(self, kill=False):
if self.sockcreator is None:
return
if kill:
self.sockcreator.kill()
else:
self.sockcreator.terminate()
self.sockcreator = None
def kill_started_processes(self):
"""
Called as part of the exception handling when a process fails to
start, this runs through the list of started processes, killing
each one. It then clears that list.
"""
logger.info(BIND10_KILLING_ALL_PROCESSES)
self.stop_creator(True)
for pid in self.processes:
logger.info(BIND10_KILL_PROCESS, self.processes[pid].name)
self.processes[pid].process.kill()
self.processes = {}
def read_bind10_config(self):
"""
Reads the parameters associated with the BoB module itself.
At present these are the components to start although arguably this
information should be in the configuration for the appropriate
module itself. (However, this would cause difficulty in the case of
xfrin/xfrout and zone manager as we don't need to start those if we
are not running the authoritative server.)
"""
logger.info(BIND10_READING_BOSS_CONFIGURATION)
config_data = self.ccs.get_full_config()
self.cfg_start_auth = config_data.get("start_auth")
self.cfg_start_resolver = config_data.get("start_resolver")
logger.info(BIND10_CONFIGURATION_START_AUTH, self.cfg_start_auth)
logger.info(BIND10_CONFIGURATION_START_RESOLVER, self.cfg_start_resolver)
def log_starting(self, process, port = None, address = None):
"""
A convenience function to output a "Starting xxx" message if the
logging is set to DEBUG with debuglevel DBG_PROCESS or higher.
Putting this into a separate method ensures
that the output form is consistent across all processes.
The process name (passed as the first argument) is put into
self.curproc, and is used to indicate which process failed to
start if there is an error (and is used in the "Started" message
on success). The optional port and address information are
appended to the message (if present).
"""
self.curproc = process
if port is None and address is None:
logger.info(BIND10_STARTING_PROCESS, self.curproc)
elif address is None:
logger.info(BIND10_STARTING_PROCESS_PORT, self.curproc,
port)
else:
logger.info(BIND10_STARTING_PROCESS_PORT_ADDRESS,
self.curproc, address, port)
def log_started(self, pid = None):
"""
A convenience function to output a 'Started xxxx (PID yyyy)'
message. As with starting_message(), this ensures a consistent
format.
"""
if pid is None:
logger.debug(DBG_PROCESS, BIND10_STARTED_PROCESS, self.curproc)
else:
logger.debug(DBG_PROCESS, BIND10_STARTED_PROCESS_PID, self.curproc, pid)
# The next few methods start the individual processes of BIND-10. They
# are called via start_all_processes(). If any fail, an exception is
# raised which is caught by the caller of start_all_processes(); this kills
# processes started up to that point before terminating the program.
def start_msgq(self, c_channel_env):
"""
Start the message queue and connect to the command channel.
"""
self.log_starting("b10-msgq")
c_channel = ProcessInfo("b10-msgq", ["b10-msgq"], c_channel_env,
True, not self.verbose, uid=self.uid,
username=self.username)
c_channel.spawn()
self.processes[c_channel.pid] = c_channel
self.log_started(c_channel.pid)
# Now connect to the c-channel
cc_connect_start = time.time()
while self.cc_session is None:
# if we have been trying for "a while" give up
if (time.time() - cc_connect_start) > 5:
raise CChannelConnectError("Unable to connect to c-channel after 5 seconds")
# try to connect, and if we can't wait a short while
try:
self.cc_session = isc.cc.Session(self.msgq_socket_file)
except isc.cc.session.SessionError:
time.sleep(0.1)
def start_cfgmgr(self, c_channel_env):
"""
Starts the configuration manager process
"""
self.log_starting("b10-cfgmgr")
args = ["b10-cfgmgr"]
if self.data_path is not None:
args.append("--data-path=" + self.data_path)
if self.config_filename is not None:
args.append("--config-filename=" + self.config_filename)
bind_cfgd = ProcessInfo("b10-cfgmgr", args,
c_channel_env, uid=self.uid,
username=self.username)
bind_cfgd.spawn()
self.processes[bind_cfgd.pid] = bind_cfgd
self.log_started(bind_cfgd.pid)
# sleep until b10-cfgmgr is fully up and running, this is a good place
# to have a (short) timeout on synchronized groupsend/receive
# TODO: replace the sleep by a listen for ConfigManager started
# message
time.sleep(1)
def start_ccsession(self, c_channel_env):
"""
Start the CC Session
The argument c_channel_env is unused but is supplied to keep the
argument list the same for all start_xxx methods.
"""
self.log_starting("ccsession")
self.ccs = isc.config.ModuleCCSession(SPECFILE_LOCATION,
self.config_handler,
self.command_handler)
self.ccs.start()
self.log_started()
# A couple of utility methods for starting processes...
def start_process(self, name, args, c_channel_env, port=None, address=None):
"""
Given a set of command arguments, start the process and output
appropriate log messages. If the start is successful, the process
is added to the list of started processes.
The port and address arguments are for log messages only.
"""
self.log_starting(name, port, address)
newproc = ProcessInfo(name, args, c_channel_env)
newproc.spawn()
self.processes[newproc.pid] = newproc
self.log_started(newproc.pid)
def start_simple(self, name, c_channel_env, port=None, address=None):
"""
Most of the BIND-10 processes are started with the command:
<process-name> [-v]
... where -v is appended if verbose is enabled. This method
generates the arguments from the name and starts the process.
The port and address arguments are for log messages only.
"""
# Set up the command arguments.
args = [name]
if self.verbose:
args += ['-v']
# ... and start the process
self.start_process(name, args, c_channel_env, port, address)
# The next few methods start up the rest of the BIND-10 processes.
# Although many of these methods are little more than a call to
# start_simple, they are retained (a) for testing reasons and (b) as a place
# where modifications can be made if the process start-up sequence changes
# for a given process.
def start_auth(self, c_channel_env):
"""
Start the Authoritative server
"""
authargs = ['b10-auth']
if self.nocache:
authargs += ['-n']
if self.uid:
authargs += ['-u', str(self.uid)]
if self.verbose:
authargs += ['-v']
# ... and start
self.start_process("b10-auth", authargs, c_channel_env)
def start_resolver(self, c_channel_env):
"""
Start the Resolver. At present, all these arguments and switches
are pure speculation. As with the auth daemon, they should be
read from the configuration database.
"""
self.curproc = "b10-resolver"
# XXX: this must be read from the configuration manager in the future
resargs = ['b10-resolver']
if self.uid:
resargs += ['-u', str(self.uid)]
if self.verbose:
resargs += ['-v']
# ... and start
self.start_process("b10-resolver", resargs, c_channel_env)
def start_xfrout(self, c_channel_env):
self.start_simple("b10-xfrout", c_channel_env)
def start_xfrin(self, c_channel_env):
self.start_simple("b10-xfrin", c_channel_env)
def start_zonemgr(self, c_channel_env):
self.start_simple("b10-zonemgr", c_channel_env)
def start_stats(self, c_channel_env):
self.start_simple("b10-stats", c_channel_env)
def start_stats_httpd(self, c_channel_env):
self.start_simple("b10-stats-httpd", c_channel_env)
def start_dhcp6(self, c_channel_env):
self.start_simple("b10-dhcp6", c_channel_env)
def start_cmdctl(self, c_channel_env):
"""
Starts the command control process
"""
args = ["b10-cmdctl"]
if self.cmdctl_port is not None:
args.append("--port=" + str(self.cmdctl_port))
self.start_process("b10-cmdctl", args, c_channel_env, self.cmdctl_port)
def start_all_processes(self):
"""
Starts up all the processes. Any exception generated during the
starting of the processes is handled by the caller.
"""
# The socket creator first, as it is the only thing that needs root
self.start_creator()
# TODO: Once everything uses the socket creator, we can drop root
# privileges right now
c_channel_env = self.c_channel_env
self.start_msgq(c_channel_env)
self.start_cfgmgr(c_channel_env)
self.start_ccsession(c_channel_env)
# Extract the parameters associated with Bob. This can only be
# done after the CC Session is started.
self.read_bind10_config()
# Continue starting the processes. The authoritative server (if
# selected):
if self.cfg_start_auth:
self.start_auth(c_channel_env)
# ... and resolver (if selected):
if self.cfg_start_resolver:
self.start_resolver(c_channel_env)
self.started_resolver_family = True
# Everything after the main components can run as non-root.
# TODO: this is only temporary - once the privileged socket creator is
# fully working, nothing else will run as root.
if self.uid is not None:
posix.setuid(self.uid)
# xfrin/xfrout and the zone manager are only meaningful if the
# authoritative server has been started.
if self.cfg_start_auth:
self.start_xfrout(c_channel_env)
self.start_xfrin(c_channel_env)
self.start_zonemgr(c_channel_env)
self.started_auth_family = True
# ... and finally start the remaining processes
self.start_stats(c_channel_env)
self.start_stats_httpd(c_channel_env)
self.start_cmdctl(c_channel_env)
if self.cfg_start_dhcp6:
self.start_dhcp6(c_channel_env)
def startup(self):
"""
Start the BoB instance.
Returns None if successful, otherwise an string describing the
problem.
"""
# Try to connect to the c-channel daemon, to see if it is already
# running
c_channel_env = {}
if self.msgq_socket_file is not None:
c_channel_env["BIND10_MSGQ_SOCKET_FILE"] = self.msgq_socket_file
logger.debug(DBG_PROCESS, BIND10_CHECK_MSGQ_ALREADY_RUNNING)
# try to connect, and if we can't wait a short while
try:
self.cc_session = isc.cc.Session(self.msgq_socket_file)
logger.fatal(BIND10_MSGQ_ALREADY_RUNNING)
return "b10-msgq already running, or socket file not cleaned , cannot start"
except isc.cc.session.SessionError:
# this is the case we want, where the msgq is not running
pass
# Start all processes. If any one fails to start, kill all started
# processes and exit with an error indication.
try:
self.c_channel_env = c_channel_env
self.start_all_processes()
except Exception as e:
self.kill_started_processes()
return "Unable to start " + self.curproc + ": " + str(e)
# Started successfully
self.runnable = True
return None
def stop_all_processes(self):
"""Stop all processes."""
cmd = { "command": ['shutdown']}
self.cc_session.group_sendmsg(cmd, 'Cmdctl', 'Cmdctl')
self.cc_session.group_sendmsg(cmd, "ConfigManager", "ConfigManager")
self.cc_session.group_sendmsg(cmd, "Auth", "Auth")
self.cc_session.group_sendmsg(cmd, "Resolver", "Resolver")
self.cc_session.group_sendmsg(cmd, "Xfrout", "Xfrout")
self.cc_session.group_sendmsg(cmd, "Xfrin", "Xfrin")
self.cc_session.group_sendmsg(cmd, "Zonemgr", "Zonemgr")
self.cc_session.group_sendmsg(cmd, "Stats", "Stats")
self.cc_session.group_sendmsg(cmd, "StatsHttpd", "StatsHttpd")
# Terminate the creator last
self.stop_creator()
def stop_process(self, process, recipient):
"""
Stop the given process, friendly-like. The process is the name it has
(in logs, etc), the recipient is the address on msgq.
"""
logger.info(BIND10_STOP_PROCESS, process)
# TODO: Some timeout to solve processes that don't want to die would
# help. We can even store it in the dict, it is used only as a set
self.expected_shutdowns[process] = 1
# Ask the process to die willingly
self.cc_session.group_sendmsg({'command': ['shutdown']}, recipient,
recipient)
# Series of stop_process wrappers
def stop_resolver(self):
self.stop_process('b10-resolver', 'Resolver')
def stop_auth(self):
self.stop_process('b10-auth', 'Auth')
def stop_xfrout(self):
self.stop_process('b10-xfrout', 'Xfrout')
def stop_xfrin(self):
self.stop_process('b10-xfrin', 'Xfrin')
def stop_zonemgr(self):
self.stop_process('b10-zonemgr', 'Zonemgr')
def shutdown(self):
"""Stop the BoB instance."""
logger.info(BIND10_SHUTDOWN)
# first try using the BIND 10 request to stop
try:
self.stop_all_processes()
except:
pass
# XXX: some delay probably useful... how much is uncertain
# I have changed the delay from 0.5 to 1, but sometime it's
# still not enough.
time.sleep(1)
self.reap_children()
# next try sending a SIGTERM
processes_to_stop = list(self.processes.values())
for proc_info in processes_to_stop:
logger.info(BIND10_SEND_SIGTERM, proc_info.name,
proc_info.pid)
try:
proc_info.process.terminate()
except OSError:
# ignore these (usually ESRCH because the child
# finally exited)
pass
# finally, send SIGKILL (unmaskable termination) until everybody dies
while self.processes:
# XXX: some delay probably useful... how much is uncertain
time.sleep(0.1)
self.reap_children()
processes_to_stop = list(self.processes.values())
for proc_info in processes_to_stop:
logger.info(BIND10_SEND_SIGKILL, proc_info.name,
proc_info.pid)
try:
proc_info.process.kill()
except OSError:
# ignore these (usually ESRCH because the child
# finally exited)
pass
logger.info(BIND10_SHUTDOWN_COMPLETE)
def _get_process_exit_status(self):
return os.waitpid(-1, os.WNOHANG)
def reap_children(self):
"""Check to see if any of our child processes have exited,
and note this for later handling.
"""
while True:
try:
(pid, exit_status) = self._get_process_exit_status()
except OSError as o:
if o.errno == errno.ECHILD: break
# XXX: should be impossible to get any other error here
raise
if pid == 0: break
if self.sockcreator is not None and self.sockcreator.pid() == pid:
# This is the socket creator, started and terminated
# differently. This can't be restarted.
if self.runnable:
logger.fatal(BIND10_SOCKCREATOR_CRASHED)
self.sockcreator = None
self.runnable = False
elif pid in self.processes:
# One of the processes we know about. Get information on it.
proc_info = self.processes.pop(pid)
proc_info.restart_schedule.set_run_stop_time()
self.dead_processes[proc_info.pid] = proc_info
# Write out message, but only if in the running state:
# During startup and shutdown, these messages are handled
# elsewhere.
if self.runnable:
if exit_status is None:
logger.warn(BIND10_PROCESS_ENDED_NO_EXIT_STATUS,
proc_info.name, proc_info.pid)
else:
logger.warn(BIND10_PROCESS_ENDED_WITH_EXIT_STATUS,
proc_info.name, proc_info.pid,
exit_status)
# Was it a special process?
if proc_info.name == "b10-msgq":
logger.fatal(BIND10_MSGQ_DAEMON_ENDED)
self.runnable = False
# If we're in 'brittle' mode, we want to shutdown after
# any process dies.
if self.brittle:
self.runnable = False
else:
logger.info(BIND10_UNKNOWN_CHILD_PROCESS_ENDED, pid)
def restart_processes(self):
"""
Restart any dead processes:
* Returns the time when the next process is ready to be restarted.
* If the server is shutting down, returns 0.
* If there are no processes, returns None.
The values returned can be safely passed into select() as the
timeout value.
"""
next_restart = None
# if we're shutting down, then don't restart
if not self.runnable:
return 0
# otherwise look through each dead process and try to restart
still_dead = {}
now = time.time()
for proc_info in self.dead_processes.values():
if proc_info.name in self.expected_shutdowns:
# We don't restart, we wanted it to die
del self.expected_shutdowns[proc_info.name]
continue
restart_time = proc_info.restart_schedule.get_restart_time(now)
if restart_time > now:
if (next_restart is None) or (next_restart > restart_time):
next_restart = restart_time
still_dead[proc_info.pid] = proc_info
else:
logger.info(BIND10_RESURRECTING_PROCESS, proc_info.name)
try:
proc_info.respawn()
self.processes[proc_info.pid] = proc_info
logger.info(BIND10_RESURRECTED_PROCESS, proc_info.name, proc_info.pid)
except:
still_dead[proc_info.pid] = proc_info
# remember any processes that refuse to be resurrected
self.dead_processes = still_dead
# return the time when the next process is ready to be restarted
return next_restart
# global variables, needed for signal handlers
options = None
boss_of_bind = None
def reaper(signal_number, stack_frame):
"""A child process has died (SIGCHLD received)."""
# don't do anything...
# the Python signal handler has been set up to write
# down a pipe, waking up our select() bit
pass
def get_signame(signal_number):
"""Return the symbolic name for a signal."""
for sig in dir(signal):
if sig.startswith("SIG") and sig[3].isalnum():
if getattr(signal, sig) == signal_number:
return sig
return "Unknown signal %d" % signal_number
# XXX: perhaps register atexit() function and invoke that instead
def fatal_signal(signal_number, stack_frame):
"""We need to exit (SIGINT or SIGTERM received)."""
global options
global boss_of_bind
logger.info(BIND10_RECEIVED_SIGNAL, get_signame(signal_number))
signal.signal(signal.SIGCHLD, signal.SIG_DFL)
boss_of_bind.runnable = False
def process_rename(option, opt_str, value, parser):
"""Function that renames the process if it is requested by a option."""
isc.util.process.rename(value)
def parse_args(args=sys.argv[1:], Parser=OptionParser):
"""
Function for parsing command line arguments. Returns the
options object from OptionParser.
"""
parser = Parser(version=VERSION)
parser.add_option("-m", "--msgq-socket-file", dest="msgq_socket_file",
type="string", default=None,
help="UNIX domain socket file the b10-msgq daemon will use")
parser.add_option("-n", "--no-cache", action="store_true", dest="nocache",
default=False, help="disable hot-spot cache in authoritative DNS server")
parser.add_option("-u", "--user", dest="user", type="string", default=None,
help="Change user after startup (must run as root)")
parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
help="display more about what is going on")
parser.add_option("--pretty-name", type="string", action="callback",
callback=process_rename,
help="Set the process name (displayed in ps, top, ...)")
parser.add_option("-c", "--config-file", action="store",
dest="config_file", default=None,
help="Configuration database filename")
parser.add_option("-p", "--data-path", dest="data_path",
help="Directory to search for configuration files",
default=None)
parser.add_option("--cmdctl-port", dest="cmdctl_port", type="int",
default=None, help="Port of command control")
parser.add_option("--pid-file", dest="pid_file", type="string",
default=None,
help="file to dump the PID of the BIND 10 process")
parser.add_option("--brittle", dest="brittle", action="store_true",
help="debugging flag: exit if any component dies")
(options, args) = parser.parse_args(args)
if options.cmdctl_port is not None:
try:
isc.net.parse.port_parse(options.cmdctl_port)
except ValueError as e:
parser.error(e)
if args:
parser.print_help()
sys.exit(1)
return options
def dump_pid(pid_file):
"""
Dump the PID of the current process to the specified file. If the given
file is None this function does nothing. If the file already exists,
the existing content will be removed. If a system error happens in
creating or writing to the file, the corresponding exception will be
propagated to the caller.
"""
if pid_file is None:
return
f = open(pid_file, "w")
f.write('%d\n' % os.getpid())
f.close()
def unlink_pid_file(pid_file):
"""
Remove the given file, which is basically expected to be the PID file
created by dump_pid(). The specified may or may not exist; if it
doesn't this function does nothing. Other system level errors in removing
the file will be propagated as the corresponding exception.
"""
if pid_file is None:
return
try:
os.unlink(pid_file)
except OSError as error:
if error.errno is not errno.ENOENT:
raise
def main():
global options
global boss_of_bind
# Enforce line buffering on stdout, even when not a TTY
sys.stdout = io.TextIOWrapper(sys.stdout.detach(), line_buffering=True)
options = parse_args()
# Check user ID.
setuid = None
username = None
if options.user:
# Try getting information about the user, assuming UID passed.
try:
pw_ent = pwd.getpwuid(int(options.user))
setuid = pw_ent.pw_uid
username = pw_ent.pw_name
except ValueError:
pass
except KeyError:
pass
# Next try getting information about the user, assuming user name
# passed.
# If the information is both a valid user name and user number, we
# prefer the name because we try it second. A minor point, hopefully.
try:
pw_ent = pwd.getpwnam(options.user)
setuid = pw_ent.pw_uid
username = pw_ent.pw_name
except KeyError:
pass
if setuid is None:
logger.fatal(BIND10_INVALID_USER, options.user)
sys.exit(1)
# Announce startup.
logger.info(BIND10_STARTING, VERSION)
# Create wakeup pipe for signal handlers
wakeup_pipe = os.pipe()
signal.set_wakeup_fd(wakeup_pipe[1])
# Set signal handlers for catching child termination, as well
# as our own demise.
signal.signal(signal.SIGCHLD, reaper)
signal.siginterrupt(signal.SIGCHLD, False)
signal.signal(signal.SIGINT, fatal_signal)
signal.signal(signal.SIGTERM, fatal_signal)
# Block SIGPIPE, as we don't want it to end this process
signal.signal(signal.SIGPIPE, signal.SIG_IGN)
# Go bob!
boss_of_bind = BoB(options.msgq_socket_file, options.data_path,
options.config_file, options.nocache, options.verbose,
setuid, username, options.cmdctl_port, options.brittle)
startup_result = boss_of_bind.startup()
if startup_result:
logger.fatal(BIND10_STARTUP_ERROR, startup_result)
sys.exit(1)
logger.info(BIND10_STARTUP_COMPLETE)
dump_pid(options.pid_file)
# In our main loop, we check for dead processes or messages
# on the c-channel.
wakeup_fd = wakeup_pipe[0]
ccs_fd = boss_of_bind.ccs.get_socket().fileno()
while boss_of_bind.runnable:
# clean up any processes that exited
boss_of_bind.reap_children()
next_restart = boss_of_bind.restart_processes()
if next_restart is None:
wait_time = None
else:
wait_time = max(next_restart - time.time(), 0)
# select() can raise EINTR when a signal arrives,
# even if they are resumable, so we have to catch
# the exception
try:
(rlist, wlist, xlist) = select.select([wakeup_fd, ccs_fd], [], [],
wait_time)
except select.error as err:
if err.args[0] == errno.EINTR:
(rlist, wlist, xlist) = ([], [], [])
else:
logger.fatal(BIND10_SELECT_ERROR, err)
break
for fd in rlist + xlist:
if fd == ccs_fd:
try:
boss_of_bind.ccs.check_command()
except isc.cc.session.ProtocolError:
logger.fatal(BIND10_MSGQ_DISAPPEARED)
self.runnable = False
break
elif fd == wakeup_fd:
os.read(wakeup_fd, 32)
# shutdown
signal.signal(signal.SIGCHLD, signal.SIG_DFL)
boss_of_bind.shutdown()
unlink_pid_file(options.pid_file)
sys.exit(0)
if __name__ == "__main__":
main()
|