summaryrefslogtreecommitdiffstats
path: root/doc/guide/hooks-ha.xml
blob: 5946353b1859d0e1b9ea5f9a59a16169f836a447 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
<!--
 - Copyright (C) 2018-2019 Internet Systems Consortium, Inc. ("ISC")
 -
 - This Source Code Form is subject to the terms of the Mozilla Public
 - License, v. 2.0. If a copy of the MPL was not distributed with this
 - file, you can obtain one at http://mozilla.org/MPL/2.0/.
-->

    <section xml:id="high-availability-library">
      <title>ha: High Availability</title>
      <para>
        This section describes the High Availability hooks library, which can be
        loaded on a pair of DHCPv4 or DHCPv6 servers to increase the reliability of
        the DHCP service in the event of an outage of one of the servers. This library
        was previously only available to ISC's paid subscribers, but is now part of
        the open source Kea, available to all users.

        <note>
          <para>This library may only be loaded by the <command>kea-dhcp4</command>
          or the <command>kea-dhcp6</command> process.
          </para>
        </note>
      </para>
      <para>
        High Availability (HA) of the DHCP service is provided by running multiple
        cooperating server instances. If any of these instances becomes
        unavailable for any reason (DHCP software crash, Control Agent
        software crash, power outage, hardware failure), a surviving
        server instance can continue providing reliable service to clients. Many
        DHCP server implementations include the "DHCP Failover" protocol, whose most
        significant features are communication between the servers, partner
        failure detection, and lease synchronization between the servers.
        However, the DHCPv4 failover standardization process was never completed
        by the IETF. The DHCPv6 failover standard (RFC 8156) was published, but it
        is complex, difficult to use, has significant operational constraints,
        and is different than its v4 counterpart.
        Although it may be useful for some users to use a "standard" failover
        protocol, it seems that most Kea users are simply interested in
        a working solution which guarantees high availability of the DHCP
        service. Therefore, the Kea HA hook library derives major concepts from the
        DHCP Failover protocol but uses its own solutions for communication and
        configuration. It offers its own state machine, which greatly simplifies its
        implementation and generally fits better into Kea, and it provides the
        same features in both DHCPv4 and DHCPv6. This document intentionally
        uses the term "High Availability" rather than "Failover" to emphasize that
        it is not the Failover protocol implementation.
      </para>
      <para>
        The following sections describe the configuration and operation of the Kea
        HA hook library.
      </para>

      <section>
        <title>Supported Configurations</title>
        <para>The Kea HA hook library supports two configurations, also known as HA
        modes: load balancing and hot standby. In the load-balancing mode,
        two servers respond to DHCP requests. The load-balancing function
        is implemented as described in <link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://tools.ietf.org/html/rfc3074">RFC 3074</link>, with each server responding to
        half the received DHCP queries. When one of the servers allocates a lease
        for a client, it notifies the partner server over the control channel
        (RESTful API), so the partner can save the lease information in its
        own database. If the communication with the partner is unsuccessful,
        the DHCP query is dropped and the response is not returned to the DHCP
        client. If the lease update is successful, the response is returned to
        the DHCP client by the server which has allocated the lease. By
        exchanging lease updates, both servers get a copy of all leases
        allocated by the entire HA setup, and either server can be switched
        to handle the entire DHCP traffic if its partner becomes unavailable.</para>

        <para>In the load-balancing configuration, one of the servers must be
        designated as "primary" and the other as "secondary."
        Functionally, there is no difference between the two during normal
        operation. This distinction is required when the two servers are
        started at (nearly) the same time and have to synchronize their
        lease databases. The primary server synchronizes the database first.
        The secondary server waits for the primary server to complete the
        lease database synchronization before it starts the synchronization.
        </para>

        <para>In the hot-standby configuration, one of the servers is also designated as
        "primary" and the second as "secondary". However, during
        normal operation, the primary server is the only one that responds to
        DHCP requests. The secondary or standby server receives lease updates from the
        primary over the control channel; however, it does not respond to any
        DHCP queries as long as the primary is running or, more accurately,
        until the secondary considers the primary to be offline. If the
        secondary server detects the failure of the primary, it starts
        responding to all DHCP queries.
        </para>

        <para>In the configurations described above, the primary, secondary, and
        standby are referred to as "active" servers, because they receive
        lease updates and can automatically react to the partner's failures by
        responding to the DHCP queries which would normally be handled by the
        partner. The HA hook library supports another server type/role:
        backup server. The use of backup servers is optional. They can be used
        in both load-balancing and hot-standby setups, in addition to the active
        servers. There is no limit on the number of backup servers in the HA
        setup; however, the presence of backup servers increases the latency
        of DHCP responses, because not only do active servers send lease
        updates to each other, but also to the backup servers.
        </para>
      </section>

      <section>
        <title>Clocks on Active Servers</title>
        <para>Synchronized clocks are essential for the HA setup to operate
        reliably. The servers share lease information via lease updates and
        during synchronization of the databases. The lease information includes
        the time when the lease has been allocated and when it expires. Some
        clock skew between the servers participating in the HA setup usually
        exists; this is acceptable as long as the clock skew is relatively low,
        compared to the lease lifetimes. However, if the clock skew becomes too
        high, the different lease expiration times on different
        servers may cause the HA system to malfunction. For example, one server
        may consider a lease to be expired when it is actually still valid. The lease
        reclamation process may remove a name associated with this lease from
        the DNS, causing problems when the client later attempts to renew the lease.</para>

        <para>Each active server monitors the clock skew by comparing its current
        time with the time returned by its partner in response to the heartbeat
        command. This gives a good approximation of the clock skew, although it
        doesn't take into account the time between sending the response by the
        partner and receiving this response by the server which sent the
        heartbeat command. If the clock skew exceeds 30 seconds, a warning log
        message is issued. The administrator may correct this problem by
        synchronizing the clocks (e.g. using NTP); the servers should notice
        the clock skew correction and stop issuing the warning.</para>

        <para>If the clock skew is not corrected and exceeds 60 seconds, the
        HA service on each of the servers is terminated, i.e. the state
        machine enters the <command>terminated</command> state. The servers
        will continue to respond to DHCP clients (as in the load-balancing
        or hot-standby mode), but will exchange neither lease updates nor
        heartbeats and their lease databases will diverge. In this case, the
        administrator should synchronize the clocks and restart the servers.
        </para>
      </section>

      <section xml:id="ha-server-states">
        <title>Server States</title>
        <para xml:id="command-ha-heartbeat">A DHCP server operating
        within an HA setup runs a state machine,
        and the state of the server can be retrieved by its peers using the
        <command>ha-heartbeat</command> command sent over the RESTful API. If
        the partner server doesn't respond to the <command>ha-heartbeat</command>
        command within the specified amount of time, the communication is
        considered interrupted and the server may (depending on the configuration)
        use additional measures (described later in this document) to verify that
        the partner is still operating. If it finds that the partner is not
        operating, the server transitions to the <command>partner-down</command>
        state to handle the entire DHCP traffic directed to the system.</para>

        <para>In this case, the surviving server continues to send the
        <command>ha-heartbeat</command> command to detect when the partner wakes
        up. At that time, the partner synchronizes the lease database and when it is again
        ready to operate, the surviving server returns to normal operation,
        i.e. the <command>load-balancing</command> or <command>hot-standby</command>
        state.</para>

        <para>The following is the list of all possible server states:

        <itemizedlist mark="bullet">
          <listitem><para><command>backup</command> - normal operation of the
          backup server. In this state it receives lease updates from the active
          servers.</para></listitem>

          <listitem><para><command>hot-standby</command> - normal operation of
          the active server running in the hot-standby mode; both the primary and
          the standby server are in this state during their normal operation.
          The primary server responds to DHCP queries and sends lease updates
          to the standby server and to any backup servers that
          are present.</para></listitem>

          <listitem><para><command>load-balancing</command> - normal operation
          of the active server running in the load-balancing mode; both the primary
          and the secondary server are in this state during their normal operation.
          Both servers respond to DHCP queries and send lease updates
          to each other and to any backup servers that are
          present.</para></listitem>

          <listitem><para><command>partner-down</command> - an active server
          transitions to this state after detecting that its partner (another
          active server) is offline. The server does not transition to this state
          if only a backup server is unavailable. In the
          <command>partner-down</command> state the active server responds to all DHCP queries,
          including those queries which are normally handled by the server
          that is now unavailable.</para></listitem>

          <listitem><para><command>ready</command> - an active server transitions
          to this state after synchronizing its lease database with an active
          partner. This state indicates to the partner - which may be in the
          <command>partner-down</command> state - that it should return to
          normal operation. If and when it does, the server in the
          <command>ready</command> state will also start normal operation.</para>
          </listitem>

          <listitem><para><command>syncing</command> - an active server
          transitions to this state to fetch leases from the active partner
          and update the local lease database. When in this state, the server
          issues the <command>dhcp-disable</command> command to disable the DHCP
          service of the partner from which the leases are fetched. The DHCP
          service is disabled for the maximum time of 60 seconds, after which
          it is automatically re-enabled, in case the syncing partner was unable
          to re-enable the service. If the synchronization is
          completed, the syncing server issues the
          <command>dhcp-enable</command> command to re-enable the DHCP service of its partner. The
          syncing operation is synchronous; the server waits for an
          answer from the partner and does nothing else while the
          lease synchronization takes place. A server that is configured
          not to synchronize the lease database with its partner, i.e. when the
          <command>sync-leases</command> configuration parameter is set to
          <command>false</command>, will never transition to this state.
          Instead, it will transition directly from the
          <command>waiting</command> state to the <command>ready</command> state.
          </para></listitem>

          <listitem><para><command>terminated</command> - an active server
          transitions to this state when the High Availability hooks library
          is unable to further provide reliable service and a manual
          intervention of the administrator is required to correct the problem.
          Various issues with the HA setup may cause the
          server to transition to this state.
          While in this state, the server continues responding to
          DHCP clients based on the HA mode selected (load-balancing or
          hot-standby), but the lease updates are not exchanged and the
          heartbeats are not sent. Once a server has entered the
          "terminated" state, it will remain in this state until it is
          restarted. The administrator must correct the issue which caused
          this situation prior to restarting the server (e.g. synchronize clocks).
          Otherwise, the server will return to the "terminated" state once
          it finds that the issue persists.
          </para></listitem>

          <listitem><para><command>waiting</command> - each started server
          instance enters this state. The backup server transitions
          directly from this state to the <command>backup</command> state.
          An active server sends a heartbeat to its partner to check its
          state; if the partner appears to be unavailable, the server
          transitions to the <command>partner-down</command> state. If the partner is
          available, the server transitions to the <command>syncing</command> or
          <command>ready</command> state, depending on the setting of the
          <command>sync-leases</command> configuration parameter. If
          both servers appear to be in the <command>waiting</command>
          state (concurrent startup), the primary server transitions to
          the next state first. The secondary or standby server remains
          in the <command>waiting</command> state until the primary
          transitions to the <command>ready</command> state.</para></listitem>
        </itemizedlist></para>

        <note>
          <para>Currently, restarting the HA service from the
          <command>terminated</command> state requires restarting the
          DHCP server or reloading its configuration.</para>
        </note>

        <para>Whether the server responds to the DHCP queries and which
        queries it responds to is a matter of the server's state, if no
        administrative action is performed to configure the server
        otherwise. The following table provides the default behavior for
        various states.</para>

        <para>The <command>DHCP Service Scopes</command> denote what group
        of received DHCP queries the server responds to in the given state.
        An in-depth explanation of the scopes can be found below.
        </para>

        <para>
          <table frame="all" xml:id="ha-default-states-behavior">
            <title>Default Behavior of the Server in Various HA States</title>
            <tgroup cols="4">
              <colspec colname="state"/>
              <colspec colname="server type" align="center"/>
              <colspec colname="dhcp-service" align="center"/>
              <colspec colname="dhcp-service-scopes" align="center"/>
              <thead>
                <row>
                  <entry>State</entry>
                  <entry>Server Type</entry>
                  <entry>DHCP Service</entry>
                  <entry>DHCP Service Scopes</entry>
                </row>
              </thead>
              <tbody>
                <row>
                  <entry>backup</entry>
                  <entry>backup server</entry>
                  <entry>disabled</entry>
                  <entry>none</entry>
                </row>
                <row>
                  <entry>hot-standby</entry>
                  <entry>primary or standby (hot-standby mode)</entry>
                  <entry>enabled</entry>
                  <entry><command>HA_server1</command> if primary, none otherwise</entry>
                </row>
                <row>
                  <entry>load-balancing</entry>
                  <entry>primary or secondary (load-balancing mode)</entry>
                  <entry>enabled</entry>
                  <entry><command>HA_server1</command> or <command>HA_server2</command></entry>
                </row>
                <row>
                  <entry>partner-down</entry>
                  <entry>active server</entry>
                  <entry>enabled</entry>
                  <entry>all scopes</entry>
                </row>
                <row>
                  <entry>ready</entry>
                  <entry>active server</entry>
                  <entry>disabled</entry>
                  <entry>none</entry>
                </row>
                <row>
                  <entry>syncing</entry>
                  <entry>active server</entry>
                  <entry>disabled</entry>
                  <entry>none</entry>
                </row>
                <row>
                  <entry>terminated</entry>
                  <entry>active server</entry>
                  <entry>enabled</entry>
                  <entry>same as in the load-balancing or hot-standby state</entry>
                </row>
                <row>
                  <entry>waiting</entry>
                  <entry>any server</entry>
                  <entry>disabled</entry>
                  <entry>none</entry>
                </row>
              </tbody>
            </tgroup>
          </table>
        </para>

        <para>The DHCP service scopes require some explanation. The HA
        configuration must specify a unique name for each server within
        the HA setup. This document uses the following convention within
        provided examples: <command>server1</command> for a primary server,
        <command>server2</command> for the secondary or standby server, and
        <command>server3</command> for the backup server. In real life
        any names can be used as long as they remain unique.</para>

        <para>In the load-balancing mode there are two scopes named after
        the active servers: <command>HA_server1</command> and
        <command>HA_server2</command>. The DHCP queries load-balanced to
        <command>server1</command> belong to the <command>HA_server1</command>
        scope and the queries load-balanced to <command>server2</command>
        belong to the <command>HA_server2</command> scope. If either of the
        servers is in the <command>partner-down</command> state, the active partner is
        responsible for serving both scopes.</para>

        <para>In the hot-standby mode, there is only one scope -
        <command>HA_server1</command> - because only <command>server1</command>
        is responding to DHCP queries. If that server becomes unavailable,
        <command>server2</command> becomes responsible for this scope.
        </para>

        <para>The backup servers do not have their own scopes. In some
        cases they can be used to respond to queries belonging to
        the scopes of the active servers. Also, a server which is neither
        in the partner-down state nor in normal operation serves
        no scopes.</para>

        <para>The scope names can be used to associate pools, subnets,
        and networks with certain servers, so only these servers
        can allocate addresses or prefixes from those pools, subnets,
        or networks. This is done via the client classification mechanism
        (see below).</para>
      </section>

      <section xml:id="ha-scope-transition">
        <title>Scope Transition in a Partner-Down Case</title>
        <para>When one of the servers finds that its partner is unavailable,
        it starts serving clients from both its own scope and the scope of the
        unavailable partner. This is straightforward
        for new clients, i.e. those sending DHCPDISCOVER (DHCPv4) or Solicit
        (DHCPv6), because those requests are not sent to any particular server.
        The available server will respond to all such queries when it is
        in the <command>partner-down</command> state.</para>

        <para>When a client renews a lease, it sends its
        DHCPREQUEST (DHCPv4) or Renew (DHCPv6) message directly to the
        server which has allocated the lease being renewed. If this
        server is no longer available, the client will get no response. In
        that case, the client continues to use its lease and attempts to
        renew until the rebind timer (T2) elapses. The client then enters
        the rebinding phase, in which it sends a DHCPREQUEST (DHCPv4) or
        Rebind (DHCPv6) message to any available server. The surviving
        server will receive the rebinding request and will typically
        extend the lifetime of the lease. The client then continues to
        contact that new server to renew its lease as appropriate.</para>

        <para>If and when the other server once again becomes available, both active servers
        will eventually transition to the <command>load-balancing</command>
        or <command>hot-standby</command> state, in which they will again be
        responsible for their own scopes. Some clients belonging to the
        scope of the restarted server will try to renew their leases
        via the surviving server, but this server will not respond to them
        anymore; these clients will eventually transition back to the
        correct server via the rebinding mechanism.</para>
      </section>

      <section xml:id="ha-load-balancing-config">
        <title>Load-Balancing Configuration</title>
        <para>The following is the configuration snippet to enable
        high availability on the primary server within the load-balancing
        configuration. The same configuration should be applied on the
        secondary and backup servers, with the only difference that
        <command>this-server-name</command> should be set to
        <command>server2</command> and <command>server3</command>
        on those servers, respectively.
<screen>
{
"Dhcp4": {

    ...

    "hooks-libraries": [
        {
            "library": "/usr/lib/kea/hooks/libdhcp_lease_cmds.so",
            "parameters": { }
        },
        {
            "library": "/usr/lib/kea/hooks/libdhcp_ha.so",
            "parameters": {
                "high-availability": [ {
                    "this-server-name": "server1",
                    "mode": "load-balancing",
                    "heartbeat-delay": 10000,
                    "max-response-delay": 10000,
                    "max-ack-delay": 5000,
                    "max-unacked-clients": 5,
                    "peers": [
                        {
                            "name": "server1",
                            "url": "http://192.168.56.33:8080/",
                            "role": "primary",
                            "auto-failover": true
                        },
                        {
                            "name": "server2",
                            "url": "http://192.168.56.66:8080/",
                            "role": "secondary",
                            "auto-failover": true
                        },
                        {
                            "name": "server3",
                            "url": "http://192.168.56.99:8080/",
                            "role": "backup",
                            "auto-failover": false
                        }
                    ]
                } ]
            }
        }
    ],

    "subnet4": [
        {
            "subnet": "192.0.3.0/24",
            "pools": [
                {
                    "pool": "192.0.3.100 - 192.0.3.150",
                    "client-class": "HA_server1"
                },
                {
                    "pool": "192.0.3.200 - 192.0.3.250",
                    "client-class": "HA_server2"
                }
            ],

            "option-data": [
                {
                    "name": "routers",
                    "data": "192.0.3.1"
                }
            ],

            "relay": { "ip-address": "10.1.2.3" }
        }
    ],

    ...

}

}
</screen>
        </para>

        <para>Two hook libraries must be loaded to enable HA:
        <filename>libdhcp_lease_cmds.so</filename> and
        <filename>libdhcp_ha.so</filename>. The latter implements the
        HA feature, while the former enables control
        commands required by HA to fetch and manipulate leases on the
        remote servers. In the example provided above, it is assumed that
        Kea libraries are installed in the <filename>/usr/lib</filename>
        directory. If Kea is not installed in the /usr directory, the
        hook library locations must be updated accordingly.
        </para>

        <para>The HA configuration is specified within the scope of
        <filename>libdhcp_ha.so</filename>. Note that the top-level
        parameter <command>high-availability</command> is a list, even
        though it currently contains only one entry.</para>

        <para>The following are the global parameters which control the server's
        behavior with respect to HA:
        <itemizedlist mark="bullet">
          <listitem><para><command>this-server-name</command> - is a unique
          identifier of the server within this HA setup. It must match the name of one
          of the servers specified within the <command>peers</command> list.
          </para></listitem>

          <listitem><para><command>mode</command> - specifies an HA mode
          of operation. Currently supported modes are
          <command>load-balancing</command> and <command>hot-standby</command>.</para></listitem>

          <listitem><para><command>heartbeat-delay</command> - specifies
          a duration in milliseconds between sending the last heartbeat (or other command sent
          to the partner) and the next heartbeat. The heartbeats are sent
          periodically to gather the status of the partner and to verify whether
          the partner is still operating. The default value of this parameter is
          10000 ms.</para></listitem>

          <listitem><para><command>max-response-delay</command> - specifies a
          duration in milliseconds since the last successful communication with the
          partner, after which the server assumes that communication with
          the partner is interrupted. This duration should be greater than
          the <command>heartbeat-delay</command>. Usually it is greater than
          the duration of multiple <command>heartbeat-delay</command> values.
          When the server detects that communication is interrupted, it
          may transition to the <command>partner-down</command> state (when
          <command>max-unacked-clients</command> is 0) or trigger the
          failure-detection procedure using the values of the two parameters below.
          The default value of this parameter is 60000 ms.
          </para></listitem>

          <listitem><para><command>max-ack-delay</command> - is one of
          the parameters controlling partner failure-detection. When
          communication with the partner is interrupted, the server examines the values
          of the <command>secs</command> field (DHCPv4) or
          <command>Elapsed Time</command> option (DHCPv6), which denote how long the DHCP client has been
          trying to communicate with the DHCP server. This parameter specifies the
          maximum time in milliseconds for the client to try to communicate with the
          DHCP server, after which this server assumes that the client failed to
          communicate with the DHCP server (is "unacked"). The default value of
          this parameter is 10000 ms.</para></listitem>

          <listitem><para><command>max-unacked-clients</command> - specifies
          how many "unacked" clients are allowed (see <command>max-ack-delay</command>)
          before this server assumes that the partner is offline and transitions
          to the <command>partner-down</command> state. The special value of 0
          is allowed for this parameter, which disables the failure-detection
          mechanism. In this case, a server that can't communicate with its
          partner over the control channel assumes that the partner server is
          down and transitions to the <command>partner-down</command> state
          immediately. The default value of this parameter is 10.</para>
          </listitem>

        </itemizedlist>
        </para>

        <para>
          The values of <command>max-ack-delay</command> and
          <command>max-unacked-clients</command> must be selected carefully, taking
          into account the specifics of the network in which the DHCP servers are
          operating. Note that the server in question may not respond to some
          DHCP clients because these clients are not to be serviced
          by this server according to administrative policy. The server may also
          drop malformed queries from clients. Therefore, selecting too
          low a value for the <command>max-unacked-clients</command> parameter may
          result in a transition to the <command>partner-down</command>
          state even though the partner is still operating. On the other
          hand, selecting too high a value may result in never transitioning
          to the <command>partner-down</command> state if the DHCP
          traffic in the network is very low (e.g. nighttime), because the
          number of distinct clients trying to communicate with the server
          could be lower than the <command>max-unacked-clients</command> setting.
        </para>

        <para>In some cases it may be useful to disable the failure-detection
        mechanism altogether, if the servers are located very close to each
        other and network partitioning is unlikely, i.e. failure to
        respond to heartbeats is only possible when the partner is offline.
        In such cases, set the <command>max-unacked-clients</command> to 0.
        </para>

        <para>The <command>peers</command> parameter contains a list of servers
        within this HA setup. This configuration must contain exactly
        one primary and one secondary server. It may also contain an unlimited
        number of backup servers. In this example, there is one backup server
        which receives lease updates from the active servers.</para>

        <para>These are the parameters specified for each of the
        peers within this list:

        <itemizedlist mark="bullet">
          <listitem><para><command>name</command> - specifies a unique name for
          the server.</para></listitem>

          <listitem><para><command>url</command> - specifies the URL to be used to
          contact this server over the control channel. Other servers use this
          URL to send control commands to that server.</para></listitem>

          <listitem><para><command>role</command> - denotes the role of the
          server in the HA setup. The following roles are supported in the
          load-balancing configuration: <command>primary</command>,
          <command>secondary</command>, and <command>backup</command>.
          There must be exactly one primary and one secondary server in the
          load-balancing setup.</para></listitem>

          <listitem><para><command>auto-failover</command> - a boolean value
          which denotes whether a server detecting a partner's failure should
          automatically start serving the partner's clients. The default value of
          this parameter is true.</para></listitem>

        </itemizedlist>
        </para>

        <para>In our example configuration, both active servers can allocate
        leases from the subnet "192.0.3.0/24". This subnet contains two
        address pools: "192.0.3.100 - 192.0.3.150" and "192.0.3.200 - 192.0.3.250",
        which are associated with HA server scopes using client classification.
        When <command>server1</command> processes a DHCP query, it uses
        the first pool for lease allocation. Conversely, when
        <command>server2</command> processes a DHCP query it uses the
        second pool. When either of the servers is in the
        <command>partner-down</command> state, it can serve leases from both pools and it
        selects the pool which is appropriate for the received query. In
        other words, if the query would normally be processed by
        <command>server2</command> but this server is not available,
        <command>server1</command> will allocate the lease from the pool of
        "192.0.3.200 - 192.0.3.250".
        </para>

      </section> <!-- end of ha-load-balancing-config -->

      <section xml:id="ha-load-balancing-advanced-config">
        <title>Load Balancing with Advanced Classification</title>
        <para>In the previous section, we provided an example of
        a load-balancing configuration with client classification limited
        to the <command>HA_server1</command> and <command>HA_server2</command>
        classes, which are dynamically assigned to the received DHCP queries.
        In many cases, HA will be needed in deployments which already
        use some other client classification.
        </para>
        <para>
          Suppose there is a system which classifies devices into two groups:
          phones and laptops, based on some classification criteria specified in
          the Kea configuration file. Both types of devices are allocated leases
          from different address pools. Introducing HA in the load-balancing mode
          results in a further split of each of those pools, as
          each server allocates leases for some phones and
          some laptops. This requires each of the existing pools
          to be split between <command>HA_server1</command> and
          <command>HA_server2</command>, so we end up with the following classes:

          <itemizedlist>
            <listitem><simpara>phones_server1</simpara></listitem>
            <listitem><simpara>laptops_server1</simpara></listitem>
            <listitem><simpara>phones_server2</simpara></listitem>
            <listitem><simpara>laptops_server2</simpara></listitem>
          </itemizedlist>
        </para>

        <para>The corresponding server configuration using advanced classification
        (and <command>member</command> expression) is provided below. For brevity's sake,
        the HA hook library configuration has been removed from this example.
<screen>
{
"Dhcp4": {

    "client-classes": [
        {
            "name": "phones",
            "test": "substring(option[60].hex,0,6) == 'Aastra'"
        },
        {
            "name": "laptops",
            "test": "not member('phones')"
        },
        {
            "name": "phones_server1",
            "test": "member('phones') and member('HA_server1')"
        },
        {
            "name": "phones_server2",
            "test": "member('phones') and member('HA_server2')"
        },
        {
            "name": "laptops_server1",
            "test": "member('laptops') and member('HA_server1')"
        },
        {
            "name": "laptops_server2",
            "test": "member('laptops') and member('HA_server2')"
        }
    ],

    "hooks-libraries": [
        {
            "library": "/usr/lib/kea/hooks/libdhcp_lease_cmds.so",
            "parameters": { }
        },
        {
            "library": "/usr/lib/kea/hooks/libdhcp_ha.so",
            "parameters": {
                "high-availability": [ {

                    ...

                } ]
            }
        }
    ],

    "subnet4": [
        {
            "subnet": "192.0.3.0/24",
            "pools": [
                {
                    "pool": "192.0.3.100 - 192.0.3.125",
                    "client-class": "phones_server1"
                },
                {
                    "pool": "192.0.3.126 - 192.0.3.150",
                    "client-class": "laptops_server1"
                },
                {
                    "pool": "192.0.3.200 - 192.0.3.225",
                    "client-class": "phones_server2"
                },
                {
                    "pool": "192.0.3.226 - 192.0.3.250",
                    "client-class": "laptops_server2"
                }
            ],

            "option-data": [
                {
                    "name": "routers",
                    "data": "192.0.3.1"
                }
            ],

            "relay": { "ip-address": "10.1.2.3" }
        }
    ],

    ...

}

}
</screen>
        </para>

        <para>The configuration provided above splits the address range into
        four pools: two pools dedicated to server1 and two to
        server2. Each server can assign leases to both phones and laptops.
        Both groups of devices are assigned addresses from different pools.
        The <command>HA_server1</command> and <command>HA_server2</command> classes
        are built-in (see <xref linkend="classification-using-vendor"/>)
        and do not need to be declared. They are assigned dynamically by
        the HA hook library as a result of the load-balancing algorithm.
        <command>phones_*</command> and <command>laptops_*</command> evaluate to
        "true" when the query belongs to a given combination of other classes,
        e.g. <command>HA_server1</command> and <command>phones</command>.
        The pool is selected accordingly as a result of such an evaluation.
        </para>

        <para>Consult <xref linkend="classify"/> for details on how to use the
        <command>member</command> expression and class dependencies.</para>

      </section> <!-- end of ha-load-balancing-advanced-config -->

      <section xml:id="ha-hot-standby-config">
        <title>Hot-Standby Configuration</title>
        <para>The following is an example configuration of the primary server
        in the hot-standby configuration:
<screen>
{
"Dhcp4": {

    ...

    "hooks-libraries": [
        {
            "library": "/usr/lib/kea/hooks/libdhcp_lease_cmds.so",
            "parameters": { }
        },
        {
            "library": "/usr/lib/kea/hooks/libdhcp_ha.so",
            "parameters": {
                "high-availability": [ {
                    "this-server-name": "server1",
                    "mode": "hot-standby",
                    "heartbeat-delay": 10000,
                    "max-response-delay": 60000,
                    "max-ack-delay": 5000,
                    "max-unacked-clients": 5,
                    "peers": [
                        {
                            "name": "server1",
                            "url": "http://192.168.56.33:8080/",
                            "role": "primary",
                            "auto-failover": true
                        },
                        {
                            "name": "server2",
                            "url": "http://192.168.56.66:8080/",
                            "role": "standby",
                            "auto-failover": true
                        },
                        {
                            "name": "server3",
                            "url": "http://192.168.56.99:8080/",
                            "role": "backup",
                            "auto-failover": false
                        }
                    ]
                } ]
            }
        }
    ],

    "subnet4": [
        {
            "subnet": "192.0.3.0/24",
            "pools": [
                {
                    "pool": "192.0.3.100 - 192.0.3.250",
                    "client-class": "HA_server1"
                }
            ],

            "option-data": [
                {
                    "name": "routers",
                    "data": "192.0.3.1"
                }
            ],

            "relay": { "ip-address": "10.1.2.3" }
        }
    ],

    ...

}

}
</screen>
        </para>

        <para>This configuration is very similar to the load-balancing
        configuration described in <xref linkend="ha-load-balancing-config"/>,
        with a few notable differences.</para>

        <para>The <command>mode</command> is now set to <command>hot-standby</command>,
        in which only one server responds to DHCP clients.
        If the primary server is online, it responds to
        all DHCP queries. The <command>standby</command> server takes over all
        DHCP traffic if it discovers that the primary is unavailable.
        </para>

        <para>In this mode, the non-primary active server is called
        <command>standby</command> and that is its role.</para>

        <para>Finally, because there is always one server responding to
        DHCP queries, there is only one scope - <command>HA_server1</command> -
        in use within pool definitions. In fact, the <command>client-class</command>
        parameter could be removed from this configuration without harm,
        because there can be no conflicts in lease allocations by different
        servers as they do not allocate leases concurrently. The
        <command>client-class</command> remains in this example mostly for
        demonstration purposes, to highlight the differences between the
        hot-standby and load-balancing modes of operation.</para>
      </section> <!-- end of ha-hot-standby-config -->

      <section xml:id="ha-sharing-lease-info">
        <title>Lease Information Sharing</title>
        <para>An HA-enabled server informs its active partner about allocated
        or renewed leases by sending appropriate control commands, and the partner
        updates the lease information in its own database. When the server starts
        up for the first time or recovers after a failure, it synchronizes its
        lease database with its partner. These two mechanisms guarantee
        consistency of the lease information between the servers and allow the
        designation of one of the servers to handle the entire DHCP traffic load if
        the other server becomes unavailable.</para>

        <para>In some cases, though, it is desirable to disable lease updates
        and/or database synchronization between the active servers, if the
        exchange of information about the allocated leases is performed
        using some other mechanism. Kea supports various database types
        that can be used to store leases, including MySQL, PostgreSQL, and Cassandra.
        Those databases include built-in solutions for data replication which
        are often used by Kea administrators to provide redundancy.</para>

        <para>The HA hook library supports such scenarios by
        disabling lease updates over the control channel and/or lease database
        synchronization, leaving the server to rely on the database replication
        mechanism. This is controlled by the two boolean parameters
        <command>send-lease-updates</command> and <command>sync-leases</command>,
        whose values default to true:

<screen>
{
"Dhcp4": {

    ...

    "hooks-libraries": [
        {
            "library": "/usr/lib/kea/hooks/libdhcp_lease_cmds.so",
            "parameters": { }
        },
        {
            "library": "/usr/lib/kea/hooks/libdhcp_ha.so",
            "parameters": {
                "high-availability": [ {
                    "this-server-name": "server1",
                    "mode": "load-balancing",
                    "send-lease-updates": false,
                    "sync-leases": false,
                    "peers": [
                        {
                            "name": "server1",
                            "url": "http://192.168.56.33:8080/",
                            "role": "primary"
                        },
                        {
                            "name": "server2",
                            "url": "http://192.168.56.66:8080/",
                            "role": "secondary"
                        }
                    ]
                } ]
            }
        }
    ],

    ...

}

}
</screen>
        </para>

        <para>
          In the most typical use case, both parameters are set to the same
          value, i.e. both are <command>false</command> if database
          replication is in use, or both are <command>true</command> otherwise.
          Introducing two separate parameters to control lease updates and
          lease-database synchronization is aimed at possible special use
          cases; for example, when synchronization is performed by copying a lease file
          (therefore <command>sync-leases</command> is set to
          <command>false</command>), but lease updates should be conducted
          as usual (<command>send-lease-updates</command> is set to
          <command>true</command>). It should be noted that Kea does not
          natively support such use cases, but users may develop their own
          scripts and tools around Kea to provide such mechanisms. The HA
          hooks library configuration is designed to maximize flexibility of administration.
        </para>
      </section>

      <section xml:id="ha-syncing-page-limit">
        <title>Controlling Lease-Page Size Limit</title>
        <para>An HA-enabled server initiates synchronization of the lease
        database after downtime or upon receiving the <command>ha-sync</command>
        command. The server uses commands described in
        <xref linkend="lease-get-page-cmds"/> to fetch leases from its
        partner server (lease queries). The size of the results page
        (the maximum number of leases to be returned in a single response to one
        of these commands) can be controlled via HA hooks library configuration.
        Increasing the page size decreases the number of lease queries sent to
        the partner server, but it causes the partner server to generate
        larger responses, which lengthens transmission time as well as
        increases memory and CPU utilization on both servers. Decreasing the
        page size helps to decrease resource utilization, but requires
        more lease queries to be issued to fetch the entire lease
        database.</para>
        <para>The default value of the <command>sync-page-limit</command> parameter
        controlling the page size is 10000. This means that the entire
        lease database can be fetched with a single command if the
        size of the database is equal to or less than 10000 leases.
        </para>
      </section>

      <section xml:id="ha-syncing-timeouts">
        <title>Discussion About Timeouts</title>
        <para>In deployments with a large number of clients connected to the
        network, lease-database synchronization after a server failure
        may be a time-consuming operation. The synchronizing server must
        gather all leases from its partner, which yields a large response
        over the RESTful interface. The server receives leases using the
        paging mechanism described in <xref linkend="ha-syncing-page-limit"/>.
        Before the page of leases is fetched, the synchronizing server
        sends a <command>dhcp-disable</command> command to disable the DHCP
        service on the partner server. If the service is already disabled, this
        command will reset the timeout for the DHCP service being disabled.
        This timeout value is by default set to 60 seconds. If fetching a
        single page of leases takes longer than the specified time, the partner server will assume that
        the synchronizing server died and will resume its DHCP service.
        The connection of the synchronizing server with its partner is also
        protected by the timeout. If the synchronization of a single page
        of leases takes longer than the specified time, the synchronizing server
        terminates the connection and the synchronization fails.
        Both timeout values are controlled by a single configuration
        parameter: <command>sync-timeout</command>. The following
        configuration snippet demonstrates how to modify the timeout for
        automatic re-enabling of the DHCP service on the partner server
        and how to increase the timeout for fetching a single page of leases from 60 seconds
        to 90 seconds:
<screen>
{
"Dhcp4": {

    ...

    "hooks-libraries": [
        {
            "library": "/usr/lib/kea/hooks/libdhcp_lease_cmds.so",
            "parameters": { }
        },
        {
            "library": "/usr/lib/kea/hooks/libdhcp_ha.so",
            "parameters": {
                "high-availability": [ {
                    "this-server-name": "server1",
                    "mode": "load-balancing",
                    "sync-timeout": 90000,
                    "peers": [
                        {
                            "name": "server1",
                            "url": "http://192.168.56.33:8080/",
                            "role": "primary"
                        },
                        {
                            "name": "server2",
                            "url": "http://192.168.56.66:8080/",
                            "role": "secondary"
                        }
                    ]
                } ]
            }
        }
    ],

    ...

}

}
</screen>
        </para>

        <para>
          It is important to note that extending this <command>sync-timeout</command> value may sometimes
          be insufficient to prevent issues with timeouts during
          lease-database synchronization. The control commands travel via the
          Control Agent, which also monitors incoming (with a synchronizing
          server) and outgoing (with a DHCP server) connections for timeouts.
          The DHCP server also monitors the connection from the Control
          Agent for timeouts. Those timeouts cannot currently be modified
          via configuration; extending these timeouts is only possible by
          modifying them in the Kea code and recompiling the server. The
          relevant constants are located in the Kea source at:
          <filename>src/lib/config/timeouts.h</filename>.
        </para>
      </section>

      <section xml:id="ha-pause-state-machine">
        <title>Pausing HA State Machine</title>
        <para>The high-availability state machine includes many different
        states described in detail in <xref linkend="ha-server-states"/>.
        The server enters each state when certain conditions are met, most
        often taking into account the partner server's state. In some states
        the server performs specific actions, e.g. synchronization of the
        lease database in the <command>syncing</command> state or responding
        to DHCP queries according to the configured mode of operation in the
        <command>load-balancing</command> and <command>hot-standby</command>
        states.
        </para>
        <para>
          By default, transitions between the states are performed
          automatically and the server administrator has no direct control
          when the transitions take place; in most cases, the
          administrator doesn't need such control. In some situations,
          however, the administrator may want to "pause" the HA state
          machine in a selected state to perform some additional administrative
          actions before the server transitions to the next state.
        </para>

        <para>Consider a server failure which results in the loss of the entire
        lease database. Typically, the server will rebuild its lease database
        when it enters the <command>syncing</command> state by querying
        the partner server for leases, but it is possible that the
        partner was also experiencing a failure and lacks lease information.
        In this case, it may be required to reconstruct lease databases on
        both servers from some external source, e.g. a backup server. If the
        lease database is to be reconstructed via RESTful API, the
        servers should be started in the initial, i.e. <command>waiting</command>,
        state and remain in this state while leases are being added. In
        particular, the servers should not attempt to synchronize their lease
        databases nor start serving DHCP clients.
        </para>

        <para>The HA hooks library provides configuration parameters and a
        command to control when the HA state machine should be paused and
        resumed. The following configuration causes the HA state machine
        to pause in the <command>waiting</command> state after server startup.
<screen>
"Dhcp4": {

    ...

    "hooks-libraries": [
        {
            "library": "/usr/lib/kea/hooks/libdhcp_lease_cmds.so",
            "parameters": { }
        },
        {
            "library": "/usr/lib/kea/hooks/libdhcp_ha.so",
            "parameters": {
                "high-availability": [ {
                    "this-server-name": "server1",
                    "mode": "load-balancing",
                    "peers": [
                        {
                            "name": "server1",
                            "url": "http://192.168.56.33:8080/",
                            "role": "primary"
                        },
                        {
                            "name": "server2",
                            "url": "http://192.168.56.66:8080/",
                            "role": "secondary"
                        }
                    ],
                    "state-machine": {
                        "states":  [
                            {
                                "state": "waiting",
                                "pause": "once"
                            }
                        ]
                    }
                } ]
            }
        }
    ],

    ...

}
</screen>
        </para>

        <para>The <command>pause</command> parameter value <command>once</command>
        denotes that the state machine should be paused upon the first transition
        to the <command>waiting</command> state; later transitions to this state
        will not cause the state machine to pause. Two other supported values of the
        <command>pause</command> parameter are: <command>always</command> and
        <command>never</command>. The latter is the default value for each state,
        which instructs the server never to pause the state machine.
        </para>

        <para>In order to "unpause" the state machine, the <command>ha-continue</command>
        command must be sent to the paused server. This command does not take
        any arguments. See <xref linkend="ha-control-commands"/> for details
        about commands specific to the HA hooks library.
        </para>

        <para>It is possible to configure the state machine to pause in more than
        one state. Consider the following configuration:
<screen>
"Dhcp4": {

    ...

    "hooks-libraries": [
        {
            "library": "/usr/lib/kea/hooks/libdhcp_lease_cmds.so",
            "parameters": { }
        },
        {
            "library": "/usr/lib/kea/hooks/libdhcp_ha.so",
            "parameters": {
                "high-availability": [ {
                    "this-server-name": "server1",
                    "mode": "load-balancing",
                    "peers": [
                        {
                            "name": "server1",
                            "url": "http://192.168.56.33:8080/",
                            "role": "primary"
                        },
                        {
                            "name": "server2",
                            "url": "http://192.168.56.66:8080/",
                            "role": "secondary"
                        }
                    ],
                    "state-machine": {
                        "states": [
                            {
                                "state": "ready",
                                "pause": "always"
                            },
                            {
                                "state": "partner-down",
                                "pause": "once"
                            }
                        ]
                    }
                } ]
            }
        }
    ],

    ...

}
</screen>
        </para>

        <para>This configuration instructs the server to pause the state
        machine every time it transitions to the <command>ready</command> state
        and upon the first transition to the <command>partner-down</command>
        state.</para>

        <para>Refer to <xref linkend="ha-server-states"/> for a complete
        list of server states. The state machine can be paused in any of the
        supported states; however, it is not practical for the
        <command>backup</command> and <command>terminated</command> states because
        the server never transitions out of these states anyway.
        </para>

        <note><para>In the <command>syncing</command> state the server is paused
        before it makes an attempt to synchronize the lease database with a partner.
        To pause the state machine after lease-database synchronization,
        use the <command>ready</command> state instead.
        </para></note>

        <note><para>The state of the HA state machine depends on the state of the
        cooperating server. Therefore, it must be taken into account that
        pausing the state machine of one server may affect the operation of the
        partner server. For example: if the primary server is paused in the
        <command>waiting</command> state, the partner server will also remain in
        the <command>waiting</command> state until the state machine of the
        primary server is resumed and that server transitions to the
        <command>ready</command> state.</para></note>
      </section>

      <section xml:id="ha-ctrl-agent-config">
        <title>Control Agent Configuration</title>
        <para><xref linkend="kea-ctrl-agent"/> describes in detail the
        Kea daemon, which provides a RESTful interface to control Kea servers.
        The same functionality is used by the High Availability hook library to
        establish communication between the HA peers. Therefore, the HA
        library requires that the Control Agent (CA) be started for each DHCP
        instance within the HA setup. If the Control Agent is not started,
        the peers will not be able to communicate with the particular DHCP
        server (even if the DHCP server itself is online) and may eventually
        consider this server to be offline.
        </para>

        <para>The following is an example configuration for the CA running
        on the same machine as the primary server. This configuration is
        valid for both the load-balancing and the hot-standby cases presented in
        previous sections.

<screen>
{
"Control-agent": {
    "http-host": "192.168.56.33",
    "http-port": 8080,

    "control-sockets": {
        "dhcp4": {
            "socket-type": "unix",
            "socket-name": "/tmp/kea-dhcp4-ctrl.sock"
        },
        "dhcp6": {
            "socket-type": "unix",
            "socket-name": "/tmp/kea-dhcp6-ctrl.sock"
        }
    }
}
}
</screen>
        </para>
      </section> <!-- end of ha-ctrl-agent-config -->

      <section xml:id="ha-control-commands">
        <title>Control Commands for High Availability</title>
        <para>Even though the HA hook library is designed to automatically
        resolve issues with DHCP service interruptions by redirecting the
        DHCP traffic to a surviving server and synchronizing the lease
        database when required, it may be useful for the administrator to
        have more control over the server behavior. In particular, it may be
        useful to be able to trigger lease-database synchronization on demand.
        It may also be useful to manually set the HA scopes that are being
        served.</para>

        <para>Note that the backup server can sometimes be used to handle
        DHCP traffic if both active servers are down. The backup
        servers do not perform the failover function automatically. Thus, in
        order to use the backup server to respond to DHCP queries,
        the server administrator must enable this function manually.
        </para>

        <para>The following sections describe commands supported by the
        HA hook library which are available for the administrator.
        </para>

        <section xml:id="command-ha-sync">
          <title>ha-sync Command</title>
          <para>The <command>ha-sync</command> command instructs the
          server to synchronize its local lease database with the
          selected peer. The server fetches all leases from the peer and
          updates those locally stored leases which are older than
          those fetched. It also creates new leases when any of those
          fetched do not exist in the local database. All leases that
          are not returned by the peer but are in the local database are
          preserved. The database synchronization is unidirectional;
          only the database on the server to which the command has been
          sent is updated. In order to synchronize the peer's database, a
          separate <command>ha-sync</command> command has to be issued to that
          peer.</para>

          <para>Database synchronization may be triggered for
          both active and backup server types. The <command>ha-sync</command> command
          has the following structure (DHCPv4 server case):
<screen>
{
    "command": "ha-sync",
    "service": [ "dhcp4" ],
    "arguments": {
        "server-name": "server2",
        "max-period": 60
    }
}
</screen>
          </para>

          <para>
            When the server receives this command it first disables the
            DHCP service of the server from which it will be fetching leases, by
            sending the <command>dhcp-disable</command> command to that server.
            The <command>max-period</command> parameter specifies the maximum
            duration (in seconds) for which the DHCP service should be disabled.
            If the DHCP service is successfully disabled, the synchronizing
            server will fetch leases from the remote server by issuing one or
            more <command>lease4-get-page</command> commands. When the
            lease-database synchronization is complete, the synchronizing server sends
            the <command>dhcp-enable</command> command to the peer to re-enable its
            DHCP service.
          </para>

          <para>The <command>max-period</command> value should be sufficiently
          long to guarantee that it doesn't elapse before the synchronization
          is completed. Otherwise, the DHCP server will automatically enable
          its DHCP function while the synchronization is still in progress.
          If the DHCP server subsequently allocates any leases during the
          synchronization, those new (or updated) leases will not be fetched
          by the synchronizing server, leading to database inconsistencies.
          </para>
        </section> <!-- ha-sync-command -->

        <section xml:id="command-ha-scopes">
          <title>ha-scopes Command</title>
          <para>This command allows modification of the HA scopes that the
          server is serving. Consult <xref linkend="ha-load-balancing-config"/>
          and <xref linkend="ha-hot-standby-config"/> to learn what scopes
          are available for different HA modes of operation. The
          <command>ha-scopes</command> command has the following structure
          (DHCPv4 server case):
<screen>
{
    "command": "ha-scopes",
    "service": [ "dhcp4" ],
    "arguments": {
        "scopes": [ "HA_server1", "HA_server2" ]
    }
}
</screen>
          </para>

          <para>This command configures the server to handle traffic from
          both <command>HA_server1</command> and <command>HA_server2</command>
          scopes. In order to disable all scopes, specify an empty list:

<screen>
{
    "command": "ha-scopes",
    "service": [ "dhcp4" ],
    "arguments": {
        "scopes": [ ]
    }
}
</screen>
          </para>
        </section> <!-- ha-scopes-command -->

        <section xml:id="command-ha-continue">
          <title>ha-continue Command</title>
          <para>This command is used to resume the operation of the paused HA
          state machine, as described in <xref linkend="ha-pause-state-machine"/>.
          It takes no arguments, so the command structure is as simple as:
<screen>
{
    "command": "ha-continue"
}
</screen>
          </para>
        </section> <!-- ha-continue-command -->

      </section> <!-- ha-control-commands -->

    </section> <!-- end of high-availability-library -->