summaryrefslogtreecommitdiffstats
path: root/fs/dlm/user.c
blob: 5cb3896be8260f34791e6c260bf55f2f48cd5b67 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2006-2010 Red Hat, Inc.  All rights reserved.
 */

#include <linux/miscdevice.h>
#include <linux/init.h>
#include <linux/wait.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/signal.h>
#include <linux/spinlock.h>
#include <linux/dlm.h>
#include <linux/dlm_device.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>

#include <trace/events/dlm.h>

#include "dlm_internal.h"
#include "lockspace.h"
#include "lock.h"
#include "lvb_table.h"
#include "user.h"
#include "ast.h"
#include "config.h"
#include "memory.h"

static const char name_prefix[] = "dlm";
static const struct file_operations device_fops;
static atomic_t dlm_monitor_opened;
static int dlm_monitor_unused = 1;

#ifdef CONFIG_COMPAT

struct dlm_lock_params32 {
	__u8 mode;
	__u8 namelen;
	__u16 unused;
	__u32 flags;
	__u32 lkid;
	__u32 parent;
	__u64 xid;
	__u64 timeout;
	__u32 castparam;
	__u32 castaddr;
	__u32 bastparam;
	__u32 bastaddr;
	__u32 lksb;
	char lvb[DLM_USER_LVB_LEN];
	char name[];
};

struct dlm_write_request32 {
	__u32 version[3];
	__u8 cmd;
	__u8 is64bit;
	__u8 unused[2];

	union  {
		struct dlm_lock_params32 lock;
		struct dlm_lspace_params lspace;
		struct dlm_purge_params purge;
	} i;
};

struct dlm_lksb32 {
	__u32 sb_status;
	__u32 sb_lkid;
	__u8 sb_flags;
	__u32 sb_lvbptr;
};

struct dlm_lock_result32 {
	__u32 version[3];
	__u32 length;
	__u32 user_astaddr;
	__u32 user_astparam;
	__u32 user_lksb;
	struct dlm_lksb32 lksb;
	__u8 bast_mode;
	__u8 unused[3];
	/* Offsets may be zero if no data is present */
	__u32 lvb_offset;
};

static void compat_input(struct dlm_write_request *kb,
			 struct dlm_write_request32 *kb32,
			 int namelen)
{
	kb->version[0] = kb32->version[0];
	kb->version[1] = kb32->version[1];
	kb->version[2] = kb32->version[2];

	kb->cmd = kb32->cmd;
	kb->is64bit = kb32->is64bit;
	if (kb->cmd == DLM_USER_CREATE_LOCKSPACE ||
	    kb->cmd == DLM_USER_REMOVE_LOCKSPACE) {
		kb->i.lspace.flags = kb32->i.lspace.flags;
		kb->i.lspace.minor = kb32->i.lspace.minor;
		memcpy(kb->i.lspace.name, kb32->i.lspace.name, namelen);
	} else if (kb->cmd == DLM_USER_PURGE) {
		kb->i.purge.nodeid = kb32->i.purge.nodeid;
		kb->i.purge.pid = kb32->i.purge.pid;
	} else {
		kb->i.lock.mode = kb32->i.lock.mode;
		kb->i.lock.namelen = kb32->i.lock.namelen;
		kb->i.lock.flags = kb32->i.lock.flags;
		kb->i.lock.lkid = kb32->i.lock.lkid;
		kb->i.lock.parent = kb32->i.lock.parent;
		kb->i.lock.xid = kb32->i.lock.xid;
		kb->i.lock.timeout = kb32->i.lock.timeout;
		kb->i.lock.castparam = (__user void *)(long)kb32->i.lock.castparam;
		kb->i.lock.castaddr = (__user void *)(long)kb32->i.lock.castaddr;
		kb->i.lock.bastparam = (__user void *)(long)kb32->i.lock.bastparam;
		kb->i.lock.bastaddr = (__user void *)(long)kb32->i.lock.bastaddr;
		kb->i.lock.lksb = (__user void *)(long)kb32->i.lock.lksb;
		memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN);
		memcpy(kb->i.lock.name, kb32->i.lock.name, namelen);
	}
}

static void compat_output(struct dlm_lock_result *res,
			  struct dlm_lock_result32 *res32)
{
	memset(res32, 0, sizeof(*res32));

	res32->version[0] = res->version[0];
	res32->version[1] = res->version[1];
	res32->version[2] = res->version[2];

	res32->user_astaddr = (__u32)(__force long)res->user_astaddr;
	res32->user_astparam = (__u32)(__force long)res->user_astparam;
	res32->user_lksb = (__u32)(__force long)res->user_lksb;
	res32->bast_mode = res->bast_mode;

	res32->lvb_offset = res->lvb_offset;
	res32->length = res->length;

	res32->lksb.sb_status = res->lksb.sb_status;
	res32->lksb.sb_flags = res->lksb.sb_flags;
	res32->lksb.sb_lkid = res->lksb.sb_lkid;
	res32->lksb.sb_lvbptr = (__u32)(long)res->lksb.sb_lvbptr;
}
#endif

/* Figure out if this lock is at the end of its life and no longer
   available for the application to use.  The lkb still exists until
   the final ast is read.  A lock becomes EOL in three situations:
     1. a noqueue request fails with EAGAIN
     2. an unlock completes with EUNLOCK
     3. a cancel of a waiting request completes with ECANCEL/EDEADLK
   An EOL lock needs to be removed from the process's list of locks.
   And we can't allow any new operation on an EOL lock.  This is
   not related to the lifetime of the lkb struct which is managed
   entirely by refcount. */

static int lkb_is_endoflife(int mode, int status)
{
	switch (status) {
	case -DLM_EUNLOCK:
		return 1;
	case -DLM_ECANCEL:
	case -ETIMEDOUT:
	case -EDEADLK:
	case -EAGAIN:
		if (mode == DLM_LOCK_IV)
			return 1;
		break;
	}
	return 0;
}

/* we could possibly check if the cancel of an orphan has resulted in the lkb
   being removed and then remove that lkb from the orphans list and free it */

void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode,
		      int status, uint32_t sbflags)
{
	struct dlm_ls *ls;
	struct dlm_user_args *ua;
	struct dlm_user_proc *proc;
	struct dlm_callback *cb;
	int rv, copy_lvb;

	if (test_bit(DLM_DFL_ORPHAN_BIT, &lkb->lkb_dflags) ||
	    test_bit(DLM_IFL_DEAD_BIT, &lkb->lkb_iflags))
		return;

	ls = lkb->lkb_resource->res_ls;
	spin_lock_bh(&ls->ls_clear_proc_locks);

	/* If ORPHAN/DEAD flag is set, it means the process is dead so an ast
	   can't be delivered.  For ORPHAN's, dlm_clear_proc_locks() freed
	   lkb->ua so we can't try to use it.  This second check is necessary
	   for cases where a completion ast is received for an operation that
	   began before clear_proc_locks did its cancel/unlock. */

	if (test_bit(DLM_DFL_ORPHAN_BIT, &lkb->lkb_dflags) ||
	    test_bit(DLM_IFL_DEAD_BIT, &lkb->lkb_iflags))
		goto out;

	DLM_ASSERT(lkb->lkb_ua, dlm_print_lkb(lkb););
	ua = lkb->lkb_ua;
	proc = ua->proc;

	if ((flags & DLM_CB_BAST) && ua->bastaddr == NULL)
		goto out;

	if ((flags & DLM_CB_CAST) && lkb_is_endoflife(mode, status))
		set_bit(DLM_IFL_ENDOFLIFE_BIT, &lkb->lkb_iflags);

	spin_lock_bh(&proc->asts_spin);

	if (!dlm_may_skip_callback(lkb, flags, mode, status, sbflags,
				   &copy_lvb)) {
		rv = dlm_get_cb(lkb, flags, mode, status, sbflags, &cb);
		if (!rv) {
			cb->copy_lvb = copy_lvb;
			cb->ua = *ua;
			cb->lkb_lksb = &cb->ua.lksb;
			if (copy_lvb) {
				memcpy(cb->lvbptr, ua->lksb.sb_lvbptr,
				       DLM_USER_LVB_LEN);
				cb->lkb_lksb->sb_lvbptr = cb->lvbptr;
			}

			list_add_tail(&cb->list, &proc->asts);
			wake_up_interruptible(&proc->wait);
		}
	}
	spin_unlock_bh(&proc->asts_spin);

	if (test_bit(DLM_IFL_ENDOFLIFE_BIT, &lkb->lkb_iflags)) {
		/* N.B. spin_lock locks_spin, not asts_spin */
		spin_lock_bh(&proc->locks_spin);
		if (!list_empty(&lkb->lkb_ownqueue)) {
			list_del_init(&lkb->lkb_ownqueue);
			dlm_put_lkb(lkb);
		}
		spin_unlock_bh(&proc->locks_spin);
	}
 out:
	spin_unlock_bh(&ls->ls_clear_proc_locks);
}

static int device_user_lock(struct dlm_user_proc *proc,
			    struct dlm_lock_params *params)
{
	struct dlm_ls *ls;
	struct dlm_user_args *ua;
	uint32_t lkid;
	int error = -ENOMEM;

	ls = dlm_find_lockspace_local(proc->lockspace);
	if (!ls)
		return -ENOENT;

	if (!params->castaddr || !params->lksb) {
		error = -EINVAL;
		goto out;
	}

	ua = kzalloc(sizeof(struct dlm_user_args), GFP_NOFS);
	if (!ua)
		goto out;
	ua->proc = proc;
	ua->user_lksb = params->lksb;
	ua->castparam = params->castparam;
	ua->castaddr = params->castaddr;
	ua->bastparam = params->bastparam;
	ua->bastaddr = params->bastaddr;
	ua->xid = params->xid;

	if (params->flags & DLM_LKF_CONVERT) {
		error = dlm_user_convert(ls, ua,
					 params->mode, params->flags,
					 params->lkid, params->lvb);
	} else if (params->flags & DLM_LKF_ORPHAN) {
		error = dlm_user_adopt_orphan(ls, ua,
					 params->mode, params->flags,
					 params->name, params->namelen,
					 &lkid);
		if (!error)
			error = lkid;
	} else {
		error = dlm_user_request(ls, ua,
					 params->mode, params->flags,
					 params->name, params->namelen);
		if (!error)
			error = ua->lksb.sb_lkid;
	}
 out:
	dlm_put_lockspace(ls);
	return error;
}

static int device_user_unlock(struct dlm_user_proc *proc,
			      struct dlm_lock_params *params)
{
	struct dlm_ls *ls;
	struct dlm_user_args *ua;
	int error = -ENOMEM;

	ls = dlm_find_lockspace_local(proc->lockspace);
	if (!ls)
		return -ENOENT;

	ua = kzalloc(sizeof(struct dlm_user_args), GFP_NOFS);
	if (!ua)
		goto out;
	ua->proc = proc;
	ua->user_lksb = params->lksb;
	ua->castparam = params->castparam;
	ua->castaddr = params->castaddr;

	if (params->flags & DLM_LKF_CANCEL)
		error = dlm_user_cancel(ls, ua, params->flags, params->lkid);
	else
		error = dlm_user_unlock(ls, ua, params->flags, params->lkid,
					params->lvb);
 out:
	dlm_put_lockspace(ls);
	return error;
}

static int device_user_deadlock(struct dlm_user_proc *proc,
				struct dlm_lock_params *params)
{
	struct dlm_ls *ls;
	int error;

	ls = dlm_find_lockspace_local(proc->lockspace);
	if (!ls)
		return -ENOENT;

	error = dlm_user_deadlock(ls, params->flags, params->lkid);

	dlm_put_lockspace(ls);
	return error;
}

static int dlm_device_register(struct dlm_ls *ls, char *name)
{
	int error, len;

	/* The device is already registered.  This happens when the
	   lockspace is created multiple times from userspace. */
	if (ls->ls_device.name)
		return 0;

	error = -ENOMEM;
	len = strlen(name) + strlen(name_prefix) + 2;
	ls->ls_device.name = kzalloc(len, GFP_NOFS);
	if (!ls->ls_device.name)
		goto fail;

	snprintf((char *)ls->ls_device.name, len, "%s_%s", name_prefix,
		 name);
	ls->ls_device.fops = &device_fops;
	ls->ls_device.minor = MISC_DYNAMIC_MINOR;

	error = misc_register(&ls->ls_device);
	if (error) {
		kfree(ls->ls_device.name);
		/* this has to be set to NULL
		 * to avoid a double-free in dlm_device_deregister
		 */
		ls->ls_device.name = NULL;
	}
fail:
	return error;
}

int dlm_device_deregister(struct dlm_ls *ls)
{
	/* The device is not registered.  This happens when the lockspace
	   was never used from userspace, or when device_create_lockspace()
	   calls dlm_release_lockspace() after the register fails. */
	if (!ls->ls_device.name)
		return 0;

	misc_deregister(&ls->ls_device);
	kfree(ls->ls_device.name);
	return 0;
}

static int device_user_purge(struct dlm_user_proc *proc,
			     struct dlm_purge_params *params)
{
	struct dlm_ls *ls;
	int error;

	ls = dlm_find_lockspace_local(proc->lockspace);
	if (!ls)
		return -ENOENT;

	error = dlm_user_purge(ls, proc, params->nodeid, params->pid);

	dlm_put_lockspace(ls);
	return error;
}

static int device_create_lockspace(struct dlm_lspace_params *params)
{
	dlm_lockspace_t *lockspace;
	struct dlm_ls *ls;
	int error;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	error = dlm_new_user_lockspace(params->name, dlm_config.ci_cluster_name,
				       params->flags, DLM_USER_LVB_LEN, NULL,
				       NULL, NULL, &lockspace);
	if (error)
		return error;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -ENOENT;

	error = dlm_device_register(ls, params->name);
	dlm_put_lockspace(ls);

	if (error)
		dlm_release_lockspace(lockspace, 0);
	else
		error = ls->ls_device.minor;

	return error;
}

static int device_remove_lockspace(struct dlm_lspace_params *params)
{
	dlm_lockspace_t *lockspace;
	struct dlm_ls *ls;
	int error, force = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ls = dlm_find_lockspace_device(params->minor);
	if (!ls)
		return -ENOENT;

	if (params->flags & DLM_USER_LSFLG_FORCEFREE)
		force = 2;

	lockspace = ls;
	dlm_put_lockspace(ls);

	/* The final dlm_release_lockspace waits for references to go to
	   zero, so all processes will need to close their device for the
	   ls before the release will proceed.  release also calls the
	   device_deregister above.  Converting a positive return value
	   from release to zero means that userspace won't know when its
	   release was the final one, but it shouldn't need to know. */

	error = dlm_release_lockspace(lockspace, force);
	if (error > 0)
		error = 0;
	return error;
}

/* Check the user's version matches ours */
static int check_version(struct dlm_write_request *req)
{
	if (req->version[0] != DLM_DEVICE_VERSION_MAJOR ||
	    (req->version[0] == DLM_DEVICE_VERSION_MAJOR &&
	     req->version[1] > DLM_DEVICE_VERSION_MINOR)) {

		printk(KERN_DEBUG "dlm: process %s (%d) version mismatch "
		       "user (%d.%d.%d) kernel (%d.%d.%d)\n",
		       current->comm,
		       task_pid_nr(current),
		       req->version[0],
		       req->version[1],
		       req->version[2],
		       DLM_DEVICE_VERSION_MAJOR,
		       DLM_DEVICE_VERSION_MINOR,
		       DLM_DEVICE_VERSION_PATCH);
		return -EINVAL;
	}
	return 0;
}

/*
 * device_write
 *
 *   device_user_lock
 *     dlm_user_request -> request_lock
 *     dlm_user_convert -> convert_lock
 *
 *   device_user_unlock
 *     dlm_user_unlock -> unlock_lock
 *     dlm_user_cancel -> cancel_lock
 *
 *   device_create_lockspace
 *     dlm_new_lockspace
 *
 *   device_remove_lockspace
 *     dlm_release_lockspace
 */

/* a write to a lockspace device is a lock or unlock request, a write
   to the control device is to create/remove a lockspace */

static ssize_t device_write(struct file *file, const char __user *buf,
			    size_t count, loff_t *ppos)
{
	struct dlm_user_proc *proc = file->private_data;
	struct dlm_write_request *kbuf;
	int error;

#ifdef CONFIG_COMPAT
	if (count < sizeof(struct dlm_write_request32))
#else
	if (count < sizeof(struct dlm_write_request))
#endif
		return -EINVAL;

	/*
	 * can't compare against COMPAT/dlm_write_request32 because
	 * we don't yet know if is64bit is zero
	 */
	if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN)
		return -EINVAL;

	kbuf = memdup_user_nul(buf, count);
	if (IS_ERR(kbuf))
		return PTR_ERR(kbuf);

	if (check_version(kbuf)) {
		error = -EBADE;
		goto out_free;
	}

#ifdef CONFIG_COMPAT
	if (!kbuf->is64bit) {
		struct dlm_write_request32 *k32buf;
		int namelen = 0;

		if (count > sizeof(struct dlm_write_request32))
			namelen = count - sizeof(struct dlm_write_request32);

		k32buf = (struct dlm_write_request32 *)kbuf;

		/* add 1 after namelen so that the name string is terminated */
		kbuf = kzalloc(sizeof(struct dlm_write_request) + namelen + 1,
			       GFP_NOFS);
		if (!kbuf) {
			kfree(k32buf);
			return -ENOMEM;
		}

		if (proc)
			set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags);

		compat_input(kbuf, k32buf, namelen);
		kfree(k32buf);
	}
#endif

	/* do we really need this? can a write happen after a close? */
	if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) &&
	    (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))) {
		error = -EINVAL;
		goto out_free;
	}

	error = -EINVAL;

	switch (kbuf->cmd)
	{
	case DLM_USER_LOCK:
		if (!proc) {
			log_print("no locking on control device");
			goto out_free;
		}
		error = device_user_lock(proc, &kbuf->i.lock);
		break;

	case DLM_USER_UNLOCK:
		if (!proc) {
			log_print("no locking on control device");
			goto out_free;
		}
		error = device_user_unlock(proc, &kbuf->i.lock);
		break;

	case DLM_USER_DEADLOCK:
		if (!proc) {
			log_print("no locking on control device");
			goto out_free;
		}
		error = device_user_deadlock(proc, &kbuf->i.lock);
		break;

	case DLM_USER_CREATE_LOCKSPACE:
		if (proc) {
			log_print("create/remove only on control device");
			goto out_free;
		}
		error = device_create_lockspace(&kbuf->i.lspace);
		break;

	case DLM_USER_REMOVE_LOCKSPACE:
		if (proc) {
			log_print("create/remove only on control device");
			goto out_free;
		}
		error = device_remove_lockspace(&kbuf->i.lspace);
		break;

	case DLM_USER_PURGE:
		if (!proc) {
			log_print("no locking on control device");
			goto out_free;
		}
		error = device_user_purge(proc, &kbuf->i.purge);
		break;

	default:
		log_print("Unknown command passed to DLM device : %d\n",
			  kbuf->cmd);
	}

 out_free:
	kfree(kbuf);
	return error;
}

/* Every process that opens the lockspace device has its own "proc" structure
   hanging off the open file that's used to keep track of locks owned by the
   process and asts that need to be delivered to the process. */

static int device_open(struct inode *inode, struct file *file)
{
	struct dlm_user_proc *proc;
	struct dlm_ls *ls;

	ls = dlm_find_lockspace_device(iminor(inode));
	if (!ls)
		return -ENOENT;

	proc = kzalloc(sizeof(struct dlm_user_proc), GFP_NOFS);
	if (!proc) {
		dlm_put_lockspace(ls);
		return -ENOMEM;
	}

	proc->lockspace = ls;
	INIT_LIST_HEAD(&proc->asts);
	INIT_LIST_HEAD(&proc->locks);
	INIT_LIST_HEAD(&proc->unlocking);
	spin_lock_init(&proc->asts_spin);
	spin_lock_init(&proc->locks_spin);
	init_waitqueue_head(&proc->wait);
	file->private_data = proc;

	return 0;
}

static int device_close(struct inode *inode, struct file *file)
{
	struct dlm_user_proc *proc = file->private_data;
	struct dlm_ls *ls;

	ls = dlm_find_lockspace_local(proc->lockspace);
	if (!ls)
		return -ENOENT;

	set_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags);

	dlm_clear_proc_locks(ls, proc);

	/* at this point no more lkb's should exist for this lockspace,
	   so there's no chance of dlm_user_add_ast() being called and
	   looking for lkb->ua->proc */

	kfree(proc);
	file->private_data = NULL;

	dlm_put_lockspace(ls);
	dlm_put_lockspace(ls);  /* for the find in device_open() */

	/* FIXME: AUTOFREE: if this ls is no longer used do
	   device_remove_lockspace() */

	return 0;
}

static int copy_result_to_user(struct dlm_user_args *ua, int compat,
			       uint32_t flags, int mode, int copy_lvb,
			       char __user *buf, size_t count)
{
#ifdef CONFIG_COMPAT
	struct dlm_lock_result32 result32;
#endif
	struct dlm_lock_result result;
	void *resultptr;
	int error=0;
	int len;
	int struct_len;

	memset(&result, 0, sizeof(struct dlm_lock_result));
	result.version[0] = DLM_DEVICE_VERSION_MAJOR;
	result.version[1] = DLM_DEVICE_VERSION_MINOR;
	result.version[2] = DLM_DEVICE_VERSION_PATCH;
	memcpy(&result.lksb, &ua->lksb, offsetof(struct dlm_lksb, sb_lvbptr));
	result.user_lksb = ua->user_lksb;

	/* FIXME: dlm1 provides for the user's bastparam/addr to not be updated
	   in a conversion unless the conversion is successful.  See code
	   in dlm_user_convert() for updating ua from ua_tmp.  OpenVMS, though,
	   notes that a new blocking AST address and parameter are set even if
	   the conversion fails, so maybe we should just do that. */

	if (flags & DLM_CB_BAST) {
		result.user_astaddr = ua->bastaddr;
		result.user_astparam = ua->bastparam;
		result.bast_mode = mode;
	} else {
		result.user_astaddr = ua->castaddr;
		result.user_astparam = ua->castparam;
	}

#ifdef CONFIG_COMPAT
	if (compat)
		len = sizeof(struct dlm_lock_result32);
	else
#endif
		len = sizeof(struct dlm_lock_result);
	struct_len = len;

	/* copy lvb to userspace if there is one, it's been updated, and
	   the user buffer has space for it */

	if (copy_lvb && ua->lksb.sb_lvbptr && count >= len + DLM_USER_LVB_LEN) {
		if (copy_to_user(buf+len, ua->lksb.sb_lvbptr,
				 DLM_USER_LVB_LEN)) {
			error = -EFAULT;
			goto out;
		}

		result.lvb_offset = len;
		len += DLM_USER_LVB_LEN;
	}

	result.length = len;
	resultptr = &result;
#ifdef CONFIG_COMPAT
	if (compat) {
		compat_output(&result, &result32);
		resultptr = &result32;
	}
#endif

	if (copy_to_user(buf, resultptr, struct_len))
		error = -EFAULT;
	else
		error = len;
 out:
	return error;
}

static int copy_version_to_user(char __user *buf, size_t count)
{
	struct dlm_device_version ver;

	memset(&ver, 0, sizeof(struct dlm_device_version));
	ver.version[0] = DLM_DEVICE_VERSION_MAJOR;
	ver.version[1] = DLM_DEVICE_VERSION_MINOR;
	ver.version[2] = DLM_DEVICE_VERSION_PATCH;

	if (copy_to_user(buf, &ver, sizeof(struct dlm_device_version)))
		return -EFAULT;
	return sizeof(struct dlm_device_version);
}

/* a read returns a single ast described in a struct dlm_lock_result */

static ssize_t device_read(struct file *file, char __user *buf, size_t count,
			   loff_t *ppos)
{
	struct dlm_user_proc *proc = file->private_data;
	DECLARE_WAITQUEUE(wait, current);
	struct dlm_callback *cb;
	int rv, ret;

	if (count == sizeof(struct dlm_device_version)) {
		rv = copy_version_to_user(buf, count);
		return rv;
	}

	if (!proc) {
		log_print("non-version read from control device %zu", count);
		return -EINVAL;
	}

#ifdef CONFIG_COMPAT
	if (count < sizeof(struct dlm_lock_result32))
#else
	if (count < sizeof(struct dlm_lock_result))
#endif
		return -EINVAL;

	/* do we really need this? can a read happen after a close? */
	if (test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))
		return -EINVAL;

	spin_lock_bh(&proc->asts_spin);
	if (list_empty(&proc->asts)) {
		if (file->f_flags & O_NONBLOCK) {
			spin_unlock_bh(&proc->asts_spin);
			return -EAGAIN;
		}

		add_wait_queue(&proc->wait, &wait);

	repeat:
		set_current_state(TASK_INTERRUPTIBLE);
		if (list_empty(&proc->asts) && !signal_pending(current)) {
			spin_unlock_bh(&proc->asts_spin);
			schedule();
			spin_lock_bh(&proc->asts_spin);
			goto repeat;
		}
		set_current_state(TASK_RUNNING);
		remove_wait_queue(&proc->wait, &wait);

		if (signal_pending(current)) {
			spin_unlock_bh(&proc->asts_spin);
			return -ERESTARTSYS;
		}
	}

	/* if we empty lkb_callbacks, we don't want to unlock the spinlock
	   without removing lkb_cb_list; so empty lkb_cb_list is always
	   consistent with empty lkb_callbacks */

	cb = list_first_entry(&proc->asts, struct dlm_callback, list);
	list_del(&cb->list);
	spin_unlock_bh(&proc->asts_spin);

	if (cb->flags & DLM_CB_BAST) {
		trace_dlm_bast(cb->ls_id, cb->lkb_id, cb->mode, cb->res_name,
			       cb->res_length);
	} else if (cb->flags & DLM_CB_CAST) {
		cb->lkb_lksb->sb_status = cb->sb_status;
		cb->lkb_lksb->sb_flags = cb->sb_flags;
		trace_dlm_ast(cb->ls_id, cb->lkb_id, cb->sb_status,
			      cb->sb_flags, cb->res_name, cb->res_length);
	}

	ret = copy_result_to_user(&cb->ua,
				  test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags),
				  cb->flags, cb->mode, cb->copy_lvb, buf, count);
	dlm_free_cb(cb);
	return ret;
}

static __poll_t device_poll(struct file *file, poll_table *wait)
{
	struct dlm_user_proc *proc = file->private_data;

	poll_wait(file, &proc->wait, wait);

	spin_lock_bh(&proc->asts_spin);
	if (!list_empty(&proc->asts)) {
		spin_unlock_bh(&proc->asts_spin);
		return EPOLLIN | EPOLLRDNORM;
	}
	spin_unlock_bh(&proc->asts_spin);
	return 0;
}

int dlm_user_daemon_available(void)
{
	/* dlm_controld hasn't started (or, has started, but not
	   properly populated configfs) */

	if (!dlm_our_nodeid())
		return 0;

	/* This is to deal with versions of dlm_controld that don't
	   know about the monitor device.  We assume that if the
	   dlm_controld was started (above), but the monitor device
	   was never opened, that it's an old version.  dlm_controld
	   should open the monitor device before populating configfs. */

	if (dlm_monitor_unused)
		return 1;

	return atomic_read(&dlm_monitor_opened) ? 1 : 0;
}

static int ctl_device_open(struct inode *inode, struct file *file)
{
	file->private_data = NULL;
	return 0;
}

static int ctl_device_close(struct inode *inode, struct file *file)
{
	return 0;
}

static int monitor_device_open(struct inode *inode, struct file *file)
{
	atomic_inc(&dlm_monitor_opened);
	dlm_monitor_unused = 0;
	return 0;
}

static int monitor_device_close(struct inode *inode, struct file *file)
{
	if (atomic_dec_and_test(&dlm_monitor_opened))
		dlm_stop_lockspaces();
	return 0;
}

static const struct file_operations device_fops = {
	.open    = device_open,
	.release = device_close,
	.read    = device_read,
	.write   = device_write,
	.poll    = device_poll,
	.owner   = THIS_MODULE,
	.llseek  = noop_llseek,
};

static const struct file_operations ctl_device_fops = {
	.open    = ctl_device_open,
	.release = ctl_device_close,
	.read    = device_read,
	.write   = device_write,
	.owner   = THIS_MODULE,
	.llseek  = noop_llseek,
};

static struct miscdevice ctl_device = {
	.name  = "dlm-control",
	.fops  = &ctl_device_fops,
	.minor = MISC_DYNAMIC_MINOR,
};

static const struct file_operations monitor_device_fops = {
	.open    = monitor_device_open,
	.release = monitor_device_close,
	.owner   = THIS_MODULE,
	.llseek  = noop_llseek,
};

static struct miscdevice monitor_device = {
	.name  = "dlm-monitor",
	.fops  = &monitor_device_fops,
	.minor = MISC_DYNAMIC_MINOR,
};

int __init dlm_user_init(void)
{
	int error;

	atomic_set(&dlm_monitor_opened, 0);

	error = misc_register(&ctl_device);
	if (error) {
		log_print("misc_register failed for control device");
		goto out;
	}

	error = misc_register(&monitor_device);
	if (error) {
		log_print("misc_register failed for monitor device");
		misc_deregister(&ctl_device);
	}
 out:
	return error;
}

void dlm_user_exit(void)
{
	misc_deregister(&ctl_device);
	misc_deregister(&monitor_device);
}