[DLM] Fix potential conflict in DLM userland locks
[pandora-kernel.git] / fs / dlm / device.c
1 /******************************************************************************
2 *******************************************************************************
3 **
4 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
5 **  Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
6 **
7 **  This copyrighted material is made available to anyone wishing to use,
8 **  modify, copy, or redistribute it subject to the terms and conditions
9 **  of the GNU General Public License v.2.
10 **
11 *******************************************************************************
12 ******************************************************************************/
13
14 /*
15  * device.c
16  *
17  * This is the userland interface to the DLM.
18  *
19  * The locking is done via a misc char device (find the
20  * registered minor number in /proc/misc).
21  *
22  * User code should not use this interface directly but
23  * call the library routines in libdlm.a instead.
24  *
25  */
26
27 #include <linux/miscdevice.h>
28 #include <linux/init.h>
29 #include <linux/wait.h>
30 #include <linux/module.h>
31 #include <linux/file.h>
32 #include <linux/fs.h>
33 #include <linux/poll.h>
34 #include <linux/signal.h>
35 #include <linux/spinlock.h>
36 #include <linux/idr.h>
37
38 #include <linux/dlm.h>
39 #include <linux/dlm_device.h>
40
41 #include "lvb_table.h"
42
/* Installed as the fops of every per-lockspace misc device */
static struct file_operations _dlm_fops;
/* Lockspace device names are built as "<name_prefix>_<lockspace name>" */
static const char *name_prefix="dlm";
/* All registered userland lockspaces, linked through user_ls.ls_list */
static struct list_head user_ls_list;
/* Held around lookups in, and changes to, user_ls_list */
static struct mutex user_ls_lock;
47
/* Flags in li_flags (bit numbers, used with set_bit()/test_bit()) */
#define LI_FLAG_COMPLETE   1
#define LI_FLAG_FIRSTLOCK  2
#define LI_FLAG_PERSISTENT 3
#define LI_FLAG_ONLIST     4

/* Flags in ls_flags (bit numbers) */
#define LS_FLAG_DELETED   1
#define LS_FLAG_AUTOFREE  2

/* Flags in fi_flags (bit numbers) */
#define FI_FLAG_OPEN      1
#define FI_FLAG_COMPAT    2

/* Stored in lock_info.li_magic so a looked-up lock_info can be sanity checked */
#define LOCKINFO_MAGIC 0x53595324
63
/* Per-lock state kept by this device for one userland lock */
struct lock_info {
        uint32_t li_magic;                /* LOCKINFO_MAGIC */
        uint8_t li_cmd;                   /* command that created this lock_info */
        int8_t  li_grmode;                /* granted mode, -1 until a request succeeds */
        int8_t  li_rqmode;                /* requested mode, -1 initially */
        struct dlm_lksb li_lksb;          /* kernel-side lock status block */
        wait_queue_head_t li_waitq;       /* woken by ast_routine() for synchronous requests */
        unsigned long li_flags;           /* LI_FLAG_* bits */
        void __user *li_castparam;        /* completion AST argument */
        void __user *li_castaddr;         /* completion AST address */
        void __user *li_bastparam;        /* active blocking AST argument */
        void __user *li_bastaddr;         /* active blocking AST address */
        void __user *li_pend_bastparam;   /* blocking AST args that become active ... */
        void __user *li_pend_bastaddr;    /* ... once a conversion succeeds */
        struct list_head li_ownerqueue;   /* link in the owner's file_info.fi_li_list */
        struct file_info *li_file;        /* owning open file (holds a reference) */
        struct dlm_lksb __user *li_user_lksb; /* user's lksb, reported back in each AST */
        struct completion li_firstcomp;   /* ast_routine() waits on this before freeing a failed first lock */
};
83
/* A queued AST, waiting on file_info.fi_ast_list to be read by userland */
struct ast_info {
        struct dlm_lock_result result; /* header returned to the reader */
        struct list_head list;         /* link in file_info.fi_ast_list */
        uint32_t lvb_updated;          /* non-zero if the LVB should be returned too */
        uint32_t progress;      /* How much has been read */
};
91
/* One of these per userland lockspace */
struct user_ls {
        void    *ls_lockspace; /* DLM lockspace handle; NULL once unregistered */
        atomic_t ls_refcnt;    /* number of open files on the lockspace device */
        long     ls_flags;     /* LS_FLAG_* bits */

        /* Lock infos are stored in here indexed by lock ID */
        struct idr lockinfo_idr;
        rwlock_t lockinfo_lock; /* protects lockinfo_idr */

        /* Passed into misc_register() */
        struct miscdevice ls_miscinfo;
        struct list_head  ls_list; /* link in user_ls_list */
};
106
/* miscdevice descriptor for the control device */
static struct miscdevice ctl_device;
109
/*
 * Stuff we hang off the file struct.
 * The first two are to cope with unlocking all the
 * locks held by a process when it dies.
 */
struct file_info {
        struct list_head    fi_li_list;  /* List of active lock_infos */
        spinlock_t          fi_li_lock;  /* protects fi_li_list */
        struct list_head    fi_ast_list; /* Queue of ASTs to be delivered */
        spinlock_t          fi_ast_lock; /* protects fi_ast_list */
        wait_queue_head_t   fi_wait;     /* readers/pollers sleep here for ASTs */
        struct user_ls     *fi_ls;       /* lockspace this file was opened on */
        atomic_t            fi_refcnt;   /* Number of users */
        unsigned long       fi_flags;    /* FI_FLAG_* bits */
};
125
126 #ifdef CONFIG_COMPAT
127
/*
 * Lock request parameters as laid out by a 32-bit process: every user
 * pointer is carried as a __u32 and widened by compat_input().
 */
struct dlm_lock_params32 {
        __u8 mode;
        __u8 namelen;
        __u16 flags;
        __u32 lkid;
        __u32 parent;

        /* 32-bit user pointers */
        __u32 castparam;
        __u32 castaddr;
        __u32 bastparam;
        __u32 bastaddr;
        __u32 lksb;

        char lvb[DLM_USER_LVB_LEN];
        char name[0]; /* namelen bytes of resource name follow the struct */
};
144
/* Write request header as laid out by a 32-bit process */
struct dlm_write_request32 {
        __u32 version[3]; /* major, minor, patch */
        __u8 cmd;
        __u8 is64bit;
        __u8 unused[2];

        /* which member is valid depends on cmd (see compat_input()) */
        union  {
                struct dlm_lock_params32 lock;
                struct dlm_lspace_params lspace;
        } i;
};
156
/* Lock status block as seen by a 32-bit process */
struct dlm_lksb32 {
        __u32    sb_status;
        __u32    sb_lkid;
        __u8     sb_flags;
        /* NOTE(review): the compiler normally pads between sb_flags and sb_lvbptr */
        __u32    sb_lvbptr; /* 32-bit user pointer */
};
163
/* AST result header returned by read() to a 32-bit process */
struct dlm_lock_result32 {
        __u32 length;        /* total length of the result, including any LVB */
        __u32 user_astaddr;  /* 32-bit user pointers */
        __u32 user_astparam;
        __u32 user_lksb;
        struct dlm_lksb32 lksb;
        __u8 bast_mode;
        __u8 unused[3];
        /* Offsets may be zero if no data is present */
        __u32 lvb_offset;
};
175
176
177 static void compat_input(struct dlm_write_request *kparams, struct dlm_write_request32 *k32params)
178 {
179
180         kparams->version[0] = k32params->version[0];
181         kparams->version[1] = k32params->version[1];
182         kparams->version[2] = k32params->version[2];
183
184         kparams->cmd = k32params->cmd;
185         kparams->is64bit = k32params->is64bit;
186         if (kparams->cmd == DLM_USER_CREATE_LOCKSPACE ||
187             kparams->cmd == DLM_USER_REMOVE_LOCKSPACE) {
188
189                 kparams->i.lspace.flags = k32params->i.lspace.flags;
190                 kparams->i.lspace.minor = k32params->i.lspace.minor;
191                 strcpy(kparams->i.lspace.name, k32params->i.lspace.name);
192         }
193         else {
194                 kparams->i.lock.mode = k32params->i.lock.mode;
195                 kparams->i.lock.namelen = k32params->i.lock.namelen;
196                 kparams->i.lock.flags = k32params->i.lock.flags;
197                 kparams->i.lock.lkid = k32params->i.lock.lkid;
198                 kparams->i.lock.parent = k32params->i.lock.parent;
199                 kparams->i.lock.castparam = (void *)(long)k32params->i.lock.castparam;
200                 kparams->i.lock.castaddr = (void *)(long)k32params->i.lock.castaddr;
201                 kparams->i.lock.bastparam = (void *)(long)k32params->i.lock.bastparam;
202                 kparams->i.lock.bastaddr = (void *)(long)k32params->i.lock.bastaddr;
203                 kparams->i.lock.lksb = (void *)(long)k32params->i.lock.lksb;
204                 memcpy(kparams->i.lock.lvb, k32params->i.lock.lvb, DLM_USER_LVB_LEN);
205                 memcpy(kparams->i.lock.name, k32params->i.lock.name, kparams->i.lock.namelen);
206         }
207 }
208
209 void compat_output(struct dlm_lock_result *res, struct dlm_lock_result32 *res32)
210 {
211         res32->length = res->length - (sizeof(struct dlm_lock_result) - sizeof(struct dlm_lock_result32));
212         res32->user_astaddr = (__u32)(long)res->user_astaddr;
213         res32->user_astparam = (__u32)(long)res->user_astparam;
214         res32->user_lksb = (__u32)(long)res->user_lksb;
215         res32->bast_mode = res->bast_mode;
216
217         res32->lvb_offset = res->lvb_offset;
218         res32->length = res->length;
219
220         res32->lksb.sb_status = res->lksb.sb_status;
221         res32->lksb.sb_flags = res->lksb.sb_flags;
222         res32->lksb.sb_lkid = res->lksb.sb_lkid;
223         res32->lksb.sb_lvbptr = (__u32)(long)res->lksb.sb_lvbptr;
224 }
225 #endif
226
227
/* get and put ops for file_info.
   Actually I don't really like "get" and "put", but everyone
   else seems to use them and I can't think of anything
   nicer at the moment */

/* Take a reference on a file_info; paired with put_file_info() */
static void get_file_info(struct file_info *f)
{
        atomic_inc(&f->fi_refcnt);
}
236
237 static void put_file_info(struct file_info *f)
238 {
239         if (atomic_dec_and_test(&f->fi_refcnt))
240                 kfree(f);
241 }
242
243 static void release_lockinfo(struct user_ls *ls, struct lock_info *li)
244 {
245         put_file_info(li->li_file);
246
247         write_lock(&ls->lockinfo_lock);
248         idr_remove(&ls->lockinfo_idr, li->li_lksb.sb_lkid);
249         write_unlock(&ls->lockinfo_lock);
250
251         if (li->li_lksb.sb_lvbptr)
252                 kfree(li->li_lksb.sb_lvbptr);
253         kfree(li);
254
255         module_put(THIS_MODULE);
256 }
257
/*
 * Look up the lock_info registered under lockid in this lockspace.
 * Returns NULL if there is none.  The table lock is dropped before
 * returning, so the result is not pinned by this function.
 */
static struct lock_info *get_lockinfo(struct user_ls *ls, uint32_t lockid)
{
        struct lock_info *li;

        read_lock(&ls->lockinfo_lock);
        li = idr_find(&ls->lockinfo_idr, lockid);
        read_unlock(&ls->lockinfo_lock);

        return li;
}
268
269 static int add_lockinfo(struct user_ls *ls, struct lock_info *li)
270 {
271         int n;
272         int r;
273         int ret = -EINVAL;
274
275         write_lock(&ls->lockinfo_lock);
276
277         if (idr_find(&ls->lockinfo_idr, li->li_lksb.sb_lkid))
278                 goto out_up;
279
280         ret = -ENOMEM;
281         r = idr_pre_get(&ls->lockinfo_idr, GFP_KERNEL);
282         if (!r)
283                 goto out_up;
284
285         r = idr_get_new_above(&ls->lockinfo_idr, li, li->li_lksb.sb_lkid, &n);
286         if (r)
287                 goto out_up;
288
289         if (n != li->li_lksb.sb_lkid) {
290                 idr_remove(&ls->lockinfo_idr, n);
291                 goto out_up;
292         }
293
294         ret = 0;
295
296  out_up:
297         write_unlock(&ls->lockinfo_lock);
298
299         return ret;
300 }
301
302
303 static struct user_ls *__find_lockspace(int minor)
304 {
305         struct user_ls *lsinfo;
306
307         list_for_each_entry(lsinfo, &user_ls_list, ls_list) {
308                 if (lsinfo->ls_miscinfo.minor == minor)
309                         return lsinfo;
310         }
311         return NULL;
312 }
313
/* Find a lockspace struct given the device minor number.
   Takes user_ls_lock around the search; returns NULL if not found.
   The lock is released before returning. */
static struct user_ls *find_lockspace(int minor)
{
        struct user_ls *lsinfo;

        mutex_lock(&user_ls_lock);
        lsinfo = __find_lockspace(minor);
        mutex_unlock(&user_ls_lock);

        return lsinfo;
}
325
/* Link a new lockspace onto user_ls_list, under user_ls_lock */
static void add_lockspace_to_list(struct user_ls *lsinfo)
{
        mutex_lock(&user_ls_lock);
        list_add(&lsinfo->ls_list, &user_ls_list);
        mutex_unlock(&user_ls_lock);
}
332
333 /* Register a lockspace with the DLM and create a misc
334    device for userland to access it */
335 static int register_lockspace(char *name, struct user_ls **ls, int flags)
336 {
337         struct user_ls *newls;
338         int status;
339         int namelen;
340
341         namelen = strlen(name)+strlen(name_prefix)+2;
342
343         newls = kzalloc(sizeof(struct user_ls), GFP_KERNEL);
344         if (!newls)
345                 return -ENOMEM;
346
347         newls->ls_miscinfo.name = kzalloc(namelen, GFP_KERNEL);
348         if (!newls->ls_miscinfo.name) {
349                 kfree(newls);
350                 return -ENOMEM;
351         }
352
353         status = dlm_new_lockspace(name, strlen(name), &newls->ls_lockspace, 0,
354                                    DLM_USER_LVB_LEN);
355         if (status != 0) {
356                 kfree(newls->ls_miscinfo.name);
357                 kfree(newls);
358                 return status;
359         }
360
361         idr_init(&newls->lockinfo_idr);
362         rwlock_init(&newls->lockinfo_lock);
363
364         snprintf((char*)newls->ls_miscinfo.name, namelen, "%s_%s",
365                  name_prefix, name);
366
367         newls->ls_miscinfo.fops = &_dlm_fops;
368         newls->ls_miscinfo.minor = MISC_DYNAMIC_MINOR;
369
370         status = misc_register(&newls->ls_miscinfo);
371         if (status) {
372                 printk(KERN_ERR "dlm: misc register failed for %s\n", name);
373                 dlm_release_lockspace(newls->ls_lockspace, 0);
374                 kfree(newls->ls_miscinfo.name);
375                 kfree(newls);
376                 return status;
377         }
378
379         if (flags & DLM_USER_LSFLG_AUTOFREE)
380                 set_bit(LS_FLAG_AUTOFREE, &newls->ls_flags);
381
382         add_lockspace_to_list(newls);
383         *ls = newls;
384         return 0;
385 }
386
/* Called with the user_ls_lock mutex held.
 *
 * Release the DLM lockspace, deregister its misc device and unlink the
 * user_ls from user_ls_list.  The user_ls itself is only freed here if
 * no file holds a reference (ls_refcnt == 0); otherwise it is left
 * marked LS_FLAG_DELETED with ls_lockspace set to NULL.
 *
 * Returns 0, or the error from dlm_release_lockspace()/misc_deregister().
 */
static int unregister_lockspace(struct user_ls *lsinfo, int force)
{
        int status;

        status = dlm_release_lockspace(lsinfo->ls_lockspace, force);
        if (status)
                return status;

        /* NOTE(review): if this fails the lockspace has already been
         * released but lsinfo stays on the list with its old
         * ls_lockspace pointer - confirm that is acceptable. */
        status = misc_deregister(&lsinfo->ls_miscinfo);
        if (status)
                return status;

        list_del(&lsinfo->ls_list);
        set_bit(LS_FLAG_DELETED, &lsinfo->ls_flags);
        lsinfo->ls_lockspace = NULL;
        if (atomic_read(&lsinfo->ls_refcnt) == 0) {
                kfree(lsinfo->ls_miscinfo.name);
                kfree(lsinfo);
        }

        return 0;
}
410
411 /* Add it to userland's AST queue */
412 static void add_to_astqueue(struct lock_info *li, void *astaddr, void *astparam,
413                             int lvb_updated)
414 {
415         struct ast_info *ast = kzalloc(sizeof(struct ast_info), GFP_KERNEL);
416         if (!ast)
417                 return;
418
419         ast->result.user_astparam = astparam;
420         ast->result.user_astaddr  = astaddr;
421         ast->result.user_lksb     = li->li_user_lksb;
422         memcpy(&ast->result.lksb, &li->li_lksb, sizeof(struct dlm_lksb));
423         ast->lvb_updated = lvb_updated;
424
425         spin_lock(&li->li_file->fi_ast_lock);
426         list_add_tail(&ast->list, &li->li_file->fi_ast_list);
427         spin_unlock(&li->li_file->fi_ast_lock);
428         wake_up_interruptible(&li->li_file->fi_wait);
429 }
430
/*
 * Blocking AST callback: queue the lock's user blocking AST, if it has
 * one.  param is the lock_info (may be NULL).
 * NOTE(review): mode is not used here, so it is not passed on to the
 * queued AST record.
 */
static void bast_routine(void *param, int mode)
{
        struct lock_info *li = param;

        if (li && li->li_bastaddr)
                add_to_astqueue(li, li->li_bastaddr, li->li_bastparam, 0);
}
438
/*
 * This is the kernel's AST routine.
 * All lock, unlock & query operations complete here.
 * The only synchronous ops are those done during device close.
 *
 * param is the lock_info supplied as the AST argument (may be NULL).
 * For requests with a user completion AST the result is queued for
 * userland and the lock_info may be freed; otherwise the waiter on
 * li_waitq is woken.
 */
static void ast_routine(void *param)
{
        struct lock_info *li = param;

        /* Param may be NULL if a persistent lock is unlocked by someone else */
        if (!li)
                return;

        /* If this is a successful conversion then activate the blocking ast
         * args from the conversion request */
        if (!test_bit(LI_FLAG_FIRSTLOCK, &li->li_flags) &&
            li->li_lksb.sb_status == 0) {

                li->li_bastparam = li->li_pend_bastparam;
                li->li_bastaddr = li->li_pend_bastaddr;
                li->li_pend_bastaddr = NULL;
        }

        /* If it's an async request then post data to the user's AST queue. */
        if (li->li_castaddr) {
                int lvb_updated = 0;

                /* See if the lvb has been updated.  The table is indexed
                 * by mode + 1 (modes start out as -1 in a new lock_info) */
                if (dlm_lvb_operations[li->li_grmode+1][li->li_rqmode+1] == 1)
                        lvb_updated = 1;

                /* The request succeeded, so the requested mode is now granted */
                if (li->li_lksb.sb_status == 0)
                        li->li_grmode = li->li_rqmode;

                /* Only queue AST if the device is still open */
                if (test_bit(FI_FLAG_OPEN, &li->li_file->fi_flags))
                        add_to_astqueue(li, li->li_castaddr, li->li_castparam,
                                        lvb_updated);

                /* If it's a new lock operation that failed, then
                 * remove it from the owner queue and free the
                 * lock_info.
                 */
                if (test_and_clear_bit(LI_FLAG_FIRSTLOCK, &li->li_flags) &&
                    li->li_lksb.sb_status != 0) {

                        /* Wait till dlm_lock() has finished */
                        wait_for_completion(&li->li_firstcomp);

                        spin_lock(&li->li_file->fi_li_lock);
                        list_del(&li->li_ownerqueue);
                        clear_bit(LI_FLAG_ONLIST, &li->li_flags);
                        spin_unlock(&li->li_file->fi_li_lock);
                        /* li is freed by this call; return straight away */
                        release_lockinfo(li->li_file->fi_ls, li);
                        return;
                }
                /* Free unlocks & queries */
                if (li->li_lksb.sb_status == -DLM_EUNLOCK ||
                    li->li_cmd == DLM_USER_QUERY) {
                        release_lockinfo(li->li_file->fi_ls, li);
                }
        } else {
                /* Synchronous request, just wake up the caller */
                set_bit(LI_FLAG_COMPLETE, &li->li_flags);
                wake_up_interruptible(&li->li_waitq);
        }
}
506
507 /*
508  * Wait for the lock op to complete and return the status.
509  */
510 static int wait_for_ast(struct lock_info *li)
511 {
512         /* Wait for the AST routine to complete */
513         set_task_state(current, TASK_INTERRUPTIBLE);
514         while (!test_bit(LI_FLAG_COMPLETE, &li->li_flags))
515                 schedule();
516
517         set_task_state(current, TASK_RUNNING);
518
519         return li->li_lksb.sb_status;
520 }
521
522
/* Open on control device: no per-file state is kept, so private_data
   is simply cleared.  Always succeeds. */
static int dlm_ctl_open(struct inode *inode, struct file *file)
{
        file->private_data = NULL;
        return 0;
}
529
/* Close on control device: nothing to tear down.  Always succeeds. */
static int dlm_ctl_close(struct inode *inode, struct file *file)
{
        return 0;
}
535
536 /* Open on lockspace device */
537 static int dlm_open(struct inode *inode, struct file *file)
538 {
539         struct file_info *f;
540         struct user_ls *lsinfo;
541
542         lsinfo = find_lockspace(iminor(inode));
543         if (!lsinfo)
544                 return -ENOENT;
545
546         f = kzalloc(sizeof(struct file_info), GFP_KERNEL);
547         if (!f)
548                 return -ENOMEM;
549
550         atomic_inc(&lsinfo->ls_refcnt);
551         INIT_LIST_HEAD(&f->fi_li_list);
552         INIT_LIST_HEAD(&f->fi_ast_list);
553         spin_lock_init(&f->fi_li_lock);
554         spin_lock_init(&f->fi_ast_lock);
555         init_waitqueue_head(&f->fi_wait);
556         f->fi_ls = lsinfo;
557         f->fi_flags = 0;
558         get_file_info(f);
559         set_bit(FI_FLAG_OPEN, &f->fi_flags);
560
561         file->private_data = f;
562
563         return 0;
564 }
565
566 /* Check the user's version matches ours */
567 static int check_version(struct dlm_write_request *req)
568 {
569         if (req->version[0] != DLM_DEVICE_VERSION_MAJOR ||
570             (req->version[0] == DLM_DEVICE_VERSION_MAJOR &&
571              req->version[1] > DLM_DEVICE_VERSION_MINOR)) {
572
573                 printk(KERN_DEBUG "dlm: process %s (%d) version mismatch "
574                        "user (%d.%d.%d) kernel (%d.%d.%d)\n",
575                        current->comm,
576                        current->pid,
577                        req->version[0],
578                        req->version[1],
579                        req->version[2],
580                        DLM_DEVICE_VERSION_MAJOR,
581                        DLM_DEVICE_VERSION_MINOR,
582                        DLM_DEVICE_VERSION_PATCH);
583                 return -EINVAL;
584         }
585         return 0;
586 }
587
/* Close on lockspace device.
 *
 * Stops AST delivery to the file, then walks the file's lock list:
 * persistent locks are orphaned, all others are force-unlocked
 * synchronously.  Finally the file's reference on the lockspace is
 * dropped, which may free or (for AUTOFREE lockspaces) unregister it.
 * Signals are blocked for the duration.
 *
 * NOTE(review): the lockspace is looked up again by minor rather than
 * taken from f->fi_ls.  unregister_lockspace() unlinks a lockspace from
 * the list when it clears ls_lockspace, so for a lockspace that was
 * removed while this file was open the lookup fails and we return
 * -ENOENT before releasing any lock_info, the lockspace reference or f.
 * That also appears to make the "ls_lockspace == NULL" branch below
 * unreachable - confirm and fix.
 */
static int dlm_close(struct inode *inode, struct file *file)
{
        struct file_info *f = file->private_data;
        struct lock_info li;
        struct lock_info *old_li, *safe;
        sigset_t tmpsig;
        sigset_t allsigs;
        struct user_ls *lsinfo;
        DECLARE_WAITQUEUE(wq, current);

        lsinfo = find_lockspace(iminor(inode));
        if (!lsinfo)
                return -ENOENT;

        /* Mark this closed so that ASTs will not be delivered any more */
        clear_bit(FI_FLAG_OPEN, &f->fi_flags);

        /* Block signals while we are doing this */
        sigfillset(&allsigs);
        sigprocmask(SIG_BLOCK, &allsigs, &tmpsig);

        /* We use our own lock_info struct here, so that any
         * outstanding "real" ASTs will be delivered with the
         * corresponding "real" params, thus freeing the lock_info
         * that belongs the lock. This catches the corner case where
         * a lock is BUSY when we try to unlock it here
         */
        memset(&li, 0, sizeof(li));
        clear_bit(LI_FLAG_COMPLETE, &li.li_flags);
        init_waitqueue_head(&li.li_waitq);
        add_wait_queue(&li.li_waitq, &wq);

        /*
         * Free any outstanding locks, they are on the
         * list in LIFO order so there should be no problems
         * about unlocking parents before children.
         */
        list_for_each_entry_safe(old_li, safe, &f->fi_li_list, li_ownerqueue) {
                int status;
                int flags = 0;

                /* Don't unlock persistent locks, just mark them orphaned */
                if (test_bit(LI_FLAG_PERSISTENT, &old_li->li_flags)) {
                        list_del(&old_li->li_ownerqueue);

                        /* Update master copy */
                        /* TODO: Check locking core updates the local and
                           remote ORPHAN flags */
                        li.li_lksb.sb_lkid = old_li->li_lksb.sb_lkid;
                        /* AST argument is NULL, so ast_routine() ignores
                         * the completion of this conversion */
                        status = dlm_lock(f->fi_ls->ls_lockspace,
                                          old_li->li_grmode, &li.li_lksb,
                                          DLM_LKF_CONVERT|DLM_LKF_ORPHAN,
                                          NULL, 0, 0, ast_routine, NULL, NULL);
                        if (status != 0)
                                printk("dlm: Error orphaning lock %x: %d\n",
                                       old_li->li_lksb.sb_lkid, status);

                        /* But tidy our references in it */
                        release_lockinfo(old_li->li_file->fi_ls, old_li);
                        continue;
                }

                /* Re-arm the completion flag on our private lock_info */
                clear_bit(LI_FLAG_COMPLETE, &li.li_flags);

                flags = DLM_LKF_FORCEUNLOCK;
                if (old_li->li_grmode >= DLM_LOCK_PW)
                        flags |= DLM_LKF_IVVALBLK;

                status = dlm_unlock(f->fi_ls->ls_lockspace,
                                    old_li->li_lksb.sb_lkid, flags,
                                    &li.li_lksb, &li);

                /* Must wait for it to complete as the next lock could be its
                 * parent */
                if (status == 0)
                        wait_for_ast(&li);

                /* Unlock succeeded, free the lock_info struct. */
                if (status == 0)
                        release_lockinfo(old_li->li_file->fi_ls, old_li);
        }

        remove_wait_queue(&li.li_waitq, &wq);

        /*
         * If this is the last reference to the lockspace
         * then free the struct. If it's an AUTOFREE lockspace
         * then free the whole thing.
         */
        mutex_lock(&user_ls_lock);
        if (atomic_dec_and_test(&lsinfo->ls_refcnt)) {

                if (lsinfo->ls_lockspace) {
                        if (test_bit(LS_FLAG_AUTOFREE, &lsinfo->ls_flags)) {
                                unregister_lockspace(lsinfo, 1);
                        }
                } else {
                        kfree(lsinfo->ls_miscinfo.name);
                        kfree(lsinfo);
                }
        }
        mutex_unlock(&user_ls_lock);
        put_file_info(f);

        /* Restore signals */
        sigprocmask(SIG_SETMASK, &tmpsig, NULL);
        recalc_sigpending();

        return 0;
}
699
700 static int do_user_create_lockspace(struct file_info *fi, uint8_t cmd,
701                                     struct dlm_lspace_params *kparams)
702 {
703         int status;
704         struct user_ls *lsinfo;
705
706         if (!capable(CAP_SYS_ADMIN))
707                 return -EPERM;
708
709         status = register_lockspace(kparams->name, &lsinfo, kparams->flags);
710
711         /* If it succeeded then return the minor number */
712         if (status == 0)
713                 status = lsinfo->ls_miscinfo.minor;
714
715         return status;
716 }
717
718 static int do_user_remove_lockspace(struct file_info *fi, uint8_t cmd,
719                                     struct dlm_lspace_params *kparams)
720 {
721         int status;
722         int force = 1;
723         struct user_ls *lsinfo;
724
725         if (!capable(CAP_SYS_ADMIN))
726                 return -EPERM;
727
728         mutex_lock(&user_ls_lock);
729         lsinfo = __find_lockspace(kparams->minor);
730         if (!lsinfo) {
731                 mutex_unlock(&user_ls_lock);
732                 return -EINVAL;
733         }
734
735         if (kparams->flags & DLM_USER_LSFLG_FORCEFREE)
736                 force = 3;
737
738         status = unregister_lockspace(lsinfo, force);
739         mutex_unlock(&user_ls_lock);
740
741         return status;
742 }
743
744 /* Read call, might block if no ASTs are waiting.
745  * It will only ever return one message at a time, regardless
746  * of how many are pending.
747  */
748 static ssize_t dlm_read(struct file *file, char __user *buffer, size_t count,
749                         loff_t *ppos)
750 {
751         struct file_info *fi = file->private_data;
752         struct ast_info *ast;
753         void *data;
754         int data_size;
755         int struct_size;
756         int offset;
757         DECLARE_WAITQUEUE(wait, current);
758 #ifdef CONFIG_COMPAT
759         struct dlm_lock_result32 result32;
760
761         if (count < sizeof(struct dlm_lock_result32))
762 #else
763         if (count < sizeof(struct dlm_lock_result))
764 #endif
765                 return -EINVAL;
766
767         spin_lock(&fi->fi_ast_lock);
768         if (list_empty(&fi->fi_ast_list)) {
769
770                 /* No waiting ASTs.
771                  * Return EOF if the lockspace been deleted.
772                  */
773                 if (test_bit(LS_FLAG_DELETED, &fi->fi_ls->ls_flags))
774                         return 0;
775
776                 if (file->f_flags & O_NONBLOCK) {
777                         spin_unlock(&fi->fi_ast_lock);
778                         return -EAGAIN;
779                 }
780
781                 add_wait_queue(&fi->fi_wait, &wait);
782
783         repeat:
784                 set_current_state(TASK_INTERRUPTIBLE);
785                 if (list_empty(&fi->fi_ast_list) &&
786                     !signal_pending(current)) {
787
788                         spin_unlock(&fi->fi_ast_lock);
789                         schedule();
790                         spin_lock(&fi->fi_ast_lock);
791                         goto repeat;
792                 }
793
794                 current->state = TASK_RUNNING;
795                 remove_wait_queue(&fi->fi_wait, &wait);
796
797                 if (signal_pending(current)) {
798                         spin_unlock(&fi->fi_ast_lock);
799                         return -ERESTARTSYS;
800                 }
801         }
802
803         ast = list_entry(fi->fi_ast_list.next, struct ast_info, list);
804         list_del(&ast->list);
805         spin_unlock(&fi->fi_ast_lock);
806
807         /* Work out the size of the returned data */
808 #ifdef CONFIG_COMPAT
809         if (test_bit(FI_FLAG_COMPAT, &fi->fi_flags)) {
810                 data_size = struct_size = sizeof(struct dlm_lock_result32);
811                 data = &result32;
812         }
813         else
814 #endif
815         {
816                 data_size = struct_size = sizeof(struct dlm_lock_result);
817                 data = &ast->result;
818         }
819         if (ast->lvb_updated && ast->result.lksb.sb_lvbptr)
820                 data_size += DLM_USER_LVB_LEN;
821
822         offset = struct_size;
823
824         /* Room for the extended data ? */
825         if (count >= data_size) {
826
827                 if (ast->lvb_updated && ast->result.lksb.sb_lvbptr) {
828                         if (copy_to_user(buffer+offset,
829                                          ast->result.lksb.sb_lvbptr,
830                                          DLM_USER_LVB_LEN))
831                                 return -EFAULT;
832                         ast->result.lvb_offset = offset;
833                         offset += DLM_USER_LVB_LEN;
834                 }
835         }
836
837         ast->result.length = data_size;
838
839 #ifdef CONFIG_COMPAT
840         compat_output(&ast->result, &result32);
841 #endif
842
843         /* Copy the header now it has all the offsets in it */
844         if (copy_to_user(buffer, data, struct_size))
845                 offset = -EFAULT;
846
847         /* If we only returned a header and there's more to come then put it
848            back on the list */
849         if (count < data_size) {
850                 spin_lock(&fi->fi_ast_lock);
851                 list_add(&ast->list, &fi->fi_ast_list);
852                 spin_unlock(&fi->fi_ast_lock);
853         } else
854                 kfree(ast);
855         return offset;
856 }
857
858 static unsigned int dlm_poll(struct file *file, poll_table *wait)
859 {
860         struct file_info *fi = file->private_data;
861
862         poll_wait(file, &fi->fi_wait, wait);
863
864         spin_lock(&fi->fi_ast_lock);
865         if (!list_empty(&fi->fi_ast_list)) {
866                 spin_unlock(&fi->fi_ast_lock);
867                 return POLLIN | POLLRDNORM;
868         }
869
870         spin_unlock(&fi->fi_ast_lock);
871         return 0;
872 }
873
874 static struct lock_info *allocate_lockinfo(struct file_info *fi, uint8_t cmd,
875                                            struct dlm_lock_params *kparams)
876 {
877         struct lock_info *li;
878
879         if (!try_module_get(THIS_MODULE))
880                 return NULL;
881
882         li = kzalloc(sizeof(struct lock_info), GFP_KERNEL);
883         if (li) {
884                 li->li_magic     = LOCKINFO_MAGIC;
885                 li->li_file      = fi;
886                 li->li_cmd       = cmd;
887                 li->li_flags     = 0;
888                 li->li_grmode    = -1;
889                 li->li_rqmode    = -1;
890                 li->li_pend_bastparam = NULL;
891                 li->li_pend_bastaddr  = NULL;
892                 li->li_castaddr   = NULL;
893                 li->li_castparam  = NULL;
894                 li->li_lksb.sb_lvbptr = NULL;
895                 li->li_bastaddr  = kparams->bastaddr;
896                 li->li_bastparam = kparams->bastparam;
897
898                 get_file_info(fi);
899         }
900         return li;
901 }
902
903 static int do_user_lock(struct file_info *fi, uint8_t cmd,
904                         struct dlm_lock_params *kparams)
905 {
906         struct lock_info *li;
907         int status;
908
909         /*
910          * Validate things that we need to have correct.
911          */
912         if (!kparams->castaddr)
913                 return -EINVAL;
914
915         if (!kparams->lksb)
916                 return -EINVAL;
917
918         /* Persistent child locks are not available yet */
919         if ((kparams->flags & DLM_LKF_PERSISTENT) && kparams->parent)
920                 return -EINVAL;
921
922         /* For conversions, there should already be a lockinfo struct,
923            unless we are adopting an orphaned persistent lock */
924         if (kparams->flags & DLM_LKF_CONVERT) {
925
926                 li = get_lockinfo(fi->fi_ls, kparams->lkid);
927
928                 /* If this is a persistent lock we will have to create a
929                    lockinfo again */
930                 if (!li && (kparams->flags & DLM_LKF_PERSISTENT)) {
931                         li = allocate_lockinfo(fi, cmd, kparams);
932                         if (!li)
933                                 return -ENOMEM;
934
935                         li->li_lksb.sb_lkid = kparams->lkid;
936                         li->li_castaddr  = kparams->castaddr;
937                         li->li_castparam = kparams->castparam;
938
939                         /* OK, this isn't exactly a FIRSTLOCK but it is the
940                            first time we've used this lockinfo, and if things
941                            fail we want rid of it */
942                         init_completion(&li->li_firstcomp);
943                         set_bit(LI_FLAG_FIRSTLOCK, &li->li_flags);
944                         add_lockinfo(fi->fi_ls, li);
945
946                         /* TODO: do a query to get the current state ?? */
947                 }
948                 if (!li)
949                         return -EINVAL;
950
951                 if (li->li_magic != LOCKINFO_MAGIC)
952                         return -EINVAL;
953
954                 /* For conversions don't overwrite the current blocking AST
955                    info so that:
956                    a) if a blocking AST fires before the conversion is queued
957                       it runs the current handler
958                    b) if the conversion is cancelled, the original blocking AST
959                       declaration is active
960                    The pend_ info is made active when the conversion
961                    completes.
962                 */
963                 li->li_pend_bastaddr  = kparams->bastaddr;
964                 li->li_pend_bastparam = kparams->bastparam;
965         } else {
966                 li = allocate_lockinfo(fi, cmd, kparams);
967                 if (!li)
968                         return -ENOMEM;
969
970                 /* Allow us to complete our work before
971                    the AST routine runs. In fact we only need (and use) this
972                    when the initial lock fails */
973                 init_completion(&li->li_firstcomp);
974                 set_bit(LI_FLAG_FIRSTLOCK, &li->li_flags);
975         }
976
977         li->li_user_lksb = kparams->lksb;
978         li->li_castaddr  = kparams->castaddr;
979         li->li_castparam = kparams->castparam;
980         li->li_lksb.sb_lkid = kparams->lkid;
981         li->li_rqmode    = kparams->mode;
982         if (kparams->flags & DLM_LKF_PERSISTENT)
983                 set_bit(LI_FLAG_PERSISTENT, &li->li_flags);
984
985         /* Copy in the value block */
986         if (kparams->flags & DLM_LKF_VALBLK) {
987                 if (!li->li_lksb.sb_lvbptr) {
988                         li->li_lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN,
989                                                         GFP_KERNEL);
990                         if (!li->li_lksb.sb_lvbptr) {
991                                 status = -ENOMEM;
992                                 goto out_err;
993                         }
994                 }
995
996                 memcpy(li->li_lksb.sb_lvbptr, kparams->lvb, DLM_USER_LVB_LEN);
997         }
998
999         /* Lock it ... */
1000         status = dlm_lock(fi->fi_ls->ls_lockspace,
1001                           kparams->mode, &li->li_lksb,
1002                           kparams->flags,
1003                           kparams->name, kparams->namelen,
1004                           kparams->parent,
1005                           ast_routine,
1006                           li,
1007                           (li->li_pend_bastaddr || li->li_bastaddr) ?
1008                            bast_routine : NULL);
1009         if (status)
1010                 goto out_err;
1011
1012         /* If it succeeded (this far) with a new lock then keep track of
1013            it on the file's lockinfo list */
1014         if (!status && test_bit(LI_FLAG_FIRSTLOCK, &li->li_flags)) {
1015
1016                 spin_lock(&fi->fi_li_lock);
1017                 list_add(&li->li_ownerqueue, &fi->fi_li_list);
1018                 set_bit(LI_FLAG_ONLIST, &li->li_flags);
1019                 spin_unlock(&fi->fi_li_lock);
1020                 if (add_lockinfo(fi->fi_ls, li))
1021                         printk(KERN_WARNING "Add lockinfo failed\n");
1022
1023                 complete(&li->li_firstcomp);
1024         }
1025
1026         /* Return the lockid as the user needs it /now/ */
1027         return li->li_lksb.sb_lkid;
1028
1029  out_err:
1030         if (test_bit(LI_FLAG_FIRSTLOCK, &li->li_flags))
1031                 release_lockinfo(fi->fi_ls, li);
1032         return status;
1033
1034 }
1035
1036 static int do_user_unlock(struct file_info *fi, uint8_t cmd,
1037                           struct dlm_lock_params *kparams)
1038 {
1039         struct lock_info *li;
1040         int status;
1041         int convert_cancel = 0;
1042
1043         li = get_lockinfo(fi->fi_ls, kparams->lkid);
1044         if (!li) {
1045                 li = allocate_lockinfo(fi, cmd, kparams);
1046                 if (!li)
1047                         return -ENOMEM;
1048                 spin_lock(&fi->fi_li_lock);
1049                 list_add(&li->li_ownerqueue, &fi->fi_li_list);
1050                 set_bit(LI_FLAG_ONLIST, &li->li_flags);
1051                 spin_unlock(&fi->fi_li_lock);
1052         }
1053
1054         if (li->li_magic != LOCKINFO_MAGIC)
1055                 return -EINVAL;
1056
1057         li->li_user_lksb = kparams->lksb;
1058         li->li_castparam = kparams->castparam;
1059         li->li_cmd       = cmd;
1060
1061         /* Cancelling a conversion doesn't remove the lock...*/
1062         if (kparams->flags & DLM_LKF_CANCEL && li->li_grmode != -1)
1063                 convert_cancel = 1;
1064
1065         /* Wait until dlm_lock() has completed */
1066         if (!test_bit(LI_FLAG_ONLIST, &li->li_flags)) {
1067                 wait_for_completion(&li->li_firstcomp);
1068         }
1069
1070         /* dlm_unlock() passes a 0 for castaddr which means don't overwrite
1071            the existing li_castaddr as that's the completion routine for
1072            unlocks. dlm_unlock_wait() specifies a new AST routine to be
1073            executed when the unlock completes. */
1074         if (kparams->castaddr)
1075                 li->li_castaddr = kparams->castaddr;
1076
1077         /* Use existing lksb & astparams */
1078         status = dlm_unlock(fi->fi_ls->ls_lockspace,
1079                              kparams->lkid,
1080                              kparams->flags, &li->li_lksb, li);
1081
1082         if (!status && !convert_cancel) {
1083                 spin_lock(&fi->fi_li_lock);
1084                 list_del(&li->li_ownerqueue);
1085                 clear_bit(LI_FLAG_ONLIST, &li->li_flags);
1086                 spin_unlock(&fi->fi_li_lock);
1087         }
1088
1089         return status;
1090 }
1091
1092 /* Write call, submit a locking request */
1093 static ssize_t dlm_write(struct file *file, const char __user *buffer,
1094                          size_t count, loff_t *ppos)
1095 {
1096         struct file_info *fi = file->private_data;
1097         struct dlm_write_request *kparams;
1098         sigset_t tmpsig;
1099         sigset_t allsigs;
1100         int status;
1101
1102 #ifdef CONFIG_COMPAT
1103         if (count < sizeof(struct dlm_write_request32))
1104 #else
1105         if (count < sizeof(struct dlm_write_request))
1106 #endif
1107                 return -EINVAL;
1108
1109         if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN)
1110                 return -EINVAL;
1111
1112         /* Has the lockspace been deleted */
1113         if (fi && test_bit(LS_FLAG_DELETED, &fi->fi_ls->ls_flags))
1114                 return -ENOENT;
1115
1116         kparams = kmalloc(count, GFP_KERNEL);
1117         if (!kparams)
1118                 return -ENOMEM;
1119
1120         status = -EFAULT;
1121         /* Get the command info */
1122         if (copy_from_user(kparams, buffer, count))
1123                 goto out_free;
1124
1125         status = -EBADE;
1126         if (check_version(kparams))
1127                 goto out_free;
1128
1129 #ifdef CONFIG_COMPAT
1130         if (!kparams->is64bit) {
1131                 struct dlm_write_request32 *k32params = (struct dlm_write_request32 *)kparams;
1132                 kparams = kmalloc(count + (sizeof(struct dlm_write_request) - sizeof(struct dlm_write_request32)), GFP_KERNEL);
1133                 if (!kparams)
1134                         return -ENOMEM;
1135
1136                 if (fi)
1137                         set_bit(FI_FLAG_COMPAT, &fi->fi_flags);
1138                 compat_input(kparams, k32params);
1139                 kfree(k32params);
1140         }
1141 #endif
1142
1143         /* Block signals while we are doing this */
1144         sigfillset(&allsigs);
1145         sigprocmask(SIG_BLOCK, &allsigs, &tmpsig);
1146
1147         status = -EINVAL;
1148         switch (kparams->cmd)
1149         {
1150         case DLM_USER_LOCK:
1151                 if (!fi) goto out_sig;
1152                 status = do_user_lock(fi, kparams->cmd, &kparams->i.lock);
1153                 break;
1154
1155         case DLM_USER_UNLOCK:
1156                 if (!fi) goto out_sig;
1157                 status = do_user_unlock(fi, kparams->cmd, &kparams->i.lock);
1158                 break;
1159
1160         case DLM_USER_CREATE_LOCKSPACE:
1161                 if (fi) goto out_sig;
1162                 status = do_user_create_lockspace(fi, kparams->cmd,
1163                                                   &kparams->i.lspace);
1164                 break;
1165
1166         case DLM_USER_REMOVE_LOCKSPACE:
1167                 if (fi) goto out_sig;
1168                 status = do_user_remove_lockspace(fi, kparams->cmd,
1169                                                   &kparams->i.lspace);
1170                 break;
1171         default:
1172                 printk("Unknown command passed to DLM device : %d\n",
1173                         kparams->cmd);
1174                 break;
1175         }
1176
1177  out_sig:
1178         /* Restore signals */
1179         sigprocmask(SIG_SETMASK, &tmpsig, NULL);
1180         recalc_sigpending();
1181
1182  out_free:
1183         kfree(kparams);
1184         if (status == 0)
1185                 return count;
1186         else
1187                 return status;
1188 }
1189
1190 static struct file_operations _dlm_fops = {
1191       .open    = dlm_open,
1192       .release = dlm_close,
1193       .read    = dlm_read,
1194       .write   = dlm_write,
1195       .poll    = dlm_poll,
1196       .owner   = THIS_MODULE,
1197 };
1198
1199 static struct file_operations _dlm_ctl_fops = {
1200       .open    = dlm_ctl_open,
1201       .release = dlm_ctl_close,
1202       .write   = dlm_write,
1203       .owner   = THIS_MODULE,
1204 };
1205
1206 /*
1207  * Create control device
1208  */
1209 static int __init dlm_device_init(void)
1210 {
1211         int r;
1212
1213         INIT_LIST_HEAD(&user_ls_list);
1214         mutex_init(&user_ls_lock);
1215
1216         ctl_device.name = "dlm-control";
1217         ctl_device.fops = &_dlm_ctl_fops;
1218         ctl_device.minor = MISC_DYNAMIC_MINOR;
1219
1220         r = misc_register(&ctl_device);
1221         if (r) {
1222                 printk(KERN_ERR "dlm: misc_register failed for control dev\n");
1223                 return r;
1224         }
1225
1226         return 0;
1227 }
1228
1229 static void __exit dlm_device_exit(void)
1230 {
1231         misc_deregister(&ctl_device);
1232 }
1233
1234 MODULE_DESCRIPTION("Distributed Lock Manager device interface");
1235 MODULE_AUTHOR("Red Hat, Inc.");
1236 MODULE_LICENSE("GPL");
1237
1238 module_init(dlm_device_init);
1239 module_exit(dlm_device_exit);