update aufs to its latest standalone 3.2 branch
[pandora-kernel.git] / fs / aufs / whout.c
1 /*
2  * Copyright (C) 2005-2013 Junjiro R. Okajima
3  *
4  * This program, aufs is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18
19 /*
20  * whiteout for logical deletion and opaque directory
21  */
22
23 #include "aufs.h"
24
/* mode handed to vfsub_create() whenever this file creates a whiteout */
#define WH_MASK			S_IRUGO
26
27 /*
28  * If a directory contains this file, then it is opaque.  We start with the
29  * .wh. flag so that it is blocked by lookup.
30  */
31 static struct qstr diropq_name = {
32         .name = AUFS_WH_DIROPQ,
33         .len = sizeof(AUFS_WH_DIROPQ) - 1
34 };
35
36 /*
37  * generate whiteout name, which is NOT terminated by NULL.
38  * @name: original d_name.name
39  * @len: original d_name.len
40  * @wh: whiteout qstr
41  * returns zero when succeeds, otherwise error.
42  * succeeded value as wh->name should be freed by kfree().
43  */
44 int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
45 {
46         char *p;
47
48         if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
49                 return -ENAMETOOLONG;
50
51         wh->len = name->len + AUFS_WH_PFX_LEN;
52         p = kmalloc(wh->len, GFP_NOFS);
53         wh->name = p;
54         if (p) {
55                 memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
56                 memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
57                 /* smp_mb(); */
58                 return 0;
59         }
60         return -ENOMEM;
61 }
62
63 /* ---------------------------------------------------------------------- */
64
65 /*
66  * test if the @wh_name exists under @h_parent.
67  * @try_sio specifies the necessary of super-io.
68  */
69 int au_wh_test(struct dentry *h_parent, struct qstr *wh_name,
70                struct au_branch *br, int try_sio)
71 {
72         int err;
73         struct dentry *wh_dentry;
74
75         if (!try_sio)
76                 wh_dentry = au_lkup_one(wh_name, h_parent, br, /*nd*/NULL);
77         else
78                 wh_dentry = au_sio_lkup_one(wh_name, h_parent, br);
79         err = PTR_ERR(wh_dentry);
80         if (IS_ERR(wh_dentry))
81                 goto out;
82
83         err = 0;
84         if (!wh_dentry->d_inode)
85                 goto out_wh; /* success */
86
87         err = 1;
88         if (S_ISREG(wh_dentry->d_inode->i_mode))
89                 goto out_wh; /* success */
90
91         err = -EIO;
92         AuIOErr("%.*s Invalid whiteout entry type 0%o.\n",
93                 AuDLNPair(wh_dentry), wh_dentry->d_inode->i_mode);
94
95 out_wh:
96         dput(wh_dentry);
97 out:
98         return err;
99 }
100
101 /*
102  * test if the @h_dentry sets opaque or not.
103  */
104 int au_diropq_test(struct dentry *h_dentry, struct au_branch *br)
105 {
106         int err;
107         struct inode *h_dir;
108
109         h_dir = h_dentry->d_inode;
110         err = au_wh_test(h_dentry, &diropq_name, br,
111                          au_test_h_perm_sio(h_dir, MAY_EXEC));
112         return err;
113 }
114
115 /*
116  * returns a negative dentry whose name is unique and temporary.
117  */
118 struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
119                              struct qstr *prefix)
120 {
121         struct dentry *dentry;
122         int i;
123         char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
124                 *name, *p;
125         /* strict atomic_t is unnecessary here */
126         static unsigned short cnt;
127         struct qstr qs;
128
129         BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
130
131         name = defname;
132         qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
133         if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
134                 dentry = ERR_PTR(-ENAMETOOLONG);
135                 if (unlikely(qs.len > NAME_MAX))
136                         goto out;
137                 dentry = ERR_PTR(-ENOMEM);
138                 name = kmalloc(qs.len + 1, GFP_NOFS);
139                 if (unlikely(!name))
140                         goto out;
141         }
142
143         /* doubly whiteout-ed */
144         memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
145         p = name + AUFS_WH_PFX_LEN * 2;
146         memcpy(p, prefix->name, prefix->len);
147         p += prefix->len;
148         *p++ = '.';
149         AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
150
151         qs.name = name;
152         for (i = 0; i < 3; i++) {
153                 sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
154                 dentry = au_sio_lkup_one(&qs, h_parent, br);
155                 if (IS_ERR(dentry) || !dentry->d_inode)
156                         goto out_name;
157                 dput(dentry);
158         }
159         /* pr_warn("could not get random name\n"); */
160         dentry = ERR_PTR(-EEXIST);
161         AuDbg("%.*s\n", AuLNPair(&qs));
162         BUG();
163
164 out_name:
165         if (name != defname)
166                 kfree(name);
167 out:
168         AuTraceErrPtr(dentry);
169         return dentry;
170 }
171
172 /*
173  * rename the @h_dentry on @br to the whiteouted temporary name.
174  */
175 int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
176 {
177         int err;
178         struct path h_path = {
179                 .mnt = au_br_mnt(br)
180         };
181         struct inode *h_dir;
182         struct dentry *h_parent;
183
184         h_parent = h_dentry->d_parent; /* dir inode is locked */
185         h_dir = h_parent->d_inode;
186         IMustLock(h_dir);
187
188         h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
189         err = PTR_ERR(h_path.dentry);
190         if (IS_ERR(h_path.dentry))
191                 goto out;
192
193         /* under the same dir, no need to lock_rename() */
194         err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path);
195         AuTraceErr(err);
196         dput(h_path.dentry);
197
198 out:
199         AuTraceErr(err);
200         return err;
201 }
202
203 /* ---------------------------------------------------------------------- */
204 /*
205  * functions for removing a whiteout
206  */
207
208 static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
209 {
210         int force;
211
212         /*
213          * forces superio when the dir has a sticky bit.
214          * this may be a violation of unix fs semantics.
215          */
216         force = (h_dir->i_mode & S_ISVTX)
217                 && h_path->dentry->d_inode->i_uid != current_fsuid();
218         return vfsub_unlink(h_dir, h_path, force);
219 }
220
/*
 * unlink the whiteout @h_path under @h_dir.
 * when it succeeds and @dentry is given, au_set_dbwh(@dentry, -1) is called.
 * returns the result of the unlink.
 */
int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
			struct dentry *dentry)
{
	int err = do_unlink_wh(h_dir, h_path);

	if (err || !dentry)
		return err;

	au_set_dbwh(dentry, -1);
	return 0;
}
232
233 static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
234                           struct au_branch *br)
235 {
236         int err;
237         struct path h_path = {
238                 .mnt = au_br_mnt(br)
239         };
240
241         err = 0;
242         h_path.dentry = au_lkup_one(wh, h_parent, br, /*nd*/NULL);
243         if (IS_ERR(h_path.dentry))
244                 err = PTR_ERR(h_path.dentry);
245         else {
246                 if (h_path.dentry->d_inode
247                     && S_ISREG(h_path.dentry->d_inode->i_mode))
248                         err = do_unlink_wh(h_parent->d_inode, &h_path);
249                 dput(h_path.dentry);
250         }
251
252         return err;
253 }
254
255 /* ---------------------------------------------------------------------- */
256 /*
257  * initialize/clean whiteout for a branch
258  */
259
260 static void au_wh_clean(struct inode *h_dir, struct path *whpath,
261                         const int isdir)
262 {
263         int err;
264
265         if (!whpath->dentry->d_inode)
266                 return;
267
268         err = mnt_want_write(whpath->mnt);
269         if (!err) {
270                 if (isdir)
271                         err = vfsub_rmdir(h_dir, whpath);
272                 else
273                         err = vfsub_unlink(h_dir, whpath, /*force*/0);
274                 mnt_drop_write(whpath->mnt);
275         }
276         if (unlikely(err))
277                 pr_warn("failed removing %.*s (%d), ignored.\n",
278                         AuDLNPair(whpath->dentry), err);
279 }
280
281 static int test_linkable(struct dentry *h_root)
282 {
283         struct inode *h_dir = h_root->d_inode;
284
285         if (h_dir->i_op->link)
286                 return 0;
287
288         pr_err("%.*s (%s) doesn't support link(2), use noplink and rw+nolwh\n",
289                AuDLNPair(h_root), au_sbtype(h_root->d_sb));
290         return -ENOSYS;
291 }
292
293 /* todo: should this mkdir be done in /sbin/mount.aufs helper? */
294 static int au_whdir(struct inode *h_dir, struct path *path)
295 {
296         int err;
297
298         err = -EEXIST;
299         if (!path->dentry->d_inode) {
300                 int mode = S_IRWXU;
301
302                 if (au_test_nfs(path->dentry->d_sb))
303                         mode |= S_IXUGO;
304                 err = mnt_want_write(path->mnt);
305                 if (!err) {
306                         err = vfsub_mkdir(h_dir, path, mode);
307                         mnt_drop_write(path->mnt);
308                 }
309         } else if (S_ISDIR(path->dentry->d_inode->i_mode))
310                 err = 0;
311         else
312                 pr_err("unknown %.*s exists\n", AuDLNPair(path->dentry));
313
314         return err;
315 }
316
/* a whiteout base entry handled by au_wh_init(), indexed by AuBrWh_xxx */
struct au_wh_base {
	/* the name handed to au_wh_lkup(), i.e. before being whiteout-ed */
	const struct qstr	*name;
	/* the dentry which au_wh_lkup() returned for @name */
	struct dentry		*dentry;
};
321
322 static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
323                           struct path *h_path)
324 {
325         h_path->dentry = base[AuBrWh_BASE].dentry;
326         au_wh_clean(h_dir, h_path, /*isdir*/0);
327         h_path->dentry = base[AuBrWh_PLINK].dentry;
328         au_wh_clean(h_dir, h_path, /*isdir*/1);
329         h_path->dentry = base[AuBrWh_ORPH].dentry;
330         au_wh_clean(h_dir, h_path, /*isdir*/1);
331 }
332
333 /*
334  * returns tri-state,
335  * minus: error, caller should print the mesage
336  * zero: succuess
337  * plus: error, caller should NOT print the mesage
338  */
339 static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
340                                 int do_plink, struct au_wh_base base[],
341                                 struct path *h_path)
342 {
343         int err;
344         struct inode *h_dir;
345
346         h_dir = h_root->d_inode;
347         h_path->dentry = base[AuBrWh_BASE].dentry;
348         au_wh_clean(h_dir, h_path, /*isdir*/0);
349         h_path->dentry = base[AuBrWh_PLINK].dentry;
350         if (do_plink) {
351                 err = test_linkable(h_root);
352                 if (unlikely(err)) {
353                         err = 1;
354                         goto out;
355                 }
356
357                 err = au_whdir(h_dir, h_path);
358                 if (unlikely(err))
359                         goto out;
360                 wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
361         } else
362                 au_wh_clean(h_dir, h_path, /*isdir*/1);
363         h_path->dentry = base[AuBrWh_ORPH].dentry;
364         err = au_whdir(h_dir, h_path);
365         if (unlikely(err))
366                 goto out;
367         wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
368
369 out:
370         return err;
371 }
372
373 /*
374  * for the moment, aufs supports the branch filesystem which does not support
375  * link(2). testing on FAT which does not support i_op->setattr() fully either,
376  * copyup failed. finally, such filesystem will not be used as the writable
377  * branch.
378  *
379  * returns tri-state, see above.
380  */
381 static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
382                          int do_plink, struct au_wh_base base[],
383                          struct path *h_path)
384 {
385         int err;
386         struct inode *h_dir;
387
388         WbrWhMustWriteLock(wbr);
389
390         err = test_linkable(h_root);
391         if (unlikely(err)) {
392                 err = 1;
393                 goto out;
394         }
395
396         /*
397          * todo: should this create be done in /sbin/mount.aufs helper?
398          */
399         err = -EEXIST;
400         h_dir = h_root->d_inode;
401         if (!base[AuBrWh_BASE].dentry->d_inode) {
402                 err = mnt_want_write(h_path->mnt);
403                 if (!err) {
404                         h_path->dentry = base[AuBrWh_BASE].dentry;
405                         err = vfsub_create(h_dir, h_path, WH_MASK);
406                         mnt_drop_write(h_path->mnt);
407                 }
408         } else if (S_ISREG(base[AuBrWh_BASE].dentry->d_inode->i_mode))
409                 err = 0;
410         else
411                 pr_err("unknown %.*s/%.*s exists\n",
412                        AuDLNPair(h_root), AuDLNPair(base[AuBrWh_BASE].dentry));
413         if (unlikely(err))
414                 goto out;
415
416         h_path->dentry = base[AuBrWh_PLINK].dentry;
417         if (do_plink) {
418                 err = au_whdir(h_dir, h_path);
419                 if (unlikely(err))
420                         goto out;
421                 wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
422         } else
423                 au_wh_clean(h_dir, h_path, /*isdir*/1);
424         wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
425
426         h_path->dentry = base[AuBrWh_ORPH].dentry;
427         err = au_whdir(h_dir, h_path);
428         if (unlikely(err))
429                 goto out;
430         wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
431
432 out:
433         return err;
434 }
435
436 /*
437  * initialize the whiteout base file/dir for @br.
438  */
439 int au_wh_init(struct au_branch *br, struct super_block *sb)
440 {
441         int err, i;
442         const unsigned char do_plink
443                 = !!au_opt_test(au_mntflags(sb), PLINK);
444         struct inode *h_dir;
445         struct path path = br->br_path;
446         struct dentry *h_root = path.dentry;
447         struct au_wbr *wbr = br->br_wbr;
448         static const struct qstr base_name[] = {
449                 [AuBrWh_BASE] = {
450                         .name   = AUFS_BASE_NAME,
451                         .len    = sizeof(AUFS_BASE_NAME) - 1
452                 },
453                 [AuBrWh_PLINK] = {
454                         .name   = AUFS_PLINKDIR_NAME,
455                         .len    = sizeof(AUFS_PLINKDIR_NAME) - 1
456                 },
457                 [AuBrWh_ORPH] = {
458                         .name   = AUFS_ORPHDIR_NAME,
459                         .len    = sizeof(AUFS_ORPHDIR_NAME) - 1
460                 }
461         };
462         struct au_wh_base base[] = {
463                 [AuBrWh_BASE] = {
464                         .name   = base_name + AuBrWh_BASE,
465                         .dentry = NULL
466                 },
467                 [AuBrWh_PLINK] = {
468                         .name   = base_name + AuBrWh_PLINK,
469                         .dentry = NULL
470                 },
471                 [AuBrWh_ORPH] = {
472                         .name   = base_name + AuBrWh_ORPH,
473                         .dentry = NULL
474                 }
475         };
476
477         if (wbr)
478                 WbrWhMustWriteLock(wbr);
479
480         for (i = 0; i < AuBrWh_Last; i++) {
481                 /* doubly whiteouted */
482                 struct dentry *d;
483
484                 d = au_wh_lkup(h_root, (void *)base[i].name, br);
485                 err = PTR_ERR(d);
486                 if (IS_ERR(d))
487                         goto out;
488
489                 base[i].dentry = d;
490                 AuDebugOn(wbr
491                           && wbr->wbr_wh[i]
492                           && wbr->wbr_wh[i] != base[i].dentry);
493         }
494
495         if (wbr)
496                 for (i = 0; i < AuBrWh_Last; i++) {
497                         dput(wbr->wbr_wh[i]);
498                         wbr->wbr_wh[i] = NULL;
499                 }
500
501         err = 0;
502         if (!au_br_writable(br->br_perm)) {
503                 h_dir = h_root->d_inode;
504                 au_wh_init_ro(h_dir, base, &path);
505         } else if (!au_br_wh_linkable(br->br_perm)) {
506                 err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
507                 if (err > 0)
508                         goto out;
509                 else if (err)
510                         goto out_err;
511         } else {
512                 err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
513                 if (err > 0)
514                         goto out;
515                 else if (err)
516                         goto out_err;
517         }
518         goto out; /* success */
519
520 out_err:
521         pr_err("an error(%d) on the writable branch %.*s(%s)\n",
522                err, AuDLNPair(h_root), au_sbtype(h_root->d_sb));
523 out:
524         for (i = 0; i < AuBrWh_Last; i++)
525                 dput(base[i].dentry);
526         return err;
527 }
528
529 /* ---------------------------------------------------------------------- */
530 /*
531  * whiteouts are all hard-linked usually.
532  * when its link count reaches a ceiling, we create a new whiteout base
533  * asynchronously.
534  */
535
/*
 * the argument for the function reinit_br_wh().
 * allocated in kick_reinit_br_wh() and freed by the function.
 */
struct reinit_br_wh {
	struct super_block	*sb;	/* aufs superblock */
	struct au_branch	*br;	/* the branch to be re-initialized */
};
540
541 static void reinit_br_wh(void *arg)
542 {
543         int err;
544         aufs_bindex_t bindex;
545         struct path h_path;
546         struct reinit_br_wh *a = arg;
547         struct au_wbr *wbr;
548         struct inode *dir;
549         struct dentry *h_root;
550         struct au_hinode *hdir;
551
552         err = 0;
553         wbr = a->br->br_wbr;
554         /* big aufs lock */
555         si_noflush_write_lock(a->sb);
556         if (!au_br_writable(a->br->br_perm))
557                 goto out;
558         bindex = au_br_index(a->sb, a->br->br_id);
559         if (unlikely(bindex < 0))
560                 goto out;
561
562         di_read_lock_parent(a->sb->s_root, AuLock_IR);
563         dir = a->sb->s_root->d_inode;
564         hdir = au_hi(dir, bindex);
565         h_root = au_h_dptr(a->sb->s_root, bindex);
566         AuDebugOn(h_root != au_br_dentry(a->br));
567
568         au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
569         wbr_wh_write_lock(wbr);
570         err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
571                           h_root, a->br);
572         if (!err) {
573                 h_path.mnt = au_br_mnt(a->br);
574                 err = mnt_want_write(h_path.mnt);
575                 if (!err) {
576                         h_path.dentry = wbr->wbr_whbase;
577                         err = vfsub_unlink(hdir->hi_inode, &h_path, /*force*/0);
578                         mnt_drop_write(h_path.mnt);
579                 }
580         } else {
581                 pr_warn("%.*s is moved, ignored\n",
582                         AuDLNPair(wbr->wbr_whbase));
583                 err = 0;
584         }
585         dput(wbr->wbr_whbase);
586         wbr->wbr_whbase = NULL;
587         if (!err)
588                 err = au_wh_init(a->br, a->sb);
589         wbr_wh_write_unlock(wbr);
590         au_hn_imtx_unlock(hdir);
591         di_read_unlock(a->sb->s_root, AuLock_IR);
592
593 out:
594         if (wbr)
595                 atomic_dec(&wbr->wbr_wh_running);
596         atomic_dec(&a->br->br_count);
597         si_write_unlock(a->sb);
598         au_nwt_done(&au_sbi(a->sb)->si_nowait);
599         kfree(arg);
600         if (unlikely(err))
601                 AuIOErr("err %d\n", err);
602 }
603
604 static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
605 {
606         int do_dec, wkq_err;
607         struct reinit_br_wh *arg;
608
609         do_dec = 1;
610         if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
611                 goto out;
612
613         /* ignore ENOMEM */
614         arg = kmalloc(sizeof(*arg), GFP_NOFS);
615         if (arg) {
616                 /*
617                  * dec(wh_running), kfree(arg) and dec(br_count)
618                  * in reinit function
619                  */
620                 arg->sb = sb;
621                 arg->br = br;
622                 atomic_inc(&br->br_count);
623                 wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
624                 if (unlikely(wkq_err)) {
625                         atomic_dec(&br->br_wbr->wbr_wh_running);
626                         atomic_dec(&br->br_count);
627                         kfree(arg);
628                 }
629                 do_dec = 0;
630         }
631
632 out:
633         if (do_dec)
634                 atomic_dec(&br->br_wbr->wbr_wh_running);
635 }
636
637 /* ---------------------------------------------------------------------- */
638
639 /*
640  * create the whiteout @wh.
641  */
642 static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
643                              struct dentry *wh)
644 {
645         int err;
646         struct path h_path = {
647                 .dentry = wh
648         };
649         struct au_branch *br;
650         struct au_wbr *wbr;
651         struct dentry *h_parent;
652         struct inode *h_dir;
653
654         h_parent = wh->d_parent; /* dir inode is locked */
655         h_dir = h_parent->d_inode;
656         IMustLock(h_dir);
657
658         br = au_sbr(sb, bindex);
659         h_path.mnt = au_br_mnt(br);
660         wbr = br->br_wbr;
661         wbr_wh_read_lock(wbr);
662         if (wbr->wbr_whbase) {
663                 err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path);
664                 if (!err || err != -EMLINK)
665                         goto out;
666
667                 /* link count full. re-initialize br_whbase. */
668                 kick_reinit_br_wh(sb, br);
669         }
670
671         /* return this error in this context */
672         err = vfsub_create(h_dir, &h_path, WH_MASK);
673
674 out:
675         wbr_wh_read_unlock(wbr);
676         return err;
677 }
678
679 /* ---------------------------------------------------------------------- */
680
681 /*
682  * create or remove the diropq.
683  */
684 static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
685                                 unsigned int flags)
686 {
687         struct dentry *opq_dentry, *h_dentry;
688         struct super_block *sb;
689         struct au_branch *br;
690         int err;
691
692         sb = dentry->d_sb;
693         br = au_sbr(sb, bindex);
694         h_dentry = au_h_dptr(dentry, bindex);
695         opq_dentry = au_lkup_one(&diropq_name, h_dentry, br, /*nd*/NULL);
696         if (IS_ERR(opq_dentry))
697                 goto out;
698
699         if (au_ftest_diropq(flags, CREATE)) {
700                 err = link_or_create_wh(sb, bindex, opq_dentry);
701                 if (!err) {
702                         au_set_dbdiropq(dentry, bindex);
703                         goto out; /* success */
704                 }
705         } else {
706                 struct path tmp = {
707                         .dentry = opq_dentry,
708                         .mnt    = au_br_mnt(br)
709                 };
710                 err = do_unlink_wh(au_h_iptr(dentry->d_inode, bindex), &tmp);
711                 if (!err)
712                         au_set_dbdiropq(dentry, -1);
713         }
714         dput(opq_dentry);
715         opq_dentry = ERR_PTR(err);
716
717 out:
718         return opq_dentry;
719 }
720
721 struct do_diropq_args {
722         struct dentry **errp;
723         struct dentry *dentry;
724         aufs_bindex_t bindex;
725         unsigned int flags;
726 };
727
728 static void call_do_diropq(void *args)
729 {
730         struct do_diropq_args *a = args;
731         *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
732 }
733
734 struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
735                              unsigned int flags)
736 {
737         struct dentry *diropq, *h_dentry;
738
739         h_dentry = au_h_dptr(dentry, bindex);
740         if (!au_test_h_perm_sio(h_dentry->d_inode, MAY_EXEC | MAY_WRITE))
741                 diropq = do_diropq(dentry, bindex, flags);
742         else {
743                 int wkq_err;
744                 struct do_diropq_args args = {
745                         .errp           = &diropq,
746                         .dentry         = dentry,
747                         .bindex         = bindex,
748                         .flags          = flags
749                 };
750
751                 wkq_err = au_wkq_wait(call_do_diropq, &args);
752                 if (unlikely(wkq_err))
753                         diropq = ERR_PTR(wkq_err);
754         }
755
756         return diropq;
757 }
758
759 /* ---------------------------------------------------------------------- */
760
761 /*
762  * lookup whiteout dentry.
763  * @h_parent: lower parent dentry which must exist and be locked
764  * @base_name: name of dentry which will be whiteouted
765  * returns dentry for whiteout.
766  */
767 struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
768                           struct au_branch *br)
769 {
770         int err;
771         struct qstr wh_name;
772         struct dentry *wh_dentry;
773
774         err = au_wh_name_alloc(&wh_name, base_name);
775         wh_dentry = ERR_PTR(err);
776         if (!err) {
777                 wh_dentry = au_lkup_one(&wh_name, h_parent, br, /*nd*/NULL);
778                 kfree(wh_name.name);
779         }
780         return wh_dentry;
781 }
782
783 /*
784  * link/create a whiteout for @dentry on @bindex.
785  */
786 struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
787                             struct dentry *h_parent)
788 {
789         struct dentry *wh_dentry;
790         struct super_block *sb;
791         int err;
792
793         sb = dentry->d_sb;
794         wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
795         if (!IS_ERR(wh_dentry) && !wh_dentry->d_inode) {
796                 err = link_or_create_wh(sb, bindex, wh_dentry);
797                 if (!err)
798                         au_set_dbwh(dentry, bindex);
799                 else {
800                         dput(wh_dentry);
801                         wh_dentry = ERR_PTR(err);
802                 }
803         }
804
805         return wh_dentry;
806 }
807
808 /* ---------------------------------------------------------------------- */
809
810 /* Delete all whiteouts in this directory on branch bindex. */
811 static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
812                            aufs_bindex_t bindex, struct au_branch *br)
813 {
814         int err;
815         unsigned long ul, n;
816         struct qstr wh_name;
817         char *p;
818         struct hlist_head *head;
819         struct au_vdir_wh *tpos;
820         struct hlist_node *pos;
821         struct au_vdir_destr *str;
822
823         err = -ENOMEM;
824         p = __getname_gfp(GFP_NOFS);
825         wh_name.name = p;
826         if (unlikely(!wh_name.name))
827                 goto out;
828
829         err = 0;
830         memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
831         p += AUFS_WH_PFX_LEN;
832         n = whlist->nh_num;
833         head = whlist->nh_head;
834         for (ul = 0; !err && ul < n; ul++, head++) {
835                 hlist_for_each_entry(tpos, pos, head, wh_hash) {
836                         if (tpos->wh_bindex != bindex)
837                                 continue;
838
839                         str = &tpos->wh_str;
840                         if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
841                                 memcpy(p, str->name, str->len);
842                                 wh_name.len = AUFS_WH_PFX_LEN + str->len;
843                                 err = unlink_wh_name(h_dentry, &wh_name, br);
844                                 if (!err)
845                                         continue;
846                                 break;
847                         }
848                         AuIOErr("whiteout name too long %.*s\n",
849                                 str->len, str->name);
850                         err = -EIO;
851                         break;
852                 }
853         }
854         __putname(wh_name.name);
855
856 out:
857         return err;
858 }
859
860 struct del_wh_children_args {
861         int *errp;
862         struct dentry *h_dentry;
863         struct au_nhash *whlist;
864         aufs_bindex_t bindex;
865         struct au_branch *br;
866 };
867
868 static void call_del_wh_children(void *args)
869 {
870         struct del_wh_children_args *a = args;
871         *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
872 }
873
874 /* ---------------------------------------------------------------------- */
875
876 struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
877 {
878         struct au_whtmp_rmdir *whtmp;
879         int err;
880         unsigned int rdhash;
881
882         SiMustAnyLock(sb);
883
884         whtmp = kmalloc(sizeof(*whtmp), gfp);
885         if (unlikely(!whtmp)) {
886                 whtmp = ERR_PTR(-ENOMEM);
887                 goto out;
888         }
889
890         whtmp->dir = NULL;
891         whtmp->br = NULL;
892         whtmp->wh_dentry = NULL;
893         /* no estimation for dir size */
894         rdhash = au_sbi(sb)->si_rdhash;
895         if (!rdhash)
896                 rdhash = AUFS_RDHASH_DEF;
897         err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
898         if (unlikely(err)) {
899                 kfree(whtmp);
900                 whtmp = ERR_PTR(err);
901         }
902
903 out:
904         return whtmp;
905 }
906
907 void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
908 {
909         if (whtmp->br)
910                 atomic_dec(&whtmp->br->br_count);
911         dput(whtmp->wh_dentry);
912         iput(whtmp->dir);
913         au_nhash_wh_free(&whtmp->whlist);
914         kfree(whtmp);
915 }
916
917 /*
918  * rmdir the whiteouted temporary named dir @h_dentry.
919  * @whlist: whiteouted children.
920  */
921 int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
922                    struct dentry *wh_dentry, struct au_nhash *whlist)
923 {
924         int err;
925         struct path h_tmp;
926         struct inode *wh_inode, *h_dir;
927         struct au_branch *br;
928
929         h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
930         IMustLock(h_dir);
931
932         br = au_sbr(dir->i_sb, bindex);
933         wh_inode = wh_dentry->d_inode;
934         mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD);
935
936         /*
937          * someone else might change some whiteouts while we were sleeping.
938          * it means this whlist may have an obsoleted entry.
939          */
940         if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
941                 err = del_wh_children(wh_dentry, whlist, bindex, br);
942         else {
943                 int wkq_err;
944                 struct del_wh_children_args args = {
945                         .errp           = &err,
946                         .h_dentry       = wh_dentry,
947                         .whlist         = whlist,
948                         .bindex         = bindex,
949                         .br             = br
950                 };
951
952                 wkq_err = au_wkq_wait(call_del_wh_children, &args);
953                 if (unlikely(wkq_err))
954                         err = wkq_err;
955         }
956         mutex_unlock(&wh_inode->i_mutex);
957
958         if (!err) {
959                 h_tmp.dentry = wh_dentry;
960                 h_tmp.mnt = au_br_mnt(br);
961                 err = vfsub_rmdir(h_dir, &h_tmp);
962         }
963
964         if (!err) {
965                 if (au_ibstart(dir) == bindex) {
966                         /* todo: dir->i_mutex is necessary */
967                         au_cpup_attr_timesizes(dir);
968                         vfsub_drop_nlink(dir);
969                 }
970                 return 0; /* success */
971         }
972
973         pr_warn("failed removing %.*s(%d), ignored\n",
974                 AuDLNPair(wh_dentry), err);
975         return err;
976 }
977
978 static void call_rmdir_whtmp(void *args)
979 {
980         int err;
981         aufs_bindex_t bindex;
982         struct au_whtmp_rmdir *a = args;
983         struct super_block *sb;
984         struct dentry *h_parent;
985         struct inode *h_dir;
986         struct au_hinode *hdir;
987         struct vfsmount *h_mnt;
988
989         /* rmdir by nfsd may cause deadlock with this i_mutex */
990         /* mutex_lock(&a->dir->i_mutex); */
991         err = -EROFS;
992         sb = a->dir->i_sb;
993         si_read_lock(sb, !AuLock_FLUSH);
994         if (!au_br_writable(a->br->br_perm))
995                 goto out;
996         bindex = au_br_index(sb, a->br->br_id);
997         if (unlikely(bindex < 0))
998                 goto out;
999
1000         err = -EIO;
1001         ii_write_lock_parent(a->dir);
1002         h_parent = dget_parent(a->wh_dentry);
1003         h_dir = h_parent->d_inode;
1004         hdir = au_hi(a->dir, bindex);
1005         au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1006         err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
1007                           a->br);
1008         if (!err) {
1009                 h_mnt = au_br_mnt(a->br);
1010                 err = mnt_want_write(h_mnt);
1011                 if (!err) {
1012                         err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry,
1013                                              &a->whlist);
1014                         mnt_drop_write(h_mnt);
1015                 }
1016         }
1017         au_hn_imtx_unlock(hdir);
1018         dput(h_parent);
1019         ii_write_unlock(a->dir);
1020
1021 out:
1022         /* mutex_unlock(&a->dir->i_mutex); */
1023         au_whtmp_rmdir_free(a);
1024         si_read_unlock(sb);
1025         au_nwt_done(&au_sbi(sb)->si_nowait);
1026         if (unlikely(err))
1027                 AuIOErr("err %d\n", err);
1028 }
1029
1030 void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
1031                          struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
1032 {
1033         int wkq_err;
1034         struct super_block *sb;
1035
1036         IMustLock(dir);
1037
1038         /* all post-process will be done in do_rmdir_whtmp(). */
1039         sb = dir->i_sb;
1040         args->dir = au_igrab(dir);
1041         args->br = au_sbr(sb, bindex);
1042         atomic_inc(&args->br->br_count);
1043         args->wh_dentry = dget(wh_dentry);
1044         wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
1045         if (unlikely(wkq_err)) {
1046                 pr_warn("rmdir error %.*s (%d), ignored\n",
1047                         AuDLNPair(wh_dentry), wkq_err);
1048                 au_whtmp_rmdir_free(args);
1049         }
1050 }