Merge branch 'stable-3.2' into pandora-3.2
[pandora-kernel.git] / fs / aufs / whout.c
1 /*
2  * Copyright (C) 2005-2012 Junjiro R. Okajima
3  *
4  * This program, aufs is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18
19 /*
20  * whiteout for logical deletion and opaque directory
21  */
22
23 #include "aufs.h"
24
25 #define WH_MASK                 S_IRUGO
26
27 /*
28  * If a directory contains this file, then it is opaque.  We start with the
29  * .wh. flag so that it is blocked by lookup.
30  */
31 static struct qstr diropq_name = {
32         .name = AUFS_WH_DIROPQ,
33         .len = sizeof(AUFS_WH_DIROPQ) - 1
34 };
35
36 /*
37  * generate whiteout name, which is NOT terminated by NULL.
38  * @name: original d_name.name
39  * @len: original d_name.len
40  * @wh: whiteout qstr
41  * returns zero when succeeds, otherwise error.
42  * succeeded value as wh->name should be freed by kfree().
43  */
44 int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
45 {
46         char *p;
47
48         if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
49                 return -ENAMETOOLONG;
50
51         wh->len = name->len + AUFS_WH_PFX_LEN;
52         p = kmalloc(wh->len, GFP_NOFS);
53         wh->name = p;
54         if (p) {
55                 memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
56                 memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
57                 /* smp_mb(); */
58                 return 0;
59         }
60         return -ENOMEM;
61 }
62
63 /* ---------------------------------------------------------------------- */
64
65 /*
66  * test if the @wh_name exists under @h_parent.
67  * @try_sio specifies the necessary of super-io.
68  */
69 int au_wh_test(struct dentry *h_parent, struct qstr *wh_name,
70                struct au_branch *br, int try_sio)
71 {
72         int err;
73         struct dentry *wh_dentry;
74
75         if (!try_sio)
76                 wh_dentry = au_lkup_one(wh_name, h_parent, br, /*nd*/NULL);
77         else
78                 wh_dentry = au_sio_lkup_one(wh_name, h_parent, br);
79         err = PTR_ERR(wh_dentry);
80         if (IS_ERR(wh_dentry))
81                 goto out;
82
83         err = 0;
84         if (!wh_dentry->d_inode)
85                 goto out_wh; /* success */
86
87         err = 1;
88         if (S_ISREG(wh_dentry->d_inode->i_mode))
89                 goto out_wh; /* success */
90
91         err = -EIO;
92         AuIOErr("%.*s Invalid whiteout entry type 0%o.\n",
93                 AuDLNPair(wh_dentry), wh_dentry->d_inode->i_mode);
94
95 out_wh:
96         dput(wh_dentry);
97 out:
98         return err;
99 }
100
101 /*
102  * test if the @h_dentry sets opaque or not.
103  */
104 int au_diropq_test(struct dentry *h_dentry, struct au_branch *br)
105 {
106         int err;
107         struct inode *h_dir;
108
109         h_dir = h_dentry->d_inode;
110         err = au_wh_test(h_dentry, &diropq_name, br,
111                          au_test_h_perm_sio(h_dir, MAY_EXEC));
112         return err;
113 }
114
115 /*
116  * returns a negative dentry whose name is unique and temporary.
117  */
118 struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
119                              struct qstr *prefix)
120 {
121         struct dentry *dentry;
122         int i;
123         char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
124                 *name, *p;
125         /* strict atomic_t is unnecessary here */
126         static unsigned short cnt;
127         struct qstr qs;
128
129         BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
130
131         name = defname;
132         qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
133         if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
134                 dentry = ERR_PTR(-ENAMETOOLONG);
135                 if (unlikely(qs.len > NAME_MAX))
136                         goto out;
137                 dentry = ERR_PTR(-ENOMEM);
138                 name = kmalloc(qs.len + 1, GFP_NOFS);
139                 if (unlikely(!name))
140                         goto out;
141         }
142
143         /* doubly whiteout-ed */
144         memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
145         p = name + AUFS_WH_PFX_LEN * 2;
146         memcpy(p, prefix->name, prefix->len);
147         p += prefix->len;
148         *p++ = '.';
149         AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
150
151         qs.name = name;
152         for (i = 0; i < 3; i++) {
153                 sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
154                 dentry = au_sio_lkup_one(&qs, h_parent, br);
155                 if (IS_ERR(dentry) || !dentry->d_inode)
156                         goto out_name;
157                 dput(dentry);
158         }
159         /* pr_warning("could not get random name\n"); */
160         dentry = ERR_PTR(-EEXIST);
161         AuDbg("%.*s\n", AuLNPair(&qs));
162         BUG();
163
164 out_name:
165         if (name != defname)
166                 kfree(name);
167 out:
168         AuTraceErrPtr(dentry);
169         return dentry;
170 }
171
172 /*
173  * rename the @h_dentry on @br to the whiteouted temporary name.
174  */
175 int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
176 {
177         int err;
178         struct path h_path = {
179                 .mnt = br->br_mnt
180         };
181         struct inode *h_dir;
182         struct dentry *h_parent;
183
184         h_parent = h_dentry->d_parent; /* dir inode is locked */
185         h_dir = h_parent->d_inode;
186         IMustLock(h_dir);
187
188         h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
189         err = PTR_ERR(h_path.dentry);
190         if (IS_ERR(h_path.dentry))
191                 goto out;
192
193         /* under the same dir, no need to lock_rename() */
194         err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path);
195         AuTraceErr(err);
196         dput(h_path.dentry);
197
198 out:
199         AuTraceErr(err);
200         return err;
201 }
202
203 /* ---------------------------------------------------------------------- */
204 /*
205  * functions for removing a whiteout
206  */
207
208 static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
209 {
210         int force;
211
212         /*
213          * forces superio when the dir has a sticky bit.
214          * this may be a violation of unix fs semantics.
215          */
216         force = (h_dir->i_mode & S_ISVTX)
217                 && h_path->dentry->d_inode->i_uid != current_fsuid();
218         return vfsub_unlink(h_dir, h_path, force);
219 }
220
/*
 * Unlink the whiteout @h_path under @h_dir.
 * When it succeeds and @dentry is given, au_set_dbwh(@dentry, -1) is called.
 * Returns the result of the unlink.
 */
int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
                        struct dentry *dentry)
{
        int err = do_unlink_wh(h_dir, h_path);

        if (err)
                return err;
        if (dentry)
                au_set_dbwh(dentry, -1);
        return 0;
}
232
233 static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
234                           struct au_branch *br)
235 {
236         int err;
237         struct path h_path = {
238                 .mnt = br->br_mnt
239         };
240
241         err = 0;
242         h_path.dentry = au_lkup_one(wh, h_parent, br, /*nd*/NULL);
243         if (IS_ERR(h_path.dentry))
244                 err = PTR_ERR(h_path.dentry);
245         else {
246                 if (h_path.dentry->d_inode
247                     && S_ISREG(h_path.dentry->d_inode->i_mode))
248                         err = do_unlink_wh(h_parent->d_inode, &h_path);
249                 dput(h_path.dentry);
250         }
251
252         return err;
253 }
254
255 /* ---------------------------------------------------------------------- */
256 /*
257  * initialize/clean whiteout for a branch
258  */
259
260 static void au_wh_clean(struct inode *h_dir, struct path *whpath,
261                         const int isdir)
262 {
263         int err;
264
265         if (!whpath->dentry->d_inode)
266                 return;
267
268         err = mnt_want_write(whpath->mnt);
269         if (!err) {
270                 if (isdir)
271                         err = vfsub_rmdir(h_dir, whpath);
272                 else
273                         err = vfsub_unlink(h_dir, whpath, /*force*/0);
274                 mnt_drop_write(whpath->mnt);
275         }
276         if (unlikely(err))
277                 pr_warning("failed removing %.*s (%d), ignored.\n",
278                            AuDLNPair(whpath->dentry), err);
279 }
280
281 static int test_linkable(struct dentry *h_root)
282 {
283         struct inode *h_dir = h_root->d_inode;
284
285         if (h_dir->i_op->link)
286                 return 0;
287
288         pr_err("%.*s (%s) doesn't support link(2), use noplink and rw+nolwh\n",
289                AuDLNPair(h_root), au_sbtype(h_root->d_sb));
290         return -ENOSYS;
291 }
292
293 /* todo: should this mkdir be done in /sbin/mount.aufs helper? */
294 static int au_whdir(struct inode *h_dir, struct path *path)
295 {
296         int err;
297
298         err = -EEXIST;
299         if (!path->dentry->d_inode) {
300                 int mode = S_IRWXU;
301
302                 if (au_test_nfs(path->dentry->d_sb))
303                         mode |= S_IXUGO;
304                 err = mnt_want_write(path->mnt);
305                 if (!err) {
306                         err = vfsub_mkdir(h_dir, path, mode);
307                         mnt_drop_write(path->mnt);
308                 }
309         } else if (S_ISDIR(path->dentry->d_inode->i_mode))
310                 err = 0;
311         else
312                 pr_err("unknown %.*s exists\n", AuDLNPair(path->dentry));
313
314         return err;
315 }
316
/* an entry of the whiteout bases which au_wh_init() handles */
struct au_wh_base {
        /* the name passed to au_wh_lkup() */
        const struct qstr *name;
        /* the result of the lookup, released at the end of au_wh_init() */
        struct dentry *dentry;
};
321
322 static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
323                           struct path *h_path)
324 {
325         h_path->dentry = base[AuBrWh_BASE].dentry;
326         au_wh_clean(h_dir, h_path, /*isdir*/0);
327         h_path->dentry = base[AuBrWh_PLINK].dentry;
328         au_wh_clean(h_dir, h_path, /*isdir*/1);
329         h_path->dentry = base[AuBrWh_ORPH].dentry;
330         au_wh_clean(h_dir, h_path, /*isdir*/1);
331 }
332
333 /*
334  * returns tri-state,
335  * minus: error, caller should print the mesage
336  * zero: succuess
337  * plus: error, caller should NOT print the mesage
338  */
339 static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
340                                 int do_plink, struct au_wh_base base[],
341                                 struct path *h_path)
342 {
343         int err;
344         struct inode *h_dir;
345
346         h_dir = h_root->d_inode;
347         h_path->dentry = base[AuBrWh_BASE].dentry;
348         au_wh_clean(h_dir, h_path, /*isdir*/0);
349         h_path->dentry = base[AuBrWh_PLINK].dentry;
350         if (do_plink) {
351                 err = test_linkable(h_root);
352                 if (unlikely(err)) {
353                         err = 1;
354                         goto out;
355                 }
356
357                 err = au_whdir(h_dir, h_path);
358                 if (unlikely(err))
359                         goto out;
360                 wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
361         } else
362                 au_wh_clean(h_dir, h_path, /*isdir*/1);
363         h_path->dentry = base[AuBrWh_ORPH].dentry;
364         err = au_whdir(h_dir, h_path);
365         if (unlikely(err))
366                 goto out;
367         wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
368
369 out:
370         return err;
371 }
372
373 /*
374  * for the moment, aufs supports the branch filesystem which does not support
375  * link(2). testing on FAT which does not support i_op->setattr() fully either,
376  * copyup failed. finally, such filesystem will not be used as the writable
377  * branch.
378  *
379  * returns tri-state, see above.
380  */
381 static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
382                          int do_plink, struct au_wh_base base[],
383                          struct path *h_path)
384 {
385         int err;
386         struct inode *h_dir;
387
388         WbrWhMustWriteLock(wbr);
389
390         err = test_linkable(h_root);
391         if (unlikely(err)) {
392                 err = 1;
393                 goto out;
394         }
395
396         /*
397          * todo: should this create be done in /sbin/mount.aufs helper?
398          */
399         err = -EEXIST;
400         h_dir = h_root->d_inode;
401         if (!base[AuBrWh_BASE].dentry->d_inode) {
402                 err = mnt_want_write(h_path->mnt);
403                 if (!err) {
404                         h_path->dentry = base[AuBrWh_BASE].dentry;
405                         err = vfsub_create(h_dir, h_path, WH_MASK);
406                         mnt_drop_write(h_path->mnt);
407                 }
408         } else if (S_ISREG(base[AuBrWh_BASE].dentry->d_inode->i_mode))
409                 err = 0;
410         else
411                 pr_err("unknown %.*s/%.*s exists\n",
412                        AuDLNPair(h_root), AuDLNPair(base[AuBrWh_BASE].dentry));
413         if (unlikely(err))
414                 goto out;
415
416         h_path->dentry = base[AuBrWh_PLINK].dentry;
417         if (do_plink) {
418                 err = au_whdir(h_dir, h_path);
419                 if (unlikely(err))
420                         goto out;
421                 wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
422         } else
423                 au_wh_clean(h_dir, h_path, /*isdir*/1);
424         wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
425
426         h_path->dentry = base[AuBrWh_ORPH].dentry;
427         err = au_whdir(h_dir, h_path);
428         if (unlikely(err))
429                 goto out;
430         wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
431
432 out:
433         return err;
434 }
435
436 /*
437  * initialize the whiteout base file/dir for @br.
438  */
439 int au_wh_init(struct dentry *h_root, struct au_branch *br,
440                struct super_block *sb)
441 {
442         int err, i;
443         const unsigned char do_plink
444                 = !!au_opt_test(au_mntflags(sb), PLINK);
445         struct path path = {
446                 .mnt = br->br_mnt
447         };
448         struct inode *h_dir;
449         struct au_wbr *wbr = br->br_wbr;
450         static const struct qstr base_name[] = {
451                 [AuBrWh_BASE] = {
452                         .name   = AUFS_BASE_NAME,
453                         .len    = sizeof(AUFS_BASE_NAME) - 1
454                 },
455                 [AuBrWh_PLINK] = {
456                         .name   = AUFS_PLINKDIR_NAME,
457                         .len    = sizeof(AUFS_PLINKDIR_NAME) - 1
458                 },
459                 [AuBrWh_ORPH] = {
460                         .name   = AUFS_ORPHDIR_NAME,
461                         .len    = sizeof(AUFS_ORPHDIR_NAME) - 1
462                 }
463         };
464         struct au_wh_base base[] = {
465                 [AuBrWh_BASE] = {
466                         .name   = base_name + AuBrWh_BASE,
467                         .dentry = NULL
468                 },
469                 [AuBrWh_PLINK] = {
470                         .name   = base_name + AuBrWh_PLINK,
471                         .dentry = NULL
472                 },
473                 [AuBrWh_ORPH] = {
474                         .name   = base_name + AuBrWh_ORPH,
475                         .dentry = NULL
476                 }
477         };
478
479         if (wbr)
480                 WbrWhMustWriteLock(wbr);
481
482         for (i = 0; i < AuBrWh_Last; i++) {
483                 /* doubly whiteouted */
484                 struct dentry *d;
485
486                 d = au_wh_lkup(h_root, (void *)base[i].name, br);
487                 err = PTR_ERR(d);
488                 if (IS_ERR(d))
489                         goto out;
490
491                 base[i].dentry = d;
492                 AuDebugOn(wbr
493                           && wbr->wbr_wh[i]
494                           && wbr->wbr_wh[i] != base[i].dentry);
495         }
496
497         if (wbr)
498                 for (i = 0; i < AuBrWh_Last; i++) {
499                         dput(wbr->wbr_wh[i]);
500                         wbr->wbr_wh[i] = NULL;
501                 }
502
503         err = 0;
504         if (!au_br_writable(br->br_perm)) {
505                 h_dir = h_root->d_inode;
506                 au_wh_init_ro(h_dir, base, &path);
507         } else if (!au_br_wh_linkable(br->br_perm)) {
508                 err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
509                 if (err > 0)
510                         goto out;
511                 else if (err)
512                         goto out_err;
513         } else {
514                 err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
515                 if (err > 0)
516                         goto out;
517                 else if (err)
518                         goto out_err;
519         }
520         goto out; /* success */
521
522 out_err:
523         pr_err("an error(%d) on the writable branch %.*s(%s)\n",
524                err, AuDLNPair(h_root), au_sbtype(h_root->d_sb));
525 out:
526         for (i = 0; i < AuBrWh_Last; i++)
527                 dput(base[i].dentry);
528         return err;
529 }
530
531 /* ---------------------------------------------------------------------- */
532 /*
533  * whiteouts are all hard-linked usually.
534  * when its link count reaches a ceiling, we create a new whiteout base
535  * asynchronously.
536  */
537
/*
 * the argument of reinit_br_wh(), allocated by kick_reinit_br_wh() and
 * freed by reinit_br_wh().
 */
struct reinit_br_wh {
        /* the aufs superblock */
        struct super_block *sb;
        /* the branch whose whiteout base is re-created */
        struct au_branch *br;
};
542
543 static void reinit_br_wh(void *arg)
544 {
545         int err;
546         aufs_bindex_t bindex;
547         struct path h_path;
548         struct reinit_br_wh *a = arg;
549         struct au_wbr *wbr;
550         struct inode *dir;
551         struct dentry *h_root;
552         struct au_hinode *hdir;
553
554         err = 0;
555         wbr = a->br->br_wbr;
556         /* big aufs lock */
557         si_noflush_write_lock(a->sb);
558         if (!au_br_writable(a->br->br_perm))
559                 goto out;
560         bindex = au_br_index(a->sb, a->br->br_id);
561         if (unlikely(bindex < 0))
562                 goto out;
563
564         di_read_lock_parent(a->sb->s_root, AuLock_IR);
565         dir = a->sb->s_root->d_inode;
566         hdir = au_hi(dir, bindex);
567         h_root = au_h_dptr(a->sb->s_root, bindex);
568
569         au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
570         wbr_wh_write_lock(wbr);
571         err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
572                           h_root, a->br);
573         if (!err) {
574                 err = mnt_want_write(a->br->br_mnt);
575                 if (!err) {
576                         h_path.dentry = wbr->wbr_whbase;
577                         h_path.mnt = a->br->br_mnt;
578                         err = vfsub_unlink(hdir->hi_inode, &h_path, /*force*/0);
579                         mnt_drop_write(a->br->br_mnt);
580                 }
581         } else {
582                 pr_warning("%.*s is moved, ignored\n",
583                            AuDLNPair(wbr->wbr_whbase));
584                 err = 0;
585         }
586         dput(wbr->wbr_whbase);
587         wbr->wbr_whbase = NULL;
588         if (!err)
589                 err = au_wh_init(h_root, a->br, a->sb);
590         wbr_wh_write_unlock(wbr);
591         au_hn_imtx_unlock(hdir);
592         di_read_unlock(a->sb->s_root, AuLock_IR);
593
594 out:
595         if (wbr)
596                 atomic_dec(&wbr->wbr_wh_running);
597         atomic_dec(&a->br->br_count);
598         si_write_unlock(a->sb);
599         au_nwt_done(&au_sbi(a->sb)->si_nowait);
600         kfree(arg);
601         if (unlikely(err))
602                 AuIOErr("err %d\n", err);
603 }
604
605 static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
606 {
607         int do_dec, wkq_err;
608         struct reinit_br_wh *arg;
609
610         do_dec = 1;
611         if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
612                 goto out;
613
614         /* ignore ENOMEM */
615         arg = kmalloc(sizeof(*arg), GFP_NOFS);
616         if (arg) {
617                 /*
618                  * dec(wh_running), kfree(arg) and dec(br_count)
619                  * in reinit function
620                  */
621                 arg->sb = sb;
622                 arg->br = br;
623                 atomic_inc(&br->br_count);
624                 wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
625                 if (unlikely(wkq_err)) {
626                         atomic_dec(&br->br_wbr->wbr_wh_running);
627                         atomic_dec(&br->br_count);
628                         kfree(arg);
629                 }
630                 do_dec = 0;
631         }
632
633 out:
634         if (do_dec)
635                 atomic_dec(&br->br_wbr->wbr_wh_running);
636 }
637
638 /* ---------------------------------------------------------------------- */
639
640 /*
641  * create the whiteout @wh.
642  */
643 static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
644                              struct dentry *wh)
645 {
646         int err;
647         struct path h_path = {
648                 .dentry = wh
649         };
650         struct au_branch *br;
651         struct au_wbr *wbr;
652         struct dentry *h_parent;
653         struct inode *h_dir;
654
655         h_parent = wh->d_parent; /* dir inode is locked */
656         h_dir = h_parent->d_inode;
657         IMustLock(h_dir);
658
659         br = au_sbr(sb, bindex);
660         h_path.mnt = br->br_mnt;
661         wbr = br->br_wbr;
662         wbr_wh_read_lock(wbr);
663         if (wbr->wbr_whbase) {
664                 err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path);
665                 if (!err || err != -EMLINK)
666                         goto out;
667
668                 /* link count full. re-initialize br_whbase. */
669                 kick_reinit_br_wh(sb, br);
670         }
671
672         /* return this error in this context */
673         err = vfsub_create(h_dir, &h_path, WH_MASK);
674
675 out:
676         wbr_wh_read_unlock(wbr);
677         return err;
678 }
679
680 /* ---------------------------------------------------------------------- */
681
682 /*
683  * create or remove the diropq.
684  */
685 static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
686                                 unsigned int flags)
687 {
688         struct dentry *opq_dentry, *h_dentry;
689         struct super_block *sb;
690         struct au_branch *br;
691         int err;
692
693         sb = dentry->d_sb;
694         br = au_sbr(sb, bindex);
695         h_dentry = au_h_dptr(dentry, bindex);
696         opq_dentry = au_lkup_one(&diropq_name, h_dentry, br, /*nd*/NULL);
697         if (IS_ERR(opq_dentry))
698                 goto out;
699
700         if (au_ftest_diropq(flags, CREATE)) {
701                 err = link_or_create_wh(sb, bindex, opq_dentry);
702                 if (!err) {
703                         au_set_dbdiropq(dentry, bindex);
704                         goto out; /* success */
705                 }
706         } else {
707                 struct path tmp = {
708                         .dentry = opq_dentry,
709                         .mnt    = br->br_mnt
710                 };
711                 err = do_unlink_wh(au_h_iptr(dentry->d_inode, bindex), &tmp);
712                 if (!err)
713                         au_set_dbdiropq(dentry, -1);
714         }
715         dput(opq_dentry);
716         opq_dentry = ERR_PTR(err);
717
718 out:
719         return opq_dentry;
720 }
721
722 struct do_diropq_args {
723         struct dentry **errp;
724         struct dentry *dentry;
725         aufs_bindex_t bindex;
726         unsigned int flags;
727 };
728
729 static void call_do_diropq(void *args)
730 {
731         struct do_diropq_args *a = args;
732         *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
733 }
734
735 struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
736                              unsigned int flags)
737 {
738         struct dentry *diropq, *h_dentry;
739
740         h_dentry = au_h_dptr(dentry, bindex);
741         if (!au_test_h_perm_sio(h_dentry->d_inode, MAY_EXEC | MAY_WRITE))
742                 diropq = do_diropq(dentry, bindex, flags);
743         else {
744                 int wkq_err;
745                 struct do_diropq_args args = {
746                         .errp           = &diropq,
747                         .dentry         = dentry,
748                         .bindex         = bindex,
749                         .flags          = flags
750                 };
751
752                 wkq_err = au_wkq_wait(call_do_diropq, &args);
753                 if (unlikely(wkq_err))
754                         diropq = ERR_PTR(wkq_err);
755         }
756
757         return diropq;
758 }
759
760 /* ---------------------------------------------------------------------- */
761
762 /*
763  * lookup whiteout dentry.
764  * @h_parent: lower parent dentry which must exist and be locked
765  * @base_name: name of dentry which will be whiteouted
766  * returns dentry for whiteout.
767  */
768 struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
769                           struct au_branch *br)
770 {
771         int err;
772         struct qstr wh_name;
773         struct dentry *wh_dentry;
774
775         err = au_wh_name_alloc(&wh_name, base_name);
776         wh_dentry = ERR_PTR(err);
777         if (!err) {
778                 wh_dentry = au_lkup_one(&wh_name, h_parent, br, /*nd*/NULL);
779                 kfree(wh_name.name);
780         }
781         return wh_dentry;
782 }
783
784 /*
785  * link/create a whiteout for @dentry on @bindex.
786  */
787 struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
788                             struct dentry *h_parent)
789 {
790         struct dentry *wh_dentry;
791         struct super_block *sb;
792         int err;
793
794         sb = dentry->d_sb;
795         wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
796         if (!IS_ERR(wh_dentry) && !wh_dentry->d_inode) {
797                 err = link_or_create_wh(sb, bindex, wh_dentry);
798                 if (!err)
799                         au_set_dbwh(dentry, bindex);
800                 else {
801                         dput(wh_dentry);
802                         wh_dentry = ERR_PTR(err);
803                 }
804         }
805
806         return wh_dentry;
807 }
808
809 /* ---------------------------------------------------------------------- */
810
811 /* Delete all whiteouts in this directory on branch bindex. */
812 static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
813                            aufs_bindex_t bindex, struct au_branch *br)
814 {
815         int err;
816         unsigned long ul, n;
817         struct qstr wh_name;
818         char *p;
819         struct hlist_head *head;
820         struct au_vdir_wh *tpos;
821         struct hlist_node *pos;
822         struct au_vdir_destr *str;
823
824         err = -ENOMEM;
825         p = __getname_gfp(GFP_NOFS);
826         wh_name.name = p;
827         if (unlikely(!wh_name.name))
828                 goto out;
829
830         err = 0;
831         memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
832         p += AUFS_WH_PFX_LEN;
833         n = whlist->nh_num;
834         head = whlist->nh_head;
835         for (ul = 0; !err && ul < n; ul++, head++) {
836                 hlist_for_each_entry(tpos, pos, head, wh_hash) {
837                         if (tpos->wh_bindex != bindex)
838                                 continue;
839
840                         str = &tpos->wh_str;
841                         if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
842                                 memcpy(p, str->name, str->len);
843                                 wh_name.len = AUFS_WH_PFX_LEN + str->len;
844                                 err = unlink_wh_name(h_dentry, &wh_name, br);
845                                 if (!err)
846                                         continue;
847                                 break;
848                         }
849                         AuIOErr("whiteout name too long %.*s\n",
850                                 str->len, str->name);
851                         err = -EIO;
852                         break;
853                 }
854         }
855         __putname(wh_name.name);
856
857 out:
858         return err;
859 }
860
861 struct del_wh_children_args {
862         int *errp;
863         struct dentry *h_dentry;
864         struct au_nhash *whlist;
865         aufs_bindex_t bindex;
866         struct au_branch *br;
867 };
868
869 static void call_del_wh_children(void *args)
870 {
871         struct del_wh_children_args *a = args;
872         *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
873 }
874
875 /* ---------------------------------------------------------------------- */
876
877 struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
878 {
879         struct au_whtmp_rmdir *whtmp;
880         int err;
881         unsigned int rdhash;
882
883         SiMustAnyLock(sb);
884
885         whtmp = kmalloc(sizeof(*whtmp), gfp);
886         if (unlikely(!whtmp)) {
887                 whtmp = ERR_PTR(-ENOMEM);
888                 goto out;
889         }
890
891         whtmp->dir = NULL;
892         whtmp->br = NULL;
893         whtmp->wh_dentry = NULL;
894         /* no estimation for dir size */
895         rdhash = au_sbi(sb)->si_rdhash;
896         if (!rdhash)
897                 rdhash = AUFS_RDHASH_DEF;
898         err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
899         if (unlikely(err)) {
900                 kfree(whtmp);
901                 whtmp = ERR_PTR(err);
902         }
903
904 out:
905         return whtmp;
906 }
907
908 void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
909 {
910         if (whtmp->br)
911                 atomic_dec(&whtmp->br->br_count);
912         dput(whtmp->wh_dentry);
913         iput(whtmp->dir);
914         au_nhash_wh_free(&whtmp->whlist);
915         kfree(whtmp);
916 }
917
918 /*
919  * rmdir the whiteouted temporary named dir @h_dentry.
920  * @whlist: whiteouted children.
921  */
922 int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
923                    struct dentry *wh_dentry, struct au_nhash *whlist)
924 {
925         int err;
926         struct path h_tmp;
927         struct inode *wh_inode, *h_dir;
928         struct au_branch *br;
929
930         h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
931         IMustLock(h_dir);
932
933         br = au_sbr(dir->i_sb, bindex);
934         wh_inode = wh_dentry->d_inode;
935         mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD);
936
937         /*
938          * someone else might change some whiteouts while we were sleeping.
939          * it means this whlist may have an obsoleted entry.
940          */
941         if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
942                 err = del_wh_children(wh_dentry, whlist, bindex, br);
943         else {
944                 int wkq_err;
945                 struct del_wh_children_args args = {
946                         .errp           = &err,
947                         .h_dentry       = wh_dentry,
948                         .whlist         = whlist,
949                         .bindex         = bindex,
950                         .br             = br
951                 };
952
953                 wkq_err = au_wkq_wait(call_del_wh_children, &args);
954                 if (unlikely(wkq_err))
955                         err = wkq_err;
956         }
957         mutex_unlock(&wh_inode->i_mutex);
958
959         if (!err) {
960                 h_tmp.dentry = wh_dentry;
961                 h_tmp.mnt = br->br_mnt;
962                 err = vfsub_rmdir(h_dir, &h_tmp);
963         }
964
965         if (!err) {
966                 if (au_ibstart(dir) == bindex) {
967                         /* todo: dir->i_mutex is necessary */
968                         au_cpup_attr_timesizes(dir);
969                         vfsub_drop_nlink(dir);
970                 }
971                 return 0; /* success */
972         }
973
974         pr_warning("failed removing %.*s(%d), ignored\n",
975                    AuDLNPair(wh_dentry), err);
976         return err;
977 }
978
979 static void call_rmdir_whtmp(void *args)
980 {
981         int err;
982         aufs_bindex_t bindex;
983         struct au_whtmp_rmdir *a = args;
984         struct super_block *sb;
985         struct dentry *h_parent;
986         struct inode *h_dir;
987         struct au_hinode *hdir;
988
989         /* rmdir by nfsd may cause deadlock with this i_mutex */
990         /* mutex_lock(&a->dir->i_mutex); */
991         err = -EROFS;
992         sb = a->dir->i_sb;
993         si_read_lock(sb, !AuLock_FLUSH);
994         if (!au_br_writable(a->br->br_perm))
995                 goto out;
996         bindex = au_br_index(sb, a->br->br_id);
997         if (unlikely(bindex < 0))
998                 goto out;
999
1000         err = -EIO;
1001         ii_write_lock_parent(a->dir);
1002         h_parent = dget_parent(a->wh_dentry);
1003         h_dir = h_parent->d_inode;
1004         hdir = au_hi(a->dir, bindex);
1005         au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1006         err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
1007                           a->br);
1008         if (!err) {
1009                 err = mnt_want_write(a->br->br_mnt);
1010                 if (!err) {
1011                         err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry,
1012                                              &a->whlist);
1013                         mnt_drop_write(a->br->br_mnt);
1014                 }
1015         }
1016         au_hn_imtx_unlock(hdir);
1017         dput(h_parent);
1018         ii_write_unlock(a->dir);
1019
1020 out:
1021         /* mutex_unlock(&a->dir->i_mutex); */
1022         au_whtmp_rmdir_free(a);
1023         si_read_unlock(sb);
1024         au_nwt_done(&au_sbi(sb)->si_nowait);
1025         if (unlikely(err))
1026                 AuIOErr("err %d\n", err);
1027 }
1028
1029 void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
1030                          struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
1031 {
1032         int wkq_err;
1033         struct super_block *sb;
1034
1035         IMustLock(dir);
1036
1037         /* all post-process will be done in do_rmdir_whtmp(). */
1038         sb = dir->i_sb;
1039         args->dir = au_igrab(dir);
1040         args->br = au_sbr(sb, bindex);
1041         atomic_inc(&args->br->br_count);
1042         args->wh_dentry = dget(wh_dentry);
1043         wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
1044         if (unlikely(wkq_err)) {
1045                 pr_warning("rmdir error %.*s (%d), ignored\n",
1046                            AuDLNPair(wh_dentry), wkq_err);
1047                 au_whtmp_rmdir_free(args);
1048         }
1049 }