update aufs to its latest standalone 3.2 branch
[pandora-kernel.git] / fs / aufs / dentry.c
1 /*
2  * Copyright (C) 2005-2013 Junjiro R. Okajima
3  *
4  * This program, aufs is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18
19 /*
20  * lookup and dentry operations
21  */
22
23 #include <linux/namei.h>
24 #include "aufs.h"
25
26 static void au_h_nd(struct nameidata *h_nd, struct nameidata *nd)
27 {
28         if (nd) {
29                 *h_nd = *nd;
30
31                 /*
32                  * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
33                  * due to whiteout and branch permission.
34                  */
35                 h_nd->flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
36                                  | LOOKUP_FOLLOW | LOOKUP_EXCL);
37                 /* unnecessary? */
38                 h_nd->intent.open.file = NULL;
39         } else
40                 memset(h_nd, 0, sizeof(*h_nd));
41 }
42
/*
 * Arguments of au_lkup_one(), bundled into one object so that
 * au_call_lkup_one() can receive them through a single void pointer.
 */
struct au_lkup_one_args {
        struct dentry           **errp;         /* receives the lookup result */
        struct qstr             *name;
        struct dentry           *h_parent;
        struct au_branch        *br;
        struct nameidata        *nd;
};
50
51 struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
52                            struct au_branch *br, struct nameidata *nd)
53 {
54         struct dentry *h_dentry;
55         int err;
56         struct nameidata h_nd;
57
58         if (au_test_fs_null_nd(h_parent->d_sb))
59                 return vfsub_lookup_one_len(name->name, h_parent, name->len);
60
61         au_h_nd(&h_nd, nd);
62         h_nd.path.dentry = h_parent;
63         h_nd.path.mnt = au_br_mnt(br);
64
65         err = vfsub_name_hash(name->name, &h_nd.last, name->len);
66         h_dentry = ERR_PTR(err);
67         if (!err) {
68                 path_get(&h_nd.path);
69                 h_dentry = vfsub_lookup_hash(&h_nd);
70                 path_put(&h_nd.path);
71         }
72
73         AuTraceErrPtr(h_dentry);
74         return h_dentry;
75 }
76
77 static void au_call_lkup_one(void *args)
78 {
79         struct au_lkup_one_args *a = args;
80         *a->errp = au_lkup_one(a->name, a->h_parent, a->br, a->nd);
81 }
82
/* bits for au_do_lookup_args.flags, with test/set/clear helpers */
#define AuLkup_ALLOW_NEG        1

#define au_ftest_lkup(flags, name)      ((flags) & AuLkup_##name)
#define au_fset_lkup(flags, name)               \
        do {                                    \
                (flags) |= AuLkup_##name;       \
        } while (0)
#define au_fclr_lkup(flags, name)               \
        do {                                    \
                (flags) &= ~AuLkup_##name;      \
        } while (0)
89
/* per-call state shared by au_lkup_dentry() and au_do_lookup() */
struct au_do_lookup_args {
        unsigned int            flags;  /* AuLkup_* bits */
        mode_t                  type;   /* expected S_IFMT type, 0 if unknown */
        struct nameidata        *nd;    /* passed through to au_lkup_one() */
};
95
96 /*
97  * returns positive/negative dentry, NULL or an error.
98  * NULL means whiteout-ed or not-found.
99  */
100 static struct dentry*
101 au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
102              aufs_bindex_t bindex, struct qstr *wh_name,
103              struct au_do_lookup_args *args)
104 {
105         struct dentry *h_dentry;
106         struct inode *h_inode, *inode;
107         struct au_branch *br;
108         int wh_found, opq;
109         unsigned char wh_able;
110         const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
111
112         wh_found = 0;
113         br = au_sbr(dentry->d_sb, bindex);
114         wh_able = !!au_br_whable(br->br_perm);
115         if (wh_able)
116                 wh_found = au_wh_test(h_parent, wh_name, br, /*try_sio*/0);
117         h_dentry = ERR_PTR(wh_found);
118         if (!wh_found)
119                 goto real_lookup;
120         if (unlikely(wh_found < 0))
121                 goto out;
122
123         /* We found a whiteout */
124         /* au_set_dbend(dentry, bindex); */
125         au_set_dbwh(dentry, bindex);
126         if (!allow_neg)
127                 return NULL; /* success */
128
129 real_lookup:
130         h_dentry = au_lkup_one(&dentry->d_name, h_parent, br, args->nd);
131         if (IS_ERR(h_dentry))
132                 goto out;
133
134         h_inode = h_dentry->d_inode;
135         if (!h_inode) {
136                 if (!allow_neg)
137                         goto out_neg;
138         } else if (wh_found
139                    || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
140                 goto out_neg;
141
142         if (au_dbend(dentry) <= bindex)
143                 au_set_dbend(dentry, bindex);
144         if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
145                 au_set_dbstart(dentry, bindex);
146         au_set_h_dptr(dentry, bindex, h_dentry);
147
148         inode = dentry->d_inode;
149         if (!h_inode || !S_ISDIR(h_inode->i_mode) || !wh_able
150             || (inode && !S_ISDIR(inode->i_mode)))
151                 goto out; /* success */
152
153         mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
154         opq = au_diropq_test(h_dentry, br);
155         mutex_unlock(&h_inode->i_mutex);
156         if (opq > 0)
157                 au_set_dbdiropq(dentry, bindex);
158         else if (unlikely(opq < 0)) {
159                 au_set_h_dptr(dentry, bindex, NULL);
160                 h_dentry = ERR_PTR(opq);
161         }
162         goto out;
163
164 out_neg:
165         dput(h_dentry);
166         h_dentry = NULL;
167 out:
168         return h_dentry;
169 }
170
171 static int au_test_shwh(struct super_block *sb, const struct qstr *name)
172 {
173         if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
174                      && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
175                 return -EPERM;
176         return 0;
177 }
178
179 /*
180  * returns the number of lower positive dentries,
181  * otherwise an error.
182  * can be called at unlinking with @type is zero.
183  */
184 int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
185                    struct nameidata *nd)
186 {
187         int npositive, err;
188         aufs_bindex_t bindex, btail, bdiropq;
189         unsigned char isdir;
190         struct qstr whname;
191         struct au_do_lookup_args args = {
192                 .flags  = 0,
193                 .type   = type,
194                 .nd     = nd
195         };
196         const struct qstr *name = &dentry->d_name;
197         struct dentry *parent;
198         struct inode *inode;
199
200         err = au_test_shwh(dentry->d_sb, name);
201         if (unlikely(err))
202                 goto out;
203
204         err = au_wh_name_alloc(&whname, name);
205         if (unlikely(err))
206                 goto out;
207
208         inode = dentry->d_inode;
209         isdir = !!(inode && S_ISDIR(inode->i_mode));
210         if (!type)
211                 au_fset_lkup(args.flags, ALLOW_NEG);
212
213         npositive = 0;
214         parent = dget_parent(dentry);
215         btail = au_dbtaildir(parent);
216         for (bindex = bstart; bindex <= btail; bindex++) {
217                 struct dentry *h_parent, *h_dentry;
218                 struct inode *h_inode, *h_dir;
219
220                 h_dentry = au_h_dptr(dentry, bindex);
221                 if (h_dentry) {
222                         if (h_dentry->d_inode)
223                                 npositive++;
224                         if (type != S_IFDIR)
225                                 break;
226                         continue;
227                 }
228                 h_parent = au_h_dptr(parent, bindex);
229                 if (!h_parent)
230                         continue;
231                 h_dir = h_parent->d_inode;
232                 if (!h_dir || !S_ISDIR(h_dir->i_mode))
233                         continue;
234
235                 mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
236                 h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
237                                         &args);
238                 mutex_unlock(&h_dir->i_mutex);
239                 err = PTR_ERR(h_dentry);
240                 if (IS_ERR(h_dentry))
241                         goto out_parent;
242                 au_fclr_lkup(args.flags, ALLOW_NEG);
243
244                 if (au_dbwh(dentry) >= 0)
245                         break;
246                 if (!h_dentry)
247                         continue;
248                 h_inode = h_dentry->d_inode;
249                 if (!h_inode)
250                         continue;
251                 npositive++;
252                 if (!args.type)
253                         args.type = h_inode->i_mode & S_IFMT;
254                 if (args.type != S_IFDIR)
255                         break;
256                 else if (isdir) {
257                         /* the type of lower may be different */
258                         bdiropq = au_dbdiropq(dentry);
259                         if (bdiropq >= 0 && bdiropq <= bindex)
260                                 break;
261                 }
262         }
263
264         if (npositive) {
265                 AuLabel(positive);
266                 au_update_dbstart(dentry);
267         }
268         err = npositive;
269         if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
270                      && au_dbstart(dentry) < 0)) {
271                 err = -EIO;
272                 AuIOErr("both of real entry and whiteout found, %.*s, err %d\n",
273                         AuDLNPair(dentry), err);
274         }
275
276 out_parent:
277         dput(parent);
278         kfree(whname.name);
279 out:
280         return err;
281 }
282
283 struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
284                                struct au_branch *br)
285 {
286         struct dentry *dentry;
287         int wkq_err;
288
289         if (!au_test_h_perm_sio(parent->d_inode, MAY_EXEC))
290                 dentry = au_lkup_one(name, parent, br, /*nd*/NULL);
291         else {
292                 struct au_lkup_one_args args = {
293                         .errp           = &dentry,
294                         .name           = name,
295                         .h_parent       = parent,
296                         .br             = br,
297                         .nd             = NULL
298                 };
299
300                 wkq_err = au_wkq_wait(au_call_lkup_one, &args);
301                 if (unlikely(wkq_err))
302                         dentry = ERR_PTR(wkq_err);
303         }
304
305         return dentry;
306 }
307
308 /*
309  * lookup @dentry on @bindex which should be negative.
310  */
311 int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh)
312 {
313         int err;
314         struct dentry *parent, *h_parent, *h_dentry;
315         struct au_branch *br;
316
317         parent = dget_parent(dentry);
318         h_parent = au_h_dptr(parent, bindex);
319         br = au_sbr(dentry->d_sb, bindex);
320         if (wh)
321                 h_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
322         else
323                 h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent, br);
324         err = PTR_ERR(h_dentry);
325         if (IS_ERR(h_dentry))
326                 goto out;
327         if (unlikely(h_dentry->d_inode)) {
328                 err = -EIO;
329                 AuIOErr("%.*s should be negative on b%d.\n",
330                         AuDLNPair(h_dentry), bindex);
331                 dput(h_dentry);
332                 goto out;
333         }
334
335         err = 0;
336         if (bindex < au_dbstart(dentry))
337                 au_set_dbstart(dentry, bindex);
338         if (au_dbend(dentry) < bindex)
339                 au_set_dbend(dentry, bindex);
340         au_set_h_dptr(dentry, bindex, h_dentry);
341
342 out:
343         dput(parent);
344         return err;
345 }
346
347 /* ---------------------------------------------------------------------- */
348
349 /* subset of struct inode */
350 struct au_iattr {
351         unsigned long           i_ino;
352         /* unsigned int         i_nlink; */
353         uid_t                   i_uid;
354         gid_t                   i_gid;
355         u64                     i_version;
356 /*
357         loff_t                  i_size;
358         blkcnt_t                i_blocks;
359 */
360         umode_t                 i_mode;
361 };
362
363 static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
364 {
365         ia->i_ino = h_inode->i_ino;
366         /* ia->i_nlink = h_inode->i_nlink; */
367         ia->i_uid = h_inode->i_uid;
368         ia->i_gid = h_inode->i_gid;
369         ia->i_version = h_inode->i_version;
370 /*
371         ia->i_size = h_inode->i_size;
372         ia->i_blocks = h_inode->i_blocks;
373 */
374         ia->i_mode = (h_inode->i_mode & S_IFMT);
375 }
376
377 static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
378 {
379         return ia->i_ino != h_inode->i_ino
380                 /* || ia->i_nlink != h_inode->i_nlink */
381                 || ia->i_uid != h_inode->i_uid
382                 || ia->i_gid != h_inode->i_gid
383                 || ia->i_version != h_inode->i_version
384 /*
385                 || ia->i_size != h_inode->i_size
386                 || ia->i_blocks != h_inode->i_blocks
387 */
388                 || ia->i_mode != (h_inode->i_mode & S_IFMT);
389 }
390
391 static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
392                               struct au_branch *br)
393 {
394         int err;
395         struct au_iattr ia;
396         struct inode *h_inode;
397         struct dentry *h_d;
398         struct super_block *h_sb;
399
400         err = 0;
401         memset(&ia, -1, sizeof(ia));
402         h_sb = h_dentry->d_sb;
403         h_inode = h_dentry->d_inode;
404         if (h_inode)
405                 au_iattr_save(&ia, h_inode);
406         else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
407                 /* nfs d_revalidate may return 0 for negative dentry */
408                 /* fuse d_revalidate always return 0 for negative dentry */
409                 goto out;
410
411         /* main purpose is namei.c:cached_lookup() and d_revalidate */
412         h_d = au_lkup_one(&h_dentry->d_name, h_parent, br, /*nd*/NULL);
413         err = PTR_ERR(h_d);
414         if (IS_ERR(h_d))
415                 goto out;
416
417         err = 0;
418         if (unlikely(h_d != h_dentry
419                      || h_d->d_inode != h_inode
420                      || (h_inode && au_iattr_test(&ia, h_inode))))
421                 err = au_busy_or_stale();
422         dput(h_d);
423
424 out:
425         AuTraceErr(err);
426         return err;
427 }
428
429 int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
430                 struct dentry *h_parent, struct au_branch *br)
431 {
432         int err;
433
434         err = 0;
435         if (udba == AuOpt_UDBA_REVAL
436             && !au_test_fs_remote(h_dentry->d_sb)) {
437                 IMustLock(h_dir);
438                 err = (h_dentry->d_parent->d_inode != h_dir);
439         } else if (udba != AuOpt_UDBA_NONE)
440                 err = au_h_verify_dentry(h_dentry, h_parent, br);
441
442         return err;
443 }
444
445 /* ---------------------------------------------------------------------- */
446
447 static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
448 {
449         int err;
450         aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq;
451         struct au_hdentry tmp, *p, *q;
452         struct au_dinfo *dinfo;
453         struct super_block *sb;
454
455         DiMustWriteLock(dentry);
456
457         sb = dentry->d_sb;
458         dinfo = au_di(dentry);
459         bend = dinfo->di_bend;
460         bwh = dinfo->di_bwh;
461         bdiropq = dinfo->di_bdiropq;
462         p = dinfo->di_hdentry + dinfo->di_bstart;
463         for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) {
464                 if (!p->hd_dentry)
465                         continue;
466
467                 new_bindex = au_br_index(sb, p->hd_id);
468                 if (new_bindex == bindex)
469                         continue;
470
471                 if (dinfo->di_bwh == bindex)
472                         bwh = new_bindex;
473                 if (dinfo->di_bdiropq == bindex)
474                         bdiropq = new_bindex;
475                 if (new_bindex < 0) {
476                         au_hdput(p);
477                         p->hd_dentry = NULL;
478                         continue;
479                 }
480
481                 /* swap two lower dentries, and loop again */
482                 q = dinfo->di_hdentry + new_bindex;
483                 tmp = *q;
484                 *q = *p;
485                 *p = tmp;
486                 if (tmp.hd_dentry) {
487                         bindex--;
488                         p--;
489                 }
490         }
491
492         dinfo->di_bwh = -1;
493         if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh))
494                 dinfo->di_bwh = bwh;
495
496         dinfo->di_bdiropq = -1;
497         if (bdiropq >= 0
498             && bdiropq <= au_sbend(sb)
499             && au_sbr_whable(sb, bdiropq))
500                 dinfo->di_bdiropq = bdiropq;
501
502         err = -EIO;
503         dinfo->di_bstart = -1;
504         dinfo->di_bend = -1;
505         bend = au_dbend(parent);
506         p = dinfo->di_hdentry;
507         for (bindex = 0; bindex <= bend; bindex++, p++)
508                 if (p->hd_dentry) {
509                         dinfo->di_bstart = bindex;
510                         break;
511                 }
512
513         if (dinfo->di_bstart >= 0) {
514                 p = dinfo->di_hdentry + bend;
515                 for (bindex = bend; bindex >= 0; bindex--, p--)
516                         if (p->hd_dentry) {
517                                 dinfo->di_bend = bindex;
518                                 err = 0;
519                                 break;
520                         }
521         }
522
523         return err;
524 }
525
526 static void au_do_hide(struct dentry *dentry)
527 {
528         struct inode *inode;
529
530         inode = dentry->d_inode;
531         if (inode) {
532                 if (!S_ISDIR(inode->i_mode)) {
533                         if (inode->i_nlink && !d_unhashed(dentry))
534                                 drop_nlink(inode);
535                 } else {
536                         clear_nlink(inode);
537                         /* stop next lookup */
538                         inode->i_flags |= S_DEAD;
539                 }
540                 smp_mb(); /* necessary? */
541         }
542         d_drop(dentry);
543 }
544
545 static int au_hide_children(struct dentry *parent)
546 {
547         int err, i, j, ndentry;
548         struct au_dcsub_pages dpages;
549         struct au_dpage *dpage;
550         struct dentry *dentry;
551
552         err = au_dpages_init(&dpages, GFP_NOFS);
553         if (unlikely(err))
554                 goto out;
555         err = au_dcsub_pages(&dpages, parent, NULL, NULL);
556         if (unlikely(err))
557                 goto out_dpages;
558
559         /* in reverse order */
560         for (i = dpages.ndpage - 1; i >= 0; i--) {
561                 dpage = dpages.dpages + i;
562                 ndentry = dpage->ndentry;
563                 for (j = ndentry - 1; j >= 0; j--) {
564                         dentry = dpage->dentries[j];
565                         if (dentry != parent)
566                                 au_do_hide(dentry);
567                 }
568         }
569
570 out_dpages:
571         au_dpages_free(&dpages);
572 out:
573         return err;
574 }
575
576 static void au_hide(struct dentry *dentry)
577 {
578         int err;
579         struct inode *inode;
580
581         AuDbgDentry(dentry);
582         inode = dentry->d_inode;
583         if (inode && S_ISDIR(inode->i_mode)) {
584                 /* shrink_dcache_parent(dentry); */
585                 err = au_hide_children(dentry);
586                 if (unlikely(err))
587                         AuIOErr("%.*s, failed hiding children, ignored %d\n",
588                                 AuDLNPair(dentry), err);
589         }
590         au_do_hide(dentry);
591 }
592
593 /*
594  * By adding a dirty branch, a cached dentry may be affected in various ways.
595  *
596  * a dirty branch is added
597  * - on the top of layers
598  * - in the middle of layers
599  * - to the bottom of layers
600  *
601  * on the added branch there exists
602  * - a whiteout
603  * - a diropq
604  * - a same named entry
605  *   + exist
606  *     * negative --> positive
607  *     * positive --> positive
608  *       - type is unchanged
609  *       - type is changed
610  *   + doesn't exist
611  *     * negative --> negative
612  *     * positive --> negative (rejected by au_br_del() for non-dir case)
613  * - none
614  */
615 static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
616                                struct au_dinfo *tmp)
617 {
618         int err;
619         aufs_bindex_t bindex, bend;
620         struct {
621                 struct dentry *dentry;
622                 struct inode *inode;
623                 mode_t mode;
624         } orig_h, tmp_h;
625         struct au_hdentry *hd;
626         struct inode *inode, *h_inode;
627         struct dentry *h_dentry;
628
629         err = 0;
630         AuDebugOn(dinfo->di_bstart < 0);
631         orig_h.dentry = dinfo->di_hdentry[dinfo->di_bstart].hd_dentry;
632         orig_h.inode = orig_h.dentry->d_inode;
633         orig_h.mode = 0;
634         if (orig_h.inode)
635                 orig_h.mode = orig_h.inode->i_mode & S_IFMT;
636         memset(&tmp_h, 0, sizeof(tmp_h));
637         if (tmp->di_bstart >= 0) {
638                 tmp_h.dentry = tmp->di_hdentry[tmp->di_bstart].hd_dentry;
639                 tmp_h.inode = tmp_h.dentry->d_inode;
640                 if (tmp_h.inode)
641                         tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
642         }
643
644         inode = dentry->d_inode;
645         if (!orig_h.inode) {
646                 AuDbg("nagative originally\n");
647                 if (inode) {
648                         au_hide(dentry);
649                         goto out;
650                 }
651                 AuDebugOn(inode);
652                 AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
653                 AuDebugOn(dinfo->di_bdiropq != -1);
654
655                 if (!tmp_h.inode) {
656                         AuDbg("negative --> negative\n");
657                         /* should have only one negative lower */
658                         if (tmp->di_bstart >= 0
659                             && tmp->di_bstart < dinfo->di_bstart) {
660                                 AuDebugOn(tmp->di_bstart != tmp->di_bend);
661                                 AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
662                                 au_set_h_dptr(dentry, dinfo->di_bstart, NULL);
663                                 au_di_cp(dinfo, tmp);
664                                 hd = tmp->di_hdentry + tmp->di_bstart;
665                                 au_set_h_dptr(dentry, tmp->di_bstart,
666                                               dget(hd->hd_dentry));
667                         }
668                         au_dbg_verify_dinode(dentry);
669                 } else {
670                         AuDbg("negative --> positive\n");
671                         /*
672                          * similar to the behaviour of creating with bypassing
673                          * aufs.
674                          * unhash it in order to force an error in the
675                          * succeeding create operation.
676                          * we should not set S_DEAD here.
677                          */
678                         d_drop(dentry);
679                         /* au_di_swap(tmp, dinfo); */
680                         au_dbg_verify_dinode(dentry);
681                 }
682         } else {
683                 AuDbg("positive originally\n");
684                 /* inode may be NULL */
685                 AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
686                 if (!tmp_h.inode) {
687                         AuDbg("positive --> negative\n");
688                         /* or bypassing aufs */
689                         au_hide(dentry);
690                         if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_bstart)
691                                 dinfo->di_bwh = tmp->di_bwh;
692                         if (inode)
693                                 err = au_refresh_hinode_self(inode);
694                         au_dbg_verify_dinode(dentry);
695                 } else if (orig_h.mode == tmp_h.mode) {
696                         AuDbg("positive --> positive, same type\n");
697                         if (!S_ISDIR(orig_h.mode)
698                             && dinfo->di_bstart > tmp->di_bstart) {
699                                 /*
700                                  * similar to the behaviour of removing and
701                                  * creating.
702                                  */
703                                 au_hide(dentry);
704                                 if (inode)
705                                         err = au_refresh_hinode_self(inode);
706                                 au_dbg_verify_dinode(dentry);
707                         } else {
708                                 /* fill empty slots */
709                                 if (dinfo->di_bstart > tmp->di_bstart)
710                                         dinfo->di_bstart = tmp->di_bstart;
711                                 if (dinfo->di_bend < tmp->di_bend)
712                                         dinfo->di_bend = tmp->di_bend;
713                                 dinfo->di_bwh = tmp->di_bwh;
714                                 dinfo->di_bdiropq = tmp->di_bdiropq;
715                                 hd = tmp->di_hdentry;
716                                 bend = dinfo->di_bend;
717                                 for (bindex = tmp->di_bstart; bindex <= bend;
718                                      bindex++) {
719                                         if (au_h_dptr(dentry, bindex))
720                                                 continue;
721                                         h_dentry = hd[bindex].hd_dentry;
722                                         if (!h_dentry)
723                                                 continue;
724                                         h_inode = h_dentry->d_inode;
725                                         AuDebugOn(!h_inode);
726                                         AuDebugOn(orig_h.mode
727                                                   != (h_inode->i_mode
728                                                       & S_IFMT));
729                                         au_set_h_dptr(dentry, bindex,
730                                                       dget(h_dentry));
731                                 }
732                                 err = au_refresh_hinode(inode, dentry);
733                                 au_dbg_verify_dinode(dentry);
734                         }
735                 } else {
736                         AuDbg("positive --> positive, different type\n");
737                         /* similar to the behaviour of removing and creating */
738                         au_hide(dentry);
739                         if (inode)
740                                 err = au_refresh_hinode_self(inode);
741                         au_dbg_verify_dinode(dentry);
742                 }
743         }
744
745 out:
746         return err;
747 }
748
749 int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
750 {
751         int err, ebrange;
752         unsigned int sigen;
753         struct au_dinfo *dinfo, *tmp;
754         struct super_block *sb;
755         struct inode *inode;
756
757         DiMustWriteLock(dentry);
758         AuDebugOn(IS_ROOT(dentry));
759         AuDebugOn(!parent->d_inode);
760
761         sb = dentry->d_sb;
762         inode = dentry->d_inode;
763         sigen = au_sigen(sb);
764         err = au_digen_test(parent, sigen);
765         if (unlikely(err))
766                 goto out;
767
768         dinfo = au_di(dentry);
769         err = au_di_realloc(dinfo, au_sbend(sb) + 1);
770         if (unlikely(err))
771                 goto out;
772         ebrange = au_dbrange_test(dentry);
773         if (!ebrange)
774                 ebrange = au_do_refresh_hdentry(dentry, parent);
775
776         if (d_unhashed(dentry) || ebrange) {
777                 AuDebugOn(au_dbstart(dentry) < 0 && au_dbend(dentry) >= 0);
778                 if (inode)
779                         err = au_refresh_hinode_self(inode);
780                 au_dbg_verify_dinode(dentry);
781                 if (!err)
782                         goto out_dgen; /* success */
783                 goto out;
784         }
785
786         /* temporary dinfo */
787         AuDbgDentry(dentry);
788         err = -ENOMEM;
789         tmp = au_di_alloc(sb, AuLsc_DI_TMP);
790         if (unlikely(!tmp))
791                 goto out;
792         au_di_swap(tmp, dinfo);
793         /* returns the number of positive dentries */
794         /*
795          * if current working dir is removed, it returns an error.
796          * but the dentry is legal.
797          */
798         err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0, /*nd*/NULL);
799         AuDbgDentry(dentry);
800         au_di_swap(tmp, dinfo);
801         if (err == -ENOENT)
802                 err = 0;
803         if (err >= 0) {
804                 /* compare/refresh by dinfo */
805                 AuDbgDentry(dentry);
806                 err = au_refresh_by_dinfo(dentry, dinfo, tmp);
807                 au_dbg_verify_dinode(dentry);
808                 AuTraceErr(err);
809         }
810         au_rw_write_unlock(&tmp->di_rwsem);
811         au_di_free(tmp);
812         if (unlikely(err))
813                 goto out;
814
815 out_dgen:
816         au_update_digen(dentry);
817 out:
818         if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) {
819                 AuIOErr("failed refreshing %.*s, %d\n",
820                         AuDLNPair(dentry), err);
821                 AuDbgDentry(dentry);
822         }
823         AuTraceErr(err);
824         return err;
825 }
826
827 static noinline_for_stack
828 int au_do_h_d_reval(struct dentry *h_dentry, struct nameidata *nd,
829                     struct dentry *dentry, aufs_bindex_t bindex)
830 {
831         int err, valid;
832         int (*reval)(struct dentry *, struct nameidata *);
833
834         err = 0;
835         if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
836                 goto out;
837         reval = h_dentry->d_op->d_revalidate;
838
839         AuDbg("b%d\n", bindex);
840         if (au_test_fs_null_nd(h_dentry->d_sb))
841                 /* it may return tri-state */
842                 valid = reval(h_dentry, NULL);
843         else {
844                 struct nameidata h_nd;
845                 int locked;
846                 struct dentry *parent;
847
848                 au_h_nd(&h_nd, nd);
849                 parent = nd->path.dentry;
850                 locked = (nd && nd->path.dentry != dentry);
851                 if (locked)
852                         di_read_lock_parent(parent, AuLock_IR);
853                 BUG_ON(bindex > au_dbend(parent));
854                 h_nd.path.dentry = au_h_dptr(parent, bindex);
855                 BUG_ON(!h_nd.path.dentry);
856                 h_nd.path.mnt = au_sbr_mnt(parent->d_sb, bindex);
857                 path_get(&h_nd.path);
858                 valid = reval(h_dentry, &h_nd);
859                 path_put(&h_nd.path);
860                 if (locked)
861                         di_read_unlock(parent, AuLock_IR);
862         }
863
864         if (unlikely(valid < 0))
865                 err = valid;
866         else if (!valid)
867                 err = -EINVAL;
868
869 out:
870         AuTraceErr(err);
871         return err;
872 }
873
874 /* todo: remove this */
875 static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
876                           struct nameidata *nd, int do_udba)
877 {
878         int err;
879         umode_t mode, h_mode;
880         aufs_bindex_t bindex, btail, bstart, ibs, ibe;
881         unsigned char plus, unhashed, is_root, h_plus;
882         struct inode *h_inode, *h_cached_inode;
883         struct dentry *h_dentry;
884         struct qstr *name, *h_name;
885
886         err = 0;
887         plus = 0;
888         mode = 0;
889         ibs = -1;
890         ibe = -1;
891         unhashed = !!d_unhashed(dentry);
892         is_root = !!IS_ROOT(dentry);
893         name = &dentry->d_name;
894
895         /*
896          * Theoretically, REVAL test should be unnecessary in case of
897          * {FS,I}NOTIFY.
898          * But {fs,i}notify doesn't fire some necessary events,
899          *      IN_ATTRIB for atime/nlink/pageio
900          *      IN_DELETE for NFS dentry
901          * Let's do REVAL test too.
902          */
903         if (do_udba && inode) {
904                 mode = (inode->i_mode & S_IFMT);
905                 plus = (inode->i_nlink > 0);
906                 ibs = au_ibstart(inode);
907                 ibe = au_ibend(inode);
908         }
909
910         bstart = au_dbstart(dentry);
911         btail = bstart;
912         if (inode && S_ISDIR(inode->i_mode))
913                 btail = au_dbtaildir(dentry);
914         for (bindex = bstart; bindex <= btail; bindex++) {
915                 h_dentry = au_h_dptr(dentry, bindex);
916                 if (!h_dentry)
917                         continue;
918
919                 AuDbg("b%d, %.*s\n", bindex, AuDLNPair(h_dentry));
920                 spin_lock(&h_dentry->d_lock);
921                 h_name = &h_dentry->d_name;
922                 if (unlikely(do_udba
923                              && !is_root
924                              && (unhashed != !!d_unhashed(h_dentry)
925                                  || name->len != h_name->len
926                                  || memcmp(name->name, h_name->name, name->len))
927                             )) {
928                         AuDbg("unhash 0x%x 0x%x, %.*s %.*s\n",
929                                   unhashed, d_unhashed(h_dentry),
930                                   AuDLNPair(dentry), AuDLNPair(h_dentry));
931                         spin_unlock(&h_dentry->d_lock);
932                         goto err;
933                 }
934                 spin_unlock(&h_dentry->d_lock);
935
936                 err = au_do_h_d_reval(h_dentry, nd, dentry, bindex);
937                 if (unlikely(err))
938                         /* do not goto err, to keep the errno */
939                         break;
940
941                 /* todo: plink too? */
942                 if (!do_udba)
943                         continue;
944
945                 /* UDBA tests */
946                 h_inode = h_dentry->d_inode;
947                 if (unlikely(!!inode != !!h_inode))
948                         goto err;
949
950                 h_plus = plus;
951                 h_mode = mode;
952                 h_cached_inode = h_inode;
953                 if (h_inode) {
954                         h_mode = (h_inode->i_mode & S_IFMT);
955                         h_plus = (h_inode->i_nlink > 0);
956                 }
957                 if (inode && ibs <= bindex && bindex <= ibe)
958                         h_cached_inode = au_h_iptr(inode, bindex);
959
960                 if (unlikely(plus != h_plus
961                              || mode != h_mode
962                              || h_cached_inode != h_inode))
963                         goto err;
964                 continue;
965
966         err:
967                 err = -EINVAL;
968                 break;
969         }
970
971         return err;
972 }
973
974 /* todo: consolidate with do_refresh() and au_reval_for_attr() */
975 static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
976 {
977         int err;
978         struct dentry *parent;
979
980         if (!au_digen_test(dentry, sigen))
981                 return 0;
982
983         parent = dget_parent(dentry);
984         di_read_lock_parent(parent, AuLock_IR);
985         AuDebugOn(au_digen_test(parent, sigen));
986         au_dbg_verify_gen(parent, sigen);
987         err = au_refresh_dentry(dentry, parent);
988         di_read_unlock(parent, AuLock_IR);
989         dput(parent);
990         AuTraceErr(err);
991         return err;
992 }
993
994 int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
995 {
996         int err;
997         struct dentry *d, *parent;
998         struct inode *inode;
999
1000         if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
1001                 return simple_reval_dpath(dentry, sigen);
1002
1003         /* slow loop, keep it simple and stupid */
1004         /* cf: au_cpup_dirs() */
1005         err = 0;
1006         parent = NULL;
1007         while (au_digen_test(dentry, sigen)) {
1008                 d = dentry;
1009                 while (1) {
1010                         dput(parent);
1011                         parent = dget_parent(d);
1012                         if (!au_digen_test(parent, sigen))
1013                                 break;
1014                         d = parent;
1015                 }
1016
1017                 inode = d->d_inode;
1018                 if (d != dentry)
1019                         di_write_lock_child2(d);
1020
1021                 /* someone might update our dentry while we were sleeping */
1022                 if (au_digen_test(d, sigen)) {
1023                         /*
1024                          * todo: consolidate with simple_reval_dpath(),
1025                          * do_refresh() and au_reval_for_attr().
1026                          */
1027                         di_read_lock_parent(parent, AuLock_IR);
1028                         err = au_refresh_dentry(d, parent);
1029                         di_read_unlock(parent, AuLock_IR);
1030                 }
1031
1032                 if (d != dentry)
1033                         di_write_unlock(d);
1034                 dput(parent);
1035                 if (unlikely(err))
1036                         break;
1037         }
1038
1039         return err;
1040 }
1041
1042 /*
1043  * if valid returns 1, otherwise 0.
1044  */
1045 static int aufs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
1046 {
1047         int valid, err;
1048         unsigned int sigen;
1049         unsigned char do_udba;
1050         struct super_block *sb;
1051         struct inode *inode;
1052
1053         /* todo: support rcu-walk? */
1054         if (nd && (nd->flags & LOOKUP_RCU))
1055                 return -ECHILD;
1056
1057         valid = 0;
1058         if (unlikely(!au_di(dentry)))
1059                 goto out;
1060
1061         inode = dentry->d_inode;
1062         if (inode && is_bad_inode(inode))
1063                 goto out;
1064
1065         valid = 1;
1066         sb = dentry->d_sb;
1067         /*
1068          * todo: very ugly
1069          * i_mutex of parent dir may be held,
1070          * but we should not return 'invalid' due to busy.
1071          */
1072         err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
1073         if (unlikely(err)) {
1074                 valid = err;
1075                 AuTraceErr(err);
1076                 goto out;
1077         }
1078         if (unlikely(au_dbrange_test(dentry))) {
1079                 err = -EINVAL;
1080                 AuTraceErr(err);
1081                 goto out_dgrade;
1082         }
1083
1084         sigen = au_sigen(sb);
1085         if (au_digen_test(dentry, sigen)) {
1086                 AuDebugOn(IS_ROOT(dentry));
1087                 err = au_reval_dpath(dentry, sigen);
1088                 if (unlikely(err)) {
1089                         AuTraceErr(err);
1090                         goto out_dgrade;
1091                 }
1092         }
1093         di_downgrade_lock(dentry, AuLock_IR);
1094
1095         err = -EINVAL;
1096         if (inode && (IS_DEADDIR(inode) || !inode->i_nlink))
1097                 goto out_inval;
1098
1099         do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
1100         if (do_udba && inode) {
1101                 aufs_bindex_t bstart = au_ibstart(inode);
1102                 struct inode *h_inode;
1103
1104                 if (bstart >= 0) {
1105                         h_inode = au_h_iptr(inode, bstart);
1106                         if (h_inode && au_test_higen(inode, h_inode))
1107                                 goto out_inval;
1108                 }
1109         }
1110
1111         err = h_d_revalidate(dentry, inode, nd, do_udba);
1112         if (unlikely(!err && do_udba && au_dbstart(dentry) < 0)) {
1113                 err = -EIO;
1114                 AuDbg("both of real entry and whiteout found, %.*s, err %d\n",
1115                       AuDLNPair(dentry), err);
1116         }
1117         goto out_inval;
1118
1119 out_dgrade:
1120         di_downgrade_lock(dentry, AuLock_IR);
1121 out_inval:
1122         aufs_read_unlock(dentry, AuLock_IR);
1123         AuTraceErr(err);
1124         valid = !err;
1125 out:
1126         if (!valid) {
1127                 AuDbg("%.*s invalid, %d\n", AuDLNPair(dentry), valid);
1128                 d_drop(dentry);
1129         }
1130         return valid;
1131 }
1132
/*
 * ->d_release() of aufs.
 * Nothing to do for a dentry which has no aufs dentry info, otherwise
 * finalize the info and call au_hn_di_reinit().
 */
static void aufs_d_release(struct dentry *dentry)
{
	if (!au_di(dentry))
		return;

	au_di_fin(dentry);
	au_hn_di_reinit(dentry);
}
1140
1141 const struct dentry_operations aufs_dop = {
1142         .d_revalidate   = aufs_d_revalidate,
1143         .d_release      = aufs_d_release
1144 };