Merge branch 'stable-3.2' into pandora-3.2
[pandora-kernel.git] / fs / aufs / i_op.c
1 /*
2  * Copyright (C) 2005-2013 Junjiro R. Okajima
3  *
4  * This program, aufs is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18
19 /*
20  * inode operations (except add/del/rename)
21  */
22
23 #include <linux/device_cgroup.h>
24 #include <linux/fs_stack.h>
25 #include <linux/namei.h>
26 #include <linux/security.h>
27 #include "aufs.h"
28
29 static int h_permission(struct inode *h_inode, int mask,
30                         struct vfsmount *h_mnt, int brperm)
31 {
32         int err;
33         const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
34
35         err = -EACCES;
36         if ((write_mask && IS_IMMUTABLE(h_inode))
37             || ((mask & MAY_EXEC)
38                 && S_ISREG(h_inode->i_mode)
39                 && ((h_mnt->mnt_flags & MNT_NOEXEC)
40                     || !(h_inode->i_mode & S_IXUGO))))
41                 goto out;
42
43         /*
44          * - skip the lower fs test in the case of write to ro branch.
45          * - nfs dir permission write check is optimized, but a policy for
46          *   link/rename requires a real check.
47          */
48         if ((write_mask && !au_br_writable(brperm))
49             || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
50                 && write_mask && !(mask & MAY_READ))
51             || !h_inode->i_op->permission) {
52                 /* AuLabel(generic_permission); */
53                 err = generic_permission(h_inode, mask);
54         } else {
55                 /* AuLabel(h_inode->permission); */
56                 err = h_inode->i_op->permission(h_inode, mask);
57                 AuTraceErr(err);
58         }
59
60         if (!err)
61                 err = devcgroup_inode_permission(h_inode, mask);
62         if (!err)
63                 err = security_inode_permission(h_inode, mask);
64
65 #if 0
66         if (!err) {
67                 /* todo: do we need to call ima_path_check()? */
68                 struct path h_path = {
69                         .dentry =
70                         .mnt    = h_mnt
71                 };
72                 err = ima_path_check(&h_path,
73                                      mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
74                                      IMA_COUNT_LEAVE);
75         }
76 #endif
77
78 out:
79         return err;
80 }
81
82 static int aufs_permission(struct inode *inode, int mask)
83 {
84         int err;
85         aufs_bindex_t bindex, bend;
86         const unsigned char isdir = !!S_ISDIR(inode->i_mode),
87                 write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
88         struct inode *h_inode;
89         struct super_block *sb;
90         struct au_branch *br;
91
92         /* todo: support rcu-walk? */
93         if (mask & MAY_NOT_BLOCK)
94                 return -ECHILD;
95
96         sb = inode->i_sb;
97         si_read_lock(sb, AuLock_FLUSH);
98         ii_read_lock_child(inode);
99 #if 0
100         err = au_iigen_test(inode, au_sigen(sb));
101         if (unlikely(err))
102                 goto out;
103 #endif
104
105         if (!isdir || write_mask) {
106                 err = au_busy_or_stale();
107                 h_inode = au_h_iptr(inode, au_ibstart(inode));
108                 if (unlikely(!h_inode
109                              || (h_inode->i_mode & S_IFMT)
110                              != (inode->i_mode & S_IFMT)))
111                         goto out;
112
113                 err = 0;
114                 bindex = au_ibstart(inode);
115                 br = au_sbr(sb, bindex);
116                 err = h_permission(h_inode, mask, au_br_mnt(br), br->br_perm);
117                 if (write_mask
118                     && !err
119                     && !special_file(h_inode->i_mode)) {
120                         /* test whether the upper writable branch exists */
121                         err = -EROFS;
122                         for (; bindex >= 0; bindex--)
123                                 if (!au_br_rdonly(au_sbr(sb, bindex))) {
124                                         err = 0;
125                                         break;
126                                 }
127                 }
128                 goto out;
129         }
130
131         /* non-write to dir */
132         err = 0;
133         bend = au_ibend(inode);
134         for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
135                 h_inode = au_h_iptr(inode, bindex);
136                 if (h_inode) {
137                         err = au_busy_or_stale();
138                         if (unlikely(!S_ISDIR(h_inode->i_mode)))
139                                 break;
140
141                         br = au_sbr(sb, bindex);
142                         err = h_permission(h_inode, mask, au_br_mnt(br),
143                                            br->br_perm);
144                 }
145         }
146
147 out:
148         ii_read_unlock(inode);
149         si_read_unlock(sb);
150         return err;
151 }
152
153 /* ---------------------------------------------------------------------- */
154
/*
 * ->lookup() of aufs directories.
 *
 * @dir:    the aufs directory inode, asserted locked by IMustLock()
 * @dentry: the dentry being looked up
 * @nd:     may be NULL; only nd->flags is read here, the pointer is
 *          otherwise just handed to au_lkup_dentry()
 *
 * rcu-walk is refused with -ECHILD and a name longer than
 * AUFS_MAX_NAMELEN with -ENAMETOOLONG.  Otherwise the name is looked up
 * on the branches by au_lkup_dentry(), an aufs inode is obtained by
 * au_new_inode() when that reports a non-zero count, and the value of
 * d_splice_alias() is returned.  Errors are returned as ERR_PTR().
 */
155 static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
156                                   struct nameidata *nd)
157 {
158         struct dentry *ret, *parent;
159         struct inode *inode;
160         struct super_block *sb;
161         int err, npositive;
162
163         IMustLock(dir);
164
165         /* todo: support rcu-walk? */
166         ret = ERR_PTR(-ECHILD);
167         if (nd && (nd->flags & LOOKUP_RCU))
168                 goto out;
169
170         ret = ERR_PTR(-ENAMETOOLONG);
171         if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
172                 goto out;
173
174         sb = dir->i_sb;
175         err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
176         ret = ERR_PTR(err);
177         if (unlikely(err))
178                 goto out;
179
            /*
             * NOTE(review): the only matching release visible in this function
             * is di_write_unlock(dentry) at out_unlock, so au_di_init()
             * presumably returns with the dentry write-locked -- confirm.
             */
180         err = au_di_init(dentry);
181         ret = ERR_PTR(err);
182         if (unlikely(err))
183                 goto out_si;
184
185         inode = NULL;
186         npositive = 0; /* suppress a warning */
187         parent = dentry->d_parent; /* dir inode is locked */
188         di_read_lock_parent(parent, AuLock_IR);
            /* each test below runs only while the previous ones returned 0 */
189         err = au_alive_dir(parent);
190         if (!err)
191                 err = au_digen_test(parent, au_sigen(sb));
192         if (!err) {
193                 npositive = au_lkup_dentry(dentry, au_dbstart(parent),
194                                            /*type*/0, nd);
195                 err = npositive;
196         }
197         di_read_unlock(parent, AuLock_IR);
198         ret = ERR_PTR(err);
            /* only a negative value is an error; a positive one is a count */
199         if (unlikely(err < 0))
200                 goto out_unlock;
201
202         if (npositive) {
203                 inode = au_new_inode(dentry, /*must_new*/0);
204                 ret = (void *)inode;
205         }
            /* on failure ret already carries the ERR_PTR from au_new_inode() */
206         if (IS_ERR(inode)) {
207                 inode = NULL;
208                 goto out_unlock;
209         }
210
            /* inode is still NULL here when npositive was zero */
211         ret = d_splice_alias(inode, dentry);
212 #if 0
213         if (unlikely(d_need_lookup(dentry))) {
214                 spin_lock(&dentry->d_lock);
215                 dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
216                 spin_unlock(&dentry->d_lock);
217         } else
218 #endif
            /* splice failed: unlock and drop the inode obtained above */
219         if (unlikely(IS_ERR(ret) && inode)) {
220                 ii_write_unlock(inode);
221                 iput(inode);
222                 inode = NULL;
223         }
224
225 out_unlock:
226         di_write_unlock(dentry);
227         if (inode) {
228                 /* verbose coding for lock class name */
229                 if (unlikely(S_ISLNK(inode->i_mode)))
230                         au_rw_class(&au_di(dentry)->di_rwsem,
231                                     au_lc_key + AuLcSymlink_DIINFO);
232                 else if (unlikely(S_ISDIR(inode->i_mode)))
233                         au_rw_class(&au_di(dentry)->di_rwsem,
234                                     au_lc_key + AuLcDir_DIINFO);
235                 else /* likely */
236                         au_rw_class(&au_di(dentry)->di_rwsem,
237                                     au_lc_key + AuLcNonDir_DIINFO);
238         }
239 out_si:
240         si_read_unlock(sb);
241 out:
242         return ret;
243 }
244
245 /* ---------------------------------------------------------------------- */
246
247 static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
248                           const unsigned char add_entry, aufs_bindex_t bcpup,
249                           aufs_bindex_t bstart)
250 {
251         int err;
252         struct dentry *h_parent;
253         struct inode *h_dir;
254
255         if (add_entry)
256                 IMustLock(parent->d_inode);
257         else
258                 di_write_lock_parent(parent);
259
260         err = 0;
261         if (!au_h_dptr(parent, bcpup)) {
262                 if (bstart > bcpup)
263                         err = au_cpup_dirs(dentry, bcpup);
264                 else if (bstart < bcpup)
265                         err = au_cpdown_dirs(dentry, bcpup);
266                 else
267                         BUG();
268         }
269         if (!err && add_entry) {
270                 h_parent = au_h_dptr(parent, bcpup);
271                 h_dir = h_parent->d_inode;
272                 mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
273                 err = au_lkup_neg(dentry, bcpup,
274                                   au_ftest_wrdir(add_entry, TMP_WHENTRY));
275                 /* todo: no unlock here */
276                 mutex_unlock(&h_dir->i_mutex);
277
278                 AuDbg("bcpup %d\n", bcpup);
279                 if (!err) {
280                         if (!dentry->d_inode)
281                                 au_set_h_dptr(dentry, bstart, NULL);
282                         au_update_dbrange(dentry, /*do_put_zero*/0);
283                 }
284         }
285
286         if (!add_entry)
287                 di_write_unlock(parent);
288         if (!err)
289                 err = bcpup; /* success */
290
291         AuTraceErr(err);
292         return err;
293 }
294
295 /*
296  * decide the branch and the parent dir where we will create a new entry.
297  * returns new bindex or an error.
298  * copyup the parent dir if needed.
 *
 * @dentry:     the dentry to be written
 * @src_dentry: may be NULL; when given and args->force_btgt is negative,
 *              its au_dbstart() is preferred if smaller than @dentry's
 * @args:       args->force_btgt >= 0 forces the target branch; args->flags
 *              carries the ADD_ENTRY, TMP_WHENTRY and ISDIR flags
 *
 * The reference taken by dget_parent() is dropped before returning.
 * When the chosen branch equals au_dbstart(dentry) nothing is copied up.
299  */
300 int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
301               struct au_wr_dir_args *args)
302 {
303         int err;
304         unsigned int flags;
305         aufs_bindex_t bcpup, bstart, src_bstart;
            /* non-zero when either ADD_ENTRY or TMP_WHENTRY is requested */
306         const unsigned char add_entry
307                 = au_ftest_wrdir(args->flags, ADD_ENTRY)
308                 | au_ftest_wrdir(args->flags, TMP_WHENTRY);
309         struct super_block *sb;
310         struct dentry *parent;
311         struct au_sbinfo *sbinfo;
312
313         sb = dentry->d_sb;
314         sbinfo = au_sbi(sb);
315         parent = dget_parent(dentry);
316         bstart = au_dbstart(dentry);
317         bcpup = bstart;
318         if (args->force_btgt < 0) {
                    /* no forced target: pick a candidate branch */
319                 if (src_dentry) {
320                         src_bstart = au_dbstart(src_dentry);
321                         if (src_bstart < bstart)
322                                 bcpup = src_bstart;
323                 } else if (add_entry) {
324                         flags = 0;
325                         if (au_ftest_wrdir(args->flags, ISDIR))
326                                 au_fset_wbr(flags, DIR);
327                         err = AuWbrCreate(sbinfo, dentry, flags);
328                         bcpup = err;
329                 }
330
                    /*
                     * the candidate is unusable (negative, or au_test_ro()
                     * reports it read-only): ask AuWbrCopyup() for a branch.
                     * Without add_entry and for a non-root dentry the parent
                     * is read-locked around that call.
                     */
331                 if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) {
332                         if (add_entry)
333                                 err = AuWbrCopyup(sbinfo, dentry);
334                         else {
335                                 if (!IS_ROOT(dentry)) {
336                                         di_read_lock_parent(parent, !AuLock_IR);
337                                         err = AuWbrCopyup(sbinfo, dentry);
338                                         di_read_unlock(parent, !AuLock_IR);
339                                 } else
340                                         err = AuWbrCopyup(sbinfo, dentry);
341                         }
342                         bcpup = err;
343                         if (unlikely(err < 0))
344                                 goto out;
345                 }
346         } else {
                    /* forced target: debug-assert that it is not read-only */
347                 bcpup = args->force_btgt;
348                 AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode));
349         }
350
351         AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
352         err = bcpup;
353         if (bcpup == bstart)
354                 goto out; /* success */
355
356         /* copyup the new parent into the branch we process */
357         err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
358         if (err >= 0) {
                    /* a negative dentry now lives on bcpup only */
359                 if (!dentry->d_inode) {
360                         au_set_h_dptr(dentry, bstart, NULL);
361                         au_set_dbstart(dentry, bcpup);
362                         au_set_dbend(dentry, bcpup);
363                 }
364                 AuDebugOn(add_entry && !au_h_dptr(dentry, bcpup));
365         }
366
367 out:
368         dput(parent);
369         return err;
370 }
371
372 /* ---------------------------------------------------------------------- */
373
374 void au_pin_hdir_unlock(struct au_pin *p)
375 {
376         if (p->hdir)
377                 au_hn_imtx_unlock(p->hdir);
378 }
379
380 static int au_pin_hdir_lock(struct au_pin *p)
381 {
382         int err;
383
384         err = 0;
385         if (!p->hdir)
386                 goto out;
387
388         /* even if an error happens later, keep this lock */
389         au_hn_imtx_lock_nested(p->hdir, p->lsc_hi);
390
391         err = -EBUSY;
392         if (unlikely(p->hdir->hi_inode != p->h_parent->d_inode))
393                 goto out;
394
395         err = 0;
396         if (p->h_dentry)
397                 err = au_h_verify(p->h_dentry, p->udba, p->hdir->hi_inode,
398                                   p->h_parent, p->br);
399
400 out:
401         return err;
402 }
403
404 int au_pin_hdir_relock(struct au_pin *p)
405 {
406         int err, i;
407         struct inode *h_i;
408         struct dentry *h_d[] = {
409                 p->h_dentry,
410                 p->h_parent
411         };
412
413         err = au_pin_hdir_lock(p);
414         if (unlikely(err))
415                 goto out;
416
417         for (i = 0; !err && i < sizeof(h_d)/sizeof(*h_d); i++) {
418                 if (!h_d[i])
419                         continue;
420                 h_i = h_d[i]->d_inode;
421                 if (h_i)
422                         err = !h_i->i_nlink;
423         }
424
425 out:
426         return err;
427 }
428
/*
 * Record @task as the owner of the pinned lower dir's i_mutex.
 * Compiles to nothing unless CONFIG_DEBUG_MUTEXES or CONFIG_SMP is set.
 * p->hdir is dereferenced without a test; the callers in this file test it.
 */
void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task)
{
#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
        struct inode *h_dir = p->hdir->hi_inode;

        h_dir->i_mutex.owner = task;
#endif
}
435
436 void au_pin_hdir_acquire_nest(struct au_pin *p)
437 {
438         if (p->hdir) {
439                 mutex_acquire_nest(&p->hdir->hi_inode->i_mutex.dep_map,
440                                    p->lsc_hi, 0, NULL, _RET_IP_);
441                 au_pin_hdir_set_owner(p, current);
442         }
443 }
444
445 void au_pin_hdir_release(struct au_pin *p)
446 {
447         if (p->hdir) {
448                 au_pin_hdir_set_owner(p, p->task);
449                 mutex_release(&p->hdir->hi_inode->i_mutex.dep_map, 1, _RET_IP_);
450         }
451 }
452
453 struct dentry *au_pinned_h_parent(struct au_pin *pin)
454 {
455         if (pin && pin->parent)
456                 return au_h_dptr(pin->parent, pin->bindex);
457         return NULL;
458 }
459
460 void au_unpin(struct au_pin *p)
461 {
462         if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
463                 mnt_drop_write(p->h_mnt);
464         if (!p->hdir)
465                 return;
466
467         au_pin_hdir_unlock(p);
468         if (!au_ftest_pin(p->flags, DI_LOCKED))
469                 di_read_unlock(p->parent, AuLock_IR);
470         iput(p->hdir->hi_inode);
471         dput(p->parent);
472         p->parent = NULL;
473         p->hdir = NULL;
474         p->h_mnt = NULL;
475         /* do not clear p->task */
476 }
477
/*
 * Pin the parent dir of p->dentry on branch p->bindex.
 *
 * For a non-root dentry this takes a reference on the parent dentry,
 * read-locks the parent unless DI_LOCKED is set, grabs the lower dir
 * inode and locks it via au_pin_hdir_lock().  When MNT_WRITE is set,
 * write access to the branch mount is requested too; for the root
 * dentry that is the only step.
 *
 * Returns 0 on success.  On failure the original error is only logged
 * by pr_err() and the value of au_busy_or_stale() is returned instead.
 */
478 int au_do_pin(struct au_pin *p)
479 {
480         int err;
481         struct super_block *sb;
482         struct inode *h_dir;
483
484         err = 0;
485         sb = p->dentry->d_sb;
486         p->br = au_sbr(sb, p->bindex);
487         if (IS_ROOT(p->dentry)) {
488                 if (au_ftest_pin(p->flags, MNT_WRITE)) {
489                         p->h_mnt = au_br_mnt(p->br);
490                         err = mnt_want_write(p->h_mnt);
491                         if (unlikely(err)) {
                                    /* flag cleared so au_unpin() won't drop it */
492                                 au_fclr_pin(p->flags, MNT_WRITE);
493                                 goto out_err;
494                         }
495                 }
496                 goto out;
497         }
498
499         p->h_dentry = NULL;
500         if (p->bindex <= au_dbend(p->dentry))
501                 p->h_dentry = au_h_dptr(p->dentry, p->bindex);
502
503         p->parent = dget_parent(p->dentry);
504         if (!au_ftest_pin(p->flags, DI_LOCKED))
505                 di_read_lock(p->parent, AuLock_IR, p->lsc_di);
506
507         h_dir = NULL;
508         p->h_parent = au_h_dptr(p->parent, p->bindex);
509         p->hdir = au_hi(p->parent->d_inode, p->bindex);
510         if (p->hdir)
511                 h_dir = p->hdir->hi_inode;
512
513         /*
514          * udba case, or
515          * if DI_LOCKED is not set, then p->parent may be different
516          * and h_parent can be NULL.
517          */
518         if (unlikely(!p->hdir || !h_dir || !p->h_parent)) {
                    /* undo the parent lock and reference taken above */
519                 err = -EBUSY;
520                 if (!au_ftest_pin(p->flags, DI_LOCKED))
521                         di_read_unlock(p->parent, AuLock_IR);
522                 dput(p->parent);
523                 p->parent = NULL;
524                 goto out_err;
525         }
526
            /* this reference is dropped by the iput() in au_unpin() */
527         au_igrab(h_dir);
528         err = au_pin_hdir_lock(p);
529         if (unlikely(err))
530                 goto out_unpin;
531
532         if (au_ftest_pin(p->flags, MNT_WRITE)) {
533                 p->h_mnt = au_br_mnt(p->br);
534                 err = mnt_want_write(p->h_mnt);
535                 if (unlikely(err)) {
                            /* flag cleared so au_unpin() won't drop it */
536                         au_fclr_pin(p->flags, MNT_WRITE);
537                         goto out_unpin;
538                 }
539         }
540         goto out; /* success */
541
542 out_unpin:
543         au_unpin(p);
544 out_err:
            /* the caller sees au_busy_or_stale(), not the logged error */
545         pr_err("err %d\n", err);
546         err = au_busy_or_stale();
547 out:
548         return err;
549 }
550
551 void au_pin_init(struct au_pin *p, struct dentry *dentry,
552                  aufs_bindex_t bindex, int lsc_di, int lsc_hi,
553                  unsigned int udba, unsigned char flags)
554 {
555         p->dentry = dentry;
556         p->udba = udba;
557         p->lsc_di = lsc_di;
558         p->lsc_hi = lsc_hi;
559         p->flags = flags;
560         p->bindex = bindex;
561
562         p->parent = NULL;
563         p->hdir = NULL;
564         p->h_mnt = NULL;
565
566         p->h_dentry = NULL;
567         p->h_parent = NULL;
568         p->br = NULL;
569         p->task = current;
570 }
571
572 int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
573            unsigned int udba, unsigned char flags)
574 {
575         au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
576                     udba, flags);
577         return au_do_pin(pin);
578 }
579
580 /* ---------------------------------------------------------------------- */
581
582 /*
583  * ->setattr() and ->getattr() are called in various cases.
584  * chmod, stat: dentry is revalidated.
585  * fchmod, fstat: file and dentry are not revalidated, additionally they may be
586  *                unhashed.
587  * for ->setattr(), ia->ia_file is passed from ftruncate only.
588  */
589 /* todo: consolidate with do_refresh() and simple_reval_dpath() */
590 static int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
591 {
592         int err;
593         struct inode *inode;
594         struct dentry *parent;
595
596         err = 0;
597         inode = dentry->d_inode;
598         if (au_digen_test(dentry, sigen)) {
599                 parent = dget_parent(dentry);
600                 di_read_lock_parent(parent, AuLock_IR);
601                 err = au_refresh_dentry(dentry, parent);
602                 di_read_unlock(parent, AuLock_IR);
603                 dput(parent);
604         }
605
606         AuTraceErr(err);
607         return err;
608 }
609
/*
 * Flag bit for au_icpup_args.flags and its test/set/clear helpers.
 * The helpers paste @name onto the AuIcpup_ prefix, so callers pass the
 * bare flag name, e.g. au_fset_icpup(a->flags, DID_CPUP).
 * DID_CPUP is set by au_pin_and_icpup() when the branch returned by
 * au_wr_dir() differs from au_dbstart() of the dentry.
 */
610 #define AuIcpup_DID_CPUP        1
611 #define au_ftest_icpup(flags, name)     ((flags) & AuIcpup_##name)
612 #define au_fset_icpup(flags, name) \
613         do { (flags) |= AuIcpup_##name; } while (0)
614 #define au_fclr_icpup(flags, name) \
615         do { (flags) &= ~AuIcpup_##name; } while (0)
616
/*
 * Working state shared by aufs_setattr() and au_pin_and_icpup().
 * aufs_setattr() allocates it zeroed with kzalloc().
 */
617 struct au_icpup_args {
            /* AuIcpup_* bits */
618         unsigned char flags;
            /* flags handed to au_pin(); built in au_pin_and_icpup() */
619         unsigned char pin_flags;
            /* target branch index, as returned by au_wr_dir() */
620         aufs_bindex_t btgt;
            /* udba mode handed to au_pin(); chosen by aufs_setattr() */
621         unsigned int udba;
622         struct au_pin pin;
            /* lower dentry to operate on; .mnt is filled by aufs_setattr() */
623         struct path h_path;
            /* inode of h_path.dentry */
624         struct inode *h_inode;
625 };
626
/*
 * Choose the branch for an attribute change, pin the parent there and
 * copy the inode up when the chosen branch is not the current one.
 *
 * @dentry: the aufs dentry whose attributes are being changed
 * @ia:     only ia_valid and ia_size are read here (ATTR_SIZE handling)
 * @a:      in: a->udba and a->flags; out: a->btgt, a->pin_flags, a->pin,
 *          a->h_path.dentry and a->h_inode
 *
 * Returns 0 on success, with the pin held and a->h_inode->i_mutex locked
 * with subclass AuLsc_I_CHILD, otherwise an error.
 */
627 static int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
628                             struct au_icpup_args *a)
629 {
630         int err;
631         loff_t sz;
632         aufs_bindex_t bstart, ibstart;
633         struct dentry *hi_wh, *parent;
634         struct inode *inode;
635         struct au_wr_dir_args wr_dir_args = {
636                 .force_btgt     = -1,
637                 .flags          = 0
638         };
639
640         bstart = au_dbstart(dentry);
641         inode = dentry->d_inode;
642         if (S_ISDIR(inode->i_mode))
643                 au_fset_wrdir(wr_dir_args.flags, ISDIR);
644         /* plink or hi_wh() case */
645         ibstart = au_ibstart(inode);
646         if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode))
647                 wr_dir_args.force_btgt = ibstart;
648         err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
649         if (unlikely(err < 0))
650                 goto out;
            /* a non-negative result is the target branch index */
651         a->btgt = err;
652         if (err != bstart)
653                 au_fset_icpup(a->flags, DID_CPUP);
654
655         err = 0;
656         a->pin_flags = AuPin_MNT_WRITE;
657         parent = NULL;
658         if (!IS_ROOT(dentry)) {
                    /* the parent is write-locked here, so tell the pin */
659                 au_fset_pin(a->pin_flags, DI_LOCKED);
660                 parent = dget_parent(dentry);
661                 di_write_lock_parent(parent);
662         }
663
664         err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
665         if (unlikely(err))
666                 goto out_parent;
667
668         a->h_path.dentry = au_h_dptr(dentry, bstart);
669         a->h_inode = a->h_path.dentry->d_inode;
            /*
             * sz stays -1 unless the file is being shrunk; it is passed as
             * .len to the copy-up helpers below.
             */
670         mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
671         sz = -1;
672         if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode))
673                 sz = ia->ia_size;
674         mutex_unlock(&a->h_inode->i_mutex);
675
676         hi_wh = NULL;
            /* unlinked dentry: reuse au_hi_wh() or create it by copy-up */
677         if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
678                 hi_wh = au_hi_wh(inode, a->btgt);
679                 if (!hi_wh) {
680                         struct au_cp_generic cpg = {
681                                 .dentry = dentry,
682                                 .bdst   = a->btgt,
683                                 .bsrc   = -1,
684                                 .len    = sz,
685                                 .pin    = &a->pin
686                         };
687                         err = au_sio_cpup_wh(&cpg, /*file*/NULL);
688                         if (unlikely(err))
689                                 goto out_unlock;
690                         hi_wh = au_hi_wh(inode, a->btgt);
691                         /* todo: revalidate hi_wh? */
692                 }
693         }
694
            /*
             * NOTE(review): the parent lock is downgraded to a read lock and
             * the local reference dropped; au_pin_set_parent_lflag(.., 0)
             * presumably lets au_unpin() release that read lock -- confirm.
             */
695         if (parent) {
696                 au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
697                 di_downgrade_lock(parent, AuLock_IR);
698                 dput(parent);
699                 parent = NULL;
700         }
701         if (!au_ftest_icpup(a->flags, DID_CPUP))
702                 goto out; /* success */
703
            /* pick the lower dentry on the target branch */
704         if (!d_unhashed(dentry)) {
705                 struct au_cp_generic cpg = {
706                         .dentry = dentry,
707                         .bdst   = a->btgt,
708                         .bsrc   = bstart,
709                         .len    = sz,
710                         .pin    = &a->pin,
711                         .flags  = AuCpup_DTIME | AuCpup_HOPEN
712                 };
713                 err = au_sio_cpup_simple(&cpg);
714                 if (!err)
715                         a->h_path.dentry = au_h_dptr(dentry, a->btgt);
716         } else if (!hi_wh)
717                 a->h_path.dentry = au_h_dptr(dentry, a->btgt);
718         else
719                 a->h_path.dentry = hi_wh; /* do not dget here */
720
721 out_unlock:
722         a->h_inode = a->h_path.dentry->d_inode;
723         if (!err)
724                 goto out; /* success */
725         au_unpin(&a->pin);
726 out_parent:
            /* parent is non-NULL here only while still write-locked */
727         if (parent) {
728                 di_write_unlock(parent);
729                 dput(parent);
730         }
731 out:
            /* on success return with the lower inode's i_mutex held */
732         if (!err)
733                 mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
734         return err;
735 }
736
/*
 * ->setattr() of aufs inodes.
 *
 * The change is applied to the lower inode chosen (and copied up when
 * needed) by au_pin_and_icpup(): through vfsub_trunc() for ATTR_SIZE,
 * through vfsub_notify_change() otherwise.  On success the changeable
 * attributes are copied to the aufs inode by au_cpup_attr_changeable().
 *
 * @ia is modified temporarily: ATTR_MODE is cleared when ATTR_KILL_SUID
 * or ATTR_KILL_SGID is set, and ia_file is replaced by the lower file
 * while working; ia_file and ATTR_FILE are restored before returning
 * when a file was passed in.
 *
 * Returns 0 on success or a negative errno (-ENOMEM when the work area
 * cannot be allocated).
 */
737 static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
738 {
739         int err;
740         struct inode *inode;
741         struct super_block *sb;
742         struct file *file;
743         struct au_icpup_args *a;
744
745         inode = dentry->d_inode;
746         IMustLock(inode);
747
748         err = -ENOMEM;
749         a = kzalloc(sizeof(*a), GFP_NOFS);
750         if (unlikely(!a))
751                 goto out;
752
753         if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
754                 ia->ia_valid &= ~ATTR_MODE;
755
756         file = NULL;
757         sb = dentry->d_sb;
758         err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
759         if (unlikely(err))
760                 goto out_kfree;
761
762         if (ia->ia_valid & ATTR_FILE) {
763                 /* currently ftruncate(2) only */
764                 AuDebugOn(!S_ISREG(inode->i_mode));
765                 file = ia->ia_file;
                    /*
                     * NOTE(review): out_dentry write-unlocks the dentry on this
                     * path too, so au_reval_and_lock_fdi() presumably locks
                     * both the file and the dentry -- confirm.
                     */
766                 err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
767                 if (unlikely(err))
768                         goto out_si;
                    /* work on the lower file from here on */
769                 ia->ia_file = au_hf_top(file);
770                 a->udba = AuOpt_UDBA_NONE;
771         } else {
772                 /* fchmod() doesn't pass ia_file */
773                 a->udba = au_opt_udba(sb);
774                 di_write_lock_child(dentry);
775                 /* no d_unlinked(), to set UDBA_NONE for root */
776                 if (d_unhashed(dentry))
777                         a->udba = AuOpt_UDBA_NONE;
778                 if (a->udba != AuOpt_UDBA_NONE) {
779                         AuDebugOn(IS_ROOT(dentry));
780                         err = au_reval_for_attr(dentry, au_sigen(sb));
781                         if (unlikely(err))
782                                 goto out_dentry;
783                 }
784         }
785
            /* on success the pin is held and a->h_inode->i_mutex is locked */
786         err = au_pin_and_icpup(dentry, ia, a);
787         if (unlikely(err < 0))
788                 goto out_dentry;
            /* after a copy-up the lower file is no longer passed down */
789         if (au_ftest_icpup(a->flags, DID_CPUP)) {
790                 ia->ia_file = NULL;
791                 ia->ia_valid &= ~ATTR_FILE;
792         }
793
794         a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
            /* security hooks: chmod when ATTR_MODE and ATTR_CTIME are both
             * set, otherwise chown when ATTR_UID or ATTR_GID comes with
             * ATTR_CTIME */
795         if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
796             == (ATTR_MODE | ATTR_CTIME)) {
797                 err = security_path_chmod(a->h_path.dentry, a->h_path.mnt,
798                                           ia->ia_mode);
799                 if (unlikely(err))
800                         goto out_unlock;
801         } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
802                    && (ia->ia_valid & ATTR_CTIME)) {
803                 err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
804                 if (unlikely(err))
805                         goto out_unlock;
806         }
807
808         if (ia->ia_valid & ATTR_SIZE) {
809                 struct file *f;
810
811                 if (ia->ia_size < i_size_read(inode))
812                         /* unmap only */
813                         truncate_setsize(inode, ia->ia_size);
814
815                 f = NULL;
816                 if (ia->ia_valid & ATTR_FILE)
817                         f = ia->ia_file;
                    /* the lower i_mutex is released around vfsub_trunc() */
818                 mutex_unlock(&a->h_inode->i_mutex);
819                 err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
820                 mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
821         } else
822                 err = vfsub_notify_change(&a->h_path, ia);
823         if (!err)
824                 au_cpup_attr_changeable(inode);
825
826 out_unlock:
827         mutex_unlock(&a->h_inode->i_mutex);
828         au_unpin(&a->pin);
829         if (unlikely(err))
830                 au_update_dbstart(dentry);
831 out_dentry:
832         di_write_unlock(dentry);
833         if (file) {
                    /* hand the caller's iattr back as it was given */
834                 fi_write_unlock(file);
835                 ia->ia_file = file;
836                 ia->ia_valid |= ATTR_FILE;
837         }
838 out_si:
839         si_read_unlock(sb);
840 out_kfree:
841         kfree(a);
842 out:
843         AuTraceErr(err);
844         return err;
845 }
846
847 static void au_refresh_iattr(struct inode *inode, struct kstat *st,
848                              unsigned int nlink)
849 {
850         unsigned int n;
851
852         inode->i_mode = st->mode;
853         inode->i_uid = st->uid;
854         inode->i_gid = st->gid;
855         inode->i_atime = st->atime;
856         inode->i_mtime = st->mtime;
857         inode->i_ctime = st->ctime;
858
859         au_cpup_attr_nlink(inode, /*force*/0);
860         if (S_ISDIR(inode->i_mode)) {
861                 n = inode->i_nlink;
862                 n -= nlink;
863                 n += st->nlink;
864                 smp_mb();
865                 set_nlink(inode, n);
866         }
867
868         spin_lock(&inode->i_lock);
869         inode->i_blocks = st->blocks;
870         i_size_write(inode, st->size);
871         spin_unlock(&inode->i_lock);
872 }
873
874 static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
875                         struct dentry *dentry, struct kstat *st)
876 {
877         int err;
878         unsigned int mnt_flags;
879         aufs_bindex_t bindex;
880         unsigned char udba_none, positive;
881         struct super_block *sb, *h_sb;
882         struct inode *inode;
883         struct vfsmount *h_mnt;
884         struct dentry *h_dentry;
885
886         sb = dentry->d_sb;
887         inode = dentry->d_inode;
888         err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
889         if (unlikely(err))
890                 goto out;
891         mnt_flags = au_mntflags(sb);
892         udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
893
894         /* support fstat(2) */
895         if (!d_unlinked(dentry) && !udba_none) {
896                 unsigned int sigen = au_sigen(sb);
897                 err = au_digen_test(dentry, sigen);
898                 if (!err) {
899                         di_read_lock_child(dentry, AuLock_IR);
900                         err = au_dbrange_test(dentry);
901                         if (unlikely(err))
902                                 goto out_unlock;
903                 } else {
904                         AuDebugOn(IS_ROOT(dentry));
905                         di_write_lock_child(dentry);
906                         err = au_dbrange_test(dentry);
907                         if (!err)
908                                 err = au_reval_for_attr(dentry, sigen);
909                         di_downgrade_lock(dentry, AuLock_IR);
910                         if (unlikely(err))
911                                 goto out_unlock;
912                 }
913         } else
914                 di_read_lock_child(dentry, AuLock_IR);
915
916         bindex = au_ibstart(inode);
917         h_mnt = au_sbr_mnt(sb, bindex);
918         h_sb = h_mnt->mnt_sb;
919         if (!au_test_fs_bad_iattr(h_sb) && udba_none)
920                 goto out_fill; /* success */
921
922         h_dentry = NULL;
923         if (au_dbstart(dentry) == bindex)
924                 h_dentry = dget(au_h_dptr(dentry, bindex));
925         else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
926                 h_dentry = au_plink_lkup(inode, bindex);
927                 if (IS_ERR(h_dentry))
928                         goto out_fill; /* pretending success */
929         }
930         /* illegally overlapped or something */
931         if (unlikely(!h_dentry))
932                 goto out_fill; /* pretending success */
933
934         positive = !!h_dentry->d_inode;
935         if (positive)
936                 err = vfs_getattr(h_mnt, h_dentry, st);
937         dput(h_dentry);
938         if (!err) {
939                 if (positive)
940                         au_refresh_iattr(inode, st, h_dentry->d_inode->i_nlink);
941                 goto out_fill; /* success */
942         }
943         AuTraceErr(err);
944         goto out_unlock;
945
946 out_fill:
947         generic_fillattr(inode, st);
948 out_unlock:
949         di_read_unlock(dentry, AuLock_IR);
950         si_read_unlock(sb);
951 out:
952         AuTraceErr(err);
953         return err;
954 }
955
956 /* ---------------------------------------------------------------------- */
957
958 static int h_readlink(struct dentry *dentry, int bindex, char __user *buf,
959                       int bufsiz)
960 {
961         int err;
962         struct super_block *sb;
963         struct dentry *h_dentry;
964
965         err = -EINVAL;
966         h_dentry = au_h_dptr(dentry, bindex);
967         if (unlikely(!h_dentry->d_inode->i_op->readlink))
968                 goto out;
969
970         err = security_inode_readlink(h_dentry);
971         if (unlikely(err))
972                 goto out;
973
974         sb = dentry->d_sb;
975         if (!au_test_ro(sb, bindex, dentry->d_inode)) {
976                 vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry);
977                 fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode);
978         }
979         err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz);
980
981 out:
982         return err;
983 }
984
985 static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
986 {
987         int err;
988
989         err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
990         if (unlikely(err))
991                 goto out;
992         err = au_d_hashed_positive(dentry);
993         if (!err)
994                 err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz);
995         aufs_read_unlock(dentry, AuLock_IR);
996
997 out:
998         return err;
999 }
1000
1001 static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd)
1002 {
1003         int err;
1004         mm_segment_t old_fs;
1005         union {
1006                 char *k;
1007                 char __user *u;
1008         } buf;
1009
1010         err = -ENOMEM;
1011         buf.k = __getname_gfp(GFP_NOFS);
1012         if (unlikely(!buf.k))
1013                 goto out;
1014
1015         err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
1016         if (unlikely(err))
1017                 goto out_name;
1018
1019         err = au_d_hashed_positive(dentry);
1020         if (!err) {
1021                 old_fs = get_fs();
1022                 set_fs(KERNEL_DS);
1023                 err = h_readlink(dentry, au_dbstart(dentry), buf.u, PATH_MAX);
1024                 set_fs(old_fs);
1025         }
1026         aufs_read_unlock(dentry, AuLock_IR);
1027
1028         if (err >= 0) {
1029                 buf.k[err] = 0;
1030                 /* will be freed by put_link */
1031                 nd_set_link(nd, buf.k);
1032                 return NULL; /* success */
1033         }
1034
1035 out_name:
1036         __putname(buf.k);
1037 out:
1038         path_put(&nd->path);
1039         AuTraceErr(err);
1040         return ERR_PTR(err);
1041 }
1042
1043 static void aufs_put_link(struct dentry *dentry __maybe_unused,
1044                           struct nameidata *nd, void *cookie __maybe_unused)
1045 {
1046         __putname(nd_get_link(nd));
1047 }
1048
1049 /* ---------------------------------------------------------------------- */
1050
1051 static void aufs_truncate_range(struct inode *inode __maybe_unused,
1052                                 loff_t start __maybe_unused,
1053                                 loff_t end __maybe_unused)
1054 {
1055         AuUnsupport();
1056 }
1057
1058 /* ---------------------------------------------------------------------- */
1059
1060 struct inode_operations aufs_symlink_iop = {
1061         .permission     = aufs_permission,
1062         .setattr        = aufs_setattr,
1063         .getattr        = aufs_getattr,
1064         .readlink       = aufs_readlink,
1065         .follow_link    = aufs_follow_link,
1066         .put_link       = aufs_put_link
1067 };
1068
1069 struct inode_operations aufs_dir_iop = {
1070         .create         = aufs_create,
1071         .lookup         = aufs_lookup,
1072         .link           = aufs_link,
1073         .unlink         = aufs_unlink,
1074         .symlink        = aufs_symlink,
1075         .mkdir          = aufs_mkdir,
1076         .rmdir          = aufs_rmdir,
1077         .mknod          = aufs_mknod,
1078         .rename         = aufs_rename,
1079
1080         .permission     = aufs_permission,
1081         .setattr        = aufs_setattr,
1082         .getattr        = aufs_getattr
1083 };
1084
1085 struct inode_operations aufs_iop = {
1086         .permission     = aufs_permission,
1087         .setattr        = aufs_setattr,
1088         .getattr        = aufs_getattr,
1089         .truncate_range = aufs_truncate_range
1090 };