2 * Copyright (C) 2005-2013 Junjiro R. Okajima
4 * This program, aufs is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include <linux/exportfs.h>
24 #include <linux/mnt_namespace.h>
25 #include <linux/namei.h>
26 #include <linux/nsproxy.h>
27 #include <linux/random.h>
28 #include <linux/writeback.h>
32 #ifdef CONFIG_AUFS_INO_T_64
/*
 * decode_ino() - rebuild an inode number from the __u32 words at @a.
 * Callers in this file pass a pointer into a file handle (fh + Fh_ino,
 * fh + Fh_dir_ino).  The BUILD_BUG_ON() makes the build fail unless u.ino
 * and u.a have the same size; the CONFIG_AUFS_INO_T_64 section is specific
 * to 64bit inode numbers.
 * NOTE(review): the copy statements and the return are not visible in this
 * listing.
 */
40 static ino_t decode_ino(__u32 *a)
44 BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
46 #ifdef CONFIG_AUFS_INO_T_64
/*
 * encode_ino() - counterpart of decode_ino(); takes inode number @ino and
 * the __u32 words at @a (aufs_encode_fh() passes fh + Fh_ino and
 * fh + Fh_dir_ino).  Presumably stores @ino into @a; the storing statements
 * are not visible in this listing.
 */
52 static void encode_ino(__u32 *a, ino_t ino)
58 #ifdef CONFIG_AUFS_INO_T_64
/*
 * NOTE(review): fragment of the file handle word index definitions (Fh_*).
 * The only visible member makes Fh_dir_ino an alias of Fh_dir_ino1.
 */
67 #ifdef CONFIG_AUFS_INO_T_64
68 /* support 64bit inode number */
82 Fh_dir_ino = Fh_dir_ino1
/*
 * au_test_anon() - test whether @dentry is flagged DCACHE_DISCONNECTED.
 * Returns 1 when the flag is set in d_flags, 0 otherwise.
 */
85 static int au_test_anon(struct dentry *dentry)
87 /* note: read d_flags without d_lock */
88 return !!(dentry->d_flags & DCACHE_DISCONNECTED);
/*
 * au_test_nfsd() - test whether the current task looks like an nfsd thread.
 * The command name is only examined for tasks with PF_KTHREAD set; ret
 * becomes non-zero when that name is exactly "nfsd".
 */
91 int au_test_nfsd(void)
94 struct task_struct *tsk = current;
95 char comm[sizeof(tsk->comm)];
98 if (tsk->flags & PF_KTHREAD) {
/* work on a private copy of the task name */
99 get_task_comm(comm, tsk);
100 ret = !strcmp(comm, "nfsd");
106 /* ---------------------------------------------------------------------- */
107 /* inode generation external table */
/*
 * au_xigen_inc() - write the next generation number of @inode to the xigen
 * file.
 * The value written is inode->i_generation + 1, through xino_fwrite() on
 * sbinfo->si_xigen.  The XINO mount option is asserted (debug builds).
 * A write of exactly sizeof(igen) bytes is success; a different
 * non-negative size is reported with AuIOErr().
 * NOTE(review): the position argument of the write and any update of the
 * in-memory i_generation are not visible in this listing.
 */
109 void au_xigen_inc(struct inode *inode)
114 struct super_block *sb;
115 struct au_sbinfo *sbinfo;
118 AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
123 igen = inode->i_generation + 1;
124 sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
126 if (sz == sizeof(igen))
127 return; /* success */
129 if (unlikely(sz >= 0))
130 AuIOErr("xigen error (%zd)\n", sz);
/*
 * au_xigen_new() - set up inode->i_generation for @inode from the xigen
 * file (sbinfo->si_xigen).
 * The root inode (AUFS_ROOT_INO) and mounts without the XINO option are
 * special-cased up front (the action taken is not visible here).
 * pos is range-checked against au_loff_max and then scaled by
 * sizeof(i_generation) to become a byte offset into the file.
 * When the file is too small to hold an entry at pos, a fresh generation is
 * taken from si_xigen_next and written out; the other visible path reads
 * the stored generation from the file (presumably the else branch).
 * A transfer of a non-negative size other than sizeof(i_generation) is
 * reported with AuIOErr().
 */
133 int au_xigen_new(struct inode *inode)
138 struct super_block *sb;
139 struct au_sbinfo *sbinfo;
143 /* todo: dirty, at mount time */
144 if (inode->i_ino == AUFS_ROOT_INO)
148 if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
/* make sure the multiplication below cannot exceed au_loff_max */
153 if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
154 AuIOErr1("too large i%lld\n", pos);
157 pos *= sizeof(inode->i_generation);
161 file = sbinfo->si_xigen;
/* no entry stored at pos yet: allocate a new generation and store it */
164 if (i_size_read(file->f_dentry->d_inode)
165 < pos + sizeof(inode->i_generation)) {
166 inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
167 sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
168 sizeof(inode->i_generation), &pos);
/* read the generation stored at pos */
170 sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
171 sizeof(inode->i_generation), &pos);
172 if (sz == sizeof(inode->i_generation))
173 goto out; /* success */
176 if (unlikely(sz >= 0)) {
178 AuIOErr("xigen error (%zd)\n", sz);
/*
 * au_xigen_set() - install a new xigen file for @sb.
 * The file is obtained from au_xino_create2(@base, <current si_xigen>).
 * A previously installed si_xigen is released with fput() before the new
 * file is stored in sbinfo->si_xigen.
 * NOTE(review): the error check on the created file is not visible here.
 */
185 int au_xigen_set(struct super_block *sb, struct file *base)
188 struct au_sbinfo *sbinfo;
194 file = au_xino_create2(base, sbinfo->si_xigen);
199 if (sbinfo->si_xigen)
200 fput(sbinfo->si_xigen);
201 sbinfo->si_xigen = file;
/*
 * au_xigen_clr() - drop the xigen file of @sb.
 * If sbinfo->si_xigen is set, it is released with fput() and the pointer is
 * reset to NULL.
 */
207 void au_xigen_clr(struct super_block *sb)
209 struct au_sbinfo *sbinfo;
214 if (sbinfo->si_xigen) {
215 fput(sbinfo->si_xigen);
216 sbinfo->si_xigen = NULL;
220 /* ---------------------------------------------------------------------- */
/*
 * decode_by_ino() - find a dentry for inode number @ino among the inodes
 * already known to @sb.
 * The inode is searched with ilookup().  The result is preset to
 * ERR_PTR(-ESTALE), and the inode is tested for being bad or for having a
 * generation different from au_sigen(sb).
 * When dir_ino is zero or the inode is a directory, any alias from
 * d_find_alias() is used.  Otherwise the aliases are walked under
 * inode->i_lock, and the one whose parent directory has inode number
 * dir_ino is taken with dget_dlock().
 * A found dentry that fails au_digen_test() against the current generation
 * "needs to refresh"; what is done then is not visible in this listing.
 */
222 static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
225 struct dentry *dentry, *d;
230 inode = ilookup(sb, ino);
234 dentry = ERR_PTR(-ESTALE);
235 sigen = au_sigen(sb);
236 if (unlikely(is_bad_inode(inode)
238 || sigen != au_iigen(inode, NULL)))
242 if (!dir_ino || S_ISDIR(inode->i_mode))
243 dentry = d_find_alias(inode);
/* pick the alias which lives in the directory dir_ino */
245 spin_lock(&inode->i_lock);
246 list_for_each_entry(d, &inode->i_dentry, d_alias) {
247 spin_lock(&d->d_lock);
249 && d->d_parent->d_inode->i_ino == dir_ino) {
250 dentry = dget_dlock(d);
251 spin_unlock(&d->d_lock);
254 spin_unlock(&d->d_lock);
256 spin_unlock(&inode->i_lock);
258 if (unlikely(dentry && au_digen_test(dentry, sigen))) {
259 /* need to refresh */
267 AuTraceErrPtr(dentry);
271 /* ---------------------------------------------------------------------- */
274 /* if exportfs_decode_fh() passed vfsmount*, we could be happy */
/*
 * Argument block for au_compare_mnt(): sb is the super_block to look for,
 * mnt receives the matching vfsmount.
 */
276 struct au_compare_mnt_args {
278 struct super_block *sb;
281 struct vfsmount *mnt;
/*
 * au_compare_mnt() - callback handed to iterate_mounts() by au_mnt_get().
 * @arg is a struct au_compare_mnt_args.  A mount whose mnt_sb differs from
 * a->sb is not a match; for a match a reference is taken with mntget() and
 * stored in a->mnt.
 * NOTE(review): the return values are not visible in this listing.
 */
284 static int au_compare_mnt(struct vfsmount *mnt, void *arg)
286 struct au_compare_mnt_args *a = arg;
288 if (mnt->mnt_sb != a->sb)
290 a->mnt = mntget(mnt);
/*
 * au_mnt_get() - find a vfsmount for @sb.
 * The mounts of the current task's mount namespace are scanned from
 * ns->root with iterate_mounts() and au_compare_mnt(), under
 * br_read_lock(vfsmount_lock).  The reference on the result is the one
 * taken by mntget() in au_compare_mnt().
 * Both current->nsproxy and the search result are asserted to be non-NULL
 * in debug builds only.
 */
294 static struct vfsmount *au_mnt_get(struct super_block *sb)
297 struct au_compare_mnt_args args = {
300 struct mnt_namespace *ns;
302 br_read_lock(vfsmount_lock);
304 AuDebugOn(!current->nsproxy);
305 ns = current->nsproxy->mnt_ns;
307 err = iterate_mounts(au_compare_mnt, &args, ns->root);
308 br_read_unlock(vfsmount_lock);
310 AuDebugOn(!args.mnt);
/*
 * State used by si_nfsd_read_lock() and its callers while decoding a file
 * handle: br_id is the branch id taken from the handle, bindex the branch
 * index resolved from it, and force_lock is consulted when that resolution
 * fails.  Other members (e.g. sigen, used elsewhere in this file) are not
 * visible in this listing.
 */
314 struct au_nfsd_si_lock {
316 aufs_bindex_t bindex, br_id;
317 unsigned char force_lock;
/*
 * si_nfsd_read_lock() - take the superblock read lock and re-resolve the
 * branch recorded in @nsi_lock.
 * After si_read_lock(sb, AuLock_FLUSH) the branch index is looked up from
 * nsi_lock->br_id.  It is success when the index is valid and
 * nsi_lock->sigen + AUFS_BRANCH_MAX is still greater than au_sigen(sb).
 * The resolved index is stored in nsi_lock->bindex.
 * Callers in this file treat a non-zero/negative return as failure.
 * NOTE(review): what happens on failure depending on force_lock is not
 * visible in this listing.
 */
320 static int si_nfsd_read_lock(struct super_block *sb,
321 struct au_nfsd_si_lock *nsi_lock)
324 aufs_bindex_t bindex;
326 si_read_lock(sb, AuLock_FLUSH);
328 /* branch id may be wrapped around */
330 bindex = au_br_index(sb, nsi_lock->br_id);
331 if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
332 goto out; /* success */
336 if (!nsi_lock->force_lock)
340 nsi_lock->bindex = bindex;
/*
 * State shared between au_lkup_by_ino() and the find_name_by_ino() readdir
 * callback.  Users in this file access the members name, namelen, found
 * and called; the member declarations are not visible in this listing.
 */
344 struct find_name_by_ino {
/*
 * find_name_by_ino() - directory entry callback passed to vfsub_readdir()
 * by au_lkup_by_ino().  @arg is a struct find_name_by_ino.
 * The visible part copies @namelen bytes of @name into a->name and records
 * the length in a->namelen.
 * NOTE(review): the condition selecting the entry (presumably a comparison
 * of @ino) and the return values are not visible in this listing.
 */
352 find_name_by_ino(void *arg, const char *name, int namelen, loff_t offset,
353 u64 ino, unsigned int d_type)
355 struct find_name_by_ino *a = arg;
361 memcpy(a->name, name, namelen);
362 a->namelen = namelen;
/*
 * au_lkup_by_ino() - find the child of the directory @path whose inode
 * number is @ino, by reading the directory and looking up the entry name.
 * @nsi_lock may be NULL (decode_by_path() passes NULL).
 *
 * Steps visible here:
 * - the superblock read lock is released (the guarding condition is not
 *   visible) and the directory is opened with au_dir_roflags;
 * - a name buffer is allocated with __getname_gfp(GFP_NOFS), -ENOMEM being
 *   the preset result;
 * - vfsub_readdir() is repeated with find_name_by_ino() until an error
 *   occurs, the entry is found, or the callback is no longer called;
 * - ERR_PTR(-ESTALE) is used "instead of ENOENT" (presumably when nothing
 *   was found);
 * - the name is looked up with vfsub_lookup_one_len() under the parent's
 *   i_mutex; a dentry without an inode becomes ERR_PTR(-ENOENT);
 * - finally, when @nsi_lock is given, the superblock lock is retaken with
 *   si_nfsd_read_lock(); if that fails, a dentry found so far is replaced
 *   by ERR_PTR(-ESTALE).
 */
367 static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
368 struct au_nfsd_si_lock *nsi_lock)
370 struct dentry *dentry, *parent;
373 struct find_name_by_ino arg;
376 parent = path->dentry;
378 si_read_unlock(parent->d_sb);
379 file = vfsub_dentry_open(path, au_dir_roflags);
/* lets an error pointer from the open be handed back as the result */
380 dentry = (void *)file;
384 dentry = ERR_PTR(-ENOMEM);
385 arg.name = __getname_gfp(GFP_NOFS);
386 if (unlikely(!arg.name))
393 err = vfsub_readdir(file, find_name_by_ino, &arg);
394 } while (!err && !arg.found && arg.called);
395 dentry = ERR_PTR(err);
398 /* instead of ENOENT */
399 dentry = ERR_PTR(-ESTALE);
403 /* do not call au_lkup_one() */
404 dir = parent->d_inode;
405 mutex_lock(&dir->i_mutex);
406 dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen);
407 mutex_unlock(&dir->i_mutex);
408 AuTraceErrPtr(dentry);
411 AuDebugOn(au_test_anon(dentry));
412 if (unlikely(!dentry->d_inode)) {
414 dentry = ERR_PTR(-ENOENT);
422 if (unlikely(nsi_lock
423 && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
424 if (!IS_ERR(dentry)) {
426 dentry = ERR_PTR(-ESTALE);
428 AuTraceErrPtr(dentry);
/*
 * decode_by_dir_ino() - find the dentry for @ino through its parent
 * directory, identified by inode number dir_ino.
 * For a parent other than the root (AUFS_ROOT_INO) the parent dentry comes
 * from decode_by_ino(sb, dir_ino, 0); a NULL or error result ends the
 * attempt (the jump target is not visible).  The other visible assignment
 * takes a reference on sb->s_root (presumably the root case).
 * The vfsmount comes from au_mnt_get(), and the child is then searched by
 * au_lkup_by_ino().
 */
432 static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
434 struct au_nfsd_si_lock *nsi_lock)
436 struct dentry *dentry;
439 if (dir_ino != AUFS_ROOT_INO) {
440 path.dentry = decode_by_ino(sb, dir_ino, 0);
441 dentry = path.dentry;
442 if (!path.dentry || IS_ERR(path.dentry))
444 AuDebugOn(au_test_anon(path.dentry));
446 path.dentry = dget(sb->s_root);
448 path.mnt = au_mnt_get(sb);
449 dentry = au_lkup_by_ino(&path, ino, nsi_lock);
453 AuTraceErrPtr(dentry);
457 /* ---------------------------------------------------------------------- */
/*
 * h_acceptable() - acceptability callback handed to exportfs_decode_fh()
 * by decode_by_path().
 * NOTE(review): the body is not visible in this listing.
 */
459 static int h_acceptable(void *expv, struct dentry *dentry)
/*
 * au_build_path() - build a pathname in @buf (of @len bytes) for the lower
 * directory @h_parent.
 * Three d_path() calls are visible: one for @h_rootpath itself, one for
 * @h_parent on the same mount as @h_rootpath, and one for the root of @sb
 * on the mount returned by au_mnt_get(), using only the first
 * len - strlen(p) bytes of @buf.
 * NOTE(review): presumably this translates the path of @h_parent relative
 * to the branch root into a path under the aufs mount point; the string
 * handling between the calls is not visible in this listing -- confirm.
 */
464 static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
465 char *buf, int len, struct super_block *sb)
471 p = d_path(h_rootpath, buf, len);
476 path.mnt = h_rootpath->mnt;
477 path.dentry = h_parent;
478 p = d_path(&path, buf, len);
484 path.mnt = au_mnt_get(sb);
485 path.dentry = sb->s_root;
486 p = d_path(&path, buf, len - strlen(p));
/*
 * decode_by_path() - find the dentry for @ino by decoding the lower
 * (branch) part of the file handle and walking the resulting pathname.
 *
 * Steps visible here:
 * - the branch is taken from nsi_lock->bindex, and the words of @fh from
 *   Fh_tail onwards are decoded on the branch mount by
 *   exportfs_decode_fh(), using the type stored in fh[Fh_h_type];
 * - a NULL/error result, or a disconnected dentry, is reported by AuWarn1();
 * - one page is allocated (GFP_NOFS) as the pathname buffer, with -ENOMEM
 *   as the preset result;
 * - the lower dentry of the aufs root on that branch is fetched under
 *   di_read_lock_parent(), and au_build_path() produces the pathname;
 * - the pathname is resolved by vfsub_kern_path() as a directory
 *   (LOOKUP_FOLLOW | LOOKUP_DIRECTORY);
 * - when the resolved directory is not @ino itself, the child is searched
 *   by au_lkup_by_ino() with a NULL nsi_lock; the other visible assignment
 *   takes a reference on the resolved dentry;
 * - the superblock lock is retaken with si_nfsd_read_lock(); if that fails,
 *   a dentry found so far is replaced by ERR_PTR(-ESTALE);
 * - the pathname page is freed.
 */
499 struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
500 int fh_len, struct au_nfsd_si_lock *nsi_lock)
502 struct dentry *dentry, *h_parent, *root;
503 struct super_block *h_sb;
505 struct vfsmount *h_mnt;
506 struct au_branch *br;
510 br = au_sbr(sb, nsi_lock->bindex);
511 h_mnt = au_br_mnt(br);
512 h_sb = h_mnt->mnt_sb;
513 /* todo: call lower fh_to_dentry()? fh_to_parent()? */
514 h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
515 fh_len - Fh_tail, fh[Fh_h_type],
516 h_acceptable, /*context*/NULL);
518 if (unlikely(!h_parent || IS_ERR(h_parent))) {
519 AuWarn1("%s decode_fh failed, %ld\n",
520 au_sbtype(h_sb), PTR_ERR(h_parent));
524 if (unlikely(au_test_anon(h_parent))) {
525 AuWarn1("%s decode_fh returned a disconnected dentry\n",
530 dentry = ERR_PTR(-ENOMEM);
531 pathname = (void *)__get_free_page(GFP_NOFS);
532 if (unlikely(!pathname))
537 di_read_lock_parent(root, !AuLock_IR);
538 path.dentry = au_h_dptr(root, nsi_lock->bindex);
539 di_read_unlock(root, !AuLock_IR);
540 p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
546 err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
547 dentry = ERR_PTR(err);
551 dentry = ERR_PTR(-ENOENT);
552 AuDebugOn(au_test_anon(path.dentry));
553 if (unlikely(!path.dentry->d_inode))
556 if (ino != path.dentry->d_inode->i_ino)
557 dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
559 dentry = dget(path.dentry);
564 if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
565 if (!IS_ERR(dentry)) {
567 dentry = ERR_PTR(-ESTALE);
570 free_page((unsigned long)pathname);
574 AuTraceErrPtr(dentry);
578 /* ---------------------------------------------------------------------- */
/*
 * aufs_fh_to_dentry() - the ->fh_to_dentry() export operation of aufs
 * (installed in aufs_export_op): convert the file handle in @fid into a
 * dentry of @sb.
 *
 * The result is preset to ERR_PTR(-ESTALE).  A handle shorter than Fh_tail
 * words is tested for first.  The generation (Fh_sigen) and branch id
 * (Fh_br_id) words are copied into nsi_lock and the superblock is locked by
 * si_nfsd_read_lock(); afterwards force_lock is set for later re-locking.
 *
 * The dentry is then searched in up to three stages, as the inline comments
 * say: by the cached inode (decode_by_ino()), by the cached parent
 * directory (decode_by_dir_ino()), and finally by pathname
 * (decode_by_path()).  br_count of the branch is raised before the second
 * stage and dropped again near the end.
 *
 * A found dentry is accepted only when au_digen_test() against the current
 * generation passes and the inode's i_generation equals fh[Fh_igen];
 * otherwise the result is ERR_PTR(-ESTALE).
 * NOTE(review): the tests between the stages and the unlock/release
 * statements are not visible in this listing.
 */
580 static struct dentry *
581 aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
584 struct dentry *dentry;
585 __u32 *fh = fid->raw;
586 struct au_branch *br;
588 struct au_nfsd_si_lock nsi_lock = {
592 dentry = ERR_PTR(-ESTALE);
593 /* it should never happen, but the file handle is unreliable */
594 if (unlikely(fh_len < Fh_tail))
596 nsi_lock.sigen = fh[Fh_sigen];
597 nsi_lock.br_id = fh[Fh_br_id];
599 /* branch id may be wrapped around */
601 if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
603 nsi_lock.force_lock = 1;
605 /* is this inode still cached? */
606 ino = decode_ino(fh + Fh_ino);
607 /* it should never happen */
608 if (unlikely(ino == AUFS_ROOT_INO))
611 dir_ino = decode_ino(fh + Fh_dir_ino);
612 dentry = decode_by_ino(sb, ino, dir_ino);
618 /* is the parent dir cached? */
619 br = au_sbr(sb, nsi_lock.bindex);
620 atomic_inc(&br->br_count);
621 dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
628 dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
631 if (unlikely(!dentry))
632 /* todo?: make it ESTALE */
636 if (!au_digen_test(dentry, au_sigen(sb))
637 && dentry->d_inode->i_generation == fh[Fh_igen])
638 goto out_unlock; /* success */
641 dentry = ERR_PTR(-ESTALE);
644 atomic_dec(&br->br_count);
647 AuTraceErrPtr(dentry);
/*
 * aufs_fh_to_parent() - compiled out by the "#if 0" below and not installed
 * in aufs_export_op.  The visible part decodes the parent directory's inode
 * number from the handle and tries decode_by_ino(), then decode_by_path().
 * NOTE(review): the decode_by_path() call here passes different arguments
 * than the decode_by_path() defined in this file takes, so this code would
 * need updating before being enabled.
 */
651 #if 0 /* reserved for future use */
652 /* support subtreecheck option */
653 static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
654 int fh_len, int fh_type)
656 struct dentry *parent;
657 __u32 *fh = fid->raw;
660 dir_ino = decode_ino(fh + Fh_dir_ino);
661 parent = decode_by_ino(sb, dir_ino, 0);
665 parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
666 dir_ino, fh, fh_len);
669 AuTraceErrPtr(parent);
674 /* ---------------------------------------------------------------------- */
/*
 * aufs_encode_fh() - the ->encode_fh() export operation of aufs (installed
 * in aufs_export_op): build a file handle for @dentry in @fh.
 *
 * Steps visible here:
 * - a buffer of at most Fh_tail words (*max_len <= Fh_tail) is warned about
 *   as a possible NFSv2 client;
 * - the root dentry is special-cased (the action is not visible);
 * - @dentry is locked with aufs_read_lock(); with CONFIG_AUFS_DEBUG a mount
 *   without the XINO option produces a warning;
 * - the parent is read-locked and its branches are scanned from
 *   au_dbstart() to au_dbtaildir() for a lower parent dentry;
 * - a branch whose super_block has no s_export_op is reported as not
 *   exportable;
 * - the aufs part of the handle is filled in: branch id, aufs generation,
 *   inode number, parent inode number and inode generation;
 * - the lower parent directory is encoded behind it (from word Fh_tail) by
 *   exportfs_encode_fh(), whose returned type is kept in fh[Fh_h_type];
 * - the locks are released.
 * NOTE(review): the loop exit condition, the length bookkeeping and the
 * return value are not visible in this listing.
 */
676 static int aufs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len,
680 aufs_bindex_t bindex, bend;
681 struct super_block *sb, *h_sb;
683 struct dentry *parent, *h_parent;
684 struct au_branch *br;
686 AuDebugOn(au_test_anon(dentry));
690 if (unlikely(*max_len <= Fh_tail)) {
691 AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
696 if (IS_ROOT(dentry)) {
697 AuDebugOn(dentry->d_inode->i_ino != AUFS_ROOT_INO);
702 err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_IR | AuLock_GEN);
706 inode = dentry->d_inode;
709 #ifdef CONFIG_AUFS_DEBUG
710 if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
711 AuWarn1("NFS-exporting requires xino\n");
714 parent = dget_parent(dentry);
715 di_read_lock_parent(parent, !AuLock_IR);
716 bend = au_dbtaildir(parent);
717 for (bindex = au_dbstart(parent); bindex <= bend; bindex++) {
718 h_parent = au_h_dptr(parent, bindex);
724 if (unlikely(!h_parent))
728 br = au_sbr(sb, bindex);
730 if (unlikely(!h_sb->s_export_op)) {
731 AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
/* aufs-specific head of the handle */
735 fh[Fh_br_id] = br->br_id;
736 fh[Fh_sigen] = au_sigen(sb);
737 encode_ino(fh + Fh_ino, inode->i_ino);
738 encode_ino(fh + Fh_dir_ino, parent->d_inode->i_ino);
739 fh[Fh_igen] = inode->i_generation;
/* the lower filesystem's handle for the parent dir follows */
742 fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
744 /*connectable or subtreecheck*/0);
751 AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
756 di_read_unlock(parent, !AuLock_IR);
758 aufs_read_unlock(dentry, AuLock_IR);
760 if (unlikely(err < 0))
765 /* ---------------------------------------------------------------------- */
/*
 * aufs_commit_metadata() - the ->commit_metadata() export operation of aufs
 * (installed in aufs_export_op).
 * Under the superblock read lock and the inode write lock, the lower inode
 * on the inode's first branch (au_ibstart()) is fetched, and the lower
 * filesystem's own commit_metadata operation is picked up in f.
 * The other visible path calls sync_inode() on the lower inode with
 * WB_SYNC_ALL and nr_to_write == 0 ("metadata only"); presumably this is
 * the fallback when the lower filesystem has no such operation.
 * Afterwards au_cpup_attr_timesizes() is applied to @inode and the inode
 * lock is released.
 * NOTE(review): the tests around f and the call through it are not visible
 * in this listing.
 */
767 static int aufs_commit_metadata(struct inode *inode)
770 aufs_bindex_t bindex;
771 struct super_block *sb;
772 struct inode *h_inode;
773 int (*f)(struct inode *inode);
776 si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
777 ii_write_lock_child(inode);
778 bindex = au_ibstart(inode);
779 AuDebugOn(bindex < 0);
780 h_inode = au_h_iptr(inode, bindex);
782 f = h_inode->i_sb->s_export_op->commit_metadata;
786 struct writeback_control wbc = {
787 .sync_mode = WB_SYNC_ALL,
788 .nr_to_write = 0 /* metadata only */
791 err = sync_inode(h_inode, &wbc);
794 au_cpup_attr_timesizes(inode);
795 ii_write_unlock(inode);
800 /* ---------------------------------------------------------------------- */
/*
 * Export operations of aufs; installed as sb->s_export_op by
 * au_export_init().  fh_to_parent is intentionally left unset.
 */
802 static struct export_operations aufs_export_op = {
803 .fh_to_dentry = aufs_fh_to_dentry,
804 /* .fh_to_parent = aufs_fh_to_parent, */
805 .encode_fh = aufs_encode_fh,
806 .commit_metadata = aufs_commit_metadata
/*
 * au_export_init() - set up the export related parts of @sb.
 * Installs aufs_export_op, clears sbinfo->si_xigen, and seeds the
 * generation counter si_xigen_next with random bytes.  The BUILD_BUG_ON()
 * makes the build fail unless the random value u has the size of an int,
 * which is what atomic_set() stores.
 */
809 void au_export_init(struct super_block *sb)
811 struct au_sbinfo *sbinfo;
814 sb->s_export_op = &aufs_export_op;
816 sbinfo->si_xigen = NULL;
817 get_random_bytes(&u, sizeof(u));
818 BUILD_BUG_ON(sizeof(u) != sizeof(int));
819 atomic_set(&sbinfo->si_xigen_next, u);