2 * Copyright (C) 2005-2012 Junjiro R. Okajima
4 * This program, aufs is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 * mount and super_block operations
24 #include <linux/module.h>
25 #include <linux/seq_file.h>
26 #include <linux/statfs.h>
27 #include <linux/vmalloc.h>
28 #include <linux/writeback.h>
/*
 * alloc_inode hook (installed in aufs_sop).
 * Takes an inode container from au_cache_alloc_icntnr() and gives initial
 * values to the embedded VFS inode and to the aufs inode info.
 * The sb argument is not used.
 */
34 static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
38 c = au_cache_alloc_icntnr();
/* i_version starts at the constant 1; the trailing note names sigen(sb) */
41 c->vfs_inode.i_version = 1; /* sigen(sb); */
/* ii_hinode starts out unset */
42 c->iinfo.ii_hinode = NULL;
/*
 * RCU callback queued by aufs_destroy_inode().
 * Recovers the inode from its embedded i_rcu head, re-initializes the
 * i_dentry list head and gives the enclosing au_icntnr back to its cache.
 */
48 static void aufs_destroy_inode_cb(struct rcu_head *head)
50 struct inode *inode = container_of(head, struct inode, i_rcu);
52 INIT_LIST_HEAD(&inode->i_dentry);
/* the VFS inode is embedded in struct au_icntnr as vfs_inode */
53 au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
/*
 * destroy_inode hook (installed in aufs_sop).
 * The container is not freed here; aufs_destroy_inode_cb() is queued
 * through call_rcu() and does the free.
 */
56 static void aufs_destroy_inode(struct inode *inode)
59 call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
/*
 * iget_locked() wrapper that also runs the aufs-side setup calls
 * au_xigen_new() and au_iinfo_init() on the inode.
 * A NULL result from iget_locked() is turned into ERR_PTR(-ENOMEM), so
 * failure is reported as an error pointer and never as NULL.
 * NOTE(review): the setup calls presumably run only for inodes that still
 * have I_NEW set -- confirm against the I_NEW test below.
 */
62 struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
67 inode = iget_locked(sb, ino);
68 if (unlikely(!inode)) {
69 inode = ERR_PTR(-ENOMEM);
/* I_NEW clear means the inode was already known */
72 if (!(inode->i_state & I_NEW))
75 err = au_xigen_new(inode);
77 err = au_iinfo_init(inode);
86 /* never return NULL */
/*
 * Print the branch list into seq.
 * For each branch index from 0 to bend: the branch path (built from the
 * branch mount and the root dentry's lower dentry at that index), then
 * "=<perm>" where <perm> is the string form of br_perm.  Entries are
 * separated by ':'; no separator follows the last one.
 * The loop stops at the first non-zero err.
 */
92 /* lock free root dinfo */
93 static int au_show_brs(struct seq_file *seq, struct super_block *sb)
96 aufs_bindex_t bindex, bend;
98 struct au_hdentry *hdp;
/* lower dentries of the aufs root, indexed by branch */
104 hdp = au_di(sb->s_root)->di_hdentry;
105 for (bindex = 0; !err && bindex <= bend; bindex++) {
106 br = au_sbr(sb, bindex);
107 path.mnt = br->br_mnt;
108 path.dentry = hdp[bindex].hd_dentry;
109 err = au_seq_path(seq, &path);
111 perm = au_optstr_br_perm(br->br_perm);
113 err = seq_printf(seq, "=%s", perm);
120 if (!err && bindex != bend)
121 err = seq_putc(seq, ':');
/*
 * Print ",create=<policy>" for the writable-branch creation policy v.
 * Policies that carry parameters take them from sbinfo->si_wbr_mfs:
 * mfs_expire (converted from jiffies) and/or mfsrr_watermark.
 * si_rwsem is expected to be held by the caller (AuRwMustAnyLock).
 */
127 static void au_show_wbr_create(struct seq_file *m, int v,
128 struct au_sbinfo *sbinfo)
132 AuRwMustAnyLock(&sbinfo->si_rwsem);
134 seq_printf(m, ",create=");
/* string form of the policy; the parameterized cases use literal formats */
135 pat = au_optstr_wbr_create(v);
137 case AuWbrCreate_TDP:
139 case AuWbrCreate_MFS:
140 case AuWbrCreate_PMFS:
143 case AuWbrCreate_MFSV:
144 seq_printf(m, /*pat*/"mfs:%lu",
145 jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
148 case AuWbrCreate_PMFSV:
149 seq_printf(m, /*pat*/"pmfs:%lu",
150 jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
153 case AuWbrCreate_MFSRR:
154 seq_printf(m, /*pat*/"mfsrr:%llu",
155 sbinfo->si_wbr_mfs.mfsrr_watermark);
157 case AuWbrCreate_MFSRRV:
158 seq_printf(m, /*pat*/"mfsrr:%llu:%lu",
159 sbinfo->si_wbr_mfs.mfsrr_watermark,
160 jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
/*
 * Print ",xino=<path>" for the xino file taken from si_xib.
 * The file is compared against the default location: parent equal to the
 * lower root dentry of the branch selected by au_xino_brid(), and name
 * equal to AUFS_XINO_FNAME.  Per the note below, the default path on the
 * first writable branch is not printed.
 * si_rwsem is expected to be held by the caller (AuRwMustAnyLock).
 */
166 static int au_show_xino(struct seq_file *seq, struct vfsmount *mnt)
/* length of the default file name without its terminating NUL */
172 const int len = sizeof(AUFS_XINO_FNAME) - 1;
173 aufs_bindex_t bindex, brid;
174 struct super_block *sb;
177 struct dentry *d, *h_root;
178 struct au_hdentry *hdp;
180 AuRwMustAnyLock(&sbinfo->si_rwsem);
184 f = au_sbi(sb)->si_xib;
188 /* stop printing the default xino path on the first writable branch */
190 brid = au_xino_brid(sb);
/* translate the branch id into its current index */
192 bindex = au_br_index(sb, brid);
193 hdp = au_di(sb->s_root)->di_hdentry;
194 h_root = hdp[0 + bindex].hd_dentry;
198 /* safe ->d_parent because the file is unlinked */
199 if (d->d_parent == h_root
201 && !memcmp(name->name, AUFS_XINO_FNAME, len))
204 seq_puts(seq, ",xino=");
205 err = au_xino_path(seq, f);
/*
 * show_options hook (installed in aufs_sop): print the aufs mount options
 * into m.  Most options are printed only when they differ from their
 * default value.
 *
 * Local helper macros:
 *   AuBool(name, str)      - prints ",<str>" or ",no<str>" when the flag
 *                            differs from its state in AuOpt_Def
 *   AuStr(name, str)       - prints ",<str>=<string>" when the masked
 *                            value differs from the masked AuOpt_Def
 *   AuUInt(name, str, val) - prints ",<str>=<val>" when val differs from
 *                            AUFS_<name>_DEF
 *
 * The superblock info is read under si_noflush_read_lock().
 */
212 /* seq_file will re-call me in case of too long string */
213 static int aufs_show_options(struct seq_file *m, struct vfsmount *mnt)
216 unsigned int mnt_flags, v;
217 struct super_block *sb;
218 struct au_sbinfo *sbinfo;
220 #define AuBool(name, str) do { \
221 v = au_opt_test(mnt_flags, name); \
222 if (v != au_opt_test(AuOpt_Def, name)) \
223 seq_printf(m, ",%s" #str, v ? "" : "no"); \
226 #define AuStr(name, str) do { \
227 v = mnt_flags & AuOptMask_##name; \
228 if (v != (AuOpt_Def & AuOptMask_##name)) \
229 seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
232 #define AuUInt(name, str, val) do { \
233 if (val != AUFS_##name##_DEF) \
234 seq_printf(m, "," #str "=%u", val); \
237 /* lock free root dinfo */
239 si_noflush_read_lock(sb);
/* the si= id is printed without a default comparison */
241 seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
243 mnt_flags = au_mntflags(sb);
/* xino enabled: print its path (au_show_xino); the other branch prints ",noxino" */
244 if (au_opt_test(mnt_flags, XINO)) {
245 err = au_show_xino(m, mnt);
249 seq_puts(m, ",noxino");
251 AuBool(TRUNC_XINO, trunc_xino);
254 AuBool(PLINK, plink);
256 /* AuBool(DIRPERM1, dirperm1); */
257 /* AuBool(REFROF, refrof); */
259 v = sbinfo->si_wbr_create;
260 if (v != AuWbrCreate_Def)
261 au_show_wbr_create(m, v, sbinfo);
263 v = sbinfo->si_wbr_copyup;
264 if (v != AuWbrCopyup_Def)
265 seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
/* diropq is printed as a single letter: 'a' when ALWAYS_DIROPQ is set, 'w' otherwise */
267 v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
268 if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
269 seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
271 AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
/* si_rdcache is kept in jiffies; it is printed in seconds */
273 v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
274 AuUInt(RDCACHE, rdcache, v);
276 AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
277 AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
280 /* AuBool(SUM_W, wsum); */
281 AuBool(WARN_PERM, warn_perm);
282 AuBool(VERBOSE, verbose);
285 /* be sure to print "br:" last */
298 /* ---------------------------------------------------------------------- */
300 /* sum mode which returns the summation for statfs(2) */
/*
 * Adds two u64 counters; au_statfs_sum() uses it to accumulate the
 * per-branch statfs values.
 * NOTE(review): the name suggests the sum is capped at a maximum instead
 * of wrapping around -- confirm against the function body.
 */
302 static u64 au_add_till_max(u64 a, u64 b)
/*
 * statfs for the "sum" mode: run vfs_statfs() on the branches, walking
 * from bend down to 0, accumulate blocks/bfree/bavail/files/ffree with
 * au_add_till_max(), and store the totals in buf.
 * For each branch, "shared" records whether a branch with a higher index
 * lives on the same super_block.
 * NOTE(review): a shared branch is presumably left out of the sum so that
 * one filesystem is not counted twice -- confirm.
 */
313 static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
316 u64 blocks, bfree, bavail, files, ffree;
317 aufs_bindex_t bend, bindex, i;
318 unsigned char shared;
320 struct super_block *h_sb;
330 for (bindex = bend; bindex >= 0; bindex--) {
331 h_path.mnt = au_sbr_mnt(sb, bindex);
332 h_sb = h_path.mnt->mnt_sb;
/* look for the same super_block among the branches above this one */
334 for (i = bindex + 1; !shared && i <= bend; i++)
335 shared = (au_sbr_sb(sb, i) == h_sb);
339 /* sb->s_root for NFS is unreliable */
340 h_path.dentry = h_path.mnt->mnt_root;
341 err = vfs_statfs(&h_path, buf);
345 blocks = au_add_till_max(blocks, buf->f_blocks);
346 bfree = au_add_till_max(bfree, buf->f_bfree);
347 bavail = au_add_till_max(bavail, buf->f_bavail);
348 files = au_add_till_max(files, buf->f_files);
349 ffree = au_add_till_max(ffree, buf->f_ffree);
/* buf was used as scratch space per branch; overwrite it with the totals */
352 buf->f_blocks = blocks;
353 buf->f_bfree = bfree;
354 buf->f_bavail = bavail;
355 buf->f_files = files;
356 buf->f_ffree = ffree;
/*
 * statfs hook (installed in aufs_sop).
 * When the SUM option is not set, the values come from vfs_statfs() on
 * the root of the branch at index 0; au_statfs_sum() supplies them in the
 * other case.  buf is then stamped with the aufs magic number and name
 * length limit, and f_fsid is zeroed.
 * The superblock info is read under si_noflush_read_lock().
 */
362 static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
366 struct super_block *sb;
368 /* lock free root dinfo */
370 si_noflush_read_lock(sb);
371 if (!au_opt_test(au_mntflags(sb), SUM)) {
372 /* sb->s_root for NFS is unreliable */
373 h_path.mnt = au_sbr_mnt(sb, 0);
374 h_path.dentry = h_path.mnt->mnt_root;
375 err = vfs_statfs(&h_path, buf);
377 err = au_statfs_sum(sb, buf);
/* report aufs identity, not that of the lower filesystem */
381 buf->f_type = AUFS_SUPER_MAGIC;
382 buf->f_namelen = AUFS_MAX_NAMELEN;
383 memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
385 /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
390 /* ---------------------------------------------------------------------- */
/*
 * put_super hook (installed in aufs_sop): finishes the debugfs side of
 * the superblock info and drops a reference on its kobject.
 */
392 /* final actions when unmounting a file system */
393 static void aufs_put_super(struct super_block *sb)
395 struct au_sbinfo *sbinfo;
401 dbgaufs_si_fin(sbinfo);
402 kobject_put(&sbinfo->si_kobj);
405 /* ---------------------------------------------------------------------- */
/*
 * Free an array; is_vmalloc_addr() is used to tell which allocator the
 * memory came from (au_array_alloc() uses both kmalloc() and vmalloc()).
 */
407 void au_array_free(void *array)
410 if (!is_vmalloc_addr(array))
/*
 * Allocate an array of *hint pointer-sized slots and let cb fill it.
 *
 * hint: in, the number of slots to allocate
 * cb:   called as cb(array, *hint, arg); its return value n is the number
 *       of entries and must not exceed *hint (AuDebugOn)
 * arg:  opaque argument passed through to cb
 *
 * Error pointers: -EMFILE when the byte size would overflow
 * unsigned long long, -ENOMEM when both allocators fail.
 */
417 void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg)
420 unsigned long long n;
/* sizeof(array) is the size of one pointer; this guards the multiplication below */
427 if (*hint > ULLONG_MAX / sizeof(array)) {
428 array = ERR_PTR(-EMFILE);
429 pr_err("hint %llu\n", *hint);
/* try kmalloc() first and fall back to vmalloc() when it fails */
433 array = kmalloc(sizeof(array) * *hint, GFP_NOFS);
434 if (unlikely(!array))
435 array = vmalloc(sizeof(array) * *hint);
436 if (unlikely(!array)) {
437 array = ERR_PTR(-ENOMEM);
441 n = cb(array, *hint, arg);
442 AuDebugOn(n > *hint);
/*
 * au_array_alloc() callback used by au_iarray_alloc().
 * Walks an inode list linked through i_sb_list while holding
 * inode_sb_list_lock.  Only inodes that are not bad and whose aufs info
 * has ii_bstart >= 0 are looked at; for those, i_count is tested under
 * the inode's i_lock.
 * NOTE(review): inodes with a non-zero i_count are presumably the ones
 * stored into the array -- confirm in the body of the i_count test.
 * The max argument is unused.
 */
449 static unsigned long long au_iarray_cb(void *a,
450 unsigned long long max __maybe_unused,
453 unsigned long long n;
454 struct inode **p, *inode;
455 struct list_head *head;
460 spin_lock(&inode_sb_list_lock);
461 list_for_each_entry(inode, head, i_sb_list) {
462 if (!is_bad_inode(inode)
463 && au_ii(inode)->ii_bstart >= 0) {
464 spin_lock(&inode->i_lock);
465 if (atomic_read(&inode->i_count)) {
471 spin_unlock(&inode->i_lock);
474 spin_unlock(&inode_sb_list_lock);
/*
 * Build an array of the inodes on sb's s_inodes list.
 * *max is seeded with the si_ninodes counter and used as the size hint;
 * au_iarray_cb() fills the array.  The result is whatever
 * au_array_alloc() returns, which may be an error pointer.
 */
479 struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
481 *max = atomic_long_read(&au_sbi(sb)->si_ninodes);
482 return au_array_alloc(max, au_iarray_cb, &sb->s_inodes);
/*
 * Counterpart of au_iarray_alloc(): visits the first max entries of a.
 * NOTE(review): each entry is presumably released before the array itself
 * is freed -- confirm in the loop body.
 */
485 void au_iarray_free(struct inode **a, unsigned long long max)
487 unsigned long long ull;
489 for (ull = 0; ull < max; ull++)
494 /* ---------------------------------------------------------------------- */
497 * refresh dentry and inode at remount time.
/*
 * Refresh one dentry against its parent.
 * The dentry info is write-locked and the parent's read-locked (with
 * AuLock_IR) around au_refresh_dentry().  When the refresh succeeds and
 * dir_flags is non-zero, au_hn_reset() is called on the dentry's inode
 * with those flags.  Locks are released in reverse order.
 */
499 /* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
500 static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
501 struct dentry *parent)
505 di_write_lock_child(dentry);
506 di_read_lock_parent(parent, AuLock_IR);
507 err = au_refresh_dentry(dentry, parent);
508 if (!err && dir_flags)
509 au_hn_reset(dentry->d_inode, dir_flags);
510 di_read_unlock(parent, AuLock_IR);
511 di_write_unlock(dentry);
/*
 * Decide whether dentry needs a refresh and run au_do_refresh() on it.
 * The refresh is considered only when au_digen_test() is zero for the
 * parent and non-zero for the dentry.
 * NOTE(review): au_digen_test() presumably returns non-zero when the
 * generation does not match sigen -- confirm.
 * Non-directories are refreshed with dir_flags 0, directories with the
 * caller's dir_flags.  FAILED_REFRESH_DIR is set on sbinfo in the
 * directory path.
 * NOTE(review): that flag is presumably set only when the directory
 * refresh fails -- confirm.
 */
516 static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
517 struct au_sbinfo *sbinfo,
518 const unsigned int dir_flags)
521 struct dentry *parent;
/* dget_parent() takes a reference on the parent */
525 parent = dget_parent(dentry);
526 if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
527 inode = dentry->d_inode;
529 if (!S_ISDIR(inode->i_mode))
530 err = au_do_refresh(dentry, /*dir_flags*/0,
533 err = au_do_refresh(dentry, dir_flags, parent);
535 au_fset_si(sbinfo, FAILED_REFRESH_DIR);
538 err = au_do_refresh(dentry, /*dir_flags*/0, parent);
/*
 * Refresh the dentries below the root of sb.
 * The dentries are collected into pages by au_dcsub_pages() and each one
 * is passed to au_do_refresh_d() together with the current generation
 * (au_sigen) and the hnotify flags computed for the root directory.
 * The "e && !err" test keeps the first error while the walk continues.
 */
547 static int au_refresh_d(struct super_block *sb)
549 int err, i, j, ndentry, e;
551 struct au_dcsub_pages dpages;
552 struct au_dpage *dpage;
553 struct dentry **dentries, *d;
554 struct au_sbinfo *sbinfo;
555 struct dentry *root = sb->s_root;
556 const unsigned int dir_flags = au_hi_flags(root->d_inode, /*isdir*/1);
558 err = au_dpages_init(&dpages, GFP_NOFS);
561 err = au_dcsub_pages(&dpages, root, NULL, NULL);
565 sigen = au_sigen(sb);
/* outer loop: pages; inner loop: dentries within one page */
567 for (i = 0; i < dpages.ndpage; i++) {
568 dpage = dpages.dpages + i;
569 dentries = dpage->dentries;
570 ndentry = dpage->ndentry;
571 for (j = 0; j < ndentry; j++) {
573 e = au_do_refresh_d(d, sigen, sbinfo, dir_flags);
574 if (unlikely(e && !err))
581 au_dpages_free(&dpages);
/*
 * Refresh the inodes of sb.
 * An inode array is obtained from au_iarray_alloc(); every inode whose
 * generation (au_iigen) differs from the current one (au_sigen) gets
 * au_refresh_hinode_self() under its write lock.  A failure is logged
 * with the inode number and the loop goes on.  The array is released
 * with au_iarray_free().
 */
586 static int au_refresh_i(struct super_block *sb)
590 unsigned long long max, ull;
591 struct inode *inode, **array;
593 array = au_iarray_alloc(sb, &max);
/* array may be an error pointer; PTR_ERR() extracts the errno */
594 err = PTR_ERR(array);
599 sigen = au_sigen(sb);
600 for (ull = 0; ull < max; ull++) {
602 if (au_iigen(inode) != sigen) {
603 ii_write_lock_child(inode);
604 e = au_refresh_hinode_self(inode);
605 ii_write_unlock(inode);
607 pr_err("error %d, i%lu\n", e, inode->i_ino);
610 /* go on even if err */
615 au_iarray_free(array, max);
/*
 * Refresh dentries and inodes of sb; called from aufs_remount_fs() and
 * aufs_kill_sb().
 *
 * Steps visible here:
 *  - clear the FAILED_REFRESH_DIR flag
 *  - reset hnotify on every branch for the current udba mode; a failure
 *    is reported through AuIOErr and ignored
 *  - reset hnotify on the root inode
 *  - drop the root dentry-info write lock, run au_refresh_d() and
 *    au_refresh_i(), then take the lock again
 *  - copy the attributes up to the root inode with force set
 * A refresh failure is only reported (AuIOErr); nothing is returned.
 */
621 static void au_remount_refresh(struct super_block *sb)
625 aufs_bindex_t bindex, bend;
628 struct au_branch *br;
631 au_fclr_si(au_sbi(sb), FAILED_REFRESH_DIR);
634 DiMustNoWaiters(root);
635 inode = root->d_inode;
636 IiMustNoWaiters(inode);
638 udba = au_opt_udba(sb);
640 for (bindex = 0; bindex <= bend; bindex++) {
641 br = au_sbr(sb, bindex);
642 err = au_hnotify_reset_br(udba, br, br->br_perm);
644 AuIOErr("hnotify failed on br %d, %d, ignored\n",
646 /* go on even if err */
648 au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
/* the root lock is released while dentries and inodes are refreshed */
650 di_write_unlock(root);
651 err = au_refresh_d(sb);
652 e = au_refresh_i(sb);
653 if (unlikely(e && !err))
655 /* aufs_write_lock() calls ..._child() */
656 di_write_lock_child(root);
658 au_cpup_attr_all(inode, /*force*/1);
661 AuIOErr("refresh failed, ignored, %d\n", err);
/*
 * Takes an error code and returns an int.
 * NOTE(review): which errno values are translated, and to what, is
 * decided in the function body -- check it before relying on a
 * particular code.
 */
664 /* stop extra interpretation of errno in mount(8), and strange error messages */
665 static int cvt_err(int err)
/*
 * remount_fs hook (installed in aufs_sop).
 *
 * Empty or missing option string: the current options are verified
 * against the new flags (au_opts_verify) under the superblock-info and
 * root dentry-info write locks.
 *
 * Otherwise: one page is allocated for the parsed options, the string is
 * parsed before any aufs lock is taken, and the options are applied with
 * au_opts_remount() while holding, in this order, the root inode's
 * i_mutex, the superblock-info write lock and the root dentry-info write
 * lock.  Depending on the flags left in opts.flags, au_remount_refresh()
 * and/or au_dy_arefresh() are run before unlocking.  The page is freed
 * at the end.
 */
679 static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
682 unsigned int mntflags;
686 struct au_sbinfo *sbinfo;
690 if (!data || !*data) {
691 err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
693 di_write_lock_child(root);
694 err = au_opts_verify(sb, *flags, /*pending*/0);
695 aufs_write_unlock(root);
701 memset(&opts, 0, sizeof(opts));
702 opts.opt = (void *)__get_free_page(GFP_NOFS);
703 if (unlikely(!opts.opt))
/* number of option slots that fit into the page */
705 opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
706 opts.flags = AuOpts_REMOUNT;
707 opts.sb_flags = *flags;
709 /* parse it before aufs lock */
710 err = au_opts_parse(sb, data, &opts);
715 inode = root->d_inode;
716 mutex_lock(&inode->i_mutex);
717 err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
720 di_write_lock_child(root);
722 /* au_opts_remount() may return an error */
723 err = au_opts_remount(sb, &opts);
726 if (au_ftest_opts(opts.flags, REFRESH))
727 au_remount_refresh(sb);
729 if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
730 mntflags = au_mntflags(sb);
/* normalize the DIO option test to 0 or 1 */
731 do_dx = !!au_opt_test(mntflags, DIO);
732 au_dy_arefresh(do_dx);
735 aufs_write_unlock(root);
738 mutex_unlock(&inode->i_mutex);
740 free_page((unsigned long)opts.opt);
/*
 * super_block operations of aufs; aufs_fill_super() stores the address
 * of this table in sb->s_op.
 */
747 static const struct super_operations aufs_sop = {
748 .alloc_inode = aufs_alloc_inode,
749 .destroy_inode = aufs_destroy_inode,
750 /* always deleting, no clearing */
751 .drop_inode = generic_delete_inode,
752 .show_options = aufs_show_options,
753 .statfs = aufs_statfs,
754 .put_super = aufs_put_super,
755 .remount_fs = aufs_remount_fs
758 /* ---------------------------------------------------------------------- */
/*
 * Create the root inode and the root dentry of sb.
 * The inode is obtained with au_iget_locked(sb, AUFS_ROOT_INO), which may
 * return an error pointer.  It is set up as a directory with the aufs
 * directory operations, unlocked with unlock_new_inode(), and a root
 * dentry is allocated for it and initialized with au_di_init().
 * Returns 0 on success.
 */
760 static int alloc_root(struct super_block *sb)
767 inode = au_iget_locked(sb, AUFS_ROOT_INO);
768 err = PTR_ERR(inode);
772 inode->i_op = &aufs_dir_iop;
773 inode->i_fop = &aufs_dir_fop;
/* only the file type is set in i_mode here; no permission bits */
774 inode->i_mode = S_IFDIR;
776 unlock_new_inode(inode);
778 root = d_alloc_root(inode);
785 err = au_di_init(root);
788 return 0; /* success */
791 goto out; /* do not iput */
/*
 * fill_super callback handed to mount_nodev() by aufs_mount().
 *
 * raw_data is the option string; an empty or missing string is treated
 * as an unlikely special case.  One page is allocated for the parsed
 * options.  The superblock info is allocated (au_si_alloc), the
 * superblock is given the aufs operations, dentry operations and magic
 * number, and the root is created (alloc_root).  The options are parsed
 * with the aufs write lock released and then applied with au_opts_mount()
 * while holding the root inode's i_mutex and the aufs write lock.
 *
 * The tail of the function undoes the superblock-info setup
 * (dbgaufs_si_fin, kobject_put, s_fs_info cleared) and frees the option
 * page.
 * NOTE(review): the success path jumps to out_opts, so it presumably
 * skips the superblock-info teardown and only frees the page -- confirm
 * the label order.
 */
800 static int aufs_fill_super(struct super_block *sb, void *raw_data,
801 int silent __maybe_unused)
807 char *arg = raw_data;
809 if (unlikely(!arg || !*arg)) {
816 memset(&opts, 0, sizeof(opts));
817 opts.opt = (void *)__get_free_page(GFP_NOFS);
818 if (unlikely(!opts.opt))
/* number of option slots that fit into the page */
820 opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
821 opts.sb_flags = sb->s_flags;
823 err = au_si_alloc(sb);
827 /* all timestamps always follow the ones on the branch */
828 sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
829 sb->s_op = &aufs_sop;
830 sb->s_d_op = &aufs_dop;
831 sb->s_magic = AUFS_SUPER_MAGIC;
835 err = alloc_root(sb);
841 inode = root->d_inode;
844 * actually we can parse options regardless aufs lock here.
845 * but at remount time, parsing must be done before aufs lock.
846 * so we follow the same rule.
848 ii_write_lock_parent(inode);
849 aufs_write_unlock(root);
850 err = au_opts_parse(sb, arg, &opts);
854 /* lock vfs_inode first, then aufs. */
855 mutex_lock(&inode->i_mutex);
856 aufs_write_lock(root);
857 err = au_opts_mount(sb, &opts);
859 aufs_write_unlock(root);
860 mutex_unlock(&inode->i_mutex);
862 goto out_opts; /* success */
868 dbgaufs_si_fin(au_sbi(sb));
869 kobject_put(&au_sbi(sb)->si_kobj);
870 sb->s_fs_info = NULL;
872 free_page((unsigned long)opts.opt);
880 /* ---------------------------------------------------------------------- */
/*
 * Mount entry point: builds the superblock through mount_nodev() with
 * aufs_fill_super() as the fill callback, then takes the superblock-info
 * write lock and calls sysaufs_brs_add(sb, 0).
 * dev_name is unused.
 */
882 static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
883 const char *dev_name __maybe_unused,
887 struct super_block *sb;
889 /* all timestamps always follow the ones on the branch */
890 /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
891 root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
/* NOTE(review): !AuLock_FLUSH is 0 if AuLock_FLUSH is a non-zero flag, i.e. no lock flags -- confirm */
896 si_write_lock(sb, !AuLock_FLUSH);
897 sysaufs_brs_add(sb, 0);
/*
 * kill_sb hook (installed in aufs_fs_type).
 * Under the aufs write lock on the root:
 *  - runs the fin callback of the writable-branch creation policy, if
 *    the policy has one
 *  - when udba is set to hnotify, switches it to none and runs
 *    au_remount_refresh()
 *  - when the PLINK option is set, calls au_plink_put()
 *  - clears si_sb
 * After unlocking, au_nwt_flush() is called on si_nowait and the
 * superblock is shut down with generic_shutdown_super().
 */
905 static void aufs_kill_sb(struct super_block *sb)
907 struct au_sbinfo *sbinfo;
912 aufs_write_lock(sb->s_root);
913 if (sbinfo->si_wbr_create_ops->fin)
914 sbinfo->si_wbr_create_ops->fin(sb);
915 if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
916 au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
917 au_remount_refresh(sb);
919 if (au_opt_test(sbinfo->si_mntflags, PLINK))
920 au_plink_put(sb, /*verbose*/1);
922 sbinfo->si_sb = NULL;
923 aufs_write_unlock(sb->s_root);
924 au_nwt_flush(&sbinfo->si_nowait);
926 generic_shutdown_super(sb);
/*
 * Filesystem type descriptor of aufs.  Not static, so it is visible to
 * other compilation units.  The flags visible here are
 * FS_RENAME_DOES_D_MOVE and FS_REVAL_DOT; kill_sb is aufs_kill_sb().
 */
929 struct file_system_type aufs_fs_type = {
932 FS_RENAME_DOES_D_MOVE /* a race between rename and others */
933 | FS_REVAL_DOT, /* for NFS branch and udba */
935 .kill_sb = aufs_kill_sb,
936 /* no need to __module_get() and module_put(). */
937 .owner = THIS_MODULE,