2 * Copyright (C) 2005-2013 Junjiro R. Okajima
4 * This program, aufs is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 * mount and super_block operations
24 #include <linux/module.h>
25 #include <linux/seq_file.h>
26 #include <linux/statfs.h>
27 #include <linux/vmalloc.h>
28 #include <linux/writeback.h>
/*
 * super_operations ->alloc_inode hook.
 * Obtains an inode container from au_cache_alloc_icntnr(), sets the
 * embedded vfs_inode's i_version to 1 and clears iinfo.ii_hinode.
 * NOTE(review): the allocation-failure check and the return statement
 * are not visible in this excerpt.
 */
34 static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
38 c = au_cache_alloc_icntnr();
41 c->vfs_inode.i_version = 1; /* sigen(sb); */
42 c->iinfo.ii_hinode = NULL;
/*
 * RCU callback registered by aufs_destroy_inode().
 * @head: the i_rcu member embedded in the inode being destroyed.
 */
48 static void aufs_destroy_inode_cb(struct rcu_head *head)
/* recover the inode which embeds @head */
50 struct inode *inode = container_of(head, struct inode, i_rcu);
/* re-initialize the dentry list head before the container is released */
52 INIT_LIST_HEAD(&inode->i_dentry);
/* release the au_icntnr which embeds the inode as vfs_inode */
53 au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
/*
 * super_operations ->destroy_inode hook.
 * Does not free the inode directly; queues aufs_destroy_inode_cb() via
 * call_rcu() on the inode's i_rcu head.
 * NOTE(review): any statement preceding call_rcu() is not visible in this
 * excerpt.
 */
56 static void aufs_destroy_inode(struct inode *inode)
59 call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
/*
 * Wrapper around iget_locked() for aufs inodes.
 * @sb:  aufs super_block
 * @ino: inode number to look up
 *
 * A NULL result from iget_locked() is converted to ERR_PTR(-ENOMEM), so
 * callers are expected to test the result as an error pointer.
 * NOTE(review): the branch taken for a cached (non-I_NEW) inode and the
 * error handling after au_xigen_new()/au_iinfo_init() are not visible in
 * this excerpt.
 */
62 struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
67 inode = iget_locked(sb, ino);
68 if (unlikely(!inode)) {
69 inode = ERR_PTR(-ENOMEM);
/* an inode without I_NEW was already set up earlier */
72 if (!(inode->i_state & I_NEW))
/* newly created inode: run the aufs-specific initialization */
75 err = au_xigen_new(inode);
77 err = au_iinfo_init(inode);
86 /* never return NULL */
92 /* lock free root dinfo */
/*
 * Print the branch list into @seq.
 * For each branch index from 0 to bend, the branch path is printed via
 * au_seq_path(), followed by "=<perm>" where <perm> comes from
 * au_optstr_br_perm(). A ':' is emitted between branches, not after the
 * last one. The loop stops at the first non-zero err.
 * NOTE(review): the initialization of err and bend, and the handling of
 * the perm string after printing, are not visible in this excerpt.
 */
93 static int au_show_brs(struct seq_file *seq, struct super_block *sb)
96 aufs_bindex_t bindex, bend;
98 struct au_hdentry *hdp;
/* lower dentries of the aufs root, indexed by branch */
104 hdp = au_di(sb->s_root)->di_hdentry;
105 for (bindex = 0; !err && bindex <= bend; bindex++) {
106 br = au_sbr(sb, bindex);
/* build the lower path from the branch mount and the lower root dentry */
107 path.mnt = au_br_mnt(br);
108 path.dentry = hdp[bindex].hd_dentry;
109 err = au_seq_path(seq, &path);
111 perm = au_optstr_br_perm(br->br_perm);
113 err = seq_printf(seq, "=%s", perm);
/* separator between branches */
120 if (!err && bindex != bend)
121 err = seq_putc(seq, ':');
/*
 * Print the ",create=<policy>" mount option into @m.
 * @m:      output seq_file
 * @v:      create policy value (one of the AuWbrCreate_* constants)
 * @sbinfo: aufs super_block info; si_rwsem is checked by AuRwMustAnyLock()
 *
 * Policies carrying parameters print them from sbinfo->si_wbr_mfs:
 * mfs_expire (passed through jiffies_to_msecs()) and/or mfsrr_watermark.
 * NOTE(review): the switch statement, the output for TDP/MFS/PMFS, the
 * break statements and the closing arguments of the jiffies_to_msecs()
 * calls are not visible in this excerpt; confirm that the final
 * argument type matches the "%lu" conversions.
 */
127 static void au_show_wbr_create(struct seq_file *m, int v,
128 struct au_sbinfo *sbinfo)
132 AuRwMustAnyLock(&sbinfo->si_rwsem);
134 seq_puts(m, ",create=");
/* name string of the policy */
135 pat = au_optstr_wbr_create(v);
/* policies grouped here have no visible parameter output */
137 case AuWbrCreate_TDP:
139 case AuWbrCreate_MFS:
140 case AuWbrCreate_PMFS:
/* "mfs" with an expire value */
143 case AuWbrCreate_MFSV:
144 seq_printf(m, /*pat*/"mfs:%lu",
145 jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
/* "pmfs" with an expire value */
148 case AuWbrCreate_PMFSV:
149 seq_printf(m, /*pat*/"pmfs:%lu",
150 jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
/* "mfsrr" with a watermark */
153 case AuWbrCreate_MFSRR:
154 seq_printf(m, /*pat*/"mfsrr:%llu",
155 sbinfo->si_wbr_mfs.mfsrr_watermark);
/* "mfsrr" with a watermark and an expire value */
157 case AuWbrCreate_MFSRRV:
158 seq_printf(m, /*pat*/"mfsrr:%llu:%lu",
159 sbinfo->si_wbr_mfs.mfsrr_watermark,
160 jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
/* "pmfsrr" with a watermark */
163 case AuWbrCreate_PMFSRR:
164 seq_printf(m, /*pat*/"pmfsrr:%llu",
165 sbinfo->si_wbr_mfs.mfsrr_watermark);
/* "pmfsrr" with a watermark and an expire value */
167 case AuWbrCreate_PMFSRRV:
168 seq_printf(m, /*pat*/"pmfsrr:%llu:%lu",
169 sbinfo->si_wbr_mfs.mfsrr_watermark,
170 jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
/*
 * Print the ",xino=<path>" mount option into @seq.
 * The file examined is au_sbi(sb)->si_xib. Its parent dentry and name
 * are compared against the lower root of the branch returned by
 * au_xino_brid() and against AUFS_XINO_FNAME; the existing comment says
 * this is to stop printing the default xino path.
 * NOTE(review): the assignments of sb, sbinfo, d and name, the early
 * exits and the return statement are not visible in this excerpt.
 */
176 static int au_show_xino(struct seq_file *seq, struct vfsmount *mnt)
/* length of the default xino file name without the trailing NUL */
182 const int len = sizeof(AUFS_XINO_FNAME) - 1;
183 aufs_bindex_t bindex, brid;
184 struct super_block *sb;
187 struct dentry *d, *h_root;
188 struct au_hdentry *hdp;
190 AuRwMustAnyLock(&sbinfo->si_rwsem);
194 f = au_sbi(sb)->si_xib;
198 /* stop printing the default xino path on the first writable branch */
200 brid = au_xino_brid(sb);
/* translate the branch id into an index, then fetch its lower root */
202 bindex = au_br_index(sb, brid);
203 hdp = au_di(sb->s_root)->di_hdentry;
204 h_root = hdp[0 + bindex].hd_dentry;
208 /* safe ->d_parent because the file is unlinked */
209 if (d->d_parent == h_root
211 && !memcmp(name->name, AUFS_XINO_FNAME, len))
214 seq_puts(seq, ",xino=");
215 err = au_xino_path(seq, f);
222 /* seq_file will re-call me in case of too long string */
/*
 * super_operations ->show_options hook: print the aufs mount options
 * into @m. Most options are printed only when they differ from their
 * default value.
 * NOTE(review): the assignments of sb and sbinfo, the error paths, the
 * branch list output, the unlock and the return statement are not
 * visible in this excerpt.
 */
223 static int aufs_show_options(struct seq_file *m, struct vfsmount *mnt)
226 unsigned int mnt_flags, v;
227 struct super_block *sb;
228 struct au_sbinfo *sbinfo;
/* AuBool: print ",<str>" or ",no<str>" when the flag differs from AuOpt_Def */
230 #define AuBool(name, str) do { \
231 v = au_opt_test(mnt_flags, name); \
232 if (v != au_opt_test(AuOpt_Def, name)) \
233 seq_printf(m, ",%s" #str, v ? "" : "no"); \
/* AuStr: print ",<str>=<string>" when the masked value differs from AuOpt_Def */
236 #define AuStr(name, str) do { \
237 v = mnt_flags & AuOptMask_##name; \
238 if (v != (AuOpt_Def & AuOptMask_##name)) \
239 seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
/* AuUInt: print ",<str>=<val>" when val differs from AUFS_<name>_DEF */
242 #define AuUInt(name, str, val) do { \
243 if (val != AUFS_##name##_DEF) \
244 seq_printf(m, "," #str "=%u", val); \
247 /* lock free root dinfo */
249 si_noflush_read_lock(sb);
/* the super_block info id is always printed */
251 seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
253 mnt_flags = au_mntflags(sb);
/* xino path when the XINO flag is set; the ",noxino" line follows */
254 if (au_opt_test(mnt_flags, XINO)) {
255 err = au_show_xino(m, mnt);
259 seq_puts(m, ",noxino");
261 AuBool(TRUNC_XINO, trunc_xino);
264 AuBool(PLINK, plink);
266 /* AuBool(DIRPERM1, dirperm1); */
267 /* AuBool(REFROF, refrof); */
/* create policy, only when it is not the default */
269 v = sbinfo->si_wbr_create;
270 if (v != AuWbrCreate_Def)
271 au_show_wbr_create(m, v, sbinfo);
/* copy-up policy, only when it is not the default */
273 v = sbinfo->si_wbr_copyup;
274 if (v != AuWbrCopyup_Def)
275 seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
/* diropq is shown as a single character: 'a' when set, 'w' otherwise */
277 v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
278 if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
279 seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
281 AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
/* si_rdcache is held in jiffies; it is shown in seconds */
283 v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
284 AuUInt(RDCACHE, rdcache, v);
286 AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
287 AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
290 /* AuBool(SUM_W, wsum); */
291 AuBool(WARN_PERM, warn_perm);
292 AuBool(VERBOSE, verbose);
295 /* be sure to print "br:" last */
308 /* ---------------------------------------------------------------------- */
310 /* sum mode which returns the summation for statfs(2) */
/*
 * NOTE(review): only the signature is visible in this excerpt. Judging
 * from the name and its use as an accumulator in au_statfs_sum(), this
 * presumably returns a + b limited to the maximum u64 value -- confirm
 * against the body.
 */
312 static u64 au_add_till_max(u64 a, u64 b)
/*
 * NOTE(review): only the signature is visible in this excerpt. Judging
 * from the name and its use for block-size scaling in au_statfs_sum(),
 * this presumably returns a * mul limited to the maximum u64 value --
 * confirm against the body.
 */
323 static u64 au_mul_till_max(u64 a, long mul)
/*
 * statfs(2) "sum" mode: accumulate the statistics of the branches into
 * @buf.
 * @sb:  aufs super_block
 * @buf: used as scratch for each vfs_statfs() call, then overwritten
 *       with the accumulated totals
 *
 * Block counts are kept in units of the smallest f_bsize seen so far;
 * when a branch reports a smaller block size the running totals are
 * rescaled first. f_files and f_ffree are added without scaling.
 * NOTE(review): the initial values of the accumulators and bsize, the
 * handling of a "shared" branch, the vfs_statfs() error path and the
 * return statement are not visible in this excerpt.
 */
334 static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
338 u64 blocks, bfree, bavail, files, ffree;
339 aufs_bindex_t bend, bindex, i;
340 unsigned char shared;
342 struct super_block *h_sb;
352 for (bindex = 0; bindex <= bend; bindex++) {
353 h_path.mnt = au_sbr_mnt(sb, bindex);
354 h_sb = h_path.mnt->mnt_sb;
/* does an earlier branch live on the same lower super_block? */
356 for (i = 0; !shared && i < bindex; i++)
357 shared = (au_sbr_sb(sb, i) == h_sb);
361 /* sb->s_root for NFS is unreliable */
362 h_path.dentry = h_path.mnt->mnt_root;
363 err = vfs_statfs(&h_path, buf);
367 if (bsize > buf->f_bsize) {
369 * we will reduce bsize, so we have to expand blocks
370 * etc. to match them again
372 factor = (bsize / buf->f_bsize);
373 blocks = au_mul_till_max(blocks, factor);
374 bfree = au_mul_till_max(bfree, factor);
375 bavail = au_mul_till_max(bavail, factor);
376 bsize = buf->f_bsize;
/* scale this branch's block counts to the running block size, then add */
379 factor = (buf->f_bsize / bsize);
380 blocks = au_add_till_max(blocks,
381 au_mul_till_max(buf->f_blocks, factor));
382 bfree = au_add_till_max(bfree,
383 au_mul_till_max(buf->f_bfree, factor));
384 bavail = au_add_till_max(bavail,
385 au_mul_till_max(buf->f_bavail, factor));
386 files = au_add_till_max(files, buf->f_files);
387 ffree = au_add_till_max(ffree, buf->f_ffree);
/* publish the accumulated totals */
390 buf->f_bsize = bsize;
391 buf->f_blocks = blocks;
392 buf->f_bfree = bfree;
393 buf->f_bavail = bavail;
394 buf->f_files = files;
395 buf->f_ffree = ffree;
/*
 * super_operations ->statfs hook.
 * Without the SUM mount option the statistics come from vfs_statfs() on
 * the root of branch index 0; with it, from au_statfs_sum(). Afterwards
 * f_type, f_namelen and f_fsid are overwritten with aufs values.
 * NOTE(review): the assignment of sb, the condition guarding the
 * overrides, the unlock and the return statement are not visible in
 * this excerpt.
 */
402 static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
406 struct super_block *sb;
408 /* lock free root dinfo */
410 si_noflush_read_lock(sb);
411 if (!au_opt_test(au_mntflags(sb), SUM)) {
412 /* sb->s_root for NFS is unreliable */
413 h_path.mnt = au_sbr_mnt(sb, 0);
414 h_path.dentry = h_path.mnt->mnt_root;
415 err = vfs_statfs(&h_path, buf);
417 err = au_statfs_sum(sb, buf);
/* report aufs identity instead of the lower filesystem's */
421 buf->f_type = AUFS_SUPER_MAGIC;
422 buf->f_namelen = AUFS_MAX_NAMELEN;
423 memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
425 /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
430 /* ---------------------------------------------------------------------- */
/*
 * super_operations ->sync_fs hook.
 * Walks the branches from index 0 to bend and, for each lower
 * super_block that provides ->sync_fs, calls it with the same @wait
 * value. The walk continues after an error.
 * NOTE(review): the statement executed for non-writable branches, the
 * assignment guarded by "e && !err" (presumably recording the first
 * error), the unlock and the return statement are not visible in this
 * excerpt.
 */
432 static int aufs_sync_fs(struct super_block *sb, int wait)
435 aufs_bindex_t bend, bindex;
436 struct au_branch *br;
437 struct super_block *h_sb;
440 si_noflush_read_lock(sb);
442 for (bindex = 0; bindex <= bend; bindex++) {
443 br = au_sbr(sb, bindex);
/* test whether the branch permission is writable */
444 if (!au_br_writable(br->br_perm))
447 h_sb = au_sbr_sb(sb, bindex);
/* ->sync_fs is optional on the lower super_block */
448 if (h_sb->s_op->sync_fs) {
449 e = h_sb->s_op->sync_fs(h_sb, wait);
450 if (unlikely(e && !err))
452 /* go on even if an error happens */
460 /* ---------------------------------------------------------------------- */
462 /* final actions when unmounting a file system */
/*
 * super_operations ->put_super hook.
 * Calls dbgaufs_si_fin() on the aufs super_block info and drops a
 * reference on its kobject (si_kobj).
 * NOTE(review): the assignment of sbinfo and any guard around these
 * calls are not visible in this excerpt.
 */
463 static void aufs_put_super(struct super_block *sb)
465 struct au_sbinfo *sbinfo;
471 dbgaufs_si_fin(sbinfo);
472 kobject_put(&sbinfo->si_kobj);
475 /* ---------------------------------------------------------------------- */
/*
 * Release an array obtained from au_array_alloc().
 * The release path is selected by is_vmalloc_addr(), matching the
 * kmalloc()/vmalloc() fallback in au_array_alloc().
 * NOTE(review): the statements of both branches (presumably kfree() and
 * vfree()) and any NULL check are not visible in this excerpt.
 */
477 void au_array_free(void *array)
480 if (!is_vmalloc_addr(array))
/*
 * Allocate an array of *hint elements and fill it through @cb.
 * @hint: in: requested number of elements
 * @cb:   called as cb(array, *hint, arg); its return value n is checked
 *        against *hint by AuDebugOn()
 * @arg:  opaque argument passed to @cb
 *
 * kmalloc(GFP_NOFS) is tried first and vmalloc() is the fallback.
 * Failures are reported as ERR_PTR(-EMFILE) when the size computation
 * would overflow and ERR_PTR(-ENOMEM) when both allocators fail.
 * NOTE(review): the element size is sizeof(array), i.e. the size of the
 * local variable itself, whose declaration is not visible here --
 * confirm it is a pointer type as intended. The handling of *hint == 0,
 * the write-back of n into *hint and the return statement are also not
 * visible in this excerpt.
 */
487 void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg)
490 unsigned long long n;
/* reject a hint whose byte size would overflow */
497 if (*hint > ULLONG_MAX / sizeof(array)) {
498 array = ERR_PTR(-EMFILE);
499 pr_err("hint %llu\n", *hint);
503 array = kmalloc(sizeof(array) * *hint, GFP_NOFS);
/* fall back to vmalloc() when kmalloc() fails */
504 if (unlikely(!array))
505 array = vmalloc(sizeof(array) * *hint);
506 if (unlikely(!array)) {
507 array = ERR_PTR(-ENOMEM);
/* let the callback populate the array; it must not exceed the hint */
511 n = cb(array, *hint, arg);
512 AuDebugOn(n > *hint);
/*
 * au_array_alloc() callback used by au_iarray_alloc().
 * Walks a list of inodes linked through i_sb_list while holding
 * inode_sb_list_lock. Inodes that are not bad and whose aufs
 * ii_bstart is >= 0 are examined under their own i_lock; the action
 * depends on a non-zero i_count.
 * NOTE(review): the third parameter, the assignments of p/head/n, the
 * statements executed when i_count is non-zero (presumably storing the
 * inode and taking a reference) and the return statement are not
 * visible in this excerpt.
 */
519 static unsigned long long au_iarray_cb(void *a,
520 unsigned long long max __maybe_unused,
523 unsigned long long n;
524 struct inode **p, *inode;
525 struct list_head *head;
530 spin_lock(&inode_sb_list_lock);
531 list_for_each_entry(inode, head, i_sb_list) {
532 if (!is_bad_inode(inode)
533 && au_ii(inode)->ii_bstart >= 0) {
534 spin_lock(&inode->i_lock);
535 if (atomic_read(&inode->i_count)) {
541 spin_unlock(&inode->i_lock);
544 spin_unlock(&inode_sb_list_lock);
/*
 * Build an array of the inodes on @sb.
 * @sb:  aufs super_block
 * @max: set to the si_ninodes counter and passed to au_array_alloc()
 *       as the size hint
 *
 * Returns whatever au_array_alloc() returns, which may be an ERR_PTR()
 * value. The array is filled by au_iarray_cb() from sb->s_inodes.
 */
549 struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
551 *max = atomic_long_read(&au_sbi(sb)->si_ninodes);
552 return au_array_alloc(max, au_iarray_cb, &sb->s_inodes);
/*
 * Release an inode array of @max entries.
 * NOTE(review): the loop body (presumably dropping each inode
 * reference) and the release of the array itself are not visible in
 * this excerpt.
 */
555 void au_iarray_free(struct inode **a, unsigned long long max)
557 unsigned long long ull;
559 for (ull = 0; ull < max; ull++)
564 /* ---------------------------------------------------------------------- */
567 * refresh dentry and inode at remount time.
569 /* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
/*
 * Refresh one dentry against its parent.
 * @dentry:    dentry to refresh; write-locked for the duration
 * @dir_flags: when non-zero and the refresh succeeds, passed to
 *             au_hn_reset() together with the dentry's inode
 * @parent:    parent dentry; read-locked with AuLock_IR
 *
 * The locks are released in the reverse order of acquisition.
 * NOTE(review): the return statement is not visible in this excerpt;
 * err holds the result of au_refresh_dentry().
 */
570 static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
571 struct dentry *parent)
575 di_write_lock_child(dentry);
576 di_read_lock_parent(parent, AuLock_IR);
577 err = au_refresh_dentry(dentry, parent);
578 if (!err && dir_flags)
579 au_hn_reset(dentry->d_inode, dir_flags);
580 di_read_unlock(parent, AuLock_IR);
581 di_write_unlock(dentry);
/*
 * Refresh a single dentry at remount time if its generation requires it.
 * @dentry:    dentry to examine
 * @sigen:     generation value passed to au_digen_test()
 * @sbinfo:    aufs super_block info; FAILED_REFRESH_DIR may be set on it
 * @dir_flags: flags forwarded to au_do_refresh() for directories
 *
 * Work is done only when au_digen_test() returns zero for the parent
 * and non-zero for the dentry. Non-directories are refreshed with
 * dir_flags 0, directories with @dir_flags.
 * NOTE(review): the conditions selecting the au_fset_si() call and the
 * final au_do_refresh() call, the release of the parent reference and
 * the return statement are not visible in this excerpt.
 */
586 static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
587 struct au_sbinfo *sbinfo,
588 const unsigned int dir_flags)
591 struct dentry *parent;
/* takes a reference on the parent */
595 parent = dget_parent(dentry);
596 if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
597 inode = dentry->d_inode;
599 if (!S_ISDIR(inode->i_mode))
600 err = au_do_refresh(dentry, /*dir_flags*/0,
603 err = au_do_refresh(dentry, dir_flags, parent);
605 au_fset_si(sbinfo, FAILED_REFRESH_DIR);
608 err = au_do_refresh(dentry, /*dir_flags*/0, parent);
/*
 * Refresh the dentries collected from the root of @sb.
 * The dentries are gathered into pages by au_dcsub_pages() and each one
 * is passed to au_do_refresh_d() with the current generation (sigen).
 * NOTE(review): the error checks after au_dpages_init() and
 * au_dcsub_pages(), the assignments of sbinfo and d, the statement
 * guarded by "e && !err" (presumably recording the first error) and the
 * return statement are not visible in this excerpt.
 */
617 static int au_refresh_d(struct super_block *sb)
619 int err, i, j, ndentry, e;
621 struct au_dcsub_pages dpages;
622 struct au_dpage *dpage;
623 struct dentry **dentries, *d;
624 struct au_sbinfo *sbinfo;
625 struct dentry *root = sb->s_root;
/* flags computed once from the root inode, used for every directory */
626 const unsigned int dir_flags = au_hi_flags(root->d_inode, /*isdir*/1);
628 err = au_dpages_init(&dpages, GFP_NOFS);
631 err = au_dcsub_pages(&dpages, root, NULL, NULL);
635 sigen = au_sigen(sb);
/* outer loop: pages; inner loop: dentries within a page */
637 for (i = 0; i < dpages.ndpage; i++) {
638 dpage = dpages.dpages + i;
639 dentries = dpage->dentries;
640 ndentry = dpage->ndentry;
641 for (j = 0; j < ndentry; j++) {
643 e = au_do_refresh_d(d, sigen, sbinfo, dir_flags);
644 if (unlikely(e && !err))
651 au_dpages_free(&dpages);
/*
 * Refresh the inodes of @sb whose generation differs from the current
 * one.
 * The inodes come from au_iarray_alloc(). Each inode whose au_iigen()
 * value is not sigen is write-locked and passed to
 * au_refresh_hinode_self(); failures are logged with pr_err() and the
 * loop continues.
 * NOTE(review): the IS_ERR check on the array, the assignment of inode
 * from the array, the condition around pr_err() and the return
 * statement are not visible in this excerpt.
 */
656 static int au_refresh_i(struct super_block *sb)
660 unsigned long long max, ull;
661 struct inode *inode, **array;
663 array = au_iarray_alloc(sb, &max);
/* the result may be an error pointer */
664 err = PTR_ERR(array);
669 sigen = au_sigen(sb);
670 for (ull = 0; ull < max; ull++) {
672 if (au_iigen(inode, NULL) != sigen) {
673 ii_write_lock_child(inode);
674 e = au_refresh_hinode_self(inode);
675 ii_write_unlock(inode);
677 pr_err("error %d, i%lu\n", e, inode->i_ino);
680 /* go on even if err */
685 au_iarray_free(array, max);
/*
 * Refresh aufs state for @sb; called from aufs_remount_fs() and
 * aufs_kill_sb().
 * Steps visible here: clear the FAILED_REFRESH_DIR flag, reset hnotify
 * on every branch for the current udba value (errors are logged and
 * ignored), reset hnotify on the root inode, refresh all dentries and
 * inodes with the root dentry unlocked, re-lock the root and copy up
 * its attributes with force set. A refresh failure is only logged.
 * NOTE(review): the assignments of root and bend, the conditions around
 * the AuIOErr() calls, the statement guarded by "e && !err" and any
 * additional locking are not visible in this excerpt.
 */
691 static void au_remount_refresh(struct super_block *sb)
695 aufs_bindex_t bindex, bend;
698 struct au_branch *br;
701 au_fclr_si(au_sbi(sb), FAILED_REFRESH_DIR);
704 DiMustNoWaiters(root);
705 inode = root->d_inode;
706 IiMustNoWaiters(inode);
708 udba = au_opt_udba(sb);
710 for (bindex = 0; bindex <= bend; bindex++) {
711 br = au_sbr(sb, bindex);
712 err = au_hnotify_reset_br(udba, br, br->br_perm);
714 AuIOErr("hnotify failed on br %d, %d, ignored\n",
716 /* go on even if err */
718 au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
/* the root dentry is unlocked while dentries and inodes are refreshed */
720 di_write_unlock(root);
721 err = au_refresh_d(sb);
722 e = au_refresh_i(sb);
723 if (unlikely(e && !err))
725 /* aufs_write_lock() calls ..._child() */
726 di_write_lock_child(root);
728 au_cpup_attr_all(inode, /*force*/1);
731 AuIOErr("refresh failed, ignored, %d\n", err);
734 /* stop extra interpretation of errno in mount(8), and strange error messages */
/*
 * NOTE(review): only the signature is visible in this excerpt. From the
 * comment above, this presumably maps certain error codes to another
 * value before they are returned to the mount path -- confirm against
 * the body.
 */
735 static int cvt_err(int err)
/*
 * super_operations ->remount_fs hook.
 * @sb:    aufs super_block
 * @flags: remount flags; *flags is copied into opts.sb_flags
 * @data:  option string, may be NULL or empty
 *
 * With no option string, only au_opts_verify() is run under the aufs
 * write lock. Otherwise one page is allocated for the parsed options,
 * the string is parsed before any aufs lock is taken, and then the root
 * inode's i_mutex and the aufs locks are taken (in that order) around
 * au_opts_remount(). The page is freed with free_page().
 * NOTE(review): the assignment of root, the error checks and gotos
 * between the visible statements, and the return statement are not
 * visible in this excerpt.
 */
749 static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
752 unsigned int mntflags;
756 struct au_sbinfo *sbinfo;
/* nothing to parse: just verify the current options against *flags */
760 if (!data || !*data) {
761 err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
763 di_write_lock_child(root);
764 err = au_opts_verify(sb, *flags, /*pending*/0);
765 aufs_write_unlock(root);
/* one page holds the parsed option array */
771 memset(&opts, 0, sizeof(opts));
772 opts.opt = (void *)__get_free_page(GFP_NOFS);
773 if (unlikely(!opts.opt))
775 opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
776 opts.flags = AuOpts_REMOUNT;
777 opts.sb_flags = *flags;
779 /* parse it before aufs lock */
780 err = au_opts_parse(sb, data, &opts);
785 inode = root->d_inode;
786 mutex_lock(&inode->i_mutex);
787 err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
790 di_write_lock_child(root);
792 /* au_opts_remount() may return an error */
793 err = au_opts_remount(sb, &opts);
/* the parsed options may request a refresh */
796 if (au_ftest_opts(opts.flags, REFRESH))
797 au_remount_refresh(sb);
/* the dynamic-op refresh is given the current state of the DIO option */
799 if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
800 mntflags = au_mntflags(sb);
801 do_dx = !!au_opt_test(mntflags, DIO);
802 au_dy_arefresh(do_dx);
805 aufs_write_unlock(root);
808 mutex_unlock(&inode->i_mutex);
810 free_page((unsigned long)opts.opt);
/*
 * super_block operations of aufs; installed as sb->s_op by
 * aufs_fill_super().
 */
817 static const struct super_operations aufs_sop = {
818 .alloc_inode = aufs_alloc_inode,
819 .destroy_inode = aufs_destroy_inode,
820 /* always deleting, no clearing */
821 .drop_inode = generic_delete_inode,
822 .show_options = aufs_show_options,
823 .statfs = aufs_statfs,
824 .put_super = aufs_put_super,
825 .sync_fs = aufs_sync_fs,
826 .remount_fs = aufs_remount_fs
829 /* ---------------------------------------------------------------------- */
/*
 * Create the root inode and root dentry of @sb.
 * The inode is obtained with au_iget_locked() for AUFS_ROOT_INO, set up
 * as a directory with the aufs directory operations, and unlocked with
 * unlock_new_inode(). The root dentry comes from d_alloc_root() and is
 * initialized by au_di_init(). Returns 0 on success.
 * NOTE(review): the error checks, the assignment of sb->s_root and the
 * error-path labels are not visible in this excerpt.
 */
831 static int alloc_root(struct super_block *sb)
838 inode = au_iget_locked(sb, AUFS_ROOT_INO);
/* the result may be an error pointer */
839 err = PTR_ERR(inode);
843 inode->i_op = &aufs_dir_iop;
844 inode->i_fop = &aufs_dir_fop;
845 inode->i_mode = S_IFDIR;
847 unlock_new_inode(inode);
849 root = d_alloc_root(inode);
856 err = au_di_init(root);
859 return 0; /* success */
862 goto out; /* do not iput */
/*
 * fill_super callback passed to mount_nodev() by aufs_mount().
 * @sb:       super_block being set up
 * @raw_data: mount option string; NULL or empty is treated as a
 *            special case by the first check
 * @silent:   unused
 *
 * Visible steps: allocate one page for parsed options, allocate the
 * aufs super_block info, set the super_block flags/operations/magic,
 * create the root, parse the options with the aufs lock released, then
 * apply them with au_opts_mount() while holding the root inode's
 * i_mutex and the aufs write lock. On the failure path the super_block
 * info is finalized, its kobject reference dropped and s_fs_info
 * cleared. The option page is freed on both paths.
 * NOTE(review): the error checks and gotos between the visible
 * statements, the assignment of root, the labels and the return
 * statement are not visible in this excerpt.
 */
871 static int aufs_fill_super(struct super_block *sb, void *raw_data,
872 int silent __maybe_unused)
878 char *arg = raw_data;
880 if (unlikely(!arg || !*arg)) {
/* one page holds the parsed option array */
887 memset(&opts, 0, sizeof(opts));
888 opts.opt = (void *)__get_free_page(GFP_NOFS);
889 if (unlikely(!opts.opt))
891 opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
892 opts.sb_flags = sb->s_flags;
894 err = au_si_alloc(sb);
898 /* all timestamps always follow the ones on the branch */
899 sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
900 sb->s_op = &aufs_sop;
901 sb->s_d_op = &aufs_dop;
902 sb->s_magic = AUFS_SUPER_MAGIC;
906 err = alloc_root(sb);
912 inode = root->d_inode;
915 * actually we can parse options regardless aufs lock here.
916 * but at remount time, parsing must be done before aufs lock.
917 * so we follow the same rule.
919 ii_write_lock_parent(inode);
920 aufs_write_unlock(root);
921 err = au_opts_parse(sb, arg, &opts);
925 /* lock vfs_inode first, then aufs. */
926 mutex_lock(&inode->i_mutex);
927 aufs_write_lock(root);
928 err = au_opts_mount(sb, &opts);
930 aufs_write_unlock(root);
931 mutex_unlock(&inode->i_mutex);
933 goto out_opts; /* success */
/* failure path: tear down the super_block info */
939 dbgaufs_si_fin(au_sbi(sb));
940 kobject_put(&au_sbi(sb)->si_kobj);
941 sb->s_fs_info = NULL;
943 free_page((unsigned long)opts.opt);
951 /* ---------------------------------------------------------------------- */
/*
 * file_system_type ->mount hook.
 * Delegates to mount_nodev() with aufs_fill_super() as the fill
 * callback, then calls sysaufs_brs_add(sb, 0) under the aufs
 * super_block write lock.
 * NOTE(review): the remaining parameter(s), the error check on root,
 * the assignment of sb, the unlock and the return statement are not
 * visible in this excerpt.
 */
953 static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
954 const char *dev_name __maybe_unused,
958 struct super_block *sb;
960 /* all timestamps always follow the ones on the branch */
961 /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
962 root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
/* the flag argument is the logical negation of AuLock_FLUSH */
967 si_write_lock(sb, !AuLock_FLUSH);
968 sysaufs_brs_add(sb, 0);
/*
 * file_system_type ->kill_sb hook.
 * Under the aufs write lock on the root: run the create policy's
 * optional ->fin callback, switch udba to UDBA_NONE and refresh when
 * UDBA_HNOTIFY is set, and call au_plink_put() when PLINK is set. The
 * back pointer si_sb is then cleared, the lock released and the nowait
 * tasks flushed. generic_shutdown_super() is called last.
 * NOTE(review): the assignment of sbinfo and the condition guarding the
 * aufs-specific part (presumably a non-NULL sbinfo) are not visible in
 * this excerpt.
 */
976 static void aufs_kill_sb(struct super_block *sb)
978 struct au_sbinfo *sbinfo;
983 aufs_write_lock(sb->s_root);
/* ->fin is optional */
984 if (sbinfo->si_wbr_create_ops->fin)
985 sbinfo->si_wbr_create_ops->fin(sb);
986 if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
987 au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
988 au_remount_refresh(sb);
990 if (au_opt_test(sbinfo->si_mntflags, PLINK))
991 au_plink_put(sb, /*verbose*/1);
993 sbinfo->si_sb = NULL;
994 aufs_write_unlock(sb->s_root);
995 au_nwt_flush(&sbinfo->si_nowait);
997 generic_shutdown_super(sb);
/*
 * The aufs filesystem type: name, flags and the mount/kill_sb entry
 * points.
 * NOTE(review): the ".fs_flags =" line and the closing brace are not
 * visible in this excerpt.
 */
1000 struct file_system_type aufs_fs_type = {
1001 .name = AUFS_FSTYPE,
1003 FS_RENAME_DOES_D_MOVE /* a race between rename and others */
1004 | FS_REVAL_DOT, /* for NFS branch and udba */
1005 .mount = aufs_mount,
1006 .kill_sb = aufs_kill_sb,
1007 /* no need to __module_get() and module_put(). */
1008 .owner = THIS_MODULE,