2 * Copyright (C) 2005-2012 Junjiro R. Okajima
4 * This program, aufs is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 * policies for selecting one among multiple writable branches
23 #include <linux/statfs.h>
/*
 * Apply the mode, uid and gid of h_src's inode to the object at h_path.
 * NOTE(review): the local declarations, closing braces and the return
 * statement are not visible in this excerpt.
 */
26 /* subset of cpup_attr() */
27 static noinline_for_stack
28 int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
34 h_isrc = h_src->d_inode;
/* request a forced change of mode and ownership, copied from the source */
35 ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
36 ia.ia_mode = h_isrc->i_mode;
37 ia.ia_uid = h_isrc->i_uid;
38 ia.ia_gid = h_isrc->i_gid;
/* remember whether the source mode carries setuid/setgid bits */
39 sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID));
40 au_cpup_attr_flags(h_path->dentry->d_inode, h_isrc);
41 err = vfsub_sio_notify_change(h_path, &ia);
43 /* is this nfs only? */
/*
 * On NFS with set-id bits, apply the mode alone a second time.
 * presumably the first change drops the set-id bits there -- TODO confirm
 */
44 if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
45 ia.ia_valid = ATTR_FORCE | ATTR_MODE;
46 ia.ia_mode = h_isrc->i_mode;
47 err = vfsub_sio_notify_change(h_path, &ia);
/*
 * Bit flags recorded in the 'flags' member of struct au_cpdown_dir_args,
 * plus helpers that test, set and clear a flag given its short name
 * (the AuCpdown_ prefix is pasted on by the macro).
 */
53 #define AuCpdown_PARENT_OPQ 1
54 #define AuCpdown_WHED (1 << 1)
55 #define AuCpdown_MADE_DIR (1 << 2)
56 #define AuCpdown_DIROPQ (1 << 3)
57 #define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name)
58 #define au_fset_cpdown(flags, name) \
59 do { (flags) |= AuCpdown_##name; } while (0)
60 #define au_fclr_cpdown(flags, name) \
61 do { (flags) &= ~AuCpdown_##name; } while (0)
/*
 * Argument block handed to au_cpdown_dir() through au_cp_dirs().
 * NOTE(review): only 'parent' is visible in this excerpt; the code in this
 * file also uses a 'flags' member holding AuCpdown_* bits.
 */
63 struct au_cpdown_dir_args {
64 struct dentry *parent;
/*
 * Create the directory-opaque entry for dentry on branch bdst and, when
 * execution reaches the flag update, record DIROPQ in a->flags.
 * NOTE(review): the branch taken on IS_ERR() and the return statement are
 * not visible in this excerpt.
 */
68 static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
69 struct au_cpdown_dir_args *a)
72 struct dentry *opq_dentry;
74 opq_dentry = au_diropq_create(dentry, bdst);
/* err is loaded before the IS_ERR() test, so it is already set on failure */
75 err = PTR_ERR(opq_dentry);
76 if (IS_ERR(opq_dentry))
79 au_fset_cpdown(a->flags, DIROPQ);
/*
 * Look up the whiteout for dentry's name under h_parent on branch bdst and,
 * if that whiteout has an inode, unlink it.
 * NOTE(review): the tail of the au_wh_unlink_dentry() call, the error exits
 * and any release of the looked-up dentry are not visible in this excerpt.
 */
85 static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
86 struct inode *dir, aufs_bindex_t bdst)
92 br = au_sbr(dentry->d_sb, bdst);
93 h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
94 err = PTR_ERR(h_path.dentry);
95 if (IS_ERR(h_path.dentry))
/* a positive lookup result means a whiteout is present on this branch */
99 if (h_path.dentry->d_inode) {
100 h_path.mnt = br->br_mnt;
101 err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
/*
 * Callback passed to au_cp_dirs() by au_cpdown_dirs(): create the directory
 * for dentry on branch bdst, make it opaque when needed, copy the attributes
 * from the directory on dentry's starting branch, remove a leftover
 * whiteout, and attach the new lower inode to the aufs inode.
 * 'arg' is a struct au_cpdown_dir_args whose flags track what was done so
 * the error path can undo it.
 * NOTE(review): labels, gotos, several conditions and the return statement
 * are not visible in this excerpt.
 */
110 static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
111 struct dentry *h_parent, void *arg)
114 aufs_bindex_t bopq, bstart;
116 struct dentry *parent;
117 struct inode *h_dir, *h_inode, *inode, *dir;
118 struct au_cpdown_dir_args *args = arg;
120 bstart = au_dbstart(dentry);
121 /* dentry is di-locked */
122 parent = dget_parent(dentry);
123 dir = parent->d_inode;
124 h_dir = h_parent->d_inode;
/* the caller-supplied lower parent must be the parent's dir on bdst */
125 AuDebugOn(h_dir != au_h_iptr(dir, bdst));
/* negative lookup on bdst, then create the directory there */
128 err = au_lkup_neg(dentry, bdst);
129 if (unlikely(err < 0))
131 h_path.dentry = au_h_dptr(dentry, bdst);
132 h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
133 err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
134 S_IRWXU | S_IRUGO | S_IXUGO);
137 au_fset_cpdown(args->flags, MADE_DIR);
/* recompute the per-directory flags; PARENT_OPQ is only ever set here */
139 bopq = au_dbdiropq(dentry);
140 au_fclr_cpdown(args->flags, WHED);
141 au_fclr_cpdown(args->flags, DIROPQ);
142 if (au_dbwh(dentry) == bdst)
143 au_fset_cpdown(args->flags, WHED);
144 if (!au_ftest_cpdown(args->flags, PARENT_OPQ) && bopq <= bdst)
145 au_fset_cpdown(args->flags, PARENT_OPQ);
/* work inside the new lower directory with its i_mutex held */
146 h_inode = h_path.dentry->d_inode;
147 mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
148 if (au_ftest_cpdown(args->flags, WHED)) {
149 err = au_cpdown_dir_opq(dentry, bdst, args);
151 mutex_unlock(&h_inode->i_mutex);
/* copy mode and ownership from the directory on the starting branch */
156 err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart));
157 mutex_unlock(&h_inode->i_mutex);
/* a whiteout on bdst is removed once the directory exists */
161 if (au_ftest_cpdown(args->flags, WHED)) {
162 err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
/* publish the new lower inode in the aufs inode, extending ibend if needed */
167 inode = dentry->d_inode;
168 if (au_ibend(inode) < bdst)
169 au_set_ibend(inode, bdst);
170 au_set_h_iptr(inode, bdst, au_igrab(h_inode),
171 au_hi_flags(inode, /*isdir*/1));
172 goto out; /* success */
/* rollback: undo the diropq and the mkdir if they were done, log failures */
176 if (au_ftest_cpdown(args->flags, DIROPQ)) {
177 mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
178 rerr = au_diropq_remove(dentry, bdst);
179 mutex_unlock(&h_inode->i_mutex);
180 if (unlikely(rerr)) {
181 AuIOErr("failed removing diropq for %.*s b%d (%d)\n",
182 AuDLNPair(dentry), bdst, rerr);
188 if (au_ftest_cpdown(args->flags, MADE_DIR)) {
189 rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
190 if (unlikely(rerr)) {
191 AuIOErr("failed removing %.*s b%d (%d)\n",
192 AuDLNPair(dentry), bdst, rerr);
/* forget the lower dentry on bdst and fix up the dentry's end index */
197 au_set_h_dptr(dentry, bdst, NULL);
198 if (au_dbend(dentry) == bdst)
199 au_update_dbend(dentry);
/*
 * Run au_cpdown_dir() through au_cp_dirs() for dentry on branch bdst.
 * The argument block takes a reference on dentry's parent via dget_parent().
 * NOTE(review): the remaining initializers, the matching dput() and the
 * return statement are not visible in this excerpt.
 */
205 int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
208 struct au_cpdown_dir_args args = {
209 .parent = dget_parent(dentry),
213 err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &args);
219 /* ---------------------------------------------------------------------- */
221 /* policies for create */
/*
 * Adjust a candidate branch index against the opaque-directory indices of
 * the dentries collected by au_dcsub_pages_rev_aufs() starting from
 * dentry's parent. Each collected dentry's diropq index is read under its
 * di read lock and compared with the current result.
 * NOTE(review): the initial value of err, the statement executed when
 * bopq < err, the assignment of 'd' and the return are not visible here;
 * presumably the result is lowered to bopq -- confirm.
 */
223 static int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
225 int err, i, j, ndentry;
227 struct au_dcsub_pages dpages;
228 struct au_dpage *dpage;
229 struct dentry **dentries, *parent, *d;
231 err = au_dpages_init(&dpages, GFP_NOFS);
234 parent = dget_parent(dentry);
235 err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
/* walk every page of collected dentries */
240 for (i = 0; i < dpages.ndpage; i++) {
241 dpage = dpages.dpages + i;
242 dentries = dpage->dentries;
243 ndentry = dpage->ndentry;
244 for (j = 0; j < ndentry; j++) {
246 di_read_lock_parent2(d, !AuLock_IR);
247 bopq = au_dbdiropq(d);
248 di_read_unlock(d, !AuLock_IR);
/* only a valid opaque index above (numerically below) the result matters */
249 if (bopq >= 0 && bopq < err)
256 au_dpages_free(&dpages);
/*
 * Scan branch indices from bindex down to 0, testing each branch with
 * au_br_rdonly().
 * NOTE(review): the statements returning the result are not visible here;
 * presumably the first writable index is returned, and an error otherwise.
 */
261 static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
263 for (; bindex >= 0; bindex--)
264 if (!au_br_rdonly(au_sbr(sb, bindex)))
/*
 * Create policy "tdp". The visible steps are:
 *  - test whether dentry's starting branch is writable;
 *  - otherwise scan the parent's branches from its start index up to (not
 *    including) dentry's start index, skipping branches where the parent
 *    has no positive lower dentry, looking for a writable one;
 *  - if the result is negative, fall back to au_wbr_bu() below bstart;
 *  - pass the result through au_wbr_nonopq().
 * NOTE(review): the assignments made when a branch is chosen, the guards
 * around the last two steps and the return are not visible in this excerpt.
 */
269 /* top down parent */
270 static int au_wbr_create_tdp(struct dentry *dentry, int isdir __maybe_unused)
273 aufs_bindex_t bstart, bindex;
274 struct super_block *sb;
275 struct dentry *parent, *h_parent;
278 bstart = au_dbstart(dentry);
280 if (!au_br_rdonly(au_sbr(sb, bstart)))
284 parent = dget_parent(dentry);
285 for (bindex = au_dbstart(parent); bindex < bstart; bindex++) {
286 h_parent = au_h_dptr(parent, bindex);
/* the parent must actually exist on this branch */
287 if (!h_parent || !h_parent->d_inode)
290 if (!au_br_rdonly(au_sbr(sb, bindex))) {
298 if (unlikely(err < 0)) {
299 err = au_wbr_bu(sb, bstart - 1);
301 err = au_wbr_nonopq(dentry, err);
309 /* ---------------------------------------------------------------------- */
/*
 * Derive a branch index from dentry's whiteout index (au_dbwh) and the
 * parent's opaque-directory index (au_dbdiropq); the result is passed
 * through au_wbr_nonopq() and then checked for a read-only branch.
 * NOTE(review): the conditions selecting between min(bdiropq, bwh) and the
 * other cases, the action on a read-only branch and the return are not
 * visible in this excerpt.
 */
311 /* an exception for the policy other than tdp */
312 static int au_wbr_create_exp(struct dentry *dentry)
315 aufs_bindex_t bwh, bdiropq;
316 struct dentry *parent;
319 bwh = au_dbwh(dentry);
320 parent = dget_parent(dentry);
321 bdiropq = au_dbdiropq(parent);
324 err = min(bdiropq, bwh);
328 } else if (bdiropq >= 0) {
335 err = au_wbr_nonopq(dentry, err);
/* a chosen branch that is read-only gets special handling (not visible) */
337 if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
344 /* ---------------------------------------------------------------------- */
/*
 * Initialise the round-robin counter: query au_wbr_bu() starting from the
 * last branch and store the negated result in si_wbr_rr_next.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
347 static int au_wbr_create_init_rr(struct super_block *sb)
351 err = au_wbr_bu(sb, au_sbend(sb));
352 atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
/*
 * Create policy using the shared counter si_wbr_rr_next. au_wbr_create_exp()
 * is consulted first; then, for at most one pass over the branches, the
 * counter is decremented to produce a candidate index until a writable
 * branch is found. The result goes through au_wbr_nonopq().
 * NOTE(review): the early exit after au_wbr_create_exp(), the conditions
 * choosing between the three counter reads, the modulo step and the return
 * are not visible in this excerpt.
 */
359 static int au_wbr_create_rr(struct dentry *dentry, int isdir)
363 aufs_bindex_t bindex, bend;
364 struct super_block *sb;
367 err = au_wbr_create_exp(dentry);
372 next = &au_sbi(sb)->si_wbr_rr_next;
/* bindex only bounds the number of attempts; err is the candidate index */
375 for (bindex = 0; bindex <= bend; bindex++) {
377 err = atomic_dec_return(next) + 1;
378 /* modulo for 0 is meaningless */
380 err = atomic_dec_return(next) + 1;
382 err = atomic_read(next);
387 if (!au_br_rdonly(au_sbr(sb, err)))
393 err = au_wbr_nonopq(dentry, err);
400 /* ---------------------------------------------------------------------- */
/*
 * Refresh the cached "most free space" state in si_wbr_mfs. The caller must
 * hold mfs_lock (checked by MtxMustLock). For every writable branch the
 * mount root is statfs'ed, the available byte count is stored in
 * br_wbr->wbr_bytes, and mfs_bindex / mfs_jiffy / mfsrr_bytes are updated.
 * NOTE(review): the comparison that decides when a branch becomes the new
 * best, the handling of the statfs error, and the freeing of 'st' are not
 * visible in this excerpt.
 */
402 /* most free space */
403 static void au_mfs(struct dentry *dentry)
405 struct super_block *sb;
406 struct au_branch *br;
407 struct au_wbr_mfs *mfs;
408 aufs_bindex_t bindex, bend;
410 unsigned long long b, bavail;
412 /* reduce the stack usage */
415 st = kmalloc(sizeof(*st), GFP_NOFS);
/* allocation failure is only warned about; the update is skipped */
417 AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
423 mfs = &au_sbi(sb)->si_wbr_mfs;
424 MtxMustLock(&mfs->mfs_lock);
/* start from "no writable branch" until one is found below */
425 mfs->mfs_bindex = -EROFS;
426 mfs->mfsrr_bytes = 0;
428 for (bindex = 0; bindex <= bend; bindex++) {
429 br = au_sbr(sb, bindex);
430 if (au_br_rdonly(br))
433 /* sb->s_root for NFS is unreliable */
434 h_path.mnt = br->br_mnt;
435 h_path.dentry = h_path.mnt->mnt_root;
436 err = vfs_statfs(&h_path, st);
438 AuWarn1("failed statfs, b%d, %d\n", bindex, err);
442 /* when the available size is equal, select the lower one */
/* compile-time check that 'b' is wide enough for both statfs fields */
443 BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
444 || sizeof(b) < sizeof(st->f_bsize));
445 b = st->f_bavail * st->f_bsize;
446 br->br_wbr->wbr_bytes = b;
449 mfs->mfs_bindex = bindex;
450 mfs->mfs_jiffy = jiffies;
454 mfs->mfsrr_bytes = bavail;
455 AuDbg("b%d\n", mfs->mfs_bindex);
/*
 * Create policy based on the cached most-free-space branch.
 * au_wbr_create_exp() is consulted first. Then, under mfs_lock, the cache is
 * considered stale when it has expired, holds no valid index, or points at
 * a read-only branch. The cached index becomes the result and is passed
 * through au_wbr_nonopq().
 * NOTE(review): the statement executed when the cache is stale (presumably
 * a call to au_mfs()) and the early-exit/return lines are not visible here.
 * NOTE(review): mfs_bindex is read after mfs_lock has been released --
 * confirm that this unlocked read is intended.
 */
459 static int au_wbr_create_mfs(struct dentry *dentry, int isdir __maybe_unused)
462 struct super_block *sb;
463 struct au_wbr_mfs *mfs;
465 err = au_wbr_create_exp(dentry);
470 mfs = &au_sbi(sb)->si_wbr_mfs;
471 mutex_lock(&mfs->mfs_lock);
472 if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
473 || mfs->mfs_bindex < 0
474 || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
476 mutex_unlock(&mfs->mfs_lock);
477 err = mfs->mfs_bindex;
480 err = au_wbr_nonopq(dentry, err);
/*
 * Initialise the most-free-space state: set up mfs_lock and mark the cached
 * branch index as invalid (-EROFS).
 * NOTE(review): other initialisation and the return are not visible here.
 */
487 static int au_wbr_create_init_mfs(struct super_block *sb)
489 struct au_wbr_mfs *mfs;
491 mfs = &au_sbi(sb)->si_wbr_mfs;
492 mutex_init(&mfs->mfs_lock);
494 mfs->mfs_bindex = -EROFS;
/*
 * Tear down the most-free-space state by destroying mfs_lock.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
499 static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
501 mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
505 /* ---------------------------------------------------------------------- */
/*
 * Create policy: take the au_wbr_create_mfs() result, but when the recorded
 * free bytes (mfsrr_bytes) are below mfsrr_watermark, use
 * au_wbr_create_rr() instead. The watermark test and the round-robin call
 * both run with mfs_lock held.
 * NOTE(review): any guard on the first result and the return are not
 * visible in this excerpt.
 */
507 /* most free space and then round robin */
508 static int au_wbr_create_mfsrr(struct dentry *dentry, int isdir)
511 struct au_wbr_mfs *mfs;
513 err = au_wbr_create_mfs(dentry, isdir);
515 mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
516 mutex_lock(&mfs->mfs_lock);
517 if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
518 err = au_wbr_create_rr(dentry, isdir);
519 mutex_unlock(&mfs->mfs_lock);
/*
 * Initialise both halves of the mfsrr policy: the most-free-space state
 * (its result is deliberately ignored) and then the round-robin counter,
 * whose result is kept in err.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
526 static int au_wbr_create_init_mfsrr(struct super_block *sb)
530 au_wbr_create_init_mfs(sb); /* ignore */
531 err = au_wbr_create_init_rr(sb);
536 /* ---------------------------------------------------------------------- */
/*
 * Create policy combining tdp and mfs. Starts from the au_wbr_create_tdp()
 * result, refreshes the free-space data through au_wbr_create_mfs(), then
 * scans the branches where the parent exists (parent's start index to
 * au_dbtaildir(parent)) for a writable branch with more wbr_bytes than the
 * current choice. The result is passed through au_wbr_nonopq().
 * NOTE(review): the conditions guarding both "success" exits, the
 * assignment that switches err to a better branch, and the cleanup/return
 * are not visible in this excerpt.
 */
538 /* top down parent and most free space */
539 static int au_wbr_create_pmfs(struct dentry *dentry, int isdir)
542 unsigned long long b;
543 aufs_bindex_t bindex, bstart, bend;
544 struct super_block *sb;
545 struct dentry *parent, *h_parent;
546 struct au_branch *br;
548 err = au_wbr_create_tdp(dentry, isdir);
549 if (unlikely(err < 0))
551 parent = dget_parent(dentry);
552 bstart = au_dbstart(parent);
553 bend = au_dbtaildir(parent);
555 goto out_parent; /* success */
557 e2 = au_wbr_create_mfs(dentry, isdir);
559 goto out_parent; /* success */
561 /* when the available size is equal, select upper one */
/* baseline: free bytes of the branch chosen so far */
563 br = au_sbr(sb, err);
564 b = br->br_wbr->wbr_bytes;
565 AuDbg("b%d, %llu\n", err, b);
567 for (bindex = bstart; bindex <= bend; bindex++) {
568 h_parent = au_h_dptr(parent, bindex);
569 if (!h_parent || !h_parent->d_inode)
572 br = au_sbr(sb, bindex);
/* strictly greater, so an equal branch never replaces the current one */
573 if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
574 b = br->br_wbr->wbr_bytes;
576 AuDbg("b%d, %llu\n", err, b);
581 err = au_wbr_nonopq(dentry, err);
590 /* ---------------------------------------------------------------------- */
592 /* policies for copyup */
/*
 * Copy-up policy "tdp": delegates to au_wbr_create_tdp(); the isdir
 * argument is passed as 0 because that function does not use it.
 */
594 /* top down parent */
595 static int au_wbr_copyup_tdp(struct dentry *dentry)
597 return au_wbr_create_tdp(dentry, /*isdir, anything is ok*/0);
/*
 * Copy-up policy "bup": scan from dentry's start index down to the parent's
 * start index, skipping branches where the parent has no positive lower
 * dentry, looking for a writable branch. If the result is negative, fall
 * back to au_wbr_bu() below the parent's start index.
 * NOTE(review): the initial value of err, the assignment made when a branch
 * is chosen, any later adjustment and the return are not visible here.
 */
600 /* bottom up parent */
601 static int au_wbr_copyup_bup(struct dentry *dentry)
604 aufs_bindex_t bindex, bstart;
605 struct dentry *parent, *h_parent;
606 struct super_block *sb;
610 parent = dget_parent(dentry);
/* note: bstart here is the parent's start index, not dentry's */
611 bstart = au_dbstart(parent);
612 for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) {
613 h_parent = au_h_dptr(parent, bindex);
614 if (!h_parent || !h_parent->d_inode)
617 if (!au_br_rdonly(au_sbr(sb, bindex))) {
625 if (unlikely(err < 0))
626 err = au_wbr_bu(sb, bstart - 1);
/*
 * Copy-up policy "bu": ask au_wbr_bu() for a branch starting at dentry's
 * start index, then pass the result through au_wbr_nonopq().
 * NOTE(review): any guard before au_wbr_nonopq() and the return statement
 * are not visible in this excerpt.
 */
633 static int au_wbr_copyup_bu(struct dentry *dentry)
636 aufs_bindex_t bstart;
638 bstart = au_dbstart(dentry);
639 err = au_wbr_bu(dentry->d_sb, bstart);
642 err = au_wbr_nonopq(dentry, err);
648 /* ---------------------------------------------------------------------- */
/*
 * Copy-up policy table, indexed by AuWbrCopyup_* constants; each entry
 * names the function that selects the target branch.
 * NOTE(review): the designator for the au_wbr_copyup_bu entry and the
 * closing braces are not visible in this excerpt.
 */
650 struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
651 [AuWbrCopyup_TDP] = {
652 .copyup = au_wbr_copyup_tdp
654 [AuWbrCopyup_BUP] = {
655 .copyup = au_wbr_copyup_bup
658 .copyup = au_wbr_copyup_bu
/*
 * Create policy table, indexed by AuWbrCreate_* constants. Each entry gives
 * the branch-selection function and, where the policy keeps state, its
 * init/fin hooks. In the visible entries each *V variant uses the same
 * handlers as its non-V counterpart.
 * NOTE(review): the designator for the au_wbr_create_rr entry and the
 * closing braces are not visible in this excerpt.
 */
662 struct au_wbr_create_operations au_wbr_create_ops[] = {
663 [AuWbrCreate_TDP] = {
664 .create = au_wbr_create_tdp
667 .create = au_wbr_create_rr,
668 .init = au_wbr_create_init_rr
670 [AuWbrCreate_MFS] = {
671 .create = au_wbr_create_mfs,
672 .init = au_wbr_create_init_mfs,
673 .fin = au_wbr_create_fin_mfs
675 [AuWbrCreate_MFSV] = {
676 .create = au_wbr_create_mfs,
677 .init = au_wbr_create_init_mfs,
678 .fin = au_wbr_create_fin_mfs
680 [AuWbrCreate_MFSRR] = {
681 .create = au_wbr_create_mfsrr,
682 .init = au_wbr_create_init_mfsrr,
683 .fin = au_wbr_create_fin_mfs
685 [AuWbrCreate_MFSRRV] = {
686 .create = au_wbr_create_mfsrr,
687 .init = au_wbr_create_init_mfsrr,
688 .fin = au_wbr_create_fin_mfs
/* the pmfs policies only need the mfs state, so they reuse its init/fin */
690 [AuWbrCreate_PMFS] = {
691 .create = au_wbr_create_pmfs,
692 .init = au_wbr_create_init_mfs,
693 .fin = au_wbr_create_fin_mfs
695 [AuWbrCreate_PMFSV] = {
696 .create = au_wbr_create_pmfs,
697 .init = au_wbr_create_init_mfs,
698 .fin = au_wbr_create_fin_mfs