Merge branch 'stable-3.2' into pandora-3.2
[pandora-kernel.git] / fs / aufs / cpup.c
1 /*
2  * Copyright (C) 2005-2013 Junjiro R. Okajima
3  *
4  * This program, aufs is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18
19 /*
20  * copy-up functions, see wbr_policy.c for copy-down
21  */
22
23 #include <linux/fs_stack.h>
24 #include <linux/mm.h>
25 #include "aufs.h"
26
27 void au_cpup_attr_flags(struct inode *dst, unsigned int iflags)
28 {
29         const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
30                 | S_NOATIME | S_NOCMTIME | S_AUTOMOUNT;
31
32         BUILD_BUG_ON(sizeof(iflags) != sizeof(dst->i_flags));
33
34         dst->i_flags |= iflags & ~mask;
35         if (au_test_fs_notime(dst->i_sb))
36                 dst->i_flags |= S_NOATIME | S_NOCMTIME;
37 }
38
/*
 * Refresh the timestamps and the size of @inode from its lower inode on the
 * branch au_ibstart(@inode).
 */
void au_cpup_attr_timesizes(struct inode *inode)
{
	struct inode *h_top = au_h_iptr(inode, au_ibstart(inode));

	fsstack_copy_attr_times(inode, h_top);
	fsstack_copy_inode_size(inode, h_top);
}
47
48 void au_cpup_attr_nlink(struct inode *inode, int force)
49 {
50         struct inode *h_inode;
51         struct super_block *sb;
52         aufs_bindex_t bindex, bend;
53
54         sb = inode->i_sb;
55         bindex = au_ibstart(inode);
56         h_inode = au_h_iptr(inode, bindex);
57         if (!force
58             && !S_ISDIR(h_inode->i_mode)
59             && au_opt_test(au_mntflags(sb), PLINK)
60             && au_plink_test(inode))
61                 return;
62
63         set_nlink(inode, h_inode->i_nlink);
64
65         /*
66          * fewer nlink makes find(1) noisy, but larger nlink doesn't.
67          * it may includes whplink directory.
68          */
69         if (S_ISDIR(h_inode->i_mode)) {
70                 bend = au_ibend(inode);
71                 for (bindex++; bindex <= bend; bindex++) {
72                         h_inode = au_h_iptr(inode, bindex);
73                         if (h_inode)
74                                 au_add_nlink(inode, h_inode);
75                 }
76         }
77 }
78
79 void au_cpup_attr_changeable(struct inode *inode)
80 {
81         struct inode *h_inode;
82
83         h_inode = au_h_iptr(inode, au_ibstart(inode));
84         inode->i_mode = h_inode->i_mode;
85         inode->i_uid = h_inode->i_uid;
86         inode->i_gid = h_inode->i_gid;
87         au_cpup_attr_timesizes(inode);
88         au_cpup_attr_flags(inode, h_inode->i_flags);
89 }
90
91 void au_cpup_igen(struct inode *inode, struct inode *h_inode)
92 {
93         struct au_iinfo *iinfo = au_ii(inode);
94
95         IiMustWriteLock(inode);
96
97         iinfo->ii_higen = h_inode->i_generation;
98         iinfo->ii_hsb1 = h_inode->i_sb;
99 }
100
101 void au_cpup_attr_all(struct inode *inode, int force)
102 {
103         struct inode *h_inode;
104
105         h_inode = au_h_iptr(inode, au_ibstart(inode));
106         au_cpup_attr_changeable(inode);
107         if (inode->i_nlink > 0)
108                 au_cpup_attr_nlink(inode, force);
109         inode->i_rdev = h_inode->i_rdev;
110         inode->i_blkbits = h_inode->i_blkbits;
111         au_cpup_igen(inode, h_inode);
112 }
113
114 /* ---------------------------------------------------------------------- */
115
116 /* Note: dt_dentry and dt_h_path are not dget/dput-ed */
117
118 /* keep the timestamps of the parent dir when cpup */
119 void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
120                     struct path *h_path)
121 {
122         struct inode *h_inode;
123
124         dt->dt_dentry = dentry;
125         dt->dt_h_path = *h_path;
126         h_inode = h_path->dentry->d_inode;
127         dt->dt_atime = h_inode->i_atime;
128         dt->dt_mtime = h_inode->i_mtime;
129         /* smp_mb(); */
130 }
131
132 void au_dtime_revert(struct au_dtime *dt)
133 {
134         struct iattr attr;
135         int err;
136
137         attr.ia_atime = dt->dt_atime;
138         attr.ia_mtime = dt->dt_mtime;
139         attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
140                 | ATTR_ATIME | ATTR_ATIME_SET;
141
142         err = vfsub_notify_change(&dt->dt_h_path, &attr);
143         if (unlikely(err))
144                 pr_warn("restoring timestamps failed(%d). ignored\n", err);
145 }
146
147 /* ---------------------------------------------------------------------- */
148
149 /*
 * internal use only.
 * attributes of the copy-up source, filled by au_do_cpup_regular() and
 * consumed by cpup_iattr().
 */
150 struct au_cpup_reg_attr {
151         int             valid;  /* set to 1 after st and iflags were filled */
152         struct kstat    st;     /* filled by vfs_getattr() on the source */
153         unsigned int    iflags; /* inode->i_flags */
154 };
155
156 static noinline_for_stack
157 int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src,
158                struct au_cpup_reg_attr *h_src_attr)
159 {
160         int err, sbits;
161         struct iattr ia;
162         struct path h_path;
163         struct inode *h_isrc, *h_idst;
164         struct kstat *h_st;
165
166         h_path.dentry = au_h_dptr(dst, bindex);
167         h_idst = h_path.dentry->d_inode;
168         h_path.mnt = au_sbr_mnt(dst->d_sb, bindex);
169         h_isrc = h_src->d_inode;
170         ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
171                 | ATTR_ATIME | ATTR_MTIME
172                 | ATTR_ATIME_SET | ATTR_MTIME_SET;
173         if (h_src_attr && h_src_attr->valid) {
174                 h_st = &h_src_attr->st;
175                 ia.ia_uid = h_st->uid;
176                 ia.ia_gid = h_st->gid;
177                 ia.ia_atime = h_st->atime;
178                 ia.ia_mtime = h_st->mtime;
179                 if (h_idst->i_mode != h_st->mode
180                     && !S_ISLNK(h_idst->i_mode)) {
181                         ia.ia_valid |= ATTR_MODE;
182                         ia.ia_mode = h_st->mode;
183                 }
184                 sbits = !!(h_st->mode & (S_ISUID | S_ISGID));
185                 au_cpup_attr_flags(h_idst, h_src_attr->iflags);
186         } else {
187                 ia.ia_uid = h_isrc->i_uid;
188                 ia.ia_gid = h_isrc->i_gid;
189                 ia.ia_atime = h_isrc->i_atime;
190                 ia.ia_mtime = h_isrc->i_mtime;
191                 if (h_idst->i_mode != h_isrc->i_mode
192                     && !S_ISLNK(h_idst->i_mode)) {
193                         ia.ia_valid |= ATTR_MODE;
194                         ia.ia_mode = h_isrc->i_mode;
195                 }
196                 sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
197                 au_cpup_attr_flags(h_idst, h_isrc->i_flags);
198         }
199         err = vfsub_notify_change(&h_path, &ia);
200
201         /* is this nfs only? */
202         if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
203                 ia.ia_valid = ATTR_FORCE | ATTR_MODE;
204                 ia.ia_mode = h_isrc->i_mode;
205                 err = vfsub_notify_change(&h_path, &ia);
206         }
207
208         return err;
209 }
210
211 /* ---------------------------------------------------------------------- */
212
/*
 * Copy @len bytes from @src to @dst, starting at their current f_pos.
 * @buf is the bounce buffer and @blksize is the largest chunk read at once.
 * A chunk of exactly @blksize bytes which equals the zero page is not
 * written; the position of @dst is advanced by vfsub_llseek() instead.
 * When the last chunk was skipped that way, the size of @dst is finally set
 * to its f_pos by vfsub_notify_change(), and @buf is reused as the storage
 * of the struct iattr for that call.
 * Returns a negative error from the read/write/llseek helpers, otherwise 0
 * or the result of that final vfsub_notify_change().
 */
213 static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
214                            char *buf, unsigned long blksize)
215 {
216         int err;
217         size_t sz, rbytes, wbytes;
218         unsigned char all_zero;
219         char *p, *zp;
220         struct mutex *h_mtx;
221         /* reduce stack usage: points into @buf when it is used */
222         struct iattr *ia;
223
            /* zp is the reference data for detecting an all-zero chunk */
224         zp = page_address(ZERO_PAGE(0));
225         if (unlikely(!zp))
226                 return -ENOMEM; /* possible? */
227
228         err = 0;
229         all_zero = 0;
230         while (len) {
231                 AuDbg("len %lld\n", len);
                    /* this round reads at most min(len, blksize) bytes */
232                 sz = blksize;
233                 if (len < blksize)
234                         sz = len;
235
236                 rbytes = 0;
237                 /* todo: signal_pending? */
                    /* read again while nothing was read or on -EAGAIN/-EINTR */
238                 while (!rbytes || err == -EAGAIN || err == -EINTR) {
239                         rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
240                         err = rbytes;
241                 }
242                 if (unlikely(err < 0))
243                         break;
244
                    /* only a full @blksize chunk is tested against the zero page */
245                 all_zero = 0;
246                 if (len >= rbytes && rbytes == blksize)
247                         all_zero = !memcmp(buf, zp, rbytes);
248                 if (!all_zero) {
                            /* write the whole chunk, continuing after short writes */
249                         wbytes = rbytes;
250                         p = buf;
251                         while (wbytes) {
252                                 size_t b;
253
254                                 b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
255                                 err = b;
256                                 /* todo: signal_pending? */
257                                 if (unlikely(err == -EAGAIN || err == -EINTR))
258                                         continue;
259                                 if (unlikely(err < 0))
260                                         break;
261                                 wbytes -= b;
262                                 p += b;
263                         }
264                         if (unlikely(err < 0))
265                                 break;
266                 } else {
267                         loff_t res;
268
                            /* skip the zero chunk instead of writing it */
269                         AuLabel(hole);
270                         res = vfsub_llseek(dst, rbytes, SEEK_CUR);
271                         err = res;
272                         if (unlikely(res < 0))
273                                 break;
274                 }
275                 len -= rbytes;
276                 err = 0;
277         }
278
279         /* the last block may be a hole */
280         if (!err && all_zero) {
281                 AuLabel(last hole);
282
                    /* err == 1 means "go on to set the size" below */
283                 err = 1;
284                 if (au_test_nfs(dst->f_dentry->d_sb)) {
285                         /* nfs requires this step to make last hole */
286                         /* is this only nfs? */
287                         do {
288                                 /* todo: signal_pending? */
289                                 err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
290                         } while (err == -EAGAIN || err == -EINTR);
                            /* step back over the one byte just written */
291                         if (err == 1)
292                                 dst->f_pos--;
293                 }
294
295                 if (err == 1) {
                            /* the data in @buf is not needed any more, reuse it */
296                         ia = (void *)buf;
297                         ia->ia_size = dst->f_pos;
298                         ia->ia_valid = ATTR_SIZE | ATTR_FILE;
299                         ia->ia_file = dst;
300                         h_mtx = &dst->f_dentry->d_inode->i_mutex;
301                         mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
302                         err = vfsub_notify_change(&dst->f_path, ia);
303                         mutex_unlock(h_mtx);
304                 }
305         }
306
307         return err;
308 }
309
310 int au_copy_file(struct file *dst, struct file *src, loff_t len)
311 {
312         int err;
313         unsigned long blksize;
314         unsigned char do_kfree;
315         char *buf;
316
317         err = -ENOMEM;
318         blksize = dst->f_dentry->d_sb->s_blocksize;
319         if (!blksize || PAGE_SIZE < blksize)
320                 blksize = PAGE_SIZE;
321         AuDbg("blksize %lu\n", blksize);
322         do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *));
323         if (do_kfree)
324                 buf = kmalloc(blksize, GFP_NOFS);
325         else
326                 buf = (void *)__get_free_page(GFP_NOFS);
327         if (unlikely(!buf))
328                 goto out;
329
330         if (len > (1 << 22))
331                 AuDbg("copying a large file %lld\n", (long long)len);
332
333         src->f_pos = 0;
334         dst->f_pos = 0;
335         err = au_do_copy_file(dst, src, len, buf, blksize);
336         if (do_kfree)
337                 kfree(buf);
338         else
339                 free_page((unsigned long)buf);
340
341 out:
342         return err;
343 }
344
345 /*
 * to support a sparse file which is opened with O_APPEND,
 * we need to close the file.
 *
 * open the lower files of cpg->dentry on cpg->bsrc (read-only) and on
 * cpg->bdst (write-only), copy cpg->len bytes by au_copy_file(), and
 * release both files again.
 * returns the error of au_h_open(), -EINVAL when an opened file has no
 * f_op, or otherwise the result of au_copy_file().
 */
349 static int au_cp_regular(struct au_cp_generic *cpg)
350 {
351         int err, i;
352         enum { SRC, DST };
            /*
             * one entry per file, indexed by SRC/DST.
             * label is the jump target when opening this file failed,
             * label_file is the one when it was opened but has no f_op.
             */
353         struct {
354                 aufs_bindex_t bindex;
355                 unsigned int flags;
356                 struct dentry *dentry;
357                 struct file *file;
358                 void *label, *label_file;
359         } *f, file[] = {
360                 {
361                         .bindex = cpg->bsrc,
362                         .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
363                         .file = NULL,
364                         .label = &&out,
365                         .label_file = &&out_src
366                 },
367                 {
368                         .bindex = cpg->bdst,
369                         .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
370                         .file = NULL,
371                         .label = &&out_src,
372                         .label_file = &&out_dst
373                 }
374         };
375         struct super_block *sb;
376
377         /* bsrc branch can be ro/rw. */
378         sb = cpg->dentry->d_sb;
379         f = file;
            /* open SRC first and then DST, leaving via the entry's label on error */
380         for (i = 0; i < 2; i++, f++) {
381                 f->dentry = au_h_dptr(cpg->dentry, f->bindex);
382                 f->file = au_h_open(cpg->dentry, f->bindex, f->flags,
383                                     /*file*/NULL);
384                 err = PTR_ERR(f->file);
385                 if (IS_ERR(f->file))
386                         goto *f->label;
387                 err = -EINVAL;
388                 if (unlikely(!f->file->f_op))
389                         goto *f->label_file;
390         }
391
392         /* try stopping to update while we copyup */
393         IMustLock(file[SRC].dentry->d_inode);
394         err = au_copy_file(file[DST].file, file[SRC].file, cpg->len);
395
            /*
             * the labels fall through: DST is released first, then SRC.
             * au_sbr_put() presumably drops a branch reference taken by
             * au_h_open() -- TODO confirm.
             */
396 out_dst:
397         fput(file[DST].file);
398         au_sbr_put(sb, file[DST].bindex);
399 out_src:
400         fput(file[SRC].file);
401         au_sbr_put(sb, file[SRC].bindex);
402 out:
403         return err;
404 }
405
406 static int au_do_cpup_regular(struct au_cp_generic *cpg,
407                               struct au_cpup_reg_attr *h_src_attr)
408 {
409         int err, rerr;
410         loff_t l;
411         struct dentry *h_src_dentry;
412         struct inode *h_src_inode;
413         struct vfsmount *h_src_mnt;
414
415         err = 0;
416         h_src_inode = au_h_iptr(cpg->dentry->d_inode, cpg->bsrc);
417         l = i_size_read(h_src_inode);
418         if (cpg->len == -1 || l < cpg->len)
419                 cpg->len = l;
420         if (cpg->len) {
421                 /* try stopping to update while we are referencing */
422                 mutex_lock_nested(&h_src_inode->i_mutex, AuLsc_I_CHILD);
423                 au_pin_hdir_unlock(cpg->pin);
424
425                 h_src_dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
426                 h_src_mnt = au_sbr_mnt(cpg->dentry->d_sb, cpg->bsrc);
427                 h_src_attr->iflags = h_src_inode->i_flags;
428                 err = vfs_getattr(h_src_mnt, h_src_dentry, &h_src_attr->st);
429                 if (unlikely(err)) {
430                         mutex_unlock(&h_src_inode->i_mutex);
431                         goto out;
432                 }
433                 h_src_attr->valid = 1;
434                 err = au_cp_regular(cpg);
435                 mutex_unlock(&h_src_inode->i_mutex);
436                 rerr = au_pin_hdir_relock(cpg->pin);
437                 if (!err && rerr)
438                         err = rerr;
439         }
440
441 out:
442         return err;
443 }
444
445 static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
446                               struct inode *h_dir)
447 {
448         int err, symlen;
449         mm_segment_t old_fs;
450         union {
451                 char *k;
452                 char __user *u;
453         } sym;
454
455         err = -ENOSYS;
456         if (unlikely(!h_src->d_inode->i_op->readlink))
457                 goto out;
458
459         err = -ENOMEM;
460         sym.k = __getname_gfp(GFP_NOFS);
461         if (unlikely(!sym.k))
462                 goto out;
463
464         /* unnecessary to support mmap_sem since symlink is not mmap-able */
465         old_fs = get_fs();
466         set_fs(KERNEL_DS);
467         symlen = h_src->d_inode->i_op->readlink(h_src, sym.u, PATH_MAX);
468         err = symlen;
469         set_fs(old_fs);
470
471         if (symlen > 0) {
472                 sym.k[symlen] = 0;
473                 err = vfsub_symlink(h_dir, h_path, sym.k);
474         }
475         __putname(sym.k);
476
477 out:
478         return err;
479 }
480
/*
 * create the lower entry for cpg->dentry on the branch cpg->bdst, following
 * the file type of the lower inode on cpg->bsrc: a regular file (including
 * its data by au_do_cpup_regular()), a dir, a symlink or a special file.
 * when the DTIME flag is set in cpg->flags, the timestamps of the lower
 * parent dir are stored before, and restored after, the creation.
 * @dst_parent is handed to au_dtime_store(), and its link count is refreshed
 * when a dir was created and its ibstart equals cpg->bdst.
 * @h_src_attr is filled by au_do_cpup_regular() for a regular file.
 * returns the result of the creating helper, or -EIO for an unknown type.
 */
481 static noinline_for_stack
482 int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent,
483                struct au_cpup_reg_attr *h_src_attr)
484 {
485         int err;
486         umode_t mode;
487         unsigned int mnt_flags;
488         unsigned char isdir;
489         const unsigned char do_dt = !!au_ftest_cpup(cpg->flags, DTIME);
490         struct au_dtime dt;
491         struct path h_path;
492         struct dentry *h_src, *h_dst, *h_parent;
493         struct inode *h_inode, *h_dir;
494         struct super_block *sb;
495
496         /* bsrc branch can be ro/rw. */
497         h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
498         h_inode = h_src->d_inode;
499         AuDebugOn(h_inode != au_h_iptr(cpg->dentry->d_inode, cpg->bsrc));
500
501         /* try stopping to be referenced while we are creating */
502         h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
            /* with the RENAME flag, the target name must begin with AUFS_WH_PFX */
503         if (au_ftest_cpup(cpg->flags, RENAME))
504                 AuDebugOn(strncmp(h_dst->d_name.name, AUFS_WH_PFX,
505                                   AUFS_WH_PFX_LEN));
506         h_parent = h_dst->d_parent; /* dir inode is locked */
507         h_dir = h_parent->d_inode;
508         IMustLock(h_dir);
509         AuDebugOn(h_parent != h_dst->d_parent);
510
511         sb = cpg->dentry->d_sb;
512         h_path.mnt = au_sbr_mnt(sb, cpg->bdst);
            /* h_path points to the parent only while its times are stored */
513         if (do_dt) {
514                 h_path.dentry = h_parent;
515                 au_dtime_store(&dt, dst_parent, &h_path);
516         }
517         h_path.dentry = h_dst;
518
519         isdir = 0;
520         mode = h_inode->i_mode;
521         switch (mode & S_IFMT) {
522         case S_IFREG:
                    /* S_IWUSR is added to the mode for the creation */
523                 err = vfsub_create(h_dir, &h_path, mode | S_IWUSR);
524                 if (!err)
525                         err = au_do_cpup_regular(cpg, h_src_attr);
526                 break;
527         case S_IFDIR:
528                 isdir = 1;
529                 err = vfsub_mkdir(h_dir, &h_path, mode);
530                 if (!err) {
531                         /*
532                          * strange behaviour from the users view,
533                          * particularly setattr case
534                          */
535                         if (au_ibstart(dst_parent->d_inode) == cpg->bdst)
536                                 au_cpup_attr_nlink(dst_parent->d_inode,
537                                                    /*force*/1);
538                         au_cpup_attr_nlink(cpg->dentry->d_inode, /*force*/1);
539                 }
540                 break;
541         case S_IFLNK:
542                 err = au_do_cpup_symlink(&h_path, h_src, h_dir);
543                 break;
544         case S_IFCHR:
545         case S_IFBLK:
546                 AuDebugOn(!capable(CAP_MKNOD));
547                 /*FALLTHROUGH*/
548         case S_IFIFO:
549         case S_IFSOCK:
550                 err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
551                 break;
552         default:
553                 AuIOErr("Unknown inode type 0%o\n", mode);
554                 err = -EIO;
555         }
556
            /*
             * when all the conditions below hold, ino 0 is written to the
             * xino entry of the source inode on bsrc. this is done
             * regardless of err.
             */
557         mnt_flags = au_mntflags(sb);
558         if (!au_opt_test(mnt_flags, UDBA_NONE)
559             && !isdir
560             && au_opt_test(mnt_flags, XINO)
561             && h_inode->i_nlink == 1
562             /* todo: unnecessary? */
563             /* && cpg->dentry->d_inode->i_nlink == 1 */
564             && cpg->bdst < cpg->bsrc
565             && !au_ftest_cpup(cpg->flags, KEEPLINO))
566                 au_xino_write(sb, cpg->bsrc, h_inode->i_ino, /*ino*/0);
567                 /* ignore this error */
568
569         if (do_dt)
570                 au_dtime_revert(&dt);
571         return err;
572 }
573
574 static int au_do_ren_after_cpup(struct dentry *dentry, aufs_bindex_t bdst,
575                                 struct path *h_path)
576 {
577         int err;
578         struct dentry *h_dentry, *h_parent;
579         struct inode *h_dir;
580
581         h_dentry = dget(au_h_dptr(dentry, bdst));
582         au_set_h_dptr(dentry, bdst, NULL);
583         err = au_lkup_neg(dentry, bdst, /*wh*/0);
584         if (unlikely(err)) {
585                 au_set_h_dptr(dentry, bdst, h_dentry);
586                 goto out;
587         }
588
589         h_path->dentry = dget(au_h_dptr(dentry, bdst));
590         au_set_h_dptr(dentry, bdst, h_dentry);
591         h_parent = h_dentry->d_parent; /* dir inode is locked */
592         h_dir = h_parent->d_inode;
593         IMustLock(h_dir);
594         AuDbg("%.*s %.*s\n", AuDLNPair(h_dentry), AuDLNPair(h_path->dentry));
595         err = vfsub_rename(h_dir, h_dentry, h_dir, h_path);
596         dput(h_path->dentry);
597
598 out:
599         return err;
600 }
601
602 /*
 * copy up cpg->dentry from the branch cpg->bsrc to the branch cpg->bdst.
 * the caller must set both of the lower dentries.
 * cpg->len is for truncating; when it is -1, the entire file is copied up.
 * in link/rename cases, @dst_parent may be different from the real one.
 * when @dst_parent is NULL, the parent of cpg->dentry is used.
 *
 * when the inode already has a lower inode with a non-zero link count on
 * cpg->bdst, the entry is created by vfsub_link() to the pseudo-link.
 * otherwise it is created by cpup_entry() and its attributes are set by
 * cpup_iattr(). on failure of the latter way, the created entry is removed
 * again and the timestamps of the lower parent dir are restored.
 * returns 0 for success, -ENOMEM, -EIO or the error of a failed step.
 */
608 static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
609 {
610         int err, rerr;
611         aufs_bindex_t old_ibstart;
612         unsigned char isdir, plink;
613         struct dentry *h_src, *h_dst, *h_parent;
614         struct inode *dst_inode, *h_dir, *inode;
615         struct super_block *sb;
616         struct au_branch *br;
617         /* to reduce stack size */
618         struct {
619                 struct au_dtime dt;
620                 struct path h_path;
621                 struct au_cpup_reg_attr h_src_attr;
622         } *a;
623
624         AuDebugOn(cpg->bsrc <= cpg->bdst);
625
626         err = -ENOMEM;
627         a = kmalloc(sizeof(*a), GFP_NOFS);
628         if (unlikely(!a))
629                 goto out;
            /* the attributes are not captured yet, see au_do_cpup_regular() */
630         a->h_src_attr.valid = 0;
631
632         sb = cpg->dentry->d_sb;
633         br = au_sbr(sb, cpg->bdst);
634         a->h_path.mnt = au_br_mnt(br);
635         h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
636         h_parent = h_dst->d_parent; /* dir inode is locked */
637         h_dir = h_parent->d_inode;
638         IMustLock(h_dir);
639
640         h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
641         inode = cpg->dentry->d_inode;
642
            /* hold a reference of the parent in both cases, dput at out_parent */
643         if (!dst_parent)
644                 dst_parent = dget_parent(cpg->dentry);
645         else
646                 dget(dst_parent);
647
648         plink = !!au_opt_test(au_mntflags(sb), PLINK);
649         dst_inode = au_h_iptr(inode, cpg->bdst);
            /* the inode already has a lower inode on the target branch */
650         if (dst_inode) {
651                 if (unlikely(!plink)) {
652                         err = -EIO;
653                         AuIOErr("hi%lu(i%lu) exists on b%d "
654                                 "but plink is disabled\n",
655                                 dst_inode->i_ino, inode->i_ino, cpg->bdst);
656                         goto out_parent;
657                 }
658
659                 if (dst_inode->i_nlink) {
660                         const int do_dt = au_ftest_cpup(cpg->flags, DTIME);
661
                            /* link to the pseudo-link instead of copying */
662                         h_src = au_plink_lkup(inode, cpg->bdst);
663                         err = PTR_ERR(h_src);
664                         if (IS_ERR(h_src))
665                                 goto out_parent;
666                         if (unlikely(!h_src->d_inode)) {
667                                 err = -EIO;
668                                 AuIOErr("i%lu exists on a upper branch "
669                                         "but not pseudo-linked\n",
670                                         inode->i_ino);
671                                 dput(h_src);
672                                 goto out_parent;
673                         }
674
675                         if (do_dt) {
676                                 a->h_path.dentry = h_parent;
677                                 au_dtime_store(&a->dt, dst_parent, &a->h_path);
678                         }
679
680                         a->h_path.dentry = h_dst;
681                         err = vfsub_link(h_src, h_dir, &a->h_path);
682                         if (!err && au_ftest_cpup(cpg->flags, RENAME))
683                                 err = au_do_ren_after_cpup
684                                         (cpg->dentry, cpg->bdst, &a->h_path);
685                         if (do_dt)
686                                 au_dtime_revert(&a->dt);
687                         dput(h_src);
                            /* this path ends here, without the revert below */
688                         goto out_parent;
689                 } else
690                         /* todo: cpup_wh_file? */
691                         /* udba work */
692                         au_update_ibrange(inode, /*do_put_zero*/1);
693         }
694
            /* create the entry, then set its attributes under its i_mutex */
695         isdir = S_ISDIR(inode->i_mode);
696         old_ibstart = au_ibstart(inode);
697         err = cpup_entry(cpg, dst_parent, &a->h_src_attr);
698         if (unlikely(err))
699                 goto out_rev;
700         dst_inode = h_dst->d_inode;
701         mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2);
702         /* todo: necessary? */
703         /* au_pin_hdir_unlock(cpg->pin); */
704
705         err = cpup_iattr(cpg->dentry, cpg->bdst, h_src, &a->h_src_attr);
706         if (unlikely(err)) {
707                 /* todo: necessary? */
708                 /* au_pin_hdir_relock(cpg->pin); */ /* ignore an error */
709                 mutex_unlock(&dst_inode->i_mutex);
710                 goto out_rev;
711         }
712
            /* the target branch becomes the new ibstart of the inode */
713         if (cpg->bdst < old_ibstart) {
714                 if (S_ISREG(inode->i_mode)) {
715                         err = au_dy_iaop(inode, cpg->bdst, dst_inode);
716                         if (unlikely(err)) {
717                                 /* ignore an error */
718                                 /* au_pin_hdir_relock(cpg->pin); */
719                                 mutex_unlock(&dst_inode->i_mutex);
720                                 goto out_rev;
721                         }
722                 }
723                 au_set_ibstart(inode, cpg->bdst);
724         }
725         au_set_h_iptr(inode, cpg->bdst, au_igrab(dst_inode),
726                       au_hi_flags(inode, isdir));
727
728         /* todo: necessary? */
729         /* err = au_pin_hdir_relock(cpg->pin); */
730         mutex_unlock(&dst_inode->i_mutex);
            /* err is still 0 here, since the relock above is disabled */
731         if (unlikely(err))
732                 goto out_rev;
733
734         if (!isdir
735             && h_src->d_inode->i_nlink > 1
736             && plink)
737                 au_plink_append(inode, cpg->bdst, h_dst);
738
739         if (au_ftest_cpup(cpg->flags, RENAME)) {
740                 a->h_path.dentry = h_dst;
741                 err = au_do_ren_after_cpup(cpg->dentry, cpg->bdst, &a->h_path);
742         }
743         if (!err)
744                 goto out_parent; /* success */
745
746         /* revert */
747 out_rev:
            /* remove the entry if it was created, keeping the parent's times */
748         a->h_path.dentry = h_parent;
749         au_dtime_store(&a->dt, dst_parent, &a->h_path);
750         a->h_path.dentry = h_dst;
751         rerr = 0;
752         if (h_dst->d_inode) {
753                 if (!isdir)
754                         rerr = vfsub_unlink(h_dir, &a->h_path, /*force*/0);
755                 else
756                         rerr = vfsub_rmdir(h_dir, &a->h_path);
757         }
758         au_dtime_revert(&a->dt);
            /* a failed removal replaces the original error by -EIO */
759         if (rerr) {
760                 AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
761                 err = -EIO;
762         }
763 out_parent:
764         dput(dst_parent);
765         kfree(a);
766 out:
767         return err;
768 }
769
#if 0 /* unused */
/* the arguments of au_call_cpup_single(), handed over as a void pointer */
struct au_cpup_single_args {
	int *errp;
	struct au_cp_generic *cpg;
	struct dentry *dst_parent;
};

/*
 * Run au_cpup_single() between au_pin_hdir_acquire_nest() and
 * au_pin_hdir_release(), and store its result to *errp.
 */
static void au_call_cpup_single(void *args)
{
	struct au_cpup_single_args *arg = args;
	int err;

	au_pin_hdir_acquire_nest(arg->cpg->pin);
	err = au_cpup_single(arg->cpg, arg->dst_parent);
	*arg->errp = err;
	au_pin_hdir_release(arg->cpg->pin);
}
#endif
786
787 /*
788  * prevent SIGXFSZ in copy-up.
789  * testing CAP_MKNOD is for generic fs,
790  * but CAP_FSETID is for xfs only, currently.
791  */
792 static int au_cpup_sio_test(struct au_pin *pin, umode_t mode)
793 {
794         int do_sio;
795         struct super_block *sb;
796         struct inode *h_dir;
797
798         do_sio = 0;
799         sb = au_pinned_parent(pin)->d_sb;
800         if (!au_wkq_test()
801             && (!au_sbi(sb)->si_plink_maint_pid
802                 || au_plink_maint(sb, AuLock_NOPLM))) {
803                 switch (mode & S_IFMT) {
804                 case S_IFREG:
805                         /* no condition about RLIMIT_FSIZE and the file size */
806                         do_sio = 1;
807                         break;
808                 case S_IFCHR:
809                 case S_IFBLK:
810                         do_sio = !capable(CAP_MKNOD);
811                         break;
812                 }
813                 if (!do_sio)
814                         do_sio = ((mode & (S_ISUID | S_ISGID))
815                                   && !capable(CAP_FSETID));
816                 /* this workaround may be removed in the future */
817                 if (!do_sio) {
818                         h_dir = au_pinned_h_dir(pin);
819                         do_sio = h_dir->i_mode & S_ISVTX;
820                 }
821         }
822
823         return do_sio;
824 }
825
#if 0 /* unused */
/*
 * Call au_cpup_single() directly, or through au_wkq_wait() when
 * au_cpup_sio_test() asks for it for the mode of the lower source inode.
 * Returns the error of au_wkq_wait() when it fails, otherwise the result of
 * au_cpup_single().
 */
int au_sio_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
{
	int err, wkq_err;
	struct dentry *h_dentry;

	h_dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
	/* the pin is carried by @cpg, there is no local 'pin' in this scope */
	if (!au_cpup_sio_test(cpg->pin, h_dentry->d_inode->i_mode))
		err = au_cpup_single(cpg, dst_parent);
	else {
		struct au_cpup_single_args args = {
			.errp		= &err,
			.cpg		= cpg,
			.dst_parent	= dst_parent
		};
		wkq_err = au_wkq_wait(au_call_cpup_single, &args);
		if (unlikely(wkq_err))
			err = wkq_err;
	}

	return err;
}
#endif
849
850 /*
851  * copyup the @dentry from the first active lower branch to @bdst,
852  * using au_cpup_single().
853  */
854 static int au_cpup_simple(struct au_cp_generic *cpg)
855 {
856         int err;
857         unsigned int flags_orig;
858         aufs_bindex_t bsrc, bend;
859         struct dentry *dentry, *h_dentry;
860
861         dentry = cpg->dentry;
862         DiMustWriteLock(dentry);
863
864         bend = au_dbend(dentry);
865         if (cpg->bsrc < 0) {
866                 for (bsrc = cpg->bdst + 1; bsrc <= bend; bsrc++) {
867                         h_dentry = au_h_dptr(dentry, bsrc);
868                         if (h_dentry) {
869                                 AuDebugOn(!h_dentry->d_inode);
870                                 break;
871                         }
872                 }
873                 AuDebugOn(bsrc > bend);
874                 cpg->bsrc = bsrc;
875         }
876
877         err = au_lkup_neg(dentry, cpg->bdst, /*wh*/1);
878         if (!err) {
879                 flags_orig = cpg->flags;
880                 au_fset_cpup(cpg->flags, RENAME);
881                 err = au_cpup_single(cpg, NULL);
882                 cpg->flags = flags_orig;
883                 if (!err)
884                         return 0; /* success */
885
886                 /* revert */
887                 au_set_h_dptr(dentry, cpg->bdst, NULL);
888                 au_set_dbstart(dentry, cpg->bsrc);
889         }
890
891         return err;
892 }
893
/* arguments handed to au_call_cpup_simple() via au_wkq_wait() */
struct au_cpup_simple_args {
	/* where the return value of au_cpup_simple() is stored */
	int *errp;
	/* passed to au_cpup_simple() as is */
	struct au_cp_generic *cpg;
};
898
899 static void au_call_cpup_simple(void *args)
900 {
901         struct au_cpup_simple_args *a = args;
902
903         au_pin_hdir_acquire_nest(a->cpg->pin);
904         *a->errp = au_cpup_simple(a->cpg);
905         au_pin_hdir_release(a->cpg->pin);
906 }
907
908 int au_sio_cpup_simple(struct au_cp_generic *cpg)
909 {
910         int err, wkq_err;
911         struct dentry *dentry, *parent;
912         struct file *h_file;
913         struct inode *h_dir;
914
915         dentry = cpg->dentry;
916         h_file = NULL;
917         if (au_ftest_cpup(cpg->flags, HOPEN)) {
918                 AuDebugOn(cpg->bsrc < 0);
919                 h_file = au_h_open_pre(dentry, cpg->bsrc);
920                 err = PTR_ERR(h_file);
921                 if (IS_ERR(h_file))
922                         goto out;
923         }
924
925         parent = dget_parent(dentry);
926         h_dir = au_h_iptr(parent->d_inode, cpg->bdst);
927         if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE)
928             && !au_cpup_sio_test(cpg->pin, dentry->d_inode->i_mode))
929                 err = au_cpup_simple(cpg);
930         else {
931                 struct au_cpup_simple_args args = {
932                         .errp           = &err,
933                         .cpg            = cpg
934                 };
935                 wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
936                 if (unlikely(wkq_err))
937                         err = wkq_err;
938         }
939
940         dput(parent);
941         if (h_file)
942                 au_h_open_post(dentry, cpg->bsrc, h_file);
943
944 out:
945         return err;
946 }
947
948 /* ---------------------------------------------------------------------- */
949
/*
 * copyup the deleted file for writing.
 *
 * @wh_dentry is temporarily installed as the lower dentry of @cpg->dentry
 * on the branch @cpg->bdst, and that branch is made di_bstart, while
 * au_cpup_single() runs with the previous di_bstart as the source branch.
 * when @file is given, the lower dentry on the source branch is temporarily
 * replaced by the one of au_hf_top(@file), and @file is reopened by
 * au_reopen_nondir() after a successful copy.
 * dinfo, @cpg->bsrc and @cpg->flags are put back before returning.
 */
static int au_do_cpup_wh(struct au_cp_generic *cpg, struct dentry *wh_dentry,
			 struct file *file)
{
	int err;
	unsigned int flags_orig;
	aufs_bindex_t bsrc_orig;
	struct dentry *h_d_dst, *h_d_start;
	struct au_dinfo *dinfo;
	struct au_hdentry *hdp;

	dinfo = au_di(cpg->dentry);
	AuRwMustWriteLock(&dinfo->di_rwsem);

	/* the source is the current top branch of the dentry */
	bsrc_orig = cpg->bsrc;
	cpg->bsrc = dinfo->di_bstart;
	hdp = dinfo->di_hdentry;
	/* save the lower dentry on bdst, and put wh_dentry there instead */
	h_d_dst = hdp[0 + cpg->bdst].hd_dentry;
	dinfo->di_bstart = cpg->bdst;
	hdp[0 + cpg->bdst].hd_dentry = wh_dentry;
	h_d_start = NULL;
	if (file) {
		/* copy from the dentry of the top lower file of @file */
		h_d_start = hdp[0 + cpg->bsrc].hd_dentry;
		hdp[0 + cpg->bsrc].hd_dentry = au_hf_top(file)->f_dentry;
	}
	flags_orig = cpg->flags;
	/* logical NOT of the flag, presumably 0, ie. no cpup flag is set */
	cpg->flags = !AuCpup_DTIME;
	err = au_cpup_single(cpg, /*h_parent*/NULL);
	cpg->flags = flags_orig;
	if (file) {
		if (!err)
			err = au_reopen_nondir(file);
		hdp[0 + cpg->bsrc].hd_dentry = h_d_start;
	}
	/*
	 * restore. cpg->bsrc is expected to hold the old di_bstart still,
	 * assuming au_cpup_single() does not change it -- TODO confirm.
	 */
	hdp[0 + cpg->bdst].hd_dentry = h_d_dst;
	dinfo->di_bstart = cpg->bsrc;
	cpg->bsrc = bsrc_orig;

	return err;
}
992
993 static int au_cpup_wh(struct au_cp_generic *cpg, struct file *file)
994 {
995         int err;
996         aufs_bindex_t bdst;
997         struct au_dtime dt;
998         struct dentry *dentry, *parent, *h_parent, *wh_dentry;
999         struct au_branch *br;
1000         struct path h_path;
1001
1002         dentry = cpg->dentry;
1003         bdst = cpg->bdst;
1004         br = au_sbr(dentry->d_sb, bdst);
1005         parent = dget_parent(dentry);
1006         h_parent = au_h_dptr(parent, bdst);
1007         wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
1008         err = PTR_ERR(wh_dentry);
1009         if (IS_ERR(wh_dentry))
1010                 goto out;
1011
1012         h_path.dentry = h_parent;
1013         h_path.mnt = au_br_mnt(br);
1014         au_dtime_store(&dt, parent, &h_path);
1015         err = au_do_cpup_wh(cpg, wh_dentry, file);
1016         if (unlikely(err))
1017                 goto out_wh;
1018
1019         dget(wh_dentry);
1020         h_path.dentry = wh_dentry;
1021         if (!S_ISDIR(wh_dentry->d_inode->i_mode))
1022                 err = vfsub_unlink(h_parent->d_inode, &h_path, /*force*/0);
1023         else
1024                 err = vfsub_rmdir(h_parent->d_inode, &h_path);
1025         if (unlikely(err)) {
1026                 AuIOErr("failed remove copied-up tmp file %.*s(%d)\n",
1027                         AuDLNPair(wh_dentry), err);
1028                 err = -EIO;
1029         }
1030         au_dtime_revert(&dt);
1031         au_set_hi_wh(dentry->d_inode, bdst, wh_dentry);
1032
1033 out_wh:
1034         dput(wh_dentry);
1035 out:
1036         dput(parent);
1037         return err;
1038 }
1039
/* arguments handed to au_call_cpup_wh() via au_wkq_wait() */
struct au_cpup_wh_args {
	/* where the return value of au_cpup_wh() is stored */
	int *errp;
	/* passed to au_cpup_wh() as is */
	struct au_cp_generic *cpg;
	/* passed to au_cpup_wh() as is, NULL in some callers' case */
	struct file *file;
};
1045
1046 static void au_call_cpup_wh(void *args)
1047 {
1048         struct au_cpup_wh_args *a = args;
1049
1050         au_pin_hdir_acquire_nest(a->cpg->pin);
1051         *a->errp = au_cpup_wh(a->cpg, a->file);
1052         au_pin_hdir_release(a->cpg->pin);
1053 }
1054
/*
 * run au_cpup_wh(), directly or through au_wkq_wait().
 *
 * when the lower parent dir on the branch @cpg->bdst has no link
 * (i_nlink == 0), the orphan dir of the branch (wbr_orph) is temporarily
 * set as the lower dentry/inode of the parent, its i_mutex is held, and a
 * local pin replaces @cpg->pin. all of them are restored after the
 * copy-up.
 * the workqueue is used when either au_test_h_perm_sio() on the dir where
 * the file will be created or au_cpup_sio_test() returns non-zero.
 */
int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file)
{
	int err, wkq_err;
	aufs_bindex_t bdst;
	struct dentry *dentry, *parent, *h_orph, *h_parent, *h_dentry;
	struct inode *dir, *h_dir, *h_tmpdir;
	struct au_wbr *wbr;
	struct au_pin wh_pin, *pin_orig;

	dentry = cpg->dentry;
	bdst = cpg->bdst;
	parent = dget_parent(dentry);
	dir = parent->d_inode;
	h_orph = NULL;
	h_parent = NULL;
	/* this reference is dropped by iput() at the end */
	h_dir = au_igrab(au_h_iptr(dir, bdst));
	h_tmpdir = h_dir;
	pin_orig = NULL;
	if (!h_dir->i_nlink) {
		/* the lower parent dir is unlinked, use the orphan dir */
		wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
		h_orph = wbr->wbr_orph;

		/* keep the original lower parent to restore it later */
		h_parent = dget(au_h_dptr(parent, bdst));
		au_set_h_dptr(parent, bdst, dget(h_orph));
		h_tmpdir = h_orph->d_inode;
		au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);

		/* NOTE(review): h_dentry is assigned but not used after this */
		if (file)
			h_dentry = au_hf_top(file)->f_dentry;
		else
			h_dentry = au_h_dptr(dentry, au_dbstart(dentry));
		mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3);
		/* todo: au_h_open_pre()? */

		/* a local pin replaces the caller's one during the copy-up */
		pin_orig = cpg->pin;
		au_pin_init(&wh_pin, dentry, bdst, AuLsc_DI_PARENT,
			    AuLsc_I_PARENT3, cpg->pin->udba, AuPin_DI_LOCKED);
		cpg->pin = &wh_pin;
	}

	if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE)
	    && !au_cpup_sio_test(cpg->pin, dentry->d_inode->i_mode))
		err = au_cpup_wh(cpg, file);
	else {
		struct au_cpup_wh_args args = {
			.errp	= &err,
			.cpg	= cpg,
			.file	= file
		};
		/* a failure of the workqueue itself overrides the result */
		wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
		if (unlikely(wkq_err))
			err = wkq_err;
	}

	if (h_orph) {
		/* undo the substitution by the orphan dir */
		mutex_unlock(&h_tmpdir->i_mutex);
		/* todo: au_h_open_post()? */
		au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
		au_set_h_dptr(parent, bdst, h_parent);
		AuDebugOn(!pin_orig);
		cpg->pin = pin_orig;
	}
	iput(h_dir);
	dput(parent);

	return err;
}
1122
1123 /* ---------------------------------------------------------------------- */
1124
/*
 * generic routine for both of copy-up and copy-down.
 *
 * makes sure that the parent dir of @dentry has a lower dentry on the
 * branch @bdst, by calling @cp for every ancestor dir which lacks one,
 * from the topmost one down to the parent.
 * @cp receives the dir to copy, @bdst, the pin which is set up here, the
 * lower dentry of its parent on @bdst, and @arg.
 * returns zero when nothing is left to do (the parent is the root, or it
 * has its lower dentry on @bdst), otherwise the error from au_do_pin() or
 * @cp.
 */
/* cf. revalidate function in file.c */
int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
	       int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
			 struct au_pin *pin,
			 struct dentry *h_parent, void *arg),
	       void *arg)
{
	int err;
	struct au_pin pin;
	struct dentry *d, *parent, *h_parent, *real_parent;

	err = 0;
	parent = dget_parent(dentry);
	if (IS_ROOT(parent))
		goto out;

	au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
		    au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);

	/* do not use au_dpage */
	real_parent = parent;
	/* every pass copies one dir, the topmost missing one */
	while (1) {
		dput(parent);
		parent = dget_parent(dentry);
		h_parent = au_h_dptr(parent, bdst);
		if (h_parent)
			goto out; /* success */

		/* find top dir which is necessary to cpup */
		/*
		 * NOTE(review): @d is used after dput(parent) without a
		 * reference of its own. presumably it is kept alive as an
		 * ancestor of @dentry -- confirm.
		 */
		do {
			d = parent;
			dput(parent);
			parent = dget_parent(d);
			di_read_lock_parent3(parent, !AuLock_IR);
			h_parent = au_h_dptr(parent, bdst);
			di_read_unlock(parent, !AuLock_IR);
		} while (!h_parent);

		/* real_parent is not locked here, presumably by the caller */
		if (d != real_parent)
			di_write_lock_child3(d);

		/* somebody else might create while we were sleeping */
		if (!au_h_dptr(d, bdst) || !au_h_dptr(d, bdst)->d_inode) {
			/* a lower dentry without inode exists on bdst */
			if (au_h_dptr(d, bdst))
				au_update_dbstart(d);

			au_pin_set_dentry(&pin, d);
			err = au_do_pin(&pin);
			if (!err) {
				err = cp(d, bdst, &pin, h_parent, arg);
				au_unpin(&pin);
			}
		}

		if (d != real_parent)
			di_write_unlock(d);
		if (unlikely(err))
			break;
	}

out:
	dput(parent);
	return err;
}
1192
1193 static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
1194                        struct au_pin *pin,
1195                        struct dentry *h_parent __maybe_unused ,
1196                        void *arg __maybe_unused)
1197 {
1198         struct au_cp_generic cpg = {
1199                 .dentry = dentry,
1200                 .bdst   = bdst,
1201                 .bsrc   = -1,
1202                 .len    = 0,
1203                 .pin    = pin,
1204                 .flags  = AuCpup_DTIME
1205         };
1206         return au_sio_cpup_simple(&cpg);
1207 }
1208
1209 int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
1210 {
1211         return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
1212 }
1213
1214 int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
1215 {
1216         int err;
1217         struct dentry *parent;
1218         struct inode *dir;
1219
1220         parent = dget_parent(dentry);
1221         dir = parent->d_inode;
1222         err = 0;
1223         if (au_h_iptr(dir, bdst))
1224                 goto out;
1225
1226         di_read_unlock(parent, AuLock_IR);
1227         di_write_lock_parent(parent);
1228         /* someone else might change our inode while we were sleeping */
1229         if (!au_h_iptr(dir, bdst))
1230                 err = au_cpup_dirs(dentry, bdst);
1231         di_downgrade_lock(parent, AuLock_IR);
1232
1233 out:
1234         dput(parent);
1235         return err;
1236 }