2 * Copyright (C) 2005-2013 Junjiro R. Okajima
4 * This program, aufs is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 * file and vm operations
23 #include <linux/fs_stack.h>
24 #include <linux/mman.h>
25 #include <linux/security.h>
/*
 * Open the lower (branch) file that backs a non-directory aufs file.
 *
 * Visible steps: verify the dentry via au_d_alive(), clear fi_htop and the
 * fi_mmapped counter of the file info, open the lower file on the dentry's
 * start branch (au_dbstart()) with au_h_open(), then store the branch index
 * and the lower file pointer in @file and call au_update_figen().
 *
 * FiMustWriteLock() is invoked first; presumably it asserts that the caller
 * holds the file-info lock for write -- confirm against its definition.
 * NOTE(review): the conditionals and the return statement are not visible in
 * this excerpt; err is set to PTR_ERR(h_file) on what appears to be the
 * failed-open path.
 */
28 int au_do_open_nondir(struct file *file, int flags)
33 struct dentry *dentry;
34 struct au_finfo *finfo;
36 FiMustWriteLock(file);
38 dentry = file->f_dentry;
39 err = au_d_alive(dentry);
44 memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
45 atomic_set(&finfo->fi_mmapped, 0);
46 bindex = au_dbstart(dentry);
47 h_file = au_h_open(dentry, bindex, flags, file);
49 err = PTR_ERR(h_file);
/* remember which branch serves this file and its lower file object */
51 au_set_fbstart(file, bindex);
52 au_set_h_fptr(file, bindex, h_file);
53 au_update_figen(file);
54 /* todo: necessary? */
55 /* file->f_ra = h_file->f_ra; */
/*
 * open handler for non-directory files (installed as .open in aufs_file_fop).
 * Emits a debug message with the dentry name and the file flags, takes the
 * superblock info lock for read with AuLock_FLUSH, and delegates the real
 * work to au_do_open() with au_do_open_nondir() as the callback and no
 * directory-specific file info.
 * @inode is unused.
 */
62 static int aufs_open_nondir(struct inode *inode __maybe_unused,
66 struct super_block *sb;
68 AuDbg("%.*s, f_flags 0x%x, f_mode 0x%x\n",
69 AuDLNPair(file->f_dentry), vfsub_file_flags(file),
72 sb = file->f_dentry->d_sb;
73 si_read_lock(sb, AuLock_FLUSH);
74 err = au_do_open(file, au_do_open_nondir, /*fidir*/NULL);
/*
 * release handler for non-directory files (installed as .release).
 * Takes the top branch index from finfo->fi_btop, removes @file from the
 * superblock's file list and clears the lower file pointer stored for that
 * branch by passing NULL to au_set_h_fptr().
 * @inode is unused.
 */
79 int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
81 struct au_finfo *finfo;
85 bindex = finfo->fi_btop;
87 /* remove me from sb->s_files */
88 file_sb_list_del(file);
89 au_set_h_fptr(file, bindex, NULL);
96 /* ---------------------------------------------------------------------- */
/*
 * Flush callback for non-directory files: fetches the top lower file with
 * au_hf_top() and forwards the flush, with the same owner @id, to
 * vfsub_flush().
 */
98 static int au_do_flush_nondir(struct file *file, fl_owner_t id)
104 h_file = au_hf_top(file);
106 err = vfsub_flush(h_file, id);
/*
 * flush handler (installed as .flush): delegates to the common au_do_flush()
 * with au_do_flush_nondir() as the per-file callback and returns its result.
 */
110 static int aufs_flush_nondir(struct file *file, fl_owner_t id)
112 return au_do_flush(file, id, au_do_flush_nondir);
115 /* ---------------------------------------------------------------------- */
117 * read and write functions acquire [fdi]_rwsem once, but release before
118 * mmap_sem. This is to prevent a race condition with mmap(2).
119 * Releasing these aufs-rwsem should be safe, no branch-management (by keeping
120 * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in
121 * read functions after [fdi]_rwsem are released, but it should be harmless.
/*
 * read(2) entry point for non-directory files.
 *
 * Takes si_rwsem for read, revalidates the file and takes the
 * file/dentry/inode locks for read via au_reval_and_lock_fdi(), picks up the
 * top lower file, and then drops the dentry and file-info locks *before*
 * calling into the lower filesystem with vfsub_read_u().
 * Afterwards the access time of the lower inode is copied to the aufs inode
 * without holding the aufs inode lock.
 * NOTE(review): error checks, the si_rwsem release and the return are not
 * visible in this excerpt.
 */
124 static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
128 struct dentry *dentry;
130 struct super_block *sb;
132 dentry = file->f_dentry;
134 si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
135 err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
139 h_file = au_hf_top(file);
/* release the aufs dentry/file locks before the lower read is issued */
141 di_read_unlock(dentry, AuLock_IR);
142 fi_read_unlock(file);
144 /* filedata may be obsoleted by concurrent copyup, but no problem */
145 err = vfsub_read_u(h_file, buf, count, ppos);
146 /* todo: necessary? */
147 /* file->f_ra = h_file->f_ra; */
148 /* update without lock, I don't think it a problem */
149 fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
159 * it locks both of i_mutex and si_rwsem for read in safe.
160 * if the plink maintenance mode continues forever (that is the problem),
/*
 * Acquire inode->i_mutex and then si_rwsem for read (AuLock_FLUSH |
 * AuLock_NOPLM). On the visible fallback path i_mutex is dropped again and
 * si_rwsem is taken with AuLock_NOPLMW instead.
 * NOTE(review): the surrounding control flow (condition on err, any retry
 * loop, release of si_rwsem before retrying) is not visible in this excerpt;
 * callers visible in this file release i_mutex themselves afterwards.
 */
163 static void au_mtx_and_read_lock(struct inode *inode)
166 struct super_block *sb = inode->i_sb;
169 mutex_lock(&inode->i_mutex);
170 err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
173 mutex_unlock(&inode->i_mutex);
174 si_read_lock(sb, AuLock_NOPLMW);
/*
 * write(2) entry point for non-directory files.
 *
 * Locks i_mutex and si_rwsem through au_mtx_and_read_lock(), revalidates and
 * write-locks the file/dentry/inode, then calls au_ready_to_write() to
 * prepare the file for writing, after which the dentry lock is downgraded
 * to a read lock.
 * The dentry and file-info locks are released before the data is written to
 * the top lower file with vfsub_write_u(). After the write, times/sizes and
 * i_mode are copied from the lower inode under the aufs inode write lock.
 * i_mutex is released at the end.
 * NOTE(review): the first unlock pair below appears to be the error path of
 * au_ready_to_write(); the guarding condition is not visible here.
 */
179 static ssize_t aufs_write(struct file *file, const char __user *ubuf,
180 size_t count, loff_t *ppos)
184 struct dentry *dentry;
185 struct super_block *sb;
/* the const qualifier is cast away for vfsub_write_u() */
188 char __user *buf = (char __user *)ubuf;
190 dentry = file->f_dentry;
192 inode = dentry->d_inode;
193 au_mtx_and_read_lock(inode);
195 err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
199 err = au_ready_to_write(file, -1, &pin);
200 di_downgrade_lock(dentry, AuLock_IR);
202 di_read_unlock(dentry, AuLock_IR);
203 fi_write_unlock(file);
207 h_file = au_hf_top(file);
210 di_read_unlock(dentry, AuLock_IR);
211 fi_write_unlock(file);
213 err = vfsub_write_u(h_file, buf, count, ppos);
/* propagate the lower inode's attributes to the aufs inode */
214 ii_write_lock_child(inode);
215 au_cpup_attr_timesizes(inode);
216 inode->i_mode = h_file->f_dentry->d_inode->i_mode;
217 ii_write_unlock(inode);
222 mutex_unlock(&inode->i_mutex);
/*
 * Issue an asynchronous read or write on the lower file @h_file.
 *
 * @rw is MAY_READ or MAY_WRITE (the write case is tested explicitly below).
 * security_file_permission() is checked first, then the lower file's
 * aio_read or aio_write method is selected, kio->ki_filp is pointed at the
 * lower file, and the method is called with @iov, @nv and @pos.
 * NOTE(review): handling of a missing method and any restoring of
 * kio->ki_filp are not visible in this excerpt.
 */
226 static ssize_t au_do_aio(struct file *h_file, int rw, struct kiocb *kio,
227 const struct iovec *iov, unsigned long nv, loff_t pos)
231 ssize_t (*func)(struct kiocb *, const struct iovec *, unsigned long,
234 err = security_file_permission(h_file, rw);
241 func = h_file->f_op->aio_read;
242 else if (rw == MAY_WRITE)
243 func = h_file->f_op->aio_write;
/* the lower method must see the lower file as the kiocb's file */
246 kio->ki_filp = h_file;
248 err = func(kio, iov, nv, pos);
252 /* currently there is no such fs */
/*
 * aio_read handler for non-directory files (installed as .aio_read).
 * Same locking sequence as aufs_read(): si_rwsem for read, revalidate and
 * read-lock file/dentry/inode, fetch the top lower file, drop the dentry and
 * file-info locks, then forward the request through au_do_aio() with
 * MAY_READ. The lower inode's atime is copied to the aufs inode afterwards
 * without locking.
 */
259 static ssize_t aufs_aio_read(struct kiocb *kio, const struct iovec *iov,
260 unsigned long nv, loff_t pos)
263 struct file *file, *h_file;
264 struct dentry *dentry;
265 struct super_block *sb;
268 dentry = file->f_dentry;
270 si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
271 err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
275 h_file = au_hf_top(file);
277 di_read_unlock(dentry, AuLock_IR);
278 fi_read_unlock(file);
280 err = au_do_aio(h_file, MAY_READ, kio, iov, nv, pos);
281 /* todo: necessary? */
282 /* file->f_ra = h_file->f_ra; */
283 /* update without lock, I don't think it a problem */
284 fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
/*
 * aio_write handler for non-directory files (installed as .aio_write).
 * Mirrors aufs_write(): i_mutex + si_rwsem via au_mtx_and_read_lock(),
 * revalidate with write locks, au_ready_to_write(), downgrade the dentry
 * lock, release dentry/file-info locks, then forward through au_do_aio()
 * with MAY_WRITE. Afterwards times/sizes and i_mode are copied from the
 * lower inode under the aufs inode write lock, and i_mutex is released.
 * NOTE(review): the first unlock pair below appears to be the error path of
 * au_ready_to_write(); the guarding condition is not visible here.
 */
292 static ssize_t aufs_aio_write(struct kiocb *kio, const struct iovec *iov,
293 unsigned long nv, loff_t pos)
297 struct dentry *dentry;
299 struct file *file, *h_file;
300 struct super_block *sb;
303 dentry = file->f_dentry;
305 inode = dentry->d_inode;
306 au_mtx_and_read_lock(inode);
308 err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
312 err = au_ready_to_write(file, -1, &pin);
313 di_downgrade_lock(dentry, AuLock_IR);
315 di_read_unlock(dentry, AuLock_IR);
316 fi_write_unlock(file);
320 h_file = au_hf_top(file);
323 di_read_unlock(dentry, AuLock_IR);
324 fi_write_unlock(file);
326 err = au_do_aio(h_file, MAY_WRITE, kio, iov, nv, pos);
/* propagate the lower inode's attributes to the aufs inode */
327 ii_write_lock_child(inode);
328 au_cpup_attr_timesizes(inode);
329 inode->i_mode = h_file->f_dentry->d_inode->i_mode;
330 ii_write_unlock(inode);
335 mutex_unlock(&inode->i_mutex);
/*
 * splice_read handler for non-directory files (installed as .splice_read).
 * Takes si_rwsem for read, revalidates and read-locks file/dentry/inode and
 * fetches the top lower file. When running in the loopback kernel thread
 * (au_test_loopback_kthread()), a warning is issued for the lower superblock
 * and file->f_mapping is switched to the lower file's mapping if it differs.
 * The dentry and file-info locks are dropped before vfsub_splice_to() is
 * called on the lower file; the lower atime is copied back afterwards
 * without locking.
 */
339 static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
340 struct pipe_inode_info *pipe, size_t len,
345 struct dentry *dentry;
346 struct super_block *sb;
348 dentry = file->f_dentry;
350 si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
351 err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
356 h_file = au_hf_top(file);
358 if (au_test_loopback_kthread()) {
359 au_warn_loopback(h_file->f_dentry->d_sb);
360 if (file->f_mapping != h_file->f_mapping) {
361 file->f_mapping = h_file->f_mapping;
362 smp_mb(); /* unnecessary? */
365 di_read_unlock(dentry, AuLock_IR);
366 fi_read_unlock(file);
368 err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
369 /* todo: necessary? */
370 /* file->f_ra = h_file->f_ra; */
371 /* update without lock, I don't think it a problem */
372 fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
/*
 * splice_write handler for non-directory files (installed as .splice_write).
 * Mirrors aufs_write(): i_mutex + si_rwsem via au_mtx_and_read_lock(),
 * revalidate with write locks, au_ready_to_write(), downgrade the dentry
 * lock, release dentry/file-info locks, then splice from @pipe into the top
 * lower file with vfsub_splice_from(). Afterwards times/sizes and i_mode are
 * copied from the lower inode under the aufs inode write lock, and i_mutex
 * is released.
 * NOTE(review): the first unlock pair below appears to be the error path of
 * au_ready_to_write(); the guarding condition is not visible here.
 */
381 aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
382 size_t len, unsigned int flags)
386 struct dentry *dentry;
389 struct super_block *sb;
391 dentry = file->f_dentry;
393 inode = dentry->d_inode;
394 au_mtx_and_read_lock(inode);
396 err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
400 err = au_ready_to_write(file, -1, &pin);
401 di_downgrade_lock(dentry, AuLock_IR);
403 di_read_unlock(dentry, AuLock_IR);
404 fi_write_unlock(file);
408 h_file = au_hf_top(file);
411 di_read_unlock(dentry, AuLock_IR);
412 fi_write_unlock(file);
414 err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
/* propagate the lower inode's attributes to the aufs inode */
415 ii_write_lock_child(inode);
416 au_cpup_attr_timesizes(inode);
417 inode->i_mode = h_file->f_dentry->d_inode->i_mode;
418 ii_write_unlock(inode);
423 mutex_unlock(&inode->i_mutex);
427 /* ---------------------------------------------------------------------- */
430 * The locking order around current->mmap_sem.
431 * - in most and regular cases
432 * file I/O syscall -- aufs_read() or something
433 * -- si_rwsem for read -- mmap_sem
434 * (Note that [fdi]i_rwsem are released before mmap_sem).
436 * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem
437 * This AB-BA order is definitely bad, but is not a problem since "si_rwsem for
438 * read" allows multiple processes to acquire it and [fdi]i_rwsem are not held in
439 * file I/O. Aufs needs to stop lockdep in aufs_mmap() though.
440 * It means that when aufs acquires si_rwsem for write, the process should never
443 * Actually aufs_readdir() holds [fdi]i_rwsem before mmap_sem, but this is not a
444 * problem either since any directory is not able to be mmap-ed.
445 * The similar scenario is applied to aufs_readlink() too.
448 /* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
/*
 * Translate the VM_<b> bit of @f into the corresponding PROT_<b> bit using
 * _calc_vm_trans(), i.e. vm_flags -> mmap prot (the argument order is the
 * reverse of the one used by calc_vm_prot_bits()).
 */
449 #define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
/*
 * Architecture-specific part of the vm_flags -> prot conversion.
 * Two variants are visible: one that sanity-checks that
 * arch_calc_vm_prot_bits(-1) equals VM_SAO and converts the SAO bit, and one
 * that only checks that arch_calc_vm_prot_bits(-1) is zero.
 * NOTE(review): the preprocessor conditionals selecting between the variants
 * and the return value of the second variant are not visible in this
 * excerpt; per the comment below the first one is for ppc64.
 */
451 static unsigned long au_arch_prot_conv(unsigned long flags)
453 /* currently ppc64 only */
455 /* cf. linux/arch/powerpc/include/asm/mman.h */
456 AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
457 return AuConv_VM_PROT(flags, SAO);
459 AuDebugOn(arch_calc_vm_prot_bits(-1));
/*
 * Build mmap protection bits from vm_flags: converts the READ, WRITE and
 * EXEC bits with AuConv_VM_PROT() and ORs in whatever
 * au_arch_prot_conv() contributes for the architecture.
 */
464 static unsigned long au_prot_conv(unsigned long flags)
466 return AuConv_VM_PROT(flags, READ)
467 | AuConv_VM_PROT(flags, WRITE)
468 | AuConv_VM_PROT(flags, EXEC)
469 | au_arch_prot_conv(flags);
472 /* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
/*
 * Translate the VM_<b> bit of @f into the corresponding MAP_<b> bit using
 * _calc_vm_trans(), i.e. vm_flags -> mmap flags (the argument order is the
 * reverse of the one used by calc_vm_flag_bits()).
 */
473 #define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
/*
 * Build mmap flag bits from vm_flags: converts the GROWSDOWN, DENYWRITE,
 * EXECUTABLE and LOCKED bits with AuConv_VM_MAP() and ORs them together.
 */
475 static unsigned long au_flag_conv(unsigned long flags)
477 return AuConv_VM_MAP(flags, GROWSDOWN)
478 | AuConv_VM_MAP(flags, DENYWRITE)
479 | AuConv_VM_MAP(flags, EXECUTABLE)
480 | AuConv_VM_MAP(flags, LOCKED);
/*
 * mmap handler for non-directory files.
 *
 * wlock is true when the file is open for writing and the mapping is shared.
 * The function takes si_rwsem for read, revalidates and write-locks the
 * file/dentry/inode, calls au_ready_to_write() (presumably only when wlock
 * is set -- the guard is not visible here), and releases the dentry lock.
 * It then looks up the branch and top lower file, marks the file as mmapped
 * and releases the file-info lock.
 *
 * The vma is pointed at the lower file with au_vm_file_reset(), the request
 * is checked with security_file_mmap() using prot/flags reconstructed from
 * vma->vm_flags, and the lower filesystem's ->mmap() is called. On success
 * the aufs file is recorded through au_vm_prfile_set() and atime is copied
 * from the lower inode; on failure the mmapped mark is cleared and the vma
 * is pointed back at the aufs file.
 * NOTE(review): error checks, labels and the final cleanup are not visible
 * in this excerpt.
 */
483 static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
487 aufs_bindex_t bstart;
488 const unsigned char wlock
489 = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
490 struct dentry *dentry;
491 struct super_block *sb;
493 struct au_branch *br;
496 AuDbgVmRegion(file, vma);
498 dentry = file->f_dentry;
501 si_read_lock(sb, AuLock_NOPLMW);
502 err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
507 err = au_ready_to_write(file, -1, &pin);
508 di_write_unlock(dentry);
510 fi_write_unlock(file);
515 di_write_unlock(dentry);
517 bstart = au_fbstart(file);
518 br = au_sbr(sb, bstart);
519 h_file = au_hf_top(file);
521 au_set_mmapped(file);
522 fi_write_unlock(file);
/* from here on the vma refers to the lower file */
525 au_vm_file_reset(vma, h_file);
526 prot = au_prot_conv(vma->vm_flags);
527 err = security_file_mmap(h_file, /*reqprot*/prot, prot,
528 au_flag_conv(vma->vm_flags), vma->vm_start, 0);
530 err = h_file->f_op->mmap(h_file, vma);
534 au_vm_prfile_set(vma, file);
535 /* update without lock, I don't think it a problem */
536 fsstack_copy_attr_atime(file->f_dentry->d_inode,
537 h_file->f_dentry->d_inode);
538 goto out_fput; /* success */
/* failure path: undo the mmapped mark and restore the aufs file in the vma */
541 au_unset_mmapped(file);
542 au_vm_file_reset(vma, file);
553 /* ---------------------------------------------------------------------- */
/*
 * fsync handler for non-directory files (installed as .fsync).
 *
 * Takes i_mutex and si_rwsem for read (AuLock_FLUSH | AuLock_NOPLM). A file
 * not opened for writing is treated as success (err stays 0; returning
 * -EBADF is left as an open question in the code). Otherwise the file is
 * revalidated with write locks, prepared with au_ready_to_write(), the
 * dentry lock is downgraded, and the top lower file is synced via
 * vfsub_fsync(); times/sizes are then copied up to the aufs inode.
 * The dentry/file-info locks and i_mutex are released at the end.
 * NOTE(review): error branches and the si_rwsem release are not visible in
 * this excerpt.
 */
555 static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
560 struct dentry *dentry;
563 struct super_block *sb;
565 dentry = file->f_dentry;
566 inode = dentry->d_inode;
568 mutex_lock(&inode->i_mutex);
569 err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
573 err = 0; /* -EBADF; */ /* posix? */
574 if (unlikely(!(file->f_mode & FMODE_WRITE)))
576 err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
580 err = au_ready_to_write(file, -1, &pin);
581 di_downgrade_lock(dentry, AuLock_IR);
587 h_file = au_hf_top(file);
588 err = vfsub_fsync(h_file, &h_file->f_path, datasync);
589 au_cpup_attr_timesizes(inode);
592 di_read_unlock(dentry, AuLock_IR);
593 fi_write_unlock(file);
597 mutex_unlock(&inode->i_mutex);
601 /* no one supports this operation, currently */
/*
 * aio_fsync handler for non-directory files; its entry in aufs_file_fop is
 * commented out, so it is not installed.
 *
 * Takes i_mutex and si_rwsem via au_mtx_and_read_lock(). A file not opened
 * for writing is treated as success (err stays 0). Otherwise the file is
 * revalidated with write locks, prepared with au_ready_to_write(), and the
 * dentry lock is downgraded. If the top lower file implements ->aio_fsync,
 * kio->ki_filp is pointed at the lower file and the lower method is called;
 * the lower inode attributes are refreshed and times/sizes are copied up.
 * All locks are released at the end; the superblock for si_read_unlock() is
 * taken from inode->i_sb, the superblock member of struct inode.
 * NOTE(review): error branches and the handling of non-sync kiocbs are not
 * visible in this excerpt.
 */
603 static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
607 struct dentry *dentry;
609 struct file *file, *h_file;
612 dentry = file->f_dentry;
613 inode = dentry->d_inode;
614 au_mtx_and_read_lock(inode);
616 err = 0; /* -EBADF; */ /* posix? */
617 if (unlikely(!(file->f_mode & FMODE_WRITE)))
619 err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
623 err = au_ready_to_write(file, -1, &pin);
624 di_downgrade_lock(dentry, AuLock_IR);
630 h_file = au_hf_top(file);
631 if (h_file->f_op && h_file->f_op->aio_fsync) {
635 h_d = h_file->f_dentry;
636 h_mtx = &h_d->d_inode->i_mutex;
637 if (!is_sync_kiocb(kio)) {
641 kio->ki_filp = h_file;
642 err = h_file->f_op->aio_fsync(kio, datasync);
643 mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
645 vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
647 au_cpup_attr_timesizes(inode);
652 di_read_unlock(dentry, AuLock_IR);
653 fi_write_unlock(file);
655 si_read_unlock(inode->i_sb);
656 mutex_unlock(&inode->i_mutex);
/*
 * fasync handler for non-directory files (installed as .fasync).
 * Takes si_rwsem for read, revalidates and read-locks file/dentry/inode,
 * and forwards the request to the top lower file's ->fasync() when the lower
 * file provides one. The dentry and file-info locks are released afterwards.
 * NOTE(review): the error check after revalidation and the si_rwsem release
 * are not visible in this excerpt.
 */
661 static int aufs_fasync(int fd, struct file *file, int flag)
665 struct dentry *dentry;
666 struct super_block *sb;
668 dentry = file->f_dentry;
670 si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
671 err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
675 h_file = au_hf_top(file);
676 if (h_file->f_op && h_file->f_op->fasync)
677 err = h_file->f_op->fasync(fd, h_file, flag);
679 di_read_unlock(dentry, AuLock_IR);
680 fi_read_unlock(file);
687 /* ---------------------------------------------------------------------- */
689 /* no one supports this operation, currently */
691 static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
692 size_t len, loff_t *pos , int more)
697 /* ---------------------------------------------------------------------- */
699 const struct file_operations aufs_file_fop = {
700 .owner = THIS_MODULE,
702 .llseek = default_llseek,
706 .aio_read = aufs_aio_read,
707 .aio_write = aufs_aio_write,
708 #ifdef CONFIG_AUFS_POLL
711 .unlocked_ioctl = aufs_ioctl_nondir,
713 .compat_ioctl = aufs_ioctl_nondir, /* same */
716 .open = aufs_open_nondir,
717 .flush = aufs_flush_nondir,
718 .release = aufs_release_nondir,
719 .fsync = aufs_fsync_nondir,
720 /* .aio_fsync = aufs_aio_fsync_nondir, */
721 .fasync = aufs_fasync,
722 /* .sendpage = aufs_sendpage, */
723 .splice_write = aufs_splice_write,
724 .splice_read = aufs_splice_read,
726 .aio_splice_write = aufs_aio_splice_write,
727 .aio_splice_read = aufs_aio_splice_read