--- /dev/null
+From 4fafa8e8eedf21cf9aeed56f2b193110696154e2 Mon Sep 17 00:00:00 2001
+From: David-John Willis <John.Willis@Distant-earth.com>
+Date: Mon, 21 Dec 2009 21:36:25 +0000
+Subject: [PATCH 7/7] AUFS2: Add latest AUFS2 in-tree code for 2.6.27.
+
+---
+ Documentation/ABI/testing/debugfs-aufs | 40 +
+ Documentation/ABI/testing/sysfs-aufs | 25 +
+ Documentation/filesystems/aufs/README | 347 +++++
+ Documentation/filesystems/aufs/design/01intro.txt | 137 ++
+ Documentation/filesystems/aufs/design/02struct.txt | 218 +++
+ Documentation/filesystems/aufs/design/03lookup.txt | 104 ++
+ Documentation/filesystems/aufs/design/04branch.txt | 76 +
+ .../filesystems/aufs/design/05wbr_policy.txt | 65 +
+ .../filesystems/aufs/design/06fmode_exec.txt | 33 +
+ Documentation/filesystems/aufs/design/07mmap.txt | 53 +
+ Documentation/filesystems/aufs/design/08export.txt | 59 +
+ Documentation/filesystems/aufs/design/09shwh.txt | 53 +
+ Documentation/filesystems/aufs/design/99plan.txt | 96 ++
+ fs/Kconfig | 2 +
+ fs/Makefile | 1 +
+ fs/aufs/Kconfig | 136 ++
+ fs/aufs/Makefile | 23 +
+ fs/aufs/aufs.h | 59 +
+ fs/aufs/branch.c | 978 ++++++++++++
+ fs/aufs/branch.h | 219 +++
+ fs/aufs/cpup.c | 1048 +++++++++++++
+ fs/aufs/cpup.h | 81 +
+ fs/aufs/dbgaufs.c | 331 +++++
+ fs/aufs/dbgaufs.h | 52 +
+ fs/aufs/dcsub.c | 223 +++
+ fs/aufs/dcsub.h | 54 +
+ fs/aufs/debug.c | 431 ++++++
+ fs/aufs/debug.h | 232 +++
+ fs/aufs/dentry.c | 875 +++++++++++
+ fs/aufs/dentry.h | 228 +++
+ fs/aufs/dinfo.c | 367 +++++
+ fs/aufs/dir.c | 579 ++++++++
+ fs/aufs/dir.h | 127 ++
+ fs/aufs/export.c | 745 ++++++++++
+ fs/aufs/f_op.c | 810 ++++++++++
+ fs/aufs/f_op_sp.c | 290 ++++
+ fs/aufs/file.c | 620 ++++++++
+ fs/aufs/file.h | 187 +++
+ fs/aufs/finfo.c | 131 ++
+ fs/aufs/fstype.h | 434 ++++++
+ fs/aufs/hinotify.c | 755 ++++++++++
+ fs/aufs/i_op.c | 875 +++++++++++
+ fs/aufs/i_op_add.c | 658 +++++++++
+ fs/aufs/i_op_del.c | 470 ++++++
+ fs/aufs/i_op_ren.c | 965 ++++++++++++
+ fs/aufs/iinfo.c | 283 ++++
+ fs/aufs/inode.c | 414 ++++++
+ fs/aufs/inode.h | 474 ++++++
+ fs/aufs/ioctl.c | 127 ++
+ fs/aufs/loop.c | 55 +
+ fs/aufs/loop.h | 43 +
+ fs/aufs/magic.mk | 66 +
+ fs/aufs/module.c | 173 +++
+ fs/aufs/module.h | 78 +
+ fs/aufs/opts.c | 1550 ++++++++++++++++++++
+ fs/aufs/opts.h | 196 +++
+ fs/aufs/plink.c | 429 ++++++
+ fs/aufs/rdu.c | 333 +++++
+ fs/aufs/rwsem.h | 186 +++
+ fs/aufs/sbinfo.c | 211 +++
+ fs/aufs/spl.h | 57 +
+ fs/aufs/super.c | 874 +++++++++++
+ fs/aufs/super.h | 361 +++++
+ fs/aufs/sysaufs.c | 104 ++
+ fs/aufs/sysaufs.h | 105 ++
+ fs/aufs/sysfs.c | 210 +++
+ fs/aufs/sysrq.c | 118 ++
+ fs/aufs/vdir.c | 884 +++++++++++
+ fs/aufs/vfsub.c | 660 +++++++++
+ fs/aufs/vfsub.h | 145 ++
+ fs/aufs/wbr_policy.c | 641 ++++++++
+ fs/aufs/whout.c | 1054 +++++++++++++
+ fs/aufs/whout.h | 87 ++
+ fs/aufs/wkq.c | 259 ++++
+ fs/aufs/wkq.h | 82 +
+ fs/aufs/xino.c | 1199 +++++++++++++++
+ fs/namei.c | 4 +-
+ fs/splice.c | 10 +-
+ include/linux/Kbuild | 1 +
+ include/linux/aufs_type.h | 195 +++
+ include/linux/namei.h | 3 +
+ include/linux/splice.h | 6 +
+ scripts/basic/hash | Bin 0 -> 6907 bytes
+ 83 files changed, 25962 insertions(+), 7 deletions(-)
+ create mode 100644 Documentation/ABI/testing/debugfs-aufs
+ create mode 100644 Documentation/ABI/testing/sysfs-aufs
+ create mode 100644 Documentation/filesystems/aufs/README
+ create mode 100644 Documentation/filesystems/aufs/design/01intro.txt
+ create mode 100644 Documentation/filesystems/aufs/design/02struct.txt
+ create mode 100644 Documentation/filesystems/aufs/design/03lookup.txt
+ create mode 100644 Documentation/filesystems/aufs/design/04branch.txt
+ create mode 100644 Documentation/filesystems/aufs/design/05wbr_policy.txt
+ create mode 100644 Documentation/filesystems/aufs/design/06fmode_exec.txt
+ create mode 100644 Documentation/filesystems/aufs/design/07mmap.txt
+ create mode 100644 Documentation/filesystems/aufs/design/08export.txt
+ create mode 100644 Documentation/filesystems/aufs/design/09shwh.txt
+ create mode 100644 Documentation/filesystems/aufs/design/99plan.txt
+ create mode 100644 fs/aufs/Kconfig
+ create mode 100644 fs/aufs/Makefile
+ create mode 100644 fs/aufs/aufs.h
+ create mode 100644 fs/aufs/branch.c
+ create mode 100644 fs/aufs/branch.h
+ create mode 100644 fs/aufs/cpup.c
+ create mode 100644 fs/aufs/cpup.h
+ create mode 100644 fs/aufs/dbgaufs.c
+ create mode 100644 fs/aufs/dbgaufs.h
+ create mode 100644 fs/aufs/dcsub.c
+ create mode 100644 fs/aufs/dcsub.h
+ create mode 100644 fs/aufs/debug.c
+ create mode 100644 fs/aufs/debug.h
+ create mode 100644 fs/aufs/dentry.c
+ create mode 100644 fs/aufs/dentry.h
+ create mode 100644 fs/aufs/dinfo.c
+ create mode 100644 fs/aufs/dir.c
+ create mode 100644 fs/aufs/dir.h
+ create mode 100644 fs/aufs/export.c
+ create mode 100644 fs/aufs/f_op.c
+ create mode 100644 fs/aufs/f_op_sp.c
+ create mode 100644 fs/aufs/file.c
+ create mode 100644 fs/aufs/file.h
+ create mode 100644 fs/aufs/finfo.c
+ create mode 100644 fs/aufs/fstype.h
+ create mode 100644 fs/aufs/hinotify.c
+ create mode 100644 fs/aufs/i_op.c
+ create mode 100644 fs/aufs/i_op_add.c
+ create mode 100644 fs/aufs/i_op_del.c
+ create mode 100644 fs/aufs/i_op_ren.c
+ create mode 100644 fs/aufs/iinfo.c
+ create mode 100644 fs/aufs/inode.c
+ create mode 100644 fs/aufs/inode.h
+ create mode 100644 fs/aufs/ioctl.c
+ create mode 100644 fs/aufs/loop.c
+ create mode 100644 fs/aufs/loop.h
+ create mode 100644 fs/aufs/magic.mk
+ create mode 100644 fs/aufs/module.c
+ create mode 100644 fs/aufs/module.h
+ create mode 100644 fs/aufs/opts.c
+ create mode 100644 fs/aufs/opts.h
+ create mode 100644 fs/aufs/plink.c
+ create mode 100644 fs/aufs/rdu.c
+ create mode 100644 fs/aufs/rwsem.h
+ create mode 100644 fs/aufs/sbinfo.c
+ create mode 100644 fs/aufs/spl.h
+ create mode 100644 fs/aufs/super.c
+ create mode 100644 fs/aufs/super.h
+ create mode 100644 fs/aufs/sysaufs.c
+ create mode 100644 fs/aufs/sysaufs.h
+ create mode 100644 fs/aufs/sysfs.c
+ create mode 100644 fs/aufs/sysrq.c
+ create mode 100644 fs/aufs/vdir.c
+ create mode 100644 fs/aufs/vfsub.c
+ create mode 100644 fs/aufs/vfsub.h
+ create mode 100644 fs/aufs/wbr_policy.c
+ create mode 100644 fs/aufs/whout.c
+ create mode 100644 fs/aufs/whout.h
+ create mode 100644 fs/aufs/wkq.c
+ create mode 100644 fs/aufs/wkq.h
+ create mode 100644 fs/aufs/xino.c
+ create mode 100644 include/linux/aufs_type.h
+ create mode 100644 scripts/basic/hash
+
+diff --git a/Documentation/ABI/testing/debugfs-aufs b/Documentation/ABI/testing/debugfs-aufs
+new file mode 100644
+index 0000000..4110b94
+--- /dev/null
++++ b/Documentation/ABI/testing/debugfs-aufs
+@@ -0,0 +1,40 @@
++What: /debug/aufs/si_<id>/
++Date: March 2009
++Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
++Description:
++ Under /debug/aufs, a directory named si_<id> is created
++ per aufs mount, where <id> is a unique id generated
++ internally.
++
++What: /debug/aufs/si_<id>/xib
++Date: March 2009
++Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
++Description:
++ It shows the consumed blocks by xib (External Inode Number
++ Bitmap), its block size and file size.
++ When the aufs mount option 'noxino' is specified, it
++ will be empty. About XINO files, see
++ Documentation/filesystems/aufs/aufs.5 in detail.
++
++What: /debug/aufs/si_<id>/xino0, xino1 ... xinoN
++Date: March 2009
++Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
++Description:
++ It shows the consumed blocks by xino (External Inode Number
++ Translation Table), its link count, block size and file
++ size.
++ When the aufs mount option 'noxino' is specified, it
++ will be empty. About XINO files, see
++ Documentation/filesystems/aufs/aufs.5 in detail.
++
++What: /debug/aufs/si_<id>/xigen
++Date: March 2009
++Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
++Description:
++ It shows the consumed blocks by xigen (External Inode
++ Generation Table), its block size and file size.
++ If CONFIG_AUFS_EXPORT is disabled, this entry will not
++ be created.
++ When the aufs mount option 'noxino' is specified, it
++ will be empty. About XINO files, see
++ Documentation/filesystems/aufs/aufs.5 in detail.
+diff --git a/Documentation/ABI/testing/sysfs-aufs b/Documentation/ABI/testing/sysfs-aufs
+new file mode 100644
+index 0000000..ca49330
+--- /dev/null
++++ b/Documentation/ABI/testing/sysfs-aufs
+@@ -0,0 +1,25 @@
++What: /sys/fs/aufs/si_<id>/
++Date: March 2009
++Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
++Description:
++ Under /sys/fs/aufs, a directory named si_<id> is created
++ per aufs mount, where <id> is a unique id generated
++ internally.
++
++What: /sys/fs/aufs/si_<id>/br0, br1 ... brN
++Date: March 2009
++Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
++Description:
++ It shows the abolute path of a member directory (which
++ is called branch) in aufs, and its permission.
++
++What: /sys/fs/aufs/si_<id>/xi_path
++Date: March 2009
++Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
++Description:
++ It shows the abolute path of XINO (External Inode Number
++ Bitmap, Translation Table and Generation Table) file
++ even if it is the default path.
++ When the aufs mount option 'noxino' is specified, it
++ will be empty. About XINO files, see
++ Documentation/filesystems/aufs/aufs.5 in detail.
+diff --git a/Documentation/filesystems/aufs/README b/Documentation/filesystems/aufs/README
+new file mode 100644
+index 0000000..2fbd3e6
+--- /dev/null
++++ b/Documentation/filesystems/aufs/README
+@@ -0,0 +1,347 @@
++
++Aufs2 -- advanced multi layered unification filesystem version 2
++http://aufs.sf.net
++Junjiro R. Okajima
++
++
++0. Introduction
++----------------------------------------
++In the early days, aufs was entirely re-designed and re-implemented
++Unionfs Version 1.x series. After many original ideas, approaches,
++improvements and implementations, it becomes totally different from
++Unionfs while keeping the basic features.
++Recently, Unionfs Version 2.x series begin taking some of the same
++approaches to aufs1's.
++Unionfs is being developed by Professor Erez Zadok at Stony Brook
++University and his team.
++
++This version of AUFS, aufs2 has several purposes.
++- to be reviewed easily and widely.
++- to make the source files simpler and smaller by dropping several
++ original features.
++
++Through this work, I found some bad things in aufs1 source code and
++fixed them. Some of the dropped features will be reverted in the future,
++but not all I'm afraid.
++Aufs2 supports linux-2.6.27 and later. If you want older kernel version
++support, try aufs1 from CVS on SourceForge.
++
++Note: it becomes clear that "Aufs was rejected. Let's give it up."
++According to Christoph Hellwig, linux rejects all union-type filesystems
++but UnionMount.
++<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
++
++
++1. Features
++----------------------------------------
++- unite several directories into a single virtual filesystem. The member
++ directory is called as a branch.
++- you can specify the permission flags to the branch, which are 'readonly',
++ 'readwrite' and 'whiteout-able.'
++- by upper writable branch, internal copyup and whiteout, files/dirs on
++ readonly branch are modifiable logically.
++- dynamic branch manipulation, add, del.
++- etc...
++
++Also there are many enhancements in aufs1, such as:
++- readdir(3) in userspace.
++- keep inode number by external inode number table
++- keep the timestamps of file/dir in internal copyup operation
++- seekable directory, supporting NFS readdir.
++- support mmap(2) including /proc/PID/exe symlink, without page-copy
++- whiteout is hardlinked in order to reduce the consumption of inodes
++ on branch
++- do not copyup, nor create a whiteout when it is unnecessary
++- revert a single systemcall when an error occurs in aufs
++- remount interface instead of ioctl
++- maintain /etc/mtab by an external command, /sbin/mount.aufs.
++- loopback mounted filesystem as a branch
++- kernel thread for removing the dir who has a plenty of whiteouts
++- support copyup sparse file (a file which has a 'hole' in it)
++- default permission flags for branches
++- selectable permission flags for ro branch, whether whiteout can
++ exist or not
++- export via NFS.
++- support <sysfs>/fs/aufs and <debugfs>/aufs.
++- support multiple writable branches, some policies to select one
++ among multiple writable branches.
++- a new semantics for link(2) and rename(2) to support multiple
++ writable branches.
++- no glibc changes are required.
++- pseudo hardlink (hardlink over branches)
++- allow a direct access manually to a file on branch, e.g. bypassing aufs.
++ including NFS or remote filesystem branch.
++- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
++- and more...
++
++Currently these features are dropped temporary from this version, aufs2.
++See design/08plan.txt in detail.
++- test only the highest one for the directory permission (dirperm1)
++- show whiteout mode (shwh)
++- copyup on open (coo=)
++- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
++ (robr)
++- statistics of aufs thread (/sys/fs/aufs/stat)
++- delegation mode (dlgt)
++ a delegation of the internal branch access to support task I/O
++ accounting, which also supports Linux Security Modules (LSM) mainly
++ for Suse AppArmor.
++- intent.open/create (file open in a single lookup)
++
++Features or just an idea in the future (see also design/*.txt),
++- reorder the branch index without del/re-add.
++- permanent xino files for NFSD
++- an option for refreshing the opened files after add/del branches
++- 'move' policy for copy-up between two writable branches, after
++ checking free space.
++- O_DIRECT
++- light version, without branch manipulation. (unnecessary?)
++- copyup in userspace
++- inotify in userspace
++- readv/writev
++- xattr, acl
++
++
++2. Download
++----------------------------------------
++Kindly one of aufs user, the Center for Scientific Computing and Free
++Software (C3SL), Federal University of Parana offered me a public GIT
++tree space.
++
++There are three GIT trees, aufs2-2.6, aufs2-standalone and aufs2-util.
++While the aufs2-util is always necessary, you need either of aufs2-2.6
++or aufs2-standalone.
++
++The aufs2-2.6 tree includes the whole linux-2.6 GIT tree,
++git://git.kernel.org/.../torvalds/linux-2.6.git.
++And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
++build aufs2 as an externel kernel module.
++If you already have linux-2.6 GIT tree, you may want to pull and merge
++the "aufs2" branch from this tree.
++
++On the other hand, the aufs2-standalone tree has only aufs2 source files
++and a necessary patch, and you can select CONFIG_AUFS_FS=m. In other
++words, the aufs2-standalone tree is generated from aufs2-2.6 tree by,
++- extract new files and modifications.
++- generate some patch files from modifications.
++- generate a ChangeLog file from git-log.
++- commit the files newly and no log messages. this is not git-pull.
++
++Both of aufs2-2.6 and aufs2-standalone trees have a branch whose name is
++in form of "aufs2-xx" where "xx" represents the linux kernel version,
++"linux-2.6.xx".
++
++o aufs2-2.6 tree
++$ git clone --reference /your/linux-2.6/git/tree \
++ http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-2.6.git \
++ aufs2-2.6.git
++- if you don't have linux-2.6 GIT tree, then remove "--reference ..."
++$ cd aufs2-2.6.git
++$ git checkout origin/aufs2-xx # for instance, aufs2-27 for linux-2.6.27
++ # aufs2 (no -xx) for the latest -rc version.
++
++o aufs2-standalone tree
++$ git clone http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-standalone.git \
++ aufs2-standalone.git
++$ cd aufs2-standalone.git
++$ git checkout origin/aufs2-xx # for instance, aufs2-27 for linux-2.6.27
++ # aufs2 (no -xx) for the latest -rc version.
++
++o aufs2-util tree
++$ git clone http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-util.git \
++ aufs2-util.git
++$ cd aufs2-util.git
++- no particular tag/branch currently.
++
++o for advanced users
++$ git clone git://git.kernel.org/.../torvalds/linux-2.6.git linux-2.6.git
++ It will take very long time.
++
++$ cd linux-2.6.git
++$ git remote add aufs2 http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-2.6.git
++$ git checkout -b aufs2-27 v2.6.27
++$ git pull aufs2 aufs2-27
++ It may take long time again.
++ Once pulling completes, you've got linux-2.6.27 and aufs2 for it in a
++ branch named aufs2-27, and you can configure and build it.
++
++Or
++
++$ git checkout -t -b aufs2 master
++$ git pull aufs2 aufs2
++ then you've got the latest linux kernel and the latest aufs2 in a
++ branch named aufs2, and you can configure and build it.
++ But aufs is released once a week, so you may meet a compilation error
++ due to mismatching between the mainline and aufs2.
++
++Or you may want build linux-2.6.xx.yy instead of linux-2.6.xx, then here
++is an approach using linux-2.6-stable GIT tree.
++
++$ cd linux-2.6.git/..
++$ git clone -q --reference ./linux-2.6.git git://git.kernel.org/.../linux-2.6-stable.git \
++ linux-2.6-stable.git
++ It will take very long time.
++
++$ cd linux-2.6-stable.git
++$ git remote add aufs2 http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-2.6.git
++$ git checkout -b aufs2-27.1 v2.6.27.1
++$ git pull aufs2 aufs2-27
++ then you've got linux-2.6.27.1 and aufs2 for 2.6.27 in a branch named
++ aufs2-27.1, and you can configure and build it.
++ But the changes made by v2.6.xx.yy may conflict with aufs2-xx, since
++ aufs2-xx is for v2.6.xx only. In this case, you may find some patchces
++ for v2.6.xx.yy in aufs2-standalone.git#aufs2-xx branch if someone else
++ have ever requested me to support v2.6.xx.yy and I did it.
++
++You can also check what was changed by pulling aufs2.
++$ git diff v2.6.27.1..aufs2-27.1
++
++If you want to check the changed files other than fs/aufs, then try this.
++$ git diff v2.6.27.1..aufs2-27.1 |
++> awk '
++> /^diff / {new=1}
++> /^diff.*aufs/ {new=0}
++> new {print}
++> '
++
++
++3. Configuration and Compilation
++----------------------------------------
++Make sure you have git-checkout'ed the correct branch.
++
++For aufs2-2.6 tree,
++- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS.
++- set other aufs configurations if necessary.
++
++For aufs2-standalone tree,
++There are several ways to build.
++
++You may feel why aufs2-standalone.patch needs to export so many kernel
++symbols. Because you selected aufs2-standalone tree instead of aufs2-2.6
++tree. The number of necessary symbols to export essentially is zero.
++All other symbols are for the external module.
++If you don't like aufs2-standalone.patch, then try aufs2-2.6 tree.
++
++1.
++- apply ./aufs2-kbuild.patch to your kernel source files.
++- apply ./aufs2-base.patch too.
++- apply ./aufs2-standalone.patch too, if you have a plan to set
++ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs2-standalone.patch.
++- copy ./{Documentation,fs,include} files to your kernel source tree.
++- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS, you can select either
++ =m or =y.
++- and build your kernel as usual.
++- install it and reboot your system.
++
++2.
++- module only (CONFIG_AUFS_FS=m).
++- apply ./aufs2-base.patch to your kernel source files.
++- apply ./aufs2-standalone.patch too.
++- build your kernel and reboot.
++- edit ./config.mk and set other aufs configurations if necessary.
++ Note: You should read ./fs/aufs/Kconfig carefully which describes
++ every aufs configurations.
++- build the module by simple "make".
++- you can specify ${KDIR} make variable which points to your kernel
++ source tree.
++- copy the build ./aufs.ko to /lib/modules/..., and run depmod -a (or
++ reboot simply).
++- no need to apply aufs2-kbuild.patch, nor copying source files to your
++ kernel source tree.
++
++And then,
++- read README in aufs2-util, build and install it
++- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
++ then run "make install_ulib" too. And refer to the aufs manual in
++ detail.
++
++
++4. Usage
++----------------------------------------
++At first, make sure aufs2-util are installed, and please read the aufs
++manual, aufs.5 in aufs2-util.git tree.
++$ man -l aufs.5
++
++And then,
++$ mkdir /tmp/rw /tmp/aufs
++# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
++
++Here is another example. The result is equivalent.
++# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
++ Or
++# mount -t aufs -o br:/tmp/rw none /tmp/aufs
++# mount -o remount,append:${HOME} /tmp/aufs
++
++Then, you can see whole tree of your home dir through /tmp/aufs. If
++you modify a file under /tmp/aufs, the one on your home directory is
++not affected, instead the same named file will be newly created under
++/tmp/rw. And all of your modification to a file will be applied to
++the one under /tmp/rw. This is called the file based Copy on Write
++(COW) method.
++Aufs mount options are described in aufs.5.
++
++Additionally, there are some sample usages of aufs which are a
++diskless system with network booting, and LiveCD over NFS.
++See sample dir in CVS tree on SourceForge.
++
++
++5. Contact
++----------------------------------------
++When you have any problems or strange behaviour in aufs, please let me
++know with:
++- /proc/mounts (instead of the output of mount(8))
++- /sys/module/aufs/*
++- /sys/fs/aufs/* (if you have them)
++- /debug/aufs/* (if you have them)
++- linux kernel version
++ if your kernel is not plain, for example modified by distributor,
++ the url where i can download its source is necessary too.
++- aufs version which was printed at loading the module or booting the
++ system, instead of the date you downloaded.
++- configuration (define/undefine CONFIG_AUFS_xxx)
++- kernel configuration or /proc/config.gz (if you have it)
++- behaviour which you think to be incorrect
++- actual operation, reproducible one is better
++- mailto: aufs-users at lists.sourceforge.net
++
++Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
++and Feature Requests) on SourceForge. Please join and write to
++aufs-users ML.
++
++
++6. Acknowledgements
++----------------------------------------
++Thanks to everyone who have tried and are using aufs, whoever
++have reported a bug or any feedback.
++
++Especially donors:
++Tomas Matejicek(slax.org) made a donation (much more than once).
++Dai Itasaka made a donation (2007/8).
++Chuck Smith made a donation (2008/4, 10 and 12).
++Henk Schoneveld made a donation (2008/9).
++Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
++Francois Dupoux made a donation (2008/11).
++Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
++aufs2 GIT tree (2009/2).
++William Grant made a donation (2009/3).
++Patrick Lane made a donation (2009/4).
++The Mail Archive (mail-archive.com) made donations (2009/5).
++Nippy Networks (Ed Wildgoose) made a donation (2009/7).
++New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
++
++Thank you very much.
++Donations are always, including future donations, very important and
++helpful for me to keep on developing aufs.
++
++
++7.
++----------------------------------------
++If you are an experienced user, no explanation is needed. Aufs is
++just a linux filesystem.
++
++
++Enjoy!
++
++# Local variables: ;
++# mode: text;
++# End: ;
+diff --git a/Documentation/filesystems/aufs/design/01intro.txt b/Documentation/filesystems/aufs/design/01intro.txt
+new file mode 100644
+index 0000000..ac678c0
+--- /dev/null
++++ b/Documentation/filesystems/aufs/design/01intro.txt
+@@ -0,0 +1,137 @@
++
++# Copyright (C) 2005-2009 Junjiro R. Okajima
++#
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 2 of the License, or
++# (at your option) any later version.
++#
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write to the Free Software
++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++
++Introduction
++----------------------------------------
++
++aufs [ei ju: ef es] | [a u f s]
++1. abbrev. for "advanced multi-layered unification filesystem".
++2. abbrev. for "another unionfs".
++3. abbrev. for "auf das" in German which means "on the" in English.
++ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
++ But "Filesystem aufs Filesystem" is hard to understand.
++
++AUFS is a filesystem with features:
++- multi layered stackable unification filesystem, the member directory
++ is called as a branch.
++- branch permission and attribute, 'readonly', 'real-readonly',
++ 'readwrite', 'whiteout-able', 'link-able whiteout' and their
++ combination.
++- internal "file copy-on-write".
++- logical deletion, whiteout.
++- dynamic branch manipulation, adding, deleting and changing permission.
++- allow bypassing aufs, user's direct branch access.
++- external inode number translation table and bitmap which maintains the
++ persistent aufs inode number.
++- seekable directory, including NFS readdir.
++- file mapping, mmap and sharing pages.
++- pseudo-link, hardlink over branches.
++- loopback mounted filesystem as a branch.
++- several policies to select one among multiple writable branches.
++- revert a single systemcall when an error occurs in aufs.
++- and more...
++
++
++Multi Layered Stackable Unification Filesystem
++----------------------------------------------------------------------
++Most people already knows what it is.
++It is a filesystem which unifies several directories and provides a
++merged single directory. When users access a file, the access will be
++passed/re-directed/converted (sorry, I am not sure which English word is
++correct) to the real file on the member filesystem. The member
++filesystem is called 'lower filesystem' or 'branch' and has a mode
++'readonly' and 'readwrite.' And the deletion for a file on the lower
++readonly branch is handled by creating 'whiteout' on the upper writable
++branch.
++
++On LKML, there have been discussions about UnionMount (Jan Blunck and
++Bharata B Rao) and Unionfs (Erez Zadok). They took different approaches
++to implement the merged-view.
++The former tries putting it into VFS, and the latter implements as a
++separate filesystem.
++(If I misunderstand about these implementations, please let me know and
++I shall correct it. Because it is a long time ago when I read their
++source files last time).
++UnionMount's approach will be able to small, but may be hard to share
++branches between several UnionMount since the whiteout in it is
++implemented in the inode on branch filesystem and always
++shared. According to Bharata's post, readdir does not seems to be
++finished yet.
++Unionfs has a longer history. When I started implementing a stacking filesystem
++(Aug 2005), it already existed. It has virtual super_block, inode,
++dentry and file objects and they have an array pointing lower same kind
++objects. After contributing many patches for Unionfs, I re-started my
++project AUFS (Jun 2006).
++
++In AUFS, the structure of filesystem resembles to Unionfs, but I
++implemented my own ideas, approaches and enhancements and it became
++totally different one.
++
++
++Several characters/aspects of aufs
++----------------------------------------------------------------------
++
++Aufs has several characters or aspects.
++1. a filesystem, callee of VFS helper
++2. sub-VFS, caller of VFS helper for branches
++3. a virtual filesystem which maintains persistent inode number
++4. reader/writer of files on branches such like an application
++
++1. Caller of VFS Helper
++As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
++unlink(2) from an application reaches sys_unlink() kernel function and
++then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
++calls filesystem specific unlink operation. Actually aufs implements the
++unlink operation but it behaves like a redirector.
++
++2. Caller of VFS Helper for Branches
++aufs_unlink() passes the unlink request to the branch filesystem as if
++it were called from VFS. So the called unlink operation of the branch
++filesystem acts as usual. As a caller of VFS helper, aufs should handle
++every necessary pre/post operation for the branch filesystem.
++- acquire the lock for the parent dir on a branch
++- lookup in a branch
++- revalidate dentry on a branch
++- mnt_want_write() for a branch
++- vfs_unlink() for a branch
++- mnt_drop_write() for a branch
++- release the lock on a branch
++
++3. Persistent Inode Number
++One of the most important issue for a filesystem is to maintain inode
++numbers. This is particularly important to support exporting a
++filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
++backend block device for its own. But some storage is necessary to
++maintain inode number. It may be a large space and may not suit to keep
++in memory. Aufs rents some space from its first writable branch
++filesystem (by default) and creates file(s) on it. These files are
++created by aufs internally and removed soon (currently) keeping opened.
++Note: Because these files are removed, they are totally gone after
++ unmounting aufs. It means the inode numbers are not persistent
++ across unmount or reboot. I have a plan to make them really
++ persistent which will be important for aufs on NFS server.
++
++4. Read/Write Files Internally (copy-on-write)
++Because a branch can be readonly, when you write a file on it, aufs will
++"copy-up" it to the upper writable branch internally. And then write the
++originally requested thing to the file. Generally kernel doesn't
++open/read/write file actively. In aufs, even a single write may cause a
++internal "file copy". This behaviour is very similar to cp(1) command.
++
++Some people may think it is better to pass such work to user space
++helper, instead of doing in kernel space. Actually I am still thinking
++about it. But currently I have implemented it in kernel space.
+diff --git a/Documentation/filesystems/aufs/design/02struct.txt b/Documentation/filesystems/aufs/design/02struct.txt
+new file mode 100644
+index 0000000..11cee07
+--- /dev/null
++++ b/Documentation/filesystems/aufs/design/02struct.txt
+@@ -0,0 +1,218 @@
++
++# Copyright (C) 2005-2009 Junjiro R. Okajima
++#
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 2 of the License, or
++# (at your option) any later version.
++#
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write to the Free Software
++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++
++Basic Aufs Internal Structure
++
++Superblock/Inode/Dentry/File Objects
++----------------------------------------------------------------------
++As like an ordinary filesystem, aufs has its own
++superblock/inode/dentry/file objects. All these objects have a
++dynamically allocated array and store the same kind of pointers to the
++lower filesystem, branch.
++For example, when you build a union with one readwrite branch and one
++readonly, mounted /au, /rw and /ro respectively.
++- /au = /rw + /ro
++- /ro/fileA exists but /rw/fileA
++
++Aufs lookup operation finds /ro/fileA and gets dentry for that. These
++pointers are stored in a aufs dentry. The array in aufs dentry will be,
++- [0] = NULL
++- [1] = /ro/fileA
++
++This style of an array is essentially same to the aufs
++superblock/inode/dentry/file objects.
++
++Because aufs supports manipulating branches, ie. add/delete/change
++dynamically, these objects has its own generation. When branches are
++changed, the generation in aufs superblock is incremented. And a
++generation in other object are compared when it is accessed.
++When a generation in other objects are obsoleted, aufs refreshes the
++internal array.
++
++
++Superblock
++----------------------------------------------------------------------
++Additionally aufs superblock has some data for policies to select one
++among multiple writable branches, XIB files, pseudo-links and kobject.
++See below in detail.
++About the policies which supports copy-down a directory, see policy.txt
++too.
++
++
++Branch and XINO(External Inode Number Translation Table)
++----------------------------------------------------------------------
++Every branch has its own xino (external inode number translation table)
++file. The xino file is created and unlinked by aufs internally. When two
++members of a union exist on the same filesystem, they share the single
++xino file.
++The struct of a xino file is simple, just a sequence of aufs inode
++numbers which is indexed by the lower inode number.
++In the above sample, assume the inode number of /ro/fileA is i111 and
++aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
++4(8) bytes at 111 * 4(8) bytes offset in the xino file.
++
++When the inode numbers are not contiguous, the xino file will be sparse
++which has a hole in it and doesn't consume as much disk space as it
++might appear. If your branch filesystem consumes disk space for such
++holes, then you should specify 'xino=' option at mounting aufs.
++
++Also a writable branch has three kinds of "whiteout bases". All these
++are existed when the branch is joined to aufs and the names are
++whiteout-ed doubly, so that users will never see their names in aufs
++hierarchy.
++1. a regular file which will be linked to all whiteouts.
++2. a directory to store a pseudo-link.
++3. a directory to store an "orphan-ed" file temporary.
++
++1. Whiteout Base
++ When you remove a file on a readonly branch, aufs handles it as a
++ logical deletion and creates a whiteout on the upper writable branch
++ as a hardlink of this file in order not to consume inode on the
++ writable branch.
++2. Pseudo-link Dir
++ See below, Pseudo-link.
++3. Step-Parent Dir
++ When "fileC" exists on the lower readonly branch only and it is
++ opened and removed with its parent dir, and then user writes
++ something into it, then aufs copies-up fileC to this
++ directory. Because there is no other dir to store fileC. After
++ creating a file under this dir, the file is unlinked.
++
++Because aufs supports manipulating branches, ie. add/delete/change
++dynamically, a branch has its own id. When the branch order changes, aufs
++finds the new index by searching the branch id.
++
++
++Pseudo-link
++----------------------------------------------------------------------
++Assume "fileA" exists on the lower readonly branch only and it is
++hardlinked to "fileB" on the branch. When you write something to fileA,
++aufs copies-up it to the upper writable branch. Additionally aufs
++creates a hardlink under the Pseudo-link Directory of the writable
++branch. The inode of a pseudo-link is kept in aufs super_block as a
++simple list. If fileB is read after unlinking fileA, aufs returns
++filedata from the pseudo-link instead of the lower readonly
++branch. Because the pseudo-link is based upon the inode, to keep the
++inode number by xino (see above) is important.
++
++All the hardlinks under the Pseudo-link Directory of the writable branch
++should be restored in a proper location later. Aufs provides a utility
++to do this. The userspace helpers executed at remounting and unmounting
++aufs by default.
++
++
++XIB(external inode number bitmap)
++----------------------------------------------------------------------
++Addition to the xino file per a branch, aufs has an external inode number
++bitmap in a superblock object. It is also a file such like a xino file.
++It is a simple bitmap to mark whether the aufs inode number is in-use or
++not.
++To reduce the file I/O, aufs prepares a single memory page to cache xib.
++
++Aufs implements a feature to truncate/refresh both of xino and xib to
++reduce the number of consumed disk blocks for these files.
++
++
++Virtual or Vertical Dir
++----------------------------------------------------------------------
++In order to support multiple layers (branches), aufs readdir operation
++constructs a virtual dir block on memory. For readdir, aufs calls
++vfs_readdir() internally for each dir on branches, merges their entries
++with eliminating the whiteout-ed ones, and sets it to file (dir)
++object. So the file object has its entry list until it is closed. The
++entry list will be updated when the file position is zero and becomes
++old. This decision is made in aufs automatically.
++
++The dynamically allocated memory block for the name of entries has a
++unit of 512 bytes (by default) and stores the names contiguously (no
++padding). Another block for each entry is handled by kmem_cache too.
++During building dir blocks, aufs creates hash list and judging whether
++the entry is whiteouted by its upper branch or already listed.
++
++Some people may call it can be a security hole or invite DoS attack
++since the opened and once readdir-ed dir (file object) holds its entry
++list and becomes a pressure for system memory. But I'd say it is similar
++to files under /proc or /sys. The virtual files in them also holds a
++memory page (generally) while they are opened. When an idea to reduce
++memory for them is introduced, it will be applied to aufs too.
++For those who really hate this situation, I've developed readdir(3)
++library which operates this merging in userspace. You just need to set
++LD_PRELOAD environment variable, and aufs will not consume no memory in
++kernel space for readdir(3).
++
++
++Workqueue
++----------------------------------------------------------------------
++Aufs sometimes requires privilege access to a branch. For instance,
++in copy-up/down operation. When a user process is going to make changes
++to a file which exists in the lower readonly branch only, and the mode
++of one of ancestor directories may not be writable by a user
++process. Here aufs copy-up the file with its ancestors and they may
++require privilege to set its owner/group/mode/etc.
++This is a typical case of a application character of aufs (see
++Introduction).
++
++Aufs uses workqueue synchronously for this case. It creates its own
++workqueue. The workqueue is a kernel thread and has privilege. Aufs
++passes the request to call mkdir or write (for example), and wait for
++its completion. This approach solves a problem of a signal handler
++simply.
++If aufs didn't adopt the workqueue and changed the privilege of the
++process, and if the mkdir/write call arises SIGXFSZ or other signal,
++then the user process might gain a privilege or the generated core file
++was owned by a superuser. But I have a plan to switch to a new
++credential approach which will be introduced in linux-2.6.29.
++
++Also aufs uses the system global workqueue ("events" kernel thread) too
++for asynchronous tasks, such like handling inotify, re-creating a
++whiteout base and etc. This is unrelated to a privilege.
++Most of aufs operation tries acquiring a rw_semaphore for aufs
++superblock at the beginning, at the same time waits for the completion
++of all queued asynchronous tasks.
++
++
++Whiteout
++----------------------------------------------------------------------
++The whiteout in aufs is very similar to Unionfs's. That is represented
++by its filename. UnionMount takes an approach of a file mode, but I am
++afraid several utilities (find(1) or something) will have to support it.
++
++Basically the whiteout represents "logical deletion" which stops aufs to
++lookup further, but also it represents "dir is opaque" which also stop
++lookup.
++
++In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
++In order to make several functions in a single systemcall to be
++revertible, aufs adopts an approach to rename a directory to a temporary
++unique whiteouted name.
++For example, in rename(2) dir where the target dir already existed, aufs
++renames the target dir to a temporary unique whiteouted name before the
++actual rename on a branch and then handles other actions (make it opaque,
++update the attributes, etc). If an error happens in these actions, aufs
++simply renames the whiteouted name back and returns an error. If all are
++succeeded, aufs registers a function to remove the whiteouted unique
++temporary name completely and asynchronously to the system global
++workqueue.
++
++
++Copy-up
++----------------------------------------------------------------------
++It is a well-known feature or concept.
++When user modifies a file on a readonly branch, aufs operate "copy-up"
++internally and makes change to the new file on the upper writable branch.
++When the trigger systemcall does not update the timestamps of the parent
++dir, aufs reverts it after copy-up.
+diff --git a/Documentation/filesystems/aufs/design/03lookup.txt b/Documentation/filesystems/aufs/design/03lookup.txt
+new file mode 100644
+index 0000000..7510fdb
+--- /dev/null
++++ b/Documentation/filesystems/aufs/design/03lookup.txt
+@@ -0,0 +1,104 @@
++
++# Copyright (C) 2005-2009 Junjiro R. Okajima
++#
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 2 of the License, or
++# (at your option) any later version.
++#
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write to the Free Software
++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++
++Lookup in a Branch
++----------------------------------------------------------------------
++Since aufs has a character of sub-VFS (see Introduction), it operates
++lookup for branches as VFS does. It may be a heavy work. Generally
++speaking struct nameidata is a bigger structure and includes many
++information. But almost all lookup operation in aufs is the simplest
++case, ie. lookup only an entry directly connected to its parent. Digging
++down the directory hierarchy is unnecessary.
++
++VFS has a function lookup_one_len() for that use, but it is not usable
++for a branch filesystem which requires struct nameidata. So aufs
++implements a simple lookup wrapper function. When a branch filesystem
++allows NULL as nameidata, it calls lookup_one_len(). Otherwise it builds
++a simplest nameidata and calls lookup_hash().
++Here aufs applies "a principle in NFSD", ie. if the filesystem supports
++NFS-export, then it has to support NULL as a nameidata parameter for
++->create(), ->lookup() and ->d_revalidate(). So the lookup wrapper in
++aufs tests if ->s_export_op in the branch is NULL or not.
++
++When a branch is a remote filesystem, aufs trusts its ->d_revalidate().
++For d_revalidate, aufs implements three levels of revalidate tests. See
++"Revalidate Dentry and UDBA" in detail.
++
++
++Loopback Mount
++----------------------------------------------------------------------
++Basically aufs supports any type of filesystem and block device for a
++branch (actually there are some exceptions). But it is prohibited to add
++a loopback mounted one whose backend file exists in a filesystem which is
++already added to aufs. The reason is to protect aufs from a recursive
++lookup. If it was allowed, the aufs lookup operation might re-enter a
++lookup for the loopback mounted branch in the same context, and will
++cause a deadlock.
++
++
++Revalidate Dentry and UDBA (User's Direct Branch Access)
++----------------------------------------------------------------------
++Generally VFS helpers re-validate a dentry as a part of lookup.
++0. digging down the directory hierarchy.
++1. lock the parent dir by its i_mutex.
++2. lookup the final (child) entry.
++3. revalidate it.
++4. call the actual operation (create, unlink, etc.)
++5. unlock the parent dir
++
++If the filesystem implements its ->d_revalidate() (step 3), then it is
++called. Actually aufs implements it and checks the dentry on a branch is
++still valid.
++But it is not enough. Because aufs has to release the lock for the
++parent dir on a branch at the end of ->lookup() (step 2) and
++->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
++held by VFS.
++If the file on a branch is changed directly, eg. bypassing aufs, after
++aufs released the lock, then the subsequent operation may cause
++something unpleasant result.
++
++This situation is a result of VFS architecture, ->lookup() and
++->d_revalidate() is separated. But I never say it is wrong. It is a good
++design from VFS's point of view. It is just not suitable for sub-VFS
++character in aufs.
++
++Aufs supports such case by three level of revalidation which is
++selectable by user.
++1. Simple Revalidate
++ Addition to the native flow in VFS's, confirm the child-parent
++ relationship on the branch just after locking the parent dir on the
++ branch in the "actual operation" (step 4). When this validation
++ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
++ checks the validation of the dentry on branches.
++2. Monitor Changes Internally by Inotify
++ Addition to above, in the "actual operation" (step 4) aufs re-lookup
++ the dentry on the branch, and returns EBUSY if it finds different
++ dentry.
++ Additionally, aufs sets the inotify watch for every dir on branches
++ during it is in cache. When the event is notified, aufs registers a
++ function to kernel 'events' thread by schedule_work(). And the
++ function sets some special status to the cached aufs dentry and inode
++ private data. If they are not cached, then aufs has nothing to
++ do. When the same file is accessed through aufs (step 0-3) later,
++ aufs will detect the status and refresh all necessary data.
++ In this mode, aufs has to ignore the event which is fired by aufs
++ itself.
++3. No Extra Validation
++ This is the simplest test and doesn't add any additional revalidation
++ test, and skip therevalidatin in step 4. It is useful and improves
++ aufs performance when system surely hide the aufs branches from user,
++ by over-mounting something (or another method).
+diff --git a/Documentation/filesystems/aufs/design/04branch.txt b/Documentation/filesystems/aufs/design/04branch.txt
+new file mode 100644
+index 0000000..5529d6a
+--- /dev/null
++++ b/Documentation/filesystems/aufs/design/04branch.txt
+@@ -0,0 +1,76 @@
++
++# Copyright (C) 2005-2009 Junjiro R. Okajima
++#
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 2 of the License, or
++# (at your option) any later version.
++#
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write to the Free Software
++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++
++Branch Manipulation
++
++Since aufs supports dynamic branch manipulation, ie. add/remove a branch
++and changing its permission/attribute, there are a lot of works to do.
++
++
++Add a Branch
++----------------------------------------------------------------------
++o Confirm the adding dir exists outside of aufs, including loopback
++ mount.
++- and other various attributes...
++o Initialize the xino file and whiteout bases if necessary.
++ See struct.txt.
++
++o Check the owner/group/mode of the directory
++ When the owner/group/mode of the adding directory differs from the
++ existing branch, aufs issues a warning because it may impose a
++ security risk.
++ For example, when a upper writable branch has a world writable empty
++ top directory, a malicious user can create any files on the writable
++ branch directly, like copy-up and modify manually. If something like
++ /etc/{passwd,shadow} exists on the lower readonly branch but the upper
++ writable branch, and the writable branch is world-writable, then a
++ malicious guy may create /etc/passwd on the writable branch directly
++ and the infected file will be valid in aufs.
++ I am afraid it can be a security issue, but nothing to do except
++ producing a warning.
++
++
++Delete a Branch
++----------------------------------------------------------------------
++o Confirm the deleting branch is not busy
++ To be general, there is one merit to adopt "remount" interface to
++ manipulate branches. It is to discard caches. At deleting a branch,
++ aufs checks the still cached (and connected) dentries and inodes. If
++ there are any, then they are all in-use. An inode without its
++ corresponding dentry can be alive alone (for example, inotify case).
++
++ For the cached one, aufs checks whether the same named entry exists on
++ other branches.
++ If the cached one is a directory, because aufs provides a merged view
++ to users, as long as one dir is left on any branch aufs can show the
++ dir to users. In this case, the branch can be removed from aufs.
++ Otherwise aufs rejects deleting the branch.
++
++ If any file on the deleting branch is opened by aufs, then aufs
++ rejects deleting.
++
++
++Modify the Permission of a Branch
++----------------------------------------------------------------------
++o Re-initialize or remove the xino file and whiteout bases if necessary.
++ See struct.txt.
++
++o rw --> ro: Confirm the modifying branch is not busy
++ Aufs rejects the request if any of these conditions are true.
++ - a file on the branch is mmap-ed.
++ - a regular file on the branch is opened for write and there is no
++ same named entry on the upper branch.
+diff --git a/Documentation/filesystems/aufs/design/05wbr_policy.txt b/Documentation/filesystems/aufs/design/05wbr_policy.txt
+new file mode 100644
+index 0000000..5f25684
+--- /dev/null
++++ b/Documentation/filesystems/aufs/design/05wbr_policy.txt
+@@ -0,0 +1,65 @@
++
++# Copyright (C) 2005-2009 Junjiro R. Okajima
++#
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 2 of the License, or
++# (at your option) any later version.
++#
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write to the Free Software
++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++
++Policies to Select One among Multiple Writable Branches
++----------------------------------------------------------------------
++When the number of writable branch is more than one, aufs has to decide
++the target branch for file creation or copy-up. By default, the highest
++writable branch which has the parent (or ancestor) dir of the target
++file is chosen (top-down-parent policy).
++By user's request, aufs implements some other policies to select the
++writable branch, for file creation two policies, round-robin and
++most-free-space policies. For copy-up three policies, top-down-parent,
++bottom-up-parent and bottom-up policies.
++
++As expected, the round-robin policy selects the branch in circular. When
++you have two writable branches and creates 10 new files, 5 files will be
++created for each branch. mkdir(2) systemcall is an exception. When you
++create 10 new directories, all will be created on the same branch.
++And the most-free-space policy selects the one which has most free
++space among the writable branches. The amount of free space will be
++checked by aufs internally, and users can specify its time interval.
++
++The policies for copy-up is more simple,
++top-down-parent is equivalent to the same named on in create policy,
++bottom-up-parent selects the writable branch where the parent dir
++exists and the nearest upper one from the copyup-source,
++bottom-up selects the nearest upper writable branch from the
++copyup-source, regardless the existence of the parent dir.
++
++There are some rules or exceptions to apply these policies.
++- If there is a readonly branch above the policy-selected branch and
++ the parent dir is marked as opaque (a variation of whiteout), or the
++ target (creating) file is whiteout-ed on the upper readonly branch,
++ then the result of the policy is ignored and the target file will be
++ created on the nearest upper writable branch than the readonly branch.
++- If there is a writable branch above the policy-selected branch and
++ the parent dir is marked as opaque or the target file is whiteouted
++ on the branch, then the result of the policy is ignored and the target
++ file will be created on the highest one among the upper writable
++ branches who has diropq or whiteout. In case of whiteout, aufs removes
++ it as usual.
++- link(2) and rename(2) systemcalls are exceptions in every policy.
++ They try selecting the branch where the source exists as possible
++ since copyup a large file will take long time. If it can't be,
++ ie. the branch where the source exists is readonly, then they will
++ follow the copyup policy.
++- There is an exception for rename(2) when the target exists.
++ If the rename target exists, aufs compares the index of the branches
++ where the source and the target exists and selects the higher
++ one. If the selected branch is readonly, then aufs follows the
++ copyup policy.
+diff --git a/Documentation/filesystems/aufs/design/06fmode_exec.txt b/Documentation/filesystems/aufs/design/06fmode_exec.txt
+new file mode 100644
+index 0000000..ebc311d
+--- /dev/null
++++ b/Documentation/filesystems/aufs/design/06fmode_exec.txt
+@@ -0,0 +1,33 @@
++
++# Copyright (C) 2005-2009 Junjiro R. Okajima
++#
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 2 of the License, or
++# (at your option) any later version.
++#
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write to the Free Software
++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++
++FMODE_EXEC and deny_write()
++----------------------------------------------------------------------
++Generally Unix prevents an executing file from writing its filedata.
++In linux it is implemented by deny_write() and allow_write().
++When a file is executed by exec() family, open_exec() (and sys_uselib())
++they opens the file and calls deny_write(). If the file is aufs's virtual
++one, it has no meaning. The file which deny_write() is really necessary
++is the file on a branch. But the FMODE_EXEC flag is not passed to
++->open() operation. So aufs adopt a dirty trick.
++
++- in order to get FMODE_EXEC, aufs ->lookup() and ->d_revalidate() set
++ nd->intent.open.file->private_data to nd->intent.open.flags temporary.
++- in aufs ->open(), when FMODE_EXEC is set in file->private_data, it
++ calls deny_write() for the file on a branch.
++- when the aufs file is released, allow_write() for the file on a branch
++ is called.
+diff --git a/Documentation/filesystems/aufs/design/07mmap.txt b/Documentation/filesystems/aufs/design/07mmap.txt
+new file mode 100644
+index 0000000..5d60fb9
+--- /dev/null
++++ b/Documentation/filesystems/aufs/design/07mmap.txt
+@@ -0,0 +1,53 @@
++
++# Copyright (C) 2005-2009 Junjiro R. Okajima
++#
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 2 of the License, or
++# (at your option) any later version.
++#
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write to the Free Software
++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++
++mmap(2) -- File Memory Mapping
++----------------------------------------------------------------------
++In aufs, the file-mapped pages are shared between the file on a branch
++and the virtual one in aufs by overriding vm_operation, particularly
++->fault().
++
++In aufs_mmap(),
++- get and store vm_ops of the real file on a branch.
++- map the file of aufs by generic_file_mmap() and set aufs's vm
++ operations.
++
++In aufs_fault(),
++- get the file of aufs from the passed vma, sleep if needed.
++- get the real file on a branch from the aufs file.
++- a race may happen. for instance a multithreaded library. so some lock
++ is implemented.
++- call ->fault() in the previously stored vm_ops with setting the
++ real file on a branch to vm_file.
++- restore vm_file and wake_up if someone else got sleep.
++
++When a branch is added to or deleted from aufs, the same-named file may
++unveil and its contents will be replaced by the new one when a process
++read(2) through previously opened file.
++(Some users may not want to refresh the filedata. For such users, I
++have a plan to implement a mount option 'refrof' which decides to
++refresh the opened files or not. See plan.txt too.)
++In this case, an already mapped file will not be updated since the
++contents are a part of a process already and it should not be changed by
++aufs branch manipulation. (Even if MAP_SHARED is specified, currently).
++Of course, in case of the deleting branch has a busy file, it cannot be
++deleted from the union.
++
++In Unionfs, it took an approach which the memory pages mapped to
++filedata are copied from the lower (real) file into the Unionfs's
++virtual one and handles it by address_space operations. Recently Unionfs
++changed it to this approach which aufs adopted since Jul 2006.
+diff --git a/Documentation/filesystems/aufs/design/08export.txt b/Documentation/filesystems/aufs/design/08export.txt
+new file mode 100644
+index 0000000..8394348
+--- /dev/null
++++ b/Documentation/filesystems/aufs/design/08export.txt
+@@ -0,0 +1,59 @@
++
++# Copyright (C) 2005-2009 Junjiro R. Okajima
++#
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 2 of the License, or
++# (at your option) any later version.
++#
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write to the Free Software
++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++
++Export Aufs via NFS
++----------------------------------------------------------------------
++Here is an approach.
++- like xino/xib, add a new file 'xigen' which stores aufs inode
++ generation.
++- iget_locked(): initialize aufs inode generation for a new inode, and
++ store it in xigen file.
++- destroy_inode(): increment aufs inode generation and store it in xigen
++ file. it is necessary even if it is not unlinked, because any data of
++ inode may be changed by UDBA.
++- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
++ build file handle by
++ + branch id (4 bytes)
++ + superblock generation (4 bytes)
++ + inode number (4 or 8 bytes)
++ + parent dir inode number (4 or 8 bytes)
++ + inode generation (4 bytes))
++ + return value of exportfs_encode_fh() for the parent on a branch (4
++ bytes)
++ + file handle for a branch (by exportfs_encode_fh())
++- fh_to_dentry():
++ + find the index of a branch from its id in handle, and check it is
++ still exist in aufs.
++ + 1st level: get the inode number from handle and search it in cache.
++ + 2nd level: if not found, get the parent inode number from handle and
++ search it in cache. and then open the parent dir, find the matching
++ inode number by vfs_readdir() and get its name, and call
++ lookup_one_len() for the target dentry.
++ + 3rd level: if the parent dir is not cached, call
++ exportfs_decode_fh() for a branch and get the parent on a branch,
++ build a pathname of it, convert it a pathname in aufs, call
++ path_lookup(). now aufs gets a parent dir dentry, then handle it as
++ the 2nd level.
++ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
++ for every branch, but not itself. to get this, (currently) aufs
++ searches in current->nsproxy->mnt_ns list. it may not be a good
++ idea, but I didn't get other approach.
++ + test the generation of the gotten inode.
++- every inode operation: they may get EBUSY due to UDBA. in this case,
++ convert it into ESTALE for NFSD.
++- readdir(): call lockdep_on/off() because filldir in NFSD calls
++ lookup_one_len(), vfs_getattr(), encode_fh() and others.
+diff --git a/Documentation/filesystems/aufs/design/09shwh.txt b/Documentation/filesystems/aufs/design/09shwh.txt
+new file mode 100644
+index 0000000..ce0c633
+--- /dev/null
++++ b/Documentation/filesystems/aufs/design/09shwh.txt
+@@ -0,0 +1,53 @@
++
++# Copyright (C) 2005-2009 Junjiro R. Okajima
++#
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 2 of the License, or
++# (at your option) any later version.
++#
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write to the Free Software
++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++
++Show Whiteout Mode (shwh)
++----------------------------------------------------------------------
++Generally aufs hides the name of whiteouts. But in some cases, to show
++them is very useful for users. For instance, creating a new middle layer
++(branch) by merging existing layers.
++
++(borrowing aufs1 HOW-TO from a user, Michael Towers)
++When you have three branches,
++- Bottom: 'system', squashfs (underlying base system), read-only
++- Middle: 'mods', squashfs, read-only
++- Top: 'overlay', ram (tmpfs), read-write
++
++The top layer is loaded at boot time and saved at shutdown, to preserve
++the changes made to the system during the session.
++When larger changes have been made, or smaller changes have accumulated,
++the size of the saved top layer data grows. At this point, it would be
++nice to be able to merge the two overlay branches ('mods' and 'overlay')
++and rewrite the 'mods' squashfs, clearing the top layer and thus
++restoring save and load speed.
++
++This merging is simplified by the use of another aufs mount, of just the
++two overlay branches using the 'shwh' option.
++# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
++ aufs /livesys/merge_union
++
++A merged view of these two branches is then available at
++/livesys/merge_union, and the new feature is that the whiteouts are
++visible!
++Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
++writing to all branches. Also the default mode for all branches is 'ro'.
++It is now possible to save the combined contents of the two overlay
++branches to a new squashfs, e.g.:
++# mksquashfs /livesys/merge_union /path/to/newmods.squash
++
++This new squashfs archive can be stored on the boot device and the
++initramfs will use it to replace the old one at the next boot.
+diff --git a/Documentation/filesystems/aufs/design/99plan.txt b/Documentation/filesystems/aufs/design/99plan.txt
+new file mode 100644
+index 0000000..78d1f0e
+--- /dev/null
++++ b/Documentation/filesystems/aufs/design/99plan.txt
+@@ -0,0 +1,96 @@
++
++# Copyright (C) 2005-2009 Junjiro R. Okajima
++#
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 2 of the License, or
++# (at your option) any later version.
++#
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write to the Free Software
++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++
++Plan
++
++Restoring some features which was implemented in aufs1.
++They were dropped in aufs2 in order to make source files simpler and
++easier to be reviewed.
++
++
++Test Only the Highest One for the Directory Permission (dirperm1 option)
++----------------------------------------------------------------------
++Let's try case study.
++- aufs has two branches, upper readwrite and lower readonly.
++ /au = /rw + /ro
++- "dirA" exists under /ro, but /rw. and its mode is 0700.
++- user invoked "chmod a+rx /au/dirA"
++- then "dirA" becomes world readable?
++
++In this case, /ro/dirA is still 0700 since it exists in readonly branch,
++or it may be a natively readonly filesystem. If aufs respects the lower
++branch, it should not respond readdir request from other users. But user
++allowed it by chmod. Should really aufs rejects showing the entries
++under /ro/dirA?
++
++To be honest, I don't have a best solution for this case. So I
++implemented 'dirperm1' and 'nodirperm1' option in aufs1, and leave it to
++users.
++When dirperm1 is specified, aufs checks only the highest one for the
++directory permission, and shows the entries. Otherwise, as usual, checks
++every dir existing on all branches and rejects the request.
++
++As a side effect, dirperm1 option improves the performance of aufs
++because the number of permission check is reduced.
++
++
++Being Another Aufs's Readonly Branch (robr)
++----------------------------------------------------------------------
++Aufs1 allows aufs to be another aufs's readonly branch.
++This feature was developed by a user's request. But it may not be used
++currecnly.
++
++
++Copy-up on Open (coo=)
++----------------------------------------------------------------------
++By default the internal copy-up is executed when it is really necessary.
++It is not done when a file is opened for writing, but when write(2) is
++done. Users who have many (over 100) branches want to know and analyse
++when and what file is copied-up. To insert a new upper branch which
++contains such files only may improve the performance of aufs.
++
++Aufs1 implemented "coo=none | leaf | all" option.
++
++
++Refresh the Opened File (refrof)
++----------------------------------------------------------------------
++This option is implemented in aufs1 but incomplete.
++
++When user reads from a file, he expects to get its latest filedata
++generally. If the file is removed and a new same named file is created,
++the content he gets is unchanged, ie. the unlinked filedata.
++
++Let's try case study again.
++- aufs has two branches.
++ /au = /rw + /ro
++- "fileA" exists under /ro, but /rw.
++- user opened "/au/fileA".
++- he or someone else inserts a branch (/new) between /rw and /ro.
++ /au = /rw + /new + /ro
++- the new branch has "fileA".
++- user reads from the opened "fileA"
++- which filedata should aufs return, from /ro or /new?
++
++Some people says it has to be "from /ro" and it is a semantics of Unix.
++The others say it should be "from /new" because the file is not removed
++and it is equivalent to the case of someone else modifies the file.
++
++Here again I don't have a best and final answer. I got an idea to
++implement 'refrof' and 'norefrof' option. When 'refrof' (REFResh the
++Opened File) is specified (by default), aufs returns the filedata from
++/new.
++Otherwise from /new.
+diff --git a/fs/Kconfig b/fs/Kconfig
+index bd989c0..8af22e4 100644
+--- a/fs/Kconfig
++++ b/fs/Kconfig
+@@ -1541,6 +1541,8 @@ config UFS_DEBUG
+ Y here. This will result in _many_ additional debugging messages to be
+ written to the system log.
+
++source "fs/aufs/Kconfig"
++
+ endmenu
+
+ menuconfig NETWORK_FILESYSTEMS
+diff --git a/fs/Makefile b/fs/Makefile
+index 0871ab5..364e863 100644
+--- a/fs/Makefile
++++ b/fs/Makefile
+@@ -123,3 +123,4 @@ obj-$(CONFIG_HPPFS) += hppfs/
+ obj-$(CONFIG_DEBUG_FS) += debugfs/
+ obj-$(CONFIG_OCFS2_FS) += ocfs2/
+ obj-$(CONFIG_GFS2_FS) += gfs2/
++obj-$(CONFIG_AUFS_FS) += aufs/
+diff --git a/fs/aufs/Kconfig b/fs/aufs/Kconfig
+new file mode 100644
+index 0000000..12e4327
+--- /dev/null
++++ b/fs/aufs/Kconfig
+@@ -0,0 +1,136 @@
++config AUFS_FS
++ bool "Aufs (Advanced multi layered unification filesystem) support"
++ depends on EXPERIMENTAL
++ help
++ Aufs is a stackable unification filesystem such as Unionfs,
++ which unifies several directories and provides a merged single
++ directory.
++ In the early days, aufs was entirely re-designed and
++ re-implemented Unionfs Version 1.x series. Introducing many
++ original ideas, approaches and improvements, it becomes totally
++ different from Unionfs while keeping the basic features.
++
++if AUFS_FS
++choice
++ prompt "Maximum number of branches"
++ default AUFS_BRANCH_MAX_127
++ help
++ Specifies the maximum number of branches (or member directories)
++ in a single aufs. The larger value consumes more system
++ resources and has a minor impact to performance.
++config AUFS_BRANCH_MAX_127
++ bool "127"
++ help
++ Specifies the maximum number of branches (or member directories)
++ in a single aufs. The larger value consumes more system
++ resources and has a minor impact to performance.
++config AUFS_BRANCH_MAX_511
++ bool "511"
++ help
++ Specifies the maximum number of branches (or member directories)
++ in a single aufs. The larger value consumes more system
++ resources and has a minor impact to performance.
++config AUFS_BRANCH_MAX_1023
++ bool "1023"
++ help
++ Specifies the maximum number of branches (or member directories)
++ in a single aufs. The larger value consumes more system
++ resources and has a minor impact to performance.
++config AUFS_BRANCH_MAX_32767
++ bool "32767"
++ help
++ Specifies the maximum number of branches (or member directories)
++ in a single aufs. The larger value consumes more system
++ resources and has a minor impact to performance.
++endchoice
++
++config AUFS_HINOTIFY
++ bool "Use inotify to detect actions on a branch"
++ depends on INOTIFY
++ help
++ If you want to modify files on branches directly, eg. bypassing aufs,
++ and want aufs to detect the changes of them fully, then enable this
++ option and use 'udba=inotify' mount option.
++ It will have a negative impact to the performance.
++ See detail in aufs.5.
++
++config AUFS_EXPORT
++ bool "NFS-exportable aufs"
++ depends on EXPORTFS = y
++ help
++ If you want to export your mounted aufs via NFS, then enable this
++ option. There are several requirements for this configuration.
++ See detail in aufs.5.
++
++config AUFS_INO_T_64
++ bool
++ depends on AUFS_EXPORT
++ depends on 64BIT && !(ALPHA || S390)
++ default y
++ help
++ Automatic configuration for internal use.
++ /* typedef unsigned long/int __kernel_ino_t */
++ /* alpha and s390x are int */
++
++config AUFS_RDU
++ bool "Readdir in userspace"
++ help
++ If you have millions of files under a single aufs directory, and
++ meet the out of memory, then enable this option and set
++ environment variables for your readdir(3).
++ See detail in aufs.5.
++
++config AUFS_SP_IATTR
++ bool "Respect the attributes (mtime/ctime mainly) of special files"
++ help
++ When you write something to a special file, some attributes of it
++ (mtime/ctime mainly) may be updated. Generally such updates are
++ less important (actually some device drivers and NFS ignore
++ it). But some applications (such like test program) requires
++ such updates. If you need these updates, then enable this
++ configuration which introduces some overhead.
++ Currently this configuration handles FIFO only.
++
++config AUFS_SHWH
++ bool "Show whiteouts"
++ help
++ If you want to make the whiteouts in aufs visible, then enable
++ this option and specify 'shwh' mount option. Although it may
++ sounds like philosophy or something, but in technically it
++ simply shows the name of whiteout with keeping its behaviour.
++
++config AUFS_BR_RAMFS
++ bool "Ramfs (initramfs/rootfs) as an aufs branch"
++ help
++ If you want to use ramfs as an aufs branch fs, then enable this
++ option. Generally tmpfs is recommended.
++ Aufs prohibited them to be a branch fs by default, because
++ initramfs becomes unusable after switch_root or something
++ generally. If you sets initramfs as an aufs branch and boot your
++ system by switch_root, you will meet a problem easily since the
++ files in initramfs may be inaccessible.
++ Unless you are going to use ramfs as an aufs branch fs without
++ switch_root or something, leave it N.
++
++config AUFS_BDEV_LOOP
++ bool
++ depends on BLK_DEV_LOOP
++ default y
++ help
++ Automatic configuration for internal use.
++ Convert =[ym] into =y.
++
++config AUFS_DEBUG
++ bool "Debug aufs"
++ help
++ Enable this to compile aufs internal debug code.
++ It will have a negative impact to the performance.
++
++config AUFS_MAGIC_SYSRQ
++ bool
++ depends on AUFS_DEBUG && MAGIC_SYSRQ
++ default y
++ help
++ Automatic configuration for internal use.
++ When aufs supports Magic SysRq, enabled automatically.
++endif
+diff --git a/fs/aufs/Makefile b/fs/aufs/Makefile
+new file mode 100644
+index 0000000..29411e9
+--- /dev/null
++++ b/fs/aufs/Makefile
+@@ -0,0 +1,23 @@
++
++include ${srctree}/${src}/magic.mk
++
++obj-$(CONFIG_AUFS_FS) += aufs.o
++aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
++ wkq.o vfsub.o dcsub.o \
++ cpup.o whout.o plink.o wbr_policy.o \
++ dinfo.o dentry.o \
++ finfo.o file.o f_op.o \
++ dir.o vdir.o \
++ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
++ ioctl.o
++
++# all are boolean
++aufs-$(CONFIG_SYSFS) += sysfs.o
++aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
++aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
++aufs-$(CONFIG_AUFS_HINOTIFY) += hinotify.o
++aufs-$(CONFIG_AUFS_EXPORT) += export.o
++aufs-$(CONFIG_AUFS_RDU) += rdu.o
++aufs-$(CONFIG_AUFS_SP_IATTR) += f_op_sp.o
++aufs-$(CONFIG_AUFS_DEBUG) += debug.o
++aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
+diff --git a/fs/aufs/aufs.h b/fs/aufs/aufs.h
+new file mode 100644
+index 0000000..49bada2
+--- /dev/null
++++ b/fs/aufs/aufs.h
+@@ -0,0 +1,59 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * all header files
++ */
++
++#ifndef __AUFS_H__
++#define __AUFS_H__
++
++#ifdef __KERNEL__
++
++#define AuStub(type, name, body, ...) \
++ static inline type name(__VA_ARGS__) { body; }
++
++#define AuStubVoid(name, ...) \
++ AuStub(void, name, , __VA_ARGS__)
++#define AuStubInt0(name, ...) \
++ AuStub(int, name, return 0, __VA_ARGS__)
++
++#include "debug.h"
++
++#include "branch.h"
++#include "cpup.h"
++#include "dcsub.h"
++#include "dbgaufs.h"
++#include "dentry.h"
++#include "dir.h"
++#include "file.h"
++#include "fstype.h"
++#include "inode.h"
++#include "loop.h"
++#include "module.h"
++#include "opts.h"
++#include "rwsem.h"
++#include "spl.h"
++#include "super.h"
++#include "sysaufs.h"
++#include "vfsub.h"
++#include "whout.h"
++#include "wkq.h"
++
++#endif /* __KERNEL__ */
++#endif /* __AUFS_H__ */
+diff --git a/fs/aufs/branch.c b/fs/aufs/branch.c
+new file mode 100644
+index 0000000..2717e9e
+--- /dev/null
++++ b/fs/aufs/branch.c
+@@ -0,0 +1,978 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * branch management
++ */
++
++#include <linux/file.h>
++#include <linux/statfs.h>
++#include "aufs.h"
++
++/*
++ * free a single branch
++ */
++static void au_br_do_free(struct au_branch *br)
++{
++ int i;
++ struct au_wbr *wbr;
++
++ if (br->br_xino.xi_file)
++ fput(br->br_xino.xi_file);
++ mutex_destroy(&br->br_xino.xi_nondir_mtx);
++
++ AuDebugOn(atomic_read(&br->br_count));
++
++ wbr = br->br_wbr;
++ if (wbr) {
++ for (i = 0; i < AuBrWh_Last; i++)
++ dput(wbr->wbr_wh[i]);
++ AuDebugOn(atomic_read(&wbr->wbr_wh_running));
++ AuRwDestroy(&wbr->wbr_wh_rwsem);
++ }
++
++ /* some filesystems acquire extra lock */
++ lockdep_off();
++ mntput(br->br_mnt);
++ lockdep_on();
++
++ kfree(wbr);
++ kfree(br);
++}
++
++/*
++ * frees all branches
++ */
++void au_br_free(struct au_sbinfo *sbinfo)
++{
++ aufs_bindex_t bmax;
++ struct au_branch **br;
++
++ AuRwMustWriteLock(&sbinfo->si_rwsem);
++
++ bmax = sbinfo->si_bend + 1;
++ br = sbinfo->si_branch;
++ while (bmax--)
++ au_br_do_free(*br++);
++}
++
++/*
++ * find the index of a branch which is specified by @br_id.
++ */
++int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
++{
++ aufs_bindex_t bindex, bend;
++
++ bend = au_sbend(sb);
++ for (bindex = 0; bindex <= bend; bindex++)
++ if (au_sbr_id(sb, bindex) == br_id)
++ return bindex;
++ return -1;
++}
++
++/* ---------------------------------------------------------------------- */
++
++/*
++ * add a branch
++ */
++
++static int test_overlap(struct super_block *sb, struct dentry *h_d1,
++ struct dentry *h_d2)
++{
++ if (unlikely(h_d1 == h_d2))
++ return 1;
++ return !!au_test_subdir(h_d1, h_d2)
++ || !!au_test_subdir(h_d2, h_d1)
++ || au_test_loopback_overlap(sb, h_d1, h_d2)
++ || au_test_loopback_overlap(sb, h_d2, h_d1);
++}
++
++/*
++ * returns a newly allocated branch. @new_nbranch is a number of branches
++ * after adding a branch.
++ */
++static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
++ int perm)
++{
++ struct au_branch *add_branch;
++ struct dentry *root;
++
++ root = sb->s_root;
++ add_branch = kmalloc(sizeof(*add_branch), GFP_NOFS);
++ if (unlikely(!add_branch))
++ goto out;
++
++ add_branch->br_wbr = NULL;
++ if (au_br_writable(perm)) {
++ /* may be freed separately at changing the branch permission */
++ add_branch->br_wbr = kmalloc(sizeof(*add_branch->br_wbr),
++ GFP_NOFS);
++ if (unlikely(!add_branch->br_wbr))
++ goto out_br;
++ }
++
++ if (unlikely(au_sbr_realloc(au_sbi(sb), new_nbranch)
++ || au_di_realloc(au_di(root), new_nbranch)
++ || au_ii_realloc(au_ii(root->d_inode), new_nbranch)))
++ goto out_wbr;
++ return add_branch; /* success */
++
++ out_wbr:
++ kfree(add_branch->br_wbr);
++ out_br:
++ kfree(add_branch);
++ out:
++ return ERR_PTR(-ENOMEM);
++}
++
++/*
++ * test if the branch permission is legal or not.
++ */
++static int test_br(struct inode *inode, int brperm, char *path)
++{
++ int err;
++
++ err = 0;
++ if (unlikely(au_br_writable(brperm) && IS_RDONLY(inode))) {
++ AuErr("write permission for readonly mount or inode, %s\n",
++ path);
++ err = -EINVAL;
++ }
++
++ return err;
++}
++
++/*
++ * returns:
++ * 0: success, the caller will add it
++ * plus: success, it is already unified, the caller should ignore it
++ * minus: error
++ */
++static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
++{
++ int err;
++ aufs_bindex_t bend, bindex;
++ struct dentry *root;
++ struct inode *inode, *h_inode;
++
++ root = sb->s_root;
++ bend = au_sbend(sb);
++ if (unlikely(bend >= 0
++ && au_find_dbindex(root, add->path.dentry) >= 0)) {
++ err = 1;
++ if (!remount) {
++ err = -EINVAL;
++ AuErr("%s duplicated\n", add->pathname);
++ }
++ goto out;
++ }
++
++ err = -ENOSPC; /* -E2BIG; */
++ if (unlikely(AUFS_BRANCH_MAX <= add->bindex
++ || AUFS_BRANCH_MAX - 1 <= bend)) {
++ AuErr("number of branches exceeded %s\n", add->pathname);
++ goto out;
++ }
++
++ err = -EDOM;
++ if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) {
++ AuErr("bad index %d\n", add->bindex);
++ goto out;
++ }
++
++ inode = add->path.dentry->d_inode;
++ err = -ENOENT;
++ if (unlikely(!inode->i_nlink)) {
++ AuErr("no existence %s\n", add->pathname);
++ goto out;
++ }
++
++ err = -EINVAL;
++ if (unlikely(inode->i_sb == sb)) {
++ AuErr("%s must be outside\n", add->pathname);
++ goto out;
++ }
++
++ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
++ AuErr("unsupported filesystem, %s (%s)\n",
++ add->pathname, au_sbtype(inode->i_sb));
++ goto out;
++ }
++
++ err = test_br(add->path.dentry->d_inode, add->perm, add->pathname);
++ if (unlikely(err))
++ goto out;
++
++ if (bend < 0)
++ return 0; /* success */
++
++ err = -EINVAL;
++ for (bindex = 0; bindex <= bend; bindex++)
++ if (unlikely(test_overlap(sb, add->path.dentry,
++ au_h_dptr(root, bindex)))) {
++ AuErr("%s is overlapped\n", add->pathname);
++ goto out;
++ }
++
++ err = 0;
++ if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
++ h_inode = au_h_dptr(root, 0)->d_inode;
++ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
++ || h_inode->i_uid != inode->i_uid
++ || h_inode->i_gid != inode->i_gid)
++ AuWarn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
++ add->pathname,
++ inode->i_uid, inode->i_gid,
++ (inode->i_mode & S_IALLUGO),
++ h_inode->i_uid, h_inode->i_gid,
++ (h_inode->i_mode & S_IALLUGO));
++ }
++
++ out:
++ return err;
++}
++
++/*
++ * initialize or clean the whiteouts for an adding branch
++ */
++static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
++ int new_perm, struct dentry *h_root)
++{
++ int err, old_perm;
++ aufs_bindex_t bindex;
++ struct mutex *h_mtx;
++ struct au_wbr *wbr;
++ struct au_hinode *hdir;
++
++ wbr = br->br_wbr;
++ old_perm = br->br_perm;
++ br->br_perm = new_perm;
++ hdir = NULL;
++ h_mtx = NULL;
++ bindex = au_br_index(sb, br->br_id);
++ if (0 <= bindex) {
++ hdir = au_hi(sb->s_root->d_inode, bindex);
++ au_hin_imtx_lock_nested(hdir, AuLsc_I_PARENT);
++ } else {
++ h_mtx = &h_root->d_inode->i_mutex;
++ mutex_lock_nested(h_mtx, AuLsc_I_PARENT);
++ }
++ if (!wbr)
++ err = au_wh_init(h_root, br, sb);
++ else {
++ wbr_wh_write_lock(wbr);
++ err = au_wh_init(h_root, br, sb);
++ wbr_wh_write_unlock(wbr);
++ }
++ if (hdir)
++ au_hin_imtx_unlock(hdir);
++ else
++ mutex_unlock(h_mtx);
++ br->br_perm = old_perm;
++
++ if (!err && wbr && !au_br_writable(new_perm)) {
++ kfree(wbr);
++ br->br_wbr = NULL;
++ }
++
++ return err;
++}
++
++static int au_wbr_init(struct au_branch *br, struct super_block *sb,
++ int perm, struct path *path)
++{
++ int err;
++ struct kstatfs kst;
++ struct au_wbr *wbr;
++ struct dentry *h_dentry;
++
++ wbr = br->br_wbr;
++ au_rw_init(&wbr->wbr_wh_rwsem);
++ memset(wbr->wbr_wh, 0, sizeof(wbr->wbr_wh));
++ atomic_set(&wbr->wbr_wh_running, 0);
++ wbr->wbr_bytes = 0;
++
++ /*
++ * a limit for rmdir/rename a dir
++ * cf. AUFS_MAX_NAMELEN in include/linux/aufs_type.h
++ */
++ h_dentry = path->dentry;
++ err = vfs_statfs(h_dentry, &kst);
++ if (unlikely(err))
++ goto out;
++ err = -EINVAL;
++ if (kst.f_namelen >= NAME_MAX)
++ err = au_br_init_wh(sb, br, perm, h_dentry);
++ else
++ AuErr("%.*s(%s), unsupported namelen %ld\n",
++ AuDLNPair(h_dentry), au_sbtype(h_dentry->d_sb),
++ kst.f_namelen);
++
++ out:
++ return err;
++}
++
++/* intialize a new branch */
++static int au_br_init(struct au_branch *br, struct super_block *sb,
++ struct au_opt_add *add)
++{
++ int err;
++
++ err = 0;
++ memset(&br->br_xino, 0, sizeof(br->br_xino));
++ mutex_init(&br->br_xino.xi_nondir_mtx);
++ br->br_perm = add->perm;
++ br->br_mnt = add->path.mnt; /* set first, mntget() later */
++ atomic_set(&br->br_count, 0);
++ br->br_xino_upper = AUFS_XINO_TRUNC_INIT;
++ atomic_set(&br->br_xino_running, 0);
++ br->br_id = au_new_br_id(sb);
++
++ if (au_br_writable(add->perm)) {
++ err = au_wbr_init(br, sb, add->perm, &add->path);
++ if (unlikely(err))
++ goto out;
++ }
++
++ if (au_opt_test(au_mntflags(sb), XINO)) {
++ err = au_xino_br(sb, br, add->path.dentry->d_inode->i_ino,
++ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
++ if (unlikely(err)) {
++ AuDebugOn(br->br_xino.xi_file);
++ goto out;
++ }
++ }
++
++ sysaufs_br_init(br);
++ mntget(add->path.mnt);
++
++ out:
++ return err;
++}
++
++static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
++ struct au_branch *br, aufs_bindex_t bend,
++ aufs_bindex_t amount)
++{
++ struct au_branch **brp;
++
++ AuRwMustWriteLock(&sbinfo->si_rwsem);
++
++ brp = sbinfo->si_branch + bindex;
++ memmove(brp + 1, brp, sizeof(*brp) * amount);
++ *brp = br;
++ sbinfo->si_bend++;
++ if (unlikely(bend < 0))
++ sbinfo->si_bend = 0;
++}
++
++static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
++ aufs_bindex_t bend, aufs_bindex_t amount)
++{
++ struct au_hdentry *hdp;
++
++ AuRwMustWriteLock(&dinfo->di_rwsem);
++
++ hdp = dinfo->di_hdentry + bindex;
++ memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
++ au_h_dentry_init(hdp);
++ dinfo->di_bend++;
++ if (unlikely(bend < 0))
++ dinfo->di_bstart = 0;
++}
++
++static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
++ aufs_bindex_t bend, aufs_bindex_t amount)
++{
++ struct au_hinode *hip;
++
++ AuRwMustWriteLock(&iinfo->ii_rwsem);
++
++ hip = iinfo->ii_hinode + bindex;
++ memmove(hip + 1, hip, sizeof(*hip) * amount);
++ hip->hi_inode = NULL;
++ au_hin_init(hip, NULL);
++ iinfo->ii_bend++;
++ if (unlikely(bend < 0))
++ iinfo->ii_bstart = 0;
++}
++
++static void au_br_do_add(struct super_block *sb, struct dentry *h_dentry,
++ struct au_branch *br, aufs_bindex_t bindex)
++{
++ struct dentry *root;
++ struct inode *root_inode;
++ aufs_bindex_t bend, amount;
++
++ root = sb->s_root;
++ root_inode = root->d_inode;
++ au_plink_maint_block(sb);
++ bend = au_sbend(sb);
++ amount = bend + 1 - bindex;
++ au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount);
++ au_br_do_add_hdp(au_di(root), bindex, bend, amount);
++ au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount);
++ au_set_h_dptr(root, bindex, dget(h_dentry));
++ au_set_h_iptr(root_inode, bindex, au_igrab(h_dentry->d_inode),
++ /*flags*/0);
++}
++
++int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
++{
++ int err;
++ aufs_bindex_t bend, add_bindex;
++ struct dentry *root, *h_dentry;
++ struct inode *root_inode;
++ struct au_branch *add_branch;
++
++ root = sb->s_root;
++ root_inode = root->d_inode;
++ IMustLock(root_inode);
++ err = test_add(sb, add, remount);
++ if (unlikely(err < 0))
++ goto out;
++ if (err) {
++ err = 0;
++ goto out; /* success */
++ }
++
++ bend = au_sbend(sb);
++ add_branch = au_br_alloc(sb, bend + 2, add->perm);
++ err = PTR_ERR(add_branch);
++ if (IS_ERR(add_branch))
++ goto out;
++
++ err = au_br_init(add_branch, sb, add);
++ if (unlikely(err)) {
++ au_br_do_free(add_branch);
++ goto out;
++ }
++
++ add_bindex = add->bindex;
++ h_dentry = add->path.dentry;
++ if (!remount)
++ au_br_do_add(sb, h_dentry, add_branch, add_bindex);
++ else {
++ sysaufs_brs_del(sb, add_bindex);
++ au_br_do_add(sb, h_dentry, add_branch, add_bindex);
++ sysaufs_brs_add(sb, add_bindex);
++ }
++
++ if (!add_bindex) {
++ au_cpup_attr_all(root_inode, /*force*/1);
++ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
++ } else
++ au_add_nlink(root_inode, h_dentry->d_inode);
++
++ /*
++ * this test/set prevents aufs from handling unnecesary inotify events
++ * of xino files, in a case of re-adding a writable branch which was
++ * once detached from aufs.
++ */
++ if (au_xino_brid(sb) < 0
++ && au_br_writable(add_branch->br_perm)
++ && !au_test_fs_bad_xino(h_dentry->d_sb)
++ && add_branch->br_xino.xi_file
++ && add_branch->br_xino.xi_file->f_dentry->d_parent == h_dentry)
++ au_xino_brid_set(sb, add_branch->br_id);
++
++ out:
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++/*
++ * delete a branch
++ */
++
++/* to show the line number, do not make it inlined function */
++#define AuVerbose(do_info, fmt, ...) do { \
++ if (do_info) \
++ AuInfo(fmt, ##__VA_ARGS__); \
++} while (0)
++
++/*
++ * test if the branch is deletable or not.
++ */
++static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
++ unsigned int sigen)
++{
++ int err, i, j, ndentry;
++ aufs_bindex_t bstart, bend;
++ unsigned char verbose;
++ struct au_dcsub_pages dpages;
++ struct au_dpage *dpage;
++ struct dentry *d;
++ struct inode *inode;
++
++ err = au_dpages_init(&dpages, GFP_NOFS);
++ if (unlikely(err))
++ goto out;
++ err = au_dcsub_pages(&dpages, root, NULL, NULL);
++ if (unlikely(err))
++ goto out_dpages;
++
++ verbose = !!au_opt_test(au_mntflags(root->d_sb), VERBOSE);
++ for (i = 0; !err && i < dpages.ndpage; i++) {
++ dpage = dpages.dpages + i;
++ ndentry = dpage->ndentry;
++ for (j = 0; !err && j < ndentry; j++) {
++ d = dpage->dentries[j];
++ AuDebugOn(!atomic_read(&d->d_count));
++ inode = d->d_inode;
++ if (au_digen(d) == sigen && au_iigen(inode) == sigen)
++ di_read_lock_child(d, AuLock_IR);
++ else {
++ di_write_lock_child(d);
++ err = au_reval_dpath(d, sigen);
++ if (!err)
++ di_downgrade_lock(d, AuLock_IR);
++ else {
++ di_write_unlock(d);
++ break;
++ }
++ }
++
++ bstart = au_dbstart(d);
++ bend = au_dbend(d);
++ if (bstart <= bindex
++ && bindex <= bend
++ && au_h_dptr(d, bindex)
++ && (!S_ISDIR(inode->i_mode) || bstart == bend)) {
++ err = -EBUSY;
++ AuVerbose(verbose, "busy %.*s\n", AuDLNPair(d));
++ }
++ di_read_unlock(d, AuLock_IR);
++ }
++ }
++
++ out_dpages:
++ au_dpages_free(&dpages);
++ out:
++ return err;
++}
++
++static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
++ unsigned int sigen)
++{
++ int err;
++ struct inode *i;
++ aufs_bindex_t bstart, bend;
++ unsigned char verbose;
++
++ err = 0;
++ verbose = !!au_opt_test(au_mntflags(sb), VERBOSE);
++ list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
++ AuDebugOn(!atomic_read(&i->i_count));
++ if (!list_empty(&i->i_dentry))
++ continue;
++
++ if (au_iigen(i) == sigen)
++ ii_read_lock_child(i);
++ else {
++ ii_write_lock_child(i);
++ err = au_refresh_hinode_self(i, /*do_attr*/1);
++ if (!err)
++ ii_downgrade_lock(i);
++ else {
++ ii_write_unlock(i);
++ break;
++ }
++ }
++
++ bstart = au_ibstart(i);
++ bend = au_ibend(i);
++ if (bstart <= bindex
++ && bindex <= bend
++ && au_h_iptr(i, bindex)
++ && (!S_ISDIR(i->i_mode) || bstart == bend)) {
++ err = -EBUSY;
++ AuVerbose(verbose, "busy i%lu\n", i->i_ino);
++ ii_read_unlock(i);
++ break;
++ }
++ ii_read_unlock(i);
++ }
++
++ return err;
++}
++
++static int test_children_busy(struct dentry *root, aufs_bindex_t bindex)
++{
++ int err;
++ unsigned int sigen;
++
++ sigen = au_sigen(root->d_sb);
++ DiMustNoWaiters(root);
++ IiMustNoWaiters(root->d_inode);
++ di_write_unlock(root);
++ err = test_dentry_busy(root, bindex, sigen);
++ if (!err)
++ err = test_inode_busy(root->d_sb, bindex, sigen);
++ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
++
++ return err;
++}
++
++static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
++ const aufs_bindex_t bindex,
++ const aufs_bindex_t bend)
++{
++ struct au_branch **brp, **p;
++
++ AuRwMustWriteLock(&sbinfo->si_rwsem);
++
++ brp = sbinfo->si_branch + bindex;
++ if (bindex < bend)
++ memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex));
++ sbinfo->si_branch[0 + bend] = NULL;
++ sbinfo->si_bend--;
++
++ p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, GFP_NOFS);
++ if (p)
++ sbinfo->si_branch = p;
++}
++
++static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
++ const aufs_bindex_t bend)
++{
++ struct au_hdentry *hdp, *p;
++
++ AuRwMustWriteLock(&dinfo->di_rwsem);
++
++ hdp = dinfo->di_hdentry + bindex;
++ if (bindex < bend)
++ memmove(hdp, hdp + 1, sizeof(*hdp) * (bend - bindex));
++ dinfo->di_hdentry[0 + bend].hd_dentry = NULL;
++ dinfo->di_bend--;
++
++ p = krealloc(dinfo->di_hdentry, sizeof(*p) * bend, GFP_NOFS);
++ if (p)
++ dinfo->di_hdentry = p;
++}
++
++static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
++ const aufs_bindex_t bend)
++{
++ struct au_hinode *hip, *p;
++
++ AuRwMustWriteLock(&iinfo->ii_rwsem);
++
++ hip = iinfo->ii_hinode + bindex;
++ if (bindex < bend)
++ memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex));
++ iinfo->ii_hinode[0 + bend].hi_inode = NULL;
++ au_hin_init(iinfo->ii_hinode + bend, NULL);
++ iinfo->ii_bend--;
++
++ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, GFP_NOFS);
++ if (p)
++ iinfo->ii_hinode = p;
++}
++
++static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
++ struct au_branch *br)
++{
++ aufs_bindex_t bend;
++ struct au_sbinfo *sbinfo;
++ struct dentry *root;
++ struct inode *inode;
++
++ SiMustWriteLock(sb);
++
++ root = sb->s_root;
++ inode = root->d_inode;
++ au_plink_maint_block(sb);
++ sbinfo = au_sbi(sb);
++ bend = sbinfo->si_bend;
++
++ dput(au_h_dptr(root, bindex));
++ au_hiput(au_hi(inode, bindex));
++ au_br_do_free(br);
++
++ au_br_do_del_brp(sbinfo, bindex, bend);
++ au_br_do_del_hdp(au_di(root), bindex, bend);
++ au_br_do_del_hip(au_ii(inode), bindex, bend);
++}
++
++int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
++{
++ int err, rerr, i;
++ unsigned int mnt_flags;
++ aufs_bindex_t bindex, bend, br_id;
++ unsigned char do_wh, verbose;
++ struct au_branch *br;
++ struct au_wbr *wbr;
++
++ err = 0;
++ bindex = au_find_dbindex(sb->s_root, del->h_path.dentry);
++ if (bindex < 0) {
++ if (remount)
++ goto out; /* success */
++ err = -ENOENT;
++ AuErr("%s no such branch\n", del->pathname);
++ goto out;
++ }
++ AuDbg("bindex b%d\n", bindex);
++
++ err = -EBUSY;
++ mnt_flags = au_mntflags(sb);
++ verbose = !!au_opt_test(mnt_flags, VERBOSE);
++ bend = au_sbend(sb);
++ if (unlikely(!bend)) {
++ AuVerbose(verbose, "no more branches left\n");
++ goto out;
++ }
++ br = au_sbr(sb, bindex);
++ i = atomic_read(&br->br_count);
++ if (unlikely(i)) {
++ AuVerbose(verbose, "%d file(s) opened\n", i);
++ goto out;
++ }
++
++ wbr = br->br_wbr;
++ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
++ if (do_wh) {
++ /* instead of WbrWhMustWriteLock(wbr) */
++ SiMustWriteLock(sb);
++ for (i = 0; i < AuBrWh_Last; i++) {
++ dput(wbr->wbr_wh[i]);
++ wbr->wbr_wh[i] = NULL;
++ }
++ }
++
++ err = test_children_busy(sb->s_root, bindex);
++ if (unlikely(err)) {
++ if (do_wh)
++ goto out_wh;
++ goto out;
++ }
++
++ err = 0;
++ br_id = br->br_id;
++ if (!remount)
++ au_br_do_del(sb, bindex, br);
++ else {
++ sysaufs_brs_del(sb, bindex);
++ au_br_do_del(sb, bindex, br);
++ sysaufs_brs_add(sb, bindex);
++ }
++
++ if (!bindex) {
++ au_cpup_attr_all(sb->s_root->d_inode, /*force*/1);
++ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
++ } else
++ au_sub_nlink(sb->s_root->d_inode, del->h_path.dentry->d_inode);
++ if (au_opt_test(mnt_flags, PLINK))
++ au_plink_half_refresh(sb, br_id);
++
++ if (au_xino_brid(sb) == br->br_id)
++ au_xino_brid_set(sb, -1);
++ goto out; /* success */
++
++ out_wh:
++ /* revert */
++ rerr = au_br_init_wh(sb, br, br->br_perm, del->h_path.dentry);
++ if (rerr)
++ AuWarn("failed re-creating base whiteout, %s. (%d)\n",
++ del->pathname, rerr);
++ out:
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++/*
++ * change a branch permission
++ */
++
++static int do_need_sigen_inc(int a, int b)
++{
++ return au_br_whable(a) && !au_br_whable(b);
++}
++
++static int need_sigen_inc(int old, int new)
++{
++ return do_need_sigen_inc(old, new)
++ || do_need_sigen_inc(new, old);
++}
++
++static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
++{
++ int err;
++ unsigned long n, ul, bytes, files;
++ aufs_bindex_t bstart;
++ struct file *file, *hf, **a;
++ const int step_bytes = 1024, /* memory allocation unit */
++ step_files = step_bytes / sizeof(*a);
++
++ err = -ENOMEM;
++ n = 0;
++ bytes = step_bytes;
++ files = step_files;
++ a = kmalloc(bytes, GFP_NOFS);
++ if (unlikely(!a))
++ goto out;
++
++ /* no need file_list_lock() since sbinfo is locked? defered? */
++ list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
++ if (special_file(file->f_dentry->d_inode->i_mode))
++ continue;
++
++ AuDbg("%.*s\n", AuDLNPair(file->f_dentry));
++ fi_read_lock(file);
++ if (unlikely(au_test_mmapped(file))) {
++ err = -EBUSY;
++ FiMustNoWaiters(file);
++ fi_read_unlock(file);
++ goto out_free;
++ }
++
++ bstart = au_fbstart(file);
++ if (!S_ISREG(file->f_dentry->d_inode->i_mode)
++ || !(file->f_mode & FMODE_WRITE)
++ || bstart != bindex) {
++ FiMustNoWaiters(file);
++ fi_read_unlock(file);
++ continue;
++ }
++
++ hf = au_h_fptr(file, bstart);
++ FiMustNoWaiters(file);
++ fi_read_unlock(file);
++
++ if (n < files)
++ a[n++] = hf;
++ else {
++ void *p;
++
++ err = -ENOMEM;
++ bytes += step_bytes;
++ files += step_files;
++ p = krealloc(a, bytes, GFP_NOFS);
++ if (p) {
++ a = p;
++ a[n++] = hf;
++ } else
++ goto out_free;
++ }
++ }
++
++ err = 0;
++ for (ul = 0; ul < n; ul++) {
++ /* todo: already flushed? */
++ /* cf. fs/super.c:mark_files_ro() */
++ hf = a[ul];
++ hf->f_mode &= ~FMODE_WRITE;
++ if (!file_check_writeable(hf)) {
++ file_release_write(hf);
++ mnt_drop_write(hf->f_vfsmnt);
++ }
++ }
++
++ out_free:
++ kfree(a);
++ out:
++ return err;
++}
++
++int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
++ int *do_update)
++{
++ int err, rerr;
++ aufs_bindex_t bindex;
++ struct path path;
++ struct dentry *root;
++ struct au_branch *br;
++
++ root = sb->s_root;
++ au_plink_maint_block(sb);
++ bindex = au_find_dbindex(root, mod->h_root);
++ if (bindex < 0) {
++ if (remount)
++ return 0; /* success */
++ err = -ENOENT;
++ AuErr("%s no such branch\n", mod->path);
++ goto out;
++ }
++ AuDbg("bindex b%d\n", bindex);
++
++ err = test_br(mod->h_root->d_inode, mod->perm, mod->path);
++ if (unlikely(err))
++ goto out;
++
++ br = au_sbr(sb, bindex);
++ if (br->br_perm == mod->perm)
++ return 0; /* success */
++
++ if (au_br_writable(br->br_perm)) {
++ /* remove whiteout base */
++ err = au_br_init_wh(sb, br, mod->perm, mod->h_root);
++ if (unlikely(err))
++ goto out;
++
++ if (!au_br_writable(mod->perm)) {
++ /* rw --> ro, file might be mmapped */
++ DiMustNoWaiters(root);
++ IiMustNoWaiters(root->d_inode);
++ di_write_unlock(root);
++ err = au_br_mod_files_ro(sb, bindex);
++ /* aufs_write_lock() calls ..._child() */
++ di_write_lock_child(root);
++
++ if (unlikely(err)) {
++ rerr = -ENOMEM;
++ br->br_wbr = kmalloc(sizeof(*br->br_wbr),
++ GFP_NOFS);
++ if (br->br_wbr) {
++ path.mnt = br->br_mnt;
++ path.dentry = mod->h_root;
++ rerr = au_wbr_init(br, sb, br->br_perm,
++ &path);
++ }
++ if (unlikely(rerr)) {
++ AuIOErr("nested error %d (%d)\n",
++ rerr, err);
++ br->br_perm = mod->perm;
++ }
++ }
++ }
++ } else if (au_br_writable(mod->perm)) {
++ /* ro --> rw */
++ err = -ENOMEM;
++ br->br_wbr = kmalloc(sizeof(*br->br_wbr), GFP_NOFS);
++ if (br->br_wbr) {
++ path.mnt = br->br_mnt;
++ path.dentry = mod->h_root;
++ err = au_wbr_init(br, sb, mod->perm, &path);
++ if (unlikely(err)) {
++ kfree(br->br_wbr);
++ br->br_wbr = NULL;
++ }
++ }
++ }
++
++ if (!err) {
++ *do_update |= need_sigen_inc(br->br_perm, mod->perm);
++ br->br_perm = mod->perm;
++ }
++
++ out:
++ return err;
++}
+diff --git a/fs/aufs/branch.h b/fs/aufs/branch.h
+new file mode 100644
+index 0000000..1a7219c
+--- /dev/null
++++ b/fs/aufs/branch.h
+@@ -0,0 +1,219 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * branch filesystems and xino for them
++ */
++
++#ifndef __AUFS_BRANCH_H__
++#define __AUFS_BRANCH_H__
++
++#ifdef __KERNEL__
++
++#include <linux/fs.h>
++#include <linux/mount.h>
++#include <linux/aufs_type.h>
++#include "rwsem.h"
++#include "super.h"
++
++/* ---------------------------------------------------------------------- */
++
++/* a xino file */
++struct au_xino_file {
++ struct file *xi_file;
++ struct mutex xi_nondir_mtx;
++
++ /* todo: make xino files an array to support huge inode number */
++
++#ifdef CONFIG_DEBUG_FS
++ struct dentry *xi_dbgaufs;
++#endif
++};
++
++/* members for writable branch only */
++enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
++struct au_wbr {
++ struct au_rwsem wbr_wh_rwsem;
++ struct dentry *wbr_wh[AuBrWh_Last];
++ atomic_t wbr_wh_running;
++#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
++#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
++#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
++
++ /* mfs mode */
++ unsigned long long wbr_bytes;
++};
++
++/* protected by superblock rwsem */
++struct au_branch {
++ struct au_xino_file br_xino;
++
++ aufs_bindex_t br_id;
++
++ int br_perm;
++ struct vfsmount *br_mnt;
++ atomic_t br_count;
++
++ struct au_wbr *br_wbr;
++
++ /* xino truncation */
++ blkcnt_t br_xino_upper; /* watermark in blocks */
++ atomic_t br_xino_running;
++
++#ifdef CONFIG_SYSFS
++ /* an entry under sysfs per mount-point */
++ char br_name[8];
++ struct attribute br_attr;
++#endif
++};
++
++/* ---------------------------------------------------------------------- */
++
++/* branch permission and attribute */
++enum {
++ AuBrPerm_RW, /* writable, linkable wh */
++ AuBrPerm_RO, /* readonly, no wh */
++ AuBrPerm_RR, /* natively readonly, no wh */
++
++ AuBrPerm_RWNoLinkWH, /* un-linkable whiteouts */
++
++ AuBrPerm_ROWH, /* whiteout-able */
++ AuBrPerm_RRWH, /* whiteout-able */
++
++ AuBrPerm_Last
++};
++
++static inline int au_br_writable(int brperm)
++{
++ return brperm == AuBrPerm_RW || brperm == AuBrPerm_RWNoLinkWH;
++}
++
++static inline int au_br_whable(int brperm)
++{
++ return brperm == AuBrPerm_RW
++ || brperm == AuBrPerm_ROWH
++ || brperm == AuBrPerm_RRWH;
++}
++
++static inline int au_br_rdonly(struct au_branch *br)
++{
++ return ((br->br_mnt->mnt_sb->s_flags & MS_RDONLY)
++ || !au_br_writable(br->br_perm))
++ ? -EROFS : 0;
++}
++
++static inline int au_br_hinotifyable(int brperm __maybe_unused)
++{
++#ifdef CONFIG_AUFS_HINOTIFY
++ return brperm != AuBrPerm_RR && brperm != AuBrPerm_RRWH;
++#else
++ return 0;
++#endif
++}
++
++/* ---------------------------------------------------------------------- */
++
++/* branch.c */
++struct au_sbinfo;
++void au_br_free(struct au_sbinfo *sinfo);
++int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
++struct au_opt_add;
++int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
++struct au_opt_del;
++int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
++struct au_opt_mod;
++int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
++ int *do_update);
++
++/* xino.c */
++static const loff_t au_loff_max = LLONG_MAX;
++
++int au_xib_trunc(struct super_block *sb);
++ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size,
++ loff_t *pos);
++ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
++ loff_t *pos);
++struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
++struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
++ino_t au_xino_new_ino(struct super_block *sb);
++int au_xino_write0(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
++ ino_t ino);
++int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
++ ino_t ino);
++int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
++ ino_t *ino);
++int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
++ struct file *base_file, int do_test);
++int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
++
++struct au_opt_xino;
++int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
++void au_xino_clr(struct super_block *sb);
++struct file *au_xino_def(struct super_block *sb);
++int au_xino_path(struct seq_file *seq, struct file *file);
++
++/* ---------------------------------------------------------------------- */
++
++/* Superblock to branch */
++static inline
++aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
++{
++ return au_sbr(sb, bindex)->br_id;
++}
++
++static inline
++struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
++{
++ return au_sbr(sb, bindex)->br_mnt;
++}
++
++static inline
++struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
++{
++ return au_sbr_mnt(sb, bindex)->mnt_sb;
++}
++
++static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
++{
++ atomic_dec_return(&au_sbr(sb, bindex)->br_count);
++}
++
++static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
++{
++ return au_sbr(sb, bindex)->br_perm;
++}
++
++static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
++{
++ return au_br_whable(au_sbr_perm(sb, bindex));
++}
++
++/* ---------------------------------------------------------------------- */
++
++/*
++ * wbr_wh_read_lock, wbr_wh_write_lock
++ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
++ */
++AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
++
++#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem)
++#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem)
++#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem)
++
++#endif /* __KERNEL__ */
++#endif /* __AUFS_BRANCH_H__ */
+diff --git a/fs/aufs/cpup.c b/fs/aufs/cpup.c
+new file mode 100644
+index 0000000..7f70e2f
+--- /dev/null
++++ b/fs/aufs/cpup.c
+@@ -0,0 +1,1048 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * copy-up functions, see wbr_policy.c for copy-down
++ */
++
++#include <linux/file.h>
++#include <linux/fs_stack.h>
++#include <linux/mm.h>
++#include <linux/uaccess.h>
++#include "aufs.h"
++
++void au_cpup_attr_flags(struct inode *dst, struct inode *src)
++{
++ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
++ | S_NOATIME | S_NOCMTIME;
++
++ dst->i_flags |= src->i_flags & ~mask;
++ if (au_test_fs_notime(dst->i_sb))
++ dst->i_flags |= S_NOATIME | S_NOCMTIME;
++}
++
++void au_cpup_attr_timesizes(struct inode *inode)
++{
++ struct inode *h_inode;
++
++ h_inode = au_h_iptr(inode, au_ibstart(inode));
++ fsstack_copy_attr_times(inode, h_inode);
++ vfsub_copy_inode_size(inode, h_inode);
++}
++
++void au_cpup_attr_nlink(struct inode *inode, int force)
++{
++ struct inode *h_inode;
++ struct super_block *sb;
++ aufs_bindex_t bindex, bend;
++
++ sb = inode->i_sb;
++ bindex = au_ibstart(inode);
++ h_inode = au_h_iptr(inode, bindex);
++ if (!force
++ && !S_ISDIR(h_inode->i_mode)
++ && au_opt_test(au_mntflags(sb), PLINK)
++ && au_plink_test(inode))
++ return;
++
++ inode->i_nlink = h_inode->i_nlink;
++
++ /*
++ * fewer nlink makes find(1) noisy, but larger nlink doesn't.
++ * it may includes whplink directory.
++ */
++ if (S_ISDIR(h_inode->i_mode)) {
++ bend = au_ibend(inode);
++ for (bindex++; bindex <= bend; bindex++) {
++ h_inode = au_h_iptr(inode, bindex);
++ if (h_inode)
++ au_add_nlink(inode, h_inode);
++ }
++ }
++}
++
++void au_cpup_attr_changeable(struct inode *inode)
++{
++ struct inode *h_inode;
++
++ h_inode = au_h_iptr(inode, au_ibstart(inode));
++ inode->i_mode = h_inode->i_mode;
++ inode->i_uid = h_inode->i_uid;
++ inode->i_gid = h_inode->i_gid;
++ au_cpup_attr_timesizes(inode);
++ au_cpup_attr_flags(inode, h_inode);
++}
++
++void au_cpup_igen(struct inode *inode, struct inode *h_inode)
++{
++ struct au_iinfo *iinfo = au_ii(inode);
++
++ IiMustWriteLock(inode);
++
++ iinfo->ii_higen = h_inode->i_generation;
++ iinfo->ii_hsb1 = h_inode->i_sb;
++}
++
++void au_cpup_attr_all(struct inode *inode, int force)
++{
++ struct inode *h_inode;
++
++ h_inode = au_h_iptr(inode, au_ibstart(inode));
++ au_cpup_attr_changeable(inode);
++ if (inode->i_nlink > 0)
++ au_cpup_attr_nlink(inode, force);
++ inode->i_rdev = h_inode->i_rdev;
++ inode->i_blkbits = h_inode->i_blkbits;
++ au_cpup_igen(inode, h_inode);
++}
++
++/* ---------------------------------------------------------------------- */
++
++/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */
++
++/* keep the timestamps of the parent dir when cpup */
++void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
++ struct path *h_path)
++{
++ struct inode *h_inode;
++
++ dt->dt_dentry = dentry;
++ dt->dt_h_path = *h_path;
++ h_inode = h_path->dentry->d_inode;
++ dt->dt_atime = h_inode->i_atime;
++ dt->dt_mtime = h_inode->i_mtime;
++ /* smp_mb(); */
++}
++
++void au_dtime_revert(struct au_dtime *dt)
++{
++ struct iattr attr;
++ int err;
++
++ attr.ia_atime = dt->dt_atime;
++ attr.ia_mtime = dt->dt_mtime;
++ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
++ | ATTR_ATIME | ATTR_ATIME_SET;
++
++ err = vfsub_notify_change(&dt->dt_h_path, &attr);
++ if (unlikely(err))
++ AuWarn("restoring timestamps failed(%d). ignored\n", err);
++}
++
++/* ---------------------------------------------------------------------- */
++
++static noinline_for_stack
++int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src)
++{
++ int err, sbits;
++ struct iattr ia;
++ struct path h_path;
++ struct inode *h_isrc, *h_idst;
++
++ h_path.dentry = au_h_dptr(dst, bindex);
++ h_idst = h_path.dentry->d_inode;
++ h_path.mnt = au_sbr_mnt(dst->d_sb, bindex);
++ h_isrc = h_src->d_inode;
++ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
++ | ATTR_ATIME | ATTR_MTIME
++ | ATTR_ATIME_SET | ATTR_MTIME_SET;
++ ia.ia_uid = h_isrc->i_uid;
++ ia.ia_gid = h_isrc->i_gid;
++ ia.ia_atime = h_isrc->i_atime;
++ ia.ia_mtime = h_isrc->i_mtime;
++ if (h_idst->i_mode != h_isrc->i_mode
++ && !S_ISLNK(h_idst->i_mode)) {
++ ia.ia_valid |= ATTR_MODE;
++ ia.ia_mode = h_isrc->i_mode;
++ }
++ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
++ au_cpup_attr_flags(h_idst, h_isrc);
++ err = vfsub_notify_change(&h_path, &ia);
++
++ /* is this nfs only? */
++ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
++ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
++ ia.ia_mode = h_isrc->i_mode;
++ err = vfsub_notify_change(&h_path, &ia);
++ }
++
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
++ char *buf, unsigned long blksize)
++{
++ int err;
++ size_t sz, rbytes, wbytes;
++ unsigned char all_zero;
++ char *p, *zp;
++ struct mutex *h_mtx;
++ /* reduce stack usage */
++ struct iattr *ia;
++
++ zp = page_address(ZERO_PAGE(0));
++ if (unlikely(!zp))
++ return -ENOMEM; /* possible? */
++
++ err = 0;
++ all_zero = 0;
++ while (len) {
++ AuDbg("len %lld\n", len);
++ sz = blksize;
++ if (len < blksize)
++ sz = len;
++
++ rbytes = 0;
++ /* todo: signal_pending? */
++ while (!rbytes || err == -EAGAIN || err == -EINTR) {
++ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
++ err = rbytes;
++ }
++ if (unlikely(err < 0))
++ break;
++
++ all_zero = 0;
++ if (len >= rbytes && rbytes == blksize)
++ all_zero = !memcmp(buf, zp, rbytes);
++ if (!all_zero) {
++ wbytes = rbytes;
++ p = buf;
++ while (wbytes) {
++ size_t b;
++
++ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
++ err = b;
++ /* todo: signal_pending? */
++ if (unlikely(err == -EAGAIN || err == -EINTR))
++ continue;
++ if (unlikely(err < 0))
++ break;
++ wbytes -= b;
++ p += b;
++ }
++ } else {
++ loff_t res;
++
++ AuLabel(hole);
++ res = vfsub_llseek(dst, rbytes, SEEK_CUR);
++ err = res;
++ if (unlikely(res < 0))
++ break;
++ }
++ len -= rbytes;
++ err = 0;
++ }
++
++ /* the last block may be a hole */
++ if (!err && all_zero) {
++ AuLabel(last hole);
++
++ err = 1;
++ if (au_test_nfs(dst->f_dentry->d_sb)) {
++ /* nfs requires this step to make last hole */
++ /* is this only nfs? */
++ do {
++ /* todo: signal_pending? */
++ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
++ } while (err == -EAGAIN || err == -EINTR);
++ if (err == 1)
++ dst->f_pos--;
++ }
++
++ if (err == 1) {
++ ia = (void *)buf;
++ ia->ia_size = dst->f_pos;
++ ia->ia_valid = ATTR_SIZE | ATTR_FILE;
++ ia->ia_file = dst;
++ h_mtx = &dst->f_dentry->d_inode->i_mutex;
++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
++ err = vfsub_notify_change(&dst->f_path, ia);
++ mutex_unlock(h_mtx);
++ }
++ }
++
++ return err;
++}
++
++int au_copy_file(struct file *dst, struct file *src, loff_t len)
++{
++ int err;
++ unsigned long blksize;
++ unsigned char do_kfree;
++ char *buf;
++
++ err = -ENOMEM;
++ blksize = dst->f_dentry->d_sb->s_blocksize;
++ if (!blksize || PAGE_SIZE < blksize)
++ blksize = PAGE_SIZE;
++ AuDbg("blksize %lu\n", blksize);
++ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *));
++ if (do_kfree)
++ buf = kmalloc(blksize, GFP_NOFS);
++ else
++ buf = (void *)__get_free_page(GFP_NOFS);
++ if (unlikely(!buf))
++ goto out;
++
++ if (len > (1 << 22))
++ AuDbg("copying a large file %lld\n", (long long)len);
++
++ src->f_pos = 0;
++ dst->f_pos = 0;
++ err = au_do_copy_file(dst, src, len, buf, blksize);
++ if (do_kfree)
++ kfree(buf);
++ else
++ free_page((unsigned long)buf);
++
++ out:
++ return err;
++}
++
++/*
++ * to support a sparse file which is opened with O_APPEND,
++ * we need to close the file.
++ */
++static int au_cp_regular(struct dentry *dentry, aufs_bindex_t bdst,
++ aufs_bindex_t bsrc, loff_t len)
++{
++ int err, i;
++ enum { SRC, DST };
++ struct {
++ aufs_bindex_t bindex;
++ unsigned int flags;
++ struct dentry *dentry;
++ struct file *file;
++ void *label, *label_file;
++ } *f, file[] = {
++ {
++ .bindex = bsrc,
++ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
++ .file = NULL,
++ .label = &&out,
++ .label_file = &&out_src
++ },
++ {
++ .bindex = bdst,
++ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
++ .file = NULL,
++ .label = &&out_src,
++ .label_file = &&out_dst
++ }
++ };
++ struct super_block *sb;
++
++ /* bsrc branch can be ro/rw. */
++ sb = dentry->d_sb;
++ f = file;
++ for (i = 0; i < 2; i++, f++) {
++ f->dentry = au_h_dptr(dentry, f->bindex);
++ f->file = au_h_open(dentry, f->bindex, f->flags, /*file*/NULL);
++ err = PTR_ERR(f->file);
++ if (IS_ERR(f->file))
++ goto *f->label;
++ err = -EINVAL;
++ if (unlikely(!f->file->f_op))
++ goto *f->label_file;
++ }
++
++ /* try stopping to update while we copyup */
++ IMustLock(file[SRC].dentry->d_inode);
++ err = au_copy_file(file[DST].file, file[SRC].file, len);
++
++ out_dst:
++ fput(file[DST].file);
++ au_sbr_put(sb, file[DST].bindex);
++ out_src:
++ fput(file[SRC].file);
++ au_sbr_put(sb, file[SRC].bindex);
++ out:
++ return err;
++}
++
++static int au_do_cpup_regular(struct dentry *dentry, aufs_bindex_t bdst,
++ aufs_bindex_t bsrc, loff_t len,
++ struct inode *h_dir, struct path *h_path)
++{
++ int err, rerr;
++ loff_t l;
++
++ err = 0;
++ l = i_size_read(au_h_iptr(dentry->d_inode, bsrc));
++ if (len == -1 || l < len)
++ len = l;
++ if (len)
++ err = au_cp_regular(dentry, bdst, bsrc, len);
++ if (!err)
++ goto out; /* success */
++
++ rerr = vfsub_unlink(h_dir, h_path, /*force*/0);
++ if (rerr) {
++ AuIOErr("failed unlinking cpup-ed %.*s(%d, %d)\n",
++ AuDLNPair(h_path->dentry), err, rerr);
++ err = -EIO;
++ }
++
++ out:
++ return err;
++}
++
++static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
++ struct inode *h_dir)
++{
++ int err, symlen;
++ mm_segment_t old_fs;
++ char *sym;
++
++ err = -ENOSYS;
++ if (unlikely(!h_src->d_inode->i_op->readlink))
++ goto out;
++
++ err = -ENOMEM;
++ sym = __getname();
++ if (unlikely(!sym))
++ goto out;
++
++ old_fs = get_fs();
++ set_fs(KERNEL_DS);
++ symlen = h_src->d_inode->i_op->readlink(h_src, (char __user *)sym,
++ PATH_MAX);
++ err = symlen;
++ set_fs(old_fs);
++
++ if (symlen > 0) {
++ sym[symlen] = 0;
++ err = vfsub_symlink(h_dir, h_path, sym);
++ }
++ __putname(sym);
++
++ out:
++ return err;
++}
++
++/* return with the lower dst inode is locked */
++static noinline_for_stack
++int cpup_entry(struct dentry *dentry, aufs_bindex_t bdst,
++ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
++ struct dentry *dst_parent)
++{
++ int err;
++ umode_t mode;
++ unsigned int mnt_flags;
++ unsigned char isdir;
++ const unsigned char do_dt = !!au_ftest_cpup(flags, DTIME);
++ struct au_dtime dt;
++ struct path h_path;
++ struct dentry *h_src, *h_dst, *h_parent;
++ struct inode *h_inode, *h_dir;
++ struct super_block *sb;
++
++ /* bsrc branch can be ro/rw. */
++ h_src = au_h_dptr(dentry, bsrc);
++ h_inode = h_src->d_inode;
++ AuDebugOn(h_inode != au_h_iptr(dentry->d_inode, bsrc));
++
++ /* try stopping to be referenced while we are creating */
++ h_dst = au_h_dptr(dentry, bdst);
++ h_parent = h_dst->d_parent; /* dir inode is locked */
++ h_dir = h_parent->d_inode;
++ IMustLock(h_dir);
++ AuDebugOn(h_parent != h_dst->d_parent);
++
++ sb = dentry->d_sb;
++ h_path.mnt = au_sbr_mnt(sb, bdst);
++ if (do_dt) {
++ h_path.dentry = h_parent;
++ au_dtime_store(&dt, dst_parent, &h_path);
++ }
++ h_path.dentry = h_dst;
++
++ isdir = 0;
++ mode = h_inode->i_mode;
++ switch (mode & S_IFMT) {
++ case S_IFREG:
++ /* try stopping to update while we are referencing */
++ IMustLock(h_inode);
++ err = vfsub_create(h_dir, &h_path, mode | S_IWUSR);
++ if (!err)
++ err = au_do_cpup_regular
++ (dentry, bdst, bsrc, len,
++ au_h_iptr(dst_parent->d_inode, bdst), &h_path);
++ break;
++ case S_IFDIR:
++ isdir = 1;
++ err = vfsub_mkdir(h_dir, &h_path, mode);
++ if (!err) {
++ /*
++ * strange behaviour from the users view,
++ * particularry setattr case
++ */
++ if (au_ibstart(dst_parent->d_inode) == bdst)
++ au_cpup_attr_nlink(dst_parent->d_inode,
++ /*force*/1);
++ au_cpup_attr_nlink(dentry->d_inode, /*force*/1);
++ }
++ break;
++ case S_IFLNK:
++ err = au_do_cpup_symlink(&h_path, h_src, h_dir);
++ break;
++ case S_IFCHR:
++ case S_IFBLK:
++ AuDebugOn(!capable(CAP_MKNOD));
++ /*FALLTHROUGH*/
++ case S_IFIFO:
++ case S_IFSOCK:
++ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
++ break;
++ default:
++ AuIOErr("Unknown inode type 0%o\n", mode);
++ err = -EIO;
++ }
++
++ mnt_flags = au_mntflags(sb);
++ if (!au_opt_test(mnt_flags, UDBA_NONE)
++ && !isdir
++ && au_opt_test(mnt_flags, XINO)
++ && h_inode->i_nlink == 1
++ /* todo: unnecessary? */
++ /* && dentry->d_inode->i_nlink == 1 */
++ && bdst < bsrc
++ && !au_ftest_cpup(flags, KEEPLINO))
++ au_xino_write(sb, bsrc, h_inode->i_ino, /*ino*/0);
++ /* ignore this error */
++
++ if (do_dt)
++ au_dtime_revert(&dt);
++ return err;
++}
++
++/*
++ * copyup the @dentry from @bsrc to @bdst.
++ * the caller must set the both of lower dentries.
++ * @len is for truncating when it is -1 copyup the entire file.
++ * in link/rename cases, @dst_parent may be different from the real one.
++ */
++static int au_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
++ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
++ struct dentry *dst_parent)
++{
++ int err, rerr;
++ aufs_bindex_t old_ibstart;
++ unsigned char isdir, plink;
++ struct au_dtime dt;
++ struct path h_path;
++ struct dentry *h_src, *h_dst, *h_parent;
++ struct inode *dst_inode, *h_dir, *inode;
++ struct super_block *sb;
++
++ AuDebugOn(bsrc <= bdst);
++
++ sb = dentry->d_sb;
++ h_path.mnt = au_sbr_mnt(sb, bdst);
++ h_dst = au_h_dptr(dentry, bdst);
++ h_parent = h_dst->d_parent; /* dir inode is locked */
++ h_dir = h_parent->d_inode;
++ IMustLock(h_dir);
++
++ h_src = au_h_dptr(dentry, bsrc);
++ inode = dentry->d_inode;
++
++ if (!dst_parent)
++ dst_parent = dget_parent(dentry);
++ else
++ dget(dst_parent);
++
++ plink = !!au_opt_test(au_mntflags(sb), PLINK);
++ dst_inode = au_h_iptr(inode, bdst);
++ if (dst_inode) {
++ if (unlikely(!plink)) {
++ err = -EIO;
++ AuIOErr("i%lu exists on a upper branch "
++ "but plink is disabled\n", inode->i_ino);
++ goto out;
++ }
++
++ if (dst_inode->i_nlink) {
++ const int do_dt = au_ftest_cpup(flags, DTIME);
++
++ h_src = au_plink_lkup(inode, bdst);
++ err = PTR_ERR(h_src);
++ if (IS_ERR(h_src))
++ goto out;
++ if (unlikely(!h_src->d_inode)) {
++ err = -EIO;
++ AuIOErr("i%lu exists on a upper branch "
++ "but plink is broken\n", inode->i_ino);
++ dput(h_src);
++ goto out;
++ }
++
++ if (do_dt) {
++ h_path.dentry = h_parent;
++ au_dtime_store(&dt, dst_parent, &h_path);
++ }
++ h_path.dentry = h_dst;
++ err = vfsub_link(h_src, h_dir, &h_path);
++ if (do_dt)
++ au_dtime_revert(&dt);
++ dput(h_src);
++ goto out;
++ } else
++ /* todo: cpup_wh_file? */
++ /* udba work */
++ au_update_brange(inode, 1);
++ }
++
++ old_ibstart = au_ibstart(inode);
++ err = cpup_entry(dentry, bdst, bsrc, len, flags, dst_parent);
++ if (unlikely(err))
++ goto out;
++ dst_inode = h_dst->d_inode;
++ mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2);
++
++ err = cpup_iattr(dentry, bdst, h_src);
++ isdir = S_ISDIR(dst_inode->i_mode);
++ if (!err) {
++ if (bdst < old_ibstart)
++ au_set_ibstart(inode, bdst);
++ au_set_h_iptr(inode, bdst, au_igrab(dst_inode),
++ au_hi_flags(inode, isdir));
++ mutex_unlock(&dst_inode->i_mutex);
++ if (!isdir
++ && h_src->d_inode->i_nlink > 1
++ && plink)
++ au_plink_append(inode, bdst, h_dst);
++ goto out; /* success */
++ }
++
++ /* revert */
++ h_path.dentry = h_parent;
++ mutex_unlock(&dst_inode->i_mutex);
++ au_dtime_store(&dt, dst_parent, &h_path);
++ h_path.dentry = h_dst;
++ if (!isdir)
++ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
++ else
++ rerr = vfsub_rmdir(h_dir, &h_path);
++ au_dtime_revert(&dt);
++ if (rerr) {
++ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
++ err = -EIO;
++ }
++
++ out:
++ dput(dst_parent);
++ return err;
++}
++
++struct au_cpup_single_args {
++ int *errp;
++ struct dentry *dentry;
++ aufs_bindex_t bdst, bsrc;
++ loff_t len;
++ unsigned int flags;
++ struct dentry *dst_parent;
++};
++
++static void au_call_cpup_single(void *args)
++{
++ struct au_cpup_single_args *a = args;
++ *a->errp = au_cpup_single(a->dentry, a->bdst, a->bsrc, a->len,
++ a->flags, a->dst_parent);
++}
++
++int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
++ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
++ struct dentry *dst_parent)
++{
++ int err, wkq_err;
++ umode_t mode;
++ struct dentry *h_dentry;
++
++ h_dentry = au_h_dptr(dentry, bsrc);
++ mode = h_dentry->d_inode->i_mode & S_IFMT;
++ if ((mode != S_IFCHR && mode != S_IFBLK)
++ || capable(CAP_MKNOD))
++ err = au_cpup_single(dentry, bdst, bsrc, len, flags,
++ dst_parent);
++ else {
++ struct au_cpup_single_args args = {
++ .errp = &err,
++ .dentry = dentry,
++ .bdst = bdst,
++ .bsrc = bsrc,
++ .len = len,
++ .flags = flags,
++ .dst_parent = dst_parent
++ };
++ wkq_err = au_wkq_wait(au_call_cpup_single, &args);
++ if (unlikely(wkq_err))
++ err = wkq_err;
++ }
++
++ return err;
++}
++
++/*
++ * copyup the @dentry from the first active lower branch to @bdst,
++ * using au_cpup_single().
++ */
++static int au_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
++ unsigned int flags)
++{
++ int err;
++ aufs_bindex_t bsrc, bend;
++
++ bend = au_dbend(dentry);
++ for (bsrc = bdst + 1; bsrc <= bend; bsrc++)
++ if (au_h_dptr(dentry, bsrc))
++ break;
++
++ err = au_lkup_neg(dentry, bdst);
++ if (!err) {
++ err = au_cpup_single(dentry, bdst, bsrc, len, flags, NULL);
++ if (!err)
++ return 0; /* success */
++
++ /* revert */
++ au_set_h_dptr(dentry, bdst, NULL);
++ au_set_dbstart(dentry, bsrc);
++ }
++
++ return err;
++}
++
++struct au_cpup_simple_args {
++ int *errp;
++ struct dentry *dentry;
++ aufs_bindex_t bdst;
++ loff_t len;
++ unsigned int flags;
++};
++
++static void au_call_cpup_simple(void *args)
++{
++ struct au_cpup_simple_args *a = args;
++ *a->errp = au_cpup_simple(a->dentry, a->bdst, a->len, a->flags);
++}
++
++int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
++ unsigned int flags)
++{
++ int err, wkq_err;
++ unsigned char do_sio;
++ struct dentry *parent;
++ struct inode *h_dir;
++
++ parent = dget_parent(dentry);
++ h_dir = au_h_iptr(parent->d_inode, bdst);
++ do_sio = !!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE);
++ if (!do_sio) {
++ /*
++ * testing CAP_MKNOD is for generic fs,
++ * but CAP_FSETID is for xfs only, currently.
++ */
++ umode_t mode = dentry->d_inode->i_mode;
++ do_sio = (((mode & (S_IFCHR | S_IFBLK))
++ && !capable(CAP_MKNOD))
++ || ((mode & (S_ISUID | S_ISGID))
++ && !capable(CAP_FSETID)));
++ }
++ if (!do_sio)
++ err = au_cpup_simple(dentry, bdst, len, flags);
++ else {
++ struct au_cpup_simple_args args = {
++ .errp = &err,
++ .dentry = dentry,
++ .bdst = bdst,
++ .len = len,
++ .flags = flags
++ };
++ wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
++ if (unlikely(wkq_err))
++ err = wkq_err;
++ }
++
++ dput(parent);
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++/*
++ * copyup the deleted file for writing.
++ */
++static int au_do_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst,
++ struct dentry *wh_dentry, struct file *file,
++ loff_t len)
++{
++ int err;
++ aufs_bindex_t bstart;
++ struct au_dinfo *dinfo;
++ struct dentry *h_d_dst, *h_d_start;
++
++ dinfo = au_di(dentry);
++ AuRwMustWriteLock(&dinfo->di_rwsem);
++
++ bstart = dinfo->di_bstart;
++ h_d_dst = dinfo->di_hdentry[0 + bdst].hd_dentry;
++ dinfo->di_bstart = bdst;
++ dinfo->di_hdentry[0 + bdst].hd_dentry = wh_dentry;
++ h_d_start = dinfo->di_hdentry[0 + bstart].hd_dentry;
++ if (file)
++ dinfo->di_hdentry[0 + bstart].hd_dentry
++ = au_h_fptr(file, au_fbstart(file))->f_dentry;
++ err = au_cpup_single(dentry, bdst, bstart, len, !AuCpup_DTIME,
++ /*h_parent*/NULL);
++ if (!err && file) {
++ err = au_reopen_nondir(file);
++ dinfo->di_hdentry[0 + bstart].hd_dentry = h_d_start;
++ }
++ dinfo->di_hdentry[0 + bdst].hd_dentry = h_d_dst;
++ dinfo->di_bstart = bstart;
++
++ return err;
++}
++
++static int au_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
++ struct file *file)
++{
++ int err;
++ struct au_dtime dt;
++ struct dentry *parent, *h_parent, *wh_dentry;
++ struct au_branch *br;
++ struct path h_path;
++
++ br = au_sbr(dentry->d_sb, bdst);
++ parent = dget_parent(dentry);
++ h_parent = au_h_dptr(parent, bdst);
++ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
++ err = PTR_ERR(wh_dentry);
++ if (IS_ERR(wh_dentry))
++ goto out;
++
++ h_path.dentry = h_parent;
++ h_path.mnt = br->br_mnt;
++ au_dtime_store(&dt, parent, &h_path);
++ err = au_do_cpup_wh(dentry, bdst, wh_dentry, file, len);
++ if (unlikely(err))
++ goto out_wh;
++
++ dget(wh_dentry);
++ h_path.dentry = wh_dentry;
++ err = vfsub_unlink(h_parent->d_inode, &h_path, /*force*/0);
++ if (unlikely(err)) {
++ AuIOErr("failed remove copied-up tmp file %.*s(%d)\n",
++ AuDLNPair(wh_dentry), err);
++ err = -EIO;
++ }
++ au_dtime_revert(&dt);
++ au_set_hi_wh(dentry->d_inode, bdst, wh_dentry);
++
++ out_wh:
++ dput(wh_dentry);
++ out:
++ dput(parent);
++ return err;
++}
++
++struct au_cpup_wh_args {
++ int *errp;
++ struct dentry *dentry;
++ aufs_bindex_t bdst;
++ loff_t len;
++ struct file *file;
++};
++
++static void au_call_cpup_wh(void *args)
++{
++ struct au_cpup_wh_args *a = args;
++ *a->errp = au_cpup_wh(a->dentry, a->bdst, a->len, a->file);
++}
++
++int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
++ struct file *file)
++{
++ int err, wkq_err;
++ struct dentry *parent, *h_orph, *h_parent, *h_dentry;
++ struct inode *dir, *h_dir, *h_tmpdir, *h_inode;
++ struct au_wbr *wbr;
++
++ parent = dget_parent(dentry);
++ dir = parent->d_inode;
++ h_orph = NULL;
++ h_parent = NULL;
++ h_dir = au_igrab(au_h_iptr(dir, bdst));
++ h_tmpdir = h_dir;
++ if (!h_dir->i_nlink) {
++ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
++ h_orph = wbr->wbr_orph;
++
++ h_parent = dget(au_h_dptr(parent, bdst));
++ au_set_h_dptr(parent, bdst, NULL);
++ au_set_h_dptr(parent, bdst, dget(h_orph));
++ h_tmpdir = h_orph->d_inode;
++ au_set_h_iptr(dir, bdst, NULL, 0);
++ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
++
++ /* this temporary unlock is safe */
++ if (file)
++ h_dentry = au_h_fptr(file, au_fbstart(file))->f_dentry;
++ else
++ h_dentry = au_h_dptr(dentry, au_dbstart(dentry));
++ h_inode = h_dentry->d_inode;
++ IMustLock(h_inode);
++ mutex_unlock(&h_inode->i_mutex);
++ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3);
++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
++ }
++
++ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE))
++ err = au_cpup_wh(dentry, bdst, len, file);
++ else {
++ struct au_cpup_wh_args args = {
++ .errp = &err,
++ .dentry = dentry,
++ .bdst = bdst,
++ .len = len,
++ .file = file
++ };
++ wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
++ if (unlikely(wkq_err))
++ err = wkq_err;
++ }
++
++ if (h_orph) {
++ mutex_unlock(&h_tmpdir->i_mutex);
++ au_set_h_iptr(dir, bdst, NULL, 0);
++ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
++ au_set_h_dptr(parent, bdst, NULL);
++ au_set_h_dptr(parent, bdst, h_parent);
++ }
++ iput(h_dir);
++ dput(parent);
++
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++/*
++ * generic routine for both of copy-up and copy-down.
++ */
++/* cf. revalidate function in file.c */
++int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
++ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
++ struct dentry *h_parent, void *arg),
++ void *arg)
++{
++ int err;
++ struct au_pin pin;
++ struct dentry *d, *parent, *h_parent, *real_parent;
++
++ err = 0;
++ parent = dget_parent(dentry);
++ if (IS_ROOT(parent))
++ goto out;
++
++ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
++ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
++
++ /* do not use au_dpage */
++ real_parent = parent;
++ while (1) {
++ dput(parent);
++ parent = dget_parent(dentry);
++ h_parent = au_h_dptr(parent, bdst);
++ if (h_parent)
++ goto out; /* success */
++
++ /* find top dir which is necessary to cpup */
++ do {
++ d = parent;
++ dput(parent);
++ parent = dget_parent(d);
++ di_read_lock_parent3(parent, !AuLock_IR);
++ h_parent = au_h_dptr(parent, bdst);
++ di_read_unlock(parent, !AuLock_IR);
++ } while (!h_parent);
++
++ if (d != real_parent)
++ di_write_lock_child3(d);
++
++ /* somebody else might create while we were sleeping */
++ if (!au_h_dptr(d, bdst) || !au_h_dptr(d, bdst)->d_inode) {
++ if (au_h_dptr(d, bdst))
++ au_update_dbstart(d);
++
++ au_pin_set_dentry(&pin, d);
++ err = au_do_pin(&pin);
++ if (!err) {
++ err = cp(d, bdst, h_parent, arg);
++ au_unpin(&pin);
++ }
++ }
++
++ if (d != real_parent)
++ di_write_unlock(d);
++ if (unlikely(err))
++ break;
++ }
++
++ out:
++ dput(parent);
++ return err;
++}
++
++static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
++ struct dentry *h_parent __maybe_unused ,
++ void *arg __maybe_unused)
++{
++ return au_sio_cpup_simple(dentry, bdst, -1, AuCpup_DTIME);
++}
++
++int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
++{
++ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
++}
++
++int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
++{
++ int err;
++ struct dentry *parent;
++ struct inode *dir;
++
++ parent = dget_parent(dentry);
++ dir = parent->d_inode;
++ err = 0;
++ if (au_h_iptr(dir, bdst))
++ goto out;
++
++ di_read_unlock(parent, AuLock_IR);
++ di_write_lock_parent(parent);
++ /* someone else might change our inode while we were sleeping */
++ if (!au_h_iptr(dir, bdst))
++ err = au_cpup_dirs(dentry, bdst);
++ di_downgrade_lock(parent, AuLock_IR);
++
++ out:
++ dput(parent);
++ return err;
++}
+diff --git a/fs/aufs/cpup.h b/fs/aufs/cpup.h
+new file mode 100644
+index 0000000..29e2508
+--- /dev/null
++++ b/fs/aufs/cpup.h
+@@ -0,0 +1,81 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * copy-up/down functions
++ */
++
++#ifndef __AUFS_CPUP_H__
++#define __AUFS_CPUP_H__
++
++#ifdef __KERNEL__
++
++#include <linux/path.h>
++#include <linux/time.h>
++#include <linux/aufs_type.h>
++
++struct inode;
++struct file;
++
++void au_cpup_attr_flags(struct inode *dst, struct inode *src);
++void au_cpup_attr_timesizes(struct inode *inode);
++void au_cpup_attr_nlink(struct inode *inode, int force);
++void au_cpup_attr_changeable(struct inode *inode);
++void au_cpup_igen(struct inode *inode, struct inode *h_inode);
++void au_cpup_attr_all(struct inode *inode, int force);
++
++/* ---------------------------------------------------------------------- */
++
++/* cpup flags */
++#define AuCpup_DTIME 1 /* do dtime_store/revert */
++#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino,
++ for link(2) */
++#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name)
++#define au_fset_cpup(flags, name) { (flags) |= AuCpup_##name; }
++#define au_fclr_cpup(flags, name) { (flags) &= ~AuCpup_##name; }
++
++int au_copy_file(struct file *dst, struct file *src, loff_t len);
++int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
++ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
++ struct dentry *dst_parent);
++int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
++ unsigned int flags);
++int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
++ struct file *file);
++
++int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
++ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
++ struct dentry *h_parent, void *arg),
++ void *arg);
++int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
++int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
++
++/* ---------------------------------------------------------------------- */
++
++/* keep timestamps when copyup */
++struct au_dtime {
++ struct dentry *dt_dentry;
++ struct path dt_h_path;
++ struct timespec dt_atime, dt_mtime;
++};
++void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
++ struct path *h_path);
++void au_dtime_revert(struct au_dtime *dt);
++
++#endif /* __KERNEL__ */
++#endif /* __AUFS_CPUP_H__ */
+diff --git a/fs/aufs/dbgaufs.c b/fs/aufs/dbgaufs.c
+new file mode 100644
+index 0000000..6b19d09
+--- /dev/null
++++ b/fs/aufs/dbgaufs.c
+@@ -0,0 +1,331 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * debugfs interface
++ */
++
++#include <linux/debugfs.h>
++#include "aufs.h"
++
++#ifndef CONFIG_SYSFS
++#error DEBUG_FS depends upon SYSFS
++#endif
++
++static struct dentry *dbgaufs;
++static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
++
++/* 20 is max digits length of ulong 64 */
++struct dbgaufs_arg {
++ int n;
++ char a[20 * 4];
++};
++
++/*
++ * common function for all XINO files
++ */
++static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
++ struct file *file)
++{
++ kfree(file->private_data);
++ return 0;
++}
++
++static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
++{
++ int err;
++ struct kstat st;
++ struct dbgaufs_arg *p;
++
++ err = -ENOMEM;
++ p = kmalloc(sizeof(*p), GFP_NOFS);
++ if (unlikely(!p))
++ goto out;
++
++ err = 0;
++ p->n = 0;
++ file->private_data = p;
++ if (!xf)
++ goto out;
++
++ err = vfs_getattr(xf->f_vfsmnt, xf->f_dentry, &st);
++ if (!err) {
++ if (do_fcnt)
++ p->n = snprintf
++ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
++ (long)file_count(xf), st.blocks, st.blksize,
++ (long long)st.size);
++ else
++ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
++ st.blocks, st.blksize,
++ (long long)st.size);
++ AuDebugOn(p->n >= sizeof(p->a));
++ } else {
++ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
++ err = 0;
++ }
++
++ out:
++ return err;
++
++}
++
++static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
++ size_t count, loff_t *ppos)
++{
++ struct dbgaufs_arg *p;
++
++ p = file->private_data;
++ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
++}
++
++/* ---------------------------------------------------------------------- */
++
++static int dbgaufs_xib_open(struct inode *inode, struct file *file)
++{
++ int err;
++ struct au_sbinfo *sbinfo;
++ struct super_block *sb;
++
++ sbinfo = inode->i_private;
++ sb = sbinfo->si_sb;
++ si_noflush_read_lock(sb);
++ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
++ si_read_unlock(sb);
++ return err;
++}
++
++static struct file_operations dbgaufs_xib_fop = {
++ .open = dbgaufs_xib_open,
++ .release = dbgaufs_xi_release,
++ .read = dbgaufs_xi_read
++};
++
++/* ---------------------------------------------------------------------- */
++
++#define DbgaufsXi_PREFIX "xi"
++
++static int dbgaufs_xino_open(struct inode *inode, struct file *file)
++{
++ int err;
++ long l;
++ struct au_sbinfo *sbinfo;
++ struct super_block *sb;
++ struct file *xf;
++ struct qstr *name;
++
++ err = -ENOENT;
++ xf = NULL;
++ name = &file->f_dentry->d_name;
++ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
++ || memcmp(name->name, DbgaufsXi_PREFIX,
++ sizeof(DbgaufsXi_PREFIX) - 1)))
++ goto out;
++ err = strict_strtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
++ if (unlikely(err))
++ goto out;
++
++ sbinfo = inode->i_private;
++ sb = sbinfo->si_sb;
++ si_noflush_read_lock(sb);
++ if (l <= au_sbend(sb)) {
++ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
++ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
++ } else
++ err = -ENOENT;
++ si_read_unlock(sb);
++
++ out:
++ return err;
++}
++
++static struct file_operations dbgaufs_xino_fop = {
++ .open = dbgaufs_xino_open,
++ .release = dbgaufs_xi_release,
++ .read = dbgaufs_xi_read
++};
++
++void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
++{
++ aufs_bindex_t bend;
++ struct au_branch *br;
++ struct au_xino_file *xi;
++
++ if (!au_sbi(sb)->si_dbgaufs)
++ return;
++
++ bend = au_sbend(sb);
++ for (; bindex <= bend; bindex++) {
++ br = au_sbr(sb, bindex);
++ xi = &br->br_xino;
++ if (xi->xi_dbgaufs) {
++ debugfs_remove(xi->xi_dbgaufs);
++ xi->xi_dbgaufs = NULL;
++ }
++ }
++}
++
++void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
++{
++ struct au_sbinfo *sbinfo;
++ struct dentry *parent;
++ struct au_branch *br;
++ struct au_xino_file *xi;
++ aufs_bindex_t bend;
++ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */
++
++ sbinfo = au_sbi(sb);
++ parent = sbinfo->si_dbgaufs;
++ if (!parent)
++ return;
++
++ bend = au_sbend(sb);
++ for (; bindex <= bend; bindex++) {
++ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
++ br = au_sbr(sb, bindex);
++ xi = &br->br_xino;
++ AuDebugOn(xi->xi_dbgaufs);
++ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
++ sbinfo, &dbgaufs_xino_fop);
++ /* ignore an error */
++ if (unlikely(!xi->xi_dbgaufs))
++ AuWarn1("failed %s under debugfs\n", name);
++ }
++}
++
++/* ---------------------------------------------------------------------- */
++
++#ifdef CONFIG_AUFS_EXPORT
++static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
++{
++ int err;
++ struct au_sbinfo *sbinfo;
++ struct super_block *sb;
++
++ sbinfo = inode->i_private;
++ sb = sbinfo->si_sb;
++ si_noflush_read_lock(sb);
++ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
++ si_read_unlock(sb);
++ return err;
++}
++
++static struct file_operations dbgaufs_xigen_fop = {
++ .open = dbgaufs_xigen_open,
++ .release = dbgaufs_xi_release,
++ .read = dbgaufs_xi_read
++};
++
++static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
++{
++ int err;
++
++ /*
++ * This function is a dynamic '__init' fucntion actually,
++ * so the tiny check for si_rwsem is unnecessary.
++ */
++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
++
++ err = -EIO;
++ sbinfo->si_dbgaufs_xigen = debugfs_create_file
++ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
++ &dbgaufs_xigen_fop);
++ if (sbinfo->si_dbgaufs_xigen)
++ err = 0;
++
++ return err;
++}
++#else
++static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
++{
++ return 0;
++}
++#endif /* CONFIG_AUFS_EXPORT */
++
++/* ---------------------------------------------------------------------- */
++
++void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
++{
++ /*
++ * This function is a dynamic '__init' fucntion actually,
++ * so the tiny check for si_rwsem is unnecessary.
++ */
++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
++
++ debugfs_remove_recursive(sbinfo->si_dbgaufs);
++ sbinfo->si_dbgaufs = NULL;
++ kobject_put(&sbinfo->si_kobj);
++}
++
++int dbgaufs_si_init(struct au_sbinfo *sbinfo)
++{
++ int err;
++ char name[SysaufsSiNameLen];
++
++ /*
++ * This function is a dynamic '__init' fucntion actually,
++ * so the tiny check for si_rwsem is unnecessary.
++ */
++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
++
++ err = -ENOENT;
++ if (!dbgaufs) {
++ AuErr1("/debug/aufs is uninitialized\n");
++ goto out;
++ }
++
++ err = -EIO;
++ sysaufs_name(sbinfo, name);
++ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
++ if (unlikely(!sbinfo->si_dbgaufs))
++ goto out;
++ kobject_get(&sbinfo->si_kobj);
++
++ sbinfo->si_dbgaufs_xib = debugfs_create_file
++ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
++ &dbgaufs_xib_fop);
++ if (unlikely(!sbinfo->si_dbgaufs_xib))
++ goto out_dir;
++
++ err = dbgaufs_xigen_init(sbinfo);
++ if (!err)
++ goto out; /* success */
++
++ out_dir:
++ dbgaufs_si_fin(sbinfo);
++ out:
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++void dbgaufs_fin(void)
++{
++ debugfs_remove(dbgaufs);
++}
++
++int __init dbgaufs_init(void)
++{
++ int err;
++
++ err = -EIO;
++ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
++ if (dbgaufs)
++ err = 0;
++ return err;
++}
+diff --git a/fs/aufs/dbgaufs.h b/fs/aufs/dbgaufs.h
+new file mode 100644
+index 0000000..3481fc2
+--- /dev/null
++++ b/fs/aufs/dbgaufs.h
+@@ -0,0 +1,52 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * debugfs interface
++ */
++
++#ifndef __DBGAUFS_H__
++#define __DBGAUFS_H__
++
++#ifdef __KERNEL__
++
++#include <linux/init.h>
++#include <linux/aufs_type.h>
++
++struct super_block;
++struct au_sbinfo;
++
++#ifdef CONFIG_DEBUG_FS
++/* dbgaufs.c */
++void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
++void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
++void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
++int dbgaufs_si_init(struct au_sbinfo *sbinfo);
++void dbgaufs_fin(void);
++int __init dbgaufs_init(void);
++#else
++AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
++AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
++AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
++AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
++AuStubVoid(dbgaufs_fin, void)
++AuStubInt0(__init dbgaufs_init, void)
++#endif /* CONFIG_DEBUG_FS */
++
++#endif /* __KERNEL__ */
++#endif /* __DBGAUFS_H__ */
+diff --git a/fs/aufs/dcsub.c b/fs/aufs/dcsub.c
+new file mode 100644
+index 0000000..43a8cb4
+--- /dev/null
++++ b/fs/aufs/dcsub.c
+@@ -0,0 +1,223 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * sub-routines for dentry cache
++ */
++
++#include "aufs.h"
++
++static void au_dpage_free(struct au_dpage *dpage)
++{
++ int i;
++ struct dentry **p;
++
++ p = dpage->dentries;
++ for (i = 0; i < dpage->ndentry; i++)
++ dput(*p++);
++ free_page((unsigned long)dpage->dentries);
++}
++
++int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
++{
++ int err;
++ void *p;
++
++ err = -ENOMEM;
++ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
++ if (unlikely(!dpages->dpages))
++ goto out;
++
++ p = (void *)__get_free_page(gfp);
++ if (unlikely(!p))
++ goto out_dpages;
++
++ dpages->dpages[0].ndentry = 0;
++ dpages->dpages[0].dentries = p;
++ dpages->ndpage = 1;
++ return 0; /* success */
++
++ out_dpages:
++ kfree(dpages->dpages);
++ out:
++ return err;
++}
++
++void au_dpages_free(struct au_dcsub_pages *dpages)
++{
++ int i;
++ struct au_dpage *p;
++
++ p = dpages->dpages;
++ for (i = 0; i < dpages->ndpage; i++)
++ au_dpage_free(p++);
++ kfree(dpages->dpages);
++}
++
++static int au_dpages_append(struct au_dcsub_pages *dpages,
++ struct dentry *dentry, gfp_t gfp)
++{
++ int err, sz;
++ struct au_dpage *dpage;
++ void *p;
++
++ dpage = dpages->dpages + dpages->ndpage - 1;
++ sz = PAGE_SIZE / sizeof(dentry);
++ if (unlikely(dpage->ndentry >= sz)) {
++ AuLabel(new dpage);
++ err = -ENOMEM;
++ sz = dpages->ndpage * sizeof(*dpages->dpages);
++ p = au_kzrealloc(dpages->dpages, sz,
++ sz + sizeof(*dpages->dpages), gfp);
++ if (unlikely(!p))
++ goto out;
++
++ dpages->dpages = p;
++ dpage = dpages->dpages + dpages->ndpage;
++ p = (void *)__get_free_page(gfp);
++ if (unlikely(!p))
++ goto out;
++
++ dpage->ndentry = 0;
++ dpage->dentries = p;
++ dpages->ndpage++;
++ }
++
++ dpage->dentries[dpage->ndentry++] = dget(dentry);
++ return 0; /* success */
++
++ out:
++ return err;
++}
++
++int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
++ au_dpages_test test, void *arg)
++{
++ int err;
++ struct dentry *this_parent = root;
++ struct list_head *next;
++ struct super_block *sb = root->d_sb;
++
++ err = 0;
++ spin_lock(&dcache_lock);
++ repeat:
++ next = this_parent->d_subdirs.next;
++ resume:
++ if (this_parent->d_sb == sb
++ && !IS_ROOT(this_parent)
++ && atomic_read(&this_parent->d_count)
++ && this_parent->d_inode
++ && (!test || test(this_parent, arg))) {
++ err = au_dpages_append(dpages, this_parent, GFP_ATOMIC);
++ if (unlikely(err))
++ goto out;
++ }
++
++ while (next != &this_parent->d_subdirs) {
++ struct list_head *tmp = next;
++ struct dentry *dentry = list_entry(tmp, struct dentry,
++ d_u.d_child);
++ next = tmp->next;
++ if (/*d_unhashed(dentry) || */!dentry->d_inode)
++ continue;
++ if (!list_empty(&dentry->d_subdirs)) {
++ this_parent = dentry;
++ goto repeat;
++ }
++ if (dentry->d_sb == sb
++ && atomic_read(&dentry->d_count)
++ && (!test || test(dentry, arg))) {
++ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
++ if (unlikely(err))
++ goto out;
++ }
++ }
++
++ if (this_parent != root) {
++ next = this_parent->d_u.d_child.next;
++ this_parent = this_parent->d_parent; /* dcache_lock is locked */
++ goto resume;
++ }
++ out:
++ spin_unlock(&dcache_lock);
++ return err;
++}
++
++int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
++ int do_include, au_dpages_test test, void *arg)
++{
++ int err;
++
++ err = 0;
++ spin_lock(&dcache_lock);
++ if (do_include && (!test || test(dentry, arg))) {
++ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
++ if (unlikely(err))
++ goto out;
++ }
++ while (!IS_ROOT(dentry)) {
++ dentry = dentry->d_parent; /* dcache_lock is locked */
++ if (!test || test(dentry, arg)) {
++ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
++ if (unlikely(err))
++ break;
++ }
++ }
++
++ out:
++ spin_unlock(&dcache_lock);
++
++ return err;
++}
++
++struct dentry *au_test_subdir(struct dentry *d1, struct dentry *d2)
++{
++ struct dentry *trap, **dentries;
++ int err, i, j;
++ struct au_dcsub_pages dpages;
++ struct au_dpage *dpage;
++
++ trap = ERR_PTR(-ENOMEM);
++ err = au_dpages_init(&dpages, GFP_NOFS);
++ if (unlikely(err))
++ goto out;
++ err = au_dcsub_pages_rev(&dpages, d1, /*do_include*/1, NULL, NULL);
++ if (unlikely(err))
++ goto out_dpages;
++
++ trap = d1;
++ for (i = 0; !err && i < dpages.ndpage; i++) {
++ dpage = dpages.dpages + i;
++ dentries = dpage->dentries;
++ for (j = 0; !err && j < dpage->ndentry; j++) {
++ struct dentry *d;
++
++ d = dentries[j];
++ err = (d == d2);
++ if (!err)
++ trap = d;
++ }
++ }
++ if (!err)
++ trap = NULL;
++
++ out_dpages:
++ au_dpages_free(&dpages);
++ out:
++ return trap;
++}
+diff --git a/fs/aufs/dcsub.h b/fs/aufs/dcsub.h
+new file mode 100644
+index 0000000..bb934b4
+--- /dev/null
++++ b/fs/aufs/dcsub.h
+@@ -0,0 +1,54 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * sub-routines for dentry cache
++ */
++
++#ifndef __AUFS_DCSUB_H__
++#define __AUFS_DCSUB_H__
++
++#ifdef __KERNEL__
++
++#include <linux/types.h>
++
++struct dentry;
++
++struct au_dpage {
++ int ndentry;
++ struct dentry **dentries;
++};
++
++struct au_dcsub_pages {
++ int ndpage;
++ struct au_dpage *dpages;
++};
++
++/* ---------------------------------------------------------------------- */
++
++int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
++void au_dpages_free(struct au_dcsub_pages *dpages);
++typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
++int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
++ au_dpages_test test, void *arg);
++int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
++ int do_include, au_dpages_test test, void *arg);
++struct dentry *au_test_subdir(struct dentry *d1, struct dentry *d2);
++
++#endif /* __KERNEL__ */
++#endif /* __AUFS_DCSUB_H__ */
+diff --git a/fs/aufs/debug.c b/fs/aufs/debug.c
+new file mode 100644
+index 0000000..5cfd707
+--- /dev/null
++++ b/fs/aufs/debug.c
+@@ -0,0 +1,431 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * debug print functions
++ */
++
++#include <linux/module.h>
++#include <linux/vt_kern.h>
++#include "aufs.h"
++
++int aufs_debug;
++MODULE_PARM_DESC(debug, "debug print");
++module_param_named(debug, aufs_debug, int, S_IRUGO | S_IWUSR | S_IWGRP);
++
++char *au_plevel = KERN_DEBUG;
++#define dpri(fmt, ...) do { \
++ if (au_debug_test()) \
++ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
++} while (0)
++
++/* ---------------------------------------------------------------------- */
++
++void au_dpri_whlist(struct au_nhash *whlist)
++{
++ unsigned long ul, n;
++ struct hlist_head *head;
++ struct au_vdir_wh *tpos;
++ struct hlist_node *pos;
++
++ n = whlist->nh_num;
++ head = whlist->nh_head;
++ for (ul = 0; ul < n; ul++) {
++ hlist_for_each_entry(tpos, pos, head, wh_hash)
++ dpri("b%d, %.*s, %d\n",
++ tpos->wh_bindex,
++ tpos->wh_str.len, tpos->wh_str.name,
++ tpos->wh_str.len);
++ head++;
++ }
++}
++
++void au_dpri_vdir(struct au_vdir *vdir)
++{
++ unsigned long ul;
++ union au_vdir_deblk_p p;
++ unsigned char *o;
++
++ if (!vdir || IS_ERR(vdir)) {
++ dpri("err %ld\n", PTR_ERR(vdir));
++ return;
++ }
++
++ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
++ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
++ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
++ for (ul = 0; ul < vdir->vd_nblk; ul++) {
++ p.deblk = vdir->vd_deblk[ul];
++ o = p.deblk;
++ dpri("[%lu]: %p\n", ul, o);
++ }
++}
++
++static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode,
++ struct dentry *wh)
++{
++ char *n = NULL;
++ int l = 0;
++
++ if (!inode || IS_ERR(inode)) {
++ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
++ return -1;
++ }
++
++ /* the type of i_blocks depends upon CONFIG_LSF */
++ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
++ && sizeof(inode->i_blocks) != sizeof(u64));
++ if (wh) {
++ n = (void *)wh->d_name.name;
++ l = wh->d_name.len;
++ }
++
++ dpri("i%d: i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
++ " ct %lld, np %lu, st 0x%lx, f 0x%x, g %x%s%.*s\n",
++ bindex,
++ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
++ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
++ i_size_read(inode), (unsigned long long)inode->i_blocks,
++ (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff,
++ inode->i_mapping ? inode->i_mapping->nrpages : 0,
++ inode->i_state, inode->i_flags, inode->i_generation,
++ l ? ", wh " : "", l, n);
++ return 0;
++}
++
++void au_dpri_inode(struct inode *inode)
++{
++ struct au_iinfo *iinfo;
++ aufs_bindex_t bindex;
++ int err;
++
++ err = do_pri_inode(-1, inode, NULL);
++ if (err || !au_test_aufs(inode->i_sb))
++ return;
++
++ iinfo = au_ii(inode);
++ if (!iinfo)
++ return;
++ dpri("i-1: bstart %d, bend %d, gen %d\n",
++ iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode));
++ if (iinfo->ii_bstart < 0)
++ return;
++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++)
++ do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode,
++ iinfo->ii_hinode[0 + bindex].hi_whdentry);
++}
++
++static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
++{
++ struct dentry *wh = NULL;
++
++ if (!dentry || IS_ERR(dentry)) {
++ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
++ return -1;
++ }
++ /* do not call dget_parent() here */
++ dpri("d%d: %.*s?/%.*s, %s, cnt %d, flags 0x%x\n",
++ bindex,
++ AuDLNPair(dentry->d_parent), AuDLNPair(dentry),
++ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
++ atomic_read(&dentry->d_count), dentry->d_flags);
++ if (bindex >= 0 && dentry->d_inode && au_test_aufs(dentry->d_sb)) {
++ struct au_iinfo *iinfo = au_ii(dentry->d_inode);
++ if (iinfo)
++ wh = iinfo->ii_hinode[0 + bindex].hi_whdentry;
++ }
++ do_pri_inode(bindex, dentry->d_inode, wh);
++ return 0;
++}
++
++void au_dpri_dentry(struct dentry *dentry)
++{
++ struct au_dinfo *dinfo;
++ aufs_bindex_t bindex;
++ int err;
++
++ err = do_pri_dentry(-1, dentry);
++ if (err || !au_test_aufs(dentry->d_sb))
++ return;
++
++ dinfo = au_di(dentry);
++ if (!dinfo)
++ return;
++ dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d\n",
++ dinfo->di_bstart, dinfo->di_bend,
++ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry));
++ if (dinfo->di_bstart < 0)
++ return;
++ for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++)
++ do_pri_dentry(bindex, dinfo->di_hdentry[0 + bindex].hd_dentry);
++}
++
++static int do_pri_file(aufs_bindex_t bindex, struct file *file)
++{
++ char a[32];
++
++ if (!file || IS_ERR(file)) {
++ dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
++ return -1;
++ }
++ a[0] = 0;
++ if (bindex < 0
++ && file->f_dentry
++ && au_test_aufs(file->f_dentry->d_sb)
++ && au_fi(file))
++ snprintf(a, sizeof(a), ", mmapped %d", au_test_mmapped(file));
++ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, pos %llu%s\n",
++ bindex, file->f_mode, file->f_flags, (long)file_count(file),
++ file->f_pos, a);
++ if (file->f_dentry)
++ do_pri_dentry(bindex, file->f_dentry);
++ return 0;
++}
++
++void au_dpri_file(struct file *file)
++{
++ struct au_finfo *finfo;
++ aufs_bindex_t bindex;
++ int err;
++
++ err = do_pri_file(-1, file);
++ if (err || !file->f_dentry || !au_test_aufs(file->f_dentry->d_sb))
++ return;
++
++ finfo = au_fi(file);
++ if (!finfo)
++ return;
++ if (finfo->fi_bstart < 0)
++ return;
++ for (bindex = finfo->fi_bstart; bindex <= finfo->fi_bend; bindex++) {
++ struct au_hfile *hf;
++
++ hf = finfo->fi_hfile + bindex;
++ do_pri_file(bindex, hf ? hf->hf_file : NULL);
++ }
++}
++
++static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
++{
++ struct vfsmount *mnt;
++ struct super_block *sb;
++
++ if (!br || IS_ERR(br))
++ goto out;
++ mnt = br->br_mnt;
++ if (!mnt || IS_ERR(mnt))
++ goto out;
++ sb = mnt->mnt_sb;
++ if (!sb || IS_ERR(sb))
++ goto out;
++
++ dpri("s%d: {perm 0x%x, cnt %d, wbr %p}, "
++ "%s, dev 0x%02x%02x, flags 0x%lx, cnt(BIAS) %d, active %d, "
++ "xino %d\n",
++ bindex, br->br_perm, atomic_read(&br->br_count), br->br_wbr,
++ au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
++ sb->s_flags, sb->s_count - S_BIAS,
++ atomic_read(&sb->s_active), !!br->br_xino.xi_file);
++ return 0;
++
++ out:
++ dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
++ return -1;
++}
++
++void au_dpri_sb(struct super_block *sb)
++{
++ struct au_sbinfo *sbinfo;
++ aufs_bindex_t bindex;
++ int err;
++ /* to reuduce stack size */
++ struct {
++ struct vfsmount mnt;
++ struct au_branch fake;
++ } *a;
++
++ /* this function can be called from magic sysrq */
++ a = kzalloc(sizeof(*a), GFP_ATOMIC);
++ if (unlikely(!a)) {
++ dpri("no memory\n");
++ return;
++ }
++
++ a->mnt.mnt_sb = sb;
++ a->fake.br_perm = 0;
++ a->fake.br_mnt = &a->mnt;
++ a->fake.br_xino.xi_file = NULL;
++ atomic_set(&a->fake.br_count, 0);
++ smp_mb(); /* atomic_set */
++ err = do_pri_br(-1, &a->fake);
++ kfree(a);
++ dpri("dev 0x%x\n", sb->s_dev);
++ if (err || !au_test_aufs(sb))
++ return;
++
++ sbinfo = au_sbi(sb);
++ if (!sbinfo)
++ return;
++ dpri("nw %d, gen %u, kobj %d\n",
++ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
++ atomic_read(&sbinfo->si_kobj.kref.refcount));
++ for (bindex = 0; bindex <= sbinfo->si_bend; bindex++)
++ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
++}
++
++/* ---------------------------------------------------------------------- */
++
++void au_dbg_sleep_jiffy(int jiffy)
++{
++ while (jiffy)
++ jiffy = schedule_timeout_uninterruptible(jiffy);
++}
++
++void au_dbg_iattr(struct iattr *ia)
++{
++#define AuBit(name) if (ia->ia_valid & ATTR_ ## name) \
++ dpri(#name "\n")
++ AuBit(MODE);
++ AuBit(UID);
++ AuBit(GID);
++ AuBit(SIZE);
++ AuBit(ATIME);
++ AuBit(MTIME);
++ AuBit(CTIME);
++ AuBit(ATIME_SET);
++ AuBit(MTIME_SET);
++ AuBit(FORCE);
++ AuBit(ATTR_FLAG);
++ AuBit(KILL_SUID);
++ AuBit(KILL_SGID);
++ AuBit(FILE);
++ AuBit(KILL_PRIV);
++ AuBit(OPEN);
++ AuBit(TIMES_SET);
++#undef AuBit
++ dpri("ia_file %p\n", ia->ia_file);
++}
++
++/* ---------------------------------------------------------------------- */
++
++void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen)
++{
++ struct dentry *parent;
++
++ parent = dget_parent(dentry);
++ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode)
++ || IS_ROOT(dentry)
++ || au_digen(parent) != sigen);
++ dput(parent);
++}
++
++void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen)
++{
++ struct dentry *parent;
++
++ parent = dget_parent(dentry);
++ AuDebugOn(S_ISDIR(dentry->d_inode->i_mode)
++ || au_digen(parent) != sigen);
++ dput(parent);
++}
++
++void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
++{
++ int err, i, j;
++ struct au_dcsub_pages dpages;
++ struct au_dpage *dpage;
++ struct dentry **dentries;
++
++ err = au_dpages_init(&dpages, GFP_NOFS);
++ AuDebugOn(err);
++ err = au_dcsub_pages_rev(&dpages, parent, /*do_include*/1, NULL, NULL);
++ AuDebugOn(err);
++ for (i = dpages.ndpage - 1; !err && i >= 0; i--) {
++ dpage = dpages.dpages + i;
++ dentries = dpage->dentries;
++ for (j = dpage->ndentry - 1; !err && j >= 0; j--)
++ AuDebugOn(au_digen(dentries[j]) != sigen);
++ }
++ au_dpages_free(&dpages);
++}
++
++void au_dbg_verify_hf(struct au_finfo *finfo)
++{
++ struct au_hfile *hf;
++ aufs_bindex_t bend, bindex;
++
++ if (finfo->fi_bstart >= 0) {
++ bend = finfo->fi_bend;
++ for (bindex = finfo->fi_bstart; bindex <= bend; bindex++) {
++ hf = finfo->fi_hfile + bindex;
++ AuDebugOn(hf->hf_file || hf->hf_br);
++ }
++ }
++}
++
++void au_dbg_verify_kthread(void)
++{
++ if (au_test_wkq(current)) {
++ au_dbg_blocked();
++ BUG();
++ }
++}
++
++/* ---------------------------------------------------------------------- */
++
++void au_debug_sbinfo_init(struct au_sbinfo *sbinfo __maybe_unused)
++{
++#ifdef AuForceNoPlink
++ au_opt_clr(sbinfo->si_mntflags, PLINK);
++#endif
++#ifdef AuForceNoXino
++ au_opt_clr(sbinfo->si_mntflags, XINO);
++#endif
++#ifdef AuForceNoRefrof
++ au_opt_clr(sbinfo->si_mntflags, REFROF);
++#endif
++#ifdef AuForceHinotify
++ au_opt_set_udba(sbinfo->si_mntflags, UDBA_HINOTIFY);
++#endif
++#ifdef AuForceRd0
++ sbinfo->si_rdblk = 0;
++ sbinfo->si_rdhash = 0;
++#endif
++}
++
++int __init au_debug_init(void)
++{
++ aufs_bindex_t bindex;
++ struct au_vdir_destr destr;
++
++ bindex = -1;
++ AuDebugOn(bindex >= 0);
++
++ destr.len = -1;
++ AuDebugOn(destr.len < NAME_MAX);
++
++#ifdef CONFIG_4KSTACKS
++ AuWarn("CONFIG_4KSTACKS is defined.\n");
++#endif
++
++#ifdef AuForceNoBrs
++ sysaufs_brs = 0;
++#endif
++
++ return 0;
++}
+diff --git a/fs/aufs/debug.h b/fs/aufs/debug.h
+new file mode 100644
+index 0000000..dbcbf31
+--- /dev/null
++++ b/fs/aufs/debug.h
+@@ -0,0 +1,232 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * debug print functions
++ */
++
++#ifndef __AUFS_DEBUG_H__
++#define __AUFS_DEBUG_H__
++
++#ifdef __KERNEL__
++
++#include <linux/bug.h>
++/* #include <linux/err.h> */
++/* #include <linux/init.h> */
++/* #include <linux/kernel.h> */
++#include <linux/delay.h>
++/* #include <linux/kd.h> */
++/* #include <linux/vt_kern.h> */
++#include <linux/sysrq.h>
++#include <linux/aufs_type.h>
++
++#ifdef CONFIG_AUFS_DEBUG
++#define AuDebugOn(a) BUG_ON(a)
++
++/* module parameter */
++extern int aufs_debug;
++static inline void au_debug(int n)
++{
++ aufs_debug = n;
++ smp_mb();
++}
++
++static inline int au_debug_test(void)
++{
++ return aufs_debug;
++}
++#else
++#define AuDebugOn(a) do {} while (0)
++AuStubVoid(au_debug, int n)
++AuStubInt0(au_debug_test, void)
++#endif /* CONFIG_AUFS_DEBUG */
++
++/* ---------------------------------------------------------------------- */
++
++/* debug print */
++
++#define AuDpri(lvl, fmt, ...) \
++ printk(lvl AUFS_NAME " %s:%d:%s[%d]: " fmt, \
++ __func__, __LINE__, current->comm, current->pid, ##__VA_ARGS__)
++#define AuDbg(fmt, ...) do { \
++ if (au_debug_test()) \
++ AuDpri(KERN_DEBUG, "DEBUG: " fmt, ##__VA_ARGS__); \
++} while (0)
++#define AuLabel(l) AuDbg(#l "\n")
++#define AuInfo(fmt, ...) AuDpri(KERN_INFO, fmt, ##__VA_ARGS__)
++#define AuWarn(fmt, ...) AuDpri(KERN_WARNING, fmt, ##__VA_ARGS__)
++#define AuErr(fmt, ...) AuDpri(KERN_ERR, fmt, ##__VA_ARGS__)
++#define AuIOErr(fmt, ...) AuErr("I/O Error, " fmt, ##__VA_ARGS__)
++#define AuWarn1(fmt, ...) do { \
++ static unsigned char _c; \
++ if (!_c++) \
++ AuWarn(fmt, ##__VA_ARGS__); \
++} while (0)
++
++#define AuErr1(fmt, ...) do { \
++ static unsigned char _c; \
++ if (!_c++) \
++ AuErr(fmt, ##__VA_ARGS__); \
++} while (0)
++
++#define AuIOErr1(fmt, ...) do { \
++ static unsigned char _c; \
++ if (!_c++) \
++ AuIOErr(fmt, ##__VA_ARGS__); \
++} while (0)
++
++#define AuUnsupportMsg "This operation is not supported." \
++ " Please report this application to aufs-users ML."
++#define AuUnsupport(fmt, ...) do { \
++ AuErr(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
++ dump_stack(); \
++} while (0)
++
++#define AuTraceErr(e) do { \
++ if (unlikely((e) < 0)) \
++ AuDbg("err %d\n", (int)(e)); \
++} while (0)
++
++#define AuTraceErrPtr(p) do { \
++ if (IS_ERR(p)) \
++ AuDbg("err %ld\n", PTR_ERR(p)); \
++} while (0)
++
++/* dirty macros for debug print, use with "%.*s" and caution */
++#define AuLNPair(qstr) (qstr)->len, (qstr)->name
++#define AuDLNPair(d) AuLNPair(&(d)->d_name)
++
++/* ---------------------------------------------------------------------- */
++
++struct au_sbinfo;
++struct au_finfo;
++struct dentry;
++#ifdef CONFIG_AUFS_DEBUG
++extern char *au_plevel;
++struct au_nhash;
++void au_dpri_whlist(struct au_nhash *whlist);
++struct au_vdir;
++void au_dpri_vdir(struct au_vdir *vdir);
++struct inode;
++void au_dpri_inode(struct inode *inode);
++void au_dpri_dentry(struct dentry *dentry);
++struct file;
++void au_dpri_file(struct file *filp);
++struct super_block;
++void au_dpri_sb(struct super_block *sb);
++
++void au_dbg_sleep_jiffy(int jiffy);
++struct iattr;
++void au_dbg_iattr(struct iattr *ia);
++
++void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen);
++void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen);
++void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
++void au_dbg_verify_hf(struct au_finfo *finfo);
++void au_dbg_verify_kthread(void);
++
++int __init au_debug_init(void);
++void au_debug_sbinfo_init(struct au_sbinfo *sbinfo);
++#define AuDbgWhlist(w) do { \
++ AuDbg(#w "\n"); \
++ au_dpri_whlist(w); \
++} while (0)
++
++#define AuDbgVdir(v) do { \
++ AuDbg(#v "\n"); \
++ au_dpri_vdir(v); \
++} while (0)
++
++#define AuDbgInode(i) do { \
++ AuDbg(#i "\n"); \
++ au_dpri_inode(i); \
++} while (0)
++
++#define AuDbgDentry(d) do { \
++ AuDbg(#d "\n"); \
++ au_dpri_dentry(d); \
++} while (0)
++
++#define AuDbgFile(f) do { \
++ AuDbg(#f "\n"); \
++ au_dpri_file(f); \
++} while (0)
++
++#define AuDbgSb(sb) do { \
++ AuDbg(#sb "\n"); \
++ au_dpri_sb(sb); \
++} while (0)
++
++#define AuDbgSleep(sec) do { \
++ AuDbg("sleep %d sec\n", sec); \
++ ssleep(sec); \
++} while (0)
++
++#define AuDbgSleepJiffy(jiffy) do { \
++ AuDbg("sleep %d jiffies\n", jiffy); \
++ au_dbg_sleep_jiffy(jiffy); \
++} while (0)
++
++#define AuDbgIAttr(ia) do { \
++ AuDbg("ia_valid 0x%x\n", (ia)->ia_valid); \
++ au_dbg_iattr(ia); \
++} while (0)
++#else
++AuStubVoid(au_dbg_verify_dir_parent, struct dentry *dentry, unsigned int sigen)
++AuStubVoid(au_dbg_verify_nondir_parent, struct dentry *dentry,
++ unsigned int sigen)
++AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
++AuStubVoid(au_dbg_verify_hf, struct au_finfo *finfo)
++AuStubVoid(au_dbg_verify_kthread, void)
++AuStubInt0(__init au_debug_init, void)
++AuStubVoid(au_debug_sbinfo_init, struct au_sbinfo *sbinfo)
++
++#define AuDbgWhlist(w) do {} while (0)
++#define AuDbgVdir(v) do {} while (0)
++#define AuDbgInode(i) do {} while (0)
++#define AuDbgDentry(d) do {} while (0)
++#define AuDbgFile(f) do {} while (0)
++#define AuDbgSb(sb) do {} while (0)
++#define AuDbgSleep(sec) do {} while (0)
++#define AuDbgSleepJiffy(jiffy) do {} while (0)
++#define AuDbgIAttr(ia) do {} while (0)
++#endif /* CONFIG_AUFS_DEBUG */
++
++/* ---------------------------------------------------------------------- */
++
++#ifdef CONFIG_AUFS_MAGIC_SYSRQ
++int __init au_sysrq_init(void);
++void au_sysrq_fin(void);
++
++#ifdef CONFIG_HW_CONSOLE
++#define au_dbg_blocked() do { \
++ WARN_ON(1); \
++ handle_sysrq('w', vc_cons[fg_console].d->vc_tty); \
++} while (0)
++#else
++AuStubVoid(au_dbg_blocked, void)
++#endif
++
++#else
++AuStubInt0(__init au_sysrq_init, void)
++AuStubVoid(au_sysrq_fin, void)
++AuStubVoid(au_dbg_blocked, void)
++#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
++
++#endif /* __KERNEL__ */
++#endif /* __AUFS_DEBUG_H__ */
+diff --git a/fs/aufs/dentry.c b/fs/aufs/dentry.c
+new file mode 100644
+index 0000000..2b18f3c
+--- /dev/null
++++ b/fs/aufs/dentry.c
+@@ -0,0 +1,875 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * lookup and dentry operations
++ */
++
++#include <linux/namei.h>
++#include "aufs.h"
++
++static void au_h_nd(struct nameidata *h_nd, struct nameidata *nd)
++{
++ if (nd) {
++ *h_nd = *nd;
++
++ /*
++ * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
++ * due to whiteout and branch permission.
++ */
++ h_nd->flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
++ | LOOKUP_FOLLOW);
++ /* unnecessary? */
++ h_nd->intent.open.file = NULL;
++ } else
++ memset(h_nd, 0, sizeof(*h_nd));
++}
++
++struct au_lkup_one_args {
++ struct dentry **errp;
++ struct qstr *name;
++ struct dentry *h_parent;
++ struct au_branch *br;
++ struct nameidata *nd;
++};
++
++struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
++ struct au_branch *br, struct nameidata *nd)
++{
++ struct dentry *h_dentry;
++ int err;
++ struct nameidata h_nd;
++
++ if (au_test_fs_null_nd(h_parent->d_sb))
++ return vfsub_lookup_one_len(name->name, h_parent, name->len);
++
++ au_h_nd(&h_nd, nd);
++ h_nd.path.dentry = h_parent;
++ h_nd.path.mnt = br->br_mnt;
++
++ err = __lookup_one_len(name->name, &h_nd.last, NULL, name->len);
++ h_dentry = ERR_PTR(err);
++ if (!err) {
++ path_get(&h_nd.path);
++ h_dentry = vfsub_lookup_hash(&h_nd);
++ path_put(&h_nd.path);
++ }
++
++ AuTraceErrPtr(h_dentry);
++ return h_dentry;
++}
++
++static void au_call_lkup_one(void *args)
++{
++ struct au_lkup_one_args *a = args;
++ *a->errp = au_lkup_one(a->name, a->h_parent, a->br, a->nd);
++}
++
++#define AuLkup_ALLOW_NEG 1
++#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name)
++#define au_fset_lkup(flags, name) { (flags) |= AuLkup_##name; }
++#define au_fclr_lkup(flags, name) { (flags) &= ~AuLkup_##name; }
++
++struct au_do_lookup_args {
++ unsigned int flags;
++ mode_t type;
++ struct nameidata *nd;
++};
++
++/*
++ * returns positive/negative dentry, NULL or an error.
++ * NULL means whiteout-ed or not-found.
++ */
++static struct dentry*
++au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
++ aufs_bindex_t bindex, struct qstr *wh_name,
++ struct au_do_lookup_args *args)
++{
++ struct dentry *h_dentry;
++ struct inode *h_inode, *inode;
++ struct qstr *name;
++ struct au_branch *br;
++ int wh_found, opq;
++ unsigned char wh_able;
++ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
++
++ name = &dentry->d_name;
++ wh_found = 0;
++ br = au_sbr(dentry->d_sb, bindex);
++ wh_able = !!au_br_whable(br->br_perm);
++ if (wh_able)
++ wh_found = au_wh_test(h_parent, wh_name, br, /*try_sio*/0);
++ h_dentry = ERR_PTR(wh_found);
++ if (!wh_found)
++ goto real_lookup;
++ if (unlikely(wh_found < 0))
++ goto out;
++
++ /* We found a whiteout */
++ /* au_set_dbend(dentry, bindex); */
++ au_set_dbwh(dentry, bindex);
++ if (!allow_neg)
++ return NULL; /* success */
++
++ real_lookup:
++ h_dentry = au_lkup_one(name, h_parent, br, args->nd);
++ if (IS_ERR(h_dentry))
++ goto out;
++
++ h_inode = h_dentry->d_inode;
++ if (!h_inode) {
++ if (!allow_neg)
++ goto out_neg;
++ } else if (wh_found
++ || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
++ goto out_neg;
++
++ if (au_dbend(dentry) <= bindex)
++ au_set_dbend(dentry, bindex);
++ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
++ au_set_dbstart(dentry, bindex);
++ au_set_h_dptr(dentry, bindex, h_dentry);
++
++ inode = dentry->d_inode;
++ if (!h_inode || !S_ISDIR(h_inode->i_mode) || !wh_able
++ || (inode && !S_ISDIR(inode->i_mode)))
++ goto out; /* success */
++
++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
++ opq = au_diropq_test(h_dentry, br);
++ mutex_unlock(&h_inode->i_mutex);
++ if (opq > 0)
++ au_set_dbdiropq(dentry, bindex);
++ else if (unlikely(opq < 0)) {
++ au_set_h_dptr(dentry, bindex, NULL);
++ h_dentry = ERR_PTR(opq);
++ }
++ goto out;
++
++ out_neg:
++ dput(h_dentry);
++ h_dentry = NULL;
++ out:
++ return h_dentry;
++}
++
++static int au_test_shwh(struct super_block *sb, const struct qstr *name)
++{
++ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
++ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
++ return -EPERM;
++ return 0;
++}
++
++/*
++ * returns the number of lower positive dentries,
++ * otherwise an error.
++ * can be called at unlinking with @type is zero.
++ */
++int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
++ struct nameidata *nd)
++{
++ int npositive, err;
++ aufs_bindex_t bindex, btail, bdiropq;
++ unsigned char isdir;
++ struct qstr whname;
++ struct au_do_lookup_args args = {
++ .flags = 0,
++ .type = type,
++ .nd = nd
++ };
++ const struct qstr *name = &dentry->d_name;
++ struct dentry *parent;
++ struct inode *inode;
++
++ parent = dget_parent(dentry);
++ err = au_test_shwh(dentry->d_sb, name);
++ if (unlikely(err))
++ goto out;
++
++ err = au_wh_name_alloc(&whname, name);
++ if (unlikely(err))
++ goto out;
++
++ inode = dentry->d_inode;
++ isdir = !!(inode && S_ISDIR(inode->i_mode));
++ if (!type)
++ au_fset_lkup(args.flags, ALLOW_NEG);
++
++ npositive = 0;
++ btail = au_dbtaildir(parent);
++ for (bindex = bstart; bindex <= btail; bindex++) {
++ struct dentry *h_parent, *h_dentry;
++ struct inode *h_inode, *h_dir;
++
++ h_dentry = au_h_dptr(dentry, bindex);
++ if (h_dentry) {
++ if (h_dentry->d_inode)
++ npositive++;
++ if (type != S_IFDIR)
++ break;
++ continue;
++ }
++ h_parent = au_h_dptr(parent, bindex);
++ if (!h_parent)
++ continue;
++ h_dir = h_parent->d_inode;
++ if (!h_dir || !S_ISDIR(h_dir->i_mode))
++ continue;
++
++ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
++ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
++ &args);
++ mutex_unlock(&h_dir->i_mutex);
++ err = PTR_ERR(h_dentry);
++ if (IS_ERR(h_dentry))
++ goto out_wh;
++ au_fclr_lkup(args.flags, ALLOW_NEG);
++
++ if (au_dbwh(dentry) >= 0)
++ break;
++ if (!h_dentry)
++ continue;
++ h_inode = h_dentry->d_inode;
++ if (!h_inode)
++ continue;
++ npositive++;
++ if (!args.type)
++ args.type = h_inode->i_mode & S_IFMT;
++ if (args.type != S_IFDIR)
++ break;
++ else if (isdir) {
++ /* the type of lower may be different */
++ bdiropq = au_dbdiropq(dentry);
++ if (bdiropq >= 0 && bdiropq <= bindex)
++ break;
++ }
++ }
++
++ if (npositive) {
++ AuLabel(positive);
++ au_update_dbstart(dentry);
++ }
++ err = npositive;
++ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
++ && au_dbstart(dentry) < 0))
++ /* both of real entry and whiteout found */
++ err = -EIO;
++
++ out_wh:
++ kfree(whname.name);
++ out:
++ dput(parent);
++ return err;
++}
++
++struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
++ struct au_branch *br)
++{
++ struct dentry *dentry;
++ int wkq_err;
++
++ if (!au_test_h_perm_sio(parent->d_inode, MAY_EXEC))
++ dentry = au_lkup_one(name, parent, br, /*nd*/NULL);
++ else {
++ struct au_lkup_one_args args = {
++ .errp = &dentry,
++ .name = name,
++ .h_parent = parent,
++ .br = br,
++ .nd = NULL
++ };
++
++ wkq_err = au_wkq_wait(au_call_lkup_one, &args);
++ if (unlikely(wkq_err))
++ dentry = ERR_PTR(wkq_err);
++ }
++
++ return dentry;
++}
++
++/*
++ * lookup @dentry on @bindex which should be negative.
++ */
++int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex)
++{
++ int err;
++ struct dentry *parent, *h_parent, *h_dentry;
++ struct qstr *name;
++
++ name = &dentry->d_name;
++ parent = dget_parent(dentry);
++ h_parent = au_h_dptr(parent, bindex);
++ h_dentry = au_sio_lkup_one(name, h_parent,
++ au_sbr(dentry->d_sb, bindex));
++ err = PTR_ERR(h_dentry);
++ if (IS_ERR(h_dentry))
++ goto out;
++ if (unlikely(h_dentry->d_inode)) {
++ err = -EIO;
++ AuIOErr("b%d %.*s should be negative.\n",
++ bindex, AuDLNPair(h_dentry));
++ dput(h_dentry);
++ goto out;
++ }
++
++ if (bindex < au_dbstart(dentry))
++ au_set_dbstart(dentry, bindex);
++ if (au_dbend(dentry) < bindex)
++ au_set_dbend(dentry, bindex);
++ au_set_h_dptr(dentry, bindex, h_dentry);
++ err = 0;
++
++ out:
++ dput(parent);
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++/* subset of struct inode */
++struct au_iattr {
++ unsigned long i_ino;
++ /* unsigned int i_nlink; */
++ uid_t i_uid;
++ gid_t i_gid;
++ u64 i_version;
++/*
++ loff_t i_size;
++ blkcnt_t i_blocks;
++*/
++ umode_t i_mode;
++};
++
++static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
++{
++ ia->i_ino = h_inode->i_ino;
++ /* ia->i_nlink = h_inode->i_nlink; */
++ ia->i_uid = h_inode->i_uid;
++ ia->i_gid = h_inode->i_gid;
++ ia->i_version = h_inode->i_version;
++/*
++ ia->i_size = h_inode->i_size;
++ ia->i_blocks = h_inode->i_blocks;
++*/
++ ia->i_mode = (h_inode->i_mode & S_IFMT);
++}
++
++static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
++{
++ return ia->i_ino != h_inode->i_ino
++ /* || ia->i_nlink != h_inode->i_nlink */
++ || ia->i_uid != h_inode->i_uid
++ || ia->i_gid != h_inode->i_gid
++ || ia->i_version != h_inode->i_version
++/*
++ || ia->i_size != h_inode->i_size
++ || ia->i_blocks != h_inode->i_blocks
++*/
++ || ia->i_mode != (h_inode->i_mode & S_IFMT);
++}
++
++static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
++ struct au_branch *br)
++{
++ int err;
++ struct au_iattr ia;
++ struct inode *h_inode;
++ struct dentry *h_d;
++
++ memset(&ia, -1, sizeof(ia));
++ h_inode = h_dentry->d_inode;
++ if (h_inode)
++ au_iattr_save(&ia, h_inode);
++
++ /* main purpose is namei.c:cached_lookup() and d_revalidate */
++ h_d = au_lkup_one(&h_dentry->d_name, h_parent, br, /*nd*/NULL);
++ err = PTR_ERR(h_d);
++ if (IS_ERR(h_d))
++ goto out;
++
++ /* fuse d_revalidate always return 0 for negative dentries */
++ err = 0;
++ if (unlikely((h_d != h_dentry
++ || h_d->d_inode != h_inode
++ || (h_inode && au_iattr_test(&ia, h_inode)))
++ && !au_test_fuse(h_parent->d_sb)))
++ err = au_busy_or_stale();
++ dput(h_d);
++
++ out:
++ return err;
++}
++
++int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
++ struct dentry *h_parent, struct au_branch *br)
++{
++ int err;
++
++ err = 0;
++ if (udba == AuOpt_UDBA_REVAL) {
++ IMustLock(h_dir);
++ err = (h_dentry->d_parent->d_inode != h_dir);
++ } else if (udba == AuOpt_UDBA_HINOTIFY)
++ err = au_h_verify_dentry(h_dentry, h_parent, br);
++
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++static void au_do_refresh_hdentry(struct au_hdentry *p, struct au_dinfo *dinfo,
++ struct dentry *parent)
++{
++ struct dentry *h_d, *h_dp;
++ struct au_hdentry tmp, *q;
++ struct super_block *sb;
++ aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq;
++
++ AuRwMustWriteLock(&dinfo->di_rwsem);
++
++ bend = dinfo->di_bend;
++ bwh = dinfo->di_bwh;
++ bdiropq = dinfo->di_bdiropq;
++ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) {
++ h_d = p->hd_dentry;
++ if (!h_d)
++ continue;
++
++ h_dp = dget_parent(h_d);
++ if (h_dp == au_h_dptr(parent, bindex)) {
++ dput(h_dp);
++ continue;
++ }
++
++ new_bindex = au_find_dbindex(parent, h_dp);
++ dput(h_dp);
++ if (dinfo->di_bwh == bindex)
++ bwh = new_bindex;
++ if (dinfo->di_bdiropq == bindex)
++ bdiropq = new_bindex;
++ if (new_bindex < 0) {
++ au_hdput(p);
++ p->hd_dentry = NULL;
++ continue;
++ }
++
++ /* swap two lower dentries, and loop again */
++ q = dinfo->di_hdentry + new_bindex;
++ tmp = *q;
++ *q = *p;
++ *p = tmp;
++ if (tmp.hd_dentry) {
++ bindex--;
++ p--;
++ }
++ }
++
++ sb = parent->d_sb;
++ dinfo->di_bwh = -1;
++ if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh))
++ dinfo->di_bwh = bwh;
++
++ dinfo->di_bdiropq = -1;
++ if (bdiropq >= 0
++ && bdiropq <= au_sbend(sb)
++ && au_sbr_whable(sb, bdiropq))
++ dinfo->di_bdiropq = bdiropq;
++
++ bend = au_dbend(parent);
++ p = dinfo->di_hdentry;
++ for (bindex = 0; bindex <= bend; bindex++, p++)
++ if (p->hd_dentry) {
++ dinfo->di_bstart = bindex;
++ break;
++ }
++
++ p = dinfo->di_hdentry + bend;
++ for (bindex = bend; bindex >= 0; bindex--, p--)
++ if (p->hd_dentry) {
++ dinfo->di_bend = bindex;
++ break;
++ }
++}
++
++/*
++ * returns the number of found lower positive dentries,
++ * otherwise an error.
++ */
++int au_refresh_hdentry(struct dentry *dentry, mode_t type)
++{
++ int npositive, err;
++ unsigned int sigen;
++ aufs_bindex_t bstart;
++ struct au_dinfo *dinfo;
++ struct super_block *sb;
++ struct dentry *parent;
++
++ DiMustWriteLock(dentry);
++
++ sb = dentry->d_sb;
++ AuDebugOn(IS_ROOT(dentry));
++ sigen = au_sigen(sb);
++ parent = dget_parent(dentry);
++ AuDebugOn(au_digen(parent) != sigen
++ || au_iigen(parent->d_inode) != sigen);
++
++ dinfo = au_di(dentry);
++ err = au_di_realloc(dinfo, au_sbend(sb) + 1);
++ npositive = err;
++ if (unlikely(err))
++ goto out;
++ au_do_refresh_hdentry(dinfo->di_hdentry + dinfo->di_bstart, dinfo,
++ parent);
++
++ npositive = 0;
++ bstart = au_dbstart(parent);
++ if (type != S_IFDIR && dinfo->di_bstart == bstart)
++ goto out_dgen; /* success */
++
++ npositive = au_lkup_dentry(dentry, bstart, type, /*nd*/NULL);
++ if (npositive < 0)
++ goto out;
++ if (dinfo->di_bwh >= 0 && dinfo->di_bwh <= dinfo->di_bstart)
++ d_drop(dentry);
++
++ out_dgen:
++ au_update_digen(dentry);
++ out:
++ dput(parent);
++ AuTraceErr(npositive);
++ return npositive;
++}
++
++static noinline_for_stack
++int au_do_h_d_reval(struct dentry *h_dentry, struct nameidata *nd,
++ struct dentry *dentry, aufs_bindex_t bindex)
++{
++ int err, valid;
++ int (*reval)(struct dentry *, struct nameidata *);
++
++ err = 0;
++ reval = NULL;
++ if (h_dentry->d_op)
++ reval = h_dentry->d_op->d_revalidate;
++ if (!reval)
++ goto out;
++
++ AuDbg("b%d\n", bindex);
++ if (au_test_fs_null_nd(h_dentry->d_sb))
++ /* it may return tri-state */
++ valid = reval(h_dentry, NULL);
++ else {
++ struct nameidata h_nd;
++ int locked;
++ struct dentry *parent;
++
++ au_h_nd(&h_nd, nd);
++ parent = nd->path.dentry;
++ locked = (nd && nd->path.dentry != dentry);
++ if (locked)
++ di_read_lock_parent(parent, AuLock_IR);
++ BUG_ON(bindex > au_dbend(parent));
++ h_nd.path.dentry = au_h_dptr(parent, bindex);
++ BUG_ON(!h_nd.path.dentry);
++ h_nd.path.mnt = au_sbr(parent->d_sb, bindex)->br_mnt;
++ path_get(&h_nd.path);
++ valid = reval(h_dentry, &h_nd);
++ path_put(&h_nd.path);
++ if (locked)
++ di_read_unlock(parent, AuLock_IR);
++ }
++
++ if (unlikely(valid < 0))
++ err = valid;
++ else if (!valid)
++ err = -EINVAL;
++
++ out:
++ AuTraceErr(err);
++ return err;
++}
++
++/* todo: remove this */
++static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
++ struct nameidata *nd, int do_udba)
++{
++ int err;
++ umode_t mode, h_mode;
++ aufs_bindex_t bindex, btail, bstart, ibs, ibe;
++ unsigned char plus, unhashed, is_root, h_plus;
++ struct inode *first, *h_inode, *h_cached_inode;
++ struct dentry *h_dentry;
++ struct qstr *name, *h_name;
++
++ err = 0;
++ plus = 0;
++ mode = 0;
++ first = NULL;
++ ibs = -1;
++ ibe = -1;
++ unhashed = !!d_unhashed(dentry);
++ is_root = !!IS_ROOT(dentry);
++ name = &dentry->d_name;
++
++ /*
++ * Theoretically, REVAL test should be unnecessary in case of INOTIFY.
++ * But inotify doesn't fire some necessary events,
++ * IN_ATTRIB for atime/nlink/pageio
++ * IN_DELETE for NFS dentry
++ * Let's do REVAL test too.
++ */
++ if (do_udba && inode) {
++ mode = (inode->i_mode & S_IFMT);
++ plus = (inode->i_nlink > 0);
++ first = au_h_iptr(inode, au_ibstart(inode));
++ ibs = au_ibstart(inode);
++ ibe = au_ibend(inode);
++ }
++
++ bstart = au_dbstart(dentry);
++ btail = bstart;
++ if (inode && S_ISDIR(inode->i_mode))
++ btail = au_dbtaildir(dentry);
++ for (bindex = bstart; bindex <= btail; bindex++) {
++ h_dentry = au_h_dptr(dentry, bindex);
++ if (!h_dentry)
++ continue;
++
++ AuDbg("b%d, %.*s\n", bindex, AuDLNPair(h_dentry));
++ h_name = &h_dentry->d_name;
++ if (unlikely(do_udba
++ && !is_root
++ && (unhashed != !!d_unhashed(h_dentry)
++ || name->len != h_name->len
++ || memcmp(name->name, h_name->name, name->len))
++ )) {
++ AuDbg("unhash 0x%x 0x%x, %.*s %.*s\n",
++ unhashed, d_unhashed(h_dentry),
++ AuDLNPair(dentry), AuDLNPair(h_dentry));
++ goto err;
++ }
++
++ err = au_do_h_d_reval(h_dentry, nd, dentry, bindex);
++ if (unlikely(err))
++ /* do not goto err, to keep the errno */
++ break;
++
++ /* todo: plink too? */
++ if (!do_udba)
++ continue;
++
++ /* UDBA tests */
++ h_inode = h_dentry->d_inode;
++ if (unlikely(!!inode != !!h_inode))
++ goto err;
++
++ h_plus = plus;
++ h_mode = mode;
++ h_cached_inode = h_inode;
++ if (h_inode) {
++ h_mode = (h_inode->i_mode & S_IFMT);
++ h_plus = (h_inode->i_nlink > 0);
++ }
++ if (inode && ibs <= bindex && bindex <= ibe)
++ h_cached_inode = au_h_iptr(inode, bindex);
++
++ if (unlikely(plus != h_plus
++ || mode != h_mode
++ || h_cached_inode != h_inode))
++ goto err;
++ continue;
++
++ err:
++ err = -EINVAL;
++ break;
++ }
++
++ return err;
++}
++
++static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
++{
++ int err;
++ struct dentry *parent;
++ struct inode *inode;
++
++ inode = dentry->d_inode;
++ if (au_digen(dentry) == sigen && au_iigen(inode) == sigen)
++ return 0;
++
++ parent = dget_parent(dentry);
++ di_read_lock_parent(parent, AuLock_IR);
++ AuDebugOn(au_digen(parent) != sigen
++ || au_iigen(parent->d_inode) != sigen);
++ au_dbg_verify_gen(parent, sigen);
++
++ /* returns a number of positive dentries */
++ err = au_refresh_hdentry(dentry, inode->i_mode & S_IFMT);
++ if (err >= 0)
++ err = au_refresh_hinode(inode, dentry);
++
++ di_read_unlock(parent, AuLock_IR);
++ dput(parent);
++ return err;
++}
++
++int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
++{
++ int err;
++ struct dentry *d, *parent;
++ struct inode *inode;
++
++ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIRS))
++ return simple_reval_dpath(dentry, sigen);
++
++ /* slow loop, keep it simple and stupid */
++ /* cf: au_cpup_dirs() */
++ err = 0;
++ parent = NULL;
++ while (au_digen(dentry) != sigen
++ || au_iigen(dentry->d_inode) != sigen) {
++ d = dentry;
++ while (1) {
++ dput(parent);
++ parent = dget_parent(d);
++ if (au_digen(parent) == sigen
++ && au_iigen(parent->d_inode) == sigen)
++ break;
++ d = parent;
++ }
++
++ inode = d->d_inode;
++ if (d != dentry)
++ di_write_lock_child(d);
++
++ /* someone might update our dentry while we were sleeping */
++ if (au_digen(d) != sigen || au_iigen(d->d_inode) != sigen) {
++ di_read_lock_parent(parent, AuLock_IR);
++ /* returns a number of positive dentries */
++ err = au_refresh_hdentry(d, inode->i_mode & S_IFMT);
++ if (err >= 0)
++ err = au_refresh_hinode(inode, d);
++ di_read_unlock(parent, AuLock_IR);
++ }
++
++ if (d != dentry)
++ di_write_unlock(d);
++ dput(parent);
++ if (unlikely(err))
++ break;
++ }
++
++ return err;
++}
++
++/*
++ * if valid returns 1, otherwise 0.
++ */
++static int aufs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
++{
++ int valid, err;
++ unsigned int sigen;
++ unsigned char do_udba;
++ struct super_block *sb;
++ struct inode *inode;
++
++ err = -EINVAL;
++ sb = dentry->d_sb;
++ inode = dentry->d_inode;
++ aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW);
++ sigen = au_sigen(sb);
++ if (au_digen(dentry) != sigen) {
++ AuDebugOn(IS_ROOT(dentry));
++ if (inode)
++ err = au_reval_dpath(dentry, sigen);
++ if (unlikely(err))
++ goto out_dgrade;
++ AuDebugOn(au_digen(dentry) != sigen);
++ }
++ if (inode && au_iigen(inode) != sigen) {
++ AuDebugOn(IS_ROOT(dentry));
++ err = au_refresh_hinode(inode, dentry);
++ if (unlikely(err))
++ goto out_dgrade;
++ AuDebugOn(au_iigen(inode) != sigen);
++ }
++ di_downgrade_lock(dentry, AuLock_IR);
++
++ AuDebugOn(au_digen(dentry) != sigen);
++ AuDebugOn(inode && au_iigen(inode) != sigen);
++ err = -EINVAL;
++ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
++ if (do_udba && inode) {
++ aufs_bindex_t bstart = au_ibstart(inode);
++
++ if (bstart >= 0
++ && au_test_higen(inode, au_h_iptr(inode, bstart)))
++ goto out;
++ }
++
++ err = h_d_revalidate(dentry, inode, nd, do_udba);
++ if (unlikely(!err && do_udba && au_dbstart(dentry) < 0))
++ /* both of real entry and whiteout found */
++ err = -EIO;
++ goto out;
++
++ out_dgrade:
++ di_downgrade_lock(dentry, AuLock_IR);
++ out:
++ au_store_oflag(nd, inode);
++ aufs_read_unlock(dentry, AuLock_IR);
++ AuTraceErr(err);
++ valid = !err;
++ if (!valid)
++ AuDbg("%.*s invalid\n", AuDLNPair(dentry));
++ return valid;
++}
++
++static void aufs_d_release(struct dentry *dentry)
++{
++ struct au_dinfo *dinfo;
++ aufs_bindex_t bend, bindex;
++
++ dinfo = dentry->d_fsdata;
++ if (!dinfo)
++ return;
++
++ /* dentry may not be revalidated */
++ bindex = dinfo->di_bstart;
++ if (bindex >= 0) {
++ struct au_hdentry *p;
++
++ bend = dinfo->di_bend;
++ p = dinfo->di_hdentry + bindex;
++ while (bindex++ <= bend) {
++ if (p->hd_dentry)
++ au_hdput(p);
++ p++;
++ }
++ }
++ kfree(dinfo->di_hdentry);
++ AuRwDestroy(&dinfo->di_rwsem);
++ au_cache_free_dinfo(dinfo);
++ au_hin_di_reinit(dentry);
++}
++
++struct dentry_operations aufs_dop = {
++ .d_revalidate = aufs_d_revalidate,
++ .d_release = aufs_d_release
++};
+diff --git a/fs/aufs/dentry.h b/fs/aufs/dentry.h
+new file mode 100644
+index 0000000..893288c
+--- /dev/null
++++ b/fs/aufs/dentry.h
+@@ -0,0 +1,228 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * lookup and dentry operations
++ */
++
++#ifndef __AUFS_DENTRY_H__
++#define __AUFS_DENTRY_H__
++
++#ifdef __KERNEL__
++
++#include <linux/dcache.h>
++#include <linux/aufs_type.h>
++#include "rwsem.h"
++
++/* make a single member structure for future use */
++/* todo: remove this structure */
++struct au_hdentry {
++ struct dentry *hd_dentry;
++};
++
++struct au_dinfo {
++ atomic_t di_generation;
++
++ struct au_rwsem di_rwsem;
++ aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq;
++ struct au_hdentry *di_hdentry;
++};
++
++/* ---------------------------------------------------------------------- */
++
++/* dentry.c */
++extern struct dentry_operations aufs_dop;
++struct au_branch;
++struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
++ struct au_branch *br, struct nameidata *nd);
++struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
++ struct au_branch *br);
++int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
++ struct dentry *h_parent, struct au_branch *br);
++
++int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
++ struct nameidata *nd);
++int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex);
++int au_refresh_hdentry(struct dentry *dentry, mode_t type);
++int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
++
++/* dinfo.c */
++int au_alloc_dinfo(struct dentry *dentry);
++int au_di_realloc(struct au_dinfo *dinfo, int nbr);
++
++void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
++void di_read_unlock(struct dentry *d, int flags);
++void di_downgrade_lock(struct dentry *d, int flags);
++void di_write_lock(struct dentry *d, unsigned int lsc);
++void di_write_unlock(struct dentry *d);
++void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
++void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
++void di_write_unlock2(struct dentry *d1, struct dentry *d2);
++
++struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
++aufs_bindex_t au_dbtail(struct dentry *dentry);
++aufs_bindex_t au_dbtaildir(struct dentry *dentry);
++
++void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
++ struct dentry *h_dentry);
++void au_update_digen(struct dentry *dentry);
++void au_update_dbrange(struct dentry *dentry, int do_put_zero);
++void au_update_dbstart(struct dentry *dentry);
++void au_update_dbend(struct dentry *dentry);
++int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
++
++/* ---------------------------------------------------------------------- */
++
++static inline struct au_dinfo *au_di(struct dentry *dentry)
++{
++ return dentry->d_fsdata;
++}
++
++/* ---------------------------------------------------------------------- */
++
++/* lock subclass for dinfo */
++enum {
++ AuLsc_DI_CHILD, /* child first */
++ AuLsc_DI_CHILD2, /* rename(2), link(2), and cpup at hinotify */
++ AuLsc_DI_CHILD3, /* copyup dirs */
++ AuLsc_DI_PARENT,
++ AuLsc_DI_PARENT2,
++ AuLsc_DI_PARENT3
++};
++
++/*
++ * di_read_lock_child, di_write_lock_child,
++ * di_read_lock_child2, di_write_lock_child2,
++ * di_read_lock_child3, di_write_lock_child3,
++ * di_read_lock_parent, di_write_lock_parent,
++ * di_read_lock_parent2, di_write_lock_parent2,
++ * di_read_lock_parent3, di_write_lock_parent3,
++ */
++#define AuReadLockFunc(name, lsc) \
++static inline void di_read_lock_##name(struct dentry *d, int flags) \
++{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
++
++#define AuWriteLockFunc(name, lsc) \
++static inline void di_write_lock_##name(struct dentry *d) \
++{ di_write_lock(d, AuLsc_DI_##lsc); }
++
++#define AuRWLockFuncs(name, lsc) \
++ AuReadLockFunc(name, lsc) \
++ AuWriteLockFunc(name, lsc)
++
++AuRWLockFuncs(child, CHILD);
++AuRWLockFuncs(child2, CHILD2);
++AuRWLockFuncs(child3, CHILD3);
++AuRWLockFuncs(parent, PARENT);
++AuRWLockFuncs(parent2, PARENT2);
++AuRWLockFuncs(parent3, PARENT3);
++
++#undef AuReadLockFunc
++#undef AuWriteLockFunc
++#undef AuRWLockFuncs
++
++#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem)
++#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem)
++#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem)
++
++/* ---------------------------------------------------------------------- */
++
++/* todo: memory barrier? */
++static inline unsigned int au_digen(struct dentry *d)
++{
++ return atomic_read(&au_di(d)->di_generation);
++}
++
++static inline void au_h_dentry_init(struct au_hdentry *hdentry)
++{
++ hdentry->hd_dentry = NULL;
++}
++
++static inline void au_hdput(struct au_hdentry *hd)
++{
++ dput(hd->hd_dentry);
++}
++
++static inline aufs_bindex_t au_dbstart(struct dentry *dentry)
++{
++ DiMustAnyLock(dentry);
++ return au_di(dentry)->di_bstart;
++}
++
++static inline aufs_bindex_t au_dbend(struct dentry *dentry)
++{
++ DiMustAnyLock(dentry);
++ return au_di(dentry)->di_bend;
++}
++
++static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
++{
++ DiMustAnyLock(dentry);
++ return au_di(dentry)->di_bwh;
++}
++
++static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
++{
++ DiMustAnyLock(dentry);
++ return au_di(dentry)->di_bdiropq;
++}
++
++/* todo: hard/soft set? */
++static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex)
++{
++ DiMustWriteLock(dentry);
++ au_di(dentry)->di_bstart = bindex;
++}
++
++static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex)
++{
++ DiMustWriteLock(dentry);
++ au_di(dentry)->di_bend = bindex;
++}
++
++static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
++{
++ DiMustWriteLock(dentry);
++ /* dbwh can be outside of bstart - bend range */
++ au_di(dentry)->di_bwh = bindex;
++}
++
++static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
++{
++ DiMustWriteLock(dentry);
++ au_di(dentry)->di_bdiropq = bindex;
++}
++
++/* ---------------------------------------------------------------------- */
++
++#ifdef CONFIG_AUFS_HINOTIFY
++static inline void au_digen_dec(struct dentry *d)
++{
++ atomic_dec_return(&au_di(d)->di_generation);
++}
++
++static inline void au_hin_di_reinit(struct dentry *dentry)
++{
++ dentry->d_fsdata = NULL;
++}
++#else
++AuStubVoid(au_hin_di_reinit, struct dentry *dentry __maybe_unused)
++#endif /* CONFIG_AUFS_HINOTIFY */
++
++#endif /* __KERNEL__ */
++#endif /* __AUFS_DENTRY_H__ */
+diff --git a/fs/aufs/dinfo.c b/fs/aufs/dinfo.c
+new file mode 100644
+index 0000000..0010c99
+--- /dev/null
++++ b/fs/aufs/dinfo.c
+@@ -0,0 +1,367 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * dentry private data
++ */
++
++#include "aufs.h"
++
++int au_alloc_dinfo(struct dentry *dentry)
++{
++ struct au_dinfo *dinfo;
++ struct super_block *sb;
++ int nbr;
++
++ dinfo = au_cache_alloc_dinfo();
++ if (unlikely(!dinfo))
++ goto out;
++
++ sb = dentry->d_sb;
++ nbr = au_sbend(sb) + 1;
++ if (nbr <= 0)
++ nbr = 1;
++ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
++ if (unlikely(!dinfo->di_hdentry))
++ goto out_dinfo;
++
++ atomic_set(&dinfo->di_generation, au_sigen(sb));
++ /* smp_mb(); */ /* atomic_set */
++ au_rw_init_wlock_nested(&dinfo->di_rwsem, AuLsc_DI_CHILD);
++ dinfo->di_bstart = -1;
++ dinfo->di_bend = -1;
++ dinfo->di_bwh = -1;
++ dinfo->di_bdiropq = -1;
++
++ dentry->d_fsdata = dinfo;
++ dentry->d_op = &aufs_dop;
++ return 0; /* success */
++
++ out_dinfo:
++ au_cache_free_dinfo(dinfo);
++ out:
++ return -ENOMEM;
++}
++
++int au_di_realloc(struct au_dinfo *dinfo, int nbr)
++{
++ int err, sz;
++ struct au_hdentry *hdp;
++
++ AuRwMustWriteLock(&dinfo->di_rwsem);
++
++ err = -ENOMEM;
++ sz = sizeof(*hdp) * (dinfo->di_bend + 1);
++ if (!sz)
++ sz = sizeof(*hdp);
++ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS);
++ if (hdp) {
++ dinfo->di_hdentry = hdp;
++ err = 0;
++ }
++
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
++{
++ switch (lsc) {
++ case AuLsc_DI_CHILD:
++ ii_write_lock_child(inode);
++ break;
++ case AuLsc_DI_CHILD2:
++ ii_write_lock_child2(inode);
++ break;
++ case AuLsc_DI_CHILD3:
++ ii_write_lock_child3(inode);
++ break;
++ case AuLsc_DI_PARENT:
++ ii_write_lock_parent(inode);
++ break;
++ case AuLsc_DI_PARENT2:
++ ii_write_lock_parent2(inode);
++ break;
++ case AuLsc_DI_PARENT3:
++ ii_write_lock_parent3(inode);
++ break;
++ default:
++ BUG();
++ }
++}
++
++static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
++{
++ switch (lsc) {
++ case AuLsc_DI_CHILD:
++ ii_read_lock_child(inode);
++ break;
++ case AuLsc_DI_CHILD2:
++ ii_read_lock_child2(inode);
++ break;
++ case AuLsc_DI_CHILD3:
++ ii_read_lock_child3(inode);
++ break;
++ case AuLsc_DI_PARENT:
++ ii_read_lock_parent(inode);
++ break;
++ case AuLsc_DI_PARENT2:
++ ii_read_lock_parent2(inode);
++ break;
++ case AuLsc_DI_PARENT3:
++ ii_read_lock_parent3(inode);
++ break;
++ default:
++ BUG();
++ }
++}
++
++void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
++{
++ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
++ if (d->d_inode) {
++ if (au_ftest_lock(flags, IW))
++ do_ii_write_lock(d->d_inode, lsc);
++ else if (au_ftest_lock(flags, IR))
++ do_ii_read_lock(d->d_inode, lsc);
++ }
++}
++
++void di_read_unlock(struct dentry *d, int flags)
++{
++ if (d->d_inode) {
++ if (au_ftest_lock(flags, IW))
++ ii_write_unlock(d->d_inode);
++ else if (au_ftest_lock(flags, IR))
++ ii_read_unlock(d->d_inode);
++ }
++ au_rw_read_unlock(&au_di(d)->di_rwsem);
++}
++
++void di_downgrade_lock(struct dentry *d, int flags)
++{
++ if (d->d_inode && au_ftest_lock(flags, IR))
++ ii_downgrade_lock(d->d_inode);
++ au_rw_dgrade_lock(&au_di(d)->di_rwsem);
++}
++
++void di_write_lock(struct dentry *d, unsigned int lsc)
++{
++ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
++ if (d->d_inode)
++ do_ii_write_lock(d->d_inode, lsc);
++}
++
++void di_write_unlock(struct dentry *d)
++{
++ if (d->d_inode)
++ ii_write_unlock(d->d_inode);
++ au_rw_write_unlock(&au_di(d)->di_rwsem);
++}
++
++void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
++{
++ AuDebugOn(d1 == d2
++ || d1->d_inode == d2->d_inode
++ || d1->d_sb != d2->d_sb);
++
++ if (isdir && au_test_subdir(d1, d2)) {
++ di_write_lock_child(d1);
++ di_write_lock_child2(d2);
++ } else {
++ /* there should be no races */
++ di_write_lock_child(d2);
++ di_write_lock_child2(d1);
++ }
++}
++
++void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
++{
++ AuDebugOn(d1 == d2
++ || d1->d_inode == d2->d_inode
++ || d1->d_sb != d2->d_sb);
++
++ if (isdir && au_test_subdir(d1, d2)) {
++ di_write_lock_parent(d1);
++ di_write_lock_parent2(d2);
++ } else {
++ /* there should be no races */
++ di_write_lock_parent(d2);
++ di_write_lock_parent2(d1);
++ }
++}
++
++void di_write_unlock2(struct dentry *d1, struct dentry *d2)
++{
++ di_write_unlock(d1);
++ if (d1->d_inode == d2->d_inode)
++ au_rw_write_unlock(&au_di(d2)->di_rwsem);
++ else
++ di_write_unlock(d2);
++}
++
++/* ---------------------------------------------------------------------- */
++
++struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
++{
++ struct dentry *d;
++
++ DiMustAnyLock(dentry);
++
++ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
++ return NULL;
++ AuDebugOn(bindex < 0);
++ d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry;
++ AuDebugOn(d && (atomic_read(&d->d_count) <= 0));
++ return d;
++}
++
++aufs_bindex_t au_dbtail(struct dentry *dentry)
++{
++ aufs_bindex_t bend, bwh;
++
++ bend = au_dbend(dentry);
++ if (0 <= bend) {
++ bwh = au_dbwh(dentry);
++ if (!bwh)
++ return bwh;
++ if (0 < bwh && bwh < bend)
++ return bwh - 1;
++ }
++ return bend;
++}
++
++aufs_bindex_t au_dbtaildir(struct dentry *dentry)
++{
++ aufs_bindex_t bend, bopq;
++
++ bend = au_dbtail(dentry);
++ if (0 <= bend) {
++ bopq = au_dbdiropq(dentry);
++ if (0 <= bopq && bopq < bend)
++ bend = bopq;
++ }
++ return bend;
++}
++
++/* ---------------------------------------------------------------------- */
++
++void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
++ struct dentry *h_dentry)
++{
++ struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex;
++
++ DiMustWriteLock(dentry);
++
++ if (hd->hd_dentry)
++ au_hdput(hd);
++ hd->hd_dentry = h_dentry;
++}
++
++void au_update_digen(struct dentry *dentry)
++{
++ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
++ /* smp_mb(); */ /* atomic_set */
++}
++
++void au_update_dbrange(struct dentry *dentry, int do_put_zero)
++{
++ struct au_dinfo *dinfo;
++ struct dentry *h_d;
++
++ DiMustWriteLock(dentry);
++
++ dinfo = au_di(dentry);
++ if (!dinfo || dinfo->di_bstart < 0)
++ return;
++
++ if (do_put_zero) {
++ aufs_bindex_t bindex, bend;
++
++ bend = dinfo->di_bend;
++ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) {
++ h_d = dinfo->di_hdentry[0 + bindex].hd_dentry;
++ if (h_d && !h_d->d_inode)
++ au_set_h_dptr(dentry, bindex, NULL);
++ }
++ }
++
++ dinfo->di_bstart = -1;
++ while (++dinfo->di_bstart <= dinfo->di_bend)
++ if (dinfo->di_hdentry[0 + dinfo->di_bstart].hd_dentry)
++ break;
++ if (dinfo->di_bstart > dinfo->di_bend) {
++ dinfo->di_bstart = -1;
++ dinfo->di_bend = -1;
++ return;
++ }
++
++ dinfo->di_bend++;
++ while (0 <= --dinfo->di_bend)
++ if (dinfo->di_hdentry[0 + dinfo->di_bend].hd_dentry)
++ break;
++ AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0);
++}
++
++void au_update_dbstart(struct dentry *dentry)
++{
++ aufs_bindex_t bindex, bend;
++ struct dentry *h_dentry;
++
++ bend = au_dbend(dentry);
++ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
++ h_dentry = au_h_dptr(dentry, bindex);
++ if (!h_dentry)
++ continue;
++ if (h_dentry->d_inode) {
++ au_set_dbstart(dentry, bindex);
++ return;
++ }
++ au_set_h_dptr(dentry, bindex, NULL);
++ }
++}
++
++void au_update_dbend(struct dentry *dentry)
++{
++ aufs_bindex_t bindex, bstart;
++ struct dentry *h_dentry;
++
++ bstart = au_dbstart(dentry);
++ for (bindex = au_dbend(dentry); bindex <= bstart; bindex--) {
++ h_dentry = au_h_dptr(dentry, bindex);
++ if (!h_dentry)
++ continue;
++ if (h_dentry->d_inode) {
++ au_set_dbend(dentry, bindex);
++ return;
++ }
++ au_set_h_dptr(dentry, bindex, NULL);
++ }
++}
++
++int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
++{
++ aufs_bindex_t bindex, bend;
++
++ bend = au_dbend(dentry);
++ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++)
++ if (au_h_dptr(dentry, bindex) == h_dentry)
++ return bindex;
++ return -1;
++}
+diff --git a/fs/aufs/dir.c b/fs/aufs/dir.c
+new file mode 100644
+index 0000000..c9d8284
+--- /dev/null
++++ b/fs/aufs/dir.c
+@@ -0,0 +1,579 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * directory operations
++ */
++
++#include <linux/file.h>
++#include <linux/fs_stack.h>
++#include "aufs.h"
++
++void au_add_nlink(struct inode *dir, struct inode *h_dir)
++{
++ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
++
++ dir->i_nlink += h_dir->i_nlink - 2;
++ if (h_dir->i_nlink < 2)
++ dir->i_nlink += 2;
++}
++
++void au_sub_nlink(struct inode *dir, struct inode *h_dir)
++{
++ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
++
++ dir->i_nlink -= h_dir->i_nlink - 2;
++ if (h_dir->i_nlink < 2)
++ dir->i_nlink -= 2;
++}
++
++loff_t au_dir_size(struct file *file, struct dentry *dentry)
++{
++ loff_t sz;
++ aufs_bindex_t bindex, bend;
++ struct file *h_file;
++ struct dentry *h_dentry;
++
++ sz = 0;
++ if (file) {
++ AuDebugOn(!file->f_dentry);
++ AuDebugOn(!file->f_dentry->d_inode);
++ AuDebugOn(!S_ISDIR(file->f_dentry->d_inode->i_mode));
++
++ bend = au_fbend(file);
++ for (bindex = au_fbstart(file);
++ bindex <= bend && sz < KMALLOC_MAX_SIZE;
++ bindex++) {
++ h_file = au_h_fptr(file, bindex);
++ if (h_file
++ && h_file->f_dentry
++ && h_file->f_dentry->d_inode)
++ sz += i_size_read(h_file->f_dentry->d_inode);
++ }
++ } else {
++ AuDebugOn(!dentry);
++ AuDebugOn(!dentry->d_inode);
++ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode));
++
++ bend = au_dbtaildir(dentry);
++ for (bindex = au_dbstart(dentry);
++ bindex <= bend && sz < KMALLOC_MAX_SIZE;
++ bindex++) {
++ h_dentry = au_h_dptr(dentry, bindex);
++ if (h_dentry && h_dentry->d_inode)
++ sz += i_size_read(h_dentry->d_inode);
++ }
++ }
++ if (sz < KMALLOC_MAX_SIZE)
++ sz = roundup_pow_of_two(sz);
++ if (sz > KMALLOC_MAX_SIZE)
++ sz = KMALLOC_MAX_SIZE;
++ else if (sz < NAME_MAX) {
++ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
++ sz = AUFS_RDBLK_DEF;
++ }
++ return sz;
++}
++
++/* ---------------------------------------------------------------------- */
++
++static int reopen_dir(struct file *file)
++{
++ int err;
++ unsigned int flags;
++ aufs_bindex_t bindex, btail, bstart;
++ struct dentry *dentry, *h_dentry;
++ struct file *h_file;
++
++ /* open all lower dirs */
++ dentry = file->f_dentry;
++ bstart = au_dbstart(dentry);
++ for (bindex = au_fbstart(file); bindex < bstart; bindex++)
++ au_set_h_fptr(file, bindex, NULL);
++ au_set_fbstart(file, bstart);
++
++ btail = au_dbtaildir(dentry);
++ for (bindex = au_fbend(file); btail < bindex; bindex--)
++ au_set_h_fptr(file, bindex, NULL);
++ au_set_fbend(file, btail);
++
++ flags = file->f_flags;
++ for (bindex = bstart; bindex <= btail; bindex++) {
++ h_dentry = au_h_dptr(dentry, bindex);
++ if (!h_dentry)
++ continue;
++ h_file = au_h_fptr(file, bindex);
++ if (h_file)
++ continue;
++
++ h_file = au_h_open(dentry, bindex, flags, file);
++ err = PTR_ERR(h_file);
++ if (IS_ERR(h_file))
++ goto out; /* close all? */
++ au_set_h_fptr(file, bindex, h_file);
++ }
++ au_update_figen(file);
++ /* todo: necessary? */
++ /* file->f_ra = h_file->f_ra; */
++ err = 0;
++
++ out:
++ return err;
++}
++
++static int do_open_dir(struct file *file, int flags)
++{
++ int err;
++ aufs_bindex_t bindex, btail;
++ struct dentry *dentry, *h_dentry;
++ struct file *h_file;
++
++ FiMustWriteLock(file);
++
++ err = 0;
++ dentry = file->f_dentry;
++ au_set_fvdir_cache(file, NULL);
++ file->f_version = dentry->d_inode->i_version;
++ bindex = au_dbstart(dentry);
++ au_set_fbstart(file, bindex);
++ btail = au_dbtaildir(dentry);
++ au_set_fbend(file, btail);
++ for (; !err && bindex <= btail; bindex++) {
++ h_dentry = au_h_dptr(dentry, bindex);
++ if (!h_dentry)
++ continue;
++
++ h_file = au_h_open(dentry, bindex, flags, file);
++ if (IS_ERR(h_file)) {
++ err = PTR_ERR(h_file);
++ break;
++ }
++ au_set_h_fptr(file, bindex, h_file);
++ }
++ au_update_figen(file);
++ /* todo: necessary? */
++ /* file->f_ra = h_file->f_ra; */
++ if (!err)
++ return 0; /* success */
++
++ /* close all */
++ for (bindex = au_fbstart(file); bindex <= btail; bindex++)
++ au_set_h_fptr(file, bindex, NULL);
++ au_set_fbstart(file, -1);
++ au_set_fbend(file, -1);
++ return err;
++}
++
++static int aufs_open_dir(struct inode *inode __maybe_unused,
++ struct file *file)
++{
++ return au_do_open(file, do_open_dir);
++}
++
++static int aufs_release_dir(struct inode *inode __maybe_unused,
++ struct file *file)
++{
++ struct au_vdir *vdir_cache;
++ struct super_block *sb;
++
++ sb = file->f_dentry->d_sb;
++ vdir_cache = au_fi(file)->fi_vdir_cache; /* lock-free */
++ if (vdir_cache)
++ au_vdir_free(vdir_cache);
++ au_plink_maint_leave(file);
++ au_finfo_fin(file);
++ return 0;
++}
++
++/* ---------------------------------------------------------------------- */
++
++static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
++{
++ int err;
++ aufs_bindex_t bend, bindex;
++ struct inode *inode;
++ struct super_block *sb;
++
++ err = 0;
++ sb = dentry->d_sb;
++ inode = dentry->d_inode;
++ IMustLock(inode);
++ bend = au_dbend(dentry);
++ for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) {
++ struct path h_path;
++ struct inode *h_inode;
++
++ if (au_test_ro(sb, bindex, inode))
++ continue;
++ h_path.dentry = au_h_dptr(dentry, bindex);
++ if (!h_path.dentry)
++ continue;
++ h_inode = h_path.dentry->d_inode;
++ if (!h_inode)
++ continue;
++
++ /* no mnt_want_write() */
++ /* cf. fs/nsfd/vfs.c and fs/nfsd/nfs4recover.c */
++ /* todo: inotiry fired? */
++ h_path.mnt = au_sbr_mnt(sb, bindex);
++ mutex_lock(&h_inode->i_mutex);
++ err = filemap_fdatawrite(h_inode->i_mapping);
++ AuDebugOn(!h_inode->i_fop);
++ if (!err && h_inode->i_fop->fsync)
++ err = h_inode->i_fop->fsync(NULL, h_path.dentry,
++ datasync);
++ if (!err)
++ err = filemap_fdatawrite(h_inode->i_mapping);
++ if (!err)
++ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
++ mutex_unlock(&h_inode->i_mutex);
++ }
++
++ return err;
++}
++
++static int au_do_fsync_dir(struct file *file, int datasync)
++{
++ int err;
++ aufs_bindex_t bend, bindex;
++ struct file *h_file;
++ struct super_block *sb;
++ struct inode *inode;
++ struct mutex *h_mtx;
++
++ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
++ if (unlikely(err))
++ goto out;
++
++ sb = file->f_dentry->d_sb;
++ inode = file->f_dentry->d_inode;
++ bend = au_fbend(file);
++ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
++ h_file = au_h_fptr(file, bindex);
++ if (!h_file || au_test_ro(sb, bindex, inode))
++ continue;
++
++ err = (int)do_fsync(h_file, datasync);
++ if (!err) {
++ h_mtx = &h_file->f_dentry->d_inode->i_mutex;
++ mutex_lock(h_mtx);
++ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
++ /*ignore*/
++ mutex_unlock(h_mtx);
++ }
++ }
++
++ out:
++ return err;
++}
++
++/*
++ * @file may be NULL
++ */
++static int aufs_fsync_dir(struct file *file, struct dentry *dentry,
++ int datasync)
++{
++ int err;
++ struct super_block *sb;
++
++ IMustLock(dentry->d_inode);
++
++ err = 0;
++ sb = dentry->d_sb;
++ si_noflush_read_lock(sb);
++ if (file)
++ err = au_do_fsync_dir(file, datasync);
++ else {
++ di_write_lock_child(dentry);
++ err = au_do_fsync_dir_no_file(dentry, datasync);
++ }
++ au_cpup_attr_timesizes(dentry->d_inode);
++ di_write_unlock(dentry);
++ if (file)
++ fi_write_unlock(file);
++
++ si_read_unlock(sb);
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++static int aufs_readdir(struct file *file, void *dirent, filldir_t filldir)
++{
++ int err;
++ struct dentry *dentry;
++ struct inode *inode;
++ struct super_block *sb;
++
++ dentry = file->f_dentry;
++ inode = dentry->d_inode;
++ IMustLock(inode);
++
++ sb = dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
++ if (unlikely(err))
++ goto out;
++ err = au_vdir_init(file);
++ di_downgrade_lock(dentry, AuLock_IR);
++ if (unlikely(err))
++ goto out_unlock;
++
++ if (!au_test_nfsd(current)) {
++ err = au_vdir_fill_de(file, dirent, filldir);
++ fsstack_copy_attr_atime(inode,
++ au_h_iptr(inode, au_ibstart(inode)));
++ } else {
++ /*
++ * nfsd filldir may call lookup_one_len(), vfs_getattr(),
++ * encode_fh() and others.
++ */
++ struct inode *h_inode = au_h_iptr(inode, au_ibstart(inode));
++
++ di_read_unlock(dentry, AuLock_IR);
++ si_read_unlock(sb);
++ lockdep_off();
++ err = au_vdir_fill_de(file, dirent, filldir);
++ lockdep_on();
++ fsstack_copy_attr_atime(inode, h_inode);
++ fi_write_unlock(file);
++
++ AuTraceErr(err);
++ return err;
++ }
++
++ out_unlock:
++ di_read_unlock(dentry, AuLock_IR);
++ fi_write_unlock(file);
++ out:
++ si_read_unlock(sb);
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++#define AuTestEmpty_WHONLY 1
++#define AuTestEmpty_CALLED (1 << 1)
++#define AuTestEmpty_SHWH (1 << 2)
++#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name)
++#define au_fset_testempty(flags, name) { (flags) |= AuTestEmpty_##name; }
++#define au_fclr_testempty(flags, name) { (flags) &= ~AuTestEmpty_##name; }
++
++#ifndef CONFIG_AUFS_SHWH
++#undef AuTestEmpty_SHWH
++#define AuTestEmpty_SHWH 0
++#endif
++
++struct test_empty_arg {
++ struct au_nhash *whlist;
++ unsigned int flags;
++ int err;
++ aufs_bindex_t bindex;
++};
++
++static int test_empty_cb(void *__arg, const char *__name, int namelen,
++ loff_t offset __maybe_unused, u64 ino,
++ unsigned int d_type)
++{
++ struct test_empty_arg *arg = __arg;
++ char *name = (void *)__name;
++
++ arg->err = 0;
++ au_fset_testempty(arg->flags, CALLED);
++ /* smp_mb(); */
++ if (name[0] == '.'
++ && (namelen == 1 || (name[1] == '.' && namelen == 2)))
++ goto out; /* success */
++
++ if (namelen <= AUFS_WH_PFX_LEN
++ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
++ if (au_ftest_testempty(arg->flags, WHONLY)
++ && !au_nhash_test_known_wh(arg->whlist, name, namelen))
++ arg->err = -ENOTEMPTY;
++ goto out;
++ }
++
++ name += AUFS_WH_PFX_LEN;
++ namelen -= AUFS_WH_PFX_LEN;
++ if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
++ arg->err = au_nhash_append_wh
++ (arg->whlist, name, namelen, ino, d_type, arg->bindex,
++ au_ftest_testempty(arg->flags, SHWH));
++
++ out:
++ /* smp_mb(); */
++ AuTraceErr(arg->err);
++ return arg->err;
++}
++
++static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
++{
++ int err;
++ struct file *h_file;
++
++ h_file = au_h_open(dentry, arg->bindex,
++ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
++ /*file*/NULL);
++ err = PTR_ERR(h_file);
++ if (IS_ERR(h_file))
++ goto out;
++
++ err = 0;
++ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
++ && !h_file->f_dentry->d_inode->i_nlink)
++ goto out_put;
++
++ do {
++ arg->err = 0;
++ au_fclr_testempty(arg->flags, CALLED);
++ /* smp_mb(); */
++ err = vfsub_readdir(h_file, test_empty_cb, arg);
++ if (err >= 0)
++ err = arg->err;
++ } while (!err && au_ftest_testempty(arg->flags, CALLED));
++
++ out_put:
++ fput(h_file);
++ au_sbr_put(dentry->d_sb, arg->bindex);
++ out:
++ return err;
++}
++
++struct do_test_empty_args {
++ int *errp;
++ struct dentry *dentry;
++ struct test_empty_arg *arg;
++};
++
++static void call_do_test_empty(void *args)
++{
++ struct do_test_empty_args *a = args;
++ *a->errp = do_test_empty(a->dentry, a->arg);
++}
++
++static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
++{
++ int err, wkq_err;
++ struct dentry *h_dentry;
++ struct inode *h_inode;
++
++ h_dentry = au_h_dptr(dentry, arg->bindex);
++ h_inode = h_dentry->d_inode;
++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
++ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
++ mutex_unlock(&h_inode->i_mutex);
++ if (!err)
++ err = do_test_empty(dentry, arg);
++ else {
++ struct do_test_empty_args args = {
++ .errp = &err,
++ .dentry = dentry,
++ .arg = arg
++ };
++ unsigned int flags = arg->flags;
++
++ wkq_err = au_wkq_wait(call_do_test_empty, &args);
++ if (unlikely(wkq_err))
++ err = wkq_err;
++ arg->flags = flags;
++ }
++
++ return err;
++}
++
++int au_test_empty_lower(struct dentry *dentry)
++{
++ int err;
++ unsigned int rdhash;
++ aufs_bindex_t bindex, bstart, btail;
++ struct au_nhash whlist;
++ struct test_empty_arg arg;
++
++ SiMustAnyLock(dentry->d_sb);
++
++ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
++ if (!rdhash)
++ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
++ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
++ if (unlikely(err))
++ goto out;
++
++ arg.flags = 0;
++ arg.whlist = &whlist;
++ bstart = au_dbstart(dentry);
++ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
++ au_fset_testempty(arg.flags, SHWH);
++ arg.bindex = bstart;
++ err = do_test_empty(dentry, &arg);
++ if (unlikely(err))
++ goto out_whlist;
++
++ au_fset_testempty(arg.flags, WHONLY);
++ btail = au_dbtaildir(dentry);
++ for (bindex = bstart + 1; !err && bindex <= btail; bindex++) {
++ struct dentry *h_dentry;
++
++ h_dentry = au_h_dptr(dentry, bindex);
++ if (h_dentry && h_dentry->d_inode) {
++ arg.bindex = bindex;
++ err = do_test_empty(dentry, &arg);
++ }
++ }
++
++ out_whlist:
++ au_nhash_wh_free(&whlist);
++ out:
++ return err;
++}
++
++int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
++{
++ int err;
++ struct test_empty_arg arg;
++ aufs_bindex_t bindex, btail;
++
++ err = 0;
++ arg.whlist = whlist;
++ arg.flags = AuTestEmpty_WHONLY;
++ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
++ au_fset_testempty(arg.flags, SHWH);
++ btail = au_dbtaildir(dentry);
++ for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) {
++ struct dentry *h_dentry;
++
++ h_dentry = au_h_dptr(dentry, bindex);
++ if (h_dentry && h_dentry->d_inode) {
++ arg.bindex = bindex;
++ err = sio_test_empty(dentry, &arg);
++ }
++ }
++
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++struct file_operations aufs_dir_fop = {
++ .read = generic_read_dir,
++ .readdir = aufs_readdir,
++ .unlocked_ioctl = aufs_ioctl_dir,
++ .open = aufs_open_dir,
++ .release = aufs_release_dir,
++ .flush = aufs_flush,
++ .fsync = aufs_fsync_dir
++};
+diff --git a/fs/aufs/dir.h b/fs/aufs/dir.h
+new file mode 100644
+index 0000000..dc40539
+--- /dev/null
++++ b/fs/aufs/dir.h
+@@ -0,0 +1,127 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * directory operations
++ */
++
++#ifndef __AUFS_DIR_H__
++#define __AUFS_DIR_H__
++
++#ifdef __KERNEL__
++
++#include <linux/fs.h>
++#include <linux/aufs_type.h>
++
++/* ---------------------------------------------------------------------- */
++
++/* need to be faster and smaller */
++
++struct au_nhash {
++ unsigned int nh_num;
++ struct hlist_head *nh_head;
++};
++
++struct au_vdir_destr {
++ unsigned char len;
++ unsigned char name[0];
++} __packed;
++
++struct au_vdir_dehstr {
++ struct hlist_node hash;
++ struct au_vdir_destr *str;
++};
++
++struct au_vdir_de {
++ ino_t de_ino;
++ unsigned char de_type;
++ /* caution: packed */
++ struct au_vdir_destr de_str;
++} __packed;
++
++struct au_vdir_wh {
++ struct hlist_node wh_hash;
++#ifdef CONFIG_AUFS_SHWH
++ ino_t wh_ino;
++ aufs_bindex_t wh_bindex;
++ unsigned char wh_type;
++#else
++ aufs_bindex_t wh_bindex;
++#endif
++ /* caution: packed */
++ struct au_vdir_destr wh_str;
++} __packed;
++
++union au_vdir_deblk_p {
++ unsigned char *deblk;
++ struct au_vdir_de *de;
++};
++
++struct au_vdir {
++ unsigned char **vd_deblk;
++ unsigned long vd_nblk;
++ struct {
++ unsigned long ul;
++ union au_vdir_deblk_p p;
++ } vd_last;
++
++ unsigned long vd_version;
++ unsigned int vd_deblk_sz;
++ unsigned long vd_jiffy;
++};
++
++/* ---------------------------------------------------------------------- */
++
++/* dir.c */
++extern struct file_operations aufs_dir_fop;
++void au_add_nlink(struct inode *dir, struct inode *h_dir);
++void au_sub_nlink(struct inode *dir, struct inode *h_dir);
++loff_t au_dir_size(struct file *file, struct dentry *dentry);
++int au_test_empty_lower(struct dentry *dentry);
++int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
++
++/* vdir.c */
++unsigned int au_rdhash_est(loff_t sz);
++int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
++void au_nhash_wh_free(struct au_nhash *whlist);
++int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
++ int limit);
++int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
++int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
++ unsigned int d_type, aufs_bindex_t bindex,
++ unsigned char shwh);
++void au_vdir_free(struct au_vdir *vdir);
++int au_vdir_init(struct file *file);
++int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir);
++
++/* ioctl.c */
++long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
++
++#ifdef CONFIG_AUFS_RDU
++/* rdu.c */
++long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
++#else
++static inline long au_rdu_ioctl(struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ return -EINVAL;
++}
++#endif
++
++#endif /* __KERNEL__ */
++#endif /* __AUFS_DIR_H__ */
+diff --git a/fs/aufs/export.c b/fs/aufs/export.c
+new file mode 100644
+index 0000000..3d13ff5
+--- /dev/null
++++ b/fs/aufs/export.c
+@@ -0,0 +1,745 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * export via nfs
++ */
++
++#include <linux/exportfs.h>
++#include <linux/file.h>
++#include <linux/mnt_namespace.h>
++#include <linux/namei.h>
++#include <linux/random.h>
++#include "aufs.h"
++
++union conv {
++#ifdef CONFIG_AUFS_INO_T_64
++ __u32 a[2];
++#else
++ __u32 a[1];
++#endif
++ ino_t ino;
++};
++
++static ino_t decode_ino(__u32 *a)
++{
++ union conv u;
++
++ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
++ u.a[0] = a[0];
++#ifdef CONFIG_AUFS_INO_T_64
++ u.a[1] = a[1];
++#endif
++ return u.ino;
++}
++
++static void encode_ino(__u32 *a, ino_t ino)
++{
++ union conv u;
++
++ u.ino = ino;
++ a[0] = u.a[0];
++#ifdef CONFIG_AUFS_INO_T_64
++ a[1] = u.a[1];
++#endif
++}
++
++/* NFS file handle */
++enum {
++ Fh_br_id,
++ Fh_sigen,
++#ifdef CONFIG_AUFS_INO_T_64
++ /* support 64bit inode number */
++ Fh_ino1,
++ Fh_ino2,
++ Fh_dir_ino1,
++ Fh_dir_ino2,
++#else
++ Fh_ino1,
++ Fh_dir_ino1,
++#endif
++ Fh_igen,
++ Fh_h_type,
++ Fh_tail,
++
++ Fh_ino = Fh_ino1,
++ Fh_dir_ino = Fh_dir_ino1
++};
++
++static int au_test_anon(struct dentry *dentry)
++{
++ return !!(dentry->d_flags & DCACHE_DISCONNECTED);
++}
++
++/* ---------------------------------------------------------------------- */
++/* inode generation external table */
++
++int au_xigen_inc(struct inode *inode)
++{
++ int err;
++ loff_t pos;
++ ssize_t sz;
++ __u32 igen;
++ struct super_block *sb;
++ struct au_sbinfo *sbinfo;
++
++ err = 0;
++ sb = inode->i_sb;
++ sbinfo = au_sbi(sb);
++ /*
++ * temporary workaround for escaping from SiMustAnyLock() in
++ * au_mntflags(), since this function is called from au_iinfo_fin().
++ */
++ if (unlikely(!au_opt_test(sbinfo->si_mntflags, XINO)))
++ goto out;
++
++ pos = inode->i_ino;
++ pos *= sizeof(igen);
++ igen = inode->i_generation + 1;
++ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
++ sizeof(igen), &pos);
++ if (sz == sizeof(igen))
++ goto out; /* success */
++
++ err = sz;
++ if (unlikely(sz >= 0)) {
++ err = -EIO;
++ AuIOErr("xigen error (%zd)\n", sz);
++ }
++
++ out:
++ return err;
++}
++
++int au_xigen_new(struct inode *inode)
++{
++ int err;
++ loff_t pos;
++ ssize_t sz;
++ struct super_block *sb;
++ struct au_sbinfo *sbinfo;
++ struct file *file;
++
++ err = 0;
++ /* todo: dirty, at mount time */
++ if (inode->i_ino == AUFS_ROOT_INO)
++ goto out;
++ sb = inode->i_sb;
++ SiMustAnyLock(sb);
++ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
++ goto out;
++
++ err = -EFBIG;
++ pos = inode->i_ino;
++ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
++ AuIOErr1("too large i%lld\n", pos);
++ goto out;
++ }
++ pos *= sizeof(inode->i_generation);
++
++ err = 0;
++ sbinfo = au_sbi(sb);
++ file = sbinfo->si_xigen;
++ BUG_ON(!file);
++
++ if (i_size_read(file->f_dentry->d_inode)
++ < pos + sizeof(inode->i_generation)) {
++ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
++ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
++ sizeof(inode->i_generation), &pos);
++ } else
++ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
++ sizeof(inode->i_generation), &pos);
++ if (sz == sizeof(inode->i_generation))
++ goto out; /* success */
++
++ err = sz;
++ if (unlikely(sz >= 0)) {
++ err = -EIO;
++ AuIOErr("xigen error (%zd)\n", sz);
++ }
++
++ out:
++ return err;
++}
++
++int au_xigen_set(struct super_block *sb, struct file *base)
++{
++ int err;
++ struct au_sbinfo *sbinfo;
++ struct file *file;
++
++ SiMustWriteLock(sb);
++
++ sbinfo = au_sbi(sb);
++ file = au_xino_create2(base, sbinfo->si_xigen);
++ err = PTR_ERR(file);
++ if (IS_ERR(file))
++ goto out;
++ err = 0;
++ if (sbinfo->si_xigen)
++ fput(sbinfo->si_xigen);
++ sbinfo->si_xigen = file;
++
++ out:
++ return err;
++}
++
++void au_xigen_clr(struct super_block *sb)
++{
++ struct au_sbinfo *sbinfo;
++
++ SiMustWriteLock(sb);
++
++ sbinfo = au_sbi(sb);
++ if (sbinfo->si_xigen) {
++ fput(sbinfo->si_xigen);
++ sbinfo->si_xigen = NULL;
++ }
++}
++
++/* ---------------------------------------------------------------------- */
++
++static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
++ ino_t dir_ino)
++{
++ struct dentry *dentry, *d;
++ struct inode *inode;
++ unsigned int sigen;
++
++ dentry = NULL;
++ inode = ilookup(sb, ino);
++ if (!inode)
++ goto out;
++
++ dentry = ERR_PTR(-ESTALE);
++ sigen = au_sigen(sb);
++ if (unlikely(is_bad_inode(inode)
++ || IS_DEADDIR(inode)
++ || sigen != au_iigen(inode)))
++ goto out_iput;
++
++ dentry = NULL;
++ if (!dir_ino || S_ISDIR(inode->i_mode))
++ dentry = d_find_alias(inode);
++ else {
++ spin_lock(&dcache_lock);
++ list_for_each_entry(d, &inode->i_dentry, d_alias)
++ if (!au_test_anon(d)
++ && d->d_parent->d_inode->i_ino == dir_ino) {
++ dentry = dget_locked(d);
++ break;
++ }
++ spin_unlock(&dcache_lock);
++ }
++ if (unlikely(dentry && sigen != au_digen(dentry))) {
++ dput(dentry);
++ dentry = ERR_PTR(-ESTALE);
++ }
++
++ out_iput:
++ iput(inode);
++ out:
++ return dentry;
++}
++
++/* ---------------------------------------------------------------------- */
++
++/* todo: dirty? */
++/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
++static struct vfsmount *au_mnt_get(struct super_block *sb)
++{
++ struct mnt_namespace *ns;
++ struct vfsmount *pos, *mnt;
++
++ spin_lock(&vfsmount_lock);
++ /* no get/put ?? */
++ AuDebugOn(!current->nsproxy);
++ ns = current->nsproxy->mnt_ns;
++ AuDebugOn(!ns);
++ mnt = NULL;
++ /* the order (reverse) will not be a problem */
++ list_for_each_entry(pos, &ns->list, mnt_list)
++ if (pos->mnt_sb == sb) {
++ mnt = mntget(pos);
++ break;
++ }
++ spin_unlock(&vfsmount_lock);
++ AuDebugOn(!mnt);
++
++ return mnt;
++}
++
++struct au_nfsd_si_lock {
++ const unsigned int sigen;
++ const aufs_bindex_t br_id;
++ unsigned char force_lock;
++};
++
++static aufs_bindex_t si_nfsd_read_lock(struct super_block *sb,
++ struct au_nfsd_si_lock *nsi_lock)
++{
++ aufs_bindex_t bindex;
++
++ si_read_lock(sb, AuLock_FLUSH);
++
++ /* branch id may be wrapped around */
++ bindex = au_br_index(sb, nsi_lock->br_id);
++ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
++ goto out; /* success */
++
++ if (!nsi_lock->force_lock)
++ si_read_unlock(sb);
++ bindex = -1;
++
++ out:
++ return bindex;
++}
++
++struct find_name_by_ino {
++ int called, found;
++ ino_t ino;
++ char *name;
++ int namelen;
++};
++
++static int
++find_name_by_ino(void *arg, const char *name, int namelen, loff_t offset,
++ u64 ino, unsigned int d_type)
++{
++ struct find_name_by_ino *a = arg;
++
++ a->called++;
++ if (a->ino != ino)
++ return 0;
++
++ memcpy(a->name, name, namelen);
++ a->namelen = namelen;
++ a->found = 1;
++ return 1;
++}
++
++static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
++ struct au_nfsd_si_lock *nsi_lock)
++{
++ struct dentry *dentry, *parent;
++ struct file *file;
++ struct inode *dir;
++ struct find_name_by_ino arg;
++ int err;
++
++ parent = path->dentry;
++ if (nsi_lock)
++ si_read_unlock(parent->d_sb);
++ path_get(path);
++ file = dentry_open(parent, path->mnt, au_dir_roflags);
++ dentry = (void *)file;
++ if (IS_ERR(file))
++ goto out;
++
++ dentry = ERR_PTR(-ENOMEM);
++ arg.name = __getname();
++ if (unlikely(!arg.name))
++ goto out_file;
++ arg.ino = ino;
++ arg.found = 0;
++ do {
++ arg.called = 0;
++ /* smp_mb(); */
++ err = vfsub_readdir(file, find_name_by_ino, &arg);
++ } while (!err && !arg.found && arg.called);
++ dentry = ERR_PTR(err);
++ if (unlikely(err))
++ goto out_name;
++ dentry = ERR_PTR(-ENOENT);
++ if (!arg.found)
++ goto out_name;
++
++ /* do not call au_lkup_one() */
++ dir = parent->d_inode;
++ mutex_lock(&dir->i_mutex);
++ dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen);
++ mutex_unlock(&dir->i_mutex);
++ AuTraceErrPtr(dentry);
++ if (IS_ERR(dentry))
++ goto out_name;
++ AuDebugOn(au_test_anon(dentry));
++ if (unlikely(!dentry->d_inode)) {
++ dput(dentry);
++ dentry = ERR_PTR(-ENOENT);
++ }
++
++ out_name:
++ __putname(arg.name);
++ out_file:
++ fput(file);
++ out:
++ if (unlikely(nsi_lock
++ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
++ if (!IS_ERR(dentry)) {
++ dput(dentry);
++ dentry = ERR_PTR(-ESTALE);
++ }
++ AuTraceErrPtr(dentry);
++ return dentry;
++}
++
++static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
++ ino_t dir_ino,
++ struct au_nfsd_si_lock *nsi_lock)
++{
++ struct dentry *dentry;
++ struct path path;
++
++ if (dir_ino != AUFS_ROOT_INO) {
++ path.dentry = decode_by_ino(sb, dir_ino, 0);
++ dentry = path.dentry;
++ if (!path.dentry || IS_ERR(path.dentry))
++ goto out;
++ AuDebugOn(au_test_anon(path.dentry));
++ } else
++ path.dentry = dget(sb->s_root);
++
++ path.mnt = au_mnt_get(sb);
++ dentry = au_lkup_by_ino(&path, ino, nsi_lock);
++ path_put(&path);
++
++ out:
++ AuTraceErrPtr(dentry);
++ return dentry;
++}
++
++/* ---------------------------------------------------------------------- */
++
++static int h_acceptable(void *expv, struct dentry *dentry)
++{
++ return 1;
++}
++
++static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
++ char *buf, int len, struct super_block *sb)
++{
++ char *p;
++ int n;
++ struct path path;
++
++ p = d_path(h_rootpath, buf, len);
++ if (IS_ERR(p))
++ goto out;
++ n = strlen(p);
++
++ path.mnt = h_rootpath->mnt;
++ path.dentry = h_parent;
++ p = d_path(&path, buf, len);
++ if (IS_ERR(p))
++ goto out;
++ if (n != 1)
++ p += n;
++
++ path.mnt = au_mnt_get(sb);
++ path.dentry = sb->s_root;
++ p = d_path(&path, buf, len - strlen(p));
++ mntput(path.mnt);
++ if (IS_ERR(p))
++ goto out;
++ if (n != 1)
++ p[strlen(p)] = '/';
++
++ out:
++ AuTraceErrPtr(p);
++ return p;
++}
++
++static noinline_for_stack
++struct dentry *decode_by_path(struct super_block *sb, aufs_bindex_t bindex,
++ ino_t ino, __u32 *fh, int fh_len,
++ struct au_nfsd_si_lock *nsi_lock)
++{
++ struct dentry *dentry, *h_parent, *root;
++ struct super_block *h_sb;
++ char *pathname, *p;
++ struct vfsmount *h_mnt;
++ struct au_branch *br;
++ int err;
++ struct nameidata nd;
++
++ br = au_sbr(sb, bindex);
++ /* au_br_get(br); */
++ h_mnt = br->br_mnt;
++ h_sb = h_mnt->mnt_sb;
++ /* todo: call lower fh_to_dentry()? fh_to_parent()? */
++ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
++ fh_len - Fh_tail, fh[Fh_h_type],
++ h_acceptable, /*context*/NULL);
++ dentry = h_parent;
++ if (unlikely(!h_parent || IS_ERR(h_parent))) {
++ AuWarn1("%s decode_fh failed, %ld\n",
++ au_sbtype(h_sb), PTR_ERR(h_parent));
++ goto out;
++ }
++ dentry = NULL;
++ if (unlikely(au_test_anon(h_parent))) {
++ AuWarn1("%s decode_fh returned a disconnected dentry\n",
++ au_sbtype(h_sb));
++ goto out_h_parent;
++ }
++
++ dentry = ERR_PTR(-ENOMEM);
++ pathname = (void *)__get_free_page(GFP_NOFS);
++ if (unlikely(!pathname))
++ goto out_h_parent;
++
++ root = sb->s_root;
++ nd.path.mnt = h_mnt;
++ di_read_lock_parent(root, !AuLock_IR);
++ nd.path.dentry = au_h_dptr(root, bindex);
++ di_read_unlock(root, !AuLock_IR);
++ p = au_build_path(h_parent, &nd.path, pathname, PAGE_SIZE, sb);
++ dentry = (void *)p;
++ if (IS_ERR(p))
++ goto out_pathname;
++
++ si_read_unlock(sb);
++ err = vfsub_path_lookup(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd);
++ dentry = ERR_PTR(err);
++ if (unlikely(err))
++ goto out_relock;
++
++ dentry = ERR_PTR(-ENOENT);
++ AuDebugOn(au_test_anon(nd.path.dentry));
++ if (unlikely(!nd.path.dentry->d_inode))
++ goto out_nd;
++
++ if (ino != nd.path.dentry->d_inode->i_ino)
++ dentry = au_lkup_by_ino(&nd.path, ino, /*nsi_lock*/NULL);
++ else
++ dentry = dget(nd.path.dentry);
++
++ out_nd:
++ path_put(&nd.path);
++ out_relock:
++ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
++ if (!IS_ERR(dentry)) {
++ dput(dentry);
++ dentry = ERR_PTR(-ESTALE);
++ }
++ out_pathname:
++ free_page((unsigned long)pathname);
++ out_h_parent:
++ dput(h_parent);
++ out:
++ /* au_br_put(br); */
++ AuTraceErrPtr(dentry);
++ return dentry;
++}
++
++/* ---------------------------------------------------------------------- */
++
++static struct dentry *
++aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
++ int fh_type)
++{
++ struct dentry *dentry;
++ __u32 *fh = fid->raw;
++ ino_t ino, dir_ino;
++ aufs_bindex_t bindex;
++ struct au_nfsd_si_lock nsi_lock = {
++ .sigen = fh[Fh_sigen],
++ .br_id = fh[Fh_br_id],
++ .force_lock = 0
++ };
++
++ AuDebugOn(fh_len < Fh_tail);
++
++ dentry = ERR_PTR(-ESTALE);
++ /* branch id may be wrapped around */
++ bindex = si_nfsd_read_lock(sb, &nsi_lock);
++ if (unlikely(bindex < 0))
++ goto out;
++ nsi_lock.force_lock = 1;
++
++ /* is this inode still cached? */
++ ino = decode_ino(fh + Fh_ino);
++ AuDebugOn(ino == AUFS_ROOT_INO);
++ dir_ino = decode_ino(fh + Fh_dir_ino);
++ dentry = decode_by_ino(sb, ino, dir_ino);
++ if (IS_ERR(dentry))
++ goto out_unlock;
++ if (dentry)
++ goto accept;
++
++ /* is the parent dir cached? */
++ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
++ if (IS_ERR(dentry))
++ goto out_unlock;
++ if (dentry)
++ goto accept;
++
++ /* lookup path */
++ dentry = decode_by_path(sb, bindex, ino, fh, fh_len, &nsi_lock);
++ if (IS_ERR(dentry))
++ goto out_unlock;
++ if (unlikely(!dentry))
++ /* todo?: make it ESTALE */
++ goto out_unlock;
++
++ accept:
++ if (dentry->d_inode->i_generation == fh[Fh_igen])
++ goto out_unlock; /* success */
++
++ dput(dentry);
++ dentry = ERR_PTR(-ESTALE);
++ out_unlock:
++ si_read_unlock(sb);
++ out:
++ AuTraceErrPtr(dentry);
++ return dentry;
++}
++
++#if 0 /* reserved for future use */
++/* support subtreecheck option */
++static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
++ int fh_len, int fh_type)
++{
++ struct dentry *parent;
++ __u32 *fh = fid->raw;
++ ino_t dir_ino;
++
++ dir_ino = decode_ino(fh + Fh_dir_ino);
++ parent = decode_by_ino(sb, dir_ino, 0);
++ if (IS_ERR(parent))
++ goto out;
++ if (!parent)
++ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
++ dir_ino, fh, fh_len);
++
++ out:
++ AuTraceErrPtr(parent);
++ return parent;
++}
++#endif
++
++/* ---------------------------------------------------------------------- */
++
++static int aufs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len,
++ int connectable)
++{
++ int err;
++ aufs_bindex_t bindex, bend;
++ struct super_block *sb, *h_sb;
++ struct inode *inode;
++ struct dentry *parent, *h_parent;
++ struct au_branch *br;
++
++ AuDebugOn(au_test_anon(dentry));
++
++ parent = NULL;
++ err = -ENOSPC;
++ if (unlikely(*max_len <= Fh_tail)) {
++ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
++ goto out;
++ }
++
++ err = FILEID_ROOT;
++ if (IS_ROOT(dentry)) {
++ AuDebugOn(dentry->d_inode->i_ino != AUFS_ROOT_INO);
++ goto out;
++ }
++
++ err = -EIO;
++ h_parent = NULL;
++ sb = dentry->d_sb;
++ aufs_read_lock(dentry, AuLock_FLUSH | AuLock_IR);
++ parent = dget_parent(dentry);
++ di_read_lock_parent(parent, !AuLock_IR);
++ inode = dentry->d_inode;
++ AuDebugOn(!inode);
++#ifdef CONFIG_AUFS_DEBUG
++ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
++ AuWarn1("NFS-exporting requires xino\n");
++#endif
++
++ bend = au_dbtaildir(parent);
++ for (bindex = au_dbstart(parent); bindex <= bend; bindex++) {
++ h_parent = au_h_dptr(parent, bindex);
++ if (h_parent) {
++ dget(h_parent);
++ break;
++ }
++ }
++ if (unlikely(!h_parent))
++ goto out_unlock;
++
++ err = -EPERM;
++ br = au_sbr(sb, bindex);
++ h_sb = br->br_mnt->mnt_sb;
++ if (unlikely(!h_sb->s_export_op)) {
++ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
++ goto out_dput;
++ }
++
++ fh[Fh_br_id] = br->br_id;
++ fh[Fh_sigen] = au_sigen(sb);
++ encode_ino(fh + Fh_ino, inode->i_ino);
++ encode_ino(fh + Fh_dir_ino, parent->d_inode->i_ino);
++ fh[Fh_igen] = inode->i_generation;
++
++ *max_len -= Fh_tail;
++ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
++ max_len,
++ /*connectable or subtreecheck*/0);
++ err = fh[Fh_h_type];
++ *max_len += Fh_tail;
++ /* todo: macros? */
++ if (err != 255)
++ err = 99;
++ else
++ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
++
++ out_dput:
++ dput(h_parent);
++ out_unlock:
++ di_read_unlock(parent, !AuLock_IR);
++ dput(parent);
++ aufs_read_unlock(dentry, AuLock_IR);
++ out:
++ if (unlikely(err < 0))
++ err = 255;
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++static struct export_operations aufs_export_op = {
++ .fh_to_dentry = aufs_fh_to_dentry,
++ /* .fh_to_parent = aufs_fh_to_parent, */
++ .encode_fh = aufs_encode_fh
++};
++
++void au_export_init(struct super_block *sb)
++{
++ struct au_sbinfo *sbinfo;
++ __u32 u;
++
++ sb->s_export_op = &aufs_export_op;
++ sbinfo = au_sbi(sb);
++ sbinfo->si_xigen = NULL;
++ get_random_bytes(&u, sizeof(u));
++ BUILD_BUG_ON(sizeof(u) != sizeof(int));
++ atomic_set(&sbinfo->si_xigen_next, u);
++}
+diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c
+new file mode 100644
+index 0000000..867d1e1
+--- /dev/null
++++ b/fs/aufs/f_op.c
+@@ -0,0 +1,810 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * file and vm operations
++ */
++
++#include <linux/file.h>
++#include <linux/fs_stack.h>
++#include <linux/mm.h>
++#include <linux/security.h>
++#include "aufs.h"
++
++/* common function to regular file and dir */
++int aufs_flush(struct file *file, fl_owner_t id)
++{
++ int err;
++ aufs_bindex_t bindex, bend;
++ struct dentry *dentry;
++ struct file *h_file;
++
++ dentry = file->f_dentry;
++ si_noflush_read_lock(dentry->d_sb);
++ fi_read_lock(file);
++ di_read_lock_child(dentry, AuLock_IW);
++
++ err = 0;
++ bend = au_fbend(file);
++ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
++ h_file = au_h_fptr(file, bindex);
++ if (!h_file || !h_file->f_op || !h_file->f_op->flush)
++ continue;
++
++ err = h_file->f_op->flush(h_file, id);
++ if (!err)
++ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
++ /*ignore*/
++ }
++ au_cpup_attr_timesizes(dentry->d_inode);
++
++ di_read_unlock(dentry, AuLock_IW);
++ fi_read_unlock(file);
++ si_read_unlock(dentry->d_sb);
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++int au_do_open_nondir(struct file *file, int flags)
++{
++ int err;
++ aufs_bindex_t bindex;
++ struct file *h_file;
++ struct dentry *dentry;
++ struct au_finfo *finfo;
++
++ FiMustWriteLock(file);
++
++ err = 0;
++ dentry = file->f_dentry;
++ finfo = au_fi(file);
++ finfo->fi_h_vm_ops = NULL;
++ finfo->fi_vm_ops = NULL;
++ bindex = au_dbstart(dentry);
++ /* O_TRUNC is processed already */
++ BUG_ON(au_test_ro(dentry->d_sb, bindex, dentry->d_inode)
++ && (flags & O_TRUNC));
++
++ h_file = au_h_open(dentry, bindex, flags, file);
++ if (IS_ERR(h_file))
++ err = PTR_ERR(h_file);
++ else {
++ au_set_fbstart(file, bindex);
++ au_set_fbend(file, bindex);
++ au_set_h_fptr(file, bindex, h_file);
++ au_update_figen(file);
++ /* todo: necessary? */
++ /* file->f_ra = h_file->f_ra; */
++ }
++ return err;
++}
++
++static int aufs_open_nondir(struct inode *inode __maybe_unused,
++ struct file *file)
++{
++ return au_do_open(file, au_do_open_nondir);
++}
++
++int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
++{
++ kfree(au_fi(file)->fi_vm_ops);
++ au_finfo_fin(file);
++ return 0;
++}
++
++/* ---------------------------------------------------------------------- */
++
++static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
++ loff_t *ppos)
++{
++ ssize_t err;
++ struct dentry *dentry;
++ struct file *h_file;
++ struct super_block *sb;
++
++ dentry = file->f_dentry;
++ sb = dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
++ if (unlikely(err))
++ goto out;
++
++ h_file = au_h_fptr(file, au_fbstart(file));
++ err = vfsub_read_u(h_file, buf, count, ppos);
++ /* todo: necessary? */
++ /* file->f_ra = h_file->f_ra; */
++ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
++
++ di_read_unlock(dentry, AuLock_IR);
++ fi_read_unlock(file);
++ out:
++ si_read_unlock(sb);
++ return err;
++}
++
++static ssize_t aufs_write(struct file *file, const char __user *ubuf,
++ size_t count, loff_t *ppos)
++{
++ ssize_t err;
++ aufs_bindex_t bstart;
++ struct au_pin pin;
++ struct dentry *dentry;
++ struct inode *inode;
++ struct super_block *sb;
++ struct file *h_file;
++ char __user *buf = (char __user *)ubuf;
++
++ dentry = file->f_dentry;
++ sb = dentry->d_sb;
++ inode = dentry->d_inode;
++ mutex_lock(&inode->i_mutex);
++ si_read_lock(sb, AuLock_FLUSH);
++
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
++ if (unlikely(err))
++ goto out;
++
++ err = au_ready_to_write(file, -1, &pin);
++ di_downgrade_lock(dentry, AuLock_IR);
++ if (unlikely(err))
++ goto out_unlock;
++
++ bstart = au_fbstart(file);
++ h_file = au_h_fptr(file, bstart);
++ au_unpin(&pin);
++ err = vfsub_write_u(h_file, buf, count, ppos);
++ au_cpup_attr_timesizes(inode);
++ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
++
++ out_unlock:
++ di_read_unlock(dentry, AuLock_IR);
++ fi_write_unlock(file);
++ out:
++ si_read_unlock(sb);
++ mutex_unlock(&inode->i_mutex);
++ return err;
++}
++
++static ssize_t au_do_aio(struct file *h_file, int rw, struct kiocb *kio,
++ const struct iovec *iov, unsigned long nv, loff_t pos)
++{
++ ssize_t err;
++ struct file *file;
++
++ err = security_file_permission(h_file, rw);
++ if (unlikely(err))
++ goto out;
++
++ file = kio->ki_filp;
++ if (!is_sync_kiocb(kio)) {
++ get_file(h_file);
++ fput(file);
++ }
++ kio->ki_filp = h_file;
++ if (rw == MAY_READ)
++ err = h_file->f_op->aio_read(kio, iov, nv, pos);
++ else if (rw == MAY_WRITE)
++ err = h_file->f_op->aio_write(kio, iov, nv, pos);
++ else
++ BUG();
++ /* do not restore kio->ki_filp */
++
++ out:
++ return err;
++}
++
++static ssize_t aufs_aio_read(struct kiocb *kio, const struct iovec *iov,
++ unsigned long nv, loff_t pos)
++{
++ ssize_t err;
++ struct file *file, *h_file;
++ struct dentry *dentry;
++ struct super_block *sb;
++
++ file = kio->ki_filp;
++ dentry = file->f_dentry;
++ sb = dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
++ if (unlikely(err))
++ goto out;
++
++ err = -ENOSYS;
++ h_file = au_h_fptr(file, au_fbstart(file));
++ if (h_file->f_op && h_file->f_op->aio_read) {
++ err = au_do_aio(h_file, MAY_READ, kio, iov, nv, pos);
++ /* todo: necessary? */
++ /* file->f_ra = h_file->f_ra; */
++ fsstack_copy_attr_atime(dentry->d_inode,
++ h_file->f_dentry->d_inode);
++ } else
++ /* currently there is no such fs */
++ WARN_ON_ONCE(h_file->f_op && h_file->f_op->read);
++
++ di_read_unlock(dentry, AuLock_IR);
++ fi_read_unlock(file);
++
++ out:
++ si_read_unlock(sb);
++ return err;
++}
++
++static ssize_t aufs_aio_write(struct kiocb *kio, const struct iovec *iov,
++ unsigned long nv, loff_t pos)
++{
++ ssize_t err;
++ struct au_pin pin;
++ struct dentry *dentry;
++ struct inode *inode;
++ struct super_block *sb;
++ struct file *file, *h_file;
++
++ file = kio->ki_filp;
++ dentry = file->f_dentry;
++ sb = dentry->d_sb;
++ inode = dentry->d_inode;
++ mutex_lock(&inode->i_mutex);
++ si_read_lock(sb, AuLock_FLUSH);
++
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
++ if (unlikely(err))
++ goto out;
++
++ err = au_ready_to_write(file, -1, &pin);
++ di_downgrade_lock(dentry, AuLock_IR);
++ if (unlikely(err))
++ goto out_unlock;
++
++ err = -ENOSYS;
++ h_file = au_h_fptr(file, au_fbstart(file));
++ au_unpin(&pin);
++ if (h_file->f_op && h_file->f_op->aio_write) {
++ err = au_do_aio(h_file, MAY_WRITE, kio, iov, nv, pos);
++ au_cpup_attr_timesizes(inode);
++ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
++ } else
++ /* currently there is no such fs */
++ WARN_ON_ONCE(h_file->f_op && h_file->f_op->write);
++
++ out_unlock:
++ di_read_unlock(dentry, AuLock_IR);
++ fi_write_unlock(file);
++ out:
++ si_read_unlock(sb);
++ mutex_unlock(&inode->i_mutex);
++ return err;
++}
++
++static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
++ struct pipe_inode_info *pipe, size_t len,
++ unsigned int flags)
++{
++ ssize_t err;
++ struct file *h_file;
++ struct dentry *dentry;
++ struct super_block *sb;
++
++ dentry = file->f_dentry;
++ sb = dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
++ if (unlikely(err))
++ goto out;
++
++ err = -EINVAL;
++ h_file = au_h_fptr(file, au_fbstart(file));
++ if (au_test_loopback_kthread()) {
++ file->f_mapping = h_file->f_mapping;
++ smp_mb(); /* unnecessary? */
++ }
++ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
++ /* todo: necessasry? */
++ /* file->f_ra = h_file->f_ra; */
++ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
++
++ di_read_unlock(dentry, AuLock_IR);
++ fi_read_unlock(file);
++
++ out:
++ si_read_unlock(sb);
++ return err;
++}
++
++static ssize_t
++aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
++ size_t len, unsigned int flags)
++{
++ ssize_t err;
++ struct au_pin pin;
++ struct dentry *dentry;
++ struct inode *inode;
++ struct super_block *sb;
++ struct file *h_file;
++
++ dentry = file->f_dentry;
++ inode = dentry->d_inode;
++ mutex_lock(&inode->i_mutex);
++ sb = dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
++ if (unlikely(err))
++ goto out;
++
++ err = au_ready_to_write(file, -1, &pin);
++ di_downgrade_lock(dentry, AuLock_IR);
++ if (unlikely(err))
++ goto out_unlock;
++
++ h_file = au_h_fptr(file, au_fbstart(file));
++ au_unpin(&pin);
++ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
++ au_cpup_attr_timesizes(inode);
++ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
++
++ out_unlock:
++ di_read_unlock(dentry, AuLock_IR);
++ fi_write_unlock(file);
++ out:
++ si_read_unlock(sb);
++ mutex_unlock(&inode->i_mutex);
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++static struct file *au_safe_file(struct vm_area_struct *vma)
++{
++ struct file *file;
++
++ file = vma->vm_file;
++ if (file->private_data && au_test_aufs(file->f_dentry->d_sb))
++ return file;
++ return NULL;
++}
++
++static void au_reset_file(struct vm_area_struct *vma, struct file *file)
++{
++ vma->vm_file = file;
++ /* smp_mb(); */ /* flush vm_file */
++}
++
++static int aufs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
++{
++ int err;
++ static DECLARE_WAIT_QUEUE_HEAD(wq);
++ struct file *file, *h_file;
++ struct au_finfo *finfo;
++
++ /* todo: non-robr mode, user vm_file as it is? */
++ wait_event(wq, (file = au_safe_file(vma)));
++
++ /* do not revalidate, no si lock */
++ finfo = au_fi(file);
++ h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file;
++ AuDebugOn(!h_file || !finfo->fi_h_vm_ops);
++
++ mutex_lock(&finfo->fi_vm_mtx);
++ vma->vm_file = h_file;
++ err = finfo->fi_h_vm_ops->fault(vma, vmf);
++ /* todo: necessary? */
++ /* file->f_ra = h_file->f_ra; */
++ au_reset_file(vma, file);
++ mutex_unlock(&finfo->fi_vm_mtx);
++#if 0 /* def CONFIG_SMP */
++ /* wake_up_nr(&wq, online_cpu - 1); */
++ wake_up_all(&wq);
++#else
++ wake_up(&wq);
++#endif
++
++ return err;
++}
++
++static int aufs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
++{
++ int err;
++ static DECLARE_WAIT_QUEUE_HEAD(wq);
++ struct file *file, *h_file;
++ struct au_finfo *finfo;
++
++ wait_event(wq, (file = au_safe_file(vma)));
++
++ finfo = au_fi(file);
++ h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file;
++ AuDebugOn(!h_file || !finfo->fi_h_vm_ops);
++
++ mutex_lock(&finfo->fi_vm_mtx);
++ vma->vm_file = h_file;
++ err = finfo->fi_h_vm_ops->page_mkwrite(vma, page);
++ au_reset_file(vma, file);
++ mutex_unlock(&finfo->fi_vm_mtx);
++ wake_up(&wq);
++
++ return err;
++}
++
++static void aufs_vm_close(struct vm_area_struct *vma)
++{
++ static DECLARE_WAIT_QUEUE_HEAD(wq);
++ struct file *file, *h_file;
++ struct au_finfo *finfo;
++
++ wait_event(wq, (file = au_safe_file(vma)));
++
++ finfo = au_fi(file);
++ h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file;
++ AuDebugOn(!h_file || !finfo->fi_h_vm_ops);
++
++ mutex_lock(&finfo->fi_vm_mtx);
++ vma->vm_file = h_file;
++ finfo->fi_h_vm_ops->close(vma);
++ au_reset_file(vma, file);
++ mutex_unlock(&finfo->fi_vm_mtx);
++ wake_up(&wq);
++}
++
++static struct vm_operations_struct aufs_vm_ops = {
++ /* .close and .page_mkwrite are not set by default */
++ .fault = aufs_fault,
++};
++
++/* ---------------------------------------------------------------------- */
++
++static struct vm_operations_struct *au_vm_ops(struct file *h_file,
++ struct vm_area_struct *vma)
++{
++ struct vm_operations_struct *vm_ops;
++ int err;
++
++ /* todo: call security_file_mmap() here */
++
++ vm_ops = ERR_PTR(-ENODEV);
++ if (!h_file->f_op || !h_file->f_op->mmap)
++ goto out;
++
++ err = h_file->f_op->mmap(h_file, vma);
++ vm_ops = ERR_PTR(err);
++ if (unlikely(err))
++ goto out;
++
++ vm_ops = vma->vm_ops;
++ err = do_munmap(current->mm, vma->vm_start,
++ vma->vm_end - vma->vm_start);
++ if (unlikely(err)) {
++ AuIOErr("failed internal unmapping %.*s, %d\n",
++ AuDLNPair(h_file->f_dentry), err);
++ vm_ops = ERR_PTR(-EIO);
++ }
++
++ out:
++ return vm_ops;
++}
++
++static int au_custom_vm_ops(struct au_finfo *finfo, struct vm_area_struct *vma)
++{
++ int err;
++ struct vm_operations_struct *h_ops;
++
++ AuRwMustAnyLock(&finfo->fi_rwsem);
++
++ err = 0;
++ h_ops = finfo->fi_h_vm_ops;
++ AuDebugOn(!h_ops);
++ if ((!h_ops->page_mkwrite && !h_ops->close)
++ || finfo->fi_vm_ops)
++ goto out;
++
++ err = -ENOMEM;
++ finfo->fi_vm_ops = kmemdup(&aufs_vm_ops, sizeof(aufs_vm_ops), GFP_NOFS);
++ if (unlikely(!finfo->fi_vm_ops))
++ goto out;
++
++ err = 0;
++ if (h_ops->page_mkwrite)
++ finfo->fi_vm_ops->page_mkwrite = aufs_page_mkwrite;
++ if (h_ops->close)
++ finfo->fi_vm_ops->close = aufs_vm_close;
++
++ vma->vm_ops = finfo->fi_vm_ops;
++
++ out:
++ return err;
++}
++
++static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
++{
++ int err;
++ unsigned char wlock, mmapped;
++ struct dentry *dentry;
++ struct super_block *sb;
++ struct file *h_file;
++ struct vm_operations_struct *vm_ops;
++
++ dentry = file->f_dentry;
++ wlock = !!(file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
++ sb = dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
++ if (unlikely(err))
++ goto out;
++
++ mmapped = !!au_test_mmapped(file);
++ if (wlock) {
++ struct au_pin pin;
++
++ err = au_ready_to_write(file, -1, &pin);
++ di_downgrade_lock(dentry, AuLock_IR);
++ if (unlikely(err))
++ goto out_unlock;
++ au_unpin(&pin);
++ } else
++ di_downgrade_lock(dentry, AuLock_IR);
++
++ h_file = au_h_fptr(file, au_fbstart(file));
++ if (!mmapped && au_test_fs_bad_mapping(h_file->f_dentry->d_sb)) {
++ /*
++ * by this assignment, f_mapping will differs from aufs inode
++ * i_mapping.
++ * if someone else mixes the use of f_dentry->d_inode and
++ * f_mapping->host, then a problem may arise.
++ */
++ file->f_mapping = h_file->f_mapping;
++ }
++
++ vm_ops = NULL;
++ if (!mmapped) {
++ vm_ops = au_vm_ops(h_file, vma);
++ err = PTR_ERR(vm_ops);
++ if (IS_ERR(vm_ops))
++ goto out_unlock;
++ }
++
++ /*
++ * unnecessary to handle MAP_DENYWRITE and deny_write_access()?
++ * currently MAP_DENYWRITE from userspace is ignored, but elf loader
++ * sets it. when FMODE_EXEC is set (by open_exec() or sys_uselib()),
++ * both of the aufs file and the lower file is deny_write_access()-ed.
++ * finally I hope we can skip handlling MAP_DENYWRITE here.
++ */
++ err = generic_file_mmap(file, vma);
++ if (unlikely(err))
++ goto out_unlock;
++
++ vma->vm_ops = &aufs_vm_ops;
++ if (!mmapped) {
++ struct au_finfo *finfo = au_fi(file);
++
++ finfo->fi_h_vm_ops = vm_ops;
++ mutex_init(&finfo->fi_vm_mtx);
++ }
++
++ err = au_custom_vm_ops(au_fi(file), vma);
++ if (unlikely(err))
++ goto out_unlock;
++
++ vfsub_file_accessed(h_file);
++ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
++
++ out_unlock:
++ di_read_unlock(dentry, AuLock_IR);
++ fi_write_unlock(file);
++ out:
++ si_read_unlock(sb);
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++static int aufs_fsync_nondir(struct file *file, struct dentry *dentry,
++ int datasync)
++{
++ int err;
++ struct au_pin pin;
++ struct inode *inode;
++ struct file *h_file;
++ struct super_block *sb;
++
++ inode = dentry->d_inode;
++ IMustLock(file->f_mapping->host);
++ if (inode != file->f_mapping->host) {
++ mutex_unlock(&file->f_mapping->host->i_mutex);
++ mutex_lock(&inode->i_mutex);
++ }
++ IMustLock(inode);
++
++ sb = dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++
++ err = 0; /* -EBADF; */ /* posix? */
++ if (unlikely(!(file->f_mode & FMODE_WRITE)))
++ goto out;
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
++ if (unlikely(err))
++ goto out;
++
++ err = au_ready_to_write(file, -1, &pin);
++ di_downgrade_lock(dentry, AuLock_IR);
++ if (unlikely(err))
++ goto out_unlock;
++ au_unpin(&pin);
++
++ err = -EINVAL;
++ h_file = au_h_fptr(file, au_fbstart(file));
++ if (h_file->f_op && h_file->f_op->fsync) {
++ struct dentry *h_d;
++ struct mutex *h_mtx;
++
++ /*
++ * no filemap_fdatawrite() since aufs file has no its own
++ * mapping, but dir.
++ */
++ h_d = h_file->f_dentry;
++ h_mtx = &h_d->d_inode->i_mutex;
++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
++ err = h_file->f_op->fsync(h_file, h_d, datasync);
++ if (!err)
++ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
++ /*ignore*/
++ au_cpup_attr_timesizes(inode);
++ mutex_unlock(h_mtx);
++ }
++
++ out_unlock:
++ di_read_unlock(dentry, AuLock_IR);
++ fi_write_unlock(file);
++ out:
++ si_read_unlock(sb);
++ if (inode != file->f_mapping->host) {
++ mutex_unlock(&inode->i_mutex);
++ mutex_lock(&file->f_mapping->host->i_mutex);
++ }
++ return err;
++}
++
++/* no one supports this operation, currently */
++#if 0
++static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
++{
++ int err;
++ struct au_pin pin;
++ struct dentry *dentry;
++ struct inode *inode;
++ struct file *file, *h_file;
++ struct super_block *sb;
++
++ file = kio->ki_filp;
++ dentry = file->f_dentry;
++ inode = dentry->d_inode;
++ mutex_lock(&inode->i_mutex);
++
++ sb = dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++
++ err = 0; /* -EBADF; */ /* posix? */
++ if (unlikely(!(file->f_mode & FMODE_WRITE)))
++ goto out;
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
++ if (unlikely(err))
++ goto out;
++
++ err = au_ready_to_write(file, -1, &pin);
++ di_downgrade_lock(dentry, AuLock_IR);
++ if (unlikely(err))
++ goto out_unlock;
++ au_unpin(&pin);
++
++ err = -ENOSYS;
++ h_file = au_h_fptr(file, au_fbstart(file));
++ if (h_file->f_op && h_file->f_op->aio_fsync) {
++ struct dentry *h_d;
++ struct mutex *h_mtx;
++
++ h_d = h_file->f_dentry;
++ h_mtx = &h_d->d_inode->i_mutex;
++ if (!is_sync_kiocb(kio)) {
++ get_file(h_file);
++ fput(file);
++ }
++ kio->ki_filp = h_file;
++ err = h_file->f_op->aio_fsync(kio, datasync);
++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
++ if (!err)
++ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
++ /*ignore*/
++ au_cpup_attr_timesizes(inode);
++ mutex_unlock(h_mtx);
++ }
++
++ out_unlock:
++ di_read_unlock(dentry, AuLock_IR);
++ fi_write_unlock(file);
++ out:
++ si_read_unlock(sb);
++ mutex_unlock(&inode->i_mutex);
++ return err;
++}
++#endif
++
++static int aufs_fasync(int fd, struct file *file, int flag)
++{
++ int err;
++ struct file *h_file;
++ struct dentry *dentry;
++ struct super_block *sb;
++
++ dentry = file->f_dentry;
++ sb = dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
++ if (unlikely(err))
++ goto out;
++
++ h_file = au_h_fptr(file, au_fbstart(file));
++ if (h_file->f_op && h_file->f_op->fasync)
++ err = h_file->f_op->fasync(fd, h_file, flag);
++
++ di_read_unlock(dentry, AuLock_IR);
++ fi_read_unlock(file);
++
++ out:
++ si_read_unlock(sb);
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++/* no one supports this operation, currently */
++#if 0
++static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
++ size_t len, loff_t *pos , int more)
++{
++}
++#endif
++
++/* ---------------------------------------------------------------------- */
++
++struct file_operations aufs_file_fop = {
++ /*
++ * while generic_file_llseek/_unlocked() don't use BKL,
++ * don't use it since it operates file->f_mapping->host.
++ * in aufs, it may be a real file and may confuse users by UDBA.
++ */
++ /* .llseek = generic_file_llseek, */
++
++ .read = aufs_read,
++ .write = aufs_write,
++ .aio_read = aufs_aio_read,
++ .aio_write = aufs_aio_write,
++ .unlocked_ioctl = aufs_ioctl_nondir,
++ .mmap = aufs_mmap,
++ .open = aufs_open_nondir,
++ .flush = aufs_flush,
++ .release = aufs_release_nondir,
++ .fsync = aufs_fsync_nondir,
++ /* .aio_fsync = aufs_aio_fsync_nondir, */
++ .fasync = aufs_fasync,
++ /* .sendpage = aufs_sendpage, */
++ .splice_write = aufs_splice_write,
++ .splice_read = aufs_splice_read,
++#if 0
++ .aio_splice_write = aufs_aio_splice_write,
++ .aio_splice_read = aufs_aio_splice_read
++#endif
++};
+diff --git a/fs/aufs/f_op_sp.c b/fs/aufs/f_op_sp.c
+new file mode 100644
+index 0000000..03fddc0
+--- /dev/null
++++ b/fs/aufs/f_op_sp.c
+@@ -0,0 +1,290 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * file operations for special files.
++ * while they exist in aufs virtually,
++ * their file I/O is handled out of aufs.
++ */
++
++#include <linux/fs_stack.h>
++#include "aufs.h"
++
++static ssize_t aufs_aio_read_sp(struct kiocb *kio, const struct iovec *iov,
++ unsigned long nv, loff_t pos)
++{
++ ssize_t err;
++ aufs_bindex_t bstart;
++ unsigned char wbr;
++ struct file *file, *h_file;
++ struct super_block *sb;
++
++ file = kio->ki_filp;
++ sb = file->f_dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++ fi_read_lock(file);
++ bstart = au_fbstart(file);
++ h_file = au_h_fptr(file, bstart);
++ fi_read_unlock(file);
++ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm);
++ si_read_unlock(sb);
++
++ /* do not change the file in kio */
++ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_read);
++ err = h_file->f_op->aio_read(kio, iov, nv, pos);
++ if (err > 0 && wbr)
++ file_accessed(h_file);
++
++ return err;
++}
++
++static ssize_t aufs_aio_write_sp(struct kiocb *kio, const struct iovec *iov,
++ unsigned long nv, loff_t pos)
++{
++ ssize_t err;
++ aufs_bindex_t bstart;
++ unsigned char wbr;
++ struct super_block *sb;
++ struct file *file, *h_file;
++
++ file = kio->ki_filp;
++ sb = file->f_dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++ fi_read_lock(file);
++ bstart = au_fbstart(file);
++ h_file = au_h_fptr(file, bstart);
++ fi_read_unlock(file);
++ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm);
++ si_read_unlock(sb);
++
++ /* do not change the file in kio */
++ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_write);
++ err = h_file->f_op->aio_write(kio, iov, nv, pos);
++ if (err > 0 && wbr)
++ file_update_time(h_file);
++
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++static int aufs_release_sp(struct inode *inode, struct file *file)
++{
++ int err;
++ struct file *h_file;
++
++ fi_read_lock(file);
++ h_file = au_h_fptr(file, au_fbstart(file));
++ fi_read_unlock(file);
++ /* close this fifo in aufs */
++ err = h_file->f_op->release(inode, file); /* ignore */
++ aufs_release_nondir(inode, file); /* ignore */
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++/* currently, support only FIFO */
++enum {AuSp_FIFO, AuSp_FIFO_R, AuSp_FIFO_W, AuSp_FIFO_RW,
++ /* AuSp_SOCK, AuSp_CHR, AuSp_BLK, */
++ AuSp_Last};
++static int aufs_open_sp(struct inode *inode, struct file *file);
++static struct au_sp_fop {
++ int done;
++ struct file_operations fop; /* not 'const' */
++ spinlock_t spin;
++} au_sp_fop[AuSp_Last] = {
++ [AuSp_FIFO] = {
++ .fop = {
++ .open = aufs_open_sp
++ }
++ }
++};
++
++static void au_init_fop_sp(struct file *file)
++{
++ struct au_sp_fop *p;
++ int i;
++ struct file *h_file;
++
++ p = au_sp_fop;
++ if (unlikely(!p->done)) {
++ /* initialize first time only */
++ static DEFINE_SPINLOCK(spin);
++
++ spin_lock(&spin);
++ if (!p->done) {
++ BUILD_BUG_ON(sizeof(au_sp_fop)/sizeof(*au_sp_fop)
++ != AuSp_Last);
++ for (i = 0; i < AuSp_Last; i++)
++ spin_lock_init(&p[i].spin);
++ p->done = 1;
++ }
++ spin_unlock(&spin);
++ }
++
++ switch (file->f_mode & (FMODE_READ | FMODE_WRITE)) {
++ case FMODE_READ:
++ i = AuSp_FIFO_R;
++ break;
++ case FMODE_WRITE:
++ i = AuSp_FIFO_W;
++ break;
++ case FMODE_READ | FMODE_WRITE:
++ i = AuSp_FIFO_RW;
++ break;
++ default:
++ BUG();
++ }
++
++ p += i;
++ if (unlikely(!p->done)) {
++ /* initialize first time only */
++ h_file = au_h_fptr(file, au_fbstart(file));
++ spin_lock(&p->spin);
++ if (!p->done) {
++ p->fop = *h_file->f_op;
++ if (p->fop.aio_read)
++ p->fop.aio_read = aufs_aio_read_sp;
++ if (p->fop.aio_write)
++ p->fop.aio_write = aufs_aio_write_sp;
++ p->fop.release = aufs_release_sp;
++ p->done = 1;
++ }
++ spin_unlock(&p->spin);
++ }
++ file->f_op = &p->fop;
++}
++
++static int au_cpup_sp(struct dentry *dentry)
++{
++ int err;
++ aufs_bindex_t bcpup;
++ struct au_pin pin;
++ struct au_wr_dir_args wr_dir_args = {
++ .force_btgt = -1,
++ .flags = 0
++ };
++
++ AuDbg("%.*s\n", AuDLNPair(dentry));
++
++ di_read_unlock(dentry, AuLock_IR);
++ di_write_lock_child(dentry);
++ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
++ if (unlikely(err < 0))
++ goto out;
++ bcpup = err;
++ err = 0;
++ if (bcpup == au_dbstart(dentry))
++ goto out; /* success */
++
++ err = au_pin(&pin, dentry, bcpup, au_opt_udba(dentry->d_sb),
++ AuPin_MNT_WRITE);
++ if (!err) {
++ err = au_sio_cpup_simple(dentry, bcpup, -1, AuCpup_DTIME);
++ au_unpin(&pin);
++ }
++
++ out:
++ di_downgrade_lock(dentry, AuLock_IR);
++ return err;
++}
++
++static int au_do_open_sp(struct file *file, int flags)
++{
++ int err;
++ struct dentry *dentry;
++ struct super_block *sb;
++ struct file *h_file;
++ struct inode *h_inode;
++
++ dentry = file->f_dentry;
++ AuDbg("%.*s\n", AuDLNPair(dentry));
++
++ /*
++ * try copying-up.
++ * operate on the ro branch is not an error.
++ */
++ au_cpup_sp(dentry); /* ignore */
++
++ /* prepare h_file */
++ err = au_do_open_nondir(file, file->f_flags);
++ if (unlikely(err))
++ goto out;
++
++ sb = dentry->d_sb;
++ h_file = au_h_fptr(file, au_fbstart(file));
++ h_inode = h_file->f_dentry->d_inode;
++ di_read_unlock(dentry, AuLock_IR);
++ fi_write_unlock(file);
++ si_read_unlock(sb);
++ /* open this fifo in aufs */
++ err = h_inode->i_fop->open(file->f_dentry->d_inode, file);
++ si_noflush_read_lock(sb);
++ fi_write_lock(file);
++ di_read_lock_child(dentry, AuLock_IR);
++ if (!err)
++ au_init_fop_sp(file);
++ else
++ au_finfo_fin(file);
++
++ out:
++ return err;
++}
++
++static int aufs_open_sp(struct inode *inode, struct file *file)
++{
++ return au_do_open(file, au_do_open_sp);
++}
++
++/* ---------------------------------------------------------------------- */
++
++void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev)
++{
++ init_special_inode(inode, mode, rdev);
++
++ switch (mode & S_IFMT) {
++ case S_IFIFO:
++ inode->i_fop = &au_sp_fop[AuSp_FIFO].fop;
++ /*FALLTHROUGH*/
++ case S_IFCHR:
++ case S_IFBLK:
++ case S_IFSOCK:
++ break;
++ default:
++ AuDebugOn(1);
++ }
++}
++
++int au_special_file(umode_t mode)
++{
++ int ret;
++
++ ret = 0;
++ switch (mode & S_IFMT) {
++ case S_IFIFO:
++#if 0
++ case S_IFCHR:
++ case S_IFBLK:
++ case S_IFSOCK:
++#endif
++ ret = 1;
++ }
++
++ return ret;
++}
+diff --git a/fs/aufs/file.c b/fs/aufs/file.c
+new file mode 100644
+index 0000000..cc24802
+--- /dev/null
++++ b/fs/aufs/file.c
+@@ -0,0 +1,620 @@
++/*
++ * Copyright (C) 2005-2009 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * handling file/dir, and address_space operation
++ */
++
++#include <linux/file.h>
++#include <linux/fsnotify.h>
++#include <linux/namei.h>
++#include <linux/pagemap.h>
++#include "aufs.h"
++
++/*
++ * a dirty trick for handling deny_write_access().
++ * because FMODE_EXEC flag is not passed to f_op->open(),
++ * set it to file->private_data temporary.
++ */
++void au_store_oflag(struct nameidata *nd, struct inode *inode)
++{
++ if (nd
++ /* && !(nd->flags & LOOKUP_CONTINUE) */
++ && (nd->flags & LOOKUP_OPEN)
++ && (nd->intent.open.flags & FMODE_EXEC)
++ && inode
++ && S_ISREG(inode->i_mode)) {
++ /* suppress a warning in lp64 */
++ unsigned long flags = nd->intent.open.flags;
++ nd->intent.open.file->private_data = (void *)flags;
++ /* smp_mb(); */
++ }
++}
++
++/* drop flags for writing */
++unsigned int au_file_roflags(unsigned int flags)
++{
++ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
++ flags |= O_RDONLY | O_NOATIME;
++ return flags;
++}
++
++/* common functions to regular file and dir */
++struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
++ struct file *file)
++{
++ struct file *h_file;
++ struct dentry *h_dentry;
++ struct inode *h_inode;
++ struct super_block *sb;
++ struct au_branch *br;
++ int err;
++
++ /* a race condition can happen between open and unlink/rmdir */
++ h_file = ERR_PTR(-ENOENT);
++ h_dentry = au_h_dptr(dentry, bindex);
++ if (au_test_nfsd(current) && !h_dentry)
++ goto out;
++ h_inode = h_dentry->d_inode;
++ if (au_test_nfsd(current) && !h_inode)
++ goto out;
++ if (unlikely((!d_unhashed(dentry) && d_unhashed(h_dentry))
++ || !h_inode))
++ goto out;
++
++ sb = dentry->d_sb;
++ br = au_sbr(sb, bindex);
++ h_file = ERR_PTR(-EACCES);
++ if (file && (file->f_mode & FMODE_EXEC)
++ && (br->br_mnt->mnt_flags & MNT_NOEXEC))
++ goto out;
++
++ /* drop flags for writing */
++ if (au_test_ro(sb, bindex, dentry->d_inode))
++ flags = au_file_roflags(flags);
++ flags &= ~O_CREAT;
++ atomic_inc(&br->br_count);
++ if (!au_special_file(h_inode->i_mode))
++ h_file = dentry_open(dget(h_dentry), mntget(br->br_mnt), flags);
++ else {
++ /* this block depends upon the configuration */
++ di_read_unlock(dentry, AuLock_IR);
++ fi_write_unlock(file);
++ si_read_unlock(sb);
++ h_file = dentry_open(dget(h_dentry), mntget(br->br_mnt), flags);
++ si_noflush_read_lock(sb);
++ fi_write_lock(file);
++ di_read_lock_child(dentry, AuLock_IR);
++ }
++ if (IS_ERR(h_file))
++ goto out_br;
++
++ if (file && (file->f_mode & FMODE_EXEC)) {
++ h_file->f_mode |= FMODE_EXEC;
++ err = deny_write_access(h_file);
++ if (unlikely(err)) {
++ fput(h_file);
++ h_file = ERR_PTR(err);
++ goto out_br;
++ }
++ }
++ fsnotify_open(h_dentry);
++ goto out; /* success */
++
++ out_br:
++ atomic_dec(&br->br_count);
++ out:
++ return h_file;
++}
++
++int au_do_open(struct file *file, int (*open)(struct file *file, int flags))
++{
++ int err;
++ struct dentry *dentry;
++ struct super_block *sb;
++
++ dentry = file->f_dentry;
++ sb = dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++ err = au_finfo_init(file);
++ if (unlikely(err))
++ goto out;
++
++ di_read_lock_child(dentry, AuLock_IR);
++ err = open(file, file->f_flags);
++ di_read_unlock(dentry, AuLock_IR);
++
++ fi_write_unlock(file);
++ if (unlikely(err))
++ au_finfo_fin(file);
++ out:
++ si_read_unlock(sb);
++ return err;
++}
++
++int au_reopen_nondir(struct file *file)
++{
++ int err;
++ aufs_bindex_t bstart, bindex, bend;
++ struct dentry *dentry;
++ struct file *h_file, *h_file_tmp;
++
++ dentry = file->f_dentry;
++ AuDebugOn(au_special_file(dentry->d_inode->i_mode));
++ bstart = au_dbstart(dentry);
++ h_file_tmp = NULL;
++ if (au_fbstart(file) == bstart) {
++ h_file = au_h_fptr(file, bstart);
++ if (file->f_mode == h_file->f_mode)
++ return 0; /* success */
++ h_file_tmp = h_file;
++ get_file(h_file_tmp);
++ au_set_h_fptr(file, bstart, NULL);
++ }
++ AuDebugOn(au_fbstart(file) < bstart
++ || au_fi(file)->fi_hfile[0 + bstart].hf_file);
++
++ h_file = au_h_open(dentry, bstart, file->f_flags & ~O_TRUNC, file);
++ err = PTR_ERR(h_file);
++ if (IS_ERR(h_file))
++ goto out; /* todo: close all? */
++
++ err = 0;
++ au_set_fbstart(file, bstart);
++ au_set_h_fptr(file, bstart, h_file);
++ au_update_figen(file);
++ /* todo: necessary? */
++ /* file->f_ra = h_file->f_ra; */
++
++ /* close lower files */
++ bend = au_fbend(file);
++ for (bindex = bstart + 1; bindex <= bend; bindex++)
++ au_set_h_fptr(file, bindex, NULL);
++ au_set_fbend(file, bstart);
++
++ out:
++ if (h_file_tmp)
++ fput(h_file_tmp);
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
++ struct dentry *hi_wh)
++{
++ int err;
++ aufs_bindex_t bstart;
++ struct au_dinfo *dinfo;
++ struct dentry *h_dentry;
++
++ dinfo = au_di(file->f_dentry);
++ AuRwMustWriteLock(&dinfo->di_rwsem);
++
++ bstart = dinfo->di_bstart;
++ dinfo->di_bstart = btgt;
++ h_dentry = dinfo->di_hdentry[0 + btgt].hd_dentry;
++ dinfo->di_hdentry[0 + btgt].hd_dentry = hi_wh;
++ err = au_reopen_nondir(file);
++ dinfo->di_hdentry[0 + btgt].hd_dentry = h_dentry;
++ dinfo->di_bstart = bstart;
++
++