omap3-pandora-kernel2: update
[openpandora.oe.git] / recipes / linux / omap3-pandora-kernel / aufs2 / 0007-AUFS2-Add-latest-AUFS2-in-tree-code-for-2.6.27.patch
1 From 4fafa8e8eedf21cf9aeed56f2b193110696154e2 Mon Sep 17 00:00:00 2001
2 From: David-John Willis <John.Willis@Distant-earth.com>
3 Date: Mon, 21 Dec 2009 21:36:25 +0000
4 Subject: [PATCH 7/7] AUFS2: Add latest AUFS2 in-tree code for 2.6.27.
5
6 ---
7  Documentation/ABI/testing/debugfs-aufs             |   40 +
8  Documentation/ABI/testing/sysfs-aufs               |   25 +
9  Documentation/filesystems/aufs/README              |  347 +++++
10  Documentation/filesystems/aufs/design/01intro.txt  |  137 ++
11  Documentation/filesystems/aufs/design/02struct.txt |  218 +++
12  Documentation/filesystems/aufs/design/03lookup.txt |  104 ++
13  Documentation/filesystems/aufs/design/04branch.txt |   76 +
14  .../filesystems/aufs/design/05wbr_policy.txt       |   65 +
15  .../filesystems/aufs/design/06fmode_exec.txt       |   33 +
16  Documentation/filesystems/aufs/design/07mmap.txt   |   53 +
17  Documentation/filesystems/aufs/design/08export.txt |   59 +
18  Documentation/filesystems/aufs/design/09shwh.txt   |   53 +
19  Documentation/filesystems/aufs/design/99plan.txt   |   96 ++
20  fs/Kconfig                                         |    2 +
21  fs/Makefile                                        |    1 +
22  fs/aufs/Kconfig                                    |  136 ++
23  fs/aufs/Makefile                                   |   23 +
24  fs/aufs/aufs.h                                     |   59 +
25  fs/aufs/branch.c                                   |  978 ++++++++++++
26  fs/aufs/branch.h                                   |  219 +++
27  fs/aufs/cpup.c                                     | 1048 +++++++++++++
28  fs/aufs/cpup.h                                     |   81 +
29  fs/aufs/dbgaufs.c                                  |  331 +++++
30  fs/aufs/dbgaufs.h                                  |   52 +
31  fs/aufs/dcsub.c                                    |  223 +++
32  fs/aufs/dcsub.h                                    |   54 +
33  fs/aufs/debug.c                                    |  431 ++++++
34  fs/aufs/debug.h                                    |  232 +++
35  fs/aufs/dentry.c                                   |  875 +++++++++++
36  fs/aufs/dentry.h                                   |  228 +++
37  fs/aufs/dinfo.c                                    |  367 +++++
38  fs/aufs/dir.c                                      |  579 ++++++++
39  fs/aufs/dir.h                                      |  127 ++
40  fs/aufs/export.c                                   |  745 ++++++++++
41  fs/aufs/f_op.c                                     |  810 ++++++++++
42  fs/aufs/f_op_sp.c                                  |  290 ++++
43  fs/aufs/file.c                                     |  620 ++++++++
44  fs/aufs/file.h                                     |  187 +++
45  fs/aufs/finfo.c                                    |  131 ++
46  fs/aufs/fstype.h                                   |  434 ++++++
47  fs/aufs/hinotify.c                                 |  755 ++++++++++
48  fs/aufs/i_op.c                                     |  875 +++++++++++
49  fs/aufs/i_op_add.c                                 |  658 +++++++++
50  fs/aufs/i_op_del.c                                 |  470 ++++++
51  fs/aufs/i_op_ren.c                                 |  965 ++++++++++++
52  fs/aufs/iinfo.c                                    |  283 ++++
53  fs/aufs/inode.c                                    |  414 ++++++
54  fs/aufs/inode.h                                    |  474 ++++++
55  fs/aufs/ioctl.c                                    |  127 ++
56  fs/aufs/loop.c                                     |   55 +
57  fs/aufs/loop.h                                     |   43 +
58  fs/aufs/magic.mk                                   |   66 +
59  fs/aufs/module.c                                   |  173 +++
60  fs/aufs/module.h                                   |   78 +
61  fs/aufs/opts.c                                     | 1550 ++++++++++++++++++++
62  fs/aufs/opts.h                                     |  196 +++
63  fs/aufs/plink.c                                    |  429 ++++++
64  fs/aufs/rdu.c                                      |  333 +++++
65  fs/aufs/rwsem.h                                    |  186 +++
66  fs/aufs/sbinfo.c                                   |  211 +++
67  fs/aufs/spl.h                                      |   57 +
68  fs/aufs/super.c                                    |  874 +++++++++++
69  fs/aufs/super.h                                    |  361 +++++
70  fs/aufs/sysaufs.c                                  |  104 ++
71  fs/aufs/sysaufs.h                                  |  105 ++
72  fs/aufs/sysfs.c                                    |  210 +++
73  fs/aufs/sysrq.c                                    |  118 ++
74  fs/aufs/vdir.c                                     |  884 +++++++++++
75  fs/aufs/vfsub.c                                    |  660 +++++++++
76  fs/aufs/vfsub.h                                    |  145 ++
77  fs/aufs/wbr_policy.c                               |  641 ++++++++
78  fs/aufs/whout.c                                    | 1054 +++++++++++++
79  fs/aufs/whout.h                                    |   87 ++
80  fs/aufs/wkq.c                                      |  259 ++++
81  fs/aufs/wkq.h                                      |   82 +
82  fs/aufs/xino.c                                     | 1199 +++++++++++++++
83  fs/namei.c                                         |    4 +-
84  fs/splice.c                                        |   10 +-
85  include/linux/Kbuild                               |    1 +
86  include/linux/aufs_type.h                          |  195 +++
87  include/linux/namei.h                              |    3 +
88  include/linux/splice.h                             |    6 +
89  scripts/basic/hash                                 |  Bin 0 -> 6907 bytes
90  83 files changed, 25962 insertions(+), 7 deletions(-)
91  create mode 100644 Documentation/ABI/testing/debugfs-aufs
92  create mode 100644 Documentation/ABI/testing/sysfs-aufs
93  create mode 100644 Documentation/filesystems/aufs/README
94  create mode 100644 Documentation/filesystems/aufs/design/01intro.txt
95  create mode 100644 Documentation/filesystems/aufs/design/02struct.txt
96  create mode 100644 Documentation/filesystems/aufs/design/03lookup.txt
97  create mode 100644 Documentation/filesystems/aufs/design/04branch.txt
98  create mode 100644 Documentation/filesystems/aufs/design/05wbr_policy.txt
99  create mode 100644 Documentation/filesystems/aufs/design/06fmode_exec.txt
100  create mode 100644 Documentation/filesystems/aufs/design/07mmap.txt
101  create mode 100644 Documentation/filesystems/aufs/design/08export.txt
102  create mode 100644 Documentation/filesystems/aufs/design/09shwh.txt
103  create mode 100644 Documentation/filesystems/aufs/design/99plan.txt
104  create mode 100644 fs/aufs/Kconfig
105  create mode 100644 fs/aufs/Makefile
106  create mode 100644 fs/aufs/aufs.h
107  create mode 100644 fs/aufs/branch.c
108  create mode 100644 fs/aufs/branch.h
109  create mode 100644 fs/aufs/cpup.c
110  create mode 100644 fs/aufs/cpup.h
111  create mode 100644 fs/aufs/dbgaufs.c
112  create mode 100644 fs/aufs/dbgaufs.h
113  create mode 100644 fs/aufs/dcsub.c
114  create mode 100644 fs/aufs/dcsub.h
115  create mode 100644 fs/aufs/debug.c
116  create mode 100644 fs/aufs/debug.h
117  create mode 100644 fs/aufs/dentry.c
118  create mode 100644 fs/aufs/dentry.h
119  create mode 100644 fs/aufs/dinfo.c
120  create mode 100644 fs/aufs/dir.c
121  create mode 100644 fs/aufs/dir.h
122  create mode 100644 fs/aufs/export.c
123  create mode 100644 fs/aufs/f_op.c
124  create mode 100644 fs/aufs/f_op_sp.c
125  create mode 100644 fs/aufs/file.c
126  create mode 100644 fs/aufs/file.h
127  create mode 100644 fs/aufs/finfo.c
128  create mode 100644 fs/aufs/fstype.h
129  create mode 100644 fs/aufs/hinotify.c
130  create mode 100644 fs/aufs/i_op.c
131  create mode 100644 fs/aufs/i_op_add.c
132  create mode 100644 fs/aufs/i_op_del.c
133  create mode 100644 fs/aufs/i_op_ren.c
134  create mode 100644 fs/aufs/iinfo.c
135  create mode 100644 fs/aufs/inode.c
136  create mode 100644 fs/aufs/inode.h
137  create mode 100644 fs/aufs/ioctl.c
138  create mode 100644 fs/aufs/loop.c
139  create mode 100644 fs/aufs/loop.h
140  create mode 100644 fs/aufs/magic.mk
141  create mode 100644 fs/aufs/module.c
142  create mode 100644 fs/aufs/module.h
143  create mode 100644 fs/aufs/opts.c
144  create mode 100644 fs/aufs/opts.h
145  create mode 100644 fs/aufs/plink.c
146  create mode 100644 fs/aufs/rdu.c
147  create mode 100644 fs/aufs/rwsem.h
148  create mode 100644 fs/aufs/sbinfo.c
149  create mode 100644 fs/aufs/spl.h
150  create mode 100644 fs/aufs/super.c
151  create mode 100644 fs/aufs/super.h
152  create mode 100644 fs/aufs/sysaufs.c
153  create mode 100644 fs/aufs/sysaufs.h
154  create mode 100644 fs/aufs/sysfs.c
155  create mode 100644 fs/aufs/sysrq.c
156  create mode 100644 fs/aufs/vdir.c
157  create mode 100644 fs/aufs/vfsub.c
158  create mode 100644 fs/aufs/vfsub.h
159  create mode 100644 fs/aufs/wbr_policy.c
160  create mode 100644 fs/aufs/whout.c
161  create mode 100644 fs/aufs/whout.h
162  create mode 100644 fs/aufs/wkq.c
163  create mode 100644 fs/aufs/wkq.h
164  create mode 100644 fs/aufs/xino.c
165  create mode 100644 include/linux/aufs_type.h
166  create mode 100644 scripts/basic/hash
167
168 diff --git a/Documentation/ABI/testing/debugfs-aufs b/Documentation/ABI/testing/debugfs-aufs
169 new file mode 100644
170 index 0000000..4110b94
171 --- /dev/null
172 +++ b/Documentation/ABI/testing/debugfs-aufs
173 @@ -0,0 +1,40 @@
174 +What:          /debug/aufs/si_<id>/
175 +Date:          March 2009
176 +Contact:       J. R. Okajima <hooanon05@yahoo.co.jp>
177 +Description:
178 +               Under /debug/aufs, a directory named si_<id> is created
179 +               per aufs mount, where <id> is a unique id generated
180 +               internally.
181 +
182 +What:          /debug/aufs/si_<id>/xib
183 +Date:          March 2009
184 +Contact:       J. R. Okajima <hooanon05@yahoo.co.jp>
185 +Description:
186 +               It shows the consumed blocks by xib (External Inode Number
187 +               Bitmap), its block size and file size.
188 +               When the aufs mount option 'noxino' is specified, it
189 +               will be empty. About XINO files, see
190 +               Documentation/filesystems/aufs/aufs.5 in detail.
191 +
192 +What:          /debug/aufs/si_<id>/xino0, xino1 ... xinoN
193 +Date:          March 2009
194 +Contact:       J. R. Okajima <hooanon05@yahoo.co.jp>
195 +Description:
196 +               It shows the consumed blocks by xino (External Inode Number
197 +               Translation Table), its link count, block size and file
198 +               size.
199 +               When the aufs mount option 'noxino' is specified, it
200 +               will be empty. About XINO files, see
201 +               Documentation/filesystems/aufs/aufs.5 in detail.
202 +
203 +What:          /debug/aufs/si_<id>/xigen
204 +Date:          March 2009
205 +Contact:       J. R. Okajima <hooanon05@yahoo.co.jp>
206 +Description:
207 +               It shows the consumed blocks by xigen (External Inode
208 +               Generation Table), its block size and file size.
209 +               If CONFIG_AUFS_EXPORT is disabled, this entry will not
210 +               be created.
211 +               When the aufs mount option 'noxino' is specified, it
212 +               will be empty. About XINO files, see
213 +               Documentation/filesystems/aufs/aufs.5 in detail.
214 diff --git a/Documentation/ABI/testing/sysfs-aufs b/Documentation/ABI/testing/sysfs-aufs
215 new file mode 100644
216 index 0000000..ca49330
217 --- /dev/null
218 +++ b/Documentation/ABI/testing/sysfs-aufs
219 @@ -0,0 +1,25 @@
220 +What:          /sys/fs/aufs/si_<id>/
221 +Date:          March 2009
222 +Contact:       J. R. Okajima <hooanon05@yahoo.co.jp>
223 +Description:
224 +               Under /sys/fs/aufs, a directory named si_<id> is created
225 +               per aufs mount, where <id> is a unique id generated
226 +               internally.
227 +
228 +What:          /sys/fs/aufs/si_<id>/br0, br1 ... brN
229 +Date:          March 2009
230 +Contact:       J. R. Okajima <hooanon05@yahoo.co.jp>
231 +Description:
232 +               It shows the absolute path of a member directory (which
233 +               is called branch) in aufs, and its permission.
234 +
235 +What:          /sys/fs/aufs/si_<id>/xi_path
236 +Date:          March 2009
237 +Contact:       J. R. Okajima <hooanon05@yahoo.co.jp>
238 +Description:
239 +               It shows the absolute path of XINO (External Inode Number
240 +               Bitmap, Translation Table and Generation Table) file
241 +               even if it is the default path.
242 +               When the aufs mount option 'noxino' is specified, it
243 +               will be empty. About XINO files, see
244 +               Documentation/filesystems/aufs/aufs.5 in detail.
245 diff --git a/Documentation/filesystems/aufs/README b/Documentation/filesystems/aufs/README
246 new file mode 100644
247 index 0000000..2fbd3e6
248 --- /dev/null
249 +++ b/Documentation/filesystems/aufs/README
250 @@ -0,0 +1,347 @@
251 +
252 +Aufs2 -- advanced multi layered unification filesystem version 2
253 +http://aufs.sf.net
254 +Junjiro R. Okajima
255 +
256 +
257 +0. Introduction
258 +----------------------------------------
259 +In the early days, aufs was entirely re-designed and re-implemented
260 +Unionfs Version 1.x series. After many original ideas, approaches,
261 +improvements and implementations, it becomes totally different from
262 +Unionfs while keeping the basic features.
263 +Recently, Unionfs Version 2.x series begin taking some of the same
264 +approaches to aufs1's.
265 +Unionfs is being developed by Professor Erez Zadok at Stony Brook
266 +University and his team.
267 +
268 +This version of AUFS, aufs2 has several purposes.
269 +- to be reviewed easily and widely.
270 +- to make the source files simpler and smaller by dropping several
271 +  original features.
272 +
273 +Through this work, I found some bad things in aufs1 source code and
274 +fixed them. Some of the dropped features will be reverted in the future,
275 +but not all I'm afraid.
276 +Aufs2 supports linux-2.6.27 and later. If you want older kernel version
277 +support, try aufs1 from CVS on SourceForge.
278 +
279 +Note: it becomes clear that "Aufs was rejected. Let's give it up."
280 +According to Christoph Hellwig, linux rejects all union-type filesystems
281 +but UnionMount.
282 +<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
283 +
284 +
285 +1. Features
286 +----------------------------------------
287 +- unite several directories into a single virtual filesystem. The member
288 +  directory is called as a branch.
289 +- you can specify the permission flags to the branch, which are 'readonly',
290 +  'readwrite' and 'whiteout-able.'
291 +- by upper writable branch, internal copyup and whiteout, files/dirs on
292 +  readonly branch are modifiable logically.
293 +- dynamic branch manipulation, add, del.
294 +- etc...
295 +
296 +Also there are many enhancements in aufs1, such as:
297 +- readdir(3) in userspace.
298 +- keep inode number by external inode number table
299 +- keep the timestamps of file/dir in internal copyup operation
300 +- seekable directory, supporting NFS readdir.
301 +- support mmap(2) including /proc/PID/exe symlink, without page-copy
302 +- whiteout is hardlinked in order to reduce the consumption of inodes
303 +  on branch
304 +- do not copyup, nor create a whiteout when it is unnecessary
305 +- revert a single systemcall when an error occurs in aufs
306 +- remount interface instead of ioctl
307 +- maintain /etc/mtab by an external command, /sbin/mount.aufs.
308 +- loopback mounted filesystem as a branch
309 +- kernel thread for removing the dir who has a plenty of whiteouts
310 +- support copyup sparse file (a file which has a 'hole' in it)
311 +- default permission flags for branches
312 +- selectable permission flags for ro branch, whether whiteout can
313 +  exist or not
314 +- export via NFS.
315 +- support <sysfs>/fs/aufs and <debugfs>/aufs.
316 +- support multiple writable branches, some policies to select one
317 +  among multiple writable branches.
318 +- a new semantics for link(2) and rename(2) to support multiple
319 +  writable branches.
320 +- no glibc changes are required.
321 +- pseudo hardlink (hardlink over branches)
322 +- allow a direct access manually to a file on branch, e.g. bypassing aufs.
323 +  including NFS or remote filesystem branch.
324 +- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
325 +- and more...
326 +
327 +Currently these features are dropped temporarily from this version, aufs2.
328 +See design/99plan.txt in detail.
329 +- test only the highest one for the directory permission (dirperm1)
330 +- show whiteout mode (shwh)
331 +- copyup on open (coo=)
332 +- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
333 +  (robr)
334 +- statistics of aufs thread (/sys/fs/aufs/stat)
335 +- delegation mode (dlgt)
336 +  a delegation of the internal branch access to support task I/O
337 +  accounting, which also supports Linux Security Modules (LSM) mainly
338 +  for Suse AppArmor.
339 +- intent.open/create (file open in a single lookup)
340 +
341 +Features or just an idea in the future (see also design/*.txt),
342 +- reorder the branch index without del/re-add.
343 +- permanent xino files for NFSD
344 +- an option for refreshing the opened files after add/del branches
345 +- 'move' policy for copy-up between two writable branches, after
346 +  checking free space.
347 +- O_DIRECT
348 +- light version, without branch manipulation. (unnecessary?)
349 +- copyup in userspace
350 +- inotify in userspace
351 +- readv/writev
352 +- xattr, acl
353 +
354 +
355 +2. Download
356 +----------------------------------------
357 +Kindly, one of the aufs users, the Center for Scientific Computing and Free
358 +Software (C3SL), Federal University of Parana offered me a public GIT
359 +tree space.
360 +
361 +There are three GIT trees, aufs2-2.6, aufs2-standalone and aufs2-util.
362 +While the aufs2-util is always necessary, you need either of aufs2-2.6
363 +or aufs2-standalone.
364 +
365 +The aufs2-2.6 tree includes the whole linux-2.6 GIT tree,
366 +git://git.kernel.org/.../torvalds/linux-2.6.git.
367 +And you cannot select CONFIG_AUFS_FS=m for this version, i.e. you cannot
368 +build aufs2 as an external kernel module.
369 +If you already have linux-2.6 GIT tree, you may want to pull and merge
370 +the "aufs2" branch from this tree.
371 +
372 +On the other hand, the aufs2-standalone tree has only aufs2 source files
373 +and a necessary patch, and you can select CONFIG_AUFS_FS=m. In other
374 +words, the aufs2-standalone tree is generated from aufs2-2.6 tree by,
375 +- extract new files and modifications.
376 +- generate some patch files from modifications.
377 +- generate a ChangeLog file from git-log.
378 +- commit the files newly and no log messages. this is not git-pull.
379 +
380 +Both of aufs2-2.6 and aufs2-standalone trees have a branch whose name is
381 +in form of "aufs2-xx" where "xx" represents the linux kernel version,
382 +"linux-2.6.xx".
383 +
384 +o aufs2-2.6 tree
385 +$ git clone --reference /your/linux-2.6/git/tree \
386 +       http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-2.6.git \
387 +       aufs2-2.6.git
388 +- if you don't have linux-2.6 GIT tree, then remove "--reference ..."
389 +$ cd aufs2-2.6.git
390 +$ git checkout origin/aufs2-xx # for instance, aufs2-27 for linux-2.6.27
391 +                               # aufs2 (no -xx) for the latest -rc version.
392 +
393 +o aufs2-standalone tree
394 +$ git clone http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-standalone.git \
395 +       aufs2-standalone.git
396 +$ cd aufs2-standalone.git
397 +$ git checkout origin/aufs2-xx # for instance, aufs2-27 for linux-2.6.27
398 +                               # aufs2 (no -xx) for the latest -rc version.
399 +
400 +o aufs2-util tree
401 +$ git clone http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-util.git \
402 +       aufs2-util.git
403 +$ cd aufs2-util.git
404 +- no particular tag/branch currently.
405 +
406 +o for advanced users
407 +$ git clone git://git.kernel.org/.../torvalds/linux-2.6.git linux-2.6.git
408 +  It will take very long time.
409 +
410 +$ cd linux-2.6.git
411 +$ git remote add aufs2 http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-2.6.git
412 +$ git checkout -b aufs2-27 v2.6.27
413 +$ git pull aufs2 aufs2-27
414 +  It may take long time again.
415 +  Once pulling completes, you've got linux-2.6.27 and aufs2 for it in a
416 +  branch named aufs2-27, and you can configure and build it.
417 +
418 +Or
419 +
420 +$ git checkout -t -b aufs2 master
421 +$ git pull aufs2 aufs2
422 +  then you've got the latest linux kernel and the latest aufs2 in a
423 +  branch named aufs2, and you can configure and build it.
424 +  But aufs is released once a week, so you may meet a compilation error
425 +  due to mismatching between the mainline and aufs2.
426 +
427 +Or you may want to build linux-2.6.xx.yy instead of linux-2.6.xx, then here
428 +is an approach using linux-2.6-stable GIT tree.
429 +
430 +$ cd linux-2.6.git/..
431 +$ git clone -q --reference ./linux-2.6.git git://git.kernel.org/.../linux-2.6-stable.git \
432 +       linux-2.6-stable.git
433 +  It will take very long time.
434 +
435 +$ cd linux-2.6-stable.git
436 +$ git remote add aufs2 http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-2.6.git
437 +$ git checkout -b aufs2-27.1 v2.6.27.1
438 +$ git pull aufs2 aufs2-27
439 +  then you've got linux-2.6.27.1 and aufs2 for 2.6.27 in a branch named
440 +  aufs2-27.1, and you can configure and build it.
441 +  But the changes made by v2.6.xx.yy may conflict with aufs2-xx, since
442 +  aufs2-xx is for v2.6.xx only. In this case, you may find some patches
443 +  for v2.6.xx.yy in aufs2-standalone.git#aufs2-xx branch if someone else
444 +  have ever requested me to support v2.6.xx.yy and I did it.
445 +
446 +You can also check what was changed by pulling aufs2.
447 +$ git diff v2.6.27.1..aufs2-27.1
448 +
449 +If you want to check the changed files other than fs/aufs, then try this.
450 +$ git diff v2.6.27.1..aufs2-27.1 |
451 +> awk '
452 +> /^diff / {new=1}
453 +> /^diff.*aufs/ {new=0}
454 +> new {print}
455 +> '
456 +
457 +
458 +3. Configuration and Compilation
459 +----------------------------------------
460 +Make sure you have git-checkout'ed the correct branch.
461 +
462 +For aufs2-2.6 tree,
463 +- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS.
464 +- set other aufs configurations if necessary.
465 +
466 +For aufs2-standalone tree,
467 +There are several ways to build.
468 +
469 +You may feel why aufs2-standalone.patch needs to export so many kernel
470 +symbols. Because you selected aufs2-standalone tree instead of aufs2-2.6
471 +tree. The number of necessary symbols to export essentially is zero.
472 +All other symbols are for the external module.
473 +If you don't like aufs2-standalone.patch, then try aufs2-2.6 tree.
474 +
475 +1.
476 +- apply ./aufs2-kbuild.patch to your kernel source files.
477 +- apply ./aufs2-base.patch too.
478 +- apply ./aufs2-standalone.patch too, if you have a plan to set
479 +  CONFIG_AUFS_FS=m. otherwise you don't need ./aufs2-standalone.patch.
480 +- copy ./{Documentation,fs,include} files to your kernel source tree.
481 +- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS, you can select either
482 +  =m or =y.
483 +- and build your kernel as usual.
484 +- install it and reboot your system.
485 +
486 +2.
487 +- module only (CONFIG_AUFS_FS=m).
488 +- apply ./aufs2-base.patch to your kernel source files.
489 +- apply ./aufs2-standalone.patch too.
490 +- build your kernel and reboot.
491 +- edit ./config.mk and set other aufs configurations if necessary.
492 +  Note: You should read ./fs/aufs/Kconfig carefully which describes
493 +  every aufs configurations.
494 +- build the module by simple "make".
495 +- you can specify ${KDIR} make variable which points to your kernel
496 +  source tree.
497 +- copy the build ./aufs.ko to /lib/modules/..., and run depmod -a (or
498 +  reboot simply).
499 +- no need to apply aufs2-kbuild.patch, nor copying source files to your
500 +  kernel source tree.
501 +
502 +And then,
503 +- read README in aufs2-util, build and install it
504 +- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
505 +  then run "make install_ulib" too. And refer to the aufs manual in
506 +  detail.
507 +
508 +
509 +4. Usage
510 +----------------------------------------
511 +At first, make sure aufs2-util are installed, and please read the aufs
512 +manual, aufs.5 in aufs2-util.git tree.
513 +$ man -l aufs.5
514 +
515 +And then,
516 +$ mkdir /tmp/rw /tmp/aufs
517 +# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
518 +
519 +Here is another example. The result is equivalent.
520 +# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
521 +  Or
522 +# mount -t aufs -o br:/tmp/rw none /tmp/aufs
523 +# mount -o remount,append:${HOME} /tmp/aufs
524 +
525 +Then, you can see whole tree of your home dir through /tmp/aufs. If
526 +you modify a file under /tmp/aufs, the one on your home directory is
527 +not affected, instead the same named file will be newly created under
528 +/tmp/rw. And all of your modification to a file will be applied to
529 +the one under /tmp/rw. This is called the file based Copy on Write
530 +(COW) method.
531 +Aufs mount options are described in aufs.5.
532 +
533 +Additionally, there are some sample usages of aufs which are a
534 +diskless system with network booting, and LiveCD over NFS.
535 +See sample dir in CVS tree on SourceForge.
536 +
537 +
538 +5. Contact
539 +----------------------------------------
540 +When you have any problems or strange behaviour in aufs, please let me
541 +know with:
542 +- /proc/mounts (instead of the output of mount(8))
543 +- /sys/module/aufs/*
544 +- /sys/fs/aufs/* (if you have them)
545 +- /debug/aufs/* (if you have them)
546 +- linux kernel version
547 +  if your kernel is not plain, for example modified by distributor,
548 +  the url where i can download its source is necessary too.
549 +- aufs version which was printed at loading the module or booting the
550 +  system, instead of the date you downloaded.
551 +- configuration (define/undefine CONFIG_AUFS_xxx)
552 +- kernel configuration or /proc/config.gz (if you have it)
553 +- behaviour which you think to be incorrect
554 +- actual operation, reproducible one is better
555 +- mailto: aufs-users at lists.sourceforge.net
556 +
557 +Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
558 +and Feature Requests) on SourceForge. Please join and write to
559 +aufs-users ML.
560 +
561 +
562 +6. Acknowledgements
563 +----------------------------------------
564 +Thanks to everyone who has tried or is using aufs, and to whoever
565 +has reported a bug or given any feedback.
566 +
567 +Especially donors:
568 +Tomas Matejicek(slax.org) made a donation (much more than once).
569 +Dai Itasaka made a donation (2007/8).
570 +Chuck Smith made a donation (2008/4, 10 and 12).
571 +Henk Schoneveld made a donation (2008/9).
572 +Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
573 +Francois Dupoux made a donation (2008/11).
574 +Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
575 +aufs2 GIT tree (2009/2).
576 +William Grant made a donation (2009/3).
577 +Patrick Lane made a donation (2009/4).
578 +The Mail Archive (mail-archive.com) made donations (2009/5).
579 +Nippy Networks (Ed Wildgoose) made a donation (2009/7).
580 +New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
581 +
582 +Thank you very much.
583 +Donations are always, including future donations, very important and
584 +helpful for me to keep on developing aufs.
585 +
586 +
587 +7.
588 +----------------------------------------
589 +If you are an experienced user, no explanation is needed. Aufs is
590 +just a linux filesystem.
591 +
592 +
593 +Enjoy!
594 +
595 +# Local variables: ;
596 +# mode: text;
597 +# End: ;
598 diff --git a/Documentation/filesystems/aufs/design/01intro.txt b/Documentation/filesystems/aufs/design/01intro.txt
599 new file mode 100644
600 index 0000000..ac678c0
601 --- /dev/null
602 +++ b/Documentation/filesystems/aufs/design/01intro.txt
603 @@ -0,0 +1,137 @@
604 +
605 +# Copyright (C) 2005-2009 Junjiro R. Okajima
606 +# 
607 +# This program is free software; you can redistribute it and/or modify
608 +# it under the terms of the GNU General Public License as published by
609 +# the Free Software Foundation; either version 2 of the License, or
610 +# (at your option) any later version.
611 +# 
612 +# This program is distributed in the hope that it will be useful,
613 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
614 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
615 +# GNU General Public License for more details.
616 +# 
617 +# You should have received a copy of the GNU General Public License
618 +# along with this program; if not, write to the Free Software
619 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
620 +
621 +Introduction
622 +----------------------------------------
623 +
624 +aufs [ei ju: ef es] | [a u f s]
625 +1. abbrev. for "advanced multi-layered unification filesystem".
626 +2. abbrev. for "another unionfs".
627 +3. abbrev. for "auf das" in German which means "on the" in English.
628 +   Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
629 +   But "Filesystem aufs Filesystem" is hard to understand.
630 +
631 +AUFS is a filesystem with features:
632 +- multi layered stackable unification filesystem, the member directory
633 +  is called as a branch.
634 +- branch permission and attribute, 'readonly', 'real-readonly',
635 +  'readwrite', 'whiteout-able', 'link-able whiteout' and their
636 +  combination.
637 +- internal "file copy-on-write".
638 +- logical deletion, whiteout.
639 +- dynamic branch manipulation, adding, deleting and changing permission.
640 +- allow bypassing aufs, user's direct branch access.
641 +- external inode number translation table and bitmap which maintains the
642 +  persistent aufs inode number.
643 +- seekable directory, including NFS readdir.
644 +- file mapping, mmap and sharing pages.
645 +- pseudo-link, hardlink over branches.
646 +- loopback mounted filesystem as a branch.
647 +- several policies to select one among multiple writable branches.
648 +- revert a single systemcall when an error occurs in aufs.
649 +- and more...
650 +
651 +
652 +Multi Layered Stackable Unification Filesystem
653 +----------------------------------------------------------------------
654 +Most people already know what it is.
655 +It is a filesystem which unifies several directories and provides a
656 +merged single directory. When users access a file, the access will be
657 +passed/re-directed/converted (sorry, I am not sure which English word is
658 +correct) to the real file on the member filesystem. The member
659 +filesystem is called 'lower filesystem' or 'branch' and has a mode
660 +'readonly' and 'readwrite.' And the deletion for a file on the lower
661 +readonly branch is handled by creating 'whiteout' on the upper writable
662 +branch.
663 +
664 +On LKML, there have been discussions about UnionMount (Jan Blunck and
665 +Bharata B Rao) and Unionfs (Erez Zadok). They took different approaches
666 +to implement the merged-view.
667 +The former tries putting it into VFS, and the latter implements as a
668 +separate filesystem.
669 +(If I misunderstand about these implementations, please let me know and
670 +I shall correct it. Because it is a long time ago when I read their
671 +source files last time).
672 +UnionMount's approach can be small, but it may be hard to share
673 +branches between several UnionMounts since the whiteout in it is
674 +implemented in the inode on branch filesystem and always
675 +shared. According to Bharata's post, readdir does not seem to be
676 +finished yet.
677 +Unionfs has a longer history. When I started implementing a stacking filesystem
678 +(Aug 2005), it already existed. It has virtual super_block, inode,
679 +dentry and file objects and they have an array pointing lower same kind
680 +objects. After contributing many patches for Unionfs, I re-started my
681 +project AUFS (Jun 2006).
682 +
683 +In AUFS, the structure of the filesystem resembles Unionfs, but I
684 +implemented my own ideas, approaches and enhancements and it became
685 +a totally different one.
686 +
687 +
688 +Several characters/aspects of aufs
689 +----------------------------------------------------------------------
690 +
691 +Aufs has several characters or aspects.
692 +1. a filesystem, callee of VFS helper
693 +2. sub-VFS, caller of VFS helper for branches
694 +3. a virtual filesystem which maintains persistent inode number
695 +4. reader/writer of files on branches like an application
696 +
697 +1. Callee of VFS Helper
698 +As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
699 +unlink(2) from an application reaches sys_unlink() kernel function and
700 +then vfs_unlink() is called. vfs_unlink() is one of the VFS helpers and it
701 +calls filesystem specific unlink operation. Actually aufs implements the
702 +unlink operation but it behaves like a redirector.
703 +
704 +2. Caller of VFS Helper for Branches
705 +aufs_unlink() passes the unlink request to the branch filesystem as if
706 +it were called from VFS. So the called unlink operation of the branch
707 +filesystem acts as usual. As a caller of VFS helper, aufs should handle
708 +every necessary pre/post operation for the branch filesystem.
709 +- acquire the lock for the parent dir on a branch
710 +- lookup in a branch
711 +- revalidate dentry on a branch
712 +- mnt_want_write() for a branch
713 +- vfs_unlink() for a branch
714 +- mnt_drop_write() for a branch
715 +- release the lock on a branch
716 +
717 +3. Persistent Inode Number
718 +One of the most important issues for a filesystem is to maintain inode
719 +numbers. This is particularly important to support exporting a
720 +filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
721 +backend block device of its own. But some storage is necessary to
722 +maintain inode number. It may be a large space and may not suit to keep
723 +in memory. Aufs rents some space from its first writable branch
724 +filesystem (by default) and creates file(s) on it. These files are
725 +created by aufs internally and removed soon (currently) keeping opened.
726 +Note: Because these files are removed, they are totally gone after
727 +      unmounting aufs. It means the inode numbers are not persistent
728 +      across unmount or reboot. I have a plan to make them really
729 +      persistent which will be important for aufs on NFS server.
730 +
731 +4. Read/Write Files Internally (copy-on-write)
732 +Because a branch can be readonly, when you write a file on it, aufs will
733 +"copy-up" it to the upper writable branch internally. And then write the
734 +originally requested thing to the file. Generally kernel doesn't
735 +open/read/write file actively. In aufs, even a single write may cause an
736 +internal "file copy". This behaviour is very similar to cp(1) command.
737 +
738 +Some people may think it is better to pass such work to user space
739 +helper, instead of doing in kernel space. Actually I am still thinking
740 +about it. But currently I have implemented it in kernel space.
741 diff --git a/Documentation/filesystems/aufs/design/02struct.txt b/Documentation/filesystems/aufs/design/02struct.txt
742 new file mode 100644
743 index 0000000..11cee07
744 --- /dev/null
745 +++ b/Documentation/filesystems/aufs/design/02struct.txt
746 @@ -0,0 +1,218 @@
747 +
748 +# Copyright (C) 2005-2009 Junjiro R. Okajima
749 +# 
750 +# This program is free software; you can redistribute it and/or modify
751 +# it under the terms of the GNU General Public License as published by
752 +# the Free Software Foundation; either version 2 of the License, or
753 +# (at your option) any later version.
754 +# 
755 +# This program is distributed in the hope that it will be useful,
756 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
757 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
758 +# GNU General Public License for more details.
759 +# 
760 +# You should have received a copy of the GNU General Public License
761 +# along with this program; if not, write to the Free Software
762 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
763 +
764 +Basic Aufs Internal Structure
765 +
766 +Superblock/Inode/Dentry/File Objects
767 +----------------------------------------------------------------------
768 +Like an ordinary filesystem, aufs has its own
769 +superblock/inode/dentry/file objects. All these objects have a
770 +dynamically allocated array and store the same kind of pointers to the
771 +lower filesystem, branch.
772 +For example, when you build a union with one readwrite branch and one
773 +readonly, mounted /au, /rw and /ro respectively.
774 +- /au = /rw + /ro
775 +- /ro/fileA exists but /rw/fileA does not
776 +
777 +Aufs lookup operation finds /ro/fileA and gets dentry for that. These
778 +pointers are stored in an aufs dentry. The array in aufs dentry will be,
779 +- [0] = NULL
780 +- [1] = /ro/fileA
781 +
782 +This style of an array is essentially same to the aufs
783 +superblock/inode/dentry/file objects.
784 +
785 +Because aufs supports manipulating branches, ie. add/delete/change
786 +dynamically, these objects have their own generation. When branches are
787 +changed, the generation in aufs superblock is incremented. And the
788 +generation in another object is compared when it is accessed.
789 +When the generation in another object is obsolete, aufs refreshes the
790 +internal array.
791 +
792 +
793 +Superblock
794 +----------------------------------------------------------------------
795 +Additionally aufs superblock has some data for policies to select one
796 +among multiple writable branches, XIB files, pseudo-links and kobject.
797 +See below in detail.
798 +About the policies which support copying-down a directory, see
799 +05wbr_policy.txt too.
800 +
801 +
802 +Branch and XINO(External Inode Number Translation Table)
803 +----------------------------------------------------------------------
804 +Every branch has its own xino (external inode number translation table)
805 +file. The xino file is created and unlinked by aufs internally. When two
806 +members of a union exist on the same filesystem, they share the single
807 +xino file.
808 +The struct of a xino file is simple, just a sequence of aufs inode
809 +numbers which is indexed by the lower inode number.
810 +In the above sample, assume the inode number of /ro/fileA is i111 and
811 +aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
812 +4(8) bytes at 111 * 4(8) bytes offset in the xino file.
813 +
814 +When the inode numbers are not contiguous, the xino file will be sparse
815 +which has a hole in it and doesn't consume as much disk space as it
816 +might appear. If your branch filesystem consumes disk space for such
817 +holes, then you should specify 'xino=' option at mounting aufs.
818 +
819 +Also a writable branch has three kinds of "whiteout bases". All these
820 +exist when the branch is joined to aufs and the names are
821 +whiteout-ed doubly, so that users will never see their names in aufs
822 +hierarchy.
823 +1. a regular file which will be linked to all whiteouts.
824 +2. a directory to store a pseudo-link.
825 +3. a directory to store an "orphan-ed" file temporarily.
826 +
827 +1. Whiteout Base
828 +   When you remove a file on a readonly branch, aufs handles it as a
829 +   logical deletion and creates a whiteout on the upper writable branch
830 +   as a hardlink of this file in order not to consume inode on the
831 +   writable branch.
832 +2. Pseudo-link Dir
833 +   See below, Pseudo-link.
834 +3. Step-Parent Dir
835 +   When "fileC" exists on the lower readonly branch only and it is
836 +   opened and removed with its parent dir, and then user writes
837 +   something into it, then aufs copies-up fileC to this
838 +   directory. Because there is no other dir to store fileC. After
839 +   creating a file under this dir, the file is unlinked.
840 +
841 +Because aufs supports manipulating branches, ie. add/delete/change
842 +dynamically, a branch has its own id. When the branch order changes, aufs
843 +finds the new index by searching the branch id.
844 +
845 +
846 +Pseudo-link
847 +----------------------------------------------------------------------
848 +Assume "fileA" exists on the lower readonly branch only and it is
849 +hardlinked to "fileB" on the branch. When you write something to fileA,
850 +aufs copies-up it to the upper writable branch. Additionally aufs
851 +creates a hardlink under the Pseudo-link Directory of the writable
852 +branch. The inode of a pseudo-link is kept in aufs super_block as a
853 +simple list. If fileB is read after unlinking fileA, aufs returns
854 +filedata from the pseudo-link instead of the lower readonly
855 +branch. Because the pseudo-link is based upon the inode, to keep the
856 +inode number by xino (see above) is important.
857 +
858 +All the hardlinks under the Pseudo-link Directory of the writable branch
859 +should be restored in a proper location later. Aufs provides a utility
860 +to do this. The userspace helpers are executed at remounting and
861 +unmounting aufs by default.
862 +
863 +
864 +XIB(external inode number bitmap)
865 +----------------------------------------------------------------------
866 +In addition to the xino file per branch, aufs has an external inode number
867 +bitmap in a superblock object. It is also a file, like a xino file.
868 +It is a simple bitmap to mark whether the aufs inode number is in-use or
869 +not.
870 +To reduce the file I/O, aufs prepares a single memory page to cache xib.
871 +
872 +Aufs implements a feature to truncate/refresh both of xino and xib to
873 +reduce the number of consumed disk blocks for these files.
874 +
875 +
876 +Virtual or Vertical Dir
877 +----------------------------------------------------------------------
878 +In order to support multiple layers (branches), aufs readdir operation
879 +constructs a virtual dir block on memory. For readdir, aufs calls
880 +vfs_readdir() internally for each dir on branches, merges their entries
881 +with eliminating the whiteout-ed ones, and sets it to file (dir)
882 +object. So the file object has its entry list until it is closed. The
883 +entry list will be updated when the file position is zero and becomes
884 +old. This decision is made in aufs automatically.
885 +
886 +The dynamically allocated memory block for the name of entries has a
887 +unit of 512 bytes (by default) and stores the names contiguously (no
888 +padding). Another block for each entry is handled by kmem_cache too.
889 +While building dir blocks, aufs creates a hash list and judges whether
890 +the entry is whiteouted by its upper branch or already listed.
891 +
892 +Some people may say it can be a security hole or invite a DoS attack
893 +since the opened and once readdir-ed dir (file object) holds its entry
894 +list and becomes a pressure for system memory. But I'd say it is similar
895 +to files under /proc or /sys. The virtual files in them also hold a
896 +memory page (generally) while they are opened. When an idea to reduce
897 +memory for them is introduced, it will be applied to aufs too.
898 +For those who really hate this situation, I've developed readdir(3)
899 +library which operates this merging in userspace. You just need to set
900 +LD_PRELOAD environment variable, and aufs will not consume any memory in
901 +kernel space for readdir(3).
902 +
903 +
904 +Workqueue
905 +----------------------------------------------------------------------
906 +Aufs sometimes requires privileged access to a branch, for instance,
907 +in a copy-up/down operation. A user process may be going to make changes
908 +to a file which exists in the lower readonly branch only, while the mode
909 +of one of its ancestor directories may not be writable by the user
910 +process. Here aufs copies-up the file with its ancestors and they may
911 +require privilege to set their owner/group/mode/etc.
912 +This is a typical case of the application character of aufs (see
913 +Introduction).
914 +
915 +Aufs uses workqueue synchronously for this case. It creates its own
916 +workqueue. The workqueue is a kernel thread and has privilege. Aufs
917 +passes the request to call mkdir or write (for example), and waits for
918 +its completion. This approach solves a problem of a signal handler
919 +simply.
920 +If aufs didn't adopt the workqueue and changed the privilege of the
921 +process, and if the mkdir/write call raised SIGXFSZ or other signal,
922 +then the user process might gain a privilege or the generated core file
923 +would be owned by a superuser. But I have a plan to switch to a new
924 +credential approach which will be introduced in linux-2.6.29.
925 +
926 +Also aufs uses the system global workqueue ("events" kernel thread) too
927 +for asynchronous tasks, such as handling inotify, re-creating a
928 +whiteout base, etc. This is unrelated to a privilege.
929 +Most of aufs operation tries acquiring a rw_semaphore for aufs
930 +superblock at the beginning, at the same time waits for the completion
931 +of all queued asynchronous tasks.
932 +
933 +
934 +Whiteout
935 +----------------------------------------------------------------------
936 +The whiteout in aufs is very similar to Unionfs's. That is represented
937 +by its filename. UnionMount takes an approach of a file mode, but I am
938 +afraid several utilities (find(1) or something) will have to support it.
939 +
940 +Basically the whiteout represents "logical deletion" which stops aufs
941 +from looking up further, but it also represents "dir is opaque" which
942 +also stops lookup.
943 +
944 +In aufs, rmdir(2) and rename(2) for dir use whiteout alternatively.
945 +In order to make several functions in a single systemcall to be
946 +revertible, aufs adopts an approach to rename a directory to a temporary
947 +unique whiteouted name.
948 +For example, in rename(2) dir where the target dir already existed, aufs
949 +renames the target dir to a temporary unique whiteouted name before the
950 +actual rename on a branch and then handles other actions (make it opaque,
951 +update the attributes, etc). If an error happens in these actions, aufs
952 +simply renames the whiteouted name back and returns an error. If all are
953 +succeeded, aufs registers a function to remove the whiteouted unique
954 +temporary name completely and asynchronously to the system global
955 +workqueue.
956 +
957 +
958 +Copy-up
959 +----------------------------------------------------------------------
960 +It is a well-known feature or concept.
961 +When a user modifies a file on a readonly branch, aufs operates "copy-up"
962 +internally and makes the change to the new file on the upper writable branch.
963 +When the trigger systemcall does not update the timestamps of the parent
964 +dir, aufs reverts it after copy-up.
965 diff --git a/Documentation/filesystems/aufs/design/03lookup.txt b/Documentation/filesystems/aufs/design/03lookup.txt
966 new file mode 100644
967 index 0000000..7510fdb
968 --- /dev/null
969 +++ b/Documentation/filesystems/aufs/design/03lookup.txt
970 @@ -0,0 +1,104 @@
971 +
972 +# Copyright (C) 2005-2009 Junjiro R. Okajima
973 +# 
974 +# This program is free software; you can redistribute it and/or modify
975 +# it under the terms of the GNU General Public License as published by
976 +# the Free Software Foundation; either version 2 of the License, or
977 +# (at your option) any later version.
978 +# 
979 +# This program is distributed in the hope that it will be useful,
980 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
981 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
982 +# GNU General Public License for more details.
983 +# 
984 +# You should have received a copy of the GNU General Public License
985 +# along with this program; if not, write to the Free Software
986 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
987 +
988 +Lookup in a Branch
989 +----------------------------------------------------------------------
990 +Since aufs has a character of sub-VFS (see Introduction), it operates
991 +lookup for branches as VFS does. It may be heavy work. Generally
992 +speaking struct nameidata is a big structure and includes much
993 +information. But almost all lookup operations in aufs are the simplest
994 +case, ie. lookup only an entry directly connected to its parent. Digging
995 +down the directory hierarchy is unnecessary.
996 +
997 +VFS has a function lookup_one_len() for that use, but it is not usable
998 +for a branch filesystem which requires struct nameidata. So aufs
999 +implements a simple lookup wrapper function. When a branch filesystem
1000 +allows NULL as nameidata, it calls lookup_one_len(). Otherwise it builds
1001 +a simplest nameidata and calls lookup_hash().
1002 +Here aufs applies "a principle in NFSD", ie. if the filesystem supports
1003 +NFS-export, then it has to support NULL as a nameidata parameter for
1004 +->create(), ->lookup() and ->d_revalidate(). So the lookup wrapper in
1005 +aufs tests if ->s_export_op in the branch is NULL or not.
1006 +
1007 +When a branch is a remote filesystem, aufs trusts its ->d_revalidate().
1008 +For d_revalidate, aufs implements three levels of revalidate tests. See
1009 +"Revalidate Dentry and UDBA" in detail.
1010 +
1011 +
1012 +Loopback Mount
1013 +----------------------------------------------------------------------
1014 +Basically aufs supports any type of filesystem and block device for a
1015 +branch (actually there are some exceptions). But it is prohibited to add
1016 +a loopback mounted one whose backend file exists in a filesystem which is
1017 +already added to aufs. The reason is to protect aufs from a recursive
1018 +lookup. If it was allowed, the aufs lookup operation might re-enter a
1019 +lookup for the loopback mounted branch in the same context, and will
1020 +cause a deadlock.
1021 +
1022 +
1023 +Revalidate Dentry and UDBA (User's Direct Branch Access)
1024 +----------------------------------------------------------------------
1025 +Generally VFS helpers re-validate a dentry as a part of lookup.
1026 +0. digging down the directory hierarchy.
1027 +1. lock the parent dir by its i_mutex.
1028 +2. lookup the final (child) entry.
1029 +3. revalidate it.
1030 +4. call the actual operation (create, unlink, etc.)
1031 +5. unlock the parent dir
1032 +
1033 +If the filesystem implements its ->d_revalidate() (step 3), then it is
1034 +called. Actually aufs implements it and checks the dentry on a branch is
1035 +still valid.
1036 +But it is not enough. Because aufs has to release the lock for the
1037 +parent dir on a branch at the end of ->lookup() (step 2) and
1038 +->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
1039 +held by VFS.
1040 +If the file on a branch is changed directly, eg. bypassing aufs, after
1041 +aufs released the lock, then the subsequent operation may cause
1042 +some unpleasant result.
1043 +
1044 +This situation is a result of VFS architecture, ->lookup() and
1045 +->d_revalidate() are separated. But I never say it is wrong. It is a good
1046 +design from VFS's point of view. It is just not suitable for sub-VFS
1047 +character in aufs.
1048 +
1049 +Aufs supports such a case by three levels of revalidation which are
1050 +selectable by user.
1051 +1. Simple Revalidate
1052 +   In addition to the native flow in VFS, confirm the child-parent
1053 +   relationship on the branch just after locking the parent dir on the
1054 +   branch in the "actual operation" (step 4). When this validation
1055 +   fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
1056 +   checks the validation of the dentry on branches.
1057 +2. Monitor Changes Internally by Inotify
1058 +   In addition to above, in the "actual operation" (step 4) aufs looks up
1059 +   the dentry on the branch again, and returns EBUSY if it finds a different
1060 +   dentry.
1061 +   Additionally, aufs sets the inotify watch for every dir on branches
1062 +   while it is in cache. When the event is notified, aufs registers a
1063 +   function to kernel 'events' thread by schedule_work(). And the
1064 +   function sets some special status to the cached aufs dentry and inode
1065 +   private data. If they are not cached, then aufs has nothing to
1066 +   do. When the same file is accessed through aufs (step 0-3) later,
1067 +   aufs will detect the status and refresh all necessary data.
1068 +   In this mode, aufs has to ignore the event which is fired by aufs
1069 +   itself.
1070 +3. No Extra Validation
1071 +   This is the simplest test and doesn't add any additional revalidation
1072 +   test, and skips the revalidation in step 4. It is useful and improves
1073 +   aufs performance when the system surely hides the aufs branches from users,
1074 +   by over-mounting something (or another method).
1075 diff --git a/Documentation/filesystems/aufs/design/04branch.txt b/Documentation/filesystems/aufs/design/04branch.txt
1076 new file mode 100644
1077 index 0000000..5529d6a
1078 --- /dev/null
1079 +++ b/Documentation/filesystems/aufs/design/04branch.txt
1080 @@ -0,0 +1,76 @@
1081 +
1082 +# Copyright (C) 2005-2009 Junjiro R. Okajima
1083 +# 
1084 +# This program is free software; you can redistribute it and/or modify
1085 +# it under the terms of the GNU General Public License as published by
1086 +# the Free Software Foundation; either version 2 of the License, or
1087 +# (at your option) any later version.
1088 +# 
1089 +# This program is distributed in the hope that it will be useful,
1090 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
1091 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1092 +# GNU General Public License for more details.
1093 +# 
1094 +# You should have received a copy of the GNU General Public License
1095 +# along with this program; if not, write to the Free Software
1096 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1097 +
1098 +Branch Manipulation
1099 +
1100 +Since aufs supports dynamic branch manipulation, ie. add/remove a branch
1101 +and changing its permission/attribute, there is a lot of work to do.
1102 +
1103 +
1104 +Add a Branch
1105 +----------------------------------------------------------------------
1106 +o Confirm the adding dir exists outside of aufs, including loopback
1107 +  mount.
1108 +- and other various attributes...
1109 +o Initialize the xino file and whiteout bases if necessary.
1110 +  See 02struct.txt.
1111 +
1112 +o Check the owner/group/mode of the directory
1113 +  When the owner/group/mode of the adding directory differs from the
1114 +  existing branch, aufs issues a warning because it may pose a
1115 +  security risk.
1116 +  For example, when an upper writable branch has a world writable empty
1117 +  top directory, a malicious user can create any files on the writable
1118 +  branch directly, like copy-up and modify manually. If something like
1119 +  /etc/{passwd,shadow} exists on the lower readonly branch but not on the
1120 +  upper writable branch, and the writable branch is world-writable, then a
1121 +  malicious guy may create /etc/passwd on the writable branch directly
1122 +  and the infected file will be valid in aufs.
1123 +  I am afraid it can be a security issue, but there is nothing to do except
1124 +  producing a warning.
1125 +
1126 +
1127 +Delete a Branch
1128 +----------------------------------------------------------------------
1129 +o Confirm the deleting branch is not busy
1130 +  To be general, there is one merit to adopt "remount" interface to
1131 +  manipulate branches. It is to discard caches. At deleting a branch,
1132 +  aufs checks the still cached (and connected) dentries and inodes. If
1133 +  there are any, then they are all in-use. An inode without its
1134 +  corresponding dentry can be alive alone (for example, inotify case).
1135 +
1136 +  For the cached one, aufs checks whether the same named entry exists on
1137 +  other branches.
1138 +  If the cached one is a directory, because aufs provides a merged view
1139 +  to users, as long as one dir is left on any branch aufs can show the
1140 +  dir to users. In this case, the branch can be removed from aufs.
1141 +  Otherwise aufs rejects deleting the branch.
1142 +
1143 +  If any file on the deleting branch is opened by aufs, then aufs
1144 +  rejects deleting.
1145 +
1146 +
1147 +Modify the Permission of a Branch
1148 +----------------------------------------------------------------------
1149 +o Re-initialize or remove the xino file and whiteout bases if necessary.
1150 +  See 02struct.txt.
1151 +
1152 +o rw --> ro: Confirm the modifying branch is not busy
1153 +  Aufs rejects the request if any of these conditions are true.
1154 +  - a file on the branch is mmap-ed.
1155 +  - a regular file on the branch is opened for write and there is no
1156 +    same named entry on the upper branch.
1157 diff --git a/Documentation/filesystems/aufs/design/05wbr_policy.txt b/Documentation/filesystems/aufs/design/05wbr_policy.txt
1158 new file mode 100644
1159 index 0000000..5f25684
1160 --- /dev/null
1161 +++ b/Documentation/filesystems/aufs/design/05wbr_policy.txt
1162 @@ -0,0 +1,65 @@
1163 +
1164 +# Copyright (C) 2005-2009 Junjiro R. Okajima
1165 +# 
1166 +# This program is free software; you can redistribute it and/or modify
1167 +# it under the terms of the GNU General Public License as published by
1168 +# the Free Software Foundation; either version 2 of the License, or
1169 +# (at your option) any later version.
1170 +# 
1171 +# This program is distributed in the hope that it will be useful,
1172 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
1173 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1174 +# GNU General Public License for more details.
1175 +# 
1176 +# You should have received a copy of the GNU General Public License
1177 +# along with this program; if not, write to the Free Software
1178 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1179 +
1180 +Policies to Select One among Multiple Writable Branches
1181 +----------------------------------------------------------------------
1182 +When the number of writable branches is more than one, aufs has to decide
1183 +the target branch for file creation or copy-up. By default, the highest
1184 +writable branch which has the parent (or ancestor) dir of the target
1185 +file is chosen (top-down-parent policy).
1186 +By user's request, aufs implements some other policies to select the
1187 +writable branch, for file creation two policies, round-robin and
1188 +most-free-space policies. For copy-up three policies, top-down-parent,
1189 +bottom-up-parent and bottom-up policies.
1190 +
1191 +As expected, the round-robin policy selects the branch circularly. When
1192 +you have two writable branches and create 10 new files, 5 files will be
1193 +created for each branch. mkdir(2) systemcall is an exception. When you
1194 +create 10 new directories, all will be created on the same branch.
1195 +And the most-free-space policy selects the one which has most free
1196 +space among the writable branches. The amount of free space will be
1197 +checked by aufs internally, and users can specify its time interval.
1198 +
1199 +The policies for copy-up are simpler,
1200 +top-down-parent is equivalent to the same named one in create policy,
1201 +bottom-up-parent selects the writable branch where the parent dir
1202 +exists and the nearest upper one from the copyup-source,
1203 +bottom-up selects the nearest upper writable branch from the
1204 +copyup-source, regardless of the existence of the parent dir.
1205 +
1206 +There are some rules or exceptions to apply these policies.
1207 +- If there is a readonly branch above the policy-selected branch and
1208 +  the parent dir is marked as opaque (a variation of whiteout), or the
1209 +  target (creating) file is whiteout-ed on the upper readonly branch,
1210 +  then the result of the policy is ignored and the target file will be
1211 +  created on the nearest writable branch above the readonly branch.
1212 +- If there is a writable branch above the policy-selected branch and
1213 +  the parent dir is marked as opaque or the target file is whiteouted
1214 +  on the branch, then the result of the policy is ignored and the target
1215 +  file will be created on the highest one among the upper writable
1216 +  branches which have diropq or whiteout. In case of whiteout, aufs removes
1217 +  it as usual.
1218 +- link(2) and rename(2) systemcalls are exceptions in every policy.
1219 +  They try selecting the branch where the source exists if possible
1220 +  since copying-up a large file will take a long time. If it can't be,
1221 +  ie. the branch where the source exists is readonly, then they will
1222 +  follow the copyup policy.
1223 +- There is an exception for rename(2) when the target exists.
1224 +  If the rename target exists, aufs compares the index of the branches
1225 +  where the source and the target exist and selects the higher
1226 +  one. If the selected branch is readonly, then aufs follows the
1227 +  copyup policy.
1228 diff --git a/Documentation/filesystems/aufs/design/06fmode_exec.txt b/Documentation/filesystems/aufs/design/06fmode_exec.txt
1229 new file mode 100644
1230 index 0000000..ebc311d
1231 --- /dev/null
1232 +++ b/Documentation/filesystems/aufs/design/06fmode_exec.txt
1233 @@ -0,0 +1,33 @@
1234 +
1235 +# Copyright (C) 2005-2009 Junjiro R. Okajima
1236 +# 
1237 +# This program is free software; you can redistribute it and/or modify
1238 +# it under the terms of the GNU General Public License as published by
1239 +# the Free Software Foundation; either version 2 of the License, or
1240 +# (at your option) any later version.
1241 +# 
1242 +# This program is distributed in the hope that it will be useful,
1243 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
1244 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1245 +# GNU General Public License for more details.
1246 +# 
1247 +# You should have received a copy of the GNU General Public License
1248 +# along with this program; if not, write to the Free Software
1249 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1250 +
1251 +FMODE_EXEC and deny_write()
1252 +----------------------------------------------------------------------
1253 +Generally Unix prevents the filedata of an executing file from being written.
1254 +In linux it is implemented by deny_write() and allow_write().
1255 +When a file is executed by exec() family, open_exec() (and sys_uselib())
1256 +opens the file and calls deny_write(). If the file is aufs's virtual
1257 +one, it has no meaning. The file for which deny_write() is really necessary
1258 +is the file on a branch. But the FMODE_EXEC flag is not passed to
1259 +->open() operation. So aufs adopts a dirty trick.
1260 +
1261 +- in order to get FMODE_EXEC, aufs ->lookup() and ->d_revalidate() set
1262 +  nd->intent.open.file->private_data to nd->intent.open.flags temporarily.
1263 +- in aufs ->open(), when FMODE_EXEC is set in file->private_data, it
1264 +  calls deny_write() for the file on a branch.
1265 +- when the aufs file is released, allow_write() for the file on a branch
1266 +  is called.
1267 diff --git a/Documentation/filesystems/aufs/design/07mmap.txt b/Documentation/filesystems/aufs/design/07mmap.txt
1268 new file mode 100644
1269 index 0000000..5d60fb9
1270 --- /dev/null
1271 +++ b/Documentation/filesystems/aufs/design/07mmap.txt
1272 @@ -0,0 +1,53 @@
1273 +
1274 +# Copyright (C) 2005-2009 Junjiro R. Okajima
1275 +# 
1276 +# This program is free software; you can redistribute it and/or modify
1277 +# it under the terms of the GNU General Public License as published by
1278 +# the Free Software Foundation; either version 2 of the License, or
1279 +# (at your option) any later version.
1280 +# 
1281 +# This program is distributed in the hope that it will be useful,
1282 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
1283 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1284 +# GNU General Public License for more details.
1285 +# 
1286 +# You should have received a copy of the GNU General Public License
1287 +# along with this program; if not, write to the Free Software
1288 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1289 +
1290 +mmap(2) -- File Memory Mapping
1291 +----------------------------------------------------------------------
1292 +In aufs, the file-mapped pages are shared between the file on a branch
1293 +and the virtual one in aufs by overriding vm_operation, particularly
1294 +->fault().
1295 +
1296 +In aufs_mmap(),
1297 +- get and store vm_ops of the real file on a branch.
1298 +- map the file of aufs by generic_file_mmap() and set aufs's vm
1299 +  operations.
1300 +
1301 +In aufs_fault(),
1302 +- get the file of aufs from the passed vma, sleep if needed.
1303 +- get the real file on a branch from the aufs file.
1304 +- a race may happen, for instance with a multithreaded library, so some
1305 +  locking is implemented.
1306 +- call ->fault() in the previously stored vm_ops, with vm_file set to the
1307 +  real file on a branch.
1308 +- restore vm_file and wake up anyone who went to sleep in the meantime.
1309 +
1310 +When a branch is added to or deleted from aufs, a same-named file may
1311 +be unveiled, and a process calling read(2) on a previously opened file
1312 +will then see the contents of the new one.
1313 +(Some users may not want to refresh the filedata. For such users, I
1314 +have a plan to implement a mount option 'refrof' which decides whether
1315 +to refresh the opened files or not. See 99plan.txt too.)
1316 +In this case, an already mapped file will not be updated since the
1317 +contents are already a part of the process and should not be changed by
1318 +aufs branch manipulation (currently, even if MAP_SHARED is specified).
1319 +Of course, if the branch being deleted has a busy file, the branch
1320 +cannot be deleted from the union.
1321 +
1322 +Unionfs used to take an approach in which the memory pages mapped to
1323 +filedata are copied from the lower (real) file into Unionfs's
1324 +virtual one and handled by address_space operations. Recently Unionfs
1325 +switched to the approach described here, which aufs has used since Jul 2006.
1326 diff --git a/Documentation/filesystems/aufs/design/08export.txt b/Documentation/filesystems/aufs/design/08export.txt
1327 new file mode 100644
1328 index 0000000..8394348
1329 --- /dev/null
1330 +++ b/Documentation/filesystems/aufs/design/08export.txt
1331 @@ -0,0 +1,59 @@
1332 +
1333 +# Copyright (C) 2005-2009 Junjiro R. Okajima
1334 +# 
1335 +# This program is free software; you can redistribute it and/or modify
1336 +# it under the terms of the GNU General Public License as published by
1337 +# the Free Software Foundation; either version 2 of the License, or
1338 +# (at your option) any later version.
1339 +# 
1340 +# This program is distributed in the hope that it will be useful,
1341 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
1342 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1343 +# GNU General Public License for more details.
1344 +# 
1345 +# You should have received a copy of the GNU General Public License
1346 +# along with this program; if not, write to the Free Software
1347 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1348 +
1349 +Export Aufs via NFS
1350 +----------------------------------------------------------------------
1351 +Here is an approach.
1352 +- like xino/xib, add a new file 'xigen' which stores aufs inode
1353 +  generation.
1354 +- iget_locked(): initialize aufs inode generation for a new inode, and
1355 +  store it in xigen file.
1356 +- destroy_inode(): increment aufs inode generation and store it in xigen
1357 +  file. it is necessary even if it is not unlinked, because any data of
1358 +  inode may be changed by UDBA.
1359 +- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
1360 +  build file handle by
1361 +  + branch id (4 bytes)
1362 +  + superblock generation (4 bytes)
1363 +  + inode number (4 or 8 bytes)
1364 +  + parent dir inode number (4 or 8 bytes)
1365 +  + inode generation (4 bytes)
1366 +  + return value of exportfs_encode_fh() for the parent on a branch (4
1367 +    bytes)
1368 +  + file handle for a branch (by exportfs_encode_fh())
1369 +- fh_to_dentry():
1370 +  + find the index of a branch from its id in handle, and check that it
1371 +    still exists in aufs.
1372 +  + 1st level: get the inode number from handle and search it in cache.
1373 +  + 2nd level: if not found, get the parent inode number from handle and
1374 +    search it in cache. and then open the parent dir, find the matching
1375 +    inode number by vfs_readdir() and get its name, and call
1376 +    lookup_one_len() for the target dentry.
1377 +  + 3rd level: if the parent dir is not cached, call
1378 +    exportfs_decode_fh() for a branch and get the parent on a branch,
1379 +    build a pathname of it, convert it to a pathname in aufs, call
1380 +    path_lookup(). now aufs gets a parent dir dentry, then handle it as
1381 +    the 2nd level.
1382 +  + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
1383 +    for every branch, but not for itself. to get this, (currently) aufs
1384 +    searches in current->nsproxy->mnt_ns list. it may not be a good
1385 +    idea, but I did not find another approach.
1386 +  + test the generation of the gotten inode.
1387 +- every inode operation: they may get EBUSY due to UDBA. in this case,
1388 +  convert it into ESTALE for NFSD.
1389 +- readdir(): call lockdep_on/off() because filldir in NFSD calls
1390 +  lookup_one_len(), vfs_getattr(), encode_fh() and others.
1391 diff --git a/Documentation/filesystems/aufs/design/09shwh.txt b/Documentation/filesystems/aufs/design/09shwh.txt
1392 new file mode 100644
1393 index 0000000..ce0c633
1394 --- /dev/null
1395 +++ b/Documentation/filesystems/aufs/design/09shwh.txt
1396 @@ -0,0 +1,53 @@
1397 +
1398 +# Copyright (C) 2005-2009 Junjiro R. Okajima
1399 +# 
1400 +# This program is free software; you can redistribute it and/or modify
1401 +# it under the terms of the GNU General Public License as published by
1402 +# the Free Software Foundation; either version 2 of the License, or
1403 +# (at your option) any later version.
1404 +# 
1405 +# This program is distributed in the hope that it will be useful,
1406 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
1407 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1408 +# GNU General Public License for more details.
1409 +# 
1410 +# You should have received a copy of the GNU General Public License
1411 +# along with this program; if not, write to the Free Software
1412 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1413 +
1414 +Show Whiteout Mode (shwh)
1415 +----------------------------------------------------------------------
1416 +Generally aufs hides the name of whiteouts. But in some cases, to show
1417 +them is very useful for users. For instance, creating a new middle layer
1418 +(branch) by merging existing layers.
1419 +
1420 +(borrowing aufs1 HOW-TO from a user, Michael Towers)
1421 +When you have three branches,
1422 +- Bottom: 'system', squashfs (underlying base system), read-only
1423 +- Middle: 'mods', squashfs, read-only
1424 +- Top: 'overlay', ram (tmpfs), read-write
1425 +
1426 +The top layer is loaded at boot time and saved at shutdown, to preserve
1427 +the changes made to the system during the session.
1428 +When larger changes have been made, or smaller changes have accumulated,
1429 +the size of the saved top layer data grows. At this point, it would be
1430 +nice to be able to merge the two overlay branches ('mods' and 'overlay')
1431 +and rewrite the 'mods' squashfs, clearing the top layer and thus
1432 +restoring save and load speed.
1433 +
1434 +This merging is simplified by the use of another aufs mount, of just the
1435 +two overlay branches using the 'shwh' option.
1436 +# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
1437 +       aufs /livesys/merge_union
1438 +
1439 +A merged view of these two branches is then available at
1440 +/livesys/merge_union, and the new feature is that the whiteouts are
1441 +visible!
1442 +Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
1443 +writing to all branches. Also the default mode for all branches is 'ro'.
1444 +It is now possible to save the combined contents of the two overlay
1445 +branches to a new squashfs, e.g.:
1446 +# mksquashfs /livesys/merge_union /path/to/newmods.squash
1447 +
1448 +This new squashfs archive can be stored on the boot device and the
1449 +initramfs will use it to replace the old one at the next boot.
1450 diff --git a/Documentation/filesystems/aufs/design/99plan.txt b/Documentation/filesystems/aufs/design/99plan.txt
1451 new file mode 100644
1452 index 0000000..78d1f0e
1453 --- /dev/null
1454 +++ b/Documentation/filesystems/aufs/design/99plan.txt
1455 @@ -0,0 +1,96 @@
1456 +
1457 +# Copyright (C) 2005-2009 Junjiro R. Okajima
1458 +# 
1459 +# This program is free software; you can redistribute it and/or modify
1460 +# it under the terms of the GNU General Public License as published by
1461 +# the Free Software Foundation; either version 2 of the License, or
1462 +# (at your option) any later version.
1463 +# 
1464 +# This program is distributed in the hope that it will be useful,
1465 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
1466 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1467 +# GNU General Public License for more details.
1468 +# 
1469 +# You should have received a copy of the GNU General Public License
1470 +# along with this program; if not, write to the Free Software
1471 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1472 +
1473 +Plan
1474 +
1475 +Restoring some features which were implemented in aufs1.
1476 +They were dropped in aufs2 in order to make source files simpler and
1477 +easier to review.
1478 +
1479 +
1480 +Test Only the Highest One for the Directory Permission (dirperm1 option)
1481 +----------------------------------------------------------------------
1482 +Let's try a case study.
1483 +- aufs has two branches, upper readwrite and lower readonly.
1484 +  /au = /rw + /ro
1485 +- "dirA" exists under /ro, but not under /rw, and its mode is 0700.
1486 +- user invoked "chmod a+rx /au/dirA"
1487 +- then does "dirA" become world readable?
1488 +
1489 +In this case, /ro/dirA is still 0700 since it exists in a readonly branch,
1490 +or it may be on a natively readonly filesystem. If aufs respects the lower
1491 +branch, it should not respond to readdir requests from other users. But the
1492 +user allowed it by chmod. Should aufs really refuse to show the entries
1493 +under /ro/dirA?
1494 +
1495 +To be honest, I don't have a best solution for this case. So I
1496 +implemented the 'dirperm1' and 'nodirperm1' options in aufs1, and left it to
1497 +users.
1498 +When dirperm1 is specified, aufs checks only the highest one for the
1499 +directory permission, and shows the entries. Otherwise, as usual, checks
1500 +every dir existing on all branches and rejects the request.
1501 +
1502 +As a side effect, the dirperm1 option improves the performance of aufs
1503 +because the number of permission checks is reduced.
1504 +
1505 +
1506 +Being Another Aufs's Readonly Branch (robr)
1507 +----------------------------------------------------------------------
1508 +Aufs1 allows aufs to be another aufs's readonly branch.
1509 +This feature was developed at a user's request. But it may not be in use
1510 +currently.
1511 +
1512 +
1513 +Copy-up on Open (coo=)
1514 +----------------------------------------------------------------------
1515 +By default the internal copy-up is executed when it is really necessary.
1516 +It is not done when a file is opened for writing, but when write(2) is
1517 +done. Users who have many (over 100) branches want to know and analyse
1518 +when and what file is copied-up. To insert a new upper branch which
1519 +contains such files only may improve the performance of aufs.
1520 +
1521 +Aufs1 implemented "coo=none | leaf | all" option.
1522 +
1523 +
1524 +Refresh the Opened File (refrof)
1525 +----------------------------------------------------------------------
1526 +This option is implemented in aufs1 but incomplete.
1527 +
1528 +When user reads from a file, he expects to get its latest filedata
1529 +generally. If the file is removed and a new same named file is created,
1530 +the content he gets is unchanged, ie. the unlinked filedata.
1531 +
1532 +Let's try a case study again.
1533 +- aufs has two branches.
1534 +  /au = /rw + /ro
1535 +- "fileA" exists under /ro, but not under /rw.
1536 +- user opened "/au/fileA".
1537 +- he or someone else inserts a branch (/new) between /rw and /ro.
1538 +  /au = /rw + /new + /ro
1539 +- the new branch has "fileA".
1540 +- user reads from the opened "fileA"
1541 +- which filedata should aufs return, from /ro or /new?
1542 +
1543 +Some people say it has to be "from /ro" since that is the semantics of Unix.
1544 +Others say it should be "from /new" because the file is not removed
1545 +and it is equivalent to the case where someone else modifies the file.
1546 +
1547 +Here again I don't have a best and final answer. I got an idea to
1548 +implement 'refrof' and 'norefrof' options. When 'refrof' (REFResh the
1549 +Opened File) is specified (by default), aufs returns the filedata from
1550 +/new.
1551 +Otherwise from /ro.
1552 diff --git a/fs/Kconfig b/fs/Kconfig
1553 index bd989c0..8af22e4 100644
1554 --- a/fs/Kconfig
1555 +++ b/fs/Kconfig
1556 @@ -1541,6 +1541,8 @@ config UFS_DEBUG
1557           Y here.  This will result in _many_ additional debugging messages to be
1558           written to the system log.
1559  
1560 +source "fs/aufs/Kconfig"
1561 +
1562  endmenu
1563  
1564  menuconfig NETWORK_FILESYSTEMS
1565 diff --git a/fs/Makefile b/fs/Makefile
1566 index 0871ab5..364e863 100644
1567 --- a/fs/Makefile
1568 +++ b/fs/Makefile
1569 @@ -123,3 +123,4 @@ obj-$(CONFIG_HPPFS)         += hppfs/
1570  obj-$(CONFIG_DEBUG_FS)         += debugfs/
1571  obj-$(CONFIG_OCFS2_FS)         += ocfs2/
1572  obj-$(CONFIG_GFS2_FS)           += gfs2/
1573 +obj-$(CONFIG_AUFS_FS)           += aufs/
1574 diff --git a/fs/aufs/Kconfig b/fs/aufs/Kconfig
1575 new file mode 100644
1576 index 0000000..12e4327
1577 --- /dev/null
1578 +++ b/fs/aufs/Kconfig
1579 @@ -0,0 +1,136 @@
1580 +config AUFS_FS
1581 +       bool "Aufs (Advanced multi layered unification filesystem) support"
1582 +       depends on EXPERIMENTAL
1583 +       help
1584 +       Aufs is a stackable unification filesystem such as Unionfs,
1585 +       which unifies several directories and provides a merged single
1586 +       directory.
1587 +       In the early days, aufs was an entirely re-designed and
1588 +       re-implemented Unionfs Version 1.x series. After introducing many
1589 +       original ideas, approaches and improvements, it became totally
1590 +       different from Unionfs while keeping the basic features.
1591 +
1592 +if AUFS_FS
1593 +choice
1594 +       prompt "Maximum number of branches"
1595 +       default AUFS_BRANCH_MAX_127
1596 +       help
1597 +       Specifies the maximum number of branches (or member directories)
1598 +       in a single aufs. The larger value consumes more system
1599 +       resources and has a minor impact to performance.
1600 +config AUFS_BRANCH_MAX_127
1601 +       bool "127"
1602 +       help
1603 +       Specifies the maximum number of branches (or member directories)
1604 +       in a single aufs. The larger value consumes more system
1605 +       resources and has a minor impact to performance.
1606 +config AUFS_BRANCH_MAX_511
1607 +       bool "511"
1608 +       help
1609 +       Specifies the maximum number of branches (or member directories)
1610 +       in a single aufs. The larger value consumes more system
1611 +       resources and has a minor impact to performance.
1612 +config AUFS_BRANCH_MAX_1023
1613 +       bool "1023"
1614 +       help
1615 +       Specifies the maximum number of branches (or member directories)
1616 +       in a single aufs. The larger value consumes more system
1617 +       resources and has a minor impact to performance.
1618 +config AUFS_BRANCH_MAX_32767
1619 +       bool "32767"
1620 +       help
1621 +       Specifies the maximum number of branches (or member directories)
1622 +       in a single aufs. The larger value consumes more system
1623 +       resources and has a minor impact to performance.
1624 +endchoice
1625 +
1626 +config AUFS_HINOTIFY
1627 +       bool "Use inotify to detect actions on a branch"
1628 +       depends on INOTIFY
1629 +       help
1630 +       If you want to modify files on branches directly, eg. bypassing aufs,
1631 +       and want aufs to detect the changes of them fully, then enable this
1632 +       option and use 'udba=inotify' mount option.
1633 +       It will have a negative impact to the performance.
1634 +       See detail in aufs.5.
1635 +
1636 +config AUFS_EXPORT
1637 +       bool "NFS-exportable aufs"
1638 +       depends on EXPORTFS = y
1639 +       help
1640 +       If you want to export your mounted aufs via NFS, then enable this
1641 +       option. There are several requirements for this configuration.
1642 +       See detail in aufs.5.
1643 +
1644 +config AUFS_INO_T_64
1645 +       bool
1646 +       depends on AUFS_EXPORT
1647 +       depends on 64BIT && !(ALPHA || S390)
1648 +       default y
1649 +       help
1650 +       Automatic configuration for internal use.
1651 +       /* typedef unsigned long/int __kernel_ino_t */
1652 +       /* alpha and s390x are int */
1653 +
1654 +config AUFS_RDU
1655 +       bool "Readdir in userspace"
1656 +       help
1657 +       If you have millions of files under a single aufs directory, and
1658 +       run out of memory, then enable this option and set
1659 +       environment variables for your readdir(3).
1660 +       See detail in aufs.5.
1661 +
1662 +config AUFS_SP_IATTR
1663 +       bool "Respect the attributes (mtime/ctime mainly) of special files"
1664 +       help
1665 +       When you write something to a special file, some attributes of it
1666 +       (mtime/ctime mainly) may be updated. Generally such updates are
1667 +       less important (actually some device drivers and NFS ignore
1668 +       them). But some applications (such as test programs) require
1669 +       such updates. If you need these updates, then enable this
1670 +       configuration which introduces some overhead.
1671 +       Currently this configuration handles FIFO only.
1672 +
1673 +config AUFS_SHWH
1674 +       bool "Show whiteouts"
1675 +       help
1676 +       If you want to make the whiteouts in aufs visible, then enable
1677 +       this option and specify the 'shwh' mount option. Although it may
1678 +       sound like philosophy or something, technically it
1679 +       simply shows the names of whiteouts while keeping their behaviour.
1680 +
1681 +config AUFS_BR_RAMFS
1682 +       bool "Ramfs (initramfs/rootfs) as an aufs branch"
1683 +       help
1684 +       If you want to use ramfs as an aufs branch fs, then enable this
1685 +       option. Generally tmpfs is recommended.
1686 +       Aufs prohibits them from being a branch fs by default, because
1687 +       initramfs generally becomes unusable after switch_root or
1688 +       similar. If you set initramfs as an aufs branch and boot your
1689 +       system by switch_root, you will easily run into problems since the
1690 +       files in initramfs may be inaccessible.
1691 +       Unless you are going to use ramfs as an aufs branch fs without
1692 +       switch_root or something, leave it N.
1693 +
1694 +config AUFS_BDEV_LOOP
1695 +       bool
1696 +       depends on BLK_DEV_LOOP
1697 +       default y
1698 +       help
1699 +       Automatic configuration for internal use.
1700 +       Convert =[ym] into =y.
1701 +
1702 +config AUFS_DEBUG
1703 +       bool "Debug aufs"
1704 +       help
1705 +       Enable this to compile aufs internal debug code.
1706 +       It will have a negative impact to the performance.
1707 +
1708 +config AUFS_MAGIC_SYSRQ
1709 +       bool
1710 +       depends on AUFS_DEBUG && MAGIC_SYSRQ
1711 +       default y
1712 +       help
1713 +       Automatic configuration for internal use.
1714 +       When aufs supports Magic SysRq, enabled automatically.
1715 +endif
1716 diff --git a/fs/aufs/Makefile b/fs/aufs/Makefile
1717 new file mode 100644
1718 index 0000000..29411e9
1719 --- /dev/null
1720 +++ b/fs/aufs/Makefile
1721 @@ -0,0 +1,23 @@
1722 +
1723 +include ${srctree}/${src}/magic.mk
1724 +
1725 +obj-$(CONFIG_AUFS_FS) += aufs.o
1726 +aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
1727 +       wkq.o vfsub.o dcsub.o \
1728 +       cpup.o whout.o plink.o wbr_policy.o \
1729 +       dinfo.o dentry.o \
1730 +       finfo.o file.o f_op.o \
1731 +       dir.o vdir.o \
1732 +       iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
1733 +       ioctl.o
1734 +
1735 +# all are boolean
1736 +aufs-$(CONFIG_SYSFS) += sysfs.o
1737 +aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
1738 +aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
1739 +aufs-$(CONFIG_AUFS_HINOTIFY) += hinotify.o
1740 +aufs-$(CONFIG_AUFS_EXPORT) += export.o
1741 +aufs-$(CONFIG_AUFS_RDU) += rdu.o
1742 +aufs-$(CONFIG_AUFS_SP_IATTR) += f_op_sp.o
1743 +aufs-$(CONFIG_AUFS_DEBUG) += debug.o
1744 +aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
1745 diff --git a/fs/aufs/aufs.h b/fs/aufs/aufs.h
1746 new file mode 100644
1747 index 0000000..49bada2
1748 --- /dev/null
1749 +++ b/fs/aufs/aufs.h
1750 @@ -0,0 +1,59 @@
1751 +/*
1752 + * Copyright (C) 2005-2009 Junjiro R. Okajima
1753 + *
1754 + * This program, aufs is free software; you can redistribute it and/or modify
1755 + * it under the terms of the GNU General Public License as published by
1756 + * the Free Software Foundation; either version 2 of the License, or
1757 + * (at your option) any later version.
1758 + *
1759 + * This program is distributed in the hope that it will be useful,
1760 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1761 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1762 + * GNU General Public License for more details.
1763 + *
1764 + * You should have received a copy of the GNU General Public License
1765 + * along with this program; if not, write to the Free Software
1766 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1767 + */
1768 +
1769 +/*
1770 + * all header files: defines the AuStub() helper macros, then includes every aufs internal header
1771 + */
1772 +
1773 +#ifndef __AUFS_H__
1774 +#define __AUFS_H__
1775 +
1776 +#ifdef __KERNEL__
1777 +/* AuStub() expands to a static inline function @name taking the variadic arguments as its parameter list, with @body as its body */
1778 +#define AuStub(type, name, body, ...) \
1779 +       static inline type name(__VA_ARGS__) { body; }
1780 +/* AuStubVoid() passes an empty body (returns void); AuStubInt0() passes a body that returns 0 */
1781 +#define AuStubVoid(name, ...) \
1782 +       AuStub(void, name, , __VA_ARGS__)
1783 +#define AuStubInt0(name, ...) \
1784 +       AuStub(int, name, return 0, __VA_ARGS__)
1785 +
1786 +#include "debug.h"
1787 +
1788 +#include "branch.h"
1789 +#include "cpup.h"
1790 +#include "dcsub.h"
1791 +#include "dbgaufs.h"
1792 +#include "dentry.h"
1793 +#include "dir.h"
1794 +#include "file.h"
1795 +#include "fstype.h"
1796 +#include "inode.h"
1797 +#include "loop.h"
1798 +#include "module.h"
1799 +#include "opts.h"
1800 +#include "rwsem.h"
1801 +#include "spl.h"
1802 +#include "super.h"
1803 +#include "sysaufs.h"
1804 +#include "vfsub.h"
1805 +#include "whout.h"
1806 +#include "wkq.h"
1807 +
1808 +#endif /* __KERNEL__ */
1809 +#endif /* __AUFS_H__ */
1810 diff --git a/fs/aufs/branch.c b/fs/aufs/branch.c
1811 new file mode 100644
1812 index 0000000..2717e9e
1813 --- /dev/null
1814 +++ b/fs/aufs/branch.c
1815 @@ -0,0 +1,978 @@
1816 +/*
1817 + * Copyright (C) 2005-2009 Junjiro R. Okajima
1818 + *
1819 + * This program, aufs is free software; you can redistribute it and/or modify
1820 + * it under the terms of the GNU General Public License as published by
1821 + * the Free Software Foundation; either version 2 of the License, or
1822 + * (at your option) any later version.
1823 + *
1824 + * This program is distributed in the hope that it will be useful,
1825 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1826 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1827 + * GNU General Public License for more details.
1828 + *
1829 + * You should have received a copy of the GNU General Public License
1830 + * along with this program; if not, write to the Free Software
1831 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1832 + */
1833 +
1834 +/*
1835 + * branch management
1836 + */
1837 +
1838 +#include <linux/file.h>
1839 +#include <linux/statfs.h>
1840 +#include "aufs.h"
1841 +
1842 +/*
1843 + * free a single branch: drops its xino file, whiteout dentries and mount, then frees @br->br_wbr and @br
1844 + */
1845 +static void au_br_do_free(struct au_branch *br)
1846 +{
1847 +       int i;
1848 +       struct au_wbr *wbr;
1849 +
1850 +       if (br->br_xino.xi_file)
1851 +               fput(br->br_xino.xi_file);
1852 +       mutex_destroy(&br->br_xino.xi_nondir_mtx);
1853 +
1854 +       AuDebugOn(atomic_read(&br->br_count)); /* NOTE(review): presumably flags a non-zero br_count -- confirm AuDebugOn semantics */
1855 +
1856 +       wbr = br->br_wbr;
1857 +       if (wbr) {
1858 +               for (i = 0; i < AuBrWh_Last; i++)
1859 +                       dput(wbr->wbr_wh[i]);
1860 +               AuDebugOn(atomic_read(&wbr->wbr_wh_running));
1861 +               AuRwDestroy(&wbr->wbr_wh_rwsem);
1862 +       }
1863 +
1864 +       /* some filesystems acquire extra lock, so lockdep is switched off around mntput() */
1865 +       lockdep_off();
1866 +       mntput(br->br_mnt);
1867 +       lockdep_on();
1868 +
1869 +       kfree(wbr); /* wbr is NULL when the branch has no br_wbr; kfree(NULL) is a no-op */
1870 +       kfree(br);
1871 +}
1872 +
1873 +/*
1874 + * frees all branches of @sbinfo, i.e. si_branch[0] through si_branch[si_bend]
1875 + */
1876 +void au_br_free(struct au_sbinfo *sbinfo)
1877 +{
1878 +       aufs_bindex_t bmax;
1879 +       struct au_branch **br;
1880 +
1881 +       AuRwMustWriteLock(&sbinfo->si_rwsem); /* NOTE(review): presumably checks that si_rwsem is write-locked -- confirm */
1882 +
1883 +       bmax = sbinfo->si_bend + 1; /* number of branches to free; zero when si_bend is -1 */
1884 +       br = sbinfo->si_branch;
1885 +       while (bmax--)
1886 +               au_br_do_free(*br++); /* the si_branch array itself is neither freed nor cleared here */
1887 +}
1888 +
1889 +/*
1890 + * find the index of a branch which is specified by @br_id; returns that index, or -1 when no branch of @sb has this id.
1891 + */
1892 +int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
1893 +{
1894 +       aufs_bindex_t bindex, bend;
1895 +
1896 +       bend = au_sbend(sb);
1897 +       for (bindex = 0; bindex <= bend; bindex++) /* linear scan over indices 0..bend */
1898 +               if (au_sbr_id(sb, bindex) == br_id)
1899 +                       return bindex;
1900 +       return -1;
1901 +}
1902 +
1903 +/* ---------------------------------------------------------------------- */
1904 +
1905 +/*
1906 + * add a branch
1907 + */
1908 +/* returns non-zero when @h_d1 and @h_d2 are the same dentry, or when au_test_subdir() or au_test_loopback_overlap() reports a hit in either argument order */
1909 +static int test_overlap(struct super_block *sb, struct dentry *h_d1,
1910 +                       struct dentry *h_d2)
1911 +{
1912 +       if (unlikely(h_d1 == h_d2))
1913 +               return 1;
1914 +       return !!au_test_subdir(h_d1, h_d2)
1915 +               || !!au_test_subdir(h_d2, h_d1)
1916 +               || au_test_loopback_overlap(sb, h_d1, h_d2)
1917 +               || au_test_loopback_overlap(sb, h_d2, h_d1);
1918 +}
1919 +
1920 +/*
1921 + * returns a newly allocated branch, or ERR_PTR(-ENOMEM) on failure. @new_nbranch is the number of branches
1922 + * after adding a branch; it is passed to au_sbr_realloc(), au_di_realloc() and au_ii_realloc() for @sb and its root.
1923 + */
1924 +static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
1925 +                                    int perm)
1926 +{
1927 +       struct au_branch *add_branch;
1928 +       struct dentry *root;
1929 +
1930 +       root = sb->s_root;
1931 +       add_branch = kmalloc(sizeof(*add_branch), GFP_NOFS); /* kmalloc does not zero: only br_wbr is set in this function */
1932 +       if (unlikely(!add_branch))
1933 +               goto out;
1934 +
1935 +       add_branch->br_wbr = NULL;
1936 +       if (au_br_writable(perm)) {
1937 +               /* may be freed separately at changing the branch permission */
1938 +               add_branch->br_wbr = kmalloc(sizeof(*add_branch->br_wbr),
1939 +                                            GFP_NOFS);
1940 +               if (unlikely(!add_branch->br_wbr))
1941 +                       goto out_br;
1942 +       }
1943 +
1944 +       if (unlikely(au_sbr_realloc(au_sbi(sb), new_nbranch)
1945 +                    || au_di_realloc(au_di(root), new_nbranch)
1946 +                    || au_ii_realloc(au_ii(root->d_inode), new_nbranch)))
1947 +               goto out_wbr; /* any non-zero result is reported to the caller as -ENOMEM */
1948 +       return add_branch; /* success */
1949 +
1950 + out_wbr:
1951 +       kfree(add_branch->br_wbr); /* NULL for a non-writable @perm; kfree(NULL) is a no-op */
1952 + out_br:
1953 +       kfree(add_branch);
1954 + out:
1955 +       return ERR_PTR(-ENOMEM);
1956 +}
1957 +
1958 +/*
1959 + * test if the branch permission is legal or not: returns -EINVAL (and logs @path) when @brperm is writable but IS_RDONLY(@inode) holds, 0 otherwise.
1960 + */
1961 +static int test_br(struct inode *inode, int brperm, char *path)
1962 +{
1963 +       int err;
1964 +
1965 +       err = 0;
1966 +       if (unlikely(au_br_writable(brperm) && IS_RDONLY(inode))) {
1967 +               AuErr("write permission for readonly mount or inode, %s\n",
1968 +                     path);
1969 +               err = -EINVAL;
1970 +       }
1971 +
1972 +       return err;
1973 +}
1974 +
1975 +/*
1976 + * check whether the branch described by @add may be added to @sb. returns:
1977 + * 0: success, the caller will add it
1978 + * plus: success, it is already unified (reported this way only when @remount is set), the caller should ignore it
1979 + * minus: error (-EINVAL, -ENOSPC, -EDOM or -ENOENT; every error path logs via AuErr)
1980 + */
1981 +static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
1982 +{
1983 +       int err;
1984 +       aufs_bindex_t bend, bindex;
1985 +       struct dentry *root;
1986 +       struct inode *inode, *h_inode;
1987 +
1988 +       root = sb->s_root;
1989 +       bend = au_sbend(sb);
1990 +       if (unlikely(bend >= 0
1991 +                    && au_find_dbindex(root, add->path.dentry) >= 0)) {
1992 +               err = 1; /* already a branch: tolerated on remount, an error otherwise */
1993 +               if (!remount) {
1994 +                       err = -EINVAL;
1995 +                       AuErr("%s duplicated\n", add->pathname);
1996 +               }
1997 +               goto out;
1998 +       }
1999 +
2000 +       err = -ENOSPC; /* -E2BIG; */
2001 +       if (unlikely(AUFS_BRANCH_MAX <= add->bindex
2002 +                    || AUFS_BRANCH_MAX - 1 <= bend)) {
2003 +               AuErr("number of branches exceeded %s\n", add->pathname);
2004 +               goto out;
2005 +       }
2006 +
2007 +       err = -EDOM;
2008 +       if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) { /* valid insert positions are 0..bend+1 */
2009 +               AuErr("bad index %d\n", add->bindex);
2010 +               goto out;
2011 +       }
2012 +
2013 +       inode = add->path.dentry->d_inode;
2014 +       err = -ENOENT;
2015 +       if (unlikely(!inode->i_nlink)) {
2016 +               AuErr("no existence %s\n", add->pathname);
2017 +               goto out;
2018 +       }
2019 +
2020 +       err = -EINVAL;
2021 +       if (unlikely(inode->i_sb == sb)) { /* the branch must not live on this aufs superblock itself */
2022 +               AuErr("%s must be outside\n", add->pathname);
2023 +               goto out;
2024 +       }
2025 +
2026 +       if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
2027 +               AuErr("unsupported filesystem, %s (%s)\n",
2028 +                     add->pathname, au_sbtype(inode->i_sb));
2029 +               goto out;
2030 +       }
2031 +
2032 +       err = test_br(add->path.dentry->d_inode, add->perm, add->pathname);
2033 +       if (unlikely(err))
2034 +               goto out;
2035 +
2036 +       if (bend < 0) /* no existing branch to compare against: skip the overlap and uid/gid/perm checks */
2037 +               return 0; /* success */
2038 +
2039 +       err = -EINVAL;
2040 +       for (bindex = 0; bindex <= bend; bindex++)
2041 +               if (unlikely(test_overlap(sb, add->path.dentry,
2042 +                                         au_h_dptr(root, bindex)))) {
2043 +                       AuErr("%s is overlapped\n", add->pathname);
2044 +                       goto out;
2045 +               }
2046 +
2047 +       err = 0;
2048 +       if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
2049 +               h_inode = au_h_dptr(root, 0)->d_inode; /* compare against the root of the branch at index 0 */
2050 +               if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
2051 +                   || h_inode->i_uid != inode->i_uid
2052 +                   || h_inode->i_gid != inode->i_gid)
2053 +                       AuWarn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
2054 +                              add->pathname,
2055 +                              inode->i_uid, inode->i_gid,
2056 +                              (inode->i_mode & S_IALLUGO),
2057 +                              h_inode->i_uid, h_inode->i_gid,
2058 +                              (h_inode->i_mode & S_IALLUGO));
2059 +       }
2060 +
2061 + out:
2062 +       return err;
2063 +}
2064 +
2065 +/*
2066 + * (re)initialize or clean the whiteouts of a branch as if it had new_perm
2067 + */
2068 +static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
2069 +                        int new_perm, struct dentry *h_root)
2070 +{
2071 +       int err, old_perm;
2072 +       aufs_bindex_t bindex;
2073 +       struct mutex *h_mtx;
2074 +       struct au_wbr *wbr;
2075 +       struct au_hinode *hdir;
2076 +
2077 +       wbr = br->br_wbr;
2078 +       old_perm = br->br_perm;
2079 +       br->br_perm = new_perm; /* temporary, restored below */
2080 +       hdir = NULL;
2081 +       h_mtx = NULL;
2082 +       bindex = au_br_index(sb, br->br_id);
2083 +       if (0 <= bindex) { /* presumably br is already in sb */
2084 +               hdir = au_hi(sb->s_root->d_inode, bindex);
2085 +               au_hin_imtx_lock_nested(hdir, AuLsc_I_PARENT);
2086 +       } else { /* otherwise lock the inode of h_root directly */
2087 +               h_mtx = &h_root->d_inode->i_mutex;
2088 +               mutex_lock_nested(h_mtx, AuLsc_I_PARENT);
2089 +       }
2090 +       if (!wbr)
2091 +               err = au_wh_init(h_root, br, sb);
2092 +       else {
2093 +               wbr_wh_write_lock(wbr);
2094 +               err = au_wh_init(h_root, br, sb);
2095 +               wbr_wh_write_unlock(wbr);
2096 +       }
2097 +       if (hdir) /* unlock in the same way as locked above */
2098 +               au_hin_imtx_unlock(hdir);
2099 +       else
2100 +               mutex_unlock(h_mtx);
2101 +       br->br_perm = old_perm; /* the caller updates br_perm if needed */
2102 +
2103 +       if (!err && wbr && !au_br_writable(new_perm)) {
2104 +               kfree(wbr); /* new_perm is not writable */
2105 +               br->br_wbr = NULL;
2106 +       }
2107 +
2108 +       return err;
2109 +}
2110 +
2111 +static int au_wbr_init(struct au_branch *br, struct super_block *sb,
2112 +                      int perm, struct path *path)
2113 +{ /* reset br->br_wbr and set up the whiteouts under path->dentry */
2114 +       int err;
2115 +       struct kstatfs kst;
2116 +       struct au_wbr *wbr;
2117 +       struct dentry *h_dentry;
2118 +
2119 +       wbr = br->br_wbr; /* must be allocated by the caller */
2120 +       au_rw_init(&wbr->wbr_wh_rwsem);
2121 +       memset(wbr->wbr_wh, 0, sizeof(wbr->wbr_wh));
2122 +       atomic_set(&wbr->wbr_wh_running, 0);
2123 +       wbr->wbr_bytes = 0;
2124 +
2125 +       /*
2126 +        * a limit for rmdir/rename a dir
2127 +        * cf. AUFS_MAX_NAMELEN in include/linux/aufs_type.h
2128 +        */
2129 +       h_dentry = path->dentry;
2130 +       err = vfs_statfs(h_dentry, &kst);
2131 +       if (unlikely(err))
2132 +               goto out;
2133 +       err = -EINVAL; /* returned when f_namelen is below NAME_MAX */
2134 +       if (kst.f_namelen >= NAME_MAX)
2135 +               err = au_br_init_wh(sb, br, perm, h_dentry);
2136 +       else
2137 +               AuErr("%.*s(%s), unsupported namelen %ld\n",
2138 +                     AuDLNPair(h_dentry), au_sbtype(h_dentry->d_sb),
2139 +                     kst.f_namelen);
2140 +
2141 + out:
2142 +       return err;
2143 +}
2144 +
2145 +/* initialize a new branch, returns zero or a negative error */
2146 +static int au_br_init(struct au_branch *br, struct super_block *sb,
2147 +                     struct au_opt_add *add)
2148 +{
2149 +       int err;
2150 +
2151 +       err = 0;
2152 +       memset(&br->br_xino, 0, sizeof(br->br_xino));
2153 +       mutex_init(&br->br_xino.xi_nondir_mtx);
2154 +       br->br_perm = add->perm;
2155 +       br->br_mnt = add->path.mnt; /* set first, mntget() later */
2156 +       atomic_set(&br->br_count, 0);
2157 +       br->br_xino_upper = AUFS_XINO_TRUNC_INIT;
2158 +       atomic_set(&br->br_xino_running, 0);
2159 +       br->br_id = au_new_br_id(sb);
2160 +
2161 +       if (au_br_writable(add->perm)) { /* wbr is for writable perms only */
2162 +               err = au_wbr_init(br, sb, add->perm, &add->path);
2163 +               if (unlikely(err))
2164 +                       goto out;
2165 +       }
2166 +
2167 +       if (au_opt_test(au_mntflags(sb), XINO)) {
2168 +               err = au_xino_br(sb, br, add->path.dentry->d_inode->i_ino,
2169 +                                au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
2170 +               if (unlikely(err)) {
2171 +                       AuDebugOn(br->br_xino.xi_file);
2172 +                       goto out;
2173 +               }
2174 +       }
2175 +
2176 +       sysaufs_br_init(br);
2177 +       mntget(add->path.mnt); /* for br_mnt, taken on success only */
2178 +
2179 + out:
2180 +       return err;
2181 +}
2182 +
2183 +static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
2184 +                            struct au_branch *br, aufs_bindex_t bend,
2185 +                            aufs_bindex_t amount)
2186 +{ /* store br at bindex in sbinfo->si_branch[], shifting 'amount' entries */
2187 +       struct au_branch **brp;
2188 +
2189 +       AuRwMustWriteLock(&sbinfo->si_rwsem);
2190 +
2191 +       brp = sbinfo->si_branch + bindex;
2192 +       memmove(brp + 1, brp, sizeof(*brp) * amount); /* make room */
2193 +       *brp = br;
2194 +       sbinfo->si_bend++;
2195 +       if (unlikely(bend < 0)) /* there was no branch before this one */
2196 +               sbinfo->si_bend = 0;
2197 +}
2198 +
2199 +static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
2200 +                            aufs_bindex_t bend, aufs_bindex_t amount)
2201 +{ /* open a re-initialized slot at bindex in dinfo->di_hdentry[] */
2202 +       struct au_hdentry *hdp;
2203 +
2204 +       AuRwMustWriteLock(&dinfo->di_rwsem);
2205 +
2206 +       hdp = dinfo->di_hdentry + bindex;
2207 +       memmove(hdp + 1, hdp, sizeof(*hdp) * amount); /* make room */
2208 +       au_h_dentry_init(hdp);
2209 +       dinfo->di_bend++;
2210 +       if (unlikely(bend < 0)) /* there was no branch before this one */
2211 +               dinfo->di_bstart = 0;
2212 +}
2213 +
2214 +static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
2215 +                            aufs_bindex_t bend, aufs_bindex_t amount)
2216 +{ /* open a re-initialized slot at bindex in iinfo->ii_hinode[] */
2217 +       struct au_hinode *hip;
2218 +
2219 +       AuRwMustWriteLock(&iinfo->ii_rwsem);
2220 +
2221 +       hip = iinfo->ii_hinode + bindex;
2222 +       memmove(hip + 1, hip, sizeof(*hip) * amount); /* make room */
2223 +       hip->hi_inode = NULL;
2224 +       au_hin_init(hip, NULL);
2225 +       iinfo->ii_bend++;
2226 +       if (unlikely(bend < 0)) /* there was no branch before this one */
2227 +               iinfo->ii_bstart = 0;
2228 +}
2229 +
2230 +static void au_br_do_add(struct super_block *sb, struct dentry *h_dentry,
2231 +                        struct au_branch *br, aufs_bindex_t bindex)
2232 +{ /* insert br at bindex into sb, the root dentry and the root inode */
2233 +       struct dentry *root;
2234 +       struct inode *root_inode;
2235 +       aufs_bindex_t bend, amount;
2236 +
2237 +       root = sb->s_root;
2238 +       root_inode = root->d_inode;
2239 +       au_plink_maint_block(sb);
2240 +       bend = au_sbend(sb);
2241 +       amount = bend + 1 - bindex; /* entries at and after bindex */
2242 +       au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount);
2243 +       au_br_do_add_hdp(au_di(root), bindex, bend, amount);
2244 +       au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount);
2245 +       au_set_h_dptr(root, bindex, dget(h_dentry)); /* fill the new slots */
2246 +       au_set_h_iptr(root_inode, bindex, au_igrab(h_dentry->d_inode),
2247 +                     /*flags*/0);
2248 +}
2249 +
2250 +int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
2251 +{ /* add the branch described by 'add', returns zero or a negative error */
2252 +       int err;
2253 +       aufs_bindex_t bend, add_bindex;
2254 +       struct dentry *root, *h_dentry;
2255 +       struct inode *root_inode;
2256 +       struct au_branch *add_branch;
2257 +
2258 +       root = sb->s_root;
2259 +       root_inode = root->d_inode;
2260 +       IMustLock(root_inode);
2261 +       err = test_add(sb, add, remount);
2262 +       if (unlikely(err < 0))
2263 +               goto out;
2264 +       if (err) { /* plus: already unified, nothing to do */
2265 +               err = 0;
2266 +               goto out; /* success */
2267 +       }
2268 +
2269 +       bend = au_sbend(sb);
2270 +       add_branch = au_br_alloc(sb, bend + 2, add->perm);
2271 +       err = PTR_ERR(add_branch);
2272 +       if (IS_ERR(add_branch))
2273 +               goto out;
2274 +
2275 +       err = au_br_init(add_branch, sb, add);
2276 +       if (unlikely(err)) {
2277 +               au_br_do_free(add_branch);
2278 +               goto out;
2279 +       }
2280 +
2281 +       add_bindex = add->bindex;
2282 +       h_dentry = add->path.dentry;
2283 +       if (!remount)
2284 +               au_br_do_add(sb, h_dentry, add_branch, add_bindex);
2285 +       else { /* on remount, the sysaufs entries are re-made around it */
2286 +               sysaufs_brs_del(sb, add_bindex);
2287 +               au_br_do_add(sb, h_dentry, add_branch, add_bindex);
2288 +               sysaufs_brs_add(sb, add_bindex);
2289 +       }
2290 +
2291 +       if (!add_bindex) { /* the new branch became branch 0 */
2292 +               au_cpup_attr_all(root_inode, /*force*/1);
2293 +               sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
2294 +       } else
2295 +               au_add_nlink(root_inode, h_dentry->d_inode);
2296 +
2297 +       /*
2298 +        * this test/set prevents aufs from handling unnecessary inotify events
2299 +        * of xino files, in a case of re-adding a writable branch which was
2300 +        * once detached from aufs.
2301 +        */
2302 +       if (au_xino_brid(sb) < 0
2303 +           && au_br_writable(add_branch->br_perm)
2304 +           && !au_test_fs_bad_xino(h_dentry->d_sb)
2305 +           && add_branch->br_xino.xi_file
2306 +           && add_branch->br_xino.xi_file->f_dentry->d_parent == h_dentry)
2307 +               au_xino_brid_set(sb, add_branch->br_id);
2308 +
2309 + out:
2310 +       return err;
2311 +}
2312 +
2313 +/* ---------------------------------------------------------------------- */
2314 +
2315 +/*
2316 + * delete a branch
2317 + */
2318 +
2319 +/* a macro, not an inline function, in order to show the caller's line number */
2320 +#define AuVerbose(do_info, fmt, ...) do { \
2321 +       if (do_info) \
2322 +               AuInfo(fmt, ##__VA_ARGS__); \
2323 +} while (0)
2324 +
2325 +/*
2326 + * test if the branch is deletable or not, -EBUSY if a dentry still uses it.
2327 + */
2328 +static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
2329 +                           unsigned int sigen)
2330 +{
2331 +       int err, i, j, ndentry;
2332 +       aufs_bindex_t bstart, bend;
2333 +       unsigned char verbose;
2334 +       struct au_dcsub_pages dpages;
2335 +       struct au_dpage *dpage;
2336 +       struct dentry *d;
2337 +       struct inode *inode;
2338 +
2339 +       err = au_dpages_init(&dpages, GFP_NOFS);
2340 +       if (unlikely(err))
2341 +               goto out;
2342 +       err = au_dcsub_pages(&dpages, root, NULL, NULL);
2343 +       if (unlikely(err))
2344 +               goto out_dpages;
2345 +
2346 +       verbose = !!au_opt_test(au_mntflags(root->d_sb), VERBOSE);
2347 +       for (i = 0; !err && i < dpages.ndpage; i++) { /* stop at an error */
2348 +               dpage = dpages.dpages + i;
2349 +               ndentry = dpage->ndentry;
2350 +               for (j = 0; !err && j < ndentry; j++) {
2351 +                       d = dpage->dentries[j];
2352 +                       AuDebugOn(!atomic_read(&d->d_count));
2353 +                       inode = d->d_inode;
2354 +                       if (au_digen(d) == sigen && au_iigen(inode) == sigen)
2355 +                               di_read_lock_child(d, AuLock_IR);
2356 +                       else { /* generation differs, revalidate first */
2357 +                               di_write_lock_child(d);
2358 +                               err = au_reval_dpath(d, sigen);
2359 +                               if (!err)
2360 +                                       di_downgrade_lock(d, AuLock_IR);
2361 +                               else {
2362 +                                       di_write_unlock(d);
2363 +                                       break;
2364 +                               }
2365 +                       }
2366 +
2367 +                       bstart = au_dbstart(d);
2368 +                       bend = au_dbend(d);
2369 +                       if (bstart <= bindex
2370 +                           && bindex <= bend
2371 +                           && au_h_dptr(d, bindex)
2372 +                           && (!S_ISDIR(inode->i_mode) || bstart == bend)) {
2373 +                               err = -EBUSY; /* non-dir, or dir on it alone */
2374 +                               AuVerbose(verbose, "busy %.*s\n", AuDLNPair(d));
2375 +                       }
2376 +                       di_read_unlock(d, AuLock_IR);
2377 +               }
2378 +       }
2379 +
2380 + out_dpages:
2381 +       au_dpages_free(&dpages);
2382 + out:
2383 +       return err;
2384 +}
2385 +
2386 +static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
2387 +                          unsigned int sigen)
2388 +{ /* -EBUSY if an inode which has no dentry still uses the branch */
2389 +       int err;
2390 +       struct inode *i;
2391 +       aufs_bindex_t bstart, bend;
2392 +       unsigned char verbose;
2393 +
2394 +       err = 0;
2395 +       verbose = !!au_opt_test(au_mntflags(sb), VERBOSE);
2396 +       list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
2397 +               AuDebugOn(!atomic_read(&i->i_count));
2398 +               if (!list_empty(&i->i_dentry)) /* it has a dentry, skip */
2399 +                       continue;
2400 +
2401 +               if (au_iigen(i) == sigen)
2402 +                       ii_read_lock_child(i);
2403 +               else { /* generation differs, refresh first */
2404 +                       ii_write_lock_child(i);
2405 +                       err = au_refresh_hinode_self(i, /*do_attr*/1);
2406 +                       if (!err)
2407 +                               ii_downgrade_lock(i);
2408 +                       else {
2409 +                               ii_write_unlock(i);
2410 +                               break;
2411 +                       }
2412 +               }
2413 +
2414 +               bstart = au_ibstart(i);
2415 +               bend = au_ibend(i);
2416 +               if (bstart <= bindex
2417 +                   && bindex <= bend
2418 +                   && au_h_iptr(i, bindex)
2419 +                   && (!S_ISDIR(i->i_mode) || bstart == bend)) {
2420 +                       err = -EBUSY; /* non-dir, or dir on it alone */
2421 +                       AuVerbose(verbose, "busy i%lu\n", i->i_ino);
2422 +                       ii_read_unlock(i);
2423 +                       break;
2424 +               }
2425 +               ii_read_unlock(i);
2426 +       }
2427 +
2428 +       return err;
2429 +}
2430 +
2431 +static int test_children_busy(struct dentry *root, aufs_bindex_t bindex)
2432 +{ /* run both busy tests with the write lock of root released */
2433 +       int err;
2434 +       unsigned int sigen;
2435 +
2436 +       sigen = au_sigen(root->d_sb);
2437 +       DiMustNoWaiters(root);
2438 +       IiMustNoWaiters(root->d_inode);
2439 +       di_write_unlock(root); /* re-acquired before returning */
2440 +       err = test_dentry_busy(root, bindex, sigen);
2441 +       if (!err) /* the inodes are tested only when no dentry is busy */
2442 +               err = test_inode_busy(root->d_sb, bindex, sigen);
2443 +       di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
2444 +
2445 +       return err;
2446 +}
2447 +
2448 +static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
2449 +                            const aufs_bindex_t bindex,
2450 +                            const aufs_bindex_t bend)
2451 +{ /* remove the entry at bindex from sbinfo->si_branch[] and shrink it */
2452 +       struct au_branch **brp, **p;
2453 +
2454 +       AuRwMustWriteLock(&sbinfo->si_rwsem);
2455 +
2456 +       brp = sbinfo->si_branch + bindex;
2457 +       if (bindex < bend) /* close the gap */
2458 +               memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex));
2459 +       sbinfo->si_branch[0 + bend] = NULL;
2460 +       sbinfo->si_bend--;
2461 +
2462 +       p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, GFP_NOFS);
2463 +       if (p) /* on failure, the old and larger array is kept */
2464 +               sbinfo->si_branch = p;
2465 +}
2466 +
2467 +static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
2468 +                            const aufs_bindex_t bend)
2469 +{ /* remove the entry at bindex from dinfo->di_hdentry[] and shrink it */
2470 +       struct au_hdentry *hdp, *p;
2471 +
2472 +       AuRwMustWriteLock(&dinfo->di_rwsem);
2473 +
2474 +       hdp = dinfo->di_hdentry + bindex;
2475 +       if (bindex < bend) /* close the gap */
2476 +               memmove(hdp, hdp + 1, sizeof(*hdp) * (bend - bindex));
2477 +       dinfo->di_hdentry[0 + bend].hd_dentry = NULL;
2478 +       dinfo->di_bend--;
2479 +
2480 +       p = krealloc(dinfo->di_hdentry, sizeof(*p) * bend, GFP_NOFS);
2481 +       if (p) /* on failure, the old and larger array is kept */
2482 +               dinfo->di_hdentry = p;
2483 +}
2484 +
2485 +static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
2486 +                            const aufs_bindex_t bend)
2487 +{ /* remove the entry at bindex from iinfo->ii_hinode[] and shrink it */
2488 +       struct au_hinode *hip, *p;
2489 +
2490 +       AuRwMustWriteLock(&iinfo->ii_rwsem);
2491 +
2492 +       hip = iinfo->ii_hinode + bindex;
2493 +       if (bindex < bend) /* close the gap */
2494 +               memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex));
2495 +       iinfo->ii_hinode[0 + bend].hi_inode = NULL;
2496 +       au_hin_init(iinfo->ii_hinode + bend, NULL);
2497 +       iinfo->ii_bend--;
2498 +
2499 +       p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, GFP_NOFS);
2500 +       if (p) /* on failure, the old and larger array is kept */
2501 +               iinfo->ii_hinode = p;
2502 +}
2503 +
2504 +static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
2505 +                        struct au_branch *br)
2506 +{ /* release br and remove bindex from sb, the root dentry and inode */
2507 +       aufs_bindex_t bend;
2508 +       struct au_sbinfo *sbinfo;
2509 +       struct dentry *root;
2510 +       struct inode *inode;
2511 +
2512 +       SiMustWriteLock(sb);
2513 +
2514 +       root = sb->s_root;
2515 +       inode = root->d_inode;
2516 +       au_plink_maint_block(sb);
2517 +       sbinfo = au_sbi(sb);
2518 +       bend = sbinfo->si_bend; /* the value before the deletion */
2519 +
2520 +       dput(au_h_dptr(root, bindex));
2521 +       au_hiput(au_hi(inode, bindex));
2522 +       au_br_do_free(br); /* NOTE(review): presumably frees br itself */
2523 +
2524 +       au_br_do_del_brp(sbinfo, bindex, bend);
2525 +       au_br_do_del_hdp(au_di(root), bindex, bend);
2526 +       au_br_do_del_hip(au_ii(inode), bindex, bend);
2527 +}
2528 +
2529 +int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
2530 +{ /* delete the branch whose root is del->h_path.dentry; 0 or -errno */
2531 +       int err, rerr, i;
2532 +       unsigned int mnt_flags;
2533 +       aufs_bindex_t bindex, bend, br_id;
2534 +       unsigned char do_wh, verbose;
2535 +       struct au_branch *br;
2536 +       struct au_wbr *wbr;
2537 +
2538 +       err = 0;
2539 +       bindex = au_find_dbindex(sb->s_root, del->h_path.dentry);
2540 +       if (bindex < 0) { /* not a branch, tolerated on remount only */
2541 +               if (remount)
2542 +                       goto out; /* success */
2543 +               err = -ENOENT;
2544 +               AuErr("%s no such branch\n", del->pathname);
2545 +               goto out;
2546 +       }
2547 +       AuDbg("bindex b%d\n", bindex);
2548 +
2549 +       err = -EBUSY;
2550 +       mnt_flags = au_mntflags(sb);
2551 +       verbose = !!au_opt_test(mnt_flags, VERBOSE);
2552 +       bend = au_sbend(sb);
2553 +       if (unlikely(!bend)) { /* the only branch cannot be deleted */
2554 +               AuVerbose(verbose, "no more branches left\n");
2555 +               goto out;
2556 +       }
2557 +       br = au_sbr(sb, bindex);
2558 +       i = atomic_read(&br->br_count);
2559 +       if (unlikely(i)) {
2560 +               AuVerbose(verbose, "%d file(s) opened\n", i);
2561 +               goto out;
2562 +       }
2563 +
2564 +       wbr = br->br_wbr;
2565 +       do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
2566 +       if (do_wh) { /* release wbr_wh[], re-created at out_wh on failure */
2567 +               /* instead of WbrWhMustWriteLock(wbr) */
2568 +               SiMustWriteLock(sb);
2569 +               for (i = 0; i < AuBrWh_Last; i++) {
2570 +                       dput(wbr->wbr_wh[i]);
2571 +                       wbr->wbr_wh[i] = NULL;
2572 +               }
2573 +       }
2574 +
2575 +       err = test_children_busy(sb->s_root, bindex);
2576 +       if (unlikely(err)) {
2577 +               if (do_wh)
2578 +                       goto out_wh;
2579 +               goto out;
2580 +       }
2581 +
2582 +       err = 0;
2583 +       br_id = br->br_id; /* saved, au_br_do_del() passes br to ..._free() */
2584 +       if (!remount)
2585 +               au_br_do_del(sb, bindex, br);
2586 +       else { /* on remount, the sysaufs entries are re-made around it */
2587 +               sysaufs_brs_del(sb, bindex);
2588 +               au_br_do_del(sb, bindex, br);
2589 +               sysaufs_brs_add(sb, bindex);
2590 +       }
2591 +
2592 +       if (!bindex) { /* branch 0 was replaced by the next one */
2593 +               au_cpup_attr_all(sb->s_root->d_inode, /*force*/1);
2594 +               sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
2595 +       } else
2596 +               au_sub_nlink(sb->s_root->d_inode, del->h_path.dentry->d_inode);
2597 +       if (au_opt_test(mnt_flags, PLINK))
2598 +               au_plink_half_refresh(sb, br_id);
2599 +
2600 +       if (au_xino_brid(sb) == br_id) /* do not dereference br any more */
2601 +               au_xino_brid_set(sb, -1);
2602 +       goto out; /* success */
2603 +
2604 + out_wh:
2605 +       /* revert */
2606 +       rerr = au_br_init_wh(sb, br, br->br_perm, del->h_path.dentry);
2607 +       if (rerr)
2608 +               AuWarn("failed re-creating base whiteout, %s. (%d)\n",
2609 +                      del->pathname, rerr);
2610 + out:
2611 +       return err;
2612 +}
2613 +
2614 +/* ---------------------------------------------------------------------- */
2615 +
2616 +/*
2617 + * change a branch permission
2618 + */
2619 +
2620 +static int do_need_sigen_inc(int a, int b)
2621 +{ /* true when perm a is whiteout-able and perm b is not */
2622 +       return au_br_whable(a) && !au_br_whable(b);
2623 +}
2624 +
2625 +static int need_sigen_inc(int old, int new)
2626 +{ /* true when au_br_whable() differs between the old and the new perm */
2627 +       return do_need_sigen_inc(old, new)
2628 +               || do_need_sigen_inc(new, old);
2629 +}
2630 +
2631 +static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
2632 +{ /* clear FMODE_WRITE of the lower files opened for write on bindex */
2633 +       int err;
2634 +       unsigned long n, ul, bytes, files;
2635 +       aufs_bindex_t bstart;
2636 +       struct file *file, *hf, **a;
2637 +       const int step_bytes = 1024, /* memory allocation unit */
2638 +               step_files = step_bytes / sizeof(*a);
2639 +
2640 +       err = -ENOMEM;
2641 +       n = 0; /* the number of the collected files in a[] */
2642 +       bytes = step_bytes;
2643 +       files = step_files; /* the capacity of a[] */
2644 +       a = kmalloc(bytes, GFP_NOFS);
2645 +       if (unlikely(!a))
2646 +               goto out;
2647 +
2648 +       /* no need file_list_lock() since sbinfo is locked? deferred? */
2649 +       list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
2650 +               if (special_file(file->f_dentry->d_inode->i_mode))
2651 +                       continue;
2652 +
2653 +               AuDbg("%.*s\n", AuDLNPair(file->f_dentry));
2654 +               fi_read_lock(file);
2655 +               if (unlikely(au_test_mmapped(file))) {
2656 +                       err = -EBUSY; /* give up, nothing is modified yet */
2657 +                       FiMustNoWaiters(file);
2658 +                       fi_read_unlock(file);
2659 +                       goto out_free;
2660 +               }
2661 +
2662 +               bstart = au_fbstart(file);
2663 +               if (!S_ISREG(file->f_dentry->d_inode->i_mode)
2664 +                   || !(file->f_mode & FMODE_WRITE)
2665 +                   || bstart != bindex) { /* not a target, skip */
2666 +                       FiMustNoWaiters(file);
2667 +                       fi_read_unlock(file);
2668 +                       continue;
2669 +               }
2670 +
2671 +               hf = au_h_fptr(file, bstart);
2672 +               FiMustNoWaiters(file);
2673 +               fi_read_unlock(file);
2674 +
2675 +               if (n < files)
2676 +                       a[n++] = hf;
2677 +               else { /* a[] is full, grow it by one unit */
2678 +                       void *p;
2679 +
2680 +                       err = -ENOMEM;
2681 +                       bytes += step_bytes;
2682 +                       files += step_files;
2683 +                       p = krealloc(a, bytes, GFP_NOFS);
2684 +                       if (p) {
2685 +                               a = p;
2686 +                               a[n++] = hf;
2687 +                       } else
2688 +                               goto out_free;
2689 +               }
2690 +       }
2691 +
2692 +       err = 0; /* the collection is done, now modify the files */
2693 +       for (ul = 0; ul < n; ul++) {
2694 +               /* todo: already flushed? */
2695 +               /* cf. fs/super.c:mark_files_ro() */
2696 +               hf = a[ul];
2697 +               hf->f_mode &= ~FMODE_WRITE;
2698 +               if (!file_check_writeable(hf)) {
2699 +                       file_release_write(hf);
2700 +                       mnt_drop_write(hf->f_vfsmnt);
2701 +               }
2702 +       }
2703 +
2704 + out_free:
2705 +       kfree(a);
2706 + out:
2707 +       return err;
2708 +}
2709 +
2710 +int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
2711 +             int *do_update)
2712 +{ /* change the perm of the branch mod->h_root to mod->perm; 0 or -errno */
2713 +       int err, rerr;
2714 +       aufs_bindex_t bindex;
2715 +       struct path path;
2716 +       struct dentry *root;
2717 +       struct au_branch *br;
2718 +
2719 +       root = sb->s_root;
2720 +       au_plink_maint_block(sb);
2721 +       bindex = au_find_dbindex(root, mod->h_root);
2722 +       if (bindex < 0) { /* not a branch, tolerated on remount only */
2723 +               if (remount)
2724 +                       return 0; /* success */
2725 +               err = -ENOENT;
2726 +               AuErr("%s no such branch\n", mod->path);
2727 +               goto out;
2728 +       }
2729 +       AuDbg("bindex b%d\n", bindex);
2730 +
2731 +       err = test_br(mod->h_root->d_inode, mod->perm, mod->path);
2732 +       if (unlikely(err))
2733 +               goto out;
2734 +
2735 +       br = au_sbr(sb, bindex);
2736 +       if (br->br_perm == mod->perm)
2737 +               return 0; /* success */
2738 +
2739 +       if (au_br_writable(br->br_perm)) {
2740 +               /* remove whiteout base */
2741 +               err = au_br_init_wh(sb, br, mod->perm, mod->h_root);
2742 +               if (unlikely(err))
2743 +                       goto out;
2744 +
2745 +               if (!au_br_writable(mod->perm)) {
2746 +                       /* rw --> ro, file might be mmapped */
2747 +                       DiMustNoWaiters(root);
2748 +                       IiMustNoWaiters(root->d_inode);
2749 +                       di_write_unlock(root);
2750 +                       err = au_br_mod_files_ro(sb, bindex);
2751 +                       /* aufs_write_lock() calls ..._child() */
2752 +                       di_write_lock_child(root);
2753 +
2754 +                       if (unlikely(err)) { /* revert, re-create wbr */
2755 +                               rerr = -ENOMEM;
2756 +                               br->br_wbr = kmalloc(sizeof(*br->br_wbr),
2757 +                                                    GFP_NOFS);
2758 +                               if (br->br_wbr) {
2759 +                                       path.mnt = br->br_mnt;
2760 +                                       path.dentry = mod->h_root;
2761 +                                       rerr = au_wbr_init(br, sb, br->br_perm,
2762 +                                                          &path);
2763 +                               }
2764 +                               if (unlikely(rerr)) { /* reverting failed */
2765 +                                       AuIOErr("nested error %d (%d)\n",
2766 +                                               rerr, err);
2767 +                                       br->br_perm = mod->perm;
2768 +                               }
2769 +                       }
2770 +               }
2771 +       } else if (au_br_writable(mod->perm)) {
2772 +               /* ro --> rw */
2773 +               err = -ENOMEM;
2774 +               br->br_wbr = kmalloc(sizeof(*br->br_wbr), GFP_NOFS);
2775 +               if (br->br_wbr) {
2776 +                       path.mnt = br->br_mnt;
2777 +                       path.dentry = mod->h_root;
2778 +                       err = au_wbr_init(br, sb, mod->perm, &path);
2779 +                       if (unlikely(err)) {
2780 +                               kfree(br->br_wbr);
2781 +                               br->br_wbr = NULL;
2782 +                       }
2783 +               }
2784 +       }
2785 +
2786 +       if (!err) { /* commit the new perm on success only */
2787 +               *do_update |= need_sigen_inc(br->br_perm, mod->perm);
2788 +               br->br_perm = mod->perm;
2789 +       }
2790 +
2791 + out:
2792 +       return err;
2793 +}
2794 diff --git a/fs/aufs/branch.h b/fs/aufs/branch.h
2795 new file mode 100644
2796 index 0000000..1a7219c
2797 --- /dev/null
2798 +++ b/fs/aufs/branch.h
2799 @@ -0,0 +1,219 @@
2800 +/*
2801 + * Copyright (C) 2005-2009 Junjiro R. Okajima
2802 + *
2803 + * This program, aufs is free software; you can redistribute it and/or modify
2804 + * it under the terms of the GNU General Public License as published by
2805 + * the Free Software Foundation; either version 2 of the License, or
2806 + * (at your option) any later version.
2807 + *
2808 + * This program is distributed in the hope that it will be useful,
2809 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
2810 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
2811 + * GNU General Public License for more details.
2812 + *
2813 + * You should have received a copy of the GNU General Public License
2814 + * along with this program; if not, write to the Free Software
2815 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
2816 + */
2817 +
2818 +/*
2819 + * branch filesystems and xino for them
2820 + */
2821 +
2822 +#ifndef __AUFS_BRANCH_H__
2823 +#define __AUFS_BRANCH_H__
2824 +
2825 +#ifdef __KERNEL__
2826 +
2827 +#include <linux/fs.h>
2828 +#include <linux/mount.h>
2829 +#include <linux/aufs_type.h>
2830 +#include "rwsem.h"
2831 +#include "super.h"
2832 +
2833 +/* ---------------------------------------------------------------------- */
2834 +
2835 +/* a xino file, embedded in struct au_branch as br_xino */
2836 +struct au_xino_file {
2837 +       struct file             *xi_file;
2838 +       struct mutex            xi_nondir_mtx;
2839 +
2840 +       /* todo: make xino files an array to support huge inode number */
2841 +
2842 +#ifdef CONFIG_DEBUG_FS
2843 +       struct dentry            *xi_dbgaufs; /* exists with debugfs only */
2844 +#endif
2845 +};
2846 +
2847 +/* members for writable branch only, the enum is the index of wbr_wh[] */
2848 +enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
2849 +struct au_wbr {
2850 +       struct au_rwsem         wbr_wh_rwsem;
2851 +       struct dentry           *wbr_wh[AuBrWh_Last];
2852 +       atomic_t                wbr_wh_running;
2853 +#define wbr_whbase             wbr_wh[AuBrWh_BASE]     /* whiteout base */
2854 +#define wbr_plink              wbr_wh[AuBrWh_PLINK]    /* pseudo-link dir */
2855 +#define wbr_orph               wbr_wh[AuBrWh_ORPH]     /* dir for orphans */
2856 +
2857 +       /* mfs mode */
2858 +       unsigned long long      wbr_bytes;
2859 +};
2860 +
2861 +/* protected by superblock rwsem */
2862 +struct au_branch {
2863 +       struct au_xino_file     br_xino;
2864 +
2865 +       aufs_bindex_t           br_id;
2866 +
2867 +       int                     br_perm;        /* one of AuBrPerm_xxx */
2868 +       struct vfsmount         *br_mnt;
2869 +       atomic_t                br_count;
2870 +
2871 +       struct au_wbr           *br_wbr;        /* see struct au_wbr */
2872 +
2873 +       /* xino truncation */
2874 +       blkcnt_t                br_xino_upper;  /* watermark in blocks */
2875 +       atomic_t                br_xino_running;
2876 +
2877 +#ifdef CONFIG_SYSFS
2878 +       /* an entry under sysfs per mount-point */
2879 +       char                    br_name[8];
2880 +       struct attribute        br_attr;
2881 +#endif
2882 +};
2883 +
2884 +/* ---------------------------------------------------------------------- */
2885 +
2886 +/* branch permission and attribute, the values kept in au_branch.br_perm */
2887 +enum {
2888 +       AuBrPerm_RW,            /* writable, linkable wh */
2889 +       AuBrPerm_RO,            /* readonly, no wh */
2890 +       AuBrPerm_RR,            /* natively readonly, no wh */
2891 +
2892 +       AuBrPerm_RWNoLinkWH,    /* un-linkable whiteouts */
2893 +
2894 +       AuBrPerm_ROWH,          /* whiteout-able */
2895 +       AuBrPerm_RRWH,          /* whiteout-able */
2896 +
2897 +       AuBrPerm_Last           /* equals the number of values above */
2898 +};
2899 +
2900 +static inline int au_br_writable(int brperm)
2901 +{ /* nonzero only for AuBrPerm_RW and AuBrPerm_RWNoLinkWH */
2902 +       return brperm == AuBrPerm_RW || brperm == AuBrPerm_RWNoLinkWH;
2903 +}
2904 +
2905 +static inline int au_br_whable(int brperm)
2906 +{ /* nonzero for RW, ROWH and RRWH; AuBrPerm_RWNoLinkWH is not in the list */
2907 +       return brperm == AuBrPerm_RW
2908 +               || brperm == AuBrPerm_ROWH
2909 +               || brperm == AuBrPerm_RRWH;
2910 +}
2911 +
2912 +static inline int au_br_rdonly(struct au_branch *br)
2913 +{ /* -EROFS if the branch sb is MS_RDONLY or br_perm is not writable, else 0 */
2914 +       return ((br->br_mnt->mnt_sb->s_flags & MS_RDONLY)
2915 +               || !au_br_writable(br->br_perm))
2916 +               ? -EROFS : 0;
2917 +}
2918 +
2919 +static inline int au_br_hinotifyable(int brperm __maybe_unused)
2920 +{ /* @brperm is unused (hence __maybe_unused) when hinotify is compiled out */
2921 +#ifdef CONFIG_AUFS_HINOTIFY
2922 +       return brperm != AuBrPerm_RR && brperm != AuBrPerm_RRWH; /* all but RR/RRWH */
2923 +#else
2924 +       return 0; /* never, without CONFIG_AUFS_HINOTIFY */
2925 +#endif
2926 +}
2927 +
2928 +/* ---------------------------------------------------------------------- */
2929 +
2930 +/* branch.c: the structs below are only forward-declared for the prototypes */
2931 +struct au_sbinfo;
2932 +void au_br_free(struct au_sbinfo *sinfo);
2933 +int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
2934 +struct au_opt_add;
2935 +int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
2936 +struct au_opt_del;
2937 +int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
2938 +struct au_opt_mod;
2939 +int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
2940 +             int *do_update);
2941 +
2942 +/* xino.c */
2943 +static const loff_t au_loff_max = LLONG_MAX; /* one static copy per including file */
2944 +
2945 +int au_xib_trunc(struct super_block *sb);
2946 +ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size,
2947 +                  loff_t *pos);
2948 +ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
2949 +                   loff_t *pos);
2950 +struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
2951 +struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
2952 +ino_t au_xino_new_ino(struct super_block *sb);
2953 +int au_xino_write0(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
2954 +                  ino_t ino);
2955 +int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
2956 +                 ino_t ino); /* cpup.c calls this with ino 0 and ignores the result */
2957 +int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
2958 +                ino_t *ino);
2959 +int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
2960 +              struct file *base_file, int do_test);
2961 +int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
2962 +
2963 +struct au_opt_xino; /* forward declaration only */
2964 +int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
2965 +void au_xino_clr(struct super_block *sb);
2966 +struct file *au_xino_def(struct super_block *sb);
2967 +int au_xino_path(struct seq_file *seq, struct file *file);
2968 +
2969 +/* ---------------------------------------------------------------------- */
2970 +
2971 +/* Superblock to branch: accessors for the branch au_sbr(sb, bindex) returns */
2972 +static inline
2973 +aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
2974 +{ /* br_id of the branch at @bindex */
2975 +       return au_sbr(sb, bindex)->br_id;
2976 +}
2977 +
2978 +static inline
2979 +struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
2980 +{ /* br_mnt of the branch at @bindex; no reference is taken here */
2981 +       return au_sbr(sb, bindex)->br_mnt;
2982 +}
2983 +
2984 +static inline
2985 +struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
2986 +{ /* superblock behind the mount of the branch at @bindex */
2987 +       return au_sbr_mnt(sb, bindex)->mnt_sb;
2988 +}
2989 +
2990 +static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
2991 +{ /* decrement br_count of the branch at @bindex; the new value is not used */
2992 +       atomic_dec_return(&au_sbr(sb, bindex)->br_count);
2993 +}
2994 +
2995 +static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
2996 +{ /* br_perm (an AuBrPerm_* value) of the branch at @bindex */
2997 +       return au_sbr(sb, bindex)->br_perm;
2998 +}
2999 +
3000 +static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
3001 +{ /* au_br_whable() applied to the permission of the branch at @bindex */
3002 +       return au_br_whable(au_sbr_perm(sb, bindex));
3003 +}
3004 +
3005 +/* ---------------------------------------------------------------------- */
3006 +
3007 +/*
3008 + * wbr_wh_read_lock, wbr_wh_write_lock
3009 + * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
3010 + */
3011 +AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
3012 +
3013 +#define WbrWhMustNoWaiters(wbr)        AuRwMustNoWaiters(&(wbr)->wbr_wh_rwsem) /* (wbr): any pointer expression works */
3014 +#define WbrWhMustAnyLock(wbr)  AuRwMustAnyLock(&(wbr)->wbr_wh_rwsem)
3015 +#define WbrWhMustWriteLock(wbr)        AuRwMustWriteLock(&(wbr)->wbr_wh_rwsem)
3016 +
3017 +#endif /* __KERNEL__ */
3018 +#endif /* __AUFS_BRANCH_H__ */
3019 diff --git a/fs/aufs/cpup.c b/fs/aufs/cpup.c
3020 new file mode 100644
3021 index 0000000..7f70e2f
3022 --- /dev/null
3023 +++ b/fs/aufs/cpup.c
3024 @@ -0,0 +1,1048 @@
3025 +/*
3026 + * Copyright (C) 2005-2009 Junjiro R. Okajima
3027 + *
3028 + * This program, aufs is free software; you can redistribute it and/or modify
3029 + * it under the terms of the GNU General Public License as published by
3030 + * the Free Software Foundation; either version 2 of the License, or
3031 + * (at your option) any later version.
3032 + *
3033 + * This program is distributed in the hope that it will be useful,
3034 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
3035 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
3036 + * GNU General Public License for more details.
3037 + *
3038 + * You should have received a copy of the GNU General Public License
3039 + * along with this program; if not, write to the Free Software
3040 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
3041 + */
3042 +
3043 +/*
3044 + * copy-up functions, see wbr_policy.c for copy-down
3045 + */
3046 +
3047 +#include <linux/file.h>
3048 +#include <linux/fs_stack.h>
3049 +#include <linux/mm.h>
3050 +#include <linux/uaccess.h>
3051 +#include "aufs.h"
3052 +
3053 +void au_cpup_attr_flags(struct inode *dst, struct inode *src)
3054 +{ /* OR @src's i_flags into @dst, leaving out the bits listed in mask */
3055 +       const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
3056 +               | S_NOATIME | S_NOCMTIME;
3057 +
3058 +       dst->i_flags |= src->i_flags & ~mask; /* bits are only added, never cleared */
3059 +       if (au_test_fs_notime(dst->i_sb)) /* then both no-time flags are forced on */
3060 +               dst->i_flags |= S_NOATIME | S_NOCMTIME;
3061 +}
3062 +
3063 +void au_cpup_attr_timesizes(struct inode *inode)
3064 +{ /* copy times and size to @inode from its lower inode at au_ibstart() */
3065 +       struct inode *h_inode;
3066 +
3067 +       h_inode = au_h_iptr(inode, au_ibstart(inode));
3068 +       fsstack_copy_attr_times(inode, h_inode);
3069 +       vfsub_copy_inode_size(inode, h_inode);
3070 +}
3071 +
3072 +void au_cpup_attr_nlink(struct inode *inode, int force)
3073 +{ /* set @inode->i_nlink from its lower inode(s); @force skips the plink check */
3074 +       struct inode *h_inode;
3075 +       struct super_block *sb;
3076 +       aufs_bindex_t bindex, bend;
3077 +
3078 +       sb = inode->i_sb;
3079 +       bindex = au_ibstart(inode);
3080 +       h_inode = au_h_iptr(inode, bindex);
3081 +       if (!force
3082 +           && !S_ISDIR(h_inode->i_mode)
3083 +           && au_opt_test(au_mntflags(sb), PLINK)
3084 +           && au_plink_test(inode))
3085 +               return; /* i_nlink is left untouched in this case */
3086 +
3087 +       inode->i_nlink = h_inode->i_nlink;
3088 +
3089 +       /*
3090 +        * fewer nlink makes find(1) noisy, but larger nlink doesn't.
3091 +        * it may include the whplink directory.
3092 +        */
3093 +       if (S_ISDIR(h_inode->i_mode)) {
3094 +               bend = au_ibend(inode);
3095 +               for (bindex++; bindex <= bend; bindex++) { /* branches after the first */
3096 +                       h_inode = au_h_iptr(inode, bindex);
3097 +                       if (h_inode) /* no lower inode on this branch otherwise */
3098 +                               au_add_nlink(inode, h_inode);
3099 +               }
3100 +       }
3101 +}
3102 +
3103 +void au_cpup_attr_changeable(struct inode *inode)
3104 +{ /* copy mode, uid, gid, times, size and flags from the lower inode */
3105 +       struct inode *h_inode;
3106 +
3107 +       h_inode = au_h_iptr(inode, au_ibstart(inode)); /* lower inode at ibstart */
3108 +       inode->i_mode = h_inode->i_mode;
3109 +       inode->i_uid = h_inode->i_uid;
3110 +       inode->i_gid = h_inode->i_gid;
3111 +       au_cpup_attr_timesizes(inode);
3112 +       au_cpup_attr_flags(inode, h_inode);
3113 +}
3114 +
3115 +void au_cpup_igen(struct inode *inode, struct inode *h_inode)
3116 +{ /* record @h_inode's generation and superblock in the iinfo of @inode */
3117 +       struct au_iinfo *iinfo = au_ii(inode);
3118 +
3119 +       IiMustWriteLock(inode); /* presumably asserts the iinfo write lock; confirm */
3120 +
3121 +       iinfo->ii_higen = h_inode->i_generation;
3122 +       iinfo->ii_hsb1 = h_inode->i_sb;
3123 +}
3124 +
3125 +void au_cpup_attr_all(struct inode *inode, int force)
3126 +{ /* refresh every copied attribute of @inode from its lower inode at ibstart */
3127 +       struct inode *h_inode;
3128 +
3129 +       h_inode = au_h_iptr(inode, au_ibstart(inode));
3130 +       au_cpup_attr_changeable(inode);
3131 +       if (inode->i_nlink > 0) /* nlink is refreshed only while still positive */
3132 +               au_cpup_attr_nlink(inode, force);
3133 +       inode->i_rdev = h_inode->i_rdev;
3134 +       inode->i_blkbits = h_inode->i_blkbits;
3135 +       au_cpup_igen(inode, h_inode); /* needs the lock IiMustWriteLock() checks */
3136 +}
3137 +
3138 +/* ---------------------------------------------------------------------- */
3139 +
3140 +/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */
3141 +
3142 +/* keep the timestamps of the parent dir when cpup */
3143 +void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
3144 +                   struct path *h_path)
3145 +{ /* save atime/mtime of @h_path's inode in @dt for au_dtime_revert() */
3146 +       struct inode *h_inode;
3147 +
3148 +       dt->dt_dentry = dentry;
3149 +       dt->dt_h_path = *h_path; /* plain struct copy, no reference is taken */
3150 +       h_inode = h_path->dentry->d_inode;
3151 +       dt->dt_atime = h_inode->i_atime;
3152 +       dt->dt_mtime = h_inode->i_mtime;
3153 +       /* smp_mb(); */
3154 +}
3155 +
3156 +void au_dtime_revert(struct au_dtime *dt)
3157 +{ /* write the atime/mtime saved in @dt back to dt->dt_h_path */
3158 +       struct iattr attr;
3159 +       int err;
3160 +
3161 +       attr.ia_atime = dt->dt_atime;
3162 +       attr.ia_mtime = dt->dt_mtime;
3163 +       attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
3164 +               | ATTR_ATIME | ATTR_ATIME_SET;
3165 +
3166 +       err = vfsub_notify_change(&dt->dt_h_path, &attr);
3167 +       if (unlikely(err)) /* best effort: a failure is only logged, not returned */
3168 +               AuWarn("restoring timestamps failed(%d). ignored\n", err);
3169 +}
3170 +
3171 +/* ---------------------------------------------------------------------- */
3172 +
3173 +static noinline_for_stack
3174 +int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src)
3175 +{ /* apply @h_src's owner, times, mode and flags to @dst's lower dentry at @bindex */
3176 +       int err, sbits;
3177 +       struct iattr ia;
3178 +       struct path h_path;
3179 +       struct inode *h_isrc, *h_idst;
3180 +
3181 +       h_path.dentry = au_h_dptr(dst, bindex);
3182 +       h_idst = h_path.dentry->d_inode;
3183 +       h_path.mnt = au_sbr_mnt(dst->d_sb, bindex);
3184 +       h_isrc = h_src->d_inode;
3185 +       ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
3186 +               | ATTR_ATIME | ATTR_MTIME
3187 +               | ATTR_ATIME_SET | ATTR_MTIME_SET;
3188 +       ia.ia_uid = h_isrc->i_uid;
3189 +       ia.ia_gid = h_isrc->i_gid;
3190 +       ia.ia_atime = h_isrc->i_atime;
3191 +       ia.ia_mtime = h_isrc->i_mtime;
3192 +       if (h_idst->i_mode != h_isrc->i_mode
3193 +           && !S_ISLNK(h_idst->i_mode)) { /* the mode of a symlink is never set */
3194 +               ia.ia_valid |= ATTR_MODE;
3195 +               ia.ia_mode = h_isrc->i_mode;
3196 +       }
3197 +       sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID)); /* source is setuid/setgid */
3198 +       au_cpup_attr_flags(h_idst, h_isrc);
3199 +       err = vfsub_notify_change(&h_path, &ia);
3200 +
3201 +       /* is this nfs only? */
3202 +       if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) { /* set the mode again */
3203 +               ia.ia_valid = ATTR_FORCE | ATTR_MODE;
3204 +               ia.ia_mode = h_isrc->i_mode;
3205 +               err = vfsub_notify_change(&h_path, &ia);
3206 +       }
3207 +
3208 +       return err; /* result of the last vfsub_notify_change() */
3209 +}
3210 +
3211 +/* ---------------------------------------------------------------------- */
3212 +/* copy @len bytes from @src to @dst through @buf, seeking over zero blocks */
3213 +static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
3214 +                          char *buf, unsigned long blksize)
3215 +{ /* @buf holds at least @blksize bytes and is reused as a struct iattr below */
3216 +       int err;
3217 +       size_t sz, rbytes, wbytes;
3218 +       unsigned char all_zero;
3219 +       char *p, *zp;
3220 +       struct mutex *h_mtx;
3221 +       /* reduce stack usage */
3222 +       struct iattr *ia;
3223 +
3224 +       zp = page_address(ZERO_PAGE(0));
3225 +       if (unlikely(!zp))
3226 +               return -ENOMEM; /* possible? */
3227 +
3228 +       err = 0;
3229 +       all_zero = 0;
3230 +       while (len) {
3231 +               AuDbg("len %lld\n", len);
3232 +               sz = blksize;
3233 +               if (len < blksize)
3234 +                       sz = len;
3235 +
3236 +               rbytes = 0;
3237 +               /* todo: signal_pending? */
3238 +               while (!rbytes || err == -EAGAIN || err == -EINTR) {
3239 +                       rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
3240 +                       err = rbytes;
3241 +               }
3242 +               if (unlikely(err < 0))
3243 +                       break;
3244 +
3245 +               all_zero = 0;
3246 +               if (len >= rbytes && rbytes == blksize) /* only whole blocks become holes */
3247 +                       all_zero = !memcmp(buf, zp, rbytes);
3248 +               if (!all_zero) {
3249 +                       wbytes = rbytes;
3250 +                       p = buf;
3251 +                       while (wbytes) { /* repeat until the whole chunk is written */
3252 +                               size_t b;
3253 +
3254 +                               b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
3255 +                               err = b;
3256 +                               /* todo: signal_pending? */
3257 +                               if (unlikely(err == -EAGAIN || err == -EINTR))
3258 +                                       continue;
3259 +                               if (unlikely(err < 0))
3260 +                                       return err; /* not break: it leaves only this loop and err is zeroed below */
3261 +                               wbytes -= b;
3262 +                               p += b;
3263 +                       }
3264 +               } else {
3265 +                       loff_t res;
3266 +
3267 +                       AuLabel(hole);
3268 +                       res = vfsub_llseek(dst, rbytes, SEEK_CUR); /* skip instead of writing zeros */
3269 +                       err = res;
3270 +                       if (unlikely(res < 0))
3271 +                               break;
3272 +               }
3273 +               len -= rbytes;
3274 +               err = 0;
3275 +       }
3276 +
3277 +       /* the last block may be a hole */
3278 +       if (!err && all_zero) {
3279 +               AuLabel(last hole);
3280 +
3281 +               err = 1; /* 1 means: go on and set the size below */
3282 +               if (au_test_nfs(dst->f_dentry->d_sb)) {
3283 +                       /* nfs requires this step to make last hole */
3284 +                       /* is this only nfs? */
3285 +                       do {
3286 +                               /* todo: signal_pending? */
3287 +                               err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
3288 +                       } while (err == -EAGAIN || err == -EINTR);
3289 +                       if (err == 1)
3290 +                               dst->f_pos--; /* step back over the byte just written */
3291 +               }
3292 +
3293 +               if (err == 1) {
3294 +                       ia = (void *)buf; /* the copy buffer doubles as the iattr */
3295 +                       ia->ia_size = dst->f_pos;
3296 +                       ia->ia_valid = ATTR_SIZE | ATTR_FILE;
3297 +                       ia->ia_file = dst;
3298 +                       h_mtx = &dst->f_dentry->d_inode->i_mutex;
3299 +                       mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
3300 +                       err = vfsub_notify_change(&dst->f_path, ia);
3301 +                       mutex_unlock(h_mtx);
3302 +               }
3303 +       }
3304 +
3305 +       return err; /* 0, or the error of the failed read/write/seek/setattr */
3306 +}
3307 +
3308 +int au_copy_file(struct file *dst, struct file *src, loff_t len)
3309 +{ /* copy @len bytes from the start of @src to the start of @dst; 0 or an error */
3310 +       int err;
3311 +       unsigned long blksize;
3312 +       unsigned char do_kfree;
3313 +       char *buf;
3314 +
3315 +       err = -ENOMEM;
3316 +       blksize = dst->f_dentry->d_sb->s_blocksize;
3317 +       if (!blksize || PAGE_SIZE < blksize) /* unit is @dst's block size, capped at a page */
3318 +               blksize = PAGE_SIZE;
3319 +       AuDbg("blksize %lu\n", blksize);
3320 +       do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr));
3321 +       if (do_kfree) /* au_do_copy_file() reuses buf as a struct iattr, so it must hold one */
3322 +               buf = kmalloc(blksize, GFP_NOFS);
3323 +       else
3324 +               buf = (void *)__get_free_page(GFP_NOFS);
3325 +       if (unlikely(!buf))
3326 +               goto out; /* err is still -ENOMEM */
3327 +
3328 +       if (len > (1 << 22))
3329 +               AuDbg("copying a large file %lld\n", (long long)len);
3330 +
3331 +       src->f_pos = 0;
3332 +       dst->f_pos = 0;
3333 +       err = au_do_copy_file(dst, src, len, buf, blksize);
3334 +       if (do_kfree) /* free with the allocator that was used above */
3335 +               kfree(buf);
3336 +       else
3337 +               free_page((unsigned long)buf);
3338 +
3339 + out:
3340 +       return err;
3341 +}
3342 +
3343 +/*
3344 + * to support a sparse file which is opened with O_APPEND,
3345 + * we need to close the file.
3346 + */
3347 +static int au_cp_regular(struct dentry *dentry, aufs_bindex_t bdst,
3348 +                       aufs_bindex_t bsrc, loff_t len)
3349 +{ /* open @dentry's lower files on @bsrc and @bdst and copy @len bytes across */
3350 +       int err, i;
3351 +       enum { SRC, DST };
3352 +       struct {
3353 +               aufs_bindex_t bindex;
3354 +               unsigned int flags;
3355 +               struct dentry *dentry;
3356 +               struct file *file;
3357 +               void *label, *label_file; /* addresses of the exit labels below */
3358 +       } *f, file[] = {
3359 +               {
3360 +                       .bindex = bsrc,
3361 +                       .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
3362 +                       .file = NULL,
3363 +                       .label = &&out, /* open failed: nothing to release yet */
3364 +                       .label_file = &&out_src /* opened but unusable: release src */
3365 +               },
3366 +               {
3367 +                       .bindex = bdst,
3368 +                       .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
3369 +                       .file = NULL,
3370 +                       .label = &&out_src, /* open failed: release src only */
3371 +                       .label_file = &&out_dst /* opened but unusable: release both */
3372 +               }
3373 +       };
3374 +       struct super_block *sb;
3375 +
3376 +       /* bsrc branch can be ro/rw. */
3377 +       sb = dentry->d_sb;
3378 +       f = file;
3379 +       for (i = 0; i < 2; i++, f++) { /* SRC first, then DST */
3380 +               f->dentry = au_h_dptr(dentry, f->bindex);
3381 +               f->file = au_h_open(dentry, f->bindex, f->flags, /*file*/NULL);
3382 +               err = PTR_ERR(f->file);
3383 +               if (IS_ERR(f->file))
3384 +                       goto *f->label; /* GCC labels-as-values */
3385 +               err = -EINVAL;
3386 +               if (unlikely(!f->file->f_op))
3387 +                       goto *f->label_file;
3388 +       }
3389 +
3390 +       /* try stopping to update while we copyup */
3391 +       IMustLock(file[SRC].dentry->d_inode);
3392 +       err = au_copy_file(file[DST].file, file[SRC].file, len);
3393 +
3394 + out_dst:
3395 +       fput(file[DST].file);
3396 +       au_sbr_put(sb, file[DST].bindex); /* presumably pairs with au_h_open(); confirm */
3397 + out_src:
3398 +       fput(file[SRC].file);
3399 +       au_sbr_put(sb, file[SRC].bindex);
3400 + out:
3401 +       return err;
3402 +}
3403 +
3404 +static int au_do_cpup_regular(struct dentry *dentry, aufs_bindex_t bdst,
3405 +                             aufs_bindex_t bsrc, loff_t len,
3406 +                             struct inode *h_dir, struct path *h_path)
3407 +{ /* copy the file body; on failure unlink the new file @h_path from @h_dir */
3408 +       int err, rerr;
3409 +       loff_t l;
3410 +
3411 +       err = 0;
3412 +       l = i_size_read(au_h_iptr(dentry->d_inode, bsrc)); /* size of the source */
3413 +       if (len == -1 || l < len) /* -1 means the whole file; never exceed the size */
3414 +               len = l;
3415 +       if (len) /* nothing to copy for an empty file */
3416 +               err = au_cp_regular(dentry, bdst, bsrc, len);
3417 +       if (!err)
3418 +               goto out; /* success */
3419 +
3420 +       rerr = vfsub_unlink(h_dir, h_path, /*force*/0);
3421 +       if (rerr) { /* the cleanup failed too: report -EIO instead of the copy error */
3422 +               AuIOErr("failed unlinking cpup-ed %.*s(%d, %d)\n",
3423 +                       AuDLNPair(h_path->dentry), err, rerr);
3424 +               err = -EIO;
3425 +       }
3426 +
3427 + out:
3428 +       return err;
3429 +}
3430 +
3431 +static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
3432 +                             struct inode *h_dir)
3433 +{ /* read the target of symlink @h_src and create @h_path in @h_dir with it */
3434 +       int err, symlen;
3435 +       mm_segment_t old_fs;
3436 +       char *sym;
3437 +
3438 +       err = -ENOSYS;
3439 +       if (unlikely(!h_src->d_inode->i_op->readlink))
3440 +               goto out;
3441 +
3442 +       err = -ENOMEM;
3443 +       sym = __getname();
3444 +       if (unlikely(!sym))
3445 +               goto out;
3446 +
3447 +       old_fs = get_fs();
3448 +       set_fs(KERNEL_DS); /* ->readlink() expects a user pointer, sym is a kernel one */
3449 +       symlen = h_src->d_inode->i_op->readlink(h_src, (char __user *)sym,
3450 +                                               PATH_MAX - 1); /* keep a byte for the NUL */
3451 +       err = symlen;
3452 +       set_fs(old_fs);
3453 +
3454 +       if (symlen > 0) {
3455 +               sym[symlen] = 0; /* symlen <= PATH_MAX - 1, so this stays inside sym */
3456 +               err = vfsub_symlink(h_dir, h_path, sym);
3457 +       }
3458 +       __putname(sym);
3459 +
3460 + out:
3461 +       return err; /* readlink's result if it was <= 0, else vfsub_symlink()'s */
3462 +}
3463 +
3464 +/* return with the lower dst inode is locked */
3465 +static noinline_for_stack
3466 +int cpup_entry(struct dentry *dentry, aufs_bindex_t bdst,
3467 +              aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3468 +              struct dentry *dst_parent)
3469 +{ /* NOTE(review): header may be stale; au_cpup_single() locks the dst inode itself */
3470 +       int err;
3471 +       umode_t mode;
3472 +       unsigned int mnt_flags;
3473 +       unsigned char isdir;
3474 +       const unsigned char do_dt = !!au_ftest_cpup(flags, DTIME);
3475 +       struct au_dtime dt;
3476 +       struct path h_path;
3477 +       struct dentry *h_src, *h_dst, *h_parent;
3478 +       struct inode *h_inode, *h_dir;
3479 +       struct super_block *sb;
3480 +
3481 +       /* bsrc branch can be ro/rw. */
3482 +       h_src = au_h_dptr(dentry, bsrc);
3483 +       h_inode = h_src->d_inode;
3484 +       AuDebugOn(h_inode != au_h_iptr(dentry->d_inode, bsrc));
3485 +
3486 +       /* try stopping to be referenced while we are creating */
3487 +       h_dst = au_h_dptr(dentry, bdst);
3488 +       h_parent = h_dst->d_parent; /* dir inode is locked */
3489 +       h_dir = h_parent->d_inode;
3490 +       IMustLock(h_dir);
3491 +       AuDebugOn(h_parent != h_dst->d_parent);
3492 +
3493 +       sb = dentry->d_sb;
3494 +       h_path.mnt = au_sbr_mnt(sb, bdst);
3495 +       if (do_dt) { /* DTIME flag: save the parent dir times, restored at the end */
3496 +               h_path.dentry = h_parent;
3497 +               au_dtime_store(&dt, dst_parent, &h_path);
3498 +       }
3499 +       h_path.dentry = h_dst;
3500 +
3501 +       isdir = 0;
3502 +       mode = h_inode->i_mode;
3503 +       switch (mode & S_IFMT) { /* create the same kind of object as the source */
3504 +       case S_IFREG:
3505 +               /* try stopping to update while we are referencing */
3506 +               IMustLock(h_inode);
3507 +               err = vfsub_create(h_dir, &h_path, mode | S_IWUSR); /* owner-writable for now */
3508 +               if (!err)
3509 +                       err = au_do_cpup_regular
3510 +                               (dentry, bdst, bsrc, len,
3511 +                                au_h_iptr(dst_parent->d_inode, bdst), &h_path);
3512 +               break;
3513 +       case S_IFDIR:
3514 +               isdir = 1;
3515 +               err = vfsub_mkdir(h_dir, &h_path, mode);
3516 +               if (!err) {
3517 +                       /*
3518 +                        * strange behaviour from the users view,
3519 +                        * particularly setattr case
3520 +                        */
3521 +                       if (au_ibstart(dst_parent->d_inode) == bdst)
3522 +                               au_cpup_attr_nlink(dst_parent->d_inode,
3523 +                                                  /*force*/1);
3524 +                       au_cpup_attr_nlink(dentry->d_inode, /*force*/1);
3525 +               }
3526 +               break;
3527 +       case S_IFLNK:
3528 +               err = au_do_cpup_symlink(&h_path, h_src, h_dir);
3529 +               break;
3530 +       case S_IFCHR:
3531 +       case S_IFBLK:
3532 +               AuDebugOn(!capable(CAP_MKNOD)); /* cf. the check in au_sio_cpup_single() */
3533 +               /*FALLTHROUGH*/
3534 +       case S_IFIFO:
3535 +       case S_IFSOCK:
3536 +               err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
3537 +               break;
3538 +       default:
3539 +               AuIOErr("Unknown inode type 0%o\n", mode);
3540 +               err = -EIO;
3541 +       }
3542 +
3543 +       mnt_flags = au_mntflags(sb); /* note: the xino write below runs even when err is set */
3544 +       if (!au_opt_test(mnt_flags, UDBA_NONE)
3545 +           && !isdir
3546 +           && au_opt_test(mnt_flags, XINO)
3547 +           && h_inode->i_nlink == 1
3548 +           /* todo: unnecessary? */
3549 +           /* && dentry->d_inode->i_nlink == 1 */
3550 +           && bdst < bsrc
3551 +           && !au_ftest_cpup(flags, KEEPLINO))
3552 +               au_xino_write(sb, bsrc, h_inode->i_ino, /*ino*/0);
3553 +               /* ignore this error */
3554 +
3555 +       if (do_dt)
3556 +               au_dtime_revert(&dt);
3557 +       return err; /* result of the create/mkdir/symlink/mknod step above */
3558 +}
3559 +
3560 +/*
3561 + * copyup the @dentry from @bsrc to @bdst.
3562 + * the caller must set both of the lower dentries.
3563 + * @len is for truncating; when it is -1, copyup the entire file.
3564 + * in link/rename cases, @dst_parent may be different from the real one.
3565 + */
3566 +static int au_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
3567 +                         aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3568 +                         struct dentry *dst_parent)
3569 +{
3570 +       int err, rerr;
3571 +       aufs_bindex_t old_ibstart;
3572 +       unsigned char isdir, plink;
3573 +       struct au_dtime dt;
3574 +       struct path h_path;
3575 +       struct dentry *h_src, *h_dst, *h_parent;
3576 +       struct inode *dst_inode, *h_dir, *inode;
3577 +       struct super_block *sb;
3578 +
3579 +       AuDebugOn(bsrc <= bdst); /* the source index must be greater than the target */
3580 +
3581 +       sb = dentry->d_sb;
3582 +       h_path.mnt = au_sbr_mnt(sb, bdst);
3583 +       h_dst = au_h_dptr(dentry, bdst);
3584 +       h_parent = h_dst->d_parent; /* dir inode is locked */
3585 +       h_dir = h_parent->d_inode;
3586 +       IMustLock(h_dir);
3587 +
3588 +       h_src = au_h_dptr(dentry, bsrc);
3589 +       inode = dentry->d_inode;
3590 +
3591 +       if (!dst_parent) /* either way one reference is held until out: */
3592 +               dst_parent = dget_parent(dentry);
3593 +       else
3594 +               dget(dst_parent);
3595 +
3596 +       plink = !!au_opt_test(au_mntflags(sb), PLINK);
3597 +       dst_inode = au_h_iptr(inode, bdst);
3598 +       if (dst_inode) { /* the inode already has a lower inode on @bdst */
3599 +               if (unlikely(!plink)) {
3600 +                       err = -EIO;
3601 +                       AuIOErr("i%lu exists on a upper branch "
3602 +                               "but plink is disabled\n", inode->i_ino);
3603 +                       goto out;
3604 +               }
3605 +
3606 +               if (dst_inode->i_nlink) { /* still linked: hard-link it, no data copy */
3607 +                       const int do_dt = au_ftest_cpup(flags, DTIME);
3608 +
3609 +                       h_src = au_plink_lkup(inode, bdst);
3610 +                       err = PTR_ERR(h_src);
3611 +                       if (IS_ERR(h_src))
3612 +                               goto out;
3613 +                       if (unlikely(!h_src->d_inode)) {
3614 +                               err = -EIO;
3615 +                               AuIOErr("i%lu exists on a upper branch "
3616 +                                       "but plink is broken\n", inode->i_ino);
3617 +                               dput(h_src);
3618 +                               goto out;
3619 +                       }
3620 +
3621 +                       if (do_dt) {
3622 +                               h_path.dentry = h_parent;
3623 +                               au_dtime_store(&dt, dst_parent, &h_path);
3624 +                       }
3625 +                       h_path.dentry = h_dst;
3626 +                       err = vfsub_link(h_src, h_dir, &h_path);
3627 +                       if (do_dt)
3628 +                               au_dtime_revert(&dt);
3629 +                       dput(h_src); /* drop the dentry au_plink_lkup() returned */
3630 +                       goto out;
3631 +               } else
3632 +                       /* todo: cpup_wh_file? */
3633 +                       /* udba work */
3634 +                       au_update_brange(inode, 1);
3635 +       }
3636 +
3637 +       old_ibstart = au_ibstart(inode);
3638 +       err = cpup_entry(dentry, bdst, bsrc, len, flags, dst_parent);
3639 +       if (unlikely(err))
3640 +               goto out;
3641 +       dst_inode = h_dst->d_inode; /* the inode cpup_entry() has just created */
3642 +       mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2);
3643 +
3644 +       err = cpup_iattr(dentry, bdst, h_src);
3645 +       isdir = S_ISDIR(dst_inode->i_mode);
3646 +       if (!err) {
3647 +               if (bdst < old_ibstart)
3648 +                       au_set_ibstart(inode, bdst);
3649 +               au_set_h_iptr(inode, bdst, au_igrab(dst_inode),
3650 +                             au_hi_flags(inode, isdir));
3651 +               mutex_unlock(&dst_inode->i_mutex);
3652 +               if (!isdir
3653 +                   && h_src->d_inode->i_nlink > 1
3654 +                   && plink) /* multiply-linked non-dir with PLINK on */
3655 +                       au_plink_append(inode, bdst, h_dst);
3656 +               goto out; /* success */
3657 +       }
3658 +
3659 +       /* revert */
3660 +       h_path.dentry = h_parent;
3661 +       mutex_unlock(&dst_inode->i_mutex);
3662 +       au_dtime_store(&dt, dst_parent, &h_path); /* keep the parent times over the removal */
3663 +       h_path.dentry = h_dst;
3664 +       if (!isdir)
3665 +               rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
3666 +       else
3667 +               rerr = vfsub_rmdir(h_dir, &h_path);
3668 +       au_dtime_revert(&dt);
3669 +       if (rerr) { /* the removal failed as well: report -EIO */
3670 +               AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
3671 +               err = -EIO;
3672 +       }
3673 +
3674 + out:
3675 +       dput(dst_parent); /* pairs with the dget()/dget_parent() above */
3676 +       return err;
3677 +}
3678 +
3679 +struct au_cpup_single_args { /* au_cpup_single() arguments for au_call_cpup_single() */
3680 +       int *errp; /* where the return value of au_cpup_single() is stored */
3681 +       struct dentry *dentry;
3682 +       aufs_bindex_t bdst, bsrc;
3683 +       loff_t len;
3684 +       unsigned int flags;
3685 +       struct dentry *dst_parent;
3686 +};
3687 +
3688 +static void au_call_cpup_single(void *args)
3689 +{ /* void * adapter: unpack @args, call au_cpup_single(), store the result */
3690 +       struct au_cpup_single_args *a = args;
3691 +       *a->errp = au_cpup_single(a->dentry, a->bdst, a->bsrc, a->len,
3692 +                                 a->flags, a->dst_parent);
3693 +}
3694 +
3695 +int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
3696 +                      aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3697 +                      struct dentry *dst_parent)
3698 +{ /* au_cpup_single(), run via au_wkq_wait() when a device node needs CAP_MKNOD */
3699 +       int err, wkq_err;
3700 +       umode_t mode;
3701 +       struct dentry *h_dentry;
3702 +
3703 +       h_dentry = au_h_dptr(dentry, bsrc);
3704 +       mode = h_dentry->d_inode->i_mode & S_IFMT;
3705 +       if ((mode != S_IFCHR && mode != S_IFBLK)
3706 +           || capable(CAP_MKNOD)) /* not a device node, or the caller may mknod */
3707 +               err = au_cpup_single(dentry, bdst, bsrc, len, flags,
3708 +                                    dst_parent);
3709 +       else {
3710 +               struct au_cpup_single_args args = {
3711 +                       .errp           = &err,
3712 +                       .dentry         = dentry,
3713 +                       .bdst           = bdst,
3714 +                       .bsrc           = bsrc,
3715 +                       .len            = len,
3716 +                       .flags          = flags,
3717 +                       .dst_parent     = dst_parent
3718 +               };
3719 +               wkq_err = au_wkq_wait(au_call_cpup_single, &args); /* the callback sets err */
3720 +               if (unlikely(wkq_err)) /* a nonzero au_wkq_wait() result replaces err */
3721 +                       err = wkq_err;
3722 +       }
3723 +
3724 +       return err;
3725 +}
3726 +
3727 +/*
3728 + * copyup the @dentry to @bdst from the first branch after @bdst which
3729 + * has a lower dentry, using au_cpup_single().
3730 + */
3731 +static int au_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3732 +                         unsigned int flags)
3733 +{
3734 +       int err;
3735 +       aufs_bindex_t bsrc, bend;
3736 +
3737 +       /* pick the source branch: scan upwards from bdst + 1 */
3738 +       bend = au_dbend(dentry);
3739 +       bsrc = bdst + 1;
3740 +       while (bsrc <= bend && !au_h_dptr(dentry, bsrc))
3741 +               bsrc++;
3742 +
3743 +       err = au_lkup_neg(dentry, bdst);
3744 +       if (err)
3745 +               return err;
3746 +       err = au_cpup_single(dentry, bdst, bsrc, len, flags, NULL);
3747 +       if (err) {
3748 +               /* revert */
3749 +               au_set_h_dptr(dentry, bdst, NULL);
3750 +               au_set_dbstart(dentry, bsrc);
3751 +       }
3752 +
3753 +       return err;
3754 +}
3755 +
3756 +struct au_cpup_simple_args {   /* packed call to au_cpup_simple() */
3757 +       int *errp;              /* out: return value of au_cpup_simple() */
3758 +       struct dentry *dentry;
3759 +       aufs_bindex_t bdst;
3760 +       loff_t len;
3761 +       unsigned int flags;
3762 +};
3763 +/* au_wkq_wait() callback: unpack @args and run au_cpup_simple() */
3764 +static void au_call_cpup_simple(void *args)
3765 +{
3766 +       struct au_cpup_simple_args *a = args;
3767 +       *a->errp = au_cpup_simple(a->dentry, a->bdst, a->len, a->flags);
3768 +}
3769 +/* au_cpup_simple(), run directly or handed to au_wkq_wait(); 0 or -errno */
3770 +int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3771 +                      unsigned int flags)
3772 +{
3773 +       int err, wkq_err;
3774 +       unsigned char do_sio;
3775 +       struct dentry *parent;
3776 +       struct inode *h_dir;
3777 +
3778 +       parent = dget_parent(dentry);
3779 +       h_dir = au_h_iptr(parent->d_inode, bdst);
3780 +       do_sio = !!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE);
3781 +       if (!do_sio) {
3782 +               /*
3783 +                * CAP_MKNOD is tested for generic fs, CAP_FSETID for xfs only.
3784 +                * S_IFCHR/S_IFBLK are type codes, not bits: use S_ISCHR/S_ISBLK.
3785 +                */
3786 +               umode_t mode = dentry->d_inode->i_mode;
3787 +               do_sio = (((S_ISCHR(mode) || S_ISBLK(mode))
3788 +                          && !capable(CAP_MKNOD))
3789 +                         || ((mode & (S_ISUID | S_ISGID))
3790 +                             && !capable(CAP_FSETID)));
3791 +       }
3792 +       if (!do_sio)
3793 +               err = au_cpup_simple(dentry, bdst, len, flags);
3794 +       else {
3795 +               struct au_cpup_simple_args args = {
3796 +                       .errp           = &err,
3797 +                       .dentry         = dentry,
3798 +                       .bdst           = bdst,
3799 +                       .len            = len,
3800 +                       .flags          = flags
3801 +               };
3802 +               wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
3803 +               if (unlikely(wkq_err))
3804 +                       err = wkq_err;  /* a wkq failure overrides the result */
3805 +       }
3806 +
3807 +       dput(parent);
3808 +       return err;
3809 +}
3810 +
3811 +/* ---------------------------------------------------------------------- */
3812 +
3813 +/*
3814 + * copyup the deleted file for writing; dinfo is put back before returning.
3815 + */
3816 +static int au_do_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst,
3817 +                        struct dentry *wh_dentry, struct file *file,
3818 +                        loff_t len)
3819 +{
3820 +       int err;
3821 +       aufs_bindex_t bstart;
3822 +       struct au_dinfo *dinfo;
3823 +       struct dentry *h_d_dst, *h_d_start;
3824 +
3825 +       dinfo = au_di(dentry);
3826 +       AuRwMustWriteLock(&dinfo->di_rwsem);
3827 +       /* temporarily make @bdst the start branch with @wh_dentry as target */
3828 +       bstart = dinfo->di_bstart;
3829 +       h_d_dst = dinfo->di_hdentry[0 + bdst].hd_dentry;
3830 +       dinfo->di_bstart = bdst;
3831 +       dinfo->di_hdentry[0 + bdst].hd_dentry = wh_dentry;
3832 +       h_d_start = dinfo->di_hdentry[0 + bstart].hd_dentry;
3833 +       if (file)
3834 +               dinfo->di_hdentry[0 + bstart].hd_dentry
3835 +                       = au_h_fptr(file, au_fbstart(file))->f_dentry;
3836 +       err = au_cpup_single(dentry, bdst, bstart, len, !AuCpup_DTIME,
3837 +                            /*h_parent*/NULL);
3838 +       if (!err && file)
3839 +               err = au_reopen_nondir(file);
3840 +       if (file)       /* undo the swap above even when the copy-up failed */
3841 +               dinfo->di_hdentry[0 + bstart].hd_dentry = h_d_start;
3842 +       dinfo->di_hdentry[0 + bdst].hd_dentry = h_d_dst;
3843 +       dinfo->di_bstart = bstart;
3844 +
3845 +       return err;
3846 +}
3847 +/* copy @dentry up to a looked-up temporary name on @bdst, then unlink it */
3848 +static int au_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3849 +                     struct file *file)
3850 +{
3851 +       int err;
3852 +       struct au_dtime dt;
3853 +       struct dentry *parent, *h_parent, *wh_dentry;
3854 +       struct au_branch *br;
3855 +       struct path h_path;
3856 +
3857 +       br = au_sbr(dentry->d_sb, bdst);
3858 +       parent = dget_parent(dentry);
3859 +       h_parent = au_h_dptr(parent, bdst);
3860 +       wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
3861 +       err = PTR_ERR(wh_dentry);
3862 +       if (IS_ERR(wh_dentry))
3863 +               goto out;
3864 +       /* remember the lower parent's times before touching it */
3865 +       h_path.dentry = h_parent;
3866 +       h_path.mnt = br->br_mnt;
3867 +       au_dtime_store(&dt, parent, &h_path);
3868 +       err = au_do_cpup_wh(dentry, bdst, wh_dentry, file, len);
3869 +       if (unlikely(err))
3870 +               goto out_wh;
3871 +       /* extra ref, presumably kept by au_set_hi_wh() below -- TODO confirm */
3872 +       dget(wh_dentry);
3873 +       h_path.dentry = wh_dentry;
3874 +       err = vfsub_unlink(h_parent->d_inode, &h_path, /*force*/0);
3875 +       if (unlikely(err)) {
3876 +               AuIOErr("failed remove copied-up tmp file %.*s(%d)\n",
3877 +                       AuDLNPair(wh_dentry), err);
3878 +               err = -EIO;
3879 +       }
3880 +       au_dtime_revert(&dt);
3881 +       au_set_hi_wh(dentry->d_inode, bdst, wh_dentry);
3882 +
3883 + out_wh:
3884 +       dput(wh_dentry);
3885 + out:
3886 +       dput(parent);
3887 +       return err;
3888 +}
3889 +
3890 +struct au_cpup_wh_args {       /* packed call to au_cpup_wh() */
3891 +       int *errp;              /* out: return value of au_cpup_wh() */
3892 +       struct dentry *dentry;
3893 +       aufs_bindex_t bdst;
3894 +       loff_t len;
3895 +       struct file *file;
3896 +};
3897 +/* au_wkq_wait() callback: unpack @args and run au_cpup_wh() */
3898 +static void au_call_cpup_wh(void *args)
3899 +{
3900 +       struct au_cpup_wh_args *a = args;
3901 +       *a->errp = au_cpup_wh(a->dentry, a->bdst, a->len, a->file);
3902 +}
3903 +/* au_cpup_wh(), run directly or via au_wkq_wait(); handles an unlinked parent */
3904 +int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3905 +                  struct file *file)
3906 +{
3907 +       int err, wkq_err;
3908 +       struct dentry *parent, *h_orph, *h_parent, *h_dentry;
3909 +       struct inode *dir, *h_dir, *h_tmpdir, *h_inode;
3910 +       struct au_wbr *wbr;
3911 +
3912 +       parent = dget_parent(dentry);
3913 +       dir = parent->d_inode;
3914 +       h_orph = NULL;
3915 +       h_parent = NULL;
3916 +       h_dir = au_igrab(au_h_iptr(dir, bdst));
3917 +       h_tmpdir = h_dir;
3918 +       if (!h_dir->i_nlink) {
3919 +               wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
3920 +               h_orph = wbr->wbr_orph;
3921 +               /* lower parent has no links: stand in the branch's wbr_orph */
3922 +               h_parent = dget(au_h_dptr(parent, bdst));
3923 +               au_set_h_dptr(parent, bdst, NULL);
3924 +               au_set_h_dptr(parent, bdst, dget(h_orph));
3925 +               h_tmpdir = h_orph->d_inode;
3926 +               au_set_h_iptr(dir, bdst, NULL, 0);
3927 +               au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
3928 +
3929 +               /* this temporary unlock is safe */
3930 +               if (file)
3931 +                       h_dentry = au_h_fptr(file, au_fbstart(file))->f_dentry;
3932 +               else
3933 +                       h_dentry = au_h_dptr(dentry, au_dbstart(dentry));
3934 +               h_inode = h_dentry->d_inode;
3935 +               IMustLock(h_inode);
3936 +               mutex_unlock(&h_inode->i_mutex);
3937 +               mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3);
3938 +               mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
3939 +       }
3940 +       /* au_test_h_perm_sio() decides between a direct call and au_wkq_wait() */
3941 +       if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE))
3942 +               err = au_cpup_wh(dentry, bdst, len, file);
3943 +       else {
3944 +               struct au_cpup_wh_args args = {
3945 +                       .errp   = &err,
3946 +                       .dentry = dentry,
3947 +                       .bdst   = bdst,
3948 +                       .len    = len,
3949 +                       .file   = file
3950 +               };
3951 +               wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
3952 +               if (unlikely(wkq_err))
3953 +                       err = wkq_err;
3954 +       }
3955 +       /* put the real lower parent dentry/inode back if they were swapped */
3956 +       if (h_orph) {
3957 +               mutex_unlock(&h_tmpdir->i_mutex);
3958 +               au_set_h_iptr(dir, bdst, NULL, 0);
3959 +               au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
3960 +               au_set_h_dptr(parent, bdst, NULL);
3961 +               au_set_h_dptr(parent, bdst, h_parent);
3962 +       }
3963 +       iput(h_dir);
3964 +       dput(parent);
3965 +
3966 +       return err;
3967 +}
3968 +
3969 +/* ---------------------------------------------------------------------- */
3970 +
3971 +/*
3972 + * generic routine for both of copy-up and copy-down.
3973 + */
3974 +/* cf. revalidate function in file.c */
3975 +int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
3976 +              int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
3977 +                        struct dentry *h_parent, void *arg),
3978 +              void *arg)
3979 +{
3980 +       int err;
3981 +       struct au_pin pin;
3982 +       struct dentry *d, *parent, *h_parent, *real_parent;
3983 +
3984 +       err = 0;
3985 +       parent = dget_parent(dentry);
3986 +       if (IS_ROOT(parent))
3987 +               goto out;
3988 +
3989 +       au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
3990 +                   au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
3991 +
3992 +       /* do not use au_dpage */
3993 +       real_parent = parent;
3994 +       while (1) {
3995 +               dput(parent);
3996 +               parent = dget_parent(dentry);
3997 +               h_parent = au_h_dptr(parent, bdst);
3998 +               if (h_parent)
3999 +                       goto out; /* success */
4000 +
4001 +               /* find top dir which is necessary to cpup */
4002 +               do {
4003 +                       d = parent;
4004 +                       dput(parent);
4005 +                       parent = dget_parent(d);
4006 +                       di_read_lock_parent3(parent, !AuLock_IR);
4007 +                       h_parent = au_h_dptr(parent, bdst);
4008 +                       di_read_unlock(parent, !AuLock_IR);
4009 +               } while (!h_parent);
4010 +               /* NOTE(review): d is used after its ref was dropped above -- confirm it stays pinned */
4011 +               if (d != real_parent)
4012 +                       di_write_lock_child3(d);
4013 +
4014 +               /* somebody else might create while we were sleeping */
4015 +               if (!au_h_dptr(d, bdst) || !au_h_dptr(d, bdst)->d_inode) {
4016 +                       if (au_h_dptr(d, bdst))
4017 +                               au_update_dbstart(d);
4018 +
4019 +                       au_pin_set_dentry(&pin, d);
4020 +                       err = au_do_pin(&pin);
4021 +                       if (!err) {
4022 +                               err = cp(d, bdst, h_parent, arg);
4023 +                               au_unpin(&pin);
4024 +                       }
4025 +               }
4026 +
4027 +               if (d != real_parent)
4028 +                       di_write_unlock(d);
4029 +               if (unlikely(err))
4030 +                       break;
4031 +       }
4032 +
4033 + out:
4034 +       dput(parent);
4035 +       return err;
4036 +}
4037 +/* au_cp_dirs() callback: au_sio_cpup_simple() with len -1 and AuCpup_DTIME */
4038 +static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
4039 +                      struct dentry *h_parent __maybe_unused ,
4040 +                      void *arg __maybe_unused)
4041 +{
4042 +       return au_sio_cpup_simple(dentry, bdst, -1, AuCpup_DTIME);
4043 +}
4044 +/* run au_cp_dirs() for @dentry on @bdst with au_cpup_dir() as the callback */
4045 +int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
4046 +{
4047 +       return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
4048 +}
4049 +/* call au_cpup_dirs() only when the parent has no lower inode on @bdst */
4050 +int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
4051 +{
4052 +       int err;
4053 +       struct dentry *parent;
4054 +       struct inode *dir;
4055 +
4056 +       parent = dget_parent(dentry);
4057 +       dir = parent->d_inode;
4058 +       err = 0;
4059 +       if (au_h_iptr(dir, bdst))
4060 +               goto out;
4061 +       /* trade the read lock for a write lock; the state is re-tested below */
4062 +       di_read_unlock(parent, AuLock_IR);
4063 +       di_write_lock_parent(parent);
4064 +       /* someone else might change our inode while we were sleeping */
4065 +       if (!au_h_iptr(dir, bdst))
4066 +               err = au_cpup_dirs(dentry, bdst);
4067 +       di_downgrade_lock(parent, AuLock_IR);
4068 +
4069 + out:
4070 +       dput(parent);
4071 +       return err;
4072 +}
4073 diff --git a/fs/aufs/cpup.h b/fs/aufs/cpup.h
4074 new file mode 100644
4075 index 0000000..29e2508
4076 --- /dev/null
4077 +++ b/fs/aufs/cpup.h
4078 @@ -0,0 +1,81 @@
4079 +/*
4080 + * Copyright (C) 2005-2009 Junjiro R. Okajima
4081 + *
4082 + * This program, aufs is free software; you can redistribute it and/or modify
4083 + * it under the terms of the GNU General Public License as published by
4084 + * the Free Software Foundation; either version 2 of the License, or
4085 + * (at your option) any later version.
4086 + *
4087 + * This program is distributed in the hope that it will be useful,
4088 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4089 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
4090 + * GNU General Public License for more details.
4091 + *
4092 + * You should have received a copy of the GNU General Public License
4093 + * along with this program; if not, write to the Free Software
4094 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
4095 + */
4096 +
4097 +/*
4098 + * copy-up/down functions
4099 + */
4100 +
4101 +#ifndef __AUFS_CPUP_H__
4102 +#define __AUFS_CPUP_H__
4103 +
4104 +#ifdef __KERNEL__
4105 +
4106 +#include <linux/path.h>
4107 +#include <linux/time.h>
4108 +#include <linux/aufs_type.h>
4109 +
4110 +struct inode;
4111 +struct file;
4112 +/* inode attribute helpers; only declared in this header */
4113 +void au_cpup_attr_flags(struct inode *dst, struct inode *src);
4114 +void au_cpup_attr_timesizes(struct inode *inode);
4115 +void au_cpup_attr_nlink(struct inode *inode, int force);
4116 +void au_cpup_attr_changeable(struct inode *inode);
4117 +void au_cpup_igen(struct inode *inode, struct inode *h_inode);
4118 +void au_cpup_attr_all(struct inode *inode, int force);
4119 +
4120 +/* ---------------------------------------------------------------------- */
4121 +
4122 +/* cpup flags */
4123 +#define AuCpup_DTIME   1               /* do dtime_store/revert */
4124 +#define AuCpup_KEEPLINO        (1 << 1)        /* do not clear the lower xino,
4125 +                                          for link(2) */
4126 +#define au_ftest_cpup(flags, name)     ((flags) & AuCpup_##name)
4127 +#define au_fset_cpup(flags, name)      { (flags) |= AuCpup_##name; }
4128 +#define au_fclr_cpup(flags, name)      { (flags) &= ~AuCpup_##name; }
4129 +/* copy-up entry points; the au_sio_* ones may go through au_wkq_wait() */
4130 +int au_copy_file(struct file *dst, struct file *src, loff_t len);
4131 +int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
4132 +                      aufs_bindex_t bsrc, loff_t len, unsigned int flags,
4133 +                      struct dentry *dst_parent);
4134 +int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4135 +                      unsigned int flags);
4136 +int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4137 +                  struct file *file);
4138 +
4139 +int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
4140 +              int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
4141 +                        struct dentry *h_parent, void *arg),
4142 +              void *arg);
4143 +int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
4144 +int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
4145 +
4146 +/* ---------------------------------------------------------------------- */
4147 +
4148 +/* keep timestamps when copyup: filled by au_dtime_store(), used by au_dtime_revert() */
4149 +struct au_dtime {
4150 +       struct dentry *dt_dentry;
4151 +       struct path dt_h_path;
4152 +       struct timespec dt_atime, dt_mtime;     /* presumably the saved times */
4153 +};
4154 +void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
4155 +                   struct path *h_path);
4156 +void au_dtime_revert(struct au_dtime *dt);
4157 +
4158 +#endif /* __KERNEL__ */
4159 +#endif /* __AUFS_CPUP_H__ */
4160 diff --git a/fs/aufs/dbgaufs.c b/fs/aufs/dbgaufs.c
4161 new file mode 100644
4162 index 0000000..6b19d09
4163 --- /dev/null
4164 +++ b/fs/aufs/dbgaufs.c
4165 @@ -0,0 +1,331 @@
4166 +/*
4167 + * Copyright (C) 2005-2009 Junjiro R. Okajima
4168 + *
4169 + * This program, aufs is free software; you can redistribute it and/or modify
4170 + * it under the terms of the GNU General Public License as published by
4171 + * the Free Software Foundation; either version 2 of the License, or
4172 + * (at your option) any later version.
4173 + *
4174 + * This program is distributed in the hope that it will be useful,
4175 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4176 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
4177 + * GNU General Public License for more details.
4178 + *
4179 + * You should have received a copy of the GNU General Public License
4180 + * along with this program; if not, write to the Free Software
4181 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
4182 + */
4183 +
4184 +/*
4185 + * debugfs interface
4186 + */
4187 +
4188 +#include <linux/debugfs.h>
4189 +#include "aufs.h"
4190 +
4191 +#ifndef CONFIG_SYSFS
4192 +#error DEBUG_FS depends upon SYSFS
4193 +#endif
4194 +
4195 +static struct dentry *dbgaufs;  /* top directory, made by dbgaufs_init() */
4196 +static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH; /* read-only */
4197 +
4198 +/* per-open text snapshot; 20 is the max number of digits of a 64bit ulong */
4199 +struct dbgaufs_arg {
4200 +       int n;                  /* length reported by snprintf() for a[] */
4201 +       char a[20 * 4];
4202 +};
4203 +
4204 +/*
4205 + * common function for all XINO files: free the buffer set up at open time
4206 + */
4207 +static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
4208 +                             struct file *file)
4209 +{
4210 +       kfree(file->private_data);
4211 +       return 0;
4212 +}
4213 +/* allocate file->private_data and format the stats of @xf (may be NULL) */
4214 +static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
4215 +{
4216 +       int err;
4217 +       struct kstat st;
4218 +       struct dbgaufs_arg *p;
4219 +
4220 +       err = -ENOMEM;
4221 +       p = kmalloc(sizeof(*p), GFP_NOFS);
4222 +       if (unlikely(!p))
4223 +               goto out;
4224 +       /* without @xf the open succeeds with an empty text (p->n == 0) */
4225 +       err = 0;
4226 +       p->n = 0;
4227 +       file->private_data = p;
4228 +       if (!xf)
4229 +               goto out;
4230 +       /* text: optional file count, then blocks x blksize and size */
4231 +       err = vfs_getattr(xf->f_vfsmnt, xf->f_dentry, &st);
4232 +       if (!err) {
4233 +               if (do_fcnt)
4234 +                       p->n = snprintf
4235 +                               (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
4236 +                                (long)file_count(xf), st.blocks, st.blksize,
4237 +                                (long long)st.size);
4238 +               else
4239 +                       p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
4240 +                                       st.blocks, st.blksize,
4241 +                                       (long long)st.size);
4242 +               AuDebugOn(p->n >= sizeof(p->a));
4243 +       } else {
4244 +               p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
4245 +               err = 0;        /* the getattr error is reported as text */
4246 +       }
4247 +       /* NOTE(review): a truncated snprintf() would leave p->n > sizeof(p->a) for the reader -- confirm */
4248 + out:
4249 +       return err;
4250 +
4251 +}
4252 +/* read(2) for the xino files: hand out the text built at open time */
4253 +static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
4254 +                              size_t count, loff_t *ppos)
4255 +{
4256 +       struct dbgaufs_arg *snap = file->private_data;
4257 +
4258 +       /* snap->n is passed as the number of available bytes in snap->a */
4259 +       return simple_read_from_buffer(buf, count, ppos, snap->a, snap->n);
4260 +}
4261 +
4262 +/* ---------------------------------------------------------------------- */
4263 +/* open(2) for "xib": snapshot the stats of sbinfo->si_xib */
4264 +static int dbgaufs_xib_open(struct inode *inode, struct file *file)
4265 +{
4266 +       int err;
4267 +       struct au_sbinfo *sbinfo = inode->i_private;
4268 +       struct super_block *sb = sbinfo->si_sb;
4269 +
4270 +       /* si_xib is inspected between the read lock and unlock */
4271 +       si_noflush_read_lock(sb);
4272 +       err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
4273 +       si_read_unlock(sb);
4274 +
4275 +       return err;
4276 +}
4277 +
4278 +static struct file_operations dbgaufs_xib_fop = {      /* for the "xib" file */
4279 +       .open           = dbgaufs_xib_open,
4280 +       .release        = dbgaufs_xi_release,
4281 +       .read           = dbgaufs_xi_read
4282 +};
4283 +
4284 +/* ---------------------------------------------------------------------- */
4285 +
4286 +#define DbgaufsXi_PREFIX "xi"  /* file name is this prefix + branch index */
4287 +/* open(2) for "xi<bindex>": snapshot the stats of that branch's xino file */
4288 +static int dbgaufs_xino_open(struct inode *inode, struct file *file)
4289 +{
4290 +       int err;
4291 +       long l;
4292 +       struct au_sbinfo *sbinfo;
4293 +       struct super_block *sb;
4294 +       struct file *xf;
4295 +       struct qstr *name;
4296 +       /* the file name must be the prefix followed by a decimal index */
4297 +       err = -ENOENT;
4298 +       xf = NULL;
4299 +       name = &file->f_dentry->d_name;
4300 +       if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
4301 +                    || memcmp(name->name, DbgaufsXi_PREFIX,
4302 +                              sizeof(DbgaufsXi_PREFIX) - 1)))
4303 +               goto out;
4304 +       err = strict_strtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
4305 +       if (unlikely(err))
4306 +               goto out;
4307 +       /* reject indexes outside 0..au_sbend(sb), including negative ones */
4308 +       sbinfo = inode->i_private;
4309 +       sb = sbinfo->si_sb;
4310 +       si_noflush_read_lock(sb);
4311 +       if (l >= 0 && l <= au_sbend(sb)) {
4312 +               xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
4313 +               err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
4314 +       } else
4315 +               err = -ENOENT;
4316 +       si_read_unlock(sb);
4317 +
4318 + out:
4319 +       return err;
4320 +}
4321 +
4322 +static struct file_operations dbgaufs_xino_fop = {     /* for the "xi<N>" files */
4323 +       .open           = dbgaufs_xino_open,
4324 +       .release        = dbgaufs_xi_release,
4325 +       .read           = dbgaufs_xi_read
4326 +};
4327 +/* remove the debugfs xino entries of branches @bindex..au_sbend(sb) */
4328 +void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
4329 +{
4330 +       aufs_bindex_t last;
4331 +       struct au_xino_file *xino;
4332 +
4333 +       /* no debugfs directory for this super block: nothing to remove */
4334 +       if (!au_sbi(sb)->si_dbgaufs)
4335 +               return;
4336 +
4337 +       /* walk from @bindex to the last branch */
4338 +       for (last = au_sbend(sb); bindex <= last; bindex++) {
4339 +               xino = &au_sbr(sb, bindex)->br_xino;
4340 +               if (!xino->xi_dbgaufs)
4341 +                       continue;
4342 +               /* drop the entry and forget the dentry */
4343 +               debugfs_remove(xino->xi_dbgaufs);
4344 +               xino->xi_dbgaufs = NULL;
4345 +       }
4346 +}
4347 +/* create a debugfs "xi<bindex>" file for branches @bindex..au_sbend(sb) */
4348 +void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
4349 +{
4350 +       struct au_sbinfo *sbinfo;
4351 +       struct dentry *parent;
4352 +       struct au_branch *br;
4353 +       struct au_xino_file *xi;
4354 +       aufs_bindex_t bend;
4355 +       char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NUL */
4356 +
4357 +       sbinfo = au_sbi(sb);
4358 +       parent = sbinfo->si_dbgaufs;
4359 +       if (!parent)
4360 +               return;
4361 +       /* sbinfo is handed over as the file's private data */
4362 +       bend = au_sbend(sb);
4363 +       for (; bindex <= bend; bindex++) {
4364 +               snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
4365 +               br = au_sbr(sb, bindex);
4366 +               xi = &br->br_xino;
4367 +               AuDebugOn(xi->xi_dbgaufs);
4368 +               xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
4369 +                                                    sbinfo, &dbgaufs_xino_fop);
4370 +               /* ignore an error */
4371 +               if (unlikely(!xi->xi_dbgaufs))
4372 +                       AuWarn1("failed %s under debugfs\n", name);
4373 +       }
4374 +}
4375 +
4376 +/* ---------------------------------------------------------------------- */
4377 +
4378 +#ifdef CONFIG_AUFS_EXPORT
4379 +static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
4380 +{
4381 +       int err;
4382 +       struct au_sbinfo *sbinfo = inode->i_private;
4383 +       struct super_block *sb = sbinfo->si_sb;
4384 +
4385 +       /* snapshot the stats of si_xigen between read lock and unlock */
4386 +       si_noflush_read_lock(sb);
4387 +       err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
4388 +       si_read_unlock(sb);
4389 +
4390 +       return err;
4391 +}
4392 +
4393 +static struct file_operations dbgaufs_xigen_fop = {    /* for the "xigen" file */
4394 +       .open           = dbgaufs_xigen_open,
4395 +       .release        = dbgaufs_xi_release,
4396 +       .read           = dbgaufs_xi_read
4397 +};
4398 +/* create the "xigen" file under sbinfo->si_dbgaufs; returns 0 or -EIO */
4399 +static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
4400 +{
4401 +       struct dentry *xigen;
4402 +
4403 +       /*
4404 +        * This function is a dynamic '__init' function actually,
4405 +        * so the tiny check for si_rwsem is unnecessary.
4406 +        */
4407 +       /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4408 +
4409 +       xigen = debugfs_create_file("xigen", dbgaufs_mode, sbinfo->si_dbgaufs,
4410 +                                   sbinfo, &dbgaufs_xigen_fop);
4411 +       sbinfo->si_dbgaufs_xigen = xigen;
4412 +       if (!xigen)
4413 +               return -EIO;
4414 +
4415 +       /* any non-NULL dentry counts as success */
4416 +       return 0;
4417 +}
4418 +#else
4419 +static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo) /* !CONFIG_AUFS_EXPORT */
4420 +{
4421 +       return 0;       /* no "xigen" file in this configuration */
4422 +}
4423 +#endif /* CONFIG_AUFS_EXPORT */
4424 +
4425 +/* ---------------------------------------------------------------------- */
4426 +/* remove the per-sbinfo debugfs tree and drop the ref taken by dbgaufs_si_init() */
4427 +void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
4428 +{
4429 +       /*
4430 +        * This function is a dynamic '__init' function actually,
4431 +        * so the tiny check for si_rwsem is unnecessary.
4432 +        */
4433 +       /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4434 +
4435 +       debugfs_remove_recursive(sbinfo->si_dbgaufs);
4436 +       sbinfo->si_dbgaufs = NULL;
4437 +       kobject_put(&sbinfo->si_kobj);
4438 +}
4439 +/* create the per-sbinfo directory with "xib" (and "xigen"); 0 or -errno */
4440 +int dbgaufs_si_init(struct au_sbinfo *sbinfo)
4441 +{
4442 +       int err;
4443 +       char name[SysaufsSiNameLen];
4444 +
4445 +       /*
4446 +        * This function is a dynamic '__init' function actually,
4447 +        * so the tiny check for si_rwsem is unnecessary.
4448 +        */
4449 +       /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4450 +
4451 +       err = -ENOENT;
4452 +       if (!dbgaufs) {
4453 +               AuErr1("/debug/aufs is uninitialized\n");
4454 +               goto out;
4455 +       }
4456 +       /* every failure below this point is reported as -EIO */
4457 +       err = -EIO;
4458 +       sysaufs_name(sbinfo, name);
4459 +       sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
4460 +       if (unlikely(!sbinfo->si_dbgaufs))
4461 +               goto out;
4462 +       kobject_get(&sbinfo->si_kobj);  /* paired with dbgaufs_si_fin() */
4463 +
4464 +       sbinfo->si_dbgaufs_xib = debugfs_create_file
4465 +               ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
4466 +                &dbgaufs_xib_fop);
4467 +       if (unlikely(!sbinfo->si_dbgaufs_xib))
4468 +               goto out_dir;
4469 +
4470 +       err = dbgaufs_xigen_init(sbinfo);
4471 +       if (!err)
4472 +               goto out; /* success */
4473 +
4474 + out_dir:
4475 +       dbgaufs_si_fin(sbinfo);
4476 + out:
4477 +       return err;
4478 +}
4479 +
4480 +/* ---------------------------------------------------------------------- */
4481 +/* remove the top-level directory created by dbgaufs_init() */
4482 +void dbgaufs_fin(void)
4483 +{
4484 +       debugfs_remove(dbgaufs);
4485 +}
4486 +/* create the top-level debugfs directory named AUFS_NAME; 0 or -EIO */
4487 +int __init dbgaufs_init(void)
4488 +{
4489 +       dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
4490 +       /* a NULL dentry is the only failure tested for */
4491 +       if (!dbgaufs)
4492 +               return -EIO;
4493 +
4494 +       /* the dentry stays in 'dbgaufs' for dbgaufs_si_init()/dbgaufs_fin() */
4495 +       return 0;
4496 +}
4497 diff --git a/fs/aufs/dbgaufs.h b/fs/aufs/dbgaufs.h
4498 new file mode 100644
4499 index 0000000..3481fc2
4500 --- /dev/null
4501 +++ b/fs/aufs/dbgaufs.h
4502 @@ -0,0 +1,52 @@
4503 +/*
4504 + * Copyright (C) 2005-2009 Junjiro R. Okajima
4505 + *
4506 + * This program, aufs is free software; you can redistribute it and/or modify
4507 + * it under the terms of the GNU General Public License as published by
4508 + * the Free Software Foundation; either version 2 of the License, or
4509 + * (at your option) any later version.
4510 + *
4511 + * This program is distributed in the hope that it will be useful,
4512 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4513 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
4514 + * GNU General Public License for more details.
4515 + *
4516 + * You should have received a copy of the GNU General Public License
4517 + * along with this program; if not, write to the Free Software
4518 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
4519 + */
4520 +
4521 +/*
4522 + * debugfs interface
4523 + */
4524 +
4525 +#ifndef __DBGAUFS_H__
4526 +#define __DBGAUFS_H__
4527 +
4528 +#ifdef __KERNEL__
4529 +
4530 +#include <linux/init.h>
4531 +#include <linux/aufs_type.h>
4532 +
4533 +struct super_block;
4534 +struct au_sbinfo;
4535 +/* real prototypes with CONFIG_DEBUG_FS, AuStub* placeholders otherwise */
4536 +#ifdef CONFIG_DEBUG_FS
4537 +/* dbgaufs.c */
4538 +void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
4539 +void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
4540 +void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
4541 +int dbgaufs_si_init(struct au_sbinfo *sbinfo);
4542 +void dbgaufs_fin(void);
4543 +int __init dbgaufs_init(void);
4544 +#else
4545 +AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
4546 +AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
4547 +AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
4548 +AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
4549 +AuStubVoid(dbgaufs_fin, void)
4550 +AuStubInt0(__init dbgaufs_init, void)
4551 +#endif /* CONFIG_DEBUG_FS */
4552 +
4553 +#endif /* __KERNEL__ */
4554 +#endif /* __DBGAUFS_H__ */
4555 diff --git a/fs/aufs/dcsub.c b/fs/aufs/dcsub.c
4556 new file mode 100644
4557 index 0000000..43a8cb4
4558 --- /dev/null
4559 +++ b/fs/aufs/dcsub.c
4560 @@ -0,0 +1,223 @@
4561 +/*
4562 + * Copyright (C) 2005-2009 Junjiro R. Okajima
4563 + *
4564 + * This program, aufs is free software; you can redistribute it and/or modify
4565 + * it under the terms of the GNU General Public License as published by
4566 + * the Free Software Foundation; either version 2 of the License, or
4567 + * (at your option) any later version.
4568 + *
4569 + * This program is distributed in the hope that it will be useful,
4570 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4571 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
4572 + * GNU General Public License for more details.
4573 + *
4574 + * You should have received a copy of the GNU General Public License
4575 + * along with this program; if not, write to the Free Software
4576 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
4577 + */
4578 +
4579 +/*
4580 + * sub-routines for dentry cache
4581 + */
4582 +
4583 +#include "aufs.h"
4584 +
4585 +static void au_dpage_free(struct au_dpage *dpage) /* release one page of collected dentries */
4586 +{
4587 +       int i;
4588 +       struct dentry **p;
4589 +
4590 +       p = dpage->dentries;
4591 +       for (i = 0; i < dpage->ndentry; i++)
4592 +               dput(*p++); /* drop the reference taken by dget() in au_dpages_append() */
4593 +       free_page((unsigned long)dpage->dentries); /* the array is a whole page from __get_free_page() */
4594 +}
4595 +
4596 +int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp) /* start @dpages with one empty page; returns 0 or -ENOMEM */
4597 +{
4598 +       int err;
4599 +       void *p;
4600 +
4601 +       err = -ENOMEM;
4602 +       dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp); /* room for a single struct au_dpage */
4603 +       if (unlikely(!dpages->dpages))
4604 +               goto out;
4605 +
4606 +       p = (void *)__get_free_page(gfp); /* one page that will hold dentry pointers */
4607 +       if (unlikely(!p))
4608 +               goto out_dpages;
4609 +
4610 +       dpages->dpages[0].ndentry = 0;
4611 +       dpages->dpages[0].dentries = p;
4612 +       dpages->ndpage = 1;
4613 +       return 0; /* success */
4614 +
4615 + out_dpages:
4616 +       kfree(dpages->dpages); /* NOTE: the pointer is not cleared and ndpage is not set on this failure path */
4617 + out:
4618 +       return err;
4619 +}
4620 +
4621 +void au_dpages_free(struct au_dcsub_pages *dpages) /* undo au_dpages_init() and everything appended since */
4622 +{
4623 +       int i;
4624 +       struct au_dpage *p;
4625 +
4626 +       p = dpages->dpages;
4627 +       for (i = 0; i < dpages->ndpage; i++)
4628 +               au_dpage_free(p++); /* dput() every stored dentry, then free that page */
4629 +       kfree(dpages->dpages); /* finally the array of page descriptors itself */
4630 +}
4631 +
4632 +static int au_dpages_append(struct au_dcsub_pages *dpages,
4633 +                           struct dentry *dentry, gfp_t gfp)
4634 +{ /* store a counted reference to @dentry in the last page, adding a page when it is full; 0 or -ENOMEM */
4635 +       int err, sz;
4636 +       struct au_dpage *dpage;
4637 +       void *p;
4638 +
4639 +       dpage = dpages->dpages + dpages->ndpage - 1; /* the last page is the one being filled */
4640 +       sz = PAGE_SIZE / sizeof(dentry); /* number of dentry pointers that fit in one page */
4641 +       if (unlikely(dpage->ndentry >= sz)) {
4642 +               AuLabel(new dpage);
4643 +               err = -ENOMEM;
4644 +               sz = dpages->ndpage * sizeof(*dpages->dpages); /* sz is reused: current array size in bytes */
4645 +               p = au_kzrealloc(dpages->dpages, sz,
4646 +                                sz + sizeof(*dpages->dpages), gfp); /* ask for one more slot; helper is defined elsewhere */
4647 +               if (unlikely(!p))
4648 +                       goto out;
4649 +
4650 +               dpages->dpages = p;
4651 +               dpage = dpages->dpages + dpages->ndpage; /* the newly added slot */
4652 +               p = (void *)__get_free_page(gfp);
4653 +               if (unlikely(!p))
4654 +                       goto out; /* the enlarged array is kept; ndpage is unchanged so the new slot stays unused */
4655 +
4656 +               dpage->ndentry = 0;
4657 +               dpage->dentries = p;
4658 +               dpages->ndpage++;
4659 +       }
4660 +
4661 +       dpage->dentries[dpage->ndentry++] = dget(dentry); /* reference is dropped later in au_dpage_free() */
4662 +       return 0; /* success */
4663 +
4664 + out:
4665 +       return err;
4666 +}
4667 +
4668 +int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
4669 +                  au_dpages_test test, void *arg)
4670 +{ /* walk the dentry tree below @root and append the entries accepted by @test (all when @test is NULL) */
4671 +       int err;
4672 +       struct dentry *this_parent = root;
4673 +       struct list_head *next;
4674 +       struct super_block *sb = root->d_sb;
4675 +
4676 +       err = 0;
4677 +       spin_lock(&dcache_lock); /* held for the whole walk, hence GFP_ATOMIC below */
4678 + repeat:
4679 +       next = this_parent->d_subdirs.next; /* start with the first child of this_parent */
4680 + resume:
4681 +       if (this_parent->d_sb == sb /* same sb, not a root, referenced, has an inode, passes @test */
4682 +           && !IS_ROOT(this_parent)
4683 +           && atomic_read(&this_parent->d_count)
4684 +           && this_parent->d_inode
4685 +           && (!test || test(this_parent, arg))) {
4686 +               err = au_dpages_append(dpages, this_parent, GFP_ATOMIC);
4687 +               if (unlikely(err))
4688 +                       goto out;
4689 +       }
4690 +
4691 +       while (next != &this_parent->d_subdirs) {
4692 +               struct list_head *tmp = next;
4693 +               struct dentry *dentry = list_entry(tmp, struct dentry,
4694 +                                                  d_u.d_child);
4695 +               next = tmp->next;
4696 +               if (/*d_unhashed(dentry) || */!dentry->d_inode)
4697 +                       continue; /* skip children without an inode */
4698 +               if (!list_empty(&dentry->d_subdirs)) {
4699 +                       this_parent = dentry; /* descend; the child is tested at resume: */
4700 +                       goto repeat;
4701 +               }
4702 +               if (dentry->d_sb == sb /* child without sub-entries: test it here */
4703 +                   && atomic_read(&dentry->d_count)
4704 +                   && (!test || test(dentry, arg))) {
4705 +                       err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
4706 +                       if (unlikely(err))
4707 +                               goto out;
4708 +               }
4709 +       }
4710 +
4711 +       if (this_parent != root) {
4712 +               next = this_parent->d_u.d_child.next; /* continue with the next sibling one level up */
4713 +               this_parent = this_parent->d_parent; /* dcache_lock is locked */
4714 +               goto resume; /* NOTE(review): resume: re-tests the parent, so it may be appended again -- confirm callers tolerate duplicates */
4715 +       }
4716 + out:
4717 +       spin_unlock(&dcache_lock);
4718 +       return err; /* 0, or the error from au_dpages_append(); entries already stored stay in @dpages */
4719 +}
4720 +
4721 +int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
4722 +                      int do_include, au_dpages_test test, void *arg)
4723 +{ /* append @dentry (only if @do_include) and then each ancestor up to the root, filtered by @test */
4724 +       int err;
4725 +
4726 +       err = 0;
4727 +       spin_lock(&dcache_lock); /* protects the d_parent walk; appends therefore use GFP_ATOMIC */
4728 +       if (do_include && (!test || test(dentry, arg))) {
4729 +               err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
4730 +               if (unlikely(err))
4731 +                       goto out;
4732 +       }
4733 +       while (!IS_ROOT(dentry)) { /* entries end up ordered from @dentry towards the root */
4734 +               dentry = dentry->d_parent; /* dcache_lock is locked */
4735 +               if (!test || test(dentry, arg)) {
4736 +                       err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
4737 +                       if (unlikely(err))
4738 +                               break;
4739 +               }
4740 +       }
4741 +
4742 + out:
4743 +       spin_unlock(&dcache_lock);
4744 +
4745 +       return err; /* 0, or the error from au_dpages_append() */
4746 +}
4747 +
4748 +struct dentry *au_test_subdir(struct dentry *d1, struct dentry *d2) /* is @d2 equal to @d1 or one of its ancestors? */
4749 +{
4750 +       struct dentry *trap, **dentries;
4751 +       int err, i, j;
4752 +       struct au_dcsub_pages dpages;
4753 +       struct au_dpage *dpage;
4754 +
4755 +       trap = ERR_PTR(-ENOMEM); /* returned on either failure path below */
4756 +       err = au_dpages_init(&dpages, GFP_NOFS);
4757 +       if (unlikely(err))
4758 +               goto out;
4759 +       err = au_dcsub_pages_rev(&dpages, d1, /*do_include*/1, NULL, NULL); /* @d1 followed by all its ancestors */
4760 +       if (unlikely(err))
4761 +               goto out_dpages;
4762 +
4763 +       trap = d1;
4764 +       for (i = 0; !err && i < dpages.ndpage; i++) { /* from here on err is reused as a "found @d2" flag */
4765 +               dpage = dpages.dpages + i;
4766 +               dentries = dpage->dentries;
4767 +               for (j = 0; !err && j < dpage->ndentry; j++) {
4768 +                       struct dentry *d;
4769 +
4770 +                       d = dentries[j];
4771 +                       err = (d == d2);
4772 +                       if (!err)
4773 +                               trap = d; /* last entry visited before @d2, i.e. the step just below it */
4774 +               }
4775 +       }
4776 +       if (!err)
4777 +               trap = NULL; /* @d2 is not on the path from @d1 to the root */
4778 +
4779 + out_dpages:
4780 +       au_dpages_free(&dpages); /* drops every reference taken while collecting; trap carries none of its own */
4781 + out:
4782 +       return trap; /* NULL, ERR_PTR(-ENOMEM), @d1 itself when d1 == d2, else the entry just below @d2 */
4783 +}
4784 diff --git a/fs/aufs/dcsub.h b/fs/aufs/dcsub.h
4785 new file mode 100644
4786 index 0000000..bb934b4
4787 --- /dev/null
4788 +++ b/fs/aufs/dcsub.h
4789 @@ -0,0 +1,54 @@
4790 +/*
4791 + * Copyright (C) 2005-2009 Junjiro R. Okajima
4792 + *
4793 + * This program, aufs is free software; you can redistribute it and/or modify
4794 + * it under the terms of the GNU General Public License as published by
4795 + * the Free Software Foundation; either version 2 of the License, or
4796 + * (at your option) any later version.
4797 + *
4798 + * This program is distributed in the hope that it will be useful,
4799 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4800 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
4801 + * GNU General Public License for more details.
4802 + *
4803 + * You should have received a copy of the GNU General Public License
4804 + * along with this program; if not, write to the Free Software
4805 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
4806 + */
4807 +
4808 +/*
4809 + * sub-routines for dentry cache
4810 + */
4811 +
4812 +#ifndef __AUFS_DCSUB_H__
4813 +#define __AUFS_DCSUB_H__
4814 +
4815 +#ifdef __KERNEL__
4816 +
4817 +#include <linux/types.h>
4818 +
4819 +struct dentry;
4820 +
4821 +struct au_dpage { /* one page worth of collected dentry pointers */
4822 +       int ndentry; /* number of valid entries in dentries[] */
4823 +       struct dentry **dentries; /* page from __get_free_page(); each entry holds a dget() reference */
4824 +};
4825 +
4826 +struct au_dcsub_pages { /* growable list of au_dpage, filled by au_dcsub_pages() and au_dcsub_pages_rev() */
4827 +       int ndpage; /* number of elements in dpages[] that are in use */
4828 +       struct au_dpage *dpages; /* kmalloc-ed array, enlarged one element at a time */
4829 +};
4830 +
4831 +/* ---------------------------------------------------------------------- */
4832 +
4833 +int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp); /* 0 or -ENOMEM */
4834 +void au_dpages_free(struct au_dcsub_pages *dpages); /* dput() all collected dentries and free the pages */
4835 +typedef int (*au_dpages_test)(struct dentry *dentry, void *arg); /* non-zero means "collect this dentry" */
4836 +int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
4837 +                  au_dpages_test test, void *arg); /* collect from the tree below @root */
4838 +int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
4839 +                      int do_include, au_dpages_test test, void *arg); /* collect @dentry's ancestors */
4840 +struct dentry *au_test_subdir(struct dentry *d1, struct dentry *d2); /* NULL when @d2 is not @d1 or an ancestor of it */
4841 +
4842 +#endif /* __KERNEL__ */
4843 +#endif /* __AUFS_DCSUB_H__ */
4844 diff --git a/fs/aufs/debug.c b/fs/aufs/debug.c
4845 new file mode 100644
4846 index 0000000..5cfd707
4847 --- /dev/null
4848 +++ b/fs/aufs/debug.c
4849 @@ -0,0 +1,431 @@
4850 +/*
4851 + * Copyright (C) 2005-2009 Junjiro R. Okajima
4852 + *
4853 + * This program, aufs is free software; you can redistribute it and/or modify
4854 + * it under the terms of the GNU General Public License as published by
4855 + * the Free Software Foundation; either version 2 of the License, or
4856 + * (at your option) any later version.
4857 + *
4858 + * This program is distributed in the hope that it will be useful,
4859 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4860 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
4861 + * GNU General Public License for more details.
4862 + *
4863 + * You should have received a copy of the GNU General Public License
4864 + * along with this program; if not, write to the Free Software
4865 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
4866 + */
4867 +
4868 +/*
4869 + * debug print functions
4870 + */
4871 +
4872 +#include <linux/module.h>
4873 +#include <linux/vt_kern.h>
4874 +#include "aufs.h"
4875 +
4876 +int aufs_debug; /* non-zero enables dpri()/AuDbg() output, see au_debug_test() in debug.h */
4877 +MODULE_PARM_DESC(debug, "debug print");
4878 +module_param_named(debug, aufs_debug, int, S_IRUGO | S_IWUSR | S_IWGRP); /* exposed as module parameter "debug" */
4879 +
4880 +char *au_plevel = KERN_DEBUG; /* printk level string prepended by dpri() */
4881 +#define dpri(fmt, ...) do { \
4882 +       if (au_debug_test()) \
4883 +               printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
4884 +} while (0)
4885 +
4886 +/* ---------------------------------------------------------------------- */
4887 +
4888 +void au_dpri_whlist(struct au_nhash *whlist) /* print every entry of every hash bucket in @whlist */
4889 +{
4890 +       unsigned long ul, n;
4891 +       struct hlist_head *head;
4892 +       struct au_vdir_wh *tpos;
4893 +       struct hlist_node *pos;
4894 +
4895 +       n = whlist->nh_num; /* number of buckets */
4896 +       head = whlist->nh_head;
4897 +       for (ul = 0; ul < n; ul++) {
4898 +               hlist_for_each_entry(tpos, pos, head, wh_hash)
4899 +                       dpri("b%d, %.*s, %d\n", /* branch index, name, name length */
4900 +                            tpos->wh_bindex,
4901 +                            tpos->wh_str.len, tpos->wh_str.name,
4902 +                            tpos->wh_str.len);
4903 +               head++;
4904 +       }
4905 +}
4906 +
4907 +void au_dpri_vdir(struct au_vdir *vdir) /* print the header fields of @vdir and the address of each deblk */
4908 +{
4909 +       unsigned long ul;
4910 +       union au_vdir_deblk_p p;
4911 +       unsigned char *o;
4912 +
4913 +       if (!vdir || IS_ERR(vdir)) {
4914 +               dpri("err %ld\n", PTR_ERR(vdir)); /* NULL prints as "err 0" */
4915 +               return;
4916 +       }
4917 +
4918 +       dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
4919 +            vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
4920 +            vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
4921 +       for (ul = 0; ul < vdir->vd_nblk; ul++) {
4922 +               p.deblk = vdir->vd_deblk[ul];
4923 +               o = p.deblk;
4924 +               dpri("[%lu]: %p\n", ul, o); /* only the block address is printed, not its contents */
4925 +       }
4926 +}
4927 +
4928 +static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode,
4929 +                       struct dentry *wh)
4930 +{ /* print one line describing @inode, tagged with @bindex; returns -1 for a NULL/error pointer, else 0 */
4931 +       char *n = NULL;
4932 +       int l = 0;
4933 +
4934 +       if (!inode || IS_ERR(inode)) {
4935 +               dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
4936 +               return -1;
4937 +       }
4938 +
4939 +       /* the type of i_blocks depends upon CONFIG_LSF */
4940 +       BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
4941 +                    && sizeof(inode->i_blocks) != sizeof(u64));
4942 +       if (wh) {
4943 +               n = (void *)wh->d_name.name; /* name of @wh, appended to the line when present */
4944 +               l = wh->d_name.len;
4945 +       }
4946 +
4947 +       dpri("i%d: i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
4948 +            " ct %lld, np %lu, st 0x%lx, f 0x%x, g %x%s%.*s\n",
4949 +            bindex,
4950 +            inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
4951 +            atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
4952 +            i_size_read(inode), (unsigned long long)inode->i_blocks,
4953 +            (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff, /* only the low 16 bits of ctime in ns */
4954 +            inode->i_mapping ? inode->i_mapping->nrpages : 0,
4955 +            inode->i_state, inode->i_flags, inode->i_generation,
4956 +            l ? ", wh " : "", l, n);
4957 +       return 0;
4958 +}
4959 +
4960 +void au_dpri_inode(struct inode *inode) /* print @inode and, if it is an aufs inode, each inode stored per branch */
4961 +{
4962 +       struct au_iinfo *iinfo;
4963 +       aufs_bindex_t bindex;
4964 +       int err;
4965 +
4966 +       err = do_pri_inode(-1, inode, NULL); /* -1 tags the inode that was passed in */
4967 +       if (err || !au_test_aufs(inode->i_sb))
4968 +               return;
4969 +
4970 +       iinfo = au_ii(inode);
4971 +       if (!iinfo)
4972 +               return;
4973 +       dpri("i-1: bstart %d, bend %d, gen %d\n",
4974 +            iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode));
4975 +       if (iinfo->ii_bstart < 0)
4976 +               return; /* a negative bstart means there is no branch range to walk */
4977 +       for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++)
4978 +               do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode,
4979 +                            iinfo->ii_hinode[0 + bindex].hi_whdentry);
4980 +}
4981 +
4982 +static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry) /* print @dentry and its inode; -1 for NULL/error pointer, else 0 */
4983 +{
4984 +       struct dentry *wh = NULL;
4985 +
4986 +       if (!dentry || IS_ERR(dentry)) {
4987 +               dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
4988 +               return -1;
4989 +       }
4990 +       /* do not call dget_parent() here */
4991 +       dpri("d%d: %.*s?/%.*s, %s, cnt %d, flags 0x%x\n", /* parent name, own name, sb type, refcount, flags */
4992 +            bindex,
4993 +            AuDLNPair(dentry->d_parent), AuDLNPair(dentry),
4994 +            dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
4995 +            atomic_read(&dentry->d_count), dentry->d_flags);
4996 +       if (bindex >= 0 && dentry->d_inode && au_test_aufs(dentry->d_sb)) {
4997 +               struct au_iinfo *iinfo = au_ii(dentry->d_inode);
4998 +               if (iinfo)
4999 +                       wh = iinfo->ii_hinode[0 + bindex].hi_whdentry; /* passed on so do_pri_inode() prints its name */
5000 +       }
5001 +       do_pri_inode(bindex, dentry->d_inode, wh); /* do_pri_inode() copes with a NULL d_inode */
5002 +       return 0;
5003 +}
5004 +
5005 +void au_dpri_dentry(struct dentry *dentry) /* print @dentry and, if it is an aufs dentry, each dentry stored per branch */
5006 +{
5007 +       struct au_dinfo *dinfo;
5008 +       aufs_bindex_t bindex;
5009 +       int err;
5010 +
5011 +       err = do_pri_dentry(-1, dentry); /* -1 tags the dentry that was passed in */
5012 +       if (err || !au_test_aufs(dentry->d_sb))
5013 +               return;
5014 +
5015 +       dinfo = au_di(dentry);
5016 +       if (!dinfo)
5017 +               return;
5018 +       dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d\n",
5019 +            dinfo->di_bstart, dinfo->di_bend,
5020 +            dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry));
5021 +       if (dinfo->di_bstart < 0)
5022 +               return; /* a negative bstart means there is no branch range to walk */
5023 +       for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++)
5024 +               do_pri_dentry(bindex, dinfo->di_hdentry[0 + bindex].hd_dentry);
5025 +}
5026 +
5027 +static int do_pri_file(aufs_bindex_t bindex, struct file *file) /* print @file and its dentry; -1 for NULL/error pointer, else 0 */
5028 +{
5029 +       char a[32]; /* optional ", mmapped N" suffix */
5030 +
5031 +       if (!file || IS_ERR(file)) {
5032 +               dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
5033 +               return -1;
5034 +       }
5035 +       a[0] = 0;
5036 +       if (bindex < 0 /* suffix only for the file passed by the caller (negative index) when it is an aufs file */
5037 +           && file->f_dentry
5038 +           && au_test_aufs(file->f_dentry->d_sb)
5039 +           && au_fi(file))
5040 +               snprintf(a, sizeof(a), ", mmapped %d", au_test_mmapped(file));
5041 +       dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, pos %llu%s\n",
5042 +            bindex, file->f_mode, file->f_flags, (long)file_count(file),
5043 +            file->f_pos, a);
5044 +       if (file->f_dentry)
5045 +               do_pri_dentry(bindex, file->f_dentry);
5046 +       return 0;
5047 +}
5048 +
5049 +void au_dpri_file(struct file *file) /* print @file and, if it is an aufs file, each file stored per branch */
5050 +{
5051 +       struct au_finfo *finfo;
5052 +       aufs_bindex_t bindex;
5053 +       int err;
5054 +
5055 +       err = do_pri_file(-1, file); /* -1 tags the file that was passed in */
5056 +       if (err || !file->f_dentry || !au_test_aufs(file->f_dentry->d_sb))
5057 +               return;
5058 +
5059 +       finfo = au_fi(file);
5060 +       if (!finfo)
5061 +               return;
5062 +       if (finfo->fi_bstart < 0)
5063 +               return; /* a negative bstart means there is no branch range to walk */
5064 +       for (bindex = finfo->fi_bstart; bindex <= finfo->fi_bend; bindex++) {
5065 +               struct au_hfile *hf;
5066 +
5067 +               hf = finfo->fi_hfile + bindex;
5068 +               do_pri_file(bindex, hf ? hf->hf_file : NULL); /* empty slots are reported by do_pri_file() as "err 0" */
5069 +       }
5070 +}
5071 +
5072 +static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br) /* print branch @br and its super_block; 0, or -1 if a pointer is invalid */
5073 +{
5074 +       struct vfsmount *mnt;
5075 +       struct super_block *sb;
5076 +
5077 +       if (!br || IS_ERR(br)) /* validate br, then br->br_mnt, then mnt->mnt_sb before using them */
5078 +               goto out;
5079 +       mnt = br->br_mnt;
5080 +       if (!mnt || IS_ERR(mnt))
5081 +               goto out;
5082 +       sb = mnt->mnt_sb;
5083 +       if (!sb || IS_ERR(sb))
5084 +               goto out;
5085 +
5086 +       dpri("s%d: {perm 0x%x, cnt %d, wbr %p}, "
5087 +            "%s, dev 0x%02x%02x, flags 0x%lx, cnt(BIAS) %d, active %d, "
5088 +            "xino %d\n",
5089 +            bindex, br->br_perm, atomic_read(&br->br_count), br->br_wbr,
5090 +            au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
5091 +            sb->s_flags, sb->s_count - S_BIAS,
5092 +            atomic_read(&sb->s_active), !!br->br_xino.xi_file); /* xino prints 1 when an xino file is set */
5093 +       return 0;
5094 +
5095 + out:
5096 +       dpri("s%d: err %ld\n", bindex, PTR_ERR(br)); /* NOTE(review): prints br even when it was mnt or sb that failed */
5097 +       return -1;
5098 +}
5099 +
5100 +void au_dpri_sb(struct super_block *sb) /* print @sb itself and, if it is an aufs sb, every branch */
5101 +{
5102 +       struct au_sbinfo *sbinfo;
5103 +       aufs_bindex_t bindex;
5104 +       int err;
5105 +       /* allocated from the heap to reduce stack usage */
5106 +       struct {
5107 +               struct vfsmount mnt;
5108 +               struct au_branch fake;
5109 +       } *a;
5110 +
5111 +       /* this function can be called from magic sysrq */
5112 +       a = kzalloc(sizeof(*a), GFP_ATOMIC);
5113 +       if (unlikely(!a)) {
5114 +               dpri("no memory\n");
5115 +               return;
5116 +       }
5117 +
5118 +       a->mnt.mnt_sb = sb; /* wrap @sb in a fake mount + branch so do_pri_br() can print it */
5119 +       a->fake.br_perm = 0;
5120 +       a->fake.br_mnt = &a->mnt;
5121 +       a->fake.br_xino.xi_file = NULL;
5122 +       atomic_set(&a->fake.br_count, 0);
5123 +       smp_mb(); /* atomic_set */
5124 +       err = do_pri_br(-1, &a->fake); /* -1 tags the sb that was passed in */
5125 +       kfree(a);
5126 +       dpri("dev 0x%x\n", sb->s_dev);
5127 +       if (err || !au_test_aufs(sb))
5128 +               return;
5129 +
5130 +       sbinfo = au_sbi(sb);
5131 +       if (!sbinfo)
5132 +               return;
5133 +       dpri("nw %d, gen %u, kobj %d\n",
5134 +            atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
5135 +            atomic_read(&sbinfo->si_kobj.kref.refcount));
5136 +       for (bindex = 0; bindex <= sbinfo->si_bend; bindex++)
5137 +               do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
5138 +}
5139 +
5140 +/* ---------------------------------------------------------------------- */
5141 +
5142 +void au_dbg_sleep_jiffy(int jiffy) /* sleep uninterruptibly for @jiffy jiffies */
5143 +{
5144 +       while (jiffy)
5145 +               jiffy = schedule_timeout_uninterruptible(jiffy); /* sleep again with whatever is returned until it is 0 */
5146 +}
5147 +
5148 +void au_dbg_iattr(struct iattr *ia) /* print the name of every ATTR_* bit set in ia->ia_valid, one per line */
5149 +{
5150 +#define AuBit(name)    if (ia->ia_valid & ATTR_ ## name) \
5151 +                               dpri(#name "\n")
5152 +       AuBit(MODE);
5153 +       AuBit(UID);
5154 +       AuBit(GID);
5155 +       AuBit(SIZE);
5156 +       AuBit(ATIME);
5157 +       AuBit(MTIME);
5158 +       AuBit(CTIME);
5159 +       AuBit(ATIME_SET);
5160 +       AuBit(MTIME_SET);
5161 +       AuBit(FORCE);
5162 +       AuBit(ATTR_FLAG);
5163 +       AuBit(KILL_SUID);
5164 +       AuBit(KILL_SGID);
5165 +       AuBit(FILE);
5166 +       AuBit(KILL_PRIV);
5167 +       AuBit(OPEN);
5168 +       AuBit(TIMES_SET);
5169 +#undef AuBit
5170 +       dpri("ia_file %p\n", ia->ia_file); /* printed whether or not ATTR_FILE is set */
5171 +}
5172 +
5173 +/* ---------------------------------------------------------------------- */
5174 +
5175 +void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen) /* assert: non-root directory whose parent has generation @sigen */
5176 +{
5177 +       struct dentry *parent;
5178 +
5179 +       parent = dget_parent(dentry); /* holds a reference until the dput() below */
5180 +       AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode)
5181 +                 || IS_ROOT(dentry)
5182 +                 || au_digen(parent) != sigen);
5183 +       dput(parent);
5184 +}
5185 +
5186 +void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen) /* assert: not a directory, and parent has generation @sigen */
5187 +{
5188 +       struct dentry *parent;
5189 +
5190 +       parent = dget_parent(dentry); /* holds a reference until the dput() below */
5191 +       AuDebugOn(S_ISDIR(dentry->d_inode->i_mode)
5192 +                 || au_digen(parent) != sigen);
5193 +       dput(parent);
5194 +}
5195 +
5196 +void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen) /* assert: @parent and all its ancestors have generation @sigen */
5197 +{
5198 +       int err, i, j;
5199 +       struct au_dcsub_pages dpages;
5200 +       struct au_dpage *dpage;
5201 +       struct dentry **dentries;
5202 +
5203 +       err = au_dpages_init(&dpages, GFP_NOFS);
5204 +       AuDebugOn(err); /* an allocation failure is treated as a bug here */
5205 +       err = au_dcsub_pages_rev(&dpages, parent, /*do_include*/1, NULL, NULL);
5206 +       AuDebugOn(err);
5207 +       for (i = dpages.ndpage - 1; !err && i >= 0; i--) { /* last collected first, i.e. from the root downwards */
5208 +               dpage = dpages.dpages + i;
5209 +               dentries = dpage->dentries;
5210 +               for (j = dpage->ndentry - 1; !err && j >= 0; j--)
5211 +                       AuDebugOn(au_digen(dentries[j]) != sigen);
5212 +       }
5213 +       au_dpages_free(&dpages);
5214 +}
5215 +
5216 +void au_dbg_verify_hf(struct au_finfo *finfo) /* assert: every hfile slot in [fi_bstart, fi_bend] is empty */
5217 +{
5218 +       struct au_hfile *hf;
5219 +       aufs_bindex_t bend, bindex;
5220 +
5221 +       if (finfo->fi_bstart >= 0) { /* nothing to check when bstart is negative */
5222 +               bend = finfo->fi_bend;
5223 +               for (bindex = finfo->fi_bstart; bindex <= bend; bindex++) {
5224 +                       hf = finfo->fi_hfile + bindex;
5225 +                       AuDebugOn(hf->hf_file || hf->hf_br); /* neither a file nor a branch may still be set */
5226 +               }
5227 +       }
5228 +}
5229 +
5230 +void au_dbg_verify_kthread(void) /* BUG() when au_test_wkq() is true for the current task */
5231 +{
5232 +       if (au_test_wkq(current)) { /* presumably "current is an aufs workqueue thread" -- helper defined elsewhere */
5233 +               au_dbg_blocked(); /* see debug.h: SysRq-w dump, or an empty stub depending on config */
5234 +               BUG();
5235 +       }
5236 +}
5237 +
5238 +/* ---------------------------------------------------------------------- */
5239 +
5240 +void au_debug_sbinfo_init(struct au_sbinfo *sbinfo __maybe_unused) /* apply compile-time AuForce* overrides to @sbinfo */
5241 +{
5242 +#ifdef AuForceNoPlink
5243 +       au_opt_clr(sbinfo->si_mntflags, PLINK);
5244 +#endif
5245 +#ifdef AuForceNoXino
5246 +       au_opt_clr(sbinfo->si_mntflags, XINO);
5247 +#endif
5248 +#ifdef AuForceNoRefrof
5249 +       au_opt_clr(sbinfo->si_mntflags, REFROF);
5250 +#endif
5251 +#ifdef AuForceHinotify
5252 +       au_opt_set_udba(sbinfo->si_mntflags, UDBA_HINOTIFY);
5253 +#endif
5254 +#ifdef AuForceRd0
5255 +       sbinfo->si_rdblk = 0;
5256 +       sbinfo->si_rdhash = 0;
5257 +#endif
5258 +} /* with none of the AuForce* macros defined the body is empty, hence __maybe_unused */
5259 +
5260 +int __init au_debug_init(void) /* start-up sanity checks on type properties; always returns 0 */
5261 +{
5262 +       aufs_bindex_t bindex;
5263 +       struct au_vdir_destr destr;
5264 +
5265 +       bindex = -1;
5266 +       AuDebugOn(bindex >= 0); /* aufs_bindex_t must be able to hold -1, i.e. be signed */
5267 +
5268 +       destr.len = -1;
5269 +       AuDebugOn(destr.len < NAME_MAX); /* after storing -1 the len field must still be >= NAME_MAX */
5270 +
5271 +#ifdef CONFIG_4KSTACKS
5272 +       AuWarn("CONFIG_4KSTACKS is defined.\n");
5273 +#endif
5274 +
5275 +#ifdef AuForceNoBrs
5276 +       sysaufs_brs = 0;
5277 +#endif
5278 +
5279 +       return 0;
5280 +}
5281 diff --git a/fs/aufs/debug.h b/fs/aufs/debug.h
5282 new file mode 100644
5283 index 0000000..dbcbf31
5284 --- /dev/null
5285 +++ b/fs/aufs/debug.h
5286 @@ -0,0 +1,232 @@
5287 +/*
5288 + * Copyright (C) 2005-2009 Junjiro R. Okajima
5289 + *
5290 + * This program, aufs is free software; you can redistribute it and/or modify
5291 + * it under the terms of the GNU General Public License as published by
5292 + * the Free Software Foundation; either version 2 of the License, or
5293 + * (at your option) any later version.
5294 + *
5295 + * This program is distributed in the hope that it will be useful,
5296 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
5297 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
5298 + * GNU General Public License for more details.
5299 + *
5300 + * You should have received a copy of the GNU General Public License
5301 + * along with this program; if not, write to the Free Software
5302 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
5303 + */
5304 +
5305 +/*
5306 + * debug print functions
5307 + */
5308 +
5309 +#ifndef __AUFS_DEBUG_H__
5310 +#define __AUFS_DEBUG_H__
5311 +
5312 +#ifdef __KERNEL__
5313 +
5314 +#include <linux/bug.h>
5315 +/* #include <linux/err.h> */
5316 +/* #include <linux/init.h> */
5317 +/* #include <linux/kernel.h> */
5318 +#include <linux/delay.h>
5319 +/* #include <linux/kd.h> */
5320 +/* #include <linux/vt_kern.h> */
5321 +#include <linux/sysrq.h>
5322 +#include <linux/aufs_type.h>
5323 +
5324 +#ifdef CONFIG_AUFS_DEBUG
5325 +#define AuDebugOn(a)           BUG_ON(a) /* debug build: a true condition is fatal */
5326 +
5327 +/* module parameter */
5328 +extern int aufs_debug;
5329 +static inline void au_debug(int n) /* set the debug-print switch at run time */
5330 +{
5331 +       aufs_debug = n;
5332 +       smp_mb();
5333 +}
5334 +
5335 +static inline int au_debug_test(void) /* non-zero when debug printing is enabled */
5336 +{
5337 +       return aufs_debug;
5338 +}
5339 +#else
5340 +#define AuDebugOn(a)           do {} while (0) /* non-debug build: the condition is not evaluated at all */
5341 +AuStubVoid(au_debug, int n)
5342 +AuStubInt0(au_debug_test, void)
5343 +#endif /* CONFIG_AUFS_DEBUG */
5344 +
5345 +/* ---------------------------------------------------------------------- */
5346 +
5347 +/* debug print: AuDpri() prefixes each message with function, line, task name and pid */
5348 +
5349 +#define AuDpri(lvl, fmt, ...) \
5350 +       printk(lvl AUFS_NAME " %s:%d:%s[%d]: " fmt, \
5351 +              __func__, __LINE__, current->comm, current->pid, ##__VA_ARGS__)
5352 +#define AuDbg(fmt, ...) do { \
5353 +       if (au_debug_test()) \
5354 +               AuDpri(KERN_DEBUG, "DEBUG: " fmt, ##__VA_ARGS__); \
5355 +} while (0)
5356 +#define AuLabel(l)             AuDbg(#l "\n") /* prints the argument text itself as a trace marker */
5357 +#define AuInfo(fmt, ...)       AuDpri(KERN_INFO, fmt, ##__VA_ARGS__)
5358 +#define AuWarn(fmt, ...)       AuDpri(KERN_WARNING, fmt, ##__VA_ARGS__)
5359 +#define AuErr(fmt, ...)                AuDpri(KERN_ERR, fmt, ##__VA_ARGS__)
5360 +#define AuIOErr(fmt, ...)      AuErr("I/O Error, " fmt, ##__VA_ARGS__)
5361 +#define AuWarn1(fmt, ...) do { \
5362 +       static unsigned char _c; \
5363 +       if (!_c++) \
5364 +               AuWarn(fmt, ##__VA_ARGS__); \
5365 +} while (0) /* per call site; _c is an unsigned char, so the message reappears each time the counter wraps */
5366 +
5367 +#define AuErr1(fmt, ...) do { \
5368 +       static unsigned char _c; \
5369 +       if (!_c++) \
5370 +               AuErr(fmt, ##__VA_ARGS__); \
5371 +} while (0) /* same counter scheme as AuWarn1 */
5372 +
5373 +#define AuIOErr1(fmt, ...) do { \
5374 +       static unsigned char _c; \
5375 +       if (!_c++) \
5376 +               AuIOErr(fmt, ##__VA_ARGS__); \
5377 +} while (0) /* same counter scheme as AuWarn1 */
5378 +
5379 +#define AuUnsupportMsg "This operation is not supported." \
5380 +                       " Please report this application to aufs-users ML."
5381 +#define AuUnsupport(fmt, ...) do { \
5382 +       AuErr(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
5383 +       dump_stack(); \
5384 +} while (0) /* error message followed by a stack dump */
5385 +
5386 +#define AuTraceErr(e) do { \
5387 +       if (unlikely((e) < 0)) \
5388 +               AuDbg("err %d\n", (int)(e)); \
5389 +} while (0) /* note: (e) is evaluated twice when it is negative */
5390 +
5391 +#define AuTraceErrPtr(p) do { \
5392 +       if (IS_ERR(p)) \
5393 +               AuDbg("err %ld\n", PTR_ERR(p)); \
5394 +} while (0) /* note: p is evaluated twice when it is an error pointer */
5395 +
5396 +/* dirty macros for debug print, use with "%.*s" and caution: each expands to TWO arguments (len, name) */
5397 +#define AuLNPair(qstr)         (qstr)->len, (qstr)->name
5398 +#define AuDLNPair(d)           AuLNPair(&(d)->d_name)
5399 +
5400 +/* ---------------------------------------------------------------------- */
5401 +
5402 +struct au_sbinfo;
5403 +struct au_finfo;
5404 +struct dentry;
5405 +#ifdef CONFIG_AUFS_DEBUG /* real debug helpers, implemented in debug.c */
5406 +extern char *au_plevel;
5407 +struct au_nhash;
5408 +void au_dpri_whlist(struct au_nhash *whlist);
5409 +struct au_vdir;
5410 +void au_dpri_vdir(struct au_vdir *vdir);
5411 +struct inode;
5412 +void au_dpri_inode(struct inode *inode);
5413 +void au_dpri_dentry(struct dentry *dentry);
5414 +struct file;
5415 +void au_dpri_file(struct file *filp);
5416 +struct super_block;
5417 +void au_dpri_sb(struct super_block *sb);
5418 +
5419 +void au_dbg_sleep_jiffy(int jiffy);
5420 +struct iattr;
5421 +void au_dbg_iattr(struct iattr *ia);
5422 +
5423 +void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen);
5424 +void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen);
5425 +void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
5426 +void au_dbg_verify_hf(struct au_finfo *finfo);
5427 +void au_dbg_verify_kthread(void);
5428 +
5429 +int __init au_debug_init(void);
5430 +void au_debug_sbinfo_init(struct au_sbinfo *sbinfo);
5431 +#define AuDbgWhlist(w) do { \
5432 +       AuDbg(#w "\n"); \
5433 +       au_dpri_whlist(w); \
5434 +} while (0) /* each AuDbgXxx(): print the argument expression's text, then dump the object */
5435 +
5436 +#define AuDbgVdir(v) do { \
5437 +       AuDbg(#v "\n"); \
5438 +       au_dpri_vdir(v); \
5439 +} while (0)
5440 +
5441 +#define AuDbgInode(i) do { \
5442 +       AuDbg(#i "\n"); \
5443 +       au_dpri_inode(i); \
5444 +} while (0)
5445 +
5446 +#define AuDbgDentry(d) do { \
5447 +       AuDbg(#d "\n"); \
5448 +       au_dpri_dentry(d); \
5449 +} while (0)
5450 +
5451 +#define AuDbgFile(f) do { \
5452 +       AuDbg(#f "\n"); \
5453 +       au_dpri_file(f); \
5454 +} while (0)
5455 +
5456 +#define AuDbgSb(sb) do { \
5457 +       AuDbg(#sb "\n"); \
5458 +       au_dpri_sb(sb); \
5459 +} while (0)
5460 +
5461 +#define AuDbgSleep(sec) do { \
5462 +       AuDbg("sleep %d sec\n", sec); \
5463 +       ssleep(sec); \
5464 +} while (0) /* note: sleeps even when debug printing is switched off; only the message is conditional */
5465 +
5466 +#define AuDbgSleepJiffy(jiffy) do { \
5467 +       AuDbg("sleep %d jiffies\n", jiffy); \
5468 +       au_dbg_sleep_jiffy(jiffy); \
5469 +} while (0) /* as above: the sleep itself is unconditional */
5470 +
5471 +#define AuDbgIAttr(ia) do { \
5472 +       AuDbg("ia_valid 0x%x\n", (ia)->ia_valid); \
5473 +       au_dbg_iattr(ia); \
5474 +} while (0)
5475 +#else /* !CONFIG_AUFS_DEBUG: stubs (AuStub*() defined elsewhere) and empty macros */
5476 +AuStubVoid(au_dbg_verify_dir_parent, struct dentry *dentry, unsigned int sigen)
5477 +AuStubVoid(au_dbg_verify_nondir_parent, struct dentry *dentry,
5478 +          unsigned int sigen)
5479 +AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
5480 +AuStubVoid(au_dbg_verify_hf, struct au_finfo *finfo)
5481 +AuStubVoid(au_dbg_verify_kthread, void)
5482 +AuStubInt0(__init au_debug_init, void)
5483 +AuStubVoid(au_debug_sbinfo_init, struct au_sbinfo *sbinfo)
5484 +
5485 +#define AuDbgWhlist(w)         do {} while (0)
5486 +#define AuDbgVdir(v)           do {} while (0)
5487 +#define AuDbgInode(i)          do {} while (0)
5488 +#define AuDbgDentry(d)         do {} while (0)
5489 +#define AuDbgFile(f)           do {} while (0)
5490 +#define AuDbgSb(sb)            do {} while (0)
5491 +#define AuDbgSleep(sec)                do {} while (0)
5492 +#define AuDbgSleepJiffy(jiffy) do {} while (0)
5493 +#define AuDbgIAttr(ia)         do {} while (0)
5494 +#endif /* CONFIG_AUFS_DEBUG */
5495 +
5496 +/* ---------------------------------------------------------------------- */
5497 +/*
     + * Magic-SysRq support.  With both CONFIG_AUFS_MAGIC_SYSRQ and
     + * CONFIG_HW_CONSOLE, au_dbg_blocked() emits a WARN_ON(1) and then feeds
     + * the SysRq key 'w' to handle_sysrq() with the foreground console's tty.
     + * In every other configuration the AuStub*() placeholders are used instead.
     + */
5498 +#ifdef CONFIG_AUFS_MAGIC_SYSRQ
5499 +int __init au_sysrq_init(void);
5500 +void au_sysrq_fin(void);
5501 +
5502 +#ifdef CONFIG_HW_CONSOLE
5503 +#define au_dbg_blocked() do { \
5504 +       WARN_ON(1); \
5505 +       handle_sysrq('w', vc_cons[fg_console].d->vc_tty); \
5506 +} while (0)
5507 +#else
5508 +AuStubVoid(au_dbg_blocked, void)
5509 +#endif
5510 +
5511 +#else
5512 +AuStubInt0(__init au_sysrq_init, void)
5513 +AuStubVoid(au_sysrq_fin, void)
5514 +AuStubVoid(au_dbg_blocked, void)
5515 +#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
5516 +
5517 +#endif /* __KERNEL__ */
5518 +#endif /* __AUFS_DEBUG_H__ */
5519 diff --git a/fs/aufs/dentry.c b/fs/aufs/dentry.c
5520 new file mode 100644
5521 index 0000000..2b18f3c
5522 --- /dev/null
5523 +++ b/fs/aufs/dentry.c
5524 @@ -0,0 +1,875 @@
5525 +/*
5526 + * Copyright (C) 2005-2009 Junjiro R. Okajima
5527 + *
5528 + * This program, aufs is free software; you can redistribute it and/or modify
5529 + * it under the terms of the GNU General Public License as published by
5530 + * the Free Software Foundation; either version 2 of the License, or
5531 + * (at your option) any later version.
5532 + *
5533 + * This program is distributed in the hope that it will be useful,
5534 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
5535 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
5536 + * GNU General Public License for more details.
5537 + *
5538 + * You should have received a copy of the GNU General Public License
5539 + * along with this program; if not, write to the Free Software
5540 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
5541 + */
5542 +
5543 +/*
5544 + * lookup and dentry operations
5545 + */
5546 +
5547 +#include <linux/namei.h>
5548 +#include "aufs.h"
5549 +/*
     + * Set up @h_nd, the nameidata handed to a lower filesystem.
     + * With @nd, @h_nd becomes a copy of it minus LOOKUP_OPEN, LOOKUP_CREATE and
     + * LOOKUP_FOLLOW, and with the open-intent file cleared.
     + * With a NULL @nd, @h_nd is zero-filled.
     + */
5550 +static void au_h_nd(struct nameidata *h_nd, struct nameidata *nd)
5551 +{
5552 +       if (nd) {
5553 +               *h_nd = *nd;
5554 +
5555 +               /*
5556 +                * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
5557 +                * due to whiteout and branch permission.
5558 +                */
5559 +               h_nd->flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
5560 +                                | LOOKUP_FOLLOW);
5561 +               /* unnecessary? */
5562 +               h_nd->intent.open.file = NULL;
5563 +       } else
5564 +               memset(h_nd, 0, sizeof(*h_nd));
5565 +}
5566 +/*
     + * The parameters of au_lkup_one() bundled into one object so that
     + * au_call_lkup_one() can receive them through a single void pointer.
     + * The lookup result is written to *errp.
     + */
5567 +struct au_lkup_one_args {
5568 +       struct dentry **errp;
5569 +       struct qstr *name;
5570 +       struct dentry *h_parent;
5571 +       struct au_branch *br;
5572 +       struct nameidata *nd;
5573 +};
5574 +/*
     + * Look @name up under the lower directory @h_parent, which lives on @br.
     + * When au_test_fs_null_nd() is true for the lower superblock, the lookup
     + * goes through vfsub_lookup_one_len() and @nd is not used.  Otherwise a
     + * lower nameidata is built from @nd (which may be NULL) and pointed at
     + * @h_parent / @br->br_mnt before vfsub_lookup_hash() is called.
     + * Returns the found dentry or an ERR_PTR() value.
     + */
5575 +struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
5576 +                          struct au_branch *br, struct nameidata *nd)
5577 +{
5578 +       struct dentry *h_dentry;
5579 +       int err;
5580 +       struct nameidata h_nd;
5581 +
5582 +       if (au_test_fs_null_nd(h_parent->d_sb))
5583 +               return vfsub_lookup_one_len(name->name, h_parent, name->len);
5584 +
5585 +       au_h_nd(&h_nd, nd);
5586 +       h_nd.path.dentry = h_parent;
5587 +       h_nd.path.mnt = br->br_mnt;
5588 +       /* __lookup_one_len() sets up h_nd.last; look up only if it succeeds */
5589 +       err = __lookup_one_len(name->name, &h_nd.last, NULL, name->len);
5590 +       h_dentry = ERR_PTR(err);
5591 +       if (!err) {
5592 +               path_get(&h_nd.path); /* hold the path across the lookup */
5593 +               h_dentry = vfsub_lookup_hash(&h_nd);
5594 +               path_put(&h_nd.path);
5595 +       }
5596 +
5597 +       AuTraceErrPtr(h_dentry);
5598 +       return h_dentry;
5599 +}
5600 +/*
     + * Callback given to au_wkq_wait(): unpack the struct au_lkup_one_args
     + * behind @args, run au_lkup_one() and store its result through ->errp.
     + */
5601 +static void au_call_lkup_one(void *args)
5602 +{
5603 +       struct au_lkup_one_args *lkup = args;
5604 +       struct dentry *found;
     +
     +       found = au_lkup_one(lkup->name, lkup->h_parent, lkup->br, lkup->nd);
     +       *lkup->errp = found;
5605 +}
5606 +
5607 +#define AuLkup_ALLOW_NEG       1
5608 +#define au_ftest_lkup(flags, name)     ((flags) & AuLkup_##name)
5609 +#define au_fset_lkup(flags, name)      { (flags) |= AuLkup_##name; }
5610 +#define au_fclr_lkup(flags, name)      { (flags) &= ~AuLkup_##name; }
5611 +/*
     + * Lookup state shared by au_lkup_dentry() and au_do_lookup():
     + * AuLkup_* flags, the expected file type (S_IFMT bits, zero when not yet
     + * known) and the caller's nameidata, which may be NULL.
     + */
5612 +struct au_do_lookup_args {
5613 +       unsigned int            flags;
5614 +       mode_t                  type;
5615 +       struct nameidata        *nd;
5616 +};
5617 +
5618 +/*
     + * Look the name of @dentry up on the single branch @bindex, under the lower
     + * directory @h_parent.  When the branch permits whiteouts, @wh_name is
     + * tested first and a hit is recorded with au_set_dbwh().  An accepted lower
     + * dentry is stored in @dentry at @bindex, and for a lower directory the
     + * opaque mark is tested and recorded as well.
     + *
5619 + * returns positive/negative dentry, NULL or an error.
5620 + * NULL means whiteout-ed or not-found.
     + * A negative dentry is returned only when AuLkup_ALLOW_NEG is set in
     + * @args->flags; a positive one is dropped (NULL) when a whiteout was found
     + * or when its file type differs from a non-zero @args->type.
5621 + */
5622 +static struct dentry*
5623 +au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
5624 +            aufs_bindex_t bindex, struct qstr *wh_name,
5625 +            struct au_do_lookup_args *args)
5626 +{
5627 +       struct dentry *h_dentry;
5628 +       struct inode *h_inode, *inode;
5629 +       struct qstr *name;
5630 +       struct au_branch *br;
5631 +       int wh_found, opq;
5632 +       unsigned char wh_able;
5633 +       const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
5634 +
5635 +       name = &dentry->d_name;
5636 +       wh_found = 0;
5637 +       br = au_sbr(dentry->d_sb, bindex);
5638 +       wh_able = !!au_br_whable(br->br_perm);
5639 +       if (wh_able)
5640 +               wh_found = au_wh_test(h_parent, wh_name, br, /*try_sio*/0);
5641 +       h_dentry = ERR_PTR(wh_found); /* returned only if wh_found < 0 */
5642 +       if (!wh_found)
5643 +               goto real_lookup;
5644 +       if (unlikely(wh_found < 0))
5645 +               goto out;
5646 +
5647 +       /* We found a whiteout */
5648 +       /* au_set_dbend(dentry, bindex); */
5649 +       au_set_dbwh(dentry, bindex);
5650 +       if (!allow_neg)
5651 +               return NULL; /* success */
5652 +
5653 + real_lookup:
5654 +       h_dentry = au_lkup_one(name, h_parent, br, args->nd);
5655 +       if (IS_ERR(h_dentry))
5656 +               goto out;
5657 +
5658 +       h_inode = h_dentry->d_inode;
5659 +       if (!h_inode) {
5660 +               if (!allow_neg)
5661 +                       goto out_neg;
5662 +       } else if (wh_found
5663 +                  || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
5664 +               goto out_neg;
5665 +       /* accepted: widen [dbstart, dbend] as needed and store the lower dentry */
5666 +       if (au_dbend(dentry) <= bindex)
5667 +               au_set_dbend(dentry, bindex);
5668 +       if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
5669 +               au_set_dbstart(dentry, bindex);
5670 +       au_set_h_dptr(dentry, bindex, h_dentry);
5671 +       /* the opaque test applies only to directories on whiteout-able branches */
5672 +       inode = dentry->d_inode;
5673 +       if (!h_inode || !S_ISDIR(h_inode->i_mode) || !wh_able
5674 +           || (inode && !S_ISDIR(inode->i_mode)))
5675 +               goto out; /* success */
5676 +
5677 +       mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
5678 +       opq = au_diropq_test(h_dentry, br);
5679 +       mutex_unlock(&h_inode->i_mutex);
5680 +       if (opq > 0)
5681 +               au_set_dbdiropq(dentry, bindex);
5682 +       else if (unlikely(opq < 0)) {
5683 +               au_set_h_dptr(dentry, bindex, NULL);
5684 +               h_dentry = ERR_PTR(opq);
5685 +       }
5686 +       goto out;
5687 +
5688 + out_neg:
5689 +       dput(h_dentry);
5690 +       h_dentry = NULL;
5691 + out:
5692 +       return h_dentry;
5693 +}
5694 +/*
     + * Reject names that begin with AUFS_WH_PFX unless the SHWH mount option
     + * is set.  Returns 0 when @name is acceptable, otherwise -EPERM.
     + */
5695 +static int au_test_shwh(struct super_block *sb, const struct qstr *name)
5696 +{
5697 +       int err;
5698 +
5699 +       err = 0;
5700 +       if (!au_opt_test(au_mntflags(sb), SHWH)) {
     +               if (unlikely(!strncmp(name->name, AUFS_WH_PFX,
     +                                     AUFS_WH_PFX_LEN)))
     +                       err = -EPERM;
     +       }
     +       return err;
5701 +}
5702 +
5703 +/*
     + * Walk the branches from @bstart to the parent's au_dbtaildir() and look
     + * @dentry up on every branch that has no lower dentry stored yet.
     + *
5704 + * returns the number of lower positive dentries,
5705 + * otherwise an error.
5706 + * can be called at unlinking with @type is zero.
     + * With a zero @type the first lookup may keep a negative lower dentry, and
     + * the type is then taken from the first positive lower inode found.
     + * -EIO is returned when udba is not "none" and no lower dentry was stored.
5707 + */
5708 +int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
5709 +                  struct nameidata *nd)
5710 +{
5711 +       int npositive, err;
5712 +       aufs_bindex_t bindex, btail, bdiropq;
5713 +       unsigned char isdir;
5714 +       struct qstr whname;
5715 +       struct au_do_lookup_args args = {
5716 +               .flags  = 0,
5717 +               .type   = type,
5718 +               .nd     = nd
5719 +       };
5720 +       const struct qstr *name = &dentry->d_name;
5721 +       struct dentry *parent;
5722 +       struct inode *inode;
5723 +
5724 +       parent = dget_parent(dentry);
5725 +       err = au_test_shwh(dentry->d_sb, name);
5726 +       if (unlikely(err))
5727 +               goto out;
5728 +       /* whname.name is allocated here and freed at out_wh */
5729 +       err = au_wh_name_alloc(&whname, name);
5730 +       if (unlikely(err))
5731 +               goto out;
5732 +
5733 +       inode = dentry->d_inode;
5734 +       isdir = !!(inode && S_ISDIR(inode->i_mode));
5735 +       if (!type)
5736 +               au_fset_lkup(args.flags, ALLOW_NEG);
5737 +
5738 +       npositive = 0;
5739 +       btail = au_dbtaildir(parent);
5740 +       for (bindex = bstart; bindex <= btail; bindex++) {
5741 +               struct dentry *h_parent, *h_dentry;
5742 +               struct inode *h_inode, *h_dir;
5743 +
5744 +               h_dentry = au_h_dptr(dentry, bindex);
5745 +               if (h_dentry) {
     +                       /* already known on this branch: count it, no lookup */
5746 +                       if (h_dentry->d_inode)
5747 +                               npositive++;
5748 +                       if (type != S_IFDIR)
5749 +                               break;
5750 +                       continue;
5751 +               }
5752 +               h_parent = au_h_dptr(parent, bindex);
5753 +               if (!h_parent)
5754 +                       continue;
5755 +               h_dir = h_parent->d_inode;
5756 +               if (!h_dir || !S_ISDIR(h_dir->i_mode))
5757 +                       continue;
5758 +
5759 +               mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
5760 +               h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
5761 +                                       &args);
5762 +               mutex_unlock(&h_dir->i_mutex);
5763 +               err = PTR_ERR(h_dentry);
5764 +               if (IS_ERR(h_dentry))
5765 +                       goto out_wh;
5766 +               au_fclr_lkup(args.flags, ALLOW_NEG); /* first lookup only */
5767 +               /* a whiteout index is set on @dentry: stop searching */
5768 +               if (au_dbwh(dentry) >= 0)
5769 +                       break;
5770 +               if (!h_dentry)
5771 +                       continue;
5772 +               h_inode = h_dentry->d_inode;
5773 +               if (!h_inode)
5774 +                       continue;
5775 +               npositive++;
5776 +               if (!args.type)
5777 +                       args.type = h_inode->i_mode & S_IFMT;
5778 +               if (args.type != S_IFDIR)
5779 +                       break;
5780 +               else if (isdir) {
5781 +                       /* the type of lower may be different */
5782 +                       bdiropq = au_dbdiropq(dentry);
5783 +                       if (bdiropq >= 0 && bdiropq <= bindex)
5784 +                               break;
5785 +               }
5786 +       }
5787 +
5788 +       if (npositive) {
5789 +               AuLabel(positive);
5790 +               au_update_dbstart(dentry);
5791 +       }
5792 +       err = npositive;
5793 +       if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
5794 +                    && au_dbstart(dentry) < 0))
5795 +               /* both of real entry and whiteout found */
5796 +               err = -EIO;
5797 +
5798 + out_wh:
5799 +       kfree(whname.name);
5800 + out:
5801 +       dput(parent);
5802 +       return err;
5803 +}
5804 +/*
     + * Look @name up under the lower directory @parent on branch @br.
     + * The lookup is done directly when au_test_h_perm_sio() returns zero for
     + * MAY_EXEC on the directory inode; otherwise au_call_lkup_one() is run
     + * through au_wkq_wait().  Returns the dentry or an ERR_PTR() value.
     + */
5805 +struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
5806 +                              struct au_branch *br)
5807 +{
5808 +       int wkq_err;
5809 +       struct dentry *found;
5810 +       struct au_lkup_one_args args = {
5811 +               .errp           = &found,
5812 +               .name           = name,
5813 +               .h_parent       = parent,
5814 +               .br             = br,
5815 +               .nd             = NULL
5816 +       };
5817 +
5818 +       if (!au_test_h_perm_sio(parent->d_inode, MAY_EXEC))
5819 +               return au_lkup_one(name, parent, br, /*nd*/NULL);
5820 +
5821 +       wkq_err = au_wkq_wait(au_call_lkup_one, &args);
5822 +       if (unlikely(wkq_err))
5823 +               found = ERR_PTR(wkq_err);
5824 +       return found;
5825 +}
5829 +
5830 +/*
5831 + * lookup @dentry on @bindex which should be negative.
     + * On success the negative lower dentry is stored in @dentry at @bindex, the
     + * [dbstart, dbend] range is widened to include @bindex, and 0 is returned.
     + * A positive lower dentry is reported and dropped, and -EIO is returned;
     + * a failed lookup returns its own error.
5832 + */
5833 +int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex)
5834 +{
5835 +       int err;
5836 +       struct dentry *parent, *h_parent, *h_dentry;
5837 +       struct qstr *name;
5838 +
5839 +       name = &dentry->d_name;
5840 +       parent = dget_parent(dentry);
5841 +       h_parent = au_h_dptr(parent, bindex);
5842 +       h_dentry = au_sio_lkup_one(name, h_parent,
5843 +                                  au_sbr(dentry->d_sb, bindex));
5844 +       err = PTR_ERR(h_dentry);
5845 +       if (IS_ERR(h_dentry))
5846 +               goto out;
5847 +       if (unlikely(h_dentry->d_inode)) {
5848 +               err = -EIO;
5849 +               AuIOErr("b%d %.*s should be negative.\n",
5850 +                       bindex, AuDLNPair(h_dentry));
5851 +               dput(h_dentry);
5852 +               goto out;
5853 +       }
5854 +
5855 +       if (bindex < au_dbstart(dentry))
5856 +               au_set_dbstart(dentry, bindex);
5857 +       if (au_dbend(dentry) < bindex)
5858 +               au_set_dbend(dentry, bindex);
5859 +       au_set_h_dptr(dentry, bindex, h_dentry);
5860 +       err = 0;
5861 +
5862 + out:
5863 +       dput(parent);
5864 +       return err;
5865 +}
5866 +
5867 +/* ---------------------------------------------------------------------- */
5868 +
5869 +/*
     + * subset of struct inode
     + * Snapshot filled by au_iattr_save() and compared by au_iattr_test().
     + * i_mode holds only the S_IFMT (file type) bits; the members commented
     + * out below are not tracked.
     + */
5870 +struct au_iattr {
5871 +       unsigned long           i_ino;
5872 +       /* unsigned int         i_nlink; */
5873 +       uid_t                   i_uid;
5874 +       gid_t                   i_gid;
5875 +       u64                     i_version;
5876 +/*
5877 +       loff_t                  i_size;
5878 +       blkcnt_t                i_blocks;
5879 +*/
5880 +       umode_t                 i_mode;
5881 +};
5882 +/*
     + * Record in @ia the attributes of @h_inode that au_iattr_test() compares
     + * later: inode number, owner, group, version and the file-type bits.
     + */
5883 +static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
5884 +{
5885 +       ia->i_ino = h_inode->i_ino;
5886 +       /* ia->i_nlink = h_inode->i_nlink; */
5887 +       ia->i_uid = h_inode->i_uid;
5888 +       ia->i_gid = h_inode->i_gid;
5889 +       ia->i_version = h_inode->i_version;
5890 +/*
5891 +       ia->i_size = h_inode->i_size;
5892 +       ia->i_blocks = h_inode->i_blocks;
5893 +*/
5894 +       ia->i_mode = (h_inode->i_mode & S_IFMT);
5895 +}
5896 +/*
     + * Tell whether @h_inode differs from the snapshot @ia.
     + * Returns 1 on the first mismatch among i_ino, i_uid, i_gid, i_version and
     + * the file-type bits of i_mode, and 0 when all of them match.
     + * i_nlink, i_size and i_blocks are not compared.
     + */
5897 +static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
5898 +{
5899 +       if (ia->i_ino != h_inode->i_ino)
5900 +               return 1;
5901 +       if (ia->i_uid != h_inode->i_uid)
5902 +               return 1;
5903 +       if (ia->i_gid != h_inode->i_gid)
5904 +               return 1;
5905 +       if (ia->i_version != h_inode->i_version)
5906 +               return 1;
5907 +       if (ia->i_mode != (h_inode->i_mode & S_IFMT))
5908 +               return 1;
     +       return 0;
5909 +}
5910 +/*
     + * Check that @h_dentry is still what a fresh lookup of its name under
     + * @h_parent (on @br) yields.  Returns the lookup error if the lookup fails,
     + * the value of au_busy_or_stale() when the dentry, its inode or the saved
     + * attributes differ and the lower filesystem is not fuse, and 0 otherwise.
     + */
5911 +static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
5912 +                             struct au_branch *br)
5913 +{
5914 +       int err;
5915 +       struct au_iattr ia;
5916 +       struct inode *h_inode;
5917 +       struct dentry *h_d;
5918 +
5919 +       memset(&ia, -1, sizeof(ia)); /* all bits set; read only if h_inode */
5920 +       h_inode = h_dentry->d_inode;
5921 +       if (h_inode)
5922 +               au_iattr_save(&ia, h_inode);
5923 +
5924 +       /* main purpose is namei.c:cached_lookup() and d_revalidate */
5925 +       h_d = au_lkup_one(&h_dentry->d_name, h_parent, br, /*nd*/NULL);
5926 +       err = PTR_ERR(h_d);
5927 +       if (IS_ERR(h_d))
5928 +               goto out;
5929 +
5930 +       /* fuse d_revalidate always return 0 for negative dentries */
5931 +       err = 0;
5932 +       if (unlikely((h_d != h_dentry
5933 +                    || h_d->d_inode != h_inode
5934 +                    || (h_inode && au_iattr_test(&ia, h_inode)))
5935 +                    && !au_test_fuse(h_parent->d_sb)))
5936 +               err = au_busy_or_stale();
5937 +       dput(h_d);
5938 +
5939 + out:
5940 +       return err;
5941 +}
5942 +/*
     + * Verify the lower dentry @h_dentry according to the udba level @udba.
     + * AuOpt_UDBA_REVAL: after IMustLock(@h_dir), returns non-zero when @h_dir
     + * is not the inode of @h_dentry's parent.
     + * AuOpt_UDBA_HINOTIFY: returns the result of au_h_verify_dentry().
     + * Any other level: returns 0.
     + */
5943 +int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
5944 +               struct dentry *h_parent, struct au_branch *br)
5945 +{
5946 +       if (udba == AuOpt_UDBA_REVAL) {
5947 +               IMustLock(h_dir);
5948 +               return h_dentry->d_parent->d_inode != h_dir;
5949 +       }
5950 +
5951 +       if (udba == AuOpt_UDBA_HINOTIFY)
5952 +               return au_h_verify_dentry(h_dentry, h_parent, br);
5953 +
5954 +       return 0;
5955 +}
5957 +
5958 +/* ---------------------------------------------------------------------- */
5959 +/*
     + * Re-align the lower-dentry array of @dinfo with @parent.  @p points at the
     + * slot of @dinfo->di_bstart.  A lower dentry stays where it is when its own
     + * parent is @parent's lower dentry at the same index; otherwise it moves to
     + * the index au_find_dbindex() reports, or is released when that index is
     + * negative.  di_bwh and di_bdiropq follow the moves and are then validated
     + * against the superblock; di_bstart and di_bend are recomputed last.
     + * The caller must hold @dinfo->di_rwsem for writing.
     + */
5960 +static void au_do_refresh_hdentry(struct au_hdentry *p, struct au_dinfo *dinfo,
5961 +                                 struct dentry *parent)
5962 +{
5963 +       struct dentry *h_d, *h_dp;
5964 +       struct au_hdentry tmp, *q;
5965 +       struct super_block *sb;
5966 +       aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq;
5967 +
5968 +       AuRwMustWriteLock(&dinfo->di_rwsem);
5969 +
5970 +       bend = dinfo->di_bend;
5971 +       bwh = dinfo->di_bwh;
5972 +       bdiropq = dinfo->di_bdiropq;
5973 +       for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) {
5974 +               h_d = p->hd_dentry;
5975 +               if (!h_d)
5976 +                       continue;
5977 +
5978 +               h_dp = dget_parent(h_d);
5979 +               if (h_dp == au_h_dptr(parent, bindex)) {
5980 +                       dput(h_dp);
5981 +                       continue;
5982 +               }
5983 +
5984 +               new_bindex = au_find_dbindex(parent, h_dp);
5985 +               dput(h_dp);
5986 +               if (dinfo->di_bwh == bindex)
5987 +                       bwh = new_bindex;
5988 +               if (dinfo->di_bdiropq == bindex)
5989 +                       bdiropq = new_bindex;
5990 +               if (new_bindex < 0) {
5991 +                       au_hdput(p);
5992 +                       p->hd_dentry = NULL;
5993 +                       continue;
5994 +               }
5995 +
5996 +               /* swap two lower dentries, and loop again */
5997 +               q = dinfo->di_hdentry + new_bindex;
5998 +               tmp = *q;
5999 +               *q = *p;
6000 +               *p = tmp;
6001 +               if (tmp.hd_dentry) {
     +                       /* cancel the loop increment: re-check this slot */
6002 +                       bindex--;
6003 +                       p--;
6004 +               }
6005 +       }
6006 +
6007 +       sb = parent->d_sb;
6008 +       dinfo->di_bwh = -1;
6009 +       if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh))
6010 +               dinfo->di_bwh = bwh;
6011 +
6012 +       dinfo->di_bdiropq = -1;
6013 +       if (bdiropq >= 0
6014 +           && bdiropq <= au_sbend(sb)
6015 +           && au_sbr_whable(sb, bdiropq))
6016 +               dinfo->di_bdiropq = bdiropq;
6017 +       /* di_bstart / di_bend: first and last occupied slot up to parent's dbend */
6018 +       bend = au_dbend(parent);
6019 +       p = dinfo->di_hdentry;
6020 +       for (bindex = 0; bindex <= bend; bindex++, p++)
6021 +               if (p->hd_dentry) {
6022 +                       dinfo->di_bstart = bindex;
6023 +                       break;
6024 +               }
6025 +
6026 +       p = dinfo->di_hdentry + bend;
6027 +       for (bindex = bend; bindex >= 0; bindex--, p--)
6028 +               if (p->hd_dentry) {
6029 +                       dinfo->di_bend = bindex;
6030 +                       break;
6031 +               }
6032 +}
6033 +
6034 +/*
     + * Refresh the lower dentries of the non-root @dentry: resize its array to
     + * the current number of branches, re-align it with the parent through
     + * au_do_refresh_hdentry(), and look the name up again via au_lkup_dentry().
     + * The lookup is skipped for a non-directory @type whose di_bstart already
     + * equals the parent's.  On success the dentry generation is updated, and
     + * @dentry is unhashed when a whiteout sits at or above its di_bstart.
     + * The caller must hold the dentry-info write lock of @dentry.
     + *
6035 + * returns the number of found lower positive dentries,
6036 + * otherwise an error.
6037 + */
6038 +int au_refresh_hdentry(struct dentry *dentry, mode_t type)
6039 +{
6040 +       int npositive, err;
6041 +       unsigned int sigen;
6042 +       aufs_bindex_t bstart;
6043 +       struct au_dinfo *dinfo;
6044 +       struct super_block *sb;
6045 +       struct dentry *parent;
6046 +
6047 +       DiMustWriteLock(dentry);
6048 +
6049 +       sb = dentry->d_sb;
6050 +       AuDebugOn(IS_ROOT(dentry));
6051 +       sigen = au_sigen(sb);
6052 +       parent = dget_parent(dentry);
6053 +       AuDebugOn(au_digen(parent) != sigen
6054 +                 || au_iigen(parent->d_inode) != sigen);
6055 +
6056 +       dinfo = au_di(dentry);
6057 +       err = au_di_realloc(dinfo, au_sbend(sb) + 1);
6058 +       npositive = err; /* returned as is when the resize fails */
6059 +       if (unlikely(err))
6060 +               goto out;
6061 +       au_do_refresh_hdentry(dinfo->di_hdentry + dinfo->di_bstart, dinfo,
6062 +                             parent);
6063 +
6064 +       npositive = 0;
6065 +       bstart = au_dbstart(parent);
6066 +       if (type != S_IFDIR && dinfo->di_bstart == bstart)
6067 +               goto out_dgen; /* success */
6068 +
6069 +       npositive = au_lkup_dentry(dentry, bstart, type, /*nd*/NULL);
6070 +       if (npositive < 0)
6071 +               goto out;
6072 +       if (dinfo->di_bwh >= 0 && dinfo->di_bwh <= dinfo->di_bstart)
6073 +               d_drop(dentry);
6074 +
6075 + out_dgen:
6076 +       au_update_digen(dentry);
6077 + out:
6078 +       dput(parent);
6079 +       AuTraceErr(npositive);
6080 +       return npositive;
6081 +}
6082 +/*
     + * Call ->d_revalidate() of the lower dentry @h_dentry, which is the lower
     + * dentry of @dentry on branch @bindex.
     + * The lower filesystem gets a NULL nameidata when the caller has none
     + * (@nd == NULL) or when au_test_fs_null_nd() is true for it; otherwise it
     + * gets a private copy of @nd whose path points at the lower dentry of
     + * @nd->path.dentry on the same branch.
     + * Returns 0 when the lower dentry is valid or has no ->d_revalidate(),
     + * -EINVAL when the lower filesystem reports it invalid, or the negative
     + * value the lower filesystem returned.
     + */
6083 +static noinline_for_stack
6084 +int au_do_h_d_reval(struct dentry *h_dentry, struct nameidata *nd,
6085 +                   struct dentry *dentry, aufs_bindex_t bindex)
6086 +{
6087 +       int err, valid;
6088 +       int (*reval)(struct dentry *, struct nameidata *);
6089 +
6090 +       err = 0;
6091 +       reval = NULL;
6092 +       if (h_dentry->d_op)
6093 +               reval = h_dentry->d_op->d_revalidate;
6094 +       if (!reval)
6095 +               goto out;
6096 +
6097 +       AuDbg("b%d\n", bindex);
     +       /*
     +        * Without @nd there is no parent to build a lower path from, and
     +        * dereferencing it below would oops, so pass NULL down unchanged.
     +        */
6098 +       if (!nd || au_test_fs_null_nd(h_dentry->d_sb))
6099 +               /* it may return tri-state */
6100 +               valid = reval(h_dentry, NULL);
6101 +       else {
6102 +               struct nameidata h_nd;
6103 +               int locked;
6104 +               struct dentry *parent;
6105 +
6106 +               au_h_nd(&h_nd, nd);
6107 +               parent = nd->path.dentry;
     +               /* @dentry itself is not locked again here */
6108 +               locked = (parent != dentry);
6109 +               if (locked)
6110 +                       di_read_lock_parent(parent, AuLock_IR);
6111 +               BUG_ON(bindex > au_dbend(parent));
6112 +               h_nd.path.dentry = au_h_dptr(parent, bindex);
6113 +               BUG_ON(!h_nd.path.dentry);
6114 +               h_nd.path.mnt = au_sbr(parent->d_sb, bindex)->br_mnt;
6115 +               path_get(&h_nd.path);
6116 +               valid = reval(h_dentry, &h_nd);
6117 +               path_put(&h_nd.path);
6118 +               if (locked)
6119 +                       di_read_unlock(parent, AuLock_IR);
6120 +       }
6121 +
6122 +       if (unlikely(valid < 0))
6123 +               err = valid;
6124 +       else if (!valid)
6125 +               err = -EINVAL;
6126 +
6127 + out:
6128 +       AuTraceErr(err);
6129 +       return err;
6130 +}
6131 +
6132 +/*
     + * Revalidate the lower dentries of @dentry: only the one at au_dbstart()
     + * for a non-directory, up to au_dbtaildir() for a directory.
     + * For each lower dentry: with @do_udba and a non-root @dentry, its unhashed
     + * state and its name must match those of @dentry; the lower
     + * ->d_revalidate() is then run through au_do_h_d_reval(); and with
     + * @do_udba the presence of an inode, the file type, the "nlink > 0" state
     + * and the lower inode cached in @inode must all agree.
     + * Returns 0, -EINVAL on any mismatch, or the error of au_do_h_d_reval().
     + * NOTE(review): the local "first" is assigned but never read.
     + *
     + * todo: remove this
     + */
6133 +static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
6134 +                         struct nameidata *nd, int do_udba)
6135 +{
6136 +       int err;
6137 +       umode_t mode, h_mode;
6138 +       aufs_bindex_t bindex, btail, bstart, ibs, ibe;
6139 +       unsigned char plus, unhashed, is_root, h_plus;
6140 +       struct inode *first, *h_inode, *h_cached_inode;
6141 +       struct dentry *h_dentry;
6142 +       struct qstr *name, *h_name;
6143 +
6144 +       err = 0;
6145 +       plus = 0;
6146 +       mode = 0;
6147 +       first = NULL;
6148 +       ibs = -1;
6149 +       ibe = -1;
6150 +       unhashed = !!d_unhashed(dentry);
6151 +       is_root = !!IS_ROOT(dentry);
6152 +       name = &dentry->d_name;
6153 +
6154 +       /*
6155 +        * Theoretically, REVAL test should be unnecessary in case of INOTIFY.
6156 +        * But inotify doesn't fire some necessary events,
6157 +        *      IN_ATTRIB for atime/nlink/pageio
6158 +        *      IN_DELETE for NFS dentry
6159 +        * Let's do REVAL test too.
6160 +        */
6161 +       if (do_udba && inode) {
6162 +               mode = (inode->i_mode & S_IFMT);
6163 +               plus = (inode->i_nlink > 0);
6164 +               first = au_h_iptr(inode, au_ibstart(inode));
6165 +               ibs = au_ibstart(inode);
6166 +               ibe = au_ibend(inode);
6167 +       }
6168 +
6169 +       bstart = au_dbstart(dentry);
6170 +       btail = bstart;
6171 +       if (inode && S_ISDIR(inode->i_mode))
6172 +               btail = au_dbtaildir(dentry);
6173 +       for (bindex = bstart; bindex <= btail; bindex++) {
6174 +               h_dentry = au_h_dptr(dentry, bindex);
6175 +               if (!h_dentry)
6176 +                       continue;
6177 +
6178 +               AuDbg("b%d, %.*s\n", bindex, AuDLNPair(h_dentry));
6179 +               h_name = &h_dentry->d_name;
6180 +               if (unlikely(do_udba
6181 +                            && !is_root
6182 +                            && (unhashed != !!d_unhashed(h_dentry)
6183 +                                || name->len != h_name->len
6184 +                                || memcmp(name->name, h_name->name, name->len))
6185 +                           )) {
6186 +                       AuDbg("unhash 0x%x 0x%x, %.*s %.*s\n",
6187 +                                 unhashed, d_unhashed(h_dentry),
6188 +                                 AuDLNPair(dentry), AuDLNPair(h_dentry));
6189 +                       goto err;
6190 +               }
6191 +
6192 +               err = au_do_h_d_reval(h_dentry, nd, dentry, bindex);
6193 +               if (unlikely(err))
6194 +                       /* do not goto err, to keep the errno */
6195 +                       break;
6196 +
6197 +               /* todo: plink too? */
6198 +               if (!do_udba)
6199 +                       continue;
6200 +
6201 +               /* UDBA tests */
6202 +               h_inode = h_dentry->d_inode;
6203 +               if (unlikely(!!inode != !!h_inode))
6204 +                       goto err;
6205 +               /* defaults make the comparison below pass for a negative pair */
6206 +               h_plus = plus;
6207 +               h_mode = mode;
6208 +               h_cached_inode = h_inode;
6209 +               if (h_inode) {
6210 +                       h_mode = (h_inode->i_mode & S_IFMT);
6211 +                       h_plus = (h_inode->i_nlink > 0);
6212 +               }
6213 +               if (inode && ibs <= bindex && bindex <= ibe)
6214 +                       h_cached_inode = au_h_iptr(inode, bindex);
6215 +
6216 +               if (unlikely(plus != h_plus
6217 +                            || mode != h_mode
6218 +                            || h_cached_inode != h_inode))
6219 +                       goto err;
6220 +               continue;
6221 +
6222 +       err:
6223 +               err = -EINVAL;
6224 +               break;
6225 +       }
6226 +
6227 +       return err;
6228 +}
6229 +/*
     + * Refresh @dentry and its inode when either generation differs from
     + * @sigen.  The parent is read-locked meanwhile and is expected to be at
     + * @sigen already (checked by AuDebugOn()).
     + * Returns 0 when nothing had to be done, a negative error from
     + * au_refresh_hdentry(), or else the result of au_refresh_hinode().
     + */
6230 +static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
6231 +{
6232 +       int err;
6233 +       struct dentry *parent;
6234 +       struct inode *inode;
6235 +
6236 +       inode = dentry->d_inode;
6237 +       if (au_digen(dentry) == sigen && au_iigen(inode) == sigen)
6238 +               return 0;
6239 +
6240 +       parent = dget_parent(dentry);
6241 +       di_read_lock_parent(parent, AuLock_IR);
6242 +       AuDebugOn(au_digen(parent) != sigen
6243 +                 || au_iigen(parent->d_inode) != sigen);
6244 +       au_dbg_verify_gen(parent, sigen);
6245 +
6246 +       /* returns a number of positive dentries */
6247 +       err = au_refresh_hdentry(dentry, inode->i_mode & S_IFMT);
6248 +       if (err >= 0)
6249 +               err = au_refresh_hinode(inode, dentry);
6250 +
6251 +       di_read_unlock(parent, AuLock_IR);
6252 +       dput(parent);
6253 +       return err;
6254 +}
6255 +/*
     + * Bring @dentry (and its inode) up to generation @sigen.
     + * Unless the superblock carries the FAILED_REFRESH_DIRS flag, this is
     + * simple_reval_dpath().  Otherwise ancestors may be stale too: each pass
     + * of the outer loop climbs from @dentry to the topmost stale dentry "d"
     + * whose parent is already current, refreshes "d", and repeats until
     + * @dentry itself is current.
     + * Returns 0 on success, otherwise the error of the failed refresh.
     + */
6256 +int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
6257 +{
6258 +       int err;
6259 +       struct dentry *d, *parent;
6260 +       struct inode *inode;
6261 +
6262 +       if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIRS))
6263 +               return simple_reval_dpath(dentry, sigen);
6264 +
6265 +       /* slow loop, keep it simple and stupid */
6266 +       /* cf: au_cpup_dirs() */
6267 +       err = 0;
6268 +       parent = NULL;
6269 +       while (au_digen(dentry) != sigen
6270 +              || au_iigen(dentry->d_inode) != sigen) {
6271 +               d = dentry;
6272 +               while (1) {
     +                       /* drop the reference taken one level below, if any */
6273 +                       dput(parent);
6274 +                       parent = dget_parent(d);
6275 +                       if (au_digen(parent) == sigen
6276 +                           && au_iigen(parent->d_inode) == sigen)
6277 +                               break;
6278 +                       d = parent;
6279 +               }
6280 +
6281 +               inode = d->d_inode;
6282 +               if (d != dentry)
6283 +                       di_write_lock_child(d);
6284 +
6285 +               /* someone might update our dentry while we were sleeping */
6286 +               if (au_digen(d) != sigen || au_iigen(d->d_inode) != sigen) {
6287 +                       di_read_lock_parent(parent, AuLock_IR);
6288 +                       /* returns a number of positive dentries */
6289 +                       err = au_refresh_hdentry(d, inode->i_mode & S_IFMT);
6290 +                       if (err >= 0)
6291 +                               err = au_refresh_hinode(inode, d);
6292 +                       di_read_unlock(parent, AuLock_IR);
6293 +               }
6294 +
6295 +               if (d != dentry)
6296 +                       di_write_unlock(d);
6297 +               dput(parent);
     +               /*
     +                * The reference is gone.  Forget the pointer, or the next
     +                * pass would dput() the same dentry a second time.
     +                */
     +               parent = NULL;
6298 +               if (unlikely(err))
6299 +                       break;
6300 +       }
6301 +
6302 +       return err;
6303 +}
6304 +
6305 +/*
     + * ->d_revalidate() of aufs.
     + * Under aufs_read_lock(): refresh @dentry and its inode when their
     + * generation differs from the superblock's, downgrade the dentry lock, and
     + * then revalidate the lower dentries through h_d_revalidate().  Unless the
     + * udba mount option is "none", au_test_higen() is checked first on the
     + * topmost lower inode, and a dentry left without any lower dentry is
     + * treated as an I/O error.  A stale negative dentry is reported invalid.
     + *
6306 + * if valid returns 1, otherwise 0.
6307 + */
6308 +static int aufs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
6309 +{
6310 +       int valid, err;
6311 +       unsigned int sigen;
6312 +       unsigned char do_udba;
6313 +       struct super_block *sb;
6314 +       struct inode *inode;
6315 +
6316 +       err = -EINVAL; /* kept when a stale dentry has no inode to refresh */
6317 +       sb = dentry->d_sb;
6318 +       inode = dentry->d_inode;
6319 +       aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW);
6320 +       sigen = au_sigen(sb);
6321 +       if (au_digen(dentry) != sigen) {
6322 +               AuDebugOn(IS_ROOT(dentry));
6323 +               if (inode)
6324 +                       err = au_reval_dpath(dentry, sigen);
6325 +               if (unlikely(err))
6326 +                       goto out_dgrade;
6327 +               AuDebugOn(au_digen(dentry) != sigen);
6328 +       }
6329 +       if (inode && au_iigen(inode) != sigen) {
6330 +               AuDebugOn(IS_ROOT(dentry));
6331 +               err = au_refresh_hinode(inode, dentry);
6332 +               if (unlikely(err))
6333 +                       goto out_dgrade;
6334 +               AuDebugOn(au_iigen(inode) != sigen);
6335 +       }
6336 +       di_downgrade_lock(dentry, AuLock_IR);
6337 +
6338 +       AuDebugOn(au_digen(dentry) != sigen);
6339 +       AuDebugOn(inode && au_iigen(inode) != sigen);
6340 +       err = -EINVAL;
6341 +       do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
6342 +       if (do_udba && inode) {
6343 +               aufs_bindex_t bstart = au_ibstart(inode);
6344 +
6345 +               if (bstart >= 0
6346 +                   && au_test_higen(inode, au_h_iptr(inode, bstart)))
6347 +                       goto out;
6348 +       }
6349 +
6350 +       err = h_d_revalidate(dentry, inode, nd, do_udba);
6351 +       if (unlikely(!err && do_udba && au_dbstart(dentry) < 0))
6352 +               /* both of real entry and whiteout found */
6353 +               err = -EIO;
6354 +       goto out;
6355 +
6356 + out_dgrade:
6357 +       di_downgrade_lock(dentry, AuLock_IR);
6358 + out:
6359 +       au_store_oflag(nd, inode);
6360 +       aufs_read_unlock(dentry, AuLock_IR);
6361 +       AuTraceErr(err);
6362 +       valid = !err;
6363 +       if (!valid)
6364 +               AuDbg("%.*s invalid\n", AuDLNPair(dentry));
6365 +       return valid;
6366 +}
6367 +
6368 +static void aufs_d_release(struct dentry *dentry)
6369 +{
6370 +       struct au_dinfo *dinfo;
6371 +       aufs_bindex_t bend, bindex;
6372 +
6373 +       dinfo = dentry->d_fsdata;
6374 +       if (!dinfo)
6375 +               return; /* no private data attached */
6376 +
6377 +       /* dentry may not be revalidated; put every lower dentry still set */
6378 +       bindex = dinfo->di_bstart;
6379 +       if (bindex >= 0) {
6380 +               struct au_hdentry *p;
6381 +
6382 +               bend = dinfo->di_bend;
6383 +               p = dinfo->di_hdentry + bindex;
6384 +               while (bindex++ <= bend) { /* visits bstart..bend inclusive */
6385 +                       if (p->hd_dentry)
6386 +                               au_hdput(p);
6387 +                       p++;
6388 +               }
6389 +       }
6390 +       kfree(dinfo->di_hdentry);
6391 +       AuRwDestroy(&dinfo->di_rwsem);
6392 +       au_cache_free_dinfo(dinfo);
6393 +       au_hin_di_reinit(dentry); /* clears d_fsdata under CONFIG_AUFS_HINOTIFY */
6394 +}
6395 +
6396 +struct dentry_operations aufs_dop = {
6397 +       .d_revalidate   = aufs_d_revalidate,
6398 +       .d_release      = aufs_d_release
6399 +};
6400 diff --git a/fs/aufs/dentry.h b/fs/aufs/dentry.h
6401 new file mode 100644
6402 index 0000000..893288c
6403 --- /dev/null
6404 +++ b/fs/aufs/dentry.h
6405 @@ -0,0 +1,228 @@
6406 +/*
6407 + * Copyright (C) 2005-2009 Junjiro R. Okajima
6408 + *
6409 + * This program, aufs is free software; you can redistribute it and/or modify
6410 + * it under the terms of the GNU General Public License as published by
6411 + * the Free Software Foundation; either version 2 of the License, or
6412 + * (at your option) any later version.
6413 + *
6414 + * This program is distributed in the hope that it will be useful,
6415 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
6416 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
6417 + * GNU General Public License for more details.
6418 + *
6419 + * You should have received a copy of the GNU General Public License
6420 + * along with this program; if not, write to the Free Software
6421 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
6422 + */
6423 +
6424 +/*
6425 + * lookup and dentry operations
6426 + */
6427 +
6428 +#ifndef __AUFS_DENTRY_H__
6429 +#define __AUFS_DENTRY_H__
6430 +
6431 +#ifdef __KERNEL__
6432 +
6433 +#include <linux/dcache.h>
6434 +#include <linux/aufs_type.h>
6435 +#include "rwsem.h"
6436 +
6437 +/* make a single member structure for future use */
6438 +/* todo: remove this structure */
6439 +struct au_hdentry {
6440 +       struct dentry           *hd_dentry;
6441 +};
6442 +
6443 +struct au_dinfo {
6444 +       atomic_t                di_generation;
6445 +
6446 +       struct au_rwsem         di_rwsem;
6447 +       aufs_bindex_t           di_bstart, di_bend, di_bwh, di_bdiropq;
6448 +       struct au_hdentry       *di_hdentry;
6449 +};
6450 +
6451 +/* ---------------------------------------------------------------------- */
6452 +
6453 +/* dentry.c */
6454 +extern struct dentry_operations aufs_dop;
6455 +struct au_branch;
6456 +struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
6457 +                          struct au_branch *br, struct nameidata *nd);
6458 +struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
6459 +                              struct au_branch *br);
6460 +int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
6461 +               struct dentry *h_parent, struct au_branch *br);
6462 +
6463 +int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
6464 +                  struct nameidata *nd);
6465 +int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex);
6466 +int au_refresh_hdentry(struct dentry *dentry, mode_t type);
6467 +int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
6468 +
6469 +/* dinfo.c */
6470 +int au_alloc_dinfo(struct dentry *dentry);
6471 +int au_di_realloc(struct au_dinfo *dinfo, int nbr);
6472 +
6473 +void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
6474 +void di_read_unlock(struct dentry *d, int flags);
6475 +void di_downgrade_lock(struct dentry *d, int flags);
6476 +void di_write_lock(struct dentry *d, unsigned int lsc);
6477 +void di_write_unlock(struct dentry *d);
6478 +void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
6479 +void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
6480 +void di_write_unlock2(struct dentry *d1, struct dentry *d2);
6481 +
6482 +struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
6483 +aufs_bindex_t au_dbtail(struct dentry *dentry);
6484 +aufs_bindex_t au_dbtaildir(struct dentry *dentry);
6485 +
6486 +void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
6487 +                  struct dentry *h_dentry);
6488 +void au_update_digen(struct dentry *dentry);
6489 +void au_update_dbrange(struct dentry *dentry, int do_put_zero);
6490 +void au_update_dbstart(struct dentry *dentry);
6491 +void au_update_dbend(struct dentry *dentry);
6492 +int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
6493 +
6494 +/* ---------------------------------------------------------------------- */
6495 +
6496 +static inline struct au_dinfo *au_di(struct dentry *dentry)
6497 +{
6498 +       return dentry->d_fsdata;
6499 +}
6500 +
6501 +/* ---------------------------------------------------------------------- */
6502 +
6503 +/* lock subclass for dinfo */
6504 +enum {
6505 +       AuLsc_DI_CHILD,         /* child first */
6506 +       AuLsc_DI_CHILD2,        /* rename(2), link(2), and cpup at hinotify */
6507 +       AuLsc_DI_CHILD3,        /* copyup dirs */
6508 +       AuLsc_DI_PARENT,
6509 +       AuLsc_DI_PARENT2,
6510 +       AuLsc_DI_PARENT3
6511 +};
6512 +
6513 +/*
6514 + * di_read_lock_child, di_write_lock_child,
6515 + * di_read_lock_child2, di_write_lock_child2,
6516 + * di_read_lock_child3, di_write_lock_child3,
6517 + * di_read_lock_parent, di_write_lock_parent,
6518 + * di_read_lock_parent2, di_write_lock_parent2,
6519 + * di_read_lock_parent3, di_write_lock_parent3,
6520 + */
6521 +#define AuReadLockFunc(name, lsc) \
6522 +static inline void di_read_lock_##name(struct dentry *d, int flags) \
6523 +{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
6524 +
6525 +#define AuWriteLockFunc(name, lsc) \
6526 +static inline void di_write_lock_##name(struct dentry *d) \
6527 +{ di_write_lock(d, AuLsc_DI_##lsc); }
6528 +
6529 +#define AuRWLockFuncs(name, lsc) \
6530 +       AuReadLockFunc(name, lsc) \
6531 +       AuWriteLockFunc(name, lsc)
6532 +
6533 +AuRWLockFuncs(child, CHILD);
6534 +AuRWLockFuncs(child2, CHILD2);
6535 +AuRWLockFuncs(child3, CHILD3);
6536 +AuRWLockFuncs(parent, PARENT);
6537 +AuRWLockFuncs(parent2, PARENT2);
6538 +AuRWLockFuncs(parent3, PARENT3);
6539 +
6540 +#undef AuReadLockFunc
6541 +#undef AuWriteLockFunc
6542 +#undef AuRWLockFuncs
6543 +
6544 +#define DiMustNoWaiters(d)     AuRwMustNoWaiters(&au_di(d)->di_rwsem)
6545 +#define DiMustAnyLock(d)       AuRwMustAnyLock(&au_di(d)->di_rwsem)
6546 +#define DiMustWriteLock(d)     AuRwMustWriteLock(&au_di(d)->di_rwsem)
6547 +
6548 +/* ---------------------------------------------------------------------- */
6549 +
6550 +/* todo: memory barrier? */
6551 +static inline unsigned int au_digen(struct dentry *d)
6552 +{
6553 +       return atomic_read(&au_di(d)->di_generation);
6554 +}
6555 +
6556 +static inline void au_h_dentry_init(struct au_hdentry *hdentry)
6557 +{
6558 +       hdentry->hd_dentry = NULL;
6559 +}
6560 +
6561 +static inline void au_hdput(struct au_hdentry *hd)
6562 +{
6563 +       dput(hd->hd_dentry);
6564 +}
6565 +
6566 +static inline aufs_bindex_t au_dbstart(struct dentry *dentry)
6567 +{
6568 +       DiMustAnyLock(dentry);
6569 +       return au_di(dentry)->di_bstart;
6570 +}
6571 +
6572 +static inline aufs_bindex_t au_dbend(struct dentry *dentry)
6573 +{
6574 +       DiMustAnyLock(dentry);
6575 +       return au_di(dentry)->di_bend;
6576 +}
6577 +
6578 +static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
6579 +{
6580 +       DiMustAnyLock(dentry);
6581 +       return au_di(dentry)->di_bwh;
6582 +}
6583 +
6584 +static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
6585 +{
6586 +       DiMustAnyLock(dentry);
6587 +       return au_di(dentry)->di_bdiropq;
6588 +}
6589 +
6590 +/* todo: hard/soft set? */
6591 +static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex)
6592 +{
6593 +       DiMustWriteLock(dentry);
6594 +       au_di(dentry)->di_bstart = bindex;
6595 +}
6596 +
6597 +static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex)
6598 +{
6599 +       DiMustWriteLock(dentry);
6600 +       au_di(dentry)->di_bend = bindex;
6601 +}
6602 +
6603 +static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
6604 +{
6605 +       DiMustWriteLock(dentry);
6606 +       /* dbwh can be outside of bstart - bend range */
6607 +       au_di(dentry)->di_bwh = bindex;
6608 +}
6609 +
6610 +static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
6611 +{
6612 +       DiMustWriteLock(dentry);
6613 +       au_di(dentry)->di_bdiropq = bindex;
6614 +}
6615 +
6616 +/* ---------------------------------------------------------------------- */
6617 +
6618 +#ifdef CONFIG_AUFS_HINOTIFY
6619 +static inline void au_digen_dec(struct dentry *d)
6620 +{
6621 +       atomic_dec_return(&au_di(d)->di_generation);
6622 +}
6623 +
6624 +static inline void au_hin_di_reinit(struct dentry *dentry)
6625 +{
6626 +       dentry->d_fsdata = NULL;
6627 +}
6628 +#else
6629 +AuStubVoid(au_hin_di_reinit, struct dentry *dentry __maybe_unused)
6630 +#endif /* CONFIG_AUFS_HINOTIFY */
6631 +
6632 +#endif /* __KERNEL__ */
6633 +#endif /* __AUFS_DENTRY_H__ */
6634 diff --git a/fs/aufs/dinfo.c b/fs/aufs/dinfo.c
6635 new file mode 100644
6636 index 0000000..0010c99
6637 --- /dev/null
6638 +++ b/fs/aufs/dinfo.c
6639 @@ -0,0 +1,367 @@
6640 +/*
6641 + * Copyright (C) 2005-2009 Junjiro R. Okajima
6642 + *
6643 + * This program, aufs is free software; you can redistribute it and/or modify
6644 + * it under the terms of the GNU General Public License as published by
6645 + * the Free Software Foundation; either version 2 of the License, or
6646 + * (at your option) any later version.
6647 + *
6648 + * This program is distributed in the hope that it will be useful,
6649 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
6650 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
6651 + * GNU General Public License for more details.
6652 + *
6653 + * You should have received a copy of the GNU General Public License
6654 + * along with this program; if not, write to the Free Software
6655 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
6656 + */
6657 +
6658 +/*
6659 + * dentry private data
6660 + */
6661 +
6662 +#include "aufs.h"
6663 +
6664 +int au_alloc_dinfo(struct dentry *dentry)
6665 +{
6666 +       struct au_dinfo *dinfo;
6667 +       struct super_block *sb;
6668 +       int nbr;
6669 +
6670 +       dinfo = au_cache_alloc_dinfo();
6671 +       if (unlikely(!dinfo))
6672 +               goto out;
6673 +
6674 +       sb = dentry->d_sb;
6675 +       nbr = au_sbend(sb) + 1;
6676 +       if (nbr <= 0)
6677 +               nbr = 1;
6678 +       dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
6679 +       if (unlikely(!dinfo->di_hdentry))
6680 +               goto out_dinfo;
6681 +
6682 +       atomic_set(&dinfo->di_generation, au_sigen(sb));
6683 +       /* smp_mb(); */ /* atomic_set */
6684 +       au_rw_init_wlock_nested(&dinfo->di_rwsem, AuLsc_DI_CHILD);
6685 +       dinfo->di_bstart = -1;
6686 +       dinfo->di_bend = -1;
6687 +       dinfo->di_bwh = -1;
6688 +       dinfo->di_bdiropq = -1;
6689 +
6690 +       dentry->d_fsdata = dinfo;
6691 +       dentry->d_op = &aufs_dop;
6692 +       return 0; /* success */
6693 +
6694 + out_dinfo:
6695 +       au_cache_free_dinfo(dinfo);
6696 + out:
6697 +       return -ENOMEM;
6698 +}
6699 +
6700 +int au_di_realloc(struct au_dinfo *dinfo, int nbr)
6701 +{
6702 +       int err, sz;
6703 +       struct au_hdentry *hdp;
6704 +
6705 +       AuRwMustWriteLock(&dinfo->di_rwsem);
6706 +
6707 +       err = -ENOMEM;
6708 +       sz = sizeof(*hdp) * (dinfo->di_bend + 1);
6709 +       if (!sz)
6710 +               sz = sizeof(*hdp);
6711 +       hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS);
6712 +       if (hdp) {
6713 +               dinfo->di_hdentry = hdp;
6714 +               err = 0;
6715 +       }
6716 +
6717 +       return err;
6718 +}
6719 +
6720 +/* ---------------------------------------------------------------------- */
6721 +
6722 +static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
6723 +{
6724 +       switch (lsc) {
6725 +       case AuLsc_DI_CHILD:
6726 +               ii_write_lock_child(inode);
6727 +               break;
6728 +       case AuLsc_DI_CHILD2:
6729 +               ii_write_lock_child2(inode);
6730 +               break;
6731 +       case AuLsc_DI_CHILD3:
6732 +               ii_write_lock_child3(inode);
6733 +               break;
6734 +       case AuLsc_DI_PARENT:
6735 +               ii_write_lock_parent(inode);
6736 +               break;
6737 +       case AuLsc_DI_PARENT2:
6738 +               ii_write_lock_parent2(inode);
6739 +               break;
6740 +       case AuLsc_DI_PARENT3:
6741 +               ii_write_lock_parent3(inode);
6742 +               break;
6743 +       default:
6744 +               BUG();
6745 +       }
6746 +}
6747 +
6748 +static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
6749 +{
6750 +       switch (lsc) {
6751 +       case AuLsc_DI_CHILD:
6752 +               ii_read_lock_child(inode);
6753 +               break;
6754 +       case AuLsc_DI_CHILD2:
6755 +               ii_read_lock_child2(inode);
6756 +               break;
6757 +       case AuLsc_DI_CHILD3:
6758 +               ii_read_lock_child3(inode);
6759 +               break;
6760 +       case AuLsc_DI_PARENT:
6761 +               ii_read_lock_parent(inode);
6762 +               break;
6763 +       case AuLsc_DI_PARENT2:
6764 +               ii_read_lock_parent2(inode);
6765 +               break;
6766 +       case AuLsc_DI_PARENT3:
6767 +               ii_read_lock_parent3(inode);
6768 +               break;
6769 +       default:
6770 +               BUG();
6771 +       }
6772 +}
6773 +
6774 +void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
6775 +{
6776 +       au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
6777 +       if (d->d_inode) {
6778 +               if (au_ftest_lock(flags, IW))
6779 +                       do_ii_write_lock(d->d_inode, lsc);
6780 +               else if (au_ftest_lock(flags, IR))
6781 +                       do_ii_read_lock(d->d_inode, lsc);
6782 +       }
6783 +}
6784 +
6785 +void di_read_unlock(struct dentry *d, int flags)
6786 +{
6787 +       if (d->d_inode) {
6788 +               if (au_ftest_lock(flags, IW))
6789 +                       ii_write_unlock(d->d_inode);
6790 +               else if (au_ftest_lock(flags, IR))
6791 +                       ii_read_unlock(d->d_inode);
6792 +       }
6793 +       au_rw_read_unlock(&au_di(d)->di_rwsem);
6794 +}
6795 +
6796 +void di_downgrade_lock(struct dentry *d, int flags)
6797 +{
6798 +       if (d->d_inode && au_ftest_lock(flags, IR))
6799 +               ii_downgrade_lock(d->d_inode);
6800 +       au_rw_dgrade_lock(&au_di(d)->di_rwsem);
6801 +}
6802 +
6803 +void di_write_lock(struct dentry *d, unsigned int lsc)
6804 +{
6805 +       au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
6806 +       if (d->d_inode)
6807 +               do_ii_write_lock(d->d_inode, lsc);
6808 +}
6809 +
6810 +void di_write_unlock(struct dentry *d)
6811 +{
6812 +       if (d->d_inode)
6813 +               ii_write_unlock(d->d_inode);
6814 +       au_rw_write_unlock(&au_di(d)->di_rwsem);
6815 +}
6816 +
6817 +void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
6818 +{
6819 +       AuDebugOn(d1 == d2
6820 +                 || d1->d_inode == d2->d_inode
6821 +                 || d1->d_sb != d2->d_sb);
6822 +
6823 +       if (isdir && au_test_subdir(d1, d2)) {
6824 +               di_write_lock_child(d1);
6825 +               di_write_lock_child2(d2);
6826 +       } else {
6827 +               /* there should be no races */
6828 +               di_write_lock_child(d2);
6829 +               di_write_lock_child2(d1);
6830 +       }
6831 +}
6832 +
6833 +void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
6834 +{
6835 +       AuDebugOn(d1 == d2
6836 +                 || d1->d_inode == d2->d_inode
6837 +                 || d1->d_sb != d2->d_sb);
6838 +
6839 +       if (isdir && au_test_subdir(d1, d2)) {
6840 +               di_write_lock_parent(d1);
6841 +               di_write_lock_parent2(d2);
6842 +       } else {
6843 +               /* there should be no races */
6844 +               di_write_lock_parent(d2);
6845 +               di_write_lock_parent2(d1);
6846 +       }
6847 +}
6848 +
6849 +void di_write_unlock2(struct dentry *d1, struct dentry *d2)
6850 +{
6851 +       di_write_unlock(d1); /* also unlocks d1's inode, if any */
6852 +       if (d1->d_inode == d2->d_inode) /* same inode (or none): done above */
6853 +               au_rw_write_unlock(&au_di(d2)->di_rwsem);
6854 +       else
6855 +               di_write_unlock(d2);
6856 +}
6857 +
6858 +/* ---------------------------------------------------------------------- */
6859 +
6860 +struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
6861 +{
6862 +       struct dentry *d;
6863 +
6864 +       DiMustAnyLock(dentry);
6865 +
6866 +       if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
6867 +               return NULL;
6868 +       AuDebugOn(bindex < 0);
6869 +       d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry;
6870 +       AuDebugOn(d && (atomic_read(&d->d_count) <= 0));
6871 +       return d;
6872 +}
6873 +
6874 +aufs_bindex_t au_dbtail(struct dentry *dentry)
6875 +{
6876 +       aufs_bindex_t bend, bwh;
6877 +
6878 +       bend = au_dbend(dentry);
6879 +       if (0 <= bend) {
6880 +               bwh = au_dbwh(dentry);
6881 +               if (!bwh)
6882 +                       return bwh;
6883 +               if (0 < bwh && bwh < bend)
6884 +                       return bwh - 1;
6885 +       }
6886 +       return bend;
6887 +}
6888 +
6889 +aufs_bindex_t au_dbtaildir(struct dentry *dentry)
6890 +{
6891 +       aufs_bindex_t bend, bopq;
6892 +
6893 +       bend = au_dbtail(dentry);
6894 +       if (0 <= bend) {
6895 +               bopq = au_dbdiropq(dentry);
6896 +               if (0 <= bopq && bopq < bend)
6897 +                       bend = bopq;
6898 +       }
6899 +       return bend;
6900 +}
6901 +
6902 +/* ---------------------------------------------------------------------- */
6903 +
6904 +void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
6905 +                  struct dentry *h_dentry)
6906 +{
6907 +       struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex;
6908 +
6909 +       DiMustWriteLock(dentry);
6910 +
6911 +       if (hd->hd_dentry)
6912 +               au_hdput(hd);
6913 +       hd->hd_dentry = h_dentry;
6914 +}
6915 +
6916 +void au_update_digen(struct dentry *dentry)
6917 +{
6918 +       atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
6919 +       /* smp_mb(); */ /* atomic_set */
6920 +}
6921 +
6922 +void au_update_dbrange(struct dentry *dentry, int do_put_zero)
6923 +{
6924 +       struct au_dinfo *dinfo;
6925 +       struct dentry *h_d;
6926 +
6927 +       DiMustWriteLock(dentry);
6928 +
6929 +       dinfo = au_di(dentry);
6930 +       if (!dinfo || dinfo->di_bstart < 0)
6931 +               return; /* no range to recompute */
6932 +
6933 +       if (do_put_zero) { /* drop negative lower dentries first */
6934 +               aufs_bindex_t bindex, bend;
6935 +
6936 +               bend = dinfo->di_bend;
6937 +               for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) {
6938 +                       h_d = dinfo->di_hdentry[0 + bindex].hd_dentry;
6939 +                       if (h_d && !h_d->d_inode)
6940 +                               au_set_h_dptr(dentry, bindex, NULL);
6941 +               }
6942 +       }
6943 +
6944 +       dinfo->di_bstart = -1; /* rescan from index 0 for the first set slot */
6945 +       while (++dinfo->di_bstart <= dinfo->di_bend)
6946 +               if (dinfo->di_hdentry[0 + dinfo->di_bstart].hd_dentry)
6947 +                       break;
6948 +       if (dinfo->di_bstart > dinfo->di_bend) {
6949 +               dinfo->di_bstart = -1;
6950 +               dinfo->di_bend = -1;
6951 +               return; /* no lower dentry is left */
6952 +       }
6953 +
6954 +       dinfo->di_bend++; /* scan downward for the last set slot */
6955 +       while (0 <= --dinfo->di_bend)
6956 +               if (dinfo->di_hdentry[0 + dinfo->di_bend].hd_dentry)
6957 +                       break;
6958 +       AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0);
6959 +}
6960 +
6961 +void au_update_dbstart(struct dentry *dentry)
6962 +{
6963 +       aufs_bindex_t bindex, bend;
6964 +       struct dentry *h_dentry;
6965 +
6966 +       bend = au_dbend(dentry);
6967 +       for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
6968 +               h_dentry = au_h_dptr(dentry, bindex);
6969 +               if (!h_dentry)
6970 +                       continue;
6971 +               if (h_dentry->d_inode) {
6972 +                       au_set_dbstart(dentry, bindex);
6973 +                       return;
6974 +               }
6975 +               au_set_h_dptr(dentry, bindex, NULL);
6976 +       }
6977 +}
6978 +
6979 +void au_update_dbend(struct dentry *dentry)
6980 +{
6981 +       aufs_bindex_t bindex, bstart;
6982 +       struct dentry *h_dentry;
6983 +
6984 +       bstart = au_dbstart(dentry); /* scan from bend down to bstart */
6985 +       for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) {
6986 +               h_dentry = au_h_dptr(dentry, bindex);
6987 +               if (!h_dentry)
6988 +                       continue; /* empty slot */
6989 +               if (h_dentry->d_inode) {
6990 +                       au_set_dbend(dentry, bindex);
6991 +                       return; /* last positive lower dentry found */
6992 +               }
6993 +               au_set_h_dptr(dentry, bindex, NULL); /* put the negative one */
6994 +       }
6995 +}
6996 +
6997 +int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
6998 +{
6999 +       aufs_bindex_t bindex, bend;
7000 +
7001 +       bend = au_dbend(dentry);
7002 +       for (bindex = au_dbstart(dentry); bindex <= bend; bindex++)
7003 +               if (au_h_dptr(dentry, bindex) == h_dentry)
7004 +                       return bindex;
7005 +       return -1;
7006 +}
7007 diff --git a/fs/aufs/dir.c b/fs/aufs/dir.c
7008 new file mode 100644
7009 index 0000000..c9d8284
7010 --- /dev/null
7011 +++ b/fs/aufs/dir.c
7012 @@ -0,0 +1,579 @@
7013 +/*
7014 + * Copyright (C) 2005-2009 Junjiro R. Okajima
7015 + *
7016 + * This program, aufs is free software; you can redistribute it and/or modify
7017 + * it under the terms of the GNU General Public License as published by
7018 + * the Free Software Foundation; either version 2 of the License, or
7019 + * (at your option) any later version.
7020 + *
7021 + * This program is distributed in the hope that it will be useful,
7022 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
7023 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
7024 + * GNU General Public License for more details.
7025 + *
7026 + * You should have received a copy of the GNU General Public License
7027 + * along with this program; if not, write to the Free Software
7028 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
7029 + */
7030 +
7031 +/*
7032 + * directory operations
7033 + */
7034 +
7035 +#include <linux/file.h>
7036 +#include <linux/fs_stack.h>
7037 +#include "aufs.h"
7038 +
7039 +void au_add_nlink(struct inode *dir, struct inode *h_dir)
7040 +{
7041 +       AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
7042 +
7043 +       dir->i_nlink += h_dir->i_nlink - 2;
7044 +       if (h_dir->i_nlink < 2)
7045 +               dir->i_nlink += 2;
7046 +}
7047 +
7048 +void au_sub_nlink(struct inode *dir, struct inode *h_dir)
7049 +{
7050 +       AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
7051 +
7052 +       dir->i_nlink -= h_dir->i_nlink - 2;
7053 +       if (h_dir->i_nlink < 2)
7054 +               dir->i_nlink -= 2;
7055 +}
7056 +
7057 +loff_t au_dir_size(struct file *file, struct dentry *dentry)
7058 +{
7059 +       loff_t sz;
7060 +       aufs_bindex_t bindex, bend;
7061 +       struct file *h_file;
7062 +       struct dentry *h_dentry;
7063 +
7064 +       sz = 0; /* sum of the lower dirs' i_size, from file or else dentry */
7065 +       if (file) {
7066 +               AuDebugOn(!file->f_dentry);
7067 +               AuDebugOn(!file->f_dentry->d_inode);
7068 +               AuDebugOn(!S_ISDIR(file->f_dentry->d_inode->i_mode));
7069 +
7070 +               bend = au_fbend(file);
7071 +               for (bindex = au_fbstart(file);
7072 +                    bindex <= bend && sz < KMALLOC_MAX_SIZE;
7073 +                    bindex++) {
7074 +                       h_file = au_h_fptr(file, bindex);
7075 +                       if (h_file
7076 +                           && h_file->f_dentry
7077 +                           && h_file->f_dentry->d_inode)
7078 +                               sz += i_size_read(h_file->f_dentry->d_inode);
7079 +               }
7080 +       } else {
7081 +               AuDebugOn(!dentry);
7082 +               AuDebugOn(!dentry->d_inode);
7083 +               AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode));
7084 +
7085 +               bend = au_dbtaildir(dentry);
7086 +               for (bindex = au_dbstart(dentry);
7087 +                    bindex <= bend && sz < KMALLOC_MAX_SIZE;
7088 +                    bindex++) {
7089 +                       h_dentry = au_h_dptr(dentry, bindex);
7090 +                       if (h_dentry && h_dentry->d_inode)
7091 +                               sz += i_size_read(h_dentry->d_inode);
7092 +               }
7093 +       }
7094 +       if (sz > 0 && sz < KMALLOC_MAX_SIZE) /* roundup of 0 is undefined */
7095 +               sz = roundup_pow_of_two(sz);
7096 +       if (sz > KMALLOC_MAX_SIZE)
7097 +               sz = KMALLOC_MAX_SIZE; /* clamp to the kmalloc limit */
7098 +       else if (sz < NAME_MAX) {
7099 +               BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
7100 +               sz = AUFS_RDBLK_DEF; /* too small (or zero): use the default */
7101 +       }
7102 +       return sz;
7103 +}
7104 +
7105 +/* ---------------------------------------------------------------------- */
7106 +
7107 +static int reopen_dir(struct file *file)
7108 +{
7109 +       int err;
7110 +       unsigned int flags;
7111 +       aufs_bindex_t bindex, btail, bstart;
7112 +       struct dentry *dentry, *h_dentry;
7113 +       struct file *h_file;
7114 +
7115 +       /* open all lower dirs */
7116 +       dentry = file->f_dentry;
7117 +       bstart = au_dbstart(dentry);
7118 +       for (bindex = au_fbstart(file); bindex < bstart; bindex++)
7119 +               au_set_h_fptr(file, bindex, NULL);
7120 +       au_set_fbstart(file, bstart);
7121 +
7122 +       btail = au_dbtaildir(dentry);
7123 +       for (bindex = au_fbend(file); btail < bindex; bindex--)
7124 +               au_set_h_fptr(file, bindex, NULL);
7125 +       au_set_fbend(file, btail);
7126 +
7127 +       flags = file->f_flags;
7128 +       for (bindex = bstart; bindex <= btail; bindex++) {
7129 +               h_dentry = au_h_dptr(dentry, bindex);
7130 +               if (!h_dentry)
7131 +                       continue;
7132 +               h_file = au_h_fptr(file, bindex);
7133 +               if (h_file)
7134 +                       continue;
7135 +
7136 +               h_file = au_h_open(dentry, bindex, flags, file);
7137 +               err = PTR_ERR(h_file);
7138 +               if (IS_ERR(h_file))
7139 +                       goto out; /* close all? */
7140 +               au_set_h_fptr(file, bindex, h_file);
7141 +       }
7142 +       au_update_figen(file);
7143 +       /* todo: necessary? */
7144 +       /* file->f_ra = h_file->f_ra; */
7145 +       err = 0;
7146 +
7147 + out:
7148 +       return err;
7149 +}
7150 +
7151 +static int do_open_dir(struct file *file, int flags)
7152 +{
7153 +       int err;
7154 +       aufs_bindex_t bindex, btail;
7155 +       struct dentry *dentry, *h_dentry;
7156 +       struct file *h_file;
7157 +
7158 +       FiMustWriteLock(file);
7159 +
7160 +       err = 0;
7161 +       dentry = file->f_dentry;
7162 +       au_set_fvdir_cache(file, NULL);
7163 +       file->f_version = dentry->d_inode->i_version;
7164 +       bindex = au_dbstart(dentry);
7165 +       au_set_fbstart(file, bindex);
7166 +       btail = au_dbtaildir(dentry);
7167 +       au_set_fbend(file, btail);
7168 +       for (; !err && bindex <= btail; bindex++) {
7169 +               h_dentry = au_h_dptr(dentry, bindex);
7170 +               if (!h_dentry)
7171 +                       continue;
7172 +
7173 +               h_file = au_h_open(dentry, bindex, flags, file);
7174 +               if (IS_ERR(h_file)) {
7175 +                       err = PTR_ERR(h_file);
7176 +                       break;
7177 +               }
7178 +               au_set_h_fptr(file, bindex, h_file);
7179 +       }
7180 +       au_update_figen(file);
7181 +       /* todo: necessary? */
7182 +       /* file->f_ra = h_file->f_ra; */
7183 +       if (!err)
7184 +               return 0; /* success */
7185 +
7186 +       /* close all */
7187 +       for (bindex = au_fbstart(file); bindex <= btail; bindex++)
7188 +               au_set_h_fptr(file, bindex, NULL);
7189 +       au_set_fbstart(file, -1);
7190 +       au_set_fbend(file, -1);
7191 +       return err;
7192 +}
7193 +
7194 +static int aufs_open_dir(struct inode *inode __maybe_unused,
7195 +                        struct file *file)
7196 +{
7197 +       return au_do_open(file, do_open_dir);
7198 +}
7199 +
7200 +static int aufs_release_dir(struct inode *inode __maybe_unused,
7201 +                           struct file *file)
7202 +{
7203 +       struct au_vdir *vdir_cache;
7204 +       struct super_block *sb;
7205 +
7206 +       sb = file->f_dentry->d_sb;
7207 +       vdir_cache = au_fi(file)->fi_vdir_cache; /* lock-free */
7208 +       if (vdir_cache)
7209 +               au_vdir_free(vdir_cache);
7210 +       au_plink_maint_leave(file);
7211 +       au_finfo_fin(file);
7212 +       return 0;
7213 +}
7214 +
7215 +/* ---------------------------------------------------------------------- */
7216 +
7217 +static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
7218 +{
7219 +       int err;
7220 +       aufs_bindex_t bend, bindex;
7221 +       struct inode *inode;
7222 +       struct super_block *sb;
7223 +
7224 +       err = 0;
7225 +       sb = dentry->d_sb;
7226 +       inode = dentry->d_inode;
7227 +       IMustLock(inode);
7228 +       bend = au_dbend(dentry); /* sync each lower dir, stop on first error */
7229 +       for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) {
7230 +               struct path h_path;
7231 +               struct inode *h_inode;
7232 +
7233 +               if (au_test_ro(sb, bindex, inode))
7234 +                       continue;
7235 +               h_path.dentry = au_h_dptr(dentry, bindex);
7236 +               if (!h_path.dentry)
7237 +                       continue;
7238 +               h_inode = h_path.dentry->d_inode;
7239 +               if (!h_inode)
7240 +                       continue;
7241 +
7242 +               /* no mnt_want_write() */
7243 +               /* cf. fs/nfsd/vfs.c and fs/nfsd/nfs4recover.c */
7244 +               /* todo: inotify fired? */
7245 +               h_path.mnt = au_sbr_mnt(sb, bindex);
7246 +               mutex_lock(&h_inode->i_mutex);
7247 +               err = filemap_fdatawrite(h_inode->i_mapping);
7248 +               AuDebugOn(!h_inode->i_fop);
7249 +               if (!err && h_inode->i_fop->fsync)
7250 +                       err = h_inode->i_fop->fsync(NULL, h_path.dentry,
7251 +                                                   datasync);
7252 +               if (!err) /* wait for the writeback started above */
7253 +                       err = filemap_fdatawait(h_inode->i_mapping);
7254 +               if (!err)
7255 +                       vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
7256 +               mutex_unlock(&h_inode->i_mutex);
7257 +       }
7258 +
7259 +       return err;
7260 +}
7261 +
7262 +/* fsync the writable lower dirs of @file; caller holds fi/di write locks */
7263 +static int au_do_fsync_dir(struct file *file, int datasync)
7264 +{
7265 +       int err;
7266 +       aufs_bindex_t bend, bindex;
7267 +       struct file *h_file;
7268 +       struct super_block *sb;
7269 +       struct inode *inode;
7270 +       struct mutex *h_mtx;
7271 +
7272 +       err = 0;
7273 +       sb = file->f_dentry->d_sb;
7274 +       inode = file->f_dentry->d_inode;
7275 +       bend = au_fbend(file);
7276 +       for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
7277 +               h_file = au_h_fptr(file, bindex);
7278 +               if (!h_file || au_test_ro(sb, bindex, inode))
7279 +                       continue;
7280 +
7281 +               err = (int)do_fsync(h_file, datasync);
7282 +               if (!err) {
7283 +                       h_mtx = &h_file->f_dentry->d_inode->i_mutex;
7284 +                       mutex_lock(h_mtx);
7285 +                       vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
7286 +                       /*ignore*/
7287 +                       mutex_unlock(h_mtx);
7288 +               }
7289 +       }
7290 +
7291 +       return err;
7292 +}
7293 +
7294 +/*
7295 + * fsync for a dir; @file may be NULL, then only @dentry is locked and synced
7296 + */
7297 +static int aufs_fsync_dir(struct file *file, struct dentry *dentry,
7298 +                         int datasync)
7299 +{
7300 +       int err;
7301 +       struct super_block *sb;
7302 +
7303 +       IMustLock(dentry->d_inode);
7304 +
7305 +       sb = dentry->d_sb;
7306 +       si_noflush_read_lock(sb);
7307 +       if (file) {
7308 +               err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
7309 +               if (unlikely(err))
7310 +                       goto out; /* fi/di unlocked, cf. aufs_readdir() */
7311 +               err = au_do_fsync_dir(file, datasync);
7312 +       } else {
7313 +               di_write_lock_child(dentry);
7314 +               err = au_do_fsync_dir_no_file(dentry, datasync);
7315 +       }
7316 +       au_cpup_attr_timesizes(dentry->d_inode);
7317 +       di_write_unlock(dentry);
7318 +       if (file)
7319 +               fi_write_unlock(file);
7320 +
7321 + out:
7322 +       si_read_unlock(sb);
7323 +       return err;
7324 +}
7325 +
7326 +/* ---------------------------------------------------------------------- */
7327 +/* ->readdir(): init the vdir of @file, then feed its entries to @filldir */
7328 +static int aufs_readdir(struct file *file, void *dirent, filldir_t filldir)
7329 +{
7330 +       int err;
7331 +       struct dentry *dentry;
7332 +       struct inode *inode;
7333 +       struct super_block *sb;
7334 +
7335 +       dentry = file->f_dentry;
7336 +       inode = dentry->d_inode;
7337 +       IMustLock(inode);
7338 +
7339 +       sb = dentry->d_sb;
7340 +       si_read_lock(sb, AuLock_FLUSH);
7341 +       err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
7342 +       if (unlikely(err))
7343 +               goto out;
7344 +       err = au_vdir_init(file);
7345 +       di_downgrade_lock(dentry, AuLock_IR);
7346 +       if (unlikely(err))
7347 +               goto out_unlock;
7348 +
7349 +       if (!au_test_nfsd(current)) {
7350 +               err = au_vdir_fill_de(file, dirent, filldir);
7351 +               fsstack_copy_attr_atime(inode,
7352 +                                       au_h_iptr(inode, au_ibstart(inode)));
7353 +       } else {
7354 +               /*
7355 +                * nfsd filldir may call lookup_one_len(), vfs_getattr(),
7356 +                * encode_fh() and others.
7357 +                */
7358 +               struct inode *h_inode = au_h_iptr(inode, au_ibstart(inode));
7359 +
7360 +               di_read_unlock(dentry, AuLock_IR);
7361 +               si_read_unlock(sb);
7362 +               lockdep_off();
7363 +               err = au_vdir_fill_de(file, dirent, filldir);
7364 +               lockdep_on();
7365 +               fsstack_copy_attr_atime(inode, h_inode);
7366 +               fi_write_unlock(file);
7367 +
7368 +               AuTraceErr(err);
7369 +               return err;
7370 +       }
7371 +
7372 + out_unlock:
7373 +       di_read_unlock(dentry, AuLock_IR);
7374 +       fi_write_unlock(file);
7375 + out:
7376 +       si_read_unlock(sb);
7377 +       return err;
7378 +}
7379 +
7380 +/* ---------------------------------------------------------------------- */
7381 +
7382 +#define AuTestEmpty_WHONLY     1
7383 +#define AuTestEmpty_CALLED     (1 << 1)
7384 +#define AuTestEmpty_SHWH       (1 << 2)
7385 +#define au_ftest_testempty(flags, name)        ((flags) & AuTestEmpty_##name)
7386 +#define au_fset_testempty(flags, name) { (flags) |= AuTestEmpty_##name; }
7387 +#define au_fclr_testempty(flags, name) { (flags) &= ~AuTestEmpty_##name; }
7388 +
7389 +#ifndef CONFIG_AUFS_SHWH
7390 +#undef AuTestEmpty_SHWH
7391 +#define AuTestEmpty_SHWH       0
7392 +#endif
7393 +
7394 +struct test_empty_arg {
7395 +       struct au_nhash *whlist;
7396 +       unsigned int flags;
7397 +       int err;
7398 +       aufs_bindex_t bindex;
7399 +};
7400 +
7401 +static int test_empty_cb(void *__arg, const char *__name, int namelen,
7402 +                        loff_t offset __maybe_unused, u64 ino,
7403 +                        unsigned int d_type)
7404 +{
7405 +       struct test_empty_arg *arg = __arg;
7406 +       char *name = (void *)__name;
7407 +
7408 +       arg->err = 0;
7409 +       au_fset_testempty(arg->flags, CALLED);
7410 +       /* smp_mb(); */
7411 +       if (name[0] == '.'
7412 +           && (namelen == 1 || (name[1] == '.' && namelen == 2)))
7413 +               goto out; /* success */
7414 +
7415 +       if (namelen <= AUFS_WH_PFX_LEN
7416 +           || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
7417 +               if (au_ftest_testempty(arg->flags, WHONLY)
7418 +                   && !au_nhash_test_known_wh(arg->whlist, name, namelen))
7419 +                       arg->err = -ENOTEMPTY;
7420 +               goto out;
7421 +       }
7422 +
7423 +       name += AUFS_WH_PFX_LEN;
7424 +       namelen -= AUFS_WH_PFX_LEN;
7425 +       if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
7426 +               arg->err = au_nhash_append_wh
7427 +                       (arg->whlist, name, namelen, ino, d_type, arg->bindex,
7428 +                        au_ftest_testempty(arg->flags, SHWH));
7429 +
7430 + out:
7431 +       /* smp_mb(); */
7432 +       AuTraceErr(arg->err);
7433 +       return arg->err;
7434 +}
7435 +
7436 +static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
7437 +{
7438 +       int err;
7439 +       struct file *h_file;
7440 +
7441 +       h_file = au_h_open(dentry, arg->bindex,
7442 +                          O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
7443 +                          /*file*/NULL);
7444 +       err = PTR_ERR(h_file);
7445 +       if (IS_ERR(h_file))
7446 +               goto out;
7447 +
7448 +       err = 0;
7449 +       if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
7450 +           && !h_file->f_dentry->d_inode->i_nlink)
7451 +               goto out_put;
7452 +
7453 +       do {
7454 +               arg->err = 0;
7455 +               au_fclr_testempty(arg->flags, CALLED);
7456 +               /* smp_mb(); */
7457 +               err = vfsub_readdir(h_file, test_empty_cb, arg);
7458 +               if (err >= 0)
7459 +                       err = arg->err;
7460 +       } while (!err && au_ftest_testempty(arg->flags, CALLED));
7461 +
7462 + out_put:
7463 +       fput(h_file);
7464 +       au_sbr_put(dentry->d_sb, arg->bindex);
7465 + out:
7466 +       return err;
7467 +}
7468 +
7469 +struct do_test_empty_args {
7470 +       int *errp;
7471 +       struct dentry *dentry;
7472 +       struct test_empty_arg *arg;
7473 +};
7474 +
7475 +static void call_do_test_empty(void *args)
7476 +{
7477 +       struct do_test_empty_args *a = args;
7478 +       *a->errp = do_test_empty(a->dentry, a->arg);
7479 +}
7480 +
7481 +static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
7482 +{
7483 +       int err, wkq_err;
7484 +       struct dentry *h_dentry;
7485 +       struct inode *h_inode;
7486 +
7487 +       h_dentry = au_h_dptr(dentry, arg->bindex);
7488 +       h_inode = h_dentry->d_inode;
7489 +       mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
7490 +       err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
7491 +       mutex_unlock(&h_inode->i_mutex);
7492 +       if (!err)
7493 +               err = do_test_empty(dentry, arg);
7494 +       else {
7495 +               struct do_test_empty_args args = {
7496 +                       .errp   = &err,
7497 +                       .dentry = dentry,
7498 +                       .arg    = arg
7499 +               };
7500 +               unsigned int flags = arg->flags;
7501 +
7502 +               wkq_err = au_wkq_wait(call_do_test_empty, &args);
7503 +               if (unlikely(wkq_err))
7504 +                       err = wkq_err;
7505 +               arg->flags = flags;
7506 +       }
7507 +
7508 +       return err;
7509 +}
7510 +
7511 +int au_test_empty_lower(struct dentry *dentry)
7512 +{
7513 +       int err;
7514 +       unsigned int rdhash;
7515 +       aufs_bindex_t bindex, bstart, btail;
7516 +       struct au_nhash whlist;
7517 +       struct test_empty_arg arg;
7518 +
7519 +       SiMustAnyLock(dentry->d_sb);
7520 +
7521 +       rdhash = au_sbi(dentry->d_sb)->si_rdhash;
7522 +       if (!rdhash)
7523 +               rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
7524 +       err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
7525 +       if (unlikely(err))
7526 +               goto out;
7527 +
7528 +       arg.flags = 0;
7529 +       arg.whlist = &whlist;
7530 +       bstart = au_dbstart(dentry);
7531 +       if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
7532 +               au_fset_testempty(arg.flags, SHWH);
7533 +       arg.bindex = bstart;
7534 +       err = do_test_empty(dentry, &arg);
7535 +       if (unlikely(err))
7536 +               goto out_whlist;
7537 +
7538 +       au_fset_testempty(arg.flags, WHONLY);
7539 +       btail = au_dbtaildir(dentry);
7540 +       for (bindex = bstart + 1; !err && bindex <= btail; bindex++) {
7541 +               struct dentry *h_dentry;
7542 +
7543 +               h_dentry = au_h_dptr(dentry, bindex);
7544 +               if (h_dentry && h_dentry->d_inode) {
7545 +                       arg.bindex = bindex;
7546 +                       err = do_test_empty(dentry, &arg);
7547 +               }
7548 +       }
7549 +
7550 + out_whlist:
7551 +       au_nhash_wh_free(&whlist);
7552 + out:
7553 +       return err;
7554 +}
7555 +
7556 +int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
7557 +{
7558 +       int err;
7559 +       struct test_empty_arg arg;
7560 +       aufs_bindex_t bindex, btail;
7561 +
7562 +       err = 0;
7563 +       arg.whlist = whlist;
7564 +       arg.flags = AuTestEmpty_WHONLY;
7565 +       if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
7566 +               au_fset_testempty(arg.flags, SHWH);
7567 +       btail = au_dbtaildir(dentry);
7568 +       for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) {
7569 +               struct dentry *h_dentry;
7570 +
7571 +               h_dentry = au_h_dptr(dentry, bindex);
7572 +               if (h_dentry && h_dentry->d_inode) {
7573 +                       arg.bindex = bindex;
7574 +                       err = sio_test_empty(dentry, &arg);
7575 +               }
7576 +       }
7577 +
7578 +       return err;
7579 +}
7580 +
7581 +/* ---------------------------------------------------------------------- */
7582 +/* the file_operations table for aufs directories (see dir.h) */
7583 +struct file_operations aufs_dir_fop = {
7584 +       .read           = generic_read_dir,
7585 +       .readdir        = aufs_readdir,
7586 +       .unlocked_ioctl = aufs_ioctl_dir,
7587 +       .open           = aufs_open_dir,
7588 +       .release        = aufs_release_dir,
7589 +       .flush          = aufs_flush,
7590 +       .fsync          = aufs_fsync_dir
7591 +};
7592 diff --git a/fs/aufs/dir.h b/fs/aufs/dir.h
7593 new file mode 100644
7594 index 0000000..dc40539
7595 --- /dev/null
7596 +++ b/fs/aufs/dir.h
7597 @@ -0,0 +1,127 @@
7598 +/*
7599 + * Copyright (C) 2005-2009 Junjiro R. Okajima
7600 + *
7601 + * This program, aufs is free software; you can redistribute it and/or modify
7602 + * it under the terms of the GNU General Public License as published by
7603 + * the Free Software Foundation; either version 2 of the License, or
7604 + * (at your option) any later version.
7605 + *
7606 + * This program is distributed in the hope that it will be useful,
7607 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
7608 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
7609 + * GNU General Public License for more details.
7610 + *
7611 + * You should have received a copy of the GNU General Public License
7612 + * along with this program; if not, write to the Free Software
7613 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
7614 + */
7615 +
7616 +/*
7617 + * directory operations
7618 + */
7619 +
7620 +#ifndef __AUFS_DIR_H__
7621 +#define __AUFS_DIR_H__
7622 +
7623 +#ifdef __KERNEL__
7624 +
7625 +#include <linux/fs.h>
7626 +#include <linux/aufs_type.h>
7627 +
7628 +/* ---------------------------------------------------------------------- */
7629 +
7630 +/* need to be faster and smaller */
7631 +
7632 +struct au_nhash {
7633 +       unsigned int            nh_num;
7634 +       struct hlist_head       *nh_head;
7635 +};
7636 +
7637 +struct au_vdir_destr {
7638 +       unsigned char   len;
7639 +       unsigned char   name[0];
7640 +} __packed;
7641 +
7642 +struct au_vdir_dehstr {
7643 +       struct hlist_node       hash;
7644 +       struct au_vdir_destr    *str;
7645 +};
7646 +
7647 +struct au_vdir_de {
7648 +       ino_t                   de_ino;
7649 +       unsigned char           de_type;
7650 +       /* caution: packed */
7651 +       struct au_vdir_destr    de_str;
7652 +} __packed;
7653 +
7654 +struct au_vdir_wh {
7655 +       struct hlist_node       wh_hash;
7656 +#ifdef CONFIG_AUFS_SHWH
7657 +       ino_t                   wh_ino;
7658 +       aufs_bindex_t           wh_bindex;
7659 +       unsigned char           wh_type;
7660 +#else
7661 +       aufs_bindex_t           wh_bindex;
7662 +#endif
7663 +       /* caution: packed */
7664 +       struct au_vdir_destr    wh_str;
7665 +} __packed;
7666 +
7667 +union au_vdir_deblk_p {
7668 +       unsigned char           *deblk;
7669 +       struct au_vdir_de       *de;
7670 +};
7671 +
7672 +struct au_vdir {
7673 +       unsigned char   **vd_deblk;
7674 +       unsigned long   vd_nblk;
7675 +       struct {
7676 +               unsigned long           ul;
7677 +               union au_vdir_deblk_p   p;
7678 +       } vd_last;
7679 +
7680 +       unsigned long   vd_version;
7681 +       unsigned int    vd_deblk_sz;
7682 +       unsigned long   vd_jiffy;
7683 +};
7684 +
7685 +/* ---------------------------------------------------------------------- */
7686 +
7687 +/* dir.c */
7688 +extern struct file_operations aufs_dir_fop;
7689 +void au_add_nlink(struct inode *dir, struct inode *h_dir);
7690 +void au_sub_nlink(struct inode *dir, struct inode *h_dir);
7691 +loff_t au_dir_size(struct file *file, struct dentry *dentry);
7692 +int au_test_empty_lower(struct dentry *dentry);
7693 +int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
7694 +
7695 +/* vdir.c */
7696 +unsigned int au_rdhash_est(loff_t sz);
7697 +int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
7698 +void au_nhash_wh_free(struct au_nhash *whlist);
7699 +int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
7700 +                           int limit);
7701 +int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
7702 +int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
7703 +                      unsigned int d_type, aufs_bindex_t bindex,
7704 +                      unsigned char shwh);
7705 +void au_vdir_free(struct au_vdir *vdir);
7706 +int au_vdir_init(struct file *file);
7707 +int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir);
7708 +
7709 +/* ioctl.c */
7710 +long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
7711 +
7712 +#ifdef CONFIG_AUFS_RDU
7713 +/* rdu.c */
7714 +long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
7715 +#else
7716 +static inline long au_rdu_ioctl(struct file *file, unsigned int cmd,
7717 +                               unsigned long arg)
7718 +{
7719 +       return -EINVAL;
7720 +}
7721 +#endif
7722 +
7723 +#endif /* __KERNEL__ */
7724 +#endif /* __AUFS_DIR_H__ */
7725 diff --git a/fs/aufs/export.c b/fs/aufs/export.c
7726 new file mode 100644
7727 index 0000000..3d13ff5
7728 --- /dev/null
7729 +++ b/fs/aufs/export.c
7730 @@ -0,0 +1,745 @@
7731 +/*
7732 + * Copyright (C) 2005-2009 Junjiro R. Okajima
7733 + *
7734 + * This program, aufs is free software; you can redistribute it and/or modify
7735 + * it under the terms of the GNU General Public License as published by
7736 + * the Free Software Foundation; either version 2 of the License, or
7737 + * (at your option) any later version.
7738 + *
7739 + * This program is distributed in the hope that it will be useful,
7740 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
7741 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
7742 + * GNU General Public License for more details.
7743 + *
7744 + * You should have received a copy of the GNU General Public License
7745 + * along with this program; if not, write to the Free Software
7746 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
7747 + */
7748 +
7749 +/*
7750 + * export via nfs
7751 + */
7752 +
7753 +#include <linux/exportfs.h>
7754 +#include <linux/file.h>
7755 +#include <linux/mnt_namespace.h>
7756 +#include <linux/namei.h>
7757 +#include <linux/random.h>
7758 +#include "aufs.h"
7759 +
7760 +union conv {
7761 +#ifdef CONFIG_AUFS_INO_T_64
7762 +       __u32 a[2];
7763 +#else
7764 +       __u32 a[1];
7765 +#endif
7766 +       ino_t ino;
7767 +};
7768 +
7769 +static ino_t decode_ino(__u32 *a)
7770 +{
7771 +       union conv u;
7772 +
7773 +       BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
7774 +       u.a[0] = a[0];
7775 +#ifdef CONFIG_AUFS_INO_T_64
7776 +       u.a[1] = a[1];
7777 +#endif
7778 +       return u.ino;
7779 +}
7780 +
7781 +static void encode_ino(__u32 *a, ino_t ino)
7782 +{
7783 +       union conv u;
7784 +
7785 +       u.ino = ino;
7786 +       a[0] = u.a[0];
7787 +#ifdef CONFIG_AUFS_INO_T_64
7788 +       a[1] = u.a[1];
7789 +#endif
7790 +}
7791 +
7792 +/* NFS file handle */
7793 +enum {
7794 +       Fh_br_id,
7795 +       Fh_sigen,
7796 +#ifdef CONFIG_AUFS_INO_T_64
7797 +       /* support 64bit inode number */
7798 +       Fh_ino1,
7799 +       Fh_ino2,
7800 +       Fh_dir_ino1,
7801 +       Fh_dir_ino2,
7802 +#else
7803 +       Fh_ino1,
7804 +       Fh_dir_ino1,
7805 +#endif
7806 +       Fh_igen,
7807 +       Fh_h_type,
7808 +       Fh_tail,
7809 +
7810 +       Fh_ino = Fh_ino1,
7811 +       Fh_dir_ino = Fh_dir_ino1
7812 +};
7813 +
7814 +static int au_test_anon(struct dentry *dentry)
7815 +{
7816 +       return !!(dentry->d_flags & DCACHE_DISCONNECTED);
7817 +}
7818 +
7819 +/* ---------------------------------------------------------------------- */
7820 +/* inode generation external table */
7821 +
7822 +int au_xigen_inc(struct inode *inode)
7823 +{
7824 +       int err;
7825 +       loff_t pos;
7826 +       ssize_t sz;
7827 +       __u32 igen;
7828 +       struct super_block *sb;
7829 +       struct au_sbinfo *sbinfo;
7830 +
7831 +       err = 0;
7832 +       sb = inode->i_sb;
7833 +       sbinfo = au_sbi(sb);
7834 +       /*
7835 +        * temporary workaround for escaping from SiMustAnyLock() in
7836 +        * au_mntflags(), since this function is called from au_iinfo_fin().
7837 +        */
7838 +       if (unlikely(!au_opt_test(sbinfo->si_mntflags, XINO)))
7839 +               goto out;
7840 +
7841 +       pos = inode->i_ino;
7842 +       pos *= sizeof(igen);
7843 +       igen = inode->i_generation + 1;
7844 +       sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
7845 +                        sizeof(igen), &pos);
7846 +       if (sz == sizeof(igen))
7847 +               goto out; /* success */
7848 +
7849 +       err = sz;
7850 +       if (unlikely(sz >= 0)) {
7851 +               err = -EIO;
7852 +               AuIOErr("xigen error (%zd)\n", sz);
7853 +       }
7854 +
7855 + out:
7856 +       return err;
7857 +}
7858 +
7859 +int au_xigen_new(struct inode *inode)
7860 +{
7861 +       int err;
7862 +       loff_t pos;
7863 +       ssize_t sz;
7864 +       struct super_block *sb;
7865 +       struct au_sbinfo *sbinfo;
7866 +       struct file *file;
7867 +
7868 +       err = 0;
7869 +       /* todo: dirty, at mount time */
7870 +       if (inode->i_ino == AUFS_ROOT_INO)
7871 +               goto out;
7872 +       sb = inode->i_sb;
7873 +       SiMustAnyLock(sb);
7874 +       if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
7875 +               goto out;
7876 +
7877 +       err = -EFBIG;
7878 +       pos = inode->i_ino;
7879 +       if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
7880 +               AuIOErr1("too large i%lld\n", pos);
7881 +               goto out;
7882 +       }
7883 +       pos *= sizeof(inode->i_generation);
7884 +
7885 +       err = 0;
7886 +       sbinfo = au_sbi(sb);
7887 +       file = sbinfo->si_xigen;
7888 +       BUG_ON(!file);
7889 +
7890 +       if (i_size_read(file->f_dentry->d_inode)
7891 +           < pos + sizeof(inode->i_generation)) {
7892 +               inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
7893 +               sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
7894 +                                sizeof(inode->i_generation), &pos);
7895 +       } else
7896 +               sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
7897 +                               sizeof(inode->i_generation), &pos);
7898 +       if (sz == sizeof(inode->i_generation))
7899 +               goto out; /* success */
7900 +
7901 +       err = sz;
7902 +       if (unlikely(sz >= 0)) {
7903 +               err = -EIO;
7904 +               AuIOErr("xigen error (%zd)\n", sz);
7905 +       }
7906 +
7907 + out:
7908 +       return err;
7909 +}
7910 +
7911 +int au_xigen_set(struct super_block *sb, struct file *base)
7912 +{
7913 +       int err;
7914 +       struct au_sbinfo *sbinfo;
7915 +       struct file *file;
7916 +
7917 +       SiMustWriteLock(sb);
7918 +
7919 +       sbinfo = au_sbi(sb);
7920 +       file = au_xino_create2(base, sbinfo->si_xigen);
7921 +       err = PTR_ERR(file);
7922 +       if (IS_ERR(file))
7923 +               goto out;
7924 +       err = 0;
7925 +       if (sbinfo->si_xigen)
7926 +               fput(sbinfo->si_xigen);
7927 +       sbinfo->si_xigen = file;
7928 +
7929 + out:
7930 +       return err;
7931 +}
7932 +
7933 +/* put the xigen file held by the sbinfo, if there is one */
7934 +void au_xigen_clr(struct super_block *sb)
7935 +{
7936 +       struct au_sbinfo *sbi;
7937 +
7938 +       SiMustWriteLock(sb);
7939 +       sbi = au_sbi(sb);
7940 +       if (!sbi->si_xigen)
7941 +               return; /* nothing to put */
7942 +       fput(sbi->si_xigen);
7943 +       sbi->si_xigen = NULL;
7944 +}
7945 +
7946 +/* ---------------------------------------------------------------------- */
7947 +
7948 +static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
7949 +                                   ino_t dir_ino)
7950 +{
7951 +       struct dentry *dentry, *d;
7952 +       struct inode *inode;
7953 +       unsigned int sigen;
7954 +
7955 +       dentry = NULL;
7956 +       inode = ilookup(sb, ino);
7957 +       if (!inode)
7958 +               goto out;
7959 +
7960 +       dentry = ERR_PTR(-ESTALE);
7961 +       sigen = au_sigen(sb);
7962 +       if (unlikely(is_bad_inode(inode)
7963 +                    || IS_DEADDIR(inode)
7964 +                    || sigen != au_iigen(inode)))
7965 +               goto out_iput;
7966 +
7967 +       dentry = NULL;
7968 +       if (!dir_ino || S_ISDIR(inode->i_mode))
7969 +               dentry = d_find_alias(inode);
7970 +       else {
7971 +               spin_lock(&dcache_lock);
7972 +               list_for_each_entry(d, &inode->i_dentry, d_alias)
7973 +                       if (!au_test_anon(d)
7974 +                           && d->d_parent->d_inode->i_ino == dir_ino) {
7975 +                               dentry = dget_locked(d);
7976 +                               break;
7977 +                       }
7978 +               spin_unlock(&dcache_lock);
7979 +       }
7980 +       if (unlikely(dentry && sigen != au_digen(dentry))) {
7981 +               dput(dentry);
7982 +               dentry = ERR_PTR(-ESTALE);
7983 +       }
7984 +
7985 + out_iput:
7986 +       iput(inode);
7987 + out:
7988 +       return dentry;
7989 +}
7990 +
7991 +/* ---------------------------------------------------------------------- */
7992 +
7993 +/* todo: dirty? */
7994 +/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
7995 +static struct vfsmount *au_mnt_get(struct super_block *sb)
7996 +{
7997 +       struct mnt_namespace *ns;
7998 +       struct vfsmount *pos, *mnt;
7999 +
8000 +       spin_lock(&vfsmount_lock);
8001 +       /* no get/put ?? */
8002 +       AuDebugOn(!current->nsproxy);
8003 +       ns = current->nsproxy->mnt_ns;
8004 +       AuDebugOn(!ns);
8005 +       mnt = NULL;
8006 +       /* the order (reverse) will not be a problem */
8007 +       list_for_each_entry(pos, &ns->list, mnt_list)
8008 +               if (pos->mnt_sb == sb) {
8009 +                       mnt = mntget(pos);
8010 +                       break;
8011 +               }
8012 +       spin_unlock(&vfsmount_lock);
8013 +       AuDebugOn(!mnt);
8014 +
8015 +       return mnt;
8016 +}
8017 +
8018 +struct au_nfsd_si_lock {
8019 +       const unsigned int sigen;
8020 +       const aufs_bindex_t br_id;
8021 +       unsigned char force_lock;
8022 +};
8023 +
8024 +/* read-lock @sb, then look up the branch index for nsi_lock->br_id */
8025 +static aufs_bindex_t si_nfsd_read_lock(struct super_block *sb,
8026 +                                      struct au_nfsd_si_lock *nsi_lock)
8027 +{
8028 +       aufs_bindex_t idx;
8029 +
8030 +       si_read_lock(sb, AuLock_FLUSH);
8031 +
8032 +       /* branch id may be wrapped around */
8033 +       idx = au_br_index(sb, nsi_lock->br_id);
8034 +       if (idx >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
8035 +               return idx; /* success, @sb stays read-locked */
8036 +
8037 +       /* failure: the lock is kept only when ->force_lock is set */
8038 +       if (!nsi_lock->force_lock)
8039 +               si_read_unlock(sb);
8040 +
8041 +       return -1;
8042 +}
8043 +
8044 +struct find_name_by_ino {
8045 +       int called, found;
8046 +       ino_t ino;
8047 +       char *name;
8048 +       int namelen;
8049 +};
8050 +
8051 +/* readdir callback: keep the name of the entry whose inum is ->ino */
8052 +static int
8053 +find_name_by_ino(void *arg, const char *name, int namelen, loff_t offset,
8054 +                u64 ino, unsigned int d_type)
8055 +{
8056 +       struct find_name_by_ino *want = arg;
8057 +
8058 +       want->called++; /* counted for every entry, matching or not */
8059 +       if (want->ino != ino)
8060 +               return 0;
8061 +       memcpy(want->name, name, namelen);
8062 +       want->namelen = namelen;
8063 +       want->found = 1;
8064 +       return 1; /* non-zero once the name is recorded */
8065 +}
8066 +
8067 +static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
8068 +                                    struct au_nfsd_si_lock *nsi_lock)
8069 +{
8070 +       struct dentry *dentry, *parent;
8071 +       struct file *file;
8072 +       struct inode *dir;
8073 +       struct find_name_by_ino arg;
8074 +       int err;
8075 +
8076 +       parent = path->dentry;
8077 +       if (nsi_lock)
8078 +               si_read_unlock(parent->d_sb);
8079 +       path_get(path);
8080 +       file = dentry_open(parent, path->mnt, au_dir_roflags);
8081 +       dentry = (void *)file;
8082 +       if (IS_ERR(file))
8083 +               goto out;
8084 +
8085 +       dentry = ERR_PTR(-ENOMEM);
8086 +       arg.name = __getname();
8087 +       if (unlikely(!arg.name))
8088 +               goto out_file;
8089 +       arg.ino = ino;
8090 +       arg.found = 0;
8091 +       do {
8092 +               arg.called = 0;
8093 +               /* smp_mb(); */
8094 +               err = vfsub_readdir(file, find_name_by_ino, &arg);
8095 +       } while (!err && !arg.found && arg.called);
8096 +       dentry = ERR_PTR(err);
8097 +       if (unlikely(err))
8098 +               goto out_name;
8099 +       dentry = ERR_PTR(-ENOENT);
8100 +       if (!arg.found)
8101 +               goto out_name;
8102 +
8103 +       /* do not call au_lkup_one() */
8104 +       dir = parent->d_inode;
8105 +       mutex_lock(&dir->i_mutex);
8106 +       dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen);
8107 +       mutex_unlock(&dir->i_mutex);
8108 +       AuTraceErrPtr(dentry);
8109 +       if (IS_ERR(dentry))
8110 +               goto out_name;
8111 +       AuDebugOn(au_test_anon(dentry));
8112 +       if (unlikely(!dentry->d_inode)) {
8113 +               dput(dentry);
8114 +               dentry = ERR_PTR(-ENOENT);
8115 +       }
8116 +
8117 + out_name:
8118 +       __putname(arg.name);
8119 + out_file:
8120 +       fput(file);
8121 + out:
8122 +       if (unlikely(nsi_lock
8123 +                    && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
8124 +               if (!IS_ERR(dentry)) {
8125 +                       dput(dentry);
8126 +                       dentry = ERR_PTR(-ESTALE);
8127 +               }
8128 +       AuTraceErrPtr(dentry);
8129 +       return dentry;
8130 +}
8131 +
8132 +static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
8133 +                                       ino_t dir_ino,
8134 +                                       struct au_nfsd_si_lock *nsi_lock)
8135 +{
8136 +       struct dentry *dentry;
8137 +       struct path path;
8138 +
8139 +       if (dir_ino != AUFS_ROOT_INO) {
8140 +               path.dentry = decode_by_ino(sb, dir_ino, 0);
8141 +               dentry = path.dentry;
8142 +               if (!path.dentry || IS_ERR(path.dentry))
8143 +                       goto out;
8144 +               AuDebugOn(au_test_anon(path.dentry));
8145 +       } else
8146 +               path.dentry = dget(sb->s_root);
8147 +
8148 +       path.mnt = au_mnt_get(sb);
8149 +       dentry = au_lkup_by_ino(&path, ino, nsi_lock);
8150 +       path_put(&path);
8151 +
8152 + out:
8153 +       AuTraceErrPtr(dentry);
8154 +       return dentry;
8155 +}
8156 +
8157 +/* ---------------------------------------------------------------------- */
8158 +/* acceptance callback for exportfs_decode_fh(): every dentry is accepted */
8159 +static int h_acceptable(void *expv, struct dentry *dentry)
8160 +{
8161 +       return 1;
8162 +}
8163 +
8164 +static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
8165 +                          char *buf, int len, struct super_block *sb)
8166 +{
8167 +       char *p;
8168 +       int n;
8169 +       struct path path;
8170 +
8171 +       p = d_path(h_rootpath, buf, len);
8172 +       if (IS_ERR(p))
8173 +               goto out;
8174 +       n = strlen(p);
8175 +
8176 +       path.mnt = h_rootpath->mnt;
8177 +       path.dentry = h_parent;
8178 +       p = d_path(&path, buf, len);
8179 +       if (IS_ERR(p))
8180 +               goto out;
8181 +       if (n != 1)
8182 +               p += n;
8183 +
8184 +       path.mnt = au_mnt_get(sb);
8185 +       path.dentry = sb->s_root;
8186 +       p = d_path(&path, buf, len - strlen(p));
8187 +       mntput(path.mnt);
8188 +       if (IS_ERR(p))
8189 +               goto out;
8190 +       if (n != 1)
8191 +               p[strlen(p)] = '/';
8192 +
8193 + out:
8194 +       AuTraceErrPtr(p);
8195 +       return p;
8196 +}
8197 +
8198 +static noinline_for_stack
8199 +struct dentry *decode_by_path(struct super_block *sb, aufs_bindex_t bindex,
8200 +                             ino_t ino, __u32 *fh, int fh_len,
8201 +                             struct au_nfsd_si_lock *nsi_lock)
8202 +{
8203 +       struct dentry *dentry, *h_parent, *root;
8204 +       struct super_block *h_sb;
8205 +       char *pathname, *p;
8206 +       struct vfsmount *h_mnt;
8207 +       struct au_branch *br;
8208 +       int err;
8209 +       struct nameidata nd;
8210 +
8211 +       br = au_sbr(sb, bindex);
8212 +       /* au_br_get(br); */
8213 +       h_mnt = br->br_mnt;
8214 +       h_sb = h_mnt->mnt_sb;
8215 +       /* todo: call lower fh_to_dentry()? fh_to_parent()? */
8216 +       h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
8217 +                                     fh_len - Fh_tail, fh[Fh_h_type],
8218 +                                     h_acceptable, /*context*/NULL);
8219 +       dentry = h_parent;
8220 +       if (unlikely(!h_parent || IS_ERR(h_parent))) {
8221 +               AuWarn1("%s decode_fh failed, %ld\n",
8222 +                       au_sbtype(h_sb), PTR_ERR(h_parent));
8223 +               goto out;
8224 +       }
8225 +       dentry = NULL;
8226 +       if (unlikely(au_test_anon(h_parent))) {
8227 +               AuWarn1("%s decode_fh returned a disconnected dentry\n",
8228 +                       au_sbtype(h_sb));
8229 +               goto out_h_parent;
8230 +       }
8231 +
8232 +       dentry = ERR_PTR(-ENOMEM);
8233 +       pathname = (void *)__get_free_page(GFP_NOFS);
8234 +       if (unlikely(!pathname))
8235 +               goto out_h_parent;
8236 +
8237 +       root = sb->s_root;
8238 +       nd.path.mnt = h_mnt;
8239 +       di_read_lock_parent(root, !AuLock_IR);
8240 +       nd.path.dentry = au_h_dptr(root, bindex);
8241 +       di_read_unlock(root, !AuLock_IR);
8242 +       p = au_build_path(h_parent, &nd.path, pathname, PAGE_SIZE, sb);
8243 +       dentry = (void *)p;
8244 +       if (IS_ERR(p))
8245 +               goto out_pathname;
8246 +
8247 +       si_read_unlock(sb);
8248 +       err = vfsub_path_lookup(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd);
8249 +       dentry = ERR_PTR(err);
8250 +       if (unlikely(err))
8251 +               goto out_relock;
8252 +
8253 +       dentry = ERR_PTR(-ENOENT);
8254 +       AuDebugOn(au_test_anon(nd.path.dentry));
8255 +       if (unlikely(!nd.path.dentry->d_inode))
8256 +               goto out_nd;
8257 +
8258 +       if (ino != nd.path.dentry->d_inode->i_ino)
8259 +               dentry = au_lkup_by_ino(&nd.path, ino, /*nsi_lock*/NULL);
8260 +       else
8261 +               dentry = dget(nd.path.dentry);
8262 +
8263 + out_nd:
8264 +       path_put(&nd.path);
8265 + out_relock:
8266 +       if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
8267 +               if (!IS_ERR(dentry)) {
8268 +                       dput(dentry);
8269 +                       dentry = ERR_PTR(-ESTALE);
8270 +               }
8271 + out_pathname:
8272 +       free_page((unsigned long)pathname);
8273 + out_h_parent:
8274 +       dput(h_parent);
8275 + out:
8276 +       /* au_br_put(br); */
8277 +       AuTraceErrPtr(dentry);
8278 +       return dentry;
8279 +}
8280 +
8281 +/* ---------------------------------------------------------------------- */
8282 +
8283 +static struct dentry *
8284 +aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
8285 +                 int fh_type)
8286 +{
8287 +       struct dentry *dentry;
8288 +       __u32 *fh = fid->raw;
8289 +       ino_t ino, dir_ino;
8290 +       aufs_bindex_t bindex;
8291 +       struct au_nfsd_si_lock nsi_lock = {
8292 +               .sigen          = fh[Fh_sigen],
8293 +               .br_id          = fh[Fh_br_id],
8294 +               .force_lock     = 0
8295 +       };
8296 +
8297 +       AuDebugOn(fh_len < Fh_tail);
8298 +
8299 +       dentry = ERR_PTR(-ESTALE);
8300 +       /* branch id may be wrapped around */
8301 +       bindex = si_nfsd_read_lock(sb, &nsi_lock);
8302 +       if (unlikely(bindex < 0))
8303 +               goto out;
8304 +       nsi_lock.force_lock = 1;
8305 +
8306 +       /* is this inode still cached? */
8307 +       ino = decode_ino(fh + Fh_ino);
8308 +       AuDebugOn(ino == AUFS_ROOT_INO);
8309 +       dir_ino = decode_ino(fh + Fh_dir_ino);
8310 +       dentry = decode_by_ino(sb, ino, dir_ino);
8311 +       if (IS_ERR(dentry))
8312 +               goto out_unlock;
8313 +       if (dentry)
8314 +               goto accept;
8315 +
8316 +       /* is the parent dir cached? */
8317 +       dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
8318 +       if (IS_ERR(dentry))
8319 +               goto out_unlock;
8320 +       if (dentry)
8321 +               goto accept;
8322 +
8323 +       /* lookup path */
8324 +       dentry = decode_by_path(sb, bindex, ino, fh, fh_len, &nsi_lock);
8325 +       if (IS_ERR(dentry))
8326 +               goto out_unlock;
8327 +       if (unlikely(!dentry))
8328 +               /* todo?: make it ESTALE */
8329 +               goto out_unlock;
8330 +
8331 + accept:
8332 +       if (dentry->d_inode->i_generation == fh[Fh_igen])
8333 +               goto out_unlock; /* success */
8334 +
8335 +       dput(dentry);
8336 +       dentry = ERR_PTR(-ESTALE);
8337 + out_unlock:
8338 +       si_read_unlock(sb);
8339 + out:
8340 +       AuTraceErrPtr(dentry);
8341 +       return dentry;
8342 +}
8343 +
8344 +#if 0 /* reserved for future use */
8345 +/* support subtreecheck option */
8346 +static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
8347 +                                       int fh_len, int fh_type)
8348 +{
8349 +       struct dentry *parent;
8350 +       __u32 *fh = fid->raw;
8351 +       ino_t dir_ino;
8352 +
8353 +       dir_ino = decode_ino(fh + Fh_dir_ino);
8354 +       parent = decode_by_ino(sb, dir_ino, 0);
8355 +       if (IS_ERR(parent))
8356 +               goto out;
8357 +       if (!parent)
8358 +               parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
8359 +                                       dir_ino, fh, fh_len);
8360 +
8361 + out:
8362 +       AuTraceErrPtr(parent);
8363 +       return parent;
8364 +}
8365 +#endif
8366 +
8367 +/* ---------------------------------------------------------------------- */
8368 +
8369 +static int aufs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len,
8370 +                         int connectable)
8371 +{
8372 +       int err;
8373 +       aufs_bindex_t bindex, bend;
8374 +       struct super_block *sb, *h_sb;
8375 +       struct inode *inode;
8376 +       struct dentry *parent, *h_parent;
8377 +       struct au_branch *br;
8378 +
8379 +       AuDebugOn(au_test_anon(dentry));
8380 +
8381 +       parent = NULL;
8382 +       err = -ENOSPC;
8383 +       if (unlikely(*max_len <= Fh_tail)) {
8384 +               AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
8385 +               goto out;
8386 +       }
8387 +
8388 +       err = FILEID_ROOT;
8389 +       if (IS_ROOT(dentry)) {
8390 +               AuDebugOn(dentry->d_inode->i_ino != AUFS_ROOT_INO);
8391 +               goto out;
8392 +       }
8393 +
8394 +       err = -EIO;
8395 +       h_parent = NULL;
8396 +       sb = dentry->d_sb;
8397 +       aufs_read_lock(dentry, AuLock_FLUSH | AuLock_IR);
8398 +       parent = dget_parent(dentry);
8399 +       di_read_lock_parent(parent, !AuLock_IR);
8400 +       inode = dentry->d_inode;
8401 +       AuDebugOn(!inode);
8402 +#ifdef CONFIG_AUFS_DEBUG
8403 +       if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
8404 +               AuWarn1("NFS-exporting requires xino\n");
8405 +#endif
8406 +
8407 +       bend = au_dbtaildir(parent);
8408 +       for (bindex = au_dbstart(parent); bindex <= bend; bindex++) {
8409 +               h_parent = au_h_dptr(parent, bindex);
8410 +               if (h_parent) {
8411 +                       dget(h_parent);
8412 +                       break;
8413 +               }
8414 +       }
8415 +       if (unlikely(!h_parent))
8416 +               goto out_unlock;
8417 +
8418 +       err = -EPERM;
8419 +       br = au_sbr(sb, bindex);
8420 +       h_sb = br->br_mnt->mnt_sb;
8421 +       if (unlikely(!h_sb->s_export_op)) {
8422 +               AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
8423 +               goto out_dput;
8424 +       }
8425 +
8426 +       fh[Fh_br_id] = br->br_id;
8427 +       fh[Fh_sigen] = au_sigen(sb);
8428 +       encode_ino(fh + Fh_ino, inode->i_ino);
8429 +       encode_ino(fh + Fh_dir_ino, parent->d_inode->i_ino);
8430 +       fh[Fh_igen] = inode->i_generation;
8431 +
8432 +       *max_len -= Fh_tail;
8433 +       fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
8434 +                                          max_len,
8435 +                                          /*connectable or subtreecheck*/0);
8436 +       err = fh[Fh_h_type];
8437 +       *max_len += Fh_tail;
8438 +       /* todo: macros? */
8439 +       if (err != 255)
8440 +               err = 99;
8441 +       else
8442 +               AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
8443 +
8444 + out_dput:
8445 +       dput(h_parent);
8446 + out_unlock:
8447 +       di_read_unlock(parent, !AuLock_IR);
8448 +       dput(parent);
8449 +       aufs_read_unlock(dentry, AuLock_IR);
8450 + out:
8451 +       if (unlikely(err < 0))
8452 +               err = 255;
8453 +       return err;
8454 +}
8455 +
8456 +/* ---------------------------------------------------------------------- */
8457 +
8458 +static struct export_operations aufs_export_op = {
8459 +       .fh_to_dentry   = aufs_fh_to_dentry,
8460 +       /* .fh_to_parent        = aufs_fh_to_parent, */
8461 +       .encode_fh      = aufs_encode_fh
8462 +};
8463 +
8464 +void au_export_init(struct super_block *sb)
8465 +{
8466 +       struct au_sbinfo *sbinfo;
8467 +       __u32 u;
8468 +
8469 +       sb->s_export_op = &aufs_export_op;
8470 +       sbinfo = au_sbi(sb);
8471 +       sbinfo->si_xigen = NULL;
8472 +       get_random_bytes(&u, sizeof(u));
8473 +       BUILD_BUG_ON(sizeof(u) != sizeof(int));
8474 +       atomic_set(&sbinfo->si_xigen_next, u);
8475 +}
8476 diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c
8477 new file mode 100644
8478 index 0000000..867d1e1
8479 --- /dev/null
8480 +++ b/fs/aufs/f_op.c
8481 @@ -0,0 +1,810 @@
8482 +/*
8483 + * Copyright (C) 2005-2009 Junjiro R. Okajima
8484 + *
8485 + * This program, aufs is free software; you can redistribute it and/or modify
8486 + * it under the terms of the GNU General Public License as published by
8487 + * the Free Software Foundation; either version 2 of the License, or
8488 + * (at your option) any later version.
8489 + *
8490 + * This program is distributed in the hope that it will be useful,
8491 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
8492 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
8493 + * GNU General Public License for more details.
8494 + *
8495 + * You should have received a copy of the GNU General Public License
8496 + * along with this program; if not, write to the Free Software
8497 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
8498 + */
8499 +
8500 +/*
8501 + * file and vm operations
8502 + */
8503 +
8504 +#include <linux/file.h>
8505 +#include <linux/fs_stack.h>
8506 +#include <linux/mm.h>
8507 +#include <linux/security.h>
8508 +#include "aufs.h"
8509 +
8510 +/* common function to regular file and dir */
8511 +int aufs_flush(struct file *file, fl_owner_t id)
8512 +{
8513 +       int err;
8514 +       aufs_bindex_t bindex, bend;
8515 +       struct dentry *dentry;
8516 +       struct file *h_file;
8517 +
8518 +       dentry = file->f_dentry;
8519 +       si_noflush_read_lock(dentry->d_sb);
8520 +       fi_read_lock(file);
8521 +       di_read_lock_child(dentry, AuLock_IW);
8522 +
8523 +       err = 0;
8524 +       bend = au_fbend(file);
8525 +       for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
8526 +               h_file = au_h_fptr(file, bindex);
8527 +               if (!h_file || !h_file->f_op || !h_file->f_op->flush)
8528 +                       continue;
8529 +
8530 +               err = h_file->f_op->flush(h_file, id);
8531 +               if (!err)
8532 +                       vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
8533 +               /*ignore*/
8534 +       }
8535 +       au_cpup_attr_timesizes(dentry->d_inode);
8536 +
8537 +       di_read_unlock(dentry, AuLock_IW);
8538 +       fi_read_unlock(file);
8539 +       si_read_unlock(dentry->d_sb);
8540 +       return err;
8541 +}
8542 +
8543 +/* ---------------------------------------------------------------------- */
8544 +
8545 +int au_do_open_nondir(struct file *file, int flags)
8546 +{
8547 +       int err;
8548 +       aufs_bindex_t bindex;
8549 +       struct file *h_file;
8550 +       struct dentry *dentry;
8551 +       struct au_finfo *finfo;
8552 +
8553 +       FiMustWriteLock(file);
8554 +
8555 +       err = 0;
8556 +       dentry = file->f_dentry;
8557 +       finfo = au_fi(file);
8558 +       finfo->fi_h_vm_ops = NULL;
8559 +       finfo->fi_vm_ops = NULL;
8560 +       bindex = au_dbstart(dentry);
8561 +       /* O_TRUNC is processed already */
8562 +       BUG_ON(au_test_ro(dentry->d_sb, bindex, dentry->d_inode)
8563 +              && (flags & O_TRUNC));
8564 +
8565 +       h_file = au_h_open(dentry, bindex, flags, file);
8566 +       if (IS_ERR(h_file))
8567 +               err = PTR_ERR(h_file);
8568 +       else {
8569 +               au_set_fbstart(file, bindex);
8570 +               au_set_fbend(file, bindex);
8571 +               au_set_h_fptr(file, bindex, h_file);
8572 +               au_update_figen(file);
8573 +               /* todo: necessary? */
8574 +               /* file->f_ra = h_file->f_ra; */
8575 +       }
8576 +       return err;
8577 +}
8578 +
8579 +static int aufs_open_nondir(struct inode *inode __maybe_unused,
8580 +                           struct file *file)
8581 +{
8582 +       return au_do_open(file, au_do_open_nondir);
8583 +}
8584 +
8585 +int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
8586 +{
8587 +       kfree(au_fi(file)->fi_vm_ops);
8588 +       au_finfo_fin(file);
8589 +       return 0;
8590 +}
8591 +
8592 +/* ---------------------------------------------------------------------- */
8593 +
8594 +static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
8595 +                        loff_t *ppos)
8596 +{
8597 +       ssize_t err;
8598 +       struct dentry *dentry;
8599 +       struct file *h_file;
8600 +       struct super_block *sb;
8601 +
8602 +       dentry = file->f_dentry;
8603 +       sb = dentry->d_sb;
8604 +       si_read_lock(sb, AuLock_FLUSH);
8605 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
8606 +       if (unlikely(err))
8607 +               goto out;
8608 +
8609 +       h_file = au_h_fptr(file, au_fbstart(file));
8610 +       err = vfsub_read_u(h_file, buf, count, ppos);
8611 +       /* todo: necessary? */
8612 +       /* file->f_ra = h_file->f_ra; */
8613 +       fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
8614 +
8615 +       di_read_unlock(dentry, AuLock_IR);
8616 +       fi_read_unlock(file);
8617 + out:
8618 +       si_read_unlock(sb);
8619 +       return err;
8620 +}
8621 +
8622 +static ssize_t aufs_write(struct file *file, const char __user *ubuf,
8623 +                         size_t count, loff_t *ppos)
8624 +{
8625 +       ssize_t err;
8626 +       aufs_bindex_t bstart;
8627 +       struct au_pin pin;
8628 +       struct dentry *dentry;
8629 +       struct inode *inode;
8630 +       struct super_block *sb;
8631 +       struct file *h_file;
8632 +       char __user *buf = (char __user *)ubuf;
8633 +
8634 +       dentry = file->f_dentry;
8635 +       sb = dentry->d_sb;
8636 +       inode = dentry->d_inode;
8637 +       mutex_lock(&inode->i_mutex);
8638 +       si_read_lock(sb, AuLock_FLUSH);
8639 +
8640 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
8641 +       if (unlikely(err))
8642 +               goto out;
8643 +
8644 +       err = au_ready_to_write(file, -1, &pin);
8645 +       di_downgrade_lock(dentry, AuLock_IR);
8646 +       if (unlikely(err))
8647 +               goto out_unlock;
8648 +
8649 +       bstart = au_fbstart(file);
8650 +       h_file = au_h_fptr(file, bstart);
8651 +       au_unpin(&pin);
8652 +       err = vfsub_write_u(h_file, buf, count, ppos);
8653 +       au_cpup_attr_timesizes(inode);
8654 +       inode->i_mode = h_file->f_dentry->d_inode->i_mode;
8655 +
8656 + out_unlock:
8657 +       di_read_unlock(dentry, AuLock_IR);
8658 +       fi_write_unlock(file);
8659 + out:
8660 +       si_read_unlock(sb);
8661 +       mutex_unlock(&inode->i_mutex);
8662 +       return err;
8663 +}
8664 +
8665 +static ssize_t au_do_aio(struct file *h_file, int rw, struct kiocb *kio,
8666 +                        const struct iovec *iov, unsigned long nv, loff_t pos)
8667 +{
8668 +       ssize_t err;
8669 +       struct file *file;
8670 +
8671 +       err = security_file_permission(h_file, rw);
8672 +       if (unlikely(err))
8673 +               goto out;
8674 +
8675 +       file = kio->ki_filp;
8676 +       if (!is_sync_kiocb(kio)) {
8677 +               get_file(h_file);
8678 +               fput(file);
8679 +       }
8680 +       kio->ki_filp = h_file;
8681 +       if (rw == MAY_READ)
8682 +               err = h_file->f_op->aio_read(kio, iov, nv, pos);
8683 +       else if (rw == MAY_WRITE)
8684 +               err = h_file->f_op->aio_write(kio, iov, nv, pos);
8685 +       else
8686 +               BUG();
8687 +       /* do not restore kio->ki_filp */
8688 +
8689 + out:
8690 +       return err;
8691 +}
8692 +
8693 +static ssize_t aufs_aio_read(struct kiocb *kio, const struct iovec *iov,
8694 +                            unsigned long nv, loff_t pos)
8695 +{
8696 +       ssize_t err;
8697 +       struct file *file, *h_file;
8698 +       struct dentry *dentry;
8699 +       struct super_block *sb;
8700 +
8701 +       file = kio->ki_filp;
8702 +       dentry = file->f_dentry;
8703 +       sb = dentry->d_sb;
8704 +       si_read_lock(sb, AuLock_FLUSH);
8705 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
8706 +       if (unlikely(err))
8707 +               goto out;
8708 +
8709 +       err = -ENOSYS;
8710 +       h_file = au_h_fptr(file, au_fbstart(file));
8711 +       if (h_file->f_op && h_file->f_op->aio_read) {
8712 +               err = au_do_aio(h_file, MAY_READ, kio, iov, nv, pos);
8713 +               /* todo: necessary? */
8714 +               /* file->f_ra = h_file->f_ra; */
8715 +               fsstack_copy_attr_atime(dentry->d_inode,
8716 +                                       h_file->f_dentry->d_inode);
8717 +       } else
8718 +               /* currently there is no such fs */
8719 +               WARN_ON_ONCE(h_file->f_op && h_file->f_op->read);
8720 +
8721 +       di_read_unlock(dentry, AuLock_IR);
8722 +       fi_read_unlock(file);
8723 +
8724 + out:
8725 +       si_read_unlock(sb);
8726 +       return err;
8727 +}
8728 +
8729 +static ssize_t aufs_aio_write(struct kiocb *kio, const struct iovec *iov,
8730 +                             unsigned long nv, loff_t pos)
8731 +{
8732 +       ssize_t err;
8733 +       struct au_pin pin;
8734 +       struct dentry *dentry;
8735 +       struct inode *inode;
8736 +       struct super_block *sb;
8737 +       struct file *file, *h_file;
8738 +
8739 +       file = kio->ki_filp;
8740 +       dentry = file->f_dentry;
8741 +       sb = dentry->d_sb;
8742 +       inode = dentry->d_inode;
8743 +       mutex_lock(&inode->i_mutex);
8744 +       si_read_lock(sb, AuLock_FLUSH);
8745 +
8746 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
8747 +       if (unlikely(err))
8748 +               goto out;
8749 +
8750 +       err = au_ready_to_write(file, -1, &pin);
8751 +       di_downgrade_lock(dentry, AuLock_IR);
8752 +       if (unlikely(err))
8753 +               goto out_unlock;
8754 +
8755 +       err = -ENOSYS;
8756 +       h_file = au_h_fptr(file, au_fbstart(file));
8757 +       au_unpin(&pin);
8758 +       if (h_file->f_op && h_file->f_op->aio_write) {
8759 +               err = au_do_aio(h_file, MAY_WRITE, kio, iov, nv, pos);
8760 +               au_cpup_attr_timesizes(inode);
8761 +               inode->i_mode = h_file->f_dentry->d_inode->i_mode;
8762 +       } else
8763 +               /* currently there is no such fs */
8764 +               WARN_ON_ONCE(h_file->f_op && h_file->f_op->write);
8765 +
8766 + out_unlock:
8767 +       di_read_unlock(dentry, AuLock_IR);
8768 +       fi_write_unlock(file);
8769 + out:
8770 +       si_read_unlock(sb);
8771 +       mutex_unlock(&inode->i_mutex);
8772 +       return err;
8773 +}
8774 +
8775 +static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
8776 +                               struct pipe_inode_info *pipe, size_t len,
8777 +                               unsigned int flags)
8778 +{
8779 +       ssize_t err;
8780 +       struct file *h_file;
8781 +       struct dentry *dentry;
8782 +       struct super_block *sb;
8783 +
8784 +       dentry = file->f_dentry;
8785 +       sb = dentry->d_sb;
8786 +       si_read_lock(sb, AuLock_FLUSH);
8787 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
8788 +       if (unlikely(err))
8789 +               goto out;
8790 +
8791 +       err = -EINVAL;
8792 +       h_file = au_h_fptr(file, au_fbstart(file));
8793 +       if (au_test_loopback_kthread()) {
8794 +               file->f_mapping = h_file->f_mapping;
8795 +               smp_mb(); /* unnecessary? */
8796 +       }
8797 +       err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
8798 +       /* todo: necessary? */
8799 +       /* file->f_ra = h_file->f_ra; */
8800 +       fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
8801 +
8802 +       di_read_unlock(dentry, AuLock_IR);
8803 +       fi_read_unlock(file);
8804 +
8805 + out:
8806 +       si_read_unlock(sb);
8807 +       return err;
8808 +}
8809 +
8810 +static ssize_t
8811 +aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
8812 +                 size_t len, unsigned int flags)
8813 +{
8814 +       ssize_t err;
8815 +       struct au_pin pin;
8816 +       struct dentry *dentry;
8817 +       struct inode *inode;
8818 +       struct super_block *sb;
8819 +       struct file *h_file;
8820 +
8821 +       dentry = file->f_dentry;
8822 +       inode = dentry->d_inode;
8823 +       mutex_lock(&inode->i_mutex);
8824 +       sb = dentry->d_sb;
8825 +       si_read_lock(sb, AuLock_FLUSH);
8826 +
8827 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
8828 +       if (unlikely(err))
8829 +               goto out;
8830 +
8831 +       err = au_ready_to_write(file, -1, &pin);
8832 +       di_downgrade_lock(dentry, AuLock_IR);
8833 +       if (unlikely(err))
8834 +               goto out_unlock;
8835 +
8836 +       h_file = au_h_fptr(file, au_fbstart(file));
8837 +       au_unpin(&pin);
8838 +       err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
8839 +       au_cpup_attr_timesizes(inode);
8840 +       inode->i_mode = h_file->f_dentry->d_inode->i_mode;
8841 +
8842 + out_unlock:
8843 +       di_read_unlock(dentry, AuLock_IR);
8844 +       fi_write_unlock(file);
8845 + out:
8846 +       si_read_unlock(sb);
8847 +       mutex_unlock(&inode->i_mutex);
8848 +       return err;
8849 +}
8850 +
8851 +/* ---------------------------------------------------------------------- */
8852 +
8853 +static struct file *au_safe_file(struct vm_area_struct *vma)
8854 +{
8855 +       struct file *file;
8856 +
8857 +       file = vma->vm_file;
8858 +       if (file->private_data && au_test_aufs(file->f_dentry->d_sb))
8859 +               return file;
8860 +       return NULL;
8861 +}
8862 +
8863 +static void au_reset_file(struct vm_area_struct *vma, struct file *file)
8864 +{
8865 +       vma->vm_file = file;
8866 +       /* smp_mb(); */ /* flush vm_file */
8867 +}
8868 +
8869 +static int aufs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
8870 +{
8871 +       int err;
8872 +       static DECLARE_WAIT_QUEUE_HEAD(wq);
8873 +       struct file *file, *h_file;
8874 +       struct au_finfo *finfo;
8875 +
8876 +       /* todo: non-robr mode, use vm_file as it is? */
8877 +       wait_event(wq, (file = au_safe_file(vma)));
8878 +
8879 +       /* do not revalidate, no si lock */
8880 +       finfo = au_fi(file);
8881 +       h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file;
8882 +       AuDebugOn(!h_file || !finfo->fi_h_vm_ops);
8883 +
8884 +       mutex_lock(&finfo->fi_vm_mtx);
8885 +       vma->vm_file = h_file;
8886 +       err = finfo->fi_h_vm_ops->fault(vma, vmf);
8887 +       /* todo: necessary? */
8888 +       /* file->f_ra = h_file->f_ra; */
8889 +       au_reset_file(vma, file);
8890 +       mutex_unlock(&finfo->fi_vm_mtx);
8891 +#if 0 /* def CONFIG_SMP */
8892 +       /* wake_up_nr(&wq, online_cpu - 1); */
8893 +       wake_up_all(&wq);
8894 +#else
8895 +       wake_up(&wq);
8896 +#endif
8897 +
8898 +       return err;
8899 +}
8900 +
8901 +static int aufs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
8902 +{
8903 +       int err;
8904 +       static DECLARE_WAIT_QUEUE_HEAD(wq);
8905 +       struct file *file, *h_file;
8906 +       struct au_finfo *finfo;
8907 +
8908 +       wait_event(wq, (file = au_safe_file(vma)));
8909 +
8910 +       finfo = au_fi(file);
8911 +       h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file;
8912 +       AuDebugOn(!h_file || !finfo->fi_h_vm_ops);
8913 +
8914 +       mutex_lock(&finfo->fi_vm_mtx);
8915 +       vma->vm_file = h_file;
8916 +       err = finfo->fi_h_vm_ops->page_mkwrite(vma, page);
8917 +       au_reset_file(vma, file);
8918 +       mutex_unlock(&finfo->fi_vm_mtx);
8919 +       wake_up(&wq);
8920 +
8921 +       return err;
8922 +}
8923 +
8924 +static void aufs_vm_close(struct vm_area_struct *vma)
8925 +{
8926 +       static DECLARE_WAIT_QUEUE_HEAD(wq);
8927 +       struct file *file, *h_file;
8928 +       struct au_finfo *finfo;
8929 +
8930 +       wait_event(wq, (file = au_safe_file(vma)));
8931 +
8932 +       finfo = au_fi(file);
8933 +       h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file;
8934 +       AuDebugOn(!h_file || !finfo->fi_h_vm_ops);
8935 +
8936 +       mutex_lock(&finfo->fi_vm_mtx);
8937 +       vma->vm_file = h_file;
8938 +       finfo->fi_h_vm_ops->close(vma);
8939 +       au_reset_file(vma, file);
8940 +       mutex_unlock(&finfo->fi_vm_mtx);
8941 +       wake_up(&wq);
8942 +}
8943 +
8944 +static struct vm_operations_struct aufs_vm_ops = {
8945 +       /* .close and .page_mkwrite are not set by default */
8946 +       .fault          = aufs_fault,
8947 +};
8948 +
8949 +/* ---------------------------------------------------------------------- */
8950 +
8951 +static struct vm_operations_struct *au_vm_ops(struct file *h_file,
8952 +                                             struct vm_area_struct *vma)
8953 +{
8954 +       struct vm_operations_struct *vm_ops;
8955 +       int err;
8956 +
8957 +       /* todo: call security_file_mmap() here */
8958 +
8959 +       vm_ops = ERR_PTR(-ENODEV);
8960 +       if (!h_file->f_op || !h_file->f_op->mmap)
8961 +               goto out;
8962 +
8963 +       err = h_file->f_op->mmap(h_file, vma);
8964 +       vm_ops = ERR_PTR(err);
8965 +       if (unlikely(err))
8966 +               goto out;
8967 +
8968 +       vm_ops = vma->vm_ops;
8969 +       err = do_munmap(current->mm, vma->vm_start,
8970 +                       vma->vm_end - vma->vm_start);
8971 +       if (unlikely(err)) {
8972 +               AuIOErr("failed internal unmapping %.*s, %d\n",
8973 +                       AuDLNPair(h_file->f_dentry), err);
8974 +               vm_ops = ERR_PTR(-EIO);
8975 +       }
8976 +
8977 + out:
8978 +       return vm_ops;
8979 +}
8980 +
8981 +static int au_custom_vm_ops(struct au_finfo *finfo, struct vm_area_struct *vma)
8982 +{
8983 +       int err;
8984 +       struct vm_operations_struct *h_ops;
8985 +
8986 +       AuRwMustAnyLock(&finfo->fi_rwsem);
8987 +
8988 +       err = 0;
8989 +       h_ops = finfo->fi_h_vm_ops;
8990 +       AuDebugOn(!h_ops);
8991 +       if ((!h_ops->page_mkwrite && !h_ops->close)
8992 +           || finfo->fi_vm_ops)
8993 +               goto out;
8994 +
8995 +       err = -ENOMEM;
8996 +       finfo->fi_vm_ops = kmemdup(&aufs_vm_ops, sizeof(aufs_vm_ops), GFP_NOFS);
8997 +       if (unlikely(!finfo->fi_vm_ops))
8998 +               goto out;
8999 +
9000 +       err = 0;
9001 +       if (h_ops->page_mkwrite)
9002 +               finfo->fi_vm_ops->page_mkwrite = aufs_page_mkwrite;
9003 +       if (h_ops->close)
9004 +               finfo->fi_vm_ops->close = aufs_vm_close;
9005 +
9006 +       vma->vm_ops = finfo->fi_vm_ops;
9007 +
9008 + out:
9009 +       return err;
9010 +}
9011 +
9012 +static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
9013 +{
9014 +       int err;
9015 +       unsigned char wlock, mmapped;
9016 +       struct dentry *dentry;
9017 +       struct super_block *sb;
9018 +       struct file *h_file;
9019 +       struct vm_operations_struct *vm_ops;
9020 +
9021 +       dentry = file->f_dentry;
9022 +       wlock = !!(file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
9023 +       sb = dentry->d_sb;
9024 +       si_read_lock(sb, AuLock_FLUSH);
9025 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
9026 +       if (unlikely(err))
9027 +               goto out;
9028 +
9029 +       mmapped = !!au_test_mmapped(file);
9030 +       if (wlock) {
9031 +               struct au_pin pin;
9032 +
9033 +               err = au_ready_to_write(file, -1, &pin);
9034 +               di_downgrade_lock(dentry, AuLock_IR);
9035 +               if (unlikely(err))
9036 +                       goto out_unlock;
9037 +               au_unpin(&pin);
9038 +       } else
9039 +               di_downgrade_lock(dentry, AuLock_IR);
9040 +
9041 +       h_file = au_h_fptr(file, au_fbstart(file));
9042 +       if (!mmapped && au_test_fs_bad_mapping(h_file->f_dentry->d_sb)) {
9043 +               /*
9044 +                * by this assignment, f_mapping will differ from aufs inode
9045 +                * i_mapping.
9046 +                * if someone else mixes the use of f_dentry->d_inode and
9047 +                * f_mapping->host, then a problem may arise.
9048 +                */
9049 +               file->f_mapping = h_file->f_mapping;
9050 +       }
9051 +
9052 +       vm_ops = NULL;
9053 +       if (!mmapped) {
9054 +               vm_ops = au_vm_ops(h_file, vma);
9055 +               err = PTR_ERR(vm_ops);
9056 +               if (IS_ERR(vm_ops))
9057 +                       goto out_unlock;
9058 +       }
9059 +
9060 +       /*
9061 +        * unnecessary to handle MAP_DENYWRITE and deny_write_access()?
9062 +        * currently MAP_DENYWRITE from userspace is ignored, but elf loader
9063 +        * sets it. when FMODE_EXEC is set (by open_exec() or sys_uselib()),
9064 +        * both the aufs file and the lower file are deny_write_access()-ed.
9065 +        * finally I hope we can skip handling MAP_DENYWRITE here.
9066 +        */
9067 +       err = generic_file_mmap(file, vma);
9068 +       if (unlikely(err))
9069 +               goto out_unlock;
9070 +
9071 +       vma->vm_ops = &aufs_vm_ops;
9072 +       if (!mmapped) {
9073 +               struct au_finfo *finfo = au_fi(file);
9074 +
9075 +               finfo->fi_h_vm_ops = vm_ops;
9076 +               mutex_init(&finfo->fi_vm_mtx);
9077 +       }
9078 +
9079 +       err = au_custom_vm_ops(au_fi(file), vma);
9080 +       if (unlikely(err))
9081 +               goto out_unlock;
9082 +
9083 +       vfsub_file_accessed(h_file);
9084 +       fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
9085 +
9086 + out_unlock:
9087 +       di_read_unlock(dentry, AuLock_IR);
9088 +       fi_write_unlock(file);
9089 + out:
9090 +       si_read_unlock(sb);
9091 +       return err;
9092 +}
9093 +
9094 +/* ---------------------------------------------------------------------- */
9095 +
9096 +static int aufs_fsync_nondir(struct file *file, struct dentry *dentry,
9097 +                            int datasync)
9098 +{
9099 +       int err;
9100 +       struct au_pin pin;
9101 +       struct inode *inode;
9102 +       struct file *h_file;
9103 +       struct super_block *sb;
9104 +
9105 +       inode = dentry->d_inode;
9106 +       IMustLock(file->f_mapping->host);
9107 +       if (inode != file->f_mapping->host) {
9108 +               mutex_unlock(&file->f_mapping->host->i_mutex);
9109 +               mutex_lock(&inode->i_mutex);
9110 +       }
9111 +       IMustLock(inode);
9112 +
9113 +       sb = dentry->d_sb;
9114 +       si_read_lock(sb, AuLock_FLUSH);
9115 +
9116 +       err = 0; /* -EBADF; */ /* posix? */
9117 +       if (unlikely(!(file->f_mode & FMODE_WRITE)))
9118 +               goto out;
9119 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
9120 +       if (unlikely(err))
9121 +               goto out;
9122 +
9123 +       err = au_ready_to_write(file, -1, &pin);
9124 +       di_downgrade_lock(dentry, AuLock_IR);
9125 +       if (unlikely(err))
9126 +               goto out_unlock;
9127 +       au_unpin(&pin);
9128 +
9129 +       err = -EINVAL;
9130 +       h_file = au_h_fptr(file, au_fbstart(file));
9131 +       if (h_file->f_op && h_file->f_op->fsync) {
9132 +               struct dentry *h_d;
9133 +               struct mutex *h_mtx;
9134 +
9135 +               /*
9136 +                * no filemap_fdatawrite() since an aufs file, except a dir,
9137 +                * has no mapping of its own.
9138 +                */
9139 +               h_d = h_file->f_dentry;
9140 +               h_mtx = &h_d->d_inode->i_mutex;
9141 +               mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
9142 +               err = h_file->f_op->fsync(h_file, h_d, datasync);
9143 +               if (!err)
9144 +                       vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
9145 +               /*ignore*/
9146 +               au_cpup_attr_timesizes(inode);
9147 +               mutex_unlock(h_mtx);
9148 +       }
9149 +
9150 + out_unlock:
9151 +       di_read_unlock(dentry, AuLock_IR);
9152 +       fi_write_unlock(file);
9153 + out:
9154 +       si_read_unlock(sb);
9155 +       if (inode != file->f_mapping->host) {
9156 +               mutex_unlock(&inode->i_mutex);
9157 +               mutex_lock(&file->f_mapping->host->i_mutex);
9158 +       }
9159 +       return err;
9160 +}
9161 +
9162 +/* no one supports this operation, currently */
9163 +#if 0
9164 +static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
9165 +{
9166 +       int err;
9167 +       struct au_pin pin;
9168 +       struct dentry *dentry;
9169 +       struct inode *inode;
9170 +       struct file *file, *h_file;
9171 +       struct super_block *sb;
9172 +
9173 +       file = kio->ki_filp;
9174 +       dentry = file->f_dentry;
9175 +       inode = dentry->d_inode;
9176 +       mutex_lock(&inode->i_mutex);
9177 +
9178 +       sb = dentry->d_sb;
9179 +       si_read_lock(sb, AuLock_FLUSH);
9180 +
9181 +       err = 0; /* -EBADF; */ /* posix? */
9182 +       if (unlikely(!(file->f_mode & FMODE_WRITE)))
9183 +               goto out;
9184 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
9185 +       if (unlikely(err))
9186 +               goto out;
9187 +
9188 +       err = au_ready_to_write(file, -1, &pin);
9189 +       di_downgrade_lock(dentry, AuLock_IR);
9190 +       if (unlikely(err))
9191 +               goto out_unlock;
9192 +       au_unpin(&pin);
9193 +
9194 +       err = -ENOSYS;
9195 +       h_file = au_h_fptr(file, au_fbstart(file));
9196 +       if (h_file->f_op && h_file->f_op->aio_fsync) {
9197 +               struct dentry *h_d;
9198 +               struct mutex *h_mtx;
9199 +
9200 +               h_d = h_file->f_dentry;
9201 +               h_mtx = &h_d->d_inode->i_mutex;
9202 +               if (!is_sync_kiocb(kio)) {
9203 +                       get_file(h_file);
9204 +                       fput(file);
9205 +               }
9206 +               kio->ki_filp = h_file;
9207 +               err = h_file->f_op->aio_fsync(kio, datasync);
9208 +               mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
9209 +               if (!err)
9210 +                       vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
9211 +               /*ignore*/
9212 +               au_cpup_attr_timesizes(inode);
9213 +               mutex_unlock(h_mtx);
9214 +       }
9215 +
9216 + out_unlock:
9217 +       di_read_unlock(dentry, AuLock_IR);
9218 +       fi_write_unlock(file);
9219 + out:
9220 +       si_read_unlock(sb);
9221 +       mutex_unlock(&inode->i_mutex);
9222 +       return err;
9223 +}
9224 +#endif
9225 +
9226 +static int aufs_fasync(int fd, struct file *file, int flag)
9227 +{
9228 +       int err;
9229 +       struct file *h_file;
9230 +       struct dentry *dentry;
9231 +       struct super_block *sb;
9232 +
9233 +       dentry = file->f_dentry;
9234 +       sb = dentry->d_sb;
9235 +       si_read_lock(sb, AuLock_FLUSH);
9236 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
9237 +       if (unlikely(err))
9238 +               goto out;
9239 +
9240 +       h_file = au_h_fptr(file, au_fbstart(file));
9241 +       if (h_file->f_op && h_file->f_op->fasync)
9242 +               err = h_file->f_op->fasync(fd, h_file, flag);
9243 +
9244 +       di_read_unlock(dentry, AuLock_IR);
9245 +       fi_read_unlock(file);
9246 +
9247 + out:
9248 +       si_read_unlock(sb);
9249 +       return err;
9250 +}
9251 +
9252 +/* ---------------------------------------------------------------------- */
9253 +
9254 +/* no one supports this operation, currently */
9255 +#if 0
9256 +static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
9257 +                            size_t len, loff_t *pos , int more)
9258 +{
9259 +}
9260 +#endif
9261 +
9262 +/* ---------------------------------------------------------------------- */
9263 +
9264 +struct file_operations aufs_file_fop = {
9265 +       /*
9266 +        * while generic_file_llseek/_unlocked() don't use BKL,
9267 +        * don't use them since they operate on file->f_mapping->host.
9268 +        * in aufs, it may be a real file and may confuse users by UDBA.
9269 +        */
9270 +       /* .llseek              = generic_file_llseek, */
9271 +
9272 +       .read           = aufs_read,
9273 +       .write          = aufs_write,
9274 +       .aio_read       = aufs_aio_read,
9275 +       .aio_write      = aufs_aio_write,
9276 +       .unlocked_ioctl = aufs_ioctl_nondir,
9277 +       .mmap           = aufs_mmap,
9278 +       .open           = aufs_open_nondir,
9279 +       .flush          = aufs_flush,
9280 +       .release        = aufs_release_nondir,
9281 +       .fsync          = aufs_fsync_nondir,
9282 +       /* .aio_fsync   = aufs_aio_fsync_nondir, */
9283 +       .fasync         = aufs_fasync,
9284 +       /* .sendpage    = aufs_sendpage, */
9285 +       .splice_write   = aufs_splice_write,
9286 +       .splice_read    = aufs_splice_read,
9287 +#if 0
9288 +       .aio_splice_write = aufs_aio_splice_write,
9289 +       .aio_splice_read  = aufs_aio_splice_read
9290 +#endif
9291 +};
9292 diff --git a/fs/aufs/f_op_sp.c b/fs/aufs/f_op_sp.c
9293 new file mode 100644
9294 index 0000000..03fddc0
9295 --- /dev/null
9296 +++ b/fs/aufs/f_op_sp.c
9297 @@ -0,0 +1,290 @@
9298 +/*
9299 + * Copyright (C) 2005-2009 Junjiro R. Okajima
9300 + *
9301 + * This program, aufs is free software; you can redistribute it and/or modify
9302 + * it under the terms of the GNU General Public License as published by
9303 + * the Free Software Foundation; either version 2 of the License, or
9304 + * (at your option) any later version.
9305 + *
9306 + * This program is distributed in the hope that it will be useful,
9307 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
9308 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
9309 + * GNU General Public License for more details.
9310 + *
9311 + * You should have received a copy of the GNU General Public License
9312 + * along with this program; if not, write to the Free Software
9313 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
9314 + */
9315 +
9316 +/*
9317 + * file operations for special files.
9318 + * while they exist in aufs virtually,
9319 + * their file I/O is handled out of aufs.
9320 + */
9321 +
9322 +#include <linux/fs_stack.h>
9323 +#include "aufs.h"
9324 +
9325 +static ssize_t aufs_aio_read_sp(struct kiocb *kio, const struct iovec *iov,
9326 +                               unsigned long nv, loff_t pos)
9327 +{
9328 +       ssize_t err;
9329 +       aufs_bindex_t bstart;
9330 +       unsigned char wbr;
9331 +       struct file *file, *h_file;
9332 +       struct super_block *sb;
9333 +
9334 +       file = kio->ki_filp;
9335 +       sb = file->f_dentry->d_sb;
9336 +       si_read_lock(sb, AuLock_FLUSH);
9337 +       fi_read_lock(file);
9338 +       bstart = au_fbstart(file);
9339 +       h_file = au_h_fptr(file, bstart);
9340 +       fi_read_unlock(file);
9341 +       wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm);
9342 +       si_read_unlock(sb);
9343 +
9344 +       /* do not change the file in kio */
9345 +       AuDebugOn(!h_file->f_op || !h_file->f_op->aio_read);
9346 +       err = h_file->f_op->aio_read(kio, iov, nv, pos);
9347 +       if (err > 0 && wbr)
9348 +               file_accessed(h_file);
9349 +
9350 +       return err;
9351 +}
9352 +
9353 +static ssize_t aufs_aio_write_sp(struct kiocb *kio, const struct iovec *iov,
9354 +                                unsigned long nv, loff_t pos)
9355 +{
9356 +       ssize_t err;
9357 +       aufs_bindex_t bstart;
9358 +       unsigned char wbr;
9359 +       struct super_block *sb;
9360 +       struct file *file, *h_file;
9361 +
9362 +       file = kio->ki_filp;
9363 +       sb = file->f_dentry->d_sb;
9364 +       si_read_lock(sb, AuLock_FLUSH);
9365 +       fi_read_lock(file);
9366 +       bstart = au_fbstart(file);
9367 +       h_file = au_h_fptr(file, bstart);
9368 +       fi_read_unlock(file);
9369 +       wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm);
9370 +       si_read_unlock(sb);
9371 +
9372 +       /* do not change the file in kio */
9373 +       AuDebugOn(!h_file->f_op || !h_file->f_op->aio_write);
9374 +       err = h_file->f_op->aio_write(kio, iov, nv, pos);
9375 +       if (err > 0 && wbr)
9376 +               file_update_time(h_file);
9377 +
9378 +       return err;
9379 +}
9380 +
9381 +/* ---------------------------------------------------------------------- */
9382 +
9383 +static int aufs_release_sp(struct inode *inode, struct file *file)
9384 +{
9385 +       int err;
9386 +       struct file *h_file;
9387 +
9388 +       fi_read_lock(file);
9389 +       h_file = au_h_fptr(file, au_fbstart(file));
9390 +       fi_read_unlock(file);
9391 +       /* close this fifo in aufs */
9392 +       err = h_file->f_op->release(inode, file); /* ignore */
9393 +       aufs_release_nondir(inode, file); /* ignore */
9394 +       return err;
9395 +}
9396 +
9397 +/* ---------------------------------------------------------------------- */
9398 +
9399 +/* currently, support only FIFO */
9400 +enum {AuSp_FIFO, AuSp_FIFO_R, AuSp_FIFO_W, AuSp_FIFO_RW,
9401 +      /* AuSp_SOCK, AuSp_CHR, AuSp_BLK, */
9402 +      AuSp_Last};
9403 +static int aufs_open_sp(struct inode *inode, struct file *file);
9404 +static struct au_sp_fop {
9405 +       int                     done;
9406 +       struct file_operations  fop;    /* not 'const' */
9407 +       spinlock_t              spin;
9408 +} au_sp_fop[AuSp_Last] = {
9409 +       [AuSp_FIFO] = {
9410 +               .fop    = {
9411 +                       .open   = aufs_open_sp
9412 +               }
9413 +       }
9414 +};
9415 +
9416 +static void au_init_fop_sp(struct file *file)
9417 +{
9418 +       struct au_sp_fop *p;
9419 +       int i;
9420 +       struct file *h_file;
9421 +
9422 +       p = au_sp_fop;
9423 +       if (unlikely(!p->done)) {
9424 +               /* initialize first time only */
9425 +               static DEFINE_SPINLOCK(spin);
9426 +
9427 +               spin_lock(&spin);
9428 +               if (!p->done) {
9429 +                       BUILD_BUG_ON(sizeof(au_sp_fop)/sizeof(*au_sp_fop)
9430 +                                    != AuSp_Last);
9431 +                       for (i = 0; i < AuSp_Last; i++)
9432 +                               spin_lock_init(&p[i].spin);
9433 +                       p->done = 1;
9434 +               }
9435 +               spin_unlock(&spin);
9436 +       }
9437 +
9438 +       switch (file->f_mode & (FMODE_READ | FMODE_WRITE)) {
9439 +       case FMODE_READ:
9440 +               i = AuSp_FIFO_R;
9441 +               break;
9442 +       case FMODE_WRITE:
9443 +               i = AuSp_FIFO_W;
9444 +               break;
9445 +       case FMODE_READ | FMODE_WRITE:
9446 +               i = AuSp_FIFO_RW;
9447 +               break;
9448 +       default:
9449 +               BUG();
9450 +       }
9451 +
9452 +       p += i;
9453 +       if (unlikely(!p->done)) {
9454 +               /* initialize first time only */
9455 +               h_file = au_h_fptr(file, au_fbstart(file));
9456 +               spin_lock(&p->spin);
9457 +               if (!p->done) {
9458 +                       p->fop = *h_file->f_op;
9459 +                       if (p->fop.aio_read)
9460 +                               p->fop.aio_read = aufs_aio_read_sp;
9461 +                       if (p->fop.aio_write)
9462 +                               p->fop.aio_write = aufs_aio_write_sp;
9463 +                       p->fop.release = aufs_release_sp;
9464 +                       p->done = 1;
9465 +               }
9466 +               spin_unlock(&p->spin);
9467 +       }
9468 +       file->f_op = &p->fop;
9469 +}
9470 +
9471 +static int au_cpup_sp(struct dentry *dentry)
9472 +{
9473 +       int err;
9474 +       aufs_bindex_t bcpup;
9475 +       struct au_pin pin;
9476 +       struct au_wr_dir_args wr_dir_args = {
9477 +               .force_btgt     = -1,
9478 +               .flags          = 0
9479 +       };
9480 +
9481 +       AuDbg("%.*s\n", AuDLNPair(dentry));
9482 +
9483 +       di_read_unlock(dentry, AuLock_IR);
9484 +       di_write_lock_child(dentry);
9485 +       err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
9486 +       if (unlikely(err < 0))
9487 +               goto out;
9488 +       bcpup = err;
9489 +       err = 0;
9490 +       if (bcpup == au_dbstart(dentry))
9491 +               goto out; /* success */
9492 +
9493 +       err = au_pin(&pin, dentry, bcpup, au_opt_udba(dentry->d_sb),
9494 +                    AuPin_MNT_WRITE);
9495 +       if (!err) {
9496 +               err = au_sio_cpup_simple(dentry, bcpup, -1, AuCpup_DTIME);
9497 +               au_unpin(&pin);
9498 +       }
9499 +
9500 + out:
9501 +       di_downgrade_lock(dentry, AuLock_IR);
9502 +       return err;
9503 +}
9504 +
9505 +/*
9506 + * open a fifo. the lower inode's ->open() runs on the aufs file with the
9507 + * si/fi/di locks dropped. on failure, au_do_open() finalizes the finfo.
9508 + */
9509 +static int au_do_open_sp(struct file *file, int flags)
9510 +{
9511 +       int err;
9512 +       struct dentry *dentry;
9513 +       struct super_block *sb;
9514 +       struct file *h_file;
9515 +       struct inode *h_inode;
9516 +
9517 +       dentry = file->f_dentry;
9518 +       AuDbg("%.*s\n", AuDLNPair(dentry));
9519 +
9520 +       /* try copying-up. operating on the ro branch is not an error. */
9521 +       au_cpup_sp(dentry); /* ignore */
9522 +
9523 +       /* prepare h_file */
9524 +       err = au_do_open_nondir(file, file->f_flags);
9525 +       if (unlikely(err))
9526 +               goto out;
9527 +
9528 +       sb = dentry->d_sb;
9529 +       h_file = au_h_fptr(file, au_fbstart(file));
9530 +       h_inode = h_file->f_dentry->d_inode;
9531 +       di_read_unlock(dentry, AuLock_IR);
9532 +       fi_write_unlock(file);
9533 +       si_read_unlock(sb);
9534 +       /* open this fifo in aufs */
9535 +       err = h_inode->i_fop->open(file->f_dentry->d_inode, file);
9536 +       si_noflush_read_lock(sb);
9537 +       fi_write_lock(file);
9538 +       di_read_lock_child(dentry, AuLock_IR);
9539 +       /* no au_finfo_fin() on error, au_do_open() calls it after unlocking */
9540 +       if (!err)
9541 +               au_init_fop_sp(file);
9542 +
9543 + out:
9544 +       return err;
9545 +}
9546 +
9547 +static int aufs_open_sp(struct inode *inode, struct file *file)
9548 +{
9549 +       return au_do_open(file, au_do_open_sp);
9550 +}
9551 +
9552 +/* ---------------------------------------------------------------------- */
9553 +
9554 +void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev)
9555 +{
9556 +       init_special_inode(inode, mode, rdev);
9557 +
9558 +       switch (mode & S_IFMT) {
9559 +       case S_IFIFO:
9560 +               inode->i_fop = &au_sp_fop[AuSp_FIFO].fop;
9561 +               /*FALLTHROUGH*/
9562 +       case S_IFCHR:
9563 +       case S_IFBLK:
9564 +       case S_IFSOCK:
9565 +               break;
9566 +       default:
9567 +               AuDebugOn(1);
9568 +       }
9569 +}
9570 +
9571 +int au_special_file(umode_t mode)
9572 +{
9573 +       int ret;
9574 +
9575 +       ret = 0;
9576 +       switch (mode & S_IFMT) {
9577 +       case S_IFIFO:
9578 +#if 0
9579 +       case S_IFCHR:
9580 +       case S_IFBLK:
9581 +       case S_IFSOCK:
9582 +#endif
9583 +               ret = 1;
9584 +       }
9585 +
9586 +       return ret;
9587 +}
9588 diff --git a/fs/aufs/file.c b/fs/aufs/file.c
9589 new file mode 100644
9590 index 0000000..cc24802
9591 --- /dev/null
9592 +++ b/fs/aufs/file.c
9593 @@ -0,0 +1,620 @@
9594 +/*
9595 + * Copyright (C) 2005-2009 Junjiro R. Okajima
9596 + *
9597 + * This program, aufs is free software; you can redistribute it and/or modify
9598 + * it under the terms of the GNU General Public License as published by
9599 + * the Free Software Foundation; either version 2 of the License, or
9600 + * (at your option) any later version.
9601 + *
9602 + * This program is distributed in the hope that it will be useful,
9603 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
9604 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
9605 + * GNU General Public License for more details.
9606 + *
9607 + * You should have received a copy of the GNU General Public License
9608 + * along with this program; if not, write to the Free Software
9609 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
9610 + */
9611 +
9612 +/*
9613 + * handling file/dir, and address_space operation
9614 + */
9615 +
9616 +#include <linux/file.h>
9617 +#include <linux/fsnotify.h>
9618 +#include <linux/namei.h>
9619 +#include <linux/pagemap.h>
9620 +#include "aufs.h"
9621 +
9622 +/*
9623 + * a dirty trick for handling deny_write_access().
9624 + * because FMODE_EXEC flag is not passed to f_op->open(),
9625 + * set it to file->private_data temporarily.
9626 + */
9627 +void au_store_oflag(struct nameidata *nd, struct inode *inode)
9628 +{
9629 +       if (nd
9630 +           /* && !(nd->flags & LOOKUP_CONTINUE) */
9631 +           && (nd->flags & LOOKUP_OPEN)
9632 +           && (nd->intent.open.flags & FMODE_EXEC)
9633 +           && inode
9634 +           && S_ISREG(inode->i_mode)) {
9635 +               /* suppress a warning in lp64 */
9636 +               unsigned long flags = nd->intent.open.flags;
9637 +               nd->intent.open.file->private_data = (void *)flags;
9638 +               /* smp_mb(); */
9639 +       }
9640 +}
9641 +
9642 +/* drop flags for writing */
9643 +unsigned int au_file_roflags(unsigned int flags)
9644 +{
9645 +       flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
9646 +       flags |= O_RDONLY | O_NOATIME;
9647 +       return flags;
9648 +}
9649 +
9650 +/* common functions to regular file and dir */
9651 +struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
9652 +                      struct file *file)
9653 +{
9654 +       struct file *h_file;
9655 +       struct dentry *h_dentry;
9656 +       struct inode *h_inode;
9657 +       struct super_block *sb;
9658 +       struct au_branch *br;
9659 +       int err;
9660 +
9661 +       /* a race condition can happen between open and unlink/rmdir */
9662 +       h_file = ERR_PTR(-ENOENT);
9663 +       h_dentry = au_h_dptr(dentry, bindex);
9664 +       if (au_test_nfsd(current) && !h_dentry)
9665 +               goto out;
9666 +       h_inode = h_dentry->d_inode;
9667 +       if (au_test_nfsd(current) && !h_inode)
9668 +               goto out;
9669 +       if (unlikely((!d_unhashed(dentry) && d_unhashed(h_dentry))
9670 +                    || !h_inode))
9671 +               goto out;
9672 +
9673 +       sb = dentry->d_sb;
9674 +       br = au_sbr(sb, bindex);
9675 +       h_file = ERR_PTR(-EACCES);
9676 +       if (file && (file->f_mode & FMODE_EXEC)
9677 +           && (br->br_mnt->mnt_flags & MNT_NOEXEC))
9678 +               goto out;
9679 +
9680 +       /* drop flags for writing */
9681 +       if (au_test_ro(sb, bindex, dentry->d_inode))
9682 +               flags = au_file_roflags(flags);
9683 +       flags &= ~O_CREAT;
9684 +       atomic_inc(&br->br_count);
9685 +       if (!au_special_file(h_inode->i_mode))
9686 +               h_file = dentry_open(dget(h_dentry), mntget(br->br_mnt), flags);
9687 +       else {
9688 +               /* this block depends upon the configuration */
9689 +               di_read_unlock(dentry, AuLock_IR);
9690 +               fi_write_unlock(file);
9691 +               si_read_unlock(sb);
9692 +               h_file = dentry_open(dget(h_dentry), mntget(br->br_mnt), flags);
9693 +               si_noflush_read_lock(sb);
9694 +               fi_write_lock(file);
9695 +               di_read_lock_child(dentry, AuLock_IR);
9696 +       }
9697 +       if (IS_ERR(h_file))
9698 +               goto out_br;
9699 +
9700 +       if (file && (file->f_mode & FMODE_EXEC)) {
9701 +               h_file->f_mode |= FMODE_EXEC;
9702 +               err = deny_write_access(h_file);
9703 +               if (unlikely(err)) {
9704 +                       fput(h_file);
9705 +                       h_file = ERR_PTR(err);
9706 +                       goto out_br;
9707 +               }
9708 +       }
9709 +       fsnotify_open(h_dentry);
9710 +       goto out; /* success */
9711 +
9712 + out_br:
9713 +       atomic_dec(&br->br_count);
9714 + out:
9715 +       return h_file;
9716 +}
9717 +
9718 +int au_do_open(struct file *file, int (*open)(struct file *file, int flags))
9719 +{
9720 +       int err;
9721 +       struct dentry *dentry;
9722 +       struct super_block *sb;
9723 +
9724 +       dentry = file->f_dentry;
9725 +       sb = dentry->d_sb;
9726 +       si_read_lock(sb, AuLock_FLUSH);
9727 +       err = au_finfo_init(file);
9728 +       if (unlikely(err))
9729 +               goto out;
9730 +
9731 +       di_read_lock_child(dentry, AuLock_IR);
9732 +       err = open(file, file->f_flags);
9733 +       di_read_unlock(dentry, AuLock_IR);
9734 +
9735 +       fi_write_unlock(file);
9736 +       if (unlikely(err))
9737 +               au_finfo_fin(file);
9738 + out:
9739 +       si_read_unlock(sb);
9740 +       return err;
9741 +}
9742 +
9743 +int au_reopen_nondir(struct file *file)
9744 +{
9745 +       int err;
9746 +       aufs_bindex_t bstart, bindex, bend;
9747 +       struct dentry *dentry;
9748 +       struct file *h_file, *h_file_tmp;
9749 +
9750 +       dentry = file->f_dentry;
9751 +       AuDebugOn(au_special_file(dentry->d_inode->i_mode));
9752 +       bstart = au_dbstart(dentry);
9753 +       h_file_tmp = NULL;
9754 +       if (au_fbstart(file) == bstart) {
9755 +               h_file = au_h_fptr(file, bstart);
9756 +               if (file->f_mode == h_file->f_mode)
9757 +                       return 0; /* success */
9758 +               h_file_tmp = h_file;
9759 +               get_file(h_file_tmp);
9760 +               au_set_h_fptr(file, bstart, NULL);
9761 +       }
9762 +       AuDebugOn(au_fbstart(file) < bstart
9763 +                 || au_fi(file)->fi_hfile[0 + bstart].hf_file);
9764 +
9765 +       h_file = au_h_open(dentry, bstart, file->f_flags & ~O_TRUNC, file);
9766 +       err = PTR_ERR(h_file);
9767 +       if (IS_ERR(h_file))
9768 +               goto out; /* todo: close all? */
9769 +
9770 +       err = 0;
9771 +       au_set_fbstart(file, bstart);
9772 +       au_set_h_fptr(file, bstart, h_file);
9773 +       au_update_figen(file);
9774 +       /* todo: necessary? */
9775 +       /* file->f_ra = h_file->f_ra; */
9776 +
9777 +       /* close lower files */
9778 +       bend = au_fbend(file);
9779 +       for (bindex = bstart + 1; bindex <= bend; bindex++)
9780 +               au_set_h_fptr(file, bindex, NULL);
9781 +       au_set_fbend(file, bstart);
9782 +
9783 + out:
9784 +       if (h_file_tmp)
9785 +               fput(h_file_tmp);
9786 +       return err;
9787 +}
9788 +
9789 +/* ---------------------------------------------------------------------- */
9790 +
9791 +static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
9792 +                       struct dentry *hi_wh)
9793 +{
9794 +       int err;
9795 +       aufs_bindex_t bstart;
9796 +       struct au_dinfo *dinfo;
9797 +       struct dentry *h_dentry;
9798 +
9799 +       dinfo = au_di(file->f_dentry);
9800 +       AuRwMustWriteLock(&dinfo->di_rwsem);
9801 +
9802 +       bstart = dinfo->di_bstart;
9803 +       dinfo->di_bstart = btgt;
9804 +       h_dentry = dinfo->di_hdentry[0 + btgt].hd_dentry;
9805 +       dinfo->di_hdentry[0 + btgt].hd_dentry = hi_wh;
9806 +       err = au_reopen_nondir(file);
9807 +       dinfo->di_hdentry[0 + btgt].hd_dentry = h_dentry;
9808 +       dinfo->di_bstart = bstart;
9809 +
9810 +       return err;
9811 +}
9812 +
9813 +static int au_ready_to_write_wh(struct file *file, loff_t len,
9814 +                               aufs_bindex_t bcpup)
9815 +{
9816 +       int err;
9817 +       struct inode *inode;
9818 +       struct dentry *dentry, *hi_wh;
9819 +       struct super_block *sb;
9820 +
9821 +       dentry = file->f_dentry;
9822 +       inode = dentry->d_inode;
9823 +       hi_wh = au_hi_wh(inode, bcpup);
9824 +       if (!hi_wh)
9825 +               err = au_sio_cpup_wh(dentry, bcpup, len, file);
9826 +       else
9827 +               /* already copied-up after unlink */
9828 +               err = au_reopen_wh(file, bcpup, hi_wh);
9829 +
9830 +       sb = dentry->d_sb;
9831 +       if (!err && inode->i_nlink > 1 && au_opt_test(au_mntflags(sb), PLINK))
9832 +               au_plink_append(inode, bcpup, au_h_dptr(dentry, bcpup));
9833 +
9834 +       return err;
9835 +}
9836 +
9837 +/*
9838 + * prepare the @file for writing.
9839 + */
9840 +int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
9841 +{
9842 +       int err;
9843 +       aufs_bindex_t bstart, bcpup;
9844 +       struct dentry *dentry, *parent, *h_dentry;
9845 +       struct inode *h_inode, *inode;
9846 +       struct super_block *sb;
9847 +
9848 +       dentry = file->f_dentry;
9849 +       sb = dentry->d_sb;
9850 +       bstart = au_fbstart(file);
9851 +       inode = dentry->d_inode;
9852 +       AuDebugOn(au_special_file(inode->i_mode));
9853 +       err = au_test_ro(sb, bstart, inode);
9854 +       if (!err && (au_h_fptr(file, bstart)->f_mode & FMODE_WRITE)) {
9855 +               err = au_pin(pin, dentry, bstart, AuOpt_UDBA_NONE, /*flags*/0);
9856 +               goto out;
9857 +       }
9858 +
9859 +       /* need to cpup */
9860 +       parent = dget_parent(dentry);
9861 +       di_write_lock_parent(parent);
9862 +       err = AuWbrCopyup(au_sbi(sb), dentry);
9863 +       bcpup = err;
9864 +       if (unlikely(err < 0))
9865 +               goto out_dgrade;
9866 +       err = 0;
9867 +
9868 +       if (!au_h_dptr(parent, bcpup)) {
9869 +               err = au_cpup_dirs(dentry, bcpup);
9870 +               if (unlikely(err))
9871 +                       goto out_dgrade;
9872 +       }
9873 +
9874 +       err = au_pin(pin, dentry, bcpup, AuOpt_UDBA_NONE,
9875 +                    AuPin_DI_LOCKED | AuPin_MNT_WRITE);
9876 +       if (unlikely(err))
9877 +               goto out_dgrade;
9878 +
9879 +       h_dentry = au_h_fptr(file, bstart)->f_dentry;
9880 +       h_inode = h_dentry->d_inode;
9881 +       mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
9882 +       if (d_unhashed(dentry) /* || d_unhashed(h_dentry) */
9883 +           /* || !h_inode->i_nlink */) {
9884 +               err = au_ready_to_write_wh(file, len, bcpup);
9885 +               di_downgrade_lock(parent, AuLock_IR);
9886 +       } else {
9887 +               di_downgrade_lock(parent, AuLock_IR);
9888 +               if (!au_h_dptr(dentry, bcpup))
9889 +                       err = au_sio_cpup_simple(dentry, bcpup, len,
9890 +                                                AuCpup_DTIME);
9891 +               if (!err)
9892 +                       err = au_reopen_nondir(file);
9893 +       }
9894 +       mutex_unlock(&h_inode->i_mutex);
9895 +
9896 +       if (!err) {
9897 +               au_pin_set_parent_lflag(pin, /*lflag*/0);
9898 +               goto out_dput; /* success */
9899 +       }
9900 +       au_unpin(pin);
9901 +       goto out_unlock;
9902 +
9903 + out_dgrade:
9904 +       di_downgrade_lock(parent, AuLock_IR);
9905 + out_unlock:
9906 +       di_read_unlock(parent, AuLock_IR);
9907 + out_dput:
9908 +       dput(parent);
9909 + out:
9910 +       return err;
9911 +}
9912 +
9913 +/* ---------------------------------------------------------------------- */
9914 +
9915 +/*
9916 + * nothing to do unless the inode's top branch differs from fi_bstart.
9917 + * then copy-up (PLINK case), or reopen on hi_wh and clear *need_reopen.
9918 + */
9919 +static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
9920 +{
9921 +       int err;
9922 +       aufs_bindex_t bstart;
9923 +       struct au_pin pin;
9924 +       struct dentry *dentry, *parent, *hi_wh;
9925 +       struct inode *inode;
9926 +       struct super_block *sb;
9927 +
9928 +       FiMustWriteLock(file);
9929 +
9930 +       err = 0;
9931 +       dentry = file->f_dentry;
9932 +       sb = dentry->d_sb;
9933 +       inode = dentry->d_inode;
9934 +       bstart = au_ibstart(inode);
9935 +       if (bstart == au_fi(file)->fi_bstart)
9936 +               goto out;
9937 +
9938 +       parent = dget_parent(dentry);
9939 +       if (au_test_ro(sb, bstart, inode)) {
9940 +               di_read_lock_parent(parent, !AuLock_IR);
9941 +               err = AuWbrCopyup(au_sbi(sb), dentry);
9942 +               bstart = err;
9943 +               di_read_unlock(parent, !AuLock_IR);
9944 +               if (unlikely(err < 0))
9945 +                       goto out_parent;
9946 +               err = 0;
9947 +       }
9948 +
9949 +       di_read_lock_parent(parent, AuLock_IR);
9950 +       hi_wh = au_hi_wh(inode, bstart);
9951 +       if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(inode)
9952 +           && !d_unhashed(dentry)) {
9953 +               err = au_test_and_cpup_dirs(dentry, bstart);
9954 +               if (unlikely(err))
9955 +                       goto out_unlock;
9956 +               /* always superio. unpin only after a successful au_pin(). */
9957 +               err = au_pin(&pin, dentry, bstart, AuOpt_UDBA_NONE,
9958 +                            AuPin_DI_LOCKED | AuPin_MNT_WRITE);
9959 +               if (unlikely(err))
9960 +                       goto out_unlock;
9961 +               err = au_sio_cpup_simple(dentry, bstart, -1, AuCpup_DTIME);
9962 +               au_unpin(&pin);
9963 +       } else if (hi_wh) {
9964 +               /* already copied-up after unlink */
9965 +               err = au_reopen_wh(file, bstart, hi_wh);
9966 +               *need_reopen = 0;
9967 +       }
9968 +
9969 + out_unlock:
9970 +       di_read_unlock(parent, AuLock_IR);
9971 + out_parent:
9972 +       dput(parent);
9973 + out:
9974 +       return err;
9975 +}
9976 +
9977 +static void au_do_refresh_file(struct file *file)
9978 +{
9979 +       aufs_bindex_t bindex, bend, new_bindex, brid;
9980 +       struct au_hfile *p, tmp, *q;
9981 +       struct au_finfo *finfo;
9982 +       struct super_block *sb;
9983 +
9984 +       FiMustWriteLock(file);
9985 +
9986 +       sb = file->f_dentry->d_sb;
9987 +       finfo = au_fi(file);
9988 +       p = finfo->fi_hfile + finfo->fi_bstart;
9989 +       brid = p->hf_br->br_id;
9990 +       bend = finfo->fi_bend;
9991 +       for (bindex = finfo->fi_bstart; bindex <= bend; bindex++, p++) {
9992 +               if (!p->hf_file)
9993 +                       continue;
9994 +
9995 +               new_bindex = au_br_index(sb, p->hf_br->br_id);
9996 +               if (new_bindex == bindex)
9997 +                       continue;
9998 +               if (new_bindex < 0) {
9999 +                       au_set_h_fptr(file, bindex, NULL);
10000 +                       continue;
10001 +               }
10002 +
10003 +               /* swap the two lower files, and loop again */
10004 +               q = finfo->fi_hfile + new_bindex;
10005 +               tmp = *q;
10006 +               *q = *p;
10007 +               *p = tmp;
10008 +               if (tmp.hf_file) {
10009 +                       bindex--;
10010 +                       p--;
10011 +               }
10012 +       }
10013 +
10014 +       p = finfo->fi_hfile;
10015 +       if (!au_test_mmapped(file) && !d_unhashed(file->f_dentry)) {
10016 +               bend = au_sbend(sb);
10017 +               for (finfo->fi_bstart = 0; finfo->fi_bstart <= bend;
10018 +                    finfo->fi_bstart++, p++)
10019 +                       if (p->hf_file) {
10020 +                               if (p->hf_file->f_dentry
10021 +                                   && p->hf_file->f_dentry->d_inode)
10022 +                                       break;
10023 +                               else
10024 +                                       au_hfput(p, file);
10025 +                       }
10026 +       } else {
10027 +               bend = au_br_index(sb, brid);
10028 +               for (finfo->fi_bstart = 0; finfo->fi_bstart < bend;
10029 +                    finfo->fi_bstart++, p++)
10030 +                       if (p->hf_file)
10031 +                               au_hfput(p, file);
10032 +               bend = au_sbend(sb);
10033 +       }
10034 +
10035 +       p = finfo->fi_hfile + bend;
10036 +       for (finfo->fi_bend = bend; finfo->fi_bend >= finfo->fi_bstart;
10037 +            finfo->fi_bend--, p--)
10038 +               if (p->hf_file) {
10039 +                       if (p->hf_file->f_dentry
10040 +                           && p->hf_file->f_dentry->d_inode)
10041 +                               break;
10042 +                       else
10043 +                               au_hfput(p, file);
10044 +               }
10045 +       AuDebugOn(finfo->fi_bend < finfo->fi_bstart);
10046 +}
10047 +
10048 +/*
10049 + * after branch manipulation, refresh the file.
10050 + */
10051 +static int refresh_file(struct file *file, int (*reopen)(struct file *file))
10052 +{
10053 +       int err, need_reopen;
10054 +       struct dentry *dentry;
10055 +       aufs_bindex_t bend, bindex;
10056 +
10057 +       dentry = file->f_dentry;
10058 +       err = au_fi_realloc(au_fi(file), au_sbend(dentry->d_sb) + 1);
10059 +       if (unlikely(err))
10060 +               goto out;
10061 +       au_do_refresh_file(file);
10062 +
10063 +       err = 0;
10064 +       need_reopen = 1;
10065 +       if (!au_test_mmapped(file))
10066 +               err = au_file_refresh_by_inode(file, &need_reopen);
10067 +       if (!err && need_reopen && !d_unhashed(dentry))
10068 +               err = reopen(file);
10069 +       if (!err) {
10070 +               au_update_figen(file);
10071 +               return 0; /* success */
10072 +       }
10073 +
10074 +       /* error, close all lower files */
10075 +       bend = au_fbend(file);
10076 +       for (bindex = au_fbstart(file); bindex <= bend; bindex++)
10077 +               au_set_h_fptr(file, bindex, NULL);
10078 +
10079 + out:
10080 +       return err;
10081 +}
10082 +
10083 +/*
10084 + * common function to regular file and dir.
10085 + * on success, fi and di are left locked (downgraded unless @wlock is set).
10086 + * on error, both of them are unlocked before returning.
10087 + */
10088 +int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
10089 +                         int wlock)
10090 +{
10091 +       int err;
10092 +       unsigned int sigen, figen;
10093 +       aufs_bindex_t bstart;
10094 +       unsigned char pseudo_link;
10095 +       struct dentry *dentry;
10096 +       struct inode *inode;
10097 +
10098 +       dentry = file->f_dentry;
10099 +       inode = dentry->d_inode;
10100 +       AuDebugOn(au_special_file(inode->i_mode));
10101 +       sigen = au_sigen(dentry->d_sb);
10102 +       fi_write_lock(file);
10103 +       figen = au_figen(file);
10104 +       di_write_lock_child(dentry);
10105 +       bstart = au_dbstart(dentry);
10106 +       pseudo_link = (bstart != au_ibstart(inode));
10107 +       if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart)
10108 +               goto out_locked; /* up to date, nothing to refresh */
10109 +
10110 +       AuDbg("sigen %d, figen %d\n", sigen, figen);
10111 +       if (sigen != au_digen(dentry) || sigen != au_iigen(inode)) {
10112 +               err = au_reval_dpath(dentry, sigen);
10113 +               if (unlikely(err < 0))
10114 +                       goto out_unlock;
10115 +               AuDebugOn(au_digen(dentry) != sigen
10116 +                         || au_iigen(inode) != sigen);
10117 +       }
10118 +
10119 +       err = refresh_file(file, reopen);
10120 +       if (unlikely(err))
10121 +               goto out_unlock;
10122 +
10123 + out_locked:
10124 +       if (!wlock) {
10125 +               di_downgrade_lock(dentry, AuLock_IR);
10126 +               fi_downgrade_lock(file);
10127 +       }
10128 +       return 0; /* success */
10129 +
10130 + out_unlock:
10131 +       /* never return an error with the locks still held */
10132 +       di_write_unlock(dentry);
10133 +       fi_write_unlock(file);
10134 +       return err;
10135 +}
10136 +
10137 +/* ---------------------------------------------------------------------- */
10138 +
10139 +/* cf. aufs_nopage() */
10140 +/* for madvise(2) */
10141 +static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
10142 +{
10143 +       unlock_page(page);
10144 +       return 0;
10145 +}
10146 +
10147 +/* they will never be called. */
10148 +#ifdef CONFIG_AUFS_DEBUG
10149 +static int aufs_prepare_write(struct file *file, struct page *page,
10150 +                             unsigned from, unsigned to)
10151 +{ AuUnsupport(); return 0; }
10152 +static int aufs_commit_write(struct file *file, struct page *page,
10153 +                            unsigned from, unsigned to)
10154 +{ AuUnsupport(); return 0; }
10155 +static int aufs_write_begin(struct file *file, struct address_space *mapping,
10156 +                           loff_t pos, unsigned len, unsigned flags,
10157 +                           struct page **pagep, void **fsdata)
10158 +{ AuUnsupport(); return 0; }
10159 +static int aufs_write_end(struct file *file, struct address_space *mapping,
10160 +                         loff_t pos, unsigned len, unsigned copied,
10161 +                         struct page *page, void *fsdata)
10162 +{ AuUnsupport(); return 0; }
10163 +static int aufs_writepage(struct page *page, struct writeback_control *wbc)
10164 +{ AuUnsupport(); return 0; }
10165 +static void aufs_sync_page(struct page *page)
10166 +{ AuUnsupport(); }
10167 +
10168 +static int aufs_set_page_dirty(struct page *page)
10169 +{ AuUnsupport(); return 0; }
10170 +static void aufs_invalidatepage(struct page *page, unsigned long offset)
10171 +{ AuUnsupport(); }
10172 +static int aufs_releasepage(struct page *page, gfp_t gfp)
10173 +{ AuUnsupport(); return 0; }
10174 +static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb,
10175 +                             const struct iovec *iov, loff_t offset,
10176 +                             unsigned long nr_segs)
10177 +{ AuUnsupport(); return 0; }
10178 +static int aufs_get_xip_mem(struct address_space *mapping, pgoff_t pgoff,
10179 +                           int create, void **kmem, unsigned long *pfn)
10180 +{ AuUnsupport(); return 0; }
10181 +static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
10182 +                           struct page *page)
10183 +{ AuUnsupport(); return 0; }
10184 +static int aufs_launder_page(struct page *page)
10185 +{ AuUnsupport(); return 0; }
10186 +static int aufs_is_partially_uptodate(struct page *page,
10187 +                                     read_descriptor_t *desc,
10188 +                                     unsigned long from)
10189 +{ AuUnsupport(); return 0; }
10190 +#endif /* CONFIG_AUFS_DEBUG */
10191 +
10192 +struct address_space_operations aufs_aop = {
10193 +       .readpage               = aufs_readpage,
10194 +#ifdef CONFIG_AUFS_DEBUG
10195 +       .writepage              = aufs_writepage,
10196 +       .sync_page              = aufs_sync_page,
10197 +       /* no writepages, because of writepage */
10198 +       .set_page_dirty         = aufs_set_page_dirty,
10199 +       /* no readpages, because of readpage */
10200 +       .prepare_write          = aufs_prepare_write,
10201 +       .commit_write           = aufs_commit_write,
10202 +       .write_begin            = aufs_write_begin,
10203 +       .write_end              = aufs_write_end,
10204 +       /* no bmap, no block device */
10205 +       .invalidatepage         = aufs_invalidatepage,
10206 +       .releasepage            = aufs_releasepage,
10207 +       .direct_IO              = aufs_direct_IO,       /* todo */
10208 +       .get_xip_mem            = aufs_get_xip_mem,     /* todo */
10209 +       .migratepage            = aufs_migratepage,
10210 +       .launder_page           = aufs_launder_page,
10211 +       .is_partially_uptodate  = aufs_is_partially_uptodate
10212 +#endif /* CONFIG_AUFS_DEBUG */
10213 +};
10214 diff --git a/fs/aufs/file.h b/fs/aufs/file.h
10215 new file mode 100644
10216 index 0000000..c6626dd
10217 --- /dev/null
10218 +++ b/fs/aufs/file.h
10219 @@ -0,0 +1,187 @@
10220 +/*
10221 + * Copyright (C) 2005-2009 Junjiro R. Okajima
10222 + *
10223 + * This program, aufs is free software; you can redistribute it and/or modify
10224 + * it under the terms of the GNU General Public License as published by
10225 + * the Free Software Foundation; either version 2 of the License, or
10226 + * (at your option) any later version.
10227 + *
10228 + * This program is distributed in the hope that it will be useful,
10229 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
10230 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10231 + * GNU General Public License for more details.
10232 + *
10233 + * You should have received a copy of the GNU General Public License
10234 + * along with this program; if not, write to the Free Software
10235 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
10236 + */
10237 +
10238 +/*
10239 + * file operations
10240 + */
10241 +
10242 +#ifndef __AUFS_FILE_H__
10243 +#define __AUFS_FILE_H__
10244 +
10245 +#ifdef __KERNEL__
10246 +
10247 +#include <linux/fs.h>
10248 +#include <linux/aufs_type.h>
10249 +#include "rwsem.h"
10250 +
10251 +struct au_branch;
10252 +struct au_hfile {
10253 +       struct file             *hf_file;
10254 +       struct au_branch        *hf_br;
10255 +};
10256 +
10257 +struct au_vdir;
10258 +struct au_finfo {
10259 +       atomic_t                fi_generation;
10260 +
10261 +       struct au_rwsem         fi_rwsem;
10262 +       struct au_hfile         *fi_hfile;
10263 +       aufs_bindex_t           fi_bstart, fi_bend;
10264 +
10265 +       union {
10266 +               /* non-dir only */
10267 +               struct {
10268 +                       struct vm_operations_struct     *fi_h_vm_ops;
10269 +                       struct vm_operations_struct     *fi_vm_ops;
10270 +                       struct mutex                    fi_vm_mtx;
10271 +               };
10272 +
10273 +               /* dir only */
10274 +               struct {
10275 +                       struct au_vdir          *fi_vdir_cache;
10276 +               };
10277 +       };
10278 +};
10279 +
10280 +/* ---------------------------------------------------------------------- */
10281 +
10282 +/* file.c */
10283 +extern struct address_space_operations aufs_aop;
10284 +void au_store_oflag(struct nameidata *nd, struct inode *inode);
10285 +unsigned int au_file_roflags(unsigned int flags);
10286 +struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
10287 +                      struct file *file);
10288 +int au_do_open(struct file *file, int (*open)(struct file *file, int flags));
10289 +int au_reopen_nondir(struct file *file);
10290 +struct au_pin;
10291 +int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
10292 +int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
10293 +                         int wlock);
10294 +
10295 +/* f_op.c */
10296 +extern struct file_operations aufs_file_fop;
10297 +int aufs_flush(struct file *file, fl_owner_t id);
10298 +int au_do_open_nondir(struct file *file, int flags);
10299 +int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
10300 +
10301 +#ifdef CONFIG_AUFS_SP_IATTR
10302 +/* f_op_sp.c */
10303 +int au_special_file(umode_t mode);
10304 +void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev);
10305 +#else
10306 +AuStubInt0(au_special_file, umode_t mode)
10307 +static inline void au_init_special_fop(struct inode *inode, umode_t mode,
10308 +                                      dev_t rdev)
10309 +{
10310 +       init_special_inode(inode, mode, rdev);
10311 +}
10312 +#endif
10313 +
10314 +/* finfo.c */
10315 +void au_hfput(struct au_hfile *hf, struct file *file);
10316 +void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
10317 +                  struct file *h_file);
10318 +
10319 +void au_update_figen(struct file *file);
10320 +
10321 +void au_finfo_fin(struct file *file);
10322 +int au_finfo_init(struct file *file);
10323 +int au_fi_realloc(struct au_finfo *finfo, int nbr);
10324 +
10325 +/* ioctl.c */
10326 +long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
10327 +
10328 +/* ---------------------------------------------------------------------- */
10329 +
10330 +static inline struct au_finfo *au_fi(struct file *file)
10331 +{
10332 +       return file->private_data;
10333 +}
10334 +
10335 +/* ---------------------------------------------------------------------- */
10336 +
10337 +/*
10338 + * fi_read_lock, fi_write_lock,
10339 + * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
10340 + */
10341 +AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
10342 +
10343 +#define FiMustNoWaiters(f)     AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
10344 +#define FiMustAnyLock(f)       AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
10345 +#define FiMustWriteLock(f)     AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
10346 +
10347 +/* ---------------------------------------------------------------------- */
10348 +
10349 +/* todo: hard/soft set? */
10350 +static inline aufs_bindex_t au_fbstart(struct file *file)
10351 +{
10352 +       FiMustAnyLock(file);
10353 +       return au_fi(file)->fi_bstart;
10354 +}
10355 +
10356 +static inline aufs_bindex_t au_fbend(struct file *file)
10357 +{
10358 +       FiMustAnyLock(file);
10359 +       return au_fi(file)->fi_bend;
10360 +}
10361 +
10362 +static inline struct au_vdir *au_fvdir_cache(struct file *file)
10363 +{
10364 +       FiMustAnyLock(file);
10365 +       return au_fi(file)->fi_vdir_cache;
10366 +}
10367 +
10368 +static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex)
10369 +{
10370 +       FiMustWriteLock(file);
10371 +       au_fi(file)->fi_bstart = bindex;
10372 +}
10373 +
10374 +static inline void au_set_fbend(struct file *file, aufs_bindex_t bindex)
10375 +{
10376 +       FiMustWriteLock(file);
10377 +       au_fi(file)->fi_bend = bindex;
10378 +}
10379 +
10380 +static inline void au_set_fvdir_cache(struct file *file,
10381 +                                     struct au_vdir *vdir_cache)
10382 +{
10383 +       FiMustWriteLock(file);
10384 +       au_fi(file)->fi_vdir_cache = vdir_cache;
10385 +}
10386 +
10387 +static inline struct file *au_h_fptr(struct file *file, aufs_bindex_t bindex)
10388 +{
10389 +       FiMustAnyLock(file);
10390 +       return au_fi(file)->fi_hfile[0 + bindex].hf_file;
10391 +}
10392 +
10393 +/* todo: memory barrier? */
10394 +static inline unsigned int au_figen(struct file *f)
10395 +{
10396 +       return atomic_read(&au_fi(f)->fi_generation);
10397 +}
10398 +
10399 +static inline int au_test_mmapped(struct file *f)
10400 +{
10401 +       /* FiMustAnyLock(f); */
10402 +       return !!(au_fi(f)->fi_h_vm_ops);
10403 +}
10404 +
10405 +#endif /* __KERNEL__ */
10406 +#endif /* __AUFS_FILE_H__ */
10407 diff --git a/fs/aufs/finfo.c b/fs/aufs/finfo.c
10408 new file mode 100644
10409 index 0000000..158cd7f
10410 --- /dev/null
10411 +++ b/fs/aufs/finfo.c
10412 @@ -0,0 +1,131 @@
10413 +/*
10414 + * Copyright (C) 2005-2009 Junjiro R. Okajima
10415 + *
10416 + * This program, aufs is free software; you can redistribute it and/or modify
10417 + * it under the terms of the GNU General Public License as published by
10418 + * the Free Software Foundation; either version 2 of the License, or
10419 + * (at your option) any later version.
10420 + *
10421 + * This program is distributed in the hope that it will be useful,
10422 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
10423 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10424 + * GNU General Public License for more details.
10425 + *
10426 + * You should have received a copy of the GNU General Public License
10427 + * along with this program; if not, write to the Free Software
10428 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
10429 + */
10430 +
10431 +/*
10432 + * file private data
10433 + */
10434 +
10435 +#include <linux/file.h>
10436 +#include "aufs.h"
10437 +
10438 +void au_hfput(struct au_hfile *hf, struct file *file)
10439 +{
10440 +       if (file->f_mode & FMODE_EXEC)
10441 +               allow_write_access(hf->hf_file);
10442 +       fput(hf->hf_file);
10443 +       hf->hf_file = NULL;
10444 +       atomic_dec_return(&hf->hf_br->br_count);
10445 +       hf->hf_br = NULL;
10446 +}
10447 +
10448 +void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
10449 +{
10450 +       struct au_hfile *slot = &au_fi(file)->fi_hfile[bindex];
10451 +
10452 +       /* release the lower file currently stored in this slot, if any */
10453 +       if (slot->hf_file)
10454 +               au_hfput(slot, file);
10455 +       if (!val)
10456 +               return;
10457 +
10458 +       slot->hf_file = val;
10459 +       slot->hf_br = au_sbr(file->f_dentry->d_sb, bindex);
10460 +}
10461 +
10462 +void au_update_figen(struct file *file)
10463 +{
10464 +       atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry));
10465 +       /* smp_mb(); */ /* atomic_set */
10466 +}
10467 +
10468 +/* ---------------------------------------------------------------------- */
10469 +
10470 +void au_finfo_fin(struct file *file)
10471 +{
10472 +       struct au_finfo *finfo;
10473 +       aufs_bindex_t bindex, bend;
10474 +
10475 +       finfo = au_fi(file);
10476 +       bindex = finfo->fi_bstart;
10477 +       if (bindex >= 0) {
10478 +               /*
10479 +                * calls fput() instead of filp_close(),
10480 +                * since no dnotify or lock for the lower file.
10481 +                */
10482 +               bend = finfo->fi_bend;
10483 +               for (; bindex <= bend; bindex++)
10484 +                       au_set_h_fptr(file, bindex, NULL);
10485 +       }
10486 +
10487 +       au_dbg_verify_hf(finfo);
10488 +       kfree(finfo->fi_hfile);
10489 +       AuRwDestroy(&finfo->fi_rwsem);
10490 +       au_cache_free_finfo(finfo);
10491 +}
10492 +
10493 +int au_finfo_init(struct file *file)
10494 +{
10495 +       struct au_finfo *finfo;
10496 +       struct dentry *dentry;
10497 +       unsigned long ul;
10498 +
10499 +       dentry = file->f_dentry;
10500 +       finfo = au_cache_alloc_finfo();
10501 +       if (unlikely(!finfo))
10502 +               goto out;
10503 +
10504 +       finfo->fi_hfile = kcalloc(au_sbend(dentry->d_sb) + 1,
10505 +                                 sizeof(*finfo->fi_hfile), GFP_NOFS);
10506 +       if (unlikely(!finfo->fi_hfile))
10507 +               goto out_finfo;
10508 +
10509 +       au_rw_init_wlock(&finfo->fi_rwsem);
10510 +       finfo->fi_bstart = -1;
10511 +       finfo->fi_bend = -1;
10512 +       atomic_set(&finfo->fi_generation, au_digen(dentry));
10513 +       /* smp_mb(); */ /* atomic_set */
10514 +
10515 +       /* cf. au_store_oflag() */
10516 +       ul = (unsigned long)file->private_data;
10517 +       file->f_mode |= (ul & FMODE_EXEC);
10518 +       file->private_data = finfo;
10519 +       return 0; /* success */
10520 +
10521 + out_finfo:
10522 +       au_cache_free_finfo(finfo);
10523 + out:
10524 +       return -ENOMEM;
10525 +}
10526 +
10527 +int au_fi_realloc(struct au_finfo *finfo, int nbr)
10528 +{
10529 +       int cur_sz;
10530 +       struct au_hfile *new_hfile;
10531 +
10532 +       /* current size in bytes; an empty array is counted as one element */
10533 +       cur_sz = sizeof(*new_hfile) * (finfo->fi_bend + 1);
10534 +       if (!cur_sz)
10535 +               cur_sz = sizeof(*new_hfile);
10536 +       new_hfile = au_kzrealloc(finfo->fi_hfile, cur_sz,
10537 +                                sizeof(*new_hfile) * nbr, GFP_NOFS);
10538 +       if (!new_hfile)
10539 +               return -ENOMEM; /* finfo->fi_hfile is not updated */
10540 +
10541 +       finfo->fi_hfile = new_hfile;
10542 +       return 0;
10543 +}
10544 diff --git a/fs/aufs/fstype.h b/fs/aufs/fstype.h
10545 new file mode 100644
10546 index 0000000..3f3709f
10547 --- /dev/null
10548 +++ b/fs/aufs/fstype.h
10549 @@ -0,0 +1,434 @@
10550 +/*
10551 + * Copyright (C) 2005-2009 Junjiro R. Okajima
10552 + *
10553 + * This program, aufs is free software; you can redistribute it and/or modify
10554 + * it under the terms of the GNU General Public License as published by
10555 + * the Free Software Foundation; either version 2 of the License, or
10556 + * (at your option) any later version.
10557 + *
10558 + * This program is distributed in the hope that it will be useful,
10559 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
10560 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10561 + * GNU General Public License for more details.
10562 + *
10563 + * You should have received a copy of the GNU General Public License
10564 + * along with this program; if not, write to the Free Software
10565 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
10566 + */
10567 +
10568 +/*
10569 + * judging filesystem type
10570 + */
10571 +
10572 +#ifndef __AUFS_FSTYPE_H__
10573 +#define __AUFS_FSTYPE_H__
10574 +
10575 +#ifdef __KERNEL__
10576 +
10577 +#include <linux/cramfs_fs.h>
10578 +#include <linux/fs.h>
10579 +#include <linux/magic.h>
10580 +#include <linux/romfs_fs.h>
10581 +#include <linux/aufs_type.h>
10582 +
10583 +static inline int au_test_aufs(struct super_block *sb)
10584 +{
10585 +       return sb->s_magic == AUFS_SUPER_MAGIC;
10586 +}
10587 +
10588 +static inline const char *au_sbtype(struct super_block *sb)
10589 +{
10590 +       return sb->s_type->name;
10591 +}
10592 +
10593 +static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
10594 +{
10595 +#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE)
10596 +       return sb->s_magic == ISOFS_SUPER_MAGIC; /* iso9660, not romfs */
10597 +#else
10598 +       return 0;
10599 +#endif
10600 +}
10601 +
10602 +static inline int au_test_romfs(struct super_block *sb __maybe_unused)
10603 +{
10604 +#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE)
10605 +       return sb->s_magic == ROMFS_MAGIC; /* romfs, not iso9660 */
10606 +#else
10607 +       return 0;
10608 +#endif
10609 +}
10610 +
10611 +static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
10612 +{
10613 +#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE)
10614 +       return sb->s_magic == CRAMFS_MAGIC;
10615 +#endif
10616 +       return 0;
10617 +}
10618 +
10619 +static inline int au_test_nfs(struct super_block *sb __maybe_unused)
10620 +{
10621 +#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE)
10622 +       return sb->s_magic == NFS_SUPER_MAGIC;
10623 +#else
10624 +       return 0;
10625 +#endif
10626 +}
10627 +
10628 +static inline int au_test_fuse(struct super_block *sb __maybe_unused)
10629 +{
10630 +#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE)
10631 +       return sb->s_magic == FUSE_SUPER_MAGIC;
10632 +#else
10633 +       return 0;
10634 +#endif
10635 +}
10636 +
10637 +static inline int au_test_xfs(struct super_block *sb __maybe_unused)
10638 +{
10639 +#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE)
10640 +       return sb->s_magic == XFS_SB_MAGIC;
10641 +#else
10642 +       return 0;
10643 +#endif
10644 +}
10645 +
10646 +static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
10647 +{
10648 +#ifdef CONFIG_TMPFS
10649 +       return sb->s_magic == TMPFS_MAGIC;
10650 +#else
10651 +       return 0;
10652 +#endif
10653 +}
10654 +
10655 +static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
10656 +{
10657 +#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE)
10658 +       return !strcmp(au_sbtype(sb), "ecryptfs");
10659 +#else
10660 +       return 0;
10661 +#endif
10662 +}
10663 +
10664 +static inline int au_test_smbfs(struct super_block *sb __maybe_unused)
10665 +{
10666 +#if defined(CONFIG_SMB_FS) || defined(CONFIG_SMB_FS_MODULE)
10667 +       return sb->s_magic == SMB_SUPER_MAGIC;
10668 +#else
10669 +       return 0;
10670 +#endif
10671 +}
10672 +
10673 +static inline int au_test_ocfs2(struct super_block *sb __maybe_unused)
10674 +{
10675 +#if defined(CONFIG_OCFS2_FS) || defined(CONFIG_OCFS2_FS_MODULE)
10676 +       return sb->s_magic == OCFS2_SUPER_MAGIC;
10677 +#else
10678 +       return 0;
10679 +#endif
10680 +}
10681 +
10682 +static inline int au_test_ocfs2_dlmfs(struct super_block *sb __maybe_unused)
10683 +{
10684 +#if defined(CONFIG_OCFS2_FS_O2CB) || defined(CONFIG_OCFS2_FS_O2CB_MODULE)
10685 +       return sb->s_magic == DLMFS_MAGIC;
10686 +#else
10687 +       return 0;
10688 +#endif
10689 +}
10690 +
10691 +static inline int au_test_coda(struct super_block *sb __maybe_unused)
10692 +{
10693 +#if defined(CONFIG_CODA_FS) || defined(CONFIG_CODA_FS_MODULE)
10694 +       return sb->s_magic == CODA_SUPER_MAGIC;
10695 +#else
10696 +       return 0;
10697 +#endif
10698 +}
10699 +
10700 +static inline int au_test_v9fs(struct super_block *sb __maybe_unused)
10701 +{
10702 +#if defined(CONFIG_9P_FS) || defined(CONFIG_9P_FS_MODULE)
10703 +       return sb->s_magic == V9FS_MAGIC;
10704 +#else
10705 +       return 0;
10706 +#endif
10707 +}
10708 +
10709 +static inline int au_test_ext4(struct super_block *sb __maybe_unused)
10710 +{
10711 +#if defined(CONFIG_EXT4DEV_FS) || defined(CONFIG_EXT4DEV_FS_MODULE)
10712 +       return sb->s_magic == EXT4_SUPER_MAGIC;
10713 +#else
10714 +       return 0;
10715 +#endif
10716 +}
10717 +
10718 +static inline int au_test_sysv(struct super_block *sb __maybe_unused)
10719 +{
10720 +#if defined(CONFIG_SYSV_FS) || defined(CONFIG_SYSV_FS_MODULE)
10721 +       return !strcmp(au_sbtype(sb), "sysv");
10722 +#else
10723 +       return 0;
10724 +#endif
10725 +}
10726 +
10727 +static inline int au_test_ramfs(struct super_block *sb)
10728 +{
10729 +       return sb->s_magic == RAMFS_MAGIC;
10730 +}
10731 +
10732 +static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
10733 +{
10734 +#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE)
10735 +       return sb->s_magic == UBIFS_SUPER_MAGIC;
10736 +#else
10737 +       return 0;
10738 +#endif
10739 +}
10740 +
10741 +static inline int au_test_procfs(struct super_block *sb __maybe_unused)
10742 +{
10743 +#ifdef CONFIG_PROC_FS
10744 +       return sb->s_magic == PROC_SUPER_MAGIC;
10745 +#else
10746 +       return 0;
10747 +#endif
10748 +}
10749 +
10750 +static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
10751 +{
10752 +#ifdef CONFIG_SYSFS
10753 +       return sb->s_magic == SYSFS_MAGIC;
10754 +#else
10755 +       return 0;
10756 +#endif
10757 +}
10758 +
10759 +static inline int au_test_configfs(struct super_block *sb __maybe_unused)
10760 +{
10761 +#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE)
10762 +       return sb->s_magic == CONFIGFS_MAGIC;
10763 +#else
10764 +       return 0;
10765 +#endif
10766 +}
10767 +
10768 +static inline int au_test_minix(struct super_block *sb __maybe_unused)
10769 +{
10770 +#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE)
10771 +       return sb->s_magic == MINIX3_SUPER_MAGIC
10772 +               || sb->s_magic == MINIX2_SUPER_MAGIC
10773 +               || sb->s_magic == MINIX2_SUPER_MAGIC2
10774 +               || sb->s_magic == MINIX_SUPER_MAGIC
10775 +               || sb->s_magic == MINIX_SUPER_MAGIC2;
10776 +#else
10777 +       return 0;
10778 +#endif
10779 +}
10780 +
10781 +static inline int au_test_cifs(struct super_block *sb __maybe_unused)
10782 +{
10783 +#if defined(CONFIG_CIFS_FS) || defined(CONFIG_CIFS_FS_MODULE)
10784 +       return sb->s_magic == CIFS_MAGIC_NUMBER;
10785 +#else
10786 +       return 0;
10787 +#endif /* NOTE(review): Kconfig symbol may be CIFS, not CIFS_FS -- confirm */
10788 +}
10789 +
10790 +static inline int au_test_fat(struct super_block *sb __maybe_unused)
10791 +{
10792 +#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE)
10793 +       return sb->s_magic == MSDOS_SUPER_MAGIC;
10794 +#else
10795 +       return 0;
10796 +#endif
10797 +}
10798 +
10799 +static inline int au_test_msdos(struct super_block *sb)
10800 +{
10801 +       return au_test_fat(sb);
10802 +}
10803 +
10804 +static inline int au_test_vfat(struct super_block *sb)
10805 +{
10806 +       return au_test_fat(sb);
10807 +}
10808 +
10809 +static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
10810 +{
10811 +#ifdef CONFIG_DEBUG_FS
10812 +       return sb->s_magic == DEBUGFS_MAGIC;
10813 +#else
10814 +       return 0;
10815 +#endif
10816 +}
10817 +
10818 +/* ---------------------------------------------------------------------- */
10819 +/*
10820 + * they can't be an aufs branch.
10821 + */
10822 +static inline int au_test_fs_unsuppoted(struct super_block *sb)
10823 +{
10824 +       return
10825 +#ifndef CONFIG_AUFS_BR_RAMFS
10826 +               au_test_ramfs(sb) ||
10827 +#endif
10828 +               au_test_procfs(sb)
10829 +               || au_test_sysfs(sb)
10830 +               || au_test_configfs(sb)
10831 +               || au_test_debugfs(sb)
10832 +               /* || !strcmp(au_sbtype(sb), "unionfs") */
10833 +               || au_test_aufs(sb); /* will be supported in next version */
10834 +}
10835 +
10836 +/*
10837 + * If the filesystem supports NFS-export, then it has to support NULL as
10838 + * a nameidata parameter for ->create(), ->lookup() and ->d_revalidate().
10839 + * We can apply this principle when we handle a lower filesystem.
10840 + */
10841 +static inline int au_test_fs_null_nd(struct super_block *sb)
10842 +{
10843 +       return !!sb->s_export_op;
10844 +}
10845 +
10846 +static inline int au_test_fs_remote(struct super_block *sb)
10847 +{
10848 +       return !au_test_tmpfs(sb)
10849 +#ifdef CONFIG_AUFS_BR_RAMFS
10850 +               && !au_test_ramfs(sb)
10851 +#endif
10852 +               && !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
10853 +}
10854 +
10855 +/* ---------------------------------------------------------------------- */
10856 +
10857 +/*
10858 + * Note: these functions (below) are created after reading ->getattr() in all
10859 + * filesystems under linux/fs. it means we have to do so in every update...
10860 + */
10861 +
10862 +/*
10863 + * some filesystems require getattr to refresh the inode attributes before
10864 + * referencing.
10865 + * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
10866 + * and leave the work for d_revalidate()
10867 + */
10868 +static inline int au_test_fs_refresh_iattr(struct super_block *sb)
10869 +{
10870 +       return au_test_nfs(sb)
10871 +               || au_test_fuse(sb)
10872 +               /* || au_test_smbfs(sb) */      /* untested */
10873 +               /* || au_test_ocfs2(sb) */      /* untested */
10874 +               /* || au_test_coda(sb) */       /* untested */
10875 +               /* || au_test_v9fs(sb) */       /* untested */
10876 +               ;
10877 +}
10878 +
10879 +/*
10880 + * filesystems which don't maintain i_size or i_blocks.
10881 + */
10882 +static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
10883 +{
10884 +       return au_test_xfs(sb)
10885 +               /* || au_test_ext4(sb) */       /* untested */
10886 +               /* || au_test_ocfs2(sb) */      /* untested */
10887 +               /* || au_test_ocfs2_dlmfs(sb) */ /* untested */
10888 +               /* || au_test_sysv(sb) */       /* untested */
10889 +               /* || au_test_ubifs(sb) */      /* untested */
10890 +               /* || au_test_minix(sb) */      /* untested */
10891 +               ;
10892 +}
10893 +
10894 +/*
10895 + * filesystems which don't store the correct value in some of their inode
10896 + * attributes.
10897 + */
10898 +static inline int au_test_fs_bad_iattr(struct super_block *sb)
10899 +{
10900 +       return au_test_fs_bad_iattr_size(sb)
10901 +               /* || au_test_cifs(sb) */       /* untested */
10902 +               || au_test_fat(sb)
10903 +               || au_test_msdos(sb)
10904 +               || au_test_vfat(sb);
10905 +}
10906 +
10907 +/* they don't check i_nlink in link(2) */
10908 +static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
10909 +{
10910 +       return au_test_tmpfs(sb)
10911 +#ifdef CONFIG_AUFS_BR_RAMFS
10912 +               || au_test_ramfs(sb)
10913 +#endif
10914 +               || au_test_ubifs(sb);
10915 +}
10916 +
10917 +/*
10918 + * filesystems which set S_NOATIME and S_NOCMTIME.
10919 + */
10920 +static inline int au_test_fs_notime(struct super_block *sb)
10921 +{
10922 +       return au_test_nfs(sb)
10923 +               || au_test_fuse(sb)
10924 +               || au_test_ubifs(sb)
10925 +               /* || au_test_cifs(sb) */       /* untested */
10926 +               ;
10927 +}
10928 +
10929 +/*
10930 + * filesystems which require replacing i_mapping.
10931 + */
10932 +static inline int au_test_fs_bad_mapping(struct super_block *sb)
10933 +{
10934 +       return au_test_fuse(sb)
10935 +               || au_test_ubifs(sb);
10936 +}
10937 +
10938 +/* temporary support for i#1 in cramfs */
10939 +static inline int au_test_fs_unique_ino(struct inode *inode)
10940 +{
10941 +       if (au_test_cramfs(inode->i_sb))
10942 +               return inode->i_ino != 1;
10943 +       return 1;
10944 +}
10945 +
10946 +/* ---------------------------------------------------------------------- */
10947 +
10948 +/*
10949 + * the filesystem where the xino files are placed must support i/o after
10950 + * unlink and maintain i_size and i_blocks.
10951 + */
10952 +static inline int au_test_fs_bad_xino(struct super_block *sb)
10953 +{
10954 +       return au_test_fs_remote(sb)
10955 +               || au_test_fs_bad_iattr_size(sb)
10956 +#ifdef CONFIG_AUFS_BR_RAMFS
10957 +               || !(au_test_ramfs(sb) || au_test_fs_null_nd(sb))
10958 +#else
10959 +               || !au_test_fs_null_nd(sb) /* to keep xino code simple */
10960 +#endif
10961 +               /* don't want unnecessary work for xino */
10962 +               || au_test_aufs(sb)
10963 +               || au_test_ecryptfs(sb);
10964 +}
10965 +
10966 +static inline int au_test_fs_trunc_xino(struct super_block *sb)
10967 +{
10968 +       return au_test_tmpfs(sb)
10969 +               || au_test_ramfs(sb);
10970 +}
10971 +
10972 +/*
10973 + * test if the @sb is real-readonly.
10974 + */
10975 +static inline int au_test_fs_rr(struct super_block *sb)
10976 +{
10977 +       return au_test_iso9660(sb)
10978 +               || au_test_cramfs(sb)
10979 +               || au_test_romfs(sb);
10980 +}
10981 +
10982 +#endif /* __KERNEL__ */
10983 +#endif /* __AUFS_FSTYPE_H__ */
10984 diff --git a/fs/aufs/hinotify.c b/fs/aufs/hinotify.c
10985 new file mode 100644
10986 index 0000000..66b761f
10987 --- /dev/null
10988 +++ b/fs/aufs/hinotify.c
10989 @@ -0,0 +1,755 @@
10990 +/*
10991 + * Copyright (C) 2005-2009 Junjiro R. Okajima
10992 + *
10993 + * This program, aufs is free software; you can redistribute it and/or modify
10994 + * it under the terms of the GNU General Public License as published by
10995 + * the Free Software Foundation; either version 2 of the License, or
10996 + * (at your option) any later version.
10997 + *
10998 + * This program is distributed in the hope that it will be useful,
10999 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
11000 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11001 + * GNU General Public License for more details.
11002 + *
11003 + * You should have received a copy of the GNU General Public License
11004 + * along with this program; if not, write to the Free Software
11005 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
11006 + */
11007 +
11008 +/*
11009 + * inotify for the lower directories
11010 + */
11011 +
11012 +#include "aufs.h"
11013 +
11014 +static const __u32 AuHinMask = (IN_MOVE | IN_DELETE | IN_CREATE);
11015 +static struct inotify_handle *au_hin_handle;
11016 +
11017 +AuCacheFuncs(hinotify, HINOTIFY);
11018 +
11019 +int au_hin_alloc(struct au_hinode *hinode, struct inode *inode,
11020 +                struct inode *h_inode)
11021 +{
11022 +       int err;
11023 +       struct au_hinotify *hin;
11024 +       s32 wd;
11025 +
11026 +       err = -ENOMEM;
11027 +       hin = au_cache_alloc_hinotify();
11028 +       if (hin) {
11029 +               AuDebugOn(hinode->hi_notify);
11030 +               hinode->hi_notify = hin;
11031 +               hin->hin_aufs_inode = inode;
11032 +
11033 +               inotify_init_watch(&hin->hin_watch);
11034 +               wd = inotify_add_watch(au_hin_handle, &hin->hin_watch, h_inode,
11035 +                                      AuHinMask);
11036 +               if (wd >= 0)
11037 +                       return 0; /* success */
11038 +
11039 +               err = wd;
11040 +               put_inotify_watch(&hin->hin_watch);
11041 +               au_cache_free_hinotify(hin);
11042 +               hinode->hi_notify = NULL;
11043 +       }
11044 +
11045 +       return err;
11046 +}
11047 +
11048 +void au_hin_free(struct au_hinode *hinode)
11049 +{
11050 +       int err;
11051 +       struct au_hinotify *hin;
11052 +
11053 +       hin = hinode->hi_notify;
11054 +       if (hin) {
11055 +               err = 0;
11056 +               if (atomic_read(&hin->hin_watch.count))
11057 +                       err = inotify_rm_watch(au_hin_handle, &hin->hin_watch);
11058 +               if (unlikely(err))
11059 +                       /* it means the watch is already removed */
11060 +                       AuWarn("failed inotify_rm_watch() %d\n", err);
11061 +               au_cache_free_hinotify(hin);
11062 +               hinode->hi_notify = NULL;
11063 +       }
11064 +}
11065 +
11066 +/* ---------------------------------------------------------------------- */
11067 +
11068 +void au_hin_ctl(struct au_hinode *hinode, int do_set)
11069 +{
11070 +       struct inode *h_inode;
11071 +       struct inotify_watch *watch;
11072 +
11073 +       if (!hinode->hi_notify)
11074 +               return;
11075 +
11076 +       h_inode = hinode->hi_inode;
11077 +       IMustLock(h_inode);
11078 +
11079 +       /* todo: try inotify_find_update_watch()? */
11080 +       watch = &hinode->hi_notify->hin_watch;
11081 +       mutex_lock(&h_inode->inotify_mutex);
11082 +       /* mutex_lock(&watch->ih->mutex); */
11083 +       if (do_set) {
11084 +               AuDebugOn(watch->mask & AuHinMask);
11085 +               watch->mask |= AuHinMask;
11086 +       } else {
11087 +               AuDebugOn(!(watch->mask & AuHinMask));
11088 +               watch->mask &= ~AuHinMask;
11089 +       }
11090 +       /* mutex_unlock(&watch->ih->mutex); */
11091 +       mutex_unlock(&h_inode->inotify_mutex);
11092 +}
11093 +
11094 +void au_reset_hinotify(struct inode *inode, unsigned int flags)
11095 +{
11096 +       aufs_bindex_t bindex, bend;
11097 +       struct inode *hi;
11098 +       struct dentry *iwhdentry;
11099 +
11100 +       bend = au_ibend(inode);
11101 +       for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
11102 +               hi = au_h_iptr(inode, bindex);
11103 +               if (!hi)
11104 +                       continue;
11105 +
11106 +               /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */
11107 +               iwhdentry = au_hi_wh(inode, bindex);
11108 +               if (iwhdentry)
11109 +                       dget(iwhdentry);
11110 +               au_igrab(hi);
11111 +               au_set_h_iptr(inode, bindex, NULL, 0);
11112 +               au_set_h_iptr(inode, bindex, au_igrab(hi),
11113 +                             flags & ~AuHi_XINO);
11114 +               iput(hi);
11115 +               dput(iwhdentry);
11116 +               /* mutex_unlock(&hi->i_mutex); */
11117 +       }
11118 +}
11119 +
11120 +/* ---------------------------------------------------------------------- */
11121 +
11122 +static int hin_xino(struct inode *inode, struct inode *h_inode)
11123 +{
11124 +       int err;
11125 +       aufs_bindex_t bindex, bend, bfound, bstart;
11126 +       struct inode *h_i;
11127 +
11128 +       err = 0;
11129 +       if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
11130 +               AuWarn("branch root dir was changed\n");
11131 +               goto out;
11132 +       }
11133 +
11134 +       bfound = -1;
11135 +       bend = au_ibend(inode);
11136 +       bstart = au_ibstart(inode);
11137 +#if 0 /* reserved for future use */
11138 +       if (bindex == bend) {
11139 +               /* keep this ino in rename case */
11140 +               goto out;
11141 +       }
11142 +#endif
11143 +       for (bindex = bstart; bindex <= bend; bindex++) {
11144 +               if (au_h_iptr(inode, bindex) == h_inode) {
11145 +                       bfound = bindex;
11146 +                       break;
11147 +               }
11148 +       }
11149 +       if (bfound < 0)
11150 +               goto out;
11151 +
11152 +       for (bindex = bstart; bindex <= bend; bindex++) {
11153 +               h_i = au_h_iptr(inode, bindex);
11154 +               if (!h_i)
11155 +                       continue;
11156 +
11157 +               err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
11158 +               /* ignore this error */
11159 +               /* bad action? */
11160 +       }
11161 +
11162 +       /* children inode number will be broken */
11163 +
11164 + out:
11165 +       AuTraceErr(err);
11166 +       return err;
11167 +}
11168 +
11169 +static int hin_gen_tree(struct dentry *dentry)
11170 +{
11171 +       int err, i, j, ndentry;
11172 +       struct au_dcsub_pages dpages;
11173 +       struct au_dpage *dpage;
11174 +       struct dentry **dentries;
11175 +
11176 +       err = au_dpages_init(&dpages, GFP_NOFS);
11177 +       if (unlikely(err))
11178 +               goto out;
11179 +       err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
11180 +       if (unlikely(err))
11181 +               goto out_dpages;
11182 +
11183 +       for (i = 0; i < dpages.ndpage; i++) {
11184 +               dpage = dpages.dpages + i;
11185 +               dentries = dpage->dentries;
11186 +               ndentry = dpage->ndentry;
11187 +               for (j = 0; j < ndentry; j++) {
11188 +                       struct dentry *d;
11189 +
11190 +                       d = dentries[j];
11191 +                       if (IS_ROOT(d))
11192 +                               continue;
11193 +
11194 +                       d_drop(d);
11195 +                       au_digen_dec(d);
11196 +                       if (d->d_inode)
11197 +                               /* todo: reset children xino?
11198 +                                  cached children only? */
11199 +                               au_iigen_dec(d->d_inode);
11200 +               }
11201 +       }
11202 +
11203 + out_dpages:
11204 +       au_dpages_free(&dpages);
11205 +
11206 +       /* discard children */
11207 +       dentry_unhash(dentry);
11208 +       dput(dentry);
11209 + out:
11210 +       return err;
11211 +}
11212 +
11213 +/*
11214 + * make the alias of @inode named @name obsolete. return 0 if processed.
11215 + */
11216 +static int hin_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
11217 +                           const unsigned int isdir)
11218 +{
11219 +       int err;
11220 +       struct dentry *d;
11221 +       struct qstr *dname;
11222 +
11223 +       err = 1; /* nothing is processed yet */
11224 +       if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
11225 +               AuWarn("branch root dir was changed\n");
11226 +               err = 0;
11227 +               goto out;
11228 +       }
11229 +
11230 +       if (!isdir) {
11231 +               AuDebugOn(!name);
11232 +               au_iigen_dec(inode);
11233 +               spin_lock(&dcache_lock);
11234 +               list_for_each_entry(d, &inode->i_dentry, d_alias) {
11235 +                       dname = &d->d_name;
11236 +                       if (dname->len != nlen
11237 +                           || memcmp(dname->name, name, nlen))
11238 +                               continue; /* not the alias named @name */
11239 +                       err = 0;
11240 +                       spin_lock(&d->d_lock);
11241 +                       __d_drop(d);
11242 +                       au_digen_dec(d);
11243 +                       spin_unlock(&d->d_lock);
11244 +                       break;
11245 +               }
11246 +               spin_unlock(&dcache_lock);
11247 +       } else {
11248 +               au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIRS);
11249 +               d = d_find_alias(inode);
11250 +               if (!d) {
11251 +                       au_iigen_dec(inode);
11252 +                       goto out;
11253 +               }
11254 +
11255 +               dname = &d->d_name;
11256 +               if (dname->len == nlen && !memcmp(dname->name, name, nlen))
11257 +                       err = hin_gen_tree(d);
11258 +               dput(d);
11259 +       }
11260 +
11261 + out:
11262 +       AuTraceErr(err);
11263 +       return err;
11264 +}
11265 +
11266 +static int hin_gen_by_name(struct dentry *dentry, const unsigned int isdir)
11267 +{
11268 +       int err;
11269 +       struct inode *inode;
11270 +
11271 +       inode = dentry->d_inode;
11272 +       if (IS_ROOT(dentry)
11273 +           /* || (inode && inode->i_ino == AUFS_ROOT_INO) */
11274 +               ) {
11275 +               AuWarn("branch root dir was changed\n");
11276 +               return 0;
11277 +       }
11278 +
11279 +       err = 0;
11280 +       if (!isdir) {
11281 +               d_drop(dentry);
11282 +               au_digen_dec(dentry);
11283 +               if (inode)
11284 +                       au_iigen_dec(inode);
11285 +       } else {
11286 +               au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIRS);
11287 +               if (inode)
11288 +                       err = hin_gen_tree(dentry);
11289 +       }
11290 +
11291 +       AuTraceErr(err);
11292 +       return err;
11293 +}
11294 +
11295 +/* ---------------------------------------------------------------------- */
11296 +
11297 +/* hinotify job flags */
11298 +#define AuHinJob_XINO0         1
11299 +#define AuHinJob_GEN           (1 << 1)
11300 +#define AuHinJob_DIRENT                (1 << 2)
11301 +#define AuHinJob_ISDIR         (1 << 3)
11302 +#define AuHinJob_TRYXINO0      (1 << 4)
11303 +#define AuHinJob_MNTPNT                (1 << 5)
11304 +#define au_ftest_hinjob(flags, name)   ((flags) & AuHinJob_##name)
11305 +#define au_fset_hinjob(flags, name)    { (flags) |= AuHinJob_##name; }
11306 +#define au_fclr_hinjob(flags, name)    { (flags) &= ~AuHinJob_##name; }
11307 +
11308 +struct hin_job_args {
11309 +       unsigned int flags;
11310 +       struct inode *inode, *h_inode, *dir, *h_dir;
11311 +       struct dentry *dentry;
11312 +       char *h_name;
11313 +       int h_nlen;
11314 +};
11315 +
11316 +static int hin_job(struct hin_job_args *a)
11317 +{
11318 +       const unsigned int isdir = au_ftest_hinjob(a->flags, ISDIR);
11319 +
11320 +       /* reset xino */
11321 +       if (au_ftest_hinjob(a->flags, XINO0) && a->inode)
11322 +               hin_xino(a->inode, a->h_inode); /* ignore this error */
11323 +
11324 +       if (au_ftest_hinjob(a->flags, TRYXINO0)
11325 +           && a->inode
11326 +           && a->h_inode) {
11327 +               mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
11328 +               if (!a->h_inode->i_nlink)
11329 +                       hin_xino(a->inode, a->h_inode); /* ignore this error */
11330 +               mutex_unlock(&a->h_inode->i_mutex);
11331 +       }
11332 +
11333 +       /* make the generation obsolete */
11334 +       if (au_ftest_hinjob(a->flags, GEN)) {
11335 +               int err = -1;
11336 +               if (a->inode)
11337 +                       err = hin_gen_by_inode(a->h_name, a->h_nlen, a->inode,
11338 +                                              isdir);
11339 +               if (err && a->dentry)
11340 +                       hin_gen_by_name(a->dentry, isdir);
11341 +               /* ignore this error */
11342 +       }
11343 +
11344 +       /* make dir entries obsolete */
11345 +       if (au_ftest_hinjob(a->flags, DIRENT) && a->inode) {
11346 +               struct au_vdir *vdir;
11347 +
11348 +               vdir = au_ivdir(a->inode);
11349 +               if (vdir)
11350 +                       vdir->vd_jiffy = 0;
11351 +               /* IMustLock(a->inode); */
11352 +               /* a->inode->i_version++; */
11353 +       }
11354 +
11355 +       /* can do nothing but warn */
11356 +       if (au_ftest_hinjob(a->flags, MNTPNT)
11357 +           && a->dentry
11358 +           && d_mountpoint(a->dentry))
11359 +               AuWarn("mount-point %.*s is removed or renamed\n",
11360 +                      AuDLNPair(a->dentry));
11361 +
11362 +       return 0;
11363 +}
11364 +
11365 +/* ---------------------------------------------------------------------- */
11366 +
11367 +static char *in_name(u32 mask)
11368 +{
11369 +#ifdef CONFIG_AUFS_DEBUG
11370 +#define test_ret(flag) if (mask & flag) \
11371 +                               return #flag;
11372 +       test_ret(IN_ACCESS);
11373 +       test_ret(IN_MODIFY);
11374 +       test_ret(IN_ATTRIB);
11375 +       test_ret(IN_CLOSE_WRITE);
11376 +       test_ret(IN_CLOSE_NOWRITE);
11377 +       test_ret(IN_OPEN);
11378 +       test_ret(IN_MOVED_FROM);
11379 +       test_ret(IN_MOVED_TO);
11380 +       test_ret(IN_CREATE);
11381 +       test_ret(IN_DELETE);
11382 +       test_ret(IN_DELETE_SELF);
11383 +       test_ret(IN_MOVE_SELF);
11384 +       test_ret(IN_UNMOUNT);
11385 +       test_ret(IN_Q_OVERFLOW);
11386 +       test_ret(IN_IGNORED);
11387 +       return "";
11388 +#undef test_ret
11389 +#else
11390 +       return "??";
11391 +#endif
11392 +}
11393 +
11394 +static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
11395 +                                          struct inode *dir)
11396 +{
11397 +       struct dentry *dentry, *d, *parent;
11398 +       struct qstr *dname;
11399 +
11400 +       parent = d_find_alias(dir);
11401 +       if (!parent)
11402 +               return NULL;
11403 +
11404 +       dentry = NULL;
11405 +       spin_lock(&dcache_lock);
11406 +       list_for_each_entry(d, &parent->d_subdirs, d_u.d_child) {
11407 +               /* AuDbg("%.*s\n", AuDLNPair(d)); */
11408 +               dname = &d->d_name;
11409 +               if (dname->len != nlen || memcmp(dname->name, name, nlen))
11410 +                       continue;
11411 +               if (!atomic_read(&d->d_count) || !d->d_fsdata) {
11412 +                       spin_lock(&d->d_lock);
11413 +                       __d_drop(d);
11414 +                       spin_unlock(&d->d_lock);
11415 +                       continue;
11416 +               }
11417 +
11418 +               dentry = dget(d);
11419 +               break;
11420 +       }
11421 +       spin_unlock(&dcache_lock);
11422 +       dput(parent);
11423 +
11424 +       if (dentry)
11425 +               di_write_lock_child(dentry);
11426 +
11427 +       return dentry;
11428 +}
11429 +
11430 +static struct inode *lookup_wlock_by_ino(struct super_block *sb,
11431 +                                        aufs_bindex_t bindex, ino_t h_ino)
11432 +{
11433 +       struct inode *inode;
11434 +       ino_t ino;
11435 +       int err;
11436 +
11437 +       inode = NULL;
11438 +       err = au_xino_read(sb, bindex, h_ino, &ino);
11439 +       if (!err && ino)
11440 +               inode = ilookup(sb, ino);
11441 +       if (!inode)
11442 +               goto out;
11443 +
11444 +       if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
11445 +               AuWarn("wrong root branch\n");
11446 +               iput(inode);
11447 +               inode = NULL;
11448 +               goto out;
11449 +       }
11450 +
11451 +       ii_write_lock_child(inode);
11452 +
11453 + out:
11454 +       return inode;
11455 +}
11456 +
11457 +enum { CHILD, PARENT };
11458 +struct postproc_args {
11459 +       struct inode *h_dir, *dir, *h_child_inode;
11460 +       u32 mask;
11461 +       unsigned int flags[2];
11462 +       unsigned int h_child_nlen;
11463 +       char h_child_name[];
11464 +};
11465 +
11466 +static void postproc(void *_args)
11467 +{
11468 +       struct postproc_args *a = _args;
11469 +       struct super_block *sb;
11470 +       aufs_bindex_t bindex, bend, bfound;
11471 +       unsigned char xino, try_iput;
11472 +       int err;
11473 +       struct inode *inode;
11474 +       ino_t h_ino;
11475 +       struct hin_job_args args;
11476 +       struct dentry *dentry;
11477 +       struct au_sbinfo *sbinfo;
11478 +
11479 +       AuDebugOn(!_args);
11480 +       AuDebugOn(!a->h_dir);
11481 +       AuDebugOn(!a->dir);
11482 +       AuDebugOn(!a->mask);
11483 +       AuDbg("mask 0x%x %s, i%lu, hi%lu, hci%lu\n",
11484 +             a->mask, in_name(a->mask), a->dir->i_ino, a->h_dir->i_ino,
11485 +             a->h_child_inode ? a->h_child_inode->i_ino : 0);
11486 +
11487 +       inode = NULL;
11488 +       dentry = NULL;
11489 +       /*
11490 +        * do not lock a->dir->i_mutex here
11491 +        * because d_revalidate() may cause a deadlock.
11492 +        */
11493 +       sb = a->dir->i_sb;
11494 +       AuDebugOn(!sb);
11495 +       sbinfo = au_sbi(sb);
11496 +       AuDebugOn(!sbinfo);
11497 +       /* big aufs lock */
11498 +       si_noflush_write_lock(sb);
11499 +
11500 +       ii_read_lock_parent(a->dir);
11501 +       bfound = -1;
11502 +       bend = au_ibend(a->dir);
11503 +       for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++)
11504 +               if (au_h_iptr(a->dir, bindex) == a->h_dir) {
11505 +                       bfound = bindex;
11506 +                       break;
11507 +               }
11508 +       ii_read_unlock(a->dir);
11509 +       if (unlikely(bfound < 0))
11510 +               goto out;
11511 +
11512 +       xino = !!au_opt_test(au_mntflags(sb), XINO);
11513 +       h_ino = 0;
11514 +       if (a->h_child_inode)
11515 +               h_ino = a->h_child_inode->i_ino;
11516 +
11517 +       if (a->h_child_nlen
11518 +           && (au_ftest_hinjob(a->flags[CHILD], GEN)
11519 +               || au_ftest_hinjob(a->flags[CHILD], MNTPNT)))
11520 +               dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
11521 +                                             a->dir);
11522 +       try_iput = 0;
11523 +       if (dentry)
11524 +               inode = dentry->d_inode;
11525 +       if (xino && !inode && h_ino
11526 +           && (au_ftest_hinjob(a->flags[CHILD], XINO0)
11527 +               || au_ftest_hinjob(a->flags[CHILD], TRYXINO0)
11528 +               || au_ftest_hinjob(a->flags[CHILD], GEN))) {
11529 +               inode = lookup_wlock_by_ino(sb, bfound, h_ino);
11530 +               try_iput = 1;
11531 +           }
11532 +
11533 +       args.flags = a->flags[CHILD];
11534 +       args.dentry = dentry;
11535 +       args.inode = inode;
11536 +       args.h_inode = a->h_child_inode;
11537 +       args.dir = a->dir;
11538 +       args.h_dir = a->h_dir;
11539 +       args.h_name = a->h_child_name;
11540 +       args.h_nlen = a->h_child_nlen;
11541 +       err = hin_job(&args);
11542 +       if (dentry) {
11543 +               if (dentry->d_fsdata)
11544 +                       di_write_unlock(dentry);
11545 +               dput(dentry);
11546 +       }
11547 +       if (inode && try_iput) {
11548 +               ii_write_unlock(inode);
11549 +               iput(inode);
11550 +       }
11551 +
11552 +       ii_write_lock_parent(a->dir);
11553 +       args.flags = a->flags[PARENT];
11554 +       args.dentry = NULL;
11555 +       args.inode = a->dir;
11556 +       args.h_inode = a->h_dir;
11557 +       args.dir = NULL;
11558 +       args.h_dir = NULL;
11559 +       args.h_name = NULL;
11560 +       args.h_nlen = 0;
11561 +       err = hin_job(&args);
11562 +       ii_write_unlock(a->dir);
11563 +
11564 + out:
11565 +       au_nwt_done(&sbinfo->si_nowait);
11566 +       si_write_unlock(sb);
11567 +
11568 +       iput(a->h_child_inode);
11569 +       iput(a->h_dir);
11570 +       iput(a->dir);
11571 +       kfree(a);
11572 +}
11573 +
11574 +/* ---------------------------------------------------------------------- */
11575 +
11576 +/*
11577 + * inotify handler, hands the real work to postproc() via au_wkq_nowait().
11578 + */
11579 +static void aufs_inotify(struct inotify_watch *watch, u32 wd __maybe_unused,
11580 +                        u32 mask, u32 cookie __maybe_unused,
11581 +                        const char *h_child_name, struct inode *h_child_inode)
11582 +{
11583 +       struct au_hinotify *hinotify;
11584 +       struct postproc_args *args;
11585 +       int len, wkq_err;
11586 +       unsigned char isdir, wh;
11587 +       struct inode *dir;
11588 +       unsigned int flags[2];
11589 +
11590 +       /* if IN_UNMOUNT happens, there must be another bug */
11591 +       AuDebugOn(mask & IN_UNMOUNT);
11592 +       if (mask & (IN_IGNORED | IN_UNMOUNT)) {
11593 +               put_inotify_watch(watch);
11594 +               return;
11595 +       }
11596 +#ifdef AuDbgHinotify
11597 +       au_debug(1);
11598 +       if (1 || !h_child_name || strcmp(h_child_name, AUFS_XINO_FNAME)) {
11599 +               AuDbg("i%lu, wd %d, mask 0x%x %s, cookie 0x%x, hcname %s,"
11600 +                     " hi%lu\n",
11601 +                     watch->inode->i_ino, wd, mask, in_name(mask), cookie,
11602 +                     h_child_name ? h_child_name : "",
11603 +                     h_child_inode ? h_child_inode->i_ino : 0);
11604 +               WARN_ON(1);
11605 +       }
11606 +       au_debug(0);
11607 +#endif
11608 +
11609 +       hinotify = container_of(watch, struct au_hinotify, hin_watch);
11610 +       AuDebugOn(!hinotify || !hinotify->hin_aufs_inode);
11611 +       dir = igrab(hinotify->hin_aufs_inode);
11612 +       if (!dir)
11613 +               return;
11614 +
11615 +       len = 0;
11616 +       wh = 0;
11617 +       if (h_child_name) {
11618 +               len = strlen(h_child_name);
11619 +               if (!memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
11620 +                       h_child_name += AUFS_WH_PFX_LEN;
11621 +                       len -= AUFS_WH_PFX_LEN;
11622 +                       wh = 1;
11623 +               }
11624 +       }
11625 +
11626 +       isdir = h_child_inode && S_ISDIR(h_child_inode->i_mode);
11627 +       flags[PARENT] = AuHinJob_ISDIR;
11628 +       flags[CHILD] = isdir ? AuHinJob_ISDIR : 0;
11629 +       switch (mask & IN_ALL_EVENTS) {
11630 +       case IN_MOVED_FROM:
11631 +       case IN_MOVED_TO:
11632 +               AuDebugOn(!h_child_name || !h_child_inode);
11633 +               au_fset_hinjob(flags[CHILD], GEN);
11634 +               au_fset_hinjob(flags[CHILD], XINO0);
11635 +               au_fset_hinjob(flags[CHILD], MNTPNT);
11636 +               au_fset_hinjob(flags[PARENT], DIRENT);
11637 +               break;
11638 +
11639 +       case IN_CREATE:
11640 +               AuDebugOn(!h_child_name || !h_child_inode);
11641 +               au_fset_hinjob(flags[PARENT], DIRENT);
11642 +               au_fset_hinjob(flags[CHILD], GEN);
11643 +               break;
11644 +
11645 +       case IN_DELETE:
11646 +               /*
11647 +                * aufs will never be able to get this child inode.
11648 +                * revalidation should be in d_revalidate()
11649 +                * by checking i_nlink, i_generation or d_unhashed().
11650 +                */
11651 +               AuDebugOn(!h_child_name);
11652 +               au_fset_hinjob(flags[PARENT], DIRENT);
11653 +               au_fset_hinjob(flags[CHILD], GEN);
11654 +               au_fset_hinjob(flags[CHILD], TRYXINO0);
11655 +               au_fset_hinjob(flags[CHILD], MNTPNT);
11656 +               break;
11657 +
11658 +       default:
11659 +               AuDebugOn(1);
11660 +       }
11661 +
11662 +       if (wh)
11663 +               h_child_inode = NULL;
11664 +
11665 +       /* iput() and kfree() will be called in postproc() */
11666 +       /*
11667 +        * inotify_mutex is already acquired and kmalloc/prune_icache may lock
11668 +        * iprune_mutex. strange.
11669 +        */
11670 +       lockdep_off();
11671 +       args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
11672 +       lockdep_on();
11673 +       if (unlikely(!args)) {
11674 +               AuErr1("no memory\n");
11675 +               iput(dir);
11676 +               return;
11677 +       }
11678 +       args->flags[PARENT] = flags[PARENT];
11679 +       args->flags[CHILD] = flags[CHILD];
11680 +       args->mask = mask;
11681 +       args->dir = dir;
11682 +       args->h_dir = igrab(watch->inode);
11683 +       if (h_child_inode)
11684 +               h_child_inode = igrab(h_child_inode); /* can be NULL */
11685 +       args->h_child_inode = h_child_inode;
11686 +       args->h_child_nlen = len;
11687 +       if (len)
11688 +               memcpy(args->h_child_name, h_child_name, len + 1);
11689 +
11690 +       lockdep_off();
11691 +       wkq_err = au_wkq_nowait(postproc, args, dir->i_sb);
11692 +       lockdep_on();
11693 +       if (unlikely(wkq_err)) {
11694 +               /* postproc() is assumed not to run on failure, release here */
11695 +               AuErr("wkq %d\n", wkq_err);
11696 +               iput(args->h_child_inode);
11697 +               iput(args->h_dir);
11698 +               iput(args->dir);
11699 +               kfree(args);
11700 +       }
11701 +}
11702 +
11703 +static void aufs_inotify_destroy(struct inotify_watch *watch __maybe_unused)
11704 +{
11705 +       /* no per-watch cleanup is performed here */
11706 +}
11707 +
11708 +static struct inotify_operations aufs_inotify_ops = {
11709 +       .destroy_watch  = aufs_inotify_destroy,
11710 +       .handle_event   = aufs_inotify,
11711 +};
11712 +
11713 +/* ---------------------------------------------------------------------- */
11714 +/* destroy the AuCache_HINOTIFY kmem cache and clear its au_cachep[] slot */
11715 +static void au_hin_destroy_cache(void)
11716 +{
11717 +       kmem_cache_destroy(au_cachep[AuCache_HINOTIFY]);
11718 +       au_cachep[AuCache_HINOTIFY] = NULL;
11719 +}
11720 +
11721 +int __init au_hinotify_init(void)
11722 +{
11723 +       int err;
11724 +
11725 +       err = -ENOMEM;
11726 +       au_cachep[AuCache_HINOTIFY] = AuCache(au_hinotify);
11727 +       if (au_cachep[AuCache_HINOTIFY]) {
11728 +               err = 0;
11729 +               au_hin_handle = inotify_init(&aufs_inotify_ops);
11730 +               if (IS_ERR(au_hin_handle)) {
11731 +                       err = PTR_ERR(au_hin_handle);
11732 +                       au_hin_destroy_cache();
11733 +               }
11734 +       }
11735 +       AuTraceErr(err);
11736 +       return err;
11737 +}
11738 +/* undo au_hinotify_init(): destroy the inotify handle, then the cache */
11739 +void au_hinotify_fin(void)
11740 +{
11741 +       inotify_destroy(au_hin_handle);
11742 +       if (au_cachep[AuCache_HINOTIFY])
11743 +               au_hin_destroy_cache();
11744 +}
11745 diff --git a/fs/aufs/i_op.c b/fs/aufs/i_op.c
11746 new file mode 100644
11747 index 0000000..f4f4912
11748 --- /dev/null
11749 +++ b/fs/aufs/i_op.c
11750 @@ -0,0 +1,875 @@
11751 +/*
11752 + * Copyright (C) 2005-2009 Junjiro R. Okajima
11753 + *
11754 + * This program, aufs is free software; you can redistribute it and/or modify
11755 + * it under the terms of the GNU General Public License as published by
11756 + * the Free Software Foundation; either version 2 of the License, or
11757 + * (at your option) any later version.
11758 + *
11759 + * This program is distributed in the hope that it will be useful,
11760 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
11761 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11762 + * GNU General Public License for more details.
11763 + *
11764 + * You should have received a copy of the GNU General Public License
11765 + * along with this program; if not, write to the Free Software
11766 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
11767 + */
11768 +
11769 +/*
11770 + * inode operations (except add/del/rename)
11771 + */
11772 +
11773 +#include <linux/device_cgroup.h>
11774 +#include <linux/fs_stack.h>
11775 +#include <linux/mm.h>
11776 +#include <linux/namei.h>
11777 +#include <linux/security.h>
11778 +#include <linux/uaccess.h>
11779 +#include "aufs.h"
11780 +/* check 'mask' against the lower inode h_inode; returns 0 or an error */
11781 +static int h_permission(struct inode *h_inode, int mask,
11782 +                       struct vfsmount *h_mnt, int brperm)
11783 +{
11784 +       int err;
11785 +       const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
11786 +
11787 +       err = -EACCES; /* result of the early rejections just below */
11788 +       if ((write_mask && IS_IMMUTABLE(h_inode))
11789 +           || ((mask & MAY_EXEC)
11790 +               && S_ISREG(h_inode->i_mode)
11791 +               && ((h_mnt->mnt_flags & MNT_NOEXEC)
11792 +                   || !(h_inode->i_mode & S_IXUGO))))
11793 +               goto out;
11794 +
11795 +       /*
11796 +        * - skip the lower fs test in the case of write to ro branch.
11797 +        * - nfs dir permission write check is optimized, but a policy for
11798 +        *   link/rename requires a real check.
11799 +        */
11800 +       if ((write_mask && !au_br_writable(brperm))
11801 +           || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
11802 +               && write_mask && !(mask & MAY_READ))
11803 +           || !h_inode->i_op
11804 +           || !h_inode->i_op->permission) {
11805 +               /* AuLabel(generic_permission); */
11806 +               err = generic_permission(h_inode, mask, NULL);
11807 +       } else {
11808 +               /* AuLabel(h_inode->permission); */
11809 +               err = h_inode->i_op->permission(h_inode, mask);
11810 +               AuTraceErr(err);
11811 +       }
11812 +
11813 +       if (!err)
11814 +               err = devcgroup_inode_permission(h_inode, mask);
11815 +       if (!err)
11816 +               err = security_inode_permission
11817 +                       (h_inode, mask & (MAY_READ | MAY_WRITE | MAY_EXEC
11818 +                                         | MAY_APPEND));
11819 +
11820 + out:
11821 +       return err;
11822 +}
11823 +/* permission check on an aufs inode, delegated to its lower inode(s) */
11824 +static int aufs_permission(struct inode *inode, int mask)
11825 +{
11826 +       int err;
11827 +       aufs_bindex_t bindex, bend;
11828 +       const unsigned char isdir = !!S_ISDIR(inode->i_mode);
11829 +       const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
11830 +       struct inode *h_inode;
11831 +       struct super_block *sb;
11832 +       struct au_branch *br;
11833 +
11834 +       sb = inode->i_sb;
11835 +       si_read_lock(sb, AuLock_FLUSH);
11836 +       ii_read_lock_child(inode);
11837 +
11838 +       if (!isdir || write_mask) {
11839 +               err = au_busy_or_stale();
11840 +               h_inode = au_h_iptr(inode, au_ibstart(inode));
11841 +               if (unlikely(!h_inode
11842 +                            || (h_inode->i_mode & S_IFMT)
11843 +                            != (inode->i_mode & S_IFMT)))
11844 +                       goto out;
11845 +
11846 +               err = 0;
11847 +               bindex = au_ibstart(inode);
11848 +               br = au_sbr(sb, bindex);
11849 +               err = h_permission(h_inode, mask, br->br_mnt, br->br_perm);
11850 +               if (write_mask && !err) {
11851 +                       /* test whether the upper writable branch exists */
11852 +                       err = -EROFS;
11853 +                       for (; bindex >= 0; bindex--)
11854 +                               if (!au_br_rdonly(au_sbr(sb, bindex))) {
11855 +                                       err = 0;
11856 +                                       break;
11857 +                               }
11858 +               }
11859 +               goto out;
11860 +       }
11861 +
11862 +       /* non-write access to a dir: test every lower dir until one fails */
11863 +       err = 0;
11864 +       bend = au_ibend(inode);
11865 +       for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
11866 +               h_inode = au_h_iptr(inode, bindex);
11867 +               if (h_inode) {
11868 +                       err = au_busy_or_stale();
11869 +                       if (unlikely(!S_ISDIR(h_inode->i_mode)))
11870 +                               break;
11871 +
11872 +                       br = au_sbr(sb, bindex);
11873 +                       err = h_permission(h_inode, mask, br->br_mnt,
11874 +                                          br->br_perm);
11875 +               }
11876 +       }
11877 +
11878 + out:
11879 +       ii_read_unlock(inode);
11880 +       si_read_unlock(sb);
11881 +       return err;
11882 +}
11883 +
11884 +/* ---------------------------------------------------------------------- */
11885 +/* look 'dentry' up in the branches and splice an aufs inode in if positive */
11886 +static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
11887 +                                 struct nameidata *nd)
11888 +{
11889 +       struct dentry *ret, *parent;
11890 +       struct inode *inode, *h_inode;
11891 +       struct mutex *mtx;
11892 +       struct super_block *sb;
11893 +       int err, npositive;
11894 +       aufs_bindex_t bstart;
11895 +
11896 +       IMustLock(dir);
11897 +
11898 +       sb = dir->i_sb;
11899 +       si_read_lock(sb, AuLock_FLUSH);
11900 +       ret = ERR_PTR(-ENAMETOOLONG);
11901 +       if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
11902 +               goto out;
11903 +       err = au_alloc_dinfo(dentry);
11904 +       ret = ERR_PTR(err);
11905 +       if (unlikely(err))
11906 +               goto out;
11907 +
11908 +       parent = dentry->d_parent; /* dir inode is locked */
11909 +       di_read_lock_parent(parent, AuLock_IR);
11910 +       npositive = au_lkup_dentry(dentry, au_dbstart(parent), /*type*/0, nd);
11911 +       di_read_unlock(parent, AuLock_IR);
11912 +       err = npositive;
11913 +       ret = ERR_PTR(err);
11914 +       if (unlikely(err < 0))
11915 +               goto out_unlock;
11916 +
11917 +       inode = NULL; /* stays NULL for a negative lookup */
11918 +       if (npositive) {
11919 +               bstart = au_dbstart(dentry);
11920 +               h_inode = au_h_dptr(dentry, bstart)->d_inode;
11921 +               if (!S_ISDIR(h_inode->i_mode)) {
11922 +                       /*
11923 +                        * stop 'race'-ing between hardlinks under different
11924 +                        * parents.
11925 +                        */
11926 +                       mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx;
11927 +                       mutex_lock(mtx);
11928 +                       inode = au_new_inode(dentry, /*must_new*/0);
11929 +                       mutex_unlock(mtx);
11930 +               } else
11931 +                       inode = au_new_inode(dentry, /*must_new*/0);
11932 +               ret = (void *)inode;
11933 +       }
11934 +       if (IS_ERR(inode))
11935 +               goto out_unlock;
11936 +
11937 +       ret = d_splice_alias(inode, dentry);
11938 +       if (unlikely(IS_ERR(ret) && inode))
11939 +               ii_write_unlock(inode);
11940 +       au_store_oflag(nd, inode);
11941 +
11942 + out_unlock:
11943 +       di_write_unlock(dentry);
11944 + out:
11945 +       si_read_unlock(sb);
11946 +       return ret;
11947 +}
11948 +
11949 +/* ---------------------------------------------------------------------- */
11950 +/* make 'parent' exist on branch bcpup; returns bcpup or a negative error */
11951 +static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
11952 +                         const unsigned char add_entry, aufs_bindex_t bcpup,
11953 +                         aufs_bindex_t bstart)
11954 +{
11955 +       int err;
11956 +       struct dentry *h_parent;
11957 +       struct inode *h_dir;
11958 +
11959 +       if (add_entry) {
11960 +               au_update_dbstart(dentry);
11961 +               IMustLock(parent->d_inode);
11962 +       } else
11963 +               di_write_lock_parent(parent);
11964 +
11965 +       err = 0;
11966 +       if (!au_h_dptr(parent, bcpup)) {
11967 +               if (bstart < bcpup)
11968 +                       err = au_cpdown_dirs(dentry, bcpup);
11969 +               else
11970 +                       err = au_cpup_dirs(dentry, bcpup);
11971 +       }
11972 +       if (!err && add_entry) {
11973 +               h_parent = au_h_dptr(parent, bcpup);
11974 +               h_dir = h_parent->d_inode;
11975 +               mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
11976 +               err = au_lkup_neg(dentry, bcpup);
11977 +               /* todo: no unlock here */
11978 +               mutex_unlock(&h_dir->i_mutex);
11979 +               if (bstart < bcpup && au_dbstart(dentry) < 0) {
11980 +                       au_set_dbstart(dentry, 0);
11981 +                       au_update_dbrange(dentry, /*do_put_zero*/0);
11982 +               }
11983 +       }
11984 +
11985 +       if (!add_entry)
11986 +               di_write_unlock(parent); /* pairs with the lock taken above */
11987 +       if (!err)
11988 +               err = bcpup; /* success */
11989 +
11990 +       return err;
11991 +}
11992 +
11993 +/*
11994 + * decide the branch and the parent dir where we will create a new entry.
11995 + * returns the chosen branch index on success, or a negative error.
11996 + * copies the parent dir to that branch (up or down) if needed.
11997 + */
11998 +int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
11999 +             struct au_wr_dir_args *args)
12000 +{
12001 +       int err;
12002 +       aufs_bindex_t bcpup, bstart, src_bstart;
12003 +       const unsigned char add_entry = !!au_ftest_wrdir(args->flags,
12004 +                                                        ADD_ENTRY);
12005 +       struct super_block *sb;
12006 +       struct dentry *parent;
12007 +       struct au_sbinfo *sbinfo;
12008 +
12009 +       sb = dentry->d_sb;
12010 +       sbinfo = au_sbi(sb);
12011 +       parent = dget_parent(dentry);
12012 +       bstart = au_dbstart(dentry);
12013 +       bcpup = bstart;
12014 +       if (args->force_btgt < 0) {
12015 +               if (src_dentry) {
12016 +                       src_bstart = au_dbstart(src_dentry);
12017 +                       if (src_bstart < bstart)
12018 +                               bcpup = src_bstart;
12019 +               } else if (add_entry) {
12020 +                       err = AuWbrCreate(sbinfo, dentry,
12021 +                                         au_ftest_wrdir(args->flags, ISDIR));
12022 +                       bcpup = err;
12023 +               }
12024 +
12025 +               if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) {
12026 +                       if (add_entry)
12027 +                               err = AuWbrCopyup(sbinfo, dentry);
12028 +                       else {
12029 +                               if (!IS_ROOT(dentry)) {
12030 +                                       di_read_lock_parent(parent, !AuLock_IR);
12031 +                                       err = AuWbrCopyup(sbinfo, dentry);
12032 +                                       di_read_unlock(parent, !AuLock_IR);
12033 +                               } else
12034 +                                       err = AuWbrCopyup(sbinfo, dentry);
12035 +                       }
12036 +                       bcpup = err;
12037 +                       if (unlikely(err < 0))
12038 +                               goto out;
12039 +               }
12040 +       } else {
12041 +               bcpup = args->force_btgt; /* the caller chose the branch */
12042 +               AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode));
12043 +       }
12044 +       AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
12045 +       if (bstart < bcpup)
12046 +               au_update_dbrange(dentry, /*do_put_zero*/1);
12047 +
12048 +       err = bcpup;
12049 +       if (bcpup == bstart)
12050 +               goto out; /* success */
12051 +
12052 +       /* copyup the new parent into the branch we process */
12053 +       err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
12054 +
12055 + out:
12056 +       dput(parent);
12057 +       return err;
12058 +}
12059 +
12060 +/* ---------------------------------------------------------------------- */
12061 +
12062 +struct dentry *au_pinned_h_parent(struct au_pin *pin)
12063 +{
12064 +       if (!pin || !pin->parent)
12065 +               return NULL; /* no pinned parent to look at */
12066 +       return au_h_dptr(pin->parent, pin->bindex);
12067 +}
12068 +/* release what au_do_pin() acquired and reset parent, hdir and h_mnt */
12069 +void au_unpin(struct au_pin *p)
12070 +{
12071 +       if (au_ftest_pin(p->flags, MNT_WRITE))
12072 +               mnt_drop_write(p->h_mnt);
12073 +       if (!p->hdir)
12074 +               return;
12075 +
12076 +       au_hin_imtx_unlock(p->hdir);
12077 +       if (!au_ftest_pin(p->flags, DI_LOCKED))
12078 +               di_read_unlock(p->parent, AuLock_IR);
12079 +       iput(p->hdir->hi_inode);
12080 +       dput(p->parent);
12081 +       p->parent = NULL;
12082 +       p->hdir = NULL;
12083 +       p->h_mnt = NULL;
12084 +}
12085 +
12086 +int au_do_pin(struct au_pin *p)
12087 +{
12088 +       int err;
12089 +       struct super_block *sb;
12090 +       struct dentry *h_dentry, *h_parent;
12091 +       struct au_branch *br;
12092 +       struct inode *h_dir;
12093 +
12094 +       err = 0;
12095 +       sb = p->dentry->d_sb;
12096 +       br = au_sbr(sb, p->bindex);
12097 +       if (IS_ROOT(p->dentry)) {
12098 +               if (au_ftest_pin(p->flags, MNT_WRITE)) {
12099 +                       p->h_mnt = br->br_mnt;
12100 +                       err = mnt_want_write(p->h_mnt);
12101 +                       if (unlikely(err)) {
12102 +                               au_fclr_pin(p->flags, MNT_WRITE);
12103 +                               goto out_err;
12104 +                       }
12105 +               }
12106 +               goto out;
12107 +       }
12108 +
12109 +       h_dentry = NULL;
12110 +       if (p->bindex <= au_dbend(p->dentry))
12111 +               h_dentry = au_h_dptr(p->dentry, p->bindex);
12112 +
12113 +       p->parent = dget_parent(p->dentry);
12114 +       if (!au_ftest_pin(p->flags, DI_LOCKED))
12115 +               di_read_lock(p->parent, AuLock_IR, p->lsc_di);
12116 +
12117 +       h_dir = NULL;
12118 +       h_parent = au_h_dptr(p->parent, p->bindex);
12119 +       p->hdir = au_hi(p->parent->d_inode, p->bindex);
12120 +       if (p->hdir)
12121 +               h_dir = p->hdir->hi_inode;
12122 +
12123 +       /* udba case */
12124 +       if (unlikely(!p->hdir || !h_dir)) {
12125 +               if (!au_ftest_pin(p->flags, DI_LOCKED))
12126 +                       di_read_unlock(p->parent, AuLock_IR);
12127 +               dput(p->parent);
12128 +               p->parent = NULL;
12129 +               goto out_err;
12130 +       }
12131 +
12132 +       au_igrab(h_dir);
12133 +       au_hin_imtx_lock_nested(p->hdir, p->lsc_hi);
12134 +
12135 +       if (unlikely(p->hdir->hi_inode != h_parent->d_inode)) {
12136 +               err = -EBUSY;
12137 +               goto out_unpin;
12138 +       }
12139 +       if (h_dentry) {
12140 +               err = au_h_verify(h_dentry, p->udba, h_dir, h_parent, br);
12141 +               if (unlikely(err)) {
12142 +                       au_fclr_pin(p->flags, MNT_WRITE);
12143 +                       goto out_unpin;
12144 +               }
12145 +       }
12146 +
12147 +       if (au_ftest_pin(p->flags, MNT_WRITE)) {
12148 +               p->h_mnt = br->br_mnt;
12149 +               err = mnt_want_write(p->h_mnt);
12150 +               if (unlikely(err)) {
12151 +                       au_fclr_pin(p->flags, MNT_WRITE);
12152 +                       goto out_unpin;
12153 +               }
12154 +       }
12155 +       goto out; /* success */
12156 +
12157 + out_unpin:
12158 +       au_unpin(p);
12159 + out_err:
12160 +       AuErr("err %d\n", err);
12161 +       err = au_busy_or_stale();
12162 + out:
12163 +       return err;
12164 +}
12165 +
12166 +void au_pin_init(struct au_pin *p, struct dentry *dentry,
12167 +                aufs_bindex_t bindex, int lsc_di, int lsc_hi,
12168 +                unsigned int udba, unsigned char flags)
12169 +{
12170 +       /* nothing is held yet */
12171 +       p->h_mnt = NULL;
12172 +       p->hdir = NULL;
12173 +       p->parent = NULL;
12174 +       /* the request which au_do_pin() will act on */
12175 +       p->dentry = dentry;
12176 +       p->bindex = bindex;
12177 +       p->flags = flags;
12178 +       p->udba = udba;
12179 +       p->lsc_di = lsc_di;
12180 +       p->lsc_hi = lsc_hi;
12181 +}
12181 +/* set 'pin' up with AuLsc_DI_PARENT/AuLsc_I_PARENT2, then au_do_pin() it */
12182 +int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
12183 +          unsigned int udba, unsigned char flags)
12184 +{
12185 +       au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
12186 +                   udba, flags);
12187 +       return au_do_pin(pin);
12188 +}
12189 +
12190 +/* ---------------------------------------------------------------------- */
12191 +/* bit flags for au_icpup_args.flags and their test/set/clear helpers */
12192 +#define AuIcpup_DID_CPUP       1
12193 +#define au_ftest_icpup(flags, name)    ((flags) & AuIcpup_##name)
12194 +#define au_fset_icpup(flags, name)     { (flags) |= AuIcpup_##name; }
12195 +#define au_fclr_icpup(flags, name)     { (flags) &= ~AuIcpup_##name; }
12196 +
12197 +struct au_icpup_args {
12198 +       unsigned char flags; /* AuIcpup_* */
12199 +       unsigned char pin_flags; /* AuPin_*, handed to au_pin() */
12200 +       aufs_bindex_t btgt; /* branch index returned by au_wr_dir() */
12201 +       struct au_pin pin;
12202 +       struct path h_path;
12203 +       struct inode *h_inode; /* inode of h_path.dentry */
12204 +};
12205 +
12206 +static int au_lock_and_icpup(struct dentry *dentry, struct iattr *ia,
12207 +                            struct au_icpup_args *a)
12208 +{
12209 +       int err;
12210 +       unsigned int udba;
12211 +       loff_t sz;
12212 +       aufs_bindex_t bstart;
12213 +       struct dentry *hi_wh, *parent;
12214 +       struct inode *inode;
12215 +       struct au_wr_dir_args wr_dir_args = {
12216 +               .force_btgt     = -1,
12217 +               .flags          = 0
12218 +       };
12219 +
12220 +       di_write_lock_child(dentry);
12221 +       bstart = au_dbstart(dentry);
12222 +       inode = dentry->d_inode;
12223 +       if (S_ISDIR(inode->i_mode))
12224 +               au_fset_wrdir(wr_dir_args.flags, ISDIR);
12225 +       /* plink or hi_wh() case */
12226 +       if (bstart != au_ibstart(inode))
12227 +               wr_dir_args.force_btgt = au_ibstart(inode);
12228 +       err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
12229 +       if (unlikely(err < 0))
12230 +               goto out_dentry;
12231 +       a->btgt = err;
12232 +       if (err != bstart)
12233 +               au_fset_icpup(a->flags, DID_CPUP);
12234 +
12235 +       err = 0;
12236 +       a->pin_flags = AuPin_MNT_WRITE;
12237 +       parent = NULL;
12238 +       if (!IS_ROOT(dentry)) {
12239 +               au_fset_pin(a->pin_flags, DI_LOCKED);
12240 +               parent = dget_parent(dentry);
12241 +               di_write_lock_parent(parent);
12242 +       }
12243 +
12244 +       udba = au_opt_udba(dentry->d_sb);
12245 +       if (d_unhashed(dentry) || (ia->ia_valid & ATTR_FILE))
12246 +               udba = AuOpt_UDBA_NONE;
12247 +       err = au_pin(&a->pin, dentry, a->btgt, udba, a->pin_flags);
12248 +       if (unlikely(err)) {
12249 +               if (parent) {
12250 +                       di_write_unlock(parent);
12251 +                       dput(parent);
12252 +               }
12253 +               goto out_dentry;
12254 +       }
12255 +       a->h_path.dentry = au_h_dptr(dentry, bstart);
12256 +       a->h_inode = a->h_path.dentry->d_inode;
12257 +       mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
12258 +       sz = -1;
12259 +       if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode))
12260 +               sz = ia->ia_size;
12261 +
12262 +       hi_wh = NULL;
12263 +       if (au_ftest_icpup(a->flags, DID_CPUP) && d_unhashed(dentry)) {
12264 +               hi_wh = au_hi_wh(inode, a->btgt);
12265 +               if (!hi_wh) {
12266 +                       err = au_sio_cpup_wh(dentry, a->btgt, sz, /*file*/NULL);
12267 +                       if (unlikely(err))
12268 +                               goto out_unlock;
12269 +                       hi_wh = au_hi_wh(inode, a->btgt);
12270 +                       /* todo: revalidate hi_wh? */
12271 +               }
12272 +       }
12273 +
12274 +       if (parent) {
12275 +               au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
12276 +               di_downgrade_lock(parent, AuLock_IR);
12277 +               dput(parent);
12278 +       }
12279 +       if (!au_ftest_icpup(a->flags, DID_CPUP))
12280 +               goto out; /* success */
12281 +
12282 +       if (!d_unhashed(dentry)) {
12283 +               err = au_sio_cpup_simple(dentry, a->btgt, sz, AuCpup_DTIME);
12284 +               if (!err)
12285 +                       a->h_path.dentry = au_h_dptr(dentry, a->btgt);
12286 +       } else if (!hi_wh)
12287 +               a->h_path.dentry = au_h_dptr(dentry, a->btgt);
12288 +       else
12289 +               a->h_path.dentry = hi_wh; /* do not dget here */
12290 +
12291 + out_unlock:
12292 +       mutex_unlock(&a->h_inode->i_mutex);
12293 +       a->h_inode = a->h_path.dentry->d_inode;
12294 +       if (!err) {
12295 +               mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
12296 +               goto out; /* success */
12297 +       }
12298 +
12299 +       au_unpin(&a->pin);
12300 +
12301 + out_dentry:
12302 +       di_write_unlock(dentry);
12303 + out:
12304 +       return err;
12305 +}
12306 +/* apply 'ia' to the lower inode, which is copied up first if needed */
12307 +static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
12308 +{
12309 +       int err;
12310 +       struct inode *inode;
12311 +       struct super_block *sb;
12312 +       struct file *file;
12313 +       struct au_icpup_args *a;
12314 +
12315 +       err = -ENOMEM;
12316 +       a = kzalloc(sizeof(*a), GFP_NOFS);
12317 +       if (unlikely(!a))
12318 +               goto out;
12319 +
12320 +       inode = dentry->d_inode;
12321 +       IMustLock(inode);
12322 +       sb = dentry->d_sb;
12323 +       si_read_lock(sb, AuLock_FLUSH);
12324 +
12325 +       file = NULL;
12326 +       if (ia->ia_valid & ATTR_FILE) {
12327 +               /* currently ftruncate(2) only */
12328 +               file = ia->ia_file;
12329 +               fi_write_lock(file);
12330 +               ia->ia_file = au_h_fptr(file, au_fbstart(file));
12331 +       }
12332 +
12333 +       if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
12334 +               ia->ia_valid &= ~ATTR_MODE;
12335 +
12336 +       err = au_lock_and_icpup(dentry, ia, a);
12337 +       if (unlikely(err < 0))
12338 +               goto out_si;
12339 +       if (au_ftest_icpup(a->flags, DID_CPUP)) {
12340 +               ia->ia_file = NULL;
12341 +               ia->ia_valid &= ~ATTR_FILE;
12342 +       }
12343 +
12344 +       a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
12345 +       if (ia->ia_valid & ATTR_SIZE) {
12346 +               struct file *f;
12347 +
12348 +               if (ia->ia_size < i_size_read(inode)) {
12349 +                       /* unmap only */
12350 +                       err = vmtruncate(inode, ia->ia_size);
12351 +                       if (unlikely(err))
12352 +                               goto out_unlock;
12353 +               }
12354 +
12355 +               f = NULL;
12356 +               if (ia->ia_valid & ATTR_FILE)
12357 +                       f = ia->ia_file;
12358 +               mutex_unlock(&a->h_inode->i_mutex);
12359 +               err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
12360 +               mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
12361 +       } else
12362 +               err = vfsub_notify_change(&a->h_path, ia);
12363 +       if (!err)
12364 +               au_cpup_attr_changeable(inode);
12365 +
12366 + out_unlock:
12367 +       mutex_unlock(&a->h_inode->i_mutex);
12368 +       au_unpin(&a->pin);
12369 +       di_write_unlock(dentry);
12370 + out_si:
12371 +       if (file) {
12372 +               /* give the caller back the aufs file it passed in */
12373 +               fi_write_unlock(file);
12374 +               ia->ia_file = file;
12375 +               ia->ia_valid |= ATTR_FILE;
12376 +       }
12377 +       si_read_unlock(sb);
12378 +       kfree(a);
12379 + out:
12380 +       return err;
12381 +}
12381 +/* lock 'dentry', refreshing it if its generation differs from 'sigen' */
12382 +static int au_getattr_lock_reval(struct dentry *dentry, unsigned int sigen)
12383 +{
12384 +       int err;
12385 +       struct inode *inode;
12386 +       struct dentry *parent;
12387 +
12388 +       err = 0;
12389 +       inode = dentry->d_inode;
12390 +       di_write_lock_child(dentry);
12391 +       if (au_digen(dentry) != sigen || au_iigen(inode) != sigen) {
12392 +               parent = dget_parent(dentry);
12393 +               di_read_lock_parent(parent, AuLock_IR);
12394 +               /* returns a number of positive dentries */
12395 +               err = au_refresh_hdentry(dentry, inode->i_mode & S_IFMT);
12396 +               if (err >= 0)
12397 +                       err = au_refresh_hinode(inode, dentry);
12398 +               di_read_unlock(parent, AuLock_IR);
12399 +               dput(parent);
12400 +       }
12401 +       di_downgrade_lock(dentry, AuLock_IR);
12402 +       if (unlikely(err))
12403 +               di_read_unlock(dentry, AuLock_IR); /* lock is kept on success */
12404 +
12405 +       AuTraceErr(err);
12406 +       return err;
12407 +}
12408 +/* refresh the attributes cached in 'inode' from the values in 'st' */
12409 +static void au_refresh_iattr(struct inode *inode, struct kstat *st,
12410 +                            unsigned int nlink)
12411 +{
12412 +       inode->i_mode = st->mode;
12413 +       inode->i_gid = st->gid;
12414 +       inode->i_uid = st->uid;
12415 +       inode->i_ctime = st->ctime;
12416 +       inode->i_mtime = st->mtime;
12417 +       inode->i_atime = st->atime;
12418 +
12419 +       au_cpup_attr_nlink(inode, /*force*/0);
12420 +       if (S_ISDIR(inode->i_mode)) {
12421 +               /* replace the 'nlink' share of i_nlink by st->nlink */
12422 +               inode->i_nlink += st->nlink - nlink;
12423 +       }
12424 +
12425 +       spin_lock(&inode->i_lock);
12426 +       inode->i_blocks = st->blocks;
12427 +       i_size_write(inode, st->size);
12428 +       spin_unlock(&inode->i_lock);
12429 +}
12430 +/* refresh 'inode' from its lower dentry where possible, then fill 'st' */
12431 +static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
12432 +                       struct dentry *dentry, struct kstat *st)
12433 +{
12434 +       int err;
12435 +       unsigned int mnt_flags;
12436 +       aufs_bindex_t bindex;
12437 +       unsigned char udba_none, positive;
12438 +       struct super_block *sb, *h_sb;
12439 +       struct inode *inode;
12440 +       struct vfsmount *h_mnt;
12441 +       struct dentry *h_dentry;
12442 +
12443 +       err = 0;
12444 +       sb = dentry->d_sb;
12445 +       inode = dentry->d_inode;
12446 +       si_read_lock(sb, AuLock_FLUSH);
12447 +       mnt_flags = au_mntflags(sb);
12448 +       udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
12449 +
12450 +       /* support fstat(2) */
12451 +       if (!d_unhashed(dentry) && !udba_none) {
12452 +               unsigned int sigen = au_sigen(sb);
12453 +               if (au_digen(dentry) == sigen && au_iigen(inode) == sigen)
12454 +                       di_read_lock_child(dentry, AuLock_IR);
12455 +               else {
12456 +                       AuDebugOn(IS_ROOT(dentry));
12457 +                       err = au_getattr_lock_reval(dentry, sigen);
12458 +                       if (unlikely(err))
12459 +                               goto out;
12460 +               }
12461 +       } else
12462 +               di_read_lock_child(dentry, AuLock_IR);
12463 +
12464 +       bindex = au_ibstart(inode);
12465 +       h_mnt = au_sbr_mnt(sb, bindex);
12466 +       h_sb = h_mnt->mnt_sb;
12467 +       if (!au_test_fs_bad_iattr(h_sb) && udba_none)
12468 +               goto out_fill; /* success */
12469 +
12470 +       h_dentry = NULL;
12471 +       if (au_dbstart(dentry) == bindex)
12472 +               h_dentry = dget(au_h_dptr(dentry, bindex));
12473 +       else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
12474 +               h_dentry = au_plink_lkup(inode, bindex);
12475 +               if (IS_ERR(h_dentry))
12476 +                       goto out_fill; /* pretending success */
12477 +       }
12478 +       /* illegally overlapped or something */
12479 +       if (unlikely(!h_dentry))
12480 +               goto out_fill; /* pretending success */
12481 +
12482 +       positive = !!h_dentry->d_inode;
12483 +       if (positive)
12484 +               err = vfs_getattr(h_mnt, h_dentry, st);
12485 +       /* h_dentry is read here, so refresh before the reference is dropped */
12486 +       if (!err && positive)
12487 +               au_refresh_iattr(inode, st, h_dentry->d_inode->i_nlink);
12488 +       dput(h_dentry);
12489 +       if (!err)
12490 +               goto out_fill; /* success */
12491 +       goto out_unlock;
12492 +
12493 + out_fill:
12494 +       generic_fillattr(inode, st);
12495 + out_unlock:
12496 +       di_read_unlock(dentry, AuLock_IR);
12497 + out:
12498 +       si_read_unlock(sb);
12499 +       return err;
12500 +}
12501 +
12502 +/* ---------------------------------------------------------------------- */
12503 +/* readlink through the lower dentry of 'dentry' on branch 'bindex' */
12504 +static int h_readlink(struct dentry *dentry, int bindex, char __user *buf,
12505 +                     int bufsiz)
12506 +{
12507 +       int err;
12508 +       struct super_block *sb;
12509 +       struct dentry *h_dentry;
12510 +
12511 +       err = -EINVAL;
12512 +       h_dentry = au_h_dptr(dentry, bindex);
12513 +       if (unlikely(/* !h_dentry
12514 +                    || !h_dentry->d_inode
12515 +                    || */ !h_dentry->d_inode->i_op
12516 +                    || !h_dentry->d_inode->i_op->readlink))
12517 +               goto out;
12518 +
12519 +       err = security_inode_readlink(h_dentry);
12520 +       if (unlikely(err))
12521 +               goto out;
12522 +
12523 +       sb = dentry->d_sb;
12524 +       if (!au_test_ro(sb, bindex, dentry->d_inode)) {
12525 +               vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry);
12526 +               fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode);
12527 +       }
12528 +       err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz);
12529 +
12530 + out:
12531 +       return err;
12532 +}
12533 +/* ->readlink(): h_readlink() at au_dbstart(dentry) under aufs_read_lock() */
12534 +static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
12535 +{
12536 +       int err;
12537 +
12538 +       aufs_read_lock(dentry, AuLock_IR);
12539 +       err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz);
12540 +       aufs_read_unlock(dentry, AuLock_IR);
12541 +
12542 +       return err;
12543 +}
12544 +/* ->follow_link(): read the lower link into a __getname() buffer for @nd */
12545 +static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd)
12546 +{
12547 +       int err;
12548 +       char *buf;
12549 +       mm_segment_t old_fs;
12550 +
12551 +       err = -ENOMEM;
12552 +       buf = __getname();
12553 +       if (unlikely(!buf))
12554 +               goto out;
12555 +
12556 +       aufs_read_lock(dentry, AuLock_IR);
12557 +       old_fs = get_fs();
12558 +       set_fs(KERNEL_DS); /* h_readlink() takes __user, buf is in kernel */
12559 +       err = h_readlink(dentry, au_dbstart(dentry), (char __user *)buf,
12560 +                        PATH_MAX - 1); /* keep room for the NUL below */
12561 +       set_fs(old_fs);
12562 +       aufs_read_unlock(dentry, AuLock_IR);
12563 +
12564 +       if (err >= 0) {
12565 +               buf[err] = 0; /* err is the length, at most PATH_MAX - 1 */
12566 +               /* will be freed by put_link */
12567 +               nd_set_link(nd, buf);
12568 +               return NULL; /* success */
12569 +       }
12570 +       __putname(buf);
12571 +
12572 + out:
12573 +       path_put(&nd->path);
12574 +       AuTraceErr(err);
12575 +       return ERR_PTR(err);
12576 +}
12577 +/* ->put_link(): release the buffer which aufs_follow_link() gave to @nd */
12578 +static void aufs_put_link(struct dentry *dentry __maybe_unused,
12579 +                         struct nameidata *nd, void *cookie __maybe_unused)
12580 +{
12581 +       __putname(nd_get_link(nd));
12582 +}
12583 +
12584 +/* ---------------------------------------------------------------------- */
12585 +/* ->truncate_range() stub: it only invokes AuUnsupport() */
12586 +static void aufs_truncate_range(struct inode *inode __maybe_unused,
12587 +                               loff_t start __maybe_unused,
12588 +                               loff_t end __maybe_unused)
12589 +{
12590 +       AuUnsupport();
12591 +}
12592 +
12593 +/* ---------------------------------------------------------------------- */
12594 +/* inode operations with the symlink handlers (readlink, follow/put_link) */
12595 +struct inode_operations aufs_symlink_iop = {
12596 +       .permission     = aufs_permission,
12597 +       .setattr        = aufs_setattr,
12598 +       .getattr        = aufs_getattr,
12599 +       .readlink       = aufs_readlink,
12600 +       .follow_link    = aufs_follow_link,
12601 +       .put_link       = aufs_put_link
12602 +};
12603 +/* inode operations with the entry add/del/lookup/rename handlers (dirs) */
12604 +struct inode_operations aufs_dir_iop = {
12605 +       .create         = aufs_create,
12606 +       .lookup         = aufs_lookup,
12607 +       .link           = aufs_link,
12608 +       .unlink         = aufs_unlink,
12609 +       .symlink        = aufs_symlink,
12610 +       .mkdir          = aufs_mkdir,
12611 +       .rmdir          = aufs_rmdir,
12612 +       .mknod          = aufs_mknod,
12613 +       .rename         = aufs_rename,
12614 +
12615 +       .permission     = aufs_permission,
12616 +       .setattr        = aufs_setattr,
12617 +       .getattr        = aufs_getattr
12618 +};
12619 +/* inode operations, presumably for the non-dir, non-symlink inodes */
12620 +struct inode_operations aufs_iop = {
12621 +       .permission     = aufs_permission,
12622 +       .setattr        = aufs_setattr,
12623 +       .getattr        = aufs_getattr,
12624 +       .truncate_range = aufs_truncate_range
12625 +};
12626 diff --git a/fs/aufs/i_op_add.c b/fs/aufs/i_op_add.c
12627 new file mode 100644
12628 index 0000000..813890f
12629 --- /dev/null
12630 +++ b/fs/aufs/i_op_add.c
12631 @@ -0,0 +1,658 @@
12632 +/*
12633 + * Copyright (C) 2005-2009 Junjiro R. Okajima
12634 + *
12635 + * This program, aufs is free software; you can redistribute it and/or modify
12636 + * it under the terms of the GNU General Public License as published by
12637 + * the Free Software Foundation; either version 2 of the License, or
12638 + * (at your option) any later version.
12639 + *
12640 + * This program is distributed in the hope that it will be useful,
12641 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
12642 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12643 + * GNU General Public License for more details.
12644 + *
12645 + * You should have received a copy of the GNU General Public License
12646 + * along with this program; if not, write to the Free Software
12647 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
12648 + */
12649 +
12650 +/*
12651 + * inode operations (add entry)
12652 + */
12653 +
12654 +#include "aufs.h"
12655 +
12656 +/*
12657 + * final procedure of adding a new entry, except link(2).
12658 + * remove the whiteout @wh_dentry if given, instantiate @dentry, copyup the
12659 + * parent dir's times and size and update version.
12660 + * if au_new_inode() failed, re-create the removed whiteout. 0 or -errno.
12661 + */
12662 +static int epilog(struct inode *dir, aufs_bindex_t bindex,
12663 +                 struct dentry *wh_dentry, struct dentry *dentry)
12664 +{
12665 +       int err, rerr;
12666 +       aufs_bindex_t bwh;
12667 +       struct path h_path;
12668 +       struct inode *inode, *h_dir;
12669 +       struct dentry *wh;
12670 +
12671 +       bwh = -1;
12672 +       if (wh_dentry) {
12673 +               h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
12674 +               IMustLock(h_dir);
12675 +               AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
12676 +               bwh = au_dbwh(dentry); /* remembered for the revert below */
12677 +               h_path.dentry = wh_dentry;
12678 +               h_path.mnt = au_sbr_mnt(dir->i_sb, bindex);
12679 +               err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
12680 +                                         dentry);
12681 +               if (unlikely(err))
12682 +                       goto out;
12683 +       }
12684 +
12685 +       inode = au_new_inode(dentry, /*must_new*/1);
12686 +       if (!IS_ERR(inode)) {
12687 +               d_instantiate(dentry, inode);
12688 +               dir = dentry->d_parent->d_inode; /* dir inode is locked */
12689 +               IMustLock(dir);
12690 +               if (au_ibstart(dir) == au_dbstart(dentry))
12691 +                       au_cpup_attr_timesizes(dir);
12692 +               dir->i_version++;
12693 +               return 0; /* success */
12694 +       }
12695 +
12696 +       err = PTR_ERR(inode);
12697 +       if (!wh_dentry)
12698 +               goto out; /* no whiteout was removed, nothing to revert */
12699 +
12700 +       /* revert */
12701 +       /* dir inode is locked */
12702 +       wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
12703 +       rerr = PTR_ERR(wh);
12704 +       if (IS_ERR(wh)) {
12705 +               AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
12706 +                       AuDLNPair(dentry), err, rerr);
12707 +               err = -EIO; /* replaces the au_new_inode() error */
12708 +       } else
12709 +               dput(wh);
12710 +
12711 + out:
12712 +       return err;
12713 +}
12714 +
12715 +/*
12716 + * simple tests for the adding inode operations on branch @bindex.
12717 + * following the checks in vfs, plus the parent-child relationship (@h_parent).
12718 + */
12719 +int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
12720 +              struct dentry *h_parent, int isdir)
12721 +{
12722 +       int err;
12723 +       umode_t h_mode;
12724 +       struct dentry *h_dentry;
12725 +       struct inode *h_inode;
12726 +
12727 +       err = -ENAMETOOLONG;
12728 +       if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
12729 +               goto out;
12730 +
12731 +       h_dentry = au_h_dptr(dentry, bindex);
12732 +       h_inode = h_dentry->d_inode;
12733 +       if (!dentry->d_inode) {
12734 +               err = -EEXIST; /* a negative @dentry needs a negative lower */
12735 +               if (unlikely(h_inode))
12736 +                       goto out;
12737 +       } else {
12738 +               /* rename(2) case */
12739 +               err = -EIO;
12740 +               if (unlikely(!h_inode || !h_inode->i_nlink))
12741 +                       goto out;
12742 +
12743 +               h_mode = h_inode->i_mode; /* must agree with @isdir */
12744 +               if (!isdir) {
12745 +                       err = -EISDIR;
12746 +                       if (unlikely(S_ISDIR(h_mode)))
12747 +                               goto out;
12748 +               } else if (unlikely(!S_ISDIR(h_mode))) {
12749 +                       err = -ENOTDIR;
12750 +                       goto out;
12751 +               }
12752 +       }
12753 +
12754 +       err = -EIO;
12755 +       /* expected parent dir is locked */
12756 +       if (unlikely(h_parent != h_dentry->d_parent))
12757 +               goto out;
12758 +       err = 0; /* all the tests passed */
12759 +
12760 + out:
12761 +       AuTraceErr(err);
12762 +       return err;
12763 +}
12764 +
12765 +/*
12766 + * initial procedure of adding a new entry.
12767 + * prepare writable branch and the parent dir, pin it (unpinned on error),
12768 + * and lookup whiteout. returns the au_wh_lkup() result, NULL or ERR_PTR().
12769 + */
12770 +static struct dentry*
12771 +lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
12772 +                 struct dentry *src_dentry, struct au_pin *pin,
12773 +                 struct au_wr_dir_args *wr_dir_args)
12774 +{
12775 +       struct dentry *wh_dentry, *h_parent;
12776 +       struct super_block *sb;
12777 +       struct au_branch *br;
12778 +       int err;
12779 +       unsigned int udba;
12780 +       aufs_bindex_t bcpup;
12781 +
12782 +       AuDbg("%.*s\n", AuDLNPair(dentry));
12783 +
12784 +       err = au_wr_dir(dentry, src_dentry, wr_dir_args);
12785 +       bcpup = err; /* a branch index unless negative */
12786 +       wh_dentry = ERR_PTR(err);
12787 +       if (unlikely(err < 0))
12788 +               goto out;
12789 +
12790 +       sb = dentry->d_sb;
12791 +       udba = au_opt_udba(sb);
12792 +       err = au_pin(pin, dentry, bcpup, udba,
12793 +                    AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12794 +       wh_dentry = ERR_PTR(err);
12795 +       if (unlikely(err))
12796 +               goto out; /* nothing is pinned */
12797 +
12798 +       h_parent = au_pinned_h_parent(pin);
12799 +       if (udba != AuOpt_UDBA_NONE
12800 +           && au_dbstart(dentry) == bcpup)
12801 +               err = au_may_add(dentry, bcpup, h_parent,
12802 +                                au_ftest_wrdir(wr_dir_args->flags, ISDIR));
12803 +       else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
12804 +               err = -ENAMETOOLONG;
12805 +       wh_dentry = ERR_PTR(err);
12806 +       if (unlikely(err))
12807 +               goto out_unpin;
12808 +
12809 +       br = au_sbr(sb, bcpup);
12810 +       if (dt) { /* the caller wants the parent dir times stored */
12811 +               struct path tmp = {
12812 +                       .dentry = h_parent,
12813 +                       .mnt    = br->br_mnt
12814 +               };
12815 +               au_dtime_store(dt, au_pinned_parent(pin), &tmp);
12816 +       }
12817 +
12818 +       wh_dentry = NULL; /* returned when no whiteout lookup is needed */
12819 +       if (bcpup != au_dbwh(dentry))
12820 +               goto out; /* success */
12821 +
12822 +       wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
12823 +
12824 + out_unpin:
12825 +       if (IS_ERR(wh_dentry))
12826 +               au_unpin(pin);
12827 + out:
12828 +       return wh_dentry;
12829 +}
12830 +
12831 +/* ---------------------------------------------------------------------- */
12832 +/* the kind of entry which add_simple() creates, and its arguments */
12833 +enum { Mknod, Symlink, Creat };
12834 +struct simple_arg {
12835 +       int type; /* Mknod, Symlink or Creat, selects the member of u */
12836 +       union {
12837 +               struct {
12838 +                       int mode;
12839 +                       struct nameidata *nd;
12840 +               } c; /* Creat */
12841 +               struct {
12842 +                       const char *symname;
12843 +               } s; /* Symlink */
12844 +               struct {
12845 +                       int mode;
12846 +                       dev_t dev;
12847 +               } m; /* Mknod */
12848 +       } u;
12849 +};
12850 +/* common body of mknod/symlink/create: add the lower entry, then epilog() */
12851 +static int add_simple(struct inode *dir, struct dentry *dentry,
12852 +                     struct simple_arg *arg)
12853 +{
12854 +       int err;
12855 +       aufs_bindex_t bstart;
12856 +       unsigned char created;
12857 +       struct au_dtime dt;
12858 +       struct au_pin pin;
12859 +       struct path h_path;
12860 +       struct dentry *wh_dentry, *parent;
12861 +       struct inode *h_dir;
12862 +       struct au_wr_dir_args wr_dir_args = {
12863 +               .force_btgt     = -1,
12864 +               .flags          = AuWrDir_ADD_ENTRY
12865 +       };
12866 +
12867 +       AuDbg("%.*s\n", AuDLNPair(dentry));
12868 +       IMustLock(dir);
12869 +
12870 +       parent = dentry->d_parent; /* dir inode is locked */
12871 +       aufs_read_lock(dentry, AuLock_DW);
12872 +       di_write_lock_parent(parent);
12873 +       wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin,
12874 +                                     &wr_dir_args);
12875 +       err = PTR_ERR(wh_dentry);
12876 +       if (IS_ERR(wh_dentry))
12877 +               goto out; /* already unpinned by lock_hdir_lkup_wh() */
12878 +
12879 +       bstart = au_dbstart(dentry);
12880 +       h_path.dentry = au_h_dptr(dentry, bstart);
12881 +       h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
12882 +       h_dir = au_pinned_h_dir(&pin);
12883 +       switch (arg->type) {
12884 +       case Creat:
12885 +               err = vfsub_create(h_dir, &h_path, arg->u.c.mode); /* no u.c.nd */
12886 +               break;
12887 +       case Symlink:
12888 +               err = vfsub_symlink(h_dir, &h_path, arg->u.s.symname);
12889 +               break;
12890 +       case Mknod:
12891 +               err = vfsub_mknod(h_dir, &h_path, arg->u.m.mode, arg->u.m.dev);
12892 +               break;
12893 +       default:
12894 +               BUG();
12895 +       }
12896 +       created = !err; /* the lower entry was made, epilog() may still fail */
12897 +       if (!err)
12898 +               err = epilog(dir, bstart, wh_dentry, dentry);
12899 +
12900 +       /* revert */
12901 +       if (unlikely(created && err && h_path.dentry->d_inode)) {
12902 +               int rerr;
12903 +               rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
12904 +               if (rerr) {
12905 +                       AuIOErr("%.*s revert failure(%d, %d)\n",
12906 +                               AuDLNPair(dentry), err, rerr);
12907 +                       err = -EIO;
12908 +               }
12909 +               au_dtime_revert(&dt);
12910 +               d_drop(dentry);
12911 +       }
12912 +
12913 +       au_unpin(&pin);
12914 +       dput(wh_dentry);
12915 +
12916 + out:
12917 +       if (unlikely(err)) {
12918 +               au_update_dbstart(dentry);
12919 +               d_drop(dentry);
12920 +       }
12921 +       di_write_unlock(parent);
12922 +       aufs_read_unlock(dentry, AuLock_DW);
12923 +       return err;
12924 +}
12925 +/* ->mknod(): add_simple() with the Mknod arguments @mode and @dev */
12926 +int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
12927 +{
12928 +       struct simple_arg arg = {
12929 +               .type = Mknod,
12930 +               .u.m = {
12931 +                       .mode   = mode,
12932 +                       .dev    = dev
12933 +               }
12934 +       };
12935 +       return add_simple(dir, dentry, &arg);
12936 +}
12937 +/* ->symlink(): add_simple() with the Symlink argument @symname */
12938 +int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
12939 +{
12940 +       struct simple_arg arg = {
12941 +               .type = Symlink,
12942 +               .u.s.symname = symname
12943 +       };
12944 +       return add_simple(dir, dentry, &arg);
12945 +}
12946 +/* ->create(): add_simple() with the Creat arguments @mode and @nd */
12947 +int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
12948 +               struct nameidata *nd)
12949 +{
12950 +       struct simple_arg arg = {
12951 +               .type = Creat,
12952 +               .u.c = {
12953 +                       .mode   = mode,
12954 +                       .nd     = nd
12955 +               }
12956 +       };
12957 +       return add_simple(dir, dentry, &arg);
12958 +}
12959 +
12960 +/* ---------------------------------------------------------------------- */
12961 +/* working variables shared by aufs_link() and its helpers */
12962 +struct au_link_args {
12963 +       aufs_bindex_t bdst, bsrc; /* au_dbstart() of the new and the src dentry */
12964 +       struct au_pin pin;
12965 +       struct path h_path; /* the lower dentry to be linked and its mnt */
12966 +       struct dentry *src_parent, *parent;
12967 +};
12968 +/* copy-up @src_dentry to branch a->bdst; aufs_link() then links to it */
12969 +static int au_cpup_before_link(struct dentry *src_dentry,
12970 +                              struct au_link_args *a)
12971 +{
12972 +       int err;
12973 +       struct dentry *h_src_dentry;
12974 +       struct mutex *h_mtx;
12975 +
12976 +       di_read_lock_parent(a->src_parent, AuLock_IR);
12977 +       err = au_test_and_cpup_dirs(src_dentry, a->bdst);
12978 +       if (unlikely(err))
12979 +               goto out;
12980 +
12981 +       h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
12982 +       h_mtx = &h_src_dentry->d_inode->i_mutex; /* held over the copy-up */
12983 +       err = au_pin(&a->pin, src_dentry, a->bdst,
12984 +                    au_opt_udba(src_dentry->d_sb),
12985 +                    AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12986 +       if (unlikely(err))
12987 +               goto out;
12988 +       mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
12989 +       err = au_sio_cpup_simple(src_dentry, a->bdst, -1,
12990 +                                AuCpup_DTIME /* | AuCpup_KEEPLINO */);
12991 +       mutex_unlock(h_mtx);
12992 +       au_unpin(&a->pin); /* a->pin is left unpinned for the caller */
12993 +
12994 + out:
12995 +       di_read_unlock(a->src_parent, AuLock_IR);
12996 +       return err;
12997 +}
12998 +/* make a->h_path on branch a->bdst, by copy-up or by the lower link(2) */
12999 +static int au_cpup_or_link(struct dentry *src_dentry, struct au_link_args *a)
13000 +{
13001 +       int err;
13002 +       unsigned char plink;
13003 +       struct inode *h_inode, *inode;
13004 +       struct dentry *h_src_dentry;
13005 +       struct super_block *sb;
13006 +
13007 +       plink = 0; /* set when the source is found by au_plink_lkup() */
13008 +       h_inode = NULL;
13009 +       sb = src_dentry->d_sb;
13010 +       inode = src_dentry->d_inode;
13011 +       if (au_ibstart(inode) <= a->bdst)
13012 +               h_inode = au_h_iptr(inode, a->bdst);
13013 +       if (!h_inode || !h_inode->i_nlink) {
13014 +               /* copyup src_dentry as the name of dentry. */
13015 +               au_set_dbstart(src_dentry, a->bdst);
13016 +               au_set_h_dptr(src_dentry, a->bdst, dget(a->h_path.dentry));
13017 +               h_inode = au_h_dptr(src_dentry, a->bsrc)->d_inode;
13018 +               mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
13019 +               err = au_sio_cpup_single(src_dentry, a->bdst, a->bsrc, -1,
13020 +                                        AuCpup_KEEPLINO, a->parent);
13021 +               mutex_unlock(&h_inode->i_mutex);
13022 +               au_set_h_dptr(src_dentry, a->bdst, NULL); /* undo the above */
13023 +               au_set_dbstart(src_dentry, a->bsrc);
13024 +       } else {
13025 +               /* the inode of src_dentry already exists on a.bdst branch */
13026 +               h_src_dentry = d_find_alias(h_inode);
13027 +               if (!h_src_dentry && au_plink_test(inode)) {
13028 +                       plink = 1;
13029 +                       h_src_dentry = au_plink_lkup(inode, a->bdst);
13030 +                       err = PTR_ERR(h_src_dentry);
13031 +                       if (IS_ERR(h_src_dentry))
13032 +                               goto out;
13033 +
13034 +                       if (unlikely(!h_src_dentry->d_inode)) {
13035 +                               dput(h_src_dentry); /* negative is useless */
13036 +                               h_src_dentry = NULL;
13037 +                       }
13038 +
13039 +               }
13040 +               if (h_src_dentry) {
13041 +                       err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
13042 +                                        &a->h_path);
13043 +                       dput(h_src_dentry);
13044 +               } else {
13045 +                       AuIOErr("no dentry found for hi%lu on b%d\n",
13046 +                               h_inode->i_ino, a->bdst);
13047 +                       err = -EIO;
13048 +               }
13049 +       }
13050 +
13051 +       if (!err && !plink)
13052 +               au_plink_append(inode, a->bdst, a->h_path.dentry);
13053 +
13054 +out:
13055 +       return err;
13056 +}
13057 +/* ->link(): make @dentry in @dir a hardlink to @src_dentry. 0 or -errno */
13058 +int aufs_link(struct dentry *src_dentry, struct inode *dir,
13059 +             struct dentry *dentry)
13060 +{
13061 +       int err, rerr;
13062 +       struct au_dtime dt;
13063 +       struct au_link_args *a;
13064 +       struct dentry *wh_dentry, *h_src_dentry;
13065 +       struct inode *inode;
13066 +       struct super_block *sb;
13067 +       struct au_wr_dir_args wr_dir_args = {
13068 +               /* .force_btgt  = -1, */
13069 +               .flags          = AuWrDir_ADD_ENTRY
13070 +       };
13071 +
13072 +       IMustLock(dir);
13073 +       inode = src_dentry->d_inode;
13074 +       IMustLock(inode);
13075 +
13076 +       err = -ENOENT;
13077 +       if (unlikely(!inode->i_nlink))
13078 +               goto out;
13079 +
13080 +       err = -ENOMEM;
13081 +       a = kzalloc(sizeof(*a), GFP_NOFS);
13082 +       if (unlikely(!a))
13083 +               goto out;
13084 +
13085 +       a->parent = dentry->d_parent; /* dir inode is locked */
13086 +       aufs_read_and_write_lock2(dentry, src_dentry, /*AuLock_FLUSH*/0);
13087 +       a->src_parent = dget_parent(src_dentry);
13088 +       wr_dir_args.force_btgt = au_dbstart(src_dentry);
13089 +
13090 +       di_write_lock_parent(a->parent);
13091 +       wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
13092 +       wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
13093 +                                     &wr_dir_args);
13094 +       err = PTR_ERR(wh_dentry);
13095 +       if (IS_ERR(wh_dentry))
13096 +               goto out_unlock;
13097 +
13098 +       err = 0; /* PTR_ERR() of a valid whiteout dentry is not zero */
13099 +       sb = dentry->d_sb;
13100 +       a->bdst = au_dbstart(dentry);
13101 +       a->h_path.dentry = au_h_dptr(dentry, a->bdst);
13102 +       a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
13103 +       a->bsrc = au_dbstart(src_dentry);
13104 +       if (au_opt_test(au_mntflags(sb), PLINK)) {
13105 +               if (a->bdst < a->bsrc
13106 +                   /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
13107 +                       err = au_cpup_or_link(src_dentry, a);
13108 +               else {
13109 +                       h_src_dentry = au_h_dptr(src_dentry, a->bdst);
13110 +                       err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
13111 +                                        &a->h_path);
13112 +               }
13113 +       } else {
13114 +               /*
13115 +                * copyup src_dentry to the branch we process,
13116 +                * and then link(2) to it.
13117 +                */
13118 +               if (a->bdst < a->bsrc
13119 +                   /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
13120 +                       au_unpin(&a->pin);
13121 +                       di_write_unlock(a->parent);
13122 +                       err = au_cpup_before_link(src_dentry, a);
13123 +                       di_write_lock_parent(a->parent);
13124 +                       if (!err)
13125 +                               err = au_pin(&a->pin, dentry, a->bdst,
13126 +                                            au_opt_udba(sb),
13127 +                                            AuPin_DI_LOCKED | AuPin_MNT_WRITE);
13128 +                       if (unlikely(err))
13129 +                               goto out_wh; /* a->pin is not pinned here */
13130 +               }
13131 +               if (!err) {
13132 +                       h_src_dentry = au_h_dptr(src_dentry, a->bdst);
13133 +                       err = -ENOENT;
13134 +                       if (h_src_dentry && h_src_dentry->d_inode)
13135 +                               err = vfsub_link(h_src_dentry,
13136 +                                                au_pinned_h_dir(&a->pin),
13137 +                                                &a->h_path);
13138 +               }
13139 +       }
13140 +       if (unlikely(err))
13141 +               goto out_unpin;
13142 +
13143 +       if (wh_dentry) {
13144 +               a->h_path.dentry = wh_dentry; /* from here it is the whiteout */
13145 +               err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
13146 +                                         dentry);
13147 +               if (unlikely(err))
13148 +                       goto out_revert;
13149 +       }
13150 +
13151 +       dir->i_version++;
13152 +       if (au_ibstart(dir) == au_dbstart(dentry))
13153 +               au_cpup_attr_timesizes(dir);
13154 +       inc_nlink(inode);
13155 +       inode->i_ctime = dir->i_ctime;
13156 +       if (!d_unhashed(a->h_path.dentry)) /* NOTE(review): wh_dentry if set */
13157 +               d_instantiate(dentry, au_igrab(inode));
13158 +       else
13159 +               /* some filesystem calls d_drop() */
13160 +               d_drop(dentry);
13161 +       goto out_unpin; /* success */
13162 +
13163 + out_revert:
13164 +       a->h_path.dentry = au_h_dptr(dentry, a->bdst); /* not the whiteout */
13165 +       rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, /*force*/0);
13166 +       if (rerr) {
13167 +               AuIOErr("%.*s reverting failed(%d, %d)\n",
13168 +                       AuDLNPair(dentry), err, rerr);
13169 +               err = -EIO;
13170 +       }
13171 +       d_drop(dentry);
13172 +       au_dtime_revert(&dt);
13173 + out_unpin:
13174 +       au_unpin(&a->pin);
13175 + out_wh:
13176 +       dput(wh_dentry);
13177 + out_unlock:
13178 +       if (unlikely(err)) {
13179 +               au_update_dbstart(dentry);
13180 +               d_drop(dentry);
13181 +       }
13182 +       di_write_unlock(a->parent);
13183 +       dput(a->src_parent);
13184 +       aufs_read_and_write_unlock2(dentry, src_dentry);
13185 +       kfree(a);
13186 + out:
13187 +       return err;
13188 +}
13189 +/* ->mkdir(): make the lower dir, make it opaque if needed, then epilog() */
13190 +int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
13191 +{
13192 +       int err, rerr;
13193 +       aufs_bindex_t bindex;
13194 +       unsigned char diropq;
13195 +       struct path h_path;
13196 +       struct dentry *wh_dentry, *parent, *opq_dentry;
13197 +       struct mutex *h_mtx;
13198 +       struct super_block *sb;
13199 +       struct {
13200 +               struct au_pin pin;
13201 +               struct au_dtime dt;
13202 +       } *a; /* reduce the stack usage */
13203 +       struct au_wr_dir_args wr_dir_args = {
13204 +               .force_btgt     = -1,
13205 +               .flags          = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
13206 +       };
13207 +
13208 +       IMustLock(dir);
13209 +
13210 +       err = -ENOMEM;
13211 +       a = kmalloc(sizeof(*a), GFP_NOFS);
13212 +       if (unlikely(!a))
13213 +               goto out;
13214 +
13215 +       aufs_read_lock(dentry, AuLock_DW);
13216 +       parent = dentry->d_parent; /* dir inode is locked */
13217 +       di_write_lock_parent(parent);
13218 +       wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
13219 +                                     &a->pin, &wr_dir_args);
13220 +       err = PTR_ERR(wh_dentry);
13221 +       if (IS_ERR(wh_dentry))
13222 +               goto out_free; /* already unpinned by lock_hdir_lkup_wh() */
13223 +
13224 +       sb = dentry->d_sb;
13225 +       bindex = au_dbstart(dentry);
13226 +       h_path.dentry = au_h_dptr(dentry, bindex);
13227 +       h_path.mnt = au_sbr_mnt(sb, bindex);
13228 +       err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
13229 +       if (unlikely(err))
13230 +               goto out_unlock;
13231 +
13232 +       /* make the dir opaque */
13233 +       diropq = 0; /* set once the opaque mark exists, for the revert */
13234 +       h_mtx = &h_path.dentry->d_inode->i_mutex;
13235 +       if (wh_dentry
13236 +           || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
13237 +               mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
13238 +               opq_dentry = au_diropq_create(dentry, bindex);
13239 +               mutex_unlock(h_mtx);
13240 +               err = PTR_ERR(opq_dentry);
13241 +               if (IS_ERR(opq_dentry))
13242 +                       goto out_dir;
13243 +               dput(opq_dentry);
13244 +               diropq = 1;
13245 +       }
13246 +
13247 +       err = epilog(dir, bindex, wh_dentry, dentry);
13248 +       if (!err) {
13249 +               inc_nlink(dir); /* for the new child dir */
13250 +               goto out_unlock; /* success */
13251 +       }
13252 +
13253 +       /* revert */
13254 +       if (diropq) {
13255 +               AuLabel(revert opq);
13256 +               mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
13257 +               rerr = au_diropq_remove(dentry, bindex);
13258 +               mutex_unlock(h_mtx);
13259 +               if (rerr) {
13260 +                       AuIOErr("%.*s reverting diropq failed(%d, %d)\n",
13261 +                               AuDLNPair(dentry), err, rerr);
13262 +                       err = -EIO;
13263 +               }
13264 +       }
13265 +
13266 + out_dir:
13267 +       AuLabel(revert dir);
13268 +       rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
13269 +       if (rerr) {
13270 +               AuIOErr("%.*s reverting dir failed(%d, %d)\n",
13271 +                       AuDLNPair(dentry), err, rerr);
13272 +               err = -EIO;
13273 +       }
13274 +       d_drop(dentry);
13275 +       au_dtime_revert(&a->dt);
13276 + out_unlock:
13277 +       au_unpin(&a->pin);
13278 +       dput(wh_dentry);
13279 + out_free:
13280 +       if (unlikely(err)) {
13281 +               au_update_dbstart(dentry);
13282 +               d_drop(dentry);
13283 +       }
13284 +       di_write_unlock(parent);
13285 +       aufs_read_unlock(dentry, AuLock_DW);
13286 +       kfree(a);
13287 + out:
13288 +       return err;
13289 +}
13290 diff --git a/fs/aufs/i_op_del.c b/fs/aufs/i_op_del.c
13291 new file mode 100644
13292 index 0000000..d47ddfb
13293 --- /dev/null
13294 +++ b/fs/aufs/i_op_del.c
13295 @@ -0,0 +1,470 @@
13296 +/*
13297 + * Copyright (C) 2005-2009 Junjiro R. Okajima
13298 + *
13299 + * This program, aufs is free software; you can redistribute it and/or modify
13300 + * it under the terms of the GNU General Public License as published by
13301 + * the Free Software Foundation; either version 2 of the License, or
13302 + * (at your option) any later version.
13303 + *
13304 + * This program is distributed in the hope that it will be useful,
13305 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
13306 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13307 + * GNU General Public License for more details.
13308 + *
13309 + * You should have received a copy of the GNU General Public License
13310 + * along with this program; if not, write to the Free Software
13311 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
13312 + */
13313 +
13314 +/*
13315 + * inode operations (del entry)
13316 + */
13317 +
13318 +#include "aufs.h"
13319 +
13320 +/*
13321 + * decide if a new whiteout for @dentry is necessary or not.
13322 + * when it is necessary, prepare the parent dir for the upper branch whose
13323 + * branch index is @bcpup for creation. the actual creation of the whiteout will
13324 + * be done by caller. a negative *@bcpup is replaced by the chosen branch index.
13325 + * return value:
13326 + * 0: wh is unnecessary
13327 + * plus: wh is necessary
13328 + * minus: error
13329 + */
13330 +int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
13331 +{
13332 +       int need_wh, err;
13333 +       aufs_bindex_t bstart;
13334 +       struct super_block *sb;
13335 +
13336 +       sb = dentry->d_sb;
13337 +       bstart = au_dbstart(dentry);
13338 +       if (*bcpup < 0) {
13339 +               *bcpup = bstart; /* unless au_test_ro() holds for bstart */
13340 +               if (au_test_ro(sb, bstart, dentry->d_inode)) {
13341 +                       err = AuWbrCopyup(au_sbi(sb), dentry);
13342 +                       *bcpup = err; /* a branch index unless negative */
13343 +                       if (unlikely(err < 0))
13344 +                               goto out;
13345 +               }
13346 +       } else
13347 +               AuDebugOn(bstart < *bcpup
13348 +                         || au_test_ro(sb, *bcpup, dentry->d_inode));
13349 +       AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart);
13350 +
13351 +       if (*bcpup != bstart) {
13352 +               err = au_cpup_dirs(dentry, *bcpup);
13353 +               if (unlikely(err))
13354 +                       goto out;
13355 +               need_wh = 1; /* the target branch differs from bstart */
13356 +       } else {
13357 +               aufs_bindex_t old_bend, new_bend, bdiropq = -1;
13358 +
13359 +               old_bend = au_dbend(dentry);
13360 +               if (isdir) {
13361 +                       bdiropq = au_dbdiropq(dentry); /* restored below */
13362 +                       au_set_dbdiropq(dentry, -1);
13363 +               }
13364 +               need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0,
13365 +                                        /*nd*/NULL);
13366 +               err = need_wh;
13367 +               if (isdir)
13368 +                       au_set_dbdiropq(dentry, bdiropq);
13369 +               if (unlikely(err < 0))
13370 +                       goto out;
13371 +               new_bend = au_dbend(dentry);
13372 +               if (!need_wh && old_bend != new_bend) { /* undo the lookup */
13373 +                       au_set_h_dptr(dentry, new_bend, NULL);
13374 +                       au_set_dbend(dentry, old_bend);
13375 +               }
13376 +       }
13377 +       AuDbg("need_wh %d\n", need_wh);
13378 +       err = need_wh;
13379 +
13380 + out:
13381 +       return err;
13382 +}
13383 +
13384 +/*
13385 + * simple tests for the del-entry operations, following the checks in vfs,
13386 + * plus the parent-child relationship. returns 0 or a negative errno.
13387 + */
13388 +int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
13389 +              struct dentry *h_parent, int isdir)
13390 +{
13391 +       int err;
13392 +       umode_t h_mode;
13393 +       struct dentry *h_dentry, *h_latest;
13394 +       struct inode *h_inode;
13395 +
13396 +       h_dentry = au_h_dptr(dentry, bindex);
13397 +       h_inode = h_dentry->d_inode;
13398 +       if (dentry->d_inode) {
13399 +               err = -ENOENT;
13400 +               if (unlikely(!h_inode || !h_inode->i_nlink))
13401 +                       goto out;
13402 +
13403 +               h_mode = h_inode->i_mode;
13404 +               if (!isdir) {
13405 +                       err = -EISDIR;
13406 +                       if (unlikely(S_ISDIR(h_mode)))
13407 +                               goto out;
13408 +               } else if (unlikely(!S_ISDIR(h_mode))) {
13409 +                       err = -ENOTDIR;
13410 +                       goto out;
13411 +               }
13412 +       } else {
13413 +               /* rename(2) case: the lower dentry must be negative too */
13414 +               err = -EIO;
13415 +               if (unlikely(h_inode))
13416 +                       goto out;
13417 +       }
13418 +
13419 +       err = -ENOENT;
13420 +       /* expected parent dir is locked */
13421 +       if (unlikely(h_parent != h_dentry->d_parent))
13422 +               goto out;
13423 +       err = 0; /* dead store, overwritten just below */
13424 +
13425 +       /*
13426 +        * rmdir a dir may break the consistency on some filesystems.
13427 +        * let's try a heavier test.
13428 +        */
13429 +       err = -EACCES;
13430 +       if (unlikely(au_test_h_perm(h_parent->d_inode, MAY_EXEC | MAY_WRITE)))
13431 +               goto out;
13432 +
13433 +       h_latest = au_sio_lkup_one(&dentry->d_name, h_parent,
13434 +                                  au_sbr(dentry->d_sb, bindex));
13435 +       err = -EIO;
13436 +       if (IS_ERR(h_latest))
13437 +               goto out;
13438 +       if (h_latest == h_dentry)
13439 +               err = 0; /* the fresh lookup returned the same dentry */
13440 +       dput(h_latest);
13441 +
13442 + out:
13443 +       return err;
13444 +}
13445 +
13446 +/*
13447 + * decide the branch where we operate for @dentry and store its index in
13448 + * @rbcpup. after deciding it, 'pin' it and store the timestamps of the parent
13449 + * dir for reverting. when a new whiteout is necessary, create it.
13450 + * returns the new whiteout, NULL when it is unnecessary, or ERR_PTR().
13451 + */
13452 +static struct dentry*
13453 +lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
13454 +                   struct au_dtime *dt, struct au_pin *pin)
13455 +{
13456 +       struct dentry *wh_dentry;
13457 +       struct super_block *sb;
13458 +       struct path h_path;
13459 +       int err, need_wh;
13460 +       unsigned int udba;
13461 +       aufs_bindex_t bcpup;
13462 +
13463 +       need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
13464 +       wh_dentry = ERR_PTR(need_wh);
13465 +       if (unlikely(need_wh < 0))
13466 +               goto out;
13467 +
13468 +       sb = dentry->d_sb;
13469 +       udba = au_opt_udba(sb);
13470 +       bcpup = *rbcpup;
13471 +       err = au_pin(pin, dentry, bcpup, udba,
13472 +                    AuPin_DI_LOCKED | AuPin_MNT_WRITE);
13473 +       wh_dentry = ERR_PTR(err);
13474 +       if (unlikely(err))
13475 +               goto out;
13476 +
13477 +       h_path.dentry = au_pinned_h_parent(pin);
13478 +       if (udba != AuOpt_UDBA_NONE
13479 +           && au_dbstart(dentry) == bcpup) {
13480 +               err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
13481 +               wh_dentry = ERR_PTR(err);
13482 +               if (unlikely(err))
13483 +                       goto out_unpin;
13484 +       }
13485 +
13486 +       h_path.mnt = au_sbr_mnt(sb, bcpup);
13487 +       au_dtime_store(dt, au_pinned_parent(pin), &h_path);
13488 +       wh_dentry = NULL;
13489 +       if (!need_wh)
13490 +               goto out; /* success, no need to create whiteout */
13491 +
13492 +       wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
13493 +       if (!IS_ERR(wh_dentry))
13494 +               goto out; /* success */
13495 +       /* on success, the parent stays locked and wh_dentry is dget-ed */
13496 +
13497 + out_unpin:
13498 +       au_unpin(pin);
13499 + out:
13500 +       return wh_dentry;
13501 +}
13502 +
13503 +/*
13504 + * when removing a dir, rename it to a unique temporary whiteout-ed name first
13505 + * in order to be revertible and save time for removing many child whiteouts
13506 + * under the dir.
13507 + * returns 1 when there are too many child whiteouts and the caller should
13508 + * remove them asynchronously. returns 0 when the children are few enough to
13509 + * remove now or the branch fs is a remote fs.
13510 + * otherwise return an error.
13511 + */
13512 +static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
13513 +                          struct au_nhash *whlist, struct inode *dir)
13514 +{
13515 +       int rmdir_later, err, dirwh;
13516 +       struct dentry *h_dentry;
13517 +       struct super_block *sb;
13518 +
13519 +       sb = dentry->d_sb;
13520 +       SiMustAnyLock(sb);
13521 +       h_dentry = au_h_dptr(dentry, bindex);
13522 +       err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
13523 +       if (unlikely(err))
13524 +               goto out;
13525 +
13526 +       /* stop monitoring */
13527 +       au_hin_free(au_hi(dentry->d_inode, bindex));
13528 +
13529 +       if (!au_test_fs_remote(h_dentry->d_sb)) {
13530 +               dirwh = au_sbi(sb)->si_dirwh;
13531 +               rmdir_later = (dirwh <= 1); /* such threshold always defers */
13532 +               if (!rmdir_later)
13533 +                       rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
13534 +                                                             dirwh);
13535 +               if (rmdir_later)
13536 +                       return rmdir_later; /* skips AuTraceErr() at out */
13537 +       }
13538 +
13539 +       err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
13540 +       if (unlikely(err)) {
13541 +               AuIOErr("rmdir %.*s, b%d failed, %d. ignored\n",
13542 +                       AuDLNPair(h_dentry), bindex, err);
13543 +               err = 0; /* logged only, the caller sees success */
13544 +       }
13545 +
13546 + out:
13547 +       AuTraceErr(err);
13548 +       return err;
13549 +}
13550 +
13551 +/*
13552 + * final procedure for deleting an entry.
13553 + * maintain dentry and iattr. @bindex is the branch we operated on.
13554 + */
13555 +static void epilog(struct inode *dir, struct dentry *dentry,
13556 +                  aufs_bindex_t bindex)
13557 +{
13558 +       struct inode *inode;
13559 +
13560 +       inode = dentry->d_inode;
13561 +       d_drop(dentry);
13562 +       inode->i_ctime = dir->i_ctime;
13563 +
13564 +       if (atomic_read(&dentry->d_count) == 1) { /* a single reference */
13565 +               au_set_h_dptr(dentry, au_dbstart(dentry), NULL);
13566 +               au_update_dbstart(dentry);
13567 +       }
13568 +       if (au_ibstart(dir) == bindex) /* we touched the top branch of dir */
13569 +               au_cpup_attr_timesizes(dir);
13570 +       dir->i_version++;
13571 +}
13572 +
13573 +/*
13574 + * on error, remove the created whiteout and revert. returns 0 or -EIO.
13575 + */
13576 +static int do_revert(int err, struct inode *dir, aufs_bindex_t bwh,
13577 +                    struct dentry *wh_dentry, struct dentry *dentry,
13578 +                    struct au_dtime *dt)
13579 +{
13580 +       int rerr;
13581 +       struct path h_path = {
13582 +               .dentry = wh_dentry,
13583 +               .mnt    = au_sbr_mnt(dir->i_sb, bwh)
13584 +       };
13585 +
13586 +       rerr = au_wh_unlink_dentry(au_h_iptr(dir, bwh), &h_path, dentry);
13587 +       if (!rerr) {
13588 +               au_set_dbwh(dentry, bwh); /* @bwh was saved by the caller */
13589 +               au_dtime_revert(dt);
13590 +               return 0;
13591 +       }
13592 +
13593 +       AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
13594 +               AuDLNPair(dentry), err, rerr);
13595 +       return -EIO; /* @err and rerr are only reported in the log */
13596 +}
13597 +
13598 +/* ---------------------------------------------------------------------- */
13599 +
13600 +int aufs_unlink(struct inode *dir, struct dentry *dentry)
13601 +{
13602 +       int err;
13603 +       aufs_bindex_t bwh, bindex, bstart;
13604 +       struct au_dtime dt;
13605 +       struct au_pin pin;
13606 +       struct path h_path;
13607 +       struct inode *inode, *h_dir;
13608 +       struct dentry *parent, *wh_dentry;
13609 +
13610 +       IMustLock(dir);
13611 +       inode = dentry->d_inode;
13612 +       if (unlikely(!inode))
13613 +               return -ENOENT; /* possible? */
13614 +       IMustLock(inode);
13615 +
13616 +       aufs_read_lock(dentry, AuLock_DW);
13617 +       parent = dentry->d_parent; /* dir inode is locked */
13618 +       di_write_lock_parent(parent);
13619 +
13620 +       bstart = au_dbstart(dentry);
13621 +       bwh = au_dbwh(dentry); /* saved for do_revert() */
13622 +       bindex = -1; /* let lock_hdir_create_wh() decide the branch */
13623 +       wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt, &pin);
13624 +       err = PTR_ERR(wh_dentry);
13625 +       if (IS_ERR(wh_dentry))
13626 +               goto out;
13627 +
13628 +       h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
13629 +       h_path.dentry = au_h_dptr(dentry, bstart);
13630 +       dget(h_path.dentry); /* put at out_unlock */
13631 +       if (bindex == bstart) {
13632 +               h_dir = au_pinned_h_dir(&pin);
13633 +               err = vfsub_unlink(h_dir, &h_path, /*force*/0);
13634 +       } else {
13635 +               /* dir inode is locked */
13636 +               h_dir = wh_dentry->d_parent->d_inode;
13637 +               IMustLock(h_dir);
13638 +               err = 0; /* nothing is unlinked on branch bstart here */
13639 +       }
13640 +
13641 +       if (!err) {
13642 +               drop_nlink(inode);
13643 +               epilog(dir, dentry, bindex);
13644 +
13645 +               /* update target timestamps */
13646 +               if (bindex == bstart) {
13647 +                       vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
13648 +                       inode->i_ctime = h_path.dentry->d_inode->i_ctime;
13649 +               } else
13650 +                       /* todo: this timestamp may be reverted later */
13651 +                       inode->i_ctime = h_dir->i_ctime;
13652 +               goto out_unlock; /* success */
13653 +       }
13654 +
13655 +       /* revert: remove the whiteout when one was created */
13656 +       if (wh_dentry) {
13657 +               int rerr;
13658 +
13659 +               rerr = do_revert(err, dir, bwh, wh_dentry, dentry, &dt);
13660 +               if (rerr)
13661 +                       err = rerr;
13662 +       }
13663 +
13664 + out_unlock:
13665 +       au_unpin(&pin);
13666 +       dput(wh_dentry);
13667 +       dput(h_path.dentry);
13668 + out:
13669 +       di_write_unlock(parent);
13670 +       aufs_read_unlock(dentry, AuLock_DW);
13671 +       return err;
13672 +}
13673 +
13674 +int aufs_rmdir(struct inode *dir, struct dentry *dentry)
13675 +{
13676 +       int err, rmdir_later;
13677 +       aufs_bindex_t bwh, bindex, bstart;
13678 +       struct au_dtime dt;
13679 +       struct au_pin pin;
13680 +       struct inode *inode;
13681 +       struct dentry *parent, *wh_dentry, *h_dentry;
13682 +       struct au_whtmp_rmdir *args;
13683 +
13684 +       IMustLock(dir);
13685 +       inode = dentry->d_inode;
13686 +       err = -ENOENT; /* possible? */
13687 +       if (unlikely(!inode))
13688 +               goto out;
13689 +       IMustLock(inode);
13690 +
13691 +       aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH);
13692 +       err = -ENOMEM;
13693 +       args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
13694 +       if (unlikely(!args))
13695 +               goto out_unlock;
13696 +
13697 +       parent = dentry->d_parent; /* dir inode is locked */
13698 +       di_write_lock_parent(parent);
13699 +       err = au_test_empty(dentry, &args->whlist);
13700 +       if (unlikely(err))
13701 +               goto out_args;
13702 +
13703 +       bstart = au_dbstart(dentry);
13704 +       bwh = au_dbwh(dentry); /* saved for do_revert() */
13705 +       bindex = -1; /* let lock_hdir_create_wh() decide the branch */
13706 +       wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &dt, &pin);
13707 +       err = PTR_ERR(wh_dentry);
13708 +       if (IS_ERR(wh_dentry))
13709 +               goto out_args;
13710 +
13711 +       h_dentry = au_h_dptr(dentry, bstart);
13712 +       dget(h_dentry); /* put at out_unpin */
13713 +       rmdir_later = 0;
13714 +       if (bindex == bstart) {
13715 +               err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir);
13716 +               if (err > 0) { /* positive means 'remove it later' */
13717 +                       rmdir_later = err;
13718 +                       err = 0;
13719 +               }
13720 +       } else {
13721 +               /* stop monitoring */
13722 +               au_hin_free(au_hi(inode, bstart));
13723 +
13724 +               /* dir inode is locked */
13725 +               IMustLock(wh_dentry->d_parent->d_inode);
13726 +               err = 0;
13727 +       }
13728 +
13729 +       if (!err) {
13730 +               clear_nlink(inode);
13731 +               au_set_dbdiropq(dentry, -1);
13732 +               epilog(dir, dentry, bindex);
13733 +
13734 +               if (rmdir_later) {
13735 +                       au_whtmp_kick_rmdir(dir, bstart, h_dentry, args);
13736 +                       args = NULL; /* not freed here any more */
13737 +               }
13738 +
13739 +               goto out_unpin; /* success */
13740 +       }
13741 +
13742 +       /* revert: remove the whiteout when one was created */
13743 +       AuLabel(revert);
13744 +       if (wh_dentry) {
13745 +               int rerr;
13746 +
13747 +               rerr = do_revert(err, dir, bwh, wh_dentry, dentry, &dt);
13748 +               if (rerr)
13749 +                       err = rerr;
13750 +       }
13751 +
13752 + out_unpin:
13753 +       au_unpin(&pin);
13754 +       dput(wh_dentry);
13755 +       dput(h_dentry);
13756 + out_args:
13757 +       di_write_unlock(parent);
13758 +       if (args)
13759 +               au_whtmp_rmdir_free(args);
13760 + out_unlock:
13761 +       aufs_read_unlock(dentry, AuLock_DW);
13762 + out:
13763 +       AuTraceErr(err);
13764 +       return err;
13765 +}
13766 diff --git a/fs/aufs/i_op_ren.c b/fs/aufs/i_op_ren.c
13767 new file mode 100644
13768 index 0000000..e7b2402
13769 --- /dev/null
13770 +++ b/fs/aufs/i_op_ren.c
13771 @@ -0,0 +1,965 @@
13772 +/*
13773 + * Copyright (C) 2005-2009 Junjiro R. Okajima
13774 + *
13775 + * This program, aufs is free software; you can redistribute it and/or modify
13776 + * it under the terms of the GNU General Public License as published by
13777 + * the Free Software Foundation; either version 2 of the License, or
13778 + * (at your option) any later version.
13779 + *
13780 + * This program is distributed in the hope that it will be useful,
13781 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
13782 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13783 + * GNU General Public License for more details.
13784 + *
13785 + * You should have received a copy of the GNU General Public License
13786 + * along with this program; if not, write to the Free Software
13787 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
13788 + */
13789 +
13790 +/*
13791 + * inode operation (rename entry)
13792 + * todo: this is crazy monster
13793 + */
13794 +
13795 +#include "aufs.h"
13796 +
13797 +enum { AuSRC, AuDST, AuSrcDst };
13798 +enum { AuPARENT, AuCHILD, AuParentChild };
13799 +
13800 +#define AuRen_ISDIR    1
13801 +#define AuRen_ISSAMEDIR        (1 << 1)
13802 +#define AuRen_WHSRC    (1 << 2)
13803 +#define AuRen_WHDST    (1 << 3)
13804 +#define AuRen_MNT_WRITE        (1 << 4)
13805 +#define AuRen_DT_DSTDIR        (1 << 5)
13806 +#define AuRen_DIROPQ   (1 << 6)
13807 +#define AuRen_CPUP     (1 << 7)
13808 +#define au_ftest_ren(flags, name)      ((flags) & AuRen_##name)
13809 +#define au_fset_ren(flags, name)       { (flags) |= AuRen_##name; }
13810 +#define au_fclr_ren(flags, name)       { (flags) &= ~AuRen_##name; }
13811 +
13812 +struct au_ren_args {
13813 +       struct {
13814 +               struct dentry *dentry, *h_dentry, *parent, *h_parent,
13815 +                       *wh_dentry;
13816 +               struct inode *dir, *inode;
13817 +               struct au_hinode *hdir;
13818 +               struct au_dtime dt[AuParentChild];
13819 +               aufs_bindex_t bstart;
13820 +       } sd[AuSrcDst]; /* [AuSRC] and [AuDST], see the shortcuts below */
13821 +
13822 +#define src_dentry     sd[AuSRC].dentry
13823 +#define src_dir                sd[AuSRC].dir
13824 +#define src_inode      sd[AuSRC].inode
13825 +#define src_h_dentry   sd[AuSRC].h_dentry
13826 +#define src_parent     sd[AuSRC].parent
13827 +#define src_h_parent   sd[AuSRC].h_parent
13828 +#define src_wh_dentry  sd[AuSRC].wh_dentry
13829 +#define src_hdir       sd[AuSRC].hdir
13830 +#define src_h_dir      sd[AuSRC].hdir->hi_inode
13831 +#define src_dt         sd[AuSRC].dt
13832 +#define src_bstart     sd[AuSRC].bstart
13833 +
13834 +#define dst_dentry     sd[AuDST].dentry
13835 +#define dst_dir                sd[AuDST].dir
13836 +#define dst_inode      sd[AuDST].inode
13837 +#define dst_h_dentry   sd[AuDST].h_dentry
13838 +#define dst_parent     sd[AuDST].parent
13839 +#define dst_h_parent   sd[AuDST].h_parent
13840 +#define dst_wh_dentry  sd[AuDST].wh_dentry
13841 +#define dst_hdir       sd[AuDST].hdir
13842 +#define dst_h_dir      sd[AuDST].hdir->hi_inode
13843 +#define dst_dt         sd[AuDST].dt
13844 +#define dst_bstart     sd[AuDST].bstart
13845 +
13846 +       struct dentry *h_trap;
13847 +       struct au_branch *br;
13848 +       struct au_hinode *src_hinode; /* set by au_ren_diropq() */
13849 +       struct path h_path; /* scratch, .dentry is reassigned per step */
13850 +       struct au_nhash whlist;
13851 +       aufs_bindex_t btgt; /* index of the branch the rename works on */
13852 +
13853 +       unsigned int flags; /* AuRen_xxx bits */
13854 +
13855 +       struct au_whtmp_rmdir *thargs; /* args for asynchronous rmdir */
13856 +       struct dentry *h_dst; /* dget-ed in do_rename() along with thargs */
13857 +};
13858 +
13859 +/* ---------------------------------------------------------------------- */
13860 +
13861 +/*
13862 + * functions for reverting.
13863 + * when an error happens in a single rename system call, we should revert
13864 + * everything as if nothing happened. the copied-up/down parent dirs are
13865 + * harmless and are left as they are.
13866 + * RevertFailure() expects 'err' and 'rerr' in the scope of its user.
13867 + */
13868 +
13869 +#define RevertFailure(fmt, ...) do { \
13870 +       AuIOErr("revert failure: " fmt " (%d, %d)\n", \
13871 +               ##__VA_ARGS__, err, rerr); \
13872 +       err = -EIO; \
13873 +} while (0)
13874 +
13875 +static void au_ren_rev_diropq(int err, struct au_ren_args *a)
13876 +{
13877 +       int rerr;
13878 +
13879 +       au_hin_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
13880 +       rerr = au_diropq_remove(a->src_dentry, a->btgt); /* undo diropq */
13881 +       au_hin_imtx_unlock(a->src_hinode);
13882 +       if (rerr) /* @err is passed by value, -EIO stays local */
13883 +               RevertFailure("remove diropq %.*s", AuDLNPair(a->src_dentry));
13884 +}
13885 +
13886 +
13887 +static void au_ren_rev_rename(int err, struct au_ren_args *a)
13888 +{
13889 +       int rerr;
13890 +
13891 +       a->h_path.dentry = au_lkup_one(&a->src_dentry->d_name, a->src_h_parent,
13892 +                                      a->br, /*nd*/NULL);
13893 +       rerr = PTR_ERR(a->h_path.dentry);
13894 +       if (IS_ERR(a->h_path.dentry)) {
13895 +               RevertFailure("au_lkup_one %.*s", AuDLNPair(a->src_dentry));
13896 +               return;
13897 +       }
13898 +
13899 +       rerr = vfsub_rename(a->dst_h_dir, /* back to the src name */
13900 +                           au_h_dptr(a->src_dentry, a->btgt),
13901 +                           a->src_h_dir, &a->h_path);
13902 +       d_drop(a->h_path.dentry);
13903 +       dput(a->h_path.dentry);
13904 +       /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
13905 +       if (rerr) /* @err is passed by value, -EIO stays local */
13906 +               RevertFailure("rename %.*s", AuDLNPair(a->src_dentry));
13907 +}
13908 +
13909 +static void au_ren_rev_cpup(int err, struct au_ren_args *a)
13910 +{
13911 +       int rerr;
13912 +
13913 +       a->h_path.dentry = a->dst_h_dentry;
13914 +       rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*force*/0);
13915 +       au_set_h_dptr(a->src_dentry, a->btgt, NULL);
13916 +       au_set_dbstart(a->src_dentry, a->src_bstart); /* as before cpup */
13917 +       if (rerr) /* @err is passed by value, -EIO stays local */
13918 +               RevertFailure("unlink %.*s", AuDLNPair(a->dst_h_dentry));
13919 +}
13920 +
13921 +
13922 +static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
13923 +{
13924 +       int rerr;
13925 +
13926 +       a->h_path.dentry = au_lkup_one(&a->dst_dentry->d_name, a->dst_h_parent,
13927 +                                      a->br, /*nd*/NULL);
13928 +       rerr = PTR_ERR(a->h_path.dentry);
13929 +       if (IS_ERR(a->h_path.dentry)) {
13930 +               RevertFailure("lookup %.*s", AuDLNPair(a->dst_dentry));
13931 +               return;
13932 +       }
13933 +       if (a->h_path.dentry->d_inode) { /* the dst name is in use */
13934 +               d_drop(a->h_path.dentry);
13935 +               dput(a->h_path.dentry);
13936 +               return; /* nothing is renamed back */
13937 +       }
13938 +
13939 +       rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path);
13940 +       d_drop(a->h_path.dentry);
13941 +       dput(a->h_path.dentry);
13942 +       if (!rerr) { /* a->h_dst is the lower dentry of dst again */
13943 +               au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
13944 +               au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
13945 +       } else
13946 +               RevertFailure("rename %.*s", AuDLNPair(a->h_dst));
13947 +}
13948 +
13949 +static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
13950 +{
13951 +       int rerr;
13952 +
13953 +       a->h_path.dentry = a->src_wh_dentry; /* created in do_rename() */
13954 +       rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
13955 +       if (rerr) /* @err is passed by value, -EIO stays local */
13956 +               RevertFailure("unlink %.*s", AuDLNPair(a->src_wh_dentry));
13957 +}
13958 +
13959 +static void au_ren_rev_drop(struct au_ren_args *a)
13960 +{
13961 +       struct dentry *d, *h_d;
13962 +       int i;
13963 +       aufs_bindex_t bend, bindex;
13964 +
13965 +       for (i = 0; i < AuSrcDst; i++) { /* both src and dst */
13966 +               d = a->sd[i].dentry;
13967 +               d_drop(d);
13968 +               bend = au_dbend(d);
13969 +               for (bindex = au_dbstart(d); bindex <= bend; bindex++) {
13970 +                       h_d = au_h_dptr(d, bindex);
13971 +                       if (h_d) /* some branches have no lower dentry */
13972 +                               d_drop(h_d);
13973 +               }
13974 +       }
13975 +
13976 +       au_update_dbstart(a->dst_dentry);
13977 +       if (a->thargs) /* a->h_dst is set only together with a->thargs */
13978 +               d_drop(a->h_dst);
13979 +}
13980 +#undef RevertFailure
13981 +
13982 +/* ---------------------------------------------------------------------- */
13983 +
13984 +/*
13985 + * when we have to copyup the renaming entry, do it with the rename-target name
13986 + * in order to minimize the cost (the later actual rename is unnecessary).
13987 + * otherwise rename it on the target branch. returns 0 or an error.
13988 + */
13989 +static int au_ren_or_cpup(struct au_ren_args *a)
13990 +{
13991 +       int err;
13992 +       struct dentry *d;
13993 +
13994 +       d = a->src_dentry;
13995 +       if (au_dbstart(d) == a->btgt) {
13996 +               a->h_path.dentry = a->dst_h_dentry;
13997 +               if (au_ftest_ren(a->flags, DIROPQ)
13998 +                   && au_dbdiropq(d) == a->btgt)
13999 +                       au_fclr_ren(a->flags, DIROPQ);
14000 +               AuDebugOn(au_dbstart(d) != a->btgt);
14001 +               err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
14002 +                                  a->dst_h_dir, &a->h_path);
14003 +       } else {
14004 +               struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
14005 +
14006 +               au_fset_ren(a->flags, CPUP); /* reverted by au_ren_rev_cpup() */
14007 +               mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
14008 +               au_set_dbstart(d, a->btgt);
14009 +               au_set_h_dptr(d, a->btgt, dget(a->dst_h_dentry));
14010 +               err = au_sio_cpup_single(d, a->btgt, a->src_bstart, -1,
14011 +                                        !AuCpup_DTIME, a->dst_parent);
14012 +               mutex_unlock(h_mtx);
14013 +               if (!err) { /* dst gives up its lower dentry on btgt */
14014 +                       d = a->dst_dentry;
14015 +                       au_set_h_dptr(d, a->btgt, NULL);
14016 +                       au_update_dbstart(d);
14017 +               } else { /* put src back as it was */
14018 +                       au_set_h_dptr(d, a->btgt, NULL);
14019 +                       au_set_dbstart(d, a->src_bstart);
14020 +               }
14021 +       }
14022 +
14023 +       return err;
14024 +}
14025 +
14026 +/* cf. aufs_rmdir(). remove the whtmp dir now or later; always returns 0. */
14027 +static int au_ren_del_whtmp(struct au_ren_args *a)
14028 +{
14029 +       int err;
14030 +       struct inode *dir;
14031 +
14032 +       dir = a->dst_dir;
14033 +       SiMustAnyLock(dir->i_sb);
14034 +       if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
14035 +                                    au_sbi(dir->i_sb)->si_dirwh)
14036 +           || au_test_fs_remote(a->h_dst->d_sb)) {
14037 +               err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
14038 +               if (unlikely(err)) /* warned only */
14039 +                       AuWarn("failed removing whtmp dir %.*s (%d), "
14040 +                              "ignored.\n", AuDLNPair(a->h_dst), err);
14041 +       } else {
14042 +               au_nhash_wh_free(&a->thargs->whlist);
14043 +               a->thargs->whlist = a->whlist; /* hand the list over */
14044 +               a->whlist.nh_num = 0;
14045 +               au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
14046 +               dput(a->h_dst);
14047 +               a->thargs = NULL; /* do_rename() must not free it */
14048 +       }
14049 +
14050 +       return 0;
14051 +}
14052 +
14053 +/* make it 'opaque' dir. */
14054 +static int au_ren_diropq(struct au_ren_args *a)
14055 +{
14056 +       int err;
14057 +       struct dentry *diropq;
14058 +
14059 +       err = 0;
14060 +       a->src_hinode = au_hi(a->src_inode, a->btgt);
14061 +       au_hin_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
14062 +       diropq = au_diropq_create(a->src_dentry, a->btgt);
14063 +       au_hin_imtx_unlock(a->src_hinode);
14064 +       if (IS_ERR(diropq))
14065 +               err = PTR_ERR(diropq);
14066 +       dput(diropq);
14067 +
14068 +       return err;
14069 +}
14070 +
14071 +static int do_rename(struct au_ren_args *a)
14072 +{
14073 +       int err;
14074 +       struct dentry *d, *h_d;
14075 +
14076 +       /* prepare workqueue args for asynchronous rmdir */
14077 +       h_d = a->dst_h_dentry;
14078 +       if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) {
14079 +               err = -ENOMEM;
14080 +               a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS);
14081 +               if (unlikely(!a->thargs))
14082 +                       goto out;
14083 +               a->h_dst = dget(h_d); /* put at out_thargs or when kicked */
14084 +       }
14085 +
14086 +       /* create whiteout for src_dentry */
14087 +       if (au_ftest_ren(a->flags, WHSRC)) {
14088 +               a->src_wh_dentry
14089 +                       = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent);
14090 +               err = PTR_ERR(a->src_wh_dentry);
14091 +               if (IS_ERR(a->src_wh_dentry))
14092 +                       goto out_thargs;
14093 +       }
14094 +
14095 +       /* lookup whiteout for dst_dentry */
14096 +       if (au_ftest_ren(a->flags, WHDST)) {
14097 +               h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name,
14098 +                                a->br);
14099 +               err = PTR_ERR(h_d);
14100 +               if (IS_ERR(h_d))
14101 +                       goto out_whsrc;
14102 +               if (!h_d->d_inode)
14103 +                       dput(h_d);
14104 +               else
14105 +                       a->dst_wh_dentry = h_d;
14106 +       }
14107 +
14108 +       /* rename dst_h_dentry to whtmp */
14109 +       if (a->thargs) {
14110 +               err = au_whtmp_ren(a->dst_h_dentry, a->br);
14111 +               if (unlikely(err))
14112 +                       goto out_whdst;
14113 +
14114 +               d = a->dst_dentry;
14115 +               au_set_h_dptr(d, a->btgt, NULL);
14116 +               err = au_lkup_neg(d, a->btgt);
14117 +               if (unlikely(err))
14118 +                       goto out_whtmp;
14119 +               a->dst_h_dentry = au_h_dptr(d, a->btgt);
14120 +       }
14121 +
14122 +       /* cpup src */
14123 +       if (a->dst_h_dentry->d_inode && a->src_bstart != a->btgt) {
14124 +               struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
14125 +
14126 +               mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
14127 +               err = au_sio_cpup_simple(a->src_dentry, a->btgt, -1,
14128 +                                        !AuCpup_DTIME);
14129 +               mutex_unlock(h_mtx);
14130 +               if (unlikely(err))
14131 +                       goto out_whtmp;
14132 +       }
14133 +
14134 +       /* rename by vfs_rename or cpup */
14135 +       d = a->dst_dentry;
14136 +       if (au_ftest_ren(a->flags, ISDIR)
14137 +           && (a->dst_wh_dentry
14138 +               || au_dbdiropq(d) == a->btgt
14139 +               /* hide the lower to keep xino */
14140 +               || a->btgt < au_dbend(d)
14141 +               || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
14142 +               au_fset_ren(a->flags, DIROPQ);
14143 +       err = au_ren_or_cpup(a);
14144 +       if (unlikely(err))
14145 +               /* leave the copied-up one */
14146 +               goto out_whtmp;
14147 +
14148 +       /* make dir opaque */
14149 +       if (au_ftest_ren(a->flags, DIROPQ)) {
14150 +               err = au_ren_diropq(a);
14151 +               if (unlikely(err))
14152 +                       goto out_rename;
14153 +       }
14154 +
14155 +       /* update target timestamps */
14156 +       AuDebugOn(au_dbstart(a->src_dentry) != a->btgt);
14157 +       a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
14158 +       vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
14159 +       a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime;
14160 +
14161 +       /* remove whiteout for dst_dentry */
14162 +       if (a->dst_wh_dentry) {
14163 +               a->h_path.dentry = a->dst_wh_dentry;
14164 +               err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
14165 +                                         a->dst_dentry);
14166 +               if (unlikely(err))
14167 +                       goto out_diropq;
14168 +       }
14169 +
14170 +       /* remove whtmp */
14171 +       if (a->thargs)
14172 +               au_ren_del_whtmp(a); /* ignore this error */
14173 +
14174 +       err = 0;
14175 +       goto out_success;
14176 +
14177 + out_diropq:
14178 +       if (au_ftest_ren(a->flags, DIROPQ))
14179 +               au_ren_rev_diropq(err, a);
14180 + out_rename:
14181 +       if (!au_ftest_ren(a->flags, CPUP))
14182 +               au_ren_rev_rename(err, a);
14183 +       else
14184 +               au_ren_rev_cpup(err, a);
14185 + out_whtmp:
14186 +       if (a->thargs)
14187 +               au_ren_rev_whtmp(err, a);
14188 + out_whdst:
14189 +       dput(a->dst_wh_dentry);
14190 +       a->dst_wh_dentry = NULL; /* avoid a second dput at out_success */
14191 + out_whsrc:
14192 +       if (a->src_wh_dentry)
14193 +               au_ren_rev_whsrc(err, a);
14194 +       au_ren_rev_drop(a);
14195 + out_success:
14196 +       dput(a->src_wh_dentry);
14197 +       dput(a->dst_wh_dentry);
14198 + out_thargs:
14199 +       if (a->thargs) { /* NULL when the asynchronous rmdir was kicked */
14200 +               dput(a->h_dst);
14201 +               au_whtmp_rmdir_free(a->thargs);
14202 +               a->thargs = NULL;
14203 +       }
14204 + out:
14205 +       return err;
14206 +}
14207 +
14208 +/* ---------------------------------------------------------------------- */
14209 +
14210 +/*
14211 + * test if @dentry dir can be a rename destination or not.
14212 + * returns what au_test_empty() returns; success means a logically empty dir.
14213 + */
14214 +static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
14215 +{
14216 +       return au_test_empty(dentry, whlist); /* @whlist is passed through as is */
14217 +}
14218 +
14219 +/*
14220 + * test if @dentry dir can be a rename source for the branch @btgt or not.
14221 + * if it can, return 0. -ENOTEMPTY from the tests is warned and becomes -EXDEV.
14222 + * success means,
14223 + * - it is a logically empty dir.
14224 + * - or, it exists on writable branch and has no children including whiteouts
14225 + *       on the lower branch.
14226 + */
14227 +static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
14228 +{
14229 +       int err;
14230 +       unsigned int rdhash;
14231 +       aufs_bindex_t bstart;
14232 +
14233 +       bstart = au_dbstart(dentry);
14234 +       if (bstart != btgt) { /* au_dbstart() differs from @btgt: full emptiness test */
14235 +               struct au_nhash whlist;
14236 +
14237 +               SiMustAnyLock(dentry->d_sb);
14238 +               rdhash = au_sbi(dentry->d_sb)->si_rdhash;
14239 +               if (!rdhash) /* si_rdhash is zero: estimate one from au_dir_size() */
14240 +                       rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
14241 +                                                          dentry));
14242 +               err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
14243 +               if (unlikely(err))
14244 +                       goto out;
14245 +               err = au_test_empty(dentry, &whlist);
14246 +               au_nhash_wh_free(&whlist); /* the local list is not used after the test */
14247 +               goto out;
14248 +       }
14249 +
14250 +       if (bstart == au_dbtaildir(dentry))
14251 +               return 0; /* success */
14252 +
14253 +       err = au_test_empty_lower(dentry);
14254 +
14255 + out:
14256 +       if (err == -ENOTEMPTY) {
14257 +               AuWarn1("renaming dir who has child(ren) on multiple branches,"
14258 +                       " is not supported\n");
14259 +               err = -EXDEV; /* callers never see -ENOTEMPTY from this function */
14260 +       }
14261 +       return err;
14262 +}
14263 +
14264 +/* side effect: may allocate a->whlist; sets a->dst_h_dentry and (unless the dst test failed) a->src_h_dentry */
14265 +static int au_ren_may_dir(struct au_ren_args *a)
14266 +{
14267 +       int err;
14268 +       unsigned int rdhash;
14269 +       struct dentry *d;
14270 +
14271 +       d = a->dst_dentry;
14272 +       SiMustAnyLock(d->d_sb);
14273 +
14274 +       err = 0;
14275 +       if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) { /* dir rename onto an existing dst */
14276 +               rdhash = au_sbi(d->d_sb)->si_rdhash;
14277 +               if (!rdhash) /* si_rdhash is zero: estimate one from au_dir_size() */
14278 +                       rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
14279 +               err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
14280 +               if (unlikely(err))
14281 +                       goto out;
14282 +
14283 +               au_set_dbstart(d, a->dst_bstart); /* run the test with dbstart at dst_bstart */
14284 +               err = may_rename_dstdir(d, &a->whlist);
14285 +               au_set_dbstart(d, a->btgt); /* then leave dbstart at the target branch */
14286 +       }
14287 +       a->dst_h_dentry = au_h_dptr(d, au_dbstart(d)); /* set even when the test above failed */
14288 +       if (unlikely(err))
14289 +               goto out;
14290 +
14291 +       d = a->src_dentry;
14292 +       a->src_h_dentry = au_h_dptr(d, au_dbstart(d));
14293 +       if (au_ftest_ren(a->flags, ISDIR)) {
14294 +               err = may_rename_srcdir(d, a->btgt);
14295 +               if (unlikely(err)) {
14296 +                       au_nhash_wh_free(&a->whlist);
14297 +                       a->whlist.nh_num = 0; /* presumably makes a later au_nhash_wh_free() harmless - TODO confirm */
14298 +               }
14299 +       }
14300 + out:
14301 +       return err;
14302 +}
14303 +
14304 +/* ---------------------------------------------------------------------- */
14305 +
14306 +/*
14307 + * simple tests for rename.
14308 + * following the checks in vfs, plus the parent-child relationship.
14309 + */
14310 +static int au_may_ren(struct au_ren_args *a)
14311 +{
14312 +       int err, isdir;
14313 +       struct inode *h_inode;
14314 +
14315 +       if (a->src_bstart == a->btgt) { /* src checks apply only when src starts on btgt */
14316 +               err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
14317 +                                au_ftest_ren(a->flags, ISDIR));
14318 +               if (unlikely(err))
14319 +                       goto out;
14320 +               err = -EINVAL;
14321 +               if (unlikely(a->src_h_dentry == a->h_trap)) /* h_trap: value vfsub_lock_rename() returned */
14322 +                       goto out;
14323 +       }
14324 +
14325 +       err = 0;
14326 +       if (a->dst_bstart != a->btgt) /* no dst checks in this case: return 0 */
14327 +               goto out;
14328 +
14329 +       err = -EIO;
14330 +       h_inode = a->dst_h_dentry->d_inode;
14331 +       isdir = !!au_ftest_ren(a->flags, ISDIR);
14332 +       if (!a->dst_dentry->d_inode) { /* negative dst: the lower dentry must be negative too */
14333 +               if (unlikely(h_inode))
14334 +                       goto out;
14335 +               err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent,
14336 +                                isdir);
14337 +       } else { /* positive dst: the lower inode must exist with a non-zero i_nlink */
14338 +               if (unlikely(!h_inode || !h_inode->i_nlink))
14339 +                       goto out;
14340 +               err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent,
14341 +                                isdir);
14342 +               if (unlikely(err))
14343 +                       goto out;
14344 +               err = -ENOTEMPTY;
14345 +               if (unlikely(a->dst_h_dentry == a->h_trap))
14346 +                       goto out;
14347 +               err = 0;
14348 +       }
14349 +
14350 + out:
14351 +       if (unlikely(err == -ENOENT || err == -EEXIST)) /* both are reported as -EIO */
14352 +               err = -EIO;
14353 +       AuTraceErr(err);
14354 +       return err;
14355 +}
14356 +
14357 +/* ---------------------------------------------------------------------- */
14358 +
14359 +/*
14360 + * locking order
14361 + * (VFS)
14362 + * - src_dir and dir by lock_rename()
14363 + * - inode if it exists
14364 + * (aufs)
14365 + * - lock all
14366 + *   + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
14367 + *     + si_read_lock
14368 + *     + di_write_lock2_child()
14369 + *       + di_write_lock_child()
14370 + *        + ii_write_lock_child()
14371 + *       + di_write_lock_child2()
14372 + *        + ii_write_lock_child2()
14373 + *     + src_parent and parent
14374 + *       + di_write_lock_parent()
14375 + *        + ii_write_lock_parent()
14376 + *       + di_write_lock_parent2()
14377 + *        + ii_write_lock_parent2()
14378 + *   + lower src_dir and dir by vfsub_lock_rename()
14379 + *   + verify every relationship between child and parent. if any
14380 + *     of them fails, unlock all and return -EBUSY.
14381 + */
14382 +static void au_ren_unlock(struct au_ren_args *a)
14383 +{
14384 +       struct super_block *sb;
14385 +
14386 +       sb = a->dst_dentry->d_sb; /* NOTE(review): sb is assigned but never read in this function */
14387 +       if (au_ftest_ren(a->flags, MNT_WRITE)) /* flag is set by au_ren_lock() after mnt_want_write() */
14388 +               mnt_drop_write(a->br->br_mnt);
14389 +       vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
14390 +                           a->dst_h_parent, a->dst_hdir);
14391 +}
14392 +
14393 +static int au_ren_lock(struct au_ren_args *a)
14394 +{
14395 +       int err;
14396 +       unsigned int udba;
14397 +
14398 +       err = 0; /* below: lock both lower parents on a->btgt, verify, then mnt_want_write() */
14399 +       a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
14400 +       a->src_hdir = au_hi(a->src_dir, a->btgt);
14401 +       a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
14402 +       a->dst_hdir = au_hi(a->dst_dir, a->btgt);
14403 +       a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
14404 +                                     a->dst_h_parent, a->dst_hdir);
14405 +       udba = au_opt_udba(a->src_dentry->d_sb);
14406 +       if (unlikely(a->src_hdir->hi_inode != a->src_h_parent->d_inode
14407 +                    || a->dst_hdir->hi_inode != a->dst_h_parent->d_inode))
14408 +               err = au_busy_or_stale(); /* cached lower dir inode and lower parent dentry disagree */
14409 +       if (!err && au_dbstart(a->src_dentry) == a->btgt)
14410 +               err = au_h_verify(a->src_h_dentry, udba,
14411 +                                 a->src_h_parent->d_inode, a->src_h_parent,
14412 +                                 a->br);
14413 +       if (!err && au_dbstart(a->dst_dentry) == a->btgt)
14414 +               err = au_h_verify(a->dst_h_dentry, udba,
14415 +                                 a->dst_h_parent->d_inode, a->dst_h_parent,
14416 +                                 a->br);
14417 +       if (!err) {
14418 +               err = mnt_want_write(a->br->br_mnt);
14419 +               if (unlikely(err))
14420 +                       goto out_unlock; /* MNT_WRITE is not set, so au_ren_unlock() skips mnt_drop_write() */
14421 +               au_fset_ren(a->flags, MNT_WRITE);
14422 +               goto out; /* success */
14423 +       }
14424 +
14425 +       err = au_busy_or_stale(); /* any verification failure above is returned as this value */
14426 +
14427 + out_unlock:
14428 +       au_ren_unlock(a);
14429 + out:
14430 +       return err;
14431 +}
14432 +
14433 +/* ---------------------------------------------------------------------- */
14434 +
14435 +static void au_ren_refresh_dir(struct au_ren_args *a)
14436 +{
14437 +       struct inode *dir;
14438 +
14439 +       dir = a->dst_dir; /* first the destination parent ... */
14440 +       dir->i_version++;
14441 +       if (au_ftest_ren(a->flags, ISDIR)) {
14442 +               /* is this updating defined in POSIX? */
14443 +               au_cpup_attr_timesizes(a->src_inode);
14444 +               au_cpup_attr_nlink(dir, /*force*/1);
14445 +               if (a->dst_inode) { /* a directory was overwritten by the rename */
14446 +                       clear_nlink(a->dst_inode);
14447 +                       au_cpup_attr_timesizes(a->dst_inode);
14448 +               }
14449 +       }
14450 +       if (au_ibstart(dir) == a->btgt)
14451 +               au_cpup_attr_timesizes(dir);
14452 +
14453 +       if (au_ftest_ren(a->flags, ISSAMEDIR)) /* src_dir == dst_dir: nothing more to refresh */
14454 +               return;
14455 +
14456 +       dir = a->src_dir; /* ... then the source parent, same steps minus the child updates */
14457 +       dir->i_version++;
14458 +       if (au_ftest_ren(a->flags, ISDIR))
14459 +               au_cpup_attr_nlink(dir, /*force*/1);
14460 +       if (au_ibstart(dir) == a->btgt)
14461 +               au_cpup_attr_timesizes(dir);
14462 +}
14463 +
14464 +static void au_ren_refresh(struct au_ren_args *a)
14465 +{
14466 +       aufs_bindex_t bend, bindex;
14467 +       struct dentry *d, *h_d;
14468 +       struct inode *i, *h_i;
14469 +       struct super_block *sb;
14470 +
14471 +       d = a->src_dentry;
14472 +       au_set_dbwh(d, -1);
14473 +       bend = au_dbend(d);
14474 +       for (bindex = a->btgt + 1; bindex <= bend; bindex++) { /* every index after btgt */
14475 +               h_d = au_h_dptr(d, bindex);
14476 +               if (h_d)
14477 +                       au_set_h_dptr(d, bindex, NULL);
14478 +       }
14479 +       au_set_dbend(d, a->btgt); /* src dentry now ends at the target branch */
14480 +
14481 +       sb = d->d_sb;
14482 +       i = a->src_inode;
14483 +       if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i)) /* inode part is left untouched here */
14484 +               return; /* success */
14485 +
14486 +       bend = au_ibend(i);
14487 +       for (bindex = a->btgt + 1; bindex <= bend; bindex++) { /* same walk for the lower inodes */
14488 +               h_i = au_h_iptr(i, bindex);
14489 +               if (h_i) {
14490 +                       au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
14491 +                       /* ignore this error */
14492 +                       au_set_h_iptr(i, bindex, NULL, 0);
14493 +               }
14494 +       }
14495 +       au_set_ibend(i, a->btgt);
14496 +}
14497 +
14498 +/* ---------------------------------------------------------------------- */
14499 +
14500 +/* mainly for link(2) and rename(2): returns @btgt, or -1 when one of the tests below rejects it */
14501 +int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
14502 +{
14503 +       aufs_bindex_t bdiropq, bwh;
14504 +       struct dentry *parent;
14505 +       struct au_branch *br;
14506 +
14507 +       parent = dentry->d_parent;
14508 +       IMustLock(parent->d_inode); /* dir is locked */
14509 +
14510 +       bdiropq = au_dbdiropq(parent);
14511 +       bwh = au_dbwh(dentry);
14512 +       br = au_sbr(dentry->d_sb, btgt);
14513 +       if (au_br_rdonly(br) /* rejected: read-only branch, ... */
14514 +           || (0 <= bdiropq && bdiropq < btgt) /* ... parent's diropq index is before btgt, ... */
14515 +           || (0 <= bwh && bwh < btgt)) /* ... or dentry's wh index is before btgt */
14516 +               btgt = -1;
14517 +
14518 +       AuDbg("btgt %d\n", btgt);
14519 +       return btgt;
14520 +}
14521 +
14522 +/* sets src_bstart, dst_bstart and btgt; returns au_wr_dir()'s value, which is also what a->btgt gets */
14523 +static int au_ren_wbr(struct au_ren_args *a)
14524 +{
14525 +       int err;
14526 +       struct au_wr_dir_args wr_dir_args = {
14527 +               /* .force_btgt  = -1, */
14528 +               .flags          = AuWrDir_ADD_ENTRY
14529 +       };
14530 +
14531 +       a->src_bstart = au_dbstart(a->src_dentry);
14532 +       a->dst_bstart = au_dbstart(a->dst_dentry);
14533 +       if (au_ftest_ren(a->flags, ISDIR))
14534 +               au_fset_wrdir(wr_dir_args.flags, ISDIR);
14535 +       wr_dir_args.force_btgt = a->src_bstart; /* start from the source's branch */
14536 +       if (a->dst_inode && a->dst_bstart < a->src_bstart) /* an existing dst with a smaller index wins */
14537 +               wr_dir_args.force_btgt = a->dst_bstart;
14538 +       wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt); /* may turn it into -1 */
14539 +       err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
14540 +       a->btgt = err; /* stored unconditionally; the caller treats err < 0 as failure */
14541 +
14542 +       return err;
14543 +}
14544 +
14545 +static void au_ren_dt(struct au_ren_args *a)
14546 +{
14547 +       a->h_path.dentry = a->src_h_parent; /* save the parents' times; au_ren_rev_dt() reverts them */
14548 +       au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
14549 +       if (!au_ftest_ren(a->flags, ISSAMEDIR)) {
14550 +               a->h_path.dentry = a->dst_h_parent;
14551 +               au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
14552 +       }
14553 +
14554 +       au_fclr_ren(a->flags, DT_DSTDIR);
14555 +       if (!au_ftest_ren(a->flags, ISDIR)) /* child times are saved for directories only */
14556 +               return;
14557 +
14558 +       a->h_path.dentry = a->src_h_dentry;
14559 +       au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
14560 +       if (a->dst_h_dentry->d_inode) { /* positive lower dst: save it and remember via DT_DSTDIR */
14561 +               au_fset_ren(a->flags, DT_DSTDIR);
14562 +               a->h_path.dentry = a->dst_h_dentry;
14563 +               au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
14564 +       }
14565 +}
14566 +
14567 +static void au_ren_rev_dt(int err, struct au_ren_args *a)
14568 +{
14569 +       struct dentry *h_d;
14570 +       struct mutex *h_mtx;
14571 +
14572 +       au_dtime_revert(a->src_dt + AuPARENT); /* undo what au_ren_dt() stored, parents first */
14573 +       if (!au_ftest_ren(a->flags, ISSAMEDIR))
14574 +               au_dtime_revert(a->dst_dt + AuPARENT);
14575 +
14576 +       if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) { /* child times are not reverted when @err is -EIO */
14577 +               h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
14578 +               h_mtx = &h_d->d_inode->i_mutex;
14579 +               mutex_lock_nested(h_mtx, AuLsc_I_CHILD); /* child revert runs under the lower inode's i_mutex */
14580 +               au_dtime_revert(a->src_dt + AuCHILD);
14581 +               mutex_unlock(h_mtx);
14582 +
14583 +               if (au_ftest_ren(a->flags, DT_DSTDIR)) { /* set by au_ren_dt() only when dst times were stored */
14584 +                       h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
14585 +                       h_mtx = &h_d->d_inode->i_mutex;
14586 +                       mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
14587 +                       au_dtime_revert(a->dst_dt + AuCHILD);
14588 +                       mutex_unlock(h_mtx);
14589 +               }
14590 +       }
14591 +}
14592 +
14593 +/* ---------------------------------------------------------------------- */
14594 +
14595 +int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
14596 +               struct inode *_dst_dir, struct dentry *_dst_dentry)
14597 +{
14598 +       int err;
14599 +       /* reduce stack space */
14600 +       struct au_ren_args *a;
14601 +
14602 +       AuDbg("%.*s, %.*s\n", AuDLNPair(_src_dentry), AuDLNPair(_dst_dentry));
14603 +       IMustLock(_src_dir);
14604 +       IMustLock(_dst_dir);
14605 +
14606 +       err = -ENOMEM;
14607 +       BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
14608 +       a = kzalloc(sizeof(*a), GFP_NOFS); /* zeroed: flags, thargs and whlist start out clear */
14609 +       if (unlikely(!a))
14610 +               goto out;
14611 +
14612 +       a->src_dir = _src_dir;
14613 +       a->src_dentry = _src_dentry;
14614 +       a->src_inode = a->src_dentry->d_inode;
14615 +       a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
14616 +       a->dst_dir = _dst_dir;
14617 +       a->dst_dentry = _dst_dentry;
14618 +       a->dst_inode = a->dst_dentry->d_inode;
14619 +       a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
14620 +       if (a->dst_inode) {
14621 +               IMustLock(a->dst_inode);
14622 +               au_igrab(a->dst_inode); /* paired with the iput() at out_free */
14623 +       }
14624 +
14625 +       err = -ENOTDIR;
14626 +       if (S_ISDIR(a->src_inode->i_mode)) {
14627 +               au_fset_ren(a->flags, ISDIR);
14628 +               if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode))) /* dir onto non-dir: -ENOTDIR */
14629 +                       goto out_free;
14630 +               aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
14631 +                                         AuLock_DIR | AuLock_FLUSH);
14632 +       } else
14633 +               aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
14634 +                                         AuLock_FLUSH);
14635 +
14636 +       au_fset_ren(a->flags, ISSAMEDIR); /* temporary */
14637 +       di_write_lock_parent(a->dst_parent);
14638 +
14639 +       /* which branch we process */
14640 +       err = au_ren_wbr(a);
14641 +       if (unlikely(err < 0)) /* a non-negative value is the branch index, kept in a->btgt */
14642 +               goto out_unlock;
14643 +       a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
14644 +       a->h_path.mnt = a->br->br_mnt;
14645 +
14646 +       /* are they available to be renamed */
14647 +       err = au_ren_may_dir(a);
14648 +       if (unlikely(err))
14649 +               goto out_children;
14650 +
14651 +       /* prepare the writable parent dir on the same branch */
14652 +       if (a->dst_bstart == a->btgt) {
14653 +               au_fset_ren(a->flags, WHDST);
14654 +       } else {
14655 +               err = au_cpup_dirs(a->dst_dentry, a->btgt);
14656 +               if (unlikely(err))
14657 +                       goto out_children;
14658 +       }
14659 +
14660 +       if (a->src_dir != a->dst_dir) {
14661 +               /*
14662 +                * this temporary unlock is safe,
14663 +                * because both dir->i_mutex are locked.
14664 +                */
14665 +               di_write_unlock(a->dst_parent);
14666 +               di_write_lock_parent(a->src_parent);
14667 +               err = au_wr_dir_need_wh(a->src_dentry,
14668 +                                       au_ftest_ren(a->flags, ISDIR),
14669 +                                       &a->btgt);
14670 +               di_write_unlock(a->src_parent);
14671 +               di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1);
14672 +               au_fclr_ren(a->flags, ISSAMEDIR); /* from here both parents are locked; see the unlock below */
14673 +       } else
14674 +               err = au_wr_dir_need_wh(a->src_dentry,
14675 +                                       au_ftest_ren(a->flags, ISDIR),
14676 +                                       &a->btgt);
14677 +       if (unlikely(err < 0))
14678 +               goto out_children;
14679 +       if (err) /* positive return from au_wr_dir_need_wh(): record it as WHSRC */
14680 +               au_fset_ren(a->flags, WHSRC);
14681 +
14682 +       /* lock them all */
14683 +       err = au_ren_lock(a); /* on failure it has already unlocked, hence out_children and not out_hdir */
14684 +       if (unlikely(err))
14685 +               goto out_children;
14686 +
14687 +       if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
14688 +               err = au_may_ren(a);
14689 +       else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN)) /* with UDBA_NONE only the name length is tested */
14690 +               err = -ENAMETOOLONG;
14691 +       if (unlikely(err))
14692 +               goto out_hdir;
14693 +
14694 +       /* store timestamps to be revertible */
14695 +       au_ren_dt(a);
14696 +
14697 +       /* here we go */
14698 +       err = do_rename(a);
14699 +       if (unlikely(err))
14700 +               goto out_dt;
14701 +
14702 +       /* update dir attributes */
14703 +       au_ren_refresh_dir(a);
14704 +
14705 +       /* dput/iput all lower dentries */
14706 +       au_ren_refresh(a);
14707 +
14708 +       goto out_hdir; /* success */
14709 +
14710 + out_dt:
14711 +       au_ren_rev_dt(err, a);
14712 + out_hdir:
14713 +       au_ren_unlock(a);
14714 + out_children:
14715 +       au_nhash_wh_free(&a->whlist);
14716 + out_unlock:
14717 +       if (unlikely(err && au_ftest_ren(a->flags, ISDIR))) { /* failed dir rename: drop the dst dentry */
14718 +               au_update_dbstart(a->dst_dentry);
14719 +               d_drop(a->dst_dentry);
14720 +       }
14721 +       if (!err) /* err is 0 here only on the success path through out_hdir */
14722 +               d_move(a->src_dentry, a->dst_dentry);
14723 +       if (au_ftest_ren(a->flags, ISSAMEDIR)) /* flag still set: only dst_parent is locked */
14724 +               di_write_unlock(a->dst_parent);
14725 +       else
14726 +               di_write_unlock2(a->src_parent, a->dst_parent);
14727 +       aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
14728 + out_free:
14729 +       iput(a->dst_inode); /* drops the au_igrab() reference; dst_inode may be NULL here */
14730 +       if (a->thargs)
14731 +               au_whtmp_rmdir_free(a->thargs);
14732 +       kfree(a);
14733 + out:
14734 +       AuTraceErr(err);
14735 +       return err;
14736 +}
14737 diff --git a/fs/aufs/iinfo.c b/fs/aufs/iinfo.c
14738 new file mode 100644
14739 index 0000000..072ddfc
14740 --- /dev/null
14741 +++ b/fs/aufs/iinfo.c
14742 @@ -0,0 +1,283 @@
14743 +/*
14744 + * Copyright (C) 2005-2009 Junjiro R. Okajima
14745 + *
14746 + * This program, aufs is free software; you can redistribute it and/or modify
14747 + * it under the terms of the GNU General Public License as published by
14748 + * the Free Software Foundation; either version 2 of the License, or
14749 + * (at your option) any later version.
14750 + *
14751 + * This program is distributed in the hope that it will be useful,
14752 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
14753 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14754 + * GNU General Public License for more details.
14755 + *
14756 + * You should have received a copy of the GNU General Public License
14757 + * along with this program; if not, write to the Free Software
14758 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
14759 + */
14760 +
14761 +/*
14762 + * inode private data
14763 + */
14764 +
14765 +#include "aufs.h"
14766 +
14767 +struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
14768 +{
14769 +       struct inode *h_inode;
14770 +
14771 +       IiMustAnyLock(inode);
14772 +
14773 +       h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode; /* slot @bindex of the hinode array; may be NULL */
14774 +       AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); /* debug check: a stored inode must hold a reference */
14775 +       return h_inode;
14776 +}
14777 +
14778 +/* todo: hard/soft set? */
14779 +void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex)
14780 +{
14781 +       struct au_iinfo *iinfo = au_ii(inode);
14782 +       struct inode *h_inode;
14783 +
14784 +       IiMustWriteLock(inode);
14785 +
14786 +       iinfo->ii_bstart = bindex;
14787 +       h_inode = iinfo->ii_hinode[bindex + 0].hi_inode;
14788 +       if (h_inode) /* when the new start slot is populated, call au_cpup_igen() with its inode */
14789 +               au_cpup_igen(inode, h_inode);
14790 +}
14791 +
14792 +void au_hiput(struct au_hinode *hinode)
14793 +{
14794 +       au_hin_free(hinode); /* releases in order: au_hin_free(), hi_whdentry, hi_inode */
14795 +       dput(hinode->hi_whdentry);
14796 +       iput(hinode->hi_inode); /* NOTE: the fields themselves are not reset here */
14797 +}
14798 +
14799 +unsigned int au_hi_flags(struct inode *inode, int isdir)
14800 +{
14801 +       unsigned int flags;
14802 +       const unsigned int mnt_flags = au_mntflags(inode->i_sb);
14803 +
14804 +       flags = 0; /* built from the mount flags of @inode's superblock */
14805 +       if (au_opt_test(mnt_flags, XINO))
14806 +               au_fset_hi(flags, XINO);
14807 +       if (isdir && au_opt_test(mnt_flags, UDBA_HINOTIFY)) /* HINOTIFY is set only when @isdir is non-zero */
14808 +               au_fset_hi(flags, HINOTIFY);
14809 +       return flags;
14810 +}
14811 +
14812 +void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
14813 +                  struct inode *h_inode, unsigned int flags)
14814 +{
14815 +       struct au_hinode *hinode;
14816 +       struct inode *hi;
14817 +       struct au_iinfo *iinfo = au_ii(inode);
14818 +
14819 +       IiMustWriteLock(inode);
14820 +
14821 +       hinode = iinfo->ii_hinode + bindex;
14822 +       hi = hinode->hi_inode; /* what slot @bindex holds now */
14823 +       AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
14824 +       AuDebugOn(h_inode && hi); /* debug check: a non-NULL @h_inode must not replace an existing one */
14825 +
14826 +       if (hi)
14827 +               au_hiput(hinode); /* release the previous occupant */
14828 +       hinode->hi_inode = h_inode; /* a NULL @h_inode simply empties the slot */
14829 +       if (h_inode) {
14830 +               int err;
14831 +               struct super_block *sb = inode->i_sb;
14832 +               struct au_branch *br;
14833 +
14834 +               if (bindex == iinfo->ii_bstart)
14835 +                       au_cpup_igen(inode, h_inode);
14836 +               br = au_sbr(sb, bindex);
14837 +               hinode->hi_id = br->br_id; /* remember the branch id, not only the index */
14838 +               if (au_ftest_hi(flags, XINO)) {
14839 +                       err = au_xino_write(sb, bindex, h_inode->i_ino,
14840 +                                           inode->i_ino);
14841 +                       if (unlikely(err)) /* failure is only logged; this function returns void */
14842 +                               AuIOErr1("failed au_xino_write() %d\n", err);
14843 +               }
14844 +
14845 +               if (au_ftest_hi(flags, HINOTIFY)
14846 +                   && au_br_hinotifyable(br->br_perm)) {
14847 +                       err = au_hin_alloc(hinode, inode, h_inode);
14848 +                       if (unlikely(err)) /* likewise only logged */
14849 +                               AuIOErr1("au_hin_alloc() %d\n", err);
14850 +               }
14851 +       }
14852 +}
14853 +
14854 +void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
14855 +                 struct dentry *h_wh)
14856 +{
14857 +       struct au_hinode *hinode;
14858 +
14859 +       IiMustWriteLock(inode);
14860 +
14861 +       hinode = au_ii(inode)->ii_hinode + bindex;
14862 +       AuDebugOn(hinode->hi_whdentry); /* debug check: the slot must not already hold one */
14863 +       hinode->hi_whdentry = h_wh; /* stored as is; au_hiput() later dput()s it */
14864 +}
14865 +
14866 +void au_update_iigen(struct inode *inode)
14867 +{
14868 +       atomic_set(&au_ii(inode)->ii_generation, au_sigen(inode->i_sb)); /* copy au_sigen() of the sb into the inode */
14869 +       /* smp_mb(); */ /* atomic_set */
14870 +}
14871 +
14872 +/* it may be called at remount time, too. recomputes ii_bstart/ii_bend from the populated slots */
14873 +void au_update_brange(struct inode *inode, int do_put_zero)
14874 +{
14875 +       struct au_iinfo *iinfo;
14876 +
14877 +       iinfo = au_ii(inode);
14878 +       if (!iinfo || iinfo->ii_bstart < 0) /* nothing to do without iinfo or with an empty range */
14879 +               return;
14880 +
14881 +       IiMustWriteLock(inode);
14882 +
14883 +       if (do_put_zero) { /* optionally drop lower inodes whose i_nlink is zero first */
14884 +               aufs_bindex_t bindex;
14885 +
14886 +               for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
14887 +                    bindex++) {
14888 +                       struct inode *h_i;
14889 +
14890 +                       h_i = iinfo->ii_hinode[0 + bindex].hi_inode;
14891 +                       if (h_i && !h_i->i_nlink)
14892 +                               au_set_h_iptr(inode, bindex, NULL, 0);
14893 +               }
14894 +       }
14895 +
14896 +       iinfo->ii_bstart = -1;
14897 +       while (++iinfo->ii_bstart <= iinfo->ii_bend) /* scan up from 0 for the first populated slot */
14898 +               if (iinfo->ii_hinode[0 + iinfo->ii_bstart].hi_inode)
14899 +                       break;
14900 +       if (iinfo->ii_bstart > iinfo->ii_bend) { /* no populated slot: mark the range empty */
14901 +               iinfo->ii_bstart = -1;
14902 +               iinfo->ii_bend = -1;
14903 +               return;
14904 +       }
14905 +
14906 +       iinfo->ii_bend++;
14907 +       while (0 <= --iinfo->ii_bend) /* scan down from the old end for the last populated slot */
14908 +               if (iinfo->ii_hinode[0 + iinfo->ii_bend].hi_inode)
14909 +                       break;
14910 +       AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend || iinfo->ii_bend < 0);
14911 +}
14912 +
14913 +/* ---------------------------------------------------------------------- */
14914 +
14915 +int au_iinfo_init(struct inode *inode)
14916 +{
14917 +       struct au_iinfo *iinfo;
14918 +       struct super_block *sb;
14919 +       int nbr, i;
14920 +
14921 +       sb = inode->i_sb;
14922 +       iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); /* iinfo is a member of the struct au_icntnr that embeds @inode */
14923 +       nbr = au_sbend(sb) + 1;
14924 +       if (unlikely(nbr <= 0)) /* always allocate at least one slot */
14925 +               nbr = 1;
14926 +       iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
14927 +       if (iinfo->ii_hinode) {
14928 +               for (i = 0; i < nbr; i++)
14929 +                       iinfo->ii_hinode[i].hi_id = -1; /* every slot starts with hi_id -1 */
14930 +
14931 +               atomic_set(&iinfo->ii_generation, au_sigen(sb));
14932 +               /* smp_mb(); */ /* atomic_set */
14933 +               au_rw_init(&iinfo->ii_rwsem);
14934 +               iinfo->ii_bstart = -1; /* empty branch range */
14935 +               iinfo->ii_bend = -1;
14936 +               iinfo->ii_vdir = NULL;
14937 +               return 0;
14938 +       }
14939 +       return -ENOMEM; /* the kcalloc() above failed */
14940 +}
14941 +
14942 +int au_ii_realloc(struct au_iinfo *iinfo, int nbr)
14943 +{
14944 +       int err, sz;
14945 +       struct au_hinode *hip;
14946 +
14947 +       AuRwMustWriteLock(&iinfo->ii_rwsem);
14948 +
14949 +       err = -ENOMEM;
14950 +       sz = sizeof(*hip) * (iinfo->ii_bend + 1); /* byte size of ii_bend + 1 entries */
14951 +       if (!sz) /* ii_bend == -1: count one entry instead of zero */
14952 +               sz = sizeof(*hip);
14953 +       hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS); /* presumably resizes from sz to @nbr entries - confirm in au_kzrealloc() */
14954 +       if (hip) {
14955 +               iinfo->ii_hinode = hip; /* replaced only on success; on failure the old pointer stays */
14956 +               err = 0;
14957 +       }
14958 +
14959 +       return err;
14960 +}
14961 +
14962 +static int au_iinfo_write0(struct super_block *sb, struct au_hinode *hinode,
14963 +                          ino_t ino)
14964 +{
14965 +       int err;
14966 +       aufs_bindex_t bindex;
14967 +       unsigned char locked;
14968 +
14969 +       err = 0;
14970 +       locked = !!si_noflush_read_trylock(sb); /* NOTE: the steps below run whether or not this succeeded */
14971 +       bindex = au_br_index(sb, hinode->hi_id); /* look the branch up by the id saved in the hinode */
14972 +       if (bindex >= 0)
14973 +               err = au_xino_write0(sb, bindex, hinode->hi_inode->i_ino, ino);
14974 +       /* error action? */
14975 +       if (locked) /* unlock only what the trylock actually took */
14976 +               si_read_unlock(sb);
14977 +       return err;
14978 +}
14979 +
14980 +void au_iinfo_fin(struct inode *inode)
14981 +{
14982 +       ino_t ino;
14983 +       aufs_bindex_t bend;
14984 +       unsigned char unlinked = !inode->i_nlink;
14985 +       struct au_iinfo *iinfo;
14986 +       struct au_hinode *hi;
14987 +       struct super_block *sb;
14988 +
14989 +       if (unlinked) {
14990 +               int err = au_xigen_inc(inode);
14991 +               if (unlikely(err)) /* only warned about; teardown continues */
14992 +                       AuWarn1("failed resetting i_generation, %d\n", err);
14993 +       }
14994 +
14995 +       iinfo = au_ii(inode);
14996 +       /* bad_inode case */
14997 +       if (!iinfo)
14998 +               return;
14999 +
15000 +       if (iinfo->ii_vdir)
15001 +               au_vdir_free(iinfo->ii_vdir);
15002 +
15003 +       if (iinfo->ii_bstart >= 0) { /* there is a branch range to release */
15004 +               sb = inode->i_sb;
15005 +               ino = 0;
15006 +               if (unlinked)
15007 +                       ino = inode->i_ino;
15008 +               hi = iinfo->ii_hinode + iinfo->ii_bstart;
15009 +               bend = iinfo->ii_bend;
15010 +               while (iinfo->ii_bstart++ <= bend) { /* walks slots bstart..bend, advancing ii_bstart itself */
15011 +                       if (hi->hi_inode) {
15012 +                               if (unlinked || !hi->hi_inode->i_nlink) {
15013 +                                       au_iinfo_write0(sb, hi, ino);
15014 +                                       /* ignore this error */
15015 +                                       ino = 0; /* only the first such call can get a non-zero ino */
15016 +                               }
15017 +                               au_hiput(hi);
15018 +                       }
15019 +                       hi++;
15020 +               }
15021 +       }
15022 +
15023 +       kfree(iinfo->ii_hinode);
15024 +       AuRwDestroy(&iinfo->ii_rwsem);
15025 +}
15026 diff --git a/fs/aufs/inode.c b/fs/aufs/inode.c
15027 new file mode 100644
15028 index 0000000..4273007
15029 --- /dev/null
15030 +++ b/fs/aufs/inode.c
15031 @@ -0,0 +1,414 @@
15032 +/*
15033 + * Copyright (C) 2005-2009 Junjiro R. Okajima
15034 + *
15035 + * This program, aufs is free software; you can redistribute it and/or modify
15036 + * it under the terms of the GNU General Public License as published by
15037 + * the Free Software Foundation; either version 2 of the License, or
15038 + * (at your option) any later version.
15039 + *
15040 + * This program is distributed in the hope that it will be useful,
15041 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
15042 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15043 + * GNU General Public License for more details.
15044 + *
15045 + * You should have received a copy of the GNU General Public License
15046 + * along with this program; if not, write to the Free Software
15047 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
15048 + */
15049 +
15050 +/*
15051 + * inode functions
15052 + */
15053 +
15054 +#include "aufs.h"
15055 +/* increment i_count of @inode and return it; NULL is passed through */
15056 +struct inode *au_igrab(struct inode *inode)
15057 +{
15058 +       if (!inode)
15059 +               return NULL;
15060 +       AuDebugOn(!atomic_read(&inode->i_count)); /* expects a non-zero count */
15061 +       atomic_inc_return(&inode->i_count);
15062 +       return inode;
15063 +}
15064 +/* au_cpup_attr_all() unforced, au_update_iigen(), optional i_version bump */
15065 +static void au_refresh_hinode_attr(struct inode *inode, int do_version)
15066 +{
15067 +       au_cpup_attr_all(inode, /*force*/0);
15068 +       au_update_iigen(inode);
15069 +       if (do_version)
15070 +               inode->i_version++;
15071 +}
15072 +/* move the lower inodes of @inode to the index their branch id has now */
15073 +int au_refresh_hinode_self(struct inode *inode, int do_attr)
15074 +{
15075 +       int err;
15076 +       aufs_bindex_t bindex, new_bindex;
15077 +       unsigned char update;
15078 +       struct au_hinode *p, *q, tmp;
15079 +       struct super_block *sb;
15080 +       struct au_iinfo *iinfo;
15081 +
15082 +       IiMustWriteLock(inode);
15083 +
15084 +       update = 0;
15085 +       sb = inode->i_sb;
15086 +       iinfo = au_ii(inode);
15087 +       err = au_ii_realloc(iinfo, au_sbend(sb) + 1);
15088 +       if (unlikely(err))
15089 +               goto out;
15090 +
15091 +       /*
15092 +        * err is 0 from here on. slots whose branch id is not found are put.
15093 +        */
15094 +       p = iinfo->ii_hinode + iinfo->ii_bstart;
15095 +       for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
15096 +            bindex++, p++) {
15097 +               if (!p->hi_inode)
15098 +                       continue;
15099 +
15100 +               new_bindex = au_br_index(sb, p->hi_id);
15101 +               if (new_bindex == bindex)
15102 +                       continue;
15103 +
15104 +               if (new_bindex < 0) {
15105 +                       update = 1; /* a flag; ++ may wrap an unsigned char */
15106 +                       au_hiput(p);
15107 +                       p->hi_inode = NULL;
15108 +                       continue;
15109 +               }
15110 +
15111 +               if (new_bindex < iinfo->ii_bstart)
15112 +                       iinfo->ii_bstart = new_bindex;
15113 +               if (iinfo->ii_bend < new_bindex)
15114 +                       iinfo->ii_bend = new_bindex;
15115 +               /* swap two lower inode, and loop again */
15116 +               q = iinfo->ii_hinode + new_bindex;
15117 +               tmp = *q;
15118 +               *q = *p;
15119 +               *p = tmp;
15120 +               if (tmp.hi_inode) { /* revisit this slot: it holds q's inode */
15121 +                       bindex--;
15122 +                       p--;
15123 +               }
15124 +       }
15125 +       au_update_brange(inode, /*do_put_zero*/0);
15126 +       if (do_attr)
15127 +               au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
15128 +
15129 + out:
15130 +       return err;
15131 +}
15132 +/* au_refresh_hinode_self(), then pick up the lower inodes under @dentry */
15133 +int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
15134 +{
15135 +       int err, update;
15136 +       unsigned int flags;
15137 +       aufs_bindex_t bindex, bend;
15138 +       unsigned char isdir;
15139 +       struct au_iinfo *iinfo;
15140 +
15141 +       err = au_refresh_hinode_self(inode, /*do_attr*/0);
15142 +       if (unlikely(err))
15143 +               goto out;
15144 +
15145 +       update = 0;
15146 +       iinfo = au_ii(inode);
15147 +       isdir = S_ISDIR(inode->i_mode);
15148 +       flags = au_hi_flags(inode, isdir);
15149 +       /*
15150 +        * attach the inode of every positive lower dentry that @inode lacks.
15151 +        * a different lower inode already at the same index is -EIO.
15152 +        */
15153 +       bend = au_dbend(dentry);
15154 +       for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
15155 +               struct inode *h_i;
15156 +               struct dentry *h_d;
15157 +
15158 +               h_d = au_h_dptr(dentry, bindex);
15159 +               if (!h_d || !h_d->d_inode)
15160 +                       continue;
15161 +
15162 +               if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) {
15163 +                       h_i = au_h_iptr(inode, bindex);
15164 +                       if (h_i) {
15165 +                               if (h_i == h_d->d_inode)
15166 +                                       continue;
15167 +                               err = -EIO;
15168 +                               break;
15169 +                       }
15170 +               }
15171 +               if (bindex < iinfo->ii_bstart)
15172 +                       iinfo->ii_bstart = bindex;
15173 +               if (iinfo->ii_bend < bindex)
15174 +                       iinfo->ii_bend = bindex;
15175 +               au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags);
15176 +               update = 1;
15177 +       }
15178 +       au_update_brange(inode, /*do_put_zero*/0);
15179 +
15180 +       if (unlikely(err))
15181 +               goto out;
15182 +
15183 +       au_refresh_hinode_attr(inode, update && isdir);
15184 +
15185 + out:
15186 +       AuTraceErr(err);
15187 +       return err;
15188 +}
15189 +/* set the operations of @inode by file type and attach its lower inodes */
15190 +static int set_inode(struct inode *inode, struct dentry *dentry)
15191 +{
15192 +       int err;
15193 +       unsigned int flags;
15194 +       umode_t mode;
15195 +       aufs_bindex_t bindex, bstart, btail;
15196 +       unsigned char isdir;
15197 +       struct dentry *h_dentry;
15198 +       struct inode *h_inode;
15199 +       struct au_iinfo *iinfo;
15200 +
15201 +       IiMustWriteLock(inode);
15202 +
15203 +       err = 0;
15204 +       isdir = 0;
15205 +       bstart = au_dbstart(dentry);
15206 +       h_inode = au_h_dptr(dentry, bstart)->d_inode; /* first lower inode */
15207 +       mode = h_inode->i_mode;
15208 +       switch (mode & S_IFMT) {
15209 +       case S_IFREG:
15210 +               btail = au_dbtail(dentry);
15211 +               inode->i_op = &aufs_iop;
15212 +               inode->i_fop = &aufs_file_fop;
15213 +               inode->i_mapping->a_ops = &aufs_aop;
15214 +               break;
15215 +       case S_IFDIR:
15216 +               isdir = 1;
15217 +               btail = au_dbtaildir(dentry);
15218 +               inode->i_op = &aufs_dir_iop;
15219 +               inode->i_fop = &aufs_dir_fop;
15220 +               break;
15221 +       case S_IFLNK:
15222 +               btail = au_dbtail(dentry);
15223 +               inode->i_op = &aufs_symlink_iop;
15224 +               break;
15225 +       case S_IFBLK:
15226 +       case S_IFCHR:
15227 +       case S_IFIFO:
15228 +       case S_IFSOCK:
15229 +               btail = au_dbtail(dentry);
15230 +               inode->i_op = &aufs_iop;
15231 +               au_init_special_fop(inode, mode, h_inode->i_rdev);
15232 +               break;
15233 +       default: /* nothing is set on @inode; btail stays unset and unused */
15234 +               AuIOErr("Unknown file type 0%o\n", mode);
15235 +               err = -EIO;
15236 +               goto out;
15237 +       }
15238 +
15239 +       /* do not set inotify for whiteouted dirs (SHWH mode) */
15240 +       flags = au_hi_flags(inode, isdir);
15241 +       if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
15242 +           && au_ftest_hi(flags, HINOTIFY)
15243 +           && dentry->d_name.len > AUFS_WH_PFX_LEN
15244 +           && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
15245 +               au_fclr_hi(flags, HINOTIFY);
15246 +       iinfo = au_ii(inode);
15247 +       iinfo->ii_bstart = bstart;
15248 +       iinfo->ii_bend = btail;
15249 +       for (bindex = bstart; bindex <= btail; bindex++) {
15250 +               h_dentry = au_h_dptr(dentry, bindex);
15251 +               if (h_dentry) /* indices without a lower dentry are skipped */
15252 +                       au_set_h_iptr(inode, bindex,
15253 +                                     au_igrab(h_dentry->d_inode), flags);
15254 +       }
15255 +       au_cpup_attr_all(inode, /*force*/1);
15256 +
15257 + out:
15258 +       return err;
15259 +}
15260 +/* look for the first lower inode of @dentry among those held by @inode */
15261 +/* successful returns with iinfo write_locked */
15262 +static int reval_inode(struct inode *inode, struct dentry *dentry, int *matched)
15263 +{
15264 +       int err;
15265 +       aufs_bindex_t bindex, bend;
15266 +       struct inode *h_inode, *h_dinode;
15267 +
15268 +       *matched = 0;
15269 +
15270 +       /*
15271 +        * before this function, if aufs got any iinfo lock, it must be only
15272 +        * one, the parent dir.
15273 +        * it can happen by UDBA and the obsoleted inode number.
15274 +        */
15275 +       err = -EIO;
15276 +       if (unlikely(inode->i_ino == parent_ino(dentry)))
15277 +               goto out; /* the iinfo lock is not taken on this path */
15278 +
15279 +       err = 0;
15280 +       ii_write_lock_new_child(inode);
15281 +       h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode;
15282 +       bend = au_ibend(inode);
15283 +       for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
15284 +               h_inode = au_h_iptr(inode, bindex);
15285 +               if (h_inode && h_inode == h_dinode) {
15286 +                       *matched = 1;
15287 +                       err = 0;
15288 +                       if (au_iigen(inode) != au_digen(dentry))
15289 +                               err = au_refresh_hinode(inode, dentry);
15290 +                       break;
15291 +               }
15292 +       }
15293 +       /* NOTE: without a match, err is 0, *matched is 0 and the lock is kept */
15294 +       if (unlikely(err))
15295 +               ii_write_unlock(inode);
15296 + out:
15297 +       return err;
15298 +}
15299 +/* read the number kept for lower @h_ino into @ino; get and store one if 0 */
15300 +int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
15301 +          unsigned int d_type, ino_t *ino)
15302 +{
15303 +       int err;
15304 +       struct mutex *nondir_mtx = NULL;
15305 +
15306 +       /* prevent hardlinks from race condition */
15307 +       if (d_type != DT_DIR) {
15308 +               nondir_mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
15309 +               mutex_lock(nondir_mtx);
15310 +       }
15311 +
15312 +       err = au_xino_read(sb, bindex, h_ino, ino);
15313 +       if (unlikely(err))
15314 +               goto out;
15315 +       if (*ino)
15316 +               goto out; /* a number is already there, err is 0 */
15317 +
15318 +       /* nothing stored yet: take a new number and write it */
15319 +       err = -EIO;
15320 +       *ino = au_xino_new_ino(sb);
15321 +       if (unlikely(!*ino))
15322 +               goto out;
15323 +       err = au_xino_write(sb, bindex, h_ino, *ino);
15324 +       /* the result of the write is returned as it is */
15325 +
15326 + out:
15327 +       /* the mutex is taken for non-directories only */
15328 +       if (nondir_mtx)
15329 +               mutex_unlock(nondir_mtx);
15330 +       return err;
15331 +}
15332 +
15333 +/* get the aufs inode for @dentry; successful returns with iinfo write_locked */
15334 +/* todo: return with unlocked? */
15335 +struct inode *au_new_inode(struct dentry *dentry, int must_new)
15336 +{
15337 +       struct inode *inode;
15338 +       struct dentry *h_dentry;
15339 +       struct super_block *sb;
15340 +       ino_t h_ino, ino;
15341 +       int err, match;
15342 +       aufs_bindex_t bstart;
15343 +
15344 +       sb = dentry->d_sb;
15345 +       bstart = au_dbstart(dentry);
15346 +       h_dentry = au_h_dptr(dentry, bstart);
15347 +       h_ino = h_dentry->d_inode->i_ino;
15348 +       err = au_xino_read(sb, bstart, h_ino, &ino);
15349 +       inode = ERR_PTR(err);
15350 +       if (unlikely(err))
15351 +               goto out;
15352 + new_ino:
15353 +       if (!ino) {
15354 +               ino = au_xino_new_ino(sb);
15355 +               if (unlikely(!ino)) {
15356 +                       inode = ERR_PTR(-EIO);
15357 +                       goto out;
15358 +               }
15359 +       }
15360 +
15361 +       AuDbg("i%lu\n", (unsigned long)ino);
15362 +       inode = au_iget_locked(sb, ino);
15363 +       err = PTR_ERR(inode);
15364 +       if (IS_ERR(inode))
15365 +               goto out;
15366 +
15367 +       AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
15368 +       if (inode->i_state & I_NEW) {
15369 +               ii_write_lock_new_child(inode);
15370 +               err = set_inode(inode, dentry);
15371 +               if (!err) {
15372 +                       unlock_new_inode(inode);
15373 +                       goto out; /* success */
15374 +               }
15375 +               /* iget_failed() unlocks and iput()s by itself; keep our ref */
15376 +               atomic_inc(&inode->i_count);
15377 +               iget_failed(inode);
15378 +               ii_write_unlock(inode);
15379 +               goto out_iput;
15380 +       } else if (!must_new) {
15381 +               err = reval_inode(inode, dentry, &match);
15382 +               if (!err)
15383 +                       goto out; /* success */
15384 +               else if (match)
15385 +                       goto out_iput;
15386 +       }
15387 +       if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode)))
15388 +               AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
15389 +                       " b%d, %s, %.*s, hi%lu, i%lu.\n",
15390 +                       bstart, au_sbtype(h_dentry->d_sb), AuDLNPair(dentry),
15391 +                       (unsigned long)h_ino, (unsigned long)ino);
15392 +       ino = 0; /* the stored number is reset, then a new one is tried */
15393 +       err = au_xino_write(sb, bstart, h_ino, /*ino*/0);
15394 +       if (!err) {
15395 +               iput(inode);
15396 +               goto new_ino;
15397 +       }
15398 + out_iput:
15399 +       iput(inode);
15400 +       inode = ERR_PTR(err);
15401 + out:
15402 +       return inode;
15403 +}
15404 +
15405 +/* ---------------------------------------------------------------------- */
15406 +/* au_br_rdonly() of the branch, else -EROFS for an immutable lower inode */
15407 +int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
15408 +              struct inode *inode)
15409 +{
15410 +       int err;
15411 +       struct inode *h_inode;
15412 +
15413 +       err = au_br_rdonly(au_sbr(sb, bindex));
15414 +       if (err || !inode)
15415 +               return err;
15416 +
15417 +       /* pseudo-link after flushed may happen out of bounds */
15418 +       if (bindex < au_ibstart(inode) || au_ibend(inode) < bindex)
15419 +               return 0;
15420 +
15421 +       /*
15422 +        * permission check is unnecessary since vfsub routine
15423 +        * will be called later
15424 +        */
15425 +       h_inode = au_h_iptr(inode, bindex);
15426 +       if (h_inode && IS_IMMUTABLE(h_inode))
15427 +               return -EROFS;
15428 +       return 0;
15429 +}
15430 +/* inode_permission() on a lower inode; fsuid 0 passes without the call */
15431 +int au_test_h_perm(struct inode *h_inode, int mask)
15432 +{
15433 +       if (current->fsuid)
15434 +               return inode_permission(h_inode, mask);
15435 +       return 0;
15436 +}
15437 +/* au_test_h_perm(), adding MAY_READ for a write test on an NFS directory */
15438 +int au_test_h_perm_sio(struct inode *h_inode, int mask)
15439 +{
15440 +       if (au_test_nfs(h_inode->i_sb)
15441 +           && (mask & MAY_WRITE)
15442 +           && S_ISDIR(h_inode->i_mode))
15443 +               mask |= MAY_READ; /* force permission check */
15444 +       return au_test_h_perm(h_inode, mask);
15445 +}
15446 diff --git a/fs/aufs/inode.h b/fs/aufs/inode.h
15447 new file mode 100644
15448 index 0000000..39a8ae5
15449 --- /dev/null
15450 +++ b/fs/aufs/inode.h
15451 @@ -0,0 +1,474 @@
15452 +/*
15453 + * Copyright (C) 2005-2009 Junjiro R. Okajima
15454 + *
15455 + * This program, aufs is free software; you can redistribute it and/or modify
15456 + * it under the terms of the GNU General Public License as published by
15457 + * the Free Software Foundation; either version 2 of the License, or
15458 + * (at your option) any later version.
15459 + *
15460 + * This program is distributed in the hope that it will be useful,
15461 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
15462 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15463 + * GNU General Public License for more details.
15464 + *
15465 + * You should have received a copy of the GNU General Public License
15466 + * along with this program; if not, write to the Free Software
15467 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
15468 + */
15469 +
15470 +/*
15471 + * inode operations
15472 + */
15473 +
15474 +#ifndef __AUFS_INODE_H__
15475 +#define __AUFS_INODE_H__
15476 +
15477 +#ifdef __KERNEL__
15478 +
15479 +#include <linux/fs.h>
15480 +#include <linux/inotify.h>
15481 +#include <linux/aufs_type.h>
15482 +#include "rwsem.h"
15483 +
15484 +struct vfsmount;
15485 +/* inotify data of a lower inode; has no member without CONFIG_AUFS_HINOTIFY */
15486 +struct au_hinotify {
15487 +#ifdef CONFIG_AUFS_HINOTIFY
15488 +       struct inotify_watch    hin_watch;
15489 +       struct inode            *hin_aufs_inode;        /* no get/put */
15490 +#endif
15491 +};
15492 +/* an element of au_iinfo.ii_hinode[]: a lower inode and its branch id */
15493 +struct au_hinode {
15494 +       struct inode            *hi_inode;
15495 +       aufs_bindex_t           hi_id; /* given to au_br_index() */
15496 +#ifdef CONFIG_AUFS_HINOTIFY
15497 +       struct au_hinotify      *hi_notify;
15498 +#endif
15499 +
15500 +       /* reference to the copied-up whiteout with get/put */
15501 +       struct dentry           *hi_whdentry;
15502 +};
15503 +/* aufs private part of an inode, reached via au_ii() */
15504 +struct au_vdir;
15505 +struct au_iinfo {
15506 +       atomic_t                ii_generation;
15507 +       struct super_block      *ii_hsb1;       /* no get/put */
15508 +
15509 +       struct au_rwsem         ii_rwsem;
15510 +       aufs_bindex_t           ii_bstart, ii_bend; /* range in ii_hinode[] */
15511 +       __u32                   ii_higen; /* see au_test_higen() */
15512 +       struct au_hinode        *ii_hinode; /* NULL makes au_ii() return NULL */
15513 +       struct au_vdir          *ii_vdir;
15514 +};
15515 +/* iinfo and the vfs inode in one object; au_ii() uses container_of() on it */
15516 +struct au_icntnr {
15517 +       struct au_iinfo iinfo;
15518 +       struct inode vfs_inode;
15519 +};
15520 +
15521 +/* au_pin flags; the fset/fclr macros expand to a braced statement */
15522 +#define AuPin_DI_LOCKED                1
15523 +#define AuPin_MNT_WRITE                (1 << 1)
15524 +#define au_ftest_pin(flags, name)      ((flags) & AuPin_##name)
15525 +#define au_fset_pin(flags, name)       { (flags) |= AuPin_##name; }
15526 +#define au_fclr_pin(flags, name)       { (flags) &= ~AuPin_##name; }
15527 +/* arguments and results of au_pin()/au_do_pin(), released by au_unpin() */
15528 +struct au_pin {
15529 +       /* input */
15530 +       struct dentry *dentry;
15531 +       unsigned int udba;
15532 +       unsigned char lsc_di, lsc_hi, flags; /* flags: AuPin_xxx */
15533 +       aufs_bindex_t bindex;
15534 +
15535 +       /* output */
15536 +       struct dentry *parent; /* au_pin_set_parent() does dget/dput on it */
15537 +       struct au_hinode *hdir;
15538 +       struct vfsmount *h_mnt;
15539 +};
15540 +
15541 +/* ---------------------------------------------------------------------- */
15542 +/* the iinfo placed next to @inode in au_icntnr; NULL if ii_hinode is unset */
15543 +static inline struct au_iinfo *au_ii(struct inode *inode)
15544 +{
15545 +       struct au_icntnr *cntnr;
15546 +
15547 +       cntnr = container_of(inode, struct au_icntnr, vfs_inode);
15548 +       if (!cntnr->iinfo.ii_hinode)
15549 +               return NULL; /* debugging bad_inode case */
15550 +       return &cntnr->iinfo;
15551 +}
15552 +
15553 +/* ---------------------------------------------------------------------- */
15554 +
15555 +/* inode.c: lower inode refresh, inode number mapping, permission tests */
15556 +struct inode *au_igrab(struct inode *inode);
15557 +int au_refresh_hinode_self(struct inode *inode, int do_attr);
15558 +int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
15559 +int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
15560 +          unsigned int d_type, ino_t *ino);
15561 +struct inode *au_new_inode(struct dentry *dentry, int must_new);
15562 +int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
15563 +              struct inode *inode);
15564 +int au_test_h_perm(struct inode *h_inode, int mask);
15565 +int au_test_h_perm_sio(struct inode *h_inode, int mask);
15566 +/* au_ino() with CONFIG_AUFS_SHWH; otherwise returns 0 and leaves @ino alone */
15567 +static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
15568 +                           ino_t h_ino, unsigned int d_type, ino_t *ino)
15569 +{
15570 +#ifdef CONFIG_AUFS_SHWH
15571 +       return au_ino(sb, bindex, h_ino, d_type, ino);
15572 +#else
15573 +       return 0;
15574 +#endif
15575 +}
15576 +
15577 +/* i_op.c: inode operation tables, au_wr_dir() and the au_pin interface */
15578 +extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop;
15579 +
15580 +/* au_wr_dir flags, kept in au_wr_dir_args.flags */
15581 +#define AuWrDir_ADD_ENTRY      1
15582 +#define AuWrDir_ISDIR          (1 << 1)
15583 +#define au_ftest_wrdir(flags, name)    ((flags) & AuWrDir_##name)
15584 +#define au_fset_wrdir(flags, name)     { (flags) |= AuWrDir_##name; }
15585 +#define au_fclr_wrdir(flags, name)     { (flags) &= ~AuWrDir_##name; }
15586 +
15587 +struct au_wr_dir_args {
15588 +       aufs_bindex_t force_btgt;
15589 +       unsigned char flags; /* AuWrDir_xxx */
15590 +};
15591 +int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
15592 +             struct au_wr_dir_args *args);
15593 +
15594 +struct dentry *au_pinned_h_parent(struct au_pin *pin);
15595 +void au_pin_init(struct au_pin *pin, struct dentry *dentry,
15596 +                aufs_bindex_t bindex, int lsc_di, int lsc_hi,
15597 +                unsigned int udba, unsigned char flags);
15598 +int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
15599 +          unsigned int udba, unsigned char flags) __must_check;
15600 +int au_do_pin(struct au_pin *pin) __must_check;
15601 +void au_unpin(struct au_pin *pin);
15602 +
15603 +/* i_op_add.c: inode operations which add an entry */
15604 +int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
15605 +              struct dentry *h_parent, int isdir);
15606 +int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev);
15607 +int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
15608 +int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
15609 +               struct nameidata *nd);
15610 +int aufs_link(struct dentry *src_dentry, struct inode *dir,
15611 +             struct dentry *dentry);
15612 +int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
15613 +
15614 +/* i_op_del.c: inode operations which remove an entry */
15615 +int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
15616 +int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
15617 +              struct dentry *h_parent, int isdir);
15618 +int aufs_unlink(struct inode *dir, struct dentry *dentry);
15619 +int aufs_rmdir(struct inode *dir, struct dentry *dentry);
15620 +
15621 +/* i_op_ren.c: rename */
15622 +int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
15623 +int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
15624 +               struct inode *dir, struct dentry *dentry);
15625 +
15626 +/* iinfo.c: access to and life cycle of the lower inode array */
15627 +struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
15628 +void au_hiput(struct au_hinode *hinode);
15629 +void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex);
15630 +void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
15631 +                 struct dentry *h_wh);
15632 +unsigned int au_hi_flags(struct inode *inode, int isdir);
15633 +
15634 +/* hinode flags, the @flags of au_set_h_iptr() */
15635 +#define AuHi_XINO      1
15636 +#define AuHi_HINOTIFY  (1 << 1)
15637 +#define au_ftest_hi(flags, name)       ((flags) & AuHi_##name)
15638 +#define au_fset_hi(flags, name)                { (flags) |= AuHi_##name; }
15639 +#define au_fclr_hi(flags, name)                { (flags) &= ~AuHi_##name; }
15640 +/* with the flag defined as 0, au_ftest_hi(flags, HINOTIFY) is always 0 */
15641 +#ifndef CONFIG_AUFS_HINOTIFY
15642 +#undef AuHi_HINOTIFY
15643 +#define AuHi_HINOTIFY  0
15644 +#endif
15645 +
15646 +void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
15647 +                  struct inode *h_inode, unsigned int flags);
15648 +
15649 +void au_update_iigen(struct inode *inode);
15650 +void au_update_brange(struct inode *inode, int do_put_zero);
15651 +
15652 +int au_iinfo_init(struct inode *inode);
15653 +void au_iinfo_fin(struct inode *inode);
15654 +int au_ii_realloc(struct au_iinfo *iinfo, int nbr);
15655 +
15656 +/* plink.c; au_plink_list() is an AuStubVoid() without CONFIG_AUFS_DEBUG */
15657 +void au_plink_maint_block(struct super_block *sb);
15658 +void au_plink_maint_leave(struct file *file);
15659 +#ifdef CONFIG_AUFS_DEBUG
15660 +void au_plink_list(struct super_block *sb);
15661 +#else
15662 +AuStubVoid(au_plink_list, struct super_block *sb)
15663 +#endif
15664 +int au_plink_test(struct inode *inode);
15665 +struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
15666 +void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
15667 +                    struct dentry *h_dentry);
15668 +void au_plink_put(struct super_block *sb);
15669 +void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
15670 +long au_plink_ioctl(struct file *file, unsigned int cmd);
15671 +
15672 +/* ---------------------------------------------------------------------- */
15673 +
15674 +/* lock subclass for iinfo, the 2nd argument of au_rw_xxx_lock_nested() */
15675 +enum {
15676 +       AuLsc_II_CHILD,         /* child first */
15677 +       AuLsc_II_CHILD2,        /* rename(2), link(2), and cpup at hinotify */
15678 +       AuLsc_II_CHILD3,        /* copyup dirs */
15679 +       AuLsc_II_PARENT,        /* see AuLsc_I_PARENT in vfsub.h */
15680 +       AuLsc_II_PARENT2,
15681 +       AuLsc_II_PARENT3,       /* copyup dirs */
15682 +       AuLsc_II_NEW_CHILD
15683 +};
15684 +
15685 +/*
15686 + * ii_read_lock_child, ii_write_lock_child,
15687 + * ii_read_lock_child2, ii_write_lock_child2,
15688 + * ii_read_lock_child3, ii_write_lock_child3,
15689 + * ii_read_lock_parent, ii_write_lock_parent,
15690 + * ii_read_lock_parent2, ii_write_lock_parent2,
15691 + * ii_read_lock_parent3, ii_write_lock_parent3,
15692 + * ii_read_lock_new_child, ii_write_lock_new_child,
15693 + */
15694 +#define AuReadLockFunc(name, lsc) \
15695 +static inline void ii_read_lock_##name(struct inode *i) \
15696 +{ \
15697 +       au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
15698 +}
15699 +
15700 +#define AuWriteLockFunc(name, lsc) \
15701 +static inline void ii_write_lock_##name(struct inode *i) \
15702 +{ \
15703 +       au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
15704 +}
15705 +
15706 +#define AuRWLockFuncs(name, lsc) \
15707 +       AuReadLockFunc(name, lsc) \
15708 +       AuWriteLockFunc(name, lsc)
15709 +/* define the read and the write lock function for each subclass */
15710 +AuRWLockFuncs(child, CHILD);
15711 +AuRWLockFuncs(child2, CHILD2);
15712 +AuRWLockFuncs(child3, CHILD3);
15713 +AuRWLockFuncs(parent, PARENT);
15714 +AuRWLockFuncs(parent2, PARENT2);
15715 +AuRWLockFuncs(parent3, PARENT3);
15716 +AuRWLockFuncs(new_child, NEW_CHILD);
15717 +/* the generator macros are local to this header */
15718 +#undef AuReadLockFunc
15719 +#undef AuWriteLockFunc
15720 +#undef AuRWLockFuncs
15721 +
15722 +/*
15723 + * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
15724 + */
15725 +AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
15726 +/* the AuRwMustXxx() checks applied to the ii_rwsem of an inode */
15727 +#define IiMustNoWaiters(i)     AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
15728 +#define IiMustAnyLock(i)       AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
15729 +#define IiMustWriteLock(i)     AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
15730 +
15731 +/* ---------------------------------------------------------------------- */
15732 +/* current value of ii_generation; no lock check is made here */
15733 +static inline unsigned int au_iigen(struct inode *inode)
15734 +{
15735 +       return atomic_read(&au_ii(inode)->ii_generation);
15736 +}
15737 +/* 0 when ii_hsb1 and ii_higen equal i_sb and i_generation of @h_inode */
15738 +/* tiny test for inode number */
15739 +/* tmpfs generation is too rough */
15740 +static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
15741 +{
15742 +       struct au_iinfo *iinfo;
15743 +
15744 +       iinfo = au_ii(inode);
15745 +       AuRwMustAnyLock(&iinfo->ii_rwsem);
15746 +       return !(iinfo->ii_hsb1 == h_inode->i_sb
15747 +                && iinfo->ii_higen == h_inode->i_generation);
15748 +}
15749 +
15750 +/* ---------------------------------------------------------------------- */
15751 +/* hi_id kept in the @bindex-th element of the lower inode array */
15752 +static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
15753 +                                       aufs_bindex_t bindex)
15754 +{
15755 +       IiMustAnyLock(inode);
15756 +       return (au_ii(inode)->ii_hinode + bindex)->hi_id;
15757 +}
15758 +/* ii_bstart of @inode, after the IiMustAnyLock() check */
15759 +static inline aufs_bindex_t au_ibstart(struct inode *inode)
15760 +{
15761 +       IiMustAnyLock(inode);
15762 +       return au_ii(inode)->ii_bstart;
15763 +}
15764 +/* ii_bend of @inode, after the IiMustAnyLock() check */
15765 +static inline aufs_bindex_t au_ibend(struct inode *inode)
15766 +{
15767 +       IiMustAnyLock(inode);
15768 +       return au_ii(inode)->ii_bend;
15769 +}
15770 +/* ii_vdir of @inode, after the IiMustAnyLock() check */
15771 +static inline struct au_vdir *au_ivdir(struct inode *inode)
15772 +{
15773 +       IiMustAnyLock(inode);
15774 +       return au_ii(inode)->ii_vdir;
15775 +}
15776 +/* hi_whdentry kept in the @bindex-th element of the lower inode array */
15777 +static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
15778 +{
15779 +       IiMustAnyLock(inode);
15780 +       return (au_ii(inode)->ii_hinode + bindex)->hi_whdentry;
15781 +}
15782 +/* store @bindex in ii_bend, after the IiMustWriteLock() check */
15783 +static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex)
15784 +{
15785 +       IiMustWriteLock(inode);
15786 +       au_ii(inode)->ii_bend = bindex;
15787 +}
15788 +/* store @vdir in ii_vdir; a previous value is not freed here */
15789 +static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
15790 +{
15791 +       IiMustWriteLock(inode);
15792 +       au_ii(inode)->ii_vdir = vdir;
15793 +}
15794 +/* address of the @bindex-th element of the lower inode array */
15795 +static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
15796 +{
15797 +       IiMustAnyLock(inode);
15798 +       return &au_ii(inode)->ii_hinode[bindex];
15799 +}
15800 +
15801 +/* ---------------------------------------------------------------------- */
15802 +/* the parent dentry kept in @pin, or NULL for a NULL @pin */
15803 +static inline struct dentry *au_pinned_parent(struct au_pin *pin)
15804 +{
15805 +       if (!pin)
15806 +               return NULL;
15807 +       return pin->parent;
15808 +}
15809 +/* hi_inode of the hdir kept in @pin, or NULL if @pin or its hdir is NULL */
15810 +static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
15811 +{
15812 +       if (!pin || !pin->hdir)
15813 +               return NULL;
15814 +       return pin->hdir->hi_inode;
15815 +}
15816 +/* the hdir kept in @pin, or NULL for a NULL @pin */
15817 +static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
15818 +{
15819 +       if (!pin)
15820 +               return NULL;
15821 +       return pin->hdir;
15822 +}
15823 +/* replace the dentry kept in @pin; nothing is done for a NULL @pin */
15824 +static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
15825 +{
15826 +       if (pin)
15827 +               pin->dentry = dentry; /* plain store, no dget/dput */
15828 +}
15829 +/* set or clear AuPin_DI_LOCKED in @pin by @lflag; a NULL @pin is ignored */
15830 +static inline void au_pin_set_parent_lflag(struct au_pin *pin,
15831 +                                          unsigned char lflag)
15832 +{
15833 +       if (!pin)
15834 +               return;
15835 +       /* dirty macros require brackets */
15836 +       if (lflag) {
15837 +               au_fset_pin(pin->flags, DI_LOCKED);
15838 +       } else {
15839 +               au_fclr_pin(pin->flags, DI_LOCKED);
15840 +       }
15841 +}
15842 +/* dput() the parent kept in @pin and keep a dget() of @parent instead */
15843 +static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
15844 +{
15845 +       if (!pin)
15846 +               return;
15847 +       dput(pin->parent);
15848 +       pin->parent = dget(parent);
15849 +}
15850 +
15851 +/* ---------------------------------------------------------------------- */
15852 +
15853 +#ifdef CONFIG_AUFS_HINOTIFY
15854 +/* hinotify.c */
15855 +int au_hin_alloc(struct au_hinode *hinode, struct inode *inode,
15856 +                struct inode *h_inode);
15857 +void au_hin_free(struct au_hinode *hinode);
15858 +void au_hin_ctl(struct au_hinode *hinode, int do_set);
15859 +void au_reset_hinotify(struct inode *inode, unsigned int flags);
15860 +
15861 +int __init au_hinotify_init(void);
15862 +void au_hinotify_fin(void);
15863 +
15864 +static inline
15865 +void au_hin_init(struct au_hinode *hinode, struct au_hinotify *val)
15866 +{
15867 +       hinode->hi_notify = val;
15868 +}
15869 +
15870 +static inline void au_iigen_dec(struct inode *inode)
15871 +{
15872 +       atomic_dec_return(&au_ii(inode)->ii_generation);
15873 +}
15874 +
15875 +#else
15876 +static inline
15877 +int au_hin_alloc(struct au_hinode *hinode __maybe_unused,
15878 +                struct inode *inode __maybe_unused,
15879 +                struct inode *h_inode __maybe_unused)
15880 +{
15881 +       return -EOPNOTSUPP;
15882 +}
15883 +
15884 +AuStubVoid(au_hin_free, struct au_hinode *hinode __maybe_unused)
15885 +AuStubVoid(au_hin_ctl, struct au_hinode *hinode __maybe_unused,
15886 +          int do_set __maybe_unused)
15887 +AuStubVoid(au_reset_hinotify, struct inode *inode __maybe_unused,
15888 +          unsigned int flags __maybe_unused)
15889 +AuStubInt0(__init au_hinotify_init, void)
15890 +AuStubVoid(au_hinotify_fin, void)
15891 +AuStubVoid(au_hin_init, struct au_hinode *hinode __maybe_unused,
15892 +          struct au_hinotify *val __maybe_unused)
15893 +#endif /* CONFIG_AUFS_HINOTIFY */
15894 +
15895 +static inline void au_hin_suspend(struct au_hinode *hdir)
15896 +{
15897 +       au_hin_ctl(hdir, /*do_set*/0);
15898 +}
15899 +
15900 +static inline void au_hin_resume(struct au_hinode *hdir)
15901 +{
15902 +       au_hin_ctl(hdir, /*do_set*/1);
15903 +}
15904 +
15905 +static inline void au_hin_imtx_lock(struct au_hinode *hdir)
15906 +{
15907 +       mutex_lock(&hdir->hi_inode->i_mutex);
15908 +       au_hin_suspend(hdir);
15909 +}
15910 +
15911 +static inline void au_hin_imtx_lock_nested(struct au_hinode *hdir,
15912 +                                          unsigned int sc __maybe_unused)
15913 +{
15914 +       mutex_lock_nested(&hdir->hi_inode->i_mutex, sc);
15915 +       au_hin_suspend(hdir);
15916 +}
15917 +
15918 +static inline void au_hin_imtx_unlock(struct au_hinode *hdir)
15919 +{
15920 +       au_hin_resume(hdir);
15921 +       mutex_unlock(&hdir->hi_inode->i_mutex);
15922 +}
15923 +
15924 +#endif /* __KERNEL__ */
15925 +#endif /* __AUFS_INODE_H__ */
15926 diff --git a/fs/aufs/ioctl.c b/fs/aufs/ioctl.c
15927 new file mode 100644
15928 index 0000000..e4aba32
15929 --- /dev/null
15930 +++ b/fs/aufs/ioctl.c
15931 @@ -0,0 +1,127 @@
15932 +/*
15933 + * Copyright (C) 2005-2009 Junjiro R. Okajima
15934 + *
15935 + * This program, aufs is free software; you can redistribute it and/or modify
15936 + * it under the terms of the GNU General Public License as published by
15937 + * the Free Software Foundation; either version 2 of the License, or
15938 + * (at your option) any later version.
15939 + *
15940 + * This program is distributed in the hope that it will be useful,
15941 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
15942 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15943 + * GNU General Public License for more details.
15944 + *
15945 + * You should have received a copy of the GNU General Public License
15946 + * along with this program; if not, write to the Free Software
15947 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
15948 + */
15949 +
15950 +/*
15951 + * ioctl
15952 + * plink-management and readdir in userspace.
15953 + * assist the pathconf(3) wrapper library.
15954 + */
15955 +
15956 +#include <linux/file.h>
15957 +#include "aufs.h"
15958 +
15959 +static int au_wbr_fd(struct path *path)
15960 +{
15961 +       int err, fd, flags;
15962 +       aufs_bindex_t wbi, bindex, bend;
15963 +       struct file *h_file;
15964 +       struct super_block *sb;
15965 +       struct dentry *root;
15966 +       struct au_branch *wbr;
15967 +
15968 +       err = get_unused_fd(); /* reserve the fd to be returned */
15969 +       if (unlikely(err < 0))
15970 +               goto out;
15971 +       fd = err;
15972 +
15973 +       flags = O_RDONLY | O_DIRECTORY;
15974 +       if (force_o_largefile())
15975 +               flags |= O_LARGEFILE;
15976 +
15977 +       wbi = 0; /* branch 0 unless the scan below picks another */
15978 +       sb = path->dentry->d_sb;
15979 +       root = sb->s_root;
15980 +       aufs_read_lock(root, AuLock_IR);
15981 +       wbr = au_sbr(sb, wbi);
15982 +       if (!(path->mnt->mnt_flags & MNT_READONLY)
15983 +           && !au_br_writable(wbr->br_perm)) {
15984 +               bend = au_sbend(sb);
15985 +               for (bindex = 1; bindex <= bend; bindex++) {
15986 +                       wbr = au_sbr(sb, bindex);
15987 +                       if (au_br_writable(wbr->br_perm)) {
15988 +                               wbi = bindex; /* first writable branch */
15989 +                               break;
15990 +                       }
15991 +               }
15992 +               wbr = au_sbr(sb, wbi); /* branch 0 again if none matched */
15993 +       }
15994 +       AuDbg("wbi %d\n", wbi);
15995 +       h_file = au_h_open(root, wbi, flags, NULL);
15996 +       aufs_read_unlock(root, AuLock_IR);
15997 +       err = PTR_ERR(h_file);
15998 +       if (IS_ERR(h_file))
15999 +               goto out_fd;
16000 +
16001 +       atomic_dec(&wbr->br_count); /* cf. au_h_open() */
16002 +       fd_install(fd, h_file);
16003 +       err = fd; /* the new descriptor is the return value */
16004 +       goto out; /* success */
16005 +
16006 + out_fd:
16007 +       put_unused_fd(fd); /* the open failed, release the reserved fd */
16008 + out:
16009 +       return err;
16010 +}
16011 +
16012 +/* ---------------------------------------------------------------------- */
16013 +
16014 +long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
16015 +{
16016 +       long err;
16017 +
16018 +       switch (cmd) {
16019 +       case AUFS_CTL_PLINK_MAINT:
16020 +       case AUFS_CTL_PLINK_CLEAN:
16021 +               err = au_plink_ioctl(file, cmd);
16022 +               break;
16023 +
16024 +       case AUFS_CTL_RDU:
16025 +       case AUFS_CTL_RDU_INO:
16026 +               err = au_rdu_ioctl(file, cmd, arg);
16027 +               break;
16028 +
16029 +       case AUFS_CTL_WBR_FD:
16030 +               err = au_wbr_fd(&file->f_path);
16031 +               break;
16032 +
16033 +       default:
16034 +               AuDbg("0x%x\n", cmd);
16035 +               err = -EINVAL;
16036 +       }
16037 +
16038 +       AuTraceErr(err);
16039 +       return err;
16040 +}
16041 +
16042 +long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
16043 +{
16044 +       long err;
16045 +
16046 +       switch (cmd) {
16047 +       case AUFS_CTL_WBR_FD:
16048 +               err = au_wbr_fd(&file->f_path);
16049 +               break;
16050 +
16051 +       default:
16052 +               AuDbg("0x%x\n", cmd);
16053 +               err = -EINVAL;
16054 +       }
16055 +
16056 +       AuTraceErr(err);
16057 +       return err;
16058 +}
16059 diff --git a/fs/aufs/loop.c b/fs/aufs/loop.c
16060 new file mode 100644
16061 index 0000000..277011f
16062 --- /dev/null
16063 +++ b/fs/aufs/loop.c
16064 @@ -0,0 +1,55 @@
16065 +/*
16066 + * Copyright (C) 2005-2009 Junjiro R. Okajima
16067 + *
16068 + * This program, aufs is free software; you can redistribute it and/or modify
16069 + * it under the terms of the GNU General Public License as published by
16070 + * the Free Software Foundation; either version 2 of the License, or
16071 + * (at your option) any later version.
16072 + *
16073 + * This program is distributed in the hope that it will be useful,
16074 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
16075 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16076 + * GNU General Public License for more details.
16077 + *
16078 + * You should have received a copy of the GNU General Public License
16079 + * along with this program; if not, write to the Free Software
16080 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
16081 + */
16082 +
16083 +/*
16084 + * support for loopback block device as a branch
16085 + */
16086 +
16087 +#include <linux/loop.h>
16088 +#include "aufs.h"
16089 +
16090 +/*
16091 + * test if two lower dentries have overlapping branches.
16092 + */
16093 +int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_d1,
16094 +                            struct dentry *h_d2)
16095 +{
16096 +       struct super_block *h_sb = h_d1->d_inode->i_sb;
16097 +       struct loop_device *lo;
16098 +
16099 +       /* nothing to test unless h_d1 is on a loopback block device */
16100 +       if (MAJOR(h_sb->s_dev) != LOOP_MAJOR)
16101 +               return 0;
16102 +
16103 +       lo = h_sb->s_bdev->bd_disk->private_data;
16104 +       h_d1 = lo->lo_backing_file->f_dentry;
16105 +       /* h_d1 can be local NFS. in this case aufs cannot detect the loop */
16106 +       if (unlikely(h_d1->d_sb == sb))
16107 +               return 1;
16108 +       return !!au_test_subdir(h_d1, h_d2);
16109 +}
16110 +
16111 +/* true if a kernel thread named 'loop[0-9].*' accesses a file */
16112 +int au_test_loopback_kthread(void)
16113 +{
16114 +       const char *name = current->comm;
16115 +
16116 +       return !current->mm
16117 +              && strncmp(name, "loop", 4) == 0
16118 +              && '0' <= name[4] && name[4] <= '9';
16119 +}
16120 diff --git a/fs/aufs/loop.h b/fs/aufs/loop.h
16121 new file mode 100644
16122 index 0000000..fe88093
16123 --- /dev/null
16124 +++ b/fs/aufs/loop.h
16125 @@ -0,0 +1,43 @@
16126 +/*
16127 + * Copyright (C) 2005-2009 Junjiro R. Okajima
16128 + *
16129 + * This program, aufs is free software; you can redistribute it and/or modify
16130 + * it under the terms of the GNU General Public License as published by
16131 + * the Free Software Foundation; either version 2 of the License, or
16132 + * (at your option) any later version.
16133 + *
16134 + * This program is distributed in the hope that it will be useful,
16135 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
16136 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16137 + * GNU General Public License for more details.
16138 + *
16139 + * You should have received a copy of the GNU General Public License
16140 + * along with this program; if not, write to the Free Software
16141 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
16142 + */
16143 +
16144 +/*
16145 + * support for loopback mount as a branch
16146 + */
16147 +
16148 +#ifndef __AUFS_LOOP_H__
16149 +#define __AUFS_LOOP_H__
16150 +
16151 +#ifdef __KERNEL__
16152 +
16153 +struct dentry;
16154 +struct super_block;
16155 +
16156 +#ifdef CONFIG_AUFS_BDEV_LOOP
16157 +/* loop.c */
16158 +int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_d1,
16159 +                            struct dentry *h_d2);
16160 +int au_test_loopback_kthread(void);
16161 +#else
16162 +AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
16163 +          struct dentry *h_d1, struct dentry *h_d2)
16164 +AuStubInt0(au_test_loopback_kthread, void)
16165 +#endif /* CONFIG_AUFS_BDEV_LOOP */
16166 +
16167 +#endif /* __KERNEL__ */
16168 +#endif /* __AUFS_LOOP_H__ */
16169 diff --git a/fs/aufs/magic.mk b/fs/aufs/magic.mk
16170 new file mode 100644
16171 index 0000000..47c5af7
16172 --- /dev/null
16173 +++ b/fs/aufs/magic.mk
16174 @@ -0,0 +1,66 @@
16175 +
16176 +# defined in ${srctree}/fs/fuse/inode.c
16177 +# tristate
16178 +ifdef CONFIG_FUSE_FS
16179 +ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
16180 +endif
16181 +
16182 +# defined in ${srctree}/fs/ocfs2/ocfs2_fs.h
16183 +# tristate
16184 +ifdef CONFIG_OCFS2_FS
16185 +ccflags-y += -DOCFS2_SUPER_MAGIC=0x7461636f
16186 +endif
16187 +
16188 +# defined in ${srctree}/fs/ocfs2/dlm/userdlm.h
16189 +# tristate
16190 +ifdef CONFIG_OCFS2_FS_O2CB
16191 +ccflags-y += -DDLMFS_MAGIC=0x76a9f425
16192 +endif
16193 +
16194 +# defined in ${srctree}/fs/ramfs/inode.c
16195 +# always true
16196 +ccflags-y += -DRAMFS_MAGIC=0x858458f6
16197 +
16198 +# defined in ${srctree}/fs/cifs/cifsfs.c
16199 +# tristate
16200 +ifdef CONFIG_CIFS_FS
16201 +ccflags-y += -DCIFS_MAGIC_NUMBER=0xFF534D42
16202 +endif
16203 +
16204 +# defined in ${srctree}/fs/xfs/xfs_sb.h
16205 +# tristate
16206 +ifdef CONFIG_XFS_FS
16207 +ccflags-y += -DXFS_SB_MAGIC=0x58465342
16208 +endif
16209 +
16210 +# defined in ${srctree}/mm/shmem.c
16211 +# bool
16212 +ccflags-$(CONFIG_TMPFS) += -DTMPFS_MAGIC=0x01021994
16213 +
16214 +# defined in ${srctree}/fs/sysfs/mount.c
16215 +# bool
16216 +ccflags-$(CONFIG_SYSFS) += -DSYSFS_MAGIC=0x62656572
16217 +
16218 +# defined in ${srctree}/fs/configfs/mount.c
16219 +# tristate
16220 +ifdef CONFIG_CONFIGFS_FS
16221 +ccflags-y += -DCONFIGFS_MAGIC=0x62656570
16222 +endif
16223 +
16224 +# defined in ${srctree}/fs/9p/v9fs.h
16225 +# tristate
16226 +ifdef CONFIG_9P_FS
16227 +ccflags-y += -DV9FS_MAGIC=0x01021997
16228 +endif
16229 +
16230 +# defined in ${srctree}/fs/ubifs/ubifs.h
16231 +# tristate
16232 +ifdef CONFIG_UBIFS_FS
16233 +ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
16234 +endif
16235 +
16236 +# defined in ${srctree}/fs/debugfs/inode.c
16237 +# boolean
16238 +ifdef CONFIG_DEBUG_FS
16239 +ccflags-y += -DDEBUGFS_MAGIC=0x64626720
16240 +endif
16241 diff --git a/fs/aufs/module.c b/fs/aufs/module.c
16242 new file mode 100644
16243 index 0000000..dbfb2c4
16244 --- /dev/null
16245 +++ b/fs/aufs/module.c
16246 @@ -0,0 +1,173 @@
16247 +/*
16248 + * Copyright (C) 2005-2009 Junjiro R. Okajima
16249 + *
16250 + * This program, aufs is free software; you can redistribute it and/or modify
16251 + * it under the terms of the GNU General Public License as published by
16252 + * the Free Software Foundation; either version 2 of the License, or
16253 + * (at your option) any later version.
16254 + *
16255 + * This program is distributed in the hope that it will be useful,
16256 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
16257 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16258 + * GNU General Public License for more details.
16259 + *
16260 + * You should have received a copy of the GNU General Public License
16261 + * along with this program; if not, write to the Free Software
16262 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
16263 + */
16264 +
16265 +/*
16266 + * module global variables and operations
16267 + */
16268 +
16269 +#include <linux/module.h>
16270 +#include <linux/seq_file.h>
16271 +#include "aufs.h"
16272 +
16273 +void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp)
16274 +{
16275 +       /* growing only: a size within the used part leaves p untouched */
16276 +       if (nused >= new_sz)
16277 +               return p;
16278 +       p = krealloc(p, new_sz, gfp);
16279 +       if (p) /* clear everything beyond the first nused bytes */
16280 +               memset((char *)p + nused, 0, new_sz - nused);
16281 +       return p;
16282 +}
16283 +
16284 +/* ---------------------------------------------------------------------- */
16285 +
16286 +/*
16287 + * aufs caches
16288 + */
16289 +struct kmem_cache *au_cachep[AuCache_Last];
16290 +static int __init au_cache_init(void)
16291 +{
16292 +       au_cachep[AuCache_DINFO] = AuCache(au_dinfo);
16293 +       if (au_cachep[AuCache_DINFO])
16294 +               au_cachep[AuCache_ICNTNR] = AuCache(au_icntnr);
16295 +       if (au_cachep[AuCache_ICNTNR])
16296 +               au_cachep[AuCache_FINFO] = AuCache(au_finfo);
16297 +       if (au_cachep[AuCache_FINFO])
16298 +               au_cachep[AuCache_VDIR] = AuCache(au_vdir);
16299 +       if (au_cachep[AuCache_VDIR])
16300 +               au_cachep[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
16301 +       if (au_cachep[AuCache_DEHSTR])
16302 +               return 0;
16303 +
16304 +       return -ENOMEM;
16305 +}
16306 +
16307 +static void au_cache_fin(void)
16308 +{
16309 +       struct kmem_cache **cp;
16310 +       for (cp = au_cachep; cp < au_cachep + AuCache_Last; cp++)
16311 +               if (*cp) {
16312 +                       kmem_cache_destroy(*cp);
16313 +                       *cp = NULL;
16314 +               }
16315 +}
16316 +
16317 +/* ---------------------------------------------------------------------- */
16318 +
16319 +int au_dir_roflags;
16320 +
16321 +/*
16322 + * functions for module interface.
16323 + */
16324 +MODULE_LICENSE("GPL");
16325 +/* MODULE_LICENSE("GPL v2"); */
16326 +MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
16327 +MODULE_DESCRIPTION(AUFS_NAME
16328 +       " -- Advanced multi layered unification filesystem");
16329 +MODULE_VERSION(AUFS_VERSION);
16330 +
16331 +/* it should be 'byte', but param_set_byte() prints it by "%c" */
16332 +short aufs_nwkq = AUFS_NWKQ_DEF;
16333 +MODULE_PARM_DESC(nwkq, "the number of workqueue thread, " AUFS_WKQ_NAME);
16334 +module_param_named(nwkq, aufs_nwkq, short, S_IRUGO);
16335 +
16336 +/* this module parameter has no meaning when SYSFS is disabled */
16337 +int sysaufs_brs = 1;
16338 +MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
16339 +module_param_named(brs, sysaufs_brs, int, S_IRUGO);
16340 +
16341 +/* ---------------------------------------------------------------------- */
16342 +
16343 +static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NUL */
16344 +
16345 +int au_seq_path(struct seq_file *seq, struct path *path)
16346 +{
16347 +       return seq_path(seq, path, au_esc_chars);
16348 +}
16349 +
16350 +/* ---------------------------------------------------------------------- */
16351 +
16352 +static int __init aufs_init(void)
16353 +{
16354 +       int err, i;
16355 +       char *p;
16356 +
16357 +       p = au_esc_chars; /* fill the escape set used by au_seq_path() */
16358 +       for (i = 1; i <= ' '; i++)
16359 +               *p++ = i;
16360 +       *p++ = '\\';
16361 +       *p++ = '\x7f';
16362 +       *p = 0;
16363 +
16364 +       au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
16365 +
16366 +       sysaufs_brs_init();
16367 +       au_debug_init();
16368 +
16369 +       err = -EINVAL; /* the nwkq module parameter must be positive */
16370 +       if (unlikely(aufs_nwkq <= 0))
16371 +               goto out;
16372 +
16373 +       err = sysaufs_init();
16374 +       if (unlikely(err))
16375 +               goto out;
16376 +       err = au_wkq_init();
16377 +       if (unlikely(err))
16378 +               goto out_sysaufs;
16379 +       err = au_hinotify_init();
16380 +       if (unlikely(err))
16381 +               goto out_wkq;
16382 +       err = au_sysrq_init();
16383 +       if (unlikely(err))
16384 +               goto out_hin;
16385 +       err = au_cache_init();
16386 +       if (unlikely(err)) /* it may fail after creating some caches */
16387 +               goto out_cache;
16388 +       err = register_filesystem(&aufs_fs_type);
16389 +       if (unlikely(err))
16390 +               goto out_cache;
16391 +       pr_info(AUFS_NAME " " AUFS_VERSION "\n");
16392 +       goto out; /* success */
16393 +
16394 + out_cache:
16395 +       au_cache_fin();
16396 + out_sysrq:
16397 +       au_sysrq_fin();
16398 + out_hin:
16399 +       au_hinotify_fin();
16400 + out_wkq:
16401 +       au_wkq_fin();
16402 + out_sysaufs:
16403 +       sysaufs_fin();
16404 + out:
16405 +       return err;
16406 +}
16407 +
16408 +static void __exit aufs_exit(void)
16409 +{
16410 +       unregister_filesystem(&aufs_fs_type);
16411 +       au_cache_fin();
16412 +       au_sysrq_fin();
16413 +       au_hinotify_fin();
16414 +       au_wkq_fin();
16415 +       sysaufs_fin();
16416 +}
16417 +
16418 +module_init(aufs_init);
16419 +module_exit(aufs_exit);
16420 diff --git a/fs/aufs/module.h b/fs/aufs/module.h
16421 new file mode 100644
16422 index 0000000..0825b79
16423 --- /dev/null
16424 +++ b/fs/aufs/module.h
16425 @@ -0,0 +1,78 @@
16426 +/*
16427 + * Copyright (C) 2005-2009 Junjiro R. Okajima
16428 + *
16429 + * This program, aufs is free software; you can redistribute it and/or modify
16430 + * it under the terms of the GNU General Public License as published by
16431 + * the Free Software Foundation; either version 2 of the License, or
16432 + * (at your option) any later version.
16433 + *
16434 + * This program is distributed in the hope that it will be useful,
16435 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
16436 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16437 + * GNU General Public License for more details.
16438 + *
16439 + * You should have received a copy of the GNU General Public License
16440 + * along with this program; if not, write to the Free Software
16441 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
16442 + */
16443 +
16444 +/*
16445 + * module initialization and module-global
16446 + */
16447 +
16448 +#ifndef __AUFS_MODULE_H__
16449 +#define __AUFS_MODULE_H__
16450 +
16451 +#ifdef __KERNEL__
16452 +
16453 +#include <linux/slab.h>
16454 +
16455 +struct path;
16456 +struct seq_file;
16457 +
16458 +/* module parameters */
16459 +extern short aufs_nwkq;
16460 +extern int sysaufs_brs;
16461 +
16462 +/* ---------------------------------------------------------------------- */
16463 +
16464 +extern int au_dir_roflags;
16465 +
16466 +void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp);
16467 +int au_seq_path(struct seq_file *seq, struct path *path);
16468 +
16469 +/* ---------------------------------------------------------------------- */
16470 +
16471 +/* kmem cache */
16472 +enum {
16473 +       AuCache_DINFO,
16474 +       AuCache_ICNTNR,
16475 +       AuCache_FINFO,
16476 +       AuCache_VDIR,
16477 +       AuCache_DEHSTR,
16478 +#ifdef CONFIG_AUFS_HINOTIFY
16479 +       AuCache_HINOTIFY,
16480 +#endif
16481 +       AuCache_Last
16482 +};
16483 +
16484 +#define AuCache(type)  KMEM_CACHE(type, SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
16485 +
16486 +extern struct kmem_cache *au_cachep[];
16487 +
16488 +#define AuCacheFuncs(name, index) \
16489 +static inline struct au_##name *au_cache_alloc_##name(void) \
16490 +{ return kmem_cache_alloc(au_cachep[AuCache_##index], GFP_NOFS); } \
16491 +static inline void au_cache_free_##name(struct au_##name *p) \
16492 +{ kmem_cache_free(au_cachep[AuCache_##index], p); }
16493 +
16494 +AuCacheFuncs(dinfo, DINFO);
16495 +AuCacheFuncs(icntnr, ICNTNR);
16496 +AuCacheFuncs(finfo, FINFO);
16497 +AuCacheFuncs(vdir, VDIR);
16498 +AuCacheFuncs(vdir_dehstr, DEHSTR);
16499 +
16500 +/*  ---------------------------------------------------------------------- */
16501 +
16502 +#endif /* __KERNEL__ */
16503 +#endif /* __AUFS_MODULE_H__ */
16504 diff --git a/fs/aufs/opts.c b/fs/aufs/opts.c
16505 new file mode 100644
16506 index 0000000..b792b9f
16507 --- /dev/null
16508 +++ b/fs/aufs/opts.c
16509 @@ -0,0 +1,1550 @@
16510 +/*
16511 + * Copyright (C) 2005-2009 Junjiro R. Okajima
16512 + *
16513 + * This program, aufs is free software; you can redistribute it and/or modify
16514 + * it under the terms of the GNU General Public License as published by
16515 + * the Free Software Foundation; either version 2 of the License, or
16516 + * (at your option) any later version.
16517 + *
16518 + * This program is distributed in the hope that it will be useful,
16519 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
16520 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16521 + * GNU General Public License for more details.
16522 + *
16523 + * You should have received a copy of the GNU General Public License
16524 + * along with this program; if not, write to the Free Software
16525 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
16526 + */
16527 +
16528 +/*
16529 + * mount options/flags
16530 + */
16531 +
16532 +#include <linux/file.h>
16533 +#include <linux/namei.h>
16534 +#include <linux/types.h> /* a distribution requires */
16535 +#include <linux/parser.h>
16536 +#include "aufs.h"
16537 +
16538 +/* ---------------------------------------------------------------------- */
16539 +
16540 +enum {
16541 +       Opt_br,
16542 +       Opt_add, Opt_del, Opt_mod, Opt_reorder, Opt_append, Opt_prepend,
16543 +       Opt_idel, Opt_imod, Opt_ireorder,
16544 +       Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash, Opt_rendir,
16545 +       Opt_rdblk_def, Opt_rdhash_def,
16546 +       Opt_xino, Opt_zxino, Opt_noxino,
16547 +       Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino,
16548 +       Opt_trunc_xino_path, Opt_itrunc_xino,
16549 +       Opt_trunc_xib, Opt_notrunc_xib,
16550 +       Opt_shwh, Opt_noshwh,
16551 +       Opt_plink, Opt_noplink, Opt_list_plink,
16552 +       Opt_udba,
16553 +       /* Opt_lock, Opt_unlock, */
16554 +       Opt_cmd, Opt_cmd_args,
16555 +       Opt_diropq_a, Opt_diropq_w,
16556 +       Opt_warn_perm, Opt_nowarn_perm,
16557 +       Opt_wbr_copyup, Opt_wbr_create,
16558 +       Opt_refrof, Opt_norefrof,
16559 +       Opt_verbose, Opt_noverbose,
16560 +       Opt_sum, Opt_nosum, Opt_wsum,
16561 +       Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err
16562 +};
16563 +
16564 +static match_table_t options = {
16565 +       {Opt_br, "br=%s"},
16566 +       {Opt_br, "br:%s"},
16567 +
16568 +       {Opt_add, "add=%d:%s"},
16569 +       {Opt_add, "add:%d:%s"},
16570 +       {Opt_add, "ins=%d:%s"},
16571 +       {Opt_add, "ins:%d:%s"},
16572 +       {Opt_append, "append=%s"},
16573 +       {Opt_append, "append:%s"},
16574 +       {Opt_prepend, "prepend=%s"},
16575 +       {Opt_prepend, "prepend:%s"},
16576 +
16577 +       {Opt_del, "del=%s"},
16578 +       {Opt_del, "del:%s"},
16579 +       /* {Opt_idel, "idel:%d"}, */
16580 +       {Opt_mod, "mod=%s"},
16581 +       {Opt_mod, "mod:%s"},
16582 +       /* {Opt_imod, "imod:%d:%s"}, */
16583 +
16584 +       {Opt_dirwh, "dirwh=%d"},
16585 +
16586 +       {Opt_xino, "xino=%s"},
16587 +       {Opt_noxino, "noxino"},
16588 +       {Opt_trunc_xino, "trunc_xino"},
16589 +       {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
16590 +       {Opt_notrunc_xino, "notrunc_xino"},
16591 +       {Opt_trunc_xino_path, "trunc_xino=%s"},
16592 +       {Opt_itrunc_xino, "itrunc_xino=%d"},
16593 +       /* {Opt_zxino, "zxino=%s"}, */
16594 +       {Opt_trunc_xib, "trunc_xib"},
16595 +       {Opt_notrunc_xib, "notrunc_xib"},
16596 +
16597 +       {Opt_plink, "plink"},
16598 +       {Opt_noplink, "noplink"},
16599 +#ifdef CONFIG_AUFS_DEBUG
16600 +       {Opt_list_plink, "list_plink"},
16601 +#endif
16602 +
16603 +       {Opt_udba, "udba=%s"},
16604 +
16605 +       {Opt_diropq_a, "diropq=always"},
16606 +       {Opt_diropq_a, "diropq=a"},
16607 +       {Opt_diropq_w, "diropq=whiteouted"},
16608 +       {Opt_diropq_w, "diropq=w"},
16609 +
16610 +       {Opt_warn_perm, "warn_perm"},
16611 +       {Opt_nowarn_perm, "nowarn_perm"},
16612 +
16613 +       /* keep them temporarily */
16614 +       {Opt_ignore_silent, "coo=%s"},
16615 +       {Opt_ignore_silent, "nodlgt"},
16616 +       {Opt_ignore_silent, "nodirperm1"},
16617 +       {Opt_ignore_silent, "clean_plink"},
16618 +
16619 +#ifdef CONFIG_AUFS_SHWH
16620 +       {Opt_shwh, "shwh"},
16621 +#endif
16622 +       {Opt_noshwh, "noshwh"},
16623 +
16624 +       {Opt_rendir, "rendir=%d"},
16625 +
16626 +       {Opt_refrof, "refrof"},
16627 +       {Opt_norefrof, "norefrof"},
16628 +
16629 +       {Opt_verbose, "verbose"},
16630 +       {Opt_verbose, "v"},
16631 +       {Opt_noverbose, "noverbose"},
16632 +       {Opt_noverbose, "quiet"},
16633 +       {Opt_noverbose, "q"},
16634 +       {Opt_noverbose, "silent"},
16635 +
16636 +       {Opt_sum, "sum"},
16637 +       {Opt_nosum, "nosum"},
16638 +       {Opt_wsum, "wsum"},
16639 +
16640 +       {Opt_rdcache, "rdcache=%d"},
16641 +       {Opt_rdblk, "rdblk=%d"},
16642 +       {Opt_rdblk_def, "rdblk=def"},
16643 +       {Opt_rdhash, "rdhash=%d"},
16644 +       {Opt_rdhash_def, "rdhash=def"},
16645 +
16646 +       {Opt_wbr_create, "create=%s"},
16647 +       {Opt_wbr_create, "create_policy=%s"},
16648 +       {Opt_wbr_copyup, "cpup=%s"},
16649 +       {Opt_wbr_copyup, "copyup=%s"},
16650 +       {Opt_wbr_copyup, "copyup_policy=%s"},
16651 +
16652 +       /* internal use for the scripts */
16653 +       {Opt_ignore_silent, "si=%s"},
16654 +
16655 +       {Opt_br, "dirs=%s"},
16656 +       {Opt_ignore, "debug=%d"},
16657 +       {Opt_ignore, "delete=whiteout"},
16658 +       {Opt_ignore, "delete=all"},
16659 +       {Opt_ignore, "imap=%s"},
16660 +
16661 +       {Opt_err, NULL}
16662 +};
16663 +
16664 +/* ---------------------------------------------------------------------- */
16665 +
16666 +static const char *au_parser_pattern(int val, struct match_token *token)
16667 +{
16668 +       struct match_token *t;
16669 +       for (t = token; t->pattern; t++)
16670 +               if (t->token == val)
16671 +                       return t->pattern;
16672 +       /* val is not in the table, which ends at a NULL pattern */
16673 +       BUG();
16674 +       return "??";
16675 +}
16676 +
16677 +/* ---------------------------------------------------------------------- */
16678 +
16679 +static match_table_t brperms = {
16680 +       {AuBrPerm_RO, AUFS_BRPERM_RO},
16681 +       {AuBrPerm_RR, AUFS_BRPERM_RR},
16682 +       {AuBrPerm_RW, AUFS_BRPERM_RW},
16683 +
16684 +       {AuBrPerm_ROWH, AUFS_BRPERM_ROWH},
16685 +       {AuBrPerm_RRWH, AUFS_BRPERM_RRWH},
16686 +       {AuBrPerm_RWNoLinkWH, AUFS_BRPERM_RWNLWH},
16687 +
16688 +       {AuBrPerm_ROWH, "nfsro"},
16689 +       {AuBrPerm_RO, NULL}
16690 +};
16691 +
16692 +static int br_perm_val(char *perm)
16693 +{
16694 +       /* match_token() needs the array, the captures are unused here */
16695 +       substring_t args[MAX_OPT_ARGS];
16696 +
16697 +       /* the token of the matching brperms entry is the value */
16698 +       return match_token(perm, brperms, args);
16699 +}
16700 +
16701 +const char *au_optstr_br_perm(int brperm)
16702 +{
16703 +       return au_parser_pattern(brperm, (void *)brperms);
16704 +}
16705 +
16706 +/* ---------------------------------------------------------------------- */
16707 +
16708 +static match_table_t udbalevel = {
16709 +       {AuOpt_UDBA_REVAL, "reval"},
16710 +       {AuOpt_UDBA_NONE, "none"},
16711 +#ifdef CONFIG_AUFS_HINOTIFY
16712 +       {AuOpt_UDBA_HINOTIFY, "inotify"},
16713 +#endif
16714 +       {-1, NULL}
16715 +};
16716 +
16717 +static int udba_val(char *str)
16718 +{
16719 +       substring_t args[MAX_OPT_ARGS];
16720 +
16721 +       return match_token(str, udbalevel, args);
16722 +}
16723 +
16724 +const char *au_optstr_udba(int udba)
16725 +{
16726 +       return au_parser_pattern(udba, (void *)udbalevel);
16727 +}
16728 +
16729 +/* ---------------------------------------------------------------------- */
16730 +
16731 +static match_table_t au_wbr_create_policy = {
16732 +       {AuWbrCreate_TDP, "tdp"},
16733 +       {AuWbrCreate_TDP, "top-down-parent"},
16734 +       {AuWbrCreate_RR, "rr"},
16735 +       {AuWbrCreate_RR, "round-robin"},
16736 +       {AuWbrCreate_MFS, "mfs"},
16737 +       {AuWbrCreate_MFS, "most-free-space"},
16738 +       {AuWbrCreate_MFSV, "mfs:%d"},
16739 +       {AuWbrCreate_MFSV, "most-free-space:%d"},
16740 +
16741 +       {AuWbrCreate_MFSRR, "mfsrr:%d"},
16742 +       {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
16743 +       {AuWbrCreate_PMFS, "pmfs"},
16744 +       {AuWbrCreate_PMFSV, "pmfs:%d"},
16745 +
16746 +       {-1, NULL}
16747 +};
16748 +
16749 +/*
16750 + * cf. linux/lib/parser.c and cmdline.c
16751 + * gave up calling memparse() since it uses simple_strtoull() instead of
16752 + * strict_...().
16753 + */
16754 +static int au_match_ull(substring_t *s, unsigned long long *result)
16755 +{
16756 +       unsigned int len;
16757 +       char buf[32];
16758 +
16759 +       len = s->to - s->from;
16760 +       /* the substring and its terminator must fit in buf */
16761 +       if (len + 1 > sizeof(buf))
16762 +               return -ERANGE;
16763 +
16764 +       /* work on a terminated private copy of the substring */
16765 +       memcpy(buf, s->from, len);
16766 +       buf[len] = '\0';
16767 +       return strict_strtoull(buf, 0, result);
16768 +}
16769 +/* parse the mfsrr watermark from @arg into @create; @str is only used in the message */
16770 +static int au_wbr_mfs_wmark(substring_t *arg, char *str,
16771 +                           struct au_opt_wbr_create *create)
16772 +{
16773 +       unsigned long long watermark;
16774 +
16775 +       /* whatever au_match_ull() reports, the caller only sees -EINVAL */
16776 +       if (au_match_ull(arg, &watermark)) {
16777 +               AuErr("bad integer in %s\n", str);
16778 +               return -EINVAL;
16779 +       }
16780 +
16781 +       /* @create is written on success only */
16782 +       create->mfsrr_watermark = watermark;
16783 +
16784 +       return 0;
16785 +}
16786 +/* parse a non-negative number of seconds from @arg into @create->mfs_second */
16787 +static int au_wbr_mfs_sec(substring_t *arg, char *str,
16788 +                         struct au_opt_wbr_create *create)
16789 +{
16790 +       int sec;
16791 +
16792 +       /* unparsable and negative values are rejected alike */
16793 +       if (match_int(arg, &sec) || sec < 0) {
16794 +               AuErr("bad integer in %s\n", str);
16795 +               return -EINVAL;
16796 +       }
16797 +
16798 +       /* accepted, remember it */
16799 +       create->mfs_second = sec;
16800 +       return 0;
16801 +}
16802 +/* returns the AuWbrCreate_* token for @str (negative on failure) and fills in @create */
16803 +static int au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
16804 +{
16805 +       int err, e;
16806 +       substring_t args[MAX_OPT_ARGS];
16807 +
16808 +       err = match_token(str, au_wbr_create_policy, args);
16809 +       create->wbr_create = err; /* stored before the arguments are checked */
16810 +       switch (err) {
16811 +       case AuWbrCreate_MFSRRV: /* watermark in args[0], seconds in args[1] */
16812 +               e = au_wbr_mfs_wmark(&args[0], str, create);
16813 +               if (!e)
16814 +                       e = au_wbr_mfs_sec(&args[1], str, create);
16815 +               if (unlikely(e))
16816 +                       err = e;
16817 +               break;
16818 +       case AuWbrCreate_MFSRR: /* watermark only; seconds get the default below */
16819 +               e = au_wbr_mfs_wmark(&args[0], str, create);
16820 +               if (unlikely(e)) {
16821 +                       err = e;
16822 +                       break;
16823 +               }
16824 +               /*FALLTHROUGH*/
16825 +       case AuWbrCreate_MFS:
16826 +       case AuWbrCreate_PMFS:
16827 +               create->mfs_second = AUFS_MFS_SECOND_DEF;
16828 +               break;
16829 +       case AuWbrCreate_MFSV:
16830 +       case AuWbrCreate_PMFSV:
16831 +               e = au_wbr_mfs_sec(&args[0], str, create);
16832 +               if (unlikely(e))
16833 +                       err = e;
16834 +               break;
16835 +       }
16836 +
16837 +       return err; /* the token, or the helper's error replacing it */
16838 +}
16839 +/* string for @wbr_create as returned by au_parser_pattern() (defined outside this part) */
16840 +const char *au_optstr_wbr_create(int wbr_create)
16841 +{
16842 +       return au_parser_pattern(wbr_create, (void *)au_wbr_create_policy);
16843 +}
16844 +/* copyup-policy names, short and long form, each paired with its AuWbrCopyup_* value */
16845 +static match_table_t au_wbr_copyup_policy = {
16846 +       {AuWbrCopyup_TDP, "tdp"},
16847 +       {AuWbrCopyup_TDP, "top-down-parent"},
16848 +       {AuWbrCopyup_BUP, "bup"},
16849 +       {AuWbrCopyup_BUP, "bottom-up-parent"},
16850 +       {AuWbrCopyup_BU, "bu"},
16851 +       {AuWbrCopyup_BU, "bottom-up"},
16852 +       {-1, NULL} /* end of table */
16853 +};
16854 +/* translate a copyup policy string into the token found in au_wbr_copyup_policy */
16855 +static int au_wbr_copyup_val(char *str)
16856 +{
16857 +       substring_t args[MAX_OPT_ARGS]; /* handed to match_token(), not read here */
16858 +
16859 +       return match_token(str, au_wbr_copyup_policy, args);
16860 +}
16861 +/* string for @wbr_copyup as returned by au_parser_pattern() (defined outside this part) */
16862 +const char *au_optstr_wbr_copyup(int wbr_copyup)
16863 +{
16864 +       return au_parser_pattern(wbr_copyup, (void *)au_wbr_copyup_policy);
16865 +}
16866 +
16867 +/* ---------------------------------------------------------------------- */
16868 +/* lookup flags passed to vfsub_path_lookup() by the path-parsing helpers below */
16869 +static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
16870 +/* debug aid: print every parsed option up to Opt_tail; empty without CONFIG_AUFS_DEBUG */
16871 +static void dump_opts(struct au_opts *opts)
16872 +{
16873 +#ifdef CONFIG_AUFS_DEBUG
16874 +       /* reduce stack space */
16875 +       union {
16876 +               struct au_opt_add *add;
16877 +               struct au_opt_del *del;
16878 +               struct au_opt_mod *mod;
16879 +               struct au_opt_xino *xino;
16880 +               struct au_opt_xino_itrunc *xino_itrunc;
16881 +               struct au_opt_wbr_create *create;
16882 +       } u;
16883 +       struct au_opt *opt;
16884 +
16885 +       opt = opts->opt;
16886 +       while (opt->type != Opt_tail) { /* Opt_tail marks the end of the array */
16887 +               switch (opt->type) {
16888 +               case Opt_add:
16889 +                       u.add = &opt->add;
16890 +                       AuDbg("add {b%d, %s, 0x%x, %p}\n",
16891 +                                 u.add->bindex, u.add->pathname, u.add->perm,
16892 +                                 u.add->path.dentry);
16893 +                       break;
16894 +               case Opt_del:
16895 +               case Opt_idel:
16896 +                       u.del = &opt->del;
16897 +                       AuDbg("del {%s, %p}\n",
16898 +                             u.del->pathname, u.del->h_path.dentry);
16899 +                       break;
16900 +               case Opt_mod:
16901 +               case Opt_imod:
16902 +                       u.mod = &opt->mod;
16903 +                       AuDbg("mod {%s, 0x%x, %p}\n",
16904 +                                 u.mod->path, u.mod->perm, u.mod->h_root);
16905 +                       break;
16906 +               case Opt_append:
16907 +                       u.add = &opt->add;
16908 +                       AuDbg("append {b%d, %s, 0x%x, %p}\n",
16909 +                                 u.add->bindex, u.add->pathname, u.add->perm,
16910 +                                 u.add->path.dentry);
16911 +                       break;
16912 +               case Opt_prepend:
16913 +                       u.add = &opt->add;
16914 +                       AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
16915 +                                 u.add->bindex, u.add->pathname, u.add->perm,
16916 +                                 u.add->path.dentry);
16917 +                       break;
16918 +               case Opt_dirwh:
16919 +                       AuDbg("dirwh %d\n", opt->dirwh);
16920 +                       break;
16921 +               case Opt_rdcache:
16922 +                       AuDbg("rdcache %d\n", opt->rdcache);
16923 +                       break;
16924 +               case Opt_rdblk:
16925 +                       AuDbg("rdblk %u\n", opt->rdblk);
16926 +                       break;
16927 +               case Opt_rdblk_def:
16928 +                       AuDbg("rdblk_def\n");
16929 +                       break;
16930 +               case Opt_rdhash:
16931 +                       AuDbg("rdhash %u\n", opt->rdhash);
16932 +                       break;
16933 +               case Opt_rdhash_def:
16934 +                       AuDbg("rdhash_def\n");
16935 +                       break;
16936 +               case Opt_xino:
16937 +                       u.xino = &opt->xino;
16938 +                       AuDbg("xino {%s %.*s}\n",
16939 +                                 u.xino->path,
16940 +                                 AuDLNPair(u.xino->file->f_dentry));
16941 +                       break;
16942 +               case Opt_trunc_xino:
16943 +                       AuLabel(trunc_xino);
16944 +                       break;
16945 +               case Opt_notrunc_xino:
16946 +                       AuLabel(notrunc_xino);
16947 +                       break;
16948 +               case Opt_trunc_xino_path:
16949 +               case Opt_itrunc_xino:
16950 +                       u.xino_itrunc = &opt->xino_itrunc;
16951 +                       AuDbg("trunc_xino %d\n", u.xino_itrunc->bindex);
16952 +                       break;
16953 +
16954 +               case Opt_noxino:
16955 +                       AuLabel(noxino);
16956 +                       break;
16957 +               case Opt_trunc_xib:
16958 +                       AuLabel(trunc_xib);
16959 +                       break;
16960 +               case Opt_notrunc_xib:
16961 +                       AuLabel(notrunc_xib);
16962 +                       break;
16963 +               case Opt_shwh:
16964 +                       AuLabel(shwh);
16965 +                       break;
16966 +               case Opt_noshwh:
16967 +                       AuLabel(noshwh);
16968 +                       break;
16969 +               case Opt_plink:
16970 +                       AuLabel(plink);
16971 +                       break;
16972 +               case Opt_noplink:
16973 +                       AuLabel(noplink);
16974 +                       break;
16975 +               case Opt_list_plink:
16976 +                       AuLabel(list_plink);
16977 +                       break;
16978 +               case Opt_udba:
16979 +                       AuDbg("udba %d, %s\n",
16980 +                                 opt->udba, au_optstr_udba(opt->udba));
16981 +                       break;
16982 +               case Opt_diropq_a:
16983 +                       AuLabel(diropq_a);
16984 +                       break;
16985 +               case Opt_diropq_w:
16986 +                       AuLabel(diropq_w);
16987 +                       break;
16988 +               case Opt_warn_perm:
16989 +                       AuLabel(warn_perm);
16990 +                       break;
16991 +               case Opt_nowarn_perm:
16992 +                       AuLabel(nowarn_perm);
16993 +                       break;
16994 +               case Opt_refrof:
16995 +                       AuLabel(refrof);
16996 +                       break;
16997 +               case Opt_norefrof:
16998 +                       AuLabel(norefrof);
16999 +                       break;
17000 +               case Opt_verbose:
17001 +                       AuLabel(verbose);
17002 +                       break;
17003 +               case Opt_noverbose:
17004 +                       AuLabel(noverbose);
17005 +                       break;
17006 +               case Opt_sum:
17007 +                       AuLabel(sum);
17008 +                       break;
17009 +               case Opt_nosum:
17010 +                       AuLabel(nosum);
17011 +                       break;
17012 +               case Opt_wsum:
17013 +                       AuLabel(wsum);
17014 +                       break;
17015 +               case Opt_wbr_create:
17016 +                       u.create = &opt->wbr_create;
17017 +                       AuDbg("create %d, %s\n", u.create->wbr_create,
17018 +                                 au_optstr_wbr_create(u.create->wbr_create));
17019 +                       switch (u.create->wbr_create) { /* policies with arguments */
17020 +                       case AuWbrCreate_MFSV:
17021 +                       case AuWbrCreate_PMFSV:
17022 +                               AuDbg("%d sec\n", u.create->mfs_second);
17023 +                               break;
17024 +                       case AuWbrCreate_MFSRR:
17025 +                               AuDbg("%llu watermark\n",
17026 +                                         u.create->mfsrr_watermark);
17027 +                               break;
17028 +                       case AuWbrCreate_MFSRRV:
17029 +                               AuDbg("%llu watermark, %d sec\n",
17030 +                                         u.create->mfsrr_watermark,
17031 +                                         u.create->mfs_second);
17032 +                               break;
17033 +                       }
17034 +                       break;
17035 +               case Opt_wbr_copyup:
17036 +                       AuDbg("copyup %d, %s\n", opt->wbr_copyup,
17037 +                                 au_optstr_wbr_copyup(opt->wbr_copyup));
17038 +                       break;
17039 +               default:
17040 +                       BUG(); /* option type without a case above */
17041 +               }
17042 +               opt++;
17043 +       }
17044 +#endif
17045 +}
17046 +/* drop the references held by the parsed options; stops at the Opt_tail terminator */
17047 +void au_opts_free(struct au_opts *opts)
17048 +{
17049 +       struct au_opt *cur;
17050 +
17051 +       /* the array is walked in place, one option per iteration */
17052 +       for (cur = opts->opt; cur->type != Opt_tail; cur++) {
17053 +               switch (cur->type) {
17054 +               case Opt_add:
17055 +               case Opt_append:
17056 +               case Opt_prepend:
17057 +                       path_put(&cur->add.path);
17058 +                       break;
17059 +               case Opt_del:
17060 +               case Opt_idel:
17061 +                       path_put(&cur->del.h_path);
17062 +                       break;
17063 +               case Opt_mod:
17064 +               case Opt_imod:
17065 +                       dput(cur->mod.h_root);
17066 +                       break;
17067 +               case Opt_xino:
17068 +                       fput(cur->xino.file);
17069 +                       break;
17070 +               /* nothing is released here for the remaining option types */
17071 +               }
17072 +       }
17073 +}
17074 +/* fill @opt as an Opt_add from "path[=perm]" in @opt_str; returns 0 or -EINVAL */
17075 +static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
17076 +                  aufs_bindex_t bindex)
17077 +{
17078 +       struct au_opt_add *add = &opt->add;
17079 +       struct nameidata nd;
17080 +       char *perm;
17081 +       int err;
17082 +
17083 +       add->bindex = bindex;
17084 +       add->perm = AuBrPerm_Last;
17085 +       add->pathname = opt_str;
17086 +       /* split "path=perm" in place; perm stays NULL when there is no '=' */
17087 +       perm = strchr(opt_str, '=');
17088 +       if (perm) {
17089 +               *perm++ = 0;
17090 +               if (*perm)
17091 +                       add->perm = br_perm_val(perm);
17092 +       }
17093 +
17094 +       err = vfsub_path_lookup(add->pathname, lkup_dirflags, &nd);
17095 +       if (err) {
17096 +               AuErr("lookup failed %s (%d)\n", add->pathname, err);
17097 +               return -EINVAL;
17098 +       }
17099 +
17100 +       add->path = nd.path; /* keep it path_get()ed */
17101 +       opt->type = Opt_add;
17102 +       if (perm)
17103 +               return 0;
17104 +       /* no '=' at all: derive the permission from the branch fs and index */
17105 +       if (au_test_fs_rr(nd.path.dentry->d_sb))
17106 +               add->perm = AuBrPerm_RR;
17107 +       else if (!bindex && !(sb_flags & MS_RDONLY))
17108 +               add->perm = AuBrPerm_RW;
17109 +       else
17110 +               add->perm = AuBrPerm_RO;
17111 +       return 0;
17112 +}
17113 +/* look up the branch path of a del option; on success @del keeps the resolved path */
17114 +static int au_opts_parse_del(struct au_opt_del *del, substring_t args[],
17115 +                            struct nameidata *nd)
17116 +{
17117 +       int err;
17118 +
17119 +       del->pathname = args[0].from;
17120 +       AuDbg("del path %s\n", del->pathname);
17121 +
17122 +       err = vfsub_path_lookup(del->pathname, lkup_dirflags, nd);
17123 +       if (err) {
17124 +               AuErr("lookup failed %s (%d)\n", del->pathname, err);
17125 +               return err;
17126 +       }
17127 +
17128 +       del->h_path = nd->path; /* keep it get-ed */
17129 +       return 0;
17130 +}
17131 +/* compiled out: fills @del from a branch index instead of a looked-up path */
17132 +#if 0 /* reserved for future use */
17133 +static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
17134 +                             struct au_opt_del *del, substring_t args[])
17135 +{
17136 +       int err;
17137 +       struct dentry *root;
17138 +
17139 +       err = -EINVAL;
17140 +       root = sb->s_root;
17141 +       aufs_read_lock(root, AuLock_FLUSH);
17142 +       if (bindex < 0 || au_sbend(sb) < bindex) {
17143 +               AuErr("out of bounds, %d\n", bindex);
17144 +               goto out;
17145 +       }
17146 +
17147 +       err = 0;
17148 +       del->h_path.dentry = dget(au_h_dptr(root, bindex)); /* both refs are taken */
17149 +       del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));   /* under the read lock */
17150 +
17151 + out:
17152 +       aufs_read_unlock(root, !AuLock_IR);
17153 +       return err;
17154 +}
17155 +#endif
17156 +
17157 +static int au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[],
17158 +                            struct nameidata *nd)
17159 +{
17160 +       int err;
17161 +       char *p;
17162 +
17163 +       err = -EINVAL;
17164 +       mod->path = args[0].from;
17165 +       p = strchr(mod->path, '=');
17166 +       if (unlikely(!p)) {
17167 +               AuErr("no permssion %s\n", args[0].from);
17168 +               goto out;
17169 +       }
17170 +
17171 +       *p++ = 0;
17172 +       err = vfsub_path_lookup(mod->path, lkup_dirflags, nd);
17173 +       if (unlikely(err)) {
17174 +               AuErr("lookup failed %s (%d)\n", mod->path, err);
17175 +               goto out;
17176 +       }
17177 +
17178 +       mod->perm = br_perm_val(p);
17179 +       AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
17180 +       mod->h_root = dget(nd->path.dentry);
17181 +       path_put(&nd->path);
17182 +
17183 + out:
17184 +       return err;
17185 +}
17186 +/* compiled out: fills @mod from a branch index, with the permission in args[1] */
17187 +#if 0 /* reserved for future use */
17188 +static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
17189 +                             struct au_opt_mod *mod, substring_t args[])
17190 +{
17191 +       int err;
17192 +       struct dentry *root;
17193 +
17194 +       err = -EINVAL;
17195 +       root = sb->s_root;
17196 +       aufs_read_lock(root, AuLock_FLUSH);
17197 +       if (bindex < 0 || au_sbend(sb) < bindex) {
17198 +               AuErr("out of bounds, %d\n", bindex);
17199 +               goto out;
17200 +       }
17201 +
17202 +       err = 0;
17203 +       mod->perm = br_perm_val(args[1].from);
17204 +       AuDbg("mod path %s, perm 0x%x, %s\n",
17205 +             mod->path, mod->perm, args[1].from);
17206 +       mod->h_root = dget(au_h_dptr(root, bindex)); /* ref taken under the read lock */
17207 +
17208 + out:
17209 +       aufs_read_unlock(root, !AuLock_IR);
17210 +       return err;
17211 +}
17212 +#endif
17213 +/* get the xino file for the path in args[0] via au_xino_create() and store it in @xino */
17214 +static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
17215 +                             substring_t args[])
17216 +{
17217 +       struct file *xf;
17218 +
17219 +       xf = au_xino_create(sb, args[0].from, /*silent*/0);
17220 +       if (IS_ERR(xf))
17221 +               return PTR_ERR(xf);
17222 +
17223 +       /*
17224 +        * a file living on this very super_block is refused;
17225 +        * put it again before reporting the error
17226 +        */
17227 +       if (unlikely(xf->f_dentry->d_sb == sb)) {
17228 +               fput(xf);
17229 +               AuErr("%s must be outside\n", args[0].from);
17230 +               return -EINVAL;
17231 +       }
17232 +
17233 +       /* accepted: the file is kept in @xino (au_opts_free() puts it) */
17234 +       xino->file = xf;
17235 +       xino->path = args[0].from;
17236 +
17237 +       return 0;
17238 +}
17239 +/* find the branch whose root dentry is the path in args[0]; its index goes to @xino_itrunc */
17240 +static
17241 +int au_opts_parse_xino_itrunc_path(struct super_block *sb,
17242 +                                  struct au_opt_xino_itrunc *xino_itrunc,
17243 +                                  substring_t args[], struct nameidata *nd)
17244 +{
17245 +       int err;
17246 +       aufs_bindex_t bend, bindex;
17247 +       struct dentry *root;
17248 +
17249 +       err = vfsub_path_lookup(args[0].from, lkup_dirflags, nd);
17250 +       if (unlikely(err)) {
17251 +               AuErr("lookup failed %s (%d)\n", args[0].from, err);
17252 +               goto out;
17253 +       }
17254 +
17255 +       xino_itrunc->bindex = -1; /* "not found" until a branch matches */
17256 +       root = sb->s_root;
17257 +       aufs_read_lock(root, AuLock_FLUSH);
17258 +       bend = au_sbend(sb);
17259 +       for (bindex = 0; bindex <= bend; bindex++) {
17260 +               if (au_h_dptr(root, bindex) == nd->path.dentry) {
17261 +                       xino_itrunc->bindex = bindex;
17262 +                       break;
17263 +               }
17264 +       }
17265 +       aufs_read_unlock(root, !AuLock_IR);
17266 +       path_put(&nd->path); /* the path was only needed for the comparison */
17267 +
17268 +       if (unlikely(xino_itrunc->bindex < 0)) {
17269 +               AuErr("no such branch %s\n", args[0].from);
17270 +               err = -EINVAL;
17271 +       }
17272 +
17273 + out:
17274 +       return err;
17275 +}
17276 +
17277 +/* called without aufs lock */
17278 +int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
17279 +{
17280 +       int err, n, token;
17281 +       aufs_bindex_t bindex;
17282 +       unsigned char skipped;
17283 +       struct dentry *root;
17284 +       struct au_opt *opt, *opt_tail;
17285 +       char *opt_str;
17286 +       /* reduce the stack space */
17287 +       union {
17288 +               struct au_opt_xino_itrunc *xino_itrunc;
17289 +               struct au_opt_wbr_create *create;
17290 +       } u;
17291 +       struct {
17292 +               substring_t args[MAX_OPT_ARGS];
17293 +               struct nameidata nd;
17294 +       } *a;
17295 +
17296 +       err = -ENOMEM;
17297 +       a = kmalloc(sizeof(*a), GFP_NOFS);
17298 +       if (unlikely(!a))
17299 +               goto out;
17300 +
17301 +       root = sb->s_root;
17302 +       err = 0;
17303 +       bindex = 0;
17304 +       opt = opts->opt;
17305 +       opt_tail = opt + opts->max_opt - 1;
17306 +       opt->type = Opt_tail;
17307 +       while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
17308 +               err = -EINVAL;
17309 +               skipped = 0;
17310 +               token = match_token(opt_str, options, a->args);
17311 +               switch (token) {
17312 +               case Opt_br:
17313 +                       err = 0;
17314 +                       while (!err && (opt_str = strsep(&a->args[0].from, ":"))
17315 +                              && *opt_str) {
17316 +                               err = opt_add(opt, opt_str, opts->sb_flags,
17317 +                                             bindex++);
17318 +                               if (unlikely(!err && ++opt > opt_tail)) {
17319 +                                       err = -E2BIG;
17320 +                                       break;
17321 +                               }
17322 +                               opt->type = Opt_tail;
17323 +                               skipped = 1;
17324 +                       }
17325 +                       break;
17326 +               case Opt_add:
17327 +                       if (unlikely(match_int(&a->args[0], &n))) {
17328 +                               AuErr("bad integer in %s\n", opt_str);
17329 +                               break;
17330 +                       }
17331 +                       bindex = n;
17332 +                       err = opt_add(opt, a->args[1].from, opts->sb_flags,
17333 +                                     bindex);
17334 +                       if (!err)
17335 +                               opt->type = token;
17336 +                       break;
17337 +               case Opt_append:
17338 +                       err = opt_add(opt, a->args[0].from, opts->sb_flags,
17339 +                                     /*dummy bindex*/1);
17340 +                       if (!err)
17341 +                               opt->type = token;
17342 +                       break;
17343 +               case Opt_prepend:
17344 +                       err = opt_add(opt, a->args[0].from, opts->sb_flags,
17345 +                                     /*bindex*/0);
17346 +                       if (!err)
17347 +                               opt->type = token;
17348 +                       break;
17349 +               case Opt_del:
17350 +                       err = au_opts_parse_del(&opt->del, a->args, &a->nd);
17351 +                       if (!err)
17352 +                               opt->type = token;
17353 +                       break;
17354 +#if 0 /* reserved for future use */
17355 +               case Opt_idel:
17356 +                       del->pathname = "(indexed)";
17357 +                       if (unlikely(match_int(&args[0], &n))) {
17358 +                               AuErr("bad integer in %s\n", opt_str);
17359 +                               break;
17360 +                       }
17361 +                       err = au_opts_parse_idel(sb, n, &opt->del, a->args);
17362 +                       if (!err)
17363 +                               opt->type = token;
17364 +                       break;
17365 +#endif
17366 +               case Opt_mod:
17367 +                       err = au_opts_parse_mod(&opt->mod, a->args, &a->nd);
17368 +                       if (!err)
17369 +                               opt->type = token;
17370 +                       break;
17371 +#ifdef IMOD /* reserved for future use */
17372 +               case Opt_imod:
17373 +                       u.mod->path = "(indexed)";
17374 +                       if (unlikely(match_int(&a->args[0], &n))) {
17375 +                               AuErr("bad integer in %s\n", opt_str);
17376 +                               break;
17377 +                       }
17378 +                       err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
17379 +                       if (!err)
17380 +                               opt->type = token;
17381 +                       break;
17382 +#endif
17383 +               case Opt_xino:
17384 +                       err = au_opts_parse_xino(sb, &opt->xino, a->args);
17385 +                       if (!err)
17386 +                               opt->type = token;
17387 +                       break;
17388 +
17389 +               case Opt_trunc_xino_path:
17390 +                       err = au_opts_parse_xino_itrunc_path
17391 +                               (sb, &opt->xino_itrunc, a->args, &a->nd);
17392 +                       if (!err)
17393 +                               opt->type = token;
17394 +                       break;
17395 +
17396 +               case Opt_itrunc_xino:
17397 +                       u.xino_itrunc = &opt->xino_itrunc;
17398 +                       if (unlikely(match_int(&a->args[0], &n))) {
17399 +                               AuErr("bad integer in %s\n", opt_str);
17400 +                               break;
17401 +                       }
17402 +                       u.xino_itrunc->bindex = n;
17403 +                       aufs_read_lock(root, AuLock_FLUSH);
17404 +                       if (n < 0 || au_sbend(sb) < n) {
17405 +                               AuErr("out of bounds, %d\n", n);
17406 +                               aufs_read_unlock(root, !AuLock_IR);
17407 +                               break;
17408 +                       }
17409 +                       aufs_read_unlock(root, !AuLock_IR);
17410 +                       err = 0;
17411 +                       opt->type = token;
17412 +                       break;
17413 +
17414 +               case Opt_dirwh:
17415 +                       if (unlikely(match_int(&a->args[0], &opt->dirwh)))
17416 +                               break;
17417 +                       err = 0;
17418 +                       opt->type = token;
17419 +                       break;
17420 +
17421 +               case Opt_rdcache:
17422 +                       if (unlikely(match_int(&a->args[0], &opt->rdcache)))
17423 +                               break;
17424 +                       err = 0;
17425 +                       opt->type = token;
17426 +                       break;
17427 +               case Opt_rdblk:
17428 +                       if (unlikely(match_int(&a->args[0], &n)
17429 +                                    || n < 0
17430 +                                    || n > KMALLOC_MAX_SIZE)) {
17431 +                               AuErr("bad integer in %s\n", opt_str);
17432 +                               break;
17433 +                       }
17434 +                       if (unlikely(n && n < NAME_MAX)) {
17435 +                               AuErr("rdblk must be larger than %d\n",
17436 +                                     NAME_MAX);
17437 +                               break;
17438 +                       }
17439 +                       opt->rdblk = n;
17440 +                       err = 0;
17441 +                       opt->type = token;
17442 +                       break;
17443 +               case Opt_rdhash:
17444 +                       if (unlikely(match_int(&a->args[0], &n)
17445 +                                    || n < 0
17446 +                                    || n * sizeof(struct hlist_head)
17447 +                                    > KMALLOC_MAX_SIZE)) {
17448 +                               AuErr("bad integer in %s\n", opt_str);
17449 +                               break;
17450 +                       }
17451 +                       opt->rdhash = n;
17452 +                       err = 0;
17453 +                       opt->type = token;
17454 +                       break;
17455 +
17456 +               case Opt_trunc_xino:
17457 +               case Opt_notrunc_xino:
17458 +               case Opt_noxino:
17459 +               case Opt_trunc_xib:
17460 +               case Opt_notrunc_xib:
17461 +               case Opt_shwh:
17462 +               case Opt_noshwh:
17463 +               case Opt_plink:
17464 +               case Opt_noplink:
17465 +               case Opt_list_plink:
17466 +               case Opt_diropq_a:
17467 +               case Opt_diropq_w:
17468 +               case Opt_warn_perm:
17469 +               case Opt_nowarn_perm:
17470 +               case Opt_refrof:
17471 +               case Opt_norefrof:
17472 +               case Opt_verbose:
17473 +               case Opt_noverbose:
17474 +               case Opt_sum:
17475 +               case Opt_nosum:
17476 +               case Opt_wsum:
17477 +               case Opt_rdblk_def:
17478 +               case Opt_rdhash_def:
17479 +                       err = 0;
17480 +                       opt->type = token;
17481 +                       break;
17482 +
17483 +               case Opt_udba:
17484 +                       opt->udba = udba_val(a->args[0].from);
17485 +                       if (opt->udba >= 0) {
17486 +                               err = 0;
17487 +                               opt->type = token;
17488 +                       } else
17489 +                               AuErr("wrong value, %s\n", opt_str);
17490 +                       break;
17491 +
17492 +               case Opt_wbr_create:
17493 +                       u.create = &opt->wbr_create;
17494 +                       u.create->wbr_create
17495 +                               = au_wbr_create_val(a->args[0].from, u.create);
17496 +                       if (u.create->wbr_create >= 0) {
17497 +                               err = 0;
17498 +                               opt->type = token;
17499 +                       } else
17500 +                               AuErr("wrong value, %s\n", opt_str);
17501 +                       break;
17502 +               case Opt_wbr_copyup:
17503 +                       opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
17504 +                       if (opt->wbr_copyup >= 0) {
17505 +                               err = 0;
17506 +                               opt->type = token;
17507 +                       } else
17508 +                               AuErr("wrong value, %s\n", opt_str);
17509 +                       break;
17510 +
17511 +               case Opt_ignore:
17512 +                       AuWarn("ignored %s\n", opt_str);
17513 +                       /*FALLTHROUGH*/
17514 +               case Opt_ignore_silent:
17515 +                       skipped = 1;
17516 +                       err = 0;
17517 +                       break;
17518 +               case Opt_err:
17519 +                       AuErr("unknown option %s\n", opt_str);
17520 +                       break;
17521 +               }
17522 +
17523 +               if (!err && !skipped) {
17524 +                       if (unlikely(++opt > opt_tail)) {
17525 +                               err = -E2BIG;
17526 +                               opt--;
17527 +                               opt->type = Opt_tail;
17528 +                               break;
17529 +                       }
17530 +                       opt->type = Opt_tail;
17531 +               }
17532 +       }
17533 +
17534 +       kfree(a);
17535 +       dump_opts(opts);
17536 +       if (unlikely(err))
17537 +               au_opts_free(opts);
17538 +
17539 + out:
17540 +       return err;
17541 +}
17542 +/* switch @sb to the create policy in @create; returns 1, or the old policy's fin() error */
17543 +static int au_opt_wbr_create(struct super_block *sb,
17544 +                            struct au_opt_wbr_create *create)
17545 +{
17546 +       int err;
17547 +       struct au_sbinfo *sbinfo;
17548 +
17549 +       SiMustWriteLock(sb);
17550 +
17551 +       err = 1; /* handled */
17552 +       sbinfo = au_sbi(sb);
17553 +       if (sbinfo->si_wbr_create_ops->fin) { /* let the current policy finish first */
17554 +               err = sbinfo->si_wbr_create_ops->fin(sb);
17555 +               if (!err)
17556 +                       err = 1;
17557 +       }
17558 +
17559 +       sbinfo->si_wbr_create = create->wbr_create; /* done even if fin() failed */
17560 +       sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
17561 +       switch (create->wbr_create) {
17562 +       case AuWbrCreate_MFSRRV:
17563 +       case AuWbrCreate_MFSRR:
17564 +               sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
17565 +               /*FALLTHROUGH*/
17566 +       case AuWbrCreate_MFS:
17567 +       case AuWbrCreate_MFSV:
17568 +       case AuWbrCreate_PMFS:
17569 +       case AuWbrCreate_PMFSV:
17570 +               sbinfo->si_wbr_mfs.mfs_expire = create->mfs_second * HZ;
17571 +               break;
17572 +       }
17573 +
17574 +       if (sbinfo->si_wbr_create_ops->init)
17575 +               sbinfo->si_wbr_create_ops->init(sb); /* ignore */
17576 +
17577 +       return err;
17578 +}
17579 +
17580 +/*
17581 + * apply one simple (neither branch nor xino) option to the superblock info.
17582 + * plus: processed without an error
17583 + * zero: unprocessed, other: error from au_opt_wbr_create()/au_xino_trunc()
17584 + */
17585 +static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
17586 +                        struct au_opts *opts)
17587 +{
17588 +       int err;
17589 +       struct au_sbinfo *sbinfo;
17590 +
17591 +       SiMustWriteLock(sb);
17592 +
17593 +       err = 1; /* handled */
17594 +       sbinfo = au_sbi(sb);
17595 +       switch (opt->type) {
17596 +       case Opt_udba:
17597 +               sbinfo->si_mntflags &= ~AuOptMask_UDBA; /* replace the current udba bit */
17598 +               sbinfo->si_mntflags |= opt->udba;
17599 +               opts->given_udba |= opt->udba; /* au_opts_remount() tests this */
17600 +               break;
17601 +
17602 +       case Opt_plink:
17603 +               au_opt_set(sbinfo->si_mntflags, PLINK);
17604 +               break;
17605 +       case Opt_noplink:
17606 +               if (au_opt_test(sbinfo->si_mntflags, PLINK))
17607 +                       au_plink_put(sb);
17608 +               au_opt_clr(sbinfo->si_mntflags, PLINK);
17609 +               break;
17610 +       case Opt_list_plink:
17611 +               if (au_opt_test(sbinfo->si_mntflags, PLINK))
17612 +                       au_plink_list(sb);
17613 +               break;
17614 +
17615 +       case Opt_diropq_a:
17616 +               au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
17617 +               break;
17618 +       case Opt_diropq_w:
17619 +               au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
17620 +               break;
17621 +
17622 +       case Opt_warn_perm:
17623 +               au_opt_set(sbinfo->si_mntflags, WARN_PERM);
17624 +               break;
17625 +       case Opt_nowarn_perm:
17626 +               au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
17627 +               break;
17628 +
17629 +       case Opt_refrof:
17630 +               au_opt_set(sbinfo->si_mntflags, REFROF);
17631 +               break;
17632 +       case Opt_norefrof:
17633 +               au_opt_clr(sbinfo->si_mntflags, REFROF);
17634 +               break;
17635 +
17636 +       case Opt_verbose:
17637 +               au_opt_set(sbinfo->si_mntflags, VERBOSE);
17638 +               break;
17639 +       case Opt_noverbose:
17640 +               au_opt_clr(sbinfo->si_mntflags, VERBOSE);
17641 +               break;
17642 +
17643 +       case Opt_sum:
17644 +               au_opt_set(sbinfo->si_mntflags, SUM);
17645 +               break;
17646 +       case Opt_wsum:
17647 +               au_opt_clr(sbinfo->si_mntflags, SUM);
17648 +               au_opt_set(sbinfo->si_mntflags, SUM_W);
17649 +               break; /* was missing: fell into Opt_nosum and cleared SUM_W again */
17650 +       case Opt_nosum:
17651 +               au_opt_clr(sbinfo->si_mntflags, SUM);
17652 +               au_opt_clr(sbinfo->si_mntflags, SUM_W);
17653 +               break;
17654 +       case Opt_wbr_create:
17655 +               err = au_opt_wbr_create(sb, &opt->wbr_create);
17656 +               break;
17657 +       case Opt_wbr_copyup:
17658 +               sbinfo->si_wbr_copyup = opt->wbr_copyup;
17659 +               sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
17660 +               break;
17661 +
17662 +       case Opt_dirwh:
17663 +               sbinfo->si_dirwh = opt->dirwh;
17664 +               break;
17665 +
17666 +       case Opt_rdcache:
17667 +               sbinfo->si_rdcache = opt->rdcache * HZ; /* option value scaled by HZ */
17668 +               break;
17669 +       case Opt_rdblk:
17670 +               sbinfo->si_rdblk = opt->rdblk;
17671 +               break;
17672 +       case Opt_rdblk_def:
17673 +               sbinfo->si_rdblk = AUFS_RDBLK_DEF;
17674 +               break;
17675 +       case Opt_rdhash:
17676 +               sbinfo->si_rdhash = opt->rdhash;
17677 +               break;
17678 +       case Opt_rdhash_def:
17679 +               sbinfo->si_rdhash = AUFS_RDHASH_DEF;
17680 +               break;
17681 +
17682 +       case Opt_shwh:
17683 +               au_opt_set(sbinfo->si_mntflags, SHWH);
17684 +               break;
17685 +       case Opt_noshwh:
17686 +               au_opt_clr(sbinfo->si_mntflags, SHWH);
17687 +               break;
17688 +
17689 +       case Opt_trunc_xino:
17690 +               au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
17691 +               break;
17692 +       case Opt_notrunc_xino:
17693 +               au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
17694 +               break;
17695 +
17696 +       case Opt_trunc_xino_path:
17697 +       case Opt_itrunc_xino:
17698 +               err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
17699 +               if (!err)
17700 +                       err = 1; /* success, report "handled" */
17701 +               break;
17702 +
17703 +       case Opt_trunc_xib:
17704 +               au_fset_opts(opts->flags, TRUNC_XIB); /* acted on in au_opts_remount() */
17705 +               break;
17706 +       case Opt_notrunc_xib:
17707 +               au_fclr_opts(opts->flags, TRUNC_XIB);
17708 +               break;
17709 +
17710 +       default:
17711 +               err = 0; /* not a simple option, leave it to the other handlers */
17712 +               break;
17713 +       }
17714 +
17715 +       return err;
17716 +}
17717 +
17718 +/*
17719 + * apply one branch add/del/mod option and record which refresh is needed.
17720 + * returns tri-state. plus: processed without an error
17721 + * zero: unprocessed
17722 + * minus: error
17723 + */
17724 +static int au_opt_br(struct super_block *sb, struct au_opt *opt,
17725 +                    struct au_opts *opts)
17726 +{
17727 +       int err, do_refresh;
17728 +
17729 +       err = 0;
17730 +       switch (opt->type) {
17731 +       case Opt_append:
17732 +               opt->add.bindex = au_sbend(sb) + 1; /* one past the current last branch */
17733 +               if (opt->add.bindex < 0)
17734 +                       opt->add.bindex = 0;
17735 +               goto add;
17736 +       case Opt_prepend:
17737 +               opt->add.bindex = 0;
17738 +       add:
17739 +       case Opt_add: /* Opt_add uses the bindex already stored in opt->add */
17740 +               err = au_br_add(sb, &opt->add,
17741 +                               au_ftest_opts(opts->flags, REMOUNT));
17742 +               if (!err) {
17743 +                       err = 1;
17744 +                       au_fset_opts(opts->flags, REFRESH_DIR);
17745 +                       if (au_br_whable(opt->add.perm))
17746 +                               au_fset_opts(opts->flags, REFRESH_NONDIR);
17747 +               }
17748 +               break;
17749 +
17750 +       case Opt_del:
17751 +       case Opt_idel:
17752 +               err = au_br_del(sb, &opt->del,
17753 +                               au_ftest_opts(opts->flags, REMOUNT));
17754 +               if (!err) {
17755 +                       err = 1;
17756 +                       au_fset_opts(opts->flags, TRUNC_XIB);
17757 +                       au_fset_opts(opts->flags, REFRESH_DIR);
17758 +                       au_fset_opts(opts->flags, REFRESH_NONDIR);
17759 +               }
17760 +               break;
17761 +
17762 +       case Opt_mod:
17763 +       case Opt_imod:
17764 +               err = au_br_mod(sb, &opt->mod,
17765 +                               au_ftest_opts(opts->flags, REMOUNT),
17766 +                               &do_refresh);
17767 +               if (!err) {
17768 +                       err = 1;
17769 +                       if (do_refresh) { /* only read after au_br_mod() succeeded */
17770 +                               au_fset_opts(opts->flags, REFRESH_DIR);
17771 +                               au_fset_opts(opts->flags, REFRESH_NONDIR);
17772 +                       }
17773 +               }
17774 +               break;
17775 +       }
17776 +
17777 +       return err;
17778 +}
17779 +/* handle Opt_xino/Opt_noxino and report the choice via *opt_xino; returns 0 or au_xino_set() error */
17780 +static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
17781 +                      struct au_opt_xino **opt_xino,
17782 +                      struct au_opts *opts)
17783 +{
17784 +       int err;
17785 +       aufs_bindex_t bend, bindex;
17786 +       struct dentry *root, *parent, *h_root;
17787 +
17788 +       err = 0;
17789 +       switch (opt->type) {
17790 +       case Opt_xino:
17791 +               err = au_xino_set(sb, &opt->xino,
17792 +                                 !!au_ftest_opts(opts->flags, REMOUNT));
17793 +               if (unlikely(err))
17794 +                       break;
17795 +
17796 +               *opt_xino = &opt->xino;
17797 +               au_xino_brid_set(sb, -1); /* reset, set again below when a branch matches */
17798 +
17799 +               /* safe d_parent access */
17800 +               parent = opt->xino.file->f_dentry->d_parent;
17801 +               root = sb->s_root;
17802 +               bend = au_sbend(sb);
17803 +               for (bindex = 0; bindex <= bend; bindex++) {
17804 +                       h_root = au_h_dptr(root, bindex);
17805 +                       if (h_root == parent) { /* xino file sits directly under this branch root */
17806 +                               au_xino_brid_set(sb, au_sbr_id(sb, bindex));
17807 +                               break;
17808 +                       }
17809 +               }
17810 +               break;
17811 +
17812 +       case Opt_noxino:
17813 +               au_xino_clr(sb);
17814 +               au_xino_brid_set(sb, -1);
17815 +               *opt_xino = (void *)-1; /* non-NULL marker only, au_opts_mount() just tests !opt_xino */
17816 +               break;
17817 +       }
17818 +
17819 +       return err;
17820 +}
17821 +/* warn about odd option combinations, then call au_wh_init() on every branch that is not skipped */
17822 +int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
17823 +                  unsigned int pending)
17824 +{
17825 +       int err;
17826 +       aufs_bindex_t bindex, bend;
17827 +       unsigned char do_plink, skip, do_free;
17828 +       struct au_branch *br;
17829 +       struct au_wbr *wbr;
17830 +       struct dentry *root;
17831 +       struct inode *dir, *h_dir;
17832 +       struct au_sbinfo *sbinfo;
17833 +       struct au_hinode *hdir;
17834 +
17835 +       SiMustAnyLock(sb);
17836 +
17837 +       sbinfo = au_sbi(sb);
17838 +       AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
17839 +
17840 +       if (!(sb_flags & MS_RDONLY)) {
17841 +               if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
17842 +                       AuWarn("first branch should be rw\n");
17843 +               if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
17844 +                       AuWarn("shwh should be used with ro\n");
17845 +       }
17846 +
17847 +       if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HINOTIFY) /* @pending: flags not yet in si_mntflags */
17848 +           && !au_opt_test(sbinfo->si_mntflags, XINO))
17849 +               AuWarn("udba=inotify requires xino\n");
17850 +
17851 +       err = 0;
17852 +       root = sb->s_root;
17853 +       dir = sb->s_root->d_inode;
17854 +       do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
17855 +       bend = au_sbend(sb);
17856 +       for (bindex = 0; !err && bindex <= bend; bindex++) { /* stops at the first au_wh_init() error */
17857 +               skip = 0;
17858 +               h_dir = au_h_iptr(dir, bindex);
17859 +               br = au_sbr(sb, bindex);
17860 +               do_free = 0;
17861 +
17862 +               wbr = br->br_wbr;
17863 +               if (wbr)
17864 +                       wbr_wh_read_lock(wbr);
17865 +
17866 +               switch (br->br_perm) {
17867 +               case AuBrPerm_RO:
17868 +               case AuBrPerm_ROWH:
17869 +               case AuBrPerm_RR:
17870 +               case AuBrPerm_RRWH:
17871 +                       do_free = !!wbr; /* wbr is freed below once au_wh_init() succeeds */
17872 +                       skip = (!wbr
17873 +                               || (!wbr->wbr_whbase
17874 +                                   && !wbr->wbr_plink
17875 +                                   && !wbr->wbr_orph));
17876 +                       break;
17877 +
17878 +               case AuBrPerm_RWNoLinkWH:
17879 +                       /* skip = (!br->br_whbase && !br->br_orph); */
17880 +                       skip = (!wbr || !wbr->wbr_whbase);
17881 +                       if (skip && wbr) {
17882 +                               if (do_plink)
17883 +                                       skip = !!wbr->wbr_plink;
17884 +                               else
17885 +                                       skip = !wbr->wbr_plink;
17886 +                       }
17887 +                       break;
17888 +
17889 +               case AuBrPerm_RW:
17890 +                       /* skip = (br->br_whbase && br->br_orph); */
17891 +                       skip = (wbr && wbr->wbr_whbase);
17892 +                       if (skip) { /* skip only when wbr_plink presence matches the PLINK option */
17893 +                               if (do_plink)
17894 +                                       skip = !!wbr->wbr_plink;
17895 +                               else
17896 +                                       skip = !wbr->wbr_plink;
17897 +                       }
17898 +                       break;
17899 +
17900 +               default:
17901 +                       BUG();
17902 +               }
17903 +               if (wbr)
17904 +                       wbr_wh_read_unlock(wbr);
17905 +
17906 +               if (skip)
17907 +                       continue;
17908 +
17909 +               hdir = au_hi(dir, bindex);
17910 +               au_hin_imtx_lock_nested(hdir, AuLsc_I_PARENT);
17911 +               if (wbr)
17912 +                       wbr_wh_write_lock(wbr);
17913 +               err = au_wh_init(au_h_dptr(root, bindex), br, sb);
17914 +               if (wbr)
17915 +                       wbr_wh_write_unlock(wbr);
17916 +               au_hin_imtx_unlock(hdir);
17917 +
17918 +               if (!err && do_free) {
17919 +                       kfree(wbr);
17920 +                       br->br_wbr = NULL;
17921 +               }
17922 +       }
17923 +
17924 +       return err;
17925 +}
17926 +/* apply the parsed options at mount time: simple pass, branch pass, xino pass, then verify */
17927 +int au_opts_mount(struct super_block *sb, struct au_opts *opts)
17928 +{
17929 +       int err;
17930 +       unsigned int tmp;
17931 +       aufs_bindex_t bend;
17932 +       struct au_opt *opt;
17933 +       struct au_opt_xino *opt_xino, xino;
17934 +       struct au_sbinfo *sbinfo;
17935 +
17936 +       SiMustWriteLock(sb);
17937 +
17938 +       err = 0;
17939 +       opt_xino = NULL;
17940 +       opt = opts->opt;
17941 +       while (err >= 0 && opt->type != Opt_tail)
17942 +               err = au_opt_simple(sb, opt++, opts);
17943 +       if (err > 0)
17944 +               err = 0; /* "handled" is not an error */
17945 +       else if (unlikely(err < 0))
17946 +               goto out;
17947 +
17948 +       /* disable xino and udba temporarily; NOTE(review): error paths below leave udba unrestored */
17949 +       sbinfo = au_sbi(sb);
17950 +       tmp = sbinfo->si_mntflags; /* saved copy, used for the restores below */
17951 +       au_opt_clr(sbinfo->si_mntflags, XINO);
17952 +       au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
17953 +
17954 +       opt = opts->opt;
17955 +       while (err >= 0 && opt->type != Opt_tail)
17956 +               err = au_opt_br(sb, opt++, opts);
17957 +       if (err > 0)
17958 +               err = 0;
17959 +       else if (unlikely(err < 0))
17960 +               goto out;
17961 +
17962 +       bend = au_sbend(sb);
17963 +       if (unlikely(bend < 0)) {
17964 +               err = -EINVAL;
17965 +               AuErr("no branches\n");
17966 +               goto out;
17967 +       }
17968 +
17969 +       if (au_opt_test(tmp, XINO))
17970 +               au_opt_set(sbinfo->si_mntflags, XINO);
17971 +       opt = opts->opt;
17972 +       while (!err && opt->type != Opt_tail)
17973 +               err = au_opt_xino(sb, opt++, &opt_xino, opts);
17974 +       if (unlikely(err))
17975 +               goto out;
17976 +
17977 +       err = au_opts_verify(sb, sb->s_flags, tmp); /* tmp carries the udba bits still pending */
17978 +       if (unlikely(err))
17979 +               goto out;
17980 +
17981 +       /* restore xino */
17982 +       if (au_opt_test(tmp, XINO) && !opt_xino) { /* xino wanted but no xino/noxino option given */
17983 +               xino.file = au_xino_def(sb);
17984 +               err = PTR_ERR(xino.file);
17985 +               if (IS_ERR(xino.file))
17986 +                       goto out;
17987 +
17988 +               err = au_xino_set(sb, &xino, /*remount*/0);
17989 +               fput(xino.file);
17990 +               if (unlikely(err))
17991 +                       goto out;
17992 +       }
17993 +
17994 +       /* restore udba */
17995 +       sbinfo->si_mntflags &= ~AuOptMask_UDBA;
17996 +       sbinfo->si_mntflags |= (tmp & AuOptMask_UDBA);
17997 +       if (au_opt_test(tmp, UDBA_HINOTIFY)) {
17998 +               struct inode *dir = sb->s_root->d_inode;
17999 +               au_reset_hinotify(dir,
18000 +                                 au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
18001 +       }
18002 +
18003 + out:
18004 +       return err;
18005 +}
18006 +/* apply the parsed options at remount time, one option at a time, then verify; returns the first error */
18007 +int au_opts_remount(struct super_block *sb, struct au_opts *opts)
18008 +{
18009 +       int err, rerr;
18010 +       struct inode *dir;
18011 +       struct au_opt_xino *opt_xino;
18012 +       struct au_opt *opt;
18013 +       struct au_sbinfo *sbinfo;
18014 +
18015 +       SiMustWriteLock(sb);
18016 +
18017 +       dir = sb->s_root->d_inode;
18018 +       sbinfo = au_sbi(sb);
18019 +       err = 0;
18020 +       opt_xino = NULL;
18021 +       opt = opts->opt;
18022 +       while (err >= 0 && opt->type != Opt_tail) {
18023 +               err = au_opt_simple(sb, opt, opts); /* zero means "not mine", try the next handler */
18024 +               if (!err)
18025 +                       err = au_opt_br(sb, opt, opts);
18026 +               if (!err)
18027 +                       err = au_opt_xino(sb, opt, &opt_xino, opts);
18028 +               opt++;
18029 +       }
18030 +       if (err > 0)
18031 +               err = 0;
18032 +       AuTraceErr(err);
18033 +       /* go on even err */
18034 +
18035 +       rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
18036 +       if (unlikely(rerr && !err))
18037 +               err = rerr; /* keep the earliest error */
18038 +
18039 +       if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
18040 +               rerr = au_xib_trunc(sb);
18041 +               if (unlikely(rerr && !err))
18042 +                       err = rerr;
18043 +       }
18044 +
18045 +       /* will be handled by the caller */
18046 +       if (!au_ftest_opts(opts->flags, REFRESH_DIR)
18047 +           && (opts->given_udba || au_opt_test(sbinfo->si_mntflags, XINO)))
18048 +               au_fset_opts(opts->flags, REFRESH_DIR);
18049 +
18050 +       AuDbg("status 0x%x\n", opts->flags);
18051 +       return err;
18052 +}
18053 +
18054 +/* ---------------------------------------------------------------------- */
18055 +/* returns the udba bits (AuOptMask_UDBA) of the mount flags of @sb */
18056 +unsigned int au_opt_udba(struct super_block *sb)
18057 +{
18058 +       return au_mntflags(sb) & AuOptMask_UDBA;
18059 +}
18060 diff --git a/fs/aufs/opts.h b/fs/aufs/opts.h
18061 new file mode 100644
18062 index 0000000..27439b1
18063 --- /dev/null
18064 +++ b/fs/aufs/opts.h
18065 @@ -0,0 +1,196 @@
18066 +/*
18067 + * Copyright (C) 2005-2009 Junjiro R. Okajima
18068 + *
18069 + * This program, aufs is free software; you can redistribute it and/or modify
18070 + * it under the terms of the GNU General Public License as published by
18071 + * the Free Software Foundation; either version 2 of the License, or
18072 + * (at your option) any later version.
18073 + *
18074 + * This program is distributed in the hope that it will be useful,
18075 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
18076 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18077 + * GNU General Public License for more details.
18078 + *
18079 + * You should have received a copy of the GNU General Public License
18080 + * along with this program; if not, write to the Free Software
18081 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18082 + */
18083 +
18084 +/*
18085 + * mount options/flags
18086 + */
18087 +
18088 +#ifndef __AUFS_OPTS_H__
18089 +#define __AUFS_OPTS_H__
18090 +
18091 +#ifdef __KERNEL__
18092 +
18093 +#include <linux/path.h>
18094 +#include <linux/aufs_type.h>
18095 +
18096 +struct file;
18097 +struct super_block;
18098 +
18099 +/* ---------------------------------------------------------------------- */
18100 +
18101 +/* mount flags */
18102 +#define AuOpt_XINO             1               /* external inode number bitmap
18103 +                                                  and translation table */
18104 +#define AuOpt_TRUNC_XINO       (1 << 1)        /* truncate xino files */
18105 +#define AuOpt_UDBA_NONE                (1 << 2)        /* users direct branch access */
18106 +#define AuOpt_UDBA_REVAL       (1 << 3)
18107 +#define AuOpt_UDBA_HINOTIFY    (1 << 4)
18108 +#define AuOpt_SHWH             (1 << 5)        /* show whiteout */
18109 +#define AuOpt_PLINK            (1 << 6)        /* pseudo-link */
18110 +#define AuOpt_DIRPERM1         (1 << 7)        /* unimplemented */
18111 +#define AuOpt_REFROF           (1 << 8)        /* unimplemented */
18112 +#define AuOpt_ALWAYS_DIROPQ    (1 << 9)        /* policy to creating diropq */
18113 +#define AuOpt_SUM              (1 << 10)       /* summation for statfs(2) */
18114 +#define AuOpt_SUM_W            (1 << 11)       /* unimplemented */
18115 +#define AuOpt_WARN_PERM                (1 << 12)       /* warn when add-branch */
18116 +#define AuOpt_VERBOSE          (1 << 13)       /* busy inode when del-branch */
18117 +
18118 +#ifndef CONFIG_AUFS_HINOTIFY
18119 +#undef AuOpt_UDBA_HINOTIFY
18120 +#define AuOpt_UDBA_HINOTIFY    0
18121 +#endif
18122 +#ifndef CONFIG_AUFS_SHWH
18123 +#undef AuOpt_SHWH
18124 +#define AuOpt_SHWH             0
18125 +#endif
18126 +
18127 +#define AuOpt_Def      (AuOpt_XINO \
18128 +                        | AuOpt_UDBA_REVAL \
18129 +                        | AuOpt_PLINK \
18130 +                        /* | AuOpt_DIRPERM1 */ \
18131 +                        | AuOpt_WARN_PERM)
18132 +#define AuOptMask_UDBA (AuOpt_UDBA_NONE \
18133 +                        | AuOpt_UDBA_REVAL \
18134 +                        | AuOpt_UDBA_HINOTIFY)
18135 +
18136 +#define au_opt_test(flags, name)       (flags & AuOpt_##name)
18137 +#define au_opt_set(flags, name) do { \
18138 +       BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
18139 +       ((flags) |= AuOpt_##name); \
18140 +} while (0)
18141 +#define au_opt_set_udba(flags, name) do { \
18142 +       (flags) &= ~AuOptMask_UDBA; \
18143 +       ((flags) |= AuOpt_##name); \
18144 +} while (0)
18145 +#define au_opt_clr(flags, name)                { ((flags) &= ~AuOpt_##name); }
18146 +
18147 +/* ---------------------------------------------------------------------- */
18148 +
18149 +/* policies to select one among multiple writable branches; values index au_wbr_create_ops[] */
18150 +enum {
18151 +       AuWbrCreate_TDP,        /* top down parent */
18152 +       AuWbrCreate_RR,         /* round robin */
18153 +       AuWbrCreate_MFS,        /* most free space */
18154 +       AuWbrCreate_MFSV,       /* mfs with seconds */
18155 +       AuWbrCreate_MFSRR,      /* mfs then rr */
18156 +       AuWbrCreate_MFSRRV,     /* mfs then rr with seconds */
18157 +       AuWbrCreate_PMFS,       /* parent and mfs */
18158 +       AuWbrCreate_PMFSV,      /* parent and mfs with seconds */
18159 +
18160 +       AuWbrCreate_Def = AuWbrCreate_TDP
18161 +};
18162 +/* copy-up policies; values index au_wbr_copyup_ops[] */
18163 +enum {
18164 +       AuWbrCopyup_TDP,        /* top down parent */
18165 +       AuWbrCopyup_BUP,        /* bottom up parent */
18166 +       AuWbrCopyup_BU,         /* bottom up */
18167 +
18168 +       AuWbrCopyup_Def = AuWbrCopyup_TDP
18169 +};
18170 +
18171 +/* ---------------------------------------------------------------------- */
18172 +/* parsed arguments of a single option, one struct per option family, gathered in struct au_opt */
18173 +struct au_opt_add {
18174 +       aufs_bindex_t   bindex;         /* overwritten for Opt_append/Opt_prepend */
18175 +       char            *pathname;
18176 +       int             perm;           /* tested by au_br_whable() after the add */
18177 +       struct path     path;
18178 +};
18179 +
18180 +struct au_opt_del {
18181 +       char            *pathname;
18182 +       struct path     h_path;
18183 +};
18184 +
18185 +struct au_opt_mod {
18186 +       char            *path;
18187 +       int             perm;
18188 +       struct dentry   *h_root;
18189 +};
18190 +
18191 +struct au_opt_xino {
18192 +       char            *path;
18193 +       struct file     *file;          /* its parent dentry is compared with the branch roots */
18194 +};
18195 +
18196 +struct au_opt_xino_itrunc {
18197 +       aufs_bindex_t   bindex;         /* passed to au_xino_trunc() */
18198 +};
18199 +
18200 +struct au_opt_wbr_create {
18201 +       int                     wbr_create;     /* AuWbrCreate_xxx */
18202 +       int                     mfs_second;     /* multiplied by HZ when applied */
18203 +       unsigned long long      mfsrr_watermark;
18204 +};
18205 +
18206 +struct au_opt {
18207 +       int type; /* Opt_xxx token; Opt_tail terminates the array */
18208 +       union {
18209 +               struct au_opt_xino      xino;
18210 +               struct au_opt_xino_itrunc xino_itrunc;
18211 +               struct au_opt_add       add;
18212 +               struct au_opt_del       del;
18213 +               struct au_opt_mod       mod;
18214 +               int                     dirwh;
18215 +               int                     rdcache; /* multiplied by HZ when applied */
18216 +               unsigned int            rdblk;
18217 +               unsigned int            rdhash;
18218 +               int                     udba; /* AuOpt_UDBA_xxx bit, or-ed into the mount flags */
18219 +               struct au_opt_wbr_create wbr_create;
18220 +               int                     wbr_copyup; /* AuWbrCopyup_xxx */
18221 +       };
18222 +};
18223 +
18224 +/* opts flags */
18225 +#define AuOpts_REMOUNT         1
18226 +#define AuOpts_REFRESH_DIR     (1 << 1)
18227 +#define AuOpts_REFRESH_NONDIR  (1 << 2)
18228 +#define AuOpts_TRUNC_XIB       (1 << 3)
18229 +#define au_ftest_opts(flags, name)     ((flags) & AuOpts_##name)
18230 +#define au_fset_opts(flags, name)      { (flags) |= AuOpts_##name; }
18231 +#define au_fclr_opts(flags, name)      { (flags) &= ~AuOpts_##name; }
18232 +
18233 +struct au_opts {
18234 +       struct au_opt   *opt;
18235 +       int             max_opt;
18236 +
18237 +       unsigned int    given_udba;
18238 +       unsigned int    flags;
18239 +       unsigned long   sb_flags;
18240 +};
18241 +
18242 +/* ---------------------------------------------------------------------- */
18243 +
18244 +const char *au_optstr_br_perm(int brperm);
18245 +const char *au_optstr_udba(int udba);
18246 +const char *au_optstr_wbr_copyup(int wbr_copyup);
18247 +const char *au_optstr_wbr_create(int wbr_create);
18248 +
18249 +void au_opts_free(struct au_opts *opts);
18250 +int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
18251 +int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
18252 +                  unsigned int pending);
18253 +int au_opts_mount(struct super_block *sb, struct au_opts *opts);
18254 +int au_opts_remount(struct super_block *sb, struct au_opts *opts);
18255 +
18256 +unsigned int au_opt_udba(struct super_block *sb);
18257 +
18258 +/* ---------------------------------------------------------------------- */
18259 +
18260 +#endif /* __KERNEL__ */
18261 +#endif /* __AUFS_OPTS_H__ */
18262 diff --git a/fs/aufs/plink.c b/fs/aufs/plink.c
18263 new file mode 100644
18264 index 0000000..0fe806e
18265 --- /dev/null
18266 +++ b/fs/aufs/plink.c
18267 @@ -0,0 +1,429 @@
18268 +/*
18269 + * Copyright (C) 2005-2009 Junjiro R. Okajima
18270 + *
18271 + * This program, aufs is free software; you can redistribute it and/or modify
18272 + * it under the terms of the GNU General Public License as published by
18273 + * the Free Software Foundation; either version 2 of the License, or
18274 + * (at your option) any later version.
18275 + *
18276 + * This program is distributed in the hope that it will be useful,
18277 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
18278 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18279 + * GNU General Public License for more details.
18280 + *
18281 + * You should have received a copy of the GNU General Public License
18282 + * along with this program; if not, write to the Free Software
18283 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18284 + */
18285 +
18286 +/*
18287 + * pseudo-link
18288 + */
18289 +
18290 +#include "aufs.h"
18291 +
18292 +/*
18293 + * while a user process maintains the pseudo-links,
18294 + * prohibit adding a new plink and branch manipulation.
18295 + */
18296 +void au_plink_maint_block(struct super_block *sb)
18297 +{
18298 +       struct au_sbinfo *sbi = au_sbi(sb);
18299 +
18300 +       SiMustAnyLock(sb);
18301 +
18302 +       /* gave up wake_up_bit() */
18303 +       wait_event(sbi->si_plink_wq, !sbi->si_plink_maint); /* sleep until no maintainer file is set */
18304 +}
18305 +/* leave the plink maintenance mode if @file is the registered maintainer, and wake the waiters */
18306 +void au_plink_maint_leave(struct file *file)
18307 +{
18308 +       struct au_sbinfo *sbinfo;
18309 +       int iam;
18310 +
18311 +       AuDebugOn(atomic_long_read(&file->f_count)); /* debug check: f_count is expected to be zero here */
18312 +
18313 +       sbinfo = au_sbi(file->f_dentry->d_sb);
18314 +       spin_lock(&sbinfo->si_plink_maint_lock);
18315 +       iam = (sbinfo->si_plink_maint == file);
18316 +       if (iam)
18317 +               sbinfo->si_plink_maint = NULL;
18318 +       spin_unlock(&sbinfo->si_plink_maint_lock);
18319 +       if (iam)
18320 +               wake_up_all(&sbinfo->si_plink_wq); /* done after dropping the spinlock */
18321 +}
18322 +/* register @file as the plink maintainer; returns 0, or -EBUSY when another file already is */
18323 +static int au_plink_maint_enter(struct file *file)
18324 +{
18325 +       int err;
18326 +       struct super_block *sb;
18327 +       struct au_sbinfo *sbinfo;
18328 +
18329 +       err = 0;
18330 +       sb = file->f_dentry->d_sb;
18331 +       sbinfo = au_sbi(sb);
18332 +       /* make sure i am the only one in this fs */
18333 +       si_write_lock(sb); /* used here in place of si_plink_maint_lock, see the disabled calls */
18334 +       /* spin_lock(&sbinfo->si_plink_maint_lock); */
18335 +       if (!sbinfo->si_plink_maint)
18336 +               sbinfo->si_plink_maint = file;
18337 +       else
18338 +               err = -EBUSY;
18339 +       /* spin_unlock(&sbinfo->si_plink_maint_lock); */
18340 +       si_write_unlock(sb);
18341 +
18342 +       return err;
18343 +}
18344 +
18345 +/* ---------------------------------------------------------------------- */
18346 +/* an element of the list sbinfo->si_plink.head, one per pseudo-linked inode */
18347 +struct pseudo_link {
18348 +       struct list_head list;
18349 +       struct inode *inode;
18350 +};
18351 +/* debug only: print the inode number of every entry on the plink list */
18352 +#ifdef CONFIG_AUFS_DEBUG
18353 +void au_plink_list(struct super_block *sb)
18354 +{
18355 +       struct au_sbinfo *sbinfo;
18356 +       struct list_head *plink_list;
18357 +       struct pseudo_link *plink;
18358 +
18359 +       SiMustAnyLock(sb);
18360 +
18361 +       sbinfo = au_sbi(sb);
18362 +       AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
18363 +
18364 +       plink_list = &sbinfo->si_plink.head;
18365 +       spin_lock(&sbinfo->si_plink.spin); /* the list is walked under its spinlock */
18366 +       list_for_each_entry(plink, plink_list, list)
18367 +               AuDbg("%lu\n", plink->inode->i_ino);
18368 +       spin_unlock(&sbinfo->si_plink.spin);
18369 +}
18370 +#endif
18371 +
18372 +/* is the inode pseudo-linked? returns 1 when @inode is on the plink list, otherwise 0 */
18373 +int au_plink_test(struct inode *inode)
18374 +{
18375 +       int found;
18376 +       struct au_sbinfo *sbinfo;
18377 +       struct list_head *plink_list;
18378 +       struct pseudo_link *plink;
18379 +
18380 +       sbinfo = au_sbi(inode->i_sb);
18381 +       AuRwMustAnyLock(&sbinfo->si_rwsem);
18382 +       AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
18383 +
18384 +       found = 0;
18385 +       plink_list = &sbinfo->si_plink.head;
18386 +       spin_lock(&sbinfo->si_plink.spin);
18387 +       list_for_each_entry(plink, plink_list, list)
18388 +               if (plink->inode == inode) { /* entries are matched by inode pointer */
18389 +                       found = 1;
18390 +                       break;
18391 +               }
18392 +       spin_unlock(&sbinfo->si_plink.spin);
18393 +       return found;
18394 +}
18395 +
18396 +/* ---------------------------------------------------------------------- */
18397 +
18398 +/*
18399 + * generate a name for plink, "<aufs inode number>.<branch inode number>".
18400 + * the file will be stored under AUFS_WH_PLINKDIR.
18401 + */
18402 +/* 20 is the max decimal digits of a 64bit ulong, +1 for '.' or the NUL */
18403 +#define PLINK_NAME_LEN ((20 + 1) * 2)
18404 +
18405 +static int plink_name(char *name, int len, struct inode *inode,
18406 +                     aufs_bindex_t bindex)
18407 +{
18408 +       unsigned long ino, h_ino;
18409 +
18410 +       /* the aufs inode number first, then the one on the branch */
18411 +       h_ino = au_h_iptr(inode, bindex)->i_ino;
18412 +       ino = inode->i_ino;
18413 +       return snprintf(name, len, "%lu.%lu", ino, h_ino);
18414 +}
18415 +
18416 +/* lookup the plink-ed @inode under the branch at @bindex */
18417 +struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
18418 +{
18419 +       struct dentry *h_dentry, *h_parent;
18420 +       struct au_branch *br;
18421 +       struct inode *h_dir;
18422 +       char a[PLINK_NAME_LEN];
18423 +       struct qstr tgtname = {
18424 +               .name   = a
18425 +       };
18426 +
18427 +       br = au_sbr(inode->i_sb, bindex);
18428 +       h_parent = br->br_wbr->wbr_plink;       /* the dir searched for the plink name */
18429 +       h_dir = h_parent->d_inode;
18430 +       tgtname.len = plink_name(a, sizeof(a), inode, bindex);  /* fills a[] too */
18431 +
18432 +       /* always superio. */
18433 +       mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2);
18434 +       h_dentry = au_sio_lkup_one(&tgtname, h_parent, br);
18435 +       mutex_unlock(&h_dir->i_mutex);
18436 +       return h_dentry;        /* whatever au_sio_lkup_one() returned, unchecked */
18437 +}
18438 +
18439 +/* create a pseudo-link: link @h_dentry as @tgt under @h_parent; 0 or -errno */
18440 +static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
18441 +                     struct dentry *h_dentry, struct au_branch *br)
18442 +{
18443 +       int err;
18444 +       struct path h_path = {
18445 +               .mnt = br->br_mnt
18446 +       };
18447 +       struct inode *h_dir;
18448 +
18449 +       h_dir = h_parent->d_inode;
18450 + again:        /* re-entered after a stale entry named @tgt was unlinked */
18451 +       h_path.dentry = au_lkup_one(tgt, h_parent, br, /*nd*/NULL);
18452 +       err = PTR_ERR(h_path.dentry);
18453 +       if (IS_ERR(h_path.dentry))
18454 +               goto out;
18455 +
18456 +       err = 0;
18457 +       /* wh.plink dir is not monitored */
18458 +       if (h_path.dentry->d_inode
18459 +           && h_path.dentry->d_inode != h_dentry->d_inode) {
18460 +               err = vfsub_unlink(h_dir, &h_path, /*force*/0); /* @tgt names another inode */
18461 +               dput(h_path.dentry);
18462 +               h_path.dentry = NULL;   /* the reference is already dropped */
18463 +               if (!err)
18464 +                       goto again;
18465 +       }
18466 +       if (!err && !h_path.dentry->d_inode)    /* @tgt does not exist yet */
18467 +               err = vfsub_link(h_dentry, h_dir, &h_path);
18468 +       dput(h_path.dentry);    /* NULL here when the unlink above failed */
18469 +
18470 + out:
18471 +       return err;
18472 +}
18473 +
18474 +struct do_whplink_args {        /* arguments of do_whplink(), packed for call_do_whplink() */
18475 +       int *errp;      /* where the result of do_whplink() is stored */
18476 +       struct qstr *tgt;
18477 +       struct dentry *h_parent;
18478 +       struct dentry *h_dentry;
18479 +       struct au_branch *br;
18480 +};
18481 +
18482 +static void call_do_whplink(void *args) /* callback given to au_wkq_wait() by whplink() */
18483 +{
18484 +       struct do_whplink_args *a = args;
18485 +       *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
18486 +}
18487 +
18488 +static int whplink(struct dentry *h_dentry, struct inode *inode,        /* 0 or -errno */
18489 +                  aufs_bindex_t bindex, struct au_branch *br)
18490 +{
18491 +       int err, wkq_err;
18492 +       struct au_wbr *wbr;
18493 +       struct dentry *h_parent;
18494 +       struct inode *h_dir;
18495 +       char a[PLINK_NAME_LEN];
18496 +       struct qstr tgtname = {
18497 +               .name = a
18498 +       };
18499 +
18500 +       wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
18501 +       h_parent = wbr->wbr_plink;      /* the dir in which the link is created */
18502 +       h_dir = h_parent->d_inode;
18503 +       tgtname.len = plink_name(a, sizeof(a), inode, bindex);
18504 +
18505 +       /* always superio. */
18506 +       mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2);
18507 +       if (!au_test_wkq(current)) {    /* else-branch calls do_whplink() directly */
18508 +               struct do_whplink_args args = {
18509 +                       .errp           = &err,
18510 +                       .tgt            = &tgtname,
18511 +                       .h_parent       = h_parent,
18512 +                       .h_dentry       = h_dentry,
18513 +                       .br             = br
18514 +               };
18515 +               wkq_err = au_wkq_wait(call_do_whplink, &args);  /* result lands in err */
18516 +               if (unlikely(wkq_err))
18517 +                       err = wkq_err;  /* au_wkq_wait() itself failed */
18518 +       } else
18519 +               err = do_whplink(&tgtname, h_parent, h_dentry, br);
18520 +       mutex_unlock(&h_dir->i_mutex);
18521 +
18522 +       return err;
18523 +}
18524 +
18525 +/* free a single plink; a non-zero @do_del also removes it from the list */
18526 +static void do_put_plink(struct pseudo_link *plink, int do_del)
18527 +{
18528 +       iput(plink->inode);     /* pairs with au_igrab() in au_plink_append() */
18529 +       if (do_del)
18530 +               list_del(&plink->list); /* no locking here, it is up to the caller */
18531 +       kfree(plink);
18532 +}
18533 +
18534 +/*
18535 + * create a new pseudo-link for @h_dentry on @bindex.
18536 + * the linked inode is held in aufs @inode.
18537 + */
18538 +void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
18539 +                    struct dentry *h_dentry)
18540 +{
18541 +       struct super_block *sb;
18542 +       struct au_sbinfo *sbinfo;
18543 +       struct list_head *plink_list;
18544 +       struct pseudo_link *plink;
18545 +       int found = 0, err = 0, cnt = 0;
18546 +
18547 +       sb = inode->i_sb;
18548 +       sbinfo = au_sbi(sb);
18549 +       AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
18550 +
18551 +       plink_list = &sbinfo->si_plink.head;
18552 +       spin_lock(&sbinfo->si_plink.spin);
18553 +       list_for_each_entry(plink, plink_list, list) {
18554 +               cnt++;
18555 +               if (plink->inode == inode) {
18556 +                       found = 1;
18557 +                       break;
18558 +               }
18559 +       }
18560 +       if (found) {    /* already registered, nothing to do */
18561 +               spin_unlock(&sbinfo->si_plink.spin);
18562 +               return;
18563 +       }
18564 +
18565 +       /* GFP_ATOMIC since the plink spinlock is held */
18566 +       plink = kmalloc(sizeof(*plink), GFP_ATOMIC);
18567 +       if (plink) {
18568 +               plink->inode = au_igrab(inode);
18569 +               list_add(&plink->list, plink_list);
18570 +               cnt++;
18571 +       } else
18572 +               err = -ENOMEM;
18573 +       spin_unlock(&sbinfo->si_plink.spin);
18574 +
18575 +       if (!err) {
18576 +               au_plink_maint_block(sb);
18577 +               err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
18578 +       }
18579 +
18580 +       if (unlikely(cnt > AUFS_PLINK_WARN))
18581 +               AuWarn1("unexpectedly many pseudo links, %d\n", cnt);
18582 +       if (unlikely(err)) {
18583 +               AuWarn("err %d, damaged pseudo link.\n", err);
18584 +               if (plink) {
18585 +                       /* unlink under the spinlock, iput/kfree outside of it */
18586 +                       spin_lock(&sbinfo->si_plink.spin);
18587 +                       list_del(&plink->list);
18588 +                       spin_unlock(&sbinfo->si_plink.spin);
18589 +                       do_put_plink(plink, /*do_del*/0);
18590 +               }
18591 +       }
18592 +}
18593 +
18594 +/* free all plinks and leave the list empty */
18595 +void au_plink_put(struct super_block *sb)
18596 +{
18597 +       struct au_sbinfo *sbinfo;
18598 +       struct list_head *plink_list;
18599 +       struct pseudo_link *plink, *tmp;
18600 +
18601 +       SiMustWriteLock(sb);
18602 +
18603 +       sbinfo = au_sbi(sb);
18604 +       AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
18605 +
18606 +       plink_list = &sbinfo->si_plink.head;
18607 +       /* no spin_lock since sbinfo is write-locked */
18608 +       list_for_each_entry_safe(plink, tmp, plink_list, list)
18609 +               do_put_plink(plink, 0); /* no list_del(), the head is reset below */
18610 +       INIT_LIST_HEAD(plink_list);
18611 +}
18612 +
18613 +/* free the plinks on a branch specified by @br_id */
18614 +void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
18615 +{
18616 +       struct au_sbinfo *sbinfo;
18617 +       struct list_head *plink_list;
18618 +       struct pseudo_link *plink, *tmp;
18619 +       struct inode *inode;
18620 +       aufs_bindex_t bstart, bend, bindex;
18621 +       unsigned char do_put;
18622 +
18623 +       SiMustWriteLock(sb);
18624 +
18625 +       sbinfo = au_sbi(sb);
18626 +       AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
18627 +
18628 +       plink_list = &sbinfo->si_plink.head;
18629 +       /* no spin_lock since sbinfo is write-locked */
18630 +       list_for_each_entry_safe(plink, tmp, plink_list, list) {
18631 +               do_put = 0;
18632 +               inode = au_igrab(plink->inode); /* own reference, @plink may be freed below */
18633 +               ii_write_lock_child(inode);
18634 +               bstart = au_ibstart(inode);
18635 +               bend = au_ibend(inode);
18636 +               if (bstart >= 0) {
18637 +                       for (bindex = bstart; bindex <= bend; bindex++) {
18638 +                               if (!au_h_iptr(inode, bindex)
18639 +                                   || au_ii_br_id(inode, bindex) != br_id)
18640 +                                       continue;
18641 +                               au_set_h_iptr(inode, bindex, NULL, 0);  /* clear the slot of @br_id */
18642 +                               do_put = 1;
18643 +                               break;
18644 +                       }
18645 +               } else
18646 +                       do_put_plink(plink, 1); /* negative bstart: free the plink right away */
18647 +
18648 +               if (do_put) {   /* a slot was cleared: keep the plink if any other remains */
18649 +                       for (bindex = bstart; bindex <= bend; bindex++)
18650 +                               if (au_h_iptr(inode, bindex)) {
18651 +                                       do_put = 0;
18652 +                                       break;
18653 +                               }
18654 +                       if (do_put)
18655 +                               do_put_plink(plink, 1);
18656 +               }
18657 +               ii_write_unlock(inode);
18658 +               iput(inode);    /* pairs with au_igrab() above */
18659 +       }
18660 +}
18661 +
18662 +/* ---------------------------------------------------------------------- */
18663 +
18664 +long au_plink_ioctl(struct file *file, unsigned int cmd)        /* plink ioctl commands */
18665 +{
18666 +       long err;
18667 +       struct super_block *sb;
18668 +       struct au_sbinfo *sbinfo;
18669 +
18670 +       err = -EACCES;
18671 +       if (!capable(CAP_SYS_ADMIN))    /* both commands are for CAP_SYS_ADMIN only */
18672 +               goto out;
18673 +
18674 +       err = 0;
18675 +       sb = file->f_dentry->d_sb;
18676 +       sbinfo = au_sbi(sb);
18677 +       switch (cmd) {
18678 +       case AUFS_CTL_PLINK_MAINT:
18679 +               /*
18680 +                * pseudo-link maintenance mode,
18681 +                * cleared by aufs_release_dir()
18682 +                */
18683 +               err = au_plink_maint_enter(file);
18684 +               break;
18685 +       case AUFS_CTL_PLINK_CLEAN:      /* free all plinks when the PLINK option is set */
18686 +               aufs_write_lock(sb->s_root);
18687 +               if (au_opt_test(sbinfo->si_mntflags, PLINK))
18688 +                       au_plink_put(sb);
18689 +               aufs_write_unlock(sb->s_root);
18690 +               break;
18691 +       default:
18692 +               err = -EINVAL;  /* unknown command */
18693 +       }
18694 + out:
18695 +       return err;
18696 +}
18697 diff --git a/fs/aufs/rdu.c b/fs/aufs/rdu.c
18698 new file mode 100644
18699 index 0000000..c38f280
18700 --- /dev/null
18701 +++ b/fs/aufs/rdu.c
18702 @@ -0,0 +1,333 @@
18703 +/*
18704 + * Copyright (C) 2005-2009 Junjiro R. Okajima
18705 + *
18706 + * This program, aufs is free software; you can redistribute it and/or modify
18707 + * it under the terms of the GNU General Public License as published by
18708 + * the Free Software Foundation; either version 2 of the License, or
18709 + * (at your option) any later version.
18710 + *
18711 + * This program is distributed in the hope that it will be useful,
18712 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
18713 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18714 + * GNU General Public License for more details.
18715 + *
18716 + * You should have received a copy of the GNU General Public License
18717 + * along with this program; if not, write to the Free Software
18718 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18719 + */
18720 +
18721 +/*
18722 + * readdir in userspace.
18723 + */
18724 +
18725 +#include <linux/security.h>
18726 +#include <linux/uaccess.h>
18727 +#include <linux/aufs_type.h>
18728 +#include "aufs.h"
18729 +
18730 +/* bits for struct aufs_rdu.flags */
18731 +#define        AuRdu_CALLED    1
18732 +#define        AuRdu_CONT      (1 << 1)
18733 +#define        AuRdu_FULL      (1 << 2)
18734 +#define au_ftest_rdu(flags, name)      ((flags) & AuRdu_##name)
18735 +#define au_fset_rdu(flags, name)       { (flags) |= AuRdu_##name; }
18736 +#define au_fclr_rdu(flags, name)       { (flags) &= ~AuRdu_##name; }
18737 +
18738 +struct au_rdu_arg {     /* passed to au_rdu_fill() through vfsub_readdir() */
18739 +       struct aufs_rdu                 *rdu;   /* the request being served */
18740 +       union au_rdu_ent_ul             ent;    /* where the next entry is stored */
18741 +       unsigned long                   end;    /* rdu->ent.ul + rdu->sz */
18742 +
18743 +       struct super_block              *sb;
18744 +       int                             err;    /* read back by au_rdu_do() */
18745 +};
18746 +/* filldir callback: store one entry into the user buffer, or mark it FULL */
18747 +static int au_rdu_fill(void *__arg, const char *name, int nlen,
18748 +                      loff_t offset, u64 h_ino, unsigned int d_type)
18749 +{
18750 +       int err, len;
18751 +       struct au_rdu_arg *arg = __arg;
18752 +       struct aufs_rdu *rdu = arg->rdu;
18753 +       struct au_rdu_ent ent;
18754 +
18755 +       err = 0;
18756 +       arg->err = 0;
18757 +       au_fset_rdu(rdu->cookie.flags, CALLED);
18758 +       len = au_rdu_len(nlen);
18759 +       if (arg->ent.ul + len  < arg->end) {
18760 +               /* zero it first, the whole of @ent is copied to userspace */
18761 +               memset(&ent, 0, sizeof(ent));
18762 +               ent.ino = h_ino;
18763 +               ent.bindex = rdu->cookie.bindex;
18764 +               ent.type = d_type;
18765 +               ent.nlen = nlen;
18766 +               if (unlikely(nlen > AUFS_MAX_NAMELEN))
18767 +                       ent.type = DT_UNKNOWN;
18768 +
18769 +               err = -EFAULT;
18770 +               if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
18771 +                       goto out;
18772 +               if (copy_to_user(arg->ent.e->name, name, nlen))
18773 +                       goto out;
18774 +               /* the terminating NULL */
18775 +               if (__put_user(0, arg->ent.e->name + nlen))
18776 +                       goto out;
18777 +               err = 0;
18778 +               arg->ent.ul += len;     /* advance to the next slot */
18779 +               rdu->rent++;
18780 +       } else {
18781 +               err = -EFAULT;  /* no room; presumably makes the readdir stop */
18782 +               au_fset_rdu(rdu->cookie.flags, FULL);
18783 +               rdu->full = 1;
18784 +               rdu->tail = arg->ent;
18785 +       }
18786 +
18787 + out:
18788 +       return err;
18789 +}
18790 +
18791 +static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)       /* read one branch dir */
18792 +{
18793 +       int err;
18794 +       loff_t offset;
18795 +       struct au_rdu_cookie *cookie = &arg->rdu->cookie;
18796 +
18797 +       offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET); /* resume at the saved position */
18798 +       err = offset;
18799 +       if (unlikely(offset != cookie->h_pos))
18800 +               goto out;
18801 +
18802 +       err = 0;
18803 +       do {
18804 +               arg->err = 0;
18805 +               au_fclr_rdu(cookie->flags, CALLED);
18806 +               /* smp_mb(); */
18807 +               err = vfsub_readdir(h_file, au_rdu_fill, arg);
18808 +               if (err >= 0)
18809 +                       err = arg->err;
18810 +       } while (!err   /* repeat while entries were stored and the buffer is not full */
18811 +                && au_ftest_rdu(cookie->flags, CALLED)
18812 +                && !au_ftest_rdu(cookie->flags, FULL));
18813 +       cookie->h_pos = h_file->f_pos;  /* remembered for the next call */
18814 +
18815 + out:
18816 +       AuTraceErr(err);
18817 +       return err;
18818 +}
18819 +
18820 +static int au_rdu(struct file *file, struct aufs_rdu *rdu)      /* serves AUFS_CTL_RDU */
18821 +{
18822 +       int err;
18823 +       aufs_bindex_t bend;
18824 +       struct au_rdu_arg arg;
18825 +       struct dentry *dentry;
18826 +       struct inode *inode;
18827 +       struct file *h_file;
18828 +       struct au_rdu_cookie *cookie = &rdu->cookie;
18829 +
18830 +       err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);    /* the whole user buffer */
18831 +       if (unlikely(err)) {
18832 +               err = -EFAULT;
18833 +               AuTraceErr(err);
18834 +               goto out;
18835 +       }
18836 +       rdu->rent = 0;
18837 +       rdu->tail = rdu->ent;
18838 +       rdu->full = 0;
18839 +       arg.rdu = rdu;
18840 +       arg.ent = rdu->ent;
18841 +       arg.end = arg.ent.ul;
18842 +       arg.end += rdu->sz;     /* the end of the user buffer */
18843 +
18844 +       err = -ENOTDIR;
18845 +       if (unlikely(!file->f_op || !file->f_op->readdir))
18846 +               goto out;
18847 +
18848 +       err = security_file_permission(file, MAY_READ);
18849 +       AuTraceErr(err);
18850 +       if (unlikely(err))
18851 +               goto out;
18852 +
18853 +       dentry = file->f_dentry;
18854 +       inode = dentry->d_inode;
18855 +#if 1
18856 +       mutex_lock(&inode->i_mutex);
18857 +#else
18858 +       err = mutex_lock_killable(&inode->i_mutex);
18859 +       AuTraceErr(err);
18860 +       if (unlikely(err))
18861 +               goto out;
18862 +#endif
18863 +       err = -ENOENT;
18864 +       if (unlikely(IS_DEADDIR(inode)))
18865 +               goto out_mtx;
18866 +
18867 +       arg.sb = inode->i_sb;
18868 +       si_read_lock(arg.sb, AuLock_FLUSH);
18869 +       fi_read_lock(file);
18870 +
18871 +       err = -EAGAIN;  /* a continued call must match the current au_figen() */
18872 +       if (unlikely(au_ftest_rdu(cookie->flags, CONT)
18873 +                    && cookie->generation != au_figen(file)))
18874 +               goto out_unlock;
18875 +
18876 +       err = 0;
18877 +       if (!rdu->blk) {        /* not given by the caller: si_rdblk, then au_dir_size() */
18878 +               rdu->blk = au_sbi(arg.sb)->si_rdblk;
18879 +               if (!rdu->blk)
18880 +                       rdu->blk = au_dir_size(file, /*dentry*/NULL);
18881 +       }
18882 +       bend = au_fbstart(file);        /* 'bend' temporarily holds the start index */
18883 +       if (cookie->bindex < bend)
18884 +               cookie->bindex = bend;
18885 +       bend = au_fbend(file);
18886 +       /* AuDbg("b%d, b%d\n", cookie->bindex, bend); */
18887 +       for (; !err && cookie->bindex <= bend;
18888 +            cookie->bindex++, cookie->h_pos = 0) {     /* h_pos is 0 for the next branch */
18889 +               h_file = au_h_fptr(file, cookie->bindex);
18890 +               if (!h_file)
18891 +                       continue;
18892 +
18893 +               au_fclr_rdu(cookie->flags, FULL);
18894 +               err = au_rdu_do(h_file, &arg);
18895 +               AuTraceErr(err);
18896 +               if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
18897 +                       break;  /* bindex and h_pos are left as they are */
18898 +       }
18899 +       AuDbg("rent %llu\n", rdu->rent);
18900 +
18901 +       if (!err && !au_ftest_rdu(cookie->flags, CONT)) {       /* first successful call */
18902 +               rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
18903 +               au_fset_rdu(cookie->flags, CONT);
18904 +               cookie->generation = au_figen(file);
18905 +       }
18906 +
18907 +       ii_read_lock_child(inode);
18908 +       fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibstart(inode)));
18909 +       ii_read_unlock(inode);
18910 +
18911 + out_unlock:
18912 +       fi_read_unlock(file);
18913 +       si_read_unlock(arg.sb);
18914 + out_mtx:
18915 +       mutex_unlock(&inode->i_mutex);
18916 + out:
18917 +       AuTraceErr(err);
18918 +       return err;
18919 +}
18920 +
18921 +static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)  /* serves AUFS_CTL_RDU_INO */
18922 +{
18923 +       int err;
18924 +       ino_t ino;
18925 +       unsigned long long nent;
18926 +       union au_rdu_ent_ul *u;
18927 +       struct au_rdu_ent ent;
18928 +       struct super_block *sb;
18929 +
18930 +       err = 0;
18931 +       nent = rdu->nent;       /* the number of entries to process */
18932 +       u = &rdu->ent;          /* cursor over the entries in userspace */
18933 +       sb = file->f_dentry->d_sb;
18934 +       si_read_lock(sb, AuLock_FLUSH);
18935 +       while (nent-- > 0) {
18936 +               err = !access_ok(VERIFY_WRITE, u->e, sizeof(ent));
18937 +               if (unlikely(err)) {
18938 +                       err = -EFAULT;
18939 +                       AuTraceErr(err);
18940 +                       break;
18941 +               }
18942 +
18943 +               err = copy_from_user(&ent, u->e, sizeof(ent));
18944 +               if (unlikely(err)) {
18945 +                       err = -EFAULT;
18946 +                       AuTraceErr(err);
18947 +                       break;
18948 +               }
18949 +
18950 +               /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
18951 +               if (!ent.wh)    /* the 'wh' flag of the entry selects the helper */
18952 +                       err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
18953 +               else
18954 +                       err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
18955 +                                       &ino);
18956 +               if (unlikely(err)) {
18957 +                       AuTraceErr(err);
18958 +                       break;
18959 +               }
18960 +
18961 +               err = __put_user(ino, &u->e->ino);      /* overwrite the ino field in place */
18962 +               if (unlikely(err)) {
18963 +                       err = -EFAULT;
18964 +                       AuTraceErr(err);
18965 +                       break;
18966 +               }
18967 +               u->ul += au_rdu_len(ent.nlen);  /* step to the next entry */
18968 +       }
18969 +       si_read_unlock(sb);
18970 +
18971 +       return err;
18972 +}
18973 +
18974 +/* ---------------------------------------------------------------------- */
18975 +
18976 +static int au_rdu_verify(struct aufs_rdu *rdu)  /* 0 when the sizes match, else -EINVAL */
18977 +{
18978 +       AuDbg("rdu{%llu, %p, (%u, %u) | %u | %llu, %u, %u | "
18979 +             "%llu, b%d, 0x%x, g%u}\n",
18980 +             rdu->sz, rdu->ent.e, rdu->verify[0], rdu->verify[1],
18981 +             rdu->blk,
18982 +             rdu->rent, rdu->shwh, rdu->full,
18983 +             rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
18984 +             rdu->cookie.generation);
18985 +
18986 +       if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu) /* size of the struct */
18987 +           && rdu->verify[AufsCtlRduV_SZ_PTR] == sizeof(rdu))  /* size of a pointer */
18988 +               return 0;
18989 +
18990 +       AuDbg("%u:%u, %u:%u\n",
18991 +             rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu),
18992 +             rdu->verify[AufsCtlRduV_SZ_PTR], (unsigned int)sizeof(rdu));
18993 +       return -EINVAL;
18994 +}
18995 +
18996 +long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)       /* RDU entry point */
18997 +{
18998 +       long err, e;
18999 +       struct aufs_rdu rdu;
19000 +       void __user *p = (void __user *)arg;
19001 +
19002 +       err = copy_from_user(&rdu, p, sizeof(rdu));     /* work on a kernel copy of the request */
19003 +       if (unlikely(err)) {
19004 +               err = -EFAULT;
19005 +               AuTraceErr(err);
19006 +               goto out;
19007 +       }
19008 +       err = au_rdu_verify(&rdu);
19009 +       if (unlikely(err))
19010 +               goto out;
19011 +
19012 +       switch (cmd) {
19013 +       case AUFS_CTL_RDU:
19014 +               err = au_rdu(file, &rdu);
19015 +               if (unlikely(err))
19016 +                       break;
19017 +
19018 +               e = copy_to_user(p, &rdu, sizeof(rdu)); /* hand the updated request back */
19019 +               if (unlikely(e)) {
19020 +                       err = -EFAULT;
19021 +                       AuTraceErr(err);
19022 +               }
19023 +               break;
19024 +       case AUFS_CTL_RDU_INO:  /* the request itself is not copied back */
19025 +               err = au_rdu_ino(file, &rdu);
19026 +               break;
19027 +
19028 +       default:
19029 +               err = -EINVAL;
19030 +       }
19031 +
19032 + out:
19033 +       AuTraceErr(err);
19034 +       return err;
19035 +}
19036 diff --git a/fs/aufs/rwsem.h b/fs/aufs/rwsem.h
19037 new file mode 100644
19038 index 0000000..dfd2c68
19039 --- /dev/null
19040 +++ b/fs/aufs/rwsem.h
19041 @@ -0,0 +1,186 @@
19042 +/*
19043 + * Copyright (C) 2005-2009 Junjiro R. Okajima
19044 + *
19045 + * This program, aufs is free software; you can redistribute it and/or modify
19046 + * it under the terms of the GNU General Public License as published by
19047 + * the Free Software Foundation; either version 2 of the License, or
19048 + * (at your option) any later version.
19049 + *
19050 + * This program is distributed in the hope that it will be useful,
19051 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
19052 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19053 + * GNU General Public License for more details.
19054 + *
19055 + * You should have received a copy of the GNU General Public License
19056 + * along with this program; if not, write to the Free Software
19057 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19058 + */
19059 +
19060 +/*
19061 + * simple read-write semaphore wrappers
19062 + */
19063 +
19064 +#ifndef __AUFS_RWSEM_H__
19065 +#define __AUFS_RWSEM_H__
19066 +
19067 +#ifdef __KERNEL__
19068 +
19069 +#include <linux/rwsem.h>
19070 +
19071 +struct au_rwsem {       /* rw_semaphore plus optional debug counters */
19072 +       struct rw_semaphore     rwsem;
19073 +#ifdef CONFIG_AUFS_DEBUG
19074 +       /* just for debugging, not almighty counter */
19075 +       atomic_t                rcnt, wcnt;     /* counts of read and write locks taken */
19076 +#endif
19077 +};
19078 +
19079 +#ifdef CONFIG_AUFS_DEBUG
19080 +#define AuDbgCntInit(rw) do { \
19081 +       atomic_set(&(rw)->rcnt, 0); \
19082 +       atomic_set(&(rw)->wcnt, 0); \
19083 +       smp_mb(); /* atomic set */ \
19084 +} while (0)
19085 +
19086 +#define AuDbgRcntInc(rw)       atomic_inc_return(&(rw)->rcnt)
19087 +#define AuDbgRcntDec(rw)       WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0)     /* went negative */
19088 +#define AuDbgWcntInc(rw)       WARN_ON(atomic_inc_return(&(rw)->wcnt) > 1)     /* second writer */
19089 +#define AuDbgWcntDec(rw)       WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0)     /* went negative */
19090 +#else
19091 +#define AuDbgCntInit(rw)       do {} while (0)
19092 +#define AuDbgRcntInc(rw)       do {} while (0)
19093 +#define AuDbgRcntDec(rw)       do {} while (0)
19094 +#define AuDbgWcntInc(rw)       do {} while (0)
19095 +#define AuDbgWcntDec(rw)       do {} while (0)
19096 +#endif /* CONFIG_AUFS_DEBUG */
19097 +
19098 +/* to make debugging easier, do not make them inlined functions */
19099 +#define AuRwMustNoWaiters(rw)  AuDebugOn(!list_empty(&(rw)->rwsem.wait_list))
19100 +/* rwsem_is_locked() is unusable, the debug counters are tested instead */
19101 +#define AuRwMustReadLock(rw)   AuDebugOn(atomic_read(&(rw)->rcnt) <= 0)
19102 +#define AuRwMustWriteLock(rw)  AuDebugOn(atomic_read(&(rw)->wcnt) <= 0)
19103 +#define AuRwMustAnyLock(rw)    AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \
19104 +                                       && atomic_read(&(rw)->wcnt) <= 0)
19105 +#define AuRwDestroy(rw)                AuDebugOn(atomic_read(&(rw)->rcnt) \
19106 +                                       || atomic_read(&(rw)->wcnt))
19107 +
19108 +static inline void au_rw_init(struct au_rwsem *rw)
19109 +{
19110 +       AuDbgCntInit(rw);       /* zero the debug counters, no-op without CONFIG_AUFS_DEBUG */
19111 +       init_rwsem(&rw->rwsem);
19112 +}
19113 +
19114 +static inline void au_rw_init_wlock(struct au_rwsem *rw)
19115 +{
19116 +       au_rw_init(rw);
19117 +       down_write(&rw->rwsem); /* returns with the new rwsem write-locked */
19118 +       AuDbgWcntInc(rw);
19119 +}
19120 +
19121 +static inline void au_rw_init_wlock_nested(struct au_rwsem *rw,
19122 +                                          unsigned int lsc)
19123 +{
19124 +       au_rw_init(rw);
19125 +       down_write_nested(&rw->rwsem, lsc);     /* as au_rw_init_wlock(), with subclass @lsc */
19126 +       AuDbgWcntInc(rw);
19127 +}
19128 +
19129 +static inline void au_rw_read_lock(struct au_rwsem *rw)
19130 +{
19131 +       down_read(&rw->rwsem);
19132 +       AuDbgRcntInc(rw);       /* counted only after the lock is acquired */
19133 +}
19134 +
19135 +static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc)
19136 +{
19137 +       down_read_nested(&rw->rwsem, lsc);      /* as au_rw_read_lock(), with subclass @lsc */
19138 +       AuDbgRcntInc(rw);
19139 +}
19140 +
19141 +static inline void au_rw_read_unlock(struct au_rwsem *rw)
19142 +{
19143 +       AuRwMustReadLock(rw);   /* debug check: a read lock must be held */
19144 +       AuDbgRcntDec(rw);
19145 +       up_read(&rw->rwsem);
19146 +}
19147 +
19148 +static inline void au_rw_dgrade_lock(struct au_rwsem *rw)
19149 +{
19150 +       AuRwMustWriteLock(rw);
19151 +       AuDbgRcntInc(rw);       /* the writer becomes a reader in the debug counters */
19152 +       AuDbgWcntDec(rw);
19153 +       downgrade_write(&rw->rwsem);
19154 +}
19155 +
19156 +static inline void au_rw_write_lock(struct au_rwsem *rw)
19157 +{
19158 +       down_write(&rw->rwsem);
19159 +       AuDbgWcntInc(rw);       /* with CONFIG_AUFS_DEBUG, warns if there is another writer */
19160 +}
19161 +
19162 +static inline void au_rw_write_lock_nested(struct au_rwsem *rw,
19163 +                                          unsigned int lsc)
19164 +{
19165 +       down_write_nested(&rw->rwsem, lsc);     /* as au_rw_write_lock(), with subclass @lsc */
19166 +       AuDbgWcntInc(rw);
19167 +}
19168 +
19169 +static inline void au_rw_write_unlock(struct au_rwsem *rw)
19170 +{
19171 +       AuRwMustWriteLock(rw);  /* debug check: the write lock must be held */
19172 +       AuDbgWcntDec(rw);
19173 +       up_write(&rw->rwsem);
19174 +}
19175 +
19176 +/* why is the _nested version of trylock not defined? */
19177 +static inline int au_rw_read_trylock(struct au_rwsem *rw)
19178 +{
19179 +       int ret = down_read_trylock(&rw->rwsem);
19180 +       if (ret)        /* count the reader only when the lock was taken */
19181 +               AuDbgRcntInc(rw);
19182 +       return ret;     /* the value of down_read_trylock() as is */
19183 +}
19184 +
19185 +static inline int au_rw_write_trylock(struct au_rwsem *rw)
19186 +{
19187 +       int ret = down_write_trylock(&rw->rwsem);
19188 +       if (ret)        /* count the writer only when the lock was taken */
19189 +               AuDbgWcntInc(rw);
19190 +       return ret;     /* the value of down_write_trylock() as is */
19191 +}
19192 +
19193 +#undef AuDbgCntInit
19194 +#undef AuDbgRcntInc
19195 +#undef AuDbgRcntDec
19196 +#undef AuDbgWcntInc
19197 +#undef AuDbgWcntDec
19198 +/* define prefix##_{read,write}_{lock,trylock}(param), all operating on @rwsem */
19199 +#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
19200 +static inline void prefix##_read_lock(param) \
19201 +{ au_rw_read_lock(rwsem); } \
19202 +static inline void prefix##_write_lock(param) \
19203 +{ au_rw_write_lock(rwsem); } \
19204 +static inline int prefix##_read_trylock(param) \
19205 +{ return au_rw_read_trylock(rwsem); } \
19206 +static inline int prefix##_write_trylock(param) \
19207 +{ return au_rw_write_trylock(rwsem); }
19208 +/* why is the _nested version of trylock not defined? */
19209 +/* static inline void prefix##_read_trylock_nested(param, lsc)
19210 +{ au_rw_read_trylock_nested(rwsem, lsc); }
19211 +static inline void prefix##_write_trylock_nested(param, lsc)
19212 +{ au_rw_write_trylock_nested(rwsem, lsc); } */
19213 +/* define prefix##_{read,write}_unlock(param) and prefix##_downgrade_lock(param) */
19214 +#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \
19215 +static inline void prefix##_read_unlock(param) \
19216 +{ au_rw_read_unlock(rwsem); } \
19217 +static inline void prefix##_write_unlock(param) \
19218 +{ au_rw_write_unlock(rwsem); } \
19219 +static inline void prefix##_downgrade_lock(param) \
19220 +{ au_rw_dgrade_lock(rwsem); }
19221 +/* both of the above: the lock and the unlock wrappers for one rwsem */
19222 +#define AuSimpleRwsemFuncs(prefix, param, rwsem) \
19223 +       AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
19224 +       AuSimpleUnlockRwsemFuncs(prefix, param, rwsem)
19225 +
19226 +#endif /* __KERNEL__ */
19227 +#endif /* __AUFS_RWSEM_H__ */
19228 diff --git a/fs/aufs/sbinfo.c b/fs/aufs/sbinfo.c
19229 new file mode 100644
19230 index 0000000..8a797a7
19231 --- /dev/null
19232 +++ b/fs/aufs/sbinfo.c
19233 @@ -0,0 +1,211 @@
19234 +/*
19235 + * Copyright (C) 2005-2009 Junjiro R. Okajima
19236 + *
19237 + * This program, aufs is free software; you can redistribute it and/or modify
19238 + * it under the terms of the GNU General Public License as published by
19239 + * the Free Software Foundation; either version 2 of the License, or
19240 + * (at your option) any later version.
19241 + *
19242 + * This program is distributed in the hope that it will be useful,
19243 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
19244 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19245 + * GNU General Public License for more details.
19246 + *
19247 + * You should have received a copy of the GNU General Public License
19248 + * along with this program; if not, write to the Free Software
19249 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19250 + */
19251 +
19252 +/*
19253 + * superblock private data
19254 + */
19255 +
19256 +#include "aufs.h"
19257 +
19258 +/*
19259 + * free the au_sbinfo which embeds @kobj; necessary even when sysfs is disabled.
19260 + */
19261 +void au_si_free(struct kobject *kobj)
19262 +{
19263 +       struct au_sbinfo *sbinfo;
19264 +       struct super_block *sb;
19265 +
19266 +       sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
19267 +       AuDebugOn(!list_empty(&sbinfo->si_plink.head)); /* plink list must be empty */
19268 +       AuDebugOn(sbinfo->si_plink_maint);
19269 +
19270 +       sb = sbinfo->si_sb;
19271 +       si_write_lock(sb);
19272 +       au_xino_clr(sb);
19273 +       au_br_free(sbinfo);
19274 +       kfree(sbinfo->si_branch); /* the array of branch pointers itself */
19275 +       mutex_destroy(&sbinfo->si_xib_mtx);
19276 +       si_write_unlock(sb);
19277 +       AuRwDestroy(&sbinfo->si_rwsem); /* only after the write lock is dropped */
19278 +
19279 +       kfree(sbinfo);
19280 +}
19281 +/* allocate sbinfo, fill in the defaults and attach it to @sb; returns 0 or -errno */
19282 +int au_si_alloc(struct super_block *sb)
19283 +{
19284 +       int err;
19285 +       struct au_sbinfo *sbinfo;
19286 +
19287 +       err = -ENOMEM;
19288 +       sbinfo = kmalloc(sizeof(*sbinfo), GFP_NOFS);
19289 +       if (unlikely(!sbinfo))
19290 +               goto out;
19291 +
19292 +       /* one zeroed slot for now; au_sbr_realloc() resizes the array */
19293 +       sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
19294 +       if (unlikely(!sbinfo->si_branch))
19295 +               goto out_sbinfo;
19296 +
19297 +       memset(&sbinfo->si_kobj, 0, sizeof(sbinfo->si_kobj)); /* kmalloc() did not zero it */
19298 +       err = sysaufs_si_init(sbinfo);
19299 +       if (unlikely(err))
19300 +               goto out_br;
19301 +
19302 +       au_nwt_init(&sbinfo->si_nowait);
19303 +       au_rw_init_wlock(&sbinfo->si_rwsem);
19304 +       sbinfo->si_generation = 0;
19305 +       sbinfo->au_si_status = 0;
19306 +       sbinfo->si_bend = -1; /* no branch yet */
19307 +       sbinfo->si_last_br_id = 0;
19308 +
19309 +       sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
19310 +       sbinfo->si_wbr_create = AuWbrCreate_Def;
19311 +       sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + AuWbrCopyup_Def;
19312 +       sbinfo->si_wbr_create_ops = au_wbr_create_ops + AuWbrCreate_Def;
19313 +
19314 +       sbinfo->si_mntflags = AuOpt_Def;
19315 +
19316 +       sbinfo->si_xread = NULL;
19317 +       sbinfo->si_xwrite = NULL;
19318 +       sbinfo->si_xib = NULL;
19319 +       mutex_init(&sbinfo->si_xib_mtx);
19320 +       sbinfo->si_xib_buf = NULL;
19321 +       sbinfo->si_xino_brid = -1;
19322 +       /* leave si_xib_last_pindex and si_xib_next_bit */
19323 +
19324 +       sbinfo->si_rdcache = AUFS_RDCACHE_DEF * HZ; /* kept in jiffies */
19325 +       sbinfo->si_rdblk = AUFS_RDBLK_DEF;
19326 +       sbinfo->si_rdhash = AUFS_RDHASH_DEF;
19327 +       sbinfo->si_dirwh = AUFS_DIRWH_DEF;
19328 +
19329 +       au_spl_init(&sbinfo->si_plink);
19330 +       init_waitqueue_head(&sbinfo->si_plink_wq);
19331 +       spin_lock_init(&sbinfo->si_plink_maint_lock);
19332 +       sbinfo->si_plink_maint = NULL;
19333 +
19334 +       /* leave other members for sysaufs and si_mnt. */
19335 +       sbinfo->si_sb = sb;
19336 +       sb->s_fs_info = sbinfo;
19337 +       au_debug_sbinfo_init(sbinfo);
19338 +       return 0; /* success */
19339 +
19340 + out_br:
19341 +       kfree(sbinfo->si_branch);
19342 + out_sbinfo:
19343 +       kfree(sbinfo);
19344 + out:
19345 +       return err;
19346 +}
19347 +/* resize sbinfo->si_branch for nbr pointers; 0, or -ENOMEM with si_branch untouched */
19348 +int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr)
19349 +{
19350 +       int err, sz;
19351 +       struct au_branch **brp;
19352 +
19353 +       AuRwMustWriteLock(&sbinfo->si_rwsem);
19354 +
19355 +       err = -ENOMEM;
19356 +       sz = sizeof(*brp) * (sbinfo->si_bend + 1); /* bytes for the current branches */
19357 +       if (unlikely(!sz))
19358 +               sz = sizeof(*brp); /* au_si_alloc() allocated one slot even without a branch */
19359 +       brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS);
19360 +       if (brp) {
19361 +               sbinfo->si_branch = brp;
19362 +               err = 0;
19363 +       }
19364 +
19365 +       return err;
19366 +}
19367 +
19368 +/* ---------------------------------------------------------------------- */
19369 +/* increment the superblock generation and return the new value; needs si write lock */
19370 +unsigned int au_sigen_inc(struct super_block *sb)
19371 +{
19372 +       unsigned int gen;
19373 +
19374 +       SiMustWriteLock(sb);
19375 +
19376 +       gen = ++au_sbi(sb)->si_generation;
19377 +       au_update_digen(sb->s_root); /* root dentry */
19378 +       au_update_iigen(sb->s_root->d_inode); /* root inode */
19379 +       sb->s_root->d_inode->i_version++;
19380 +       return gen;
19381 +}
19382 +/* return an unused, non-zero branch id, or -1 after AUFS_BRANCH_MAX + 1 failed tries */
19383 +aufs_bindex_t au_new_br_id(struct super_block *sb)
19384 +{
19385 +       aufs_bindex_t br_id;
19386 +       int i;
19387 +       struct au_sbinfo *sbinfo;
19388 +
19389 +       SiMustWriteLock(sb);
19390 +
19391 +       sbinfo = au_sbi(sb);
19392 +       for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
19393 +               br_id = ++sbinfo->si_last_br_id;
19394 +               if (br_id && au_br_index(sb, br_id) < 0) /* id 0 is skipped; < 0: id not in use */
19395 +                       return br_id;
19396 +       }
19397 +
19398 +       return -1;
19399 +}
19400 +
19401 +/* ---------------------------------------------------------------------- */
19402 +
19403 +/* si read lock, then di write (DW flag) or di read lock. call at entry point */
19404 +void aufs_read_lock(struct dentry *dentry, int flags)
19405 +{
19406 +       si_read_lock(dentry->d_sb, flags);
19407 +       if (au_ftest_lock(flags, DW))
19408 +               di_write_lock_child(dentry);
19409 +       else
19410 +               di_read_lock_child(dentry, flags);
19411 +}
19412 +/* undo aufs_read_lock(); pass the same flags so the matching di unlock is chosen */
19413 +void aufs_read_unlock(struct dentry *dentry, int flags)
19414 +{
19415 +       if (au_ftest_lock(flags, DW))
19416 +               di_write_unlock(dentry);
19417 +       else
19418 +               di_read_unlock(dentry, flags);
19419 +       si_read_unlock(dentry->d_sb); /* superblock lock is released last */
19420 +}
19421 +/* take the si write lock, then the di write lock of dentry */
19422 +void aufs_write_lock(struct dentry *dentry)
19423 +{
19424 +       si_write_lock(dentry->d_sb);
19425 +       di_write_lock_child(dentry);
19426 +}
19427 +/* undo aufs_write_lock(): di write unlock first, then si write unlock */
19428 +void aufs_write_unlock(struct dentry *dentry)
19429 +{
19430 +       di_write_unlock(dentry);
19431 +       si_write_unlock(dentry->d_sb);
19432 +}
19433 +/* si read lock on d1's superblock, then di write lock on both d1 and d2 */
19434 +void aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
19435 +{
19436 +       si_read_lock(d1->d_sb, flags);
19437 +       di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIR));
19438 +}
19439 +/* undo aufs_read_and_write_lock2(): both di write locks first, then the si read lock */
19440 +void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
19441 +{
19442 +       di_write_unlock2(d1, d2);
19443 +       si_read_unlock(d1->d_sb);
19444 +}
19445 diff --git a/fs/aufs/spl.h b/fs/aufs/spl.h
19446 new file mode 100644
19447 index 0000000..bcbbd9a
19448 --- /dev/null
19449 +++ b/fs/aufs/spl.h
19450 @@ -0,0 +1,57 @@
19451 +/*
19452 + * Copyright (C) 2005-2009 Junjiro R. Okajima
19453 + *
19454 + * This program, aufs is free software; you can redistribute it and/or modify
19455 + * it under the terms of the GNU General Public License as published by
19456 + * the Free Software Foundation; either version 2 of the License, or
19457 + * (at your option) any later version.
19458 + *
19459 + * This program is distributed in the hope that it will be useful,
19460 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
19461 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19462 + * GNU General Public License for more details.
19463 + *
19464 + * You should have received a copy of the GNU General Public License
19465 + * along with this program; if not, write to the Free Software
19466 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19467 + */
19468 +
19469 +/*
19470 + * simple list protected by a spinlock
19471 + */
19472 +
19473 +#ifndef __AUFS_SPL_H__
19474 +#define __AUFS_SPL_H__
19475 +
19476 +#ifdef __KERNEL__
19477 +
19478 +#include <linux/spinlock.h>
19479 +#include <linux/list.h>
19480 +/* a list head together with the spinlock taken around list updates */
19481 +struct au_splhead {
19482 +       spinlock_t              spin;
19483 +       struct list_head        head;
19484 +};
19485 +/* initialise the spinlock and make the list empty */
19486 +static inline void au_spl_init(struct au_splhead *spl)
19487 +{
19488 +       spin_lock_init(&spl->spin);
19489 +       INIT_LIST_HEAD(&spl->head);
19490 +}
19491 +/* insert list right after spl->head while holding spl->spin */
19492 +static inline void au_spl_add(struct list_head *list, struct au_splhead *spl)
19493 +{
19494 +       spin_lock(&spl->spin);
19495 +       list_add(list, &spl->head);
19496 +       spin_unlock(&spl->spin);
19497 +}
19498 +/* unlink list from whatever list it is on while holding spl->spin */
19499 +static inline void au_spl_del(struct list_head *list, struct au_splhead *spl)
19500 +{
19501 +       spin_lock(&spl->spin);
19502 +       list_del(list);
19503 +       spin_unlock(&spl->spin);
19504 +}
19505 +
19506 +#endif /* __KERNEL__ */
19507 +#endif /* __AUFS_SPL_H__ */
19508 diff --git a/fs/aufs/super.c b/fs/aufs/super.c
19509 new file mode 100644
19510 index 0000000..e006e84
19511 --- /dev/null
19512 +++ b/fs/aufs/super.c
19513 @@ -0,0 +1,874 @@
19514 +/*
19515 + * Copyright (C) 2005-2009 Junjiro R. Okajima
19516 + *
19517 + * This program, aufs is free software; you can redistribute it and/or modify
19518 + * it under the terms of the GNU General Public License as published by
19519 + * the Free Software Foundation; either version 2 of the License, or
19520 + * (at your option) any later version.
19521 + *
19522 + * This program is distributed in the hope that it will be useful,
19523 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
19524 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19525 + * GNU General Public License for more details.
19526 + *
19527 + * You should have received a copy of the GNU General Public License
19528 + * along with this program; if not, write to the Free Software
19529 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19530 + */
19531 +
19532 +/*
19533 + * mount and super_block operations
19534 + */
19535 +
19536 +#include <linux/buffer_head.h>
19537 +#include <linux/module.h>
19538 +#include <linux/seq_file.h>
19539 +#include <linux/statfs.h>
19540 +#include "aufs.h"
19541 +
19542 +/*
19543 + * super_operations (first: hand out the vfs_inode embedded in a new au_icntnr)
19544 + */
19545 +static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
19546 +{
19547 +       struct au_icntnr *c;
19548 +
19549 +       c = au_cache_alloc_icntnr();
19550 +       if (c) {
19551 +               inode_init_once(&c->vfs_inode);
19552 +               c->vfs_inode.i_version = 1; /* sigen(sb); */
19553 +               c->iinfo.ii_hinode = NULL; /* no lower inode array attached yet */
19554 +               return &c->vfs_inode;
19555 +       }
19556 +       return NULL; /* allocation from the cache failed */
19557 +}
19558 +/* finish the aufs inode info, then give the enclosing au_icntnr back to its cache */
19559 +static void aufs_destroy_inode(struct inode *inode)
19560 +{
19561 +       au_iinfo_fin(inode);
19562 +       au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
19563 +}
19564 +/* iget_locked() plus aufs set-up of a new inode; returns the inode or ERR_PTR() */
19565 +struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
19566 +{
19567 +       struct inode *inode;
19568 +       int err;
19569 +
19570 +       inode = iget_locked(sb, ino);
19571 +       if (unlikely(!inode)) {
19572 +               inode = ERR_PTR(-ENOMEM);
19573 +               goto out;
19574 +       }
19575 +       if (!(inode->i_state & I_NEW)) /* not a new inode: returned as is */
19576 +               goto out;
19577 +
19578 +       err = au_xigen_new(inode);
19579 +       if (!err)
19580 +               err = au_iinfo_init(inode);
19581 +       if (!err)
19582 +               inode->i_version++;
19583 +       else {
19584 +               iget_failed(inode); /* give up the half-initialised new inode */
19585 +               inode = ERR_PTR(err);
19586 +       }
19587 +
19588 + out:
19589 +       /* never return NULL */
19590 +       AuDebugOn(!inode);
19591 +       AuTraceErrPtr(inode);
19592 +       return inode;
19593 +}
19594 +
19595 +/* print "path=perm" per branch, ':'-separated; reads root dinfo lock free */
19596 +static int au_show_brs(struct seq_file *seq, struct super_block *sb)
19597 +{
19598 +       int err;
19599 +       aufs_bindex_t bindex, bend;
19600 +       struct path path;
19601 +       struct au_hdentry *hd;
19602 +       struct au_branch *br;
19603 +
19604 +       err = 0;
19605 +       bend = au_sbend(sb);
19606 +       hd = au_di(sb->s_root)->di_hdentry;
19607 +       for (bindex = 0; !err && bindex <= bend; bindex++) { /* stops at the first error */
19608 +               br = au_sbr(sb, bindex);
19609 +               path.mnt = br->br_mnt;
19610 +               path.dentry = hd[bindex].hd_dentry;
19611 +               err = au_seq_path(seq, &path);
19612 +               if (err > 0) /* a positive result is what lets the "=perm" part follow */
19613 +                       err = seq_printf(seq, "=%s",
19614 +                                        au_optstr_br_perm(br->br_perm));
19615 +               if (!err && bindex != bend) /* no separator after the last branch */
19616 +                       err = seq_putc(seq, ':');
19617 +       }
19618 +
19619 +       return err;
19620 +}
19621 +/* print ",create=<policy>" with its parameters for policy v; needs sbinfo->si_rwsem */
19622 +static void au_show_wbr_create(struct seq_file *m, int v,
19623 +                              struct au_sbinfo *sbinfo)
19624 +{
19625 +       const char *pat;
19626 +
19627 +       AuRwMustAnyLock(&sbinfo->si_rwsem);
19628 +
19629 +       seq_puts(m, ",create=");
19630 +       pat = au_optstr_wbr_create(v);
19631 +       switch (v) {
19632 +       case AuWbrCreate_TDP:
19633 +       case AuWbrCreate_RR:
19634 +       case AuWbrCreate_MFS:
19635 +       case AuWbrCreate_PMFS:
19636 +               seq_puts(m, pat); /* plain name; never pass it as a format string */
19637 +               break;
19638 +       case AuWbrCreate_MFSV:
19639 +               seq_printf(m, /*pat*/"mfs:%lu",
19640 +                          sbinfo->si_wbr_mfs.mfs_expire / HZ);
19641 +               break;
19642 +       case AuWbrCreate_PMFSV:
19643 +               seq_printf(m, /*pat*/"pmfs:%lu",
19644 +                          sbinfo->si_wbr_mfs.mfs_expire / HZ);
19645 +               break;
19646 +       case AuWbrCreate_MFSRR:
19647 +               seq_printf(m, /*pat*/"mfsrr:%llu",
19648 +                          sbinfo->si_wbr_mfs.mfsrr_watermark);
19649 +               break;
19650 +       case AuWbrCreate_MFSRRV:
19651 +               seq_printf(m, /*pat*/"mfsrr:%llu:%lu",
19652 +                          sbinfo->si_wbr_mfs.mfsrr_watermark,
19653 +                          sbinfo->si_wbr_mfs.mfs_expire / HZ); /* mfs_expire is in jiffies */
19654 +               break;
19655 +       }
19656 +}
19657 +/* print ",xino=<path>" unless it is the default file; a no-op returning 0 with sysfs */
19658 +static int au_show_xino(struct seq_file *seq, struct vfsmount *mnt)
19659 +{
19660 +#ifdef CONFIG_SYSFS
19661 +       return 0;
19662 +#else
19663 +       int err;
19664 +       const int len = sizeof(AUFS_XINO_FNAME) - 1;
19665 +       aufs_bindex_t bindex, brid;
19666 +       struct super_block *sb;
19667 +       struct qstr *name;
19668 +       struct file *f;
19669 +       struct dentry *d, *h_root;
19670 +
19671 +       AuRwMustAnyLock(&au_sbi(mnt->mnt_sb)->si_rwsem); /* no local sbinfo in this function */
19672 +
19673 +       err = 0;
19674 +       sb = mnt->mnt_sb;
19675 +       f = au_sbi(sb)->si_xib;
19676 +       if (!f)
19677 +               goto out;
19678 +
19679 +       /* stop printing the default xino path on the first writable branch */
19680 +       h_root = NULL;
19681 +       brid = au_xino_brid(sb);
19682 +       if (brid >= 0) {
19683 +               bindex = au_br_index(sb, brid);
19684 +               h_root = au_di(sb->s_root)->di_hdentry[0 + bindex].hd_dentry;
19685 +       }
19686 +       d = f->f_dentry;
19687 +       name = &d->d_name;
19688 +       /* safe ->d_parent because the file is unlinked */
19689 +       if (d->d_parent == h_root
19690 +           && name->len == len
19691 +           && !memcmp(name->name, AUFS_XINO_FNAME, len))
19692 +               goto out;
19693 +
19694 +       seq_puts(seq, ",xino=");
19695 +       err = au_xino_path(seq, f);
19696 +
19697 + out:
19698 +       return err;
19699 +#endif
19700 +}
19701 +
19702 +/* seq_file will re-call me in case of too long string */
19703 +static int aufs_show_options(struct seq_file *m, struct vfsmount *mnt)
19704 +{
19705 +       int err, n;
19706 +       unsigned int mnt_flags, v;
19707 +       struct super_block *sb;
19708 +       struct au_sbinfo *sbinfo;
19709 +
19710 +#define AuBool(name, str) do { \
19711 +       v = au_opt_test(mnt_flags, name); \
19712 +       if (v != au_opt_test(AuOpt_Def, name)) \
19713 +               seq_printf(m, ",%s" #str, v ? "" : "no"); \
19714 +} while (0)
19715 +
19716 +#define AuStr(name, str) do { \
19717 +       v = mnt_flags & AuOptMask_##name; \
19718 +       if (v != (AuOpt_Def & AuOptMask_##name)) \
19719 +               seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
19720 +} while (0)
19721 +
19722 +#define AuUInt(name, str, val) do { \
19723 +       if (val != AUFS_##name##_DEF) \
19724 +               seq_printf(m, "," #str "=%u", val); \
19725 +} while (0)
19726 +
19727 +       /* lock free root dinfo */
19728 +       sb = mnt->mnt_sb;
19729 +       si_noflush_read_lock(sb);
19730 +       sbinfo = au_sbi(sb);
19731 +       seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
19732 +
19733 +       mnt_flags = au_mntflags(sb);
19734 +       if (au_opt_test(mnt_flags, XINO)) {
19735 +               err = au_show_xino(m, mnt);
19736 +               if (unlikely(err))
19737 +                       goto out;
19738 +       } else
19739 +               seq_puts(m, ",noxino");
19740 +
19741 +       AuBool(TRUNC_XINO, trunc_xino);
19742 +       AuStr(UDBA, udba);
19743 +       AuBool(SHWH, shwh);
19744 +       AuBool(PLINK, plink);
19745 +       /* AuBool(DIRPERM1, dirperm1); */
19746 +       /* AuBool(REFROF, refrof); */
19747 +
19748 +       v = sbinfo->si_wbr_create;
19749 +       if (v != AuWbrCreate_Def)
19750 +               au_show_wbr_create(m, v, sbinfo);
19751 +
19752 +       v = sbinfo->si_wbr_copyup;
19753 +       if (v != AuWbrCopyup_Def)
19754 +               seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
19755 +
19756 +       v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
19757 +       if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
19758 +               seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
19759 +
19760 +       AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
19761 +
19762 +       n = sbinfo->si_rdcache / HZ;
19763 +       AuUInt(RDCACHE, rdcache, n);
19764 +
19765 +       AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
19766 +       AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
19767 +
19768 +       AuBool(SUM, sum);
19769 +       /* AuBool(SUM_W, wsum); */
19770 +       AuBool(WARN_PERM, warn_perm);
19771 +       AuBool(VERBOSE, verbose);
19772 +
19773 + out:
19774 +       /* be sure to print "br:" last */
19775 +       if (!sysaufs_brs) {
19776 +               seq_puts(m, ",br:");
19777 +               au_show_brs(m, sb);
19778 +       }
19779 +       si_read_unlock(sb);
19780 +       return 0;
19781 +
19782 +#undef Deleted
19783 +#undef AuBool
19784 +#undef AuStr
19785 +}
19786 +
19787 +/* ---------------------------------------------------------------------- */
19788 +
19789 +/* sum mode which returns the summation for statfs(2) */
19790 +/* saturating add: returns a + b, or ULLONG_MAX when the sum wraps around */
19791 +static u64 au_add_till_max(u64 a, u64 b)
19792 +{
19793 +       u64 old;
19794 +
19795 +       old = a;
19796 +       a += b;
19797 +       if (old <= a) /* '<=': adding b == 0 is not an overflow */
19798 +               return a;
19799 +       return ULLONG_MAX;
19800 +}
19801 +/* sum the statfs counters of all branches into buf, one count per lower superblock */
19802 +static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
19803 +{
19804 +       int err;
19805 +       u64 blocks, bfree, bavail, files, ffree;
19806 +       aufs_bindex_t bend, bindex, i;
19807 +       unsigned char shared;
19808 +       struct vfsmount *h_mnt;
19809 +       struct super_block *h_sb;
19810 +
19811 +       blocks = 0;
19812 +       bfree = 0;
19813 +       bavail = 0;
19814 +       files = 0;
19815 +       ffree = 0;
19816 +
19817 +       err = 0;
19818 +       bend = au_sbend(sb);
19819 +       for (bindex = bend; bindex >= 0; bindex--) { /* from the last branch down to 0 */
19820 +               h_mnt = au_sbr_mnt(sb, bindex);
19821 +               h_sb = h_mnt->mnt_sb;
19822 +               shared = 0;
19823 +               for (i = bindex + 1; !shared && i <= bend; i++)
19824 +                       shared = (au_sbr_sb(sb, i) == h_sb);
19825 +               if (shared) /* this superblock was already counted for a higher index */
19826 +                       continue;
19827 +
19828 +               /* sb->s_root for NFS is unreliable */
19829 +               err = vfs_statfs(h_mnt->mnt_root, buf);
19830 +               if (unlikely(err))
19831 +                       goto out;
19832 +
19833 +               blocks = au_add_till_max(blocks, buf->f_blocks);
19834 +               bfree = au_add_till_max(bfree, buf->f_bfree);
19835 +               bavail = au_add_till_max(bavail, buf->f_bavail);
19836 +               files = au_add_till_max(files, buf->f_files);
19837 +               ffree = au_add_till_max(ffree, buf->f_ffree);
19838 +       }
19839 +
19840 +       buf->f_blocks = blocks; /* other buf fields keep the last vfs_statfs() values */
19841 +       buf->f_bfree = bfree;
19842 +       buf->f_bavail = bavail;
19843 +       buf->f_files = files;
19844 +       buf->f_ffree = ffree;
19845 +
19846 + out:
19847 +       return err;
19848 +}
19849 +/* statfs of branch 0 only, or the sum over the branches when the SUM option is set */
19850 +static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
19851 +{
19852 +       int err;
19853 +       struct super_block *sb;
19854 +
19855 +       /* lock free root dinfo */
19856 +       sb = dentry->d_sb;
19857 +       si_noflush_read_lock(sb);
19858 +       if (!au_opt_test(au_mntflags(sb), SUM))
19859 +               /* sb->s_root for NFS is unreliable */
19860 +               err = vfs_statfs(au_sbr_mnt(sb, 0)->mnt_root, buf);
19861 +       else
19862 +               err = au_statfs_sum(sb, buf);
19863 +       si_read_unlock(sb);
19864 +
19865 +       if (!err) { /* overwrite the lower fs identity with the aufs one */
19866 +               buf->f_type = AUFS_SUPER_MAGIC;
19867 +               buf->f_namelen = AUFS_MAX_NAMELEN;
19868 +               memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
19869 +       }
19870 +       /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
19871 +
19872 +       return err;
19873 +}
19874 +
19875 +/* ---------------------------------------------------------------------- */
19876 +
19877 +/* try flushing the lower fs at aufs remount/unmount time */
19878 +
19879 +static void au_fsync_br(struct super_block *sb)
19880 +{
19881 +       aufs_bindex_t bend, bindex;
19882 +       int brperm;
19883 +       struct au_branch *br;
19884 +       struct super_block *h_sb;
19885 +
19886 +       bend = au_sbend(sb);
19887 +       for (bindex = 0; bindex < bend; bindex++) { /* NOTE(review): '<' skips branch bend - confirm intended */
19888 +               br = au_sbr(sb, bindex);
19889 +               brperm = br->br_perm;
19890 +               if (brperm == AuBrPerm_RR || brperm == AuBrPerm_RRWH) /* not flushed for these modes */
19891 +                       continue;
19892 +               h_sb = br->br_mnt->mnt_sb;
19893 +               if (bdev_read_only(h_sb->s_bdev))
19894 +                       continue;
19895 +
19896 +               lockdep_off(); /* lock validation is off while the lower s_umount is held */
19897 +               down_write(&h_sb->s_umount);
19898 +               shrink_dcache_sb(h_sb);
19899 +               fsync_super(h_sb);
19900 +               up_write(&h_sb->s_umount);
19901 +               lockdep_on();
19902 +       }
19903 +}
19904 +
19905 +/*
19906 + * this IS NOT for super_operations; aufs_put_super() calls it directly.
19907 + * I guess it will be reverted someday.
19908 + */
19909 +static void aufs_umount_begin(struct super_block *sb)
19910 +{
19911 +       struct au_sbinfo *sbinfo;
19912 +
19913 +       sbinfo = au_sbi(sb);
19914 +       if (!sbinfo) /* nothing to do without sbinfo */
19915 +               return;
19916 +
19917 +       si_write_lock(sb);
19918 +       au_fsync_br(sb);
19919 +       if (au_opt_test(au_mntflags(sb), PLINK))
19920 +               au_plink_put(sb);
19921 +       if (sbinfo->si_wbr_create_ops->fin) /* ->fin is optional */
19922 +               sbinfo->si_wbr_create_ops->fin(sb);
19923 +       si_write_unlock(sb);
19924 +}
19925 +
19926 +/* final actions when unmounting a file system; does nothing without sbinfo */
19927 +static void aufs_put_super(struct super_block *sb)
19928 +{
19929 +       struct au_sbinfo *sbinfo;
19930 +
19931 +       sbinfo = au_sbi(sb);
19932 +       if (!sbinfo)
19933 +               return;
19934 +
19935 +       aufs_umount_begin(sb);
19936 +       dbgaufs_si_fin(sbinfo);
19937 +       kobject_put(&sbinfo->si_kobj); /* drop this reference on the embedded kobject */
19938 +}
19939 +
19940 +/* ---------------------------------------------------------------------- */
19941 +
19942 +/*
19943 + * refresh dentry and inode at remount time. returns 0 or a negative error.
19944 + */
19945 +static int do_refresh(struct dentry *dentry, mode_t type,
19946 +                     unsigned int dir_flags)
19947 +{
19948 +       int err;
19949 +       struct dentry *parent;
19950 +
19951 +       di_write_lock_child(dentry); /* child first, then parent; released in reverse below */
19952 +       parent = dget_parent(dentry);
19953 +       di_read_lock_parent(parent, AuLock_IR);
19954 +
19955 +       /* returns the number of positive dentries */
19956 +       err = au_refresh_hdentry(dentry, type);
19957 +       if (err >= 0) {
19958 +               struct inode *inode = dentry->d_inode;
19959 +               err = au_refresh_hinode(inode, dentry); /* replaces the count kept in err */
19960 +               if (!err && type == S_IFDIR)
19961 +                       au_reset_hinotify(inode, dir_flags);
19962 +       }
19963 +       if (unlikely(err))
19964 +               AuErr("unrecoverable error %d, %.*s\n", err, AuDLNPair(dentry));
19965 +
19966 +       di_read_unlock(parent, AuLock_IR);
19967 +       dput(parent);
19968 +       di_write_unlock(dentry);
19969 +
19970 +       return err;
19971 +}
19972 +/* au_dcsub_pages() test used by refresh_dir(): non-zero for a directory dentry */
19973 +static int test_dir(struct dentry *dentry, void *arg __maybe_unused)
19974 +{
19975 +       return S_ISDIR(dentry->d_inode->i_mode);
19976 +}
19977 +
19978 +/* refresh dir inodes, then dir dentries; gave up consolidating with refresh_nondir() */
19979 +static int refresh_dir(struct dentry *root, unsigned int sigen)
19980 +{
19981 +       int err, i, j, ndentry, e;
19982 +       struct au_dcsub_pages dpages;
19983 +       struct au_dpage *dpage;
19984 +       struct dentry **dentries;
19985 +       struct inode *inode;
19986 +       const unsigned int flags = au_hi_flags(root->d_inode, /*isdir*/1);
19987 +
19988 +       err = 0; /* err keeps the first error, e the most recent one */
19989 +       list_for_each_entry(inode, &root->d_sb->s_inodes, i_sb_list)
19990 +               if (S_ISDIR(inode->i_mode) && au_iigen(inode) != sigen) { /* generation differs */
19991 +                       ii_write_lock_child(inode);
19992 +                       e = au_refresh_hinode_self(inode, /*do_attr*/1);
19993 +                       ii_write_unlock(inode);
19994 +                       if (unlikely(e)) {
19995 +                               AuDbg("e %d, i%lu\n", e, inode->i_ino);
19996 +                               if (!err)
19997 +                                       err = e;
19998 +                               /* go on even if err */
19999 +                       }
20000 +               }
20001 +
20002 +       e = au_dpages_init(&dpages, GFP_NOFS);
20003 +       if (unlikely(e)) {
20004 +               if (!err)
20005 +                       err = e;
20006 +               goto out;
20007 +       }
20008 +       e = au_dcsub_pages(&dpages, root, test_dir, NULL);
20009 +       if (unlikely(e)) {
20010 +               if (!err)
20011 +                       err = e;
20012 +               goto out_dpages;
20013 +       }
20014 +
20015 +       for (i = 0; !e && i < dpages.ndpage; i++) { /* unlike the inode pass, stop at the first failure */
20016 +               dpage = dpages.dpages + i;
20017 +               dentries = dpage->dentries;
20018 +               ndentry = dpage->ndentry;
20019 +               for (j = 0; !e && j < ndentry; j++) {
20020 +                       struct dentry *d;
20021 +
20022 +                       d = dentries[j];
20023 +                       au_dbg_verify_dir_parent(d, sigen);
20024 +                       if (au_digen(d) != sigen) {
20025 +                               e = do_refresh(d, S_IFDIR, flags);
20026 +                               if (unlikely(e && !err))
20027 +                                       err = e;
20028 +                               /* break on err */
20029 +                       }
20030 +               }
20031 +       }
20032 +
20033 + out_dpages:
20034 +       au_dpages_free(&dpages);
20035 + out:
20036 +       return err;
20037 +}
20038 +/* au_dcsub_pages() test used by refresh_nondir(): non-zero unless a directory */
20039 +static int test_nondir(struct dentry *dentry, void *arg __maybe_unused)
20040 +{
20041 +       return !S_ISDIR(dentry->d_inode->i_mode);
20042 +}
20043 +/* refresh non-dir inodes and, only if do_dentry is set, non-dir dentries too */
20044 +static int refresh_nondir(struct dentry *root, unsigned int sigen,
20045 +                         int do_dentry)
20046 +{
20047 +       int err, i, j, ndentry, e;
20048 +       struct au_dcsub_pages dpages;
20049 +       struct au_dpage *dpage;
20050 +       struct dentry **dentries;
20051 +       struct inode *inode;
20052 +
20053 +       err = 0; /* err keeps the first error, e the most recent one */
20054 +       list_for_each_entry(inode, &root->d_sb->s_inodes, i_sb_list)
20055 +               if (!S_ISDIR(inode->i_mode) && au_iigen(inode) != sigen) {
20056 +                       ii_write_lock_child(inode);
20057 +                       e = au_refresh_hinode_self(inode, /*do_attr*/1);
20058 +                       ii_write_unlock(inode);
20059 +                       if (unlikely(e)) {
20060 +                               AuDbg("e %d, i%lu\n", e, inode->i_ino);
20061 +                               if (!err)
20062 +                                       err = e;
20063 +                               /* go on even if err */
20064 +                       }
20065 +               }
20066 +
20067 +       if (!do_dentry) /* dpages was never initialised, so skip au_dpages_free() */
20068 +               goto out;
20069 +
20070 +       e = au_dpages_init(&dpages, GFP_NOFS);
20071 +       if (unlikely(e)) {
20072 +               if (!err)
20073 +                       err = e;
20074 +               goto out;
20075 +       }
20076 +       e = au_dcsub_pages(&dpages, root, test_nondir, NULL);
20077 +       if (unlikely(e)) {
20078 +               if (!err)
20079 +                       err = e;
20080 +               goto out_dpages;
20081 +       }
20082 +
20083 +       for (i = 0; i < dpages.ndpage; i++) { /* every dentry is visited, errors or not */
20084 +               dpage = dpages.dpages + i;
20085 +               dentries = dpage->dentries;
20086 +               ndentry = dpage->ndentry;
20087 +               for (j = 0; j < ndentry; j++) {
20088 +                       struct dentry *d;
20089 +
20090 +                       d = dentries[j];
20091 +                       au_dbg_verify_nondir_parent(d, sigen);
20092 +                       inode = d->d_inode;
20093 +                       if (inode && au_digen(d) != sigen) { /* dentries without an inode are skipped */
20094 +                               e = do_refresh(d, inode->i_mode & S_IFMT,
20095 +                                              /*dir_flags*/0);
20096 +                               if (unlikely(e && !err))
20097 +                                       err = e;
20098 +                               /* go on even err */
20099 +                       }
20100 +               }
20101 +       }
20102 +
20103 + out_dpages:
20104 +       au_dpages_free(&dpages);
20105 + out:
20106 +       return err;
20107 +}
20108 +/* bump the generation, then refresh dirs and (if REFRESH_NONDIR) non-dirs; errors only warn */
20109 +static void au_remount_refresh(struct super_block *sb, unsigned int flags)
20110 +{
20111 +       int err;
20112 +       unsigned int sigen;
20113 +       struct au_sbinfo *sbinfo;
20114 +       struct dentry *root;
20115 +       struct inode *inode;
20116 +
20117 +       au_sigen_inc(sb);
20118 +       sigen = au_sigen(sb);
20119 +       sbinfo = au_sbi(sb);
20120 +       au_fclr_si(sbinfo, FAILED_REFRESH_DIRS);
20121 +
20122 +       root = sb->s_root;
20123 +       DiMustNoWaiters(root);
20124 +       inode = root->d_inode;
20125 +       IiMustNoWaiters(inode);
20126 +       au_reset_hinotify(inode, au_hi_flags(inode, /*isdir*/1));
20127 +       di_write_unlock(root); /* root is unlocked during the refresh and re-locked below */
20128 +
20129 +       err = refresh_dir(root, sigen);
20130 +       if (unlikely(err)) {
20131 +               au_fset_si(sbinfo, FAILED_REFRESH_DIRS);
20132 +               AuWarn("Refreshing directories failed, ignored (%d)\n", err);
20133 +       }
20134 +
20135 +       if (au_ftest_opts(flags, REFRESH_NONDIR)) {
20136 +               err = refresh_nondir(root, sigen, !err); /* no dentry pass if the dir refresh failed */
20137 +               if (unlikely(err))
20138 +                       AuWarn("Refreshing non-directories failed, ignored"
20139 +                              "(%d)\n", err);
20140 +       }
20141 +
20142 +       /* aufs_write_lock() calls ..._child() */
20143 +       di_write_lock_child(root);
20144 +       au_cpup_attr_all(root->d_inode, /*force*/1);
20145 +}
20146 +
20147 +/* stop extra interpretation of errno in mount(8), and strange error messages */
20148 +static int cvt_err(int err)
20149 +{
20150 +       AuTraceErr(err);
20151 +
20152 +       switch (err) {
20153 +       case -ENOENT:
20154 +       case -ENOTDIR:
20155 +       case -EEXIST:
20156 +       case -EIO:
20157 +               err = -EINVAL; /* these four are all reported as -EINVAL */
20158 +       }
20159 +       return err; /* any other value is returned unchanged */
20160 +}
20161 +/* ->remount_fs(): apply the options in @data under the aufs write lock */
20162 +static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
20163 +{
20164 +       int err;
20165 +       struct au_opts opts;
20166 +       struct dentry *root;
20167 +       struct inode *inode;
20168 +
20169 +       err = 0;
20170 +       root = sb->s_root;
20171 +       if (!data || !*data) {
20172 +               /* no option string, verify the flags and au_fsync_br() only */
20173 +               aufs_write_lock(root);
20174 +               err = au_opts_verify(sb, *flags, /*pending*/0);
20175 +               if (!err)
20176 +                       au_fsync_br(sb);
20177 +               aufs_write_unlock(root);
20178 +               goto out;
20179 +       }
20180 +
20181 +       err = -ENOMEM;
20182 +       memset(&opts, 0, sizeof(opts));
20183 +       opts.opt = (void *)__get_free_page(GFP_NOFS);
20184 +       if (unlikely(!opts.opt))
20185 +               goto out;
20186 +       opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
20187 +       opts.flags = AuOpts_REMOUNT;
20188 +       opts.sb_flags = *flags;
20189 +
20190 +       /* parse it before aufs lock */
20191 +       err = au_opts_parse(sb, data, &opts);
20192 +       if (unlikely(err))
20193 +               goto out_opts;
20194 +
20195 +       /* lock vfs_inode first, then aufs. */
20196 +       inode = root->d_inode;
20197 +       mutex_lock(&inode->i_mutex);
20198 +       aufs_write_lock(root);
20199 +       au_fsync_br(sb);
20200 +
20201 +       /* the refresh below runs even when au_opts_remount() returns an error */
20202 +       err = au_opts_remount(sb, &opts);
20203 +       au_opts_free(&opts);
20204 +
20205 +       if (au_ftest_opts(opts.flags, REFRESH_DIR)
20206 +           || au_ftest_opts(opts.flags, REFRESH_NONDIR))
20207 +               au_remount_refresh(sb, opts.flags);
20208 +
20209 +       aufs_write_unlock(root);
20210 +       mutex_unlock(&inode->i_mutex);
20211 +
20212 + out_opts:
20213 +       free_page((unsigned long)opts.opt);
20214 + out:
20215 +       err = cvt_err(err);
20216 +       AuTraceErr(err);
20217 +       return err;
20218 +}
20219 +/* super_block operations, installed as sb->s_op by aufs_fill_super() */
20220 +static const struct super_operations aufs_sop = {
20221 +       .alloc_inode    = aufs_alloc_inode,
20222 +       .destroy_inode  = aufs_destroy_inode,
20223 +       .drop_inode     = generic_delete_inode,
20224 +       .show_options   = aufs_show_options,
20225 +       .statfs         = aufs_statfs,
20226 +       .put_super      = aufs_put_super,
20227 +       .remount_fs     = aufs_remount_fs
20228 +};
20229 +
20230 +/* ---------------------------------------------------------------------- */
20231 +/* set up the root inode and dentry, store it in sb->s_root; 0 or -errno */
20232 +static int alloc_root(struct super_block *sb)
20233 +{
20234 +       int err;
20235 +       struct inode *inode;
20236 +       struct dentry *root;
20237 +
20238 +       inode = au_iget_locked(sb, AUFS_ROOT_INO);
20239 +       err = PTR_ERR(inode);
20240 +       if (IS_ERR(inode))
20241 +               goto out;
20242 +
20243 +       inode->i_op = &aufs_dir_iop;
20244 +       inode->i_fop = &aufs_dir_fop;
20245 +       inode->i_mode = S_IFDIR;
20246 +       inode->i_nlink = 2;
20247 +       unlock_new_inode(inode);
20248 +
20249 +       /* err holds PTR_ERR() of a valid pointer here, so reset it */
20250 +       err = -ENOMEM;
20251 +       root = d_alloc_root(inode);
20252 +       if (unlikely(!root))
20253 +               goto out_iput;
20254 +       err = PTR_ERR(root);
20255 +       if (IS_ERR(root))
20256 +               goto out_iput;
20257 +
20258 +       err = au_alloc_dinfo(root);
20259 +       if (!err) {
20260 +               sb->s_root = root;
20261 +               return 0; /* success */
20262 +       }
20263 +       dput(root);
20264 +       goto out; /* do not iput */
20265 +
20266 + out_iput:
20267 +       iget_failed(inode);
20268 +       iput(inode);
20269 + out:
20270 +       return err;
20271 +}
20272 +/* fill_super callback for get_sb_nodev(); @raw_data is the option string */
20273 +static int aufs_fill_super(struct super_block *sb, void *raw_data,
20274 +                          int silent __maybe_unused)
20275 +{
20276 +       int err;
20277 +       struct au_opts opts;
20278 +       struct dentry *root;
20279 +       struct inode *inode;
20280 +       char *arg = raw_data;
20281 +
20282 +       if (unlikely(!arg || !*arg)) {
20283 +               err = -EINVAL;
20284 +               AuErr("no arg\n");
20285 +               goto out;
20286 +       }
20287 +
20288 +       err = -ENOMEM;
20289 +       memset(&opts, 0, sizeof(opts));
20290 +       opts.opt = (void *)__get_free_page(GFP_NOFS);
20291 +       if (unlikely(!opts.opt))
20292 +               goto out;
20293 +       opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
20294 +       opts.sb_flags = sb->s_flags;
20295 +
20296 +       err = au_si_alloc(sb);
20297 +       if (unlikely(err))
20298 +               goto out_opts;
20299 +
20300 +       /* all timestamps always follow the ones on the branch */
20301 +       sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
20302 +       sb->s_op = &aufs_sop;
20303 +       sb->s_magic = AUFS_SUPER_MAGIC;
20304 +       sb->s_maxbytes = 0;
20305 +       au_export_init(sb);
20306 +
20307 +       err = alloc_root(sb);
20308 +       if (unlikely(err)) {
20309 +               si_write_unlock(sb);
20310 +               goto out_info;
20311 +       }
20312 +       root = sb->s_root;
20313 +       inode = root->d_inode;
20314 +
20315 +       /*
20316 +        * actually we can parse options regardless aufs lock here.
20317 +        * but at remount time, parsing must be done before aufs lock.
20318 +        * so we follow the same rule.
20319 +        */
20320 +       ii_write_lock_parent(inode);
20321 +       aufs_write_unlock(root);
20322 +       err = au_opts_parse(sb, arg, &opts);
20323 +       if (unlikely(err))
20324 +               goto out_root;
20325 +
20326 +       /* lock vfs_inode first, then aufs. */
20327 +       mutex_lock(&inode->i_mutex);
20328 +       inode->i_op = &aufs_dir_iop;
20329 +       inode->i_fop = &aufs_dir_fop;
20330 +       aufs_write_lock(root);
20331 +       err = au_opts_mount(sb, &opts);
20332 +       au_opts_free(&opts);
20333 +       if (unlikely(err))
20334 +               goto out_unlock;
20335 +       aufs_write_unlock(root);
20336 +       mutex_unlock(&inode->i_mutex);
20337 +       goto out_opts; /* success */
20338 +
20339 + out_unlock:
20340 +       aufs_write_unlock(root);
20341 +       mutex_unlock(&inode->i_mutex);
20342 + out_root:
20343 +       dput(root);
20344 +       sb->s_root = NULL;
20345 + out_info:
20346 +       kobject_put(&au_sbi(sb)->si_kobj);
20347 +       sb->s_fs_info = NULL;
20348 + out_opts:
20349 +       free_page((unsigned long)opts.opt);
20350 + out:
20351 +       AuTraceErr(err);
20352 +       err = cvt_err(err);
20353 +       AuTraceErr(err);
20354 +       return err;
20355 +}
20356 +
20357 +/* ---------------------------------------------------------------------- */
20358 +/* ->get_sb(): get_sb_nodev() with aufs_fill_super(), then sysaufs_brs_add() */
20359 +static int aufs_get_sb(struct file_system_type *fs_type, int flags,
20360 +                      const char *dev_name __maybe_unused, void *raw_data,
20361 +                      struct vfsmount *mnt)
20362 +{
20363 +       int err;
20364 +       struct super_block *sb;
20365 +
20366 +       /* all timestamps always follow the ones on the branch */
20367 +       /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
20368 +       err = get_sb_nodev(fs_type, flags, raw_data, aufs_fill_super, mnt);
20369 +       if (!err) {
20370 +               sb = mnt->mnt_sb;
20371 +               si_write_lock(sb);
20372 +               sysaufs_brs_add(sb, 0);
20373 +               si_write_unlock(sb);
20374 +       }
20375 +       return err;
20376 +}
20377 +/* the aufs filesystem type, mounted via aufs_get_sb() */
20378 +struct file_system_type aufs_fs_type = {
20379 +       .name           = AUFS_FSTYPE,
20380 +       .fs_flags       =
20381 +               FS_RENAME_DOES_D_MOVE   /* a race between rename and others */
20382 +               | FS_REVAL_DOT,         /* for NFS branch and udba */
20383 +       .get_sb         = aufs_get_sb,
20384 +       .kill_sb        = generic_shutdown_super,
20385 +       /* no need to __module_get() and module_put(). */
20386 +       .owner          = THIS_MODULE,
20387 +};
20388 diff --git a/fs/aufs/super.h b/fs/aufs/super.h
20389 new file mode 100644
20390 index 0000000..33a2b73
20391 --- /dev/null
20392 +++ b/fs/aufs/super.h
20393 @@ -0,0 +1,361 @@
20394 +/*
20395 + * Copyright (C) 2005-2009 Junjiro R. Okajima
20396 + *
20397 + * This program, aufs is free software; you can redistribute it and/or modify
20398 + * it under the terms of the GNU General Public License as published by
20399 + * the Free Software Foundation; either version 2 of the License, or
20400 + * (at your option) any later version.
20401 + *
20402 + * This program is distributed in the hope that it will be useful,
20403 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
20404 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20405 + * GNU General Public License for more details.
20406 + *
20407 + * You should have received a copy of the GNU General Public License
20408 + * along with this program; if not, write to the Free Software
20409 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20410 + */
20411 +
20412 +/*
20413 + * super_block operations
20414 + */
20415 +
20416 +#ifndef __AUFS_SUPER_H__
20417 +#define __AUFS_SUPER_H__
20418 +
20419 +#ifdef __KERNEL__
20420 +
20421 +#include <linux/fs.h>
20422 +#include <linux/aufs_type.h>
20423 +#include "rwsem.h"
20424 +#include "spl.h"
20425 +#include "wkq.h"
20426 +
20427 +typedef ssize_t (*au_readf_t)(struct file *, char __user *, size_t, loff_t *);
20428 +typedef ssize_t (*au_writef_t)(struct file *, const char __user *, size_t,
20429 +                              loff_t *);
20430 +
20431 +/* policies to select one among multiple writable branches */
20432 +struct au_wbr_copyup_operations {
20433 +       int (*copyup)(struct dentry *dentry);
20434 +};
20435 +
20436 +struct au_wbr_create_operations {
20437 +       int (*create)(struct dentry *dentry, int isdir);
20438 +       int (*init)(struct super_block *sb);
20439 +       int (*fin)(struct super_block *sb);
20440 +};
20441 +
20442 +struct au_wbr_mfs {
20443 +       struct mutex    mfs_lock; /* protect this structure */
20444 +       unsigned long   mfs_jiffy;
20445 +       unsigned long   mfs_expire;
20446 +       aufs_bindex_t   mfs_bindex;
20447 +
20448 +       unsigned long long      mfsrr_bytes;
20449 +       unsigned long long      mfsrr_watermark;
20450 +};
20451 +
20452 +struct au_branch;
20453 +struct au_sbinfo {
20454 +       /* nowait tasks in the system-wide workqueue */
20455 +       struct au_nowait_tasks  si_nowait;
20456 +
20457 +       struct au_rwsem         si_rwsem;
20458 +
20459 +       /* branch management */
20460 +       unsigned int            si_generation;
20461 +
20462 +       /* status flags, see AuSi_* below */
20463 +       unsigned char           au_si_status;
20464 +
20465 +       aufs_bindex_t           si_bend;
20466 +       aufs_bindex_t           si_last_br_id;
20467 +       struct au_branch        **si_branch;
20468 +
20469 +       /* policy to select a writable branch */
20470 +       unsigned char           si_wbr_copyup;
20471 +       unsigned char           si_wbr_create;
20472 +       struct au_wbr_copyup_operations *si_wbr_copyup_ops;
20473 +       struct au_wbr_create_operations *si_wbr_create_ops;
20474 +
20475 +       /* round robin */
20476 +       atomic_t                si_wbr_rr_next;
20477 +
20478 +       /* most free space */
20479 +       struct au_wbr_mfs       si_wbr_mfs;
20480 +
20481 +       /* mount flags */
20482 +       /* include/asm-ia64/siginfo.h defines a macro named si_flags */
20483 +       unsigned int            si_mntflags;
20484 +
20485 +       /* external inode number (bitmap and translation table) */
20486 +       au_readf_t              si_xread;
20487 +       au_writef_t             si_xwrite;
20488 +       struct file             *si_xib;
20489 +       struct mutex            si_xib_mtx; /* protect xib members */
20490 +       unsigned long           *si_xib_buf;
20491 +       unsigned long           si_xib_last_pindex;
20492 +       int                     si_xib_next_bit;
20493 +       aufs_bindex_t           si_xino_brid;
20494 +       /* reserved for future use */
20495 +       /* unsigned long long   si_xib_limit; */        /* Max xib file size */
20496 +
20497 +#ifdef CONFIG_AUFS_EXPORT
20498 +       /* i_generation */
20499 +       struct file             *si_xigen;
20500 +       atomic_t                si_xigen_next;
20501 +#endif
20502 +
20503 +       /* vdir parameters */
20504 +       unsigned long           si_rdcache;     /* max cache time in HZ */
20505 +       unsigned int            si_rdblk;       /* deblk size */
20506 +       unsigned int            si_rdhash;      /* hash size */
20507 +
20508 +       /*
20509 +        * If the number of whiteouts is larger than si_dirwh, leave all of
20510 +        * them after au_whtmp_ren to reduce the cost of rmdir(2).
20511 +        * future fsck.aufs or kernel thread will remove them later.
20512 +        * Otherwise, remove all whiteouts and the dir in rmdir(2).
20513 +        */
20514 +       unsigned int            si_dirwh;
20515 +
20516 +       /*
20517 +        * rename(2) a directory with all children.
20518 +        */
20519 +       /* reserved for future use */
20520 +       /* int                  si_rendir; */
20521 +
20522 +       /* pseudo_link list */
20523 +       struct au_splhead       si_plink;
20524 +       wait_queue_head_t       si_plink_wq;
20525 +       spinlock_t              si_plink_maint_lock;
20526 +       struct file             *si_plink_maint;
20527 +
20528 +       /*
20529 +        * sysfs and lifetime management.
20530 +        * this is not a small structure and it may be a waste of memory when
20531 +        * sysfs is disabled, particularly when many aufs-es are mounted.
20532 +        * but sysfs is enabled in the majority of cases.
20533 +        */
20534 +       struct kobject          si_kobj;
20535 +#ifdef CONFIG_DEBUG_FS
20536 +       struct dentry            *si_dbgaufs, *si_dbgaufs_xib;
20537 +#ifdef CONFIG_AUFS_EXPORT
20538 +       struct dentry            *si_dbgaufs_xigen;
20539 +#endif
20540 +#endif
20541 +
20542 +       /* dirty, necessary for unmounting, sysfs and sysrq */
20543 +       struct super_block      *si_sb;
20544 +};
20545 +
20546 +/* sbinfo status flags */
20547 +/*
20548 + * set when refreshing dirs failed at remount time.
20549 + * then refreshing dirs is tried again at access time.
20550 + * if it is clear, refreshing dirs at access time is unnecessary.
20551 + */
20552 +#define AuSi_FAILED_REFRESH_DIRS       1
20553 +static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
20554 +                                          unsigned int flag)
20555 +{
20556 +       AuRwMustAnyLock(&sbi->si_rwsem);
20557 +       return sbi->au_si_status & flag;
20558 +}
20559 +#define au_ftest_si(sbinfo, name)      au_do_ftest_si(sbinfo, AuSi_##name)
20560 +#define au_fset_si(sbinfo, name) do { \
20561 +       AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
20562 +       (sbinfo)->au_si_status |= AuSi_##name; \
20563 +} while (0)
20564 +#define au_fclr_si(sbinfo, name) do { \
20565 +       AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
20566 +       (sbinfo)->au_si_status &= ~AuSi_##name; \
20567 +} while (0)
20568 +
20569 +/* ---------------------------------------------------------------------- */
20570 +
20571 +/* policy to select one among writable branches */
20572 +#define AuWbrCopyup(sbinfo, ...) \
20573 +       ((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
20574 +#define AuWbrCreate(sbinfo, ...) \
20575 +       ((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
20576 +
20577 +/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
20578 +#define AuLock_DW              1               /* write-lock dentry */
20579 +#define AuLock_IR              (1 << 1)        /* read-lock inode */
20580 +#define AuLock_IW              (1 << 2)        /* write-lock inode */
20581 +#define AuLock_FLUSH           (1 << 3)        /* wait for 'nowait' tasks */
20582 +#define AuLock_DIR             (1 << 4)        /* target is a dir */
20583 +#define au_ftest_lock(flags, name)     ((flags) & AuLock_##name)
20584 +#define au_fset_lock(flags, name) do { (flags) |= AuLock_##name; } while (0)
20585 +#define au_fclr_lock(flags, name) do { (flags) &= ~AuLock_##name; } while (0)
20586 +
20587 +/* ---------------------------------------------------------------------- */
20588 +
20589 +/* super.c */
20590 +extern struct file_system_type aufs_fs_type;
20591 +struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
20592 +
20593 +/* sbinfo.c */
20594 +void au_si_free(struct kobject *kobj);
20595 +int au_si_alloc(struct super_block *sb);
20596 +int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr);
20597 +
20598 +unsigned int au_sigen_inc(struct super_block *sb);
20599 +aufs_bindex_t au_new_br_id(struct super_block *sb);
20600 +
20601 +void aufs_read_lock(struct dentry *dentry, int flags);
20602 +void aufs_read_unlock(struct dentry *dentry, int flags);
20603 +void aufs_write_lock(struct dentry *dentry);
20604 +void aufs_write_unlock(struct dentry *dentry);
20605 +void aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int isdir);
20606 +void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
20607 +
20608 +/* wbr_policy.c */
20609 +extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
20610 +extern struct au_wbr_create_operations au_wbr_create_ops[];
20611 +int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
20612 +
20613 +/* ---------------------------------------------------------------------- */
20614 +/* the aufs private info of @sb, kept in sb->s_fs_info */
20615 +static inline struct au_sbinfo *au_sbi(struct super_block *sb)
20616 +{
20617 +       return sb->s_fs_info;
20618 +}
20619 +
20620 +/* ---------------------------------------------------------------------- */
20621 +
20622 +#ifdef CONFIG_AUFS_EXPORT
20623 +void au_export_init(struct super_block *sb);
20624 +/* true when @tsk has no mm and its comm is "nfsd" */
20625 +static inline int au_test_nfsd(struct task_struct *tsk)
20626 +{
20627 +       return !tsk->mm && !strcmp(tsk->comm, "nfsd");
20628 +}
20629 +
20630 +int au_xigen_inc(struct inode *inode);
20631 +int au_xigen_new(struct inode *inode);
20632 +int au_xigen_set(struct super_block *sb, struct file *base);
20633 +void au_xigen_clr(struct super_block *sb);
20634 +/* -ESTALE when current passes au_test_nfsd(), otherwise -EBUSY */
20635 +static inline int au_busy_or_stale(void)
20636 +{
20637 +       if (!au_test_nfsd(current))
20638 +               return -EBUSY;
20639 +       return -ESTALE;
20640 +}
20641 +#else
20642 +AuStubVoid(au_export_init, struct super_block *sb)
20643 +AuStubInt0(au_test_nfsd, struct task_struct *tsk)
20644 +AuStubInt0(au_xigen_inc, struct inode *inode)
20645 +AuStubInt0(au_xigen_new, struct inode *inode)
20646 +AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base)
20647 +AuStubVoid(au_xigen_clr, struct super_block *sb)
20648 +static inline int au_busy_or_stale(void)
20649 +{
20650 +       return -EBUSY; /* au_test_nfsd() is a stub in this configuration */
20651 +}
20652 +#endif /* CONFIG_AUFS_EXPORT */
20653 +
20654 +/* ---------------------------------------------------------------------- */
20655 +/* clear the dbgaufs dentry pointers in @sbinfo, if they are configured */
20656 +static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
20657 +{
20658 +       /*
20659 +        * This function is a dynamic '__init' function actually,
20660 +        * so the tiny check for si_rwsem is unnecessary.
20661 +        */
20662 +       /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
20663 +#ifdef CONFIG_DEBUG_FS
20664 +       sbinfo->si_dbgaufs = NULL;
20665 +       sbinfo->si_dbgaufs_xib = NULL;
20666 +#ifdef CONFIG_AUFS_EXPORT
20667 +       sbinfo->si_dbgaufs_xigen = NULL;
20668 +#endif
20669 +#endif
20670 +}
20671 +
20672 +/* ---------------------------------------------------------------------- */
20673 +
20674 +/* lock superblock. mainly for entry point functions */
20675 +/*
20676 + * si_noflush_read_lock, si_noflush_write_lock,
20677 + * si_read_unlock, si_write_unlock, si_downgrade_lock
20678 + */
20679 +AuSimpleLockRwsemFuncs(si_noflush, struct super_block *sb,
20680 +                      &au_sbi(sb)->si_rwsem);
20681 +AuSimpleUnlockRwsemFuncs(si, struct super_block *sb, &au_sbi(sb)->si_rwsem);
20682 +
20683 +#define SiMustNoWaiters(sb)    AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
20684 +#define SiMustAnyLock(sb)      AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
20685 +#define SiMustWriteLock(sb)    AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
20686 +/* read-lock si_rwsem; au_nwt_flush() si_nowait first when AuLock_FLUSH is set */
20687 +static inline void si_read_lock(struct super_block *sb, int flags)
20688 +{
20689 +       if (au_ftest_lock(flags, FLUSH))
20690 +               au_nwt_flush(&au_sbi(sb)->si_nowait);
20691 +       si_noflush_read_lock(sb);
20692 +}
20693 +/* write-lock si_rwsem; si_nowait always gets au_nwt_flush() first */
20694 +static inline void si_write_lock(struct super_block *sb)
20695 +{
20696 +       au_nwt_flush(&au_sbi(sb)->si_nowait);
20697 +       si_noflush_write_lock(sb);
20698 +}
20699 +/* trylock variant of si_read_lock(); returns si_noflush_read_trylock() */
20700 +static inline int si_read_trylock(struct super_block *sb, int flags)
20701 +{
20702 +       if (au_ftest_lock(flags, FLUSH))
20703 +               au_nwt_flush(&au_sbi(sb)->si_nowait);
20704 +       return si_noflush_read_trylock(sb);
20705 +}
20706 +/* write trylock; unlike si_write_lock() it flushes only on AuLock_FLUSH */
20707 +static inline int si_write_trylock(struct super_block *sb, int flags)
20708 +{
20709 +       if (au_ftest_lock(flags, FLUSH))
20710 +               au_nwt_flush(&au_sbi(sb)->si_nowait);
20711 +       return si_noflush_write_trylock(sb);
20712 +}
20713 +
20714 +/* ---------------------------------------------------------------------- */
20715 +/* return si_bend of @sb, after the SiMustAnyLock() check */
20716 +static inline aufs_bindex_t au_sbend(struct super_block *sb)
20717 +{
20718 +       SiMustAnyLock(sb);
20719 +       return au_sbi(sb)->si_bend;
20720 +}
20721 +/* return the aufs mount flags (si_mntflags) of @sb */
20722 +static inline unsigned int au_mntflags(struct super_block *sb)
20723 +{
20724 +       SiMustAnyLock(sb);
20725 +       return au_sbi(sb)->si_mntflags;
20726 +}
20727 +/* return the current generation (si_generation) of @sb */
20728 +static inline unsigned int au_sigen(struct super_block *sb)
20729 +{
20730 +       SiMustAnyLock(sb);
20731 +       return au_sbi(sb)->si_generation;
20732 +}
20733 +/* return the si_branch[] entry at @bindex; @bindex is not range-checked */
20734 +static inline struct au_branch *au_sbr(struct super_block *sb,
20735 +                                      aufs_bindex_t bindex)
20736 +{
20737 +       SiMustAnyLock(sb);
20738 +       return au_sbi(sb)->si_branch[0 + bindex];
20739 +}
20740 +/* store @brid in si_xino_brid, after the SiMustWriteLock() check */
20741 +static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
20742 +{
20743 +       SiMustWriteLock(sb);
20744 +       au_sbi(sb)->si_xino_brid = brid;
20745 +}
20746 +/* return si_xino_brid as stored by au_xino_brid_set() */
20747 +static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
20748 +{
20749 +       SiMustAnyLock(sb);
20750 +       return au_sbi(sb)->si_xino_brid;
20751 +}
20752 +
20753 +#endif /* __KERNEL__ */
20754 +#endif /* __AUFS_SUPER_H__ */
20755 diff --git a/fs/aufs/sysaufs.c b/fs/aufs/sysaufs.c
20756 new file mode 100644
20757 index 0000000..b796330
20758 --- /dev/null
20759 +++ b/fs/aufs/sysaufs.c
20760 @@ -0,0 +1,104 @@
20761 +/*
20762 + * Copyright (C) 2005-2009 Junjiro R. Okajima
20763 + *
20764 + * This program, aufs is free software; you can redistribute it and/or modify
20765 + * it under the terms of the GNU General Public License as published by
20766 + * the Free Software Foundation; either version 2 of the License, or
20767 + * (at your option) any later version.
20768 + *
20769 + * This program is distributed in the hope that it will be useful,
20770 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
20771 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20772 + * GNU General Public License for more details.
20773 + *
20774 + * You should have received a copy of the GNU General Public License
20775 + * along with this program; if not, write to the Free Software
20776 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20777 + */
20778 +
20779 +/*
20780 + * sysfs interface and lifetime management
20781 + * they are necessary regardless of whether sysfs is enabled.
20782 + */
20783 +
20784 +#include <linux/fs.h>
20785 +#include <linux/random.h>
20786 +#include <linux/sysfs.h>
20787 +#include "aufs.h"
20788 +
20789 +unsigned long sysaufs_si_mask;
20790 +struct kset *sysaufs_ket;
20791 +
20792 +#define AuSiAttr(_name) { \
20793 +       .attr   = { .name = __stringify(_name), .mode = 0444 }, \
20794 +       .show   = sysaufs_si_##_name,                           \
20795 +}
20796 +
20797 +static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
20798 +struct attribute *sysaufs_si_attrs[] = {
20799 +       &sysaufs_si_attr_xi_path.attr,
20800 +       NULL,
20801 +};
20802 +
20803 +static struct sysfs_ops au_sbi_ops = {
20804 +       .show   = sysaufs_si_show
20805 +};
20806 +
20807 +static struct kobj_type au_sbi_ktype = {
20808 +       .release        = au_si_free,
20809 +       .sysfs_ops      = &au_sbi_ops,
20810 +       .default_attrs  = sysaufs_si_attrs
20811 +};
20812 +
20813 +/* ---------------------------------------------------------------------- */
20814 +/* add the kobject embedded in @sbinfo, then call dbgaufs_si_init() */
20815 +int sysaufs_si_init(struct au_sbinfo *sbinfo)
20816 +{
20817 +       int err;
20818 +
20819 +       sbinfo->si_kobj.kset = sysaufs_ket;
20820 +       /* cf. sysaufs_name() */
20821 +       err = kobject_init_and_add
20822 +               (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_ket->kobj*/NULL,
20823 +                SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
20824 +       /* NOTE(review): no kobject_put() here on failure, confirm the caller */
20825 +       dbgaufs_si_null(sbinfo);
20826 +       if (!err) {
20827 +               err = dbgaufs_si_init(sbinfo);
20828 +               if (unlikely(err))
20829 +                       kobject_put(&sbinfo->si_kobj);
20830 +       }
20831 +       return err;
20832 +}
20833 +/* undo sysaufs_init(): dbgaufs_fin(), the attribute group and the kset */
20834 +void sysaufs_fin(void)
20835 +{
20836 +       dbgaufs_fin();
20837 +       sysfs_remove_group(&sysaufs_ket->kobj, sysaufs_attr_group);
20838 +       kset_unregister(sysaufs_ket);
20839 +}
20840 +/* pick a non-zero random sysaufs_si_mask and create the kset under fs_kobj */
20841 +int __init sysaufs_init(void)
20842 +{
20843 +       int err;
20844 +
20845 +       do {
20846 +               get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
20847 +       } while (!sysaufs_si_mask);
20848 +
20849 +       sysaufs_ket = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
20850 +       err = PTR_ERR(sysaufs_ket);
20851 +       if (IS_ERR(sysaufs_ket))
20852 +               goto out;
20853 +       err = sysfs_create_group(&sysaufs_ket->kobj, sysaufs_attr_group);
20854 +       if (unlikely(err)) {
20855 +               kset_unregister(sysaufs_ket);
20856 +               goto out;
20857 +       }
20858 +
20859 +       err = dbgaufs_init();
20860 +       if (unlikely(err))
20861 +               sysaufs_fin();
20862 + out:
20863 +       return err;
20864 +}
20865 diff --git a/fs/aufs/sysaufs.h b/fs/aufs/sysaufs.h
20866 new file mode 100644
20867 index 0000000..58019f4
20868 --- /dev/null
20869 +++ b/fs/aufs/sysaufs.h
20870 @@ -0,0 +1,105 @@
20871 +/*
20872 + * Copyright (C) 2005-2009 Junjiro R. Okajima
20873 + *
20874 + * This program, aufs is free software; you can redistribute it and/or modify
20875 + * it under the terms of the GNU General Public License as published by
20876 + * the Free Software Foundation; either version 2 of the License, or
20877 + * (at your option) any later version.
20878 + *
20879 + * This program is distributed in the hope that it will be useful,
20880 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
20881 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20882 + * GNU General Public License for more details.
20883 + *
20884 + * You should have received a copy of the GNU General Public License
20885 + * along with this program; if not, write to the Free Software
20886 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20887 + */
20888 +
20889 +/*
20890 + * sysfs interface and mount lifetime management
20891 + */
20892 +
20893 +#ifndef __SYSAUFS_H__
20894 +#define __SYSAUFS_H__
20895 +
20896 +#ifdef __KERNEL__
20897 +
20898 +#include <linux/sysfs.h>
20899 +#include <linux/aufs_type.h>
20900 +#include "module.h"
20901 +
20902 +struct super_block;
20903 +struct au_sbinfo;
20904 +
20905 +struct sysaufs_si_attr {
20906 +       struct attribute attr;
20907 +       int (*show)(struct seq_file *seq, struct super_block *sb);
20908 +};
20909 +
20910 +/* ---------------------------------------------------------------------- */
20911 +
20912 +/* sysaufs.c */
20913 +extern unsigned long sysaufs_si_mask;
20914 +extern struct kset *sysaufs_ket;
20915 +extern struct attribute *sysaufs_si_attrs[];
20916 +int sysaufs_si_init(struct au_sbinfo *sbinfo);
20917 +int __init sysaufs_init(void);
20918 +void sysaufs_fin(void);
20919 +
20920 +/* ---------------------------------------------------------------------- */
20921 +
20922 +/* some people don't like to show a kernel pointer, so XOR it with the mask */
20923 +static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
20924 +{
20925 +       return sysaufs_si_mask ^ (unsigned long)sbinfo;
20926 +}
20927 +/* write SysaufsSiNamePrefix plus the hex sysaufs_si_id() of @sbinfo to @name */
20928 +#define SysaufsSiNamePrefix    "si_"
20929 +#define SysaufsSiNameLen       (sizeof(SysaufsSiNamePrefix) + 16)
20930 +static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
20931 +{
20932 +       snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
20933 +                sysaufs_si_id(sbinfo));
20934 +}
20935 +
20936 +struct au_branch;
20937 +#ifdef CONFIG_SYSFS
20938 +/* sysfs.c */
20939 +extern struct attribute_group *sysaufs_attr_group;
20940 +
20941 +int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
20942 +ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
20943 +                        char *buf);
20944 +
20945 +void sysaufs_br_init(struct au_branch *br);
20946 +void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
20947 +void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
20948 +
20949 +#define sysaufs_brs_init()     do {} while (0)
20950 +
20951 +#else
20952 +#define sysaufs_attr_group     NULL
20953 +
20954 +AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
20955 +
20956 +static inline
20957 +ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
20958 +                        char *buf)
20959 +{
20960 +       return 0;
20961 +}
20962 +
20963 +AuStubVoid(sysaufs_br_init, struct au_branch *br)
20964 +AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
20965 +AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
20966 +
20967 +static inline void sysaufs_brs_init(void)
20968 +{
20969 +       sysaufs_brs = 0;
20970 +}
20971 +
20972 +#endif /* CONFIG_SYSFS */
20973 +
20974 +#endif /* __KERNEL__ */
20975 +#endif /* __SYSAUFS_H__ */
20976 diff --git a/fs/aufs/sysfs.c b/fs/aufs/sysfs.c
20977 new file mode 100644
20978 index 0000000..55602fa
20979 --- /dev/null
20980 +++ b/fs/aufs/sysfs.c
20981 @@ -0,0 +1,210 @@
20982 +/*
20983 + * Copyright (C) 2005-2009 Junjiro R. Okajima
20984 + *
20985 + * This program, aufs is free software; you can redistribute it and/or modify
20986 + * it under the terms of the GNU General Public License as published by
20987 + * the Free Software Foundation; either version 2 of the License, or
20988 + * (at your option) any later version.
20989 + *
20990 + * This program is distributed in the hope that it will be useful,
20991 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
20992 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20993 + * GNU General Public License for more details.
20994 + *
20995 + * You should have received a copy of the GNU General Public License
20996 + * along with this program; if not, write to the Free Software
20997 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20998 + */
20999 +
21000 +/*
21001 + * sysfs interface
21002 + */
21003 +
21004 +#include <linux/fs.h>
21005 +#include <linux/module.h>
21006 +#include <linux/seq_file.h>
21007 +#include <linux/sysfs.h>
21008 +#include "aufs.h"
21009 +
21010 +static struct attribute *au_attr[] = {
21011 +       NULL,   /* no entries yet; the list must still be NULL terminated */
21012 +};
21013 +
21014 +static struct attribute_group sysaufs_attr_group_body = {
21015 +       .attrs = au_attr /* currently the empty list above */
21016 +};
21017 +
21018 +struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
21019 +
21020 +/* ---------------------------------------------------------------------- */
21021 +
21022 +int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
21023 +{
21024 +       int err;
21025 +
21026 +       SiMustAnyLock(sb);
21027 +
21028 +       err = 0; /* nothing is printed when the XINO option is off */
21029 +       if (au_opt_test(au_mntflags(sb), XINO)) {
21030 +               err = au_xino_path(seq, au_sbi(sb)->si_xib);
21031 +               seq_putc(seq, '\n'); /* appended even if au_xino_path() failed */
21032 +       }
21033 +       return err;
21034 +}
21035 +
21036 +/*
21037 + * the lifetime of branch is independent from the entry under sysfs.
21038 + * sysfs handles the lifetime of the entry, and never call ->show() after it is
21039 + * unlinked.
21040 + */
21041 +static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
21042 +                        aufs_bindex_t bindex)
21043 +{
21044 +       struct path path;
21045 +       struct dentry *root;
21046 +       struct au_branch *br;
21047 +
21048 +       AuDbg("b%d\n", bindex);
21049 +
21050 +       root = sb->s_root;
21051 +       di_read_lock_parent(root, !AuLock_IR); /* held while the path is printed */
21052 +       br = au_sbr(sb, bindex);
21053 +       path.mnt = br->br_mnt;
21054 +       path.dentry = au_h_dptr(root, bindex);
21055 +       au_seq_path(seq, &path);
21056 +       di_read_unlock(root, !AuLock_IR);
21057 +       seq_printf(seq, "=%s\n", au_optstr_br_perm(br->br_perm));
21058 +       return 0; /* line is "<path>=<au_optstr_br_perm() string>"; no error path */
21059 +}
21060 +
21061 +/* ---------------------------------------------------------------------- */
21062 +
21063 +static struct seq_file *au_seq(char *p, ssize_t len)
21064 +{
21065 +       struct seq_file *seq;
21066 +
21067 +       seq = kzalloc(sizeof(*seq), GFP_NOFS);
21068 +       if (seq) {
21069 +               /* mutex_init(&seq.lock); */
21070 +               seq->buf = p; /* output goes straight into the caller's buffer */
21071 +               seq->size = len;
21072 +               return seq; /* success */
21073 +       }
21074 +
21075 +       seq = ERR_PTR(-ENOMEM); /* failure is an ERR_PTR, never NULL */
21076 +       return seq;
21077 +}
21078 +
21079 +#define SysaufsBr_PREFIX "br"
21080 +
21081 +/* todo: file size may exceed PAGE_SIZE */
21082 +ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
21083 +                        char *buf)
21084 +{
21085 +       ssize_t err;
21086 +       long l;
21087 +       aufs_bindex_t bend;
21088 +       struct au_sbinfo *sbinfo;
21089 +       struct super_block *sb;
21090 +       struct seq_file *seq;
21091 +       char *name;
21092 +       struct attribute **cattr;
21093 +
21094 +       sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
21095 +       sb = sbinfo->si_sb;
21096 +       si_noflush_read_lock(sb); /* released at "out" on every path */
21097 +
21098 +       seq = au_seq(buf, PAGE_SIZE); /* temporary seq_file writing into buf */
21099 +       err = PTR_ERR(seq);
21100 +       if (IS_ERR(seq))
21101 +               goto out;
21102 +
21103 +       name = (void *)attr->name;
21104 +       cattr = sysaufs_si_attrs;
21105 +       while (*cattr) { /* first try the fixed per-sb attributes by name */
21106 +               if (!strcmp(name, (*cattr)->name)) {
21107 +                       err = container_of(*cattr, struct sysaufs_si_attr, attr)
21108 +                               ->show(seq, sb);
21109 +                       goto out_seq;
21110 +               }
21111 +               cattr++;
21112 +       }
21113 +
21114 +       bend = au_sbend(sb);
21115 +       if (!strncmp(name, SysaufsBr_PREFIX, sizeof(SysaufsBr_PREFIX) - 1)) {
21116 +               name += sizeof(SysaufsBr_PREFIX) - 1; /* the digits after "br" */
21117 +               err = strict_strtol(name, 10, &l);
21118 +               if (!err) {
21119 +                       if (0 <= l && l <= bend) /* reject out-of-range indices */
21120 +                               err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l);
21121 +                       else
21122 +                               err = -ENOENT;
21123 +               }
21124 +               goto out_seq;
21125 +       }
21126 +       BUG(); /* the name matched neither a fixed attribute nor "br..." */
21127 +
21128 + out_seq:
21129 +       if (!err) {
21130 +               err = seq->count; /* success: return the number of bytes produced */
21131 +               /* sysfs limit: a completely filled page is reported as -EFBIG */
21132 +               if (unlikely(err == PAGE_SIZE))
21133 +                       err = -EFBIG;
21134 +       }
21135 +       kfree(seq); /* only the wrapper; buf belongs to the caller */
21136 + out:
21137 +       si_read_unlock(sb);
21138 +       return err;
21139 +}
21140 +
21141 +/* ---------------------------------------------------------------------- */
21142 +
21143 +void sysaufs_br_init(struct au_branch *br)
21144 +{
21145 +       br->br_attr.name = br->br_name; /* text is written by sysaufs_brs_add() */
21146 +       br->br_attr.mode = S_IRUGO;
21147 +       br->br_attr.owner = THIS_MODULE;
21148 +}
21149 +
21150 +void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
21151 +{
21152 +       struct au_branch *br;
21153 +       struct kobject *kobj;
21154 +       aufs_bindex_t bend;
21155 +
21156 +       dbgaufs_brs_del(sb, bindex); /* done even when sysaufs_brs is off */
21157 +
21158 +       if (!sysaufs_brs)
21159 +               return;
21160 +
21161 +       kobj = &au_sbi(sb)->si_kobj;
21162 +       bend = au_sbend(sb);
21163 +       for (; bindex <= bend; bindex++) { /* from bindex up to the last branch */
21164 +               br = au_sbr(sb, bindex);
21165 +               sysfs_remove_file(kobj, &br->br_attr);
21166 +       }
21167 +}
21168 +
21169 +void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
21170 +{
21171 +       int err;
21172 +       aufs_bindex_t bend;
21173 +       struct kobject *kobj;
21174 +       struct au_branch *br;
21175 +
21176 +       dbgaufs_brs_add(sb, bindex); /* done even when sysaufs_brs is off */
21177 +
21178 +       if (!sysaufs_brs)
21179 +               return;
21180 +
21181 +       kobj = &au_sbi(sb)->si_kobj;
21182 +       bend = au_sbend(sb);
21183 +       for (; bindex <= bend; bindex++) { /* from bindex up to the last branch */
21184 +               br = au_sbr(sb, bindex);
21185 +               snprintf(br->br_name, sizeof(br->br_name), SysaufsBr_PREFIX
21186 +                        "%d", bindex); /* entry name is "br<index>" */
21187 +               err = sysfs_create_file(kobj, &br->br_attr);
21188 +               if (unlikely(err)) /* only warned about; the loop goes on */
21189 +                       AuWarn("failed %s under sysfs(%d)\n", br->br_name, err);
21190 +       }
21191 +}
21192 diff --git a/fs/aufs/sysrq.c b/fs/aufs/sysrq.c
21193 new file mode 100644
21194 index 0000000..979ac73
21195 --- /dev/null
21196 +++ b/fs/aufs/sysrq.c
21197 @@ -0,0 +1,118 @@
21198 +/*
21199 + * Copyright (C) 2005-2009 Junjiro R. Okajima
21200 + *
21201 + * This program, aufs is free software; you can redistribute it and/or modify
21202 + * it under the terms of the GNU General Public License as published by
21203 + * the Free Software Foundation; either version 2 of the License, or
21204 + * (at your option) any later version.
21205 + *
21206 + * This program is distributed in the hope that it will be useful,
21207 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
21208 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21209 + * GNU General Public License for more details.
21210 + *
21211 + * You should have received a copy of the GNU General Public License
21212 + * along with this program; if not, write to the Free Software
21213 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21214 + */
21215 +
21216 +/*
21217 + * magic sysrq handler
21218 + */
21219 +
21220 +#include <linux/fs.h>
21221 +#include <linux/module.h>
21222 +#include <linux/moduleparam.h>
21223 +/* #include <linux/sysrq.h> */
21224 +#include "aufs.h"
21225 +
21226 +/* ---------------------------------------------------------------------- */
21227 +
21228 +static void sysrq_sb(struct super_block *sb)
21229 +{
21230 +       char *plevel;
21231 +       struct au_sbinfo *sbinfo;
21232 +       struct file *file;
21233 +
21234 +       plevel = au_plevel; /* saved here, restored before returning */
21235 +       au_plevel = KERN_WARNING;
21236 +       au_debug(1); /* paired with au_debug(0) at the end */
21237 +
21238 +       sbinfo = au_sbi(sb);
21239 +       pr_warning("si=%lx\n", sysaufs_si_id(sbinfo));
21240 +       pr_warning(AUFS_NAME ": superblock\n");
21241 +       au_dpri_sb(sb);
21242 +       pr_warning(AUFS_NAME ": root dentry\n");
21243 +       au_dpri_dentry(sb->s_root);
21244 +       pr_warning(AUFS_NAME ": root inode\n");
21245 +       au_dpri_inode(sb->s_root->d_inode);
21246 +#if 0
21247 +       struct inode *i;
21248 +       pr_warning(AUFS_NAME ": isolated inode\n");
21249 +       list_for_each_entry(i, &sb->s_inodes, i_sb_list)
21250 +               if (list_empty(&i->i_dentry))
21251 +                       au_dpri_inode(i);
21252 +#endif
21253 +       pr_warning(AUFS_NAME ": files\n");
21254 +       list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
21255 +               umode_t mode;
21256 +               mode = file->f_dentry->d_inode->i_mode;
21257 +               if (!special_file(mode) || au_special_file(mode))
21258 +                       au_dpri_file(file); /* other special files are skipped */
21259 +       }
21260 +
21261 +       au_plevel = plevel;
21262 +       au_debug(0);
21263 +}
21264 +
21265 +/* ---------------------------------------------------------------------- */
21266 +
21267 +/* module parameter: its first character is the key that gets registered */
21268 +static char *aufs_sysrq_key = "a";
21269 +module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
21270 +MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
21271 +
21272 +static void au_sysrq(int key __maybe_unused,
21273 +                    struct tty_struct *tty __maybe_unused)
21274 +{
21275 +       struct kobject *kobj;
21276 +       struct au_sbinfo *sbinfo;
21277 +
21278 +       /* spin_lock(&sysaufs_ket->list_lock); */
21279 +       list_for_each_entry(kobj, &sysaufs_ket->list, entry) { /* no list_lock taken */
21280 +               sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
21281 +               sysrq_sb(sbinfo->si_sb); /* dump this superblock */
21282 +       }
21283 +       /* spin_unlock(&sysaufs_ket->list_lock); */
21284 +}
21285 +
21286 +static struct sysrq_key_op au_sysrq_op = {
21287 +       .handler        = au_sysrq, /* dumps every sb found on sysaufs_ket's list */
21288 +       .help_msg       = "Aufs",
21289 +       .action_msg     = "Aufs",
21290 +       .enable_mask    = SYSRQ_ENABLE_DUMP
21291 +};
21292 +
21293 +/* ---------------------------------------------------------------------- */
21294 +
21295 +int __init au_sysrq_init(void)
21296 +{
21297 +       int err;
21298 +       char key;
21299 +
21300 +       err = -1; /* returned as is when the key is not in 'a'..'z' */
21301 +       key = *aufs_sysrq_key; /* only the first character is looked at */
21302 +       if ('a' <= key && key <= 'z')
21303 +               err = register_sysrq_key(key, &au_sysrq_op);
21304 +       if (unlikely(err))
21305 +               AuErr("err %d, sysrq=%c\n", err, key);
21306 +       return err;
21307 +}
21308 +
21309 +void au_sysrq_fin(void)
21310 +{
21311 +       int err;
21312 +       err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
21313 +       if (unlikely(err)) /* logged only; there is nothing to return */
21314 +               AuErr("err %d (ignored)\n", err);
21315 +}
21316 diff --git a/fs/aufs/vdir.c b/fs/aufs/vdir.c
21317 new file mode 100644
21318 index 0000000..7c68962
21319 --- /dev/null
21320 +++ b/fs/aufs/vdir.c
21321 @@ -0,0 +1,884 @@
21322 +/*
21323 + * Copyright (C) 2005-2009 Junjiro R. Okajima
21324 + *
21325 + * This program, aufs is free software; you can redistribute it and/or modify
21326 + * it under the terms of the GNU General Public License as published by
21327 + * the Free Software Foundation; either version 2 of the License, or
21328 + * (at your option) any later version.
21329 + *
21330 + * This program is distributed in the hope that it will be useful,
21331 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
21332 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21333 + * GNU General Public License for more details.
21334 + *
21335 + * You should have received a copy of the GNU General Public License
21336 + * along with this program; if not, write to the Free Software
21337 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21338 + */
21339 +
21340 +/*
21341 + * virtual or vertical directory
21342 + */
21343 +
21344 +#include <linux/hash.h>
21345 +#include "aufs.h"
21346 +
21347 +static unsigned int calc_size(int nlen)
21348 +{ /* size of one entry with an nlen-byte name, rounded up to sizeof(ino_t) */
21349 +       return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
21350 +}
21351 +
21352 +static int set_deblk_end(union au_vdir_deblk_p *p,
21353 +                        union au_vdir_deblk_p *deblk_end)
21354 +{
21355 +       if (calc_size(0) <= deblk_end->deblk - p->deblk) {
21356 +               p->de->de_str.len = 0; /* a zero-length name is the end marker */
21357 +               /* smp_mb(); */
21358 +               return 0;
21359 +       }
21360 +       return -1; /* error: no room left at p for an end marker */
21361 +}
21362 +
21363 +/* returns true at the end marker or when no further entry header fits */
21364 +static int is_deblk_end(union au_vdir_deblk_p *p,
21365 +                       union au_vdir_deblk_p *deblk_end)
21366 +{
21367 +       if (calc_size(0) <= deblk_end->deblk - p->deblk)
21368 +               return !p->de->de_str.len; /* zero length == end marker */
21369 +       return 1;
21370 +}
21371 +
21372 +static unsigned char *last_deblk(struct au_vdir *vdir)
21373 +{
21374 +       return vdir->vd_deblk[vdir->vd_nblk - 1]; /* needs vd_nblk >= 1 */
21375 +}
21376 +
21377 +/* ---------------------------------------------------------------------- */
21378 +
21379 +/* estimate the appropriate size for name hash table */
21380 +unsigned int au_rdhash_est(loff_t sz)
21381 +{
21382 +       unsigned int n;
21383 +
21384 +       n = UINT_MAX; /* upper bound */
21385 +       sz >>= 10; /* sz / 1024 */
21386 +       if (sz < n)
21387 +               n = sz;
21388 +       if (sz < AUFS_RDHASH_DEF)
21389 +               n = AUFS_RDHASH_DEF; /* lower bound */
21390 +       /* AuInfo("n %u\n", n); */
21391 +       return n;
21392 +}
21393 +
21394 +/*
21395 + * allocates num_hash empty buckets; returns 0, or -ENOMEM with nhash untouched.
21396 + * free the table with au_nhash_wh_free() or au_nhash_de_free().
21397 + */
21398 +int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
21399 +{
21400 +       struct hlist_head *head;
21401 +       unsigned int u;
21402 +
21403 +       head = kcalloc(num_hash, sizeof(*nhash->nh_head), gfp); /* overflow-checked */
21404 +       if (head) {
21405 +               nhash->nh_num = num_hash;
21406 +               nhash->nh_head = head;
21407 +               for (u = 0; u < num_hash; u++)
21408 +                       INIT_HLIST_HEAD(head++);
21409 +               return 0; /* success */
21410 +       }
21411 +
21412 +       return -ENOMEM;
21413 +}
21414 +
21415 +static void nhash_count(struct hlist_head *head)
21416 +{
21417 +#if 0 /* disabled debug aid: would log the number of nodes on this chain */
21418 +       unsigned long n;
21419 +       struct hlist_node *pos;
21420 +
21421 +       n = 0;
21422 +       hlist_for_each(pos, head)
21423 +               n++;
21424 +       AuInfo("%lu\n", n);
21425 +#endif
21426 +}
21427 +
21428 +static void au_nhash_wh_do_free(struct hlist_head *head)
21429 +{
21430 +       struct au_vdir_wh *tpos;
21431 +       struct hlist_node *pos, *node;
21432 +
21433 +       hlist_for_each_entry_safe(tpos, pos, node, head, wh_hash) {
21434 +               /* hlist_del(pos); */
21435 +               kfree(tpos); /* freed without unlinking; head is left stale */
21436 +       }
21437 +}
21438 +
21439 +static void au_nhash_de_do_free(struct hlist_head *head)
21440 +{
21441 +       struct au_vdir_dehstr *tpos;
21442 +       struct hlist_node *pos, *node;
21443 +
21444 +       hlist_for_each_entry_safe(tpos, pos, node, head, hash) {
21445 +               /* hlist_del(pos); */
21446 +               au_cache_free_vdir_dehstr(tpos); /* node only; tpos->str is not freed */
21447 +       }
21448 +}
21449 +
21450 +static void au_nhash_do_free(struct au_nhash *nhash,
21451 +                            void (*free)(struct hlist_head *head))
21452 +{
21453 +       unsigned int n;
21454 +       struct hlist_head *head;
21455 +
21456 +       n = nhash->nh_num;
21457 +       if (!n)
21458 +               return; /* no buckets: nh_head is not touched either */
21459 +
21460 +       head = nhash->nh_head;
21461 +       while (n-- > 0) { /* empty each bucket with the given callback */
21462 +               nhash_count(head);
21463 +               free(head++);
21464 +       }
21465 +       kfree(nhash->nh_head); /* nh_num and nh_head are not reset */
21466 +}
21467 +
21468 +void au_nhash_wh_free(struct au_nhash *whlist)
21469 +{
21470 +       au_nhash_do_free(whlist, au_nhash_wh_do_free); /* nodes are kfree()d */
21471 +}
21472 +
21473 +static void au_nhash_de_free(struct au_nhash *delist)
21474 +{
21475 +       au_nhash_do_free(delist, au_nhash_de_do_free); /* nodes go back to the cache */
21476 +}
21477 +
21478 +/* ---------------------------------------------------------------------- */
21479 +
21480 +int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
21481 +                           int limit)
21482 +{
21483 +       int num;
21484 +       unsigned int u, n;
21485 +       struct hlist_head *head;
21486 +       struct au_vdir_wh *tpos;
21487 +       struct hlist_node *pos;
21488 +
21489 +       num = 0; /* entries seen so far whose wh_bindex equals btgt */
21490 +       n = whlist->nh_num;
21491 +       head = whlist->nh_head;
21492 +       for (u = 0; u < n; u++, head++)
21493 +               hlist_for_each_entry(tpos, pos, head, wh_hash)
21494 +                       if (tpos->wh_bindex == btgt && ++num > limit)
21495 +                               return 1; /* more than limit such entries */
21496 +       return 0;
21497 +}
21498 +
21499 +static struct hlist_head *au_name_hash(struct au_nhash *nhash,
21500 +                                      unsigned char *name,
21501 +                                      unsigned int len)
21502 +{
21503 +       unsigned int v;
21504 +       /* const unsigned int magic_bit = 12; */
21505 +
21506 +       AuDebugOn(!nhash->nh_num || !nhash->nh_head);
21507 +
21508 +       v = 0;
21509 +       while (len--)
21510 +               v += *name++; /* the hash is the plain sum of the name bytes */
21511 +       /* v = hash_long(v, magic_bit); */
21512 +       v %= nhash->nh_num; /* reduced to a bucket index */
21513 +       return nhash->nh_head + v;
21514 +}
21515 +
21516 +static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
21517 +                             int nlen)
21518 +{ /* nonzero iff str has length nlen and the same nlen bytes as name */
21519 +       return str->len == nlen && !memcmp(str->name, name, nlen);
21520 +}
21521 +
21522 +/* returns found or not: is name (nlen bytes) already recorded in whlist? */
21523 +int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
21524 +{
21525 +       struct hlist_head *head;
21526 +       struct au_vdir_wh *tpos;
21527 +       struct hlist_node *pos;
21528 +       struct au_vdir_destr *str;
21529 +
21530 +       head = au_name_hash(whlist, name, nlen); /* only this bucket is searched */
21531 +       hlist_for_each_entry(tpos, pos, head, wh_hash) {
21532 +               str = &tpos->wh_str;
21533 +               AuDbg("%.*s\n", str->len, str->name);
21534 +               if (au_nhash_test_name(str, name, nlen))
21535 +                       return 1;
21536 +       }
21537 +       return 0;
21538 +}
21539 +
21540 +/* returns found(true) or not: is name (nlen bytes) already in delist? */
21541 +static int test_known(struct au_nhash *delist, char *name, int nlen)
21542 +{
21543 +       struct hlist_head *head;
21544 +       struct au_vdir_dehstr *tpos;
21545 +       struct hlist_node *pos;
21546 +       struct au_vdir_destr *str;
21547 +
21548 +       head = au_name_hash(delist, name, nlen); /* only this bucket is searched */
21549 +       hlist_for_each_entry(tpos, pos, head, hash) {
21550 +               str = tpos->str; /* a pointer here, unlike the embedded wh_str */
21551 +               AuDbg("%.*s\n", str->len, str->name);
21552 +               if (au_nhash_test_name(str, name, nlen))
21553 +                       return 1;
21554 +       }
21555 +       return 0;
21556 +}
21557 +
21558 +static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
21559 +                           unsigned char d_type)
21560 +{
21561 +#ifdef CONFIG_AUFS_SHWH /* without it this function does nothing */
21562 +       wh->wh_ino = ino;
21563 +       wh->wh_type = d_type;
21564 +#endif
21565 +}
21566 +
21567 +/* ---------------------------------------------------------------------- */
21568 +
21569 +int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
21570 +                      unsigned int d_type, aufs_bindex_t bindex,
21571 +                      unsigned char shwh)
21572 +{
21573 +       int err;
21574 +       struct au_vdir_destr *str;
21575 +       struct au_vdir_wh *wh;
21576 +
21577 +       AuDbg("%.*s\n", nlen, name);
21578 +       AuDebugOn(!whlist->nh_num || !whlist->nh_head);
21579 +
21580 +       err = -ENOMEM;
21581 +       wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS); /* node plus nlen name bytes */
21582 +       if (unlikely(!wh))
21583 +               goto out;
21584 +
21585 +       err = 0;
21586 +       wh->wh_bindex = bindex;
21587 +       if (shwh) /* ino and d_type are only stored in this case */
21588 +               au_shwh_init_wh(wh, ino, d_type);
21589 +       str = &wh->wh_str;
21590 +       str->len = nlen;
21591 +       memcpy(str->name, name, nlen); /* exactly nlen bytes are copied */
21592 +       hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
21593 +       /* smp_mb(); */
21594 +
21595 + out:
21596 +       return err; /* 0, or -ENOMEM if the node could not be allocated */
21597 +}
21598 +
21599 +static int append_deblk(struct au_vdir *vdir)
21600 +{
21601 +       int err;
21602 +       unsigned long ul;
21603 +       const unsigned int deblk_sz = vdir->vd_deblk_sz;
21604 +       union au_vdir_deblk_p p, deblk_end;
21605 +       unsigned char **o;
21606 +
21607 +       err = -ENOMEM; /* returned if either allocation below fails */
21608 +       o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
21609 +                    GFP_NOFS);
21610 +       if (unlikely(!o))
21611 +               goto out;
21612 +
21613 +       vdir->vd_deblk = o; /* pointer array now has one more slot */
21614 +       p.deblk = kmalloc(deblk_sz, GFP_NOFS);
21615 +       if (p.deblk) {
21616 +               ul = vdir->vd_nblk++; /* index of the new block */
21617 +               vdir->vd_deblk[ul] = p.deblk;
21618 +               vdir->vd_last.ul = ul; /* the write position moves to its start */
21619 +               vdir->vd_last.p.deblk = p.deblk;
21620 +               deblk_end.deblk = p.deblk + deblk_sz;
21621 +               err = set_deblk_end(&p, &deblk_end); /* block stays stored on error */
21622 +       }
21623 +
21624 + out:
21625 +       return err;
21626 +}
21627 +
21628 +static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
21629 +                    unsigned int d_type, struct au_nhash *delist)
21630 +{
21631 +       int err;
21632 +       unsigned int sz;
21633 +       const unsigned int deblk_sz = vdir->vd_deblk_sz;
21634 +       union au_vdir_deblk_p p, *room, deblk_end;
21635 +       struct au_vdir_dehstr *dehstr;
21636 +
21637 +       p.deblk = last_deblk(vdir);
21638 +       deblk_end.deblk = p.deblk + deblk_sz;
21639 +       room = &vdir->vd_last.p; /* where the new entry will be written */
21640 +       AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
21641 +                 || !is_deblk_end(room, &deblk_end));
21642 +
21643 +       sz = calc_size(nlen);
21644 +       if (unlikely(sz > deblk_end.deblk - room->deblk)) {
21645 +               err = append_deblk(vdir); /* entry does not fit: open a new block */
21646 +               if (unlikely(err))
21647 +                       goto out;
21648 +
21649 +               p.deblk = last_deblk(vdir);
21650 +               deblk_end.deblk = p.deblk + deblk_sz;
21651 +               /* smp_mb(); */
21652 +               AuDebugOn(room->deblk != p.deblk);
21653 +       }
21654 +
21655 +       err = -ENOMEM;
21656 +       dehstr = au_cache_alloc_vdir_dehstr();
21657 +       if (unlikely(!dehstr))
21658 +               goto out;
21659 +
21660 +       dehstr->str = &room->de->de_str; /* hash node points into the block */
21661 +       hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
21662 +       room->de->de_ino = ino;
21663 +       room->de->de_type = d_type;
21664 +       room->de->de_str.len = nlen;
21665 +       memcpy(room->de->de_str.name, name, nlen);
21666 +
21667 +       err = 0;
21668 +       room->deblk += sz; /* advance the write position past the new entry */
21669 +       if (unlikely(set_deblk_end(room, &deblk_end)))
21670 +               err = append_deblk(vdir); /* no room for an end marker: new block */
21671 +       /* smp_mb(); */
21672 +
21673 + out:
21674 +       return err;
21675 +}
21676 +
21677 +/* ---------------------------------------------------------------------- */
21678 +
21679 +void au_vdir_free(struct au_vdir *vdir)
21680 +{
21681 +       unsigned char **deblk;
21682 +
21683 +       deblk = vdir->vd_deblk;
21684 +       while (vdir->vd_nblk--) /* every entry block */
21685 +               kfree(*deblk++);
21686 +       kfree(vdir->vd_deblk); /* then the array of block pointers */
21687 +       au_cache_free_vdir(vdir); /* and finally vdir itself */
21688 +}
21689 +
21690 +static struct au_vdir *alloc_vdir(struct file *file)
21691 +{
21692 +       struct au_vdir *vdir;
21693 +       struct super_block *sb;
21694 +       int err;
21695 +
21696 +       sb = file->f_dentry->d_sb;
21697 +       SiMustAnyLock(sb);
21698 +
21699 +       err = -ENOMEM;
21700 +       vdir = au_cache_alloc_vdir();
21701 +       if (unlikely(!vdir))
21702 +               goto out;
21703 +
21704 +       vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
21705 +       if (unlikely(!vdir->vd_deblk))
21706 +               goto out_free;
21707 +
21708 +       vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
21709 +       if (!vdir->vd_deblk_sz) { /* si_rdblk is zero */
21710 +               /* estimate the appropriate size for deblk */
21711 +               vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
21712 +               /* AuInfo("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
21713 +       }
21714 +       vdir->vd_nblk = 0;
21715 +       vdir->vd_version = 0;
21716 +       vdir->vd_jiffy = 0;
21717 +       err = append_deblk(vdir); /* the first, empty entry block */
21718 +       if (!err)
21719 +               return vdir; /* success */
21720 +
21721 +       au_vdir_free(vdir); /* also frees a block append_deblk() stored before failing */
21722 +       goto out;
21723 + out_free:
21724 +       au_cache_free_vdir(vdir);
21725 + out:
21726 +       vdir = ERR_PTR(err); /* failure is an ERR_PTR, never NULL */
21727 +       return vdir;
21728 +}
21729 +
21730 +static int reinit_vdir(struct au_vdir *vdir)
21731 +{
21732 +       int err;
21733 +       union au_vdir_deblk_p p, deblk_end;
21734 +
21735 +       while (vdir->vd_nblk > 1) { /* free all blocks but the first */
21736 +               kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
21737 +               /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
21738 +               vdir->vd_nblk--;
21739 +       }
21740 +       p.deblk = vdir->vd_deblk[0];
21741 +       deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
21742 +       err = set_deblk_end(&p, &deblk_end); /* mark the first block empty */
21743 +       /* keep vd_deblk_sz */
21744 +       vdir->vd_last.ul = 0; /* write position back to the start of block 0 */
21745 +       vdir->vd_last.p.deblk = vdir->vd_deblk[0];
21746 +       vdir->vd_version = 0;
21747 +       vdir->vd_jiffy = 0;
21748 +       /* smp_mb(); */
21749 +       return err;
21750 +}
21751 +
21752 +/* ---------------------------------------------------------------------- */
21753 +
21754 +#define AuFillVdir_CALLED      1
21755 +#define AuFillVdir_WHABLE      (1 << 1)
21756 +#define AuFillVdir_SHWH                (1 << 2)
21757 +#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name)
21758 +#define au_fset_fillvdir(flags, name)  { (flags) |= AuFillVdir_##name; }
21759 +#define au_fclr_fillvdir(flags, name)  { (flags) &= ~AuFillVdir_##name; }
21760 +
21761 +#ifndef CONFIG_AUFS_SHWH
21762 +#undef AuFillVdir_SHWH
21763 +#define AuFillVdir_SHWH                0
21764 +#endif
21765 +
21766 +struct fillvdir_arg {
21767 +       struct file             *file; /* the aufs directory file being read */
21768 +       struct au_vdir          *vdir; /* entries are appended here */
21769 +       struct au_nhash         delist; /* names already appended to vdir */
21770 +       struct au_nhash         whlist; /* whiteout names recorded so far */
21771 +       aufs_bindex_t           bindex; /* branch currently being read */
21772 +       unsigned int            flags; /* AuFillVdir_* bits */
21773 +       int                     err; /* result of the latest fillvdir() call */
21774 +};
21775 +
21776 +static int fillvdir(void *__arg, const char *__name, int nlen,
21777 +                   loff_t offset __maybe_unused, u64 h_ino,
21778 +                   unsigned int d_type)
21779 +{
21780 +       struct fillvdir_arg *arg = __arg;
21781 +       char *name = (void *)__name;
21782 +       struct super_block *sb;
21783 +       ino_t ino;
21784 +       const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
21785 +
21786 +       arg->err = 0;
21787 +       sb = arg->file->f_dentry->d_sb;
21788 +       au_fset_fillvdir(arg->flags, CALLED); /* tells the caller an entry arrived */
21789 +       /* smp_mb(); */
21790 +       if (nlen <= AUFS_WH_PFX_LEN
21791 +           || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
21792 +               if (test_known(&arg->delist, name, nlen)
21793 +                   || au_nhash_test_known_wh(&arg->whlist, name, nlen))
21794 +                       goto out; /* already exists or whiteouted */
21795 +
21796 +               /* name lacks the whiteout prefix: append it to vdir as an entry */
21797 +               arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
21798 +               if (!arg->err) {
21799 +                       if (unlikely(nlen > AUFS_MAX_NAMELEN))
21800 +                               d_type = DT_UNKNOWN;
21801 +                       arg->err = append_de(arg->vdir, name, nlen, ino,
21802 +                                            d_type, &arg->delist);
21803 +               }
21804 +       } else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
21805 +               name += AUFS_WH_PFX_LEN; /* record the name without its prefix */
21806 +               nlen -= AUFS_WH_PFX_LEN;
21807 +               if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
21808 +                       goto out; /* already whiteouted */
21809 +               ino = 0; /* defined value to pass on when shwh is off */
21810 +               if (shwh)
21811 +                       arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
21812 +                                            &ino);
21813 +               if (!arg->err) {
21814 +                       if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
21815 +                               d_type = DT_UNKNOWN; /* NOTE(review): branch above tests '>'; confirm '<=' */
21816 +                       arg->err = au_nhash_append_wh
21817 +                               (&arg->whlist, name, nlen, ino, d_type,
21818 +                                arg->bindex, shwh);
21819 +               }
21820 +       }
21821 +
21822 + out:
21823 +       if (!arg->err)
21824 +               arg->vdir->vd_jiffy = jiffies; /* stamped on every error-free call */
21825 +       /* smp_mb(); */
21826 +       AuTraceErr(arg->err);
21827 +       return arg->err; /* also left in arg->err for au_do_read_vdir() */
21828 +}
21829 +
21830 +static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
21831 +                         struct au_nhash *whlist, struct au_nhash *delist)
21832 +{
21833 +#ifdef CONFIG_AUFS_SHWH /* without it this function just returns 0 */
21834 +       int err;
21835 +       unsigned int nh, u;
21836 +       struct hlist_head *head;
21837 +       struct au_vdir_wh *tpos;
21838 +       struct hlist_node *pos, *n;
21839 +       char *p, *o;
21840 +       struct au_vdir_destr *destr;
21841 +
21842 +       AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));
21843 +
21844 +       err = -ENOMEM;
21845 +       o = p = __getname(); /* scratch buffer, released by __putname() below */
21846 +       if (unlikely(!p))
21847 +               goto out;
21848 +       /* o starts with the whiteout prefix; each name is copied in behind it at p */
21849 +       err = 0;
21850 +       nh = whlist->nh_num;
21851 +       memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
21852 +       p += AUFS_WH_PFX_LEN;
21853 +       for (u = 0; !err && u < nh; u++) { /* stop at the first failure */
21854 +               head = whlist->nh_head + u;
21855 +               hlist_for_each_entry_safe(tpos, pos, n, head, wh_hash) {
21856 +                       destr = &tpos->wh_str;
21857 +                       memcpy(p, destr->name, destr->len);
21858 +                       err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
21859 +                                       tpos->wh_ino, tpos->wh_type, delist);
21860 +                       if (unlikely(err))
21861 +                               break; /* the '!err' test above ends the bucket loop too */
21862 +               }
21863 +       }
21864 +
21865 +       __putname(o);
21866 +
21867 + out:
21868 +       AuTraceErr(err);
21869 +       return err; /* 0, -ENOMEM, or the first append_de() error */
21870 +#else
21871 +       return 0;
21872 +#endif
21873 +}
21874 +
21875 +static int au_do_read_vdir(struct fillvdir_arg *arg)
21876 +{
21877 +       int err;
21878 +       unsigned int rdhash;
21879 +       loff_t offset;
21880 +       aufs_bindex_t bend, bindex, bstart;
21881 +       unsigned char shwh;
21882 +       struct file *hf, *file;
21883 +       struct super_block *sb;
21884 +
21885 +       file = arg->file;
21886 +       sb = file->f_dentry->d_sb;
21887 +       SiMustAnyLock(sb);
21888 +
21889 +       rdhash = au_sbi(sb)->si_rdhash;
21890 +       if (!rdhash) /* si_rdhash is zero: estimate from the directory size */
21891 +               rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
21892 +       err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
21893 +       if (unlikely(err))
21894 +               goto out;
21895 +       err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
21896 +       if (unlikely(err))
21897 +               goto out_delist;
21898 +
21899 +       err = 0;
21900 +       arg->flags = 0;
21901 +       shwh = 0;
21902 +       if (au_opt_test(au_mntflags(sb), SHWH)) {
21903 +               shwh = 1;
21904 +               au_fset_fillvdir(arg->flags, SHWH);
21905 +       }
21906 +       bstart = au_fbstart(file);
21907 +       bend = au_fbend(file);
21908 +       for (bindex = bstart; !err && bindex <= bend; bindex++) {
21909 +               hf = au_h_fptr(file, bindex);
21910 +               if (!hf)
21911 +                       continue; /* no file to read for this branch index */
21912 +
21913 +               offset = vfsub_llseek(hf, 0, SEEK_SET); /* rewind to the start */
21914 +               err = offset;
21915 +               if (unlikely(offset))
21916 +                       break; /* anything but offset 0 ends the read */
21917 +
21918 +               arg->bindex = bindex;
21919 +               au_fclr_fillvdir(arg->flags, WHABLE);
21920 +               if (shwh
21921 +                   || (bindex != bend
21922 +                       && au_br_whable(au_sbr_perm(sb, bindex))))
21923 +                       au_fset_fillvdir(arg->flags, WHABLE);
21924 +               do { /* repeat until a pass delivers no entry or fails */
21925 +                       arg->err = 0;
21926 +                       au_fclr_fillvdir(arg->flags, CALLED);
21927 +                       /* smp_mb(); */
21928 +                       err = vfsub_readdir(hf, fillvdir, arg);
21929 +                       if (err >= 0)
21930 +                               err = arg->err; /* error from fillvdir(), if any */
21931 +               } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
21932 +       }
21933 +
21934 +       if (!err && shwh) /* add the recorded whiteouts themselves to vdir */
21935 +               err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
21936 +
21937 +       au_nhash_wh_free(&arg->whlist); /* both tables are freed before returning */
21938 +
21939 + out_delist:
21940 +       au_nhash_de_free(&arg->delist);
21941 + out:
21942 +       return err;
21943 +}
21944 +/* allocate the inode's vdir, or re-read it when @may_read and it is stale */
21945 +static int read_vdir(struct file *file, int may_read)
21946 +{
21947 +       int err;
21948 +       unsigned long expire;
21949 +       unsigned char do_read;
21950 +       struct fillvdir_arg arg;
21951 +       struct inode *inode;
21952 +       struct au_vdir *vdir, *allocated;
21953 +
21954 +       err = 0;
21955 +       inode = file->f_dentry->d_inode;
21956 +       IMustLock(inode);
21957 +       SiMustAnyLock(inode->i_sb);
21958 +
21959 +       allocated = NULL;
21960 +       do_read = 0;
21961 +       expire = au_sbi(inode->i_sb)->si_rdcache;
21962 +       vdir = au_ivdir(inode);
21963 +       if (!vdir) {
21964 +               do_read = 1;
21965 +               vdir = alloc_vdir(file);
21966 +               err = PTR_ERR(vdir);
21967 +               if (IS_ERR(vdir))
21968 +                       goto out;
21969 +               err = 0;
21970 +               allocated = vdir; /* attached to the inode only after a good read */
21971 +       } else if (may_read
21972 +                  && (inode->i_version != vdir->vd_version
21973 +                      || time_after(jiffies, vdir->vd_jiffy + expire))) {
21974 +               do_read = 1; /* version changed or older than si_rdcache */
21975 +               err = reinit_vdir(vdir);
21976 +               if (unlikely(err))
21977 +                       goto out;
21978 +       }
21979 +
21980 +       if (!do_read)
21981 +               return 0; /* success */
21982 +
21983 +       arg.file = file;
21984 +       arg.vdir = vdir;
21985 +       err = au_do_read_vdir(&arg);
21986 +       if (!err) {
21987 +               /* file->f_pos = 0; */
21988 +               vdir->vd_version = inode->i_version;
21989 +               vdir->vd_last.ul = 0; /* reset the position to block 0 */
21990 +               vdir->vd_last.p.deblk = vdir->vd_deblk[0];
21991 +               if (allocated)
21992 +                       au_set_ivdir(inode, allocated);
21993 +       } else if (allocated)
21994 +               au_vdir_free(allocated);
21995 +
21996 + out:
21997 +       return err;
21998 +}
21999 +/* copy @src's blocks and last position into @tgt; returns 0 or -ENOMEM */
22000 +static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
22001 +{
22002 +       int err, rerr;
22003 +       unsigned long ul, n;
22004 +       const unsigned int deblk_sz = src->vd_deblk_sz;
22005 +
22006 +       AuDebugOn(tgt->vd_nblk != 1);
22007 +
22008 +       err = -ENOMEM; /* the only failure reported by this function */
22009 +       if (tgt->vd_nblk < src->vd_nblk) {
22010 +               unsigned char **p;
22011 +
22012 +               p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
22013 +                            GFP_NOFS);
22014 +               if (unlikely(!p))
22015 +                       goto out;
22016 +               tgt->vd_deblk = p;
22017 +       }
22018 +
22019 +       if (tgt->vd_deblk_sz != deblk_sz) {
22020 +               unsigned char *p;
22021 +
22022 +               tgt->vd_deblk_sz = deblk_sz;
22023 +               p = krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS);
22024 +               if (unlikely(!p))
22025 +                       goto out;
22026 +               tgt->vd_deblk[0] = p;
22027 +       }
22028 +       memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
22029 +       tgt->vd_version = src->vd_version;
22030 +       tgt->vd_jiffy = src->vd_jiffy;
22031 +
22032 +       n = src->vd_nblk;
22033 +       for (ul = 1; ul < n; ul++) {
22034 +               tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
22035 +                                           GFP_NOFS);
22036 +               if (unlikely(!tgt->vd_deblk[ul]))
22037 +                       goto out;
22038 +               tgt->vd_nblk++; /* count copied blocks for the error path */
22039 +       }
22040 +       tgt->vd_nblk = n;
22041 +       tgt->vd_last.ul = src->vd_last.ul; /* was a no-op self-assignment */
22042 +       tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
22043 +       tgt->vd_last.p.deblk += src->vd_last.p.deblk
22044 +               - src->vd_deblk[src->vd_last.ul];
22045 +       /* smp_mb(); */
22046 +       return 0; /* success */
22047 +
22048 + out:
22049 +       rerr = reinit_vdir(tgt); /* roll @tgt back; failure here is fatal */
22050 +       BUG_ON(rerr);
22051 +       return err;
22052 +}
22053 +/* read_vdir() the inode's vdir, then copy it to the file's cache if needed */
22054 +int au_vdir_init(struct file *file)
22055 +{
22056 +       int err;
22057 +       struct inode *inode;
22058 +       struct au_vdir *vdir_cache, *allocated;
22059 +
22060 +       err = read_vdir(file, !file->f_pos); /* re-read allowed only at pos 0 */
22061 +       if (unlikely(err))
22062 +               goto out;
22063 +
22064 +       allocated = NULL;
22065 +       vdir_cache = au_fvdir_cache(file);
22066 +       if (!vdir_cache) {
22067 +               vdir_cache = alloc_vdir(file);
22068 +               err = PTR_ERR(vdir_cache);
22069 +               if (IS_ERR(vdir_cache))
22070 +                       goto out;
22071 +               allocated = vdir_cache;
22072 +       } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
22073 +               err = reinit_vdir(vdir_cache);
22074 +               if (unlikely(err))
22075 +                       goto out;
22076 +       } else
22077 +               return 0; /* success */
22078 +
22079 +       inode = file->f_dentry->d_inode;
22080 +       err = copy_vdir(vdir_cache, au_ivdir(inode));
22081 +       if (!err) {
22082 +               file->f_version = inode->i_version;
22083 +               if (allocated)
22084 +                       au_set_fvdir_cache(file, allocated);
22085 +       } else if (allocated)
22086 +               au_vdir_free(allocated); /* never attached to the file */
22087 +
22088 + out:
22089 +       return err;
22090 +}
22091 +/* offset of vd_last: position inside its block + vd_deblk_sz * block index */
22092 +static loff_t calc_offset(struct au_vdir *vdir)
22093 +{
22094 +       loff_t offset;
22095 +       union au_vdir_deblk_p p;
22096 +
22097 +       p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
22098 +       offset = vdir->vd_last.p.deblk - p.deblk;
22099 +       offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
22100 +       return offset;
22101 +}
22102 +
22103 +/* move vd_last of the file's vdir to file->f_pos; returns true or false */
22104 +static int seek_vdir(struct file *file)
22105 +{
22106 +       int valid;
22107 +       unsigned int deblk_sz;
22108 +       unsigned long ul, n;
22109 +       loff_t offset;
22110 +       union au_vdir_deblk_p p, deblk_end;
22111 +       struct au_vdir *vdir_cache;
22112 +
22113 +       valid = 1;
22114 +       vdir_cache = au_fvdir_cache(file);
22115 +       offset = calc_offset(vdir_cache);
22116 +       AuDbg("offset %lld\n", offset);
22117 +       if (file->f_pos == offset) /* vd_last already matches f_pos */
22118 +               goto out;
22119 +
22120 +       vdir_cache->vd_last.ul = 0;
22121 +       vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
22122 +       if (!file->f_pos)
22123 +               goto out;
22124 +
22125 +       valid = 0;
22126 +       deblk_sz = vdir_cache->vd_deblk_sz;
22127 +       ul = div64_u64(file->f_pos, deblk_sz); /* index of the first block to scan */
22128 +       AuDbg("ul %lu\n", ul);
22129 +       if (ul >= vdir_cache->vd_nblk)
22130 +               goto out;
22131 +
22132 +       n = vdir_cache->vd_nblk;
22133 +       for (; ul < n; ul++) {
22134 +               p.deblk = vdir_cache->vd_deblk[ul];
22135 +               deblk_end.deblk = p.deblk + deblk_sz;
22136 +               offset = ul;
22137 +               offset *= deblk_sz;
22138 +               while (!is_deblk_end(&p, &deblk_end) && offset < file->f_pos) {
22139 +                       unsigned int l;
22140 +
22141 +                       l = calc_size(p.de->de_str.len);
22142 +                       offset += l;
22143 +                       p.deblk += l;
22144 +               }
22145 +               if (!is_deblk_end(&p, &deblk_end)) { /* stopped on an entry */
22146 +                       valid = 1;
22147 +                       vdir_cache->vd_last.ul = ul;
22148 +                       vdir_cache->vd_last.p = p;
22149 +                       break;
22150 +               }
22151 +       }
22152 +
22153 + out:
22154 +       /* smp_mb(); */
22155 +       AuTraceErr(!valid);
22156 +       return valid;
22157 +}
22158 +/* pass the cached entries from file->f_pos on to @filldir; always returns 0 */
22159 +int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir)
22160 +{
22161 +       int err;
22162 +       unsigned int l, deblk_sz;
22163 +       union au_vdir_deblk_p deblk_end;
22164 +       struct au_vdir *vdir_cache;
22165 +       struct au_vdir_de *de;
22166 +
22167 +       vdir_cache = au_fvdir_cache(file);
22168 +       if (!seek_vdir(file)) /* no entry at f_pos: nothing to emit */
22169 +               return 0;
22170 +
22171 +       deblk_sz = vdir_cache->vd_deblk_sz;
22172 +       while (1) {
22173 +               deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
22174 +               deblk_end.deblk += deblk_sz;
22175 +               while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
22176 +                       de = vdir_cache->vd_last.p.de;
22177 +                       AuDbg("%.*s, off%lld, i%lu, dt%d\n",
22178 +                             de->de_str.len, de->de_str.name, file->f_pos,
22179 +                             (unsigned long)de->de_ino, de->de_type);
22180 +                       err = filldir(dirent, de->de_str.name, de->de_str.len,
22181 +                                     file->f_pos, de->de_ino, de->de_type);
22182 +                       if (unlikely(err)) {
22183 +                               AuTraceErr(err);
22184 +                               /* todo: ignore the error caused by udba? */
22185 +                               /* return err; */
22186 +                               return 0;
22187 +                       }
22188 +
22189 +                       l = calc_size(de->de_str.len);
22190 +                       vdir_cache->vd_last.p.deblk += l;
22191 +                       file->f_pos += l;
22192 +               }
22193 +               if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
22194 +                       vdir_cache->vd_last.ul++; /* go on with the next block */
22195 +                       vdir_cache->vd_last.p.deblk
22196 +                               = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
22197 +                       file->f_pos = deblk_sz * vdir_cache->vd_last.ul;
22198 +                       continue;
22199 +               }
22200 +               break;
22201 +       }
22202 +
22203 +       /* smp_mb(); */
22204 +       return 0;
22205 +}
22206 diff --git a/fs/aufs/vfsub.c b/fs/aufs/vfsub.c
22207 new file mode 100644
22208 index 0000000..51f2848
22209 --- /dev/null
22210 +++ b/fs/aufs/vfsub.c
22211 @@ -0,0 +1,660 @@
22212 +/*
22213 + * Copyright (C) 2005-2009 Junjiro R. Okajima
22214 + *
22215 + * This program, aufs is free software; you can redistribute it and/or modify
22216 + * it under the terms of the GNU General Public License as published by
22217 + * the Free Software Foundation; either version 2 of the License, or
22218 + * (at your option) any later version.
22219 + *
22220 + * This program is distributed in the hope that it will be useful,
22221 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
22222 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22223 + * GNU General Public License for more details.
22224 + *
22225 + * You should have received a copy of the GNU General Public License
22226 + * along with this program; if not, write to the Free Software
22227 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
22228 + */
22229 +
22230 +/*
22231 + * sub-routines for VFS
22232 + */
22233 +
22234 +#include <linux/namei.h>
22235 +#include <linux/splice.h>
22236 +#include <linux/uaccess.h>
22237 +#include "aufs.h"
22238 +/* vfs_getattr() @h_path if its sb passes the tests below; *@did tells if done */
22239 +int vfsub_update_h_iattr(struct path *h_path, int *did)
22240 +{
22241 +       int err;
22242 +       struct kstat st;
22243 +       struct super_block *h_sb;
22244 +
22245 +       /* for remote fs, leave work for its getattr or d_revalidate */
22246 +       /* for bad i_attr fs, handle them in aufs_getattr() */
22247 +       /* still some fs may acquire i_mutex. we need to skip them */
22248 +       err = 0;
22249 +       if (!did)
22250 +               did = &err; /* scratch slot; err is reassigned or stays 0/1 */
22251 +       h_sb = h_path->dentry->d_sb;
22252 +       *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
22253 +       if (*did)
22254 +               err = vfs_getattr(h_path->mnt, h_path->dentry, &st);
22255 +
22256 +       return err;
22257 +}
22258 +
22259 +/* ---------------------------------------------------------------------- */
22260 +/* filp_open() with lockdep off, then vfsub_update_h_iattr() on success */
22261 +struct file *vfsub_filp_open(const char *path, int oflags, int mode)
22262 +{
22263 +       struct file *file;
22264 +
22265 +       lockdep_off();
22266 +       file = filp_open(path, oflags, mode);
22267 +       lockdep_on();
22268 +       if (IS_ERR(file))
22269 +               goto out;
22270 +       vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
22271 +
22272 + out:
22273 +       return file;
22274 +}
22275 +/* path_lookup(), then vfsub_update_h_iattr() when an inode was found */
22276 +int vfsub_path_lookup(const char *name, unsigned int flags,
22277 +                     struct nameidata *nd)
22278 +{
22279 +       int err;
22280 +
22281 +       /* lockdep_off(); */
22282 +       err = path_lookup(name, flags, nd);
22283 +       /* lockdep_on(); */
22284 +       if (!err && nd->path.dentry->d_inode)
22285 +               vfsub_update_h_iattr(&nd->path, /*did*/NULL); /*ignore*/
22286 +       return err;
22287 +}
22288 +/* lookup_one_len() under @parent (i_mutex held); returns dentry or ERR_PTR */
22289 +struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
22290 +                                   int len)
22291 +{
22292 +       struct path path = {
22293 +               .mnt = NULL
22294 +       };
22295 +
22296 +       IMustLock(parent->d_inode);
22297 +
22298 +       path.dentry = lookup_one_len(name, parent, len);
22299 +       if (IS_ERR(path.dentry))
22300 +               goto out;
22301 +       if (path.dentry->d_inode) /* positive dentry only */
22302 +               vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
22303 +
22304 + out:
22305 +       AuTraceErrPtr(path.dentry);
22306 +       return path.dentry;
22307 +}
22308 +/* lookup_hash() on @nd (parent i_mutex held); returns dentry or ERR_PTR */
22309 +struct dentry *vfsub_lookup_hash(struct nameidata *nd)
22310 +{
22311 +       struct path path = {
22312 +               .mnt = nd->path.mnt
22313 +       };
22314 +
22315 +       IMustLock(nd->path.dentry->d_inode);
22316 +
22317 +       path.dentry = lookup_hash(nd);
22318 +       if (!IS_ERR(path.dentry) && path.dentry->d_inode)
22319 +               vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
22320 +
22321 +       AuTraceErrPtr(path.dentry);
22322 +       return path.dentry;
22323 +}
22324 +
22325 +/* ---------------------------------------------------------------------- */
22326 +/* lock_rename(@d1, @d2) with lockdep off, then au_hin_suspend() both hdirs */
22327 +struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
22328 +                                struct dentry *d2, struct au_hinode *hdir2)
22329 +{
22330 +       struct dentry *d;
22331 +
22332 +       lockdep_off();
22333 +       d = lock_rename(d1, d2);
22334 +       lockdep_on();
22335 +       au_hin_suspend(hdir1);
22336 +       if (hdir1 != hdir2) /* suspend a shared hinode only once */
22337 +               au_hin_suspend(hdir2);
22338 +
22339 +       return d;
22340 +}
22341 +/* reverse of vfsub_lock_rename(): au_hin_resume() first, then unlock_rename() */
22342 +void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
22343 +                        struct dentry *d2, struct au_hinode *hdir2)
22344 +{
22345 +       au_hin_resume(hdir1);
22346 +       if (hdir1 != hdir2)
22347 +               au_hin_resume(hdir2);
22348 +       lockdep_off();
22349 +       unlock_rename(d1, d2);
22350 +       lockdep_on();
22351 +}
22352 +
22353 +/* ---------------------------------------------------------------------- */
22354 +/* vfs_create() @path->dentry in @dir (locked), with a nameidata if the fs needs */
22355 +int vfsub_create(struct inode *dir, struct path *path, int mode)
22356 +{
22357 +       int err;
22358 +
22359 +       IMustLock(dir);
22360 +
22361 +       if (au_test_fs_null_nd(dir->i_sb))
22362 +               err = vfs_create(dir, path->dentry, mode, NULL);
22363 +       else {
22364 +               struct nameidata h_nd;
22365 +
22366 +               memset(&h_nd, 0, sizeof(h_nd)); /* build a minimal create intent */
22367 +               h_nd.flags = LOOKUP_CREATE;
22368 +               h_nd.intent.open.flags = O_CREAT | FMODE_READ;
22369 +               h_nd.intent.open.create_mode = mode;
22370 +               h_nd.path.dentry = path->dentry->d_parent;
22371 +               h_nd.path.mnt = path->mnt;
22372 +               path_get(&h_nd.path);
22373 +               err = vfs_create(dir, path->dentry, mode, &h_nd);
22374 +               path_put(&h_nd.path);
22375 +       }
22376 +
22377 +       if (!err) {
22378 +               struct path tmp = *path;
22379 +               int did;
22380 +
22381 +               vfsub_update_h_iattr(&tmp, &did);
22382 +               if (did) { /* the parent shares the sb, so update it too */
22383 +                       tmp.dentry = path->dentry->d_parent;
22384 +                       vfsub_update_h_iattr(&tmp, /*did*/NULL);
22385 +               }
22386 +               /*ignore*/
22387 +       }
22388 +
22389 +       return err;
22390 +}
22391 +/* vfs_symlink() in @dir (locked), then update attrs of the new entry and parent */
22392 +int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
22393 +{
22394 +       int err;
22395 +
22396 +       IMustLock(dir);
22397 +
22398 +       err = vfs_symlink(dir, path->dentry, symname);
22399 +       if (!err) {
22400 +               struct path tmp = *path;
22401 +               int did;
22402 +
22403 +               vfsub_update_h_iattr(&tmp, &did);
22404 +               if (did) {
22405 +                       tmp.dentry = path->dentry->d_parent;
22406 +                       vfsub_update_h_iattr(&tmp, /*did*/NULL);
22407 +               }
22408 +               /*ignore*/
22409 +       }
22410 +       return err;
22411 +}
22412 +/* vfs_mknod() in @dir (locked), then update attrs of the new entry and parent */
22413 +int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
22414 +{
22415 +       int err;
22416 +
22417 +       IMustLock(dir);
22418 +
22419 +       err = vfs_mknod(dir, path->dentry, mode, dev);
22420 +       if (!err) {
22421 +               struct path tmp = *path;
22422 +               int did;
22423 +
22424 +               vfsub_update_h_iattr(&tmp, &did);
22425 +               if (did) {
22426 +                       tmp.dentry = path->dentry->d_parent;
22427 +                       vfsub_update_h_iattr(&tmp, /*did*/NULL);
22428 +               }
22429 +               /*ignore*/
22430 +       }
22431 +       return err;
22432 +}
22433 +/* -EMLINK only if au_test_fs_no_limit_nlink() and i_nlink >= UINT_MAX >> 1 */
22434 +static int au_test_nlink(struct inode *inode)
22435 +{
22436 +       const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
22437 +
22438 +       if (!au_test_fs_no_limit_nlink(inode->i_sb)
22439 +           || inode->i_nlink < link_max)
22440 +               return 0;
22441 +       return -EMLINK;
22442 +}
22443 +/* vfs_link() @src_dentry to @path->dentry in @dir (locked) after au_test_nlink() */
22444 +int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path)
22445 +{
22446 +       int err;
22447 +
22448 +       IMustLock(dir);
22449 +
22450 +       err = au_test_nlink(src_dentry->d_inode);
22451 +       if (unlikely(err))
22452 +               return err;
22453 +
22454 +       lockdep_off();
22455 +       err = vfs_link(src_dentry, dir, path->dentry);
22456 +       lockdep_on();
22457 +       if (!err) {
22458 +               struct path tmp = *path;
22459 +               int did;
22460 +
22461 +               /* fuse has different memory inode for the same inumber */
22462 +               vfsub_update_h_iattr(&tmp, &did);
22463 +               if (did) { /* also the parent and the link source */
22464 +                       tmp.dentry = path->dentry->d_parent;
22465 +                       vfsub_update_h_iattr(&tmp, /*did*/NULL);
22466 +                       tmp.dentry = src_dentry;
22467 +                       vfsub_update_h_iattr(&tmp, /*did*/NULL);
22468 +               }
22469 +               /*ignore*/
22470 +       }
22471 +       return err;
22472 +}
22473 +/* vfs_rename() with both dirs locked, then update attrs of the touched dentries */
22474 +int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
22475 +                struct inode *dir, struct path *path)
22476 +{
22477 +       int err;
22478 +       struct path tmp = {
22479 +               .dentry = path->dentry->d_parent,
22480 +               .mnt    = path->mnt
22481 +       };
22482 +
22483 +       IMustLock(dir);
22484 +       IMustLock(src_dir);
22485 +
22486 +       lockdep_off();
22487 +       err = vfs_rename(src_dir, src_dentry, dir, path->dentry);
22488 +       lockdep_on();
22489 +       if (!err) {
22490 +               int did;
22491 +
22492 +               vfsub_update_h_iattr(&tmp, &did); /* parent captured before rename */
22493 +               if (did) {
22494 +                       tmp.dentry = src_dentry;
22495 +                       vfsub_update_h_iattr(&tmp, /*did*/NULL);
22496 +                       tmp.dentry = src_dentry->d_parent;
22497 +                       vfsub_update_h_iattr(&tmp, /*did*/NULL);
22498 +               }
22499 +               /*ignore*/
22500 +       }
22501 +       return err;
22502 +}
22503 +/* vfs_mkdir() in @dir (locked), then update attrs of the new dir and its parent */
22504 +int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
22505 +{
22506 +       int err;
22507 +
22508 +       IMustLock(dir);
22509 +
22510 +       err = vfs_mkdir(dir, path->dentry, mode);
22511 +       if (!err) {
22512 +               struct path tmp = *path;
22513 +               int did;
22514 +
22515 +               vfsub_update_h_iattr(&tmp, &did);
22516 +               if (did) {
22517 +                       tmp.dentry = path->dentry->d_parent;
22518 +                       vfsub_update_h_iattr(&tmp, /*did*/NULL);
22519 +               }
22520 +               /*ignore*/
22521 +       }
22522 +       return err;
22523 +}
22524 +/* vfs_rmdir() in @dir (locked) with lockdep off, then update the parent's attrs */
22525 +int vfsub_rmdir(struct inode *dir, struct path *path)
22526 +{
22527 +       int err;
22528 +
22529 +       IMustLock(dir);
22530 +
22531 +       lockdep_off();
22532 +       err = vfs_rmdir(dir, path->dentry);
22533 +       lockdep_on();
22534 +       if (!err) {
22535 +               struct path tmp = {
22536 +                       .dentry = path->dentry->d_parent,
22537 +                       .mnt    = path->mnt
22538 +               };
22539 +
22540 +               vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
22541 +       }
22542 +
22543 +       return err;
22544 +}
22545 +
22546 +/* ---------------------------------------------------------------------- */
22547 +/* vfs_read() into a user buffer; attrs are updated unless it failed */
22548 +ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
22549 +                    loff_t *ppos)
22550 +{
22551 +       ssize_t err;
22552 +
22553 +       err = vfs_read(file, ubuf, count, ppos);
22554 +       if (err >= 0)
22555 +               vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
22556 +       return err;
22557 +}
22558 +
22559 +/* vfsub_read_u() on a kernel buffer under KERNEL_DS. todo: kernel_read()? */
22560 +ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
22561 +                    loff_t *ppos)
22562 +{
22563 +       ssize_t err;
22564 +       mm_segment_t oldfs;
22565 +
22566 +       oldfs = get_fs();
22567 +       set_fs(KERNEL_DS);
22568 +       err = vfsub_read_u(file, (char __user *)kbuf, count, ppos);
22569 +       set_fs(oldfs); /* restore the saved address limit */
22570 +       return err;
22571 +}
22572 +/* vfs_write() from a user buffer with lockdep off; attrs updated unless failed */
22573 +ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
22574 +                     loff_t *ppos)
22575 +{
22576 +       ssize_t err;
22577 +
22578 +       lockdep_off();
22579 +       err = vfs_write(file, ubuf, count, ppos);
22580 +       lockdep_on();
22581 +       if (err >= 0)
22582 +               vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
22583 +       return err;
22584 +}
22585 +/* vfsub_write_u() on a kernel buffer, run under KERNEL_DS */
22586 +ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
22587 +{
22588 +       ssize_t err;
22589 +       mm_segment_t oldfs;
22590 +
22591 +       oldfs = get_fs();
22592 +       set_fs(KERNEL_DS);
22593 +       err = vfsub_write_u(file, (const char __user *)kbuf, count, ppos);
22594 +       set_fs(oldfs); /* restore the saved address limit */
22595 +       return err;
22596 +}
22597 +/* vfs_readdir() with lockdep off; attrs are updated unless it failed */
22598 +int vfsub_readdir(struct file *file, filldir_t filldir, void *arg)
22599 +{
22600 +       int err;
22601 +
22602 +       lockdep_off();
22603 +       err = vfs_readdir(file, filldir, arg);
22604 +       lockdep_on();
22605 +       if (err >= 0)
22606 +               vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
22607 +       return err;
22608 +}
22609 +/* do_splice_to() with lockdep off; attrs of @in are updated unless it failed */
22610 +long vfsub_splice_to(struct file *in, loff_t *ppos,
22611 +                    struct pipe_inode_info *pipe, size_t len,
22612 +                    unsigned int flags)
22613 +{
22614 +       long err;
22615 +
22616 +       lockdep_off();
22617 +       err = do_splice_to(in, ppos, pipe, len, flags);
22618 +       lockdep_on();
22619 +       if (err >= 0)
22620 +               vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
22621 +       return err;
22622 +}
22623 +/* do_splice_from() with lockdep off; attrs of @out are updated unless it failed */
22624 +long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
22625 +                      loff_t *ppos, size_t len, unsigned int flags)
22626 +{
22627 +       long err;
22628 +
22629 +       lockdep_off();
22630 +       err = do_splice_from(pipe, out, ppos, len, flags);
22631 +       lockdep_on();
22632 +       if (err >= 0)
22633 +               vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
22634 +       return err;
22635 +}
22636 +
22637 +/* truncate @h_path; cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
22638 +int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
22639 +               struct file *h_file)
22640 +{
22641 +       int err;
22642 +       struct inode *h_inode;
22643 +
22644 +       h_inode = h_path->dentry->d_inode;
22645 +       if (!h_file) { /* no open file: take mnt and inode write access here */
22646 +               err = mnt_want_write(h_path->mnt);
22647 +               if (err)
22648 +                       goto out;
22649 +               err = inode_permission(h_inode, MAY_WRITE);
22650 +               if (err)
22651 +                       goto out_mnt;
22652 +               err = get_write_access(h_inode);
22653 +               if (err)
22654 +                       goto out_mnt;
22655 +               err = break_lease(h_inode, FMODE_WRITE);
22656 +               if (err)
22657 +                       goto out_inode;
22658 +       }
22659 +
22660 +       err = locks_verify_truncate(h_inode, h_file, length);
22661 +       if (!err) {
22662 +               lockdep_off();
22663 +               err = do_truncate(h_path->dentry, length, attr, h_file);
22664 +               lockdep_on();
22665 +       }
22666 +
22667 + out_inode:
22668 +       if (!h_file) /* undo only what was taken above */
22669 +               put_write_access(h_inode);
22670 + out_mnt:
22671 +       if (!h_file)
22672 +               mnt_drop_write(h_path->mnt);
22673 + out:
22674 +       return err;
22675 +}
22676 +
22677 +/* ---------------------------------------------------------------------- */
22678 +/* arguments for au_call_vfsub_mkdir(); the result is stored through errp */
22679 +struct au_vfsub_mkdir_args {
22680 +       int *errp;
22681 +       struct inode *dir;
22682 +       struct path *path;
22683 +       int mode;
22684 +};
22685 +/* au_wkq_wait() callback: unpack @args and call vfsub_mkdir() */
22686 +static void au_call_vfsub_mkdir(void *args)
22687 +{
22688 +       struct au_vfsub_mkdir_args *a = args;
22689 +       *a->errp = vfsub_mkdir(a->dir, a->path, a->mode);
22690 +}
22691 +/* vfsub_mkdir(), run through au_wkq_wait() when au_test_h_perm_sio() is set */
22692 +int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
22693 +{
22694 +       int err, do_sio, wkq_err;
22695 +
22696 +       do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
22697 +       if (!do_sio)
22698 +               err = vfsub_mkdir(dir, path, mode);
22699 +       else {
22700 +               struct au_vfsub_mkdir_args args = {
22701 +                       .errp   = &err,
22702 +                       .dir    = dir,
22703 +                       .path   = path,
22704 +                       .mode   = mode
22705 +               };
22706 +               wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
22707 +               if (unlikely(wkq_err)) /* a wkq failure overrides err */
22708 +                       err = wkq_err;
22709 +       }
22710 +
22711 +       return err;
22712 +}
22713 +/* arguments for au_call_vfsub_rmdir(); the result is stored through errp */
22714 +struct au_vfsub_rmdir_args {
22715 +       int *errp;
22716 +       struct inode *dir;
22717 +       struct path *path;
22718 +};
22719 +/* au_wkq_wait() callback: unpack @args and call vfsub_rmdir() */
22720 +static void au_call_vfsub_rmdir(void *args)
22721 +{
22722 +       struct au_vfsub_rmdir_args *a = args;
22723 +       *a->errp = vfsub_rmdir(a->dir, a->path);
22724 +}
22725 +/* vfsub_rmdir(), run through au_wkq_wait() when au_test_h_perm_sio() is set */
22726 +int vfsub_sio_rmdir(struct inode *dir, struct path *path)
22727 +{
22728 +       int err, do_sio, wkq_err;
22729 +
22730 +       do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
22731 +       if (!do_sio)
22732 +               err = vfsub_rmdir(dir, path);
22733 +       else {
22734 +               struct au_vfsub_rmdir_args args = {
22735 +                       .errp   = &err,
22736 +                       .dir    = dir,
22737 +                       .path   = path
22738 +               };
22739 +               wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
22740 +               if (unlikely(wkq_err)) /* a wkq failure overrides err */
22741 +                       err = wkq_err;
22742 +       }
22743 +
22744 +       return err;
22745 +}
22746 +
22747 +/* ---------------------------------------------------------------------- */
22748 +/* arguments for call_notify_change(); the result is stored through errp */
22749 +struct notify_change_args {
22750 +       int *errp;
22751 +       struct path *path;
22752 +       struct iattr *ia;
22753 +};
22754 +/* notify_change() on a->path (inode locked); -EPERM if immutable/append-only */
22755 +static void call_notify_change(void *args)
22756 +{
22757 +       struct notify_change_args *a = args;
22758 +       struct inode *h_inode;
22759 +
22760 +       h_inode = a->path->dentry->d_inode;
22761 +       IMustLock(h_inode);
22762 +
22763 +       *a->errp = -EPERM;
22764 +       if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
22765 +               lockdep_off();
22766 +               *a->errp = notify_change(a->path->dentry, a->ia);
22767 +               lockdep_on();
22768 +               if (!*a->errp)
22769 +                       vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
22770 +       }
22771 +       AuTraceErr(*a->errp);
22772 +}
22773 +/* call_notify_change() called directly in the caller's context */
22774 +int vfsub_notify_change(struct path *path, struct iattr *ia)
22775 +{
22776 +       int err;
22777 +       struct notify_change_args args = {
22778 +               .errp   = &err,
22779 +               .path   = path,
22780 +               .ia     = ia
22781 +       };
22782 +
22783 +       call_notify_change(&args);
22784 +
22785 +       return err;
22786 +}
22787 +/* call_notify_change() run through au_wkq_wait() */
22788 +int vfsub_sio_notify_change(struct path *path, struct iattr *ia)
22789 +{
22790 +       int err, wkq_err;
22791 +       struct notify_change_args args = {
22792 +               .errp   = &err,
22793 +               .path   = path,
22794 +               .ia     = ia
22795 +       };
22796 +
22797 +       wkq_err = au_wkq_wait(call_notify_change, &args);
22798 +       if (unlikely(wkq_err)) /* a wkq failure overrides err */
22799 +               err = wkq_err;
22800 +
22801 +       return err;
22802 +}
22803 +
22804 +/* ---------------------------------------------------------------------- */
22805 +/* arguments for call_unlink(); the result is stored through errp */
22806 +struct unlink_args {
22807 +       int *errp;
22808 +       struct inode *dir;
22809 +       struct path *path;
22810 +};
22811 +/* vfs_unlink() a->path->dentry in a->dir (locked), pinning dentry and inode */
22812 +static void call_unlink(void *args)
22813 +{
22814 +       struct unlink_args *a = args;
22815 +       struct dentry *d = a->path->dentry;
22816 +       struct inode *h_inode;
22817 +       const int stop_sillyrename = (au_test_nfs(d->d_sb)
22818 +                                     && atomic_read(&d->d_count) == 1);
22819 +
22820 +       IMustLock(a->dir);
22821 +
22822 +       if (!stop_sillyrename) /* on nfs with d_count 1, take no extra ref */
22823 +               dget(d);
22824 +       h_inode = d->d_inode;
22825 +       if (h_inode)
22826 +               atomic_inc(&h_inode->i_count); /* dropped by iput() below */
22827 +
22828 +       lockdep_off();
22829 +       *a->errp = vfs_unlink(a->dir, d);
22830 +       lockdep_on();
22831 +       if (!*a->errp) {
22832 +               struct path tmp = {
22833 +                       .dentry = d->d_parent,
22834 +                       .mnt    = a->path->mnt
22835 +               };
22836 +               vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
22837 +       }
22838 +
22839 +       if (!stop_sillyrename)
22840 +               dput(d);
22841 +       if (h_inode)
22842 +               iput(h_inode);
22843 +
22844 +       AuTraceErr(*a->errp);
22845 +}
22846 +
22847 +/*
22848 + * @dir: must be locked.
22849 + * @path: target; path->dentry is unlinked. @force: non-zero runs it via au_wkq_wait().
22850 + */
22851 +int vfsub_unlink(struct inode *dir, struct path *path, int force)
22852 +{
22853 +       int err;
22854 +       struct unlink_args args = {
22855 +               .errp   = &err,
22856 +               .dir    = dir,
22857 +               .path   = path
22858 +       };
22859 +
22860 +       if (!force)
22861 +               call_unlink(&args); /* direct call in the caller's context */
22862 +       else {
22863 +               int wkq_err;
22864 +
22865 +               wkq_err = au_wkq_wait(call_unlink, &args);
22866 +               if (unlikely(wkq_err)) /* a non-zero au_wkq_wait() result replaces err */
22867 +                       err = wkq_err;
22868 +       }
22869 +
22870 +       return err;
22871 +}
22872 diff --git a/fs/aufs/vfsub.h b/fs/aufs/vfsub.h
22873 new file mode 100644
22874 index 0000000..a131d98
22875 --- /dev/null
22876 +++ b/fs/aufs/vfsub.h
22877 @@ -0,0 +1,145 @@
22878 +/*
22879 + * Copyright (C) 2005-2009 Junjiro R. Okajima
22880 + *
22881 + * This program, aufs is free software; you can redistribute it and/or modify
22882 + * it under the terms of the GNU General Public License as published by
22883 + * the Free Software Foundation; either version 2 of the License, or
22884 + * (at your option) any later version.
22885 + *
22886 + * This program is distributed in the hope that it will be useful,
22887 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
22888 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22889 + * GNU General Public License for more details.
22890 + *
22891 + * You should have received a copy of the GNU General Public License
22892 + * along with this program; if not, write to the Free Software
22893 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
22894 + */
22895 +
22896 +/*
22897 + * sub-routines for VFS
22898 + */
22899 +
22900 +#ifndef __AUFS_VFSUB_H__
22901 +#define __AUFS_VFSUB_H__
22902 +
22903 +#ifdef __KERNEL__
22904 +
22905 +#include <linux/fs.h>
22906 +#include <linux/fs_stack.h>
22907 +
22908 +/* ---------------------------------------------------------------------- */
22909 +
22910 +/* lock subclasses (for mutex_lock_nested()) used on lower inodes */
22911 +/* the default MAX_LOCKDEP_SUBCLASSES (8) is not enough for this list */
22912 +/* reducing the number of subclasses was considered and given up */
22913 +enum {
22914 +       AuLsc_I_Begin = I_MUTEX_QUOTA, /* 4 */
22915 +       AuLsc_I_PARENT,         /* lower inode, parent first */
22916 +       AuLsc_I_PARENT2,        /* copyup dirs */
22917 +       AuLsc_I_PARENT3,        /* copyup wh */
22918 +       AuLsc_I_CHILD,          /* e.g. the new directory's inode in au_cpdown_dir() */
22919 +       AuLsc_I_CHILD2,
22920 +       AuLsc_I_End             /* one past the last subclass */
22921 +};
22922 +
22923 +/* lock assertions; kept as macros instead of inline functions to make debugging easier */
22924 +#define MtxMustLock(mtx)       AuDebugOn(!mutex_is_locked(mtx)) /* mtx must be locked */
22925 +#define IMustLock(i)           MtxMustLock(&(i)->i_mutex) /* inode i's i_mutex must be locked */
22926 +
22927 +/* ---------------------------------------------------------------------- */
22928 +
22929 +static inline void vfsub_copy_inode_size(struct inode *inode,
22930 +                                        struct inode *h_inode)
22931 +{ /* fsstack_copy_inode_size() from h_inode to inode, done under inode->i_lock */
22932 +       spin_lock(&inode->i_lock);
22933 +       fsstack_copy_inode_size(inode, h_inode);
22934 +       spin_unlock(&inode->i_lock);
22935 +}
22936 +
22937 +int vfsub_update_h_iattr(struct path *h_path, int *did);
22938 +struct file *vfsub_filp_open(const char *path, int oflags, int mode);
22939 +int vfsub_path_lookup(const char *name, unsigned int flags,
22940 +                     struct nameidata *nd);
22941 +struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
22942 +                                   int len);
22943 +struct dentry *vfsub_lookup_hash(struct nameidata *nd);
22944 +
22945 +/* ---------------------------------------------------------------------- */
22946 +
22947 +struct au_hinode;
22948 +struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
22949 +                                struct dentry *d2, struct au_hinode *hdir2);
22950 +void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
22951 +                        struct dentry *d2, struct au_hinode *hdir2);
22952 +
22953 +int vfsub_create(struct inode *dir, struct path *path, int mode);
22954 +int vfsub_symlink(struct inode *dir, struct path *path,
22955 +                 const char *symname);
22956 +int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
22957 +int vfsub_link(struct dentry *src_dentry, struct inode *dir,
22958 +              struct path *path);
22959 +int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
22960 +                struct inode *hdir, struct path *path);
22961 +int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
22962 +int vfsub_rmdir(struct inode *dir, struct path *path);
22963 +
22964 +/* ---------------------------------------------------------------------- */
22965 +
22966 +ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
22967 +                    loff_t *ppos);
22968 +ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
22969 +                       loff_t *ppos);
22970 +ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
22971 +                     loff_t *ppos);
22972 +ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
22973 +                     loff_t *ppos);
22974 +int vfsub_readdir(struct file *file, filldir_t filldir, void *arg);
22975 +
22976 +static inline void vfsub_file_accessed(struct file *h_file)
22977 +{ /* file_accessed() on h_file, followed by vfsub_update_h_iattr() on its path */
22978 +       file_accessed(h_file);
22979 +       vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
22980 +}
22981 +
22982 +static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
22983 +                                    struct dentry *h_dentry)
22984 +{ /* touch_atime() on (h_mnt, h_dentry), followed by vfsub_update_h_iattr() on the same pair */
22985 +       struct path h_path = { /* the same pair packaged as a struct path */
22986 +               .dentry = h_dentry,
22987 +               .mnt    = h_mnt
22988 +       };
22989 +       touch_atime(h_mnt, h_dentry);
22990 +       vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
22991 +}
22992 +
22993 +long vfsub_splice_to(struct file *in, loff_t *ppos,
22994 +                    struct pipe_inode_info *pipe, size_t len,
22995 +                    unsigned int flags);
22996 +long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
22997 +                      loff_t *ppos, size_t len, unsigned int flags);
22998 +int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
22999 +               struct file *h_file);
23000 +
23001 +/* ---------------------------------------------------------------------- */
23002 +
23003 +static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
23004 +{ /* vfs_llseek() with lockdep switched off around the call; returns its result unchanged */
23005 +       loff_t err;
23006 +
23007 +       lockdep_off();
23008 +       err = vfs_llseek(file, offset, origin);
23009 +       lockdep_on();
23010 +       return err;
23011 +}
23012 +
23013 +/* ---------------------------------------------------------------------- */
23014 +
23015 +int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
23016 +int vfsub_sio_rmdir(struct inode *dir, struct path *path);
23017 +int vfsub_sio_notify_change(struct path *path, struct iattr *ia);
23018 +int vfsub_notify_change(struct path *path, struct iattr *ia);
23019 +int vfsub_unlink(struct inode *dir, struct path *path, int force);
23020 +
23021 +#endif /* __KERNEL__ */
23022 +#endif /* __AUFS_VFSUB_H__ */
23023 diff --git a/fs/aufs/wbr_policy.c b/fs/aufs/wbr_policy.c
23024 new file mode 100644
23025 index 0000000..05a8c1e
23026 --- /dev/null
23027 +++ b/fs/aufs/wbr_policy.c
23028 @@ -0,0 +1,641 @@
23029 +/*
23030 + * Copyright (C) 2005-2009 Junjiro R. Okajima
23031 + *
23032 + * This program, aufs is free software; you can redistribute it and/or modify
23033 + * it under the terms of the GNU General Public License as published by
23034 + * the Free Software Foundation; either version 2 of the License, or
23035 + * (at your option) any later version.
23036 + *
23037 + * This program is distributed in the hope that it will be useful,
23038 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
23039 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23040 + * GNU General Public License for more details.
23041 + *
23042 + * You should have received a copy of the GNU General Public License
23043 + * along with this program; if not, write to the Free Software
23044 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
23045 + */
23046 +
23047 +/*
23048 + * policies for selecting one among multiple writable branches
23049 + */
23050 +
23051 +#include <linux/statfs.h>
23052 +#include "aufs.h"
23053 +
23054 +/* subset of cpup_attr(): applies mode, uid and gid of h_src's inode to h_path */
23055 +static noinline_for_stack
23056 +int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
23057 +{
23058 +       int err, sbits;
23059 +       struct iattr ia;
23060 +       struct inode *h_isrc;
23061 +
23062 +       h_isrc = h_src->d_inode;
23063 +       ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
23064 +       ia.ia_mode = h_isrc->i_mode;
23065 +       ia.ia_uid = h_isrc->i_uid;
23066 +       ia.ia_gid = h_isrc->i_gid;
23067 +       sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID)); /* 1 when the source mode has setuid or setgid */
23068 +       au_cpup_attr_flags(h_path->dentry->d_inode, h_isrc);
23069 +       err = vfsub_sio_notify_change(h_path, &ia);
23070 +
23071 +       /* is this nfs only? */
23072 +       if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) { /* second pass: mode only, on NFS with setuid/setgid */
23073 +               ia.ia_valid = ATTR_FORCE | ATTR_MODE;
23074 +               ia.ia_mode = h_isrc->i_mode;
23075 +               err = vfsub_sio_notify_change(h_path, &ia);
23076 +       }
23077 +
23078 +       return err; /* result of the last vfsub_sio_notify_change() performed */
23079 +}
23080 +
23081 +#define AuCpdown_PARENT_OPQ    1
23082 +#define AuCpdown_WHED          (1 << 1)
23083 +#define AuCpdown_MADE_DIR      (1 << 2)
23084 +#define AuCpdown_DIROPQ                (1 << 3)
23085 +#define au_ftest_cpdown(flags, name)   ((flags) & AuCpdown_##name)
23086 +#define au_fset_cpdown(flags, name)    { (flags) |= AuCpdown_##name; }
23087 +#define au_fclr_cpdown(flags, name)    { (flags) &= ~AuCpdown_##name; }
23088 +
23089 +struct au_cpdown_dir_args { /* state passed by au_cpdown_dirs() to au_cpdown_dir() via au_cp_dirs() */
23090 +       struct dentry *parent; /* dget_parent() of the dentry given to au_cpdown_dirs() */
23091 +       unsigned int flags; /* AuCpdown_* bits */
23092 +};
23093 +
23094 +static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
23095 +                            struct au_cpdown_dir_args *a)
23096 +{ /* au_diropq_create() for dentry on branch bdst; sets the DIROPQ flag on success */
23097 +       int err;
23098 +       struct dentry *opq_dentry;
23099 +
23100 +       opq_dentry = au_diropq_create(dentry, bdst);
23101 +       err = PTR_ERR(opq_dentry); /* only meaningful as an error when IS_ERR() below is true */
23102 +       if (IS_ERR(opq_dentry))
23103 +               goto out;
23104 +       dput(opq_dentry); /* the returned dentry itself is not needed here */
23105 +       au_fset_cpdown(a->flags, DIROPQ);
23106 +
23107 + out:
23108 +       return err;
23109 +}
23110 +
23111 +static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
23112 +                           struct inode *dir, aufs_bindex_t bdst)
23113 +{ /* looks up the whiteout for dentry's name under h_parent and unlinks it when it exists */
23114 +       int err;
23115 +       struct path h_path;
23116 +       struct au_branch *br;
23117 +
23118 +       br = au_sbr(dentry->d_sb, bdst);
23119 +       h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
23120 +       err = PTR_ERR(h_path.dentry);
23121 +       if (IS_ERR(h_path.dentry))
23122 +               goto out;
23123 +
23124 +       err = 0; /* a lookup without an inode means there is nothing to remove */
23125 +       if (h_path.dentry->d_inode) {
23126 +               h_path.mnt = br->br_mnt;
23127 +               err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
23128 +                                         dentry);
23129 +       }
23130 +       dput(h_path.dentry); /* drop the dentry obtained from au_wh_lkup() */
23131 +
23132 + out:
23133 +       return err;
23134 +}
23135 +
23136 +static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
23137 +                        struct dentry *h_parent, void *arg)
23138 +{ /* au_cp_dirs() callback (see au_cpdown_dirs()): makes dentry's directory on branch bdst */
23139 +       int err, rerr;
23140 +       aufs_bindex_t bend, bopq, bstart;
23141 +       unsigned char parent_opq;
23142 +       struct path h_path;
23143 +       struct dentry *parent;
23144 +       struct inode *h_dir, *h_inode, *inode, *dir;
23145 +       struct au_cpdown_dir_args *args = arg;
23146 +
23147 +       bstart = au_dbstart(dentry); /* the branch whose attributes are copied below */
23148 +       /* dentry is di-locked */
23149 +       parent = dget_parent(dentry); /* released by dput() at the out label */
23150 +       dir = parent->d_inode;
23151 +       h_dir = h_parent->d_inode;
23152 +       AuDebugOn(h_dir != au_h_iptr(dir, bdst));
23153 +       IMustLock(h_dir); /* debug assertion: h_dir->i_mutex is already held */
23154 +
23155 +       err = au_lkup_neg(dentry, bdst);
23156 +       if (unlikely(err < 0))
23157 +               goto out;
23158 +       h_path.dentry = au_h_dptr(dentry, bdst);
23159 +       h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
23160 +       err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
23161 +                             S_IRWXU | S_IRUGO | S_IXUGO); /* mode 0755; final attributes come from au_cpdown_attr() */
23162 +       if (unlikely(err))
23163 +               goto out_put;
23164 +       au_fset_cpdown(args->flags, MADE_DIR); /* tells the error path to remove the directory */
23165 +
23166 +       bend = au_dbend(dentry); /* NOTE(review): bend is not read again in this function */
23167 +       bopq = au_dbdiropq(dentry);
23168 +       au_fclr_cpdown(args->flags, WHED); /* WHED and DIROPQ are recomputed on every call */
23169 +       au_fclr_cpdown(args->flags, DIROPQ);
23170 +       if (au_dbwh(dentry) == bdst)
23171 +               au_fset_cpdown(args->flags, WHED);
23172 +       if (!au_ftest_cpdown(args->flags, PARENT_OPQ) && bopq <= bdst)
23173 +               au_fset_cpdown(args->flags, PARENT_OPQ); /* never cleared here, so it persists across calls */
23174 +       parent_opq = (au_ftest_cpdown(args->flags, PARENT_OPQ) /* NOTE(review): parent_opq is not read again */
23175 +                     && args->parent == dentry);
23176 +       h_inode = h_path.dentry->d_inode;
23177 +       mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); /* lock the new directory */
23178 +       if (au_ftest_cpdown(args->flags, WHED)) {
23179 +               err = au_cpdown_dir_opq(dentry, bdst, args);
23180 +               if (unlikely(err)) {
23181 +                       mutex_unlock(&h_inode->i_mutex);
23182 +                       goto out_dir;
23183 +               }
23184 +       }
23185 +
23186 +       err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart)); /* source is the dentry on branch bstart */
23187 +       mutex_unlock(&h_inode->i_mutex);
23188 +       if (unlikely(err))
23189 +               goto out_opq;
23190 +
23191 +       if (au_ftest_cpdown(args->flags, WHED)) {
23192 +               err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst); /* remove the whiteout */
23193 +               if (unlikely(err))
23194 +                       goto out_opq;
23195 +       }
23196 +
23197 +       inode = dentry->d_inode;
23198 +       if (au_ibend(inode) < bdst) /* extend the inode's branch range to include bdst */
23199 +               au_set_ibend(inode, bdst);
23200 +       au_set_h_iptr(inode, bdst, au_igrab(h_inode),
23201 +                     au_hi_flags(inode, /*isdir*/1));
23202 +       goto out; /* success */
23203 +
23204 +       /* revert */
23205 + out_opq:
23206 +       if (au_ftest_cpdown(args->flags, DIROPQ)) { /* undo au_cpdown_dir_opq() */
23207 +               mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
23208 +               rerr = au_diropq_remove(dentry, bdst);
23209 +               mutex_unlock(&h_inode->i_mutex);
23210 +               if (unlikely(rerr)) {
23211 +                       AuIOErr("failed removing diropq for %.*s b%d (%d)\n",
23212 +                               AuDLNPair(dentry), bdst, rerr);
23213 +                       err = -EIO;
23214 +                       goto out; /* skips the rmdir and the au_set_h_dptr() reset below */
23215 +               }
23216 +       }
23217 + out_dir:
23218 +       if (au_ftest_cpdown(args->flags, MADE_DIR)) { /* undo vfsub_sio_mkdir() */
23219 +               rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
23220 +               if (unlikely(rerr)) {
23221 +                       AuIOErr("failed removing %.*s b%d (%d)\n",
23222 +                               AuDLNPair(dentry), bdst, rerr);
23223 +                       err = -EIO;
23224 +               }
23225 +       }
23226 + out_put:
23227 +       au_set_h_dptr(dentry, bdst, NULL); /* forget the lower dentry set up for bdst */
23228 +       if (au_dbend(dentry) == bdst)
23229 +               au_update_dbend(dentry);
23230 + out:
23231 +       dput(parent);
23232 +       return err;
23233 +}
23234 +
23235 +int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
23236 +{ /* runs au_cp_dirs() with au_cpdown_dir() as the per-directory callback; returns its result */
23237 +       int err;
23238 +       struct au_cpdown_dir_args args = {
23239 +               .parent = dget_parent(dentry), /* reference dropped after au_cp_dirs() */
23240 +               .flags  = 0
23241 +       };
23242 +
23243 +       err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &args);
23244 +       dput(args.parent);
23245 +
23246 +       return err;
23247 +}
23248 +
23249 +/* ---------------------------------------------------------------------- */
23250 +
23251 +/* policies for create */
23252 +
23253 +static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
23254 +{ /* scans from bindex down to 0; returns the first non-readonly branch index */
23255 +       for (; bindex >= 0; bindex--)
23256 +               if (!au_br_rdonly(au_sbr(sb, bindex)))
23257 +                       return bindex;
23258 +       return -EROFS; /* no writable branch found (also when bindex starts below 0) */
23259 +}
23260 +
23261 +/* top down parent: returns a writable branch index for dentry, or -EROFS */
23262 +static int au_wbr_create_tdp(struct dentry *dentry, int isdir __maybe_unused)
23263 +{
23264 +       int err;
23265 +       aufs_bindex_t bstart, bindex;
23266 +       struct super_block *sb;
23267 +       struct dentry *parent, *h_parent;
23268 +
23269 +       sb = dentry->d_sb;
23270 +       bstart = au_dbstart(dentry);
23271 +       err = bstart;
23272 +       if (!au_br_rdonly(au_sbr(sb, bstart))) /* dentry's own start branch is writable: use it */
23273 +               goto out;
23274 +
23275 +       err = -EROFS;
23276 +       parent = dget_parent(dentry);
23277 +       for (bindex = au_dbstart(parent); bindex < bstart; bindex++) { /* parent's branches above bstart */
23278 +               h_parent = au_h_dptr(parent, bindex);
23279 +               if (!h_parent || !h_parent->d_inode) /* parent does not exist on this branch */
23280 +                       continue;
23281 +
23282 +               if (!au_br_rdonly(au_sbr(sb, bindex))) {
23283 +                       err = bindex;
23284 +                       break;
23285 +               }
23286 +       }
23287 +       dput(parent);
23288 +
23289 +       /* bottom up here */
23290 +       if (unlikely(err < 0))
23291 +               err = au_wbr_bu(sb, bstart - 1); /* fallback: any writable branch from bstart - 1 down to 0 */
23292 +
23293 + out:
23294 +       AuDbg("b%d\n", err);
23295 +       return err;
23296 +}
23297 +
23298 +/* ---------------------------------------------------------------------- */
23299 +
23300 +/* an exception for the policies other than tdp: returns a branch index, or -1 when it does not apply */
23301 +static int au_wbr_create_exp(struct dentry *dentry)
23302 +{
23303 +       int err;
23304 +       aufs_bindex_t bwh, bdiropq;
23305 +       struct dentry *parent;
23306 +
23307 +       err = -1;
23308 +       bwh = au_dbwh(dentry);
23309 +       parent = dget_parent(dentry);
23310 +       bdiropq = au_dbdiropq(parent);
23311 +       if (bwh >= 0) {
23312 +               if (bdiropq >= 0)
23313 +                       err = min(bdiropq, bwh); /* both are set: take the smaller index */
23314 +               else
23315 +                       err = bwh;
23316 +               AuDbg("%d\n", err);
23317 +       } else if (bdiropq >= 0) {
23318 +               err = bdiropq;
23319 +               AuDbg("%d\n", err);
23320 +       }
23321 +       dput(parent);
23322 +
23323 +       if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err))) /* a readonly branch is not usable */
23324 +               err = -1;
23325 +
23326 +       AuDbg("%d\n", err);
23327 +       return err;
23328 +}
23329 +
23330 +/* ---------------------------------------------------------------------- */
23331 +
23332 +/* round robin */
23333 +static int au_wbr_create_init_rr(struct super_block *sb)
23334 +{ /* seeds si_wbr_rr_next from the bottom-most writable branch; returns au_wbr_bu()'s result */
23335 +       int err;
23336 +
23337 +       err = au_wbr_bu(sb, au_sbend(sb)); /* may be -EROFS when no branch is writable */
23338 +       atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
23339 +       /* smp_mb(); */
23340 +
23341 +       AuDbg("b%d\n", err);
23342 +       return err;
23343 +}
23344 +
23345 +static int au_wbr_create_rr(struct dentry *dentry, int isdir)
23346 +{ /* round robin: returns a writable branch index, or -EROFS after trying once per branch */
23347 +       int err, nbr;
23348 +       unsigned int u;
23349 +       aufs_bindex_t bindex, bend;
23350 +       struct super_block *sb;
23351 +       atomic_t *next;
23352 +
23353 +       err = au_wbr_create_exp(dentry); /* the whiteout/diropq exception takes precedence */
23354 +       if (err >= 0)
23355 +               goto out;
23356 +
23357 +       sb = dentry->d_sb;
23358 +       next = &au_sbi(sb)->si_wbr_rr_next;
23359 +       bend = au_sbend(sb);
23360 +       nbr = bend + 1; /* number of branches */
23361 +       for (bindex = 0; bindex <= bend; bindex++) { /* bindex only bounds the number of attempts */
23362 +               if (!isdir) {
23363 +                       err = atomic_dec_return(next) + 1; /* the counter value before this decrement */
23364 +                       /* modulo for 0 is meaningless */
23365 +                       if (unlikely(!err))
23366 +                               err = atomic_dec_return(next) + 1;
23367 +               } else
23368 +                       err = atomic_read(next); /* directories do not advance the counter */
23369 +               AuDbg("%d\n", err);
23370 +               u = err; /* to unsigned, so the modulo below cannot be negative */
23371 +               err = u % nbr;
23372 +               AuDbg("%d\n", err);
23373 +               if (!au_br_rdonly(au_sbr(sb, err)))
23374 +                       break;
23375 +               err = -EROFS; /* kept when every attempt hits a readonly branch */
23376 +       }
23377 +
23378 + out:
23379 +       AuDbg("%d\n", err);
23380 +       return err;
23381 +}
23382 +
23383 +/* ---------------------------------------------------------------------- */
23384 +
23385 +/* most free space */
23386 +static void au_mfs(struct dentry *dentry)
23387 +{ /* rescans all writable branches with vfs_statfs() and refreshes si_wbr_mfs */
23388 +       struct super_block *sb;
23389 +       struct au_branch *br;
23390 +       struct au_wbr_mfs *mfs;
23391 +       aufs_bindex_t bindex, bend;
23392 +       int err;
23393 +       unsigned long long b, bavail;
23394 +       /* reduce the stack usage */
23395 +       struct kstatfs *st;
23396 +
23397 +       st = kmalloc(sizeof(*st), GFP_NOFS);
23398 +       if (unlikely(!st)) {
23399 +               AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
23400 +               return; /* si_wbr_mfs is left untouched in this case */
23401 +       }
23402 +
23403 +       bavail = 0;
23404 +       sb = dentry->d_sb;
23405 +       mfs = &au_sbi(sb)->si_wbr_mfs;
23406 +       MtxMustLock(&mfs->mfs_lock); /* debug assertion: the caller holds mfs_lock */
23407 +       mfs->mfs_bindex = -EROFS; /* stays -EROFS unless a writable branch is found below */
23408 +       mfs->mfsrr_bytes = 0;
23409 +       bend = au_sbend(sb);
23410 +       for (bindex = 0; bindex <= bend; bindex++) {
23411 +               br = au_sbr(sb, bindex);
23412 +               if (au_br_rdonly(br))
23413 +                       continue;
23414 +
23415 +               /* sb->s_root for NFS is unreliable */
23416 +               err = vfs_statfs(br->br_mnt->mnt_root, st);
23417 +               if (unlikely(err)) {
23418 +                       AuWarn1("failed statfs, b%d, %d\n", bindex, err);
23419 +                       continue; /* a branch whose statfs fails is skipped */
23420 +               }
23421 +
23422 +               /* when the available size is equal, select the lower one */
23423 +               BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
23424 +                            || sizeof(b) < sizeof(st->f_bsize));
23425 +               b = st->f_bavail * st->f_bsize; /* available bytes on this branch */
23426 +               br->br_wbr->wbr_bytes = b; /* cached per branch; read by au_wbr_create_pmfs() */
23427 +               if (b >= bavail) { /* >= makes the later (larger) index win on a tie */
23428 +                       bavail = b;
23429 +                       mfs->mfs_bindex = bindex;
23430 +                       mfs->mfs_jiffy = jiffies; /* timestamp checked against mfs_expire by the caller */
23431 +               }
23432 +       }
23433 +
23434 +       mfs->mfsrr_bytes = bavail; /* compared with mfsrr_watermark in au_wbr_create_mfsrr() */
23435 +       AuDbg("b%d\n", mfs->mfs_bindex);
23436 +       kfree(st);
23437 +}
23438 +
23439 +static int au_wbr_create_mfs(struct dentry *dentry, int isdir __maybe_unused)
23440 +{ /* most free space: returns the cached mfs_bindex, refreshed through au_mfs() when stale */
23441 +       int err;
23442 +       struct super_block *sb;
23443 +       struct au_wbr_mfs *mfs;
23444 +
23445 +       err = au_wbr_create_exp(dentry); /* the whiteout/diropq exception takes precedence */
23446 +       if (err >= 0)
23447 +               goto out;
23448 +
23449 +       sb = dentry->d_sb;
23450 +       mfs = &au_sbi(sb)->si_wbr_mfs;
23451 +       mutex_lock(&mfs->mfs_lock);
23452 +       if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire) /* cached result expired */
23453 +           || mfs->mfs_bindex < 0 /* no usable cached result */
23454 +           || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex))) /* cached branch is readonly */
23455 +               au_mfs(dentry);
23456 +       err = mfs->mfs_bindex; /* read under mfs_lock: au_mfs() resets it to -EROFS while rescanning */
23457 +       mutex_unlock(&mfs->mfs_lock);
23458 +
23459 + out:
23460 +       AuDbg("b%d\n", err);
23461 +       return err;
23462 +}
23463 +
23464 +static int au_wbr_create_init_mfs(struct super_block *sb)
23465 +{ /* initializes si_wbr_mfs (lock, timestamp, cached index); always returns 0 */
23466 +       struct au_wbr_mfs *mfs;
23467 +
23468 +       mfs = &au_sbi(sb)->si_wbr_mfs;
23469 +       mutex_init(&mfs->mfs_lock);
23470 +       mfs->mfs_jiffy = 0;
23471 +       mfs->mfs_bindex = -EROFS; /* negative, so au_wbr_create_mfs() calls au_mfs() on first use */
23472 +
23473 +       return 0;
23474 +}
23475 +
23476 +static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
23477 +{ /* counterpart of au_wbr_create_init_mfs(): destroys mfs_lock; always returns 0 */
23478 +       mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
23479 +       return 0;
23480 +}
23481 +
23482 +/* ---------------------------------------------------------------------- */
23483 +
23484 +/* most free space, then round robin once the free space drops below the watermark */
23485 +static int au_wbr_create_mfsrr(struct dentry *dentry, int isdir)
23486 +{
23487 +       int err;
23488 +       struct au_wbr_mfs *mfs;
23489 +
23490 +       err = au_wbr_create_mfs(dentry, isdir);
23491 +       if (err >= 0) {
23492 +               mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
23493 +               mutex_lock(&mfs->mfs_lock);
23494 +               if (mfs->mfsrr_bytes < mfs->mfsrr_watermark) /* mfsrr_bytes is the largest free size found by au_mfs() */
23495 +                       err = au_wbr_create_rr(dentry, isdir); /* replaces the mfs choice; called with mfs_lock held */
23496 +               mutex_unlock(&mfs->mfs_lock);
23497 +       }
23498 +
23499 +       AuDbg("b%d\n", err);
23500 +       return err;
23501 +}
23502 +
23503 +static int au_wbr_create_init_mfsrr(struct super_block *sb)
23504 +{ /* initializes both the mfs and the rr state; returns au_wbr_create_init_rr()'s result */
23505 +       int err;
23506 +
23507 +       au_wbr_create_init_mfs(sb); /* ignore */
23508 +       err = au_wbr_create_init_rr(sb);
23509 +
23510 +       return err;
23511 +}
23512 +
23513 +/* ---------------------------------------------------------------------- */
23514 +
23515 +/* top down parent and most free space: tdp result, possibly replaced by a roomier branch of the parent */
23516 +static int au_wbr_create_pmfs(struct dentry *dentry, int isdir)
23517 +{
23518 +       int err, e2;
23519 +       unsigned long long b;
23520 +       aufs_bindex_t bindex, bstart, bend;
23521 +       struct super_block *sb;
23522 +       struct dentry *parent, *h_parent;
23523 +       struct au_branch *br;
23524 +
23525 +       err = au_wbr_create_tdp(dentry, isdir);
23526 +       if (unlikely(err < 0))
23527 +               goto out;
23528 +       parent = dget_parent(dentry);
23529 +       bstart = au_dbstart(parent);
23530 +       bend = au_dbtaildir(parent);
23531 +       if (bstart == bend) /* the parent spans a single branch: nothing to compare */
23532 +               goto out_parent; /* success */
23533 +
23534 +       e2 = au_wbr_create_mfs(dentry, isdir); /* only its sign is used; on failure the tdp result is kept */
23535 +       if (e2 < 0)
23536 +               goto out_parent; /* success */
23537 +
23538 +       /* when the available size is equal, select upper one */
23539 +       sb = dentry->d_sb;
23540 +       br = au_sbr(sb, err);
23541 +       b = br->br_wbr->wbr_bytes; /* wbr_bytes is the value stored by au_mfs() */
23542 +       AuDbg("b%d, %llu\n", err, b);
23543 +
23544 +       for (bindex = bstart; bindex <= bend; bindex++) {
23545 +               h_parent = au_h_dptr(parent, bindex);
23546 +               if (!h_parent || !h_parent->d_inode) /* parent does not exist on this branch */
23547 +                       continue;
23548 +
23549 +               br = au_sbr(sb, bindex);
23550 +               if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) { /* strictly larger, so ties keep the earlier pick */
23551 +                       b = br->br_wbr->wbr_bytes;
23552 +                       err = bindex;
23553 +                       AuDbg("b%d, %llu\n", err, b);
23554 +               }
23555 +       }
23556 +
23557 + out_parent:
23558 +       dput(parent);
23559 + out:
23560 +       AuDbg("b%d\n", err);
23561 +       return err;
23562 +}
23563 +
23564 +/* ---------------------------------------------------------------------- */
23565 +
23566 +/* policies for copyup */
23567 +
23568 +/* top down parent: the copyup policy simply reuses au_wbr_create_tdp() */
23569 +static int au_wbr_copyup_tdp(struct dentry *dentry)
23570 +{
23571 +       return au_wbr_create_tdp(dentry, /*isdir, anything is ok*/0);
23572 +}
23573 +
23574 +/* bottom up parent: returns a writable branch index where the parent exists, or falls back to au_wbr_bu() */
23575 +static int au_wbr_copyup_bup(struct dentry *dentry)
23576 +{
23577 +       int err;
23578 +       aufs_bindex_t bindex, bstart;
23579 +       struct dentry *parent, *h_parent;
23580 +       struct super_block *sb;
23581 +
23582 +       err = -EROFS;
23583 +       sb = dentry->d_sb;
23584 +       parent = dget_parent(dentry);
23585 +       bstart = au_dbstart(parent);
23586 +       for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) { /* from dentry's start up to the parent's start */
23587 +               h_parent = au_h_dptr(parent, bindex);
23588 +               if (!h_parent || !h_parent->d_inode) /* parent does not exist on this branch */
23589 +                       continue;
23590 +
23591 +               if (!au_br_rdonly(au_sbr(sb, bindex))) {
23592 +                       err = bindex;
23593 +                       break;
23594 +               }
23595 +       }
23596 +       dput(parent);
23597 +
23598 +       /* bottom up here */
23599 +       if (unlikely(err < 0))
23600 +               err = au_wbr_bu(sb, bstart - 1); /* bstart here is the parent's start branch */
23601 +
23602 +       AuDbg("b%d\n", err);
23603 +       return err;
23604 +}
23605 +
23606 +/* bottom up: first writable branch found by au_wbr_bu() starting at dentry's start branch */
23607 +static int au_wbr_copyup_bu(struct dentry *dentry)
23608 +{
23609 +       int err;
23610 +
23611 +       err = au_wbr_bu(dentry->d_sb, au_dbstart(dentry)); /* -EROFS when none is writable */
23612 +
23613 +       AuDbg("b%d\n", err);
23614 +       return err;
23615 +}
23616 +
23617 +/* ---------------------------------------------------------------------- */
23618 +
23619 +struct au_wbr_copyup_operations au_wbr_copyup_ops[] = { /* copyup policy table, indexed by AuWbrCopyup_* */
23620 +       [AuWbrCopyup_TDP] = { /* top down parent */
23621 +               .copyup = au_wbr_copyup_tdp
23622 +       },
23623 +       [AuWbrCopyup_BUP] = { /* bottom up parent */
23624 +               .copyup = au_wbr_copyup_bup
23625 +       },
23626 +       [AuWbrCopyup_BU] = { /* bottom up */
23627 +               .copyup = au_wbr_copyup_bu
23628 +       }
23629 +};
23630 +
23631 +struct au_wbr_create_operations au_wbr_create_ops[] = { /* create policy table, indexed by AuWbrCreate_* */
23632 +       [AuWbrCreate_TDP] = { /* top down parent; no init/fin callbacks */
23633 +               .create = au_wbr_create_tdp
23634 +       },
23635 +       [AuWbrCreate_RR] = { /* round robin; no fin callback */
23636 +               .create = au_wbr_create_rr,
23637 +               .init   = au_wbr_create_init_rr
23638 +       },
23639 +       [AuWbrCreate_MFS] = { /* most free space */
23640 +               .create = au_wbr_create_mfs,
23641 +               .init   = au_wbr_create_init_mfs,
23642 +               .fin    = au_wbr_create_fin_mfs
23643 +       },
23644 +       [AuWbrCreate_MFSV] = { /* same callbacks as AuWbrCreate_MFS */
23645 +               .create = au_wbr_create_mfs,
23646 +               .init   = au_wbr_create_init_mfs,
23647 +               .fin    = au_wbr_create_fin_mfs
23648 +       },
23649 +       [AuWbrCreate_MFSRR] = { /* most free space, then round robin */
23650 +               .create = au_wbr_create_mfsrr,
23651 +               .init   = au_wbr_create_init_mfsrr,
23652 +               .fin    = au_wbr_create_fin_mfs
23653 +       },
23654 +       [AuWbrCreate_MFSRRV] = { /* same callbacks as AuWbrCreate_MFSRR */
23655 +               .create = au_wbr_create_mfsrr,
23656 +               .init   = au_wbr_create_init_mfsrr,
23657 +               .fin    = au_wbr_create_fin_mfs
23658 +       },
23659 +       [AuWbrCreate_PMFS] = { /* top down parent and most free space; uses the mfs init/fin */
23660 +               .create = au_wbr_create_pmfs,
23661 +               .init   = au_wbr_create_init_mfs,
23662 +               .fin    = au_wbr_create_fin_mfs
23663 +       },
23664 +       [AuWbrCreate_PMFSV] = { /* same callbacks as AuWbrCreate_PMFS */
23665 +               .create = au_wbr_create_pmfs,
23666 +               .init   = au_wbr_create_init_mfs,
23667 +               .fin    = au_wbr_create_fin_mfs
23668 +       }
23669 +};
23670 diff --git a/fs/aufs/whout.c b/fs/aufs/whout.c
23671 new file mode 100644
23672 index 0000000..5e465ca
23673 --- /dev/null
23674 +++ b/fs/aufs/whout.c
23675 @@ -0,0 +1,1054 @@
23676 +/*
23677 + * Copyright (C) 2005-2009 Junjiro R. Okajima
23678 + *
23679 + * This program, aufs is free software; you can redistribute it and/or modify
23680 + * it under the terms of the GNU General Public License as published by
23681 + * the Free Software Foundation; either version 2 of the License, or
23682 + * (at your option) any later version.
23683 + *
23684 + * This program is distributed in the hope that it will be useful,
23685 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
23686 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23687 + * GNU General Public License for more details.
23688 + *
23689 + * You should have received a copy of the GNU General Public License
23690 + * along with this program; if not, write to the Free Software
23691 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
23692 + */
23693 +
23694 +/*
23695 + * whiteout for logical deletion and opaque directory
23696 + */
23697 +
23698 +#include <linux/fs.h>
23699 +#include "aufs.h"
23700 +
23701 +#define WH_MASK                        S_IRUGO
23702 +
23703 +/*
23704 + * If a directory contains this file, then it is opaque.  We start with the
23705 + * .wh. flag so that it is blocked by lookup.
23706 + */
23707 +static struct qstr diropq_name = {
23708 +       .name = AUFS_WH_DIROPQ,
23709 +       .len = sizeof(AUFS_WH_DIROPQ) - 1
23710 +};
23711 +
23712 +/*
23713 + * generate whiteout name, which is NOT terminated by NUL.
23714 + * @wh: whiteout qstr, whose ->len and ->name are set here
23715 + * @name: original d_name, both ->name and ->len are read
23716 + * fails with -ENAMETOOLONG (@wh untouched) or -ENOMEM (wh->name is NULL).
23717 + * returns zero when succeeds, otherwise error.
23718 + * succeeded value as wh->name should be freed by kfree().
23719 + */
23720 +int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
23721 +{
23722 +       char *p;
23723 +
23724 +       if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
23725 +               return -ENAMETOOLONG;
23726 +
23727 +       wh->len = name->len + AUFS_WH_PFX_LEN;
23728 +       p = kmalloc(wh->len, GFP_NOFS); /* exactly wh->len bytes, no terminator */
23729 +       wh->name = p;
23730 +       if (p) {
23731 +               memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
23732 +               memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
23733 +               /* smp_mb(); */
23734 +               return 0;
23735 +       }
23736 +       return -ENOMEM;
23737 +}
23738 +
23739 +/* ---------------------------------------------------------------------- */
23740 +
23741 +/*
23742 + * test if the @wh_name exists under @h_parent (0: no, 1: yes, minus: error).
23743 + * @try_sio specifies the necessity of super-io, i.e. of au_sio_lkup_one().
23744 + */
23745 +int au_wh_test(struct dentry *h_parent, struct qstr *wh_name,
23746 +              struct au_branch *br, int try_sio)
23747 +{
23748 +       int err;
23749 +       struct dentry *wh_dentry;
23750 +       struct inode *h_dir;
23751 +
23752 +       h_dir = h_parent->d_inode; /* NOTE(review): not read again in this function */
23753 +       if (!try_sio)
23754 +               wh_dentry = au_lkup_one(wh_name, h_parent, br, /*nd*/NULL);
23755 +       else
23756 +               wh_dentry = au_sio_lkup_one(wh_name, h_parent, br);
23757 +       err = PTR_ERR(wh_dentry);
23758 +       if (IS_ERR(wh_dentry))
23759 +               goto out;
23760 +
23761 +       err = 0;
23762 +       if (!wh_dentry->d_inode)
23763 +               goto out_wh; /* success, there is no whiteout */
23764 +
23765 +       err = 1;
23766 +       if (S_ISREG(wh_dentry->d_inode->i_mode))
23767 +               goto out_wh; /* success, the whiteout is a regular file */
23768 +
23769 +       err = -EIO; /* the name exists but is not a regular file */
23770 +       AuIOErr("%.*s Invalid whiteout entry type 0%o.\n",
23771 +               AuDLNPair(wh_dentry), wh_dentry->d_inode->i_mode);
23772 +
23773 + out_wh:
23774 +       dput(wh_dentry);
23775 + out:
23776 +       return err;
23777 +}
23778 +
23779 +/*
23780 + * test if the @h_dentry sets opaque or not.
23781 + */
23782 +int au_diropq_test(struct dentry *h_dentry, struct au_branch *br)
23783 +{
23784 +       int err;
23785 +       struct inode *h_dir;
23786 +
23787 +       h_dir = h_dentry->d_inode;
23788 +       err = au_wh_test(h_dentry, &diropq_name, br,
23789 +                        au_test_h_perm_sio(h_dir, MAY_EXEC));
23790 +       return err;
23791 +}
23792 +
23793 +/*
23794 + * returns a negative dentry whose name is unique and temporary, or an error.
23795 + */
23796 +struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
23797 +                            struct qstr *prefix)
23798 +{
23799 +       struct dentry *dentry;
23800 +       int i;
23801 +       char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN_MIN + 1],
23802 +               *name, *p;
23803 +       static unsigned short cnt; /* NOTE(review): incremented below without a lock in this function */
23804 +       struct qstr qs;
23805 +
23806 +       BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
23807 +
23808 +       name = defname; /* stack buffer, replaced below for a long @prefix */
23809 +       qs.len = sizeof(defname) - DNAME_INLINE_LEN_MIN + prefix->len - 1;
23810 +       if (unlikely(prefix->len > DNAME_INLINE_LEN_MIN)) {
23811 +               dentry = ERR_PTR(-ENAMETOOLONG);
23812 +               if (unlikely(qs.len > NAME_MAX))
23813 +                       goto out;
23814 +               dentry = ERR_PTR(-ENOMEM);
23815 +               name = kmalloc(qs.len + 1, GFP_NOFS);
23816 +               if (unlikely(!name))
23817 +                       goto out;
23818 +       }
23819 +
23820 +       /* doubly whiteout-ed */
23821 +       memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
23822 +       p = name + AUFS_WH_PFX_LEN * 2;
23823 +       memcpy(p, prefix->name, prefix->len);
23824 +       p += prefix->len;
23825 +       *p++ = '.';
23826 +       AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
23827 +
23828 +       qs.name = name;
23829 +       for (i = 0; i < 3; i++) { /* try three consecutive values of cnt */
23830 +               sprintf(p, "%.*d", AUFS_WH_TMP_LEN, cnt++);
23831 +               dentry = au_sio_lkup_one(&qs, h_parent, br);
23832 +               if (IS_ERR(dentry) || !dentry->d_inode)
23833 +                       goto out_name; /* lookup error, or the name is unused */
23834 +               dput(dentry); /* the name is in use, try the next one */
23835 +       }
23836 +       /* AuWarn("could not get random name\n"); */
23837 +       dentry = ERR_PTR(-EEXIST);
23838 +       AuDbg("%.*s\n", AuLNPair(&qs));
23839 +       BUG(); /* reached when all three names are in use */
23840 +
23841 + out_name:
23842 +       if (name != defname)
23843 +               kfree(name);
23844 + out:
23845 +       AuTraceErrPtr(dentry);
23846 +       return dentry;
23847 +}
23848 +
23849 +/*
23850 + * rename the @h_dentry on @br to the whiteouted temporary name.
23851 + */
23852 +int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
23853 +{
23854 +       int err;
23855 +       struct path h_path = {
23856 +               .mnt = br->br_mnt
23857 +       };
23858 +       struct inode *h_dir;
23859 +       struct dentry *h_parent;
23860 +
23861 +       h_parent = h_dentry->d_parent; /* dir inode is locked */
23862 +       h_dir = h_parent->d_inode;
23863 +       IMustLock(h_dir);
23864 +
23865 +       h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
23866 +       err = PTR_ERR(h_path.dentry);
23867 +       if (IS_ERR(h_path.dentry))
23868 +               goto out;
23869 +
23870 +       /* under the same dir, no need to lock_rename() */
23871 +       err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path);
23872 +       AuTraceErr(err);
23873 +       dput(h_path.dentry);
23874 +
23875 + out:
23876 +       AuTraceErr(err);
23877 +       return err;
23878 +}
23879 +
23880 +/* ---------------------------------------------------------------------- */
23881 +/*
23882 + * functions for removing a whiteout
23883 + */
23884 +
23885 +static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
23886 +{
23887 +       int force;
23888 +
23889 +       /*
23890 +        * forces superio when the dir has a sticky bit.
23891 +        * this may be a violation of unix fs semantics.
23892 +        */
23893 +       force = (h_dir->i_mode & S_ISVTX)
23894 +               && h_path->dentry->d_inode->i_uid != current->fsuid;
23895 +       return vfsub_unlink(h_dir, h_path, force);
23896 +}
23897 +
23898 +int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
23899 +                       struct dentry *dentry)
23900 +{
23901 +       int err;
23902 +
23903 +       err = do_unlink_wh(h_dir, h_path);
23904 +       if (!err && dentry)
23905 +               au_set_dbwh(dentry, -1);
23906 +
23907 +       return err;
23908 +}
23909 +
23910 +static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
23911 +                         struct au_branch *br)
23912 +{
23913 +       int err;
23914 +       struct path h_path = {
23915 +               .mnt = br->br_mnt
23916 +       };
23917 +
23918 +       err = 0;
23919 +       h_path.dentry = au_lkup_one(wh, h_parent, br, /*nd*/NULL);
23920 +       if (IS_ERR(h_path.dentry))
23921 +               err = PTR_ERR(h_path.dentry);
23922 +       else {
23923 +               if (h_path.dentry->d_inode
23924 +                   && S_ISREG(h_path.dentry->d_inode->i_mode))
23925 +                       err = do_unlink_wh(h_parent->d_inode, &h_path);
23926 +               dput(h_path.dentry);
23927 +       }
23928 +
23929 +       return err;
23930 +}
23931 +
23932 +/* ---------------------------------------------------------------------- */
23933 +/*
23934 + * initialize/clean whiteout for a branch
23935 + */
23936 +
23937 +static void au_wh_clean(struct inode *h_dir, struct path *whpath,
23938 +                       const int isdir)
23939 +{
23940 +       int err;
23941 +
23942 +       if (!whpath->dentry->d_inode)
23943 +               return;
23944 +
23945 +       err = mnt_want_write(whpath->mnt);
23946 +       if (!err) {
23947 +               if (isdir)
23948 +                       err = vfsub_rmdir(h_dir, whpath);
23949 +               else
23950 +                       err = vfsub_unlink(h_dir, whpath, /*force*/0);
23951 +               mnt_drop_write(whpath->mnt);
23952 +       }
23953 +       if (unlikely(err))
23954 +               AuWarn("failed removing %.*s (%d), ignored.\n",
23955 +                      AuDLNPair(whpath->dentry), err);
23956 +}
23957 +
23958 +static int test_linkable(struct dentry *h_root)
23959 +{
23960 +       struct inode *h_dir = h_root->d_inode;
23961 +
23962 +       if (h_dir->i_op && h_dir->i_op->link)
23963 +               return 0;
23964 +
23965 +       AuErr("%.*s (%s) doesn't support link(2), use noplink and rw+nolwh\n",
23966 +             AuDLNPair(h_root), au_sbtype(h_root->d_sb));
23967 +       return -ENOSYS;
23968 +}
23969 +
23970 +/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
23971 +static int au_whdir(struct inode *h_dir, struct path *path)
23972 +{
23973 +       int err;
23974 +
23975 +       err = -EEXIST;
23976 +       if (!path->dentry->d_inode) {
23977 +               int mode = S_IRWXU;
23978 +
23979 +               if (au_test_nfs(path->dentry->d_sb))
23980 +                       mode |= S_IXUGO;
23981 +               err = mnt_want_write(path->mnt);
23982 +               if (!err) {
23983 +                       err = vfsub_mkdir(h_dir, path, mode);
23984 +                       mnt_drop_write(path->mnt);
23985 +               }
23986 +       } else if (S_ISDIR(path->dentry->d_inode->i_mode))
23987 +               err = 0;
23988 +       else
23989 +               AuErr("unknown %.*s exists\n", AuDLNPair(path->dentry));
23990 +
23991 +       return err;
23992 +}
23993 +
23994 +struct au_wh_base {
23995 +       const struct qstr *name;
23996 +       struct dentry *dentry;
23997 +};
23998 +
23999 +static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
24000 +                         struct path *h_path)
24001 +{
24002 +       h_path->dentry = base[AuBrWh_BASE].dentry;
24003 +       au_wh_clean(h_dir, h_path, /*isdir*/0);
24004 +       h_path->dentry = base[AuBrWh_PLINK].dentry;
24005 +       au_wh_clean(h_dir, h_path, /*isdir*/1);
24006 +       h_path->dentry = base[AuBrWh_ORPH].dentry;
24007 +       au_wh_clean(h_dir, h_path, /*isdir*/1);
24008 +}
24009 +
24010 +/*
24011 + * returns tri-state,
24012 + * minus: error, caller should print the message
24013 + * zero: success
24014 + * plus: error, caller should NOT print the message
24015 + */
24016 +static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
24017 +                               int do_plink, struct au_wh_base base[],
24018 +                               struct path *h_path)
24019 +{
24020 +       int err;
24021 +       struct inode *h_dir;
24022 +
24023 +       h_dir = h_root->d_inode;
24024 +       h_path->dentry = base[AuBrWh_BASE].dentry;
24025 +       au_wh_clean(h_dir, h_path, /*isdir*/0);
24026 +       h_path->dentry = base[AuBrWh_PLINK].dentry;
24027 +       if (do_plink) {
24028 +               err = test_linkable(h_root);
24029 +               if (unlikely(err)) {
24030 +                       err = 1;
24031 +                       goto out;
24032 +               }
24033 +
24034 +               err = au_whdir(h_dir, h_path);
24035 +               if (unlikely(err))
24036 +                       goto out;
24037 +               wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
24038 +       } else
24039 +               au_wh_clean(h_dir, h_path, /*isdir*/1);
24040 +       h_path->dentry = base[AuBrWh_ORPH].dentry;
24041 +       err = au_whdir(h_dir, h_path);
24042 +       if (unlikely(err))
24043 +               goto out;
24044 +       wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
24045 +
24046 + out:
24047 +       return err;
24048 +}
24049 +
24050 +/*
24051 + * for the moment, aufs supports the branch filesystem which does not support
24052 + * link(2). testing on FAT which does not support i_op->setattr() fully either,
24053 + * copyup failed. finally, such filesystem will not be used as the writable
24054 + * branch.
24055 + *
24056 + * returns tri-state, see above.
24057 + */
24058 +static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
24059 +                        int do_plink, struct au_wh_base base[],
24060 +                        struct path *h_path)
24061 +{
24062 +       int err;
24063 +       struct inode *h_dir;
24064 +
24065 +       WbrWhMustWriteLock(wbr);
24066 +
24067 +       err = test_linkable(h_root);
24068 +       if (unlikely(err)) {
24069 +               err = 1;
24070 +               goto out;
24071 +       }
24072 +
24073 +       /*
24074 +        * todo: should this create be done in /sbin/mount.aufs helper?
24075 +        */
24076 +       err = -EEXIST;
24077 +       h_dir = h_root->d_inode;
24078 +       if (!base[AuBrWh_BASE].dentry->d_inode) {
24079 +               err = mnt_want_write(h_path->mnt);
24080 +               if (!err) {
24081 +                       h_path->dentry = base[AuBrWh_BASE].dentry;
24082 +                       err = vfsub_create(h_dir, h_path, WH_MASK);
24083 +                       mnt_drop_write(h_path->mnt);
24084 +               }
24085 +       } else if (S_ISREG(base[AuBrWh_BASE].dentry->d_inode->i_mode))
24086 +               err = 0;
24087 +       else
24088 +               AuErr("unknown %.*s/%.*s exists\n",
24089 +                     AuDLNPair(h_root), AuDLNPair(base[AuBrWh_BASE].dentry));
24090 +       if (unlikely(err))
24091 +               goto out;
24092 +
24093 +       h_path->dentry = base[AuBrWh_PLINK].dentry;
24094 +       if (do_plink) {
24095 +               err = au_whdir(h_dir, h_path);
24096 +               if (unlikely(err))
24097 +                       goto out;
24098 +               wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
24099 +       } else
24100 +               au_wh_clean(h_dir, h_path, /*isdir*/1);
24101 +       wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
24102 +
24103 +       h_path->dentry = base[AuBrWh_ORPH].dentry;
24104 +       err = au_whdir(h_dir, h_path);
24105 +       if (unlikely(err))
24106 +               goto out;
24107 +       wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
24108 +
24109 + out:
24110 +       return err;
24111 +}
24112 +
24113 +/*
24114 + * initialize the whiteout base file/dir for @br.
24115 + */
24116 +int au_wh_init(struct dentry *h_root, struct au_branch *br,
24117 +              struct super_block *sb)
24118 +{
24119 +       int err, i;
24120 +       const unsigned char do_plink
24121 +               = !!au_opt_test(au_mntflags(sb), PLINK);
24122 +       struct path path = {
24123 +               .mnt = br->br_mnt
24124 +       };
24125 +       struct inode *h_dir;
24126 +       struct au_wbr *wbr = br->br_wbr;
24127 +       static const struct qstr base_name[] = {
24128 +               [AuBrWh_BASE] = {
24129 +                       .name   = AUFS_BASE_NAME,
24130 +                       .len    = sizeof(AUFS_BASE_NAME) - 1
24131 +               },
24132 +               [AuBrWh_PLINK] = {
24133 +                       .name   = AUFS_PLINKDIR_NAME,
24134 +                       .len    = sizeof(AUFS_PLINKDIR_NAME) - 1
24135 +               },
24136 +               [AuBrWh_ORPH] = {
24137 +                       .name   = AUFS_ORPHDIR_NAME,
24138 +                       .len    = sizeof(AUFS_ORPHDIR_NAME) - 1
24139 +               }
24140 +       };
24141 +       struct au_wh_base base[] = {
24142 +               [AuBrWh_BASE] = {
24143 +                       .name   = base_name + AuBrWh_BASE,
24144 +                       .dentry = NULL
24145 +               },
24146 +               [AuBrWh_PLINK] = {
24147 +                       .name   = base_name + AuBrWh_PLINK,
24148 +                       .dentry = NULL
24149 +               },
24150 +               [AuBrWh_ORPH] = {
24151 +                       .name   = base_name + AuBrWh_ORPH,
24152 +                       .dentry = NULL
24153 +               }
24154 +       };
24155 +
24156 +       if (wbr)
24157 +               WbrWhMustWriteLock(wbr);
24158 +
24159 +       h_dir = h_root->d_inode;
24160 +       for (i = 0; i < AuBrWh_Last; i++) {
24161 +               /* doubly whiteouted */
24162 +               struct dentry *d;
24163 +
24164 +               d = au_wh_lkup(h_root, (void *)base[i].name, br);
24165 +               err = PTR_ERR(d);
24166 +               if (IS_ERR(d))
24167 +                       goto out;
24168 +
24169 +               base[i].dentry = d;
24170 +               AuDebugOn(wbr
24171 +                         && wbr->wbr_wh[i]
24172 +                         && wbr->wbr_wh[i] != base[i].dentry);
24173 +       }
24174 +
24175 +       if (wbr)
24176 +               for (i = 0; i < AuBrWh_Last; i++) {
24177 +                       dput(wbr->wbr_wh[i]);
24178 +                       wbr->wbr_wh[i] = NULL;
24179 +               }
24180 +
24181 +       err = 0;
24182 +
24183 +       switch (br->br_perm) {
24184 +       case AuBrPerm_RO:
24185 +       case AuBrPerm_ROWH:
24186 +       case AuBrPerm_RR:
24187 +       case AuBrPerm_RRWH:
24188 +               au_wh_init_ro(h_dir, base, &path);
24189 +               break;
24190 +
24191 +       case AuBrPerm_RWNoLinkWH:
24192 +               err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
24193 +               if (err > 0)
24194 +                       goto out;
24195 +               else if (err)
24196 +                       goto out_err;
24197 +               break;
24198 +
24199 +       case AuBrPerm_RW:
24200 +               err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
24201 +               if (err > 0)
24202 +                       goto out;
24203 +               else if (err)
24204 +                       goto out_err;
24205 +               break;
24206 +
24207 +       default:
24208 +               BUG();
24209 +       }
24210 +       goto out; /* success */
24211 +
24212 + out_err:
24213 +       AuErr("an error(%d) on the writable branch %.*s(%s)\n",
24214 +             err, AuDLNPair(h_root), au_sbtype(h_root->d_sb));
24215 + out:
24216 +       for (i = 0; i < AuBrWh_Last; i++)
24217 +               dput(base[i].dentry);
24218 +       return err;
24219 +}
24220 +
24221 +/* ---------------------------------------------------------------------- */
24222 +/*
24223 + * whiteouts are all hard-linked usually.
24224 + * when its link count reaches a ceiling, we create a new whiteout base
24225 + * asynchronously.
24226 + */
24227 +
24228 +struct reinit_br_wh {
24229 +       struct super_block *sb;
24230 +       struct au_branch *br;
24231 +};
24232 +
24233 +static void reinit_br_wh(void *arg)
24234 +{
24235 +       int err;
24236 +       aufs_bindex_t bindex;
24237 +       struct path h_path;
24238 +       struct reinit_br_wh *a = arg;
24239 +       struct au_wbr *wbr;
24240 +       struct inode *dir;
24241 +       struct dentry *h_root;
24242 +       struct au_hinode *hdir;
24243 +
24244 +       err = 0;
24245 +       wbr = a->br->br_wbr;
24246 +       /* big aufs lock */
24247 +       si_noflush_write_lock(a->sb);
24248 +       if (!au_br_writable(a->br->br_perm))
24249 +               goto out;
24250 +       bindex = au_br_index(a->sb, a->br->br_id);
24251 +       if (unlikely(bindex < 0))
24252 +               goto out;
24253 +
24254 +       di_read_lock_parent(a->sb->s_root, AuLock_IR);
24255 +       dir = a->sb->s_root->d_inode;
24256 +       hdir = au_hi(dir, bindex);
24257 +       h_root = au_h_dptr(a->sb->s_root, bindex);
24258 +
24259 +       au_hin_imtx_lock_nested(hdir, AuLsc_I_PARENT);
24260 +       wbr_wh_write_lock(wbr);
24261 +       err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
24262 +                         h_root, a->br);
24263 +       if (!err) {
24264 +               err = mnt_want_write(a->br->br_mnt);
24265 +               if (!err) {
24266 +                       h_path.dentry = wbr->wbr_whbase;
24267 +                       h_path.mnt = a->br->br_mnt;
24268 +                       err = vfsub_unlink(hdir->hi_inode, &h_path, /*force*/0);
24269 +                       mnt_drop_write(a->br->br_mnt);
24270 +               }
24271 +       } else {
24272 +               AuWarn("%.*s is moved, ignored\n", AuDLNPair(wbr->wbr_whbase));
24273 +               err = 0;
24274 +       }
24275 +       dput(wbr->wbr_whbase);
24276 +       wbr->wbr_whbase = NULL;
24277 +       if (!err)
24278 +               err = au_wh_init(h_root, a->br, a->sb);
24279 +       wbr_wh_write_unlock(wbr);
24280 +       au_hin_imtx_unlock(hdir);
24281 +       di_read_unlock(a->sb->s_root, AuLock_IR);
24282 +
24283 + out:
24284 +       if (wbr)
24285 +               atomic_dec(&wbr->wbr_wh_running);
24286 +       atomic_dec(&a->br->br_count);
24287 +       au_nwt_done(&au_sbi(a->sb)->si_nowait);
24288 +       si_write_unlock(a->sb);
24289 +       kfree(arg);
24290 +       if (unlikely(err))
24291 +               AuIOErr("err %d\n", err);
24292 +}
24293 +
24294 +static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
24295 +{
24296 +       int do_dec, wkq_err;
24297 +       struct reinit_br_wh *arg;
24298 +
24299 +       do_dec = 1;
24300 +       if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
24301 +               goto out;
24302 +
24303 +       /* ignore ENOMEM */
24304 +       arg = kmalloc(sizeof(*arg), GFP_NOFS);
24305 +       if (arg) {
24306 +               /*
24307 +                * dec(wh_running), kfree(arg) and dec(br_count)
24308 +                * in reinit function
24309 +                */
24310 +               arg->sb = sb;
24311 +               arg->br = br;
24312 +               atomic_inc(&br->br_count);
24313 +               wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb);
24314 +               if (unlikely(wkq_err)) {
24315 +                       atomic_dec(&br->br_wbr->wbr_wh_running);
24316 +                       atomic_dec(&br->br_count);
24317 +                       kfree(arg);
24318 +               }
24319 +               do_dec = 0;
24320 +       }
24321 +
24322 + out:
24323 +       if (do_dec)
24324 +               atomic_dec(&br->br_wbr->wbr_wh_running);
24325 +}
24326 +
24327 +/* ---------------------------------------------------------------------- */
24328 +
24329 +/*
24330 + * create the whiteout @wh.
24331 + */
24332 +static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
24333 +                            struct dentry *wh)
24334 +{
24335 +       int err;
24336 +       struct path h_path = {
24337 +               .dentry = wh
24338 +       };
24339 +       struct au_branch *br;
24340 +       struct au_wbr *wbr;
24341 +       struct dentry *h_parent;
24342 +       struct inode *h_dir;
24343 +
24344 +       h_parent = wh->d_parent; /* dir inode is locked */
24345 +       h_dir = h_parent->d_inode;
24346 +       IMustLock(h_dir);
24347 +
24348 +       br = au_sbr(sb, bindex);
24349 +       h_path.mnt = br->br_mnt;
24350 +       wbr = br->br_wbr;
24351 +       wbr_wh_read_lock(wbr);
24352 +       if (wbr->wbr_whbase) {
24353 +               err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path);
24354 +               if (!err || err != -EMLINK)
24355 +                       goto out;
24356 +
24357 +               /* link count full. re-initialize br_whbase. */
24358 +               kick_reinit_br_wh(sb, br);
24359 +       }
24360 +
24361 +       /* return this error in this context */
24362 +       err = vfsub_create(h_dir, &h_path, WH_MASK);
24363 +
24364 + out:
24365 +       wbr_wh_read_unlock(wbr);
24366 +       return err;
24367 +}
24368 +
24369 +/* ---------------------------------------------------------------------- */
24370 +
24371 +/*
24372 + * create or remove the diropq (created: its dentry, removed: NULL, or error).
24373 + */
24374 +static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
24375 +                               unsigned int flags)
24376 +{
24377 +       struct dentry *opq_dentry, *h_dentry;
24378 +       struct super_block *sb;
24379 +       struct au_branch *br;
24380 +       int err;
24381 +
24382 +       sb = dentry->d_sb;
24383 +       br = au_sbr(sb, bindex);
24384 +       h_dentry = au_h_dptr(dentry, bindex);
24385 +       opq_dentry = au_lkup_one(&diropq_name, h_dentry, br, /*nd*/NULL);
24386 +       if (IS_ERR(opq_dentry))
24387 +               goto out;
24388 +
24389 +       if (au_ftest_diropq(flags, CREATE)) {
24390 +               err = link_or_create_wh(sb, bindex, opq_dentry);
24391 +               if (!err) {
24392 +                       au_set_dbdiropq(dentry, bindex);
24393 +                       goto out; /* success, opq_dentry is returned without dput() */
24394 +               }
24395 +       } else {
24396 +               struct path tmp = {
24397 +                       .dentry = opq_dentry,
24398 +                       .mnt    = br->br_mnt
24399 +               };
24400 +               err = do_unlink_wh(au_h_iptr(dentry->d_inode, bindex), &tmp);
24401 +               if (!err)
24402 +                       au_set_dbdiropq(dentry, -1);
24403 +       }
24404 +       dput(opq_dentry); /* the create failed, or the remove was tried */
24405 +       opq_dentry = ERR_PTR(err); /* err is zero after a successful remove */
24406 +
24407 + out:
24408 +       return opq_dentry;
24409 +}
24410 +
24411 +struct do_diropq_args {
24412 +       struct dentry **errp;
24413 +       struct dentry *dentry;
24414 +       aufs_bindex_t bindex;
24415 +       unsigned int flags;
24416 +};
24417 +
24418 +static void call_do_diropq(void *args)
24419 +{
24420 +       struct do_diropq_args *a = args;
24421 +       *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
24422 +}
24423 +
24424 +struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
24425 +                            unsigned int flags)
24426 +{
24427 +       struct dentry *diropq, *h_dentry;
24428 +
24429 +       h_dentry = au_h_dptr(dentry, bindex);
24430 +       if (!au_test_h_perm_sio(h_dentry->d_inode, MAY_EXEC | MAY_WRITE))
24431 +               diropq = do_diropq(dentry, bindex, flags);
24432 +       else {
24433 +               int wkq_err;
24434 +               struct do_diropq_args args = {
24435 +                       .errp           = &diropq,
24436 +                       .dentry         = dentry,
24437 +                       .bindex         = bindex,
24438 +                       .flags          = flags
24439 +               };
24440 +
24441 +               wkq_err = au_wkq_wait(call_do_diropq, &args);
24442 +               if (unlikely(wkq_err))
24443 +                       diropq = ERR_PTR(wkq_err);
24444 +       }
24445 +
24446 +       return diropq;
24447 +}
24448 +
24449 +/* ---------------------------------------------------------------------- */
24450 +
24451 +/*
24452 + * lookup whiteout dentry.
24453 + * @h_parent: lower parent dentry which must exist and be locked
24454 + * @base_name: name of dentry which will be whiteouted
24455 + * returns dentry for whiteout, or an error pointer.
24456 + */
24457 +struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
24458 +                         struct au_branch *br)
24459 +{
24460 +       int err;
24461 +       struct qstr wh_name;
24462 +       struct dentry *wh_dentry;
24463 +
24464 +       err = au_wh_name_alloc(&wh_name, base_name);
24465 +       wh_dentry = ERR_PTR(err);
24466 +       if (!err) {
24467 +               wh_dentry = au_lkup_one(&wh_name, h_parent, br, /*nd*/NULL);
24468 +               kfree(wh_name.name); /* the temporary name from au_wh_name_alloc() */
24469 +       }
24470 +       return wh_dentry;
24471 +}
24472 +
24473 +/*
24474 + * link/create a whiteout for @dentry on @bindex, unless it exists already.
24475 + */
24476 +struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
24477 +                           struct dentry *h_parent)
24478 +{
24479 +       struct dentry *wh_dentry;
24480 +       struct super_block *sb;
24481 +       int err;
24482 +
24483 +       sb = dentry->d_sb;
24484 +       wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
24485 +       if (!IS_ERR(wh_dentry) && !wh_dentry->d_inode) { /* found, but negative */
24486 +               err = link_or_create_wh(sb, bindex, wh_dentry);
24487 +               if (!err)
24488 +                       au_set_dbwh(dentry, bindex);
24489 +               else {
24490 +                       dput(wh_dentry);
24491 +                       wh_dentry = ERR_PTR(err);
24492 +               }
24493 +       }
24494 +
24495 +       return wh_dentry; /* the lookup result as it is, or the creation error */
24496 +}
24497 +
24498 +/* ---------------------------------------------------------------------- */
24499 +
24500 +/* Unlink under @h_dentry every @whlist whiteout recorded on branch @bindex. */
24501 +static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
24502 +                          aufs_bindex_t bindex, struct au_branch *br)
24503 +{
24504 +       int err; /* 0, or the negative errno that stopped the scan */
24505 +       unsigned long ul, n;
24506 +       struct qstr wh_name;
24507 +       char *p;
24508 +       struct hlist_head *head;
24509 +       struct au_vdir_wh *tpos;
24510 +       struct hlist_node *pos;
24511 +       struct au_vdir_destr *str;
24512 +
24513 +       err = -ENOMEM;
24514 +       p = __getname();
24515 +       wh_name.name = p;
24516 +       if (unlikely(!wh_name.name))
24517 +               goto out;
24518 +
24519 +       err = 0;
24520 +       memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); /* the prefix is written once */
24521 +       p += AUFS_WH_PFX_LEN; /* each entry name is copied right after it */
24522 +       n = whlist->nh_num;
24523 +       head = whlist->nh_head;
24524 +       for (ul = 0; !err && ul < n; ul++, head++) {
24525 +               hlist_for_each_entry(tpos, pos, head, wh_hash) {
24526 +                       if (tpos->wh_bindex != bindex)
24527 +                               continue; /* recorded for another branch */
24528 +
24529 +                       str = &tpos->wh_str;
24530 +                       if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
24531 +                               memcpy(p, str->name, str->len);
24532 +                               wh_name.len = AUFS_WH_PFX_LEN + str->len;
24533 +                               err = unlink_wh_name(h_dentry, &wh_name, br);
24534 +                               if (!err)
24535 +                                       continue;
24536 +                               break; /* !err in the outer loop ends the scan */
24537 +                       }
24538 +                       AuIOErr("whiteout name too long %.*s\n",
24539 +                               str->len, str->name);
24540 +                       err = -EIO;
24541 +                       break;
24542 +               }
24543 +       }
24544 +       __putname(wh_name.name); /* p was advanced; free the buffer start */
24545 +
24546 + out:
24547 +       return err;
24548 +}
24549 +
24550 +struct del_wh_children_args { /* one packed del_wh_children() call */
24551 +       int *errp; /* call_del_wh_children() stores the result here */
24552 +       struct dentry *h_dentry;
24553 +       struct au_nhash *whlist;
24554 +       aufs_bindex_t bindex;
24555 +       struct au_branch *br;
24556 +};
24557 +
24558 +static void call_del_wh_children(void *args)
24559 +{
24560 +       struct del_wh_children_args *a = args; /* as given to au_wkq_wait() */
24561 +       *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
24562 +}
24563 +
24564 +/* ---------------------------------------------------------------------- */
24565 +
24566 +struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
24567 +{
24568 +       struct au_whtmp_rmdir *whtmp; /* the result, or ERR_PTR() on failure */
24569 +       int err;
24570 +       unsigned int rdhash;
24571 +
24572 +       SiMustAnyLock(sb);
24573 +
24574 +       whtmp = kmalloc(sizeof(*whtmp), gfp);
24575 +       if (unlikely(!whtmp)) {
24576 +               whtmp = ERR_PTR(-ENOMEM);
24577 +               goto out;
24578 +       }
24579 +
24580 +       whtmp->dir = NULL; /* bindex is left unset here */
24581 +       whtmp->wh_dentry = NULL;
24582 +       /* no estimation for dir size: take the sb's rdhash, else the default */
24583 +       rdhash = au_sbi(sb)->si_rdhash;
24584 +       if (!rdhash)
24585 +               rdhash = AUFS_RDHASH_DEF;
24586 +       err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
24587 +       if (unlikely(err)) {
24588 +               kfree(whtmp);
24589 +               whtmp = ERR_PTR(err);
24590 +       }
24591 +
24592 + out:
24593 +       return whtmp;
24594 +}
24595 +
24596 +void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
24597 +{
24598 +       dput(whtmp->wh_dentry); /* ref from au_whtmp_kick_rmdir(), if set */
24599 +       iput(whtmp->dir); /* likewise for the au_igrab() there */
24600 +       au_nhash_wh_free(&whtmp->whlist);
24601 +       kfree(whtmp);
24602 +}
24603 +
24604 +/*
24605 + * rmdir the whiteouted temporary named dir @wh_dentry on branch @bindex.
24606 + * @whlist: whiteouted children, unlinked first. returns 0 or an error.
24607 + */
24608 +int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
24609 +                  struct dentry *wh_dentry, struct au_nhash *whlist)
24610 +{
24611 +       int err;
24612 +       struct path h_tmp;
24613 +       struct inode *wh_inode, *h_dir;
24614 +       struct au_branch *br;
24615 +
24616 +       h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
24617 +       IMustLock(h_dir);
24618 +
24619 +       br = au_sbr(dir->i_sb, bindex);
24620 +       wh_inode = wh_dentry->d_inode;
24621 +       mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD);
24622 +
24623 +       /*
24624 +        * someone else might change some whiteouts while we were sleeping.
24625 +        * it means this whlist may have an obsoleted entry.
24626 +        */
24627 +       if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
24628 +               err = del_wh_children(wh_dentry, whlist, bindex, br);
24629 +       else { /* non-zero: hand the same call to the workqueue */
24630 +               int wkq_err;
24631 +               struct del_wh_children_args args = {
24632 +                       .errp           = &err,
24633 +                       .h_dentry       = wh_dentry,
24634 +                       .whlist         = whlist,
24635 +                       .bindex         = bindex,
24636 +                       .br             = br
24637 +               };
24638 +
24639 +               wkq_err = au_wkq_wait(call_del_wh_children, &args);
24640 +               if (unlikely(wkq_err))
24641 +                       err = wkq_err; /* the job never ran, err was unset */
24642 +       }
24643 +       mutex_unlock(&wh_inode->i_mutex);
24644 +
24645 +       if (!err) {
24646 +               h_tmp.dentry = wh_dentry;
24647 +               h_tmp.mnt = br->br_mnt;
24648 +               err = vfsub_rmdir(h_dir, &h_tmp);
24649 +               /* d_drop(h_dentry); */
24650 +       }
24651 +
24652 +       if (!err) {
24653 +               if (au_ibstart(dir) == bindex) {
24654 +                       au_cpup_attr_timesizes(dir);
24655 +                       drop_nlink(dir);
24656 +               }
24657 +               return 0; /* success */
24658 +       }
24659 +
24660 +       AuWarn("failed removing %.*s(%d), ignored\n",
24661 +              AuDLNPair(wh_dentry), err);
24662 +       return err; /* NOTE(review): logged as ignored, yet err is returned */
24663 +}
24664 +
24665 +static void call_rmdir_whtmp(void *args)
24666 +{
24667 +       int err;
24668 +       struct au_whtmp_rmdir *a = args; /* freed below on every path */
24669 +       struct super_block *sb;
24670 +       struct dentry *h_parent;
24671 +       struct inode *h_dir;
24672 +       struct au_branch *br;
24673 +       struct au_hinode *hdir;
24674 +
24675 +       /* rmdir by nfsd may cause deadlock with this i_mutex */
24676 +       /* mutex_lock(&a->dir->i_mutex); */
24677 +       sb = a->dir->i_sb;
24678 +       si_noflush_read_lock(sb);
24679 +       err = au_test_ro(sb, a->bindex, NULL);
24680 +       if (unlikely(err))
24681 +               goto out;
24682 +
24683 +       err = -EIO; /* NOTE(review): overwritten by au_h_verify() before use */
24684 +       br = au_sbr(sb, a->bindex);
24685 +       ii_write_lock_parent(a->dir);
24686 +       h_parent = dget_parent(a->wh_dentry);
24687 +       h_dir = h_parent->d_inode;
24688 +       hdir = au_hi(a->dir, a->bindex);
24689 +       au_hin_imtx_lock_nested(hdir, AuLsc_I_PARENT);
24690 +       err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent, br);
24691 +       if (!err) {
24692 +               err = mnt_want_write(br->br_mnt);
24693 +               if (!err) {
24694 +                       err = au_whtmp_rmdir(a->dir, a->bindex, a->wh_dentry,
24695 +                                            &a->whlist);
24696 +                       mnt_drop_write(br->br_mnt);
24697 +               }
24698 +       }
24699 +       au_hin_imtx_unlock(hdir);
24700 +       dput(h_parent);
24701 +       ii_write_unlock(a->dir);
24702 +
24703 + out:
24704 +       /* mutex_unlock(&a->dir->i_mutex); */
24705 +       au_nwt_done(&au_sbi(sb)->si_nowait); /* undo au_wkq_nowait()'s nw_len++ */
24706 +       si_read_unlock(sb);
24707 +       au_whtmp_rmdir_free(a);
24708 +       if (unlikely(err))
24709 +               AuIOErr("err %d\n", err); /* a void job can only log it */
24710 +}
24711 +
24712 +void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
24713 +                        struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
24714 +{
24715 +       int wkq_err;
24716 +
24717 +       IMustLock(dir);
24718 +
24719 +       /* all post-process will be done in call_rmdir_whtmp(). */
24720 +       args->dir = au_igrab(dir);
24721 +       args->bindex = bindex;
24722 +       args->wh_dentry = dget(wh_dentry);
24723 +       wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, dir->i_sb);
24724 +       if (unlikely(wkq_err)) {
24725 +               AuWarn("rmdir error %.*s (%d), ignored\n",
24726 +                      AuDLNPair(wh_dentry), wkq_err);
24727 +               au_whtmp_rmdir_free(args); /* not queued, so release it here */
24728 +       }
24729 +}
24730 diff --git a/fs/aufs/whout.h b/fs/aufs/whout.h
24731 new file mode 100644
24732 index 0000000..40c6926
24733 --- /dev/null
24734 +++ b/fs/aufs/whout.h
24735 @@ -0,0 +1,87 @@
24736 +/*
24737 + * Copyright (C) 2005-2009 Junjiro R. Okajima
24738 + *
24739 + * This program, aufs is free software; you can redistribute it and/or modify
24740 + * it under the terms of the GNU General Public License as published by
24741 + * the Free Software Foundation; either version 2 of the License, or
24742 + * (at your option) any later version.
24743 + *
24744 + * This program is distributed in the hope that it will be useful,
24745 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
24746 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24747 + * GNU General Public License for more details.
24748 + *
24749 + * You should have received a copy of the GNU General Public License
24750 + * along with this program; if not, write to the Free Software
24751 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
24752 + */
24753 +
24754 +/*
24755 + * whiteout for logical deletion and opaque directory
24756 + */
24757 +
24758 +#ifndef __AUFS_WHOUT_H__
24759 +#define __AUFS_WHOUT_H__
24760 +
24761 +#ifdef __KERNEL__
24762 +
24763 +#include <linux/aufs_type.h>
24764 +#include "dir.h"
24765 +
24766 +/* whout.c */
24767 +int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
24768 +struct au_branch;
24769 +int au_wh_test(struct dentry *h_parent, struct qstr *wh_name,
24770 +              struct au_branch *br, int try_sio);
24771 +int au_diropq_test(struct dentry *h_dentry, struct au_branch *br);
24772 +struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
24773 +                            struct qstr *prefix);
24774 +int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
24775 +int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
24776 +                       struct dentry *dentry);
24777 +int au_wh_init(struct dentry *h_parent, struct au_branch *br,
24778 +              struct super_block *sb);
24779 +
24780 +/* diropq flags; NOTE(review): fset/fclr expand to bare { } blocks */
24781 +#define AuDiropq_CREATE        1
24782 +#define au_ftest_diropq(flags, name)   ((flags) & AuDiropq_##name)
24783 +#define au_fset_diropq(flags, name)    { (flags) |= AuDiropq_##name; }
24784 +#define au_fclr_diropq(flags, name)    { (flags) &= ~AuDiropq_##name; }
24785 +
24786 +struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
24787 +                            unsigned int flags);
24788 +struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
24789 +                         struct au_branch *br);
24790 +struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
24791 +                           struct dentry *h_parent);
24792 +
24793 +/* real rmdir for the whiteout-ed dir; argument of the deferred rmdir job */
24794 +struct au_whtmp_rmdir {
24795 +       struct inode *dir; /* set by au_whtmp_kick_rmdir() via au_igrab() */
24796 +       aufs_bindex_t bindex;
24797 +       struct dentry *wh_dentry; /* set by au_whtmp_kick_rmdir() via dget() */
24798 +       struct au_nhash whlist; /* allocated in au_whtmp_rmdir_alloc() */
24799 +};
24800 +
24801 +struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
24802 +void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
24803 +int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
24804 +                  struct dentry *wh_dentry, struct au_nhash *whlist);
24805 +void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
24806 +                        struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
24807 +
24808 +/* ---------------------------------------------------------------------- */
24809 +
24810 +static inline struct dentry *au_diropq_create(struct dentry *dentry,
24811 +                                             aufs_bindex_t bindex)
24812 +{ /* au_diropq_sio() with the CREATE flag set */
24813 +       return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
24814 +}
24815 +
24816 +static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
24817 +{ /* !AuDiropq_CREATE is 0 (no flag); the returned pointer is read as an int */
24818 +       return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
24819 +}
24820 +
24821 +#endif /* __KERNEL__ */
24822 +#endif /* __AUFS_WHOUT_H__ */
24823 diff --git a/fs/aufs/wkq.c b/fs/aufs/wkq.c
24824 new file mode 100644
24825 index 0000000..89656e9
24826 --- /dev/null
24827 +++ b/fs/aufs/wkq.c
24828 @@ -0,0 +1,259 @@
24829 +/*
24830 + * Copyright (C) 2005-2009 Junjiro R. Okajima
24831 + *
24832 + * This program, aufs is free software; you can redistribute it and/or modify
24833 + * it under the terms of the GNU General Public License as published by
24834 + * the Free Software Foundation; either version 2 of the License, or
24835 + * (at your option) any later version.
24836 + *
24837 + * This program is distributed in the hope that it will be useful,
24838 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
24839 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24840 + * GNU General Public License for more details.
24841 + *
24842 + * You should have received a copy of the GNU General Public License
24843 + * along with this program; if not, write to the Free Software
24844 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
24845 + */
24846 +
24847 +/*
24848 + * workqueue for asynchronous/super-io operations
24849 + * todo: try new credential scheme
24850 + */
24851 +
24852 +#include <linux/module.h>
24853 +#include "aufs.h"
24854 +
24855 +/* internal workqueue named AUFS_WKQ_NAME */
24856 +static struct au_wkq {
24857 +       struct workqueue_struct *q; /* NULL in the extra nowait slot */
24858 +
24859 +       /* balancing: raised in do_wkq(), dropped in wkq_func() */
24860 +       atomic_t                busy;
24861 +} *au_wkq; /* au_wkq_init() allocates aufs_nwkq + 1 entries */
24862 +
24863 +struct au_wkinfo {
24864 +       struct work_struct wk;
24865 +       struct super_block *sb; /* assigned only by au_wkq_nowait() */
24866 +
24867 +       unsigned int flags; /* see wkq.h */
24868 +
24869 +       au_wkq_func_t func; /* wkq_func() calls func(args) */
24870 +       void *args;
24871 +
24872 +       atomic_t *busyp; /* counter of the queue picked in enqueue() */
24873 +       struct completion *comp; /* completed by wkq_func() for WAIT jobs */
24874 +};
24875 +
24876 +/* ---------------------------------------------------------------------- */
24877 +
24878 +static int enqueue(struct au_wkq *wkq, struct au_wkinfo *wkinfo)
24879 +{ /* returns 0 once the work has been queued */
24880 +       wkinfo->busyp = &wkq->busy; /* wkq_func() decrements this later */
24881 +       if (au_ftest_wkq(wkinfo->flags, WAIT))
24882 +               return !queue_work(wkq->q, &wkinfo->wk);
24883 +       else
24884 +               return !schedule_work(&wkinfo->wk); /* nowait: wkq->q unused */
24885 +}
24886 +
24887 +static void do_wkq(struct au_wkinfo *wkinfo)
24888 +{
24889 +       unsigned int idle, n;
24890 +       int i, idle_idx;
24891 +
24892 +       while (1) {
24893 +               if (au_ftest_wkq(wkinfo->flags, WAIT)) {
24894 +                       idle_idx = 0;
24895 +                       idle = UINT_MAX;
24896 +                       for (i = 0; i < aufs_nwkq; i++) {
24897 +                               n = atomic_inc_return(&au_wkq[i].busy);
24898 +                               if (n == 1 && !enqueue(au_wkq + i, wkinfo))
24899 +                                       return; /* success */
24900 +
24901 +                               if (n < idle) {
24902 +                                       idle_idx = i;
24903 +                                       idle = n;
24904 +                               }
24905 +                               atomic_dec(&au_wkq[i].busy); /* undo the probe */
24906 +                       }
24907 +               } else
24908 +                       idle_idx = aufs_nwkq; /* the nowait accounting slot */
24909 +
24910 +               atomic_inc(&au_wkq[idle_idx].busy);
24911 +               if (!enqueue(au_wkq + idle_idx, wkinfo))
24912 +                       return; /* success */
24913 +
24914 +               /* impossible? NOTE(review): the atomic_inc() above is not undone */
24915 +               AuWarn1("failed to queue_work()\n");
24916 +               yield();
24917 +       }
24918 +}
24919 +
24920 +static void wkq_func(struct work_struct *wk)
24921 +{
24922 +       struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
24923 +
24924 +       wkinfo->func(wkinfo->args);
24925 +       atomic_dec_return(wkinfo->busyp); /* pairs with the inc in do_wkq() */
24926 +       if (au_ftest_wkq(wkinfo->flags, WAIT))
24927 +               complete(wkinfo->comp); /* wakes au_wkq_wait(), owner of wkinfo */
24928 +       else { /* nowait: drop what au_wkq_nowait() took and allocated */
24929 +               kobject_put(&au_sbi(wkinfo->sb)->si_kobj);
24930 +               module_put(THIS_MODULE);
24931 +               kfree(wkinfo);
24932 +       }
24933 +}
24934 +
24935 +/*
24936 + * struct completion is large: kmalloc it on 4K stacks, else keep it on stack.
24937 + */
24938 +#if defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS)
24939 +#define AuWkqCompDeclare(name) struct completion *comp = NULL
24940 +
24941 +static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
24942 +{
24943 +       *comp = kmalloc(sizeof(**comp), GFP_NOFS);
24944 +       if (*comp) {
24945 +               init_completion(*comp);
24946 +               wkinfo->comp = *comp;
24947 +               return 0;
24948 +       }
24949 +       return -ENOMEM;
24950 +}
24951 +
24952 +static void au_wkq_comp_free(struct completion *comp)
24953 +{
24954 +       kfree(comp);
24955 +}
24956 +
24957 +#else
24958 +
24959 +/* no braces: both declarations land in the caller's scope */
24960 +#define AuWkqCompDeclare(name) \
24961 +       DECLARE_COMPLETION_ONSTACK(_ ## name); \
24962 +       struct completion *comp = &_ ## name
24963 +
24964 +static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
24965 +{
24966 +       wkinfo->comp = *comp; /* already declared by AuWkqCompDeclare() */
24967 +       return 0;
24968 +}
24969 +
24970 +static void au_wkq_comp_free(struct completion *comp __maybe_unused)
24971 +{
24972 +       /* empty */
24973 +}
24974 +#endif /* 4KSTACKS */
24975 +
24976 +static void au_wkq_run(struct au_wkinfo *wkinfo)
24977 +{
24978 +       au_dbg_verify_kthread();
24979 +       INIT_WORK(&wkinfo->wk, wkq_func);
24980 +       do_wkq(wkinfo); /* returns only once the work is queued */
24981 +}
24982 +
24983 +int au_wkq_wait(au_wkq_func_t func, void *args)
24984 +{
24985 +       int err; /* completion setup result only, not anything from @func */
24986 +       AuWkqCompDeclare(comp);
24987 +       struct au_wkinfo wkinfo = {
24988 +               .flags  = AuWkq_WAIT,
24989 +               .func   = func,
24990 +               .args   = args
24991 +       };
24992 +
24993 +       err = au_wkq_comp_alloc(&wkinfo, &comp);
24994 +       if (!err) {
24995 +               au_wkq_run(&wkinfo);
24996 +               /* no timeout, no interrupt */
24997 +               wait_for_completion(wkinfo.comp);
24998 +               au_wkq_comp_free(comp);
24999 +       }
25000 +
25001 +       return err; /* non-zero means @func was never run */
25002 +
25003 +}
25004 +
25005 +int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb)
25006 +{
25007 +       int err; /* 0 when queued, -ENOMEM when wkinfo cannot be allocated */
25008 +       struct au_wkinfo *wkinfo;
25009 +
25010 +       atomic_inc(&au_sbi(sb)->si_nowait.nw_len); /* au_nwt_done() undoes it */
25011 +
25012 +       /*
25013 +        * wkq_func() must free this wkinfo.
25014 +        * it highly depends upon the implementation of workqueue.
25015 +        */
25016 +       err = 0;
25017 +       wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
25018 +       if (wkinfo) {
25019 +               wkinfo->sb = sb;
25020 +               wkinfo->flags = !AuWkq_WAIT; /* i.e. 0: nobody waits for it */
25021 +               wkinfo->func = func;
25022 +               wkinfo->args = args;
25023 +               wkinfo->comp = NULL;
25024 +               kobject_get(&au_sbi(sb)->si_kobj); /* put in wkq_func() */
25025 +               __module_get(THIS_MODULE); /* put in wkq_func() */
25026 +
25027 +               au_wkq_run(wkinfo);
25028 +       } else {
25029 +               err = -ENOMEM;
25030 +               au_nwt_done(&au_sbi(sb)->si_nowait); /* dec and wake flushers */
25031 +       }
25032 +
25033 +       return err;
25034 +}
25035 +
25036 +/* ---------------------------------------------------------------------- */
25037 +
25038 +void au_nwt_init(struct au_nowait_tasks *nwt)
25039 +{
25040 +       atomic_set(&nwt->nw_len, 0); /* no nowait job outstanding yet */
25041 +       /* smp_mb();*/ /* atomic_set */
25042 +       init_waitqueue_head(&nwt->nw_wq);
25043 +}
25044 +
25045 +void au_wkq_fin(void)
25046 +{
25047 +       int i;
25048 +
25049 +       for (i = 0; i < aufs_nwkq; i++) /* the extra nowait slot is skipped */
25050 +               if (au_wkq[i].q && !IS_ERR(au_wkq[i].q))
25051 +                       destroy_workqueue(au_wkq[i].q);
25052 +       kfree(au_wkq); /* NOTE(review): the au_wkq pointer is not reset */
25053 +}
25054 +
25055 +int __init au_wkq_init(void)
25056 +{
25057 +       int err, i; /* err: 0 on success, negative errno otherwise */
25058 +       struct au_wkq *nowaitq;
25059 +
25060 +       /* '+1' is for accounting of nowait queue */
25061 +       err = -ENOMEM;
25062 +       au_wkq = kcalloc(aufs_nwkq + 1, sizeof(*au_wkq), GFP_NOFS);
25063 +       if (unlikely(!au_wkq))
25064 +               goto out;
25065 +
25066 +       err = 0;
25067 +       for (i = 0; i < aufs_nwkq; i++) {
25068 +               au_wkq[i].q = create_singlethread_workqueue(AUFS_WKQ_NAME);
25069 +               if (au_wkq[i].q && !IS_ERR(au_wkq[i].q)) {
25070 +                       atomic_set(&au_wkq[i].busy, 0);
25071 +                       continue;
25072 +               }
25073 +               /* a NULL queue is a failure too, but PTR_ERR(NULL) would be 0 */
25074 +               err = au_wkq[i].q ? PTR_ERR(au_wkq[i].q) : -ENOMEM;
25075 +               au_wkq_fin(); /* destroys the queues made so far, frees au_wkq */
25076 +               goto out;
25077 +       }
25078 +
25079 +       /* nowait accounting */
25080 +       nowaitq = au_wkq + aufs_nwkq;
25081 +       atomic_set(&nowaitq->busy, 0);
25082 +       nowaitq->q = NULL; /* this slot only counts; it owns no workqueue */
25083 +       /* smp_mb(); */ /* atomic_set */
25084 +
25085 + out:
25086 +       return err;
25087 +}
25088 diff --git a/fs/aufs/wkq.h b/fs/aufs/wkq.h
25089 new file mode 100644
25090 index 0000000..b5b6a61
25091 --- /dev/null
25092 +++ b/fs/aufs/wkq.h
25093 @@ -0,0 +1,82 @@
25094 +/*
25095 + * Copyright (C) 2005-2009 Junjiro R. Okajima
25096 + *
25097 + * This program, aufs is free software; you can redistribute it and/or modify
25098 + * it under the terms of the GNU General Public License as published by
25099 + * the Free Software Foundation; either version 2 of the License, or
25100 + * (at your option) any later version.
25101 + *
25102 + * This program is distributed in the hope that it will be useful,
25103 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
25104 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25105 + * GNU General Public License for more details.
25106 + *
25107 + * You should have received a copy of the GNU General Public License
25108 + * along with this program; if not, write to the Free Software
25109 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
25110 + */
25111 +
25112 +/*
25113 + * workqueue for asynchronous/super-io operations
25114 + * todo: try new credentials management scheme
25115 + */
25116 +
25117 +#ifndef __AUFS_WKQ_H__
25118 +#define __AUFS_WKQ_H__
25119 +
25120 +#ifdef __KERNEL__
25121 +
25122 +#include <linux/sched.h>
25123 +#include <linux/wait.h>
25124 +#include <linux/aufs_type.h>
25125 +
25126 +struct super_block;
25127 +
25128 +/* ---------------------------------------------------------------------- */
25129 +
25130 +/*
25131 + * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
25132 + */
25133 +struct au_nowait_tasks {
25134 +       atomic_t                nw_len; /* dropped by au_nwt_done() */
25135 +       wait_queue_head_t       nw_wq; /* au_nwt_flush() sleeps here */
25136 +};
25137 +
25138 +/* ---------------------------------------------------------------------- */
25139 +
25140 +typedef void (*au_wkq_func_t)(void *args);
25141 +
25142 +/* wkq flags; NOTE(review): fset/fclr expand to bare { } blocks */
25143 +#define AuWkq_WAIT     1
25144 +#define au_ftest_wkq(flags, name)      ((flags) & AuWkq_##name)
25145 +#define au_fset_wkq(flags, name)       { (flags) |= AuWkq_##name; }
25146 +#define au_fclr_wkq(flags, name)       { (flags) &= ~AuWkq_##name; }
25147 +
25148 +/* wkq.c */
25149 +int au_wkq_wait(au_wkq_func_t func, void *args);
25150 +int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb);
25151 +void au_nwt_init(struct au_nowait_tasks *nwt);
25152 +int __init au_wkq_init(void);
25153 +void au_wkq_fin(void);
25154 +
25155 +/* ---------------------------------------------------------------------- */
25156 +
25157 +static inline int au_test_wkq(struct task_struct *tsk)
25158 +{ /* non-zero for a task without an mm whose comm is AUFS_WKQ_NAME */
25159 +       return !tsk->mm && !strcmp(tsk->comm, AUFS_WKQ_NAME);
25160 +}
25161 +
25162 +static inline void au_nwt_done(struct au_nowait_tasks *nwt)
25163 +{
25164 +       if (!atomic_dec_return(&nwt->nw_len))
25165 +               wake_up_all(&nwt->nw_wq); /* count hit 0: wake the flushers */
25166 +}
25167 +
25168 +static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
25169 +{
25170 +       wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len)); /* until it is 0 */
25171 +       return 0; /* always */
25172 +}
25173 +
25174 +#endif /* __KERNEL__ */
25175 +#endif /* __AUFS_WKQ_H__ */
25176 diff --git a/fs/aufs/xino.c b/fs/aufs/xino.c
25177 new file mode 100644
25178 index 0000000..25826ef
25179 --- /dev/null
25180 +++ b/fs/aufs/xino.c
25181 @@ -0,0 +1,1199 @@
25182 +/*
25183 + * Copyright (C) 2005-2009 Junjiro R. Okajima
25184 + *
25185 + * This program, aufs is free software; you can redistribute it and/or modify
25186 + * it under the terms of the GNU General Public License as published by
25187 + * the Free Software Foundation; either version 2 of the License, or
25188 + * (at your option) any later version.
25189 + *
25190 + * This program is distributed in the hope that it will be useful,
25191 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
25192 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25193 + * GNU General Public License for more details.
25194 + *
25195 + * You should have received a copy of the GNU General Public License
25196 + * along with this program; if not, write to the Free Software
25197 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
25198 + */
25199 +
25200 +/*
25201 + * external inode number translation table and bitmap
25202 + */
25203 +
25204 +#include <linux/file.h>
25205 +#include <linux/seq_file.h>
25206 +#include <linux/uaccess.h>
25207 +#include "aufs.h"
25208 +
25209 +ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size,
25210 +                  loff_t *pos)
25211 +{
25212 +       ssize_t err; /* whatever @func returned last */
25213 +       mm_segment_t oldfs;
25214 +
25215 +       oldfs = get_fs();
25216 +       set_fs(KERNEL_DS); /* @buf is handed to @func behind a __user cast */
25217 +       do {
25218 +               /* todo: signal_pending? */
25219 +               err = func(file, (char __user *)buf, size, pos);
25220 +       } while (err == -EAGAIN || err == -EINTR); /* retried without limit */
25221 +       set_fs(oldfs);
25222 +
25223 +#if 0 /* reserved for future use */
25224 +       if (err > 0)
25225 +               fsnotify_access(file->f_dentry);
25226 +#endif
25227 +
25228 +       return err;
25229 +}
25230 +
25231 +/* ---------------------------------------------------------------------- */
25232 +
25233 +static ssize_t do_xino_fwrite(au_writef_t func, struct file *file, void *buf,
25234 +                             size_t size, loff_t *pos)
25235 +{
25236 +       ssize_t err; /* whatever @func returned last */
25237 +       mm_segment_t oldfs;
25238 +
25239 +       oldfs = get_fs();
25240 +       set_fs(KERNEL_DS); /* @buf is handed to @func behind a __user cast */
25241 +       lockdep_off(); /* lockdep stays off for the write only */
25242 +       do {
25243 +               /* todo: signal_pending? */
25244 +               err = func(file, (const char __user *)buf, size, pos);
25245 +       } while (err == -EAGAIN || err == -EINTR); /* retried without limit */
25246 +       lockdep_on();
25247 +       set_fs(oldfs);
25248 +
25249 +#if 0 /* reserved for future use */
25250 +       if (err > 0)
25251 +               fsnotify_modify(file->f_dentry);
25252 +#endif
25253 +
25254 +       return err;
25255 +}
25256 +
25257 +struct do_xino_fwrite_args { /* one packed do_xino_fwrite() call */
25258 +       ssize_t *errp; /* call_do_xino_fwrite() stores the result here */
25259 +       au_writef_t func;
25260 +       struct file *file;
25261 +       void *buf;
25262 +       size_t size;
25263 +       loff_t *pos;
25264 +};
25265 +
25266 +static void call_do_xino_fwrite(void *args)
25267 +{
25268 +       struct do_xino_fwrite_args *a = args; /* as given to au_wkq_wait() */
25269 +       *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
25270 +}
25271 +
25272 +ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
25273 +                   loff_t *pos)
25274 +{
25275 +       ssize_t err;
25276 +
25277 +       /* todo: signal block and no wkq? */
25278 +       /* todo: new credential scheme */
25279 +       /*
25280 +        * it breaks RLIMIT_FSIZE and normal user's limit,
25281 +        * users should care about quota and real 'filesystem full.'
25282 +        */
25283 +       if (!au_test_wkq(current)) { /* not the aufs wkq thread: delegate */
25284 +               int wkq_err;
25285 +               struct do_xino_fwrite_args args = {
25286 +                       .errp   = &err,
25287 +                       .func   = func,
25288 +                       .file   = file,
25289 +                       .buf    = buf,
25290 +                       .size   = size,
25291 +                       .pos    = pos
25292 +               };
25293 +
25294 +               wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
25295 +               if (unlikely(wkq_err))
25296 +                       err = wkq_err; /* the job never ran, err was unset */
25297 +       } else /* already on the aufs wkq thread: write directly */
25298 +               err = do_xino_fwrite(func, file, buf, size, pos);
25299 +
25300 +       return err;
25301 +}
25302 +
25303 +/* ---------------------------------------------------------------------- */
25304 +
25305 +/*
25306 + * create a new xinofile at @base_file's path; @copy_src, if set, is copied in.
25307 + */
25308 +struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
25309 +{
25310 +       struct file *file; /* the open new file, or ERR_PTR() on failure */
25311 +       struct dentry *base, *dentry, *parent;
25312 +       struct inode *dir;
25313 +       struct qstr *name;
25314 +       int err;
25315 +
25316 +       base = base_file->f_dentry;
25317 +       parent = base->d_parent; /* dir inode is locked */
25318 +       dir = parent->d_inode;
25319 +       IMustLock(dir);
25320 +
25321 +       file = ERR_PTR(-EINVAL); /* placeholder; every path below reassigns it */
25322 +       name = &base->d_name;
25323 +       dentry = vfsub_lookup_one_len(name->name, parent, name->len);
25324 +       if (IS_ERR(dentry)) {
25325 +               file = (void *)dentry;
25326 +               AuErr("%.*s lookup err %ld\n", AuLNPair(name), PTR_ERR(dentry));
25327 +               goto out;
25328 +       }
25329 +
25330 +       /* no need to mnt_want_write() since we call dentry_open() later */
25331 +       err = vfs_create(dir, dentry, S_IRUGO | S_IWUGO, NULL);
25332 +       if (unlikely(err)) {
25333 +               file = ERR_PTR(err);
25334 +               AuErr("%.*s create err %d\n", AuLNPair(name), err);
25335 +               goto out_dput;
25336 +       }
25337 +
25338 +       file = dentry_open(dget(dentry), mntget(base_file->f_vfsmnt),
25339 +                          O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE);
25340 +       if (IS_ERR(file)) {
25341 +               AuErr("%.*s open err %ld\n", AuLNPair(name), PTR_ERR(file));
25342 +               goto out_dput; /* NOTE(review): the created file stays linked */
25343 +       }
25344 +
25345 +       err = vfsub_unlink(dir, &file->f_path, /*force*/0);
25346 +       if (unlikely(err)) {
25347 +               AuErr("%.*s unlink err %d\n", AuLNPair(name), err);
25348 +               goto out_fput;
25349 +       }
25350 +
25351 +       if (copy_src) {
25352 +               /* no one can touch copy_src xino */
25353 +               err = au_copy_file(file, copy_src,
25354 +                                  i_size_read(copy_src->f_dentry->d_inode));
25355 +               if (unlikely(err)) {
25356 +                       AuErr("%.*s copy err %d\n", AuLNPair(name), err);
25357 +                       goto out_fput;
25358 +               }
25359 +       }
25360 +       goto out_dput; /* success */
25361 +
25362 + out_fput:
25363 +       fput(file);
25364 +       file = ERR_PTR(err);
25365 + out_dput:
25366 +       dput(dentry); /* the lookup ref; dentry_open() got its own dget() */
25367 + out:
25368 +       return file;
25369 +}
25370 +
25371 +struct au_xino_lock_dir {
25372 +       struct au_hinode *hdir; /* non-NULL: unlock via au_hin_imtx_unlock() */
25373 +       struct dentry *parent; /* set and used only when hdir is NULL */
25374 +       struct mutex *mtx; /* set and used only when hdir is NULL */
25375 +};
25376 +
25377 +static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
25378 +                            struct au_xino_lock_dir *ldir)
25379 +{
25380 +       aufs_bindex_t brid, bindex;
25381 +
25382 +       ldir->hdir = NULL; /* NULL tells the unlock to use parent/mtx */
25383 +       bindex = -1;
25384 +       brid = au_xino_brid(sb);
25385 +       if (brid >= 0)
25386 +               bindex = au_br_index(sb, brid);
25387 +       if (bindex >= 0) {
25388 +               ldir->hdir = au_hi(sb->s_root->d_inode, bindex);
25389 +               au_hin_imtx_lock_nested(ldir->hdir, AuLsc_I_PARENT);
25390 +       } else { /* no branch index found: lock the xino file's parent dir */
25391 +               ldir->parent = dget_parent(xino->f_dentry);
25392 +               ldir->mtx = &ldir->parent->d_inode->i_mutex;
25393 +               mutex_lock_nested(ldir->mtx, AuLsc_I_PARENT);
25394 +       }
25395 +}
25396 +
25397 +static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
25398 +{
25399 +       if (ldir->hdir)
25400 +               au_hin_imtx_unlock(ldir->hdir);
25401 +       else {
25402 +               mutex_unlock(ldir->mtx);
25403 +               dput(ldir->parent); /* ref from dget_parent() in the lock */
25404 +       }
25405 +}
25406 +
25407 +/* ---------------------------------------------------------------------- */
25408 +
25409 +/* truncate xino files asynchronously */
25410 +/* replace branch @bindex's xino file by one from au_xino_create2(); 0 or a negative errno */
25411 +int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
25412 +{
25413 +       int err;
25414 +       aufs_bindex_t bi, bend;
25415 +       struct au_branch *br;
25416 +       struct file *new_xino, *file;
25417 +       struct super_block *h_sb;
25418 +       struct au_xino_lock_dir ldir;
25419 +       /* -EINVAL covers both an out-of-range index and a branch without xino file */
25420 +       err = -EINVAL;
25421 +       bend = au_sbend(sb);
25422 +       if (unlikely(bindex < 0 || bend < bindex))
25423 +               goto out;
25424 +       br = au_sbr(sb, bindex);
25425 +       file = br->br_xino.xi_file;
25426 +       if (!file)
25427 +               goto out;
25428 +       /* the current file is passed as both arguments of au_xino_create2() */
25429 +       au_xino_lock_dir(sb, file, &ldir);
25430 +       /* mnt_want_write() is unnecessary here */
25431 +       new_xino = au_xino_create2(file, file);
25432 +       au_xino_unlock_dir(&ldir);
25433 +       err = PTR_ERR(new_xino);
25434 +       if (IS_ERR(new_xino))
25435 +               goto out;
25436 +       err = 0;
25437 +       fput(file);
25438 +       br->br_xino.xi_file = new_xino;
25439 +       /* every other branch on the same h_sb swaps its reference over to new_xino */
25440 +       h_sb = br->br_mnt->mnt_sb;
25441 +       for (bi = 0; bi <= bend; bi++) {
25442 +               if (unlikely(bi == bindex))
25443 +                       continue;
25444 +               br = au_sbr(sb, bi);
25445 +               if (br->br_mnt->mnt_sb != h_sb)
25446 +                       continue;
25447 +
25448 +               fput(br->br_xino.xi_file);
25449 +               br->br_xino.xi_file = new_xino;
25450 +               get_file(new_xino);
25451 +       }
25452 +
25453 + out:
25454 +       return err;
25455 +}
25456 +/* argument bundle kmalloc'ed by xino_try_trunc() and kfree'd by xino_do_trunc() */
25457 +struct xino_do_trunc_args {
25458 +       struct super_block *sb;
25459 +       struct au_branch *br;
25460 +};
25461 +/* work function handed to au_wkq_nowait(): truncate the xino file of args->br */
25462 +static void xino_do_trunc(void *_args)
25463 +{
25464 +       struct xino_do_trunc_args *args = _args;
25465 +       struct super_block *sb;
25466 +       struct au_branch *br;
25467 +       struct inode *dir;
25468 +       int err;
25469 +       aufs_bindex_t bindex;
25470 +
25471 +       err = 0;
25472 +       sb = args->sb;
25473 +       dir = sb->s_root->d_inode;
25474 +       br = args->br;
25475 +       /* the branch index is looked up again from br_id under the write lock */
25476 +       si_noflush_write_lock(sb);
25477 +       ii_read_lock_parent(dir);
25478 +       bindex = au_br_index(sb, br->br_id);
25479 +       err = au_xino_trunc(sb, bindex);
25480 +       if (!err /* still at/above the watermark after truncation: raise the watermark */
25481 +           && br->br_xino.xi_file->f_dentry->d_inode->i_blocks
25482 +           >= br->br_xino_upper)
25483 +               br->br_xino_upper += AUFS_XINO_TRUNC_STEP;
25484 +
25485 +       ii_read_unlock(dir);
25486 +       if (unlikely(err))
25487 +               AuWarn("err b%d, (%d)\n", bindex, err);
25488 +       atomic_dec(&br->br_xino_running); /* incremented in xino_try_trunc() */
25489 +       atomic_dec(&br->br_count); /* incremented in xino_try_trunc() */
25490 +       au_nwt_done(&au_sbi(sb)->si_nowait);
25491 +       si_write_unlock(sb);
25492 +       kfree(args);
25493 +}
25494 +/* queue xino_do_trunc() for @br once its xino file reaches br_xino_upper blocks */
25495 +static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
25496 +{
25497 +       struct xino_do_trunc_args *args;
25498 +       int wkq_err;
25499 +
25500 +       if (br->br_xino.xi_file->f_dentry->d_inode->i_blocks
25501 +           < br->br_xino_upper)
25502 +               return;
25503 +       /* only the caller that raises br_xino_running from 0 to 1 goes on */
25504 +       if (atomic_inc_return(&br->br_xino_running) > 1)
25505 +               goto out;
25506 +
25507 +       /* the locking and kfree() of args happen in xino_do_trunc() */
25508 +       args = kmalloc(sizeof(*args), GFP_NOFS);
25509 +       if (unlikely(!args)) {
25510 +               AuErr1("no memory\n");
25511 +               goto out_args;
25512 +       }
25513 +       /* br_count is incremented here; xino_do_trunc() decrements it */
25514 +       atomic_inc_return(&br->br_count);
25515 +       args->sb = sb;
25516 +       args->br = br;
25517 +       wkq_err = au_wkq_nowait(xino_do_trunc, args, sb);
25518 +       if (!wkq_err)
25519 +               return; /* success */
25520 +
25521 +       AuErr("wkq %d\n", wkq_err);
25522 +       atomic_dec_return(&br->br_count);
25523 +
25524 + out_args:
25525 +       kfree(args); /* args is NULL when kmalloc() failed */
25526 + out:
25527 +       atomic_dec_return(&br->br_xino_running);
25528 +}
25529 +
25530 +/* ---------------------------------------------------------------------- */
25531 +/* store @ino at byte offset @h_ino * sizeof(ino) of @file; 0, -EFBIG or -EIO */
25532 +static int au_xino_do_write(au_writef_t write, struct file *file,
25533 +                           ino_t h_ino, ino_t ino)
25534 +{
25535 +       loff_t pos;
25536 +       ssize_t sz;
25537 +       /* reject an h_ino whose byte offset would go beyond au_loff_max */
25538 +       pos = h_ino;
25539 +       if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
25540 +               AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
25541 +               return -EFBIG;
25542 +       }
25543 +       pos *= sizeof(ino);
25544 +       sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
25545 +       if (sz == sizeof(ino))
25546 +               return 0; /* success */
25547 +       /* a short write and a negative result are both reported as -EIO */
25548 +       AuIOErr("write failed (%zd)\n", sz);
25549 +       return -EIO;
25550 +}
25551 +
25552 +/*
25553 + * write @ino to the xinofile of branch{@sb, @bindex} at the position of
25554 + * @h_ino. even if @ino is zero, it is written and means no entry.
25555 + * after a successful write, xino_try_trunc() gets a chance when TRUNC_XINO
25556 + * is set and au_test_fs_trunc_xino() accepts the branch filesystem.
25557 + * returns 0 (also when XINO is off); any write error is reported as -EIO.
25558 + */
25559 +int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
25560 +                 ino_t ino)
25561 +{
25562 +       int err;
25563 +       unsigned int mnt_flags;
25564 +       struct au_branch *br;
25565 +       /* compile-time checks: au_loff_max is long long sized and loff_t is signed */
25566 +       BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
25567 +                    || ((loff_t)-1) > 0);
25568 +       SiMustAnyLock(sb);
25569 +
25570 +       mnt_flags = au_mntflags(sb);
25571 +       if (!au_opt_test(mnt_flags, XINO))
25572 +               return 0;
25573 +
25574 +       br = au_sbr(sb, bindex);
25575 +       err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
25576 +                              h_ino, ino);
25577 +       if (!err) {
25578 +               if (au_opt_test(mnt_flags, TRUNC_XINO)
25579 +                   && au_test_fs_trunc_xino(br->br_mnt->mnt_sb))
25580 +                       xino_try_trunc(sb, br);
25581 +               return 0; /* success */
25582 +       }
25583 +       /* -EFBIG from au_xino_do_write() is folded into -EIO here as well */
25584 +       AuIOErr("write failed (%d)\n", err);
25585 +       return -EIO;
25586 +}
25587 +
25588 +/* ---------------------------------------------------------------------- */
25589 +
25590 +/* aufs inode number bitmap */
25591 +/* page_bits: bits in one PAGE_SIZE buffer; xib_calc_ino(): (page index, bit) -> inode number */
25592 +static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
25593 +static ino_t xib_calc_ino(unsigned long pindex, int bit)
25594 +{
25595 +       ino_t ino;
25596 +
25597 +       AuDebugOn(bit < 0 || page_bits <= bit);
25598 +       ino = AUFS_FIRST_INO + pindex * page_bits + bit; /* inverse of xib_calc_bit() */
25599 +       return ino;
25600 +}
25601 +/* split @ino (>= AUFS_FIRST_INO) into a page index (*@pindex) and a bit inside that page (*@bit) */
25602 +static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
25603 +{
25604 +       AuDebugOn(ino < AUFS_FIRST_INO);
25605 +       ino -= AUFS_FIRST_INO;
25606 +       *pindex = ino / page_bits;
25607 +       *bit = ino % page_bits;
25608 +}
25609 +/* make si_xib_buf hold page @pindex of the xib file; 0 or a negative errno */
25610 +static int xib_pindex(struct super_block *sb, unsigned long pindex)
25611 +{
25612 +       int err;
25613 +       loff_t pos;
25614 +       ssize_t sz;
25615 +       struct au_sbinfo *sbinfo;
25616 +       struct file *xib;
25617 +       unsigned long *p;
25618 +
25619 +       sbinfo = au_sbi(sb);
25620 +       MtxMustLock(&sbinfo->si_xib_mtx);
25621 +       AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
25622 +                 || !au_opt_test(sbinfo->si_mntflags, XINO));
25623 +       /* the wanted page is already the one in the buffer */
25624 +       if (pindex == sbinfo->si_xib_last_pindex)
25625 +               return 0;
25626 +       /* write the buffered page back at its own offset before replacing it */
25627 +       xib = sbinfo->si_xib;
25628 +       p = sbinfo->si_xib_buf;
25629 +       pos = sbinfo->si_xib_last_pindex;
25630 +       pos *= PAGE_SIZE;
25631 +       sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
25632 +       if (unlikely(sz != PAGE_SIZE))
25633 +               goto out;
25634 +       /* read page @pindex if the file has it in full, else write a zeroed page there */
25635 +       pos = pindex;
25636 +       pos *= PAGE_SIZE;
25637 +       if (i_size_read(xib->f_dentry->d_inode) >= pos + PAGE_SIZE)
25638 +               sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
25639 +       else {
25640 +               memset(p, 0, PAGE_SIZE);
25641 +               sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
25642 +       }
25643 +       if (sz == PAGE_SIZE) {
25644 +               sbinfo->si_xib_last_pindex = pindex;
25645 +               return 0; /* success */
25646 +       }
25647 +       /* NOTE(review): the buffer may have been overwritten while si_xib_last_pindex is unchanged */
25648 + out:
25649 +       AuIOErr1("write failed (%zd)\n", sz); /* also reached when the read above fails */
25650 +       err = sz;
25651 +       if (sz >= 0)
25652 +               err = -EIO;
25653 +       return err;
25654 +}
25655 +
25656 +/* ---------------------------------------------------------------------- */
25657 +/* clear @ino's bit in the bitmap (when @ino is non-zero), then write 0 at @h_ino in the xinofile */
25658 +int au_xino_write0(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
25659 +                  ino_t ino)
25660 +{
25661 +       int err, bit;
25662 +       unsigned long pindex;
25663 +       struct au_sbinfo *sbinfo;
25664 +
25665 +       if (!au_opt_test(au_mntflags(sb), XINO))
25666 +               return 0;
25667 +
25668 +       err = 0;
25669 +       if (ino) {
25670 +               sbinfo = au_sbi(sb);
25671 +               xib_calc_bit(ino, &pindex, &bit);
25672 +               AuDebugOn(page_bits <= bit);
25673 +               mutex_lock(&sbinfo->si_xib_mtx);
25674 +               err = xib_pindex(sb, pindex);
25675 +               if (!err) {
25676 +                       clear_bit(bit, sbinfo->si_xib_buf);
25677 +                       sbinfo->si_xib_next_bit = bit; /* hint read first by au_xino_new_ino() */
25678 +               }
25679 +               mutex_unlock(&sbinfo->si_xib_mtx);
25680 +       }
25681 +       /* the xinofile entry is only zeroed when the bitmap update did not fail */
25682 +       if (!err)
25683 +               err = au_xino_write(sb, bindex, h_ino, 0);
25684 +       return err;
25685 +}
25686 +
25687 +/* get an unused inode number from bitmap; returns 0 when a bitmap page cannot be loaded */
25688 +ino_t au_xino_new_ino(struct super_block *sb)
25689 +{
25690 +       ino_t ino;
25691 +       unsigned long *p, pindex, ul, pend;
25692 +       struct au_sbinfo *sbinfo;
25693 +       struct file *file;
25694 +       int free_bit, err;
25695 +       /* without XINO there is no bitmap; iunique() supplies the number */
25696 +       if (!au_opt_test(au_mntflags(sb), XINO))
25697 +               return iunique(sb, AUFS_FIRST_INO);
25698 +
25699 +       sbinfo = au_sbi(sb);
25700 +       mutex_lock(&sbinfo->si_xib_mtx);
25701 +       p = sbinfo->si_xib_buf;
25702 +       free_bit = sbinfo->si_xib_next_bit; /* hint only, verified by test_bit() */
25703 +       if (free_bit < page_bits && !test_bit(free_bit, p))
25704 +               goto out; /* success */
25705 +       free_bit = find_first_zero_bit(p, page_bits);
25706 +       if (free_bit < page_bits)
25707 +               goto out; /* success */
25708 +       /* buffered page is full: walk the pages below it; ul wraps to ULONG_MAX after page 0 */
25709 +       pindex = sbinfo->si_xib_last_pindex;
25710 +       for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
25711 +               err = xib_pindex(sb, ul);
25712 +               if (unlikely(err))
25713 +                       goto out_err;
25714 +               free_bit = find_first_zero_bit(p, page_bits);
25715 +               if (free_bit < page_bits)
25716 +                       goto out; /* success */
25717 +       }
25718 +       /* then the pages above it, up to the first page past the end of the file */
25719 +       file = sbinfo->si_xib;
25720 +       pend = i_size_read(file->f_dentry->d_inode) / PAGE_SIZE;
25721 +       for (ul = pindex + 1; ul <= pend; ul++) {
25722 +               err = xib_pindex(sb, ul);
25723 +               if (unlikely(err))
25724 +                       goto out_err;
25725 +               free_bit = find_first_zero_bit(p, page_bits);
25726 +               if (free_bit < page_bits)
25727 +                       goto out; /* success */
25728 +       }
25729 +       BUG(); /* page pend is zero-filled by xib_pindex(), so a free bit must exist */
25730 +
25731 + out:
25732 +       set_bit(free_bit, p);
25733 +       sbinfo->si_xib_next_bit = free_bit + 1; /* follow the bit just taken, not a blind ++ */
25734 +       pindex = sbinfo->si_xib_last_pindex;
25735 +       mutex_unlock(&sbinfo->si_xib_mtx);
25736 +       ino = xib_calc_ino(pindex, free_bit);
25737 +       AuDbg("i%lu\n", (unsigned long)ino);
25738 +       return ino;
25739 + out_err:
25740 +       mutex_unlock(&sbinfo->si_xib_mtx);
25741 +       AuDbg("i0\n");
25742 +       return 0;
25743 +}
25744 +
25745 +/*
25746 + * read the inode number stored at the position of @h_ino in the xinofile of
25747 + * branch{@sb, @bindex} into *@ino. *@ino stays 0 when XINO is off or the file
25748 + * is too short to hold the entry; no new number is allocated here.
25749 + */
25750 +int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
25751 +                ino_t *ino)
25752 +{
25753 +       int err;
25754 +       ssize_t sz;
25755 +       loff_t pos;
25756 +       struct file *file;
25757 +       struct au_sbinfo *sbinfo;
25758 +
25759 +       *ino = 0;
25760 +       if (!au_opt_test(au_mntflags(sb), XINO))
25761 +               return 0; /* no xino */
25762 +
25763 +       err = 0;
25764 +       sbinfo = au_sbi(sb);
25765 +       pos = h_ino;
25766 +       if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
25767 +               AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
25768 +               return -EFBIG;
25769 +       }
25770 +       pos *= sizeof(*ino);
25771 +       /* an offset beyond the current file size means no entry was ever written */
25772 +       file = au_sbr(sb, bindex)->br_xino.xi_file;
25773 +       if (i_size_read(file->f_dentry->d_inode) < pos + sizeof(*ino))
25774 +               return 0; /* no ino */
25775 +
25776 +       sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
25777 +       if (sz == sizeof(*ino))
25778 +               return 0; /* success */
25779 +       /* a negative sz is returned as is; a short read becomes -EIO */
25780 +       err = sz;
25781 +       if (unlikely(sz >= 0)) {
25782 +               err = -EIO;
25783 +               AuIOErr("xino read error (%zd)\n", sz);
25784 +       }
25785 +
25786 +       return err;
25787 +}
25788 +
25789 +/* ---------------------------------------------------------------------- */
25790 +
25791 +/* create and set a new xino file: open @fname exclusively, unlink it at once and
25792 + * return the still-open file, or an ERR_PTR; errors are logged unless @silent */
25793 +struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
25794 +{
25795 +       struct file *file;
25796 +       struct dentry *h_parent, *d;
25797 +       struct inode *h_dir;
25798 +       int err;
25799 +
25800 +       /*
25801 +        * at mount-time, and the xino file is the default path,
25802 +        * hinotify is disabled so we have no inotify events to ignore.
25803 +        * when a user specified the xino, we cannot get au_hdir to be ignored.
25804 +        */
25805 +       file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE,
25806 +                              S_IRUGO | S_IWUGO);
25807 +       if (IS_ERR(file)) {
25808 +               if (!silent)
25809 +                       AuErr("open %s(%ld)\n", fname, PTR_ERR(file));
25810 +               return file;
25811 +       }
25812 +
25813 +       /* keep file count */
25814 +       h_parent = dget_parent(file->f_dentry);
25815 +       h_dir = h_parent->d_inode;
25816 +       mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
25817 +       /* mnt_want_write() is unnecessary here */
25818 +       err = vfsub_unlink(h_dir, &file->f_path, /*force*/0);
25819 +       mutex_unlock(&h_dir->i_mutex);
25820 +       dput(h_parent);
25821 +       if (unlikely(err)) {
25822 +               if (!silent)
25823 +                       AuErr("unlink %s(%d)\n", fname, err);
25824 +               goto out;
25825 +       }
25826 +       /* both checks below fail with -EINVAL */
25827 +       err = -EINVAL;
25828 +       d = file->f_dentry;
25829 +       if (unlikely(sb == d->d_sb)) { /* the file was created inside this aufs mount */
25830 +               if (!silent)
25831 +                       AuErr("%s must be outside\n", fname);
25832 +               goto out;
25833 +       }
25834 +       if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
25835 +               if (!silent)
25836 +                       AuErr("xino doesn't support %s(%s)\n",
25837 +                             fname, au_sbtype(d->d_sb));
25838 +               goto out;
25839 +       }
25840 +       return file; /* success */
25841 +
25842 + out:
25843 +       fput(file);
25844 +       file = ERR_PTR(err);
25845 +       return file;
25846 +}
25847 +
25848 +/*
25849 + * find another branch on the same filesystem as branch{@btgt}: indices below
25850 + * @btgt are always searched, those above it only up to @bend. -1 if none.
25851 + */
25852 +static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
25853 +                       aufs_bindex_t bend)
25854 +{
25855 +       aufs_bindex_t bindex;
25856 +       struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
25857 +
25858 +       for (bindex = 0; bindex < btgt; bindex++)
25859 +               if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
25860 +                       return bindex;
25861 +       for (bindex++; bindex <= bend; bindex++) /* bindex == btgt here; step over it */
25862 +               if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
25863 +                       return bindex;
25864 +       return -1;
25865 +}
25866 +
25867 +/* ---------------------------------------------------------------------- */
25868 +
25869 +/*
25870 + * initialize the xinofile for the specified branch @br at the place/path
25871 + * where @base_file indicates and record AUFS_ROOT_INO at @h_ino in it.
25872 + * if @do_test is true, reuse the xinofile of a branch on the same filesystem.
25873 + * on error @br->br_xino.xi_file is released and left NULL.
25874 + */
25875 +int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
25876 +              struct file *base_file, int do_test)
25877 +{
25878 +       int err;
25879 +       ino_t ino;
25880 +       aufs_bindex_t bend, bindex;
25881 +       struct au_branch *shared_br, *b;
25882 +       struct file *file;
25883 +       struct super_block *tgt_sb;
25884 +       /* look for the first branch that sits on the same underlying super_block */
25885 +       shared_br = NULL;
25886 +       bend = au_sbend(sb);
25887 +       if (do_test) {
25888 +               tgt_sb = br->br_mnt->mnt_sb;
25889 +               for (bindex = 0; bindex <= bend; bindex++) {
25890 +                       b = au_sbr(sb, bindex);
25891 +                       if (tgt_sb == b->br_mnt->mnt_sb) {
25892 +                               shared_br = b;
25893 +                               break;
25894 +                       }
25895 +               }
25896 +       }
25897 +       /* nothing to share: create a file of our own; otherwise take a reference */
25898 +       if (!shared_br || !shared_br->br_xino.xi_file) {
25899 +               struct au_xino_lock_dir ldir;
25900 +
25901 +               au_xino_lock_dir(sb, base_file, &ldir);
25902 +               /* mnt_want_write() is unnecessary here */
25903 +               file = au_xino_create2(base_file, NULL);
25904 +               au_xino_unlock_dir(&ldir);
25905 +               err = PTR_ERR(file);
25906 +               if (IS_ERR(file))
25907 +                       goto out;
25908 +               br->br_xino.xi_file = file;
25909 +       } else {
25910 +               br->br_xino.xi_file = shared_br->br_xino.xi_file;
25911 +               get_file(br->br_xino.xi_file);
25912 +       }
25913 +
25914 +       ino = AUFS_ROOT_INO;
25915 +       err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
25916 +                              h_ino, ino);
25917 +       if (unlikely(err)) {
25918 +               fput(br->br_xino.xi_file); /* drop the reference obtained above */
25919 +               br->br_xino.xi_file = NULL;
25920 +       }
25921 + out:
25922 +       return err;
25923 +}
25924 +
25925 +/* ---------------------------------------------------------------------- */
25926 +
25927 +/* truncate a xino bitmap file */
25928 +/* set the bitmap bit of every inode number (>= AUFS_FIRST_INO) stored in xino @file; */
25929 +/* @page is a PAGE_SIZE read buffer. 0 or a negative errno. todo: slow */
25930 +static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
25931 +{
25932 +       int err, bit;
25933 +       ssize_t sz;
25934 +       unsigned long pindex;
25935 +       loff_t pos, pend;
25936 +       struct au_sbinfo *sbinfo;
25937 +       au_readf_t func;
25938 +       ino_t *ino;
25939 +       unsigned long *p;
25940 +
25941 +       err = 0;
25942 +       sbinfo = au_sbi(sb);
25943 +       MtxMustLock(&sbinfo->si_xib_mtx);
25944 +       p = sbinfo->si_xib_buf;
25945 +       func = sbinfo->si_xread;
25946 +       pend = i_size_read(file->f_dentry->d_inode);
25947 +       pos = 0;
25948 +       while (pos < pend) {
25949 +               sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
25950 +               err = sz;
25951 +               if (unlikely(sz <= 0))
25952 +                       goto out;
25953 +               /* step by the entry size, sizeof(*ino), not by the pointer size */
25954 +               err = 0;
25955 +               for (ino = page; sz > 0; ino++, sz -= sizeof(*ino)) {
25956 +                       if (unlikely(*ino < AUFS_FIRST_INO))
25957 +                               continue;
25958 +
25959 +                       xib_calc_bit(*ino, &pindex, &bit);
25960 +                       AuDebugOn(page_bits <= bit);
25961 +                       err = xib_pindex(sb, pindex);
25962 +                       if (!err)
25963 +                               set_bit(bit, p);
25964 +                       else
25965 +                               goto out;
25966 +               }
25967 +       }
25968 +
25969 + out:
25970 +       return err;
25971 +}
25972 +/* run do_xib_restore() over the branches' xino files; 0 or a negative errno */
25973 +static int xib_restore(struct super_block *sb)
25974 +{
25975 +       int err;
25976 +       aufs_bindex_t bindex, bend;
25977 +       void *page;
25978 +
25979 +       err = -ENOMEM;
25980 +       page = (void *)__get_free_page(GFP_NOFS);
25981 +       if (unlikely(!page))
25982 +               goto out;
25983 +       /* a branch sharing its filesystem with a lower-indexed branch is only logged */
25984 +       err = 0;
25985 +       bend = au_sbend(sb);
25986 +       for (bindex = 0; !err && bindex <= bend; bindex++)
25987 +               if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
25988 +                       err = do_xib_restore
25989 +                               (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
25990 +               else
25991 +                       AuDbg("b%d\n", bindex);
25992 +       free_page((unsigned long)page);
25993 +
25994 + out:
25995 +       return err;
25996 +}
25997 +/* replace the xib file by a fresh one and rebuild the bitmap via xib_restore() */
25998 +int au_xib_trunc(struct super_block *sb)
25999 +{
26000 +       int err;
26001 +       ssize_t sz;
26002 +       loff_t pos;
26003 +       struct au_xino_lock_dir ldir;
26004 +       struct au_sbinfo *sbinfo;
26005 +       unsigned long *p;
26006 +       struct file *file;
26007 +
26008 +       SiMustWriteLock(sb);
26009 +
26010 +       err = 0;
26011 +       sbinfo = au_sbi(sb);
26012 +       if (!au_opt_test(sbinfo->si_mntflags, XINO))
26013 +               goto out;
26014 +       /* nothing to do while the file is no larger than one page */
26015 +       file = sbinfo->si_xib;
26016 +       if (i_size_read(file->f_dentry->d_inode) <= PAGE_SIZE)
26017 +               goto out;
26018 +
26019 +       au_xino_lock_dir(sb, file, &ldir);
26020 +       /* mnt_want_write() is unnecessary here */
26021 +       file = au_xino_create2(sbinfo->si_xib, NULL);
26022 +       au_xino_unlock_dir(&ldir);
26023 +       err = PTR_ERR(file);
26024 +       if (IS_ERR(file))
26025 +               goto out;
26026 +       fput(sbinfo->si_xib);
26027 +       sbinfo->si_xib = file;
26028 +       /* NOTE(review): the buffer is zeroed and written at offset 0 but si_xib_last_pindex is not reset - confirm */
26029 +       p = sbinfo->si_xib_buf;
26030 +       memset(p, 0, PAGE_SIZE);
26031 +       pos = 0;
26032 +       sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
26033 +       if (unlikely(sz != PAGE_SIZE)) {
26034 +               err = sz;
26035 +               AuIOErr("err %d\n", err);
26036 +               if (sz >= 0)
26037 +                       err = -EIO;
26038 +               goto out;
26039 +       }
26040 +       /* xib_restore() runs with si_xib_mtx held */
26041 +       mutex_lock(&sbinfo->si_xib_mtx);
26042 +       /* mnt_want_write() is unnecessary here */
26043 +       err = xib_restore(sb);
26044 +       mutex_unlock(&sbinfo->si_xib_mtx);
26045 +
26046 +out:
26047 +       return err;
26048 +}
26049 +
26050 +/* ---------------------------------------------------------------------- */
26051 +
26052 +/*
26053 + * xino mount option handlers
26054 + */
26055 +static au_readf_t find_readf(struct file *h_file)
26056 +{
26057 +       const struct file_operations *ops = h_file->f_op;
26058 +       /* ->read wins; do_sync_read stands in when only ->aio_read exists */
26059 +       if (!ops)
26060 +               return ERR_PTR(-ENOSYS);
26061 +       if (ops->read)
26062 +               return ops->read;
26063 +       if (ops->aio_read)
26064 +               return do_sync_read;
26065 +       return ERR_PTR(-ENOSYS);
26066 +}
26067 +/* ->write wins; do_sync_write stands in when only ->aio_write exists; else ERR_PTR(-ENOSYS) */
26068 +static au_writef_t find_writef(struct file *h_file)
26069 +{
26070 +       const struct file_operations *ops = h_file->f_op;
26071 +
26072 +       if (!ops)
26073 +               return ERR_PTR(-ENOSYS);
26074 +       if (ops->write)
26075 +               return ops->write;
26076 +       if (ops->aio_write)
26077 +               return do_sync_write;
26078 +       return ERR_PTR(-ENOSYS);
26079 +}
26080 +
26081 +/* xino bitmap: forget the read/write handlers, put the xib file and free its page buffer */
26082 +static void xino_clear_xib(struct super_block *sb)
26083 +{
26084 +       struct au_sbinfo *sbinfo;
26085 +
26086 +       SiMustWriteLock(sb);
26087 +
26088 +       sbinfo = au_sbi(sb);
26089 +       sbinfo->si_xread = NULL;
26090 +       sbinfo->si_xwrite = NULL;
26091 +       if (sbinfo->si_xib)
26092 +               fput(sbinfo->si_xib);
26093 +       sbinfo->si_xib = NULL;
26094 +       free_page((unsigned long)sbinfo->si_xib_buf);
26095 +       sbinfo->si_xib_buf = NULL;
26096 +}
26097 +/* install a new xib file made by au_xino_create2(@base, old xib) and reset the bitmap state */
26098 +static int au_xino_set_xib(struct super_block *sb, struct file *base)
26099 +{
26100 +       int err;
26101 +       loff_t pos;
26102 +       struct au_sbinfo *sbinfo;
26103 +       struct file *file;
26104 +
26105 +       SiMustWriteLock(sb);
26106 +
26107 +       sbinfo = au_sbi(sb);
26108 +       file = au_xino_create2(base, sbinfo->si_xib);
26109 +       err = PTR_ERR(file);
26110 +       if (IS_ERR(file))
26111 +               goto out;
26112 +       if (sbinfo->si_xib)
26113 +               fput(sbinfo->si_xib);
26114 +       sbinfo->si_xib = file;
26115 +       sbinfo->si_xread = find_readf(file); /* NOTE(review): may be ERR_PTR(-ENOSYS); not checked here */
26116 +       sbinfo->si_xwrite = find_writef(file); /* NOTE(review): same as above */
26117 +       /* an already allocated page buffer is reused as is */
26118 +       err = -ENOMEM;
26119 +       if (!sbinfo->si_xib_buf)
26120 +               sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
26121 +       if (unlikely(!sbinfo->si_xib_buf))
26122 +               goto out_unset;
26123 +       /* a file shorter than one page gets the buffer written out as its first page */
26124 +       sbinfo->si_xib_last_pindex = 0;
26125 +       sbinfo->si_xib_next_bit = 0;
26126 +       if (i_size_read(file->f_dentry->d_inode) < PAGE_SIZE) {
26127 +               pos = 0;
26128 +               err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
26129 +                                 PAGE_SIZE, &pos);
26130 +               if (unlikely(err != PAGE_SIZE))
26131 +                       goto out_free;
26132 +       }
26133 +       err = 0;
26134 +       goto out; /* success */
26135 +
26136 + out_free:
26137 +       free_page((unsigned long)sbinfo->si_xib_buf);
26138 +       sbinfo->si_xib_buf = NULL;
26139 +       if (err >= 0) /* short write */
26140 +               err = -EIO;
26141 + out_unset:
26142 +       fput(sbinfo->si_xib);
26143 +       sbinfo->si_xib = NULL;
26144 +       sbinfo->si_xread = NULL;
26145 +       sbinfo->si_xwrite = NULL;
26146 + out:
26147 +       return err;
26148 +}
26149 +
26150 +/* xino for each branch: put and forget every branch's xino file */
26151 +static void xino_clear_br(struct super_block *sb)
26152 +{
26153 +       aufs_bindex_t bi, last;
26154 +       struct au_branch *branch;
26155 +
26156 +       last = au_sbend(sb);
26157 +       for (bi = 0; bi <= last; bi++) {
26158 +               branch = au_sbr(sb, bi);
26159 +               if (branch && branch->br_xino.xi_file) {
26160 +                       /* drop our reference before clearing the slot */
26161 +                       fput(branch->br_xino.xi_file);
26162 +                       branch->br_xino.xi_file = NULL;
26163 +               }
26164 +       }
26165 +}
26166 +/* build a new xino file per branch (shared per filesystem) from @base, then install them all */
26167 +static int au_xino_set_br(struct super_block *sb, struct file *base)
26168 +{
26169 +       int err;
26170 +       ino_t ino;
26171 +       aufs_bindex_t bindex, bend, bshared;
26172 +       struct {
26173 +               struct file *old, *new;
26174 +       } *fpair, *p;
26175 +       struct au_branch *br;
26176 +       struct inode *inode;
26177 +       au_writef_t writef;
26178 +
26179 +       SiMustWriteLock(sb);
26180 +       /* one zeroed {old, new} slot per branch index 0..bend */
26181 +       err = -ENOMEM;
26182 +       bend = au_sbend(sb);
26183 +       fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS);
26184 +       if (unlikely(!fpair))
26185 +               goto out;
26186 +       /* pass 1: prepare every new file; the branches themselves are not touched yet */
26187 +       inode = sb->s_root->d_inode;
26188 +       ino = AUFS_ROOT_INO;
26189 +       writef = au_sbi(sb)->si_xwrite;
26190 +       for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
26191 +               br = au_sbr(sb, bindex);
26192 +               bshared = is_sb_shared(sb, bindex, bindex - 1);
26193 +               if (bshared >= 0) {
26194 +                       /* shared xino */
26195 +                       *p = fpair[bshared];
26196 +                       get_file(p->new);
26197 +               }
26198 +
26199 +               if (!p->new) {
26200 +                       /* new xino */
26201 +                       p->old = br->br_xino.xi_file;
26202 +                       p->new = au_xino_create2(base, br->br_xino.xi_file);
26203 +                       err = PTR_ERR(p->new);
26204 +                       if (IS_ERR(p->new)) {
26205 +                               p->new = NULL;
26206 +                               goto out_pair;
26207 +                       }
26208 +               }
26209 +
26210 +               err = au_xino_do_write(writef, p->new,
26211 +                                      au_h_iptr(inode, bindex)->i_ino, ino);
26212 +               if (unlikely(err))
26213 +                       goto out_pair;
26214 +       }
26215 +       /* pass 2: every file is ready; each branch takes its own reference */
26216 +       for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
26217 +               br = au_sbr(sb, bindex);
26218 +               if (br->br_xino.xi_file)
26219 +                       fput(br->br_xino.xi_file);
26220 +               get_file(p->new);
26221 +               br->br_xino.xi_file = p->new;
26222 +       }
26223 +       /* success falls through: the references held by fpair[] are dropped either way */
26224 + out_pair:
26225 +       for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++)
26226 +               if (p->new)
26227 +                       fput(p->new);
26228 +               else
26229 +                       break;
26230 +       kfree(fpair);
26231 + out:
26232 +       return err;
26233 +}
26234 +/* tear down xigen, the xino bitmap and every branch's xino file, then clear the XINO flag */
26235 +void au_xino_clr(struct super_block *sb)
26236 +{
26237 +       struct au_sbinfo *sbinfo;
26238 +
26239 +       au_xigen_clr(sb);
26240 +       xino_clear_xib(sb);
26241 +       xino_clear_br(sb);
26242 +       sbinfo = au_sbi(sb);
26243 +       /* lvalue, do not call au_mntflags() */
26244 +       au_opt_clr(sbinfo->si_mntflags, XINO);
26245 +}
26246 +/* switch xino on using @xino->file as base: set up the bitmap, xigen and per-branch files */
26247 +int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
26248 +{
26249 +       int err, skip;
26250 +       struct dentry *parent, *cur_parent;
26251 +       struct qstr *dname, *cur_name;
26252 +       struct file *cur_xino;
26253 +       struct inode *dir;
26254 +       struct au_sbinfo *sbinfo;
26255 +
26256 +       SiMustWriteLock(sb);
26257 +
26258 +       err = 0;
26259 +       sbinfo = au_sbi(sb);
26260 +       parent = dget_parent(xino->file->f_dentry);
26261 +       if (remount) {
26262 +               skip = 0; /* set when the current xib has the same parent dentry and name */
26263 +               dname = &xino->file->f_dentry->d_name;
26264 +               cur_xino = sbinfo->si_xib;
26265 +               if (cur_xino) {
26266 +                       cur_parent = dget_parent(cur_xino->f_dentry);
26267 +                       cur_name = &cur_xino->f_dentry->d_name;
26268 +                       skip = (cur_parent == parent
26269 +                               && dname->len == cur_name->len
26270 +                               && !memcmp(dname->name, cur_name->name,
26271 +                                          dname->len));
26272 +                       dput(cur_parent);
26273 +               }
26274 +               if (skip)
26275 +                       goto out;
26276 +       }
26277 +       /* the flag is set first; the three setup steps run under the parent's i_mutex */
26278 +       au_opt_set(sbinfo->si_mntflags, XINO);
26279 +       dir = parent->d_inode;
26280 +       mutex_lock_nested(&dir->i_mutex, AuLsc_I_PARENT);
26281 +       /* mnt_want_write() is unnecessary here */
26282 +       err = au_xino_set_xib(sb, xino->file);
26283 +       if (!err)
26284 +               err = au_xigen_set(sb, xino->file);
26285 +       if (!err)
26286 +               err = au_xino_set_br(sb, xino->file);
26287 +       mutex_unlock(&dir->i_mutex);
26288 +       if (!err)
26289 +               goto out; /* success */
26290 +
26291 +       /* reset all -- NOTE(review): nothing is reset here and XINO stays set; confirm the caller cleans up */
26292 +       AuIOErr("failed creating xino(%d).\n", err);
26293 +
26294 + out:
26295 +       dput(parent);
26296 +       return err;
26297 +}
26298 +
26299 +/* ---------------------------------------------------------------------- */
26300 +
26301 +/* create a xinofile at the default place/path: "/" AUFS_XINO_FNAME appended to
26302 + * the d_path() of the first branch that is writable and not rejected by
26303 + * au_test_fs_bad_xino(), else AUFS_XINO_DEFPATH. returns the file or an ERR_PTR() value. */
26304 +struct file *au_xino_def(struct super_block *sb)
26305 +{
26306 +       struct file *file;
26307 +       char *page, *p;
26308 +       struct au_branch *br;
26309 +       struct super_block *h_sb;
26310 +       struct path path;
26311 +       aufs_bindex_t bend, bindex, bwr;
26312 +
26313 +       br = NULL;
26314 +       bend = au_sbend(sb);
26315 +       bwr = -1; /* stays -1 when no branch qualifies */
26316 +       for (bindex = 0; bindex <= bend; bindex++) { /* stop at the first qualifying branch; br is left pointing at it */
26317 +               br = au_sbr(sb, bindex);
26318 +               if (au_br_writable(br->br_perm)
26319 +                   && !au_test_fs_bad_xino(br->br_mnt->mnt_sb)) {
26320 +                       bwr = bindex;
26321 +                       break;
26322 +               }
26323 +       }
26324 +
26325 +       if (bwr >= 0) {
26326 +               file = ERR_PTR(-ENOMEM); /* returned if the path buffer cannot be allocated */
26327 +               page = __getname();
26328 +               if (unlikely(!page))
26329 +                       goto out;
26330 +               path.mnt = br->br_mnt;
26331 +               path.dentry = au_h_dptr(sb->s_root, bwr);
26332 +               p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME)); /* shortened length reserves room for the strcat() below */
26333 +               file = (void *)p; /* if d_path() failed, its error pointer becomes the return value */
26334 +               if (!IS_ERR(p)) {
26335 +                       strcat(p, "/" AUFS_XINO_FNAME);
26336 +                       AuDbg("%s\n", p);
26337 +                       file = au_xino_create(sb, p, /*silent*/0);
26338 +                       if (!IS_ERR(file))
26339 +                               au_xino_brid_set(sb, br->br_id); /* record the chosen branch's id; the fallback path records -1 */
26340 +               }
26341 +               __putname(page);
26342 +       } else { /* no qualifying branch: fall back to AUFS_XINO_DEFPATH */
26343 +               file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
26344 +               if (IS_ERR(file))
26345 +                       goto out;
26346 +               h_sb = file->f_dentry->d_sb;
26347 +               if (unlikely(au_test_fs_bad_xino(h_sb))) { /* the fallback location's fs is checked too; on rejection the file is released */
26348 +                       AuErr("xino doesn't support %s(%s)\n",
26349 +                             AUFS_XINO_DEFPATH, au_sbtype(h_sb));
26350 +                       fput(file);
26351 +                       file = ERR_PTR(-EINVAL);
26352 +               }
26353 +               if (!IS_ERR(file))
26354 +                       au_xino_brid_set(sb, -1);
26355 +       }
26356 +
26357 + out:
26358 +       return file;
26359 +}
26360 +
26361 +/* ---------------------------------------------------------------------- */
26362 +/* print the path of @file into @seq with au_seq_path(), then cut the trailing "\\040(deleted)" off the seq buffer. returns 0, or the negative value au_seq_path() returned. */
26363 +int au_xino_path(struct seq_file *seq, struct file *file)
26364 +{
26365 +       int err;
26366 +
26367 +       err = au_seq_path(seq, &file->f_path);
26368 +       if (unlikely(err < 0))
26369 +               goto out;
26370 +
26371 +       err = 0; /* a non-negative result from au_seq_path() is reported as plain success */
26372 +#define Deleted "\\040(deleted)"
26373 +       seq->count -= sizeof(Deleted) - 1; /* unconditional: assumes the output ends with Deleted (presumably because the xino file is unlinked -- confirm) */
26374 +       AuDebugOn(memcmp(seq->buf + seq->count, Deleted, /* debug check that the bytes just cut off really were Deleted */
26375 +                        sizeof(Deleted) - 1));
26376 +#undef Deleted
26377 +
26378 + out:
26379 +       return err;
26380 +}
26381 diff --git a/fs/namei.c b/fs/namei.c
26382 index 4ea63ed..2759ad4 100644
26383 --- a/fs/namei.c
26384 +++ b/fs/namei.c
26385 @@ -1241,7 +1241,7 @@ out:
26386   * needs parent already locked. Doesn't follow mounts.
26387   * SMP-safe.
26388   */
26389 -static struct dentry *lookup_hash(struct nameidata *nd)
26390 +struct dentry *lookup_hash(struct nameidata *nd)
26391  {
26392         int err;
26393  
26394 @@ -1251,7 +1251,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
26395         return __lookup_hash(&nd->last, nd->path.dentry, nd);
26396  }
26397  
26398 -static int __lookup_one_len(const char *name, struct qstr *this,
26399 +int __lookup_one_len(const char *name, struct qstr *this,
26400                 struct dentry *base, int len)
26401  {
26402         unsigned long hash;
26403 diff --git a/fs/splice.c b/fs/splice.c
26404 index a1e701c..409245a 100644
26405 --- a/fs/splice.c
26406 +++ b/fs/splice.c
26407 @@ -887,8 +887,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
26408  /*
26409   * Attempt to initiate a splice from pipe to file.
26410   */
26411 -static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
26412 -                          loff_t *ppos, size_t len, unsigned int flags)
26413 +long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
26414 +                   loff_t *ppos, size_t len, unsigned int flags)
26415  {
26416         int ret;
26417  
26418 @@ -911,9 +911,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
26419  /*
26420   * Attempt to initiate a splice from a file to a pipe.
26421   */
26422 -static long do_splice_to(struct file *in, loff_t *ppos,
26423 -                        struct pipe_inode_info *pipe, size_t len,
26424 -                        unsigned int flags)
26425 +long do_splice_to(struct file *in, loff_t *ppos,
26426 +                 struct pipe_inode_info *pipe, size_t len,
26427 +                 unsigned int flags)
26428  {
26429         int ret;
26430  
26431 diff --git a/include/linux/Kbuild b/include/linux/Kbuild
26432 index b68ec09..22d182c 100644
26433 --- a/include/linux/Kbuild
26434 +++ b/include/linux/Kbuild
26435 @@ -34,6 +34,7 @@ header-y += atmppp.h
26436  header-y += atmsap.h
26437  header-y += atmsvc.h
26438  header-y += atm_zatm.h
26439 +header-y += aufs_type.h
26440  header-y += auto_fs4.h
26441  header-y += ax25.h
26442  header-y += b1lli.h
26443 diff --git a/include/linux/aufs_type.h b/include/linux/aufs_type.h
26444 new file mode 100644
26445 index 0000000..cf42f85
26446 --- /dev/null
26447 +++ b/include/linux/aufs_type.h
26448 @@ -0,0 +1,195 @@
26449 +/*
26450 + * Copyright (C) 2005-2009 Junjiro R. Okajima
26451 + *
26452 + * This program, aufs is free software; you can redistribute it and/or modify
26453 + * it under the terms of the GNU General Public License as published by
26454 + * the Free Software Foundation; either version 2 of the License, or
26455 + * (at your option) any later version.
26456 + *
26457 + * This program is distributed in the hope that it will be useful,
26458 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
26459 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26460 + * GNU General Public License for more details.
26461 + *
26462 + * You should have received a copy of the GNU General Public License
26463 + * along with this program; if not, write to the Free Software
26464 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
26465 + */
26466 +
26467 +#ifndef __AUFS_TYPE_H__
26468 +#define __AUFS_TYPE_H__
26469 +
26470 +#include <linux/ioctl.h>
26471 +#include <linux/limits.h>
26472 +#include <linux/types.h>
26473 +
26474 +#define AUFS_VERSION   "2-27"
26475 +
26476 +/* todo? move this to linux-2.6.19/include/magic.h */
26477 +#define AUFS_SUPER_MAGIC       ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
26478 +
26479 +/* ---------------------------------------------------------------------- */
26480 +
26481 +#ifdef CONFIG_AUFS_BRANCH_MAX_127
26482 +typedef __s8 aufs_bindex_t;
26483 +#define AUFS_BRANCH_MAX 127
26484 +#else
26485 +typedef __s16 aufs_bindex_t;
26486 +#ifdef CONFIG_AUFS_BRANCH_MAX_511
26487 +#define AUFS_BRANCH_MAX 511
26488 +#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
26489 +#define AUFS_BRANCH_MAX 1023
26490 +#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
26491 +#define AUFS_BRANCH_MAX 32767
26492 +#endif
26493 +#endif
26494 +
26495 +#ifdef __KERNEL__
26496 +#ifndef AUFS_BRANCH_MAX
26497 +#error unknown CONFIG_AUFS_BRANCH_MAX value
26498 +#endif
26499 +#endif /* __KERNEL__ */
26500 +
26501 +/* ---------------------------------------------------------------------- */
26502 +
26503 +#define AUFS_NAME              "aufs"
26504 +#define AUFS_FSTYPE            AUFS_NAME
26505 +
26506 +#define AUFS_ROOT_INO          2
26507 +#define AUFS_FIRST_INO         11
26508 +
26509 +#define AUFS_WH_PFX            ".wh."
26510 +#define AUFS_WH_PFX_LEN                ((int)sizeof(AUFS_WH_PFX) - 1)
26511 +#define AUFS_WH_TMP_LEN                4
26512 +/* a limit for rmdir/rename a dir */
26513 +#define AUFS_MAX_NAMELEN       (NAME_MAX \
26514 +                               - AUFS_WH_PFX_LEN * 2   /* doubly whiteouted */\
26515 +                               - 1                     /* dot */\
26516 +                               - AUFS_WH_TMP_LEN)      /* hex */
26517 +#define AUFS_XINO_FNAME                "." AUFS_NAME ".xino"
26518 +#define AUFS_XINO_DEFPATH      "/tmp/" AUFS_XINO_FNAME
26519 +#define AUFS_XINO_TRUNC_INIT   64 /* blocks */
26520 +#define AUFS_XINO_TRUNC_STEP   4  /* blocks */
26521 +#define AUFS_DIRWH_DEF         3
26522 +#define AUFS_RDCACHE_DEF       10 /* seconds */
26523 +#define AUFS_RDBLK_DEF         512 /* bytes */
26524 +#define AUFS_RDHASH_DEF                32
26525 +#define AUFS_WKQ_NAME          AUFS_NAME "d"
26526 +#define AUFS_NWKQ_DEF          4
26527 +#define AUFS_MFS_SECOND_DEF    30 /* seconds */
26528 +#define AUFS_PLINK_WARN                100 /* number of plinks */
26529 +
26530 +#define AUFS_DIROPQ_NAME       AUFS_WH_PFX ".opq" /* whiteouted doubly */
26531 +#define AUFS_WH_DIROPQ         AUFS_WH_PFX AUFS_DIROPQ_NAME
26532 +
26533 +#define AUFS_BASE_NAME         AUFS_WH_PFX AUFS_NAME
26534 +#define AUFS_PLINKDIR_NAME     AUFS_WH_PFX "plnk"
26535 +#define AUFS_ORPHDIR_NAME      AUFS_WH_PFX "orph"
26536 +
26537 +/* doubly whiteouted */
26538 +#define AUFS_WH_BASE           AUFS_WH_PFX AUFS_BASE_NAME
26539 +#define AUFS_WH_PLINKDIR       AUFS_WH_PFX AUFS_PLINKDIR_NAME
26540 +#define AUFS_WH_ORPHDIR                AUFS_WH_PFX AUFS_ORPHDIR_NAME
26541 +
26542 +/* branch permission */
26543 +#define AUFS_BRPERM_RW         "rw"
26544 +#define AUFS_BRPERM_RO         "ro"
26545 +#define AUFS_BRPERM_RR         "rr"
26546 +#define AUFS_BRPERM_WH         "wh"
26547 +#define AUFS_BRPERM_NLWH       "nolwh"
26548 +#define AUFS_BRPERM_ROWH       AUFS_BRPERM_RO "+" AUFS_BRPERM_WH
26549 +#define AUFS_BRPERM_RRWH       AUFS_BRPERM_RR "+" AUFS_BRPERM_WH
26550 +#define AUFS_BRPERM_RWNLWH     AUFS_BRPERM_RW "+" AUFS_BRPERM_NLWH
26551 +
26552 +/* ---------------------------------------------------------------------- */
26553 +
26554 +/* ioctl */
26555 +enum {
26556 +       AuCtl_PLINK_MAINT,
26557 +       AuCtl_PLINK_CLEAN,
26558 +
26559 +       /* readdir in userspace */
26560 +       AuCtl_RDU,
26561 +       AuCtl_RDU_INO,
26562 +
26563 +       /* pathconf wrapper */
26564 +       AuCtl_WBR_FD
26565 +};
26566 +
26567 +/* borrowed from linux/include/linux/kernel.h */
26568 +#ifndef ALIGN
26569 +#define ALIGN(x, a)            __ALIGN_MASK(x, (typeof(x))(a)-1)
26570 +#define __ALIGN_MASK(x, mask)  (((x)+(mask))&~(mask))
26571 +#endif
26572 +
26573 +/* borrowed from linux/include/linux/compiler-gcc3.h */
26574 +#ifndef __aligned
26575 +#define __aligned(x)                   __attribute__((aligned(x)))
26576 +#define __packed                       __attribute__((packed))
26577 +#endif
26578 +
26579 +struct au_rdu_cookie {
26580 +       __u64           h_pos;
26581 +       __s16           bindex;
26582 +       __u8            flags;
26583 +       __u8            pad;
26584 +       __u32           generation;
26585 +} __aligned(8);
26586 +
26587 +struct au_rdu_ent { /* fixed header of one entry; the name bytes follow it directly */
26588 +       __u64           ino;
26589 +       __s16           bindex;
26590 +       __u8            type;
26591 +       __u8            nlen; /* NOTE(review): presumably the length of name[] without the terminator, since au_rdu_len() adds 1 -- confirm */
26592 +       __u8            wh;
26593 +       char            name[0]; /* zero-length trailing array; au_rdu_len() gives the padded size of a whole entry */
26594 +} __aligned(8);
26595 +
26596 +static inline int au_rdu_len(int nlen) /* size in bytes of one struct au_rdu_ent carrying a name of @nlen bytes */
26597 +{
26598 +       /* header + name + the terminating NUL byte, rounded up to a multiple of sizeof(__u64) */
26599 +       return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
26600 +                    sizeof(__u64));
26601 +}
26602 +
26603 +union au_rdu_ent_ul {
26604 +       struct au_rdu_ent __user        *e;
26605 +       unsigned long                   ul;
26606 +};
26607 +
26608 +enum {
26609 +       AufsCtlRduV_SZ,
26610 +       AufsCtlRduV_SZ_PTR,
26611 +       AufsCtlRduV_End
26612 +};
26613 +
26614 +struct aufs_rdu { /* argument of AUFS_CTL_RDU and AUFS_CTL_RDU_INO (see the _IOWR definitions in this header) */
26615 +       /* input */
26616 +       union { /* sz and nent share storage; the ioctl in use decides which one applies */
26617 +               __u64           sz;     /* AuCtl_RDU */
26618 +               __u64           nent;   /* AuCtl_RDU_INO */
26619 +       };
26620 +       union au_rdu_ent_ul     ent; /* user pointer, also readable as unsigned long. NOTE(review): pointer-sized, so the layout differs between 32- and 64-bit userspace -- confirm compat handling */
26621 +       __u16                   verify[AufsCtlRduV_End]; /* one slot per AufsCtlRduV_* index */
26622 +
26623 +       /* input/output */
26624 +       __u32                   blk;
26625 +
26626 +       /* output */
26627 +       union au_rdu_ent_ul     tail;
26628 +       /* number of entries which were added in a single call */
26629 +       __u64                   rent;
26630 +       __u8                    full;
26631 +       __u8                    shwh;
26632 +
26633 +       struct au_rdu_cookie    cookie;
26634 +} __aligned(8);
26635 +
26636 +#define AuCtlType              'A'
26637 +#define AUFS_CTL_PLINK_MAINT   _IO(AuCtlType, AuCtl_PLINK_MAINT)
26638 +#define AUFS_CTL_PLINK_CLEAN   _IO(AuCtlType, AuCtl_PLINK_CLEAN)
26639 +#define AUFS_CTL_RDU           _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
26640 +#define AUFS_CTL_RDU_INO       _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
26641 +#define AUFS_CTL_WBR_FD                _IO(AuCtlType, AuCtl_WBR_FD)
26642 +
26643 +#endif /* __AUFS_TYPE_H__ */
26644 diff --git a/include/linux/namei.h b/include/linux/namei.h
26645 index 68f8c32..5522432 100644
26646 --- a/include/linux/namei.h
26647 +++ b/include/linux/namei.h
26648 @@ -71,6 +71,9 @@ extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry
26649  extern struct file *nameidata_to_filp(struct nameidata *nd, int flags);
26650  extern void release_open_intent(struct nameidata *);
26651  
26652 +extern struct dentry *lookup_hash(struct nameidata *nd);
26653 +extern int __lookup_one_len(const char *name, struct qstr *this,
26654 +                           struct dentry *base, int len);
26655  extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
26656  extern struct dentry *lookup_one_noperm(const char *, struct dentry *);
26657  
26658 diff --git a/include/linux/splice.h b/include/linux/splice.h
26659 index 528dcb9..5123bc6 100644
26660 --- a/include/linux/splice.h
26661 +++ b/include/linux/splice.h
26662 @@ -71,4 +71,10 @@ extern ssize_t splice_to_pipe(struct pipe_inode_info *,
26663  extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
26664                                       splice_direct_actor *);
26665  
26666 +extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
26667 +                          loff_t *ppos, size_t len, unsigned int flags);
26668 +extern long do_splice_to(struct file *in, loff_t *ppos,
26669 +                        struct pipe_inode_info *pipe, size_t len,
26670 +                        unsigned int flags);
26671 +
26672  #endif
26673 diff --git a/scripts/basic/hash b/scripts/basic/hash
26674 new file mode 100644
26675 index 0000000000000000000000000000000000000000..666c5f8620e951471a4362046154a95395a50cc1
26676 GIT binary patch
26677 literal 6907
26678 zcmb_hYiwM_6`s3$*WTc_W59shUY9j>M6!;96GC{!ahzZ<Nr)W^gm77V*SibrU3d2`
26679 zBvL8@OImHWmLh@LQqdm~6{tc;2&#x@Na~1}sA>h&Mg>JF6xC_cDzq+5Y0CEdX71S6
26680 z2Gme7(z)k+GiPSbyv{uCeZBqb48ssQ_=H~&Sp_rL1>RMwQo4jCDn+{piB?g;c+xKD
26681 zfQ~^X1wbaq4{AaoP4}BY(n4So(^xi12ILi>dK5@veo?Q|W!#UDMnZ;W5bI^S4=e-_
26682 zLK<ljn8b9}L28G7JL)D`z$Be^(}zwJ@H0W@??@$wI#SUksbo68J)Fygm#Ms{TH3yT
26683 z!xovBb}&y6#5Pk#wXz;!Z(!S~&p=L0UD;+!nkVI_ZusB;qUQwa>0m)v?t*8K&<Hu~
26684 z;w!+37rFQo7heee-flC<_BV(+A#6K7nn~L^Cz5q+TcEkaY~~7K_iaRMG?}*ZxmZ-h
26685 zwkI7imQALe5#*ton`bnVOpEpXeQSE`W#Of&Iv?8QlWj8aWnYmP>XXSDwtBsSVj5#s
26686 z0o%<Vo54!(AUQgvnj9ThPYz?Z6h9m&oHVDHZR`CLo0^0uJ#?mAF7H0<1g5@@*r}KK
26687 zc^!Fg+4{yrdHETXTy7J{&phQe){vlh{1~#8+gMYAX~!oeW{nA&dHjIHtU1w4{4_AQ
26688 z=={qa#Xl7Ocxuz&=7fblOf1FcrkfLYK4=D~o`ll$o;d+F7fuFuU2UNx;Z&b!DI72#
26689 zYXK-9-b+@h-+efL;?)4tjW;ItDE++_4BvggX?%`#LRj#c8Dd;{5<1>6Q0fl_U*N2%
26690 z+h;s@z`3;eX5IcH&j-#GV$Zm9i738)@L0e&VjQg7x5g;^3Mxp>A1}UHc>mh1#mR}A
26691 zI}fvnYL$C&-TukWkBgHBkC_KQF^x&%)Ogdb@s>?>`?`(7L8Ew-*(dWyi@#Ud@0ci@
26692 ztlk@f?Xd3{dZM^CWKR9^({dS@({$=N$m9Fiit$(Ew?AZ}`^w)rQ#jdDgzd%KL)Aqm
26693 zRA1Z~YAU8e&81t-QbWmK_{9Gh%f-Mxf?h4%ZN4JDX9k~VOV1Sl`uE436?OXp3lO!c
26694 zSL82%e2$cjd=2C(Df^s`JyTne8^*BisY_u@sV7)8LsN&8jR7c1n+5QR-?@o@nwR*&
26695 ze~|cs>~@ULwh4CZ_)FhnnZVz^d0=WI#QP^+ZV&<;GBp7G@fVv>^LT;ZMXwz!`*xR|
26696 zQ0K>`-g?v*EcI5i+EOo#DL?!ywF<8qSNt=7v~Z|;>mByCiBd}lLyht|=l9a@6;79(
26697 z#?E)klchp6;RzEl`BQ~M=GJYT6|SycA4_4))J~XkE7k)2kAXF}B^QauR#|O1>#FEo
26698 zL(6t%mtSLDHJXX0Bcriv&Jlu<qAhxk$SSV_yX3sroNea7a*<Sn9(}+JVlj)CF!=N*
26699 z*TKL3<I{4+0q0mvgVPT_436T(O`s|e^J9+6K-*5ELG*(b2<sYtNr>}7J5fGm2b{i)
26700 zi<Nr^IYucnh<MnRp-rd2*8xNP(t-Pi&h$RY_#4QFJ+Bd@<6Ee+zNcrE)ee`x$m$9&
26701 z3wK%VTZZy!C$G?w&b&mO@VQ^LX4hGldG?xTZ|9dQ6XybY_;GwWE7UysCOGE_$4>{P
26702 zcOiyP>AjAAr}U0HpR`^p1kMlm5l|j9Z*O8!5u}t%gA>ho93Seg#VC>>3ZD_|A@fyA
26703 zWLB;qGAlb}T1Br+3(!Wt8NgO2e8Ip|j0{#TCNl%ifSG2c&#35!6!;oG#s%N!H{j>K
26704 zT0w8%8gm0A(~zzLQpZp}nTo1!v?I<Pbjh;t<tk!{la0kXl9#Pqfs@A2aK~^i-!YtZ
26705 zlHq}~$v0aV@(IjOt9!s&7Vcb${n*&0dS;t6hK;fQPh-+wg)v5h(N?>#%3pg?ZG+Kh
26706 z@Mz}?iK?n=5QScMMGO|)He=DpUBX|xs_M$BzJ=FT``||^E)jhkBBKE(#Vr;7;6=f<
26707 z;6gf_&&FJgSf7xiWc<6fM&Ni$ZviqyGM#k7!$3kj0P=|q_!{%ozU;6G;f@>Y5dj~a
26708 zq`%f^?P$IFw$^yOqr(?KB}m|E@VC1Oz6wNc|B9{PfVoz1Y55lzt*xy-oXA)`7n#3l
26709 zCMK82WF0G?&L!jNSk&U&S5cJaEw@xA9k=9Xj{Xc7PDHYD7WVuPH+dF)J9lckLpEyw
26710 ztrL2GVps16v1fk+;s7w{bauj9AU6GoCtZYdJ4T(z5V(_7JfS625X+8<a603}!rg27
26711 zmN=2P2*=a;a3YdRh;VdA8krS$vMS}CST>i;q~`z|Y1vqc`bro}IU>yVh_Dmej_(Kn
26712 zk}}bV6A|H9!XC+DUl!qECzH*g0>$ncMnNbfGMXGl1sP<-CoD@-Ifru49?p!8#?Uw6
26713 zXly7Sw<Fm|Iv!&w5{~7ExaZH{WO^i{(;`Dd+1NcA;o1WeR4#Oh7Tw}h_ny`FB4}|g
26714 z(na$g$UC8TFOc&TX<V>$&eW54HY8Y(bx=Eny7zF&Q;+A4>!3#$%{T_?k?7e!#@T>+
26715 zJeN3bdGqzIL^{tV)Z=;OVd&in*?W)Uxupl>2T^YtdehM3cB2EJ4D|gxn^2GEl64Sx
26716 zX43UCo^%U{XBo;o&kRDaAbaZt6mif35cPN-av*Sm>+;xE;xUlc<N0Z)TORdTzt-CU
26717 znYOYXuA4`pw-mAt)MLQ%n$hFhc?)`*WQzJwk8~eMm&f-43!TI=bzt8y>;`E)uG5!b
26718 zSc^<iA6;+B)ngG{t4E;M^*MSEdGxArpsz-QbQx6F^3Pm7j-7l*fZmLQ@aX;Aqt}cx
26719 zU-K8}J?YWo<;?nmdY^_KlH{C`ZxjHNZXDRmcsvi8{X+YAC!YSC^4OOB9zC9$PeX5h
26720 z|55KXk6sh*QcYM?9Df~jzx)~z><8A%d#DAy&+C`NNT+=|Q122?gcje0f|jX!9SZZ#
26721 z7hU$B=jr_f32n^a23-c<Ews#}m!bEBCqj#SL(nqyaElQc3U1J8^gCK+(%aBm<cZLt
26722 z<?1bU^Phopc+r)~s7Jcg)ngpPbLiyNQc@q?-UdMSu@0x9cLfV@gBHm>%SOBdCwU5k
26723 zbur@kro(Zo2XT#i%R7XZipxD2b$L+xOrZZ|q<Y;`epX>s0R5@Lss{Q^h1IN~e^eNY
26724 z5`lhDVa{~r?-a%)L7-n#STgCq6lN>bJx^f_1_J$&!sv7a`W=PU45oikxQ0yLD->1_
26725 zlzMOX^L-0}enF+P=hXUFn4^OCM64r)dCL@n>sev7L+SgWU*1mD`c&!a7Rz<0@Ofl{
26726 z>rG+aVo}}<#vpluUtHip91p(E%?7{Vt~ne0b5=U_d9a-ge!+LJ+29x7m<5ph!jf}V
26727 zN1EcJvNY6<AolLb#4M5ik}3vn=8M5~e;IgTTm`KA%fODl9(a?!ZHXGeYlnQ_0yjjn
26728 zNP+Y13k>*tkpti5(Vt&G_mw73{XYbjcX64&Q9J^!$CodSPXK%Ec@}t&d)D*8p84&4
26729 z1L-=jy^{b-@tzj~r~Yrj`4+{QPy3I8>+v>x=ZR@x-9N(DAOcu;Ud*>@J%0>elV}Cj
26730 z^TR^>7X$Mxkt?YY^<72m_6Knf@YBGn-SiuQb+91ZJ`axpbN<s_zVF@(%zc)58bz)6
26731 zuB)%#i+>9Io=5)|z}mig*tZY3*&VM2-}zz!n9rWK$?|H%+rSL;O<X-60_*u+kN*4!
26732 z_>{-~e*w3<<uU!!dHMm@htcn|ymNtnpzZh7h{eEsZ>2A0|8xRtf6>SjoO?we<B1v<
26733 zJlw}}PJUzrqHXsKZrp75_YDl<t=qQO&Oz7rc#@(SJD$o6MN+mrdE1fvc7ap%SSsek
26734 zqT%H$yDk%BWSv5u)9q+}baaQVpm)RC+4415x4HYqURxtprsdhX<Y8ad#p?wu&hyO<
26735 z@9vp6ZUe$z-@kE9cfY-H-MWF^L3^-!O@A-j!PerCpN;IWW9jJJ2>G%v?6tRU=)SS9
26736 zNAQxMUhbhLpYic#V9U#g8_j1xTi$Nw{Tr~azsKGjizjoq67{4axg2f~_Q;r>xEBm2
26737 z=mOQh1pXITy?XY?z`RSOZO^eJn#-WhbTk#yrO+(4%l+vvrzjhj9M{_Ha@SVb@R+;<
26738 i@n6MU{p5<3E5h-ZV~-8nP9mSaJ3O>q$UhgVj{gExw*)=_
26739
26740 literal 0
26741 HcmV?d00001
26742
26743 -- 
26744 1.6.3.1
26745