[XFS] Put the correct offset in dirent d_off
[pandora-kernel.git] / fs / xfs / linux-2.6 / xfs_file.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_bit.h"
20 #include "xfs_log.h"
21 #include "xfs_inum.h"
22 #include "xfs_sb.h"
23 #include "xfs_ag.h"
24 #include "xfs_dir2.h"
25 #include "xfs_trans.h"
26 #include "xfs_dmapi.h"
27 #include "xfs_mount.h"
28 #include "xfs_bmap_btree.h"
29 #include "xfs_alloc_btree.h"
30 #include "xfs_ialloc_btree.h"
31 #include "xfs_alloc.h"
32 #include "xfs_btree.h"
33 #include "xfs_attr_sf.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_error.h"
38 #include "xfs_rw.h"
39 #include "xfs_ioctl32.h"
40 #include "xfs_vnodeops.h"
41
42 #include <linux/dcache.h>
43 #include <linux/smp_lock.h>
44
45 static struct vm_operations_struct xfs_file_vm_ops;
46 #ifdef CONFIG_XFS_DMAPI
47 static struct vm_operations_struct xfs_dmapi_file_vm_ops;
48 #endif
49
50 STATIC_INLINE ssize_t
51 __xfs_file_read(
52         struct kiocb            *iocb,
53         const struct iovec      *iov,
54         unsigned long           nr_segs,
55         int                     ioflags,
56         loff_t                  pos)
57 {
58         struct file             *file = iocb->ki_filp;
59
60         BUG_ON(iocb->ki_pos != pos);
61         if (unlikely(file->f_flags & O_DIRECT))
62                 ioflags |= IO_ISDIRECT;
63         return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov,
64                                 nr_segs, &iocb->ki_pos, ioflags);
65 }
66
67 STATIC ssize_t
68 xfs_file_aio_read(
69         struct kiocb            *iocb,
70         const struct iovec      *iov,
71         unsigned long           nr_segs,
72         loff_t                  pos)
73 {
74         return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos);
75 }
76
77 STATIC ssize_t
78 xfs_file_aio_read_invis(
79         struct kiocb            *iocb,
80         const struct iovec      *iov,
81         unsigned long           nr_segs,
82         loff_t                  pos)
83 {
84         return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
85 }
86
87 STATIC_INLINE ssize_t
88 __xfs_file_write(
89         struct kiocb            *iocb,
90         const struct iovec      *iov,
91         unsigned long           nr_segs,
92         int                     ioflags,
93         loff_t                  pos)
94 {
95         struct file     *file = iocb->ki_filp;
96
97         BUG_ON(iocb->ki_pos != pos);
98         if (unlikely(file->f_flags & O_DIRECT))
99                 ioflags |= IO_ISDIRECT;
100         return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs,
101                                 &iocb->ki_pos, ioflags);
102 }
103
104 STATIC ssize_t
105 xfs_file_aio_write(
106         struct kiocb            *iocb,
107         const struct iovec      *iov,
108         unsigned long           nr_segs,
109         loff_t                  pos)
110 {
111         return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos);
112 }
113
114 STATIC ssize_t
115 xfs_file_aio_write_invis(
116         struct kiocb            *iocb,
117         const struct iovec      *iov,
118         unsigned long           nr_segs,
119         loff_t                  pos)
120 {
121         return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
122 }
123
124 STATIC ssize_t
125 xfs_file_splice_read(
126         struct file             *infilp,
127         loff_t                  *ppos,
128         struct pipe_inode_info  *pipe,
129         size_t                  len,
130         unsigned int            flags)
131 {
132         return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
133                                    infilp, ppos, pipe, len, flags, 0);
134 }
135
136 STATIC ssize_t
137 xfs_file_splice_read_invis(
138         struct file             *infilp,
139         loff_t                  *ppos,
140         struct pipe_inode_info  *pipe,
141         size_t                  len,
142         unsigned int            flags)
143 {
144         return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
145                                    infilp, ppos, pipe, len, flags, IO_INVIS);
146 }
147
148 STATIC ssize_t
149 xfs_file_splice_write(
150         struct pipe_inode_info  *pipe,
151         struct file             *outfilp,
152         loff_t                  *ppos,
153         size_t                  len,
154         unsigned int            flags)
155 {
156         return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
157                                     pipe, outfilp, ppos, len, flags, 0);
158 }
159
160 STATIC ssize_t
161 xfs_file_splice_write_invis(
162         struct pipe_inode_info  *pipe,
163         struct file             *outfilp,
164         loff_t                  *ppos,
165         size_t                  len,
166         unsigned int            flags)
167 {
168         return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
169                                     pipe, outfilp, ppos, len, flags, IO_INVIS);
170 }
171
172 STATIC int
173 xfs_file_open(
174         struct inode    *inode,
175         struct file     *filp)
176 {
177         if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
178                 return -EFBIG;
179         return -xfs_open(XFS_I(inode));
180 }
181
182 STATIC int
183 xfs_file_release(
184         struct inode    *inode,
185         struct file     *filp)
186 {
187         return -xfs_release(XFS_I(inode));
188 }
189
190 STATIC int
191 xfs_file_fsync(
192         struct file     *filp,
193         struct dentry   *dentry,
194         int             datasync)
195 {
196         int             flags = FSYNC_WAIT;
197
198         if (datasync)
199                 flags |= FSYNC_DATA;
200         xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED);
201         return -xfs_fsync(XFS_I(dentry->d_inode), flags,
202                         (xfs_off_t)0, (xfs_off_t)-1);
203 }
204
#ifdef CONFIG_XFS_DMAPI
/*
 * ->fault method of xfs_dmapi_file_vm_ops.  Asserts that the vnode's vfs has
 * VFS_DMI set, issues XFS_SEND_MMAP for the mapping and, if that reports
 * failure, returns VM_FAULT_SIGBUS; otherwise defers to filemap_fault().
 */
STATIC int
xfs_vm_fault(
	struct vm_area_struct	*vma,
	struct vm_fault		*vmf)
{
	struct file	*filp = vma->vm_file;
	bhv_vnode_t	*vp = vn_from_inode(filp->f_path.dentry->d_inode);

	ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);

	return XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0) ?
			VM_FAULT_SIGBUS : filemap_fault(vma, vmf);
}
#endif /* CONFIG_XFS_DMAPI */
220
/*
 * Unfortunately we can't just use the clean and simple readdir implementation
 * in the "#if 0" branch below, because NFS might call back into ->lookup from
 * the filldir callback, and that would deadlock the low-level btree code.
 *
 * Hopefully we'll find a better workaround that allows us to use the optimal
 * version, at least for local readdirs, for 2.6.25.
 */
229 #if 0
230 STATIC int
231 xfs_file_readdir(
232         struct file     *filp,
233         void            *dirent,
234         filldir_t       filldir)
235 {
236         struct inode    *inode = filp->f_path.dentry->d_inode;
237         xfs_inode_t     *ip = XFS_I(inode);
238         int             error;
239         size_t          bufsize;
240
241         /*
242          * The Linux API doesn't pass down the total size of the buffer
243          * we read into down to the filesystem.  With the filldir concept
244          * it's not needed for correct information, but the XFS dir2 leaf
245          * code wants an estimate of the buffer size to calculate it's
246          * readahead window and size the buffers used for mapping to
247          * physical blocks.
248          *
249          * Try to give it an estimate that's good enough, maybe at some
250          * point we can change the ->readdir prototype to include the
251          * buffer size.
252          */
253         bufsize = (size_t)min_t(loff_t, PAGE_SIZE, inode->i_size);
254
255         error = xfs_readdir(ip, dirent, bufsize,
256                                 (xfs_off_t *)&filp->f_pos, filldir);
257         if (error)
258                 return -error;
259         return 0;
260 }
261 #else
262
263 struct hack_dirent {
264         int             namlen;
265         loff_t          offset;
266         u64             ino;
267         unsigned int    d_type;
268         char            name[];
269 };
270
271 struct hack_callback {
272         char            *dirent;
273         size_t          len;
274         size_t          used;
275 };
276
277 STATIC int
278 xfs_hack_filldir(
279         void            *__buf,
280         const char      *name,
281         int             namlen,
282         loff_t          offset,
283         u64             ino,
284         unsigned int    d_type)
285 {
286         struct hack_callback *buf = __buf;
287         struct hack_dirent *de = (struct hack_dirent *)(buf->dirent + buf->used);
288
289         if (buf->used + sizeof(struct hack_dirent) + namlen > buf->len)
290                 return -EINVAL;
291
292         de->namlen = namlen;
293         de->offset = offset;
294         de->ino = ino;
295         de->d_type = d_type;
296         memcpy(de->name, name, namlen);
297         buf->used += sizeof(struct hack_dirent) + namlen;
298         return 0;
299 }
300
301 STATIC int
302 xfs_file_readdir(
303         struct file     *filp,
304         void            *dirent,
305         filldir_t       filldir)
306 {
307         struct inode    *inode = filp->f_path.dentry->d_inode;
308         xfs_inode_t     *ip = XFS_I(inode);
309         struct hack_callback buf;
310         struct hack_dirent *de;
311         int             error;
312         loff_t          size;
313         int             eof = 0;
314         xfs_off_t       start_offset, curr_offset, offset;
315
316         /*
317          * Try fairly hard to get memory
318          */
319         buf.len = PAGE_CACHE_SIZE;
320         do {
321                 buf.dirent = kmalloc(buf.len, GFP_KERNEL);
322                 if (buf.dirent)
323                         break;
324                 buf.len >>= 1;
325         } while (buf.len >= 1024);
326
327         if (!buf.dirent)
328                 return -ENOMEM;
329
330         curr_offset = filp->f_pos;
331         if (curr_offset == 0x7fffffff)
332                 offset = 0xffffffff;
333         else
334                 offset = filp->f_pos;
335
336         while (!eof) {
337                 int reclen;
338                 start_offset = offset;
339
340                 buf.used = 0;
341                 error = -xfs_readdir(ip, &buf, buf.len, &offset,
342                                      xfs_hack_filldir);
343                 if (error || offset == start_offset) {
344                         size = 0;
345                         break;
346                 }
347
348                 size = buf.used;
349                 de = (struct hack_dirent *)buf.dirent;
350                 while (size > 0) {
351                         if (filldir(dirent, de->name, de->namlen,
352                                         curr_offset & 0x7fffffff,
353                                         de->ino, de->d_type)) {
354                                 goto done;
355                         }
356
357                         reclen = sizeof(struct hack_dirent) + de->namlen;
358                         size -= reclen;
359                         de = (struct hack_dirent *)((char *)de + reclen);
360                         curr_offset = de->offset /* & 0x7fffffff */;
361                 }
362         }
363
364  done:
365         if (!error) {
366                 if (size == 0)
367                         filp->f_pos = offset & 0x7fffffff;
368                 else if (de)
369                         filp->f_pos = curr_offset;
370         }
371
372         kfree(buf.dirent);
373         return error;
374 }
375 #endif
376
377 STATIC int
378 xfs_file_mmap(
379         struct file     *filp,
380         struct vm_area_struct *vma)
381 {
382         vma->vm_ops = &xfs_file_vm_ops;
383         vma->vm_flags |= VM_CAN_NONLINEAR;
384
385 #ifdef CONFIG_XFS_DMAPI
386         if (XFS_M(filp->f_path.dentry->d_inode->i_sb)->m_flags & XFS_MOUNT_DMAPI)
387                 vma->vm_ops = &xfs_dmapi_file_vm_ops;
388 #endif /* CONFIG_XFS_DMAPI */
389
390         file_accessed(filp);
391         return 0;
392 }
393
394 STATIC long
395 xfs_file_ioctl(
396         struct file     *filp,
397         unsigned int    cmd,
398         unsigned long   p)
399 {
400         int             error;
401         struct inode    *inode = filp->f_path.dentry->d_inode;
402
403         error = xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p);
404         xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
405
406         /* NOTE:  some of the ioctl's return positive #'s as a
407          *        byte count indicating success, such as
408          *        readlink_by_handle.  So we don't "sign flip"
409          *        like most other routines.  This means true
410          *        errors need to be returned as a negative value.
411          */
412         return error;
413 }
414
415 STATIC long
416 xfs_file_ioctl_invis(
417         struct file     *filp,
418         unsigned int    cmd,
419         unsigned long   p)
420 {
421         int             error;
422         struct inode    *inode = filp->f_path.dentry->d_inode;
423
424         error = xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p);
425         xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
426
427         /* NOTE:  some of the ioctl's return positive #'s as a
428          *        byte count indicating success, such as
429          *        readlink_by_handle.  So we don't "sign flip"
430          *        like most other routines.  This means true
431          *        errors need to be returned as a negative value.
432          */
433         return error;
434 }
435
#ifdef CONFIG_XFS_DMAPI
#ifdef HAVE_VMOP_MPROTECT
/*
 * ->mprotect method of xfs_dmapi_file_vm_ops.  On a mount with
 * XFS_MOUNT_DMAPI set, when a VM_MAYSHARE mapping that is not currently
 * VM_WRITE is about to gain VM_WRITE, issues XFS_SEND_MMAP for VM_WRITE and
 * returns its result.  Returns 0 in every other case.
 */
STATIC int
xfs_vm_mprotect(
	struct vm_area_struct *vma,
	unsigned int	newflags)
{
	struct inode	*inode = vma->vm_file->f_path.dentry->d_inode;
	struct xfs_mount *mp = XFS_M(inode->i_sb);

	if (!(mp->m_flags & XFS_MOUNT_DMAPI))
		return 0;
	if (!(vma->vm_flags & VM_MAYSHARE))
		return 0;
	if (!(newflags & VM_WRITE) || (vma->vm_flags & VM_WRITE))
		return 0;

	return XFS_SEND_MMAP(mp, vma, VM_WRITE);
}
#endif /* HAVE_VMOP_MPROTECT */
#endif /* CONFIG_XFS_DMAPI */
456
#ifdef HAVE_FOP_OPEN_EXEC
/*
 * If the user is attempting to execute a file that is offline, a DMAPI READ
 * event has to be triggered before the file is marked as busy; otherwise
 * the invisible I/O would not be able to write to the file to bring it back
 * online.
 *
 * Returns 0 unless the mount has XFS_MOUNT_DMAPI set and DM_EVENT_READ is
 * enabled for the inode, in which case the negated XFS_SEND_DATA() result
 * is returned.
 */
STATIC int
xfs_file_open_exec(
	struct inode	*inode)
{
	struct xfs_mount *mp = XFS_M(inode->i_sb);
	bhv_vnode_t	*vp;

	if (!unlikely(mp->m_flags & XFS_MOUNT_DMAPI))
		return 0;
	if (!DM_EVENT_ENABLED(XFS_I(inode), DM_EVENT_READ))
		return 0;

	vp = vn_from_inode(inode);
	return -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, 0, 0, 0, NULL);
}
#endif /* HAVE_FOP_OPEN_EXEC */
481
482 /*
483  * mmap()d file has taken write protection fault and is being made
484  * writable. We can set the page state up correctly for a writable
485  * page, which means we can do correct delalloc accounting (ENOSPC
486  * checking!) and unwritten extent mapping.
487  */
488 STATIC int
489 xfs_vm_page_mkwrite(
490         struct vm_area_struct   *vma,
491         struct page             *page)
492 {
493         return block_page_mkwrite(vma, page, xfs_get_blocks);
494 }
495
496 const struct file_operations xfs_file_operations = {
497         .llseek         = generic_file_llseek,
498         .read           = do_sync_read,
499         .write          = do_sync_write,
500         .aio_read       = xfs_file_aio_read,
501         .aio_write      = xfs_file_aio_write,
502         .splice_read    = xfs_file_splice_read,
503         .splice_write   = xfs_file_splice_write,
504         .unlocked_ioctl = xfs_file_ioctl,
505 #ifdef CONFIG_COMPAT
506         .compat_ioctl   = xfs_file_compat_ioctl,
507 #endif
508         .mmap           = xfs_file_mmap,
509         .open           = xfs_file_open,
510         .release        = xfs_file_release,
511         .fsync          = xfs_file_fsync,
512 #ifdef HAVE_FOP_OPEN_EXEC
513         .open_exec      = xfs_file_open_exec,
514 #endif
515 };
516
517 const struct file_operations xfs_invis_file_operations = {
518         .llseek         = generic_file_llseek,
519         .read           = do_sync_read,
520         .write          = do_sync_write,
521         .aio_read       = xfs_file_aio_read_invis,
522         .aio_write      = xfs_file_aio_write_invis,
523         .splice_read    = xfs_file_splice_read_invis,
524         .splice_write   = xfs_file_splice_write_invis,
525         .unlocked_ioctl = xfs_file_ioctl_invis,
526 #ifdef CONFIG_COMPAT
527         .compat_ioctl   = xfs_file_compat_invis_ioctl,
528 #endif
529         .mmap           = xfs_file_mmap,
530         .open           = xfs_file_open,
531         .release        = xfs_file_release,
532         .fsync          = xfs_file_fsync,
533 };
534
535
536 const struct file_operations xfs_dir_file_operations = {
537         .read           = generic_read_dir,
538         .readdir        = xfs_file_readdir,
539         .unlocked_ioctl = xfs_file_ioctl,
540 #ifdef CONFIG_COMPAT
541         .compat_ioctl   = xfs_file_compat_ioctl,
542 #endif
543         .fsync          = xfs_file_fsync,
544 };
545
546 static struct vm_operations_struct xfs_file_vm_ops = {
547         .fault          = filemap_fault,
548         .page_mkwrite   = xfs_vm_page_mkwrite,
549 };
550
#ifdef CONFIG_XFS_DMAPI
/*
 * VM operations installed by xfs_file_mmap() on mounts with
 * XFS_MOUNT_DMAPI set: faults go through xfs_vm_fault() and, where the
 * kernel provides the hook, mprotect goes through xfs_vm_mprotect().
 */
static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
	.page_mkwrite	= xfs_vm_page_mkwrite,
	.fault		= xfs_vm_fault,
#ifdef HAVE_VMOP_MPROTECT
	.mprotect	= xfs_vm_mprotect,
#endif
};
#endif /* CONFIG_XFS_DMAPI */