Merge master.kernel.org:/home/rmk/linux-2.6-i2c manually
[pandora-kernel.git] / mm / msync.c
1 /*
2  *      linux/mm/msync.c
3  *
4  * Copyright (C) 1994-1999  Linus Torvalds
5  */
6
7 /*
8  * The msync() system call.
9  */
10 #include <linux/slab.h>
11 #include <linux/pagemap.h>
12 #include <linux/mm.h>
13 #include <linux/mman.h>
14 #include <linux/hugetlb.h>
15 #include <linux/syscalls.h>
16
17 #include <asm/pgtable.h>
18 #include <asm/tlbflush.h>
19
20 /*
21  * Called with mm->page_table_lock held to protect against other
22  * threads/the swapper from ripping pte's out from under us.
23  */
24
25 static void sync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
26                                 unsigned long addr, unsigned long end)
27 {
28         pte_t *pte;
29
30         pte = pte_offset_map(pmd, addr);
31         do {
32                 unsigned long pfn;
33                 struct page *page;
34
35                 if (!pte_present(*pte))
36                         continue;
37                 if (!pte_maybe_dirty(*pte))
38                         continue;
39                 pfn = pte_pfn(*pte);
40                 if (!pfn_valid(pfn))
41                         continue;
42                 page = pfn_to_page(pfn);
43                 if (PageReserved(page))
44                         continue;
45
46                 if (ptep_clear_flush_dirty(vma, addr, pte) ||
47                     page_test_and_clear_dirty(page))
48                         set_page_dirty(page);
49         } while (pte++, addr += PAGE_SIZE, addr != end);
50         pte_unmap(pte - 1);
51 }
52
53 static inline void sync_pmd_range(struct vm_area_struct *vma, pud_t *pud,
54                                 unsigned long addr, unsigned long end)
55 {
56         pmd_t *pmd;
57         unsigned long next;
58
59         pmd = pmd_offset(pud, addr);
60         do {
61                 next = pmd_addr_end(addr, end);
62                 if (pmd_none_or_clear_bad(pmd))
63                         continue;
64                 sync_pte_range(vma, pmd, addr, next);
65         } while (pmd++, addr = next, addr != end);
66 }
67
68 static inline void sync_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
69                                 unsigned long addr, unsigned long end)
70 {
71         pud_t *pud;
72         unsigned long next;
73
74         pud = pud_offset(pgd, addr);
75         do {
76                 next = pud_addr_end(addr, end);
77                 if (pud_none_or_clear_bad(pud))
78                         continue;
79                 sync_pmd_range(vma, pud, addr, next);
80         } while (pud++, addr = next, addr != end);
81 }
82
83 static void sync_page_range(struct vm_area_struct *vma,
84                                 unsigned long addr, unsigned long end)
85 {
86         struct mm_struct *mm = vma->vm_mm;
87         pgd_t *pgd;
88         unsigned long next;
89
90         /* For hugepages we can't go walking the page table normally,
91          * but that's ok, hugetlbfs is memory based, so we don't need
92          * to do anything more on an msync() */
93         if (is_vm_hugetlb_page(vma))
94                 return;
95
96         BUG_ON(addr >= end);
97         pgd = pgd_offset(mm, addr);
98         flush_cache_range(vma, addr, end);
99         spin_lock(&mm->page_table_lock);
100         do {
101                 next = pgd_addr_end(addr, end);
102                 if (pgd_none_or_clear_bad(pgd))
103                         continue;
104                 sync_pud_range(vma, pgd, addr, next);
105         } while (pgd++, addr = next, addr != end);
106         spin_unlock(&mm->page_table_lock);
107 }
108
#ifdef CONFIG_PREEMPT
/*
 * Preemptible build: process [addr, end) in 64KB pieces, calling
 * cond_resched() after each piece so the walk offers a reschedule
 * point between chunks.
 */
static inline void filemap_sync(struct vm_area_struct *vma,
				unsigned long addr, unsigned long end)
{
	const size_t chunk = 64 * 1024; /* bytes */

	for (;;) {
		unsigned long stop = addr + chunk;

		/* Clamp when the chunk overshoots end or the sum wrapped. */
		if (stop < addr || stop > end)
			stop = end;

		sync_page_range(vma, addr, stop);
		cond_resched();

		addr = stop;
		if (addr == end)
			break;
	}
}
#else
/*
 * Non-preemptible build: sync the whole [addr, end) range in one call.
 */
static inline void filemap_sync(struct vm_area_struct *vma,
				unsigned long addr, unsigned long end)
{
	sync_page_range(vma, addr, end);
}
#endif
131
132 /*
133  * MS_SYNC syncs the entire file - including mappings.
134  *
135  * MS_ASYNC does not start I/O (it used to, up to 2.5.67).  Instead, it just
136  * marks the relevant pages dirty.  The application may now run fsync() to
137  * write out the dirty pages and wait on the writeout and check the result.
138  * Or the application may run fadvise(FADV_DONTNEED) against the fd to start
139  * async writeout immediately.
140  * So my _not_ starting I/O in MS_ASYNC we provide complete flexibility to
141  * applications.
142  */
143 static int msync_interval(struct vm_area_struct *vma,
144                         unsigned long addr, unsigned long end, int flags)
145 {
146         int ret = 0;
147         struct file *file = vma->vm_file;
148
149         if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
150                 return -EBUSY;
151
152         if (file && (vma->vm_flags & VM_SHARED)) {
153                 filemap_sync(vma, addr, end);
154
155                 if (flags & MS_SYNC) {
156                         struct address_space *mapping = file->f_mapping;
157                         int err;
158
159                         ret = filemap_fdatawrite(mapping);
160                         if (file->f_op && file->f_op->fsync) {
161                                 /*
162                                  * We don't take i_sem here because mmap_sem
163                                  * is already held.
164                                  */
165                                 err = file->f_op->fsync(file,file->f_dentry,1);
166                                 if (err && !ret)
167                                         ret = err;
168                         }
169                         err = filemap_fdatawait(mapping);
170                         if (!ret)
171                                 ret = err;
172                 }
173         }
174         return ret;
175 }
176
177 asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
178 {
179         unsigned long end;
180         struct vm_area_struct *vma;
181         int unmapped_error, error = -EINVAL;
182
183         if (flags & MS_SYNC)
184                 current->flags |= PF_SYNCWRITE;
185
186         down_read(&current->mm->mmap_sem);
187         if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
188                 goto out;
189         if (start & ~PAGE_MASK)
190                 goto out;
191         if ((flags & MS_ASYNC) && (flags & MS_SYNC))
192                 goto out;
193         error = -ENOMEM;
194         len = (len + ~PAGE_MASK) & PAGE_MASK;
195         end = start + len;
196         if (end < start)
197                 goto out;
198         error = 0;
199         if (end == start)
200                 goto out;
201         /*
202          * If the interval [start,end) covers some unmapped address ranges,
203          * just ignore them, but return -ENOMEM at the end.
204          */
205         vma = find_vma(current->mm, start);
206         unmapped_error = 0;
207         for (;;) {
208                 /* Still start < end. */
209                 error = -ENOMEM;
210                 if (!vma)
211                         goto out;
212                 /* Here start < vma->vm_end. */
213                 if (start < vma->vm_start) {
214                         unmapped_error = -ENOMEM;
215                         start = vma->vm_start;
216                 }
217                 /* Here vma->vm_start <= start < vma->vm_end. */
218                 if (end <= vma->vm_end) {
219                         if (start < end) {
220                                 error = msync_interval(vma, start, end, flags);
221                                 if (error)
222                                         goto out;
223                         }
224                         error = unmapped_error;
225                         goto out;
226                 }
227                 /* Here vma->vm_start <= start < vma->vm_end < end. */
228                 error = msync_interval(vma, start, vma->vm_end, flags);
229                 if (error)
230                         goto out;
231                 start = vma->vm_end;
232                 vma = vma->vm_next;
233         }
234 out:
235         up_read(&current->mm->mmap_sem);
236         current->flags &= ~PF_SYNCWRITE;
237         return error;
238 }