mm: fix aio performance regression for database caused by THP
1 /*
2  *  linux/mm/swap.c
3  *
4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
5  */
6
7 /*
8  * This file contains the default values for the operation of the
9  * Linux VM subsystem. Fine-tuning documentation can be found in
10  * Documentation/sysctl/vm.txt.
11  * Started 18.12.91
12  * Swap aging added 23.2.95, Stephen Tweedie.
13  * Buffermem limits added 12.3.98, Rik van Riel.
14  */
15
16 #include <linux/mm.h>
17 #include <linux/sched.h>
18 #include <linux/kernel_stat.h>
19 #include <linux/swap.h>
20 #include <linux/mman.h>
21 #include <linux/pagemap.h>
22 #include <linux/pagevec.h>
23 #include <linux/init.h>
24 #include <linux/export.h>
25 #include <linux/mm_inline.h>
26 #include <linux/buffer_head.h>  /* for try_to_release_page() */
27 #include <linux/percpu_counter.h>
28 #include <linux/percpu.h>
29 #include <linux/cpu.h>
30 #include <linux/notifier.h>
31 #include <linux/backing-dev.h>
32 #include <linux/memcontrol.h>
33 #include <linux/gfp.h>
34 #include <linux/hugetlb.h>
35
36 #include "internal.h"
37
38 /* How many pages do we try to swap or page in/out together? */
39 int page_cluster;
40
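/*
 * Per-CPU pagevecs, used to batch LRU manipulation so that zone->lru_lock
 * does not have to be taken for every single page:
 *
 *  - lru_add_pvecs: pages queued for addition to one of the LRU lists
 *  - lru_rotate_pvecs: pages to be rotated to the tail of the inactive list
 *    (see rotate_reclaimable_page())
 *  - lru_deactivate_pvecs: pages to be force-deactivated (see deactivate_page())
 */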
41 static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
42 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
43 static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
44
45 /*
46  * This path almost never happens for VM activity - pages are normally
47  * freed via pagevecs.  But it gets used by networking.
48  */
49 static void __page_cache_release(struct page *page)
50 {
51         if (PageLRU(page)) {
52                 unsigned long flags;
53                 struct zone *zone = page_zone(page);
54
55                 spin_lock_irqsave(&zone->lru_lock, flags);
56                 VM_BUG_ON(!PageLRU(page));
57                 __ClearPageLRU(page);
58                 del_page_from_lru(zone, page);
59                 spin_unlock_irqrestore(&zone->lru_lock, flags);
60         }
61 }
62
63 static void __put_single_page(struct page *page)
64 {
65         __page_cache_release(page);
66         free_hot_cold_page(page, 0);
67 }
68
69 static void __put_compound_page(struct page *page)
70 {
71         compound_page_dtor *dtor;
72
73         if (!PageHuge(page))
74                 __page_cache_release(page);
75         dtor = get_compound_page_dtor(page);
76         (*dtor)(page);
77 }
78
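/*
 * Drop a reference on a compound page.  Three cases are handled below:
 *
 *  1. hugetlbfs pages, which can never be split, so only the head page's
 *     refcount matters;
 *  2. THP tail pages, where the compound_lock is used to serialize against
 *     __split_huge_page_refcount() splitting the page under us;
 *  3. ordinary head (or already-split) pages, which are freed once their
 *     refcount drops to zero.
 */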
79 static void put_compound_page(struct page *page)
80 {
81         /*
82          * hugetlbfs pages cannot be split from under us.  If this is a
83          * hugetlbfs page, check refcount on head page and release the page if
84          * the refcount becomes zero.
85          */
86         if (PageHuge(page)) {
87                 page = compound_head(page);
88                 if (put_page_testzero(page))
89                         __put_compound_page(page);
90                 return;
91         }
92
93         if (unlikely(PageTail(page))) {
94                 /* __split_huge_page_refcount can run under us */
95                 struct page *page_head = compound_trans_head(page);
96
97                 if (likely(page != page_head &&
98                            get_page_unless_zero(page_head))) {
99                         unsigned long flags;
100                         /*
101                          * page_head wasn't a dangling pointer but it
102                          * may not be a head page anymore by the time
103                          * we obtain the lock. That is ok as long as it
104                          * can't be freed from under us.
105                          */
106                         flags = compound_lock_irqsave(page_head);
107                         if (unlikely(!PageTail(page))) {
108                                 /* __split_huge_page_refcount ran before us */
109                                 compound_unlock_irqrestore(page_head, flags);
110                                 VM_BUG_ON(PageHead(page_head));
111                                 if (put_page_testzero(page_head))
112                                         __put_single_page(page_head);
113                         out_put_single:
114                                 if (put_page_testzero(page))
115                                         __put_single_page(page);
116                                 return;
117                         }
118                         VM_BUG_ON(page_head != page->first_page);
119                         /*
120                          * We can release the refcount taken by
121                          * get_page_unless_zero() now that
122                          * __split_huge_page_refcount() is blocked on
123                          * the compound_lock.
124                          */
125                         if (put_page_testzero(page_head))
126                                 VM_BUG_ON(1);
127                         /* __split_huge_page_refcount will wait now */
128                         VM_BUG_ON(page_mapcount(page) <= 0);
129                         atomic_dec(&page->_mapcount);
130                         VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
131                         VM_BUG_ON(atomic_read(&page->_count) != 0);
132                         compound_unlock_irqrestore(page_head, flags);
133                         if (put_page_testzero(page_head)) {
134                                 if (PageHead(page_head))
135                                         __put_compound_page(page_head);
136                                 else
137                                         __put_single_page(page_head);
138                         }
139                 } else {
140                         /* page_head is a dangling pointer */
141                         VM_BUG_ON(PageTail(page));
142                         goto out_put_single;
143                 }
144         } else if (put_page_testzero(page)) {
145                 if (PageHead(page))
146                         __put_compound_page(page);
147                 else
148                         __put_single_page(page);
149         }
150 }
151
152 void put_page(struct page *page)
153 {
154         if (unlikely(PageCompound(page)))
155                 put_compound_page(page);
156         else if (put_page_testzero(page))
157                 __put_single_page(page);
158 }
159 EXPORT_SYMBOL(put_page);
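
/*
 * Illustrative pairing only (not part of this file): a caller that takes
 * a reference with get_page() must release it with put_page() when done:
 *
 *	get_page(page);
 *	... touch the page; it cannot be freed underneath us ...
 *	put_page(page);
 */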
160
161 /*
162  * This function is exported but must not be called by anything other
163  * than get_page(). It implements the slow path of get_page().
164  */
165 bool __get_page_tail(struct page *page)
166 {
167         /*
168          * This takes care of get_page() if run on a tail page
169          * returned by one of the get_user_pages/follow_page variants.
170          * get_user_pages/follow_page itself doesn't need the compound
171          * lock because it runs __get_page_tail_foll() under the
172          * proper PT lock that already serializes against
173          * split_huge_page().
174          */
175         unsigned long flags;
176         bool got = false;
177         struct page *page_head;
178
179         /*
180          * If this is a hugetlbfs page it cannot be split under us.  Simply
181          * increment refcount for the head page.
182          */
183         if (PageHuge(page)) {
184                 page_head = compound_head(page);
185                 atomic_inc(&page_head->_count);
186                 got = true;
187                 goto out;
188         }
189
190         page_head = compound_trans_head(page);
191         if (likely(page != page_head && get_page_unless_zero(page_head))) {
192                 /*
193                  * page_head wasn't a dangling pointer but it
194                  * may not be a head page anymore by the time
195                  * we obtain the lock. That is ok as long as it
196                  * can't be freed from under us.
197                  */
198                 flags = compound_lock_irqsave(page_head);
199                 /* here __split_huge_page_refcount won't run anymore */
200                 if (likely(PageTail(page))) {
201                         __get_page_tail_foll(page, false);
202                         got = true;
203                 }
204                 compound_unlock_irqrestore(page_head, flags);
205                 if (unlikely(!got))
206                         put_page(page_head);
207         }
208 out:
209         return got;
210 }
211 EXPORT_SYMBOL(__get_page_tail);
212
213 /**
214  * put_pages_list() - release a list of pages
215  * @pages: list of pages threaded on page->lru
216  *
217  * Release a list of pages which are strung together on page->lru.  Currently
218  * used by read_cache_pages() and related error recovery code.
219  */
220 void put_pages_list(struct list_head *pages)
221 {
222         while (!list_empty(pages)) {
223                 struct page *victim;
224
225                 victim = list_entry(pages->prev, struct page, lru);
226                 list_del(&victim->lru);
227                 page_cache_release(victim);
228         }
229 }
230 EXPORT_SYMBOL(put_pages_list);
231
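/*
 * Run @move_fn on each page in @pvec while holding the lru_lock of the
 * zone the page belongs to.  The lock is only dropped and re-taken when
 * consecutive pages come from different zones, which keeps lock traffic
 * low in the common case.  The pagevec's page references are dropped and
 * the pagevec is reinitialised afterwards.
 */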
232 static void pagevec_lru_move_fn(struct pagevec *pvec,
233                                 void (*move_fn)(struct page *page, void *arg),
234                                 void *arg)
235 {
236         int i;
237         struct zone *zone = NULL;
238         unsigned long flags = 0;
239
240         for (i = 0; i < pagevec_count(pvec); i++) {
241                 struct page *page = pvec->pages[i];
242                 struct zone *pagezone = page_zone(page);
243
244                 if (pagezone != zone) {
245                         if (zone)
246                                 spin_unlock_irqrestore(&zone->lru_lock, flags);
247                         zone = pagezone;
248                         spin_lock_irqsave(&zone->lru_lock, flags);
249                 }
250
251                 (*move_fn)(page, arg);
252         }
253         if (zone)
254                 spin_unlock_irqrestore(&zone->lru_lock, flags);
255         release_pages(pvec->pages, pvec->nr, pvec->cold);
256         pagevec_reinit(pvec);
257 }
258
259 static void pagevec_move_tail_fn(struct page *page, void *arg)
260 {
261         int *pgmoved = arg;
262         struct zone *zone = page_zone(page);
263
264         if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
265                 enum lru_list lru = page_lru_base_type(page);
266                 list_move_tail(&page->lru, &zone->lru[lru].list);
267                 mem_cgroup_rotate_reclaimable_page(page);
268                 (*pgmoved)++;
269         }
270 }
271
272 /*
273  * pagevec_move_tail() must be called with IRQ disabled.
274  * Otherwise this may cause nasty races.
275  */
276 static void pagevec_move_tail(struct pagevec *pvec)
277 {
278         int pgmoved = 0;
279
280         pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved);
281         __count_vm_events(PGROTATED, pgmoved);
282 }
283
284 /*
285  * Writeback is about to end against a page which has been marked for immediate
286  * reclaim.  If it still appears to be reclaimable, move it to the tail of the
287  * inactive list.
288  */
289 void rotate_reclaimable_page(struct page *page)
290 {
291         if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
292             !PageUnevictable(page) && PageLRU(page)) {
293                 struct pagevec *pvec;
294                 unsigned long flags;
295
296                 page_cache_get(page);
297                 local_irq_save(flags);
298                 pvec = &__get_cpu_var(lru_rotate_pvecs);
299                 if (!pagevec_add(pvec, page))
300                         pagevec_move_tail(pvec);
301                 local_irq_restore(flags);
302         }
303 }
304
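/*
 * Account one page scanned (and optionally rotated back to the active
 * list) in the zone-wide reclaim statistics and, if the page belongs to
 * a memory cgroup, in that cgroup's statistics as well.  Reclaim later
 * uses these counters to balance scanning pressure between the lists.
 */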
305 static void update_page_reclaim_stat(struct zone *zone, struct page *page,
306                                      int file, int rotated)
307 {
308         struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat;
309         struct zone_reclaim_stat *memcg_reclaim_stat;
310
311         memcg_reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page);
312
313         reclaim_stat->recent_scanned[file]++;
314         if (rotated)
315                 reclaim_stat->recent_rotated[file]++;
316
317         if (!memcg_reclaim_stat)
318                 return;
319
320         memcg_reclaim_stat->recent_scanned[file]++;
321         if (rotated)
322                 memcg_reclaim_stat->recent_rotated[file]++;
323 }
324
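/*
 * Move an inactive, evictable LRU page to the head of the corresponding
 * active list.  Called with zone->lru_lock held, either directly or via
 * pagevec_lru_move_fn().
 */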
325 static void __activate_page(struct page *page, void *arg)
326 {
327         struct zone *zone = page_zone(page);
328
329         if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
330                 int file = page_is_file_cache(page);
331                 int lru = page_lru_base_type(page);
332                 del_page_from_lru_list(zone, page, lru);
333
334                 SetPageActive(page);
335                 lru += LRU_ACTIVE;
336                 add_page_to_lru_list(zone, page, lru);
337                 __count_vm_event(PGACTIVATE);
338
339                 update_page_reclaim_stat(zone, page, file, 1);
340         }
341 }
342
343 #ifdef CONFIG_SMP
344 static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
345
346 static void activate_page_drain(int cpu)
347 {
348         struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);
349
350         if (pagevec_count(pvec))
351                 pagevec_lru_move_fn(pvec, __activate_page, NULL);
352 }
353
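/*
 * On SMP, activation is batched through a per-CPU pagevec so that
 * zone->lru_lock only has to be taken once per pagevec of pages.  The
 * pagevec is flushed when it fills up, and from lru_add_drain() via
 * activate_page_drain().
 */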
354 void activate_page(struct page *page)
355 {
356         if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
357                 struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
358
359                 page_cache_get(page);
360                 if (!pagevec_add(pvec, page))
361                         pagevec_lru_move_fn(pvec, __activate_page, NULL);
362                 put_cpu_var(activate_page_pvecs);
363         }
364 }
365
366 #else
367 static inline void activate_page_drain(int cpu)
368 {
369 }
370
371 void activate_page(struct page *page)
372 {
373         struct zone *zone = page_zone(page);
374
375         spin_lock_irq(&zone->lru_lock);
376         __activate_page(page, NULL);
377         spin_unlock_irq(&zone->lru_lock);
378 }
379 #endif
380
381 /*
382  * Mark a page as having seen activity.
383  *
384  * inactive,unreferenced        ->      inactive,referenced
385  * inactive,referenced          ->      active,unreferenced
386  * active,unreferenced          ->      active,referenced
387  */
388 void mark_page_accessed(struct page *page)
389 {
390         if (!PageActive(page) && !PageUnevictable(page) &&
391                         PageReferenced(page) && PageLRU(page)) {
392                 activate_page(page);
393                 ClearPageReferenced(page);
394         } else if (!PageReferenced(page)) {
395                 SetPageReferenced(page);
396         }
397 }
398
399 EXPORT_SYMBOL(mark_page_accessed);
400
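/*
 * Queue @page for addition to the @lru list via the current CPU's
 * lru_add_pvecs.  The pagevec is only flushed to the zone's LRU (taking
 * zone->lru_lock) when it fills up or when the CPU's pagevecs are
 * drained.
 */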
401 void __lru_cache_add(struct page *page, enum lru_list lru)
402 {
403         struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];
404
405         page_cache_get(page);
406         if (!pagevec_add(pvec, page))
407                 ____pagevec_lru_add(pvec, lru);
408         put_cpu_var(lru_add_pvecs);
409 }
410 EXPORT_SYMBOL(__lru_cache_add);
411
412 /**
413  * lru_cache_add_lru - add a page to a page list
414  * @page: the page to be added to the LRU.
415  * @lru: the LRU list to which the page is added.
416  */
417 void lru_cache_add_lru(struct page *page, enum lru_list lru)
418 {
419         if (PageActive(page)) {
420                 VM_BUG_ON(PageUnevictable(page));
421                 ClearPageActive(page);
422         } else if (PageUnevictable(page)) {
423                 VM_BUG_ON(PageActive(page));
424                 ClearPageUnevictable(page);
425         }
426
427         VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page));
428         __lru_cache_add(page, lru);
429 }
430
431 /**
432  * add_page_to_unevictable_list - add a page to the unevictable list
433  * @page:  the page to be added to the unevictable list
434  *
435  * Add page directly to its zone's unevictable list.  To avoid races with
436  * tasks that might be making the page evictable (through e.g. munlock,
437  * munmap or exit) while it's not on the lru, we want to add the page
438  * while it's locked or otherwise "invisible" to other tasks.  This is
439  * difficult to do when using the pagevec cache, so bypass that.
440  */
441 void add_page_to_unevictable_list(struct page *page)
442 {
443         struct zone *zone = page_zone(page);
444
445         spin_lock_irq(&zone->lru_lock);
446         SetPageUnevictable(page);
447         SetPageLRU(page);
448         add_page_to_lru_list(zone, page, LRU_UNEVICTABLE);
449         spin_unlock_irq(&zone->lru_lock);
450 }
451
452 /*
453  * If the page can not be invalidated, it is moved to the
454  * inactive list to speed up its reclaim.  It is moved to the
455  * head of the list, rather than the tail, to give the flusher
456  * threads some time to write it out, as this is much more
457  * effective than the single-page writeout from reclaim.
458  *
459  * If the page isn't page_mapped and is dirty/under writeback, it can be
460  * reclaimed asap using PG_reclaim.
461  *
462  * 1. active, mapped page -> none
463  * 2. active, dirty/writeback page -> inactive, head, PG_reclaim
464  * 3. inactive, mapped page -> none
465  * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim
466  * 5. inactive, clean -> inactive, tail
467  * 6. Others -> none
468  *
469  * In case 4, the page is moved to the head of the inactive list because
470  * the VM expects it to be written out by the flusher threads, as this is
471  * much more effective than the single-page writeout from reclaim.
472  */
473 static void lru_deactivate_fn(struct page *page, void *arg)
474 {
475         int lru, file;
476         bool active;
477         struct zone *zone = page_zone(page);
478
479         if (!PageLRU(page))
480                 return;
481
482         if (PageUnevictable(page))
483                 return;
484
485         /* Some processes are using the page */
486         if (page_mapped(page))
487                 return;
488
489         active = PageActive(page);
490
491         file = page_is_file_cache(page);
492         lru = page_lru_base_type(page);
493         del_page_from_lru_list(zone, page, lru + active);
494         ClearPageActive(page);
495         ClearPageReferenced(page);
496         add_page_to_lru_list(zone, page, lru);
497
498         if (PageWriteback(page) || PageDirty(page)) {
499                 /*
500                  * Setting PG_reclaim can race with end_page_writeback,
501                  * which can confuse readahead.  But the race window
502                  * is _really_ small and it's a non-critical problem.
503                  */
504                 SetPageReclaim(page);
505         } else {
506                 /*
507                  * The page's writeback ended while the page sat in the
508                  * pagevec, so move the page to the tail of the inactive list.
509                  */
510                 list_move_tail(&page->lru, &zone->lru[lru].list);
511                 mem_cgroup_rotate_reclaimable_page(page);
512                 __count_vm_event(PGROTATED);
513         }
514
515         if (active)
516                 __count_vm_event(PGDEACTIVATE);
517         update_page_reclaim_stat(zone, page, file, 0);
518 }
519
520 /*
521  * Drain pages out of the cpu's pagevecs.
522  * Either "cpu" is the current CPU, and preemption has already been
523  * disabled; or "cpu" is being hot-unplugged, and is already dead.
524  */
525 static void drain_cpu_pagevecs(int cpu)
526 {
527         struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu);
528         struct pagevec *pvec;
529         int lru;
530
531         for_each_lru(lru) {
532                 pvec = &pvecs[lru - LRU_BASE];
533                 if (pagevec_count(pvec))
534                         ____pagevec_lru_add(pvec, lru);
535         }
536
537         pvec = &per_cpu(lru_rotate_pvecs, cpu);
538         if (pagevec_count(pvec)) {
539                 unsigned long flags;
540
541                 /* No harm done if a racing interrupt already did this */
542                 local_irq_save(flags);
543                 pagevec_move_tail(pvec);
544                 local_irq_restore(flags);
545         }
546
547         pvec = &per_cpu(lru_deactivate_pvecs, cpu);
548         if (pagevec_count(pvec))
549                 pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
550
551         activate_page_drain(cpu);
552 }
553
554 /**
555  * deactivate_page - forcefully deactivate a page
556  * @page: page to deactivate
557  *
558  * This function hints the VM that @page is a good reclaim candidate,
559  * for example if its invalidation fails due to the page being dirty
560  * or under writeback.
561  */
562 void deactivate_page(struct page *page)
563 {
564         /*
565          * In a workload with many unevictable pages, such as one driven by
566          * mprotect, deactivating unevictable pages to accelerate reclaim is pointless.
567          */
568         if (PageUnevictable(page))
569                 return;
570
571         if (likely(get_page_unless_zero(page))) {
572                 struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
573
574                 if (!pagevec_add(pvec, page))
575                         pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
576                 put_cpu_var(lru_deactivate_pvecs);
577         }
578 }
579
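/*
 * Flush all of the current CPU's batched LRU operations (additions,
 * rotations, deactivations and activations) to the per-zone lists.
 */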
580 void lru_add_drain(void)
581 {
582         drain_cpu_pagevecs(get_cpu());
583         put_cpu();
584 }
585
586 static void lru_add_drain_per_cpu(struct work_struct *dummy)
587 {
588         lru_add_drain();
589 }
590
591 /*
592  * Returns 0 for success
593  */
594 int lru_add_drain_all(void)
595 {
596         return schedule_on_each_cpu(lru_add_drain_per_cpu);
597 }
598
599 /*
600  * Batched page_cache_release().  Decrement the reference count on all the
601  * passed pages.  If it fell to zero then remove the page from the LRU and
602  * free it.
603  *
604  * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
605  * for the remainder of the operation.
606  *
607  * The locking in this function is against shrink_inactive_list(): we recheck
608  * the page count inside the lock to see whether shrink_inactive_list()
609  * grabbed the page via the LRU.  If it did, give up: shrink_inactive_list()
610  * will free it.
611  */
612 void release_pages(struct page **pages, int nr, int cold)
613 {
614         int i;
615         struct pagevec pages_to_free;
616         struct zone *zone = NULL;
617         unsigned long uninitialized_var(flags);
618
619         pagevec_init(&pages_to_free, cold);
620         for (i = 0; i < nr; i++) {
621                 struct page *page = pages[i];
622
623                 if (unlikely(PageCompound(page))) {
624                         if (zone) {
625                                 spin_unlock_irqrestore(&zone->lru_lock, flags);
626                                 zone = NULL;
627                         }
628                         put_compound_page(page);
629                         continue;
630                 }
631
632                 if (!put_page_testzero(page))
633                         continue;
634
635                 if (PageLRU(page)) {
636                         struct zone *pagezone = page_zone(page);
637
638                         if (pagezone != zone) {
639                                 if (zone)
640                                         spin_unlock_irqrestore(&zone->lru_lock,
641                                                                         flags);
642                                 zone = pagezone;
643                                 spin_lock_irqsave(&zone->lru_lock, flags);
644                         }
645                         VM_BUG_ON(!PageLRU(page));
646                         __ClearPageLRU(page);
647                         del_page_from_lru(zone, page);
648                 }
649
650                 if (!pagevec_add(&pages_to_free, page)) {
651                         if (zone) {
652                                 spin_unlock_irqrestore(&zone->lru_lock, flags);
653                                 zone = NULL;
654                         }
655                         __pagevec_free(&pages_to_free);
656                         pagevec_reinit(&pages_to_free);
657                 }
658         }
659         if (zone)
660                 spin_unlock_irqrestore(&zone->lru_lock, flags);
661
662         pagevec_free(&pages_to_free);
663 }
664 EXPORT_SYMBOL(release_pages);
665
666 /*
667  * The pages which we're about to release may be in the deferred lru-addition
668  * queues.  That would prevent them from really being freed right now.  That's
669  * OK from a correctness point of view but is inefficient - those pages may be
670  * cache-warm and we want to give them back to the page allocator ASAP.
671  *
672  * So __pagevec_release() will drain those queues here.  __pagevec_lru_add()
673  * and __pagevec_lru_add_active() call release_pages() directly to avoid
674  * mutual recursion.
675  */
676 void __pagevec_release(struct pagevec *pvec)
677 {
678         lru_add_drain();
679         release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
680         pagevec_reinit(pvec);
681 }
682
683 EXPORT_SYMBOL(__pagevec_release);
684
685 /* used by __split_huge_page_refcount() */
686 void lru_add_page_tail(struct zone* zone,
687                        struct page *page, struct page *page_tail)
688 {
689         int active;
690         enum lru_list lru;
691         const int file = 0;
692         struct list_head *head;
693
694         VM_BUG_ON(!PageHead(page));
695         VM_BUG_ON(PageCompound(page_tail));
696         VM_BUG_ON(PageLRU(page_tail));
697         VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&zone->lru_lock));
698
699         SetPageLRU(page_tail);
700
701         if (page_evictable(page_tail, NULL)) {
702                 if (PageActive(page)) {
703                         SetPageActive(page_tail);
704                         active = 1;
705                         lru = LRU_ACTIVE_ANON;
706                 } else {
707                         active = 0;
708                         lru = LRU_INACTIVE_ANON;
709                 }
710                 update_page_reclaim_stat(zone, page_tail, file, active);
711                 if (likely(PageLRU(page)))
712                         head = page->lru.prev;
713                 else
714                         head = &zone->lru[lru].list;
715                 __add_page_to_lru_list(zone, page_tail, lru, head);
716         } else {
717                 SetPageUnevictable(page_tail);
718                 add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE);
719         }
720 }
721
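/*
 * Per-page callback for ____pagevec_lru_add(): mark the page as being on
 * the LRU (and active, if @arg names an active list), update the reclaim
 * statistics and link the page into the requested list.  Runs under
 * zone->lru_lock via pagevec_lru_move_fn().
 */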
722 static void ____pagevec_lru_add_fn(struct page *page, void *arg)
723 {
724         enum lru_list lru = (enum lru_list)arg;
725         struct zone *zone = page_zone(page);
726         int file = is_file_lru(lru);
727         int active = is_active_lru(lru);
728
729         VM_BUG_ON(PageActive(page));
730         VM_BUG_ON(PageUnevictable(page));
731         VM_BUG_ON(PageLRU(page));
732
733         SetPageLRU(page);
734         if (active)
735                 SetPageActive(page);
736         update_page_reclaim_stat(zone, page, file, active);
737         add_page_to_lru_list(zone, page, lru);
738 }
739
740 /*
741  * Add the passed pages to the LRU, then drop the caller's refcount
742  * on them.  Reinitialises the caller's pagevec.
743  */
744 void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
745 {
746         VM_BUG_ON(is_unevictable_lru(lru));
747
748         pagevec_lru_move_fn(pvec, ____pagevec_lru_add_fn, (void *)lru);
749 }
750
751 EXPORT_SYMBOL(____pagevec_lru_add);
752
753 /*
754  * Try to drop buffers from the pages in a pagevec
755  */
756 void pagevec_strip(struct pagevec *pvec)
757 {
758         int i;
759
760         for (i = 0; i < pagevec_count(pvec); i++) {
761                 struct page *page = pvec->pages[i];
762
763                 if (page_has_private(page) && trylock_page(page)) {
764                         if (page_has_private(page))
765                                 try_to_release_page(page, 0);
766                         unlock_page(page);
767                 }
768         }
769 }
770
771 /**
772  * pagevec_lookup - gang pagecache lookup
773  * @pvec:       Where the resulting pages are placed
774  * @mapping:    The address_space to search
775  * @start:      The starting page index
776  * @nr_pages:   The maximum number of pages
777  *
778  * pagevec_lookup() will search for and return a group of up to @nr_pages pages
779  * in the mapping.  The pages are placed in @pvec.  pagevec_lookup() takes a
780  * reference against the pages in @pvec.
781  *
782  * The search returns a group of mapping-contiguous pages with ascending
783  * indexes.  There may be holes in the indices due to not-present pages.
784  *
785  * pagevec_lookup() returns the number of pages which were found.
786  */
787 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
788                 pgoff_t start, unsigned nr_pages)
789 {
790         pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
791         return pagevec_count(pvec);
792 }
793
794 EXPORT_SYMBOL(pagevec_lookup);
795
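/*
 * Like pagevec_lookup(), but only returns pages tagged with @tag (e.g.
 * PAGECACHE_TAG_DIRTY).  @index is advanced by find_get_pages_tag() past
 * the last page returned, so repeated calls walk the mapping.
 */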
796 unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
797                 pgoff_t *index, int tag, unsigned nr_pages)
798 {
799         pvec->nr = find_get_pages_tag(mapping, index, tag,
800                                         nr_pages, pvec->pages);
801         return pagevec_count(pvec);
802 }
803
804 EXPORT_SYMBOL(pagevec_lookup_tag);
805
806 /*
807  * Perform any setup for the swap system
808  */
809 void __init swap_setup(void)
810 {
811         unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
812
813 #ifdef CONFIG_SWAP
814         bdi_init(swapper_space.backing_dev_info);
815 #endif
816
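        /*
         * page_cluster is the order of the swap read-ahead window, i.e.
         * 2^page_cluster pages are read around each major fault (see
         * swapin_readahead()); it is tunable at runtime via the
         * vm.page-cluster sysctl.
         */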
817         /* Use a smaller cluster for small-memory machines */
818         if (megs < 16)
819                 page_cluster = 2;
820         else
821                 page_cluster = 3;
822         /*
823          * Right now other parts of the system mean that we
824          * _really_ don't want to cluster much more.
825          */
826 }