/*
 *  linux/mm/swap.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 */

/*
 * This file contains the default values for the operation of the
 * Linux VM subsystem. Fine-tuning documentation can be found in
 * Documentation/sysctl/vm.txt.
 * Started 18.12.91
 * Swap aging added 23.2.95, Stephen Tweedie.
 * Buffermem limits added 12.3.98, Rik van Riel.
 */

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/mm_inline.h>
#include <linux/buffer_head.h>  /* for try_to_release_page() */
#include <linux/percpu_counter.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/backing-dev.h>
#include <linux/memcontrol.h>
#include <linux/gfp.h>
#include <linux/hugetlb.h>

#include "internal.h"

/* How many pages do we try to swap or page in/out together? */
int page_cluster;

static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);

/*
 * This path almost never happens for VM activity - pages are normally
 * freed via pagevecs.  But it gets used by networking.
 */
static void __page_cache_release(struct page *page)
{
        if (PageLRU(page)) {
                unsigned long flags;
                struct zone *zone = page_zone(page);

                spin_lock_irqsave(&zone->lru_lock, flags);
                VM_BUG_ON(!PageLRU(page));
                __ClearPageLRU(page);
                del_page_from_lru(zone, page);
                spin_unlock_irqrestore(&zone->lru_lock, flags);
        }
}

static void __put_single_page(struct page *page)
{
        __page_cache_release(page);
        free_hot_cold_page(page, 0);
}

static void __put_compound_page(struct page *page)
{
        compound_page_dtor *dtor;

        if (!PageHuge(page))
                __page_cache_release(page);
        dtor = get_compound_page_dtor(page);
        (*dtor)(page);
}

static void put_compound_page(struct page *page)
{
        if (unlikely(PageTail(page))) {
                /* __split_huge_page_refcount can run under us */
                struct page *page_head = compound_trans_head(page);

                if (likely(page != page_head &&
                           get_page_unless_zero(page_head))) {
                        unsigned long flags;

                        if (PageHeadHuge(page_head)) {
                                if (likely(PageTail(page))) {
                                        /*
                                         * __split_huge_page_refcount
                                         * cannot race here.
                                         */
                                        VM_BUG_ON(!PageHead(page_head));
                                        atomic_dec(&page->_mapcount);
                                        if (put_page_testzero(page_head))
                                                VM_BUG_ON(1);
                                        if (put_page_testzero(page_head))
                                                __put_compound_page(page_head);
                                        return;
                                } else {
                                        /*
                                         * __split_huge_page_refcount
                                         * ran before us, "page" was a
                                         * THP tail. The split
                                         * page_head has been freed
                                         * and reallocated as a slab or
                                         * hugetlbfs page of smaller
                                         * order (only possible if
                                         * reallocated as slab on
                                         * x86).
                                         */
                                        goto skip_lock;
                                }
                        }
                        /*
                         * page_head wasn't a dangling pointer but it
                         * may not be a head page anymore by the time
                         * we obtain the lock. That is ok as long as it
                         * can't be freed from under us.
                         */
                        flags = compound_lock_irqsave(page_head);
                        if (unlikely(!PageTail(page))) {
                                /* __split_huge_page_refcount ran before us */
                                compound_unlock_irqrestore(page_head, flags);
                                VM_BUG_ON(PageHead(page_head));
skip_lock:
                                if (put_page_testzero(page_head)) {
                                        /*
                                         * The head page may have been
                                         * freed and reallocated as a
                                         * compound page of smaller
                                         * order and then freed again.
                                         * All we know is that it
                                         * cannot have become: a THP
                                         * page, a compound page of
                                         * higher order, a tail page.
                                         * That is because we still
                                         * hold the refcount of the
                                         * split THP tail and
                                         * page_head was the THP head
                                         * before the split.
                                         */
                                        if (PageHead(page_head))
                                                __put_compound_page(page_head);
                                        else
                                                __put_single_page(page_head);
                                }
out_put_single:
                                if (put_page_testzero(page))
                                        __put_single_page(page);
                                return;
                        }
                        VM_BUG_ON(page_head != page->first_page);
                        /*
                         * We can release the refcount taken by
                         * get_page_unless_zero() now that
                         * __split_huge_page_refcount() is blocked on
                         * the compound_lock.
                         */
                        if (put_page_testzero(page_head))
                                VM_BUG_ON(1);
                        /* __split_huge_page_refcount will wait now */
                        VM_BUG_ON(page_mapcount(page) <= 0);
                        atomic_dec(&page->_mapcount);
                        VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
                        VM_BUG_ON(atomic_read(&page->_count) != 0);
                        compound_unlock_irqrestore(page_head, flags);
                        if (put_page_testzero(page_head)) {
                                if (PageHead(page_head))
                                        __put_compound_page(page_head);
                                else
                                        __put_single_page(page_head);
                        }
                } else {
                        /* page_head is a dangling pointer */
                        VM_BUG_ON(PageTail(page));
                        goto out_put_single;
                }
        } else if (put_page_testzero(page)) {
                if (PageHead(page))
                        __put_compound_page(page);
                else
                        __put_single_page(page);
        }
}

void put_page(struct page *page)
{
        if (unlikely(PageCompound(page)))
                put_compound_page(page);
        else if (put_page_testzero(page))
                __put_single_page(page);
}
EXPORT_SYMBOL(put_page);
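
/*
 * Illustrative sketch (not part of this file): the usual pairing seen in
 * callers.  Code that takes its own reference, e.g. via get_page() or
 * get_user_pages(), is expected to drop it with put_page(); the page is
 * only freed once the last reference goes away.  The caller below is
 * hypothetical.
 *
 *        get_page(page);
 *        ... use the page; it cannot be freed under us ...
 *        put_page(page);
 */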

/*
 * This function is exported but must not be called by anything other
 * than get_page(). It implements the slow path of get_page().
 */
bool __get_page_tail(struct page *page)
{
        /*
         * This takes care of get_page() if run on a tail page
         * returned by one of the get_user_pages/follow_page variants.
         * get_user_pages/follow_page itself doesn't need the compound
         * lock because it runs __get_page_tail_foll() under the
         * proper PT lock that already serializes against
         * split_huge_page().
         */
        unsigned long flags;
        bool got = false;
        struct page *page_head = compound_trans_head(page);

        if (likely(page != page_head && get_page_unless_zero(page_head))) {
                /* Refer to the comment in put_compound_page(). */
                if (PageHeadHuge(page_head)) {
                        if (likely(PageTail(page))) {
                                /*
                                 * This is a hugetlbfs
                                 * page. __split_huge_page_refcount
                                 * cannot race here.
                                 */
                                VM_BUG_ON(!PageHead(page_head));
                                __get_page_tail_foll(page, false);
                                return true;
                        } else {
                                /*
                                 * __split_huge_page_refcount ran
                                 * before us, "page" was a THP
                                 * tail. The split page_head has been
                                 * freed and reallocated as a slab or
                                 * hugetlbfs page of smaller order
                                 * (only possible if reallocated as
                                 * slab on x86).
                                 */
                                put_page(page_head);
                                return false;
                        }
                }
                /*
                 * page_head wasn't a dangling pointer but it
                 * may not be a head page anymore by the time
                 * we obtain the lock. That is ok as long as it
                 * can't be freed from under us.
                 */
                flags = compound_lock_irqsave(page_head);
                /* here __split_huge_page_refcount won't run anymore */
                if (likely(PageTail(page))) {
                        __get_page_tail_foll(page, false);
                        got = true;
                }
                compound_unlock_irqrestore(page_head, flags);
                if (unlikely(!got))
                        put_page(page_head);
        }
        return got;
}
EXPORT_SYMBOL(__get_page_tail);

/**
 * put_pages_list() - release a list of pages
 * @pages: list of pages threaded on page->lru
 *
 * Release a list of pages which are strung together on page.lru.  Currently
 * used by read_cache_pages() and related error recovery code.
 */
void put_pages_list(struct list_head *pages)
{
        while (!list_empty(pages)) {
                struct page *victim;

                victim = list_entry(pages->prev, struct page, lru);
                list_del(&victim->lru);
                page_cache_release(victim);
        }
}
EXPORT_SYMBOL(put_pages_list);
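
/*
 * Illustrative sketch (hypothetical caller): pages collected on a private
 * list threaded through page->lru can all be released with one call, for
 * instance in an error path similar to read_cache_pages().  The list name
 * "my_pages" is made up for the example.
 *
 *        LIST_HEAD(my_pages);
 *
 *        ... gather pages via list_add(&page->lru, &my_pages) ...
 *        put_pages_list(&my_pages);      (the list is empty on return)
 */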

static void pagevec_lru_move_fn(struct pagevec *pvec,
                                void (*move_fn)(struct page *page, void *arg),
                                void *arg)
{
        int i;
        struct zone *zone = NULL;
        unsigned long flags = 0;

        for (i = 0; i < pagevec_count(pvec); i++) {
                struct page *page = pvec->pages[i];
                struct zone *pagezone = page_zone(page);

                if (pagezone != zone) {
                        if (zone)
                                spin_unlock_irqrestore(&zone->lru_lock, flags);
                        zone = pagezone;
                        spin_lock_irqsave(&zone->lru_lock, flags);
                }

                (*move_fn)(page, arg);
        }
        if (zone)
                spin_unlock_irqrestore(&zone->lru_lock, flags);
        release_pages(pvec->pages, pvec->nr, pvec->cold);
        pagevec_reinit(pvec);
}

static void pagevec_move_tail_fn(struct page *page, void *arg)
{
        int *pgmoved = arg;
        struct zone *zone = page_zone(page);

        if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
                enum lru_list lru = page_lru_base_type(page);
                list_move_tail(&page->lru, &zone->lru[lru].list);
                mem_cgroup_rotate_reclaimable_page(page);
                (*pgmoved)++;
        }
}

/*
 * pagevec_move_tail() must be called with IRQ disabled.
 * Otherwise this may cause nasty races.
 */
static void pagevec_move_tail(struct pagevec *pvec)
{
        int pgmoved = 0;

        pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved);
        __count_vm_events(PGROTATED, pgmoved);
}

/*
 * Writeback is about to end against a page which has been marked for immediate
 * reclaim.  If it still appears to be reclaimable, move it to the tail of the
 * inactive list.
 */
void rotate_reclaimable_page(struct page *page)
{
        if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
            !PageUnevictable(page) && PageLRU(page)) {
                struct pagevec *pvec;
                unsigned long flags;

                page_cache_get(page);
                local_irq_save(flags);
                pvec = &__get_cpu_var(lru_rotate_pvecs);
                if (!pagevec_add(pvec, page))
                        pagevec_move_tail(pvec);
                local_irq_restore(flags);
        }
}

static void update_page_reclaim_stat(struct zone *zone, struct page *page,
                                     int file, int rotated)
{
        struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat;
        struct zone_reclaim_stat *memcg_reclaim_stat;

        memcg_reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page);

        reclaim_stat->recent_scanned[file]++;
        if (rotated)
                reclaim_stat->recent_rotated[file]++;

        if (!memcg_reclaim_stat)
                return;

        memcg_reclaim_stat->recent_scanned[file]++;
        if (rotated)
                memcg_reclaim_stat->recent_rotated[file]++;
}

static void __activate_page(struct page *page, void *arg)
{
        struct zone *zone = page_zone(page);

        if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
                int file = page_is_file_cache(page);
                int lru = page_lru_base_type(page);
                del_page_from_lru_list(zone, page, lru);

                SetPageActive(page);
                lru += LRU_ACTIVE;
                add_page_to_lru_list(zone, page, lru);
                __count_vm_event(PGACTIVATE);

                update_page_reclaim_stat(zone, page, file, 1);
        }
}

#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);

static void activate_page_drain(int cpu)
{
        struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);

        if (pagevec_count(pvec))
                pagevec_lru_move_fn(pvec, __activate_page, NULL);
}

void activate_page(struct page *page)
{
        if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
                struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);

                page_cache_get(page);
                if (!pagevec_add(pvec, page))
                        pagevec_lru_move_fn(pvec, __activate_page, NULL);
                put_cpu_var(activate_page_pvecs);
        }
}

#else
static inline void activate_page_drain(int cpu)
{
}

void activate_page(struct page *page)
{
        struct zone *zone = page_zone(page);

        spin_lock_irq(&zone->lru_lock);
        __activate_page(page, NULL);
        spin_unlock_irq(&zone->lru_lock);
}
#endif

/*
 * Mark a page as having seen activity.
 *
 * inactive,unreferenced        ->      inactive,referenced
 * inactive,referenced          ->      active,unreferenced
 * active,unreferenced          ->      active,referenced
 */
void mark_page_accessed(struct page *page)
{
        if (!PageActive(page) && !PageUnevictable(page) &&
                        PageReferenced(page) && PageLRU(page)) {
                activate_page(page);
                ClearPageReferenced(page);
        } else if (!PageReferenced(page)) {
                SetPageReferenced(page);
        }
}

EXPORT_SYMBOL(mark_page_accessed);
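
/*
 * Worked example (sketch, no caller in this file): for a page sitting cold
 * on the inactive list, two calls are needed before it is promoted:
 *
 *        mark_page_accessed(page);   inactive,unreferenced -> inactive,referenced
 *        mark_page_accessed(page);   inactive,referenced   -> active,unreferenced
 *
 * A single touch only sets PG_referenced; it is repeated access that moves
 * the page onto the active list.
 */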

void __lru_cache_add(struct page *page, enum lru_list lru)
{
        struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];

        page_cache_get(page);
        if (!pagevec_add(pvec, page))
                ____pagevec_lru_add(pvec, lru);
        put_cpu_var(lru_add_pvecs);
}
EXPORT_SYMBOL(__lru_cache_add);

/**
 * lru_cache_add_lru - add a page to an LRU list
 * @page: the page to be added to the LRU.
 * @lru: the LRU list to which the page is added.
 */
void lru_cache_add_lru(struct page *page, enum lru_list lru)
{
        if (PageActive(page)) {
                VM_BUG_ON(PageUnevictable(page));
                ClearPageActive(page);
        } else if (PageUnevictable(page)) {
                VM_BUG_ON(PageActive(page));
                ClearPageUnevictable(page);
        }

        VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page));
        __lru_cache_add(page, lru);
}
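
/*
 * Usage note (sketch): rather than passing an lru list explicitly, most
 * callers are expected to go through the convenience wrappers declared in
 * <linux/swap.h>, which pick the base list by page type, roughly:
 *
 *        lru_cache_add_anon(page);   batches onto LRU_INACTIVE_ANON
 *        lru_cache_add_file(page);   batches onto LRU_INACTIVE_FILE
 *
 * Either way the page ends up in __lru_cache_add() above and sits in the
 * per-cpu lru_add_pvecs until the pagevec is drained to the zone LRU.
 */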

/**
 * add_page_to_unevictable_list - add a page to the unevictable list
 * @page:  the page to be added to the unevictable list
 *
 * Add page directly to its zone's unevictable list.  To avoid races with
 * tasks that might be making the page evictable, through eg. munlock,
 * munmap or exit, while it's not on the lru, we want to add the page
 * while it's locked or otherwise "invisible" to other tasks.  This is
 * difficult to do when using the pagevec cache, so bypass that.
 */
void add_page_to_unevictable_list(struct page *page)
{
        struct zone *zone = page_zone(page);

        spin_lock_irq(&zone->lru_lock);
        SetPageUnevictable(page);
        SetPageLRU(page);
        add_page_to_lru_list(zone, page, LRU_UNEVICTABLE);
        spin_unlock_irq(&zone->lru_lock);
}

/*
 * If the page cannot be invalidated, it is moved to the
 * inactive list to speed up its reclaim.  It is moved to the
 * head of the list, rather than the tail, to give the flusher
 * threads some time to write it out, as this is much more
 * effective than the single-page writeout from reclaim.
 *
 * If the page isn't page_mapped() and is dirty/under writeback, it
 * can be reclaimed asap by using PG_reclaim.
 *
 * 1. active, mapped page -> none
 * 2. active, dirty/writeback page -> inactive, head, PG_reclaim
 * 3. inactive, mapped page -> none
 * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim
 * 5. inactive, clean -> inactive, tail
 * 6. Others -> none
 *
 * In case 4, the page is moved to the head of the inactive list because
 * the VM expects the flusher threads to write it out, which is much more
 * effective than the single-page writeout from reclaim.
 */
static void lru_deactivate_fn(struct page *page, void *arg)
{
        int lru, file;
        bool active;
        struct zone *zone = page_zone(page);

        if (!PageLRU(page))
                return;

        if (PageUnevictable(page))
                return;

        /* Some processes are using the page */
        if (page_mapped(page))
                return;

        active = PageActive(page);

        file = page_is_file_cache(page);
        lru = page_lru_base_type(page);
        del_page_from_lru_list(zone, page, lru + active);
        ClearPageActive(page);
        ClearPageReferenced(page);
        add_page_to_lru_list(zone, page, lru);

        if (PageWriteback(page) || PageDirty(page)) {
                /*
                 * Setting PG_reclaim can race with end_page_writeback(),
                 * which may confuse readahead.  But the race window is
                 * _really_ small and it is a non-critical problem.
                 */
                SetPageReclaim(page);
        } else {
                /*
                 * The page's writeback ended while it was in the pagevec,
                 * so move the page to the tail of the inactive list.
                 */
                list_move_tail(&page->lru, &zone->lru[lru].list);
                mem_cgroup_rotate_reclaimable_page(page);
                __count_vm_event(PGROTATED);
        }

        if (active)
                __count_vm_event(PGDEACTIVATE);
        update_page_reclaim_stat(zone, page, file, 0);
}

/*
 * Drain pages out of the cpu's pagevecs.
 * Either "cpu" is the current CPU, and preemption has already been
 * disabled; or "cpu" is being hot-unplugged, and is already dead.
 */
static void drain_cpu_pagevecs(int cpu)
{
        struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu);
        struct pagevec *pvec;
        int lru;

        for_each_lru(lru) {
                pvec = &pvecs[lru - LRU_BASE];
                if (pagevec_count(pvec))
                        ____pagevec_lru_add(pvec, lru);
        }

        pvec = &per_cpu(lru_rotate_pvecs, cpu);
        if (pagevec_count(pvec)) {
                unsigned long flags;

                /* No harm done if a racing interrupt already did this */
                local_irq_save(flags);
                pagevec_move_tail(pvec);
                local_irq_restore(flags);
        }

        pvec = &per_cpu(lru_deactivate_pvecs, cpu);
        if (pagevec_count(pvec))
                pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);

        activate_page_drain(cpu);
}

/**
 * deactivate_page - forcefully deactivate a page
 * @page: page to deactivate
 *
 * This function hints the VM that @page is a good reclaim candidate,
 * for example if its invalidation fails due to the page being dirty
 * or under writeback.
 */
void deactivate_page(struct page *page)
{
        /*
         * In a workload with many unevictable pages (such as mprotect),
         * deactivating unevictable pages to accelerate reclaim is pointless.
         */
        if (PageUnevictable(page))
                return;

        if (likely(get_page_unless_zero(page))) {
                struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);

                if (!pagevec_add(pvec, page))
                        pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
                put_cpu_var(lru_deactivate_pvecs);
        }
}
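
/*
 * Illustrative sketch (hypothetical caller): a typical user is invalidation
 * code that could not drop a page because it is dirty or under writeback
 * but still wants it reclaimed soon:
 *
 *        if (!invalidate_inode_page(page))
 *                deactivate_page(page);
 *
 * which roughly mirrors what invalidate_mapping_pages() in mm/truncate.c
 * does when an invalidation attempt fails.
 */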

void lru_add_drain(void)
{
        drain_cpu_pagevecs(get_cpu());
        put_cpu();
}

static void lru_add_drain_per_cpu(struct work_struct *dummy)
{
        lru_add_drain();
}

/*
 * Returns 0 for success
 */
int lru_add_drain_all(void)
{
        return schedule_on_each_cpu(lru_add_drain_per_cpu);
}

/*
 * Batched page_cache_release().  Decrement the reference count on all the
 * passed pages.  If it fell to zero then remove the page from the LRU and
 * free it.
 *
 * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
 * for the remainder of the operation.
 *
 * The locking in this function is against shrink_inactive_list(): we recheck
 * the page count inside the lock to see whether shrink_inactive_list()
 * grabbed the page via the LRU.  If it did, give up: shrink_inactive_list()
 * will free it.
 */
void release_pages(struct page **pages, int nr, int cold)
{
        int i;
        LIST_HEAD(pages_to_free);
        struct zone *zone = NULL;
        unsigned long uninitialized_var(flags);

        for (i = 0; i < nr; i++) {
                struct page *page = pages[i];

                if (unlikely(PageCompound(page))) {
                        if (zone) {
                                spin_unlock_irqrestore(&zone->lru_lock, flags);
                                zone = NULL;
                        }
                        put_compound_page(page);
                        continue;
                }

                if (!put_page_testzero(page))
                        continue;

                if (PageLRU(page)) {
                        struct zone *pagezone = page_zone(page);

                        if (pagezone != zone) {
                                if (zone)
                                        spin_unlock_irqrestore(&zone->lru_lock,
                                                               flags);
                                zone = pagezone;
                                spin_lock_irqsave(&zone->lru_lock, flags);
                        }
                        VM_BUG_ON(!PageLRU(page));
                        __ClearPageLRU(page);
                        del_page_from_lru(zone, page);
                }

                list_add(&page->lru, &pages_to_free);
        }
        if (zone)
                spin_unlock_irqrestore(&zone->lru_lock, flags);

        free_hot_cold_page_list(&pages_to_free, cold);
}
EXPORT_SYMBOL(release_pages);

/*
 * The pages which we're about to release may be in the deferred lru-addition
 * queues.  That would prevent them from really being freed right now.  That's
 * OK from a correctness point of view but is inefficient - those pages may be
 * cache-warm and we want to give them back to the page allocator ASAP.
 *
 * So __pagevec_release() will drain those queues here.  __pagevec_lru_add()
 * and __pagevec_lru_add_active() call release_pages() directly to avoid
 * mutual recursion.
 */
void __pagevec_release(struct pagevec *pvec)
{
        lru_add_drain();
        release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
        pagevec_reinit(pvec);
}

EXPORT_SYMBOL(__pagevec_release);

/* used by __split_huge_page_refcount() */
void lru_add_page_tail(struct zone *zone,
                       struct page *page, struct page *page_tail)
{
        int active;
        enum lru_list lru;
        const int file = 0;
        struct list_head *head;

        VM_BUG_ON(!PageHead(page));
        VM_BUG_ON(PageCompound(page_tail));
        VM_BUG_ON(PageLRU(page_tail));
        VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&zone->lru_lock));

        SetPageLRU(page_tail);

        if (page_evictable(page_tail, NULL)) {
                if (PageActive(page)) {
                        SetPageActive(page_tail);
                        active = 1;
                        lru = LRU_ACTIVE_ANON;
                } else {
                        active = 0;
                        lru = LRU_INACTIVE_ANON;
                }
                update_page_reclaim_stat(zone, page_tail, file, active);
                if (likely(PageLRU(page)))
                        head = page->lru.prev;
                else
                        head = &zone->lru[lru].list;
                __add_page_to_lru_list(zone, page_tail, lru, head);
        } else {
                SetPageUnevictable(page_tail);
                add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE);
        }
}

static void ____pagevec_lru_add_fn(struct page *page, void *arg)
{
        enum lru_list lru = (enum lru_list)arg;
        struct zone *zone = page_zone(page);
        int file = is_file_lru(lru);
        int active = is_active_lru(lru);

        VM_BUG_ON(PageActive(page));
        VM_BUG_ON(PageUnevictable(page));
        VM_BUG_ON(PageLRU(page));

        SetPageLRU(page);
        if (active)
                SetPageActive(page);
        update_page_reclaim_stat(zone, page, file, active);
        add_page_to_lru_list(zone, page, lru);
}

/*
 * Add the passed pages to the LRU, then drop the caller's refcount
 * on them.  Reinitialises the caller's pagevec.
 */
void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
{
        VM_BUG_ON(is_unevictable_lru(lru));

        pagevec_lru_move_fn(pvec, ____pagevec_lru_add_fn, (void *)lru);
}

EXPORT_SYMBOL(____pagevec_lru_add);

/*
 * Try to drop buffers from the pages in a pagevec
 */
void pagevec_strip(struct pagevec *pvec)
{
        int i;

        for (i = 0; i < pagevec_count(pvec); i++) {
                struct page *page = pvec->pages[i];

                if (page_has_private(page) && trylock_page(page)) {
                        if (page_has_private(page))
                                try_to_release_page(page, 0);
                        unlock_page(page);
                }
        }
}

/**
 * pagevec_lookup - gang pagecache lookup
 * @pvec:       Where the resulting pages are placed
 * @mapping:    The address_space to search
 * @start:      The starting page index
 * @nr_pages:   The maximum number of pages
 *
 * pagevec_lookup() will search for and return a group of up to @nr_pages pages
 * in the mapping.  The pages are placed in @pvec.  pagevec_lookup() takes a
 * reference against the pages in @pvec.
 *
 * The search returns a group of mapping-contiguous pages with ascending
 * indexes.  There may be holes in the indices due to not-present pages.
 *
 * pagevec_lookup() returns the number of pages which were found.
 */
unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
                pgoff_t start, unsigned nr_pages)
{
        pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
        return pagevec_count(pvec);
}

EXPORT_SYMBOL(pagevec_lookup);
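
/*
 * Illustrative sketch (hypothetical caller): the usual pattern is to walk a
 * mapping in PAGEVEC_SIZE batches and drop the references pagevec_lookup()
 * took by calling pagevec_release():
 *
 *        struct pagevec pvec;
 *        pgoff_t index = 0;
 *
 *        pagevec_init(&pvec, 0);
 *        while (pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE)) {
 *                index = pvec.pages[pagevec_count(&pvec) - 1]->index + 1;
 *                ... examine pvec.pages[0 .. pagevec_count(&pvec) - 1] ...
 *                pagevec_release(&pvec);
 *        }
 */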

unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
                pgoff_t *index, int tag, unsigned nr_pages)
{
        pvec->nr = find_get_pages_tag(mapping, index, tag,
                                      nr_pages, pvec->pages);
        return pagevec_count(pvec);
}

EXPORT_SYMBOL(pagevec_lookup_tag);

/*
 * Perform any setup for the swap system
 */
void __init swap_setup(void)
{
        unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);

#ifdef CONFIG_SWAP
        bdi_init(swapper_space.backing_dev_info);
#endif

        /* Use a smaller cluster for small-memory machines */
        if (megs < 16)
                page_cluster = 2;
        else
                page_cluster = 3;
        /*
         * Right now other parts of the system mean that we
         * _really_ don't want to cluster much more.
         */
}
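
/*
 * Worked example: page_cluster is the log2 of the swap readahead window,
 * so the default of 3 chosen above means up to 1 << 3 = 8 pages are
 * swapped in together, and small-memory machines use 1 << 2 = 4.  The
 * value stays tunable at runtime via /proc/sys/vm/page-cluster.
 */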