include/asm-generic/pgtable.h

   1 #ifndef _ASM_GENERIC_PGTABLE_H
   2 #define _ASM_GENERIC_PGTABLE_H
   3
   4 #ifndef __ASSEMBLY__
   5 #ifdef CONFIG_MMU
   6
   7 #include <linux/mm_types.h>
   8
   9 #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
  10 extern int ptep_set_access_flags(struct vm_area_struct *vma,
  11                                  unsigned long address, pte_t *ptep,
  12                                  pte_t entry, int dirty);
  13 #endif
  14
  15 #ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
  16 extern int pmdp_set_access_flags(struct vm_area_struct *vma,
  17                                  unsigned long address, pmd_t *pmdp,
  18                                  pmd_t entry, int dirty);
  19 #endif
  20
  21 #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
  22 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
  23                                             unsigned long address,
  24                                             pte_t *ptep)
  25 {
  26         pte_t pte = *ptep;
  27         int r = 1;
  28         if (!pte_young(pte))
  29                 r = 0;
  30         else
  31                 set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
  32         return r;
  33 }
  34 #endif
  35
  36 #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
  37 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  38 static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
  39                                             unsigned long address,
  40                                             pmd_t *pmdp)
  41 {
  42         pmd_t pmd = *pmdp;
  43         int r = 1;
  44         if (!pmd_young(pmd))
  45                 r = 0;
  46         else
  47                 set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
  48         return r;
  49 }
  50 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
  51 static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
  52                                             unsigned long address,
  53                                             pmd_t *pmdp)
  54 {
  55         BUG();
  56         return 0;
  57 }
  58 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  59 #endif
  60
  61 #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
  62 int ptep_clear_flush_young(struct vm_area_struct *vma,
  63                            unsigned long address, pte_t *ptep);
  64 #endif
  65
  66 #ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
  67 int pmdp_clear_flush_young(struct vm_area_struct *vma,
  68                            unsigned long address, pmd_t *pmdp);
  69 #endif
  70
  71 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
  72 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
  73                                        unsigned long address,
  74                                        pte_t *ptep)
  75 {
  76         pte_t pte = *ptep;
  77         pte_clear(mm, address, ptep);
  78         return pte;
  79 }
  80 #endif
  81
  82 #ifndef __HAVE_ARCH_PMDP_GET_AND_CLEAR
  83 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  84 static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
  85                                        unsigned long address,
  86                                        pmd_t *pmdp)
  87 {
  88         pmd_t pmd = *pmdp;
  89         pmd_clear(pmdp);
  90         return pmd;
  91 }
  92 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  93 #endif
  94
  95 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
  96 static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
  97                                             unsigned long address, pte_t *ptep,
  98                                             int full)
  99 {
 100         pte_t pte;
 101         pte = ptep_get_and_clear(mm, address, ptep);
 102         return pte;
 103 }
 104 #endif
 105
 106 /*
 107  * Some architectures may be able to avoid expensive synchronization
 108  * primitives when modifications are made to PTE's which are already
 109  * not present, or in the process of an address space destruction.
 110  */
 111 #ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
 112 static inline void pte_clear_not_present_full(struct mm_struct *mm,
 113                                               unsigned long address,
 114                                               pte_t *ptep,
 115                                               int full)
 116 {
 117         pte_clear(mm, address, ptep);
 118 }
 119 #endif
 120
 121 #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
 122 extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
 123                               unsigned long address,
 124                               pte_t *ptep);
 125 #endif
 126
 127 #ifndef __HAVE_ARCH_PMDP_CLEAR_FLUSH
 128 extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
 129                               unsigned long address,
 130                               pmd_t *pmdp);
 131 #endif
 132
 133 #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
 134 struct mm_struct;
 135 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
 136 {
 137         pte_t old_pte = *ptep;
 138         set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
 139 }
 140 #endif
 141
 142 #ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
 143 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 144 static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 145                                       unsigned long address, pmd_t *pmdp)
 146 {
 147         pmd_t old_pmd = *pmdp;
 148         set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
 149 }
 150 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 151 static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 152                                       unsigned long address, pmd_t *pmdp)
 153 {
 154         BUG();
 155 }
 156 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 157 #endif
 158
 159 #ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
 160 extern void pmdp_splitting_flush(struct vm_area_struct *vma,
 161                                  unsigned long address, pmd_t *pmdp);
 162 #endif
 163
 164 #ifndef __HAVE_ARCH_PTE_SAME
 165 static inline int pte_same(pte_t pte_a, pte_t pte_b)
 166 {
 167         return pte_val(pte_a) == pte_val(pte_b);
 168 }
 169 #endif
 170
 171 #ifndef __HAVE_ARCH_PMD_SAME
 172 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 173 static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 174 {
 175         return pmd_val(pmd_a) == pmd_val(pmd_b);
 176 }
 177 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 178 static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 179 {
 180         BUG();
 181         return 0;
 182 }
 183 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 184 #endif
 185
 186 #ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
 187 #define page_test_and_clear_dirty(pfn, mapped)  (0)
 188 #endif
 189
 190 #ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
 191 #define pte_maybe_dirty(pte)            pte_dirty(pte)
 192 #else
 193 #define pte_maybe_dirty(pte)            (1)
 194 #endif
 195
 196 #ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
 197 #define page_test_and_clear_young(pfn) (0)
 198 #endif
 199
 200 #ifndef __HAVE_ARCH_PGD_OFFSET_GATE
 201 #define pgd_offset_gate(mm, addr)       pgd_offset(mm, addr)
 202 #endif
 203
 204 #ifndef __HAVE_ARCH_MOVE_PTE
 205 #define move_pte(pte, prot, old_addr, new_addr) (pte)
 206 #endif
 207
 208 #ifndef flush_tlb_fix_spurious_fault
 209 #define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
 210 #endif
 211
 212 #ifndef pgprot_noncached
 213 #define pgprot_noncached(prot)  (prot)
 214 #endif
 215
 216 #ifndef pgprot_writecombine
 217 #define pgprot_writecombine pgprot_noncached
 218 #endif
 219
 220 /*
 221  * When walking page tables, get the address of the next boundary,
 222  * or the end address of the range if that comes earlier.  Although no
 223  * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
 224  */
 225
 226 #define pgd_addr_end(addr, end)                                         \
 227 ({      unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;  \
 228         (__boundary - 1 < (end) - 1)? __boundary: (end);                \
 229 })
 230
 231 #ifndef pud_addr_end
 232 #define pud_addr_end(addr, end)                                         \
 233 ({      unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;      \
 234         (__boundary - 1 < (end) - 1)? __boundary: (end);                \
 235 })
 236 #endif
 237
 238 #ifndef pmd_addr_end
 239 #define pmd_addr_end(addr, end)                                         \
 240 ({      unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;      \
 241         (__boundary - 1 < (end) - 1)? __boundary: (end);                \
 242 })
 243 #endif
 244
 245 /*
 246  * When walking page tables, we usually want to skip any p?d_none entries;
 247  * and any p?d_bad entries - reporting the error before resetting to none.
 248  * Do the tests inline, but report and clear the bad entry in mm/memory.c.
 249  */
 250 void pgd_clear_bad(pgd_t *);
 251 void pud_clear_bad(pud_t *);
 252 void pmd_clear_bad(pmd_t *);
 253
 254 static inline int pgd_none_or_clear_bad(pgd_t *pgd)
 255 {
 256         if (pgd_none(*pgd))
 257                 return 1;
 258         if (unlikely(pgd_bad(*pgd))) {
 259                 pgd_clear_bad(pgd);
 260                 return 1;
 261         }
 262         return 0;
 263 }
 264
 265 static inline int pud_none_or_clear_bad(pud_t *pud)
 266 {
 267         if (pud_none(*pud))
 268                 return 1;
 269         if (unlikely(pud_bad(*pud))) {
 270                 pud_clear_bad(pud);
 271                 return 1;
 272         }
 273         return 0;
 274 }
 275
 276 static inline int pmd_none_or_clear_bad(pmd_t *pmd)
 277 {
 278         if (pmd_none(*pmd))
 279                 return 1;
 280         if (unlikely(pmd_bad(*pmd))) {
 281                 pmd_clear_bad(pmd);
 282                 return 1;
 283         }
 284         return 0;
 285 }
 286
 287 static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
 288                                              unsigned long addr,
 289                                              pte_t *ptep)
 290 {
 291         /*
 292          * Get the current pte state, but zero it out to make it
 293          * non-present, preventing the hardware from asynchronously
 294          * updating it.
 295          */
 296         return ptep_get_and_clear(mm, addr, ptep);
 297 }
 298
 299 static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
 300                                              unsigned long addr,
 301                                              pte_t *ptep, pte_t pte)
 302 {
 303         /*
 304          * The pte is non-present, so there's no hardware state to
 305          * preserve.
 306          */
 307         set_pte_at(mm, addr, ptep, pte);
 308 }
 309
 310 #ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
 311 /*
 312  * Start a pte protection read-modify-write transaction, which
 313  * protects against asynchronous hardware modifications to the pte.
 314  * The intention is not to prevent the hardware from making pte
 315  * updates, but to prevent any updates it may make from being lost.
 316  *
 317  * This does not protect against other software modifications of the
 318  * pte; the appropriate pte lock must be held over the transation.
 319  *
 320  * Note that this interface is intended to be batchable, meaning that
 321  * ptep_modify_prot_commit may not actually update the pte, but merely
 322  * queue the update to be done at some later time.  The update must be
 323  * actually committed before the pte lock is released, however.
 324  */
 325 static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
 326                                            unsigned long addr,
 327                                            pte_t *ptep)
 328 {
 329         return __ptep_modify_prot_start(mm, addr, ptep);
 330 }
 331
 332 /*
 333  * Commit an update to a pte, leaving any hardware-controlled bits in
 334  * the PTE unmodified.
 335  */
 336 static inline void ptep_modify_prot_commit(struct mm_struct *mm,
 337                                            unsigned long addr,
 338                                            pte_t *ptep, pte_t pte)
 339 {
 340         __ptep_modify_prot_commit(mm, addr, ptep, pte);
 341 }
 342 #endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
 343 #endif /* CONFIG_MMU */
 344
 345 /*
 346  * A facility to provide lazy MMU batching.  This allows PTE updates and
 347  * page invalidations to be delayed until a call to leave lazy MMU mode
 348  * is issued.  Some architectures may benefit from doing this, and it is
 349  * beneficial for both shadow and direct mode hypervisors, which may batch
 350  * the PTE updates which happen during this window.  Note that using this
 351  * interface requires that read hazards be removed from the code.  A read
 352  * hazard could result in the direct mode hypervisor case, since the actual
 353  * write to the page tables may not yet have taken place, so reads though
 354  * a raw PTE pointer after it has been modified are not guaranteed to be
 355  * up to date.  This mode can only be entered and left under the protection of
 356  * the page table locks for all page tables which may be modified.  In the UP
 357  * case, this is required so that preemption is disabled, and in the SMP case,
 358  * it must synchronize the delayed page table writes properly on other CPUs.
 359  */
 360 #ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
 361 #define arch_enter_lazy_mmu_mode()      do {} while (0)
 362 #define arch_leave_lazy_mmu_mode()      do {} while (0)
 363 #define arch_flush_lazy_mmu_mode()      do {} while (0)
 364 #endif
 365
 366 /*
 367  * A facility to provide batching of the reload of page tables and
 368  * other process state with the actual context switch code for
 369  * paravirtualized guests.  By convention, only one of the batched
 370  * update (lazy) modes (CPU, MMU) should be active at any given time,
 371  * entry should never be nested, and entry and exits should always be
 372  * paired.  This is for sanity of maintaining and reasoning about the
 373  * kernel code.  In this case, the exit (end of the context switch) is
 374  * in architecture-specific code, and so doesn't need a generic
 375  * definition.
 376  */
 377 #ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
 378 #define arch_start_context_switch(prev) do {} while (0)
 379 #endif
 380
 381 #ifndef __HAVE_PFNMAP_TRACKING
 382 /*
 383  * Interface that can be used by architecture code to keep track of
 384  * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
 385  *
 386  * track_pfn_vma_new is called when a _new_ pfn mapping is being established
 387  * for physical range indicated by pfn and size.
 388  */
 389 static inline int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
 390                                         unsigned long pfn, unsigned long size)
 391 {
 392         return 0;
 393 }
 394
 395 /*
 396  * Interface that can be used by architecture code to keep track of
 397  * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
 398  *
 399  * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
 400  * copied through copy_page_range().
 401  */
 402 static inline int track_pfn_vma_copy(struct vm_area_struct *vma)
 403 {
 404         return 0;
 405 }
 406
 407 /*
 408  * Interface that can be used by architecture code to keep track of
 409  * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
 410  *
 411  * untrack_pfn_vma is called while unmapping a pfnmap for a region.
 412  * untrack can be called for a specific region indicated by pfn and size or
 413  * can be for the entire vma (in which case size can be zero).
 414  */
 415 static inline void untrack_pfn_vma(struct vm_area_struct *vma,
 416                                         unsigned long pfn, unsigned long size)
 417 {
 418 }
 419 #else
 420 extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
 421                                 unsigned long pfn, unsigned long size);
 422 extern int track_pfn_vma_copy(struct vm_area_struct *vma);
 423 extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
 424                                 unsigned long size);
 425 #endif
 426
 427 #ifdef CONFIG_MMU
 428
 429 #ifndef CONFIG_TRANSPARENT_HUGEPAGE
 430 static inline int pmd_trans_huge(pmd_t pmd)
 431 {
 432         return 0;
 433 }
 434 static inline int pmd_trans_splitting(pmd_t pmd)
 435 {
 436         return 0;
 437 }
 438 #ifndef __HAVE_ARCH_PMD_WRITE
 439 static inline int pmd_write(pmd_t pmd)
 440 {
 441         BUG();
 442         return 0;
 443 }
 444 #endif /* __HAVE_ARCH_PMD_WRITE */
 445 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 446
 447 #ifndef pmd_read_atomic
 448 static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
 449 {
 450         /*
 451          * Depend on compiler for an atomic pmd read. NOTE: this is
 452          * only going to work, if the pmdval_t isn't larger than
 453          * an unsigned long.
 454          */
 455         return *pmdp;
 456 }
 457 #endif
 458
 459 /*
 460  * This function is meant to be used by sites walking pagetables with
 461  * the mmap_sem hold in read mode to protect against MADV_DONTNEED and
 462  * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
 463  * into a null pmd and the transhuge page fault can convert a null pmd
 464  * into an hugepmd or into a regular pmd (if the hugepage allocation
 465  * fails). While holding the mmap_sem in read mode the pmd becomes
 466  * stable and stops changing under us only if it's not null and not a
 467  * transhuge pmd. When those races occurs and this function makes a
 468  * difference vs the standard pmd_none_or_clear_bad, the result is
 469  * undefined so behaving like if the pmd was none is safe (because it
 470  * can return none anyway). The compiler level barrier() is critically
 471  * important to compute the two checks atomically on the same pmdval.
 472  *
 473  * For 32bit kernels with a 64bit large pmd_t this automatically takes
 474  * care of reading the pmd atomically to avoid SMP race conditions
 475  * against pmd_populate() when the mmap_sem is hold for reading by the
 476  * caller (a special atomic read not done by "gcc" as in the generic
 477  * version above, is also needed when THP is disabled because the page
 478  * fault can populate the pmd from under us).
 479  */
 480 static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
 481 {
 482         pmd_t pmdval = pmd_read_atomic(pmd);
 483         /*
 484          * The barrier will stabilize the pmdval in a register or on
 485          * the stack so that it will stop changing under the code.
 486          *
 487          * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
 488          * pmd_read_atomic is allowed to return a not atomic pmdval
 489          * (for example pointing to an hugepage that has never been
 490          * mapped in the pmd). The below checks will only care about
 491          * the low part of the pmd with 32bit PAE x86 anyway, with the
 492          * exception of pmd_none(). So the important thing is that if
 493          * the low part of the pmd is found null, the high part will
 494          * be also null or the pmd_none() check below would be
 495          * confused.
 496          */
 497 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 498         barrier();
 499 #endif
 500         if (pmd_none(pmdval))
 501                 return 1;
 502         if (unlikely(pmd_bad(pmdval))) {
 503                 if (!pmd_trans_huge(pmdval))
 504                         pmd_clear_bad(pmd);
 505                 return 1;
 506         }
 507         return 0;
 508 }
 509
 510 /*
 511  * This is a noop if Transparent Hugepage Support is not built into
 512  * the kernel. Otherwise it is equivalent to
 513  * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
 514  * places that already verified the pmd is not none and they want to
 515  * walk ptes while holding the mmap sem in read mode (write mode don't
 516  * need this). If THP is not enabled, the pmd can't go away under the
 517  * code even if MADV_DONTNEED runs, but if THP is enabled we need to
 518  * run a pmd_trans_unstable before walking the ptes after
 519  * split_huge_page_pmd returns (because it may have run when the pmd
 520  * become null, but then a page fault can map in a THP and not a
 521  * regular page).
 522  */
 523 static inline int pmd_trans_unstable(pmd_t *pmd)
 524 {
 525 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 526         return pmd_none_or_trans_huge_or_clear_bad(pmd);
 527 #else
 528         return 0;
 529 #endif
 530 }
 531
 532 #endif /* CONFIG_MMU */
 533
 534 #endif /* !__ASSEMBLY__ */
 535
 536 #endif /* _ASM_GENERIC_PGTABLE_H */