1 /**************************************************************************
2 * Copyright (c) 2007, Intel Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 **************************************************************************/
23 * Code for the SGX MMU:
27 * clflush on one processor only:
28 * clflush should apparently flush the cache line on all processors in an
34 * The usage of the slots must be completely encapsulated within a spinlock, and
35 * no other functions that may be using the locks for other purposes may be
36 * called from within the locked region.
37 * Since the slots are per processor, this will guarantee that we are the only
42 * TODO: Inserting ptes from an interrupt handler:
43 * This may be desirable for some SGX functionality where the GPU can fault in
44 * needed pages. For that, we need to make an atomic insert_pages function, that
46 * If it fails, the caller needs to insert the page using a workqueue function,
47 * but on average it should be fast.
/*
 * Driver-wide MMU state shared by every page directory of one device.
 * Members visible in this view: the rw-semaphore sem, the needs_tlbflush
 * flag, the mapped register window, the default page directory, the clflush
 * mask and a back-pointer to the device-private data.
 * NOTE(review): in this view the two "protects ..." member comments have no
 * closing delimiter before the bif_ctrl line; confirm against the full file.
 */
50 struct psb_mmu_driver {
51 /* protects driver- and pd structures. Always take in read mode
52 * before taking the page table spinlock.
54 struct rw_semaphore sem;
56 /* protects page tables, directory tables and pt tables.
61 atomic_t needs_tlbflush;
63 uint8_t __iomem *register_map;
64 struct psb_mmu_pd *default_pd;
65 /*uint32_t bif_ctrl;*/
/* Complement of (clflush_add - 1); computed in psb_mmu_driver_init(). */
68 unsigned long clflush_mask;
/* Owning device-private structure; stored by psb_mmu_driver_init(). */
70 struct drm_psb_private *dev_priv;
/*
 * Member declarations of the page-table / page-directory bookkeeping
 * structures. The enclosing struct headers are not visible in this view.
 * NOTE(review): code in this file reads pt->pd on a struct psb_mmu_pt and
 * pd->driver, pd->tables, pd->dummy_pt and pd->dummy_page on a struct
 * psb_mmu_pd, so presumably the first member belongs to the former and the
 * remaining ones to the latter; confirm against the full file.
 */
/* Page directory this object belongs to. */
76 struct psb_mmu_pd *pd;
/* MMU driver instance that owns the page directory. */
84 struct psb_mmu_driver *driver;
/* Per-directory-slot page-table pointers; allocated with 1024 entries. */
86 struct psb_mmu_pt **tables;
/* Page table whose entries are all set to the invalid PTE. */
88 struct page *dummy_pt;
/* Zero-filled page (cleared in psb_mmu_alloc_pd()). */
89 struct page *dummy_page;
95 static inline uint32_t psb_mmu_pt_index(uint32_t offset)
97 return (offset >> PSB_PTE_SHIFT) & 0x3FF;
100 static inline uint32_t psb_mmu_pd_index(uint32_t offset)
102 return offset >> PSB_PDE_SHIFT;
/*
 * Issue a single x86 clflush instruction for the given address. The "memory"
 * clobber keeps the compiler from reordering memory accesses across it.
 */
static inline void psb_clflush(void *addr)
{
	__asm__ __volatile__(
		"clflush (%0)\n"
		: /* no outputs */
		: "r"(addr)
		: "memory");
}
/*
 * Cache-flush helper that is gated on the driver's has_clflush flag. Callers
 * in this file pass the address of a page-directory entry they just wrote.
 * Only the first signature line and the guard are visible in this view;
 * presumably the rest flushes the line holding the address - confirm.
 */
110 static inline void psb_mmu_clflush(struct psb_mmu_driver *driver,
/* has_clflush is cleared, then set in psb_mmu_driver_init() when the boot
 * CPU reports X86_FEATURE_CLFLSH. */
113 if (!driver->has_clflush)
/*
 * Cache-flush the contents of one page. The page is mapped with
 * kmap_atomic() in the KM_USER0 slot, visited in clflush_count steps and
 * unmapped again. The loop body is not visible in this view.
 */
121 static void psb_page_clflush(struct psb_mmu_driver *driver, struct page* page)
/* Step and iteration count are both derived from driver->clflush_add. */
123 uint32_t clflush_add = driver->clflush_add >> PAGE_SHIFT;
124 uint32_t clflush_count = PAGE_SIZE / clflush_add;
/* Temporary atomic mapping that lasts for the duration of the walk. */
128 clf = kmap_atomic(page, KM_USER0);
130 for (i = 0; i < clflush_count; ++i) {
135 kunmap_atomic(clf, KM_USER0);
138 static void psb_pages_clflush(struct psb_mmu_driver *driver,
139 struct page *page[], unsigned long num_pages)
143 if (!driver->has_clflush)
146 for (i = 0; i < num_pages; i++)
147 psb_page_clflush(driver, *page++);
/*
 * Flush the page directory; the "_locked" variant. Every caller in this file
 * holds driver->sem in write mode around the call. Most of the body is not
 * visible in this view.
 */
150 static void psb_mmu_flush_pd_locked(struct psb_mmu_driver *driver,
/* Clear the pending-TLB-flush flag that PDE updates elsewhere set to 1. */
153 atomic_set(&driver->needs_tlbflush, 0);
156 static void psb_mmu_flush_pd(struct psb_mmu_driver *driver, int force)
158 down_write(&driver->sem);
159 psb_mmu_flush_pd_locked(driver, force);
160 up_write(&driver->sem);
/*
 * Public MMU flush entry point. Its work is done with driver->sem held in
 * write mode; the work itself and the use of rc_prot are not visible in this
 * view.
 */
163 void psb_mmu_flush(struct psb_mmu_driver *driver, int rc_prot)
166 down_write(&driver->sem);
168 up_write(&driver->sem);
/*
 * Bind a page directory to a hardware context.
 *
 * The directory page is cache-flushed first; then, with the driver semaphore
 * held in write mode, a forced page-directory flush is issued and hw_context
 * is recorded in the directory. One statement inside the locked region is
 * not visible in this view.
 */
171 void psb_mmu_set_pd_context(struct psb_mmu_pd *pd, int hw_context)
173 /*ttm_tt_cache_flush(&pd->p, 1);*/
/* Flush exactly one page: the directory page itself. */
174 psb_pages_clflush(pd->driver, &pd->p, 1);
175 down_write(&pd->driver->sem);
/* force == 1 */
177 psb_mmu_flush_pd_locked(pd->driver, 1);
178 pd->hw_context = hw_context;
179 up_write(&pd->driver->sem);
183 static inline unsigned long psb_pd_addr_end(unsigned long addr,
187 addr = (addr + PSB_PDE_MASK + 1) & ~PSB_PDE_MASK;
188 return (addr < end) ? addr : end;
/*
 * Build a page-table entry from a page frame number and PSB_MMU_* type flags.
 *
 * PSB_PTE_VALID is always set and PSB_MMU_CACHED_MEMORY adds PSB_PTE_CACHED.
 * The bits added for PSB_MMU_RO_MEMORY and PSB_MMU_WO_MEMORY are not visible
 * in this view.
 * Returns the pfn shifted left by PAGE_SHIFT, OR-ed with the flag bits.
 */
191 static inline uint32_t psb_mmu_mask_pte(uint32_t pfn, int type)
193 uint32_t mask = PSB_PTE_VALID;
195 if (type & PSB_MMU_CACHED_MEMORY)
196 mask |= PSB_PTE_CACHED;
197 if (type & PSB_MMU_RO_MEMORY)
199 if (type & PSB_MMU_WO_MEMORY)
202 return (pfn << PAGE_SHIFT) | mask;
/*
 * Allocate and initialise a page directory for the given driver.
 *
 * Visible steps: allocate the bookkeeping struct, then three pages with
 * GFP_DMA32 (the directory page, a dummy page table and a dummy page); fill
 * the dummy page table with invalid_pte; fill a table with invalid_pde; zero
 * the dummy page; allocate the 1024-entry page-table pointer array; set
 * pd_mask. Allocation checks, the trap_pagefaults else-branch, the use of
 * invalid_type and the return statements are not visible in this view.
 */
205 struct psb_mmu_pd *psb_mmu_alloc_pd(struct psb_mmu_driver *driver,
206 int trap_pagefaults, int invalid_type)
208 struct psb_mmu_pd *pd = kmalloc(sizeof(*pd), GFP_KERNEL);
215 pd->p = alloc_page(GFP_DMA32);
218 pd->dummy_pt = alloc_page(GFP_DMA32);
221 pd->dummy_page = alloc_page(GFP_DMA32);
/* Without fault trapping, entries are built from the dummy pages' pfns;
 * presumably these become invalid_pde / invalid_pte - confirm. */
225 if (!trap_pagefaults) {
227 psb_mmu_mask_pte(page_to_pfn(pd->dummy_pt),
230 psb_mmu_mask_pte(page_to_pfn(pd->dummy_page),
/* Every entry of the dummy page table is the invalid PTE. */
237 v = kmap(pd->dummy_pt);
238 for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
239 v[i] = pd->invalid_pte;
241 kunmap(pd->dummy_pt);
/* NOTE(review): the mapping that v refers to here is not visible;
 * presumably the directory page pd->p - confirm. */
244 for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
245 v[i] = pd->invalid_pde;
249 clear_page(kmap(pd->dummy_page));
250 kunmap(pd->dummy_page);
/* One page-table pointer per directory slot. */
252 pd->tables = vmalloc_user(sizeof(struct psb_mmu_pt *) * 1024);
257 pd->pd_mask = PSB_PTE_VALID;
/* Error unwinding releases the dummy pages; the labels are not visible. */
263 __free_page(pd->dummy_page);
265 __free_page(pd->dummy_pt);
/*
 * Release a page table object. Only the signature is visible in this view.
 * Callers in this file invoke it after dropping the driver spinlock.
 */
273 void psb_mmu_free_pt(struct psb_mmu_pt *pt)
/*
 * Tear down a page directory.
 *
 * With the driver semaphore held in write mode: force a directory flush if
 * the directory is bound to a hardware context, walk all 1024 table slots
 * (loop body not visible in this view) and free the dummy pages. The release
 * of the remaining resources is not visible here.
 */
279 void psb_mmu_free_pagedir(struct psb_mmu_pd *pd)
281 struct psb_mmu_driver *driver = pd->driver;
282 struct psb_mmu_pt *pt;
285 down_write(&driver->sem);
/* hw_context == -1 means "not bound to a hardware context". */
286 if (pd->hw_context != -1)
287 psb_mmu_flush_pd_locked(driver, 1);
289 /* Should take the spinlock here, but we don't need to do that
290 since we have the semaphore in write mode. */
292 for (i = 0; i < 1024; ++i) {
299 __free_page(pd->dummy_page);
300 __free_page(pd->dummy_pt);
303 up_write(&driver->sem);
/*
 * Allocate a page table for a page directory.
 *
 * Visible steps: allocate the bookkeeping struct with GFP_KERNEL (so this
 * may sleep), allocate the table page with GFP_DMA32, map it atomically,
 * set every entry to the directory's invalid_pte, and cache-flush it when
 * clflush is available and the directory is bound to a hardware context.
 * Failure handling, locking and the return value are not visible here.
 */
306 static struct psb_mmu_pt *psb_mmu_alloc_pt(struct psb_mmu_pd *pd)
308 struct psb_mmu_pt *pt = kmalloc(sizeof(*pt), GFP_KERNEL);
/* Same step/count derivation as psb_page_clflush(). */
310 uint32_t clflush_add = pd->driver->clflush_add >> PAGE_SHIFT;
311 uint32_t clflush_count = PAGE_SIZE / clflush_add;
312 spinlock_t *lock = &pd->driver->lock;
320 pt->p = alloc_page(GFP_DMA32);
328 v = kmap_atomic(pt->p, KM_USER0);
/* Initialise every PTE of the new table to the invalid entry. */
330 ptes = (uint32_t *) v;
331 for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
332 *ptes++ = pd->invalid_pte;
/* Flush only when the hardware may already be using this directory. */
335 if (pd->driver->has_clflush && pd->hw_context != -1) {
337 for (i = 0; i < clflush_count; ++i) {
344 kunmap_atomic(v, KM_USER0);
354 struct psb_mmu_pt *psb_mmu_pt_alloc_map_lock(struct psb_mmu_pd *pd,
357 uint32_t index = psb_mmu_pd_index(addr);
358 struct psb_mmu_pt *pt;
360 spinlock_t *lock = &pd->driver->lock;
363 pt = pd->tables[index];
366 pt = psb_mmu_alloc_pt(pd);
371 if (pd->tables[index]) {
375 pt = pd->tables[index];
379 v = kmap_atomic(pd->p, KM_USER0);
380 pd->tables[index] = pt;
381 v[index] = (page_to_pfn(pt->p) << 12) | pd->pd_mask;
383 kunmap_atomic((void *) v, KM_USER0);
385 if (pd->hw_context != -1) {
386 psb_mmu_clflush(pd->driver, (void *) &v[index]);
387 atomic_set(&pd->driver->needs_tlbflush, 1);
390 pt->v = kmap_atomic(pt->p, KM_USER0);
/*
 * Look up the existing page table that covers an address and map its page
 * into pt->v with kmap_atomic(). Unlike psb_mmu_pt_alloc_map_lock() no
 * allocation call is visible here. Locking and the handling of an empty
 * slot are not visible in this view; callers pair this function with
 * psb_mmu_pt_unmap_unlock(), which releases the driver spinlock.
 */
394 static struct psb_mmu_pt *psb_mmu_pt_map_lock(struct psb_mmu_pd *pd,
397 uint32_t index = psb_mmu_pd_index(addr);
398 struct psb_mmu_pt *pt;
399 spinlock_t *lock = &pd->driver->lock;
402 pt = pd->tables[index];
407 pt->v = kmap_atomic(pt->p, KM_USER0);
411 static void psb_mmu_pt_unmap_unlock(struct psb_mmu_pt *pt)
413 struct psb_mmu_pd *pd = pt->pd;
416 kunmap_atomic(pt->v, KM_USER0);
417 if (pt->count == 0) {
418 v = kmap_atomic(pd->p, KM_USER0);
419 v[pt->index] = pd->invalid_pde;
420 pd->tables[pt->index] = NULL;
422 if (pd->hw_context != -1) {
423 psb_mmu_clflush(pd->driver,
424 (void *) &v[pt->index]);
425 atomic_set(&pd->driver->needs_tlbflush, 1);
427 kunmap_atomic(pt->v, KM_USER0);
428 spin_unlock(&pd->driver->lock);
432 spin_unlock(&pd->driver->lock);
435 static inline void psb_mmu_set_pte(struct psb_mmu_pt *pt,
436 unsigned long addr, uint32_t pte)
438 pt->v[psb_mmu_pt_index(addr)] = pte;
/*
 * Reset the page-table entry for an address to the owning directory's
 * invalid_pte value. The table must currently be mapped through pt->v.
 */
441 static inline void psb_mmu_invalidate_pte(struct psb_mmu_pt *pt,
444 pt->v[psb_mmu_pt_index(addr)] = pt->pd->invalid_pte;
448 void psb_mmu_mirror_gtt(struct psb_mmu_pd *pd,
449 uint32_t mmu_offset, uint32_t gtt_start,
453 uint32_t start = psb_mmu_pd_index(mmu_offset);
454 struct psb_mmu_driver *driver = pd->driver;
455 int num_pages = gtt_pages;
457 down_read(&driver->sem);
458 spin_lock(&driver->lock);
460 v = kmap_atomic(pd->p, KM_USER0);
463 while (gtt_pages--) {
464 *v++ = gtt_start | pd->pd_mask;
465 gtt_start += PAGE_SIZE;
468 /*ttm_tt_cache_flush(&pd->p, num_pages);*/
469 psb_pages_clflush(pd->driver, &pd->p, num_pages);
470 kunmap_atomic(v, KM_USER0);
471 spin_unlock(&driver->lock);
473 if (pd->hw_context != -1)
474 atomic_set(&pd->driver->needs_tlbflush, 1);
476 up_read(&pd->driver->sem);
477 psb_mmu_flush_pd(pd->driver, 0);
480 struct psb_mmu_pd *psb_mmu_get_default_pd(struct psb_mmu_driver *driver)
482 struct psb_mmu_pd *pd;
484 /* down_read(&driver->sem); */
485 pd = driver->default_pd;
486 /* up_read(&driver->sem); */
491 /* Returns the physical address of the PD shared by sgx/msvdx */
492 uint32_t psb_get_default_pd_addr(struct psb_mmu_driver *driver)
494 struct psb_mmu_pd *pd;
496 pd = psb_mmu_get_default_pd(driver);
497 return page_to_pfn(pd->p) << PAGE_SHIFT;
/*
 * Shut down an MMU driver instance: frees its default page directory. Any
 * further cleanup is not visible in this view.
 */
500 void psb_mmu_driver_takedown(struct psb_mmu_driver *driver)
502 psb_mmu_free_pagedir(driver->default_pd);
/*
 * Create an MMU driver instance.
 *
 * Visible steps: allocate the driver struct, store dev_priv, create the
 * default page directory, initialise the spinlock and the rw-semaphore, and
 * then, holding the semaphore in write mode, record the register mapping,
 * request an initial TLB flush and probe the CPU for clflush support.
 * Some parameters, the failure paths and the return statements are not
 * visible in this view.
 */
506 struct psb_mmu_driver *psb_mmu_driver_init(uint8_t __iomem * registers,
509 struct drm_psb_private *dev_priv)
511 struct psb_mmu_driver *driver;
513 driver = kmalloc(sizeof(*driver), GFP_KERNEL);
517 driver->dev_priv = dev_priv;
/* NOTE(review): the default directory is created before the lock, the
 * semaphore and has_clflush are initialised below - confirm nothing on
 * that path depends on them. */
519 driver->default_pd = psb_mmu_alloc_pd(driver, trap_pagefaults,
521 if (!driver->default_pd)
524 spin_lock_init(&driver->lock);
525 init_rwsem(&driver->sem);
526 down_write(&driver->sem);
527 driver->register_map = registers;
528 atomic_set(&driver->needs_tlbflush, 1);
/* Assume no clflush until the CPU feature check below says otherwise. */
530 driver->has_clflush = 0;
532 if (boot_cpu_has(X86_FEATURE_CLFLSH)) {
533 uint32_t tfms, misc, cap0, cap4, clflush_size;
536 * clflush size is determined at kernel setup for x86_64
537 * but not for i386. We have to do it here.
/* CPUID leaf 1: bits 15:8 of the second output register, times 8, give the
 * clflush line size in bytes. */
540 cpuid(0x00000001, &tfms, &misc, &cap0, &cap4);
541 clflush_size = ((misc >> 8) & 0xff) * 8;
542 driver->has_clflush = 1;
543 driver->clflush_add =
544 PAGE_SIZE * clflush_size / sizeof(uint32_t);
/* clflush_mask = ~(clflush_add - 1), built in two steps. */
545 driver->clflush_mask = driver->clflush_add - 1;
546 driver->clflush_mask = ~driver->clflush_mask;
549 up_write(&driver->sem);
/*
 * Cache-flush the page-table entries that cover a (possibly tiled) range of
 * MMU addresses so that updates become visible to the hardware.
 *
 * The range is processed row by row; inside a row each page-directory range
 * is handled with its page table mapped and locked. The setup of rows and
 * the inner flush loop are only partly visible in this view.
 */
557 static void psb_mmu_flush_ptes(struct psb_mmu_pd *pd,
558 unsigned long address, uint32_t num_pages,
559 uint32_t desired_tile_stride,
560 uint32_t hw_tile_stride)
562 struct psb_mmu_pt *pt;
569 unsigned long row_add;
570 unsigned long clflush_add = pd->driver->clflush_add;
571 unsigned long clflush_mask = pd->driver->clflush_mask;
/* NOTE(review): psb_pages_clflush() itself returns immediately when
 * has_clflush is zero, so this fallback call does nothing as written. */
573 if (!pd->driver->has_clflush) {
574 /*ttm_tt_cache_flush(&pd->p, num_pages);*/
575 psb_pages_clflush(pd->driver, &pd->p, num_pages);
580 rows = num_pages / desired_tile_stride;
582 desired_tile_stride = num_pages;
/* Byte length of one row, and byte distance between row starts. */
584 add = desired_tile_stride << PAGE_SHIFT;
585 row_add = hw_tile_stride << PAGE_SHIFT;
587 for (i = 0; i < rows; ++i) {
/* Process one page-directory range at a time. */
593 next = psb_pd_addr_end(addr, end);
594 pt = psb_mmu_pt_map_lock(pd, addr);
599 [psb_mmu_pt_index(addr)]);
602 (addr & clflush_mask) < next);
604 psb_mmu_pt_unmap_unlock(pt);
605 } while (addr = next, next != end);
/*
 * Invalidate the PTEs of num_pages consecutive pages starting at address.
 *
 * Runs with the driver semaphore held in read mode. Each page-directory
 * range is handled with its page table mapped and locked. If the directory
 * is bound to a hardware context, the touched PTEs are cache-flushed and,
 * after the semaphore is released, an MMU flush is issued.
 */
611 void psb_mmu_remove_pfn_sequence(struct psb_mmu_pd *pd,
612 unsigned long address, uint32_t num_pages)
614 struct psb_mmu_pt *pt;
/* Start address kept for the flush after the walk. */
618 unsigned long f_address = address;
620 down_read(&pd->driver->sem);
623 end = addr + (num_pages << PAGE_SHIFT);
626 next = psb_pd_addr_end(addr, end);
/* NOTE(review): this removal path uses the allocating lookup, whereas
 * psb_mmu_remove_pages() uses psb_mmu_pt_map_lock() - confirm intent. */
627 pt = psb_mmu_pt_alloc_map_lock(pd, addr);
631 psb_mmu_invalidate_pte(pt, addr);
633 } while (addr += PAGE_SIZE, addr < next);
634 psb_mmu_pt_unmap_unlock(pt);
636 } while (addr = next, next != end);
639 if (pd->hw_context != -1)
640 psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1);
642 up_read(&pd->driver->sem);
644 if (pd->hw_context != -1)
645 psb_mmu_flush(pd->driver, 0);
/*
 * Invalidate the PTEs of a (possibly tiled) set of pages.
 *
 * The pages are processed row by row: add is the byte length of one row and
 * row_add the byte distance between row starts. Each page-directory range
 * is looked up with psb_mmu_pt_map_lock(), i.e. without an allocation call.
 * If the directory is bound to a hardware context the touched PTEs are
 * cache-flushed and an MMU flush follows. The semaphore calls around the
 * walk are commented out in this function.
 */
650 void psb_mmu_remove_pages(struct psb_mmu_pd *pd, unsigned long address,
651 uint32_t num_pages, uint32_t desired_tile_stride,
652 uint32_t hw_tile_stride)
654 struct psb_mmu_pt *pt;
661 unsigned long row_add;
662 unsigned long f_address = address;
/* NOTE(review): the conditions guarding these two assignments are not
 * visible; a zero desired_tile_stride would divide by zero - confirm. */
665 rows = num_pages / desired_tile_stride;
667 desired_tile_stride = num_pages;
669 add = desired_tile_stride << PAGE_SHIFT;
670 row_add = hw_tile_stride << PAGE_SHIFT;
672 /* down_read(&pd->driver->sem); */
674 /* Make sure we only need to flush this processor's cache */
676 for (i = 0; i < rows; ++i) {
682 next = psb_pd_addr_end(addr, end);
683 pt = psb_mmu_pt_map_lock(pd, addr);
687 psb_mmu_invalidate_pte(pt, addr);
690 } while (addr += PAGE_SIZE, addr < next);
691 psb_mmu_pt_unmap_unlock(pt);
693 } while (addr = next, next != end);
696 if (pd->hw_context != -1)
697 psb_mmu_flush_ptes(pd, f_address, num_pages,
698 desired_tile_stride, hw_tile_stride);
700 /* up_read(&pd->driver->sem); */
702 if (pd->hw_context != -1)
703 psb_mmu_flush(pd->driver, 0);
/*
 * Map num_pages physically consecutive page frames, starting at start_pfn,
 * at consecutive MMU addresses starting at address.
 *
 * Runs with the driver semaphore held in read mode. Page tables are created
 * on demand through psb_mmu_pt_alloc_map_lock(). If the directory is bound
 * to a hardware context the new PTEs are cache-flushed and an MMU flush is
 * issued afterwards. Error handling and the return value are not visible in
 * this view.
 */
706 int psb_mmu_insert_pfn_sequence(struct psb_mmu_pd *pd, uint32_t start_pfn,
707 unsigned long address, uint32_t num_pages,
710 struct psb_mmu_pt *pt;
715 unsigned long f_address = address;
718 down_read(&pd->driver->sem);
721 end = addr + (num_pages << PAGE_SHIFT);
724 next = psb_pd_addr_end(addr, end);
725 pt = psb_mmu_pt_alloc_map_lock(pd, addr);
/* One PTE per page; the frame number advances with every entry. */
731 pte = psb_mmu_mask_pte(start_pfn++, type);
732 psb_mmu_set_pte(pt, addr, pte);
734 } while (addr += PAGE_SIZE, addr < next);
735 psb_mmu_pt_unmap_unlock(pt);
737 } while (addr = next, next != end);
740 if (pd->hw_context != -1)
741 psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1);
743 up_read(&pd->driver->sem);
745 if (pd->hw_context != -1)
746 psb_mmu_flush(pd->driver, 1);
/*
 * Map an array of pages at a (possibly tiled) range of MMU addresses.
 *
 * With a non-zero hw_tile_stride, num_pages must be a multiple of
 * desired_tile_stride and the pages are laid out in rows; otherwise the
 * whole range is treated as a single row of num_pages pages. Runs with the
 * driver semaphore held in read mode and creates page tables on demand. If
 * the directory is bound to a hardware context the new PTEs are
 * cache-flushed and an MMU flush is issued afterwards. Error handling and
 * the return value are not visible in this view.
 */
751 int psb_mmu_insert_pages(struct psb_mmu_pd *pd, struct page **pages,
752 unsigned long address, uint32_t num_pages,
753 uint32_t desired_tile_stride,
754 uint32_t hw_tile_stride, int type)
756 struct psb_mmu_pt *pt;
764 unsigned long row_add;
765 unsigned long f_address = address;
/* NOTE(review): desired_tile_stride is used as a divisor here; presumably
 * callers never pass zero together with a non-zero hw_tile_stride. */
768 if (hw_tile_stride) {
769 if (num_pages % desired_tile_stride != 0)
771 rows = num_pages / desired_tile_stride;
773 desired_tile_stride = num_pages;
/* Byte length of one row, and byte distance between row starts. */
776 add = desired_tile_stride << PAGE_SHIFT;
777 row_add = hw_tile_stride << PAGE_SHIFT;
779 down_read(&pd->driver->sem);
781 for (i = 0; i < rows; ++i) {
787 next = psb_pd_addr_end(addr, end);
788 pt = psb_mmu_pt_alloc_map_lock(pd, addr);
795 psb_mmu_mask_pte(page_to_pfn(*pages++),
797 psb_mmu_set_pte(pt, addr, pte);
799 } while (addr += PAGE_SIZE, addr < next);
800 psb_mmu_pt_unmap_unlock(pt);
802 } while (addr = next, next != end);
807 if (pd->hw_context != -1)
808 psb_mmu_flush_ptes(pd, f_address, num_pages,
809 desired_tile_stride, hw_tile_stride);
811 up_read(&pd->driver->sem);
813 if (pd->hw_context != -1)
814 psb_mmu_flush(pd->driver, 1);
/*
 * Translate an MMU virtual address to a page frame number through *pfn.
 *
 * Runs with the driver semaphore held in read mode. Two paths are visible:
 * one reads the raw page-directory entry and reports the pfn of the
 * directory's invalid_pte, the other reads the PTE from the mapped page
 * table. The conditions selecting the paths, the error codes and the end of
 * the function are not visible in this view.
 */
819 int psb_mmu_virtual_to_pfn(struct psb_mmu_pd *pd, uint32_t virtual,
823 struct psb_mmu_pt *pt;
825 spinlock_t *lock = &pd->driver->lock;
827 down_read(&pd->driver->sem);
828 pt = psb_mmu_pt_map_lock(pd, virtual);
/* Inspect the raw directory entry for this address. */
833 v = kmap_atomic(pd->p, KM_USER0);
834 tmp = v[psb_mmu_pd_index(virtual)];
835 kunmap_atomic(v, KM_USER0);
/* The branch is taken unless the PDE is the valid invalid_pde and the
 * directory's invalid_pte is itself marked valid. */
838 if (tmp != pd->invalid_pde || !(tmp & PSB_PTE_VALID) ||
839 !(pd->invalid_pte & PSB_PTE_VALID)) {
844 *pfn = pd->invalid_pte >> PAGE_SHIFT;
/* Path with a mapped page table: read the PTE for this address. */
847 tmp = pt->v[psb_mmu_pt_index(virtual)];
848 if (!(tmp & PSB_PTE_VALID)) {
852 *pfn = tmp >> PAGE_SHIFT;
854 psb_mmu_pt_unmap_unlock(pt);
856 up_read(&pd->driver->sem);