[pandora-kernel.git] drivers/gpu/drm/i915/i915_gem.c
1 /*
2  * Copyright © 2008 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include "drmP.h"
29 #include "drm.h"
30 #include "i915_drm.h"
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/pci.h>
37 #include <linux/intel-gtt.h>
38
39 static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj);
40 static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
41 static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
42 static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
43 static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
44                                              int write);
45 static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
46                                                      uint64_t offset,
47                                                      uint64_t size);
48 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
49 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
50 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
51                                            unsigned alignment);
52 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
53 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
54                                 struct drm_i915_gem_pwrite *args,
55                                 struct drm_file *file_priv);
56 static void i915_gem_free_object_tail(struct drm_gem_object *obj);
57
58 static LIST_HEAD(shrink_list);
59 static DEFINE_SPINLOCK(shrink_list_lock);
60
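/* An object is "inactive" when it is bound into the GTT but is neither busy
 * on the GPU nor pinned; callers use this to decide whether it belongs on
 * the inactive LRU.
 */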
61 static inline bool
62 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj_priv)
63 {
64         return obj_priv->gtt_space &&
65                 !obj_priv->active &&
66                 obj_priv->pin_count == 0;
67 }
68
69 int i915_gem_do_init(struct drm_device *dev, unsigned long start,
70                      unsigned long end)
71 {
72         drm_i915_private_t *dev_priv = dev->dev_private;
73
74         if (start >= end ||
75             (start & (PAGE_SIZE - 1)) != 0 ||
76             (end & (PAGE_SIZE - 1)) != 0) {
77                 return -EINVAL;
78         }
79
80         drm_mm_init(&dev_priv->mm.gtt_space, start,
81                     end - start);
82
83         dev->gtt_total = (uint32_t) (end - start);
84
85         return 0;
86 }
87
88 int
89 i915_gem_init_ioctl(struct drm_device *dev, void *data,
90                     struct drm_file *file_priv)
91 {
92         struct drm_i915_gem_init *args = data;
93         int ret;
94
95         mutex_lock(&dev->struct_mutex);
96         ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
97         mutex_unlock(&dev->struct_mutex);
98
99         return ret;
100 }
101
102 int
103 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
104                             struct drm_file *file_priv)
105 {
106         struct drm_i915_gem_get_aperture *args = data;
107
108         if (!(dev->driver->driver_features & DRIVER_GEM))
109                 return -ENODEV;
110
111         args->aper_size = dev->gtt_total;
112         args->aper_available_size = (args->aper_size -
113                                      atomic_read(&dev->pin_memory));
114
115         return 0;
116 }
117
118
119 /**
120  * Creates a new mm object and returns a handle to it.
121  */
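/*
 * Example (userspace sketch, not part of this file): the ioctl is normally
 * reached through libdrm.  The fields follow drm_i915_gem_create in
 * i915_drm.h; "fd" is an open DRM file descriptor and use_handle() is a
 * placeholder for whatever the caller does next.
 *
 *	struct drm_i915_gem_create create = { .size = 4096 };
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create) == 0)
 *		use_handle(create.handle);
 */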
122 int
123 i915_gem_create_ioctl(struct drm_device *dev, void *data,
124                       struct drm_file *file_priv)
125 {
126         struct drm_i915_gem_create *args = data;
127         struct drm_gem_object *obj;
128         int ret;
129         u32 handle;
130
131         args->size = roundup(args->size, PAGE_SIZE);
132
133         /* Allocate the new object */
134         obj = i915_gem_alloc_object(dev, args->size);
135         if (obj == NULL)
136                 return -ENOMEM;
137
138         ret = drm_gem_handle_create(file_priv, obj, &handle);
139         /* drop reference from allocate - handle holds it now */
140         drm_gem_object_unreference_unlocked(obj);
141         if (ret) {
142                 return ret;
143         }
144
145         args->handle = handle;
146         return 0;
147 }
148
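/* Atomically kmap a single backing page and copy out of it to user space
 * with __copy_to_user_inatomic.  Returns -EFAULT if the copy cannot complete
 * without faulting, letting the caller fall back to the sleeping slow path.
 */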
149 static inline int
150 fast_shmem_read(struct page **pages,
151                 loff_t page_base, int page_offset,
152                 char __user *data,
153                 int length)
154 {
155         char __iomem *vaddr;
156         int unwritten;
157
158         vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
159         if (vaddr == NULL)
160                 return -ENOMEM;
161         unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length);
162         kunmap_atomic(vaddr, KM_USER0);
163
164         if (unwritten)
165                 return -EFAULT;
166
167         return 0;
168 }
169
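/* Objects that are tiled on a platform using bit-17 swizzling
 * (I915_BIT_6_SWIZZLE_9_10_17) cannot be copied through the simple CPU
 * paths; callers must take the slow paths that swizzle by hand.
 */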
170 static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
171 {
172         drm_i915_private_t *dev_priv = obj->dev->dev_private;
173         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
174
175         return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
176                 obj_priv->tiling_mode != I915_TILING_NONE;
177 }
178
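/* Copy between two kmapped pages.  kmap() may sleep, so this is only used
 * on the slow paths, where the user pages have already been pinned.
 */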
179 static inline void
180 slow_shmem_copy(struct page *dst_page,
181                 int dst_offset,
182                 struct page *src_page,
183                 int src_offset,
184                 int length)
185 {
186         char *dst_vaddr, *src_vaddr;
187
188         dst_vaddr = kmap(dst_page);
189         src_vaddr = kmap(src_page);
190
191         memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
192
193         kunmap(src_page);
194         kunmap(dst_page);
195 }
196
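/* Copy between a shmem-backed GPU page and a pinned user page, compensating
 * for bit-17 swizzling: on pages whose physical address has bit 17 set, each
 * pair of adjacent 64-byte cachelines is swapped (gpu_offset ^ 64).
 */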
197 static inline void
198 slow_shmem_bit17_copy(struct page *gpu_page,
199                       int gpu_offset,
200                       struct page *cpu_page,
201                       int cpu_offset,
202                       int length,
203                       int is_read)
204 {
205         char *gpu_vaddr, *cpu_vaddr;
206
207         /* Use the unswizzled path if this page isn't affected. */
208         if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
209                 if (is_read)
210                         return slow_shmem_copy(cpu_page, cpu_offset,
211                                                gpu_page, gpu_offset, length);
212                 else
213                         return slow_shmem_copy(gpu_page, gpu_offset,
214                                                cpu_page, cpu_offset, length);
215         }
216
217         gpu_vaddr = kmap(gpu_page);
218         cpu_vaddr = kmap(cpu_page);
219
220         /* Copy the data, XORing A6 with A17 (1). The user already knows he's
221          * XORing with the other bits (A9 for Y, A9 and A10 for X)
222          */
223         while (length > 0) {
224                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
225                 int this_length = min(cacheline_end - gpu_offset, length);
226                 int swizzled_gpu_offset = gpu_offset ^ 64;
227
228                 if (is_read) {
229                         memcpy(cpu_vaddr + cpu_offset,
230                                gpu_vaddr + swizzled_gpu_offset,
231                                this_length);
232                 } else {
233                         memcpy(gpu_vaddr + swizzled_gpu_offset,
234                                cpu_vaddr + cpu_offset,
235                                this_length);
236                 }
237                 cpu_offset += this_length;
238                 gpu_offset += this_length;
239                 length -= this_length;
240         }
241
242         kunmap(cpu_page);
243         kunmap(gpu_page);
244 }
245
246 /**
247  * This is the fast shmem pread path, which attempts to copy_to_user directly
248  * from the backing pages of the object into the user's address space.  On a
249  * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
250  */
251 static int
252 i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
253                           struct drm_i915_gem_pread *args,
254                           struct drm_file *file_priv)
255 {
256         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
257         ssize_t remain;
258         loff_t offset, page_base;
259         char __user *user_data;
260         int page_offset, page_length;
261         int ret;
262
263         user_data = (char __user *) (uintptr_t) args->data_ptr;
264         remain = args->size;
265
266         mutex_lock(&dev->struct_mutex);
267
268         ret = i915_gem_object_get_pages(obj, 0);
269         if (ret != 0)
270                 goto fail_unlock;
271
272         ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
273                                                         args->size);
274         if (ret != 0)
275                 goto fail_put_pages;
276
277         obj_priv = to_intel_bo(obj);
278         offset = args->offset;
279
280         while (remain > 0) {
281                 /* Operation in this page
282                  *
283                  * page_base = page offset within aperture
284                  * page_offset = offset within page
285                  * page_length = bytes to copy for this page
286                  */
287                 page_base = (offset & ~(PAGE_SIZE-1));
288                 page_offset = offset & (PAGE_SIZE-1);
289                 page_length = remain;
290                 if ((page_offset + remain) > PAGE_SIZE)
291                         page_length = PAGE_SIZE - page_offset;
292
293                 ret = fast_shmem_read(obj_priv->pages,
294                                       page_base, page_offset,
295                                       user_data, page_length);
296                 if (ret)
297                         goto fail_put_pages;
298
299                 remain -= page_length;
300                 user_data += page_length;
301                 offset += page_length;
302         }
303
304 fail_put_pages:
305         i915_gem_object_put_pages(obj);
306 fail_unlock:
307         mutex_unlock(&dev->struct_mutex);
308
309         return ret;
310 }
311
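/* Populate the object's page list; on -ENOMEM, evict something from the GTT
 * to free up memory and retry the allocation once without the NORETRY flag.
 */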
312 static int
313 i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
314 {
315         int ret;
316
317         ret = i915_gem_object_get_pages(obj, __GFP_NORETRY | __GFP_NOWARN);
318
319         /* If we've insufficient memory to map in the pages, attempt
320          * to make some space by throwing out some old buffers.
321          */
322         if (ret == -ENOMEM) {
323                 struct drm_device *dev = obj->dev;
324
325                 ret = i915_gem_evict_something(dev, obj->size,
326                                                i915_gem_get_gtt_alignment(obj));
327                 if (ret)
328                         return ret;
329
330                 ret = i915_gem_object_get_pages(obj, 0);
331         }
332
333         return ret;
334 }
335
336 /**
337  * This is the fallback shmem pread path, which pins the user's destination
338  * pages with get_user_pages before taking the struct_mutex, so we can copy
339  * out of the object's backing pages while holding the struct_mutex without
340  * taking page faults.
341  */
342 static int
343 i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
344                           struct drm_i915_gem_pread *args,
345                           struct drm_file *file_priv)
346 {
347         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
348         struct mm_struct *mm = current->mm;
349         struct page **user_pages;
350         ssize_t remain;
351         loff_t offset, pinned_pages, i;
352         loff_t first_data_page, last_data_page, num_pages;
353         int shmem_page_index, shmem_page_offset;
354         int data_page_index,  data_page_offset;
355         int page_length;
356         int ret;
357         uint64_t data_ptr = args->data_ptr;
358         int do_bit17_swizzling;
359
360         remain = args->size;
361
362         /* Pin the user pages containing the data.  We can't fault while
363          * holding the struct mutex, yet we want to hold it while
364          * dereferencing the user data.
365          */
366         first_data_page = data_ptr / PAGE_SIZE;
367         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
368         num_pages = last_data_page - first_data_page + 1;
369
370         user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
371         if (user_pages == NULL)
372                 return -ENOMEM;
373
374         down_read(&mm->mmap_sem);
375         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
376                                       num_pages, 1, 0, user_pages, NULL);
377         up_read(&mm->mmap_sem);
378         if (pinned_pages < num_pages) {
379                 ret = -EFAULT;
380                 goto fail_put_user_pages;
381         }
382
383         do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
384
385         mutex_lock(&dev->struct_mutex);
386
387         ret = i915_gem_object_get_pages_or_evict(obj);
388         if (ret)
389                 goto fail_unlock;
390
391         ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
392                                                         args->size);
393         if (ret != 0)
394                 goto fail_put_pages;
395
396         obj_priv = to_intel_bo(obj);
397         offset = args->offset;
398
399         while (remain > 0) {
400                 /* Operation in this page
401                  *
402                  * shmem_page_index = page number within shmem file
403                  * shmem_page_offset = offset within page in shmem file
404                  * data_page_index = page number in get_user_pages return
405                  * data_page_offset = offset within data_page_index page.
406                  * page_length = bytes to copy for this page
407                  */
408                 shmem_page_index = offset / PAGE_SIZE;
409                 shmem_page_offset = offset & ~PAGE_MASK;
410                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
411                 data_page_offset = data_ptr & ~PAGE_MASK;
412
413                 page_length = remain;
414                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
415                         page_length = PAGE_SIZE - shmem_page_offset;
416                 if ((data_page_offset + page_length) > PAGE_SIZE)
417                         page_length = PAGE_SIZE - data_page_offset;
418
419                 if (do_bit17_swizzling) {
420                         slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
421                                               shmem_page_offset,
422                                               user_pages[data_page_index],
423                                               data_page_offset,
424                                               page_length,
425                                               1);
426                 } else {
427                         slow_shmem_copy(user_pages[data_page_index],
428                                         data_page_offset,
429                                         obj_priv->pages[shmem_page_index],
430                                         shmem_page_offset,
431                                         page_length);
432                 }
433
434                 remain -= page_length;
435                 data_ptr += page_length;
436                 offset += page_length;
437         }
438
439 fail_put_pages:
440         i915_gem_object_put_pages(obj);
441 fail_unlock:
442         mutex_unlock(&dev->struct_mutex);
443 fail_put_user_pages:
444         for (i = 0; i < pinned_pages; i++) {
445                 SetPageDirty(user_pages[i]);
446                 page_cache_release(user_pages[i]);
447         }
448         drm_free_large(user_pages);
449
450         return ret;
451 }
452
453 /**
454  * Reads data from the object referenced by handle.
455  *
456  * On error, the contents of *data are undefined.
457  */
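/*
 * Example (userspace sketch, not part of this file): reading the first
 * 4 KiB of a buffer into a local array.  Fields follow drm_i915_gem_pread
 * in i915_drm.h; "fd" and "handle" are placeholders.
 *
 *	char buf[4096];
 *	struct drm_i915_gem_pread pread = {
 *		.handle = handle,
 *		.offset = 0,
 *		.size = sizeof(buf),
 *		.data_ptr = (uintptr_t)buf,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
 */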
458 int
459 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
460                      struct drm_file *file_priv)
461 {
462         struct drm_i915_gem_pread *args = data;
463         struct drm_gem_object *obj;
464         struct drm_i915_gem_object *obj_priv;
465         int ret;
466
467         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
468         if (obj == NULL)
469                 return -ENOENT;
470         obj_priv = to_intel_bo(obj);
471
472         /* Bounds check source.
473          *
474          * XXX: This could use review for overflow issues...
475          */
476         if (args->offset > obj->size || args->size > obj->size ||
477             args->offset + args->size > obj->size) {
478                 drm_gem_object_unreference_unlocked(obj);
479                 return -EINVAL;
480         }
481
482         if (i915_gem_object_needs_bit17_swizzle(obj)) {
483                 ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
484         } else {
485                 ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
486                 if (ret != 0)
487                         ret = i915_gem_shmem_pread_slow(dev, obj, args,
488                                                         file_priv);
489         }
490
491         drm_gem_object_unreference_unlocked(obj);
492
493         return ret;
494 }
495
496 /* This is the fast write path which cannot handle
497  * page faults in the source data
498  */
499
500 static inline int
501 fast_user_write(struct io_mapping *mapping,
502                 loff_t page_base, int page_offset,
503                 char __user *user_data,
504                 int length)
505 {
506         char *vaddr_atomic;
507         unsigned long unwritten;
508
509         vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base, KM_USER0);
510         unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
511                                                       user_data, length);
512         io_mapping_unmap_atomic(vaddr_atomic, KM_USER0);
513         if (unwritten)
514                 return -EFAULT;
515         return 0;
516 }
517
518 /* Here's the write path which can sleep for
519  * page faults
520  */
521
522 static inline void
523 slow_kernel_write(struct io_mapping *mapping,
524                   loff_t gtt_base, int gtt_offset,
525                   struct page *user_page, int user_offset,
526                   int length)
527 {
528         char __iomem *dst_vaddr;
529         char *src_vaddr;
530
531         dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
532         src_vaddr = kmap(user_page);
533
534         memcpy_toio(dst_vaddr + gtt_offset,
535                     src_vaddr + user_offset,
536                     length);
537
538         kunmap(user_page);
539         io_mapping_unmap(dst_vaddr);
540 }
541
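/* Atomically kmap a single backing page and copy into it from user space
 * with __copy_from_user_inatomic.  Returns -EFAULT if the copy cannot
 * complete without faulting, letting the caller fall back to the slow path.
 */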
542 static inline int
543 fast_shmem_write(struct page **pages,
544                  loff_t page_base, int page_offset,
545                  char __user *data,
546                  int length)
547 {
548         char __iomem *vaddr;
549         unsigned long unwritten;
550
551         vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
552         if (vaddr == NULL)
553                 return -ENOMEM;
554         unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
555         kunmap_atomic(vaddr, KM_USER0);
556
557         if (unwritten)
558                 return -EFAULT;
559         return 0;
560 }
561
562 /**
563  * This is the fast pwrite path, where we copy the data directly from the
564  * user into the GTT, uncached.
565  */
566 static int
567 i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
568                          struct drm_i915_gem_pwrite *args,
569                          struct drm_file *file_priv)
570 {
571         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
572         drm_i915_private_t *dev_priv = dev->dev_private;
573         ssize_t remain;
574         loff_t offset, page_base;
575         char __user *user_data;
576         int page_offset, page_length;
577         int ret;
578
579         user_data = (char __user *) (uintptr_t) args->data_ptr;
580         remain = args->size;
581         if (!access_ok(VERIFY_READ, user_data, remain))
582                 return -EFAULT;
583
584
585         mutex_lock(&dev->struct_mutex);
586         ret = i915_gem_object_pin(obj, 0);
587         if (ret) {
588                 mutex_unlock(&dev->struct_mutex);
589                 return ret;
590         }
591         ret = i915_gem_object_set_to_gtt_domain(obj, 1);
592         if (ret)
593                 goto fail;
594
595         obj_priv = to_intel_bo(obj);
596         offset = obj_priv->gtt_offset + args->offset;
597
598         while (remain > 0) {
599                 /* Operation in this page
600                  *
601                  * page_base = page offset within aperture
602                  * page_offset = offset within page
603                  * page_length = bytes to copy for this page
604                  */
605                 page_base = (offset & ~(PAGE_SIZE-1));
606                 page_offset = offset & (PAGE_SIZE-1);
607                 page_length = remain;
608                 if ((page_offset + remain) > PAGE_SIZE)
609                         page_length = PAGE_SIZE - page_offset;
610
611                 ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
612                                        page_offset, user_data, page_length);
613
614                 /* If we get a fault while copying data, then (presumably) our
615                  * source page isn't available.  Return the error and we'll
616                  * retry in the slow path.
617                  */
618                 if (ret)
619                         goto fail;
620
621                 remain -= page_length;
622                 user_data += page_length;
623                 offset += page_length;
624         }
625
626 fail:
627         i915_gem_object_unpin(obj);
628         mutex_unlock(&dev->struct_mutex);
629
630         return ret;
631 }
632
633 /**
634  * This is the fallback GTT pwrite path, which uses get_user_pages to pin
635  * the memory and maps it using kmap_atomic for copying.
636  *
637  * This code resulted in x11perf -rgb10text consuming about 10% more CPU
638  * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
639  */
640 static int
641 i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
642                          struct drm_i915_gem_pwrite *args,
643                          struct drm_file *file_priv)
644 {
645         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
646         drm_i915_private_t *dev_priv = dev->dev_private;
647         ssize_t remain;
648         loff_t gtt_page_base, offset;
649         loff_t first_data_page, last_data_page, num_pages;
650         loff_t pinned_pages, i;
651         struct page **user_pages;
652         struct mm_struct *mm = current->mm;
653         int gtt_page_offset, data_page_offset, data_page_index, page_length;
654         int ret;
655         uint64_t data_ptr = args->data_ptr;
656
657         remain = args->size;
658
659         /* Pin the user pages containing the data.  We can't fault while
660          * holding the struct mutex, and all of the pwrite implementations
661          * want to hold it while dereferencing the user data.
662          */
663         first_data_page = data_ptr / PAGE_SIZE;
664         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
665         num_pages = last_data_page - first_data_page + 1;
666
667         user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
668         if (user_pages == NULL)
669                 return -ENOMEM;
670
671         down_read(&mm->mmap_sem);
672         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
673                                       num_pages, 0, 0, user_pages, NULL);
674         up_read(&mm->mmap_sem);
675         if (pinned_pages < num_pages) {
676                 ret = -EFAULT;
677                 goto out_unpin_pages;
678         }
679
680         mutex_lock(&dev->struct_mutex);
681         ret = i915_gem_object_pin(obj, 0);
682         if (ret)
683                 goto out_unlock;
684
685         ret = i915_gem_object_set_to_gtt_domain(obj, 1);
686         if (ret)
687                 goto out_unpin_object;
688
689         obj_priv = to_intel_bo(obj);
690         offset = obj_priv->gtt_offset + args->offset;
691
692         while (remain > 0) {
693                 /* Operation in this page
694                  *
695                  * gtt_page_base = page offset within aperture
696                  * gtt_page_offset = offset within page in aperture
697                  * data_page_index = page number in get_user_pages return
698                  * data_page_offset = offset within data_page_index page.
699                  * page_length = bytes to copy for this page
700                  */
701                 gtt_page_base = offset & PAGE_MASK;
702                 gtt_page_offset = offset & ~PAGE_MASK;
703                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
704                 data_page_offset = data_ptr & ~PAGE_MASK;
705
706                 page_length = remain;
707                 if ((gtt_page_offset + page_length) > PAGE_SIZE)
708                         page_length = PAGE_SIZE - gtt_page_offset;
709                 if ((data_page_offset + page_length) > PAGE_SIZE)
710                         page_length = PAGE_SIZE - data_page_offset;
711
712                 slow_kernel_write(dev_priv->mm.gtt_mapping,
713                                   gtt_page_base, gtt_page_offset,
714                                   user_pages[data_page_index],
715                                   data_page_offset,
716                                   page_length);
717
718                 remain -= page_length;
719                 offset += page_length;
720                 data_ptr += page_length;
721         }
722
723 out_unpin_object:
724         i915_gem_object_unpin(obj);
725 out_unlock:
726         mutex_unlock(&dev->struct_mutex);
727 out_unpin_pages:
728         for (i = 0; i < pinned_pages; i++)
729                 page_cache_release(user_pages[i]);
730         drm_free_large(user_pages);
731
732         return ret;
733 }
734
735 /**
736  * This is the fast shmem pwrite path, which attempts to directly
737  * copy_from_user into the kmapped pages backing the object.
738  */
739 static int
740 i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
741                            struct drm_i915_gem_pwrite *args,
742                            struct drm_file *file_priv)
743 {
744         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
745         ssize_t remain;
746         loff_t offset, page_base;
747         char __user *user_data;
748         int page_offset, page_length;
749         int ret;
750
751         user_data = (char __user *) (uintptr_t) args->data_ptr;
752         remain = args->size;
753
754         mutex_lock(&dev->struct_mutex);
755
756         ret = i915_gem_object_get_pages(obj, 0);
757         if (ret != 0)
758                 goto fail_unlock;
759
760         ret = i915_gem_object_set_to_cpu_domain(obj, 1);
761         if (ret != 0)
762                 goto fail_put_pages;
763
764         obj_priv = to_intel_bo(obj);
765         offset = args->offset;
766         obj_priv->dirty = 1;
767
768         while (remain > 0) {
769                 /* Operation in this page
770                  *
771                  * page_base = page offset within aperture
772                  * page_offset = offset within page
773                  * page_length = bytes to copy for this page
774                  */
775                 page_base = (offset & ~(PAGE_SIZE-1));
776                 page_offset = offset & (PAGE_SIZE-1);
777                 page_length = remain;
778                 if ((page_offset + remain) > PAGE_SIZE)
779                         page_length = PAGE_SIZE - page_offset;
780
781                 ret = fast_shmem_write(obj_priv->pages,
782                                        page_base, page_offset,
783                                        user_data, page_length);
784                 if (ret)
785                         goto fail_put_pages;
786
787                 remain -= page_length;
788                 user_data += page_length;
789                 offset += page_length;
790         }
791
792 fail_put_pages:
793         i915_gem_object_put_pages(obj);
794 fail_unlock:
795         mutex_unlock(&dev->struct_mutex);
796
797         return ret;
798 }
799
800 /**
801  * This is the fallback shmem pwrite path, which uses get_user_pages to pin
802  * the memory and maps it using kmap for copying.
803  *
804  * This avoids taking mmap_sem for faulting on the user's address while the
805  * struct_mutex is held.
806  */
807 static int
808 i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
809                            struct drm_i915_gem_pwrite *args,
810                            struct drm_file *file_priv)
811 {
812         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
813         struct mm_struct *mm = current->mm;
814         struct page **user_pages;
815         ssize_t remain;
816         loff_t offset, pinned_pages, i;
817         loff_t first_data_page, last_data_page, num_pages;
818         int shmem_page_index, shmem_page_offset;
819         int data_page_index,  data_page_offset;
820         int page_length;
821         int ret;
822         uint64_t data_ptr = args->data_ptr;
823         int do_bit17_swizzling;
824
825         remain = args->size;
826
827         /* Pin the user pages containing the data.  We can't fault while
828          * holding the struct mutex, and all of the pwrite implementations
829          * want to hold it while dereferencing the user data.
830          */
831         first_data_page = data_ptr / PAGE_SIZE;
832         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
833         num_pages = last_data_page - first_data_page + 1;
834
835         user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
836         if (user_pages == NULL)
837                 return -ENOMEM;
838
839         down_read(&mm->mmap_sem);
840         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
841                                       num_pages, 0, 0, user_pages, NULL);
842         up_read(&mm->mmap_sem);
843         if (pinned_pages < num_pages) {
844                 ret = -EFAULT;
845                 goto fail_put_user_pages;
846         }
847
848         do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
849
850         mutex_lock(&dev->struct_mutex);
851
852         ret = i915_gem_object_get_pages_or_evict(obj);
853         if (ret)
854                 goto fail_unlock;
855
856         ret = i915_gem_object_set_to_cpu_domain(obj, 1);
857         if (ret != 0)
858                 goto fail_put_pages;
859
860         obj_priv = to_intel_bo(obj);
861         offset = args->offset;
862         obj_priv->dirty = 1;
863
864         while (remain > 0) {
865                 /* Operation in this page
866                  *
867                  * shmem_page_index = page number within shmem file
868                  * shmem_page_offset = offset within page in shmem file
869                  * data_page_index = page number in get_user_pages return
870                  * data_page_offset = offset within data_page_index page.
871                  * page_length = bytes to copy for this page
872                  */
873                 shmem_page_index = offset / PAGE_SIZE;
874                 shmem_page_offset = offset & ~PAGE_MASK;
875                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
876                 data_page_offset = data_ptr & ~PAGE_MASK;
877
878                 page_length = remain;
879                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
880                         page_length = PAGE_SIZE - shmem_page_offset;
881                 if ((data_page_offset + page_length) > PAGE_SIZE)
882                         page_length = PAGE_SIZE - data_page_offset;
883
884                 if (do_bit17_swizzling) {
885                         slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
886                                               shmem_page_offset,
887                                               user_pages[data_page_index],
888                                               data_page_offset,
889                                               page_length,
890                                               0);
891                 } else {
892                         slow_shmem_copy(obj_priv->pages[shmem_page_index],
893                                         shmem_page_offset,
894                                         user_pages[data_page_index],
895                                         data_page_offset,
896                                         page_length);
897                 }
898
899                 remain -= page_length;
900                 data_ptr += page_length;
901                 offset += page_length;
902         }
903
904 fail_put_pages:
905         i915_gem_object_put_pages(obj);
906 fail_unlock:
907         mutex_unlock(&dev->struct_mutex);
908 fail_put_user_pages:
909         for (i = 0; i < pinned_pages; i++)
910                 page_cache_release(user_pages[i]);
911         drm_free_large(user_pages);
912
913         return ret;
914 }
915
916 /**
917  * Writes data to the object referenced by handle.
918  *
919  * On error, the contents of the buffer that were to be modified are undefined.
920  */
921 int
922 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
923                       struct drm_file *file_priv)
924 {
925         struct drm_i915_gem_pwrite *args = data;
926         struct drm_gem_object *obj;
927         struct drm_i915_gem_object *obj_priv;
928         int ret = 0;
929
930         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
931         if (obj == NULL)
932                 return -ENOENT;
933         obj_priv = to_intel_bo(obj);
934
935         /* Bounds check destination.
936          *
937          * XXX: This could use review for overflow issues...
938          */
939         if (args->offset > obj->size || args->size > obj->size ||
940             args->offset + args->size > obj->size) {
941                 drm_gem_object_unreference_unlocked(obj);
942                 return -EINVAL;
943         }
944
945         /* We can only do the GTT pwrite on untiled buffers, as otherwise
946          * it would end up going through the fenced access, and we'll get
947          * different detiling behavior between reading and writing.
948          * pread/pwrite currently are reading and writing from the CPU
949          * perspective, requiring manual detiling by the client.
950          */
951         if (obj_priv->phys_obj)
952                 ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
953         else if (obj_priv->tiling_mode == I915_TILING_NONE &&
954                  dev->gtt_total != 0 &&
955                  obj->write_domain != I915_GEM_DOMAIN_CPU) {
956                 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
957                 if (ret == -EFAULT) {
958                         ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
959                                                        file_priv);
960                 }
961         } else if (i915_gem_object_needs_bit17_swizzle(obj)) {
962                 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
963         } else {
964                 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
965                 if (ret == -EFAULT) {
966                         ret = i915_gem_shmem_pwrite_slow(dev, obj, args,
967                                                          file_priv);
968                 }
969         }
970
971 #if WATCH_PWRITE
972         if (ret)
973                 DRM_INFO("pwrite failed %d\n", ret);
974 #endif
975
976         drm_gem_object_unreference_unlocked(obj);
977
978         return ret;
979 }
980
981 /**
982  * Called when user space prepares to use an object with the CPU, either
983  * through the mmap ioctl's mapping or a GTT mapping.
984  */
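/*
 * Example (userspace sketch, not part of this file): before writing through
 * a CPU mmap, userspace would typically move the buffer to the CPU domain
 * for both reads and writes.  Constants and fields are from i915_drm.h;
 * "fd" and "handle" are placeholders.
 *
 *	struct drm_i915_gem_set_domain sd = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
 */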
985 int
986 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
987                           struct drm_file *file_priv)
988 {
989         struct drm_i915_private *dev_priv = dev->dev_private;
990         struct drm_i915_gem_set_domain *args = data;
991         struct drm_gem_object *obj;
992         struct drm_i915_gem_object *obj_priv;
993         uint32_t read_domains = args->read_domains;
994         uint32_t write_domain = args->write_domain;
995         int ret;
996
997         if (!(dev->driver->driver_features & DRIVER_GEM))
998                 return -ENODEV;
999
1000         /* Only handle setting domains to types used by the CPU. */
1001         if (write_domain & I915_GEM_GPU_DOMAINS)
1002                 return -EINVAL;
1003
1004         if (read_domains & I915_GEM_GPU_DOMAINS)
1005                 return -EINVAL;
1006
1007         /* Having something in the write domain implies it's in the read
1008          * domain, and only that read domain.  Enforce that in the request.
1009          */
1010         if (write_domain != 0 && read_domains != write_domain)
1011                 return -EINVAL;
1012
1013         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1014         if (obj == NULL)
1015                 return -ENOENT;
1016         obj_priv = to_intel_bo(obj);
1017
1018         mutex_lock(&dev->struct_mutex);
1019
1020         intel_mark_busy(dev, obj);
1021
1022 #if WATCH_BUF
1023         DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n",
1024                  obj, obj->size, read_domains, write_domain);
1025 #endif
1026         if (read_domains & I915_GEM_DOMAIN_GTT) {
1027                 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1028
1029                 /* Update the LRU on the fence for the CPU access that's
1030                  * about to occur.
1031                  */
1032                 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1033                         struct drm_i915_fence_reg *reg =
1034                                 &dev_priv->fence_regs[obj_priv->fence_reg];
1035                         list_move_tail(&reg->lru_list,
1036                                        &dev_priv->mm.fence_list);
1037                 }
1038
1039                 /* Silently promote "you're not bound, there was nothing to do"
1040                  * to success, since the client was just asking us to
1041                  * make sure everything was done.
1042                  */
1043                 if (ret == -EINVAL)
1044                         ret = 0;
1045         } else {
1046                 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1047         }
1048
1049
1050         /* Maintain LRU order of "inactive" objects */
1051         if (ret == 0 && i915_gem_object_is_inactive(obj_priv))
1052                 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1053
1054         drm_gem_object_unreference(obj);
1055         mutex_unlock(&dev->struct_mutex);
1056         return ret;
1057 }
1058
1059 /**
1060  * Called when user space has done writes to this buffer
1061  */
1062 int
1063 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1064                       struct drm_file *file_priv)
1065 {
1066         struct drm_i915_gem_sw_finish *args = data;
1067         struct drm_gem_object *obj;
1068         struct drm_i915_gem_object *obj_priv;
1069         int ret = 0;
1070
1071         if (!(dev->driver->driver_features & DRIVER_GEM))
1072                 return -ENODEV;
1073
1074         mutex_lock(&dev->struct_mutex);
1075         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1076         if (obj == NULL) {
1077                 mutex_unlock(&dev->struct_mutex);
1078                 return -ENOENT;
1079         }
1080
1081 #if WATCH_BUF
1082         DRM_INFO("%s: sw_finish %d (%p %zd)\n",
1083                  __func__, args->handle, obj, obj->size);
1084 #endif
1085         obj_priv = to_intel_bo(obj);
1086
1087         /* Pinned buffers may be scanout, so flush the cache */
1088         if (obj_priv->pin_count)
1089                 i915_gem_object_flush_cpu_write_domain(obj);
1090
1091         drm_gem_object_unreference(obj);
1092         mutex_unlock(&dev->struct_mutex);
1093         return ret;
1094 }
1095
1096 /**
1097  * Maps the contents of an object, returning the address it is mapped
1098  * into.
1099  *
1100  * While the mapping holds a reference on the contents of the object, it doesn't
1101  * imply a ref on the object itself.
1102  */
1103 int
1104 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1105                    struct drm_file *file_priv)
1106 {
1107         struct drm_i915_gem_mmap *args = data;
1108         struct drm_gem_object *obj;
1109         loff_t offset;
1110         unsigned long addr;
1111
1112         if (!(dev->driver->driver_features & DRIVER_GEM))
1113                 return -ENODEV;
1114
1115         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1116         if (obj == NULL)
1117                 return -ENOENT;
1118
1119         offset = args->offset;
1120
1121         down_write(&current->mm->mmap_sem);
1122         addr = do_mmap(obj->filp, 0, args->size,
1123                        PROT_READ | PROT_WRITE, MAP_SHARED,
1124                        args->offset);
1125         up_write(&current->mm->mmap_sem);
1126         drm_gem_object_unreference_unlocked(obj);
1127         if (IS_ERR((void *)addr))
1128                 return addr;
1129
1130         args->addr_ptr = (uint64_t) addr;
1131
1132         return 0;
1133 }
1134
1135 /**
1136  * i915_gem_fault - fault a page into the GTT
1137  * vma: VMA in question
1138  * vmf: fault info
1139  *
1140  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1141  * from userspace.  The fault handler takes care of binding the object to
1142  * the GTT (if needed), allocating and programming a fence register (again,
1143  * only if needed based on whether the old reg is still valid or the object
1144  * is tiled) and inserting a new PTE into the faulting process.
1145  *
1146  * Note that the faulting process may involve evicting existing objects
1147  * from the GTT and/or fence registers to make room.  So performance may
1148  * suffer if the GTT working set is large or there are few fence registers
1149  * left.
1150  */
1151 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1152 {
1153         struct drm_gem_object *obj = vma->vm_private_data;
1154         struct drm_device *dev = obj->dev;
1155         drm_i915_private_t *dev_priv = dev->dev_private;
1156         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1157         pgoff_t page_offset;
1158         unsigned long pfn;
1159         int ret = 0;
1160         bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1161
1162         /* We don't use vmf->pgoff since that has the fake offset */
1163         page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1164                 PAGE_SHIFT;
1165
1166         /* Now bind it into the GTT if needed */
1167         mutex_lock(&dev->struct_mutex);
1168         if (!obj_priv->gtt_space) {
1169                 ret = i915_gem_object_bind_to_gtt(obj, 0);
1170                 if (ret)
1171                         goto unlock;
1172
1173                 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1174                 if (ret)
1175                         goto unlock;
1176         }
1177
1178         /* Need a new fence register? */
1179         if (obj_priv->tiling_mode != I915_TILING_NONE) {
1180                 ret = i915_gem_object_get_fence_reg(obj);
1181                 if (ret)
1182                         goto unlock;
1183         }
1184
1185         if (i915_gem_object_is_inactive(obj_priv))
1186                 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1187
1188         pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
1189                 page_offset;
1190
1191         /* Finally, remap it using the new GTT offset */
1192         ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1193 unlock:
1194         mutex_unlock(&dev->struct_mutex);
1195
1196         switch (ret) {
1197         case 0:
1198         case -ERESTARTSYS:
1199                 return VM_FAULT_NOPAGE;
1200         case -ENOMEM:
1201         case -EAGAIN:
1202                 return VM_FAULT_OOM;
1203         default:
1204                 return VM_FAULT_SIGBUS;
1205         }
1206 }
1207
1208 /**
1209  * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1210  * @obj: obj in question
1211  *
1212  * GEM memory mapping works by handing back to userspace a fake mmap offset
1213  * it can use in a subsequent mmap(2) call.  The DRM core code then looks
1214  * up the object based on the offset and sets up the various memory mapping
1215  * structures.
1216  *
1217  * This routine allocates and attaches a fake offset for @obj.
1218  */
1219 static int
1220 i915_gem_create_mmap_offset(struct drm_gem_object *obj)
1221 {
1222         struct drm_device *dev = obj->dev;
1223         struct drm_gem_mm *mm = dev->mm_private;
1224         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1225         struct drm_map_list *list;
1226         struct drm_local_map *map;
1227         int ret = 0;
1228
1229         /* Set the object up for mmap'ing */
1230         list = &obj->map_list;
1231         list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
1232         if (!list->map)
1233                 return -ENOMEM;
1234
1235         map = list->map;
1236         map->type = _DRM_GEM;
1237         map->size = obj->size;
1238         map->handle = obj;
1239
1240         /* Get a DRM GEM mmap offset allocated... */
1241         list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1242                                                     obj->size / PAGE_SIZE, 0, 0);
1243         if (!list->file_offset_node) {
1244                 DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
1245                 ret = -ENOMEM;
1246                 goto out_free_list;
1247         }
1248
1249         list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1250                                                   obj->size / PAGE_SIZE, 0);
1251         if (!list->file_offset_node) {
1252                 ret = -ENOMEM;
1253                 goto out_free_list;
1254         }
1255
1256         list->hash.key = list->file_offset_node->start;
1257         if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
1258                 DRM_ERROR("failed to add to map hash\n");
1259                 ret = -ENOMEM;
1260                 goto out_free_mm;
1261         }
1262
1263         /* By now we should be all set; any drm_mmap request on the offset
1264          * below will get to our mmap & fault handler */
1265         obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
1266
1267         return 0;
1268
1269 out_free_mm:
1270         drm_mm_put_block(list->file_offset_node);
1271 out_free_list:
1272         kfree(list->map);
1273
1274         return ret;
1275 }
1276
1277 /**
1278  * i915_gem_release_mmap - remove physical page mappings
1279  * @obj: obj in question
1280  *
1281  * Preserve the reservation of the mmapping with the DRM core code, but
1282  * relinquish ownership of the pages back to the system.
1283  *
1284  * It is vital that we remove the page mapping if we have mapped a tiled
1285  * object through the GTT and then lose the fence register due to
1286  * resource pressure. Similarly if the object has been moved out of the
1287  * aperture, then pages mapped into userspace must be revoked. Removing the
1288  * mapping will then trigger a page fault on the next user access, allowing
1289  * fixup by i915_gem_fault().
1290  */
1291 void
1292 i915_gem_release_mmap(struct drm_gem_object *obj)
1293 {
1294         struct drm_device *dev = obj->dev;
1295         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1296
1297         if (dev->dev_mapping)
1298                 unmap_mapping_range(dev->dev_mapping,
1299                                     obj_priv->mmap_offset, obj->size, 1);
1300 }
1301
1302 static void
1303 i915_gem_free_mmap_offset(struct drm_gem_object *obj)
1304 {
1305         struct drm_device *dev = obj->dev;
1306         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1307         struct drm_gem_mm *mm = dev->mm_private;
1308         struct drm_map_list *list;
1309
1310         list = &obj->map_list;
1311         drm_ht_remove_item(&mm->offset_hash, &list->hash);
1312
1313         if (list->file_offset_node) {
1314                 drm_mm_put_block(list->file_offset_node);
1315                 list->file_offset_node = NULL;
1316         }
1317
1318         if (list->map) {
1319                 kfree(list->map);
1320                 list->map = NULL;
1321         }
1322
1323         obj_priv->mmap_offset = 0;
1324 }
1325
1326 /**
1327  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1328  * @obj: object to check
1329  *
1330  * Return the required GTT alignment for an object, taking into account
1331  * potential fence register mapping if needed.
1332  */
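/* For example (values taken from the code below): a 600 KiB tiled object on
 * a 9xx-class, non-965 chip starts the search at 1 MiB and is aligned to
 * 1 MiB; on an older chip the search starts at 512 KiB and still doubles up
 * to 1 MiB, since the fence region must be no smaller than the object.
 */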
1333 static uint32_t
1334 i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
1335 {
1336         struct drm_device *dev = obj->dev;
1337         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1338         int start, i;
1339
1340         /*
1341          * Minimum alignment is 4k (GTT page size), but might be greater
1342          * if a fence register is needed for the object.
1343          */
1344         if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
1345                 return 4096;
1346
1347         /*
1348          * Previous chips need to be aligned to the size of the smallest
1349          * fence register that can contain the object.
1350          */
1351         if (IS_I9XX(dev))
1352                 start = 1024*1024;
1353         else
1354                 start = 512*1024;
1355
1356         for (i = start; i < obj->size; i <<= 1)
1357                 ;
1358
1359         return i;
1360 }
1361
1362 /**
1363  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1364  * @dev: DRM device
1365  * @data: GTT mapping ioctl data
1366  * @file_priv: GEM object info
1367  *
1368  * Simply returns the fake offset to userspace so it can mmap it.
1369  * The mmap call will end up in drm_gem_mmap(), which will set things
1370  * up so we can get faults in the handler above.
1371  *
1372  * The fault handler will take care of binding the object into the GTT
1373  * (since it may have been evicted to make room for something), allocating
1374  * a fence register, and mapping the appropriate aperture address into
1375  * userspace.
1376  */
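/*
 * Example (userspace sketch, not part of this file): the fake offset
 * returned here is handed straight to mmap(2) on the DRM fd.  Fields follow
 * drm_i915_gem_mmap_gtt in i915_drm.h; "fd", "handle" and "size" are
 * placeholders.
 *
 *	struct drm_i915_gem_mmap_gtt map = { .handle = handle };
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &map);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, map.offset);
 */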
1377 int
1378 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1379                         struct drm_file *file_priv)
1380 {
1381         struct drm_i915_gem_mmap_gtt *args = data;
1382         struct drm_gem_object *obj;
1383         struct drm_i915_gem_object *obj_priv;
1384         int ret;
1385
1386         if (!(dev->driver->driver_features & DRIVER_GEM))
1387                 return -ENODEV;
1388
1389         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1390         if (obj == NULL)
1391                 return -ENOENT;
1392
1393         mutex_lock(&dev->struct_mutex);
1394
1395         obj_priv = to_intel_bo(obj);
1396
1397         if (obj_priv->madv != I915_MADV_WILLNEED) {
1398                 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1399                 drm_gem_object_unreference(obj);
1400                 mutex_unlock(&dev->struct_mutex);
1401                 return -EINVAL;
1402         }
1403
1404
1405         if (!obj_priv->mmap_offset) {
1406                 ret = i915_gem_create_mmap_offset(obj);
1407                 if (ret) {
1408                         drm_gem_object_unreference(obj);
1409                         mutex_unlock(&dev->struct_mutex);
1410                         return ret;
1411                 }
1412         }
1413
1414         args->offset = obj_priv->mmap_offset;
1415
1416         /*
1417          * Pull it into the GTT so that we have a page list (makes the
1418          * initial fault faster and any subsequent flushing possible).
1419          */
1420         if (!obj_priv->agp_mem) {
1421                 ret = i915_gem_object_bind_to_gtt(obj, 0);
1422                 if (ret) {
1423                         drm_gem_object_unreference(obj);
1424                         mutex_unlock(&dev->struct_mutex);
1425                         return ret;
1426                 }
1427         }
1428
1429         drm_gem_object_unreference(obj);
1430         mutex_unlock(&dev->struct_mutex);
1431
1432         return 0;
1433 }
1434
1435 void
1436 i915_gem_object_put_pages(struct drm_gem_object *obj)
1437 {
1438         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1439         int page_count = obj->size / PAGE_SIZE;
1440         int i;
1441
1442         BUG_ON(obj_priv->pages_refcount == 0);
1443         BUG_ON(obj_priv->madv == __I915_MADV_PURGED);
1444
1445         if (--obj_priv->pages_refcount != 0)
1446                 return;
1447
1448         if (obj_priv->tiling_mode != I915_TILING_NONE)
1449                 i915_gem_object_save_bit_17_swizzle(obj);
1450
1451         if (obj_priv->madv == I915_MADV_DONTNEED)
1452                 obj_priv->dirty = 0;
1453
1454         for (i = 0; i < page_count; i++) {
1455                 if (obj_priv->dirty)
1456                         set_page_dirty(obj_priv->pages[i]);
1457
1458                 if (obj_priv->madv == I915_MADV_WILLNEED)
1459                         mark_page_accessed(obj_priv->pages[i]);
1460
1461                 page_cache_release(obj_priv->pages[i]);
1462         }
1463         obj_priv->dirty = 0;
1464
1465         drm_free_large(obj_priv->pages);
1466         obj_priv->pages = NULL;
1467 }
1468
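/* Mark the object busy on the given ring: take a reference the first time it
 * becomes active, move it to the tail of the ring's active list and record
 * the seqno of the request that last rendered to it.
 */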
1469 static void
1470 i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno,
1471                                struct intel_ring_buffer *ring)
1472 {
1473         struct drm_device *dev = obj->dev;
1474         drm_i915_private_t *dev_priv = dev->dev_private;
1475         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1476         BUG_ON(ring == NULL);
1477         obj_priv->ring = ring;
1478
1479         /* Add a reference if we're newly entering the active list. */
1480         if (!obj_priv->active) {
1481                 drm_gem_object_reference(obj);
1482                 obj_priv->active = 1;
1483         }
1484         /* Move from whatever list we were on to the tail of execution. */
1485         spin_lock(&dev_priv->mm.active_list_lock);
1486         list_move_tail(&obj_priv->list, &ring->active_list);
1487         spin_unlock(&dev_priv->mm.active_list_lock);
1488         obj_priv->last_rendering_seqno = seqno;
1489 }
1490
1491 static void
1492 i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
1493 {
1494         struct drm_device *dev = obj->dev;
1495         drm_i915_private_t *dev_priv = dev->dev_private;
1496         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1497
1498         BUG_ON(!obj_priv->active);
1499         list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list);
1500         obj_priv->last_rendering_seqno = 0;
1501 }
1502
1503 /* Immediately discard the backing storage */
1504 static void
1505 i915_gem_object_truncate(struct drm_gem_object *obj)
1506 {
1507         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1508         struct inode *inode;
1509
1510         /* Our goal here is to return as much of the memory as
1511          * possible back to the system, as we are called from OOM.
1512          * To do this we must instruct the shmfs to drop all of its
1513          * backing pages, *now*. Here we mirror the actions taken
1514          * by shmem_delete_inode() to release the backing store.
1515          */
1516         inode = obj->filp->f_path.dentry->d_inode;
1517         truncate_inode_pages(inode->i_mapping, 0);
1518         if (inode->i_op->truncate_range)
1519                 inode->i_op->truncate_range(inode, 0, (loff_t)-1);
1520
1521         obj_priv->madv = __I915_MADV_PURGED;
1522 }
1523
1524 static inline int
1525 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
1526 {
1527         return obj_priv->madv == I915_MADV_DONTNEED;
1528 }
1529
1530 static void
1531 i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
1532 {
1533         struct drm_device *dev = obj->dev;
1534         drm_i915_private_t *dev_priv = dev->dev_private;
1535         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1536
1537         i915_verify_inactive(dev, __FILE__, __LINE__);
1538         if (obj_priv->pin_count != 0)
1539                 list_del_init(&obj_priv->list);
1540         else
1541                 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1542
1543         BUG_ON(!list_empty(&obj_priv->gpu_write_list));
1544
1545         obj_priv->last_rendering_seqno = 0;
1546         obj_priv->ring = NULL;
1547         if (obj_priv->active) {
1548                 obj_priv->active = 0;
1549                 drm_gem_object_unreference(obj);
1550         }
1551         i915_verify_inactive(dev, __FILE__, __LINE__);
1552 }
1553
1554 static void
1555 i915_gem_process_flushing_list(struct drm_device *dev,
1556                                uint32_t flush_domains, uint32_t seqno,
1557                                struct intel_ring_buffer *ring)
1558 {
1559         drm_i915_private_t *dev_priv = dev->dev_private;
1560         struct drm_i915_gem_object *obj_priv, *next;
1561
1562         list_for_each_entry_safe(obj_priv, next,
1563                                  &dev_priv->mm.gpu_write_list,
1564                                  gpu_write_list) {
1565                 struct drm_gem_object *obj = &obj_priv->base;
1566
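                     /* Move an object only if this flush covers its entire write
                      * domain and its writes were emitted on the ring being flushed.
                      */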
1567                 if ((obj->write_domain & flush_domains) ==
1568                     obj->write_domain &&
1569                     obj_priv->ring->ring_flag == ring->ring_flag) {
1570                         uint32_t old_write_domain = obj->write_domain;
1571
1572                         obj->write_domain = 0;
1573                         list_del_init(&obj_priv->gpu_write_list);
1574                         i915_gem_object_move_to_active(obj, seqno, ring);
1575
1576                         /* update the fence lru list */
1577                         if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1578                                 struct drm_i915_fence_reg *reg =
1579                                         &dev_priv->fence_regs[obj_priv->fence_reg];
1580                                 list_move_tail(&reg->lru_list,
1581                                                 &dev_priv->mm.fence_list);
1582                         }
1583
1584                         trace_i915_gem_object_change_domain(obj,
1585                                                             obj->read_domains,
1586                                                             old_write_domain);
1587                 }
1588         }
1589 }
1590
1591 uint32_t
1592 i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
1593                  uint32_t flush_domains, struct intel_ring_buffer *ring)
1594 {
1595         drm_i915_private_t *dev_priv = dev->dev_private;
1596         struct drm_i915_file_private *i915_file_priv = NULL;
1597         struct drm_i915_gem_request *request;
1598         uint32_t seqno;
1599         int was_empty;
1600
1601         if (file_priv != NULL)
1602                 i915_file_priv = file_priv->driver_priv;
1603
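             /* If the request cannot be allocated, return 0; a zero seqno is
              * never valid, so callers can detect the failure.
              */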
1604         request = kzalloc(sizeof(*request), GFP_KERNEL);
1605         if (request == NULL)
1606                 return 0;
1607
1608         seqno = ring->add_request(dev, ring, file_priv, flush_domains);
1609
1610         request->seqno = seqno;
1611         request->ring = ring;
1612         request->emitted_jiffies = jiffies;
1613         was_empty = list_empty(&ring->request_list);
1614         list_add_tail(&request->list, &ring->request_list);
1615
1616         if (i915_file_priv) {
1617                 list_add_tail(&request->client_list,
1618                               &i915_file_priv->mm.request_list);
1619         } else {
1620                 INIT_LIST_HEAD(&request->client_list);
1621         }
1622
1623         /* Associate any objects on the flushing list matching the write
1624          * domain we're flushing with our flush.
1625          */
1626         if (flush_domains != 0)
1627                 i915_gem_process_flushing_list(dev, flush_domains, seqno, ring);
1628
1629         if (!dev_priv->mm.suspended) {
1630                 mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
1631                 if (was_empty)
1632                         queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1633         }
1634         return seqno;
1635 }
1636
1637 /**
1638  * Command execution barrier
1639  *
1640  * Ensures that all commands in the ring are finished
1641  * before signalling the CPU
1642  */
1643 static uint32_t
1644 i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
1645 {
1646         uint32_t flush_domains = 0;
1647
1648         /* The sampler always gets flushed on i965 (sigh) */
1649         if (IS_I965G(dev))
1650                 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
1651
1652         ring->flush(dev, ring,
1653                         I915_GEM_DOMAIN_COMMAND, flush_domains);
1654         return flush_domains;
1655 }
1656
1657 /**
1658  * Moves buffers associated only with the given active seqno from the active
1659  * to inactive list, potentially freeing them.
1660  */
1661 static void
1662 i915_gem_retire_request(struct drm_device *dev,
1663                         struct drm_i915_gem_request *request)
1664 {
1665         drm_i915_private_t *dev_priv = dev->dev_private;
1666
1667         trace_i915_gem_request_retire(dev, request->seqno);
1668
1669         /* Move any buffers on the active list that are no longer referenced
1670          * by the ringbuffer to the flushing/inactive lists as appropriate.
1671          */
1672         spin_lock(&dev_priv->mm.active_list_lock);
1673         while (!list_empty(&request->ring->active_list)) {
1674                 struct drm_gem_object *obj;
1675                 struct drm_i915_gem_object *obj_priv;
1676
1677                 obj_priv = list_first_entry(&request->ring->active_list,
1678                                             struct drm_i915_gem_object,
1679                                             list);
1680                 obj = &obj_priv->base;
1681
1682                 /* If the seqno being retired doesn't match the oldest in the
1683                  * list, then the oldest in the list must still be newer than
1684                  * this seqno.
1685                  */
1686                 if (obj_priv->last_rendering_seqno != request->seqno)
1687                         goto out;
1688
1689 #if WATCH_LRU
1690                 DRM_INFO("%s: retire %d moves to inactive list %p\n",
1691                          __func__, request->seqno, obj);
1692 #endif
1693
1694                 if (obj->write_domain != 0)
1695                         i915_gem_object_move_to_flushing(obj);
1696                 else {
1697                         /* Take a reference on the object so it won't be
1698                          * freed while the spinlock is held.  The list
1699                          * protection for this spinlock is safe when breaking
1700                          * the lock like this since the next thing we do
1701                          * is just get the head of the list again.
1702                          */
1703                         drm_gem_object_reference(obj);
1704                         i915_gem_object_move_to_inactive(obj);
1705                         spin_unlock(&dev_priv->mm.active_list_lock);
1706                         drm_gem_object_unreference(obj);
1707                         spin_lock(&dev_priv->mm.active_list_lock);
1708                 }
1709         }
1710 out:
1711         spin_unlock(&dev_priv->mm.active_list_lock);
1712 }
1713
1714 /**
1715  * Returns true if seq1 is later than seq2.
1716  */
1717 bool
1718 i915_seqno_passed(uint32_t seq1, uint32_t seq2)
1719 {
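             /* The signed difference handles wrap-around of the 32-bit seqno. */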
1720         return (int32_t)(seq1 - seq2) >= 0;
1721 }
1722
1723 uint32_t
1724 i915_get_gem_seqno(struct drm_device *dev,
1725                    struct intel_ring_buffer *ring)
1726 {
1727         return ring->get_gem_seqno(dev, ring);
1728 }
1729
1730 /**
1731  * This function clears the request list as sequence numbers are passed.
1732  */
1733 static void
1734 i915_gem_retire_requests_ring(struct drm_device *dev,
1735                               struct intel_ring_buffer *ring)
1736 {
1737         drm_i915_private_t *dev_priv = dev->dev_private;
1738         uint32_t seqno;
1739
1740         if (!ring->status_page.page_addr
1741                         || list_empty(&ring->request_list))
1742                 return;
1743
1744         seqno = i915_get_gem_seqno(dev, ring);
1745
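             /* Requests are retired in submission order.  If the GPU is wedged,
              * everything is retired regardless of seqno so that waiters are not
              * left stuck behind requests that will never complete.
              */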
1746         while (!list_empty(&ring->request_list)) {
1747                 struct drm_i915_gem_request *request;
1748                 uint32_t retiring_seqno;
1749
1750                 request = list_first_entry(&ring->request_list,
1751                                            struct drm_i915_gem_request,
1752                                            list);
1753                 retiring_seqno = request->seqno;
1754
1755                 if (i915_seqno_passed(seqno, retiring_seqno) ||
1756                     atomic_read(&dev_priv->mm.wedged)) {
1757                         i915_gem_retire_request(dev, request);
1758
1759                         list_del(&request->list);
1760                         list_del(&request->client_list);
1761                         kfree(request);
1762                 } else
1763                         break;
1764         }
1765
1766         if (unlikely (dev_priv->trace_irq_seqno &&
1767                       i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
1768
1769                 ring->user_irq_put(dev, ring);
1770                 dev_priv->trace_irq_seqno = 0;
1771         }
1772 }
1773
1774 void
1775 i915_gem_retire_requests(struct drm_device *dev)
1776 {
1777         drm_i915_private_t *dev_priv = dev->dev_private;
1778
1779         if (!list_empty(&dev_priv->mm.deferred_free_list)) {
1780                 struct drm_i915_gem_object *obj_priv, *tmp;
1781
1782                 /* We must be careful that during unbind() we do not
1783                  * accidentally recurse infinitely into retire requests.
1784                  * Currently:
1785                  *   retire -> free -> unbind -> wait -> retire_ring
1786                  */
1787                 list_for_each_entry_safe(obj_priv, tmp,
1788                                          &dev_priv->mm.deferred_free_list,
1789                                          list)
1790                         i915_gem_free_object_tail(&obj_priv->base);
1791         }
1792
1793         i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
1794         if (HAS_BSD(dev))
1795                 i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
1796 }
1797
1798 void
1799 i915_gem_retire_work_handler(struct work_struct *work)
1800 {
1801         drm_i915_private_t *dev_priv;
1802         struct drm_device *dev;
1803
1804         dev_priv = container_of(work, drm_i915_private_t,
1805                                 mm.retire_work.work);
1806         dev = dev_priv->dev;
1807
1808         mutex_lock(&dev->struct_mutex);
1809         i915_gem_retire_requests(dev);
1810
1811         if (!dev_priv->mm.suspended &&
1812                 (!list_empty(&dev_priv->render_ring.request_list) ||
1813                         (HAS_BSD(dev) &&
1814                          !list_empty(&dev_priv->bsd_ring.request_list))))
1815                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1816         mutex_unlock(&dev->struct_mutex);
1817 }
1818
1819 int
1820 i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
1821                 int interruptible, struct intel_ring_buffer *ring)
1822 {
1823         drm_i915_private_t *dev_priv = dev->dev_private;
1824         u32 ier;
1825         int ret = 0;
1826
1827         BUG_ON(seqno == 0);
1828
1829         if (atomic_read(&dev_priv->mm.wedged))
1830                 return -EIO;
1831
1832         if (!i915_seqno_passed(ring->get_gem_seqno(dev, ring), seqno)) {
1833                 if (HAS_PCH_SPLIT(dev))
1834                         ier = I915_READ(DEIER) | I915_READ(GTIER);
1835                 else
1836                         ier = I915_READ(IER);
1837                 if (!ier) {
1838                         DRM_ERROR("something (likely vbetool) disabled "
1839                                   "interrupts, re-enabling\n");
1840                         i915_driver_irq_preinstall(dev);
1841                         i915_driver_irq_postinstall(dev);
1842                 }
1843
1844                 trace_i915_gem_request_wait_begin(dev, seqno);
1845
1846                 ring->waiting_gem_seqno = seqno;
1847                 ring->user_irq_get(dev, ring);
1848                 if (interruptible)
1849                         ret = wait_event_interruptible(ring->irq_queue,
1850                                 i915_seqno_passed(
1851                                         ring->get_gem_seqno(dev, ring), seqno)
1852                                 || atomic_read(&dev_priv->mm.wedged));
1853                 else
1854                         wait_event(ring->irq_queue,
1855                                 i915_seqno_passed(
1856                                         ring->get_gem_seqno(dev, ring), seqno)
1857                                 || atomic_read(&dev_priv->mm.wedged));
1858
1859                 ring->user_irq_put(dev, ring);
1860                 ring->waiting_gem_seqno = 0;
1861
1862                 trace_i915_gem_request_wait_end(dev, seqno);
1863         }
1864         if (atomic_read(&dev_priv->mm.wedged))
1865                 ret = -EIO;
1866
1867         if (ret && ret != -ERESTARTSYS)
1868                 DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
1869                           __func__, ret, seqno, ring->get_gem_seqno(dev, ring));
1870
1871         /* Directly dispatch request retiring.  While we have the work queue
1872          * to handle this, the waiter on a request often wants an associated
1873          * buffer to have made it to the inactive list, and we would need
1874          * a separate wait queue to handle that.
1875          */
1876         if (ret == 0)
1877                 i915_gem_retire_requests_ring(dev, ring);
1878
1879         return ret;
1880 }
1881
1882 /**
1883  * Waits for a sequence number to be signaled, and cleans up the
1884  * request and object lists appropriately for that event.
1885  */
1886 static int
1887 i915_wait_request(struct drm_device *dev, uint32_t seqno,
1888                 struct intel_ring_buffer *ring)
1889 {
1890         return i915_do_wait_request(dev, seqno, 1, ring);
1891 }
1892
1893 static void
1894 i915_gem_flush(struct drm_device *dev,
1895                uint32_t invalidate_domains,
1896                uint32_t flush_domains)
1897 {
1898         drm_i915_private_t *dev_priv = dev->dev_private;
1899         if (flush_domains & I915_GEM_DOMAIN_CPU)
1900                 drm_agp_chipset_flush(dev);
1901         dev_priv->render_ring.flush(dev, &dev_priv->render_ring,
1902                         invalidate_domains,
1903                         flush_domains);
1904
1905         if (HAS_BSD(dev))
1906                 dev_priv->bsd_ring.flush(dev, &dev_priv->bsd_ring,
1907                                 invalidate_domains,
1908                                 flush_domains);
1909 }
1910
1911 /**
1912  * Ensures that all rendering to the object has completed and the object is
1913  * safe to unbind from the GTT or access from the CPU.
1914  */
1915 static int
1916 i915_gem_object_wait_rendering(struct drm_gem_object *obj)
1917 {
1918         struct drm_device *dev = obj->dev;
1919         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1920         int ret;
1921
1922         /* This function only exists to support waiting for existing rendering,
1923          * not for emitting required flushes.
1924          */
1925         BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);
1926
1927         /* If there is rendering queued on the buffer being evicted, wait for
1928          * it.
1929          */
1930         if (obj_priv->active) {
1931 #if WATCH_BUF
1932                 DRM_INFO("%s: object %p wait for seqno %08x\n",
1933                           __func__, obj, obj_priv->last_rendering_seqno);
1934 #endif
1935                 ret = i915_wait_request(dev,
1936                                 obj_priv->last_rendering_seqno, obj_priv->ring);
1937                 if (ret != 0)
1938                         return ret;
1939         }
1940
1941         return 0;
1942 }
1943
1944 /**
1945  * Unbinds an object from the GTT aperture.
1946  */
1947 int
1948 i915_gem_object_unbind(struct drm_gem_object *obj)
1949 {
1950         struct drm_device *dev = obj->dev;
1951         drm_i915_private_t *dev_priv = dev->dev_private;
1952         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1953         int ret = 0;
1954
1955 #if WATCH_BUF
1956         DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj);
1957         DRM_INFO("gtt_space %p\n", obj_priv->gtt_space);
1958 #endif
1959         if (obj_priv->gtt_space == NULL)
1960                 return 0;
1961
1962         if (obj_priv->pin_count != 0) {
1963                 DRM_ERROR("Attempting to unbind pinned buffer\n");
1964                 return -EINVAL;
1965         }
1966
1967         /* blow away mappings if mapped through GTT */
1968         i915_gem_release_mmap(obj);
1969
1970         /* Move the object to the CPU domain to ensure that
1971          * any possible CPU writes while it's not in the GTT
1972          * are flushed when we go to remap it. This will
1973          * also ensure that all pending GPU writes are finished
1974          * before we unbind.
1975          */
1976         ret = i915_gem_object_set_to_cpu_domain(obj, 1);
1977         if (ret == -ERESTARTSYS)
1978                 return ret;
1979         /* Continue on if we fail due to EIO, the GPU is hung so we
1980          * should be safe and we need to cleanup or else we might
1981          * cause memory corruption through use-after-free.
1982          */
1983
1984         /* release the fence reg _after_ flushing */
1985         if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
1986                 i915_gem_clear_fence_reg(obj);
1987
1988         if (obj_priv->agp_mem != NULL) {
1989                 drm_unbind_agp(obj_priv->agp_mem);
1990                 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
1991                 obj_priv->agp_mem = NULL;
1992         }
1993
1994         i915_gem_object_put_pages(obj);
1995         BUG_ON(obj_priv->pages_refcount);
1996
1997         if (obj_priv->gtt_space) {
1998                 atomic_dec(&dev->gtt_count);
1999                 atomic_sub(obj->size, &dev->gtt_memory);
2000
2001                 drm_mm_put_block(obj_priv->gtt_space);
2002                 obj_priv->gtt_space = NULL;
2003         }
2004
2005         /* Remove ourselves from the LRU list if present. */
2006         spin_lock(&dev_priv->mm.active_list_lock);
2007         if (!list_empty(&obj_priv->list))
2008                 list_del_init(&obj_priv->list);
2009         spin_unlock(&dev_priv->mm.active_list_lock);
2010
2011         if (i915_gem_object_is_purgeable(obj_priv))
2012                 i915_gem_object_truncate(obj);
2013
2014         trace_i915_gem_object_unbind(obj);
2015
2016         return ret;
2017 }
2018
2019 int
2020 i915_gpu_idle(struct drm_device *dev)
2021 {
2022         drm_i915_private_t *dev_priv = dev->dev_private;
2023         bool lists_empty;
2024         uint32_t seqno1, seqno2;
2025         int ret;
2026
2027         spin_lock(&dev_priv->mm.active_list_lock);
2028         lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
2029                        list_empty(&dev_priv->render_ring.active_list) &&
2030                        (!HAS_BSD(dev) ||
2031                         list_empty(&dev_priv->bsd_ring.active_list)));
2032         spin_unlock(&dev_priv->mm.active_list_lock);
2033
2034         if (lists_empty)
2035                 return 0;
2036
2037         /* Flush everything onto the inactive list. */
2038         i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2039         seqno1 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS,
2040                         &dev_priv->render_ring);
2041         if (seqno1 == 0)
2042                 return -ENOMEM;
2043         ret = i915_wait_request(dev, seqno1, &dev_priv->render_ring);
             if (ret)
                     return ret;
2044
2045         if (HAS_BSD(dev)) {
2046                 seqno2 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS,
2047                                 &dev_priv->bsd_ring);
2048                 if (seqno2 == 0)
2049                         return -ENOMEM;
2050
2051                 ret = i915_wait_request(dev, seqno2, &dev_priv->bsd_ring);
2052                 if (ret)
2053                         return ret;
2054         }
2055
2056
2057         return ret;
2058 }
2059
2060 int
2061 i915_gem_object_get_pages(struct drm_gem_object *obj,
2062                           gfp_t gfpmask)
2063 {
2064         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2065         int page_count, i;
2066         struct address_space *mapping;
2067         struct inode *inode;
2068         struct page *page;
2069
2070         BUG_ON(obj_priv->pages_refcount
2071                         == DRM_I915_GEM_OBJECT_MAX_PAGES_REFCOUNT);
2072
2073         if (obj_priv->pages_refcount++ != 0)
2074                 return 0;
2075
2076         /* Get the list of pages out of our struct file.  They'll be pinned
2077          * at this point until we release them.
2078          */
2079         page_count = obj->size / PAGE_SIZE;
2080         BUG_ON(obj_priv->pages != NULL);
2081         obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
2082         if (obj_priv->pages == NULL) {
2083                 obj_priv->pages_refcount--;
2084                 return -ENOMEM;
2085         }
2086
2087         inode = obj->filp->f_path.dentry->d_inode;
2088         mapping = inode->i_mapping;
2089         for (i = 0; i < page_count; i++) {
2090                 page = read_cache_page_gfp(mapping, i,
2091                                            GFP_HIGHUSER |
2092                                            __GFP_COLD |
2093                                            __GFP_RECLAIMABLE |
2094                                            gfpmask);
2095                 if (IS_ERR(page))
2096                         goto err_pages;
2097
2098                 obj_priv->pages[i] = page;
2099         }
2100
2101         if (obj_priv->tiling_mode != I915_TILING_NONE)
2102                 i915_gem_object_do_bit_17_swizzle(obj);
2103
2104         return 0;
2105
2106 err_pages:
2107         while (i--)
2108                 page_cache_release(obj_priv->pages[i]);
2109
2110         drm_free_large(obj_priv->pages);
2111         obj_priv->pages = NULL;
2112         obj_priv->pages_refcount--;
2113         return PTR_ERR(page);
2114 }
2115
2116 static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
2117 {
2118         struct drm_gem_object *obj = reg->obj;
2119         struct drm_device *dev = obj->dev;
2120         drm_i915_private_t *dev_priv = dev->dev_private;
2121         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2122         int regnum = obj_priv->fence_reg;
2123         uint64_t val;
2124
2125         val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2126                     0xfffff000) << 32;
2127         val |= obj_priv->gtt_offset & 0xfffff000;
2128         val |= (uint64_t)((obj_priv->stride / 128) - 1) <<
2129                 SANDYBRIDGE_FENCE_PITCH_SHIFT;
2130
2131         if (obj_priv->tiling_mode == I915_TILING_Y)
2132                 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2133         val |= I965_FENCE_REG_VALID;
2134
2135         I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
2136 }
2137
2138 static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
2139 {
2140         struct drm_gem_object *obj = reg->obj;
2141         struct drm_device *dev = obj->dev;
2142         drm_i915_private_t *dev_priv = dev->dev_private;
2143         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2144         int regnum = obj_priv->fence_reg;
2145         uint64_t val;
2146
2147         val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2148                     0xfffff000) << 32;
2149         val |= obj_priv->gtt_offset & 0xfffff000;
2150         val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2151         if (obj_priv->tiling_mode == I915_TILING_Y)
2152                 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2153         val |= I965_FENCE_REG_VALID;
2154
2155         I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
2156 }
2157
2158 static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
2159 {
2160         struct drm_gem_object *obj = reg->obj;
2161         struct drm_device *dev = obj->dev;
2162         drm_i915_private_t *dev_priv = dev->dev_private;
2163         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2164         int regnum = obj_priv->fence_reg;
2165         int tile_width;
2166         uint32_t fence_reg, val;
2167         uint32_t pitch_val;
2168
2169         if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
2170             (obj_priv->gtt_offset & (obj->size - 1))) {
2171                 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n",
2172                      __func__, obj_priv->gtt_offset, obj->size);
2173                 return;
2174         }
2175
2176         if (obj_priv->tiling_mode == I915_TILING_Y &&
2177             HAS_128_BYTE_Y_TILING(dev))
2178                 tile_width = 128;
2179         else
2180                 tile_width = 512;
2181
2182         /* Note: pitch better be a power of two tile widths */
2183         pitch_val = obj_priv->stride / tile_width;
2184         pitch_val = ffs(pitch_val) - 1;
2185
2186         if (obj_priv->tiling_mode == I915_TILING_Y &&
2187             HAS_128_BYTE_Y_TILING(dev))
2188                 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2189         else
2190                 WARN_ON(pitch_val > I915_FENCE_MAX_PITCH_VAL);
2191
2192         val = obj_priv->gtt_offset;
2193         if (obj_priv->tiling_mode == I915_TILING_Y)
2194                 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2195         val |= I915_FENCE_SIZE_BITS(obj->size);
2196         val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2197         val |= I830_FENCE_REG_VALID;
2198
2199         if (regnum < 8)
2200                 fence_reg = FENCE_REG_830_0 + (regnum * 4);
2201         else
2202                 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
2203         I915_WRITE(fence_reg, val);
2204 }
2205
2206 static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
2207 {
2208         struct drm_gem_object *obj = reg->obj;
2209         struct drm_device *dev = obj->dev;
2210         drm_i915_private_t *dev_priv = dev->dev_private;
2211         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2212         int regnum = obj_priv->fence_reg;
2213         uint32_t val;
2214         uint32_t pitch_val;
2215         uint32_t fence_size_bits;
2216
2217         if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
2218             (obj_priv->gtt_offset & (obj->size - 1))) {
2219                 WARN(1, "%s: object 0x%08x not 512K or size aligned\n",
2220                      __func__, obj_priv->gtt_offset);
2221                 return;
2222         }
2223
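             /* The fence pitch field holds log2 of the pitch in 128-byte tiles. */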
2224         pitch_val = obj_priv->stride / 128;
2225         pitch_val = ffs(pitch_val) - 1;
2226         WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2227
2228         val = obj_priv->gtt_offset;
2229         if (obj_priv->tiling_mode == I915_TILING_Y)
2230                 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2231         fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
2232         WARN_ON(fence_size_bits & ~0x00000f00);
2233         val |= fence_size_bits;
2234         val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2235         val |= I830_FENCE_REG_VALID;
2236
2237         I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
2238 }
2239
2240 static int i915_find_fence_reg(struct drm_device *dev)
2241 {
2242         struct drm_i915_fence_reg *reg = NULL;
2243         struct drm_i915_gem_object *obj_priv = NULL;
2244         struct drm_i915_private *dev_priv = dev->dev_private;
2245         struct drm_gem_object *obj = NULL;
2246         int i, avail, ret;
2247
2248         /* First try to find a free reg */
2249         avail = 0;
2250         for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2251                 reg = &dev_priv->fence_regs[i];
2252                 if (!reg->obj)
2253                         return i;
2254
2255                 obj_priv = to_intel_bo(reg->obj);
2256                 if (!obj_priv->pin_count)
2257                         avail++;
2258         }
2259
2260         if (avail == 0)
2261                 return -ENOSPC;
2262
2263         /* None available, try to steal one or wait for a user to finish */
2264         i = I915_FENCE_REG_NONE;
2265         list_for_each_entry(reg, &dev_priv->mm.fence_list,
2266                             lru_list) {
2267                 obj = reg->obj;
2268                 obj_priv = to_intel_bo(obj);
2269
2270                 if (obj_priv->pin_count)
2271                         continue;
2272
2273                 /* found one! */
2274                 i = obj_priv->fence_reg;
2275                 break;
2276         }
2277
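             /* avail was non-zero above, so the LRU walk is expected to have
              * found an unpinned fence to steal (asserted below).
              */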
2278         BUG_ON(i == I915_FENCE_REG_NONE);
2279
2280         /* We only have a reference on obj from the active list. put_fence_reg
2281          * might drop that one, causing a use-after-free in it. So hold a
2282          * private reference to obj like the other callers of put_fence_reg
2283          * (set_tiling ioctl) do. */
2284         drm_gem_object_reference(obj);
2285         ret = i915_gem_object_put_fence_reg(obj);
2286         drm_gem_object_unreference(obj);
2287         if (ret != 0)
2288                 return ret;
2289
2290         return i;
2291 }
2292
2293 /**
2294  * i915_gem_object_get_fence_reg - set up a fence reg for an object
2295  * @obj: object to map through a fence reg
2296  *
2297  * When mapping objects through the GTT, userspace wants to be able to write
2298  * to them without having to worry about swizzling if the object is tiled.
2299  *
2300  * This function walks the fence regs looking for a free one for @obj,
2301  * stealing one if it can't find any.
2302  *
2303  * It then sets up the reg based on the object's properties: address, pitch
2304  * and tiling format.
2305  */
2306 int
2307 i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
2308 {
2309         struct drm_device *dev = obj->dev;
2310         struct drm_i915_private *dev_priv = dev->dev_private;
2311         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2312         struct drm_i915_fence_reg *reg = NULL;
2313         int ret;
2314
2315         /* Just update our place in the LRU if our fence is getting used. */
2316         if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
2317                 reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2318                 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2319                 return 0;
2320         }
2321
2322         switch (obj_priv->tiling_mode) {
2323         case I915_TILING_NONE:
2324                 WARN(1, "allocating a fence for non-tiled object?\n");
2325                 break;
2326         case I915_TILING_X:
2327                 if (!obj_priv->stride)
2328                         return -EINVAL;
2329                 WARN((obj_priv->stride & (512 - 1)),
2330                      "object 0x%08x is X tiled but has non-512B pitch\n",
2331                      obj_priv->gtt_offset);
2332                 break;
2333         case I915_TILING_Y:
2334                 if (!obj_priv->stride)
2335                         return -EINVAL;
2336                 WARN((obj_priv->stride & (128 - 1)),
2337                      "object 0x%08x is Y tiled but has non-128B pitch\n",
2338                      obj_priv->gtt_offset);
2339                 break;
2340         }
2341
2342         ret = i915_find_fence_reg(dev);
2343         if (ret < 0)
2344                 return ret;
2345
2346         obj_priv->fence_reg = ret;
2347         reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2348         list_add_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2349
2350         reg->obj = obj;
2351
2352         switch (INTEL_INFO(dev)->gen) {
2353         case 6:
2354                 sandybridge_write_fence_reg(reg);
2355                 break;
2356         case 5:
2357         case 4:
2358                 i965_write_fence_reg(reg);
2359                 break;
2360         case 3:
2361                 i915_write_fence_reg(reg);
2362                 break;
2363         case 2:
2364                 i830_write_fence_reg(reg);
2365                 break;
2366         }
2367
2368         trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg,
2369                         obj_priv->tiling_mode);
2370
2371         return 0;
2372 }
2373
2374 /**
2375  * i915_gem_clear_fence_reg - clear out fence register info
2376  * @obj: object to clear
2377  *
2378  * Zeroes out the fence register itself and clears out the associated
2379  * data structures in dev_priv and obj_priv.
2380  */
2381 static void
2382 i915_gem_clear_fence_reg(struct drm_gem_object *obj)
2383 {
2384         struct drm_device *dev = obj->dev;
2385         drm_i915_private_t *dev_priv = dev->dev_private;
2386         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2387         struct drm_i915_fence_reg *reg =
2388                 &dev_priv->fence_regs[obj_priv->fence_reg];
2389         uint32_t fence_reg;
2390
2391         switch (INTEL_INFO(dev)->gen) {
2392         case 6:
2393                 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
2394                              (obj_priv->fence_reg * 8), 0);
2395                 break;
2396         case 5:
2397         case 4:
2398                 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
2399                 break;
2400         case 3:
2401         case 2:
2402                 if (INTEL_INFO(dev)->gen == 3 && obj_priv->fence_reg >= 8)
2403                         fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - 8) * 4;
2404                 else
2405                         fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4;
2406
2407                 I915_WRITE(fence_reg, 0);
2408                 break;
2409         }
2410
2411         reg->obj = NULL;
2412         obj_priv->fence_reg = I915_FENCE_REG_NONE;
2413         list_del_init(&reg->lru_list);
2414 }
2415
2416 /**
2417  * i915_gem_object_put_fence_reg - waits on outstanding fenced access
2418  * to the buffer to finish, and then resets the fence register.
2419  * @obj: tiled object holding a fence register.
2420  *
2421  * Zeroes out the fence register itself and clears out the associated
2422  * data structures in dev_priv and obj_priv.
2423  */
2424 int
2425 i915_gem_object_put_fence_reg(struct drm_gem_object *obj)
2426 {
2427         struct drm_device *dev = obj->dev;
2428         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2429
2430         if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
2431                 return 0;
2432
2433         /* If we've changed tiling, GTT-mappings of the object
2434          * need to re-fault to ensure that the correct fence register
2435          * setup is in place.
2436          */
2437         i915_gem_release_mmap(obj);
2438
2439         /* On the i915, GPU access to tiled buffers is via a fence,
2440          * therefore we must wait for any outstanding access to complete
2441          * before clearing the fence.
2442          */
2443         if (!IS_I965G(dev)) {
2444                 int ret;
2445
2446                 ret = i915_gem_object_flush_gpu_write_domain(obj);
2447                 if (ret != 0)
2448                         return ret;
2449
2450                 ret = i915_gem_object_wait_rendering(obj);
2451                 if (ret != 0)
2452                         return ret;
2453         }
2454
2455         i915_gem_object_flush_gtt_write_domain(obj);
2456         i915_gem_clear_fence_reg(obj);
2457
2458         return 0;
2459 }
2460
2461 /**
2462  * Finds free space in the GTT aperture and binds the object there.
2463  */
2464 static int
2465 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
2466 {
2467         struct drm_device *dev = obj->dev;
2468         drm_i915_private_t *dev_priv = dev->dev_private;
2469         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2470         struct drm_mm_node *free_space;
2471         gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
2472         int ret;
2473
2474         if (obj_priv->madv != I915_MADV_WILLNEED) {
2475                 DRM_ERROR("Attempting to bind a purgeable object\n");
2476                 return -EINVAL;
2477         }
2478
2479         if (alignment == 0)
2480                 alignment = i915_gem_get_gtt_alignment(obj);
2481         if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
2482                 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2483                 return -EINVAL;
2484         }
2485
2486         /* If the object is bigger than the entire aperture, reject it early
2487          * before evicting everything in a vain attempt to find space.
2488          */
2489         if (obj->size > dev->gtt_total) {
2490                 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2491                 return -E2BIG;
2492         }
2493
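             /* Search the GTT for a suitably aligned hole; if none is found,
              * evict something and retry from search_free.
              */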
2494  search_free:
2495         free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2496                                         obj->size, alignment, 0);
2497         if (free_space != NULL) {
2498                 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size,
2499                                                        alignment);
2500                 if (obj_priv->gtt_space != NULL)
2501                         obj_priv->gtt_offset = obj_priv->gtt_space->start;
2502         }
2503         if (obj_priv->gtt_space == NULL) {
2504                 /* If the gtt is empty and we're still having trouble
2505                  * fitting our object in, we're out of memory.
2506                  */
2507 #if WATCH_LRU
2508                 DRM_INFO("%s: GTT full, evicting something\n", __func__);
2509 #endif
2510                 ret = i915_gem_evict_something(dev, obj->size, alignment);
2511                 if (ret)
2512                         return ret;
2513
2514                 goto search_free;
2515         }
2516
2517 #if WATCH_BUF
2518         DRM_INFO("Binding object of size %zd at 0x%08x\n",
2519                  obj->size, obj_priv->gtt_offset);
2520 #endif
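             /* The first allocation attempt uses __GFP_NORETRY | __GFP_NOWARN so a
              * failure falls through to the eviction path below rather than
              * invoking the OOM killer; gfpmask is cleared for the final retry.
              */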
2521         ret = i915_gem_object_get_pages(obj, gfpmask);
2522         if (ret) {
2523                 drm_mm_put_block(obj_priv->gtt_space);
2524                 obj_priv->gtt_space = NULL;
2525
2526                 if (ret == -ENOMEM) {
2527                         /* first try to clear up some space from the GTT */
2528                         ret = i915_gem_evict_something(dev, obj->size,
2529                                                        alignment);
2530                         if (ret) {
2531                                 /* now try to shrink everyone else */
2532                                 if (gfpmask) {
2533                                         gfpmask = 0;
2534                                         goto search_free;
2535                                 }
2536
2537                                 return ret;
2538                         }
2539
2540                         goto search_free;
2541                 }
2542
2543                 return ret;
2544         }
2545
2546         /* Create an AGP memory structure pointing at our pages, and bind it
2547          * into the GTT.
2548          */
2549         obj_priv->agp_mem = drm_agp_bind_pages(dev,
2550                                                obj_priv->pages,
2551                                                obj->size >> PAGE_SHIFT,
2552                                                obj_priv->gtt_offset,
2553                                                obj_priv->agp_type);
2554         if (obj_priv->agp_mem == NULL) {
2555                 i915_gem_object_put_pages(obj);
2556                 drm_mm_put_block(obj_priv->gtt_space);
2557                 obj_priv->gtt_space = NULL;
2558
2559                 ret = i915_gem_evict_something(dev, obj->size, alignment);
2560                 if (ret)
2561                         return ret;
2562
2563                 goto search_free;
2564         }
2565         atomic_inc(&dev->gtt_count);
2566         atomic_add(obj->size, &dev->gtt_memory);
2567
2568         /* keep track of the bound object by adding it to the inactive list */
2569         list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
2570
2571         /* Assert that the object is not currently in any GPU domain. As it
2572          * wasn't in the GTT, there shouldn't be any way it could have been in
2573          * a GPU cache
2574          */
2575         BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2576         BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2577
2578         trace_i915_gem_object_bind(obj, obj_priv->gtt_offset);
2579
2580         return 0;
2581 }
2582
2583 void
2584 i915_gem_clflush_object(struct drm_gem_object *obj)
2585 {
2586         struct drm_i915_gem_object      *obj_priv = to_intel_bo(obj);
2587
2588         /* If we don't have a page list set up, then we're not pinned
2589          * to GPU, and we can ignore the cache flush because it'll happen
2590          * again at bind time.
2591          */
2592         if (obj_priv->pages == NULL)
2593                 return;
2594
2595         trace_i915_gem_object_clflush(obj);
2596
2597         drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
2598 }
2599
2600 /** Flushes any GPU write domain for the object if it's dirty. */
2601 static int
2602 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
2603 {
2604         struct drm_device *dev = obj->dev;
2605         uint32_t old_write_domain;
2606         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2607
2608         if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
2609                 return 0;
2610
2611         /* Queue the GPU write cache flushing we need. */
2612         old_write_domain = obj->write_domain;
2613         i915_gem_flush(dev, 0, obj->write_domain);
2614         if (i915_add_request(dev, NULL, obj->write_domain, obj_priv->ring) == 0)
2615                 return -ENOMEM;
2616
2617         trace_i915_gem_object_change_domain(obj,
2618                                             obj->read_domains,
2619                                             old_write_domain);
2620         return 0;
2621 }
2622
2623 /** Flushes the GTT write domain for the object if it's dirty. */
2624 static void
2625 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
2626 {
2627         uint32_t old_write_domain;
2628
2629         if (obj->write_domain != I915_GEM_DOMAIN_GTT)
2630                 return;
2631
2632         /* No actual flushing is required for the GTT write domain.   Writes
2633          * to it immediately go to main memory as far as we know, so there's
2634          * no chipset flush.  It also doesn't land in render cache.
2635          */
2636         old_write_domain = obj->write_domain;
2637         obj->write_domain = 0;
2638
2639         trace_i915_gem_object_change_domain(obj,
2640                                             obj->read_domains,
2641                                             old_write_domain);
2642 }
2643
2644 /** Flushes the CPU write domain for the object if it's dirty. */
2645 static void
2646 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
2647 {
2648         struct drm_device *dev = obj->dev;
2649         uint32_t old_write_domain;
2650
2651         if (obj->write_domain != I915_GEM_DOMAIN_CPU)
2652                 return;
2653
2654         i915_gem_clflush_object(obj);
2655         drm_agp_chipset_flush(dev);
2656         old_write_domain = obj->write_domain;
2657         obj->write_domain = 0;
2658
2659         trace_i915_gem_object_change_domain(obj,
2660                                             obj->read_domains,
2661                                             old_write_domain);
2662 }
2663
2664 int
2665 i915_gem_object_flush_write_domain(struct drm_gem_object *obj)
2666 {
2667         int ret = 0;
2668
2669         switch (obj->write_domain) {
2670         case I915_GEM_DOMAIN_GTT:
2671                 i915_gem_object_flush_gtt_write_domain(obj);
2672                 break;
2673         case I915_GEM_DOMAIN_CPU:
2674                 i915_gem_object_flush_cpu_write_domain(obj);
2675                 break;
2676         default:
2677                 ret = i915_gem_object_flush_gpu_write_domain(obj);
2678                 break;
2679         }
2680
2681         return ret;
2682 }
2683
2684 /**
2685  * Moves a single object to the GTT read, and possibly write domain.
2686  *
2687  * This function returns when the move is complete, including waiting on
2688  * flushes to occur.
2689  */
2690 int
2691 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2692 {
2693         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2694         uint32_t old_write_domain, old_read_domains;
2695         int ret;
2696
2697         /* Not valid to be called on unbound objects. */
2698         if (obj_priv->gtt_space == NULL)
2699                 return -EINVAL;
2700
2701         ret = i915_gem_object_flush_gpu_write_domain(obj);
2702         if (ret != 0)
2703                 return ret;
2704
2705         /* Wait on any GPU rendering and flushing to occur. */
2706         ret = i915_gem_object_wait_rendering(obj);
2707         if (ret != 0)
2708                 return ret;
2709
2710         old_write_domain = obj->write_domain;
2711         old_read_domains = obj->read_domains;
2712
2713         /* If we're writing through the GTT domain, then CPU and GPU caches
2714          * will need to be invalidated at next use.
2715          */
2716         if (write)
2717                 obj->read_domains &= I915_GEM_DOMAIN_GTT;
2718
2719         i915_gem_object_flush_cpu_write_domain(obj);
2720
2721         /* It should now be out of any other write domains, and we can update
2722          * the domain values for our changes.
2723          */
2724         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2725         obj->read_domains |= I915_GEM_DOMAIN_GTT;
2726         if (write) {
2727                 obj->write_domain = I915_GEM_DOMAIN_GTT;
2728                 obj_priv->dirty = 1;
2729         }
2730
2731         trace_i915_gem_object_change_domain(obj,
2732                                             old_read_domains,
2733                                             old_write_domain);
2734
2735         return 0;
2736 }
2737
2738 /*
2739  * Prepare buffer for display plane. Use an uninterruptible wait for any
2740  * required flush, as the modesetting process is not supposed to be interrupted.
2741  */
2742 int
2743 i915_gem_object_set_to_display_plane(struct drm_gem_object *obj)
2744 {
2745         struct drm_device *dev = obj->dev;
2746         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2747         uint32_t old_write_domain, old_read_domains;
2748         int ret;
2749
2750         /* Not valid to be called on unbound objects. */
2751         if (obj_priv->gtt_space == NULL)
2752                 return -EINVAL;
2753
2754         ret = i915_gem_object_flush_gpu_write_domain(obj);
2755         if (ret)
2756                 return ret;
2757
2758         /* Wait on any GPU rendering and flushing to occur. */
2759         if (obj_priv->active) {
2760 #if WATCH_BUF
2761                 DRM_INFO("%s: object %p wait for seqno %08x\n",
2762                           __func__, obj, obj_priv->last_rendering_seqno);
2763 #endif
2764                 ret = i915_do_wait_request(dev,
2765                                 obj_priv->last_rendering_seqno,
2766                                 0,
2767                                 obj_priv->ring);
2768                 if (ret != 0)
2769                         return ret;
2770         }
2771
2772         i915_gem_object_flush_cpu_write_domain(obj);
2773
2774         old_write_domain = obj->write_domain;
2775         old_read_domains = obj->read_domains;
2776
2777         /* It should now be out of any other write domains, and we can update
2778          * the domain values for our changes.
2779          */
2780         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2781         obj->read_domains = I915_GEM_DOMAIN_GTT;
2782         obj->write_domain = I915_GEM_DOMAIN_GTT;
2783         obj_priv->dirty = 1;
2784
2785         trace_i915_gem_object_change_domain(obj,
2786                                             old_read_domains,
2787                                             old_write_domain);
2788
2789         return 0;
2790 }
2791
2792 /**
2793  * Moves a single object to the CPU read, and possibly write domain.
2794  *
2795  * This function returns when the move is complete, including waiting on
2796  * flushes to occur.
2797  */
2798 static int
2799 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
2800 {
2801         uint32_t old_write_domain, old_read_domains;
2802         int ret;
2803
2804         ret = i915_gem_object_flush_gpu_write_domain(obj);
2805         if (ret)
2806                 return ret;
2807
2808         /* Wait on any GPU rendering and flushing to occur. */
2809         ret = i915_gem_object_wait_rendering(obj);
2810         if (ret != 0)
2811                 return ret;
2812
2813         i915_gem_object_flush_gtt_write_domain(obj);
2814
2815         /* If we have a partially-valid cache of the object in the CPU,
2816          * finish invalidating it and free the per-page flags.
2817          */
2818         i915_gem_object_set_to_full_cpu_read_domain(obj);
2819
2820         old_write_domain = obj->write_domain;
2821         old_read_domains = obj->read_domains;
2822
2823         /* Flush the CPU cache if it's still invalid. */
2824         if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2825                 i915_gem_clflush_object(obj);
2826
2827                 obj->read_domains |= I915_GEM_DOMAIN_CPU;
2828         }
2829
2830         /* It should now be out of any other write domains, and we can update
2831          * the domain values for our changes.
2832          */
2833         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2834
2835         /* If we're writing through the CPU, then the GPU read domains will
2836          * need to be invalidated at next use.
2837          */
2838         if (write) {
2839                 obj->read_domains &= I915_GEM_DOMAIN_CPU;
2840                 obj->write_domain = I915_GEM_DOMAIN_CPU;
2841         }
2842
2843         trace_i915_gem_object_change_domain(obj,
2844                                             old_read_domains,
2845                                             old_write_domain);
2846
2847         return 0;
2848 }
2849
2850 /*
2851  * Set the next domain for the specified object. This
2852  * may not actually perform the necessary flushing/invalidating though,
2853  * as that may want to be batched with other set_domain operations
2854  *
2855  * This is (we hope) the only really tricky part of gem. The goal
2856  * is fairly simple -- track which caches hold bits of the object
2857  * and make sure they remain coherent. A few concrete examples may
2858  * help to explain how it works. For shorthand, we use the notation
2859  * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
2860  * a pair of read and write domain masks.
2861  *
2862  * Case 1: the batch buffer
2863  *
2864  *      1. Allocated
2865  *      2. Written by CPU
2866  *      3. Mapped to GTT
2867  *      4. Read by GPU
2868  *      5. Unmapped from GTT
2869  *      6. Freed
2870  *
2871  *      Let's take these a step at a time
2872  *
2873  *      1. Allocated
2874  *              Pages allocated from the kernel may still have
2875  *              cache contents, so we set them to (CPU, CPU) always.
2876  *      2. Written by CPU (using pwrite)
2877  *              The pwrite function calls set_domain (CPU, CPU) and
2878  *              this function does nothing (as nothing changes)
2879  *      3. Mapped by GTT
2880  *              This function asserts that the object is not
2881  *              currently in any GPU-based read or write domains
2882  *      4. Read by GPU
2883  *              i915_gem_execbuffer calls set_domain (COMMAND, 0).
2884  *              As write_domain is zero, this function adds in the
2885  *              current read domains (CPU+COMMAND, 0).
2886  *              flush_domains is set to CPU.
2887  *              invalidate_domains is set to COMMAND
2888  *              clflush is run to get data out of the CPU caches
2889  *              then i915_dev_set_domain calls i915_gem_flush to
2890  *              emit an MI_FLUSH and drm_agp_chipset_flush
2891  *      5. Unmapped from GTT
2892  *              i915_gem_object_unbind calls set_domain (CPU, CPU)
2893  *              flush_domains and invalidate_domains end up both zero
2894  *              so no flushing/invalidating happens
2895  *      6. Freed
2896  *              yay, done
2897  *
2898  * Case 2: The shared render buffer
2899  *
2900  *      1. Allocated
2901  *      2. Mapped to GTT
2902  *      3. Read/written by GPU
2903  *      4. set_domain to (CPU,CPU)
2904  *      5. Read/written by CPU
2905  *      6. Read/written by GPU
2906  *
2907  *      1. Allocated
2908  *              Same as last example, (CPU, CPU)
2909  *      2. Mapped to GTT
2910  *              Nothing changes (assertions find that it is not in the GPU)
2911  *      3. Read/written by GPU
2912  *              execbuffer calls set_domain (RENDER, RENDER)
2913  *              flush_domains gets CPU
2914  *              invalidate_domains gets GPU
2915  *              clflush (obj)
2916  *              MI_FLUSH and drm_agp_chipset_flush
2917  *      4. set_domain (CPU, CPU)
2918  *              flush_domains gets GPU
2919  *              invalidate_domains gets CPU
2920  *              wait_rendering (obj) to make sure all drawing is complete.
2921  *              This will include an MI_FLUSH to get the data from GPU
2922  *              to memory
2923  *              clflush (obj) to invalidate the CPU cache
2924  *              Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
2925  *      5. Read/written by CPU
2926  *              cache lines are loaded and dirtied
2927  *      6. Read/written by GPU
2928  *              Same as last GPU access
2929  *
2930  * Case 3: The constant buffer
2931  *
2932  *      1. Allocated
2933  *      2. Written by CPU
2934  *      3. Read by GPU
2935  *      4. Updated (written) by CPU again
2936  *      5. Read by GPU
2937  *
2938  *      1. Allocated
2939  *              (CPU, CPU)
2940  *      2. Written by CPU
2941  *              (CPU, CPU)
2942  *      3. Read by GPU
2943  *              (CPU+RENDER, 0)
2944  *              flush_domains = CPU
2945  *              invalidate_domains = RENDER
2946  *              clflush (obj)
2947  *              MI_FLUSH
2948  *              drm_agp_chipset_flush
2949  *      4. Updated (written) by CPU again
2950  *              (CPU, CPU)
2951  *              flush_domains = 0 (no previous write domain)
2952  *              invalidate_domains = 0 (no new read domains)
2953  *      5. Read by GPU
2954  *              (CPU+RENDER, 0)
2955  *              flush_domains = CPU
2956  *              invalidate_domains = RENDER
2957  *              clflush (obj)
2958  *              MI_FLUSH
2959  *              drm_agp_chipset_flush
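 *
 * In the code below these transitions are accumulated per object into the
 * local invalidate_domains/flush_domains masks and then folded into
 * dev->invalidate_domains and dev->flush_domains, so that execbuffer can
 * emit one combined flush/invalidate for the whole object list.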
2960  */
2961 static void
2962 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
2963 {
2964         struct drm_device               *dev = obj->dev;
2965         drm_i915_private_t              *dev_priv = dev->dev_private;
2966         struct drm_i915_gem_object      *obj_priv = to_intel_bo(obj);
2967         uint32_t                        invalidate_domains = 0;
2968         uint32_t                        flush_domains = 0;
2969         uint32_t                        old_read_domains;
2970
2971         BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
2972         BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
2973
2974         intel_mark_busy(dev, obj);
2975
2976 #if WATCH_BUF
2977         DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
2978                  __func__, obj,
2979                  obj->read_domains, obj->pending_read_domains,
2980                  obj->write_domain, obj->pending_write_domain);
2981 #endif
2982         /*
2983          * If the object isn't moving to a new write domain,
2984          * let the object stay in multiple read domains
2985          */
2986         if (obj->pending_write_domain == 0)
2987                 obj->pending_read_domains |= obj->read_domains;
2988         else
2989                 obj_priv->dirty = 1;
2990
2991         /*
2992          * Flush the current write domain if
2993          * the new read domains don't match. Invalidate
2994          * any read domains which differ from the old
2995          * write domain
2996          */
2997         if (obj->write_domain &&
2998             obj->write_domain != obj->pending_read_domains) {
2999                 flush_domains |= obj->write_domain;
3000                 invalidate_domains |=
3001                         obj->pending_read_domains & ~obj->write_domain;
3002         }
3003         /*
3004          * Invalidate any read caches which may have
3005          * stale data. That is, any new read domains.
3006          */
3007         invalidate_domains |= obj->pending_read_domains & ~obj->read_domains;
3008         if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) {
3009 #if WATCH_BUF
3010                 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
3011                          __func__, flush_domains, invalidate_domains);
3012 #endif
3013                 i915_gem_clflush_object(obj);
3014         }
3015
3016         old_read_domains = obj->read_domains;
3017
3018         /* The actual obj->write_domain will be updated with
3019          * pending_write_domain after we emit the accumulated flush for all
3020          * of our domain changes in execbuffers (which clears objects'
3021          * write_domains).  So if we have a current write domain that we
3022          * aren't changing, set pending_write_domain to that.
3023          */
3024         if (flush_domains == 0 && obj->pending_write_domain == 0)
3025                 obj->pending_write_domain = obj->write_domain;
3026         obj->read_domains = obj->pending_read_domains;
3027
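        /* Track which rings have pending GPU writes so that execbuffer can
         * queue a flush request on each of them once the accumulated flush
         * has been emitted.
         */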
3028         if (flush_domains & I915_GEM_GPU_DOMAINS) {
3029                 if (obj_priv->ring == &dev_priv->render_ring)
3030                         dev_priv->flush_rings |= FLUSH_RENDER_RING;
3031                 else if (obj_priv->ring == &dev_priv->bsd_ring)
3032                         dev_priv->flush_rings |= FLUSH_BSD_RING;
3033         }
3034
3035         dev->invalidate_domains |= invalidate_domains;
3036         dev->flush_domains |= flush_domains;
3037 #if WATCH_BUF
3038         DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n",
3039                  __func__,
3040                  obj->read_domains, obj->write_domain,
3041                  dev->invalidate_domains, dev->flush_domains);
3042 #endif
3043
3044         trace_i915_gem_object_change_domain(obj,
3045                                             old_read_domains,
3046                                             obj->write_domain);
3047 }
3048
3049 /**
3050  * Moves the object from a partially CPU read to a full one.
3051  *
3052  * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3053  * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3054  */
3055 static void
3056 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
3057 {
3058         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3059
3060         if (!obj_priv->page_cpu_valid)
3061                 return;
3062
3063         /* If we're partially in the CPU read domain, finish moving it in.
3064          */
3065         if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
3066                 int i;
3067
3068                 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
3069                         if (obj_priv->page_cpu_valid[i])
3070                                 continue;
3071                         drm_clflush_pages(obj_priv->pages + i, 1);
3072                 }
3073         }
3074
3075         /* Free the page_cpu_valid mappings which are now stale, whether
3076          * or not we've got I915_GEM_DOMAIN_CPU.
3077          */
3078         kfree(obj_priv->page_cpu_valid);
3079         obj_priv->page_cpu_valid = NULL;
3080 }
3081
3082 /**
3083  * Set the CPU read domain on a range of the object.
3084  *
3085  * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3086  * not entirely valid.  The page_cpu_valid member of the object records which
3087  * pages have already been flushed, and is respected by
3088  * i915_gem_object_set_to_cpu_domain() if it is later called to get a valid mapping
3089  * of the whole object.
3090  *
3091  * This function returns when the move is complete, including waiting on
3092  * flushes to occur.
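 *
 * For example (with 4 KiB pages), a pwrite of 4 KiB at offset 8192 into a
 * 16 KiB object only requires page 2 to be clflushed; page_cpu_valid[2] is
 * set and the remaining entries stay zero until another range is requested.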
3093  */
3094 static int
3095 i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
3096                                           uint64_t offset, uint64_t size)
3097 {
3098         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3099         uint32_t old_read_domains;
3100         int i, ret;
3101
3102         if (offset == 0 && size == obj->size)
3103                 return i915_gem_object_set_to_cpu_domain(obj, 0);
3104
3105         ret = i915_gem_object_flush_gpu_write_domain(obj);
3106         if (ret)
3107                 return ret;
3108
3109         /* Wait on any GPU rendering and flushing to occur. */
3110         ret = i915_gem_object_wait_rendering(obj);
3111         if (ret != 0)
3112                 return ret;
3113         i915_gem_object_flush_gtt_write_domain(obj);
3114
3115         /* If we're already fully in the CPU read domain, we're done. */
3116         if (obj_priv->page_cpu_valid == NULL &&
3117             (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
3118                 return 0;
3119
3120         /* Otherwise, create/clear the per-page CPU read domain flag if we're
3121          * newly adding I915_GEM_DOMAIN_CPU
3122          */
3123         if (obj_priv->page_cpu_valid == NULL) {
3124                 obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE,
3125                                                    GFP_KERNEL);
3126                 if (obj_priv->page_cpu_valid == NULL)
3127                         return -ENOMEM;
3128         } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
3129                 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
3130
3131         /* Flush the cache on any pages that are still invalid from the CPU's
3132          * perspective.
3133          */
3134         for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3135              i++) {
3136                 if (obj_priv->page_cpu_valid[i])
3137                         continue;
3138
3139                 drm_clflush_pages(obj_priv->pages + i, 1);
3140
3141                 obj_priv->page_cpu_valid[i] = 1;
3142         }
3143
3144         /* It should now be out of any other write domains, and we can update
3145          * the domain values for our changes.
3146          */
3147         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3148
3149         old_read_domains = obj->read_domains;
3150         obj->read_domains |= I915_GEM_DOMAIN_CPU;
3151
3152         trace_i915_gem_object_change_domain(obj,
3153                                             old_read_domains,
3154                                             obj->write_domain);
3155
3156         return 0;
3157 }
3158
3159 /**
3160  * Pin an object to the GTT and evaluate the relocations landing in it.
3161  */
3162 static int
3163 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
3164                                  struct drm_file *file_priv,
3165                                  struct drm_i915_gem_exec_object2 *entry,
3166                                  struct drm_i915_gem_relocation_entry *relocs)
3167 {
3168         struct drm_device *dev = obj->dev;
3169         drm_i915_private_t *dev_priv = dev->dev_private;
3170         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3171         int i, ret;
3172         void __iomem *reloc_page;
3173         bool need_fence;
3174
3175         need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
3176                      obj_priv->tiling_mode != I915_TILING_NONE;
3177
3178         /* Check fence reg constraints and rebind if necessary */
3179         if (need_fence &&
3180             !i915_gem_object_fence_offset_ok(obj,
3181                                              obj_priv->tiling_mode)) {
3182                 ret = i915_gem_object_unbind(obj);
3183                 if (ret)
3184                         return ret;
3185         }
3186
3187         /* Choose the GTT offset for our buffer and put it there. */
3188         ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
3189         if (ret)
3190                 return ret;
3191
3192         /*
3193          * Pre-965 chips need a fence register set up in order to
3194          * properly handle blits to/from tiled surfaces.
3195          */
3196         if (need_fence) {
3197                 ret = i915_gem_object_get_fence_reg(obj);
3198                 if (ret != 0) {
3199                         i915_gem_object_unpin(obj);
3200                         return ret;
3201                 }
3202         }
3203
3204         entry->offset = obj_priv->gtt_offset;
3205
3206         /* Apply the relocations, using the GTT aperture to avoid cache
3207          * flushing requirements.
3208          */
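        /* Each relocation rewrites one 32-bit word at obj + reloc->offset so
         * that it holds the target object's GTT offset plus reloc->delta.
         */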
3209         for (i = 0; i < entry->relocation_count; i++) {
3210                 struct drm_i915_gem_relocation_entry *reloc = &relocs[i];
3211                 struct drm_gem_object *target_obj;
3212                 struct drm_i915_gem_object *target_obj_priv;
3213                 uint32_t reloc_val, reloc_offset;
3214                 uint32_t __iomem *reloc_entry;
3215
3216                 target_obj = drm_gem_object_lookup(obj->dev, file_priv,
3217                                                    reloc->target_handle);
3218                 if (target_obj == NULL) {
3219                         i915_gem_object_unpin(obj);
3220                         return -ENOENT;
3221                 }
3222                 target_obj_priv = to_intel_bo(target_obj);
3223
3224 #if WATCH_RELOC
3225                 DRM_INFO("%s: obj %p offset %08x target %d "
3226                          "read %08x write %08x gtt %08x "
3227                          "presumed %08x delta %08x\n",
3228                          __func__,
3229                          obj,
3230                          (int) reloc->offset,
3231                          (int) reloc->target_handle,
3232                          (int) reloc->read_domains,
3233                          (int) reloc->write_domain,
3234                          (int) target_obj_priv->gtt_offset,
3235                          (int) reloc->presumed_offset,
3236                          reloc->delta);
3237 #endif
3238
3239                 /* The target buffer should have appeared before us in the
3240                  * exec_object list, so it should have a GTT space bound by now.
3241                  */
3242                 if (target_obj_priv->gtt_space == NULL) {
3243                         DRM_ERROR("No GTT space found for object %d\n",
3244                                   reloc->target_handle);
3245                         drm_gem_object_unreference(target_obj);
3246                         i915_gem_object_unpin(obj);
3247                         return -EINVAL;
3248                 }
3249
3250                 /* Validate that the target is in a valid r/w GPU domain */
3251                 if (reloc->write_domain & (reloc->write_domain - 1)) {
3252                         DRM_ERROR("reloc with multiple write domains: "
3253                                   "obj %p target %d offset %d "
3254                                   "read %08x write %08x\n",
3255                                   obj, reloc->target_handle,
3256                                   (int) reloc->offset,
3257                                   reloc->read_domains,
3258                                   reloc->write_domain);
                        drm_gem_object_unreference(target_obj);
                        i915_gem_object_unpin(obj);
3259                         return -EINVAL;
3260                 }
3261                 if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
3262                     reloc->read_domains & I915_GEM_DOMAIN_CPU) {
3263                         DRM_ERROR("reloc with read/write CPU domains: "
3264                                   "obj %p target %d offset %d "
3265                                   "read %08x write %08x\n",
3266                                   obj, reloc->target_handle,
3267                                   (int) reloc->offset,
3268                                   reloc->read_domains,
3269                                   reloc->write_domain);
3270                         drm_gem_object_unreference(target_obj);
3271                         i915_gem_object_unpin(obj);
3272                         return -EINVAL;
3273                 }
3274                 if (reloc->write_domain && target_obj->pending_write_domain &&
3275                     reloc->write_domain != target_obj->pending_write_domain) {
3276                         DRM_ERROR("Write domain conflict: "
3277                                   "obj %p target %d offset %d "
3278                                   "new %08x old %08x\n",
3279                                   obj, reloc->target_handle,
3280                                   (int) reloc->offset,
3281                                   reloc->write_domain,
3282                                   target_obj->pending_write_domain);
3283                         drm_gem_object_unreference(target_obj);
3284                         i915_gem_object_unpin(obj);
3285                         return -EINVAL;
3286                 }
3287
3288                 target_obj->pending_read_domains |= reloc->read_domains;
3289                 target_obj->pending_write_domain |= reloc->write_domain;
3290
3291                 /* If the relocation already has the right value in it, no
3292                  * more work needs to be done.
3293                  */
3294                 if (target_obj_priv->gtt_offset == reloc->presumed_offset) {
3295                         drm_gem_object_unreference(target_obj);
3296                         continue;
3297                 }
3298
3299                 /* Check that the relocation address is valid... */
3300                 if (reloc->offset > obj->size - 4) {
3301                         DRM_ERROR("Relocation beyond object bounds: "
3302                                   "obj %p target %d offset %d size %d.\n",
3303                                   obj, reloc->target_handle,
3304                                   (int) reloc->offset, (int) obj->size);
3305                         drm_gem_object_unreference(target_obj);
3306                         i915_gem_object_unpin(obj);
3307                         return -EINVAL;
3308                 }
3309                 if (reloc->offset & 3) {
3310                         DRM_ERROR("Relocation not 4-byte aligned: "
3311                                   "obj %p target %d offset %d.\n",
3312                                   obj, reloc->target_handle,
3313                                   (int) reloc->offset);
3314                         drm_gem_object_unreference(target_obj);
3315                         i915_gem_object_unpin(obj);
3316                         return -EINVAL;
3317                 }
3318
3319                 /* and points to somewhere within the target object. */
3320                 if (reloc->delta >= target_obj->size) {
3321                         DRM_ERROR("Relocation beyond target object bounds: "
3322                                   "obj %p target %d delta %d size %d.\n",
3323                                   obj, reloc->target_handle,
3324                                   (int) reloc->delta, (int) target_obj->size);
3325                         drm_gem_object_unreference(target_obj);
3326                         i915_gem_object_unpin(obj);
3327                         return -EINVAL;
3328                 }
3329
3330                 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
3331                 if (ret != 0) {
3332                         drm_gem_object_unreference(target_obj);
3333                         i915_gem_object_unpin(obj);
3334                         return ret;
3335                 }
3336
3337                 /* Map the page containing the relocation we're going to
3338                  * perform.
3339                  */
3340                 reloc_offset = obj_priv->gtt_offset + reloc->offset;
3341                 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3342                                                       (reloc_offset &
3343                                                        ~(PAGE_SIZE - 1)),
3344                                                       KM_USER0);
3345                 reloc_entry = (uint32_t __iomem *)(reloc_page +
3346                                                    (reloc_offset & (PAGE_SIZE - 1)));
3347                 reloc_val = target_obj_priv->gtt_offset + reloc->delta;
3348
3349 #if WATCH_BUF
3350                 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n",
3351                           obj, (unsigned int) reloc->offset,
3352                           readl(reloc_entry), reloc_val);
3353 #endif
3354                 writel(reloc_val, reloc_entry);
3355                 io_mapping_unmap_atomic(reloc_page, KM_USER0);
3356
3357                 /* The updated presumed offset for this entry will be
3358                  * copied back out to the user.
3359                  */
3360                 reloc->presumed_offset = target_obj_priv->gtt_offset;
3361
3362                 drm_gem_object_unreference(target_obj);
3363         }
3364
3365 #if WATCH_BUF
3366         if (0)
3367                 i915_gem_dump_object(obj, 128, __func__, ~0);
3368 #endif
3369         return 0;
3370 }
3371
3372 /* Throttle our rendering by waiting until the ring has completed our requests
3373  * emitted over 20 msec ago.
3374  *
3375  * Note that if we were to use the current jiffies each time around the loop,
3376  * we wouldn't escape the function with any frames outstanding if the time to
3377  * render a frame was over 20ms.
3378  *
3379  * This should get us reasonable parallelism between CPU and GPU but also
3380  * relatively low latency when blocking on a particular request to finish.
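 *
 * For example, a client emitting one request per frame at 60 Hz spaces its
 * requests roughly 16 ms apart, so only about a frame's worth of work can
 * remain outstanding before this starts to block.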
3381  */
3382 static int
3383 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
3384 {
3385         struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
3386         int ret = 0;
3387         unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3388
3389         mutex_lock(&dev->struct_mutex);
3390         while (!list_empty(&i915_file_priv->mm.request_list)) {
3391                 struct drm_i915_gem_request *request;
3392
3393                 request = list_first_entry(&i915_file_priv->mm.request_list,
3394                                            struct drm_i915_gem_request,
3395                                            client_list);
3396
3397                 if (time_after_eq(request->emitted_jiffies, recent_enough))
3398                         break;
3399
3400                 ret = i915_wait_request(dev, request->seqno, request->ring);
3401                 if (ret != 0)
3402                         break;
3403         }
3404         mutex_unlock(&dev->struct_mutex);
3405
3406         return ret;
3407 }
3408
3409 static int
3410 i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list,
3411                               uint32_t buffer_count,
3412                               struct drm_i915_gem_relocation_entry **relocs)
3413 {
3414         uint32_t reloc_count = 0, reloc_index = 0, i;
3415         int ret;
3416
3417         *relocs = NULL;
3418         for (i = 0; i < buffer_count; i++) {
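                /* Guard against the summed relocation count overflowing. */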
3419                 if (reloc_count + exec_list[i].relocation_count < reloc_count)
3420                         return -EINVAL;
3421                 reloc_count += exec_list[i].relocation_count;
3422         }
3423
3424         *relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
3425         if (*relocs == NULL) {
3426                 DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count);
3427                 return -ENOMEM;
3428         }
3429
3430         for (i = 0; i < buffer_count; i++) {
3431                 struct drm_i915_gem_relocation_entry __user *user_relocs;
3432
3433                 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3434
3435                 ret = copy_from_user(&(*relocs)[reloc_index],
3436                                      user_relocs,
3437                                      exec_list[i].relocation_count *
3438                                      sizeof(**relocs));
3439                 if (ret != 0) {
3440                         drm_free_large(*relocs);
3441                         *relocs = NULL;
3442                         return -EFAULT;
3443                 }
3444
3445                 reloc_index += exec_list[i].relocation_count;
3446         }
3447
3448         return 0;
3449 }
3450
3451 static int
3452 i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list,
3453                             uint32_t buffer_count,
3454                             struct drm_i915_gem_relocation_entry *relocs)
3455 {
3456         uint32_t reloc_count = 0, i;
3457         int ret = 0;
3458
3459         if (relocs == NULL)
3460                 return 0;
3461
3462         for (i = 0; i < buffer_count; i++) {
3463                 struct drm_i915_gem_relocation_entry __user *user_relocs;
3464                 int unwritten;
3465
3466                 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3467
3468                 unwritten = copy_to_user(user_relocs,
3469                                          &relocs[reloc_count],
3470                                          exec_list[i].relocation_count *
3471                                          sizeof(*relocs));
3472
3473                 if (unwritten) {
3474                         ret = -EFAULT;
3475                         goto err;
3476                 }
3477
3478                 reloc_count += exec_list[i].relocation_count;
3479         }
3480
3481 err:
3482         drm_free_large(relocs);
3483
3484         return ret;
3485 }
3486
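/* Sanity check the batch buffer range handed to the ring: both the start
 * and the length must be 8-byte aligned, and the start must be a non-zero
 * GTT address.
 */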
3487 static int
3488 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec,
3489                            uint64_t exec_offset)
3490 {
3491         uint32_t exec_start, exec_len;
3492
3493         exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3494         exec_len = (uint32_t) exec->batch_len;
3495
3496         if ((exec_start | exec_len) & 0x7)
3497                 return -EINVAL;
3498
3499         if (!exec_start)
3500                 return -EINVAL;
3501
3502         return 0;
3503 }
3504
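/* Block until none of the given objects has a page flip pending, dropping
 * struct_mutex while sleeping so the flip completion can make progress.
 */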
3505 static int
3506 i915_gem_wait_for_pending_flip(struct drm_device *dev,
3507                                struct drm_gem_object **object_list,
3508                                int count)
3509 {
3510         drm_i915_private_t *dev_priv = dev->dev_private;
3511         struct drm_i915_gem_object *obj_priv;
3512         DEFINE_WAIT(wait);
3513         int i, ret = 0;
3514
3515         for (;;) {
3516                 prepare_to_wait(&dev_priv->pending_flip_queue,
3517                                 &wait, TASK_INTERRUPTIBLE);
3518                 for (i = 0; i < count; i++) {
3519                         obj_priv = to_intel_bo(object_list[i]);
3520                         if (atomic_read(&obj_priv->pending_flip) > 0)
3521                                 break;
3522                 }
3523                 if (i == count)
3524                         break;
3525
3526                 if (!signal_pending(current)) {
3527                         mutex_unlock(&dev->struct_mutex);
3528                         schedule();
3529                         mutex_lock(&dev->struct_mutex);
3530                         continue;
3531                 }
3532                 ret = -ERESTARTSYS;
3533                 break;
3534         }
3535         finish_wait(&dev_priv->pending_flip_queue, &wait);
3536
3537         return ret;
3538 }
3539
3540
3541 int
3542 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
3543                        struct drm_file *file_priv,
3544                        struct drm_i915_gem_execbuffer2 *args,
3545                        struct drm_i915_gem_exec_object2 *exec_list)
3546 {
3547         drm_i915_private_t *dev_priv = dev->dev_private;
3548         struct drm_gem_object **object_list = NULL;
3549         struct drm_gem_object *batch_obj;
3550         struct drm_i915_gem_object *obj_priv;
3551         struct drm_clip_rect *cliprects = NULL;
3552         struct drm_i915_gem_relocation_entry *relocs = NULL;
3553         int ret = 0, ret2, i, pinned = 0;
3554         uint64_t exec_offset;
3555         uint32_t seqno, flush_domains, reloc_index;
3556         int pin_tries, flips;
3557
3558         struct intel_ring_buffer *ring = NULL;
3559
3560 #if WATCH_EXEC
3561         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3562                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3563 #endif
3564         if (args->flags & I915_EXEC_BSD) {
3565                 if (!HAS_BSD(dev)) {
3566                         DRM_ERROR("execbuf with wrong flag\n");
3567                         return -EINVAL;
3568                 }
3569                 ring = &dev_priv->bsd_ring;
3570         } else {
3571                 ring = &dev_priv->render_ring;
3572         }
3573
3574         if (args->buffer_count < 1) {
3575                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3576                 return -EINVAL;
3577         }
3578         object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count);
3579         if (object_list == NULL) {
3580                 DRM_ERROR("Failed to allocate object list for %d buffers\n",
3581                           args->buffer_count);
3582                 ret = -ENOMEM;
3583                 goto pre_mutex_err;
3584         }
3585
3586         if (args->num_cliprects != 0) {
3587                 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
3588                                     GFP_KERNEL);
3589                 if (cliprects == NULL) {
3590                         ret = -ENOMEM;
3591                         goto pre_mutex_err;
3592                 }
3593
3594                 ret = copy_from_user(cliprects,
3595                                      (struct drm_clip_rect __user *)
3596                                      (uintptr_t) args->cliprects_ptr,
3597                                      sizeof(*cliprects) * args->num_cliprects);
3598                 if (ret != 0) {
3599                         DRM_ERROR("copy %d cliprects failed: %d\n",
3600                                   args->num_cliprects, ret);
3601                         ret = -EFAULT;
3602                         goto pre_mutex_err;
3603                 }
3604         }
3605
3606         ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count,
3607                                             &relocs);
3608         if (ret != 0)
3609                 goto pre_mutex_err;
3610
3611         mutex_lock(&dev->struct_mutex);
3612
3613         i915_verify_inactive(dev, __FILE__, __LINE__);
3614
3615         if (atomic_read(&dev_priv->mm.wedged)) {
3616                 mutex_unlock(&dev->struct_mutex);
3617                 ret = -EIO;
3618                 goto pre_mutex_err;
3619         }
3620
3621         if (dev_priv->mm.suspended) {
3622                 mutex_unlock(&dev->struct_mutex);
3623                 ret = -EBUSY;
3624                 goto pre_mutex_err;
3625         }
3626
3627         /* Look up object handles */
3628         flips = 0;
3629         for (i = 0; i < args->buffer_count; i++) {
3630                 object_list[i] = drm_gem_object_lookup(dev, file_priv,
3631                                                        exec_list[i].handle);
3632                 if (object_list[i] == NULL) {
3633                         DRM_ERROR("Invalid object handle %d at index %d\n",
3634                                    exec_list[i].handle, i);
3635                         /* prevent error path from reading uninitialized data */
3636                         args->buffer_count = i + 1;
3637                         ret = -ENOENT;
3638                         goto err;
3639                 }
3640
3641                 obj_priv = to_intel_bo(object_list[i]);
3642                 if (obj_priv->in_execbuffer) {
3643                         DRM_ERROR("Object %p appears more than once in object list\n",
3644                                    object_list[i]);
3645                         /* prevent error path from reading uninitialized data */
3646                         args->buffer_count = i + 1;
3647                         ret = -EINVAL;
3648                         goto err;
3649                 }
3650                 obj_priv->in_execbuffer = true;
3651                 flips += atomic_read(&obj_priv->pending_flip);
3652         }
3653
3654         if (flips > 0) {
3655                 ret = i915_gem_wait_for_pending_flip(dev, object_list,
3656                                                      args->buffer_count);
3657                 if (ret)
3658                         goto err;
3659         }
3660
3661         /* Pin and relocate */
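        /* If pinning fails with -ENOSPC, release everything pinned so far,
         * evict the whole GTT and retry the list once before giving up.
         */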
3662         for (pin_tries = 0; ; pin_tries++) {
3663                 ret = 0;
3664                 reloc_index = 0;
3665
3666                 for (i = 0; i < args->buffer_count; i++) {
3667                         object_list[i]->pending_read_domains = 0;
3668                         object_list[i]->pending_write_domain = 0;
3669                         ret = i915_gem_object_pin_and_relocate(object_list[i],
3670                                                                file_priv,
3671                                                                &exec_list[i],
3672                                                                &relocs[reloc_index]);
3673                         if (ret)
3674                                 break;
3675                         pinned = i + 1;
3676                         reloc_index += exec_list[i].relocation_count;
3677                 }
3678                 /* success */
3679                 if (ret == 0)
3680                         break;
3681
3682                 /* error other than GTT full, or we've already tried again */
3683                 if (ret != -ENOSPC || pin_tries >= 1) {
3684                         if (ret != -ERESTARTSYS) {
3685                                 unsigned long long total_size = 0;
3686                                 int num_fences = 0;
3687                                 for (i = 0; i < args->buffer_count; i++) {
3688                                         obj_priv = to_intel_bo(object_list[i]);
3689
3690                                         total_size += object_list[i]->size;
3691                                         num_fences +=
3692                                                 exec_list[i].flags & EXEC_OBJECT_NEEDS_FENCE &&
3693                                                 obj_priv->tiling_mode != I915_TILING_NONE;
3694                                 }
3695                                 DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes, %d fences: %d\n",
3696                                           pinned+1, args->buffer_count,
3697                                           total_size, num_fences,
3698                                           ret);
3699                                 DRM_ERROR("%d objects [%d pinned], "
3700                                           "%d object bytes [%d pinned], "
3701                                           "%d/%d gtt bytes\n",
3702                                           atomic_read(&dev->object_count),
3703                                           atomic_read(&dev->pin_count),
3704                                           atomic_read(&dev->object_memory),
3705                                           atomic_read(&dev->pin_memory),
3706                                           atomic_read(&dev->gtt_memory),
3707                                           dev->gtt_total);
3708                         }
3709                         goto err;
3710                 }
3711
3712                 /* unpin all of our buffers */
3713                 for (i = 0; i < pinned; i++)
3714                         i915_gem_object_unpin(object_list[i]);
3715                 pinned = 0;
3716
3717                 /* evict everyone we can from the aperture */
3718                 ret = i915_gem_evict_everything(dev);
3719                 if (ret && ret != -ENOSPC)
3720                         goto err;
3721         }
3722
3723         /* Set the pending read domains for the batch buffer to COMMAND */
3724         batch_obj = object_list[args->buffer_count-1];
3725         if (batch_obj->pending_write_domain) {
3726                 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
3727                 ret = -EINVAL;
3728                 goto err;
3729         }
3730         batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
3731
3732         /* Sanity check the batch buffer, prior to moving objects */
3733         exec_offset = exec_list[args->buffer_count - 1].offset;
3734         ret = i915_gem_check_execbuffer(args, exec_offset);
3735         if (ret != 0) {
3736                 DRM_ERROR("execbuf with invalid offset/length\n");
3737                 goto err;
3738         }
3739
3740         i915_verify_inactive(dev, __FILE__, __LINE__);
3741
3742         /* Zero the global flush/invalidate flags. These
3743          * will be modified as new domains are computed
3744          * for each object
3745          */
3746         dev->invalidate_domains = 0;
3747         dev->flush_domains = 0;
3748         dev_priv->flush_rings = 0;
3749
3750         for (i = 0; i < args->buffer_count; i++) {
3751                 struct drm_gem_object *obj = object_list[i];
3752
3753                 /* Compute new gpu domains and update invalidate/flush */
3754                 i915_gem_object_set_to_gpu_domain(obj);
3755         }
3756
3757         i915_verify_inactive(dev, __FILE__, __LINE__);
3758
3759         if (dev->invalidate_domains | dev->flush_domains) {
3760 #if WATCH_EXEC
3761                 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3762                           __func__,
3763                          dev->invalidate_domains,
3764                          dev->flush_domains);
3765 #endif
3766                 i915_gem_flush(dev,
3767                                dev->invalidate_domains,
3768                                dev->flush_domains);
3769                 if (dev_priv->flush_rings & FLUSH_RENDER_RING)
3770                         (void)i915_add_request(dev, file_priv,
3771                                                dev->flush_domains,
3772                                                &dev_priv->render_ring);
3773                 if (dev_priv->flush_rings & FLUSH_BSD_RING)
3774                         (void)i915_add_request(dev, file_priv,
3775                                                dev->flush_domains,
3776                                                &dev_priv->bsd_ring);
3777         }
3778
3779         for (i = 0; i < args->buffer_count; i++) {
3780                 struct drm_gem_object *obj = object_list[i];
3781                 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3782                 uint32_t old_write_domain = obj->write_domain;
3783
3784                 obj->write_domain = obj->pending_write_domain;
3785                 if (obj->write_domain)
3786                         list_move_tail(&obj_priv->gpu_write_list,
3787                                        &dev_priv->mm.gpu_write_list);
3788                 else
3789                         list_del_init(&obj_priv->gpu_write_list);
3790
3791                 trace_i915_gem_object_change_domain(obj,
3792                                                     obj->read_domains,
3793                                                     old_write_domain);
3794         }
3795
3796         i915_verify_inactive(dev, __FILE__, __LINE__);
3797
3798 #if WATCH_COHERENCY
3799         for (i = 0; i < args->buffer_count; i++) {
3800                 i915_gem_object_check_coherency(object_list[i],
3801                                                 exec_list[i].handle);
3802         }
3803 #endif
3804
3805 #if WATCH_EXEC
3806         i915_gem_dump_object(batch_obj,
3807                               args->batch_len,
3808                               __func__,
3809                               ~0);
3810 #endif
3811
3812         /* Exec the batchbuffer */
3813         ret = ring->dispatch_gem_execbuffer(dev, ring, args,
3814                         cliprects, exec_offset);
3815         if (ret) {
3816                 DRM_ERROR("dispatch failed %d\n", ret);
3817                 goto err;
3818         }
3819
3820         /*
3821          * Ensure that the commands in the batch buffer are
3822          * finished before the interrupt fires
3823          */
3824         flush_domains = i915_retire_commands(dev, ring);
3825
3826         i915_verify_inactive(dev, __FILE__, __LINE__);
3827
3828         /*
3829          * Get a seqno representing the execution of the current buffer,
3830          * which we can wait on.  We would like to mitigate these interrupts,
3831          * likely by only creating seqnos occasionally (so that we have
3832          * *some* interrupts representing completion of buffers that we can
3833          * wait on when trying to clear up gtt space).
3834          */
3835         seqno = i915_add_request(dev, file_priv, flush_domains, ring);
3836         BUG_ON(seqno == 0);
3837         for (i = 0; i < args->buffer_count; i++) {
3838                 struct drm_gem_object *obj = object_list[i];
3839                 obj_priv = to_intel_bo(obj);
3840
3841                 i915_gem_object_move_to_active(obj, seqno, ring);
3842 #if WATCH_LRU
3843                 DRM_INFO("%s: move to exec list %p\n", __func__, obj);
3844 #endif
3845         }
3846 #if WATCH_LRU
3847         i915_dump_lru(dev, __func__);
3848 #endif
3849
3850         i915_verify_inactive(dev, __FILE__, __LINE__);
3851
3852 err:
3853         for (i = 0; i < pinned; i++)
3854                 i915_gem_object_unpin(object_list[i]);
3855
3856         for (i = 0; i < args->buffer_count; i++) {
3857                 if (object_list[i]) {
3858                         obj_priv = to_intel_bo(object_list[i]);
3859                         obj_priv->in_execbuffer = false;
3860                 }
3861                 drm_gem_object_unreference(object_list[i]);
3862         }
3863
3864         mutex_unlock(&dev->struct_mutex);
3865
3866 pre_mutex_err:
3867         /* Copy the updated relocations out regardless of current error
3868          * state.  Failure to update the relocs would mean that the next
3869          * time userland calls execbuf, it would do so with presumed offset
3870          * state that didn't match the actual object state.
3871          */
3872         ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count,
3873                                            relocs);
3874         if (ret2 != 0) {
3875                 DRM_ERROR("Failed to copy relocations back out: %d\n", ret2);
3876
3877                 if (ret == 0)
3878                         ret = ret2;
3879         }
3880
3881         drm_free_large(object_list);
3882         kfree(cliprects);
3883
3884         return ret;
3885 }
3886
3887 /*
3888  * Legacy execbuffer just creates an exec2 list from the original exec object
3889  * list array and passes it to the real function.
3890  */
3891 int
3892 i915_gem_execbuffer(struct drm_device *dev, void *data,
3893                     struct drm_file *file_priv)
3894 {
3895         struct drm_i915_gem_execbuffer *args = data;
3896         struct drm_i915_gem_execbuffer2 exec2;
3897         struct drm_i915_gem_exec_object *exec_list = NULL;
3898         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
3899         int ret, i;
3900
3901 #if WATCH_EXEC
3902         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3903                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3904 #endif
3905
3906         if (args->buffer_count < 1) {
3907                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3908                 return -EINVAL;
3909         }
3910
3911         /* Copy in the exec list from userland */
3912         exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
3913         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
3914         if (exec_list == NULL || exec2_list == NULL) {
3915                 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
3916                           args->buffer_count);
3917                 drm_free_large(exec_list);
3918                 drm_free_large(exec2_list);
3919                 return -ENOMEM;
3920         }
3921         ret = copy_from_user(exec_list,
3922                              (struct drm_i915_gem_exec_object __user *)
3923                              (uintptr_t) args->buffers_ptr,
3924                              sizeof(*exec_list) * args->buffer_count);
3925         if (ret != 0) {
3926                 DRM_ERROR("copy %d exec entries failed %d\n",
3927                           args->buffer_count, ret);
3928                 drm_free_large(exec_list);
3929                 drm_free_large(exec2_list);
3930                 return -EFAULT;
3931         }
3932
3933         for (i = 0; i < args->buffer_count; i++) {
3934                 exec2_list[i].handle = exec_list[i].handle;
3935                 exec2_list[i].relocation_count = exec_list[i].relocation_count;
3936                 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
3937                 exec2_list[i].alignment = exec_list[i].alignment;
3938                 exec2_list[i].offset = exec_list[i].offset;
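                /* Pre-965 chips need a fence register for tiled blits, so
                 * conservatively assume every legacy buffer may need one.
                 */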
3939                 if (!IS_I965G(dev))
3940                         exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
3941                 else
3942                         exec2_list[i].flags = 0;
3943         }
3944
3945         exec2.buffers_ptr = args->buffers_ptr;
3946         exec2.buffer_count = args->buffer_count;
3947         exec2.batch_start_offset = args->batch_start_offset;
3948         exec2.batch_len = args->batch_len;
3949         exec2.DR1 = args->DR1;
3950         exec2.DR4 = args->DR4;
3951         exec2.num_cliprects = args->num_cliprects;
3952         exec2.cliprects_ptr = args->cliprects_ptr;
3953         exec2.flags = I915_EXEC_RENDER;
3954
3955         ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list);
3956         if (!ret) {
3957                 /* Copy the new buffer offsets back to the user's exec list. */
3958                 for (i = 0; i < args->buffer_count; i++)
3959                         exec_list[i].offset = exec2_list[i].offset;
3960                 /* ... and back out to userspace */
3961                 ret = copy_to_user((struct drm_i915_gem_exec_object __user *)
3962                                    (uintptr_t) args->buffers_ptr,
3963                                    exec_list,
3964                                    sizeof(*exec_list) * args->buffer_count);
3965                 if (ret) {
3966                         ret = -EFAULT;
3967                         DRM_ERROR("failed to copy %d exec entries "
3968                                   "back to user (%d)\n",
3969                                   args->buffer_count, ret);
3970                 }
3971         }
3972
3973         drm_free_large(exec_list);
3974         drm_free_large(exec2_list);
3975         return ret;
3976 }
3977
3978 int
3979 i915_gem_execbuffer2(struct drm_device *dev, void *data,
3980                      struct drm_file *file_priv)
3981 {
3982         struct drm_i915_gem_execbuffer2 *args = data;
3983         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
3984         int ret;
3985
3986 #if WATCH_EXEC
3987         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3988                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3989 #endif
3990
3991         if (args->buffer_count < 1) {
3992                 DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
3993                 return -EINVAL;
3994         }
3995
3996         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
3997         if (exec2_list == NULL) {
3998                 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
3999                           args->buffer_count);
4000                 return -ENOMEM;
4001         }
4002         ret = copy_from_user(exec2_list,
4003                              (struct drm_i915_gem_exec_object2 __user *)
4004                              (uintptr_t) args->buffers_ptr,
4005                              sizeof(*exec2_list) * args->buffer_count);
4006         if (ret != 0) {
4007                 DRM_ERROR("copy %d exec entries failed %d\n",
4008                           args->buffer_count, ret);
4009                 drm_free_large(exec2_list);
4010                 return -EFAULT;
4011         }
4012
4013         ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list);
4014         if (!ret) {
4015                 /* Copy the new buffer offsets back to the user's exec list. */
4016                 ret = copy_to_user((struct drm_i915_gem_exec_object2 __user *)
4017                                    (uintptr_t) args->buffers_ptr,
4018                                    exec2_list,
4019                                    sizeof(*exec2_list) * args->buffer_count);
4020                 if (ret) {
4021                         ret = -EFAULT;
4022                         DRM_ERROR("failed to copy %d exec entries "
4023                                   "back to user (%d)\n",
4024                                   args->buffer_count, ret);
4025                 }
4026         }
4027
4028         drm_free_large(exec2_list);
4029         return ret;
4030 }
4031
4032 int
4033 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
4034 {
4035         struct drm_device *dev = obj->dev;
4036         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4037         int ret;
4038
4039         BUG_ON(obj_priv->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
4040
4041         i915_verify_inactive(dev, __FILE__, __LINE__);
4042
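        /* If the object is already bound at an offset that violates the
         * requested alignment, unbind it so it can be rebound below.
         */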
4043         if (obj_priv->gtt_space != NULL) {
4044                 if (alignment == 0)
4045                         alignment = i915_gem_get_gtt_alignment(obj);
4046                 if (obj_priv->gtt_offset & (alignment - 1)) {
4047                         WARN(obj_priv->pin_count,
4048                              "bo is already pinned with incorrect alignment:"
4049                              " offset=%x, req.alignment=%x\n",
4050                              obj_priv->gtt_offset, alignment);
4051                         ret = i915_gem_object_unbind(obj);
4052                         if (ret)
4053                                 return ret;
4054                 }
4055         }
4056
4057         if (obj_priv->gtt_space == NULL) {
4058                 ret = i915_gem_object_bind_to_gtt(obj, alignment);
4059                 if (ret)
4060                         return ret;
4061         }
4062
4063         obj_priv->pin_count++;
4064
4065         /* If the object is not active and not pending a flush,
4066          * remove it from the inactive list
4067          */
4068         if (obj_priv->pin_count == 1) {
4069                 atomic_inc(&dev->pin_count);
4070                 atomic_add(obj->size, &dev->pin_memory);
4071                 if (!obj_priv->active &&
4072                     (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
4073                         list_del_init(&obj_priv->list);
4074         }
4075         i915_verify_inactive(dev, __FILE__, __LINE__);
4076
4077         return 0;
4078 }
4079
4080 void
4081 i915_gem_object_unpin(struct drm_gem_object *obj)
4082 {
4083         struct drm_device *dev = obj->dev;
4084         drm_i915_private_t *dev_priv = dev->dev_private;
4085         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4086
4087         i915_verify_inactive(dev, __FILE__, __LINE__);
4088         obj_priv->pin_count--;
4089         BUG_ON(obj_priv->pin_count < 0);
4090         BUG_ON(obj_priv->gtt_space == NULL);
4091
4092         /* If the object is no longer pinned, and is
4093          * neither active nor being flushed, then stick it on
4094          * the inactive list
4095          */
4096         if (obj_priv->pin_count == 0) {
4097                 if (!obj_priv->active &&
4098                     (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
4099                         list_move_tail(&obj_priv->list,
4100                                        &dev_priv->mm.inactive_list);
4101                 atomic_dec(&dev->pin_count);
4102                 atomic_sub(obj->size, &dev->pin_memory);
4103         }
4104         i915_verify_inactive(dev, __FILE__, __LINE__);
4105 }
4106
4107 int
4108 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4109                    struct drm_file *file_priv)
4110 {
4111         struct drm_i915_gem_pin *args = data;
4112         struct drm_gem_object *obj;
4113         struct drm_i915_gem_object *obj_priv;
4114         int ret;
4115
4116         mutex_lock(&dev->struct_mutex);
4117
4118         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4119         if (obj == NULL) {
4120                 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n",
4121                           args->handle);
4122                 mutex_unlock(&dev->struct_mutex);
4123                 return -ENOENT;
4124         }
4125         obj_priv = to_intel_bo(obj);
4126
4127         if (obj_priv->madv != I915_MADV_WILLNEED) {
4128                 DRM_ERROR("Attempting to pin a purgeable buffer\n");
4129                 drm_gem_object_unreference(obj);
4130                 mutex_unlock(&dev->struct_mutex);
4131                 return -EINVAL;
4132         }
4133
4134         if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
4135                 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
4136                           args->handle);
4137                 drm_gem_object_unreference(obj);
4138                 mutex_unlock(&dev->struct_mutex);
4139                 return -EINVAL;
4140         }
4141
4142         obj_priv->user_pin_count++;
4143         obj_priv->pin_filp = file_priv;
4144         if (obj_priv->user_pin_count == 1) {
4145                 ret = i915_gem_object_pin(obj, args->alignment);
4146                 if (ret != 0) {
4147                         drm_gem_object_unreference(obj);
4148                         mutex_unlock(&dev->struct_mutex);
4149                         return ret;
4150                 }
4151         }
4152
4153         /* XXX - flush the CPU caches for pinned objects
4154          * as the X server doesn't manage domains yet
4155          */
4156         i915_gem_object_flush_cpu_write_domain(obj);
4157         args->offset = obj_priv->gtt_offset;
4158         drm_gem_object_unreference(obj);
4159         mutex_unlock(&dev->struct_mutex);
4160
4161         return 0;
4162 }
4163
4164 int
4165 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4166                      struct drm_file *file_priv)
4167 {
4168         struct drm_i915_gem_pin *args = data;
4169         struct drm_gem_object *obj;
4170         struct drm_i915_gem_object *obj_priv;
4171
4172         mutex_lock(&dev->struct_mutex);
4173
4174         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4175         if (obj == NULL) {
4176                 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n",
4177                           args->handle);
4178                 mutex_unlock(&dev->struct_mutex);
4179                 return -ENOENT;
4180         }
4181
4182         obj_priv = to_intel_bo(obj);
4183         if (obj_priv->pin_filp != file_priv) {
4184                 DRM_ERROR("Not pinned by caller in i915_gem_unpin_ioctl(): %d\n",
4185                           args->handle);
4186                 drm_gem_object_unreference(obj);
4187                 mutex_unlock(&dev->struct_mutex);
4188                 return -EINVAL;
4189         }
4190         obj_priv->user_pin_count--;
4191         if (obj_priv->user_pin_count == 0) {
4192                 obj_priv->pin_filp = NULL;
4193                 i915_gem_object_unpin(obj);
4194         }
4195
4196         drm_gem_object_unreference(obj);
4197         mutex_unlock(&dev->struct_mutex);
4198         return 0;
4199 }
4200
4201 int
4202 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4203                     struct drm_file *file_priv)
4204 {
4205         struct drm_i915_gem_busy *args = data;
4206         struct drm_gem_object *obj;
4207         struct drm_i915_gem_object *obj_priv;
4208
4209         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4210         if (obj == NULL) {
4211                 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
4212                           args->handle);
4213                 return -ENOENT;
4214         }
4215
4216         mutex_lock(&dev->struct_mutex);
4217
4218         /* Count all active objects as busy, even if they are currently not used
4219          * by the gpu. Users of this interface expect objects to eventually
4220          * become non-busy without any further actions, therefore emit any
4221          * necessary flushes here.
4222          */
4223         obj_priv = to_intel_bo(obj);
4224         args->busy = obj_priv->active;
4225         if (args->busy) {
4226                 /* Unconditionally flush objects, even when the gpu still uses this
4227                  * object. Userspace calling this function indicates that it wants to
4228                  * use this buffer sooner rather than later, so issuing the required
4229                  * flush earlier is beneficial.
4230                  */
4231                 if (obj->write_domain) {
4232                         i915_gem_flush(dev, 0, obj->write_domain);
4233                         (void)i915_add_request(dev, file_priv, obj->write_domain, obj_priv->ring);
4234                 }
4235
4236                 /* Update the active list for the hardware's current position.
4237                  * Otherwise this only updates on a delayed timer or when irqs
4238                  * are actually unmasked, and our working set ends up being
4239                  * larger than required.
4240                  */
4241                 i915_gem_retire_requests_ring(dev, obj_priv->ring);
4242
4243                 args->busy = obj_priv->active;
4244         }
4245
4246         drm_gem_object_unreference(obj);
4247         mutex_unlock(&dev->struct_mutex);
4248         return 0;
4249 }
4250
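/* Throttle the caller by waiting for some of its earlier rendering to complete. */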
4251 int
4252 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4253                         struct drm_file *file_priv)
4254 {
4255         return i915_gem_ring_throttle(dev, file_priv);
4256 }
4257
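/*
 * Set the DONTNEED/WILLNEED hint on an unpinned object.  Purgeable objects
 * that are no longer bound to the GTT have their backing storage discarded
 * immediately; args->retained reports whether the pages still exist.
 */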
4258 int
4259 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4260                        struct drm_file *file_priv)
4261 {
4262         struct drm_i915_gem_madvise *args = data;
4263         struct drm_gem_object *obj;
4264         struct drm_i915_gem_object *obj_priv;
4265
4266         switch (args->madv) {
4267         case I915_MADV_DONTNEED:
4268         case I915_MADV_WILLNEED:
4269                 break;
4270         default:
4271                 return -EINVAL;
4272         }
4273
4274         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4275         if (obj == NULL) {
4276                 DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n",
4277                           args->handle);
4278                 return -ENOENT;
4279         }
4280
4281         mutex_lock(&dev->struct_mutex);
4282         obj_priv = to_intel_bo(obj);
4283
4284         if (obj_priv->pin_count) {
4285                 drm_gem_object_unreference(obj);
4286                 mutex_unlock(&dev->struct_mutex);
4287
4288                 DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n");
4289                 return -EINVAL;
4290         }
4291
4292         if (obj_priv->madv != __I915_MADV_PURGED)
4293                 obj_priv->madv = args->madv;
4294
4295         /* if the object is no longer bound, discard its backing storage */
4296         if (i915_gem_object_is_purgeable(obj_priv) &&
4297             obj_priv->gtt_space == NULL)
4298                 i915_gem_object_truncate(obj);
4299
4300         args->retained = obj_priv->madv != __I915_MADV_PURGED;
4301
4302         drm_gem_object_unreference(obj);
4303         mutex_unlock(&dev->struct_mutex);
4304
4305         return 0;
4306 }
4307
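/*
 * Allocate a new GEM object of the given size.  Objects start out in the
 * CPU read/write domain with no fence register assigned.
 */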
4308 struct drm_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4309                                              size_t size)
4310 {
4311         struct drm_i915_gem_object *obj;
4312
4313         obj = kzalloc(sizeof(*obj), GFP_KERNEL);
4314         if (obj == NULL)
4315                 return NULL;
4316
4317         if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4318                 kfree(obj);
4319                 return NULL;
4320         }
4321
4322         obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4323         obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4324
4325         obj->agp_type = AGP_USER_MEMORY;
4326         obj->base.driver_private = NULL;
4327         obj->fence_reg = I915_FENCE_REG_NONE;
4328         INIT_LIST_HEAD(&obj->list);
4329         INIT_LIST_HEAD(&obj->gpu_write_list);
4330         obj->madv = I915_MADV_WILLNEED;
4331
4332         trace_i915_gem_object_create(&obj->base);
4333
4334         return &obj->base;
4335 }
4336
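/*
 * Objects are created via i915_gem_alloc_object() above, so the generic
 * GEM init hook must never be reached.
 */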
4337 int i915_gem_init_object(struct drm_gem_object *obj)
4338 {
4339         BUG();
4340
4341         return 0;
4342 }
4343
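/*
 * Final stage of freeing: unbind from the GTT (deferring to the
 * deferred_free_list if unbinding is interrupted by a signal), drop the
 * mmap offset and GEM core state, then free the per-object bookkeeping.
 */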
4344 static void i915_gem_free_object_tail(struct drm_gem_object *obj)
4345 {
4346         struct drm_device *dev = obj->dev;
4347         drm_i915_private_t *dev_priv = dev->dev_private;
4348         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4349         int ret;
4350
4351         ret = i915_gem_object_unbind(obj);
4352         if (ret == -ERESTARTSYS) {
4353                 list_move(&obj_priv->list,
4354                           &dev_priv->mm.deferred_free_list);
4355                 return;
4356         }
4357
4358         if (obj_priv->mmap_offset)
4359                 i915_gem_free_mmap_offset(obj);
4360
4361         drm_gem_object_release(obj);
4362
4363         kfree(obj_priv->page_cpu_valid);
4364         kfree(obj_priv->bit_17);
4365         kfree(obj_priv);
4366 }
4367
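/* Drop any remaining pins and phys-object backing before final teardown. */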
4368 void i915_gem_free_object(struct drm_gem_object *obj)
4369 {
4370         struct drm_device *dev = obj->dev;
4371         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4372
4373         trace_i915_gem_object_destroy(obj);
4374
4375         while (obj_priv->pin_count > 0)
4376                 i915_gem_object_unpin(obj);
4377
4378         if (obj_priv->phys_obj)
4379                 i915_gem_detach_phys_object(dev, obj);
4380
4381         i915_gem_free_object_tail(obj);
4382 }
4383
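/*
 * Quiesce the GPU: wait for outstanding rendering, evict inactive objects
 * under UMS, mark the device suspended and tear down the ring buffers.
 */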
4384 int
4385 i915_gem_idle(struct drm_device *dev)
4386 {
4387         drm_i915_private_t *dev_priv = dev->dev_private;
4388         int ret;
4389
4390         mutex_lock(&dev->struct_mutex);
4391
4392         if (dev_priv->mm.suspended ||
4393             dev_priv->render_ring.gem_object == NULL ||
4394             (HAS_BSD(dev) &&
4395              dev_priv->bsd_ring.gem_object == NULL)) {
4396                 mutex_unlock(&dev->struct_mutex);
4397                 return 0;
4398         }
4399
4400         ret = i915_gpu_idle(dev);
4401         if (ret) {
4402                 mutex_unlock(&dev->struct_mutex);
4403                 return ret;
4404         }
4405
4406         /* Under UMS, be paranoid and evict. */
4407         if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
4408                 ret = i915_gem_evict_inactive(dev);
4409                 if (ret) {
4410                         mutex_unlock(&dev->struct_mutex);
4411                         return ret;
4412                 }
4413         }
4414
4415         /* Hack!  Don't let anybody do execbuf while we don't control the chip.
4416          * We need to replace this with a semaphore, or something.
4417          * And not confound mm.suspended!
4418          */
4419         dev_priv->mm.suspended = 1;
4420         del_timer(&dev_priv->hangcheck_timer);
4421
4422         i915_kernel_lost_context(dev);
4423         i915_gem_cleanup_ringbuffer(dev);
4424
4425         mutex_unlock(&dev->struct_mutex);
4426
4427         /* Cancel the retire work handler, which should be idle now. */
4428         cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4429
4430         return 0;
4431 }
4432
4433 /*
4434  * 965+ support PIPE_CONTROL commands, which provide finer grained control
4435  * over cache flushing.
4436  */
4437 static int
4438 i915_gem_init_pipe_control(struct drm_device *dev)
4439 {
4440         drm_i915_private_t *dev_priv = dev->dev_private;
4441         struct drm_gem_object *obj;
4442         struct drm_i915_gem_object *obj_priv;
4443         int ret;
4444
4445         obj = i915_gem_alloc_object(dev, 4096);
4446         if (obj == NULL) {
4447                 DRM_ERROR("Failed to allocate seqno page\n");
4448                 ret = -ENOMEM;
4449                 goto err;
4450         }
4451         obj_priv = to_intel_bo(obj);
4452         obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
4453
4454         ret = i915_gem_object_pin(obj, 4096);
4455         if (ret)
4456                 goto err_unref;
4457
4458         dev_priv->seqno_gfx_addr = obj_priv->gtt_offset;
4459         dev_priv->seqno_page = kmap(obj_priv->pages[0]);
4460         if (dev_priv->seqno_page == NULL) {
4461                 ret = -ENOMEM;
                 goto err_unpin;
         }
4462
4463         dev_priv->seqno_obj = obj;
4464         memset(dev_priv->seqno_page, 0, PAGE_SIZE);
4465
4466         return 0;
4467
4468 err_unpin:
4469         i915_gem_object_unpin(obj);
4470 err_unref:
4471         drm_gem_object_unreference(obj);
4472 err:
4473         return ret;
4474 }
4475
4476
4477 static void
4478 i915_gem_cleanup_pipe_control(struct drm_device *dev)
4479 {
4480         drm_i915_private_t *dev_priv = dev->dev_private;
4481         struct drm_gem_object *obj;
4482         struct drm_i915_gem_object *obj_priv;
4483
4484         obj = dev_priv->seqno_obj;
4485         obj_priv = to_intel_bo(obj);
4486         kunmap(obj_priv->pages[0]);
4487         i915_gem_object_unpin(obj);
4488         drm_gem_object_unreference(obj);
4489         dev_priv->seqno_obj = NULL;
4490
4491         dev_priv->seqno_page = NULL;
4492 }
4493
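/*
 * Set up the render ring (and the BSD ring where present), including the
 * PIPE_CONTROL seqno page on chipsets that use it.
 */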
4494 int
4495 i915_gem_init_ringbuffer(struct drm_device *dev)
4496 {
4497         drm_i915_private_t *dev_priv = dev->dev_private;
4498         int ret;
4499
4500         dev_priv->render_ring = render_ring;
4501
4502         if (!I915_NEED_GFX_HWS(dev)) {
4503                 dev_priv->render_ring.status_page.page_addr
4504                         = dev_priv->status_page_dmah->vaddr;
4505                 memset(dev_priv->render_ring.status_page.page_addr,
4506                        0, PAGE_SIZE);
4507         }
4508
4509         if (HAS_PIPE_CONTROL(dev)) {
4510                 ret = i915_gem_init_pipe_control(dev);
4511                 if (ret)
4512                         return ret;
4513         }
4514
4515         ret = intel_init_ring_buffer(dev, &dev_priv->render_ring);
4516         if (ret)
4517                 goto cleanup_pipe_control;
4518
4519         if (HAS_BSD(dev)) {
4520                 dev_priv->bsd_ring = bsd_ring;
4521                 ret = intel_init_ring_buffer(dev, &dev_priv->bsd_ring);
4522                 if (ret)
4523                         goto cleanup_render_ring;
4524         }
4525
4526         dev_priv->next_seqno = 1;
4527
4528         return 0;
4529
4530 cleanup_render_ring:
4531         intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
4532 cleanup_pipe_control:
4533         if (HAS_PIPE_CONTROL(dev))
4534                 i915_gem_cleanup_pipe_control(dev);
4535         return ret;
4536 }
4537
4538 void
4539 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4540 {
4541         drm_i915_private_t *dev_priv = dev->dev_private;
4542
4543         intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
4544         if (HAS_BSD(dev))
4545                 intel_cleanup_ring_buffer(dev, &dev_priv->bsd_ring);
4546         if (HAS_PIPE_CONTROL(dev))
4547                 i915_gem_cleanup_pipe_control(dev);
4548 }
4549
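/*
 * Re-enable GEM on VT enter (UMS only): clear the wedged flag, rebuild the
 * ring buffers and reinstall the interrupt handler.
 */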
4550 int
4551 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4552                        struct drm_file *file_priv)
4553 {
4554         drm_i915_private_t *dev_priv = dev->dev_private;
4555         int ret;
4556
4557         if (drm_core_check_feature(dev, DRIVER_MODESET))
4558                 return 0;
4559
4560         if (atomic_read(&dev_priv->mm.wedged)) {
4561                 DRM_ERROR("Reenabling wedged hardware, good luck\n");
4562                 atomic_set(&dev_priv->mm.wedged, 0);
4563         }
4564
4565         mutex_lock(&dev->struct_mutex);
4566         dev_priv->mm.suspended = 0;
4567
4568         ret = i915_gem_init_ringbuffer(dev);
4569         if (ret != 0) {
4570                 mutex_unlock(&dev->struct_mutex);
4571                 return ret;
4572         }
4573
4574         spin_lock(&dev_priv->mm.active_list_lock);
4575         BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
4576         BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.active_list));
4577         spin_unlock(&dev_priv->mm.active_list_lock);
4578
4579         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
4580         BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
4581         BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
4582         BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.request_list));
4583         mutex_unlock(&dev->struct_mutex);
4584
4585         ret = drm_irq_install(dev);
4586         if (ret)
4587                 goto cleanup_ringbuffer;
4588
4589         return 0;
4590
4591 cleanup_ringbuffer:
4592         mutex_lock(&dev->struct_mutex);
4593         i915_gem_cleanup_ringbuffer(dev);
4594         dev_priv->mm.suspended = 1;
4595         mutex_unlock(&dev->struct_mutex);
4596
4597         return ret;
4598 }
4599
4600 int
4601 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4602                        struct drm_file *file_priv)
4603 {
4604         if (drm_core_check_feature(dev, DRIVER_MODESET))
4605                 return 0;
4606
4607         drm_irq_uninstall(dev);
4608         return i915_gem_idle(dev);
4609 }
4610
4611 void
4612 i915_gem_lastclose(struct drm_device *dev)
4613 {
4614         int ret;
4615
4616         if (drm_core_check_feature(dev, DRIVER_MODESET))
4617                 return;
4618
4619         ret = i915_gem_idle(dev);
4620         if (ret)
4621                 DRM_ERROR("failed to idle hardware: %d\n", ret);
4622 }
4623
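/*
 * One-time GEM initialization at driver load: memory-management lists, the
 * retire work handler, registration on the global shrink list, fence
 * register setup and bit-6 swizzle detection.
 */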
4624 void
4625 i915_gem_load(struct drm_device *dev)
4626 {
4627         int i;
4628         drm_i915_private_t *dev_priv = dev->dev_private;
4629
4630         spin_lock_init(&dev_priv->mm.active_list_lock);
4631         INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
4632         INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list);
4633         INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
4634         INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4635         INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
4636         INIT_LIST_HEAD(&dev_priv->render_ring.active_list);
4637         INIT_LIST_HEAD(&dev_priv->render_ring.request_list);
4638         if (HAS_BSD(dev)) {
4639                 INIT_LIST_HEAD(&dev_priv->bsd_ring.active_list);
4640                 INIT_LIST_HEAD(&dev_priv->bsd_ring.request_list);
4641         }
4642         for (i = 0; i < 16; i++)
4643                 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
4644         INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4645                           i915_gem_retire_work_handler);
4646         spin_lock(&shrink_list_lock);
4647         list_add(&dev_priv->mm.shrink_list, &shrink_list);
4648         spin_unlock(&shrink_list_lock);
4649
4650         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
4651         if (IS_GEN3(dev)) {
4652                 u32 tmp = I915_READ(MI_ARB_STATE);
4653                 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
4654                         /* arb state is a masked write, so set bit + bit in mask */
4655                         tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
4656                         I915_WRITE(MI_ARB_STATE, tmp);
4657                 }
4658         }
4659
4660         /* Old X drivers will take 0-2 for front, back, depth buffers */
4661         if (!drm_core_check_feature(dev, DRIVER_MODESET))
4662                 dev_priv->fence_reg_start = 3;
4663
4664         if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4665                 dev_priv->num_fence_regs = 16;
4666         else
4667                 dev_priv->num_fence_regs = 8;
4668
4669         /* Initialize fence registers to zero */
4670         if (IS_I965G(dev)) {
4671                 for (i = 0; i < 16; i++)
4672                         I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
4673         } else {
4674                 for (i = 0; i < 8; i++)
4675                         I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
4676                 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4677                         for (i = 0; i < 8; i++)
4678                                 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
4679         }
4680         i915_gem_detect_bit_6_swizzle(dev);
4681         init_waitqueue_head(&dev_priv->pending_flip_queue);
4682 }
4683
4684 /*
4685  * Create a physically contiguous memory object with the given id,
4686  * e.g. for the cursor and overlay registers.
4687  */
4688 int i915_gem_init_phys_object(struct drm_device *dev,
4689                               int id, int size, int align)
4690 {
4691         drm_i915_private_t *dev_priv = dev->dev_private;
4692         struct drm_i915_gem_phys_object *phys_obj;
4693         int ret;
4694
4695         if (dev_priv->mm.phys_objs[id - 1] || !size)
4696                 return 0;
4697
4698         phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
4699         if (!phys_obj)
4700                 return -ENOMEM;
4701
4702         phys_obj->id = id;
4703
4704         phys_obj->handle = drm_pci_alloc(dev, size, align);
4705         if (!phys_obj->handle) {
4706                 ret = -ENOMEM;
4707                 goto kfree_obj;
4708         }
4709 #ifdef CONFIG_X86
4710         set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4711 #endif
4712
4713         dev_priv->mm.phys_objs[id - 1] = phys_obj;
4714
4715         return 0;
4716 kfree_obj:
4717         kfree(phys_obj);
4718         return ret;
4719 }
4720
4721 void i915_gem_free_phys_object(struct drm_device *dev, int id)
4722 {
4723         drm_i915_private_t *dev_priv = dev->dev_private;
4724         struct drm_i915_gem_phys_object *phys_obj;
4725
4726         if (!dev_priv->mm.phys_objs[id - 1])
4727                 return;
4728
4729         phys_obj = dev_priv->mm.phys_objs[id - 1];
4730         if (phys_obj->cur_obj) {
4731                 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4732         }
4733
4734 #ifdef CONFIG_X86
4735         set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4736 #endif
4737         drm_pci_free(dev, phys_obj->handle);
4738         kfree(phys_obj);
4739         dev_priv->mm.phys_objs[id - 1] = NULL;
4740 }
4741
4742 void i915_gem_free_all_phys_object(struct drm_device *dev)
4743 {
4744         int i;
4745
4746         for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
4747                 i915_gem_free_phys_object(dev, i);
4748 }
4749
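/*
 * Copy the contents of the physically contiguous backing store back into
 * the object's shmem pages and drop the phys object association.
 */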
4750 void i915_gem_detach_phys_object(struct drm_device *dev,
4751                                  struct drm_gem_object *obj)
4752 {
4753         struct drm_i915_gem_object *obj_priv;
4754         int i;
4755         int ret;
4756         int page_count;
4757
4758         obj_priv = to_intel_bo(obj);
4759         if (!obj_priv->phys_obj)
4760                 return;
4761
4762         ret = i915_gem_object_get_pages(obj, 0);
4763         if (ret)
4764                 goto out;
4765
4766         page_count = obj->size / PAGE_SIZE;
4767
4768         for (i = 0; i < page_count; i++) {
4769                 char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0);
4770                 char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4771
4772                 memcpy(dst, src, PAGE_SIZE);
4773                 kunmap_atomic(dst, KM_USER0);
4774         }
4775         drm_clflush_pages(obj_priv->pages, page_count);
4776         drm_agp_chipset_flush(dev);
4777
4778         i915_gem_object_put_pages(obj);
4779 out:
4780         obj_priv->phys_obj->cur_obj = NULL;
4781         obj_priv->phys_obj = NULL;
4782 }
4783
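/*
 * Back an object with a physically contiguous allocation (created on first
 * use for the given id) and copy the current page contents into it.
 */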
4784 int
4785 i915_gem_attach_phys_object(struct drm_device *dev,
4786                             struct drm_gem_object *obj,
4787                             int id,
4788                             int align)
4789 {
4790         drm_i915_private_t *dev_priv = dev->dev_private;
4791         struct drm_i915_gem_object *obj_priv;
4792         int ret = 0;
4793         int page_count;
4794         int i;
4795
4796         if (id > I915_MAX_PHYS_OBJECT)
4797                 return -EINVAL;
4798
4799         obj_priv = to_intel_bo(obj);
4800
4801         if (obj_priv->phys_obj) {
4802                 if (obj_priv->phys_obj->id == id)
4803                         return 0;
4804                 i915_gem_detach_phys_object(dev, obj);
4805         }
4806
4807         /* create a new object */
4808         if (!dev_priv->mm.phys_objs[id - 1]) {
4809                 ret = i915_gem_init_phys_object(dev, id,
4810                                                 obj->size, align);
4811                 if (ret) {
4812                         DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size);
4813                         goto out;
4814                 }
4815         }
4816
4817         /* bind to the object */
4818         obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
4819         obj_priv->phys_obj->cur_obj = obj;
4820
4821         ret = i915_gem_object_get_pages(obj, 0);
4822         if (ret) {
4823                 DRM_ERROR("failed to get page list\n");
4824                 goto out;
4825         }
4826
4827         page_count = obj->size / PAGE_SIZE;
4828
4829         for (i = 0; i < page_count; i++) {
4830                 char *src = kmap_atomic(obj_priv->pages[i], KM_USER0);
4831                 char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4832
4833                 memcpy(dst, src, PAGE_SIZE);
4834                 kunmap_atomic(src, KM_USER0);
4835         }
4836
4837         i915_gem_object_put_pages(obj);
4838
4839         return 0;
4840 out:
4841         return ret;
4842 }
4843
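/*
 * pwrite fast path for phys objects: copy user data straight into the
 * contiguous backing store and flush the chipset write buffers.
 */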
4844 static int
4845 i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
4846                      struct drm_i915_gem_pwrite *args,
4847                      struct drm_file *file_priv)
4848 {
4849         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4850         void *obj_addr;
4851         int ret;
4852         char __user *user_data;
4853
4854         user_data = (char __user *) (uintptr_t) args->data_ptr;
4855         obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;
4856
4857         DRM_DEBUG_DRIVER("obj_addr %p, %llu\n", obj_addr, (unsigned long long)args->size);
4858         ret = copy_from_user(obj_addr, user_data, args->size);
4859         if (ret)
4860                 return -EFAULT;
4861
4862         drm_agp_chipset_flush(dev);
4863         return 0;
4864 }
4865
4866 void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
4867 {
4868         struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
4869
4870         /* Clean up our request list when the client is going away, so that
4871          * later retire_requests won't dereference our soon-to-be-gone
4872          * file_priv.
4873          */
4874         mutex_lock(&dev->struct_mutex);
4875         while (!list_empty(&i915_file_priv->mm.request_list))
4876                 list_del_init(i915_file_priv->mm.request_list.next);
4877         mutex_unlock(&dev->struct_mutex);
4878 }
4879
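/* Return true while the GPU still has active or flushing objects outstanding. */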
4880 static int
4881 i915_gpu_is_active(struct drm_device *dev)
4882 {
4883         drm_i915_private_t *dev_priv = dev->dev_private;
4884         int lists_empty;
4885
4886         spin_lock(&dev_priv->mm.active_list_lock);
4887         lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
4888                       list_empty(&dev_priv->render_ring.active_list);
4889         if (HAS_BSD(dev))
4890                 lists_empty &= list_empty(&dev_priv->bsd_ring.active_list);
4891         spin_unlock(&dev_priv->mm.active_list_lock);
4892
4893         return !lists_empty;
4894 }
4895
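/*
 * Memory shrinker callback.  With nr_to_scan == 0 just report how many
 * inactive objects could be reclaimed; otherwise unbind purgeable buffers
 * first, then any remaining inactive objects, idling the GPU as a last
 * resort before rescanning.
 */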
4896 static int
4897 i915_gem_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
4898 {
4899         drm_i915_private_t *dev_priv, *next_dev;
4900         struct drm_i915_gem_object *obj_priv, *next_obj;
4901         int cnt = 0;
4902         int would_deadlock = 1;
4903
4904         /* "fast-path" to count number of available objects */
4905         if (nr_to_scan == 0) {
4906                 spin_lock(&shrink_list_lock);
4907                 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
4908                         struct drm_device *dev = dev_priv->dev;
4909
4910                         if (mutex_trylock(&dev->struct_mutex)) {
4911                                 list_for_each_entry(obj_priv,
4912                                                     &dev_priv->mm.inactive_list,
4913                                                     list)
4914                                         cnt++;
4915                                 mutex_unlock(&dev->struct_mutex);
4916                         }
4917                 }
4918                 spin_unlock(&shrink_list_lock);
4919
4920                 return (cnt / 100) * sysctl_vfs_cache_pressure;
4921         }
4922
4923         spin_lock(&shrink_list_lock);
4924
4925 rescan:
4926         /* first scan for clean buffers */
4927         list_for_each_entry_safe(dev_priv, next_dev,
4928                                  &shrink_list, mm.shrink_list) {
4929                 struct drm_device *dev = dev_priv->dev;
4930
4931                 if (!mutex_trylock(&dev->struct_mutex))
4932                         continue;
4933
4934                 spin_unlock(&shrink_list_lock);
4935                 i915_gem_retire_requests(dev);
4936
4937                 list_for_each_entry_safe(obj_priv, next_obj,
4938                                          &dev_priv->mm.inactive_list,
4939                                          list) {
4940                         if (i915_gem_object_is_purgeable(obj_priv)) {
4941                                 i915_gem_object_unbind(&obj_priv->base);
4942                                 if (--nr_to_scan <= 0)
4943                                         break;
4944                         }
4945                 }
4946
4947                 spin_lock(&shrink_list_lock);
4948                 mutex_unlock(&dev->struct_mutex);
4949
4950                 would_deadlock = 0;
4951
4952                 if (nr_to_scan <= 0)
4953                         break;
4954         }
4955
4956         /* second pass, evict/count anything still on the inactive list */
4957         list_for_each_entry_safe(dev_priv, next_dev,
4958                                  &shrink_list, mm.shrink_list) {
4959                 struct drm_device *dev = dev_priv->dev;
4960
4961                 if (!mutex_trylock(&dev->struct_mutex))
4962                         continue;
4963
4964                 spin_unlock(&shrink_list_lock);
4965
4966                 list_for_each_entry_safe(obj_priv, next_obj,
4967                                          &dev_priv->mm.inactive_list,
4968                                          list) {
4969                         if (nr_to_scan > 0) {
4970                                 i915_gem_object_unbind(&obj_priv->base);
4971                                 nr_to_scan--;
4972                         } else
4973                                 cnt++;
4974                 }
4975
4976                 spin_lock(&shrink_list_lock);
4977                 mutex_unlock(&dev->struct_mutex);
4978
4979                 would_deadlock = 0;
4980         }
4981
4982         if (nr_to_scan) {
4983                 int active = 0;
4984
4985                 /*
4986                  * We are desperate for pages, so as a last resort, wait
4987                  * for the GPU to finish and discard whatever we can.
4988                  * This dramatically reduces the number of OOM-killer
4989                  * events while the GPU is being driven aggressively.
4990                  */
4991                 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
4992                         struct drm_device *dev = dev_priv->dev;
4993
4994                         if (!mutex_trylock(&dev->struct_mutex))
4995                                 continue;
4996
4997                         spin_unlock(&shrink_list_lock);
4998
4999                         if (i915_gpu_is_active(dev)) {
5000                                 i915_gpu_idle(dev);
5001                                 active++;
5002                         }
5003
5004                         spin_lock(&shrink_list_lock);
5005                         mutex_unlock(&dev->struct_mutex);
5006                 }
5007
5008                 if (active)
5009                         goto rescan;
5010         }
5011
5012         spin_unlock(&shrink_list_lock);
5013
5014         if (would_deadlock)
5015                 return -1;
5016         else if (cnt > 0)
5017                 return (cnt / 100) * sysctl_vfs_cache_pressure;
5018         else
5019                 return 0;
5020 }
5021
5022 static struct shrinker shrinker = {
5023         .shrink = i915_gem_shrink,
5024         .seeks = DEFAULT_SEEKS,
5025 };
5026
5027 __init void
5028 i915_gem_shrinker_init(void)
5029 {
5030         register_shrinker(&shrinker);
5031 }
5032
5033 __exit void
5034 i915_gem_shrinker_exit(void)
5035 {
5036         unregister_shrinker(&shrinker);
5037 }