drivers/gpu/drm/i915/i915_gem.c
1 /*
2  * Copyright © 2008 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include "drmP.h"
29 #include "drm.h"
30 #include "i915_drm.h"
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/pci.h>
37 #include <linux/intel-gtt.h>
38
39 static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj);
40
41 static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj,
42                                                   bool pipelined);
43 static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
44 static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
45 static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
46                                              int write);
47 static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
48                                                      uint64_t offset,
49                                                      uint64_t size);
50 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
51 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
52 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
53                                            unsigned alignment);
54 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
55 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
56                                 struct drm_i915_gem_pwrite *args,
57                                 struct drm_file *file_priv);
58 static void i915_gem_free_object_tail(struct drm_gem_object *obj);
59
60 static LIST_HEAD(shrink_list);
61 static DEFINE_SPINLOCK(shrink_list_lock);
62
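/* An object is "inactive" when it is bound into the GTT but is neither
 * busy on the GPU nor pinned by the driver; only such objects are
 * candidates for eviction.
 */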
63 static inline bool
64 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj_priv)
65 {
66         return obj_priv->gtt_space &&
67                 !obj_priv->active &&
68                 obj_priv->pin_count == 0;
69 }
70
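/* Set up the range of the GTT that GEM may allocate from.  Both ends must
 * be page aligned; the range is handed to the drm_mm allocator that backs
 * object binding.
 */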
71 int i915_gem_do_init(struct drm_device *dev, unsigned long start,
72                      unsigned long end)
73 {
74         drm_i915_private_t *dev_priv = dev->dev_private;
75
76         if (start >= end ||
77             (start & (PAGE_SIZE - 1)) != 0 ||
78             (end & (PAGE_SIZE - 1)) != 0) {
79                 return -EINVAL;
80         }
81
82         drm_mm_init(&dev_priv->mm.gtt_space, start,
83                     end - start);
84
85         dev->gtt_total = (uint32_t) (end - start);
86
87         return 0;
88 }
89
90 int
91 i915_gem_init_ioctl(struct drm_device *dev, void *data,
92                     struct drm_file *file_priv)
93 {
94         struct drm_i915_gem_init *args = data;
95         int ret;
96
97         mutex_lock(&dev->struct_mutex);
98         ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
99         mutex_unlock(&dev->struct_mutex);
100
101         return ret;
102 }
103
104 int
105 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
106                             struct drm_file *file_priv)
107 {
108         struct drm_i915_gem_get_aperture *args = data;
109
110         if (!(dev->driver->driver_features & DRIVER_GEM))
111                 return -ENODEV;
112
113         args->aper_size = dev->gtt_total;
114         args->aper_available_size = (args->aper_size -
115                                      atomic_read(&dev->pin_memory));
116
117         return 0;
118 }
119
120
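/* Illustrative userspace usage (not part of this file; fd is a placeholder
 * for an open DRM file descriptor):
 *
 *	struct drm_i915_gem_create create = { .size = 4096 };
 *	ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
 *	... create.handle now names the new object ...
 */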
121 /**
122  * Creates a new mm object and returns a handle to it.
123  */
124 int
125 i915_gem_create_ioctl(struct drm_device *dev, void *data,
126                       struct drm_file *file_priv)
127 {
128         struct drm_i915_gem_create *args = data;
129         struct drm_gem_object *obj;
130         int ret;
131         u32 handle;
132
133         args->size = roundup(args->size, PAGE_SIZE);
134
135         /* Allocate the new object */
136         obj = i915_gem_alloc_object(dev, args->size);
137         if (obj == NULL)
138                 return -ENOMEM;
139
140         ret = drm_gem_handle_create(file_priv, obj, &handle);
141         if (ret) {
142                 drm_gem_object_unreference_unlocked(obj);
143                 return ret;
144         }
145
146         /* Sink the floating reference from kref_init(handlecount) */
147         drm_gem_object_handle_unreference_unlocked(obj);
148
149         args->handle = handle;
150         return 0;
151 }
152
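/* Copy from a shmem-backed page of the object to user space using an
 * atomic kmap.  A fault (partial copy) returns -EFAULT so the caller can
 * fall back to the sleeping slow path.
 */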
153 static inline int
154 fast_shmem_read(struct page **pages,
155                 loff_t page_base, int page_offset,
156                 char __user *data,
157                 int length)
158 {
159         char *vaddr;
160         int unwritten;
161
162         vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
163         if (vaddr == NULL)
164                 return -ENOMEM;
165         unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length);
166         kunmap_atomic(vaddr, KM_USER0);
167
168         if (unwritten)
169                 return -EFAULT;
170
171         return 0;
172 }
173
174 static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
175 {
176         drm_i915_private_t *dev_priv = obj->dev->dev_private;
177         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
178
179         return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
180                 obj_priv->tiling_mode != I915_TILING_NONE;
181 }
182
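/* Copy between two pages with sleeping kmap()s.  Used by the slow
 * pread/pwrite paths, where the user pages have already been pinned so
 * no faults can occur during the copy.
 */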
183 static inline void
184 slow_shmem_copy(struct page *dst_page,
185                 int dst_offset,
186                 struct page *src_page,
187                 int src_offset,
188                 int length)
189 {
190         char *dst_vaddr, *src_vaddr;
191
192         dst_vaddr = kmap(dst_page);
193         src_vaddr = kmap(src_page);
194
195         memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
196
197         kunmap(src_page);
198         kunmap(dst_page);
199 }
200
201 static inline void
202 slow_shmem_bit17_copy(struct page *gpu_page,
203                       int gpu_offset,
204                       struct page *cpu_page,
205                       int cpu_offset,
206                       int length,
207                       int is_read)
208 {
209         char *gpu_vaddr, *cpu_vaddr;
210
211         /* Use the unswizzled path if this page isn't affected. */
212         if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
213                 if (is_read)
214                         return slow_shmem_copy(cpu_page, cpu_offset,
215                                                gpu_page, gpu_offset, length);
216                 else
217                         return slow_shmem_copy(gpu_page, gpu_offset,
218                                                cpu_page, cpu_offset, length);
219         }
220
221         gpu_vaddr = kmap(gpu_page);
222         cpu_vaddr = kmap(cpu_page);
223
224         /* Copy the data, XORing A6 with A17 (1). The user already knows he's
225          * XORing with the other bits (A9 for Y, A9 and A10 for X)
226          */
227         while (length > 0) {
228                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
229                 int this_length = min(cacheline_end - gpu_offset, length);
230                 int swizzled_gpu_offset = gpu_offset ^ 64;
231
232                 if (is_read) {
233                         memcpy(cpu_vaddr + cpu_offset,
234                                gpu_vaddr + swizzled_gpu_offset,
235                                this_length);
236                 } else {
237                         memcpy(gpu_vaddr + swizzled_gpu_offset,
238                                cpu_vaddr + cpu_offset,
239                                this_length);
240                 }
241                 cpu_offset += this_length;
242                 gpu_offset += this_length;
243                 length -= this_length;
244         }
245
246         kunmap(cpu_page);
247         kunmap(gpu_page);
248 }
249
250 /**
251  * This is the fast shmem pread path, which attempts to copy_to_user directly
252  * from the backing pages of the object into the user's address space.  On a
253  * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
254  */
255 static int
256 i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
257                           struct drm_i915_gem_pread *args,
258                           struct drm_file *file_priv)
259 {
260         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
261         ssize_t remain;
262         loff_t offset, page_base;
263         char __user *user_data;
264         int page_offset, page_length;
265         int ret;
266
267         user_data = (char __user *) (uintptr_t) args->data_ptr;
268         remain = args->size;
269
270         mutex_lock(&dev->struct_mutex);
271
272         ret = i915_gem_object_get_pages(obj, 0);
273         if (ret != 0)
274                 goto fail_unlock;
275
276         ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
277                                                         args->size);
278         if (ret != 0)
279                 goto fail_put_pages;
280
281         obj_priv = to_intel_bo(obj);
282         offset = args->offset;
283
284         while (remain > 0) {
285                 /* Operation in this page
286                  *
287                  * page_base = page offset within the object
288                  * page_offset = offset within page
289                  * page_length = bytes to copy for this page
290                  */
291                 page_base = (offset & ~(PAGE_SIZE-1));
292                 page_offset = offset & (PAGE_SIZE-1);
293                 page_length = remain;
294                 if ((page_offset + remain) > PAGE_SIZE)
295                         page_length = PAGE_SIZE - page_offset;
296
297                 ret = fast_shmem_read(obj_priv->pages,
298                                       page_base, page_offset,
299                                       user_data, page_length);
300                 if (ret)
301                         goto fail_put_pages;
302
303                 remain -= page_length;
304                 user_data += page_length;
305                 offset += page_length;
306         }
307
308 fail_put_pages:
309         i915_gem_object_put_pages(obj);
310 fail_unlock:
311         mutex_unlock(&dev->struct_mutex);
312
313         return ret;
314 }
315
316 static int
317 i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
318 {
319         int ret;
320
321         ret = i915_gem_object_get_pages(obj, __GFP_NORETRY | __GFP_NOWARN);
322
323         /* If we have insufficient memory to map in the pages, attempt
324          * to make some space by evicting some old buffers.
325          */
326         if (ret == -ENOMEM) {
327                 struct drm_device *dev = obj->dev;
328
329                 ret = i915_gem_evict_something(dev, obj->size,
330                                                i915_gem_get_gtt_alignment(obj));
331                 if (ret)
332                         return ret;
333
334                 ret = i915_gem_object_get_pages(obj, 0);
335         }
336
337         return ret;
338 }
339
340 /**
341  * This is the fallback shmem pread path, which uses get_user_pages to pin
342  * the destination pages in the user's address space, so we can copy out of
343  * the object's backing pages while holding the struct mutex without taking
344  * page faults.
345  */
346 static int
347 i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
348                           struct drm_i915_gem_pread *args,
349                           struct drm_file *file_priv)
350 {
351         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
352         struct mm_struct *mm = current->mm;
353         struct page **user_pages;
354         ssize_t remain;
355         loff_t offset, pinned_pages, i;
356         loff_t first_data_page, last_data_page, num_pages;
357         int shmem_page_index, shmem_page_offset;
358         int data_page_index,  data_page_offset;
359         int page_length;
360         int ret;
361         uint64_t data_ptr = args->data_ptr;
362         int do_bit17_swizzling;
363
364         remain = args->size;
365
366         /* Pin the user pages containing the data.  We can't fault while
367          * holding the struct mutex, yet we want to hold it while
368          * dereferencing the user data.
369          */
370         first_data_page = data_ptr / PAGE_SIZE;
371         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
372         num_pages = last_data_page - first_data_page + 1;
373
374         user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
375         if (user_pages == NULL)
376                 return -ENOMEM;
377
378         down_read(&mm->mmap_sem);
379         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
380                                       num_pages, 1, 0, user_pages, NULL);
381         up_read(&mm->mmap_sem);
382         if (pinned_pages < num_pages) {
383                 ret = -EFAULT;
384                 goto fail_put_user_pages;
385         }
386
387         do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
388
389         mutex_lock(&dev->struct_mutex);
390
391         ret = i915_gem_object_get_pages_or_evict(obj);
392         if (ret)
393                 goto fail_unlock;
394
395         ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
396                                                         args->size);
397         if (ret != 0)
398                 goto fail_put_pages;
399
400         obj_priv = to_intel_bo(obj);
401         offset = args->offset;
402
403         while (remain > 0) {
404                 /* Operation in this page
405                  *
406                  * shmem_page_index = page number within shmem file
407                  * shmem_page_offset = offset within page in shmem file
408                  * data_page_index = page number in get_user_pages return
409                  * data_page_offset = offset within the data_page_index page.
410                  * page_length = bytes to copy for this page
411                  */
412                 shmem_page_index = offset / PAGE_SIZE;
413                 shmem_page_offset = offset & ~PAGE_MASK;
414                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
415                 data_page_offset = data_ptr & ~PAGE_MASK;
416
417                 page_length = remain;
418                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
419                         page_length = PAGE_SIZE - shmem_page_offset;
420                 if ((data_page_offset + page_length) > PAGE_SIZE)
421                         page_length = PAGE_SIZE - data_page_offset;
422
423                 if (do_bit17_swizzling) {
424                         slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
425                                               shmem_page_offset,
426                                               user_pages[data_page_index],
427                                               data_page_offset,
428                                               page_length,
429                                               1);
430                 } else {
431                         slow_shmem_copy(user_pages[data_page_index],
432                                         data_page_offset,
433                                         obj_priv->pages[shmem_page_index],
434                                         shmem_page_offset,
435                                         page_length);
436                 }
437
438                 remain -= page_length;
439                 data_ptr += page_length;
440                 offset += page_length;
441         }
442
443 fail_put_pages:
444         i915_gem_object_put_pages(obj);
445 fail_unlock:
446         mutex_unlock(&dev->struct_mutex);
447 fail_put_user_pages:
448         for (i = 0; i < pinned_pages; i++) {
449                 SetPageDirty(user_pages[i]);
450                 page_cache_release(user_pages[i]);
451         }
452         drm_free_large(user_pages);
453
454         return ret;
455 }
456
457 /**
458  * Reads data from the object referenced by handle.
459  *
460  * On error, the contents of *data are undefined.
461  */
462 int
463 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
464                      struct drm_file *file_priv)
465 {
466         struct drm_i915_gem_pread *args = data;
467         struct drm_gem_object *obj;
468         struct drm_i915_gem_object *obj_priv;
469         int ret;
470
471         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
472         if (obj == NULL)
473                 return -ENOENT;
474         obj_priv = to_intel_bo(obj);
475
476         /* Bounds check source.
477          *
478          * XXX: This could use review for overflow issues...
479          */
480         if (args->offset > obj->size || args->size > obj->size ||
481             args->offset + args->size > obj->size) {
482                 drm_gem_object_unreference_unlocked(obj);
483                 return -EINVAL;
484         }
485
486         if (i915_gem_object_needs_bit17_swizzle(obj)) {
487                 ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
488         } else {
489                 ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
490                 if (ret != 0)
491                         ret = i915_gem_shmem_pread_slow(dev, obj, args,
492                                                         file_priv);
493         }
494
495         drm_gem_object_unreference_unlocked(obj);
496
497         return ret;
498 }
499
500 /* This is the fast write path which cannot handle
501  * page faults in the source data
502  */
503
504 static inline int
505 fast_user_write(struct io_mapping *mapping,
506                 loff_t page_base, int page_offset,
507                 char __user *user_data,
508                 int length)
509 {
510         char *vaddr_atomic;
511         unsigned long unwritten;
512
513         vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base, KM_USER0);
514         unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
515                                                       user_data, length);
516         io_mapping_unmap_atomic(vaddr_atomic, KM_USER0);
517         if (unwritten)
518                 return -EFAULT;
519         return 0;
520 }
521
522 /* Here's the write path which can sleep for
523  * page faults
524  */
525
526 static inline void
527 slow_kernel_write(struct io_mapping *mapping,
528                   loff_t gtt_base, int gtt_offset,
529                   struct page *user_page, int user_offset,
530                   int length)
531 {
532         char __iomem *dst_vaddr;
533         char *src_vaddr;
534
535         dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
536         src_vaddr = kmap(user_page);
537
538         memcpy_toio(dst_vaddr + gtt_offset,
539                     src_vaddr + user_offset,
540                     length);
541
542         kunmap(user_page);
543         io_mapping_unmap(dst_vaddr);
544 }
545
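/* Write-direction counterpart of fast_shmem_read(): copy from user space
 * into the kmapped shmem page without sleeping; a fault returns -EFAULT
 * so the caller can fall back to the slow path.
 */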
546 static inline int
547 fast_shmem_write(struct page **pages,
548                  loff_t page_base, int page_offset,
549                  char __user *data,
550                  int length)
551 {
552         char *vaddr;
553         unsigned long unwritten;
554
555         vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
556         if (vaddr == NULL)
557                 return -ENOMEM;
558         unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
559         kunmap_atomic(vaddr, KM_USER0);
560
561         if (unwritten)
562                 return -EFAULT;
563         return 0;
564 }
565
566 /**
567  * This is the fast pwrite path, where we copy the data directly from the
568  * user into the GTT, uncached.
569  */
570 static int
571 i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
572                          struct drm_i915_gem_pwrite *args,
573                          struct drm_file *file_priv)
574 {
575         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
576         drm_i915_private_t *dev_priv = dev->dev_private;
577         ssize_t remain;
578         loff_t offset, page_base;
579         char __user *user_data;
580         int page_offset, page_length;
581         int ret;
582
583         user_data = (char __user *) (uintptr_t) args->data_ptr;
584         remain = args->size;
585         if (!access_ok(VERIFY_READ, user_data, remain))
586                 return -EFAULT;
587
588
589         mutex_lock(&dev->struct_mutex);
590         ret = i915_gem_object_pin(obj, 0);
591         if (ret) {
592                 mutex_unlock(&dev->struct_mutex);
593                 return ret;
594         }
595         ret = i915_gem_object_set_to_gtt_domain(obj, 1);
596         if (ret)
597                 goto fail;
598
599         obj_priv = to_intel_bo(obj);
600         offset = obj_priv->gtt_offset + args->offset;
601
602         while (remain > 0) {
603                 /* Operation in this page
604                  *
605                  * page_base = page offset within aperture
606                  * page_offset = offset within page
607                  * page_length = bytes to copy for this page
608                  */
609                 page_base = (offset & ~(PAGE_SIZE-1));
610                 page_offset = offset & (PAGE_SIZE-1);
611                 page_length = remain;
612                 if ((page_offset + remain) > PAGE_SIZE)
613                         page_length = PAGE_SIZE - page_offset;
614
615                 ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
616                                        page_offset, user_data, page_length);
617
618                 /* If we get a fault while copying data, then (presumably) our
619                  * source page isn't available.  Return the error and we'll
620                  * retry in the slow path.
621                  */
622                 if (ret)
623                         goto fail;
624
625                 remain -= page_length;
626                 user_data += page_length;
627                 offset += page_length;
628         }
629
630 fail:
631         i915_gem_object_unpin(obj);
632         mutex_unlock(&dev->struct_mutex);
633
634         return ret;
635 }
636
637 /**
638  * This is the fallback GTT pwrite path, which uses get_user_pages to pin
639  * the source memory and kmap/io_mapping_map_wc to map the pages for copying.
640  *
641  * This code resulted in x11perf -rgb10text consuming about 10% more CPU
642  * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
643  */
644 static int
645 i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
646                          struct drm_i915_gem_pwrite *args,
647                          struct drm_file *file_priv)
648 {
649         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
650         drm_i915_private_t *dev_priv = dev->dev_private;
651         ssize_t remain;
652         loff_t gtt_page_base, offset;
653         loff_t first_data_page, last_data_page, num_pages;
654         loff_t pinned_pages, i;
655         struct page **user_pages;
656         struct mm_struct *mm = current->mm;
657         int gtt_page_offset, data_page_offset, data_page_index, page_length;
658         int ret;
659         uint64_t data_ptr = args->data_ptr;
660
661         remain = args->size;
662
663         /* Pin the user pages containing the data.  We can't fault while
664          * holding the struct mutex, and all of the pwrite implementations
665          * want to hold it while dereferencing the user data.
666          */
667         first_data_page = data_ptr / PAGE_SIZE;
668         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
669         num_pages = last_data_page - first_data_page + 1;
670
671         user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
672         if (user_pages == NULL)
673                 return -ENOMEM;
674
675         down_read(&mm->mmap_sem);
676         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
677                                       num_pages, 0, 0, user_pages, NULL);
678         up_read(&mm->mmap_sem);
679         if (pinned_pages < num_pages) {
680                 ret = -EFAULT;
681                 goto out_unpin_pages;
682         }
683
684         mutex_lock(&dev->struct_mutex);
685         ret = i915_gem_object_pin(obj, 0);
686         if (ret)
687                 goto out_unlock;
688
689         ret = i915_gem_object_set_to_gtt_domain(obj, 1);
690         if (ret)
691                 goto out_unpin_object;
692
693         obj_priv = to_intel_bo(obj);
694         offset = obj_priv->gtt_offset + args->offset;
695
696         while (remain > 0) {
697                 /* Operation in this page
698                  *
699                  * gtt_page_base = page offset within aperture
700                  * gtt_page_offset = offset within page in aperture
701                  * data_page_index = page number in get_user_pages return
702                  * data_page_offset = offset within the data_page_index page.
703                  * page_length = bytes to copy for this page
704                  */
705                 gtt_page_base = offset & PAGE_MASK;
706                 gtt_page_offset = offset & ~PAGE_MASK;
707                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
708                 data_page_offset = data_ptr & ~PAGE_MASK;
709
710                 page_length = remain;
711                 if ((gtt_page_offset + page_length) > PAGE_SIZE)
712                         page_length = PAGE_SIZE - gtt_page_offset;
713                 if ((data_page_offset + page_length) > PAGE_SIZE)
714                         page_length = PAGE_SIZE - data_page_offset;
715
716                 slow_kernel_write(dev_priv->mm.gtt_mapping,
717                                   gtt_page_base, gtt_page_offset,
718                                   user_pages[data_page_index],
719                                   data_page_offset,
720                                   page_length);
721
722                 remain -= page_length;
723                 offset += page_length;
724                 data_ptr += page_length;
725         }
726
727 out_unpin_object:
728         i915_gem_object_unpin(obj);
729 out_unlock:
730         mutex_unlock(&dev->struct_mutex);
731 out_unpin_pages:
732         for (i = 0; i < pinned_pages; i++)
733                 page_cache_release(user_pages[i]);
734         drm_free_large(user_pages);
735
736         return ret;
737 }
738
739 /**
740  * This is the fast shmem pwrite path, which attempts to directly
741  * copy_from_user into the kmapped pages backing the object.
742  */
743 static int
744 i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
745                            struct drm_i915_gem_pwrite *args,
746                            struct drm_file *file_priv)
747 {
748         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
749         ssize_t remain;
750         loff_t offset, page_base;
751         char __user *user_data;
752         int page_offset, page_length;
753         int ret;
754
755         user_data = (char __user *) (uintptr_t) args->data_ptr;
756         remain = args->size;
757
758         mutex_lock(&dev->struct_mutex);
759
760         ret = i915_gem_object_get_pages(obj, 0);
761         if (ret != 0)
762                 goto fail_unlock;
763
764         ret = i915_gem_object_set_to_cpu_domain(obj, 1);
765         if (ret != 0)
766                 goto fail_put_pages;
767
768         obj_priv = to_intel_bo(obj);
769         offset = args->offset;
770         obj_priv->dirty = 1;
771
772         while (remain > 0) {
773                 /* Operation in this page
774                  *
775                  * page_base = page offset within the object
776                  * page_offset = offset within page
777                  * page_length = bytes to copy for this page
778                  */
779                 page_base = (offset & ~(PAGE_SIZE-1));
780                 page_offset = offset & (PAGE_SIZE-1);
781                 page_length = remain;
782                 if ((page_offset + remain) > PAGE_SIZE)
783                         page_length = PAGE_SIZE - page_offset;
784
785                 ret = fast_shmem_write(obj_priv->pages,
786                                        page_base, page_offset,
787                                        user_data, page_length);
788                 if (ret)
789                         goto fail_put_pages;
790
791                 remain -= page_length;
792                 user_data += page_length;
793                 offset += page_length;
794         }
795
796 fail_put_pages:
797         i915_gem_object_put_pages(obj);
798 fail_unlock:
799         mutex_unlock(&dev->struct_mutex);
800
801         return ret;
802 }
803
804 /**
805  * This is the fallback shmem pwrite path, which uses get_user_pages to pin
806  * the source memory and maps the shmem pages with kmap() for copying.
807  *
808  * This avoids taking mmap_sem for faulting on the user's address while the
809  * struct_mutex is held.
810  */
811 static int
812 i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
813                            struct drm_i915_gem_pwrite *args,
814                            struct drm_file *file_priv)
815 {
816         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
817         struct mm_struct *mm = current->mm;
818         struct page **user_pages;
819         ssize_t remain;
820         loff_t offset, pinned_pages, i;
821         loff_t first_data_page, last_data_page, num_pages;
822         int shmem_page_index, shmem_page_offset;
823         int data_page_index,  data_page_offset;
824         int page_length;
825         int ret;
826         uint64_t data_ptr = args->data_ptr;
827         int do_bit17_swizzling;
828
829         remain = args->size;
830
831         /* Pin the user pages containing the data.  We can't fault while
832          * holding the struct mutex, and all of the pwrite implementations
833          * want to hold it while dereferencing the user data.
834          */
835         first_data_page = data_ptr / PAGE_SIZE;
836         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
837         num_pages = last_data_page - first_data_page + 1;
838
839         user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
840         if (user_pages == NULL)
841                 return -ENOMEM;
842
843         down_read(&mm->mmap_sem);
844         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
845                                       num_pages, 0, 0, user_pages, NULL);
846         up_read(&mm->mmap_sem);
847         if (pinned_pages < num_pages) {
848                 ret = -EFAULT;
849                 goto fail_put_user_pages;
850         }
851
852         do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
853
854         mutex_lock(&dev->struct_mutex);
855
856         ret = i915_gem_object_get_pages_or_evict(obj);
857         if (ret)
858                 goto fail_unlock;
859
860         ret = i915_gem_object_set_to_cpu_domain(obj, 1);
861         if (ret != 0)
862                 goto fail_put_pages;
863
864         obj_priv = to_intel_bo(obj);
865         offset = args->offset;
866         obj_priv->dirty = 1;
867
868         while (remain > 0) {
869                 /* Operation in this page
870                  *
871                  * shmem_page_index = page number within shmem file
872                  * shmem_page_offset = offset within page in shmem file
873                  * data_page_index = page number in get_user_pages return
874                  * data_page_offset = offset within the data_page_index page.
875                  * page_length = bytes to copy for this page
876                  */
877                 shmem_page_index = offset / PAGE_SIZE;
878                 shmem_page_offset = offset & ~PAGE_MASK;
879                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
880                 data_page_offset = data_ptr & ~PAGE_MASK;
881
882                 page_length = remain;
883                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
884                         page_length = PAGE_SIZE - shmem_page_offset;
885                 if ((data_page_offset + page_length) > PAGE_SIZE)
886                         page_length = PAGE_SIZE - data_page_offset;
887
888                 if (do_bit17_swizzling) {
889                         slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
890                                               shmem_page_offset,
891                                               user_pages[data_page_index],
892                                               data_page_offset,
893                                               page_length,
894                                               0);
895                 } else {
896                         slow_shmem_copy(obj_priv->pages[shmem_page_index],
897                                         shmem_page_offset,
898                                         user_pages[data_page_index],
899                                         data_page_offset,
900                                         page_length);
901                 }
902
903                 remain -= page_length;
904                 data_ptr += page_length;
905                 offset += page_length;
906         }
907
908 fail_put_pages:
909         i915_gem_object_put_pages(obj);
910 fail_unlock:
911         mutex_unlock(&dev->struct_mutex);
912 fail_put_user_pages:
913         for (i = 0; i < pinned_pages; i++)
914                 page_cache_release(user_pages[i]);
915         drm_free_large(user_pages);
916
917         return ret;
918 }
919
920 /**
921  * Writes data to the object referenced by handle.
922  *
923  * On error, the contents of the buffer that were to be modified are undefined.
924  */
925 int
926 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
927                       struct drm_file *file_priv)
928 {
929         struct drm_i915_gem_pwrite *args = data;
930         struct drm_gem_object *obj;
931         struct drm_i915_gem_object *obj_priv;
932         int ret = 0;
933
934         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
935         if (obj == NULL)
936                 return -ENOENT;
937         obj_priv = to_intel_bo(obj);
938
939         /* Bounds check destination.
940          *
941          * XXX: This could use review for overflow issues...
942          */
943         if (args->offset > obj->size || args->size > obj->size ||
944             args->offset + args->size > obj->size) {
945                 drm_gem_object_unreference_unlocked(obj);
946                 return -EINVAL;
947         }
948
949         /* We can only do the GTT pwrite on untiled buffers, as otherwise
950          * it would end up going through the fenced access, and we'll get
951          * different detiling behavior between reading and writing.
952          * pread/pwrite currently are reading and writing from the CPU
953          * perspective, requiring manual detiling by the client.
954          */
955         if (obj_priv->phys_obj)
956                 ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
957         else if (obj_priv->tiling_mode == I915_TILING_NONE &&
958                  dev->gtt_total != 0 &&
959                  obj->write_domain != I915_GEM_DOMAIN_CPU) {
960                 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
961                 if (ret == -EFAULT) {
962                         ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
963                                                        file_priv);
964                 }
965         } else if (i915_gem_object_needs_bit17_swizzle(obj)) {
966                 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
967         } else {
968                 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
969                 if (ret == -EFAULT) {
970                         ret = i915_gem_shmem_pwrite_slow(dev, obj, args,
971                                                          file_priv);
972                 }
973         }
974
975 #if WATCH_PWRITE
976         if (ret)
977                 DRM_INFO("pwrite failed %d\n", ret);
978 #endif
979
980         drm_gem_object_unreference_unlocked(obj);
981
982         return ret;
983 }
984
985 /**
986  * Called when user space prepares to use an object with the CPU, either
987  * through the mmap ioctl's mapping or a GTT mapping.
988  */
989 int
990 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
991                           struct drm_file *file_priv)
992 {
993         struct drm_i915_private *dev_priv = dev->dev_private;
994         struct drm_i915_gem_set_domain *args = data;
995         struct drm_gem_object *obj;
996         struct drm_i915_gem_object *obj_priv;
997         uint32_t read_domains = args->read_domains;
998         uint32_t write_domain = args->write_domain;
999         int ret;
1000
1001         if (!(dev->driver->driver_features & DRIVER_GEM))
1002                 return -ENODEV;
1003
1004         /* Only handle setting domains to types used by the CPU. */
1005         if (write_domain & I915_GEM_GPU_DOMAINS)
1006                 return -EINVAL;
1007
1008         if (read_domains & I915_GEM_GPU_DOMAINS)
1009                 return -EINVAL;
1010
1011         /* Having something in the write domain implies it's in the read
1012          * domain, and only that read domain.  Enforce that in the request.
1013          */
1014         if (write_domain != 0 && read_domains != write_domain)
1015                 return -EINVAL;
1016
1017         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1018         if (obj == NULL)
1019                 return -ENOENT;
1020         obj_priv = to_intel_bo(obj);
1021
1022         mutex_lock(&dev->struct_mutex);
1023
1024         intel_mark_busy(dev, obj);
1025
1026 #if WATCH_BUF
1027         DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n",
1028                  obj, obj->size, read_domains, write_domain);
1029 #endif
1030         if (read_domains & I915_GEM_DOMAIN_GTT) {
1031                 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1032
1033                 /* Update the LRU on the fence for the CPU access that's
1034                  * about to occur.
1035                  */
1036                 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1037                         struct drm_i915_fence_reg *reg =
1038                                 &dev_priv->fence_regs[obj_priv->fence_reg];
1039                         list_move_tail(&reg->lru_list,
1040                                        &dev_priv->mm.fence_list);
1041                 }
1042
1043                 /* Silently promote "you're not bound, there was nothing to do"
1044                  * to success, since the client was just asking us to
1045                  * make sure everything was done.
1046                  */
1047                 if (ret == -EINVAL)
1048                         ret = 0;
1049         } else {
1050                 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1051         }
1052
1053
1054         /* Maintain LRU order of "inactive" objects */
1055         if (ret == 0 && i915_gem_object_is_inactive(obj_priv))
1056                 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1057
1058         drm_gem_object_unreference(obj);
1059         mutex_unlock(&dev->struct_mutex);
1060         return ret;
1061 }
1062
1063 /**
1064  * Called when user space has done writes to this buffer
1065  */
1066 int
1067 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1068                       struct drm_file *file_priv)
1069 {
1070         struct drm_i915_gem_sw_finish *args = data;
1071         struct drm_gem_object *obj;
1072         struct drm_i915_gem_object *obj_priv;
1073         int ret = 0;
1074
1075         if (!(dev->driver->driver_features & DRIVER_GEM))
1076                 return -ENODEV;
1077
1078         mutex_lock(&dev->struct_mutex);
1079         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1080         if (obj == NULL) {
1081                 mutex_unlock(&dev->struct_mutex);
1082                 return -ENOENT;
1083         }
1084
1085 #if WATCH_BUF
1086         DRM_INFO("%s: sw_finish %d (%p %zd)\n",
1087                  __func__, args->handle, obj, obj->size);
1088 #endif
1089         obj_priv = to_intel_bo(obj);
1090
1091         /* Pinned buffers may be scanout, so flush the cache */
1092         if (obj_priv->pin_count)
1093                 i915_gem_object_flush_cpu_write_domain(obj);
1094
1095         drm_gem_object_unreference(obj);
1096         mutex_unlock(&dev->struct_mutex);
1097         return ret;
1098 }
1099
1100 /**
1101  * Maps the contents of an object, returning the address it is mapped
1102  * into.
1103  *
1104  * While the mapping holds a reference on the contents of the object, it doesn't
1105  * imply a ref on the object itself.
1106  */
1107 int
1108 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1109                    struct drm_file *file_priv)
1110 {
1111         struct drm_i915_gem_mmap *args = data;
1112         struct drm_gem_object *obj;
1113         loff_t offset;
1114         unsigned long addr;
1115
1116         if (!(dev->driver->driver_features & DRIVER_GEM))
1117                 return -ENODEV;
1118
1119         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1120         if (obj == NULL)
1121                 return -ENOENT;
1122
1123         offset = args->offset;
1124
1125         down_write(&current->mm->mmap_sem);
1126         addr = do_mmap(obj->filp, 0, args->size,
1127                        PROT_READ | PROT_WRITE, MAP_SHARED,
1128                        args->offset);
1129         up_write(&current->mm->mmap_sem);
1130         drm_gem_object_unreference_unlocked(obj);
1131         if (IS_ERR((void *)addr))
1132                 return addr;
1133
1134         args->addr_ptr = (uint64_t) addr;
1135
1136         return 0;
1137 }
1138
1139 /**
1140  * i915_gem_fault - fault a page into the GTT
1141  * @vma: VMA in question
1142  * @vmf: fault info
1143  *
1144  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1145  * from userspace.  The fault handler takes care of binding the object to
1146  * the GTT (if needed), allocating and programming a fence register (again,
1147  * only if needed based on whether the old reg is still valid or the object
1148  * is tiled) and inserting a new PTE into the faulting process.
1149  *
1150  * Note that the faulting process may involve evicting existing objects
1151  * from the GTT and/or fence registers to make room.  So performance may
1152  * suffer if the GTT working set is large or there are few fence registers
1153  * left.
1154  */
1155 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1156 {
1157         struct drm_gem_object *obj = vma->vm_private_data;
1158         struct drm_device *dev = obj->dev;
1159         drm_i915_private_t *dev_priv = dev->dev_private;
1160         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1161         pgoff_t page_offset;
1162         unsigned long pfn;
1163         int ret = 0;
1164         bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1165
1166         /* We don't use vmf->pgoff since that has the fake offset */
1167         page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1168                 PAGE_SHIFT;
1169
1170         /* Now bind it into the GTT if needed */
1171         mutex_lock(&dev->struct_mutex);
1172         if (!obj_priv->gtt_space) {
1173                 ret = i915_gem_object_bind_to_gtt(obj, 0);
1174                 if (ret)
1175                         goto unlock;
1176
1177                 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1178                 if (ret)
1179                         goto unlock;
1180         }
1181
1182         /* Need a new fence register? */
1183         if (obj_priv->tiling_mode != I915_TILING_NONE) {
1184                 ret = i915_gem_object_get_fence_reg(obj);
1185                 if (ret)
1186                         goto unlock;
1187         }
1188
1189         if (i915_gem_object_is_inactive(obj_priv))
1190                 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1191
1192         pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
1193                 page_offset;
1194
1195         /* Finally, remap it using the new GTT offset */
1196         ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1197 unlock:
1198         mutex_unlock(&dev->struct_mutex);
1199
1200         switch (ret) {
1201         case 0:
1202         case -ERESTARTSYS:
1203                 return VM_FAULT_NOPAGE;
1204         case -ENOMEM:
1205         case -EAGAIN:
1206                 return VM_FAULT_OOM;
1207         default:
1208                 return VM_FAULT_SIGBUS;
1209         }
1210 }
1211
1212 /**
1213  * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1214  * @obj: obj in question
1215  *
1216  * GEM memory mapping works by handing back to userspace a fake mmap offset
1217  * it can use in a subsequent mmap(2) call.  The DRM core code then looks
1218  * up the object based on the offset and sets up the various memory mapping
1219  * structures.
1220  *
1221  * This routine allocates and attaches a fake offset for @obj.
1222  */
1223 static int
1224 i915_gem_create_mmap_offset(struct drm_gem_object *obj)
1225 {
1226         struct drm_device *dev = obj->dev;
1227         struct drm_gem_mm *mm = dev->mm_private;
1228         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1229         struct drm_map_list *list;
1230         struct drm_local_map *map;
1231         int ret = 0;
1232
1233         /* Set the object up for mmap'ing */
1234         list = &obj->map_list;
1235         list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
1236         if (!list->map)
1237                 return -ENOMEM;
1238
1239         map = list->map;
1240         map->type = _DRM_GEM;
1241         map->size = obj->size;
1242         map->handle = obj;
1243
1244         /* Get a DRM GEM mmap offset allocated... */
1245         list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1246                                                     obj->size / PAGE_SIZE, 0, 0);
1247         if (!list->file_offset_node) {
1248                 DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
1249                 ret = -ENOMEM;
1250                 goto out_free_list;
1251         }
1252
1253         list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1254                                                   obj->size / PAGE_SIZE, 0);
1255         if (!list->file_offset_node) {
1256                 ret = -ENOMEM;
1257                 goto out_free_list;
1258         }
1259
1260         list->hash.key = list->file_offset_node->start;
1261         if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
1262                 DRM_ERROR("failed to add to map hash\n");
1263                 ret = -ENOMEM;
1264                 goto out_free_mm;
1265         }
1266
1267         /* By now we should be all set, any drm_mmap request on the offset
1268          * below will get to our mmap & fault handler */
1269         obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
1270
1271         return 0;
1272
1273 out_free_mm:
1274         drm_mm_put_block(list->file_offset_node);
1275 out_free_list:
1276         kfree(list->map);
1277
1278         return ret;
1279 }
1280
1281 /**
1282  * i915_gem_release_mmap - remove physical page mappings
1283  * @obj: obj in question
1284  *
1285  * Preserve the reservation of the mmapping with the DRM core code, but
1286  * relinquish ownership of the pages back to the system.
1287  *
1288  * It is vital that we remove the page mapping if we have mapped a tiled
1289  * object through the GTT and then lose the fence register due to
1290  * resource pressure. Similarly if the object has been moved out of the
1291  * aperture, then pages mapped into userspace must be revoked. Removing the
1292  * mapping will then trigger a page fault on the next user access, allowing
1293  * fixup by i915_gem_fault().
1294  */
1295 void
1296 i915_gem_release_mmap(struct drm_gem_object *obj)
1297 {
1298         struct drm_device *dev = obj->dev;
1299         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1300
1301         if (dev->dev_mapping)
1302                 unmap_mapping_range(dev->dev_mapping,
1303                                     obj_priv->mmap_offset, obj->size, 1);
1304 }
1305
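/* Undo i915_gem_create_mmap_offset(): drop the hash entry, release the
 * offset node and free the map, so the fake offset can be reused.
 */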
1306 static void
1307 i915_gem_free_mmap_offset(struct drm_gem_object *obj)
1308 {
1309         struct drm_device *dev = obj->dev;
1310         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1311         struct drm_gem_mm *mm = dev->mm_private;
1312         struct drm_map_list *list;
1313
1314         list = &obj->map_list;
1315         drm_ht_remove_item(&mm->offset_hash, &list->hash);
1316
1317         if (list->file_offset_node) {
1318                 drm_mm_put_block(list->file_offset_node);
1319                 list->file_offset_node = NULL;
1320         }
1321
1322         if (list->map) {
1323                 kfree(list->map);
1324                 list->map = NULL;
1325         }
1326
1327         obj_priv->mmap_offset = 0;
1328 }
1329
1330 /**
1331  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1332  * @obj: object to check
1333  *
1334  * Return the required GTT alignment for an object, taking into account
1335  * potential fence register mapping if needed.
1336  */
1337 static uint32_t
1338 i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
1339 {
1340         struct drm_device *dev = obj->dev;
1341         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1342         int start, i;
1343
1344         /*
1345          * Minimum alignment is 4k (GTT page size), but might be greater
1346          * if a fence register is needed for the object.
1347          */
1348         if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
1349                 return 4096;
1350
1351         /*
1352          * Previous chips need to be aligned to the size of the smallest
1353          * fence register that can contain the object.
1354          */
1355         if (IS_I9XX(dev))
1356                 start = 1024*1024;
1357         else
1358                 start = 512*1024;
1359
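        /* Round up to the smallest power-of-two fence size that covers
         * the object.
         */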
1360         for (i = start; i < obj->size; i <<= 1)
1361                 ;
1362
1363         return i;
1364 }
1365
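/* Illustrative userspace flow (not part of this file; fd/handle/size are
 * placeholders): the fake offset returned here is passed to mmap(2) on the
 * DRM fd, roughly:
 *
 *	struct drm_i915_gem_mmap_gtt map = { .handle = handle };
 *	ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &map);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, map.offset);
 */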
1366 /**
1367  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1368  * @dev: DRM device
1369  * @data: GTT mapping ioctl data
1370  * @file_priv: GEM object info
1371  *
1372  * Simply returns the fake offset to userspace so it can mmap it.
1373  * The mmap call will end up in drm_gem_mmap(), which will set things
1374  * up so we can get faults in the handler above.
1375  *
1376  * The fault handler will take care of binding the object into the GTT
1377  * (since it may have been evicted to make room for something), allocating
1378  * a fence register, and mapping the appropriate aperture address into
1379  * userspace.
1380  */
1381 int
1382 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1383                         struct drm_file *file_priv)
1384 {
1385         struct drm_i915_gem_mmap_gtt *args = data;
1386         struct drm_gem_object *obj;
1387         struct drm_i915_gem_object *obj_priv;
1388         int ret;
1389
1390         if (!(dev->driver->driver_features & DRIVER_GEM))
1391                 return -ENODEV;
1392
1393         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1394         if (obj == NULL)
1395                 return -ENOENT;
1396
1397         mutex_lock(&dev->struct_mutex);
1398
1399         obj_priv = to_intel_bo(obj);
1400
1401         if (obj_priv->madv != I915_MADV_WILLNEED) {
1402                 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1403                 drm_gem_object_unreference(obj);
1404                 mutex_unlock(&dev->struct_mutex);
1405                 return -EINVAL;
1406         }
1407
1408
1409         if (!obj_priv->mmap_offset) {
1410                 ret = i915_gem_create_mmap_offset(obj);
1411                 if (ret) {
1412                         drm_gem_object_unreference(obj);
1413                         mutex_unlock(&dev->struct_mutex);
1414                         return ret;
1415                 }
1416         }
1417
1418         args->offset = obj_priv->mmap_offset;
1419
1420         /*
1421          * Pull it into the GTT so that we have a page list (makes the
1422          * initial fault faster and any subsequent flushing possible).
1423          */
1424         if (!obj_priv->agp_mem) {
1425                 ret = i915_gem_object_bind_to_gtt(obj, 0);
1426                 if (ret) {
1427                         drm_gem_object_unreference(obj);
1428                         mutex_unlock(&dev->struct_mutex);
1429                         return ret;
1430                 }
1431         }
1432
1433         drm_gem_object_unreference(obj);
1434         mutex_unlock(&dev->struct_mutex);
1435
1436         return 0;
1437 }
1438
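/* Drop one reference on the object's backing pages.  On the last
 * reference, save the bit-17 swizzle state for tiled objects, write back
 * dirty pages and release the page references taken by get_pages().
 */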
1439 void
1440 i915_gem_object_put_pages(struct drm_gem_object *obj)
1441 {
1442         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1443         int page_count = obj->size / PAGE_SIZE;
1444         int i;
1445
1446         BUG_ON(obj_priv->pages_refcount == 0);
1447         BUG_ON(obj_priv->madv == __I915_MADV_PURGED);
1448
1449         if (--obj_priv->pages_refcount != 0)
1450                 return;
1451
1452         if (obj_priv->tiling_mode != I915_TILING_NONE)
1453                 i915_gem_object_save_bit_17_swizzle(obj);
1454
1455         if (obj_priv->madv == I915_MADV_DONTNEED)
1456                 obj_priv->dirty = 0;
1457
1458         for (i = 0; i < page_count; i++) {
1459                 if (obj_priv->dirty)
1460                         set_page_dirty(obj_priv->pages[i]);
1461
1462                 if (obj_priv->madv == I915_MADV_WILLNEED)
1463                         mark_page_accessed(obj_priv->pages[i]);
1464
1465                 page_cache_release(obj_priv->pages[i]);
1466         }
1467         obj_priv->dirty = 0;
1468
1469         drm_free_large(obj_priv->pages);
1470         obj_priv->pages = NULL;
1471 }
1472
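/* Return the seqno that the next request will carry, and note that a lazy
 * request is now outstanding on the ring so that one is actually emitted
 * later.
 */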
1473 static uint32_t
1474 i915_gem_next_request_seqno(struct drm_device *dev,
1475                             struct intel_ring_buffer *ring)
1476 {
1477         drm_i915_private_t *dev_priv = dev->dev_private;
1478
1479         ring->outstanding_lazy_request = true;
1480
1481         return dev_priv->next_seqno;
1482 }
1483
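/* Move the object onto the ring's active list, taking a reference the
 * first time it becomes active and recording the seqno that must retire
 * before it is idle again.
 */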
1484 static void
1485 i915_gem_object_move_to_active(struct drm_gem_object *obj,
1486                                struct intel_ring_buffer *ring)
1487 {
1488         struct drm_device *dev = obj->dev;
1489         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1490         uint32_t seqno = i915_gem_next_request_seqno(dev, ring);
1491
1492         BUG_ON(ring == NULL);
1493         obj_priv->ring = ring;
1494
1495         /* Add a reference if we're newly entering the active list. */
1496         if (!obj_priv->active) {
1497                 drm_gem_object_reference(obj);
1498                 obj_priv->active = 1;
1499         }
1500
1501         /* Move from whatever list we were on to the tail of execution. */
1502         list_move_tail(&obj_priv->list, &ring->active_list);
1503         obj_priv->last_rendering_seqno = seqno;
1504 }
1505
1506 static void
1507 i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
1508 {
1509         struct drm_device *dev = obj->dev;
1510         drm_i915_private_t *dev_priv = dev->dev_private;
1511         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1512
1513         BUG_ON(!obj_priv->active);
1514         list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list);
1515         obj_priv->last_rendering_seqno = 0;
1516 }
1517
1518 /* Immediately discard the backing storage */
1519 static void
1520 i915_gem_object_truncate(struct drm_gem_object *obj)
1521 {
1522         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1523         struct inode *inode;
1524
1525         /* Our goal here is to return as much memory as possible
1526          * back to the system, as we are called from OOM. To do this
1527          * we must instruct the shmfs to drop all of its backing
1528          * pages, *now*. Here we mirror the actions taken by
1529          * shmem_delete_inode() to release the backing store.
1530          */
1531         inode = obj->filp->f_path.dentry->d_inode;
1532         truncate_inode_pages(inode->i_mapping, 0);
1533         if (inode->i_op->truncate_range)
1534                 inode->i_op->truncate_range(inode, 0, (loff_t)-1);
1535
1536         obj_priv->madv = __I915_MADV_PURGED;
1537 }
1538
1539 static inline int
1540 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
1541 {
1542         return obj_priv->madv == I915_MADV_DONTNEED;
1543 }
1544
1545 static void
1546 i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
1547 {
1548         struct drm_device *dev = obj->dev;
1549         drm_i915_private_t *dev_priv = dev->dev_private;
1550         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1551
1552         i915_verify_inactive(dev, __FILE__, __LINE__);
1553         if (obj_priv->pin_count != 0)
1554                 list_del_init(&obj_priv->list);
1555         else
1556                 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1557
1558         BUG_ON(!list_empty(&obj_priv->gpu_write_list));
1559
1560         obj_priv->last_rendering_seqno = 0;
1561         obj_priv->ring = NULL;
1562         if (obj_priv->active) {
1563                 obj_priv->active = 0;
1564                 drm_gem_object_unreference(obj);
1565         }
1566         i915_verify_inactive(dev, __FILE__, __LINE__);
1567 }
1568
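/**
 * Walks the gpu_write_list and, for every object whose pending GPU write
 * domain is covered by @flush_domains and which was written through @ring,
 * clears the write domain, moves the object to the ring's active list and
 * bumps its fence register (if any) in the fence LRU.
 */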
1569 void
1570 i915_gem_process_flushing_list(struct drm_device *dev,
1571                                uint32_t flush_domains,
1572                                struct intel_ring_buffer *ring)
1573 {
1574         drm_i915_private_t *dev_priv = dev->dev_private;
1575         struct drm_i915_gem_object *obj_priv, *next;
1576
1577         list_for_each_entry_safe(obj_priv, next,
1578                                  &dev_priv->mm.gpu_write_list,
1579                                  gpu_write_list) {
1580                 struct drm_gem_object *obj = &obj_priv->base;
1581
1582                 if ((obj->write_domain & flush_domains) ==
1583                     obj->write_domain &&
1584                     obj_priv->ring->ring_flag == ring->ring_flag) {
1585                         uint32_t old_write_domain = obj->write_domain;
1586
1587                         obj->write_domain = 0;
1588                         list_del_init(&obj_priv->gpu_write_list);
1589                         i915_gem_object_move_to_active(obj, ring);
1590
1591                         /* update the fence lru list */
1592                         if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1593                                 struct drm_i915_fence_reg *reg =
1594                                         &dev_priv->fence_regs[obj_priv->fence_reg];
1595                                 list_move_tail(&reg->lru_list,
1596                                                 &dev_priv->mm.fence_list);
1597                         }
1598
1599                         trace_i915_gem_object_change_domain(obj,
1600                                                             obj->read_domains,
1601                                                             old_write_domain);
1602                 }
1603         }
1604 }
1605
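/**
 * Emits a request on @ring, records its seqno and timestamp, and adds it to
 * the ring's request list (and the file's client list, when available).
 * Unless the device is suspended, the hangcheck timer is re-armed and the
 * retire work is queued for a previously empty ring.
 *
 * Returns the new seqno, or 0 if a request could not be allocated.
 */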
1606 uint32_t
1607 i915_add_request(struct drm_device *dev,
1608                  struct drm_file *file_priv,
1609                  struct drm_i915_gem_request *request,
1610                  struct intel_ring_buffer *ring)
1611 {
1612         drm_i915_private_t *dev_priv = dev->dev_private;
1613         struct drm_i915_file_private *i915_file_priv = NULL;
1614         uint32_t seqno;
1615         int was_empty;
1616
1617         if (file_priv != NULL)
1618                 i915_file_priv = file_priv->driver_priv;
1619
1620         if (request == NULL) {
1621                 request = kzalloc(sizeof(*request), GFP_KERNEL);
1622                 if (request == NULL)
1623                         return 0;
1624         }
1625
1626         seqno = ring->add_request(dev, ring, file_priv, 0);
1627
1628         request->seqno = seqno;
1629         request->ring = ring;
1630         request->emitted_jiffies = jiffies;
1631         was_empty = list_empty(&ring->request_list);
1632         list_add_tail(&request->list, &ring->request_list);
1633
1634         if (i915_file_priv) {
1635                 list_add_tail(&request->client_list,
1636                               &i915_file_priv->mm.request_list);
1637         } else {
1638                 INIT_LIST_HEAD(&request->client_list);
1639         }
1640
1641         if (!dev_priv->mm.suspended) {
1642                 mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
1643                 if (was_empty)
1644                         queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1645         }
1646         return seqno;
1647 }
1648
1649 /**
1650  * Command execution barrier
1651  *
1652  * Ensures that all commands in the ring are finished
1653  * before signalling the CPU
1654  */
1655 static void
1656 i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
1657 {
1658         uint32_t flush_domains = 0;
1659
1660         /* The sampler always gets flushed on i965 (sigh) */
1661         if (IS_I965G(dev))
1662                 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
1663
1664         ring->flush(dev, ring,
1665                         I915_GEM_DOMAIN_COMMAND, flush_domains);
1666 }
1667
1668 /**
1669  * Moves buffers associated with the given request's seqno from the active
1670  * list to the flushing or inactive list as appropriate, potentially freeing them.
1671  */
1672 static void
1673 i915_gem_retire_request(struct drm_device *dev,
1674                         struct drm_i915_gem_request *request)
1675 {
1676         trace_i915_gem_request_retire(dev, request->seqno);
1677
1678         /* Move any buffers on the active list that are no longer referenced
1679          * by the ringbuffer to the flushing/inactive lists as appropriate.
1680          */
1681         while (!list_empty(&request->ring->active_list)) {
1682                 struct drm_gem_object *obj;
1683                 struct drm_i915_gem_object *obj_priv;
1684
1685                 obj_priv = list_first_entry(&request->ring->active_list,
1686                                             struct drm_i915_gem_object,
1687                                             list);
1688                 obj = &obj_priv->base;
1689
1690                 /* If the seqno being retired doesn't match the oldest in the
1691                  * list, then the oldest in the list must still be newer than
1692                  * this seqno.
1693                  */
1694                 if (obj_priv->last_rendering_seqno != request->seqno)
1695                         return;
1696
1697 #if WATCH_LRU
1698                 DRM_INFO("%s: retire %d moves to inactive list %p\n",
1699                          __func__, request->seqno, obj);
1700 #endif
1701
1702                 if (obj->write_domain != 0)
1703                         i915_gem_object_move_to_flushing(obj);
1704                 else
1705                         i915_gem_object_move_to_inactive(obj);
1706         }
1707 }
1708
1709 /**
1710  * Returns true if seq1 is later than seq2.
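 *
 * The comparison uses signed wraparound arithmetic, so it stays correct when
 * the 32-bit seqno wraps: for example, i915_seqno_passed(0x00000001,
 * 0xffffffff) is true, since 0x00000001 - 0xffffffff wraps to 2, which is
 * >= 0 as an int32_t.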
1711  */
1712 bool
1713 i915_seqno_passed(uint32_t seq1, uint32_t seq2)
1714 {
1715         return (int32_t)(seq1 - seq2) >= 0;
1716 }
1717
1718 uint32_t
1719 i915_get_gem_seqno(struct drm_device *dev,
1720                    struct intel_ring_buffer *ring)
1721 {
1722         return ring->get_gem_seqno(dev, ring);
1723 }
1724
1725 /**
1726  * This function clears the request list as sequence numbers are passed.
1727  */
1728 static void
1729 i915_gem_retire_requests_ring(struct drm_device *dev,
1730                               struct intel_ring_buffer *ring)
1731 {
1732         drm_i915_private_t *dev_priv = dev->dev_private;
1733         uint32_t seqno;
1734
1735         if (!ring->status_page.page_addr
1736                         || list_empty(&ring->request_list))
1737                 return;
1738
1739         seqno = i915_get_gem_seqno(dev, ring);
1740
1741         while (!list_empty(&ring->request_list)) {
1742                 struct drm_i915_gem_request *request;
1743                 uint32_t retiring_seqno;
1744
1745                 request = list_first_entry(&ring->request_list,
1746                                            struct drm_i915_gem_request,
1747                                            list);
1748                 retiring_seqno = request->seqno;
1749
1750                 if (i915_seqno_passed(seqno, retiring_seqno) ||
1751                     atomic_read(&dev_priv->mm.wedged)) {
1752                         i915_gem_retire_request(dev, request);
1753
1754                         list_del(&request->list);
1755                         list_del(&request->client_list);
1756                         kfree(request);
1757                 } else
1758                         break;
1759         }
1760
1761         if (unlikely(dev_priv->trace_irq_seqno &&
1762                      i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
1763
1764                 ring->user_irq_put(dev, ring);
1765                 dev_priv->trace_irq_seqno = 0;
1766         }
1767 }
1768
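/**
 * Retires completed requests on all rings, after first freeing any objects
 * left on the deferred free list.
 */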
1769 void
1770 i915_gem_retire_requests(struct drm_device *dev)
1771 {
1772         drm_i915_private_t *dev_priv = dev->dev_private;
1773
1774         if (!list_empty(&dev_priv->mm.deferred_free_list)) {
1775             struct drm_i915_gem_object *obj_priv, *tmp;
1776
1777             /* We must be careful that during unbind() we do not
1778              * accidentally infinitely recurse into retire requests.
1779              * Currently:
1780              *   retire -> free -> unbind -> wait -> retire_ring
1781              */
1782             list_for_each_entry_safe(obj_priv, tmp,
1783                                      &dev_priv->mm.deferred_free_list,
1784                                      list)
1785                     i915_gem_free_object_tail(&obj_priv->base);
1786         }
1787
1788         i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
1789         if (HAS_BSD(dev))
1790                 i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
1791 }
1792
1793 static void
1794 i915_gem_retire_work_handler(struct work_struct *work)
1795 {
1796         drm_i915_private_t *dev_priv;
1797         struct drm_device *dev;
1798
1799         dev_priv = container_of(work, drm_i915_private_t,
1800                                 mm.retire_work.work);
1801         dev = dev_priv->dev;
1802
1803         mutex_lock(&dev->struct_mutex);
1804         i915_gem_retire_requests(dev);
1805
1806         if (!dev_priv->mm.suspended &&
1807                 (!list_empty(&dev_priv->render_ring.request_list) ||
1808                         (HAS_BSD(dev) &&
1809                          !list_empty(&dev_priv->bsd_ring.request_list))))
1810                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1811         mutex_unlock(&dev->struct_mutex);
1812 }
1813
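/**
 * Waits for the given seqno to be passed by @ring.  If the seqno has not
 * been emitted yet (it is still the lazy next_seqno), a request is added
 * first.  Interrupts are re-enabled if something has disabled them, and
 * completed requests are retired once the wait succeeds.
 */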
1814 int
1815 i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
1816                      bool interruptible, struct intel_ring_buffer *ring)
1817 {
1818         drm_i915_private_t *dev_priv = dev->dev_private;
1819         u32 ier;
1820         int ret = 0;
1821
1822         BUG_ON(seqno == 0);
1823
1824         if (seqno == dev_priv->next_seqno) {
1825                 seqno = i915_add_request(dev, NULL, NULL, ring);
1826                 if (seqno == 0)
1827                         return -ENOMEM;
1828         }
1829
1830         if (atomic_read(&dev_priv->mm.wedged))
1831                 return -EIO;
1832
1833         if (!i915_seqno_passed(ring->get_gem_seqno(dev, ring), seqno)) {
1834                 if (HAS_PCH_SPLIT(dev))
1835                         ier = I915_READ(DEIER) | I915_READ(GTIER);
1836                 else
1837                         ier = I915_READ(IER);
1838                 if (!ier) {
1839                         DRM_ERROR("something (likely vbetool) disabled "
1840                                   "interrupts, re-enabling\n");
1841                         i915_driver_irq_preinstall(dev);
1842                         i915_driver_irq_postinstall(dev);
1843                 }
1844
1845                 trace_i915_gem_request_wait_begin(dev, seqno);
1846
1847                 ring->waiting_gem_seqno = seqno;
1848                 ring->user_irq_get(dev, ring);
1849                 if (interruptible)
1850                         ret = wait_event_interruptible(ring->irq_queue,
1851                                 i915_seqno_passed(
1852                                         ring->get_gem_seqno(dev, ring), seqno)
1853                                 || atomic_read(&dev_priv->mm.wedged));
1854                 else
1855                         wait_event(ring->irq_queue,
1856                                 i915_seqno_passed(
1857                                         ring->get_gem_seqno(dev, ring), seqno)
1858                                 || atomic_read(&dev_priv->mm.wedged));
1859
1860                 ring->user_irq_put(dev, ring);
1861                 ring->waiting_gem_seqno = 0;
1862
1863                 trace_i915_gem_request_wait_end(dev, seqno);
1864         }
1865         if (atomic_read(&dev_priv->mm.wedged))
1866                 ret = -EIO;
1867
1868         if (ret && ret != -ERESTARTSYS)
1869                 DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
1870                           __func__, ret, seqno, ring->get_gem_seqno(dev, ring),
1871                           dev_priv->next_seqno);
1872
1873         /* Directly dispatch request retiring.  While we have the work queue
1874          * to handle this, the waiter on a request often wants an associated
1875          * buffer to have made it to the inactive list, and we would need
1876          * a separate wait queue to handle that.
1877          */
1878         if (ret == 0)
1879                 i915_gem_retire_requests_ring(dev, ring);
1880
1881         return ret;
1882 }
1883
1884 /**
1885  * Waits for a sequence number to be signaled, and cleans up the
1886  * request and object lists appropriately for that event.
1887  */
1888 static int
1889 i915_wait_request(struct drm_device *dev, uint32_t seqno,
1890                 struct intel_ring_buffer *ring)
1891 {
1892         return i915_do_wait_request(dev, seqno, 1, ring);
1893 }
1894
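/* Flushes the chipset cache for CPU-domain writes and emits a flush on the
 * render ring and, when present, the BSD ring.
 */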
1895 static void
1896 i915_gem_flush(struct drm_device *dev,
1897                uint32_t invalidate_domains,
1898                uint32_t flush_domains)
1899 {
1900         drm_i915_private_t *dev_priv = dev->dev_private;
1901
1902         if (flush_domains & I915_GEM_DOMAIN_CPU)
1903                 drm_agp_chipset_flush(dev);
1904
1905         dev_priv->render_ring.flush(dev, &dev_priv->render_ring,
1906                         invalidate_domains,
1907                         flush_domains);
1908
1909         if (HAS_BSD(dev))
1910                 dev_priv->bsd_ring.flush(dev, &dev_priv->bsd_ring,
1911                                 invalidate_domains,
1912                                 flush_domains);
1913 }
1914
1915 /**
1916  * Ensures that all rendering to the object has completed and the object is
1917  * safe to unbind from the GTT or access from the CPU.
1918  */
1919 static int
1920 i915_gem_object_wait_rendering(struct drm_gem_object *obj)
1921 {
1922         struct drm_device *dev = obj->dev;
1923         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1924         int ret;
1925
1926         /* This function only exists to support waiting for existing rendering,
1927          * not for emitting required flushes.
1928          */
1929         BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);
1930
1931         /* If there is rendering queued on the buffer being evicted, wait for
1932          * it.
1933          */
1934         if (obj_priv->active) {
1935 #if WATCH_BUF
1936                 DRM_INFO("%s: object %p wait for seqno %08x\n",
1937                           __func__, obj, obj_priv->last_rendering_seqno);
1938 #endif
1939                 ret = i915_wait_request(dev,
1940                                         obj_priv->last_rendering_seqno,
1941                                         obj_priv->ring);
1942                 if (ret != 0)
1943                         return ret;
1944         }
1945
1946         return 0;
1947 }
1948
1949 /**
1950  * Unbinds an object from the GTT aperture.
1951  */
1952 int
1953 i915_gem_object_unbind(struct drm_gem_object *obj)
1954 {
1955         struct drm_device *dev = obj->dev;
1956         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1957         int ret = 0;
1958
1959 #if WATCH_BUF
1960         DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj);
1961         DRM_INFO("gtt_space %p\n", obj_priv->gtt_space);
1962 #endif
1963         if (obj_priv->gtt_space == NULL)
1964                 return 0;
1965
1966         if (obj_priv->pin_count != 0) {
1967                 DRM_ERROR("Attempting to unbind pinned buffer\n");
1968                 return -EINVAL;
1969         }
1970
1971         /* blow away mappings if mapped through GTT */
1972         i915_gem_release_mmap(obj);
1973
1974         /* Move the object to the CPU domain to ensure that
1975          * any possible CPU writes while it's not in the GTT
1976          * are flushed when we go to remap it. This will
1977          * also ensure that all pending GPU writes are finished
1978          * before we unbind.
1979          */
1980         ret = i915_gem_object_set_to_cpu_domain(obj, 1);
1981         if (ret == -ERESTARTSYS)
1982                 return ret;
1983         /* Continue on if we fail due to EIO, the GPU is hung so we
1984          * should be safe and we need to cleanup or else we might
1985          * cause memory corruption through use-after-free.
1986          */
1987
1988         /* release the fence reg _after_ flushing */
1989         if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
1990                 i915_gem_clear_fence_reg(obj);
1991
1992         if (obj_priv->agp_mem != NULL) {
1993                 drm_unbind_agp(obj_priv->agp_mem);
1994                 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
1995                 obj_priv->agp_mem = NULL;
1996         }
1997
1998         i915_gem_object_put_pages(obj);
1999         BUG_ON(obj_priv->pages_refcount);
2000
2001         if (obj_priv->gtt_space) {
2002                 atomic_dec(&dev->gtt_count);
2003                 atomic_sub(obj->size, &dev->gtt_memory);
2004
2005                 drm_mm_put_block(obj_priv->gtt_space);
2006                 obj_priv->gtt_space = NULL;
2007         }
2008
2009         /* Remove ourselves from the LRU list if present. */
2010         if (!list_empty(&obj_priv->list))
2011                 list_del_init(&obj_priv->list);
2012
2013         if (i915_gem_object_is_purgeable(obj_priv))
2014                 i915_gem_object_truncate(obj);
2015
2016         trace_i915_gem_object_unbind(obj);
2017
2018         return ret;
2019 }
2020
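/* Waits for the GPU to go idle: flushes all GPU domains and waits for the
 * render ring and, when present, the BSD ring to retire their outstanding
 * requests.
 */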
2021 int
2022 i915_gpu_idle(struct drm_device *dev)
2023 {
2024         drm_i915_private_t *dev_priv = dev->dev_private;
2025         bool lists_empty;
2026         int ret;
2027
2028         lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
2029                        list_empty(&dev_priv->render_ring.active_list) &&
2030                        (!HAS_BSD(dev) ||
2031                         list_empty(&dev_priv->bsd_ring.active_list)));
2032         if (lists_empty)
2033                 return 0;
2034
2035         /* Flush everything onto the inactive list. */
2036         i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2037
2038         ret = i915_wait_request(dev,
2039                                 i915_gem_next_request_seqno(dev, &dev_priv->render_ring),
2040                                 &dev_priv->render_ring);
2041         if (ret)
2042                 return ret;
2043
2044         if (HAS_BSD(dev)) {
2045                 ret = i915_wait_request(dev,
2046                                         i915_gem_next_request_seqno(dev, &dev_priv->bsd_ring),
2047                                         &dev_priv->bsd_ring);
2048                 if (ret)
2049                         return ret;
2050         }
2051
2052         return 0;
2053 }
2054
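/**
 * Pins the object's backing pages from shmem and takes a reference on the
 * page list; the pages stay pinned until i915_gem_object_put_pages() drops
 * the final reference.
 */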
2055 int
2056 i915_gem_object_get_pages(struct drm_gem_object *obj,
2057                           gfp_t gfpmask)
2058 {
2059         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2060         int page_count, i;
2061         struct address_space *mapping;
2062         struct inode *inode;
2063         struct page *page;
2064
2065         BUG_ON(obj_priv->pages_refcount
2066                         == DRM_I915_GEM_OBJECT_MAX_PAGES_REFCOUNT);
2067
2068         if (obj_priv->pages_refcount++ != 0)
2069                 return 0;
2070
2071         /* Get the list of pages out of our struct file.  They'll be pinned
2072          * at this point until we release them.
2073          */
2074         page_count = obj->size / PAGE_SIZE;
2075         BUG_ON(obj_priv->pages != NULL);
2076         obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
2077         if (obj_priv->pages == NULL) {
2078                 obj_priv->pages_refcount--;
2079                 return -ENOMEM;
2080         }
2081
2082         inode = obj->filp->f_path.dentry->d_inode;
2083         mapping = inode->i_mapping;
2084         for (i = 0; i < page_count; i++) {
2085                 page = read_cache_page_gfp(mapping, i,
2086                                            GFP_HIGHUSER |
2087                                            __GFP_COLD |
2088                                            __GFP_RECLAIMABLE |
2089                                            gfpmask);
2090                 if (IS_ERR(page))
2091                         goto err_pages;
2092
2093                 obj_priv->pages[i] = page;
2094         }
2095
2096         if (obj_priv->tiling_mode != I915_TILING_NONE)
2097                 i915_gem_object_do_bit_17_swizzle(obj);
2098
2099         return 0;
2100
2101 err_pages:
2102         while (i--)
2103                 page_cache_release(obj_priv->pages[i]);
2104
2105         drm_free_large(obj_priv->pages);
2106         obj_priv->pages = NULL;
2107         obj_priv->pages_refcount--;
2108         return PTR_ERR(page);
2109 }
2110
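/* The *_write_fence_reg() helpers below program the object's fence register
 * with its GTT range, pitch and tiling mode, using the layout each hardware
 * generation expects.
 */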
2111 static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
2112 {
2113         struct drm_gem_object *obj = reg->obj;
2114         struct drm_device *dev = obj->dev;
2115         drm_i915_private_t *dev_priv = dev->dev_private;
2116         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2117         int regnum = obj_priv->fence_reg;
2118         uint64_t val;
2119
2120         val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2121                     0xfffff000) << 32;
2122         val |= obj_priv->gtt_offset & 0xfffff000;
2123         val |= (uint64_t)((obj_priv->stride / 128) - 1) <<
2124                 SANDYBRIDGE_FENCE_PITCH_SHIFT;
2125
2126         if (obj_priv->tiling_mode == I915_TILING_Y)
2127                 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2128         val |= I965_FENCE_REG_VALID;
2129
2130         I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
2131 }
2132
2133 static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
2134 {
2135         struct drm_gem_object *obj = reg->obj;
2136         struct drm_device *dev = obj->dev;
2137         drm_i915_private_t *dev_priv = dev->dev_private;
2138         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2139         int regnum = obj_priv->fence_reg;
2140         uint64_t val;
2141
2142         val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2143                     0xfffff000) << 32;
2144         val |= obj_priv->gtt_offset & 0xfffff000;
2145         val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2146         if (obj_priv->tiling_mode == I915_TILING_Y)
2147                 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2148         val |= I965_FENCE_REG_VALID;
2149
2150         I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
2151 }
2152
2153 static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
2154 {
2155         struct drm_gem_object *obj = reg->obj;
2156         struct drm_device *dev = obj->dev;
2157         drm_i915_private_t *dev_priv = dev->dev_private;
2158         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2159         int regnum = obj_priv->fence_reg;
2160         int tile_width;
2161         uint32_t fence_reg, val;
2162         uint32_t pitch_val;
2163
2164         if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
2165             (obj_priv->gtt_offset & (obj->size - 1))) {
2166                 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n",
2167                      __func__, obj_priv->gtt_offset, obj->size);
2168                 return;
2169         }
2170
2171         if (obj_priv->tiling_mode == I915_TILING_Y &&
2172             HAS_128_BYTE_Y_TILING(dev))
2173                 tile_width = 128;
2174         else
2175                 tile_width = 512;
2176
2177         /* Note: the pitch must be a power-of-two number of tile widths */
2178         pitch_val = obj_priv->stride / tile_width;
2179         pitch_val = ffs(pitch_val) - 1;
2180
2181         if (obj_priv->tiling_mode == I915_TILING_Y &&
2182             HAS_128_BYTE_Y_TILING(dev))
2183                 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2184         else
2185                 WARN_ON(pitch_val > I915_FENCE_MAX_PITCH_VAL);
2186
2187         val = obj_priv->gtt_offset;
2188         if (obj_priv->tiling_mode == I915_TILING_Y)
2189                 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2190         val |= I915_FENCE_SIZE_BITS(obj->size);
2191         val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2192         val |= I830_FENCE_REG_VALID;
2193
2194         if (regnum < 8)
2195                 fence_reg = FENCE_REG_830_0 + (regnum * 4);
2196         else
2197                 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
2198         I915_WRITE(fence_reg, val);
2199 }
2200
2201 static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
2202 {
2203         struct drm_gem_object *obj = reg->obj;
2204         struct drm_device *dev = obj->dev;
2205         drm_i915_private_t *dev_priv = dev->dev_private;
2206         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2207         int regnum = obj_priv->fence_reg;
2208         uint32_t val;
2209         uint32_t pitch_val;
2210         uint32_t fence_size_bits;
2211
2212         if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
2213             (obj_priv->gtt_offset & (obj->size - 1))) {
2214                 WARN(1, "%s: object 0x%08x not 512K or size aligned\n",
2215                      __func__, obj_priv->gtt_offset);
2216                 return;
2217         }
2218
2219         pitch_val = obj_priv->stride / 128;
2220         pitch_val = ffs(pitch_val) - 1;
2221         WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2222
2223         val = obj_priv->gtt_offset;
2224         if (obj_priv->tiling_mode == I915_TILING_Y)
2225                 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2226         fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
2227         WARN_ON(fence_size_bits & ~0x00000f00);
2228         val |= fence_size_bits;
2229         val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2230         val |= I830_FENCE_REG_VALID;
2231
2232         I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
2233 }
2234
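/* Finds a free fence register, stealing the least-recently-used unpinned
 * fence if none are free.  Returns the register index, -ENOSPC if every
 * fence is pinned, or another negative error code if stealing fails.
 */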
2235 static int i915_find_fence_reg(struct drm_device *dev)
2236 {
2237         struct drm_i915_fence_reg *reg = NULL;
2238         struct drm_i915_gem_object *obj_priv = NULL;
2239         struct drm_i915_private *dev_priv = dev->dev_private;
2240         struct drm_gem_object *obj = NULL;
2241         int i, avail, ret;
2242
2243         /* First try to find a free reg */
2244         avail = 0;
2245         for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2246                 reg = &dev_priv->fence_regs[i];
2247                 if (!reg->obj)
2248                         return i;
2249
2250                 obj_priv = to_intel_bo(reg->obj);
2251                 if (!obj_priv->pin_count)
2252                         avail++;
2253         }
2254
2255         if (avail == 0)
2256                 return -ENOSPC;
2257
2258         /* None available, try to steal one or wait for a user to finish */
2259         i = I915_FENCE_REG_NONE;
2260         list_for_each_entry(reg, &dev_priv->mm.fence_list,
2261                             lru_list) {
2262                 obj = reg->obj;
2263                 obj_priv = to_intel_bo(obj);
2264
2265                 if (obj_priv->pin_count)
2266                         continue;
2267
2268                 /* found one! */
2269                 i = obj_priv->fence_reg;
2270                 break;
2271         }
2272
2273         BUG_ON(i == I915_FENCE_REG_NONE);
2274
2275         /* We only have a reference on obj from the active list. put_fence_reg
2276          * might drop that one, causing a use-after-free in it. So hold a
2277          * private reference to obj like the other callers of put_fence_reg
2278          * (set_tiling ioctl) do. */
2279         drm_gem_object_reference(obj);
2280         ret = i915_gem_object_put_fence_reg(obj);
2281         drm_gem_object_unreference(obj);
2282         if (ret != 0)
2283                 return ret;
2284
2285         return i;
2286 }
2287
2288 /**
2289  * i915_gem_object_get_fence_reg - set up a fence reg for an object
2290  * @obj: object to map through a fence reg
2291  *
2292  * When mapping objects through the GTT, userspace wants to be able to write
2293  * to them without having to worry about swizzling if the object is tiled.
2294  *
2295  * This function walks the fence regs looking for a free one for @obj,
2296  * stealing one if it can't find any.
2297  *
2298  * It then sets up the reg based on the object's properties: address, pitch
2299  * and tiling format.
2300  */
2301 int
2302 i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
2303 {
2304         struct drm_device *dev = obj->dev;
2305         struct drm_i915_private *dev_priv = dev->dev_private;
2306         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2307         struct drm_i915_fence_reg *reg = NULL;
2308         int ret;
2309
2310         /* Just update our place in the LRU if our fence is getting used. */
2311         if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
2312                 reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2313                 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2314                 return 0;
2315         }
2316
2317         switch (obj_priv->tiling_mode) {
2318         case I915_TILING_NONE:
2319                 WARN(1, "allocating a fence for non-tiled object?\n");
2320                 break;
2321         case I915_TILING_X:
2322                 if (!obj_priv->stride)
2323                         return -EINVAL;
2324                 WARN((obj_priv->stride & (512 - 1)),
2325                      "object 0x%08x is X tiled but has non-512B pitch\n",
2326                      obj_priv->gtt_offset);
2327                 break;
2328         case I915_TILING_Y:
2329                 if (!obj_priv->stride)
2330                         return -EINVAL;
2331                 WARN((obj_priv->stride & (128 - 1)),
2332                      "object 0x%08x is Y tiled but has non-128B pitch\n",
2333                      obj_priv->gtt_offset);
2334                 break;
2335         }
2336
2337         ret = i915_find_fence_reg(dev);
2338         if (ret < 0)
2339                 return ret;
2340
2341         obj_priv->fence_reg = ret;
2342         reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2343         list_add_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2344
2345         reg->obj = obj;
2346
2347         if (IS_GEN6(dev))
2348                 sandybridge_write_fence_reg(reg);
2349         else if (IS_I965G(dev))
2350                 i965_write_fence_reg(reg);
2351         else if (IS_I9XX(dev))
2352                 i915_write_fence_reg(reg);
2353         else
2354                 i830_write_fence_reg(reg);
2355
2356         trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg,
2357                         obj_priv->tiling_mode);
2358
2359         return 0;
2360 }
2361
2362 /**
2363  * i915_gem_clear_fence_reg - clear out fence register info
2364  * @obj: object to clear
2365  *
2366  * Zeroes out the fence register itself and clears out the associated
2367  * data structures in dev_priv and obj_priv.
2368  */
2369 static void
2370 i915_gem_clear_fence_reg(struct drm_gem_object *obj)
2371 {
2372         struct drm_device *dev = obj->dev;
2373         drm_i915_private_t *dev_priv = dev->dev_private;
2374         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2375         struct drm_i915_fence_reg *reg =
2376                 &dev_priv->fence_regs[obj_priv->fence_reg];
2377
2378         if (IS_GEN6(dev)) {
2379                 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
2380                              (obj_priv->fence_reg * 8), 0);
2381         } else if (IS_I965G(dev)) {
2382                 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
2383         } else {
2384                 uint32_t fence_reg;
2385
2386                 if (obj_priv->fence_reg < 8)
2387                         fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4;
2388                 else
2389                         fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg -
2390                                                        8) * 4;
2391
2392                 I915_WRITE(fence_reg, 0);
2393         }
2394
2395         reg->obj = NULL;
2396         obj_priv->fence_reg = I915_FENCE_REG_NONE;
2397         list_del_init(&reg->lru_list);
2398 }
2399
2400 /**
2401  * i915_gem_object_put_fence_reg - waits on outstanding fenced access
2402  * to the buffer to finish, and then resets the fence register.
2403  * @obj: tiled object holding a fence register.
2404  *
2405  * Zeroes out the fence register itself and clears out the associated
2406  * data structures in dev_priv and obj_priv.
2407  */
2408 int
2409 i915_gem_object_put_fence_reg(struct drm_gem_object *obj)
2410 {
2411         struct drm_device *dev = obj->dev;
2412         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2413
2414         if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
2415                 return 0;
2416
2417         /* If we've changed tiling, GTT-mappings of the object
2418          * need to re-fault to ensure that the correct fence register
2419          * setup is in place.
2420          */
2421         i915_gem_release_mmap(obj);
2422
2423         /* On the i915, GPU access to tiled buffers is via a fence,
2424          * therefore we must wait for any outstanding access to complete
2425          * before clearing the fence.
2426          */
2427         if (!IS_I965G(dev)) {
2428                 int ret;
2429
2430                 ret = i915_gem_object_flush_gpu_write_domain(obj, false);
2431                 if (ret != 0)
2432                         return ret;
2433         }
2434
2435         i915_gem_object_flush_gtt_write_domain(obj);
2436         i915_gem_clear_fence_reg(obj);
2437
2438         return 0;
2439 }
2440
2441 /**
2442  * Finds free space in the GTT aperture and binds the object there.
2443  */
2444 static int
2445 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
2446 {
2447         struct drm_device *dev = obj->dev;
2448         drm_i915_private_t *dev_priv = dev->dev_private;
2449         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2450         struct drm_mm_node *free_space;
2451         gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
2452         int ret;
2453
2454         if (obj_priv->madv != I915_MADV_WILLNEED) {
2455                 DRM_ERROR("Attempting to bind a purgeable object\n");
2456                 return -EINVAL;
2457         }
2458
2459         if (alignment == 0)
2460                 alignment = i915_gem_get_gtt_alignment(obj);
2461         if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
2462                 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2463                 return -EINVAL;
2464         }
2465
2466         /* If the object is bigger than the entire aperture, reject it early
2467          * before evicting everything in a vain attempt to find space.
2468          */
2469         if (obj->size > dev->gtt_total) {
2470                 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2471                 return -E2BIG;
2472         }
2473
2474  search_free:
2475         free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2476                                         obj->size, alignment, 0);
2477         if (free_space != NULL) {
2478                 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size,
2479                                                        alignment);
2480                 if (obj_priv->gtt_space != NULL)
2481                         obj_priv->gtt_offset = obj_priv->gtt_space->start;
2482         }
2483         if (obj_priv->gtt_space == NULL) {
2484                 /* No free space was found, so evict something and retry.
2485                  * If the GTT is empty and the object still doesn't fit, we are out of memory.
2486                  */
2487 #if WATCH_LRU
2488                 DRM_INFO("%s: GTT full, evicting something\n", __func__);
2489 #endif
2490                 ret = i915_gem_evict_something(dev, obj->size, alignment);
2491                 if (ret)
2492                         return ret;
2493
2494                 goto search_free;
2495         }
2496
2497 #if WATCH_BUF
2498         DRM_INFO("Binding object of size %zd at 0x%08x\n",
2499                  obj->size, obj_priv->gtt_offset);
2500 #endif
2501         ret = i915_gem_object_get_pages(obj, gfpmask);
2502         if (ret) {
2503                 drm_mm_put_block(obj_priv->gtt_space);
2504                 obj_priv->gtt_space = NULL;
2505
2506                 if (ret == -ENOMEM) {
2507                         /* first try to clear up some space from the GTT */
2508                         ret = i915_gem_evict_something(dev, obj->size,
2509                                                        alignment);
2510                         if (ret) {
2511                                 /* now try to shrink everyone else */
2512                                 if (gfpmask) {
2513                                         gfpmask = 0;
2514                                         goto search_free;
2515                                 }
2516
2517                                 return ret;
2518                         }
2519
2520                         goto search_free;
2521                 }
2522
2523                 return ret;
2524         }
2525
2526         /* Create an AGP memory structure pointing at our pages, and bind it
2527          * into the GTT.
2528          */
2529         obj_priv->agp_mem = drm_agp_bind_pages(dev,
2530                                                obj_priv->pages,
2531                                                obj->size >> PAGE_SHIFT,
2532                                                obj_priv->gtt_offset,
2533                                                obj_priv->agp_type);
2534         if (obj_priv->agp_mem == NULL) {
2535                 i915_gem_object_put_pages(obj);
2536                 drm_mm_put_block(obj_priv->gtt_space);
2537                 obj_priv->gtt_space = NULL;
2538
2539                 ret = i915_gem_evict_something(dev, obj->size, alignment);
2540                 if (ret)
2541                         return ret;
2542
2543                 goto search_free;
2544         }
2545         atomic_inc(&dev->gtt_count);
2546         atomic_add(obj->size, &dev->gtt_memory);
2547
2548         /* keep track of the bound object by adding it to the inactive list */
2549         list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
2550
2551         /* Assert that the object is not currently in any GPU domain. As it
2552          * wasn't in the GTT, there shouldn't be any way it could have been in
2553          * a GPU cache
2554          */
2555         BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2556         BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2557
2558         trace_i915_gem_object_bind(obj, obj_priv->gtt_offset);
2559
2560         return 0;
2561 }
2562
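/** Flushes the CPU cache lines covering the object's backing pages, if a
 * page list is present.
 */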
2563 void
2564 i915_gem_clflush_object(struct drm_gem_object *obj)
2565 {
2566         struct drm_i915_gem_object      *obj_priv = to_intel_bo(obj);
2567
2568         /* If we don't have a page list set up, then we're not pinned
2569          * to GPU, and we can ignore the cache flush because it'll happen
2570          * again at bind time.
2571          */
2572         if (obj_priv->pages == NULL)
2573                 return;
2574
2575         trace_i915_gem_object_clflush(obj);
2576
2577         drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
2578 }
2579
2580 /** Flushes any GPU write domain for the object if it's dirty. */
2581 static int
2582 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj,
2583                                        bool pipelined)
2584 {
2585         struct drm_device *dev = obj->dev;
2586         uint32_t old_write_domain;
2587
2588         if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
2589                 return 0;
2590
2591         /* Queue the GPU write cache flushing we need. */
2592         old_write_domain = obj->write_domain;
2593         i915_gem_flush(dev, 0, obj->write_domain);
2594
2595         trace_i915_gem_object_change_domain(obj,
2596                                             obj->read_domains,
2597                                             old_write_domain);
2598
2599         if (pipelined)
2600                 return 0;
2601
2602         return i915_gem_object_wait_rendering(obj);
2603 }
2604
2605 /** Flushes the GTT write domain for the object if it's dirty. */
2606 static void
2607 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
2608 {
2609         uint32_t old_write_domain;
2610
2611         if (obj->write_domain != I915_GEM_DOMAIN_GTT)
2612                 return;
2613
2614         /* No actual flushing is required for the GTT write domain.   Writes
2615          * to it immediately go to main memory as far as we know, so there's
2616          * no chipset flush.  It also doesn't land in render cache.
2617          */
2618         old_write_domain = obj->write_domain;
2619         obj->write_domain = 0;
2620
2621         trace_i915_gem_object_change_domain(obj,
2622                                             obj->read_domains,
2623                                             old_write_domain);
2624 }
2625
2626 /** Flushes the CPU write domain for the object if it's dirty. */
2627 static void
2628 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
2629 {
2630         struct drm_device *dev = obj->dev;
2631         uint32_t old_write_domain;
2632
2633         if (obj->write_domain != I915_GEM_DOMAIN_CPU)
2634                 return;
2635
2636         i915_gem_clflush_object(obj);
2637         drm_agp_chipset_flush(dev);
2638         old_write_domain = obj->write_domain;
2639         obj->write_domain = 0;
2640
2641         trace_i915_gem_object_change_domain(obj,
2642                                             obj->read_domains,
2643                                             old_write_domain);
2644 }
2645
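/**
 * Flushes whatever write domain the object currently holds: GTT and CPU
 * write domains are flushed directly, GPU write domains via a pipelined
 * GPU flush.
 */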
2646 int
2647 i915_gem_object_flush_write_domain(struct drm_gem_object *obj)
2648 {
2649         int ret = 0;
2650
2651         switch (obj->write_domain) {
2652         case I915_GEM_DOMAIN_GTT:
2653                 i915_gem_object_flush_gtt_write_domain(obj);
2654                 break;
2655         case I915_GEM_DOMAIN_CPU:
2656                 i915_gem_object_flush_cpu_write_domain(obj);
2657                 break;
2658         default:
2659                 ret = i915_gem_object_flush_gpu_write_domain(obj, true);
2660                 break;
2661         }
2662
2663         return ret;
2664 }
2665
2666 /**
2667  * Moves a single object to the GTT read, and possibly write domain.
2668  *
2669  * This function returns when the move is complete, including waiting on
2670  * flushes to occur.
2671  */
2672 int
2673 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2674 {
2675         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2676         uint32_t old_write_domain, old_read_domains;
2677         int ret;
2678
2679         /* Not valid to be called on unbound objects. */
2680         if (obj_priv->gtt_space == NULL)
2681                 return -EINVAL;
2682
2683         ret = i915_gem_object_flush_gpu_write_domain(obj, false);
2684         if (ret != 0)
2685                 return ret;
2686
2687         old_write_domain = obj->write_domain;
2688         old_read_domains = obj->read_domains;
2689
2690         /* If we're writing through the GTT domain, then CPU and GPU caches
2691          * will need to be invalidated at next use.
2692          */
2693         if (write) {
2694                 ret = i915_gem_object_wait_rendering(obj);
2695                 if (ret)
2696                         return ret;
2697
2698                 obj->read_domains &= I915_GEM_DOMAIN_GTT;
2699         }
2700
2701         i915_gem_object_flush_cpu_write_domain(obj);
2702
2703         /* It should now be out of any other write domains, and we can update
2704          * the domain values for our changes.
2705          */
2706         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2707         obj->read_domains |= I915_GEM_DOMAIN_GTT;
2708         if (write) {
2709                 obj->write_domain = I915_GEM_DOMAIN_GTT;
2710                 obj_priv->dirty = 1;
2711         }
2712
2713         trace_i915_gem_object_change_domain(obj,
2714                                             old_read_domains,
2715                                             old_write_domain);
2716
2717         return 0;
2718 }
2719
2720 /*
2721  * Prepare a buffer for use as a display plane. Use an uninterruptible wait
2722  * for any needed flush, as the modesetting process must not be interrupted.
2723  */
2724 int
2725 i915_gem_object_set_to_display_plane(struct drm_gem_object *obj)
2726 {
2727         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2728         uint32_t old_read_domains;
2729         int ret;
2730
2731         /* Not valid to be called on unbound objects. */
2732         if (obj_priv->gtt_space == NULL)
2733                 return -EINVAL;
2734
2735         ret = i915_gem_object_flush_gpu_write_domain(obj, true);
2736         if (ret != 0)
2737                 return ret;
2738
2739         i915_gem_object_flush_cpu_write_domain(obj);
2740
2741         old_read_domains = obj->read_domains;
2742         obj->read_domains = I915_GEM_DOMAIN_GTT;
2743
2744         trace_i915_gem_object_change_domain(obj,
2745                                             old_read_domains,
2746                                             obj->write_domain);
2747
2748         return 0;
2749 }
2750
2751 /**
2752  * Moves a single object to the CPU read, and possibly write domain.
2753  *
2754  * This function returns when the move is complete, including waiting on
2755  * flushes to occur.
2756  */
2757 static int
2758 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
2759 {
2760         uint32_t old_write_domain, old_read_domains;
2761         int ret;
2762
2763         ret = i915_gem_object_flush_gpu_write_domain(obj, false);
2764         if (ret != 0)
2765                 return ret;
2766
2767         i915_gem_object_flush_gtt_write_domain(obj);
2768
2769         /* If we have a partially-valid cache of the object in the CPU,
2770          * finish invalidating it and free the per-page flags.
2771          */
2772         i915_gem_object_set_to_full_cpu_read_domain(obj);
2773
2774         old_write_domain = obj->write_domain;
2775         old_read_domains = obj->read_domains;
2776
2777         /* Flush the CPU cache if it's still invalid. */
2778         if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2779                 i915_gem_clflush_object(obj);
2780
2781                 obj->read_domains |= I915_GEM_DOMAIN_CPU;
2782         }
2783
2784         /* It should now be out of any other write domains, and we can update
2785          * the domain values for our changes.
2786          */
2787         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2788
2789         /* If we're writing through the CPU, then the GPU read domains will
2790          * need to be invalidated at next use.
2791          */
2792         if (write) {
2793                 ret = i915_gem_object_wait_rendering(obj);
2794                 if (ret)
2795                         return ret;
2796
2797                 obj->read_domains &= I915_GEM_DOMAIN_CPU;
2798                 obj->write_domain = I915_GEM_DOMAIN_CPU;
2799         }
2800
2801         trace_i915_gem_object_change_domain(obj,
2802                                             old_read_domains,
2803                                             old_write_domain);
2804
2805         return 0;
2806 }
2807
2808 /*
2809  * Set the next domain for the specified object. This
2810  * may not actually perform the necessary flushing/invalidating though,
2811  * as that may want to be batched with other set_domain operations.
2812  *
2813  * This is (we hope) the only really tricky part of gem. The goal
2814  * is fairly simple -- track which caches hold bits of the object
2815  * and make sure they remain coherent. A few concrete examples may
2816  * help to explain how it works. For shorthand, we use the notation
2817  * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
2818  * a pair of read and write domain masks.
2819  *
2820  * Case 1: the batch buffer
2821  *
2822  *      1. Allocated
2823  *      2. Written by CPU
2824  *      3. Mapped to GTT
2825  *      4. Read by GPU
2826  *      5. Unmapped from GTT
2827  *      6. Freed
2828  *
2829  *      Let's take these a step at a time
2830  *
2831  *      1. Allocated
2832  *              Pages allocated from the kernel may still have
2833  *              cache contents, so we set them to (CPU, CPU) always.
2834  *      2. Written by CPU (using pwrite)
2835  *              The pwrite function calls set_domain (CPU, CPU) and
2836  *              this function does nothing (as nothing changes)
2837  *      3. Mapped to GTT
2838  *              This function asserts that the object is not
2839  *              currently in any GPU-based read or write domains
2840  *      4. Read by GPU
2841  *              i915_gem_execbuffer calls set_domain (COMMAND, 0).
2842  *              As write_domain is zero, this function adds in the
2843  *              current read domains (CPU+COMMAND, 0).
2844  *              flush_domains is set to CPU.
2845  *              invalidate_domains is set to COMMAND
2846  *              clflush is run to get data out of the CPU caches
2847  *              then i915_dev_set_domain calls i915_gem_flush to
2848  *              emit an MI_FLUSH and drm_agp_chipset_flush
2849  *      5. Unmapped from GTT
2850  *              i915_gem_object_unbind calls set_domain (CPU, CPU)
2851  *              flush_domains and invalidate_domains end up both zero
2852  *              so no flushing/invalidating happens
2853  *      6. Freed
2854  *              yay, done
2855  *
2856  * Case 2: The shared render buffer
2857  *
2858  *      1. Allocated
2859  *      2. Mapped to GTT
2860  *      3. Read/written by GPU
2861  *      4. set_domain to (CPU,CPU)
2862  *      5. Read/written by CPU
2863  *      6. Read/written by GPU
2864  *
2865  *      1. Allocated
2866  *              Same as last example, (CPU, CPU)
2867  *      2. Mapped to GTT
2868  *              Nothing changes (assertions find that it is not in the GPU)
2869  *      3. Read/written by GPU
2870  *              execbuffer calls set_domain (RENDER, RENDER)
2871  *              flush_domains gets CPU
2872  *              invalidate_domains gets GPU
2873  *              clflush (obj)
2874  *              MI_FLUSH and drm_agp_chipset_flush
2875  *      4. set_domain (CPU, CPU)
2876  *              flush_domains gets GPU
2877  *              invalidate_domains gets CPU
2878  *              wait_rendering (obj) to make sure all drawing is complete.
2879  *              This will include an MI_FLUSH to get the data from GPU
2880  *              to memory
2881  *              clflush (obj) to invalidate the CPU cache
2882  *              Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
2883  *      5. Read/written by CPU
2884  *              cache lines are loaded and dirtied
2885  *      6. Read/written by GPU
2886  *              Same as last GPU access
2887  *
2888  * Case 3: The constant buffer
2889  *
2890  *      1. Allocated
2891  *      2. Written by CPU
2892  *      3. Read by GPU
2893  *      4. Updated (written) by CPU again
2894  *      5. Read by GPU
2895  *
2896  *      1. Allocated
2897  *              (CPU, CPU)
2898  *      2. Written by CPU
2899  *              (CPU, CPU)
2900  *      3. Read by GPU
2901  *              (CPU+RENDER, 0)
2902  *              flush_domains = CPU
2903  *              invalidate_domains = RENDER
2904  *              clflush (obj)
2905  *              MI_FLUSH
2906  *              drm_agp_chipset_flush
2907  *      4. Updated (written) by CPU again
2908  *              (CPU, CPU)
2909  *              flush_domains = 0 (no previous write domain)
2910  *              invalidate_domains = 0 (no new read domains)
2911  *      5. Read by GPU
2912  *              (CPU+RENDER, 0)
2913  *              flush_domains = CPU
2914  *              invalidate_domains = RENDER
2915  *              clflush (obj)
2916  *              MI_FLUSH
2917  *              drm_agp_chipset_flush
2918  */
2919 static void
2920 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
2921 {
2922         struct drm_device               *dev = obj->dev;
2923         struct drm_i915_gem_object      *obj_priv = to_intel_bo(obj);
2924         uint32_t                        invalidate_domains = 0;
2925         uint32_t                        flush_domains = 0;
2926         uint32_t                        old_read_domains;
2927
2928         BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
2929         BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
2930
2931         intel_mark_busy(dev, obj);
2932
2933 #if WATCH_BUF
2934         DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
2935                  __func__, obj,
2936                  obj->read_domains, obj->pending_read_domains,
2937                  obj->write_domain, obj->pending_write_domain);
2938 #endif
2939         /*
2940          * If the object isn't moving to a new write domain,
2941          * let the object stay in multiple read domains
2942          */
2943         if (obj->pending_write_domain == 0)
2944                 obj->pending_read_domains |= obj->read_domains;
2945         else
2946                 obj_priv->dirty = 1;
2947
2948         /*
2949          * Flush the current write domain if
2950          * the new read domains don't match. Invalidate
2951          * any read domains which differ from the old
2952          * write domain
2953          */
2954         if (obj->write_domain &&
2955             obj->write_domain != obj->pending_read_domains) {
2956                 flush_domains |= obj->write_domain;
2957                 invalidate_domains |=
2958                         obj->pending_read_domains & ~obj->write_domain;
2959         }
2960         /*
2961          * Invalidate any read caches which may have
2962          * stale data. That is, any new read domains.
2963          */
2964         invalidate_domains |= obj->pending_read_domains & ~obj->read_domains;
2965         if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) {
2966 #if WATCH_BUF
2967                 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
2968                          __func__, flush_domains, invalidate_domains);
2969 #endif
2970                 i915_gem_clflush_object(obj);
2971         }
2972
2973         old_read_domains = obj->read_domains;
2974
2975         /* The actual obj->write_domain will be updated with
2976          * pending_write_domain after we emit the accumulated flush for all
2977          * of our domain changes in execbuffers (which clears objects'
2978          * write_domains).  So if we have a current write domain that we
2979          * aren't changing, set pending_write_domain to that.
2980          */
2981         if (flush_domains == 0 && obj->pending_write_domain == 0)
2982                 obj->pending_write_domain = obj->write_domain;
2983         obj->read_domains = obj->pending_read_domains;
2984
2985         dev->invalidate_domains |= invalidate_domains;
2986         dev->flush_domains |= flush_domains;
2987 #if WATCH_BUF
2988         DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n",
2989                  __func__,
2990                  obj->read_domains, obj->write_domain,
2991                  dev->invalidate_domains, dev->flush_domains);
2992 #endif
2993
2994         trace_i915_gem_object_change_domain(obj,
2995                                             old_read_domains,
2996                                             obj->write_domain);
2997 }
2998
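/*
 * Illustrative sketch (not compiled): how the per-object accumulation above
 * is expected to be driven.  The global flush/invalidate bits gathered here
 * are applied once for the whole batch, mirroring i915_gem_do_execbuffer()
 * further down in this file:
 *
 *        dev->invalidate_domains = 0;
 *        dev->flush_domains = 0;
 *
 *        for (i = 0; i < args->buffer_count; i++)
 *                i915_gem_object_set_to_gpu_domain(object_list[i]);
 *
 *        if (dev->invalidate_domains | dev->flush_domains)
 *                i915_gem_flush(dev,
 *                               dev->invalidate_domains,
 *                               dev->flush_domains);
 */
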
2999 /**
3000  * Moves the object from a partially valid CPU read domain to a fully valid one.
3001  *
3002  * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3003  * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3004  */
3005 static void
3006 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
3007 {
3008         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3009
3010         if (!obj_priv->page_cpu_valid)
3011                 return;
3012
3013         /* If we're partially in the CPU read domain, finish moving it in.
3014          */
3015         if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
3016                 int i;
3017
3018                 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
3019                         if (obj_priv->page_cpu_valid[i])
3020                                 continue;
3021                         drm_clflush_pages(obj_priv->pages + i, 1);
3022                 }
3023         }
3024
3025         /* Free the page_cpu_valid mappings which are now stale, whether
3026          * or not we've got I915_GEM_DOMAIN_CPU.
3027          */
3028         kfree(obj_priv->page_cpu_valid);
3029         obj_priv->page_cpu_valid = NULL;
3030 }
3031
3032 /**
3033  * Set the CPU read domain on a range of the object.
3034  *
3035  * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3036  * not entirely valid.  The page_cpu_valid member of the object tracks which
3037  * pages have been flushed, and will be respected by
3038  * i915_gem_object_set_to_cpu_domain() if it is later called to obtain a valid
3039  * mapping of the whole object.
3040  *
3041  * This function returns when the move is complete, including waiting on
3042  * flushes to occur.
3043  */
3044 static int
3045 i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
3046                                           uint64_t offset, uint64_t size)
3047 {
3048         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3049         uint32_t old_read_domains;
3050         int i, ret;
3051
3052         if (offset == 0 && size == obj->size)
3053                 return i915_gem_object_set_to_cpu_domain(obj, 0);
3054
3055         ret = i915_gem_object_flush_gpu_write_domain(obj, false);
3056         if (ret != 0)
3057                 return ret;
3058         i915_gem_object_flush_gtt_write_domain(obj);
3059
3060         /* If we're already fully in the CPU read domain, we're done. */
3061         if (obj_priv->page_cpu_valid == NULL &&
3062             (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
3063                 return 0;
3064
3065         /* Otherwise, create/clear the per-page CPU read domain flag if we're
3066          * newly adding I915_GEM_DOMAIN_CPU
3067          */
3068         if (obj_priv->page_cpu_valid == NULL) {
3069                 obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE,
3070                                                    GFP_KERNEL);
3071                 if (obj_priv->page_cpu_valid == NULL)
3072                         return -ENOMEM;
3073         } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
3074                 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
3075
3076         /* Flush the cache on any pages that are still invalid from the CPU's
3077          * perspective.
3078          */
3079         for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3080              i++) {
3081                 if (obj_priv->page_cpu_valid[i])
3082                         continue;
3083
3084                 drm_clflush_pages(obj_priv->pages + i, 1);
3085
3086                 obj_priv->page_cpu_valid[i] = 1;
3087         }
3088
3089         /* It should now be out of any other write domains, and we can update
3090          * the domain values for our changes.
3091          */
3092         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3093
3094         old_read_domains = obj->read_domains;
3095         obj->read_domains |= I915_GEM_DOMAIN_CPU;
3096
3097         trace_i915_gem_object_change_domain(obj,
3098                                             old_read_domains,
3099                                             obj->write_domain);
3100
3101         return 0;
3102 }
3103
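/*
 * Illustrative sketch (not compiled): a caller that only reads a byte range,
 * such as the shmem pread path earlier in this file, flushes just the pages
 * it needs rather than the whole object:
 *
 *        ret = i915_gem_object_set_cpu_read_domain_range(obj,
 *                                                        args->offset,
 *                                                        args->size);
 *        if (ret != 0)
 *                return ret;
 *
 * A later full transition goes through
 * i915_gem_object_set_to_full_cpu_read_domain() above, which clflushes any
 * pages never marked valid and then drops page_cpu_valid.
 */
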
3104 /**
3105  * Pin an object to the GTT and evaluate the relocations landing in it.
3106  */
3107 static int
3108 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
3109                                  struct drm_file *file_priv,
3110                                  struct drm_i915_gem_exec_object2 *entry,
3111                                  struct drm_i915_gem_relocation_entry *relocs)
3112 {
3113         struct drm_device *dev = obj->dev;
3114         drm_i915_private_t *dev_priv = dev->dev_private;
3115         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3116         int i, ret;
3117         void __iomem *reloc_page;
3118         bool need_fence;
3119
3120         need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
3121                      obj_priv->tiling_mode != I915_TILING_NONE;
3122
3123         /* Check fence reg constraints and rebind if necessary */
3124         if (need_fence &&
3125             !i915_gem_object_fence_offset_ok(obj,
3126                                              obj_priv->tiling_mode)) {
3127                 ret = i915_gem_object_unbind(obj);
3128                 if (ret)
3129                         return ret;
3130         }
3131
3132         /* Choose the GTT offset for our buffer and put it there. */
3133         ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
3134         if (ret)
3135                 return ret;
3136
3137         /*
3138          * Pre-965 chips need a fence register set up in order to
3139          * properly handle blits to/from tiled surfaces.
3140          */
3141         if (need_fence) {
3142                 ret = i915_gem_object_get_fence_reg(obj);
3143                 if (ret != 0) {
3144                         i915_gem_object_unpin(obj);
3145                         return ret;
3146                 }
3147         }
3148
3149         entry->offset = obj_priv->gtt_offset;
3150
3151         /* Apply the relocations, using the GTT aperture to avoid cache
3152          * flushing requirements.
3153          */
3154         for (i = 0; i < entry->relocation_count; i++) {
3155                 struct drm_i915_gem_relocation_entry *reloc = &relocs[i];
3156                 struct drm_gem_object *target_obj;
3157                 struct drm_i915_gem_object *target_obj_priv;
3158                 uint32_t reloc_val, reloc_offset;
3159                 uint32_t __iomem *reloc_entry;
3160
3161                 target_obj = drm_gem_object_lookup(obj->dev, file_priv,
3162                                                    reloc->target_handle);
3163                 if (target_obj == NULL) {
3164                         i915_gem_object_unpin(obj);
3165                         return -ENOENT;
3166                 }
3167                 target_obj_priv = to_intel_bo(target_obj);
3168
3169 #if WATCH_RELOC
3170                 DRM_INFO("%s: obj %p offset %08x target %d "
3171                          "read %08x write %08x gtt %08x "
3172                          "presumed %08x delta %08x\n",
3173                          __func__,
3174                          obj,
3175                          (int) reloc->offset,
3176                          (int) reloc->target_handle,
3177                          (int) reloc->read_domains,
3178                          (int) reloc->write_domain,
3179                          (int) target_obj_priv->gtt_offset,
3180                          (int) reloc->presumed_offset,
3181                          reloc->delta);
3182 #endif
3183
3184                 /* The target buffer should have appeared before us in the
3185                  * exec_object list, so it should have a GTT space bound by now.
3186                  */
3187                 if (target_obj_priv->gtt_space == NULL) {
3188                         DRM_ERROR("No GTT space found for object %d\n",
3189                                   reloc->target_handle);
3190                         drm_gem_object_unreference(target_obj);
3191                         i915_gem_object_unpin(obj);
3192                         return -EINVAL;
3193                 }
3194
3195                 /* Validate that the target is in a valid r/w GPU domain */
3196                 if (reloc->write_domain & (reloc->write_domain - 1)) {
3197                         DRM_ERROR("reloc with multiple write domains: "
3198                                   "obj %p target %d offset %d "
3199                                   "read %08x write %08x\n",
3200                                   obj, reloc->target_handle,
3201                                   (int) reloc->offset,
3202                                   reloc->read_domains,
3203                                   reloc->write_domain);
                             drm_gem_object_unreference(target_obj);
                             i915_gem_object_unpin(obj);
3204                         return -EINVAL;
3205                 }
3206                 if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
3207                     reloc->read_domains & I915_GEM_DOMAIN_CPU) {
3208                         DRM_ERROR("reloc with read/write CPU domains: "
3209                                   "obj %p target %d offset %d "
3210                                   "read %08x write %08x\n",
3211                                   obj, reloc->target_handle,
3212                                   (int) reloc->offset,
3213                                   reloc->read_domains,
3214                                   reloc->write_domain);
3215                         drm_gem_object_unreference(target_obj);
3216                         i915_gem_object_unpin(obj);
3217                         return -EINVAL;
3218                 }
3219                 if (reloc->write_domain && target_obj->pending_write_domain &&
3220                     reloc->write_domain != target_obj->pending_write_domain) {
3221                         DRM_ERROR("Write domain conflict: "
3222                                   "obj %p target %d offset %d "
3223                                   "new %08x old %08x\n",
3224                                   obj, reloc->target_handle,
3225                                   (int) reloc->offset,
3226                                   reloc->write_domain,
3227                                   target_obj->pending_write_domain);
3228                         drm_gem_object_unreference(target_obj);
3229                         i915_gem_object_unpin(obj);
3230                         return -EINVAL;
3231                 }
3232
3233                 target_obj->pending_read_domains |= reloc->read_domains;
3234                 target_obj->pending_write_domain |= reloc->write_domain;
3235
3236                 /* If the relocation already has the right value in it, no
3237                  * more work needs to be done.
3238                  */
3239                 if (target_obj_priv->gtt_offset == reloc->presumed_offset) {
3240                         drm_gem_object_unreference(target_obj);
3241                         continue;
3242                 }
3243
3244                 /* Check that the relocation address is valid... */
3245                 if (reloc->offset > obj->size - 4) {
3246                         DRM_ERROR("Relocation beyond object bounds: "
3247                                   "obj %p target %d offset %d size %d.\n",
3248                                   obj, reloc->target_handle,
3249                                   (int) reloc->offset, (int) obj->size);
3250                         drm_gem_object_unreference(target_obj);
3251                         i915_gem_object_unpin(obj);
3252                         return -EINVAL;
3253                 }
3254                 if (reloc->offset & 3) {
3255                         DRM_ERROR("Relocation not 4-byte aligned: "
3256                                   "obj %p target %d offset %d.\n",
3257                                   obj, reloc->target_handle,
3258                                   (int) reloc->offset);
3259                         drm_gem_object_unreference(target_obj);
3260                         i915_gem_object_unpin(obj);
3261                         return -EINVAL;
3262                 }
3263
3264                 /* and points to somewhere within the target object. */
3265                 if (reloc->delta >= target_obj->size) {
3266                         DRM_ERROR("Relocation beyond target object bounds: "
3267                                   "obj %p target %d delta %d size %d.\n",
3268                                   obj, reloc->target_handle,
3269                                   (int) reloc->delta, (int) target_obj->size);
3270                         drm_gem_object_unreference(target_obj);
3271                         i915_gem_object_unpin(obj);
3272                         return -EINVAL;
3273                 }
3274
3275                 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
3276                 if (ret != 0) {
3277                         drm_gem_object_unreference(target_obj);
3278                         i915_gem_object_unpin(obj);
3279                         return ret;
3280                 }
3281
3282                 /* Map the page containing the relocation we're going to
3283                  * perform.
3284                  */
3285                 reloc_offset = obj_priv->gtt_offset + reloc->offset;
3286                 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3287                                                       (reloc_offset &
3288                                                        ~(PAGE_SIZE - 1)),
3289                                                       KM_USER0);
3290                 reloc_entry = (uint32_t __iomem *)(reloc_page +
3291                                                    (reloc_offset & (PAGE_SIZE - 1)));
3292                 reloc_val = target_obj_priv->gtt_offset + reloc->delta;
3293
3294 #if WATCH_BUF
3295                 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n",
3296                           obj, (unsigned int) reloc->offset,
3297                           readl(reloc_entry), reloc_val);
3298 #endif
3299                 writel(reloc_val, reloc_entry);
3300                 io_mapping_unmap_atomic(reloc_page, KM_USER0);
3301
3302                 /* The updated presumed offset for this entry will be
3303                  * copied back out to the user.
3304                  */
3305                 reloc->presumed_offset = target_obj_priv->gtt_offset;
3306
3307                 drm_gem_object_unreference(target_obj);
3308         }
3309
3310 #if WATCH_BUF
3311         if (0)
3312                 i915_gem_dump_object(obj, 128, __func__, ~0);
3313 #endif
3314         return 0;
3315 }
3316
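/*
 * Illustrative sketch (not compiled): what userspace puts in a relocation
 * entry consumed by the loop above.  "offset" is the location of the dword
 * inside this object that refers to the target buffer; "delta" is added to
 * the target's final GTT offset before that dword is rewritten.  The names
 * target_bo_handle, batch_byte_offset and last_known_gtt_offset are
 * placeholders:
 *
 *        struct drm_i915_gem_relocation_entry reloc = {
 *                .target_handle = target_bo_handle,
 *                .offset = batch_byte_offset,
 *                .delta = 0,
 *                .presumed_offset = last_known_gtt_offset,
 *                .read_domains = I915_GEM_DOMAIN_RENDER,
 *                .write_domain = I915_GEM_DOMAIN_RENDER,
 *        };
 *
 * If presumed_offset already matches the target's current GTT offset the
 * kernel skips rewriting that dword, as the loop above shows.
 */
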
3317 /* Throttle our rendering by waiting until the ring has completed our requests
3318  * emitted over 20 msec ago.
3319  *
3320  * Note that if we were to use the current jiffies each time around the loop,
3321  * we wouldn't escape the function with any frames outstanding if the time to
3322  * render a frame was over 20ms.
3323  *
3324  * This should get us reasonable parallelism between CPU and GPU but also
3325  * relatively low latency when blocking on a particular request to finish.
3326  */
3327 static int
3328 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
3329 {
3330         struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
3331         int ret = 0;
3332         unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3333
3334         mutex_lock(&dev->struct_mutex);
3335         while (!list_empty(&i915_file_priv->mm.request_list)) {
3336                 struct drm_i915_gem_request *request;
3337
3338                 request = list_first_entry(&i915_file_priv->mm.request_list,
3339                                            struct drm_i915_gem_request,
3340                                            client_list);
3341
3342                 if (time_after_eq(request->emitted_jiffies, recent_enough))
3343                         break;
3344
3345                 ret = i915_wait_request(dev, request->seqno, request->ring);
3346                 if (ret != 0)
3347                         break;
3348         }
3349         mutex_unlock(&dev->struct_mutex);
3350
3351         return ret;
3352 }
3353
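/*
 * Illustrative sketch (not compiled): userspace typically issues the
 * throttle ioctl once per frame to stay only a frame or two ahead of the
 * GPU; it takes no argument and simply blocks according to the policy
 * above ("fd" is a placeholder for the opened DRM device):
 *
 *        drmIoctl(fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL);
 */
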
3354 static int
3355 i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list,
3356                               uint32_t buffer_count,
3357                               struct drm_i915_gem_relocation_entry **relocs)
3358 {
3359         uint32_t reloc_count = 0, reloc_index = 0, i;
3360         int ret;
3361
3362         *relocs = NULL;
3363         for (i = 0; i < buffer_count; i++) {
3364                 if (reloc_count + exec_list[i].relocation_count < reloc_count)
3365                         return -EINVAL;
3366                 reloc_count += exec_list[i].relocation_count;
3367         }
3368
3369         *relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
3370         if (*relocs == NULL) {
3371                 DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count);
3372                 return -ENOMEM;
3373         }
3374
3375         for (i = 0; i < buffer_count; i++) {
3376                 struct drm_i915_gem_relocation_entry __user *user_relocs;
3377
3378                 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3379
3380                 ret = copy_from_user(&(*relocs)[reloc_index],
3381                                      user_relocs,
3382                                      exec_list[i].relocation_count *
3383                                      sizeof(**relocs));
3384                 if (ret != 0) {
3385                         drm_free_large(*relocs);
3386                         *relocs = NULL;
3387                         return -EFAULT;
3388                 }
3389
3390                 reloc_index += exec_list[i].relocation_count;
3391         }
3392
3393         return 0;
3394 }
3395
3396 static int
3397 i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list,
3398                             uint32_t buffer_count,
3399                             struct drm_i915_gem_relocation_entry *relocs)
3400 {
3401         uint32_t reloc_count = 0, i;
3402         int ret = 0;
3403
3404         if (relocs == NULL)
3405                 return 0;
3406
3407         for (i = 0; i < buffer_count; i++) {
3408                 struct drm_i915_gem_relocation_entry __user *user_relocs;
3409                 int unwritten;
3410
3411                 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3412
3413                 unwritten = copy_to_user(user_relocs,
3414                                          &relocs[reloc_count],
3415                                          exec_list[i].relocation_count *
3416                                          sizeof(*relocs));
3417
3418                 if (unwritten) {
3419                         ret = -EFAULT;
3420                         goto err;
3421                 }
3422
3423                 reloc_count += exec_list[i].relocation_count;
3424         }
3425
3426 err:
3427         drm_free_large(relocs);
3428
3429         return ret;
3430 }
3431
3432 static int
3433 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec,
3434                           uint64_t exec_offset)
3435 {
3436         uint32_t exec_start, exec_len;
3437
3438         exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3439         exec_len = (uint32_t) exec->batch_len;
3440
3441         if ((exec_start | exec_len) & 0x7)
3442                 return -EINVAL;
3443
3444         if (!exec_start)
3445                 return -EINVAL;
3446
3447         return 0;
3448 }
3449
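/*
 * Worked example of the check above: with exec_offset 0x10000, a
 * batch_start_offset of 0x40 and a batch_len of 0x88 pass, since
 * (0x10040 | 0x88) & 0x7 == 0, while a batch_start_offset of 0x44, or an
 * exec_start of 0, is rejected with -EINVAL.
 */
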
3450 static int
3451 i915_gem_wait_for_pending_flip(struct drm_device *dev,
3452                                struct drm_gem_object **object_list,
3453                                int count)
3454 {
3455         drm_i915_private_t *dev_priv = dev->dev_private;
3456         struct drm_i915_gem_object *obj_priv;
3457         DEFINE_WAIT(wait);
3458         int i, ret = 0;
3459
3460         for (;;) {
3461                 prepare_to_wait(&dev_priv->pending_flip_queue,
3462                                 &wait, TASK_INTERRUPTIBLE);
3463                 for (i = 0; i < count; i++) {
3464                         obj_priv = to_intel_bo(object_list[i]);
3465                         if (atomic_read(&obj_priv->pending_flip) > 0)
3466                                 break;
3467                 }
3468                 if (i == count)
3469                         break;
3470
3471                 if (!signal_pending(current)) {
3472                         mutex_unlock(&dev->struct_mutex);
3473                         schedule();
3474                         mutex_lock(&dev->struct_mutex);
3475                         continue;
3476                 }
3477                 ret = -ERESTARTSYS;
3478                 break;
3479         }
3480         finish_wait(&dev_priv->pending_flip_queue, &wait);
3481
3482         return ret;
3483 }
3484
3485 static int
3486 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
3487                        struct drm_file *file_priv,
3488                        struct drm_i915_gem_execbuffer2 *args,
3489                        struct drm_i915_gem_exec_object2 *exec_list)
3490 {
3491         drm_i915_private_t *dev_priv = dev->dev_private;
3492         struct drm_gem_object **object_list = NULL;
3493         struct drm_gem_object *batch_obj;
3494         struct drm_i915_gem_object *obj_priv;
3495         struct drm_clip_rect *cliprects = NULL;
3496         struct drm_i915_gem_relocation_entry *relocs = NULL;
3497         struct drm_i915_gem_request *request = NULL;
3498         int ret = 0, ret2, i, pinned = 0;
3499         uint64_t exec_offset;
3500         uint32_t seqno, reloc_index;
3501         int pin_tries, flips;
3502
3503         struct intel_ring_buffer *ring = NULL;
3504
3505 #if WATCH_EXEC
3506         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3507                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3508 #endif
3509         if (args->flags & I915_EXEC_BSD) {
3510                 if (!HAS_BSD(dev)) {
3511                         DRM_ERROR("execbuf with wrong flag\n");
3512                         return -EINVAL;
3513                 }
3514                 ring = &dev_priv->bsd_ring;
3515         } else {
3516                 ring = &dev_priv->render_ring;
3517         }
3518
3519         if (args->buffer_count < 1) {
3520                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3521                 return -EINVAL;
3522         }
3523         object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count);
3524         if (object_list == NULL) {
3525                 DRM_ERROR("Failed to allocate object list for %d buffers\n",
3526                           args->buffer_count);
3527                 ret = -ENOMEM;
3528                 goto pre_mutex_err;
3529         }
3530
3531         if (args->num_cliprects != 0) {
3532                 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
3533                                     GFP_KERNEL);
3534                 if (cliprects == NULL) {
3535                         ret = -ENOMEM;
3536                         goto pre_mutex_err;
3537                 }
3538
3539                 ret = copy_from_user(cliprects,
3540                                      (struct drm_clip_rect __user *)
3541                                      (uintptr_t) args->cliprects_ptr,
3542                                      sizeof(*cliprects) * args->num_cliprects);
3543                 if (ret != 0) {
3544                         DRM_ERROR("copy %d cliprects failed: %d\n",
3545                                   args->num_cliprects, ret);
3546                         ret = -EFAULT;
3547                         goto pre_mutex_err;
3548                 }
3549         }
3550
3551         request = kzalloc(sizeof(*request), GFP_KERNEL);
3552         if (request == NULL) {
3553                 ret = -ENOMEM;
3554                 goto pre_mutex_err;
3555         }
3556
3557         ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count,
3558                                             &relocs);
3559         if (ret != 0)
3560                 goto pre_mutex_err;
3561
3562         mutex_lock(&dev->struct_mutex);
3563
3564         i915_verify_inactive(dev, __FILE__, __LINE__);
3565
3566         if (atomic_read(&dev_priv->mm.wedged)) {
3567                 mutex_unlock(&dev->struct_mutex);
3568                 ret = -EIO;
3569                 goto pre_mutex_err;
3570         }
3571
3572         if (dev_priv->mm.suspended) {
3573                 mutex_unlock(&dev->struct_mutex);
3574                 ret = -EBUSY;
3575                 goto pre_mutex_err;
3576         }
3577
3578         /* Look up object handles */
3579         flips = 0;
3580         for (i = 0; i < args->buffer_count; i++) {
3581                 object_list[i] = drm_gem_object_lookup(dev, file_priv,
3582                                                        exec_list[i].handle);
3583                 if (object_list[i] == NULL) {
3584                         DRM_ERROR("Invalid object handle %d at index %d\n",
3585                                    exec_list[i].handle, i);
3586                         /* prevent error path from reading uninitialized data */
3587                         args->buffer_count = i + 1;
3588                         ret = -ENOENT;
3589                         goto err;
3590                 }
3591
3592                 obj_priv = to_intel_bo(object_list[i]);
3593                 if (obj_priv->in_execbuffer) {
3594                         DRM_ERROR("Object %p appears more than once in object list\n",
3595                                    object_list[i]);
3596                         /* prevent error path from reading uninitialized data */
3597                         args->buffer_count = i + 1;
3598                         ret = -EINVAL;
3599                         goto err;
3600                 }
3601                 obj_priv->in_execbuffer = true;
3602                 flips += atomic_read(&obj_priv->pending_flip);
3603         }
3604
3605         if (flips > 0) {
3606                 ret = i915_gem_wait_for_pending_flip(dev, object_list,
3607                                                      args->buffer_count);
3608                 if (ret)
3609                         goto err;
3610         }
3611
3612         /* Pin and relocate */
3613         for (pin_tries = 0; ; pin_tries++) {
3614                 ret = 0;
3615                 reloc_index = 0;
3616
3617                 for (i = 0; i < args->buffer_count; i++) {
3618                         object_list[i]->pending_read_domains = 0;
3619                         object_list[i]->pending_write_domain = 0;
3620                         ret = i915_gem_object_pin_and_relocate(object_list[i],
3621                                                                file_priv,
3622                                                                &exec_list[i],
3623                                                                &relocs[reloc_index]);
3624                         if (ret)
3625                                 break;
3626                         pinned = i + 1;
3627                         reloc_index += exec_list[i].relocation_count;
3628                 }
3629                 /* success */
3630                 if (ret == 0)
3631                         break;
3632
3633                 /* error other than GTT full, or we've already tried again */
3634                 if (ret != -ENOSPC || pin_tries >= 1) {
3635                         if (ret != -ERESTARTSYS) {
3636                                 unsigned long long total_size = 0;
3637                                 int num_fences = 0;
3638                                 for (i = 0; i < args->buffer_count; i++) {
3639                                         obj_priv = to_intel_bo(object_list[i]);
3640
3641                                         total_size += object_list[i]->size;
3642                                         num_fences +=
3643                                                 exec_list[i].flags & EXEC_OBJECT_NEEDS_FENCE &&
3644                                                 obj_priv->tiling_mode != I915_TILING_NONE;
3645                                 }
3646                                 DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes, %d fences: %d\n",
3647                                           pinned+1, args->buffer_count,
3648                                           total_size, num_fences,
3649                                           ret);
3650                                 DRM_ERROR("%d objects [%d pinned], "
3651                                           "%d object bytes [%d pinned], "
3652                                           "%d/%d gtt bytes\n",
3653                                           atomic_read(&dev->object_count),
3654                                           atomic_read(&dev->pin_count),
3655                                           atomic_read(&dev->object_memory),
3656                                           atomic_read(&dev->pin_memory),
3657                                           atomic_read(&dev->gtt_memory),
3658                                           dev->gtt_total);
3659                         }
3660                         goto err;
3661                 }
3662
3663                 /* unpin all of our buffers */
3664                 for (i = 0; i < pinned; i++)
3665                         i915_gem_object_unpin(object_list[i]);
3666                 pinned = 0;
3667
3668                 /* evict everyone we can from the aperture */
3669                 ret = i915_gem_evict_everything(dev);
3670                 if (ret && ret != -ENOSPC)
3671                         goto err;
3672         }
3673
3674         /* Set the pending read domains for the batch buffer to COMMAND */
3675         batch_obj = object_list[args->buffer_count-1];
3676         if (batch_obj->pending_write_domain) {
3677                 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
3678                 ret = -EINVAL;
3679                 goto err;
3680         }
3681         batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
3682
3683         /* Sanity check the batch buffer, prior to moving objects */
3684         exec_offset = exec_list[args->buffer_count - 1].offset;
3685         ret = i915_gem_check_execbuffer(args, exec_offset);
3686         if (ret != 0) {
3687                 DRM_ERROR("execbuf with invalid offset/length\n");
3688                 goto err;
3689         }
3690
3691         i915_verify_inactive(dev, __FILE__, __LINE__);
3692
3693         /* Zero the global flush/invalidate flags. These
3694          * will be modified as new domains are computed
3695          * for each object
3696          */
3697         dev->invalidate_domains = 0;
3698         dev->flush_domains = 0;
3699
3700         for (i = 0; i < args->buffer_count; i++) {
3701                 struct drm_gem_object *obj = object_list[i];
3702
3703                 /* Compute new gpu domains and update invalidate/flush */
3704                 i915_gem_object_set_to_gpu_domain(obj);
3705         }
3706
3707         i915_verify_inactive(dev, __FILE__, __LINE__);
3708
3709         if (dev->invalidate_domains | dev->flush_domains) {
3710 #if WATCH_EXEC
3711                 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3712                           __func__,
3713                          dev->invalidate_domains,
3714                          dev->flush_domains);
3715 #endif
3716                 i915_gem_flush(dev,
3717                                dev->invalidate_domains,
3718                                dev->flush_domains);
3719         }
3720
3721         if (dev_priv->render_ring.outstanding_lazy_request) {
3722                 (void)i915_add_request(dev, file_priv, NULL, &dev_priv->render_ring);
3723                 dev_priv->render_ring.outstanding_lazy_request = false;
3724         }
3725         if (dev_priv->bsd_ring.outstanding_lazy_request) {
3726                 (void)i915_add_request(dev, file_priv, NULL, &dev_priv->bsd_ring);
3727                 dev_priv->bsd_ring.outstanding_lazy_request = false;
3728         }
3729
3730         for (i = 0; i < args->buffer_count; i++) {
3731                 struct drm_gem_object *obj = object_list[i];
3732                 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3733                 uint32_t old_write_domain = obj->write_domain;
3734
3735                 obj->write_domain = obj->pending_write_domain;
3736                 if (obj->write_domain)
3737                         list_move_tail(&obj_priv->gpu_write_list,
3738                                        &dev_priv->mm.gpu_write_list);
3739                 else
3740                         list_del_init(&obj_priv->gpu_write_list);
3741
3742                 trace_i915_gem_object_change_domain(obj,
3743                                                     obj->read_domains,
3744                                                     old_write_domain);
3745         }
3746
3747         i915_verify_inactive(dev, __FILE__, __LINE__);
3748
3749 #if WATCH_COHERENCY
3750         for (i = 0; i < args->buffer_count; i++) {
3751                 i915_gem_object_check_coherency(object_list[i],
3752                                                 exec_list[i].handle);
3753         }
3754 #endif
3755
3756 #if WATCH_EXEC
3757         i915_gem_dump_object(batch_obj,
3758                               args->batch_len,
3759                               __func__,
3760                               ~0);
3761 #endif
3762
3763         /* Exec the batchbuffer */
3764         ret = ring->dispatch_gem_execbuffer(dev, ring, args,
3765                         cliprects, exec_offset);
3766         if (ret) {
3767                 DRM_ERROR("dispatch failed %d\n", ret);
3768                 goto err;
3769         }
3770
3771         /*
3772          * Ensure that the commands in the batch buffer are
3773          * finished before the interrupt fires
3774          */
3775         i915_retire_commands(dev, ring);
3776
3777         i915_verify_inactive(dev, __FILE__, __LINE__);
3778
3779         for (i = 0; i < args->buffer_count; i++) {
3780                 struct drm_gem_object *obj = object_list[i];
3781                 obj_priv = to_intel_bo(obj);
3782
3783                 i915_gem_object_move_to_active(obj, ring);
3784 #if WATCH_LRU
3785                 DRM_INFO("%s: move to exec list %p\n", __func__, obj);
3786 #endif
3787         }
3788
3789         /*
3790          * Get a seqno representing the execution of the current buffer,
3791          * which we can wait on.  We would like to mitigate these interrupts,
3792          * likely by only creating seqnos occasionally (so that we have
3793          * *some* interrupts representing completion of buffers that we can
3794          * wait on when trying to clear up gtt space).
3795          */
3796         seqno = i915_add_request(dev, file_priv, request, ring);
3797         request = NULL;
3798
3799 #if WATCH_LRU
3800         i915_dump_lru(dev, __func__);
3801 #endif
3802
3803         i915_verify_inactive(dev, __FILE__, __LINE__);
3804
3805 err:
3806         for (i = 0; i < pinned; i++)
3807                 i915_gem_object_unpin(object_list[i]);
3808
3809         for (i = 0; i < args->buffer_count; i++) {
3810                 if (object_list[i]) {
3811                         obj_priv = to_intel_bo(object_list[i]);
3812                         obj_priv->in_execbuffer = false;
3813                 }
3814                 drm_gem_object_unreference(object_list[i]);
3815         }
3816
3817         mutex_unlock(&dev->struct_mutex);
3818
3819 pre_mutex_err:
3820         /* Copy the updated relocations out regardless of current error
3821          * state.  Failure to update the relocs would mean that the next
3822          * time userland calls execbuf, it would do so with presumed offset
3823          * state that didn't match the actual object state.
3824          */
3825         ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count,
3826                                            relocs);
3827         if (ret2 != 0) {
3828                 DRM_ERROR("Failed to copy relocations back out: %d\n", ret2);
3829
3830                 if (ret == 0)
3831                         ret = ret2;
3832         }
3833
3834         drm_free_large(object_list);
3835         kfree(cliprects);
3836         kfree(request);
3837
3838         return ret;
3839 }
3840
3841 /*
3842  * Legacy execbuffer just creates an exec2 list from the original exec object
3843  * list array and passes it to the real function.
3844  */
3845 int
3846 i915_gem_execbuffer(struct drm_device *dev, void *data,
3847                     struct drm_file *file_priv)
3848 {
3849         struct drm_i915_gem_execbuffer *args = data;
3850         struct drm_i915_gem_execbuffer2 exec2;
3851         struct drm_i915_gem_exec_object *exec_list = NULL;
3852         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
3853         int ret, i;
3854
3855 #if WATCH_EXEC
3856         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3857                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3858 #endif
3859
3860         if (args->buffer_count < 1) {
3861                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3862                 return -EINVAL;
3863         }
3864
3865         /* Copy in the exec list from userland */
3866         exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
3867         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
3868         if (exec_list == NULL || exec2_list == NULL) {
3869                 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
3870                           args->buffer_count);
3871                 drm_free_large(exec_list);
3872                 drm_free_large(exec2_list);
3873                 return -ENOMEM;
3874         }
3875         ret = copy_from_user(exec_list,
3876                              (struct drm_i915_gem_exec_object __user *)
3877                              (uintptr_t) args->buffers_ptr,
3878                              sizeof(*exec_list) * args->buffer_count);
3879         if (ret != 0) {
3880                 DRM_ERROR("copy %d exec entries failed %d\n",
3881                           args->buffer_count, ret);
3882                 drm_free_large(exec_list);
3883                 drm_free_large(exec2_list);
3884                 return -EFAULT;
3885         }
3886
3887         for (i = 0; i < args->buffer_count; i++) {
3888                 exec2_list[i].handle = exec_list[i].handle;
3889                 exec2_list[i].relocation_count = exec_list[i].relocation_count;
3890                 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
3891                 exec2_list[i].alignment = exec_list[i].alignment;
3892                 exec2_list[i].offset = exec_list[i].offset;
3893                 if (!IS_I965G(dev))
3894                         exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
3895                 else
3896                         exec2_list[i].flags = 0;
3897         }
3898
3899         exec2.buffers_ptr = args->buffers_ptr;
3900         exec2.buffer_count = args->buffer_count;
3901         exec2.batch_start_offset = args->batch_start_offset;
3902         exec2.batch_len = args->batch_len;
3903         exec2.DR1 = args->DR1;
3904         exec2.DR4 = args->DR4;
3905         exec2.num_cliprects = args->num_cliprects;
3906         exec2.cliprects_ptr = args->cliprects_ptr;
3907         exec2.flags = I915_EXEC_RENDER;
3908
3909         ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list);
3910         if (!ret) {
3911                 /* Copy the new buffer offsets back to the user's exec list. */
3912                 for (i = 0; i < args->buffer_count; i++)
3913                         exec_list[i].offset = exec2_list[i].offset;
3914                 /* ... and back out to userspace */
3915                 ret = copy_to_user((struct drm_i915_gem_exec_object __user *)
3916                                    (uintptr_t) args->buffers_ptr,
3917                                    exec_list,
3918                                    sizeof(*exec_list) * args->buffer_count);
3919                 if (ret) {
3920                         ret = -EFAULT;
3921                         DRM_ERROR("failed to copy %d exec entries "
3922                                   "back to user (%d)\n",
3923                                   args->buffer_count, ret);
3924                 }
3925         }
3926
3927         drm_free_large(exec_list);
3928         drm_free_large(exec2_list);
3929         return ret;
3930 }
3931
3932 int
3933 i915_gem_execbuffer2(struct drm_device *dev, void *data,
3934                      struct drm_file *file_priv)
3935 {
3936         struct drm_i915_gem_execbuffer2 *args = data;
3937         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
3938         int ret;
3939
3940 #if WATCH_EXEC
3941         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3942                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3943 #endif
3944
3945         if (args->buffer_count < 1) {
3946                 DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
3947                 return -EINVAL;
3948         }
3949
3950         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
3951         if (exec2_list == NULL) {
3952                 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
3953                           args->buffer_count);
3954                 return -ENOMEM;
3955         }
3956         ret = copy_from_user(exec2_list,
3957                              (struct drm_i915_gem_exec_object2 __user *)
3958                              (uintptr_t) args->buffers_ptr,
3959                              sizeof(*exec2_list) * args->buffer_count);
3960         if (ret != 0) {
3961                 DRM_ERROR("copy %d exec entries failed %d\n",
3962                           args->buffer_count, ret);
3963                 drm_free_large(exec2_list);
3964                 return -EFAULT;
3965         }
3966
3967         ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list);
3968         if (!ret) {
3969                 /* Copy the new buffer offsets back to the user's exec list. */
3970                 ret = copy_to_user((struct drm_i915_gem_exec_object2 __user *)
3971                                    (uintptr_t) args->buffers_ptr,
3972                                    exec2_list,
3973                                    sizeof(*exec2_list) * args->buffer_count);
3974                 if (ret) {
3975                         ret = -EFAULT;
3976                         DRM_ERROR("failed to copy %d exec entries "
3977                                   "back to user (%d)\n",
3978                                   args->buffer_count, ret);
3979                 }
3980         }
3981
3982         drm_free_large(exec2_list);
3983         return ret;
3984 }
3985
3986 int
3987 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
3988 {
3989         struct drm_device *dev = obj->dev;
3990         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3991         int ret;
3992
3993         BUG_ON(obj_priv->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
3994
3995         i915_verify_inactive(dev, __FILE__, __LINE__);
3996
3997         if (obj_priv->gtt_space != NULL) {
3998                 if (alignment == 0)
3999                         alignment = i915_gem_get_gtt_alignment(obj);
4000                 if (obj_priv->gtt_offset & (alignment - 1)) {
4001                         WARN(obj_priv->pin_count,
4002                              "bo is already pinned with incorrect alignment:"
4003                              " offset=%x, req.alignment=%x\n",
4004                              obj_priv->gtt_offset, alignment);
4005                         ret = i915_gem_object_unbind(obj);
4006                         if (ret)
4007                                 return ret;
4008                 }
4009         }
4010
4011         if (obj_priv->gtt_space == NULL) {
4012                 ret = i915_gem_object_bind_to_gtt(obj, alignment);
4013                 if (ret)
4014                         return ret;
4015         }
4016
4017         obj_priv->pin_count++;
4018
4019         /* If the object is not active and not pending a flush,
4020          * remove it from the inactive list
4021          */
4022         if (obj_priv->pin_count == 1) {
4023                 atomic_inc(&dev->pin_count);
4024                 atomic_add(obj->size, &dev->pin_memory);
4025                 if (!obj_priv->active &&
4026                     (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
4027                         list_del_init(&obj_priv->list);
4028         }
4029         i915_verify_inactive(dev, __FILE__, __LINE__);
4030
4031         return 0;
4032 }
4033
4034 void
4035 i915_gem_object_unpin(struct drm_gem_object *obj)
4036 {
4037         struct drm_device *dev = obj->dev;
4038         drm_i915_private_t *dev_priv = dev->dev_private;
4039         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4040
4041         i915_verify_inactive(dev, __FILE__, __LINE__);
4042         obj_priv->pin_count--;
4043         BUG_ON(obj_priv->pin_count < 0);
4044         BUG_ON(obj_priv->gtt_space == NULL);
4045
4046         /* If the object is no longer pinned, and is
4047          * neither active nor being flushed, then stick it on
4048          * the inactive list
4049          */
4050         if (obj_priv->pin_count == 0) {
4051                 if (!obj_priv->active &&
4052                     (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
4053                         list_move_tail(&obj_priv->list,
4054                                        &dev_priv->mm.inactive_list);
4055                 atomic_dec(&dev->pin_count);
4056                 atomic_sub(obj->size, &dev->pin_memory);
4057         }
4058         i915_verify_inactive(dev, __FILE__, __LINE__);
4059 }
4060
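/*
 * Illustrative sketch (not compiled): kernel-internal users pair these two
 * calls around the period they need a stable GTT offset, for example when
 * programming a scanout or ring buffer.  The alignment of 4096 is only an
 * example value:
 *
 *        ret = i915_gem_object_pin(obj, 4096);
 *        if (ret)
 *                return ret;
 *        ... program the hardware with to_intel_bo(obj)->gtt_offset ...
 *        i915_gem_object_unpin(obj);
 */
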
4061 int
4062 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4063                    struct drm_file *file_priv)
4064 {
4065         struct drm_i915_gem_pin *args = data;
4066         struct drm_gem_object *obj;
4067         struct drm_i915_gem_object *obj_priv;
4068         int ret;
4069
4070         mutex_lock(&dev->struct_mutex);
4071
4072         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4073         if (obj == NULL) {
4074                 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n",
4075                           args->handle);
4076                 mutex_unlock(&dev->struct_mutex);
4077                 return -ENOENT;
4078         }
4079         obj_priv = to_intel_bo(obj);
4080
4081         if (obj_priv->madv != I915_MADV_WILLNEED) {
4082                 DRM_ERROR("Attempting to pin a purgeable buffer\n");
4083                 drm_gem_object_unreference(obj);
4084                 mutex_unlock(&dev->struct_mutex);
4085                 return -EINVAL;
4086         }
4087
4088         if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
4089                 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
4090                           args->handle);
4091                 drm_gem_object_unreference(obj);
4092                 mutex_unlock(&dev->struct_mutex);
4093                 return -EINVAL;
4094         }
4095
4096         if (obj_priv->user_pin_count == 0) {
4097                 ret = i915_gem_object_pin(obj, args->alignment);
4098                 if (ret != 0) {
4099                         drm_gem_object_unreference(obj);
4100                         mutex_unlock(&dev->struct_mutex);
4101                         return ret;
4102                 }
4103         }
4104         obj_priv->user_pin_count++;
4105         obj_priv->pin_filp = file_priv;
4106
4107         /* XXX - flush the CPU caches for pinned objects
4108          * as the X server doesn't manage domains yet
4109          */
4110         i915_gem_object_flush_cpu_write_domain(obj);
4111         args->offset = obj_priv->gtt_offset;
4112         drm_gem_object_unreference(obj);
4113         mutex_unlock(&dev->struct_mutex);
4114
4115         return 0;
4116 }
4117
4118 int
4119 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4120                      struct drm_file *file_priv)
4121 {
4122         struct drm_i915_gem_pin *args = data;
4123         struct drm_gem_object *obj;
4124         struct drm_i915_gem_object *obj_priv;
4125
4126         mutex_lock(&dev->struct_mutex);
4127
4128         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4129         if (obj == NULL) {
4130                 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n",
4131                           args->handle);
4132                 mutex_unlock(&dev->struct_mutex);
4133                 return -ENOENT;
4134         }
4135
4136         obj_priv = to_intel_bo(obj);
4137         if (obj_priv->pin_filp != file_priv) {
4138                 DRM_ERROR("Not pinned by caller in i915_gem_unpin_ioctl(): %d\n",
4139                           args->handle);
4140                 drm_gem_object_unreference(obj);
4141                 mutex_unlock(&dev->struct_mutex);
4142                 return -EINVAL;
4143         }
4144         obj_priv->user_pin_count--;
4145         if (obj_priv->user_pin_count == 0) {
4146                 obj_priv->pin_filp = NULL;
4147                 i915_gem_object_unpin(obj);
4148         }
4149
4150         drm_gem_object_unreference(obj);
4151         mutex_unlock(&dev->struct_mutex);
4152         return 0;
4153 }
4154
4155 int
4156 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4157                     struct drm_file *file_priv)
4158 {
4159         struct drm_i915_gem_busy *args = data;
4160         struct drm_gem_object *obj;
4161         struct drm_i915_gem_object *obj_priv;
4162
4163         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4164         if (obj == NULL) {
4165                 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
4166                           args->handle);
4167                 return -ENOENT;
4168         }
4169
4170         mutex_lock(&dev->struct_mutex);
4171
4172         /* Count all active objects as busy, even if they are currently not used
4173          * by the gpu. Users of this interface expect objects to eventually
4174          * become non-busy without any further actions, therefore emit any
4175          * necessary flushes here.
4176          */
4177         obj_priv = to_intel_bo(obj);
4178         args->busy = obj_priv->active;
4179         if (args->busy) {
4180                 /* Unconditionally flush objects, even when the gpu still uses this
4181                  * object. Userspace calling this function indicates that it wants to
4182                  * use this buffer rather sooner than later, so issuing the required
4183                  * flush earlier is beneficial.
4184                  */
4185                 if (obj->write_domain) {
4186                         i915_gem_flush(dev, 0, obj->write_domain);
4187                         (void)i915_add_request(dev, file_priv, NULL, obj_priv->ring);
4188                 }
4189
4190                 /* Update the active list for the hardware's current position.
4191                  * Otherwise this only updates on a delayed timer or when irqs
4192                  * are actually unmasked, and our working set ends up being
4193                  * larger than required.
4194                  */
4195                 i915_gem_retire_requests_ring(dev, obj_priv->ring);
4196
4197                 args->busy = obj_priv->active;
4198         }
4199
4200         drm_gem_object_unreference(obj);
4201         mutex_unlock(&dev->struct_mutex);
4202         return 0;
4203 }
4204
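/*
 * Illustrative sketch (not compiled): userspace uses the busy ioctl to poll
 * whether a buffer can be reused without stalling ("fd" and "bo_handle" are
 * placeholders):
 *
 *        struct drm_i915_gem_busy busy = { .handle = bo_handle };
 *
 *        drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
 *        if (!busy.busy)
 *                ... the GPU has finished with the buffer ...
 */
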
4205 int
4206 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4207                         struct drm_file *file_priv)
4208 {
4209         return i915_gem_ring_throttle(dev, file_priv);
4210 }
4211
4212 int
4213 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4214                        struct drm_file *file_priv)
4215 {
4216         struct drm_i915_gem_madvise *args = data;
4217         struct drm_gem_object *obj;
4218         struct drm_i915_gem_object *obj_priv;
4219
4220         switch (args->madv) {
4221         case I915_MADV_DONTNEED:
4222         case I915_MADV_WILLNEED:
4223                 break;
4224         default:
4225                 return -EINVAL;
4226         }
4227
4228         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4229         if (obj == NULL) {
4230                 DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n",
4231                           args->handle);
4232                 return -ENOENT;
4233         }
4234
4235         mutex_lock(&dev->struct_mutex);
4236         obj_priv = to_intel_bo(obj);
4237
4238         if (obj_priv->pin_count) {
4239                 drm_gem_object_unreference(obj);
4240                 mutex_unlock(&dev->struct_mutex);
4241
4242                 DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n");
4243                 return -EINVAL;
4244         }
4245
4246         if (obj_priv->madv != __I915_MADV_PURGED)
4247                 obj_priv->madv = args->madv;
4248
4249         /* if the object is no longer bound, discard its backing storage */
4250         if (i915_gem_object_is_purgeable(obj_priv) &&
4251             obj_priv->gtt_space == NULL)
4252                 i915_gem_object_truncate(obj);
4253
4254         args->retained = obj_priv->madv != __I915_MADV_PURGED;
4255
4256         drm_gem_object_unreference(obj);
4257         mutex_unlock(&dev->struct_mutex);
4258
4259         return 0;
4260 }
4261
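/*
 * Allocate a GEM object of the given size, starting in the CPU read/write
 * domain with no fence register and its backing storage marked WILLNEED.
 */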
4262 struct drm_gem_object * i915_gem_alloc_object(struct drm_device *dev,
4263                                               size_t size)
4264 {
4265         struct drm_i915_gem_object *obj;
4266
4267         obj = kzalloc(sizeof(*obj), GFP_KERNEL);
4268         if (obj == NULL)
4269                 return NULL;
4270
4271         if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4272                 kfree(obj);
4273                 return NULL;
4274         }
4275
4276         obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4277         obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4278
4279         obj->agp_type = AGP_USER_MEMORY;
4280         obj->base.driver_private = NULL;
4281         obj->fence_reg = I915_FENCE_REG_NONE;
4282         INIT_LIST_HEAD(&obj->list);
4283         INIT_LIST_HEAD(&obj->gpu_write_list);
4284         obj->madv = I915_MADV_WILLNEED;
4285
4286         trace_i915_gem_object_create(&obj->base);
4287
4288         return &obj->base;
4289 }
4290
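/*
 * i915_gem_init_object is not used by this driver; objects are created via
 * i915_gem_alloc_object() instead, so reaching this is a driver bug.
 */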
4291 int i915_gem_init_object(struct drm_gem_object *obj)
4292 {
4293         BUG();
4294
4295         return 0;
4296 }
4297
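/*
 * Final stage of freeing an object: unbind it from the GTT (parking it on
 * the deferred_free_list if the unbind is interrupted by a signal), drop
 * its mmap offset and GEM resources, and free the driver-private state.
 */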
4298 static void i915_gem_free_object_tail(struct drm_gem_object *obj)
4299 {
4300         struct drm_device *dev = obj->dev;
4301         drm_i915_private_t *dev_priv = dev->dev_private;
4302         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4303         int ret;
4304
4305         ret = i915_gem_object_unbind(obj);
4306         if (ret == -ERESTARTSYS) {
4307                 list_move(&obj_priv->list,
4308                           &dev_priv->mm.deferred_free_list);
4309                 return;
4310         }
4311
4312         if (obj_priv->mmap_offset)
4313                 i915_gem_free_mmap_offset(obj);
4314
4315         drm_gem_object_release(obj);
4316
4317         kfree(obj_priv->page_cpu_valid);
4318         kfree(obj_priv->bit_17);
4319         kfree(obj_priv);
4320 }
4321
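/*
 * Drop any remaining pins and physical-object attachment, then hand the
 * object to i915_gem_free_object_tail() for the actual teardown.
 */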
4322 void i915_gem_free_object(struct drm_gem_object *obj)
4323 {
4324         struct drm_device *dev = obj->dev;
4325         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4326
4327         trace_i915_gem_object_destroy(obj);
4328
4329         while (obj_priv->pin_count > 0)
4330                 i915_gem_object_unpin(obj);
4331
4332         if (obj_priv->phys_obj)
4333                 i915_gem_detach_phys_object(dev, obj);
4334
4335         i915_gem_free_object_tail(obj);
4336 }
4337
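/*
 * Quiesce the GPU for suspend or unload: wait for outstanding rendering,
 * evict inactive buffers under UMS, mark the device suspended, tear down
 * the rings and cancel the retire work handler.
 */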
4338 int
4339 i915_gem_idle(struct drm_device *dev)
4340 {
4341         drm_i915_private_t *dev_priv = dev->dev_private;
4342         int ret;
4343
4344         mutex_lock(&dev->struct_mutex);
4345
4346         if (dev_priv->mm.suspended ||
4347             (dev_priv->render_ring.gem_object == NULL) ||
4348             (HAS_BSD(dev) &&
4349              dev_priv->bsd_ring.gem_object == NULL)) {
4350                 mutex_unlock(&dev->struct_mutex);
4351                 return 0;
4352         }
4353
4354         ret = i915_gpu_idle(dev);
4355         if (ret) {
4356                 mutex_unlock(&dev->struct_mutex);
4357                 return ret;
4358         }
4359
4360         /* Under UMS, be paranoid and evict. */
4361         if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
4362                 ret = i915_gem_evict_inactive(dev);
4363                 if (ret) {
4364                         mutex_unlock(&dev->struct_mutex);
4365                         return ret;
4366                 }
4367         }
4368
4369         /* Hack!  Don't let anybody do execbuf while we don't control the chip.
4370          * We need to replace this with a semaphore, or something.
4371          * And not confound mm.suspended!
4372          */
4373         dev_priv->mm.suspended = 1;
4374         del_timer_sync(&dev_priv->hangcheck_timer);
4375
4376         i915_kernel_lost_context(dev);
4377         i915_gem_cleanup_ringbuffer(dev);
4378
4379         mutex_unlock(&dev->struct_mutex);
4380
4381         /* Cancel the retire work handler, which should be idle now. */
4382         cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4383
4384         return 0;
4385 }
4386
4387 /*
4388  * 965+ support PIPE_CONTROL commands, which provide finer grained control
4389  * over cache flushing.
4390  */
4391 static int
4392 i915_gem_init_pipe_control(struct drm_device *dev)
4393 {
4394         drm_i915_private_t *dev_priv = dev->dev_private;
4395         struct drm_gem_object *obj;
4396         struct drm_i915_gem_object *obj_priv;
4397         int ret;
4398
4399         obj = i915_gem_alloc_object(dev, 4096);
4400         if (obj == NULL) {
4401                 DRM_ERROR("Failed to allocate seqno page\n");
4402                 ret = -ENOMEM;
4403                 goto err;
4404         }
4405         obj_priv = to_intel_bo(obj);
4406         obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
4407
4408         ret = i915_gem_object_pin(obj, 4096);
4409         if (ret)
4410                 goto err_unref;
4411
4412         dev_priv->seqno_gfx_addr = obj_priv->gtt_offset;
4413         dev_priv->seqno_page = kmap(obj_priv->pages[0]);
4414         if (dev_priv->seqno_page == NULL)
4415                 goto err_unpin;
4416
4417         dev_priv->seqno_obj = obj;
4418         memset(dev_priv->seqno_page, 0, PAGE_SIZE);
4419
4420         return 0;
4421
4422 err_unpin:
4423         i915_gem_object_unpin(obj);
4424 err_unref:
4425         drm_gem_object_unreference(obj);
4426 err:
4427         return ret;
4428 }
4429
4430
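/* Tear down the seqno page set up by i915_gem_init_pipe_control(). */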
4431 static void
4432 i915_gem_cleanup_pipe_control(struct drm_device *dev)
4433 {
4434         drm_i915_private_t *dev_priv = dev->dev_private;
4435         struct drm_gem_object *obj;
4436         struct drm_i915_gem_object *obj_priv;
4437
4438         obj = dev_priv->seqno_obj;
4439         obj_priv = to_intel_bo(obj);
4440         kunmap(obj_priv->pages[0]);
4441         i915_gem_object_unpin(obj);
4442         drm_gem_object_unreference(obj);
4443         dev_priv->seqno_obj = NULL;
4444
4445         dev_priv->seqno_page = NULL;
4446 }
4447
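/*
 * Set up the render ring (and the BSD ring where the hardware has one),
 * allocating the PIPE_CONTROL seqno page first on chipsets that need it,
 * and start the software seqno counter at 1.
 */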
4448 int
4449 i915_gem_init_ringbuffer(struct drm_device *dev)
4450 {
4451         drm_i915_private_t *dev_priv = dev->dev_private;
4452         int ret;
4453
4454         dev_priv->render_ring = render_ring;
4455
4456         if (!I915_NEED_GFX_HWS(dev)) {
4457                 dev_priv->render_ring.status_page.page_addr
4458                         = dev_priv->status_page_dmah->vaddr;
4459                 memset(dev_priv->render_ring.status_page.page_addr,
4460                        0, PAGE_SIZE);
4461         }
4462
4463         if (HAS_PIPE_CONTROL(dev)) {
4464                 ret = i915_gem_init_pipe_control(dev);
4465                 if (ret)
4466                         return ret;
4467         }
4468
4469         ret = intel_init_ring_buffer(dev, &dev_priv->render_ring);
4470         if (ret)
4471                 goto cleanup_pipe_control;
4472
4473         if (HAS_BSD(dev)) {
4474                 dev_priv->bsd_ring = bsd_ring;
4475                 ret = intel_init_ring_buffer(dev, &dev_priv->bsd_ring);
4476                 if (ret)
4477                         goto cleanup_render_ring;
4478         }
4479
4480         dev_priv->next_seqno = 1;
4481
4482         return 0;
4483
4484 cleanup_render_ring:
4485         intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
4486 cleanup_pipe_control:
4487         if (HAS_PIPE_CONTROL(dev))
4488                 i915_gem_cleanup_pipe_control(dev);
4489         return ret;
4490 }
4491
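/*
 * Release the rings and, where present, the PIPE_CONTROL page set up by
 * i915_gem_init_ringbuffer().
 */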
4492 void
4493 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4494 {
4495         drm_i915_private_t *dev_priv = dev->dev_private;
4496
4497         intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
4498         if (HAS_BSD(dev))
4499                 intel_cleanup_ring_buffer(dev, &dev_priv->bsd_ring);
4500         if (HAS_PIPE_CONTROL(dev))
4501                 i915_gem_cleanup_pipe_control(dev);
4502 }
4503
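/*
 * Bring GEM back up when userspace re-acquires the VT; a no-op under KMS.
 * Clears any wedged state, reinitialises the rings and installs the IRQ
 * handler; the BUG_ONs assert that no requests survived the last idle.
 */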
4504 int
4505 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4506                        struct drm_file *file_priv)
4507 {
4508         drm_i915_private_t *dev_priv = dev->dev_private;
4509         int ret;
4510
4511         if (drm_core_check_feature(dev, DRIVER_MODESET))
4512                 return 0;
4513
4514         if (atomic_read(&dev_priv->mm.wedged)) {
4515                 DRM_ERROR("Reenabling wedged hardware, good luck\n");
4516                 atomic_set(&dev_priv->mm.wedged, 0);
4517         }
4518
4519         mutex_lock(&dev->struct_mutex);
4520         dev_priv->mm.suspended = 0;
4521
4522         ret = i915_gem_init_ringbuffer(dev);
4523         if (ret != 0) {
4524                 mutex_unlock(&dev->struct_mutex);
4525                 return ret;
4526         }
4527
4528         BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
4529         BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.active_list));
4530         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
4531         BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
4532         BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
4533         BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.request_list));
4534         mutex_unlock(&dev->struct_mutex);
4535
4536         ret = drm_irq_install(dev);
4537         if (ret)
4538                 goto cleanup_ringbuffer;
4539
4540         return 0;
4541
4542 cleanup_ringbuffer:
4543         mutex_lock(&dev->struct_mutex);
4544         i915_gem_cleanup_ringbuffer(dev);
4545         dev_priv->mm.suspended = 1;
4546         mutex_unlock(&dev->struct_mutex);
4547
4548         return ret;
4549 }
4550
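/*
 * Uninstall interrupts and idle the GPU when the VT is released; a no-op
 * under KMS.
 */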
4551 int
4552 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4553                        struct drm_file *file_priv)
4554 {
4555         if (drm_core_check_feature(dev, DRIVER_MODESET))
4556                 return 0;
4557
4558         drm_irq_uninstall(dev);
4559         return i915_gem_idle(dev);
4560 }
4561
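/* Idle the hardware when the last client goes away; a no-op under KMS. */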
4562 void
4563 i915_gem_lastclose(struct drm_device *dev)
4564 {
4565         int ret;
4566
4567         if (drm_core_check_feature(dev, DRIVER_MODESET))
4568                 return;
4569
4570         ret = i915_gem_idle(dev);
4571         if (ret)
4572                 DRM_ERROR("failed to idle hardware: %d\n", ret);
4573 }
4574
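/*
 * One-time GEM setup at driver load: initialise the object lists, retire
 * work handler and shrinker hook, apply the GEN3 ARB C3 LP workaround,
 * size and clear the fence registers, and detect bit-6 swizzling.
 */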
4575 void
4576 i915_gem_load(struct drm_device *dev)
4577 {
4578         int i;
4579         drm_i915_private_t *dev_priv = dev->dev_private;
4580
4581         INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
4582         INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list);
4583         INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
4584         INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4585         INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
4586         INIT_LIST_HEAD(&dev_priv->render_ring.active_list);
4587         INIT_LIST_HEAD(&dev_priv->render_ring.request_list);
4588         if (HAS_BSD(dev)) {
4589                 INIT_LIST_HEAD(&dev_priv->bsd_ring.active_list);
4590                 INIT_LIST_HEAD(&dev_priv->bsd_ring.request_list);
4591         }
4592         for (i = 0; i < 16; i++)
4593                 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
4594         INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4595                           i915_gem_retire_work_handler);
4596         spin_lock(&shrink_list_lock);
4597         list_add(&dev_priv->mm.shrink_list, &shrink_list);
4598         spin_unlock(&shrink_list_lock);
4599
4600         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
4601         if (IS_GEN3(dev)) {
4602                 u32 tmp = I915_READ(MI_ARB_STATE);
4603                 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
4604                         /* arb state is a masked write, so set bit + bit in mask */
4605                         tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
4606                         I915_WRITE(MI_ARB_STATE, tmp);
4607                 }
4608         }
4609
4610         /* Old X drivers will take 0-2 for front, back, depth buffers */
4611         if (!drm_core_check_feature(dev, DRIVER_MODESET))
4612                 dev_priv->fence_reg_start = 3;
4613
4614         if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4615                 dev_priv->num_fence_regs = 16;
4616         else
4617                 dev_priv->num_fence_regs = 8;
4618
4619         /* Initialize fence registers to zero */
4620         if (IS_I965G(dev)) {
4621                 for (i = 0; i < 16; i++)
4622                         I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
4623         } else {
4624                 for (i = 0; i < 8; i++)
4625                         I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
4626                 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4627                         for (i = 0; i < 8; i++)
4628                                 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
4629         }
4630         i915_gem_detect_bit_6_swizzle(dev);
4631         init_waitqueue_head(&dev_priv->pending_flip_queue);
4632 }
4633
4634 /*
4635  * Create a physically contiguous memory object for this object
4636  * e.g. for cursor + overlay regs
4637  */
4638 static int i915_gem_init_phys_object(struct drm_device *dev,
4639                                      int id, int size, int align)
4640 {
4641         drm_i915_private_t *dev_priv = dev->dev_private;
4642         struct drm_i915_gem_phys_object *phys_obj;
4643         int ret;
4644
4645         if (dev_priv->mm.phys_objs[id - 1] || !size)
4646                 return 0;
4647
4648         phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
4649         if (!phys_obj)
4650                 return -ENOMEM;
4651
4652         phys_obj->id = id;
4653
4654         phys_obj->handle = drm_pci_alloc(dev, size, align);
4655         if (!phys_obj->handle) {
4656                 ret = -ENOMEM;
4657                 goto kfree_obj;
4658         }
4659 #ifdef CONFIG_X86
4660         set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4661 #endif
4662
4663         dev_priv->mm.phys_objs[id - 1] = phys_obj;
4664
4665         return 0;
4666 kfree_obj:
4667         kfree(phys_obj);
4668         return ret;
4669 }
4670
4671 static void i915_gem_free_phys_object(struct drm_device *dev, int id)
4672 {
4673         drm_i915_private_t *dev_priv = dev->dev_private;
4674         struct drm_i915_gem_phys_object *phys_obj;
4675
4676         if (!dev_priv->mm.phys_objs[id - 1])
4677                 return;
4678
4679         phys_obj = dev_priv->mm.phys_objs[id - 1];
4680         if (phys_obj->cur_obj) {
4681                 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4682         }
4683
4684 #ifdef CONFIG_X86
4685         set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4686 #endif
4687         drm_pci_free(dev, phys_obj->handle);
4688         kfree(phys_obj);
4689         dev_priv->mm.phys_objs[id - 1] = NULL;
4690 }
4691
4692 void i915_gem_free_all_phys_object(struct drm_device *dev)
4693 {
4694         int i;
4695
4696         for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
4697                 i915_gem_free_phys_object(dev, i);
4698 }
4699
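/*
 * Copy the contents of the physically contiguous buffer back into the
 * object's shmem pages, flush the CPU caches and chipset, and drop the
 * phys object binding.
 */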
4700 void i915_gem_detach_phys_object(struct drm_device *dev,
4701                                  struct drm_gem_object *obj)
4702 {
4703         struct drm_i915_gem_object *obj_priv;
4704         int i;
4705         int ret;
4706         int page_count;
4707
4708         obj_priv = to_intel_bo(obj);
4709         if (!obj_priv->phys_obj)
4710                 return;
4711
4712         ret = i915_gem_object_get_pages(obj, 0);
4713         if (ret)
4714                 goto out;
4715
4716         page_count = obj->size / PAGE_SIZE;
4717
4718         for (i = 0; i < page_count; i++) {
4719                 char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0);
4720                 char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4721
4722                 memcpy(dst, src, PAGE_SIZE);
4723                 kunmap_atomic(dst, KM_USER0);
4724         }
4725         drm_clflush_pages(obj_priv->pages, page_count);
4726         drm_agp_chipset_flush(dev);
4727
4728         i915_gem_object_put_pages(obj);
4729 out:
4730         obj_priv->phys_obj->cur_obj = NULL;
4731         obj_priv->phys_obj = NULL;
4732 }
4733
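/*
 * Back an object with one of the physically contiguous buffers (cursor,
 * overlay registers), allocating the phys object on first use and copying
 * the object's current page contents into it.
 */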
4734 int
4735 i915_gem_attach_phys_object(struct drm_device *dev,
4736                             struct drm_gem_object *obj,
4737                             int id,
4738                             int align)
4739 {
4740         drm_i915_private_t *dev_priv = dev->dev_private;
4741         struct drm_i915_gem_object *obj_priv;
4742         int ret = 0;
4743         int page_count;
4744         int i;
4745
4746         if (id > I915_MAX_PHYS_OBJECT)
4747                 return -EINVAL;
4748
4749         obj_priv = to_intel_bo(obj);
4750
4751         if (obj_priv->phys_obj) {
4752                 if (obj_priv->phys_obj->id == id)
4753                         return 0;
4754                 i915_gem_detach_phys_object(dev, obj);
4755         }
4756
4757         /* create a new object */
4758         if (!dev_priv->mm.phys_objs[id - 1]) {
4759                 ret = i915_gem_init_phys_object(dev, id,
4760                                                 obj->size, align);
4761                 if (ret) {
4762                         DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size);
4763                         goto out;
4764                 }
4765         }
4766
4767         /* bind to the object */
4768         obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
4769         obj_priv->phys_obj->cur_obj = obj;
4770
4771         ret = i915_gem_object_get_pages(obj, 0);
4772         if (ret) {
4773                 DRM_ERROR("failed to get page list\n");
4774                 goto out;
4775         }
4776
4777         page_count = obj->size / PAGE_SIZE;
4778
4779         for (i = 0; i < page_count; i++) {
4780                 char *src = kmap_atomic(obj_priv->pages[i], KM_USER0);
4781                 char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4782
4783                 memcpy(dst, src, PAGE_SIZE);
4784                 kunmap_atomic(src, KM_USER0);
4785         }
4786
4787         i915_gem_object_put_pages(obj);
4788
4789         return 0;
4790 out:
4791         return ret;
4792 }
4793
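/*
 * pwrite fast path for objects backed by a phys object: copy the user data
 * straight into the contiguous buffer and flush the chipset.
 */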
4794 static int
4795 i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
4796                      struct drm_i915_gem_pwrite *args,
4797                      struct drm_file *file_priv)
4798 {
4799         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4800         void *obj_addr;
4801         int ret;
4802         char __user *user_data;
4803
4804         user_data = (char __user *) (uintptr_t) args->data_ptr;
4805         obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;
4806
4807         DRM_DEBUG_DRIVER("obj_addr %p, %llu\n", obj_addr, args->size);
4808         ret = copy_from_user(obj_addr, user_data, args->size);
4809         if (ret)
4810                 return -EFAULT;
4811
4812         drm_agp_chipset_flush(dev);
4813         return 0;
4814 }
4815
4816 void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
4817 {
4818         struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
4819
4820         /* Clean up our request list when the client is going away, so that
4821          * later retire_requests won't dereference our soon-to-be-gone
4822          * file_priv.
4823          */
4824         mutex_lock(&dev->struct_mutex);
4825         while (!list_empty(&i915_file_priv->mm.request_list))
4826                 list_del_init(i915_file_priv->mm.request_list.next);
4827         mutex_unlock(&dev->struct_mutex);
4828 }
4829
4830 static int
4831 i915_gpu_is_active(struct drm_device *dev)
4832 {
4833         drm_i915_private_t *dev_priv = dev->dev_private;
4834         int lists_empty;
4835
4836         lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
4837                       list_empty(&dev_priv->render_ring.active_list);
4838         if (HAS_BSD(dev))
4839                 lists_empty &= list_empty(&dev_priv->bsd_ring.active_list);
4840
4841         return !lists_empty;
4842 }
4843
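/*
 * Memory shrinker callback.  With nr_to_scan == 0, just report how many
 * inactive objects could be reclaimed.  Otherwise retire completed requests,
 * unbind purgeable buffers first, then any remaining inactive buffers, and
 * as a last resort idle the GPU and rescan.  Returns -1 if every device's
 * struct_mutex was contended and proceeding could deadlock.
 */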
4844 static int
4845 i915_gem_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
4846 {
4847         drm_i915_private_t *dev_priv, *next_dev;
4848         struct drm_i915_gem_object *obj_priv, *next_obj;
4849         int cnt = 0;
4850         int would_deadlock = 1;
4851
4852         /* "fast-path" to count number of available objects */
4853         if (nr_to_scan == 0) {
4854                 spin_lock(&shrink_list_lock);
4855                 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
4856                         struct drm_device *dev = dev_priv->dev;
4857
4858                         if (mutex_trylock(&dev->struct_mutex)) {
4859                                 list_for_each_entry(obj_priv,
4860                                                     &dev_priv->mm.inactive_list,
4861                                                     list)
4862                                         cnt++;
4863                                 mutex_unlock(&dev->struct_mutex);
4864                         }
4865                 }
4866                 spin_unlock(&shrink_list_lock);
4867
4868                 return (cnt / 100) * sysctl_vfs_cache_pressure;
4869         }
4870
4871         spin_lock(&shrink_list_lock);
4872
4873 rescan:
4874         /* first scan for clean buffers */
4875         list_for_each_entry_safe(dev_priv, next_dev,
4876                                  &shrink_list, mm.shrink_list) {
4877                 struct drm_device *dev = dev_priv->dev;
4878
4879                 if (!mutex_trylock(&dev->struct_mutex))
4880                         continue;
4881
4882                 spin_unlock(&shrink_list_lock);
4883                 i915_gem_retire_requests(dev);
4884
4885                 list_for_each_entry_safe(obj_priv, next_obj,
4886                                          &dev_priv->mm.inactive_list,
4887                                          list) {
4888                         if (i915_gem_object_is_purgeable(obj_priv)) {
4889                                 i915_gem_object_unbind(&obj_priv->base);
4890                                 if (--nr_to_scan <= 0)
4891                                         break;
4892                         }
4893                 }
4894
4895                 spin_lock(&shrink_list_lock);
4896                 mutex_unlock(&dev->struct_mutex);
4897
4898                 would_deadlock = 0;
4899
4900                 if (nr_to_scan <= 0)
4901                         break;
4902         }
4903
4904         /* second pass, evict/count anything still on the inactive list */
4905         list_for_each_entry_safe(dev_priv, next_dev,
4906                                  &shrink_list, mm.shrink_list) {
4907                 struct drm_device *dev = dev_priv->dev;
4908
4909                 if (!mutex_trylock(&dev->struct_mutex))
4910                         continue;
4911
4912                 spin_unlock(&shrink_list_lock);
4913
4914                 list_for_each_entry_safe(obj_priv, next_obj,
4915                                          &dev_priv->mm.inactive_list,
4916                                          list) {
4917                         if (nr_to_scan > 0) {
4918                                 i915_gem_object_unbind(&obj_priv->base);
4919                                 nr_to_scan--;
4920                         } else
4921                                 cnt++;
4922                 }
4923
4924                 spin_lock(&shrink_list_lock);
4925                 mutex_unlock(&dev->struct_mutex);
4926
4927                 would_deadlock = 0;
4928         }
4929
4930         if (nr_to_scan) {
4931                 int active = 0;
4932
4933                 /*
4934                  * We are desperate for pages, so as a last resort, wait
4935                  * for the GPU to finish and discard whatever we can.
4936                  * This dramatically reduces the number of OOM-killer
4937                  * events whilst running the GPU aggressively.
4938                  */
4939                 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
4940                         struct drm_device *dev = dev_priv->dev;
4941
4942                         if (!mutex_trylock(&dev->struct_mutex))
4943                                 continue;
4944
4945                         spin_unlock(&shrink_list_lock);
4946
4947                         if (i915_gpu_is_active(dev)) {
4948                                 i915_gpu_idle(dev);
4949                                 active++;
4950                         }
4951
4952                         spin_lock(&shrink_list_lock);
4953                         mutex_unlock(&dev->struct_mutex);
4954                 }
4955
4956                 if (active)
4957                         goto rescan;
4958         }
4959
4960         spin_unlock(&shrink_list_lock);
4961
4962         if (would_deadlock)
4963                 return -1;
4964         else if (cnt > 0)
4965                 return (cnt / 100) * sysctl_vfs_cache_pressure;
4966         else
4967                 return 0;
4968 }
4969
4970 static struct shrinker shrinker = {
4971         .shrink = i915_gem_shrink,
4972         .seeks = DEFAULT_SEEKS,
4973 };
4974
4975 __init void
4976 i915_gem_shrinker_init(void)
4977 {
4978         register_shrinker(&shrinker);
4979 }
4980
4981 __exit void
4982 i915_gem_shrinker_exit(void)
4983 {
4984         unregister_shrinker(&shrinker);
4985 }