drm/i915: Ignore fenced commands for gpu access on gen4+
pandora-kernel.git: drivers/gpu/drm/i915/i915_gem_execbuffer.c
1 /*
2  * Copyright © 2008,2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Chris Wilson <chris@chris-wilson.co.uk>
26  *
27  */
28
29 #include "drmP.h"
30 #include "drm.h"
31 #include "i915_drm.h"
32 #include "i915_drv.h"
33 #include "i915_trace.h"
34 #include "intel_drv.h"
35
36 struct change_domains {
37         uint32_t invalidate_domains;
38         uint32_t flush_domains;
39         uint32_t flush_rings;
40 };
41
42 /*
43  * Set the next domain for the specified object. This
44  * may not actually perform the necessary flushing/invalidating though,
45  * as that may want to be batched with other set_domain operations.
46  *
47  * This is (we hope) the only really tricky part of gem. The goal
48  * is fairly simple -- track which caches hold bits of the object
49  * and make sure they remain coherent. A few concrete examples may
50  * help to explain how it works. For shorthand, we use the notation
51  * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
52  * a pair of read and write domain masks.
53  *
54  * Case 1: the batch buffer
55  *
56  *      1. Allocated
57  *      2. Written by CPU
58  *      3. Mapped to GTT
59  *      4. Read by GPU
60  *      5. Unmapped from GTT
61  *      6. Freed
62  *
63  *      Let's take these a step at a time
64  *
65  *      1. Allocated
66  *              Pages allocated from the kernel may still have
67  *              cache contents, so we set them to (CPU, CPU) always.
68  *      2. Written by CPU (using pwrite)
69  *              The pwrite function calls set_domain (CPU, CPU) and
70  *              this function does nothing (as nothing changes)
71  *      3. Mapped to GTT
72  *              This function asserts that the object is not
73  *              currently in any GPU-based read or write domains
74  *      4. Read by GPU
75  *              i915_gem_execbuffer calls set_domain (COMMAND, 0).
76  *              As write_domain is zero, this function adds in the
77  *              current read domains (CPU+COMMAND, 0).
78  *              flush_domains is set to CPU.
79  *              invalidate_domains is set to COMMAND
80  *              clflush is run to get data out of the CPU caches
81  *              then i915_dev_set_domain calls i915_gem_flush to
82  *              emit an MI_FLUSH and drm_agp_chipset_flush
83  *      5. Unmapped from GTT
84  *              i915_gem_object_unbind calls set_domain (CPU, CPU)
85  *              flush_domains and invalidate_domains end up both zero
86  *              so no flushing/invalidating happens
87  *      6. Freed
88  *              yay, done
89  *
90  * Case 2: The shared render buffer
91  *
92  *      1. Allocated
93  *      2. Mapped to GTT
94  *      3. Read/written by GPU
95  *      4. set_domain to (CPU,CPU)
96  *      5. Read/written by CPU
97  *      6. Read/written by GPU
98  *
99  *      1. Allocated
100  *              Same as last example, (CPU, CPU)
101  *      2. Mapped to GTT
102  *              Nothing changes (assertions find that it is not in the GPU)
103  *      3. Read/written by GPU
104  *              execbuffer calls set_domain (RENDER, RENDER)
105  *              flush_domains gets CPU
106  *              invalidate_domains gets GPU
107  *              clflush (obj)
108  *              MI_FLUSH and drm_agp_chipset_flush
109  *      4. set_domain (CPU, CPU)
110  *              flush_domains gets GPU
111  *              invalidate_domains gets CPU
112  *              wait_rendering (obj) to make sure all drawing is complete.
113  *              This will include an MI_FLUSH to get the data from GPU
114  *              to memory
115  *              clflush (obj) to invalidate the CPU cache
116  *              Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
117  *      5. Read/written by CPU
118  *              cache lines are loaded and dirtied
119  *      6. Read/written by GPU
120  *              Same as last GPU access
121  *
122  * Case 3: The constant buffer
123  *
124  *      1. Allocated
125  *      2. Written by CPU
126  *      3. Read by GPU
127  *      4. Updated (written) by CPU again
128  *      5. Read by GPU
129  *
130  *      1. Allocated
131  *              (CPU, CPU)
132  *      2. Written by CPU
133  *              (CPU, CPU)
134  *      3. Read by GPU
135  *              (CPU+RENDER, 0)
136  *              flush_domains = CPU
137  *              invalidate_domains = RENDER
138  *              clflush (obj)
139  *              MI_FLUSH
140  *              drm_agp_chipset_flush
141  *      4. Updated (written) by CPU again
142  *              (CPU, CPU)
143  *              flush_domains = 0 (no previous write domain)
144  *              invalidate_domains = 0 (no new read domains)
145  *      5. Read by GPU
146  *              (CPU+RENDER, 0)
147  *              flush_domains = CPU
148  *              invalidate_domains = RENDER
149  *              clflush (obj)
150  *              MI_FLUSH
151  *              drm_agp_chipset_flush
152  */
153 static void
154 i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
155                                   struct intel_ring_buffer *ring,
156                                   struct change_domains *cd)
157 {
158         uint32_t invalidate_domains = 0, flush_domains = 0;
159
160         /*
161          * If the object isn't moving to a new write domain,
162          * let the object stay in multiple read domains
163          */
164         if (obj->base.pending_write_domain == 0)
165                 obj->base.pending_read_domains |= obj->base.read_domains;
166
167         /*
168          * Flush the current write domain if
169          * the new read domains don't match. Invalidate
170          * any read domains which differ from the old
171          * write domain
172          */
173         if (obj->base.write_domain &&
174             (((obj->base.write_domain != obj->base.pending_read_domains ||
175                obj->ring != ring)) ||
176              (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
177                 flush_domains |= obj->base.write_domain;
178                 invalidate_domains |=
179                         obj->base.pending_read_domains & ~obj->base.write_domain;
180         }
181         /*
182          * Invalidate any read caches which may have
183          * stale data. That is, any new read domains.
184          */
185         invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
186         if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
187                 i915_gem_clflush_object(obj);
188
189         /* blow away mappings if mapped through GTT */
190         if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT)
191                 i915_gem_release_mmap(obj);
192
193         /* The actual obj->write_domain will be updated with
194          * pending_write_domain after we emit the accumulated flush for all
195          * of our domain changes in execbuffers (which clears objects'
196          * write_domains).  So if we have a current write domain that we
197          * aren't changing, set pending_write_domain to that.
198          */
199         if (flush_domains == 0 && obj->base.pending_write_domain == 0)
200                 obj->base.pending_write_domain = obj->base.write_domain;
201
202         cd->invalidate_domains |= invalidate_domains;
203         cd->flush_domains |= flush_domains;
204         if (flush_domains & I915_GEM_GPU_DOMAINS)
205                 cd->flush_rings |= obj->ring->id;
206         if (invalidate_domains & I915_GEM_GPU_DOMAINS)
207                 cd->flush_rings |= ring->id;
208 }
209
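/*
 * Apply a single relocation: look up the target object, validate the
 * requested GPU domains and the relocation location, and, unless the
 * presumed offset already matches, write target_offset + delta into the
 * object -- via kmap if it is in the CPU write domain, otherwise through
 * the GTT aperture. The target's pending domains are accumulated here too.
 */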
210 static int
211 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
212                                    struct drm_file *file_priv,
213                                    struct drm_i915_gem_exec_object2 *entry,
214                                    struct drm_i915_gem_relocation_entry *reloc)
215 {
216         struct drm_device *dev = obj->base.dev;
217         struct drm_gem_object *target_obj;
218         uint32_t target_offset;
219         int ret = -EINVAL;
220
221         target_obj = drm_gem_object_lookup(dev, file_priv,
222                                            reloc->target_handle);
223         if (target_obj == NULL)
224                 return -ENOENT;
225
226         target_offset = to_intel_bo(target_obj)->gtt_offset;
227
228 #if WATCH_RELOC
229         DRM_INFO("%s: obj %p offset %08x target %d "
230                  "read %08x write %08x gtt %08x "
231                  "presumed %08x delta %08x\n",
232                  __func__,
233                  obj,
234                  (int) reloc->offset,
235                  (int) reloc->target_handle,
236                  (int) reloc->read_domains,
237                  (int) reloc->write_domain,
238                  (int) target_offset,
239                  (int) reloc->presumed_offset,
240                  reloc->delta);
241 #endif
242
243         /* The target buffer should have appeared before us in the
244          * exec_object list, so it should have a GTT space bound by now.
245          */
246         if (target_offset == 0) {
247                 DRM_ERROR("No GTT space found for object %d\n",
248                           reloc->target_handle);
249                 goto err;
250         }
251
252         /* Validate that the target is in a valid r/w GPU domain */
253         if (reloc->write_domain & (reloc->write_domain - 1)) {
254                 DRM_ERROR("reloc with multiple write domains: "
255                           "obj %p target %d offset %d "
256                           "read %08x write %08x",
257                           obj, reloc->target_handle,
258                           (int) reloc->offset,
259                           reloc->read_domains,
260                           reloc->write_domain);
261                 goto err;
262         }
263         if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
264             reloc->read_domains & I915_GEM_DOMAIN_CPU) {
265                 DRM_ERROR("reloc with read/write CPU domains: "
266                           "obj %p target %d offset %d "
267                           "read %08x write %08x",
268                           obj, reloc->target_handle,
269                           (int) reloc->offset,
270                           reloc->read_domains,
271                           reloc->write_domain);
272                 goto err;
273         }
274         if (reloc->write_domain && target_obj->pending_write_domain &&
275             reloc->write_domain != target_obj->pending_write_domain) {
276                 DRM_ERROR("Write domain conflict: "
277                           "obj %p target %d offset %d "
278                           "new %08x old %08x\n",
279                           obj, reloc->target_handle,
280                           (int) reloc->offset,
281                           reloc->write_domain,
282                           target_obj->pending_write_domain);
283                 goto err;
284         }
285
286         target_obj->pending_read_domains |= reloc->read_domains;
287         target_obj->pending_write_domain |= reloc->write_domain;
288
289         /* If the relocation already has the right value in it, no
290          * more work needs to be done.
291          */
292         if (target_offset == reloc->presumed_offset)
293                 goto out;
294
295         /* Check that the relocation address is valid... */
296         if (reloc->offset > obj->base.size - 4) {
297                 DRM_ERROR("Relocation beyond object bounds: "
298                           "obj %p target %d offset %d size %d.\n",
299                           obj, reloc->target_handle,
300                           (int) reloc->offset,
301                           (int) obj->base.size);
302                 goto err;
303         }
304         if (reloc->offset & 3) {
305                 DRM_ERROR("Relocation not 4-byte aligned: "
306                           "obj %p target %d offset %d.\n",
307                           obj, reloc->target_handle,
308                           (int) reloc->offset);
309                 goto err;
310         }
311
312         /* and points to somewhere within the target object. */
313         if (reloc->delta >= target_obj->size) {
314                 DRM_ERROR("Relocation beyond target object bounds: "
315                           "obj %p target %d delta %d size %d.\n",
316                           obj, reloc->target_handle,
317                           (int) reloc->delta,
318                           (int) target_obj->size);
319                 goto err;
320         }
321
322         reloc->delta += target_offset;
323         if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
324                 uint32_t page_offset = reloc->offset & ~PAGE_MASK;
325                 char *vaddr;
326
327                 vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
328                 *(uint32_t *)(vaddr + page_offset) = reloc->delta;
329                 kunmap_atomic(vaddr);
330         } else {
331                 struct drm_i915_private *dev_priv = dev->dev_private;
332                 uint32_t __iomem *reloc_entry;
333                 void __iomem *reloc_page;
334
335                 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
336                 if (ret)
337                         goto err;
338
339                 /* Map the page containing the relocation we're going to perform.  */
340                 reloc->offset += obj->gtt_offset;
341                 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
342                                                       reloc->offset & PAGE_MASK);
343                 reloc_entry = (uint32_t __iomem *)
344                         (reloc_page + (reloc->offset & ~PAGE_MASK));
345                 iowrite32(reloc->delta, reloc_entry);
346                 io_mapping_unmap_atomic(reloc_page);
347         }
348
349         /* and update the user's relocation entry */
350         reloc->presumed_offset = target_offset;
351
352 out:
353         ret = 0;
354 err:
355         drm_gem_object_unreference(target_obj);
356         return ret;
357 }
358
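/*
 * Fast relocation path: copy each relocation entry from user space with
 * the atomic (non-faulting) helpers and write the updated presumed_offset
 * back. A fault returns -EFAULT, which the caller handles by falling back
 * to i915_gem_execbuffer_relocate_slow().
 */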
359 static int
360 i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
361                                     struct drm_file *file_priv,
362                                     struct drm_i915_gem_exec_object2 *entry)
363 {
364         struct drm_i915_gem_relocation_entry __user *user_relocs;
365         int i, ret;
366
367         user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
368         for (i = 0; i < entry->relocation_count; i++) {
369                 struct drm_i915_gem_relocation_entry reloc;
370
371                 if (__copy_from_user_inatomic(&reloc,
372                                               user_relocs+i,
373                                               sizeof(reloc)))
374                         return -EFAULT;
375
376                 ret = i915_gem_execbuffer_relocate_entry(obj, file_priv, entry, &reloc);
377                 if (ret)
378                         return ret;
379
380                 if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
381                                             &reloc.presumed_offset,
382                                             sizeof(reloc.presumed_offset)))
383                         return -EFAULT;
384         }
385
386         return 0;
387 }
388
389 static int
390 i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
391                                          struct drm_file *file_priv,
392                                          struct drm_i915_gem_exec_object2 *entry,
393                                          struct drm_i915_gem_relocation_entry *relocs)
394 {
395         int i, ret;
396
397         for (i = 0; i < entry->relocation_count; i++) {
398                 ret = i915_gem_execbuffer_relocate_entry(obj, file_priv, entry, &relocs[i]);
399                 if (ret)
400                         return ret;
401         }
402
403         return 0;
404 }
405
406 static int
407 i915_gem_execbuffer_relocate(struct drm_device *dev,
408                              struct drm_file *file,
409                              struct list_head *objects,
410                              struct drm_i915_gem_exec_object2 *exec)
411 {
412         struct drm_i915_gem_object *obj;
413         int ret;
414
415         list_for_each_entry(obj, objects, exec_list) {
416                 obj->base.pending_read_domains = 0;
417                 obj->base.pending_write_domain = 0;
418                 ret = i915_gem_execbuffer_relocate_object(obj, file, exec++);
419                 if (ret)
420                         return ret;
421         }
422
423         return 0;
424 }
425
426 static int
427 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
428                             struct drm_file *file,
429                             struct list_head *objects,
430                             struct drm_i915_gem_exec_object2 *exec)
431 {
432         struct drm_i915_gem_object *obj;
433         struct drm_i915_gem_exec_object2 *entry;
434         int ret, retry;
435         bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
436
437         /* Attempt to pin all of the buffers into the GTT.
438          * This is done in 3 phases:
439          *
440          * 1a. Unbind all objects that do not match the GTT constraints for
441          *     the execbuffer (fenceable, mappable, alignment etc).
442          * 1b. Increment pin count for already bound objects.
443          * 2.  Bind new objects.
444          * 3.  Decrement pin count.
445          *
446  * This avoids unnecessary unbinding of later objects in order to make
447          * room for the earlier objects *unless* we need to defragment.
448          */
449         retry = 0;
450         do {
451                 ret = 0;
452
453                 /* Unbind any ill-fitting objects, or pin those already bound. */
454                 entry = exec;
455                 list_for_each_entry(obj, objects, exec_list) {
456                         bool need_fence, need_mappable;
457
458                         if (!obj->gtt_space) {
459                                 entry++;
460                                 continue;
461                         }
462
463                         need_fence =
464                                 has_fenced_gpu_access &&
465                                 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
466                                 obj->tiling_mode != I915_TILING_NONE;
467                         need_mappable =
468                                 entry->relocation_count ? true : need_fence;
469
470                         if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
471                             (need_mappable && !obj->map_and_fenceable))
472                                 ret = i915_gem_object_unbind(obj);
473                         else
474                                 ret = i915_gem_object_pin(obj,
475                                                           entry->alignment,
476                                                           need_mappable);
477                         if (ret)
478                                 goto err;
479
480                         entry++;
481                 }
482
483                 /* Bind fresh objects */
484                 entry = exec;
485                 list_for_each_entry(obj, objects, exec_list) {
486                         bool need_fence;
487
488                         need_fence =
489                                 has_fenced_gpu_access &&
490                                 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
491                                 obj->tiling_mode != I915_TILING_NONE;
492
493                         if (!obj->gtt_space) {
494                                 bool need_mappable =
495                                         entry->relocation_count ? true : need_fence;
496
497                                 ret = i915_gem_object_pin(obj,
498                                                           entry->alignment,
499                                                           need_mappable);
500                                 if (ret)
501                                         break;
502                         }
503
504                         if (has_fenced_gpu_access) {
505                                 if (need_fence) {
506                                         ret = i915_gem_object_get_fence(obj, ring, 1);
507                                         if (ret)
508                                                 break;
509                                 } else if (entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
510                                            obj->tiling_mode == I915_TILING_NONE) {
511                                         /* XXX pipelined! */
512                                         ret = i915_gem_object_put_fence(obj);
513                                         if (ret)
514                                                 break;
515                                 }
516                                 obj->pending_fenced_gpu_access = need_fence;
517                         }
518
519                         entry->offset = obj->gtt_offset;
520                         entry++;
521                 }
522
523                 /* Decrement pin count for bound objects */
524                 list_for_each_entry(obj, objects, exec_list) {
525                         if (obj->gtt_space)
526                                 i915_gem_object_unpin(obj);
527                 }
528
529                 if (ret != -ENOSPC || retry > 1)
530                         return ret;
531
532                 /* First attempt, just clear anything that is purgeable.
533                  * Second attempt, clear the entire GTT.
534                  */
535                 ret = i915_gem_evict_everything(ring->dev, retry == 0);
536                 if (ret)
537                         return ret;
538
539                 retry++;
540         } while (1);
541
542 err:
543         obj = list_entry(obj->exec_list.prev,
544                          struct drm_i915_gem_object,
545                          exec_list);
546         while (objects != &obj->exec_list) {
547                 if (obj->gtt_space)
548                         i915_gem_object_unpin(obj);
549
550                 obj = list_entry(obj->exec_list.prev,
551                                  struct drm_i915_gem_object,
552                                  exec_list);
553         }
554
555         return ret;
556 }
557
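/*
 * Slow relocation path: drop struct_mutex so the relocation arrays can be
 * copied from user space with faults allowed, retake the lock, re-reserve
 * the objects and apply the relocations from the kernel copy.
 */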
558 static int
559 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
560                                   struct drm_file *file,
561                                   struct intel_ring_buffer *ring,
562                                   struct list_head *objects,
563                                   struct drm_i915_gem_exec_object2 *exec,
564                                   int count)
565 {
566         struct drm_i915_gem_relocation_entry *reloc;
567         struct drm_i915_gem_object *obj;
568         int i, total, ret;
569
570         mutex_unlock(&dev->struct_mutex);
571
572         total = 0;
573         for (i = 0; i < count; i++)
574                 total += exec[i].relocation_count;
575
576         reloc = drm_malloc_ab(total, sizeof(*reloc));
577         if (reloc == NULL) {
578                 mutex_lock(&dev->struct_mutex);
579                 return -ENOMEM;
580         }
581
582         total = 0;
583         for (i = 0; i < count; i++) {
584                 struct drm_i915_gem_relocation_entry __user *user_relocs;
585
586                 user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr;
587
588                 if (copy_from_user(reloc+total, user_relocs,
589                                    exec[i].relocation_count * sizeof(*reloc))) {
590                         ret = -EFAULT;
591                         mutex_lock(&dev->struct_mutex);
592                         goto err;
593                 }
594
595                 total += exec[i].relocation_count;
596         }
597
598         ret = i915_mutex_lock_interruptible(dev);
599         if (ret) {
600                 mutex_lock(&dev->struct_mutex);
601                 goto err;
602         }
603
604         ret = i915_gem_execbuffer_reserve(ring, file, objects, exec);
605         if (ret)
606                 goto err;
607
608         total = 0;
609         list_for_each_entry(obj, objects, exec_list) {
610                 obj->base.pending_read_domains = 0;
611                 obj->base.pending_write_domain = 0;
612                 ret = i915_gem_execbuffer_relocate_object_slow(obj, file,
613                                                                exec,
614                                                                reloc + total);
615                 if (ret)
616                         goto err;
617
618                 total += exec->relocation_count;
619                 exec++;
620         }
621
622  * Leave the user relocations as they are; this is the painfully slow path,
623          * and we want to avoid the complication of dropping the lock whilst
624          * having buffers reserved in the aperture and so causing spurious
625          * ENOSPC for random operations.
626          */
627
628 err:
629         drm_free_large(reloc);
630         return ret;
631 }
632
633 static void
634 i915_gem_execbuffer_flush(struct drm_device *dev,
635                           uint32_t invalidate_domains,
636                           uint32_t flush_domains,
637                           uint32_t flush_rings)
638 {
639         drm_i915_private_t *dev_priv = dev->dev_private;
640         int i;
641
642         if (flush_domains & I915_GEM_DOMAIN_CPU)
643                 intel_gtt_chipset_flush();
644
645         if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
646                 for (i = 0; i < I915_NUM_RINGS; i++)
647                         if (flush_rings & (1 << i))
648                                 i915_gem_flush_ring(dev, &dev_priv->ring[i],
649                                                     invalidate_domains,
650                                                     flush_domains);
651         }
652 }
653
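/*
 * Make ring 'to' wait for any rendering still outstanding on the object's
 * current ring. On gen < 6 this degrades to a CPU wait; on gen6+ a
 * hardware semaphore is used (emitting a request first if the seqno is
 * still only the outstanding lazy request).
 */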
654 static int
655 i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
656                                struct intel_ring_buffer *to)
657 {
658         struct intel_ring_buffer *from = obj->ring;
659         u32 seqno;
660         int ret, idx;
661
662         if (from == NULL || to == from)
663                 return 0;
664
665         if (INTEL_INFO(obj->base.dev)->gen < 6)
666                 return i915_gem_object_wait_rendering(obj, true);
667
668         idx = intel_ring_sync_index(from, to);
669
670         seqno = obj->last_rendering_seqno;
671         if (seqno <= from->sync_seqno[idx])
672                 return 0;
673
674         if (seqno == from->outstanding_lazy_request) {
675                 struct drm_i915_gem_request *request;
676
677                 request = kzalloc(sizeof(*request), GFP_KERNEL);
678                 if (request == NULL)
679                         return -ENOMEM;
680
681                 ret = i915_add_request(obj->base.dev, NULL, request, from);
682                 if (ret) {
683                         kfree(request);
684                         return ret;
685                 }
686
687                 seqno = request->seqno;
688         }
689
690         from->sync_seqno[idx] = seqno;
691         return intel_ring_sync(to, from, seqno - 1);
692 }
693
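/*
 * Accumulate the domain transitions for every object in the batch, emit
 * the combined flushes/invalidations (chipset flush and per-ring flushes)
 * in one go, then synchronise with any other rings still using the
 * objects.
 */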
694 static int
695 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
696                                 struct list_head *objects)
697 {
698         struct drm_i915_gem_object *obj;
699         struct change_domains cd;
700         int ret;
701
702         cd.invalidate_domains = 0;
703         cd.flush_domains = 0;
704         cd.flush_rings = 0;
705         list_for_each_entry(obj, objects, exec_list)
706                 i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
707
708         if (cd.invalidate_domains | cd.flush_domains) {
709 #if WATCH_EXEC
710                 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
711                           __func__,
712                          cd.invalidate_domains,
713                          cd.flush_domains);
714 #endif
715                 i915_gem_execbuffer_flush(ring->dev,
716                                           cd.invalidate_domains,
717                                           cd.flush_domains,
718                                           cd.flush_rings);
719         }
720
721         list_for_each_entry(obj, objects, exec_list) {
722                 ret = i915_gem_execbuffer_sync_rings(obj, ring);
723                 if (ret)
724                         return ret;
725         }
726
727         return 0;
728 }
729
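/* Both the batch offset and the batch length must be 8-byte aligned. */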
730 static bool
731 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
732 {
733         return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
734 }
735
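/*
 * Sanity-check the user relocation arrays up front: guard against
 * overflow, verify they are both readable and writable (presumed offsets
 * are written back), and prefault the pages so that the atomic copies in
 * the fast path are less likely to fault.
 */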
736 static int
737 validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
738                    int count)
739 {
740         int i;
741
742         for (i = 0; i < count; i++) {
743                 char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
744                 int length; /* limited by fault_in_pages_readable() */
745
746                 /* First check for malicious input causing overflow */
747                 if (exec[i].relocation_count >
748                     INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
749                         return -EINVAL;
750
751                 length = exec[i].relocation_count *
752                         sizeof(struct drm_i915_gem_relocation_entry);
753                 if (!access_ok(VERIFY_READ, ptr, length))
754                         return -EFAULT;
755
756                 /* we may also need to update the presumed offsets */
757                 if (!access_ok(VERIFY_WRITE, ptr, length))
758                         return -EFAULT;
759
760                 if (fault_in_pages_readable(ptr, length))
761                         return -EFAULT;
762         }
763
764         return 0;
765 }
766
767 static int
768 i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring,
769                                    struct list_head *objects)
770 {
771         struct drm_i915_gem_object *obj;
772         int flips;
773
774         /* Check for any pending flips. As we only maintain a flip queue depth
775          * of 1, we can simply insert a WAIT for the next display flip prior
776          * to executing the batch and avoid stalling the CPU.
777          */
778         flips = 0;
779         list_for_each_entry(obj, objects, exec_list) {
780                 if (obj->base.write_domain)
781                         flips |= atomic_read(&obj->pending_flip);
782         }
783         if (flips) {
784                 int plane, flip_mask, ret;
785
786                 for (plane = 0; flips >> plane; plane++) {
787                         if (((flips >> plane) & 1) == 0)
788                                 continue;
789
790                         if (plane)
791                                 flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
792                         else
793                                 flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
794
795                         ret = intel_ring_begin(ring, 2);
796                         if (ret)
797                                 return ret;
798
799                         intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
800                         intel_ring_emit(ring, MI_NOOP);
801                         intel_ring_advance(ring);
802                 }
803         }
804
805         return 0;
806 }
807
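/*
 * Commit the pending read/write domains and fence state computed during
 * reservation, and mark every object as active on the ring under the new
 * seqno; written objects are also moved onto the ring's gpu_write_list.
 */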
808 static void
809 i915_gem_execbuffer_move_to_active(struct list_head *objects,
810                                    struct intel_ring_buffer *ring,
811                                    u32 seqno)
812 {
813         struct drm_i915_gem_object *obj;
814
815         list_for_each_entry(obj, objects, exec_list) {
816                 obj->base.read_domains = obj->base.pending_read_domains;
817                 obj->base.write_domain = obj->base.pending_write_domain;
818                 obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
819
820                 i915_gem_object_move_to_active(obj, ring, seqno);
821                 if (obj->base.write_domain) {
822                         obj->dirty = 1;
823                         obj->pending_gpu_write = true;
824                         list_move_tail(&obj->gpu_write_list,
825                                        &ring->gpu_write_list);
826                         intel_mark_busy(ring->dev, obj);
827                 }
828
829                 trace_i915_gem_object_change_domain(obj,
830                                                     obj->base.read_domains,
831                                                     obj->base.write_domain);
832         }
833 }
834
835 static void
836 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
837                                     struct drm_file *file,
838                                     struct intel_ring_buffer *ring)
839 {
840         struct drm_i915_gem_request *request;
841         u32 flush_domains;
842
843         /*
844          * Ensure that the commands in the batch buffer are
845          * finished before the interrupt fires.
846          *
847          * The sampler always gets flushed on i965 (sigh).
848          */
849         flush_domains = 0;
850         if (INTEL_INFO(dev)->gen >= 4)
851                 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
852
853         ring->flush(ring, I915_GEM_DOMAIN_COMMAND, flush_domains);
854
855         /* Add a breadcrumb for the completion of the batch buffer */
856         request = kzalloc(sizeof(*request), GFP_KERNEL);
857         if (request == NULL || i915_add_request(dev, file, request, ring)) {
858                 i915_gem_next_request_seqno(dev, ring);
859                 kfree(request);
860         }
861 }
862
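/*
 * Core execbuffer path shared by both ioctls: validate the arguments,
 * select a ring, look up and reserve the objects, apply relocations
 * (falling back to the slow path on -EFAULT), flush caches, wait for any
 * pending page flips, then dispatch the batch and retire it with a
 * request.
 */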
863 static int
864 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
865                        struct drm_file *file,
866                        struct drm_i915_gem_execbuffer2 *args,
867                        struct drm_i915_gem_exec_object2 *exec)
868 {
869         drm_i915_private_t *dev_priv = dev->dev_private;
870         struct list_head objects;
871         struct drm_i915_gem_object *batch_obj;
872         struct drm_clip_rect *cliprects = NULL;
873         struct intel_ring_buffer *ring;
874         u32 exec_start, exec_len;
875         u32 seqno;
876         int ret, i;
877
878         if (!i915_gem_check_execbuffer(args)) {
879                 DRM_ERROR("execbuf with invalid offset/length\n");
880                 return -EINVAL;
881         }
882
883         ret = validate_exec_list(exec, args->buffer_count);
884         if (ret)
885                 return ret;
886
887 #if WATCH_EXEC
888         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
889                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
890 #endif
891         switch (args->flags & I915_EXEC_RING_MASK) {
892         case I915_EXEC_DEFAULT:
893         case I915_EXEC_RENDER:
894                 ring = &dev_priv->ring[RCS];
895                 break;
896         case I915_EXEC_BSD:
897                 if (!HAS_BSD(dev)) {
898                         DRM_ERROR("execbuf with invalid ring (BSD)\n");
899                         return -EINVAL;
900                 }
901                 ring = &dev_priv->ring[VCS];
902                 break;
903         case I915_EXEC_BLT:
904                 if (!HAS_BLT(dev)) {
905                         DRM_ERROR("execbuf with invalid ring (BLT)\n");
906                         return -EINVAL;
907                 }
908                 ring = &dev_priv->ring[BCS];
909                 break;
910         default:
911                 DRM_ERROR("execbuf with unknown ring: %d\n",
912                           (int)(args->flags & I915_EXEC_RING_MASK));
913                 return -EINVAL;
914         }
915
916         if (args->buffer_count < 1) {
917                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
918                 return -EINVAL;
919         }
920
921         if (args->num_cliprects != 0) {
922                 if (ring != &dev_priv->ring[RCS]) {
923                         DRM_ERROR("clip rectangles are only valid with the render ring\n");
924                         return -EINVAL;
925                 }
926
927                 cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects),
928                                     GFP_KERNEL);
929                 if (cliprects == NULL) {
930                         ret = -ENOMEM;
931                         goto pre_mutex_err;
932                 }
933
934                 if (copy_from_user(cliprects,
935                                      (struct drm_clip_rect __user *)(uintptr_t)
936                                      args->cliprects_ptr,
937                                      sizeof(*cliprects)*args->num_cliprects)) {
938                         ret = -EFAULT;
939                         goto pre_mutex_err;
940                 }
941         }
942
943         ret = i915_mutex_lock_interruptible(dev);
944         if (ret)
945                 goto pre_mutex_err;
946
947         if (dev_priv->mm.suspended) {
948                 mutex_unlock(&dev->struct_mutex);
949                 ret = -EBUSY;
950                 goto pre_mutex_err;
951         }
952
953         /* Look up object handles */
954         INIT_LIST_HEAD(&objects);
955         for (i = 0; i < args->buffer_count; i++) {
956                 struct drm_i915_gem_object *obj;
957
958                 obj = to_intel_bo(drm_gem_object_lookup(dev, file,
959                                                         exec[i].handle));
960                 if (obj == NULL) {
961                         DRM_ERROR("Invalid object handle %d at index %d\n",
962                                    exec[i].handle, i);
963                         /* prevent error path from reading uninitialized data */
964                         ret = -ENOENT;
965                         goto err;
966                 }
967
968                 if (!list_empty(&obj->exec_list)) {
969                         DRM_ERROR("Object %p [handle %d, index %d] appears more than once in object list\n",
970                                    obj, exec[i].handle, i);
971                         ret = -EINVAL;
972                         goto err;
973                 }
974
975                 list_add_tail(&obj->exec_list, &objects);
976         }
977
978         /* Move the objects en-masse into the GTT, evicting if necessary. */
979         ret = i915_gem_execbuffer_reserve(ring, file, &objects, exec);
980         if (ret)
981                 goto err;
982
983         /* The objects are in their final locations, apply the relocations. */
984         ret = i915_gem_execbuffer_relocate(dev, file, &objects, exec);
985         if (ret) {
986                 if (ret == -EFAULT) {
987                         ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
988                                                                 &objects, exec,
989                                                                 args->buffer_count);
990                         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
991                 }
992                 if (ret)
993                         goto err;
994         }
995
996         /* Set the pending read domains for the batch buffer to COMMAND */
997         batch_obj = list_entry(objects.prev,
998                                struct drm_i915_gem_object,
999                                exec_list);
1000         if (batch_obj->base.pending_write_domain) {
1001                 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
1002                 ret = -EINVAL;
1003                 goto err;
1004         }
1005         batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1006
1007         ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
1008         if (ret)
1009                 goto err;
1010
1011         ret = i915_gem_execbuffer_wait_for_flips(ring, &objects);
1012         if (ret)
1013                 goto err;
1014
1015         seqno = i915_gem_next_request_seqno(dev, ring);
1016         for (i = 0; i < I915_NUM_RINGS-1; i++) {
1017                 if (seqno < ring->sync_seqno[i]) {
1018                         /* The GPU can not handle its semaphore value wrapping,
1019                          * so every billion or so execbuffers, we need to stall
1020                          * the GPU in order to reset the counters.
1021                          */
1022                         ret = i915_gpu_idle(dev);
1023                         if (ret)
1024                                 goto err;
1025
1026                         BUG_ON(ring->sync_seqno[i]);
1027                 }
1028         }
1029
1030         exec_start = batch_obj->gtt_offset + args->batch_start_offset;
1031         exec_len = args->batch_len;
1032         if (cliprects) {
1033                 for (i = 0; i < args->num_cliprects; i++) {
1034                         ret = i915_emit_box(dev, &cliprects[i],
1035                                             args->DR1, args->DR4);
1036                         if (ret)
1037                                 goto err;
1038
1039                         ret = ring->dispatch_execbuffer(ring,
1040                                                         exec_start, exec_len);
1041                         if (ret)
1042                                 goto err;
1043                 }
1044         } else {
1045                 ret = ring->dispatch_execbuffer(ring, exec_start, exec_len);
1046                 if (ret)
1047                         goto err;
1048         }
1049
1050         i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
1051         i915_gem_execbuffer_retire_commands(dev, file, ring);
1052
1053 err:
1054         while (!list_empty(&objects)) {
1055                 struct drm_i915_gem_object *obj;
1056
1057                 obj = list_first_entry(&objects,
1058                                        struct drm_i915_gem_object,
1059                                        exec_list);
1060                 list_del_init(&obj->exec_list);
1061                 drm_gem_object_unreference(&obj->base);
1062         }
1063
1064         mutex_unlock(&dev->struct_mutex);
1065
1066 pre_mutex_err:
1067         kfree(cliprects);
1068         return ret;
1069 }
1070
1071 /*
1072  * Legacy execbuffer just creates an exec2 list from the original exec object
1073  * list array and passes it to the real function.
1074  */
1075 int
1076 i915_gem_execbuffer(struct drm_device *dev, void *data,
1077                     struct drm_file *file)
1078 {
1079         struct drm_i915_gem_execbuffer *args = data;
1080         struct drm_i915_gem_execbuffer2 exec2;
1081         struct drm_i915_gem_exec_object *exec_list = NULL;
1082         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1083         int ret, i;
1084
1085 #if WATCH_EXEC
1086         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
1087                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
1088 #endif
1089
1090         if (args->buffer_count < 1) {
1091                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
1092                 return -EINVAL;
1093         }
1094
1095         /* Copy in the exec list from userland */
1096         exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1097         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1098         if (exec_list == NULL || exec2_list == NULL) {
1099                 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
1100                           args->buffer_count);
1101                 drm_free_large(exec_list);
1102                 drm_free_large(exec2_list);
1103                 return -ENOMEM;
1104         }
1105         ret = copy_from_user(exec_list,
1106                              (struct drm_i915_relocation_entry __user *)
1107                              (uintptr_t) args->buffers_ptr,
1108                              sizeof(*exec_list) * args->buffer_count);
1109         if (ret != 0) {
1110                 DRM_ERROR("copy %d exec entries failed %d\n",
1111                           args->buffer_count, ret);
1112                 drm_free_large(exec_list);
1113                 drm_free_large(exec2_list);
1114                 return -EFAULT;
1115         }
1116
1117         for (i = 0; i < args->buffer_count; i++) {
1118                 exec2_list[i].handle = exec_list[i].handle;
1119                 exec2_list[i].relocation_count = exec_list[i].relocation_count;
1120                 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1121                 exec2_list[i].alignment = exec_list[i].alignment;
1122                 exec2_list[i].offset = exec_list[i].offset;
1123                 if (INTEL_INFO(dev)->gen < 4)
1124                         exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1125                 else
1126                         exec2_list[i].flags = 0;
1127         }
1128
1129         exec2.buffers_ptr = args->buffers_ptr;
1130         exec2.buffer_count = args->buffer_count;
1131         exec2.batch_start_offset = args->batch_start_offset;
1132         exec2.batch_len = args->batch_len;
1133         exec2.DR1 = args->DR1;
1134         exec2.DR4 = args->DR4;
1135         exec2.num_cliprects = args->num_cliprects;
1136         exec2.cliprects_ptr = args->cliprects_ptr;
1137         exec2.flags = I915_EXEC_RENDER;
1138
1139         ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1140         if (!ret) {
1141                 /* Copy the new buffer offsets back to the user's exec list. */
1142                 for (i = 0; i < args->buffer_count; i++)
1143                         exec_list[i].offset = exec2_list[i].offset;
1144                 /* ... and back out to userspace */
1145                 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
1146                                    (uintptr_t) args->buffers_ptr,
1147                                    exec_list,
1148                                    sizeof(*exec_list) * args->buffer_count);
1149                 if (ret) {
1150                         ret = -EFAULT;
1151                         DRM_ERROR("failed to copy %d exec entries "
1152                                   "back to user (%d)\n",
1153                                   args->buffer_count, ret);
1154                 }
1155         }
1156
1157         drm_free_large(exec_list);
1158         drm_free_large(exec2_list);
1159         return ret;
1160 }
1161
1162 int
1163 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1164                      struct drm_file *file)
1165 {
1166         struct drm_i915_gem_execbuffer2 *args = data;
1167         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1168         int ret;
1169
1170 #if WATCH_EXEC
1171         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
1172                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
1173 #endif
1174
1175         if (args->buffer_count < 1) {
1176                 DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
1177                 return -EINVAL;
1178         }
1179
1180         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1181         if (exec2_list == NULL) {
1182                 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
1183                           args->buffer_count);
1184                 return -ENOMEM;
1185         }
1186         ret = copy_from_user(exec2_list,
1187                              (struct drm_i915_relocation_entry __user *)
1188                              (uintptr_t) args->buffers_ptr,
1189                              sizeof(*exec2_list) * args->buffer_count);
1190         if (ret != 0) {
1191                 DRM_ERROR("copy %d exec entries failed %d\n",
1192                           args->buffer_count, ret);
1193                 drm_free_large(exec2_list);
1194                 return -EFAULT;
1195         }
1196
1197         ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1198         if (!ret) {
1199                 /* Copy the new buffer offsets back to the user's exec list. */
1200                 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
1201                                    (uintptr_t) args->buffers_ptr,
1202                                    exec2_list,
1203                                    sizeof(*exec2_list) * args->buffer_count);
1204                 if (ret) {
1205                         ret = -EFAULT;
1206                         DRM_ERROR("failed to copy %d exec entries "
1207                                   "back to user (%d)\n",
1208                                   args->buffer_count, ret);
1209                 }
1210         }
1211
1212         drm_free_large(exec2_list);
1213         return ret;
1214 }
1215
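/*
 * Illustrative sketch (not part of this file): a minimal userspace caller
 * of the execbuffer2 ioctl handled by i915_gem_execbuffer2() above,
 * assuming libdrm headers, an already-open DRM fd, and a GEM handle
 * "batch_handle" whose contents end in MI_BATCH_BUFFER_END. The function
 * and variable names are hypothetical; error handling is omitted.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int submit_batch(int fd, uint32_t batch_handle, uint32_t batch_len)
{
        struct drm_i915_gem_exec_object2 obj;
        struct drm_i915_gem_execbuffer2 execbuf;

        /* The batch buffer must be the last entry in the exec list. */
        memset(&obj, 0, sizeof(obj));
        obj.handle = batch_handle;

        memset(&execbuf, 0, sizeof(execbuf));
        execbuf.buffers_ptr = (uintptr_t)&obj;
        execbuf.buffer_count = 1;
        execbuf.batch_start_offset = 0;   /* must be 8-byte aligned */
        execbuf.batch_len = batch_len;    /* must be 8-byte aligned */
        execbuf.flags = I915_EXEC_RENDER;

        return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}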