drm/i915: Avoid might_fault during pwrite whilst holding our mutex
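The subject refers to the i915_gem_phys_pwrite() hunk at the end of this diff: while struct_mutex is held, the write is first attempted with the non-faulting __copy_from_user_inatomic_nocache(); only if that fails is the mutex dropped, the faulting copy_from_user() performed, and the mutex retaken. Below is a minimal userspace sketch of that pattern, not driver code; try_copy_nofault() and copy_may_fault() are hypothetical stand-ins for the two kernel copy helpers.

/*
 * Sketch of the "no faulting while locked" pattern, NOT the driver code.
 * try_copy_nofault() stands in for __copy_from_user_inatomic_nocache(),
 * copy_may_fault() for copy_from_user().
 */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

static pthread_mutex_t struct_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Non-faulting copy attempt; returns false if it would have to fault. */
static bool try_copy_nofault(void *dst, const void *src, size_t len)
{
        memcpy(dst, src, len);  /* kernel: __copy_from_user_inatomic_nocache() */
        return true;
}

/* Copy that is allowed to fault (kernel: copy_from_user()). */
static int copy_may_fault(void *dst, const void *src, size_t len)
{
        memcpy(dst, src, len);
        return 0;               /* 0 bytes left uncopied */
}

int phys_pwrite(void *vaddr, const void *user_data, size_t size)
{
        int ret = 0;

        pthread_mutex_lock(&struct_mutex);

        if (!try_copy_nofault(vaddr, user_data, size)) {
                /*
                 * The backing storage is fixed for the object's lifetime,
                 * so vaddr stays valid while the mutex is dropped for the
                 * potentially faulting copy.
                 */
                pthread_mutex_unlock(&struct_mutex);
                if (copy_may_fault(vaddr, user_data, size))
                        ret = -EFAULT;
                pthread_mutex_lock(&struct_mutex);
        }

        pthread_mutex_unlock(&struct_mutex);
        return ret;
}

The point of the pattern is that the mutex is never held across a page fault; since the i915 fault handler takes the same struct_mutex, faulting with it held is what might_fault()/lockdep warns about.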
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6e85496..eba9b16 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -268,9 +268,9 @@ fast_shmem_read(struct page **pages,
        char *vaddr;
        int ret;
 
-       vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
+       vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT]);
        ret = __copy_to_user_inatomic(data, vaddr + page_offset, length);
-       kunmap_atomic(vaddr, KM_USER0);
+       kunmap_atomic(vaddr);
 
        return ret;
 }
@@ -619,10 +619,10 @@ fast_user_write(struct io_mapping *mapping,
        char *vaddr_atomic;
        unsigned long unwritten;
 
-       vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base, KM_USER0);
+       vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
        unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
                                                      user_data, length);
-       io_mapping_unmap_atomic(vaddr_atomic, KM_USER0);
+       io_mapping_unmap_atomic(vaddr_atomic);
        return unwritten;
 }
 
@@ -659,9 +659,9 @@ fast_shmem_write(struct page **pages,
        char *vaddr;
        int ret;
 
-       vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
+       vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT]);
        ret = __copy_from_user_inatomic(vaddr + page_offset, data, length);
-       kunmap_atomic(vaddr, KM_USER0);
+       kunmap_atomic(vaddr);
 
        return ret;
 }
@@ -1657,12 +1657,11 @@ i915_gem_process_flushing_list(struct drm_device *dev,
        struct drm_i915_gem_object *obj_priv, *next;
 
        list_for_each_entry_safe(obj_priv, next,
-                                &dev_priv->mm.gpu_write_list,
+                                &ring->gpu_write_list,
                                 gpu_write_list) {
                struct drm_gem_object *obj = &obj_priv->base;
 
-               if (obj->write_domain & flush_domains &&
-                   obj_priv->ring == ring) {
+               if (obj->write_domain & flush_domains) {
                        uint32_t old_write_domain = obj->write_domain;
 
                        obj->write_domain = 0;
@@ -1800,6 +1799,7 @@ void i915_gem_reset(struct drm_device *dev)
 
        i915_gem_reset_ring_lists(dev_priv, &dev_priv->render_ring);
        i915_gem_reset_ring_lists(dev_priv, &dev_priv->bsd_ring);
+       i915_gem_reset_ring_lists(dev_priv, &dev_priv->blt_ring);
 
        /* Remove anything from the flushing lists. The GPU cache is likely
         * to be lost on reset along with the data, so simply move the
@@ -1922,6 +1922,7 @@ i915_gem_retire_requests(struct drm_device *dev)
 
        i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
        i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
+       i915_gem_retire_requests_ring(dev, &dev_priv->blt_ring);
 }
 
 static void
@@ -1944,7 +1945,8 @@ i915_gem_retire_work_handler(struct work_struct *work)
 
        if (!dev_priv->mm.suspended &&
                (!list_empty(&dev_priv->render_ring.request_list) ||
-                !list_empty(&dev_priv->bsd_ring.request_list)))
+                !list_empty(&dev_priv->bsd_ring.request_list) ||
+                !list_empty(&dev_priv->blt_ring.request_list)))
                queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
        mutex_unlock(&dev->struct_mutex);
 }
@@ -2063,6 +2065,10 @@ i915_gem_flush(struct drm_device *dev,
                        i915_gem_flush_ring(dev, file_priv,
                                            &dev_priv->bsd_ring,
                                            invalidate_domains, flush_domains);
+               if (flush_rings & RING_BLT)
+                       i915_gem_flush_ring(dev, file_priv,
+                                           &dev_priv->blt_ring,
+                                           invalidate_domains, flush_domains);
        }
 }
 
@@ -2166,6 +2172,9 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 static int i915_ring_idle(struct drm_device *dev,
                          struct intel_ring_buffer *ring)
 {
+       if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
+               return 0;
+
        i915_gem_flush_ring(dev, NULL, ring,
                            I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
        return i915_wait_request(dev,
@@ -2181,8 +2190,7 @@ i915_gpu_idle(struct drm_device *dev)
        int ret;
 
        lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
-                      list_empty(&dev_priv->render_ring.active_list) &&
-                      list_empty(&dev_priv->bsd_ring.active_list));
+                      list_empty(&dev_priv->mm.active_list));
        if (lists_empty)
                return 0;
 
@@ -2195,6 +2203,10 @@ i915_gpu_idle(struct drm_device *dev)
        if (ret)
                return ret;
 
+       ret = i915_ring_idle(dev, &dev_priv->blt_ring);
+       if (ret)
+               return ret;
+
        return 0;
 }
 
@@ -3066,7 +3078,8 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
  *             drm_agp_chipset_flush
  */
 static void
-i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
+i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj,
+                                 struct intel_ring_buffer *ring)
 {
        struct drm_device               *dev = obj->dev;
        struct drm_i915_private         *dev_priv = dev->dev_private;
@@ -3093,7 +3106,8 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
         * write domain
         */
        if (obj->write_domain &&
-           obj->write_domain != obj->pending_read_domains) {
+           (obj->write_domain != obj->pending_read_domains ||
+            obj_priv->ring != ring)) {
                flush_domains |= obj->write_domain;
                invalidate_domains |=
                        obj->pending_read_domains & ~obj->write_domain;
@@ -3120,8 +3134,10 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
 
        dev->invalidate_domains |= invalidate_domains;
        dev->flush_domains |= flush_domains;
-       if (obj_priv->ring)
+       if (flush_domains & I915_GEM_GPU_DOMAINS)
                dev_priv->mm.flush_rings |= obj_priv->ring->id;
+       if (invalidate_domains & I915_GEM_GPU_DOMAINS)
+               dev_priv->mm.flush_rings |= ring->id;
 
        trace_i915_gem_object_change_domain(obj,
                                            old_read_domains,
@@ -3337,7 +3353,7 @@ i915_gem_execbuffer_relocate(struct drm_i915_gem_object *obj,
                }
 
                target_obj->pending_read_domains |= reloc.read_domains;
-               target_obj->pending_write_domain = reloc.write_domain;
+               target_obj->pending_write_domain |= reloc.write_domain;
 
                /* If the relocation already has the right value in it, no
                 * more work needs to be done.
@@ -3378,9 +3394,9 @@ i915_gem_execbuffer_relocate(struct drm_i915_gem_object *obj,
                        uint32_t page_offset = reloc.offset & ~PAGE_MASK;
                        char *vaddr;
 
-                       vaddr = kmap_atomic(obj->pages[reloc.offset >> PAGE_SHIFT], KM_USER0);
+                       vaddr = kmap_atomic(obj->pages[reloc.offset >> PAGE_SHIFT]);
                        *(uint32_t *)(vaddr + page_offset) = reloc.delta;
-                       kunmap_atomic(vaddr, KM_USER0);
+                       kunmap_atomic(vaddr);
                } else {
                        uint32_t __iomem *reloc_entry;
                        void __iomem *reloc_page;
@@ -3392,12 +3408,20 @@ i915_gem_execbuffer_relocate(struct drm_i915_gem_object *obj,
                        /* Map the page containing the relocation we're going to perform.  */
                        reloc.offset += obj->gtt_offset;
                        reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
-                                                             reloc.offset & PAGE_MASK,
-                                                             KM_USER0);
+                                                             reloc.offset & PAGE_MASK);
                        reloc_entry = (uint32_t __iomem *)
                                (reloc_page + (reloc.offset & ~PAGE_MASK));
                        iowrite32(reloc.delta, reloc_entry);
-                       io_mapping_unmap_atomic(reloc_page, KM_USER0);
+                       io_mapping_unmap_atomic(reloc_page);
+               }
+
+               /* and update the user's relocation entry */
+               reloc.presumed_offset = target_offset;
+               if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
+                                             &reloc.presumed_offset,
+                                             sizeof(reloc.presumed_offset))) {
+                       ret = -EFAULT;
+                       break;
                }
        }
 
@@ -3472,6 +3496,52 @@ i915_gem_execbuffer_pin(struct drm_device *dev,
        return 0;
 }
 
+static int
+i915_gem_execbuffer_move_to_gpu(struct drm_device *dev,
+                               struct drm_file *file,
+                               struct intel_ring_buffer *ring,
+                               struct drm_gem_object **objects,
+                               int count)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int ret, i;
+
+       /* Zero the global flush/invalidate flags. These
+        * will be modified as new domains are computed
+        * for each object
+        */
+       dev->invalidate_domains = 0;
+       dev->flush_domains = 0;
+       dev_priv->mm.flush_rings = 0;
+       for (i = 0; i < count; i++)
+               i915_gem_object_set_to_gpu_domain(objects[i], ring);
+
+       if (dev->invalidate_domains | dev->flush_domains) {
+#if WATCH_EXEC
+               DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
+                         __func__,
+                        dev->invalidate_domains,
+                        dev->flush_domains);
+#endif
+               i915_gem_flush(dev, file,
+                              dev->invalidate_domains,
+                              dev->flush_domains,
+                              dev_priv->mm.flush_rings);
+       }
+
+       for (i = 0; i < count; i++) {
+               struct drm_i915_gem_object *obj = to_intel_bo(objects[i]);
+               /* XXX replace with semaphores */
+               if (obj->ring && ring != obj->ring) {
+                       ret = i915_gem_object_wait_rendering(&obj->base, true);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       return 0;
+}
+
 /* Throttle our rendering by waiting until the ring has completed our requests
  * emitted over 20 msec ago.
  *
@@ -3560,6 +3630,10 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
                if (!access_ok(VERIFY_READ, ptr, length))
                        return -EFAULT;
 
+               /* we may also need to update the presumed offsets */
+               if (!access_ok(VERIFY_WRITE, ptr, length))
+                       return -EFAULT;
+
                if (fault_in_pages_readable(ptr, length))
                        return -EFAULT;
        }
@@ -3596,14 +3670,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
        DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
                  (int) args->buffers_ptr, args->buffer_count, args->batch_len);
 #endif
-       if (args->flags & I915_EXEC_BSD) {
+       switch (args->flags & I915_EXEC_RING_MASK) {
+       case I915_EXEC_DEFAULT:
+       case I915_EXEC_RENDER:
+               ring = &dev_priv->render_ring;
+               break;
+       case I915_EXEC_BSD:
                if (!HAS_BSD(dev)) {
-                       DRM_ERROR("execbuf with wrong flag\n");
+                       DRM_ERROR("execbuf with invalid ring (BSD)\n");
                        return -EINVAL;
                }
                ring = &dev_priv->bsd_ring;
-       } else {
-               ring = &dev_priv->render_ring;
+               break;
+       case I915_EXEC_BLT:
+               if (!HAS_BLT(dev)) {
+                       DRM_ERROR("execbuf with invalid ring (BLT)\n");
+                       return -EINVAL;
+               }
+               ring = &dev_priv->blt_ring;
+               break;
+       default:
+               DRM_ERROR("execbuf with unknown ring: %d\n",
+                         (int)(args->flags & I915_EXEC_RING_MASK));
+               return -EINVAL;
        }
 
        if (args->buffer_count < 1) {
@@ -3713,44 +3802,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
                goto err;
        }
 
-       /* Zero the global flush/invalidate flags. These
-        * will be modified as new domains are computed
-        * for each object
-        */
-       dev->invalidate_domains = 0;
-       dev->flush_domains = 0;
-       dev_priv->mm.flush_rings = 0;
-
-       for (i = 0; i < args->buffer_count; i++) {
-               struct drm_gem_object *obj = object_list[i];
-
-               /* Compute new gpu domains and update invalidate/flush */
-               i915_gem_object_set_to_gpu_domain(obj);
-       }
-
-       if (dev->invalidate_domains | dev->flush_domains) {
-#if WATCH_EXEC
-               DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
-                         __func__,
-                        dev->invalidate_domains,
-                        dev->flush_domains);
-#endif
-               i915_gem_flush(dev, file,
-                              dev->invalidate_domains,
-                              dev->flush_domains,
-                              dev_priv->mm.flush_rings);
-       }
+       ret = i915_gem_execbuffer_move_to_gpu(dev, file, ring,
+                                             object_list, args->buffer_count);
+       if (ret)
+               goto err;
 
        for (i = 0; i < args->buffer_count; i++) {
                struct drm_gem_object *obj = object_list[i];
-               struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
                uint32_t old_write_domain = obj->write_domain;
-
                obj->write_domain = obj->pending_write_domain;
-               if (obj->write_domain)
-                       list_move_tail(&obj_priv->gpu_write_list,
-                                      &dev_priv->mm.gpu_write_list);
-
                trace_i915_gem_object_change_domain(obj,
                                                    obj->read_domains,
                                                    old_write_domain);
@@ -3815,9 +3875,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
        for (i = 0; i < args->buffer_count; i++) {
                struct drm_gem_object *obj = object_list[i];
-               obj_priv = to_intel_bo(obj);
 
                i915_gem_object_move_to_active(obj, ring);
+               if (obj->write_domain)
+                       list_move_tail(&to_intel_bo(obj)->gpu_write_list,
+                                      &ring->gpu_write_list);
        }
 
        i915_add_request(dev, file, request, ring);
@@ -4469,10 +4531,18 @@ i915_gem_init_ringbuffer(struct drm_device *dev)
                        goto cleanup_render_ring;
        }
 
+       if (HAS_BLT(dev)) {
+               ret = intel_init_blt_ring_buffer(dev);
+               if (ret)
+                       goto cleanup_bsd_ring;
+       }
+
        dev_priv->next_seqno = 1;
 
        return 0;
 
+cleanup_bsd_ring:
+       intel_cleanup_ring_buffer(dev, &dev_priv->bsd_ring);
 cleanup_render_ring:
        intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
 cleanup_pipe_control:
@@ -4488,6 +4558,7 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev)
 
        intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
        intel_cleanup_ring_buffer(dev, &dev_priv->bsd_ring);
+       intel_cleanup_ring_buffer(dev, &dev_priv->blt_ring);
        if (HAS_PIPE_CONTROL(dev))
                i915_gem_cleanup_pipe_control(dev);
 }
@@ -4519,10 +4590,12 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
        BUG_ON(!list_empty(&dev_priv->mm.active_list));
        BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
        BUG_ON(!list_empty(&dev_priv->bsd_ring.active_list));
+       BUG_ON(!list_empty(&dev_priv->blt_ring.active_list));
        BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
        BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
        BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
        BUG_ON(!list_empty(&dev_priv->bsd_ring.request_list));
+       BUG_ON(!list_empty(&dev_priv->blt_ring.request_list));
        mutex_unlock(&dev->struct_mutex);
 
        ret = drm_irq_install(dev);
@@ -4564,6 +4637,14 @@ i915_gem_lastclose(struct drm_device *dev)
                DRM_ERROR("failed to idle hardware: %d\n", ret);
 }
 
+static void
+init_ring_lists(struct intel_ring_buffer *ring)
+{
+       INIT_LIST_HEAD(&ring->active_list);
+       INIT_LIST_HEAD(&ring->request_list);
+       INIT_LIST_HEAD(&ring->gpu_write_list);
+}
+
 void
 i915_gem_load(struct drm_device *dev)
 {
@@ -4572,15 +4653,13 @@ i915_gem_load(struct drm_device *dev)
 
        INIT_LIST_HEAD(&dev_priv->mm.active_list);
        INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
-       INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list);
        INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
        INIT_LIST_HEAD(&dev_priv->mm.pinned_list);
        INIT_LIST_HEAD(&dev_priv->mm.fence_list);
        INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
-       INIT_LIST_HEAD(&dev_priv->render_ring.active_list);
-       INIT_LIST_HEAD(&dev_priv->render_ring.request_list);
-       INIT_LIST_HEAD(&dev_priv->bsd_ring.active_list);
-       INIT_LIST_HEAD(&dev_priv->bsd_ring.request_list);
+       init_ring_lists(&dev_priv->render_ring);
+       init_ring_lists(&dev_priv->bsd_ring);
+       init_ring_lists(&dev_priv->blt_ring);
        for (i = 0; i < 16; i++)
                INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
        INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
@@ -4718,11 +4797,11 @@ void i915_gem_detach_phys_object(struct drm_device *dev,
        page_count = obj->size / PAGE_SIZE;
 
        for (i = 0; i < page_count; i++) {
-               char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0);
+               char *dst = kmap_atomic(obj_priv->pages[i]);
                char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
 
                memcpy(dst, src, PAGE_SIZE);
-               kunmap_atomic(dst, KM_USER0);
+               kunmap_atomic(dst);
        }
        drm_clflush_pages(obj_priv->pages, page_count);
        drm_agp_chipset_flush(dev);
@@ -4779,11 +4858,11 @@ i915_gem_attach_phys_object(struct drm_device *dev,
        page_count = obj->size / PAGE_SIZE;
 
        for (i = 0; i < page_count; i++) {
-               char *src = kmap_atomic(obj_priv->pages[i], KM_USER0);
+               char *src = kmap_atomic(obj_priv->pages[i]);
                char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
 
                memcpy(dst, src, PAGE_SIZE);
-               kunmap_atomic(src, KM_USER0);
+               kunmap_atomic(src);
        }
 
        i915_gem_object_put_pages(obj);
@@ -4799,17 +4878,24 @@ i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
                     struct drm_file *file_priv)
 {
        struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
-       void *obj_addr;
-       int ret;
-       char __user *user_data;
+       void *vaddr = obj_priv->phys_obj->handle->vaddr + args->offset;
+       char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
 
-       user_data = (char __user *) (uintptr_t) args->data_ptr;
-       obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;
+       DRM_DEBUG_DRIVER("vaddr %p, %lld\n", vaddr, args->size);
 
-       DRM_DEBUG_DRIVER("obj_addr %p, %lld\n", obj_addr, args->size);
-       ret = copy_from_user(obj_addr, user_data, args->size);
-       if (ret)
-               return -EFAULT;
+       if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
+               unsigned long unwritten;
+
+               /* The physical object once assigned is fixed for the lifetime
+                * of the obj, so we can safely drop the lock and continue
+                * to access vaddr.
+                */
+               mutex_unlock(&dev->struct_mutex);
+               unwritten = copy_from_user(vaddr, user_data, args->size);
+               mutex_lock(&dev->struct_mutex);
+               if (unwritten)
+                       return -EFAULT;
+       }
 
        drm_agp_chipset_flush(dev);
        return 0;
@@ -4843,8 +4929,7 @@ i915_gpu_is_active(struct drm_device *dev)
        int lists_empty;
 
        lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
-                     list_empty(&dev_priv->render_ring.active_list) &&
-                     list_empty(&dev_priv->bsd_ring.active_list);
+                     list_empty(&dev_priv->mm.active_list);
 
        return !lists_empty;
 }