Merge branch 'drm-core-next' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied...
[pandora-kernel.git] / drivers / gpu / drm / i915 / i915_gem.c
index a087e1b..d1cd8b8 100644 (file)
@@ -1763,8 +1763,11 @@ i915_add_request(struct intel_ring_buffer *ring,
        ring->outstanding_lazy_request = false;
 
        if (!dev_priv->mm.suspended) {
-               mod_timer(&dev_priv->hangcheck_timer,
-                         jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+               if (i915_enable_hangcheck) {
+                       mod_timer(&dev_priv->hangcheck_timer,
+                                 jiffies +
+                                 msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+               }
                if (was_empty)
                        queue_delayed_work(dev_priv->wq,
                                           &dev_priv->mm.retire_work, HZ);
@@ -2135,6 +2138,30 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
        return 0;
 }
 
+static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
+{
+       u32 old_write_domain, old_read_domains;
+
+       /* Act as a barrier for all accesses through the GTT */
+       mb();
+
+       /* Force a pagefault for domain tracking on next user access */
+       i915_gem_release_mmap(obj);
+
+       if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
+               return; /* GTT read domain not set: nothing to clear */
+
+       old_read_domains = obj->base.read_domains; /* kept for the trace */
+       old_write_domain = obj->base.write_domain;
+
+       obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
+       obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
+
+       trace_i915_gem_object_change_domain(obj,
+                                           old_read_domains,
+                                           old_write_domain);
+}
+
 /**
  * Unbinds an object from the GTT aperture.
  */
@@ -2151,23 +2178,28 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
                return -EINVAL;
        }
 
-       /* blow away mappings if mapped through GTT */
-       i915_gem_release_mmap(obj);
-
-       /* Move the object to the CPU domain to ensure that
-        * any possible CPU writes while it's not in the GTT
-        * are flushed when we go to remap it. This will
-        * also ensure that all pending GPU writes are finished
-        * before we unbind.
-        */
-       ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+       ret = i915_gem_object_finish_gpu(obj);
        if (ret == -ERESTARTSYS)
                return ret;
        /* Continue on if we fail due to EIO, the GPU is hung so we
         * should be safe and we need to cleanup or else we might
         * cause memory corruption through use-after-free.
         */
+
+       i915_gem_object_finish_gtt(obj);
+
+       /* Move the object to the CPU domain to ensure that
+        * any possible CPU writes while it's not in the GTT
+        * are flushed when we go to remap it.
+        */
+       if (ret == 0)
+               ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+       if (ret == -ERESTARTSYS)
+               return ret;
        if (ret) {
+               /* In the event of a disaster, abandon all caches and
+                * hope for the best.
+                */
                i915_gem_clflush_object(obj);
                obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        }
@@ -2996,51 +3028,139 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
        return 0;
 }
 
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+                                   enum i915_cache_level cache_level)
+{
+       int ret;
+
+       if (obj->cache_level == cache_level)
+               return 0; /* already at the requested cache level */
+
+       if (obj->pin_count) {
+               DRM_DEBUG("can not change the cache level of pinned objects\n");
+               return -EBUSY;
+       }
+
+       if (obj->gtt_space) { /* has GTT space: quiesce, then rebind below */
+               ret = i915_gem_object_finish_gpu(obj);
+               if (ret)
+                       return ret;
+
+               i915_gem_object_finish_gtt(obj);
+
+               /* Before SandyBridge, you could not use tiling or fence
+                * registers with snooped memory, so relinquish any fences
+                * currently pointing to our region in the aperture.
+                */
+               if (INTEL_INFO(obj->base.dev)->gen < 6) {
+                       ret = i915_gem_object_put_fence(obj);
+                       if (ret)
+                               return ret;
+               }
+
+               i915_gem_gtt_rebind_object(obj, cache_level);
+       }
+
+       if (cache_level == I915_CACHE_NONE) {
+               u32 old_read_domains, old_write_domain;
+
+               /* If we're coming from LLC cached, then we haven't
+                * actually been tracking whether the data is in the
+                * CPU cache or not, since we only allow one bit set in
+                * obj->base.write_domain and have been skipping the
+                * clflushes. Just set it to the CPU cache for now.
+                */
+               WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
+               WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
+
+               old_read_domains = obj->base.read_domains;
+               old_write_domain = obj->base.write_domain;
+
+               obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+               obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+
+               trace_i915_gem_object_change_domain(obj,
+                                                   old_read_domains,
+                                                   old_write_domain);
+       }
+
+       obj->cache_level = cache_level; /* reached only if nothing above failed */
+       return 0;
+}
+
 /*
- * Prepare buffer for display plane. Use uninterruptible for possible flush
- * wait, as in modesetting process we're not supposed to be interrupted.
+ * Prepare buffer for display plane (scanout, cursors, etc).
+ * Can be called from an uninterruptible phase (modesetting) and allows
+ * any flushes to be pipelined (for pageflips).
+ *
+ * For the display plane, we want to be in the GTT but out of any write
+ * domains. So in many ways this looks like set_to_gtt_domain() apart from the
+ * ability to pipeline the waits, pinning and any additional subtleties
+ * that may differentiate the display plane from ordinary buffers.
  */
 int
-i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+                                    u32 alignment,
                                     struct intel_ring_buffer *pipelined)
 {
-       uint32_t old_read_domains;
+       u32 old_read_domains, old_write_domain;
        int ret;
 
-       /* Not valid to be called on unbound objects. */
-       if (obj->gtt_space == NULL)
-               return -EINVAL;
-
        ret = i915_gem_object_flush_gpu_write_domain(obj);
        if (ret)
                return ret;
 
-
-       /* Currently, we are always called from an non-interruptible context. */
        if (pipelined != obj->ring) {
                ret = i915_gem_object_wait_rendering(obj);
                if (ret)
                        return ret;
        }
 
+       /* The display engine is not coherent with the LLC cache on gen6.  As
+        * a result, we make sure that the pinning that is about to occur is
+        * done with uncached PTEs. This is the lowest common denominator for all
+        * chipsets.
+        *
+        * However for gen6+, we could do better by using the GFDT bit instead
+        * of uncaching, which would allow us to flush all the LLC-cached data
+        * with that bit in the PTE to main memory with just one PIPE_CONTROL.
+        */
+       ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
+       if (ret)
+               return ret;
+
+       /* As the user may map the buffer once pinned in the display plane
+        * (e.g. libkms for the bootup splash), we have to ensure that we
+        * always use map_and_fenceable for all scanout buffers.
+        */
+       ret = i915_gem_object_pin(obj, alignment, true);
+       if (ret)
+               return ret;
+
        i915_gem_object_flush_cpu_write_domain(obj);
 
+       old_write_domain = obj->base.write_domain;
        old_read_domains = obj->base.read_domains;
+
+       /* It should now be out of any other write domains, and we can update
+        * the domain values for our changes.
+        */
+       BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
        obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
 
        trace_i915_gem_object_change_domain(obj,
                                            old_read_domains,
-                                           obj->base.write_domain);
+                                           old_write_domain);
 
        return 0;
 }
 
 int
-i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj)
+i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
 {
        int ret;
 
-       if (!obj->active)
+       if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
                return 0;
 
        if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
@@ -3049,6 +3169,9 @@ i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj)
                        return ret;
        }
 
+       /* Ensure that we invalidate the GPU's caches and TLBs. */
+       obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
+
        return i915_gem_object_wait_rendering(obj);
 }
 
@@ -3575,7 +3698,23 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
        obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 
-       obj->cache_level = I915_CACHE_NONE;
+       if (IS_GEN6(dev)) {
+               /* On Gen6, we can have the GPU use the LLC (the CPU
+                * cache) for about a 10% performance improvement
+                * compared to uncached.  Graphics requests other than
+                * display scanout are coherent with the CPU in
+                * accessing this cache.  This means in this mode we
+                * don't need to clflush on the CPU side, and on the
+                * GPU side we only need to flush internal caches to
+                * get data visible to the CPU.
+                *
+                * However, we maintain the display planes as UC, and so
+                * need to rebind when first used as such.
+                */
+               obj->cache_level = I915_CACHE_LLC;
+       } else
+               obj->cache_level = I915_CACHE_NONE;
+
        obj->base.driver_private = NULL;
        obj->fence_reg = I915_FENCE_REG_NONE;
        INIT_LIST_HEAD(&obj->mm_list);