Merge branch 'drm-core-next' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied...
authorLinus Torvalds <torvalds@linux-foundation.org>
Tue, 26 Jul 2011 16:21:09 +0000 (09:21 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 26 Jul 2011 16:21:09 +0000 (09:21 -0700)
* 'drm-core-next' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6: (135 commits)
  drm/radeon/kms: fix DP training for DPEncoderService revision bigger than 1.1
  drm/radeon/kms: add missing vddci setting on NI+
  drm/radeon: Add a rmb() in IH processing
  drm/radeon: ATOM Endian fix for atombios_crtc_program_pll()
  drm/radeon: Fix the definition of RADEON_BUF_SWAP_32BIT
  drm/radeon: Do an MMIO read on interrupts when not using MSIs
  drm/radeon: Writeback endian fixes
  drm/radeon: Remove a bunch of useless _iomem casts
  drm/gem: add support for private objects
  DRM: clean up and document parsing of video= parameter
  DRM: Radeon: Fix section mismatch.
  drm: really make debug levels match in edid failure code
  drm/radeon/kms: fix i2c map for rv250/280
  drm/nouveau/gr: disable fifo access and idle before suspend ctx unload
  drm/nouveau: pass flag to engine fini() method on suspend
  drm/nouveau: replace nv04_graph_fifo_access() use with direct reg bashing
  drm/nv40/gr: rewrite/split context takedown functions
  drm/nouveau: detect disabled device in irq handler and return IRQ_NONE
  drm/nouveau: ignore connector type when deciding digital/analog on DVI-I
  drm/nouveau: Add a quirk for Gigabyte NX86T
  ...

drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/nouveau/nv50_graph.c
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/radeon_reg.h
drivers/gpu/drm/radeon/rs600.c

@@@ -214,6 -214,8 +214,8 @@@ struct drm_i915_display_funcs 
        int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc,
                          struct drm_framebuffer *fb,
                          struct drm_i915_gem_object *obj);
+       int (*update_plane)(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+                           int x, int y);
        /* clock updates for mode set */
        /* cursor updates */
        /* render clock increase/decrease */
@@@ -262,9 -264,9 +264,10 @@@ enum intel_pch 
  };
  
  #define QUIRK_PIPEA_FORCE (1<<0)
 +#define QUIRK_LVDS_SSC_DISABLE (1<<1)
  
  struct intel_fbdev;
+ struct intel_fbc_work;
  
  typedef struct drm_i915_private {
        struct drm_device *dev;
        int relative_constants_mode;
  
        void __iomem *regs;
+       u32 gt_fifo_count;
  
        struct intel_gmbus {
                struct i2c_adapter adapter;
        uint32_t last_instdone1;
  
        unsigned long cfb_size;
-       unsigned long cfb_pitch;
-       unsigned long cfb_offset;
-       int cfb_fence;
-       int cfb_plane;
+       unsigned int cfb_fb;
+       enum plane cfb_plane;
        int cfb_y;
+       struct intel_fbc_work *fbc_work;
  
        struct intel_opregion opregion;
  
@@@ -986,15 -988,16 +989,16 @@@ struct drm_i915_file_private 
  
  extern struct drm_ioctl_desc i915_ioctls[];
  extern int i915_max_ioctl;
- extern unsigned int i915_fbpercrtc;
- extern int i915_panel_ignore_lid;
- extern unsigned int i915_powersave;
- extern unsigned int i915_semaphores;
- extern unsigned int i915_lvds_downclock;
- extern unsigned int i915_panel_use_ssc;
- extern int i915_vbt_sdvo_panel_type;
- extern unsigned int i915_enable_rc6;
- extern unsigned int i915_enable_fbc;
+ extern unsigned int i915_fbpercrtc __always_unused;
+ extern int i915_panel_ignore_lid __read_mostly;
+ extern unsigned int i915_powersave __read_mostly;
+ extern unsigned int i915_semaphores __read_mostly;
+ extern unsigned int i915_lvds_downclock __read_mostly;
+ extern unsigned int i915_panel_use_ssc __read_mostly;
+ extern int i915_vbt_sdvo_panel_type __read_mostly;
+ extern unsigned int i915_enable_rc6 __read_mostly;
+ extern unsigned int i915_enable_fbc __read_mostly;
+ extern bool i915_enable_hangcheck __read_mostly;
  
  extern int i915_suspend(struct drm_device *dev, pm_message_t state);
  extern int i915_resume(struct drm_device *dev);
@@@ -1164,7 -1167,7 +1168,7 @@@ void i915_gem_clflush_object(struct drm
  int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
                                            uint32_t read_domains,
                                            uint32_t write_domain);
- int __must_check i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj);
+ int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
  int __must_check i915_gem_init_ringbuffer(struct drm_device *dev);
  void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
  void i915_gem_do_init(struct drm_device *dev,
@@@ -1183,7 -1186,8 +1187,8 @@@ int __must_chec
  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
                                  bool write);
  int __must_check
- i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
+ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+                                    u32 alignment,
                                     struct intel_ring_buffer *pipelined);
  int i915_gem_attach_phys_object(struct drm_device *dev,
                                struct drm_i915_gem_object *obj,
@@@ -1195,13 -1199,16 +1200,18 @@@ void i915_gem_free_all_phys_object(stru
  void i915_gem_release(struct drm_device *dev, struct drm_file *file);
  
  uint32_t
 -i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj);
 +i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
 +                                  uint32_t size,
 +                                  int tiling_mode);
  
+ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+                                   enum i915_cache_level cache_level);
  /* i915_gem_gtt.c */
  void i915_gem_restore_gtt_mappings(struct drm_device *dev);
  int __must_check i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj);
+ void i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj,
+                               enum i915_cache_level cache_level);
  void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj);
  
  /* i915_gem_evict.c */
@@@ -1283,12 -1290,8 +1293,8 @@@ extern void intel_modeset_init(struct d
  extern void intel_modeset_gem_init(struct drm_device *dev);
  extern void intel_modeset_cleanup(struct drm_device *dev);
  extern int intel_modeset_vga_set_state(struct drm_device *dev, bool state);
- extern void i8xx_disable_fbc(struct drm_device *dev);
- extern void g4x_disable_fbc(struct drm_device *dev);
- extern void ironlake_disable_fbc(struct drm_device *dev);
- extern void intel_disable_fbc(struct drm_device *dev);
- extern void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval);
  extern bool intel_fbc_enabled(struct drm_device *dev);
+ extern void intel_disable_fbc(struct drm_device *dev);
  extern bool ironlake_set_drps(struct drm_device *dev, u8 val);
  extern void ironlake_enable_rc6(struct drm_device *dev);
  extern void gen6_set_rps(struct drm_device *dev, u8 val);
@@@ -1374,24 -1374,25 +1374,24 @@@ i915_gem_free_mmap_offset(struct drm_i9
  }
  
  static uint32_t
 -i915_gem_get_gtt_size(struct drm_i915_gem_object *obj)
 +i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
  {
 -      struct drm_device *dev = obj->base.dev;
 -      uint32_t size;
 +      uint32_t gtt_size;
  
        if (INTEL_INFO(dev)->gen >= 4 ||
 -          obj->tiling_mode == I915_TILING_NONE)
 -              return obj->base.size;
 +          tiling_mode == I915_TILING_NONE)
 +              return size;
  
        /* Previous chips need a power-of-two fence region when tiling */
        if (INTEL_INFO(dev)->gen == 3)
 -              size = 1024*1024;
 +              gtt_size = 1024*1024;
        else
 -              size = 512*1024;
 +              gtt_size = 512*1024;
  
 -      while (size < obj->base.size)
 -              size <<= 1;
 +      while (gtt_size < size)
 +              gtt_size <<= 1;
  
 -      return size;
 +      return gtt_size;
  }
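
The pre-gen4 fence sizing above reduces to a small pure function; a userspace sketch for checking it in isolation (function name hypothetical, the TILING_NONE early-out omitted):

#include <stdio.h>
#include <stdint.h>

/* Mirror of the rule in i915_gem_get_gtt_size(): pre-gen4 chips want a
 * power-of-two fence region, no smaller than 1MiB (gen3) or 512KiB. */
static uint32_t demo_fence_size(int gen, uint32_t obj_size)
{
	uint32_t gtt_size;

	if (gen >= 4)
		return obj_size;

	gtt_size = (gen == 3) ? 1024 * 1024 : 512 * 1024;
	while (gtt_size < obj_size)
		gtt_size <<= 1;

	return gtt_size;
}

int main(void)
{
	/* a 1.5MiB tiled object on gen3 needs a 2MiB fence region */
	printf("%u\n", demo_fence_size(3, 1536 * 1024)); /* 2097152 */
	return 0;
}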
  
  /**
   * potential fence register mapping.
   */
  static uint32_t
 -i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj)
 +i915_gem_get_gtt_alignment(struct drm_device *dev,
 +                         uint32_t size,
 +                         int tiling_mode)
  {
 -      struct drm_device *dev = obj->base.dev;
 -
        /*
         * Minimum alignment is 4k (GTT page size), but might be greater
         * if a fence register is needed for the object.
         */
        if (INTEL_INFO(dev)->gen >= 4 ||
 -          obj->tiling_mode == I915_TILING_NONE)
 +          tiling_mode == I915_TILING_NONE)
                return 4096;
  
        /*
         * Previous chips need to be aligned to the size of the smallest
         * fence register that can contain the object.
         */
 -      return i915_gem_get_gtt_size(obj);
 +      return i915_gem_get_gtt_size(dev, size, tiling_mode);
  }
  
  /**
   * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
   *                                     unfenced object
 - * @obj: object to check
 + * @dev: the device
 + * @size: size of the object
 + * @tiling_mode: tiling mode of the object
   *
   * Return the required GTT alignment for an object, only taking into account
   * unfenced tiled surface requirements.
   */
  uint32_t
 -i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj)
 +i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
 +                                  uint32_t size,
 +                                  int tiling_mode)
  {
 -      struct drm_device *dev = obj->base.dev;
 -      int tile_height;
 -
        /*
         * Minimum alignment is 4k (GTT page size) for sane hw.
         */
        if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
 -          obj->tiling_mode == I915_TILING_NONE)
 +          tiling_mode == I915_TILING_NONE)
                return 4096;
  
 -      /*
 -       * Older chips need unfenced tiled buffers to be aligned to the left
 -       * edge of an even tile row (where tile rows are counted as if the bo is
 -       * placed in a fenced gtt region).
 +      /* Previous hardware, however, needs to be aligned to a power-of-two
 +       * tile height. The simplest method for determining this is to reuse
 +       * the power-of-two object size.
         */
 -      if (IS_GEN2(dev))
 -              tile_height = 16;
 -      else if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
 -              tile_height = 32;
 -      else
 -              tile_height = 8;
 -
 -      return tile_height * obj->stride * 2;
 +      return i915_gem_get_gtt_size(dev, size, tiling_mode);
  }
  
  int
@@@ -1763,8 -1771,11 +1763,11 @@@ i915_add_request(struct intel_ring_buff
        ring->outstanding_lazy_request = false;
  
        if (!dev_priv->mm.suspended) {
-               mod_timer(&dev_priv->hangcheck_timer,
-                         jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+               if (i915_enable_hangcheck) {
+                       mod_timer(&dev_priv->hangcheck_timer,
+                                 jiffies +
+                                 msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+               }
                if (was_empty)
                        queue_delayed_work(dev_priv->wq,
                                           &dev_priv->mm.retire_work, HZ);
@@@ -2135,6 -2146,30 +2138,30 @@@ i915_gem_object_wait_rendering(struct d
        return 0;
  }
  
+ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
+ {
+       u32 old_write_domain, old_read_domains;
+       /* Act as a barrier for all accesses through the GTT */
+       mb();
+       /* Force a pagefault for domain tracking on next user access */
+       i915_gem_release_mmap(obj);
+       if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
+               return;
+       old_read_domains = obj->base.read_domains;
+       old_write_domain = obj->base.write_domain;
+       obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
+       obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
+       trace_i915_gem_object_change_domain(obj,
+                                           old_read_domains,
+                                           old_write_domain);
+ }
  /**
   * Unbinds an object from the GTT aperture.
   */
@@@ -2151,23 -2186,28 +2178,28 @@@ i915_gem_object_unbind(struct drm_i915_
                return -EINVAL;
        }
  
-       /* blow away mappings if mapped through GTT */
-       i915_gem_release_mmap(obj);
-       /* Move the object to the CPU domain to ensure that
-        * any possible CPU writes while it's not in the GTT
-        * are flushed when we go to remap it. This will
-        * also ensure that all pending GPU writes are finished
-        * before we unbind.
-        */
-       ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+       ret = i915_gem_object_finish_gpu(obj);
        if (ret == -ERESTARTSYS)
                return ret;
        /* Continue on if we fail due to EIO, the GPU is hung so we
         * should be safe and we need to cleanup or else we might
         * cause memory corruption through use-after-free.
         */
+       i915_gem_object_finish_gtt(obj);
+       /* Move the object to the CPU domain to ensure that
+        * any possible CPU writes while it's not in the GTT
+        * are flushed when we go to remap it.
+        */
+       if (ret == 0)
+               ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+       if (ret == -ERESTARTSYS)
+               return ret;
        if (ret) {
+               /* In the event of a disaster, abandon all caches and
+                * hope for the best.
+                */
                i915_gem_clflush_object(obj);
                obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        }
@@@ -2736,16 -2776,9 +2768,16 @@@ i915_gem_object_bind_to_gtt(struct drm_
                return -EINVAL;
        }
  
 -      fence_size = i915_gem_get_gtt_size(obj);
 -      fence_alignment = i915_gem_get_gtt_alignment(obj);
 -      unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(obj);
 +      fence_size = i915_gem_get_gtt_size(dev,
 +                                         obj->base.size,
 +                                         obj->tiling_mode);
 +      fence_alignment = i915_gem_get_gtt_alignment(dev,
 +                                                   obj->base.size,
 +                                                   obj->tiling_mode);
 +      unfenced_alignment =
 +              i915_gem_get_unfenced_gtt_alignment(dev,
 +                                                  obj->base.size,
 +                                                  obj->tiling_mode);
  
        if (alignment == 0)
                alignment = map_and_fenceable ? fence_alignment :
@@@ -2996,51 -3029,139 +3028,139 @@@ i915_gem_object_set_to_gtt_domain(struc
        return 0;
  }
  
+ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+                                   enum i915_cache_level cache_level)
+ {
+       int ret;
+       if (obj->cache_level == cache_level)
+               return 0;
+       if (obj->pin_count) {
+               DRM_DEBUG("can not change the cache level of pinned objects\n");
+               return -EBUSY;
+       }
+       if (obj->gtt_space) {
+               ret = i915_gem_object_finish_gpu(obj);
+               if (ret)
+                       return ret;
+               i915_gem_object_finish_gtt(obj);
+               /* Before SandyBridge, you could not use tiling or fence
+                * registers with snooped memory, so relinquish any fences
+                * currently pointing to our region in the aperture.
+                */
+               if (INTEL_INFO(obj->base.dev)->gen < 6) {
+                       ret = i915_gem_object_put_fence(obj);
+                       if (ret)
+                               return ret;
+               }
+               i915_gem_gtt_rebind_object(obj, cache_level);
+       }
+       if (cache_level == I915_CACHE_NONE) {
+               u32 old_read_domains, old_write_domain;
+               /* If we're coming from LLC cached, then we haven't
+                * actually been tracking whether the data is in the
+                * CPU cache or not, since we only allow one bit set
+                * in obj->write_domain and have been skipping the clflushes.
+                * Just set it to the CPU cache for now.
+                */
+               WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
+               WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
+               old_read_domains = obj->base.read_domains;
+               old_write_domain = obj->base.write_domain;
+               obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+               obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+               trace_i915_gem_object_change_domain(obj,
+                                                   old_read_domains,
+                                                   old_write_domain);
+       }
+       obj->cache_level = cache_level;
+       return 0;
+ }
  /*
-  * Prepare buffer for display plane. Use uninterruptible for possible flush
-  * wait, as in modesetting process we're not supposed to be interrupted.
+  * Prepare buffer for display plane (scanout, cursors, etc).
+  * Can be called from an uninterruptible phase (modesetting) and allows
+  * any flushes to be pipelined (for pageflips).
+  *
+  * For the display plane, we want to be in the GTT but out of any write
+  * domains. So in many ways this looks like set_to_gtt_domain() apart from the
+  * ability to pipeline the waits, pinning and any additional subtleties
+  * that may differentiate the display plane from ordinary buffers.
   */
  int
- i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
+ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+                                    u32 alignment,
                                     struct intel_ring_buffer *pipelined)
  {
-       uint32_t old_read_domains;
+       u32 old_read_domains, old_write_domain;
        int ret;
  
-       /* Not valid to be called on unbound objects. */
-       if (obj->gtt_space == NULL)
-               return -EINVAL;
        ret = i915_gem_object_flush_gpu_write_domain(obj);
        if (ret)
                return ret;
  
-       /* Currently, we are always called from an non-interruptible context. */
        if (pipelined != obj->ring) {
                ret = i915_gem_object_wait_rendering(obj);
                if (ret)
                        return ret;
        }
  
+       /* The display engine is not coherent with the LLC cache on gen6.  As
+        * a result, we make sure that the pinning that is about to occur is
+        * done with uncached PTEs. This is lowest common denominator for all
+        * chipsets.
+        *
+        * However for gen6+, we could do better by using the GFDT bit instead
+        * of uncaching, which would allow us to flush all the LLC-cached data
+        * with that bit in the PTE to main memory with just one PIPE_CONTROL.
+        */
+       ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
+       if (ret)
+               return ret;
+       /* As the user may map the buffer once pinned in the display plane
+        * (e.g. libkms for the bootup splash), we have to ensure that we
+        * always use map_and_fenceable for all scanout buffers.
+        */
+       ret = i915_gem_object_pin(obj, alignment, true);
+       if (ret)
+               return ret;
        i915_gem_object_flush_cpu_write_domain(obj);
  
+       old_write_domain = obj->base.write_domain;
        old_read_domains = obj->base.read_domains;
+       /* It should now be out of any other write domains, and we can update
+        * the domain values for our changes.
+        */
+       BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
        obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
  
        trace_i915_gem_object_change_domain(obj,
                                            old_read_domains,
-                                           obj->base.write_domain);
+                                           old_write_domain);
  
        return 0;
  }
  
  int
- i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj)
+ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
  {
        int ret;
  
-       if (!obj->active)
+       if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
                return 0;
  
        if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
                        return ret;
        }
  
+       /* Ensure that we invalidate the GPU's caches and TLBs. */
+       obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
        return i915_gem_object_wait_rendering(obj);
  }
  
@@@ -3575,7 -3699,23 +3698,23 @@@ struct drm_i915_gem_object *i915_gem_al
        obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        obj->base.read_domains = I915_GEM_DOMAIN_CPU;
  
-       obj->cache_level = I915_CACHE_NONE;
+       if (IS_GEN6(dev)) {
+               /* On Gen6, we can have the GPU use the LLC (the CPU
+                * cache) for about a 10% performance improvement
+                * compared to uncached.  Graphics requests other than
+                * display scanout are coherent with the CPU in
+                * accessing this cache.  This means in this mode we
+                * don't need to clflush on the CPU side, and on the
+                * GPU side we only need to flush internal caches to
+                * get data visible to the CPU.
+                *
+                * However, we maintain the display planes as UC, and so
+                * need to rebind when first used as such.
+                */
+               obj->cache_level = I915_CACHE_LLC;
+       } else
+               obj->cache_level = I915_CACHE_NONE;
        obj->base.driver_private = NULL;
        obj->fence_reg = I915_FENCE_REG_NONE;
        INIT_LIST_HEAD(&obj->mm_list);
@@@ -24,6 -24,7 +24,7 @@@
   *    Eric Anholt <eric@anholt.net>
   */
  
+ #include <linux/cpufreq.h>
  #include <linux/module.h>
  #include <linux/input.h>
  #include <linux/i2c.h>
@@@ -1157,12 -1158,15 +1158,15 @@@ static void intel_enable_transcoder(str
  
        reg = TRANSCONF(pipe);
        val = I915_READ(reg);
-       /*
-        * make the BPC in transcoder be consistent with
-        * that in pipeconf reg.
-        */
-       val &= ~PIPE_BPC_MASK;
-       val |= I915_READ(PIPECONF(pipe)) & PIPE_BPC_MASK;
+       if (HAS_PCH_IBX(dev_priv->dev)) {
+               /*
+                * make the BPC in the transcoder consistent with
+                * that in the pipeconf reg.
+                */
+               val &= ~PIPE_BPC_MASK;
+               val |= I915_READ(PIPECONF(pipe)) & PIPE_BPC_MASK;
+       }
        I915_WRITE(reg, val | TRANS_ENABLE);
        if (wait_for(I915_READ(reg) & TRANS_STATE_ENABLE, 100))
                DRM_ERROR("failed to enable transcoder %d\n", pipe);
@@@ -1380,6 -1384,28 +1384,28 @@@ static void intel_disable_pch_ports(str
        disable_pch_hdmi(dev_priv, pipe, HDMID);
  }
  
+ static void i8xx_disable_fbc(struct drm_device *dev)
+ {
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       u32 fbc_ctl;
+       /* Disable compression */
+       fbc_ctl = I915_READ(FBC_CONTROL);
+       if ((fbc_ctl & FBC_CTL_EN) == 0)
+               return;
+       fbc_ctl &= ~FBC_CTL_EN;
+       I915_WRITE(FBC_CONTROL, fbc_ctl);
+       /* Wait for compressing bit to clear */
+       if (wait_for((I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING) == 0, 10)) {
+               DRM_DEBUG_KMS("FBC idle timed out\n");
+               return;
+       }
+       DRM_DEBUG_KMS("disabled FBC\n");
+ }
  static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
  {
        struct drm_device *dev = crtc->dev;
        struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
        struct drm_i915_gem_object *obj = intel_fb->obj;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       int cfb_pitch;
        int plane, i;
        u32 fbc_ctl, fbc_ctl2;
  
-       if (fb->pitch == dev_priv->cfb_pitch &&
-           obj->fence_reg == dev_priv->cfb_fence &&
-           intel_crtc->plane == dev_priv->cfb_plane &&
-           I915_READ(FBC_CONTROL) & FBC_CTL_EN)
-               return;
-       i8xx_disable_fbc(dev);
-       dev_priv->cfb_pitch = dev_priv->cfb_size / FBC_LL_SIZE;
-       if (fb->pitch < dev_priv->cfb_pitch)
-               dev_priv->cfb_pitch = fb->pitch;
+       cfb_pitch = dev_priv->cfb_size / FBC_LL_SIZE;
+       if (fb->pitch < cfb_pitch)
+               cfb_pitch = fb->pitch;
  
        /* FBC_CTL wants 64B units */
-       dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1;
-       dev_priv->cfb_fence = obj->fence_reg;
-       dev_priv->cfb_plane = intel_crtc->plane;
-       plane = dev_priv->cfb_plane == 0 ? FBC_CTL_PLANEA : FBC_CTL_PLANEB;
+       cfb_pitch = (cfb_pitch / 64) - 1;
+       plane = intel_crtc->plane == 0 ? FBC_CTL_PLANEA : FBC_CTL_PLANEB;
  
        /* Clear old tags */
        for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++)
                I915_WRITE(FBC_TAG + (i * 4), 0);
  
        /* Set it up... */
-       fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | plane;
-       if (obj->tiling_mode != I915_TILING_NONE)
-               fbc_ctl2 |= FBC_CTL_CPU_FENCE;
+       fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | FBC_CTL_CPU_FENCE;
+       fbc_ctl2 |= plane;
        I915_WRITE(FBC_CONTROL2, fbc_ctl2);
        I915_WRITE(FBC_FENCE_OFF, crtc->y);
  
        fbc_ctl = FBC_CTL_EN | FBC_CTL_PERIODIC;
        if (IS_I945GM(dev))
                fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */
-       fbc_ctl |= (dev_priv->cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
+       fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
        fbc_ctl |= (interval & 0x2fff) << FBC_CTL_INTERVAL_SHIFT;
-       if (obj->tiling_mode != I915_TILING_NONE)
-               fbc_ctl |= dev_priv->cfb_fence;
-       I915_WRITE(FBC_CONTROL, fbc_ctl);
-       DRM_DEBUG_KMS("enabled FBC, pitch %ld, yoff %d, plane %d, ",
-                     dev_priv->cfb_pitch, crtc->y, dev_priv->cfb_plane);
- }
- void i8xx_disable_fbc(struct drm_device *dev)
- {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 fbc_ctl;
-       /* Disable compression */
-       fbc_ctl = I915_READ(FBC_CONTROL);
-       if ((fbc_ctl & FBC_CTL_EN) == 0)
-               return;
-       fbc_ctl &= ~FBC_CTL_EN;
+       fbc_ctl |= obj->fence_reg;
        I915_WRITE(FBC_CONTROL, fbc_ctl);
  
-       /* Wait for compressing bit to clear */
-       if (wait_for((I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING) == 0, 10)) {
-               DRM_DEBUG_KMS("FBC idle timed out\n");
-               return;
-       }
-       DRM_DEBUG_KMS("disabled FBC\n");
+       DRM_DEBUG_KMS("enabled FBC, pitch %d, yoff %d, plane %d, ",
+                     cfb_pitch, crtc->y, intel_crtc->plane);
  }
  
  static bool i8xx_fbc_enabled(struct drm_device *dev)
@@@ -1476,30 -1468,9 +1468,9 @@@ static void g4x_enable_fbc(struct drm_c
        unsigned long stall_watermark = 200;
        u32 dpfc_ctl;
  
-       dpfc_ctl = I915_READ(DPFC_CONTROL);
-       if (dpfc_ctl & DPFC_CTL_EN) {
-               if (dev_priv->cfb_pitch == dev_priv->cfb_pitch / 64 - 1 &&
-                   dev_priv->cfb_fence == obj->fence_reg &&
-                   dev_priv->cfb_plane == intel_crtc->plane &&
-                   dev_priv->cfb_y == crtc->y)
-                       return;
-               I915_WRITE(DPFC_CONTROL, dpfc_ctl & ~DPFC_CTL_EN);
-               intel_wait_for_vblank(dev, intel_crtc->pipe);
-       }
-       dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1;
-       dev_priv->cfb_fence = obj->fence_reg;
-       dev_priv->cfb_plane = intel_crtc->plane;
-       dev_priv->cfb_y = crtc->y;
        dpfc_ctl = plane | DPFC_SR_EN | DPFC_CTL_LIMIT_1X;
-       if (obj->tiling_mode != I915_TILING_NONE) {
-               dpfc_ctl |= DPFC_CTL_FENCE_EN | dev_priv->cfb_fence;
-               I915_WRITE(DPFC_CHICKEN, DPFC_HT_MODIFY);
-       } else {
-               I915_WRITE(DPFC_CHICKEN, ~DPFC_HT_MODIFY);
-       }
+       dpfc_ctl |= DPFC_CTL_FENCE_EN | obj->fence_reg;
+       I915_WRITE(DPFC_CHICKEN, DPFC_HT_MODIFY);
  
        I915_WRITE(DPFC_RECOMP_CTL, DPFC_RECOMP_STALL_EN |
                   (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) |
        DRM_DEBUG_KMS("enabled fbc on plane %d\n", intel_crtc->plane);
  }
  
- void g4x_disable_fbc(struct drm_device *dev)
static void g4x_disable_fbc(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 dpfc_ctl;
@@@ -1567,32 -1538,12 +1538,12 @@@ static void ironlake_enable_fbc(struct 
        u32 dpfc_ctl;
  
        dpfc_ctl = I915_READ(ILK_DPFC_CONTROL);
-       if (dpfc_ctl & DPFC_CTL_EN) {
-               if (dev_priv->cfb_pitch == dev_priv->cfb_pitch / 64 - 1 &&
-                   dev_priv->cfb_fence == obj->fence_reg &&
-                   dev_priv->cfb_plane == intel_crtc->plane &&
-                   dev_priv->cfb_offset == obj->gtt_offset &&
-                   dev_priv->cfb_y == crtc->y)
-                       return;
-               I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl & ~DPFC_CTL_EN);
-               intel_wait_for_vblank(dev, intel_crtc->pipe);
-       }
-       dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1;
-       dev_priv->cfb_fence = obj->fence_reg;
-       dev_priv->cfb_plane = intel_crtc->plane;
-       dev_priv->cfb_offset = obj->gtt_offset;
-       dev_priv->cfb_y = crtc->y;
        dpfc_ctl &= DPFC_RESERVED;
        dpfc_ctl |= (plane | DPFC_CTL_LIMIT_1X);
-       if (obj->tiling_mode != I915_TILING_NONE) {
-               dpfc_ctl |= (DPFC_CTL_FENCE_EN | dev_priv->cfb_fence);
-               I915_WRITE(ILK_DPFC_CHICKEN, DPFC_HT_MODIFY);
-       } else {
-               I915_WRITE(ILK_DPFC_CHICKEN, ~DPFC_HT_MODIFY);
-       }
+       /* Set persistent mode for front-buffer rendering, ala X. */
+       dpfc_ctl |= DPFC_CTL_PERSISTENT_MODE;
+       dpfc_ctl |= (DPFC_CTL_FENCE_EN | obj->fence_reg);
+       I915_WRITE(ILK_DPFC_CHICKEN, DPFC_HT_MODIFY);
  
        I915_WRITE(ILK_DPFC_RECOMP_CTL, DPFC_RECOMP_STALL_EN |
                   (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) |
  
        if (IS_GEN6(dev)) {
                I915_WRITE(SNB_DPFC_CTL_SA,
-                          SNB_CPU_FENCE_ENABLE | dev_priv->cfb_fence);
+                          SNB_CPU_FENCE_ENABLE | obj->fence_reg);
                I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
                sandybridge_blit_fbc_update(dev);
        }
        DRM_DEBUG_KMS("enabled fbc on plane %d\n", intel_crtc->plane);
  }
  
- void ironlake_disable_fbc(struct drm_device *dev)
static void ironlake_disable_fbc(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 dpfc_ctl;
@@@ -1644,24 -1595,109 +1595,109 @@@ bool intel_fbc_enabled(struct drm_devic
        return dev_priv->display.fbc_enabled(dev);
  }
  
void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
static void intel_fbc_work_fn(struct work_struct *__work)
  {
-       struct drm_i915_private *dev_priv = crtc->dev->dev_private;
+       struct intel_fbc_work *work =
+               container_of(to_delayed_work(__work),
+                            struct intel_fbc_work, work);
+       struct drm_device *dev = work->crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       mutex_lock(&dev->struct_mutex);
+       if (work == dev_priv->fbc_work) {
+               /* Double check that we haven't switched fb without cancelling
+                * the prior work.
+                */
+               if (work->crtc->fb == work->fb) {
+                       dev_priv->display.enable_fbc(work->crtc,
+                                                    work->interval);
+                       dev_priv->cfb_plane = to_intel_crtc(work->crtc)->plane;
+                       dev_priv->cfb_fb = work->crtc->fb->base.id;
+                       dev_priv->cfb_y = work->crtc->y;
+               }
+               dev_priv->fbc_work = NULL;
+       }
+       mutex_unlock(&dev->struct_mutex);
+       kfree(work);
+ }
+ static void intel_cancel_fbc_work(struct drm_i915_private *dev_priv)
+ {
+       if (dev_priv->fbc_work == NULL)
+               return;
+       DRM_DEBUG_KMS("cancelling pending FBC enable\n");
+       /* Synchronisation is provided by struct_mutex and checking of
+        * dev_priv->fbc_work, so we can perform the cancellation
+        * entirely asynchronously.
+        */
+       if (cancel_delayed_work(&dev_priv->fbc_work->work))
+               /* work was cancelled before it ran; clean up */
+               kfree(dev_priv->fbc_work);
+       /* Mark the work as no longer wanted so that if it does
+        * wake-up (because the work was already running and waiting
+        * wake up (because the work was already running and waiting
+        * for our mutex), it will discover that it is no longer
+        */
+       dev_priv->fbc_work = NULL;
+ }
+ static void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
+ {
+       struct intel_fbc_work *work;
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
  
        if (!dev_priv->display.enable_fbc)
                return;
  
-       dev_priv->display.enable_fbc(crtc, interval);
+       intel_cancel_fbc_work(dev_priv);
+       work = kzalloc(sizeof *work, GFP_KERNEL);
+       if (work == NULL) {
+               dev_priv->display.enable_fbc(crtc, interval);
+               return;
+       }
+       work->crtc = crtc;
+       work->fb = crtc->fb;
+       work->interval = interval;
+       INIT_DELAYED_WORK(&work->work, intel_fbc_work_fn);
+       dev_priv->fbc_work = work;
+       DRM_DEBUG_KMS("scheduling delayed FBC enable\n");
+       /* Delay the actual enabling to let pageflipping cease and the
+        * display to settle before starting the compression. Note that
+        * this delay also serves a second purpose: it allows for a
+        * vblank to pass after disabling the FBC before we attempt
+        * to modify the control registers.
+        *
+        * A more complicated solution would involve tracking vblanks
+        * following the termination of the page-flipping sequence
+        * and indeed performing the enable as a co-routine and not
+        * waiting synchronously upon the vblank.
+        */
+       schedule_delayed_work(&work->work, msecs_to_jiffies(50));
  }
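
The cancel-then-reschedule dance above (intel_cancel_fbc_work() plus the delayed enable) is a generic workqueue idiom: keep a single pending-work pointer, and have the callback re-check that it is still the current request before acting. A minimal sketch with hypothetical fbc_demo_* names; unlike the driver, which piggybacks on struct_mutex, this carries its own lock:

#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/jiffies.h>

struct fbc_demo_work {
	struct delayed_work work;
	/* parameters captured at schedule time would live here */
};

static DEFINE_MUTEX(fbc_demo_lock);
static struct fbc_demo_work *fbc_demo_pending;

static void fbc_demo_fn(struct work_struct *__work)
{
	struct fbc_demo_work *work =
		container_of(to_delayed_work(__work),
			     struct fbc_demo_work, work);

	mutex_lock(&fbc_demo_lock);
	if (work == fbc_demo_pending) {
		/* still the newest request: apply it here */
		fbc_demo_pending = NULL;
	}
	mutex_unlock(&fbc_demo_lock);

	kfree(work);
}

static void fbc_demo_schedule(void)
{
	struct fbc_demo_work *work;

	mutex_lock(&fbc_demo_lock);
	if (fbc_demo_pending) {
		/* reap a predecessor that never ran; a running one will
		 * see fbc_demo_pending changed and free itself */
		if (cancel_delayed_work(&fbc_demo_pending->work))
			kfree(fbc_demo_pending);
		fbc_demo_pending = NULL;
	}

	work = kzalloc(sizeof(*work), GFP_KERNEL);
	if (work) {
		INIT_DELAYED_WORK(&work->work, fbc_demo_fn);
		fbc_demo_pending = work;
		schedule_delayed_work(&work->work, msecs_to_jiffies(50));
	}
	mutex_unlock(&fbc_demo_lock);
}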
  
  void intel_disable_fbc(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
  
+       intel_cancel_fbc_work(dev_priv);
        if (!dev_priv->display.disable_fbc)
                return;
  
        dev_priv->display.disable_fbc(dev);
+       dev_priv->cfb_plane = -1;
  }
  
  /**
@@@ -1760,8 -1796,13 +1796,13 @@@ static void intel_update_fbc(struct drm
                dev_priv->no_fbc_reason = FBC_BAD_PLANE;
                goto out_disable;
        }
-       if (obj->tiling_mode != I915_TILING_X) {
-               DRM_DEBUG_KMS("framebuffer not tiled, disabling compression\n");
+       /* The use of a CPU fence is mandatory in order to detect writes
+        * by the CPU to the scanout and trigger updates to the FBC.
+        */
+       if (obj->tiling_mode != I915_TILING_X ||
+           obj->fence_reg == I915_FENCE_REG_NONE) {
+               DRM_DEBUG_KMS("framebuffer not tiled or fenced, disabling compression\n");
                dev_priv->no_fbc_reason = FBC_NOT_TILED;
                goto out_disable;
        }
        if (in_dbg_master())
                goto out_disable;
  
+       /* If the scanout has not changed, don't modify the FBC settings.
+        * Note that we make the fundamental assumption that the fb->obj
+        * cannot be unpinned (and have its GTT offset and fence revoked)
+        * without first being decoupled from the scanout and FBC disabled.
+        */
+       if (dev_priv->cfb_plane == intel_crtc->plane &&
+           dev_priv->cfb_fb == fb->base.id &&
+           dev_priv->cfb_y == crtc->y)
+               return;
+       if (intel_fbc_enabled(dev)) {
+               /* We update FBC along two paths, after changing fb/crtc
+                * configuration (modeswitching) and after page-flipping
+                * finishes. For the latter, we know that not only did
+                * we disable the FBC at the start of the page-flip
+                * sequence, but also more than one vblank has passed.
+                *
+                * For the former case of modeswitching, it is possible
+                * to switch between two FBC valid configurations
+                * instantaneously so we do need to disable the FBC
+                * before we can modify its control registers. We also
+                * have to wait for the next vblank for that to take
+                * effect. However, since we delay enabling FBC we can
+                * assume that a vblank has passed since disabling and
+                * that we can safely alter the registers in the deferred
+                * callback.
+                *
+                * In the scenario that we go from a valid to invalid
+                * and then back to valid FBC configuration we have
+                * no strict enforcement that a vblank occurred since
+                * disabling the FBC. However, along all current pipe
+                * disabling paths we do need to wait for a vblank at
+                * some point. And we wait before enabling FBC anyway.
+                */
+               DRM_DEBUG_KMS("disabling active FBC for update\n");
+               intel_disable_fbc(dev);
+       }
        intel_enable_fbc(crtc, 500);
        return;
  
@@@ -1812,14 -1891,10 +1891,10 @@@ intel_pin_and_fence_fb_obj(struct drm_d
        }
  
        dev_priv->mm.interruptible = false;
-       ret = i915_gem_object_pin(obj, alignment, true);
+       ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
        if (ret)
                goto err_interruptible;
  
-       ret = i915_gem_object_set_to_display_plane(obj, pipelined);
-       if (ret)
-               goto err_unpin;
        /* Install a fence for tiled scan-out. Pre-i965 always needs a
         * fence, whereas 965+ only requires a fence if using
         * framebuffer compression.  For simplicity, we always install
@@@ -1841,10 -1916,8 +1916,8 @@@ err_interruptible
        return ret;
  }
  
- /* Assume fb object is pinned & idle & fenced and just update base pointers */
- static int
- intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb,
-                          int x, int y, enum mode_set_atomic state)
+ static int i9xx_update_plane(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+                            int x, int y)
  {
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
                dspcntr |= DISPPLANE_32BPP_NO_ALPHA;
                break;
        default:
-               DRM_ERROR("Unknown color depth\n");
+               DRM_ERROR("Unknown color depth %d\n", fb->bits_per_pixel);
                return -EINVAL;
        }
        if (INTEL_INFO(dev)->gen >= 4) {
                        dspcntr &= ~DISPPLANE_TILED;
        }
  
-       if (HAS_PCH_SPLIT(dev))
-               /* must disable */
-               dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
        I915_WRITE(reg, dspcntr);
  
        Start = obj->gtt_offset;
                I915_WRITE(DSPADDR(plane), Start + Offset);
        POSTING_READ(reg);
  
+       return 0;
+ }
+ static int ironlake_update_plane(struct drm_crtc *crtc,
+                                struct drm_framebuffer *fb, int x, int y)
+ {
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       struct intel_framebuffer *intel_fb;
+       struct drm_i915_gem_object *obj;
+       int plane = intel_crtc->plane;
+       unsigned long Start, Offset;
+       u32 dspcntr;
+       u32 reg;
+       switch (plane) {
+       case 0:
+       case 1:
+               break;
+       default:
+               DRM_ERROR("Can't update plane %d in SAREA\n", plane);
+               return -EINVAL;
+       }
+       intel_fb = to_intel_framebuffer(fb);
+       obj = intel_fb->obj;
+       reg = DSPCNTR(plane);
+       dspcntr = I915_READ(reg);
+       /* Mask out pixel format bits in case we change it */
+       dspcntr &= ~DISPPLANE_PIXFORMAT_MASK;
+       switch (fb->bits_per_pixel) {
+       case 8:
+               dspcntr |= DISPPLANE_8BPP;
+               break;
+       case 16:
+               if (fb->depth != 16)
+                       return -EINVAL;
+               dspcntr |= DISPPLANE_16BPP;
+               break;
+       case 24:
+       case 32:
+               if (fb->depth == 24)
+                       dspcntr |= DISPPLANE_32BPP_NO_ALPHA;
+               else if (fb->depth == 30)
+                       dspcntr |= DISPPLANE_32BPP_30BIT_NO_ALPHA;
+               else
+                       return -EINVAL;
+               break;
+       default:
+               DRM_ERROR("Unknown color depth %d\n", fb->bits_per_pixel);
+               return -EINVAL;
+       }
+       if (obj->tiling_mode != I915_TILING_NONE)
+               dspcntr |= DISPPLANE_TILED;
+       else
+               dspcntr &= ~DISPPLANE_TILED;
+       /* must disable */
+       dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
+       I915_WRITE(reg, dspcntr);
+       Start = obj->gtt_offset;
+       Offset = y * fb->pitch + x * (fb->bits_per_pixel / 8);
+       DRM_DEBUG_KMS("Writing base %08lX %08lX %d %d %d\n",
+                     Start, Offset, x, y, fb->pitch);
+       I915_WRITE(DSPSTRIDE(plane), fb->pitch);
+       I915_WRITE(DSPSURF(plane), Start);
+       I915_WRITE(DSPTILEOFF(plane), (y << 16) | x);
+       I915_WRITE(DSPADDR(plane), Offset);
+       POSTING_READ(reg);
+       return 0;
+ }
+ /* Assume fb object is pinned & idle & fenced and just update base pointers */
+ static int
+ intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+                          int x, int y, enum mode_set_atomic state)
+ {
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int ret;
+       ret = dev_priv->display.update_plane(crtc, fb, x, y);
+       if (ret)
+               return ret;
        intel_update_fbc(dev);
        intel_increase_pllclock(crtc);
  
@@@ -1971,7 -2133,7 +2133,7 @@@ intel_pipe_set_base(struct drm_crtc *cr
                 * This should only fail upon a hung GPU, in which case we
                 * can safely continue.
                 */
-               ret = i915_gem_object_flush_gpu(obj);
+               ret = i915_gem_object_finish_gpu(obj);
                (void) ret;
        }
  
@@@ -2622,6 -2784,7 +2784,7 @@@ static void ironlake_pch_enable(struct 
        /* For PCH DP, enable TRANS_DP_CTL */
        if (HAS_PCH_CPT(dev) &&
            intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) {
+               u32 bpc = (I915_READ(PIPECONF(pipe)) & PIPE_BPC_MASK) >> 5;
                reg = TRANS_DP_CTL(pipe);
                temp = I915_READ(reg);
                temp &= ~(TRANS_DP_PORT_SEL_MASK |
                          TRANS_DP_BPC_MASK);
                temp |= (TRANS_DP_OUTPUT_ENABLE |
                         TRANS_DP_ENH_FRAMING);
-               temp |= TRANS_DP_8BPC;
+               temp |= bpc << 9; /* same format but at 11:9 */
  
                if (crtc->mode.flags & DRM_MODE_FLAG_PHSYNC)
                        temp |= TRANS_DP_HSYNC_ACTIVE_HIGH;
@@@ -2732,9 -2895,8 +2895,8 @@@ static void ironlake_crtc_disable(struc
  
        intel_disable_plane(dev_priv, plane, pipe);
  
-       if (dev_priv->cfb_plane == plane &&
-           dev_priv->display.disable_fbc)
-               dev_priv->display.disable_fbc(dev);
+       if (dev_priv->cfb_plane == plane)
+               intel_disable_fbc(dev);
  
        intel_disable_pipe(dev_priv, pipe);
  
@@@ -2898,9 -3060,8 +3060,8 @@@ static void i9xx_crtc_disable(struct dr
        intel_crtc_dpms_overlay(intel_crtc, false);
        intel_crtc_update_cursor(crtc, false);
  
-       if (dev_priv->cfb_plane == plane &&
-           dev_priv->display.disable_fbc)
-               dev_priv->display.disable_fbc(dev);
+       if (dev_priv->cfb_plane == plane)
+               intel_disable_fbc(dev);
  
        intel_disable_plane(dev_priv, plane, pipe);
        intel_disable_pipe(dev_priv, pipe);
@@@ -4305,10 -4466,136 +4466,137 @@@ static void intel_update_watermarks(str
  
  static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv)
  {
 -      return dev_priv->lvds_use_ssc && i915_panel_use_ssc;
 +      return dev_priv->lvds_use_ssc && i915_panel_use_ssc
 +              && !(dev_priv->quirks & QUIRK_LVDS_SSC_DISABLE);
  }
  
+ /**
+  * intel_choose_pipe_bpp_dither - figure out what color depth the pipe should send
+  * @crtc: CRTC structure
+  *
+  * A pipe may be connected to one or more outputs.  Based on the depth of the
+  * attached framebuffer, choose a good color depth to use on the pipe.
+  *
+  * If possible, match the pipe depth to the fb depth.  In some cases, this
+  * isn't ideal, because the connected output supports a lesser or restricted
+  * set of depths.  Resolve that here:
+  *    LVDS typically supports only 6bpc, so clamp down in that case
+  *    HDMI supports only 8bpc or 12bpc, so clamp to 8bpc with dither for 10bpc
+  *    Displays may support a restricted set as well, check EDID and clamp as
+  *      appropriate.
+  *
+  * RETURNS:
+  * Dithering requirement (i.e. false if display bpc and pipe bpc match,
+  * true if they don't match).
+  */
+ static bool intel_choose_pipe_bpp_dither(struct drm_crtc *crtc,
+                                        unsigned int *pipe_bpp)
+ {
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_encoder *encoder;
+       struct drm_connector *connector;
+       unsigned int display_bpc = UINT_MAX, bpc;
+       /* Walk the encoders & connectors on this crtc, get min bpc */
+       list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+               struct intel_encoder *intel_encoder = to_intel_encoder(encoder);
+               if (encoder->crtc != crtc)
+                       continue;
+               if (intel_encoder->type == INTEL_OUTPUT_LVDS) {
+                       unsigned int lvds_bpc;
+                       if ((I915_READ(PCH_LVDS) & LVDS_A3_POWER_MASK) ==
+                           LVDS_A3_POWER_UP)
+                               lvds_bpc = 8;
+                       else
+                               lvds_bpc = 6;
+                       if (lvds_bpc < display_bpc) {
+                               DRM_DEBUG_DRIVER("clamping display bpc (was %d) to LVDS (%d)\n", display_bpc, lvds_bpc);
+                               display_bpc = lvds_bpc;
+                       }
+                       continue;
+               }
+               if (intel_encoder->type == INTEL_OUTPUT_EDP) {
+                       /* Use VBT settings if we have an eDP panel */
+                       unsigned int edp_bpc = dev_priv->edp.bpp / 3;
+                       if (edp_bpc < display_bpc) {
+                               DRM_DEBUG_DRIVER("clamping display bpc (was %d) to eDP (%d)\n", display_bpc, edp_bpc);
+                               display_bpc = edp_bpc;
+                       }
+                       continue;
+               }
+               /* Not one of the known troublemakers, check the EDID */
+               list_for_each_entry(connector, &dev->mode_config.connector_list,
+                                   head) {
+                       if (connector->encoder != encoder)
+                               continue;
+                       if (connector->display_info.bpc < display_bpc) {
+                               DRM_DEBUG_DRIVER("clamping display bpc (was %d) to EDID reported max of %d\n", display_bpc, connector->display_info.bpc);
+                               display_bpc = connector->display_info.bpc;
+                       }
+               }
+               /*
+                * HDMI is either 12 or 8, so if the display lets 10bpc sneak
+                * through, clamp it down.  (Note: >12bpc will be caught below.)
+                */
+               if (intel_encoder->type == INTEL_OUTPUT_HDMI) {
+                       if (display_bpc > 8 && display_bpc < 12) {
+                               DRM_DEBUG_DRIVER("forcing bpc to 12 for HDMI\n");
+                               display_bpc = 12;
+                       } else {
+                               DRM_DEBUG_DRIVER("forcing bpc to 8 for HDMI\n");
+                               display_bpc = 8;
+                       }
+               }
+       }
+       /*
+        * We could just drive the pipe at the highest bpc all the time and
+        * enable dithering as needed, but that costs bandwidth.  So choose
+        * the minimum value that expresses the full color range of the fb but
+        * also stays within the max display bpc discovered above.
+        */
+       switch (crtc->fb->depth) {
+       case 8:
+               bpc = 8; /* since we go through a colormap */
+               break;
+       case 15:
+       case 16:
+               bpc = 6; /* min is 18bpp */
+               break;
+       case 24:
+               bpc = min((unsigned int)8, display_bpc);
+               break;
+       case 30:
+               bpc = min((unsigned int)10, display_bpc);
+               break;
+       case 48:
+               bpc = min((unsigned int)12, display_bpc);
+               break;
+       default:
+               DRM_DEBUG("unsupported depth, assuming 24 bits\n");
+               bpc = min((unsigned int)8, display_bpc);
+               break;
+       }
+       DRM_DEBUG_DRIVER("setting pipe bpc to %d (max display bpc %d)\n",
+                        bpc, display_bpc);
+       *pipe_bpp = bpc * 3;
+       return display_bpc != bpc;
+ }
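
Distilled, the fb-depth half of the chooser above is: map the fb depth to its natural bpc, clamp to the display maximum computed by the encoder/EDID walk, and dither when the two differ. A userspace sketch (hypothetical names; display_bpc is assumed already clamped); for example, a 16bpp fb on an 8bpc display runs the pipe at 18bpp with dithering:

#include <stdio.h>

/* choose_bpc: distilled from intel_choose_pipe_bpp_dither() above. */
static unsigned int choose_bpc(unsigned int fb_depth,
			       unsigned int display_bpc, int *dither)
{
	unsigned int bpc;

	switch (fb_depth) {
	case 8:  bpc = 8; break;	/* full range via colormap */
	case 15:
	case 16: bpc = 6; break;	/* 18bpp is the minimum pipe depth */
	case 24: bpc = display_bpc < 8 ? display_bpc : 8; break;
	case 30: bpc = display_bpc < 10 ? display_bpc : 10; break;
	case 48: bpc = display_bpc < 12 ? display_bpc : 12; break;
	default: bpc = display_bpc < 8 ? display_bpc : 8; break;
	}

	*dither = (display_bpc != bpc);
	return bpc * 3;			/* pipe_bpp */
}

int main(void)
{
	int dither;
	unsigned int pipe_bpp = choose_bpc(16, 8, &dither);

	printf("pipe_bpp=%u dither=%d\n", pipe_bpp, dither); /* 18 1 */
	return 0;
}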
  static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
                              struct drm_display_mode *mode,
                              struct drm_display_mode *adjusted_mode,
@@@ -4721,7 -5008,9 +5009,9 @@@ static int ironlake_crtc_mode_set(struc
        struct fdi_m_n m_n = {0};
        u32 temp;
        u32 lvds_sync = 0;
-       int target_clock, pixel_multiplier, lane, link_bw, bpp, factor;
+       int target_clock, pixel_multiplier, lane, link_bw, factor;
+       unsigned int pipe_bpp;
+       bool dither;
  
        list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
                if (encoder->base.crtc != crtc)
        /* determine panel color depth */
        temp = I915_READ(PIPECONF(pipe));
        temp &= ~PIPE_BPC_MASK;
-       if (is_lvds) {
-               /* the BPC will be 6 if it is 18-bit LVDS panel */
-               if ((I915_READ(PCH_LVDS) & LVDS_A3_POWER_MASK) == LVDS_A3_POWER_UP)
-                       temp |= PIPE_8BPC;
-               else
-                       temp |= PIPE_6BPC;
-       } else if (has_edp_encoder) {
-               switch (dev_priv->edp.bpp/3) {
-               case 8:
-                       temp |= PIPE_8BPC;
-                       break;
-               case 10:
-                       temp |= PIPE_10BPC;
-                       break;
-               case 6:
-                       temp |= PIPE_6BPC;
-                       break;
-               case 12:
-                       temp |= PIPE_12BPC;
-                       break;
-               }
-       } else
-               temp |= PIPE_8BPC;
-       I915_WRITE(PIPECONF(pipe), temp);
-       switch (temp & PIPE_BPC_MASK) {
-       case PIPE_8BPC:
-               bpp = 24;
+       dither = intel_choose_pipe_bpp_dither(crtc, &pipe_bpp);
+       switch (pipe_bpp) {
+       case 18:
+               temp |= PIPE_6BPC;
                break;
-       case PIPE_10BPC:
-               bpp = 30;
+       case 24:
+               temp |= PIPE_8BPC;
                break;
-       case PIPE_6BPC:
-               bpp = 18;
+       case 30:
+               temp |= PIPE_10BPC;
                break;
-       case PIPE_12BPC:
-               bpp = 36;
+       case 36:
+               temp |= PIPE_12BPC;
                break;
        default:
-               DRM_ERROR("unknown pipe bpc value\n");
-               bpp = 24;
+               WARN(1, "intel_choose_pipe_bpp returned invalid value\n");
+               temp |= PIPE_8BPC;
+               pipe_bpp = 24;
+               break;
        }
  
+       intel_crtc->bpp = pipe_bpp;
+       I915_WRITE(PIPECONF(pipe), temp);
        if (!lane) {
                /*
                 * Account for spread spectrum to avoid
                 * oversubscribing the link. Max center spread
                 * is 2.5%; use 5% for safety's sake.
                 */
-               u32 bps = target_clock * bpp * 21 / 20;
+               u32 bps = target_clock * intel_crtc->bpp * 21 / 20;
                lane = bps / (link_bw * 8) + 1;
        }
  
  
        if (pixel_multiplier > 1)
                link_bw *= pixel_multiplier;
-       ironlake_compute_m_n(bpp, lane, target_clock, link_bw, &m_n);
+       ironlake_compute_m_n(intel_crtc->bpp, lane, target_clock, link_bw,
+                            &m_n);
  
        /* Ironlake: try to setup display ref clock before DPLL
         * enabling. This is only under driver's control after
                I915_WRITE(PCH_LVDS, temp);
        }
  
-       /* set the dithering flag and clear for anything other than a panel. */
        pipeconf &= ~PIPECONF_DITHER_EN;
        pipeconf &= ~PIPECONF_DITHER_TYPE_MASK;
-       if (dev_priv->lvds_dither && (is_lvds || has_edp_encoder)) {
+       if ((is_lvds && dev_priv->lvds_dither) || dither) {
                pipeconf |= PIPECONF_DITHER_EN;
                pipeconf |= PIPECONF_DITHER_TYPE_ST1;
        }
        if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
                intel_dp_set_m_n(crtc, mode, adjusted_mode);
        } else {
@@@ -5435,21 -5704,15 +5705,15 @@@ static int intel_crtc_cursor_set(struc
                        goto fail_locked;
                }
  
-               ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
-               if (ret) {
-                       DRM_ERROR("failed to pin cursor bo\n");
-                       goto fail_locked;
-               }
-               ret = i915_gem_object_set_to_gtt_domain(obj, 0);
+               ret = i915_gem_object_pin_to_display_plane(obj, 0, NULL);
                if (ret) {
                        DRM_ERROR("failed to move cursor bo into the GTT\n");
-                       goto fail_unpin;
+                       goto fail_locked;
                }
  
                ret = i915_gem_object_put_fence(obj);
                if (ret) {
-                       DRM_ERROR("failed to move cursor bo into the GTT\n");
+                       DRM_ERROR("failed to release fence for cursor");
                        goto fail_unpin;
                }
  
@@@ -6152,6 -6415,7 +6416,7 @@@ static void intel_unpin_work_fn(struct 
        drm_gem_object_unreference(&work->pending_flip_obj->base);
        drm_gem_object_unreference(&work->old_fb_obj->base);
  
+       intel_update_fbc(work->dev);
        mutex_unlock(&work->dev->struct_mutex);
        kfree(work);
  }
@@@ -6516,6 -6780,7 +6781,7 @@@ static int intel_crtc_page_flip(struct 
        if (ret)
                goto cleanup_pending;
  
+       intel_disable_fbc(dev);
        mutex_unlock(&dev->struct_mutex);
  
        trace_i915_flip_request(intel_crtc->plane, obj);
@@@ -6644,6 -6909,7 +6910,7 @@@ static void intel_crtc_init(struct drm_
  
        intel_crtc_reset(&intel_crtc->base);
        intel_crtc->active = true; /* force the pipe off on setup_init_config */
+       intel_crtc->bpp = 24; /* default for pre-Ironlake */
  
        if (HAS_PCH_SPLIT(dev)) {
                intel_helper_funcs.prepare = ironlake_crtc_prepare;
@@@ -6870,6 -7136,11 +7137,11 @@@ int intel_framebuffer_init(struct drm_d
        switch (mode_cmd->bpp) {
        case 8:
        case 16:
+               /* Only pre-ILK can handle 5:5:5 */
+               if (mode_cmd->depth == 15 && !HAS_PCH_SPLIT(dev))
+                       return -EINVAL;
+               break;
        case 24:
        case 32:
                break;
@@@ -7284,6 -7555,59 +7556,59 @@@ void gen6_enable_rps(struct drm_i915_pr
        mutex_unlock(&dev_priv->dev->struct_mutex);
  }
  
+ void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
+ {
+       int min_freq = 15;
+       int gpu_freq, ia_freq, max_ia_freq;
+       int scaling_factor = 180;
+
+       max_ia_freq = cpufreq_quick_get_max(0);
+       /*
+        * Default to measured freq if none found, PCU will ensure we don't go
+        * over
+        */
+       if (!max_ia_freq)
+               max_ia_freq = tsc_khz;
+
+       /* Convert from kHz to MHz */
+       max_ia_freq /= 1000;
+
+       mutex_lock(&dev_priv->dev->struct_mutex);
+
+       /*
+        * For each potential GPU frequency, load a ring frequency we'd like
+        * to use for memory access.  We do this by specifying the IA frequency
+        * the PCU should use as a reference to determine the ring frequency.
+        */
+       for (gpu_freq = dev_priv->max_delay; gpu_freq >= dev_priv->min_delay;
+            gpu_freq--) {
+               int diff = dev_priv->max_delay - gpu_freq;
+
+               /*
+                * For GPU frequencies less than 750MHz, just use the lowest
+                * ring freq.
+                */
+               if (gpu_freq < min_freq)
+                       ia_freq = 800;
+               else
+                       ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
+               ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
+
+               I915_WRITE(GEN6_PCODE_DATA,
+                          (ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT) |
+                          gpu_freq);
+               I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY |
+                          GEN6_PCODE_WRITE_MIN_FREQ_TABLE);
+               if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) &
+                             GEN6_PCODE_READY) == 0, 10)) {
+                       DRM_ERROR("pcode write of freq table timed out\n");
+                       continue;
+               }
+       }
+
+       mutex_unlock(&dev_priv->dev->struct_mutex);
+ }
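
Worked numbers for the table programming above (all values assumed for illustration): with max_ia_freq = 3000 MHz and scaling_factor = 180, a GPU step 4 below the top gets ia_freq = 3000 - (4 * 180) / 2 = 2640 MHz, programmed as DIV_ROUND_CLOSEST(2640, 100) = 26; steps below min_freq all collapse to 800 MHz -> 8. A standalone version of the loop's arithmetic:

    #include <stdio.h>

    #define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

    int main(void)
    {
            int max_ia_freq = 3000;     /* MHz; assumed CPU max frequency */
            int scaling_factor = 180;
            int min_freq = 15;
            int max_delay = 20, min_delay = 12; /* assumed GPU freq range */
            int gpu_freq;

            for (gpu_freq = max_delay; gpu_freq >= min_delay; gpu_freq--) {
                    int diff = max_delay - gpu_freq;
                    int ia_freq;

                    if (gpu_freq < min_freq)    /* below ~750MHz: floor it */
                            ia_freq = 800;
                    else
                            ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
                    ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);

                    printf("gpu step %2d -> IA ratio %d\n", gpu_freq, ia_freq);
            }
            return 0;
    }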
  static void ironlake_init_clock_gating(struct drm_device *dev)
  {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@@ -7640,9 -7964,11 +7965,11 @@@ static void intel_init_display(struct d
        if (HAS_PCH_SPLIT(dev)) {
                dev_priv->display.dpms = ironlake_crtc_dpms;
                dev_priv->display.crtc_mode_set = ironlake_crtc_mode_set;
+               dev_priv->display.update_plane = ironlake_update_plane;
        } else {
                dev_priv->display.dpms = i9xx_crtc_dpms;
                dev_priv->display.crtc_mode_set = i9xx_crtc_mode_set;
+               dev_priv->display.update_plane = i9xx_update_plane;
        }
  
        if (I915_HAS_FBC(dev)) {
@@@ -7811,15 -8137,6 +8138,15 @@@ static void quirk_pipea_force (struct d
        DRM_DEBUG_DRIVER("applying pipe a force quirk\n");
  }
  
 +/*
 + * Some machines (Lenovo U160) do not work with SSC on LVDS for some reason
 + */
 +static void quirk_ssc_force_disable(struct drm_device *dev)
 +{
 +      struct drm_i915_private *dev_priv = dev->dev_private;
 +      dev_priv->quirks |= QUIRK_LVDS_SSC_DISABLE;
 +}
 +
  struct intel_quirk {
        int device;
        int subsystem_vendor;
@@@ -7848,9 -8165,6 +8175,9 @@@ struct intel_quirk intel_quirks[] = 
        /* 855 & before need to leave pipe A & dpll A up */
        { 0x3582, PCI_ANY_ID, PCI_ANY_ID, quirk_pipea_force },
        { 0x2562, PCI_ANY_ID, PCI_ANY_ID, quirk_pipea_force },
 +
 +      /* Lenovo U160 cannot use SSC on LVDS */
 +      { 0x0046, 0x17aa, 0x3920, quirk_ssc_force_disable },
  };
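
intel_init_quirks() below walks this table, keyed by PCI device ID plus subsystem vendor/device with PCI_ANY_ID as a wildcard. A minimal sketch of the matching it presumably performs (the matcher below is illustrative, not the driver's code):

    #include <stdio.h>

    #define PCI_ANY_ID (~0u)

    struct quirk {
            unsigned int device, sub_vendor, sub_device;
            void (*hook)(void);
    };

    static void ssc_disable(void) { puts("SSC disabled"); }

    static const struct quirk quirks[] = {
            /* Lenovo U160: device 0x0046 with subsystem 17aa:3920 */
            { 0x0046, 0x17aa, 0x3920, ssc_disable },
    };

    static int match(unsigned int want, unsigned int have)
    {
            return want == PCI_ANY_ID || want == have;
    }

    int main(void)
    {
            unsigned int dev = 0x0046, sv = 0x17aa, sd = 0x3920;
            unsigned int i;

            for (i = 0; i < sizeof(quirks) / sizeof(quirks[0]); i++)
                    if (match(quirks[i].device, dev) &&
                        match(quirks[i].sub_vendor, sv) &&
                        match(quirks[i].sub_device, sd))
                            quirks[i].hook();
            return 0;
    }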
  
  static void intel_init_quirks(struct drm_device *dev)
@@@ -7939,8 -8253,10 +8266,10 @@@ void intel_modeset_init(struct drm_devi
                intel_init_emon(dev);
        }
  
-       if (IS_GEN6(dev))
+       if (IS_GEN6(dev) || IS_GEN7(dev)) {
                gen6_enable_rps(dev_priv);
+               gen6_update_ring_freq(dev_priv);
+       }
  
        INIT_WORK(&dev_priv->idle_work, intel_idle_update);
        setup_timer(&dev_priv->idle_timer, intel_gpu_idle_timer,
@@@ -7976,12 -8292,11 +8305,11 @@@ void intel_modeset_cleanup(struct drm_d
                intel_increase_pllclock(crtc);
        }
  
-       if (dev_priv->display.disable_fbc)
-               dev_priv->display.disable_fbc(dev);
+       intel_disable_fbc(dev);
  
        if (IS_IRONLAKE_M(dev))
                ironlake_disable_drps(dev);
-       if (IS_GEN6(dev))
+       if (IS_GEN6(dev) || IS_GEN7(dev))
                gen6_disable_rps(dev);
  
        if (IS_IRONLAKE_M(dev))
        drm_irq_uninstall(dev);
        cancel_work_sync(&dev_priv->hotplug_work);
  
+       /* flush any delayed tasks or pending work */
+       flush_scheduled_work();
        /* Shut off idle work before the crtcs get freed. */
        list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
                intel_crtc = to_intel_crtc(crtc);
@@@ -31,6 -31,7 +31,6 @@@
  #include "nouveau_grctx.h"
  #include "nouveau_dma.h"
  #include "nouveau_vm.h"
 -#include "nouveau_ramht.h"
  #include "nv50_evo.h"
  
  struct nv50_graph_engine {
@@@ -124,7 -125,6 +124,6 @@@ static voi
  nv50_graph_init_reset(struct drm_device *dev)
  {
        uint32_t pmc_e = NV_PMC_ENABLE_PGRAPH | (1 << 21);
        NV_DEBUG(dev, "\n");
  
        nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) & ~pmc_e);
@@@ -254,9 -254,13 +253,13 @@@ nv50_graph_init(struct drm_device *dev
  }
  
  static int
- nv50_graph_fini(struct drm_device *dev, int engine)
+ nv50_graph_fini(struct drm_device *dev, int engine, bool suspend)
  {
-       NV_DEBUG(dev, "\n");
+       nv_mask(dev, 0x400500, 0x00010001, 0x00000000);
+       if (!nv_wait(dev, 0x400700, ~0, 0) && suspend) {
+               nv_mask(dev, 0x400500, 0x00010001, 0x00010001);
+               return -EBUSY;
+       }
        nv50_graph_unload_context(dev);
        nv_wr32(dev, 0x40013c, 0x00000000);
        return 0;
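
The reworked fini stops new PFIFO access to PGRAPH, polls the status register for idle, and, if a suspend-time idle wait times out, restores access and returns -EBUSY rather than unloading a busy context. A self-contained sketch of that mask / wait / roll-back shape (the register model below is a stand-in, not nouveau's MMIO helpers):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t fifo_access = 0x00010001; /* models reg 0x400500 */
    static uint32_t status;                   /* models reg 0x400700, 0 = idle */

    static void mask(uint32_t *reg, uint32_t m, uint32_t v)
    {
            *reg = (*reg & ~m) | v;     /* read-modify-write, like nv_mask() */
    }

    static bool wait_idle(void)
    {
            int t;

            for (t = 0; t < 1000; t++)  /* nv_wait() polls with a deadline */
                    if (status == 0)
                            return true;
            return false;
    }

    static int graph_fini(bool suspend)
    {
            mask(&fifo_access, 0x00010001, 0x00000000); /* block new work */
            if (!wait_idle() && suspend) {
                    /* still busy: back out rather than unload a live context */
                    mask(&fifo_access, 0x00010001, 0x00010001);
                    return -16;         /* -EBUSY */
            }
            /* ... unload context ... */
            return 0;
    }

    int main(void)
    {
            printf("fini: %d\n", graph_fini(true));
            return 0;
    }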
@@@ -1382,9 -1382,6 +1382,6 @@@ int evergreen_cp_resume(struct radeon_d
  
        /* set the wb address whether it's enabled or not */
        WREG32(CP_RB_RPTR_ADDR,
- #ifdef __BIG_ENDIAN
-              RB_RPTR_SWAP(2) |
- #endif
               ((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC));
        WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
        WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
@@@ -2000,7 -1997,7 +1997,7 @@@ static void evergreen_gpu_init(struct r
                        gb_backend_map = 0x66442200;
                        break;
                case CHIP_JUNIPER:
 -                      gb_backend_map = 0x00006420;
 +                      gb_backend_map = 0x00002200;
                        break;
                default:
                        gb_backend_map =
        rdev->config.evergreen.tile_config |=
                ((gb_addr_config & 0x30000000) >> 28) << 12;
  
+       rdev->config.evergreen.backend_map = gb_backend_map;
        WREG32(GB_BACKEND_MAP, gb_backend_map);
        WREG32(GB_ADDR_CONFIG, gb_addr_config);
        WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
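
The tile_config line above is a plain extract-and-repack: take bits 29:28 of GB_ADDR_CONFIG and store them at bits 13:12 of the reported tile config. A quick standalone check (register value assumed):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t gb_addr_config = 0x20000000; /* assumed example value */
            uint32_t tile_config = 0;

            /* bits 29:28 -> bits 13:12 */
            tile_config |= ((gb_addr_config & 0x30000000) >> 28) << 12;

            printf("tile_config = 0x%08x\n", tile_config); /* 0x00002000 */
            return 0;
    }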
@@@ -2761,6 -2759,9 +2759,9 @@@ int evergreen_irq_process(struct radeon
                return IRQ_NONE;
        }
  restart_ih:
+       /* Order reading of wptr vs. reading of IH ring data */
+       rmb();
        /* display interrupts */
        evergreen_irq_ack(rdev);
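
The barrier added above keeps the CPU from reading IH ring entries before it has read the write pointer that makes them valid. A minimal consumer-side model of the same ordering, using C11 acquire semantics where the kernel uses rmb() (generic ring, not the radeon IH layout):

    #include <stdatomic.h>
    #include <stdint.h>

    #define RING_SIZE 256

    struct ring {
            uint32_t data[RING_SIZE];
            _Atomic uint32_t wptr;  /* advanced by the producer (GPU) */
            uint32_t rptr;          /* owned by the consumer (CPU) */
    };

    static void consume(struct ring *r, void (*handle)(uint32_t))
    {
            /* Acquire pairs with the producer's release: entries written
             * before wptr was advanced are visible after this load.  The
             * rmb() in the hunk above enforces the same read ordering. */
            uint32_t wptr = atomic_load_explicit(&r->wptr,
                                                 memory_order_acquire);

            while (r->rptr != wptr) {
                    handle(r->data[r->rptr]);
                    r->rptr = (r->rptr + 1) % RING_SIZE;
            }
    }

    static void handle_entry(uint32_t v) { (void)v; /* process one entry */ }

    int main(void)
    {
            static struct ring r = { .wptr = 0 };

            consume(&r, handle_entry);
            return 0;
    }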
  
  #       define RADEON_BUS_READ_BURST         (1 << 30)
  #define RADEON_BUS_CNTL1                    0x0034
  #       define RADEON_BUS_WAIT_ON_LOCK_EN    (1 << 4)
 +#define RV370_BUS_CNTL                      0x004c
 +#       define RV370_BUS_BIOS_DIS_ROM        (1 << 2)
  /* rv370/rv380, rv410, r423/r430/r480, r5xx */
  #define RADEON_MSI_REARM_EN               0x0160
  #     define RV370_MSI_REARM_EN            (1 << 0)
  #     define RADEON_RB_BUFSZ_MASK             (0x3f << 0)
  #     define RADEON_RB_BLKSZ_SHIFT            8
  #     define RADEON_RB_BLKSZ_MASK             (0x3f << 8)
- #     define RADEON_BUF_SWAP_32BIT            (1 << 17)
+ #     define RADEON_BUF_SWAP_32BIT            (2 << 16)
  #     define RADEON_MAX_FETCH_SHIFT           18
  #     define RADEON_MAX_FETCH_MASK            (0x3 << 18)
  #     define RADEON_RB_NO_UPDATE              (1 << 27)
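
Note that (2 << 16) and the old (1 << 17) are the same number, 0x20000; the fix is about meaning: BUF_SWAP is a two-bit field at bits 17:16 of RB_CNTL, and the 32-bit-swap encoding is field value 2. A sketch of treating it as a field (the mask and any other encodings are illustrative assumptions; only the 32-bit value appears in the hunk):

    #include <stdio.h>

    #define BUF_SWAP_SHIFT  16
    #define BUF_SWAP_MASK   (0x3 << BUF_SWAP_SHIFT)  /* two-bit field */
    #define BUF_SWAP_NONE   (0 << BUF_SWAP_SHIFT)
    #define BUF_SWAP_32BIT  (2 << BUF_SWAP_SHIFT)

    int main(void)
    {
            unsigned int rb_cntl = 0;

            /* Program the field with clear-then-set, not OR alone. */
            rb_cntl = (rb_cntl & ~BUF_SWAP_MASK) | BUF_SWAP_32BIT;
            printf("RB_CNTL = 0x%08x\n", rb_cntl);   /* 0x00020000 */
            return 0;
    }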
@@@ -426,7 -426,7 +426,7 @@@ int rs600_gart_init(struct radeon_devic
        return radeon_gart_table_vram_alloc(rdev);
  }
  
 -int rs600_gart_enable(struct radeon_device *rdev)
 +static int rs600_gart_enable(struct radeon_device *rdev)
  {
        u32 tmp;
        int r, i;
                return r;
        radeon_gart_restore(rdev);
        /* Enable bus master */
 -      tmp = RREG32(R_00004C_BUS_CNTL) & C_00004C_BUS_MASTER_DIS;
 -      WREG32(R_00004C_BUS_CNTL, tmp);
 +      tmp = RREG32(RADEON_BUS_CNTL) & ~RS600_BUS_MASTER_DIS;
 +      WREG32(RADEON_BUS_CNTL, tmp);
        /* FIXME: setup default page */
        WREG32_MC(R_000100_MC_PT0_CNTL,
                  (S_000100_EFFECTIVE_L2_CACHE_SIZE(6) |
@@@ -530,7 -530,7 +530,7 @@@ int rs600_gart_set_page(struct radeon_d
        addr = addr & 0xFFFFFFFFFFFFF000ULL;
        addr |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED;
        addr |= R600_PTE_READABLE | R600_PTE_WRITEABLE;
-       writeq(addr, ((void __iomem *)ptr) + (i * 8));
+       writeq(addr, ptr + (i * 8));
        return 0;
  }
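
The page-table write above masks the bus address down to a 4 KiB-aligned page frame, then ORs in the type and permission bits before the 64-bit writeq into the GART table. A worked example (the address is illustrative, and the flag bit positions are stand-ins for the R600_PTE_* definitions, not verified against the hardware docs):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PTE_VALID       (1ULL << 0)
    #define PTE_SYSTEM      (1ULL << 1)
    #define PTE_SNOOPED     (1ULL << 2)
    #define PTE_READABLE    (1ULL << 5)
    #define PTE_WRITEABLE   (1ULL << 6)

    int main(void)
    {
            uint64_t addr = 0x12345FFFULL;          /* assumed bus address */

            addr &= 0xFFFFFFFFFFFFF000ULL;          /* keep the page frame */
            addr |= PTE_VALID | PTE_SYSTEM | PTE_SNOOPED;
            addr |= PTE_READABLE | PTE_WRITEABLE;

            printf("PTE = 0x%016" PRIx64 "\n", addr); /* 0x0000000012345067 */
            return 0;
    }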