Merge branch 'drm-radeon-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git...
authorLinus Torvalds <torvalds@linux-foundation.org>
Wed, 8 Jun 2011 02:09:17 +0000 (19:09 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 8 Jun 2011 02:09:17 +0000 (19:09 -0700)
* 'drm-radeon-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6:
  drm/radeon/kms/atom: fix PHY init
  drm/radeon/kms: add missing Evergreen texture formats to the CS parser
  drm/radeon/kms: viewport height has to be even
  drm/radeon/kms: remove duplicate reg from r600 safe regs
  drm/radeon/kms: add support for Llano Fusion APUs
  drm/radeon/kms: add llano pci ids
  drm/radeon/kms: fill in asic struct for llano
  drm/radeon/kms: add family ids for llano APUs
  drm/radeon: fix oops in ttm reserve when pageflipping (v2)
  drm/radeon/kms: clean up the radeon kms Kconfig
  drm/radeon/kms: fix thermal sensor reading on juniper
  drm/radeon/kms: add missing case for cayman thermal sensor
  drm/radeon/kms: add blit support for cayman (v2)
  drm/radeon/kms/blit: workaround some hw issues on evergreen+

19 files changed:
drivers/gpu/drm/radeon/Kconfig
drivers/gpu/drm/radeon/atombios_crtc.c
drivers/gpu/drm/radeon/cayman_blit_shaders.c
drivers/gpu/drm/radeon/cayman_blit_shaders.h
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/evergreen_blit_kms.c
drivers/gpu/drm/radeon/evergreend.h
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/r600.c
drivers/gpu/drm/radeon/r600_cs.c
drivers/gpu/drm/radeon/r600d.h
drivers/gpu/drm/radeon/radeon_asic.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_display.c
drivers/gpu/drm/radeon/radeon_encoders.c
drivers/gpu/drm/radeon/radeon_family.h
drivers/gpu/drm/radeon/radeon_pm.c
drivers/gpu/drm/radeon/reg_srcs/r600
include/drm/drm_pciids.h

index 9746fee..ea92bbe 100644 (file)
@@ -28,11 +28,4 @@ config DRM_RADEON_KMS
          The kernel will also perform security check on command stream
          provided by the user, we want to catch and forbid any illegal use
          of the GPU such as DMA into random system memory or into memory
-         not owned by the process supplying the command stream. This part
-         of the code is still incomplete and this why we propose that patch
-         as a staging driver addition, future security might forbid current
-         experimental userspace to run.
-
-         This code support the following hardware : R1XX,R2XX,R3XX,R4XX,R5XX
-         (radeon up to X1950). Works is underway to provide support for R6XX,
-         R7XX and newer hardware (radeon from HD2XXX to HD4XXX).
+         not owned by the process supplying the command stream.
index ec84878..84a69e7 100644 (file)
@@ -1045,7 +1045,7 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc,
        uint64_t fb_location;
        uint32_t fb_format, fb_pitch_pixels, tiling_flags;
        u32 fb_swap = EVERGREEN_GRPH_ENDIAN_SWAP(EVERGREEN_GRPH_ENDIAN_NONE);
-       u32 tmp;
+       u32 tmp, viewport_w, viewport_h;
        int r;
 
        /* no fb bound */
@@ -1171,8 +1171,10 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc,
        y &= ~1;
        WREG32(EVERGREEN_VIEWPORT_START + radeon_crtc->crtc_offset,
               (x << 16) | y);
+       viewport_w = crtc->mode.hdisplay;
+       viewport_h = (crtc->mode.vdisplay + 1) & ~1;
        WREG32(EVERGREEN_VIEWPORT_SIZE + radeon_crtc->crtc_offset,
-              (crtc->mode.hdisplay << 16) | crtc->mode.vdisplay);
+              (viewport_w << 16) | viewport_h);
 
        /* pageflip setup */
        /* make sure flip is at vb rather than hb */
@@ -1213,7 +1215,7 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc,
        uint64_t fb_location;
        uint32_t fb_format, fb_pitch_pixels, tiling_flags;
        u32 fb_swap = R600_D1GRPH_SWAP_ENDIAN_NONE;
-       u32 tmp;
+       u32 tmp, viewport_w, viewport_h;
        int r;
 
        /* no fb bound */
@@ -1338,8 +1340,10 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc,
        y &= ~1;
        WREG32(AVIVO_D1MODE_VIEWPORT_START + radeon_crtc->crtc_offset,
               (x << 16) | y);
+       viewport_w = crtc->mode.hdisplay;
+       viewport_h = (crtc->mode.vdisplay + 1) & ~1;
        WREG32(AVIVO_D1MODE_VIEWPORT_SIZE + radeon_crtc->crtc_offset,
-              (crtc->mode.hdisplay << 16) | crtc->mode.vdisplay);
+              (viewport_w << 16) | viewport_h);
 
        /* pageflip setup */
        /* make sure flip is at vb rather than hb */
index e148ab0..7b4eeb7 100644 (file)
 
 const u32 cayman_default_state[] =
 {
-       /* XXX fill in additional blit state */
+       0xc0066900,
+       0x00000000,
+       0x00000060, /* DB_RENDER_CONTROL */
+       0x00000000, /* DB_COUNT_CONTROL */
+       0x00000000, /* DB_DEPTH_VIEW */
+       0x0000002a, /* DB_RENDER_OVERRIDE */
+       0x00000000, /* DB_RENDER_OVERRIDE2 */
+       0x00000000, /* DB_HTILE_DATA_BASE */
 
        0xc0026900,
-       0x00000316,
-       0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
-       0x00000010, /*  */
+       0x0000000a,
+       0x00000000, /* DB_STENCIL_CLEAR */
+       0x00000000, /* DB_DEPTH_CLEAR */
+
+       0xc0036900,
+       0x0000000f,
+       0x00000000, /* DB_DEPTH_INFO */
+       0x00000000, /* DB_Z_INFO */
+       0x00000000, /* DB_STENCIL_INFO */
+
+       0xc0016900,
+       0x00000080,
+       0x00000000, /* PA_SC_WINDOW_OFFSET */
+
+       0xc00d6900,
+       0x00000083,
+       0x0000ffff, /* PA_SC_CLIPRECT_RULE */
+       0x00000000, /* PA_SC_CLIPRECT_0_TL */
+       0x20002000, /* PA_SC_CLIPRECT_0_BR */
+       0x00000000,
+       0x20002000,
+       0x00000000,
+       0x20002000,
+       0x00000000,
+       0x20002000,
+       0xaaaaaaaa, /* PA_SC_EDGERULE */
+       0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */
+       0x0000000f, /* CB_TARGET_MASK */
+       0x0000000f, /* CB_SHADER_MASK */
+
+       0xc0226900,
+       0x00000094,
+       0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */
+       0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x00000000, /* PA_SC_VPORT_ZMIN_0 */
+       0x3f800000, /* PA_SC_VPORT_ZMAX_0 */
+
+       0xc0016900,
+       0x000000d4,
+       0x00000000, /* SX_MISC */
 
        0xc0026900,
        0x000000d9,
        0x00000000, /* CP_RINGID */
        0x00000000, /* CP_VMID */
+
+       0xc0096900,
+       0x00000100,
+       0x00ffffff, /* VGT_MAX_VTX_INDX */
+       0x00000000, /* VGT_MIN_VTX_INDX */
+       0x00000000, /* VGT_INDX_OFFSET */
+       0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */
+       0x00000000, /* SX_ALPHA_TEST_CONTROL */
+       0x00000000, /* CB_BLEND_RED */
+       0x00000000, /* CB_BLEND_GREEN */
+       0x00000000, /* CB_BLEND_BLUE */
+       0x00000000, /* CB_BLEND_ALPHA */
+
+       0xc0016900,
+       0x00000187,
+       0x00000100, /* SPI_VS_OUT_ID_0 */
+
+       0xc0026900,
+       0x00000191,
+       0x00000100, /* SPI_PS_INPUT_CNTL_0 */
+       0x00000101, /* SPI_PS_INPUT_CNTL_1 */
+
+       0xc0016900,
+       0x000001b1,
+       0x00000000, /* SPI_VS_OUT_CONFIG */
+
+       0xc0106900,
+       0x000001b3,
+       0x20000001, /* SPI_PS_IN_CONTROL_0 */
+       0x00000000, /* SPI_PS_IN_CONTROL_1 */
+       0x00000000, /* SPI_INTERP_CONTROL_0 */
+       0x00000000, /* SPI_INPUT_Z */
+       0x00000000, /* SPI_FOG_CNTL */
+       0x00100000, /* SPI_BARYC_CNTL */
+       0x00000000, /* SPI_PS_IN_CONTROL_2 */
+       0x00000000, /* SPI_COMPUTE_INPUT_CNTL */
+       0x00000000, /* SPI_COMPUTE_NUM_THREAD_X */
+       0x00000000, /* SPI_COMPUTE_NUM_THREAD_Y */
+       0x00000000, /* SPI_COMPUTE_NUM_THREAD_Z */
+       0x00000000, /* SPI_GPR_MGMT */
+       0x00000000, /* SPI_LDS_MGMT */
+       0x00000000, /* SPI_STACK_MGMT */
+       0x00000000, /* SPI_WAVE_MGMT_1 */
+       0x00000000, /* SPI_WAVE_MGMT_2 */
+
+       0xc0016900,
+       0x000001e0,
+       0x00000000, /* CB_BLEND0_CONTROL */
+
+       0xc00e6900,
+       0x00000200,
+       0x00000000, /* DB_DEPTH_CONTROL */
+       0x00000000, /* DB_EQAA */
+       0x00cc0010, /* CB_COLOR_CONTROL */
+       0x00000210, /* DB_SHADER_CONTROL */
+       0x00010000, /* PA_CL_CLIP_CNTL */
+       0x00000004, /* PA_SU_SC_MODE_CNTL */
+       0x00000100, /* PA_CL_VTE_CNTL */
+       0x00000000, /* PA_CL_VS_OUT_CNTL */
+       0x00000000, /* PA_CL_NANINF_CNTL */
+       0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */
+       0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */
+       0x00000000, /* PA_SU_PRIM_FILTER_CNTL */
+       0x00000000, /*  */
+       0x00000000, /*  */
+
+       0xc0026900,
+       0x00000229,
+       0x00000000, /* SQ_PGM_START_FS */
+       0x00000000,
+
+       0xc0016900,
+       0x0000023b,
+       0x00000000, /* SQ_LDS_ALLOC_PS */
+
+       0xc0066900,
+       0x00000240,
+       0x00000000, /* SQ_ESGS_RING_ITEMSIZE */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+
+       0xc0046900,
+       0x00000247,
+       0x00000000, /* SQ_GS_VERT_ITEMSIZE */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+
+       0xc0116900,
+       0x00000280,
+       0x00000000, /* PA_SU_POINT_SIZE */
+       0x00000000, /* PA_SU_POINT_MINMAX */
+       0x00000008, /* PA_SU_LINE_CNTL */
+       0x00000000, /* PA_SC_LINE_STIPPLE */
+       0x00000000, /* VGT_OUTPUT_PATH_CNTL */
+       0x00000000, /* VGT_HOS_CNTL */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000, /* VGT_GS_MODE */
+
+       0xc0026900,
+       0x00000292,
+       0x00000000, /* PA_SC_MODE_CNTL_0 */
+       0x00000000, /* PA_SC_MODE_CNTL_1 */
+
+       0xc0016900,
+       0x000002a1,
+       0x00000000, /* VGT_PRIMITIVEID_EN */
+
+       0xc0016900,
+       0x000002a5,
+       0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */
+
+       0xc0026900,
+       0x000002a8,
+       0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */
+       0x00000000,
+
+       0xc0026900,
+       0x000002ad,
+       0x00000000, /* VGT_REUSE_OFF */
+       0x00000000,
+
+       0xc0016900,
+       0x000002d5,
+       0x00000000, /* VGT_SHADER_STAGES_EN */
+
+       0xc0016900,
+       0x000002dc,
+       0x0000aa00, /* DB_ALPHA_TO_MASK */
+
+       0xc0066900,
+       0x000002de,
+       0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+
+       0xc0026900,
+       0x000002e5,
+       0x00000000, /* VGT_STRMOUT_CONFIG */
+       0x00000000,
+
+       0xc01b6900,
+       0x000002f5,
+       0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */
+       0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */
+       0x00000000, /* PA_SC_LINE_CNTL */
+       0x00000000, /* PA_SC_AA_CONFIG */
+       0x00000005, /* PA_SU_VTX_CNTL */
+       0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */
+       0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */
+       0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */
+       0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */
+       0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */
+       0xffffffff,
+
+       0xc0026900,
+       0x00000316,
+       0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
+       0x00000010, /*  */
+};
+
+const u32 cayman_vs[] =
+{
+       0x00000004,
+       0x80400400,
+       0x0000a03c,
+       0x95000688,
+       0x00004000,
+       0x15000688,
+       0x00000000,
+       0x88000000,
+       0x04000000,
+       0x67961001,
+#ifdef __BIG_ENDIAN
+       0x00020000,
+#else
+       0x00000000,
+#endif
+       0x00000000,
+       0x04000000,
+       0x67961000,
+#ifdef __BIG_ENDIAN
+       0x00020008,
+#else
+       0x00000008,
+#endif
+       0x00000000,
+};
+
+const u32 cayman_ps[] =
+{
+       0x00000004,
+       0xa00c0000,
+       0x00000008,
+       0x80400000,
+       0x00000000,
+       0x95000688,
+       0x00000000,
+       0x88000000,
+       0x00380400,
+       0x00146b10,
+       0x00380000,
+       0x20146b10,
+       0x00380400,
+       0x40146b00,
+       0x80380000,
+       0x60146b00,
+       0x00000010,
+       0x000d1000,
+       0xb0800000,
+       0x00000000,
 };
 
+const u32 cayman_ps_size = ARRAY_SIZE(cayman_ps);
+const u32 cayman_vs_size = ARRAY_SIZE(cayman_vs);
 const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state);
index 33b75e5..f5d0e9a 100644 (file)
 #ifndef CAYMAN_BLIT_SHADERS_H
 #define CAYMAN_BLIT_SHADERS_H
 
+extern const u32 cayman_ps[];
+extern const u32 cayman_vs[];
 extern const u32 cayman_default_state[];
 
+extern const u32 cayman_ps_size, cayman_vs_size;
 extern const u32 cayman_default_size;
 
 #endif
index 7c37638..98ea597 100644 (file)
@@ -88,21 +88,39 @@ u32 evergreen_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
 /* get temperature in millidegrees */
 int evergreen_get_temp(struct radeon_device *rdev)
 {
-       u32 temp = (RREG32(CG_MULT_THERMAL_STATUS) & ASIC_T_MASK) >>
-               ASIC_T_SHIFT;
-       u32 actual_temp = 0;
-
-       if (temp & 0x400)
-               actual_temp = -256;
-       else if (temp & 0x200)
-               actual_temp = 255;
-       else if (temp & 0x100) {
-               actual_temp = temp & 0x1ff;
-               actual_temp |= ~0x1ff;
-       } else
-               actual_temp = temp & 0xff;
+       u32 temp, toffset, actual_temp = 0;
+
+       if (rdev->family == CHIP_JUNIPER) {
+               toffset = (RREG32(CG_THERMAL_CTRL) & TOFFSET_MASK) >>
+                       TOFFSET_SHIFT;
+               temp = (RREG32(CG_TS0_STATUS) & TS0_ADC_DOUT_MASK) >>
+                       TS0_ADC_DOUT_SHIFT;
+
+               if (toffset & 0x100)
+                       actual_temp = temp / 2 - (0x200 - toffset);
+               else
+                       actual_temp = temp / 2 + toffset;
+
+               actual_temp = actual_temp * 1000;
+
+       } else {
+               temp = (RREG32(CG_MULT_THERMAL_STATUS) & ASIC_T_MASK) >>
+                       ASIC_T_SHIFT;
 
-       return (actual_temp * 1000) / 2;
+               if (temp & 0x400)
+                       actual_temp = -256;
+               else if (temp & 0x200)
+                       actual_temp = 255;
+               else if (temp & 0x100) {
+                       actual_temp = temp & 0x1ff;
+                       actual_temp |= ~0x1ff;
+               } else
+                       actual_temp = temp & 0xff;
+
+               actual_temp = (actual_temp * 1000) / 2;
+       }
+
+       return actual_temp;
 }
 
 int sumo_get_temp(struct radeon_device *rdev)
@@ -1415,6 +1433,8 @@ static u32 evergreen_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
        case CHIP_CEDAR:
        case CHIP_REDWOOD:
        case CHIP_PALM:
+       case CHIP_SUMO:
+       case CHIP_SUMO2:
        case CHIP_TURKS:
        case CHIP_CAICOS:
                force_no_swizzle = false;
@@ -1544,6 +1564,8 @@ static void evergreen_program_channel_remap(struct radeon_device *rdev)
        case CHIP_REDWOOD:
        case CHIP_CEDAR:
        case CHIP_PALM:
+       case CHIP_SUMO:
+       case CHIP_SUMO2:
        case CHIP_TURKS:
        case CHIP_CAICOS:
        default:
@@ -1685,6 +1707,54 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
                rdev->config.evergreen.max_hw_contexts = 4;
                rdev->config.evergreen.sq_num_cf_insts = 1;
 
+               rdev->config.evergreen.sc_prim_fifo_size = 0x40;
+               rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30;
+               rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130;
+               break;
+       case CHIP_SUMO:
+               rdev->config.evergreen.num_ses = 1;
+               rdev->config.evergreen.max_pipes = 4;
+               rdev->config.evergreen.max_tile_pipes = 2;
+               if (rdev->pdev->device == 0x9648)
+                       rdev->config.evergreen.max_simds = 3;
+               else if ((rdev->pdev->device == 0x9647) ||
+                        (rdev->pdev->device == 0x964a))
+                       rdev->config.evergreen.max_simds = 4;
+               else
+                       rdev->config.evergreen.max_simds = 5;
+               rdev->config.evergreen.max_backends = 2 * rdev->config.evergreen.num_ses;
+               rdev->config.evergreen.max_gprs = 256;
+               rdev->config.evergreen.max_threads = 248;
+               rdev->config.evergreen.max_gs_threads = 32;
+               rdev->config.evergreen.max_stack_entries = 256;
+               rdev->config.evergreen.sx_num_of_sets = 4;
+               rdev->config.evergreen.sx_max_export_size = 256;
+               rdev->config.evergreen.sx_max_export_pos_size = 64;
+               rdev->config.evergreen.sx_max_export_smx_size = 192;
+               rdev->config.evergreen.max_hw_contexts = 8;
+               rdev->config.evergreen.sq_num_cf_insts = 2;
+
+               rdev->config.evergreen.sc_prim_fifo_size = 0x40;
+               rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30;
+               rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130;
+               break;
+       case CHIP_SUMO2:
+               rdev->config.evergreen.num_ses = 1;
+               rdev->config.evergreen.max_pipes = 4;
+               rdev->config.evergreen.max_tile_pipes = 4;
+               rdev->config.evergreen.max_simds = 2;
+               rdev->config.evergreen.max_backends = 1 * rdev->config.evergreen.num_ses;
+               rdev->config.evergreen.max_gprs = 256;
+               rdev->config.evergreen.max_threads = 248;
+               rdev->config.evergreen.max_gs_threads = 32;
+               rdev->config.evergreen.max_stack_entries = 512;
+               rdev->config.evergreen.sx_num_of_sets = 4;
+               rdev->config.evergreen.sx_max_export_size = 256;
+               rdev->config.evergreen.sx_max_export_pos_size = 64;
+               rdev->config.evergreen.sx_max_export_smx_size = 192;
+               rdev->config.evergreen.max_hw_contexts = 8;
+               rdev->config.evergreen.sq_num_cf_insts = 2;
+
                rdev->config.evergreen.sc_prim_fifo_size = 0x40;
                rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130;
@@ -2039,6 +2109,8 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
        switch (rdev->family) {
        case CHIP_CEDAR:
        case CHIP_PALM:
+       case CHIP_SUMO:
+       case CHIP_SUMO2:
        case CHIP_CAICOS:
                /* no vertex cache */
                sq_config &= ~VC_ENABLE;
@@ -2060,6 +2132,8 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
        switch (rdev->family) {
        case CHIP_CEDAR:
        case CHIP_PALM:
+       case CHIP_SUMO:
+       case CHIP_SUMO2:
                ps_thread_count = 96;
                break;
        default:
@@ -2099,6 +2173,8 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
        switch (rdev->family) {
        case CHIP_CEDAR:
        case CHIP_PALM:
+       case CHIP_SUMO:
+       case CHIP_SUMO2:
        case CHIP_CAICOS:
                vgt_cache_invalidation = CACHE_INVALIDATION(TC_ONLY);
                break;
index ba06a69..57f3bc1 100644 (file)
@@ -31,6 +31,7 @@
 
 #include "evergreend.h"
 #include "evergreen_blit_shaders.h"
+#include "cayman_blit_shaders.h"
 
 #define DI_PT_RECTLIST        0x11
 #define DI_INDEX_SIZE_16_BIT  0x0
@@ -152,6 +153,8 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
 
        if ((rdev->family == CHIP_CEDAR) ||
            (rdev->family == CHIP_PALM) ||
+           (rdev->family == CHIP_SUMO) ||
+           (rdev->family == CHIP_SUMO2) ||
            (rdev->family == CHIP_CAICOS))
                cp_set_surface_sync(rdev,
                                    PACKET3_TC_ACTION_ENA, 48, gpu_addr);
@@ -199,6 +202,16 @@ static void
 set_scissors(struct radeon_device *rdev, int x1, int y1,
             int x2, int y2)
 {
+       /* workaround some hw bugs */
+       if (x2 == 0)
+               x1 = 1;
+       if (y2 == 0)
+               y1 = 1;
+       if (rdev->family == CHIP_CAYMAN) {
+               if ((x2 == 1) && (y2 == 1))
+                       x2 = 2;
+       }
+
        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
        radeon_ring_write(rdev, (x1 << 0) | (y1 << 16));
@@ -255,238 +268,284 @@ set_default_state(struct radeon_device *rdev)
        u64 gpu_addr;
        int dwords;
 
-       switch (rdev->family) {
-       case CHIP_CEDAR:
-       default:
-               num_ps_gprs = 93;
-               num_vs_gprs = 46;
-               num_temp_gprs = 4;
-               num_gs_gprs = 31;
-               num_es_gprs = 31;
-               num_hs_gprs = 23;
-               num_ls_gprs = 23;
-               num_ps_threads = 96;
-               num_vs_threads = 16;
-               num_gs_threads = 16;
-               num_es_threads = 16;
-               num_hs_threads = 16;
-               num_ls_threads = 16;
-               num_ps_stack_entries = 42;
-               num_vs_stack_entries = 42;
-               num_gs_stack_entries = 42;
-               num_es_stack_entries = 42;
-               num_hs_stack_entries = 42;
-               num_ls_stack_entries = 42;
-               break;
-       case CHIP_REDWOOD:
-               num_ps_gprs = 93;
-               num_vs_gprs = 46;
-               num_temp_gprs = 4;
-               num_gs_gprs = 31;
-               num_es_gprs = 31;
-               num_hs_gprs = 23;
-               num_ls_gprs = 23;
-               num_ps_threads = 128;
-               num_vs_threads = 20;
-               num_gs_threads = 20;
-               num_es_threads = 20;
-               num_hs_threads = 20;
-               num_ls_threads = 20;
-               num_ps_stack_entries = 42;
-               num_vs_stack_entries = 42;
-               num_gs_stack_entries = 42;
-               num_es_stack_entries = 42;
-               num_hs_stack_entries = 42;
-               num_ls_stack_entries = 42;
-               break;
-       case CHIP_JUNIPER:
-               num_ps_gprs = 93;
-               num_vs_gprs = 46;
-               num_temp_gprs = 4;
-               num_gs_gprs = 31;
-               num_es_gprs = 31;
-               num_hs_gprs = 23;
-               num_ls_gprs = 23;
-               num_ps_threads = 128;
-               num_vs_threads = 20;
-               num_gs_threads = 20;
-               num_es_threads = 20;
-               num_hs_threads = 20;
-               num_ls_threads = 20;
-               num_ps_stack_entries = 85;
-               num_vs_stack_entries = 85;
-               num_gs_stack_entries = 85;
-               num_es_stack_entries = 85;
-               num_hs_stack_entries = 85;
-               num_ls_stack_entries = 85;
-               break;
-       case CHIP_CYPRESS:
-       case CHIP_HEMLOCK:
-               num_ps_gprs = 93;
-               num_vs_gprs = 46;
-               num_temp_gprs = 4;
-               num_gs_gprs = 31;
-               num_es_gprs = 31;
-               num_hs_gprs = 23;
-               num_ls_gprs = 23;
-               num_ps_threads = 128;
-               num_vs_threads = 20;
-               num_gs_threads = 20;
-               num_es_threads = 20;
-               num_hs_threads = 20;
-               num_ls_threads = 20;
-               num_ps_stack_entries = 85;
-               num_vs_stack_entries = 85;
-               num_gs_stack_entries = 85;
-               num_es_stack_entries = 85;
-               num_hs_stack_entries = 85;
-               num_ls_stack_entries = 85;
-               break;
-       case CHIP_PALM:
-               num_ps_gprs = 93;
-               num_vs_gprs = 46;
-               num_temp_gprs = 4;
-               num_gs_gprs = 31;
-               num_es_gprs = 31;
-               num_hs_gprs = 23;
-               num_ls_gprs = 23;
-               num_ps_threads = 96;
-               num_vs_threads = 16;
-               num_gs_threads = 16;
-               num_es_threads = 16;
-               num_hs_threads = 16;
-               num_ls_threads = 16;
-               num_ps_stack_entries = 42;
-               num_vs_stack_entries = 42;
-               num_gs_stack_entries = 42;
-               num_es_stack_entries = 42;
-               num_hs_stack_entries = 42;
-               num_ls_stack_entries = 42;
-               break;
-       case CHIP_BARTS:
-               num_ps_gprs = 93;
-               num_vs_gprs = 46;
-               num_temp_gprs = 4;
-               num_gs_gprs = 31;
-               num_es_gprs = 31;
-               num_hs_gprs = 23;
-               num_ls_gprs = 23;
-               num_ps_threads = 128;
-               num_vs_threads = 20;
-               num_gs_threads = 20;
-               num_es_threads = 20;
-               num_hs_threads = 20;
-               num_ls_threads = 20;
-               num_ps_stack_entries = 85;
-               num_vs_stack_entries = 85;
-               num_gs_stack_entries = 85;
-               num_es_stack_entries = 85;
-               num_hs_stack_entries = 85;
-               num_ls_stack_entries = 85;
-               break;
-       case CHIP_TURKS:
-               num_ps_gprs = 93;
-               num_vs_gprs = 46;
-               num_temp_gprs = 4;
-               num_gs_gprs = 31;
-               num_es_gprs = 31;
-               num_hs_gprs = 23;
-               num_ls_gprs = 23;
-               num_ps_threads = 128;
-               num_vs_threads = 20;
-               num_gs_threads = 20;
-               num_es_threads = 20;
-               num_hs_threads = 20;
-               num_ls_threads = 20;
-               num_ps_stack_entries = 42;
-               num_vs_stack_entries = 42;
-               num_gs_stack_entries = 42;
-               num_es_stack_entries = 42;
-               num_hs_stack_entries = 42;
-               num_ls_stack_entries = 42;
-               break;
-       case CHIP_CAICOS:
-               num_ps_gprs = 93;
-               num_vs_gprs = 46;
-               num_temp_gprs = 4;
-               num_gs_gprs = 31;
-               num_es_gprs = 31;
-               num_hs_gprs = 23;
-               num_ls_gprs = 23;
-               num_ps_threads = 128;
-               num_vs_threads = 10;
-               num_gs_threads = 10;
-               num_es_threads = 10;
-               num_hs_threads = 10;
-               num_ls_threads = 10;
-               num_ps_stack_entries = 42;
-               num_vs_stack_entries = 42;
-               num_gs_stack_entries = 42;
-               num_es_stack_entries = 42;
-               num_hs_stack_entries = 42;
-               num_ls_stack_entries = 42;
-               break;
-       }
-
-       if ((rdev->family == CHIP_CEDAR) ||
-           (rdev->family == CHIP_PALM) ||
-           (rdev->family == CHIP_CAICOS))
-               sq_config = 0;
-       else
-               sq_config = VC_ENABLE;
-
-       sq_config |= (EXPORT_SRC_C |
-                     CS_PRIO(0) |
-                     LS_PRIO(0) |
-                     HS_PRIO(0) |
-                     PS_PRIO(0) |
-                     VS_PRIO(1) |
-                     GS_PRIO(2) |
-                     ES_PRIO(3));
-
-       sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
-                                 NUM_VS_GPRS(num_vs_gprs) |
-                                 NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
-       sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
-                                 NUM_ES_GPRS(num_es_gprs));
-       sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
-                                 NUM_LS_GPRS(num_ls_gprs));
-       sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
-                                  NUM_VS_THREADS(num_vs_threads) |
-                                  NUM_GS_THREADS(num_gs_threads) |
-                                  NUM_ES_THREADS(num_es_threads));
-       sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
-                                    NUM_LS_THREADS(num_ls_threads));
-       sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
-                                   NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
-       sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
-                                   NUM_ES_STACK_ENTRIES(num_es_stack_entries));
-       sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
-                                   NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
-
        /* set clear context state */
        radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
        radeon_ring_write(rdev, 0);
 
-       /* disable dyn gprs */
-       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-       radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
-       radeon_ring_write(rdev, 0);
+       if (rdev->family < CHIP_CAYMAN) {
+               switch (rdev->family) {
+               case CHIP_CEDAR:
+               default:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 96;
+                       num_vs_threads = 16;
+                       num_gs_threads = 16;
+                       num_es_threads = 16;
+                       num_hs_threads = 16;
+                       num_ls_threads = 16;
+                       num_ps_stack_entries = 42;
+                       num_vs_stack_entries = 42;
+                       num_gs_stack_entries = 42;
+                       num_es_stack_entries = 42;
+                       num_hs_stack_entries = 42;
+                       num_ls_stack_entries = 42;
+                       break;
+               case CHIP_REDWOOD:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 128;
+                       num_vs_threads = 20;
+                       num_gs_threads = 20;
+                       num_es_threads = 20;
+                       num_hs_threads = 20;
+                       num_ls_threads = 20;
+                       num_ps_stack_entries = 42;
+                       num_vs_stack_entries = 42;
+                       num_gs_stack_entries = 42;
+                       num_es_stack_entries = 42;
+                       num_hs_stack_entries = 42;
+                       num_ls_stack_entries = 42;
+                       break;
+               case CHIP_JUNIPER:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 128;
+                       num_vs_threads = 20;
+                       num_gs_threads = 20;
+                       num_es_threads = 20;
+                       num_hs_threads = 20;
+                       num_ls_threads = 20;
+                       num_ps_stack_entries = 85;
+                       num_vs_stack_entries = 85;
+                       num_gs_stack_entries = 85;
+                       num_es_stack_entries = 85;
+                       num_hs_stack_entries = 85;
+                       num_ls_stack_entries = 85;
+                       break;
+               case CHIP_CYPRESS:
+               case CHIP_HEMLOCK:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 128;
+                       num_vs_threads = 20;
+                       num_gs_threads = 20;
+                       num_es_threads = 20;
+                       num_hs_threads = 20;
+                       num_ls_threads = 20;
+                       num_ps_stack_entries = 85;
+                       num_vs_stack_entries = 85;
+                       num_gs_stack_entries = 85;
+                       num_es_stack_entries = 85;
+                       num_hs_stack_entries = 85;
+                       num_ls_stack_entries = 85;
+                       break;
+               case CHIP_PALM:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 96;
+                       num_vs_threads = 16;
+                       num_gs_threads = 16;
+                       num_es_threads = 16;
+                       num_hs_threads = 16;
+                       num_ls_threads = 16;
+                       num_ps_stack_entries = 42;
+                       num_vs_stack_entries = 42;
+                       num_gs_stack_entries = 42;
+                       num_es_stack_entries = 42;
+                       num_hs_stack_entries = 42;
+                       num_ls_stack_entries = 42;
+                       break;
+               case CHIP_SUMO:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 96;
+                       num_vs_threads = 25;
+                       num_gs_threads = 25;
+                       num_es_threads = 25;
+                       num_hs_threads = 25;
+                       num_ls_threads = 25;
+                       num_ps_stack_entries = 42;
+                       num_vs_stack_entries = 42;
+                       num_gs_stack_entries = 42;
+                       num_es_stack_entries = 42;
+                       num_hs_stack_entries = 42;
+                       num_ls_stack_entries = 42;
+                       break;
+               case CHIP_SUMO2:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 96;
+                       num_vs_threads = 25;
+                       num_gs_threads = 25;
+                       num_es_threads = 25;
+                       num_hs_threads = 25;
+                       num_ls_threads = 25;
+                       num_ps_stack_entries = 85;
+                       num_vs_stack_entries = 85;
+                       num_gs_stack_entries = 85;
+                       num_es_stack_entries = 85;
+                       num_hs_stack_entries = 85;
+                       num_ls_stack_entries = 85;
+                       break;
+               case CHIP_BARTS:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 128;
+                       num_vs_threads = 20;
+                       num_gs_threads = 20;
+                       num_es_threads = 20;
+                       num_hs_threads = 20;
+                       num_ls_threads = 20;
+                       num_ps_stack_entries = 85;
+                       num_vs_stack_entries = 85;
+                       num_gs_stack_entries = 85;
+                       num_es_stack_entries = 85;
+                       num_hs_stack_entries = 85;
+                       num_ls_stack_entries = 85;
+                       break;
+               case CHIP_TURKS:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 128;
+                       num_vs_threads = 20;
+                       num_gs_threads = 20;
+                       num_es_threads = 20;
+                       num_hs_threads = 20;
+                       num_ls_threads = 20;
+                       num_ps_stack_entries = 42;
+                       num_vs_stack_entries = 42;
+                       num_gs_stack_entries = 42;
+                       num_es_stack_entries = 42;
+                       num_hs_stack_entries = 42;
+                       num_ls_stack_entries = 42;
+                       break;
+               case CHIP_CAICOS:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 128;
+                       num_vs_threads = 10;
+                       num_gs_threads = 10;
+                       num_es_threads = 10;
+                       num_hs_threads = 10;
+                       num_ls_threads = 10;
+                       num_ps_stack_entries = 42;
+                       num_vs_stack_entries = 42;
+                       num_gs_stack_entries = 42;
+                       num_es_stack_entries = 42;
+                       num_hs_stack_entries = 42;
+                       num_ls_stack_entries = 42;
+                       break;
+               }
 
-       /* SQ config */
-       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
-       radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
-       radeon_ring_write(rdev, sq_config);
-       radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
-       radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
-       radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
-       radeon_ring_write(rdev, 0);
-       radeon_ring_write(rdev, 0);
-       radeon_ring_write(rdev, sq_thread_resource_mgmt);
-       radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
-       radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
-       radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
-       radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
+               if ((rdev->family == CHIP_CEDAR) ||
+                   (rdev->family == CHIP_PALM) ||
+                   (rdev->family == CHIP_SUMO) ||
+                   (rdev->family == CHIP_SUMO2) ||
+                   (rdev->family == CHIP_CAICOS))
+                       sq_config = 0;
+               else
+                       sq_config = VC_ENABLE;
+
+               sq_config |= (EXPORT_SRC_C |
+                             CS_PRIO(0) |
+                             LS_PRIO(0) |
+                             HS_PRIO(0) |
+                             PS_PRIO(0) |
+                             VS_PRIO(1) |
+                             GS_PRIO(2) |
+                             ES_PRIO(3));
+
+               sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
+                                         NUM_VS_GPRS(num_vs_gprs) |
+                                         NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
+               sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
+                                         NUM_ES_GPRS(num_es_gprs));
+               sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
+                                         NUM_LS_GPRS(num_ls_gprs));
+               sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
+                                          NUM_VS_THREADS(num_vs_threads) |
+                                          NUM_GS_THREADS(num_gs_threads) |
+                                          NUM_ES_THREADS(num_es_threads));
+               sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
+                                            NUM_LS_THREADS(num_ls_threads));
+               sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
+                                           NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
+               sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
+                                           NUM_ES_STACK_ENTRIES(num_es_stack_entries));
+               sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
+                                           NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
+
+               /* disable dyn gprs */
+               radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+               radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
+               radeon_ring_write(rdev, 0);
+
+               /* SQ config */
+               radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
+               radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
+               radeon_ring_write(rdev, sq_config);
+               radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
+               radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
+               radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
+               radeon_ring_write(rdev, 0);
+               radeon_ring_write(rdev, 0);
+               radeon_ring_write(rdev, sq_thread_resource_mgmt);
+               radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
+               radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
+               radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
+               radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
+       }
 
        /* CONTEXT_CONTROL */
        radeon_ring_write(rdev, 0xc0012800);
@@ -560,7 +619,10 @@ int evergreen_blit_init(struct radeon_device *rdev)
        mutex_init(&rdev->r600_blit.mutex);
        rdev->r600_blit.state_offset = 0;
 
-       rdev->r600_blit.state_len = evergreen_default_size;
+       if (rdev->family < CHIP_CAYMAN)
+               rdev->r600_blit.state_len = evergreen_default_size;
+       else
+               rdev->r600_blit.state_len = cayman_default_size;
 
        dwords = rdev->r600_blit.state_len;
        while (dwords & 0xf) {
@@ -572,11 +634,17 @@ int evergreen_blit_init(struct radeon_device *rdev)
        obj_size = ALIGN(obj_size, 256);
 
        rdev->r600_blit.vs_offset = obj_size;
-       obj_size += evergreen_vs_size * 4;
+       if (rdev->family < CHIP_CAYMAN)
+               obj_size += evergreen_vs_size * 4;
+       else
+               obj_size += cayman_vs_size * 4;
        obj_size = ALIGN(obj_size, 256);
 
        rdev->r600_blit.ps_offset = obj_size;
-       obj_size += evergreen_ps_size * 4;
+       if (rdev->family < CHIP_CAYMAN)
+               obj_size += evergreen_ps_size * 4;
+       else
+               obj_size += cayman_ps_size * 4;
        obj_size = ALIGN(obj_size, 256);
 
        r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
@@ -599,16 +667,29 @@ int evergreen_blit_init(struct radeon_device *rdev)
                return r;
        }
 
-       memcpy_toio(ptr + rdev->r600_blit.state_offset,
-                   evergreen_default_state, rdev->r600_blit.state_len * 4);
-
-       if (num_packet2s)
-               memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
-                           packet2s, num_packet2s * 4);
-       for (i = 0; i < evergreen_vs_size; i++)
-               *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
-       for (i = 0; i < evergreen_ps_size; i++)
-               *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
+       if (rdev->family < CHIP_CAYMAN) {
+               memcpy_toio(ptr + rdev->r600_blit.state_offset,
+                           evergreen_default_state, rdev->r600_blit.state_len * 4);
+
+               if (num_packet2s)
+                       memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
+                                   packet2s, num_packet2s * 4);
+               for (i = 0; i < evergreen_vs_size; i++)
+                       *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
+               for (i = 0; i < evergreen_ps_size; i++)
+                       *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
+       } else {
+               memcpy_toio(ptr + rdev->r600_blit.state_offset,
+                           cayman_default_state, rdev->r600_blit.state_len * 4);
+
+               if (num_packet2s)
+                       memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
+                                   packet2s, num_packet2s * 4);
+               for (i = 0; i < cayman_vs_size; i++)
+                       *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(cayman_vs[i]);
+               for (i = 0; i < cayman_ps_size; i++)
+                       *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(cayman_ps[i]);
+       }
        radeon_bo_kunmap(rdev->r600_blit.shader_obj);
        radeon_bo_unreserve(rdev->r600_blit.shader_obj);
 
index f37e91e..1636e34 100644 (file)
 #define                SE_DB_BUSY                                      (1 << 30)
 #define                SE_CB_BUSY                                      (1 << 31)
 /* evergreen */
+#define        CG_THERMAL_CTRL                                 0x72c
+#define                TOFFSET_MASK                            0x00003FE0
+#define                TOFFSET_SHIFT                           5
 #define        CG_MULT_THERMAL_STATUS                          0x740
 #define                ASIC_T(x)                               ((x) << 16)
-#define                ASIC_T_MASK                             0x7FF0000
+#define                ASIC_T_MASK                             0x07FF0000
 #define                ASIC_T_SHIFT                            16
+#define        CG_TS0_STATUS                                   0x760
+#define                TS0_ADC_DOUT_MASK                       0x000003FF
+#define                TS0_ADC_DOUT_SHIFT                      0
 /* APU */
 #define        CG_THERMAL_STATUS                               0x678
 
index b205ba1..16caafe 100644 (file)
@@ -1387,14 +1387,12 @@ static int cayman_startup(struct radeon_device *rdev)
                return r;
        cayman_gpu_init(rdev);
 
-#if 0
-       r = cayman_blit_init(rdev);
+       r = evergreen_blit_init(rdev);
        if (r) {
-               cayman_blit_fini(rdev);
+               evergreen_blit_fini(rdev);
                rdev->asic->copy = NULL;
                dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
        }
-#endif
 
        /* allocate wb buffer */
        r = radeon_wb_init(rdev);
@@ -1452,7 +1450,7 @@ int cayman_resume(struct radeon_device *rdev)
 
 int cayman_suspend(struct radeon_device *rdev)
 {
-       /* int r; */
+       int r;
 
        /* FIXME: we should wait for ring to be empty */
        cayman_cp_enable(rdev, false);
@@ -1461,14 +1459,13 @@ int cayman_suspend(struct radeon_device *rdev)
        radeon_wb_disable(rdev);
        cayman_pcie_gart_disable(rdev);
 
-#if 0
        /* unpin shaders bo */
        r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
        if (likely(r == 0)) {
                radeon_bo_unpin(rdev->r600_blit.shader_obj);
                radeon_bo_unreserve(rdev->r600_blit.shader_obj);
        }
-#endif
+
        return 0;
 }
 
@@ -1580,7 +1577,7 @@ int cayman_init(struct radeon_device *rdev)
 
 void cayman_fini(struct radeon_device *rdev)
 {
-       /* cayman_blit_fini(rdev); */
+       evergreen_blit_fini(rdev);
        cayman_cp_fini(rdev);
        r600_irq_fini(rdev);
        radeon_wb_fini(rdev);
index 6f27593..d74d4d7 100644 (file)
@@ -87,6 +87,10 @@ MODULE_FIRMWARE("radeon/CYPRESS_rlc.bin");
 MODULE_FIRMWARE("radeon/PALM_pfp.bin");
 MODULE_FIRMWARE("radeon/PALM_me.bin");
 MODULE_FIRMWARE("radeon/SUMO_rlc.bin");
+MODULE_FIRMWARE("radeon/SUMO_pfp.bin");
+MODULE_FIRMWARE("radeon/SUMO_me.bin");
+MODULE_FIRMWARE("radeon/SUMO2_pfp.bin");
+MODULE_FIRMWARE("radeon/SUMO2_me.bin");
 
 int r600_debugfs_mc_info_init(struct radeon_device *rdev);
 
@@ -2024,6 +2028,14 @@ int r600_init_microcode(struct radeon_device *rdev)
                chip_name = "PALM";
                rlc_chip_name = "SUMO";
                break;
+       case CHIP_SUMO:
+               chip_name = "SUMO";
+               rlc_chip_name = "SUMO";
+               break;
+       case CHIP_SUMO2:
+               chip_name = "SUMO2";
+               rlc_chip_name = "SUMO";
+               break;
        default: BUG();
        }
 
index fd18be9..909bda8 100644 (file)
@@ -71,20 +71,21 @@ struct r600_cs_track {
        u64                     db_bo_mc;
 };
 
-#define FMT_8_BIT(fmt, vc) [fmt] = { 1, 1, 1, vc }
-#define FMT_16_BIT(fmt, vc) [fmt] = { 1, 1, 2, vc }
-#define FMT_24_BIT(fmt) [fmt] = { 1, 1, 3, 0 }
-#define FMT_32_BIT(fmt, vc) [fmt] = { 1, 1, 4, vc }
-#define FMT_48_BIT(fmt) [fmt] = { 1, 1, 6, 0 }
-#define FMT_64_BIT(fmt, vc) [fmt] = { 1, 1, 8, vc }
-#define FMT_96_BIT(fmt) [fmt] = { 1, 1, 12, 0 }
-#define FMT_128_BIT(fmt, vc) [fmt] = { 1, 1, 16, vc }
+#define FMT_8_BIT(fmt, vc)   [fmt] = { 1, 1, 1, vc, CHIP_R600 }
+#define FMT_16_BIT(fmt, vc)  [fmt] = { 1, 1, 2, vc, CHIP_R600 }
+#define FMT_24_BIT(fmt)      [fmt] = { 1, 1, 3,  0, CHIP_R600 }
+#define FMT_32_BIT(fmt, vc)  [fmt] = { 1, 1, 4, vc, CHIP_R600 }
+#define FMT_48_BIT(fmt)      [fmt] = { 1, 1, 6,  0, CHIP_R600 }
+#define FMT_64_BIT(fmt, vc)  [fmt] = { 1, 1, 8, vc, CHIP_R600 }
+#define FMT_96_BIT(fmt)      [fmt] = { 1, 1, 12, 0, CHIP_R600 }
+#define FMT_128_BIT(fmt, vc) [fmt] = { 1, 1, 16,vc, CHIP_R600 }
 
 struct gpu_formats {
        unsigned blockwidth;
        unsigned blockheight;
        unsigned blocksize;
        unsigned valid_color;
+       enum radeon_family min_family;
 };
 
 static const struct gpu_formats color_formats_table[] = {
@@ -154,7 +155,11 @@ static const struct gpu_formats color_formats_table[] = {
        [V_038004_FMT_BC3] = { 4, 4, 16, 0 },
        [V_038004_FMT_BC4] = { 4, 4, 8, 0 },
        [V_038004_FMT_BC5] = { 4, 4, 16, 0},
+       [V_038004_FMT_BC6] = { 4, 4, 16, 0, CHIP_CEDAR}, /* Evergreen-only */
+       [V_038004_FMT_BC7] = { 4, 4, 16, 0, CHIP_CEDAR}, /* Evergreen-only */
 
+       /* The other Evergreen formats */
+       [V_038004_FMT_32_AS_32_32_32_32] = { 1, 1, 4, 0, CHIP_CEDAR},
 };
 
 static inline bool fmt_is_valid_color(u32 format)
@@ -168,11 +173,14 @@ static inline bool fmt_is_valid_color(u32 format)
        return false;
 }
 
-static inline bool fmt_is_valid_texture(u32 format)
+static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family)
 {
        if (format >= ARRAY_SIZE(color_formats_table))
                return false;
        
+       if (family < color_formats_table[format].min_family)
+               return false;
+
        if (color_formats_table[format].blockwidth > 0)
                return true;
 
@@ -1325,7 +1333,7 @@ static inline int r600_check_texture_resource(struct radeon_cs_parser *p,  u32 i
                return -EINVAL;
        }
        format = G_038004_DATA_FORMAT(word1);
-       if (!fmt_is_valid_texture(format)) {
+       if (!fmt_is_valid_texture(format, p->family)) {
                dev_warn(p->dev, "%s:%d texture invalid format %d\n",
                         __func__, __LINE__, format);
                return -EINVAL;
index b2b944b..f140a0d 100644 (file)
 #define     V_038004_FMT_BC3                           0x00000033
 #define     V_038004_FMT_BC4                           0x00000034
 #define     V_038004_FMT_BC5                           0x00000035
+#define     V_038004_FMT_BC6                           0x00000036
+#define     V_038004_FMT_BC7                           0x00000037
+#define     V_038004_FMT_32_AS_32_32_32_32             0x00000038
 #define R_038010_SQ_TEX_RESOURCE_WORD4_0             0x038010
 #define   S_038010_FORMAT_COMP_X(x)                    (((x) & 0x3) << 0)
 #define   G_038010_FORMAT_COMP_X(x)                    (((x) >> 0) & 0x3)
index d948265..9bd162f 100644 (file)
@@ -906,9 +906,9 @@ static struct radeon_asic cayman_asic = {
        .get_vblank_counter = &evergreen_get_vblank_counter,
        .fence_ring_emit = &r600_fence_ring_emit,
        .cs_parse = &evergreen_cs_parse,
-       .copy_blit = NULL,
-       .copy_dma = NULL,
-       .copy = NULL,
+       .copy_blit = &evergreen_copy_blit,
+       .copy_dma = &evergreen_copy_blit,
+       .copy = &evergreen_copy_blit,
        .get_engine_clock = &radeon_atom_get_engine_clock,
        .set_engine_clock = &radeon_atom_set_engine_clock,
        .get_memory_clock = &radeon_atom_get_memory_clock,
@@ -1020,6 +1020,8 @@ int radeon_asic_init(struct radeon_device *rdev)
                rdev->asic = &evergreen_asic;
                break;
        case CHIP_PALM:
+       case CHIP_SUMO:
+       case CHIP_SUMO2:
                rdev->asic = &sumo_asic;
                break;
        case CHIP_BARTS:
index 5b61364..d77ede3 100644 (file)
@@ -82,6 +82,8 @@ static const char radeon_family_name[][16] = {
        "CYPRESS",
        "HEMLOCK",
        "PALM",
+       "SUMO",
+       "SUMO2",
        "BARTS",
        "TURKS",
        "CAICOS",
index ae247ee..292f73f 100644 (file)
@@ -264,6 +264,8 @@ static void radeon_unpin_work_func(struct work_struct *__work)
                radeon_bo_unreserve(work->old_rbo);
        } else
                DRM_ERROR("failed to reserve buffer after flip\n");
+
+       drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
        kfree(work);
 }
 
@@ -371,6 +373,8 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc,
        new_radeon_fb = to_radeon_framebuffer(fb);
        /* schedule unpin of the old buffer */
        obj = old_radeon_fb->obj;
+       /* take a reference to the old object */
+       drm_gem_object_reference(obj);
        rbo = gem_to_radeon_bo(obj);
        work->old_rbo = rbo;
        INIT_WORK(&work->work, radeon_unpin_work_func);
@@ -378,12 +382,9 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc,
        /* We borrow the event spin lock for protecting unpin_work */
        spin_lock_irqsave(&dev->event_lock, flags);
        if (radeon_crtc->unpin_work) {
-               spin_unlock_irqrestore(&dev->event_lock, flags);
-               kfree(work);
-               radeon_fence_unref(&fence);
-
                DRM_DEBUG_DRIVER("flip queue: crtc already busy\n");
-               return -EBUSY;
+               r = -EBUSY;
+               goto unlock_free;
        }
        radeon_crtc->unpin_work = work;
        radeon_crtc->deferred_flip_completion = 0;
@@ -497,6 +498,8 @@ pflip_cleanup1:
 pflip_cleanup:
        spin_lock_irqsave(&dev->event_lock, flags);
        radeon_crtc->unpin_work = NULL;
+unlock_free:
+       drm_gem_object_unreference_unlocked(old_radeon_fb->obj);
        spin_unlock_irqrestore(&dev->event_lock, flags);
        radeon_fence_unref(&fence);
        kfree(work);
index 1b55755..03f124d 100644 (file)
@@ -954,10 +954,15 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t
        int dp_lane_count = 0;
        int connector_object_id = 0;
        int igp_lane_info = 0;
+       int dig_encoder = dig->dig_encoder;
 
-       if (action == ATOM_TRANSMITTER_ACTION_INIT)
+       if (action == ATOM_TRANSMITTER_ACTION_INIT) {
                connector = radeon_get_connector_for_encoder_init(encoder);
-       else
+               /* just needed to avoid bailing in the encoder check.  the encoder
+                * isn't used for init
+                */
+               dig_encoder = 0;
+       } else
                connector = radeon_get_connector_for_encoder(encoder);
 
        if (connector) {
@@ -973,7 +978,7 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t
        }
 
        /* no dig encoder assigned */
-       if (dig->dig_encoder == -1)
+       if (dig_encoder == -1)
                return;
 
        if (atombios_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_DP)
@@ -1023,7 +1028,7 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t
 
                if (dig->linkb)
                        args.v3.acConfig.ucLinkSel = 1;
-               if (dig->dig_encoder & 1)
+               if (dig_encoder & 1)
                        args.v3.acConfig.ucEncoderSel = 1;
 
                /* Select the PLL for the PHY
@@ -1073,7 +1078,7 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t
                                args.v3.acConfig.fDualLinkConnector = 1;
                }
        } else if (ASIC_IS_DCE32(rdev)) {
-               args.v2.acConfig.ucEncoderSel = dig->dig_encoder;
+               args.v2.acConfig.ucEncoderSel = dig_encoder;
                if (dig->linkb)
                        args.v2.acConfig.ucLinkSel = 1;
 
@@ -1100,7 +1105,7 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t
        } else {
                args.v1.ucConfig = ATOM_TRANSMITTER_CONFIG_CLKSRC_PPLL;
 
-               if (dig->dig_encoder)
+               if (dig_encoder)
                        args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG2_ENCODER;
                else
                        args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER;
index 6f1d9e5..ec2f1ea 100644 (file)
@@ -81,6 +81,8 @@ enum radeon_family {
        CHIP_CYPRESS,
        CHIP_HEMLOCK,
        CHIP_PALM,
+       CHIP_SUMO,
+       CHIP_SUMO2,
        CHIP_BARTS,
        CHIP_TURKS,
        CHIP_CAICOS,
index 86eda1e..aaa19dc 100644 (file)
@@ -487,6 +487,7 @@ static int radeon_hwmon_init(struct radeon_device *rdev)
        case THERMAL_TYPE_RV6XX:
        case THERMAL_TYPE_RV770:
        case THERMAL_TYPE_EVERGREEN:
+       case THERMAL_TYPE_NI:
        case THERMAL_TYPE_SUMO:
                rdev->pm.int_hwmon_dev = hwmon_device_register(rdev->dev);
                if (IS_ERR(rdev->pm.int_hwmon_dev)) {
index 92f1900..ea49752 100644 (file)
@@ -758,6 +758,5 @@ r600 0x9400
 0x00009714 VC_ENHANCE
 0x00009830 DB_DEBUG
 0x00009838 DB_WATERMARKS
-0x00028D28 DB_SRESULTS_COMPARE_STATE0
 0x00028D44 DB_ALPHA_TO_MASK
 0x00009700 VC_CNTL
index f04b2a3..e08f344 100644 (file)
        {0x1002, 0x9614, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
        {0x1002, 0x9615, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
        {0x1002, 0x9616, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x9640, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x9641, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x9642, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x9643, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x9644, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x9645, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x9647, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
+       {0x1002, 0x9648, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
+       {0x1002, 0x964a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+       {0x1002, 0x964e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
+       {0x1002, 0x964f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
        {0x1002, 0x9710, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
        {0x1002, 0x9711, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
        {0x1002, 0x9712, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \