else
cp_coher_size = ((size + 255) >> 8);
+ if (rdev->family >= CHIP_CAYMAN) {
+ /* CP_COHER_CNTL2 has to be set manually when submitting a surface_sync
+ * to the RB directly. For IBs, the CP programs this as part of the
+ * surface_sync packet.
+ */
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ radeon_ring_write(rdev, (0x85e8 - PACKET3_SET_CONFIG_REG_START) >> 2);
+ radeon_ring_write(rdev, 0); /* CP_COHER_CNTL2 */
+ }
radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
radeon_ring_write(rdev, sync_type);
radeon_ring_write(rdev, cp_coher_size);
static void
set_tex_resource(struct radeon_device *rdev,
int format, int w, int h, int pitch,
- u64 gpu_addr)
+ u64 gpu_addr, u32 size)
{
u32 sq_tex_resource_word0, sq_tex_resource_word1;
u32 sq_tex_resource_word4, sq_tex_resource_word7;
sq_tex_resource_word7 = format |
S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_TEXTURE);
+ cp_set_surface_sync(rdev,
+ PACKET3_TC_ACTION_ENA, size, gpu_addr);
+
radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
radeon_ring_write(rdev, 0);
radeon_ring_write(rdev, sq_tex_resource_word0);
rdev->r600_blit.primitives.set_default_state = set_default_state;
rdev->r600_blit.ring_size_common = 55; /* shaders + def state */
- rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */
+ rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */
rdev->r600_blit.ring_size_common += 5; /* done copy */
- rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */
+ rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */
rdev->r600_blit.ring_size_per_loop = 74;
+ if (rdev->family >= CHIP_CAYMAN)
+ rdev->r600_blit.ring_size_per_loop += 9; /* additional DWs for surface sync */
rdev->r600_blit.max_dim = 16384;