nouveau: Fix alignment requirements on src and dst addresses
author Maarten Lankhorst <maarten.lankhorst@canonical.com>
Mon, 4 Jun 2012 10:00:31 +0000 (12:00 +0200)
committer Ben Skeggs <bskeggs@redhat.com>
Thu, 26 Jul 2012 00:28:13 +0000 (10:28 +1000)
Linear copy works by adding the offset to the buffer address,
which may end up not being 16-byte aligned.

Some tests I've written for prime_pcopy show that the engine
handles such unaligned addresses correctly, so the restriction on
the lowest 4 bits of the address can be lifted safely.

The comments added to the generated .fuc.h files were emitted by
envyas, I think because I used a newer version of it.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Cc: stable@vger.kernel.org
drivers/gpu/drm/nouveau/nva3_copy.fuc
drivers/gpu/drm/nouveau/nva3_copy.fuc.h
drivers/gpu/drm/nouveau/nvc0_copy.fuc.h

index abc3662..219850d 100644 (file)
@@ -119,9 +119,9 @@ dispatch_dma:
 // mthd 0x030c-0x0340, various stuff
 .b16 0xc3 14
 .b32 #ctx_src_address_high           ~0x000000ff
-.b32 #ctx_src_address_low            ~0xfffffff0
+.b32 #ctx_src_address_low            ~0xffffffff
 .b32 #ctx_dst_address_high           ~0x000000ff
-.b32 #ctx_dst_address_low            ~0xfffffff0
+.b32 #ctx_dst_address_low            ~0xffffffff
 .b32 #ctx_src_pitch                  ~0x0007ffff
 .b32 #ctx_dst_pitch                  ~0x0007ffff
 .b32 #ctx_xcnt                       ~0x0000ffff
index 1f33fbd..37d6de3 100644 (file)
@@ -1,37 +1,72 @@
-uint32_t nva3_pcopy_data[] = {
+u32 nva3_pcopy_data[] = {
+/* 0x0000: ctx_object */
        0x00000000,
+/* 0x0004: ctx_dma */
+/* 0x0004: ctx_dma_query */
        0x00000000,
+/* 0x0008: ctx_dma_src */
        0x00000000,
+/* 0x000c: ctx_dma_dst */
        0x00000000,
+/* 0x0010: ctx_query_address_high */
        0x00000000,
+/* 0x0014: ctx_query_address_low */
        0x00000000,
+/* 0x0018: ctx_query_counter */
        0x00000000,
+/* 0x001c: ctx_src_address_high */
        0x00000000,
+/* 0x0020: ctx_src_address_low */
        0x00000000,
+/* 0x0024: ctx_src_pitch */
        0x00000000,
+/* 0x0028: ctx_src_tile_mode */
        0x00000000,
+/* 0x002c: ctx_src_xsize */
        0x00000000,
+/* 0x0030: ctx_src_ysize */
        0x00000000,
+/* 0x0034: ctx_src_zsize */
        0x00000000,
+/* 0x0038: ctx_src_zoff */
        0x00000000,
+/* 0x003c: ctx_src_xoff */
        0x00000000,
+/* 0x0040: ctx_src_yoff */
        0x00000000,
+/* 0x0044: ctx_src_cpp */
        0x00000000,
+/* 0x0048: ctx_dst_address_high */
        0x00000000,
+/* 0x004c: ctx_dst_address_low */
        0x00000000,
+/* 0x0050: ctx_dst_pitch */
        0x00000000,
+/* 0x0054: ctx_dst_tile_mode */
        0x00000000,
+/* 0x0058: ctx_dst_xsize */
        0x00000000,
+/* 0x005c: ctx_dst_ysize */
        0x00000000,
+/* 0x0060: ctx_dst_zsize */
        0x00000000,
+/* 0x0064: ctx_dst_zoff */
        0x00000000,
+/* 0x0068: ctx_dst_xoff */
        0x00000000,
+/* 0x006c: ctx_dst_yoff */
        0x00000000,
+/* 0x0070: ctx_dst_cpp */
        0x00000000,
+/* 0x0074: ctx_format */
        0x00000000,
+/* 0x0078: ctx_swz_const0 */
        0x00000000,
+/* 0x007c: ctx_swz_const1 */
        0x00000000,
+/* 0x0080: ctx_xcnt */
        0x00000000,
+/* 0x0084: ctx_ycnt */
        0x00000000,
        0x00000000,
        0x00000000,
@@ -63,6 +98,7 @@ uint32_t nva3_pcopy_data[] = {
        0x00000000,
        0x00000000,
        0x00000000,
+/* 0x0100: dispatch_table */
        0x00010000,
        0x00000000,
        0x00000000,
@@ -73,6 +109,7 @@ uint32_t nva3_pcopy_data[] = {
        0x00010162,
        0x00000000,
        0x00030060,
+/* 0x0128: dispatch_dma */
        0x00010170,
        0x00000000,
        0x00010170,
@@ -118,11 +155,11 @@ uint32_t nva3_pcopy_data[] = {
        0x0000001c,
        0xffffff00,
        0x00000020,
-       0x0000000f,
+       0x00000000,
        0x00000048,
        0xffffff00,
        0x0000004c,
-       0x0000000f,
+       0x00000000,
        0x00000024,
        0xfff80000,
        0x00000050,
@@ -146,7 +183,8 @@ uint32_t nva3_pcopy_data[] = {
        0x00000800,
 };
 
-uint32_t nva3_pcopy_code[] = {
+u32 nva3_pcopy_code[] = {
+/* 0x0000: main */
        0x04fe04bd,
        0x3517f000,
        0xf10010fe,
@@ -158,23 +196,31 @@ uint32_t nva3_pcopy_code[] = {
        0x17f11031,
        0x27f01200,
        0x0012d003,
+/* 0x002f: spin */
        0xf40031f4,
        0x0ef40028,
+/* 0x0035: ih */
        0x8001cffd,
        0xf40812c4,
        0x21f4060b,
+/* 0x0041: ih_no_chsw */
        0x0412c472,
        0xf4060bf4,
+/* 0x004a: ih_no_cmd */
        0x11c4c321,
        0x4001d00c,
+/* 0x0052: swctx */
        0x47f101f8,
        0x4bfe7700,
        0x0007fe00,
        0xf00204b9,
        0x01f40643,
        0x0604fa09,
+/* 0x006b: swctx_load */
        0xfa060ef4,
+/* 0x006e: swctx_done */
        0x03f80504,
+/* 0x0072: chsw */
        0x27f100f8,
        0x23cf1400,
        0x1e3fc800,
@@ -183,18 +229,22 @@ uint32_t nva3_pcopy_code[] = {
        0x1e3af052,
        0xf00023d0,
        0x24d00147,
+/* 0x0093: chsw_no_unload */
        0xcf00f880,
        0x3dc84023,
        0x220bf41e,
        0xf40131f4,
        0x57f05221,
        0x0367f004,
+/* 0x00a8: chsw_load_ctx_dma */
        0xa07856bc,
        0xb6018068,
        0x87d00884,
        0x0162b600,
+/* 0x00bb: chsw_finish_load */
        0xf0f018f4,
        0x23d00237,
+/* 0x00c3: dispatch */
        0xf100f880,
        0xcf190037,
        0x33cf4032,
@@ -202,6 +252,7 @@ uint32_t nva3_pcopy_code[] = {
        0x1024b607,
        0x010057f1,
        0x74bd64bd,
+/* 0x00dc: dispatch_loop */
        0x58005658,
        0x50b60157,
        0x0446b804,
@@ -211,6 +262,7 @@ uint32_t nva3_pcopy_code[] = {
        0xb60276bb,
        0x57bb0374,
        0xdf0ef400,
+/* 0x0100: dispatch_valid_mthd */
        0xb60246bb,
        0x45bb0344,
        0x01459800,
@@ -220,31 +272,41 @@ uint32_t nva3_pcopy_code[] = {
        0xb0014658,
        0x1bf40064,
        0x00538009,
+/* 0x0127: dispatch_cmd */
        0xf4300ef4,
        0x55f90132,
        0xf40c01f4,
+/* 0x0132: dispatch_invalid_bitfield */
        0x25f0250e,
+/* 0x0135: dispatch_illegal_mthd */
        0x0125f002,
+/* 0x0138: dispatch_error */
        0x100047f1,
        0xd00042d0,
        0x27f04043,
        0x0002d040,
+/* 0x0148: hostirq_wait */
        0xf08002cf,
        0x24b04024,
        0xf71bf400,
+/* 0x0154: dispatch_done */
        0x1d0027f1,
        0xd00137f0,
        0x00f80023,
+/* 0x0160: cmd_nop */
+/* 0x0162: cmd_pm_trigger */
        0x27f100f8,
        0x34bd2200,
        0xd00233f0,
        0x00f80023,
+/* 0x0170: cmd_dma */
        0x012842b7,
        0xf00145b6,
        0x43801e39,
        0x0040b701,
        0x0644b606,
        0xf80043d0,
+/* 0x0189: cmd_exec_set_format */
        0xf030f400,
        0xb00001b0,
        0x01b00101,
@@ -256,20 +318,26 @@ uint32_t nva3_pcopy_code[] = {
        0x70b63847,
        0x0232f401,
        0x94bd84bd,
+/* 0x01b4: ncomp_loop */
        0xb60f4ac4,
        0xb4bd0445,
+/* 0x01bc: bpc_loop */
        0xf404a430,
        0xa5ff0f18,
        0x00cbbbc0,
        0xf40231f4,
+/* 0x01ce: cmp_c0 */
        0x1bf4220e,
        0x10c7f00c,
        0xf400cbbb,
+/* 0x01da: cmp_c1 */
        0xa430160e,
        0x0c18f406,
        0xbb14c7f0,
        0x0ef400cb,
+/* 0x01e9: cmp_zero */
        0x80c7f107,
+/* 0x01ed: bpc_next */
        0x01c83800,
        0xb60180b6,
        0xb5b801b0,
@@ -280,6 +348,7 @@ uint32_t nva3_pcopy_code[] = {
        0x98110680,
        0x68fd2008,
        0x0502f400,
+/* 0x0216: dst_xcnt */
        0x75fd64bd,
        0x1c078000,
        0xf10078fd,
@@ -304,6 +373,7 @@ uint32_t nva3_pcopy_code[] = {
        0x980056d0,
        0x56d01f06,
        0x1030f440,
+/* 0x0276: cmd_exec_set_surface_tiled */
        0x579800f8,
        0x6879c70a,
        0xb66478c7,
@@ -311,9 +381,11 @@ uint32_t nva3_pcopy_code[] = {
        0x0e76b060,
        0xf0091bf4,
        0x0ef40477,
+/* 0x0291: xtile64 */
        0x027cf00f,
        0xfd1170b6,
        0x77f00947,
+/* 0x029d: xtileok */
        0x0f5a9806,
        0xfd115b98,
        0xb7f000ab,
@@ -371,6 +443,7 @@ uint32_t nva3_pcopy_code[] = {
        0x67d00600,
        0x0060b700,
        0x0068d004,
+/* 0x0382: cmd_exec_set_surface_linear */
        0x6cf000f8,
        0x0260b702,
        0x0864b602,
@@ -381,13 +454,16 @@ uint32_t nva3_pcopy_code[] = {
        0xb70067d0,
        0x98040060,
        0x67d00957,
+/* 0x03ab: cmd_exec_wait */
        0xf900f800,
        0xf110f900,
        0xb6080007,
+/* 0x03b6: loop */
        0x01cf0604,
        0x0114f000,
        0xfcfa1bf4,
        0xf800fc10,
+/* 0x03c5: cmd_exec_query */
        0x0d34c800,
        0xf5701bf4,
        0xf103ab21,
@@ -417,6 +493,7 @@ uint32_t nva3_pcopy_code[] = {
        0x47f10153,
        0x44b60800,
        0x0045d006,
+/* 0x0438: query_counter */
        0x03ab21f5,
        0x080c47f1,
        0x980644b6,
@@ -439,11 +516,13 @@ uint32_t nva3_pcopy_code[] = {
        0x47f10153,
        0x44b60800,
        0x0045d006,
+/* 0x0492: cmd_exec */
        0x21f500f8,
        0x3fc803ab,
        0x0e0bf400,
        0x018921f5,
        0x020047f1,
+/* 0x04a7: cmd_exec_no_format */
        0xf11e0ef4,
        0xb6081067,
        0x77f00664,
@@ -451,19 +530,24 @@ uint32_t nva3_pcopy_code[] = {
        0x981c0780,
        0x67d02007,
        0x4067d000,
+/* 0x04c2: cmd_exec_init_src_surface */
        0x32f444bd,
        0xc854bd02,
        0x0bf4043f,
        0x8221f50a,
        0x0a0ef403,
+/* 0x04d4: src_tiled */
        0x027621f5,
+/* 0x04db: cmd_exec_init_dst_surface */
        0xf40749f0,
        0x57f00231,
        0x083fc82c,
        0xf50a0bf4,
        0xf4038221,
+/* 0x04ee: dst_tiled */
        0x21f50a0e,
        0x49f00276,
+/* 0x04f5: cmd_exec_kick */
        0x0057f108,
        0x0654b608,
        0xd0210698,
@@ -473,6 +557,8 @@ uint32_t nva3_pcopy_code[] = {
        0xc80054d0,
        0x0bf40c3f,
        0xc521f507,
+/* 0x0519: cmd_exec_done */
+/* 0x051b: cmd_wrcache_flush */
        0xf100f803,
        0xbd220027,
        0x0133f034,
index a8d1745..cd879f3 100644 (file)
@@ -1,34 +1,65 @@
-uint32_t nvc0_pcopy_data[] = {
+u32 nvc0_pcopy_data[] = {
+/* 0x0000: ctx_object */
        0x00000000,
+/* 0x0004: ctx_query_address_high */
        0x00000000,
+/* 0x0008: ctx_query_address_low */
        0x00000000,
+/* 0x000c: ctx_query_counter */
        0x00000000,
+/* 0x0010: ctx_src_address_high */
        0x00000000,
+/* 0x0014: ctx_src_address_low */
        0x00000000,
+/* 0x0018: ctx_src_pitch */
        0x00000000,
+/* 0x001c: ctx_src_tile_mode */
        0x00000000,
+/* 0x0020: ctx_src_xsize */
        0x00000000,
+/* 0x0024: ctx_src_ysize */
        0x00000000,
+/* 0x0028: ctx_src_zsize */
        0x00000000,
+/* 0x002c: ctx_src_zoff */
        0x00000000,
+/* 0x0030: ctx_src_xoff */
        0x00000000,
+/* 0x0034: ctx_src_yoff */
        0x00000000,
+/* 0x0038: ctx_src_cpp */
        0x00000000,
+/* 0x003c: ctx_dst_address_high */
        0x00000000,
+/* 0x0040: ctx_dst_address_low */
        0x00000000,
+/* 0x0044: ctx_dst_pitch */
        0x00000000,
+/* 0x0048: ctx_dst_tile_mode */
        0x00000000,
+/* 0x004c: ctx_dst_xsize */
        0x00000000,
+/* 0x0050: ctx_dst_ysize */
        0x00000000,
+/* 0x0054: ctx_dst_zsize */
        0x00000000,
+/* 0x0058: ctx_dst_zoff */
        0x00000000,
+/* 0x005c: ctx_dst_xoff */
        0x00000000,
+/* 0x0060: ctx_dst_yoff */
        0x00000000,
+/* 0x0064: ctx_dst_cpp */
        0x00000000,
+/* 0x0068: ctx_format */
        0x00000000,
+/* 0x006c: ctx_swz_const0 */
        0x00000000,
+/* 0x0070: ctx_swz_const1 */
        0x00000000,
+/* 0x0074: ctx_xcnt */
        0x00000000,
+/* 0x0078: ctx_ycnt */
        0x00000000,
        0x00000000,
        0x00000000,
@@ -63,6 +94,7 @@ uint32_t nvc0_pcopy_data[] = {
        0x00000000,
        0x00000000,
        0x00000000,
+/* 0x0100: dispatch_table */
        0x00010000,
        0x00000000,
        0x00000000,
@@ -111,11 +143,11 @@ uint32_t nvc0_pcopy_data[] = {
        0x00000010,
        0xffffff00,
        0x00000014,
-       0x0000000f,
+       0x00000000,
        0x0000003c,
        0xffffff00,
        0x00000040,
-       0x0000000f,
+       0x00000000,
        0x00000018,
        0xfff80000,
        0x00000044,
@@ -139,7 +171,8 @@ uint32_t nvc0_pcopy_data[] = {
        0x00000800,
 };
 
-uint32_t nvc0_pcopy_code[] = {
+u32 nvc0_pcopy_code[] = {
+/* 0x0000: main */
        0x04fe04bd,
        0x3517f000,
        0xf10010fe,
@@ -151,15 +184,20 @@ uint32_t nvc0_pcopy_code[] = {
        0x17f11031,
        0x27f01200,
        0x0012d003,
+/* 0x002f: spin */
        0xf40031f4,
        0x0ef40028,
+/* 0x0035: ih */
        0x8001cffd,
        0xf40812c4,
        0x21f4060b,
+/* 0x0041: ih_no_chsw */
        0x0412c4ca,
        0xf5070bf4,
+/* 0x004b: ih_no_cmd */
        0xc4010221,
        0x01d00c11,
+/* 0x0053: swctx */
        0xf101f840,
        0xfe770047,
        0x47f1004b,
@@ -188,8 +226,11 @@ uint32_t nvc0_pcopy_code[] = {
        0xf00204b9,
        0x01f40643,
        0x0604fa09,
+/* 0x00c3: swctx_load */
        0xfa060ef4,
+/* 0x00c6: swctx_done */
        0x03f80504,
+/* 0x00ca: chsw */
        0x27f100f8,
        0x23cf1400,
        0x1e3fc800,
@@ -198,18 +239,22 @@ uint32_t nvc0_pcopy_code[] = {
        0x1e3af053,
        0xf00023d0,
        0x24d00147,
+/* 0x00eb: chsw_no_unload */
        0xcf00f880,
        0x3dc84023,
        0x090bf41e,
        0xf40131f4,
+/* 0x00fa: chsw_finish_load */
        0x37f05321,
        0x8023d002,
+/* 0x0102: dispatch */
        0x37f100f8,
        0x32cf1900,
        0x0033cf40,
        0x07ff24e4,
        0xf11024b6,
        0xbd010057,
+/* 0x011b: dispatch_loop */
        0x5874bd64,
        0x57580056,
        0x0450b601,
@@ -219,6 +264,7 @@ uint32_t nvc0_pcopy_code[] = {
        0xbb0f08f4,
        0x74b60276,
        0x0057bb03,
+/* 0x013f: dispatch_valid_mthd */
        0xbbdf0ef4,
        0x44b60246,
        0x0045bb03,
@@ -229,24 +275,33 @@ uint32_t nvc0_pcopy_code[] = {
        0x64b00146,
        0x091bf400,
        0xf4005380,
+/* 0x0166: dispatch_cmd */
        0x32f4300e,
        0xf455f901,
        0x0ef40c01,
+/* 0x0171: dispatch_invalid_bitfield */
        0x0225f025,
+/* 0x0174: dispatch_illegal_mthd */
+/* 0x0177: dispatch_error */
        0xf10125f0,
        0xd0100047,
        0x43d00042,
        0x4027f040,
+/* 0x0187: hostirq_wait */
        0xcf0002d0,
        0x24f08002,
        0x0024b040,
+/* 0x0193: dispatch_done */
        0xf1f71bf4,
        0xf01d0027,
        0x23d00137,
+/* 0x019f: cmd_nop */
        0xf800f800,
+/* 0x01a1: cmd_pm_trigger */
        0x0027f100,
        0xf034bd22,
        0x23d00233,
+/* 0x01af: cmd_exec_set_format */
        0xf400f800,
        0x01b0f030,
        0x0101b000,
@@ -258,20 +313,26 @@ uint32_t nvc0_pcopy_code[] = {
        0x3847c701,
        0xf40170b6,
        0x84bd0232,
+/* 0x01da: ncomp_loop */
        0x4ac494bd,
        0x0445b60f,
+/* 0x01e2: bpc_loop */
        0xa430b4bd,
        0x0f18f404,
        0xbbc0a5ff,
        0x31f400cb,
        0x220ef402,
+/* 0x01f4: cmp_c0 */
        0xf00c1bf4,
        0xcbbb10c7,
        0x160ef400,
+/* 0x0200: cmp_c1 */
        0xf406a430,
        0xc7f00c18,
        0x00cbbb14,
+/* 0x020f: cmp_zero */
        0xf1070ef4,
+/* 0x0213: bpc_next */
        0x380080c7,
        0x80b601c8,
        0x01b0b601,
@@ -283,6 +344,7 @@ uint32_t nvc0_pcopy_code[] = {
        0x1d08980e,
        0xf40068fd,
        0x64bd0502,
+/* 0x023c: dst_xcnt */
        0x800075fd,
        0x78fd1907,
        0x1057f100,
@@ -307,15 +369,18 @@ uint32_t nvc0_pcopy_code[] = {
        0x1c069800,
        0xf44056d0,
        0x00f81030,
+/* 0x029c: cmd_exec_set_surface_tiled */
        0xc7075798,
        0x78c76879,
        0x0380b664,
        0xb06077c7,
        0x1bf40e76,
        0x0477f009,
+/* 0x02b7: xtile64 */
        0xf00f0ef4,
        0x70b6027c,
        0x0947fd11,
+/* 0x02c3: xtileok */
        0x980677f0,
        0x5b980c5a,
        0x00abfd0e,
@@ -374,6 +439,7 @@ uint32_t nvc0_pcopy_code[] = {
        0xb70067d0,
        0xd0040060,
        0x00f80068,
+/* 0x03a8: cmd_exec_set_surface_linear */
        0xb7026cf0,
        0xb6020260,
        0x57980864,
@@ -384,12 +450,15 @@ uint32_t nvc0_pcopy_code[] = {
        0x0060b700,
        0x06579804,
        0xf80067d0,
+/* 0x03d1: cmd_exec_wait */
        0xf900f900,
        0x0007f110,
        0x0604b608,
+/* 0x03dc: loop */
        0xf00001cf,
        0x1bf40114,
        0xfc10fcfa,
+/* 0x03eb: cmd_exec_query */
        0xc800f800,
        0x1bf40d34,
        0xd121f570,
@@ -419,6 +488,7 @@ uint32_t nvc0_pcopy_code[] = {
        0x0153f026,
        0x080047f1,
        0xd00644b6,
+/* 0x045e: query_counter */
        0x21f50045,
        0x47f103d1,
        0x44b6080c,
@@ -442,11 +512,13 @@ uint32_t nvc0_pcopy_code[] = {
        0x080047f1,
        0xd00644b6,
        0x00f80045,
+/* 0x04b8: cmd_exec */
        0x03d121f5,
        0xf4003fc8,
        0x21f50e0b,
        0x47f101af,
        0x0ef40200,
+/* 0x04cd: cmd_exec_no_format */
        0x1067f11e,
        0x0664b608,
        0x800177f0,
@@ -454,18 +526,23 @@ uint32_t nvc0_pcopy_code[] = {
        0x1d079819,
        0xd00067d0,
        0x44bd4067,
+/* 0x04e8: cmd_exec_init_src_surface */
        0xbd0232f4,
        0x043fc854,
        0xf50a0bf4,
        0xf403a821,
+/* 0x04fa: src_tiled */
        0x21f50a0e,
        0x49f0029c,
+/* 0x0501: cmd_exec_init_dst_surface */
        0x0231f407,
        0xc82c57f0,
        0x0bf4083f,
        0xa821f50a,
        0x0a0ef403,
+/* 0x0514: dst_tiled */
        0x029c21f5,
+/* 0x051b: cmd_exec_kick */
        0xf10849f0,
        0xb6080057,
        0x06980654,
@@ -475,7 +552,9 @@ uint32_t nvc0_pcopy_code[] = {
        0x54d00546,
        0x0c3fc800,
        0xf5070bf4,
+/* 0x053f: cmd_exec_done */
        0xf803eb21,
+/* 0x0541: cmd_wrcache_flush */
        0x0027f100,
        0xf034bd22,
        0x23d00133,