Merge branch 'x86-kbuild-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[pandora-kernel.git] / drivers / gpu / drm / radeon / r300.c
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/seq_file.h>
29 #include "drmP.h"
30 #include "drm.h"
31 #include "radeon_reg.h"
32 #include "radeon.h"
33 #include "radeon_drm.h"
34 #include "radeon_share.h"
35
36 /* r300,r350,rv350,rv370,rv380 depends on : */
37 void r100_hdp_reset(struct radeon_device *rdev);
38 int r100_cp_reset(struct radeon_device *rdev);
39 int r100_rb2d_reset(struct radeon_device *rdev);
40 int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
41 int r100_pci_gart_enable(struct radeon_device *rdev);
42 void r100_pci_gart_disable(struct radeon_device *rdev);
43 void r100_mc_setup(struct radeon_device *rdev);
44 void r100_mc_disable_clients(struct radeon_device *rdev);
45 int r100_gui_wait_for_idle(struct radeon_device *rdev);
46 int r100_cs_packet_parse(struct radeon_cs_parser *p,
47                          struct radeon_cs_packet *pkt,
48                          unsigned idx);
49 int r100_cs_packet_parse_vline(struct radeon_cs_parser *p);
50 int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
51                               struct radeon_cs_reloc **cs_reloc);
52 int r100_cs_parse_packet0(struct radeon_cs_parser *p,
53                           struct radeon_cs_packet *pkt,
54                           const unsigned *auth, unsigned n,
55                           radeon_packet0_check_t check);
56 void r100_cs_dump_packet(struct radeon_cs_parser *p,
57                          struct radeon_cs_packet *pkt);
58 int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
59                                          struct radeon_cs_packet *pkt,
60                                          struct radeon_object *robj);
61
62 /* This files gather functions specifics to:
63  * r300,r350,rv350,rv370,rv380
64  *
65  * Some of these functions might be used by newer ASICs.
66  */
67 void r300_gpu_init(struct radeon_device *rdev);
68 int r300_mc_wait_for_idle(struct radeon_device *rdev);
69 int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev);
70
71
72 /*
73  * rv370,rv380 PCIE GART
74  */
75 void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
76 {
77         uint32_t tmp;
78         int i;
79
80         /* Workaround HW bug do flush 2 times */
81         for (i = 0; i < 2; i++) {
82                 tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
83                 WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp | RADEON_PCIE_TX_GART_INVALIDATE_TLB);
84                 (void)RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
85                 WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
86         }
87         mb();
88 }
89
/* Set up the PCIE GART: allocate the page table in VRAM, program the
 * aperture range registers and enable translation.  Returns 0 on
 * success or a negative error code.
 */
int rv370_pcie_gart_enable(struct radeon_device *rdev)
{
        uint32_t table_addr;
        uint32_t tmp;
        int r;

        /* Initialize common gart structure */
        r = radeon_gart_init(rdev);
        if (r) {
                return r;
        }
        /* debugfs registration failure is not fatal, just log it */
        r = rv370_debugfs_pcie_gart_info_init(rdev);
        if (r) {
                DRM_ERROR("Failed to register debugfs file for PCIE gart !\n");
        }
        /* one 32-bit entry per GPU page */
        rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
        r = radeon_gart_table_vram_alloc(rdev);
        if (r) {
                return r;
        }
        /* discard memory request outside of configured range */
        tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
        WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
        WREG32_PCIE(RADEON_PCIE_TX_GART_START_LO, rdev->mc.gtt_location);
        /* aperture end is inclusive: last page of the GTT range */
        tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 4096;
        WREG32_PCIE(RADEON_PCIE_TX_GART_END_LO, tmp);
        WREG32_PCIE(RADEON_PCIE_TX_GART_START_HI, 0);
        WREG32_PCIE(RADEON_PCIE_TX_GART_END_HI, 0);
        table_addr = rdev->gart.table_addr;
        WREG32_PCIE(RADEON_PCIE_TX_GART_BASE, table_addr);
        /* FIXME: setup default page */
        /* discarded reads are redirected to the start of VRAM */
        WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO, rdev->mc.vram_location);
        WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_HI, 0);
        /* Clear error */
        WREG32_PCIE(0x18, 0);
        /* turn translation on while keeping unmapped-access discard set */
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
        tmp |= RADEON_PCIE_TX_GART_EN;
        tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
        WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
        rv370_pcie_gart_tlb_flush(rdev);
        DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
                 rdev->mc.gtt_size >> 20, table_addr);
        rdev->gart.ready = true;
        return 0;
}
135
136 void rv370_pcie_gart_disable(struct radeon_device *rdev)
137 {
138         uint32_t tmp;
139
140         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
141         tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
142         WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp & ~RADEON_PCIE_TX_GART_EN);
143         if (rdev->gart.table.vram.robj) {
144                 radeon_object_kunmap(rdev->gart.table.vram.robj);
145                 radeon_object_unpin(rdev->gart.table.vram.robj);
146         }
147 }
148
149 int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
150 {
151         void __iomem *ptr = (void *)rdev->gart.table.vram.ptr;
152
153         if (i < 0 || i > rdev->gart.num_gpu_pages) {
154                 return -EINVAL;
155         }
156         addr = (lower_32_bits(addr) >> 8) |
157                ((upper_32_bits(addr) & 0xff) << 24) |
158                0xc;
159         /* on x86 we want this to be CPU endian, on powerpc
160          * on powerpc without HW swappers, it'll get swapped on way
161          * into VRAM - so no need for cpu_to_le32 on VRAM tables */
162         writel(addr, ((void __iomem *)ptr) + (i * 4));
163         return 0;
164 }
165
/* Enable the GART appropriate for this board: none on AGP boards
 * (the AGP aperture is used instead), the PCIE GART on PCIE boards,
 * the PCI GART otherwise.  Also patches the asic vtable so later
 * gart ops go to the PCIE implementations.
 */
int r300_gart_enable(struct radeon_device *rdev)
{
#if __OS_HAS_AGP
        if (rdev->flags & RADEON_IS_AGP) {
                /* make sure any on-chip GART is off while AGP is in use */
                if (rdev->family > CHIP_RV350) {
                        rv370_pcie_gart_disable(rdev);
                } else {
                        r100_pci_gart_disable(rdev);
                }
                return 0;
        }
#endif
        if (rdev->flags & RADEON_IS_PCIE) {
                /* route subsequent gart ops to the PCIE variants */
                rdev->asic->gart_disable = &rv370_pcie_gart_disable;
                rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush;
                rdev->asic->gart_set_page = &rv370_pcie_gart_set_page;
                return rv370_pcie_gart_enable(rdev);
        }
        return r100_pci_gart_enable(rdev);
}
186
187
188 /*
189  * MC
190  */
/* Initialize the memory controller: reset GPU state, lay out the
 * VRAM and GTT apertures in the GPU address space and program them.
 * Returns 0 on success or a negative error code.
 */
int r300_mc_init(struct radeon_device *rdev)
{
        int r;

        /* debugfs registration failure is not fatal */
        if (r100_debugfs_rbbm_init(rdev)) {
                DRM_ERROR("Failed to register debugfs file for RBBM !\n");
        }

        r300_gpu_init(rdev);
        /* make sure no GART is live while apertures are rearranged */
        r100_pci_gart_disable(rdev);
        if (rdev->flags & RADEON_IS_PCIE) {
                rv370_pcie_gart_disable(rdev);
        }

        /* Setup GPU memory space */
        /* 0xFFFFFFFF = "unset, let radeon_mc_setup pick a location" */
        rdev->mc.vram_location = 0xFFFFFFFFUL;
        rdev->mc.gtt_location = 0xFFFFFFFFUL;
        if (rdev->flags & RADEON_IS_AGP) {
                r = radeon_agp_init(rdev);
                if (r) {
                        /* AGP bring-up failed: fall back to a plain GART */
                        printk(KERN_WARNING "[drm] Disabling AGP\n");
                        rdev->flags &= ~RADEON_IS_AGP;
                        rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
                } else {
                        /* GTT aperture must match the AGP aperture base */
                        rdev->mc.gtt_location = rdev->mc.agp_base;
                }
        }
        r = radeon_mc_setup(rdev);
        if (r) {
                return r;
        }

        /* Program GPU memory space */
        r100_mc_disable_clients(rdev);
        if (r300_mc_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait MC idle while "
                       "programming pipes. Bad things might happen.\n");
        }
        r100_mc_setup(rdev);
        return 0;
}
232
233 void r300_mc_fini(struct radeon_device *rdev)
234 {
235         if (rdev->flags & RADEON_IS_PCIE) {
236                 rv370_pcie_gart_disable(rdev);
237                 radeon_gart_table_vram_free(rdev);
238         } else {
239                 r100_pci_gart_disable(rdev);
240                 radeon_gart_table_ram_free(rdev);
241         }
242         radeon_gart_fini(rdev);
243 }
244
245
246 /*
247  * Fence emission
248  */
/* Emit a fence on the CP ring: flush 3D caches, wait for idle & clean,
 * then write the fence sequence number to the scratch register and
 * fire the SW interrupt.  The packet order here is the contract; do
 * not reorder.
 */
void r300_fence_ring_emit(struct radeon_device *rdev,
                          struct radeon_fence *fence)
{
        /* Who ever call radeon_fence_emit should call ring_lock and ask
         * for enough space (today caller are ib schedule and buffer move) */
        /* Write SC register so SC & US assert idle */
        /* NOTE(review): 0x43E0/0x43E4 look like scissor-block regs (also
         * poked in r300_ga_reset) - confirm against register docs */
        radeon_ring_write(rdev, PACKET0(0x43E0, 0));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, PACKET0(0x43E4, 0));
        radeon_ring_write(rdev, 0);
        /* Flush 3D cache */
        radeon_ring_write(rdev, PACKET0(0x4E4C, 0));
        radeon_ring_write(rdev, (2 << 0));
        radeon_ring_write(rdev, PACKET0(0x4F18, 0));
        radeon_ring_write(rdev, (1 << 0));
        /* Wait until IDLE & CLEAN */
        radeon_ring_write(rdev, PACKET0(0x1720, 0));
        radeon_ring_write(rdev, (1 << 17) | (1 << 16)  | (1 << 9));
        /* Emit fence sequence & fire IRQ */
        radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
        radeon_ring_write(rdev, fence->seq);
        radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
        radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
}
273
274
275 /*
276  * Global GPU functions
277  */
/* Copy num_pages pages from src_offset to dst_offset with the CP DMA
 * packet, split into chunks of at most 0x1FFFFF bytes, optionally
 * emitting a fence after the copy is queued.  Returns 0 on success or
 * a negative error code from ring lock / fence emit.
 */
int r300_copy_dma(struct radeon_device *rdev,
                  uint64_t src_offset,
                  uint64_t dst_offset,
                  unsigned num_pages,
                  struct radeon_fence *fence)
{
        uint32_t size;
        uint32_t cur_size;
        int i, num_loops;
        int r = 0;

        /* radeon pitch is /64 */
        size = num_pages << PAGE_SHIFT;
        /* each DMA packet moves at most 0x1FFFFF bytes */
        num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
        /* 4 dwords per chunk plus slack for the surrounding packets */
        r = radeon_ring_lock(rdev, num_loops * 4 + 64);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                return r;
        }
        /* Must wait for 2D idle & clean before DMA or hangs might happen */
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0 ));
        radeon_ring_write(rdev, (1 << 16));
        for (i = 0; i < num_loops; i++) {
                cur_size = size;
                if (cur_size > 0x1FFFFF) {
                        cur_size = 0x1FFFFF;
                }
                size -= cur_size;
                /* NOTE(review): 0x720 = DMA packet base; bits 31:30 carry
                 * control flags - confirm exact meaning in register docs */
                radeon_ring_write(rdev, PACKET0(0x720, 2));
                radeon_ring_write(rdev, src_offset);
                radeon_ring_write(rdev, dst_offset);
                radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
                src_offset += cur_size;
                dst_offset += cur_size;
        }
        /* wait for the DMA engine to drain before signalling completion */
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
        if (fence) {
                r = radeon_fence_emit(rdev, fence);
        }
        radeon_ring_unlock_commit(rdev);
        return r;
}
321
/* Prime the CP ring with the initial 3D engine state: pipe
 * configuration, cache flushes and default rasterizer setup.
 * Silently returns if the ring cannot be locked.
 */
void r300_ring_start(struct radeon_device *rdev)
{
        unsigned gb_tile_config;
        int r;

        /* Sub pixel 1/12 so we can have 4K rendering according to doc */
        gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
        switch(rdev->num_gb_pipes) {
        case 2:
                gb_tile_config |= R300_PIPE_COUNT_R300;
                break;
        case 3:
                gb_tile_config |= R300_PIPE_COUNT_R420_3P;
                break;
        case 4:
                gb_tile_config |= R300_PIPE_COUNT_R420;
                break;
        case 1:
        default:
                gb_tile_config |= R300_PIPE_COUNT_RV350;
                break;
        }

        r = radeon_ring_lock(rdev, 64);
        if (r) {
                return;
        }
        /* sync 2D/3D engines before touching shared state */
        radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
        radeon_ring_write(rdev,
                          RADEON_ISYNC_ANY2D_IDLE3D |
                          RADEON_ISYNC_ANY3D_IDLE2D |
                          RADEON_ISYNC_WAIT_IDLEGUI |
                          RADEON_ISYNC_CPSCRATCH_IDLEGUI);
        radeon_ring_write(rdev, PACKET0(R300_GB_TILE_CONFIG, 0));
        radeon_ring_write(rdev, gb_tile_config);
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(rdev,
                          RADEON_WAIT_2D_IDLECLEAN |
                          RADEON_WAIT_3D_IDLECLEAN);
        /* NOTE(review): 0x170C is the same register poked in
         * r300_gpu_init; exact meaning undocumented here */
        radeon_ring_write(rdev, PACKET0(0x170C, 0));
        radeon_ring_write(rdev, 1 << 31);
        radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0));
        radeon_ring_write(rdev, 0);
        /* flush destination and Z caches, then wait idle & clean */
        radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
        radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(rdev,
                          RADEON_WAIT_2D_IDLECLEAN |
                          RADEON_WAIT_3D_IDLECLEAN);
        radeon_ring_write(rdev, PACKET0(R300_GB_AA_CONFIG, 0));
        radeon_ring_write(rdev, 0);
        /* second flush round after AA config */
        radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
        radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
        /* default multisample sample positions */
        radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS0, 0));
        radeon_ring_write(rdev,
                          ((6 << R300_MS_X0_SHIFT) |
                           (6 << R300_MS_Y0_SHIFT) |
                           (6 << R300_MS_X1_SHIFT) |
                           (6 << R300_MS_Y1_SHIFT) |
                           (6 << R300_MS_X2_SHIFT) |
                           (6 << R300_MS_Y2_SHIFT) |
                           (6 << R300_MSBD0_Y_SHIFT) |
                           (6 << R300_MSBD0_X_SHIFT)));
        radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS1, 0));
        radeon_ring_write(rdev,
                          ((6 << R300_MS_X3_SHIFT) |
                           (6 << R300_MS_Y3_SHIFT) |
                           (6 << R300_MS_X4_SHIFT) |
                           (6 << R300_MS_Y4_SHIFT) |
                           (6 << R300_MS_X5_SHIFT) |
                           (6 << R300_MS_Y5_SHIFT) |
                           (6 << R300_MSBD1_SHIFT)));
        /* geometry assembly defaults: deadlock workaround, triangle
         * fill mode, nearest rounding */
        radeon_ring_write(rdev, PACKET0(R300_GA_ENHANCE, 0));
        radeon_ring_write(rdev, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL);
        radeon_ring_write(rdev, PACKET0(R300_GA_POLY_MODE, 0));
        radeon_ring_write(rdev,
                          R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE);
        radeon_ring_write(rdev, PACKET0(R300_GA_ROUND_MODE, 0));
        radeon_ring_write(rdev,
                          R300_GEOMETRY_ROUND_NEAREST |
                          R300_COLOR_ROUND_NEAREST);
        radeon_ring_unlock_commit(rdev);
}
411
412 void r300_errata(struct radeon_device *rdev)
413 {
414         rdev->pll_errata = 0;
415
416         if (rdev->family == CHIP_R300 &&
417             (RREG32(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) == RADEON_CFG_ATI_REV_A11) {
418                 rdev->pll_errata |= CHIP_ERRATA_R300_CG;
419         }
420 }
421
422 int r300_mc_wait_for_idle(struct radeon_device *rdev)
423 {
424         unsigned i;
425         uint32_t tmp;
426
427         for (i = 0; i < rdev->usec_timeout; i++) {
428                 /* read MC_STATUS */
429                 tmp = RREG32(0x0150);
430                 if (tmp & (1 << 4)) {
431                         return 0;
432                 }
433                 DRM_UDELAY(1);
434         }
435         return -1;
436 }
437
/* Reset HDP, derive and program the GB pipe configuration for the
 * detected chip family, then wait for the GUI and MC to go idle.
 */
void r300_gpu_init(struct radeon_device *rdev)
{
        uint32_t gb_tile_config, tmp;

        r100_hdp_reset(rdev);
        /* FIXME: rv380 one pipes ? */
        if ((rdev->family == CHIP_R300) || (rdev->family == CHIP_R350)) {
                /* r300,r350 */
                rdev->num_gb_pipes = 2;
        } else {
                /* rv350,rv370,rv380 */
                rdev->num_gb_pipes = 1;
        }
        rdev->num_z_pipes = 1;
        gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
        switch (rdev->num_gb_pipes) {
        case 2:
                gb_tile_config |= R300_PIPE_COUNT_R300;
                break;
        case 3:
                gb_tile_config |= R300_PIPE_COUNT_R420_3P;
                break;
        case 4:
                gb_tile_config |= R300_PIPE_COUNT_R420;
                break;
        default:
        case 1:
                gb_tile_config |= R300_PIPE_COUNT_RV350;
                break;
        }
        WREG32(R300_GB_TILE_CONFIG, gb_tile_config);

        if (r100_gui_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait GUI idle while "
                       "programming pipes. Bad things might happen.\n");
        }

        /* NOTE(review): 0x170C is also poked through the ring in
         * r300_ring_start; exact meaning undocumented here */
        tmp = RREG32(0x170C);
        WREG32(0x170C, tmp | (1 << 31));

        WREG32(R300_RB2D_DSTCACHE_MODE,
               R300_DC_AUTOFLUSH_ENABLE |
               R300_DC_DC_DISABLE_IGNORE_PE);

        if (r100_gui_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait GUI idle while "
                       "programming pipes. Bad things might happen.\n");
        }
        if (r300_mc_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait MC idle while "
                       "programming pipes. Bad things might happen.\n");
        }
        DRM_INFO("radeon: %d quad pipes, %d Z pipes initialized.\n",
                 rdev->num_gb_pipes, rdev->num_z_pipes);
}
493
/* Soft-reset the GA/VAP engines.  Stops the CP first, then pulses
 * RBBM_SOFT_RESET until the busy bits in RBBM_STATUS clear, and
 * finally re-initializes the CP if it was running before the reset.
 * Returns 0 (or the CP init result) on success, -1 on timeout.
 */
int r300_ga_reset(struct radeon_device *rdev)
{
        uint32_t tmp;
        bool reinit_cp;
        int i;

        /* remember whether the CP must be brought back afterwards */
        reinit_cp = rdev->cp.ready;
        rdev->cp.ready = false;
        for (i = 0; i < rdev->usec_timeout; i++) {
                /* stop the CP queues and pulse the soft-reset bits */
                WREG32(RADEON_CP_CSQ_MODE, 0);
                WREG32(RADEON_CP_CSQ_CNTL, 0);
                WREG32(RADEON_RBBM_SOFT_RESET, 0x32005);
                (void)RREG32(RADEON_RBBM_SOFT_RESET);
                udelay(200);
                WREG32(RADEON_RBBM_SOFT_RESET, 0);
                /* Wait to prevent race in RBBM_STATUS */
                mdelay(1);
                tmp = RREG32(RADEON_RBBM_STATUS);
                /* bits 20/26 are treated here as VAP/GA busy -- assumed
                 * from the error text, verify against register docs */
                if (tmp & ((1 << 20) | (1 << 26))) {
                        DRM_ERROR("VAP & CP still busy (RBBM_STATUS=0x%08X)", tmp);
                        /* GA still busy soft reset it */
                        WREG32(0x429C, 0x200);
                        WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
                        WREG32(0x43E0, 0);
                        WREG32(0x43E4, 0);
                        WREG32(0x24AC, 0);
                }
                /* Wait to prevent race in RBBM_STATUS */
                mdelay(1);
                tmp = RREG32(RADEON_RBBM_STATUS);
                if (!(tmp & ((1 << 20) | (1 << 26)))) {
                        break;
                }
        }
        /* second loop: confirm the busy bits stay clear */
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(RADEON_RBBM_STATUS);
                if (!(tmp & ((1 << 20) | (1 << 26)))) {
                        DRM_INFO("GA reset succeed (RBBM_STATUS=0x%08X)\n",
                                 tmp);
                        /* restart the CP only if we took it down */
                        if (reinit_cp) {
                                return r100_cp_init(rdev, rdev->cp.ring_size);
                        }
                        return 0;
                }
                DRM_UDELAY(1);
        }
        tmp = RREG32(RADEON_RBBM_STATUS);
        DRM_ERROR("Failed to reset GA ! (RBBM_STATUS=0x%08X)\n", tmp);
        return -1;
}
544
/* Full GPU soft reset: HDP first, then RB2D, GA and CP depending on
 * which busy bits are set in RBBM_STATUS.  Returns 0 if the GPU ends
 * up idle, -1 otherwise.
 */
int r300_gpu_reset(struct radeon_device *rdev)
{
        uint32_t status;

        /* reset order likely matter */
        status = RREG32(RADEON_RBBM_STATUS);
        /* reset HDP */
        r100_hdp_reset(rdev);
        /* reset rb2d */
        if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
                r100_rb2d_reset(rdev);
        }
        /* reset GA */
        if (status & ((1 << 20) | (1 << 26))) {
                r300_ga_reset(rdev);
        }
        /* reset CP */
        /* re-read: earlier resets may have changed the status */
        status = RREG32(RADEON_RBBM_STATUS);
        if (status & (1 << 16)) {
                r100_cp_reset(rdev);
        }
        /* Check if GPU is idle */
        status = RREG32(RADEON_RBBM_STATUS);
        if (status & (1 << 31)) {
                DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
                return -1;
        }
        DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status);
        return 0;
}
575
576
577 /*
578  * r300,r350,rv350,rv380 VRAM info
579  */
580 void r300_vram_info(struct radeon_device *rdev)
581 {
582         uint32_t tmp;
583
584         /* DDR for all card after R300 & IGP */
585         rdev->mc.vram_is_ddr = true;
586         tmp = RREG32(RADEON_MEM_CNTL);
587         if (tmp & R300_MEM_NUM_CHANNELS_MASK) {
588                 rdev->mc.vram_width = 128;
589         } else {
590                 rdev->mc.vram_width = 64;
591         }
592
593         r100_vram_init_sizes(rdev);
594 }
595
596
597 /*
598  * PCIE Lanes
599  */
600
/* Request a new PCIE link width (0/1/2/4/8/12/16 lanes; anything else
 * maps to x16).  No-op on IGP or non-PCIE boards, or when the link is
 * already at the requested width.
 */
void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes)
{
        uint32_t link_width_cntl, mask;

        if (rdev->flags & RADEON_IS_IGP)
                return;

        if (!(rdev->flags & RADEON_IS_PCIE))
                return;

        /* FIXME wait for idle */

        switch (lanes) {
        case 0:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X0;
                break;
        case 1:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X1;
                break;
        case 2:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X2;
                break;
        case 4:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X4;
                break;
        case 8:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X8;
                break;
        case 12:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X12;
                break;
        case 16:
        default:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X16;
                break;
        }

        link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);

        /* already running at the requested width: nothing to do */
        if ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) ==
            (mask << RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT))
                return;

        /* clear the width field and any pending reconfig flags */
        link_width_cntl &= ~(RADEON_PCIE_LC_LINK_WIDTH_MASK |
                             RADEON_PCIE_LC_RECONFIG_NOW |
                             RADEON_PCIE_LC_RECONFIG_LATER |
                             RADEON_PCIE_LC_SHORT_RECONFIG_EN);
        link_width_cntl |= mask;
        WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
        /* second write triggers the reconfiguration */
        WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, (link_width_cntl |
                                                     RADEON_PCIE_LC_RECONFIG_NOW));

        /* wait for lane set to complete */
        /* NOTE(review): spins forever if the register keeps reading
         * 0xffffffff (e.g. the device fell off the bus); consider
         * adding a timeout */
        link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
        while (link_width_cntl == 0xffffffff)
                link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);

}
659
660
661 /*
662  * Debugfs info
663  */
664 #if defined(CONFIG_DEBUG_FS)
665 static int rv370_debugfs_pcie_gart_info(struct seq_file *m, void *data)
666 {
667         struct drm_info_node *node = (struct drm_info_node *) m->private;
668         struct drm_device *dev = node->minor->dev;
669         struct radeon_device *rdev = dev->dev_private;
670         uint32_t tmp;
671
672         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
673         seq_printf(m, "PCIE_TX_GART_CNTL 0x%08x\n", tmp);
674         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_BASE);
675         seq_printf(m, "PCIE_TX_GART_BASE 0x%08x\n", tmp);
676         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_LO);
677         seq_printf(m, "PCIE_TX_GART_START_LO 0x%08x\n", tmp);
678         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_HI);
679         seq_printf(m, "PCIE_TX_GART_START_HI 0x%08x\n", tmp);
680         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_LO);
681         seq_printf(m, "PCIE_TX_GART_END_LO 0x%08x\n", tmp);
682         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_HI);
683         seq_printf(m, "PCIE_TX_GART_END_HI 0x%08x\n", tmp);
684         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_ERROR);
685         seq_printf(m, "PCIE_TX_GART_ERROR 0x%08x\n", tmp);
686         return 0;
687 }
688
689 static struct drm_info_list rv370_pcie_gart_info_list[] = {
690         {"rv370_pcie_gart_info", rv370_debugfs_pcie_gart_info, 0, NULL},
691 };
692 #endif
693
/* Register the PCIE GART debugfs file.  Returns 0 when debugfs is
 * disabled or on success, otherwise the error from
 * radeon_debugfs_add_files().
 */
int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
        /* use ARRAY_SIZE so the count tracks the table automatically
         * instead of the magic literal 1 */
        return radeon_debugfs_add_files(rdev, rv370_pcie_gart_info_list,
                                        ARRAY_SIZE(rv370_pcie_gart_info_list));
#else
        return 0;
#endif
}
702
703
704 /*
705  * CS functions
706  */
/* Per-colorbuffer state gathered while validating a command stream. */
struct r300_cs_track_cb {
        struct radeon_object    *robj;   /* backing buffer object */
        unsigned                pitch;   /* pitch (multiplied by cpp when sizing) */
        unsigned                cpp;     /* bytes per pixel */
        unsigned                offset;  /* byte offset into robj */
};

/* Per-vertex-array state. */
struct r300_cs_track_array {
        struct radeon_object    *robj;   /* backing buffer object */
        unsigned                esize;   /* element size */
};

/* Per-texture-unit state used for mipmap size validation. */
struct r300_cs_track_texture {
        struct radeon_object    *robj;          /* bound texture object */
        unsigned                pitch;          /* row pitch, used when use_pitch is set */
        unsigned                width;
        unsigned                height;
        unsigned                num_levels;     /* highest mip level index (inclusive) */
        unsigned                cpp;            /* bytes per texel */
        unsigned                tex_coord_type; /* 0/1/2; 1 scales by depth, 2 by 6 faces */
        unsigned                txdepth;        /* log2 depth for type-1 (3D) textures */
        unsigned                width_11;       /* extra width bits ORed in on >= RV515 -- assumed, verify */
        unsigned                height_11;      /* extra height bits ORed in on >= RV515 -- assumed, verify */
        bool                    use_pitch;      /* size rows from pitch instead of width */
        bool                    enabled;        /* unit participates in checking */
        bool                    roundup_w;      /* round width up to a power of two per level */
        bool                    roundup_h;      /* round height up to a power of two per level */
};

/* Aggregate CS checker state for one command stream. */
struct r300_cs_track {
        unsigned                        num_cb;      /* active color buffers in cb[] */
        unsigned                        maxy;        /* height used when sizing buffers */
        unsigned                        vtx_size;    /* vertex size -- units not shown here */
        unsigned                        vap_vf_cntl; /* saved VAP_VF_CNTL value -- presumably */
        unsigned                        immd_dwords; /* immediate-mode vertex data dwords */
        unsigned                        num_arrays;  /* active entries in arrays[] */
        unsigned                        max_indx;    /* highest vertex index referenced */
        struct r300_cs_track_array      arrays[11];
        struct r300_cs_track_cb         cb[4];
        struct r300_cs_track_cb         zb;          /* depth buffer */
        struct r300_cs_track_texture    textures[16];
        bool                            z_enabled;   /* depth buffer in use */
};
750
/* Dump the tracked state of one texture unit via DRM_ERROR; called
 * when a command-stream texture size check fails.
 */
static inline void r300_cs_track_texture_print(struct r300_cs_track_texture *t)
{
        DRM_ERROR("pitch                      %d\n", t->pitch);
        DRM_ERROR("width                      %d\n", t->width);
        DRM_ERROR("height                     %d\n", t->height);
        DRM_ERROR("num levels                 %d\n", t->num_levels);
        DRM_ERROR("depth                      %d\n", t->txdepth);
        DRM_ERROR("bpp                        %d\n", t->cpp);
        DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
        DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
        DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
}
763
/* Verify that every enabled texture unit has a bound object large
 * enough for its complete mip chain; on failure dump the texture
 * state and return -EINVAL.  Returns 0 when all units check out.
 */
static inline int r300_cs_track_texture_check(struct radeon_device *rdev,
                                              struct r300_cs_track *track)
{
        struct radeon_object *robj;
        unsigned long size;
        unsigned u, i, w, h;

        for (u = 0; u < 16; u++) {
                if (!track->textures[u].enabled)
                        continue;
                robj = track->textures[u].robj;
                if (robj == NULL) {
                        DRM_ERROR("No texture bound to unit %u\n", u);
                        return -EINVAL;
                }
                size = 0;
                /* accumulate texel count over every mip level (inclusive) */
                for (i = 0; i <= track->textures[u].num_levels; i++) {
                        if (track->textures[u].use_pitch) {
                                w = track->textures[u].pitch / (1 << i);
                        } else {
                                w = track->textures[u].width / (1 << i);
                                /* >= RV515 carries extra size bits separately */
                                if (rdev->family >= CHIP_RV515)
                                        w |= track->textures[u].width_11;
                                if (track->textures[u].roundup_w)
                                        w = roundup_pow_of_two(w);
                        }
                        h = track->textures[u].height / (1 << i);
                        if (rdev->family >= CHIP_RV515)
                                h |= track->textures[u].height_11;
                        if (track->textures[u].roundup_h)
                                h = roundup_pow_of_two(h);
                        size += w * h;
                }
                size *= track->textures[u].cpp;
                /* scale by texture kind */
                switch (track->textures[u].tex_coord_type) {
                case 0:
                        /* plain texture: no extra scaling */
                        break;
                case 1:
                        /* 3D texture: multiply by depth */
                        size *= (1 << track->textures[u].txdepth);
                        break;
                case 2:
                        /* cube map: six faces */
                        size *= 6;
                        break;
                default:
                        DRM_ERROR("Invalid texture coordinate type %u for unit "
                                  "%u\n", track->textures[u].tex_coord_type, u);
                        return -EINVAL;
                }
                if (size > radeon_object_size(robj)) {
                        DRM_ERROR("Texture of unit %u needs %lu bytes but is "
                                  "%lu\n", u, size, radeon_object_size(robj));
                        r300_cs_track_texture_print(&track->textures[u]);
                        return -EINVAL;
                }
        }
        return 0;
}
821
822 int r300_cs_track_check(struct radeon_device *rdev, struct r300_cs_track *track)
823 {
824         unsigned i;
825         unsigned long size;
826         unsigned prim_walk;
827         unsigned nverts;
828
829         for (i = 0; i < track->num_cb; i++) {
830                 if (track->cb[i].robj == NULL) {
831                         DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
832                         return -EINVAL;
833                 }
834                 size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
835                 size += track->cb[i].offset;
836                 if (size > radeon_object_size(track->cb[i].robj)) {
837                         DRM_ERROR("[drm] Buffer too small for color buffer %d "
838                                   "(need %lu have %lu) !\n", i, size,
839                                   radeon_object_size(track->cb[i].robj));
840                         DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
841                                   i, track->cb[i].pitch, track->cb[i].cpp,
842                                   track->cb[i].offset, track->maxy);
843                         return -EINVAL;
844                 }
845         }
846         if (track->z_enabled) {
847                 if (track->zb.robj == NULL) {
848                         DRM_ERROR("[drm] No buffer for z buffer !\n");
849                         return -EINVAL;
850                 }
851                 size = track->zb.pitch * track->zb.cpp * track->maxy;
852                 size += track->zb.offset;
853                 if (size > radeon_object_size(track->zb.robj)) {
854                         DRM_ERROR("[drm] Buffer too small for z buffer "
855                                   "(need %lu have %lu) !\n", size,
856                                   radeon_object_size(track->zb.robj));
857                         return -EINVAL;
858                 }
859         }
860         prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
861         nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
862         switch (prim_walk) {
863         case 1:
864                 for (i = 0; i < track->num_arrays; i++) {
865                         size = track->arrays[i].esize * track->max_indx * 4;
866                         if (track->arrays[i].robj == NULL) {
867                                 DRM_ERROR("(PW %u) Vertex array %u no buffer "
868                                           "bound\n", prim_walk, i);
869                                 return -EINVAL;
870                         }
871                         if (size > radeon_object_size(track->arrays[i].robj)) {
872                                 DRM_ERROR("(PW %u) Vertex array %u need %lu dwords "
873                                            "have %lu dwords\n", prim_walk, i,
874                                            size >> 2,
875                                            radeon_object_size(track->arrays[i].robj) >> 2);
876                                 DRM_ERROR("Max indices %u\n", track->max_indx);
877                                 return -EINVAL;
878                         }
879                 }
880                 break;
881         case 2:
882                 for (i = 0; i < track->num_arrays; i++) {
883                         size = track->arrays[i].esize * (nverts - 1) * 4;
884                         if (track->arrays[i].robj == NULL) {
885                                 DRM_ERROR("(PW %u) Vertex array %u no buffer "
886                                           "bound\n", prim_walk, i);
887                                 return -EINVAL;
888                         }
889                         if (size > radeon_object_size(track->arrays[i].robj)) {
890                                 DRM_ERROR("(PW %u) Vertex array %u need %lu dwords "
891                                            "have %lu dwords\n", prim_walk, i, size >> 2,
892                                            radeon_object_size(track->arrays[i].robj) >> 2);
893                                 return -EINVAL;
894                         }
895                 }
896                 break;
897         case 3:
898                 size = track->vtx_size * nverts;
899                 if (size != track->immd_dwords) {
900                         DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
901                                   track->immd_dwords, size);
902                         DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
903                                   nverts, track->vtx_size);
904                         return -EINVAL;
905                 }
906                 break;
907         default:
908                 DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
909                           prim_walk);
910                 return -EINVAL;
911         }
912         return r300_cs_track_texture_check(rdev, track);
913 }
914
915 static inline void r300_cs_track_clear(struct r300_cs_track *track)
916 {
917         unsigned i;
918
919         track->num_cb = 4;
920         track->maxy = 4096;
921         for (i = 0; i < track->num_cb; i++) {
922                 track->cb[i].robj = NULL;
923                 track->cb[i].pitch = 8192;
924                 track->cb[i].cpp = 16;
925                 track->cb[i].offset = 0;
926         }
927         track->z_enabled = true;
928         track->zb.robj = NULL;
929         track->zb.pitch = 8192;
930         track->zb.cpp = 4;
931         track->zb.offset = 0;
932         track->vtx_size = 0x7F;
933         track->immd_dwords = 0xFFFFFFFFUL;
934         track->num_arrays = 11;
935         track->max_indx = 0x00FFFFFFUL;
936         for (i = 0; i < track->num_arrays; i++) {
937                 track->arrays[i].robj = NULL;
938                 track->arrays[i].esize = 0x7F;
939         }
940         for (i = 0; i < 16; i++) {
941                 track->textures[i].pitch = 16536;
942                 track->textures[i].width = 16536;
943                 track->textures[i].height = 16536;
944                 track->textures[i].width_11 = 1 << 11;
945                 track->textures[i].height_11 = 1 << 11;
946                 track->textures[i].num_levels = 12;
947                 track->textures[i].txdepth = 16;
948                 track->textures[i].cpp = 64;
949                 track->textures[i].tex_coord_type = 1;
950                 track->textures[i].robj = NULL;
951                 /* CS IB emission code makes sure texture unit are disabled */
952                 track->textures[i].enabled = false;
953                 track->textures[i].roundup_w = true;
954                 track->textures[i].roundup_h = true;
955         }
956 }
957
/* Bitmap of registers userspace may write directly through packet0.
 * Each bit covers one dword register; a set bit means the register is
 * safe to pass through unchecked, a clear bit means r300_packet0_check()
 * must validate (or reject) the write. */
static const unsigned r300_reg_safe_bm[159] = {
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0x17FF1FFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFF30FFBF,
        0xFFFFFFF8, 0xC3E6FFFF, 0xFFFFF6DF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF03F,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFEFCE, 0xF00EBFFF, 0x007C0000,
        0xF0000078, 0xFF000009, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFF7FF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFC78, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
        0x38FF8F50, 0xFFF88082, 0xF000000C, 0xFAE009FF,
        0x0000FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
        0x00000000, 0x0000C100, 0x00000000, 0x00000000,
        0x00000000, 0x00000000, 0x00000000, 0x00000000,
        0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF,
        0x00000000, 0x00000000, 0x00000000, 0x00000000,
        0x0003FC01, 0xFFFFFCF8, 0xFF800B19,
};
1000
1001 static int r300_packet0_check(struct radeon_cs_parser *p,
1002                 struct radeon_cs_packet *pkt,
1003                 unsigned idx, unsigned reg)
1004 {
1005         struct radeon_cs_chunk *ib_chunk;
1006         struct radeon_cs_reloc *reloc;
1007         struct r300_cs_track *track;
1008         volatile uint32_t *ib;
1009         uint32_t tmp, tile_flags = 0;
1010         unsigned i;
1011         int r;
1012
1013         ib = p->ib->ptr;
1014         ib_chunk = &p->chunks[p->chunk_ib_idx];
1015         track = (struct r300_cs_track*)p->track;
1016         switch(reg) {
1017         case AVIVO_D1MODE_VLINE_START_END:
1018         case RADEON_CRTC_GUI_TRIG_VLINE:
1019                 r = r100_cs_packet_parse_vline(p);
1020                 if (r) {
1021                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1022                                         idx, reg);
1023                         r100_cs_dump_packet(p, pkt);
1024                         return r;
1025                 }
1026                 break;
1027         case RADEON_DST_PITCH_OFFSET:
1028         case RADEON_SRC_PITCH_OFFSET:
1029                 r = r100_cs_packet_next_reloc(p, &reloc);
1030                 if (r) {
1031                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1032                                         idx, reg);
1033                         r100_cs_dump_packet(p, pkt);
1034                         return r;
1035                 }
1036                 tmp = ib_chunk->kdata[idx] & 0x003fffff;
1037                 tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
1038
1039                 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1040                         tile_flags |= RADEON_DST_TILE_MACRO;
1041                 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
1042                         if (reg == RADEON_SRC_PITCH_OFFSET) {
1043                                 DRM_ERROR("Cannot src blit from microtiled surface\n");
1044                                 r100_cs_dump_packet(p, pkt);
1045                                 return -EINVAL;
1046                         }
1047                         tile_flags |= RADEON_DST_TILE_MICRO;
1048                 }
1049                 tmp |= tile_flags;
1050                 ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp;
1051                 break;
1052         case R300_RB3D_COLOROFFSET0:
1053         case R300_RB3D_COLOROFFSET1:
1054         case R300_RB3D_COLOROFFSET2:
1055         case R300_RB3D_COLOROFFSET3:
1056                 i = (reg - R300_RB3D_COLOROFFSET0) >> 2;
1057                 r = r100_cs_packet_next_reloc(p, &reloc);
1058                 if (r) {
1059                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1060                                         idx, reg);
1061                         r100_cs_dump_packet(p, pkt);
1062                         return r;
1063                 }
1064                 track->cb[i].robj = reloc->robj;
1065                 track->cb[i].offset = ib_chunk->kdata[idx];
1066                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1067                 break;
1068         case R300_ZB_DEPTHOFFSET:
1069                 r = r100_cs_packet_next_reloc(p, &reloc);
1070                 if (r) {
1071                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1072                                         idx, reg);
1073                         r100_cs_dump_packet(p, pkt);
1074                         return r;
1075                 }
1076                 track->zb.robj = reloc->robj;
1077                 track->zb.offset = ib_chunk->kdata[idx];
1078                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1079                 break;
1080         case R300_TX_OFFSET_0:
1081         case R300_TX_OFFSET_0+4:
1082         case R300_TX_OFFSET_0+8:
1083         case R300_TX_OFFSET_0+12:
1084         case R300_TX_OFFSET_0+16:
1085         case R300_TX_OFFSET_0+20:
1086         case R300_TX_OFFSET_0+24:
1087         case R300_TX_OFFSET_0+28:
1088         case R300_TX_OFFSET_0+32:
1089         case R300_TX_OFFSET_0+36:
1090         case R300_TX_OFFSET_0+40:
1091         case R300_TX_OFFSET_0+44:
1092         case R300_TX_OFFSET_0+48:
1093         case R300_TX_OFFSET_0+52:
1094         case R300_TX_OFFSET_0+56:
1095         case R300_TX_OFFSET_0+60:
1096                 i = (reg - R300_TX_OFFSET_0) >> 2;
1097                 r = r100_cs_packet_next_reloc(p, &reloc);
1098                 if (r) {
1099                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1100                                         idx, reg);
1101                         r100_cs_dump_packet(p, pkt);
1102                         return r;
1103                 }
1104                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1105                 track->textures[i].robj = reloc->robj;
1106                 break;
1107         /* Tracked registers */
1108         case 0x2084:
1109                 /* VAP_VF_CNTL */
1110                 track->vap_vf_cntl = ib_chunk->kdata[idx];
1111                 break;
1112         case 0x20B4:
1113                 /* VAP_VTX_SIZE */
1114                 track->vtx_size = ib_chunk->kdata[idx] & 0x7F;
1115                 break;
1116         case 0x2134:
1117                 /* VAP_VF_MAX_VTX_INDX */
1118                 track->max_indx = ib_chunk->kdata[idx] & 0x00FFFFFFUL;
1119                 break;
1120         case 0x43E4:
1121                 /* SC_SCISSOR1 */
1122                 track->maxy = ((ib_chunk->kdata[idx] >> 13) & 0x1FFF) + 1;
1123                 if (p->rdev->family < CHIP_RV515) {
1124                         track->maxy -= 1440;
1125                 }
1126                 break;
1127         case 0x4E00:
1128                 /* RB3D_CCTL */
1129                 track->num_cb = ((ib_chunk->kdata[idx] >> 5) & 0x3) + 1;
1130                 break;
1131         case 0x4E38:
1132         case 0x4E3C:
1133         case 0x4E40:
1134         case 0x4E44:
1135                 /* RB3D_COLORPITCH0 */
1136                 /* RB3D_COLORPITCH1 */
1137                 /* RB3D_COLORPITCH2 */
1138                 /* RB3D_COLORPITCH3 */
1139                 r = r100_cs_packet_next_reloc(p, &reloc);
1140                 if (r) {
1141                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1142                                   idx, reg);
1143                         r100_cs_dump_packet(p, pkt);
1144                         return r;
1145                 }
1146
1147                 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1148                         tile_flags |= R300_COLOR_TILE_ENABLE;
1149                 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
1150                         tile_flags |= R300_COLOR_MICROTILE_ENABLE;
1151
1152                 tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
1153                 tmp |= tile_flags;
1154                 ib[idx] = tmp;
1155
1156                 i = (reg - 0x4E38) >> 2;
1157                 track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
1158                 switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
1159                 case 9:
1160                 case 11:
1161                 case 12:
1162                         track->cb[i].cpp = 1;
1163                         break;
1164                 case 3:
1165                 case 4:
1166                 case 13:
1167                 case 15:
1168                         track->cb[i].cpp = 2;
1169                         break;
1170                 case 6:
1171                         track->cb[i].cpp = 4;
1172                         break;
1173                 case 10:
1174                         track->cb[i].cpp = 8;
1175                         break;
1176                 case 7:
1177                         track->cb[i].cpp = 16;
1178                         break;
1179                 default:
1180                         DRM_ERROR("Invalid color buffer format (%d) !\n",
1181                                   ((ib_chunk->kdata[idx] >> 21) & 0xF));
1182                         return -EINVAL;
1183                 }
1184                 break;
1185         case 0x4F00:
1186                 /* ZB_CNTL */
1187                 if (ib_chunk->kdata[idx] & 2) {
1188                         track->z_enabled = true;
1189                 } else {
1190                         track->z_enabled = false;
1191                 }
1192                 break;
1193         case 0x4F10:
1194                 /* ZB_FORMAT */
1195                 switch ((ib_chunk->kdata[idx] & 0xF)) {
1196                 case 0:
1197                 case 1:
1198                         track->zb.cpp = 2;
1199                         break;
1200                 case 2:
1201                         track->zb.cpp = 4;
1202                         break;
1203                 default:
1204                         DRM_ERROR("Invalid z buffer format (%d) !\n",
1205                                   (ib_chunk->kdata[idx] & 0xF));
1206                         return -EINVAL;
1207                 }
1208                 break;
1209         case 0x4F24:
1210                 /* ZB_DEPTHPITCH */
1211                 r = r100_cs_packet_next_reloc(p, &reloc);
1212                 if (r) {
1213                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1214                                   idx, reg);
1215                         r100_cs_dump_packet(p, pkt);
1216                         return r;
1217                 }
1218
1219                 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1220                         tile_flags |= R300_DEPTHMACROTILE_ENABLE;
1221                 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
1222                         tile_flags |= R300_DEPTHMICROTILE_TILED;;
1223
1224                 tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
1225                 tmp |= tile_flags;
1226                 ib[idx] = tmp;
1227
1228                 track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
1229                 break;
1230         case 0x4104:
1231                 for (i = 0; i < 16; i++) {
1232                         bool enabled;
1233
1234                         enabled = !!(ib_chunk->kdata[idx] & (1 << i));
1235                         track->textures[i].enabled = enabled;
1236                 }
1237                 break;
1238         case 0x44C0:
1239         case 0x44C4:
1240         case 0x44C8:
1241         case 0x44CC:
1242         case 0x44D0:
1243         case 0x44D4:
1244         case 0x44D8:
1245         case 0x44DC:
1246         case 0x44E0:
1247         case 0x44E4:
1248         case 0x44E8:
1249         case 0x44EC:
1250         case 0x44F0:
1251         case 0x44F4:
1252         case 0x44F8:
1253         case 0x44FC:
1254                 /* TX_FORMAT1_[0-15] */
1255                 i = (reg - 0x44C0) >> 2;
1256                 tmp = (ib_chunk->kdata[idx] >> 25) & 0x3;
1257                 track->textures[i].tex_coord_type = tmp;
1258                 switch ((ib_chunk->kdata[idx] & 0x1F)) {
1259                 case 0:
1260                 case 2:
1261                 case 5:
1262                 case 18:
1263                 case 20:
1264                 case 21:
1265                         track->textures[i].cpp = 1;
1266                         break;
1267                 case 1:
1268                 case 3:
1269                 case 6:
1270                 case 7:
1271                 case 10:
1272                 case 11:
1273                 case 19:
1274                 case 22:
1275                 case 24:
1276                         track->textures[i].cpp = 2;
1277                         break;
1278                 case 4:
1279                 case 8:
1280                 case 9:
1281                 case 12:
1282                 case 13:
1283                 case 23:
1284                 case 25:
1285                 case 27:
1286                 case 30:
1287                         track->textures[i].cpp = 4;
1288                         break;
1289                 case 14:
1290                 case 26:
1291                 case 28:
1292                         track->textures[i].cpp = 8;
1293                         break;
1294                 case 29:
1295                         track->textures[i].cpp = 16;
1296                         break;
1297                 default:
1298                         DRM_ERROR("Invalid texture format %u\n",
1299                                   (ib_chunk->kdata[idx] & 0x1F));
1300                         return -EINVAL;
1301                         break;
1302                 }
1303                 break;
1304         case 0x4400:
1305         case 0x4404:
1306         case 0x4408:
1307         case 0x440C:
1308         case 0x4410:
1309         case 0x4414:
1310         case 0x4418:
1311         case 0x441C:
1312         case 0x4420:
1313         case 0x4424:
1314         case 0x4428:
1315         case 0x442C:
1316         case 0x4430:
1317         case 0x4434:
1318         case 0x4438:
1319         case 0x443C:
1320                 /* TX_FILTER0_[0-15] */
1321                 i = (reg - 0x4400) >> 2;
1322                 tmp = ib_chunk->kdata[idx] & 0x7;;
1323                 if (tmp == 2 || tmp == 4 || tmp == 6) {
1324                         track->textures[i].roundup_w = false;
1325                 }
1326                 tmp = (ib_chunk->kdata[idx] >> 3) & 0x7;;
1327                 if (tmp == 2 || tmp == 4 || tmp == 6) {
1328                         track->textures[i].roundup_h = false;
1329                 }
1330                 break;
1331         case 0x4500:
1332         case 0x4504:
1333         case 0x4508:
1334         case 0x450C:
1335         case 0x4510:
1336         case 0x4514:
1337         case 0x4518:
1338         case 0x451C:
1339         case 0x4520:
1340         case 0x4524:
1341         case 0x4528:
1342         case 0x452C:
1343         case 0x4530:
1344         case 0x4534:
1345         case 0x4538:
1346         case 0x453C:
1347                 /* TX_FORMAT2_[0-15] */
1348                 i = (reg - 0x4500) >> 2;
1349                 tmp = ib_chunk->kdata[idx] & 0x3FFF;
1350                 track->textures[i].pitch = tmp + 1;
1351                 if (p->rdev->family >= CHIP_RV515) {
1352                         tmp = ((ib_chunk->kdata[idx] >> 15) & 1) << 11;
1353                         track->textures[i].width_11 = tmp;
1354                         tmp = ((ib_chunk->kdata[idx] >> 16) & 1) << 11;
1355                         track->textures[i].height_11 = tmp;
1356                 }
1357                 break;
1358         case 0x4480:
1359         case 0x4484:
1360         case 0x4488:
1361         case 0x448C:
1362         case 0x4490:
1363         case 0x4494:
1364         case 0x4498:
1365         case 0x449C:
1366         case 0x44A0:
1367         case 0x44A4:
1368         case 0x44A8:
1369         case 0x44AC:
1370         case 0x44B0:
1371         case 0x44B4:
1372         case 0x44B8:
1373         case 0x44BC:
1374                 /* TX_FORMAT0_[0-15] */
1375                 i = (reg - 0x4480) >> 2;
1376                 tmp = ib_chunk->kdata[idx] & 0x7FF;
1377                 track->textures[i].width = tmp + 1;
1378                 tmp = (ib_chunk->kdata[idx] >> 11) & 0x7FF;
1379                 track->textures[i].height = tmp + 1;
1380                 tmp = (ib_chunk->kdata[idx] >> 26) & 0xF;
1381                 track->textures[i].num_levels = tmp;
1382                 tmp = ib_chunk->kdata[idx] & (1 << 31);
1383                 track->textures[i].use_pitch = !!tmp;
1384                 tmp = (ib_chunk->kdata[idx] >> 22) & 0xF;
1385                 track->textures[i].txdepth = tmp;
1386                 break;
1387         case R300_ZB_ZPASS_ADDR:
1388                 r = r100_cs_packet_next_reloc(p, &reloc);
1389                 if (r) {
1390                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1391                                         idx, reg);
1392                         r100_cs_dump_packet(p, pkt);
1393                         return r;
1394                 }
1395                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1396                 break;
1397         case 0x4be8:
1398                 /* valid register only on RV530 */
1399                 if (p->rdev->family == CHIP_RV530)
1400                         break;
1401                 /* fallthrough do not move */
1402         default:
1403                 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
1404                        reg, idx);
1405                 return -EINVAL;
1406         }
1407         return 0;
1408 }
1409
/* Validate a packet3 command: resolve relocations for vertex/index
 * buffer pointers, update the tracked draw state, and run the full
 * tracker check before any draw packet is allowed through.
 * Returns 0 on success or a negative error code. */
static int r300_packet3_check(struct radeon_cs_parser *p,
                              struct radeon_cs_packet *pkt)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_cs_reloc *reloc;
        struct r300_cs_track *track;
        volatile uint32_t *ib;
        unsigned idx;
        unsigned i, c;
        int r;

        ib = p->ib->ptr;
        ib_chunk = &p->chunks[p->chunk_ib_idx];
        idx = pkt->idx + 1;
        track = (struct r300_cs_track*)p->track;
        switch(pkt->opcode) {
        case PACKET3_3D_LOAD_VBPNTR:
                /* First dword: number of vertex arrays (low 5 bits).
                 * NOTE(review): c can be up to 31 and is not checked
                 * against the capacity of track->arrays (declared
                 * elsewhere) — verify this cannot overflow the array. */
                c = ib_chunk->kdata[idx++] & 0x1F;
                track->num_arrays = c;
                /* Arrays are packed two per 3-dword group:
                 * sizes dword + two offsets. */
                for (i = 0; i < (c - 1); i+=2, idx+=3) {
                        r = r100_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("No reloc for packet3 %d\n",
                                          pkt->opcode);
                                r100_cs_dump_packet(p, pkt);
                                return r;
                        }
                        ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
                        track->arrays[i + 0].robj = reloc->robj;
                        track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
                        track->arrays[i + 0].esize &= 0x7F;
                        r = r100_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("No reloc for packet3 %d\n",
                                          pkt->opcode);
                                r100_cs_dump_packet(p, pkt);
                                return r;
                        }
                        ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
                        track->arrays[i + 1].robj = reloc->robj;
                        track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24;
                        track->arrays[i + 1].esize &= 0x7F;
                }
                /* Odd count: one trailing array in a 2-dword group. */
                if (c & 1) {
                        r = r100_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("No reloc for packet3 %d\n",
                                          pkt->opcode);
                                r100_cs_dump_packet(p, pkt);
                                return r;
                        }
                        ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
                        track->arrays[i + 0].robj = reloc->robj;
                        track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
                        track->arrays[i + 0].esize &= 0x7F;
                }
                break;
        case PACKET3_INDX_BUFFER:
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
                r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
                if (r) {
                        return r;
                }
                break;
        /* Draw packet */
        case PACKET3_3D_DRAW_IMMD:
                /* Number of dwords is vtx_size * (num_vertices - 1)
                 * PRIM_WALK must be equal to 3 vertex data in embedded
                 * in cmd stream */
                if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) {
                        DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
                        return -EINVAL;
                }
                track->vap_vf_cntl = ib_chunk->kdata[idx+1];
                track->immd_dwords = pkt->count - 1;
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_3D_DRAW_IMMD_2:
                /* Number of dwords is vtx_size * (num_vertices - 1)
                 * PRIM_WALK must be equal to 3 vertex data in embedded
                 * in cmd stream */
                if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) {
                        DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
                        return -EINVAL;
                }
                track->vap_vf_cntl = ib_chunk->kdata[idx];
                track->immd_dwords = pkt->count;
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_3D_DRAW_VBUF:
                track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_3D_DRAW_VBUF_2:
                track->vap_vf_cntl = ib_chunk->kdata[idx];
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_3D_DRAW_INDX:
                track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_3D_DRAW_INDX_2:
                track->vap_vf_cntl = ib_chunk->kdata[idx];
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_NOP:
                break;
        default:
                DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
                return -EINVAL;
        }
        return 0;
}
1547
1548 int r300_cs_parse(struct radeon_cs_parser *p)
1549 {
1550         struct radeon_cs_packet pkt;
1551         struct r300_cs_track track;
1552         int r;
1553
1554         r300_cs_track_clear(&track);
1555         p->track = &track;
1556         do {
1557                 r = r100_cs_packet_parse(p, &pkt, p->idx);
1558                 if (r) {
1559                         return r;
1560                 }
1561                 p->idx += pkt.count + 2;
1562                 switch (pkt.type) {
1563                 case PACKET_TYPE0:
1564                         r = r100_cs_parse_packet0(p, &pkt,
1565                                                   p->rdev->config.r300.reg_safe_bm,
1566                                                   p->rdev->config.r300.reg_safe_bm_size,
1567                                                   &r300_packet0_check);
1568                         break;
1569                 case PACKET_TYPE2:
1570                         break;
1571                 case PACKET_TYPE3:
1572                         r = r300_packet3_check(p, &pkt);
1573                         break;
1574                 default:
1575                         DRM_ERROR("Unknown packet type %d !\n", pkt.type);
1576                         return -EINVAL;
1577                 }
1578                 if (r) {
1579                         return r;
1580                 }
1581         } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
1582         return 0;
1583 }
1584
1585 int r300_init(struct radeon_device *rdev)
1586 {
1587         rdev->config.r300.reg_safe_bm = r300_reg_safe_bm;
1588         rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(r300_reg_safe_bm);
1589         return 0;
1590 }