/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/seq_file.h>
#include "drmP.h"
#include "drm.h"
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_drm.h"
#include "radeon_share.h"

/* r300,r350,rv350,rv370,rv380 depend on: */
void r100_hdp_reset(struct radeon_device *rdev);
int r100_cp_reset(struct radeon_device *rdev);
int r100_rb2d_reset(struct radeon_device *rdev);
int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
int r100_pci_gart_enable(struct radeon_device *rdev);
void r100_pci_gart_disable(struct radeon_device *rdev);
void r100_mc_setup(struct radeon_device *rdev);
void r100_mc_disable_clients(struct radeon_device *rdev);
int r100_gui_wait_for_idle(struct radeon_device *rdev);
int r100_cs_packet_parse(struct radeon_cs_parser *p,
                         struct radeon_cs_packet *pkt,
                         unsigned idx);
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p);
int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
                              struct radeon_cs_reloc **cs_reloc);
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
                          struct radeon_cs_packet *pkt,
                          const unsigned *auth, unsigned n,
                          radeon_packet0_check_t check);
void r100_cs_dump_packet(struct radeon_cs_parser *p,
                         struct radeon_cs_packet *pkt);
int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
                                         struct radeon_cs_packet *pkt,
                                         struct radeon_object *robj);

/* This file gathers functions specific to:
 * r300,r350,rv350,rv370,rv380
 *
 * Some of these functions might be used by newer ASICs.
 */
void r300_gpu_init(struct radeon_device *rdev);
int r300_mc_wait_for_idle(struct radeon_device *rdev);
int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev);


/*
 * rv370,rv380 PCIE GART
 */
void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
        uint32_t tmp;
        int i;

        /* Workaround for a HW bug: do the flush twice */
        for (i = 0; i < 2; i++) {
                tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
                WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp | RADEON_PCIE_TX_GART_INVALIDATE_TLB);
                (void)RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
                WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
                mb();
        }
}

int rv370_pcie_gart_enable(struct radeon_device *rdev)
{
        uint32_t table_addr;
        uint32_t tmp;
        int r;

        /* Initialize common gart structure */
        r = radeon_gart_init(rdev);
        if (r) {
                return r;
        }
        r = rv370_debugfs_pcie_gart_info_init(rdev);
        if (r) {
                DRM_ERROR("Failed to register debugfs file for PCIE gart!\n");
        }
        rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
        r = radeon_gart_table_vram_alloc(rdev);
        if (r) {
                return r;
        }
        /* discard memory requests outside of the configured range */
        tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
        WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
        WREG32_PCIE(RADEON_PCIE_TX_GART_START_LO, rdev->mc.gtt_location);
        tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 4096;
        WREG32_PCIE(RADEON_PCIE_TX_GART_END_LO, tmp);
        WREG32_PCIE(RADEON_PCIE_TX_GART_START_HI, 0);
        WREG32_PCIE(RADEON_PCIE_TX_GART_END_HI, 0);
        table_addr = rdev->gart.table_addr;
        WREG32_PCIE(RADEON_PCIE_TX_GART_BASE, table_addr);
        /* FIXME: setup default page */
        WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO, rdev->mc.vram_location);
        WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_HI, 0);
        /* Clear error */
        WREG32_PCIE(0x18, 0);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
        tmp |= RADEON_PCIE_TX_GART_EN;
        tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
        WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
        rv370_pcie_gart_tlb_flush(rdev);
        DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
                 rdev->mc.gtt_size >> 20, table_addr);
        rdev->gart.ready = true;
        return 0;
}

void rv370_pcie_gart_disable(struct radeon_device *rdev)
{
        uint32_t tmp;

        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
        tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
        WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp & ~RADEON_PCIE_TX_GART_EN);
        if (rdev->gart.table.vram.robj) {
                radeon_object_kunmap(rdev->gart.table.vram.robj);
                radeon_object_unpin(rdev->gart.table.vram.robj);
        }
}

int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
        void __iomem *ptr = (void __iomem *)rdev->gart.table.vram.ptr;

        if (i < 0 || i >= rdev->gart.num_gpu_pages) {
                return -EINVAL;
        }
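        /*
         * Each GART entry is a single little-endian dword: bits 23:0 take
         * page address bits 31:8 and bits 31:24 take page address bits
         * 39:32, so the table can map a 40-bit system address space; the
         * low 0xc flag bits presumably mark the entry valid. For example,
         * a page at 0x123456000 encodes as 0x0123456c.
         */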
        addr = (lower_32_bits(addr) >> 8) |
               ((upper_32_bits(addr) & 0xff) << 24) |
               0xc;
        /* On x86 we want this to be CPU endian; on powerpc without HW
         * swappers it'll get swapped on the way into VRAM, so there is
         * no need for cpu_to_le32 on VRAM tables. */
        writel(addr, ptr + (i * 4));
        return 0;
}

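/*
 * Pick the right GART for the board: AGP boards need no GART of their own
 * (the AGP aperture does the translation), PCIE boards get the rv370 PCIE
 * GART, and everything else falls back to the r100 PCI GART.
 */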
int r300_gart_enable(struct radeon_device *rdev)
{
#if __OS_HAS_AGP
        if (rdev->flags & RADEON_IS_AGP) {
                if (rdev->family > CHIP_RV350) {
                        rv370_pcie_gart_disable(rdev);
                } else {
                        r100_pci_gart_disable(rdev);
                }
                return 0;
        }
#endif
        if (rdev->flags & RADEON_IS_PCIE) {
                rdev->asic->gart_disable = &rv370_pcie_gart_disable;
                rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush;
                rdev->asic->gart_set_page = &rv370_pcie_gart_set_page;
                return rv370_pcie_gart_enable(rdev);
        }
        return r100_pci_gart_enable(rdev);
}


/*
 * MC
 */
int r300_mc_init(struct radeon_device *rdev)
{
        int r;

        if (r100_debugfs_rbbm_init(rdev)) {
                DRM_ERROR("Failed to register debugfs file for RBBM!\n");
        }

        r300_gpu_init(rdev);
        r100_pci_gart_disable(rdev);
        if (rdev->flags & RADEON_IS_PCIE) {
                rv370_pcie_gart_disable(rdev);
        }

        /* Setup GPU memory space */
        rdev->mc.vram_location = 0xFFFFFFFFUL;
        rdev->mc.gtt_location = 0xFFFFFFFFUL;
        if (rdev->flags & RADEON_IS_AGP) {
                r = radeon_agp_init(rdev);
                if (r) {
                        printk(KERN_WARNING "[drm] Disabling AGP\n");
                        rdev->flags &= ~RADEON_IS_AGP;
                        rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
                } else {
                        rdev->mc.gtt_location = rdev->mc.agp_base;
                }
        }
        r = radeon_mc_setup(rdev);
        if (r) {
                return r;
        }

        /* Program GPU memory space */
        r100_mc_disable_clients(rdev);
        if (r300_mc_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait MC idle while "
                       "programming pipes. Bad things might happen.\n");
        }
        r100_mc_setup(rdev);
        return 0;
}

void r300_mc_fini(struct radeon_device *rdev)
{
        if (rdev->flags & RADEON_IS_PCIE) {
                rv370_pcie_gart_disable(rdev);
                radeon_gart_table_vram_free(rdev);
        } else {
                r100_pci_gart_disable(rdev);
                radeon_gart_table_ram_free(rdev);
        }
        radeon_gart_fini(rdev);
}


/*
 * Fence emission
 */
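/*
 * Note: PACKET0(reg, n) below builds a CP type-0 packet header; the CP
 * writes the n + 1 dwords that follow it to consecutive registers starting
 * at reg, so each PACKET0(reg, 0) pair is simply "write one value to reg".
 */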
void r300_fence_ring_emit(struct radeon_device *rdev,
                          struct radeon_fence *fence)
{
        /* Whoever calls radeon_fence_emit should call ring_lock and ask
         * for enough space (today the callers are ib schedule and buffer move) */
        /* Write SC register so SC & US assert idle */
        radeon_ring_write(rdev, PACKET0(0x43E0, 0));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, PACKET0(0x43E4, 0));
        radeon_ring_write(rdev, 0);
        /* Flush 3D cache */
        radeon_ring_write(rdev, PACKET0(0x4E4C, 0));
        radeon_ring_write(rdev, (2 << 0));
        radeon_ring_write(rdev, PACKET0(0x4F18, 0));
        radeon_ring_write(rdev, (1 << 0));
        /* Wait until IDLE & CLEAN */
        radeon_ring_write(rdev, PACKET0(0x1720, 0));
        radeon_ring_write(rdev, (1 << 17) | (1 << 16) | (1 << 9));
        /* Emit fence sequence & fire IRQ */
        radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
        radeon_ring_write(rdev, fence->seq);
        radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
        radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
}


/*
 * Global GPU functions
 */
int r300_copy_dma(struct radeon_device *rdev,
                  uint64_t src_offset,
                  uint64_t dst_offset,
                  unsigned num_pages,
                  struct radeon_fence *fence)
{
        uint32_t size;
        uint32_t cur_size;
        int i, num_loops;
        int r = 0;

        /* radeon pitch is /64 */
        size = num_pages << PAGE_SHIFT;
        num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
        r = radeon_ring_lock(rdev, num_loops * 4 + 64);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                return r;
        }
        /* Must wait for 2D idle & clean before DMA or hangs might happen */
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(rdev, (1 << 16));
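        /*
         * The driver caps each pass at 0x1FFFFF bytes (a 21-bit count in
         * the DMA packet), and num_loops above was sized to cover the
         * whole transfer; an 8MB move, for instance, takes 5 passes:
         * 4 full 0x1FFFFF chunks plus a 4-byte remainder.
         */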
        for (i = 0; i < num_loops; i++) {
                cur_size = size;
                if (cur_size > 0x1FFFFF) {
                        cur_size = 0x1FFFFF;
                }
                size -= cur_size;
                radeon_ring_write(rdev, PACKET0(0x720, 2));
                radeon_ring_write(rdev, src_offset);
                radeon_ring_write(rdev, dst_offset);
                radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
                src_offset += cur_size;
                dst_offset += cur_size;
        }
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
        if (fence) {
                r = radeon_fence_emit(rdev, fence);
        }
        radeon_ring_unlock_commit(rdev);
        return r;
}

void r300_ring_start(struct radeon_device *rdev)
{
        unsigned gb_tile_config;
        int r;

        /* Sub pixel 1/12 so we can have 4K rendering according to the docs */
        gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
        switch (rdev->num_gb_pipes) {
        case 2:
                gb_tile_config |= R300_PIPE_COUNT_R300;
                break;
        case 3:
                gb_tile_config |= R300_PIPE_COUNT_R420_3P;
                break;
        case 4:
                gb_tile_config |= R300_PIPE_COUNT_R420;
                break;
        case 1:
        default:
                gb_tile_config |= R300_PIPE_COUNT_RV350;
                break;
        }

        r = radeon_ring_lock(rdev, 64);
        if (r) {
                return;
        }
        radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
        radeon_ring_write(rdev,
                          RADEON_ISYNC_ANY2D_IDLE3D |
                          RADEON_ISYNC_ANY3D_IDLE2D |
                          RADEON_ISYNC_WAIT_IDLEGUI |
                          RADEON_ISYNC_CPSCRATCH_IDLEGUI);
        radeon_ring_write(rdev, PACKET0(R300_GB_TILE_CONFIG, 0));
        radeon_ring_write(rdev, gb_tile_config);
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(rdev,
                          RADEON_WAIT_2D_IDLECLEAN |
                          RADEON_WAIT_3D_IDLECLEAN);
        radeon_ring_write(rdev, PACKET0(0x170C, 0));
        radeon_ring_write(rdev, 1 << 31);
        radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
        radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(rdev,
                          RADEON_WAIT_2D_IDLECLEAN |
                          RADEON_WAIT_3D_IDLECLEAN);
        radeon_ring_write(rdev, PACKET0(R300_GB_AA_CONFIG, 0));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
        radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
        radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS0, 0));
        radeon_ring_write(rdev,
                          ((6 << R300_MS_X0_SHIFT) |
                           (6 << R300_MS_Y0_SHIFT) |
                           (6 << R300_MS_X1_SHIFT) |
                           (6 << R300_MS_Y1_SHIFT) |
                           (6 << R300_MS_X2_SHIFT) |
                           (6 << R300_MS_Y2_SHIFT) |
                           (6 << R300_MSBD0_Y_SHIFT) |
                           (6 << R300_MSBD0_X_SHIFT)));
        radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS1, 0));
        radeon_ring_write(rdev,
                          ((6 << R300_MS_X3_SHIFT) |
                           (6 << R300_MS_Y3_SHIFT) |
                           (6 << R300_MS_X4_SHIFT) |
                           (6 << R300_MS_Y4_SHIFT) |
                           (6 << R300_MS_X5_SHIFT) |
                           (6 << R300_MS_Y5_SHIFT) |
                           (6 << R300_MSBD1_SHIFT)));
        radeon_ring_write(rdev, PACKET0(R300_GA_ENHANCE, 0));
        radeon_ring_write(rdev, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL);
        radeon_ring_write(rdev, PACKET0(R300_GA_POLY_MODE, 0));
        radeon_ring_write(rdev,
                          R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE);
        radeon_ring_write(rdev, PACKET0(R300_GA_ROUND_MODE, 0));
        radeon_ring_write(rdev,
                          R300_GEOMETRY_ROUND_NEAREST |
                          R300_COLOR_ROUND_NEAREST);
        radeon_ring_unlock_commit(rdev);
}

void r300_errata(struct radeon_device *rdev)
{
        rdev->pll_errata = 0;

        if (rdev->family == CHIP_R300 &&
            (RREG32(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) == RADEON_CFG_ATI_REV_A11) {
                rdev->pll_errata |= CHIP_ERRATA_R300_CG;
        }
}

int r300_mc_wait_for_idle(struct radeon_device *rdev)
{
        unsigned i;
        uint32_t tmp;

        for (i = 0; i < rdev->usec_timeout; i++) {
                /* read MC_STATUS */
                tmp = RREG32(0x0150);
                if (tmp & (1 << 4)) {
                        return 0;
                }
                DRM_UDELAY(1);
        }
        return -1;
}

void r300_gpu_init(struct radeon_device *rdev)
{
        uint32_t gb_tile_config, tmp;

        r100_hdp_reset(rdev);
        /* FIXME: does rv380 have only one pipe? */
        if ((rdev->family == CHIP_R300) || (rdev->family == CHIP_R350)) {
                /* r300,r350 */
                rdev->num_gb_pipes = 2;
        } else {
                /* rv350,rv370,rv380 */
                rdev->num_gb_pipes = 1;
        }
        gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
        switch (rdev->num_gb_pipes) {
        case 2:
                gb_tile_config |= R300_PIPE_COUNT_R300;
                break;
        case 3:
                gb_tile_config |= R300_PIPE_COUNT_R420_3P;
                break;
        case 4:
                gb_tile_config |= R300_PIPE_COUNT_R420;
                break;
        default:
        case 1:
                gb_tile_config |= R300_PIPE_COUNT_RV350;
                break;
        }
        WREG32(R300_GB_TILE_CONFIG, gb_tile_config);

        if (r100_gui_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait GUI idle while "
                       "programming pipes. Bad things might happen.\n");
        }

        tmp = RREG32(0x170C);
        WREG32(0x170C, tmp | (1 << 31));

        WREG32(R300_RB2D_DSTCACHE_MODE,
               R300_DC_AUTOFLUSH_ENABLE |
               R300_DC_DC_DISABLE_IGNORE_PE);

        if (r100_gui_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait GUI idle while "
                       "programming pipes. Bad things might happen.\n");
        }
        if (r300_mc_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait MC idle while "
                       "programming pipes. Bad things might happen.\n");
        }
        DRM_INFO("radeon: %d pipes initialized.\n", rdev->num_gb_pipes);
}

int r300_ga_reset(struct radeon_device *rdev)
{
        uint32_t tmp;
        bool reinit_cp;
        int i;

        reinit_cp = rdev->cp.ready;
        rdev->cp.ready = false;
        for (i = 0; i < rdev->usec_timeout; i++) {
                WREG32(RADEON_CP_CSQ_MODE, 0);
                WREG32(RADEON_CP_CSQ_CNTL, 0);
                WREG32(RADEON_RBBM_SOFT_RESET, 0x32005);
                (void)RREG32(RADEON_RBBM_SOFT_RESET);
                udelay(200);
                WREG32(RADEON_RBBM_SOFT_RESET, 0);
                /* Wait to prevent race in RBBM_STATUS */
                mdelay(1);
                tmp = RREG32(RADEON_RBBM_STATUS);
                if (tmp & ((1 << 20) | (1 << 26))) {
                        DRM_ERROR("VAP & CP still busy (RBBM_STATUS=0x%08X)", tmp);
                        /* GA still busy, soft reset it */
                        WREG32(0x429C, 0x200);
                        WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
                        WREG32(0x43E0, 0);
                        WREG32(0x43E4, 0);
                        WREG32(0x24AC, 0);
                }
                /* Wait to prevent race in RBBM_STATUS */
                mdelay(1);
                tmp = RREG32(RADEON_RBBM_STATUS);
                if (!(tmp & ((1 << 20) | (1 << 26)))) {
                        break;
                }
        }
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(RADEON_RBBM_STATUS);
                if (!(tmp & ((1 << 20) | (1 << 26)))) {
                        DRM_INFO("GA reset succeeded (RBBM_STATUS=0x%08X)\n",
                                 tmp);
                        if (reinit_cp) {
                                return r100_cp_init(rdev, rdev->cp.ring_size);
                        }
                        return 0;
                }
                DRM_UDELAY(1);
        }
        tmp = RREG32(RADEON_RBBM_STATUS);
        DRM_ERROR("Failed to reset GA! (RBBM_STATUS=0x%08X)\n", tmp);
        return -1;
}

int r300_gpu_reset(struct radeon_device *rdev)
{
        uint32_t status;

        /* reset order likely matters */
        status = RREG32(RADEON_RBBM_STATUS);
        /* reset HDP */
        r100_hdp_reset(rdev);
        /* reset rb2d */
        if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
                r100_rb2d_reset(rdev);
        }
        /* reset GA */
        if (status & ((1 << 20) | (1 << 26))) {
                r300_ga_reset(rdev);
        }
        /* reset CP */
        status = RREG32(RADEON_RBBM_STATUS);
        if (status & (1 << 16)) {
                r100_cp_reset(rdev);
        }
        /* Check if GPU is idle */
        status = RREG32(RADEON_RBBM_STATUS);
        if (status & (1 << 31)) {
                DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
                return -1;
        }
        DRM_INFO("GPU reset succeeded (RBBM_STATUS=0x%08X)\n", status);
        return 0;
}


/*
 * r300,r350,rv350,rv380 VRAM info
 */
void r300_vram_info(struct radeon_device *rdev)
{
        uint32_t tmp;

        /* DDR for all cards after R300 and for IGPs */
        rdev->mc.vram_is_ddr = true;
        tmp = RREG32(RADEON_MEM_CNTL);
        if (tmp & R300_MEM_NUM_CHANNELS_MASK) {
                rdev->mc.vram_width = 128;
        } else {
                rdev->mc.vram_width = 64;
        }

        r100_vram_init_sizes(rdev);
}


/*
 * Indirect registers accessor
 */
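/*
 * PCIE registers live behind an index/data window: write the register
 * number to RADEON_PCIE_INDEX, read the index back to flush the posted
 * write, then read or write RADEON_PCIE_DATA.
 */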
uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
{
        uint32_t r;

        WREG8(RADEON_PCIE_INDEX, ((reg) & 0xff));
        (void)RREG32(RADEON_PCIE_INDEX);
        r = RREG32(RADEON_PCIE_DATA);
        return r;
}

void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
        WREG8(RADEON_PCIE_INDEX, ((reg) & 0xff));
        (void)RREG32(RADEON_PCIE_INDEX);
        WREG32(RADEON_PCIE_DATA, (v));
        (void)RREG32(RADEON_PCIE_DATA);
}

/*
 * PCIE Lanes
 */

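/*
 * Link width reconfiguration: program the new width into
 * PCIE_LC_LINK_WIDTH_CNTL, pulse RECONFIG_NOW, then poll until the
 * register reads back something other than all ones (an all-ones read
 * suggests the link is still retraining and not yet accessible).
 */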
void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes)
{
        uint32_t link_width_cntl, mask;

        if (rdev->flags & RADEON_IS_IGP)
                return;

        if (!(rdev->flags & RADEON_IS_PCIE))
                return;

        /* FIXME wait for idle */

        switch (lanes) {
        case 0:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X0;
                break;
        case 1:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X1;
                break;
        case 2:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X2;
                break;
        case 4:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X4;
                break;
        case 8:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X8;
                break;
        case 12:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X12;
                break;
        case 16:
        default:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X16;
                break;
        }

        link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);

        if ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) ==
            (mask << RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT))
                return;

        link_width_cntl &= ~(RADEON_PCIE_LC_LINK_WIDTH_MASK |
                             RADEON_PCIE_LC_RECONFIG_NOW |
                             RADEON_PCIE_LC_RECONFIG_LATER |
                             RADEON_PCIE_LC_SHORT_RECONFIG_EN);
        link_width_cntl |= mask;
        WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
        WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, (link_width_cntl |
                                                     RADEON_PCIE_LC_RECONFIG_NOW));

        /* wait for lane set to complete */
        link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
        while (link_width_cntl == 0xffffffff)
                link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
}


/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)
static int rv370_debugfs_pcie_gart_info(struct seq_file *m, void *data)
{
        struct drm_info_node *node = (struct drm_info_node *) m->private;
        struct drm_device *dev = node->minor->dev;
        struct radeon_device *rdev = dev->dev_private;
        uint32_t tmp;

        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
        seq_printf(m, "PCIE_TX_GART_CNTL 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_BASE);
        seq_printf(m, "PCIE_TX_GART_BASE 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_LO);
        seq_printf(m, "PCIE_TX_GART_START_LO 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_HI);
        seq_printf(m, "PCIE_TX_GART_START_HI 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_LO);
        seq_printf(m, "PCIE_TX_GART_END_LO 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_HI);
        seq_printf(m, "PCIE_TX_GART_END_HI 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_ERROR);
        seq_printf(m, "PCIE_TX_GART_ERROR 0x%08x\n", tmp);
        return 0;
}

static struct drm_info_list rv370_pcie_gart_info_list[] = {
        {"rv370_pcie_gart_info", rv370_debugfs_pcie_gart_info, 0, NULL},
};
#endif

int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
        return radeon_debugfs_add_files(rdev, rv370_pcie_gart_info_list, 1);
#else
        return 0;
#endif
}


/*
 * CS functions
 */
struct r300_cs_track_cb {
        struct radeon_object    *robj;
        unsigned                pitch;
        unsigned                cpp;
        unsigned                offset;
};

struct r300_cs_track_array {
        struct radeon_object    *robj;
        unsigned                esize;
};

struct r300_cs_track_texture {
        struct radeon_object    *robj;
        unsigned                pitch;
        unsigned                width;
        unsigned                height;
        unsigned                num_levels;
        unsigned                cpp;
        unsigned                tex_coord_type;
        unsigned                txdepth;
        unsigned                width_11;
        unsigned                height_11;
        bool                    use_pitch;
        bool                    enabled;
        bool                    roundup_w;
        bool                    roundup_h;
};

struct r300_cs_track {
        unsigned                        num_cb;
        unsigned                        maxy;
        unsigned                        vtx_size;
        unsigned                        vap_vf_cntl;
        unsigned                        immd_dwords;
        unsigned                        num_arrays;
        unsigned                        max_indx;
        struct r300_cs_track_array      arrays[11];
        struct r300_cs_track_cb         cb[4];
        struct r300_cs_track_cb         zb;
        struct r300_cs_track_texture    textures[16];
        bool                            z_enabled;
};

static inline void r300_cs_track_texture_print(struct r300_cs_track_texture *t)
{
        DRM_ERROR("pitch                      %d\n", t->pitch);
        DRM_ERROR("width                      %d\n", t->width);
        DRM_ERROR("height                     %d\n", t->height);
        DRM_ERROR("num levels                 %d\n", t->num_levels);
        DRM_ERROR("depth                      %d\n", t->txdepth);
        DRM_ERROR("bpp                        %d\n", t->cpp);
        DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
        DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
        DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
}

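/*
 * Validate every enabled texture unit: sum width * height over all mip
 * levels, scale by bytes per texel and by the depth (3D textures) or the
 * six cube faces, then check the bound object is big enough. For example,
 * a two-level 256x256 RGBA8 2D texture needs
 * (256 * 256 + 128 * 128) * 4 = 327680 bytes.
 */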
static inline int r300_cs_track_texture_check(struct radeon_device *rdev,
                                              struct r300_cs_track *track)
{
        struct radeon_object *robj;
        unsigned long size;
        unsigned u, i, w, h;

        for (u = 0; u < 16; u++) {
                if (!track->textures[u].enabled)
                        continue;
                robj = track->textures[u].robj;
                if (robj == NULL) {
                        DRM_ERROR("No texture bound to unit %u\n", u);
                        return -EINVAL;
                }
                size = 0;
                for (i = 0; i <= track->textures[u].num_levels; i++) {
                        if (track->textures[u].use_pitch) {
                                w = track->textures[u].pitch / (1 << i);
                        } else {
                                w = track->textures[u].width / (1 << i);
                                if (rdev->family >= CHIP_RV515)
                                        w |= track->textures[u].width_11;
                                if (track->textures[u].roundup_w)
                                        w = roundup_pow_of_two(w);
                        }
                        h = track->textures[u].height / (1 << i);
                        if (rdev->family >= CHIP_RV515)
                                h |= track->textures[u].height_11;
                        if (track->textures[u].roundup_h)
                                h = roundup_pow_of_two(h);
                        size += w * h;
                }
                size *= track->textures[u].cpp;
                switch (track->textures[u].tex_coord_type) {
                case 0:
                        break;
                case 1:
                        size *= (1 << track->textures[u].txdepth);
                        break;
                case 2:
                        size *= 6;
                        break;
                default:
                        DRM_ERROR("Invalid texture coordinate type %u for unit "
                                  "%u\n", track->textures[u].tex_coord_type, u);
                        return -EINVAL;
                }
                if (size > radeon_object_size(robj)) {
                        DRM_ERROR("Texture of unit %u needs %lu bytes but is "
                                  "%lu\n", u, size, radeon_object_size(robj));
                        r300_cs_track_texture_print(&track->textures[u]);
                        return -EINVAL;
                }
        }
        return 0;
}

840
841 int r300_cs_track_check(struct radeon_device *rdev, struct r300_cs_track *track)
842 {
843         unsigned i;
844         unsigned long size;
845         unsigned prim_walk;
846         unsigned nverts;
847
848         for (i = 0; i < track->num_cb; i++) {
849                 if (track->cb[i].robj == NULL) {
850                         DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
851                         return -EINVAL;
852                 }
853                 size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
854                 size += track->cb[i].offset;
855                 if (size > radeon_object_size(track->cb[i].robj)) {
856                         DRM_ERROR("[drm] Buffer too small for color buffer %d "
857                                   "(need %lu have %lu) !\n", i, size,
858                                   radeon_object_size(track->cb[i].robj));
859                         DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
860                                   i, track->cb[i].pitch, track->cb[i].cpp,
861                                   track->cb[i].offset, track->maxy);
862                         return -EINVAL;
863                 }
864         }
865         if (track->z_enabled) {
866                 if (track->zb.robj == NULL) {
867                         DRM_ERROR("[drm] No buffer for z buffer !\n");
868                         return -EINVAL;
869                 }
870                 size = track->zb.pitch * track->zb.cpp * track->maxy;
871                 size += track->zb.offset;
872                 if (size > radeon_object_size(track->zb.robj)) {
873                         DRM_ERROR("[drm] Buffer too small for z buffer "
874                                   "(need %lu have %lu) !\n", size,
875                                   radeon_object_size(track->zb.robj));
876                         return -EINVAL;
877                 }
878         }
879         prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
880         nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
881         switch (prim_walk) {
882         case 1:
883                 for (i = 0; i < track->num_arrays; i++) {
884                         size = track->arrays[i].esize * track->max_indx * 4;
885                         if (track->arrays[i].robj == NULL) {
886                                 DRM_ERROR("(PW %u) Vertex array %u no buffer "
887                                           "bound\n", prim_walk, i);
888                                 return -EINVAL;
889                         }
890                         if (size > radeon_object_size(track->arrays[i].robj)) {
891                                 DRM_ERROR("(PW %u) Vertex array %u need %lu dwords "
892                                            "have %lu dwords\n", prim_walk, i,
893                                            size >> 2,
894                                            radeon_object_size(track->arrays[i].robj) >> 2);
895                                 DRM_ERROR("Max indices %u\n", track->max_indx);
896                                 return -EINVAL;
897                         }
898                 }
899                 break;
900         case 2:
901                 for (i = 0; i < track->num_arrays; i++) {
902                         size = track->arrays[i].esize * (nverts - 1) * 4;
903                         if (track->arrays[i].robj == NULL) {
904                                 DRM_ERROR("(PW %u) Vertex array %u no buffer "
905                                           "bound\n", prim_walk, i);
906                                 return -EINVAL;
907                         }
908                         if (size > radeon_object_size(track->arrays[i].robj)) {
909                                 DRM_ERROR("(PW %u) Vertex array %u need %lu dwords "
910                                            "have %lu dwords\n", prim_walk, i, size >> 2,
911                                            radeon_object_size(track->arrays[i].robj) >> 2);
912                                 return -EINVAL;
913                         }
914                 }
915                 break;
916         case 3:
917                 size = track->vtx_size * nverts;
918                 if (size != track->immd_dwords) {
919                         DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
920                                   track->immd_dwords, size);
921                         DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
922                                   nverts, track->vtx_size);
923                         return -EINVAL;
924                 }
925                 break;
926         default:
927                 DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
928                           prim_walk);
929                 return -EINVAL;
930         }
931         return r300_cs_track_texture_check(rdev, track);
932 }
933
static inline void r300_cs_track_clear(struct r300_cs_track *track)
{
        unsigned i;

        track->num_cb = 4;
        track->maxy = 4096;
        for (i = 0; i < track->num_cb; i++) {
                track->cb[i].robj = NULL;
                track->cb[i].pitch = 8192;
                track->cb[i].cpp = 16;
                track->cb[i].offset = 0;
        }
        track->z_enabled = true;
        track->zb.robj = NULL;
        track->zb.pitch = 8192;
        track->zb.cpp = 4;
        track->zb.offset = 0;
        track->vtx_size = 0x7F;
        track->immd_dwords = 0xFFFFFFFFUL;
        track->num_arrays = 11;
        track->max_indx = 0x00FFFFFFUL;
        for (i = 0; i < track->num_arrays; i++) {
                track->arrays[i].robj = NULL;
                track->arrays[i].esize = 0x7F;
        }
        for (i = 0; i < 16; i++) {
                track->textures[i].pitch = 16536;
                track->textures[i].width = 16536;
                track->textures[i].height = 16536;
                track->textures[i].width_11 = 1 << 11;
                track->textures[i].height_11 = 1 << 11;
                track->textures[i].num_levels = 12;
                track->textures[i].txdepth = 16;
                track->textures[i].cpp = 64;
                track->textures[i].tex_coord_type = 1;
                track->textures[i].robj = NULL;
                /* CS IB emission code makes sure texture units are disabled */
                track->textures[i].enabled = false;
                track->textures[i].roundup_w = true;
                track->textures[i].roundup_h = true;
        }
}

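/*
 * Register safety bitmap handed to the shared r100 packet0 parser: one
 * bit per register dword (bit index reg >> 2), 159 * 32 bits covering
 * register offsets 0x0000 through 0x4F7C. It tells the parser which
 * registers of a type-0 packet must be routed through
 * r300_packet0_check() below.
 */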
static const unsigned r300_reg_safe_bm[159] = {
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0x17FF1FFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFF30FFBF,
        0xFFFFFFF8, 0xC3E6FFFF, 0xFFFFF6DF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF03F,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFEFCE, 0xF00EBFFF, 0x007C0000,
        0xF0000078, 0xFF000009, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFF7FF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFC78, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
        0x38FF8F50, 0xFFF88082, 0xF000000C, 0xFAE009FF,
        0x0000FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
        0x00000000, 0x0000C100, 0x00000000, 0x00000000,
        0x00000000, 0x00000000, 0x00000000, 0x00000000,
        0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF,
        0x00000000, 0x00000000, 0x00000000, 0x00000000,
        0x0003FC01, 0xFFFFFFF8, 0xFE800B19,
};

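/*
 * Called for each register the safety bitmap singles out in a type-0
 * packet: patches relocated GPU offsets into the indirect buffer and
 * mirrors the state that r300_cs_track_check() later needs for its
 * bounds checking.
 */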
static int r300_packet0_check(struct radeon_cs_parser *p,
                struct radeon_cs_packet *pkt,
                unsigned idx, unsigned reg)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_cs_reloc *reloc;
        struct r300_cs_track *track;
        volatile uint32_t *ib;
        uint32_t tmp, tile_flags = 0;
        unsigned i;
        int r;

        ib = p->ib->ptr;
        ib_chunk = &p->chunks[p->chunk_ib_idx];
        track = (struct r300_cs_track *)p->track;
        switch (reg) {
        case AVIVO_D1MODE_VLINE_START_END:
        case RADEON_CRTC_GUI_TRIG_VLINE:
                r = r100_cs_packet_parse_vline(p);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                break;
        case RADEON_DST_PITCH_OFFSET:
        case RADEON_SRC_PITCH_OFFSET:
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                tmp = ib_chunk->kdata[idx] & 0x003fffff;
                tmp += (((u32)reloc->lobj.gpu_offset) >> 10);

                if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
                        tile_flags |= RADEON_DST_TILE_MACRO;
                if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
                        if (reg == RADEON_SRC_PITCH_OFFSET) {
                                DRM_ERROR("Cannot src blit from microtiled surface\n");
                                r100_cs_dump_packet(p, pkt);
                                return -EINVAL;
                        }
                        tile_flags |= RADEON_DST_TILE_MICRO;
                }
                tmp |= tile_flags;
                ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp;
                break;
        case R300_RB3D_COLOROFFSET0:
        case R300_RB3D_COLOROFFSET1:
        case R300_RB3D_COLOROFFSET2:
        case R300_RB3D_COLOROFFSET3:
                i = (reg - R300_RB3D_COLOROFFSET0) >> 2;
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                track->cb[i].robj = reloc->robj;
                track->cb[i].offset = ib_chunk->kdata[idx];
                ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
                break;
        case R300_ZB_DEPTHOFFSET:
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                track->zb.robj = reloc->robj;
                track->zb.offset = ib_chunk->kdata[idx];
                ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
                break;
        case R300_TX_OFFSET_0:
        case R300_TX_OFFSET_0+4:
        case R300_TX_OFFSET_0+8:
        case R300_TX_OFFSET_0+12:
        case R300_TX_OFFSET_0+16:
        case R300_TX_OFFSET_0+20:
        case R300_TX_OFFSET_0+24:
        case R300_TX_OFFSET_0+28:
        case R300_TX_OFFSET_0+32:
        case R300_TX_OFFSET_0+36:
        case R300_TX_OFFSET_0+40:
        case R300_TX_OFFSET_0+44:
        case R300_TX_OFFSET_0+48:
        case R300_TX_OFFSET_0+52:
        case R300_TX_OFFSET_0+56:
        case R300_TX_OFFSET_0+60:
                i = (reg - R300_TX_OFFSET_0) >> 2;
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
                track->textures[i].robj = reloc->robj;
                break;
        /* Tracked registers */
        case 0x2084:
                /* VAP_VF_CNTL */
                track->vap_vf_cntl = ib_chunk->kdata[idx];
                break;
        case 0x20B4:
                /* VAP_VTX_SIZE */
                track->vtx_size = ib_chunk->kdata[idx] & 0x7F;
                break;
        case 0x2134:
                /* VAP_VF_MAX_VTX_INDX */
                track->max_indx = ib_chunk->kdata[idx] & 0x00FFFFFFUL;
                break;
        case 0x43E4:
                /* SC_SCISSOR1 */
                track->maxy = ((ib_chunk->kdata[idx] >> 13) & 0x1FFF) + 1;
                if (p->rdev->family < CHIP_RV515) {
                        /* pre-RV515 scissor coordinates carry a 1440 bias */
                        track->maxy -= 1440;
                }
                break;
        case 0x4E00:
                /* RB3D_CCTL */
                track->num_cb = ((ib_chunk->kdata[idx] >> 5) & 0x3) + 1;
                break;
        case 0x4E38:
        case 0x4E3C:
        case 0x4E40:
        case 0x4E44:
                /* RB3D_COLORPITCH0 */
                /* RB3D_COLORPITCH1 */
                /* RB3D_COLORPITCH2 */
                /* RB3D_COLORPITCH3 */
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                  idx, reg);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }

                if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
                        tile_flags |= R300_COLOR_TILE_ENABLE;
                if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
                        tile_flags |= R300_COLOR_MICROTILE_ENABLE;

                tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
                tmp |= tile_flags;
                ib[idx] = tmp;

                i = (reg - 0x4E38) >> 2;
                track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
                switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
                case 9:
                case 11:
                case 12:
                        track->cb[i].cpp = 1;
                        break;
                case 3:
                case 4:
                case 13:
                case 15:
                        track->cb[i].cpp = 2;
                        break;
                case 6:
                        track->cb[i].cpp = 4;
                        break;
                case 10:
                        track->cb[i].cpp = 8;
                        break;
                case 7:
                        track->cb[i].cpp = 16;
                        break;
                default:
                        DRM_ERROR("Invalid color buffer format (%d)!\n",
                                  ((ib_chunk->kdata[idx] >> 21) & 0xF));
                        return -EINVAL;
                }
                break;
        case 0x4F00:
                /* ZB_CNTL */
                if (ib_chunk->kdata[idx] & 2) {
                        track->z_enabled = true;
                } else {
                        track->z_enabled = false;
                }
                break;
        case 0x4F10:
                /* ZB_FORMAT */
                switch ((ib_chunk->kdata[idx] & 0xF)) {
                case 0:
                case 1:
                        track->zb.cpp = 2;
                        break;
                case 2:
                        track->zb.cpp = 4;
                        break;
                default:
                        DRM_ERROR("Invalid z buffer format (%d)!\n",
                                  (ib_chunk->kdata[idx] & 0xF));
                        return -EINVAL;
                }
                break;
        case 0x4F24:
                /* ZB_DEPTHPITCH */
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                  idx, reg);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }

                if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
                        tile_flags |= R300_DEPTHMACROTILE_ENABLE;
                if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
                        tile_flags |= R300_DEPTHMICROTILE_TILED;

                tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
                tmp |= tile_flags;
                ib[idx] = tmp;

                track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
                break;
        case 0x4104:
                for (i = 0; i < 16; i++) {
                        bool enabled;

                        enabled = !!(ib_chunk->kdata[idx] & (1 << i));
                        track->textures[i].enabled = enabled;
                }
                break;
        case 0x44C0:
        case 0x44C4:
        case 0x44C8:
        case 0x44CC:
        case 0x44D0:
        case 0x44D4:
        case 0x44D8:
        case 0x44DC:
        case 0x44E0:
        case 0x44E4:
        case 0x44E8:
        case 0x44EC:
        case 0x44F0:
        case 0x44F4:
        case 0x44F8:
        case 0x44FC:
                /* TX_FORMAT1_[0-15] */
                i = (reg - 0x44C0) >> 2;
                tmp = (ib_chunk->kdata[idx] >> 25) & 0x3;
                track->textures[i].tex_coord_type = tmp;
                switch ((ib_chunk->kdata[idx] & 0x1F)) {
                case 0:
                case 2:
                case 5:
                case 18:
                case 20:
                case 21:
                        track->textures[i].cpp = 1;
                        break;
                case 1:
                case 3:
                case 6:
                case 7:
                case 10:
                case 11:
                case 19:
                case 22:
                case 24:
                        track->textures[i].cpp = 2;
                        break;
                case 4:
                case 8:
                case 9:
                case 12:
                case 13:
                case 23:
                case 25:
                case 27:
                case 30:
                        track->textures[i].cpp = 4;
                        break;
                case 14:
                case 26:
                case 28:
                        track->textures[i].cpp = 8;
                        break;
                case 29:
                        track->textures[i].cpp = 16;
                        break;
                default:
                        DRM_ERROR("Invalid texture format %u\n",
                                  (ib_chunk->kdata[idx] & 0x1F));
                        return -EINVAL;
                }
                break;
        case 0x4400:
        case 0x4404:
        case 0x4408:
        case 0x440C:
        case 0x4410:
        case 0x4414:
        case 0x4418:
        case 0x441C:
        case 0x4420:
        case 0x4424:
        case 0x4428:
        case 0x442C:
        case 0x4430:
        case 0x4434:
        case 0x4438:
        case 0x443C:
                /* TX_FILTER0_[0-15] */
                i = (reg - 0x4400) >> 2;
1341                 tmp = ib_chunk->kdata[idx] & 0x7;;
1342                 if (tmp == 2 || tmp == 4 || tmp == 6) {
1343                         track->textures[i].roundup_w = false;
1344                 }
1345                 tmp = (ib_chunk->kdata[idx] >> 3) & 0x7;;
1346                 if (tmp == 2 || tmp == 4 || tmp == 6) {
1347                         track->textures[i].roundup_h = false;
1348                 }
1349                 break;
1350         case 0x4500:
1351         case 0x4504:
1352         case 0x4508:
1353         case 0x450C:
1354         case 0x4510:
1355         case 0x4514:
1356         case 0x4518:
1357         case 0x451C:
1358         case 0x4520:
1359         case 0x4524:
1360         case 0x4528:
1361         case 0x452C:
1362         case 0x4530:
1363         case 0x4534:
1364         case 0x4538:
1365         case 0x453C:
1366                 /* TX_FORMAT2_[0-15] */
1367                 i = (reg - 0x4500) >> 2;
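                     /* The low 14 bits hold the pitch minus one.  On RV515
                      * and newer, bits 15 and 16 supply an 11th width/height
                      * bit, extending the maximum texture size the tracker
                      * accepts to 4096 texels. */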
1368                 tmp = ib_chunk->kdata[idx] & 0x3FFF;
1369                 track->textures[i].pitch = tmp + 1;
1370                 if (p->rdev->family >= CHIP_RV515) {
1371                         tmp = ((ib_chunk->kdata[idx] >> 15) & 1) << 11;
1372                         track->textures[i].width_11 = tmp;
1373                         tmp = ((ib_chunk->kdata[idx] >> 16) & 1) << 11;
1374                         track->textures[i].height_11 = tmp;
1375                 }
1376                 break;
1377         case 0x4480:
1378         case 0x4484:
1379         case 0x4488:
1380         case 0x448C:
1381         case 0x4490:
1382         case 0x4494:
1383         case 0x4498:
1384         case 0x449C:
1385         case 0x44A0:
1386         case 0x44A4:
1387         case 0x44A8:
1388         case 0x44AC:
1389         case 0x44B0:
1390         case 0x44B4:
1391         case 0x44B8:
1392         case 0x44BC:
1393                 /* TX_FORMAT0_[0-15] */
1394                 i = (reg - 0x4480) >> 2;
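                     /* Width and height are stored minus one in 11-bit
                      * fields, the mip level count in bits 29:26 and the
                      * (3D) texture depth in bits 25:22; bit 31 selects
                      * pitch addressing instead of width-derived pitch. */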
1395                 tmp = ib_chunk->kdata[idx] & 0x7FF;
1396                 track->textures[i].width = tmp + 1;
1397                 tmp = (ib_chunk->kdata[idx] >> 11) & 0x7FF;
1398                 track->textures[i].height = tmp + 1;
1399                 tmp = (ib_chunk->kdata[idx] >> 26) & 0xF;
1400                 track->textures[i].num_levels = tmp;
1401                 tmp = ib_chunk->kdata[idx] & (1 << 31);
1402                 track->textures[i].use_pitch = !!tmp;
1403                 tmp = (ib_chunk->kdata[idx] >> 22) & 0xF;
1404                 track->textures[i].txdepth = tmp;
1405                 break;
1406         default:
1407                 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
1408                        reg, idx);
1409                 return -EINVAL;
1410         }
1411         return 0;
1412 }
1413
1414 static int r300_packet3_check(struct radeon_cs_parser *p,
1415                               struct radeon_cs_packet *pkt)
1416 {
1417         struct radeon_cs_chunk *ib_chunk;
1418         struct radeon_cs_reloc *reloc;
1419         struct r300_cs_track *track;
1420         volatile uint32_t *ib;
1421         unsigned idx;
1422         unsigned i, c;
1423         int r;
1424
1425         ib = p->ib->ptr;
1426         ib_chunk = &p->chunks[p->chunk_ib_idx];
1427         idx = pkt->idx + 1;
1428         track = (struct r300_cs_track *)p->track;
1429         switch (pkt->opcode) {
1430         case PACKET3_3D_LOAD_VBPNTR:
1431                 c = ib_chunk->kdata[idx++] & 0x1F;
1432                 track->num_arrays = c;
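                     /* Vertex array pointers come in pairs: each pair shares
                      * one descriptor dword (element sizes in bits 14:8 and
                      * 30:24) followed by one GPU address dword per array; a
                      * trailing odd array takes two dwords.  Every address
                      * must be patched with its relocation offset. */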
1433                 for (i = 0; i < (c - 1); i += 2, idx += 3) {
1434                         r = r100_cs_packet_next_reloc(p, &reloc);
1435                         if (r) {
1436                                 DRM_ERROR("No reloc for packet3 %d\n",
1437                                           pkt->opcode);
1438                                 r100_cs_dump_packet(p, pkt);
1439                                 return r;
1440                         }
1441                         ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
1442                         track->arrays[i + 0].robj = reloc->robj;
1443                         track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
1444                         track->arrays[i + 0].esize &= 0x7F;
1445                         r = r100_cs_packet_next_reloc(p, &reloc);
1446                         if (r) {
1447                                 DRM_ERROR("No reloc for packet3 %d\n",
1448                                           pkt->opcode);
1449                                 r100_cs_dump_packet(p, pkt);
1450                                 return r;
1451                         }
1452                         ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
1453                         track->arrays[i + 1].robj = reloc->robj;
1454                         track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24;
1455                         track->arrays[i + 1].esize &= 0x7F;
1456                 }
1457                 if (c & 1) {
1458                         r = r100_cs_packet_next_reloc(p, &reloc);
1459                         if (r) {
1460                                 DRM_ERROR("No reloc for packet3 %d\n",
1461                                           pkt->opcode);
1462                                 r100_cs_dump_packet(p, pkt);
1463                                 return r;
1464                         }
1465                         ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
1466                         track->arrays[i + 0].robj = reloc->robj;
1467                         track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
1468                         track->arrays[i + 0].esize &= 0x7F;
1469                 }
1470                 break;
1471         case PACKET3_INDX_BUFFER:
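                     /* Dword 1 is the GPU address of the index buffer:
                      * relocate it, then have the r100 helper validate the
                      * buffer before any indexed draw fetches from it. */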
1472                 r = r100_cs_packet_next_reloc(p, &reloc);
1473                 if (r) {
1474                         DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1475                         r100_cs_dump_packet(p, pkt);
1476                         return r;
1477                 }
1478                 ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
1479                 r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
1480                 if (r) {
1481                         return r;
1482                 }
1483                 break;
1484         /* Draw packet */
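             /* Each draw captures VAP_VF_CNTL (primitive type, PRIM_WALK and
              * vertex count) so r300_cs_track_check() can verify the tracked
              * buffers and textures are large enough before the draw is
              * accepted. */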
1485         case PACKET3_3D_DRAW_IMMD:
1486                 /* Number of dwords is vtx_size * (num_vertices - 1)
1487                  * PRIM_WALK must be equal to 3: vertex data is embedded
1488                  * in the cmd stream */
1489                 if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) {
1490                         DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1491                         return -EINVAL;
1492                 }
1493                 track->vap_vf_cntl = ib_chunk->kdata[idx+1];
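                     /* The payload is pkt->count + 1 dwords; the first two
                      * (vertex format and VAP_VF_CNTL) are control words, so
                      * pkt->count - 1 dwords of inline vertex data remain. */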
1494                 track->immd_dwords = pkt->count - 1;
1495                 r = r300_cs_track_check(p->rdev, track);
1496                 if (r) {
1497                         return r;
1498                 }
1499                 break;
1500         case PACKET3_3D_DRAW_IMMD_2:
1501                 /* Number of dwords is vtx_size * (num_vertices - 1)
1502                  * PRIM_WALK must be equal to 3: vertex data is embedded
1503                  * in the cmd stream */
1504                 if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) {
1505                         DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1506                         return -EINVAL;
1507                 }
1508                 track->vap_vf_cntl = ib_chunk->kdata[idx];
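                     /* Unlike DRAW_IMMD, only VAP_VF_CNTL precedes the
                      * vertex data here, so all remaining pkt->count dwords
                      * are inline vertices. */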
1509                 track->immd_dwords = pkt->count;
1510                 r = r300_cs_track_check(p->rdev, track);
1511                 if (r) {
1512                         return r;
1513                 }
1514                 break;
1515         case PACKET3_3D_DRAW_VBUF:
1516                 track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
1517                 r = r300_cs_track_check(p->rdev, track);
1518                 if (r) {
1519                         return r;
1520                 }
1521                 break;
1522         case PACKET3_3D_DRAW_VBUF_2:
1523                 track->vap_vf_cntl = ib_chunk->kdata[idx];
1524                 r = r300_cs_track_check(p->rdev, track);
1525                 if (r) {
1526                         return r;
1527                 }
1528                 break;
1529         case PACKET3_3D_DRAW_INDX:
1530                 track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
1531                 r = r300_cs_track_check(p->rdev, track);
1532                 if (r) {
1533                         return r;
1534                 }
1535                 break;
1536         case PACKET3_3D_DRAW_INDX_2:
1537                 track->vap_vf_cntl = ib_chunk->kdata[idx];
1538                 r = r300_cs_track_check(p->rdev, track);
1539                 if (r) {
1540                         return r;
1541                 }
1542                 break;
1543         case PACKET3_NOP:
1544                 break;
1545         default:
1546                 DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
1547                 return -EINVAL;
1548         }
1549         return 0;
1550 }
1551
1552 int r300_cs_parse(struct radeon_cs_parser *p)
1553 {
1554         struct radeon_cs_packet pkt;
1555         struct r300_cs_track track;
1556         int r;
1557
1558         r300_cs_track_clear(&track);
1559         p->track = &track;
1560         do {
1561                 r = r100_cs_packet_parse(p, &pkt, p->idx);
1562                 if (r) {
1563                         return r;
1564                 }
1565                 p->idx += pkt.count + 2;
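                     /* A packet occupies its header plus count + 1 payload
                      * dwords; type-2 packets parse with count == -1, so
                      * only the header dword is skipped. */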
1566                 switch (pkt.type) {
1567                 case PACKET_TYPE0:
1568                         r = r100_cs_parse_packet0(p, &pkt,
1569                                                   p->rdev->config.r300.reg_safe_bm,
1570                                                   p->rdev->config.r300.reg_safe_bm_size,
1571                                                   &r300_packet0_check);
1572                         break;
1573                 case PACKET_TYPE2:
1574                         break;
1575                 case PACKET_TYPE3:
1576                         r = r300_packet3_check(p, &pkt);
1577                         break;
1578                 default:
1579                         DRM_ERROR("Unknown packet type %d!\n", pkt.type);
1580                         return -EINVAL;
1581                 }
1582                 if (r) {
1583                         return r;
1584                 }
1585         } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
1586         return 0;
1587 }
1588
1589 int r300_init(struct radeon_device *rdev)
1590 {
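             /* reg_safe_bm tells the packet0 parser which register writes
              * can pass through as-is and which must be inspected by
              * r300_packet0_check(); the whole r300 family shares one
              * table. */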
1591         rdev->config.r300.reg_safe_bm = r300_reg_safe_bm;
1592         rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(r300_reg_safe_bm);
1593         return 0;
1594 }