Merge branch 'driver-core-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[pandora-kernel.git] / drivers / gpu / drm / radeon / rv770.c
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/firmware.h>
29 #include <linux/platform_device.h>
30 #include <linux/slab.h>
31 #include "drmP.h"
32 #include "radeon.h"
33 #include "radeon_asic.h"
34 #include "radeon_drm.h"
35 #include "rv770d.h"
36 #include "atom.h"
37 #include "avivod.h"
38
39 #define R700_PFP_UCODE_SIZE 848
40 #define R700_PM4_UCODE_SIZE 1360
41
42 static void rv770_gpu_init(struct radeon_device *rdev);
43 void rv770_fini(struct radeon_device *rdev);
44 static void rv770_pcie_gen2_enable(struct radeon_device *rdev);
45
46 u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
47 {
48         struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
49         u32 tmp = RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset);
50
51         /* Lock the graphics update lock */
52         tmp |= AVIVO_D1GRPH_UPDATE_LOCK;
53         WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp);
54
55         /* update the scanout addresses */
56         if (radeon_crtc->crtc_id) {
57                 WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
58                 WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
59         } else {
60                 WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
61                 WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
62         }
63         WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
64                (u32)crtc_base);
65         WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
66                (u32)crtc_base);
67
68         /* Wait for update_pending to go high. */
69         while (!(RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING));
70         DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
71
72         /* Unlock the lock, so double-buffering can take place inside vblank */
73         tmp &= ~AVIVO_D1GRPH_UPDATE_LOCK;
74         WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp);
75
76         /* Return current update_pending status: */
77         return RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING;
78 }
79
80 /* get temperature in millidegrees */
81 int rv770_get_temp(struct radeon_device *rdev)
82 {
83         u32 temp = (RREG32(CG_MULT_THERMAL_STATUS) & ASIC_T_MASK) >>
84                 ASIC_T_SHIFT;
85         int actual_temp;
86
87         if (temp & 0x400)
88                 actual_temp = -256;
89         else if (temp & 0x200)
90                 actual_temp = 255;
91         else if (temp & 0x100) {
92                 actual_temp = temp & 0x1ff;
93                 actual_temp |= ~0x1ff;
94         } else
95                 actual_temp = temp & 0xff;
96
97         return (actual_temp * 1000) / 2;
98 }
99
100 void rv770_pm_misc(struct radeon_device *rdev)
101 {
102         int req_ps_idx = rdev->pm.requested_power_state_index;
103         int req_cm_idx = rdev->pm.requested_clock_mode_index;
104         struct radeon_power_state *ps = &rdev->pm.power_state[req_ps_idx];
105         struct radeon_voltage *voltage = &ps->clock_info[req_cm_idx].voltage;
106
107         if ((voltage->type == VOLTAGE_SW) && voltage->voltage) {
108                 /* 0xff01 is a flag rather then an actual voltage */
109                 if (voltage->voltage == 0xff01)
110                         return;
111                 if (voltage->voltage != rdev->pm.current_vddc) {
112                         radeon_atom_set_voltage(rdev, voltage->voltage, SET_VOLTAGE_TYPE_ASIC_VDDC);
113                         rdev->pm.current_vddc = voltage->voltage;
114                         DRM_DEBUG("Setting: v: %d\n", voltage->voltage);
115                 }
116         }
117 }
118
119 /*
120  * GART
121  */
122 int rv770_pcie_gart_enable(struct radeon_device *rdev)
123 {
124         u32 tmp;
125         int r, i;
126
127         if (rdev->gart.table.vram.robj == NULL) {
128                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
129                 return -EINVAL;
130         }
131         r = radeon_gart_table_vram_pin(rdev);
132         if (r)
133                 return r;
134         radeon_gart_restore(rdev);
135         /* Setup L2 cache */
136         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
137                                 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
138                                 EFFECTIVE_L2_QUEUE_SIZE(7));
139         WREG32(VM_L2_CNTL2, 0);
140         WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
141         /* Setup TLB control */
142         tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
143                 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
144                 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
145                 EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
146         WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
147         WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
148         WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
149         WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
150         WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
151         WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
152         WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
153         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
154         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
155         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
156         WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
157                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
158         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
159                         (u32)(rdev->dummy_page.addr >> 12));
160         for (i = 1; i < 7; i++)
161                 WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
162
163         r600_pcie_gart_tlb_flush(rdev);
164         rdev->gart.ready = true;
165         return 0;
166 }
167
168 void rv770_pcie_gart_disable(struct radeon_device *rdev)
169 {
170         u32 tmp;
171         int i, r;
172
173         /* Disable all tables */
174         for (i = 0; i < 7; i++)
175                 WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
176
177         /* Setup L2 cache */
178         WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING |
179                                 EFFECTIVE_L2_QUEUE_SIZE(7));
180         WREG32(VM_L2_CNTL2, 0);
181         WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
182         /* Setup TLB control */
183         tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
184         WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
185         WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
186         WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
187         WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
188         WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
189         WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
190         WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
191         if (rdev->gart.table.vram.robj) {
192                 r = radeon_bo_reserve(rdev->gart.table.vram.robj, false);
193                 if (likely(r == 0)) {
194                         radeon_bo_kunmap(rdev->gart.table.vram.robj);
195                         radeon_bo_unpin(rdev->gart.table.vram.robj);
196                         radeon_bo_unreserve(rdev->gart.table.vram.robj);
197                 }
198         }
199 }
200
/*
 * Tear down the PCIE GART: run the common GART teardown, disable the
 * hardware path, then free the VRAM-backed table, in that order.
 */
void rv770_pcie_gart_fini(struct radeon_device *rdev)
{
	radeon_gart_fini(rdev);
	rv770_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
}
207
208
209 void rv770_agp_enable(struct radeon_device *rdev)
210 {
211         u32 tmp;
212         int i;
213
214         /* Setup L2 cache */
215         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
216                                 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
217                                 EFFECTIVE_L2_QUEUE_SIZE(7));
218         WREG32(VM_L2_CNTL2, 0);
219         WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
220         /* Setup TLB control */
221         tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
222                 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
223                 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
224                 EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
225         WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
226         WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
227         WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
228         WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
229         WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
230         WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
231         WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
232         for (i = 0; i < 7; i++)
233                 WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
234 }
235
236 static void rv770_mc_program(struct radeon_device *rdev)
237 {
238         struct rv515_mc_save save;
239         u32 tmp;
240         int i, j;
241
242         /* Initialize HDP */
243         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
244                 WREG32((0x2c14 + j), 0x00000000);
245                 WREG32((0x2c18 + j), 0x00000000);
246                 WREG32((0x2c1c + j), 0x00000000);
247                 WREG32((0x2c20 + j), 0x00000000);
248                 WREG32((0x2c24 + j), 0x00000000);
249         }
250         /* r7xx hw bug.  Read from HDP_DEBUG1 rather
251          * than writing to HDP_REG_COHERENCY_FLUSH_CNTL
252          */
253         tmp = RREG32(HDP_DEBUG1);
254
255         rv515_mc_stop(rdev, &save);
256         if (r600_mc_wait_for_idle(rdev)) {
257                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
258         }
259         /* Lockout access through VGA aperture*/
260         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
261         /* Update configuration */
262         if (rdev->flags & RADEON_IS_AGP) {
263                 if (rdev->mc.vram_start < rdev->mc.gtt_start) {
264                         /* VRAM before AGP */
265                         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
266                                 rdev->mc.vram_start >> 12);
267                         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
268                                 rdev->mc.gtt_end >> 12);
269                 } else {
270                         /* VRAM after AGP */
271                         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
272                                 rdev->mc.gtt_start >> 12);
273                         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
274                                 rdev->mc.vram_end >> 12);
275                 }
276         } else {
277                 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
278                         rdev->mc.vram_start >> 12);
279                 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
280                         rdev->mc.vram_end >> 12);
281         }
282         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);
283         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
284         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
285         WREG32(MC_VM_FB_LOCATION, tmp);
286         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
287         WREG32(HDP_NONSURFACE_INFO, (2 << 7));
288         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
289         if (rdev->flags & RADEON_IS_AGP) {
290                 WREG32(MC_VM_AGP_TOP, rdev->mc.gtt_end >> 16);
291                 WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16);
292                 WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22);
293         } else {
294                 WREG32(MC_VM_AGP_BASE, 0);
295                 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
296                 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
297         }
298         if (r600_mc_wait_for_idle(rdev)) {
299                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
300         }
301         rv515_mc_resume(rdev, &save);
302         /* we need to own VRAM, so turn off the VGA renderer here
303          * to stop it overwriting our objects */
304         rv515_vga_render_disable(rdev);
305 }
306
307
308 /*
309  * CP.
310  */
311 void r700_cp_stop(struct radeon_device *rdev)
312 {
313         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
314         WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
315         WREG32(SCRATCH_UMSK, 0);
316 }
317
318 static int rv770_cp_load_microcode(struct radeon_device *rdev)
319 {
320         const __be32 *fw_data;
321         int i;
322
323         if (!rdev->me_fw || !rdev->pfp_fw)
324                 return -EINVAL;
325
326         r700_cp_stop(rdev);
327         WREG32(CP_RB_CNTL,
328 #ifdef __BIG_ENDIAN
329                BUF_SWAP_32BIT |
330 #endif
331                RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3));
332
333         /* Reset cp */
334         WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
335         RREG32(GRBM_SOFT_RESET);
336         mdelay(15);
337         WREG32(GRBM_SOFT_RESET, 0);
338
339         fw_data = (const __be32 *)rdev->pfp_fw->data;
340         WREG32(CP_PFP_UCODE_ADDR, 0);
341         for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
342                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
343         WREG32(CP_PFP_UCODE_ADDR, 0);
344
345         fw_data = (const __be32 *)rdev->me_fw->data;
346         WREG32(CP_ME_RAM_WADDR, 0);
347         for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
348                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
349
350         WREG32(CP_PFP_UCODE_ADDR, 0);
351         WREG32(CP_ME_RAM_WADDR, 0);
352         WREG32(CP_ME_RAM_RADDR, 0);
353         return 0;
354 }
355
/*
 * Shut the command processor down: halt it first, then tear down the
 * ring via radeon_ring_fini().
 */
void r700_cp_fini(struct radeon_device *rdev)
{
	r700_cp_stop(rdev);
	radeon_ring_fini(rdev);
}
361
362 /*
363  * Core functions
364  */
365 static u32 r700_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
366                                              u32 num_tile_pipes,
367                                              u32 num_backends,
368                                              u32 backend_disable_mask)
369 {
370         u32 backend_map = 0;
371         u32 enabled_backends_mask;
372         u32 enabled_backends_count;
373         u32 cur_pipe;
374         u32 swizzle_pipe[R7XX_MAX_PIPES];
375         u32 cur_backend;
376         u32 i;
377         bool force_no_swizzle;
378
379         if (num_tile_pipes > R7XX_MAX_PIPES)
380                 num_tile_pipes = R7XX_MAX_PIPES;
381         if (num_tile_pipes < 1)
382                 num_tile_pipes = 1;
383         if (num_backends > R7XX_MAX_BACKENDS)
384                 num_backends = R7XX_MAX_BACKENDS;
385         if (num_backends < 1)
386                 num_backends = 1;
387
388         enabled_backends_mask = 0;
389         enabled_backends_count = 0;
390         for (i = 0; i < R7XX_MAX_BACKENDS; ++i) {
391                 if (((backend_disable_mask >> i) & 1) == 0) {
392                         enabled_backends_mask |= (1 << i);
393                         ++enabled_backends_count;
394                 }
395                 if (enabled_backends_count == num_backends)
396                         break;
397         }
398
399         if (enabled_backends_count == 0) {
400                 enabled_backends_mask = 1;
401                 enabled_backends_count = 1;
402         }
403
404         if (enabled_backends_count != num_backends)
405                 num_backends = enabled_backends_count;
406
407         switch (rdev->family) {
408         case CHIP_RV770:
409         case CHIP_RV730:
410                 force_no_swizzle = false;
411                 break;
412         case CHIP_RV710:
413         case CHIP_RV740:
414         default:
415                 force_no_swizzle = true;
416                 break;
417         }
418
419         memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES);
420         switch (num_tile_pipes) {
421         case 1:
422                 swizzle_pipe[0] = 0;
423                 break;
424         case 2:
425                 swizzle_pipe[0] = 0;
426                 swizzle_pipe[1] = 1;
427                 break;
428         case 3:
429                 if (force_no_swizzle) {
430                         swizzle_pipe[0] = 0;
431                         swizzle_pipe[1] = 1;
432                         swizzle_pipe[2] = 2;
433                 } else {
434                         swizzle_pipe[0] = 0;
435                         swizzle_pipe[1] = 2;
436                         swizzle_pipe[2] = 1;
437                 }
438                 break;
439         case 4:
440                 if (force_no_swizzle) {
441                         swizzle_pipe[0] = 0;
442                         swizzle_pipe[1] = 1;
443                         swizzle_pipe[2] = 2;
444                         swizzle_pipe[3] = 3;
445                 } else {
446                         swizzle_pipe[0] = 0;
447                         swizzle_pipe[1] = 2;
448                         swizzle_pipe[2] = 3;
449                         swizzle_pipe[3] = 1;
450                 }
451                 break;
452         case 5:
453                 if (force_no_swizzle) {
454                         swizzle_pipe[0] = 0;
455                         swizzle_pipe[1] = 1;
456                         swizzle_pipe[2] = 2;
457                         swizzle_pipe[3] = 3;
458                         swizzle_pipe[4] = 4;
459                 } else {
460                         swizzle_pipe[0] = 0;
461                         swizzle_pipe[1] = 2;
462                         swizzle_pipe[2] = 4;
463                         swizzle_pipe[3] = 1;
464                         swizzle_pipe[4] = 3;
465                 }
466                 break;
467         case 6:
468                 if (force_no_swizzle) {
469                         swizzle_pipe[0] = 0;
470                         swizzle_pipe[1] = 1;
471                         swizzle_pipe[2] = 2;
472                         swizzle_pipe[3] = 3;
473                         swizzle_pipe[4] = 4;
474                         swizzle_pipe[5] = 5;
475                 } else {
476                         swizzle_pipe[0] = 0;
477                         swizzle_pipe[1] = 2;
478                         swizzle_pipe[2] = 4;
479                         swizzle_pipe[3] = 5;
480                         swizzle_pipe[4] = 3;
481                         swizzle_pipe[5] = 1;
482                 }
483                 break;
484         case 7:
485                 if (force_no_swizzle) {
486                         swizzle_pipe[0] = 0;
487                         swizzle_pipe[1] = 1;
488                         swizzle_pipe[2] = 2;
489                         swizzle_pipe[3] = 3;
490                         swizzle_pipe[4] = 4;
491                         swizzle_pipe[5] = 5;
492                         swizzle_pipe[6] = 6;
493                 } else {
494                         swizzle_pipe[0] = 0;
495                         swizzle_pipe[1] = 2;
496                         swizzle_pipe[2] = 4;
497                         swizzle_pipe[3] = 6;
498                         swizzle_pipe[4] = 3;
499                         swizzle_pipe[5] = 1;
500                         swizzle_pipe[6] = 5;
501                 }
502                 break;
503         case 8:
504                 if (force_no_swizzle) {
505                         swizzle_pipe[0] = 0;
506                         swizzle_pipe[1] = 1;
507                         swizzle_pipe[2] = 2;
508                         swizzle_pipe[3] = 3;
509                         swizzle_pipe[4] = 4;
510                         swizzle_pipe[5] = 5;
511                         swizzle_pipe[6] = 6;
512                         swizzle_pipe[7] = 7;
513                 } else {
514                         swizzle_pipe[0] = 0;
515                         swizzle_pipe[1] = 2;
516                         swizzle_pipe[2] = 4;
517                         swizzle_pipe[3] = 6;
518                         swizzle_pipe[4] = 3;
519                         swizzle_pipe[5] = 1;
520                         swizzle_pipe[6] = 7;
521                         swizzle_pipe[7] = 5;
522                 }
523                 break;
524         }
525
526         cur_backend = 0;
527         for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
528                 while (((1 << cur_backend) & enabled_backends_mask) == 0)
529                         cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
530
531                 backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
532
533                 cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
534         }
535
536         return backend_map;
537 }
538
539 static void rv770_program_channel_remap(struct radeon_device *rdev)
540 {
541         u32 tcp_chan_steer, mc_shared_chremap, tmp;
542         bool force_no_swizzle;
543
544         switch (rdev->family) {
545         case CHIP_RV770:
546         case CHIP_RV730:
547                 force_no_swizzle = false;
548                 break;
549         case CHIP_RV710:
550         case CHIP_RV740:
551         default:
552                 force_no_swizzle = true;
553                 break;
554         }
555
556         tmp = RREG32(MC_SHARED_CHMAP);
557         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
558         case 0:
559         case 1:
560         default:
561                 /* default mapping */
562                 mc_shared_chremap = 0x00fac688;
563                 break;
564         case 2:
565         case 3:
566                 if (force_no_swizzle)
567                         mc_shared_chremap = 0x00fac688;
568                 else
569                         mc_shared_chremap = 0x00bbc298;
570                 break;
571         }
572
573         if (rdev->family == CHIP_RV740)
574                 tcp_chan_steer = 0x00ef2a60;
575         else
576                 tcp_chan_steer = 0x00fac688;
577
578         WREG32(TCP_CHAN_STEER, tcp_chan_steer);
579         WREG32(MC_SHARED_CHREMAP, mc_shared_chremap);
580 }
581
582 static void rv770_gpu_init(struct radeon_device *rdev)
583 {
584         int i, j, num_qd_pipes;
585         u32 ta_aux_cntl;
586         u32 sx_debug_1;
587         u32 smx_dc_ctl0;
588         u32 db_debug3;
589         u32 num_gs_verts_per_thread;
590         u32 vgt_gs_per_es;
591         u32 gs_prim_buffer_depth = 0;
592         u32 sq_ms_fifo_sizes;
593         u32 sq_config;
594         u32 sq_thread_resource_mgmt;
595         u32 hdp_host_path_cntl;
596         u32 sq_dyn_gpr_size_simd_ab_0;
597         u32 backend_map;
598         u32 gb_tiling_config = 0;
599         u32 cc_rb_backend_disable = 0;
600         u32 cc_gc_shader_pipe_config = 0;
601         u32 mc_arb_ramcfg;
602         u32 db_debug4;
603
604         /* setup chip specs */
605         switch (rdev->family) {
606         case CHIP_RV770:
607                 rdev->config.rv770.max_pipes = 4;
608                 rdev->config.rv770.max_tile_pipes = 8;
609                 rdev->config.rv770.max_simds = 10;
610                 rdev->config.rv770.max_backends = 4;
611                 rdev->config.rv770.max_gprs = 256;
612                 rdev->config.rv770.max_threads = 248;
613                 rdev->config.rv770.max_stack_entries = 512;
614                 rdev->config.rv770.max_hw_contexts = 8;
615                 rdev->config.rv770.max_gs_threads = 16 * 2;
616                 rdev->config.rv770.sx_max_export_size = 128;
617                 rdev->config.rv770.sx_max_export_pos_size = 16;
618                 rdev->config.rv770.sx_max_export_smx_size = 112;
619                 rdev->config.rv770.sq_num_cf_insts = 2;
620
621                 rdev->config.rv770.sx_num_of_sets = 7;
622                 rdev->config.rv770.sc_prim_fifo_size = 0xF9;
623                 rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
624                 rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
625                 break;
626         case CHIP_RV730:
627                 rdev->config.rv770.max_pipes = 2;
628                 rdev->config.rv770.max_tile_pipes = 4;
629                 rdev->config.rv770.max_simds = 8;
630                 rdev->config.rv770.max_backends = 2;
631                 rdev->config.rv770.max_gprs = 128;
632                 rdev->config.rv770.max_threads = 248;
633                 rdev->config.rv770.max_stack_entries = 256;
634                 rdev->config.rv770.max_hw_contexts = 8;
635                 rdev->config.rv770.max_gs_threads = 16 * 2;
636                 rdev->config.rv770.sx_max_export_size = 256;
637                 rdev->config.rv770.sx_max_export_pos_size = 32;
638                 rdev->config.rv770.sx_max_export_smx_size = 224;
639                 rdev->config.rv770.sq_num_cf_insts = 2;
640
641                 rdev->config.rv770.sx_num_of_sets = 7;
642                 rdev->config.rv770.sc_prim_fifo_size = 0xf9;
643                 rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
644                 rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
645                 if (rdev->config.rv770.sx_max_export_pos_size > 16) {
646                         rdev->config.rv770.sx_max_export_pos_size -= 16;
647                         rdev->config.rv770.sx_max_export_smx_size += 16;
648                 }
649                 break;
650         case CHIP_RV710:
651                 rdev->config.rv770.max_pipes = 2;
652                 rdev->config.rv770.max_tile_pipes = 2;
653                 rdev->config.rv770.max_simds = 2;
654                 rdev->config.rv770.max_backends = 1;
655                 rdev->config.rv770.max_gprs = 256;
656                 rdev->config.rv770.max_threads = 192;
657                 rdev->config.rv770.max_stack_entries = 256;
658                 rdev->config.rv770.max_hw_contexts = 4;
659                 rdev->config.rv770.max_gs_threads = 8 * 2;
660                 rdev->config.rv770.sx_max_export_size = 128;
661                 rdev->config.rv770.sx_max_export_pos_size = 16;
662                 rdev->config.rv770.sx_max_export_smx_size = 112;
663                 rdev->config.rv770.sq_num_cf_insts = 1;
664
665                 rdev->config.rv770.sx_num_of_sets = 7;
666                 rdev->config.rv770.sc_prim_fifo_size = 0x40;
667                 rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
668                 rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
669                 break;
670         case CHIP_RV740:
671                 rdev->config.rv770.max_pipes = 4;
672                 rdev->config.rv770.max_tile_pipes = 4;
673                 rdev->config.rv770.max_simds = 8;
674                 rdev->config.rv770.max_backends = 4;
675                 rdev->config.rv770.max_gprs = 256;
676                 rdev->config.rv770.max_threads = 248;
677                 rdev->config.rv770.max_stack_entries = 512;
678                 rdev->config.rv770.max_hw_contexts = 8;
679                 rdev->config.rv770.max_gs_threads = 16 * 2;
680                 rdev->config.rv770.sx_max_export_size = 256;
681                 rdev->config.rv770.sx_max_export_pos_size = 32;
682                 rdev->config.rv770.sx_max_export_smx_size = 224;
683                 rdev->config.rv770.sq_num_cf_insts = 2;
684
685                 rdev->config.rv770.sx_num_of_sets = 7;
686                 rdev->config.rv770.sc_prim_fifo_size = 0x100;
687                 rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
688                 rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
689
690                 if (rdev->config.rv770.sx_max_export_pos_size > 16) {
691                         rdev->config.rv770.sx_max_export_pos_size -= 16;
692                         rdev->config.rv770.sx_max_export_smx_size += 16;
693                 }
694                 break;
695         default:
696                 break;
697         }
698
699         /* Initialize HDP */
700         j = 0;
701         for (i = 0; i < 32; i++) {
702                 WREG32((0x2c14 + j), 0x00000000);
703                 WREG32((0x2c18 + j), 0x00000000);
704                 WREG32((0x2c1c + j), 0x00000000);
705                 WREG32((0x2c20 + j), 0x00000000);
706                 WREG32((0x2c24 + j), 0x00000000);
707                 j += 0x18;
708         }
709
710         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
711
712         /* setup tiling, simd, pipe config */
713         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
714
715         switch (rdev->config.rv770.max_tile_pipes) {
716         case 1:
717         default:
718                 gb_tiling_config |= PIPE_TILING(0);
719                 break;
720         case 2:
721                 gb_tiling_config |= PIPE_TILING(1);
722                 break;
723         case 4:
724                 gb_tiling_config |= PIPE_TILING(2);
725                 break;
726         case 8:
727                 gb_tiling_config |= PIPE_TILING(3);
728                 break;
729         }
730         rdev->config.rv770.tiling_npipes = rdev->config.rv770.max_tile_pipes;
731
732         if (rdev->family == CHIP_RV770)
733                 gb_tiling_config |= BANK_TILING(1);
734         else
735                 gb_tiling_config |= BANK_TILING((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT);
736         rdev->config.rv770.tiling_nbanks = 4 << ((gb_tiling_config >> 4) & 0x3);
737         gb_tiling_config |= GROUP_SIZE((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT);
738         if ((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT)
739                 rdev->config.rv770.tiling_group_size = 512;
740         else
741                 rdev->config.rv770.tiling_group_size = 256;
742         if (((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT) > 3) {
743                 gb_tiling_config |= ROW_TILING(3);
744                 gb_tiling_config |= SAMPLE_SPLIT(3);
745         } else {
746                 gb_tiling_config |=
747                         ROW_TILING(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT));
748                 gb_tiling_config |=
749                         SAMPLE_SPLIT(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT));
750         }
751
752         gb_tiling_config |= BANK_SWAPS(1);
753
754         cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000;
755         cc_rb_backend_disable |=
756                 BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << rdev->config.rv770.max_backends) & R7XX_MAX_BACKENDS_MASK);
757
758         cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
759         cc_gc_shader_pipe_config |=
760                 INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << rdev->config.rv770.max_pipes) & R7XX_MAX_PIPES_MASK);
761         cc_gc_shader_pipe_config |=
762                 INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << rdev->config.rv770.max_simds) & R7XX_MAX_SIMDS_MASK);
763
764         if (rdev->family == CHIP_RV740)
765                 backend_map = 0x28;
766         else
767                 backend_map = r700_get_tile_pipe_to_backend_map(rdev,
768                                                                 rdev->config.rv770.max_tile_pipes,
769                                                                 (R7XX_MAX_BACKENDS -
770                                                                  r600_count_pipe_bits((cc_rb_backend_disable &
771                                                                                        R7XX_MAX_BACKENDS_MASK) >> 16)),
772                                                                 (cc_rb_backend_disable >> 16));
773
774         rdev->config.rv770.tile_config = gb_tiling_config;
775         gb_tiling_config |= BACKEND_MAP(backend_map);
776
777         WREG32(GB_TILING_CONFIG, gb_tiling_config);
778         WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
779         WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
780
781         rv770_program_channel_remap(rdev);
782
783         WREG32(CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
784         WREG32(CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
785         WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
786         WREG32(CC_SYS_RB_BACKEND_DISABLE,  cc_rb_backend_disable);
787
788         WREG32(CGTS_SYS_TCC_DISABLE, 0);
789         WREG32(CGTS_TCC_DISABLE, 0);
790         WREG32(CGTS_USER_SYS_TCC_DISABLE, 0);
791         WREG32(CGTS_USER_TCC_DISABLE, 0);
792
793         num_qd_pipes =
794                 R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8);
795         WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK);
796         WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK);
797
798         /* set HW defaults for 3D engine */
799         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
800                                      ROQ_IB2_START(0x2b)));
801
802         WREG32(CP_MEQ_THRESHOLDS, STQ_SPLIT(0x30));
803
804         ta_aux_cntl = RREG32(TA_CNTL_AUX);
805         WREG32(TA_CNTL_AUX, ta_aux_cntl | DISABLE_CUBE_ANISO);
806
807         sx_debug_1 = RREG32(SX_DEBUG_1);
808         sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
809         WREG32(SX_DEBUG_1, sx_debug_1);
810
811         smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
812         smx_dc_ctl0 &= ~CACHE_DEPTH(0x1ff);
813         smx_dc_ctl0 |= CACHE_DEPTH((rdev->config.rv770.sx_num_of_sets * 64) - 1);
814         WREG32(SMX_DC_CTL0, smx_dc_ctl0);
815
816         if (rdev->family != CHIP_RV740)
817                 WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) |
818                                        GS_FLUSH_CTL(4) |
819                                        ACK_FLUSH_CTL(3) |
820                                        SYNC_FLUSH_CTL));
821
822         db_debug3 = RREG32(DB_DEBUG3);
823         db_debug3 &= ~DB_CLK_OFF_DELAY(0x1f);
824         switch (rdev->family) {
825         case CHIP_RV770:
826         case CHIP_RV740:
827                 db_debug3 |= DB_CLK_OFF_DELAY(0x1f);
828                 break;
829         case CHIP_RV710:
830         case CHIP_RV730:
831         default:
832                 db_debug3 |= DB_CLK_OFF_DELAY(2);
833                 break;
834         }
835         WREG32(DB_DEBUG3, db_debug3);
836
837         if (rdev->family != CHIP_RV770) {
838                 db_debug4 = RREG32(DB_DEBUG4);
839                 db_debug4 |= DISABLE_TILE_COVERED_FOR_PS_ITER;
840                 WREG32(DB_DEBUG4, db_debug4);
841         }
842
843         WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.rv770.sx_max_export_size / 4) - 1) |
844                                         POSITION_BUFFER_SIZE((rdev->config.rv770.sx_max_export_pos_size / 4) - 1) |
845                                         SMX_BUFFER_SIZE((rdev->config.rv770.sx_max_export_smx_size / 4) - 1)));
846
847         WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.rv770.sc_prim_fifo_size) |
848                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_hiz_tile_fifo_size) |
849                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_earlyz_tile_fifo_fize)));
850
851         WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
852
853         WREG32(VGT_NUM_INSTANCES, 1);
854
855         WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0));
856
857         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
858
859         WREG32(CP_PERFMON_CNTL, 0);
860
861         sq_ms_fifo_sizes = (CACHE_FIFO_SIZE(16 * rdev->config.rv770.sq_num_cf_insts) |
862                             DONE_FIFO_HIWATER(0xe0) |
863                             ALU_UPDATE_FIFO_HIWATER(0x8));
864         switch (rdev->family) {
865         case CHIP_RV770:
866         case CHIP_RV730:
867         case CHIP_RV710:
868                 sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1);
869                 break;
870         case CHIP_RV740:
871         default:
872                 sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x4);
873                 break;
874         }
875         WREG32(SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
876
877         /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
878          * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
879          */
880         sq_config = RREG32(SQ_CONFIG);
881         sq_config &= ~(PS_PRIO(3) |
882                        VS_PRIO(3) |
883                        GS_PRIO(3) |
884                        ES_PRIO(3));
885         sq_config |= (DX9_CONSTS |
886                       VC_ENABLE |
887                       EXPORT_SRC_C |
888                       PS_PRIO(0) |
889                       VS_PRIO(1) |
890                       GS_PRIO(2) |
891                       ES_PRIO(3));
892         if (rdev->family == CHIP_RV710)
893                 /* no vertex cache */
894                 sq_config &= ~VC_ENABLE;
895
896         WREG32(SQ_CONFIG, sq_config);
897
898         WREG32(SQ_GPR_RESOURCE_MGMT_1,  (NUM_PS_GPRS((rdev->config.rv770.max_gprs * 24)/64) |
899                                          NUM_VS_GPRS((rdev->config.rv770.max_gprs * 24)/64) |
900                                          NUM_CLAUSE_TEMP_GPRS(((rdev->config.rv770.max_gprs * 24)/64)/2)));
901
902         WREG32(SQ_GPR_RESOURCE_MGMT_2,  (NUM_GS_GPRS((rdev->config.rv770.max_gprs * 7)/64) |
903                                          NUM_ES_GPRS((rdev->config.rv770.max_gprs * 7)/64)));
904
905         sq_thread_resource_mgmt = (NUM_PS_THREADS((rdev->config.rv770.max_threads * 4)/8) |
906                                    NUM_VS_THREADS((rdev->config.rv770.max_threads * 2)/8) |
907                                    NUM_ES_THREADS((rdev->config.rv770.max_threads * 1)/8));
908         if (((rdev->config.rv770.max_threads * 1) / 8) > rdev->config.rv770.max_gs_threads)
909                 sq_thread_resource_mgmt |= NUM_GS_THREADS(rdev->config.rv770.max_gs_threads);
910         else
911                 sq_thread_resource_mgmt |= NUM_GS_THREADS((rdev->config.rv770.max_gs_threads * 1)/8);
912         WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
913
914         WREG32(SQ_STACK_RESOURCE_MGMT_1, (NUM_PS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) |
915                                                      NUM_VS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4)));
916
917         WREG32(SQ_STACK_RESOURCE_MGMT_2, (NUM_GS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) |
918                                                      NUM_ES_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4)));
919
920         sq_dyn_gpr_size_simd_ab_0 = (SIMDA_RING0((rdev->config.rv770.max_gprs * 38)/64) |
921                                      SIMDA_RING1((rdev->config.rv770.max_gprs * 38)/64) |
922                                      SIMDB_RING0((rdev->config.rv770.max_gprs * 38)/64) |
923                                      SIMDB_RING1((rdev->config.rv770.max_gprs * 38)/64));
924
925         WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);
926         WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
927         WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
928         WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
929         WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
930         WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
931         WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
932         WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);
933
934         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
935                                           FORCE_EOV_MAX_REZ_CNT(255)));
936
937         if (rdev->family == CHIP_RV710)
938                 WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(TC_ONLY) |
939                                                 AUTO_INVLD_EN(ES_AND_GS_AUTO)));
940         else
941                 WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(VC_AND_TC) |
942                                                 AUTO_INVLD_EN(ES_AND_GS_AUTO)));
943
944         switch (rdev->family) {
945         case CHIP_RV770:
946         case CHIP_RV730:
947         case CHIP_RV740:
948                 gs_prim_buffer_depth = 384;
949                 break;
950         case CHIP_RV710:
951                 gs_prim_buffer_depth = 128;
952                 break;
953         default:
954                 break;
955         }
956
957         num_gs_verts_per_thread = rdev->config.rv770.max_pipes * 16;
958         vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
959         /* Max value for this is 256 */
960         if (vgt_gs_per_es > 256)
961                 vgt_gs_per_es = 256;
962
963         WREG32(VGT_ES_PER_GS, 128);
964         WREG32(VGT_GS_PER_ES, vgt_gs_per_es);
965         WREG32(VGT_GS_PER_VS, 2);
966
967         /* more default values. 2D/3D driver should adjust as needed */
968         WREG32(VGT_GS_VERTEX_REUSE, 16);
969         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
970         WREG32(VGT_STRMOUT_EN, 0);
971         WREG32(SX_MISC, 0);
972         WREG32(PA_SC_MODE_CNTL, 0);
973         WREG32(PA_SC_EDGERULE, 0xaaaaaaaa);
974         WREG32(PA_SC_AA_CONFIG, 0);
975         WREG32(PA_SC_CLIPRECT_RULE, 0xffff);
976         WREG32(PA_SC_LINE_STIPPLE, 0);
977         WREG32(SPI_INPUT_Z, 0);
978         WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2));
979         WREG32(CB_COLOR7_FRAG, 0);
980
981         /* clear render buffer base addresses */
982         WREG32(CB_COLOR0_BASE, 0);
983         WREG32(CB_COLOR1_BASE, 0);
984         WREG32(CB_COLOR2_BASE, 0);
985         WREG32(CB_COLOR3_BASE, 0);
986         WREG32(CB_COLOR4_BASE, 0);
987         WREG32(CB_COLOR5_BASE, 0);
988         WREG32(CB_COLOR6_BASE, 0);
989         WREG32(CB_COLOR7_BASE, 0);
990
991         WREG32(TCP_CNTL, 0);
992
993         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
994         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
995
996         WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
997
998         WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA |
999                                           NUM_CLIP_SEQ(3)));
1000
1001 }
1002
1003 static int rv770_vram_scratch_init(struct radeon_device *rdev)
1004 {
1005         int r;
1006         u64 gpu_addr;
1007
1008         if (rdev->vram_scratch.robj == NULL) {
1009                 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE,
1010                                      PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
1011                                      &rdev->vram_scratch.robj);
1012                 if (r) {
1013                         return r;
1014                 }
1015         }
1016
1017         r = radeon_bo_reserve(rdev->vram_scratch.robj, false);
1018         if (unlikely(r != 0))
1019                 return r;
1020         r = radeon_bo_pin(rdev->vram_scratch.robj,
1021                           RADEON_GEM_DOMAIN_VRAM, &gpu_addr);
1022         if (r) {
1023                 radeon_bo_unreserve(rdev->vram_scratch.robj);
1024                 return r;
1025         }
1026         r = radeon_bo_kmap(rdev->vram_scratch.robj,
1027                                 (void **)&rdev->vram_scratch.ptr);
1028         if (r)
1029                 radeon_bo_unpin(rdev->vram_scratch.robj);
1030         radeon_bo_unreserve(rdev->vram_scratch.robj);
1031
1032         return r;
1033 }
1034
1035 static void rv770_vram_scratch_fini(struct radeon_device *rdev)
1036 {
1037         int r;
1038
1039         if (rdev->vram_scratch.robj == NULL) {
1040                 return;
1041         }
1042         r = radeon_bo_reserve(rdev->vram_scratch.robj, false);
1043         if (likely(r == 0)) {
1044                 radeon_bo_kunmap(rdev->vram_scratch.robj);
1045                 radeon_bo_unpin(rdev->vram_scratch.robj);
1046                 radeon_bo_unreserve(rdev->vram_scratch.robj);
1047         }
1048         radeon_bo_unref(&rdev->vram_scratch.robj);
1049 }
1050
1051 void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
1052 {
1053         u64 size_bf, size_af;
1054
1055         if (mc->mc_vram_size > 0xE0000000) {
1056                 /* leave room for at least 512M GTT */
1057                 dev_warn(rdev->dev, "limiting VRAM\n");
1058                 mc->real_vram_size = 0xE0000000;
1059                 mc->mc_vram_size = 0xE0000000;
1060         }
1061         if (rdev->flags & RADEON_IS_AGP) {
1062                 size_bf = mc->gtt_start;
1063                 size_af = 0xFFFFFFFF - mc->gtt_end + 1;
1064                 if (size_bf > size_af) {
1065                         if (mc->mc_vram_size > size_bf) {
1066                                 dev_warn(rdev->dev, "limiting VRAM\n");
1067                                 mc->real_vram_size = size_bf;
1068                                 mc->mc_vram_size = size_bf;
1069                         }
1070                         mc->vram_start = mc->gtt_start - mc->mc_vram_size;
1071                 } else {
1072                         if (mc->mc_vram_size > size_af) {
1073                                 dev_warn(rdev->dev, "limiting VRAM\n");
1074                                 mc->real_vram_size = size_af;
1075                                 mc->mc_vram_size = size_af;
1076                         }
1077                         mc->vram_start = mc->gtt_end;
1078                 }
1079                 mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
1080                 dev_info(rdev->dev, "VRAM: %lluM 0x%08llX - 0x%08llX (%lluM used)\n",
1081                                 mc->mc_vram_size >> 20, mc->vram_start,
1082                                 mc->vram_end, mc->real_vram_size >> 20);
1083         } else {
1084                 radeon_vram_location(rdev, &rdev->mc, 0);
1085                 rdev->mc.gtt_base_align = 0;
1086                 radeon_gtt_location(rdev, mc);
1087         }
1088 }
1089
1090 int rv770_mc_init(struct radeon_device *rdev)
1091 {
1092         u32 tmp;
1093         int chansize, numchan;
1094
1095         /* Get VRAM informations */
1096         rdev->mc.vram_is_ddr = true;
1097         tmp = RREG32(MC_ARB_RAMCFG);
1098         if (tmp & CHANSIZE_OVERRIDE) {
1099                 chansize = 16;
1100         } else if (tmp & CHANSIZE_MASK) {
1101                 chansize = 64;
1102         } else {
1103                 chansize = 32;
1104         }
1105         tmp = RREG32(MC_SHARED_CHMAP);
1106         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1107         case 0:
1108         default:
1109                 numchan = 1;
1110                 break;
1111         case 1:
1112                 numchan = 2;
1113                 break;
1114         case 2:
1115                 numchan = 4;
1116                 break;
1117         case 3:
1118                 numchan = 8;
1119                 break;
1120         }
1121         rdev->mc.vram_width = numchan * chansize;
1122         /* Could aper size report 0 ? */
1123         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
1124         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
1125         /* Setup GPU memory space */
1126         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
1127         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
1128         rdev->mc.visible_vram_size = rdev->mc.aper_size;
1129         r700_vram_gtt_location(rdev, &rdev->mc);
1130         radeon_update_bandwidth_info(rdev);
1131
1132         return 0;
1133 }
1134
1135 static int rv770_startup(struct radeon_device *rdev)
1136 {
1137         int r;
1138
1139         /* enable pcie gen2 link */
1140         rv770_pcie_gen2_enable(rdev);
1141
1142         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
1143                 r = r600_init_microcode(rdev);
1144                 if (r) {
1145                         DRM_ERROR("Failed to load firmware!\n");
1146                         return r;
1147                 }
1148         }
1149
1150         rv770_mc_program(rdev);
1151         if (rdev->flags & RADEON_IS_AGP) {
1152                 rv770_agp_enable(rdev);
1153         } else {
1154                 r = rv770_pcie_gart_enable(rdev);
1155                 if (r)
1156                         return r;
1157         }
1158         r = rv770_vram_scratch_init(rdev);
1159         if (r)
1160                 return r;
1161         rv770_gpu_init(rdev);
1162         r = r600_blit_init(rdev);
1163         if (r) {
1164                 r600_blit_fini(rdev);
1165                 rdev->asic->copy = NULL;
1166                 dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
1167         }
1168
1169         /* allocate wb buffer */
1170         r = radeon_wb_init(rdev);
1171         if (r)
1172                 return r;
1173
1174         /* Enable IRQ */
1175         r = r600_irq_init(rdev);
1176         if (r) {
1177                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
1178                 radeon_irq_kms_fini(rdev);
1179                 return r;
1180         }
1181         r600_irq_set(rdev);
1182
1183         r = radeon_ring_init(rdev, rdev->cp.ring_size);
1184         if (r)
1185                 return r;
1186         r = rv770_cp_load_microcode(rdev);
1187         if (r)
1188                 return r;
1189         r = r600_cp_resume(rdev);
1190         if (r)
1191                 return r;
1192
1193         return 0;
1194 }
1195
1196 int rv770_resume(struct radeon_device *rdev)
1197 {
1198         int r;
1199
1200         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
1201          * posting will perform necessary task to bring back GPU into good
1202          * shape.
1203          */
1204         /* post card */
1205         atom_asic_init(rdev->mode_info.atom_context);
1206
1207         r = rv770_startup(rdev);
1208         if (r) {
1209                 DRM_ERROR("r600 startup failed on resume\n");
1210                 return r;
1211         }
1212
1213         r = r600_ib_test(rdev);
1214         if (r) {
1215                 DRM_ERROR("radeon: failed testing IB (%d).\n", r);
1216                 return r;
1217         }
1218
1219         r = r600_audio_init(rdev);
1220         if (r) {
1221                 dev_err(rdev->dev, "radeon: audio init failed\n");
1222                 return r;
1223         }
1224
1225         return r;
1226
1227 }
1228
1229 int rv770_suspend(struct radeon_device *rdev)
1230 {
1231         int r;
1232
1233         r600_audio_fini(rdev);
1234         /* FIXME: we should wait for ring to be empty */
1235         r700_cp_stop(rdev);
1236         rdev->cp.ready = false;
1237         r600_irq_suspend(rdev);
1238         radeon_wb_disable(rdev);
1239         rv770_pcie_gart_disable(rdev);
1240         /* unpin shaders bo */
1241         if (rdev->r600_blit.shader_obj) {
1242                 r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
1243                 if (likely(r == 0)) {
1244                         radeon_bo_unpin(rdev->r600_blit.shader_obj);
1245                         radeon_bo_unreserve(rdev->r600_blit.shader_obj);
1246                 }
1247         }
1248         return 0;
1249 }
1250
1251 /* Plan is to move initialization in that function and use
1252  * helper function so that radeon_device_init pretty much
1253  * do nothing more than calling asic specific function. This
1254  * should also allow to remove a bunch of callback function
1255  * like vram_info.
1256  */
1257 int rv770_init(struct radeon_device *rdev)
1258 {
1259         int r;
1260
1261         /* This don't do much */
1262         r = radeon_gem_init(rdev);
1263         if (r)
1264                 return r;
1265         /* Read BIOS */
1266         if (!radeon_get_bios(rdev)) {
1267                 if (ASIC_IS_AVIVO(rdev))
1268                         return -EINVAL;
1269         }
1270         /* Must be an ATOMBIOS */
1271         if (!rdev->is_atom_bios) {
1272                 dev_err(rdev->dev, "Expecting atombios for R600 GPU\n");
1273                 return -EINVAL;
1274         }
1275         r = radeon_atombios_init(rdev);
1276         if (r)
1277                 return r;
1278         /* Post card if necessary */
1279         if (!radeon_card_posted(rdev)) {
1280                 if (!rdev->bios) {
1281                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
1282                         return -EINVAL;
1283                 }
1284                 DRM_INFO("GPU not posted. posting now...\n");
1285                 atom_asic_init(rdev->mode_info.atom_context);
1286         }
1287         /* Initialize scratch registers */
1288         r600_scratch_init(rdev);
1289         /* Initialize surface registers */
1290         radeon_surface_init(rdev);
1291         /* Initialize clocks */
1292         radeon_get_clock_info(rdev->ddev);
1293         /* Fence driver */
1294         r = radeon_fence_driver_init(rdev);
1295         if (r)
1296                 return r;
1297         /* initialize AGP */
1298         if (rdev->flags & RADEON_IS_AGP) {
1299                 r = radeon_agp_init(rdev);
1300                 if (r)
1301                         radeon_agp_disable(rdev);
1302         }
1303         r = rv770_mc_init(rdev);
1304         if (r)
1305                 return r;
1306         /* Memory manager */
1307         r = radeon_bo_init(rdev);
1308         if (r)
1309                 return r;
1310
1311         r = radeon_irq_kms_init(rdev);
1312         if (r)
1313                 return r;
1314
1315         rdev->cp.ring_obj = NULL;
1316         r600_ring_init(rdev, 1024 * 1024);
1317
1318         rdev->ih.ring_obj = NULL;
1319         r600_ih_ring_init(rdev, 64 * 1024);
1320
1321         r = r600_pcie_gart_init(rdev);
1322         if (r)
1323                 return r;
1324
1325         rdev->accel_working = true;
1326         r = rv770_startup(rdev);
1327         if (r) {
1328                 dev_err(rdev->dev, "disabling GPU acceleration\n");
1329                 r700_cp_fini(rdev);
1330                 r600_irq_fini(rdev);
1331                 radeon_wb_fini(rdev);
1332                 radeon_irq_kms_fini(rdev);
1333                 rv770_pcie_gart_fini(rdev);
1334                 rdev->accel_working = false;
1335         }
1336         if (rdev->accel_working) {
1337                 r = radeon_ib_pool_init(rdev);
1338                 if (r) {
1339                         dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
1340                         rdev->accel_working = false;
1341                 } else {
1342                         r = r600_ib_test(rdev);
1343                         if (r) {
1344                                 dev_err(rdev->dev, "IB test failed (%d).\n", r);
1345                                 rdev->accel_working = false;
1346                         }
1347                 }
1348         }
1349
1350         r = r600_audio_init(rdev);
1351         if (r) {
1352                 dev_err(rdev->dev, "radeon: audio init failed\n");
1353                 return r;
1354         }
1355
1356         return 0;
1357 }
1358
1359 void rv770_fini(struct radeon_device *rdev)
1360 {
1361         r600_blit_fini(rdev);
1362         r700_cp_fini(rdev);
1363         r600_irq_fini(rdev);
1364         radeon_wb_fini(rdev);
1365         radeon_irq_kms_fini(rdev);
1366         rv770_pcie_gart_fini(rdev);
1367         rv770_vram_scratch_fini(rdev);
1368         radeon_gem_fini(rdev);
1369         radeon_fence_driver_fini(rdev);
1370         radeon_agp_fini(rdev);
1371         radeon_bo_fini(rdev);
1372         radeon_atombios_fini(rdev);
1373         kfree(rdev->bios);
1374         rdev->bios = NULL;
1375 }
1376
1377 static void rv770_pcie_gen2_enable(struct radeon_device *rdev)
1378 {
1379         u32 link_width_cntl, lanes, speed_cntl, tmp;
1380         u16 link_cntl2;
1381
1382         if (radeon_pcie_gen2 == 0)
1383                 return;
1384
1385         if (rdev->flags & RADEON_IS_IGP)
1386                 return;
1387
1388         if (!(rdev->flags & RADEON_IS_PCIE))
1389                 return;
1390
1391         /* x2 cards have a special sequence */
1392         if (ASIC_IS_X2(rdev))
1393                 return;
1394
1395         /* advertise upconfig capability */
1396         link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
1397         link_width_cntl &= ~LC_UPCONFIGURE_DIS;
1398         WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1399         link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
1400         if (link_width_cntl & LC_RENEGOTIATION_SUPPORT) {
1401                 lanes = (link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT;
1402                 link_width_cntl &= ~(LC_LINK_WIDTH_MASK |
1403                                      LC_RECONFIG_ARC_MISSING_ESCAPE);
1404                 link_width_cntl |= lanes | LC_RECONFIG_NOW |
1405                         LC_RENEGOTIATE_EN | LC_UPCONFIGURE_SUPPORT;
1406                 WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1407         } else {
1408                 link_width_cntl |= LC_UPCONFIGURE_DIS;
1409                 WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1410         }
1411
1412         speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1413         if ((speed_cntl & LC_OTHER_SIDE_EVER_SENT_GEN2) &&
1414             (speed_cntl & LC_OTHER_SIDE_SUPPORTS_GEN2)) {
1415
1416                 tmp = RREG32(0x541c);
1417                 WREG32(0x541c, tmp | 0x8);
1418                 WREG32(MM_CFGREGS_CNTL, MM_WR_TO_CFG_EN);
1419                 link_cntl2 = RREG16(0x4088);
1420                 link_cntl2 &= ~TARGET_LINK_SPEED_MASK;
1421                 link_cntl2 |= 0x2;
1422                 WREG16(0x4088, link_cntl2);
1423                 WREG32(MM_CFGREGS_CNTL, 0);
1424
1425                 speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1426                 speed_cntl &= ~LC_TARGET_LINK_SPEED_OVERRIDE_EN;
1427                 WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
1428
1429                 speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1430                 speed_cntl |= LC_CLR_FAILED_SPD_CHANGE_CNT;
1431                 WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
1432
1433                 speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1434                 speed_cntl &= ~LC_CLR_FAILED_SPD_CHANGE_CNT;
1435                 WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
1436
1437                 speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1438                 speed_cntl |= LC_GEN2_EN_STRAP;
1439                 WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
1440
1441         } else {
1442                 link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
1443                 /* XXX: only disable it if gen1 bridge vendor == 0x111d or 0x1106 */
1444                 if (1)
1445                         link_width_cntl |= LC_UPCONFIGURE_DIS;
1446                 else
1447                         link_width_cntl &= ~LC_UPCONFIGURE_DIS;
1448                 WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1449         }
1450 }