2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Alex Deucher
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
29 #include "radeon_asic.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
/*
 * Microcode images requested at runtime via the kernel firmware loader.
 * Each CIK ASIC needs GFX (pfp/me/ce/mec), RLC and SDMA images; the
 * discrete Bonaire part additionally needs MC and SMC images.
 */
/* Bonaire (CIK dGPU) */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
/* Kaveri (CIK APU) */
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
/* Kabini (CIK APU) */
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
/* Helpers shared with older-generation code (implemented elsewhere in the
 * driver: r600.c, evergreen.c, sumo/si support files, cik_sdma.c). */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
/* Forward declarations for CIK-local helpers defined later in this file. */
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
/* NOTE(review): this declaration is truncated here -- the remaining
 * parameter(s) (presumably "bool enable);") continue on a line missing
 * from this chunk; confirm against the original file. */
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
80 /* get temperature in millidegrees */
81 int ci_get_temp(struct radeon_device *rdev)
86 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
92 actual_temp = temp & 0x1ff;
94 actual_temp = actual_temp * 1000;
99 /* get temperature in millidegrees */
100 int kv_get_temp(struct radeon_device *rdev)
105 temp = RREG32_SMC(0xC0300E0C);
108 actual_temp = (temp / 8) - 49;
112 actual_temp = actual_temp * 1000;
118 * Indirect registers accessor
120 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
125 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
126 WREG32(PCIE_INDEX, reg);
127 (void)RREG32(PCIE_INDEX);
128 r = RREG32(PCIE_DATA);
129 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
133 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
137 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
138 WREG32(PCIE_INDEX, reg);
139 (void)RREG32(PCIE_INDEX);
140 WREG32(PCIE_DATA, v);
141 (void)RREG32(PCIE_DATA);
142 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
/*
 * RLC save/restore register list for Spectre (Kaveri) GFX blocks.
 * Each entry packs a GRBM gfx index selector in the upper 16 bits and a
 * dword register offset (byte offset >> 2) in the lower 16 bits.
 * NOTE(review): this table appears truncated relative to upstream cik.c --
 * the opening brace, the interleaved 0x00000000 value words and the
 * closing "};" are missing from this chunk; verify against the original.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
	(0x0e00 << 16) | (0xc12c >> 2),
	(0x0e00 << 16) | (0xc140 >> 2),
	(0x0e00 << 16) | (0xc150 >> 2),
	(0x0e00 << 16) | (0xc15c >> 2),
	(0x0e00 << 16) | (0xc168 >> 2),
	(0x0e00 << 16) | (0xc170 >> 2),
	(0x0e00 << 16) | (0xc178 >> 2),
	(0x0e00 << 16) | (0xc204 >> 2),
	(0x0e00 << 16) | (0xc2b4 >> 2),
	(0x0e00 << 16) | (0xc2b8 >> 2),
	(0x0e00 << 16) | (0xc2bc >> 2),
	(0x0e00 << 16) | (0xc2c0 >> 2),
	(0x0e00 << 16) | (0x8228 >> 2),
	(0x0e00 << 16) | (0x829c >> 2),
	(0x0e00 << 16) | (0x869c >> 2),
	(0x0600 << 16) | (0x98f4 >> 2),
	(0x0e00 << 16) | (0x98f8 >> 2),
	(0x0e00 << 16) | (0x9900 >> 2),
	(0x0e00 << 16) | (0xc260 >> 2),
	(0x0e00 << 16) | (0x90e8 >> 2),
	(0x0e00 << 16) | (0x3c000 >> 2),
	(0x0e00 << 16) | (0x3c00c >> 2),
	(0x0e00 << 16) | (0x8c1c >> 2),
	(0x0e00 << 16) | (0x9700 >> 2),
	(0x0e00 << 16) | (0xcd20 >> 2),
	/* per-SE/SH instances of 0xcd20 */
	(0x4e00 << 16) | (0xcd20 >> 2),
	(0x5e00 << 16) | (0xcd20 >> 2),
	(0x6e00 << 16) | (0xcd20 >> 2),
	(0x7e00 << 16) | (0xcd20 >> 2),
	(0x8e00 << 16) | (0xcd20 >> 2),
	(0x9e00 << 16) | (0xcd20 >> 2),
	(0xae00 << 16) | (0xcd20 >> 2),
	(0xbe00 << 16) | (0xcd20 >> 2),
	(0x0e00 << 16) | (0x89bc >> 2),
	(0x0e00 << 16) | (0x8900 >> 2),
	(0x0e00 << 16) | (0xc130 >> 2),
	(0x0e00 << 16) | (0xc134 >> 2),
	(0x0e00 << 16) | (0xc1fc >> 2),
	(0x0e00 << 16) | (0xc208 >> 2),
	(0x0e00 << 16) | (0xc264 >> 2),
	(0x0e00 << 16) | (0xc268 >> 2),
	(0x0e00 << 16) | (0xc26c >> 2),
	(0x0e00 << 16) | (0xc270 >> 2),
	(0x0e00 << 16) | (0xc274 >> 2),
	(0x0e00 << 16) | (0xc278 >> 2),
	(0x0e00 << 16) | (0xc27c >> 2),
	(0x0e00 << 16) | (0xc280 >> 2),
	(0x0e00 << 16) | (0xc284 >> 2),
	(0x0e00 << 16) | (0xc288 >> 2),
	(0x0e00 << 16) | (0xc28c >> 2),
	(0x0e00 << 16) | (0xc290 >> 2),
	(0x0e00 << 16) | (0xc294 >> 2),
	(0x0e00 << 16) | (0xc298 >> 2),
	(0x0e00 << 16) | (0xc29c >> 2),
	(0x0e00 << 16) | (0xc2a0 >> 2),
	(0x0e00 << 16) | (0xc2a4 >> 2),
	(0x0e00 << 16) | (0xc2a8 >> 2),
	(0x0e00 << 16) | (0xc2ac >> 2),
	(0x0e00 << 16) | (0xc2b0 >> 2),
	(0x0e00 << 16) | (0x301d0 >> 2),
	(0x0e00 << 16) | (0x30238 >> 2),
	(0x0e00 << 16) | (0x30250 >> 2),
	(0x0e00 << 16) | (0x30254 >> 2),
	(0x0e00 << 16) | (0x30258 >> 2),
	(0x0e00 << 16) | (0x3025c >> 2),
	/* per-SE/SH instances of 0xc900..0xc910 */
	(0x4e00 << 16) | (0xc900 >> 2),
	(0x5e00 << 16) | (0xc900 >> 2),
	(0x6e00 << 16) | (0xc900 >> 2),
	(0x7e00 << 16) | (0xc900 >> 2),
	(0x8e00 << 16) | (0xc900 >> 2),
	(0x9e00 << 16) | (0xc900 >> 2),
	(0xae00 << 16) | (0xc900 >> 2),
	(0xbe00 << 16) | (0xc900 >> 2),
	(0x4e00 << 16) | (0xc904 >> 2),
	(0x5e00 << 16) | (0xc904 >> 2),
	(0x6e00 << 16) | (0xc904 >> 2),
	(0x7e00 << 16) | (0xc904 >> 2),
	(0x8e00 << 16) | (0xc904 >> 2),
	(0x9e00 << 16) | (0xc904 >> 2),
	(0xae00 << 16) | (0xc904 >> 2),
	(0xbe00 << 16) | (0xc904 >> 2),
	(0x4e00 << 16) | (0xc908 >> 2),
	(0x5e00 << 16) | (0xc908 >> 2),
	(0x6e00 << 16) | (0xc908 >> 2),
	(0x7e00 << 16) | (0xc908 >> 2),
	(0x8e00 << 16) | (0xc908 >> 2),
	(0x9e00 << 16) | (0xc908 >> 2),
	(0xae00 << 16) | (0xc908 >> 2),
	(0xbe00 << 16) | (0xc908 >> 2),
	(0x4e00 << 16) | (0xc90c >> 2),
	(0x5e00 << 16) | (0xc90c >> 2),
	(0x6e00 << 16) | (0xc90c >> 2),
	(0x7e00 << 16) | (0xc90c >> 2),
	(0x8e00 << 16) | (0xc90c >> 2),
	(0x9e00 << 16) | (0xc90c >> 2),
	(0xae00 << 16) | (0xc90c >> 2),
	(0xbe00 << 16) | (0xc90c >> 2),
	(0x4e00 << 16) | (0xc910 >> 2),
	(0x5e00 << 16) | (0xc910 >> 2),
	(0x6e00 << 16) | (0xc910 >> 2),
	(0x7e00 << 16) | (0xc910 >> 2),
	(0x8e00 << 16) | (0xc910 >> 2),
	(0x9e00 << 16) | (0xc910 >> 2),
	(0xae00 << 16) | (0xc910 >> 2),
	(0xbe00 << 16) | (0xc910 >> 2),
	(0x0e00 << 16) | (0xc99c >> 2),
	(0x0e00 << 16) | (0x9834 >> 2),
	(0x0000 << 16) | (0x30f00 >> 2),
	(0x0001 << 16) | (0x30f00 >> 2),
	(0x0000 << 16) | (0x30f04 >> 2),
	(0x0001 << 16) | (0x30f04 >> 2),
	(0x0000 << 16) | (0x30f08 >> 2),
	(0x0001 << 16) | (0x30f08 >> 2),
	(0x0000 << 16) | (0x30f0c >> 2),
	(0x0001 << 16) | (0x30f0c >> 2),
	(0x0600 << 16) | (0x9b7c >> 2),
	(0x0e00 << 16) | (0x8a14 >> 2),
	(0x0e00 << 16) | (0x8a18 >> 2),
	(0x0600 << 16) | (0x30a00 >> 2),
	(0x0e00 << 16) | (0x8bf0 >> 2),
	(0x0e00 << 16) | (0x8bcc >> 2),
	(0x0e00 << 16) | (0x8b24 >> 2),
	(0x0e00 << 16) | (0x30a04 >> 2),
	(0x0600 << 16) | (0x30a10 >> 2),
	(0x0600 << 16) | (0x30a14 >> 2),
	(0x0600 << 16) | (0x30a18 >> 2),
	(0x0600 << 16) | (0x30a2c >> 2),
	(0x0e00 << 16) | (0xc700 >> 2),
	(0x0e00 << 16) | (0xc704 >> 2),
	(0x0e00 << 16) | (0xc708 >> 2),
	(0x0e00 << 16) | (0xc768 >> 2),
	(0x0400 << 16) | (0xc770 >> 2),
	(0x0400 << 16) | (0xc774 >> 2),
	(0x0400 << 16) | (0xc778 >> 2),
	(0x0400 << 16) | (0xc77c >> 2),
	(0x0400 << 16) | (0xc780 >> 2),
	(0x0400 << 16) | (0xc784 >> 2),
	(0x0400 << 16) | (0xc788 >> 2),
	(0x0400 << 16) | (0xc78c >> 2),
	(0x0400 << 16) | (0xc798 >> 2),
	(0x0400 << 16) | (0xc79c >> 2),
	(0x0400 << 16) | (0xc7a0 >> 2),
	(0x0400 << 16) | (0xc7a4 >> 2),
	(0x0400 << 16) | (0xc7a8 >> 2),
	(0x0400 << 16) | (0xc7ac >> 2),
	(0x0400 << 16) | (0xc7b0 >> 2),
	(0x0400 << 16) | (0xc7b4 >> 2),
	(0x0e00 << 16) | (0x9100 >> 2),
	(0x0e00 << 16) | (0x3c010 >> 2),
	(0x0e00 << 16) | (0x92a8 >> 2),
	(0x0e00 << 16) | (0x92ac >> 2),
	(0x0e00 << 16) | (0x92b4 >> 2),
	(0x0e00 << 16) | (0x92b8 >> 2),
	(0x0e00 << 16) | (0x92bc >> 2),
	(0x0e00 << 16) | (0x92c0 >> 2),
	(0x0e00 << 16) | (0x92c4 >> 2),
	(0x0e00 << 16) | (0x92c8 >> 2),
	(0x0e00 << 16) | (0x92cc >> 2),
	(0x0e00 << 16) | (0x92d0 >> 2),
	(0x0e00 << 16) | (0x8c00 >> 2),
	(0x0e00 << 16) | (0x8c04 >> 2),
	(0x0e00 << 16) | (0x8c20 >> 2),
	(0x0e00 << 16) | (0x8c38 >> 2),
	(0x0e00 << 16) | (0x8c3c >> 2),
	(0x0e00 << 16) | (0xae00 >> 2),
	(0x0e00 << 16) | (0x9604 >> 2),
	(0x0e00 << 16) | (0xac08 >> 2),
	(0x0e00 << 16) | (0xac0c >> 2),
	(0x0e00 << 16) | (0xac10 >> 2),
	(0x0e00 << 16) | (0xac14 >> 2),
	(0x0e00 << 16) | (0xac58 >> 2),
	(0x0e00 << 16) | (0xac68 >> 2),
	(0x0e00 << 16) | (0xac6c >> 2),
	(0x0e00 << 16) | (0xac70 >> 2),
	(0x0e00 << 16) | (0xac74 >> 2),
	(0x0e00 << 16) | (0xac78 >> 2),
	(0x0e00 << 16) | (0xac7c >> 2),
	(0x0e00 << 16) | (0xac80 >> 2),
	(0x0e00 << 16) | (0xac84 >> 2),
	(0x0e00 << 16) | (0xac88 >> 2),
	(0x0e00 << 16) | (0xac8c >> 2),
	(0x0e00 << 16) | (0x970c >> 2),
	(0x0e00 << 16) | (0x9714 >> 2),
	(0x0e00 << 16) | (0x9718 >> 2),
	(0x0e00 << 16) | (0x971c >> 2),
	(0x0e00 << 16) | (0x31068 >> 2),
	(0x4e00 << 16) | (0x31068 >> 2),
	(0x5e00 << 16) | (0x31068 >> 2),
	(0x6e00 << 16) | (0x31068 >> 2),
	(0x7e00 << 16) | (0x31068 >> 2),
	(0x8e00 << 16) | (0x31068 >> 2),
	(0x9e00 << 16) | (0x31068 >> 2),
	(0xae00 << 16) | (0x31068 >> 2),
	(0xbe00 << 16) | (0x31068 >> 2),
	(0x0e00 << 16) | (0xcd10 >> 2),
	(0x0e00 << 16) | (0xcd14 >> 2),
	(0x0e00 << 16) | (0x88b0 >> 2),
	(0x0e00 << 16) | (0x88b4 >> 2),
	(0x0e00 << 16) | (0x88b8 >> 2),
	(0x0e00 << 16) | (0x88bc >> 2),
	(0x0400 << 16) | (0x89c0 >> 2),
	(0x0e00 << 16) | (0x88c4 >> 2),
	(0x0e00 << 16) | (0x88c8 >> 2),
	(0x0e00 << 16) | (0x88d0 >> 2),
	(0x0e00 << 16) | (0x88d4 >> 2),
	(0x0e00 << 16) | (0x88d8 >> 2),
	(0x0e00 << 16) | (0x8980 >> 2),
	(0x0e00 << 16) | (0x30938 >> 2),
	(0x0e00 << 16) | (0x3093c >> 2),
	(0x0e00 << 16) | (0x30940 >> 2),
	(0x0e00 << 16) | (0x89a0 >> 2),
	(0x0e00 << 16) | (0x30900 >> 2),
	(0x0e00 << 16) | (0x30904 >> 2),
	(0x0e00 << 16) | (0x89b4 >> 2),
	(0x0e00 << 16) | (0x3c210 >> 2),
	(0x0e00 << 16) | (0x3c214 >> 2),
	(0x0e00 << 16) | (0x3c218 >> 2),
	(0x0e00 << 16) | (0x8904 >> 2),
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
/*
 * RLC save/restore register list for Kalindi (Kabini) GFX blocks.
 * Same entry encoding as the Spectre list above: GRBM gfx index in the
 * upper 16 bits, dword register offset in the lower 16 bits.
 * NOTE(review): this table appears truncated relative to upstream cik.c --
 * the opening brace, the interleaved 0x00000000 value words and the
 * closing "};" are missing from this chunk; verify against the original.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
	(0x0e00 << 16) | (0xc12c >> 2),
	(0x0e00 << 16) | (0xc140 >> 2),
	(0x0e00 << 16) | (0xc150 >> 2),
	(0x0e00 << 16) | (0xc15c >> 2),
	(0x0e00 << 16) | (0xc168 >> 2),
	(0x0e00 << 16) | (0xc170 >> 2),
	(0x0e00 << 16) | (0xc204 >> 2),
	(0x0e00 << 16) | (0xc2b4 >> 2),
	(0x0e00 << 16) | (0xc2b8 >> 2),
	(0x0e00 << 16) | (0xc2bc >> 2),
	(0x0e00 << 16) | (0xc2c0 >> 2),
	(0x0e00 << 16) | (0x8228 >> 2),
	(0x0e00 << 16) | (0x829c >> 2),
	(0x0e00 << 16) | (0x869c >> 2),
	(0x0600 << 16) | (0x98f4 >> 2),
	(0x0e00 << 16) | (0x98f8 >> 2),
	(0x0e00 << 16) | (0x9900 >> 2),
	(0x0e00 << 16) | (0xc260 >> 2),
	(0x0e00 << 16) | (0x90e8 >> 2),
	(0x0e00 << 16) | (0x3c000 >> 2),
	(0x0e00 << 16) | (0x3c00c >> 2),
	(0x0e00 << 16) | (0x8c1c >> 2),
	(0x0e00 << 16) | (0x9700 >> 2),
	(0x0e00 << 16) | (0xcd20 >> 2),
	/* per-SE instances of 0xcd20 (Kalindi has fewer SEs than Spectre) */
	(0x4e00 << 16) | (0xcd20 >> 2),
	(0x5e00 << 16) | (0xcd20 >> 2),
	(0x6e00 << 16) | (0xcd20 >> 2),
	(0x7e00 << 16) | (0xcd20 >> 2),
	(0x0e00 << 16) | (0x89bc >> 2),
	(0x0e00 << 16) | (0x8900 >> 2),
	(0x0e00 << 16) | (0xc130 >> 2),
	(0x0e00 << 16) | (0xc134 >> 2),
	(0x0e00 << 16) | (0xc1fc >> 2),
	(0x0e00 << 16) | (0xc208 >> 2),
	(0x0e00 << 16) | (0xc264 >> 2),
	(0x0e00 << 16) | (0xc268 >> 2),
	(0x0e00 << 16) | (0xc26c >> 2),
	(0x0e00 << 16) | (0xc270 >> 2),
	(0x0e00 << 16) | (0xc274 >> 2),
	(0x0e00 << 16) | (0xc28c >> 2),
	(0x0e00 << 16) | (0xc290 >> 2),
	(0x0e00 << 16) | (0xc294 >> 2),
	(0x0e00 << 16) | (0xc298 >> 2),
	(0x0e00 << 16) | (0xc2a0 >> 2),
	(0x0e00 << 16) | (0xc2a4 >> 2),
	(0x0e00 << 16) | (0xc2a8 >> 2),
	(0x0e00 << 16) | (0xc2ac >> 2),
	(0x0e00 << 16) | (0x301d0 >> 2),
	(0x0e00 << 16) | (0x30238 >> 2),
	(0x0e00 << 16) | (0x30250 >> 2),
	(0x0e00 << 16) | (0x30254 >> 2),
	(0x0e00 << 16) | (0x30258 >> 2),
	(0x0e00 << 16) | (0x3025c >> 2),
	(0x4e00 << 16) | (0xc900 >> 2),
	(0x5e00 << 16) | (0xc900 >> 2),
	(0x6e00 << 16) | (0xc900 >> 2),
	(0x7e00 << 16) | (0xc900 >> 2),
	(0x4e00 << 16) | (0xc904 >> 2),
	(0x5e00 << 16) | (0xc904 >> 2),
	(0x6e00 << 16) | (0xc904 >> 2),
	(0x7e00 << 16) | (0xc904 >> 2),
	(0x4e00 << 16) | (0xc908 >> 2),
	(0x5e00 << 16) | (0xc908 >> 2),
	(0x6e00 << 16) | (0xc908 >> 2),
	(0x7e00 << 16) | (0xc908 >> 2),
	(0x4e00 << 16) | (0xc90c >> 2),
	(0x5e00 << 16) | (0xc90c >> 2),
	(0x6e00 << 16) | (0xc90c >> 2),
	(0x7e00 << 16) | (0xc90c >> 2),
	(0x4e00 << 16) | (0xc910 >> 2),
	(0x5e00 << 16) | (0xc910 >> 2),
	(0x6e00 << 16) | (0xc910 >> 2),
	(0x7e00 << 16) | (0xc910 >> 2),
	(0x0e00 << 16) | (0xc99c >> 2),
	(0x0e00 << 16) | (0x9834 >> 2),
	(0x0000 << 16) | (0x30f00 >> 2),
	(0x0000 << 16) | (0x30f04 >> 2),
	(0x0000 << 16) | (0x30f08 >> 2),
	(0x0000 << 16) | (0x30f0c >> 2),
	(0x0600 << 16) | (0x9b7c >> 2),
	(0x0e00 << 16) | (0x8a14 >> 2),
	(0x0e00 << 16) | (0x8a18 >> 2),
	(0x0600 << 16) | (0x30a00 >> 2),
	(0x0e00 << 16) | (0x8bf0 >> 2),
	(0x0e00 << 16) | (0x8bcc >> 2),
	(0x0e00 << 16) | (0x8b24 >> 2),
	(0x0e00 << 16) | (0x30a04 >> 2),
	(0x0600 << 16) | (0x30a10 >> 2),
	(0x0600 << 16) | (0x30a14 >> 2),
	(0x0600 << 16) | (0x30a18 >> 2),
	(0x0600 << 16) | (0x30a2c >> 2),
	(0x0e00 << 16) | (0xc700 >> 2),
	(0x0e00 << 16) | (0xc704 >> 2),
	(0x0e00 << 16) | (0xc708 >> 2),
	(0x0e00 << 16) | (0xc768 >> 2),
	(0x0400 << 16) | (0xc770 >> 2),
	(0x0400 << 16) | (0xc774 >> 2),
	(0x0400 << 16) | (0xc798 >> 2),
	(0x0400 << 16) | (0xc79c >> 2),
	(0x0e00 << 16) | (0x9100 >> 2),
	(0x0e00 << 16) | (0x3c010 >> 2),
	(0x0e00 << 16) | (0x8c00 >> 2),
	(0x0e00 << 16) | (0x8c04 >> 2),
	(0x0e00 << 16) | (0x8c20 >> 2),
	(0x0e00 << 16) | (0x8c38 >> 2),
	(0x0e00 << 16) | (0x8c3c >> 2),
	(0x0e00 << 16) | (0xae00 >> 2),
	(0x0e00 << 16) | (0x9604 >> 2),
	(0x0e00 << 16) | (0xac08 >> 2),
	(0x0e00 << 16) | (0xac0c >> 2),
	(0x0e00 << 16) | (0xac10 >> 2),
	(0x0e00 << 16) | (0xac14 >> 2),
	(0x0e00 << 16) | (0xac58 >> 2),
	(0x0e00 << 16) | (0xac68 >> 2),
	(0x0e00 << 16) | (0xac6c >> 2),
	(0x0e00 << 16) | (0xac70 >> 2),
	(0x0e00 << 16) | (0xac74 >> 2),
	(0x0e00 << 16) | (0xac78 >> 2),
	(0x0e00 << 16) | (0xac7c >> 2),
	(0x0e00 << 16) | (0xac80 >> 2),
	(0x0e00 << 16) | (0xac84 >> 2),
	(0x0e00 << 16) | (0xac88 >> 2),
	(0x0e00 << 16) | (0xac8c >> 2),
	(0x0e00 << 16) | (0x970c >> 2),
	(0x0e00 << 16) | (0x9714 >> 2),
	(0x0e00 << 16) | (0x9718 >> 2),
	(0x0e00 << 16) | (0x971c >> 2),
	(0x0e00 << 16) | (0x31068 >> 2),
	(0x4e00 << 16) | (0x31068 >> 2),
	(0x5e00 << 16) | (0x31068 >> 2),
	(0x6e00 << 16) | (0x31068 >> 2),
	(0x7e00 << 16) | (0x31068 >> 2),
	(0x0e00 << 16) | (0xcd10 >> 2),
	(0x0e00 << 16) | (0xcd14 >> 2),
	(0x0e00 << 16) | (0x88b0 >> 2),
	(0x0e00 << 16) | (0x88b4 >> 2),
	(0x0e00 << 16) | (0x88b8 >> 2),
	(0x0e00 << 16) | (0x88bc >> 2),
	(0x0400 << 16) | (0x89c0 >> 2),
	(0x0e00 << 16) | (0x88c4 >> 2),
	(0x0e00 << 16) | (0x88c8 >> 2),
	(0x0e00 << 16) | (0x88d0 >> 2),
	(0x0e00 << 16) | (0x88d4 >> 2),
	(0x0e00 << 16) | (0x88d8 >> 2),
	(0x0e00 << 16) | (0x8980 >> 2),
	(0x0e00 << 16) | (0x30938 >> 2),
	(0x0e00 << 16) | (0x3093c >> 2),
	(0x0e00 << 16) | (0x30940 >> 2),
	(0x0e00 << 16) | (0x89a0 >> 2),
	(0x0e00 << 16) | (0x30900 >> 2),
	(0x0e00 << 16) | (0x30904 >> 2),
	(0x0e00 << 16) | (0x89b4 >> 2),
	(0x0e00 << 16) | (0x3e1fc >> 2),
	(0x0e00 << 16) | (0x3c210 >> 2),
	(0x0e00 << 16) | (0x3c214 >> 2),
	(0x0e00 << 16) | (0x3c218 >> 2),
	(0x0e00 << 16) | (0x8904 >> 2),
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
917 static const u32 bonaire_golden_spm_registers[] =
919 0x30800, 0xe0ffffff, 0xe0000000
922 static const u32 bonaire_golden_common_registers[] =
924 0xc770, 0xffffffff, 0x00000800,
925 0xc774, 0xffffffff, 0x00000800,
926 0xc798, 0xffffffff, 0x00007fbf,
927 0xc79c, 0xffffffff, 0x00007faf
930 static const u32 bonaire_golden_registers[] =
932 0x3354, 0x00000333, 0x00000333,
933 0x3350, 0x000c0fc0, 0x00040200,
934 0x9a10, 0x00010000, 0x00058208,
935 0x3c000, 0xffff1fff, 0x00140000,
936 0x3c200, 0xfdfc0fff, 0x00000100,
937 0x3c234, 0x40000000, 0x40000200,
938 0x9830, 0xffffffff, 0x00000000,
939 0x9834, 0xf00fffff, 0x00000400,
940 0x9838, 0x0002021c, 0x00020200,
941 0xc78, 0x00000080, 0x00000000,
942 0x5bb0, 0x000000f0, 0x00000070,
943 0x5bc0, 0xf0311fff, 0x80300000,
944 0x98f8, 0x73773777, 0x12010001,
945 0x350c, 0x00810000, 0x408af000,
946 0x7030, 0x31000111, 0x00000011,
947 0x2f48, 0x73773777, 0x12010001,
948 0x220c, 0x00007fb6, 0x0021a1b1,
949 0x2210, 0x00007fb6, 0x002021b1,
950 0x2180, 0x00007fb6, 0x00002191,
951 0x2218, 0x00007fb6, 0x002121b1,
952 0x221c, 0x00007fb6, 0x002021b1,
953 0x21dc, 0x00007fb6, 0x00002191,
954 0x21e0, 0x00007fb6, 0x00002191,
955 0x3628, 0x0000003f, 0x0000000a,
956 0x362c, 0x0000003f, 0x0000000a,
957 0x2ae4, 0x00073ffe, 0x000022a2,
958 0x240c, 0x000007ff, 0x00000000,
959 0x8a14, 0xf000003f, 0x00000007,
960 0x8bf0, 0x00002001, 0x00000001,
961 0x8b24, 0xffffffff, 0x00ffffff,
962 0x30a04, 0x0000ff0f, 0x00000000,
963 0x28a4c, 0x07ffffff, 0x06000000,
964 0x4d8, 0x00000fff, 0x00000100,
965 0x3e78, 0x00000001, 0x00000002,
966 0x9100, 0x03000000, 0x0362c688,
967 0x8c00, 0x000000ff, 0x00000001,
968 0xe40, 0x00001fff, 0x00001fff,
969 0x9060, 0x0000007f, 0x00000020,
970 0x9508, 0x00010000, 0x00010000,
971 0xac14, 0x000003ff, 0x000000f3,
972 0xac0c, 0xffffffff, 0x00001032
975 static const u32 bonaire_mgcg_cgcg_init[] =
977 0xc420, 0xffffffff, 0xfffffffc,
978 0x30800, 0xffffffff, 0xe0000000,
979 0x3c2a0, 0xffffffff, 0x00000100,
980 0x3c208, 0xffffffff, 0x00000100,
981 0x3c2c0, 0xffffffff, 0xc0000100,
982 0x3c2c8, 0xffffffff, 0xc0000100,
983 0x3c2c4, 0xffffffff, 0xc0000100,
984 0x55e4, 0xffffffff, 0x00600100,
985 0x3c280, 0xffffffff, 0x00000100,
986 0x3c214, 0xffffffff, 0x06000100,
987 0x3c220, 0xffffffff, 0x00000100,
988 0x3c218, 0xffffffff, 0x06000100,
989 0x3c204, 0xffffffff, 0x00000100,
990 0x3c2e0, 0xffffffff, 0x00000100,
991 0x3c224, 0xffffffff, 0x00000100,
992 0x3c200, 0xffffffff, 0x00000100,
993 0x3c230, 0xffffffff, 0x00000100,
994 0x3c234, 0xffffffff, 0x00000100,
995 0x3c250, 0xffffffff, 0x00000100,
996 0x3c254, 0xffffffff, 0x00000100,
997 0x3c258, 0xffffffff, 0x00000100,
998 0x3c25c, 0xffffffff, 0x00000100,
999 0x3c260, 0xffffffff, 0x00000100,
1000 0x3c27c, 0xffffffff, 0x00000100,
1001 0x3c278, 0xffffffff, 0x00000100,
1002 0x3c210, 0xffffffff, 0x06000100,
1003 0x3c290, 0xffffffff, 0x00000100,
1004 0x3c274, 0xffffffff, 0x00000100,
1005 0x3c2b4, 0xffffffff, 0x00000100,
1006 0x3c2b0, 0xffffffff, 0x00000100,
1007 0x3c270, 0xffffffff, 0x00000100,
1008 0x30800, 0xffffffff, 0xe0000000,
1009 0x3c020, 0xffffffff, 0x00010000,
1010 0x3c024, 0xffffffff, 0x00030002,
1011 0x3c028, 0xffffffff, 0x00040007,
1012 0x3c02c, 0xffffffff, 0x00060005,
1013 0x3c030, 0xffffffff, 0x00090008,
1014 0x3c034, 0xffffffff, 0x00010000,
1015 0x3c038, 0xffffffff, 0x00030002,
1016 0x3c03c, 0xffffffff, 0x00040007,
1017 0x3c040, 0xffffffff, 0x00060005,
1018 0x3c044, 0xffffffff, 0x00090008,
1019 0x3c048, 0xffffffff, 0x00010000,
1020 0x3c04c, 0xffffffff, 0x00030002,
1021 0x3c050, 0xffffffff, 0x00040007,
1022 0x3c054, 0xffffffff, 0x00060005,
1023 0x3c058, 0xffffffff, 0x00090008,
1024 0x3c05c, 0xffffffff, 0x00010000,
1025 0x3c060, 0xffffffff, 0x00030002,
1026 0x3c064, 0xffffffff, 0x00040007,
1027 0x3c068, 0xffffffff, 0x00060005,
1028 0x3c06c, 0xffffffff, 0x00090008,
1029 0x3c070, 0xffffffff, 0x00010000,
1030 0x3c074, 0xffffffff, 0x00030002,
1031 0x3c078, 0xffffffff, 0x00040007,
1032 0x3c07c, 0xffffffff, 0x00060005,
1033 0x3c080, 0xffffffff, 0x00090008,
1034 0x3c084, 0xffffffff, 0x00010000,
1035 0x3c088, 0xffffffff, 0x00030002,
1036 0x3c08c, 0xffffffff, 0x00040007,
1037 0x3c090, 0xffffffff, 0x00060005,
1038 0x3c094, 0xffffffff, 0x00090008,
1039 0x3c098, 0xffffffff, 0x00010000,
1040 0x3c09c, 0xffffffff, 0x00030002,
1041 0x3c0a0, 0xffffffff, 0x00040007,
1042 0x3c0a4, 0xffffffff, 0x00060005,
1043 0x3c0a8, 0xffffffff, 0x00090008,
1044 0x3c000, 0xffffffff, 0x96e00200,
1045 0x8708, 0xffffffff, 0x00900100,
1046 0xc424, 0xffffffff, 0x0020003f,
1047 0x38, 0xffffffff, 0x0140001c,
1048 0x3c, 0x000f0000, 0x000f0000,
1049 0x220, 0xffffffff, 0xC060000C,
1050 0x224, 0xc0000fff, 0x00000100,
1051 0xf90, 0xffffffff, 0x00000100,
1052 0xf98, 0x00000101, 0x00000000,
1053 0x20a8, 0xffffffff, 0x00000104,
1054 0x55e4, 0xff000fff, 0x00000100,
1055 0x30cc, 0xc0000fff, 0x00000104,
1056 0xc1e4, 0x00000001, 0x00000001,
1057 0xd00c, 0xff000ff0, 0x00000100,
1058 0xd80c, 0xff000ff0, 0x00000100
1061 static const u32 spectre_golden_spm_registers[] =
1063 0x30800, 0xe0ffffff, 0xe0000000
1066 static const u32 spectre_golden_common_registers[] =
1068 0xc770, 0xffffffff, 0x00000800,
1069 0xc774, 0xffffffff, 0x00000800,
1070 0xc798, 0xffffffff, 0x00007fbf,
1071 0xc79c, 0xffffffff, 0x00007faf
1074 static const u32 spectre_golden_registers[] =
1076 0x3c000, 0xffff1fff, 0x96940200,
1077 0x3c00c, 0xffff0001, 0xff000000,
1078 0x3c200, 0xfffc0fff, 0x00000100,
1079 0x6ed8, 0x00010101, 0x00010000,
1080 0x9834, 0xf00fffff, 0x00000400,
1081 0x9838, 0xfffffffc, 0x00020200,
1082 0x5bb0, 0x000000f0, 0x00000070,
1083 0x5bc0, 0xf0311fff, 0x80300000,
1084 0x98f8, 0x73773777, 0x12010001,
1085 0x9b7c, 0x00ff0000, 0x00fc0000,
1086 0x2f48, 0x73773777, 0x12010001,
1087 0x8a14, 0xf000003f, 0x00000007,
1088 0x8b24, 0xffffffff, 0x00ffffff,
1089 0x28350, 0x3f3f3fff, 0x00000082,
1090 0x28355, 0x0000003f, 0x00000000,
1091 0x3e78, 0x00000001, 0x00000002,
1092 0x913c, 0xffff03df, 0x00000004,
1093 0xc768, 0x00000008, 0x00000008,
1094 0x8c00, 0x000008ff, 0x00000800,
1095 0x9508, 0x00010000, 0x00010000,
1096 0xac0c, 0xffffffff, 0x54763210,
1097 0x214f8, 0x01ff01ff, 0x00000002,
1098 0x21498, 0x007ff800, 0x00200000,
1099 0x2015c, 0xffffffff, 0x00000f40,
1100 0x30934, 0xffffffff, 0x00000001
1103 static const u32 spectre_mgcg_cgcg_init[] =
1105 0xc420, 0xffffffff, 0xfffffffc,
1106 0x30800, 0xffffffff, 0xe0000000,
1107 0x3c2a0, 0xffffffff, 0x00000100,
1108 0x3c208, 0xffffffff, 0x00000100,
1109 0x3c2c0, 0xffffffff, 0x00000100,
1110 0x3c2c8, 0xffffffff, 0x00000100,
1111 0x3c2c4, 0xffffffff, 0x00000100,
1112 0x55e4, 0xffffffff, 0x00600100,
1113 0x3c280, 0xffffffff, 0x00000100,
1114 0x3c214, 0xffffffff, 0x06000100,
1115 0x3c220, 0xffffffff, 0x00000100,
1116 0x3c218, 0xffffffff, 0x06000100,
1117 0x3c204, 0xffffffff, 0x00000100,
1118 0x3c2e0, 0xffffffff, 0x00000100,
1119 0x3c224, 0xffffffff, 0x00000100,
1120 0x3c200, 0xffffffff, 0x00000100,
1121 0x3c230, 0xffffffff, 0x00000100,
1122 0x3c234, 0xffffffff, 0x00000100,
1123 0x3c250, 0xffffffff, 0x00000100,
1124 0x3c254, 0xffffffff, 0x00000100,
1125 0x3c258, 0xffffffff, 0x00000100,
1126 0x3c25c, 0xffffffff, 0x00000100,
1127 0x3c260, 0xffffffff, 0x00000100,
1128 0x3c27c, 0xffffffff, 0x00000100,
1129 0x3c278, 0xffffffff, 0x00000100,
1130 0x3c210, 0xffffffff, 0x06000100,
1131 0x3c290, 0xffffffff, 0x00000100,
1132 0x3c274, 0xffffffff, 0x00000100,
1133 0x3c2b4, 0xffffffff, 0x00000100,
1134 0x3c2b0, 0xffffffff, 0x00000100,
1135 0x3c270, 0xffffffff, 0x00000100,
1136 0x30800, 0xffffffff, 0xe0000000,
1137 0x3c020, 0xffffffff, 0x00010000,
1138 0x3c024, 0xffffffff, 0x00030002,
1139 0x3c028, 0xffffffff, 0x00040007,
1140 0x3c02c, 0xffffffff, 0x00060005,
1141 0x3c030, 0xffffffff, 0x00090008,
1142 0x3c034, 0xffffffff, 0x00010000,
1143 0x3c038, 0xffffffff, 0x00030002,
1144 0x3c03c, 0xffffffff, 0x00040007,
1145 0x3c040, 0xffffffff, 0x00060005,
1146 0x3c044, 0xffffffff, 0x00090008,
1147 0x3c048, 0xffffffff, 0x00010000,
1148 0x3c04c, 0xffffffff, 0x00030002,
1149 0x3c050, 0xffffffff, 0x00040007,
1150 0x3c054, 0xffffffff, 0x00060005,
1151 0x3c058, 0xffffffff, 0x00090008,
1152 0x3c05c, 0xffffffff, 0x00010000,
1153 0x3c060, 0xffffffff, 0x00030002,
1154 0x3c064, 0xffffffff, 0x00040007,
1155 0x3c068, 0xffffffff, 0x00060005,
1156 0x3c06c, 0xffffffff, 0x00090008,
1157 0x3c070, 0xffffffff, 0x00010000,
1158 0x3c074, 0xffffffff, 0x00030002,
1159 0x3c078, 0xffffffff, 0x00040007,
1160 0x3c07c, 0xffffffff, 0x00060005,
1161 0x3c080, 0xffffffff, 0x00090008,
1162 0x3c084, 0xffffffff, 0x00010000,
1163 0x3c088, 0xffffffff, 0x00030002,
1164 0x3c08c, 0xffffffff, 0x00040007,
1165 0x3c090, 0xffffffff, 0x00060005,
1166 0x3c094, 0xffffffff, 0x00090008,
1167 0x3c098, 0xffffffff, 0x00010000,
1168 0x3c09c, 0xffffffff, 0x00030002,
1169 0x3c0a0, 0xffffffff, 0x00040007,
1170 0x3c0a4, 0xffffffff, 0x00060005,
1171 0x3c0a8, 0xffffffff, 0x00090008,
1172 0x3c0ac, 0xffffffff, 0x00010000,
1173 0x3c0b0, 0xffffffff, 0x00030002,
1174 0x3c0b4, 0xffffffff, 0x00040007,
1175 0x3c0b8, 0xffffffff, 0x00060005,
1176 0x3c0bc, 0xffffffff, 0x00090008,
1177 0x3c000, 0xffffffff, 0x96e00200,
1178 0x8708, 0xffffffff, 0x00900100,
1179 0xc424, 0xffffffff, 0x0020003f,
1180 0x38, 0xffffffff, 0x0140001c,
1181 0x3c, 0x000f0000, 0x000f0000,
1182 0x220, 0xffffffff, 0xC060000C,
1183 0x224, 0xc0000fff, 0x00000100,
1184 0xf90, 0xffffffff, 0x00000100,
1185 0xf98, 0x00000101, 0x00000000,
1186 0x20a8, 0xffffffff, 0x00000104,
1187 0x55e4, 0xff000fff, 0x00000100,
1188 0x30cc, 0xc0000fff, 0x00000104,
1189 0xc1e4, 0x00000001, 0x00000001,
1190 0xd00c, 0xff000ff0, 0x00000100,
1191 0xd80c, 0xff000ff0, 0x00000100
1194 static const u32 kalindi_golden_spm_registers[] =
1196 0x30800, 0xe0ffffff, 0xe0000000
1199 static const u32 kalindi_golden_common_registers[] =
1201 0xc770, 0xffffffff, 0x00000800,
1202 0xc774, 0xffffffff, 0x00000800,
1203 0xc798, 0xffffffff, 0x00007fbf,
1204 0xc79c, 0xffffffff, 0x00007faf
1207 static const u32 kalindi_golden_registers[] =
1209 0x3c000, 0xffffdfff, 0x6e944040,
1210 0x55e4, 0xff607fff, 0xfc000100,
1211 0x3c220, 0xff000fff, 0x00000100,
1212 0x3c224, 0xff000fff, 0x00000100,
1213 0x3c200, 0xfffc0fff, 0x00000100,
1214 0x6ed8, 0x00010101, 0x00010000,
1215 0x9830, 0xffffffff, 0x00000000,
1216 0x9834, 0xf00fffff, 0x00000400,
1217 0x5bb0, 0x000000f0, 0x00000070,
1218 0x5bc0, 0xf0311fff, 0x80300000,
1219 0x98f8, 0x73773777, 0x12010001,
1220 0x98fc, 0xffffffff, 0x00000010,
1221 0x9b7c, 0x00ff0000, 0x00fc0000,
1222 0x8030, 0x00001f0f, 0x0000100a,
1223 0x2f48, 0x73773777, 0x12010001,
1224 0x2408, 0x000fffff, 0x000c007f,
1225 0x8a14, 0xf000003f, 0x00000007,
1226 0x8b24, 0x3fff3fff, 0x00ffcfff,
1227 0x30a04, 0x0000ff0f, 0x00000000,
1228 0x28a4c, 0x07ffffff, 0x06000000,
1229 0x4d8, 0x00000fff, 0x00000100,
1230 0x3e78, 0x00000001, 0x00000002,
1231 0xc768, 0x00000008, 0x00000008,
1232 0x8c00, 0x000000ff, 0x00000003,
1233 0x214f8, 0x01ff01ff, 0x00000002,
1234 0x21498, 0x007ff800, 0x00200000,
1235 0x2015c, 0xffffffff, 0x00000f40,
1236 0x88c4, 0x001f3ae3, 0x00000082,
1237 0x88d4, 0x0000001f, 0x00000010,
1238 0x30934, 0xffffffff, 0x00000000
1241 static const u32 kalindi_mgcg_cgcg_init[] =
1243 0xc420, 0xffffffff, 0xfffffffc,
1244 0x30800, 0xffffffff, 0xe0000000,
1245 0x3c2a0, 0xffffffff, 0x00000100,
1246 0x3c208, 0xffffffff, 0x00000100,
1247 0x3c2c0, 0xffffffff, 0x00000100,
1248 0x3c2c8, 0xffffffff, 0x00000100,
1249 0x3c2c4, 0xffffffff, 0x00000100,
1250 0x55e4, 0xffffffff, 0x00600100,
1251 0x3c280, 0xffffffff, 0x00000100,
1252 0x3c214, 0xffffffff, 0x06000100,
1253 0x3c220, 0xffffffff, 0x00000100,
1254 0x3c218, 0xffffffff, 0x06000100,
1255 0x3c204, 0xffffffff, 0x00000100,
1256 0x3c2e0, 0xffffffff, 0x00000100,
1257 0x3c224, 0xffffffff, 0x00000100,
1258 0x3c200, 0xffffffff, 0x00000100,
1259 0x3c230, 0xffffffff, 0x00000100,
1260 0x3c234, 0xffffffff, 0x00000100,
1261 0x3c250, 0xffffffff, 0x00000100,
1262 0x3c254, 0xffffffff, 0x00000100,
1263 0x3c258, 0xffffffff, 0x00000100,
1264 0x3c25c, 0xffffffff, 0x00000100,
1265 0x3c260, 0xffffffff, 0x00000100,
1266 0x3c27c, 0xffffffff, 0x00000100,
1267 0x3c278, 0xffffffff, 0x00000100,
1268 0x3c210, 0xffffffff, 0x06000100,
1269 0x3c290, 0xffffffff, 0x00000100,
1270 0x3c274, 0xffffffff, 0x00000100,
1271 0x3c2b4, 0xffffffff, 0x00000100,
1272 0x3c2b0, 0xffffffff, 0x00000100,
1273 0x3c270, 0xffffffff, 0x00000100,
1274 0x30800, 0xffffffff, 0xe0000000,
1275 0x3c020, 0xffffffff, 0x00010000,
1276 0x3c024, 0xffffffff, 0x00030002,
1277 0x3c028, 0xffffffff, 0x00040007,
1278 0x3c02c, 0xffffffff, 0x00060005,
1279 0x3c030, 0xffffffff, 0x00090008,
1280 0x3c034, 0xffffffff, 0x00010000,
1281 0x3c038, 0xffffffff, 0x00030002,
1282 0x3c03c, 0xffffffff, 0x00040007,
1283 0x3c040, 0xffffffff, 0x00060005,
1284 0x3c044, 0xffffffff, 0x00090008,
1285 0x3c000, 0xffffffff, 0x96e00200,
1286 0x8708, 0xffffffff, 0x00900100,
1287 0xc424, 0xffffffff, 0x0020003f,
1288 0x38, 0xffffffff, 0x0140001c,
1289 0x3c, 0x000f0000, 0x000f0000,
1290 0x220, 0xffffffff, 0xC060000C,
1291 0x224, 0xc0000fff, 0x00000100,
1292 0x20a8, 0xffffffff, 0x00000104,
1293 0x55e4, 0xff000fff, 0x00000100,
1294 0x30cc, 0xc0000fff, 0x00000104,
1295 0xc1e4, 0x00000001, 0x00000001,
1296 0xd00c, 0xff000ff0, 0x00000100,
1297 0xd80c, 0xff000ff0, 0x00000100
/*
 * cik_init_golden_registers - program per-ASIC "golden" register settings
 *
 * NOTE(review): extraction artifact — each line below carries its original
 * file line number, and structural lines (case labels, braces, breaks) are
 * missing from this fragment. Code is kept byte-identical.
 *
 * Programs the recommended ("golden") register sequences for the detected
 * CIK ASIC via radeon_program_register_sequence(): clockgating init
 * (mgcg_cgcg), golden registers, common registers and SPM registers.
 * The bonaire, kalindi and spectre table groups are visible here; the
 * switch on rdev->family selects which group is applied (the case labels
 * themselves were lost in extraction — presumably CHIP_BONAIRE, CHIP_KABINI
 * and CHIP_KAVERI respectively; confirm against upstream).
 */
1300 static void cik_init_golden_registers(struct radeon_device *rdev)
1302 switch (rdev->family) {
/* Bonaire (discrete) table group */
1304 radeon_program_register_sequence(rdev,
1305 bonaire_mgcg_cgcg_init,
1306 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1307 radeon_program_register_sequence(rdev,
1308 bonaire_golden_registers,
1309 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1310 radeon_program_register_sequence(rdev,
1311 bonaire_golden_common_registers,
1312 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1313 radeon_program_register_sequence(rdev,
1314 bonaire_golden_spm_registers,
1315 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
/* Kalindi table group (APU) */
1318 radeon_program_register_sequence(rdev,
1319 kalindi_mgcg_cgcg_init,
1320 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1321 radeon_program_register_sequence(rdev,
1322 kalindi_golden_registers,
1323 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1324 radeon_program_register_sequence(rdev,
1325 kalindi_golden_common_registers,
1326 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1327 radeon_program_register_sequence(rdev,
1328 kalindi_golden_spm_registers,
1329 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
/* Spectre table group (APU) */
1332 radeon_program_register_sequence(rdev,
1333 spectre_mgcg_cgcg_init,
1334 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1335 radeon_program_register_sequence(rdev,
1336 spectre_golden_registers,
1337 (const u32)ARRAY_SIZE(spectre_golden_registers));
1338 radeon_program_register_sequence(rdev,
1339 spectre_golden_common_registers,
1340 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1341 radeon_program_register_sequence(rdev,
1342 spectre_golden_spm_registers,
1343 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
/*
 * NOTE(review): fragment — kernel-doc opener, braces and the else-branch
 * line between the two RREG32_SMC tests are missing from this extraction.
 */
1351 * cik_get_xclk - get the xclk
1353 * @rdev: radeon_device pointer
1355 * Returns the reference clock used by the gfx engine
1358 u32 cik_get_xclk(struct radeon_device *rdev)
/* Start from the SPLL reference frequency and halve/quarter it as needed. */
1360 u32 reference_clock = rdev->clock.spll.reference_freq;
/* APUs: GPU_COUNTER_CLK set means the counter clock runs at ref/2. */
1362 if (rdev->flags & RADEON_IS_IGP) {
1363 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1364 return reference_clock / 2;
/* Discrete parts (presumably the else branch): XTALIN divided by 4. */
1366 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1367 return reference_clock / 4;
/* No divider applies: return the raw reference clock. */
1369 return reference_clock;
/*
 * NOTE(review): fragment — the else/return-0 path after the DRM_ERROR line
 * is missing from this extraction; code kept byte-identical.
 */
1373 * cik_mm_rdoorbell - read a doorbell dword
1375 * @rdev: radeon_device pointer
1376 * @offset: byte offset into the aperture
1378 * Returns the value in the doorbell aperture at the
1379 * requested offset (CIK).
1381 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
/* Bounds-check against the mapped doorbell aperture before the MMIO read. */
1383 if (offset < rdev->doorbell.size) {
1384 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
/* Out-of-range read: log it (fallback return value not visible here). */
1386 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
/*
 * NOTE(review): fragment — braces and the else path are missing from this
 * extraction; code kept byte-identical.
 */
1392 * cik_mm_wdoorbell - write a doorbell dword
1394 * @rdev: radeon_device pointer
1395 * @offset: byte offset into the aperture
1396 * @v: value to write
1398 * Writes @v to the doorbell aperture at the
1399 * requested offset (CIK).
1401 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
/* Bounds-check against the mapped doorbell aperture before the MMIO write. */
1403 if (offset < rdev->doorbell.size) {
1404 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
/* Out-of-range write: log and drop it. */
1406 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
/*
 * bonaire_io_mc_regs - MC {register index, value} pairs for Bonaire.
 *
 * Consumed by ci_mc_load_microcode(): each pair is written via
 * MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA before the MC ucode upload.
 * NOTE(review): extraction artifact — lines carry original file line
 * numbers and the closing "};" line is missing; data kept byte-identical.
 */
1410 #define BONAIRE_IO_MC_REGS_SIZE 36
1412 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1414 {0x00000070, 0x04400000},
1415 {0x00000071, 0x80c01803},
1416 {0x00000072, 0x00004004},
1417 {0x00000073, 0x00000100},
1418 {0x00000074, 0x00ff0000},
1419 {0x00000075, 0x34000000},
1420 {0x00000076, 0x08000014},
1421 {0x00000077, 0x00cc08ec},
1422 {0x00000078, 0x00000400},
1423 {0x00000079, 0x00000000},
1424 {0x0000007a, 0x04090000},
1425 {0x0000007c, 0x00000000},
1426 {0x0000007e, 0x4408a8e8},
1427 {0x0000007f, 0x00000304},
1428 {0x00000080, 0x00000000},
1429 {0x00000082, 0x00000001},
1430 {0x00000083, 0x00000002},
1431 {0x00000084, 0xf3e4f400},
1432 {0x00000085, 0x052024e3},
1433 {0x00000087, 0x00000000},
1434 {0x00000088, 0x01000000},
1435 {0x0000008a, 0x1c0a0000},
1436 {0x0000008b, 0xff010000},
1437 {0x0000008d, 0xffffefff},
1438 {0x0000008e, 0xfff3efff},
1439 {0x0000008f, 0xfff3efbf},
1440 {0x00000092, 0xf7ffffff},
1441 {0x00000093, 0xffffff7f},
1442 {0x00000095, 0x00101101},
1443 {0x00000096, 0x00000fff},
1444 {0x00000097, 0x00116fff},
1445 {0x00000098, 0x60010000},
1446 {0x00000099, 0x10010000},
1447 {0x0000009a, 0x00006000},
1448 {0x0000009b, 0x00001000},
1449 {0x0000009f, 0x00b48000}
/*
 * NOTE(review): fragment — the kernel-doc lines for @pipe/@queue/@vmid and
 * the MEID()/VMID() field lines of the srbm_gfx_cntl expression (original
 * lines between PIPEID and QUEUEID) are missing from this extraction.
 */
1453 * cik_srbm_select - select specific register instances
1455 * @rdev: radeon_device pointer
1456 * @me: selected ME (micro engine)
1461 * Switches the currently active registers instances. Some
1462 * registers are instanced per VMID, others are instanced per
1463 * me/pipe/queue combination.
1465 static void cik_srbm_select(struct radeon_device *rdev,
1466 u32 me, u32 pipe, u32 queue, u32 vmid)
/* Pack the selectors into SRBM_GFX_CNTL (pipe:2 bits, queue:3 bits here). */
1468 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1471 QUEUEID(queue & 0x7));
1472 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
/*
 * NOTE(review): fragment — case labels, the !rdev->mc_fw early return,
 * the "if (running)" guards around blackout handling, loop braces and the
 * final return are missing from this extraction; code kept byte-identical.
 */
1477 * ci_mc_load_microcode - load MC ucode into the hw
1479 * @rdev: radeon_device pointer
1481 * Load the GDDR MC ucode into the hw (CIK).
1482 * Returns 0 on success, error on failure.
1484 static int ci_mc_load_microcode(struct radeon_device *rdev)
1486 const __be32 *fw_data;
1487 u32 running, blackout = 0;
1489 int i, ucode_size, regs_size;
/* Select the per-ASIC io-reg table and ucode size (Bonaire case visible). */
1494 switch (rdev->family) {
1497 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1498 ucode_size = CIK_MC_UCODE_SIZE;
1499 regs_size = BONAIRE_IO_MC_REGS_SIZE;
/* If the MC sequencer is running, black out MC clients during the upload. */
1503 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1507 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1508 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1511 /* reset the engine and set to writable */
1512 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1513 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1515 /* load mc io regs */
1516 for (i = 0; i < regs_size; i++) {
1517 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1518 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1520 /* load the MC ucode */
1521 fw_data = (const __be32 *)rdev->mc_fw->data;
/* Firmware words are big-endian on disk; byteswap each before writing. */
1522 for (i = 0; i < ucode_size; i++)
1523 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1525 /* put the engine back into the active state */
1526 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1527 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1528 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1530 /* wait for training to complete */
/* Poll (bounded by usec_timeout) for D0 then D1 memory training done. */
1531 for (i = 0; i < rdev->usec_timeout; i++) {
1532 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1536 for (i = 0; i < rdev->usec_timeout; i++) {
1537 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
/* Restore the saved blackout state once the upload is complete. */
1543 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
/*
 * NOTE(review): fragment — case labels, "if (err) goto out" checks after
 * each request_firmware(), err = -EINVAL assignments, the "out:" label and
 * final return are missing from this extraction; code kept byte-identical.
 */
1550 * cik_init_microcode - load ucode images from disk
1552 * @rdev: radeon_device pointer
1554 * Use the firmware interface to load the ucode images into
1555 * the driver (not loaded into hw).
1556 * Returns 0 on success, error on failure.
1558 static int cik_init_microcode(struct radeon_device *rdev)
1560 const char *chip_name;
/* Expected byte sizes for each firmware image, per ASIC family. */
1561 size_t pfp_req_size, me_req_size, ce_req_size,
1562 mec_req_size, rlc_req_size, mc_req_size,
1563 sdma_req_size, smc_req_size;
1569 switch (rdev->family) {
/* Bonaire (discrete): also needs MC and SMC images (see below). */
1571 chip_name = "BONAIRE";
1572 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1573 me_req_size = CIK_ME_UCODE_SIZE * 4;
1574 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1575 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1576 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1577 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1578 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1579 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
/* Kaveri (APU): no MC/SMC firmware. */
1582 chip_name = "KAVERI";
1583 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1584 me_req_size = CIK_ME_UCODE_SIZE * 4;
1585 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1586 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1587 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1588 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
/* Kabini (APU): no MC/SMC firmware. */
1591 chip_name = "KABINI";
1592 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1593 me_req_size = CIK_ME_UCODE_SIZE * 4;
1594 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1595 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1596 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1597 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1602 DRM_INFO("Loading %s Microcode\n", chip_name);
/* Fetch each image via the firmware loader and validate its exact size. */
1604 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1605 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1608 if (rdev->pfp_fw->size != pfp_req_size) {
1610 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1611 rdev->pfp_fw->size, fw_name);
1616 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1617 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1620 if (rdev->me_fw->size != me_req_size) {
1622 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1623 rdev->me_fw->size, fw_name);
1627 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1628 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1631 if (rdev->ce_fw->size != ce_req_size) {
1633 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1634 rdev->ce_fw->size, fw_name);
1638 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1639 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1642 if (rdev->mec_fw->size != mec_req_size) {
1644 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1645 rdev->mec_fw->size, fw_name);
1649 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1650 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1653 if (rdev->rlc_fw->size != rlc_req_size) {
1655 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1656 rdev->rlc_fw->size, fw_name);
1660 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1661 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1664 if (rdev->sdma_fw->size != sdma_req_size) {
1666 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1667 rdev->sdma_fw->size, fw_name);
1671 /* No SMC, MC ucode on APUs */
1672 if (!(rdev->flags & RADEON_IS_IGP)) {
1673 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1674 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1677 if (rdev->mc_fw->size != mc_req_size) {
1679 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1680 rdev->mc_fw->size, fw_name);
/* SMC firmware is optional: on failure, warn and continue without it. */
1684 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1685 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1688 "smc: error loading firmware \"%s\"\n",
1690 release_firmware(rdev->smc_fw);
1691 rdev->smc_fw = NULL;
1693 } else if (rdev->smc_fw->size != smc_req_size) {
1695 "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1696 rdev->smc_fw->size, fw_name);
/* Error path (presumably "out:"): drop every firmware loaded so far. */
1705 "cik_cp: Failed to load firmware \"%s\"\n",
1707 release_firmware(rdev->pfp_fw);
1708 rdev->pfp_fw = NULL;
1709 release_firmware(rdev->me_fw);
1711 release_firmware(rdev->ce_fw);
1713 release_firmware(rdev->rlc_fw);
1714 rdev->rlc_fw = NULL;
1715 release_firmware(rdev->mc_fw);
1717 release_firmware(rdev->smc_fw);
1718 rdev->smc_fw = NULL;
1727 * cik_tiling_mode_table_init - init the hw tiling table
1729 * @rdev: radeon_device pointer
1731 * Starting with SI, the tiling setup is done globally in a
1732 * set of 32 tiling modes. Rather than selecting each set of
1733 * parameters per surface as on older asics, we just select
1734 * which index in the tiling table we want to use, and the
1735 * surface uses those parameters (CIK).
1737 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1739 const u32 num_tile_mode_states = 32;
1740 const u32 num_secondary_tile_mode_states = 16;
1741 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1742 u32 num_pipe_configs;
1743 u32 num_rbs = rdev->config.cik.max_backends_per_se *
1744 rdev->config.cik.max_shader_engines;
1746 switch (rdev->config.cik.mem_row_size_in_kb) {
1748 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1752 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1755 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1759 num_pipe_configs = rdev->config.cik.max_tile_pipes;
1760 if (num_pipe_configs > 8)
1761 num_pipe_configs = 16;
1763 if (num_pipe_configs == 16) {
1764 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1765 switch (reg_offset) {
1767 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1768 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1769 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1770 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1773 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1774 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1775 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1776 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1779 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1780 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1781 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1782 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1785 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1786 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1787 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1788 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1791 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1792 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1793 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1794 TILE_SPLIT(split_equal_to_row_size));
1797 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1798 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1801 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1802 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1803 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1804 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1807 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1808 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1809 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1810 TILE_SPLIT(split_equal_to_row_size));
1813 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1814 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1817 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1818 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1821 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1822 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1823 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1824 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1827 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1828 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1829 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
1830 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1833 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1834 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1835 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1836 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1839 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1840 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1843 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1844 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1845 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1846 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1849 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1850 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1851 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
1852 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1855 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1856 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1857 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1861 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1862 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1865 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1866 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1867 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1868 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1871 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1872 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1873 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
1874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1877 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1878 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1879 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1880 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1886 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1887 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1889 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1890 switch (reg_offset) {
1892 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1893 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1894 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1895 NUM_BANKS(ADDR_SURF_16_BANK));
1898 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1899 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1900 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1901 NUM_BANKS(ADDR_SURF_16_BANK));
1904 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1905 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1906 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1907 NUM_BANKS(ADDR_SURF_16_BANK));
1910 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1911 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1912 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1913 NUM_BANKS(ADDR_SURF_16_BANK));
1916 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1917 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1918 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1919 NUM_BANKS(ADDR_SURF_8_BANK));
1922 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1923 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1924 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1925 NUM_BANKS(ADDR_SURF_4_BANK));
1928 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1929 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1930 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1931 NUM_BANKS(ADDR_SURF_2_BANK));
1934 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1935 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1936 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1937 NUM_BANKS(ADDR_SURF_16_BANK));
1940 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1941 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1942 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1943 NUM_BANKS(ADDR_SURF_16_BANK));
1946 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1947 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1948 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1949 NUM_BANKS(ADDR_SURF_16_BANK));
1952 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1953 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1954 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1955 NUM_BANKS(ADDR_SURF_8_BANK));
1958 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1959 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1960 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1961 NUM_BANKS(ADDR_SURF_4_BANK));
1964 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1965 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1966 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1967 NUM_BANKS(ADDR_SURF_2_BANK));
1970 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1971 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1972 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1973 NUM_BANKS(ADDR_SURF_2_BANK));
1979 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1981 } else if (num_pipe_configs == 8) {
1982 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1983 switch (reg_offset) {
1985 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1986 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1987 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1988 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1991 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1992 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1993 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1994 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1997 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1998 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1999 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2000 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2003 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2004 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2005 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2006 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2009 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2010 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2011 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2012 TILE_SPLIT(split_equal_to_row_size));
2015 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2016 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2019 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2020 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2021 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2022 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2025 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2026 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2027 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2028 TILE_SPLIT(split_equal_to_row_size));
2031 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2035 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2036 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2039 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2040 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2041 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2042 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2045 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2046 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2047 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2051 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2052 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2053 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2054 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2057 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2058 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2061 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2062 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2063 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2064 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2067 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2068 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2069 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2073 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2074 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2075 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2079 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2080 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2083 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2084 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2085 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2086 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2089 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2090 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2091 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2095 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2096 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2097 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2104 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2105 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2107 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2108 switch (reg_offset) {
2110 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2111 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2112 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2113 NUM_BANKS(ADDR_SURF_16_BANK));
2116 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2117 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2118 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2119 NUM_BANKS(ADDR_SURF_16_BANK));
2122 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2123 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2124 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2125 NUM_BANKS(ADDR_SURF_16_BANK));
2128 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2129 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2130 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2131 NUM_BANKS(ADDR_SURF_16_BANK));
2134 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2135 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2136 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2137 NUM_BANKS(ADDR_SURF_8_BANK));
2140 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2141 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2142 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2143 NUM_BANKS(ADDR_SURF_4_BANK));
2146 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2147 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2148 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2149 NUM_BANKS(ADDR_SURF_2_BANK));
2152 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2153 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2154 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2155 NUM_BANKS(ADDR_SURF_16_BANK));
2158 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2159 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2160 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2161 NUM_BANKS(ADDR_SURF_16_BANK));
2164 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2167 NUM_BANKS(ADDR_SURF_16_BANK));
2170 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2171 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2172 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2173 NUM_BANKS(ADDR_SURF_16_BANK));
2176 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2177 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2178 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2179 NUM_BANKS(ADDR_SURF_8_BANK));
2182 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2183 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2184 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2185 NUM_BANKS(ADDR_SURF_4_BANK));
2188 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2189 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2190 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2191 NUM_BANKS(ADDR_SURF_2_BANK));
2197 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2199 } else if (num_pipe_configs == 4) {
2201 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2202 switch (reg_offset) {
2204 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2205 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2206 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2207 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2210 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2211 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2212 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2213 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2216 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2217 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2218 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2219 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2222 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2223 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2224 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2225 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2228 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2230 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2231 TILE_SPLIT(split_equal_to_row_size));
2234 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2235 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2238 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2239 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2240 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2241 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2244 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2245 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2246 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2247 TILE_SPLIT(split_equal_to_row_size));
2250 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2251 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2254 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2255 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2258 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2259 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2260 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2264 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2265 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2266 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2267 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2270 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2271 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2272 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2277 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2280 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2281 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2282 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2286 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2287 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2288 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2292 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2293 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2294 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2295 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2299 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2302 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2303 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2304 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2308 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2309 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2310 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2311 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2314 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2315 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2316 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2323 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2324 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2326 } else if (num_rbs < 4) {
2327 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2328 switch (reg_offset) {
2330 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2332 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2333 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2336 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2338 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2339 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2342 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2344 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2345 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2348 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2350 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2351 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2354 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2355 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2356 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2357 TILE_SPLIT(split_equal_to_row_size));
2360 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2361 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2364 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2365 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2366 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2367 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2370 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2371 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2373 TILE_SPLIT(split_equal_to_row_size));
2376 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2377 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2380 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2381 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2384 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2385 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2386 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2387 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2390 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2391 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2392 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2393 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2396 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2397 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2398 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2399 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2402 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2403 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2406 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2408 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2409 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2413 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2414 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2415 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2419 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2420 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2421 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2424 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2428 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2429 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2430 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2431 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2435 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2443 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2449 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2450 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2453 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2454 switch (reg_offset) {
2456 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2459 NUM_BANKS(ADDR_SURF_16_BANK));
2462 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2464 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2465 NUM_BANKS(ADDR_SURF_16_BANK));
2468 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471 NUM_BANKS(ADDR_SURF_16_BANK));
2474 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2477 NUM_BANKS(ADDR_SURF_16_BANK));
2480 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2483 NUM_BANKS(ADDR_SURF_16_BANK));
2486 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489 NUM_BANKS(ADDR_SURF_8_BANK));
2492 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2494 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2495 NUM_BANKS(ADDR_SURF_4_BANK));
2498 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2499 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2500 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2501 NUM_BANKS(ADDR_SURF_16_BANK));
2504 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2505 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2506 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2507 NUM_BANKS(ADDR_SURF_16_BANK));
2510 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2512 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2513 NUM_BANKS(ADDR_SURF_16_BANK));
2516 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2518 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2519 NUM_BANKS(ADDR_SURF_16_BANK));
2522 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2524 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2525 NUM_BANKS(ADDR_SURF_16_BANK));
2528 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2531 NUM_BANKS(ADDR_SURF_8_BANK));
2534 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2536 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2537 NUM_BANKS(ADDR_SURF_4_BANK));
2543 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2545 } else if (num_pipe_configs == 2) {
2546 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2547 switch (reg_offset) {
2549 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2551 PIPE_CONFIG(ADDR_SURF_P2) |
2552 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2555 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2556 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2557 PIPE_CONFIG(ADDR_SURF_P2) |
2558 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2561 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2563 PIPE_CONFIG(ADDR_SURF_P2) |
2564 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2567 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2568 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2569 PIPE_CONFIG(ADDR_SURF_P2) |
2570 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2573 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2575 PIPE_CONFIG(ADDR_SURF_P2) |
2576 TILE_SPLIT(split_equal_to_row_size));
2579 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2580 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2583 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2584 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2585 PIPE_CONFIG(ADDR_SURF_P2) |
2586 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2589 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2590 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2591 PIPE_CONFIG(ADDR_SURF_P2) |
2592 TILE_SPLIT(split_equal_to_row_size));
2595 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2598 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2599 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2602 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2603 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2604 PIPE_CONFIG(ADDR_SURF_P2) |
2605 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2608 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2609 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2610 PIPE_CONFIG(ADDR_SURF_P2) |
2611 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2614 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2615 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2616 PIPE_CONFIG(ADDR_SURF_P2) |
2617 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2621 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2624 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2626 PIPE_CONFIG(ADDR_SURF_P2) |
2627 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2630 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2631 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2632 PIPE_CONFIG(ADDR_SURF_P2) |
2633 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2636 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2637 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2638 PIPE_CONFIG(ADDR_SURF_P2) |
2639 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2642 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2646 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2647 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2648 PIPE_CONFIG(ADDR_SURF_P2) |
2649 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2652 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2653 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2654 PIPE_CONFIG(ADDR_SURF_P2) |
2655 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2658 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2659 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2660 PIPE_CONFIG(ADDR_SURF_P2) |
2661 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2667 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2668 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2670 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2671 switch (reg_offset) {
2673 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2674 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2675 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2676 NUM_BANKS(ADDR_SURF_16_BANK));
2679 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2680 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2681 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2682 NUM_BANKS(ADDR_SURF_16_BANK));
2685 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2686 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2687 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2688 NUM_BANKS(ADDR_SURF_16_BANK));
2691 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2693 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2694 NUM_BANKS(ADDR_SURF_16_BANK));
2697 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2698 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2699 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2700 NUM_BANKS(ADDR_SURF_16_BANK));
2703 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2706 NUM_BANKS(ADDR_SURF_16_BANK));
2709 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2711 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2712 NUM_BANKS(ADDR_SURF_8_BANK));
2715 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2716 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2717 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2718 NUM_BANKS(ADDR_SURF_16_BANK));
2721 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2722 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2723 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2724 NUM_BANKS(ADDR_SURF_16_BANK));
2727 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2728 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2729 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2730 NUM_BANKS(ADDR_SURF_16_BANK));
2733 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2734 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2735 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2736 NUM_BANKS(ADDR_SURF_16_BANK));
2739 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2741 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2742 NUM_BANKS(ADDR_SURF_16_BANK));
2745 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2747 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2748 NUM_BANKS(ADDR_SURF_16_BANK));
2751 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2753 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2754 NUM_BANKS(ADDR_SURF_8_BANK));
2760 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2763 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2767 * cik_select_se_sh - select which SE, SH to address
2769 * @rdev: radeon_device pointer
2770 * @se_num: shader engine to address
2771 * @sh_num: sh block to address
2773 * Select which SE, SH combinations to address. Certain
2774 * registers are instanced per SE or SH. 0xffffffff means
2775 * broadcast to all SEs or SHs (CIK).
2777 static void cik_select_se_sh(struct radeon_device *rdev,
2778 u32 se_num, u32 sh_num)
2780 u32 data = INSTANCE_BROADCAST_WRITES;
2782 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2783 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2784 else if (se_num == 0xffffffff)
2785 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2786 else if (sh_num == 0xffffffff)
2787 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2789 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2790 WREG32(GRBM_GFX_INDEX, data);
2794 * cik_create_bitmask - create a bitmask
2796 * @bit_width: length of the mask
2798 * create a variable length bit mask (CIK).
2799 * Returns the bitmask.
2801 static u32 cik_create_bitmask(u32 bit_width)
2805 for (i = 0; i < bit_width; i++) {
2813 * cik_get_rb_disabled - compute the mask of disabled RBs
2815 * @rdev: radeon_device pointer
2816 * @max_rb_num: max RBs (render backends) for the asic
2817 * @se_num: number of SEs (shader engines) for the asic
2818 * @sh_per_se: number of SH blocks per SE for the asic
2820 * Calculates the bitmask of disabled RBs (CIK).
2821 * Returns the disabled RB bitmask.
2823 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2824 u32 max_rb_num, u32 se_num,
2829 data = RREG32(CC_RB_BACKEND_DISABLE);
2831 data &= BACKEND_DISABLE_MASK;
2834 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2836 data >>= BACKEND_DISABLE_SHIFT;
2838 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2844 * cik_setup_rb - setup the RBs on the asic
2846 * @rdev: radeon_device pointer
2847 * @se_num: number of SEs (shader engines) for the asic
2848 * @sh_per_se: number of SH blocks per SE for the asic
2849 * @max_rb_num: max RBs (render backends) for the asic
2851 * Configures per-SE/SH RB registers (CIK).
2853 static void cik_setup_rb(struct radeon_device *rdev,
2854 u32 se_num, u32 sh_per_se,
2859 u32 disabled_rbs = 0;
2860 u32 enabled_rbs = 0;
2862 for (i = 0; i < se_num; i++) {
2863 for (j = 0; j < sh_per_se; j++) {
2864 cik_select_se_sh(rdev, i, j);
2865 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2866 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2869 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2872 for (i = 0; i < max_rb_num; i++) {
2873 if (!(disabled_rbs & mask))
2874 enabled_rbs |= mask;
2878 for (i = 0; i < se_num; i++) {
2879 cik_select_se_sh(rdev, i, 0xffffffff);
2881 for (j = 0; j < sh_per_se; j++) {
2882 switch (enabled_rbs & 3) {
2884 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2887 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2891 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2896 WREG32(PA_SC_RASTER_CONFIG, data);
2898 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2902 * cik_gpu_init - setup the 3D engine
2904 * @rdev: radeon_device pointer
2906 * Configures the 3D engine and tiling configuration
2907 * registers so that the 3D engine is usable.
2909 static void cik_gpu_init(struct radeon_device *rdev)
2911 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2912 u32 mc_shared_chmap, mc_arb_ramcfg;
2913 u32 hdp_host_path_cntl;
2917 switch (rdev->family) {
2919 rdev->config.cik.max_shader_engines = 2;
2920 rdev->config.cik.max_tile_pipes = 4;
2921 rdev->config.cik.max_cu_per_sh = 7;
2922 rdev->config.cik.max_sh_per_se = 1;
2923 rdev->config.cik.max_backends_per_se = 2;
2924 rdev->config.cik.max_texture_channel_caches = 4;
2925 rdev->config.cik.max_gprs = 256;
2926 rdev->config.cik.max_gs_threads = 32;
2927 rdev->config.cik.max_hw_contexts = 8;
2929 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2930 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2931 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2932 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2933 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2936 rdev->config.cik.max_shader_engines = 4;
2937 rdev->config.cik.max_tile_pipes = 16;
2938 rdev->config.cik.max_cu_per_sh = 11;
2939 rdev->config.cik.max_sh_per_se = 1;
2940 rdev->config.cik.max_backends_per_se = 4;
2941 rdev->config.cik.max_texture_channel_caches = 16;
2942 rdev->config.cik.max_gprs = 256;
2943 rdev->config.cik.max_gs_threads = 32;
2944 rdev->config.cik.max_hw_contexts = 8;
2946 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2947 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2948 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2949 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2950 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
2953 rdev->config.cik.max_shader_engines = 1;
2954 rdev->config.cik.max_tile_pipes = 4;
2955 if ((rdev->pdev->device == 0x1304) ||
2956 (rdev->pdev->device == 0x1305) ||
2957 (rdev->pdev->device == 0x130C) ||
2958 (rdev->pdev->device == 0x130F) ||
2959 (rdev->pdev->device == 0x1310) ||
2960 (rdev->pdev->device == 0x1311) ||
2961 (rdev->pdev->device == 0x131C)) {
2962 rdev->config.cik.max_cu_per_sh = 8;
2963 rdev->config.cik.max_backends_per_se = 2;
2964 } else if ((rdev->pdev->device == 0x1309) ||
2965 (rdev->pdev->device == 0x130A) ||
2966 (rdev->pdev->device == 0x130D) ||
2967 (rdev->pdev->device == 0x1313) ||
2968 (rdev->pdev->device == 0x131D)) {
2969 rdev->config.cik.max_cu_per_sh = 6;
2970 rdev->config.cik.max_backends_per_se = 2;
2971 } else if ((rdev->pdev->device == 0x1306) ||
2972 (rdev->pdev->device == 0x1307) ||
2973 (rdev->pdev->device == 0x130B) ||
2974 (rdev->pdev->device == 0x130E) ||
2975 (rdev->pdev->device == 0x1315) ||
2976 (rdev->pdev->device == 0x131B)) {
2977 rdev->config.cik.max_cu_per_sh = 4;
2978 rdev->config.cik.max_backends_per_se = 1;
2980 rdev->config.cik.max_cu_per_sh = 3;
2981 rdev->config.cik.max_backends_per_se = 1;
2983 rdev->config.cik.max_sh_per_se = 1;
2984 rdev->config.cik.max_texture_channel_caches = 4;
2985 rdev->config.cik.max_gprs = 256;
2986 rdev->config.cik.max_gs_threads = 16;
2987 rdev->config.cik.max_hw_contexts = 8;
2989 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2990 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2991 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2992 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2993 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2997 rdev->config.cik.max_shader_engines = 1;
2998 rdev->config.cik.max_tile_pipes = 2;
2999 rdev->config.cik.max_cu_per_sh = 2;
3000 rdev->config.cik.max_sh_per_se = 1;
3001 rdev->config.cik.max_backends_per_se = 1;
3002 rdev->config.cik.max_texture_channel_caches = 2;
3003 rdev->config.cik.max_gprs = 256;
3004 rdev->config.cik.max_gs_threads = 16;
3005 rdev->config.cik.max_hw_contexts = 8;
3007 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3008 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3009 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3010 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3011 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3015 /* Initialize HDP */
3016 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3017 WREG32((0x2c14 + j), 0x00000000);
3018 WREG32((0x2c18 + j), 0x00000000);
3019 WREG32((0x2c1c + j), 0x00000000);
3020 WREG32((0x2c20 + j), 0x00000000);
3021 WREG32((0x2c24 + j), 0x00000000);
3024 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3026 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3028 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3029 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3031 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3032 rdev->config.cik.mem_max_burst_length_bytes = 256;
3033 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3034 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3035 if (rdev->config.cik.mem_row_size_in_kb > 4)
3036 rdev->config.cik.mem_row_size_in_kb = 4;
3037 /* XXX use MC settings? */
3038 rdev->config.cik.shader_engine_tile_size = 32;
3039 rdev->config.cik.num_gpus = 1;
3040 rdev->config.cik.multi_gpu_tile_size = 64;
3042 /* fix up row size */
3043 gb_addr_config &= ~ROW_SIZE_MASK;
3044 switch (rdev->config.cik.mem_row_size_in_kb) {
3047 gb_addr_config |= ROW_SIZE(0);
3050 gb_addr_config |= ROW_SIZE(1);
3053 gb_addr_config |= ROW_SIZE(2);
3057 /* setup tiling info dword. gb_addr_config is not adequate since it does
3058 * not have bank info, so create a custom tiling dword.
3059 * bits 3:0 num_pipes
3060 * bits 7:4 num_banks
3061 * bits 11:8 group_size
3062 * bits 15:12 row_size
3064 rdev->config.cik.tile_config = 0;
3065 switch (rdev->config.cik.num_tile_pipes) {
3067 rdev->config.cik.tile_config |= (0 << 0);
3070 rdev->config.cik.tile_config |= (1 << 0);
3073 rdev->config.cik.tile_config |= (2 << 0);
3077 /* XXX what about 12? */
3078 rdev->config.cik.tile_config |= (3 << 0);
3081 rdev->config.cik.tile_config |=
3082 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3083 rdev->config.cik.tile_config |=
3084 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3085 rdev->config.cik.tile_config |=
3086 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3088 WREG32(GB_ADDR_CONFIG, gb_addr_config);
3089 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3090 WREG32(DMIF_ADDR_CALC, gb_addr_config);
3091 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3092 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3093 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3094 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3095 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3097 cik_tiling_mode_table_init(rdev);
3099 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3100 rdev->config.cik.max_sh_per_se,
3101 rdev->config.cik.max_backends_per_se);
3103 /* set HW defaults for 3D engine */
3104 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3106 WREG32(SX_DEBUG_1, 0x20);
3108 WREG32(TA_CNTL_AUX, 0x00010000);
3110 tmp = RREG32(SPI_CONFIG_CNTL);
3112 WREG32(SPI_CONFIG_CNTL, tmp);
3114 WREG32(SQ_CONFIG, 1);
3116 WREG32(DB_DEBUG, 0);
3118 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3120 WREG32(DB_DEBUG2, tmp);
3122 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3124 WREG32(DB_DEBUG3, tmp);
3126 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3128 WREG32(CB_HW_CONTROL, tmp);
3130 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3132 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3133 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3134 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3135 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3137 WREG32(VGT_NUM_INSTANCES, 1);
3139 WREG32(CP_PERFMON_CNTL, 0);
3141 WREG32(SQ_CONFIG, 0);
3143 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3144 FORCE_EOV_MAX_REZ_CNT(255)));
3146 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3147 AUTO_INVLD_EN(ES_AND_GS_AUTO));
3149 WREG32(VGT_GS_VERTEX_REUSE, 16);
3150 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3152 tmp = RREG32(HDP_MISC_CNTL);
3153 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3154 WREG32(HDP_MISC_CNTL, tmp);
3156 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3157 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3159 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3160 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3166 * GPU scratch registers helpers function.
3169 * cik_scratch_init - setup driver info for CP scratch regs
3171 * @rdev: radeon_device pointer
3173 * Set up the number and offset of the CP scratch registers.
3174 * NOTE: use of CP scratch registers is a legacy interface and
3175 * is not used by default on newer asics (r6xx+). On newer asics,
3176 * memory buffers are used for fences rather than scratch regs.
3178 static void cik_scratch_init(struct radeon_device *rdev)
3182 rdev->scratch.num_reg = 7;
3183 rdev->scratch.reg_base = SCRATCH_REG0;
3184 for (i = 0; i < rdev->scratch.num_reg; i++) {
3185 rdev->scratch.free[i] = true;
3186 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3191 * cik_ring_test - basic gfx ring test
3193 * @rdev: radeon_device pointer
3194 * @ring: radeon_ring structure holding ring information
3196 * Allocate a scratch register and write to it using the gfx ring (CIK).
3197 * Provides a basic gfx ring test to verify that the ring is working.
3198 * Used by cik_cp_gfx_resume();
3199 * Returns 0 on success, error on failure.
3201 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3208 r = radeon_scratch_get(rdev, &scratch);
3210 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3213 WREG32(scratch, 0xCAFEDEAD);
3214 r = radeon_ring_lock(rdev, ring, 3);
3216 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3217 radeon_scratch_free(rdev, scratch);
3220 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3221 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3222 radeon_ring_write(ring, 0xDEADBEEF);
3223 radeon_ring_unlock_commit(rdev, ring);
3225 for (i = 0; i < rdev->usec_timeout; i++) {
3226 tmp = RREG32(scratch);
3227 if (tmp == 0xDEADBEEF)
3231 if (i < rdev->usec_timeout) {
3232 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3234 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3235 ring->idx, scratch, tmp);
3238 radeon_scratch_free(rdev, scratch);
3243 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3245 * @rdev: radeon_device pointer
3246 * @fence: radeon fence object
3248 * Emits a fence sequence number on the gfx ring and flushes
3251 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3252 struct radeon_fence *fence)
3254 struct radeon_ring *ring = &rdev->ring[fence->ring];
3255 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3257 /* EVENT_WRITE_EOP - flush caches, send int */
3258 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3259 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3261 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3263 radeon_ring_write(ring, addr & 0xfffffffc);
3264 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3265 radeon_ring_write(ring, fence->seq);
3266 radeon_ring_write(ring, 0);
3268 /* We should be using the new WAIT_REG_MEM special op packet here
3269 * but it causes the CP to hang
3271 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3272 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3273 WRITE_DATA_DST_SEL(0)));
3274 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3275 radeon_ring_write(ring, 0);
3276 radeon_ring_write(ring, 0);
3280 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3282 * @rdev: radeon_device pointer
3283 * @fence: radeon fence object
3285 * Emits a fence sequence number on the compute ring and flushes
3288 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3289 struct radeon_fence *fence)
3291 struct radeon_ring *ring = &rdev->ring[fence->ring];
3292 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3294 /* RELEASE_MEM - flush caches, send int */
3295 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3296 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3298 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3300 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3301 radeon_ring_write(ring, addr & 0xfffffffc);
3302 radeon_ring_write(ring, upper_32_bits(addr));
3303 radeon_ring_write(ring, fence->seq);
3304 radeon_ring_write(ring, 0);
3306 /* We should be using the new WAIT_REG_MEM special op packet here
3307 * but it causes the CP to hang
3309 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3310 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3311 WRITE_DATA_DST_SEL(0)));
3312 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3313 radeon_ring_write(ring, 0);
3314 radeon_ring_write(ring, 0);
3317 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3318 struct radeon_ring *ring,
3319 struct radeon_semaphore *semaphore,
3322 uint64_t addr = semaphore->gpu_addr;
3323 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3325 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3326 radeon_ring_write(ring, addr & 0xffffffff);
3327 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3331 * cik_copy_cpdma - copy pages using the CP DMA engine
3333 * @rdev: radeon_device pointer
3334 * @src_offset: src GPU address
3335 * @dst_offset: dst GPU address
3336 * @num_gpu_pages: number of GPU pages to xfer
3337 * @fence: radeon fence object
3339 * Copy GPU paging using the CP DMA engine (CIK+).
3340 * Used by the radeon ttm implementation to move pages if
3341 * registered as the asic copy callback.
3343 int cik_copy_cpdma(struct radeon_device *rdev,
3344 uint64_t src_offset, uint64_t dst_offset,
3345 unsigned num_gpu_pages,
3346 struct radeon_fence **fence)
3348 struct radeon_semaphore *sem = NULL;
3349 int ring_index = rdev->asic->copy.blit_ring_index;
3350 struct radeon_ring *ring = &rdev->ring[ring_index];
3351 u32 size_in_bytes, cur_size_in_bytes, control;
3355 r = radeon_semaphore_create(rdev, &sem);
3357 DRM_ERROR("radeon: moving bo (%d).\n", r);
3361 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3362 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3363 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3365 DRM_ERROR("radeon: moving bo (%d).\n", r);
3366 radeon_semaphore_free(rdev, &sem, NULL);
3370 if (radeon_fence_need_sync(*fence, ring->idx)) {
3371 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3373 radeon_fence_note_sync(*fence, ring->idx);
3375 radeon_semaphore_free(rdev, &sem, NULL);
3378 for (i = 0; i < num_loops; i++) {
3379 cur_size_in_bytes = size_in_bytes;
3380 if (cur_size_in_bytes > 0x1fffff)
3381 cur_size_in_bytes = 0x1fffff;
3382 size_in_bytes -= cur_size_in_bytes;
3384 if (size_in_bytes == 0)
3385 control |= PACKET3_DMA_DATA_CP_SYNC;
3386 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3387 radeon_ring_write(ring, control);
3388 radeon_ring_write(ring, lower_32_bits(src_offset));
3389 radeon_ring_write(ring, upper_32_bits(src_offset));
3390 radeon_ring_write(ring, lower_32_bits(dst_offset));
3391 radeon_ring_write(ring, upper_32_bits(dst_offset));
3392 radeon_ring_write(ring, cur_size_in_bytes);
3393 src_offset += cur_size_in_bytes;
3394 dst_offset += cur_size_in_bytes;
3397 r = radeon_fence_emit(rdev, fence, ring->idx);
3399 radeon_ring_unlock_undo(rdev, ring);
3403 radeon_ring_unlock_commit(rdev, ring);
3404 radeon_semaphore_free(rdev, &sem, *fence);
3413 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3415 * @rdev: radeon_device pointer
3416 * @ib: radeon indirect buffer object
3418 * Emits an DE (drawing engine) or CE (constant engine) IB
3419 * on the gfx ring. IBs are usually generated by userspace
3420 * acceleration drivers and submitted to the kernel for
3421 * scheduling on the ring. This function schedules the IB
3422 * on the gfx ring for execution by the GPU.
3424 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3426 struct radeon_ring *ring = &rdev->ring[ib->ring];
3427 u32 header, control = INDIRECT_BUFFER_VALID;
3429 if (ib->is_const_ib) {
3430 /* set switch buffer packet before const IB */
3431 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3432 radeon_ring_write(ring, 0);
3434 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3437 if (ring->rptr_save_reg) {
3438 next_rptr = ring->wptr + 3 + 4;
3439 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3440 radeon_ring_write(ring, ((ring->rptr_save_reg -
3441 PACKET3_SET_UCONFIG_REG_START) >> 2));
3442 radeon_ring_write(ring, next_rptr);
3443 } else if (rdev->wb.enabled) {
3444 next_rptr = ring->wptr + 5 + 4;
3445 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3446 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3447 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3448 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3449 radeon_ring_write(ring, next_rptr);
3452 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3455 control |= ib->length_dw |
3456 (ib->vm ? (ib->vm->id << 24) : 0);
3458 radeon_ring_write(ring, header);
3459 radeon_ring_write(ring,
3463 (ib->gpu_addr & 0xFFFFFFFC));
3464 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3465 radeon_ring_write(ring, control);
3469 * cik_ib_test - basic gfx ring IB test
3471 * @rdev: radeon_device pointer
3472 * @ring: radeon_ring structure holding ring information
3474 * Allocate an IB and execute it on the gfx ring (CIK).
3475 * Provides a basic gfx ring test to verify that IBs are working.
3476 * Returns 0 on success, error on failure.
3478 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3480 struct radeon_ib ib;
3486 r = radeon_scratch_get(rdev, &scratch);
3488 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3491 WREG32(scratch, 0xCAFEDEAD);
3492 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3494 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3495 radeon_scratch_free(rdev, scratch);
3498 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3499 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3500 ib.ptr[2] = 0xDEADBEEF;
3502 r = radeon_ib_schedule(rdev, &ib, NULL);
3504 radeon_scratch_free(rdev, scratch);
3505 radeon_ib_free(rdev, &ib);
3506 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3509 r = radeon_fence_wait(ib.fence, false);
3511 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3512 radeon_scratch_free(rdev, scratch);
3513 radeon_ib_free(rdev, &ib);
3516 for (i = 0; i < rdev->usec_timeout; i++) {
3517 tmp = RREG32(scratch);
3518 if (tmp == 0xDEADBEEF)
3522 if (i < rdev->usec_timeout) {
3523 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3525 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3529 radeon_scratch_free(rdev, scratch);
3530 radeon_ib_free(rdev, &ib);
3536 * On CIK, gfx and compute now have independent command processors.
3539 * Gfx consists of a single ring and can process both gfx jobs and
3540 * compute jobs. The gfx CP consists of three microengines (ME):
3541 * PFP - Pre-Fetch Parser
3543 * CE - Constant Engine
3544 * The PFP and ME make up what is considered the Drawing Engine (DE).
3545 * The CE is an asynchronous engine used for updating buffer descriptors
3546 * used by the DE so that they can be loaded into cache in parallel
3547 * while the DE is processing state update packets.
3550 * The compute CP consists of two microengines (ME):
3551 * MEC1 - Compute MicroEngine 1
3552 * MEC2 - Compute MicroEngine 2
3553 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3554 * The queues are exposed to userspace and are programmed directly
3555 * by the compute runtime.
3558 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3560 * @rdev: radeon_device pointer
3561 * @enable: enable or disable the MEs
3563 * Halts or unhalts the gfx MEs.
3565 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3568 WREG32(CP_ME_CNTL, 0);
3570 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3571 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3577 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3579 * @rdev: radeon_device pointer
3581 * Loads the gfx PFP, ME, and CE ucode.
3582 * Returns 0 for success, -EINVAL if the ucode is not available.
3584 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3586 const __be32 *fw_data;
3589 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3592 cik_cp_gfx_enable(rdev, false);
3595 fw_data = (const __be32 *)rdev->pfp_fw->data;
3596 WREG32(CP_PFP_UCODE_ADDR, 0);
3597 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3598 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3599 WREG32(CP_PFP_UCODE_ADDR, 0);
3602 fw_data = (const __be32 *)rdev->ce_fw->data;
3603 WREG32(CP_CE_UCODE_ADDR, 0);
3604 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3605 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3606 WREG32(CP_CE_UCODE_ADDR, 0);
3609 fw_data = (const __be32 *)rdev->me_fw->data;
3610 WREG32(CP_ME_RAM_WADDR, 0);
3611 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3612 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3613 WREG32(CP_ME_RAM_WADDR, 0);
3615 WREG32(CP_PFP_UCODE_ADDR, 0);
3616 WREG32(CP_CE_UCODE_ADDR, 0);
3617 WREG32(CP_ME_RAM_WADDR, 0);
3618 WREG32(CP_ME_RAM_RADDR, 0);
3623 * cik_cp_gfx_start - start the gfx ring
3625 * @rdev: radeon_device pointer
3627 * Enables the ring and loads the clear state context and other
3628 * packets required to init the ring.
3629 * Returns 0 for success, error for failure.
3631 static int cik_cp_gfx_start(struct radeon_device *rdev)
3633 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3637 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3638 WREG32(CP_ENDIAN_SWAP, 0);
3639 WREG32(CP_DEVICE_ID, 1);
3641 cik_cp_gfx_enable(rdev, true);
3643 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3645 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3649 /* init the CE partitions. CE only used for gfx on CIK */
3650 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3651 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3652 radeon_ring_write(ring, 0xc000);
3653 radeon_ring_write(ring, 0xc000);
3655 /* setup clear context state */
3656 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3657 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3659 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3660 radeon_ring_write(ring, 0x80000000);
3661 radeon_ring_write(ring, 0x80000000);
3663 for (i = 0; i < cik_default_size; i++)
3664 radeon_ring_write(ring, cik_default_state[i]);
3666 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3667 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3669 /* set clear context state */
3670 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3671 radeon_ring_write(ring, 0);
3673 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3674 radeon_ring_write(ring, 0x00000316);
3675 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3676 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3678 radeon_ring_unlock_commit(rdev, ring);
3684 * cik_cp_gfx_fini - stop the gfx ring
3686 * @rdev: radeon_device pointer
3688 * Stop the gfx ring and tear down the driver ring
3691 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3693 cik_cp_gfx_enable(rdev, false);
3694 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3698 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3700 * @rdev: radeon_device pointer
3702 * Program the location and size of the gfx ring buffer
3703 * and test it to make sure it's working.
3704 * Returns 0 for success, error for failure.
3706 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3708 struct radeon_ring *ring;
3714 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3715 if (rdev->family != CHIP_HAWAII)
3716 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3718 /* Set the write pointer delay */
3719 WREG32(CP_RB_WPTR_DELAY, 0);
3721 /* set the RB to use vmid 0 */
3722 WREG32(CP_RB_VMID, 0);
3724 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3726 /* ring 0 - compute and gfx */
3727 /* Set ring buffer size */
3728 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3729 rb_bufsz = order_base_2(ring->ring_size / 8);
3730 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3732 tmp |= BUF_SWAP_32BIT;
3734 WREG32(CP_RB0_CNTL, tmp);
3736 /* Initialize the ring buffer's read and write pointers */
3737 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3739 WREG32(CP_RB0_WPTR, ring->wptr);
3741 /* set the wb address wether it's enabled or not */
3742 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3743 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3745 /* scratch register shadowing is no longer supported */
3746 WREG32(SCRATCH_UMSK, 0);
3748 if (!rdev->wb.enabled)
3749 tmp |= RB_NO_UPDATE;
3752 WREG32(CP_RB0_CNTL, tmp);
3754 rb_addr = ring->gpu_addr >> 8;
3755 WREG32(CP_RB0_BASE, rb_addr);
3756 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3758 ring->rptr = RREG32(CP_RB0_RPTR);
3760 /* start the ring */
3761 cik_cp_gfx_start(rdev);
3762 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3763 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3765 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3771 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3772 struct radeon_ring *ring)
3778 if (rdev->wb.enabled) {
3779 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3781 mutex_lock(&rdev->srbm_mutex);
3782 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3783 rptr = RREG32(CP_HQD_PQ_RPTR);
3784 cik_srbm_select(rdev, 0, 0, 0, 0);
3785 mutex_unlock(&rdev->srbm_mutex);
3791 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3792 struct radeon_ring *ring)
3796 if (rdev->wb.enabled) {
3797 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3799 mutex_lock(&rdev->srbm_mutex);
3800 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3801 wptr = RREG32(CP_HQD_PQ_WPTR);
3802 cik_srbm_select(rdev, 0, 0, 0, 0);
3803 mutex_unlock(&rdev->srbm_mutex);
3809 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3810 struct radeon_ring *ring)
3812 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
3813 WDOORBELL32(ring->doorbell_offset, ring->wptr);
3817 * cik_cp_compute_enable - enable/disable the compute CP MEs
3819 * @rdev: radeon_device pointer
3820 * @enable: enable or disable the MEs
3822 * Halts or unhalts the compute MEs.
3824 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3827 WREG32(CP_MEC_CNTL, 0);
3829 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3834 * cik_cp_compute_load_microcode - load the compute CP ME ucode
3836 * @rdev: radeon_device pointer
3838 * Loads the compute MEC1&2 ucode.
3839 * Returns 0 for success, -EINVAL if the ucode is not available.
3841 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3843 const __be32 *fw_data;
3849 cik_cp_compute_enable(rdev, false);
3852 fw_data = (const __be32 *)rdev->mec_fw->data;
3853 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3854 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3855 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3856 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3858 if (rdev->family == CHIP_KAVERI) {
3860 fw_data = (const __be32 *)rdev->mec_fw->data;
3861 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3862 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3863 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3864 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3871 * cik_cp_compute_start - start the compute queues
3873 * @rdev: radeon_device pointer
3875 * Enable the compute queues.
3876 * Returns 0 for success, error for failure.
3878 static int cik_cp_compute_start(struct radeon_device *rdev)
3880 cik_cp_compute_enable(rdev, true);
3886 * cik_cp_compute_fini - stop the compute queues
3888 * @rdev: radeon_device pointer
3890 * Stop the compute queues and tear down the driver queue
3893 static void cik_cp_compute_fini(struct radeon_device *rdev)
3897 cik_cp_compute_enable(rdev, false);
3899 for (i = 0; i < 2; i++) {
3901 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3903 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3905 if (rdev->ring[idx].mqd_obj) {
3906 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3907 if (unlikely(r != 0))
3908 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3910 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3911 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3913 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3914 rdev->ring[idx].mqd_obj = NULL;
3919 static void cik_mec_fini(struct radeon_device *rdev)
3923 if (rdev->mec.hpd_eop_obj) {
3924 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3925 if (unlikely(r != 0))
3926 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3927 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3928 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3930 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3931 rdev->mec.hpd_eop_obj = NULL;
3935 #define MEC_HPD_SIZE 2048
3937 static int cik_mec_init(struct radeon_device *rdev)
3943 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3944 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3946 if (rdev->family == CHIP_KAVERI)
3947 rdev->mec.num_mec = 2;
3949 rdev->mec.num_mec = 1;
3950 rdev->mec.num_pipe = 4;
3951 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3953 if (rdev->mec.hpd_eop_obj == NULL) {
3954 r = radeon_bo_create(rdev,
3955 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3957 RADEON_GEM_DOMAIN_GTT, NULL,
3958 &rdev->mec.hpd_eop_obj);
3960 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
3965 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3966 if (unlikely(r != 0)) {
3970 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3971 &rdev->mec.hpd_eop_gpu_addr);
3973 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
3977 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3979 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
3984 /* clear memory. Not sure if this is required or not */
3985 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3987 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3988 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3993 struct hqd_registers
3995 u32 cp_mqd_base_addr;
3996 u32 cp_mqd_base_addr_hi;
3999 u32 cp_hqd_persistent_state;
4000 u32 cp_hqd_pipe_priority;
4001 u32 cp_hqd_queue_priority;
4004 u32 cp_hqd_pq_base_hi;
4006 u32 cp_hqd_pq_rptr_report_addr;
4007 u32 cp_hqd_pq_rptr_report_addr_hi;
4008 u32 cp_hqd_pq_wptr_poll_addr;
4009 u32 cp_hqd_pq_wptr_poll_addr_hi;
4010 u32 cp_hqd_pq_doorbell_control;
4012 u32 cp_hqd_pq_control;
4013 u32 cp_hqd_ib_base_addr;
4014 u32 cp_hqd_ib_base_addr_hi;
4016 u32 cp_hqd_ib_control;
4017 u32 cp_hqd_iq_timer;
4019 u32 cp_hqd_dequeue_request;
4020 u32 cp_hqd_dma_offload;
4021 u32 cp_hqd_sema_cmd;
4022 u32 cp_hqd_msg_type;
4023 u32 cp_hqd_atomic0_preop_lo;
4024 u32 cp_hqd_atomic0_preop_hi;
4025 u32 cp_hqd_atomic1_preop_lo;
4026 u32 cp_hqd_atomic1_preop_hi;
4027 u32 cp_hqd_hq_scheduler0;
4028 u32 cp_hqd_hq_scheduler1;
4035 u32 dispatch_initiator;
4039 u32 pipeline_stat_enable;
4040 u32 perf_counter_enable;
4046 u32 resource_limits;
4047 u32 static_thread_mgmt01[2];
4049 u32 static_thread_mgmt23[2];
4051 u32 thread_trace_enable;
4054 u32 vgtcs_invoke_count[2];
4055 struct hqd_registers queue_state;
4057 u32 interrupt_queue[64];
4061 * cik_cp_compute_resume - setup the compute queue registers
4063 * @rdev: radeon_device pointer
4065 * Program the compute queues and test them to make sure they
4067 * Returns 0 for success, error for failure.
4069 static int cik_cp_compute_resume(struct radeon_device *rdev)
4073 bool use_doorbell = true;
4079 struct bonaire_mqd *mqd;
4081 r = cik_cp_compute_start(rdev);
4085 /* fix up chicken bits */
4086 tmp = RREG32(CP_CPF_DEBUG);
4088 WREG32(CP_CPF_DEBUG, tmp);
4090 /* init the pipes */
4091 mutex_lock(&rdev->srbm_mutex);
4092 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4093 int me = (i < 4) ? 1 : 2;
4094 int pipe = (i < 4) ? i : (i - 4);
4096 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4098 cik_srbm_select(rdev, me, pipe, 0, 0);
4100 /* write the EOP addr */
4101 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4102 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4104 /* set the VMID assigned */
4105 WREG32(CP_HPD_EOP_VMID, 0);
4107 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4108 tmp = RREG32(CP_HPD_EOP_CONTROL);
4109 tmp &= ~EOP_SIZE_MASK;
4110 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4111 WREG32(CP_HPD_EOP_CONTROL, tmp);
4113 cik_srbm_select(rdev, 0, 0, 0, 0);
4114 mutex_unlock(&rdev->srbm_mutex);
4116 /* init the queues. Just two for now. */
4117 for (i = 0; i < 2; i++) {
4119 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4121 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4123 if (rdev->ring[idx].mqd_obj == NULL) {
4124 r = radeon_bo_create(rdev,
4125 sizeof(struct bonaire_mqd),
4127 RADEON_GEM_DOMAIN_GTT, NULL,
4128 &rdev->ring[idx].mqd_obj);
4130 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4135 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4136 if (unlikely(r != 0)) {
4137 cik_cp_compute_fini(rdev);
4140 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4143 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4144 cik_cp_compute_fini(rdev);
4147 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4149 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4150 cik_cp_compute_fini(rdev);
4154 /* doorbell offset */
4155 rdev->ring[idx].doorbell_offset =
4156 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
4158 /* init the mqd struct */
4159 memset(buf, 0, sizeof(struct bonaire_mqd));
4161 mqd = (struct bonaire_mqd *)buf;
4162 mqd->header = 0xC0310800;
4163 mqd->static_thread_mgmt01[0] = 0xffffffff;
4164 mqd->static_thread_mgmt01[1] = 0xffffffff;
4165 mqd->static_thread_mgmt23[0] = 0xffffffff;
4166 mqd->static_thread_mgmt23[1] = 0xffffffff;
4168 mutex_lock(&rdev->srbm_mutex);
4169 cik_srbm_select(rdev, rdev->ring[idx].me,
4170 rdev->ring[idx].pipe,
4171 rdev->ring[idx].queue, 0);
4173 /* disable wptr polling */
4174 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4175 tmp &= ~WPTR_POLL_EN;
4176 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4178 /* enable doorbell? */
4179 mqd->queue_state.cp_hqd_pq_doorbell_control =
4180 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4182 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4184 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4185 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4186 mqd->queue_state.cp_hqd_pq_doorbell_control);
4188 /* disable the queue if it's active */
4189 mqd->queue_state.cp_hqd_dequeue_request = 0;
4190 mqd->queue_state.cp_hqd_pq_rptr = 0;
4191 mqd->queue_state.cp_hqd_pq_wptr= 0;
4192 if (RREG32(CP_HQD_ACTIVE) & 1) {
4193 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4194 for (i = 0; i < rdev->usec_timeout; i++) {
4195 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4199 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4200 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4201 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4204 /* set the pointer to the MQD */
4205 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4206 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4207 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4208 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4209 /* set MQD vmid to 0 */
4210 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4211 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4212 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4214 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4215 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4216 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4217 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4218 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4219 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4221 /* set up the HQD, this is similar to CP_RB0_CNTL */
4222 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4223 mqd->queue_state.cp_hqd_pq_control &=
4224 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4226 mqd->queue_state.cp_hqd_pq_control |=
4227 order_base_2(rdev->ring[idx].ring_size / 8);
4228 mqd->queue_state.cp_hqd_pq_control |=
4229 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4231 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4233 mqd->queue_state.cp_hqd_pq_control &=
4234 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4235 mqd->queue_state.cp_hqd_pq_control |=
4236 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4237 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4239 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4241 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4243 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4244 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4245 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4246 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4247 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4248 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4250 /* set the wb address wether it's enabled or not */
4252 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4254 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4255 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4256 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4257 upper_32_bits(wb_gpu_addr) & 0xffff;
4258 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4259 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4260 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4261 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4263 /* enable the doorbell if requested */
4265 mqd->queue_state.cp_hqd_pq_doorbell_control =
4266 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4267 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4268 mqd->queue_state.cp_hqd_pq_doorbell_control |=
4269 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
4270 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4271 mqd->queue_state.cp_hqd_pq_doorbell_control &=
4272 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4275 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4277 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4278 mqd->queue_state.cp_hqd_pq_doorbell_control);
4280 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4281 rdev->ring[idx].wptr = 0;
4282 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4283 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4284 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4285 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4287 /* set the vmid for the queue */
4288 mqd->queue_state.cp_hqd_vmid = 0;
4289 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4291 /* activate the queue */
4292 mqd->queue_state.cp_hqd_active = 1;
4293 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4295 cik_srbm_select(rdev, 0, 0, 0, 0);
4296 mutex_unlock(&rdev->srbm_mutex);
4298 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4299 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4301 rdev->ring[idx].ready = true;
4302 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4304 rdev->ring[idx].ready = false;
4310 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4312 cik_cp_gfx_enable(rdev, enable);
4313 cik_cp_compute_enable(rdev, enable);
4316 static int cik_cp_load_microcode(struct radeon_device *rdev)
4320 r = cik_cp_gfx_load_microcode(rdev);
4323 r = cik_cp_compute_load_microcode(rdev);
4330 static void cik_cp_fini(struct radeon_device *rdev)
4332 cik_cp_gfx_fini(rdev);
4333 cik_cp_compute_fini(rdev);
4336 static int cik_cp_resume(struct radeon_device *rdev)
4340 cik_enable_gui_idle_interrupt(rdev, false);
4342 r = cik_cp_load_microcode(rdev);
4346 r = cik_cp_gfx_resume(rdev);
4349 r = cik_cp_compute_resume(rdev);
4353 cik_enable_gui_idle_interrupt(rdev, true);
4358 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4360 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4361 RREG32(GRBM_STATUS));
4362 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4363 RREG32(GRBM_STATUS2));
4364 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4365 RREG32(GRBM_STATUS_SE0));
4366 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4367 RREG32(GRBM_STATUS_SE1));
4368 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4369 RREG32(GRBM_STATUS_SE2));
4370 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4371 RREG32(GRBM_STATUS_SE3));
4372 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4373 RREG32(SRBM_STATUS));
4374 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4375 RREG32(SRBM_STATUS2));
4376 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4377 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4378 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4379 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4380 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4381 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4382 RREG32(CP_STALLED_STAT1));
4383 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4384 RREG32(CP_STALLED_STAT2));
4385 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4386 RREG32(CP_STALLED_STAT3));
4387 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4388 RREG32(CP_CPF_BUSY_STAT));
4389 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4390 RREG32(CP_CPF_STALLED_STAT1));
4391 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4392 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4393 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4394 RREG32(CP_CPC_STALLED_STAT1));
4395 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4399 * cik_gpu_check_soft_reset - check which blocks are busy
4401 * @rdev: radeon_device pointer
4403 * Check which blocks are busy and return the relevant reset
4404 * mask to be used by cik_gpu_soft_reset().
4405 * Returns a mask of the blocks to be reset.
4407 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4413 tmp = RREG32(GRBM_STATUS);
4414 if (tmp & (PA_BUSY | SC_BUSY |
4415 BCI_BUSY | SX_BUSY |
4416 TA_BUSY | VGT_BUSY |
4418 GDS_BUSY | SPI_BUSY |
4419 IA_BUSY | IA_BUSY_NO_DMA))
4420 reset_mask |= RADEON_RESET_GFX;
4422 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4423 reset_mask |= RADEON_RESET_CP;
4426 tmp = RREG32(GRBM_STATUS2);
4428 reset_mask |= RADEON_RESET_RLC;
4430 /* SDMA0_STATUS_REG */
4431 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4432 if (!(tmp & SDMA_IDLE))
4433 reset_mask |= RADEON_RESET_DMA;
4435 /* SDMA1_STATUS_REG */
4436 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4437 if (!(tmp & SDMA_IDLE))
4438 reset_mask |= RADEON_RESET_DMA1;
4441 tmp = RREG32(SRBM_STATUS2);
4442 if (tmp & SDMA_BUSY)
4443 reset_mask |= RADEON_RESET_DMA;
4445 if (tmp & SDMA1_BUSY)
4446 reset_mask |= RADEON_RESET_DMA1;
4449 tmp = RREG32(SRBM_STATUS);
4452 reset_mask |= RADEON_RESET_IH;
4455 reset_mask |= RADEON_RESET_SEM;
4457 if (tmp & GRBM_RQ_PENDING)
4458 reset_mask |= RADEON_RESET_GRBM;
4461 reset_mask |= RADEON_RESET_VMC;
4463 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4464 MCC_BUSY | MCD_BUSY))
4465 reset_mask |= RADEON_RESET_MC;
4467 if (evergreen_is_display_hung(rdev))
4468 reset_mask |= RADEON_RESET_DISPLAY;
4470 /* Skip MC reset as it's mostly likely not hung, just busy */
4471 if (reset_mask & RADEON_RESET_MC) {
4472 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4473 reset_mask &= ~RADEON_RESET_MC;
4480 * cik_gpu_soft_reset - soft reset GPU
4482 * @rdev: radeon_device pointer
4483 * @reset_mask: mask of which blocks to reset
4485 * Soft reset the blocks specified in @reset_mask.
4487 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4489 struct evergreen_mc_save save;
4490 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4493 if (reset_mask == 0)
4496 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4498 cik_print_gpu_status_regs(rdev);
4499 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4500 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4501 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4502 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4511 /* Disable GFX parsing/prefetching */
4512 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4514 /* Disable MEC parsing/prefetching */
4515 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4517 if (reset_mask & RADEON_RESET_DMA) {
4519 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4521 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4523 if (reset_mask & RADEON_RESET_DMA1) {
4525 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4527 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4530 evergreen_mc_stop(rdev, &save);
4531 if (evergreen_mc_wait_for_idle(rdev)) {
4532 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4535 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4536 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4538 if (reset_mask & RADEON_RESET_CP) {
4539 grbm_soft_reset |= SOFT_RESET_CP;
4541 srbm_soft_reset |= SOFT_RESET_GRBM;
4544 if (reset_mask & RADEON_RESET_DMA)
4545 srbm_soft_reset |= SOFT_RESET_SDMA;
4547 if (reset_mask & RADEON_RESET_DMA1)
4548 srbm_soft_reset |= SOFT_RESET_SDMA1;
4550 if (reset_mask & RADEON_RESET_DISPLAY)
4551 srbm_soft_reset |= SOFT_RESET_DC;
4553 if (reset_mask & RADEON_RESET_RLC)
4554 grbm_soft_reset |= SOFT_RESET_RLC;
4556 if (reset_mask & RADEON_RESET_SEM)
4557 srbm_soft_reset |= SOFT_RESET_SEM;
4559 if (reset_mask & RADEON_RESET_IH)
4560 srbm_soft_reset |= SOFT_RESET_IH;
4562 if (reset_mask & RADEON_RESET_GRBM)
4563 srbm_soft_reset |= SOFT_RESET_GRBM;
4565 if (reset_mask & RADEON_RESET_VMC)
4566 srbm_soft_reset |= SOFT_RESET_VMC;
4568 if (!(rdev->flags & RADEON_IS_IGP)) {
4569 if (reset_mask & RADEON_RESET_MC)
4570 srbm_soft_reset |= SOFT_RESET_MC;
4573 if (grbm_soft_reset) {
4574 tmp = RREG32(GRBM_SOFT_RESET);
4575 tmp |= grbm_soft_reset;
4576 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4577 WREG32(GRBM_SOFT_RESET, tmp);
4578 tmp = RREG32(GRBM_SOFT_RESET);
4582 tmp &= ~grbm_soft_reset;
4583 WREG32(GRBM_SOFT_RESET, tmp);
4584 tmp = RREG32(GRBM_SOFT_RESET);
4587 if (srbm_soft_reset) {
4588 tmp = RREG32(SRBM_SOFT_RESET);
4589 tmp |= srbm_soft_reset;
4590 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4591 WREG32(SRBM_SOFT_RESET, tmp);
4592 tmp = RREG32(SRBM_SOFT_RESET);
4596 tmp &= ~srbm_soft_reset;
4597 WREG32(SRBM_SOFT_RESET, tmp);
4598 tmp = RREG32(SRBM_SOFT_RESET);
4601 /* Wait a little for things to settle down */
4604 evergreen_mc_resume(rdev, &save);
4607 cik_print_gpu_status_regs(rdev);
4611 * cik_asic_reset - soft reset GPU
4613 * @rdev: radeon_device pointer
4615 * Look up which blocks are hung and attempt
4617 * Returns 0 for success.
4619 int cik_asic_reset(struct radeon_device *rdev)
4623 reset_mask = cik_gpu_check_soft_reset(rdev);
4626 r600_set_bios_scratch_engine_hung(rdev, true);
4628 cik_gpu_soft_reset(rdev, reset_mask);
4630 reset_mask = cik_gpu_check_soft_reset(rdev);
4633 r600_set_bios_scratch_engine_hung(rdev, false);
4639 * cik_gfx_is_lockup - check if the 3D engine is locked up
4641 * @rdev: radeon_device pointer
4642 * @ring: radeon_ring structure holding ring information
4644 * Check if the 3D engine is locked up (CIK).
4645 * Returns true if the engine is locked, false if not.
4647 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4649 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4651 if (!(reset_mask & (RADEON_RESET_GFX |
4652 RADEON_RESET_COMPUTE |
4653 RADEON_RESET_CP))) {
4654 radeon_ring_lockup_update(ring);
4657 /* force CP activities */
4658 radeon_ring_force_activity(rdev, ring);
4659 return radeon_ring_test_lockup(rdev, ring);
4664 * cik_mc_program - program the GPU memory controller
4666 * @rdev: radeon_device pointer
4668 * Set the location of vram, gart, and AGP in the GPU's
4669 * physical address space (CIK).
4671 static void cik_mc_program(struct radeon_device *rdev)
4673 struct evergreen_mc_save save;
/* NOTE(review): local declarations (u32 tmp; int i, j;) appear elided here. */
4677 /* Initialize HDP */
/* Zero a bank of HDP registers: five registers per set (0x2c14..0x2c24),
 * 32 sets, each set 0x18 bytes apart. */
4678 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4679 WREG32((0x2c14 + j), 0x00000000);
4680 WREG32((0x2c18 + j), 0x00000000);
4681 WREG32((0x2c1c + j), 0x00000000);
4682 WREG32((0x2c20 + j), 0x00000000);
4683 WREG32((0x2c24 + j), 0x00000000);
4685 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
/* Halt the MC clients (display etc.) before reprogramming apertures. */
4687 evergreen_mc_stop(rdev, &save);
4688 if (radeon_mc_wait_for_idle(rdev)) {
4689 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4691 /* Lockout access through VGA aperture*/
4692 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4693 /* Update configuration */
4694 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4695 rdev->mc.vram_start >> 12);
4696 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4697 rdev->mc.vram_end >> 12);
/* Faulting system-aperture accesses are redirected to the scratch page. */
4698 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4699 rdev->vram_scratch.gpu_addr >> 12);
/* FB_LOCATION packs vram end (hi 16 bits) and start (lo 16 bits), in 16MB units. */
4700 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4701 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4702 WREG32(MC_VM_FB_LOCATION, tmp);
4703 /* XXX double check these! */
4704 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4705 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4706 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
/* AGP is unused on CIK: BOT > TOP disables the AGP aperture. */
4707 WREG32(MC_VM_AGP_BASE, 0);
4708 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4709 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4710 if (radeon_mc_wait_for_idle(rdev)) {
4711 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4713 evergreen_mc_resume(rdev, &save);
4714 /* we need to own VRAM, so turn off the VGA renderer here
4715 * to stop it overwriting our objects */
4716 rv515_vga_render_disable(rdev);
4720 * cik_mc_init - initialize the memory controller driver params
4722 * @rdev: radeon_device pointer
4724 * Look up the amount of vram, vram width, and decide how to place
4725 * vram and gart within the GPU's physical address space (CIK).
4726 * Returns 0 for success.
4728 static int cik_mc_init(struct radeon_device *rdev)
4731 int chansize, numchan;
4733 /* Get VRAM informations */
4734 rdev->mc.vram_is_ddr = true;
4735 tmp = RREG32(MC_ARB_RAMCFG);
4736 if (tmp & CHANSIZE_MASK) {
/* NOTE(review): the chansize assignments (if/else bodies, lines 4737-4740)
 * are elided in this excerpt — presumably 64 vs 32 bits; confirm. */
4741 tmp = RREG32(MC_SHARED_CHMAP);
4742 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
/* NOTE(review): the switch cases assigning numchan (lines 4743-4771)
 * are elided in this excerpt. */
4772 rdev->mc.vram_width = numchan * chansize;
4773 /* Could aper size report 0 ? */
4774 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4775 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4776 /* size in MB on si */
/* CONFIG_MEMSIZE reports VRAM in MB; convert to bytes. */
4777 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4778 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4779 rdev->mc.visible_vram_size = rdev->mc.aper_size;
/* CIK reuses the SI placement logic for vram/gtt apertures. */
4780 si_vram_gtt_location(rdev, &rdev->mc);
4781 radeon_update_bandwidth_info(rdev);
/* GART / VM section:
4788 * VMID 0 is the physical GPU addresses as used by the kernel.
4789 * VMIDs 1-15 are used for userspace clients and are handled
4790 * by the radeon vm/hsa code.
4793 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4795 * @rdev: radeon_device pointer
4797 * Flush the TLB for the VMID 0 page table (CIK).
4799 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4801 /* flush hdp cache */
4802 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4804 /* bits 0-15 are the VM contexts0-15 */
/* Only bit 0 is set: invalidate VM context 0 (the kernel GART mapping). */
4805 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4809 * cik_pcie_gart_enable - gart enable
4811 * @rdev: radeon_device pointer
4813 * This sets up the TLBs, programs the page tables for VMID0,
4814 * sets up the hw for VMIDs 1-15 which are allocated on
4815 * demand, and sets up the global locations for the LDS, GDS,
4816 * and GPUVM for FSA64 clients (CIK).
4817 * Returns 0 for success, errors for failure.
4819 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4823 if (rdev->gart.robj == NULL) {
4824 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
/* NOTE(review): the 'return -EINVAL;' and pin-error return around lines
 * 4825-4829 appear elided in this excerpt. */
4827 r = radeon_gart_table_vram_pin(rdev);
4830 radeon_gart_restore(rdev);
4831 /* Setup TLB control */
4832 WREG32(MC_VM_MX_L1_TLB_CNTL,
/* NOTE(review): some TLB flag lines (4833-4834) are elided here. */
4835 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4836 ENABLE_ADVANCED_DRIVER_MODEL |
4837 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4838 /* Setup L2 cache */
4839 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4840 ENABLE_L2_FRAGMENT_PROCESSING |
4841 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4842 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4843 EFFECTIVE_L2_QUEUE_SIZE(7) |
4844 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4845 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4846 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4847 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4848 /* setup context0 */
/* Context 0 covers the kernel GART range with a single-level page table. */
4849 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4850 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4851 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4852 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4853 (u32)(rdev->dummy_page.addr >> 12));
4854 WREG32(VM_CONTEXT0_CNTL2, 0);
4855 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4856 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4862 /* empty context1-15 */
4863 /* FIXME start with 4G, once using 2 level pt switch to full
4866 /* set vm size, must be a multiple of 4 */
4867 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4868 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
/* Contexts 1-7 use consecutive registers after CONTEXT0; 8-15 use a
 * second register bank (CONTEXT8 base). All point at the GART table
 * until a real per-VM page directory is bound. */
4869 for (i = 1; i < 16; i++) {
/* NOTE(review): the 'if (i < 8)' / 'else' lines around 4870/4873 are
 * elided in this excerpt. */
4871 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4872 rdev->gart.table_addr >> 12);
4874 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4875 rdev->gart.table_addr >> 12);
4878 /* enable context1-15 */
4879 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4880 (u32)(rdev->dummy_page.addr >> 12));
4881 WREG32(VM_CONTEXT1_CNTL2, 4);
/* Two-level page tables plus the full set of fault interrupts for
 * userspace VM contexts. */
4882 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4883 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4884 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4885 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4886 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4887 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4888 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4889 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4890 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4891 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4892 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4893 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4894 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4896 /* TC cache setup ??? */
4897 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4898 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4899 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4901 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4902 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4903 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4904 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4905 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4907 WREG32(TC_CFG_L1_VOLATILE, 0);
4908 WREG32(TC_CFG_L2_VOLATILE, 0);
4910 if (rdev->family == CHIP_KAVERI) {
4911 u32 tmp = RREG32(CHUB_CONTROL);
/* NOTE(review): the modification of tmp (line 4912) is elided here —
 * presumably clearing a bypass bit; confirm against full source. */
4913 WREG32(CHUB_CONTROL, tmp);
4916 /* XXX SH_MEM regs */
4917 /* where to put LDS, scratch, GPUVM in FSA64 space */
/* srbm_mutex serializes SRBM instance selection across callers. */
4918 mutex_lock(&rdev->srbm_mutex);
4919 for (i = 0; i < 16; i++) {
4920 cik_srbm_select(rdev, 0, 0, 0, i);
4921 /* CP and shaders */
4922 WREG32(SH_MEM_CONFIG, 0);
4923 WREG32(SH_MEM_APE1_BASE, 1);
4924 WREG32(SH_MEM_APE1_LIMIT, 0);
4925 WREG32(SH_MEM_BASES, 0);
4927 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4928 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4929 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4930 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4931 /* XXX SDMA RLC - todo */
/* Restore default SRBM selection before releasing the lock. */
4933 cik_srbm_select(rdev, 0, 0, 0, 0);
4934 mutex_unlock(&rdev->srbm_mutex);
4936 cik_pcie_gart_tlb_flush(rdev);
4937 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4938 (unsigned)(rdev->mc.gtt_size >> 20),
4939 (unsigned long long)rdev->gart.table_addr);
4940 rdev->gart.ready = true;
4945 * cik_pcie_gart_disable - gart disable
4947 * @rdev: radeon_device pointer
4949 * This disables all VM page table (CIK).
4951 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4953 /* Disable all tables */
4954 WREG32(VM_CONTEXT0_CNTL, 0);
4955 WREG32(VM_CONTEXT1_CNTL, 0);
4956 /* Setup TLB control */
4957 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4958 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4959 /* Setup L2 cache */
/* NOTE(review): the 'WREG32(VM_L2_CNTL, ...' opening line (4960) is
 * elided in this excerpt. */
4961 ENABLE_L2_FRAGMENT_PROCESSING |
4962 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4963 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4964 EFFECTIVE_L2_QUEUE_SIZE(7) |
4965 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4966 WREG32(VM_L2_CNTL2, 0);
4967 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4968 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4969 radeon_gart_table_vram_unpin(rdev);
4973 * cik_pcie_gart_fini - vm fini callback
4975 * @rdev: radeon_device pointer
4977 * Tears down the driver GART/VM setup (CIK).
4979 static void cik_pcie_gart_fini(struct radeon_device *rdev)
/* Disable first so hardware stops referencing the table, then free it. */
4981 cik_pcie_gart_disable(rdev);
4982 radeon_gart_table_vram_free(rdev);
4983 radeon_gart_fini(rdev);
4988 * cik_ib_parse - vm ib_parse callback
4990 * @rdev: radeon_device pointer
4991 * @ib: indirect buffer pointer
4993 * CIK uses hw IB checking so this is a nop (CIK).
4995 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
/* NOTE(review): the 'return 0;' body of this nop is elided in this excerpt. */
5002 * VMID 0 is the physical GPU addresses as used by the kernel.
5003 * VMIDs 1-15 are used for userspace clients and are handled
5004 * by the radeon vm/hsa code.
5007 * cik_vm_init - cik vm init callback
5009 * @rdev: radeon_device pointer
5011 * Inits cik specific vm parameters (number of VMs, base of vram for
5012 * VMIDs 1-15) (CIK).
5013 * Returns 0 for success.
5015 int cik_vm_init(struct radeon_device *rdev)
/* 16 VM contexts total: VMID 0 for the kernel, 1-15 for clients. */
5018 rdev->vm_manager.nvm = 16;
5019 /* base offset of vram pages */
5020 if (rdev->flags & RADEON_IS_IGP) {
/* On APUs carve the VM base out of the MC FB offset register;
 * NOTE(review): a shift of tmp (line 5022) appears elided here. */
5021 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5023 rdev->vm_manager.vram_base_offset = tmp;
5025 rdev->vm_manager.vram_base_offset = 0;
5031 * cik_vm_fini - cik vm fini callback
5033 * @rdev: radeon_device pointer
5035 * Tear down any asic specific VM setup (CIK).
5037 void cik_vm_fini(struct radeon_device *rdev)
5042 * cik_vm_decode_fault - print human readable fault info
5044 * @rdev: radeon_device pointer
5045 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5046 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5048 * Print human readable fault information (CIK).
5050 static void cik_vm_decode_fault(struct radeon_device *rdev,
5051 u32 status, u32 addr, u32 mc_client)
5054 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5055 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
/* Unpack the four ASCII bytes of the MC client id into a C string. */
5056 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5057 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
/* Hawaii uses a wider client-id field than the other CIK parts. */
5059 if (rdev->family == CHIP_HAWAII)
5060 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5062 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5064 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5065 protections, vmid, addr,
5066 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5067 block, mc_client, mc_id);
5071 * cik_vm_flush - cik vm flush using the CP
5073 * @rdev: radeon_device pointer
5075 * Update the page table base and flush the VM TLB
5076 * using the CP (CIK).
5078 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5080 struct radeon_ring *ring = &rdev->ring[ridx];
/* NOTE(review): the 'if (vm == NULL) return;' guard and the if/else
 * selecting between the two register banks (around lines 5081-5091)
 * are elided in this excerpt. */
/* Write the VM's page directory base into its context register; VMIDs
 * 0-7 and 8-15 live in two separate register banks. */
5085 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5086 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5087 WRITE_DATA_DST_SEL(0)));
5089 radeon_ring_write(ring,
5090 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5092 radeon_ring_write(ring,
5093 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5095 radeon_ring_write(ring, 0);
5096 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5098 /* update SH_MEM_* regs */
/* Select this VMID via SRBM_GFX_CNTL before touching SH_MEM_*. */
5099 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5100 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5101 WRITE_DATA_DST_SEL(0)));
5102 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5103 radeon_ring_write(ring, 0);
5104 radeon_ring_write(ring, VMID(vm->id));
5106 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5107 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5108 WRITE_DATA_DST_SEL(0)));
5109 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5110 radeon_ring_write(ring, 0);
5112 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5113 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5114 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5115 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
/* Switch SRBM back to VMID 0 (the kernel context). */
5117 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5118 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5119 WRITE_DATA_DST_SEL(0)));
5120 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5121 radeon_ring_write(ring, 0);
5122 radeon_ring_write(ring, VMID(0));
5125 /* We should be using the WAIT_REG_MEM packet here like in
5126 * cik_fence_ring_emit(), but it causes the CP to hang in this
/* Flush the HDP cache so host writes are visible to the GPU. */
5129 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5130 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5131 WRITE_DATA_DST_SEL(0)));
5132 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5133 radeon_ring_write(ring, 0);
5134 radeon_ring_write(ring, 0);
5136 /* bits 0-15 are the VM contexts0-15 */
/* Invalidate only this VM's TLB entries. */
5137 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5138 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5139 WRITE_DATA_DST_SEL(0)));
5140 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5141 radeon_ring_write(ring, 0);
5142 radeon_ring_write(ring, 1 << vm->id);
5144 /* compute doesn't have PFP */
5145 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5146 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5147 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5148 radeon_ring_write(ring, 0x0);
/* RLC section header:
5154 * The RLC is a multi-purpose microengine that handles a
5155 * variety of functions, the most important of which is
5156 * the interrupt controller.
/* Gate the context busy/empty interrupts in CP_INT_CNTL_RING0. */
5158 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
/* NOTE(review): the 'bool enable)' parameter line appears elided here. */
5161 u32 tmp = RREG32(CP_INT_CNTL_RING0);
5164 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5166 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5167 WREG32(CP_INT_CNTL_RING0, tmp);
/* Toggle RLC load balancing (LBPW). */
5170 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5174 tmp = RREG32(RLC_LB_CNTL);
5176 tmp |= LOAD_BALANCE_ENABLE;
5178 tmp &= ~LOAD_BALANCE_ENABLE;
5179 WREG32(RLC_LB_CNTL, tmp);
/* Poll the RLC serdes busy registers until idle (bounded by usec_timeout),
 * first per SE/SH for the CU masters, then once for the non-CU masters. */
5182 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5187 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5188 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5189 cik_select_se_sh(rdev, i, j);
5190 for (k = 0; k < rdev->usec_timeout; k++) {
5191 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
/* Restore broadcast SE/SH selection after the per-instance polling. */
5197 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5199 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5200 for (k = 0; k < rdev->usec_timeout; k++) {
5201 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/* Write a previously saved RLC_CNTL value back (used with cik_halt_rlc). */
5207 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5211 tmp = RREG32(RLC_CNTL);
/* NOTE(review): a 'if (tmp != rlc)' comparison (line 5212) appears elided. */
5213 WREG32(RLC_CNTL, rlc);
/* Stop the RLC if running, wait for it to go idle, and return the
 * original RLC_CNTL value so the caller can restore it. */
5216 static u32 cik_halt_rlc(struct radeon_device *rdev)
5220 orig = data = RREG32(RLC_CNTL);
5222 if (data & RLC_ENABLE) {
5225 data &= ~RLC_ENABLE;
5226 WREG32(RLC_CNTL, data);
5228 for (i = 0; i < rdev->usec_timeout; i++) {
5229 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5234 cik_wait_for_rlc_serdes(rdev);
/* NOTE(review): the 'return orig;' line appears elided in this excerpt. */
/* Ask the RLC to enter safe mode via RLC_GPR_REG2 handshake, then poll
 * for power/clock status and for the request bit to clear. */
5240 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5244 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5245 WREG32(RLC_GPR_REG2, tmp);
5247 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5248 for (i = 0; i < rdev->usec_timeout; i++) {
5249 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5254 for (i = 0; i < rdev->usec_timeout; i++) {
5255 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
/* Counterpart of enter: post the exit-safe-mode message (no polling). */
5261 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5265 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5266 WREG32(RLC_GPR_REG2, tmp);
5270 * cik_rlc_stop - stop the RLC ME
5272 * @rdev: radeon_device pointer
5274 * Halt the RLC ME (MicroEngine) (CIK).
5276 static void cik_rlc_stop(struct radeon_device *rdev)
5278 WREG32(RLC_CNTL, 0);
5280 cik_enable_gui_idle_interrupt(rdev, false);
5282 cik_wait_for_rlc_serdes(rdev);
5286 * cik_rlc_start - start the RLC ME
5288 * @rdev: radeon_device pointer
5290 * Unhalt the RLC ME (MicroEngine) (CIK).
5292 static void cik_rlc_start(struct radeon_device *rdev)
5294 WREG32(RLC_CNTL, RLC_ENABLE);
5296 cik_enable_gui_idle_interrupt(rdev, true);
/* NOTE(review): a settle delay (udelay) after starting the RLC appears
 * elided in this excerpt — confirm against full source. */
5302 * cik_rlc_resume - setup the RLC hw
5304 * @rdev: radeon_device pointer
5306 * Initialize the RLC registers, load the ucode,
5307 * and start the RLC (CIK).
5308 * Returns 0 for success, -EINVAL if the ucode is not available.
5310 static int cik_rlc_resume(struct radeon_device *rdev)
5313 const __be32 *fw_data;
/* NOTE(review): the rlc_fw NULL check and the per-family case labels of
 * this switch are elided in this excerpt. */
5318 switch (rdev->family) {
5321 size = BONAIRE_RLC_UCODE_SIZE;
5324 size = KV_RLC_UCODE_SIZE;
5327 size = KB_RLC_UCODE_SIZE;
/* Disable CGCG/CGLS while reprogramming the RLC (clear low two bits). */
5334 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5335 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5343 WREG32(RLC_LB_CNTR_INIT, 0);
5344 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
/* Broadcast to all SEs/SHs for the load-balancer setup. */
5346 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5347 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5348 WREG32(RLC_LB_PARAMS, 0x00600408);
5349 WREG32(RLC_LB_CNTL, 0x80000004);
5351 WREG32(RLC_MC_CNTL, 0);
5352 WREG32(RLC_UCODE_CNTL, 0);
/* Upload the big-endian RLC microcode words one at a time. */
5354 fw_data = (const __be32 *)rdev->rlc_fw->data;
5355 WREG32(RLC_GPM_UCODE_ADDR, 0);
5356 for (i = 0; i < size; i++)
5357 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5358 WREG32(RLC_GPM_UCODE_ADDR, 0);
5360 /* XXX - find out what chips support lbpw */
5361 cik_enable_lbpw(rdev, false);
5363 if (rdev->family == CHIP_BONAIRE)
5364 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5366 cik_rlc_start(rdev);
/* NOTE(review): the 'return 0;' line appears elided in this excerpt. */
/* Toggle coarse-grain clock gating (CGCG/CGLS) for the GFX block. */
5371 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5373 u32 data, orig, tmp, tmp2;
5375 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5377 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5378 cik_enable_gui_idle_interrupt(rdev, true);
/* Program the serdes BPM while the RLC is halted, then restore it. */
5380 tmp = cik_halt_rlc(rdev);
5382 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5383 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5384 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5385 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5386 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5388 cik_update_rlc(rdev, tmp);
5390 data |= CGCG_EN | CGLS_EN;
5392 cik_enable_gui_idle_interrupt(rdev, false);
/* Multiple reads used as a settle/flush before disabling gating. */
5394 RREG32(CB_CGTT_SCLK_CTRL);
5395 RREG32(CB_CGTT_SCLK_CTRL);
5396 RREG32(CB_CGTT_SCLK_CTRL);
5397 RREG32(CB_CGTT_SCLK_CTRL);
5399 data &= ~(CGCG_EN | CGLS_EN);
/* NOTE(review): the 'if (orig != data)' write-avoidance guard appears
 * elided before this register write. */
5403 WREG32(RLC_CGCG_CGLS_CTRL, data);
/* Toggle medium-grain clock gating (MGCG) and related LS/CGTS state. */
5407 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5409 u32 data, orig, tmp = 0;
5411 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5412 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5413 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5414 orig = data = RREG32(CP_MEM_SLP_CNTL);
5415 data |= CP_MEM_LS_EN;
5417 WREG32(CP_MEM_SLP_CNTL, data);
5421 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
/* NOTE(review): the modification of data (clearing override bits,
 * line ~5422) is elided in this excerpt. */
5424 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5426 tmp = cik_halt_rlc(rdev);
5428 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5429 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5430 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5431 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5432 WREG32(RLC_SERDES_WR_CTRL, data);
5434 cik_update_rlc(rdev, tmp);
5436 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5437 orig = data = RREG32(CGTS_SM_CTRL_REG);
5438 data &= ~SM_MODE_MASK;
5439 data |= SM_MODE(0x2);
5440 data |= SM_MODE_ENABLE;
5441 data &= ~CGTS_OVERRIDE;
5442 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5443 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5444 data &= ~CGTS_LS_OVERRIDE;
5445 data &= ~ON_MONITOR_ADD_MASK;
5446 data |= ON_MONITOR_ADD_EN;
5447 data |= ON_MONITOR_ADD(0x96);
5449 WREG32(CGTS_SM_CTRL_REG, data);
/* Disable path: force overrides back on and turn memory LS off. */
5452 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5455 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5457 data = RREG32(RLC_MEM_SLP_CNTL);
5458 if (data & RLC_MEM_LS_EN) {
5459 data &= ~RLC_MEM_LS_EN;
5460 WREG32(RLC_MEM_SLP_CNTL, data);
5463 data = RREG32(CP_MEM_SLP_CNTL);
5464 if (data & CP_MEM_LS_EN) {
5465 data &= ~CP_MEM_LS_EN;
5466 WREG32(CP_MEM_SLP_CNTL, data);
5469 orig = data = RREG32(CGTS_SM_CTRL_REG);
5470 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5472 WREG32(CGTS_SM_CTRL_REG, data);
5474 tmp = cik_halt_rlc(rdev);
5476 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5477 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5478 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5479 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5480 WREG32(RLC_SERDES_WR_CTRL, data);
5482 cik_update_rlc(rdev, tmp);
/* Table of MC clock-gating control registers shared by the MC LS/MGCG
 * toggles below; NOTE(review): the register entries themselves
 * (lines 5487-5497) are elided in this excerpt. */
5486 static const u32 mc_cg_registers[] =
/* Toggle the light-sleep (LS) bit in every MC CG register. */
5499 static void cik_enable_mc_ls(struct radeon_device *rdev,
5505 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5506 orig = data = RREG32(mc_cg_registers[i]);
5507 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5508 data |= MC_LS_ENABLE;
5510 data &= ~MC_LS_ENABLE;
5512 WREG32(mc_cg_registers[i], data);
/* Toggle the clock-gating (CG) bit in every MC CG register. */
5516 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5522 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5523 orig = data = RREG32(mc_cg_registers[i]);
5524 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5525 data |= MC_CG_ENABLE;
5527 data &= ~MC_CG_ENABLE;
5529 WREG32(mc_cg_registers[i], data);
/* Toggle SDMA medium-grain clock gating on both SDMA engines. */
5533 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5538 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5539 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5540 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5542 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
/* NOTE(review): the data modification and write-avoidance guard lines
 * are elided around each of the register writes below. */
5545 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5547 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5550 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
/* Toggle SDMA memory light sleep on both SDMA engines. */
5554 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5559 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5560 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5563 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5565 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5568 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5570 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5573 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5575 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5578 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
/* Toggle UVD medium-grain clock gating (uses the UVD context register
 * space via RREG32_UVD_CTX/WREG32_UVD_CTX accessors). */
5582 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5587 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5588 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5590 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5592 orig = data = RREG32(UVD_CGC_CTRL);
5595 WREG32(UVD_CGC_CTRL, data);
5597 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5599 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5601 orig = data = RREG32(UVD_CGC_CTRL);
5604 WREG32(UVD_CGC_CTRL, data);
/* Toggle BIF (PCIe) memory light-sleep bits in PCIE_CNTL2. */
5608 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5613 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5615 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5616 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5617 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5619 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5620 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5623 WREG32_PCIE_PORT(PCIE_CNTL2, data);
/* Toggle HDP medium-grain clock gating (inverted sense: the register
 * bit DISABLES gating). */
5626 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5631 orig = data = RREG32(HDP_HOST_PATH_CNTL);
5633 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5634 data &= ~CLOCK_GATING_DIS;
5636 data |= CLOCK_GATING_DIS;
5639 WREG32(HDP_HOST_PATH_CNTL, data);
/* Toggle HDP memory light sleep. */
5642 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5647 orig = data = RREG32(HDP_MEM_POWER_LS);
5649 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5650 data |= HDP_LS_ENABLE;
5652 data &= ~HDP_LS_ENABLE;
5655 WREG32(HDP_MEM_POWER_LS, data);
/* Dispatch clock-gating enable/disable per block bitmask. */
5658 void cik_update_cg(struct radeon_device *rdev,
5659 u32 block, bool enable)
5662 if (block & RADEON_CG_BLOCK_GFX) {
5663 cik_enable_gui_idle_interrupt(rdev, false);
5664 /* order matters! */
/* Enable MGCG before CGCG; disable in the reverse order. */
5666 cik_enable_mgcg(rdev, true);
5667 cik_enable_cgcg(rdev, true);
5669 cik_enable_cgcg(rdev, false);
5670 cik_enable_mgcg(rdev, false);
5672 cik_enable_gui_idle_interrupt(rdev, true);
5675 if (block & RADEON_CG_BLOCK_MC) {
/* MC gating is skipped on APUs (integrated parts). */
5676 if (!(rdev->flags & RADEON_IS_IGP)) {
5677 cik_enable_mc_mgcg(rdev, enable);
5678 cik_enable_mc_ls(rdev, enable);
5682 if (block & RADEON_CG_BLOCK_SDMA) {
5683 cik_enable_sdma_mgcg(rdev, enable);
5684 cik_enable_sdma_mgls(rdev, enable);
5687 if (block & RADEON_CG_BLOCK_BIF) {
5688 cik_enable_bif_mgls(rdev, enable);
5691 if (block & RADEON_CG_BLOCK_UVD) {
/* NOTE(review): a guard (likely 'if (rdev->has_uvd)') appears elided
 * before this call. */
5693 cik_enable_uvd_mgcg(rdev, enable);
5696 if (block & RADEON_CG_BLOCK_HDP) {
5697 cik_enable_hdp_mgcg(rdev, enable);
5698 cik_enable_hdp_ls(rdev, enable);
/* Enable all supported clock gating at init: GFX first, then the rest. */
5702 static void cik_init_cg(struct radeon_device *rdev)
5705 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
/* NOTE(review): a guard (likely 'if (rdev->has_uvd)') appears elided
 * before this call. */
5708 si_init_uvd_internal_cg(rdev);
5710 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5711 RADEON_CG_BLOCK_SDMA |
5712 RADEON_CG_BLOCK_BIF |
5713 RADEON_CG_BLOCK_UVD |
5714 RADEON_CG_BLOCK_HDP), true);
/* Teardown mirrors init in reverse: non-GFX blocks first, GFX last. */
5717 static void cik_fini_cg(struct radeon_device *rdev)
5719 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5720 RADEON_CG_BLOCK_SDMA |
5721 RADEON_CG_BLOCK_BIF |
5722 RADEON_CG_BLOCK_UVD |
5723 RADEON_CG_BLOCK_HDP), false);
5725 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
/* Toggle SMU clock slowdown on power-up in RLC_PG_CNTL. */
5728 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5733 orig = data = RREG32(RLC_PG_CNTL);
5734 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5735 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5737 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5739 WREG32(RLC_PG_CNTL, data);
/* Toggle SMU clock slowdown on power-down in RLC_PG_CNTL. */
5742 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5747 orig = data = RREG32(RLC_PG_CNTL);
5748 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5749 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5751 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5753 WREG32(RLC_PG_CNTL, data);
/* Toggle CP power gating (inverted sense: the bit DISABLES PG). */
5756 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5760 orig = data = RREG32(RLC_PG_CNTL);
5761 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5762 data &= ~DISABLE_CP_PG;
5764 data |= DISABLE_CP_PG;
5766 WREG32(RLC_PG_CNTL, data);
/* Toggle GDS power gating (inverted sense, same pattern as CP PG). */
5769 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5773 orig = data = RREG32(RLC_PG_CNTL);
5774 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5775 data &= ~DISABLE_GDS_PG;
5777 data |= DISABLE_GDS_PG;
5779 WREG32(RLC_PG_CNTL, data);
/* Layout constants for the CP ucode table copied into the PG buffer:
 * each ME contributes CP_ME_TABLE_SIZE dwords taken at a fixed offset
 * within its firmware image. */
5782 #define CP_ME_TABLE_SIZE 96
5783 #define CP_ME_TABLE_OFFSET 2048
5784 #define CP_MEC_TABLE_OFFSET 4096
/* Copy CE/PFP/ME/MEC ucode table sections into the RLC's CP table BO
 * so the RLC can restore CP state after power gating. */
5786 void cik_init_cp_pg_table(struct radeon_device *rdev)
5788 const __be32 *fw_data;
5789 volatile u32 *dst_ptr;
5790 int me, i, max_me = 4;
/* NOTE(review): the bo_offset/table_offset declarations and the Kaveri
 * max_me override value (line ~5795) are elided in this excerpt. */
5794 if (rdev->family == CHIP_KAVERI)
5797 if (rdev->rlc.cp_table_ptr == NULL)
5800 /* write the cp table buffer */
5801 dst_ptr = rdev->rlc.cp_table_ptr;
5802 for (me = 0; me < max_me; me++) {
/* me 0 = CE, 1 = PFP, 2 = ME, 3+ = MEC firmware sources. */
5804 fw_data = (const __be32 *)rdev->ce_fw->data;
5805 table_offset = CP_ME_TABLE_OFFSET;
5806 } else if (me == 1) {
5807 fw_data = (const __be32 *)rdev->pfp_fw->data;
5808 table_offset = CP_ME_TABLE_OFFSET;
5809 } else if (me == 2) {
5810 fw_data = (const __be32 *)rdev->me_fw->data;
5811 table_offset = CP_ME_TABLE_OFFSET;
5813 fw_data = (const __be32 *)rdev->mec_fw->data;
5814 table_offset = CP_MEC_TABLE_OFFSET;
/* Convert big-endian firmware words to CPU order, store little-endian. */
5817 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
5818 dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
5820 bo_offset += CP_ME_TABLE_SIZE;
/* Toggle GFX coarse-grain power gating and RLC auto power-gating. */
5824 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
5829 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5830 orig = data = RREG32(RLC_PG_CNTL);
5831 data |= GFX_PG_ENABLE;
5833 WREG32(RLC_PG_CNTL, data);
5835 orig = data = RREG32(RLC_AUTO_PG_CTRL);
/* NOTE(review): the AUTO_PG_EN set (line ~5836-5837) appears elided. */
5838 WREG32(RLC_AUTO_PG_CTRL, data);
5840 orig = data = RREG32(RLC_PG_CNTL);
5841 data &= ~GFX_PG_ENABLE;
5843 WREG32(RLC_PG_CNTL, data);
5845 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5846 data &= ~AUTO_PG_EN;
5848 WREG32(RLC_AUTO_PG_CTRL, data);
/* Dummy read to make sure the disable has landed. */
5850 data = RREG32(DB_RENDER_CONTROL);
/* Return the bitmap of active (non-disabled) CUs for the given SE/SH,
 * derived from the fused and user shader-array config registers. */
5854 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5856 u32 mask = 0, tmp, tmp1;
5859 cik_select_se_sh(rdev, se, sh);
5860 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5861 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5862 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
/* NOTE(review): the combining/shifting of tmp and tmp1 (lines 5863-5868)
 * is elided in this excerpt. */
5869 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
/* Registers hold inactive-CU bits, so invert against the mask. */
5874 return (~tmp) & mask;
/* Build the always-on CU mask and program the max power-gated CU count. */
5877 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5879 u32 i, j, k, active_cu_number = 0;
5880 u32 mask, counter, cu_bitmap;
5883 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5884 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
/* NOTE(review): per-SH mask/counter/cu_bitmap resets (lines 5885-5887)
 * are elided in this excerpt. */
5888 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5889 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5897 active_cu_number += counter;
/* Pack each SH's bitmap into its 8-bit lane of the 32-bit mask. */
5898 tmp |= (cu_bitmap << (i * 16 + j * 8));
5902 WREG32(RLC_PG_AO_CU_MASK, tmp);
5904 tmp = RREG32(RLC_MAX_PG_CU);
5905 tmp &= ~MAX_PU_CU_MASK;
5906 tmp |= MAX_PU_CU(active_cu_number);
5907 WREG32(RLC_MAX_PG_CU, tmp);
/* Toggle static per-CU medium-grain power gating. */
5910 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5915 orig = data = RREG32(RLC_PG_CNTL);
5916 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5917 data |= STATIC_PER_CU_PG_ENABLE;
5919 data &= ~STATIC_PER_CU_PG_ENABLE;
5921 WREG32(RLC_PG_CNTL, data);
/* Toggle dynamic per-CU medium-grain power gating. */
5924 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5929 orig = data = RREG32(RLC_PG_CNTL);
5930 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5931 data |= DYN_PER_CU_PG_ENABLE;
5933 data &= ~DYN_PER_CU_PG_ENABLE;
5935 WREG32(RLC_PG_CNTL, data);
/* Fixed offsets into the RLC GPM scratch where the save/restore list and
 * the clear-state descriptor are stored. */
5938 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5939 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
/* Program the RLC scratch with the clear-state descriptor and register
 * save/restore list, then set PG timing parameters. */
5941 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
5946 if (rdev->rlc.cs_data) {
/* Descriptor = {hi addr, lo addr, size} of the clear-state buffer. */
5947 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5948 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
5949 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
5950 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
/* No clear-state data: zero out the three descriptor words. */
5952 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5953 for (i = 0; i < 3; i++)
5954 WREG32(RLC_GPM_SCRATCH_DATA, 0);
5956 if (rdev->rlc.reg_list) {
5957 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
5958 for (i = 0; i < rdev->rlc.reg_list_size; i++)
5959 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
5962 orig = data = RREG32(RLC_PG_CNTL);
/* NOTE(review): the bit set on data (line ~5963-5964) is elided here. */
5965 WREG32(RLC_PG_CNTL, data);
5967 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5968 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
5970 data = RREG32(CP_RB_WPTR_POLL_CNTL);
5971 data &= ~IDLE_POLL_COUNT_MASK;
5972 data |= IDLE_POLL_COUNT(0x60);
5973 WREG32(CP_RB_WPTR_POLL_CNTL, data);
/* NOTE(review): the value assigned to data before this write (line ~5975)
 * is elided in this excerpt. */
5976 WREG32(RLC_PG_DELAY, data);
5978 data = RREG32(RLC_PG_DELAY_2);
5981 WREG32(RLC_PG_DELAY_2, data);
5983 data = RREG32(RLC_AUTO_PG_CTRL);
5984 data &= ~GRBM_REG_SGIT_MASK;
5985 data |= GRBM_REG_SGIT(0x700);
5986 WREG32(RLC_AUTO_PG_CTRL, data);
/* Apply all three GFX power-gating modes together. */
5990 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
5992 cik_enable_gfx_cgpg(rdev, enable);
5993 cik_enable_gfx_static_mgpg(rdev, enable);
5994 cik_enable_gfx_dynamic_mgpg(rdev, enable);
/* Compute the dword count of the clear-state buffer (CSB) that
 * cik_get_csb_buffer() will emit; must stay in sync with it. */
5997 u32 cik_get_csb_size(struct radeon_device *rdev)
6000 const struct cs_section_def *sect = NULL;
6001 const struct cs_extent_def *ext = NULL;
6003 if (rdev->rlc.cs_data == NULL)
/* NOTE(review): count declaration, 'return 0;' and the fixed-size
 * increments between the comment lines below are elided in this excerpt. */
6006 /* begin clear state */
6008 /* context control state */
6011 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6012 for (ext = sect->section; ext->extent != NULL; ++ext) {
/* Each context extent costs a 2-dword header plus its registers. */
6013 if (sect->id == SECT_CONTEXT)
6014 count += 2 + ext->reg_count;
6019 /* pa_sc_raster_config/pa_sc_raster_config1 */
6021 /* end clear state */
/* Emit the clear-state buffer into @buffer; layout mirrors
 * cik_get_csb_size() above. */
6029 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6032 const struct cs_section_def *sect = NULL;
6033 const struct cs_extent_def *ext = NULL;
6035 if (rdev->rlc.cs_data == NULL)
/* Preamble: begin clear state + context control. */
6040 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6041 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6043 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6044 buffer[count++] = cpu_to_le32(0x80000000);
6045 buffer[count++] = cpu_to_le32(0x80000000);
6047 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6048 for (ext = sect->section; ext->extent != NULL; ++ext) {
6049 if (sect->id == SECT_CONTEXT) {
6051 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
/* Context registers are addressed relative to 0xa000. */
6052 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6053 for (i = 0; i < ext->reg_count; i++)
6054 buffer[count++] = cpu_to_le32(ext->extent[i]);
/* Per-family PA_SC_RASTER_CONFIG/CONFIG_1 values; only Bonaire has a
 * concrete value here, the rest are placeholders (XXX). */
6061 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6062 buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6063 switch (rdev->family) {
/* NOTE(review): the case labels of this switch (CHIP_BONAIRE etc.)
 * are elided in this excerpt. */
6065 buffer[count++] = cpu_to_le32(0x16000012);
6066 buffer[count++] = cpu_to_le32(0x00000000);
6069 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6070 buffer[count++] = cpu_to_le32(0x00000000);
6073 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6074 buffer[count++] = cpu_to_le32(0x00000000);
6077 buffer[count++] = cpu_to_le32(0x00000000);
6078 buffer[count++] = cpu_to_le32(0x00000000);
/* Postamble: end clear state + CLEAR_STATE packet. */
6082 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6083 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6085 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6086 buffer[count++] = cpu_to_le32(0);
/* Enable powergating at init time, gated on rdev->pg_flags.  GFX PG setup
 * (cgpg init, CP/GDS PG, AO CU mask) only runs when the asic advertises
 * RADEON_PG_SUPPORT_GFX_PG; the final cik_update_gfx_pg(true) turns the
 * gfx PG features on. */
6089 static void cik_init_pg(struct radeon_device *rdev)
6091 if (rdev->pg_flags) {
6092 cik_enable_sck_slowdown_on_pu(rdev, true);
6093 cik_enable_sck_slowdown_on_pd(rdev, true);
6094 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6095 cik_init_gfx_cgpg(rdev);
6096 cik_enable_cp_pg(rdev, true);
6097 cik_enable_gds_pg(rdev, true);
6099 cik_init_ao_cu_mask(rdev);
6100 cik_update_gfx_pg(rdev, true);
/* Tear down powergating — mirror image of cik_init_pg(): disable the gfx
 * PG features first, then CP/GDS PG when GFX PG was supported. */
6104 static void cik_fini_pg(struct radeon_device *rdev)
6106 if (rdev->pg_flags) {
6107 cik_update_gfx_pg(rdev, false);
6108 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6109 cik_enable_cp_pg(rdev, false);
6110 cik_enable_gds_pg(rdev, false);
6117 * Starting with r6xx, interrupts are handled via a ring buffer.
6118 * Ring buffers are areas of GPU accessible memory that the GPU
6119 * writes interrupt vectors into and the host reads vectors out of.
6120 * There is a rptr (read pointer) that determines where the
6121 * host is currently reading, and a wptr (write pointer)
6122 * which determines where the GPU has written. When the
6123 * pointers are equal, the ring is idle. When the GPU
6124 * writes vectors to the ring buffer, it increments the
6125 * wptr. When there is an interrupt, the host then starts
6126 * fetching commands and processing them until the pointers are
6127 * equal again at which point it updates the rptr.
6131 * cik_enable_interrupts - Enable the interrupt ring buffer
6133 * @rdev: radeon_device pointer
6135 * Enable the interrupt ring buffer (CIK).
6137 static void cik_enable_interrupts(struct radeon_device *rdev)
/* read-modify-write both control registers so unrelated bits survive */
6139 u32 ih_cntl = RREG32(IH_CNTL);
6140 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6142 ih_cntl |= ENABLE_INTR;
6143 ih_rb_cntl |= IH_RB_ENABLE;
6144 WREG32(IH_CNTL, ih_cntl);
6145 WREG32(IH_RB_CNTL, ih_rb_cntl);
/* software-side flag consulted by cik_irq_set()/cik_irq_process() */
6146 rdev->ih.enabled = true;
6150 * cik_disable_interrupts - Disable the interrupt ring buffer
6152 * @rdev: radeon_device pointer
6154 * Disable the interrupt ring buffer (CIK).
6156 static void cik_disable_interrupts(struct radeon_device *rdev)
6158 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6159 u32 ih_cntl = RREG32(IH_CNTL);
/* disable the ring before the master enable bit */
6161 ih_rb_cntl &= ~IH_RB_ENABLE;
6162 ih_cntl &= ~ENABLE_INTR;
6163 WREG32(IH_RB_CNTL, ih_rb_cntl);
6164 WREG32(IH_CNTL, ih_cntl);
6165 /* set rptr, wptr to 0 */
6166 WREG32(IH_RB_RPTR, 0);
6167 WREG32(IH_RB_WPTR, 0);
6168 rdev->ih.enabled = false;
6173 * cik_disable_interrupt_state - Disable all interrupt sources
6175 * @rdev: radeon_device pointer
6177 * Clear all interrupt enable bits used by the driver (CIK).
6179 static void cik_disable_interrupt_state(struct radeon_device *rdev)
/* gfx ring 0: keep only the context busy/empty enables, drop the rest */
6184 tmp = RREG32(CP_INT_CNTL_RING0) &
6185 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6186 WREG32(CP_INT_CNTL_RING0, tmp);
/* both SDMA engines: clear the trap-interrupt enable */
6188 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6189 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6190 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6191 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6192 /* compute queues */
6193 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6194 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6195 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6196 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6197 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6198 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6199 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6200 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6202 WREG32(GRBM_INT_CNTL, 0);
6203 /* vline/vblank, etc. */
6204 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6205 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
/* CRTCs 2-5 only exist on larger display configs */
6206 if (rdev->num_crtc >= 4) {
6207 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6208 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6210 if (rdev->num_crtc >= 6) {
6211 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6212 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6216 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6218 /* digital hotplug */
/* preserve only the HPD polarity bit while clearing the enables */
6219 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6220 WREG32(DC_HPD1_INT_CONTROL, tmp);
6221 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6222 WREG32(DC_HPD2_INT_CONTROL, tmp);
6223 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6224 WREG32(DC_HPD3_INT_CONTROL, tmp);
6225 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6226 WREG32(DC_HPD4_INT_CONTROL, tmp);
6227 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6228 WREG32(DC_HPD5_INT_CONTROL, tmp);
6229 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6230 WREG32(DC_HPD6_INT_CONTROL, tmp);
6235 * cik_irq_init - init and enable the interrupt ring
6237 * @rdev: radeon_device pointer
6239 * Allocate a ring buffer for the interrupt controller,
6240 * enable the RLC, disable interrupts, enable the IH
6241 * ring buffer and enable it (CIK).
6242 * Called at device load and resume.
6243 * Returns 0 for success, errors for failure.
6245 static int cik_irq_init(struct radeon_device *rdev)
6249 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
/* allocate the IH ring buffer */
6252 ret = r600_ih_ring_alloc(rdev);
/* quiesce the hardware before reprogramming it */
6257 cik_disable_interrupts(rdev);
6260 ret = cik_rlc_resume(rdev);
/* RLC bring-up failed — release the IH ring we just allocated */
6262 r600_ih_ring_fini(rdev);
6266 /* setup interrupt control */
6267 /* XXX this should actually be a bus address, not an MC address. same on older asics */
6268 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6269 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6270 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6271 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6273 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6274 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6275 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6276 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6278 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
/* ring size field is log2 of the dword count */
6279 rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6281 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6282 IH_WPTR_OVERFLOW_CLEAR |
6285 if (rdev->wb.enabled)
6286 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6288 /* set the writeback address whether it's enabled or not */
6289 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6290 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6292 WREG32(IH_RB_CNTL, ih_rb_cntl);
6294 /* set rptr, wptr to 0 */
6295 WREG32(IH_RB_RPTR, 0);
6296 WREG32(IH_RB_WPTR, 0);
6298 /* Default settings for IH_CNTL (disabled at first) */
6299 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6300 /* RPTR_REARM only works if msi's are enabled */
6301 if (rdev->msi_enabled)
6302 ih_cntl |= RPTR_REARM;
6303 WREG32(IH_CNTL, ih_cntl);
6305 /* force the active interrupt state to all disabled */
6306 cik_disable_interrupt_state(rdev);
/* bus mastering must be on for MSI / IH writeback to reach host memory */
6308 pci_set_master(rdev->pdev);
6311 cik_enable_interrupts(rdev);
6317 * cik_irq_set - enable/disable interrupt sources
6319 * @rdev: radeon_device pointer
6321 * Enable interrupt sources on the GPU (vblanks, hpd,
6323 * Returns 0 for success, errors for failure.
/* Build the per-source interrupt enable masks from rdev->irq state and
 * program them all at the end.  Each *_INT_CNTL value is derived via
 * read-modify-write so hardware bits we do not manage are preserved. */
6325 int cik_irq_set(struct radeon_device *rdev)
6328 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6329 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6330 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6331 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6332 u32 grbm_int_cntl = 0;
6333 u32 dma_cntl, dma_cntl1;
/* refuse to touch hardware before an IRQ handler is installed */
6336 if (!rdev->irq.installed) {
6337 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6340 /* don't enable anything if the ih is disabled */
6341 if (!rdev->ih.enabled) {
6342 cik_disable_interrupts(rdev);
6343 /* force the active interrupt state to all disabled */
6344 cik_disable_interrupt_state(rdev);
/* gfx ring: keep context busy/empty bits, always take priv reg/inst faults */
6348 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6349 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6350 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
/* start each HPD mask from the current register with the enable cleared */
6352 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6353 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6354 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6355 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6356 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6357 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6359 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6360 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
/* compute pipes: clear the timestamp (fence EOP) enable by default */
6362 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6363 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6364 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6365 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6366 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6367 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6368 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6369 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
/* APUs and dGPUs expose the thermal interrupt in different SMC regs */
6371 if (rdev->flags & RADEON_IS_IGP)
6372 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6373 ~(THERM_INTH_MASK | THERM_INTL_MASK);
6375 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6376 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6378 /* enable CP interrupts on all rings */
6379 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6380 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6381 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6383 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6384 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6385 DRM_DEBUG("si_irq_set: sw int cp1\n");
6386 if (ring->me == 1) {
6387 switch (ring->pipe) {
6389 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6392 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6395 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
/* BUGFIX: pipe 3 must enable cp_m1p3, not cp_m1p2 (copy/paste typo;
 * cp_m1p3 was never set yet is written to CP_ME1_PIPE3_INT_CNTL below) */
6398 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6401 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6404 } else if (ring->me == 2) {
6405 switch (ring->pipe) {
6407 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6410 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6413 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
/* BUGFIX: pipe 3 must enable cp_m2p3, not cp_m2p2 */
6416 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6419 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6423 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6426 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6427 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6428 DRM_DEBUG("si_irq_set: sw int cp2\n");
6429 if (ring->me == 1) {
6430 switch (ring->pipe) {
6432 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6435 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6438 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
/* BUGFIX: pipe 3 must enable cp_m1p3, not cp_m1p2 */
6441 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6444 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6447 } else if (ring->me == 2) {
6448 switch (ring->pipe) {
6450 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6453 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6456 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
/* BUGFIX: pipe 3 must enable cp_m2p3, not cp_m2p2 */
6459 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6462 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6466 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6470 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6471 DRM_DEBUG("cik_irq_set: sw int dma\n");
6472 dma_cntl |= TRAP_ENABLE;
6475 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6476 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6477 dma_cntl1 |= TRAP_ENABLE;
/* vblank on a crtc is wanted for either the drm vblank machinery or a
 * pending page flip */
6480 if (rdev->irq.crtc_vblank_int[0] ||
6481 atomic_read(&rdev->irq.pflip[0])) {
6482 DRM_DEBUG("cik_irq_set: vblank 0\n");
6483 crtc1 |= VBLANK_INTERRUPT_MASK;
6485 if (rdev->irq.crtc_vblank_int[1] ||
6486 atomic_read(&rdev->irq.pflip[1])) {
6487 DRM_DEBUG("cik_irq_set: vblank 1\n");
6488 crtc2 |= VBLANK_INTERRUPT_MASK;
6490 if (rdev->irq.crtc_vblank_int[2] ||
6491 atomic_read(&rdev->irq.pflip[2])) {
6492 DRM_DEBUG("cik_irq_set: vblank 2\n");
6493 crtc3 |= VBLANK_INTERRUPT_MASK;
6495 if (rdev->irq.crtc_vblank_int[3] ||
6496 atomic_read(&rdev->irq.pflip[3])) {
6497 DRM_DEBUG("cik_irq_set: vblank 3\n");
6498 crtc4 |= VBLANK_INTERRUPT_MASK;
6500 if (rdev->irq.crtc_vblank_int[4] ||
6501 atomic_read(&rdev->irq.pflip[4])) {
6502 DRM_DEBUG("cik_irq_set: vblank 4\n");
6503 crtc5 |= VBLANK_INTERRUPT_MASK;
6505 if (rdev->irq.crtc_vblank_int[5] ||
6506 atomic_read(&rdev->irq.pflip[5])) {
6507 DRM_DEBUG("cik_irq_set: vblank 5\n");
6508 crtc6 |= VBLANK_INTERRUPT_MASK;
6510 if (rdev->irq.hpd[0]) {
6511 DRM_DEBUG("cik_irq_set: hpd 1\n");
6512 hpd1 |= DC_HPDx_INT_EN;
6514 if (rdev->irq.hpd[1]) {
6515 DRM_DEBUG("cik_irq_set: hpd 2\n");
6516 hpd2 |= DC_HPDx_INT_EN;
6518 if (rdev->irq.hpd[2]) {
6519 DRM_DEBUG("cik_irq_set: hpd 3\n");
6520 hpd3 |= DC_HPDx_INT_EN;
6522 if (rdev->irq.hpd[3]) {
6523 DRM_DEBUG("cik_irq_set: hpd 4\n");
6524 hpd4 |= DC_HPDx_INT_EN;
6526 if (rdev->irq.hpd[4]) {
6527 DRM_DEBUG("cik_irq_set: hpd 5\n");
6528 hpd5 |= DC_HPDx_INT_EN;
6530 if (rdev->irq.hpd[5]) {
6531 DRM_DEBUG("cik_irq_set: hpd 6\n");
6532 hpd6 |= DC_HPDx_INT_EN;
6535 if (rdev->irq.dpm_thermal) {
6536 DRM_DEBUG("dpm thermal\n");
6537 if (rdev->flags & RADEON_IS_IGP)
6538 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6540 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
/* commit all the masks computed above to hardware */
6543 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6545 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6546 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6548 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6549 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6550 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6551 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6552 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6553 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6554 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6555 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6557 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6559 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6560 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6561 if (rdev->num_crtc >= 4) {
6562 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6563 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6565 if (rdev->num_crtc >= 6) {
6566 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6567 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6570 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6571 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6572 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6573 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6574 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6575 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6577 if (rdev->flags & RADEON_IS_IGP)
6578 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6580 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6586 * cik_irq_ack - ack interrupt sources
6588 * @rdev: radeon_device pointer
6590 * Ack interrupt sources on the GPU (vblanks, hpd,
6591 * etc.) (CIK). Certain interrupt sources are sw
6592 * generated and do not require an explicit ack.
6594 static inline void cik_irq_ack(struct radeon_device *rdev)
/* snapshot all display interrupt status registers; cik_irq_process()
 * consumes (and clears bits in) these cached copies */
6598 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6599 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6600 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6601 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6602 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6603 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6604 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
/* ack vblank/vline on the two always-present CRTCs */
6606 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6607 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6608 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6609 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6610 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6611 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6612 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6613 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
/* CRTCs 2-5 only on configs that have them */
6615 if (rdev->num_crtc >= 4) {
6616 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6617 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6618 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6619 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6620 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6621 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6622 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6623 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6626 if (rdev->num_crtc >= 6) {
6627 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6628 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6629 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6630 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6631 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6632 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6633 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6634 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
/* ack hotplug: set the ACK bit via RMW of each pad's own control reg */
6637 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6638 tmp = RREG32(DC_HPD1_INT_CONTROL);
6639 tmp |= DC_HPDx_INT_ACK;
6640 WREG32(DC_HPD1_INT_CONTROL, tmp);
6642 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6643 tmp = RREG32(DC_HPD2_INT_CONTROL);
6644 tmp |= DC_HPDx_INT_ACK;
6645 WREG32(DC_HPD2_INT_CONTROL, tmp);
6647 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6648 tmp = RREG32(DC_HPD3_INT_CONTROL);
6649 tmp |= DC_HPDx_INT_ACK;
6650 WREG32(DC_HPD3_INT_CONTROL, tmp);
6652 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6653 tmp = RREG32(DC_HPD4_INT_CONTROL);
6654 tmp |= DC_HPDx_INT_ACK;
6655 WREG32(DC_HPD4_INT_CONTROL, tmp);
6657 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6658 tmp = RREG32(DC_HPD5_INT_CONTROL);
6659 tmp |= DC_HPDx_INT_ACK;
6660 WREG32(DC_HPD5_INT_CONTROL, tmp);
6662 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
/* BUGFIX: was RREG32(DC_HPD5_INT_CONTROL) — the HPD6 ack must
 * read-modify-write HPD6's own register, otherwise the write below
 * clobbers DC_HPD6_INT_CONTROL with HPD5's enable/polarity bits */
6663 tmp = RREG32(DC_HPD6_INT_CONTROL);
6664 tmp |= DC_HPDx_INT_ACK;
6665 WREG32(DC_HPD6_INT_CONTROL, tmp);
6670 * cik_irq_disable - disable interrupts
6672 * @rdev: radeon_device pointer
6674 * Disable interrupts on the hw (CIK).
6676 static void cik_irq_disable(struct radeon_device *rdev)
6678 cik_disable_interrupts(rdev);
6679 /* Wait and acknowledge irq */
/* then mask every individual source so nothing re-fires on enable */
6682 cik_disable_interrupt_state(rdev);
6686 * cik_irq_suspend - disable interrupts for suspend
6688 * @rdev: radeon_device pointer
6690 * Disable interrupts and stop the RLC (CIK).
/* Suspend path: fully disable interrupts (RLC stop handled alongside). */
6693 static void cik_irq_suspend(struct radeon_device *rdev)
6695 cik_irq_disable(rdev);
6700 * cik_irq_fini - tear down interrupt support
6702 * @rdev: radeon_device pointer
6704 * Disable interrupts on the hw and free the IH ring
6706 * Used for driver unload.
6708 static void cik_irq_fini(struct radeon_device *rdev)
/* quiesce the hardware first, then free the IH ring memory */
6710 cik_irq_suspend(rdev);
6711 r600_ih_ring_fini(rdev);
6715 * cik_get_ih_wptr - get the IH ring buffer wptr
6717 * @rdev: radeon_device pointer
6719 * Get the IH ring buffer wptr from either the register
6720 * or the writeback memory buffer (CIK). Also check for
6721 * ring buffer overflow and deal with it.
6722 * Used by cik_irq_process().
6723 * Returns the value of the wptr.
6725 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
/* prefer the coherent writeback copy of wptr; fall back to MMIO */
6729 if (rdev->wb.enabled)
6730 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6732 wptr = RREG32(IH_RB_WPTR);
6734 if (wptr & RB_OVERFLOW) {
6735 /* When a ring buffer overflow happen start parsing interrupt
6736 * from the last not overwritten vector (wptr + 16). Hopefully
6737 * this should allow us to catchup.
/* NOTE(review): third warn argument uses '+ rdev->ih.ptr_mask' while the
 * rptr assignment below uses '& rdev->ih.ptr_mask'; the '+' looks like a
 * typo (warning text only, no functional impact) — confirm against
 * upstream.  Format string also mixes %d with an address-like value. */
6739 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6740 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6741 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
/* write-1-to-clear the overflow flag so the next overflow is detectable */
6742 tmp = RREG32(IH_RB_CNTL);
6743 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6744 WREG32(IH_RB_CNTL, tmp);
6746 return (wptr & rdev->ih.ptr_mask);
6750 * Each IV ring entry is 128 bits:
6751 * [7:0] - interrupt source id
6753 * [59:32] - interrupt source data
6754 * [63:60] - reserved
6757 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6758 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6759 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6760 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6761 * PIPE_ID - ME0 0=3D
6762 * - ME1&2 compute dispatcher (4 pipes each)
6764 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
6765 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
6766 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6769 * [127:96] - reserved
6772 * cik_irq_process - interrupt handler
6774 * @rdev: radeon_device pointer
6776 * Interrupt handler (CIK). Walk the IH ring,
6777 * ack interrupts and schedule work to handle
6779 * Returns irq process return code.
6781 int cik_irq_process(struct radeon_device *rdev)
6783 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6784 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6787 u32 src_id, src_data, ring_id;
6788 u8 me_id, pipe_id, queue_id;
/* deferred-work flags: actual handling is pushed to workqueues below */
6790 bool queue_hotplug = false;
6791 bool queue_reset = false;
6792 u32 addr, status, mc_client;
6793 bool queue_thermal = false;
6795 if (!rdev->ih.enabled || rdev->shutdown)
6798 wptr = cik_get_ih_wptr(rdev);
6801 /* is somebody else already processing irqs? */
6802 if (atomic_xchg(&rdev->ih.lock, 1))
6805 rptr = rdev->ih.rptr;
6806 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6808 /* Order reading of wptr vs. reading of IH ring data */
6811 /* display interrupts */
/* drain the IH ring one 16-byte vector at a time */
6814 while (rptr != wptr) {
6815 /* wptr/rptr are in bytes! */
6816 ring_index = rptr / 4;
6817 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6818 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6819 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6822 case 1: /* D1 vblank/vline */
6824 case 0: /* D1 vblank */
6825 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6826 if (rdev->irq.crtc_vblank_int[0]) {
6827 drm_handle_vblank(rdev->ddev, 0);
6828 rdev->pm.vblank_sync = true;
6829 wake_up(&rdev->irq.vblank_queue);
6831 if (atomic_read(&rdev->irq.pflip[0]))
6832 radeon_crtc_handle_flip(rdev, 0);
/* clear the cached status bit so a shared vector isn't double-handled */
6833 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6834 DRM_DEBUG("IH: D1 vblank\n");
6837 case 1: /* D1 vline */
6838 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6839 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6840 DRM_DEBUG("IH: D1 vline\n");
6844 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6848 case 2: /* D2 vblank/vline */
6850 case 0: /* D2 vblank */
6851 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6852 if (rdev->irq.crtc_vblank_int[1]) {
6853 drm_handle_vblank(rdev->ddev, 1);
6854 rdev->pm.vblank_sync = true;
6855 wake_up(&rdev->irq.vblank_queue);
6857 if (atomic_read(&rdev->irq.pflip[1]))
6858 radeon_crtc_handle_flip(rdev, 1);
6859 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6860 DRM_DEBUG("IH: D2 vblank\n");
6863 case 1: /* D2 vline */
6864 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6865 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6866 DRM_DEBUG("IH: D2 vline\n");
6870 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6874 case 3: /* D3 vblank/vline */
6876 case 0: /* D3 vblank */
6877 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6878 if (rdev->irq.crtc_vblank_int[2]) {
6879 drm_handle_vblank(rdev->ddev, 2);
6880 rdev->pm.vblank_sync = true;
6881 wake_up(&rdev->irq.vblank_queue);
6883 if (atomic_read(&rdev->irq.pflip[2]))
6884 radeon_crtc_handle_flip(rdev, 2);
6885 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6886 DRM_DEBUG("IH: D3 vblank\n");
6889 case 1: /* D3 vline */
6890 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6891 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6892 DRM_DEBUG("IH: D3 vline\n");
6896 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6900 case 4: /* D4 vblank/vline */
6902 case 0: /* D4 vblank */
6903 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6904 if (rdev->irq.crtc_vblank_int[3]) {
6905 drm_handle_vblank(rdev->ddev, 3);
6906 rdev->pm.vblank_sync = true;
6907 wake_up(&rdev->irq.vblank_queue);
6909 if (atomic_read(&rdev->irq.pflip[3]))
6910 radeon_crtc_handle_flip(rdev, 3);
6911 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6912 DRM_DEBUG("IH: D4 vblank\n");
6915 case 1: /* D4 vline */
6916 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6917 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6918 DRM_DEBUG("IH: D4 vline\n");
6922 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6926 case 5: /* D5 vblank/vline */
6928 case 0: /* D5 vblank */
6929 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6930 if (rdev->irq.crtc_vblank_int[4]) {
6931 drm_handle_vblank(rdev->ddev, 4);
6932 rdev->pm.vblank_sync = true;
6933 wake_up(&rdev->irq.vblank_queue);
6935 if (atomic_read(&rdev->irq.pflip[4]))
6936 radeon_crtc_handle_flip(rdev, 4);
6937 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6938 DRM_DEBUG("IH: D5 vblank\n");
6941 case 1: /* D5 vline */
6942 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6943 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6944 DRM_DEBUG("IH: D5 vline\n");
6948 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6952 case 6: /* D6 vblank/vline */
6954 case 0: /* D6 vblank */
6955 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6956 if (rdev->irq.crtc_vblank_int[5]) {
6957 drm_handle_vblank(rdev->ddev, 5);
6958 rdev->pm.vblank_sync = true;
6959 wake_up(&rdev->irq.vblank_queue);
6961 if (atomic_read(&rdev->irq.pflip[5]))
6962 radeon_crtc_handle_flip(rdev, 5);
6963 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6964 DRM_DEBUG("IH: D6 vblank\n");
6967 case 1: /* D6 vline */
6968 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6969 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6970 DRM_DEBUG("IH: D6 vline\n");
6974 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6978 case 42: /* HPD hotplug */
/* all HPD pads just mark queue_hotplug; real work runs in hotplug_work */
6981 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6982 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6983 queue_hotplug = true;
6984 DRM_DEBUG("IH: HPD1\n");
6988 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6989 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6990 queue_hotplug = true;
6991 DRM_DEBUG("IH: HPD2\n");
6995 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6996 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6997 queue_hotplug = true;
6998 DRM_DEBUG("IH: HPD3\n");
7002 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7003 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7004 queue_hotplug = true;
7005 DRM_DEBUG("IH: HPD4\n");
7009 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7010 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7011 queue_hotplug = true;
7012 DRM_DEBUG("IH: HPD5\n");
7016 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7017 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7018 queue_hotplug = true;
7019 DRM_DEBUG("IH: HPD6\n");
7023 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7028 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7029 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
/* VM protection fault: dump the fault info then clear the latch */
7033 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7034 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7035 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7036 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7037 dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7039 dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7041 cik_vm_decode_fault(rdev, status, addr, mc_client);
7042 /* reset addr and status */
7043 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7045 case 176: /* GFX RB CP_INT */
7046 case 177: /* GFX IB CP_INT */
7047 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7049 case 181: /* CP EOP event */
7050 DRM_DEBUG("IH: CP EOP\n");
7051 /* XXX check the bitfield order! */
7052 me_id = (ring_id & 0x60) >> 5;
7053 pipe_id = (ring_id & 0x18) >> 3;
7054 queue_id = (ring_id & 0x7) >> 0;
7057 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
/* NOTE(review): bitwise '&' on two boolean comparisons — functionally
 * equivalent here since both operands are 0/1, but '&&' is conventional */
7061 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7062 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7063 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7064 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7068 case 184: /* CP Privileged reg access */
7069 DRM_ERROR("Illegal register access in command stream\n");
7070 /* XXX check the bitfield order! */
7071 me_id = (ring_id & 0x60) >> 5;
7072 pipe_id = (ring_id & 0x18) >> 3;
7073 queue_id = (ring_id & 0x7) >> 0;
7076 /* This results in a full GPU reset, but all we need to do is soft
7077 * reset the CP for gfx
7091 case 185: /* CP Privileged inst */
7092 DRM_ERROR("Illegal instruction in command stream\n");
7093 /* XXX check the bitfield order! */
7094 me_id = (ring_id & 0x60) >> 5;
7095 pipe_id = (ring_id & 0x18) >> 3;
7096 queue_id = (ring_id & 0x7) >> 0;
7099 /* This results in a full GPU reset, but all we need to do is soft
7100 * reset the CP for gfx
7114 case 224: /* SDMA trap event */
7115 /* XXX check the bitfield order! */
/* for SDMA, ring_id encodes instance (me_id) and queue, not pipe */
7116 me_id = (ring_id & 0x3) >> 0;
7117 queue_id = (ring_id & 0xc) >> 2;
7118 DRM_DEBUG("IH: SDMA trap\n");
7123 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7136 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7148 case 230: /* thermal low to high */
7149 DRM_DEBUG("IH: thermal low to high\n");
7150 rdev->pm.dpm.thermal.high_to_low = false;
7151 queue_thermal = true;
7153 case 231: /* thermal high to low */
7154 DRM_DEBUG("IH: thermal high to low\n");
7155 rdev->pm.dpm.thermal.high_to_low = true;
7156 queue_thermal = true;
7158 case 233: /* GUI IDLE */
7159 DRM_DEBUG("IH: GUI idle\n");
7161 case 241: /* SDMA Privileged inst */
7162 case 247: /* SDMA Privileged inst */
7163 DRM_ERROR("Illegal instruction in SDMA command stream\n");
7164 /* XXX check the bitfield order! */
7165 me_id = (ring_id & 0x3) >> 0;
7166 queue_id = (ring_id & 0xc) >> 2;
7201 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7205 /* wptr/rptr are in bytes! */
7207 rptr &= rdev->ih.ptr_mask;
/* hand the heavyweight follow-ups to process context */
7210 schedule_work(&rdev->hotplug_work);
7212 schedule_work(&rdev->reset_work);
7214 schedule_work(&rdev->pm.dpm.thermal.work);
7215 rdev->ih.rptr = rptr;
7216 WREG32(IH_RB_RPTR, rdev->ih.rptr);
7217 atomic_set(&rdev->ih.lock, 0);
7219 /* make sure wptr hasn't changed while processing */
7220 wptr = cik_get_ih_wptr(rdev);
7228 * startup/shutdown callbacks
7231 * cik_startup - program the asic to a functional state
7233 * @rdev: radeon_device pointer
7235 * Programs the asic to a functional state (CIK).
7236 * Called by cik_init() and cik_resume().
7237 * Returns 0 for success, error for failure.
7239 static int cik_startup(struct radeon_device *rdev)
7241 struct radeon_ring *ring;
7244 /* enable pcie gen2/3 link */
7245 cik_pcie_gen3_enable(rdev);
7247 cik_program_aspm(rdev);
7249 /* scratch needs to be initialized before MC */
7250 r = r600_vram_scratch_init(rdev);
7254 cik_mc_program(rdev);
7256 if (rdev->flags & RADEON_IS_IGP) {
7257 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7258 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
7259 r = cik_init_microcode(rdev);
7261 DRM_ERROR("Failed to load firmware!\n");
7266 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7267 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7269 r = cik_init_microcode(rdev);
7271 DRM_ERROR("Failed to load firmware!\n");
7276 r = ci_mc_load_microcode(rdev);
7278 DRM_ERROR("Failed to load MC firmware!\n");
7283 r = cik_pcie_gart_enable(rdev);
7288 /* allocate rlc buffers */
7289 if (rdev->flags & RADEON_IS_IGP) {
7290 if (rdev->family == CHIP_KAVERI) {
7291 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7292 rdev->rlc.reg_list_size =
7293 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7295 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7296 rdev->rlc.reg_list_size =
7297 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7300 rdev->rlc.cs_data = ci_cs_data;
7301 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7302 r = sumo_rlc_init(rdev);
7304 DRM_ERROR("Failed to init rlc BOs!\n");
7308 /* allocate wb buffer */
7309 r = radeon_wb_init(rdev);
7313 /* allocate mec buffers */
7314 r = cik_mec_init(rdev);
7316 DRM_ERROR("Failed to init MEC BOs!\n");
7320 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7322 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7326 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7328 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7332 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7334 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7338 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7340 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7344 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7346 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7350 r = radeon_uvd_resume(rdev);
7352 r = uvd_v4_2_resume(rdev);
7354 r = radeon_fence_driver_start_ring(rdev,
7355 R600_RING_TYPE_UVD_INDEX);
7357 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7361 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7364 if (!rdev->irq.installed) {
7365 r = radeon_irq_kms_init(rdev);
7370 r = cik_irq_init(rdev);
7372 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7373 radeon_irq_kms_fini(rdev);
7378 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7379 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7380 CP_RB0_RPTR, CP_RB0_WPTR,
7381 PACKET3(PACKET3_NOP, 0x3FFF));
7385 /* set up the compute queues */
7386 /* type-2 packets are deprecated on MEC, use type-3 instead */
7387 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7388 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7389 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7390 PACKET3(PACKET3_NOP, 0x3FFF));
7393 ring->me = 1; /* first MEC */
7394 ring->pipe = 0; /* first pipe */
7395 ring->queue = 0; /* first queue */
7396 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7398 /* type-2 packets are deprecated on MEC, use type-3 instead */
7399 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7400 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7401 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7402 PACKET3(PACKET3_NOP, 0x3FFF));
7405 /* dGPU only have 1 MEC */
7406 ring->me = 1; /* first MEC */
7407 ring->pipe = 0; /* first pipe */
7408 ring->queue = 1; /* second queue */
7409 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7411 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7412 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7413 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7414 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7415 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7419 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7420 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7421 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7422 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7423 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7427 r = cik_cp_resume(rdev);
7431 r = cik_sdma_resume(rdev);
7435 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7436 if (ring->ring_size) {
7437 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7438 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7441 r = uvd_v1_0_init(rdev);
7443 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7446 r = radeon_ib_pool_init(rdev);
7448 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7452 r = radeon_vm_manager_init(rdev);
7454 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7458 r = dce6_audio_init(rdev);
7466 * cik_resume - resume the asic to a functional state
7468 * @rdev: radeon_device pointer
7470 * Programs the asic to a functional state (CIK).
7472 * Returns 0 for success, error for failure.
/*
 * cik_resume - resume the ASIC to a functional state.
 *
 * Re-posts the card via atombios, restores golden registers and runs
 * cik_startup(); on failure accel_working is cleared so the driver
 * falls back to non-accelerated operation.
 * (Non-contiguous excerpt: interior lines elided; code left verbatim.)
 */
7474 int cik_resume(struct radeon_device *rdev)
7479 atom_asic_init(rdev->mode_info.atom_context);
7481 /* init golden registers */
7482 cik_init_golden_registers(rdev);
/* Optimistically mark acceleration working before startup; reverted
 * below if cik_startup() fails. */
7484 rdev->accel_working = true;
7485 r = cik_startup(rdev);
7487 DRM_ERROR("cik startup failed on resume\n");
7488 rdev->accel_working = false;
7497 * cik_suspend - suspend the asic
7499 * @rdev: radeon_device pointer
7501 * Bring the chip into a state suitable for suspend (CIK).
7502 * Called at suspend.
7503 * Returns 0 for success.
/*
 * cik_suspend - bring the chip into a state suitable for suspend.
 *
 * Teardown mirrors cik_startup() in reverse: audio, VM manager,
 * CP/SDMA halt, UVD, IRQs, writeback, then GART. Returns 0.
 * (Non-contiguous excerpt: interior lines elided; code left verbatim.)
 */
7505 int cik_suspend(struct radeon_device *rdev)
7507 dce6_audio_fini(rdev);
7508 radeon_vm_manager_fini(rdev);
/* Halt the command processor and both SDMA engines before touching
 * the memory-backed structures they use. */
7509 cik_cp_enable(rdev, false);
7510 cik_sdma_enable(rdev, false);
7511 uvd_v1_0_fini(rdev);
7512 radeon_uvd_suspend(rdev);
7515 cik_irq_suspend(rdev);
7516 radeon_wb_disable(rdev);
7517 cik_pcie_gart_disable(rdev);
7521 /* Plan is to move initialization in that function and use
7522 * helper function so that radeon_device_init pretty much
7523 * do nothing more than calling asic specific function. This
7524 * should also allow to remove a bunch of callback function
7528 * cik_init - asic specific driver and hw init
7530 * @rdev: radeon_device pointer
7532 * Setup asic specific driver variables and program the hw
7533 * to a functional state (CIK).
7534 * Called at driver startup.
7535 * Returns 0 for success, errors for failure.
/*
 * cik_init - ASIC-specific driver and hw init, called once at load.
 *
 * Reads/validates the BIOS (atombios required), posts the card if
 * needed, initializes clocks, fences, MC, BO manager, sizes all
 * rings, sets up the IH ring and GART, then calls cik_startup().
 * A startup failure tears acceleration back down but does not fail
 * the load. Returns 0 on success, negative errno on failure.
 * (Non-contiguous excerpt: interior lines elided; code left verbatim.)
 */
7537 int cik_init(struct radeon_device *rdev)
7539 struct radeon_ring *ring;
7543 if (!radeon_get_bios(rdev)) {
7544 if (ASIC_IS_AVIVO(rdev))
7547 /* Must be an ATOMBIOS */
7548 if (!rdev->is_atom_bios) {
7549 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7552 r = radeon_atombios_init(rdev);
7556 /* Post card if necessary */
7557 if (!radeon_card_posted(rdev)) {
7559 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7562 DRM_INFO("GPU not posted. posting now...\n");
7563 atom_asic_init(rdev->mode_info.atom_context);
7565 /* init golden registers */
7566 cik_init_golden_registers(rdev);
7567 /* Initialize scratch registers */
7568 cik_scratch_init(rdev);
7569 /* Initialize surface registers */
7570 radeon_surface_init(rdev);
7571 /* Initialize clocks */
7572 radeon_get_clock_info(rdev->ddev);
7575 r = radeon_fence_driver_init(rdev);
7579 /* initialize memory controller */
7580 r = cik_mc_init(rdev);
7583 /* Memory manager */
7584 r = radeon_bo_init(rdev);
/* Size the rings: 1 MiB for GFX and the two compute rings (which
 * also need doorbell pages), 256 KiB per SDMA ring, 4 KiB for UVD. */
7588 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7589 ring->ring_obj = NULL;
7590 r600_ring_init(rdev, ring, 1024 * 1024);
7592 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7593 ring->ring_obj = NULL;
7594 r600_ring_init(rdev, ring, 1024 * 1024);
7595 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7599 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7600 ring->ring_obj = NULL;
7601 r600_ring_init(rdev, ring, 1024 * 1024);
7602 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7606 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7607 ring->ring_obj = NULL;
7608 r600_ring_init(rdev, ring, 256 * 1024);
7610 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7611 ring->ring_obj = NULL;
7612 r600_ring_init(rdev, ring, 256 * 1024);
7614 r = radeon_uvd_init(rdev);
7616 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7617 ring->ring_obj = NULL;
7618 r600_ring_init(rdev, ring, 4096);
7621 rdev->ih.ring_obj = NULL;
7622 r600_ih_ring_init(rdev, 64 * 1024);
7624 r = r600_pcie_gart_init(rdev);
7628 rdev->accel_working = true;
7629 r = cik_startup(rdev);
/* If startup fails, disable acceleration but keep modesetting alive:
 * unwind everything cik_startup() may have set up. */
7631 dev_err(rdev->dev, "disabling GPU acceleration\n");
7633 cik_sdma_fini(rdev);
7635 sumo_rlc_fini(rdev);
7637 radeon_wb_fini(rdev);
7638 radeon_ib_pool_fini(rdev);
7639 radeon_vm_manager_fini(rdev);
7640 radeon_irq_kms_fini(rdev);
7641 cik_pcie_gart_fini(rdev);
7642 rdev->accel_working = false;
7645 /* Don't start up if the MC ucode is missing.
7646 * The default clocks and voltages before the MC ucode
7647 * is loaded are not suffient for advanced operations.
7649 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7650 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7658 * cik_fini - asic specific driver and hw fini
7660 * @rdev: radeon_device pointer
7662 * Tear down the asic specific driver variables and program the hw
7663 * to an idle state (CIK).
7664 * Called at driver unload.
/*
 * cik_fini - ASIC-specific driver and hw teardown, called at unload.
 *
 * Full reverse of cik_init()/cik_startup(): engines, RLC, writeback,
 * VM manager, IB pool, IRQs, UVD, GART, scratch, GEM, fences, BO
 * manager, atombios state.
 * (Non-contiguous excerpt: interior lines elided; code left verbatim.)
 */
7666 void cik_fini(struct radeon_device *rdev)
7669 cik_sdma_fini(rdev);
7673 sumo_rlc_fini(rdev);
7675 radeon_wb_fini(rdev);
7676 radeon_vm_manager_fini(rdev);
7677 radeon_ib_pool_fini(rdev);
7678 radeon_irq_kms_fini(rdev);
7679 uvd_v1_0_fini(rdev);
7680 radeon_uvd_fini(rdev);
7681 cik_pcie_gart_fini(rdev);
7682 r600_vram_scratch_fini(rdev);
7683 radeon_gem_fini(rdev);
7684 radeon_fence_driver_fini(rdev);
7685 radeon_bo_fini(rdev);
7686 radeon_atombios_fini(rdev);
/*
 * dce8_program_fmt - program the FMT (output bit-depth/dither) block
 * for the CRTC driving this encoder.
 *
 * Chooses truncation or spatial/random dithering based on the
 * connector's monitor bpc and dither setting, then writes
 * FMT_BIT_DEPTH_CONTROL. LVDS/eDP are configured by atombios and
 * analog DACs need no FMT setup, so both return early (early-return
 * lines elided in this excerpt).
 * (Non-contiguous excerpt: interior lines elided; code left verbatim.)
 */
7691 void dce8_program_fmt(struct drm_encoder *encoder)
7693 struct drm_device *dev = encoder->dev;
7694 struct radeon_device *rdev = dev->dev_private;
7695 struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
7696 struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
7697 struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
7700 enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
7703 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
7704 bpc = radeon_get_monitor_bpc(connector);
7705 dither = radeon_connector->dither;
7708 /* LVDS/eDP FMT is set up by atom */
7709 if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
7712 /* not needed for analog */
7713 if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
7714 (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
/* The three dither/truncate branches below correspond to 6-, 8- and
 * 10-bpc panels respectively (switch labels elided in this excerpt);
 * FMT_*_DEPTH(0/1/2) selects the target depth. */
7722 if (dither == RADEON_FMT_DITHER_ENABLE)
7723 /* XXX sort out optimal dither settings */
7724 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7725 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
7727 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
7730 if (dither == RADEON_FMT_DITHER_ENABLE)
7731 /* XXX sort out optimal dither settings */
7732 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7733 FMT_RGB_RANDOM_ENABLE |
7734 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
7736 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
7739 if (dither == RADEON_FMT_DITHER_ENABLE)
7740 /* XXX sort out optimal dither settings */
7741 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7742 FMT_RGB_RANDOM_ENABLE |
7743 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
7745 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
7752 WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
7755 /* display watermark setup */
7757 * dce8_line_buffer_adjust - Set up the line buffer
7759 * @rdev: radeon_device pointer
7760 * @radeon_crtc: the selected display controller
7761 * @mode: the current display mode on the selected display
7764 * Setup up the line buffer allocation for
7765 * the selected display controller (CIK).
7766 * Returns the line buffer size in pixels.
/*
 * dce8_line_buffer_adjust - set up the line buffer allocation for the
 * selected display controller.
 *
 * Picks a line-buffer partition count from the mode's horizontal
 * resolution, programs LB_MEMORY_CTRL and the DMIF buffer allocation,
 * then polls (bounded by usec_timeout) for the allocation to complete.
 * Returns the line buffer size in pixels; 0 when the CRTC is disabled.
 * (Non-contiguous excerpt: interior lines elided; code left verbatim.)
 */
7768 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7769 struct radeon_crtc *radeon_crtc,
7770 struct drm_display_mode *mode)
7772 u32 tmp, buffer_alloc, i;
7773 u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
7776 * There are 6 line buffers, one for each display controllers.
7777 * There are 3 partitions per LB. Select the number of partitions
7778 * to enable based on the display width. For display widths larger
7779 * than 4096, you need use to use 2 display controllers and combine
7780 * them using the stereo blender.
7782 if (radeon_crtc->base.enabled && mode) {
7783 if (mode->crtc_hdisplay < 1920) {
7786 } else if (mode->crtc_hdisplay < 2560) {
7789 } else if (mode->crtc_hdisplay < 4096) {
7791 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7793 DRM_DEBUG_KMS("Mode too big for LB!\n");
7795 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7802 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7803 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7805 WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
7806 DMIF_BUFFERS_ALLOCATED(buffer_alloc));
/* Bounded busy-wait for the hardware to acknowledge the DMIF
 * buffer allocation. */
7807 for (i = 0; i < rdev->usec_timeout; i++) {
7808 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
7809 DMIF_BUFFERS_ALLOCATED_COMPLETED)
7814 if (radeon_crtc->base.enabled && mode) {
7826 /* controller not enabled, so no lb used */
7831 * cik_get_number_of_dram_channels - get the number of dram channels
7833 * @rdev: radeon_device pointer
7835 * Look up the number of video ram channels (CIK).
7836 * Used for display watermark bandwidth calculations
7837 * Returns the number of dram channels
/*
 * cik_get_number_of_dram_channels - look up the number of VRAM
 * channels from MC_SHARED_CHMAP for watermark bandwidth math.
 * (Switch cases mapping NOOFCHAN field values to channel counts are
 * elided in this non-contiguous excerpt.)
 */
7839 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7841 u32 tmp = RREG32(MC_SHARED_CHMAP);
7843 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
/* dce8_wm_params - input bundle for the DCE8 display watermark
 * calculations; filled per-CRTC by dce8_program_watermarks(). */
7866 struct dce8_wm_params {
7867 u32 dram_channels; /* number of dram channels */
7868 u32 yclk; /* bandwidth per dram data pin in kHz */
7869 u32 sclk; /* engine clock in kHz */
7870 u32 disp_clk; /* display clock in kHz */
7871 u32 src_width; /* viewport width */
7872 u32 active_time; /* active display time in ns */
7873 u32 blank_time; /* blank time in ns */
7874 bool interlaced; /* mode is interlaced */
7875 fixed20_12 vsc; /* vertical scale ratio */
7876 u32 num_heads; /* number of active crtcs */
7877 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
7878 u32 lb_size; /* line buffer allocated to pipe */
7879 u32 vtaps; /* vertical scaler taps */
7883 * dce8_dram_bandwidth - get the dram bandwidth
7885 * @wm: watermark calculation data
7887 * Calculate the raw dram bandwidth (CIK).
7888 * Used for display watermark bandwidth calculations
7889 * Returns the dram bandwidth in MBytes/s
/*
 * dce8_dram_bandwidth - raw DRAM bandwidth in MB/s:
 * yclk(MHz) * channels * 4 bytes * 0.7 efficiency, computed in
 * 20.12 fixed point via the dfixed_* helpers.
 * (Non-contiguous excerpt: interior lines elided; code left verbatim.)
 */
7891 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7893 /* Calculate raw DRAM Bandwidth */
7894 fixed20_12 dram_efficiency; /* 0.7 */
7895 fixed20_12 yclk, dram_channels, bandwidth;
/* kHz -> MHz */
7898 a.full = dfixed_const(1000);
7899 yclk.full = dfixed_const(wm->yclk);
7900 yclk.full = dfixed_div(yclk, a);
7901 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7902 a.full = dfixed_const(10);
7903 dram_efficiency.full = dfixed_const(7);
7904 dram_efficiency.full = dfixed_div(dram_efficiency, a);
7905 bandwidth.full = dfixed_mul(dram_channels, yclk);
7906 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7908 return dfixed_trunc(bandwidth);
7912 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7914 * @wm: watermark calculation data
7916 * Calculate the dram bandwidth used for display (CIK).
7917 * Used for display watermark bandwidth calculations
7918 * Returns the dram bandwidth for display in MBytes/s
/*
 * dce8_dram_bandwidth_for_display - portion of DRAM bandwidth that
 * display may consume, in MB/s. Same shape as dce8_dram_bandwidth()
 * but uses the worst-case 0.3 display allocation factor.
 * (Non-contiguous excerpt: interior lines elided; code left verbatim.)
 */
7920 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7922 /* Calculate DRAM Bandwidth and the part allocated to display. */
7923 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7924 fixed20_12 yclk, dram_channels, bandwidth;
7927 a.full = dfixed_const(1000);
7928 yclk.full = dfixed_const(wm->yclk);
7929 yclk.full = dfixed_div(yclk, a);
7930 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7931 a.full = dfixed_const(10);
7932 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
7933 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7934 bandwidth.full = dfixed_mul(dram_channels, yclk);
7935 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7937 return dfixed_trunc(bandwidth);
7941 * dce8_data_return_bandwidth - get the data return bandwidth
7943 * @wm: watermark calculation data
7945 * Calculate the data return bandwidth used for display (CIK).
7946 * Used for display watermark bandwidth calculations
7947 * Returns the data return bandwidth in MBytes/s
/*
 * dce8_data_return_bandwidth - display data-return bandwidth in MB/s:
 * sclk(MHz) * 32 bytes * 0.8 return efficiency, in 20.12 fixed point.
 * (Non-contiguous excerpt: interior lines elided; code left verbatim.)
 */
7949 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7951 /* Calculate the display Data return Bandwidth */
7952 fixed20_12 return_efficiency; /* 0.8 */
7953 fixed20_12 sclk, bandwidth;
7956 a.full = dfixed_const(1000);
7957 sclk.full = dfixed_const(wm->sclk);
7958 sclk.full = dfixed_div(sclk, a);
7959 a.full = dfixed_const(10);
7960 return_efficiency.full = dfixed_const(8);
7961 return_efficiency.full = dfixed_div(return_efficiency, a);
7962 a.full = dfixed_const(32);
7963 bandwidth.full = dfixed_mul(a, sclk);
7964 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7966 return dfixed_trunc(bandwidth);
7970 * dce8_dmif_request_bandwidth - get the dmif bandwidth
7972 * @wm: watermark calculation data
7974 * Calculate the dmif bandwidth used for display (CIK).
7975 * Used for display watermark bandwidth calculations
7976 * Returns the dmif bandwidth in MBytes/s
/*
 * dce8_dmif_request_bandwidth - DMIF request bandwidth in MB/s:
 * disp_clk(MHz) * 32 bytes * 0.8 request efficiency.
 * (Non-contiguous excerpt: interior lines elided; code left verbatim.)
 */
7978 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7980 /* Calculate the DMIF Request Bandwidth */
7981 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7982 fixed20_12 disp_clk, bandwidth;
7985 a.full = dfixed_const(1000);
7986 disp_clk.full = dfixed_const(wm->disp_clk);
7987 disp_clk.full = dfixed_div(disp_clk, a);
7988 a.full = dfixed_const(32);
7989 b.full = dfixed_mul(a, disp_clk);
7991 a.full = dfixed_const(10);
7992 disp_clk_request_efficiency.full = dfixed_const(8);
7993 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7995 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7997 return dfixed_trunc(bandwidth);
8001 * dce8_available_bandwidth - get the min available bandwidth
8003 * @wm: watermark calculation data
8005 * Calculate the min available bandwidth used for display (CIK).
8006 * Used for display watermark bandwidth calculations
8007 * Returns the min available bandwidth in MBytes/s
/*
 * dce8_available_bandwidth - the minimum of the three bandwidth
 * limits (DRAM, data return, DMIF request), i.e. the bottleneck the
 * display can draw on temporarily, in MB/s.
 */
8009 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8011 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8012 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8013 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8014 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8016 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8020 * dce8_average_bandwidth - get the average available bandwidth
8022 * @wm: watermark calculation data
8024 * Calculate the average available bandwidth used for display (CIK).
8025 * Used for display watermark bandwidth calculations
8026 * Returns the average available bandwidth in MBytes/s
/*
 * dce8_average_bandwidth - average bandwidth the current mode needs,
 * in MB/s: src_width * bytes_per_pixel * vsc / line_time, with
 * line_time = (active + blank) ns converted via the /1000 divide.
 * (Non-contiguous excerpt: interior lines elided; code left verbatim.)
 */
8028 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8030 /* Calculate the display mode Average Bandwidth
8031 * DisplayMode should contain the source and destination dimensions,
8035 fixed20_12 line_time;
8036 fixed20_12 src_width;
8037 fixed20_12 bandwidth;
8040 a.full = dfixed_const(1000);
8041 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8042 line_time.full = dfixed_div(line_time, a);
8043 bpp.full = dfixed_const(wm->bytes_per_pixel);
8044 src_width.full = dfixed_const(wm->src_width);
8045 bandwidth.full = dfixed_mul(src_width, bpp);
8046 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8047 bandwidth.full = dfixed_div(bandwidth, line_time);
8049 return dfixed_trunc(bandwidth);
8053 * dce8_latency_watermark - get the latency watermark
8055 * @wm: watermark calculation data
8057 * Calculate the latency watermark (CIK).
8058 * Used for display watermark bandwidth calculations
8059 * Returns the latency watermark in ns
/*
 * dce8_latency_watermark - compute the latency watermark in ns.
 *
 * Total latency = fixed MC latency + time for other heads' chunk and
 * cursor returns + display-pipe latency, then extended by any line
 * fill time in excess of the active time. The fixed-point section
 * derives the line-buffer fill bandwidth as the lesser of the
 * per-head share of available bandwidth, a dmif-size-derived bound,
 * and the display clock's pixel-write rate.
 * (Non-contiguous excerpt: interior lines elided; code left verbatim.)
 */
8061 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8063 /* First calculate the latency in ns */
8064 u32 mc_latency = 2000; /* 2000 ns. */
8065 u32 available_bandwidth = dce8_available_bandwidth(wm);
8066 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8067 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8068 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8069 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8070 (wm->num_heads * cursor_line_pair_return_time);
8071 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8072 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8073 u32 tmp, dmif_size = 12288;
/* No active heads: nothing to hide latency behind (early-return
 * value elided in this excerpt). */
8076 if (wm->num_heads == 0)
/* Downscaling (vsc > 1..2) and/or >=3 vertical taps require up to
 * 4 source lines per destination line, else 2. */
8079 a.full = dfixed_const(2);
8080 b.full = dfixed_const(1);
8081 if ((wm->vsc.full > a.full) ||
8082 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8084 ((wm->vsc.full >= a.full) && wm->interlaced))
8085 max_src_lines_per_dst_line = 4;
8087 max_src_lines_per_dst_line = 2;
8089 a.full = dfixed_const(available_bandwidth);
8090 b.full = dfixed_const(wm->num_heads);
8091 a.full = dfixed_div(a, b);
8093 b.full = dfixed_const(mc_latency + 512);
8094 c.full = dfixed_const(wm->disp_clk);
8095 b.full = dfixed_div(b, c);
8097 c.full = dfixed_const(dmif_size);
8098 b.full = dfixed_div(c, b);
8100 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8102 b.full = dfixed_const(1000);
8103 c.full = dfixed_const(wm->disp_clk);
8104 b.full = dfixed_div(c, b);
8105 c.full = dfixed_const(wm->bytes_per_pixel);
8106 b.full = dfixed_mul(b, c);
8108 lb_fill_bw = min(tmp, dfixed_trunc(b));
8110 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8111 b.full = dfixed_const(1000);
8112 c.full = dfixed_const(lb_fill_bw);
8113 b.full = dfixed_div(c, b);
8114 a.full = dfixed_div(a, b);
8115 line_fill_time = dfixed_trunc(a);
8117 if (line_fill_time < wm->active_time)
8120 return latency + (line_fill_time - wm->active_time);
8125 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8126 * average and available dram bandwidth
8128 * @wm: watermark calculation data
8130 * Check if the display average bandwidth fits in the display
8131 * dram bandwidth (CIK).
8132 * Used for display watermark bandwidth calculations
8133 * Returns true if the display fits, false if not.
/* Returns true when this head's average bandwidth fits within its
 * per-head share of the display-allocated DRAM bandwidth (the
 * true/false return lines are elided in this excerpt). */
8135 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8137 if (dce8_average_bandwidth(wm) <=
8138 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8145 * dce8_average_bandwidth_vs_available_bandwidth - check
8146 * average and available bandwidth
8148 * @wm: watermark calculation data
8150 * Check if the display average bandwidth fits in the display
8151 * available bandwidth (CIK).
8152 * Used for display watermark bandwidth calculations
8153 * Returns true if the display fits, false if not.
/* Returns true when this head's average bandwidth fits within its
 * per-head share of the minimum available bandwidth (the true/false
 * return lines are elided in this excerpt). */
8155 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8157 if (dce8_average_bandwidth(wm) <=
8158 (dce8_available_bandwidth(wm) / wm->num_heads))
8165 * dce8_check_latency_hiding - check latency hiding
8167 * @wm: watermark calculation data
8169 * Check latency hiding (CIK).
8170 * Used for display watermark bandwidth calculations
8171 * Returns true if the display fits, false if not.
/*
 * dce8_check_latency_hiding - can the line buffer hide the computed
 * latency? Tolerant lines depend on vertical scaling and on how many
 * LB partitions fit the source width; true when the latency
 * watermark fits within the hiding budget.
 * (Non-contiguous excerpt: interior lines elided; code left verbatim.)
 */
8173 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8175 u32 lb_partitions = wm->lb_size / wm->src_width;
8176 u32 line_time = wm->active_time + wm->blank_time;
8177 u32 latency_tolerant_lines;
8181 a.full = dfixed_const(1);
8182 if (wm->vsc.full > a.full)
8183 latency_tolerant_lines = 1;
8185 if (lb_partitions <= (wm->vtaps + 1))
8186 latency_tolerant_lines = 1;
8188 latency_tolerant_lines = 2;
8191 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8193 if (dce8_latency_watermark(wm) <= latency_hiding)
8200 * dce8_program_watermarks - program display watermarks
8202 * @rdev: radeon_device pointer
8203 * @radeon_crtc: the selected display controller
8204 * @lb_size: line buffer size
8205 * @num_heads: number of display controllers in use
8207 * Calculate and program the display watermarks for the
8208 * selected display controller (CIK).
/*
 * dce8_program_watermarks - calculate and program the display
 * watermarks for one CRTC.
 *
 * Builds two dce8_wm_params sets -- wm_high for high (or current)
 * clocks and wm_low for low clocks -- computes a latency watermark
 * for each, programs watermark set A then B via the
 * DPG_WATERMARK_MASK_CONTROL select register, restores the original
 * selection, and caches line_time/wm_high/wm_low for DPM.
 * (Non-contiguous excerpt: interior lines elided; code left verbatim.)
 */
8210 static void dce8_program_watermarks(struct radeon_device *rdev,
8211 struct radeon_crtc *radeon_crtc,
8212 u32 lb_size, u32 num_heads)
8214 struct drm_display_mode *mode = &radeon_crtc->base.mode;
8215 struct dce8_wm_params wm_low, wm_high;
8218 u32 latency_watermark_a = 0, latency_watermark_b = 0;
8221 if (radeon_crtc->base.enabled && num_heads && mode) {
8222 pixel_period = 1000000 / (u32)mode->clock;
8223 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8225 /* watermark for high clocks */
/* With DPM enabled, query the non-low-power mclk/sclk; otherwise
 * fall back to the current clocks. Values are *10 to convert the
 * 10 kHz units to kHz. */
8226 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8227 rdev->pm.dpm_enabled) {
8229 radeon_dpm_get_mclk(rdev, false) * 10;
8231 radeon_dpm_get_sclk(rdev, false) * 10;
8233 wm_high.yclk = rdev->pm.current_mclk * 10;
8234 wm_high.sclk = rdev->pm.current_sclk * 10;
8237 wm_high.disp_clk = mode->clock;
8238 wm_high.src_width = mode->crtc_hdisplay;
8239 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8240 wm_high.blank_time = line_time - wm_high.active_time;
8241 wm_high.interlaced = false;
8242 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8243 wm_high.interlaced = true;
8244 wm_high.vsc = radeon_crtc->vsc;
8246 if (radeon_crtc->rmx_type != RMX_OFF)
8248 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8249 wm_high.lb_size = lb_size;
8250 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8251 wm_high.num_heads = num_heads;
8253 /* set for high clocks */
8254 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8256 /* possibly force display priority to high */
8257 /* should really do this at mode validation time... */
8258 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8259 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8260 !dce8_check_latency_hiding(&wm_high) ||
8261 (rdev->disp_priority == 2)) {
8262 DRM_DEBUG_KMS("force priority to high\n");
8265 /* watermark for low clocks */
8266 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8267 rdev->pm.dpm_enabled) {
8269 radeon_dpm_get_mclk(rdev, true) * 10;
8271 radeon_dpm_get_sclk(rdev, true) * 10;
8273 wm_low.yclk = rdev->pm.current_mclk * 10;
8274 wm_low.sclk = rdev->pm.current_sclk * 10;
8277 wm_low.disp_clk = mode->clock;
8278 wm_low.src_width = mode->crtc_hdisplay;
8279 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8280 wm_low.blank_time = line_time - wm_low.active_time;
8281 wm_low.interlaced = false;
8282 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8283 wm_low.interlaced = true;
8284 wm_low.vsc = radeon_crtc->vsc;
8286 if (radeon_crtc->rmx_type != RMX_OFF)
8288 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8289 wm_low.lb_size = lb_size;
8290 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8291 wm_low.num_heads = num_heads;
8293 /* set for low clocks */
8294 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8296 /* possibly force display priority to high */
8297 /* should really do this at mode validation time... */
8298 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8299 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8300 !dce8_check_latency_hiding(&wm_low) ||
8301 (rdev->disp_priority == 2)) {
8302 DRM_DEBUG_KMS("force priority to high\n");
/* Select watermark set A (mask=1), program it, then set B (mask=2),
 * then restore the originally-selected set. */
8307 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8309 tmp &= ~LATENCY_WATERMARK_MASK(3);
8310 tmp |= LATENCY_WATERMARK_MASK(1);
8311 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8312 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8313 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8314 LATENCY_HIGH_WATERMARK(line_time)));
8316 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8317 tmp &= ~LATENCY_WATERMARK_MASK(3);
8318 tmp |= LATENCY_WATERMARK_MASK(2);
8319 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8320 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8321 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8322 LATENCY_HIGH_WATERMARK(line_time)));
8323 /* restore original selection */
8324 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8326 /* save values for DPM */
8327 radeon_crtc->line_time = line_time;
8328 radeon_crtc->wm_high = latency_watermark_a;
8329 radeon_crtc->wm_low = latency_watermark_b;
8333 * dce8_bandwidth_update - program display watermarks
8335 * @rdev: radeon_device pointer
8337 * Calculate and program the display watermarks and line
8338 * buffer allocation (CIK).
/*
 * dce8_bandwidth_update - recompute display priority, count active
 * heads, then set line-buffer allocation and watermarks per CRTC.
 * (Non-contiguous excerpt: interior lines elided; code left verbatim.)
 */
8340 void dce8_bandwidth_update(struct radeon_device *rdev)
8342 struct drm_display_mode *mode = NULL;
8343 u32 num_heads = 0, lb_size;
8346 radeon_update_display_priority(rdev);
/* First pass counts enabled CRTCs so each head's watermark can be
 * scaled by the total. */
8348 for (i = 0; i < rdev->num_crtc; i++) {
8349 if (rdev->mode_info.crtcs[i]->base.enabled)
8352 for (i = 0; i < rdev->num_crtc; i++) {
8353 mode = &rdev->mode_info.crtcs[i]->base.mode;
8354 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8355 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8360 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8362 * @rdev: radeon_device pointer
8364 * Fetches a GPU clock counter snapshot (SI).
8365 * Returns the 64 bit clock counter snapshot.
/*
 * cik_get_gpu_clock_counter - snapshot the 64-bit GPU clock counter.
 *
 * Writing RLC_CAPTURE_GPU_CLOCK_COUNT latches the counter so the LSB
 * and MSB halves can be read consistently; gpu_clock_mutex serializes
 * concurrent captures.
 */
8367 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8371 mutex_lock(&rdev->gpu_clock_mutex);
8372 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8373 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8374 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8375 mutex_unlock(&rdev->gpu_clock_mutex);
/*
 * cik_set_uvd_clock - program one UVD clock (VCLK or DCLK).
 *
 * Asks atombios for dividers matching the requested clock, writes the
 * post-divider into the given SMC control register (preserving other
 * bits), then polls the status register (up to 100 iterations) for
 * the DCLK_STATUS ready bit.
 * (Non-contiguous excerpt: interior lines elided; code left verbatim.)
 */
8379 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8380 u32 cntl_reg, u32 status_reg)
8383 struct atom_clock_dividers dividers;
8386 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8387 clock, false, &dividers);
8391 tmp = RREG32_SMC(cntl_reg);
8392 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8393 tmp |= dividers.post_divider;
8394 WREG32_SMC(cntl_reg, tmp);
8396 for (i = 0; i < 100; i++) {
8397 if (RREG32_SMC(status_reg) & DCLK_STATUS)
/* cik_set_uvd_clocks - set both UVD clocks (vclk then dclk) via
 * cik_set_uvd_clock(); error-propagation lines are elided in this
 * excerpt. */
8407 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8411 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8415 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
/* cik_pcie_gen3_enable - try to raise the PCIE link to gen2/gen3 speed.
 *
 * No-ops when disabled via the radeon.pcie_gen2 module parameter, on
 * IGP (no real PCIE link) or non-PCIE parts, or when the platform speed
 * cap mask advertises neither 5.0GT/s nor 8.0GT/s.  For gen3 targets
 * that are not already at gen3, performs the equalization/retraining
 * handshake with the upstream bridge before requesting the new speed.
 */
8419 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8421 struct pci_dev *root = rdev->pdev->bus->self;
8422 int bridge_pos, gpu_pos;
8423 u32 speed_cntl, mask, current_data_rate;
8427 if (radeon_pcie_gen2 == 0)
8430 if (rdev->flags & RADEON_IS_IGP)
8433 if (!(rdev->flags & RADEON_IS_PCIE))
8436 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8440 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
/* Current link rate: 1 = gen2 (5.0GT/s), 2 = gen3 (8.0GT/s) per the
 * comparisons below; skip the dance if the target rate is already up. */
8443 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8444 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8445 LC_CURRENT_DATA_RATE_SHIFT;
8446 if (mask & DRM_PCIE_SPEED_80) {
8447 if (current_data_rate == 2) {
8448 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8451 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8452 } else if (mask & DRM_PCIE_SPEED_50) {
8453 if (current_data_rate == 1) {
8454 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8457 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
/* PCIE capability offsets for the upstream bridge and the GPU; the
 * (elided) checks bail out if either is missing. */
8460 bridge_pos = pci_pcie_cap(root);
8464 gpu_pos = pci_pcie_cap(rdev->pdev);
8468 if (mask & DRM_PCIE_SPEED_80) {
8469 /* re-try equalization if gen3 is not already enabled */
8470 if (current_data_rate != 2) {
8471 u16 bridge_cfg, gpu_cfg;
8472 u16 bridge_cfg2, gpu_cfg2;
8473 u32 max_lw, current_lw, tmp;
/* Save both ends' link control, then force hardware autonomous width
 * disable (HAWD) on bridge and GPU for the retraining sequence.
 * NOTE(review): RMW of the *bridge's* LNKCTL from a driver can race
 * with the PCI core's own ASPM/LNKCTL updates; upstream later moved
 * this to pcie_capability_clear_and_set_word() — confirm. */
8475 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8476 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8478 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8479 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8481 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8482 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
/* If the link trained narrower than the detected maximum width,
 * request an upconfigure/renegotiation before changing speed. */
8484 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8485 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8486 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8488 if (current_lw < max_lw) {
8489 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8490 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8491 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8492 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8493 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8494 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
/* Up to 10 equalization attempts; each iteration waits for pending
 * transactions to drain, snapshots both ends' LNKCTL/LNKCTL2, toggles
 * quiesce around the (elided) redo-equalization step, then restores
 * the saved HAWD and LNKCTL2 fields. */
8498 for (i = 0; i < 10; i++) {
8500 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8501 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8504 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8505 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8507 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8508 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8510 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8511 tmp |= LC_SET_QUIESCE;
8512 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8514 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8516 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
/* Restore the HAWD bit on both ends to its pre-loop value. */
8521 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8522 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8523 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8524 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8526 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8527 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8528 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8529 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
/* Restore LNKCTL2 bit 4 and bits 11:9 (presumably the compliance-mode
 * related fields — verify against the PCIe spec) on both ends. */
8532 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8533 tmp16 &= ~((1 << 4) | (7 << 9));
8534 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8535 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8537 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8538 tmp16 &= ~((1 << 4) | (7 << 9));
8539 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8540 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8542 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8543 tmp &= ~LC_SET_QUIESCE;
8544 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8549 /* set the link speed */
8550 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8551 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8552 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
/* NOTE(review): the new target link speed is OR'ed into LNKCTL2
 * without first clearing the existing Target Link Speed field (the
 * clearing line appears to be elided or missing) — upstream later
 * rewrote this with pcie_capability_clear_and_set_word(...,
 * PCI_EXP_LNKCTL2_TLS, ...); confirm a stale value cannot survive. */
8554 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8556 if (mask & DRM_PCIE_SPEED_80)
8557 tmp16 |= 3; /* gen3 */
8558 else if (mask & DRM_PCIE_SPEED_50)
8559 tmp16 |= 2; /* gen2 */
8561 tmp16 |= 1; /* gen1 */
8562 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
/* Kick off the speed change and poll (bounded by usec_timeout) for the
 * hardware to clear the initiate bit. */
8564 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8565 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8566 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8568 for (i = 0; i < rdev->usec_timeout; i++) {
8569 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8570 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8576 static void cik_program_aspm(struct radeon_device *rdev)
8579 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8580 bool disable_clkreq = false;
8582 if (radeon_aspm == 0)
8585 /* XXX double check IGPs */
8586 if (rdev->flags & RADEON_IS_IGP)
8589 if (!(rdev->flags & RADEON_IS_PCIE))
8592 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8593 data &= ~LC_XMIT_N_FTS_MASK;
8594 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8596 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8598 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8599 data |= LC_GO_TO_RECOVERY;
8601 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8603 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8604 data |= P_IGNORE_EDB_ERR;
8606 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8608 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8609 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8610 data |= LC_PMI_TO_L1_DIS;
8612 data |= LC_L0S_INACTIVITY(7);
8615 data |= LC_L1_INACTIVITY(7);
8616 data &= ~LC_PMI_TO_L1_DIS;
8618 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8620 if (!disable_plloff_in_l1) {
8621 bool clk_req_support;
8623 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8624 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8625 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8627 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8629 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8630 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8631 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8633 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8635 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8636 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8637 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8639 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8641 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8642 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8643 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8645 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8647 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8648 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8649 data |= LC_DYN_LANES_PWR_STATE(3);
8651 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8653 if (!disable_clkreq) {
8654 struct pci_dev *root = rdev->pdev->bus->self;
8657 clk_req_support = false;
8658 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8659 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8660 clk_req_support = true;
8662 clk_req_support = false;
8665 if (clk_req_support) {
8666 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8667 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8669 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8671 orig = data = RREG32_SMC(THM_CLK_CNTL);
8672 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8673 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8675 WREG32_SMC(THM_CLK_CNTL, data);
8677 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8678 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8679 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8681 WREG32_SMC(MISC_CLK_CTRL, data);
8683 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8684 data &= ~BCLK_AS_XCLK;
8686 WREG32_SMC(CG_CLKPIN_CNTL, data);
8688 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8689 data &= ~FORCE_BIF_REFCLK_EN;
8691 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8693 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8694 data &= ~MPLL_CLKOUT_SEL_MASK;
8695 data |= MPLL_CLKOUT_SEL(4);
8697 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8702 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8705 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8706 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8708 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8711 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8712 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8713 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8714 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8715 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8716 data &= ~LC_L0S_INACTIVITY_MASK;
8718 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);