/*
 * drm/radeon: update cik_tiling_mode_table_init() for hawaii
 * [pandora-kernel.git] / drivers/gpu/drm/radeon/cik.c
 */
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
45 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
46 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
47 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
48 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
49 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
50 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
51 MODULE_FIRMWARE("radeon/KABINI_me.bin");
52 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
53 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
54 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
55 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
56
57 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
58 extern void r600_ih_ring_fini(struct radeon_device *rdev);
59 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
60 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
61 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
62 extern void sumo_rlc_fini(struct radeon_device *rdev);
63 extern int sumo_rlc_init(struct radeon_device *rdev);
64 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
65 extern void si_rlc_reset(struct radeon_device *rdev);
66 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
67 extern int cik_sdma_resume(struct radeon_device *rdev);
68 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
69 extern void cik_sdma_fini(struct radeon_device *rdev);
70 static void cik_rlc_stop(struct radeon_device *rdev);
71 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
72 static void cik_program_aspm(struct radeon_device *rdev);
73 static void cik_init_pg(struct radeon_device *rdev);
74 static void cik_init_cg(struct radeon_device *rdev);
75 static void cik_fini_pg(struct radeon_device *rdev);
76 static void cik_fini_cg(struct radeon_device *rdev);
77 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
78                                           bool enable);
79
80 /* get temperature in millidegrees */
81 int ci_get_temp(struct radeon_device *rdev)
82 {
83         u32 temp;
84         int actual_temp = 0;
85
86         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
87                 CTF_TEMP_SHIFT;
88
89         if (temp & 0x200)
90                 actual_temp = 255;
91         else
92                 actual_temp = temp & 0x1ff;
93
94         actual_temp = actual_temp * 1000;
95
96         return actual_temp;
97 }
98
99 /* get temperature in millidegrees */
100 int kv_get_temp(struct radeon_device *rdev)
101 {
102         u32 temp;
103         int actual_temp = 0;
104
105         temp = RREG32_SMC(0xC0300E0C);
106
107         if (temp)
108                 actual_temp = (temp / 8) - 49;
109         else
110                 actual_temp = 0;
111
112         actual_temp = actual_temp * 1000;
113
114         return actual_temp;
115 }
116
/*
 * Indirect registers accessor
 */
/**
 * cik_pciep_rreg - read a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: indirect register offset to read
 *
 * Selects @reg via the PCIE_INDEX window, then reads the value through
 * PCIE_DATA.  The index/data pair is serialized with pciep_idx_lock
 * (IRQ-safe) since the two accesses must not interleave with another user.
 *
 * Returns the 32-bit register value.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	/* read back, presumably to flush/post the index write before the
	 * data access -- standard index/data idiom; confirm vs. HW docs */
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}
132
/**
 * cik_pciep_wreg - write a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: indirect register offset to write
 * @v: 32-bit value to write
 *
 * Selects @reg via the PCIE_INDEX window and writes @v through PCIE_DATA,
 * under the same IRQ-safe pciep_idx_lock as cik_pciep_rreg() so the
 * index/data sequence cannot interleave with a concurrent access.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	/* read back, presumably to post the index write -- confirm vs. HW docs */
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	/* read back so the data write is posted before dropping the lock */
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
144
/*
 * RLC save/restore register list for Spectre (Kaveri-class GFX).
 *
 * Each entry pair is ((select) << 16 | (MMIO byte offset >> 2)) followed by a
 * 0x00000000 placeholder for the saved value.  The high half-word (0x0e00,
 * 0x4e00..0xbe00, 0x0600, 0x0400, 0x0000/0x0001) looks like a broadcast /
 * per-instance select, and the bare 0x3 / 0x5 words look like section
 * markers in the RLC list format -- NOTE(review): presumably consumed by the
 * RLC save/restore machinery (see sumo_rlc_init()); confirm the exact layout
 * against the RLC microcode list format before editing.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
591
/*
 * RLC save/restore register list for Kalindi (Kabini-class GFX).
 *
 * Same format as spectre_rlc_save_restore_register_list: pairs of
 * ((select) << 16 | (MMIO byte offset >> 2)) followed by a 0x00000000
 * placeholder, with bare 0x3 / 0x5 words that look like section markers.
 * Smaller than the Spectre list (e.g. only selects 0x4e00..0x7e00 appear
 * for the per-instance registers) -- NOTE(review): presumably reflects the
 * smaller GPU configuration; confirm against the RLC list format.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
916
/*
 * "Golden" SPM register setting for Bonaire.
 * Each triple appears to be { MMIO offset, AND mask, OR value } as consumed
 * by the radeon golden-register programming helper -- NOTE(review): confirm
 * against radeon_program_register_sequence().
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
921
/* Bonaire common golden settings: {register, mask, value} triples
 * programmed via radeon_program_register_sequence().
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
929
/* Bonaire golden register fixups: {register, mask, value} triples
 * programmed via radeon_program_register_sequence().
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
974
/* Bonaire MGCG/CGCG (clock gating) init sequence: {register, mask, value}
 * triples programmed via radeon_program_register_sequence().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1060
/* Spectre (Kaveri) SPM golden settings: {register, mask, value} triples
 * programmed via radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1065
/* Spectre (Kaveri) common golden settings: {register, mask, value} triples
 * programmed via radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1073
/* Spectre (Kaveri) golden register fixups: {register, mask, value} triples
 * programmed via radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1102
/* Spectre (Kaveri) MGCG/CGCG (clock gating) init sequence:
 * {register, mask, value} triples programmed via
 * radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1193
/* Kalindi (Kabini) SPM golden settings: {register, mask, value} triples
 * programmed via radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1198
/* Kalindi (Kabini) common golden settings: {register, mask, value} triples
 * programmed via radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1206
/* Kalindi (Kabini) golden register fixups: {register, mask, value} triples
 * programmed via radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1240
/* Kalindi (Kabini) MGCG/CGCG (clock gating) init sequence:
 * {register, mask, value} triples programmed via
 * radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1299
1300 static void cik_init_golden_registers(struct radeon_device *rdev)
1301 {
1302         switch (rdev->family) {
1303         case CHIP_BONAIRE:
1304                 radeon_program_register_sequence(rdev,
1305                                                  bonaire_mgcg_cgcg_init,
1306                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1307                 radeon_program_register_sequence(rdev,
1308                                                  bonaire_golden_registers,
1309                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1310                 radeon_program_register_sequence(rdev,
1311                                                  bonaire_golden_common_registers,
1312                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1313                 radeon_program_register_sequence(rdev,
1314                                                  bonaire_golden_spm_registers,
1315                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1316                 break;
1317         case CHIP_KABINI:
1318                 radeon_program_register_sequence(rdev,
1319                                                  kalindi_mgcg_cgcg_init,
1320                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1321                 radeon_program_register_sequence(rdev,
1322                                                  kalindi_golden_registers,
1323                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1324                 radeon_program_register_sequence(rdev,
1325                                                  kalindi_golden_common_registers,
1326                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1327                 radeon_program_register_sequence(rdev,
1328                                                  kalindi_golden_spm_registers,
1329                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1330                 break;
1331         case CHIP_KAVERI:
1332                 radeon_program_register_sequence(rdev,
1333                                                  spectre_mgcg_cgcg_init,
1334                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1335                 radeon_program_register_sequence(rdev,
1336                                                  spectre_golden_registers,
1337                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1338                 radeon_program_register_sequence(rdev,
1339                                                  spectre_golden_common_registers,
1340                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1341                 radeon_program_register_sequence(rdev,
1342                                                  spectre_golden_spm_registers,
1343                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1344                 break;
1345         default:
1346                 break;
1347         }
1348 }
1349
1350 /**
1351  * cik_get_xclk - get the xclk
1352  *
1353  * @rdev: radeon_device pointer
1354  *
1355  * Returns the reference clock used by the gfx engine
1356  * (CIK).
1357  */
1358 u32 cik_get_xclk(struct radeon_device *rdev)
1359 {
1360         u32 reference_clock = rdev->clock.spll.reference_freq;
1361
1362         if (rdev->flags & RADEON_IS_IGP) {
1363                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1364                         return reference_clock / 2;
1365         } else {
1366                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1367                         return reference_clock / 4;
1368         }
1369         return reference_clock;
1370 }
1371
1372 /**
1373  * cik_mm_rdoorbell - read a doorbell dword
1374  *
1375  * @rdev: radeon_device pointer
1376  * @offset: byte offset into the aperture
1377  *
1378  * Returns the value in the doorbell aperture at the
1379  * requested offset (CIK).
1380  */
1381 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1382 {
1383         if (offset < rdev->doorbell.size) {
1384                 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1385         } else {
1386                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1387                 return 0;
1388         }
1389 }
1390
1391 /**
1392  * cik_mm_wdoorbell - write a doorbell dword
1393  *
1394  * @rdev: radeon_device pointer
1395  * @offset: byte offset into the aperture
1396  * @v: value to write
1397  *
1398  * Writes @v to the doorbell aperture at the
1399  * requested offset (CIK).
1400  */
1401 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1402 {
1403         if (offset < rdev->doorbell.size) {
1404                 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1405         } else {
1406                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1407         }
1408 }
1409
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC io debug {address, data} pairs, written through
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA before loading the
 * MC ucode (see ci_mc_load_microcode()).
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1451
1452 /**
1453  * cik_srbm_select - select specific register instances
1454  *
1455  * @rdev: radeon_device pointer
1456  * @me: selected ME (micro engine)
1457  * @pipe: pipe
1458  * @queue: queue
1459  * @vmid: VMID
1460  *
1461  * Switches the currently active registers instances.  Some
1462  * registers are instanced per VMID, others are instanced per
1463  * me/pipe/queue combination.
1464  */
1465 static void cik_srbm_select(struct radeon_device *rdev,
1466                             u32 me, u32 pipe, u32 queue, u32 vmid)
1467 {
1468         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1469                              MEID(me & 0x3) |
1470                              VMID(vmid & 0xf) |
1471                              QUEUEID(queue & 0x7));
1472         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1473 }
1474
1475 /* ucode loading */
1476 /**
1477  * ci_mc_load_microcode - load MC ucode into the hw
1478  *
1479  * @rdev: radeon_device pointer
1480  *
1481  * Load the GDDR MC ucode into the hw (CIK).
1482  * Returns 0 on success, error on failure.
1483  */
1484 static int ci_mc_load_microcode(struct radeon_device *rdev)
1485 {
1486         const __be32 *fw_data;
1487         u32 running, blackout = 0;
1488         u32 *io_mc_regs;
1489         int i, ucode_size, regs_size;
1490
1491         if (!rdev->mc_fw)
1492                 return -EINVAL;
1493
1494         switch (rdev->family) {
1495         case CHIP_BONAIRE:
1496         default:
1497                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1498                 ucode_size = CIK_MC_UCODE_SIZE;
1499                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1500                 break;
1501         }
1502
1503         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1504
1505         if (running == 0) {
1506                 if (running) {
1507                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1508                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1509                 }
1510
1511                 /* reset the engine and set to writable */
1512                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1513                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1514
1515                 /* load mc io regs */
1516                 for (i = 0; i < regs_size; i++) {
1517                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1518                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1519                 }
1520                 /* load the MC ucode */
1521                 fw_data = (const __be32 *)rdev->mc_fw->data;
1522                 for (i = 0; i < ucode_size; i++)
1523                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1524
1525                 /* put the engine back into the active state */
1526                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1527                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1528                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1529
1530                 /* wait for training to complete */
1531                 for (i = 0; i < rdev->usec_timeout; i++) {
1532                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1533                                 break;
1534                         udelay(1);
1535                 }
1536                 for (i = 0; i < rdev->usec_timeout; i++) {
1537                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1538                                 break;
1539                         udelay(1);
1540                 }
1541
1542                 if (running)
1543                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1544         }
1545
1546         return 0;
1547 }
1548
1549 /**
1550  * cik_init_microcode - load ucode images from disk
1551  *
1552  * @rdev: radeon_device pointer
1553  *
1554  * Use the firmware interface to load the ucode images into
1555  * the driver (not loaded into hw).
1556  * Returns 0 on success, error on failure.
1557  */
1558 static int cik_init_microcode(struct radeon_device *rdev)
1559 {
1560         const char *chip_name;
1561         size_t pfp_req_size, me_req_size, ce_req_size,
1562                 mec_req_size, rlc_req_size, mc_req_size,
1563                 sdma_req_size, smc_req_size;
1564         char fw_name[30];
1565         int err;
1566
1567         DRM_DEBUG("\n");
1568
1569         switch (rdev->family) {
1570         case CHIP_BONAIRE:
1571                 chip_name = "BONAIRE";
1572                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1573                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1574                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1575                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1576                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1577                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1578                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1579                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1580                 break;
1581         case CHIP_KAVERI:
1582                 chip_name = "KAVERI";
1583                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1584                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1585                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1586                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1587                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1588                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1589                 break;
1590         case CHIP_KABINI:
1591                 chip_name = "KABINI";
1592                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1593                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1594                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1595                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1596                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1597                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1598                 break;
1599         default: BUG();
1600         }
1601
1602         DRM_INFO("Loading %s Microcode\n", chip_name);
1603
1604         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1605         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1606         if (err)
1607                 goto out;
1608         if (rdev->pfp_fw->size != pfp_req_size) {
1609                 printk(KERN_ERR
1610                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1611                        rdev->pfp_fw->size, fw_name);
1612                 err = -EINVAL;
1613                 goto out;
1614         }
1615
1616         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1617         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1618         if (err)
1619                 goto out;
1620         if (rdev->me_fw->size != me_req_size) {
1621                 printk(KERN_ERR
1622                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1623                        rdev->me_fw->size, fw_name);
1624                 err = -EINVAL;
1625         }
1626
1627         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1628         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1629         if (err)
1630                 goto out;
1631         if (rdev->ce_fw->size != ce_req_size) {
1632                 printk(KERN_ERR
1633                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1634                        rdev->ce_fw->size, fw_name);
1635                 err = -EINVAL;
1636         }
1637
1638         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1639         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1640         if (err)
1641                 goto out;
1642         if (rdev->mec_fw->size != mec_req_size) {
1643                 printk(KERN_ERR
1644                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1645                        rdev->mec_fw->size, fw_name);
1646                 err = -EINVAL;
1647         }
1648
1649         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1650         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1651         if (err)
1652                 goto out;
1653         if (rdev->rlc_fw->size != rlc_req_size) {
1654                 printk(KERN_ERR
1655                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1656                        rdev->rlc_fw->size, fw_name);
1657                 err = -EINVAL;
1658         }
1659
1660         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1661         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1662         if (err)
1663                 goto out;
1664         if (rdev->sdma_fw->size != sdma_req_size) {
1665                 printk(KERN_ERR
1666                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1667                        rdev->sdma_fw->size, fw_name);
1668                 err = -EINVAL;
1669         }
1670
1671         /* No SMC, MC ucode on APUs */
1672         if (!(rdev->flags & RADEON_IS_IGP)) {
1673                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1674                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1675                 if (err)
1676                         goto out;
1677                 if (rdev->mc_fw->size != mc_req_size) {
1678                         printk(KERN_ERR
1679                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1680                                rdev->mc_fw->size, fw_name);
1681                         err = -EINVAL;
1682                 }
1683
1684                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1685                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1686                 if (err) {
1687                         printk(KERN_ERR
1688                                "smc: error loading firmware \"%s\"\n",
1689                                fw_name);
1690                         release_firmware(rdev->smc_fw);
1691                         rdev->smc_fw = NULL;
1692                         err = 0;
1693                 } else if (rdev->smc_fw->size != smc_req_size) {
1694                         printk(KERN_ERR
1695                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1696                                rdev->smc_fw->size, fw_name);
1697                         err = -EINVAL;
1698                 }
1699         }
1700
1701 out:
1702         if (err) {
1703                 if (err != -EINVAL)
1704                         printk(KERN_ERR
1705                                "cik_cp: Failed to load firmware \"%s\"\n",
1706                                fw_name);
1707                 release_firmware(rdev->pfp_fw);
1708                 rdev->pfp_fw = NULL;
1709                 release_firmware(rdev->me_fw);
1710                 rdev->me_fw = NULL;
1711                 release_firmware(rdev->ce_fw);
1712                 rdev->ce_fw = NULL;
1713                 release_firmware(rdev->rlc_fw);
1714                 rdev->rlc_fw = NULL;
1715                 release_firmware(rdev->mc_fw);
1716                 rdev->mc_fw = NULL;
1717                 release_firmware(rdev->smc_fw);
1718                 rdev->smc_fw = NULL;
1719         }
1720         return err;
1721 }
1722
1723 /*
1724  * Core functions
1725  */
1726 /**
1727  * cik_tiling_mode_table_init - init the hw tiling table
1728  *
1729  * @rdev: radeon_device pointer
1730  *
1731  * Starting with SI, the tiling setup is done globally in a
1732  * set of 32 tiling modes.  Rather than selecting each set of
1733  * parameters per surface as on older asics, we just select
1734  * which index in the tiling table we want to use, and the
1735  * surface uses those parameters (CIK).
1736  */
1737 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1738 {
1739         const u32 num_tile_mode_states = 32;
1740         const u32 num_secondary_tile_mode_states = 16;
1741         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1742         u32 num_pipe_configs;
1743         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1744                 rdev->config.cik.max_shader_engines;
1745
1746         switch (rdev->config.cik.mem_row_size_in_kb) {
1747         case 1:
1748                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1749                 break;
1750         case 2:
1751         default:
1752                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1753                 break;
1754         case 4:
1755                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1756                 break;
1757         }
1758
1759         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1760         if (num_pipe_configs > 8)
1761                 num_pipe_configs = 16;
1762
1763         if (num_pipe_configs == 16) {
1764                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1765                         switch (reg_offset) {
1766                         case 0:
1767                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1768                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1769                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1770                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1771                                 break;
1772                         case 1:
1773                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1774                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1775                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1776                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1777                                 break;
1778                         case 2:
1779                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1780                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1781                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1782                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1783                                 break;
1784                         case 3:
1785                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1786                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1787                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1788                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1789                                 break;
1790                         case 4:
1791                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1792                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1793                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1794                                                  TILE_SPLIT(split_equal_to_row_size));
1795                                 break;
1796                         case 5:
1797                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1798                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1799                                 break;
1800                         case 6:
1801                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1802                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1803                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1804                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1805                                 break;
1806                         case 7:
1807                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1808                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1809                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1810                                                  TILE_SPLIT(split_equal_to_row_size));
1811                                 break;
1812                         case 8:
1813                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1814                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1815                                 break;
1816                         case 9:
1817                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1818                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1819                                 break;
1820                         case 10:
1821                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1822                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1823                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1824                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1825                                 break;
1826                         case 11:
1827                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1828                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1829                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
1830                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1831                                 break;
1832                         case 12:
1833                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1834                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1835                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1836                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1837                                 break;
1838                         case 13:
1839                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1840                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1841                                 break;
1842                         case 14:
1843                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1844                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1845                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1846                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1847                                 break;
1848                         case 16:
1849                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1850                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1851                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
1852                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1853                                 break;
1854                         case 17:
1855                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1856                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1857                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1858                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1859                                 break;
1860                         case 27:
1861                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1862                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1863                                 break;
1864                         case 28:
1865                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1866                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1867                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1868                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1869                                 break;
1870                         case 29:
1871                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1872                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1873                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
1874                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1875                                 break;
1876                         case 30:
1877                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1878                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1879                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1880                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1881                                 break;
1882                         default:
1883                                 gb_tile_moden = 0;
1884                                 break;
1885                         }
1886                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1887                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1888                 }
1889                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1890                         switch (reg_offset) {
1891                         case 0:
1892                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1893                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1894                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1895                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1896                                 break;
1897                         case 1:
1898                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1899                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1900                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1901                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1902                                 break;
1903                         case 2:
1904                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1905                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1906                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1907                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1908                                 break;
1909                         case 3:
1910                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1911                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1912                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1913                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1914                                 break;
1915                         case 4:
1916                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1917                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1918                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1919                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1920                                 break;
1921                         case 5:
1922                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1923                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1924                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1925                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1926                                 break;
1927                         case 6:
1928                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1929                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1930                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1931                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1932                                 break;
1933                         case 8:
1934                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1935                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1936                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1937                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1938                                 break;
1939                         case 9:
1940                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1941                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1942                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1943                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1944                                 break;
1945                         case 10:
1946                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1947                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1948                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1949                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1950                                 break;
1951                         case 11:
1952                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1953                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1954                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1955                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1956                                 break;
1957                         case 12:
1958                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1959                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1960                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1961                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1962                                 break;
1963                         case 13:
1964                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1965                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1966                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1967                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1968                                 break;
1969                         case 14:
1970                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1971                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1972                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1973                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1974                                 break;
1975                         default:
1976                                 gb_tile_moden = 0;
1977                                 break;
1978                         }
1979                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1980                 }
1981         } else if (num_pipe_configs == 8) {
1982                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1983                         switch (reg_offset) {
1984                         case 0:
1985                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1986                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1987                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1988                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1989                                 break;
1990                         case 1:
1991                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1992                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1993                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1994                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1995                                 break;
1996                         case 2:
1997                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1998                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1999                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2000                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2001                                 break;
2002                         case 3:
2003                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2004                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2005                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2006                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2007                                 break;
2008                         case 4:
2009                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2010                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2011                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2012                                                  TILE_SPLIT(split_equal_to_row_size));
2013                                 break;
2014                         case 5:
2015                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2016                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2017                                 break;
2018                         case 6:
2019                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2020                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2021                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2022                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2023                                 break;
2024                         case 7:
2025                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2026                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2027                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2028                                                  TILE_SPLIT(split_equal_to_row_size));
2029                                 break;
2030                         case 8:
2031                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2032                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2033                                 break;
2034                         case 9:
2035                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2036                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2037                                 break;
2038                         case 10:
2039                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2040                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2041                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2042                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2043                                 break;
2044                         case 11:
2045                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2046                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2047                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2048                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2049                                 break;
2050                         case 12:
2051                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2052                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2053                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2054                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2055                                 break;
2056                         case 13:
2057                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2058                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2059                                 break;
2060                         case 14:
2061                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2062                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2063                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2064                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2065                                 break;
2066                         case 16:
2067                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2068                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2069                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2070                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071                                 break;
2072                         case 17:
2073                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2074                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2075                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2076                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077                                 break;
2078                         case 27:
2079                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2080                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2081                                 break;
2082                         case 28:
2083                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2084                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2085                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2086                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2087                                 break;
2088                         case 29:
2089                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2090                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2091                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2092                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093                                 break;
2094                         case 30:
2095                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2096                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2097                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2098                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2099                                 break;
2100                         default:
2101                                 gb_tile_moden = 0;
2102                                 break;
2103                         }
2104                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2105                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2106                 }
2107                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2108                         switch (reg_offset) {
2109                         case 0:
2110                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2111                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2112                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2113                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2114                                 break;
2115                         case 1:
2116                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2117                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2118                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2119                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2120                                 break;
2121                         case 2:
2122                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2123                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2124                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2125                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2126                                 break;
2127                         case 3:
2128                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2129                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2130                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2131                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2132                                 break;
2133                         case 4:
2134                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2135                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2136                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2137                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2138                                 break;
2139                         case 5:
2140                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2141                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2142                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2143                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2144                                 break;
2145                         case 6:
2146                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2147                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2148                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2149                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2150                                 break;
2151                         case 8:
2152                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2153                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2154                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2155                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2156                                 break;
2157                         case 9:
2158                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2159                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2160                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2161                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2162                                 break;
2163                         case 10:
2164                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2165                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2166                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2167                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2168                                 break;
2169                         case 11:
2170                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2171                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2172                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2173                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2174                                 break;
2175                         case 12:
2176                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2177                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2178                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2179                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2180                                 break;
2181                         case 13:
2182                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2183                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2184                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2185                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2186                                 break;
2187                         case 14:
2188                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2189                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2190                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2191                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2192                                 break;
2193                         default:
2194                                 gb_tile_moden = 0;
2195                                 break;
2196                         }
2197                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2198                 }
2199         } else if (num_pipe_configs == 4) {
2200                 if (num_rbs == 4) {
2201                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2202                                 switch (reg_offset) {
2203                                 case 0:
2204                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2205                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2206                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2207                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2208                                         break;
2209                                 case 1:
2210                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2211                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2212                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2213                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2214                                         break;
2215                                 case 2:
2216                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2217                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2218                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2219                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2220                                         break;
2221                                 case 3:
2222                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2223                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2224                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2225                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2226                                         break;
2227                                 case 4:
2228                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2230                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2231                                                          TILE_SPLIT(split_equal_to_row_size));
2232                                         break;
2233                                 case 5:
2234                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2235                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236                                         break;
2237                                 case 6:
2238                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2239                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2240                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2241                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2242                                         break;
2243                                 case 7:
2244                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2245                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2246                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2247                                                          TILE_SPLIT(split_equal_to_row_size));
2248                                         break;
2249                                 case 8:
2250                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2251                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2252                                         break;
2253                                 case 9:
2254                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2255                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2256                                         break;
2257                                 case 10:
2258                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2259                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2260                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2261                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262                                         break;
2263                                 case 11:
2264                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2265                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2266                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2267                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2268                                         break;
2269                                 case 12:
2270                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2271                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2272                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2273                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2274                                         break;
2275                                 case 13:
2276                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2277                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2278                                         break;
2279                                 case 14:
2280                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2281                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2282                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2283                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2284                                         break;
2285                                 case 16:
2286                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2287                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2288                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2289                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2290                                         break;
2291                                 case 17:
2292                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2293                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2294                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2295                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2296                                         break;
2297                                 case 27:
2298                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2299                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2300                                         break;
2301                                 case 28:
2302                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2303                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2304                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2305                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2306                                         break;
2307                                 case 29:
2308                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2309                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2310                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2311                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2312                                         break;
2313                                 case 30:
2314                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2315                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2316                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2317                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2318                                         break;
2319                                 default:
2320                                         gb_tile_moden = 0;
2321                                         break;
2322                                 }
2323                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2324                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2325                         }
2326                 } else if (num_rbs < 4) {
2327                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2328                                 switch (reg_offset) {
2329                                 case 0:
2330                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2332                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2333                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2334                                         break;
2335                                 case 1:
2336                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2338                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2339                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2340                                         break;
2341                                 case 2:
2342                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2344                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2345                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2346                                         break;
2347                                 case 3:
2348                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2350                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2351                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2352                                         break;
2353                                 case 4:
2354                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2355                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2356                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2357                                                          TILE_SPLIT(split_equal_to_row_size));
2358                                         break;
2359                                 case 5:
2360                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2361                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2362                                         break;
2363                                 case 6:
2364                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2365                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2366                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2367                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2368                                         break;
2369                                 case 7:
2370                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2371                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2373                                                          TILE_SPLIT(split_equal_to_row_size));
2374                                         break;
2375                                 case 8:
2376                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2377                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2378                                         break;
2379                                 case 9:
2380                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2381                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2382                                         break;
2383                                 case 10:
2384                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2385                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2386                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2387                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388                                         break;
2389                                 case 11:
2390                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2391                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2392                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2393                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2394                                         break;
2395                                 case 12:
2396                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2397                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2398                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2399                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2400                                         break;
2401                                 case 13:
2402                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2403                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2404                                         break;
2405                                 case 14:
2406                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2408                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2409                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410                                         break;
2411                                 case 16:
2412                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2413                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2414                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2415                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416                                         break;
2417                                 case 17:
2418                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2419                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2420                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2421                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2422                                         break;
2423                                 case 27:
2424                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2426                                         break;
2427                                 case 28:
2428                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2429                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2430                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2431                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432                                         break;
2433                                 case 29:
2434                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2435                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2437                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438                                         break;
2439                                 case 30:
2440                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2443                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444                                         break;
2445                                 default:
2446                                         gb_tile_moden = 0;
2447                                         break;
2448                                 }
2449                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2450                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2451                         }
2452                 }
2453                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2454                         switch (reg_offset) {
2455                         case 0:
2456                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2458                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2459                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2460                                 break;
2461                         case 1:
2462                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2464                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2465                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2466                                 break;
2467                         case 2:
2468                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2472                                 break;
2473                         case 3:
2474                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2477                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2478                                 break;
2479                         case 4:
2480                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2483                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2484                                 break;
2485                         case 5:
2486                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2490                                 break;
2491                         case 6:
2492                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2494                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2495                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2496                                 break;
2497                         case 8:
2498                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2499                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2500                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2501                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2502                                 break;
2503                         case 9:
2504                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2505                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2506                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2507                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2508                                 break;
2509                         case 10:
2510                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2512                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2513                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2514                                 break;
2515                         case 11:
2516                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2518                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2519                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2520                                 break;
2521                         case 12:
2522                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2524                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2525                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2526                                 break;
2527                         case 13:
2528                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2530                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2531                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2532                                 break;
2533                         case 14:
2534                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2536                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2537                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2538                                 break;
2539                         default:
2540                                 gb_tile_moden = 0;
2541                                 break;
2542                         }
2543                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2544                 }
2545         } else if (num_pipe_configs == 2) {
2546                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2547                         switch (reg_offset) {
2548                         case 0:
2549                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2551                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2552                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2553                                 break;
2554                         case 1:
2555                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2556                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2557                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2558                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2559                                 break;
2560                         case 2:
2561                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2563                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2564                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2565                                 break;
2566                         case 3:
2567                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2568                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2569                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2570                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2571                                 break;
2572                         case 4:
2573                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2575                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2576                                                  TILE_SPLIT(split_equal_to_row_size));
2577                                 break;
2578                         case 5:
2579                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2580                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2581                                 break;
2582                         case 6:
2583                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2584                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2585                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2586                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2587                                 break;
2588                         case 7:
2589                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2590                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2591                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2592                                                  TILE_SPLIT(split_equal_to_row_size));
2593                                 break;
2594                         case 8:
2595                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2596                                 break;
2597                         case 9:
2598                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2599                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2600                                 break;
2601                         case 10:
2602                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2603                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2604                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2605                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606                                 break;
2607                         case 11:
2608                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2609                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2610                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2611                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2612                                 break;
2613                         case 12:
2614                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2615                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2616                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2617                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2618                                 break;
2619                         case 13:
2620                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2621                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2622                                 break;
2623                         case 14:
2624                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2626                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2627                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2628                                 break;
2629                         case 16:
2630                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2631                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2632                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2633                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634                                 break;
2635                         case 17:
2636                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2637                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2638                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2639                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2640                                 break;
2641                         case 27:
2642                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2643                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2644                                 break;
2645                         case 28:
2646                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2647                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2648                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2649                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650                                 break;
2651                         case 29:
2652                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2653                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2654                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2655                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656                                 break;
2657                         case 30:
2658                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2659                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2660                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2661                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2662                                 break;
2663                         default:
2664                                 gb_tile_moden = 0;
2665                                 break;
2666                         }
2667                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2668                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2669                 }
2670                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2671                         switch (reg_offset) {
2672                         case 0:
2673                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2674                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2675                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2676                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2677                                 break;
2678                         case 1:
2679                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2680                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2681                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2682                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2683                                 break;
2684                         case 2:
2685                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2686                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2687                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2688                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2689                                 break;
2690                         case 3:
2691                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2693                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2694                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2695                                 break;
2696                         case 4:
2697                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2698                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2699                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2700                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2701                                 break;
2702                         case 5:
2703                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2706                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2707                                 break;
2708                         case 6:
2709                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2711                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2712                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2713                                 break;
2714                         case 8:
2715                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2716                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2717                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2718                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2719                                 break;
2720                         case 9:
2721                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2722                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2723                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2724                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2725                                 break;
2726                         case 10:
2727                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2728                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2729                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2730                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2731                                 break;
2732                         case 11:
2733                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2734                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2735                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2736                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2737                                 break;
2738                         case 12:
2739                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2741                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2742                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2743                                 break;
2744                         case 13:
2745                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2747                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2748                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2749                                 break;
2750                         case 14:
2751                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2753                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2754                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2755                                 break;
2756                         default:
2757                                 gb_tile_moden = 0;
2758                                 break;
2759                         }
2760                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2761                 }
2762         } else
2763                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2764 }
2765
2766 /**
2767  * cik_select_se_sh - select which SE, SH to address
2768  *
2769  * @rdev: radeon_device pointer
2770  * @se_num: shader engine to address
2771  * @sh_num: sh block to address
2772  *
2773  * Select which SE, SH combinations to address. Certain
2774  * registers are instanced per SE or SH.  0xffffffff means
2775  * broadcast to all SEs or SHs (CIK).
2776  */
2777 static void cik_select_se_sh(struct radeon_device *rdev,
2778                              u32 se_num, u32 sh_num)
2779 {
2780         u32 data = INSTANCE_BROADCAST_WRITES;
2781
2782         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2783                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2784         else if (se_num == 0xffffffff)
2785                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2786         else if (sh_num == 0xffffffff)
2787                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2788         else
2789                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2790         WREG32(GRBM_GFX_INDEX, data);
2791 }
2792
2793 /**
2794  * cik_create_bitmask - create a bitmask
2795  *
2796  * @bit_width: length of the mask
2797  *
2798  * create a variable length bit mask (CIK).
2799  * Returns the bitmask.
2800  */
2801 static u32 cik_create_bitmask(u32 bit_width)
2802 {
2803         u32 i, mask = 0;
2804
2805         for (i = 0; i < bit_width; i++) {
2806                 mask <<= 1;
2807                 mask |= 1;
2808         }
2809         return mask;
2810 }
2811
/**
 * cik_get_rb_disabled - compute the bitmask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num, u32 se_num,
			      u32 sh_per_se)
{
	u32 data, mask;

	/* hardware-disabled RBs; bit 0 apparently gates whether the
	 * disable field is valid — TODO confirm against register spec */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	/* merge in the user/driver-configured RB disables */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* keep only the bits covering the RBs of a single SH */
	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}
2842
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* gather each SE/SH's disabled-RB bits into one global bitmap,
	 * CIK_RB_BITMAP_WIDTH_PER_SH bits per SH */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast so subsequent writes reach all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: set a bit for every RB that is NOT disabled */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* program PA_SC_RASTER_CONFIG per SE, consuming two
	 * enabled-RB bits per SH to pick the RB map value */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* leave the GRBM index in broadcast mode */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2900
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-asic shader engine / tiling / fifo limits */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* CU/RB counts vary per Kaveri SKU, keyed by PCI device id */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but not consumed below */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	/* derive memory/tiling parameters from the MC config */
	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* propagate the address config to every block that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write back to latch the current value */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	/* let the programmed state settle before continuing */
	udelay(50);
}
3164
3165 /*
3166  * GPU scratch registers helpers function.
3167  */
3168 /**
3169  * cik_scratch_init - setup driver info for CP scratch regs
3170  *
3171  * @rdev: radeon_device pointer
3172  *
3173  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3175  * is not used by default on newer asics (r6xx+).  On newer asics,
3176  * memory buffers are used for fences rather than scratch regs.
3177  */
3178 static void cik_scratch_init(struct radeon_device *rdev)
3179 {
3180         int i;
3181
3182         rdev->scratch.num_reg = 7;
3183         rdev->scratch.reg_base = SCRATCH_REG0;
3184         for (i = 0; i < rdev->scratch.num_reg; i++) {
3185                 rdev->scratch.free[i] = true;
3186                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3187         }
3188 }
3189
3190 /**
3191  * cik_ring_test - basic gfx ring test
3192  *
3193  * @rdev: radeon_device pointer
3194  * @ring: radeon_ring structure holding ring information
3195  *
3196  * Allocate a scratch register and write to it using the gfx ring (CIK).
3197  * Provides a basic gfx ring test to verify that the ring is working.
3198  * Used by cik_cp_gfx_resume();
3199  * Returns 0 on success, error on failure.
3200  */
3201 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3202 {
3203         uint32_t scratch;
3204         uint32_t tmp = 0;
3205         unsigned i;
3206         int r;
3207
3208         r = radeon_scratch_get(rdev, &scratch);
3209         if (r) {
3210                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3211                 return r;
3212         }
3213         WREG32(scratch, 0xCAFEDEAD);
3214         r = radeon_ring_lock(rdev, ring, 3);
3215         if (r) {
3216                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3217                 radeon_scratch_free(rdev, scratch);
3218                 return r;
3219         }
3220         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3221         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3222         radeon_ring_write(ring, 0xDEADBEEF);
3223         radeon_ring_unlock_commit(rdev, ring);
3224
3225         for (i = 0; i < rdev->usec_timeout; i++) {
3226                 tmp = RREG32(scratch);
3227                 if (tmp == 0xDEADBEEF)
3228                         break;
3229                 DRM_UDELAY(1);
3230         }
3231         if (i < rdev->usec_timeout) {
3232                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3233         } else {
3234                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3235                           ring->idx, scratch, tmp);
3236                 r = -EINVAL;
3237         }
3238         radeon_scratch_free(rdev, scratch);
3239         return r;
3240 }
3241
3242 /**
3243  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3244  *
3245  * @rdev: radeon_device pointer
3246  * @fence: radeon fence object
3247  *
 * Emits a fence sequence number on the gfx ring and flushes
3249  * GPU caches.
3250  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* fence address must be dword aligned; low 2 bits are dropped */
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* upper address bits plus DATA_SEL(1)=write 32-bit value,
	 * INT_SEL(2)=interrupt on write confirm */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	/* writing 0 to HDP_MEM_COHERENCY_FLUSH_CNTL flushes the HDP cache */
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3278
3279 /**
3280  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3281  *
3282  * @rdev: radeon_device pointer
3283  * @fence: radeon fence object
3284  *
 * Emits a fence sequence number on the compute ring and flushes
3286  * GPU caches.
3287  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1)=write 32-bit value, INT_SEL(2)=interrupt on confirm.
	 * Note: unlike the gfx EOP packet, RELEASE_MEM takes the selects
	 * first and the full 64-bit address afterwards. */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3316
3317 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3318                              struct radeon_ring *ring,
3319                              struct radeon_semaphore *semaphore,
3320                              bool emit_wait)
3321 {
3322         uint64_t addr = semaphore->gpu_addr;
3323         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3324
3325         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3326         radeon_ring_write(ring, addr & 0xffffffff);
3327         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3328 }
3329
3330 /**
3331  * cik_copy_cpdma - copy pages using the CP DMA engine
3332  *
3333  * @rdev: radeon_device pointer
3334  * @src_offset: src GPU address
3335  * @dst_offset: dst GPU address
3336  * @num_gpu_pages: number of GPU pages to xfer
3337  * @fence: radeon fence object
3338  *
 * Copy GPU pages using the CP DMA engine (CIK+).
3340  * Used by the radeon ttm implementation to move pages if
3341  * registered as the asic copy callback.
3342  */
3343 int cik_copy_cpdma(struct radeon_device *rdev,
3344                    uint64_t src_offset, uint64_t dst_offset,
3345                    unsigned num_gpu_pages,
3346                    struct radeon_fence **fence)
3347 {
3348         struct radeon_semaphore *sem = NULL;
3349         int ring_index = rdev->asic->copy.blit_ring_index;
3350         struct radeon_ring *ring = &rdev->ring[ring_index];
3351         u32 size_in_bytes, cur_size_in_bytes, control;
3352         int i, num_loops;
3353         int r = 0;
3354
3355         r = radeon_semaphore_create(rdev, &sem);
3356         if (r) {
3357                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3358                 return r;
3359         }
3360
3361         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3362         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3363         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3364         if (r) {
3365                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3366                 radeon_semaphore_free(rdev, &sem, NULL);
3367                 return r;
3368         }
3369
3370         if (radeon_fence_need_sync(*fence, ring->idx)) {
3371                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3372                                             ring->idx);
3373                 radeon_fence_note_sync(*fence, ring->idx);
3374         } else {
3375                 radeon_semaphore_free(rdev, &sem, NULL);
3376         }
3377
3378         for (i = 0; i < num_loops; i++) {
3379                 cur_size_in_bytes = size_in_bytes;
3380                 if (cur_size_in_bytes > 0x1fffff)
3381                         cur_size_in_bytes = 0x1fffff;
3382                 size_in_bytes -= cur_size_in_bytes;
3383                 control = 0;
3384                 if (size_in_bytes == 0)
3385                         control |= PACKET3_DMA_DATA_CP_SYNC;
3386                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3387                 radeon_ring_write(ring, control);
3388                 radeon_ring_write(ring, lower_32_bits(src_offset));
3389                 radeon_ring_write(ring, upper_32_bits(src_offset));
3390                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3391                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3392                 radeon_ring_write(ring, cur_size_in_bytes);
3393                 src_offset += cur_size_in_bytes;
3394                 dst_offset += cur_size_in_bytes;
3395         }
3396
3397         r = radeon_fence_emit(rdev, fence, ring->idx);
3398         if (r) {
3399                 radeon_ring_unlock_undo(rdev, ring);
3400                 return r;
3401         }
3402
3403         radeon_ring_unlock_commit(rdev, ring);
3404         radeon_semaphore_free(rdev, &sem, *fence);
3405
3406         return r;
3407 }
3408
3409 /*
3410  * IB stuff
3411  */
3412 /**
3413  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3414  *
3415  * @rdev: radeon_device pointer
3416  * @ib: radeon indirect buffer object
3417  *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
3422  * on the gfx ring for execution by the GPU.
3423  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for the SET_UCONFIG_REG write below plus
			 * 4 for the INDIRECT_BUFFER packet emitted afterwards */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for the WRITE_DATA packet below plus
			 * 4 for the INDIRECT_BUFFER packet emitted afterwards */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* length in dwords, plus the VM id in bits 24+ when executing
	 * inside a VM context (0 = no VM) */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3467
3468 /**
3469  * cik_ib_test - basic gfx ring IB test
3470  *
3471  * @rdev: radeon_device pointer
3472  * @ring: radeon_ring structure holding ring information
3473  *
3474  * Allocate an IB and execute it on the gfx ring (CIK).
3475  * Provides a basic gfx ring test to verify that IBs are working.
3476  * Returns 0 on success, error on failure.
3477  */
3478 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3479 {
3480         struct radeon_ib ib;
3481         uint32_t scratch;
3482         uint32_t tmp = 0;
3483         unsigned i;
3484         int r;
3485
3486         r = radeon_scratch_get(rdev, &scratch);
3487         if (r) {
3488                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3489                 return r;
3490         }
3491         WREG32(scratch, 0xCAFEDEAD);
3492         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3493         if (r) {
3494                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3495                 radeon_scratch_free(rdev, scratch);
3496                 return r;
3497         }
3498         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3499         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3500         ib.ptr[2] = 0xDEADBEEF;
3501         ib.length_dw = 3;
3502         r = radeon_ib_schedule(rdev, &ib, NULL);
3503         if (r) {
3504                 radeon_scratch_free(rdev, scratch);
3505                 radeon_ib_free(rdev, &ib);
3506                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3507                 return r;
3508         }
3509         r = radeon_fence_wait(ib.fence, false);
3510         if (r) {
3511                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3512                 radeon_scratch_free(rdev, scratch);
3513                 radeon_ib_free(rdev, &ib);
3514                 return r;
3515         }
3516         for (i = 0; i < rdev->usec_timeout; i++) {
3517                 tmp = RREG32(scratch);
3518                 if (tmp == 0xDEADBEEF)
3519                         break;
3520                 DRM_UDELAY(1);
3521         }
3522         if (i < rdev->usec_timeout) {
3523                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3524         } else {
3525                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3526                           scratch, tmp);
3527                 r = -EINVAL;
3528         }
3529         radeon_scratch_free(rdev, scratch);
3530         radeon_ib_free(rdev, &ib);
3531         return r;
3532 }
3533
3534 /*
3535  * CP.
 * On CIK, gfx and compute now have independent command processors.
3537  *
3538  * GFX
3539  * Gfx consists of a single ring and can process both gfx jobs and
3540  * compute jobs.  The gfx CP consists of three microengines (ME):
3541  * PFP - Pre-Fetch Parser
3542  * ME - Micro Engine
3543  * CE - Constant Engine
3544  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3546  * used by the DE so that they can be loaded into cache in parallel
3547  * while the DE is processing state update packets.
3548  *
3549  * Compute
3550  * The compute CP consists of two microengines (ME):
3551  * MEC1 - Compute MicroEngine 1
3552  * MEC2 - Compute MicroEngine 2
3553  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3554  * The queues are exposed to userspace and are programmed directly
3555  * by the compute runtime.
3556  */
3557 /**
3558  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3559  *
3560  * @rdev: radeon_device pointer
3561  * @enable: enable or disable the MEs
3562  *
3563  * Halts or unhalts the gfx MEs.
3564  */
3565 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3566 {
3567         if (enable)
3568                 WREG32(CP_ME_CNTL, 0);
3569         else {
3570                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3571                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3572         }
3573         udelay(50);
3574 }
3575
3576 /**
3577  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3578  *
3579  * @rdev: radeon_device pointer
3580  *
3581  * Loads the gfx PFP, ME, and CE ucode.
3582  * Returns 0 for success, -EINVAL if the ucode is not available.
3583  */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	/* all three gfx CP firmware images must have been fetched */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	/* halt the MEs before touching their ucode RAM */
	cik_cp_gfx_enable(rdev, false);

	/* PFP: reset the autoincrementing write address, stream the
	 * big-endian fw words, then reset the address again */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	/* leave all ucode address registers zeroed */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
3621
3622 /**
3623  * cik_cp_gfx_start - start the gfx ring
3624  *
3625  * @rdev: radeon_device pointer
3626  *
3627  * Enables the ring and loads the clear state context and other
3628  * packets required to init the ring.
3629  * Returns 0 for success, error for failure.
3630  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* room for the default clear state plus 17 dwords of setup packets */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden register state from clearstate_ci.h */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
3682
3683 /**
3684  * cik_cp_gfx_fini - stop the gfx ring
3685  *
3686  * @rdev: radeon_device pointer
3687  *
3688  * Stop the gfx ring and tear down the driver ring
3689  * info.
3690  */
3691 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3692 {
3693         cik_cp_gfx_enable(rdev, false);
3694         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3695 }
3696
3697 /**
3698  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3699  *
3700  * @rdev: radeon_device pointer
3701  *
3702  * Program the location and size of the gfx ring buffer
3703  * and test it to make sure it's working.
3704  * Returns 0 for success, error for failure.
3705  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	/* this register is not programmed on hawaii */
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size (log2 of size in dwords*2, per CP_RB0_CNTL) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback the CP must not push rptr updates to memory */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* re-arm the RB cntl with RB_RPTR_WR_ENA cleared */
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base address is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}
3770
3771 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3772                               struct radeon_ring *ring)
3773 {
3774         u32 rptr;
3775
3776
3777
3778         if (rdev->wb.enabled) {
3779                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3780         } else {
3781                 mutex_lock(&rdev->srbm_mutex);
3782                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3783                 rptr = RREG32(CP_HQD_PQ_RPTR);
3784                 cik_srbm_select(rdev, 0, 0, 0, 0);
3785                 mutex_unlock(&rdev->srbm_mutex);
3786         }
3787
3788         return rptr;
3789 }
3790
3791 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3792                               struct radeon_ring *ring)
3793 {
3794         u32 wptr;
3795
3796         if (rdev->wb.enabled) {
3797                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3798         } else {
3799                 mutex_lock(&rdev->srbm_mutex);
3800                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3801                 wptr = RREG32(CP_HQD_PQ_WPTR);
3802                 cik_srbm_select(rdev, 0, 0, 0, 0);
3803                 mutex_unlock(&rdev->srbm_mutex);
3804         }
3805
3806         return wptr;
3807 }
3808
void cik_compute_ring_set_wptr(struct radeon_device *rdev,
			       struct radeon_ring *ring)
{
	/* publish the new write pointer in the writeback buffer (LE),
	 * then ring the queue's doorbell so the CP picks it up */
	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
	WDOORBELL32(ring->doorbell_offset, ring->wptr);
}
3815
3816 /**
3817  * cik_cp_compute_enable - enable/disable the compute CP MEs
3818  *
3819  * @rdev: radeon_device pointer
3820  * @enable: enable or disable the MEs
3821  *
3822  * Halts or unhalts the compute MEs.
3823  */
3824 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3825 {
3826         if (enable)
3827                 WREG32(CP_MEC_CNTL, 0);
3828         else
3829                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3830         udelay(50);
3831 }
3832
3833 /**
3834  * cik_cp_compute_load_microcode - load the compute CP ME ucode
3835  *
3836  * @rdev: radeon_device pointer
3837  *
3838  * Loads the compute MEC1&2 ucode.
3839  * Returns 0 for success, -EINVAL if the ucode is not available.
3840  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the compute MEs before touching their ucode RAM */
	cik_cp_compute_enable(rdev, false);

	/* MEC1: reset the write address, stream the big-endian fw words,
	 * then reset the address again */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	/* only Kaveri has a second MEC; it is loaded with the same
	 * firmware image as MEC1 */
	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
3869
3870 /**
3871  * cik_cp_compute_start - start the compute queues
3872  *
3873  * @rdev: radeon_device pointer
3874  *
3875  * Enable the compute queues.
3876  * Returns 0 for success, error for failure.
3877  */
3878 static int cik_cp_compute_start(struct radeon_device *rdev)
3879 {
3880         cik_cp_compute_enable(rdev, true);
3881
3882         return 0;
3883 }
3884
3885 /**
3886  * cik_cp_compute_fini - stop the compute queues
3887  *
3888  * @rdev: radeon_device pointer
3889  *
3890  * Stop the compute queues and tear down the driver queue
3891  * info.
3892  */
3893 static void cik_cp_compute_fini(struct radeon_device *rdev)
3894 {
3895         int i, idx, r;
3896
3897         cik_cp_compute_enable(rdev, false);
3898
3899         for (i = 0; i < 2; i++) {
3900                 if (i == 0)
3901                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
3902                 else
3903                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
3904
3905                 if (rdev->ring[idx].mqd_obj) {
3906                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3907                         if (unlikely(r != 0))
3908                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3909
3910                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3911                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3912
3913                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3914                         rdev->ring[idx].mqd_obj = NULL;
3915                 }
3916         }
3917 }
3918
/* Tear down the MEC HPD EOP buffer object allocated by cik_mec_init(). */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		/* unpin/free even if the reserve above failed -- best effort
		 * teardown; matches other *_fini helpers in this file */
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
3934
3935 #define MEC_HPD_SIZE 2048
3936
/* Allocate, pin and clear the GTT buffer that backs the per-pipe MEC
 * HPD EOP areas.  Returns 0 on success or a negative error code; on
 * failure everything already allocated is cleaned up via cik_mec_fini(). */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* one MEC_HPD_SIZE area per pipe; the "* 2" doubles the
		 * allocation -- presumably headroom, TODO confirm intent */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
3992
/*
 * CPU-side shadow of the CP_MQD_* / CP_HQD_* hardware queue descriptor
 * registers.  cik_cp_compute_resume() fills this in and writes the values
 * out to the corresponding registers; the struct is embedded in the MQD
 * (struct bonaire_mqd) so the firmware sees the same state.  Do not
 * reorder the fields — the layout mirrors the MQD queue-state area.
 */
struct hqd_registers
{
        u32 cp_mqd_base_addr;
        u32 cp_mqd_base_addr_hi;
        u32 cp_hqd_active;
        u32 cp_hqd_vmid;
        u32 cp_hqd_persistent_state;
        u32 cp_hqd_pipe_priority;
        u32 cp_hqd_queue_priority;
        u32 cp_hqd_quantum;
        u32 cp_hqd_pq_base;
        u32 cp_hqd_pq_base_hi;
        u32 cp_hqd_pq_rptr;
        u32 cp_hqd_pq_rptr_report_addr;
        u32 cp_hqd_pq_rptr_report_addr_hi;
        u32 cp_hqd_pq_wptr_poll_addr;
        u32 cp_hqd_pq_wptr_poll_addr_hi;
        u32 cp_hqd_pq_doorbell_control;
        u32 cp_hqd_pq_wptr;
        u32 cp_hqd_pq_control;
        u32 cp_hqd_ib_base_addr;
        u32 cp_hqd_ib_base_addr_hi;
        u32 cp_hqd_ib_rptr;
        u32 cp_hqd_ib_control;
        u32 cp_hqd_iq_timer;
        u32 cp_hqd_iq_rptr;
        u32 cp_hqd_dequeue_request;
        u32 cp_hqd_dma_offload;
        u32 cp_hqd_sema_cmd;
        u32 cp_hqd_msg_type;
        u32 cp_hqd_atomic0_preop_lo;
        u32 cp_hqd_atomic0_preop_hi;
        u32 cp_hqd_atomic1_preop_lo;
        u32 cp_hqd_atomic1_preop_hi;
        u32 cp_hqd_hq_scheduler0;
        u32 cp_hqd_hq_scheduler1;
        u32 cp_mqd_control;
};
4031
/*
 * Memory Queue Descriptor (MQD) for Bonaire-class (CIK) compute queues.
 * One MQD per compute ring lives in a GTT BO (allocated in
 * cik_cp_compute_resume()); the CP reads queue state from it.  The
 * field layout is defined by the MEC firmware — do not reorder.
 */
struct bonaire_mqd
{
        u32 header;
        u32 dispatch_initiator;
        u32 dimensions[3];
        u32 start_idx[3];
        u32 num_threads[3];
        u32 pipeline_stat_enable;
        u32 perf_counter_enable;
        u32 pgm[2];
        u32 tba[2];
        u32 tma[2];
        u32 pgm_rsrc[2];
        u32 vmid;
        u32 resource_limits;
        u32 static_thread_mgmt01[2];
        u32 tmp_ring_size;
        u32 static_thread_mgmt23[2];
        u32 restart[3];
        u32 thread_trace_enable;
        u32 reserved1;
        u32 user_data[16];
        u32 vgtcs_invoke_count[2];
        /* hardware queue register shadow, written out during queue setup */
        struct hqd_registers queue_state;
        u32 dequeue_cntr;
        u32 interrupt_queue[64];
};
4059
4060 /**
4061  * cik_cp_compute_resume - setup the compute queue registers
4062  *
4063  * @rdev: radeon_device pointer
4064  *
4065  * Program the compute queues and test them to make sure they
4066  * are working.
4067  * Returns 0 for success, error for failure.
4068  */
4069 static int cik_cp_compute_resume(struct radeon_device *rdev)
4070 {
4071         int r, i, idx;
4072         u32 tmp;
4073         bool use_doorbell = true;
4074         u64 hqd_gpu_addr;
4075         u64 mqd_gpu_addr;
4076         u64 eop_gpu_addr;
4077         u64 wb_gpu_addr;
4078         u32 *buf;
4079         struct bonaire_mqd *mqd;
4080
4081         r = cik_cp_compute_start(rdev);
4082         if (r)
4083                 return r;
4084
4085         /* fix up chicken bits */
4086         tmp = RREG32(CP_CPF_DEBUG);
4087         tmp |= (1 << 23);
4088         WREG32(CP_CPF_DEBUG, tmp);
4089
4090         /* init the pipes */
4091         mutex_lock(&rdev->srbm_mutex);
4092         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4093                 int me = (i < 4) ? 1 : 2;
4094                 int pipe = (i < 4) ? i : (i - 4);
4095
4096                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4097
4098                 cik_srbm_select(rdev, me, pipe, 0, 0);
4099
4100                 /* write the EOP addr */
4101                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4102                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4103
4104                 /* set the VMID assigned */
4105                 WREG32(CP_HPD_EOP_VMID, 0);
4106
4107                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4108                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4109                 tmp &= ~EOP_SIZE_MASK;
4110                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4111                 WREG32(CP_HPD_EOP_CONTROL, tmp);
4112         }
4113         cik_srbm_select(rdev, 0, 0, 0, 0);
4114         mutex_unlock(&rdev->srbm_mutex);
4115
4116         /* init the queues.  Just two for now. */
4117         for (i = 0; i < 2; i++) {
4118                 if (i == 0)
4119                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4120                 else
4121                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4122
4123                 if (rdev->ring[idx].mqd_obj == NULL) {
4124                         r = radeon_bo_create(rdev,
4125                                              sizeof(struct bonaire_mqd),
4126                                              PAGE_SIZE, true,
4127                                              RADEON_GEM_DOMAIN_GTT, NULL,
4128                                              &rdev->ring[idx].mqd_obj);
4129                         if (r) {
4130                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4131                                 return r;
4132                         }
4133                 }
4134
4135                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4136                 if (unlikely(r != 0)) {
4137                         cik_cp_compute_fini(rdev);
4138                         return r;
4139                 }
4140                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4141                                   &mqd_gpu_addr);
4142                 if (r) {
4143                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4144                         cik_cp_compute_fini(rdev);
4145                         return r;
4146                 }
4147                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4148                 if (r) {
4149                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4150                         cik_cp_compute_fini(rdev);
4151                         return r;
4152                 }
4153
4154                 /* doorbell offset */
4155                 rdev->ring[idx].doorbell_offset =
4156                         (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
4157
4158                 /* init the mqd struct */
4159                 memset(buf, 0, sizeof(struct bonaire_mqd));
4160
4161                 mqd = (struct bonaire_mqd *)buf;
4162                 mqd->header = 0xC0310800;
4163                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4164                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4165                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4166                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4167
4168                 mutex_lock(&rdev->srbm_mutex);
4169                 cik_srbm_select(rdev, rdev->ring[idx].me,
4170                                 rdev->ring[idx].pipe,
4171                                 rdev->ring[idx].queue, 0);
4172
4173                 /* disable wptr polling */
4174                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4175                 tmp &= ~WPTR_POLL_EN;
4176                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4177
4178                 /* enable doorbell? */
4179                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4180                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4181                 if (use_doorbell)
4182                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4183                 else
4184                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4185                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4186                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4187
4188                 /* disable the queue if it's active */
4189                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4190                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4191                 mqd->queue_state.cp_hqd_pq_wptr= 0;
4192                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4193                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4194                         for (i = 0; i < rdev->usec_timeout; i++) {
4195                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4196                                         break;
4197                                 udelay(1);
4198                         }
4199                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4200                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4201                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4202                 }
4203
4204                 /* set the pointer to the MQD */
4205                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4206                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4207                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4208                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4209                 /* set MQD vmid to 0 */
4210                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4211                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4212                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4213
4214                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4215                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4216                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4217                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4218                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4219                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4220
4221                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4222                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4223                 mqd->queue_state.cp_hqd_pq_control &=
4224                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4225
4226                 mqd->queue_state.cp_hqd_pq_control |=
4227                         order_base_2(rdev->ring[idx].ring_size / 8);
4228                 mqd->queue_state.cp_hqd_pq_control |=
4229                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4230 #ifdef __BIG_ENDIAN
4231                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4232 #endif
4233                 mqd->queue_state.cp_hqd_pq_control &=
4234                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4235                 mqd->queue_state.cp_hqd_pq_control |=
4236                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4237                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4238
4239                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4240                 if (i == 0)
4241                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4242                 else
4243                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4244                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4245                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4246                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4247                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4248                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4249
4250                 /* set the wb address wether it's enabled or not */
4251                 if (i == 0)
4252                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4253                 else
4254                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4255                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4256                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4257                         upper_32_bits(wb_gpu_addr) & 0xffff;
4258                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4259                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4260                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4261                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4262
4263                 /* enable the doorbell if requested */
4264                 if (use_doorbell) {
4265                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4266                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4267                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4268                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4269                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
4270                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4271                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4272                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4273
4274                 } else {
4275                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4276                 }
4277                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4278                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4279
4280                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4281                 rdev->ring[idx].wptr = 0;
4282                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4283                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4284                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4285                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4286
4287                 /* set the vmid for the queue */
4288                 mqd->queue_state.cp_hqd_vmid = 0;
4289                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4290
4291                 /* activate the queue */
4292                 mqd->queue_state.cp_hqd_active = 1;
4293                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4294
4295                 cik_srbm_select(rdev, 0, 0, 0, 0);
4296                 mutex_unlock(&rdev->srbm_mutex);
4297
4298                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4299                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4300
4301                 rdev->ring[idx].ready = true;
4302                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4303                 if (r)
4304                         rdev->ring[idx].ready = false;
4305         }
4306
4307         return 0;
4308 }
4309
/**
 * cik_cp_enable - enable/disable both CP blocks
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the CPs
 *
 * Enables or halts the gfx CP and the compute CP (MEC) together.
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
        cik_cp_gfx_enable(rdev, enable);
        cik_cp_compute_enable(rdev, enable);
}
4315
/**
 * cik_cp_load_microcode - load the ucode for both CP blocks
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx CP microcode, then the compute MEC microcode.
 * Returns 0 on success, the first failing loader's error otherwise.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
        int ret;

        /* gfx first; skip the compute ucode if it failed */
        ret = cik_cp_gfx_load_microcode(rdev);
        if (ret == 0)
                ret = cik_cp_compute_load_microcode(rdev);

        return ret;
}
4329
/**
 * cik_cp_fini - tear down both CP blocks
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the gfx CP ring and then the compute queues/MQD BOs.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
        cik_cp_gfx_fini(rdev);
        cik_cp_compute_fini(rdev);
}
4335
4336 static int cik_cp_resume(struct radeon_device *rdev)
4337 {
4338         int r;
4339
4340         cik_enable_gui_idle_interrupt(rdev, false);
4341
4342         r = cik_cp_load_microcode(rdev);
4343         if (r)
4344                 return r;
4345
4346         r = cik_cp_gfx_resume(rdev);
4347         if (r)
4348                 return r;
4349         r = cik_cp_compute_resume(rdev);
4350         if (r)
4351                 return r;
4352
4353         cik_enable_gui_idle_interrupt(rdev, true);
4354
4355         return 0;
4356 }
4357
/**
 * cik_print_gpu_status_regs - dump the GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Logs the GRBM/SRBM/SDMA/CP status registers to the kernel log;
 * called before and after a soft reset to aid hang diagnosis.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
        dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
                RREG32(GRBM_STATUS));
        dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
                RREG32(GRBM_STATUS2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
                RREG32(GRBM_STATUS_SE0));
        dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
                RREG32(GRBM_STATUS_SE1));
        dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
                RREG32(GRBM_STATUS_SE2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
                RREG32(GRBM_STATUS_SE3));
        dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
                RREG32(SRBM_STATUS));
        dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
                RREG32(SRBM_STATUS2));
        /* both SDMA engines share the register layout at different offsets */
        dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
                RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
        dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
                 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
        dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
        dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT2));
        dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT3));
        dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
                 RREG32(CP_CPF_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPF_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
        dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPC_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4397
4398 /**
4399  * cik_gpu_check_soft_reset - check which blocks are busy
4400  *
4401  * @rdev: radeon_device pointer
4402  *
4403  * Check which blocks are busy and return the relevant reset
4404  * mask to be used by cik_gpu_soft_reset().
4405  * Returns a mask of the blocks to be reset.
4406  */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
        u32 reset_mask = 0;
        u32 tmp;

        /* GRBM_STATUS: gfx pipeline blocks */
        tmp = RREG32(GRBM_STATUS);
        if (tmp & (PA_BUSY | SC_BUSY |
                   BCI_BUSY | SX_BUSY |
                   TA_BUSY | VGT_BUSY |
                   DB_BUSY | CB_BUSY |
                   GDS_BUSY | SPI_BUSY |
                   IA_BUSY | IA_BUSY_NO_DMA))
                reset_mask |= RADEON_RESET_GFX;

        if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
                reset_mask |= RADEON_RESET_CP;

        /* GRBM_STATUS2 */
        tmp = RREG32(GRBM_STATUS2);
        if (tmp & RLC_BUSY)
                reset_mask |= RADEON_RESET_RLC;

        /* SDMA0_STATUS_REG: engine idle flag is active-low */
        tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
        if (!(tmp & SDMA_IDLE))
                reset_mask |= RADEON_RESET_DMA;

        /* SDMA1_STATUS_REG */
        tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
        if (!(tmp & SDMA_IDLE))
                reset_mask |= RADEON_RESET_DMA1;

        /* SRBM_STATUS2: SDMA busy also reported here */
        tmp = RREG32(SRBM_STATUS2);
        if (tmp & SDMA_BUSY)
                reset_mask |= RADEON_RESET_DMA;

        if (tmp & SDMA1_BUSY)
                reset_mask |= RADEON_RESET_DMA1;

        /* SRBM_STATUS: IH, semaphore, GRBM, VM and MC blocks */
        tmp = RREG32(SRBM_STATUS);

        if (tmp & IH_BUSY)
                reset_mask |= RADEON_RESET_IH;

        if (tmp & SEM_BUSY)
                reset_mask |= RADEON_RESET_SEM;

        if (tmp & GRBM_RQ_PENDING)
                reset_mask |= RADEON_RESET_GRBM;

        if (tmp & VMC_BUSY)
                reset_mask |= RADEON_RESET_VMC;

        if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
                   MCC_BUSY | MCD_BUSY))
                reset_mask |= RADEON_RESET_MC;

        if (evergreen_is_display_hung(rdev))
                reset_mask |= RADEON_RESET_DISPLAY;

        /* Skip MC reset as it's mostly likely not hung, just busy */
        if (reset_mask & RADEON_RESET_MC) {
                DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
                reset_mask &= ~RADEON_RESET_MC;
        }

        return reset_mask;
}
4478
4479 /**
4480  * cik_gpu_soft_reset - soft reset GPU
4481  *
4482  * @rdev: radeon_device pointer
4483  * @reset_mask: mask of which blocks to reset
4484  *
4485  * Soft reset the blocks specified in @reset_mask.
4486  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
        struct evergreen_mc_save save;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        if (reset_mask == 0)
                return;

        dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

        /* dump state before the reset for diagnosis */
        cik_print_gpu_status_regs(rdev);
        dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
        dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

        /* disable CG/PG */
        cik_fini_pg(rdev);
        cik_fini_cg(rdev);

        /* stop the rlc */
        cik_rlc_stop(rdev);

        /* Disable GFX parsing/prefetching */
        WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

        /* Disable MEC parsing/prefetching */
        WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

        /* halt the SDMA engines that are about to be reset */
        if (reset_mask & RADEON_RESET_DMA) {
                /* sdma0 */
                tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
                tmp |= SDMA_HALT;
                WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
        }
        if (reset_mask & RADEON_RESET_DMA1) {
                /* sdma1 */
                tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
                tmp |= SDMA_HALT;
                WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
        }

        /* stop MC traffic before pulsing the reset bits */
        evergreen_mc_stop(rdev, &save);
        if (evergreen_mc_wait_for_idle(rdev)) {
                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
        }

        /* translate the reset mask into GRBM/SRBM soft reset bits */
        if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
                grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

        if (reset_mask & RADEON_RESET_CP) {
                grbm_soft_reset |= SOFT_RESET_CP;

                srbm_soft_reset |= SOFT_RESET_GRBM;
        }

        if (reset_mask & RADEON_RESET_DMA)
                srbm_soft_reset |= SOFT_RESET_SDMA;

        if (reset_mask & RADEON_RESET_DMA1)
                srbm_soft_reset |= SOFT_RESET_SDMA1;

        if (reset_mask & RADEON_RESET_DISPLAY)
                srbm_soft_reset |= SOFT_RESET_DC;

        if (reset_mask & RADEON_RESET_RLC)
                grbm_soft_reset |= SOFT_RESET_RLC;

        if (reset_mask & RADEON_RESET_SEM)
                srbm_soft_reset |= SOFT_RESET_SEM;

        if (reset_mask & RADEON_RESET_IH)
                srbm_soft_reset |= SOFT_RESET_IH;

        if (reset_mask & RADEON_RESET_GRBM)
                srbm_soft_reset |= SOFT_RESET_GRBM;

        if (reset_mask & RADEON_RESET_VMC)
                srbm_soft_reset |= SOFT_RESET_VMC;

        /* IGPs have no on-card MC to reset */
        if (!(rdev->flags & RADEON_IS_IGP)) {
                if (reset_mask & RADEON_RESET_MC)
                        srbm_soft_reset |= SOFT_RESET_MC;
        }

        /* pulse the GRBM reset bits: set, readback-flush, delay, clear */
        if (grbm_soft_reset) {
                tmp = RREG32(GRBM_SOFT_RESET);
                tmp |= grbm_soft_reset;
                dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(GRBM_SOFT_RESET, tmp);
                tmp = RREG32(GRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~grbm_soft_reset;
                WREG32(GRBM_SOFT_RESET, tmp);
                tmp = RREG32(GRBM_SOFT_RESET);
        }

        /* pulse the SRBM reset bits the same way */
        if (srbm_soft_reset) {
                tmp = RREG32(SRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(SRBM_SOFT_RESET, tmp);
                tmp = RREG32(SRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(SRBM_SOFT_RESET, tmp);
                tmp = RREG32(SRBM_SOFT_RESET);
        }

        /* Wait a little for things to settle down */
        udelay(50);

        evergreen_mc_resume(rdev, &save);
        udelay(50);

        /* dump state again so the effect of the reset is visible in the log */
        cik_print_gpu_status_regs(rdev);
}
4609
4610 /**
4611  * cik_asic_reset - soft reset GPU
4612  *
4613  * @rdev: radeon_device pointer
4614  *
4615  * Look up which blocks are hung and attempt
4616  * to reset them.
4617  * Returns 0 for success.
4618  */
int cik_asic_reset(struct radeon_device *rdev)
{
        u32 reset_mask;

        reset_mask = cik_gpu_check_soft_reset(rdev);

        /* flag the engine as hung in the BIOS scratch regs before resetting */
        if (reset_mask)
                r600_set_bios_scratch_engine_hung(rdev, true);

        cik_gpu_soft_reset(rdev, reset_mask);

        /* re-check; only clear the hung flag if everything went idle */
        reset_mask = cik_gpu_check_soft_reset(rdev);

        if (!reset_mask)
                r600_set_bios_scratch_engine_hung(rdev, false);

        return 0;
}
4637
4638 /**
4639  * cik_gfx_is_lockup - check if the 3D engine is locked up
4640  *
4641  * @rdev: radeon_device pointer
4642  * @ring: radeon_ring structure holding ring information
4643  *
4644  * Check if the 3D engine is locked up (CIK).
4645  * Returns true if the engine is locked, false if not.
4646  */
4647 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4648 {
4649         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4650
4651         if (!(reset_mask & (RADEON_RESET_GFX |
4652                             RADEON_RESET_COMPUTE |
4653                             RADEON_RESET_CP))) {
4654                 radeon_ring_lockup_update(ring);
4655                 return false;
4656         }
4657         /* force CP activities */
4658         radeon_ring_force_activity(rdev, ring);
4659         return radeon_ring_test_lockup(rdev, ring);
4660 }
4661
4662 /* MC */
4663 /**
4664  * cik_mc_program - program the GPU memory controller
4665  *
4666  * @rdev: radeon_device pointer
4667  *
4668  * Set the location of vram, gart, and AGP in the GPU's
4669  * physical address space (CIK).
4670  */
static void cik_mc_program(struct radeon_device *rdev)
{
        struct evergreen_mc_save save;
        u32 tmp;
        int i, j;

        /* Initialize HDP */
        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
                WREG32((0x2c14 + j), 0x00000000);
                WREG32((0x2c18 + j), 0x00000000);
                WREG32((0x2c1c + j), 0x00000000);
                WREG32((0x2c20 + j), 0x00000000);
                WREG32((0x2c24 + j), 0x00000000);
        }
        WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

        /* stop MC clients while the apertures are reprogrammed */
        evergreen_mc_stop(rdev, &save);
        if (radeon_mc_wait_for_idle(rdev)) {
                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
        }
        /* Lockout access through VGA aperture*/
        WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
        /* Update configuration */
        WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
               rdev->mc.vram_start >> 12);
        WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
               rdev->mc.vram_end >> 12);
        WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
               rdev->vram_scratch.gpu_addr >> 12);
        /* FB_LOCATION packs top (31:16) and base (15:0) in 16MB units */
        tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
        tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
        WREG32(MC_VM_FB_LOCATION, tmp);
        /* XXX double check these! */
        WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
        WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
        WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
        /* AGP is unused on CIK; program an empty aperture (BOT > TOP) */
        WREG32(MC_VM_AGP_BASE, 0);
        WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
        WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
        if (radeon_mc_wait_for_idle(rdev)) {
                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
        }
        evergreen_mc_resume(rdev, &save);
        /* we need to own VRAM, so turn off the VGA renderer here
         * to stop it overwriting our objects */
        rv515_vga_render_disable(rdev);
}
4718
4719 /**
4720  * cik_mc_init - initialize the memory controller driver params
4721  *
4722  * @rdev: radeon_device pointer
4723  *
4724  * Look up the amount of vram, vram width, and decide how to place
4725  * vram and gart within the GPU's physical address space (CIK).
4726  * Returns 0 for success.
4727  */
4728 static int cik_mc_init(struct radeon_device *rdev)
4729 {
4730         u32 tmp;
4731         int chansize, numchan;
4732
4733         /* Get VRAM informations */
4734         rdev->mc.vram_is_ddr = true;
4735         tmp = RREG32(MC_ARB_RAMCFG);
4736         if (tmp & CHANSIZE_MASK) {
4737                 chansize = 64;
4738         } else {
4739                 chansize = 32;
4740         }
4741         tmp = RREG32(MC_SHARED_CHMAP);
4742         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4743         case 0:
4744         default:
4745                 numchan = 1;
4746                 break;
4747         case 1:
4748                 numchan = 2;
4749                 break;
4750         case 2:
4751                 numchan = 4;
4752                 break;
4753         case 3:
4754                 numchan = 8;
4755                 break;
4756         case 4:
4757                 numchan = 3;
4758                 break;
4759         case 5:
4760                 numchan = 6;
4761                 break;
4762         case 6:
4763                 numchan = 10;
4764                 break;
4765         case 7:
4766                 numchan = 12;
4767                 break;
4768         case 8:
4769                 numchan = 16;
4770                 break;
4771         }
4772         rdev->mc.vram_width = numchan * chansize;
4773         /* Could aper size report 0 ? */
4774         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4775         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4776         /* size in MB on si */
4777         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4778         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4779         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4780         si_vram_gtt_location(rdev, &rdev->mc);
4781         radeon_update_bandwidth_info(rdev);
4782
4783         return 0;
4784 }
4785
4786 /*
4787  * GART
4788  * VMID 0 is the physical GPU addresses as used by the kernel.
4789  * VMIDs 1-15 are used for userspace clients and are handled
4790  * by the radeon vm/hsa code.
4791  */
4792 /**
4793  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4794  *
4795  * @rdev: radeon_device pointer
4796  *
4797  * Flush the TLB for the VMID 0 page table (CIK).
4798  */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache first so any CPU writes land in VRAM/GTT
	 * before the TLB invalidate makes them visible */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only bit 0 (the kernel
	 * VMID 0 page table) is invalidated here */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
4807
4808 /**
4809  * cik_pcie_gart_enable - gart enable
4810  *
4811  * @rdev: radeon_device pointer
4812  *
4813  * This sets up the TLBs, programs the page tables for VMID0,
4814  * sets up the hw for VMIDs 1-15 which are allocated on
4815  * demand, and sets up the global locations for the LDS, GDS,
4816  * and GPUVM for FSA64 clients (CIK).
4817  * Returns 0 for success, errors for failure.
4818  */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	/* the GART page table must have been allocated in VRAM first */
	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) | /* NOTE(review): magic field at bits 7+ - meaning not visible here; confirm against register spec */
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: the kernel's GART mapping over the GTT range,
	 * faulting to the dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): unnamed registers 0x15D4-0x15DC are cleared here;
	 * their purpose is not visible in this file - confirm against the
	 * register spec */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* contexts 1-7 and 8-15 have separate base-address register banks;
	 * all initially point at the kernel page table */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* TC cache setup ??? */
	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L1_STORE_POLICY, 0);

	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);

	WREG32(TC_CFG_L1_VOLATILE, 0);
	WREG32(TC_CFG_L2_VOLATILE, 0);

	/* Kaveri only: make sure the hub doesn't bypass the VM */
	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	/* program the per-VMID SH_MEM/SDMA apertures via SRBM indexing */
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	/* back to VMID 0 before releasing the SRBM */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4943
4944 /**
4945  * cik_pcie_gart_disable - gart disable
4946  *
4947  * @rdev: radeon_device pointer
4948  *
4949  * This disables all VM page table (CIK).
4950  */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: note ENABLE_L1_TLB is deliberately absent,
	 * unlike the enable path */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache: same config as enable, minus ENABLE_L2_CACHE */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* release the page table now that the hw no longer references it */
	radeon_gart_table_vram_unpin(rdev);
}
4971
4972 /**
4973  * cik_pcie_gart_fini - vm fini callback
4974  *
4975  * @rdev: radeon_device pointer
4976  *
4977  * Tears down the driver GART/VM setup (CIK).
4978  */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* order matters: stop the hw first, then free the table it used,
	 * then tear down the gart bookkeeping */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4985
4986 /* vm parser */
4987 /**
4988  * cik_ib_parse - vm ib_parse callback
4989  *
4990  * @rdev: radeon_device pointer
4991  * @ib: indirect buffer pointer
4992  *
4993  * CIK uses hw IB checking so this is a nop (CIK).
4994  */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* intentionally empty: CIK relies on hw IB validation, so the
	 * sw parser always reports success */
	return 0;
}
4999
5000 /*
5001  * vm
5002  * VMID 0 is the physical GPU addresses as used by the kernel.
5003  * VMIDs 1-15 are used for userspace clients and are handled
5004  * by the radeon vm/hsa code.
5005  */
5006 /**
5007  * cik_vm_init - cik vm init callback
5008  *
5009  * @rdev: radeon_device pointer
5010  *
5011  * Inits cik specific vm parameters (number of VMs, base of vram for
5012  * VMIDs 1-15) (CIK).
5013  * Returns 0 for success.
5014  */
5015 int cik_vm_init(struct radeon_device *rdev)
5016 {
5017         /* number of VMs */
5018         rdev->vm_manager.nvm = 16;
5019         /* base offset of vram pages */
5020         if (rdev->flags & RADEON_IS_IGP) {
5021                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5022                 tmp <<= 22;
5023                 rdev->vm_manager.vram_base_offset = tmp;
5024         } else
5025                 rdev->vm_manager.vram_base_offset = 0;
5026
5027         return 0;
5028 }
5029
5030 /**
5031  * cik_vm_fini - cik vm fini callback
5032  *
5033  * @rdev: radeon_device pointer
5034  *
5035  * Tear down any asic specific VM setup (CIK).
5036  */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* intentionally empty: no asic-specific VM teardown needed on CIK */
}
5040
5041 /**
5042  * cik_vm_decode_fault - print human readable fault info
5043  *
5044  * @rdev: radeon_device pointer
5045  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5046  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5047  *
5048  * Print human readable fault information (CIK).
5049  */
5050 static void cik_vm_decode_fault(struct radeon_device *rdev,
5051                                 u32 status, u32 addr, u32 mc_client)
5052 {
5053         u32 mc_id;
5054         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5055         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5056         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5057                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5058
5059         if (rdev->family == CHIP_HAWAII)
5060                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5061         else
5062                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5063
5064         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5065                protections, vmid, addr,
5066                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5067                block, mc_client, mc_id);
5068 }
5069
5070 /**
5071  * cik_vm_flush - cik vm flush using the CP
5072  *
5073  * @rdev: radeon_device pointer
5074  *
5075  * Update the page table base and flush the VM TLB
5076  * using the CP (CIK).
5077  */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* update this VMID's page table base address; ids 0-7 and 8-15
	 * live in separate register banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* select this VMID in the SRBM so the SH_MEM writes below land
	 * in its register bank */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch the SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	/* We should be using the WAIT_REG_MEM packet here like in
	 * cik_fence_ring_emit(), but it causes the CP to hang in this
	 * context...
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5151
5152 /*
5153  * RLC
5154  * The RLC is a multi-purpose microengine that handles a
5155  * variety of functions, the most important of which is
5156  * the interrupt controller.
5157  */
5158 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5159                                           bool enable)
5160 {
5161         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5162
5163         if (enable)
5164                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5165         else
5166                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5167         WREG32(CP_INT_CNTL_RING0, tmp);
5168 }
5169
5170 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5171 {
5172         u32 tmp;
5173
5174         tmp = RREG32(RLC_LB_CNTL);
5175         if (enable)
5176                 tmp |= LOAD_BALANCE_ENABLE;
5177         else
5178                 tmp &= ~LOAD_BALANCE_ENABLE;
5179         WREG32(RLC_LB_CNTL, tmp);
5180 }
5181
5182 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5183 {
5184         u32 i, j, k;
5185         u32 mask;
5186
5187         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5188                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5189                         cik_select_se_sh(rdev, i, j);
5190                         for (k = 0; k < rdev->usec_timeout; k++) {
5191                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5192                                         break;
5193                                 udelay(1);
5194                         }
5195                 }
5196         }
5197         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5198
5199         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5200         for (k = 0; k < rdev->usec_timeout; k++) {
5201                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5202                         break;
5203                 udelay(1);
5204         }
5205 }
5206
5207 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5208 {
5209         u32 tmp;
5210
5211         tmp = RREG32(RLC_CNTL);
5212         if (tmp != rlc)
5213                 WREG32(RLC_CNTL, rlc);
5214 }
5215
5216 static u32 cik_halt_rlc(struct radeon_device *rdev)
5217 {
5218         u32 data, orig;
5219
5220         orig = data = RREG32(RLC_CNTL);
5221
5222         if (data & RLC_ENABLE) {
5223                 u32 i;
5224
5225                 data &= ~RLC_ENABLE;
5226                 WREG32(RLC_CNTL, data);
5227
5228                 for (i = 0; i < rdev->usec_timeout; i++) {
5229                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5230                                 break;
5231                         udelay(1);
5232                 }
5233
5234                 cik_wait_for_rlc_serdes(rdev);
5235         }
5236
5237         return orig;
5238 }
5239
5240 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5241 {
5242         u32 tmp, i, mask;
5243
5244         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5245         WREG32(RLC_GPR_REG2, tmp);
5246
5247         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5248         for (i = 0; i < rdev->usec_timeout; i++) {
5249                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5250                         break;
5251                 udelay(1);
5252         }
5253
5254         for (i = 0; i < rdev->usec_timeout; i++) {
5255                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5256                         break;
5257                 udelay(1);
5258         }
5259 }
5260
5261 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5262 {
5263         u32 tmp;
5264
5265         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5266         WREG32(RLC_GPR_REG2, tmp);
5267 }
5268
5269 /**
5270  * cik_rlc_stop - stop the RLC ME
5271  *
5272  * @rdev: radeon_device pointer
5273  *
5274  * Halt the RLC ME (MicroEngine) (CIK).
5275  */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* clear RLC_ENABLE (and every other RLC_CNTL bit) */
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	/* make sure all serdes masters are idle before returning */
	cik_wait_for_rlc_serdes(rdev);
}
5284
5285 /**
5286  * cik_rlc_start - start the RLC ME
5287  *
5288  * @rdev: radeon_device pointer
5289  *
5290  * Unhalt the RLC ME (MicroEngine) (CIK).
5291  */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* give the RLC a moment to come up before callers touch it */
	udelay(50);
}
5300
5301 /**
5302  * cik_rlc_resume - setup the RLC hw
5303  *
5304  * @rdev: radeon_device pointer
5305  *
5306  * Initialize the RLC registers, load the ucode,
5307  * and start the RLC (CIK).
5308  * Returns 0 for success, -EINVAL if the ucode is not available.
5309  */
5310 static int cik_rlc_resume(struct radeon_device *rdev)
5311 {
5312         u32 i, size, tmp;
5313         const __be32 *fw_data;
5314
5315         if (!rdev->rlc_fw)
5316                 return -EINVAL;
5317
5318         switch (rdev->family) {
5319         case CHIP_BONAIRE:
5320         default:
5321                 size = BONAIRE_RLC_UCODE_SIZE;
5322                 break;
5323         case CHIP_KAVERI:
5324                 size = KV_RLC_UCODE_SIZE;
5325                 break;
5326         case CHIP_KABINI:
5327                 size = KB_RLC_UCODE_SIZE;
5328                 break;
5329         }
5330
5331         cik_rlc_stop(rdev);
5332
5333         /* disable CG */
5334         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5335         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5336
5337         si_rlc_reset(rdev);
5338
5339         cik_init_pg(rdev);
5340
5341         cik_init_cg(rdev);
5342
5343         WREG32(RLC_LB_CNTR_INIT, 0);
5344         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5345
5346         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5347         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5348         WREG32(RLC_LB_PARAMS, 0x00600408);
5349         WREG32(RLC_LB_CNTL, 0x80000004);
5350
5351         WREG32(RLC_MC_CNTL, 0);
5352         WREG32(RLC_UCODE_CNTL, 0);
5353
5354         fw_data = (const __be32 *)rdev->rlc_fw->data;
5355                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5356         for (i = 0; i < size; i++)
5357                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5358         WREG32(RLC_GPM_UCODE_ADDR, 0);
5359
5360         /* XXX - find out what chips support lbpw */
5361         cik_enable_lbpw(rdev, false);
5362
5363         if (rdev->family == CHIP_BONAIRE)
5364                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5365
5366         cik_rlc_start(rdev);
5367
5368         return 0;
5369 }
5370
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while reprogramming the serdes override */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		/* restore the RLC state saved by cik_halt_rlc() */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): four back-to-back reads of the same
		 * register - presumably a posting/settling delay; confirm */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5406
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		/* CP memory light sleep, only when both MGLS and CP_LS
		 * are supported */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while reprogramming the serdes override */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		/* restore the RLC state saved by cik_halt_rlc() */
		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* force RLC and CP memory light sleep off */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* halt the RLC while reprogramming the serdes override */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		/* restore the RLC state saved by cik_halt_rlc() */
		cik_update_rlc(rdev, tmp);
	}
}
5485
/* MC/ATC/VM client registers whose clock-gating (MC_CG_ENABLE) and
 * light-sleep (MC_LS_ENABLE) bits are toggled as a group */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5498
5499 static void cik_enable_mc_ls(struct radeon_device *rdev,
5500                              bool enable)
5501 {
5502         int i;
5503         u32 orig, data;
5504
5505         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5506                 orig = data = RREG32(mc_cg_registers[i]);
5507                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5508                         data |= MC_LS_ENABLE;
5509                 else
5510                         data &= ~MC_LS_ENABLE;
5511                 if (data != orig)
5512                         WREG32(mc_cg_registers[i], data);
5513         }
5514 }
5515
5516 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5517                                bool enable)
5518 {
5519         int i;
5520         u32 orig, data;
5521
5522         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5523                 orig = data = RREG32(mc_cg_registers[i]);
5524                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5525                         data |= MC_CG_ENABLE;
5526                 else
5527                         data &= ~MC_CG_ENABLE;
5528                 if (data != orig)
5529                         WREG32(mc_cg_registers[i], data);
5530         }
5531 }
5532
5533 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5534                                  bool enable)
5535 {
5536         u32 orig, data;
5537
5538         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5539                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5540                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5541         } else {
5542                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5543                 data |= 0xff000000;
5544                 if (data != orig)
5545                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5546
5547                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5548                 data |= 0xff000000;
5549                 if (data != orig)
5550                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5551         }
5552 }
5553
5554 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5555                                  bool enable)
5556 {
5557         u32 orig, data;
5558
5559         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5560                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5561                 data |= 0x100;
5562                 if (orig != data)
5563                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5564
5565                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5566                 data |= 0x100;
5567                 if (orig != data)
5568                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5569         } else {
5570                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5571                 data &= ~0x100;
5572                 if (orig != data)
5573                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5574
5575                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5576                 data &= ~0x100;
5577                 if (orig != data)
5578                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5579         }
5580 }
5581
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value read above is discarded - the low
		 * 12 bits are forced on wholesale, unlike the RMW clear in
		 * the else branch. Presumably intentional; confirm. */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
5607
5608 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5609                                bool enable)
5610 {
5611         u32 orig, data;
5612
5613         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5614
5615         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5616                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5617                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5618         else
5619                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5620                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5621
5622         if (orig != data)
5623                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
5624 }
5625
5626 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5627                                 bool enable)
5628 {
5629         u32 orig, data;
5630
5631         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5632
5633         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5634                 data &= ~CLOCK_GATING_DIS;
5635         else
5636                 data |= CLOCK_GATING_DIS;
5637
5638         if (orig != data)
5639                 WREG32(HDP_HOST_PATH_CNTL, data);
5640 }
5641
5642 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5643                               bool enable)
5644 {
5645         u32 orig, data;
5646
5647         orig = data = RREG32(HDP_MEM_POWER_LS);
5648
5649         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5650                 data |= HDP_LS_ENABLE;
5651         else
5652                 data &= ~HDP_LS_ENABLE;
5653
5654         if (orig != data)
5655                 WREG32(HDP_MEM_POWER_LS, data);
5656 }
5657
/**
 * cik_update_cg - enable/disable clock gating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: true to enable clock gating, false to disable
 *
 * Dispatches to the per-block MGCG/LS helpers.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		/* hold off GUI idle interrupts while reprogramming GFX CG */
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			/* MGCG before CGCG on enable... */
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			/* ...and the reverse order on disable */
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating is only programmed on dGPUs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		/* only touch UVD CG when the asic actually has UVD */
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}
}
5701
/* Enable clock gating on all supported blocks at init/resume time.
 * GFX goes first; the remaining blocks are batched into one call.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
5716
/* Disable clock gating on teardown, in the reverse order of
 * cik_init_cg(): non-GFX blocks first, then GFX.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
5727
5728 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5729                                           bool enable)
5730 {
5731         u32 data, orig;
5732
5733         orig = data = RREG32(RLC_PG_CNTL);
5734         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5735                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5736         else
5737                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5738         if (orig != data)
5739                 WREG32(RLC_PG_CNTL, data);
5740 }
5741
5742 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5743                                           bool enable)
5744 {
5745         u32 data, orig;
5746
5747         orig = data = RREG32(RLC_PG_CNTL);
5748         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5749                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5750         else
5751                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5752         if (orig != data)
5753                 WREG32(RLC_PG_CNTL, data);
5754 }
5755
5756 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5757 {
5758         u32 data, orig;
5759
5760         orig = data = RREG32(RLC_PG_CNTL);
5761         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5762                 data &= ~DISABLE_CP_PG;
5763         else
5764                 data |= DISABLE_CP_PG;
5765         if (orig != data)
5766                 WREG32(RLC_PG_CNTL, data);
5767 }
5768
5769 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5770 {
5771         u32 data, orig;
5772
5773         orig = data = RREG32(RLC_PG_CNTL);
5774         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5775                 data &= ~DISABLE_GDS_PG;
5776         else
5777                 data |= DISABLE_GDS_PG;
5778         if (orig != data)
5779                 WREG32(RLC_PG_CNTL, data);
5780 }
5781
5782 #define CP_ME_TABLE_SIZE    96
5783 #define CP_ME_TABLE_OFFSET  2048
5784 #define CP_MEC_TABLE_OFFSET 4096
5785
5786 void cik_init_cp_pg_table(struct radeon_device *rdev)
5787 {
5788         const __be32 *fw_data;
5789         volatile u32 *dst_ptr;
5790         int me, i, max_me = 4;
5791         u32 bo_offset = 0;
5792         u32 table_offset;
5793
5794         if (rdev->family == CHIP_KAVERI)
5795                 max_me = 5;
5796
5797         if (rdev->rlc.cp_table_ptr == NULL)
5798                 return;
5799
5800         /* write the cp table buffer */
5801         dst_ptr = rdev->rlc.cp_table_ptr;
5802         for (me = 0; me < max_me; me++) {
5803                 if (me == 0) {
5804                         fw_data = (const __be32 *)rdev->ce_fw->data;
5805                         table_offset = CP_ME_TABLE_OFFSET;
5806                 } else if (me == 1) {
5807                         fw_data = (const __be32 *)rdev->pfp_fw->data;
5808                         table_offset = CP_ME_TABLE_OFFSET;
5809                 } else if (me == 2) {
5810                         fw_data = (const __be32 *)rdev->me_fw->data;
5811                         table_offset = CP_ME_TABLE_OFFSET;
5812                 } else {
5813                         fw_data = (const __be32 *)rdev->mec_fw->data;
5814                         table_offset = CP_MEC_TABLE_OFFSET;
5815                 }
5816
5817                 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
5818                         dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
5819                 }
5820                 bo_offset += CP_ME_TABLE_SIZE;
5821         }
5822 }
5823
/* Enable/disable gfx power gating and RLC automatic power gating. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		/* let the RLC power gate automatically */
		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result is unused; presumably a posting read
		 * to make sure the ungate sequence has landed -- confirm.
		 */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
5853
5854 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5855 {
5856         u32 mask = 0, tmp, tmp1;
5857         int i;
5858
5859         cik_select_se_sh(rdev, se, sh);
5860         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5861         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5862         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5863
5864         tmp &= 0xffff0000;
5865
5866         tmp |= tmp1;
5867         tmp >>= 16;
5868
5869         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
5870                 mask <<= 1;
5871                 mask |= 1;
5872         }
5873
5874         return (~tmp) & mask;
5875 }
5876
5877 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5878 {
5879         u32 i, j, k, active_cu_number = 0;
5880         u32 mask, counter, cu_bitmap;
5881         u32 tmp = 0;
5882
5883         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5884                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5885                         mask = 1;
5886                         cu_bitmap = 0;
5887                         counter = 0;
5888                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5889                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5890                                         if (counter < 2)
5891                                                 cu_bitmap |= mask;
5892                                         counter ++;
5893                                 }
5894                                 mask <<= 1;
5895                         }
5896
5897                         active_cu_number += counter;
5898                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5899                 }
5900         }
5901
5902         WREG32(RLC_PG_AO_CU_MASK, tmp);
5903
5904         tmp = RREG32(RLC_MAX_PG_CU);
5905         tmp &= ~MAX_PU_CU_MASK;
5906         tmp |= MAX_PU_CU(active_cu_number);
5907         WREG32(RLC_MAX_PG_CU, tmp);
5908 }
5909
5910 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5911                                        bool enable)
5912 {
5913         u32 data, orig;
5914
5915         orig = data = RREG32(RLC_PG_CNTL);
5916         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5917                 data |= STATIC_PER_CU_PG_ENABLE;
5918         else
5919                 data &= ~STATIC_PER_CU_PG_ENABLE;
5920         if (orig != data)
5921                 WREG32(RLC_PG_CNTL, data);
5922 }
5923
5924 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5925                                         bool enable)
5926 {
5927         u32 data, orig;
5928
5929         orig = data = RREG32(RLC_PG_CNTL);
5930         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5931                 data |= DYN_PER_CU_PG_ENABLE;
5932         else
5933                 data &= ~DYN_PER_CU_PG_ENABLE;
5934         if (orig != data)
5935                 WREG32(RLC_PG_CNTL, data);
5936 }
5937
5938 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5939 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
5940
/* Program the RLC with everything needed for gfx power gating: the
 * clear state descriptor, the save/restore register list, the table
 * base addresses, and the PG timing parameters.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* write the clear state descriptor (addr hi/lo + size)
		 * into RLC scratch */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero all three descriptor words */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* table base addresses, in 256-byte units (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	/* PG delay tuning values; magic numbers -- TODO confirm source */
	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
5989
/* Toggle all gfx power gating modes: coarse grain plus static and
 * dynamic per-CU medium grain.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
5996
5997 u32 cik_get_csb_size(struct radeon_device *rdev)
5998 {
5999         u32 count = 0;
6000         const struct cs_section_def *sect = NULL;
6001         const struct cs_extent_def *ext = NULL;
6002
6003         if (rdev->rlc.cs_data == NULL)
6004                 return 0;
6005
6006         /* begin clear state */
6007         count += 2;
6008         /* context control state */
6009         count += 3;
6010
6011         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6012                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6013                         if (sect->id == SECT_CONTEXT)
6014                                 count += 2 + ext->reg_count;
6015                         else
6016                                 return 0;
6017                 }
6018         }
6019         /* pa_sc_raster_config/pa_sc_raster_config1 */
6020         count += 4;
6021         /* end clear state */
6022         count += 2;
6023         /* clear state */
6024         count += 2;
6025
6026         return count;
6027 }
6028
/* Emit the clear state indirect buffer.  The dword layout here must
 * stay in sync with the size computed by cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per extent */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only SECT_CONTEXT sections are supported */
				return;
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1: per-asic values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6088
/* Enable all supported power gating features at init time. */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			/* gfx PG needs the RLC tables/parameters programmed
			 * before it is turned on below */
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6103
/* Disable power gating on teardown.  Gfx PG is switched off first,
 * then CP/GDS gating.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6114
6115 /*
6116  * Interrupts
6117  * Starting with r6xx, interrupts are handled via a ring buffer.
6118  * Ring buffers are areas of GPU accessible memory that the GPU
6119  * writes interrupt vectors into and the host reads vectors out of.
6120  * There is a rptr (read pointer) that determines where the
6121  * host is currently reading, and a wptr (write pointer)
6122  * which determines where the GPU has written.  When the
6123  * pointers are equal, the ring is idle.  When the GPU
6124  * writes vectors to the ring buffer, it increments the
6125  * wptr.  When there is an interrupt, the host then starts
6126  * fetching commands and processing them until the pointers are
6127  * equal again at which point it updates the rptr.
6128  */
6129
6130 /**
6131  * cik_enable_interrupts - Enable the interrupt ring buffer
6132  *
6133  * @rdev: radeon_device pointer
6134  *
6135  * Enable the interrupt ring buffer (CIK).
6136  */
6137 static void cik_enable_interrupts(struct radeon_device *rdev)
6138 {
6139         u32 ih_cntl = RREG32(IH_CNTL);
6140         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6141
6142         ih_cntl |= ENABLE_INTR;
6143         ih_rb_cntl |= IH_RB_ENABLE;
6144         WREG32(IH_CNTL, ih_cntl);
6145         WREG32(IH_RB_CNTL, ih_rb_cntl);
6146         rdev->ih.enabled = true;
6147 }
6148
6149 /**
6150  * cik_disable_interrupts - Disable the interrupt ring buffer
6151  *
6152  * @rdev: radeon_device pointer
6153  *
6154  * Disable the interrupt ring buffer (CIK).
6155  */
6156 static void cik_disable_interrupts(struct radeon_device *rdev)
6157 {
6158         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6159         u32 ih_cntl = RREG32(IH_CNTL);
6160
6161         ih_rb_cntl &= ~IH_RB_ENABLE;
6162         ih_cntl &= ~ENABLE_INTR;
6163         WREG32(IH_RB_CNTL, ih_rb_cntl);
6164         WREG32(IH_CNTL, ih_cntl);
6165         /* set rptr, wptr to 0 */
6166         WREG32(IH_RB_RPTR, 0);
6167         WREG32(IH_RB_WPTR, 0);
6168         rdev->ih.enabled = false;
6169         rdev->ih.rptr = 0;
6170 }
6171
6172 /**
6173  * cik_disable_interrupt_state - Disable all interrupt sources
6174  *
6175  * @rdev: radeon_device pointer
6176  *
6177  * Clear all interrupt enable bits used by the driver (CIK).
6178  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty enables */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: mask the trap interrupt on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: both MEs, all four pipes each */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: keep the polarity bit, drop the enables */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6233
6234 /**
6235  * cik_irq_init - init and enable the interrupt ring
6236  *
6237  * @rdev: radeon_device pointer
6238  *
6239  * Allocate a ring buffer for the interrupt controller,
6240  * enable the RLC, disable interrupts, enable the IH
6241  * ring buffer and enable it (CIK).
 * Called at device load and resume.
6243  * Returns 0 for success, errors for failure.
6244  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field holds log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6315
6316 /**
6317  * cik_irq_set - enable/disable interrupt sources
6318  *
6319  * @rdev: radeon_device pointer
6320  *
6321  * Enable interrupt sources on the GPU (vblanks, hpd,
6322  * etc.) (CIK).
6323  * Returns 0 for success, errors for failure.
6324  */
6325 int cik_irq_set(struct radeon_device *rdev)
6326 {
6327         u32 cp_int_cntl;
6328         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6329         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6330         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6331         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6332         u32 grbm_int_cntl = 0;
6333         u32 dma_cntl, dma_cntl1;
6334         u32 thermal_int;
6335
6336         if (!rdev->irq.installed) {
6337                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6338                 return -EINVAL;
6339         }
6340         /* don't enable anything if the ih is disabled */
6341         if (!rdev->ih.enabled) {
6342                 cik_disable_interrupts(rdev);
6343                 /* force the active interrupt state to all disabled */
6344                 cik_disable_interrupt_state(rdev);
6345                 return 0;
6346         }
6347
6348         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6349                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6350         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6351
6352         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6353         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6354         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6355         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6356         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6357         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6358
6359         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6360         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6361
6362         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6363         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6364         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6365         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6366         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6367         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6368         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6369         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6370
6371         if (rdev->flags & RADEON_IS_IGP)
6372                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6373                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6374         else
6375                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6376                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6377
6378         /* enable CP interrupts on all rings */
6379         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6380                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6381                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6382         }
6383         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6384                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6385                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6386                 if (ring->me == 1) {
6387                         switch (ring->pipe) {
6388                         case 0:
6389                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6390                                 break;
6391                         case 1:
6392                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6393                                 break;
6394                         case 2:
6395                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6396                                 break;
6397                         case 3:
6398                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6399                                 break;
6400                         default:
6401                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6402                                 break;
6403                         }
6404                 } else if (ring->me == 2) {
6405                         switch (ring->pipe) {
6406                         case 0:
6407                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6408                                 break;
6409                         case 1:
6410                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6411                                 break;
6412                         case 2:
6413                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6414                                 break;
6415                         case 3:
6416                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6417                                 break;
6418                         default:
6419                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6420                                 break;
6421                         }
6422                 } else {
6423                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6424                 }
6425         }
6426         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6427                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6428                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6429                 if (ring->me == 1) {
6430                         switch (ring->pipe) {
6431                         case 0:
6432                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6433                                 break;
6434                         case 1:
6435                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6436                                 break;
6437                         case 2:
6438                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6439                                 break;
6440                         case 3:
6441                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6442                                 break;
6443                         default:
6444                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6445                                 break;
6446                         }
6447                 } else if (ring->me == 2) {
6448                         switch (ring->pipe) {
6449                         case 0:
6450                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6451                                 break;
6452                         case 1:
6453                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6454                                 break;
6455                         case 2:
6456                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6457                                 break;
6458                         case 3:
6459                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6460                                 break;
6461                         default:
6462                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6463                                 break;
6464                         }
6465                 } else {
6466                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6467                 }
6468         }
6469
6470         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6471                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6472                 dma_cntl |= TRAP_ENABLE;
6473         }
6474
6475         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6476                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6477                 dma_cntl1 |= TRAP_ENABLE;
6478         }
6479
6480         if (rdev->irq.crtc_vblank_int[0] ||
6481             atomic_read(&rdev->irq.pflip[0])) {
6482                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6483                 crtc1 |= VBLANK_INTERRUPT_MASK;
6484         }
6485         if (rdev->irq.crtc_vblank_int[1] ||
6486             atomic_read(&rdev->irq.pflip[1])) {
6487                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6488                 crtc2 |= VBLANK_INTERRUPT_MASK;
6489         }
6490         if (rdev->irq.crtc_vblank_int[2] ||
6491             atomic_read(&rdev->irq.pflip[2])) {
6492                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6493                 crtc3 |= VBLANK_INTERRUPT_MASK;
6494         }
6495         if (rdev->irq.crtc_vblank_int[3] ||
6496             atomic_read(&rdev->irq.pflip[3])) {
6497                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6498                 crtc4 |= VBLANK_INTERRUPT_MASK;
6499         }
6500         if (rdev->irq.crtc_vblank_int[4] ||
6501             atomic_read(&rdev->irq.pflip[4])) {
6502                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6503                 crtc5 |= VBLANK_INTERRUPT_MASK;
6504         }
6505         if (rdev->irq.crtc_vblank_int[5] ||
6506             atomic_read(&rdev->irq.pflip[5])) {
6507                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6508                 crtc6 |= VBLANK_INTERRUPT_MASK;
6509         }
6510         if (rdev->irq.hpd[0]) {
6511                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6512                 hpd1 |= DC_HPDx_INT_EN;
6513         }
6514         if (rdev->irq.hpd[1]) {
6515                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6516                 hpd2 |= DC_HPDx_INT_EN;
6517         }
6518         if (rdev->irq.hpd[2]) {
6519                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6520                 hpd3 |= DC_HPDx_INT_EN;
6521         }
6522         if (rdev->irq.hpd[3]) {
6523                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6524                 hpd4 |= DC_HPDx_INT_EN;
6525         }
6526         if (rdev->irq.hpd[4]) {
6527                 DRM_DEBUG("cik_irq_set: hpd 5\n");
6528                 hpd5 |= DC_HPDx_INT_EN;
6529         }
6530         if (rdev->irq.hpd[5]) {
6531                 DRM_DEBUG("cik_irq_set: hpd 6\n");
6532                 hpd6 |= DC_HPDx_INT_EN;
6533         }
6534
6535         if (rdev->irq.dpm_thermal) {
6536                 DRM_DEBUG("dpm thermal\n");
6537                 if (rdev->flags & RADEON_IS_IGP)
6538                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6539                 else
6540                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6541         }
6542
6543         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6544
6545         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6546         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6547
6548         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6549         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6550         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6551         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6552         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6553         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6554         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6555         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6556
6557         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6558
6559         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6560         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6561         if (rdev->num_crtc >= 4) {
6562                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6563                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6564         }
6565         if (rdev->num_crtc >= 6) {
6566                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6567                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6568         }
6569
6570         WREG32(DC_HPD1_INT_CONTROL, hpd1);
6571         WREG32(DC_HPD2_INT_CONTROL, hpd2);
6572         WREG32(DC_HPD3_INT_CONTROL, hpd3);
6573         WREG32(DC_HPD4_INT_CONTROL, hpd4);
6574         WREG32(DC_HPD5_INT_CONTROL, hpd5);
6575         WREG32(DC_HPD6_INT_CONTROL, hpd6);
6576
6577         if (rdev->flags & RADEON_IS_IGP)
6578                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6579         else
6580                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6581
6582         return 0;
6583 }
6584
6585 /**
6586  * cik_irq_ack - ack interrupt sources
6587  *
6588  * @rdev: radeon_device pointer
6589  *
6590  * Ack interrupt sources on the GPU (vblanks, hpd,
6591  * etc.) (CIK).  Certain interrupts sources are sw
6592  * generated and do not require an explicit ack.
6593  */
6594 static inline void cik_irq_ack(struct radeon_device *rdev)
6595 {
6596         u32 tmp;
6597
6598         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6599         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6600         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6601         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6602         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6603         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6604         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6605
6606         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6607                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6608         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6609                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6610         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6611                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6612         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6613                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6614
6615         if (rdev->num_crtc >= 4) {
6616                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6617                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6618                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6619                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6620                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6621                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6622                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6623                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6624         }
6625
6626         if (rdev->num_crtc >= 6) {
6627                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6628                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6629                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6630                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6631                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6632                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6633                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6634                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6635         }
6636
6637         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6638                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6639                 tmp |= DC_HPDx_INT_ACK;
6640                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6641         }
6642         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6643                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6644                 tmp |= DC_HPDx_INT_ACK;
6645                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6646         }
6647         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6648                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6649                 tmp |= DC_HPDx_INT_ACK;
6650                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6651         }
6652         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6653                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6654                 tmp |= DC_HPDx_INT_ACK;
6655                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6656         }
6657         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6658                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6659                 tmp |= DC_HPDx_INT_ACK;
6660                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6661         }
6662         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6663                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6664                 tmp |= DC_HPDx_INT_ACK;
6665                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6666         }
6667 }
6668
6669 /**
6670  * cik_irq_disable - disable interrupts
6671  *
6672  * @rdev: radeon_device pointer
6673  *
6674  * Disable interrupts on the hw (CIK).
6675  */
6676 static void cik_irq_disable(struct radeon_device *rdev)
6677 {
6678         cik_disable_interrupts(rdev);
6679         /* Wait and acknowledge irq */
6680         mdelay(1);
6681         cik_irq_ack(rdev);
6682         cik_disable_interrupt_state(rdev);
6683 }
6684
6685 /**
6686  * cik_irq_disable - disable interrupts for suspend
6687  *
6688  * @rdev: radeon_device pointer
6689  *
6690  * Disable interrupts and stop the RLC (CIK).
6691  * Used for suspend.
6692  */
6693 static void cik_irq_suspend(struct radeon_device *rdev)
6694 {
6695         cik_irq_disable(rdev);
6696         cik_rlc_stop(rdev);
6697 }
6698
6699 /**
6700  * cik_irq_fini - tear down interrupt support
6701  *
6702  * @rdev: radeon_device pointer
6703  *
6704  * Disable interrupts on the hw and free the IH ring
6705  * buffer (CIK).
6706  * Used for driver unload.
6707  */
6708 static void cik_irq_fini(struct radeon_device *rdev)
6709 {
6710         cik_irq_suspend(rdev);
6711         r600_ih_ring_fini(rdev);
6712 }
6713
6714 /**
6715  * cik_get_ih_wptr - get the IH ring buffer wptr
6716  *
6717  * @rdev: radeon_device pointer
6718  *
6719  * Get the IH ring buffer wptr from either the register
6720  * or the writeback memory buffer (CIK).  Also check for
6721  * ring buffer overflow and deal with it.
6722  * Used by cik_irq_process().
6723  * Returns the value of the wptr.
6724  */
6725 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6726 {
6727         u32 wptr, tmp;
6728
6729         if (rdev->wb.enabled)
6730                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6731         else
6732                 wptr = RREG32(IH_RB_WPTR);
6733
6734         if (wptr & RB_OVERFLOW) {
6735                 /* When a ring buffer overflow happen start parsing interrupt
6736                  * from the last not overwritten vector (wptr + 16). Hopefully
6737                  * this should allow us to catchup.
6738                  */
6739                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6740                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6741                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6742                 tmp = RREG32(IH_RB_CNTL);
6743                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6744                 WREG32(IH_RB_CNTL, tmp);
6745         }
6746         return (wptr & rdev->ih.ptr_mask);
6747 }
6748
6749 /*        CIK IV Ring
6750  * Each IV ring entry is 128 bits:
6751  * [7:0]    - interrupt source id
6752  * [31:8]   - reserved
6753  * [59:32]  - interrupt source data
6754  * [63:60]  - reserved
6755  * [71:64]  - RINGID
6756  *            CP:
6757  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6758  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6759  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6760  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6761  *            PIPE_ID - ME0 0=3D
6762  *                    - ME1&2 compute dispatcher (4 pipes each)
6763  *            SDMA:
6764  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
6765  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
6766  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6767  * [79:72]  - VMID
6768  * [95:80]  - PASID
6769  * [127:96] - reserved
6770  */
6771 /**
6772  * cik_irq_process - interrupt handler
6773  *
6774  * @rdev: radeon_device pointer
6775  *
 * Interrupt handler (CIK).  Walk the IH ring,
6777  * ack interrupts and schedule work to handle
6778  * interrupt events.
6779  * Returns irq process return code.
6780  */
6781 int cik_irq_process(struct radeon_device *rdev)
6782 {
6783         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6784         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6785         u32 wptr;
6786         u32 rptr;
6787         u32 src_id, src_data, ring_id;
6788         u8 me_id, pipe_id, queue_id;
6789         u32 ring_index;
6790         bool queue_hotplug = false;
6791         bool queue_reset = false;
6792         u32 addr, status, mc_client;
6793         bool queue_thermal = false;
6794
6795         if (!rdev->ih.enabled || rdev->shutdown)
6796                 return IRQ_NONE;
6797
6798         wptr = cik_get_ih_wptr(rdev);
6799
6800 restart_ih:
6801         /* is somebody else already processing irqs? */
6802         if (atomic_xchg(&rdev->ih.lock, 1))
6803                 return IRQ_NONE;
6804
6805         rptr = rdev->ih.rptr;
6806         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6807
6808         /* Order reading of wptr vs. reading of IH ring data */
6809         rmb();
6810
6811         /* display interrupts */
6812         cik_irq_ack(rdev);
6813
6814         while (rptr != wptr) {
6815                 /* wptr/rptr are in bytes! */
6816                 ring_index = rptr / 4;
6817                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6818                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6819                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6820
6821                 switch (src_id) {
6822                 case 1: /* D1 vblank/vline */
6823                         switch (src_data) {
6824                         case 0: /* D1 vblank */
6825                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6826                                         if (rdev->irq.crtc_vblank_int[0]) {
6827                                                 drm_handle_vblank(rdev->ddev, 0);
6828                                                 rdev->pm.vblank_sync = true;
6829                                                 wake_up(&rdev->irq.vblank_queue);
6830                                         }
6831                                         if (atomic_read(&rdev->irq.pflip[0]))
6832                                                 radeon_crtc_handle_flip(rdev, 0);
6833                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6834                                         DRM_DEBUG("IH: D1 vblank\n");
6835                                 }
6836                                 break;
6837                         case 1: /* D1 vline */
6838                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6839                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6840                                         DRM_DEBUG("IH: D1 vline\n");
6841                                 }
6842                                 break;
6843                         default:
6844                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6845                                 break;
6846                         }
6847                         break;
6848                 case 2: /* D2 vblank/vline */
6849                         switch (src_data) {
6850                         case 0: /* D2 vblank */
6851                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6852                                         if (rdev->irq.crtc_vblank_int[1]) {
6853                                                 drm_handle_vblank(rdev->ddev, 1);
6854                                                 rdev->pm.vblank_sync = true;
6855                                                 wake_up(&rdev->irq.vblank_queue);
6856                                         }
6857                                         if (atomic_read(&rdev->irq.pflip[1]))
6858                                                 radeon_crtc_handle_flip(rdev, 1);
6859                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6860                                         DRM_DEBUG("IH: D2 vblank\n");
6861                                 }
6862                                 break;
6863                         case 1: /* D2 vline */
6864                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6865                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6866                                         DRM_DEBUG("IH: D2 vline\n");
6867                                 }
6868                                 break;
6869                         default:
6870                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6871                                 break;
6872                         }
6873                         break;
6874                 case 3: /* D3 vblank/vline */
6875                         switch (src_data) {
6876                         case 0: /* D3 vblank */
6877                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6878                                         if (rdev->irq.crtc_vblank_int[2]) {
6879                                                 drm_handle_vblank(rdev->ddev, 2);
6880                                                 rdev->pm.vblank_sync = true;
6881                                                 wake_up(&rdev->irq.vblank_queue);
6882                                         }
6883                                         if (atomic_read(&rdev->irq.pflip[2]))
6884                                                 radeon_crtc_handle_flip(rdev, 2);
6885                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6886                                         DRM_DEBUG("IH: D3 vblank\n");
6887                                 }
6888                                 break;
6889                         case 1: /* D3 vline */
6890                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6891                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6892                                         DRM_DEBUG("IH: D3 vline\n");
6893                                 }
6894                                 break;
6895                         default:
6896                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6897                                 break;
6898                         }
6899                         break;
6900                 case 4: /* D4 vblank/vline */
6901                         switch (src_data) {
6902                         case 0: /* D4 vblank */
6903                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6904                                         if (rdev->irq.crtc_vblank_int[3]) {
6905                                                 drm_handle_vblank(rdev->ddev, 3);
6906                                                 rdev->pm.vblank_sync = true;
6907                                                 wake_up(&rdev->irq.vblank_queue);
6908                                         }
6909                                         if (atomic_read(&rdev->irq.pflip[3]))
6910                                                 radeon_crtc_handle_flip(rdev, 3);
6911                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6912                                         DRM_DEBUG("IH: D4 vblank\n");
6913                                 }
6914                                 break;
6915                         case 1: /* D4 vline */
6916                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6917                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6918                                         DRM_DEBUG("IH: D4 vline\n");
6919                                 }
6920                                 break;
6921                         default:
6922                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6923                                 break;
6924                         }
6925                         break;
6926                 case 5: /* D5 vblank/vline */
6927                         switch (src_data) {
6928                         case 0: /* D5 vblank */
6929                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6930                                         if (rdev->irq.crtc_vblank_int[4]) {
6931                                                 drm_handle_vblank(rdev->ddev, 4);
6932                                                 rdev->pm.vblank_sync = true;
6933                                                 wake_up(&rdev->irq.vblank_queue);
6934                                         }
6935                                         if (atomic_read(&rdev->irq.pflip[4]))
6936                                                 radeon_crtc_handle_flip(rdev, 4);
6937                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6938                                         DRM_DEBUG("IH: D5 vblank\n");
6939                                 }
6940                                 break;
6941                         case 1: /* D5 vline */
6942                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6943                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6944                                         DRM_DEBUG("IH: D5 vline\n");
6945                                 }
6946                                 break;
6947                         default:
6948                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6949                                 break;
6950                         }
6951                         break;
6952                 case 6: /* D6 vblank/vline */
6953                         switch (src_data) {
6954                         case 0: /* D6 vblank */
6955                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6956                                         if (rdev->irq.crtc_vblank_int[5]) {
6957                                                 drm_handle_vblank(rdev->ddev, 5);
6958                                                 rdev->pm.vblank_sync = true;
6959                                                 wake_up(&rdev->irq.vblank_queue);
6960                                         }
6961                                         if (atomic_read(&rdev->irq.pflip[5]))
6962                                                 radeon_crtc_handle_flip(rdev, 5);
6963                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6964                                         DRM_DEBUG("IH: D6 vblank\n");
6965                                 }
6966                                 break;
6967                         case 1: /* D6 vline */
6968                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6969                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6970                                         DRM_DEBUG("IH: D6 vline\n");
6971                                 }
6972                                 break;
6973                         default:
6974                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6975                                 break;
6976                         }
6977                         break;
6978                 case 42: /* HPD hotplug */
6979                         switch (src_data) {
6980                         case 0:
6981                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6982                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6983                                         queue_hotplug = true;
6984                                         DRM_DEBUG("IH: HPD1\n");
6985                                 }
6986                                 break;
6987                         case 1:
6988                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6989                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6990                                         queue_hotplug = true;
6991                                         DRM_DEBUG("IH: HPD2\n");
6992                                 }
6993                                 break;
6994                         case 2:
6995                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6996                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6997                                         queue_hotplug = true;
6998                                         DRM_DEBUG("IH: HPD3\n");
6999                                 }
7000                                 break;
7001                         case 3:
7002                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7003                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7004                                         queue_hotplug = true;
7005                                         DRM_DEBUG("IH: HPD4\n");
7006                                 }
7007                                 break;
7008                         case 4:
7009                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7010                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7011                                         queue_hotplug = true;
7012                                         DRM_DEBUG("IH: HPD5\n");
7013                                 }
7014                                 break;
7015                         case 5:
7016                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7017                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7018                                         queue_hotplug = true;
7019                                         DRM_DEBUG("IH: HPD6\n");
7020                                 }
7021                                 break;
7022                         default:
7023                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7024                                 break;
7025                         }
7026                         break;
7027                 case 124: /* UVD */
7028                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7029                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7030                         break;
7031                 case 146:
7032                 case 147:
7033                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7034                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7035                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7036                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7037                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7038                                 addr);
7039                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7040                                 status);
7041                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7042                         /* reset addr and status */
7043                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7044                         break;
7045                 case 176: /* GFX RB CP_INT */
7046                 case 177: /* GFX IB CP_INT */
7047                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7048                         break;
7049                 case 181: /* CP EOP event */
7050                         DRM_DEBUG("IH: CP EOP\n");
7051                         /* XXX check the bitfield order! */
7052                         me_id = (ring_id & 0x60) >> 5;
7053                         pipe_id = (ring_id & 0x18) >> 3;
7054                         queue_id = (ring_id & 0x7) >> 0;
7055                         switch (me_id) {
7056                         case 0:
7057                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7058                                 break;
7059                         case 1:
7060                         case 2:
7061                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7062                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7063                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7064                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7065                                 break;
7066                         }
7067                         break;
7068                 case 184: /* CP Privileged reg access */
7069                         DRM_ERROR("Illegal register access in command stream\n");
7070                         /* XXX check the bitfield order! */
7071                         me_id = (ring_id & 0x60) >> 5;
7072                         pipe_id = (ring_id & 0x18) >> 3;
7073                         queue_id = (ring_id & 0x7) >> 0;
7074                         switch (me_id) {
7075                         case 0:
7076                                 /* This results in a full GPU reset, but all we need to do is soft
7077                                  * reset the CP for gfx
7078                                  */
7079                                 queue_reset = true;
7080                                 break;
7081                         case 1:
7082                                 /* XXX compute */
7083                                 queue_reset = true;
7084                                 break;
7085                         case 2:
7086                                 /* XXX compute */
7087                                 queue_reset = true;
7088                                 break;
7089                         }
7090                         break;
7091                 case 185: /* CP Privileged inst */
7092                         DRM_ERROR("Illegal instruction in command stream\n");
7093                         /* XXX check the bitfield order! */
7094                         me_id = (ring_id & 0x60) >> 5;
7095                         pipe_id = (ring_id & 0x18) >> 3;
7096                         queue_id = (ring_id & 0x7) >> 0;
7097                         switch (me_id) {
7098                         case 0:
7099                                 /* This results in a full GPU reset, but all we need to do is soft
7100                                  * reset the CP for gfx
7101                                  */
7102                                 queue_reset = true;
7103                                 break;
7104                         case 1:
7105                                 /* XXX compute */
7106                                 queue_reset = true;
7107                                 break;
7108                         case 2:
7109                                 /* XXX compute */
7110                                 queue_reset = true;
7111                                 break;
7112                         }
7113                         break;
7114                 case 224: /* SDMA trap event */
7115                         /* XXX check the bitfield order! */
7116                         me_id = (ring_id & 0x3) >> 0;
7117                         queue_id = (ring_id & 0xc) >> 2;
7118                         DRM_DEBUG("IH: SDMA trap\n");
7119                         switch (me_id) {
7120                         case 0:
7121                                 switch (queue_id) {
7122                                 case 0:
7123                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7124                                         break;
7125                                 case 1:
7126                                         /* XXX compute */
7127                                         break;
7128                                 case 2:
7129                                         /* XXX compute */
7130                                         break;
7131                                 }
7132                                 break;
7133                         case 1:
7134                                 switch (queue_id) {
7135                                 case 0:
7136                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7137                                         break;
7138                                 case 1:
7139                                         /* XXX compute */
7140                                         break;
7141                                 case 2:
7142                                         /* XXX compute */
7143                                         break;
7144                                 }
7145                                 break;
7146                         }
7147                         break;
7148                 case 230: /* thermal low to high */
7149                         DRM_DEBUG("IH: thermal low to high\n");
7150                         rdev->pm.dpm.thermal.high_to_low = false;
7151                         queue_thermal = true;
7152                         break;
7153                 case 231: /* thermal high to low */
7154                         DRM_DEBUG("IH: thermal high to low\n");
7155                         rdev->pm.dpm.thermal.high_to_low = true;
7156                         queue_thermal = true;
7157                         break;
7158                 case 233: /* GUI IDLE */
7159                         DRM_DEBUG("IH: GUI idle\n");
7160                         break;
7161                 case 241: /* SDMA Privileged inst */
7162                 case 247: /* SDMA Privileged inst */
7163                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
7164                         /* XXX check the bitfield order! */
7165                         me_id = (ring_id & 0x3) >> 0;
7166                         queue_id = (ring_id & 0xc) >> 2;
7167                         switch (me_id) {
7168                         case 0:
7169                                 switch (queue_id) {
7170                                 case 0:
7171                                         queue_reset = true;
7172                                         break;
7173                                 case 1:
7174                                         /* XXX compute */
7175                                         queue_reset = true;
7176                                         break;
7177                                 case 2:
7178                                         /* XXX compute */
7179                                         queue_reset = true;
7180                                         break;
7181                                 }
7182                                 break;
7183                         case 1:
7184                                 switch (queue_id) {
7185                                 case 0:
7186                                         queue_reset = true;
7187                                         break;
7188                                 case 1:
7189                                         /* XXX compute */
7190                                         queue_reset = true;
7191                                         break;
7192                                 case 2:
7193                                         /* XXX compute */
7194                                         queue_reset = true;
7195                                         break;
7196                                 }
7197                                 break;
7198                         }
7199                         break;
7200                 default:
7201                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7202                         break;
7203                 }
7204
7205                 /* wptr/rptr are in bytes! */
7206                 rptr += 16;
7207                 rptr &= rdev->ih.ptr_mask;
7208         }
7209         if (queue_hotplug)
7210                 schedule_work(&rdev->hotplug_work);
7211         if (queue_reset)
7212                 schedule_work(&rdev->reset_work);
7213         if (queue_thermal)
7214                 schedule_work(&rdev->pm.dpm.thermal.work);
7215         rdev->ih.rptr = rptr;
7216         WREG32(IH_RB_RPTR, rdev->ih.rptr);
7217         atomic_set(&rdev->ih.lock, 0);
7218
7219         /* make sure wptr hasn't changed while processing */
7220         wptr = cik_get_ih_wptr(rdev);
7221         if (wptr != rptr)
7222                 goto restart_ih;
7223
7224         return IRQ_HANDLED;
7225 }
7226
7227 /*
7228  * startup/shutdown callbacks
7229  */
7230 /**
7231  * cik_startup - program the asic to a functional state
7232  *
7233  * @rdev: radeon_device pointer
7234  *
7235  * Programs the asic to a functional state (CIK).
7236  * Called by cik_init() and cik_resume().
7237  * Returns 0 for success, error for failure.
7238  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* (re)load microcode if any image is missing; IGPs carry no MC ucode */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		/* dGPUs additionally need the MC ucode pushed to the hw */
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* KAVERI (spectre) and other IGPs (kalindi) use different
		 * RLC save/restore register lists */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring: gfx, 2x compute, 2x SDMA */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is best-effort: on any failure zero the ring size so the UVD
	 * ring init below is skipped instead of failing the whole startup */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* gfx ring */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	/* sdma0 ring */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	/* sdma1 ring */
	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD resume failed earlier; skip it */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
7464
7465 /**
7466  * cik_resume - resume the asic to a functional state
7467  *
7468  * @rdev: radeon_device pointer
7469  *
7470  * Programs the asic to a functional state (CIK).
7471  * Called at resume.
7472  * Returns 0 for success, error for failure.
7473  */
7474 int cik_resume(struct radeon_device *rdev)
7475 {
7476         int r;
7477
7478         /* post card */
7479         atom_asic_init(rdev->mode_info.atom_context);
7480
7481         /* init golden registers */
7482         cik_init_golden_registers(rdev);
7483
7484         rdev->accel_working = true;
7485         r = cik_startup(rdev);
7486         if (r) {
7487                 DRM_ERROR("cik startup failed on resume\n");
7488                 rdev->accel_working = false;
7489                 return r;
7490         }
7491
7492         return r;
7493
7494 }
7495
7496 /**
7497  * cik_suspend - suspend the asic
7498  *
7499  * @rdev: radeon_device pointer
7500  *
7501  * Bring the chip into a state suitable for suspend (CIK).
7502  * Called at suspend.
7503  * Returns 0 for success.
7504  */
int cik_suspend(struct radeon_device *rdev)
{
	/* tear down in roughly the reverse order of cik_startup() */
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and SDMA engines before touching gating state */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	/* disable power gating and clock gating */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	/* writeback and GART go last, once nothing can touch memory */
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
7520
7521 /* Plan is to move initialization in that function and use
7522  * helper function so that radeon_device_init pretty much
7523  * do nothing more than calling asic specific function. This
7524  * should also allow to remove a bunch of callback function
7525  * like vram_info.
7526  */
7527 /**
7528  * cik_init - asic specific driver and hw init
7529  *
7530  * @rdev: radeon_device pointer
7531  *
7532  * Setup asic specific driver variables and program the hw
7533  * to a functional state (CIK).
7534  * Called at driver startup.
7535  * Returns 0 for success, errors for failure.
7536  */
7537 int cik_init(struct radeon_device *rdev)
7538 {
7539         struct radeon_ring *ring;
7540         int r;
7541
7542         /* Read BIOS */
7543         if (!radeon_get_bios(rdev)) {
7544                 if (ASIC_IS_AVIVO(rdev))
7545                         return -EINVAL;
7546         }
7547         /* Must be an ATOMBIOS */
7548         if (!rdev->is_atom_bios) {
7549                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7550                 return -EINVAL;
7551         }
7552         r = radeon_atombios_init(rdev);
7553         if (r)
7554                 return r;
7555
7556         /* Post card if necessary */
7557         if (!radeon_card_posted(rdev)) {
7558                 if (!rdev->bios) {
7559                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7560                         return -EINVAL;
7561                 }
7562                 DRM_INFO("GPU not posted. posting now...\n");
7563                 atom_asic_init(rdev->mode_info.atom_context);
7564         }
7565         /* init golden registers */
7566         cik_init_golden_registers(rdev);
7567         /* Initialize scratch registers */
7568         cik_scratch_init(rdev);
7569         /* Initialize surface registers */
7570         radeon_surface_init(rdev);
7571         /* Initialize clocks */
7572         radeon_get_clock_info(rdev->ddev);
7573
7574         /* Fence driver */
7575         r = radeon_fence_driver_init(rdev);
7576         if (r)
7577                 return r;
7578
7579         /* initialize memory controller */
7580         r = cik_mc_init(rdev);
7581         if (r)
7582                 return r;
7583         /* Memory manager */
7584         r = radeon_bo_init(rdev);
7585         if (r)
7586                 return r;
7587
7588         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7589         ring->ring_obj = NULL;
7590         r600_ring_init(rdev, ring, 1024 * 1024);
7591
7592         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7593         ring->ring_obj = NULL;
7594         r600_ring_init(rdev, ring, 1024 * 1024);
7595         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7596         if (r)
7597                 return r;
7598
7599         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7600         ring->ring_obj = NULL;
7601         r600_ring_init(rdev, ring, 1024 * 1024);
7602         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7603         if (r)
7604                 return r;
7605
7606         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7607         ring->ring_obj = NULL;
7608         r600_ring_init(rdev, ring, 256 * 1024);
7609
7610         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7611         ring->ring_obj = NULL;
7612         r600_ring_init(rdev, ring, 256 * 1024);
7613
7614         r = radeon_uvd_init(rdev);
7615         if (!r) {
7616                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7617                 ring->ring_obj = NULL;
7618                 r600_ring_init(rdev, ring, 4096);
7619         }
7620
7621         rdev->ih.ring_obj = NULL;
7622         r600_ih_ring_init(rdev, 64 * 1024);
7623
7624         r = r600_pcie_gart_init(rdev);
7625         if (r)
7626                 return r;
7627
7628         rdev->accel_working = true;
7629         r = cik_startup(rdev);
7630         if (r) {
7631                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7632                 cik_cp_fini(rdev);
7633                 cik_sdma_fini(rdev);
7634                 cik_irq_fini(rdev);
7635                 sumo_rlc_fini(rdev);
7636                 cik_mec_fini(rdev);
7637                 radeon_wb_fini(rdev);
7638                 radeon_ib_pool_fini(rdev);
7639                 radeon_vm_manager_fini(rdev);
7640                 radeon_irq_kms_fini(rdev);
7641                 cik_pcie_gart_fini(rdev);
7642                 rdev->accel_working = false;
7643         }
7644
7645         /* Don't start up if the MC ucode is missing.
7646          * The default clocks and voltages before the MC ucode
7647          * is loaded are not suffient for advanced operations.
7648          */
7649         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7650                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7651                 return -EINVAL;
7652         }
7653
7654         return 0;
7655 }
7656
7657 /**
7658  * cik_fini - asic specific driver and hw fini
7659  *
7660  * @rdev: radeon_device pointer
7661  *
7662  * Tear down the asic specific driver variables and program the hw
7663  * to an idle state (CIK).
7664  * Called at driver unload.
7665  */
void cik_fini(struct radeon_device *rdev)
{
	/* stop and free the accel engines first */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	/* disable power gating and clock gating */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	/* GART/VRAM teardown after all engines are gone */
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	/* atombios state and the BIOS copy go last */
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
7690
7691 void dce8_program_fmt(struct drm_encoder *encoder)
7692 {
7693         struct drm_device *dev = encoder->dev;
7694         struct radeon_device *rdev = dev->dev_private;
7695         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
7696         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
7697         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
7698         int bpc = 0;
7699         u32 tmp = 0;
7700         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
7701
7702         if (connector) {
7703                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
7704                 bpc = radeon_get_monitor_bpc(connector);
7705                 dither = radeon_connector->dither;
7706         }
7707
7708         /* LVDS/eDP FMT is set up by atom */
7709         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
7710                 return;
7711
7712         /* not needed for analog */
7713         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
7714             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
7715                 return;
7716
7717         if (bpc == 0)
7718                 return;
7719
7720         switch (bpc) {
7721         case 6:
7722                 if (dither == RADEON_FMT_DITHER_ENABLE)
7723                         /* XXX sort out optimal dither settings */
7724                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7725                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
7726                 else
7727                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
7728                 break;
7729         case 8:
7730                 if (dither == RADEON_FMT_DITHER_ENABLE)
7731                         /* XXX sort out optimal dither settings */
7732                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7733                                 FMT_RGB_RANDOM_ENABLE |
7734                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
7735                 else
7736                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
7737                 break;
7738         case 10:
7739                 if (dither == RADEON_FMT_DITHER_ENABLE)
7740                         /* XXX sort out optimal dither settings */
7741                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7742                                 FMT_RGB_RANDOM_ENABLE |
7743                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
7744                 else
7745                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
7746                 break;
7747         default:
7748                 /* not needed */
7749                 break;
7750         }
7751
7752         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
7753 }
7754
7755 /* display watermark setup */
7756 /**
7757  * dce8_line_buffer_adjust - Set up the line buffer
7758  *
7759  * @rdev: radeon_device pointer
7760  * @radeon_crtc: the selected display controller
7761  * @mode: the current display mode on the selected display
7762  * controller
7763  *
7764  * Setup up the line buffer allocation for
7765  * the selected display controller (CIK).
7766  * Returns the line buffer size in pixels.
7767  */
7768 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7769                                    struct radeon_crtc *radeon_crtc,
7770                                    struct drm_display_mode *mode)
7771 {
7772         u32 tmp, buffer_alloc, i;
7773         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
7774         /*
7775          * Line Buffer Setup
7776          * There are 6 line buffers, one for each display controllers.
7777          * There are 3 partitions per LB. Select the number of partitions
7778          * to enable based on the display width.  For display widths larger
7779          * than 4096, you need use to use 2 display controllers and combine
7780          * them using the stereo blender.
7781          */
7782         if (radeon_crtc->base.enabled && mode) {
7783                 if (mode->crtc_hdisplay < 1920) {
7784                         tmp = 1;
7785                         buffer_alloc = 2;
7786                 } else if (mode->crtc_hdisplay < 2560) {
7787                         tmp = 2;
7788                         buffer_alloc = 2;
7789                 } else if (mode->crtc_hdisplay < 4096) {
7790                         tmp = 0;
7791                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7792                 } else {
7793                         DRM_DEBUG_KMS("Mode too big for LB!\n");
7794                         tmp = 0;
7795                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7796                 }
7797         } else {
7798                 tmp = 1;
7799                 buffer_alloc = 0;
7800         }
7801
7802         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7803                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7804
7805         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
7806                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
7807         for (i = 0; i < rdev->usec_timeout; i++) {
7808                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
7809                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
7810                         break;
7811                 udelay(1);
7812         }
7813
7814         if (radeon_crtc->base.enabled && mode) {
7815                 switch (tmp) {
7816                 case 0:
7817                 default:
7818                         return 4096 * 2;
7819                 case 1:
7820                         return 1920 * 2;
7821                 case 2:
7822                         return 2560 * 2;
7823                 }
7824         }
7825
7826         /* controller not enabled, so no lb used */
7827         return 0;
7828 }
7829
7830 /**
7831  * cik_get_number_of_dram_channels - get the number of dram channels
7832  *
7833  * @rdev: radeon_device pointer
7834  *
7835  * Look up the number of video ram channels (CIK).
7836  * Used for display watermark bandwidth calculations
7837  * Returns the number of dram channels
7838  */
7839 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7840 {
7841         u32 tmp = RREG32(MC_SHARED_CHMAP);
7842
7843         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7844         case 0:
7845         default:
7846                 return 1;
7847         case 1:
7848                 return 2;
7849         case 2:
7850                 return 4;
7851         case 3:
7852                 return 8;
7853         case 4:
7854                 return 3;
7855         case 5:
7856                 return 6;
7857         case 6:
7858                 return 10;
7859         case 7:
7860                 return 12;
7861         case 8:
7862                 return 16;
7863         }
7864 }
7865
/* Input parameters for the DCE8 display watermark calculations below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
7881
7882 /**
7883  * dce8_dram_bandwidth - get the dram bandwidth
7884  *
7885  * @wm: watermark calculation data
7886  *
7887  * Calculate the raw dram bandwidth (CIK).
7888  * Used for display watermark bandwidth calculations
7889  * Returns the dram bandwidth in MBytes/s
7890  */
7891 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7892 {
7893         /* Calculate raw DRAM Bandwidth */
7894         fixed20_12 dram_efficiency; /* 0.7 */
7895         fixed20_12 yclk, dram_channels, bandwidth;
7896         fixed20_12 a;
7897
7898         a.full = dfixed_const(1000);
7899         yclk.full = dfixed_const(wm->yclk);
7900         yclk.full = dfixed_div(yclk, a);
7901         dram_channels.full = dfixed_const(wm->dram_channels * 4);
7902         a.full = dfixed_const(10);
7903         dram_efficiency.full = dfixed_const(7);
7904         dram_efficiency.full = dfixed_div(dram_efficiency, a);
7905         bandwidth.full = dfixed_mul(dram_channels, yclk);
7906         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7907
7908         return dfixed_trunc(bandwidth);
7909 }
7910
7911 /**
7912  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7913  *
7914  * @wm: watermark calculation data
7915  *
7916  * Calculate the dram bandwidth used for display (CIK).
7917  * Used for display watermark bandwidth calculations
7918  * Returns the dram bandwidth for display in MBytes/s
7919  */
7920 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7921 {
7922         /* Calculate DRAM Bandwidth and the part allocated to display. */
7923         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7924         fixed20_12 yclk, dram_channels, bandwidth;
7925         fixed20_12 a;
7926
7927         a.full = dfixed_const(1000);
7928         yclk.full = dfixed_const(wm->yclk);
7929         yclk.full = dfixed_div(yclk, a);
7930         dram_channels.full = dfixed_const(wm->dram_channels * 4);
7931         a.full = dfixed_const(10);
7932         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
7933         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7934         bandwidth.full = dfixed_mul(dram_channels, yclk);
7935         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7936
7937         return dfixed_trunc(bandwidth);
7938 }
7939
7940 /**
7941  * dce8_data_return_bandwidth - get the data return bandwidth
7942  *
7943  * @wm: watermark calculation data
7944  *
7945  * Calculate the data return bandwidth used for display (CIK).
7946  * Used for display watermark bandwidth calculations
7947  * Returns the data return bandwidth in MBytes/s
7948  */
7949 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7950 {
7951         /* Calculate the display Data return Bandwidth */
7952         fixed20_12 return_efficiency; /* 0.8 */
7953         fixed20_12 sclk, bandwidth;
7954         fixed20_12 a;
7955
7956         a.full = dfixed_const(1000);
7957         sclk.full = dfixed_const(wm->sclk);
7958         sclk.full = dfixed_div(sclk, a);
7959         a.full = dfixed_const(10);
7960         return_efficiency.full = dfixed_const(8);
7961         return_efficiency.full = dfixed_div(return_efficiency, a);
7962         a.full = dfixed_const(32);
7963         bandwidth.full = dfixed_mul(a, sclk);
7964         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7965
7966         return dfixed_trunc(bandwidth);
7967 }
7968
7969 /**
7970  * dce8_dmif_request_bandwidth - get the dmif bandwidth
7971  *
7972  * @wm: watermark calculation data
7973  *
7974  * Calculate the dmif bandwidth used for display (CIK).
7975  * Used for display watermark bandwidth calculations
7976  * Returns the dmif bandwidth in MBytes/s
7977  */
7978 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7979 {
7980         /* Calculate the DMIF Request Bandwidth */
7981         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7982         fixed20_12 disp_clk, bandwidth;
7983         fixed20_12 a, b;
7984
7985         a.full = dfixed_const(1000);
7986         disp_clk.full = dfixed_const(wm->disp_clk);
7987         disp_clk.full = dfixed_div(disp_clk, a);
7988         a.full = dfixed_const(32);
7989         b.full = dfixed_mul(a, disp_clk);
7990
7991         a.full = dfixed_const(10);
7992         disp_clk_request_efficiency.full = dfixed_const(8);
7993         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7994
7995         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7996
7997         return dfixed_trunc(bandwidth);
7998 }
7999
8000 /**
8001  * dce8_available_bandwidth - get the min available bandwidth
8002  *
8003  * @wm: watermark calculation data
8004  *
8005  * Calculate the min available bandwidth used for display (CIK).
8006  * Used for display watermark bandwidth calculations
8007  * Returns the min available bandwidth in MBytes/s
8008  */
8009 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8010 {
8011         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8012         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8013         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8014         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8015
8016         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8017 }
8018
8019 /**
8020  * dce8_average_bandwidth - get the average available bandwidth
8021  *
8022  * @wm: watermark calculation data
8023  *
8024  * Calculate the average available bandwidth used for display (CIK).
8025  * Used for display watermark bandwidth calculations
8026  * Returns the average available bandwidth in MBytes/s
8027  */
8028 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8029 {
8030         /* Calculate the display mode Average Bandwidth
8031          * DisplayMode should contain the source and destination dimensions,
8032          * timing, etc.
8033          */
8034         fixed20_12 bpp;
8035         fixed20_12 line_time;
8036         fixed20_12 src_width;
8037         fixed20_12 bandwidth;
8038         fixed20_12 a;
8039
8040         a.full = dfixed_const(1000);
8041         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8042         line_time.full = dfixed_div(line_time, a);
8043         bpp.full = dfixed_const(wm->bytes_per_pixel);
8044         src_width.full = dfixed_const(wm->src_width);
8045         bandwidth.full = dfixed_mul(src_width, bpp);
8046         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8047         bandwidth.full = dfixed_div(bandwidth, line_time);
8048
8049         return dfixed_trunc(bandwidth);
8050 }
8051
/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* NOTE(review): the two divisions below assume available_bandwidth
	 * is non-zero; confirm callers cannot produce a zero bandwidth. */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* time spent servicing the other active heads before this one */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads: nothing to hide latency for */
	if (wm->num_heads == 0)
		return 0;

	/* downscaling (vsc > 2, or > 1 with >= 3 taps), heavy filtering
	 * (>= 5 taps), or interlaced with vsc >= 2 needs 4 source lines
	 * per destination line; otherwise 2 are enough */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = available bandwidth share per head */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = bytes_per_pixel * disp_clk / 1000 (pixel fill rate limit) */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line-buffer fill bandwidth is the smallest of all three limits */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* line_fill_time = bytes per dst line / lb fill bandwidth */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line can't be filled within the active period, add the
	 * overrun to the latency watermark */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
8123
8124 /**
8125  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8126  * average and available dram bandwidth
8127  *
8128  * @wm: watermark calculation data
8129  *
8130  * Check if the display average bandwidth fits in the display
8131  * dram bandwidth (CIK).
8132  * Used for display watermark bandwidth calculations
8133  * Returns true if the display fits, false if not.
8134  */
8135 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8136 {
8137         if (dce8_average_bandwidth(wm) <=
8138             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8139                 return true;
8140         else
8141                 return false;
8142 }
8143
8144 /**
8145  * dce8_average_bandwidth_vs_available_bandwidth - check
8146  * average and available bandwidth
8147  *
8148  * @wm: watermark calculation data
8149  *
8150  * Check if the display average bandwidth fits in the display
8151  * available bandwidth (CIK).
8152  * Used for display watermark bandwidth calculations
8153  * Returns true if the display fits, false if not.
8154  */
8155 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8156 {
8157         if (dce8_average_bandwidth(wm) <=
8158             (dce8_available_bandwidth(wm) / wm->num_heads))
8159                 return true;
8160         else
8161                 return false;
8162 }
8163
8164 /**
8165  * dce8_check_latency_hiding - check latency hiding
8166  *
8167  * @wm: watermark calculation data
8168  *
8169  * Check latency hiding (CIK).
8170  * Used for display watermark bandwidth calculations
8171  * Returns true if the display fits, false if not.
8172  */
8173 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8174 {
8175         u32 lb_partitions = wm->lb_size / wm->src_width;
8176         u32 line_time = wm->active_time + wm->blank_time;
8177         u32 latency_tolerant_lines;
8178         u32 latency_hiding;
8179         fixed20_12 a;
8180
8181         a.full = dfixed_const(1);
8182         if (wm->vsc.full > a.full)
8183                 latency_tolerant_lines = 1;
8184         else {
8185                 if (lb_partitions <= (wm->vtaps + 1))
8186                         latency_tolerant_lines = 1;
8187                 else
8188                         latency_tolerant_lines = 2;
8189         }
8190
8191         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8192
8193         if (dce8_latency_watermark(wm) <= latency_hiding)
8194                 return true;
8195         else
8196                 return false;
8197 }
8198
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		/* line time in ns, capped to the 16-bit register field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* dpm clocks are in 10 kHz units; the wm params want kHz */
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* same calculation as above, but at the lowest dpm clocks */
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
8331
8332 /**
8333  * dce8_bandwidth_update - program display watermarks
8334  *
8335  * @rdev: radeon_device pointer
8336  *
8337  * Calculate and program the display watermarks and line
8338  * buffer allocation (CIK).
8339  */
8340 void dce8_bandwidth_update(struct radeon_device *rdev)
8341 {
8342         struct drm_display_mode *mode = NULL;
8343         u32 num_heads = 0, lb_size;
8344         int i;
8345
8346         radeon_update_display_priority(rdev);
8347
8348         for (i = 0; i < rdev->num_crtc; i++) {
8349                 if (rdev->mode_info.crtcs[i]->base.enabled)
8350                         num_heads++;
8351         }
8352         for (i = 0; i < rdev->num_crtc; i++) {
8353                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8354                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8355                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8356         }
8357 }
8358
8359 /**
8360  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8361  *
8362  * @rdev: radeon_device pointer
8363  *
8364  * Fetches a GPU clock counter snapshot (SI).
8365  * Returns the 64 bit clock counter snapshot.
8366  */
8367 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8368 {
8369         uint64_t clock;
8370
8371         mutex_lock(&rdev->gpu_clock_mutex);
8372         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8373         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8374                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8375         mutex_unlock(&rdev->gpu_clock_mutex);
8376         return clock;
8377 }
8378
8379 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8380                               u32 cntl_reg, u32 status_reg)
8381 {
8382         int r, i;
8383         struct atom_clock_dividers dividers;
8384         uint32_t tmp;
8385
8386         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8387                                            clock, false, &dividers);
8388         if (r)
8389                 return r;
8390
8391         tmp = RREG32_SMC(cntl_reg);
8392         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8393         tmp |= dividers.post_divider;
8394         WREG32_SMC(cntl_reg, tmp);
8395
8396         for (i = 0; i < 100; i++) {
8397                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8398                         break;
8399                 mdelay(10);
8400         }
8401         if (i == 100)
8402                 return -ETIMEDOUT;
8403
8404         return 0;
8405 }
8406
8407 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8408 {
8409         int r = 0;
8410
8411         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8412         if (r)
8413                 return r;
8414
8415         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8416         return r;
8417 }
8418
/* Attempt to bring the PCIe link up to gen2/gen3 speeds, depending on
 * what both the root port and the GPU support.  Follows the retrain
 * sequence required for gen3 equalization.  No-op for IGPs, non-PCIE
 * parts, or when disabled via the radeon.pcie_gen2 module parameter.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if neither 5.0 GT/s nor 8.0 GT/s is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* data rate encoding: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability offsets of both ends of the link */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the current link control settings and enable
			 * hardware autonomous width on both ends */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			/* renegotiate to the widest detected link width if
			 * we are currently running narrower */
			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* retry equalization up to 10 times */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* snapshot link control settings on both ends
				 * so they can be restored after the redo */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link and redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore the saved entry-compliance
				 * (bit 4) and compliance de-emphasis (bits 9-11)
				 * fields */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the quiesce */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed (low 4 bits of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for it to complete */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
8575
/**
 * cik_program_aspm - enable PCIE ASPM power savings (CIK).
 * @rdev: radeon_device pointer
 *
 * Programs the PCIE link for Active State Power Management: L0s/L1
 * inactivity timers, PLL power-down while the link is in L1, dynamic
 * lane power states, and (when the upstream bridge supports CLKREQ#)
 * clock-request based clock switching.  All writes are read-modify-write
 * and skipped when the register already holds the desired value.
 * Silently returns for IGPs, non-PCIE parts, or when ASPM is disabled
 * via the radeon_aspm module parameter.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* Local policy knobs; all features are unconditionally enabled on CIK. */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* radeon_aspm == 0 means the user disabled ASPM on the kernel cmdline */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* Force the transmitted N_FTS (fast training sequence) count to 0x24 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	/* Let the link controller enter recovery state when needed */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	/* Ignore EDB (end bad) errors on the port */
	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/*
	 * Build the L0s/L1 inactivity configuration.  Start from "both
	 * timers cleared, PMI-to-L1 disabled"; the flags below re-enable
	 * the pieces that are wanted.  Note this value is only written in
	 * one of the two branches that follow.
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		/* L1 enabled: set its inactivity timer and allow PMI-to-L1 */
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/*
			 * Power down the PHY PLLs while in L1/TXS2 on both
			 * PIF pads (PB0/PB1), lanes 0 and 1.  State value 7
			 * presumably selects the deepest power-down — TODO
			 * confirm against the CIK register spec.
			 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			/* Enable dynamic lane power state management */
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				/*
				 * CLKREQ# only helps if the upstream bridge
				 * advertises clock power management.
				 * NOTE(review): bus->self is NULL when the GPU
				 * sits directly on the root bus — confirm this
				 * path cannot be hit there (later kernels add
				 * a pci_is_root_bus() guard).
				 */
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				/* Allow power-down in L1 and L2/L3 ready states */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/*
				 * Switch thermal monitor, deep-sleep, zclk and
				 * MPLL bypass clock sources (select value 1/4
				 * presumably picks a CLKREQ-safe source — TODO
				 * confirm against SMC clock documentation).
				 */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				/* Don't use BCLK as XCLK */
				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				/* Stop forcing the BIF reference clock on */
				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit the L0s-only configuration built above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* Enable light sleep for the PCIE slave/master/replay memories */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/*
		 * If the received N_FTS field is saturated and the link is
		 * reversed in both directions, L0s cannot be used reliably:
		 * clear the L0s inactivity timer again.
		 */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}