drivers/gpu/drm/radeon/si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36
37
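/*
 * Firmware images required per SI ASIC: PFP/ME/CE command-processor ucode,
 * MC (memory controller) ucode, RLC ucode and SMC firmware.
 */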
38 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
39 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
44 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
45 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
46 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
47 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
50 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
51 MODULE_FIRMWARE("radeon/VERDE_me.bin");
52 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
53 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
54 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
55 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
56 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
57 MODULE_FIRMWARE("radeon/OLAND_me.bin");
58 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
59 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
60 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
61 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
62 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
68
69 static void si_pcie_gen3_enable(struct radeon_device *rdev);
70 static void si_program_aspm(struct radeon_device *rdev);
71 extern void sumo_rlc_fini(struct radeon_device *rdev);
72 extern int sumo_rlc_init(struct radeon_device *rdev);
73 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
74 extern void r600_ih_ring_fini(struct radeon_device *rdev);
75 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
76 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
77 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
78 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
79 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
80 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
81 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
82                                          bool enable);
83 static void si_init_pg(struct radeon_device *rdev);
84 static void si_init_cg(struct radeon_device *rdev);
85 static void si_fini_pg(struct radeon_device *rdev);
86 static void si_fini_cg(struct radeon_device *rdev);
87 static void si_rlc_stop(struct radeon_device *rdev);
88
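/*
 * Save/restore register list for the VERDE RLC (the GPU's power-management
 * microcontroller).  Address entries pack the dword register offset in the
 * low half-word and an instance/broadcast select in the high half-word; the
 * driver does not interpret the list itself, it is copied verbatim into the
 * RLC save/restore buffer (see sumo_rlc_init()).
 */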
89 static const u32 verde_rlc_save_restore_register_list[] =
90 {
91         (0x8000 << 16) | (0x98f4 >> 2),
92         0x00000000,
93         (0x8040 << 16) | (0x98f4 >> 2),
94         0x00000000,
95         (0x8000 << 16) | (0xe80 >> 2),
96         0x00000000,
97         (0x8040 << 16) | (0xe80 >> 2),
98         0x00000000,
99         (0x8000 << 16) | (0x89bc >> 2),
100         0x00000000,
101         (0x8040 << 16) | (0x89bc >> 2),
102         0x00000000,
103         (0x8000 << 16) | (0x8c1c >> 2),
104         0x00000000,
105         (0x8040 << 16) | (0x8c1c >> 2),
106         0x00000000,
107         (0x9c00 << 16) | (0x98f0 >> 2),
108         0x00000000,
109         (0x9c00 << 16) | (0xe7c >> 2),
110         0x00000000,
111         (0x8000 << 16) | (0x9148 >> 2),
112         0x00000000,
113         (0x8040 << 16) | (0x9148 >> 2),
114         0x00000000,
115         (0x9c00 << 16) | (0x9150 >> 2),
116         0x00000000,
117         (0x9c00 << 16) | (0x897c >> 2),
118         0x00000000,
119         (0x9c00 << 16) | (0x8d8c >> 2),
120         0x00000000,
121         (0x9c00 << 16) | (0xac54 >> 2),
122         0x00000000,
123         0x3,
124         (0x9c00 << 16) | (0x98f8 >> 2),
125         0x00000000,
126         (0x9c00 << 16) | (0x9910 >> 2),
127         0x00000000,
128         (0x9c00 << 16) | (0x9914 >> 2),
129         0x00000000,
130         (0x9c00 << 16) | (0x9918 >> 2),
131         0x00000000,
132         (0x9c00 << 16) | (0x991c >> 2),
133         0x00000000,
134         (0x9c00 << 16) | (0x9920 >> 2),
135         0x00000000,
136         (0x9c00 << 16) | (0x9924 >> 2),
137         0x00000000,
138         (0x9c00 << 16) | (0x9928 >> 2),
139         0x00000000,
140         (0x9c00 << 16) | (0x992c >> 2),
141         0x00000000,
142         (0x9c00 << 16) | (0x9930 >> 2),
143         0x00000000,
144         (0x9c00 << 16) | (0x9934 >> 2),
145         0x00000000,
146         (0x9c00 << 16) | (0x9938 >> 2),
147         0x00000000,
148         (0x9c00 << 16) | (0x993c >> 2),
149         0x00000000,
150         (0x9c00 << 16) | (0x9940 >> 2),
151         0x00000000,
152         (0x9c00 << 16) | (0x9944 >> 2),
153         0x00000000,
154         (0x9c00 << 16) | (0x9948 >> 2),
155         0x00000000,
156         (0x9c00 << 16) | (0x994c >> 2),
157         0x00000000,
158         (0x9c00 << 16) | (0x9950 >> 2),
159         0x00000000,
160         (0x9c00 << 16) | (0x9954 >> 2),
161         0x00000000,
162         (0x9c00 << 16) | (0x9958 >> 2),
163         0x00000000,
164         (0x9c00 << 16) | (0x995c >> 2),
165         0x00000000,
166         (0x9c00 << 16) | (0x9960 >> 2),
167         0x00000000,
168         (0x9c00 << 16) | (0x9964 >> 2),
169         0x00000000,
170         (0x9c00 << 16) | (0x9968 >> 2),
171         0x00000000,
172         (0x9c00 << 16) | (0x996c >> 2),
173         0x00000000,
174         (0x9c00 << 16) | (0x9970 >> 2),
175         0x00000000,
176         (0x9c00 << 16) | (0x9974 >> 2),
177         0x00000000,
178         (0x9c00 << 16) | (0x9978 >> 2),
179         0x00000000,
180         (0x9c00 << 16) | (0x997c >> 2),
181         0x00000000,
182         (0x9c00 << 16) | (0x9980 >> 2),
183         0x00000000,
184         (0x9c00 << 16) | (0x9984 >> 2),
185         0x00000000,
186         (0x9c00 << 16) | (0x9988 >> 2),
187         0x00000000,
188         (0x9c00 << 16) | (0x998c >> 2),
189         0x00000000,
190         (0x9c00 << 16) | (0x8c00 >> 2),
191         0x00000000,
192         (0x9c00 << 16) | (0x8c14 >> 2),
193         0x00000000,
194         (0x9c00 << 16) | (0x8c04 >> 2),
195         0x00000000,
196         (0x9c00 << 16) | (0x8c08 >> 2),
197         0x00000000,
198         (0x8000 << 16) | (0x9b7c >> 2),
199         0x00000000,
200         (0x8040 << 16) | (0x9b7c >> 2),
201         0x00000000,
202         (0x8000 << 16) | (0xe84 >> 2),
203         0x00000000,
204         (0x8040 << 16) | (0xe84 >> 2),
205         0x00000000,
206         (0x8000 << 16) | (0x89c0 >> 2),
207         0x00000000,
208         (0x8040 << 16) | (0x89c0 >> 2),
209         0x00000000,
210         (0x8000 << 16) | (0x914c >> 2),
211         0x00000000,
212         (0x8040 << 16) | (0x914c >> 2),
213         0x00000000,
214         (0x8000 << 16) | (0x8c20 >> 2),
215         0x00000000,
216         (0x8040 << 16) | (0x8c20 >> 2),
217         0x00000000,
218         (0x8000 << 16) | (0x9354 >> 2),
219         0x00000000,
220         (0x8040 << 16) | (0x9354 >> 2),
221         0x00000000,
222         (0x9c00 << 16) | (0x9060 >> 2),
223         0x00000000,
224         (0x9c00 << 16) | (0x9364 >> 2),
225         0x00000000,
226         (0x9c00 << 16) | (0x9100 >> 2),
227         0x00000000,
228         (0x9c00 << 16) | (0x913c >> 2),
229         0x00000000,
230         (0x8000 << 16) | (0x90e0 >> 2),
231         0x00000000,
232         (0x8000 << 16) | (0x90e4 >> 2),
233         0x00000000,
234         (0x8000 << 16) | (0x90e8 >> 2),
235         0x00000000,
236         (0x8040 << 16) | (0x90e0 >> 2),
237         0x00000000,
238         (0x8040 << 16) | (0x90e4 >> 2),
239         0x00000000,
240         (0x8040 << 16) | (0x90e8 >> 2),
241         0x00000000,
242         (0x9c00 << 16) | (0x8bcc >> 2),
243         0x00000000,
244         (0x9c00 << 16) | (0x8b24 >> 2),
245         0x00000000,
246         (0x9c00 << 16) | (0x88c4 >> 2),
247         0x00000000,
248         (0x9c00 << 16) | (0x8e50 >> 2),
249         0x00000000,
250         (0x9c00 << 16) | (0x8c0c >> 2),
251         0x00000000,
252         (0x9c00 << 16) | (0x8e58 >> 2),
253         0x00000000,
254         (0x9c00 << 16) | (0x8e5c >> 2),
255         0x00000000,
256         (0x9c00 << 16) | (0x9508 >> 2),
257         0x00000000,
258         (0x9c00 << 16) | (0x950c >> 2),
259         0x00000000,
260         (0x9c00 << 16) | (0x9494 >> 2),
261         0x00000000,
262         (0x9c00 << 16) | (0xac0c >> 2),
263         0x00000000,
264         (0x9c00 << 16) | (0xac10 >> 2),
265         0x00000000,
266         (0x9c00 << 16) | (0xac14 >> 2),
267         0x00000000,
268         (0x9c00 << 16) | (0xae00 >> 2),
269         0x00000000,
270         (0x9c00 << 16) | (0xac08 >> 2),
271         0x00000000,
272         (0x9c00 << 16) | (0x88d4 >> 2),
273         0x00000000,
274         (0x9c00 << 16) | (0x88c8 >> 2),
275         0x00000000,
276         (0x9c00 << 16) | (0x88cc >> 2),
277         0x00000000,
278         (0x9c00 << 16) | (0x89b0 >> 2),
279         0x00000000,
280         (0x9c00 << 16) | (0x8b10 >> 2),
281         0x00000000,
282         (0x9c00 << 16) | (0x8a14 >> 2),
283         0x00000000,
284         (0x9c00 << 16) | (0x9830 >> 2),
285         0x00000000,
286         (0x9c00 << 16) | (0x9834 >> 2),
287         0x00000000,
288         (0x9c00 << 16) | (0x9838 >> 2),
289         0x00000000,
290         (0x9c00 << 16) | (0x9a10 >> 2),
291         0x00000000,
292         (0x8000 << 16) | (0x9870 >> 2),
293         0x00000000,
294         (0x8000 << 16) | (0x9874 >> 2),
295         0x00000000,
296         (0x8001 << 16) | (0x9870 >> 2),
297         0x00000000,
298         (0x8001 << 16) | (0x9874 >> 2),
299         0x00000000,
300         (0x8040 << 16) | (0x9870 >> 2),
301         0x00000000,
302         (0x8040 << 16) | (0x9874 >> 2),
303         0x00000000,
304         (0x8041 << 16) | (0x9870 >> 2),
305         0x00000000,
306         (0x8041 << 16) | (0x9874 >> 2),
307         0x00000000,
308         0x00000000
309 };
310
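/*
 * "Golden" register tables: triplets of { register offset, and_mask, value }
 * applied by radeon_program_register_sequence().  A mask of 0xffffffff writes
 * the value directly; any other mask is applied as a read-modify-write of the
 * selected bits.
 */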
311 static const u32 tahiti_golden_rlc_registers[] =
312 {
313         0xc424, 0xffffffff, 0x00601005,
314         0xc47c, 0xffffffff, 0x10104040,
315         0xc488, 0xffffffff, 0x0100000a,
316         0xc314, 0xffffffff, 0x00000800,
317         0xc30c, 0xffffffff, 0x800000f4,
318         0xf4a8, 0xffffffff, 0x00000000
319 };
320
321 static const u32 tahiti_golden_registers[] =
322 {
323         0x9a10, 0x00010000, 0x00018208,
324         0x9830, 0xffffffff, 0x00000000,
325         0x9834, 0xf00fffff, 0x00000400,
326         0x9838, 0x0002021c, 0x00020200,
327         0xc78, 0x00000080, 0x00000000,
328         0xd030, 0x000300c0, 0x00800040,
329         0xd830, 0x000300c0, 0x00800040,
330         0x5bb0, 0x000000f0, 0x00000070,
331         0x5bc0, 0x00200000, 0x50100000,
332         0x7030, 0x31000311, 0x00000011,
333         0x277c, 0x00000003, 0x000007ff,
334         0x240c, 0x000007ff, 0x00000000,
335         0x8a14, 0xf000001f, 0x00000007,
336         0x8b24, 0xffffffff, 0x00ffffff,
337         0x8b10, 0x0000ff0f, 0x00000000,
338         0x28a4c, 0x07ffffff, 0x4e000000,
339         0x28350, 0x3f3f3fff, 0x2a00126a,
340         0x30, 0x000000ff, 0x0040,
341         0x34, 0x00000040, 0x00004040,
342         0x9100, 0x07ffffff, 0x03000000,
343         0x8e88, 0x01ff1f3f, 0x00000000,
344         0x8e84, 0x01ff1f3f, 0x00000000,
345         0x9060, 0x0000007f, 0x00000020,
346         0x9508, 0x00010000, 0x00010000,
347         0xac14, 0x00000200, 0x000002fb,
348         0xac10, 0xffffffff, 0x0000543b,
349         0xac0c, 0xffffffff, 0xa9210876,
350         0x88d0, 0xffffffff, 0x000fff40,
351         0x88d4, 0x0000001f, 0x00000010,
352         0x1410, 0x20000000, 0x20fffed8,
353         0x15c0, 0x000c0fc0, 0x000c0400
354 };
355
356 static const u32 tahiti_golden_registers2[] =
357 {
358         0xc64, 0x00000001, 0x00000001
359 };
360
361 static const u32 pitcairn_golden_rlc_registers[] =
362 {
363         0xc424, 0xffffffff, 0x00601004,
364         0xc47c, 0xffffffff, 0x10102020,
365         0xc488, 0xffffffff, 0x01000020,
366         0xc314, 0xffffffff, 0x00000800,
367         0xc30c, 0xffffffff, 0x800000a4
368 };
369
370 static const u32 pitcairn_golden_registers[] =
371 {
372         0x9a10, 0x00010000, 0x00018208,
373         0x9830, 0xffffffff, 0x00000000,
374         0x9834, 0xf00fffff, 0x00000400,
375         0x9838, 0x0002021c, 0x00020200,
376         0xc78, 0x00000080, 0x00000000,
377         0xd030, 0x000300c0, 0x00800040,
378         0xd830, 0x000300c0, 0x00800040,
379         0x5bb0, 0x000000f0, 0x00000070,
380         0x5bc0, 0x00200000, 0x50100000,
381         0x7030, 0x31000311, 0x00000011,
382         0x2ae4, 0x00073ffe, 0x000022a2,
383         0x240c, 0x000007ff, 0x00000000,
384         0x8a14, 0xf000001f, 0x00000007,
385         0x8b24, 0xffffffff, 0x00ffffff,
386         0x8b10, 0x0000ff0f, 0x00000000,
387         0x28a4c, 0x07ffffff, 0x4e000000,
388         0x28350, 0x3f3f3fff, 0x2a00126a,
389         0x30, 0x000000ff, 0x0040,
390         0x34, 0x00000040, 0x00004040,
391         0x9100, 0x07ffffff, 0x03000000,
392         0x9060, 0x0000007f, 0x00000020,
393         0x9508, 0x00010000, 0x00010000,
394         0xac14, 0x000003ff, 0x000000f7,
395         0xac10, 0xffffffff, 0x00000000,
396         0xac0c, 0xffffffff, 0x32761054,
397         0x88d4, 0x0000001f, 0x00000010,
398         0x15c0, 0x000c0fc0, 0x000c0400
399 };
400
401 static const u32 verde_golden_rlc_registers[] =
402 {
403         0xc424, 0xffffffff, 0x033f1005,
404         0xc47c, 0xffffffff, 0x10808020,
405         0xc488, 0xffffffff, 0x00800008,
406         0xc314, 0xffffffff, 0x00001000,
407         0xc30c, 0xffffffff, 0x80010014
408 };
409
410 static const u32 verde_golden_registers[] =
411 {
412         0x9a10, 0x00010000, 0x00018208,
413         0x9830, 0xffffffff, 0x00000000,
414         0x9834, 0xf00fffff, 0x00000400,
415         0x9838, 0x0002021c, 0x00020200,
416         0xc78, 0x00000080, 0x00000000,
417         0xd030, 0x000300c0, 0x00800040,
418         0xd030, 0x000300c0, 0x00800040,
419         0xd830, 0x000300c0, 0x00800040,
420         0xd830, 0x000300c0, 0x00800040,
421         0x5bb0, 0x000000f0, 0x00000070,
422         0x5bc0, 0x00200000, 0x50100000,
423         0x7030, 0x31000311, 0x00000011,
424         0x2ae4, 0x00073ffe, 0x000022a2,
425         0x2ae4, 0x00073ffe, 0x000022a2,
426         0x2ae4, 0x00073ffe, 0x000022a2,
427         0x240c, 0x000007ff, 0x00000000,
428         0x240c, 0x000007ff, 0x00000000,
429         0x240c, 0x000007ff, 0x00000000,
430         0x8a14, 0xf000001f, 0x00000007,
431         0x8a14, 0xf000001f, 0x00000007,
432         0x8a14, 0xf000001f, 0x00000007,
433         0x8b24, 0xffffffff, 0x00ffffff,
434         0x8b10, 0x0000ff0f, 0x00000000,
435         0x28a4c, 0x07ffffff, 0x4e000000,
436         0x28350, 0x3f3f3fff, 0x0000124a,
437         0x28350, 0x3f3f3fff, 0x0000124a,
438         0x28350, 0x3f3f3fff, 0x0000124a,
439         0x30, 0x000000ff, 0x0040,
440         0x34, 0x00000040, 0x00004040,
441         0x9100, 0x07ffffff, 0x03000000,
442         0x9100, 0x07ffffff, 0x03000000,
443         0x8e88, 0x01ff1f3f, 0x00000000,
444         0x8e88, 0x01ff1f3f, 0x00000000,
445         0x8e88, 0x01ff1f3f, 0x00000000,
446         0x8e84, 0x01ff1f3f, 0x00000000,
447         0x8e84, 0x01ff1f3f, 0x00000000,
448         0x8e84, 0x01ff1f3f, 0x00000000,
449         0x9060, 0x0000007f, 0x00000020,
450         0x9508, 0x00010000, 0x00010000,
451         0xac14, 0x000003ff, 0x00000003,
452         0xac14, 0x000003ff, 0x00000003,
453         0xac14, 0x000003ff, 0x00000003,
454         0xac10, 0xffffffff, 0x00000000,
455         0xac10, 0xffffffff, 0x00000000,
456         0xac10, 0xffffffff, 0x00000000,
457         0xac0c, 0xffffffff, 0x00001032,
458         0xac0c, 0xffffffff, 0x00001032,
459         0xac0c, 0xffffffff, 0x00001032,
460         0x88d4, 0x0000001f, 0x00000010,
461         0x88d4, 0x0000001f, 0x00000010,
462         0x88d4, 0x0000001f, 0x00000010,
463         0x15c0, 0x000c0fc0, 0x000c0400
464 };
465
466 static const u32 oland_golden_rlc_registers[] =
467 {
468         0xc424, 0xffffffff, 0x00601005,
469         0xc47c, 0xffffffff, 0x10104040,
470         0xc488, 0xffffffff, 0x0100000a,
471         0xc314, 0xffffffff, 0x00000800,
472         0xc30c, 0xffffffff, 0x800000f4
473 };
474
475 static const u32 oland_golden_registers[] =
476 {
477         0x9a10, 0x00010000, 0x00018208,
478         0x9830, 0xffffffff, 0x00000000,
479         0x9834, 0xf00fffff, 0x00000400,
480         0x9838, 0x0002021c, 0x00020200,
481         0xc78, 0x00000080, 0x00000000,
482         0xd030, 0x000300c0, 0x00800040,
483         0xd830, 0x000300c0, 0x00800040,
484         0x5bb0, 0x000000f0, 0x00000070,
485         0x5bc0, 0x00200000, 0x50100000,
486         0x7030, 0x31000311, 0x00000011,
487         0x2ae4, 0x00073ffe, 0x000022a2,
488         0x240c, 0x000007ff, 0x00000000,
489         0x8a14, 0xf000001f, 0x00000007,
490         0x8b24, 0xffffffff, 0x00ffffff,
491         0x8b10, 0x0000ff0f, 0x00000000,
492         0x28a4c, 0x07ffffff, 0x4e000000,
493         0x28350, 0x3f3f3fff, 0x00000082,
494         0x30, 0x000000ff, 0x0040,
495         0x34, 0x00000040, 0x00004040,
496         0x9100, 0x07ffffff, 0x03000000,
497         0x9060, 0x0000007f, 0x00000020,
498         0x9508, 0x00010000, 0x00010000,
499         0xac14, 0x000003ff, 0x000000f3,
500         0xac10, 0xffffffff, 0x00000000,
501         0xac0c, 0xffffffff, 0x00003210,
502         0x88d4, 0x0000001f, 0x00000010,
503         0x15c0, 0x000c0fc0, 0x000c0400
504 };
505
506 static const u32 hainan_golden_registers[] =
507 {
508         0x9a10, 0x00010000, 0x00018208,
509         0x9830, 0xffffffff, 0x00000000,
510         0x9834, 0xf00fffff, 0x00000400,
511         0x9838, 0x0002021c, 0x00020200,
512         0xd0c0, 0xff000fff, 0x00000100,
513         0xd030, 0x000300c0, 0x00800040,
514         0xd8c0, 0xff000fff, 0x00000100,
515         0xd830, 0x000300c0, 0x00800040,
516         0x2ae4, 0x00073ffe, 0x000022a2,
517         0x240c, 0x000007ff, 0x00000000,
518         0x8a14, 0xf000001f, 0x00000007,
519         0x8b24, 0xffffffff, 0x00ffffff,
520         0x8b10, 0x0000ff0f, 0x00000000,
521         0x28a4c, 0x07ffffff, 0x4e000000,
522         0x28350, 0x3f3f3fff, 0x00000000,
523         0x30, 0x000000ff, 0x0040,
524         0x34, 0x00000040, 0x00004040,
525         0x9100, 0x03e00000, 0x03600000,
526         0x9060, 0x0000007f, 0x00000020,
527         0x9508, 0x00010000, 0x00010000,
528         0xac14, 0x000003ff, 0x000000f1,
529         0xac10, 0xffffffff, 0x00000000,
530         0xac0c, 0xffffffff, 0x00003210,
531         0x88d4, 0x0000001f, 0x00000010,
532         0x15c0, 0x000c0fc0, 0x000c0400
533 };
534
535 static const u32 hainan_golden_registers2[] =
536 {
537         0x98f8, 0xffffffff, 0x02010001
538 };
539
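/*
 * MGCG/CGCG (medium- and coarse-grain clock gating) init sequences, in the
 * same { offset, mask, value } triplet format as the golden register tables.
 */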
540 static const u32 tahiti_mgcg_cgcg_init[] =
541 {
542         0xc400, 0xffffffff, 0xfffffffc,
543         0x802c, 0xffffffff, 0xe0000000,
544         0x9a60, 0xffffffff, 0x00000100,
545         0x92a4, 0xffffffff, 0x00000100,
546         0xc164, 0xffffffff, 0x00000100,
547         0x9774, 0xffffffff, 0x00000100,
548         0x8984, 0xffffffff, 0x06000100,
549         0x8a18, 0xffffffff, 0x00000100,
550         0x92a0, 0xffffffff, 0x00000100,
551         0xc380, 0xffffffff, 0x00000100,
552         0x8b28, 0xffffffff, 0x00000100,
553         0x9144, 0xffffffff, 0x00000100,
554         0x8d88, 0xffffffff, 0x00000100,
555         0x8d8c, 0xffffffff, 0x00000100,
556         0x9030, 0xffffffff, 0x00000100,
557         0x9034, 0xffffffff, 0x00000100,
558         0x9038, 0xffffffff, 0x00000100,
559         0x903c, 0xffffffff, 0x00000100,
560         0xad80, 0xffffffff, 0x00000100,
561         0xac54, 0xffffffff, 0x00000100,
562         0x897c, 0xffffffff, 0x06000100,
563         0x9868, 0xffffffff, 0x00000100,
564         0x9510, 0xffffffff, 0x00000100,
565         0xaf04, 0xffffffff, 0x00000100,
566         0xae04, 0xffffffff, 0x00000100,
567         0x949c, 0xffffffff, 0x00000100,
568         0x802c, 0xffffffff, 0xe0000000,
569         0x9160, 0xffffffff, 0x00010000,
570         0x9164, 0xffffffff, 0x00030002,
571         0x9168, 0xffffffff, 0x00040007,
572         0x916c, 0xffffffff, 0x00060005,
573         0x9170, 0xffffffff, 0x00090008,
574         0x9174, 0xffffffff, 0x00020001,
575         0x9178, 0xffffffff, 0x00040003,
576         0x917c, 0xffffffff, 0x00000007,
577         0x9180, 0xffffffff, 0x00060005,
578         0x9184, 0xffffffff, 0x00090008,
579         0x9188, 0xffffffff, 0x00030002,
580         0x918c, 0xffffffff, 0x00050004,
581         0x9190, 0xffffffff, 0x00000008,
582         0x9194, 0xffffffff, 0x00070006,
583         0x9198, 0xffffffff, 0x000a0009,
584         0x919c, 0xffffffff, 0x00040003,
585         0x91a0, 0xffffffff, 0x00060005,
586         0x91a4, 0xffffffff, 0x00000009,
587         0x91a8, 0xffffffff, 0x00080007,
588         0x91ac, 0xffffffff, 0x000b000a,
589         0x91b0, 0xffffffff, 0x00050004,
590         0x91b4, 0xffffffff, 0x00070006,
591         0x91b8, 0xffffffff, 0x0008000b,
592         0x91bc, 0xffffffff, 0x000a0009,
593         0x91c0, 0xffffffff, 0x000d000c,
594         0x91c4, 0xffffffff, 0x00060005,
595         0x91c8, 0xffffffff, 0x00080007,
596         0x91cc, 0xffffffff, 0x0000000b,
597         0x91d0, 0xffffffff, 0x000a0009,
598         0x91d4, 0xffffffff, 0x000d000c,
599         0x91d8, 0xffffffff, 0x00070006,
600         0x91dc, 0xffffffff, 0x00090008,
601         0x91e0, 0xffffffff, 0x0000000c,
602         0x91e4, 0xffffffff, 0x000b000a,
603         0x91e8, 0xffffffff, 0x000e000d,
604         0x91ec, 0xffffffff, 0x00080007,
605         0x91f0, 0xffffffff, 0x000a0009,
606         0x91f4, 0xffffffff, 0x0000000d,
607         0x91f8, 0xffffffff, 0x000c000b,
608         0x91fc, 0xffffffff, 0x000f000e,
609         0x9200, 0xffffffff, 0x00090008,
610         0x9204, 0xffffffff, 0x000b000a,
611         0x9208, 0xffffffff, 0x000c000f,
612         0x920c, 0xffffffff, 0x000e000d,
613         0x9210, 0xffffffff, 0x00110010,
614         0x9214, 0xffffffff, 0x000a0009,
615         0x9218, 0xffffffff, 0x000c000b,
616         0x921c, 0xffffffff, 0x0000000f,
617         0x9220, 0xffffffff, 0x000e000d,
618         0x9224, 0xffffffff, 0x00110010,
619         0x9228, 0xffffffff, 0x000b000a,
620         0x922c, 0xffffffff, 0x000d000c,
621         0x9230, 0xffffffff, 0x00000010,
622         0x9234, 0xffffffff, 0x000f000e,
623         0x9238, 0xffffffff, 0x00120011,
624         0x923c, 0xffffffff, 0x000c000b,
625         0x9240, 0xffffffff, 0x000e000d,
626         0x9244, 0xffffffff, 0x00000011,
627         0x9248, 0xffffffff, 0x0010000f,
628         0x924c, 0xffffffff, 0x00130012,
629         0x9250, 0xffffffff, 0x000d000c,
630         0x9254, 0xffffffff, 0x000f000e,
631         0x9258, 0xffffffff, 0x00100013,
632         0x925c, 0xffffffff, 0x00120011,
633         0x9260, 0xffffffff, 0x00150014,
634         0x9264, 0xffffffff, 0x000e000d,
635         0x9268, 0xffffffff, 0x0010000f,
636         0x926c, 0xffffffff, 0x00000013,
637         0x9270, 0xffffffff, 0x00120011,
638         0x9274, 0xffffffff, 0x00150014,
639         0x9278, 0xffffffff, 0x000f000e,
640         0x927c, 0xffffffff, 0x00110010,
641         0x9280, 0xffffffff, 0x00000014,
642         0x9284, 0xffffffff, 0x00130012,
643         0x9288, 0xffffffff, 0x00160015,
644         0x928c, 0xffffffff, 0x0010000f,
645         0x9290, 0xffffffff, 0x00120011,
646         0x9294, 0xffffffff, 0x00000015,
647         0x9298, 0xffffffff, 0x00140013,
648         0x929c, 0xffffffff, 0x00170016,
649         0x9150, 0xffffffff, 0x96940200,
650         0x8708, 0xffffffff, 0x00900100,
651         0xc478, 0xffffffff, 0x00000080,
652         0xc404, 0xffffffff, 0x0020003f,
653         0x30, 0xffffffff, 0x0000001c,
654         0x34, 0x000f0000, 0x000f0000,
655         0x160c, 0xffffffff, 0x00000100,
656         0x1024, 0xffffffff, 0x00000100,
657         0x102c, 0x00000101, 0x00000000,
658         0x20a8, 0xffffffff, 0x00000104,
659         0x264c, 0x000c0000, 0x000c0000,
660         0x2648, 0x000c0000, 0x000c0000,
661         0x55e4, 0xff000fff, 0x00000100,
662         0x55e8, 0x00000001, 0x00000001,
663         0x2f50, 0x00000001, 0x00000001,
664         0x30cc, 0xc0000fff, 0x00000104,
665         0xc1e4, 0x00000001, 0x00000001,
666         0xd0c0, 0xfffffff0, 0x00000100,
667         0xd8c0, 0xfffffff0, 0x00000100
668 };
669
670 static const u32 pitcairn_mgcg_cgcg_init[] =
671 {
672         0xc400, 0xffffffff, 0xfffffffc,
673         0x802c, 0xffffffff, 0xe0000000,
674         0x9a60, 0xffffffff, 0x00000100,
675         0x92a4, 0xffffffff, 0x00000100,
676         0xc164, 0xffffffff, 0x00000100,
677         0x9774, 0xffffffff, 0x00000100,
678         0x8984, 0xffffffff, 0x06000100,
679         0x8a18, 0xffffffff, 0x00000100,
680         0x92a0, 0xffffffff, 0x00000100,
681         0xc380, 0xffffffff, 0x00000100,
682         0x8b28, 0xffffffff, 0x00000100,
683         0x9144, 0xffffffff, 0x00000100,
684         0x8d88, 0xffffffff, 0x00000100,
685         0x8d8c, 0xffffffff, 0x00000100,
686         0x9030, 0xffffffff, 0x00000100,
687         0x9034, 0xffffffff, 0x00000100,
688         0x9038, 0xffffffff, 0x00000100,
689         0x903c, 0xffffffff, 0x00000100,
690         0xad80, 0xffffffff, 0x00000100,
691         0xac54, 0xffffffff, 0x00000100,
692         0x897c, 0xffffffff, 0x06000100,
693         0x9868, 0xffffffff, 0x00000100,
694         0x9510, 0xffffffff, 0x00000100,
695         0xaf04, 0xffffffff, 0x00000100,
696         0xae04, 0xffffffff, 0x00000100,
697         0x949c, 0xffffffff, 0x00000100,
698         0x802c, 0xffffffff, 0xe0000000,
699         0x9160, 0xffffffff, 0x00010000,
700         0x9164, 0xffffffff, 0x00030002,
701         0x9168, 0xffffffff, 0x00040007,
702         0x916c, 0xffffffff, 0x00060005,
703         0x9170, 0xffffffff, 0x00090008,
704         0x9174, 0xffffffff, 0x00020001,
705         0x9178, 0xffffffff, 0x00040003,
706         0x917c, 0xffffffff, 0x00000007,
707         0x9180, 0xffffffff, 0x00060005,
708         0x9184, 0xffffffff, 0x00090008,
709         0x9188, 0xffffffff, 0x00030002,
710         0x918c, 0xffffffff, 0x00050004,
711         0x9190, 0xffffffff, 0x00000008,
712         0x9194, 0xffffffff, 0x00070006,
713         0x9198, 0xffffffff, 0x000a0009,
714         0x919c, 0xffffffff, 0x00040003,
715         0x91a0, 0xffffffff, 0x00060005,
716         0x91a4, 0xffffffff, 0x00000009,
717         0x91a8, 0xffffffff, 0x00080007,
718         0x91ac, 0xffffffff, 0x000b000a,
719         0x91b0, 0xffffffff, 0x00050004,
720         0x91b4, 0xffffffff, 0x00070006,
721         0x91b8, 0xffffffff, 0x0008000b,
722         0x91bc, 0xffffffff, 0x000a0009,
723         0x91c0, 0xffffffff, 0x000d000c,
724         0x9200, 0xffffffff, 0x00090008,
725         0x9204, 0xffffffff, 0x000b000a,
726         0x9208, 0xffffffff, 0x000c000f,
727         0x920c, 0xffffffff, 0x000e000d,
728         0x9210, 0xffffffff, 0x00110010,
729         0x9214, 0xffffffff, 0x000a0009,
730         0x9218, 0xffffffff, 0x000c000b,
731         0x921c, 0xffffffff, 0x0000000f,
732         0x9220, 0xffffffff, 0x000e000d,
733         0x9224, 0xffffffff, 0x00110010,
734         0x9228, 0xffffffff, 0x000b000a,
735         0x922c, 0xffffffff, 0x000d000c,
736         0x9230, 0xffffffff, 0x00000010,
737         0x9234, 0xffffffff, 0x000f000e,
738         0x9238, 0xffffffff, 0x00120011,
739         0x923c, 0xffffffff, 0x000c000b,
740         0x9240, 0xffffffff, 0x000e000d,
741         0x9244, 0xffffffff, 0x00000011,
742         0x9248, 0xffffffff, 0x0010000f,
743         0x924c, 0xffffffff, 0x00130012,
744         0x9250, 0xffffffff, 0x000d000c,
745         0x9254, 0xffffffff, 0x000f000e,
746         0x9258, 0xffffffff, 0x00100013,
747         0x925c, 0xffffffff, 0x00120011,
748         0x9260, 0xffffffff, 0x00150014,
749         0x9150, 0xffffffff, 0x96940200,
750         0x8708, 0xffffffff, 0x00900100,
751         0xc478, 0xffffffff, 0x00000080,
752         0xc404, 0xffffffff, 0x0020003f,
753         0x30, 0xffffffff, 0x0000001c,
754         0x34, 0x000f0000, 0x000f0000,
755         0x160c, 0xffffffff, 0x00000100,
756         0x1024, 0xffffffff, 0x00000100,
757         0x102c, 0x00000101, 0x00000000,
758         0x20a8, 0xffffffff, 0x00000104,
759         0x55e4, 0xff000fff, 0x00000100,
760         0x55e8, 0x00000001, 0x00000001,
761         0x2f50, 0x00000001, 0x00000001,
762         0x30cc, 0xc0000fff, 0x00000104,
763         0xc1e4, 0x00000001, 0x00000001,
764         0xd0c0, 0xfffffff0, 0x00000100,
765         0xd8c0, 0xfffffff0, 0x00000100
766 };
767
768 static const u32 verde_mgcg_cgcg_init[] =
769 {
770         0xc400, 0xffffffff, 0xfffffffc,
771         0x802c, 0xffffffff, 0xe0000000,
772         0x9a60, 0xffffffff, 0x00000100,
773         0x92a4, 0xffffffff, 0x00000100,
774         0xc164, 0xffffffff, 0x00000100,
775         0x9774, 0xffffffff, 0x00000100,
776         0x8984, 0xffffffff, 0x06000100,
777         0x8a18, 0xffffffff, 0x00000100,
778         0x92a0, 0xffffffff, 0x00000100,
779         0xc380, 0xffffffff, 0x00000100,
780         0x8b28, 0xffffffff, 0x00000100,
781         0x9144, 0xffffffff, 0x00000100,
782         0x8d88, 0xffffffff, 0x00000100,
783         0x8d8c, 0xffffffff, 0x00000100,
784         0x9030, 0xffffffff, 0x00000100,
785         0x9034, 0xffffffff, 0x00000100,
786         0x9038, 0xffffffff, 0x00000100,
787         0x903c, 0xffffffff, 0x00000100,
788         0xad80, 0xffffffff, 0x00000100,
789         0xac54, 0xffffffff, 0x00000100,
790         0x897c, 0xffffffff, 0x06000100,
791         0x9868, 0xffffffff, 0x00000100,
792         0x9510, 0xffffffff, 0x00000100,
793         0xaf04, 0xffffffff, 0x00000100,
794         0xae04, 0xffffffff, 0x00000100,
795         0x949c, 0xffffffff, 0x00000100,
796         0x802c, 0xffffffff, 0xe0000000,
797         0x9160, 0xffffffff, 0x00010000,
798         0x9164, 0xffffffff, 0x00030002,
799         0x9168, 0xffffffff, 0x00040007,
800         0x916c, 0xffffffff, 0x00060005,
801         0x9170, 0xffffffff, 0x00090008,
802         0x9174, 0xffffffff, 0x00020001,
803         0x9178, 0xffffffff, 0x00040003,
804         0x917c, 0xffffffff, 0x00000007,
805         0x9180, 0xffffffff, 0x00060005,
806         0x9184, 0xffffffff, 0x00090008,
807         0x9188, 0xffffffff, 0x00030002,
808         0x918c, 0xffffffff, 0x00050004,
809         0x9190, 0xffffffff, 0x00000008,
810         0x9194, 0xffffffff, 0x00070006,
811         0x9198, 0xffffffff, 0x000a0009,
812         0x919c, 0xffffffff, 0x00040003,
813         0x91a0, 0xffffffff, 0x00060005,
814         0x91a4, 0xffffffff, 0x00000009,
815         0x91a8, 0xffffffff, 0x00080007,
816         0x91ac, 0xffffffff, 0x000b000a,
817         0x91b0, 0xffffffff, 0x00050004,
818         0x91b4, 0xffffffff, 0x00070006,
819         0x91b8, 0xffffffff, 0x0008000b,
820         0x91bc, 0xffffffff, 0x000a0009,
821         0x91c0, 0xffffffff, 0x000d000c,
822         0x9200, 0xffffffff, 0x00090008,
823         0x9204, 0xffffffff, 0x000b000a,
824         0x9208, 0xffffffff, 0x000c000f,
825         0x920c, 0xffffffff, 0x000e000d,
826         0x9210, 0xffffffff, 0x00110010,
827         0x9214, 0xffffffff, 0x000a0009,
828         0x9218, 0xffffffff, 0x000c000b,
829         0x921c, 0xffffffff, 0x0000000f,
830         0x9220, 0xffffffff, 0x000e000d,
831         0x9224, 0xffffffff, 0x00110010,
832         0x9228, 0xffffffff, 0x000b000a,
833         0x922c, 0xffffffff, 0x000d000c,
834         0x9230, 0xffffffff, 0x00000010,
835         0x9234, 0xffffffff, 0x000f000e,
836         0x9238, 0xffffffff, 0x00120011,
837         0x923c, 0xffffffff, 0x000c000b,
838         0x9240, 0xffffffff, 0x000e000d,
839         0x9244, 0xffffffff, 0x00000011,
840         0x9248, 0xffffffff, 0x0010000f,
841         0x924c, 0xffffffff, 0x00130012,
842         0x9250, 0xffffffff, 0x000d000c,
843         0x9254, 0xffffffff, 0x000f000e,
844         0x9258, 0xffffffff, 0x00100013,
845         0x925c, 0xffffffff, 0x00120011,
846         0x9260, 0xffffffff, 0x00150014,
847         0x9150, 0xffffffff, 0x96940200,
848         0x8708, 0xffffffff, 0x00900100,
849         0xc478, 0xffffffff, 0x00000080,
850         0xc404, 0xffffffff, 0x0020003f,
851         0x30, 0xffffffff, 0x0000001c,
852         0x34, 0x000f0000, 0x000f0000,
853         0x160c, 0xffffffff, 0x00000100,
854         0x1024, 0xffffffff, 0x00000100,
855         0x102c, 0x00000101, 0x00000000,
856         0x20a8, 0xffffffff, 0x00000104,
857         0x264c, 0x000c0000, 0x000c0000,
858         0x2648, 0x000c0000, 0x000c0000,
859         0x55e4, 0xff000fff, 0x00000100,
860         0x55e8, 0x00000001, 0x00000001,
861         0x2f50, 0x00000001, 0x00000001,
862         0x30cc, 0xc0000fff, 0x00000104,
863         0xc1e4, 0x00000001, 0x00000001,
864         0xd0c0, 0xfffffff0, 0x00000100,
865         0xd8c0, 0xfffffff0, 0x00000100
866 };
867
868 static const u32 oland_mgcg_cgcg_init[] =
869 {
870         0xc400, 0xffffffff, 0xfffffffc,
871         0x802c, 0xffffffff, 0xe0000000,
872         0x9a60, 0xffffffff, 0x00000100,
873         0x92a4, 0xffffffff, 0x00000100,
874         0xc164, 0xffffffff, 0x00000100,
875         0x9774, 0xffffffff, 0x00000100,
876         0x8984, 0xffffffff, 0x06000100,
877         0x8a18, 0xffffffff, 0x00000100,
878         0x92a0, 0xffffffff, 0x00000100,
879         0xc380, 0xffffffff, 0x00000100,
880         0x8b28, 0xffffffff, 0x00000100,
881         0x9144, 0xffffffff, 0x00000100,
882         0x8d88, 0xffffffff, 0x00000100,
883         0x8d8c, 0xffffffff, 0x00000100,
884         0x9030, 0xffffffff, 0x00000100,
885         0x9034, 0xffffffff, 0x00000100,
886         0x9038, 0xffffffff, 0x00000100,
887         0x903c, 0xffffffff, 0x00000100,
888         0xad80, 0xffffffff, 0x00000100,
889         0xac54, 0xffffffff, 0x00000100,
890         0x897c, 0xffffffff, 0x06000100,
891         0x9868, 0xffffffff, 0x00000100,
892         0x9510, 0xffffffff, 0x00000100,
893         0xaf04, 0xffffffff, 0x00000100,
894         0xae04, 0xffffffff, 0x00000100,
895         0x949c, 0xffffffff, 0x00000100,
896         0x802c, 0xffffffff, 0xe0000000,
897         0x9160, 0xffffffff, 0x00010000,
898         0x9164, 0xffffffff, 0x00030002,
899         0x9168, 0xffffffff, 0x00040007,
900         0x916c, 0xffffffff, 0x00060005,
901         0x9170, 0xffffffff, 0x00090008,
902         0x9174, 0xffffffff, 0x00020001,
903         0x9178, 0xffffffff, 0x00040003,
904         0x917c, 0xffffffff, 0x00000007,
905         0x9180, 0xffffffff, 0x00060005,
906         0x9184, 0xffffffff, 0x00090008,
907         0x9188, 0xffffffff, 0x00030002,
908         0x918c, 0xffffffff, 0x00050004,
909         0x9190, 0xffffffff, 0x00000008,
910         0x9194, 0xffffffff, 0x00070006,
911         0x9198, 0xffffffff, 0x000a0009,
912         0x919c, 0xffffffff, 0x00040003,
913         0x91a0, 0xffffffff, 0x00060005,
914         0x91a4, 0xffffffff, 0x00000009,
915         0x91a8, 0xffffffff, 0x00080007,
916         0x91ac, 0xffffffff, 0x000b000a,
917         0x91b0, 0xffffffff, 0x00050004,
918         0x91b4, 0xffffffff, 0x00070006,
919         0x91b8, 0xffffffff, 0x0008000b,
920         0x91bc, 0xffffffff, 0x000a0009,
921         0x91c0, 0xffffffff, 0x000d000c,
922         0x91c4, 0xffffffff, 0x00060005,
923         0x91c8, 0xffffffff, 0x00080007,
924         0x91cc, 0xffffffff, 0x0000000b,
925         0x91d0, 0xffffffff, 0x000a0009,
926         0x91d4, 0xffffffff, 0x000d000c,
927         0x9150, 0xffffffff, 0x96940200,
928         0x8708, 0xffffffff, 0x00900100,
929         0xc478, 0xffffffff, 0x00000080,
930         0xc404, 0xffffffff, 0x0020003f,
931         0x30, 0xffffffff, 0x0000001c,
932         0x34, 0x000f0000, 0x000f0000,
933         0x160c, 0xffffffff, 0x00000100,
934         0x1024, 0xffffffff, 0x00000100,
935         0x102c, 0x00000101, 0x00000000,
936         0x20a8, 0xffffffff, 0x00000104,
937         0x264c, 0x000c0000, 0x000c0000,
938         0x2648, 0x000c0000, 0x000c0000,
939         0x55e4, 0xff000fff, 0x00000100,
940         0x55e8, 0x00000001, 0x00000001,
941         0x2f50, 0x00000001, 0x00000001,
942         0x30cc, 0xc0000fff, 0x00000104,
943         0xc1e4, 0x00000001, 0x00000001,
944         0xd0c0, 0xfffffff0, 0x00000100,
945         0xd8c0, 0xfffffff0, 0x00000100
946 };
947
948 static const u32 hainan_mgcg_cgcg_init[] =
949 {
950         0xc400, 0xffffffff, 0xfffffffc,
951         0x802c, 0xffffffff, 0xe0000000,
952         0x9a60, 0xffffffff, 0x00000100,
953         0x92a4, 0xffffffff, 0x00000100,
954         0xc164, 0xffffffff, 0x00000100,
955         0x9774, 0xffffffff, 0x00000100,
956         0x8984, 0xffffffff, 0x06000100,
957         0x8a18, 0xffffffff, 0x00000100,
958         0x92a0, 0xffffffff, 0x00000100,
959         0xc380, 0xffffffff, 0x00000100,
960         0x8b28, 0xffffffff, 0x00000100,
961         0x9144, 0xffffffff, 0x00000100,
962         0x8d88, 0xffffffff, 0x00000100,
963         0x8d8c, 0xffffffff, 0x00000100,
964         0x9030, 0xffffffff, 0x00000100,
965         0x9034, 0xffffffff, 0x00000100,
966         0x9038, 0xffffffff, 0x00000100,
967         0x903c, 0xffffffff, 0x00000100,
968         0xad80, 0xffffffff, 0x00000100,
969         0xac54, 0xffffffff, 0x00000100,
970         0x897c, 0xffffffff, 0x06000100,
971         0x9868, 0xffffffff, 0x00000100,
972         0x9510, 0xffffffff, 0x00000100,
973         0xaf04, 0xffffffff, 0x00000100,
974         0xae04, 0xffffffff, 0x00000100,
975         0x949c, 0xffffffff, 0x00000100,
976         0x802c, 0xffffffff, 0xe0000000,
977         0x9160, 0xffffffff, 0x00010000,
978         0x9164, 0xffffffff, 0x00030002,
979         0x9168, 0xffffffff, 0x00040007,
980         0x916c, 0xffffffff, 0x00060005,
981         0x9170, 0xffffffff, 0x00090008,
982         0x9174, 0xffffffff, 0x00020001,
983         0x9178, 0xffffffff, 0x00040003,
984         0x917c, 0xffffffff, 0x00000007,
985         0x9180, 0xffffffff, 0x00060005,
986         0x9184, 0xffffffff, 0x00090008,
987         0x9188, 0xffffffff, 0x00030002,
988         0x918c, 0xffffffff, 0x00050004,
989         0x9190, 0xffffffff, 0x00000008,
990         0x9194, 0xffffffff, 0x00070006,
991         0x9198, 0xffffffff, 0x000a0009,
992         0x919c, 0xffffffff, 0x00040003,
993         0x91a0, 0xffffffff, 0x00060005,
994         0x91a4, 0xffffffff, 0x00000009,
995         0x91a8, 0xffffffff, 0x00080007,
996         0x91ac, 0xffffffff, 0x000b000a,
997         0x91b0, 0xffffffff, 0x00050004,
998         0x91b4, 0xffffffff, 0x00070006,
999         0x91b8, 0xffffffff, 0x0008000b,
1000         0x91bc, 0xffffffff, 0x000a0009,
1001         0x91c0, 0xffffffff, 0x000d000c,
1002         0x91c4, 0xffffffff, 0x00060005,
1003         0x91c8, 0xffffffff, 0x00080007,
1004         0x91cc, 0xffffffff, 0x0000000b,
1005         0x91d0, 0xffffffff, 0x000a0009,
1006         0x91d4, 0xffffffff, 0x000d000c,
1007         0x9150, 0xffffffff, 0x96940200,
1008         0x8708, 0xffffffff, 0x00900100,
1009         0xc478, 0xffffffff, 0x00000080,
1010         0xc404, 0xffffffff, 0x0020003f,
1011         0x30, 0xffffffff, 0x0000001c,
1012         0x34, 0x000f0000, 0x000f0000,
1013         0x160c, 0xffffffff, 0x00000100,
1014         0x1024, 0xffffffff, 0x00000100,
1015         0x20a8, 0xffffffff, 0x00000104,
1016         0x264c, 0x000c0000, 0x000c0000,
1017         0x2648, 0x000c0000, 0x000c0000,
1018         0x2f50, 0x00000001, 0x00000001,
1019         0x30cc, 0xc0000fff, 0x00000104,
1020         0xc1e4, 0x00000001, 0x00000001,
1021         0xd0c0, 0xfffffff0, 0x00000100,
1022         0xd8c0, 0xfffffff0, 0x00000100
1023 };
1024
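/*
 * Powergating (PG) init sequence for VERDE, also in { offset, mask, value }
 * triplet form; programmed from si_init_golden_registers() below.
 */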
1025 static const u32 verde_pg_init[] =
1026 {
1027         0x353c, 0xffffffff, 0x40000,
1028         0x3538, 0xffffffff, 0x200010ff,
1029         0x353c, 0xffffffff, 0x0,
1030         0x353c, 0xffffffff, 0x0,
1031         0x353c, 0xffffffff, 0x0,
1032         0x353c, 0xffffffff, 0x0,
1033         0x353c, 0xffffffff, 0x0,
1034         0x353c, 0xffffffff, 0x7007,
1035         0x3538, 0xffffffff, 0x300010ff,
1036         0x353c, 0xffffffff, 0x0,
1037         0x353c, 0xffffffff, 0x0,
1038         0x353c, 0xffffffff, 0x0,
1039         0x353c, 0xffffffff, 0x0,
1040         0x353c, 0xffffffff, 0x0,
1041         0x353c, 0xffffffff, 0x400000,
1042         0x3538, 0xffffffff, 0x100010ff,
1043         0x353c, 0xffffffff, 0x0,
1044         0x353c, 0xffffffff, 0x0,
1045         0x353c, 0xffffffff, 0x0,
1046         0x353c, 0xffffffff, 0x0,
1047         0x353c, 0xffffffff, 0x0,
1048         0x353c, 0xffffffff, 0x120200,
1049         0x3538, 0xffffffff, 0x500010ff,
1050         0x353c, 0xffffffff, 0x0,
1051         0x353c, 0xffffffff, 0x0,
1052         0x353c, 0xffffffff, 0x0,
1053         0x353c, 0xffffffff, 0x0,
1054         0x353c, 0xffffffff, 0x0,
1055         0x353c, 0xffffffff, 0x1e1e16,
1056         0x3538, 0xffffffff, 0x600010ff,
1057         0x353c, 0xffffffff, 0x0,
1058         0x353c, 0xffffffff, 0x0,
1059         0x353c, 0xffffffff, 0x0,
1060         0x353c, 0xffffffff, 0x0,
1061         0x353c, 0xffffffff, 0x0,
1062         0x353c, 0xffffffff, 0x171f1e,
1063         0x3538, 0xffffffff, 0x700010ff,
1064         0x353c, 0xffffffff, 0x0,
1065         0x353c, 0xffffffff, 0x0,
1066         0x353c, 0xffffffff, 0x0,
1067         0x353c, 0xffffffff, 0x0,
1068         0x353c, 0xffffffff, 0x0,
1069         0x353c, 0xffffffff, 0x0,
1070         0x3538, 0xffffffff, 0x9ff,
1071         0x3500, 0xffffffff, 0x0,
1072         0x3504, 0xffffffff, 0x10000800,
1073         0x3504, 0xffffffff, 0xf,
1074         0x3504, 0xffffffff, 0xf,
1075         0x3500, 0xffffffff, 0x4,
1076         0x3504, 0xffffffff, 0x1000051e,
1077         0x3504, 0xffffffff, 0xffff,
1078         0x3504, 0xffffffff, 0xffff,
1079         0x3500, 0xffffffff, 0x8,
1080         0x3504, 0xffffffff, 0x80500,
1081         0x3500, 0xffffffff, 0x12,
1082         0x3504, 0xffffffff, 0x9050c,
1083         0x3500, 0xffffffff, 0x1d,
1084         0x3504, 0xffffffff, 0xb052c,
1085         0x3500, 0xffffffff, 0x2a,
1086         0x3504, 0xffffffff, 0x1053e,
1087         0x3500, 0xffffffff, 0x2d,
1088         0x3504, 0xffffffff, 0x10546,
1089         0x3500, 0xffffffff, 0x30,
1090         0x3504, 0xffffffff, 0xa054e,
1091         0x3500, 0xffffffff, 0x3c,
1092         0x3504, 0xffffffff, 0x1055f,
1093         0x3500, 0xffffffff, 0x3f,
1094         0x3504, 0xffffffff, 0x10567,
1095         0x3500, 0xffffffff, 0x42,
1096         0x3504, 0xffffffff, 0x1056f,
1097         0x3500, 0xffffffff, 0x45,
1098         0x3504, 0xffffffff, 0x10572,
1099         0x3500, 0xffffffff, 0x48,
1100         0x3504, 0xffffffff, 0x20575,
1101         0x3500, 0xffffffff, 0x4c,
1102         0x3504, 0xffffffff, 0x190801,
1103         0x3500, 0xffffffff, 0x67,
1104         0x3504, 0xffffffff, 0x1082a,
1105         0x3500, 0xffffffff, 0x6a,
1106         0x3504, 0xffffffff, 0x1b082d,
1107         0x3500, 0xffffffff, 0x87,
1108         0x3504, 0xffffffff, 0x310851,
1109         0x3500, 0xffffffff, 0xba,
1110         0x3504, 0xffffffff, 0x891,
1111         0x3500, 0xffffffff, 0xbc,
1112         0x3504, 0xffffffff, 0x893,
1113         0x3500, 0xffffffff, 0xbe,
1114         0x3504, 0xffffffff, 0x20895,
1115         0x3500, 0xffffffff, 0xc2,
1116         0x3504, 0xffffffff, 0x20899,
1117         0x3500, 0xffffffff, 0xc6,
1118         0x3504, 0xffffffff, 0x2089d,
1119         0x3500, 0xffffffff, 0xca,
1120         0x3504, 0xffffffff, 0x8a1,
1121         0x3500, 0xffffffff, 0xcc,
1122         0x3504, 0xffffffff, 0x8a3,
1123         0x3500, 0xffffffff, 0xce,
1124         0x3504, 0xffffffff, 0x308a5,
1125         0x3500, 0xffffffff, 0xd3,
1126         0x3504, 0xffffffff, 0x6d08cd,
1127         0x3500, 0xffffffff, 0x142,
1128         0x3504, 0xffffffff, 0x2000095a,
1129         0x3504, 0xffffffff, 0x1,
1130         0x3500, 0xffffffff, 0x144,
1131         0x3504, 0xffffffff, 0x301f095b,
1132         0x3500, 0xffffffff, 0x165,
1133         0x3504, 0xffffffff, 0xc094d,
1134         0x3500, 0xffffffff, 0x173,
1135         0x3504, 0xffffffff, 0xf096d,
1136         0x3500, 0xffffffff, 0x184,
1137         0x3504, 0xffffffff, 0x15097f,
1138         0x3500, 0xffffffff, 0x19b,
1139         0x3504, 0xffffffff, 0xc0998,
1140         0x3500, 0xffffffff, 0x1a9,
1141         0x3504, 0xffffffff, 0x409a7,
1142         0x3500, 0xffffffff, 0x1af,
1143         0x3504, 0xffffffff, 0xcdc,
1144         0x3500, 0xffffffff, 0x1b1,
1145         0x3504, 0xffffffff, 0x800,
1146         0x3508, 0xffffffff, 0x6c9b2000,
1147         0x3510, 0xfc00, 0x2000,
1148         0x3544, 0xffffffff, 0xfc0,
1149         0x28d4, 0x00000100, 0x100
1150 };
1151
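/* Apply the per-ASIC golden register, clock gating and powergating tables. */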
1152 static void si_init_golden_registers(struct radeon_device *rdev)
1153 {
1154         switch (rdev->family) {
1155         case CHIP_TAHITI:
1156                 radeon_program_register_sequence(rdev,
1157                                                  tahiti_golden_registers,
1158                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1159                 radeon_program_register_sequence(rdev,
1160                                                  tahiti_golden_rlc_registers,
1161                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1162                 radeon_program_register_sequence(rdev,
1163                                                  tahiti_mgcg_cgcg_init,
1164                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1165                 radeon_program_register_sequence(rdev,
1166                                                  tahiti_golden_registers2,
1167                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1168                 break;
1169         case CHIP_PITCAIRN:
1170                 radeon_program_register_sequence(rdev,
1171                                                  pitcairn_golden_registers,
1172                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1173                 radeon_program_register_sequence(rdev,
1174                                                  pitcairn_golden_rlc_registers,
1175                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1176                 radeon_program_register_sequence(rdev,
1177                                                  pitcairn_mgcg_cgcg_init,
1178                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1179                 break;
1180         case CHIP_VERDE:
1181                 radeon_program_register_sequence(rdev,
1182                                                  verde_golden_registers,
1183                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1184                 radeon_program_register_sequence(rdev,
1185                                                  verde_golden_rlc_registers,
1186                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1187                 radeon_program_register_sequence(rdev,
1188                                                  verde_mgcg_cgcg_init,
1189                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1190                 radeon_program_register_sequence(rdev,
1191                                                  verde_pg_init,
1192                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1193                 break;
1194         case CHIP_OLAND:
1195                 radeon_program_register_sequence(rdev,
1196                                                  oland_golden_registers,
1197                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1198                 radeon_program_register_sequence(rdev,
1199                                                  oland_golden_rlc_registers,
1200                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1201                 radeon_program_register_sequence(rdev,
1202                                                  oland_mgcg_cgcg_init,
1203                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1204                 break;
1205         case CHIP_HAINAN:
1206                 radeon_program_register_sequence(rdev,
1207                                                  hainan_golden_registers,
1208                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1209                 radeon_program_register_sequence(rdev,
1210                                                  hainan_golden_registers2,
1211                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1212                 radeon_program_register_sequence(rdev,
1213                                                  hainan_mgcg_cgcg_init,
1214                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1215                 break;
1216         default:
1217                 break;
1218         }
1219 }
1220
1221 #define PCIE_BUS_CLK                10000
1222 #define TCLK                        (PCIE_BUS_CLK / 10)
1223
1224 /**
1225  * si_get_xclk - get the xclk
1226  *
1227  * @rdev: radeon_device pointer
1228  *
1229  * Returns the reference clock used by the gfx engine
1230  * (SI).
1231  */
1232 u32 si_get_xclk(struct radeon_device *rdev)
1233 {
1234         u32 reference_clock = rdev->clock.spll.reference_freq;
1235         u32 tmp;
1236
1237         tmp = RREG32(CG_CLKPIN_CNTL_2);
1238         if (tmp & MUX_TCLK_TO_XCLK)
1239                 return TCLK;
1240
1241         tmp = RREG32(CG_CLKPIN_CNTL);
1242         if (tmp & XTALIN_DIVIDE)
1243                 return reference_clock / 4;
1244
1245         return reference_clock;
1246 }
1247
1248 /* get temperature in millidegrees */
1249 int si_get_temp(struct radeon_device *rdev)
1250 {
1251         u32 temp;
1252         int actual_temp = 0;
1253
1254         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1255                 CTF_TEMP_SHIFT;
1256
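	/*
	 * CTF_TEMP is a 10-bit reading in degrees C; values with bit 9 set
	 * are clamped to 255 before the conversion to millidegrees.
	 */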
1257         if (temp & 0x200)
1258                 actual_temp = 255;
1259         else
1260                 actual_temp = temp & 0x1ff;
1261
1262         actual_temp = (actual_temp * 1000);
1263
1264         return actual_temp;
1265 }
1266
1267 #define TAHITI_IO_MC_REGS_SIZE 36
1268
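/*
 * Per-ASIC MC I/O register tables: pairs of { MC register index, value }
 * written through the MC_SEQ_IO_DEBUG_INDEX/DATA interface by
 * si_mc_load_microcode() before the MC firmware image is loaded.
 */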
1269 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1270         {0x0000006f, 0x03044000},
1271         {0x00000070, 0x0480c018},
1272         {0x00000071, 0x00000040},
1273         {0x00000072, 0x01000000},
1274         {0x00000074, 0x000000ff},
1275         {0x00000075, 0x00143400},
1276         {0x00000076, 0x08ec0800},
1277         {0x00000077, 0x040000cc},
1278         {0x00000079, 0x00000000},
1279         {0x0000007a, 0x21000409},
1280         {0x0000007c, 0x00000000},
1281         {0x0000007d, 0xe8000000},
1282         {0x0000007e, 0x044408a8},
1283         {0x0000007f, 0x00000003},
1284         {0x00000080, 0x00000000},
1285         {0x00000081, 0x01000000},
1286         {0x00000082, 0x02000000},
1287         {0x00000083, 0x00000000},
1288         {0x00000084, 0xe3f3e4f4},
1289         {0x00000085, 0x00052024},
1290         {0x00000087, 0x00000000},
1291         {0x00000088, 0x66036603},
1292         {0x00000089, 0x01000000},
1293         {0x0000008b, 0x1c0a0000},
1294         {0x0000008c, 0xff010000},
1295         {0x0000008e, 0xffffefff},
1296         {0x0000008f, 0xfff3efff},
1297         {0x00000090, 0xfff3efbf},
1298         {0x00000094, 0x00101101},
1299         {0x00000095, 0x00000fff},
1300         {0x00000096, 0x00116fff},
1301         {0x00000097, 0x60010000},
1302         {0x00000098, 0x10010000},
1303         {0x00000099, 0x00006000},
1304         {0x0000009a, 0x00001000},
1305         {0x0000009f, 0x00a77400}
1306 };
1307
1308 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1309         {0x0000006f, 0x03044000},
1310         {0x00000070, 0x0480c018},
1311         {0x00000071, 0x00000040},
1312         {0x00000072, 0x01000000},
1313         {0x00000074, 0x000000ff},
1314         {0x00000075, 0x00143400},
1315         {0x00000076, 0x08ec0800},
1316         {0x00000077, 0x040000cc},
1317         {0x00000079, 0x00000000},
1318         {0x0000007a, 0x21000409},
1319         {0x0000007c, 0x00000000},
1320         {0x0000007d, 0xe8000000},
1321         {0x0000007e, 0x044408a8},
1322         {0x0000007f, 0x00000003},
1323         {0x00000080, 0x00000000},
1324         {0x00000081, 0x01000000},
1325         {0x00000082, 0x02000000},
1326         {0x00000083, 0x00000000},
1327         {0x00000084, 0xe3f3e4f4},
1328         {0x00000085, 0x00052024},
1329         {0x00000087, 0x00000000},
1330         {0x00000088, 0x66036603},
1331         {0x00000089, 0x01000000},
1332         {0x0000008b, 0x1c0a0000},
1333         {0x0000008c, 0xff010000},
1334         {0x0000008e, 0xffffefff},
1335         {0x0000008f, 0xfff3efff},
1336         {0x00000090, 0xfff3efbf},
1337         {0x00000094, 0x00101101},
1338         {0x00000095, 0x00000fff},
1339         {0x00000096, 0x00116fff},
1340         {0x00000097, 0x60010000},
1341         {0x00000098, 0x10010000},
1342         {0x00000099, 0x00006000},
1343         {0x0000009a, 0x00001000},
1344         {0x0000009f, 0x00a47400}
1345 };
1346
1347 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1348         {0x0000006f, 0x03044000},
1349         {0x00000070, 0x0480c018},
1350         {0x00000071, 0x00000040},
1351         {0x00000072, 0x01000000},
1352         {0x00000074, 0x000000ff},
1353         {0x00000075, 0x00143400},
1354         {0x00000076, 0x08ec0800},
1355         {0x00000077, 0x040000cc},
1356         {0x00000079, 0x00000000},
1357         {0x0000007a, 0x21000409},
1358         {0x0000007c, 0x00000000},
1359         {0x0000007d, 0xe8000000},
1360         {0x0000007e, 0x044408a8},
1361         {0x0000007f, 0x00000003},
1362         {0x00000080, 0x00000000},
1363         {0x00000081, 0x01000000},
1364         {0x00000082, 0x02000000},
1365         {0x00000083, 0x00000000},
1366         {0x00000084, 0xe3f3e4f4},
1367         {0x00000085, 0x00052024},
1368         {0x00000087, 0x00000000},
1369         {0x00000088, 0x66036603},
1370         {0x00000089, 0x01000000},
1371         {0x0000008b, 0x1c0a0000},
1372         {0x0000008c, 0xff010000},
1373         {0x0000008e, 0xffffefff},
1374         {0x0000008f, 0xfff3efff},
1375         {0x00000090, 0xfff3efbf},
1376         {0x00000094, 0x00101101},
1377         {0x00000095, 0x00000fff},
1378         {0x00000096, 0x00116fff},
1379         {0x00000097, 0x60010000},
1380         {0x00000098, 0x10010000},
1381         {0x00000099, 0x00006000},
1382         {0x0000009a, 0x00001000},
1383         {0x0000009f, 0x00a37400}
1384 };
1385
1386 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1387         {0x0000006f, 0x03044000},
1388         {0x00000070, 0x0480c018},
1389         {0x00000071, 0x00000040},
1390         {0x00000072, 0x01000000},
1391         {0x00000074, 0x000000ff},
1392         {0x00000075, 0x00143400},
1393         {0x00000076, 0x08ec0800},
1394         {0x00000077, 0x040000cc},
1395         {0x00000079, 0x00000000},
1396         {0x0000007a, 0x21000409},
1397         {0x0000007c, 0x00000000},
1398         {0x0000007d, 0xe8000000},
1399         {0x0000007e, 0x044408a8},
1400         {0x0000007f, 0x00000003},
1401         {0x00000080, 0x00000000},
1402         {0x00000081, 0x01000000},
1403         {0x00000082, 0x02000000},
1404         {0x00000083, 0x00000000},
1405         {0x00000084, 0xe3f3e4f4},
1406         {0x00000085, 0x00052024},
1407         {0x00000087, 0x00000000},
1408         {0x00000088, 0x66036603},
1409         {0x00000089, 0x01000000},
1410         {0x0000008b, 0x1c0a0000},
1411         {0x0000008c, 0xff010000},
1412         {0x0000008e, 0xffffefff},
1413         {0x0000008f, 0xfff3efff},
1414         {0x00000090, 0xfff3efbf},
1415         {0x00000094, 0x00101101},
1416         {0x00000095, 0x00000fff},
1417         {0x00000096, 0x00116fff},
1418         {0x00000097, 0x60010000},
1419         {0x00000098, 0x10010000},
1420         {0x00000099, 0x00006000},
1421         {0x0000009a, 0x00001000},
1422         {0x0000009f, 0x00a17730}
1423 };
1424
1425 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1426         {0x0000006f, 0x03044000},
1427         {0x00000070, 0x0480c018},
1428         {0x00000071, 0x00000040},
1429         {0x00000072, 0x01000000},
1430         {0x00000074, 0x000000ff},
1431         {0x00000075, 0x00143400},
1432         {0x00000076, 0x08ec0800},
1433         {0x00000077, 0x040000cc},
1434         {0x00000079, 0x00000000},
1435         {0x0000007a, 0x21000409},
1436         {0x0000007c, 0x00000000},
1437         {0x0000007d, 0xe8000000},
1438         {0x0000007e, 0x044408a8},
1439         {0x0000007f, 0x00000003},
1440         {0x00000080, 0x00000000},
1441         {0x00000081, 0x01000000},
1442         {0x00000082, 0x02000000},
1443         {0x00000083, 0x00000000},
1444         {0x00000084, 0xe3f3e4f4},
1445         {0x00000085, 0x00052024},
1446         {0x00000087, 0x00000000},
1447         {0x00000088, 0x66036603},
1448         {0x00000089, 0x01000000},
1449         {0x0000008b, 0x1c0a0000},
1450         {0x0000008c, 0xff010000},
1451         {0x0000008e, 0xffffefff},
1452         {0x0000008f, 0xfff3efff},
1453         {0x00000090, 0xfff3efbf},
1454         {0x00000094, 0x00101101},
1455         {0x00000095, 0x00000fff},
1456         {0x00000096, 0x00116fff},
1457         {0x00000097, 0x60010000},
1458         {0x00000098, 0x10010000},
1459         {0x00000099, 0x00006000},
1460         {0x0000009a, 0x00001000},
1461         {0x0000009f, 0x00a07730}
1462 };
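/*
 * Each table above holds {index, data} pairs: si_mc_load_microcode() below
 * writes the first word of every pair to MC_SEQ_IO_DEBUG_INDEX and the
 * second word to MC_SEQ_IO_DEBUG_DATA before streaming in the MC ucode
 * itself.
 */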
1463
1464 /* ucode loading */
1465 int si_mc_load_microcode(struct radeon_device *rdev)
1466 {
1467         const __be32 *fw_data;
1468         u32 running, blackout = 0;
1469         u32 *io_mc_regs;
1470         int i, ucode_size, regs_size;
1471
1472         if (!rdev->mc_fw)
1473                 return -EINVAL;
1474
1475         switch (rdev->family) {
1476         case CHIP_TAHITI:
1477                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1478                 ucode_size = SI_MC_UCODE_SIZE;
1479                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1480                 break;
1481         case CHIP_PITCAIRN:
1482                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1483                 ucode_size = SI_MC_UCODE_SIZE;
1484                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1485                 break;
1486         case CHIP_VERDE:
1487         default:
1488                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1489                 ucode_size = SI_MC_UCODE_SIZE;
1490                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1491                 break;
1492         case CHIP_OLAND:
1493                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1494                 ucode_size = OLAND_MC_UCODE_SIZE;
1495                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1496                 break;
1497         case CHIP_HAINAN:
1498                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1499                 ucode_size = OLAND_MC_UCODE_SIZE;
1500                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1501                 break;
1502         }
1503
1504         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1505
1506         if (running == 0) {
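                /* With running == 0 here, the inner check below can never be
                 * true, so the blackout save/restore in this path is
                 * effectively dead code.
                 */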
1507                 if (running) {
1508                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1509                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1510                 }
1511
1512                 /* reset the engine and set to writable */
1513                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1514                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1515
1516                 /* load mc io regs */
1517                 for (i = 0; i < regs_size; i++) {
1518                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1519                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1520                 }
1521                 /* load the MC ucode */
1522                 fw_data = (const __be32 *)rdev->mc_fw->data;
1523                 for (i = 0; i < ucode_size; i++)
1524                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1525
1526                 /* put the engine back into the active state */
1527                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1528                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1529                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1530
1531                 /* wait for training to complete */
1532                 for (i = 0; i < rdev->usec_timeout; i++) {
1533                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1534                                 break;
1535                         udelay(1);
1536                 }
1537                 for (i = 0; i < rdev->usec_timeout; i++) {
1538                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1539                                 break;
1540                         udelay(1);
1541                 }
1542
1543                 if (running)
1544                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1545         }
1546
1547         return 0;
1548 }
1549
1550 static int si_init_microcode(struct radeon_device *rdev)
1551 {
1552         const char *chip_name;
1553         const char *rlc_chip_name;
1554         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1555         size_t smc_req_size;
1556         char fw_name[30];
1557         int err;
1558
1559         DRM_DEBUG("\n");
1560
1561         switch (rdev->family) {
1562         case CHIP_TAHITI:
1563                 chip_name = "TAHITI";
1564                 rlc_chip_name = "TAHITI";
1565                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1566                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1567                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1568                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1569                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1570                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1571                 break;
1572         case CHIP_PITCAIRN:
1573                 chip_name = "PITCAIRN";
1574                 rlc_chip_name = "PITCAIRN";
1575                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1576                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1577                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1578                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1579                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1580                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1581                 break;
1582         case CHIP_VERDE:
1583                 chip_name = "VERDE";
1584                 rlc_chip_name = "VERDE";
1585                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1586                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1587                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1588                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1589                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1590                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1591                 break;
1592         case CHIP_OLAND:
1593                 chip_name = "OLAND";
1594                 rlc_chip_name = "OLAND";
1595                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1596                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1597                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1598                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1599                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1600                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1601                 break;
1602         case CHIP_HAINAN:
1603                 chip_name = "HAINAN";
1604                 rlc_chip_name = "HAINAN";
1605                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1606                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1607                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1608                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1609                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1610                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1611                 break;
1612         default: BUG();
1613         }
1614
1615         DRM_INFO("Loading %s Microcode\n", chip_name);
1616
1617         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1618         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1619         if (err)
1620                 goto out;
1621         if (rdev->pfp_fw->size != pfp_req_size) {
1622                 printk(KERN_ERR
1623                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1624                        rdev->pfp_fw->size, fw_name);
1625                 err = -EINVAL;
1626                 goto out;
1627         }
1628
1629         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1630         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1631         if (err)
1632                 goto out;
1633         if (rdev->me_fw->size != me_req_size) {
1634                 printk(KERN_ERR
1635                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1636                        rdev->me_fw->size, fw_name);
1637                 err = -EINVAL;
1638         }
1639
1640         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1641         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1642         if (err)
1643                 goto out;
1644         if (rdev->ce_fw->size != ce_req_size) {
1645                 printk(KERN_ERR
1646                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1647                        rdev->ce_fw->size, fw_name);
1648                 err = -EINVAL;
1649         }
1650
1651         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1652         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1653         if (err)
1654                 goto out;
1655         if (rdev->rlc_fw->size != rlc_req_size) {
1656                 printk(KERN_ERR
1657                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1658                        rdev->rlc_fw->size, fw_name);
1659                 err = -EINVAL;
1660         }
1661
1662         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1663         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1664         if (err)
1665                 goto out;
1666         if (rdev->mc_fw->size != mc_req_size) {
1667                 printk(KERN_ERR
1668                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1669                        rdev->mc_fw->size, fw_name);
1670                 err = -EINVAL;
1671         }
1672
1673         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1674         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1675         if (err) {
1676                 printk(KERN_ERR
1677                        "smc: error loading firmware \"%s\"\n",
1678                        fw_name);
1679                 release_firmware(rdev->smc_fw);
1680                 rdev->smc_fw = NULL;
1681                 err = 0;
1682         } else if (rdev->smc_fw->size != smc_req_size) {
1683                 printk(KERN_ERR
1684                        "si_smc: Bogus length %zu in firmware \"%s\"\n",
1685                        rdev->smc_fw->size, fw_name);
1686                 err = -EINVAL;
1687         }
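        /* Unlike the CP/RLC/MC images above, the SMC firmware is optional:
         * a failed request_firmware() only drops the reference and resets
         * err to 0 so initialization continues without it; only a size
         * mismatch is treated as fatal.
         */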
1688
1689 out:
1690         if (err) {
1691                 if (err != -EINVAL)
1692                         printk(KERN_ERR
1693                                "si_cp: Failed to load firmware \"%s\"\n",
1694                                fw_name);
1695                 release_firmware(rdev->pfp_fw);
1696                 rdev->pfp_fw = NULL;
1697                 release_firmware(rdev->me_fw);
1698                 rdev->me_fw = NULL;
1699                 release_firmware(rdev->ce_fw);
1700                 rdev->ce_fw = NULL;
1701                 release_firmware(rdev->rlc_fw);
1702                 rdev->rlc_fw = NULL;
1703                 release_firmware(rdev->mc_fw);
1704                 rdev->mc_fw = NULL;
1705                 release_firmware(rdev->smc_fw);
1706                 rdev->smc_fw = NULL;
1707         }
1708         return err;
1709 }
1710
1711 /* watermark setup */
1712 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1713                                    struct radeon_crtc *radeon_crtc,
1714                                    struct drm_display_mode *mode,
1715                                    struct drm_display_mode *other_mode)
1716 {
1717         u32 tmp, buffer_alloc, i;
1718         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1719         /*
1720          * Line Buffer Setup
1721          * There are 3 line buffers, each one shared by 2 display controllers.
1722          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1723          * the display controllers.  The partitioning is done via one of four
1724          * preset allocations specified in bits 21:20:
1725          *  0 - half lb
1726          *  2 - whole lb, other crtc must be disabled
1727          */
1728         /* this can get tricky if we have two large displays on a paired group
1729          * of crtcs.  Ideally for multiple large displays we'd assign them to
1730          * non-linked crtcs for maximum line buffer allocation.
1731          */
1732         if (radeon_crtc->base.enabled && mode) {
1733                 if (other_mode) {
1734                         tmp = 0; /* 1/2 */
1735                         buffer_alloc = 1;
1736                 } else {
1737                         tmp = 2; /* whole */
1738                         buffer_alloc = 2;
1739                 }
1740         } else {
1741                 tmp = 0;
1742                 buffer_alloc = 0;
1743         }
1744
1745         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1746                DC_LB_MEMORY_CONFIG(tmp));
1747
1748         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1749                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1750         for (i = 0; i < rdev->usec_timeout; i++) {
1751                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1752                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
1753                         break;
1754                 udelay(1);
1755         }
1756
1757         if (radeon_crtc->base.enabled && mode) {
1758                 switch (tmp) {
1759                 case 0:
1760                 default:
1761                         return 4096 * 2;
1762                 case 2:
1763                         return 8192 * 2;
1764                 }
1765         }
1766
1767         /* controller not enabled, so no lb used */
1768         return 0;
1769 }
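/*
 * The value returned above (0, 8192 or 16384) is fed to
 * dce6_program_watermarks() as lb_size, where it is divided by the source
 * width to estimate how many display lines fit in the allocated line buffer.
 */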
1770
1771 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1772 {
1773         u32 tmp = RREG32(MC_SHARED_CHMAP);
1774
1775         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1776         case 0:
1777         default:
1778                 return 1;
1779         case 1:
1780                 return 2;
1781         case 2:
1782                 return 4;
1783         case 3:
1784                 return 8;
1785         case 4:
1786                 return 3;
1787         case 5:
1788                 return 6;
1789         case 6:
1790                 return 10;
1791         case 7:
1792                 return 12;
1793         case 8:
1794                 return 16;
1795         }
1796 }
1797
1798 struct dce6_wm_params {
1799         u32 dram_channels; /* number of dram channels */
1800         u32 yclk;          /* bandwidth per dram data pin in kHz */
1801         u32 sclk;          /* engine clock in kHz */
1802         u32 disp_clk;      /* display clock in kHz */
1803         u32 src_width;     /* viewport width */
1804         u32 active_time;   /* active display time in ns */
1805         u32 blank_time;    /* blank time in ns */
1806         bool interlaced;    /* mode is interlaced */
1807         fixed20_12 vsc;    /* vertical scale ratio */
1808         u32 num_heads;     /* number of active crtcs */
1809         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
1810         u32 lb_size;       /* line buffer allocated to pipe */
1811         u32 vtaps;         /* vertical scaler taps */
1812 };
1813
1814 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1815 {
1816         /* Calculate raw DRAM Bandwidth */
1817         fixed20_12 dram_efficiency; /* 0.7 */
1818         fixed20_12 yclk, dram_channels, bandwidth;
1819         fixed20_12 a;
1820
1821         a.full = dfixed_const(1000);
1822         yclk.full = dfixed_const(wm->yclk);
1823         yclk.full = dfixed_div(yclk, a);
1824         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1825         a.full = dfixed_const(10);
1826         dram_efficiency.full = dfixed_const(7);
1827         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1828         bandwidth.full = dfixed_mul(dram_channels, yclk);
1829         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1830
1831         return dfixed_trunc(bandwidth);
1832 }
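/*
 * Rough worked example (illustrative numbers only): with wm->yclk = 1000000
 * (1 GHz effective per pin) and 2 DRAM channels, the expression above
 * evaluates to 2 * 4 * 1000 * 0.7 = 5600, in roughly the same MB/s-style
 * units the rest of the watermark code compares against.
 */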
1833
1834 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1835 {
1836         /* Calculate DRAM Bandwidth and the part allocated to display. */
1837         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1838         fixed20_12 yclk, dram_channels, bandwidth;
1839         fixed20_12 a;
1840
1841         a.full = dfixed_const(1000);
1842         yclk.full = dfixed_const(wm->yclk);
1843         yclk.full = dfixed_div(yclk, a);
1844         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1845         a.full = dfixed_const(10);
1846         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
1847         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1848         bandwidth.full = dfixed_mul(dram_channels, yclk);
1849         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1850
1851         return dfixed_trunc(bandwidth);
1852 }
1853
1854 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1855 {
1856         /* Calculate the display Data return Bandwidth */
1857         fixed20_12 return_efficiency; /* 0.8 */
1858         fixed20_12 sclk, bandwidth;
1859         fixed20_12 a;
1860
1861         a.full = dfixed_const(1000);
1862         sclk.full = dfixed_const(wm->sclk);
1863         sclk.full = dfixed_div(sclk, a);
1864         a.full = dfixed_const(10);
1865         return_efficiency.full = dfixed_const(8);
1866         return_efficiency.full = dfixed_div(return_efficiency, a);
1867         a.full = dfixed_const(32);
1868         bandwidth.full = dfixed_mul(a, sclk);
1869         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1870
1871         return dfixed_trunc(bandwidth);
1872 }
1873
1874 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1875 {
1876         return 32;
1877 }
1878
1879 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1880 {
1881         /* Calculate the DMIF Request Bandwidth */
1882         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1883         fixed20_12 disp_clk, sclk, bandwidth;
1884         fixed20_12 a, b1, b2;
1885         u32 min_bandwidth;
1886
1887         a.full = dfixed_const(1000);
1888         disp_clk.full = dfixed_const(wm->disp_clk);
1889         disp_clk.full = dfixed_div(disp_clk, a);
1890         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1891         b1.full = dfixed_mul(a, disp_clk);
1892
1893         a.full = dfixed_const(1000);
1894         sclk.full = dfixed_const(wm->sclk);
1895         sclk.full = dfixed_div(sclk, a);
1896         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1897         b2.full = dfixed_mul(a, sclk);
1898
1899         a.full = dfixed_const(10);
1900         disp_clk_request_efficiency.full = dfixed_const(8);
1901         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1902
1903         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1904
1905         a.full = dfixed_const(min_bandwidth);
1906         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1907
1908         return dfixed_trunc(bandwidth);
1909 }
1910
1911 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1912 {
1913         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
1914         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1915         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1916         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1917
1918         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1919 }
1920
1921 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1922 {
1923         /* Calculate the display mode Average Bandwidth
1924          * DisplayMode should contain the source and destination dimensions,
1925          * timing, etc.
1926          */
1927         fixed20_12 bpp;
1928         fixed20_12 line_time;
1929         fixed20_12 src_width;
1930         fixed20_12 bandwidth;
1931         fixed20_12 a;
1932
1933         a.full = dfixed_const(1000);
1934         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1935         line_time.full = dfixed_div(line_time, a);
1936         bpp.full = dfixed_const(wm->bytes_per_pixel);
1937         src_width.full = dfixed_const(wm->src_width);
1938         bandwidth.full = dfixed_mul(src_width, bpp);
1939         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1940         bandwidth.full = dfixed_div(bandwidth, line_time);
1941
1942         return dfixed_trunc(bandwidth);
1943 }
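/*
 * Rough worked example (illustrative numbers only): a 1920-wide source at
 * 4 bytes per pixel with vsc = 1 and a 16000 ns line time averages
 * 1920 * 4 / 16 = 480 bytes per microsecond (~MB/s).
 */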
1944
1945 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
1946 {
1947         /* First calculate the latency in ns */
1948         u32 mc_latency = 2000; /* 2000 ns. */
1949         u32 available_bandwidth = dce6_available_bandwidth(wm);
1950         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
1951         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
1952         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
1953         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
1954                 (wm->num_heads * cursor_line_pair_return_time);
1955         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
1956         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
1957         u32 tmp, dmif_size = 12288;
1958         fixed20_12 a, b, c;
1959
1960         if (wm->num_heads == 0)
1961                 return 0;
1962
1963         a.full = dfixed_const(2);
1964         b.full = dfixed_const(1);
1965         if ((wm->vsc.full > a.full) ||
1966             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
1967             (wm->vtaps >= 5) ||
1968             ((wm->vsc.full >= a.full) && wm->interlaced))
1969                 max_src_lines_per_dst_line = 4;
1970         else
1971                 max_src_lines_per_dst_line = 2;
1972
1973         a.full = dfixed_const(available_bandwidth);
1974         b.full = dfixed_const(wm->num_heads);
1975         a.full = dfixed_div(a, b);
1976
1977         b.full = dfixed_const(mc_latency + 512);
1978         c.full = dfixed_const(wm->disp_clk);
1979         b.full = dfixed_div(b, c);
1980
1981         c.full = dfixed_const(dmif_size);
1982         b.full = dfixed_div(c, b);
1983
1984         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
1985
1986         b.full = dfixed_const(1000);
1987         c.full = dfixed_const(wm->disp_clk);
1988         b.full = dfixed_div(c, b);
1989         c.full = dfixed_const(wm->bytes_per_pixel);
1990         b.full = dfixed_mul(b, c);
1991
1992         lb_fill_bw = min(tmp, dfixed_trunc(b));
1993
1994         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
1995         b.full = dfixed_const(1000);
1996         c.full = dfixed_const(lb_fill_bw);
1997         b.full = dfixed_div(c, b);
1998         a.full = dfixed_div(a, b);
1999         line_fill_time = dfixed_trunc(a);
2000
2001         if (line_fill_time < wm->active_time)
2002                 return latency;
2003         else
2004                 return latency + (line_fill_time - wm->active_time);
2005
2006 }
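/*
 * The watermark returned above is an estimate, in ns, of how much latency the
 * head can absorb: the assumed MC latency plus the DC pipe latency plus the
 * time other active heads can occupy the data-return path, extended by any
 * shortfall when the line buffer cannot be refilled within the active
 * scanout time.
 */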
2007
2008 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2009 {
2010         if (dce6_average_bandwidth(wm) <=
2011             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2012                 return true;
2013         else
2014                 return false;
2015 }
2016
2017 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2018 {
2019         if (dce6_average_bandwidth(wm) <=
2020             (dce6_available_bandwidth(wm) / wm->num_heads))
2021                 return true;
2022         else
2023                 return false;
2024 };
2025
2026 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2027 {
2028         u32 lb_partitions = wm->lb_size / wm->src_width;
2029         u32 line_time = wm->active_time + wm->blank_time;
2030         u32 latency_tolerant_lines;
2031         u32 latency_hiding;
2032         fixed20_12 a;
2033
2034         a.full = dfixed_const(1);
2035         if (wm->vsc.full > a.full)
2036                 latency_tolerant_lines = 1;
2037         else {
2038                 if (lb_partitions <= (wm->vtaps + 1))
2039                         latency_tolerant_lines = 1;
2040                 else
2041                         latency_tolerant_lines = 2;
2042         }
2043
2044         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2045
2046         if (dce6_latency_watermark(wm) <= latency_hiding)
2047                 return true;
2048         else
2049                 return false;
2050 }
2051
2052 static void dce6_program_watermarks(struct radeon_device *rdev,
2053                                          struct radeon_crtc *radeon_crtc,
2054                                          u32 lb_size, u32 num_heads)
2055 {
2056         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2057         struct dce6_wm_params wm_low, wm_high;
2058         u32 dram_channels;
2059         u32 pixel_period;
2060         u32 line_time = 0;
2061         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2062         u32 priority_a_mark = 0, priority_b_mark = 0;
2063         u32 priority_a_cnt = PRIORITY_OFF;
2064         u32 priority_b_cnt = PRIORITY_OFF;
2065         u32 tmp, arb_control3;
2066         fixed20_12 a, b, c;
2067
2068         if (radeon_crtc->base.enabled && num_heads && mode) {
2069                 pixel_period = 1000000 / (u32)mode->clock;
2070                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2071                 priority_a_cnt = 0;
2072                 priority_b_cnt = 0;
2073
2074                 if (rdev->family == CHIP_ARUBA)
2075                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2076                 else
2077                         dram_channels = si_get_number_of_dram_channels(rdev);
2078
2079                 /* watermark for high clocks */
2080                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2081                         wm_high.yclk =
2082                                 radeon_dpm_get_mclk(rdev, false) * 10;
2083                         wm_high.sclk =
2084                                 radeon_dpm_get_sclk(rdev, false) * 10;
2085                 } else {
2086                         wm_high.yclk = rdev->pm.current_mclk * 10;
2087                         wm_high.sclk = rdev->pm.current_sclk * 10;
2088                 }
2089
2090                 wm_high.disp_clk = mode->clock;
2091                 wm_high.src_width = mode->crtc_hdisplay;
2092                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2093                 wm_high.blank_time = line_time - wm_high.active_time;
2094                 wm_high.interlaced = false;
2095                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2096                         wm_high.interlaced = true;
2097                 wm_high.vsc = radeon_crtc->vsc;
2098                 wm_high.vtaps = 1;
2099                 if (radeon_crtc->rmx_type != RMX_OFF)
2100                         wm_high.vtaps = 2;
2101                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2102                 wm_high.lb_size = lb_size;
2103                 wm_high.dram_channels = dram_channels;
2104                 wm_high.num_heads = num_heads;
2105
2106                 /* watermark for low clocks */
2107                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2108                         wm_low.yclk =
2109                                 radeon_dpm_get_mclk(rdev, true) * 10;
2110                         wm_low.sclk =
2111                                 radeon_dpm_get_sclk(rdev, true) * 10;
2112                 } else {
2113                         wm_low.yclk = rdev->pm.current_mclk * 10;
2114                         wm_low.sclk = rdev->pm.current_sclk * 10;
2115                 }
2116
2117                 wm_low.disp_clk = mode->clock;
2118                 wm_low.src_width = mode->crtc_hdisplay;
2119                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2120                 wm_low.blank_time = line_time - wm_low.active_time;
2121                 wm_low.interlaced = false;
2122                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2123                         wm_low.interlaced = true;
2124                 wm_low.vsc = radeon_crtc->vsc;
2125                 wm_low.vtaps = 1;
2126                 if (radeon_crtc->rmx_type != RMX_OFF)
2127                         wm_low.vtaps = 2;
2128                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2129                 wm_low.lb_size = lb_size;
2130                 wm_low.dram_channels = dram_channels;
2131                 wm_low.num_heads = num_heads;
2132
2133                 /* set for high clocks */
2134                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2135                 /* set for low clocks */
2136                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2137
2138                 /* possibly force display priority to high */
2139                 /* should really do this at mode validation time... */
2140                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2141                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2142                     !dce6_check_latency_hiding(&wm_high) ||
2143                     (rdev->disp_priority == 2)) {
2144                         DRM_DEBUG_KMS("force priority to high\n");
2145                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2146                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2147                 }
2148                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2149                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2150                     !dce6_check_latency_hiding(&wm_low) ||
2151                     (rdev->disp_priority == 2)) {
2152                         DRM_DEBUG_KMS("force priority to high\n");
2153                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2154                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2155                 }
2156
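                /* The priority marks computed below are roughly the number of
                 * 16-pixel groups that can be scanned out (at mode->clock,
                 * scaled by hsc) during the corresponding latency watermark.
                 */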
2157                 a.full = dfixed_const(1000);
2158                 b.full = dfixed_const(mode->clock);
2159                 b.full = dfixed_div(b, a);
2160                 c.full = dfixed_const(latency_watermark_a);
2161                 c.full = dfixed_mul(c, b);
2162                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2163                 c.full = dfixed_div(c, a);
2164                 a.full = dfixed_const(16);
2165                 c.full = dfixed_div(c, a);
2166                 priority_a_mark = dfixed_trunc(c);
2167                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2168
2169                 a.full = dfixed_const(1000);
2170                 b.full = dfixed_const(mode->clock);
2171                 b.full = dfixed_div(b, a);
2172                 c.full = dfixed_const(latency_watermark_b);
2173                 c.full = dfixed_mul(c, b);
2174                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2175                 c.full = dfixed_div(c, a);
2176                 a.full = dfixed_const(16);
2177                 c.full = dfixed_div(c, a);
2178                 priority_b_mark = dfixed_trunc(c);
2179                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2180         }
2181
2182         /* select wm A */
2183         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2184         tmp = arb_control3;
2185         tmp &= ~LATENCY_WATERMARK_MASK(3);
2186         tmp |= LATENCY_WATERMARK_MASK(1);
2187         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2188         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2189                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2190                 LATENCY_HIGH_WATERMARK(line_time)));
2191         /* select wm B */
2192         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2193         tmp &= ~LATENCY_WATERMARK_MASK(3);
2194         tmp |= LATENCY_WATERMARK_MASK(2);
2195         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2196         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2197                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2198                 LATENCY_HIGH_WATERMARK(line_time)));
2199         /* restore original selection */
2200         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2201
2202         /* write the priority marks */
2203         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2204         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2205
2206         /* save values for DPM */
2207         radeon_crtc->line_time = line_time;
2208         radeon_crtc->wm_high = latency_watermark_a;
2209         radeon_crtc->wm_low = latency_watermark_b;
2210 }
2211
2212 void dce6_bandwidth_update(struct radeon_device *rdev)
2213 {
2214         struct drm_display_mode *mode0 = NULL;
2215         struct drm_display_mode *mode1 = NULL;
2216         u32 num_heads = 0, lb_size;
2217         int i;
2218
2219         radeon_update_display_priority(rdev);
2220
2221         for (i = 0; i < rdev->num_crtc; i++) {
2222                 if (rdev->mode_info.crtcs[i]->base.enabled)
2223                         num_heads++;
2224         }
2225         for (i = 0; i < rdev->num_crtc; i += 2) {
2226                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2227                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2228                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2229                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2230                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2231                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2232         }
2233 }
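/*
 * CRTCs are walked in pairs above because each line buffer is shared by two
 * display controllers; handling both of a pair together lets
 * dce6_line_buffer_adjust() split the buffer based on which of the paired
 * modes are active.
 */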
2234
2235 /*
2236  * Core functions
2237  */
2238 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2239 {
2240         const u32 num_tile_mode_states = 32;
2241         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2242
2243         switch (rdev->config.si.mem_row_size_in_kb) {
2244         case 1:
2245                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2246                 break;
2247         case 2:
2248         default:
2249                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2250                 break;
2251         case 4:
2252                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2253                 break;
2254         }
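        /* Tile-split entries below that use split_equal_to_row_size therefore
         * match the DRAM row size detected above (1KB, 2KB or 4KB).
         */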
2255
2256         if ((rdev->family == CHIP_TAHITI) ||
2257             (rdev->family == CHIP_PITCAIRN)) {
2258                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2259                         switch (reg_offset) {
2260                         case 0:  /* non-AA compressed depth or any compressed stencil */
2261                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2262                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2263                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2264                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2265                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2266                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2268                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2269                                 break;
2270                         case 1:  /* 2xAA/4xAA compressed depth only */
2271                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2272                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2273                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2274                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2275                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2276                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2277                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2278                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2279                                 break;
2280                         case 2:  /* 8xAA compressed depth only */
2281                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2282                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2283                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2284                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2285                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2286                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2287                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2288                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2289                                 break;
2290                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2291                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2292                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2293                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2294                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2295                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2296                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2297                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2298                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2299                                 break;
2300                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2301                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2302                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2303                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2304                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2305                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2306                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2307                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2308                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2309                                 break;
2310                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2311                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2312                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2313                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2314                                                  TILE_SPLIT(split_equal_to_row_size) |
2315                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2316                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2317                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2318                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2319                                 break;
2320                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2321                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2323                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2324                                                  TILE_SPLIT(split_equal_to_row_size) |
2325                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2326                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2327                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2328                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2329                                 break;
2330                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2331                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2333                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2334                                                  TILE_SPLIT(split_equal_to_row_size) |
2335                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2336                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2337                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2338                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2339                                 break;
2340                         case 8:  /* 1D and 1D Array Surfaces */
2341                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2342                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2343                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2344                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2345                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2346                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2347                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2348                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2349                                 break;
2350                         case 9:  /* Displayable maps. */
2351                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2352                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2353                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2354                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2355                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2356                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2357                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2358                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2359                                 break;
2360                         case 10:  /* Display 8bpp. */
2361                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2363                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2364                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2365                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2366                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2367                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2368                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2369                                 break;
2370                         case 11:  /* Display 16bpp. */
2371                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2372                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2373                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2374                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2375                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2376                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2378                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2379                                 break;
2380                         case 12:  /* Display 32bpp. */
2381                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2382                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2383                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2384                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2385                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2386                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2387                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2388                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2389                                 break;
2390                         case 13:  /* Thin. */
2391                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2392                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2393                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2394                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2395                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2396                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2398                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2399                                 break;
2400                         case 14:  /* Thin 8 bpp. */
2401                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2403                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2404                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2405                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2406                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2407                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2408                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2409                                 break;
2410                         case 15:  /* Thin 16 bpp. */
2411                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2412                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2413                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2414                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2415                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2416                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2418                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2419                                 break;
2420                         case 16:  /* Thin 32 bpp. */
2421                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2423                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2424                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2425                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2426                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2427                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2428                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2429                                 break;
2430                         case 17:  /* Thin 64 bpp. */
2431                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2432                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2433                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2434                                                  TILE_SPLIT(split_equal_to_row_size) |
2435                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2436                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2438                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2439                                 break;
2440                         case 21:  /* 8 bpp PRT. */
2441                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2442                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2443                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2444                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2445                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2446                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2447                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2449                                 break;
2450                         case 22:  /* 16 bpp PRT */
2451                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2453                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2454                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2455                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2456                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2458                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2459                                 break;
2460                         case 23:  /* 32 bpp PRT */
2461                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2462                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2463                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2464                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2465                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2466                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2468                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2469                                 break;
2470                         case 24:  /* 64 bpp PRT */
2471                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2472                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2473                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2474                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2475                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2476                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2479                                 break;
2480                         case 25:  /* 128 bpp PRT */
2481                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2482                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2483                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2484                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2485                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2486                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2489                                 break;
2490                         default:
2491                                 gb_tile_moden = 0;
2492                                 break;
2493                         }
2494                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2495                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2496                 }
2497         } else if ((rdev->family == CHIP_VERDE) ||
2498                    (rdev->family == CHIP_OLAND) ||
2499                    (rdev->family == CHIP_HAINAN)) {
2500                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2501                         switch (reg_offset) {
2502                         case 0:  /* non-AA compressed depth or any compressed stencil */
2503                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2505                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2506                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2507                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2508                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2510                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2511                                 break;
2512                         case 1:  /* 2xAA/4xAA compressed depth only */
2513                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2515                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2516                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2517                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2518                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2520                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2521                                 break;
2522                         case 2:  /* 8xAA compressed depth only */
2523                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2525                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2526                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2527                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2528                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2530                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2531                                 break;
2532                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2533                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2535                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2536                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2537                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2538                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2541                                 break;
2542                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2543                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2544                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2545                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2546                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2547                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2548                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2550                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2551                                 break;
2552                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2553                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2555                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2556                                                  TILE_SPLIT(split_equal_to_row_size) |
2557                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2558                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2560                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2561                                 break;
2562                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2563                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2565                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2566                                                  TILE_SPLIT(split_equal_to_row_size) |
2567                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2568                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2570                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2571                                 break;
2572                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2573                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2575                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2576                                                  TILE_SPLIT(split_equal_to_row_size) |
2577                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2578                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2580                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2581                                 break;
2582                         case 8:  /* 1D and 1D Array Surfaces */
2583                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2584                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2585                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2586                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2587                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2588                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2590                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2591                                 break;
2592                         case 9:  /* Displayable maps. */
2593                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2594                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2595                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2596                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2597                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2598                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2600                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2601                                 break;
2602                         case 10:  /* Display 8bpp. */
2603                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2605                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2606                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2607                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2608                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2610                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2611                                 break;
2612                         case 11:  /* Display 16bpp. */
2613                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2615                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2616                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2617                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2618                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2620                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2621                                 break;
2622                         case 12:  /* Display 32bpp. */
2623                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2625                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2626                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2627                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2628                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2631                                 break;
2632                         case 13:  /* Thin. */
2633                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2634                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2635                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2636                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2637                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2638                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2640                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2641                                 break;
2642                         case 14:  /* Thin 8 bpp. */
2643                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2645                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2646                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2647                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2648                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2650                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2651                                 break;
2652                         case 15:  /* Thin 16 bpp. */
2653                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2655                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2656                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2657                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2658                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2660                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2661                                 break;
2662                         case 16:  /* Thin 32 bpp. */
2663                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2664                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2665                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2666                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2667                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2668                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2670                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2671                                 break;
2672                         case 17:  /* Thin 64 bpp. */
2673                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2675                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2676                                                  TILE_SPLIT(split_equal_to_row_size) |
2677                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2678                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2681                                 break;
2682                         case 21:  /* 8 bpp PRT. */
2683                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2684                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2685                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2686                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2687                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2688                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2689                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2690                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2691                                 break;
2692                         case 22:  /* 16 bpp PRT */
2693                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2695                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2696                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2697                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2698                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2699                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2700                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2701                                 break;
2702                         case 23:  /* 32 bpp PRT */
2703                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2704                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2705                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2706                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2707                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2708                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2710                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2711                                 break;
2712                         case 24:  /* 64 bpp PRT */
2713                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2715                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2716                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2717                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2718                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2720                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2721                                 break;
2722                         case 25:  /* 128 bpp PRT */
2723                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2725                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2726                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2727                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2728                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2729                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2730                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2731                                 break;
2732                         default:
2733                                 gb_tile_moden = 0;
2734                                 break;
2735                         }
2736                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2737                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2738                 }
2739         } else
2740                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2741 }
2742
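/*
 * si_select_se_sh() points GRBM_GFX_INDEX at one shader engine (SE) and
 * shader array (SH) so that the per-SE/SH registers accessed afterwards hit
 * only that instance; 0xffffffff in either argument selects broadcast for
 * that dimension, e.g.
 *
 *   si_select_se_sh(rdev, 1, 0);                    - SE1/SH0 only
 *   si_select_se_sh(rdev, 0xffffffff, 0xffffffff);  - broadcast to all
 *
 * The callers below restore full broadcast when they are done.
 */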
2743 static void si_select_se_sh(struct radeon_device *rdev,
2744                             u32 se_num, u32 sh_num)
2745 {
2746         u32 data = INSTANCE_BROADCAST_WRITES;
2747
2748         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2749                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2750         else if (se_num == 0xffffffff)
2751                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2752         else if (sh_num == 0xffffffff)
2753                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2754         else
2755                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2756         WREG32(GRBM_GFX_INDEX, data);
2757 }
2758
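/*
 * si_create_bitmask() returns a mask with the bit_width least significant
 * bits set, e.g. si_create_bitmask(4) == 0xf, si_create_bitmask(8) == 0xff.
 */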
2759 static u32 si_create_bitmask(u32 bit_width)
2760 {
2761         u32 i, mask = 0;
2762
2763         for (i = 0; i < bit_width; i++) {
2764                 mask <<= 1;
2765                 mask |= 1;
2766         }
2767         return mask;
2768 }
2769
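/*
 * si_get_cu_enabled() merges the hardware (CC_GC_SHADER_ARRAY_CONFIG) and
 * driver (GC_USER_SHADER_ARRAY_CONFIG) inactive-CU fields for the currently
 * selected SE/SH and returns a bitmask of the compute units that remain
 * active, limited to cu_per_sh bits.
 */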
2770 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2771 {
2772         u32 data, mask;
2773
2774         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2775         if (data & 1)
2776                 data &= INACTIVE_CUS_MASK;
2777         else
2778                 data = 0;
2779         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2780
2781         data >>= INACTIVE_CUS_SHIFT;
2782
2783         mask = si_create_bitmask(cu_per_sh);
2784
2785         return ~data & mask;
2786 }
2787
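/*
 * si_setup_spi() visits every SE/SH pair and clears the bit of one active
 * compute unit in SPI_STATIC_THREAD_MGMT_3 there, then restores broadcast
 * mode.  Note that the inner scan advances mask by "mask <<= k", so it does
 * not test every CU bit in sequence.
 */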
2788 static void si_setup_spi(struct radeon_device *rdev,
2789                          u32 se_num, u32 sh_per_se,
2790                          u32 cu_per_sh)
2791 {
2792         int i, j, k;
2793         u32 data, mask, active_cu;
2794
2795         for (i = 0; i < se_num; i++) {
2796                 for (j = 0; j < sh_per_se; j++) {
2797                         si_select_se_sh(rdev, i, j);
2798                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2799                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2800
2801                         mask = 1;
2802                         for (k = 0; k < 16; k++) {
2803                                 mask <<= k;
2804                                 if (active_cu & mask) {
2805                                         data &= ~mask;
2806                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2807                                         break;
2808                                 }
2809                         }
2810                 }
2811         }
2812         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2813 }
2814
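/*
 * si_get_rb_disabled() is the render-backend counterpart of
 * si_get_cu_enabled(): it merges CC_RB_BACKEND_DISABLE and
 * GC_USER_RB_BACKEND_DISABLE for the selected SE/SH and returns the
 * disabled-RB bits, masked to max_rb_num_per_se / sh_per_se.
 */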
2815 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2816                               u32 max_rb_num_per_se,
2817                               u32 sh_per_se)
2818 {
2819         u32 data, mask;
2820
2821         data = RREG32(CC_RB_BACKEND_DISABLE);
2822         if (data & 1)
2823                 data &= BACKEND_DISABLE_MASK;
2824         else
2825                 data = 0;
2826         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2827
2828         data >>= BACKEND_DISABLE_SHIFT;
2829
2830         mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
2831
2832         return data & mask;
2833 }
2834
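/*
 * si_setup_rb() collects the per-SE/SH disabled-RB masks into one bitmap,
 * derives the global enabled-RB mask (stored in
 * rdev->config.si.backend_enable_mask) and then programs a per-SE
 * PA_SC_RASTER_CONFIG RB mapping based on which backends in each pair are
 * usable.
 */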
2835 static void si_setup_rb(struct radeon_device *rdev,
2836                         u32 se_num, u32 sh_per_se,
2837                         u32 max_rb_num_per_se)
2838 {
2839         int i, j;
2840         u32 data, mask;
2841         u32 disabled_rbs = 0;
2842         u32 enabled_rbs = 0;
2843
2844         for (i = 0; i < se_num; i++) {
2845                 for (j = 0; j < sh_per_se; j++) {
2846                         si_select_se_sh(rdev, i, j);
2847                         data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
2848                         disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
2849                 }
2850         }
2851         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2852
2853         mask = 1;
2854         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
2855                 if (!(disabled_rbs & mask))
2856                         enabled_rbs |= mask;
2857                 mask <<= 1;
2858         }
2859
2860         rdev->config.si.backend_enable_mask = enabled_rbs;
2861
2862         for (i = 0; i < se_num; i++) {
2863                 si_select_se_sh(rdev, i, 0xffffffff);
2864                 data = 0;
2865                 for (j = 0; j < sh_per_se; j++) {
2866                         switch (enabled_rbs & 3) {
2867                         case 1:
2868                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2869                                 break;
2870                         case 2:
2871                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2872                                 break;
2873                         case 3:
2874                         default:
2875                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2876                                 break;
2877                         }
2878                         enabled_rbs >>= 2;
2879                 }
2880                 WREG32(PA_SC_RASTER_CONFIG, data);
2881         }
2882         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2883 }
2884
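/*
 * si_gpu_init() fills in the per-ASIC limits (shader engines, tile pipes,
 * CUs, backends, FIFO sizes and the golden GB_ADDR_CONFIG value) from the
 * switch below, initializes the HDP registers, derives the tiling
 * configuration and applies the 3D engine defaults.
 */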
2885 static void si_gpu_init(struct radeon_device *rdev)
2886 {
2887         u32 gb_addr_config = 0;
2888         u32 mc_shared_chmap, mc_arb_ramcfg;
2889         u32 sx_debug_1;
2890         u32 hdp_host_path_cntl;
2891         u32 tmp;
2892         int i, j;
2893
2894         switch (rdev->family) {
2895         case CHIP_TAHITI:
2896                 rdev->config.si.max_shader_engines = 2;
2897                 rdev->config.si.max_tile_pipes = 12;
2898                 rdev->config.si.max_cu_per_sh = 8;
2899                 rdev->config.si.max_sh_per_se = 2;
2900                 rdev->config.si.max_backends_per_se = 4;
2901                 rdev->config.si.max_texture_channel_caches = 12;
2902                 rdev->config.si.max_gprs = 256;
2903                 rdev->config.si.max_gs_threads = 32;
2904                 rdev->config.si.max_hw_contexts = 8;
2905
2906                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2907                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2908                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2909                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2910                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2911                 break;
2912         case CHIP_PITCAIRN:
2913                 rdev->config.si.max_shader_engines = 2;
2914                 rdev->config.si.max_tile_pipes = 8;
2915                 rdev->config.si.max_cu_per_sh = 5;
2916                 rdev->config.si.max_sh_per_se = 2;
2917                 rdev->config.si.max_backends_per_se = 4;
2918                 rdev->config.si.max_texture_channel_caches = 8;
2919                 rdev->config.si.max_gprs = 256;
2920                 rdev->config.si.max_gs_threads = 32;
2921                 rdev->config.si.max_hw_contexts = 8;
2922
2923                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2924                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2925                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2926                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2927                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2928                 break;
2929         case CHIP_VERDE:
2930         default:
2931                 rdev->config.si.max_shader_engines = 1;
2932                 rdev->config.si.max_tile_pipes = 4;
2933                 rdev->config.si.max_cu_per_sh = 5;
2934                 rdev->config.si.max_sh_per_se = 2;
2935                 rdev->config.si.max_backends_per_se = 4;
2936                 rdev->config.si.max_texture_channel_caches = 4;
2937                 rdev->config.si.max_gprs = 256;
2938                 rdev->config.si.max_gs_threads = 32;
2939                 rdev->config.si.max_hw_contexts = 8;
2940
2941                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2942                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2943                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2944                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2945                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2946                 break;
2947         case CHIP_OLAND:
2948                 rdev->config.si.max_shader_engines = 1;
2949                 rdev->config.si.max_tile_pipes = 4;
2950                 rdev->config.si.max_cu_per_sh = 6;
2951                 rdev->config.si.max_sh_per_se = 1;
2952                 rdev->config.si.max_backends_per_se = 2;
2953                 rdev->config.si.max_texture_channel_caches = 4;
2954                 rdev->config.si.max_gprs = 256;
2955                 rdev->config.si.max_gs_threads = 16;
2956                 rdev->config.si.max_hw_contexts = 8;
2957
2958                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2959                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2960                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2961                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2962                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2963                 break;
2964         case CHIP_HAINAN:
2965                 rdev->config.si.max_shader_engines = 1;
2966                 rdev->config.si.max_tile_pipes = 4;
2967                 rdev->config.si.max_cu_per_sh = 5;
2968                 rdev->config.si.max_sh_per_se = 1;
2969                 rdev->config.si.max_backends_per_se = 1;
2970                 rdev->config.si.max_texture_channel_caches = 2;
2971                 rdev->config.si.max_gprs = 256;
2972                 rdev->config.si.max_gs_threads = 16;
2973                 rdev->config.si.max_hw_contexts = 8;
2974
2975                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2976                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2977                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2978                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2979                 gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
2980                 break;
2981         }
2982
2983         /* Initialize HDP */
2984         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2985                 WREG32((0x2c14 + j), 0x00000000);
2986                 WREG32((0x2c18 + j), 0x00000000);
2987                 WREG32((0x2c1c + j), 0x00000000);
2988                 WREG32((0x2c20 + j), 0x00000000);
2989                 WREG32((0x2c24 + j), 0x00000000);
2990         }
2991
2992         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2993
2994         evergreen_fix_pci_max_read_req_size(rdev);
2995
2996         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2997
2998         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2999         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3000
3001         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3002         rdev->config.si.mem_max_burst_length_bytes = 256;
3003         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3004         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3005         if (rdev->config.si.mem_row_size_in_kb > 4)
3006                 rdev->config.si.mem_row_size_in_kb = 4;
3007         /* XXX use MC settings? */
3008         rdev->config.si.shader_engine_tile_size = 32;
3009         rdev->config.si.num_gpus = 1;
3010         rdev->config.si.multi_gpu_tile_size = 64;
3011
3012         /* fix up row size */
3013         gb_addr_config &= ~ROW_SIZE_MASK;
3014         switch (rdev->config.si.mem_row_size_in_kb) {
3015         case 1:
3016         default:
3017                 gb_addr_config |= ROW_SIZE(0);
3018                 break;
3019         case 2:
3020                 gb_addr_config |= ROW_SIZE(1);
3021                 break;
3022         case 4:
3023                 gb_addr_config |= ROW_SIZE(2);
3024                 break;
3025         }
3026
3027         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3028          * not have bank info, so create a custom tiling dword.
3029          * bits 3:0   num_pipes
3030          * bits 7:4   num_banks
3031          * bits 11:8  group_size
3032          * bits 15:12 row_size
3033          */
3034         rdev->config.si.tile_config = 0;
3035         switch (rdev->config.si.num_tile_pipes) {
3036         case 1:
3037                 rdev->config.si.tile_config |= (0 << 0);
3038                 break;
3039         case 2:
3040                 rdev->config.si.tile_config |= (1 << 0);
3041                 break;
3042         case 4:
3043                 rdev->config.si.tile_config |= (2 << 0);
3044                 break;
3045         case 8:
3046         default:
3047                 /* XXX what about 12? */
3048                 rdev->config.si.tile_config |= (3 << 0);
3049                 break;
3050         }
3051         switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3052         case 0: /* four banks */
3053                 rdev->config.si.tile_config |= 0 << 4;
3054                 break;
3055         case 1: /* eight banks */
3056                 rdev->config.si.tile_config |= 1 << 4;
3057                 break;
3058         case 2: /* sixteen banks */
3059         default:
3060                 rdev->config.si.tile_config |= 2 << 4;
3061                 break;
3062         }
3063         rdev->config.si.tile_config |=
3064                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3065         rdev->config.si.tile_config |=
3066                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
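        /* For example, 4 tile pipes and sixteen banks give tile_config
         * bits 3:0 = 2 and bits 7:4 = 2; group_size and row_size are copied
         * straight from the corresponding gb_addr_config fields.
         */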
3067
3068         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3069         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3070         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3071         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3072         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3073         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3074         if (rdev->has_uvd) {
3075                 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3076                 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3077                 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3078         }
3079
3080         si_tiling_mode_table_init(rdev);
3081
3082         si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3083                     rdev->config.si.max_sh_per_se,
3084                     rdev->config.si.max_backends_per_se);
3085
3086         si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3087                      rdev->config.si.max_sh_per_se,
3088                      rdev->config.si.max_cu_per_sh);
3089
3090
3091         /* set HW defaults for 3D engine */
3092         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3093                                      ROQ_IB2_START(0x2b)));
3094         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3095
3096         sx_debug_1 = RREG32(SX_DEBUG_1);
3097         WREG32(SX_DEBUG_1, sx_debug_1);
3098
3099         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3100
3101         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3102                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3103                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3104                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3105
3106         WREG32(VGT_NUM_INSTANCES, 1);
3107
3108         WREG32(CP_PERFMON_CNTL, 0);
3109
3110         WREG32(SQ_CONFIG, 0);
3111
3112         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3113                                           FORCE_EOV_MAX_REZ_CNT(255)));
3114
3115         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3116                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3117
3118         WREG32(VGT_GS_VERTEX_REUSE, 16);
3119         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3120
3121         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3122         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3123         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3124         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3125         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3126         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3127         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3128         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3129
3130         tmp = RREG32(HDP_MISC_CNTL);
3131         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3132         WREG32(HDP_MISC_CNTL, tmp);
3133
3134         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3135         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3136
3137         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3138
3139         udelay(50);
3140 }
3141
3142 /*
3143  * GPU scratch registers helpers function.
3144  */
3145 static void si_scratch_init(struct radeon_device *rdev)
3146 {
3147         int i;
3148
3149         rdev->scratch.num_reg = 7;
3150         rdev->scratch.reg_base = SCRATCH_REG0;
3151         for (i = 0; i < rdev->scratch.num_reg; i++) {
3152                 rdev->scratch.free[i] = true;
3153                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3154         }
3155 }
3156
3157 void si_fence_ring_emit(struct radeon_device *rdev,
3158                         struct radeon_fence *fence)
3159 {
3160         struct radeon_ring *ring = &rdev->ring[fence->ring];
3161         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3162
3163         /* flush read cache over gart */
3164         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3165         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3166         radeon_ring_write(ring, 0);
3167         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3168         radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3169                           PACKET3_TC_ACTION_ENA |
3170                           PACKET3_SH_KCACHE_ACTION_ENA |
3171                           PACKET3_SH_ICACHE_ACTION_ENA);
3172         radeon_ring_write(ring, 0xFFFFFFFF);
3173         radeon_ring_write(ring, 0);
3174         radeon_ring_write(ring, 10); /* poll interval */
3175         /* EVENT_WRITE_EOP - flush caches, send int */
3176         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3177         radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3178         radeon_ring_write(ring, addr & 0xffffffff);
3179         radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3180         radeon_ring_write(ring, fence->seq);
3181         radeon_ring_write(ring, 0);
3182 }
3183
3184 /*
3185  * IB stuff
3186  */
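/*
 * The next_rptr offsets in si_ring_ib_execute() account for the dwords still
 * to be emitted at that point: 3 for the SET_CONFIG_REG write (or 5 for the
 * WRITE_DATA variant), plus 4 for the INDIRECT_BUFFER packet and 8 for the
 * cache-flush packets appended after a non-const IB.
 */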
3187 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3188 {
3189         struct radeon_ring *ring = &rdev->ring[ib->ring];
3190         u32 header;
3191
3192         if (ib->is_const_ib) {
3193                 /* set switch buffer packet before const IB */
3194                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3195                 radeon_ring_write(ring, 0);
3196
3197                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3198         } else {
3199                 u32 next_rptr;
3200                 if (ring->rptr_save_reg) {
3201                         next_rptr = ring->wptr + 3 + 4 + 8;
3202                         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3203                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3204                                                   PACKET3_SET_CONFIG_REG_START) >> 2));
3205                         radeon_ring_write(ring, next_rptr);
3206                 } else if (rdev->wb.enabled) {
3207                         next_rptr = ring->wptr + 5 + 4 + 8;
3208                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3209                         radeon_ring_write(ring, (1 << 8));
3210                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3211                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3212                         radeon_ring_write(ring, next_rptr);
3213                 }
3214
3215                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3216         }
3217
3218         radeon_ring_write(ring, header);
3219         radeon_ring_write(ring,
3220 #ifdef __BIG_ENDIAN
3221                           (2 << 0) |
3222 #endif
3223                           (ib->gpu_addr & 0xFFFFFFFC));
3224         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3225         radeon_ring_write(ring, ib->length_dw |
3226                           (ib->vm ? (ib->vm->id << 24) : 0));
3227
3228         if (!ib->is_const_ib) {
3229                 /* flush read cache over gart for this vmid */
3230                 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3231                 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3232                 radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
3233                 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3234                 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3235                                   PACKET3_TC_ACTION_ENA |
3236                                   PACKET3_SH_KCACHE_ACTION_ENA |
3237                                   PACKET3_SH_ICACHE_ACTION_ENA);
3238                 radeon_ring_write(ring, 0xFFFFFFFF);
3239                 radeon_ring_write(ring, 0);
3240                 radeon_ring_write(ring, 10); /* poll interval */
3241         }
3242 }
3243
3244 /*
3245  * CP.
3246  */
3247 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3248 {
3249         if (enable)
3250                 WREG32(CP_ME_CNTL, 0);
3251         else {
3252                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3253                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3254                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3255                 WREG32(SCRATCH_UMSK, 0);
3256                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3257                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3258                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3259         }
3260         udelay(50);
3261 }
3262
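/*
 * si_cp_load_microcode() halts the CP and streams the big-endian PFP, CE and
 * ME firmware images into their ucode RAMs one dword at a time, then resets
 * the read/write addresses.
 */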
3263 static int si_cp_load_microcode(struct radeon_device *rdev)
3264 {
3265         const __be32 *fw_data;
3266         int i;
3267
3268         if (!rdev->me_fw || !rdev->pfp_fw)
3269                 return -EINVAL;
3270
3271         si_cp_enable(rdev, false);
3272
3273         /* PFP */
3274         fw_data = (const __be32 *)rdev->pfp_fw->data;
3275         WREG32(CP_PFP_UCODE_ADDR, 0);
3276         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3277                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3278         WREG32(CP_PFP_UCODE_ADDR, 0);
3279
3280         /* CE */
3281         fw_data = (const __be32 *)rdev->ce_fw->data;
3282         WREG32(CP_CE_UCODE_ADDR, 0);
3283         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3284                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3285         WREG32(CP_CE_UCODE_ADDR, 0);
3286
3287         /* ME */
3288         fw_data = (const __be32 *)rdev->me_fw->data;
3289         WREG32(CP_ME_RAM_WADDR, 0);
3290         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3291                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3292         WREG32(CP_ME_RAM_WADDR, 0);
3293
3294         WREG32(CP_PFP_UCODE_ADDR, 0);
3295         WREG32(CP_CE_UCODE_ADDR, 0);
3296         WREG32(CP_ME_RAM_WADDR, 0);
3297         WREG32(CP_ME_RAM_RADDR, 0);
3298         return 0;
3299 }
3300
3301 static int si_cp_start(struct radeon_device *rdev)
3302 {
3303         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3304         int r, i;
3305
3306         r = radeon_ring_lock(rdev, ring, 7 + 4);
3307         if (r) {
3308                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3309                 return r;
3310         }
3311         /* init the CP */
3312         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3313         radeon_ring_write(ring, 0x1);
3314         radeon_ring_write(ring, 0x0);
3315         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3316         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3317         radeon_ring_write(ring, 0);
3318         radeon_ring_write(ring, 0);
3319
3320         /* init the CE partitions */
3321         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3322         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3323         radeon_ring_write(ring, 0xc000);
3324         radeon_ring_write(ring, 0xe000);
3325         radeon_ring_unlock_commit(rdev, ring);
3326
3327         si_cp_enable(rdev, true);
3328
3329         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3330         if (r) {
3331                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3332                 return r;
3333         }
3334
3335         /* setup clear context state */
3336         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3337         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3338
3339         for (i = 0; i < si_default_size; i++)
3340                 radeon_ring_write(ring, si_default_state[i]);
3341
3342         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3343         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3344
3345         /* set clear context state */
3346         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3347         radeon_ring_write(ring, 0);
3348
3349         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3350         radeon_ring_write(ring, 0x00000316);
3351         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3352         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3353
3354         radeon_ring_unlock_commit(rdev, ring);
3355
3356         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3357                 ring = &rdev->ring[i];
3358                 r = radeon_ring_lock(rdev, ring, 2);
3359
3360                 /* clear the compute context state */
3361                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3362                 radeon_ring_write(ring, 0);
3363
3364                 radeon_ring_unlock_commit(rdev, ring);
3365         }
3366
3367         return 0;
3368 }
3369
3370 static void si_cp_fini(struct radeon_device *rdev)
3371 {
3372         struct radeon_ring *ring;
3373         si_cp_enable(rdev, false);
3374
3375         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3376         radeon_ring_fini(rdev, ring);
3377         radeon_scratch_free(rdev, ring->rptr_save_reg);
3378
3379         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3380         radeon_ring_fini(rdev, ring);
3381         radeon_scratch_free(rdev, ring->rptr_save_reg);
3382
3383         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3384         radeon_ring_fini(rdev, ring);
3385         radeon_scratch_free(rdev, ring->rptr_save_reg);
3386 }
3387
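/*
 * si_cp_resume() programs all three command-processor rings (the gfx ring
 * and the two compute rings): ring buffer size, pointers and rptr write-back
 * address for each, then starts them through si_cp_start() and ring-tests
 * each one, marking any ring that fails as not ready.
 */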
3388 static int si_cp_resume(struct radeon_device *rdev)
3389 {
3390         struct radeon_ring *ring;
3391         u32 tmp;
3392         u32 rb_bufsz;
3393         int r;
3394
3395         si_enable_gui_idle_interrupt(rdev, false);
3396
3397         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3398         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3399
3400         /* Set the write pointer delay */
3401         WREG32(CP_RB_WPTR_DELAY, 0);
3402
3403         WREG32(CP_DEBUG, 0);
3404         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3405
3406         /* ring 0 - compute and gfx */
3407         /* Set ring buffer size */
3408         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3409         rb_bufsz = order_base_2(ring->ring_size / 8);
3410         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3411 #ifdef __BIG_ENDIAN
3412         tmp |= BUF_SWAP_32BIT;
3413 #endif
3414         WREG32(CP_RB0_CNTL, tmp);
3415
3416         /* Initialize the ring buffer's read and write pointers */
3417         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3418         ring->wptr = 0;
3419         WREG32(CP_RB0_WPTR, ring->wptr);
3420
3421         /* set the wb address whether it's enabled or not */
3422         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3423         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3424
3425         if (rdev->wb.enabled)
3426                 WREG32(SCRATCH_UMSK, 0xff);
3427         else {
3428                 tmp |= RB_NO_UPDATE;
3429                 WREG32(SCRATCH_UMSK, 0);
3430         }
3431
3432         mdelay(1);
3433         WREG32(CP_RB0_CNTL, tmp);
3434
3435         WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3436
3437         /* ring1  - compute only */
3438         /* Set ring buffer size */
3439         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3440         rb_bufsz = order_base_2(ring->ring_size / 8);
3441         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3442 #ifdef __BIG_ENDIAN
3443         tmp |= BUF_SWAP_32BIT;
3444 #endif
3445         WREG32(CP_RB1_CNTL, tmp);
3446
3447         /* Initialize the ring buffer's read and write pointers */
3448         WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3449         ring->wptr = 0;
3450         WREG32(CP_RB1_WPTR, ring->wptr);
3451
3452         /* set the wb address whether it's enabled or not */
3453         WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3454         WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3455
3456         mdelay(1);
3457         WREG32(CP_RB1_CNTL, tmp);
3458
3459         WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3460
3461         /* ring2 - compute only */
3462         /* Set ring buffer size */
3463         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3464         rb_bufsz = order_base_2(ring->ring_size / 8);
3465         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3466 #ifdef __BIG_ENDIAN
3467         tmp |= BUF_SWAP_32BIT;
3468 #endif
3469         WREG32(CP_RB2_CNTL, tmp);
3470
3471         /* Initialize the ring buffer's read and write pointers */
3472         WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3473         ring->wptr = 0;
3474         WREG32(CP_RB2_WPTR, ring->wptr);
3475
3476         /* set the wb address whether it's enabled or not */
3477         WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3478         WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3479
3480         mdelay(1);
3481         WREG32(CP_RB2_CNTL, tmp);
3482
3483         WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3484
3485         /* start the rings */
3486         si_cp_start(rdev);
3487         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3488         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3489         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3490         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3491         if (r) {
3492                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3493                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3494                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3495                 return r;
3496         }
3497         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3498         if (r) {
3499                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3500         }
3501         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3502         if (r) {
3503                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3504         }
3505
3506         si_enable_gui_idle_interrupt(rdev, true);
3507
3508         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3509                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3510
3511         return 0;
3512 }
3513
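/*
 * si_gpu_check_soft_reset() samples the GRBM, SRBM, DMA and VM status
 * registers and accumulates RADEON_RESET_* flags for every block that still
 * reports busy; an MC-busy indication is dropped again at the end, since the
 * memory controller is most likely just busy rather than hung.
 */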
3514 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3515 {
3516         u32 reset_mask = 0;
3517         u32 tmp;
3518
3519         /* GRBM_STATUS */
3520         tmp = RREG32(GRBM_STATUS);
3521         if (tmp & (PA_BUSY | SC_BUSY |
3522                    BCI_BUSY | SX_BUSY |
3523                    TA_BUSY | VGT_BUSY |
3524                    DB_BUSY | CB_BUSY |
3525                    GDS_BUSY | SPI_BUSY |
3526                    IA_BUSY | IA_BUSY_NO_DMA))
3527                 reset_mask |= RADEON_RESET_GFX;
3528
3529         if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3530                    CP_BUSY | CP_COHERENCY_BUSY))
3531                 reset_mask |= RADEON_RESET_CP;
3532
3533         if (tmp & GRBM_EE_BUSY)
3534                 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3535
3536         /* GRBM_STATUS2 */
3537         tmp = RREG32(GRBM_STATUS2);
3538         if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3539                 reset_mask |= RADEON_RESET_RLC;
3540
3541         /* DMA_STATUS_REG 0 */
3542         tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3543         if (!(tmp & DMA_IDLE))
3544                 reset_mask |= RADEON_RESET_DMA;
3545
3546         /* DMA_STATUS_REG 1 */
3547         tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3548         if (!(tmp & DMA_IDLE))
3549                 reset_mask |= RADEON_RESET_DMA1;
3550
3551         /* SRBM_STATUS2 */
3552         tmp = RREG32(SRBM_STATUS2);
3553         if (tmp & DMA_BUSY)
3554                 reset_mask |= RADEON_RESET_DMA;
3555
3556         if (tmp & DMA1_BUSY)
3557                 reset_mask |= RADEON_RESET_DMA1;
3558
3559         /* SRBM_STATUS */
3560         tmp = RREG32(SRBM_STATUS);
3561
3562         if (tmp & IH_BUSY)
3563                 reset_mask |= RADEON_RESET_IH;
3564
3565         if (tmp & SEM_BUSY)
3566                 reset_mask |= RADEON_RESET_SEM;
3567
3568         if (tmp & GRBM_RQ_PENDING)
3569                 reset_mask |= RADEON_RESET_GRBM;
3570
3571         if (tmp & VMC_BUSY)
3572                 reset_mask |= RADEON_RESET_VMC;
3573
3574         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3575                    MCC_BUSY | MCD_BUSY))
3576                 reset_mask |= RADEON_RESET_MC;
3577
3578         if (evergreen_is_display_hung(rdev))
3579                 reset_mask |= RADEON_RESET_DISPLAY;
3580
3581         /* VM_L2_STATUS */
3582         tmp = RREG32(VM_L2_STATUS);
3583         if (tmp & L2_BUSY)
3584                 reset_mask |= RADEON_RESET_VMC;
3585
3586         /* Skip MC reset as it's most likely not hung, just busy */
3587         if (reset_mask & RADEON_RESET_MC) {
3588                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3589                 reset_mask &= ~RADEON_RESET_MC;
3590         }
3591
3592         return reset_mask;
3593 }
3594
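/**
 * si_gpu_soft_reset - soft reset the requested blocks
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* mask of the blocks to reset
 *
 * Halt the CP, RLC and DMA engines and stop memory access, then pulse
 * the GRBM/SRBM soft reset bits for the requested blocks and restore
 * the MC (SI).
 */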
3595 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3596 {
3597         struct evergreen_mc_save save;
3598         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3599         u32 tmp;
3600
3601         if (reset_mask == 0)
3602                 return;
3603
3604         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3605
3606         evergreen_print_gpu_status_regs(rdev);
3607         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3608                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3609         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3610                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3611
3612         /* disable PG/CG */
3613         si_fini_pg(rdev);
3614         si_fini_cg(rdev);
3615
3616         /* stop the rlc */
3617         si_rlc_stop(rdev);
3618
3619         /* Disable CP parsing/prefetching */
3620         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3621
3622         if (reset_mask & RADEON_RESET_DMA) {
3623                 /* dma0 */
3624                 tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3625                 tmp &= ~DMA_RB_ENABLE;
3626                 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3627         }
3628         if (reset_mask & RADEON_RESET_DMA1) {
3629                 /* dma1 */
3630                 tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3631                 tmp &= ~DMA_RB_ENABLE;
3632                 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3633         }
3634
3635         udelay(50);
3636
3637         evergreen_mc_stop(rdev, &save);
3638         if (evergreen_mc_wait_for_idle(rdev)) {
3639                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3640         }
3641
3642         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3643                 grbm_soft_reset = SOFT_RESET_CB |
3644                         SOFT_RESET_DB |
3645                         SOFT_RESET_GDS |
3646                         SOFT_RESET_PA |
3647                         SOFT_RESET_SC |
3648                         SOFT_RESET_BCI |
3649                         SOFT_RESET_SPI |
3650                         SOFT_RESET_SX |
3651                         SOFT_RESET_TC |
3652                         SOFT_RESET_TA |
3653                         SOFT_RESET_VGT |
3654                         SOFT_RESET_IA;
3655         }
3656
3657         if (reset_mask & RADEON_RESET_CP) {
3658                 grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3659
3660                 srbm_soft_reset |= SOFT_RESET_GRBM;
3661         }
3662
3663         if (reset_mask & RADEON_RESET_DMA)
3664                 srbm_soft_reset |= SOFT_RESET_DMA;
3665
3666         if (reset_mask & RADEON_RESET_DMA1)
3667                 srbm_soft_reset |= SOFT_RESET_DMA1;
3668
3669         if (reset_mask & RADEON_RESET_DISPLAY)
3670                 srbm_soft_reset |= SOFT_RESET_DC;
3671
3672         if (reset_mask & RADEON_RESET_RLC)
3673                 grbm_soft_reset |= SOFT_RESET_RLC;
3674
3675         if (reset_mask & RADEON_RESET_SEM)
3676                 srbm_soft_reset |= SOFT_RESET_SEM;
3677
3678         if (reset_mask & RADEON_RESET_IH)
3679                 srbm_soft_reset |= SOFT_RESET_IH;
3680
3681         if (reset_mask & RADEON_RESET_GRBM)
3682                 srbm_soft_reset |= SOFT_RESET_GRBM;
3683
3684         if (reset_mask & RADEON_RESET_VMC)
3685                 srbm_soft_reset |= SOFT_RESET_VMC;
3686
3687         if (reset_mask & RADEON_RESET_MC)
3688                 srbm_soft_reset |= SOFT_RESET_MC;
3689
3690         if (grbm_soft_reset) {
3691                 tmp = RREG32(GRBM_SOFT_RESET);
3692                 tmp |= grbm_soft_reset;
3693                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3694                 WREG32(GRBM_SOFT_RESET, tmp);
3695                 tmp = RREG32(GRBM_SOFT_RESET);
3696
3697                 udelay(50);
3698
3699                 tmp &= ~grbm_soft_reset;
3700                 WREG32(GRBM_SOFT_RESET, tmp);
3701                 tmp = RREG32(GRBM_SOFT_RESET);
3702         }
3703
3704         if (srbm_soft_reset) {
3705                 tmp = RREG32(SRBM_SOFT_RESET);
3706                 tmp |= srbm_soft_reset;
3707                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3708                 WREG32(SRBM_SOFT_RESET, tmp);
3709                 tmp = RREG32(SRBM_SOFT_RESET);
3710
3711                 udelay(50);
3712
3713                 tmp &= ~srbm_soft_reset;
3714                 WREG32(SRBM_SOFT_RESET, tmp);
3715                 tmp = RREG32(SRBM_SOFT_RESET);
3716         }
3717
3718         /* Wait a little for things to settle down */
3719         udelay(50);
3720
3721         evergreen_mc_resume(rdev, &save);
3722         udelay(50);
3723
3724         evergreen_print_gpu_status_regs(rdev);
3725 }
3726
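/**
 * si_set_clk_bypass_mode - switch sclk/mclk to bypass
 *
 * @rdev: radeon_device pointer
 *
 * Put the system PLL into bypass, wait for the change to take effect
 * and deselect the memory PLL so the clocks no longer depend on the
 * PLLs.  Used before a pci config reset (SI).
 */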
3727 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3728 {
3729         u32 tmp, i;
3730
3731         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3732         tmp |= SPLL_BYPASS_EN;
3733         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3734
3735         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3736         tmp |= SPLL_CTLREQ_CHG;
3737         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3738
3739         for (i = 0; i < rdev->usec_timeout; i++) {
3740                 if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3741                         break;
3742                 udelay(1);
3743         }
3744
3745         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3746         tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3747         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3748
3749         tmp = RREG32(MPLL_CNTL_MODE);
3750         tmp &= ~MPLL_MCLK_SEL;
3751         WREG32(MPLL_CNTL_MODE, tmp);
3752 }
3753
3754 static void si_spll_powerdown(struct radeon_device *rdev)
3755 {
3756         u32 tmp;
3757
3758         tmp = RREG32(SPLL_CNTL_MODE);
3759         tmp |= SPLL_SW_DIR_CONTROL;
3760         WREG32(SPLL_CNTL_MODE, tmp);
3761
3762         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3763         tmp |= SPLL_RESET;
3764         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3765
3766         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3767         tmp |= SPLL_SLEEP;
3768         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3769
3770         tmp = RREG32(SPLL_CNTL_MODE);
3771         tmp &= ~SPLL_SW_DIR_CONTROL;
3772         WREG32(SPLL_CNTL_MODE, tmp);
3773 }
3774
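/**
 * si_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Quiesce the CP, DMA and RLC engines, stop memory access, switch the
 * clocks to bypass and power down the SPLL, then reset the chip through
 * the pci config space and wait for it to come back (SI).
 * Used as a fallback when the soft reset is not sufficient.
 */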
3775 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
3776 {
3777         struct evergreen_mc_save save;
3778         u32 tmp, i;
3779
3780         dev_info(rdev->dev, "GPU pci config reset\n");
3781
3782         /* disable dpm? */
3783
3784         /* disable cg/pg */
3785         si_fini_pg(rdev);
3786         si_fini_cg(rdev);
3787
3788         /* Disable CP parsing/prefetching */
3789         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3790         /* dma0 */
3791         tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3792         tmp &= ~DMA_RB_ENABLE;
3793         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3794         /* dma1 */
3795         tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3796         tmp &= ~DMA_RB_ENABLE;
3797         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3798         /* XXX other engines? */
3799
3800         /* halt the rlc, disable cp internal ints */
3801         si_rlc_stop(rdev);
3802
3803         udelay(50);
3804
3805         /* disable mem access */
3806         evergreen_mc_stop(rdev, &save);
3807         if (evergreen_mc_wait_for_idle(rdev)) {
3808                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3809         }
3810
3811         /* set mclk/sclk to bypass */
3812         si_set_clk_bypass_mode(rdev);
3813         /* powerdown spll */
3814         si_spll_powerdown(rdev);
3815         /* disable BM */
3816         pci_clear_master(rdev->pdev);
3817         /* reset */
3818         radeon_pci_config_reset(rdev);
3819         /* wait for asic to come out of reset */
3820         for (i = 0; i < rdev->usec_timeout; i++) {
3821                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
3822                         break;
3823                 udelay(1);
3824         }
3825 }
3826
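/**
 * si_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are hung and attempt a soft reset; if the hang
 * persists and hard resets are enabled (radeon_hard_reset), fall back
 * to a pci config reset (SI).
 * Returns 0 for success.
 */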
3827 int si_asic_reset(struct radeon_device *rdev)
3828 {
3829         u32 reset_mask;
3830
3831         reset_mask = si_gpu_check_soft_reset(rdev);
3832
3833         if (reset_mask)
3834                 r600_set_bios_scratch_engine_hung(rdev, true);
3835
3836         /* try soft reset */
3837         si_gpu_soft_reset(rdev, reset_mask);
3838
3839         reset_mask = si_gpu_check_soft_reset(rdev);
3840
3841         /* try pci config reset */
3842         if (reset_mask && radeon_hard_reset)
3843                 si_gpu_pci_config_reset(rdev);
3844
3845         reset_mask = si_gpu_check_soft_reset(rdev);
3846
3847         if (!reset_mask)
3848                 r600_set_bios_scratch_engine_hung(rdev, false);
3849
3850         return 0;
3851 }
3852
3853 /**
3854  * si_gfx_is_lockup - Check if the GFX engine is locked up
3855  *
3856  * @rdev: radeon_device pointer
3857  * @ring: radeon_ring structure holding ring information
3858  *
3859  * Check if the GFX engine is locked up.
3860  * Returns true if the engine appears to be locked up, false if not.
3861  */
3862 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3863 {
3864         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3865
3866         if (!(reset_mask & (RADEON_RESET_GFX |
3867                             RADEON_RESET_COMPUTE |
3868                             RADEON_RESET_CP))) {
3869                 radeon_ring_lockup_update(rdev, ring);
3870                 return false;
3871         }
3872         return radeon_ring_test_lockup(rdev, ring);
3873 }
3874
3875 /* MC */
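/**
 * si_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram in the GPU's address space and program the
 * system aperture and HDP registers while memory clients are stopped,
 * then disable the VGA renderer on asics with display hardware (SI).
 */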
3876 static void si_mc_program(struct radeon_device *rdev)
3877 {
3878         struct evergreen_mc_save save;
3879         u32 tmp;
3880         int i, j;
3881
3882         /* Initialize HDP */
3883         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3884                 WREG32((0x2c14 + j), 0x00000000);
3885                 WREG32((0x2c18 + j), 0x00000000);
3886                 WREG32((0x2c1c + j), 0x00000000);
3887                 WREG32((0x2c20 + j), 0x00000000);
3888                 WREG32((0x2c24 + j), 0x00000000);
3889         }
3890         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3891
3892         evergreen_mc_stop(rdev, &save);
3893         if (radeon_mc_wait_for_idle(rdev)) {
3894                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3895         }
3896         if (!ASIC_IS_NODCE(rdev))
3897                 /* Lock out access through the VGA aperture */
3898                 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3899         /* Update configuration */
3900         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3901                rdev->mc.vram_start >> 12);
3902         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3903                rdev->mc.vram_end >> 12);
3904         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3905                rdev->vram_scratch.gpu_addr >> 12);
3906         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3907         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3908         WREG32(MC_VM_FB_LOCATION, tmp);
3909         /* XXX double check these! */
3910         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3911         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3912         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3913         WREG32(MC_VM_AGP_BASE, 0);
3914         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3915         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3916         if (radeon_mc_wait_for_idle(rdev)) {
3917                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3918         }
3919         evergreen_mc_resume(rdev, &save);
3920         if (!ASIC_IS_NODCE(rdev)) {
3921                 /* we need to own VRAM, so turn off the VGA renderer here
3922                  * to stop it overwriting our objects */
3923                 rv515_vga_render_disable(rdev);
3924         }
3925 }
3926
3927 void si_vram_gtt_location(struct radeon_device *rdev,
3928                           struct radeon_mc *mc)
3929 {
3930         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3931                 /* leave room for at least 1024M GTT */
3932                 dev_warn(rdev->dev, "limiting VRAM\n");
3933                 mc->real_vram_size = 0xFFC0000000ULL;
3934                 mc->mc_vram_size = 0xFFC0000000ULL;
3935         }
3936         radeon_vram_location(rdev, &rdev->mc, 0);
3937         rdev->mc.gtt_base_align = 0;
3938         radeon_gtt_location(rdev, mc);
3939 }
3940
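/**
 * si_mc_init - initialize the memory controller driver state
 *
 * @rdev: radeon_device pointer
 *
 * Look up the amount of vram, the vram width and the aperture size,
 * and work out the placement of vram and gart within the GPU's address
 * space (SI).
 * Returns 0 for success.
 */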
3941 static int si_mc_init(struct radeon_device *rdev)
3942 {
3943         u32 tmp;
3944         int chansize, numchan;
3945
3946         /* Get VRAM information */
3947         rdev->mc.vram_is_ddr = true;
3948         tmp = RREG32(MC_ARB_RAMCFG);
3949         if (tmp & CHANSIZE_OVERRIDE) {
3950                 chansize = 16;
3951         } else if (tmp & CHANSIZE_MASK) {
3952                 chansize = 64;
3953         } else {
3954                 chansize = 32;
3955         }
3956         tmp = RREG32(MC_SHARED_CHMAP);
3957         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3958         case 0:
3959         default:
3960                 numchan = 1;
3961                 break;
3962         case 1:
3963                 numchan = 2;
3964                 break;
3965         case 2:
3966                 numchan = 4;
3967                 break;
3968         case 3:
3969                 numchan = 8;
3970                 break;
3971         case 4:
3972                 numchan = 3;
3973                 break;
3974         case 5:
3975                 numchan = 6;
3976                 break;
3977         case 6:
3978                 numchan = 10;
3979                 break;
3980         case 7:
3981                 numchan = 12;
3982                 break;
3983         case 8:
3984                 numchan = 16;
3985                 break;
3986         }
3987         rdev->mc.vram_width = numchan * chansize;
3988         /* Could the aperture size report 0? */
3989         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3990         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3991         /* size in MB on si */
3992         tmp = RREG32(CONFIG_MEMSIZE);
3993         /* some boards may have garbage in the upper 16 bits */
3994         if (tmp & 0xffff0000) {
3995                 DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
3996                 if (tmp & 0xffff)
3997                         tmp &= 0xffff;
3998         }
3999         rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4000         rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4001         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4002         si_vram_gtt_location(rdev, &rdev->mc);
4003         radeon_update_bandwidth_info(rdev);
4004
4005         return 0;
4006 }
4007
4008 /*
4009  * GART
4010  */
4011 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4012 {
4013         /* flush hdp cache */
4014         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4015
4016         /* bits 0-15 are the VM contexts 0-15 */
4017         WREG32(VM_INVALIDATE_REQUEST, 1);
4018 }
4019
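/**
 * si_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * Pin the gart page table in vram, set up the TLB and L2 cache and
 * program the page table base for VM contexts 0-15; context 0 backs
 * the gart while contexts 1-15 are used for per-process VMs (SI).
 * Returns 0 for success, error for failure.
 */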
4020 static int si_pcie_gart_enable(struct radeon_device *rdev)
4021 {
4022         int r, i;
4023
4024         if (rdev->gart.robj == NULL) {
4025                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4026                 return -EINVAL;
4027         }
4028         r = radeon_gart_table_vram_pin(rdev);
4029         if (r)
4030                 return r;
4031         radeon_gart_restore(rdev);
4032         /* Setup TLB control */
4033         WREG32(MC_VM_MX_L1_TLB_CNTL,
4034                (0xA << 7) |
4035                ENABLE_L1_TLB |
4036                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4037                ENABLE_ADVANCED_DRIVER_MODEL |
4038                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4039         /* Setup L2 cache */
4040         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4041                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4042                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4043                EFFECTIVE_L2_QUEUE_SIZE(7) |
4044                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4045         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4046         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4047                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4048         /* setup context0 */
4049         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4050         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4051         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4052         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4053                         (u32)(rdev->dummy_page.addr >> 12));
4054         WREG32(VM_CONTEXT0_CNTL2, 0);
4055         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4056                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4057
4058         WREG32(0x15D4, 0);
4059         WREG32(0x15D8, 0);
4060         WREG32(0x15DC, 0);
4061
4062         /* empty context1-15 */
4063         /* set vm size, must be a multiple of 4 */
4064         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4065         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4066         /* Assign the pt base to something valid for now; the pts used for
4067          * the VMs are determined by the application and set up and assigned
4068          * on the fly in the vm part of radeon_gart.c
4069          */
4070         for (i = 1; i < 16; i++) {
4071                 if (i < 8)
4072                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4073                                rdev->gart.table_addr >> 12);
4074                 else
4075                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4076                                rdev->gart.table_addr >> 12);
4077         }
4078
4079         /* enable context1-15 */
4080         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4081                (u32)(rdev->dummy_page.addr >> 12));
4082         WREG32(VM_CONTEXT1_CNTL2, 4);
4083         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4084                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4085                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4086                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4087                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4088                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4089                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4090                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4091                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4092                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4093                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4094                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4095                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4096
4097         si_pcie_gart_tlb_flush(rdev);
4098         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4099                  (unsigned)(rdev->mc.gtt_size >> 20),
4100                  (unsigned long long)rdev->gart.table_addr);
4101         rdev->gart.ready = true;
4102         return 0;
4103 }
4104
4105 static void si_pcie_gart_disable(struct radeon_device *rdev)
4106 {
4107         /* Disable all tables */
4108         WREG32(VM_CONTEXT0_CNTL, 0);
4109         WREG32(VM_CONTEXT1_CNTL, 0);
4110         /* Setup TLB control */
4111         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4112                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4113         /* Setup L2 cache */
4114         WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4115                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4116                EFFECTIVE_L2_QUEUE_SIZE(7) |
4117                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4118         WREG32(VM_L2_CNTL2, 0);
4119         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4120                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4121         radeon_gart_table_vram_unpin(rdev);
4122 }
4123
4124 static void si_pcie_gart_fini(struct radeon_device *rdev)
4125 {
4126         si_pcie_gart_disable(rdev);
4127         radeon_gart_table_vram_free(rdev);
4128         radeon_gart_fini(rdev);
4129 }
4130
4131 /* vm parser */
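/**
 * si_vm_reg_valid - check if a register may be written from a VM IB
 *
 * @reg: register offset (in bytes)
 *
 * Context registers are always allowed; config registers are checked
 * against a small whitelist (SI).
 * Returns true if the register is safe to write, false otherwise.
 */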
4132 static bool si_vm_reg_valid(u32 reg)
4133 {
4134         /* context regs are fine */
4135         if (reg >= 0x28000)
4136                 return true;
4137
4138         /* check config regs */
4139         switch (reg) {
4140         case GRBM_GFX_INDEX:
4141         case CP_STRMOUT_CNTL:
4142         case VGT_VTX_VECT_EJECT_REG:
4143         case VGT_CACHE_INVALIDATION:
4144         case VGT_ESGS_RING_SIZE:
4145         case VGT_GSVS_RING_SIZE:
4146         case VGT_GS_VERTEX_REUSE:
4147         case VGT_PRIMITIVE_TYPE:
4148         case VGT_INDEX_TYPE:
4149         case VGT_NUM_INDICES:
4150         case VGT_NUM_INSTANCES:
4151         case VGT_TF_RING_SIZE:
4152         case VGT_HS_OFFCHIP_PARAM:
4153         case VGT_TF_MEMORY_BASE:
4154         case PA_CL_ENHANCE:
4155         case PA_SU_LINE_STIPPLE_VALUE:
4156         case PA_SC_LINE_STIPPLE_STATE:
4157         case PA_SC_ENHANCE:
4158         case SQC_CACHES:
4159         case SPI_STATIC_THREAD_MGMT_1:
4160         case SPI_STATIC_THREAD_MGMT_2:
4161         case SPI_STATIC_THREAD_MGMT_3:
4162         case SPI_PS_MAX_WAVE_ID:
4163         case SPI_CONFIG_CNTL:
4164         case SPI_CONFIG_CNTL_1:
4165         case TA_CNTL_AUX:
4166                 return true;
4167         default:
4168                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4169                 return false;
4170         }
4171 }
4172
4173 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4174                                   u32 *ib, struct radeon_cs_packet *pkt)
4175 {
4176         switch (pkt->opcode) {
4177         case PACKET3_NOP:
4178         case PACKET3_SET_BASE:
4179         case PACKET3_SET_CE_DE_COUNTERS:
4180         case PACKET3_LOAD_CONST_RAM:
4181         case PACKET3_WRITE_CONST_RAM:
4182         case PACKET3_WRITE_CONST_RAM_OFFSET:
4183         case PACKET3_DUMP_CONST_RAM:
4184         case PACKET3_INCREMENT_CE_COUNTER:
4185         case PACKET3_WAIT_ON_DE_COUNTER:
4186         case PACKET3_CE_WRITE:
4187                 break;
4188         default:
4189                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4190                 return -EINVAL;
4191         }
4192         return 0;
4193 }
4194
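/**
 * si_vm_packet3_cp_dma_check - validate a CP DMA packet
 *
 * @ib: indirect buffer contents
 * @idx: index of the first dword of the packet body
 *
 * When the CP DMA source or destination address space is a register,
 * make sure every register touched by the copy passes si_vm_reg_valid()
 * (SI).
 * Returns 0 if the packet is safe, -EINVAL otherwise.
 */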
4195 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4196 {
4197         u32 start_reg, reg, i;
4198         u32 command = ib[idx + 4];
4199         u32 info = ib[idx + 1];
4200         u32 idx_value = ib[idx];
4201         if (command & PACKET3_CP_DMA_CMD_SAS) {
4202                 /* src address space is register */
4203                 if (((info & 0x60000000) >> 29) == 0) {
4204                         start_reg = idx_value << 2;
4205                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4206                                 reg = start_reg;
4207                                 if (!si_vm_reg_valid(reg)) {
4208                                         DRM_ERROR("CP DMA Bad SRC register\n");
4209                                         return -EINVAL;
4210                                 }
4211                         } else {
4212                                 for (i = 0; i < (command & 0x1fffff); i++) {
4213                                         reg = start_reg + (4 * i);
4214                                         if (!si_vm_reg_valid(reg)) {
4215                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4216                                                 return -EINVAL;
4217                                         }
4218                                 }
4219                         }
4220                 }
4221         }
4222         if (command & PACKET3_CP_DMA_CMD_DAS) {
4223                 /* dst address space is register */
4224                 if (((info & 0x00300000) >> 20) == 0) {
4225                         start_reg = ib[idx + 2];
4226                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4227                                 reg = start_reg;
4228                                 if (!si_vm_reg_valid(reg)) {
4229                                         DRM_ERROR("CP DMA Bad DST register\n");
4230                                         return -EINVAL;
4231                                 }
4232                         } else {
4233                                 for (i = 0; i < (command & 0x1fffff); i++) {
4234                                         reg = start_reg + (4 * i);
4235                                         if (!si_vm_reg_valid(reg)) {
4236                                                 DRM_ERROR("CP DMA Bad DST register\n");
4237                                                 return -EINVAL;
4238                                         }
4239                                 }
4240                         }
4241                 }
4242         }
4243         return 0;
4244 }
4245
4246 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4247                                    u32 *ib, struct radeon_cs_packet *pkt)
4248 {
4249         int r;
4250         u32 idx = pkt->idx + 1;
4251         u32 idx_value = ib[idx];
4252         u32 start_reg, end_reg, reg, i;
4253
4254         switch (pkt->opcode) {
4255         case PACKET3_NOP:
4256         case PACKET3_SET_BASE:
4257         case PACKET3_CLEAR_STATE:
4258         case PACKET3_INDEX_BUFFER_SIZE:
4259         case PACKET3_DISPATCH_DIRECT:
4260         case PACKET3_DISPATCH_INDIRECT:
4261         case PACKET3_ALLOC_GDS:
4262         case PACKET3_WRITE_GDS_RAM:
4263         case PACKET3_ATOMIC_GDS:
4264         case PACKET3_ATOMIC:
4265         case PACKET3_OCCLUSION_QUERY:
4266         case PACKET3_SET_PREDICATION:
4267         case PACKET3_COND_EXEC:
4268         case PACKET3_PRED_EXEC:
4269         case PACKET3_DRAW_INDIRECT:
4270         case PACKET3_DRAW_INDEX_INDIRECT:
4271         case PACKET3_INDEX_BASE:
4272         case PACKET3_DRAW_INDEX_2:
4273         case PACKET3_CONTEXT_CONTROL:
4274         case PACKET3_INDEX_TYPE:
4275         case PACKET3_DRAW_INDIRECT_MULTI:
4276         case PACKET3_DRAW_INDEX_AUTO:
4277         case PACKET3_DRAW_INDEX_IMMD:
4278         case PACKET3_NUM_INSTANCES:
4279         case PACKET3_DRAW_INDEX_MULTI_AUTO:
4280         case PACKET3_STRMOUT_BUFFER_UPDATE:
4281         case PACKET3_DRAW_INDEX_OFFSET_2:
4282         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4283         case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4284         case PACKET3_MPEG_INDEX:
4285         case PACKET3_WAIT_REG_MEM:
4286         case PACKET3_MEM_WRITE:
4287         case PACKET3_PFP_SYNC_ME:
4288         case PACKET3_SURFACE_SYNC:
4289         case PACKET3_EVENT_WRITE:
4290         case PACKET3_EVENT_WRITE_EOP:
4291         case PACKET3_EVENT_WRITE_EOS:
4292         case PACKET3_SET_CONTEXT_REG:
4293         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4294         case PACKET3_SET_SH_REG:
4295         case PACKET3_SET_SH_REG_OFFSET:
4296         case PACKET3_INCREMENT_DE_COUNTER:
4297         case PACKET3_WAIT_ON_CE_COUNTER:
4298         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4299         case PACKET3_ME_WRITE:
4300                 break;
4301         case PACKET3_COPY_DATA:
4302                 if ((idx_value & 0xf00) == 0) {
4303                         reg = ib[idx + 3] * 4;
4304                         if (!si_vm_reg_valid(reg))
4305                                 return -EINVAL;
4306                 }
4307                 break;
4308         case PACKET3_WRITE_DATA:
4309                 if ((idx_value & 0xf00) == 0) {
4310                         start_reg = ib[idx + 1] * 4;
4311                         if (idx_value & 0x10000) {
4312                                 if (!si_vm_reg_valid(start_reg))
4313                                         return -EINVAL;
4314                         } else {
4315                                 for (i = 0; i < (pkt->count - 2); i++) {
4316                                         reg = start_reg + (4 * i);
4317                                         if (!si_vm_reg_valid(reg))
4318                                                 return -EINVAL;
4319                                 }
4320                         }
4321                 }
4322                 break;
4323         case PACKET3_COND_WRITE:
4324                 if (idx_value & 0x100) {
4325                         reg = ib[idx + 5] * 4;
4326                         if (!si_vm_reg_valid(reg))
4327                                 return -EINVAL;
4328                 }
4329                 break;
4330         case PACKET3_COPY_DW:
4331                 if (idx_value & 0x2) {
4332                         reg = ib[idx + 3] * 4;
4333                         if (!si_vm_reg_valid(reg))
4334                                 return -EINVAL;
4335                 }
4336                 break;
4337         case PACKET3_SET_CONFIG_REG:
4338                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4339                 end_reg = 4 * pkt->count + start_reg - 4;
4340                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4341                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4342                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4343                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4344                         return -EINVAL;
4345                 }
4346                 for (i = 0; i < pkt->count; i++) {
4347                         reg = start_reg + (4 * i);
4348                         if (!si_vm_reg_valid(reg))
4349                                 return -EINVAL;
4350                 }
4351                 break;
4352         case PACKET3_CP_DMA:
4353                 r = si_vm_packet3_cp_dma_check(ib, idx);
4354                 if (r)
4355                         return r;
4356                 break;
4357         default:
4358                 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4359                 return -EINVAL;
4360         }
4361         return 0;
4362 }
4363
4364 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4365                                        u32 *ib, struct radeon_cs_packet *pkt)
4366 {
4367         int r;
4368         u32 idx = pkt->idx + 1;
4369         u32 idx_value = ib[idx];
4370         u32 start_reg, reg, i;
4371
4372         switch (pkt->opcode) {
4373         case PACKET3_NOP:
4374         case PACKET3_SET_BASE:
4375         case PACKET3_CLEAR_STATE:
4376         case PACKET3_DISPATCH_DIRECT:
4377         case PACKET3_DISPATCH_INDIRECT:
4378         case PACKET3_ALLOC_GDS:
4379         case PACKET3_WRITE_GDS_RAM:
4380         case PACKET3_ATOMIC_GDS:
4381         case PACKET3_ATOMIC:
4382         case PACKET3_OCCLUSION_QUERY:
4383         case PACKET3_SET_PREDICATION:
4384         case PACKET3_COND_EXEC:
4385         case PACKET3_PRED_EXEC:
4386         case PACKET3_CONTEXT_CONTROL:
4387         case PACKET3_STRMOUT_BUFFER_UPDATE:
4388         case PACKET3_WAIT_REG_MEM:
4389         case PACKET3_MEM_WRITE:
4390         case PACKET3_PFP_SYNC_ME:
4391         case PACKET3_SURFACE_SYNC:
4392         case PACKET3_EVENT_WRITE:
4393         case PACKET3_EVENT_WRITE_EOP:
4394         case PACKET3_EVENT_WRITE_EOS:
4395         case PACKET3_SET_CONTEXT_REG:
4396         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4397         case PACKET3_SET_SH_REG:
4398         case PACKET3_SET_SH_REG_OFFSET:
4399         case PACKET3_INCREMENT_DE_COUNTER:
4400         case PACKET3_WAIT_ON_CE_COUNTER:
4401         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4402         case PACKET3_ME_WRITE:
4403                 break;
4404         case PACKET3_COPY_DATA:
4405                 if ((idx_value & 0xf00) == 0) {
4406                         reg = ib[idx + 3] * 4;
4407                         if (!si_vm_reg_valid(reg))
4408                                 return -EINVAL;
4409                 }
4410                 break;
4411         case PACKET3_WRITE_DATA:
4412                 if ((idx_value & 0xf00) == 0) {
4413                         start_reg = ib[idx + 1] * 4;
4414                         if (idx_value & 0x10000) {
4415                                 if (!si_vm_reg_valid(start_reg))
4416                                         return -EINVAL;
4417                         } else {
4418                                 for (i = 0; i < (pkt->count - 2); i++) {
4419                                         reg = start_reg + (4 * i);
4420                                         if (!si_vm_reg_valid(reg))
4421                                                 return -EINVAL;
4422                                 }
4423                         }
4424                 }
4425                 break;
4426         case PACKET3_COND_WRITE:
4427                 if (idx_value & 0x100) {
4428                         reg = ib[idx + 5] * 4;
4429                         if (!si_vm_reg_valid(reg))
4430                                 return -EINVAL;
4431                 }
4432                 break;
4433         case PACKET3_COPY_DW:
4434                 if (idx_value & 0x2) {
4435                         reg = ib[idx + 3] * 4;
4436                         if (!si_vm_reg_valid(reg))
4437                                 return -EINVAL;
4438                 }
4439                 break;
4440         case PACKET3_CP_DMA:
4441                 r = si_vm_packet3_cp_dma_check(ib, idx);
4442                 if (r)
4443                         return r;
4444                 break;
4445         default:
4446                 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4447                 return -EINVAL;
4448         }
4449         return 0;
4450 }
4451
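/**
 * si_ib_parse - validate an indirect buffer for a VM submission
 *
 * @rdev: radeon_device pointer
 * @ib: radeon_ib pointer
 *
 * Walk the packets in the IB and check them with the CE, GFX or
 * compute packet3 checker depending on the target ring; type 0
 * packets and unknown packet types are rejected (SI).
 * Returns 0 if all packets are valid, error for failure.
 */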
4452 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4453 {
4454         int ret = 0;
4455         u32 idx = 0;
4456         struct radeon_cs_packet pkt;
4457
4458         do {
4459                 pkt.idx = idx;
4460                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4461                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4462                 pkt.one_reg_wr = 0;
4463                 switch (pkt.type) {
4464                 case RADEON_PACKET_TYPE0:
4465                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4466                         ret = -EINVAL;
4467                         break;
4468                 case RADEON_PACKET_TYPE2:
4469                         idx += 1;
4470                         break;
4471                 case RADEON_PACKET_TYPE3:
4472                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4473                         if (ib->is_const_ib)
4474                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4475                         else {
4476                                 switch (ib->ring) {
4477                                 case RADEON_RING_TYPE_GFX_INDEX:
4478                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4479                                         break;
4480                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4481                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4482                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4483                                         break;
4484                                 default:
4485                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4486                                         ret = -EINVAL;
4487                                         break;
4488                                 }
4489                         }
4490                         idx += pkt.count + 2;
4491                         break;
4492                 default:
4493                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4494                         ret = -EINVAL;
4495                         break;
4496                 }
4497                 if (ret)
4498                         break;
4499         } while (idx < ib->length_dw);
4500
4501         return ret;
4502 }
4503
4504 /*
4505  * vm
4506  */
4507 int si_vm_init(struct radeon_device *rdev)
4508 {
4509         /* number of VMs */
4510         rdev->vm_manager.nvm = 16;
4511         /* base offset of vram pages */
4512         rdev->vm_manager.vram_base_offset = 0;
4513
4514         return 0;
4515 }
4516
4517 void si_vm_fini(struct radeon_device *rdev)
4518 {
4519 }
4520
4521 /**
4522  * si_vm_decode_fault - print human readable fault info
4523  *
4524  * @rdev: radeon_device pointer
4525  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4526  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4527  *
4528  * Print human readable fault information (SI).
4529  */
4530 static void si_vm_decode_fault(struct radeon_device *rdev,
4531                                u32 status, u32 addr)
4532 {
4533         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4534         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4535         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4536         char *block;
4537
4538         if (rdev->family == CHIP_TAHITI) {
4539                 switch (mc_id) {
4540                 case 160:
4541                 case 144:
4542                 case 96:
4543                 case 80:
4544                 case 224:
4545                 case 208:
4546                 case 32:
4547                 case 16:
4548                         block = "CB";
4549                         break;
4550                 case 161:
4551                 case 145:
4552                 case 97:
4553                 case 81:
4554                 case 225:
4555                 case 209:
4556                 case 33:
4557                 case 17:
4558                         block = "CB_FMASK";
4559                         break;
4560                 case 162:
4561                 case 146:
4562                 case 98:
4563                 case 82:
4564                 case 226:
4565                 case 210:
4566                 case 34:
4567                 case 18:
4568                         block = "CB_CMASK";
4569                         break;
4570                 case 163:
4571                 case 147:
4572                 case 99:
4573                 case 83:
4574                 case 227:
4575                 case 211:
4576                 case 35:
4577                 case 19:
4578                         block = "CB_IMMED";
4579                         break;
4580                 case 164:
4581                 case 148:
4582                 case 100:
4583                 case 84:
4584                 case 228:
4585                 case 212:
4586                 case 36:
4587                 case 20:
4588                         block = "DB";
4589                         break;
4590                 case 165:
4591                 case 149:
4592                 case 101:
4593                 case 85:
4594                 case 229:
4595                 case 213:
4596                 case 37:
4597                 case 21:
4598                         block = "DB_HTILE";
4599                         break;
4600                 case 167:
4601                 case 151:
4602                 case 103:
4603                 case 87:
4604                 case 231:
4605                 case 215:
4606                 case 39:
4607                 case 23:
4608                         block = "DB_STEN";
4609                         break;
4610                 case 72:
4611                 case 68:
4612                 case 64:
4613                 case 8:
4614                 case 4:
4615                 case 0:
4616                 case 136:
4617                 case 132:
4618                 case 128:
4619                 case 200:
4620                 case 196:
4621                 case 192:
4622                         block = "TC";
4623                         break;
4624                 case 112:
4625                 case 48:
4626                         block = "CP";
4627                         break;
4628                 case 49:
4629                 case 177:
4630                 case 50:
4631                 case 178:
4632                         block = "SH";
4633                         break;
4634                 case 53:
4635                 case 190:
4636                         block = "VGT";
4637                         break;
4638                 case 117:
4639                         block = "IH";
4640                         break;
4641                 case 51:
4642                 case 115:
4643                         block = "RLC";
4644                         break;
4645                 case 119:
4646                 case 183:
4647                         block = "DMA0";
4648                         break;
4649                 case 61:
4650                         block = "DMA1";
4651                         break;
4652                 case 248:
4653                 case 120:
4654                         block = "HDP";
4655                         break;
4656                 default:
4657                         block = "unknown";
4658                         break;
4659                 }
4660         } else {
4661                 switch (mc_id) {
4662                 case 32:
4663                 case 16:
4664                 case 96:
4665                 case 80:
4666                 case 160:
4667                 case 144:
4668                 case 224:
4669                 case 208:
4670                         block = "CB";
4671                         break;
4672                 case 33:
4673                 case 17:
4674                 case 97:
4675                 case 81:
4676                 case 161:
4677                 case 145:
4678                 case 225:
4679                 case 209:
4680                         block = "CB_FMASK";
4681                         break;
4682                 case 34:
4683                 case 18:
4684                 case 98:
4685                 case 82:
4686                 case 162:
4687                 case 146:
4688                 case 226:
4689                 case 210:
4690                         block = "CB_CMASK";
4691                         break;
4692                 case 35:
4693                 case 19:
4694                 case 99:
4695                 case 83:
4696                 case 163:
4697                 case 147:
4698                 case 227:
4699                 case 211:
4700                         block = "CB_IMMED";
4701                         break;
4702                 case 36:
4703                 case 20:
4704                 case 100:
4705                 case 84:
4706                 case 164:
4707                 case 148:
4708                 case 228:
4709                 case 212:
4710                         block = "DB";
4711                         break;
4712                 case 37:
4713                 case 21:
4714                 case 101:
4715                 case 85:
4716                 case 165:
4717                 case 149:
4718                 case 229:
4719                 case 213:
4720                         block = "DB_HTILE";
4721                         break;
4722                 case 39:
4723                 case 23:
4724                 case 103:
4725                 case 87:
4726                 case 167:
4727                 case 151:
4728                 case 231:
4729                 case 215:
4730                         block = "DB_STEN";
4731                         break;
4732                 case 72:
4733                 case 68:
4734                 case 8:
4735                 case 4:
4736                 case 136:
4737                 case 132:
4738                 case 200:
4739                 case 196:
4740                         block = "TC";
4741                         break;
4742                 case 112:
4743                 case 48:
4744                         block = "CP";
4745                         break;
4746                 case 49:
4747                 case 177:
4748                 case 50:
4749                 case 178:
4750                         block = "SH";
4751                         break;
4752                 case 53:
4753                         block = "VGT";
4754                         break;
4755                 case 117:
4756                         block = "IH";
4757                         break;
4758                 case 51:
4759                 case 115:
4760                         block = "RLC";
4761                         break;
4762                 case 119:
4763                 case 183:
4764                         block = "DMA0";
4765                         break;
4766                 case 61:
4767                         block = "DMA1";
4768                         break;
4769                 case 248:
4770                 case 120:
4771                         block = "HDP";
4772                         break;
4773                 default:
4774                         block = "unknown";
4775                         break;
4776                 }
4777         }
4778
4779         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4780                protections, vmid, addr,
4781                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4782                block, mc_id);
4783 }
4784
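/**
 * si_vm_flush - flush the TLB for a VM
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit the flush on
 * @vm: radeon_vm pointer
 *
 * Update the page table base address for the given VM id, flush the
 * HDP cache, invalidate the TLB for that VM context and sync the PFP
 * to the ME (SI).
 */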
4785 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4786 {
4787         struct radeon_ring *ring = &rdev->ring[ridx];
4788
4789         if (vm == NULL)
4790                 return;
4791
4792         /* write new base address */
4793         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4794         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4795                                  WRITE_DATA_DST_SEL(0)));
4796
4797         if (vm->id < 8) {
4798                 radeon_ring_write(ring,
4799                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4800         } else {
4801                 radeon_ring_write(ring,
4802                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4803         }
4804         radeon_ring_write(ring, 0);
4805         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4806
4807         /* flush hdp cache */
4808         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4809         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4810                                  WRITE_DATA_DST_SEL(0)));
4811         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4812         radeon_ring_write(ring, 0);
4813         radeon_ring_write(ring, 0x1);
4814
4815         /* bits 0-15 are the VM contexts 0-15 */
4816         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4817         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4818                                  WRITE_DATA_DST_SEL(0)));
4819         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4820         radeon_ring_write(ring, 0);
4821         radeon_ring_write(ring, 1 << vm->id);
4822
4823         /* sync PFP to ME, otherwise we might get invalid PFP reads */
4824         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4825         radeon_ring_write(ring, 0x0);
4826 }
4827
4828 /*
4829  *  Power and clock gating
4830  */
4831 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4832 {
4833         int i;
4834
4835         for (i = 0; i < rdev->usec_timeout; i++) {
4836                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4837                         break;
4838                 udelay(1);
4839         }
4840
4841         for (i = 0; i < rdev->usec_timeout; i++) {
4842                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4843                         break;
4844                 udelay(1);
4845         }
4846 }
4847
4848 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4849                                          bool enable)
4850 {
4851         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4852         u32 mask;
4853         int i;
4854
4855         if (enable)
4856                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4857         else
4858                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4859         WREG32(CP_INT_CNTL_RING0, tmp);
4860
4861         if (!enable) {
4862                 /* read a gfx register */
4863                 tmp = RREG32(DB_DEPTH_INFO);
4864
4865                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4866                 for (i = 0; i < rdev->usec_timeout; i++) {
4867                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4868                                 break;
4869                         udelay(1);
4870                 }
4871         }
4872 }
4873
4874 static void si_set_uvd_dcm(struct radeon_device *rdev,
4875                            bool sw_mode)
4876 {
4877         u32 tmp, tmp2;
4878
4879         tmp = RREG32(UVD_CGC_CTRL);
4880         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4881         tmp |= DCM | CG_DT(1) | CLK_OD(4);
4882
4883         if (sw_mode) {
4884                 tmp &= ~0x7ffff800;
4885                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4886         } else {
4887                 tmp |= 0x7ffff800;
4888                 tmp2 = 0;
4889         }
4890
4891         WREG32(UVD_CGC_CTRL, tmp);
4892         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4893 }
4894
4895 void si_init_uvd_internal_cg(struct radeon_device *rdev)
4896 {
4897         bool hw_mode = true;
4898
4899         if (hw_mode) {
4900                 si_set_uvd_dcm(rdev, false);
4901         } else {
4902                 u32 tmp = RREG32(UVD_CGC_CTRL);
4903                 tmp &= ~DCM;
4904                 WREG32(UVD_CGC_CTRL, tmp);
4905         }
4906 }
4907
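/**
 * si_halt_rlc - temporarily disable the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Disable the RLC if it is running and wait for the RLC serdes to go
 * idle (SI).
 * Returns the previous RLC_CNTL value so it can be restored later with
 * si_update_rlc().
 */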
4908 static u32 si_halt_rlc(struct radeon_device *rdev)
4909 {
4910         u32 data, orig;
4911
4912         orig = data = RREG32(RLC_CNTL);
4913
4914         if (data & RLC_ENABLE) {
4915                 data &= ~RLC_ENABLE;
4916                 WREG32(RLC_CNTL, data);
4917
4918                 si_wait_for_rlc_serdes(rdev);
4919         }
4920
4921         return orig;
4922 }
4923
4924 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4925 {
4926         u32 tmp;
4927
4928         tmp = RREG32(RLC_CNTL);
4929         if (tmp != rlc)
4930                 WREG32(RLC_CNTL, rlc);
4931 }
4932
4933 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4934 {
4935         u32 data, orig;
4936
4937         orig = data = RREG32(DMA_PG);
4938         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4939                 data |= PG_CNTL_ENABLE;
4940         else
4941                 data &= ~PG_CNTL_ENABLE;
4942         if (orig != data)
4943                 WREG32(DMA_PG, data);
4944 }
4945
4946 static void si_init_dma_pg(struct radeon_device *rdev)
4947 {
4948         u32 tmp;
4949
4950         WREG32(DMA_PGFSM_WRITE,  0x00002000);
4951         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4952
4953         for (tmp = 0; tmp < 5; tmp++)
4954                 WREG32(DMA_PGFSM_WRITE, 0);
4955 }
4956
4957 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
4958                                bool enable)
4959 {
4960         u32 tmp;
4961
4962         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
4963                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
4964                 WREG32(RLC_TTOP_D, tmp);
4965
4966                 tmp = RREG32(RLC_PG_CNTL);
4967                 tmp |= GFX_PG_ENABLE;
4968                 WREG32(RLC_PG_CNTL, tmp);
4969
4970                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4971                 tmp |= AUTO_PG_EN;
4972                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4973         } else {
4974                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4975                 tmp &= ~AUTO_PG_EN;
4976                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4977
4978                 tmp = RREG32(DB_RENDER_CONTROL);
4979         }
4980 }
4981
4982 static void si_init_gfx_cgpg(struct radeon_device *rdev)
4983 {
4984         u32 tmp;
4985
4986         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
4987
4988         tmp = RREG32(RLC_PG_CNTL);
4989         tmp |= GFX_PG_SRC;
4990         WREG32(RLC_PG_CNTL, tmp);
4991
4992         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
4993
4994         tmp = RREG32(RLC_AUTO_PG_CTRL);
4995
4996         tmp &= ~GRBM_REG_SGIT_MASK;
4997         tmp |= GRBM_REG_SGIT(0x700);
4998         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
4999         WREG32(RLC_AUTO_PG_CTRL, tmp);
5000 }
5001
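/**
 * si_get_cu_active_bitmap - get the bitmap of active CUs
 *
 * @rdev: radeon_device pointer
 * @se: shader engine to query
 * @sh: shader array to query
 *
 * Combine the fixed and user shader array config registers to work out
 * which compute units in the given SE/SH are enabled (SI).
 * Returns a bitmap with one bit set per active CU.
 */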
5002 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5003 {
5004         u32 mask = 0, tmp, tmp1;
5005         int i;
5006
5007         si_select_se_sh(rdev, se, sh);
5008         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5009         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5010         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5011
5012         tmp &= 0xffff0000;
5013
5014         tmp |= tmp1;
5015         tmp >>= 16;
5016
5017         for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
5018                 mask <<= 1;
5019                 mask |= 1;
5020         }
5021
5022         return (~tmp) & mask;
5023 }
5024
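/**
 * si_init_ao_cu_mask - program the always-on CU mask for powergating
 *
 * @rdev: radeon_device pointer
 *
 * Pick up to two active CUs per shader array to keep always on, write
 * them to RLC_PG_AO_CU_MASK and store the total number of active CUs
 * in RLC_MAX_PG_CU (SI).
 */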
5025 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5026 {
5027         u32 i, j, k, active_cu_number = 0;
5028         u32 mask, counter, cu_bitmap;
5029         u32 tmp = 0;
5030
5031         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5032                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5033                         mask = 1;
5034                         cu_bitmap = 0;
5035                         counter = 0;
5036                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5037                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5038                                         if (counter < 2)
5039                                                 cu_bitmap |= mask;
5040                                         counter++;
5041                                 }
5042                                 mask <<= 1;
5043                         }
5044
5045                         active_cu_number += counter;
5046                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5047                 }
5048         }
5049
5050         WREG32(RLC_PG_AO_CU_MASK, tmp);
5051
5052         tmp = RREG32(RLC_MAX_PG_CU);
5053         tmp &= ~MAX_PU_CU_MASK;
5054         tmp |= MAX_PU_CU(active_cu_number);
5055         WREG32(RLC_MAX_PG_CU, tmp);
5056 }
5057
5058 static void si_enable_cgcg(struct radeon_device *rdev,
5059                            bool enable)
5060 {
5061         u32 data, orig, tmp;
5062
5063         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5064
5065         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5066                 si_enable_gui_idle_interrupt(rdev, true);
5067
5068                 WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5069
5070                 tmp = si_halt_rlc(rdev);
5071
5072                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5073                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5074                 WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5075
5076                 si_wait_for_rlc_serdes(rdev);
5077
5078                 si_update_rlc(rdev, tmp);
5079
5080                 WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5081
5082                 data |= CGCG_EN | CGLS_EN;
5083         } else {
5084                 si_enable_gui_idle_interrupt(rdev, false);
5085
5086                 RREG32(CB_CGTT_SCLK_CTRL);
5087                 RREG32(CB_CGTT_SCLK_CTRL);
5088                 RREG32(CB_CGTT_SCLK_CTRL);
5089                 RREG32(CB_CGTT_SCLK_CTRL);
5090
5091                 data &= ~(CGCG_EN | CGLS_EN);
5092         }
5093
5094         if (orig != data)
5095                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5096 }
5097
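/* Editor's note: enable/disable medium-grain clock gating for GFX.  This
 * programs CGTS_SM_CTRL_REG, optionally enables CP memory light sleep,
 * clears the low override bits in RLC_CGTT_MGCG_OVERRIDE, and pushes the
 * change out through an RLC halt / serdes write / resume sequence.
 */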
5098 static void si_enable_mgcg(struct radeon_device *rdev,
5099                            bool enable)
5100 {
5101         u32 data, orig, tmp = 0;
5102
5103         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5104                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5105                 data = 0x96940200;
5106                 if (orig != data)
5107                         WREG32(CGTS_SM_CTRL_REG, data);
5108
5109                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5110                         orig = data = RREG32(CP_MEM_SLP_CNTL);
5111                         data |= CP_MEM_LS_EN;
5112                         if (orig != data)
5113                                 WREG32(CP_MEM_SLP_CNTL, data);
5114                 }
5115
5116                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5117                 data &= 0xffffffc0;
5118                 if (orig != data)
5119                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5120
5121                 tmp = si_halt_rlc(rdev);
5122
5123                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5124                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5125                 WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5126
5127                 si_update_rlc(rdev, tmp);
5128         } else {
5129                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5130                 data |= 0x00000003;
5131                 if (orig != data)
5132                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5133
5134                 data = RREG32(CP_MEM_SLP_CNTL);
5135                 if (data & CP_MEM_LS_EN) {
5136                         data &= ~CP_MEM_LS_EN;
5137                         WREG32(CP_MEM_SLP_CNTL, data);
5138                 }
5139                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5140                 data |= LS_OVERRIDE | OVERRIDE;
5141                 if (orig != data)
5142                         WREG32(CGTS_SM_CTRL_REG, data);
5143
5144                 tmp = si_halt_rlc(rdev);
5145
5146                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5147                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5148                 WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5149
5150                 si_update_rlc(rdev, tmp);
5151         }
5152 }
5153
5154 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5155                                bool enable)
5156 {
5157         u32 orig, data, tmp;
5158
5159         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5160                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5161                 tmp |= 0x3fff;
5162                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5163
5164                 orig = data = RREG32(UVD_CGC_CTRL);
5165                 data |= DCM;
5166                 if (orig != data)
5167                         WREG32(UVD_CGC_CTRL, data);
5168
5169                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5170                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5171         } else {
5172                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5173                 tmp &= ~0x3fff;
5174                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5175
5176                 orig = data = RREG32(UVD_CGC_CTRL);
5177                 data &= ~DCM;
5178                 if (orig != data)
5179                         WREG32(UVD_CGC_CTRL, data);
5180
5181                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5182                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5183         }
5184 }
5185
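/* Editor's note: memory-controller hub registers that carry both the MGCG
 * and light-sleep enable bits; si_enable_mc_ls()/si_enable_mc_mgcg() below
 * toggle MC_LS_ENABLE/MC_CG_ENABLE in each of them.
 */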
5186 static const u32 mc_cg_registers[] =
5187 {
5188         MC_HUB_MISC_HUB_CG,
5189         MC_HUB_MISC_SIP_CG,
5190         MC_HUB_MISC_VM_CG,
5191         MC_XPB_CLK_GAT,
5192         ATC_MISC_CG,
5193         MC_CITF_MISC_WR_CG,
5194         MC_CITF_MISC_RD_CG,
5195         MC_CITF_MISC_VM_CG,
5196         VM_L2_CG,
5197 };
5198
5199 static void si_enable_mc_ls(struct radeon_device *rdev,
5200                             bool enable)
5201 {
5202         int i;
5203         u32 orig, data;
5204
5205         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5206                 orig = data = RREG32(mc_cg_registers[i]);
5207                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5208                         data |= MC_LS_ENABLE;
5209                 else
5210                         data &= ~MC_LS_ENABLE;
5211                 if (data != orig)
5212                         WREG32(mc_cg_registers[i], data);
5213         }
5214 }
5215
5216 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5217                                bool enable)
5218 {
5219         int i;
5220         u32 orig, data;
5221
5222         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5223                 orig = data = RREG32(mc_cg_registers[i]);
5224                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5225                         data |= MC_CG_ENABLE;
5226                 else
5227                         data &= ~MC_CG_ENABLE;
5228                 if (data != orig)
5229                         WREG32(mc_cg_registers[i], data);
5230         }
5231 }
5232
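/* Editor's note: enable/disable clock gating for the two DMA engines by
 * programming DMA_POWER_CNTL and DMA_CLK_CTRL at each engine's register
 * offset (DMA0/DMA1).
 */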
5233 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5234                                bool enable)
5235 {
5236         u32 orig, data, offset;
5237         int i;
5238
5239         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5240                 for (i = 0; i < 2; i++) {
5241                         if (i == 0)
5242                                 offset = DMA0_REGISTER_OFFSET;
5243                         else
5244                                 offset = DMA1_REGISTER_OFFSET;
5245                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5246                         data &= ~MEM_POWER_OVERRIDE;
5247                         if (data != orig)
5248                                 WREG32(DMA_POWER_CNTL + offset, data);
5249                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5250                 }
5251         } else {
5252                 for (i = 0; i < 2; i++) {
5253                         if (i == 0)
5254                                 offset = DMA0_REGISTER_OFFSET;
5255                         else
5256                                 offset = DMA1_REGISTER_OFFSET;
5257                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5258                         data |= MEM_POWER_OVERRIDE;
5259                         if (data != orig)
5260                                 WREG32(DMA_POWER_CNTL + offset, data);
5261
5262                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5263                         data = 0xff000000;
5264                         if (data != orig)
5265                                 WREG32(DMA_CLK_CTRL + offset, data);
5266                 }
5267         }
5268 }
5269
5270 static void si_enable_bif_mgls(struct radeon_device *rdev,
5271                                bool enable)
5272 {
5273         u32 orig, data;
5274
5275         orig = data = RREG32_PCIE(PCIE_CNTL2);
5276
5277         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5278                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5279                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5280         else
5281                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5282                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5283
5284         if (orig != data)
5285                 WREG32_PCIE(PCIE_CNTL2, data);
5286 }
5287
5288 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5289                                bool enable)
5290 {
5291         u32 orig, data;
5292
5293         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5294
5295         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5296                 data &= ~CLOCK_GATING_DIS;
5297         else
5298                 data |= CLOCK_GATING_DIS;
5299
5300         if (orig != data)
5301                 WREG32(HDP_HOST_PATH_CNTL, data);
5302 }
5303
5304 static void si_enable_hdp_ls(struct radeon_device *rdev,
5305                              bool enable)
5306 {
5307         u32 orig, data;
5308
5309         orig = data = RREG32(HDP_MEM_POWER_LS);
5310
5311         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5312                 data |= HDP_LS_ENABLE;
5313         else
5314                 data &= ~HDP_LS_ENABLE;
5315
5316         if (orig != data)
5317                 WREG32(HDP_MEM_POWER_LS, data);
5318 }
5319
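/* Editor's note: central clock-gating toggle.  For each requested block
 * (GFX, MC, SDMA, BIF, UVD, HDP) call the matching enable helper; for GFX
 * the GUI idle interrupt is masked around the change and MGCG/CGCG are
 * ordered depending on whether gating is being enabled or disabled.
 */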
5320 static void si_update_cg(struct radeon_device *rdev,
5321                          u32 block, bool enable)
5322 {
5323         if (block & RADEON_CG_BLOCK_GFX) {
5324                 si_enable_gui_idle_interrupt(rdev, false);
5325                 /* order matters! */
5326                 if (enable) {
5327                         si_enable_mgcg(rdev, true);
5328                         si_enable_cgcg(rdev, true);
5329                 } else {
5330                         si_enable_cgcg(rdev, false);
5331                         si_enable_mgcg(rdev, false);
5332                 }
5333                 si_enable_gui_idle_interrupt(rdev, true);
5334         }
5335
5336         if (block & RADEON_CG_BLOCK_MC) {
5337                 si_enable_mc_mgcg(rdev, enable);
5338                 si_enable_mc_ls(rdev, enable);
5339         }
5340
5341         if (block & RADEON_CG_BLOCK_SDMA) {
5342                 si_enable_dma_mgcg(rdev, enable);
5343         }
5344
5345         if (block & RADEON_CG_BLOCK_BIF) {
5346                 si_enable_bif_mgls(rdev, enable);
5347         }
5348
5349         if (block & RADEON_CG_BLOCK_UVD) {
5350                 if (rdev->has_uvd) {
5351                         si_enable_uvd_mgcg(rdev, enable);
5352                 }
5353         }
5354
5355         if (block & RADEON_CG_BLOCK_HDP) {
5356                 si_enable_hdp_mgcg(rdev, enable);
5357                 si_enable_hdp_ls(rdev, enable);
5358         }
5359 }
5360
5361 static void si_init_cg(struct radeon_device *rdev)
5362 {
5363         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5364                             RADEON_CG_BLOCK_MC |
5365                             RADEON_CG_BLOCK_SDMA |
5366                             RADEON_CG_BLOCK_BIF |
5367                             RADEON_CG_BLOCK_HDP), true);
5368         if (rdev->has_uvd) {
5369                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5370                 si_init_uvd_internal_cg(rdev);
5371         }
5372 }
5373
5374 static void si_fini_cg(struct radeon_device *rdev)
5375 {
5376         if (rdev->has_uvd) {
5377                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5378         }
5379         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5380                             RADEON_CG_BLOCK_MC |
5381                             RADEON_CG_BLOCK_SDMA |
5382                             RADEON_CG_BLOCK_BIF |
5383                             RADEON_CG_BLOCK_HDP), false);
5384 }
5385
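/* Editor's note: compute the number of dwords needed for the clear state
 * buffer built by si_get_csb_buffer(): preamble, context control, every
 * SECT_CONTEXT extent, PA_SC_RASTER_CONFIG, end-of-clear-state, and the
 * final CLEAR_STATE packet.
 */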
5386 u32 si_get_csb_size(struct radeon_device *rdev)
5387 {
5388         u32 count = 0;
5389         const struct cs_section_def *sect = NULL;
5390         const struct cs_extent_def *ext = NULL;
5391
5392         if (rdev->rlc.cs_data == NULL)
5393                 return 0;
5394
5395         /* begin clear state */
5396         count += 2;
5397         /* context control state */
5398         count += 3;
5399
5400         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5401                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5402                         if (sect->id == SECT_CONTEXT)
5403                                 count += 2 + ext->reg_count;
5404                         else
5405                                 return 0;
5406                 }
5407         }
5408         /* pa_sc_raster_config */
5409         count += 3;
5410         /* end clear state */
5411         count += 2;
5412         /* clear state */
5413         count += 2;
5414
5415         return count;
5416 }
5417
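/* Editor's note: fill the clear state buffer with little-endian PM4:
 * begin-clear-state preamble, context control, SET_CONTEXT_REG packets for
 * each extent in rlc.cs_data, a per-family PA_SC_RASTER_CONFIG value, then
 * the end-of-clear-state preamble and a CLEAR_STATE packet.
 */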
5418 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5419 {
5420         u32 count = 0, i;
5421         const struct cs_section_def *sect = NULL;
5422         const struct cs_extent_def *ext = NULL;
5423
5424         if (rdev->rlc.cs_data == NULL)
5425                 return;
5426         if (buffer == NULL)
5427                 return;
5428
5429         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5430         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5431
5432         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5433         buffer[count++] = cpu_to_le32(0x80000000);
5434         buffer[count++] = cpu_to_le32(0x80000000);
5435
5436         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5437                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5438                         if (sect->id == SECT_CONTEXT) {
5439                                 buffer[count++] =
5440                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5441                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5442                                 for (i = 0; i < ext->reg_count; i++)
5443                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
5444                         } else {
5445                                 return;
5446                         }
5447                 }
5448         }
5449
5450         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5451         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5452         switch (rdev->family) {
5453         case CHIP_TAHITI:
5454         case CHIP_PITCAIRN:
5455                 buffer[count++] = cpu_to_le32(0x2a00126a);
5456                 break;
5457         case CHIP_VERDE:
5458                 buffer[count++] = cpu_to_le32(0x0000124a);
5459                 break;
5460         case CHIP_OLAND:
5461                 buffer[count++] = cpu_to_le32(0x00000082);
5462                 break;
5463         case CHIP_HAINAN:
5464                 buffer[count++] = cpu_to_le32(0x00000000);
5465                 break;
5466         default:
5467                 buffer[count++] = cpu_to_le32(0x00000000);
5468                 break;
5469         }
5470
5471         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5472         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5473
5474         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5475         buffer[count++] = cpu_to_le32(0);
5476 }
5477
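/* Editor's note: power-gating init.  Set up DMA and GFX power gating when
 * the corresponding pg_flags are set; in every path the RLC save/restore
 * and clear-state buffer GPU addresses are programmed.
 */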
5478 static void si_init_pg(struct radeon_device *rdev)
5479 {
5480         if (rdev->pg_flags) {
5481                 if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5482                         si_init_dma_pg(rdev);
5483                 }
5484                 si_init_ao_cu_mask(rdev);
5485                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5486                         si_init_gfx_cgpg(rdev);
5487                 } else {
5488                         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5489                         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5490                 }
5491                 si_enable_dma_pg(rdev, true);
5492                 si_enable_gfx_cgpg(rdev, true);
5493         } else {
5494                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5495                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5496         }
5497 }
5498
5499 static void si_fini_pg(struct radeon_device *rdev)
5500 {
5501         if (rdev->pg_flags) {
5502                 si_enable_dma_pg(rdev, false);
5503                 si_enable_gfx_cgpg(rdev, false);
5504         }
5505 }
5506
5507 /*
5508  * RLC
5509  */
5510 void si_rlc_reset(struct radeon_device *rdev)
5511 {
5512         u32 tmp = RREG32(GRBM_SOFT_RESET);
5513
5514         tmp |= SOFT_RESET_RLC;
5515         WREG32(GRBM_SOFT_RESET, tmp);
5516         udelay(50);
5517         tmp &= ~SOFT_RESET_RLC;
5518         WREG32(GRBM_SOFT_RESET, tmp);
5519         udelay(50);
5520 }
5521
5522 static void si_rlc_stop(struct radeon_device *rdev)
5523 {
5524         WREG32(RLC_CNTL, 0);
5525
5526         si_enable_gui_idle_interrupt(rdev, false);
5527
5528         si_wait_for_rlc_serdes(rdev);
5529 }
5530
5531 static void si_rlc_start(struct radeon_device *rdev)
5532 {
5533         WREG32(RLC_CNTL, RLC_ENABLE);
5534
5535         si_enable_gui_idle_interrupt(rdev, true);
5536
5537         udelay(50);
5538 }
5539
5540 static bool si_lbpw_supported(struct radeon_device *rdev)
5541 {
5542         u32 tmp;
5543
5544         /* Enable LBPW only for DDR3 */
5545         tmp = RREG32(MC_SEQ_MISC0);
5546         if ((tmp & 0xF0000000) == 0xB0000000)
5547                 return true;
5548         return false;
5549 }
5550
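/* Editor's note: toggle RLC load balancing per watt (LBPW).  When
 * disabling, reset the SE/SH selection to broadcast and program
 * SPI_LB_CU_MASK to 0x00ff.
 */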
5551 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5552 {
5553         u32 tmp;
5554
5555         tmp = RREG32(RLC_LB_CNTL);
5556         if (enable)
5557                 tmp |= LOAD_BALANCE_ENABLE;
5558         else
5559                 tmp &= ~LOAD_BALANCE_ENABLE;
5560         WREG32(RLC_LB_CNTL, tmp);
5561
5562         if (!enable) {
5563                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5564                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5565         }
5566 }
5567
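/* Editor's note: bring the RLC back up.  Stop and reset it, (re)initialize
 * power and clock gating, clear the load-balancing registers, upload the
 * RLC microcode word by word, then start it with LBPW enabled only when
 * the memory type supports it.
 */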
5568 static int si_rlc_resume(struct radeon_device *rdev)
5569 {
5570         u32 i;
5571         const __be32 *fw_data;
5572
5573         if (!rdev->rlc_fw)
5574                 return -EINVAL;
5575
5576         si_rlc_stop(rdev);
5577
5578         si_rlc_reset(rdev);
5579
5580         si_init_pg(rdev);
5581
5582         si_init_cg(rdev);
5583
5584         WREG32(RLC_RL_BASE, 0);
5585         WREG32(RLC_RL_SIZE, 0);
5586         WREG32(RLC_LB_CNTL, 0);
5587         WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5588         WREG32(RLC_LB_CNTR_INIT, 0);
5589         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5590
5591         WREG32(RLC_MC_CNTL, 0);
5592         WREG32(RLC_UCODE_CNTL, 0);
5593
5594         fw_data = (const __be32 *)rdev->rlc_fw->data;
5595         for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5596                 WREG32(RLC_UCODE_ADDR, i);
5597                 WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5598         }
5599         WREG32(RLC_UCODE_ADDR, 0);
5600
5601         si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5602
5603         si_rlc_start(rdev);
5604
5605         return 0;
5606 }
5607
5608 static void si_enable_interrupts(struct radeon_device *rdev)
5609 {
5610         u32 ih_cntl = RREG32(IH_CNTL);
5611         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5612
5613         ih_cntl |= ENABLE_INTR;
5614         ih_rb_cntl |= IH_RB_ENABLE;
5615         WREG32(IH_CNTL, ih_cntl);
5616         WREG32(IH_RB_CNTL, ih_rb_cntl);
5617         rdev->ih.enabled = true;
5618 }
5619
5620 static void si_disable_interrupts(struct radeon_device *rdev)
5621 {
5622         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5623         u32 ih_cntl = RREG32(IH_CNTL);
5624
5625         ih_rb_cntl &= ~IH_RB_ENABLE;
5626         ih_cntl &= ~ENABLE_INTR;
5627         WREG32(IH_RB_CNTL, ih_rb_cntl);
5628         WREG32(IH_CNTL, ih_cntl);
5629         /* set rptr, wptr to 0 */
5630         WREG32(IH_RB_RPTR, 0);
5631         WREG32(IH_RB_WPTR, 0);
5632         rdev->ih.enabled = false;
5633         rdev->ih.rptr = 0;
5634 }
5635
5636 static void si_disable_interrupt_state(struct radeon_device *rdev)
5637 {
5638         u32 tmp;
5639
5640         tmp = RREG32(CP_INT_CNTL_RING0) &
5641                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5642         WREG32(CP_INT_CNTL_RING0, tmp);
5643         WREG32(CP_INT_CNTL_RING1, 0);
5644         WREG32(CP_INT_CNTL_RING2, 0);
5645         tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5646         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5647         tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5648         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5649         WREG32(GRBM_INT_CNTL, 0);
5650         if (rdev->num_crtc >= 2) {
5651                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5652                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5653         }
5654         if (rdev->num_crtc >= 4) {
5655                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5656                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5657         }
5658         if (rdev->num_crtc >= 6) {
5659                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5660                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5661         }
5662
5663         if (rdev->num_crtc >= 2) {
5664                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5665                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5666         }
5667         if (rdev->num_crtc >= 4) {
5668                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5669                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5670         }
5671         if (rdev->num_crtc >= 6) {
5672                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5673                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5674         }
5675
5676         if (!ASIC_IS_NODCE(rdev)) {
5677                 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5678
5679                 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5680                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5681                 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5682                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5683                 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5684                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5685                 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5686                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5687                 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5688                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5689                 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5690                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5691         }
5692 }
5693
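/* Editor's note: one-time interrupt controller setup.  Allocate the IH
 * ring, bring up the RLC, program the IH ring base/size and optional
 * write-back pointer, set default IH_CNTL credits, leave all sources
 * masked, and finally enable the IH.  Individual sources are enabled by
 * si_irq_set() below.
 */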
5694 static int si_irq_init(struct radeon_device *rdev)
5695 {
5696         int ret = 0;
5697         int rb_bufsz;
5698         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5699
5700         /* allocate ring */
5701         ret = r600_ih_ring_alloc(rdev);
5702         if (ret)
5703                 return ret;
5704
5705         /* disable irqs */
5706         si_disable_interrupts(rdev);
5707
5708         /* init rlc */
5709         ret = si_rlc_resume(rdev);
5710         if (ret) {
5711                 r600_ih_ring_fini(rdev);
5712                 return ret;
5713         }
5714
5715         /* setup interrupt control */
5716         /* set dummy read address to ring address */
5717         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5718         interrupt_cntl = RREG32(INTERRUPT_CNTL);
5719         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5720          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5721          */
5722         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5723         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5724         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5725         WREG32(INTERRUPT_CNTL, interrupt_cntl);
5726
5727         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5728         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
5729
5730         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5731                       IH_WPTR_OVERFLOW_CLEAR |
5732                       (rb_bufsz << 1));
5733
5734         if (rdev->wb.enabled)
5735                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5736
5737         /* set the writeback address whether it's enabled or not */
5738         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5739         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5740
5741         WREG32(IH_RB_CNTL, ih_rb_cntl);
5742
5743         /* set rptr, wptr to 0 */
5744         WREG32(IH_RB_RPTR, 0);
5745         WREG32(IH_RB_WPTR, 0);
5746
5747         /* Default settings for IH_CNTL (disabled at first) */
5748         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5749         /* RPTR_REARM only works if msi's are enabled */
5750         if (rdev->msi_enabled)
5751                 ih_cntl |= RPTR_REARM;
5752         WREG32(IH_CNTL, ih_cntl);
5753
5754         /* force the active interrupt state to all disabled */
5755         si_disable_interrupt_state(rdev);
5756
5757         pci_set_master(rdev->pdev);
5758
5759         /* enable irqs */
5760         si_enable_interrupts(rdev);
5761
5762         return ret;
5763 }
5764
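/* Editor's note: program the per-source interrupt enables (CP rings, both
 * DMA engines, per-CRTC vblank, HPD, thermal) from the current rdev->irq
 * state.
 */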
5765 int si_irq_set(struct radeon_device *rdev)
5766 {
5767         u32 cp_int_cntl;
5768         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5769         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5770         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5771         u32 grbm_int_cntl = 0;
5772         u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5773         u32 dma_cntl, dma_cntl1;
5774         u32 thermal_int = 0;
5775
5776         if (!rdev->irq.installed) {
5777                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5778                 return -EINVAL;
5779         }
5780         /* don't enable anything if the ih is disabled */
5781         if (!rdev->ih.enabled) {
5782                 si_disable_interrupts(rdev);
5783                 /* force the active interrupt state to all disabled */
5784                 si_disable_interrupt_state(rdev);
5785                 return 0;
5786         }
5787
5788         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
5789                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5790
5791         if (!ASIC_IS_NODCE(rdev)) {
5792                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5793                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5794                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5795                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5796                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5797                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5798         }
5799
5800         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5801         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5802
5803         thermal_int = RREG32(CG_THERMAL_INT) &
5804                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5805
5806         /* enable CP interrupts on all rings */
5807         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5808                 DRM_DEBUG("si_irq_set: sw int gfx\n");
5809                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5810         }
5811         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5812                 DRM_DEBUG("si_irq_set: sw int cp1\n");
5813                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5814         }
5815         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5816                 DRM_DEBUG("si_irq_set: sw int cp2\n");
5817                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5818         }
5819         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5820                 DRM_DEBUG("si_irq_set: sw int dma\n");
5821                 dma_cntl |= TRAP_ENABLE;
5822         }
5823
5824         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5825                 DRM_DEBUG("si_irq_set: sw int dma1\n");
5826                 dma_cntl1 |= TRAP_ENABLE;
5827         }
5828         if (rdev->irq.crtc_vblank_int[0] ||
5829             atomic_read(&rdev->irq.pflip[0])) {
5830                 DRM_DEBUG("si_irq_set: vblank 0\n");
5831                 crtc1 |= VBLANK_INT_MASK;
5832         }
5833         if (rdev->irq.crtc_vblank_int[1] ||
5834             atomic_read(&rdev->irq.pflip[1])) {
5835                 DRM_DEBUG("si_irq_set: vblank 1\n");
5836                 crtc2 |= VBLANK_INT_MASK;
5837         }
5838         if (rdev->irq.crtc_vblank_int[2] ||
5839             atomic_read(&rdev->irq.pflip[2])) {
5840                 DRM_DEBUG("si_irq_set: vblank 2\n");
5841                 crtc3 |= VBLANK_INT_MASK;
5842         }
5843         if (rdev->irq.crtc_vblank_int[3] ||
5844             atomic_read(&rdev->irq.pflip[3])) {
5845                 DRM_DEBUG("si_irq_set: vblank 3\n");
5846                 crtc4 |= VBLANK_INT_MASK;
5847         }
5848         if (rdev->irq.crtc_vblank_int[4] ||
5849             atomic_read(&rdev->irq.pflip[4])) {
5850                 DRM_DEBUG("si_irq_set: vblank 4\n");
5851                 crtc5 |= VBLANK_INT_MASK;
5852         }
5853         if (rdev->irq.crtc_vblank_int[5] ||
5854             atomic_read(&rdev->irq.pflip[5])) {
5855                 DRM_DEBUG("si_irq_set: vblank 5\n");
5856                 crtc6 |= VBLANK_INT_MASK;
5857         }
5858         if (rdev->irq.hpd[0]) {
5859                 DRM_DEBUG("si_irq_set: hpd 1\n");
5860                 hpd1 |= DC_HPDx_INT_EN;
5861         }
5862         if (rdev->irq.hpd[1]) {
5863                 DRM_DEBUG("si_irq_set: hpd 2\n");
5864                 hpd2 |= DC_HPDx_INT_EN;
5865         }
5866         if (rdev->irq.hpd[2]) {
5867                 DRM_DEBUG("si_irq_set: hpd 3\n");
5868                 hpd3 |= DC_HPDx_INT_EN;
5869         }
5870         if (rdev->irq.hpd[3]) {
5871                 DRM_DEBUG("si_irq_set: hpd 4\n");
5872                 hpd4 |= DC_HPDx_INT_EN;
5873         }
5874         if (rdev->irq.hpd[4]) {
5875                 DRM_DEBUG("si_irq_set: hpd 5\n");
5876                 hpd5 |= DC_HPDx_INT_EN;
5877         }
5878         if (rdev->irq.hpd[5]) {
5879                 DRM_DEBUG("si_irq_set: hpd 6\n");
5880                 hpd6 |= DC_HPDx_INT_EN;
5881         }
5882
5883         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5884         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5885         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5886
5887         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5888         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5889
5890         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5891
5892         if (rdev->irq.dpm_thermal) {
5893                 DRM_DEBUG("dpm thermal\n");
5894                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5895         }
5896
5897         if (rdev->num_crtc >= 2) {
5898                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5899                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5900         }
5901         if (rdev->num_crtc >= 4) {
5902                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5903                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5904         }
5905         if (rdev->num_crtc >= 6) {
5906                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5907                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5908         }
5909
5910         if (rdev->num_crtc >= 2) {
5911                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5912                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5913         }
5914         if (rdev->num_crtc >= 4) {
5915                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5916                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5917         }
5918         if (rdev->num_crtc >= 6) {
5919                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5920                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5921         }
5922
5923         if (!ASIC_IS_NODCE(rdev)) {
5924                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5925                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5926                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5927                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5928                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5929                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5930         }
5931
5932         WREG32(CG_THERMAL_INT, thermal_int);
5933
5934         return 0;
5935 }
5936
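/* Editor's note: snapshot the display interrupt status registers into
 * rdev->irq.stat_regs.evergreen and acknowledge any pending pageflip,
 * vblank, vline and HPD interrupts back to the hardware.
 */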
5937 static inline void si_irq_ack(struct radeon_device *rdev)
5938 {
5939         u32 tmp;
5940
5941         if (ASIC_IS_NODCE(rdev))
5942                 return;
5943
5944         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5945         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5946         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5947         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5948         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5949         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5950         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5951         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5952         if (rdev->num_crtc >= 4) {
5953                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5954                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5955         }
5956         if (rdev->num_crtc >= 6) {
5957                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5958                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5959         }
5960
5961         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5962                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5963         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5964                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5965         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5966                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5967         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5968                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5969         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5970                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5971         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5972                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5973
5974         if (rdev->num_crtc >= 4) {
5975                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5976                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5977                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5978                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5979                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5980                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5981                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5982                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5983                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5984                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5985                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5986                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5987         }
5988
5989         if (rdev->num_crtc >= 6) {
5990                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5991                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5992                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5993                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5994                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5995                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5996                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5997                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5998                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5999                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6000                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6001                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6002         }
6003
6004         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6005                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6006                 tmp |= DC_HPDx_INT_ACK;
6007                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6008         }
6009         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6010                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6011                 tmp |= DC_HPDx_INT_ACK;
6012                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6013         }
6014         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6015                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6016                 tmp |= DC_HPDx_INT_ACK;
6017                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6018         }
6019         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6020                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6021                 tmp |= DC_HPDx_INT_ACK;
6022                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6023         }
6024         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6025                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6026                 tmp |= DC_HPDx_INT_ACK;
6027                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6028         }
6029         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6030                 tmp = RREG32(DC_HPD6_INT_CONTROL);
6031                 tmp |= DC_HPDx_INT_ACK;
6032                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6033         }
6034 }
6035
6036 static void si_irq_disable(struct radeon_device *rdev)
6037 {
6038         si_disable_interrupts(rdev);
6039         /* Wait and acknowledge irq */
6040         mdelay(1);
6041         si_irq_ack(rdev);
6042         si_disable_interrupt_state(rdev);
6043 }
6044
6045 static void si_irq_suspend(struct radeon_device *rdev)
6046 {
6047         si_irq_disable(rdev);
6048         si_rlc_stop(rdev);
6049 }
6050
6051 static void si_irq_fini(struct radeon_device *rdev)
6052 {
6053         si_irq_suspend(rdev);
6054         r600_ih_ring_fini(rdev);
6055 }
6056
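/* Editor's note: read the current IH write pointer (from the write-back
 * buffer when enabled, otherwise from IH_RB_WPTR) and handle ring overflow
 * by skipping the read pointer ahead and clearing the overflow bit.
 */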
6057 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6058 {
6059         u32 wptr, tmp;
6060
6061         if (rdev->wb.enabled)
6062                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6063         else
6064                 wptr = RREG32(IH_RB_WPTR);
6065
6066         if (wptr & RB_OVERFLOW) {
6067                 /* When a ring buffer overflow happens, start parsing interrupts
6068                  * from the last not-overwritten vector (wptr + 16). Hopefully
6069                  * this should allow us to catch up.
6070                  */
6071                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6072                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6073                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6074                 tmp = RREG32(IH_RB_CNTL);
6075                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6076                 WREG32(IH_RB_CNTL, tmp);
6077         }
6078         return (wptr & rdev->ih.ptr_mask);
6079 }
6080
6081 /*        SI IV Ring
6082  * Each IV ring entry is 128 bits:
6083  * [7:0]    - interrupt source id
6084  * [31:8]   - reserved
6085  * [59:32]  - interrupt source data
6086  * [63:60]  - reserved
6087  * [71:64]  - RINGID
6088  * [79:72]  - VMID
6089  * [127:80] - reserved
6090  */
6091 int si_irq_process(struct radeon_device *rdev)
6092 {
6093         u32 wptr;
6094         u32 rptr;
6095         u32 src_id, src_data, ring_id;
6096         u32 ring_index;
6097         bool queue_hotplug = false;
6098         bool queue_thermal = false;
6099         u32 status, addr;
6100
6101         if (!rdev->ih.enabled || rdev->shutdown)
6102                 return IRQ_NONE;
6103
6104         wptr = si_get_ih_wptr(rdev);
6105
6106 restart_ih:
6107         /* is somebody else already processing irqs? */
6108         if (atomic_xchg(&rdev->ih.lock, 1))
6109                 return IRQ_NONE;
6110
6111         rptr = rdev->ih.rptr;
6112         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6113
6114         /* Order reading of wptr vs. reading of IH ring data */
6115         rmb();
6116
6117         /* display interrupts */
6118         si_irq_ack(rdev);
6119
6120         while (rptr != wptr) {
6121                 /* wptr/rptr are in bytes! */
6122                 ring_index = rptr / 4;
6123                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6124                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6125                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6126
6127                 switch (src_id) {
6128                 case 1: /* D1 vblank/vline */
6129                         switch (src_data) {
6130                         case 0: /* D1 vblank */
6131                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6132                                         if (rdev->irq.crtc_vblank_int[0]) {
6133                                                 drm_handle_vblank(rdev->ddev, 0);
6134                                                 rdev->pm.vblank_sync = true;
6135                                                 wake_up(&rdev->irq.vblank_queue);
6136                                         }
6137                                         if (atomic_read(&rdev->irq.pflip[0]))
6138                                                 radeon_crtc_handle_flip(rdev, 0);
6139                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6140                                         DRM_DEBUG("IH: D1 vblank\n");
6141                                 }
6142                                 break;
6143                         case 1: /* D1 vline */
6144                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6145                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6146                                         DRM_DEBUG("IH: D1 vline\n");
6147                                 }
6148                                 break;
6149                         default:
6150                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6151                                 break;
6152                         }
6153                         break;
6154                 case 2: /* D2 vblank/vline */
6155                         switch (src_data) {
6156                         case 0: /* D2 vblank */
6157                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6158                                         if (rdev->irq.crtc_vblank_int[1]) {
6159                                                 drm_handle_vblank(rdev->ddev, 1);
6160                                                 rdev->pm.vblank_sync = true;
6161                                                 wake_up(&rdev->irq.vblank_queue);
6162                                         }
6163                                         if (atomic_read(&rdev->irq.pflip[1]))
6164                                                 radeon_crtc_handle_flip(rdev, 1);
6165                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6166                                         DRM_DEBUG("IH: D2 vblank\n");
6167                                 }
6168                                 break;
6169                         case 1: /* D2 vline */
6170                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6171                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6172                                         DRM_DEBUG("IH: D2 vline\n");
6173                                 }
6174                                 break;
6175                         default:
6176                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6177                                 break;
6178                         }
6179                         break;
6180                 case 3: /* D3 vblank/vline */
6181                         switch (src_data) {
6182                         case 0: /* D3 vblank */
6183                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6184                                         if (rdev->irq.crtc_vblank_int[2]) {
6185                                                 drm_handle_vblank(rdev->ddev, 2);
6186                                                 rdev->pm.vblank_sync = true;
6187                                                 wake_up(&rdev->irq.vblank_queue);
6188                                         }
6189                                         if (atomic_read(&rdev->irq.pflip[2]))
6190                                                 radeon_crtc_handle_flip(rdev, 2);
6191                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6192                                         DRM_DEBUG("IH: D3 vblank\n");
6193                                 }
6194                                 break;
6195                         case 1: /* D3 vline */
6196                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6197                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6198                                         DRM_DEBUG("IH: D3 vline\n");
6199                                 }
6200                                 break;
6201                         default:
6202                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6203                                 break;
6204                         }
6205                         break;
6206                 case 4: /* D4 vblank/vline */
6207                         switch (src_data) {
6208                         case 0: /* D4 vblank */
6209                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6210                                         if (rdev->irq.crtc_vblank_int[3]) {
6211                                                 drm_handle_vblank(rdev->ddev, 3);
6212                                                 rdev->pm.vblank_sync = true;
6213                                                 wake_up(&rdev->irq.vblank_queue);
6214                                         }
6215                                         if (atomic_read(&rdev->irq.pflip[3]))
6216                                                 radeon_crtc_handle_flip(rdev, 3);
6217                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6218                                         DRM_DEBUG("IH: D4 vblank\n");
6219                                 }
6220                                 break;
6221                         case 1: /* D4 vline */
6222                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6223                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6224                                         DRM_DEBUG("IH: D4 vline\n");
6225                                 }
6226                                 break;
6227                         default:
6228                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6229                                 break;
6230                         }
6231                         break;
6232                 case 5: /* D5 vblank/vline */
6233                         switch (src_data) {
6234                         case 0: /* D5 vblank */
6235                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6236                                         if (rdev->irq.crtc_vblank_int[4]) {
6237                                                 drm_handle_vblank(rdev->ddev, 4);
6238                                                 rdev->pm.vblank_sync = true;
6239                                                 wake_up(&rdev->irq.vblank_queue);
6240                                         }
6241                                         if (atomic_read(&rdev->irq.pflip[4]))
6242                                                 radeon_crtc_handle_flip(rdev, 4);
6243                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6244                                         DRM_DEBUG("IH: D5 vblank\n");
6245                                 }
6246                                 break;
6247                         case 1: /* D5 vline */
6248                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6249                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6250                                         DRM_DEBUG("IH: D5 vline\n");
6251                                 }
6252                                 break;
6253                         default:
6254                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6255                                 break;
6256                         }
6257                         break;
6258                 case 6: /* D6 vblank/vline */
6259                         switch (src_data) {
6260                         case 0: /* D6 vblank */
6261                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6262                                         if (rdev->irq.crtc_vblank_int[5]) {
6263                                                 drm_handle_vblank(rdev->ddev, 5);
6264                                                 rdev->pm.vblank_sync = true;
6265                                                 wake_up(&rdev->irq.vblank_queue);
6266                                         }
6267                                         if (atomic_read(&rdev->irq.pflip[5]))
6268                                                 radeon_crtc_handle_flip(rdev, 5);
6269                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6270                                         DRM_DEBUG("IH: D6 vblank\n");
6271                                 }
6272                                 break;
6273                         case 1: /* D6 vline */
6274                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6275                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6276                                         DRM_DEBUG("IH: D6 vline\n");
6277                                 }
6278                                 break;
6279                         default:
6280                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6281                                 break;
6282                         }
6283                         break;
6284                 case 42: /* HPD hotplug */
6285                         switch (src_data) {
6286                         case 0:
6287                                 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6288                                         rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6289                                         queue_hotplug = true;
6290                                         DRM_DEBUG("IH: HPD1\n");
6291                                 }
6292                                 break;
6293                         case 1:
6294                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6295                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6296                                         queue_hotplug = true;
6297                                         DRM_DEBUG("IH: HPD2\n");
6298                                 }
6299                                 break;
6300                         case 2:
6301                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6302                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6303                                         queue_hotplug = true;
6304                                         DRM_DEBUG("IH: HPD3\n");
6305                                 }
6306                                 break;
6307                         case 3:
6308                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6309                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6310                                         queue_hotplug = true;
6311                                         DRM_DEBUG("IH: HPD4\n");
6312                                 }
6313                                 break;
6314                         case 4:
6315                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6316                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6317                                         queue_hotplug = true;
6318                                         DRM_DEBUG("IH: HPD5\n");
6319                                 }
6320                                 break;
6321                         case 5:
6322                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6323                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6324                                         queue_hotplug = true;
6325                                         DRM_DEBUG("IH: HPD6\n");
6326                                 }
6327                                 break;
6328                         default:
6329                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6330                                 break;
6331                         }
6332                         break;
6333                 case 124: /* UVD */
6334                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6335                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6336                         break;
6337                 case 146:
6338                 case 147: /* VM protection fault */
6339                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6340                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6341                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6342                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6343                                 addr);
6344                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6345                                 status);
6346                         si_vm_decode_fault(rdev, status, addr);
6347                         /* reset addr and status */
6348                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6349                         break;
6350                 case 176: /* RINGID0 CP_INT */
6351                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6352                         break;
6353                 case 177: /* RINGID1 CP_INT */
6354                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6355                         break;
6356                 case 178: /* RINGID2 CP_INT */
6357                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6358                         break;
6359                 case 181: /* CP EOP event */
6360                         DRM_DEBUG("IH: CP EOP\n");
6361                         switch (ring_id) {
6362                         case 0:
6363                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6364                                 break;
6365                         case 1:
6366                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6367                                 break;
6368                         case 2:
6369                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6370                                 break;
6371                         }
6372                         break;
6373                 case 224: /* DMA trap event */
6374                         DRM_DEBUG("IH: DMA trap\n");
6375                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6376                         break;
6377                 case 230: /* thermal low to high */
6378                         DRM_DEBUG("IH: thermal low to high\n");
6379                         rdev->pm.dpm.thermal.high_to_low = false;
6380                         queue_thermal = true;
6381                         break;
6382                 case 231: /* thermal high to low */
6383                         DRM_DEBUG("IH: thermal high to low\n");
6384                         rdev->pm.dpm.thermal.high_to_low = true;
6385                         queue_thermal = true;
6386                         break;
6387                 case 233: /* GUI IDLE */
6388                         DRM_DEBUG("IH: GUI idle\n");
6389                         break;
6390                 case 244: /* DMA1 trap event */
6391                         DRM_DEBUG("IH: DMA1 trap\n");
6392                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6393                         break;
6394                 default:
6395                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6396                         break;
6397                 }
6398
6399                 /* wptr/rptr are in bytes! */
6400                 rptr += 16;
6401                 rptr &= rdev->ih.ptr_mask;
6402         }
6403         if (queue_hotplug)
6404                 schedule_work(&rdev->hotplug_work);
6405         if (queue_thermal && rdev->pm.dpm_enabled)
6406                 schedule_work(&rdev->pm.dpm.thermal.work);
6407         rdev->ih.rptr = rptr;
6408         WREG32(IH_RB_RPTR, rdev->ih.rptr);
6409         atomic_set(&rdev->ih.lock, 0);
6410
6411         /* make sure wptr hasn't changed while processing */
6412         wptr = si_get_ih_wptr(rdev);
6413         if (wptr != rptr)
6414                 goto restart_ih;
6415
6416         return IRQ_HANDLED;
6417 }
6418
6419 /*
6420  * startup/shutdown callbacks
6421  */
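/**
 * si_startup - program the ASIC to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the MC, GART, RLC and rings, loads the CP and MC microcode
 * as needed, sets up interrupts and the IB/VM managers and starts the
 * engines (SI).  Called at driver load (si_init) and resume (si_resume).
 * Returns 0 for success, error for failure.
 */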
6422 static int si_startup(struct radeon_device *rdev)
6423 {
6424         struct radeon_ring *ring;
6425         int r;
6426
6427         /* enable pcie gen2/3 link */
6428         si_pcie_gen3_enable(rdev);
6429         /* enable aspm */
6430         si_program_aspm(rdev);
6431
6432         /* scratch needs to be initialized before MC */
6433         r = r600_vram_scratch_init(rdev);
6434         if (r)
6435                 return r;
6436
6437         si_mc_program(rdev);
6438
6439         if (!rdev->pm.dpm_enabled) {
6440                 r = si_mc_load_microcode(rdev);
6441                 if (r) {
6442                         DRM_ERROR("Failed to load MC firmware!\n");
6443                         return r;
6444                 }
6445         }
6446
6447         r = si_pcie_gart_enable(rdev);
6448         if (r)
6449                 return r;
6450         si_gpu_init(rdev);
6451
6452         /* allocate rlc buffers */
6453         if (rdev->family == CHIP_VERDE) {
6454                 rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6455                 rdev->rlc.reg_list_size =
6456                         (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6457         }
6458         rdev->rlc.cs_data = si_cs_data;
6459         r = sumo_rlc_init(rdev);
6460         if (r) {
6461                 DRM_ERROR("Failed to init rlc BOs!\n");
6462                 return r;
6463         }
6464
6465         /* allocate wb buffer */
6466         r = radeon_wb_init(rdev);
6467         if (r)
6468                 return r;
6469
6470         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6471         if (r) {
6472                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6473                 return r;
6474         }
6475
6476         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6477         if (r) {
6478                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6479                 return r;
6480         }
6481
6482         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6483         if (r) {
6484                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6485                 return r;
6486         }
6487
6488         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6489         if (r) {
6490                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6491                 return r;
6492         }
6493
6494         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6495         if (r) {
6496                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6497                 return r;
6498         }
6499
6500         if (rdev->has_uvd) {
6501                 r = uvd_v2_2_resume(rdev);
6502                 if (!r) {
6503                         r = radeon_fence_driver_start_ring(rdev,
6504                                                            R600_RING_TYPE_UVD_INDEX);
6505                         if (r)
6506                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6507                 }
6508                 if (r)
6509                         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6510         }
6511
6512         /* Enable IRQ */
6513         if (!rdev->irq.installed) {
6514                 r = radeon_irq_kms_init(rdev);
6515                 if (r)
6516                         return r;
6517         }
6518
6519         r = si_irq_init(rdev);
6520         if (r) {
6521                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6522                 radeon_irq_kms_fini(rdev);
6523                 return r;
6524         }
6525         si_irq_set(rdev);
6526
6527         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6528         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6529                              RADEON_CP_PACKET2);
6530         if (r)
6531                 return r;
6532
6533         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6534         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6535                              RADEON_CP_PACKET2);
6536         if (r)
6537                 return r;
6538
6539         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6540         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6541                              RADEON_CP_PACKET2);
6542         if (r)
6543                 return r;
6544
6545         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6546         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6547                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6548         if (r)
6549                 return r;
6550
6551         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6552         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6553                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6554         if (r)
6555                 return r;
6556
6557         r = si_cp_load_microcode(rdev);
6558         if (r)
6559                 return r;
6560         r = si_cp_resume(rdev);
6561         if (r)
6562                 return r;
6563
6564         r = cayman_dma_resume(rdev);
6565         if (r)
6566                 return r;
6567
6568         if (rdev->has_uvd) {
6569                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6570                 if (ring->ring_size) {
6571                         r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
6572                                              RADEON_CP_PACKET2);
6573                         if (!r)
6574                                 r = uvd_v1_0_init(rdev);
6575                         if (r)
6576                                 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6577                 }
6578         }
6579
6580         r = radeon_ib_pool_init(rdev);
6581         if (r) {
6582                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6583                 return r;
6584         }
6585
6586         r = radeon_vm_manager_init(rdev);
6587         if (r) {
6588                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6589                 return r;
6590         }
6591
6592         r = dce6_audio_init(rdev);
6593         if (r)
6594                 return r;
6595
6596         return 0;
6597 }
6598
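/**
 * si_resume - resume the ASIC after suspend
 *
 * @rdev: radeon_device pointer
 *
 * Re-posts the card via the ATOM BIOS, restores the golden registers
 * and runs si_startup() to bring the hardware back up (SI).
 * Returns 0 for success, error for failure.
 */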
6599 int si_resume(struct radeon_device *rdev)
6600 {
6601         int r;
6602
6603         /* Do not reset the GPU before posting; unlike on r500 hardware,
6604          * posting performs the tasks necessary to bring the GPU back
6605          * into good shape.
6606          */
6607         /* post card */
6608         atom_asic_init(rdev->mode_info.atom_context);
6609
6610         /* init golden registers */
6611         si_init_golden_registers(rdev);
6612
6613         if (rdev->pm.pm_method == PM_METHOD_DPM)
6614                 radeon_pm_resume(rdev);
6615
6616         rdev->accel_working = true;
6617         r = si_startup(rdev);
6618         if (r) {
6619                 DRM_ERROR("si startup failed on resume\n");
6620                 rdev->accel_working = false;
6621                 return r;
6622         }
6623
6624         return r;
6625
6626 }
6627
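/**
 * si_suspend - quiesce the ASIC for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Suspends power management, audio and the VM manager, stops the CP,
 * DMA and UVD engines, tears down PG/CG, disables interrupts and
 * writeback and shuts down the GART (SI).
 * Returns 0.
 */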
6628 int si_suspend(struct radeon_device *rdev)
6629 {
6630         radeon_pm_suspend(rdev);
6631         dce6_audio_fini(rdev);
6632         radeon_vm_manager_fini(rdev);
6633         si_cp_enable(rdev, false);
6634         cayman_dma_stop(rdev);
6635         if (rdev->has_uvd) {
6636                 uvd_v1_0_fini(rdev);
6637                 radeon_uvd_suspend(rdev);
6638         }
6639         si_fini_pg(rdev);
6640         si_fini_cg(rdev);
6641         si_irq_suspend(rdev);
6642         radeon_wb_disable(rdev);
6643         si_pcie_gart_disable(rdev);
6644         return 0;
6645 }
6646
6647 /* The plan is to move more of the initialization into this function
6648  * and to use helper functions so that radeon_device_init does
6649  * little more than call the ASIC-specific functions. This should
6650  * also allow the removal of a number of callback functions such
6651  * as vram_info.
6652  */
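/**
 * si_init - asic specific driver and hw initialization
 *
 * @rdev: radeon_device pointer
 *
 * Reads and validates the BIOS, posts the card if necessary,
 * initializes the scratch and surface registers, clocks, fences,
 * memory controller and memory manager, loads the microcode,
 * sets up the CP, DMA, UVD and IH rings and finally calls
 * si_startup() to bring the hardware up (SI).
 * Returns 0 for success, error for failure.
 */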
6653 int si_init(struct radeon_device *rdev)
6654 {
6655         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6656         int r;
6657
6658         /* Read BIOS */
6659         if (!radeon_get_bios(rdev)) {
6660                 if (ASIC_IS_AVIVO(rdev))
6661                         return -EINVAL;
6662         }
6663         /* Must be an ATOMBIOS */
6664         if (!rdev->is_atom_bios) {
6665                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6666                 return -EINVAL;
6667         }
6668         r = radeon_atombios_init(rdev);
6669         if (r)
6670                 return r;
6671
6672         /* Post card if necessary */
6673         if (!radeon_card_posted(rdev)) {
6674                 if (!rdev->bios) {
6675                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6676                         return -EINVAL;
6677                 }
6678                 DRM_INFO("GPU not posted. posting now...\n");
6679                 atom_asic_init(rdev->mode_info.atom_context);
6680         }
6681         /* init golden registers */
6682         si_init_golden_registers(rdev);
6683         /* Initialize scratch registers */
6684         si_scratch_init(rdev);
6685         /* Initialize surface registers */
6686         radeon_surface_init(rdev);
6687         /* Initialize clocks */
6688         radeon_get_clock_info(rdev->ddev);
6689
6690         /* Fence driver */
6691         r = radeon_fence_driver_init(rdev);
6692         if (r)
6693                 return r;
6694
6695         /* initialize memory controller */
6696         r = si_mc_init(rdev);
6697         if (r)
6698                 return r;
6699         /* Memory manager */
6700         r = radeon_bo_init(rdev);
6701         if (r)
6702                 return r;
6703
6704         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6705             !rdev->rlc_fw || !rdev->mc_fw) {
6706                 r = si_init_microcode(rdev);
6707                 if (r) {
6708                         DRM_ERROR("Failed to load firmware!\n");
6709                         return r;
6710                 }
6711         }
6712
6713         /* Initialize power management */
6714         radeon_pm_init(rdev);
6715
6716         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6717         ring->ring_obj = NULL;
6718         r600_ring_init(rdev, ring, 1024 * 1024);
6719
6720         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6721         ring->ring_obj = NULL;
6722         r600_ring_init(rdev, ring, 1024 * 1024);
6723
6724         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6725         ring->ring_obj = NULL;
6726         r600_ring_init(rdev, ring, 1024 * 1024);
6727
6728         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6729         ring->ring_obj = NULL;
6730         r600_ring_init(rdev, ring, 64 * 1024);
6731
6732         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6733         ring->ring_obj = NULL;
6734         r600_ring_init(rdev, ring, 64 * 1024);
6735
6736         if (rdev->has_uvd) {
6737                 r = radeon_uvd_init(rdev);
6738                 if (!r) {
6739                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6740                         ring->ring_obj = NULL;
6741                         r600_ring_init(rdev, ring, 4096);
6742                 }
6743         }
6744
6745         rdev->ih.ring_obj = NULL;
6746         r600_ih_ring_init(rdev, 64 * 1024);
6747
6748         r = r600_pcie_gart_init(rdev);
6749         if (r)
6750                 return r;
6751
6752         rdev->accel_working = true;
6753         r = si_startup(rdev);
6754         if (r) {
6755                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6756                 si_cp_fini(rdev);
6757                 cayman_dma_fini(rdev);
6758                 si_irq_fini(rdev);
6759                 sumo_rlc_fini(rdev);
6760                 radeon_wb_fini(rdev);
6761                 radeon_ib_pool_fini(rdev);
6762                 radeon_vm_manager_fini(rdev);
6763                 radeon_irq_kms_fini(rdev);
6764                 si_pcie_gart_fini(rdev);
6765                 rdev->accel_working = false;
6766         }
6767
6768         /* Don't start up if the MC ucode is missing.
6769          * The default clocks and voltages before the MC ucode
6770          * is loaded are not sufficient for advanced operations.
6771          */
6772         if (!rdev->mc_fw) {
6773                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6774                 return -EINVAL;
6775         }
6776
6777         return 0;
6778 }
6779
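/**
 * si_fini - asic specific driver and hw tear down
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver state in roughly the reverse order of
 * si_init(): power management, CP, DMA, PG/CG, interrupts, RLC,
 * writeback, VM manager, IB pool, UVD, GART, GEM, fence driver,
 * memory manager and the cached BIOS (SI).
 */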
6780 void si_fini(struct radeon_device *rdev)
6781 {
6782         radeon_pm_fini(rdev);
6783         si_cp_fini(rdev);
6784         cayman_dma_fini(rdev);
6785         si_fini_pg(rdev);
6786         si_fini_cg(rdev);
6787         si_irq_fini(rdev);
6788         sumo_rlc_fini(rdev);
6789         radeon_wb_fini(rdev);
6790         radeon_vm_manager_fini(rdev);
6791         radeon_ib_pool_fini(rdev);
6792         radeon_irq_kms_fini(rdev);
6793         if (rdev->has_uvd) {
6794                 uvd_v1_0_fini(rdev);
6795                 radeon_uvd_fini(rdev);
6796         }
6797         si_pcie_gart_fini(rdev);
6798         r600_vram_scratch_fini(rdev);
6799         radeon_gem_fini(rdev);
6800         radeon_fence_driver_fini(rdev);
6801         radeon_bo_fini(rdev);
6802         radeon_atombios_fini(rdev);
6803         kfree(rdev->bios);
6804         rdev->bios = NULL;
6805 }
6806
6807 /**
6808  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6809  *
6810  * @rdev: radeon_device pointer
6811  *
6812  * Fetches a GPU clock counter snapshot (SI).
6813  * Returns the 64 bit clock counter snapshot.
6814  */
6815 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6816 {
6817         uint64_t clock;
6818
6819         mutex_lock(&rdev->gpu_clock_mutex);
6820         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6821         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6822                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6823         mutex_unlock(&rdev->gpu_clock_mutex);
6824         return clock;
6825 }
6826
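/**
 * si_set_uvd_clocks - program the UPLL for the requested UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (0 leaves the PLL bypassed and asleep)
 * @dclk: requested UVD decode clock (0 leaves the PLL bypassed and asleep)
 *
 * Bypasses VCLK and DCLK with BCLK, computes and programs the UPLL
 * dividers and then switches the PLL back to normal operation (SI).
 * Returns 0 for success, error for failure.
 */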
6827 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6828 {
6829         unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
6830         int r;
6831
6832         /* bypass vclk and dclk with bclk */
6833         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6834                 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
6835                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6836
6837         /* put PLL in bypass mode */
6838         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
6839
6840         if (!vclk || !dclk) {
6841                 /* keep the Bypass mode, put PLL to sleep */
6842                 WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6843                 return 0;
6844         }
6845
6846         r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
6847                                           16384, 0x03FFFFFF, 0, 128, 5,
6848                                           &fb_div, &vclk_div, &dclk_div);
6849         if (r)
6850                 return r;
6851
6852         /* set RESET_ANTI_MUX to 0 */
6853         WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
6854
6855         /* set VCO_MODE to 1 */
6856         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
6857
6858         /* toggle UPLL_SLEEP to 1 then back to 0 */
6859         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6860         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
6861
6862         /* deassert UPLL_RESET */
6863         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6864
6865         mdelay(1);
6866
6867         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6868         if (r)
6869                 return r;
6870
6871         /* assert UPLL_RESET again */
6872         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
6873
6874         /* disable spread spectrum. */
6875         WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
6876
6877         /* set feedback divider */
6878         WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
6879
6880         /* set ref divider to 0 */
6881         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
6882
6883         if (fb_div < 307200)
6884                 WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
6885         else
6886                 WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
6887
6888         /* set PDIV_A and PDIV_B */
6889         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6890                 UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
6891                 ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
6892
6893         /* give the PLL some time to settle */
6894         mdelay(15);
6895
6896         /* deassert PLL_RESET */
6897         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6898
6899         mdelay(15);
6900
6901         /* switch from bypass mode to normal mode */
6902         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
6903
6904         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6905         if (r)
6906                 return r;
6907
6908         /* switch VCLK and DCLK selection */
6909         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6910                 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
6911                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6912
6913         mdelay(100);
6914
6915         return 0;
6916 }
6917
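/**
 * si_pcie_gen3_enable - enable higher PCIE link speeds
 *
 * @rdev: radeon_device pointer
 *
 * Switches the PCIE link to gen2 or gen3 speeds when both the GPU and
 * the upstream bridge support them, re-running link equalization for
 * gen3 if needed (SI).  Disabled with radeon.pcie_gen2=0.
 */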
6918 static void si_pcie_gen3_enable(struct radeon_device *rdev)
6919 {
6920         struct pci_dev *root = rdev->pdev->bus->self;
6921         int bridge_pos, gpu_pos;
6922         u32 speed_cntl, mask, current_data_rate;
6923         int ret, i;
6924         u16 tmp16;
6925
6926         if (radeon_pcie_gen2 == 0)
6927                 return;
6928
6929         if (rdev->flags & RADEON_IS_IGP)
6930                 return;
6931
6932         if (!(rdev->flags & RADEON_IS_PCIE))
6933                 return;
6934
6935         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
6936         if (ret != 0)
6937                 return;
6938
6939         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
6940                 return;
6941
6942         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
6943         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
6944                 LC_CURRENT_DATA_RATE_SHIFT;
6945         if (mask & DRM_PCIE_SPEED_80) {
6946                 if (current_data_rate == 2) {
6947                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
6948                         return;
6949                 }
6950                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
6951         } else if (mask & DRM_PCIE_SPEED_50) {
6952                 if (current_data_rate == 1) {
6953                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
6954                         return;
6955                 }
6956                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
6957         }
6958
6959         bridge_pos = pci_pcie_cap(root);
6960         if (!bridge_pos)
6961                 return;
6962
6963         gpu_pos = pci_pcie_cap(rdev->pdev);
6964         if (!gpu_pos)
6965                 return;
6966
6967         if (mask & DRM_PCIE_SPEED_80) {
6968                 /* re-try equalization if gen3 is not already enabled */
6969                 if (current_data_rate != 2) {
6970                         u16 bridge_cfg, gpu_cfg;
6971                         u16 bridge_cfg2, gpu_cfg2;
6972                         u32 max_lw, current_lw, tmp;
6973
6974                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
6975                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
6976
6977                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
6978                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
6979
6980                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
6981                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
6982
6983                         tmp = RREG32_PCIE(PCIE_LC_STATUS1);
6984                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
6985                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
6986
6987                         if (current_lw < max_lw) {
6988                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
6989                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
6990                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
6991                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
6992                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
6993                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
6994                                 }
6995                         }
6996
6997                         for (i = 0; i < 10; i++) {
6998                                 /* check status */
6999                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7000                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7001                                         break;
7002
7003                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7004                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7005
7006                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7007                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7008
7009                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7010                                 tmp |= LC_SET_QUIESCE;
7011                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7012
7013                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7014                                 tmp |= LC_REDO_EQ;
7015                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7016
7017                                 mdelay(100);
7018
7019                                 /* linkctl */
7020                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7021                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7022                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7023                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7024
7025                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7026                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7027                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7028                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7029
7030                                 /* linkctl2 */
7031                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7032                                 tmp16 &= ~((1 << 4) | (7 << 9));
7033                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7034                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7035
7036                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7037                                 tmp16 &= ~((1 << 4) | (7 << 9));
7038                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7039                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7040
7041                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7042                                 tmp &= ~LC_SET_QUIESCE;
7043                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7044                         }
7045                 }
7046         }
7047
7048         /* set the link speed */
7049         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7050         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7051         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7052
7053         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7054         tmp16 &= ~0xf;
7055         if (mask & DRM_PCIE_SPEED_80)
7056                 tmp16 |= 3; /* gen3 */
7057         else if (mask & DRM_PCIE_SPEED_50)
7058                 tmp16 |= 2; /* gen2 */
7059         else
7060                 tmp16 |= 1; /* gen1 */
7061         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7062
7063         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7064         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7065         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7066
7067         for (i = 0; i < rdev->usec_timeout; i++) {
7068                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7069                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7070                         break;
7071                 udelay(1);
7072         }
7073 }
7074
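/**
 * si_program_aspm - program PCIE ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Configures the L0s/L1 link power states, PLL power-down in L1 and
 * CLKREQ handling for the PCIE link (SI).  Disabled with radeon.aspm=0.
 */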
7075 static void si_program_aspm(struct radeon_device *rdev)
7076 {
7077         u32 data, orig;
7078         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7079         bool disable_clkreq = false;
7080
7081         if (radeon_aspm == 0)
7082                 return;
7083
7084         if (!(rdev->flags & RADEON_IS_PCIE))
7085                 return;
7086
7087         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7088         data &= ~LC_XMIT_N_FTS_MASK;
7089         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7090         if (orig != data)
7091                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7092
7093         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7094         data |= LC_GO_TO_RECOVERY;
7095         if (orig != data)
7096                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7097
7098         orig = data = RREG32_PCIE(PCIE_P_CNTL);
7099         data |= P_IGNORE_EDB_ERR;
7100         if (orig != data)
7101                 WREG32_PCIE(PCIE_P_CNTL, data);
7102
7103         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7104         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7105         data |= LC_PMI_TO_L1_DIS;
7106         if (!disable_l0s)
7107                 data |= LC_L0S_INACTIVITY(7);
7108
7109         if (!disable_l1) {
7110                 data |= LC_L1_INACTIVITY(7);
7111                 data &= ~LC_PMI_TO_L1_DIS;
7112                 if (orig != data)
7113                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7114
7115                 if (!disable_plloff_in_l1) {
7116                         bool clk_req_support;
7117
7118                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7119                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7120                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7121                         if (orig != data)
7122                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7123
7124                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7125                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7126                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7127                         if (orig != data)
7128                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7129
7130                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7131                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7132                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7133                         if (orig != data)
7134                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7135
7136                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7137                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7138                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7139                         if (orig != data)
7140                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7141
7142                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7143                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7144                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7145                                 if (orig != data)
7146                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7147
7148                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7149                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7150                                 if (orig != data)
7151                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7152
7153                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7154                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7155                                 if (orig != data)
7156                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7157
7158                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7159                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7160                                 if (orig != data)
7161                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7162
7163                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7164                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7165                                 if (orig != data)
7166                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7167
7168                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7169                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7170                                 if (orig != data)
7171                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7172
7173                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7174                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7175                                 if (orig != data)
7176                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7177
7178                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7179                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7180                                 if (orig != data)
7181                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7182                         }
7183                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7184                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7185                         data |= LC_DYN_LANES_PWR_STATE(3);
7186                         if (orig != data)
7187                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7188
7189                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7190                         data &= ~LS2_EXIT_TIME_MASK;
7191                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7192                                 data |= LS2_EXIT_TIME(5);
7193                         if (orig != data)
7194                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7195
7196                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7197                         data &= ~LS2_EXIT_TIME_MASK;
7198                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7199                                 data |= LS2_EXIT_TIME(5);
7200                         if (orig != data)
7201                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7202
7203                         if (!disable_clkreq) {
7204                                 struct pci_dev *root = rdev->pdev->bus->self;
7205                                 u32 lnkcap;
7206
7207                                 clk_req_support = false;
7208                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7209                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7210                                         clk_req_support = true;
7211                         } else {
7212                                 clk_req_support = false;
7213                         }
7214
7215                         if (clk_req_support) {
7216                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7217                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7218                                 if (orig != data)
7219                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7220
7221                                 orig = data = RREG32(THM_CLK_CNTL);
7222                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7223                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7224                                 if (orig != data)
7225                                         WREG32(THM_CLK_CNTL, data);
7226
7227                                 orig = data = RREG32(MISC_CLK_CNTL);
7228                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7229                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7230                                 if (orig != data)
7231                                         WREG32(MISC_CLK_CNTL, data);
7232
7233                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7234                                 data &= ~BCLK_AS_XCLK;
7235                                 if (orig != data)
7236                                         WREG32(CG_CLKPIN_CNTL, data);
7237
7238                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7239                                 data &= ~FORCE_BIF_REFCLK_EN;
7240                                 if (orig != data)
7241                                         WREG32(CG_CLKPIN_CNTL_2, data);
7242
7243                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7244                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7245                                 data |= MPLL_CLKOUT_SEL(4);
7246                                 if (orig != data)
7247                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7248
7249                                 orig = data = RREG32(SPLL_CNTL_MODE);
7250                                 data &= ~SPLL_REFCLK_SEL_MASK;
7251                                 if (orig != data)
7252                                         WREG32(SPLL_CNTL_MODE, data);
7253                         }
7254                 }
7255         } else {
7256                 if (orig != data)
7257                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7258         }
7259
7260         orig = data = RREG32_PCIE(PCIE_CNTL2);
7261         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7262         if (orig != data)
7263                 WREG32_PCIE(PCIE_CNTL2, data);
7264
7265         if (!disable_l0s) {
7266                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7267                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7268                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7269                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7270                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7271                                 data &= ~LC_L0S_INACTIVITY_MASK;
7272                                 if (orig != data)
7273                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7274                         }
7275                 }
7276         }
7277 }