/* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
 *
 * Copyright (C) The Weather Channel, Inc.  2002.
 * Copyright (C) 2004 Nicolai Haehnle.
 * All Rights Reserved.
 *
 * The Weather Channel (TM) funded Tungsten Graphics to develop the
 * initial release of the Radeon 8500 driver under the XFree86 license.
 * This notice must be preserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Nicolai Haehnle <prefect_@gmx.net>
 */

#include "drmP.h"
#include "drm.h"
#include "radeon_drm.h"
#include "radeon_drv.h"
#include "r300_reg.h"

#define R300_SIMULTANEOUS_CLIPRECTS             4

/* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects
 */
static const int r300_cliprect_cntl[4] = {
        0xAAAA,
        0xEEEE,
        0xFEFE,
        0xFFFE
};

/**
 * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
 * buffer, starting with index n.
 */
static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
                               drm_radeon_kcmd_buffer_t *cmdbuf, int n)
{
        struct drm_clip_rect box;
        int nr;
        int i;
        RING_LOCALS;

        nr = cmdbuf->nbox - n;
        if (nr > R300_SIMULTANEOUS_CLIPRECTS)
                nr = R300_SIMULTANEOUS_CLIPRECTS;

        DRM_DEBUG("%i cliprects\n", nr);

        if (nr) {
                BEGIN_RING(6 + nr * 2);
                OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));

                for (i = 0; i < nr; ++i) {
                        if (DRM_COPY_FROM_USER_UNCHECKED
                            (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
                                DRM_ERROR("copy cliprect faulted\n");
                                return -EFAULT;
                        }

                        if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
                                box.x1 = (box.x1) &
                                        R300_CLIPRECT_MASK;
                                box.y1 = (box.y1) &
                                        R300_CLIPRECT_MASK;
                                box.x2 = (box.x2) &
                                        R300_CLIPRECT_MASK;
                                box.y2 = (box.y2) &
                                        R300_CLIPRECT_MASK;
                        } else {
                                box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
                                        R300_CLIPRECT_MASK;
                                box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
                                        R300_CLIPRECT_MASK;
                                box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
                                        R300_CLIPRECT_MASK;
                                box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
                                        R300_CLIPRECT_MASK;

                        }
                        OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
                                 (box.y1 << R300_CLIPRECT_Y_SHIFT));
                        OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
                                 (box.y2 << R300_CLIPRECT_Y_SHIFT));

                }

                OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);

                /* TODO/SECURITY: Force scissors to a safe value, otherwise the
                 * client might be able to trample over memory.
                 * The impact should be very limited, but I'd rather be safe than
                 * sorry.
                 */
                OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
                OUT_RING(0);
                OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
                ADVANCE_RING();
        } else {
                /* Why we allow zero cliprect rendering:
                 * There are some commands in a command buffer that must be submitted
                 * even when there are no cliprects, e.g. DMA buffer discard
                 * or state setting (though state setting could be avoided by
                 * simulating a loss of context).
                 *
                 * Now since the cmdbuf interface is so chaotic right now (and is
                 * bound to remain that way for a bit until things settle down),
                 * it is basically impossible to filter out the commands that are
                 * necessary and those that aren't.
                 *
                 * So I choose the safe way and don't do any filtering at all;
                 * instead, I simply set up the engine so that all rendering
                 * can't produce any fragments.
                 */
                BEGIN_RING(2);
                OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
                ADVANCE_RING();
        }

        return 0;
}

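/* Per-register safety flags, indexed by register offset >> 2.  Filled in by
 * r300_init_reg_flags() and consulted when validating packet0 writes coming
 * from userspace (MARK_SAFE / MARK_CHECK_OFFSET).
 */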
static u8 r300_reg_flags[0x10000 >> 2];

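/**
 * Build the r300_reg_flags table: mark the register ranges that userspace
 * command buffers are allowed to write, and flag registers that carry
 * memory offsets (color, depth and texture offsets) for extra validation.
 * RV515 and newer chips get their own set of shader register ranges.
 */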
void r300_init_reg_flags(struct drm_device *dev)
{
        int i;
        drm_radeon_private_t *dev_priv = dev->dev_private;

        memset(r300_reg_flags, 0, 0x10000 >> 2);
#define ADD_RANGE_MARK(reg, count, mark) \
                for (i = ((reg) >> 2); i < ((reg) >> 2) + (count); i++)\
                        r300_reg_flags[i] |= (mark);

#define MARK_SAFE               1
#define MARK_CHECK_OFFSET       2

#define ADD_RANGE(reg, count)   ADD_RANGE_MARK(reg, count, MARK_SAFE)

        /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
        ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
        ADD_RANGE(R300_VAP_CNTL, 1);
        ADD_RANGE(R300_SE_VTE_CNTL, 2);
        ADD_RANGE(0x2134, 2);
        ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
        ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
        ADD_RANGE(0x21DC, 1);
        ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
        ADD_RANGE(R300_VAP_CLIP_X_0, 4);
        ADD_RANGE(R300_VAP_PVS_WAITIDLE, 1);
        ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
        ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
        ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
        ADD_RANGE(R300_GB_ENABLE, 1);
        ADD_RANGE(R300_GB_MSPOS0, 5);
        ADD_RANGE(R300_TX_CNTL, 1);
        ADD_RANGE(R300_TX_ENABLE, 1);
        ADD_RANGE(0x4200, 4);
        ADD_RANGE(0x4214, 1);
        ADD_RANGE(R300_RE_POINTSIZE, 1);
        ADD_RANGE(0x4230, 3);
        ADD_RANGE(R300_RE_LINE_CNT, 1);
        ADD_RANGE(R300_RE_UNK4238, 1);
        ADD_RANGE(0x4260, 3);
        ADD_RANGE(R300_RE_SHADE, 4);
        ADD_RANGE(R300_RE_POLYGON_MODE, 5);
        ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
        ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
        ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
        ADD_RANGE(R300_RE_CULL_CNTL, 1);
        ADD_RANGE(0x42C0, 2);
        ADD_RANGE(R300_RS_CNTL_0, 2);

        ADD_RANGE(R300_SC_HYPERZ, 2);
        ADD_RANGE(0x43E8, 1);

        ADD_RANGE(0x46A4, 5);

        ADD_RANGE(R300_RE_FOG_STATE, 1);
        ADD_RANGE(R300_FOG_COLOR_R, 3);
        ADD_RANGE(R300_PP_ALPHA_TEST, 2);
        ADD_RANGE(0x4BD8, 1);
        ADD_RANGE(R300_PFS_PARAM_0_X, 64);
        ADD_RANGE(0x4E00, 1);
        ADD_RANGE(R300_RB3D_CBLEND, 2);
        ADD_RANGE(R300_RB3D_COLORMASK, 1);
        ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
        ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);   /* check offset */
        ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
        ADD_RANGE(0x4E50, 9);
        ADD_RANGE(0x4E88, 1);
        ADD_RANGE(0x4EA0, 2);
        ADD_RANGE(R300_ZB_CNTL, 3);
        ADD_RANGE(R300_ZB_FORMAT, 4);
        ADD_RANGE_MARK(R300_ZB_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);      /* check offset */
        ADD_RANGE(R300_ZB_DEPTHPITCH, 1);
        ADD_RANGE(R300_ZB_DEPTHCLEARVALUE, 1);
        ADD_RANGE(R300_ZB_ZMASK_OFFSET, 13);

        ADD_RANGE(R300_TX_FILTER_0, 16);
        ADD_RANGE(R300_TX_FILTER1_0, 16);
        ADD_RANGE(R300_TX_SIZE_0, 16);
        ADD_RANGE(R300_TX_FORMAT_0, 16);
        ADD_RANGE(R300_TX_PITCH_0, 16);
        /* Texture offset is dangerous and needs more checking */
        ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
        ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
        ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);

        /* Registers written sporadically while primitives are being emitted */
        ADD_RANGE(R300_ZB_ZCACHE_CTLSTAT, 1);
        ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
        ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
        ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);

        if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
                ADD_RANGE(R500_VAP_INDEX_OFFSET, 1);
                ADD_RANGE(R500_US_CONFIG, 2);
                ADD_RANGE(R500_US_CODE_ADDR, 3);
                ADD_RANGE(R500_US_FC_CTRL, 1);
                ADD_RANGE(R500_RS_IP_0, 16);
                ADD_RANGE(R500_RS_INST_0, 16);
                ADD_RANGE(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2);
                ADD_RANGE(R500_RB3D_CONSTANT_COLOR_AR, 2);
                ADD_RANGE(R500_ZB_FIFO_SIZE, 2);
        } else {
                ADD_RANGE(R300_PFS_CNTL_0, 3);
                ADD_RANGE(R300_PFS_NODE_0, 4);
                ADD_RANGE(R300_PFS_TEXI_0, 64);
                ADD_RANGE(R300_PFS_INSTR0_0, 64);
                ADD_RANGE(R300_PFS_INSTR1_0, 64);
                ADD_RANGE(R300_PFS_INSTR2_0, 64);
                ADD_RANGE(R300_PFS_INSTR3_0, 64);
                ADD_RANGE(R300_RS_INTERP_0, 8);
                ADD_RANGE(R300_RS_ROUTE_0, 8);

        }
}

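/**
 * Check whether the register range [reg, reg + count * 4) consists only of
 * registers marked MARK_SAFE.  Returns 0 if the whole range is safe and may
 * be emitted without further checks, nonzero otherwise.
 */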
static __inline__ int r300_check_range(unsigned reg, int count)
{
        int i;
        if (reg & ~0xffff)
                return -1;
        for (i = (reg >> 2); i < (reg >> 2) + count; i++)
                if (r300_reg_flags[i] != MARK_SAFE)
                        return 1;
        return 0;
}

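/**
 * Emit a packet0 whose register range is not entirely MARK_SAFE: every
 * register is checked against r300_reg_flags, and values written to
 * registers marked MARK_CHECK_OFFSET must pass radeon_check_offset().
 * Called by r300_emit_packet0.
 */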
static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
                                                          dev_priv,
                                                          drm_radeon_kcmd_buffer_t
                                                          * cmdbuf,
                                                          drm_r300_cmd_header_t
                                                          header)
{
        int reg;
        int sz;
        int i;
        int values[64];
        RING_LOCALS;

        sz = header.packet0.count;
        reg = (header.packet0.reghi << 8) | header.packet0.reglo;

        if ((sz > 64) || (sz < 0)) {
                DRM_ERROR
                    ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
                     reg, sz);
                return -EINVAL;
        }
        for (i = 0; i < sz; i++) {
                values[i] = ((int *)cmdbuf->buf)[i];
                switch (r300_reg_flags[(reg >> 2) + i]) {
                case MARK_SAFE:
                        break;
                case MARK_CHECK_OFFSET:
                        if (!radeon_check_offset(dev_priv, (u32) values[i])) {
                                DRM_ERROR
                                    ("Offset failed range check (reg=%04x sz=%d)\n",
                                     reg, sz);
                                return -EINVAL;
                        }
                        break;
                default:
                        DRM_ERROR("Register %04x failed check as flag=%02x\n",
                                  reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
                        return -EINVAL;
                }
        }

        BEGIN_RING(1 + sz);
        OUT_RING(CP_PACKET0(reg, sz - 1));
        OUT_RING_TABLE(values, sz);
        ADVANCE_RING();

        cmdbuf->buf += sz * 4;
        cmdbuf->bufsz -= sz * 4;

        return 0;
}

/**
 * Emits a packet0 setting arbitrary registers.
 * Called by r300_do_cp_cmdbuf.
 *
 * Note that checks are performed on contents and addresses of the registers
 */
static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
                                        drm_radeon_kcmd_buffer_t *cmdbuf,
                                        drm_r300_cmd_header_t header)
{
        int reg;
        int sz;
        RING_LOCALS;

        sz = header.packet0.count;
        reg = (header.packet0.reghi << 8) | header.packet0.reglo;

        if (!sz)
                return 0;

        if (sz * 4 > cmdbuf->bufsz)
                return -EINVAL;

        if (reg + sz * 4 >= 0x10000) {
                DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
                          sz);
                return -EINVAL;
        }

        if (r300_check_range(reg, sz)) {
                /* go and check everything */
                return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
                                                           header);
        }
        /* the rest of the data is safe to emit, whatever the values the user passed */

        BEGIN_RING(1 + sz);
        OUT_RING(CP_PACKET0(reg, sz - 1));
        OUT_RING_TABLE((int *)cmdbuf->buf, sz);
        ADVANCE_RING();

        cmdbuf->buf += sz * 4;
        cmdbuf->bufsz -= sz * 4;

        return 0;
}

/**
 * Uploads user-supplied vertex program instructions or parameters onto
 * the graphics card.
 * Called by r300_do_cp_cmdbuf.
 */
static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
                                    drm_radeon_kcmd_buffer_t *cmdbuf,
                                    drm_r300_cmd_header_t header)
{
        int sz;
        int addr;
        RING_LOCALS;

        sz = header.vpu.count;
        addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;

        if (!sz)
                return 0;
        if (sz * 16 > cmdbuf->bufsz)
                return -EINVAL;

        BEGIN_RING(5 + sz * 4);
        /* Wait for VAP to come to its senses.
         * There is no need to emit this more than once (only before the VAP
         * is programmed), but that optimization is left for later.
         */
        OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
        OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
        OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
        OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);

        ADVANCE_RING();

        cmdbuf->buf += sz * 16;
        cmdbuf->bufsz -= sz * 16;

        return 0;
}

/**
 * Emit a clear packet from userspace.
 * Called by r300_emit_packet3.
 */
static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
                                      drm_radeon_kcmd_buffer_t *cmdbuf)
{
        RING_LOCALS;

        if (8 * 4 > cmdbuf->bufsz)
                return -EINVAL;

        BEGIN_RING(10);
        OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
        OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
                 (1 << R300_PRIM_NUM_VERTICES_SHIFT));
        OUT_RING_TABLE((int *)cmdbuf->buf, 8);
        ADVANCE_RING();

        cmdbuf->buf += 8 * 4;
        cmdbuf->bufsz -= 8 * 4;

        return 0;
}

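/**
 * Validate and emit a 3D_LOAD_VBPNTR packet.  Every vertex array offset in
 * the packet payload is checked with radeon_check_offset() before the packet
 * is passed on to the ring.
 * Called by r300_emit_raw_packet3.
 */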
static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
                                               drm_radeon_kcmd_buffer_t *cmdbuf,
                                               u32 header)
{
        int count, i, k;
#define MAX_ARRAY_PACKET  64
        u32 payload[MAX_ARRAY_PACKET];
        u32 narrays;
        RING_LOCALS;

        count = (header >> 16) & 0x3fff;

        if ((count + 1) > MAX_ARRAY_PACKET) {
                DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
                          count);
                return -EINVAL;
        }
        memset(payload, 0, MAX_ARRAY_PACKET * 4);
        memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);

        /* carefully check packet contents */

        narrays = payload[0];
        k = 0;
        i = 1;
        while ((k < narrays) && (i < (count + 1))) {
                i++;            /* skip attribute field */
                if (!radeon_check_offset(dev_priv, payload[i])) {
                        DRM_ERROR
                            ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
                             k, i);
                        return -EINVAL;
                }
                k++;
                i++;
                if (k == narrays)
                        break;
                /* have one more to process, they come in pairs */
                if (!radeon_check_offset(dev_priv, payload[i])) {
                        DRM_ERROR
                            ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
                             k, i);
                        return -EINVAL;
                }
                k++;
                i++;
        }
        /* do the counts match what we expect ? */
        if ((k != narrays) || (i != (count + 1))) {
                DRM_ERROR
                    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
                     k, i, narrays, count + 1);
                return -EINVAL;
        }

        /* all clear, output packet */

        BEGIN_RING(count + 2);
        OUT_RING(header);
        OUT_RING_TABLE(payload, count + 1);
        ADVANCE_RING();

        cmdbuf->buf += (count + 2) * 4;
        cmdbuf->bufsz -= (count + 2) * 4;

        return 0;
}

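/**
 * Validate and emit a CNTL_BITBLT_MULTI packet.  When the blit carries its
 * own source/destination pitch-offset words, those offsets are range-checked
 * with radeon_check_offset() before the packet is emitted.
 * Called by r300_emit_raw_packet3.
 */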
static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
                                             drm_radeon_kcmd_buffer_t *cmdbuf)
{
        u32 *cmd = (u32 *) cmdbuf->buf;
        int count, ret;
        RING_LOCALS;

        count = (cmd[0] >> 16) & 0x3fff;

        if (cmd[0] & 0x8000) {
                u32 offset;

                if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
                              | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
                        offset = cmd[2] << 10;
                        ret = !radeon_check_offset(dev_priv, offset);
                        if (ret) {
                                DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
                                return -EINVAL;
                        }
                }

                if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
                    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
                        offset = cmd[3] << 10;
                        ret = !radeon_check_offset(dev_priv, offset);
                        if (ret) {
                                DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
                                return -EINVAL;
                        }

                }
        }

        BEGIN_RING(count + 2);
        OUT_RING(cmd[0]);
        OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
        ADVANCE_RING();

        cmdbuf->buf += (count + 2) * 4;
        cmdbuf->bufsz -= (count + 2) * 4;

        return 0;
}

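/**
 * Validate and emit a CP_INDX_BUFFER packet: the register address encoded in
 * the packet and the index buffer offset are both checked before the packet
 * is passed on to the ring.
 * Called by r300_emit_raw_packet3.
 */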
static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
                                            drm_radeon_kcmd_buffer_t *cmdbuf)
{
        u32 *cmd = (u32 *) cmdbuf->buf;
        int count, ret;
        RING_LOCALS;

        count = (cmd[0] >> 16) & 0x3fff;

        if ((cmd[1] & 0x8000ffff) != 0x80000810) {
                DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
                return -EINVAL;
        }
        ret = !radeon_check_offset(dev_priv, cmd[2]);
        if (ret) {
                DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
                return -EINVAL;
        }

        BEGIN_RING(count + 2);
        OUT_RING(cmd[0]);
        OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
        ADVANCE_RING();

        cmdbuf->buf += (count + 2) * 4;
        cmdbuf->bufsz -= (count + 2) * 4;

        return 0;
}

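/**
 * Check a raw packet3 taken from the command buffer.  Packets that carry
 * memory offsets are handed to the specialized checkers above; a small set
 * of other known-safe packet types is emitted unchanged, and everything else
 * is rejected.
 * Called by r300_emit_packet3.
 */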
static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
                                            drm_radeon_kcmd_buffer_t *cmdbuf)
{
        u32 header;
        int count;
        RING_LOCALS;

        if (4 > cmdbuf->bufsz)
                return -EINVAL;

        /* Fixme !! This simply emits a packet without much checking.
           We need to be smarter. */

        /* obtain first word - actual packet3 header */
        header = *(u32 *) cmdbuf->buf;

        /* Is it packet 3 ? */
        if ((header >> 30) != 0x3) {
                DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
                return -EINVAL;
        }

        count = (header >> 16) & 0x3fff;

        /* Check again now that we know how much data to expect */
        if ((count + 2) * 4 > cmdbuf->bufsz) {
                DRM_ERROR
                    ("Expected packet3 of length %d but have only %d bytes left\n",
                     (count + 2) * 4, cmdbuf->bufsz);
                return -EINVAL;
        }

        /* Is it a packet type we know about ? */
        switch (header & 0xff00) {
        case RADEON_3D_LOAD_VBPNTR:     /* load vertex array pointers */
                return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);

        case RADEON_CNTL_BITBLT_MULTI:
                return r300_emit_bitblt_multi(dev_priv, cmdbuf);

        case RADEON_CP_INDX_BUFFER:     /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
                return r300_emit_indx_buffer(dev_priv, cmdbuf);
        case RADEON_CP_3D_DRAW_IMMD_2:  /* triggers drawing using in-packet vertex data */
        case RADEON_CP_3D_DRAW_VBUF_2:  /* triggers drawing of vertex buffers setup elsewhere */
        case RADEON_CP_3D_DRAW_INDX_2:  /* triggers drawing using indices to vertex buffer */
        case RADEON_WAIT_FOR_IDLE:
        case RADEON_CP_NOP:
                /* these packets are safe */
                break;
        default:
                DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
                return -EINVAL;
        }

        BEGIN_RING(count + 2);
        OUT_RING(header);
        OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
        ADVANCE_RING();

        cmdbuf->buf += (count + 2) * 4;
        cmdbuf->bufsz -= (count + 2) * 4;

        return 0;
}

/**
 * Emit a rendering packet3 from userspace.
 * Called by r300_do_cp_cmdbuf.
 */
static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
                                        drm_radeon_kcmd_buffer_t *cmdbuf,
                                        drm_r300_cmd_header_t header)
{
        int n;
        int ret;
        char *orig_buf = cmdbuf->buf;
        int orig_bufsz = cmdbuf->bufsz;

        /* This is a do-while-loop so that we run the interior at least once,
         * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
         */
        n = 0;
        do {
                if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
                        ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
                        if (ret)
                                return ret;

                        cmdbuf->buf = orig_buf;
                        cmdbuf->bufsz = orig_bufsz;
                }

                switch (header.packet3.packet) {
                case R300_CMD_PACKET3_CLEAR:
                        DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
                        ret = r300_emit_clear(dev_priv, cmdbuf);
                        if (ret) {
                                DRM_ERROR("r300_emit_clear failed\n");
                                return ret;
                        }
                        break;

                case R300_CMD_PACKET3_RAW:
                        DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
                        ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
                        if (ret) {
                                DRM_ERROR("r300_emit_raw_packet3 failed\n");
                                return ret;
                        }
                        break;

                default:
                        DRM_ERROR("bad packet3 type %i at %p\n",
                                  header.packet3.packet,
                                  cmdbuf->buf - sizeof(header));
                        return -EINVAL;
                }

                n += R300_SIMULTANEOUS_CLIPRECTS;
        } while (n < cmdbuf->nbox);

        return 0;
}

/* Some of the R300 chips seem to be extremely touchy about the two registers
 * that are configured in r300_pacify.
 * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
 * sends a command buffer that contains only state setting commands and a
 * vertex program/parameter upload sequence, this will eventually lead to a
 * lockup, unless the sequence is bracketed by calls to r300_pacify.
 * So we should take great care to *always* call r300_pacify before
 * *anything* 3D related, and again afterwards. This is what the
 * call bracket in r300_do_cp_cmdbuf is for.
 */

/**
 * Emit the sequence to pacify R300.
 */
static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
{
        RING_LOCALS;

        BEGIN_RING(6);
        OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
        OUT_RING(R300_RB3D_DSTCACHE_UNKNOWN_0A);
        OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0));
        OUT_RING(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
                 R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
        OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
        OUT_RING(0x0);
        ADVANCE_RING();
}

/**
 * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
 * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
 * be careful about how this function is called.
 */
static void r300_discard_buffer(struct drm_device *dev, struct drm_buf *buf)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_buf_priv_t *buf_priv = buf->dev_private;

        buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
        buf->pending = 1;
        buf->used = 0;
}

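/**
 * Emit a RADEON_WAIT_UNTIL packet for the engine-idle condition encoded in
 * an R300_CMD_WAIT command header.  Unknown flag combinations are silently
 * ignored.
 */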
static void r300_cmd_wait(drm_radeon_private_t *dev_priv,
                          drm_r300_cmd_header_t header)
{
        u32 wait_until;
        RING_LOCALS;

        if (!header.wait.flags)
                return;

        wait_until = 0;

        switch (header.wait.flags) {
        case R300_WAIT_2D:
                wait_until = RADEON_WAIT_2D_IDLE;
                break;
        case R300_WAIT_3D:
                wait_until = RADEON_WAIT_3D_IDLE;
                break;
        case R300_NEW_WAIT_2D_3D:
                wait_until = RADEON_WAIT_2D_IDLE | RADEON_WAIT_3D_IDLE;
                break;
        case R300_NEW_WAIT_2D_2D_CLEAN:
                wait_until = RADEON_WAIT_2D_IDLE | RADEON_WAIT_2D_IDLECLEAN;
                break;
        case R300_NEW_WAIT_3D_3D_CLEAN:
                wait_until = RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN;
                break;
        case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
                wait_until = RADEON_WAIT_2D_IDLE | RADEON_WAIT_2D_IDLECLEAN;
                wait_until |= RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN;
                break;
        default:
                return;
        }

        BEGIN_RING(2);
        OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
        OUT_RING(wait_until);
        ADVANCE_RING();
}

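/**
 * Handle an R300_CMD_SCRATCH command: bump the age of the selected scratch
 * register, write the new age into the user-supplied buffer-age array,
 * decrement each buffer's pending count, and finally emit the scratch
 * register write to the ring.
 */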
static int r300_scratch(drm_radeon_private_t *dev_priv,
                        drm_radeon_kcmd_buffer_t *cmdbuf,
                        drm_r300_cmd_header_t header)
{
        u32 *ref_age_base;
        u32 i, buf_idx, h_pending;
        RING_LOCALS;

        if (cmdbuf->bufsz <
            (sizeof(u64) + header.scratch.n_bufs * sizeof(buf_idx))) {
                return -EINVAL;
        }

        if (header.scratch.reg >= 5) {
                return -EINVAL;
        }

        dev_priv->scratch_ages[header.scratch.reg]++;

        ref_age_base = (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);

        cmdbuf->buf += sizeof(u64);
        cmdbuf->bufsz -= sizeof(u64);

        for (i = 0; i < header.scratch.n_bufs; i++) {
                buf_idx = *(u32 *)cmdbuf->buf;
                buf_idx *= 2; /* 8 bytes per buf */

                if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
                        return -EINVAL;
                }

                if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
                        return -EINVAL;
                }

                if (h_pending == 0) {
                        return -EINVAL;
                }

                h_pending--;

                if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
                        return -EINVAL;
                }

                cmdbuf->buf += sizeof(buf_idx);
                cmdbuf->bufsz -= sizeof(buf_idx);
        }

        BEGIN_RING(2);
        OUT_RING(CP_PACKET0(RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0));
        OUT_RING(dev_priv->scratch_ages[header.scratch.reg]);
        ADVANCE_RING();

        return 0;
}

/**
 * Uploads user-supplied fragment program instructions or constants onto
 * the graphics card (R5xx only).
 * Called by r300_do_cp_cmdbuf.
 */
static inline int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
                                   drm_radeon_kcmd_buffer_t *cmdbuf,
                                   drm_r300_cmd_header_t header)
{
        int sz;
        int addr;
        int type;
        int clamp;
        int stride;
        RING_LOCALS;

        sz = header.r500fp.count;
        /* address is 9 bits 0 - 8, bit 1 of flags is part of address */
        addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo;

        type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
        clamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);

        addr |= (type << 16);
        addr |= (clamp << 17);

        stride = type ? 4 : 6;

        DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type);
        if (!sz)
                return 0;
        if (sz * stride * 4 > cmdbuf->bufsz)
                return -EINVAL;

        BEGIN_RING(3 + sz * stride);
        OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr);
        OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1));
        OUT_RING_TABLE((int *)cmdbuf->buf, sz * stride);

        ADVANCE_RING();

        cmdbuf->buf += sz * stride * 4;
        cmdbuf->bufsz -= sz * stride * 4;

        return 0;
}

/**
 * Parses and validates a user-supplied command buffer and emits appropriate
 * commands on the DMA ring buffer.
 * Called by the ioctl handler function radeon_cp_cmdbuf.
 */
int r300_do_cp_cmdbuf(struct drm_device *dev,
                      struct drm_file *file_priv,
                      drm_radeon_kcmd_buffer_t *cmdbuf)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        struct drm_device_dma *dma = dev->dma;
        struct drm_buf *buf = NULL;
        int emit_dispatch_age = 0;
        int ret = 0;

        DRM_DEBUG("\n");

        /* See the comment above r300_pacify for why this call must be here,
         * and what the cleanup gotos are for. */
        r300_pacify(dev_priv);

        if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
                ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
                if (ret)
                        goto cleanup;
        }

        while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
                int idx;
                drm_r300_cmd_header_t header;

                header.u = *(unsigned int *)cmdbuf->buf;

                cmdbuf->buf += sizeof(header);
                cmdbuf->bufsz -= sizeof(header);

                switch (header.header.cmd_type) {
                case R300_CMD_PACKET0:
                        DRM_DEBUG("R300_CMD_PACKET0\n");
                        ret = r300_emit_packet0(dev_priv, cmdbuf, header);
                        if (ret) {
                                DRM_ERROR("r300_emit_packet0 failed\n");
                                goto cleanup;
                        }
                        break;

                case R300_CMD_VPU:
                        DRM_DEBUG("R300_CMD_VPU\n");
                        ret = r300_emit_vpu(dev_priv, cmdbuf, header);
                        if (ret) {
                                DRM_ERROR("r300_emit_vpu failed\n");
                                goto cleanup;
                        }
                        break;

                case R300_CMD_PACKET3:
                        DRM_DEBUG("R300_CMD_PACKET3\n");
                        ret = r300_emit_packet3(dev_priv, cmdbuf, header);
                        if (ret) {
                                DRM_ERROR("r300_emit_packet3 failed\n");
                                goto cleanup;
                        }
                        break;

                case R300_CMD_END3D:
                        DRM_DEBUG("R300_CMD_END3D\n");
                        /* TODO:
                           Ideally the userspace driver should not need to issue this call,
                           i.e. the drm driver should issue it automatically and prevent
                           lockups.

                           In practice, we do not understand why this call is needed and what
                           it does (except for some vague guesses that it has to do with cache
                           coherence), so the userspace driver issues it for now.

                           Once we know which uses of it prevent lockups, the code could be
                           moved into the kernel and the userspace driver would no longer
                           need to use this command.

                           Note that issuing this command does not hurt anything except,
                           possibly, performance. */
                        r300_pacify(dev_priv);
                        break;

                case R300_CMD_CP_DELAY:
                        /* simple enough, we can do it here */
                        DRM_DEBUG("R300_CMD_CP_DELAY\n");
                        {
                                int i;
                                RING_LOCALS;

                                BEGIN_RING(header.delay.count);
                                for (i = 0; i < header.delay.count; i++)
                                        OUT_RING(RADEON_CP_PACKET2);
                                ADVANCE_RING();
                        }
                        break;

                case R300_CMD_DMA_DISCARD:
                        DRM_DEBUG("R300_CMD_DMA_DISCARD\n");
                        idx = header.dma.buf_idx;
                        if (idx < 0 || idx >= dma->buf_count) {
                                DRM_ERROR("buffer index %d (of %d max)\n",
                                          idx, dma->buf_count - 1);
                                ret = -EINVAL;
                                goto cleanup;
                        }

                        buf = dma->buflist[idx];
                        if (buf->file_priv != file_priv || buf->pending) {
                                DRM_ERROR("bad buffer %p %p %d\n",
                                          buf->file_priv, file_priv,
                                          buf->pending);
                                ret = -EINVAL;
                                goto cleanup;
                        }

                        emit_dispatch_age = 1;
                        r300_discard_buffer(dev, buf);
                        break;

                case R300_CMD_WAIT:
                        DRM_DEBUG("R300_CMD_WAIT\n");
                        r300_cmd_wait(dev_priv, header);
                        break;

                case R300_CMD_SCRATCH:
                        DRM_DEBUG("R300_CMD_SCRATCH\n");
                        ret = r300_scratch(dev_priv, cmdbuf, header);
                        if (ret) {
                                DRM_ERROR("r300_scratch failed\n");
                                goto cleanup;
                        }
                        break;

                case R300_CMD_R500FP:
                        if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) {
                                DRM_ERROR("Calling r500 command on r300 card\n");
                                ret = -EINVAL;
                                goto cleanup;
                        }
                        DRM_DEBUG("R300_CMD_R500FP\n");
                        ret = r300_emit_r500fp(dev_priv, cmdbuf, header);
                        if (ret) {
                                DRM_ERROR("r300_emit_r500fp failed\n");
                                goto cleanup;
                        }
                        break;
                default:
                        DRM_ERROR("bad cmd_type %i at %p\n",
                                  header.header.cmd_type,
                                  cmdbuf->buf - sizeof(header));
                        ret = -EINVAL;
                        goto cleanup;
                }
        }

        DRM_DEBUG("END\n");

      cleanup:
        r300_pacify(dev_priv);

        /* We emit the vertex buffer age here, outside the pacifier "brackets"
         * for two reasons:
         *  (1) This may coalesce multiple age emissions into a single one and
         *  (2) more importantly, some chips lock up hard when scratch registers
         *      are written inside the pacifier bracket.
         */
        if (emit_dispatch_age) {
                RING_LOCALS;

                /* Emit the vertex buffer age */
                BEGIN_RING(2);
                RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
                ADVANCE_RING();
        }

        COMMIT_RING();

        return ret;
}