Merge ../linux-2.6-watchdog-mm
[pandora-kernel.git] / drivers / char / drm / r300_cmdbuf.c
1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
2  *
3  * Copyright (C) The Weather Channel, Inc.  2002.
4  * Copyright (C) 2004 Nicolai Haehnle.
5  * All Rights Reserved.
6  *
7  * The Weather Channel (TM) funded Tungsten Graphics to develop the
8  * initial release of the Radeon 8500 driver under the XFree86 license.
9  * This notice must be preserved.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  *
30  * Authors:
31  *    Nicolai Haehnle <prefect_@gmx.net>
32  */
33
34 #include "drmP.h"
35 #include "drm.h"
36 #include "radeon_drm.h"
37 #include "radeon_drv.h"
38 #include "r300_reg.h"
39
/* Largest number of cliprects that can be programmed in one go. */
#define R300_SIMULTANEOUS_CLIPRECTS             4

/* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects.
 * The table is indexed by (number of cliprects - 1); it is sized with
 * R300_SIMULTANEOUS_CLIPRECTS so that the table and the limit used to clamp
 * the index cannot drift apart.
 */
static const int r300_cliprect_cntl[R300_SIMULTANEOUS_CLIPRECTS] = {
        0xAAAA,
        0xEEEE,
        0xFEFE,
        0xFFFE
};
50
51 /**
52  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
53  * buffer, starting with index n.
54  */
55 static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
56                                drm_radeon_kcmd_buffer_t *cmdbuf, int n)
57 {
58         drm_clip_rect_t box;
59         int nr;
60         int i;
61         RING_LOCALS;
62
63         nr = cmdbuf->nbox - n;
64         if (nr > R300_SIMULTANEOUS_CLIPRECTS)
65                 nr = R300_SIMULTANEOUS_CLIPRECTS;
66
67         DRM_DEBUG("%i cliprects\n", nr);
68
69         if (nr) {
70                 BEGIN_RING(6 + nr * 2);
71                 OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
72
73                 for (i = 0; i < nr; ++i) {
74                         if (DRM_COPY_FROM_USER_UNCHECKED
75                             (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
76                                 DRM_ERROR("copy cliprect faulted\n");
77                                 return DRM_ERR(EFAULT);
78                         }
79
80                         box.x1 =
81                             (box.x1 +
82                              R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
83                         box.y1 =
84                             (box.y1 +
85                              R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
86                         box.x2 =
87                             (box.x2 +
88                              R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
89                         box.y2 =
90                             (box.y2 +
91                              R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
92
93                         OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
94                                  (box.y1 << R300_CLIPRECT_Y_SHIFT));
95                         OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
96                                  (box.y2 << R300_CLIPRECT_Y_SHIFT));
97                 }
98
99                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
100
101                 /* TODO/SECURITY: Force scissors to a safe value, otherwise the
102                  * client might be able to trample over memory.
103                  * The impact should be very limited, but I'd rather be safe than
104                  * sorry.
105                  */
106                 OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
107                 OUT_RING(0);
108                 OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
109                 ADVANCE_RING();
110         } else {
111                 /* Why we allow zero cliprect rendering:
112                  * There are some commands in a command buffer that must be submitted
113                  * even when there are no cliprects, e.g. DMA buffer discard
114                  * or state setting (though state setting could be avoided by
115                  * simulating a loss of context).
116                  *
117                  * Now since the cmdbuf interface is so chaotic right now (and is
118                  * bound to remain that way for a bit until things settle down),
119                  * it is basically impossible to filter out the commands that are
120                  * necessary and those that aren't.
121                  *
122                  * So I choose the safe way and don't do any filtering at all;
123                  * instead, I simply set up the engine so that all rendering
124                  * can't produce any fragments.
125                  */
126                 BEGIN_RING(2);
127                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
128                 ADVANCE_RING();
129         }
130
131         return 0;
132 }
133
134 static u8 r300_reg_flags[0x10000 >> 2];
135
136 void r300_init_reg_flags(void)
137 {
138         int i;
139         memset(r300_reg_flags, 0, 0x10000 >> 2);
140 #define ADD_RANGE_MARK(reg, count,mark) \
141                 for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
142                         r300_reg_flags[i]|=(mark);
143
144 #define MARK_SAFE               1
145 #define MARK_CHECK_OFFSET       2
146
147 #define ADD_RANGE(reg, count)   ADD_RANGE_MARK(reg, count, MARK_SAFE)
148
149         /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
150         ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
151         ADD_RANGE(0x2080, 1);
152         ADD_RANGE(R300_SE_VTE_CNTL, 2);
153         ADD_RANGE(0x2134, 2);
154         ADD_RANGE(0x2140, 1);
155         ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
156         ADD_RANGE(0x21DC, 1);
157         ADD_RANGE(0x221C, 1);
158         ADD_RANGE(0x2220, 4);
159         ADD_RANGE(0x2288, 1);
160         ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
161         ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
162         ADD_RANGE(R300_GB_ENABLE, 1);
163         ADD_RANGE(R300_GB_MSPOS0, 5);
164         ADD_RANGE(R300_TX_CNTL, 1);
165         ADD_RANGE(R300_TX_ENABLE, 1);
166         ADD_RANGE(0x4200, 4);
167         ADD_RANGE(0x4214, 1);
168         ADD_RANGE(R300_RE_POINTSIZE, 1);
169         ADD_RANGE(0x4230, 3);
170         ADD_RANGE(R300_RE_LINE_CNT, 1);
171         ADD_RANGE(0x4238, 1);
172         ADD_RANGE(0x4260, 3);
173         ADD_RANGE(0x4274, 4);
174         ADD_RANGE(0x4288, 5);
175         ADD_RANGE(0x42A0, 1);
176         ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
177         ADD_RANGE(0x42B4, 1);
178         ADD_RANGE(R300_RE_CULL_CNTL, 1);
179         ADD_RANGE(0x42C0, 2);
180         ADD_RANGE(R300_RS_CNTL_0, 2);
181         ADD_RANGE(R300_RS_INTERP_0, 8);
182         ADD_RANGE(R300_RS_ROUTE_0, 8);
183         ADD_RANGE(0x43A4, 2);
184         ADD_RANGE(0x43E8, 1);
185         ADD_RANGE(R300_PFS_CNTL_0, 3);
186         ADD_RANGE(R300_PFS_NODE_0, 4);
187         ADD_RANGE(R300_PFS_TEXI_0, 64);
188         ADD_RANGE(0x46A4, 5);
189         ADD_RANGE(R300_PFS_INSTR0_0, 64);
190         ADD_RANGE(R300_PFS_INSTR1_0, 64);
191         ADD_RANGE(R300_PFS_INSTR2_0, 64);
192         ADD_RANGE(R300_PFS_INSTR3_0, 64);
193         ADD_RANGE(0x4BC0, 1);
194         ADD_RANGE(0x4BC8, 3);
195         ADD_RANGE(R300_PP_ALPHA_TEST, 2);
196         ADD_RANGE(0x4BD8, 1);
197         ADD_RANGE(R300_PFS_PARAM_0_X, 64);
198         ADD_RANGE(0x4E00, 1);
199         ADD_RANGE(R300_RB3D_CBLEND, 2);
200         ADD_RANGE(R300_RB3D_COLORMASK, 1);
201         ADD_RANGE(0x4E10, 3);
202         ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);   /* check offset */
203         ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
204         ADD_RANGE(0x4E50, 9);
205         ADD_RANGE(0x4E88, 1);
206         ADD_RANGE(0x4EA0, 2);
207         ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
208         ADD_RANGE(0x4F10, 4);
209         ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);    /* check offset */
210         ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
211         ADD_RANGE(0x4F28, 1);
212         ADD_RANGE(0x4F30, 2);
213         ADD_RANGE(0x4F44, 1);
214         ADD_RANGE(0x4F54, 1);
215
216         ADD_RANGE(R300_TX_FILTER_0, 16);
217         ADD_RANGE(R300_TX_FILTER1_0, 16);
218         ADD_RANGE(R300_TX_SIZE_0, 16);
219         ADD_RANGE(R300_TX_FORMAT_0, 16);
220         ADD_RANGE(R300_TX_PITCH_0, 16);
221         /* Texture offset is dangerous and needs more checking */
222         ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
223         ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
224         ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
225
226         /* Sporadic registers used as primitives are emitted */
227         ADD_RANGE(0x4f18, 1);
228         ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
229         ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
230         ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
231
232 }
233
234 static __inline__ int r300_check_range(unsigned reg, int count)
235 {
236         int i;
237         if (reg & ~0xffff)
238                 return -1;
239         for (i = (reg >> 2); i < (reg >> 2) + count; i++)
240                 if (r300_reg_flags[i] != MARK_SAFE)
241                         return 1;
242         return 0;
243 }
244
245 static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
246                                                           dev_priv,
247                                                           drm_radeon_kcmd_buffer_t
248                                                           * cmdbuf,
249                                                           drm_r300_cmd_header_t
250                                                           header)
251 {
252         int reg;
253         int sz;
254         int i;
255         int values[64];
256         RING_LOCALS;
257
258         sz = header.packet0.count;
259         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
260
261         if ((sz > 64) || (sz < 0)) {
262                 DRM_ERROR
263                     ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
264                      reg, sz);
265                 return DRM_ERR(EINVAL);
266         }
267         for (i = 0; i < sz; i++) {
268                 values[i] = ((int *)cmdbuf->buf)[i];
269                 switch (r300_reg_flags[(reg >> 2) + i]) {
270                 case MARK_SAFE:
271                         break;
272                 case MARK_CHECK_OFFSET:
273                         if (!radeon_check_offset(dev_priv, (u32) values[i])) {
274                                 DRM_ERROR
275                                     ("Offset failed range check (reg=%04x sz=%d)\n",
276                                      reg, sz);
277                                 return DRM_ERR(EINVAL);
278                         }
279                         break;
280                 default:
281                         DRM_ERROR("Register %04x failed check as flag=%02x\n",
282                                   reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
283                         return DRM_ERR(EINVAL);
284                 }
285         }
286
287         BEGIN_RING(1 + sz);
288         OUT_RING(CP_PACKET0(reg, sz - 1));
289         OUT_RING_TABLE(values, sz);
290         ADVANCE_RING();
291
292         cmdbuf->buf += sz * 4;
293         cmdbuf->bufsz -= sz * 4;
294
295         return 0;
296 }
297
298 /**
299  * Emits a packet0 setting arbitrary registers.
300  * Called by r300_do_cp_cmdbuf.
301  *
302  * Note that checks are performed on contents and addresses of the registers
303  */
304 static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
305                                         drm_radeon_kcmd_buffer_t *cmdbuf,
306                                         drm_r300_cmd_header_t header)
307 {
308         int reg;
309         int sz;
310         RING_LOCALS;
311
312         sz = header.packet0.count;
313         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
314
315         if (!sz)
316                 return 0;
317
318         if (sz * 4 > cmdbuf->bufsz)
319                 return DRM_ERR(EINVAL);
320
321         if (reg + sz * 4 >= 0x10000) {
322                 DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
323                           sz);
324                 return DRM_ERR(EINVAL);
325         }
326
327         if (r300_check_range(reg, sz)) {
328                 /* go and check everything */
329                 return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
330                                                            header);
331         }
332         /* the rest of the data is safe to emit, whatever the values the user passed */
333
334         BEGIN_RING(1 + sz);
335         OUT_RING(CP_PACKET0(reg, sz - 1));
336         OUT_RING_TABLE((int *)cmdbuf->buf, sz);
337         ADVANCE_RING();
338
339         cmdbuf->buf += sz * 4;
340         cmdbuf->bufsz -= sz * 4;
341
342         return 0;
343 }
344
345 /**
346  * Uploads user-supplied vertex program instructions or parameters onto
347  * the graphics card.
348  * Called by r300_do_cp_cmdbuf.
349  */
350 static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
351                                     drm_radeon_kcmd_buffer_t *cmdbuf,
352                                     drm_r300_cmd_header_t header)
353 {
354         int sz;
355         int addr;
356         RING_LOCALS;
357
358         sz = header.vpu.count;
359         addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
360
361         if (!sz)
362                 return 0;
363         if (sz * 16 > cmdbuf->bufsz)
364                 return DRM_ERR(EINVAL);
365
366         BEGIN_RING(5 + sz * 4);
367         /* Wait for VAP to come to senses.. */
368         /* there is no need to emit it multiple times, (only once before VAP is programmed,
369            but this optimization is for later */
370         OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
371         OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
372         OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
373         OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
374
375         ADVANCE_RING();
376
377         cmdbuf->buf += sz * 16;
378         cmdbuf->bufsz -= sz * 16;
379
380         return 0;
381 }
382
383 /**
384  * Emit a clear packet from userspace.
385  * Called by r300_emit_packet3.
386  */
387 static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
388                                       drm_radeon_kcmd_buffer_t *cmdbuf)
389 {
390         RING_LOCALS;
391
392         if (8 * 4 > cmdbuf->bufsz)
393                 return DRM_ERR(EINVAL);
394
395         BEGIN_RING(10);
396         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
397         OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
398                  (1 << R300_PRIM_NUM_VERTICES_SHIFT));
399         OUT_RING_TABLE((int *)cmdbuf->buf, 8);
400         ADVANCE_RING();
401
402         cmdbuf->buf += 8 * 4;
403         cmdbuf->bufsz -= 8 * 4;
404
405         return 0;
406 }
407
408 static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
409                                                drm_radeon_kcmd_buffer_t *cmdbuf,
410                                                u32 header)
411 {
412         int count, i, k;
413 #define MAX_ARRAY_PACKET  64
414         u32 payload[MAX_ARRAY_PACKET];
415         u32 narrays;
416         RING_LOCALS;
417
418         count = (header >> 16) & 0x3fff;
419
420         if ((count + 1) > MAX_ARRAY_PACKET) {
421                 DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
422                           count);
423                 return DRM_ERR(EINVAL);
424         }
425         memset(payload, 0, MAX_ARRAY_PACKET * 4);
426         memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
427
428         /* carefully check packet contents */
429
430         narrays = payload[0];
431         k = 0;
432         i = 1;
433         while ((k < narrays) && (i < (count + 1))) {
434                 i++;            /* skip attribute field */
435                 if (!radeon_check_offset(dev_priv, payload[i])) {
436                         DRM_ERROR
437                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
438                              k, i);
439                         return DRM_ERR(EINVAL);
440                 }
441                 k++;
442                 i++;
443                 if (k == narrays)
444                         break;
445                 /* have one more to process, they come in pairs */
446                 if (!radeon_check_offset(dev_priv, payload[i])) {
447                         DRM_ERROR
448                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
449                              k, i);
450                         return DRM_ERR(EINVAL);
451                 }
452                 k++;
453                 i++;
454         }
455         /* do the counts match what we expect ? */
456         if ((k != narrays) || (i != (count + 1))) {
457                 DRM_ERROR
458                     ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
459                      k, i, narrays, count + 1);
460                 return DRM_ERR(EINVAL);
461         }
462
463         /* all clear, output packet */
464
465         BEGIN_RING(count + 2);
466         OUT_RING(header);
467         OUT_RING_TABLE(payload, count + 1);
468         ADVANCE_RING();
469
470         cmdbuf->buf += (count + 2) * 4;
471         cmdbuf->bufsz -= (count + 2) * 4;
472
473         return 0;
474 }
475
476 static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
477                                              drm_radeon_kcmd_buffer_t *cmdbuf)
478 {
479         u32 *cmd = (u32 *) cmdbuf->buf;
480         int count, ret;
481         RING_LOCALS;
482
483         count=(cmd[0]>>16) & 0x3fff;
484
485         if (cmd[0] & 0x8000) {
486                 u32 offset;
487
488                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL 
489                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
490                         offset = cmd[2] << 10;
491                         ret = !radeon_check_offset(dev_priv, offset);
492                         if (ret) {
493                                 DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
494                                 return DRM_ERR(EINVAL);
495                         }
496                 }
497
498                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
499                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
500                         offset = cmd[3] << 10;
501                         ret = !radeon_check_offset(dev_priv, offset);
502                         if (ret) {
503                                 DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
504                                 return DRM_ERR(EINVAL);
505                         }
506                         
507                 }
508         }
509
510         BEGIN_RING(count+2);
511         OUT_RING(cmd[0]);
512         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
513         ADVANCE_RING();
514
515         cmdbuf->buf += (count+2)*4;
516         cmdbuf->bufsz -= (count+2)*4;
517
518         return 0;
519 }
520
521 static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
522                                              drm_radeon_kcmd_buffer_t *cmdbuf)
523 {
524         u32 *cmd = (u32 *) cmdbuf->buf;
525         int count, ret;
526         RING_LOCALS;
527
528         count=(cmd[0]>>16) & 0x3fff;
529
530         if ((cmd[1] & 0x8000ffff) != 0x80000810) {
531                 DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
532                 return DRM_ERR(EINVAL);
533         }
534         ret = !radeon_check_offset(dev_priv, cmd[2]);
535         if (ret) {
536                 DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
537                 return DRM_ERR(EINVAL);
538         }
539
540         BEGIN_RING(count+2);
541         OUT_RING(cmd[0]);
542         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
543         ADVANCE_RING();
544
545         cmdbuf->buf += (count+2)*4;
546         cmdbuf->bufsz -= (count+2)*4;
547
548         return 0;
549 }
550
551 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
552                                             drm_radeon_kcmd_buffer_t *cmdbuf)
553 {
554         u32 header;
555         int count;
556         RING_LOCALS;
557
558         if (4 > cmdbuf->bufsz)
559                 return DRM_ERR(EINVAL);
560
561         /* Fixme !! This simply emits a packet without much checking.
562            We need to be smarter. */
563
564         /* obtain first word - actual packet3 header */
565         header = *(u32 *) cmdbuf->buf;
566
567         /* Is it packet 3 ? */
568         if ((header >> 30) != 0x3) {
569                 DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
570                 return DRM_ERR(EINVAL);
571         }
572
573         count = (header >> 16) & 0x3fff;
574
575         /* Check again now that we know how much data to expect */
576         if ((count + 2) * 4 > cmdbuf->bufsz) {
577                 DRM_ERROR
578                     ("Expected packet3 of length %d but have only %d bytes left\n",
579                      (count + 2) * 4, cmdbuf->bufsz);
580                 return DRM_ERR(EINVAL);
581         }
582
583         /* Is it a packet type we know about ? */
584         switch (header & 0xff00) {
585         case RADEON_3D_LOAD_VBPNTR:     /* load vertex array pointers */
586                 return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
587
588         case RADEON_CNTL_BITBLT_MULTI:
589                 return r300_emit_bitblt_multi(dev_priv, cmdbuf);
590
591         case RADEON_CP_INDX_BUFFER:     /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
592                 return r300_emit_indx_buffer(dev_priv, cmdbuf);
593         case RADEON_CP_3D_DRAW_IMMD_2:  /* triggers drawing using in-packet vertex data */
594         case RADEON_CP_3D_DRAW_VBUF_2:  /* triggers drawing of vertex buffers setup elsewhere */
595         case RADEON_CP_3D_DRAW_INDX_2:  /* triggers drawing using indices to vertex buffer */
596         case RADEON_WAIT_FOR_IDLE:
597         case RADEON_CP_NOP:
598                 /* these packets are safe */
599                 break;
600         default:
601                 DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
602                 return DRM_ERR(EINVAL);
603         }
604
605         BEGIN_RING(count + 2);
606         OUT_RING(header);
607         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
608         ADVANCE_RING();
609
610         cmdbuf->buf += (count + 2) * 4;
611         cmdbuf->bufsz -= (count + 2) * 4;
612
613         return 0;
614 }
615
616 /**
617  * Emit a rendering packet3 from userspace.
618  * Called by r300_do_cp_cmdbuf.
619  */
620 static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
621                                         drm_radeon_kcmd_buffer_t *cmdbuf,
622                                         drm_r300_cmd_header_t header)
623 {
624         int n;
625         int ret;
626         char *orig_buf = cmdbuf->buf;
627         int orig_bufsz = cmdbuf->bufsz;
628
629         /* This is a do-while-loop so that we run the interior at least once,
630          * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
631          */
632         n = 0;
633         do {
634                 if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
635                         ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
636                         if (ret)
637                                 return ret;
638
639                         cmdbuf->buf = orig_buf;
640                         cmdbuf->bufsz = orig_bufsz;
641                 }
642
643                 switch (header.packet3.packet) {
644                 case R300_CMD_PACKET3_CLEAR:
645                         DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
646                         ret = r300_emit_clear(dev_priv, cmdbuf);
647                         if (ret) {
648                                 DRM_ERROR("r300_emit_clear failed\n");
649                                 return ret;
650                         }
651                         break;
652
653                 case R300_CMD_PACKET3_RAW:
654                         DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
655                         ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
656                         if (ret) {
657                                 DRM_ERROR("r300_emit_raw_packet3 failed\n");
658                                 return ret;
659                         }
660                         break;
661
662                 default:
663                         DRM_ERROR("bad packet3 type %i at %p\n",
664                                   header.packet3.packet,
665                                   cmdbuf->buf - sizeof(header));
666                         return DRM_ERR(EINVAL);
667                 }
668
669                 n += R300_SIMULTANEOUS_CLIPRECTS;
670         } while (n < cmdbuf->nbox);
671
672         return 0;
673 }
674
675 /* Some of the R300 chips seem to be extremely touchy about the two registers
676  * that are configured in r300_pacify.
677  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
678  * sends a command buffer that contains only state setting commands and a
679  * vertex program/parameter upload sequence, this will eventually lead to a
680  * lockup, unless the sequence is bracketed by calls to r300_pacify.
681  * So we should take great care to *always* call r300_pacify before
682  * *anything* 3D related, and again afterwards. This is what the
683  * call bracket in r300_do_cp_cmdbuf is for.
684  */
685
686 /**
687  * Emit the sequence to pacify R300.
688  */
689 static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
690 {
691         RING_LOCALS;
692
693         BEGIN_RING(6);
694         OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
695         OUT_RING(0xa);
696         OUT_RING(CP_PACKET0(0x4f18, 0));
697         OUT_RING(0x3);
698         OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
699         OUT_RING(0x0);
700         ADVANCE_RING();
701 }
702
703 /**
704  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
705  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
706  * be careful about how this function is called.
707  */
708 static void r300_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
709 {
710         drm_radeon_private_t *dev_priv = dev->dev_private;
711         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
712
713         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
714         buf->pending = 1;
715         buf->used = 0;
716 }
717
718 static int r300_scratch(drm_radeon_private_t *dev_priv,
719                         drm_radeon_kcmd_buffer_t *cmdbuf,
720                         drm_r300_cmd_header_t header)
721 {
722         u32 *ref_age_base;
723         u32 i, buf_idx, h_pending;
724         RING_LOCALS;
725         
726         if (cmdbuf->bufsz < 
727             (sizeof(u64) + header.scratch.n_bufs * sizeof(buf_idx))) {
728                 return DRM_ERR(EINVAL);
729         }
730         
731         if (header.scratch.reg >= 5) {
732                 return DRM_ERR(EINVAL);
733         }
734         
735         dev_priv->scratch_ages[header.scratch.reg]++;
736         
737         ref_age_base =  (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);
738         
739         cmdbuf->buf += sizeof(u64);
740         cmdbuf->bufsz -= sizeof(u64);
741         
742         for (i=0; i < header.scratch.n_bufs; i++) {
743                 buf_idx = *(u32 *)cmdbuf->buf;
744                 buf_idx *= 2; /* 8 bytes per buf */
745                 
746                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
747                         return DRM_ERR(EINVAL);
748                 }
749                                         
750                 if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
751                         return DRM_ERR(EINVAL);
752                 }
753                                         
754                 if (h_pending == 0) {
755                         return DRM_ERR(EINVAL);
756                 }
757                                         
758                 h_pending--;
759                                                 
760                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
761                         return DRM_ERR(EINVAL);
762                 }
763                                         
764                 cmdbuf->buf += sizeof(buf_idx);
765                 cmdbuf->bufsz -= sizeof(buf_idx);
766         }
767         
768         BEGIN_RING(2);
769         OUT_RING(CP_PACKET0(RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0));
770         OUT_RING(dev_priv->scratch_ages[header.scratch.reg]);
771         ADVANCE_RING();
772         
773         return 0;
774 }
775
776 /**
777  * Parses and validates a user-supplied command buffer and emits appropriate
778  * commands on the DMA ring buffer.
779  * Called by the ioctl handler function radeon_cp_cmdbuf.
780  */
781 int r300_do_cp_cmdbuf(drm_device_t *dev,
782                       DRMFILE filp,
783                       drm_file_t *filp_priv,
784                       drm_radeon_kcmd_buffer_t *cmdbuf)
785 {
786         drm_radeon_private_t *dev_priv = dev->dev_private;
787         drm_device_dma_t *dma = dev->dma;
788         drm_buf_t *buf = NULL;
789         int emit_dispatch_age = 0;
790         int ret = 0;
791
792         DRM_DEBUG("\n");
793
794         /* See the comment above r300_emit_begin3d for why this call must be here,
795          * and what the cleanup gotos are for. */
796         r300_pacify(dev_priv);
797
798         if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
799                 ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
800                 if (ret)
801                         goto cleanup;
802         }
803
804         while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
805                 int idx;
806                 drm_r300_cmd_header_t header;
807
808                 header.u = *(unsigned int *)cmdbuf->buf;
809
810                 cmdbuf->buf += sizeof(header);
811                 cmdbuf->bufsz -= sizeof(header);
812
813                 switch (header.header.cmd_type) {
814                 case R300_CMD_PACKET0:
815                         DRM_DEBUG("R300_CMD_PACKET0\n");
816                         ret = r300_emit_packet0(dev_priv, cmdbuf, header);
817                         if (ret) {
818                                 DRM_ERROR("r300_emit_packet0 failed\n");
819                                 goto cleanup;
820                         }
821                         break;
822
823                 case R300_CMD_VPU:
824                         DRM_DEBUG("R300_CMD_VPU\n");
825                         ret = r300_emit_vpu(dev_priv, cmdbuf, header);
826                         if (ret) {
827                                 DRM_ERROR("r300_emit_vpu failed\n");
828                                 goto cleanup;
829                         }
830                         break;
831
832                 case R300_CMD_PACKET3:
833                         DRM_DEBUG("R300_CMD_PACKET3\n");
834                         ret = r300_emit_packet3(dev_priv, cmdbuf, header);
835                         if (ret) {
836                                 DRM_ERROR("r300_emit_packet3 failed\n");
837                                 goto cleanup;
838                         }
839                         break;
840
841                 case R300_CMD_END3D:
842                         DRM_DEBUG("R300_CMD_END3D\n");
843                         /* TODO:
844                            Ideally userspace driver should not need to issue this call,
845                            i.e. the drm driver should issue it automatically and prevent
846                            lockups.
847
848                            In practice, we do not understand why this call is needed and what
849                            it does (except for some vague guesses that it has to do with cache
850                            coherence) and so the user space driver does it.
851
852                            Once we are sure which uses prevent lockups the code could be moved
853                            into the kernel and the userspace driver will not
854                            need to use this command.
855
856                            Note that issuing this command does not hurt anything
857                            except, possibly, performance */
858                         r300_pacify(dev_priv);
859                         break;
860
861                 case R300_CMD_CP_DELAY:
862                         /* simple enough, we can do it here */
863                         DRM_DEBUG("R300_CMD_CP_DELAY\n");
864                         {
865                                 int i;
866                                 RING_LOCALS;
867
868                                 BEGIN_RING(header.delay.count);
869                                 for (i = 0; i < header.delay.count; i++)
870                                         OUT_RING(RADEON_CP_PACKET2);
871                                 ADVANCE_RING();
872                         }
873                         break;
874
875                 case R300_CMD_DMA_DISCARD:
876                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
877                         idx = header.dma.buf_idx;
878                         if (idx < 0 || idx >= dma->buf_count) {
879                                 DRM_ERROR("buffer index %d (of %d max)\n",
880                                           idx, dma->buf_count - 1);
881                                 ret = DRM_ERR(EINVAL);
882                                 goto cleanup;
883                         }
884
885                         buf = dma->buflist[idx];
886                         if (buf->filp != filp || buf->pending) {
887                                 DRM_ERROR("bad buffer %p %p %d\n",
888                                           buf->filp, filp, buf->pending);
889                                 ret = DRM_ERR(EINVAL);
890                                 goto cleanup;
891                         }
892
893                         emit_dispatch_age = 1;
894                         r300_discard_buffer(dev, buf);
895                         break;
896
897                 case R300_CMD_WAIT:
898                         /* simple enough, we can do it here */
899                         DRM_DEBUG("R300_CMD_WAIT\n");
900                         if (header.wait.flags == 0)
901                                 break;  /* nothing to do */
902
903                         {
904                                 RING_LOCALS;
905
906                                 BEGIN_RING(2);
907                                 OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
908                                 OUT_RING((header.wait.flags & 0xf) << 14);
909                                 ADVANCE_RING();
910                         }
911                         break;
912
913                 case R300_CMD_SCRATCH:
914                         DRM_DEBUG("R300_CMD_SCRATCH\n");
915                         ret = r300_scratch(dev_priv, cmdbuf, header);
916                         if (ret) {
917                                 DRM_ERROR("r300_scratch failed\n");
918                                 goto cleanup;
919                         }
920                         break;
921                         
922                 default:
923                         DRM_ERROR("bad cmd_type %i at %p\n",
924                                   header.header.cmd_type,
925                                   cmdbuf->buf - sizeof(header));
926                         ret = DRM_ERR(EINVAL);
927                         goto cleanup;
928                 }
929         }
930
931         DRM_DEBUG("END\n");
932
933       cleanup:
934         r300_pacify(dev_priv);
935
936         /* We emit the vertex buffer age here, outside the pacifier "brackets"
937          * for two reasons:
938          *  (1) This may coalesce multiple age emissions into a single one and
939          *  (2) more importantly, some chips lock up hard when scratch registers
940          *      are written inside the pacifier bracket.
941          */
942         if (emit_dispatch_age) {
943                 RING_LOCALS;
944
945                 /* Emit the vertex buffer age */
946                 BEGIN_RING(2);
947                 RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
948                 ADVANCE_RING();
949         }
950
951         COMMIT_RING();
952
953         return ret;
954 }