x86: fix NODES_SHIFT Kconfig range
[pandora-kernel.git] / drivers / char / drm / r300_cmdbuf.c
1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
2  *
3  * Copyright (C) The Weather Channel, Inc.  2002.
4  * Copyright (C) 2004 Nicolai Haehnle.
5  * All Rights Reserved.
6  *
7  * The Weather Channel (TM) funded Tungsten Graphics to develop the
8  * initial release of the Radeon 8500 driver under the XFree86 license.
9  * This notice must be preserved.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  *
30  * Authors:
31  *    Nicolai Haehnle <prefect_@gmx.net>
32  */
33
34 #include "drmP.h"
35 #include "drm.h"
36 #include "radeon_drm.h"
37 #include "radeon_drv.h"
38 #include "r300_reg.h"
39
#define R300_SIMULTANEOUS_CLIPRECTS		4

/* R300_RE_CLIPRECT_CNTL values, one per possible cliprect count.
 * Entry [k] is the value used when k + 1 cliprects are active.
 */
static const int r300_cliprect_cntl[R300_SIMULTANEOUS_CLIPRECTS] = {
	0xAAAA,		/* 1 cliprect */
	0xEEEE,		/* 2 cliprects */
	0xFEFE,		/* 3 cliprects */
	0xFFFE		/* 4 cliprects */
};
50
51 /**
52  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
53  * buffer, starting with index n.
54  */
55 static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
56                                drm_radeon_kcmd_buffer_t *cmdbuf, int n)
57 {
58         struct drm_clip_rect box;
59         int nr;
60         int i;
61         RING_LOCALS;
62
63         nr = cmdbuf->nbox - n;
64         if (nr > R300_SIMULTANEOUS_CLIPRECTS)
65                 nr = R300_SIMULTANEOUS_CLIPRECTS;
66
67         DRM_DEBUG("%i cliprects\n", nr);
68
69         if (nr) {
70                 BEGIN_RING(6 + nr * 2);
71                 OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
72
73                 for (i = 0; i < nr; ++i) {
74                         if (DRM_COPY_FROM_USER_UNCHECKED
75                             (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
76                                 DRM_ERROR("copy cliprect faulted\n");
77                                 return -EFAULT;
78                         }
79
80                         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
81                                 box.x1 = (box.x1) &
82                                         R300_CLIPRECT_MASK;
83                                 box.y1 = (box.y1) &
84                                         R300_CLIPRECT_MASK;
85                                 box.x2 = (box.x2) &
86                                         R300_CLIPRECT_MASK;
87                                 box.y2 = (box.y2) &
88                                         R300_CLIPRECT_MASK;
89                         } else {
90                                 box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
91                                         R300_CLIPRECT_MASK;
92                                 box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
93                                         R300_CLIPRECT_MASK;
94                                 box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
95                                         R300_CLIPRECT_MASK;
96                                 box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
97                                         R300_CLIPRECT_MASK;
98
99                         }
100                         OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
101                                  (box.y1 << R300_CLIPRECT_Y_SHIFT));
102                         OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
103                                  (box.y2 << R300_CLIPRECT_Y_SHIFT));
104
105                 }
106
107                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
108
109                 /* TODO/SECURITY: Force scissors to a safe value, otherwise the
110                  * client might be able to trample over memory.
111                  * The impact should be very limited, but I'd rather be safe than
112                  * sorry.
113                  */
114                 OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
115                 OUT_RING(0);
116                 OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
117                 ADVANCE_RING();
118         } else {
119                 /* Why we allow zero cliprect rendering:
120                  * There are some commands in a command buffer that must be submitted
121                  * even when there are no cliprects, e.g. DMA buffer discard
122                  * or state setting (though state setting could be avoided by
123                  * simulating a loss of context).
124                  *
125                  * Now since the cmdbuf interface is so chaotic right now (and is
126                  * bound to remain that way for a bit until things settle down),
127                  * it is basically impossible to filter out the commands that are
128                  * necessary and those that aren't.
129                  *
130                  * So I choose the safe way and don't do any filtering at all;
131                  * instead, I simply set up the engine so that all rendering
132                  * can't produce any fragments.
133                  */
134                 BEGIN_RING(2);
135                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
136                 ADVANCE_RING();
137         }
138
139         return 0;
140 }
141
142 static u8 r300_reg_flags[0x10000 >> 2];
143
144 void r300_init_reg_flags(struct drm_device *dev)
145 {
146         int i;
147         drm_radeon_private_t *dev_priv = dev->dev_private;
148
149         memset(r300_reg_flags, 0, 0x10000 >> 2);
150 #define ADD_RANGE_MARK(reg, count,mark) \
151                 for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
152                         r300_reg_flags[i]|=(mark);
153
154 #define MARK_SAFE               1
155 #define MARK_CHECK_OFFSET       2
156
157 #define ADD_RANGE(reg, count)   ADD_RANGE_MARK(reg, count, MARK_SAFE)
158
159         /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
160         ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
161         ADD_RANGE(R300_VAP_CNTL, 1);
162         ADD_RANGE(R300_SE_VTE_CNTL, 2);
163         ADD_RANGE(0x2134, 2);
164         ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
165         ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
166         ADD_RANGE(0x21DC, 1);
167         ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
168         ADD_RANGE(R300_VAP_CLIP_X_0, 4);
169         ADD_RANGE(R300_VAP_PVS_WAITIDLE, 1);
170         ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
171         ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
172         ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
173         ADD_RANGE(R300_GB_ENABLE, 1);
174         ADD_RANGE(R300_GB_MSPOS0, 5);
175         ADD_RANGE(R300_TX_CNTL, 1);
176         ADD_RANGE(R300_TX_ENABLE, 1);
177         ADD_RANGE(0x4200, 4);
178         ADD_RANGE(0x4214, 1);
179         ADD_RANGE(R300_RE_POINTSIZE, 1);
180         ADD_RANGE(0x4230, 3);
181         ADD_RANGE(R300_RE_LINE_CNT, 1);
182         ADD_RANGE(R300_RE_UNK4238, 1);
183         ADD_RANGE(0x4260, 3);
184         ADD_RANGE(R300_RE_SHADE, 4);
185         ADD_RANGE(R300_RE_POLYGON_MODE, 5);
186         ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
187         ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
188         ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
189         ADD_RANGE(R300_RE_CULL_CNTL, 1);
190         ADD_RANGE(0x42C0, 2);
191         ADD_RANGE(R300_RS_CNTL_0, 2);
192         ADD_RANGE(R300_RS_INTERP_0, 8);
193         ADD_RANGE(R300_RS_ROUTE_0, 8);
194         ADD_RANGE(0x43A4, 2);
195         ADD_RANGE(0x43E8, 1);
196         ADD_RANGE(R300_PFS_CNTL_0, 3);
197         ADD_RANGE(R300_PFS_NODE_0, 4);
198         ADD_RANGE(R300_PFS_TEXI_0, 64);
199         ADD_RANGE(0x46A4, 5);
200         ADD_RANGE(R300_PFS_INSTR0_0, 64);
201         ADD_RANGE(R300_PFS_INSTR1_0, 64);
202         ADD_RANGE(R300_PFS_INSTR2_0, 64);
203         ADD_RANGE(R300_PFS_INSTR3_0, 64);
204         ADD_RANGE(R300_RE_FOG_STATE, 1);
205         ADD_RANGE(R300_FOG_COLOR_R, 3);
206         ADD_RANGE(R300_PP_ALPHA_TEST, 2);
207         ADD_RANGE(0x4BD8, 1);
208         ADD_RANGE(R300_PFS_PARAM_0_X, 64);
209         ADD_RANGE(0x4E00, 1);
210         ADD_RANGE(R300_RB3D_CBLEND, 2);
211         ADD_RANGE(R300_RB3D_COLORMASK, 1);
212         ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
213         ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);   /* check offset */
214         ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
215         ADD_RANGE(0x4E50, 9);
216         ADD_RANGE(0x4E88, 1);
217         ADD_RANGE(0x4EA0, 2);
218         ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
219         ADD_RANGE(R300_RB3D_ZSTENCIL_FORMAT, 4);
220         ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);    /* check offset */
221         ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
222         ADD_RANGE(0x4F28, 1);
223         ADD_RANGE(0x4F30, 2);
224         ADD_RANGE(0x4F44, 1);
225         ADD_RANGE(0x4F54, 1);
226
227         ADD_RANGE(R300_TX_FILTER_0, 16);
228         ADD_RANGE(R300_TX_FILTER1_0, 16);
229         ADD_RANGE(R300_TX_SIZE_0, 16);
230         ADD_RANGE(R300_TX_FORMAT_0, 16);
231         ADD_RANGE(R300_TX_PITCH_0, 16);
232         /* Texture offset is dangerous and needs more checking */
233         ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
234         ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
235         ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
236
237         /* Sporadic registers used as primitives are emitted */
238         ADD_RANGE(R300_RB3D_ZCACHE_CTLSTAT, 1);
239         ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
240         ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
241         ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
242
243         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
244                 ADD_RANGE(0x4074, 16);
245         }
246 }
247
248 static __inline__ int r300_check_range(unsigned reg, int count)
249 {
250         int i;
251         if (reg & ~0xffff)
252                 return -1;
253         for (i = (reg >> 2); i < (reg >> 2) + count; i++)
254                 if (r300_reg_flags[i] != MARK_SAFE)
255                         return 1;
256         return 0;
257 }
258
259 static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
260                                                           dev_priv,
261                                                           drm_radeon_kcmd_buffer_t
262                                                           * cmdbuf,
263                                                           drm_r300_cmd_header_t
264                                                           header)
265 {
266         int reg;
267         int sz;
268         int i;
269         int values[64];
270         RING_LOCALS;
271
272         sz = header.packet0.count;
273         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
274
275         if ((sz > 64) || (sz < 0)) {
276                 DRM_ERROR
277                     ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
278                      reg, sz);
279                 return -EINVAL;
280         }
281         for (i = 0; i < sz; i++) {
282                 values[i] = ((int *)cmdbuf->buf)[i];
283                 switch (r300_reg_flags[(reg >> 2) + i]) {
284                 case MARK_SAFE:
285                         break;
286                 case MARK_CHECK_OFFSET:
287                         if (!radeon_check_offset(dev_priv, (u32) values[i])) {
288                                 DRM_ERROR
289                                     ("Offset failed range check (reg=%04x sz=%d)\n",
290                                      reg, sz);
291                                 return -EINVAL;
292                         }
293                         break;
294                 default:
295                         DRM_ERROR("Register %04x failed check as flag=%02x\n",
296                                   reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
297                         return -EINVAL;
298                 }
299         }
300
301         BEGIN_RING(1 + sz);
302         OUT_RING(CP_PACKET0(reg, sz - 1));
303         OUT_RING_TABLE(values, sz);
304         ADVANCE_RING();
305
306         cmdbuf->buf += sz * 4;
307         cmdbuf->bufsz -= sz * 4;
308
309         return 0;
310 }
311
312 /**
313  * Emits a packet0 setting arbitrary registers.
314  * Called by r300_do_cp_cmdbuf.
315  *
316  * Note that checks are performed on contents and addresses of the registers
317  */
318 static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
319                                         drm_radeon_kcmd_buffer_t *cmdbuf,
320                                         drm_r300_cmd_header_t header)
321 {
322         int reg;
323         int sz;
324         RING_LOCALS;
325
326         sz = header.packet0.count;
327         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
328
329         if (!sz)
330                 return 0;
331
332         if (sz * 4 > cmdbuf->bufsz)
333                 return -EINVAL;
334
335         if (reg + sz * 4 >= 0x10000) {
336                 DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
337                           sz);
338                 return -EINVAL;
339         }
340
341         if (r300_check_range(reg, sz)) {
342                 /* go and check everything */
343                 return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
344                                                            header);
345         }
346         /* the rest of the data is safe to emit, whatever the values the user passed */
347
348         BEGIN_RING(1 + sz);
349         OUT_RING(CP_PACKET0(reg, sz - 1));
350         OUT_RING_TABLE((int *)cmdbuf->buf, sz);
351         ADVANCE_RING();
352
353         cmdbuf->buf += sz * 4;
354         cmdbuf->bufsz -= sz * 4;
355
356         return 0;
357 }
358
359 /**
360  * Uploads user-supplied vertex program instructions or parameters onto
361  * the graphics card.
362  * Called by r300_do_cp_cmdbuf.
363  */
364 static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
365                                     drm_radeon_kcmd_buffer_t *cmdbuf,
366                                     drm_r300_cmd_header_t header)
367 {
368         int sz;
369         int addr;
370         RING_LOCALS;
371
372         sz = header.vpu.count;
373         addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
374
375         if (!sz)
376                 return 0;
377         if (sz * 16 > cmdbuf->bufsz)
378                 return -EINVAL;
379
380         BEGIN_RING(5 + sz * 4);
381         /* Wait for VAP to come to senses.. */
382         /* there is no need to emit it multiple times, (only once before VAP is programmed,
383            but this optimization is for later */
384         OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
385         OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
386         OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
387         OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
388
389         ADVANCE_RING();
390
391         cmdbuf->buf += sz * 16;
392         cmdbuf->bufsz -= sz * 16;
393
394         return 0;
395 }
396
397 /**
398  * Emit a clear packet from userspace.
399  * Called by r300_emit_packet3.
400  */
401 static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
402                                       drm_radeon_kcmd_buffer_t *cmdbuf)
403 {
404         RING_LOCALS;
405
406         if (8 * 4 > cmdbuf->bufsz)
407                 return -EINVAL;
408
409         BEGIN_RING(10);
410         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
411         OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
412                  (1 << R300_PRIM_NUM_VERTICES_SHIFT));
413         OUT_RING_TABLE((int *)cmdbuf->buf, 8);
414         ADVANCE_RING();
415
416         cmdbuf->buf += 8 * 4;
417         cmdbuf->bufsz -= 8 * 4;
418
419         return 0;
420 }
421
422 static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
423                                                drm_radeon_kcmd_buffer_t *cmdbuf,
424                                                u32 header)
425 {
426         int count, i, k;
427 #define MAX_ARRAY_PACKET  64
428         u32 payload[MAX_ARRAY_PACKET];
429         u32 narrays;
430         RING_LOCALS;
431
432         count = (header >> 16) & 0x3fff;
433
434         if ((count + 1) > MAX_ARRAY_PACKET) {
435                 DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
436                           count);
437                 return -EINVAL;
438         }
439         memset(payload, 0, MAX_ARRAY_PACKET * 4);
440         memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
441
442         /* carefully check packet contents */
443
444         narrays = payload[0];
445         k = 0;
446         i = 1;
447         while ((k < narrays) && (i < (count + 1))) {
448                 i++;            /* skip attribute field */
449                 if (!radeon_check_offset(dev_priv, payload[i])) {
450                         DRM_ERROR
451                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
452                              k, i);
453                         return -EINVAL;
454                 }
455                 k++;
456                 i++;
457                 if (k == narrays)
458                         break;
459                 /* have one more to process, they come in pairs */
460                 if (!radeon_check_offset(dev_priv, payload[i])) {
461                         DRM_ERROR
462                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
463                              k, i);
464                         return -EINVAL;
465                 }
466                 k++;
467                 i++;
468         }
469         /* do the counts match what we expect ? */
470         if ((k != narrays) || (i != (count + 1))) {
471                 DRM_ERROR
472                     ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
473                      k, i, narrays, count + 1);
474                 return -EINVAL;
475         }
476
477         /* all clear, output packet */
478
479         BEGIN_RING(count + 2);
480         OUT_RING(header);
481         OUT_RING_TABLE(payload, count + 1);
482         ADVANCE_RING();
483
484         cmdbuf->buf += (count + 2) * 4;
485         cmdbuf->bufsz -= (count + 2) * 4;
486
487         return 0;
488 }
489
490 static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
491                                              drm_radeon_kcmd_buffer_t *cmdbuf)
492 {
493         u32 *cmd = (u32 *) cmdbuf->buf;
494         int count, ret;
495         RING_LOCALS;
496
497         count=(cmd[0]>>16) & 0x3fff;
498
499         if (cmd[0] & 0x8000) {
500                 u32 offset;
501
502                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
503                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
504                         offset = cmd[2] << 10;
505                         ret = !radeon_check_offset(dev_priv, offset);
506                         if (ret) {
507                                 DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
508                                 return -EINVAL;
509                         }
510                 }
511
512                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
513                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
514                         offset = cmd[3] << 10;
515                         ret = !radeon_check_offset(dev_priv, offset);
516                         if (ret) {
517                                 DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
518                                 return -EINVAL;
519                         }
520
521                 }
522         }
523
524         BEGIN_RING(count+2);
525         OUT_RING(cmd[0]);
526         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
527         ADVANCE_RING();
528
529         cmdbuf->buf += (count+2)*4;
530         cmdbuf->bufsz -= (count+2)*4;
531
532         return 0;
533 }
534
535 static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
536                                              drm_radeon_kcmd_buffer_t *cmdbuf)
537 {
538         u32 *cmd = (u32 *) cmdbuf->buf;
539         int count, ret;
540         RING_LOCALS;
541
542         count=(cmd[0]>>16) & 0x3fff;
543
544         if ((cmd[1] & 0x8000ffff) != 0x80000810) {
545                 DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
546                 return -EINVAL;
547         }
548         ret = !radeon_check_offset(dev_priv, cmd[2]);
549         if (ret) {
550                 DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
551                 return -EINVAL;
552         }
553
554         BEGIN_RING(count+2);
555         OUT_RING(cmd[0]);
556         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
557         ADVANCE_RING();
558
559         cmdbuf->buf += (count+2)*4;
560         cmdbuf->bufsz -= (count+2)*4;
561
562         return 0;
563 }
564
565 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
566                                             drm_radeon_kcmd_buffer_t *cmdbuf)
567 {
568         u32 header;
569         int count;
570         RING_LOCALS;
571
572         if (4 > cmdbuf->bufsz)
573                 return -EINVAL;
574
575         /* Fixme !! This simply emits a packet without much checking.
576            We need to be smarter. */
577
578         /* obtain first word - actual packet3 header */
579         header = *(u32 *) cmdbuf->buf;
580
581         /* Is it packet 3 ? */
582         if ((header >> 30) != 0x3) {
583                 DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
584                 return -EINVAL;
585         }
586
587         count = (header >> 16) & 0x3fff;
588
589         /* Check again now that we know how much data to expect */
590         if ((count + 2) * 4 > cmdbuf->bufsz) {
591                 DRM_ERROR
592                     ("Expected packet3 of length %d but have only %d bytes left\n",
593                      (count + 2) * 4, cmdbuf->bufsz);
594                 return -EINVAL;
595         }
596
597         /* Is it a packet type we know about ? */
598         switch (header & 0xff00) {
599         case RADEON_3D_LOAD_VBPNTR:     /* load vertex array pointers */
600                 return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
601
602         case RADEON_CNTL_BITBLT_MULTI:
603                 return r300_emit_bitblt_multi(dev_priv, cmdbuf);
604
605         case RADEON_CP_INDX_BUFFER:     /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
606                 return r300_emit_indx_buffer(dev_priv, cmdbuf);
607         case RADEON_CP_3D_DRAW_IMMD_2:  /* triggers drawing using in-packet vertex data */
608         case RADEON_CP_3D_DRAW_VBUF_2:  /* triggers drawing of vertex buffers setup elsewhere */
609         case RADEON_CP_3D_DRAW_INDX_2:  /* triggers drawing using indices to vertex buffer */
610         case RADEON_WAIT_FOR_IDLE:
611         case RADEON_CP_NOP:
612                 /* these packets are safe */
613                 break;
614         default:
615                 DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
616                 return -EINVAL;
617         }
618
619         BEGIN_RING(count + 2);
620         OUT_RING(header);
621         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
622         ADVANCE_RING();
623
624         cmdbuf->buf += (count + 2) * 4;
625         cmdbuf->bufsz -= (count + 2) * 4;
626
627         return 0;
628 }
629
630 /**
631  * Emit a rendering packet3 from userspace.
632  * Called by r300_do_cp_cmdbuf.
633  */
634 static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
635                                         drm_radeon_kcmd_buffer_t *cmdbuf,
636                                         drm_r300_cmd_header_t header)
637 {
638         int n;
639         int ret;
640         char *orig_buf = cmdbuf->buf;
641         int orig_bufsz = cmdbuf->bufsz;
642
643         /* This is a do-while-loop so that we run the interior at least once,
644          * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
645          */
646         n = 0;
647         do {
648                 if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
649                         ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
650                         if (ret)
651                                 return ret;
652
653                         cmdbuf->buf = orig_buf;
654                         cmdbuf->bufsz = orig_bufsz;
655                 }
656
657                 switch (header.packet3.packet) {
658                 case R300_CMD_PACKET3_CLEAR:
659                         DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
660                         ret = r300_emit_clear(dev_priv, cmdbuf);
661                         if (ret) {
662                                 DRM_ERROR("r300_emit_clear failed\n");
663                                 return ret;
664                         }
665                         break;
666
667                 case R300_CMD_PACKET3_RAW:
668                         DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
669                         ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
670                         if (ret) {
671                                 DRM_ERROR("r300_emit_raw_packet3 failed\n");
672                                 return ret;
673                         }
674                         break;
675
676                 default:
677                         DRM_ERROR("bad packet3 type %i at %p\n",
678                                   header.packet3.packet,
679                                   cmdbuf->buf - sizeof(header));
680                         return -EINVAL;
681                 }
682
683                 n += R300_SIMULTANEOUS_CLIPRECTS;
684         } while (n < cmdbuf->nbox);
685
686         return 0;
687 }
688
689 /* Some of the R300 chips seem to be extremely touchy about the two registers
690  * that are configured in r300_pacify.
691  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
692  * sends a command buffer that contains only state setting commands and a
693  * vertex program/parameter upload sequence, this will eventually lead to a
694  * lockup, unless the sequence is bracketed by calls to r300_pacify.
695  * So we should take great care to *always* call r300_pacify before
696  * *anything* 3D related, and again afterwards. This is what the
697  * call bracket in r300_do_cp_cmdbuf is for.
698  */
699
700 /**
701  * Emit the sequence to pacify R300.
702  */
703 static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
704 {
705         RING_LOCALS;
706
707         BEGIN_RING(6);
708         OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
709         OUT_RING(R300_RB3D_DSTCACHE_UNKNOWN_0A);
710         OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
711         OUT_RING(R300_RB3D_ZCACHE_UNKNOWN_03);
712         OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
713         OUT_RING(0x0);
714         ADVANCE_RING();
715 }
716
717 /**
718  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
719  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
720  * be careful about how this function is called.
721  */
722 static void r300_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
723 {
724         drm_radeon_private_t *dev_priv = dev->dev_private;
725         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
726
727         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
728         buf->pending = 1;
729         buf->used = 0;
730 }
731
732 static void r300_cmd_wait(drm_radeon_private_t * dev_priv,
733                           drm_r300_cmd_header_t header)
734 {
735         u32 wait_until;
736         RING_LOCALS;
737
738         if (!header.wait.flags)
739                 return;
740
741         wait_until = 0;
742
743         switch(header.wait.flags) {
744         case R300_WAIT_2D:
745                 wait_until = RADEON_WAIT_2D_IDLE;
746                 break;
747         case R300_WAIT_3D:
748                 wait_until = RADEON_WAIT_3D_IDLE;
749                 break;
750         case R300_NEW_WAIT_2D_3D:
751                 wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_3D_IDLE;
752                 break;
753         case R300_NEW_WAIT_2D_2D_CLEAN:
754                 wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
755                 break;
756         case R300_NEW_WAIT_3D_3D_CLEAN:
757                 wait_until = RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
758                 break;
759         case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
760                 wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
761                 wait_until |= RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
762                 break;
763         default:
764                 return;
765         }
766
767         BEGIN_RING(2);
768         OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
769         OUT_RING(wait_until);
770         ADVANCE_RING();
771 }
772
773 static int r300_scratch(drm_radeon_private_t *dev_priv,
774                         drm_radeon_kcmd_buffer_t *cmdbuf,
775                         drm_r300_cmd_header_t header)
776 {
777         u32 *ref_age_base;
778         u32 i, buf_idx, h_pending;
779         RING_LOCALS;
780
781         if (cmdbuf->bufsz <
782             (sizeof(u64) + header.scratch.n_bufs * sizeof(buf_idx))) {
783                 return -EINVAL;
784         }
785
786         if (header.scratch.reg >= 5) {
787                 return -EINVAL;
788         }
789
790         dev_priv->scratch_ages[header.scratch.reg]++;
791
792         ref_age_base =  (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);
793
794         cmdbuf->buf += sizeof(u64);
795         cmdbuf->bufsz -= sizeof(u64);
796
797         for (i=0; i < header.scratch.n_bufs; i++) {
798                 buf_idx = *(u32 *)cmdbuf->buf;
799                 buf_idx *= 2; /* 8 bytes per buf */
800
801                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
802                         return -EINVAL;
803                 }
804
805                 if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
806                         return -EINVAL;
807                 }
808
809                 if (h_pending == 0) {
810                         return -EINVAL;
811                 }
812
813                 h_pending--;
814
815                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
816                         return -EINVAL;
817                 }
818
819                 cmdbuf->buf += sizeof(buf_idx);
820                 cmdbuf->bufsz -= sizeof(buf_idx);
821         }
822
823         BEGIN_RING(2);
824         OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
825         OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
826         ADVANCE_RING();
827
828         return 0;
829 }
830
831 /**
832  * Parses and validates a user-supplied command buffer and emits appropriate
833  * commands on the DMA ring buffer.
834  * Called by the ioctl handler function radeon_cp_cmdbuf.
835  */
836 int r300_do_cp_cmdbuf(struct drm_device *dev,
837                       struct drm_file *file_priv,
838                       drm_radeon_kcmd_buffer_t *cmdbuf)
839 {
840         drm_radeon_private_t *dev_priv = dev->dev_private;
841         struct drm_device_dma *dma = dev->dma;
842         struct drm_buf *buf = NULL;
843         int emit_dispatch_age = 0;
844         int ret = 0;
845
846         DRM_DEBUG("\n");
847
848         /* See the comment above r300_emit_begin3d for why this call must be here,
849          * and what the cleanup gotos are for. */
850         r300_pacify(dev_priv);
851
852         if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
853                 ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
854                 if (ret)
855                         goto cleanup;
856         }
857
858         while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
859                 int idx;
860                 drm_r300_cmd_header_t header;
861
862                 header.u = *(unsigned int *)cmdbuf->buf;
863
864                 cmdbuf->buf += sizeof(header);
865                 cmdbuf->bufsz -= sizeof(header);
866
867                 switch (header.header.cmd_type) {
868                 case R300_CMD_PACKET0:
869                         DRM_DEBUG("R300_CMD_PACKET0\n");
870                         ret = r300_emit_packet0(dev_priv, cmdbuf, header);
871                         if (ret) {
872                                 DRM_ERROR("r300_emit_packet0 failed\n");
873                                 goto cleanup;
874                         }
875                         break;
876
877                 case R300_CMD_VPU:
878                         DRM_DEBUG("R300_CMD_VPU\n");
879                         ret = r300_emit_vpu(dev_priv, cmdbuf, header);
880                         if (ret) {
881                                 DRM_ERROR("r300_emit_vpu failed\n");
882                                 goto cleanup;
883                         }
884                         break;
885
886                 case R300_CMD_PACKET3:
887                         DRM_DEBUG("R300_CMD_PACKET3\n");
888                         ret = r300_emit_packet3(dev_priv, cmdbuf, header);
889                         if (ret) {
890                                 DRM_ERROR("r300_emit_packet3 failed\n");
891                                 goto cleanup;
892                         }
893                         break;
894
895                 case R300_CMD_END3D:
896                         DRM_DEBUG("R300_CMD_END3D\n");
897                         /* TODO:
898                            Ideally userspace driver should not need to issue this call,
899                            i.e. the drm driver should issue it automatically and prevent
900                            lockups.
901
902                            In practice, we do not understand why this call is needed and what
903                            it does (except for some vague guesses that it has to do with cache
904                            coherence) and so the user space driver does it.
905
906                            Once we are sure which uses prevent lockups the code could be moved
907                            into the kernel and the userspace driver will not
908                            need to use this command.
909
910                            Note that issuing this command does not hurt anything
911                            except, possibly, performance */
912                         r300_pacify(dev_priv);
913                         break;
914
915                 case R300_CMD_CP_DELAY:
916                         /* simple enough, we can do it here */
917                         DRM_DEBUG("R300_CMD_CP_DELAY\n");
918                         {
919                                 int i;
920                                 RING_LOCALS;
921
922                                 BEGIN_RING(header.delay.count);
923                                 for (i = 0; i < header.delay.count; i++)
924                                         OUT_RING(RADEON_CP_PACKET2);
925                                 ADVANCE_RING();
926                         }
927                         break;
928
929                 case R300_CMD_DMA_DISCARD:
930                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
931                         idx = header.dma.buf_idx;
932                         if (idx < 0 || idx >= dma->buf_count) {
933                                 DRM_ERROR("buffer index %d (of %d max)\n",
934                                           idx, dma->buf_count - 1);
935                                 ret = -EINVAL;
936                                 goto cleanup;
937                         }
938
939                         buf = dma->buflist[idx];
940                         if (buf->file_priv != file_priv || buf->pending) {
941                                 DRM_ERROR("bad buffer %p %p %d\n",
942                                           buf->file_priv, file_priv,
943                                           buf->pending);
944                                 ret = -EINVAL;
945                                 goto cleanup;
946                         }
947
948                         emit_dispatch_age = 1;
949                         r300_discard_buffer(dev, buf);
950                         break;
951
952                 case R300_CMD_WAIT:
953                         DRM_DEBUG("R300_CMD_WAIT\n");
954                         r300_cmd_wait(dev_priv, header);
955                         break;
956
957                 case R300_CMD_SCRATCH:
958                         DRM_DEBUG("R300_CMD_SCRATCH\n");
959                         ret = r300_scratch(dev_priv, cmdbuf, header);
960                         if (ret) {
961                                 DRM_ERROR("r300_scratch failed\n");
962                                 goto cleanup;
963                         }
964                         break;
965
966                 default:
967                         DRM_ERROR("bad cmd_type %i at %p\n",
968                                   header.header.cmd_type,
969                                   cmdbuf->buf - sizeof(header));
970                         ret = -EINVAL;
971                         goto cleanup;
972                 }
973         }
974
975         DRM_DEBUG("END\n");
976
977       cleanup:
978         r300_pacify(dev_priv);
979
980         /* We emit the vertex buffer age here, outside the pacifier "brackets"
981          * for two reasons:
982          *  (1) This may coalesce multiple age emissions into a single one and
983          *  (2) more importantly, some chips lock up hard when scratch registers
984          *      are written inside the pacifier bracket.
985          */
986         if (emit_dispatch_age) {
987                 RING_LOCALS;
988
989                 /* Emit the vertex buffer age */
990                 BEGIN_RING(2);
991                 RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
992                 ADVANCE_RING();
993         }
994
995         COMMIT_RING();
996
997         return ret;
998 }