Merge master.kernel.org:/pub/scm/linux/kernel/git/wim/linux-2.6-watchdog
[pandora-kernel.git] / drivers / char / drm / radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
40 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
41                                                     dev_priv,
42                                                     drm_file_t * filp_priv,
43                                                     u32 *offset)
44 {
45         u32 off = *offset;
46         struct drm_radeon_driver_file_fields *radeon_priv;
47
48         /* Hrm ... the story of the offset ... So this function converts
49          * the various ideas of what userland clients might have for an
50          * offset in the card address space into an offset into the card
51          * address space :) So with a sane client, it should just keep
52          * the value intact and just do some boundary checking. However,
53          * not all clients are sane. Some older clients pass us 0 based
54          * offsets relative to the start of the framebuffer and some may
55          * assume the AGP aperture it appended to the framebuffer, so we
56          * try to detect those cases and fix them up.
57          *
58          * Note: It might be a good idea here to make sure the offset lands
59          * in some "allowed" area to protect things like the PCIE GART...
60          */
61
62         /* First, the best case, the offset already lands in either the
63          * framebuffer or the GART mapped space
64          */
65         if ((off >= dev_priv->fb_location &&
66              off < (dev_priv->fb_location + dev_priv->fb_size)) ||
67             (off >= dev_priv->gart_vm_start &&
68              off < (dev_priv->gart_vm_start + dev_priv->gart_size)))
69                 return 0;
70
71         /* Ok, that didn't happen... now check if we have a zero based
72          * offset that fits in the framebuffer + gart space, apply the
73          * magic offset we get from SETPARAM or calculated from fb_location
74          */
75         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
76                 radeon_priv = filp_priv->driver_priv;
77                 off += radeon_priv->radeon_fb_delta;
78         }
79
80         /* Finally, assume we aimed at a GART offset if beyond the fb */
81         if (off > (dev_priv->fb_location + dev_priv->fb_size))
82                 off = off - (dev_priv->fb_location + dev_priv->fb_size) +
83                         dev_priv->gart_vm_start;
84
85         /* Now recheck and fail if out of bounds */
86         if ((off >= dev_priv->fb_location &&
87              off < (dev_priv->fb_location + dev_priv->fb_size)) ||
88             (off >= dev_priv->gart_vm_start &&
89              off < (dev_priv->gart_vm_start + dev_priv->gart_size))) {
90                 DRM_DEBUG("offset fixed up to 0x%x\n", off);
91                 *offset = off;
92                 return 0;
93         }
94         return DRM_ERR(EINVAL);
95 }
96
/* Validate (and fix up in place) the register payload of a type-0 state
 * packet identified by one of the RADEON_EMIT_* / R200_EMIT_* ids.
 * Only packets carrying card-space memory offsets are inspected; the
 * offset word is located inside @data by the register's distance from
 * the packet's base register, divided by 4 (one u32 per register).
 *
 * Returns 0 on success (possibly with @data rewritten) or
 * DRM_ERR(EINVAL) for a bad offset or an unknown packet id.
 */
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t * filp_priv,
						     int id, u32 *data)
{
	switch (id) {

	case RADEON_EMIT_PP_MISC:
		/* depth buffer offset rides inside the PP_MISC packet */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		/* colour buffer offset rides inside the PP_CNTL packet */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		/* R200 texture offset packets: offset is the only word */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[0])) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		/* R100 texture offset is embedded in the TXFILTER packet */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
			/* five cube-face offsets per texture unit */
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R200 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
			break;
		}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			/* five cube-face offsets per texture unit */
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
		}
		break;

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_VAP_CTL:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
	case R200_EMIT_VAP_PVS_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return DRM_ERR(EINVAL);
	}

	return 0;
}
263
264 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
265                                                      dev_priv,
266                                                      drm_file_t *filp_priv,
267                                                      drm_radeon_kcmd_buffer_t *
268                                                      cmdbuf,
269                                                      unsigned int *cmdsz)
270 {
271         u32 *cmd = (u32 *) cmdbuf->buf;
272
273         *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
274
275         if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
276                 DRM_ERROR("Not a type 3 packet\n");
277                 return DRM_ERR(EINVAL);
278         }
279
280         if (4 * *cmdsz > cmdbuf->bufsz) {
281                 DRM_ERROR("Packet size larger than size of data provided\n");
282                 return DRM_ERR(EINVAL);
283         }
284
285         /* Check client state and fix it up if necessary */
286         if (cmd[0] & 0x8000) {  /* MSB of opcode: next DWORD GUI_CNTL */
287                 u32 offset;
288
289                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
290                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
291                         offset = cmd[2] << 10;
292                         if (radeon_check_and_fixup_offset
293                             (dev_priv, filp_priv, &offset)) {
294                                 DRM_ERROR("Invalid first packet offset\n");
295                                 return DRM_ERR(EINVAL);
296                         }
297                         cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
298                 }
299
300                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
301                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
302                         offset = cmd[3] << 10;
303                         if (radeon_check_and_fixup_offset
304                             (dev_priv, filp_priv, &offset)) {
305                                 DRM_ERROR("Invalid second packet offset\n");
306                                 return DRM_ERR(EINVAL);
307                         }
308                         cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
309                 }
310         }
311
312         return 0;
313 }
314
315 /* ================================================================
316  * CP hardware state programming functions
317  */
318
/* Program the hardware scissor (RE_TOP_LEFT / RE_WIDTH_HEIGHT) from a
 * clip rectangle.  The WIDTH_HEIGHT register takes the inclusive
 * bottom-right corner, hence the "- 1" on x2/y2.
 */
static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
					     drm_clip_rect_t * box)
{
	RING_LOCALS;

	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING((box->y1 << 16) | box->x1);
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
	ADVANCE_RING();
}
334
335 /* Emit 1.1 state
336  */
337 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
338                              drm_file_t * filp_priv,
339                              drm_radeon_context_regs_t * ctx,
340                              drm_radeon_texture_regs_t * tex,
341                              unsigned int dirty)
342 {
343         RING_LOCALS;
344         DRM_DEBUG("dirty=0x%08x\n", dirty);
345
346         if (dirty & RADEON_UPLOAD_CONTEXT) {
347                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
348                                                   &ctx->rb3d_depthoffset)) {
349                         DRM_ERROR("Invalid depth buffer offset\n");
350                         return DRM_ERR(EINVAL);
351                 }
352
353                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
354                                                   &ctx->rb3d_coloroffset)) {
355                         DRM_ERROR("Invalid depth buffer offset\n");
356                         return DRM_ERR(EINVAL);
357                 }
358
359                 BEGIN_RING(14);
360                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
361                 OUT_RING(ctx->pp_misc);
362                 OUT_RING(ctx->pp_fog_color);
363                 OUT_RING(ctx->re_solid_color);
364                 OUT_RING(ctx->rb3d_blendcntl);
365                 OUT_RING(ctx->rb3d_depthoffset);
366                 OUT_RING(ctx->rb3d_depthpitch);
367                 OUT_RING(ctx->rb3d_zstencilcntl);
368                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
369                 OUT_RING(ctx->pp_cntl);
370                 OUT_RING(ctx->rb3d_cntl);
371                 OUT_RING(ctx->rb3d_coloroffset);
372                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
373                 OUT_RING(ctx->rb3d_colorpitch);
374                 ADVANCE_RING();
375         }
376
377         if (dirty & RADEON_UPLOAD_VERTFMT) {
378                 BEGIN_RING(2);
379                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
380                 OUT_RING(ctx->se_coord_fmt);
381                 ADVANCE_RING();
382         }
383
384         if (dirty & RADEON_UPLOAD_LINE) {
385                 BEGIN_RING(5);
386                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
387                 OUT_RING(ctx->re_line_pattern);
388                 OUT_RING(ctx->re_line_state);
389                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
390                 OUT_RING(ctx->se_line_width);
391                 ADVANCE_RING();
392         }
393
394         if (dirty & RADEON_UPLOAD_BUMPMAP) {
395                 BEGIN_RING(5);
396                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
397                 OUT_RING(ctx->pp_lum_matrix);
398                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
399                 OUT_RING(ctx->pp_rot_matrix_0);
400                 OUT_RING(ctx->pp_rot_matrix_1);
401                 ADVANCE_RING();
402         }
403
404         if (dirty & RADEON_UPLOAD_MASKS) {
405                 BEGIN_RING(4);
406                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
407                 OUT_RING(ctx->rb3d_stencilrefmask);
408                 OUT_RING(ctx->rb3d_ropcntl);
409                 OUT_RING(ctx->rb3d_planemask);
410                 ADVANCE_RING();
411         }
412
413         if (dirty & RADEON_UPLOAD_VIEWPORT) {
414                 BEGIN_RING(7);
415                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
416                 OUT_RING(ctx->se_vport_xscale);
417                 OUT_RING(ctx->se_vport_xoffset);
418                 OUT_RING(ctx->se_vport_yscale);
419                 OUT_RING(ctx->se_vport_yoffset);
420                 OUT_RING(ctx->se_vport_zscale);
421                 OUT_RING(ctx->se_vport_zoffset);
422                 ADVANCE_RING();
423         }
424
425         if (dirty & RADEON_UPLOAD_SETUP) {
426                 BEGIN_RING(4);
427                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
428                 OUT_RING(ctx->se_cntl);
429                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
430                 OUT_RING(ctx->se_cntl_status);
431                 ADVANCE_RING();
432         }
433
434         if (dirty & RADEON_UPLOAD_MISC) {
435                 BEGIN_RING(2);
436                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
437                 OUT_RING(ctx->re_misc);
438                 ADVANCE_RING();
439         }
440
441         if (dirty & RADEON_UPLOAD_TEX0) {
442                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
443                                                   &tex[0].pp_txoffset)) {
444                         DRM_ERROR("Invalid texture offset for unit 0\n");
445                         return DRM_ERR(EINVAL);
446                 }
447
448                 BEGIN_RING(9);
449                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
450                 OUT_RING(tex[0].pp_txfilter);
451                 OUT_RING(tex[0].pp_txformat);
452                 OUT_RING(tex[0].pp_txoffset);
453                 OUT_RING(tex[0].pp_txcblend);
454                 OUT_RING(tex[0].pp_txablend);
455                 OUT_RING(tex[0].pp_tfactor);
456                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
457                 OUT_RING(tex[0].pp_border_color);
458                 ADVANCE_RING();
459         }
460
461         if (dirty & RADEON_UPLOAD_TEX1) {
462                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
463                                                   &tex[1].pp_txoffset)) {
464                         DRM_ERROR("Invalid texture offset for unit 1\n");
465                         return DRM_ERR(EINVAL);
466                 }
467
468                 BEGIN_RING(9);
469                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
470                 OUT_RING(tex[1].pp_txfilter);
471                 OUT_RING(tex[1].pp_txformat);
472                 OUT_RING(tex[1].pp_txoffset);
473                 OUT_RING(tex[1].pp_txcblend);
474                 OUT_RING(tex[1].pp_txablend);
475                 OUT_RING(tex[1].pp_tfactor);
476                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
477                 OUT_RING(tex[1].pp_border_color);
478                 ADVANCE_RING();
479         }
480
481         if (dirty & RADEON_UPLOAD_TEX2) {
482                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
483                                                   &tex[2].pp_txoffset)) {
484                         DRM_ERROR("Invalid texture offset for unit 2\n");
485                         return DRM_ERR(EINVAL);
486                 }
487
488                 BEGIN_RING(9);
489                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
490                 OUT_RING(tex[2].pp_txfilter);
491                 OUT_RING(tex[2].pp_txformat);
492                 OUT_RING(tex[2].pp_txoffset);
493                 OUT_RING(tex[2].pp_txcblend);
494                 OUT_RING(tex[2].pp_txablend);
495                 OUT_RING(tex[2].pp_tfactor);
496                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
497                 OUT_RING(tex[2].pp_border_color);
498                 ADVANCE_RING();
499         }
500
501         return 0;
502 }
503
/* Emit 1.2 state: the 1.2 sarea adds a second context area (currently
 * just the Z bias registers).  Emit that if dirty, then fall through
 * to the common 1.1 state emission for everything else.
 */
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
			      drm_file_t * filp_priv,
			      drm_radeon_state_t * state)
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
		OUT_RING(state->context2.se_zbias_factor);
		OUT_RING(state->context2.se_zbias_constant);
		ADVANCE_RING();
	}

	return radeon_emit_state(dev_priv, filp_priv, &state->context,
				 state->tex, state->dirty);
}
523
/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 *
 * NOTE: entry order is the ABI -- each row's index must match its
 * RADEON_EMIT_* / R200_EMIT_* packet id, and `len` is the number of
 * consecutive registers starting at `start`.  Append only; never
 * reorder or remove rows.
 */
static struct {
	int start;		/* first register in the range */
	int len;		/* number of consecutive registers */
	const char *name;	/* human-readable name for debug output */
} packet[RADEON_MAX_STATE_PACKETS] = {
	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},	  /* 85 */
	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
};
632
633 /* ================================================================
634  * Performance monitoring functions
635  */
636
/* Blit a small solid-colour rectangle for the performance-monitoring
 * overlay.  Coordinates are relative to the top-left of the first
 * cliprect; r/g/b are 8-bit components packed into the surface's
 * colour format.
 */
static void radeon_clear_box(drm_radeon_private_t * dev_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 color;
	RING_LOCALS;

	x += dev_priv->sarea_priv->boxes[0].x1;
	y += dev_priv->sarea_priv->boxes[0].y1;

	switch (dev_priv->color_fmt) {
	case RADEON_COLOR_FORMAT_RGB565:
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
		break;
	}

	/* Open the full plane mask before painting */
	BEGIN_RING(4);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
	OUT_RING(0xffffffff);
	ADVANCE_RING();

	BEGIN_RING(6);

	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		 RADEON_GMC_BRUSH_SOLID_COLOR |
		 (dev_priv->color_fmt << 8) |
		 RADEON_GMC_SRC_DATATYPE_COLOR |
		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

	/* NOTE(review): when page-flipped onto page 1 this targets the
	 * front buffer -- presumably so the boxes stay visible; confirm. */
	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		OUT_RING(dev_priv->front_pitch_offset);
	} else {
		OUT_RING(dev_priv->back_pitch_offset);
	}

	OUT_RING(color);

	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);

	ADVANCE_RING();
}
685
686 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
687 {
688         /* Collapse various things into a wait flag -- trying to
689          * guess if userspase slept -- better just to have them tell us.
690          */
691         if (dev_priv->stats.last_frame_reads > 1 ||
692             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
693                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
694         }
695
696         if (dev_priv->stats.freelist_loops) {
697                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
698         }
699
700         /* Purple box for page flipping
701          */
702         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
703                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
704
705         /* Red box if we have to wait for idle at any point
706          */
707         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
708                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
709
710         /* Blue box: lost context?
711          */
712
713         /* Yellow box for texture swaps
714          */
715         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
716                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
717
718         /* Green box if hardware never idles (as far as we can tell)
719          */
720         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
721                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
722
723         /* Draw bars indicating number of buffers allocated
724          * (not a great measure, easily confused)
725          */
726         if (dev_priv->stats.requested_bufs) {
727                 if (dev_priv->stats.requested_bufs > 100)
728                         dev_priv->stats.requested_bufs = 100;
729
730                 radeon_clear_box(dev_priv, 4, 16,
731                                  dev_priv->stats.requested_bufs, 4,
732                                  196, 128, 128);
733         }
734
735         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
736
737 }
738
739 /* ================================================================
740  * CP command dispatch functions
741  */
742
/* Emit CP commands to clear the color, depth and/or stencil buffers
 * for every client clip rect.  Color clears are done as 2D solid
 * fills; depth/stencil clears either use the hyper-z fast-clear path
 * (when RADEON_CLEAR_FASTZ is requested) or render a quad through the
 * 3D engine with only the depth/stencil outputs enabled.
 */
static void radeon_cp_dispatch_clear(drm_device_t * dev,
                                     drm_radeon_clear_t * clear,
                                     drm_radeon_clear_rect_t * depth_boxes)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
        int nbox = sarea_priv->nbox;
        drm_clip_rect_t *pbox = sarea_priv->boxes;
        unsigned int flags = clear->flags;
        u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
        int i;
        RING_LOCALS;
        DRM_DEBUG("flags = 0x%x\n", flags);

        dev_priv->stats.clears++;

        /* While page 1 is displayed the FRONT/BACK roles are swapped,
         * so redirect the requested buffers accordingly.
         */
        if (dev_priv->page_flipping && dev_priv->current_page == 1) {
                unsigned int tmp = flags;

                flags &= ~(RADEON_FRONT | RADEON_BACK);
                if (tmp & RADEON_FRONT)
                        flags |= RADEON_BACK;
                if (tmp & RADEON_BACK)
                        flags |= RADEON_FRONT;
        }

        if (flags & (RADEON_FRONT | RADEON_BACK)) {

                BEGIN_RING(4);

                /* Ensure the 3D stream is idle before doing a
                 * 2D fill to clear the front or back buffer.
                 */
                RADEON_WAIT_UNTIL_3D_IDLE();

                OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
                OUT_RING(clear->color_mask);

                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                /* Solid-fill each clip rect in the requested buffer(s). */
                for (i = 0; i < nbox; i++) {
                        int x = pbox[i].x1;
                        int y = pbox[i].y1;
                        int w = pbox[i].x2 - x;
                        int h = pbox[i].y2 - y;

                        DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
                                  x, y, w, h, flags);

                        if (flags & RADEON_FRONT) {
                                BEGIN_RING(6);

                                OUT_RING(CP_PACKET3
                                         (RADEON_CNTL_PAINT_MULTI, 4));
                                OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                         RADEON_GMC_BRUSH_SOLID_COLOR |
                                         (dev_priv->
                                          color_fmt << 8) |
                                         RADEON_GMC_SRC_DATATYPE_COLOR |
                                         RADEON_ROP3_P |
                                         RADEON_GMC_CLR_CMP_CNTL_DIS);

                                OUT_RING(dev_priv->front_pitch_offset);
                                OUT_RING(clear->clear_color);

                                OUT_RING((x << 16) | y);
                                OUT_RING((w << 16) | h);

                                ADVANCE_RING();
                        }

                        if (flags & RADEON_BACK) {
                                BEGIN_RING(6);

                                OUT_RING(CP_PACKET3
                                         (RADEON_CNTL_PAINT_MULTI, 4));
                                OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                         RADEON_GMC_BRUSH_SOLID_COLOR |
                                         (dev_priv->
                                          color_fmt << 8) |
                                         RADEON_GMC_SRC_DATATYPE_COLOR |
                                         RADEON_ROP3_P |
                                         RADEON_GMC_CLR_CMP_CNTL_DIS);

                                OUT_RING(dev_priv->back_pitch_offset);
                                OUT_RING(clear->clear_color);

                                OUT_RING((x << 16) | y);
                                OUT_RING((w << 16) | h);

                                ADVANCE_RING();
                        }
                }
        }

        /* hyper z clear */
        /* no docs available, based on reverse engineering by Stephane Marchesin */
        if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
            && (flags & RADEON_CLEAR_FASTZ)) {

                int i;
                /* Depth pixels per scanline: the 16-bit format packs two
                 * per dword, everything else one per dword. */
                int depthpixperline =
                    dev_priv->depth_fmt ==
                    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
                                                       2) : (dev_priv->
                                                             depth_pitch / 4);

                u32 clearmask;

                u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
                    ((clear->depth_mask & 0xff) << 24);

                /* Make sure we restore the 3D state next time.
                 * we haven't touched any "normal" state - still need this?
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                if ((dev_priv->flags & CHIP_HAS_HIERZ)
                    && (flags & RADEON_USE_HIERZ)) {
                        /* FIXME : reverse engineer that for Rx00 cards */
                        /* FIXME : the mask supposedly contains low-res z values. So can't set
                           just to the max (0xff? or actually 0x3fff?), need to take z clear
                           value into account? */
                        /* pattern seems to work for r100, though get slight
                           rendering errors with glxgears. If hierz is not enabled for r100,
                           only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
                           other ones are ignored, and the same clear mask can be used. That's
                           very different behaviour than R200 which needs different clear mask
                           and different number of tiles to clear if hierz is enabled or not !?!
                         */
                        clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
                } else {
                        /* clear mask : chooses the clearing pattern.
                           rv250: could be used to clear only parts of macrotiles
                           (but that would get really complicated...)?
                           bit 0 and 1 (either or both of them ?!?!) are used to
                           not clear tile (or maybe one of the bits indicates if the tile is
                           compressed or not), bit 2 and 3 to not clear tile 1,...,.
                           Pattern is as follows:
                           | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
                           bits -------------------------------------------------
                           | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
                           rv100: clearmask covers 2x8 4x1 tiles, but one clear still
                           covers 256 pixels ?!?
                         */
                        clearmask = 0x0;
                }

                /* Program the z clear value/mask and flush the z cache
                 * before touching the z-mask RAM. */
                BEGIN_RING(8);
                RADEON_WAIT_UNTIL_2D_IDLE();
                OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
                             tempRB3D_DEPTHCLEARVALUE);
                /* what offset is this exactly ? */
                OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
                /* need ctlstat, otherwise get some strange black flickering */
                OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
                             RADEON_RB3D_ZC_FLUSH_ALL);
                ADVANCE_RING();

                /* Clear the z-mask tiles covering each clip rect, one
                 * row of tiles per ring packet. */
                for (i = 0; i < nbox; i++) {
                        int tileoffset, nrtilesx, nrtilesy, j;
                        /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
                        if ((dev_priv->flags & CHIP_HAS_HIERZ)
                            && !(dev_priv->microcode_version == UCODE_R200)) {
                                /* FIXME : figure this out for r200 (when hierz is enabled). Or
                                   maybe r200 actually doesn't need to put the low-res z value into
                                   the tile cache like r100, but just needs to clear the hi-level z-buffer?
                                   Works for R100, both with hierz and without.
                                   R100 seems to operate on 2x1 8x8 tiles, but...
                                   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
                                   problematic with resolutions which are not 64 pix aligned? */
                                tileoffset =
                                    ((pbox[i].y1 >> 3) * depthpixperline +
                                     pbox[i].x1) >> 6;
                                nrtilesx =
                                    ((pbox[i].x2 & ~63) -
                                     (pbox[i].x1 & ~63)) >> 4;
                                nrtilesy =
                                    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING(4);
                                        OUT_RING(CP_PACKET3
                                                 (RADEON_3D_CLEAR_ZMASK, 2));
                                        /* first tile */
                                        OUT_RING(tileoffset * 8);
                                        /* the number of tiles to clear */
                                        OUT_RING(nrtilesx + 4);
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING(clearmask);
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 6;
                                }
                        } else if (dev_priv->microcode_version == UCODE_R200) {
                                /* works for rv250. */
                                /* find first macro tile (8x2 4x4 z-pixels on rv250) */
                                tileoffset =
                                    ((pbox[i].y1 >> 3) * depthpixperline +
                                     pbox[i].x1) >> 5;
                                nrtilesx =
                                    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
                                nrtilesy =
                                    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING(4);
                                        OUT_RING(CP_PACKET3
                                                 (RADEON_3D_CLEAR_ZMASK, 2));
                                        /* first tile */
                                        /* judging by the first tile offset needed, could possibly
                                           directly address/clear 4x4 tiles instead of 8x2 * 4x4
                                           macro tiles, though would still need clear mask for
                                           right/bottom if truly 4x4 granularity is desired ? */
                                        OUT_RING(tileoffset * 16);
                                        /* the number of tiles to clear */
                                        OUT_RING(nrtilesx + 1);
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING(clearmask);
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 5;
                                }
                        } else {        /* rv 100 */
                                /* rv100 might not need 64 pix alignment, who knows */
                                /* offsets are, hmm, weird */
                                tileoffset =
                                    ((pbox[i].y1 >> 4) * depthpixperline +
                                     pbox[i].x1) >> 6;
                                nrtilesx =
                                    ((pbox[i].x2 & ~63) -
                                     (pbox[i].x1 & ~63)) >> 4;
                                nrtilesy =
                                    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING(4);
                                        OUT_RING(CP_PACKET3
                                                 (RADEON_3D_CLEAR_ZMASK, 2));
                                        OUT_RING(tileoffset * 128);
                                        /* the number of tiles to clear */
                                        OUT_RING(nrtilesx + 4);
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING(clearmask);
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 6;
                                }
                        }
                }

                /* TODO don't always clear all hi-level z tiles */
                if ((dev_priv->flags & CHIP_HAS_HIERZ)
                    && (dev_priv->microcode_version == UCODE_R200)
                    && (flags & RADEON_USE_HIERZ))
                        /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
                        /* FIXME : the mask supposedly contains low-res z values. So can't set
                           just to the max (0xff? or actually 0x3fff?), need to take z clear
                           value into account? */
                {
                        BEGIN_RING(4);
                        OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
                        OUT_RING(0x0);  /* First tile */
                        OUT_RING(0x3cc0);
                        OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
                        ADVANCE_RING();
                }
        }

        /* We have to clear the depth and/or stencil buffers by
         * rendering a quad into just those buffers.  Thus, we have to
         * make sure the 3D engine is configured correctly.
         */
        else if ((dev_priv->microcode_version == UCODE_R200) &&
                (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

                /* R200 path: build up the full set of 3D state registers
                 * needed to render the clear quad. */
                int tempPP_CNTL;
                int tempRE_CNTL;
                int tempRB3D_CNTL;
                int tempRB3D_ZSTENCILCNTL;
                int tempRB3D_STENCILREFMASK;
                int tempRB3D_PLANEMASK;
                int tempSE_CNTL;
                int tempSE_VTE_CNTL;
                int tempSE_VTX_FMT_0;
                int tempSE_VTX_FMT_1;
                int tempSE_VAP_CNTL;
                int tempRE_AUX_SCISSOR_CNTL;

                tempPP_CNTL = 0;
                tempRE_CNTL = 0;

                tempRB3D_CNTL = depth_clear->rb3d_cntl;

                tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
                tempRB3D_STENCILREFMASK = 0x0;

                tempSE_CNTL = depth_clear->se_cntl;

                /* Disable TCL */

                tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
                                          (0x9 <<
                                           SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

                /* Color writes fully masked off: only z/stencil are touched. */
                tempRB3D_PLANEMASK = 0x0;

                tempRE_AUX_SCISSOR_CNTL = 0x0;

                tempSE_VTE_CNTL =
                    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

                /* Vertex format (X, Y, Z, W) */
                tempSE_VTX_FMT_0 =
                    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
                    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
                tempSE_VTX_FMT_1 = 0x0;

                /*
                 * Depth buffer specific enables
                 */
                if (flags & RADEON_DEPTH) {
                        /* Enable depth buffer */
                        tempRB3D_CNTL |= RADEON_Z_ENABLE;
                } else {
                        /* Disable depth buffer */
                        tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
                }

                /*
                 * Stencil buffer specific enables
                 */
                if (flags & RADEON_STENCIL) {
                        tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
                        tempRB3D_STENCILREFMASK = clear->depth_mask;
                } else {
                        tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
                        tempRB3D_STENCILREFMASK = 0x00000000;
                }

                if (flags & RADEON_USE_COMP_ZBUF) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
                            RADEON_Z_DECOMPRESSION_ENABLE;
                }
                if (flags & RADEON_USE_HIERZ) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
                }

                BEGIN_RING(26);
                RADEON_WAIT_UNTIL_2D_IDLE();

                OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
                OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
                OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
                OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
                OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
                             tempRB3D_STENCILREFMASK);
                OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
                OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
                OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
                OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
                OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
                OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
                OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                /* Render one triangle per clip rect covering the whole
                 * rect; 0x3f800000 is 1.0f (the W component). */
                for (i = 0; i < nbox; i++) {

                        /* Funny that this should be required --
                         *  sets top-left?
                         */
                        radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

                        BEGIN_RING(14);
                        OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
                        OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
                                  RADEON_PRIM_WALK_RING |
                                  (3 << RADEON_NUM_VERTICES_SHIFT)));
                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);
                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);
                        OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);
                        ADVANCE_RING();
                }
        } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {

                /* Pre-R200 path: same idea with the legacy register set. */
                int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

                rb3d_cntl = depth_clear->rb3d_cntl;

                if (flags & RADEON_DEPTH) {
                        rb3d_cntl |= RADEON_Z_ENABLE;
                } else {
                        rb3d_cntl &= ~RADEON_Z_ENABLE;
                }

                if (flags & RADEON_STENCIL) {
                        rb3d_cntl |= RADEON_STENCIL_ENABLE;
                        rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
                } else {
                        rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
                        rb3d_stencilrefmask = 0x00000000;
                }

                if (flags & RADEON_USE_COMP_ZBUF) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
                            RADEON_Z_DECOMPRESSION_ENABLE;
                }
                if (flags & RADEON_USE_HIERZ) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
                }

                BEGIN_RING(13);
                RADEON_WAIT_UNTIL_2D_IDLE();

                OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
                OUT_RING(0x00000000);
                OUT_RING(rb3d_cntl);

                OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
                OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
                OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
                OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                for (i = 0; i < nbox; i++) {

                        /* Funny that this should be required --
                         *  sets top-left?
                         */
                        radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

                        BEGIN_RING(15);

                        OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
                        OUT_RING(RADEON_VTX_Z_PRESENT |
                                 RADEON_VTX_PKCOLOR_PRESENT);
                        OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
                                  RADEON_PRIM_WALK_RING |
                                  RADEON_MAOS_ENABLE |
                                  RADEON_VTX_FMT_RADEON_MODE |
                                  (3 << RADEON_NUM_VERTICES_SHIFT)));

                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x0);

                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x0);

                        OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x0);

                        ADVANCE_RING();
                }
        }

        /* Increment the clear counter.  The client-side 3D driver must
         * wait on this value before performing the clear ioctl.  We
         * need this because the card's so damned fast...
         */
        dev_priv->sarea_priv->last_clear++;

        BEGIN_RING(4);

        RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
        RADEON_WAIT_UNTIL_IDLE();

        ADVANCE_RING();
}
1233
/* Copy each client clip rect from the back buffer to the front buffer
 * with a 2D blit (handles the flipped case where the roles of the two
 * buffers are reversed), then advance the frame-age scratch register
 * that clients throttle against.
 */
static void radeon_cp_dispatch_swap(drm_device_t * dev)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        int nbox = sarea_priv->nbox;
        drm_clip_rect_t *pbox = sarea_priv->boxes;
        int i;
        RING_LOCALS;
        DRM_DEBUG("\n");

        /* Do some trivial performance monitoring...
         */
        if (dev_priv->do_boxes)
                radeon_cp_performance_boxes(dev_priv);

        /* Wait for the 3D stream to idle before dispatching the bitblt.
         * This will prevent data corruption between the two streams.
         */
        BEGIN_RING(2);

        RADEON_WAIT_UNTIL_3D_IDLE();

        ADVANCE_RING();

        for (i = 0; i < nbox; i++) {
                int x = pbox[i].x1;
                int y = pbox[i].y1;
                int w = pbox[i].x2 - x;
                int h = pbox[i].y2 - y;

                DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);

                BEGIN_RING(7);

                /* Plain source copy (ROP3_S), no brush, write mask
                 * disabled, source fetched from memory. */
                OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
                OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
                         RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                         RADEON_GMC_BRUSH_NONE |
                         (dev_priv->color_fmt << 8) |
                         RADEON_GMC_SRC_DATATYPE_COLOR |
                         RADEON_ROP3_S |
                         RADEON_DP_SRC_SOURCE_MEMORY |
                         RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);

                /* Make this work even if front & back are flipped:
                 */
                if (dev_priv->current_page == 0) {
                        OUT_RING(dev_priv->back_pitch_offset);
                        OUT_RING(dev_priv->front_pitch_offset);
                } else {
                        OUT_RING(dev_priv->front_pitch_offset);
                        OUT_RING(dev_priv->back_pitch_offset);
                }

                /* Source top-left, destination top-left, then size. */
                OUT_RING((x << 16) | y);
                OUT_RING((x << 16) | y);
                OUT_RING((w << 16) | h);

                ADVANCE_RING();
        }

        /* Increment the frame counter.  The client-side 3D driver must
         * throttle the framerate by waiting for this value before
         * performing the swapbuffer ioctl.
         */
        dev_priv->sarea_priv->last_frame++;

        BEGIN_RING(4);

        RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
        RADEON_WAIT_UNTIL_2D_IDLE();

        ADVANCE_RING();
}
1308
/* Perform a page flip: point both CRTCs at the buffer that is not
 * currently displayed, toggle the current-page bookkeeping, and bump
 * the frame-age counter that clients throttle against.
 */
static void radeon_cp_dispatch_flip(drm_device_t * dev)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
        /* Flip to whichever buffer is NOT currently scanned out. */
        int offset = (dev_priv->current_page == 1)
            ? dev_priv->front_offset : dev_priv->back_offset;
        RING_LOCALS;
        DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
                  __FUNCTION__,
                  dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);

        /* Do some trivial performance monitoring...
         */
        if (dev_priv->do_boxes) {
                dev_priv->stats.boxes |= RADEON_BOX_FLIP;
                radeon_cp_performance_boxes(dev_priv);
        }

        /* Update the frame offsets for both CRTCs
         */
        BEGIN_RING(6);

        RADEON_WAIT_UNTIL_3D_IDLE();
        /* CRTC1 scans out from the flip target, adjusted for the SAREA
         * frame position; color_fmt - 2 is used as bytes per pixel here,
         * and the result is aligned down to 8 bytes. */
        OUT_RING_REG(RADEON_CRTC_OFFSET,
                     ((sarea->frame.y * dev_priv->front_pitch +
                       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
                     + offset);
        OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
                     + offset);

        ADVANCE_RING();

        /* Increment the frame counter.  The client-side 3D driver must
         * throttle the framerate by waiting for this value before
         * performing the swapbuffer ioctl.
         */
        dev_priv->sarea_priv->last_frame++;
        /* Toggle the page both in our private state and in the SAREA. */
        dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
            1 - dev_priv->current_page;

        BEGIN_RING(2);

        RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);

        ADVANCE_RING();
}
1355
1356 static int bad_prim_vertex_nr(int primitive, int nr)
1357 {
1358         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1359         case RADEON_PRIM_TYPE_NONE:
1360         case RADEON_PRIM_TYPE_POINT:
1361                 return nr < 1;
1362         case RADEON_PRIM_TYPE_LINE:
1363                 return (nr & 1) || nr == 0;
1364         case RADEON_PRIM_TYPE_LINE_STRIP:
1365                 return nr < 2;
1366         case RADEON_PRIM_TYPE_TRI_LIST:
1367         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1368         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1369         case RADEON_PRIM_TYPE_RECT_LIST:
1370                 return nr % 3 || nr == 0;
1371         case RADEON_PRIM_TYPE_TRI_FAN:
1372         case RADEON_PRIM_TYPE_TRI_STRIP:
1373                 return nr < 3;
1374         default:
1375                 return 1;
1376         }
1377 }
1378
/* One TCL primitive taken from a client vertex/index buffer; consumed
 * by radeon_cp_dispatch_vertex() and radeon_cp_dispatch_indices().
 */
typedef struct {
	unsigned int start;	/* byte offset of the primitive in the buffer */
	unsigned int finish;	/* byte offset one past the end */
	unsigned int prim;	/* RADEON_PRIM_TYPE_* primitive type/flags */
	unsigned int numverts;	/* number of vertices in the primitive */
	unsigned int offset;	/* vertex data offset (indexed prims only) */
	unsigned int vc_format;	/* hardware vertex format word */
} drm_radeon_tcl_prim_t;
1387
/* Emit rendering commands for one non-indexed (list-walk) primitive,
 * replaying the draw once per SAREA cliprect (at least once even with
 * no cliprects, because of the do/while).
 */
static void radeon_cp_dispatch_vertex(drm_device_t * dev,
				      drm_buf_t * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GPU-visible address of the vertex data inside the DMA buffer. */
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	/* Reject vertex counts that don't match the primitive type. */
	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}
1433
/* Retire a DMA buffer: stamp it with a new dispatch age emitted through
 * the ring, so the freelist can tell when the hardware is done with it,
 * and mark it pending/unused.
 */
static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	/* New age value; the GPU writes it back when it reaches this point. */
	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING(2);
	RADEON_DISPATCH_AGE(buf_priv->age);
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}
1450
/* Kick off execution of [start, end) of a DMA buffer as an indirect
 * command buffer.  A no-op when the range is empty.
 */
static void radeon_cp_dispatch_indirect(drm_device_t * dev,
					drm_buf_t * buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		/* GPU-visible address of the command range. */
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		/* Byte length rounded up to whole dwords. */
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if (dwords & 1) {
			/* NOTE(review): writes one dword past 'end' in the
			 * CPU mapping — assumes the buffer always has room
			 * for the pad; confirm against buffer allocation.
			 */
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}
1484
/* Emit an indexed primitive.  The CP_PACKET3 draw header is written
 * directly into the element buffer in front of the index data, then the
 * whole range is executed as an indirect buffer once per cliprect.
 */
static void radeon_cp_dispatch_indices(drm_device_t * dev,
				       drm_buf_t * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GPU-visible address of the vertex data the indices refer to. */
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	/* Indices begin after the packet header we build below. */
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	/* Indices are 16-bit. */
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
		return;
	}

	/* Range must be non-empty and the start 8-byte aligned. */
	if (start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	/* CPU mapping of the element buffer where the header is built. */
	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
			elt_buf->offset + prim->start);

	/* Build the indexed-draw packet header in place. */
	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT));

	/* Replay once per cliprect (at least once). */
	do {
		if (i < nbox)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		i++;
	} while (i < nbox);

}
1540
/* Upper bound on bytes uploaded per blit pass: one DMA buffer's worth. */
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
/* Upload a texture image from user space into video memory via the 2D
 * blitter.  Large images are split into multiple passes, each limited
 * to RADEON_MAX_TEXTURE_SIZE bytes; 'tex'/'image' are updated in place
 * so the caller (or a retried ioctl) can continue where we left off.
 * Returns 0 on success or a negative DRM_ERR code.
 */
static int radeon_cp_dispatch_texture(DRMFILE filp,
				      drm_device_t * dev,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	/* Validate/relocate the client-supplied destination offset. */
	if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return DRM_ERR(EINVAL);
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		/* 4 bytes per texel. */
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		/* 2 bytes per texel. */
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		/* 1 byte per texel. */
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return DRM_ERR(EINVAL);
	}
	/* Source pitch in 64-byte units for the blitter. */
	spitch = blit_width >> 6;
	if (spitch == 0 && image->height > 1)
		return DRM_ERR(EINVAL);

	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

	do {
		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			  tex->offset >> 10, tex->pitch, tex->format,
			  image->x, image->y, image->width, image->height);

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		if (size > RADEON_MAX_TEXTURE_SIZE) {
			/* Clamp this pass to whole scanlines that fit. */
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if (size < 4 && size > 0) {
			size = 4;
		} else if (size == 0) {
			return 0;
		}

		buf = radeon_freelist_get(dev);
		/* NOTE(review): the '0 &&' makes this retry-after-idle path
		 * dead code — presumably disabled deliberately; confirm
		 * before re-enabling or removing.
		 */
		if (0 && !buf) {
			radeon_do_cp_idle(dev_priv);
			buf = radeon_freelist_get(dev);
		}
		if (!buf) {
			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
			/* Hand progress back to user space so the ioctl can
			 * be retried from the current position.
			 */
			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
				return DRM_ERR(EFAULT);
			return DRM_ERR(EAGAIN);
		}

		/* Dispatch the indirect buffer.
		 */
		buffer =
		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;

/* Copy '_width' bytes from user space into the DMA buffer, bailing out
 * of the whole function with EFAULT on a bad user pointer.
 */
#define RADEON_COPY_MT(_buf, _data, _width) \
	do { \
		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
			DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
			return DRM_ERR(EFAULT); \
		} \
	} while(0)

		if (microtile) {
			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
			   however, we cannot use blitter directly for texture width < 64 bytes,
			   since minimum tex pitch is 64 bytes and we need this to match
			   the texture width, otherwise the blitter will tile it wrong.
			   Thus, tiling manually in this case. Additionally, need to special
			   case tex height = 1, since our actual image will have height 2
			   and we need to ensure we don't read beyond the texture size
			   from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					RADEON_COPY_MT(buffer, data,
						(int)(tex_width * sizeof(u32)));
				} else if (tex_width == 32) {
					RADEON_COPY_MT(buffer, data, 16);
					RADEON_COPY_MT(buffer + 8,
						       data + 16, 16);
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else if (tex_width < 16) {
				/* Manual tiling: one 16-byte tile row per scanline. */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
				for (i = 0; i < tex->height; i += 2) {
					RADEON_COPY_MT(buffer, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 8, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 4, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 12, data, 16);
					data += 16;
					buffer += 16;
				}
			}
		} else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 8;
					data += tex_width;
				}
			}
		}

#undef RADEON_COPY_MT
		buf->filp = filp;
		buf->used = size;
		offset = dev_priv->gart_buffers_offset + buf->offset;
		/* Blit the staged data from the DMA buffer to the texture. */
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | (tex->offset >> 10));
		OUT_RING(0);
		OUT_RING((image->x << 16) | image->y);
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();

		radeon_cp_discard_buffer(dev, buf);

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	return 0;
}
1777
1778 static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
1779 {
1780         drm_radeon_private_t *dev_priv = dev->dev_private;
1781         int i;
1782         RING_LOCALS;
1783         DRM_DEBUG("\n");
1784
1785         BEGIN_RING(35);
1786
1787         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1788         OUT_RING(0x00000000);
1789
1790         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1791         for (i = 0; i < 32; i++) {
1792                 OUT_RING(stipple[i]);
1793         }
1794
1795         ADVANCE_RING();
1796 }
1797
/* Write one surface's cached state (flags and address bounds) into the
 * hardware surface registers.  Idles the CP first so the register
 * update cannot race in-flight commands.  No-op before the MMIO map
 * exists.
 */
static void radeon_apply_surface_regs(int surf_index,
				      drm_radeon_private_t *dev_priv)
{
	if (!dev_priv->mmio)
		return;

	radeon_do_cp_idle(dev_priv);

	/* Each surface owns a 16-byte register block. */
	RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
		     dev_priv->surfaces[surf_index].flags);
	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].lower);
	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].upper);
}
1813
1814 /* Allocates a virtual surface
1815  * doesn't always allocate a real surface, will stretch an existing
1816  * surface when possible.
1817  *
1818  * Note that refcount can be at most 2, since during a free refcount=3
1819  * might mean we have to allocate a new surface which might not always
1820  * be available.
1821  * For example : we allocate three contigous surfaces ABC. If B is
1822  * freed, we suddenly need two surfaces to store A and C, which might
1823  * not always be available.
1824  */
/* Allocate a virtual surface for [new->address, new->address+new->size).
 * Returns the virtual surface index, or -1 on failure (bad parameters,
 * overlap with an existing surface, or no free slots).
 */
static int alloc_surface(drm_radeon_surface_alloc_t *new,
			 drm_radeon_private_t *dev_priv, DRMFILE filp)
{
	struct radeon_virt_surface *s;
	int i;
	int virt_surface_index;
	uint32_t new_upper, new_lower;

	/* Inclusive address range of the requested surface. */
	new_lower = new->address;
	new_upper = new_lower + new->size - 1;

	/* sanity check */
	/* Bounds must be properly aligned: lower at the start and upper
	 * at the end of a RADEON_SURF_ADDRESS_FIXED_MASK-sized unit.
	 */
	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
	     RADEON_SURF_ADDRESS_FIXED_MASK)
	    || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
		return -1;

	/* make sure there is no overlap with existing surfaces */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if ((dev_priv->surfaces[i].refcount != 0) &&
		    (((new_lower >= dev_priv->surfaces[i].lower) &&
		      (new_lower < dev_priv->surfaces[i].upper)) ||
		     ((new_lower < dev_priv->surfaces[i].lower) &&
		      (new_upper > dev_priv->surfaces[i].lower)))) {
			return -1;
		}
	}

	/* find a virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
		if (dev_priv->virt_surfaces[i].filp == 0)
			break;
	if (i == 2 * RADEON_MAX_SURFACES) {
		return -1;
	}
	virt_surface_index = i;

	/* try to reuse an existing surface */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		/* extend before */
		/* The new range ends exactly where surface i begins:
		 * grow surface i downward instead of using a new slot.
		 */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].lower = s->lower;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}

		/* extend after */
		/* Symmetric case: grow surface i upward. */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].upper = s->upper;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* okay, we need a new one */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if (dev_priv->surfaces[i].refcount == 0) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount = 1;
			dev_priv->surfaces[i].lower = s->lower;
			dev_priv->surfaces[i].upper = s->upper;
			dev_priv->surfaces[i].flags = s->flags;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* we didn't find anything */
	return -1;
}
1919
/* Release the virtual surface owned by 'filp' whose lower bound equals
 * 'lower'.  Shrinks the backing real surface accordingly and clears its
 * flags when the last reference drops.  Returns 0 on success, 1 when no
 * matching surface is found.
 */
static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
			int lower)
{
	struct radeon_virt_surface *s;
	int i;
	/* find the virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		s = &(dev_priv->virt_surfaces[i]);
		if (s->filp) {
			if ((lower == s->lower) && (filp == s->filp)) {
				/* If this virtual surface forms the lower end
				 * of the real surface, move the real lower
				 * bound up past it; likewise for the upper end.
				 */
				if (dev_priv->surfaces[s->surface_index].
				    lower == s->lower)
					dev_priv->surfaces[s->surface_index].
					    lower = s->upper;

				if (dev_priv->surfaces[s->surface_index].
				    upper == s->upper)
					dev_priv->surfaces[s->surface_index].
					    upper = s->lower;

				dev_priv->surfaces[s->surface_index].refcount--;
				if (dev_priv->surfaces[s->surface_index].
				    refcount == 0)
					dev_priv->surfaces[s->surface_index].
					    flags = 0;
				s->filp = NULL;
				radeon_apply_surface_regs(s->surface_index,
							  dev_priv);
				return 0;
			}
		}
	}
	return 1;
}
1954
1955 static void radeon_surfaces_release(DRMFILE filp,
1956                                     drm_radeon_private_t * dev_priv)
1957 {
1958         int i;
1959         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1960                 if (dev_priv->virt_surfaces[i].filp == filp)
1961                         free_surface(filp, dev_priv,
1962                                      dev_priv->virt_surfaces[i].lower);
1963         }
1964 }
1965
1966 /* ================================================================
1967  * IOCTL functions
1968  */
1969 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1970 {
1971         DRM_DEVICE;
1972         drm_radeon_private_t *dev_priv = dev->dev_private;
1973         drm_radeon_surface_alloc_t alloc;
1974
1975         DRM_COPY_FROM_USER_IOCTL(alloc,
1976                                  (drm_radeon_surface_alloc_t __user *) data,
1977                                  sizeof(alloc));
1978
1979         if (alloc_surface(&alloc, dev_priv, filp) == -1)
1980                 return DRM_ERR(EINVAL);
1981         else
1982                 return 0;
1983 }
1984
1985 static int radeon_surface_free(DRM_IOCTL_ARGS)
1986 {
1987         DRM_DEVICE;
1988         drm_radeon_private_t *dev_priv = dev->dev_private;
1989         drm_radeon_surface_free_t memfree;
1990
1991         DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *) data,
1992                                  sizeof(memfree));
1993
1994         if (free_surface(filp, dev_priv, memfree.address))
1995                 return DRM_ERR(EINVAL);
1996         else
1997                 return 0;
1998 }
1999
/* Ioctl: clear the framebuffer/depth within the client's cliprects.
 * Copies the per-rect depth-clear parameters from user space and hands
 * the actual work to radeon_cp_dispatch_clear().
 */
static int radeon_cp_clear(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_clear_t clear;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
				 sizeof(clear));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* Clamp the (shared, client-writable) cliprect count. */
	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
			       sarea_priv->nbox * sizeof(depth_boxes[0])))
		return DRM_ERR(EFAULT);

	radeon_cp_dispatch_clear(dev, &clear, depth_boxes);

	COMMIT_RING();
	return 0;
}
2028
2029 /* Not sure why this isn't set all the time:
2030  */
/* Not sure why this isn't set all the time:
 */
/* Enable hardware page flipping on both CRTCs and reset the driver's
 * flip state to page 0.  Always returns 0.
 */
static int radeon_do_init_pageflip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(6);
	RADEON_WAIT_UNTIL_3D_IDLE();
	/* Read-modify-write: set the FLIP_CNTL bit in both CRTC offset
	 * control registers via the ring.
	 */
	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	ADVANCE_RING();

	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}
2054
2055 /* Called whenever a client dies, from drm_release.
2056  * NOTE:  Lock isn't necessarily held when this is called!
2057  */
2058 static int radeon_do_cleanup_pageflip(drm_device_t * dev)
2059 {
2060         drm_radeon_private_t *dev_priv = dev->dev_private;
2061         DRM_DEBUG("\n");
2062
2063         if (dev_priv->current_page != 0)
2064                 radeon_cp_dispatch_flip(dev);
2065
2066         dev_priv->page_flipping = 0;
2067         return 0;
2068 }
2069
2070 /* Swapping and flipping are different operations, need different ioctls.
2071  * They can & should be intermixed to support multiple 3d windows.
2072  */
/* Swapping and flipping are different operations, need different ioctls.
 * They can & should be intermixed to support multiple 3d windows.
 */
/* Ioctl: perform a page flip, lazily enabling flip mode on first use. */
static int radeon_cp_flip(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* First flip after init: turn page flipping on. */
	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip(dev);

	radeon_cp_dispatch_flip(dev);

	COMMIT_RING();
	return 0;
}
2091
/* Ioctl: blit the back buffer to the front within the client's
 * cliprects, then release 3D context ownership.
 */
static int radeon_cp_swap(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* Clamp the (shared, client-writable) cliprect count. */
	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	radeon_cp_dispatch_swap(dev);
	/* Force state re-emission by the next 3D client. */
	dev_priv->sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}
2112
/* DRM_IOCTL_RADEON_VERTEX handler: emit any dirty state from the SAREA and
 * dispatch a single vertex-array primitive stored in a client DMA buffer.
 */
static int radeon_cp_vertex(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex_t vertex;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);

	/* Validate the client-supplied buffer index and primitive type. */
	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", vertex.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	/* The buffer must belong to this client and not be in flight. */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	/* Build up a prim_t record:
	 */
	if (vertex.count) {
		buf->used = vertex.count;	/* not used? */

		/* Emit any dirty context/texture state before the prim. */
		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
			if (radeon_emit_state(dev_priv, filp_priv,
					      &sarea_priv->context_state,
					      sarea_priv->tex_state,
					      sarea_priv->dirty)) {
				DRM_ERROR("radeon_emit_state failed\n");
				return DRM_ERR(EINVAL);
			}

			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex.count;	/* unused */
		prim.prim = vertex.prim;
		prim.numverts = vertex.count;
		prim.vc_format = dev_priv->sarea_priv->vc_format;

		radeon_cp_dispatch_vertex(dev, buf, &prim);
	}

	/* Release the buffer back to the free list when the GPU is done. */
	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2195
2196 static int radeon_cp_indices(DRM_IOCTL_ARGS)
2197 {
2198         DRM_DEVICE;
2199         drm_radeon_private_t *dev_priv = dev->dev_private;
2200         drm_file_t *filp_priv;
2201         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2202         drm_device_dma_t *dma = dev->dma;
2203         drm_buf_t *buf;
2204         drm_radeon_indices_t elts;
2205         drm_radeon_tcl_prim_t prim;
2206         int count;
2207
2208         LOCK_TEST_WITH_RETURN(dev, filp);
2209
2210         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2211
2212         DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
2213                                  sizeof(elts));
2214
2215         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2216                   DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);
2217
2218         if (elts.idx < 0 || elts.idx >= dma->buf_count) {
2219                 DRM_ERROR("buffer index %d (of %d max)\n",
2220                           elts.idx, dma->buf_count - 1);
2221                 return DRM_ERR(EINVAL);
2222         }
2223         if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2224                 DRM_ERROR("buffer prim %d\n", elts.prim);
2225                 return DRM_ERR(EINVAL);
2226         }
2227
2228         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2229         VB_AGE_TEST_WITH_RETURN(dev_priv);
2230
2231         buf = dma->buflist[elts.idx];
2232
2233         if (buf->filp != filp) {
2234                 DRM_ERROR("process %d using buffer owned by %p\n",
2235                           DRM_CURRENTPID, buf->filp);
2236                 return DRM_ERR(EINVAL);
2237         }
2238         if (buf->pending) {
2239                 DRM_ERROR("sending pending buffer %d\n", elts.idx);
2240                 return DRM_ERR(EINVAL);
2241         }
2242
2243         count = (elts.end - elts.start) / sizeof(u16);
2244         elts.start -= RADEON_INDEX_PRIM_OFFSET;
2245
2246         if (elts.start & 0x7) {
2247                 DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
2248                 return DRM_ERR(EINVAL);
2249         }
2250         if (elts.start < buf->used) {
2251                 DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
2252                 return DRM_ERR(EINVAL);
2253         }
2254
2255         buf->used = elts.end;
2256
2257         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2258                 if (radeon_emit_state(dev_priv, filp_priv,
2259                                       &sarea_priv->context_state,
2260                                       sarea_priv->tex_state,
2261                                       sarea_priv->dirty)) {
2262                         DRM_ERROR("radeon_emit_state failed\n");
2263                         return DRM_ERR(EINVAL);
2264                 }
2265
2266                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2267                                        RADEON_UPLOAD_TEX1IMAGES |
2268                                        RADEON_UPLOAD_TEX2IMAGES |
2269                                        RADEON_REQUIRE_QUIESCENCE);
2270         }
2271
2272         /* Build up a prim_t record:
2273          */
2274         prim.start = elts.start;
2275         prim.finish = elts.end;
2276         prim.prim = elts.prim;
2277         prim.offset = 0;        /* offset from start of dma buffers */
2278         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2279         prim.vc_format = dev_priv->sarea_priv->vc_format;
2280
2281         radeon_cp_dispatch_indices(dev, buf, &prim);
2282         if (elts.discard) {
2283                 radeon_cp_discard_buffer(dev, buf);
2284         }
2285
2286         COMMIT_RING();
2287         return 0;
2288 }
2289
/* DRM_IOCTL_RADEON_TEXTURE handler: upload a texture image described by a
 * user-space drm_radeon_texture_t / drm_radeon_tex_image_t pair.
 */
static int radeon_cp_texture(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_texture_t tex;
	drm_radeon_tex_image_t image;
	int ret;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
				 sizeof(tex));

	/* The image descriptor is a nested user pointer; reject NULL. */
	if (tex.image == NULL) {
		DRM_ERROR("null texture image!\n");
		return DRM_ERR(EINVAL);
	}

	if (DRM_COPY_FROM_USER(&image,
			       (drm_radeon_tex_image_t __user *) tex.image,
			       sizeof(image)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	/* Dispatch validates sizes/offsets and performs the blit upload. */
	ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);

	COMMIT_RING();
	return ret;
}
2321
/* DRM_IOCTL_RADEON_STIPPLE handler: copy the 32x32-bit polygon stipple
 * pattern from user space and emit it to the hardware.
 */
static int radeon_cp_stipple(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t stipple;
	u32 mask[32];	/* 32 rows of 32 bits = full stipple pattern */

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
				 sizeof(stipple));

	if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	radeon_cp_dispatch_stipple(dev, mask);

	COMMIT_RING();
	return 0;
}
2344
/* DRM_IOCTL_RADEON_INDIRECT handler: dispatch a raw command buffer from a
 * privileged client (the X server).  The buffer contents are NOT verified,
 * which is why this ioctl is restricted to privileged clients.
 */
static int radeon_cp_indirect(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(indirect,
				 (drm_radeon_indirect_t __user *) data,
				 sizeof(indirect));

	DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
		  indirect.idx, indirect.start, indirect.end, indirect.discard);

	/* Validate the client-supplied buffer index. */
	if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	/* The buffer must belong to this client and not be in flight. */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect.idx);
		return DRM_ERR(EINVAL);
	}

	/* A buffer may be submitted in slices; each slice must start at or
	 * after the previously consumed region.
	 */
	if (indirect.start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
	if (indirect.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2413
/* DRM_IOCTL_RADEON_VERTEX2 handler: dispatch multiple primitives from one
 * DMA buffer, each with an optional per-primitive state block selected by
 * prim.stateidx.  State is re-emitted only when the index changes.
 */
static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex2_t vertex;
	int i;
	unsigned char laststate;	/* stateidx emitted last; 0xff = none */

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.discard);

	/* Validate the client-supplied buffer index. */
	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	/* The buffer must belong to this client and not be in flight. */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}

	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	/* Unlike radeon_cp_swap(), an oversized cliprect count is an error
	 * here rather than being clamped.
	 */
	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return DRM_ERR(EINVAL);

	for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		/* prim[] and state[] are user pointers; faults -> EFAULT. */
		if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
			return DRM_ERR(EFAULT);

		/* Re-emit state only when this prim selects a new block. */
		if (prim.stateidx != laststate) {
			drm_radeon_state_t state;

			if (DRM_COPY_FROM_USER(&state,
					       &vertex.state[prim.stateidx],
					       sizeof(state)))
				return DRM_ERR(EFAULT);

			if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
				DRM_ERROR("radeon_emit_state2 failed\n");
				return DRM_ERR(EINVAL);
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		if (prim.prim & RADEON_PRIM_WALK_IND) {
			/* Indexed: vertex data lives after the index block. */
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */

			radeon_cp_dispatch_indices(dev, buf, &tclprim);
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0;	/* not used */

			radeon_cp_dispatch_vertex(dev, buf, &tclprim);
		}

		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	/* Release the buffer back to the free list when the GPU is done. */
	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2512
/* Emit one RADEON_CMD_PACKET command: a CP type-0 packet writing a run of
 * registers described by the packet[] table entry for header.packet_id.
 * Consumes sz dwords from cmdbuf on success.
 */
static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       drm_file_t * filp_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	/* NOTE(review): id is taken from a header bitfield; presumably it
	 * cannot be negative, which is why only the upper bound is checked.
	 */
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	/* Length and base register come from the trusted kernel table. */
	sz = packet[id].len;
	reg = packet[id].start;

	/* Don't read past the end of the kernel copy of the cmdbuf. */
	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return DRM_ERR(EINVAL);
	}

	/* Verify (and fix up GART offsets in) the register payload. */
	if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
		DRM_ERROR("Packet verification failed\n");
		return DRM_ERR(EINVAL);
	}

	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(reg, (sz - 1)));
	OUT_RING_TABLE(data, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2548
2549 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2550                                           drm_radeon_cmd_header_t header,
2551                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2552 {
2553         int sz = header.scalars.count;
2554         int start = header.scalars.offset;
2555         int stride = header.scalars.stride;
2556         RING_LOCALS;
2557
2558         BEGIN_RING(3 + sz);
2559         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2560         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2561         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2562         OUT_RING_TABLE(cmdbuf->buf, sz);
2563         ADVANCE_RING();
2564         cmdbuf->buf += sz * sizeof(int);
2565         cmdbuf->bufsz -= sz * sizeof(int);
2566         return 0;
2567 }
2568
2569 /* God this is ugly
2570  */
2571 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2572                                            drm_radeon_cmd_header_t header,
2573                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2574 {
2575         int sz = header.scalars.count;
2576         int start = ((unsigned int)header.scalars.offset) + 0x100;
2577         int stride = header.scalars.stride;
2578         RING_LOCALS;
2579
2580         BEGIN_RING(3 + sz);
2581         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2582         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2583         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2584         OUT_RING_TABLE(cmdbuf->buf, sz);
2585         ADVANCE_RING();
2586         cmdbuf->buf += sz * sizeof(int);
2587         cmdbuf->bufsz -= sz * sizeof(int);
2588         return 0;
2589 }
2590
2591 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2592                                           drm_radeon_cmd_header_t header,
2593                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2594 {
2595         int sz = header.vectors.count;
2596         int start = header.vectors.offset;
2597         int stride = header.vectors.stride;
2598         RING_LOCALS;
2599
2600         BEGIN_RING(5 + sz);
2601         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2602         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2603         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2604         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2605         OUT_RING_TABLE(cmdbuf->buf, sz);
2606         ADVANCE_RING();
2607
2608         cmdbuf->buf += sz * sizeof(int);
2609         cmdbuf->bufsz -= sz * sizeof(int);
2610         return 0;
2611 }
2612
/* Emit one RADEON_CMD_VECLINEAR command: linearly load count*4 vector
 * dwords starting at the 16-bit address assembled from addr_lo/addr_hi.
 * Consumes sz dwords from cmdbuf on success; a zero count is a no-op.
 */
static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.veclinear.count * 4;
	int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
	RING_LOCALS;

	if (!sz)
		return 0;
	/* Don't read past the end of the kernel copy of the cmdbuf. */
	if (sz * 4 > cmdbuf->bufsz)
		return DRM_ERR(EINVAL);

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2638
/* Emit one RADEON_CMD_PACKET3 command: verify a client-supplied CP type-3
 * packet and copy it to the ring unclipped.  cmdsz (in dwords) is computed
 * and validated by radeon_check_and_fixup_packet3().
 */
static int radeon_emit_packet3(drm_device_t * dev,
			       drm_file_t * filp_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	BEGIN_RING(cmdsz);
	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
	ADVANCE_RING();

	/* Consume the packet from the command buffer. */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2664
/* Emit one RADEON_CMD_PACKET3_CLIP command: verify a client-supplied CP
 * type-3 packet and replay it once per cliprect, emitting each cliprect's
 * scissor state before the packet.  With no cliprects the packet is
 * consumed but nothing is emitted.
 */
static int radeon_emit_packet3_cliprect(drm_device_t *dev,
					drm_file_t *filp_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	/* No cliprects at all: skip emission but still consume the packet. */
	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		/* Replay the verified packet for this cliprect. */
		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

      out:
	/* Consume the packet from the command buffer. */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2726
/* Emit a CP wait-until matching the requested engine-idle flags.  Only the
 * three exact combinations below are valid; anything else is EINVAL.
 */
static int radeon_emit_wait(drm_device_t * dev, int flags)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
	switch (flags) {
	case RADEON_WAIT_2D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_3D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_2D | RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_IDLE();
		ADVANCE_RING();
		break;
	default:
		return DRM_ERR(EINVAL);
	}

	return 0;
}
2755
/* DRM_IOCTL_RADEON_CMDBUF handler: copy a client command stream into a
 * kernel buffer, then parse it header-by-header, dispatching each typed
 * command to the matching emit helper.  R300-class chips delegate the whole
 * stream to r300_do_cp_cmdbuf().  Returns 0 on success, EINVAL on any
 * malformed command.
 */
static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = NULL;
	int idx;
	drm_radeon_kcmd_buffer_t cmdbuf;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf = NULL;	/* kernel copy of the client stream */

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(cmdbuf,
				 (drm_radeon_cmd_buffer_t __user *) data,
				 sizeof(cmdbuf));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	/* Bound the stream size before allocating the kernel copy. */
	if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
		return DRM_ERR(EINVAL);
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf.bufsz;
	if (orig_bufsz != 0) {
		kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
		if (kbuf == NULL)
			return DRM_ERR(ENOMEM);
		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf,
				       cmdbuf.bufsz)) {
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
			return DRM_ERR(EFAULT);
		}
		cmdbuf.buf = kbuf;
	}

	orig_nbox = cmdbuf.nbox;

	/* R300 and newer use a different command stream format. */
	if (dev_priv->microcode_version == UCODE_R300) {
		int temp;
		temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);

		if (orig_bufsz != 0)
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

		return temp;
	}

	/* microcode_version != r300 */
	while (cmdbuf.bufsz >= sizeof(header)) {

		/* Pull the next command header off the stream; each emit
		 * helper consumes its own payload from cmdbuf.
		 */
		header.i = *(int *)cmdbuf.buf;
		cmdbuf.buf += sizeof(header);
		cmdbuf.bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET:
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets
			    (dev_priv, filp_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				goto err;
			}

			/* Only the owner may discard, and not twice. */
			buf = dma->buflist[idx];
			if (buf->filp != filp || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->filp, filp, buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer(dev, buf);
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect
			    (dev, filp_priv, &cmdbuf, orig_nbox)) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait(dev, header.wait.flags)) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;
		case RADEON_CMD_VECLINEAR:
			DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
			if (radeon_emit_veclinear(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_veclinear failed\n");
				goto err;
			}
			break;

		default:
			DRM_ERROR("bad cmd_type %d at %p\n",
				  header.header.cmd_type,
				  cmdbuf.buf - sizeof(header));
			goto err;
		}
	}

	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

	DRM_DEBUG("DONE\n");
	COMMIT_RING();
	return 0;

      err:
	/* Any partially-emitted ring contents are abandoned uncommitted. */
	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	return DRM_ERR(EINVAL);
}
2925
2926 static int radeon_cp_getparam(DRM_IOCTL_ARGS)
2927 {
2928         DRM_DEVICE;
2929         drm_radeon_private_t *dev_priv = dev->dev_private;
2930         drm_radeon_getparam_t param;
2931         int value;
2932
2933         DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
2934                                  sizeof(param));
2935
2936         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
2937
2938         switch (param.param) {
2939         case RADEON_PARAM_GART_BUFFER_OFFSET:
2940                 value = dev_priv->gart_buffers_offset;
2941                 break;
2942         case RADEON_PARAM_LAST_FRAME:
2943                 dev_priv->stats.last_frame_reads++;
2944                 value = GET_SCRATCH(0);
2945                 break;
2946         case RADEON_PARAM_LAST_DISPATCH:
2947                 value = GET_SCRATCH(1);
2948                 break;
2949         case RADEON_PARAM_LAST_CLEAR:
2950                 dev_priv->stats.last_clear_reads++;
2951                 value = GET_SCRATCH(2);
2952                 break;
2953         case RADEON_PARAM_IRQ_NR:
2954                 value = dev->irq;
2955                 break;
2956         case RADEON_PARAM_GART_BASE:
2957                 value = dev_priv->gart_vm_start;
2958                 break;
2959         case RADEON_PARAM_REGISTER_HANDLE:
2960                 value = dev_priv->mmio->offset;
2961                 break;
2962         case RADEON_PARAM_STATUS_HANDLE:
2963                 value = dev_priv->ring_rptr_offset;
2964                 break;
2965 #if BITS_PER_LONG == 32
2966                 /*
2967                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2968                  * pointer which can't fit into an int-sized variable.  According to
2969                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2970                  * not supporting it shouldn't be a problem.  If the same functionality
2971                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
2972                  * so backwards-compatibility for the embedded platforms can be
2973                  * maintained.  --davidm 4-Feb-2004.
2974                  */
2975         case RADEON_PARAM_SAREA_HANDLE:
2976                 /* The lock is the first dword in the sarea. */
2977                 value = (long)dev->lock.hw_lock;
2978                 break;
2979 #endif
2980         case RADEON_PARAM_GART_TEX_HANDLE:
2981                 value = dev_priv->gart_textures_offset;
2982                 break;
2983         
2984         case RADEON_PARAM_CARD_TYPE:
2985                 if (dev_priv->flags & CHIP_IS_PCIE)
2986                         value = RADEON_CARD_PCIE;
2987                 else if (dev_priv->flags & CHIP_IS_AGP)
2988                         value = RADEON_CARD_AGP;
2989                 else
2990                         value = RADEON_CARD_PCI;
2991                 break;
2992         default:
2993                 return DRM_ERR(EINVAL);
2994         }
2995
2996         if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
2997                 DRM_ERROR("copy_to_user\n");
2998                 return DRM_ERR(EFAULT);
2999         }
3000
3001         return 0;
3002 }
3003
3004 static int radeon_cp_setparam(DRM_IOCTL_ARGS)
3005 {
3006         DRM_DEVICE;
3007         drm_radeon_private_t *dev_priv = dev->dev_private;
3008         drm_file_t *filp_priv;
3009         drm_radeon_setparam_t sp;
3010         struct drm_radeon_driver_file_fields *radeon_priv;
3011
3012         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
3013
3014         DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
3015                                  sizeof(sp));
3016
3017         switch (sp.param) {
3018         case RADEON_SETPARAM_FB_LOCATION:
3019                 radeon_priv = filp_priv->driver_priv;
3020                 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
3021                 break;
3022         case RADEON_SETPARAM_SWITCH_TILING:
3023                 if (sp.value == 0) {
3024                         DRM_DEBUG("color tiling disabled\n");
3025                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3026                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3027                         dev_priv->sarea_priv->tiling_enabled = 0;
3028                 } else if (sp.value == 1) {
3029                         DRM_DEBUG("color tiling enabled\n");
3030                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3031                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3032                         dev_priv->sarea_priv->tiling_enabled = 1;
3033                 }
3034                 break;
3035         case RADEON_SETPARAM_PCIGART_LOCATION:
3036                 dev_priv->pcigart_offset = sp.value;
3037                 break;
3038         case RADEON_SETPARAM_NEW_MEMMAP:
3039                 dev_priv->new_memmap = sp.value;
3040                 break;
3041         default:
3042                 DRM_DEBUG("Invalid parameter %d\n", sp.param);
3043                 return DRM_ERR(EINVAL);
3044         }
3045
3046         return 0;
3047 }
3048
3049 /* When a client dies:
3050  *    - Check for and clean up flipped page state
3051  *    - Free any alloced GART memory.
3052  *    - Free any alloced radeon surfaces.
3053  *
3054  * DRM infrastructure takes care of reclaiming dma buffers.
3055  */
3056 void radeon_driver_preclose(drm_device_t * dev, DRMFILE filp)
3057 {
3058         if (dev->dev_private) {
3059                 drm_radeon_private_t *dev_priv = dev->dev_private;
3060                 if (dev_priv->page_flipping) {
3061                         radeon_do_cleanup_pageflip(dev);
3062                 }
3063                 radeon_mem_release(filp, dev_priv->gart_heap);
3064                 radeon_mem_release(filp, dev_priv->fb_heap);
3065                 radeon_surfaces_release(filp, dev_priv);
3066         }
3067 }
3068
/* Called when the last file handle on the device is closed: hand the
 * full engine/driver teardown to radeon_do_release().
 */
void radeon_driver_lastclose(drm_device_t * dev)
{
	radeon_do_release(dev);
}
3073
3074 int radeon_driver_open(drm_device_t * dev, drm_file_t * filp_priv)
3075 {
3076         drm_radeon_private_t *dev_priv = dev->dev_private;
3077         struct drm_radeon_driver_file_fields *radeon_priv;
3078
3079         DRM_DEBUG("\n");
3080         radeon_priv =
3081             (struct drm_radeon_driver_file_fields *)
3082             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3083
3084         if (!radeon_priv)
3085                 return -ENOMEM;
3086
3087         filp_priv->driver_priv = radeon_priv;
3088
3089         if (dev_priv)
3090                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3091         else
3092                 radeon_priv->radeon_fb_delta = 0;
3093         return 0;
3094 }
3095
/* Per-file-handle close hook: release the driver-private file fields
 * allocated in radeon_driver_open().
 */
void radeon_driver_postclose(drm_device_t * dev, drm_file_t * filp_priv)
{
	struct drm_radeon_driver_file_fields *radeon_priv =
	    filp_priv->driver_priv;

	drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
}
3103
/* Ioctl dispatch table, indexed by ioctl number.  All entries require
 * DRM_AUTH; entries that reconfigure the engine or heaps additionally
 * require DRM_MASTER|DRM_ROOT_ONLY.
 */
drm_ioctl_desc_t radeon_ioctls[] = {
	[DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
};

/* Number of entries in radeon_ioctls[], exported for the DRM core. */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);