drm: rework radeon memory map (radeon 1.23)
[pandora-kernel.git] / drivers / char / drm / radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
40 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
41                                                     dev_priv,
42                                                     drm_file_t * filp_priv,
43                                                     u32 *offset)
44 {
45         u32 off = *offset;
46         struct drm_radeon_driver_file_fields *radeon_priv;
47
48         /* Hrm ... the story of the offset ... So this function converts
49          * the various ideas of what userland clients might have for an
50          * offset in the card address space into an offset into the card
51          * address space :) So with a sane client, it should just keep
52          * the value intact and just do some boundary checking. However,
53          * not all clients are sane. Some older clients pass us 0 based
54          * offsets relative to the start of the framebuffer and some may
55          * assume the AGP aperture it appended to the framebuffer, so we
56          * try to detect those cases and fix them up.
57          *
58          * Note: It might be a good idea here to make sure the offset lands
59          * in some "allowed" area to protect things like the PCIE GART...
60          */
61
62         /* First, the best case, the offset already lands in either the
63          * framebuffer or the GART mapped space
64          */
65         if ((off >= dev_priv->fb_location &&
66              off < (dev_priv->fb_location + dev_priv->fb_size)) ||
67             (off >= dev_priv->gart_vm_start &&
68              off < (dev_priv->gart_vm_start + dev_priv->gart_size)))
69                 return 0;
70
71         /* Ok, that didn't happen... now check if we have a zero based
72          * offset that fits in the framebuffer + gart space, apply the
73          * magic offset we get from SETPARAM or calculated from fb_location
74          */
75         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
76                 radeon_priv = filp_priv->driver_priv;
77                 off += radeon_priv->radeon_fb_delta;
78         }
79
80         /* Finally, assume we aimed at a GART offset if beyond the fb */
81         if (off > (dev_priv->fb_location + dev_priv->fb_size))
82                 off = off - (dev_priv->fb_location + dev_priv->fb_size) +
83                         dev_priv->gart_vm_start;
84
85         /* Now recheck and fail if out of bounds */
86         if ((off >= dev_priv->fb_location &&
87              off < (dev_priv->fb_location + dev_priv->fb_size)) ||
88             (off >= dev_priv->gart_vm_start &&
89              off < (dev_priv->gart_vm_start + dev_priv->gart_size))) {
90                 DRM_DEBUG("offset fixed up to 0x%x\n", off);
91                 *offset = off;
92                 return 0;
93         }
94         return DRM_ERR(EINVAL);
95 }
96
/* Validate (and fix up) any card-address offsets embedded in a type-0
 * state packet before it reaches the hardware.  'id' selects which
 * register block the client-supplied dwords in 'data' correspond to;
 * for packets that carry buffer offsets, the relevant dword(s) are run
 * through radeon_check_and_fixup_offset() and may be rewritten in
 * place.  Returns 0 on success, DRM_ERR(EINVAL) on an invalid offset
 * or unknown packet id.
 */
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t * filp_priv,
						     int id, u32 *data)
{
	switch (id) {

	case RADEON_EMIT_PP_MISC:
		/* Depth buffer offset is embedded within the PP_MISC
		 * register run; index it relative to the run's start.
		 */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		/* Colour buffer offset within the PP_CNTL register run */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	/* R200 texture offset packets are a single offset dword */
	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[0])) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	/* R100 texture state: the offset dword sits inside the
	 * TXFILTER register run.
	 */
	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	/* R200 cubic maps: five face offsets per packet */
	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R200 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
			break;
		}

	/* R100 cubic maps: likewise five face offsets per packet */
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
		}
		break;

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_VAP_CTL:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return DRM_ERR(EINVAL);
	}

	return 0;
}
262
/* Validate a client-supplied type-3 CP packet: compute its size, make
 * sure it fits inside the supplied buffer, and bounds-check / fix up
 * any blit source/destination offsets it carries (rewriting the packet
 * in place).  Returns 0 on success, DRM_ERR(EINVAL) on a malformed
 * packet or invalid offset; *cmdsz is set to the packet length in
 * dwords.
 */
static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t *filp_priv,
						     drm_radeon_kcmd_buffer_t *
						     cmdbuf,
						     unsigned int *cmdsz)
{
	u32 *cmd = (u32 *) cmdbuf->buf;

	/* Packet length in dwords: header dword + (count field + 1) */
	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR("Not a type 3 packet\n");
		return DRM_ERR(EINVAL);
	}

	/* Reject packets claiming more data than the client supplied */
	if (4 * *cmdsz > cmdbuf->bufsz) {
		DRM_ERROR("Packet size larger than size of data provided\n");
		return DRM_ERR(EINVAL);
	}

	/* Check client state and fix it up if necessary */
	if (cmd[0] & 0x8000) {	/* MSB of opcode: next DWORD GUI_CNTL */
		u32 offset;

		/* If the GUI_CNTL dword says a pitch/offset follows, the
		 * offset dword holds a card address >> 10; validate it in
		 * expanded form and write the fixed-up value back.
		 */
		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[2] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid first packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
		}

		/* With both src and dst pitch/offset present, a second
		 * offset dword follows the first.
		 */
		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[3] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid second packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
		}
	}

	return 0;
}
313
314 /* ================================================================
315  * CP hardware state programming functions
316  */
317
318 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
319                                              drm_clip_rect_t * box)
320 {
321         RING_LOCALS;
322
323         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
324                   box->x1, box->y1, box->x2, box->y2);
325
326         BEGIN_RING(4);
327         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
328         OUT_RING((box->y1 << 16) | box->x1);
329         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
330         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
331         ADVANCE_RING();
332 }
333
334 /* Emit 1.1 state
335  */
336 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
337                              drm_file_t * filp_priv,
338                              drm_radeon_context_regs_t * ctx,
339                              drm_radeon_texture_regs_t * tex,
340                              unsigned int dirty)
341 {
342         RING_LOCALS;
343         DRM_DEBUG("dirty=0x%08x\n", dirty);
344
345         if (dirty & RADEON_UPLOAD_CONTEXT) {
346                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
347                                                   &ctx->rb3d_depthoffset)) {
348                         DRM_ERROR("Invalid depth buffer offset\n");
349                         return DRM_ERR(EINVAL);
350                 }
351
352                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
353                                                   &ctx->rb3d_coloroffset)) {
354                         DRM_ERROR("Invalid depth buffer offset\n");
355                         return DRM_ERR(EINVAL);
356                 }
357
358                 BEGIN_RING(14);
359                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
360                 OUT_RING(ctx->pp_misc);
361                 OUT_RING(ctx->pp_fog_color);
362                 OUT_RING(ctx->re_solid_color);
363                 OUT_RING(ctx->rb3d_blendcntl);
364                 OUT_RING(ctx->rb3d_depthoffset);
365                 OUT_RING(ctx->rb3d_depthpitch);
366                 OUT_RING(ctx->rb3d_zstencilcntl);
367                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
368                 OUT_RING(ctx->pp_cntl);
369                 OUT_RING(ctx->rb3d_cntl);
370                 OUT_RING(ctx->rb3d_coloroffset);
371                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
372                 OUT_RING(ctx->rb3d_colorpitch);
373                 ADVANCE_RING();
374         }
375
376         if (dirty & RADEON_UPLOAD_VERTFMT) {
377                 BEGIN_RING(2);
378                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
379                 OUT_RING(ctx->se_coord_fmt);
380                 ADVANCE_RING();
381         }
382
383         if (dirty & RADEON_UPLOAD_LINE) {
384                 BEGIN_RING(5);
385                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
386                 OUT_RING(ctx->re_line_pattern);
387                 OUT_RING(ctx->re_line_state);
388                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
389                 OUT_RING(ctx->se_line_width);
390                 ADVANCE_RING();
391         }
392
393         if (dirty & RADEON_UPLOAD_BUMPMAP) {
394                 BEGIN_RING(5);
395                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
396                 OUT_RING(ctx->pp_lum_matrix);
397                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
398                 OUT_RING(ctx->pp_rot_matrix_0);
399                 OUT_RING(ctx->pp_rot_matrix_1);
400                 ADVANCE_RING();
401         }
402
403         if (dirty & RADEON_UPLOAD_MASKS) {
404                 BEGIN_RING(4);
405                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
406                 OUT_RING(ctx->rb3d_stencilrefmask);
407                 OUT_RING(ctx->rb3d_ropcntl);
408                 OUT_RING(ctx->rb3d_planemask);
409                 ADVANCE_RING();
410         }
411
412         if (dirty & RADEON_UPLOAD_VIEWPORT) {
413                 BEGIN_RING(7);
414                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
415                 OUT_RING(ctx->se_vport_xscale);
416                 OUT_RING(ctx->se_vport_xoffset);
417                 OUT_RING(ctx->se_vport_yscale);
418                 OUT_RING(ctx->se_vport_yoffset);
419                 OUT_RING(ctx->se_vport_zscale);
420                 OUT_RING(ctx->se_vport_zoffset);
421                 ADVANCE_RING();
422         }
423
424         if (dirty & RADEON_UPLOAD_SETUP) {
425                 BEGIN_RING(4);
426                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
427                 OUT_RING(ctx->se_cntl);
428                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
429                 OUT_RING(ctx->se_cntl_status);
430                 ADVANCE_RING();
431         }
432
433         if (dirty & RADEON_UPLOAD_MISC) {
434                 BEGIN_RING(2);
435                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
436                 OUT_RING(ctx->re_misc);
437                 ADVANCE_RING();
438         }
439
440         if (dirty & RADEON_UPLOAD_TEX0) {
441                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
442                                                   &tex[0].pp_txoffset)) {
443                         DRM_ERROR("Invalid texture offset for unit 0\n");
444                         return DRM_ERR(EINVAL);
445                 }
446
447                 BEGIN_RING(9);
448                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
449                 OUT_RING(tex[0].pp_txfilter);
450                 OUT_RING(tex[0].pp_txformat);
451                 OUT_RING(tex[0].pp_txoffset);
452                 OUT_RING(tex[0].pp_txcblend);
453                 OUT_RING(tex[0].pp_txablend);
454                 OUT_RING(tex[0].pp_tfactor);
455                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
456                 OUT_RING(tex[0].pp_border_color);
457                 ADVANCE_RING();
458         }
459
460         if (dirty & RADEON_UPLOAD_TEX1) {
461                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
462                                                   &tex[1].pp_txoffset)) {
463                         DRM_ERROR("Invalid texture offset for unit 1\n");
464                         return DRM_ERR(EINVAL);
465                 }
466
467                 BEGIN_RING(9);
468                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
469                 OUT_RING(tex[1].pp_txfilter);
470                 OUT_RING(tex[1].pp_txformat);
471                 OUT_RING(tex[1].pp_txoffset);
472                 OUT_RING(tex[1].pp_txcblend);
473                 OUT_RING(tex[1].pp_txablend);
474                 OUT_RING(tex[1].pp_tfactor);
475                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
476                 OUT_RING(tex[1].pp_border_color);
477                 ADVANCE_RING();
478         }
479
480         if (dirty & RADEON_UPLOAD_TEX2) {
481                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
482                                                   &tex[2].pp_txoffset)) {
483                         DRM_ERROR("Invalid texture offset for unit 2\n");
484                         return DRM_ERR(EINVAL);
485                 }
486
487                 BEGIN_RING(9);
488                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
489                 OUT_RING(tex[2].pp_txfilter);
490                 OUT_RING(tex[2].pp_txformat);
491                 OUT_RING(tex[2].pp_txoffset);
492                 OUT_RING(tex[2].pp_txcblend);
493                 OUT_RING(tex[2].pp_txablend);
494                 OUT_RING(tex[2].pp_tfactor);
495                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
496                 OUT_RING(tex[2].pp_border_color);
497                 ADVANCE_RING();
498         }
499
500         return 0;
501 }
502
503 /* Emit 1.2 state
504  */
505 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
506                               drm_file_t * filp_priv,
507                               drm_radeon_state_t * state)
508 {
509         RING_LOCALS;
510
511         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
512                 BEGIN_RING(3);
513                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
514                 OUT_RING(state->context2.se_zbias_factor);
515                 OUT_RING(state->context2.se_zbias_constant);
516                 ADVANCE_RING();
517         }
518
519         return radeon_emit_state(dev_priv, filp_priv, &state->context,
520                                  state->tex, state->dirty);
521 }
522
523 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
524  * 1.3 cmdbuffers allow all previous state to be updated as well as
525  * the tcl scalar and vector areas.
526  */
/* Register ranges addressed by the 1.3 state packets: 'start' is the
 * first register of the run, 'len' the number of dwords it covers,
 * 'name' a human-readable label.
 * NOTE(review): the table index appears to correspond to the
 * RADEON_EMIT_* / R200_EMIT_* packet ids validated in
 * radeon_check_and_fixup_packets() -- confirm against radeon_drm.h
 * before reordering.
 */
static struct {
	int start;
	int len;
	const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},	/* 85 */
	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
};
630
631 /* ================================================================
632  * Performance monitoring functions
633  */
634
635 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
636                              int x, int y, int w, int h, int r, int g, int b)
637 {
638         u32 color;
639         RING_LOCALS;
640
641         x += dev_priv->sarea_priv->boxes[0].x1;
642         y += dev_priv->sarea_priv->boxes[0].y1;
643
644         switch (dev_priv->color_fmt) {
645         case RADEON_COLOR_FORMAT_RGB565:
646                 color = (((r & 0xf8) << 8) |
647                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
648                 break;
649         case RADEON_COLOR_FORMAT_ARGB8888:
650         default:
651                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
652                 break;
653         }
654
655         BEGIN_RING(4);
656         RADEON_WAIT_UNTIL_3D_IDLE();
657         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
658         OUT_RING(0xffffffff);
659         ADVANCE_RING();
660
661         BEGIN_RING(6);
662
663         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
664         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
665                  RADEON_GMC_BRUSH_SOLID_COLOR |
666                  (dev_priv->color_fmt << 8) |
667                  RADEON_GMC_SRC_DATATYPE_COLOR |
668                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
669
670         if (dev_priv->page_flipping && dev_priv->current_page == 1) {
671                 OUT_RING(dev_priv->front_pitch_offset);
672         } else {
673                 OUT_RING(dev_priv->back_pitch_offset);
674         }
675
676         OUT_RING(color);
677
678         OUT_RING((x << 16) | y);
679         OUT_RING((w << 16) | h);
680
681         ADVANCE_RING();
682 }
683
684 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
685 {
686         /* Collapse various things into a wait flag -- trying to
687          * guess if userspase slept -- better just to have them tell us.
688          */
689         if (dev_priv->stats.last_frame_reads > 1 ||
690             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
691                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
692         }
693
694         if (dev_priv->stats.freelist_loops) {
695                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
696         }
697
698         /* Purple box for page flipping
699          */
700         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
701                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
702
703         /* Red box if we have to wait for idle at any point
704          */
705         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
706                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
707
708         /* Blue box: lost context?
709          */
710
711         /* Yellow box for texture swaps
712          */
713         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
714                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
715
716         /* Green box if hardware never idles (as far as we can tell)
717          */
718         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
719                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
720
721         /* Draw bars indicating number of buffers allocated
722          * (not a great measure, easily confused)
723          */
724         if (dev_priv->stats.requested_bufs) {
725                 if (dev_priv->stats.requested_bufs > 100)
726                         dev_priv->stats.requested_bufs = 100;
727
728                 radeon_clear_box(dev_priv, 4, 16,
729                                  dev_priv->stats.requested_bufs, 4,
730                                  196, 128, 128);
731         }
732
733         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
734
735 }
736
737 /* ================================================================
738  * CP command dispatch functions
739  */
740
/* Dispatch a clear of the color, depth and/or stencil buffers to the CP
 * ring.  Color clears are performed with 2D solid fills over each clip
 * rect.  Depth/stencil clears either use the hyper-z fast-clear packets
 * (when RADEON_CLEAR_FASTZ is requested) or render a quad into just the
 * depth/stencil buffers, with separate paths for R200 and older cores.
 *
 * clear:       clear parameters from userspace (flags, masks, colors).
 * depth_boxes: per-cliprect quad coordinates/depth values, already
 *              transformed by the client-side driver.
 */
static void radeon_cp_dispatch_clear(drm_device_t * dev,
				     drm_radeon_clear_t * clear,
				     drm_radeon_clear_rect_t * depth_boxes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG("flags = 0x%x\n", flags);

	dev_priv->stats.clears++;

	/* When page flipped, "front" and "back" as seen by the client are
	 * swapped relative to the actual buffers, so exchange the flags.
	 */
	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if (tmp & RADEON_FRONT)
			flags |= RADEON_BACK;
		if (tmp & RADEON_BACK)
			flags |= RADEON_FRONT;
	}

	if (flags & (RADEON_FRONT | RADEON_BACK)) {

		BEGIN_RING(4);

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
		OUT_RING(clear->color_mask);

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		/* Solid-fill each clip rect in the requested buffer(s). */
		for (i = 0; i < nbox; i++) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
				  x, y, w, h, flags);

			if (flags & RADEON_FRONT) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->
					  color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->front_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}

			if (flags & RADEON_BACK) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->
					  color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->back_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}
		}
	}

	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
	    && (flags & RADEON_CLEAR_FASTZ)) {

		int i;
		/* depth buffer pixels per scanline; 16-bit Z uses 2 bytes
		 * per pixel, other formats 4.
		 */
		int depthpixperline =
		    dev_priv->depth_fmt ==
		    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
						       2) : (dev_priv->
							     depth_pitch / 4);

		u32 clearmask;

		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
		    ((clear->depth_mask & 0xff) << 24);

		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & CHIP_HAS_HIERZ)
		    && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
			   other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour than R200 which needs different clear mask
			   and different number of tiles to clear if hierz is enabled or not !?!
			 */
			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
		} else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bit 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile (or maybe one of the bits indicates if the tile is
			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
			   Pattern is as follows:
			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!?
			 */
			clearmask = 0x0;
		}

		/* Program the z-mask clear value/offset and flush the z cache
		 * before emitting the per-rect tile clears.
		 */
		BEGIN_RING(8);
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
			     tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
			     RADEON_RB3D_ZC_FLUSH_ALL);
		ADVANCE_RING();

		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & CHIP_HAS_HIERZ)
			    && !(dev_priv->microcode_version == UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					OUT_RING(tileoffset * 8);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			} else if (dev_priv->microcode_version == UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 5;
				nrtilesx =
				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING(tileoffset * 16);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 1);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			} else {	/* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset =
				    ((pbox[i].y1 >> 4) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					OUT_RING(tileoffset * 128);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & CHIP_HAS_HIERZ)
		    && (dev_priv->microcode_version == UCODE_R200)
		    && (flags & RADEON_USE_HIERZ))
			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
		{
			BEGIN_RING(4);
			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
			OUT_RING(0x0);	/* First tile */
			OUT_RING(0x3cc0);
			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
			ADVANCE_RING();
		}
	}

	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	else if ((dev_priv->microcode_version == UCODE_R200) &&
		(flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		/* R200 path: set up a minimal 3D pipe (TCL disabled, plane
		 * mask zero so only depth/stencil are written) and draw the
		 * client-supplied quads.
		 */
		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
					  (0x9 <<
					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;

		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if (flags & RADEON_STENCIL) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(26);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
			     tempRB3D_STENCILREFMASK);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		/* Emit one triangle (rect list) per clip rect; 0x3f800000 is
		 * 1.0f for the W coordinate.
		 */
		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(14);
			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			ADVANCE_RING();
		}
	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		/* Pre-R200 path: same quad-rendering approach using the
		 * older register set and RADEON_3D_DRAW_IMMD packet.
		 */
		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if (flags & RADEON_DEPTH) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if (flags & RADEON_STENCIL) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(13);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
		OUT_RING(0x00000000);
		OUT_RING(rb3d_cntl);

		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(15);

			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
			OUT_RING(RADEON_VTX_Z_PRESENT |
				 RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  RADEON_MAOS_ENABLE |
				  RADEON_VTX_FMT_RADEON_MODE |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING(4);

	RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}
1231
/* Copy the back buffer to the front buffer (a "blit swap") for every
 * clip rect in the SAREA, then bump the frame-age counter that clients
 * throttle on.  Used when true page flipping is not active.
 */
static void radeon_cp_dispatch_swap(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes(dev_priv);

	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* One screen-to-screen blit per clip rect. */
	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);

		BEGIN_RING(7);

		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (dev_priv->color_fmt << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);

		/* Make this work even if front & back are flipped:
		 */
		if (dev_priv->current_page == 0) {
			OUT_RING(dev_priv->back_pitch_offset);
			OUT_RING(dev_priv->front_pitch_offset);
		} else {
			OUT_RING(dev_priv->front_pitch_offset);
			OUT_RING(dev_priv->back_pitch_offset);
		}

		/* Source and destination coordinates are identical. */
		OUT_RING((x << 16) | y);
		OUT_RING((x << 16) | y);
		OUT_RING((w << 16) | h);

		ADVANCE_RING();
	}

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;

	BEGIN_RING(4);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}
1306
/* Perform a page flip: repoint both CRTCs at the buffer that is not
 * currently displayed, toggle current_page/pfCurrentPage, and bump the
 * frame-age counter that clients throttle on.
 */
static void radeon_cp_dispatch_flip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
	/* Offset of the buffer we are about to display (the one NOT
	 * currently shown).
	 */
	int offset = (dev_priv->current_page == 1)
	    ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
		  __FUNCTION__,
		  dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes(dev_priv);
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING(6);

	RADEON_WAIT_UNTIL_3D_IDLE();
	/* CRTC base = frame position within the buffer (8-byte aligned)
	 * plus the buffer offset; color_fmt - 2 is used here as the
	 * bytes-per-pixel factor for the x coordinate.
	 */
	OUT_RING_REG(RADEON_CRTC_OFFSET,
		     ((sarea->frame.y * dev_priv->front_pitch +
		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
		     + offset);
	OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
		     + offset);

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
	    1 - dev_priv->current_page;

	BEGIN_RING(2);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}
1353
1354 static int bad_prim_vertex_nr(int primitive, int nr)
1355 {
1356         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1357         case RADEON_PRIM_TYPE_NONE:
1358         case RADEON_PRIM_TYPE_POINT:
1359                 return nr < 1;
1360         case RADEON_PRIM_TYPE_LINE:
1361                 return (nr & 1) || nr == 0;
1362         case RADEON_PRIM_TYPE_LINE_STRIP:
1363                 return nr < 2;
1364         case RADEON_PRIM_TYPE_TRI_LIST:
1365         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1366         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1367         case RADEON_PRIM_TYPE_RECT_LIST:
1368                 return nr % 3 || nr == 0;
1369         case RADEON_PRIM_TYPE_TRI_FAN:
1370         case RADEON_PRIM_TYPE_TRI_STRIP:
1371                 return nr < 3;
1372         default:
1373                 return 1;
1374         }
1375 }
1376
/* Description of a single TCL primitive dispatched from a vertex or
 * element buffer (see radeon_cp_dispatch_vertex/_indices). */
typedef struct {
	unsigned int start;	/* byte offset of first data in the buffer */
	unsigned int finish;	/* byte offset past the end of the data */
	unsigned int prim;	/* primitive type and walk/format flags */
	unsigned int numverts;	/* number of vertices to render */
	unsigned int offset;	/* offset of vertex data relative to the
				 * GART buffers base (indexed prims only) */
	unsigned int vc_format;	/* hardware vertex format word */
} drm_radeon_tcl_prim_t;
1385
/* Emit the draw commands for one vertex buffer, replaying the same
 * packet once per sarea cliprect (or once if the cliprect list is
 * empty).
 */
static void radeon_cp_dispatch_vertex(drm_device_t * dev,
				      drm_buf_t * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GART-relative address of the first vertex to render */
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	/* Reject vertex counts that are illegal for the primitive type. */
	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}
1431
/* Tag a DMA buffer with a fresh dispatch age and emit that age through
 * the ring, so the freelist can later tell when the hardware has
 * finished with the buffer and it may be reused.
 */
static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	/* New age: the next value of the shared dispatch counter. */
	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING(2);
	RADEON_DISPATCH_AGE(buf_priv->age);
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}
1448
/* Fire a range [start, end) of a client DMA buffer as an indirect (IB)
 * buffer on the CP.  An empty range is a no-op.
 */
static void radeon_cp_dispatch_indirect(drm_device_t * dev,
					drm_buf_t * buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		/* GART-relative address the CP fetches the IB from */
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		/* Length in dwords, rounded up */
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if (dwords & 1) {
			/* CPU-visible view of the buffer for the patch */
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}
1482
/* Dispatch an indexed primitive: patch a GEN_INDX_PRIM packet header
 * directly into the element buffer in place (the indices follow the
 * header), then fire the whole range as an indirect buffer once per
 * sarea cliprect.
 */
static void radeon_cp_dispatch_indices(drm_device_t * dev,
				       drm_buf_t * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GART-relative address of the vertex data the indices refer to */
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	/* First u16 index lives after the packet header written below. */
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	/* Reject index counts that are illegal for the primitive type. */
	if (bad_prim_vertex_nr(prim->prim, count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
		return;
	}

	/* The range must be non-empty and dword-aligned at the start. */
	if (start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	/* CPU-visible pointer to the packet header location. */
	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
			elt_buf->offset + prim->start);

	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT));

	do {
		if (i < nbox)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		i++;
	} while (i < nbox);

}
1538
1539 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1540
/* Upload a texture image to card memory via host blits.
 *
 * Copies the user-supplied image into freelist DMA buffers and blits
 * each chunk to tex->offset, splitting into multiple passes when the
 * image is larger than one buffer.  On buffer starvation, the updated
 * image parameters are copied back to userspace and EAGAIN is
 * returned so the client can retry the remainder.
 *
 * Returns 0 on success or a DRM_ERR code (EINVAL/EFAULT/EAGAIN).
 */
static int radeon_cp_dispatch_texture(DRMFILE filp,
				      drm_device_t * dev,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	/* Sanitize the client-supplied destination before writing there. */
	if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return DRM_ERR(EINVAL);
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return DRM_ERR(EINVAL);
	}
	/* Source pitch in 64-byte units for the blitter. */
	spitch = blit_width >> 6;
	if (spitch == 0 && image->height > 1)
		return DRM_ERR(EINVAL);

	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

	do {
		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			  tex->offset >> 10, tex->pitch, tex->format,
			  image->x, image->y, image->width, image->height);

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		if (size > RADEON_MAX_TEXTURE_SIZE) {
			/* Too big for one buffer: upload as many whole
			 * scanlines as fit; the loop handles the rest. */
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if (size < 4 && size > 0) {
			size = 4;
		} else if (size == 0) {
			return 0;
		}

		buf = radeon_freelist_get(dev);
		/* NOTE(review): the idle-and-retry path below is
		 * deliberately disabled by the `0 &&` -- presumably to
		 * avoid stalling the CP here; confirm before re-enabling. */
		if (0 && !buf) {
			radeon_do_cp_idle(dev_priv);
			buf = radeon_freelist_get(dev);
		}
		if (!buf) {
			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
			/* Report progress back so the client can retry
			 * the remaining scanlines. */
			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
				return DRM_ERR(EFAULT);
			return DRM_ERR(EAGAIN);
		}

		/* Dispatch the indirect buffer.
		 */
		buffer =
		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;

/* Copy _width bytes of texture data from userspace, failing the ioctl
 * with EFAULT on a bad user pointer. */
#define RADEON_COPY_MT(_buf, _data, _width) \
	do { \
		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
			DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
			return DRM_ERR(EFAULT); \
		} \
	} while(0)

		if (microtile) {
			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
			   however, we cannot use blitter directly for texture width < 64 bytes,
			   since minimum tex pitch is 64 bytes and we need this to match
			   the texture width, otherwise the blitter will tile it wrong.
			   Thus, tiling manually in this case. Additionally, need to special
			   case tex height = 1, since our actual image will have height 2
			   and we need to ensure we don't read beyond the texture size
			   from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					RADEON_COPY_MT(buffer, data,
						(int)(tex_width * sizeof(u32)));
				} else if (tex_width == 32) {
					RADEON_COPY_MT(buffer, data, 16);
					RADEON_COPY_MT(buffer + 8,
						       data + 16, 16);
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else if (tex_width < 16) {
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
				for (i = 0; i < tex->height; i += 2) {
					RADEON_COPY_MT(buffer, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 8, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 4, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 12, data, 16);
					data += 16;
					buffer += 16;
				}
			}
		} else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 8;
					data += tex_width;
				}
			}
		}

#undef RADEON_COPY_MT
		buf->filp = filp;
		buf->used = size;
		offset = dev_priv->gart_buffers_offset + buf->offset;
		/* Blit this chunk from the DMA buffer to the texture. */
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | (tex->offset >> 10));
		OUT_RING(0);
		OUT_RING((image->x << 16) | image->y);
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();

		radeon_cp_discard_buffer(dev, buf);

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	return 0;
}
1775
1776 static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
1777 {
1778         drm_radeon_private_t *dev_priv = dev->dev_private;
1779         int i;
1780         RING_LOCALS;
1781         DRM_DEBUG("\n");
1782
1783         BEGIN_RING(35);
1784
1785         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1786         OUT_RING(0x00000000);
1787
1788         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1789         for (i = 0; i < 32; i++) {
1790                 OUT_RING(stipple[i]);
1791         }
1792
1793         ADVANCE_RING();
1794 }
1795
1796 static void radeon_apply_surface_regs(int surf_index,
1797                                       drm_radeon_private_t *dev_priv)
1798 {
1799         if (!dev_priv->mmio)
1800                 return;
1801
1802         radeon_do_cp_idle(dev_priv);
1803
1804         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1805                      dev_priv->surfaces[surf_index].flags);
1806         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1807                      dev_priv->surfaces[surf_index].lower);
1808         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1809                      dev_priv->surfaces[surf_index].upper);
1810 }
1811
1812 /* Allocates a virtual surface
1813  * doesn't always allocate a real surface, will stretch an existing
1814  * surface when possible.
1815  *
1816  * Note that refcount can be at most 2, since during a free refcount=3
1817  * might mean we have to allocate a new surface which might not always
1818  * be available.
 * For example: we allocate three contiguous surfaces ABC. If B is
1820  * freed, we suddenly need two surfaces to store A and C, which might
1821  * not always be available.
1822  */
/* Allocate a virtual surface for [new->address, new->address+size-1].
 *
 * Tries, in order: extending an existing singly-referenced real surface
 * with matching flags (before or after), then claiming a free real
 * surface.  Returns the virtual surface index on success, -1 on failure
 * (bad parameters, overlap with an existing surface, or no free slot).
 */
static int alloc_surface(drm_radeon_surface_alloc_t *new,
			 drm_radeon_private_t *dev_priv, DRMFILE filp)
{
	struct radeon_virt_surface *s;
	int i;
	int virt_surface_index;
	uint32_t new_upper, new_lower;

	new_lower = new->address;
	/* upper bound is inclusive */
	new_upper = new_lower + new->size - 1;

	/* sanity check */
	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
	     RADEON_SURF_ADDRESS_FIXED_MASK)
	    || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
		return -1;

	/* make sure there is no overlap with existing surfaces */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if ((dev_priv->surfaces[i].refcount != 0) &&
		    (((new_lower >= dev_priv->surfaces[i].lower) &&
		      (new_lower < dev_priv->surfaces[i].upper)) ||
		     ((new_lower < dev_priv->surfaces[i].lower) &&
		      (new_upper > dev_priv->surfaces[i].lower)))) {
			return -1;
		}
	}

	/* find a virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
		if (dev_priv->virt_surfaces[i].filp == 0)
			break;
	if (i == 2 * RADEON_MAX_SURFACES) {
		return -1;
	}
	virt_surface_index = i;

	/* try to reuse an existing surface */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		/* extend before */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			/* grow the real surface downward to cover us */
			dev_priv->surfaces[i].lower = s->lower;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}

		/* extend after */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			/* grow the real surface upward to cover us */
			dev_priv->surfaces[i].upper = s->upper;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* okay, we need a new one */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if (dev_priv->surfaces[i].refcount == 0) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount = 1;
			dev_priv->surfaces[i].lower = s->lower;
			dev_priv->surfaces[i].upper = s->upper;
			dev_priv->surfaces[i].flags = s->flags;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* we didn't find anything */
	return -1;
}
1917
1918 static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
1919                         int lower)
1920 {
1921         struct radeon_virt_surface *s;
1922         int i;
1923         /* find the virtual surface */
1924         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1925                 s = &(dev_priv->virt_surfaces[i]);
1926                 if (s->filp) {
1927                         if ((lower == s->lower) && (filp == s->filp)) {
1928                                 if (dev_priv->surfaces[s->surface_index].
1929                                     lower == s->lower)
1930                                         dev_priv->surfaces[s->surface_index].
1931                                             lower = s->upper;
1932
1933                                 if (dev_priv->surfaces[s->surface_index].
1934                                     upper == s->upper)
1935                                         dev_priv->surfaces[s->surface_index].
1936                                             upper = s->lower;
1937
1938                                 dev_priv->surfaces[s->surface_index].refcount--;
1939                                 if (dev_priv->surfaces[s->surface_index].
1940                                     refcount == 0)
1941                                         dev_priv->surfaces[s->surface_index].
1942                                             flags = 0;
1943                                 s->filp = NULL;
1944                                 radeon_apply_surface_regs(s->surface_index,
1945                                                           dev_priv);
1946                                 return 0;
1947                         }
1948                 }
1949         }
1950         return 1;
1951 }
1952
1953 static void radeon_surfaces_release(DRMFILE filp,
1954                                     drm_radeon_private_t * dev_priv)
1955 {
1956         int i;
1957         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1958                 if (dev_priv->virt_surfaces[i].filp == filp)
1959                         free_surface(filp, dev_priv,
1960                                      dev_priv->virt_surfaces[i].lower);
1961         }
1962 }
1963
1964 /* ================================================================
1965  * IOCTL functions
1966  */
1967 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1968 {
1969         DRM_DEVICE;
1970         drm_radeon_private_t *dev_priv = dev->dev_private;
1971         drm_radeon_surface_alloc_t alloc;
1972
1973         if (!dev_priv) {
1974                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
1975                 return DRM_ERR(EINVAL);
1976         }
1977
1978         DRM_COPY_FROM_USER_IOCTL(alloc,
1979                                  (drm_radeon_surface_alloc_t __user *) data,
1980                                  sizeof(alloc));
1981
1982         if (alloc_surface(&alloc, dev_priv, filp) == -1)
1983                 return DRM_ERR(EINVAL);
1984         else
1985                 return 0;
1986 }
1987
1988 static int radeon_surface_free(DRM_IOCTL_ARGS)
1989 {
1990         DRM_DEVICE;
1991         drm_radeon_private_t *dev_priv = dev->dev_private;
1992         drm_radeon_surface_free_t memfree;
1993
1994         if (!dev_priv) {
1995                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
1996                 return DRM_ERR(EINVAL);
1997         }
1998
1999         DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_mem_free_t __user *) data,
2000                                  sizeof(memfree));
2001
2002         if (free_surface(filp, dev_priv, memfree.address))
2003                 return DRM_ERR(EINVAL);
2004         else
2005                 return 0;
2006 }
2007
/* Ioctl: clear color/depth buffers per the client's request, using the
 * per-cliprect depth clear rectangles supplied by userspace.
 * Returns 0 on success or a DRM_ERR code.
 */
static int radeon_cp_clear(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_clear_t clear;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
				 sizeof(clear));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* Clamp the cliprect count to the array size before using it to
	 * bound the user copy below. */
	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
			       sarea_priv->nbox * sizeof(depth_boxes[0])))
		return DRM_ERR(EFAULT);

	radeon_cp_dispatch_clear(dev, &clear, depth_boxes);

	COMMIT_RING();
	return 0;
}
2036
2037 /* Not sure why this isn't set all the time:
2038  */
/* Enable hardware page flipping: set the FLIP_CNTL bit on both CRTC
 * offset control registers via the ring, then reset the driver's page
 * tracking state (page 0 displayed) and mirror it into the sarea.
 */
static int radeon_do_init_pageflip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(6);
	RADEON_WAIT_UNTIL_3D_IDLE();
	/* Read-modify-write each CRTC offset control to turn on flipping. */
	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	ADVANCE_RING();

	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}
2062
2063 /* Called whenever a client dies, from drm_release.
2064  * NOTE:  Lock isn't necessarily held when this is called!
2065  */
2066 static int radeon_do_cleanup_pageflip(drm_device_t * dev)
2067 {
2068         drm_radeon_private_t *dev_priv = dev->dev_private;
2069         DRM_DEBUG("\n");
2070
2071         if (dev_priv->current_page != 0)
2072                 radeon_cp_dispatch_flip(dev);
2073
2074         dev_priv->page_flipping = 0;
2075         return 0;
2076 }
2077
2078 /* Swapping and flipping are different operations, need different ioctls.
2079  * They can & should be intermixed to support multiple 3d windows.
2080  */
/* Ioctl: perform a full-screen page flip.  Lazily enables page
 * flipping the first time a client asks, then dispatches the flip and
 * commits the ring.
 */
static int radeon_cp_flip(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip(dev);

	radeon_cp_dispatch_flip(dev);

	COMMIT_RING();
	return 0;
}
2099
2100 static int radeon_cp_swap(DRM_IOCTL_ARGS)
2101 {
2102         DRM_DEVICE;
2103         drm_radeon_private_t *dev_priv = dev->dev_private;
2104         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2105         DRM_DEBUG("\n");
2106
2107         LOCK_TEST_WITH_RETURN(dev, filp);
2108
2109         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2110
2111         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2112                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2113
2114         radeon_cp_dispatch_swap(dev);
2115         dev_priv->sarea_priv->ctx_owner = 0;
2116
2117         COMMIT_RING();
2118         return 0;
2119 }
2120
/* DRM_IOCTL_RADEON_VERTEX: queue a single vertex DMA buffer for
 * rendering.
 *
 * Validates the client-supplied buffer index and primitive type, emits
 * any dirty SAREA state to the ring, dispatches the vertices, and
 * optionally discards (returns) the DMA buffer.
 */
static int radeon_cp_vertex(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex_t vertex;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN(dev, filp);

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);

	/* Range-check the buffer index and primitive type; both come
	 * straight from user space.
	 */
	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", vertex.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	/* The buffer must belong to this client and not already be
	 * queued to the hardware.
	 */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	/* Build up a prim_t record:
	 */
	if (vertex.count) {
		buf->used = vertex.count;	/* not used? */

		/* Flush dirty SAREA state first (cliprects are handled
		 * per-primitive at dispatch time, so they are excluded).
		 */
		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
			if (radeon_emit_state(dev_priv, filp_priv,
					      &sarea_priv->context_state,
					      sarea_priv->tex_state,
					      sarea_priv->dirty)) {
				DRM_ERROR("radeon_emit_state failed\n");
				return DRM_ERR(EINVAL);
			}

			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex.count;	/* unused */
		prim.prim = vertex.prim;
		prim.numverts = vertex.count;
		prim.vc_format = dev_priv->sarea_priv->vc_format;

		radeon_cp_dispatch_vertex(dev, buf, &prim);
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2208
2209 static int radeon_cp_indices(DRM_IOCTL_ARGS)
2210 {
2211         DRM_DEVICE;
2212         drm_radeon_private_t *dev_priv = dev->dev_private;
2213         drm_file_t *filp_priv;
2214         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2215         drm_device_dma_t *dma = dev->dma;
2216         drm_buf_t *buf;
2217         drm_radeon_indices_t elts;
2218         drm_radeon_tcl_prim_t prim;
2219         int count;
2220
2221         LOCK_TEST_WITH_RETURN(dev, filp);
2222
2223         if (!dev_priv) {
2224                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2225                 return DRM_ERR(EINVAL);
2226         }
2227
2228         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2229
2230         DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
2231                                  sizeof(elts));
2232
2233         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2234                   DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);
2235
2236         if (elts.idx < 0 || elts.idx >= dma->buf_count) {
2237                 DRM_ERROR("buffer index %d (of %d max)\n",
2238                           elts.idx, dma->buf_count - 1);
2239                 return DRM_ERR(EINVAL);
2240         }
2241         if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2242                 DRM_ERROR("buffer prim %d\n", elts.prim);
2243                 return DRM_ERR(EINVAL);
2244         }
2245
2246         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2247         VB_AGE_TEST_WITH_RETURN(dev_priv);
2248
2249         buf = dma->buflist[elts.idx];
2250
2251         if (buf->filp != filp) {
2252                 DRM_ERROR("process %d using buffer owned by %p\n",
2253                           DRM_CURRENTPID, buf->filp);
2254                 return DRM_ERR(EINVAL);
2255         }
2256         if (buf->pending) {
2257                 DRM_ERROR("sending pending buffer %d\n", elts.idx);
2258                 return DRM_ERR(EINVAL);
2259         }
2260
2261         count = (elts.end - elts.start) / sizeof(u16);
2262         elts.start -= RADEON_INDEX_PRIM_OFFSET;
2263
2264         if (elts.start & 0x7) {
2265                 DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
2266                 return DRM_ERR(EINVAL);
2267         }
2268         if (elts.start < buf->used) {
2269                 DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
2270                 return DRM_ERR(EINVAL);
2271         }
2272
2273         buf->used = elts.end;
2274
2275         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2276                 if (radeon_emit_state(dev_priv, filp_priv,
2277                                       &sarea_priv->context_state,
2278                                       sarea_priv->tex_state,
2279                                       sarea_priv->dirty)) {
2280                         DRM_ERROR("radeon_emit_state failed\n");
2281                         return DRM_ERR(EINVAL);
2282                 }
2283
2284                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2285                                        RADEON_UPLOAD_TEX1IMAGES |
2286                                        RADEON_UPLOAD_TEX2IMAGES |
2287                                        RADEON_REQUIRE_QUIESCENCE);
2288         }
2289
2290         /* Build up a prim_t record:
2291          */
2292         prim.start = elts.start;
2293         prim.finish = elts.end;
2294         prim.prim = elts.prim;
2295         prim.offset = 0;        /* offset from start of dma buffers */
2296         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2297         prim.vc_format = dev_priv->sarea_priv->vc_format;
2298
2299         radeon_cp_dispatch_indices(dev, buf, &prim);
2300         if (elts.discard) {
2301                 radeon_cp_discard_buffer(dev, buf);
2302         }
2303
2304         COMMIT_RING();
2305         return 0;
2306 }
2307
2308 static int radeon_cp_texture(DRM_IOCTL_ARGS)
2309 {
2310         DRM_DEVICE;
2311         drm_radeon_private_t *dev_priv = dev->dev_private;
2312         drm_radeon_texture_t tex;
2313         drm_radeon_tex_image_t image;
2314         int ret;
2315
2316         LOCK_TEST_WITH_RETURN(dev, filp);
2317
2318         DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
2319                                  sizeof(tex));
2320
2321         if (tex.image == NULL) {
2322                 DRM_ERROR("null texture image!\n");
2323                 return DRM_ERR(EINVAL);
2324         }
2325
2326         if (DRM_COPY_FROM_USER(&image,
2327                                (drm_radeon_tex_image_t __user *) tex.image,
2328                                sizeof(image)))
2329                 return DRM_ERR(EFAULT);
2330
2331         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2332         VB_AGE_TEST_WITH_RETURN(dev_priv);
2333
2334         ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);
2335
2336         COMMIT_RING();
2337         return ret;
2338 }
2339
2340 static int radeon_cp_stipple(DRM_IOCTL_ARGS)
2341 {
2342         DRM_DEVICE;
2343         drm_radeon_private_t *dev_priv = dev->dev_private;
2344         drm_radeon_stipple_t stipple;
2345         u32 mask[32];
2346
2347         LOCK_TEST_WITH_RETURN(dev, filp);
2348
2349         DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
2350                                  sizeof(stipple));
2351
2352         if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
2353                 return DRM_ERR(EFAULT);
2354
2355         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2356
2357         radeon_cp_dispatch_stipple(dev, mask);
2358
2359         COMMIT_RING();
2360         return 0;
2361 }
2362
/* DRM_IOCTL_RADEON_INDIRECT: submit a raw command buffer from the X
 * server (privileged, unverified).
 */
static int radeon_cp_indirect(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, filp);

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL(indirect,
				 (drm_radeon_indirect_t __user *) data,
				 sizeof(indirect));

	DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
		  indirect.idx, indirect.start, indirect.end, indirect.discard);

	/* Validate user-supplied buffer index and ownership. */
	if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect.idx);
		return DRM_ERR(EINVAL);
	}

	/* Commands may only be appended past what was already used. */
	if (indirect.start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
	if (indirect.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2436
/* DRM_IOCTL_RADEON_VERTEX2: queue multiple primitives (each with its
 * own state block) from a single vertex DMA buffer.
 */
static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex2_t vertex;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN(dev, filp);

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.discard);

	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	/* The buffer must belong to this client and not be queued. */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}

	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return DRM_ERR(EINVAL);

	/* Walk the user-supplied primitive list, re-emitting state only
	 * when the state index changes.  NOTE(review): laststate starts
	 * at 0xff, so a legitimate stateidx of 0xff would skip its own
	 * first state emit — presumably stateidx never reaches 0xff in
	 * practice; confirm against the client-side encoding.
	 */
	for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
			return DRM_ERR(EFAULT);

		if (prim.stateidx != laststate) {
			drm_radeon_state_t state;

			/* stateidx is not range-checked here; the copy
			 * reads from the user-space state array, so an
			 * out-of-range index faults rather than reading
			 * kernel memory.
			 */
			if (DRM_COPY_FROM_USER(&state,
					       &vertex.state[prim.stateidx],
					       sizeof(state)))
				return DRM_ERR(EFAULT);

			if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
				DRM_ERROR("radeon_emit_state2 failed\n");
				return DRM_ERR(EINVAL);
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		if (prim.prim & RADEON_PRIM_WALK_IND) {
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */

			radeon_cp_dispatch_indices(dev, buf, &tclprim);
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0;	/* not used */

			radeon_cp_dispatch_vertex(dev, buf, &tclprim);
		}

		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2540
/* Emit one register-write packet from the command buffer.
 *
 * The header's packet_id indexes the global packet[] table, which gives
 * the destination register and dword count.  The payload dwords are
 * verified/fixed up (offsets rewritten for this client) before being
 * streamed to the ring as a CP type-0 packet.
 *
 * On success, advances cmdbuf->buf past the consumed payload and
 * returns 0; returns -EINVAL on any validation failure.
 */
static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       drm_file_t * filp_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	sz = packet[id].len;
	reg = packet[id].start;

	/* Don't read past the in-kernel copy of the command buffer. */
	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return DRM_ERR(EINVAL);
	}

	if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
		DRM_ERROR("Packet verification failed\n");
		return DRM_ERR(EINVAL);
	}

	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(reg, (sz - 1)));
	OUT_RING_TABLE(data, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2576
2577 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2578                                           drm_radeon_cmd_header_t header,
2579                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2580 {
2581         int sz = header.scalars.count;
2582         int start = header.scalars.offset;
2583         int stride = header.scalars.stride;
2584         RING_LOCALS;
2585
2586         BEGIN_RING(3 + sz);
2587         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2588         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2589         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2590         OUT_RING_TABLE(cmdbuf->buf, sz);
2591         ADVANCE_RING();
2592         cmdbuf->buf += sz * sizeof(int);
2593         cmdbuf->bufsz -= sz * sizeof(int);
2594         return 0;
2595 }
2596
2597 /* God this is ugly
2598  */
2599 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2600                                            drm_radeon_cmd_header_t header,
2601                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2602 {
2603         int sz = header.scalars.count;
2604         int start = ((unsigned int)header.scalars.offset) + 0x100;
2605         int stride = header.scalars.stride;
2606         RING_LOCALS;
2607
2608         BEGIN_RING(3 + sz);
2609         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2610         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2611         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2612         OUT_RING_TABLE(cmdbuf->buf, sz);
2613         ADVANCE_RING();
2614         cmdbuf->buf += sz * sizeof(int);
2615         cmdbuf->bufsz -= sz * sizeof(int);
2616         return 0;
2617 }
2618
2619 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2620                                           drm_radeon_cmd_header_t header,
2621                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2622 {
2623         int sz = header.vectors.count;
2624         int start = header.vectors.offset;
2625         int stride = header.vectors.stride;
2626         RING_LOCALS;
2627
2628         BEGIN_RING(3 + sz);
2629         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2630         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2631         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2632         OUT_RING_TABLE(cmdbuf->buf, sz);
2633         ADVANCE_RING();
2634
2635         cmdbuf->buf += sz * sizeof(int);
2636         cmdbuf->bufsz -= sz * sizeof(int);
2637         return 0;
2638 }
2639
/* Verify and emit a single CP type-3 packet from the command buffer.
 *
 * radeon_check_and_fixup_packet3() validates the packet and reports its
 * total size in dwords (cmdsz); the packet is then copied verbatim to
 * the ring and the command buffer cursor advanced past it.
 */
static int radeon_emit_packet3(drm_device_t * dev,
			       drm_file_t * filp_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	BEGIN_RING(cmdsz);
	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
	ADVANCE_RING();

	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2665
/* Verify a CP type-3 packet and emit it once per cliprect.
 *
 * For each cliprect in cmdbuf->boxes the scissor is programmed and the
 * same (already verified) packet is re-emitted.  If orig_nbox is zero
 * the packet is skipped entirely but the buffer cursor still advances.
 */
static int radeon_emit_packet3_cliprect(drm_device_t *dev,
					drm_file_t *filp_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	/* No cliprects at all: consume the packet without emitting it. */
	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		/* Replay the packet for this cliprect. */
		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

      out:
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2727
2728 static int radeon_emit_wait(drm_device_t * dev, int flags)
2729 {
2730         drm_radeon_private_t *dev_priv = dev->dev_private;
2731         RING_LOCALS;
2732
2733         DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2734         switch (flags) {
2735         case RADEON_WAIT_2D:
2736                 BEGIN_RING(2);
2737                 RADEON_WAIT_UNTIL_2D_IDLE();
2738                 ADVANCE_RING();
2739                 break;
2740         case RADEON_WAIT_3D:
2741                 BEGIN_RING(2);
2742                 RADEON_WAIT_UNTIL_3D_IDLE();
2743                 ADVANCE_RING();
2744                 break;
2745         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2746                 BEGIN_RING(2);
2747                 RADEON_WAIT_UNTIL_IDLE();
2748                 ADVANCE_RING();
2749                 break;
2750         default:
2751                 return DRM_ERR(EINVAL);
2752         }
2753
2754         return 0;
2755 }
2756
/* DRM_IOCTL_RADEON_CMDBUF: parse and execute a client command stream.
 *
 * The whole buffer is copied into kernel memory first (to prevent
 * time-of-check/time-of-use races against user space), then consumed
 * one drm_radeon_cmd_header_t at a time.  R300-class chips take a
 * separate parser (r300_do_cp_cmdbuf).
 */
static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = NULL;
	int idx;
	drm_radeon_kcmd_buffer_t cmdbuf;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf = NULL;

	LOCK_TEST_WITH_RETURN(dev, filp);

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(cmdbuf,
				 (drm_radeon_cmd_buffer_t __user *) data,
				 sizeof(cmdbuf));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	/* Cap the buffer size to keep the kernel allocation bounded. */
	if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
		return DRM_ERR(EINVAL);
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf.bufsz;
	if (orig_bufsz != 0) {
		kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
		if (kbuf == NULL)
			return DRM_ERR(ENOMEM);
		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf,
				       cmdbuf.bufsz)) {
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
			return DRM_ERR(EFAULT);
		}
		cmdbuf.buf = kbuf;
	}

	orig_nbox = cmdbuf.nbox;

	/* R300 command streams use a different format and verifier. */
	if (dev_priv->microcode_version == UCODE_R300) {
		int temp;
		temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);

		if (orig_bufsz != 0)
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

		return temp;
	}

	/* microcode_version != r300 */
	while (cmdbuf.bufsz >= sizeof(header)) {

		/* Each command is a one-dword header followed by a
		 * variable-length, command-specific payload; the
		 * handlers below advance cmdbuf.buf/bufsz themselves.
		 */
		header.i = *(int *)cmdbuf.buf;
		cmdbuf.buf += sizeof(header);
		cmdbuf.bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET:
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets
			    (dev_priv, filp_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				goto err;
			}

			/* The buffer must belong to this client and not
			 * already be queued.
			 */
			buf = dma->buflist[idx];
			if (buf->filp != filp || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->filp, filp, buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer(dev, buf);
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect
			    (dev, filp_priv, &cmdbuf, orig_nbox)) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait(dev, header.wait.flags)) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;
		default:
			DRM_ERROR("bad cmd_type %d at %p\n",
				  header.header.cmd_type,
				  cmdbuf.buf - sizeof(header));
			goto err;
		}
	}

	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

	DRM_DEBUG("DONE\n");
	COMMIT_RING();
	return 0;

      err:
	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	return DRM_ERR(EINVAL);
}
2923
2924 static int radeon_cp_getparam(DRM_IOCTL_ARGS)
2925 {
2926         DRM_DEVICE;
2927         drm_radeon_private_t *dev_priv = dev->dev_private;
2928         drm_radeon_getparam_t param;
2929         int value;
2930
2931         if (!dev_priv) {
2932                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2933                 return DRM_ERR(EINVAL);
2934         }
2935
2936         DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
2937                                  sizeof(param));
2938
2939         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
2940
2941         switch (param.param) {
2942         case RADEON_PARAM_GART_BUFFER_OFFSET:
2943                 value = dev_priv->gart_buffers_offset;
2944                 break;
2945         case RADEON_PARAM_LAST_FRAME:
2946                 dev_priv->stats.last_frame_reads++;
2947                 value = GET_SCRATCH(0);
2948                 break;
2949         case RADEON_PARAM_LAST_DISPATCH:
2950                 value = GET_SCRATCH(1);
2951                 break;
2952         case RADEON_PARAM_LAST_CLEAR:
2953                 dev_priv->stats.last_clear_reads++;
2954                 value = GET_SCRATCH(2);
2955                 break;
2956         case RADEON_PARAM_IRQ_NR:
2957                 value = dev->irq;
2958                 break;
2959         case RADEON_PARAM_GART_BASE:
2960                 value = dev_priv->gart_vm_start;
2961                 break;
2962         case RADEON_PARAM_REGISTER_HANDLE:
2963                 value = dev_priv->mmio->offset;
2964                 break;
2965         case RADEON_PARAM_STATUS_HANDLE:
2966                 value = dev_priv->ring_rptr_offset;
2967                 break;
2968 #if BITS_PER_LONG == 32
2969                 /*
2970                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2971                  * pointer which can't fit into an int-sized variable.  According to
2972                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2973                  * not supporting it shouldn't be a problem.  If the same functionality
2974                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
2975                  * so backwards-compatibility for the embedded platforms can be
2976                  * maintained.  --davidm 4-Feb-2004.
2977                  */
2978         case RADEON_PARAM_SAREA_HANDLE:
2979                 /* The lock is the first dword in the sarea. */
2980                 value = (long)dev->lock.hw_lock;
2981                 break;
2982 #endif
2983         case RADEON_PARAM_GART_TEX_HANDLE:
2984                 value = dev_priv->gart_textures_offset;
2985                 break;
2986         
2987         case RADEON_PARAM_CARD_TYPE:
2988                 if (dev_priv->flags & CHIP_IS_PCIE)
2989                         value = RADEON_CARD_PCIE;
2990                 else if (dev_priv->flags & CHIP_IS_AGP)
2991                         value = RADEON_CARD_AGP;
2992                 else
2993                         value = RADEON_CARD_PCI;
2994                 break;
2995         default:
2996                 return DRM_ERR(EINVAL);
2997         }
2998
2999         if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
3000                 DRM_ERROR("copy_to_user\n");
3001                 return DRM_ERR(EFAULT);
3002         }
3003
3004         return 0;
3005 }
3006
3007 static int radeon_cp_setparam(DRM_IOCTL_ARGS)
3008 {
3009         DRM_DEVICE;
3010         drm_radeon_private_t *dev_priv = dev->dev_private;
3011         drm_file_t *filp_priv;
3012         drm_radeon_setparam_t sp;
3013         struct drm_radeon_driver_file_fields *radeon_priv;
3014
3015         if (!dev_priv) {
3016                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
3017                 return DRM_ERR(EINVAL);
3018         }
3019
3020         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
3021
3022         DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
3023                                  sizeof(sp));
3024
3025         switch (sp.param) {
3026         case RADEON_SETPARAM_FB_LOCATION:
3027                 radeon_priv = filp_priv->driver_priv;
3028                 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
3029                 break;
3030         case RADEON_SETPARAM_SWITCH_TILING:
3031                 if (sp.value == 0) {
3032                         DRM_DEBUG("color tiling disabled\n");
3033                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3034                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3035                         dev_priv->sarea_priv->tiling_enabled = 0;
3036                 } else if (sp.value == 1) {
3037                         DRM_DEBUG("color tiling enabled\n");
3038                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3039                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3040                         dev_priv->sarea_priv->tiling_enabled = 1;
3041                 }
3042                 break;
3043         case RADEON_SETPARAM_PCIGART_LOCATION:
3044                 dev_priv->pcigart_offset = sp.value;
3045                 break;
3046         case RADEON_SETPARAM_NEW_MEMMAP:
3047                 dev_priv->new_memmap = sp.value;
3048                 break;
3049         default:
3050                 DRM_DEBUG("Invalid parameter %d\n", sp.param);
3051                 return DRM_ERR(EINVAL);
3052         }
3053
3054         return 0;
3055 }
3056
3057 /* When a client dies:
3058  *    - Check for and clean up flipped page state
3059  *    - Free any alloced GART memory.
3060  *    - Free any alloced radeon surfaces.
3061  *
3062  * DRM infrastructure takes care of reclaiming dma buffers.
3063  */
3064 void radeon_driver_preclose(drm_device_t * dev, DRMFILE filp)
3065 {
3066         if (dev->dev_private) {
3067                 drm_radeon_private_t *dev_priv = dev->dev_private;
3068                 if (dev_priv->page_flipping) {
3069                         radeon_do_cleanup_pageflip(dev);
3070                 }
3071                 radeon_mem_release(filp, dev_priv->gart_heap);
3072                 radeon_mem_release(filp, dev_priv->fb_heap);
3073                 radeon_surfaces_release(filp, dev_priv);
3074         }
3075 }
3076
/* Called when the last file handle on the device is closed; tear down
 * the CP and engine state via radeon_do_release().
 */
void radeon_driver_lastclose(drm_device_t * dev)
{
	radeon_do_release(dev);
}
3081
3082 int radeon_driver_open(drm_device_t * dev, drm_file_t * filp_priv)
3083 {
3084         drm_radeon_private_t *dev_priv = dev->dev_private;
3085         struct drm_radeon_driver_file_fields *radeon_priv;
3086
3087         DRM_DEBUG("\n");
3088         radeon_priv =
3089             (struct drm_radeon_driver_file_fields *)
3090             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3091
3092         if (!radeon_priv)
3093                 return -ENOMEM;
3094
3095         filp_priv->driver_priv = radeon_priv;
3096
3097         if (dev_priv)
3098                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3099         else
3100                 radeon_priv->radeon_fb_delta = 0;
3101         return 0;
3102 }
3103
3104 void radeon_driver_postclose(drm_device_t * dev, drm_file_t * filp_priv)
3105 {
3106         struct drm_radeon_driver_file_fields *radeon_priv =
3107             filp_priv->driver_priv;
3108
3109         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3110 }
3111
/* Ioctl dispatch table, indexed by ioctl number.  All entries require
 * DRM_AUTH; setup/teardown ioctls (CP init/start/stop/reset, indirect
 * buffers, heap init) additionally require master + root.
 */
drm_ioctl_desc_t radeon_ioctls[] = {
	[DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
};

/* Number of entries in radeon_ioctls[], exported to the DRM core. */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);