Merge linux-2.6 into linux-acpi-2.6 test
[pandora-kernel.git] / drivers / char / drm / radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*-
2  *
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
40 static __inline__ int radeon_check_and_fixup_offset( drm_radeon_private_t *dev_priv,
41                                                      drm_file_t *filp_priv,
42                                                      u32 *offset ) {
43         u32 off = *offset;
44         struct drm_radeon_driver_file_fields *radeon_priv;
45
46         if ( off >= dev_priv->fb_location &&
47              off < ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
48                 return 0;
49
50         radeon_priv = filp_priv->driver_priv;
51         off += radeon_priv->radeon_fb_delta;
52
53         DRM_DEBUG( "offset fixed up to 0x%x\n", off );
54
55         if ( off < dev_priv->fb_location ||
56              off >= ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
57                 return DRM_ERR( EINVAL );
58
59         *offset = off;
60
61         return 0;
62 }
63
64 static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_priv,
65                                                       drm_file_t *filp_priv,
66                                                       int id,
67                                                       u32 __user *data ) {
68         switch ( id ) {
69
70         case RADEON_EMIT_PP_MISC:
71                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
72                                                     &data[( RADEON_RB3D_DEPTHOFFSET
73                                                             - RADEON_PP_MISC ) / 4] ) ) {
74                         DRM_ERROR( "Invalid depth buffer offset\n" );
75                         return DRM_ERR( EINVAL );
76                 }
77                 break;
78
79         case RADEON_EMIT_PP_CNTL:
80                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
81                                                     &data[( RADEON_RB3D_COLOROFFSET
82                                                             - RADEON_PP_CNTL ) / 4] ) ) {
83                         DRM_ERROR( "Invalid colour buffer offset\n" );
84                         return DRM_ERR( EINVAL );
85                 }
86                 break;
87
88         case R200_EMIT_PP_TXOFFSET_0:
89         case R200_EMIT_PP_TXOFFSET_1:
90         case R200_EMIT_PP_TXOFFSET_2:
91         case R200_EMIT_PP_TXOFFSET_3:
92         case R200_EMIT_PP_TXOFFSET_4:
93         case R200_EMIT_PP_TXOFFSET_5:
94                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
95                                                     &data[0] ) ) {
96                         DRM_ERROR( "Invalid R200 texture offset\n" );
97                         return DRM_ERR( EINVAL );
98                 }
99                 break;
100
101         case RADEON_EMIT_PP_TXFILTER_0:
102         case RADEON_EMIT_PP_TXFILTER_1:
103         case RADEON_EMIT_PP_TXFILTER_2:
104                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
105                                                     &data[( RADEON_PP_TXOFFSET_0
106                                                             - RADEON_PP_TXFILTER_0 ) / 4] ) ) {
107                         DRM_ERROR( "Invalid R100 texture offset\n" );
108                         return DRM_ERR( EINVAL );
109                 }
110                 break;
111
112         case R200_EMIT_PP_CUBIC_OFFSETS_0:
113         case R200_EMIT_PP_CUBIC_OFFSETS_1:
114         case R200_EMIT_PP_CUBIC_OFFSETS_2:
115         case R200_EMIT_PP_CUBIC_OFFSETS_3:
116         case R200_EMIT_PP_CUBIC_OFFSETS_4:
117         case R200_EMIT_PP_CUBIC_OFFSETS_5: {
118                 int i;
119                 for ( i = 0; i < 5; i++ ) {
120                         if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
121                                                             &data[i] ) ) {
122                                 DRM_ERROR( "Invalid R200 cubic texture offset\n" );
123                                 return DRM_ERR( EINVAL );
124                         }
125                 }
126                 break;
127         }
128
129         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
130         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
131         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
132                         int i;
133                         for (i = 0; i < 5; i++) {
134                                 if (radeon_check_and_fixup_offset(dev_priv,
135                                                                   filp_priv,
136                                                                   &data[i])) {
137                                         DRM_ERROR
138                                             ("Invalid R100 cubic texture offset\n");
139                                         return DRM_ERR(EINVAL);
140                                 }
141                         }
142                 }
143                 break;
144
145         case RADEON_EMIT_RB3D_COLORPITCH:
146         case RADEON_EMIT_RE_LINE_PATTERN:
147         case RADEON_EMIT_SE_LINE_WIDTH:
148         case RADEON_EMIT_PP_LUM_MATRIX:
149         case RADEON_EMIT_PP_ROT_MATRIX_0:
150         case RADEON_EMIT_RB3D_STENCILREFMASK:
151         case RADEON_EMIT_SE_VPORT_XSCALE:
152         case RADEON_EMIT_SE_CNTL:
153         case RADEON_EMIT_SE_CNTL_STATUS:
154         case RADEON_EMIT_RE_MISC:
155         case RADEON_EMIT_PP_BORDER_COLOR_0:
156         case RADEON_EMIT_PP_BORDER_COLOR_1:
157         case RADEON_EMIT_PP_BORDER_COLOR_2:
158         case RADEON_EMIT_SE_ZBIAS_FACTOR:
159         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
160         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
161         case R200_EMIT_PP_TXCBLEND_0:
162         case R200_EMIT_PP_TXCBLEND_1:
163         case R200_EMIT_PP_TXCBLEND_2:
164         case R200_EMIT_PP_TXCBLEND_3:
165         case R200_EMIT_PP_TXCBLEND_4:
166         case R200_EMIT_PP_TXCBLEND_5:
167         case R200_EMIT_PP_TXCBLEND_6:
168         case R200_EMIT_PP_TXCBLEND_7:
169         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
170         case R200_EMIT_TFACTOR_0:
171         case R200_EMIT_VTX_FMT_0:
172         case R200_EMIT_VAP_CTL:
173         case R200_EMIT_MATRIX_SELECT_0:
174         case R200_EMIT_TEX_PROC_CTL_2:
175         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
176         case R200_EMIT_PP_TXFILTER_0:
177         case R200_EMIT_PP_TXFILTER_1:
178         case R200_EMIT_PP_TXFILTER_2:
179         case R200_EMIT_PP_TXFILTER_3:
180         case R200_EMIT_PP_TXFILTER_4:
181         case R200_EMIT_PP_TXFILTER_5:
182         case R200_EMIT_VTE_CNTL:
183         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
184         case R200_EMIT_PP_TAM_DEBUG3:
185         case R200_EMIT_PP_CNTL_X:
186         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
187         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
188         case R200_EMIT_RE_SCISSOR_TL_0:
189         case R200_EMIT_RE_SCISSOR_TL_1:
190         case R200_EMIT_RE_SCISSOR_TL_2:
191         case R200_EMIT_SE_VAP_CNTL_STATUS:
192         case R200_EMIT_SE_VTX_STATE_CNTL:
193         case R200_EMIT_RE_POINTSIZE:
194         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
195         case R200_EMIT_PP_CUBIC_FACES_0:
196         case R200_EMIT_PP_CUBIC_FACES_1:
197         case R200_EMIT_PP_CUBIC_FACES_2:
198         case R200_EMIT_PP_CUBIC_FACES_3:
199         case R200_EMIT_PP_CUBIC_FACES_4:
200         case R200_EMIT_PP_CUBIC_FACES_5:
201         case RADEON_EMIT_PP_TEX_SIZE_0:
202         case RADEON_EMIT_PP_TEX_SIZE_1:
203         case RADEON_EMIT_PP_TEX_SIZE_2:
204         case R200_EMIT_RB3D_BLENDCOLOR:
205         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
206         case RADEON_EMIT_PP_CUBIC_FACES_0:
207         case RADEON_EMIT_PP_CUBIC_FACES_1:
208         case RADEON_EMIT_PP_CUBIC_FACES_2:
209         case R200_EMIT_PP_TRI_PERF_CNTL:
210                 /* These packets don't contain memory offsets */
211                 break;
212
213         default:
214                 DRM_ERROR( "Unknown state packet ID %d\n", id );
215                 return DRM_ERR( EINVAL );
216         }
217
218         return 0;
219 }
220
221 static __inline__ int radeon_check_and_fixup_packet3( drm_radeon_private_t *dev_priv,
222                                                       drm_file_t *filp_priv,
223                                                       drm_radeon_cmd_buffer_t *cmdbuf,
224                                                       unsigned int *cmdsz ) {
225         u32 *cmd = (u32 *) cmdbuf->buf;
226
227         *cmdsz = 2 + ( ( cmd[0] & RADEON_CP_PACKET_COUNT_MASK ) >> 16 );
228
229         if ( ( cmd[0] & 0xc0000000 ) != RADEON_CP_PACKET3 ) {
230                 DRM_ERROR( "Not a type 3 packet\n" );
231                 return DRM_ERR( EINVAL );
232         }
233
234         if ( 4 * *cmdsz > cmdbuf->bufsz ) {
235                 DRM_ERROR( "Packet size larger than size of data provided\n" );
236                 return DRM_ERR( EINVAL );
237         }
238
239         /* Check client state and fix it up if necessary */
240         if ( cmd[0] & 0x8000 ) { /* MSB of opcode: next DWORD GUI_CNTL */
241                 u32 offset;
242
243                 if ( cmd[1] & ( RADEON_GMC_SRC_PITCH_OFFSET_CNTL
244                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
245                         offset = cmd[2] << 10;
246                         if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
247                                 DRM_ERROR( "Invalid first packet offset\n" );
248                                 return DRM_ERR( EINVAL );
249                         }
250                         cmd[2] = ( cmd[2] & 0xffc00000 ) | offset >> 10;
251                 }
252
253                 if ( ( cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL ) &&
254                      ( cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
255                         offset = cmd[3] << 10;
256                         if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
257                                 DRM_ERROR( "Invalid second packet offset\n" );
258                                 return DRM_ERR( EINVAL );
259                         }
260                         cmd[3] = ( cmd[3] & 0xffc00000 ) | offset >> 10;
261                 }
262         }
263
264         return 0;
265 }
266
267
268 /* ================================================================
269  * CP hardware state programming functions
270  */
271
272 static __inline__ void radeon_emit_clip_rect( drm_radeon_private_t *dev_priv,
273                                           drm_clip_rect_t *box )
274 {
275         RING_LOCALS;
276
277         DRM_DEBUG( "   box:  x1=%d y1=%d  x2=%d y2=%d\n",
278                    box->x1, box->y1, box->x2, box->y2 );
279
280         BEGIN_RING( 4 );
281         OUT_RING( CP_PACKET0( RADEON_RE_TOP_LEFT, 0 ) );
282         OUT_RING( (box->y1 << 16) | box->x1 );
283         OUT_RING( CP_PACKET0( RADEON_RE_WIDTH_HEIGHT, 0 ) );
284         OUT_RING( ((box->y2 - 1) << 16) | (box->x2 - 1) );
285         ADVANCE_RING();
286 }
287
288 /* Emit 1.1 state
289  */
290 static int radeon_emit_state( drm_radeon_private_t *dev_priv,
291                               drm_file_t *filp_priv,
292                               drm_radeon_context_regs_t *ctx,
293                               drm_radeon_texture_regs_t *tex,
294                               unsigned int dirty )
295 {
296         RING_LOCALS;
297         DRM_DEBUG( "dirty=0x%08x\n", dirty );
298
299         if ( dirty & RADEON_UPLOAD_CONTEXT ) {
300                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
301                                                     &ctx->rb3d_depthoffset ) ) {
302                         DRM_ERROR( "Invalid depth buffer offset\n" );
303                         return DRM_ERR( EINVAL );
304                 }
305
306                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
307                                                     &ctx->rb3d_coloroffset ) ) {
308                         DRM_ERROR( "Invalid depth buffer offset\n" );
309                         return DRM_ERR( EINVAL );
310                 }
311
312                 BEGIN_RING( 14 );
313                 OUT_RING( CP_PACKET0( RADEON_PP_MISC, 6 ) );
314                 OUT_RING( ctx->pp_misc );
315                 OUT_RING( ctx->pp_fog_color );
316                 OUT_RING( ctx->re_solid_color );
317                 OUT_RING( ctx->rb3d_blendcntl );
318                 OUT_RING( ctx->rb3d_depthoffset );
319                 OUT_RING( ctx->rb3d_depthpitch );
320                 OUT_RING( ctx->rb3d_zstencilcntl );
321                 OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 2 ) );
322                 OUT_RING( ctx->pp_cntl );
323                 OUT_RING( ctx->rb3d_cntl );
324                 OUT_RING( ctx->rb3d_coloroffset );
325                 OUT_RING( CP_PACKET0( RADEON_RB3D_COLORPITCH, 0 ) );
326                 OUT_RING( ctx->rb3d_colorpitch );
327                 ADVANCE_RING();
328         }
329
330         if ( dirty & RADEON_UPLOAD_VERTFMT ) {
331                 BEGIN_RING( 2 );
332                 OUT_RING( CP_PACKET0( RADEON_SE_COORD_FMT, 0 ) );
333                 OUT_RING( ctx->se_coord_fmt );
334                 ADVANCE_RING();
335         }
336
337         if ( dirty & RADEON_UPLOAD_LINE ) {
338                 BEGIN_RING( 5 );
339                 OUT_RING( CP_PACKET0( RADEON_RE_LINE_PATTERN, 1 ) );
340                 OUT_RING( ctx->re_line_pattern );
341                 OUT_RING( ctx->re_line_state );
342                 OUT_RING( CP_PACKET0( RADEON_SE_LINE_WIDTH, 0 ) );
343                 OUT_RING( ctx->se_line_width );
344                 ADVANCE_RING();
345         }
346
347         if ( dirty & RADEON_UPLOAD_BUMPMAP ) {
348                 BEGIN_RING( 5 );
349                 OUT_RING( CP_PACKET0( RADEON_PP_LUM_MATRIX, 0 ) );
350                 OUT_RING( ctx->pp_lum_matrix );
351                 OUT_RING( CP_PACKET0( RADEON_PP_ROT_MATRIX_0, 1 ) );
352                 OUT_RING( ctx->pp_rot_matrix_0 );
353                 OUT_RING( ctx->pp_rot_matrix_1 );
354                 ADVANCE_RING();
355         }
356
357         if ( dirty & RADEON_UPLOAD_MASKS ) {
358                 BEGIN_RING( 4 );
359                 OUT_RING( CP_PACKET0( RADEON_RB3D_STENCILREFMASK, 2 ) );
360                 OUT_RING( ctx->rb3d_stencilrefmask );
361                 OUT_RING( ctx->rb3d_ropcntl );
362                 OUT_RING( ctx->rb3d_planemask );
363                 ADVANCE_RING();
364         }
365
366         if ( dirty & RADEON_UPLOAD_VIEWPORT ) {
367                 BEGIN_RING( 7 );
368                 OUT_RING( CP_PACKET0( RADEON_SE_VPORT_XSCALE, 5 ) );
369                 OUT_RING( ctx->se_vport_xscale );
370                 OUT_RING( ctx->se_vport_xoffset );
371                 OUT_RING( ctx->se_vport_yscale );
372                 OUT_RING( ctx->se_vport_yoffset );
373                 OUT_RING( ctx->se_vport_zscale );
374                 OUT_RING( ctx->se_vport_zoffset );
375                 ADVANCE_RING();
376         }
377
378         if ( dirty & RADEON_UPLOAD_SETUP ) {
379                 BEGIN_RING( 4 );
380                 OUT_RING( CP_PACKET0( RADEON_SE_CNTL, 0 ) );
381                 OUT_RING( ctx->se_cntl );
382                 OUT_RING( CP_PACKET0( RADEON_SE_CNTL_STATUS, 0 ) );
383                 OUT_RING( ctx->se_cntl_status );
384                 ADVANCE_RING();
385         }
386
387         if ( dirty & RADEON_UPLOAD_MISC ) {
388                 BEGIN_RING( 2 );
389                 OUT_RING( CP_PACKET0( RADEON_RE_MISC, 0 ) );
390                 OUT_RING( ctx->re_misc );
391                 ADVANCE_RING();
392         }
393
394         if ( dirty & RADEON_UPLOAD_TEX0 ) {
395                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
396                                                     &tex[0].pp_txoffset ) ) {
397                         DRM_ERROR( "Invalid texture offset for unit 0\n" );
398                         return DRM_ERR( EINVAL );
399                 }
400
401                 BEGIN_RING( 9 );
402                 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_0, 5 ) );
403                 OUT_RING( tex[0].pp_txfilter );
404                 OUT_RING( tex[0].pp_txformat );
405                 OUT_RING( tex[0].pp_txoffset );
406                 OUT_RING( tex[0].pp_txcblend );
407                 OUT_RING( tex[0].pp_txablend );
408                 OUT_RING( tex[0].pp_tfactor );
409                 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_0, 0 ) );
410                 OUT_RING( tex[0].pp_border_color );
411                 ADVANCE_RING();
412         }
413
414         if ( dirty & RADEON_UPLOAD_TEX1 ) {
415                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
416                                                     &tex[1].pp_txoffset ) ) {
417                         DRM_ERROR( "Invalid texture offset for unit 1\n" );
418                         return DRM_ERR( EINVAL );
419                 }
420
421                 BEGIN_RING( 9 );
422                 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_1, 5 ) );
423                 OUT_RING( tex[1].pp_txfilter );
424                 OUT_RING( tex[1].pp_txformat );
425                 OUT_RING( tex[1].pp_txoffset );
426                 OUT_RING( tex[1].pp_txcblend );
427                 OUT_RING( tex[1].pp_txablend );
428                 OUT_RING( tex[1].pp_tfactor );
429                 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_1, 0 ) );
430                 OUT_RING( tex[1].pp_border_color );
431                 ADVANCE_RING();
432         }
433
434         if ( dirty & RADEON_UPLOAD_TEX2 ) {
435                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
436                                                     &tex[2].pp_txoffset ) ) {
437                         DRM_ERROR( "Invalid texture offset for unit 2\n" );
438                         return DRM_ERR( EINVAL );
439                 }
440
441                 BEGIN_RING( 9 );
442                 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_2, 5 ) );
443                 OUT_RING( tex[2].pp_txfilter );
444                 OUT_RING( tex[2].pp_txformat );
445                 OUT_RING( tex[2].pp_txoffset );
446                 OUT_RING( tex[2].pp_txcblend );
447                 OUT_RING( tex[2].pp_txablend );
448                 OUT_RING( tex[2].pp_tfactor );
449                 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_2, 0 ) );
450                 OUT_RING( tex[2].pp_border_color );
451                 ADVANCE_RING();
452         }
453
454         return 0;
455 }
456
457 /* Emit 1.2 state
458  */
459 static int radeon_emit_state2( drm_radeon_private_t *dev_priv,
460                                drm_file_t *filp_priv,
461                                drm_radeon_state_t *state )
462 {
463         RING_LOCALS;
464
465         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
466                 BEGIN_RING( 3 );
467                 OUT_RING( CP_PACKET0( RADEON_SE_ZBIAS_FACTOR, 1 ) );
468                 OUT_RING( state->context2.se_zbias_factor ); 
469                 OUT_RING( state->context2.se_zbias_constant ); 
470                 ADVANCE_RING();
471         }
472
473         return radeon_emit_state( dev_priv, filp_priv, &state->context,
474                            state->tex, state->dirty );
475 }
476
477 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
478  * 1.3 cmdbuffers allow all previous state to be updated as well as
479  * the tcl scalar and vector areas.  
480  */
481 static struct { 
482         int start; 
483         int len; 
484         const char *name;
485 } packet[RADEON_MAX_STATE_PACKETS] = {
486         { RADEON_PP_MISC,7,"RADEON_PP_MISC" },
487         { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
488         { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
489         { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
490         { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
491         { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
492         { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
493         { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
494         { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
495         { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
496         { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
497         { RADEON_RE_MISC,1,"RADEON_RE_MISC" },
498         { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
499         { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
500         { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
501         { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
502         { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
503         { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
504         { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
505         { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
506         { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
507         { R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" },
508         { R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" },
509         { R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" },
510         { R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" },
511         { R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" },
512         { R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" },
513         { R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" },
514         { R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" },
515         { R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
516         { R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" },
517         { R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" },
518         { R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" },
519         { R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
520         { R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
521         { R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
522         { R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" },
523         { R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" },
524         { R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" },
525         { R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" },
526         { R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" },
527         { R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" },
528         { R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" },
529         { R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" },
530         { R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" },
531         { R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" },
532         { R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" },
533         { R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" },
534         { R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" },
535         { R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
536         { R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" },
537         { R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" }, 
538         { R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" }, 
539         { R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" }, 
540         { R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" }, 
541         { R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" }, 
542         { R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" }, 
543         { R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" }, 
544         { R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" }, 
545         { R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" }, 
546         { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
547         { R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
548         { R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
549         { R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1" },
550         { R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
551         { R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2" },
552         { R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
553         { R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3" },
554         { R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
555         { R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4" },
556         { R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
557         { R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5" },
558         { R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
559         { RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
560         { RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
561         { RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
562         { R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" },
563         { R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" },
564         { RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
565         { RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
566         { RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
567         { RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
568         { RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
569         { RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
570         { R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
571 };
572
573
574
575 /* ================================================================
576  * Performance monitoring functions
577  */
578
579 static void radeon_clear_box( drm_radeon_private_t *dev_priv,
580                               int x, int y, int w, int h,
581                               int r, int g, int b )
582 {
583         u32 color;
584         RING_LOCALS;
585
586         x += dev_priv->sarea_priv->boxes[0].x1;
587         y += dev_priv->sarea_priv->boxes[0].y1;
588
589         switch ( dev_priv->color_fmt ) {
590         case RADEON_COLOR_FORMAT_RGB565:
591                 color = (((r & 0xf8) << 8) |
592                          ((g & 0xfc) << 3) |
593                          ((b & 0xf8) >> 3));
594                 break;
595         case RADEON_COLOR_FORMAT_ARGB8888:
596         default:
597                 color = (((0xff) << 24) | (r << 16) | (g <<  8) | b);
598                 break;
599         }
600
601         BEGIN_RING( 4 );
602         RADEON_WAIT_UNTIL_3D_IDLE();            
603         OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
604         OUT_RING( 0xffffffff );
605         ADVANCE_RING();
606
607         BEGIN_RING( 6 );
608
609         OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
610         OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
611                   RADEON_GMC_BRUSH_SOLID_COLOR |
612                   (dev_priv->color_fmt << 8) |
613                   RADEON_GMC_SRC_DATATYPE_COLOR |
614                   RADEON_ROP3_P |
615                   RADEON_GMC_CLR_CMP_CNTL_DIS );
616
617         if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) { 
618                 OUT_RING( dev_priv->front_pitch_offset );
619         } else {         
620                 OUT_RING( dev_priv->back_pitch_offset );
621         } 
622
623         OUT_RING( color );
624
625         OUT_RING( (x << 16) | y );
626         OUT_RING( (w << 16) | h );
627
628         ADVANCE_RING();
629 }
630
631 static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv )
632 {
633         /* Collapse various things into a wait flag -- trying to
634          * guess if userspase slept -- better just to have them tell us.
635          */
636         if (dev_priv->stats.last_frame_reads > 1 ||
637             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
638                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
639         }
640
641         if (dev_priv->stats.freelist_loops) {
642                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
643         }
644
645         /* Purple box for page flipping
646          */
647         if ( dev_priv->stats.boxes & RADEON_BOX_FLIP ) 
648                 radeon_clear_box( dev_priv, 4, 4, 8, 8, 255, 0, 255 );
649
650         /* Red box if we have to wait for idle at any point
651          */
652         if ( dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE ) 
653                 radeon_clear_box( dev_priv, 16, 4, 8, 8, 255, 0, 0 );
654
655         /* Blue box: lost context?
656          */
657
658         /* Yellow box for texture swaps
659          */
660         if ( dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD ) 
661                 radeon_clear_box( dev_priv, 40, 4, 8, 8, 255, 255, 0 );
662
663         /* Green box if hardware never idles (as far as we can tell)
664          */
665         if ( !(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE) ) 
666                 radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 );
667
668
669         /* Draw bars indicating number of buffers allocated 
670          * (not a great measure, easily confused)
671          */
672         if (dev_priv->stats.requested_bufs) {
673                 if (dev_priv->stats.requested_bufs > 100)
674                         dev_priv->stats.requested_bufs = 100;
675
676                 radeon_clear_box( dev_priv, 4, 16,  
677                                   dev_priv->stats.requested_bufs, 4,
678                                   196, 128, 128 );
679         }
680
681         memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) );
682
683 }
684 /* ================================================================
685  * CP command dispatch functions
686  */
687
/* Emit CP commands that clear the buffers selected by clear->flags for
 * every cliprect currently in the SAREA.
 *
 * dev:         DRM device; dev->dev_private supplies the radeon state.
 * clear:       clear request; flags, color_mask, clear_color, clear_depth
 *              and depth_mask are read here.
 * depth_boxes: one entry per cliprect, indexed in step with the SAREA
 *              boxes; its ui[] words are emitted as vertex data.
 *
 * Steps, in order: color clear of front/back with 2D solid fills, the
 * optional fast Z clear (RADEON_CLEAR_FASTZ), the depth/stencil clear by
 * drawing a rectangle per cliprect (separate R200 and non-R200 paths),
 * and finally bumping and emitting the last_clear age.
 */
688 static void radeon_cp_dispatch_clear( drm_device_t *dev,
689                                       drm_radeon_clear_t *clear,
690                                       drm_radeon_clear_rect_t *depth_boxes )
691 {
692         drm_radeon_private_t *dev_priv = dev->dev_private;
693         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
694         drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
695         int nbox = sarea_priv->nbox;
696         drm_clip_rect_t *pbox = sarea_priv->boxes;
697         unsigned int flags = clear->flags;
698         u32 rb3d_cntl = 0, rb3d_stencilrefmask= 0;
699         int i;
700         RING_LOCALS;
701         DRM_DEBUG( "flags = 0x%x\n", flags );
702
703         dev_priv->stats.clears++;
704
        /* When page flipping is active and page 1 is current, exchange
         * the FRONT and BACK request bits before acting on them.
         */
705         if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
706                 unsigned int tmp = flags;
707
708                 flags &= ~(RADEON_FRONT | RADEON_BACK);
709                 if ( tmp & RADEON_FRONT ) flags |= RADEON_BACK;
710                 if ( tmp & RADEON_BACK )  flags |= RADEON_FRONT;
711         }
712
713         if ( flags & (RADEON_FRONT | RADEON_BACK) ) {
714
715                 BEGIN_RING( 4 );
716
717                 /* Ensure the 3D stream is idle before doing a
718                  * 2D fill to clear the front or back buffer.
719                  */
720                 RADEON_WAIT_UNTIL_3D_IDLE();
721                 
722                 OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
723                 OUT_RING( clear->color_mask );
724
725                 ADVANCE_RING();
726
727                 /* Make sure we restore the 3D state next time.
728                  */
729                 dev_priv->sarea_priv->ctx_owner = 0;
730
                /* One solid-color fill per cliprect and per selected
                 * color buffer.
                 */
731                 for ( i = 0 ; i < nbox ; i++ ) {
732                         int x = pbox[i].x1;
733                         int y = pbox[i].y1;
734                         int w = pbox[i].x2 - x;
735                         int h = pbox[i].y2 - y;
736
737                         DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n",
738                                    x, y, w, h, flags );
739
740                         if ( flags & RADEON_FRONT ) {
741                                 BEGIN_RING( 6 );
742                                 
743                                 OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
744                                 OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
745                                           RADEON_GMC_BRUSH_SOLID_COLOR |
746                                           (dev_priv->color_fmt << 8) |
747                                           RADEON_GMC_SRC_DATATYPE_COLOR |
748                                           RADEON_ROP3_P |
749                                           RADEON_GMC_CLR_CMP_CNTL_DIS );
750
751                                 OUT_RING( dev_priv->front_pitch_offset );
752                                 OUT_RING( clear->clear_color );
753                                 
754                                 OUT_RING( (x << 16) | y );
755                                 OUT_RING( (w << 16) | h );
756                                 
757                                 ADVANCE_RING();
758                         }
759                         
760                         if ( flags & RADEON_BACK ) {
761                                 BEGIN_RING( 6 );
762                                 
763                                 OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
764                                 OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
765                                           RADEON_GMC_BRUSH_SOLID_COLOR |
766                                           (dev_priv->color_fmt << 8) |
767                                           RADEON_GMC_SRC_DATATYPE_COLOR |
768                                           RADEON_ROP3_P |
769                                           RADEON_GMC_CLR_CMP_CNTL_DIS );
770                                 
771                                 OUT_RING( dev_priv->back_pitch_offset );
772                                 OUT_RING( clear->clear_color );
773
774                                 OUT_RING( (x << 16) | y );
775                                 OUT_RING( (w << 16) | h );
776
777                                 ADVANCE_RING();
778                         }
779                 }
780         }
781         
782         /* hyper z clear */
783         /* no docs available, based on reverse engineering by Stephane Marchesin */
784         if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) {
785
                /* NOTE(review): this 'i' shadows the function-scope 'i'
                 * declared above; both are plain loop counters.
                 */
786                 int i;
                /* Depth pitch converted to pixels: 2 bytes per pixel for
                 * the 16-bit integer Z format, 4 bytes otherwise.
                 */
787                 int depthpixperline = dev_priv->depth_fmt==RADEON_DEPTH_FORMAT_16BIT_INT_Z? 
788                         (dev_priv->depth_pitch / 2): (dev_priv->depth_pitch / 4);
789                 
790                 u32 clearmask;
791
                /* Clear depth value with the low 8 bits of depth_mask
                 * placed in bits 24-31.
                 */
792                 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
793                         ((clear->depth_mask & 0xff) << 24);
794         
795                 
796                 /* Make sure we restore the 3D state next time.
797                  * we haven't touched any "normal" state - still need this?
798                  */
799                 dev_priv->sarea_priv->ctx_owner = 0;
800
801                 if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) {
802                 /* FIXME : reverse engineer that for Rx00 cards */
803                 /* FIXME : the mask supposedly contains low-res z values. So can't set
804                    just to the max (0xff? or actually 0x3fff?), need to take z clear
805                    value into account? */
806                 /* pattern seems to work for r100, though get slight
807                    rendering errors with glxgears. If hierz is not enabled for r100,
808                    only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
809                    other ones are ignored, and the same clear mask can be used. That's
810                    very different behaviour than R200 which needs different clear mask
811                    and different number of tiles to clear if hierz is enabled or not !?!
812                 */
813                         clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f;
814                 }
815                 else {
816                 /* clear mask : chooses the clearing pattern.
817                    rv250: could be used to clear only parts of macrotiles
818                    (but that would get really complicated...)?
819                    bit 0 and 1 (either or both of them ?!?!) are used to
820                    not clear tile (or maybe one of the bits indicates if the tile is
821                    compressed or not), bit 2 and 3 to not clear tile 1,...,.
822                    Pattern is as follows:
823                         | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
824                    bits -------------------------------------------------
825                         | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
826                    rv100: clearmask covers 2x8 4x1 tiles, but one clear still
827                    covers 256 pixels ?!?
828                 */
829                         clearmask = 0x0;
830                 }
831
832                 BEGIN_RING( 8 );
833                 RADEON_WAIT_UNTIL_2D_IDLE();
834                 OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE,
835                         tempRB3D_DEPTHCLEARVALUE);
836                 /* what offset is this exactly ? */
837                 OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 );
838                 /* need ctlstat, otherwise get some strange black flickering */
839                 OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL );
840                 ADVANCE_RING();
841
                /* For each cliprect emit one CLEAR_ZMASK packet per tile
                 * row; the three branches differ only in tile geometry.
                 */
842                 for (i = 0; i < nbox; i++) {
843                         int tileoffset, nrtilesx, nrtilesy, j;
844                         /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
845                         if ((dev_priv->flags&CHIP_HAS_HIERZ) && !(dev_priv->microcode_version==UCODE_R200)) {
846                                 /* FIXME : figure this out for r200 (when hierz is enabled). Or
847                                    maybe r200 actually doesn't need to put the low-res z value into
848                                    the tile cache like r100, but just needs to clear the hi-level z-buffer?
849                                    Works for R100, both with hierz and without.
850                                    R100 seems to operate on 2x1 8x8 tiles, but...
851                                    odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
852                                    problematic with resolutions which are not 64 pix aligned? */
853                                 tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6;
854                                 nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
855                                 nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
856                                 for (j = 0; j <= nrtilesy; j++) {
857                                         BEGIN_RING( 4 );
858                                         OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
859                                         /* first tile */
860                                         OUT_RING( tileoffset * 8 );
861                                         /* the number of tiles to clear */
862                                         OUT_RING( nrtilesx + 4 );
863                                         /* clear mask : chooses the clearing pattern. */
864                                         OUT_RING( clearmask );
865                                         ADVANCE_RING();
866                                         tileoffset += depthpixperline >> 6;
867                                 }
868                         }
869                         else if (dev_priv->microcode_version==UCODE_R200) {
870                                 /* works for rv250. */
871                                 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
872                                 tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5;
873                                 nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
874                                 nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
875                                 for (j = 0; j <= nrtilesy; j++) {
876                                         BEGIN_RING( 4 );
877                                         OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
878                                         /* first tile */
879                                         /* judging by the first tile offset needed, could possibly
880                                            directly address/clear 4x4 tiles instead of 8x2 * 4x4
881                                            macro tiles, though would still need clear mask for
882                                            right/bottom if truly 4x4 granularity is desired ? */
883                                         OUT_RING( tileoffset * 16 );
884                                         /* the number of tiles to clear */
885                                         OUT_RING( nrtilesx + 1 );
886                                         /* clear mask : chooses the clearing pattern. */
887                                         OUT_RING( clearmask );
888                                         ADVANCE_RING();
889                                         tileoffset += depthpixperline >> 5;
890                                 }
891                         }
892                         else { /* rv 100 */
893                                 /* rv100 might not need 64 pix alignment, who knows */
894                                 /* offsets are, hmm, weird */
895                                 tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6;
896                                 nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
897                                 nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
898                                 for (j = 0; j <= nrtilesy; j++) {
899                                         BEGIN_RING( 4 );
900                                         OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
901                                         OUT_RING( tileoffset * 128 );
902                                         /* the number of tiles to clear */
903                                         OUT_RING( nrtilesx + 4 );
904                                         /* clear mask : chooses the clearing pattern. */
905                                         OUT_RING( clearmask );
906                                         ADVANCE_RING();
907                                         tileoffset += depthpixperline >> 6;
908                                 }
909                         }
910                 }
911
912                 /* TODO don't always clear all hi-level z tiles */
913                 if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version==UCODE_R200)
914                         && (flags & RADEON_USE_HIERZ))
915                 /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
916                 /* FIXME : the mask supposedly contains low-res z values. So can't set
917                    just to the max (0xff? or actually 0x3fff?), need to take z clear
918                    value into account? */
919                 {
920                         BEGIN_RING( 4 );
921                         OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) );
922                         OUT_RING( 0x0 ); /* First tile */
923                         OUT_RING( 0x3cc0 );
924                         OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f);
925                         ADVANCE_RING();
926                 }
927         }
928
929         /* We have to clear the depth and/or stencil buffers by
930          * rendering a quad into just those buffers.  Thus, we have to
931          * make sure the 3D engine is configured correctly.
932          */
933         if ((dev_priv->microcode_version == UCODE_R200) &&
934             (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
935
936                 int tempPP_CNTL;
937                 int tempRE_CNTL;
938                 int tempRB3D_CNTL;
939                 int tempRB3D_ZSTENCILCNTL;
940                 int tempRB3D_STENCILREFMASK;
941                 int tempRB3D_PLANEMASK;
942                 int tempSE_CNTL;
943                 int tempSE_VTE_CNTL;
944                 int tempSE_VTX_FMT_0;
945                 int tempSE_VTX_FMT_1;
946                 int tempSE_VAP_CNTL;
947                 int tempRE_AUX_SCISSOR_CNTL;
948
949                 tempPP_CNTL = 0;
950                 tempRE_CNTL = 0;
951
                /* Start from the register values cached in
                 * dev_priv->depth_clear and adjust them below.
                 */
952                 tempRB3D_CNTL = depth_clear->rb3d_cntl;
953
954                 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
955                 tempRB3D_STENCILREFMASK = 0x0;
956
957                 tempSE_CNTL = depth_clear->se_cntl;
958
959
960
961                 /* Disable TCL */
962
963                 tempSE_VAP_CNTL = (/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
964                                    (0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
965
                /* A zero plane mask is written so that only depth and
                 * stencil are affected (presumably it masks off all color
                 * writes -- confirm against the register documentation).
                 */
966                 tempRB3D_PLANEMASK = 0x0;
967
968                 tempRE_AUX_SCISSOR_CNTL = 0x0;
969
970                 tempSE_VTE_CNTL =
971                         SE_VTE_CNTL__VTX_XY_FMT_MASK |
972                         SE_VTE_CNTL__VTX_Z_FMT_MASK;
973
974                 /* Vertex format (X, Y, Z, W)*/
975                 tempSE_VTX_FMT_0 =
976                         SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
977                         SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
978                 tempSE_VTX_FMT_1 = 0x0;
979
980
981                 /* 
982                  * Depth buffer specific enables 
983                  */
984                 if (flags & RADEON_DEPTH) {
985                         /* Enable depth buffer */
986                         tempRB3D_CNTL |= RADEON_Z_ENABLE;
987                 } else {
988                         /* Disable depth buffer */
989                         tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
990                 }
991
992                 /* 
993                  * Stencil buffer specific enables
994                  */
995                 if ( flags & RADEON_STENCIL ) {
996                         tempRB3D_CNTL |=  RADEON_STENCIL_ENABLE;
997                         tempRB3D_STENCILREFMASK = clear->depth_mask; 
998                 } else {
999                         tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1000                         tempRB3D_STENCILREFMASK = 0x00000000;
1001                 }
1002
1003                 if (flags & RADEON_USE_COMP_ZBUF) {
1004                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1005                                 RADEON_Z_DECOMPRESSION_ENABLE;
1006                 }
1007                 if (flags & RADEON_USE_HIERZ) {
1008                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1009                 }
1010
                /* 26 dwords: the 2-dword wait plus twelve register writes
                 * of 2 dwords each.
                 */
1011                 BEGIN_RING( 26 );
1012                 RADEON_WAIT_UNTIL_2D_IDLE();
1013
1014                 OUT_RING_REG( RADEON_PP_CNTL, tempPP_CNTL );
1015                 OUT_RING_REG( R200_RE_CNTL, tempRE_CNTL );
1016                 OUT_RING_REG( RADEON_RB3D_CNTL, tempRB3D_CNTL );
1017                 OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
1018                               tempRB3D_ZSTENCILCNTL );
1019                 OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, 
1020                               tempRB3D_STENCILREFMASK );
1021                 OUT_RING_REG( RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK );
1022                 OUT_RING_REG( RADEON_SE_CNTL, tempSE_CNTL );
1023                 OUT_RING_REG( R200_SE_VTE_CNTL, tempSE_VTE_CNTL );
1024                 OUT_RING_REG( R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0 );
1025                 OUT_RING_REG( R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1 );
1026                 OUT_RING_REG( R200_SE_VAP_CNTL, tempSE_VAP_CNTL );
1027                 OUT_RING_REG( R200_RE_AUX_SCISSOR_CNTL, 
1028                               tempRE_AUX_SCISSOR_CNTL );
1029                 ADVANCE_RING();
1030
1031                 /* Make sure we restore the 3D state next time.
1032                  */
1033                 dev_priv->sarea_priv->ctx_owner = 0;
1034
1035                 for ( i = 0 ; i < nbox ; i++ ) {
1036                         
1037                         /* Funny that this should be required -- 
1038                          *  sets top-left?
1039                          */
1040                         radeon_emit_clip_rect( dev_priv,
1041                                                &sarea_priv->boxes[i] );
1042
                        /* One RECT_LIST of three (X, Y, Z, W) vertices taken
                         * from depth_boxes[i]; 0x3f800000 is the IEEE-754
                         * single-precision encoding of 1.0, used as W.
                         */
1043                         BEGIN_RING( 14 );
1044                         OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 12 ) );
1045                         OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
1046                                    RADEON_PRIM_WALK_RING |
1047                                    (3 << RADEON_NUM_VERTICES_SHIFT)) );
1048                         OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
1049                         OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
1050                         OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1051                         OUT_RING( 0x3f800000 );
1052                         OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
1053                         OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
1054                         OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1055                         OUT_RING( 0x3f800000 );
1056                         OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
1057                         OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
1058                         OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1059                         OUT_RING( 0x3f800000 );
1060                         ADVANCE_RING();
1061                 }
1062         } 
1063         else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {
1064
                /* Same depth/stencil clear for chips whose microcode is
                 * not UCODE_R200.
                 */
1065                 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1066
1067                 rb3d_cntl = depth_clear->rb3d_cntl;
1068
1069                 if ( flags & RADEON_DEPTH ) {
1070                         rb3d_cntl |=  RADEON_Z_ENABLE;
1071                 } else {
1072                         rb3d_cntl &= ~RADEON_Z_ENABLE;
1073                 }
1074
1075                 if ( flags & RADEON_STENCIL ) {
1076                         rb3d_cntl |=  RADEON_STENCIL_ENABLE;
1077                         rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */
1078                 } else {
1079                         rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1080                         rb3d_stencilrefmask = 0x00000000;
1081                 }
1082
1083                 if (flags & RADEON_USE_COMP_ZBUF) {
1084                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1085                                 RADEON_Z_DECOMPRESSION_ENABLE;
1086                 }
1087                 if (flags & RADEON_USE_HIERZ) {
1088                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1089                 }
1090
                /* 13 dwords: the 2-dword wait, a 3-dword PACKET0 that
                 * writes two consecutive registers starting at
                 * RADEON_PP_CNTL, and four 2-dword register writes.
                 */
1091                 BEGIN_RING( 13 );
1092                 RADEON_WAIT_UNTIL_2D_IDLE();
1093
1094                 OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) );
1095                 OUT_RING( 0x00000000 );
1096                 OUT_RING( rb3d_cntl );
1097                 
1098                 OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL );
1099                 OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
1100                               rb3d_stencilrefmask );
1101                 OUT_RING_REG( RADEON_RB3D_PLANEMASK,
1102                               0x00000000 );
1103                 OUT_RING_REG( RADEON_SE_CNTL,
1104                               depth_clear->se_cntl );
1105                 ADVANCE_RING();
1106
1107                 /* Make sure we restore the 3D state next time.
1108                  */
1109                 dev_priv->sarea_priv->ctx_owner = 0;
1110
1111                 for ( i = 0 ; i < nbox ; i++ ) {
1112                         
1113                         /* Funny that this should be required -- 
1114                          *  sets top-left?
1115                          */
1116                         radeon_emit_clip_rect( dev_priv,
1117                                                &sarea_priv->boxes[i] );
1118
                        /* Three vertices of X, Y, Z plus one further dword
                         * emitted as 0 (presumably the packed color named by
                         * RADEON_VTX_PKCOLOR_PRESENT -- confirm).
                         */
1119                         BEGIN_RING( 15 );
1120
1121                         OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) );
1122                         OUT_RING( RADEON_VTX_Z_PRESENT |
1123                                   RADEON_VTX_PKCOLOR_PRESENT);
1124                         OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
1125                                    RADEON_PRIM_WALK_RING |
1126                                    RADEON_MAOS_ENABLE |
1127                                    RADEON_VTX_FMT_RADEON_MODE |
1128                                    (3 << RADEON_NUM_VERTICES_SHIFT)) );
1129
1130
1131                         OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
1132                         OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
1133                         OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1134                         OUT_RING( 0x0 );
1135
1136                         OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
1137                         OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
1138                         OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1139                         OUT_RING( 0x0 );
1140
1141                         OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
1142                         OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
1143                         OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1144                         OUT_RING( 0x0 );
1145
1146                         ADVANCE_RING();
1147                 }
1148         }
1149
1150         /* Increment the clear counter.  The client-side 3D driver must
1151          * wait on this value before performing the clear ioctl.  We
1152          * need this because the card's so damned fast...
1153          */
1154         dev_priv->sarea_priv->last_clear++;
1155
1156         BEGIN_RING( 4 );
1157
1158         RADEON_CLEAR_AGE( dev_priv->sarea_priv->last_clear );
1159         RADEON_WAIT_UNTIL_IDLE();
1160
1161         ADVANCE_RING();
1162 }
1163
1164 static void radeon_cp_dispatch_swap( drm_device_t *dev )
1165 {
1166         drm_radeon_private_t *dev_priv = dev->dev_private;
1167         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1168         int nbox = sarea_priv->nbox;
1169         drm_clip_rect_t *pbox = sarea_priv->boxes;
1170         int i;
1171         RING_LOCALS;
1172         DRM_DEBUG( "\n" );
1173
1174         /* Do some trivial performance monitoring...
1175          */
1176         if (dev_priv->do_boxes)
1177                 radeon_cp_performance_boxes( dev_priv );
1178
1179
1180         /* Wait for the 3D stream to idle before dispatching the bitblt.
1181          * This will prevent data corruption between the two streams.
1182          */
1183         BEGIN_RING( 2 );
1184
1185         RADEON_WAIT_UNTIL_3D_IDLE();
1186
1187         ADVANCE_RING();
1188
1189         for ( i = 0 ; i < nbox ; i++ ) {
1190                 int x = pbox[i].x1;
1191                 int y = pbox[i].y1;
1192                 int w = pbox[i].x2 - x;
1193                 int h = pbox[i].y2 - y;
1194
1195                 DRM_DEBUG( "dispatch swap %d,%d-%d,%d\n",
1196                            x, y, w, h );
1197
1198                 BEGIN_RING( 7 );
1199
1200                 OUT_RING( CP_PACKET3( RADEON_CNTL_BITBLT_MULTI, 5 ) );
1201                 OUT_RING( RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1202                           RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1203                           RADEON_GMC_BRUSH_NONE |
1204                           (dev_priv->color_fmt << 8) |
1205                           RADEON_GMC_SRC_DATATYPE_COLOR |
1206                           RADEON_ROP3_S |
1207                           RADEON_DP_SRC_SOURCE_MEMORY |
1208                           RADEON_GMC_CLR_CMP_CNTL_DIS |
1209                           RADEON_GMC_WR_MSK_DIS );
1210                 
1211                 /* Make this work even if front & back are flipped:
1212                  */
1213                 if (dev_priv->current_page == 0) {
1214                         OUT_RING( dev_priv->back_pitch_offset );
1215                         OUT_RING( dev_priv->front_pitch_offset );
1216                 } 
1217                 else {
1218                         OUT_RING( dev_priv->front_pitch_offset );
1219                         OUT_RING( dev_priv->back_pitch_offset );
1220                 }
1221
1222                 OUT_RING( (x << 16) | y );
1223                 OUT_RING( (x << 16) | y );
1224                 OUT_RING( (w << 16) | h );
1225
1226                 ADVANCE_RING();
1227         }
1228
1229         /* Increment the frame counter.  The client-side 3D driver must
1230          * throttle the framerate by waiting for this value before
1231          * performing the swapbuffer ioctl.
1232          */
1233         dev_priv->sarea_priv->last_frame++;
1234
1235         BEGIN_RING( 4 );
1236
1237         RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );
1238         RADEON_WAIT_UNTIL_2D_IDLE();
1239
1240         ADVANCE_RING();
1241 }
1242
1243 static void radeon_cp_dispatch_flip( drm_device_t *dev )
1244 {
1245         drm_radeon_private_t *dev_priv = dev->dev_private;
1246         drm_sarea_t *sarea = (drm_sarea_t *)dev_priv->sarea->handle;
1247         int offset = (dev_priv->current_page == 1)
1248                    ? dev_priv->front_offset : dev_priv->back_offset;
1249         RING_LOCALS;
1250         DRM_DEBUG( "%s: page=%d pfCurrentPage=%d\n", 
1251                 __FUNCTION__, 
1252                 dev_priv->current_page,
1253                 dev_priv->sarea_priv->pfCurrentPage);
1254
1255         /* Do some trivial performance monitoring...
1256          */
1257         if (dev_priv->do_boxes) {
1258                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1259                 radeon_cp_performance_boxes( dev_priv );
1260         }
1261
1262         /* Update the frame offsets for both CRTCs
1263          */
1264         BEGIN_RING( 6 );
1265
1266         RADEON_WAIT_UNTIL_3D_IDLE();
1267         OUT_RING_REG( RADEON_CRTC_OFFSET, ( ( sarea->frame.y * dev_priv->front_pitch
1268                                               + sarea->frame.x 
1269                                               * ( dev_priv->color_fmt - 2 ) ) & ~7 )
1270                                           + offset );
1271         OUT_RING_REG( RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1272                                            + offset );
1273
1274         ADVANCE_RING();
1275
1276         /* Increment the frame counter.  The client-side 3D driver must
1277          * throttle the framerate by waiting for this value before
1278          * performing the swapbuffer ioctl.
1279          */
1280         dev_priv->sarea_priv->last_frame++;
1281         dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
1282                                               1 - dev_priv->current_page;
1283
1284         BEGIN_RING( 2 );
1285
1286         RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );
1287
1288         ADVANCE_RING();
1289 }
1290
1291 static int bad_prim_vertex_nr( int primitive, int nr )
1292 {
1293         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1294         case RADEON_PRIM_TYPE_NONE:
1295         case RADEON_PRIM_TYPE_POINT:
1296                 return nr < 1;
1297         case RADEON_PRIM_TYPE_LINE:
1298                 return (nr & 1) || nr == 0;
1299         case RADEON_PRIM_TYPE_LINE_STRIP:
1300                 return nr < 2;
1301         case RADEON_PRIM_TYPE_TRI_LIST:
1302         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1303         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1304         case RADEON_PRIM_TYPE_RECT_LIST:
1305                 return nr % 3 || nr == 0;
1306         case RADEON_PRIM_TYPE_TRI_FAN:
1307         case RADEON_PRIM_TYPE_TRI_STRIP:
1308                 return nr < 3;
1309         default:
1310                 return 1;
1311         }       
1312 }
1313
1314
1315
/* Description of one primitive to be drawn from a vertex buffer.
 * Field order and types are part of the layout and must not change.
 */
typedef struct {
        unsigned int start;     /* offset of the first vertex within the buffer */
        unsigned int finish;    /* end of the vertex data (presumably an offset like start) */
        unsigned int prim;      /* hardware primitive word; type bits under RADEON_PRIM_TYPE_MASK */
        unsigned int numverts;  /* number of vertices to draw */
        unsigned int offset;    /* NOTE(review): not used by the code shown here -- confirm meaning */
        unsigned int vc_format; /* vertex format word emitted with the draw packet */
} drm_radeon_tcl_prim_t;
1324
1325 static void radeon_cp_dispatch_vertex( drm_device_t *dev,
1326                                        drm_buf_t *buf,
1327                                        drm_radeon_tcl_prim_t *prim )
1328
1329 {
1330         drm_radeon_private_t *dev_priv = dev->dev_private;
1331         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1332         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1333         int numverts = (int)prim->numverts;
1334         int nbox = sarea_priv->nbox;
1335         int i = 0;
1336         RING_LOCALS;
1337
1338         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1339                   prim->prim,
1340                   prim->vc_format,
1341                   prim->start,
1342                   prim->finish,
1343                   prim->numverts);
1344
1345         if (bad_prim_vertex_nr( prim->prim, prim->numverts )) {
1346                 DRM_ERROR( "bad prim %x numverts %d\n", 
1347                            prim->prim, prim->numverts );
1348                 return;
1349         }
1350
1351         do {
1352                 /* Emit the next cliprect */
1353                 if ( i < nbox ) {
1354                         radeon_emit_clip_rect( dev_priv, 
1355                                                &sarea_priv->boxes[i] );
1356                 }
1357
1358                 /* Emit the vertex buffer rendering commands */
1359                 BEGIN_RING( 5 );
1360
1361                 OUT_RING( CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, 3 ) );
1362                 OUT_RING( offset );
1363                 OUT_RING( numverts );
1364                 OUT_RING( prim->vc_format );
1365                 OUT_RING( prim->prim | RADEON_PRIM_WALK_LIST |
1366                           RADEON_COLOR_ORDER_RGBA |
1367                           RADEON_VTX_FMT_RADEON_MODE |
1368                           (numverts << RADEON_NUM_VERTICES_SHIFT) );
1369
1370                 ADVANCE_RING();
1371
1372                 i++;
1373         } while ( i < nbox );
1374 }
1375
1376
1377
1378 static void radeon_cp_discard_buffer( drm_device_t *dev, drm_buf_t *buf )
1379 {
1380         drm_radeon_private_t *dev_priv = dev->dev_private;
1381         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1382         RING_LOCALS;
1383
1384         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1385
1386         /* Emit the vertex buffer age */
1387         BEGIN_RING( 2 );
1388         RADEON_DISPATCH_AGE( buf_priv->age );
1389         ADVANCE_RING();
1390
1391         buf->pending = 1;
1392         buf->used = 0;
1393 }
1394
1395 static void radeon_cp_dispatch_indirect( drm_device_t *dev,
1396                                          drm_buf_t *buf,
1397                                          int start, int end )
1398 {
1399         drm_radeon_private_t *dev_priv = dev->dev_private;
1400         RING_LOCALS;
1401         DRM_DEBUG( "indirect: buf=%d s=0x%x e=0x%x\n",
1402                    buf->idx, start, end );
1403
1404         if ( start != end ) {
1405                 int offset = (dev_priv->gart_buffers_offset
1406                               + buf->offset + start);
1407                 int dwords = (end - start + 3) / sizeof(u32);
1408
1409                 /* Indirect buffer data must be an even number of
1410                  * dwords, so if we've been given an odd number we must
1411                  * pad the data with a Type-2 CP packet.
1412                  */
1413                 if ( dwords & 1 ) {
1414                         u32 *data = (u32 *)
1415                                 ((char *)dev->agp_buffer_map->handle
1416                                  + buf->offset + start);
1417                         data[dwords++] = RADEON_CP_PACKET2;
1418                 }
1419
1420                 /* Fire off the indirect buffer */
1421                 BEGIN_RING( 3 );
1422
1423                 OUT_RING( CP_PACKET0( RADEON_CP_IB_BASE, 1 ) );
1424                 OUT_RING( offset );
1425                 OUT_RING( dwords );
1426
1427                 ADVANCE_RING();
1428         }
1429 }
1430
1431
1432 static void radeon_cp_dispatch_indices( drm_device_t *dev,
1433                                         drm_buf_t *elt_buf,
1434                                         drm_radeon_tcl_prim_t *prim )
1435 {
1436         drm_radeon_private_t *dev_priv = dev->dev_private;
1437         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1438         int offset = dev_priv->gart_buffers_offset + prim->offset;
1439         u32 *data;
1440         int dwords;
1441         int i = 0;
1442         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1443         int count = (prim->finish - start) / sizeof(u16);
1444         int nbox = sarea_priv->nbox;
1445
1446         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1447                   prim->prim,
1448                   prim->vc_format,
1449                   prim->start,
1450                   prim->finish,
1451                   prim->offset,
1452                   prim->numverts);
1453
1454         if (bad_prim_vertex_nr( prim->prim, count )) {
1455                 DRM_ERROR( "bad prim %x count %d\n", 
1456                            prim->prim, count );
1457                 return;
1458         }
1459
1460
1461         if ( start >= prim->finish ||
1462              (prim->start & 0x7) ) {
1463                 DRM_ERROR( "buffer prim %d\n", prim->prim );
1464                 return;
1465         }
1466
1467         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1468
1469         data = (u32 *)((char *)dev->agp_buffer_map->handle +
1470                        elt_buf->offset + prim->start);
1471
1472         data[0] = CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, dwords-2 );
1473         data[1] = offset;
1474         data[2] = prim->numverts;
1475         data[3] = prim->vc_format;
1476         data[4] = (prim->prim |
1477                    RADEON_PRIM_WALK_IND |
1478                    RADEON_COLOR_ORDER_RGBA |
1479                    RADEON_VTX_FMT_RADEON_MODE |
1480                    (count << RADEON_NUM_VERTICES_SHIFT) );
1481
1482         do {
1483                 if ( i < nbox ) 
1484                         radeon_emit_clip_rect( dev_priv, 
1485                                                &sarea_priv->boxes[i] );
1486
1487                 radeon_cp_dispatch_indirect( dev, elt_buf,
1488                                              prim->start,
1489                                              prim->finish );
1490
1491                 i++;
1492         } while ( i < nbox );
1493
1494 }
1495
/* Upper bound, in bytes, on the image data staged per pass by
 * radeon_cp_dispatch_texture(); defined as RADEON_BUFFER_SIZE.
 */
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE

/* Upload a texture image from user space and blit it to its destination.
 *
 * The image is staged through buffers taken from the freelist.  Each pass
 * copies at most RADEON_MAX_TEXTURE_SIZE bytes from image->data into one
 * buffer, emits a RADEON_CNTL_BITBLT_MULTI packet whose source is that
 * buffer and whose destination is tex->offset, and then discards the
 * buffer.  image->y, image->height and image->data are advanced after
 * every pass, so *image is modified; image->x is also doubled once for
 * narrow micro-tiled textures.
 *
 * Returns 0 on success or when there is nothing to upload.  Errors are
 * DRM_ERR(EINVAL) for a rejected destination offset, an unknown texture
 * format, or a multi-row image whose blit width is below 64 bytes;
 * DRM_ERR(EFAULT) when a user-space copy fails; and DRM_ERR(EAGAIN) when
 * no buffer is available.  Before returning EAGAIN the current *image is
 * copied back to tex->image (presumably so the upload can be resumed).
 */
static int radeon_cp_dispatch_texture( DRMFILE filp,
                                       drm_device_t *dev,
                                       drm_radeon_texture_t *tex,
                                       drm_radeon_tex_image_t *image )
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_file_t *filp_priv;
        drm_buf_t *buf;
        u32 format;
        u32 *buffer;
        const u8 __user *data;
        int size, dwords, tex_width, blit_width, spitch;
        u32 height;
        int i;
        u32 texpitch, microtile;
        u32 offset;
        RING_LOCALS;

        DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

        if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &tex->offset ) ) {
                DRM_ERROR( "Invalid destination offset\n" );
                return DRM_ERR( EINVAL );
        }

        dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

        /* Flush the pixel cache.  This ensures no pixel data gets mixed
         * up with the texture data from the host data blit, otherwise
         * part of the texture image may be corrupted.
         */
        BEGIN_RING( 4 );
        RADEON_FLUSH_CACHE();
        RADEON_WAIT_UNTIL_IDLE();
        ADVANCE_RING();

        /* The compiler won't optimize away a division by a variable,
         * even if the only legal values are powers of two.  Thus, we'll
         * use a shift instead.
         */
        /* Pick the blitter colour format and convert the texture and image
         * widths from pixels to bytes (4, 2 or 1 bytes per pixel).
         */
        switch ( tex->format ) {
        case RADEON_TXFORMAT_ARGB8888:
        case RADEON_TXFORMAT_RGBA8888:
                format = RADEON_COLOR_FORMAT_ARGB8888;
                tex_width = tex->width * 4;
                blit_width = image->width * 4;
                break;
        case RADEON_TXFORMAT_AI88:
        case RADEON_TXFORMAT_ARGB1555:
        case RADEON_TXFORMAT_RGB565:
        case RADEON_TXFORMAT_ARGB4444:
        case RADEON_TXFORMAT_VYUY422:
        case RADEON_TXFORMAT_YVYU422:
                format = RADEON_COLOR_FORMAT_RGB565;
                tex_width = tex->width * 2;
                blit_width = image->width * 2;
                break;
        case RADEON_TXFORMAT_I8:
        case RADEON_TXFORMAT_RGB332:
                format = RADEON_COLOR_FORMAT_CI8;
                tex_width = tex->width * 1;
                blit_width = image->width * 1;
                break;
        default:
                DRM_ERROR( "invalid texture format %d\n", tex->format );
                return DRM_ERR(EINVAL);
        }
        /* Source pitch in units of 64 bytes; a pitch of zero is only
         * accepted for images that are at most one row high.
         */
        spitch = blit_width >> 6;
        if (spitch == 0 && image->height > 1)
                return DRM_ERR(EINVAL);

        /* Detect the micro-tiling flag carried in the pitch word.  For
         * textures narrower than 64 bytes the flag is cleared from the
         * pitch that is sent to the blitter, because the data is tiled by
         * hand in the copy loops below.
         */
        texpitch = tex->pitch;
        if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
                microtile = 1;
                if (tex_width < 64) {
                        texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
                        /* we got tiled coordinates, untile them */
                        image->x *= 2;
                }
        }
        else microtile = 0;

        DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );

        /* One iteration per staging buffer, until no image rows remain. */
        do {
                DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
                           tex->offset >> 10, tex->pitch, tex->format,
                           image->x, image->y, image->width, image->height );

                /* Make a copy of some parameters in case we have to
                 * update them for a multi-pass texture blit.
                 */
                height = image->height;
                data = (const u8 __user *)image->data;

                size = height * blit_width;

                /* Clamp this pass to RADEON_MAX_TEXTURE_SIZE bytes, round a
                 * tiny upload up to one dword, and finish early when there
                 * is nothing to copy.
                 */
                if ( size > RADEON_MAX_TEXTURE_SIZE ) {
                        height = RADEON_MAX_TEXTURE_SIZE / blit_width;
                        size = height * blit_width;
                } else if ( size < 4 && size > 0 ) {
                        size = 4;
                } else if ( size == 0 ) {
                        return 0;
                }

                buf = radeon_freelist_get( dev );
                /* The constant 0 disables this idle-and-retry path. */
                if ( 0 && !buf ) {
                        radeon_do_cp_idle( dev_priv );
                        buf = radeon_freelist_get( dev );
                }
                if ( !buf ) {
                        DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
                        /* Hand the partially consumed image state back to
                         * user space before reporting EAGAIN.
                         */
                        if (DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ))
                                return DRM_ERR(EFAULT);
                        return DRM_ERR(EAGAIN);
                }


                /* Dispatch the indirect buffer.
                 */
                buffer = (u32*)((char*)dev->agp_buffer_map->handle + buf->offset);
                dwords = size / 4;

                if (microtile) {
                        /* texture micro tiling in use, minimum texture width is thus 16 bytes.
                           however, we cannot use blitter directly for texture width < 64 bytes,
                           since minimum tex pitch is 64 bytes and we need this to match
                           the texture width, otherwise the blitter will tile it wrong.
                           Thus, tiling manually in this case. Additionally, need to special
                           case tex height = 1, since our actual image will have height 2
                           and we need to ensure we don't read beyond the texture size
                           from user space. */
                        if (tex->height == 1) {
                                if (tex_width >= 64 || tex_width <= 16) {
                                        /* NOTE(review): tex_width is already a byte
                                         * count (see the format switch), so this copies
                                         * four times tex_width bytes -- confirm that
                                         * this is intended.
                                         */
                                        if (DRM_COPY_FROM_USER(buffer, data,
                                                               tex_width * sizeof(u32))) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                } else if (tex_width == 32) {
                                        /* Split the single 32-byte row into two
                                         * 16-byte halves; buffer is a u32 pointer, so
                                         * the second half lands 32 bytes in.
                                         */
                                        if (DRM_COPY_FROM_USER(buffer, data, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        if (DRM_COPY_FROM_USER(buffer + 8, data + 16, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                }
                        } else if (tex_width >= 64 || tex_width == 16) {
                                /* Straight copy of the whole pass. */
                                if (DRM_COPY_FROM_USER(buffer, data,
                                                       dwords * sizeof(u32))) {
                                        DRM_ERROR("EFAULT on data, %d dwords\n",
                                                  dwords);
                                        return DRM_ERR(EFAULT);
                                }
                        } else if (tex_width < 16) {
                                /* Copy row by row, placing each row 16 bytes
                                 * (4 dwords) after the previous one.
                                 * NOTE(review): the loop bound is tex->height, not
                                 * the per-pass `height' -- confirm.
                                 */
                                for (i = 0; i < tex->height; i++) {
                                        if (DRM_COPY_FROM_USER(buffer, data, tex_width)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        buffer += 4;
                                        data += tex_width;
                                }
                        } else if (tex_width == 32) {
                                /* TODO: make sure this works when not fitting in one buffer
                                   (i.e. 32bytes x 2048...) */
                                /* Two rows (64 bytes) per iteration: the four
                                 * consecutive 16-byte source chunks are stored at
                                 * dword offsets 0, 8, 4 and 12 of the destination.
                                 */
                                for (i = 0; i < tex->height; i += 2) {
                                        if (DRM_COPY_FROM_USER(buffer, data, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        data += 16;
                                        if (DRM_COPY_FROM_USER(buffer + 8, data, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        data += 16;
                                        if (DRM_COPY_FROM_USER(buffer + 4, data, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        data += 16;
                                        if (DRM_COPY_FROM_USER(buffer + 12, data, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        data += 16;
                                        buffer += 16;
                                }
                        }
                }
                else {
                        if (tex_width >= 32) {
                                /* Texture image width is larger than the minimum, so we
                                 * can upload it directly.
                                 */
                                if (DRM_COPY_FROM_USER(buffer, data,
                                                       dwords * sizeof(u32))) {
                                        DRM_ERROR("EFAULT on data, %d dwords\n",
                                                  dwords);
                                        return DRM_ERR(EFAULT);
                                }
                        } else {
                                /* Texture image width is less than the minimum, so we
                                 * need to pad out each image scanline to the minimum
                                 * width.
                                 */
                                /* Each row is placed 32 bytes (8 dwords) after the
                                 * previous one.  NOTE(review): the loop bound is
                                 * tex->height, not the per-pass `height' -- confirm.
                                 */
                                for (i = 0 ; i < tex->height ; i++) {
                                        if (DRM_COPY_FROM_USER(buffer, data, tex_width )) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n", tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        buffer += 8;
                                        data += tex_width;
                                }
                        }
                }

                /* Blit from the staging buffer (source pitch/offset word) to
                 * the texture (destination pitch/offset word) at image x/y,
                 * covering image->width by this pass's height.
                 */
                buf->filp = filp;
                buf->used = size;
                offset = dev_priv->gart_buffers_offset + buf->offset;
                BEGIN_RING(9);
                OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
                OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
                         RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                         RADEON_GMC_BRUSH_NONE |
                         (format << 8) |
                         RADEON_GMC_SRC_DATATYPE_COLOR |
                         RADEON_ROP3_S |
                         RADEON_DP_SRC_SOURCE_MEMORY |
                         RADEON_GMC_CLR_CMP_CNTL_DIS |
                         RADEON_GMC_WR_MSK_DIS );
                OUT_RING((spitch << 22) | (offset >> 10));
                OUT_RING((texpitch << 22) | (tex->offset >> 10));
                OUT_RING(0);
                OUT_RING((image->x << 16) | image->y);
                OUT_RING((image->width << 16) | height);
                RADEON_WAIT_UNTIL_2D_IDLE();
                ADVANCE_RING();

                radeon_cp_discard_buffer(dev, buf);

                /* Update the input parameters for next time */
                image->y += height;
                image->height -= height;
                image->data = (const u8 __user *)image->data + size;
        } while (image->height > 0);

        /* Flush the pixel cache after the blit completes.  This ensures
         * the texture data is written out to memory before rendering
         * continues.
         */
        BEGIN_RING( 4 );
        RADEON_FLUSH_CACHE();
        RADEON_WAIT_UNTIL_2D_IDLE();
        ADVANCE_RING();
        return 0;
}
1767
1768
1769 static void radeon_cp_dispatch_stipple( drm_device_t *dev, u32 *stipple )
1770 {
1771         drm_radeon_private_t *dev_priv = dev->dev_private;
1772         int i;
1773         RING_LOCALS;
1774         DRM_DEBUG( "\n" );
1775
1776         BEGIN_RING( 35 );
1777
1778         OUT_RING( CP_PACKET0( RADEON_RE_STIPPLE_ADDR, 0 ) );
1779         OUT_RING( 0x00000000 );
1780
1781         OUT_RING( CP_PACKET0_TABLE( RADEON_RE_STIPPLE_DATA, 31 ) );
1782         for ( i = 0 ; i < 32 ; i++ ) {
1783                 OUT_RING( stipple[i] );
1784         }
1785
1786         ADVANCE_RING();
1787 }
1788
1789 static void radeon_apply_surface_regs(int surf_index, drm_radeon_private_t *dev_priv)
1790 {
1791         if (!dev_priv->mmio)
1792                 return;
1793
1794         radeon_do_cp_idle(dev_priv);
1795
1796         RADEON_WRITE(RADEON_SURFACE0_INFO + 16*surf_index,
1797                 dev_priv->surfaces[surf_index].flags);
1798         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16*surf_index,
1799                 dev_priv->surfaces[surf_index].lower);
1800         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16*surf_index,
1801                 dev_priv->surfaces[surf_index].upper);
1802 }
1803
1804
1805 /* Allocates a virtual surface
1806  * doesn't always allocate a real surface, will stretch an existing 
1807  * surface when possible.
1808  *
1809  * Note that refcount can be at most 2, since during a free refcount=3
1810  * might mean we have to allocate a new surface which might not always
1811  * be available.
1812  * For example : we allocate three contigous surfaces ABC. If B is 
1813  * freed, we suddenly need two surfaces to store A and C, which might
1814  * not always be available.
1815  */
1816 static int alloc_surface(drm_radeon_surface_alloc_t* new, drm_radeon_private_t *dev_priv, DRMFILE filp)
1817 {
1818         struct radeon_virt_surface *s;
1819         int i;
1820         int virt_surface_index;
1821         uint32_t new_upper, new_lower;
1822
1823         new_lower = new->address;
1824         new_upper = new_lower + new->size - 1;
1825
1826         /* sanity check */
1827         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1828                 ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) != RADEON_SURF_ADDRESS_FIXED_MASK) ||
1829                 ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1830                 return -1;
1831
1832         /* make sure there is no overlap with existing surfaces */
1833         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1834                 if ((dev_priv->surfaces[i].refcount != 0) &&
1835                 (( (new_lower >= dev_priv->surfaces[i].lower) &&
1836                         (new_lower < dev_priv->surfaces[i].upper) ) ||
1837                  ( (new_lower < dev_priv->surfaces[i].lower) &&
1838                         (new_upper > dev_priv->surfaces[i].lower) )) ){
1839                 return -1;}
1840         }
1841
1842         /* find a virtual surface */
1843         for (i = 0; i < 2*RADEON_MAX_SURFACES; i++)
1844                 if (dev_priv->virt_surfaces[i].filp == 0)
1845                         break;
1846         if (i == 2*RADEON_MAX_SURFACES) {
1847                 return -1;}
1848         virt_surface_index = i;
1849
1850         /* try to reuse an existing surface */
1851         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1852                 /* extend before */
1853                 if ((dev_priv->surfaces[i].refcount == 1) &&
1854                   (new->flags == dev_priv->surfaces[i].flags) &&
1855                   (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1856                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1857                         s->surface_index = i;
1858                         s->lower = new_lower;
1859                         s->upper = new_upper;
1860                         s->flags = new->flags;
1861                         s->filp = filp;
1862                         dev_priv->surfaces[i].refcount++;
1863                         dev_priv->surfaces[i].lower = s->lower;
1864                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1865                         return virt_surface_index;
1866                 }
1867
1868                 /* extend after */
1869                 if ((dev_priv->surfaces[i].refcount == 1) &&
1870                   (new->flags == dev_priv->surfaces[i].flags) &&
1871                   (new_lower == dev_priv->surfaces[i].upper + 1)) {
1872                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1873                         s->surface_index = i;
1874                         s->lower = new_lower;
1875                         s->upper = new_upper;
1876                         s->flags = new->flags;
1877                         s->filp = filp;
1878                         dev_priv->surfaces[i].refcount++;
1879                         dev_priv->surfaces[i].upper = s->upper;
1880                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1881                         return virt_surface_index;
1882                 }
1883         }
1884
1885         /* okay, we need a new one */
1886         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1887                 if (dev_priv->surfaces[i].refcount == 0) {
1888                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1889                         s->surface_index = i;
1890                         s->lower = new_lower;
1891                         s->upper = new_upper;
1892                         s->flags = new->flags;
1893                         s->filp = filp;
1894                         dev_priv->surfaces[i].refcount = 1;
1895                         dev_priv->surfaces[i].lower = s->lower;
1896                         dev_priv->surfaces[i].upper = s->upper;
1897                         dev_priv->surfaces[i].flags = s->flags;
1898                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1899                         return virt_surface_index;
1900                 }
1901         }
1902
1903         /* we didn't find anything */
1904         return -1;
1905 }
1906
1907 static int free_surface(DRMFILE filp, drm_radeon_private_t *dev_priv, int lower)
1908 {
1909         struct radeon_virt_surface *s;
1910         int i;
1911         /* find the virtual surface */
1912         for(i = 0; i < 2*RADEON_MAX_SURFACES; i++) {
1913                 s = &(dev_priv->virt_surfaces[i]);
1914                 if (s->filp) {
1915                         if ((lower == s->lower) && (filp == s->filp)) {
1916                                 if (dev_priv->surfaces[s->surface_index].lower == s->lower)
1917                                         dev_priv->surfaces[s->surface_index].lower = s->upper;
1918
1919                                 if (dev_priv->surfaces[s->surface_index].upper == s->upper)
1920                                         dev_priv->surfaces[s->surface_index].upper = s->lower;
1921
1922                                 dev_priv->surfaces[s->surface_index].refcount--;
1923                                 if (dev_priv->surfaces[s->surface_index].refcount == 0)
1924                                         dev_priv->surfaces[s->surface_index].flags = 0;
1925                                 s->filp = NULL;
1926                                 radeon_apply_surface_regs(s->surface_index, dev_priv);
1927                                 return 0;
1928                         }
1929                 }
1930         }
1931         return 1;
1932 }
1933
1934 static void radeon_surfaces_release(DRMFILE filp, drm_radeon_private_t *dev_priv)
1935 {
1936         int i;
1937         for( i = 0; i < 2*RADEON_MAX_SURFACES; i++)
1938         {
1939                 if (dev_priv->virt_surfaces[i].filp == filp)
1940                         free_surface(filp, dev_priv, dev_priv->virt_surfaces[i].lower);
1941         }
1942 }
1943
1944 /* ================================================================
1945  * IOCTL functions
1946  */
1947 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1948 {
1949         DRM_DEVICE;
1950         drm_radeon_private_t *dev_priv = dev->dev_private;
1951         drm_radeon_surface_alloc_t alloc;
1952
1953         if (!dev_priv) {
1954                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1955                 return DRM_ERR(EINVAL);
1956         }
1957
1958         DRM_COPY_FROM_USER_IOCTL(alloc, (drm_radeon_surface_alloc_t __user *)data,
1959                                   sizeof(alloc));
1960
1961         if (alloc_surface(&alloc, dev_priv, filp) == -1)
1962                 return DRM_ERR(EINVAL);
1963         else
1964                 return 0;
1965 }
1966
1967 static int radeon_surface_free(DRM_IOCTL_ARGS)
1968 {
1969         DRM_DEVICE;
1970         drm_radeon_private_t *dev_priv = dev->dev_private;
1971         drm_radeon_surface_free_t memfree;
1972
1973         if (!dev_priv) {
1974                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1975                 return DRM_ERR(EINVAL);
1976         }
1977
1978         DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_mem_free_t __user *)data,
1979                                   sizeof(memfree) );
1980
1981         if (free_surface(filp, dev_priv, memfree.address))
1982                 return DRM_ERR(EINVAL);
1983         else
1984                 return 0;
1985 }
1986
1987 static int radeon_cp_clear( DRM_IOCTL_ARGS )
1988 {
1989         DRM_DEVICE;
1990         drm_radeon_private_t *dev_priv = dev->dev_private;
1991         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1992         drm_radeon_clear_t clear;
1993         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
1994         DRM_DEBUG( "\n" );
1995
1996         LOCK_TEST_WITH_RETURN( dev, filp );
1997
1998         DRM_COPY_FROM_USER_IOCTL( clear, (drm_radeon_clear_t __user *)data,
1999                              sizeof(clear) );
2000
2001         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2002
2003         if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
2004                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2005
2006         if ( DRM_COPY_FROM_USER( &depth_boxes, clear.depth_boxes,
2007                              sarea_priv->nbox * sizeof(depth_boxes[0]) ) )
2008                 return DRM_ERR(EFAULT);
2009
2010         radeon_cp_dispatch_clear( dev, &clear, depth_boxes );
2011
2012         COMMIT_RING();
2013         return 0;
2014 }
2015
2016
2017 /* Not sure why this isn't set all the time:
2018  */ 
2019 static int radeon_do_init_pageflip( drm_device_t *dev )
2020 {
2021         drm_radeon_private_t *dev_priv = dev->dev_private;
2022         RING_LOCALS;
2023
2024         DRM_DEBUG( "\n" );
2025
2026         BEGIN_RING( 6 );
2027         RADEON_WAIT_UNTIL_3D_IDLE();
2028         OUT_RING( CP_PACKET0( RADEON_CRTC_OFFSET_CNTL, 0 ) );
2029         OUT_RING( RADEON_READ( RADEON_CRTC_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
2030         OUT_RING( CP_PACKET0( RADEON_CRTC2_OFFSET_CNTL, 0 ) );
2031         OUT_RING( RADEON_READ( RADEON_CRTC2_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
2032         ADVANCE_RING();
2033
2034         dev_priv->page_flipping = 1;
2035         dev_priv->current_page = 0;
2036         dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;
2037
2038         return 0;
2039 }
2040
2041 /* Called whenever a client dies, from drm_release.
2042  * NOTE:  Lock isn't necessarily held when this is called!
2043  */
2044 static int radeon_do_cleanup_pageflip( drm_device_t *dev )
2045 {
2046         drm_radeon_private_t *dev_priv = dev->dev_private;
2047         DRM_DEBUG( "\n" );
2048
2049         if (dev_priv->current_page != 0)
2050                 radeon_cp_dispatch_flip( dev );
2051
2052         dev_priv->page_flipping = 0;
2053         return 0;
2054 }
2055
2056 /* Swapping and flipping are different operations, need different ioctls.
2057  * They can & should be intermixed to support multiple 3d windows.  
2058  */
2059 static int radeon_cp_flip( DRM_IOCTL_ARGS )
2060 {
2061         DRM_DEVICE;
2062         drm_radeon_private_t *dev_priv = dev->dev_private;
2063         DRM_DEBUG( "\n" );
2064
2065         LOCK_TEST_WITH_RETURN( dev, filp );
2066
2067         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2068
2069         if (!dev_priv->page_flipping) 
2070                 radeon_do_init_pageflip( dev );
2071                 
2072         radeon_cp_dispatch_flip( dev );
2073
2074         COMMIT_RING();
2075         return 0;
2076 }
2077
2078 static int radeon_cp_swap( DRM_IOCTL_ARGS )
2079 {
2080         DRM_DEVICE;
2081         drm_radeon_private_t *dev_priv = dev->dev_private;
2082         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2083         DRM_DEBUG( "\n" );
2084
2085         LOCK_TEST_WITH_RETURN( dev, filp );
2086
2087         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2088
2089         if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
2090                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2091
2092         radeon_cp_dispatch_swap( dev );
2093         dev_priv->sarea_priv->ctx_owner = 0;
2094
2095         COMMIT_RING();
2096         return 0;
2097 }
2098
2099 static int radeon_cp_vertex( DRM_IOCTL_ARGS )
2100 {
2101         DRM_DEVICE;
2102         drm_radeon_private_t *dev_priv = dev->dev_private;
2103         drm_file_t *filp_priv;
2104         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2105         drm_device_dma_t *dma = dev->dma;
2106         drm_buf_t *buf;
2107         drm_radeon_vertex_t vertex;
2108         drm_radeon_tcl_prim_t prim;
2109
2110         LOCK_TEST_WITH_RETURN( dev, filp );
2111
2112         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2113
2114         DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex_t __user *)data,
2115                              sizeof(vertex) );
2116
2117         DRM_DEBUG( "pid=%d index=%d count=%d discard=%d\n",
2118                    DRM_CURRENTPID,
2119                    vertex.idx, vertex.count, vertex.discard );
2120
2121         if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
2122                 DRM_ERROR( "buffer index %d (of %d max)\n",
2123                            vertex.idx, dma->buf_count - 1 );
2124                 return DRM_ERR(EINVAL);
2125         }
2126         if ( vertex.prim < 0 ||
2127              vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
2128                 DRM_ERROR( "buffer prim %d\n", vertex.prim );
2129                 return DRM_ERR(EINVAL);
2130         }
2131
2132         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2133         VB_AGE_TEST_WITH_RETURN( dev_priv );
2134
2135         buf = dma->buflist[vertex.idx];
2136
2137         if ( buf->filp != filp ) {
2138                 DRM_ERROR( "process %d using buffer owned by %p\n",
2139                            DRM_CURRENTPID, buf->filp );
2140                 return DRM_ERR(EINVAL);
2141         }
2142         if ( buf->pending ) {
2143                 DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
2144                 return DRM_ERR(EINVAL);
2145         }
2146
2147         /* Build up a prim_t record:
2148          */
2149         if (vertex.count) {
2150                 buf->used = vertex.count; /* not used? */
2151
2152                 if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
2153                         if ( radeon_emit_state( dev_priv, filp_priv,
2154                                                 &sarea_priv->context_state,
2155                                                 sarea_priv->tex_state,
2156                                                 sarea_priv->dirty ) ) {
2157                                 DRM_ERROR( "radeon_emit_state failed\n" );
2158                                 return DRM_ERR( EINVAL );
2159                         }
2160
2161                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2162                                                RADEON_UPLOAD_TEX1IMAGES |
2163                                                RADEON_UPLOAD_TEX2IMAGES |
2164                                                RADEON_REQUIRE_QUIESCENCE);
2165                 }
2166
2167                 prim.start = 0;
2168                 prim.finish = vertex.count; /* unused */
2169                 prim.prim = vertex.prim;
2170                 prim.numverts = vertex.count;
2171                 prim.vc_format = dev_priv->sarea_priv->vc_format;
2172                 
2173                 radeon_cp_dispatch_vertex( dev, buf, &prim );
2174         }
2175
2176         if (vertex.discard) {
2177                 radeon_cp_discard_buffer( dev, buf );
2178         }
2179
2180         COMMIT_RING();
2181         return 0;
2182 }
2183
2184 static int radeon_cp_indices( DRM_IOCTL_ARGS )
2185 {
2186         DRM_DEVICE;
2187         drm_radeon_private_t *dev_priv = dev->dev_private;
2188         drm_file_t *filp_priv;
2189         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2190         drm_device_dma_t *dma = dev->dma;
2191         drm_buf_t *buf;
2192         drm_radeon_indices_t elts;
2193         drm_radeon_tcl_prim_t prim;
2194         int count;
2195
2196         LOCK_TEST_WITH_RETURN( dev, filp );
2197
2198         if ( !dev_priv ) {
2199                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2200                 return DRM_ERR(EINVAL);
2201         }
2202
2203         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2204
2205         DRM_COPY_FROM_USER_IOCTL( elts, (drm_radeon_indices_t __user *)data,
2206                              sizeof(elts) );
2207
2208         DRM_DEBUG( "pid=%d index=%d start=%d end=%d discard=%d\n",
2209                    DRM_CURRENTPID,
2210                    elts.idx, elts.start, elts.end, elts.discard );
2211
2212         if ( elts.idx < 0 || elts.idx >= dma->buf_count ) {
2213                 DRM_ERROR( "buffer index %d (of %d max)\n",
2214                            elts.idx, dma->buf_count - 1 );
2215                 return DRM_ERR(EINVAL);
2216         }
2217         if ( elts.prim < 0 ||
2218              elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
2219                 DRM_ERROR( "buffer prim %d\n", elts.prim );
2220                 return DRM_ERR(EINVAL);
2221         }
2222
2223         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2224         VB_AGE_TEST_WITH_RETURN( dev_priv );
2225
2226         buf = dma->buflist[elts.idx];
2227
2228         if ( buf->filp != filp ) {
2229                 DRM_ERROR( "process %d using buffer owned by %p\n",
2230                            DRM_CURRENTPID, buf->filp );
2231                 return DRM_ERR(EINVAL);
2232         }
2233         if ( buf->pending ) {
2234                 DRM_ERROR( "sending pending buffer %d\n", elts.idx );
2235                 return DRM_ERR(EINVAL);
2236         }
2237
2238         count = (elts.end - elts.start) / sizeof(u16);
2239         elts.start -= RADEON_INDEX_PRIM_OFFSET;
2240
2241         if ( elts.start & 0x7 ) {
2242                 DRM_ERROR( "misaligned buffer 0x%x\n", elts.start );
2243                 return DRM_ERR(EINVAL);
2244         }
2245         if ( elts.start < buf->used ) {
2246                 DRM_ERROR( "no header 0x%x - 0x%x\n", elts.start, buf->used );
2247                 return DRM_ERR(EINVAL);
2248         }
2249
2250         buf->used = elts.end;
2251
2252         if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
2253                 if ( radeon_emit_state( dev_priv, filp_priv,
2254                                         &sarea_priv->context_state,
2255                                         sarea_priv->tex_state,
2256                                         sarea_priv->dirty ) ) {
2257                         DRM_ERROR( "radeon_emit_state failed\n" );
2258                         return DRM_ERR( EINVAL );
2259                 }
2260
2261                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2262                                        RADEON_UPLOAD_TEX1IMAGES |
2263                                        RADEON_UPLOAD_TEX2IMAGES |
2264                                        RADEON_REQUIRE_QUIESCENCE);
2265         }
2266
2267
2268         /* Build up a prim_t record:
2269          */
2270         prim.start = elts.start;
2271         prim.finish = elts.end; 
2272         prim.prim = elts.prim;
2273         prim.offset = 0;        /* offset from start of dma buffers */
2274         prim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2275         prim.vc_format = dev_priv->sarea_priv->vc_format;
2276         
2277         radeon_cp_dispatch_indices( dev, buf, &prim );
2278         if (elts.discard) {
2279                 radeon_cp_discard_buffer( dev, buf );
2280         }
2281
2282         COMMIT_RING();
2283         return 0;
2284 }
2285
2286 static int radeon_cp_texture( DRM_IOCTL_ARGS )
2287 {
2288         DRM_DEVICE;
2289         drm_radeon_private_t *dev_priv = dev->dev_private;
2290         drm_radeon_texture_t tex;
2291         drm_radeon_tex_image_t image;
2292         int ret;
2293
2294         LOCK_TEST_WITH_RETURN( dev, filp );
2295
2296         DRM_COPY_FROM_USER_IOCTL( tex, (drm_radeon_texture_t __user *)data, sizeof(tex) );
2297
2298         if ( tex.image == NULL ) {
2299                 DRM_ERROR( "null texture image!\n" );
2300                 return DRM_ERR(EINVAL);
2301         }
2302
2303         if ( DRM_COPY_FROM_USER( &image,
2304                              (drm_radeon_tex_image_t __user *)tex.image,
2305                              sizeof(image) ) )
2306                 return DRM_ERR(EFAULT);
2307
2308         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2309         VB_AGE_TEST_WITH_RETURN( dev_priv );
2310
2311         ret = radeon_cp_dispatch_texture( filp, dev, &tex, &image );
2312
2313         COMMIT_RING();
2314         return ret;
2315 }
2316
2317 static int radeon_cp_stipple( DRM_IOCTL_ARGS )
2318 {
2319         DRM_DEVICE;
2320         drm_radeon_private_t *dev_priv = dev->dev_private;
2321         drm_radeon_stipple_t stipple;
2322         u32 mask[32];
2323
2324         LOCK_TEST_WITH_RETURN( dev, filp );
2325
2326         DRM_COPY_FROM_USER_IOCTL( stipple, (drm_radeon_stipple_t __user *)data,
2327                              sizeof(stipple) );
2328
2329         if ( DRM_COPY_FROM_USER( &mask, stipple.mask, 32 * sizeof(u32) ) )
2330                 return DRM_ERR(EFAULT);
2331
2332         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2333
2334         radeon_cp_dispatch_stipple( dev, mask );
2335
2336         COMMIT_RING();
2337         return 0;
2338 }
2339
2340 static int radeon_cp_indirect( DRM_IOCTL_ARGS )
2341 {
2342         DRM_DEVICE;
2343         drm_radeon_private_t *dev_priv = dev->dev_private;
2344         drm_device_dma_t *dma = dev->dma;
2345         drm_buf_t *buf;
2346         drm_radeon_indirect_t indirect;
2347         RING_LOCALS;
2348
2349         LOCK_TEST_WITH_RETURN( dev, filp );
2350
2351         if ( !dev_priv ) {
2352                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2353                 return DRM_ERR(EINVAL);
2354         }
2355
2356         DRM_COPY_FROM_USER_IOCTL( indirect, (drm_radeon_indirect_t __user *)data,
2357                              sizeof(indirect) );
2358
2359         DRM_DEBUG( "indirect: idx=%d s=%d e=%d d=%d\n",
2360                    indirect.idx, indirect.start,
2361                    indirect.end, indirect.discard );
2362
2363         if ( indirect.idx < 0 || indirect.idx >= dma->buf_count ) {
2364                 DRM_ERROR( "buffer index %d (of %d max)\n",
2365                            indirect.idx, dma->buf_count - 1 );
2366                 return DRM_ERR(EINVAL);
2367         }
2368
2369         buf = dma->buflist[indirect.idx];
2370
2371         if ( buf->filp != filp ) {
2372                 DRM_ERROR( "process %d using buffer owned by %p\n",
2373                            DRM_CURRENTPID, buf->filp );
2374                 return DRM_ERR(EINVAL);
2375         }
2376         if ( buf->pending ) {
2377                 DRM_ERROR( "sending pending buffer %d\n", indirect.idx );
2378                 return DRM_ERR(EINVAL);
2379         }
2380
2381         if ( indirect.start < buf->used ) {
2382                 DRM_ERROR( "reusing indirect: start=0x%x actual=0x%x\n",
2383                            indirect.start, buf->used );
2384                 return DRM_ERR(EINVAL);
2385         }
2386
2387         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2388         VB_AGE_TEST_WITH_RETURN( dev_priv );
2389
2390         buf->used = indirect.end;
2391
2392         /* Wait for the 3D stream to idle before the indirect buffer
2393          * containing 2D acceleration commands is processed.
2394          */
2395         BEGIN_RING( 2 );
2396
2397         RADEON_WAIT_UNTIL_3D_IDLE();
2398
2399         ADVANCE_RING();
2400
2401         /* Dispatch the indirect buffer full of commands from the
2402          * X server.  This is insecure and is thus only available to
2403          * privileged clients.
2404          */
2405         radeon_cp_dispatch_indirect( dev, buf, indirect.start, indirect.end );
2406         if (indirect.discard) {
2407                 radeon_cp_discard_buffer( dev, buf );
2408         }
2409
2410
2411         COMMIT_RING();
2412         return 0;
2413 }
2414
2415 static int radeon_cp_vertex2( DRM_IOCTL_ARGS )
2416 {
2417         DRM_DEVICE;
2418         drm_radeon_private_t *dev_priv = dev->dev_private;
2419         drm_file_t *filp_priv;
2420         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2421         drm_device_dma_t *dma = dev->dma;
2422         drm_buf_t *buf;
2423         drm_radeon_vertex2_t vertex;
2424         int i;
2425         unsigned char laststate;
2426
2427         LOCK_TEST_WITH_RETURN( dev, filp );
2428
2429         if ( !dev_priv ) {
2430                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2431                 return DRM_ERR(EINVAL);
2432         }
2433
2434         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2435
2436         DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex2_t __user *)data,
2437                              sizeof(vertex) );
2438
2439         DRM_DEBUG( "pid=%d index=%d discard=%d\n",
2440                    DRM_CURRENTPID,
2441                    vertex.idx, vertex.discard );
2442
2443         if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
2444                 DRM_ERROR( "buffer index %d (of %d max)\n",
2445                            vertex.idx, dma->buf_count - 1 );
2446                 return DRM_ERR(EINVAL);
2447         }
2448
2449         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2450         VB_AGE_TEST_WITH_RETURN( dev_priv );
2451
2452         buf = dma->buflist[vertex.idx];
2453
2454         if ( buf->filp != filp ) {
2455                 DRM_ERROR( "process %d using buffer owned by %p\n",
2456                            DRM_CURRENTPID, buf->filp );
2457                 return DRM_ERR(EINVAL);
2458         }
2459
2460         if ( buf->pending ) {
2461                 DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
2462                 return DRM_ERR(EINVAL);
2463         }
2464         
2465         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2466                 return DRM_ERR(EINVAL);
2467
2468         for (laststate = 0xff, i = 0 ; i < vertex.nr_prims ; i++) {
2469                 drm_radeon_prim_t prim;
2470                 drm_radeon_tcl_prim_t tclprim;
2471                 
2472                 if ( DRM_COPY_FROM_USER( &prim, &vertex.prim[i], sizeof(prim) ) )
2473                         return DRM_ERR(EFAULT);
2474                 
2475                 if ( prim.stateidx != laststate ) {
2476                         drm_radeon_state_t state;                              
2477                                 
2478                         if ( DRM_COPY_FROM_USER( &state, 
2479                                              &vertex.state[prim.stateidx], 
2480                                              sizeof(state) ) )
2481                                 return DRM_ERR(EFAULT);
2482
2483                         if ( radeon_emit_state2( dev_priv, filp_priv, &state ) ) {
2484                                 DRM_ERROR( "radeon_emit_state2 failed\n" );
2485                                 return DRM_ERR( EINVAL );
2486                         }
2487
2488                         laststate = prim.stateidx;
2489                 }
2490
2491                 tclprim.start = prim.start;
2492                 tclprim.finish = prim.finish;
2493                 tclprim.prim = prim.prim;
2494                 tclprim.vc_format = prim.vc_format;
2495
2496                 if ( prim.prim & RADEON_PRIM_WALK_IND ) {
2497                         tclprim.offset = prim.numverts * 64;
2498                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2499
2500                         radeon_cp_dispatch_indices( dev, buf, &tclprim );
2501                 } else {
2502                         tclprim.numverts = prim.numverts;
2503                         tclprim.offset = 0; /* not used */
2504
2505                         radeon_cp_dispatch_vertex( dev, buf, &tclprim );
2506                 }
2507                 
2508                 if (sarea_priv->nbox == 1)
2509                         sarea_priv->nbox = 0;
2510         }
2511
2512         if ( vertex.discard ) {
2513                 radeon_cp_discard_buffer( dev, buf );
2514         }
2515
2516         COMMIT_RING();
2517         return 0;
2518 }
2519
2520
2521 static int radeon_emit_packets( 
2522         drm_radeon_private_t *dev_priv,
2523         drm_file_t *filp_priv,
2524         drm_radeon_cmd_header_t header,
2525         drm_radeon_cmd_buffer_t *cmdbuf )
2526 {
2527         int id = (int)header.packet.packet_id;
2528         int sz, reg;
2529         int *data = (int *)cmdbuf->buf;
2530         RING_LOCALS;
2531    
2532         if (id >= RADEON_MAX_STATE_PACKETS)
2533                 return DRM_ERR(EINVAL);
2534
2535         sz = packet[id].len;
2536         reg = packet[id].start;
2537
2538         if (sz * sizeof(int) > cmdbuf->bufsz) {
2539                 DRM_ERROR( "Packet size provided larger than data provided\n" );
2540                 return DRM_ERR(EINVAL);
2541         }
2542
2543         if ( radeon_check_and_fixup_packets( dev_priv, filp_priv, id, data ) ) {
2544                 DRM_ERROR( "Packet verification failed\n" );
2545                 return DRM_ERR( EINVAL );
2546         }
2547
2548         BEGIN_RING(sz+1);
2549         OUT_RING( CP_PACKET0( reg, (sz-1) ) );
2550         OUT_RING_TABLE( data, sz );
2551         ADVANCE_RING();
2552
2553         cmdbuf->buf += sz * sizeof(int);
2554         cmdbuf->bufsz -= sz * sizeof(int);
2555         return 0;
2556 }
2557
2558 static __inline__ int radeon_emit_scalars( 
2559         drm_radeon_private_t *dev_priv,
2560         drm_radeon_cmd_header_t header,
2561         drm_radeon_cmd_buffer_t *cmdbuf )
2562 {
2563         int sz = header.scalars.count;
2564         int start = header.scalars.offset;
2565         int stride = header.scalars.stride;
2566         RING_LOCALS;
2567
2568         BEGIN_RING( 3+sz );
2569         OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
2570         OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2571         OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
2572         OUT_RING_TABLE( cmdbuf->buf, sz );
2573         ADVANCE_RING();
2574         cmdbuf->buf += sz * sizeof(int);
2575         cmdbuf->bufsz -= sz * sizeof(int);
2576         return 0;
2577 }
2578
2579 /* God this is ugly
2580  */
2581 static __inline__ int radeon_emit_scalars2( 
2582         drm_radeon_private_t *dev_priv,
2583         drm_radeon_cmd_header_t header,
2584         drm_radeon_cmd_buffer_t *cmdbuf )
2585 {
2586         int sz = header.scalars.count;
2587         int start = ((unsigned int)header.scalars.offset) + 0x100;
2588         int stride = header.scalars.stride;
2589         RING_LOCALS;
2590
2591         BEGIN_RING( 3+sz );
2592         OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
2593         OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2594         OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
2595         OUT_RING_TABLE( cmdbuf->buf, sz );
2596         ADVANCE_RING();
2597         cmdbuf->buf += sz * sizeof(int);
2598         cmdbuf->bufsz -= sz * sizeof(int);
2599         return 0;
2600 }
2601
2602 static __inline__ int radeon_emit_vectors( 
2603         drm_radeon_private_t *dev_priv,
2604         drm_radeon_cmd_header_t header,
2605         drm_radeon_cmd_buffer_t *cmdbuf )
2606 {
2607         int sz = header.vectors.count;
2608         int start = header.vectors.offset;
2609         int stride = header.vectors.stride;
2610         RING_LOCALS;
2611
2612         BEGIN_RING( 3+sz );
2613         OUT_RING( CP_PACKET0( RADEON_SE_TCL_VECTOR_INDX_REG, 0 ) );
2614         OUT_RING( start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2615         OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_VECTOR_DATA_REG, (sz-1) ) );
2616         OUT_RING_TABLE( cmdbuf->buf, sz );
2617         ADVANCE_RING();
2618
2619         cmdbuf->buf += sz * sizeof(int);
2620         cmdbuf->bufsz -= sz * sizeof(int);
2621         return 0;
2622 }
2623
2624
2625 static int radeon_emit_packet3( drm_device_t *dev,
2626                                 drm_file_t *filp_priv,
2627                                 drm_radeon_cmd_buffer_t *cmdbuf )
2628 {
2629         drm_radeon_private_t *dev_priv = dev->dev_private;
2630         unsigned int cmdsz;
2631         int ret;
2632         RING_LOCALS;
2633
2634         DRM_DEBUG("\n");
2635
2636         if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv,
2637                                                      cmdbuf, &cmdsz ) ) ) {
2638                 DRM_ERROR( "Packet verification failed\n" );
2639                 return ret;
2640         }
2641
2642         BEGIN_RING( cmdsz );
2643         OUT_RING_TABLE( cmdbuf->buf, cmdsz );
2644         ADVANCE_RING();
2645
2646         cmdbuf->buf += cmdsz * 4;
2647         cmdbuf->bufsz -= cmdsz * 4;
2648         return 0;
2649 }
2650
2651
2652 static int radeon_emit_packet3_cliprect( drm_device_t *dev,
2653                                          drm_file_t *filp_priv,
2654                                          drm_radeon_cmd_buffer_t *cmdbuf,
2655                                          int orig_nbox )
2656 {
2657         drm_radeon_private_t *dev_priv = dev->dev_private;
2658         drm_clip_rect_t box;
2659         unsigned int cmdsz;
2660         int ret;
2661         drm_clip_rect_t __user *boxes = cmdbuf->boxes;
2662         int i = 0;
2663         RING_LOCALS;
2664
2665         DRM_DEBUG("\n");
2666
2667         if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv,
2668                                                      cmdbuf, &cmdsz ) ) ) {
2669                 DRM_ERROR( "Packet verification failed\n" );
2670                 return ret;
2671         }
2672
2673         if (!orig_nbox)
2674                 goto out;
2675
2676         do {
2677                 if ( i < cmdbuf->nbox ) {
2678                         if (DRM_COPY_FROM_USER( &box, &boxes[i], sizeof(box) ))
2679                                 return DRM_ERR(EFAULT);
2680                         /* FIXME The second and subsequent times round
2681                          * this loop, send a WAIT_UNTIL_3D_IDLE before
2682                          * calling emit_clip_rect(). This fixes a
2683                          * lockup on fast machines when sending
2684                          * several cliprects with a cmdbuf, as when
2685                          * waving a 2D window over a 3D
2686                          * window. Something in the commands from user
2687                          * space seems to hang the card when they're
2688                          * sent several times in a row. That would be
2689                          * the correct place to fix it but this works
2690                          * around it until I can figure that out - Tim
2691                          * Smith */
2692                         if ( i ) {
2693                                 BEGIN_RING( 2 );
2694                                 RADEON_WAIT_UNTIL_3D_IDLE();
2695                                 ADVANCE_RING();
2696                         }
2697                         radeon_emit_clip_rect( dev_priv, &box );
2698                 }
2699                 
2700                 BEGIN_RING( cmdsz );
2701                 OUT_RING_TABLE( cmdbuf->buf, cmdsz );
2702                 ADVANCE_RING();
2703
2704         } while ( ++i < cmdbuf->nbox );
2705         if (cmdbuf->nbox == 1)
2706                 cmdbuf->nbox = 0;
2707
2708  out:
2709         cmdbuf->buf += cmdsz * 4;
2710         cmdbuf->bufsz -= cmdsz * 4;
2711         return 0;
2712 }
2713
2714
2715 static int radeon_emit_wait( drm_device_t *dev, int flags )
2716 {
2717         drm_radeon_private_t *dev_priv = dev->dev_private;
2718         RING_LOCALS;
2719
2720         DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2721         switch (flags) {
2722         case RADEON_WAIT_2D:
2723                 BEGIN_RING( 2 );
2724                 RADEON_WAIT_UNTIL_2D_IDLE(); 
2725                 ADVANCE_RING();
2726                 break;
2727         case RADEON_WAIT_3D:
2728                 BEGIN_RING( 2 );
2729                 RADEON_WAIT_UNTIL_3D_IDLE(); 
2730                 ADVANCE_RING();
2731                 break;
2732         case RADEON_WAIT_2D|RADEON_WAIT_3D:
2733                 BEGIN_RING( 2 );
2734                 RADEON_WAIT_UNTIL_IDLE(); 
2735                 ADVANCE_RING();
2736                 break;
2737         default:
2738                 return DRM_ERR(EINVAL);
2739         }
2740
2741         return 0;
2742 }
2743
2744 static int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
2745 {
2746         DRM_DEVICE;
2747         drm_radeon_private_t *dev_priv = dev->dev_private;
2748         drm_file_t *filp_priv;
2749         drm_device_dma_t *dma = dev->dma;
2750         drm_buf_t *buf = NULL;
2751         int idx;
2752         drm_radeon_cmd_buffer_t cmdbuf;
2753         drm_radeon_cmd_header_t header;
2754         int orig_nbox, orig_bufsz;
2755         char *kbuf=NULL;
2756
2757         LOCK_TEST_WITH_RETURN( dev, filp );
2758
2759         if ( !dev_priv ) {
2760                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2761                 return DRM_ERR(EINVAL);
2762         }
2763
2764         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2765
2766         DRM_COPY_FROM_USER_IOCTL( cmdbuf, (drm_radeon_cmd_buffer_t __user *)data,
2767                              sizeof(cmdbuf) );
2768
2769         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2770         VB_AGE_TEST_WITH_RETURN( dev_priv );
2771
2772         if (cmdbuf.bufsz > 64*1024 || cmdbuf.bufsz<0) {
2773                 return DRM_ERR(EINVAL);
2774         }
2775
2776         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2777          * races between checking values and using those values in other code,
2778          * and simply to avoid a lot of function calls to copy in data.
2779          */
2780         orig_bufsz = cmdbuf.bufsz;
2781         if (orig_bufsz != 0) {
2782                 kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
2783                 if (kbuf == NULL)
2784                         return DRM_ERR(ENOMEM);
2785                 if (DRM_COPY_FROM_USER(kbuf, cmdbuf.buf, cmdbuf.bufsz)) {
2786                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2787                         return DRM_ERR(EFAULT);
2788                 }
2789                 cmdbuf.buf = kbuf;
2790         }
2791
2792         orig_nbox = cmdbuf.nbox;
2793
2794         if(dev_priv->microcode_version == UCODE_R300) {
2795                 int temp;
2796                 temp=r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);
2797         
2798                 if (orig_bufsz != 0)
2799                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2800         
2801                 return temp;
2802         }
2803
2804         /* microcode_version != r300 */
2805         while ( cmdbuf.bufsz >= sizeof(header) ) {
2806
2807                 header.i = *(int *)cmdbuf.buf;
2808                 cmdbuf.buf += sizeof(header);
2809                 cmdbuf.bufsz -= sizeof(header);
2810
2811                 switch (header.header.cmd_type) {
2812                 case RADEON_CMD_PACKET: 
2813                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2814                         if (radeon_emit_packets( dev_priv, filp_priv, header, &cmdbuf )) {
2815                                 DRM_ERROR("radeon_emit_packets failed\n");
2816                                 goto err;
2817                         }
2818                         break;
2819
2820                 case RADEON_CMD_SCALARS:
2821                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2822                         if (radeon_emit_scalars( dev_priv, header, &cmdbuf )) {
2823                                 DRM_ERROR("radeon_emit_scalars failed\n");
2824                                 goto err;
2825                         }
2826                         break;
2827
2828                 case RADEON_CMD_VECTORS:
2829                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2830                         if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) {
2831                                 DRM_ERROR("radeon_emit_vectors failed\n");
2832                                 goto err;
2833                         }
2834                         break;
2835
2836                 case RADEON_CMD_DMA_DISCARD:
2837                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2838                         idx = header.dma.buf_idx;
2839                         if ( idx < 0 || idx >= dma->buf_count ) {
2840                                 DRM_ERROR( "buffer index %d (of %d max)\n",
2841                                            idx, dma->buf_count - 1 );
2842                                 goto err;
2843                         }
2844
2845                         buf = dma->buflist[idx];
2846                         if ( buf->filp != filp || buf->pending ) {
2847                                 DRM_ERROR( "bad buffer %p %p %d\n",
2848                                            buf->filp, filp, buf->pending);
2849                                 goto err;
2850                         }
2851
2852                         radeon_cp_discard_buffer( dev, buf );
2853                         break;
2854
2855                 case RADEON_CMD_PACKET3:
2856                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2857                         if (radeon_emit_packet3( dev, filp_priv, &cmdbuf )) {
2858                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2859                                 goto err;
2860                         }
2861                         break;
2862
2863                 case RADEON_CMD_PACKET3_CLIP:
2864                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2865                         if (radeon_emit_packet3_cliprect( dev, filp_priv, &cmdbuf, orig_nbox )) {
2866                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2867                                 goto err;
2868                         }
2869                         break;
2870
2871                 case RADEON_CMD_SCALARS2:
2872                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2873                         if (radeon_emit_scalars2( dev_priv, header, &cmdbuf )) {
2874                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2875                                 goto err;
2876                         }
2877                         break;
2878
2879                 case RADEON_CMD_WAIT:
2880                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2881                         if (radeon_emit_wait( dev, header.wait.flags )) {
2882                                 DRM_ERROR("radeon_emit_wait failed\n");
2883                                 goto err;
2884                         }
2885                         break;
2886                 default:
2887                         DRM_ERROR("bad cmd_type %d at %p\n", 
2888                                   header.header.cmd_type,
2889                                   cmdbuf.buf - sizeof(header));
2890                         goto err;
2891                 }
2892         }
2893
2894         if (orig_bufsz != 0)
2895                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2896
2897         DRM_DEBUG("DONE\n");
2898         COMMIT_RING();
2899         return 0;
2900
2901 err:
2902         if (orig_bufsz != 0)
2903                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2904         return DRM_ERR(EINVAL);
2905 }
2906
2907
2908
2909 static int radeon_cp_getparam( DRM_IOCTL_ARGS )
2910 {
2911         DRM_DEVICE;
2912         drm_radeon_private_t *dev_priv = dev->dev_private;
2913         drm_radeon_getparam_t param;
2914         int value;
2915
2916         if ( !dev_priv ) {
2917                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2918                 return DRM_ERR(EINVAL);
2919         }
2920
2921         DRM_COPY_FROM_USER_IOCTL( param, (drm_radeon_getparam_t __user *)data,
2922                              sizeof(param) );
2923
2924         DRM_DEBUG( "pid=%d\n", DRM_CURRENTPID );
2925
2926         switch( param.param ) {
2927         case RADEON_PARAM_GART_BUFFER_OFFSET:
2928                 value = dev_priv->gart_buffers_offset;
2929                 break;
2930         case RADEON_PARAM_LAST_FRAME:
2931                 dev_priv->stats.last_frame_reads++;
2932                 value = GET_SCRATCH( 0 );
2933                 break;
2934         case RADEON_PARAM_LAST_DISPATCH:
2935                 value = GET_SCRATCH( 1 );
2936                 break;
2937         case RADEON_PARAM_LAST_CLEAR:
2938                 dev_priv->stats.last_clear_reads++;
2939                 value = GET_SCRATCH( 2 );
2940                 break;
2941         case RADEON_PARAM_IRQ_NR:
2942                 value = dev->irq;
2943                 break;
2944         case RADEON_PARAM_GART_BASE:
2945                 value = dev_priv->gart_vm_start;
2946                 break;
2947         case RADEON_PARAM_REGISTER_HANDLE:
2948                 value = dev_priv->mmio_offset;
2949                 break;
2950         case RADEON_PARAM_STATUS_HANDLE:
2951                 value = dev_priv->ring_rptr_offset;
2952                 break;
2953 #if BITS_PER_LONG == 32
2954         /*
2955          * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2956          * pointer which can't fit into an int-sized variable.  According to
2957          * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2958          * not supporting it shouldn't be a problem.  If the same functionality
2959          * is needed on 64-bit platforms, a new ioctl() would have to be added,
2960          * so backwards-compatibility for the embedded platforms can be
2961          * maintained.  --davidm 4-Feb-2004.
2962          */
2963         case RADEON_PARAM_SAREA_HANDLE:
2964                 /* The lock is the first dword in the sarea. */
2965                 value = (long)dev->lock.hw_lock;
2966                 break;
2967 #endif
2968         case RADEON_PARAM_GART_TEX_HANDLE:
2969                 value = dev_priv->gart_textures_offset;
2970                 break;
2971         default:
2972                 return DRM_ERR(EINVAL);
2973         }
2974
2975         if ( DRM_COPY_TO_USER( param.value, &value, sizeof(int) ) ) {
2976                 DRM_ERROR( "copy_to_user\n" );
2977                 return DRM_ERR(EFAULT);
2978         }
2979         
2980         return 0;
2981 }
2982
2983 static int radeon_cp_setparam( DRM_IOCTL_ARGS ) {
2984         DRM_DEVICE;
2985         drm_radeon_private_t *dev_priv = dev->dev_private;
2986         drm_file_t *filp_priv;
2987         drm_radeon_setparam_t sp;
2988         struct drm_radeon_driver_file_fields *radeon_priv;
2989
2990         if ( !dev_priv ) {
2991                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2992                 return DRM_ERR( EINVAL );
2993         }
2994
2995         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2996
2997         DRM_COPY_FROM_USER_IOCTL( sp, ( drm_radeon_setparam_t __user * )data,
2998                                   sizeof( sp ) );
2999
3000         switch( sp.param ) {
3001         case RADEON_SETPARAM_FB_LOCATION:
3002                 radeon_priv = filp_priv->driver_priv;
3003                 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
3004                 break;
3005         case RADEON_SETPARAM_SWITCH_TILING:
3006                 if (sp.value == 0) {
3007                         DRM_DEBUG( "color tiling disabled\n" );
3008                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3009                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3010                         dev_priv->sarea_priv->tiling_enabled = 0;
3011                 }
3012                 else if (sp.value == 1) {
3013                         DRM_DEBUG( "color tiling enabled\n" );
3014                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3015                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3016                         dev_priv->sarea_priv->tiling_enabled = 1;
3017                 }
3018                 break;  
3019         default:
3020                 DRM_DEBUG( "Invalid parameter %d\n", sp.param );
3021                 return DRM_ERR( EINVAL );
3022         }
3023
3024         return 0;
3025 }
3026
3027 /* When a client dies:
3028  *    - Check for and clean up flipped page state
3029  *    - Free any alloced GART memory.
3030  *
3031  * DRM infrastructure takes care of reclaiming dma buffers.
3032  */
3033 void radeon_driver_prerelease(drm_device_t *dev, DRMFILE filp)
3034 {
3035         if ( dev->dev_private ) {                               
3036                 drm_radeon_private_t *dev_priv = dev->dev_private; 
3037                 if ( dev_priv->page_flipping ) {                
3038                         radeon_do_cleanup_pageflip( dev );      
3039                 }                                               
3040                 radeon_mem_release( filp, dev_priv->gart_heap ); 
3041                 radeon_mem_release( filp, dev_priv->fb_heap );  
3042                 radeon_surfaces_release(filp, dev_priv);
3043         }                               
3044 }
3045
3046 void radeon_driver_pretakedown(drm_device_t *dev)
3047 {
3048         radeon_do_release(dev);
3049 }
3050
3051 int radeon_driver_open_helper(drm_device_t *dev, drm_file_t *filp_priv)
3052 {
3053         drm_radeon_private_t *dev_priv = dev->dev_private;
3054         struct drm_radeon_driver_file_fields *radeon_priv;
3055         
3056         radeon_priv = (struct drm_radeon_driver_file_fields *)drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3057         
3058         if (!radeon_priv)
3059                 return -ENOMEM;
3060
3061         filp_priv->driver_priv = radeon_priv;
3062         if ( dev_priv )
3063                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3064         else
3065                 radeon_priv->radeon_fb_delta = 0;
3066         return 0;
3067 }
3068
3069
3070 void radeon_driver_free_filp_priv(drm_device_t *dev, drm_file_t *filp_priv)
3071 {
3072          struct drm_radeon_driver_file_fields *radeon_priv = filp_priv->driver_priv;
3073          
3074          drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3075 }
3076
3077 drm_ioctl_desc_t radeon_ioctls[] = {
3078         [DRM_IOCTL_NR(DRM_RADEON_CP_INIT)]    = { radeon_cp_init,      1, 1 },
3079         [DRM_IOCTL_NR(DRM_RADEON_CP_START)]   = { radeon_cp_start,     1, 1 },
3080         [DRM_IOCTL_NR(DRM_RADEON_CP_STOP)]    = { radeon_cp_stop,      1, 1 },
3081         [DRM_IOCTL_NR(DRM_RADEON_CP_RESET)]   = { radeon_cp_reset,     1, 1 },
3082         [DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)]    = { radeon_cp_idle,      1, 0 },
3083         [DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)]  = { radeon_cp_resume,    1, 0 },
3084         [DRM_IOCTL_NR(DRM_RADEON_RESET)]      = { radeon_engine_reset, 1, 0 },
3085         [DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = { radeon_fullscreen,   1, 0 },
3086         [DRM_IOCTL_NR(DRM_RADEON_SWAP)]       = { radeon_cp_swap,      1, 0 },
3087         [DRM_IOCTL_NR(DRM_RADEON_CLEAR)]      = { radeon_cp_clear,     1, 0 },
3088         [DRM_IOCTL_NR(DRM_RADEON_VERTEX)]     = { radeon_cp_vertex,    1, 0 },
3089         [DRM_IOCTL_NR(DRM_RADEON_INDICES)]    = { radeon_cp_indices,   1, 0 },
3090         [DRM_IOCTL_NR(DRM_RADEON_TEXTURE)]    = { radeon_cp_texture,   1, 0 },
3091         [DRM_IOCTL_NR(DRM_RADEON_STIPPLE)]    = { radeon_cp_stipple,   1, 0 },
3092         [DRM_IOCTL_NR(DRM_RADEON_INDIRECT)]   = { radeon_cp_indirect,  1, 1 },
3093         [DRM_IOCTL_NR(DRM_RADEON_VERTEX2)]    = { radeon_cp_vertex2,   1, 0 },
3094         [DRM_IOCTL_NR(DRM_RADEON_CMDBUF)]     = { radeon_cp_cmdbuf,    1, 0 },
3095         [DRM_IOCTL_NR(DRM_RADEON_GETPARAM)]   = { radeon_cp_getparam,  1, 0 },
3096         [DRM_IOCTL_NR(DRM_RADEON_FLIP)]       = { radeon_cp_flip,      1, 0 },
3097         [DRM_IOCTL_NR(DRM_RADEON_ALLOC)]      = { radeon_mem_alloc,    1, 0 },
3098         [DRM_IOCTL_NR(DRM_RADEON_FREE)]       = { radeon_mem_free,     1, 0 },
3099         [DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)]  = { radeon_mem_init_heap,1, 1 },
3100         [DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)]   = { radeon_irq_emit,     1, 0 },
3101         [DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)]   = { radeon_irq_wait,     1, 0 },
3102         [DRM_IOCTL_NR(DRM_RADEON_SETPARAM)]   = { radeon_cp_setparam,  1, 0 },
3103         [DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = { radeon_surface_alloc,1, 0 },
3104         [DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)]  = { radeon_surface_free, 1, 0 }
3105 };
3106
3107 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);