Merge branch 'sched/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip...
[pandora-kernel.git] / drivers / gpu / drm / savage / savage_state.c
1 /* savage_state.c -- State and drawing support for Savage
2  *
3  * Copyright 2004  Felix Kuehling
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sub license,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the
14  * next paragraph) shall be included in all copies or substantial portions
15  * of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
21  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
22  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  */
25 #include "drmP.h"
26 #include "savage_drm.h"
27 #include "savage_drv.h"
28
29 void savage_emit_clip_rect_s3d(drm_savage_private_t * dev_priv,
30                                const struct drm_clip_rect * pbox)
31 {
32         uint32_t scstart = dev_priv->state.s3d.new_scstart;
33         uint32_t scend = dev_priv->state.s3d.new_scend;
34         scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) |
35             ((uint32_t) pbox->x1 & 0x000007ff) |
36             (((uint32_t) pbox->y1 << 16) & 0x07ff0000);
37         scend = (scend & ~SAVAGE_SCISSOR_MASK_S3D) |
38             (((uint32_t) pbox->x2 - 1) & 0x000007ff) |
39             ((((uint32_t) pbox->y2 - 1) << 16) & 0x07ff0000);
40         if (scstart != dev_priv->state.s3d.scstart ||
41             scend != dev_priv->state.s3d.scend) {
42                 DMA_LOCALS;
43                 BEGIN_DMA(4);
44                 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
45                 DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
46                 DMA_WRITE(scstart);
47                 DMA_WRITE(scend);
48                 dev_priv->state.s3d.scstart = scstart;
49                 dev_priv->state.s3d.scend = scend;
50                 dev_priv->waiting = 1;
51                 DMA_COMMIT();
52         }
53 }
54
55 void savage_emit_clip_rect_s4(drm_savage_private_t * dev_priv,
56                               const struct drm_clip_rect * pbox)
57 {
58         uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0;
59         uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1;
60         drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) |
61             ((uint32_t) pbox->x1 & 0x000007ff) |
62             (((uint32_t) pbox->y1 << 12) & 0x00fff000);
63         drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) |
64             (((uint32_t) pbox->x2 - 1) & 0x000007ff) |
65             ((((uint32_t) pbox->y2 - 1) << 12) & 0x00fff000);
66         if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||
67             drawctrl1 != dev_priv->state.s4.drawctrl1) {
68                 DMA_LOCALS;
69                 BEGIN_DMA(4);
70                 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
71                 DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
72                 DMA_WRITE(drawctrl0);
73                 DMA_WRITE(drawctrl1);
74                 dev_priv->state.s4.drawctrl0 = drawctrl0;
75                 dev_priv->state.s4.drawctrl1 = drawctrl1;
76                 dev_priv->waiting = 1;
77                 DMA_COMMIT();
78         }
79 }
80
81 static int savage_verify_texaddr(drm_savage_private_t * dev_priv, int unit,
82                                  uint32_t addr)
83 {
84         if ((addr & 6) != 2) {  /* reserved bits */
85                 DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
86                 return -EINVAL;
87         }
88         if (!(addr & 1)) {      /* local */
89                 addr &= ~7;
90                 if (addr < dev_priv->texture_offset ||
91                     addr >= dev_priv->texture_offset + dev_priv->texture_size) {
92                         DRM_ERROR
93                             ("bad texAddr%d %08x (local addr out of range)\n",
94                              unit, addr);
95                         return -EINVAL;
96                 }
97         } else {                /* AGP */
98                 if (!dev_priv->agp_textures) {
99                         DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
100                                   unit, addr);
101                         return -EINVAL;
102                 }
103                 addr &= ~7;
104                 if (addr < dev_priv->agp_textures->offset ||
105                     addr >= (dev_priv->agp_textures->offset +
106                              dev_priv->agp_textures->size)) {
107                         DRM_ERROR
108                             ("bad texAddr%d %08x (AGP addr out of range)\n",
109                              unit, addr);
110                         return -EINVAL;
111                 }
112         }
113         return 0;
114 }
115
/*
 * Helpers for the state verification functions.  Both expect the locals
 * dev_priv, start, count and regs to be in scope, where regs[0] holds the
 * value for register number start.
 *
 * SAVE_STATE(reg, where): if reg lies in [start, start + count), copy its
 * new value into dev_priv->state.where.
 *
 * SAVE_STATE_MASK(reg, where, mask): same, but only the bits selected by
 * mask are replaced; the other bits of dev_priv->state.where are kept.
 *
 * Both are wrapped in do { } while (0) so that they behave like a single
 * statement, e.g. in an unbraced if/else.
 */
#define SAVE_STATE(reg,where) do {				\
	if (start <= (reg) && start + count > (reg))		\
		dev_priv->state.where = regs[(reg) - start];	\
} while (0)
#define SAVE_STATE_MASK(reg,where,mask) do {			\
	if (start <= (reg) && start + count > (reg)) {		\
		uint32_t tmp;					\
		tmp = regs[(reg) - start];			\
		dev_priv->state.where = (tmp & (mask)) |	\
			(dev_priv->state.where & ~(mask));	\
	}							\
} while (0)
127
128 static int savage_verify_state_s3d(drm_savage_private_t * dev_priv,
129                                    unsigned int start, unsigned int count,
130                                    const uint32_t *regs)
131 {
132         if (start < SAVAGE_TEXPALADDR_S3D ||
133             start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {
134                 DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
135                           start, start + count - 1);
136                 return -EINVAL;
137         }
138
139         SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
140                         ~SAVAGE_SCISSOR_MASK_S3D);
141         SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
142                         ~SAVAGE_SCISSOR_MASK_S3D);
143
144         /* if any texture regs were changed ... */
145         if (start <= SAVAGE_TEXCTRL_S3D &&
146             start + count > SAVAGE_TEXPALADDR_S3D) {
147                 /* ... check texture state */
148                 SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);
149                 SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);
150                 if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK)
151                         return savage_verify_texaddr(dev_priv, 0,
152                                                 dev_priv->state.s3d.texaddr);
153         }
154
155         return 0;
156 }
157
158 static int savage_verify_state_s4(drm_savage_private_t * dev_priv,
159                                   unsigned int start, unsigned int count,
160                                   const uint32_t *regs)
161 {
162         int ret = 0;
163
164         if (start < SAVAGE_DRAWLOCALCTRL_S4 ||
165             start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) {
166                 DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
167                           start, start + count - 1);
168                 return -EINVAL;
169         }
170
171         SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,
172                         ~SAVAGE_SCISSOR_MASK_S4);
173         SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,
174                         ~SAVAGE_SCISSOR_MASK_S4);
175
176         /* if any texture regs were changed ... */
177         if (start <= SAVAGE_TEXDESCR_S4 &&
178             start + count > SAVAGE_TEXPALADDR_S4) {
179                 /* ... check texture state */
180                 SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);
181                 SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);
182                 SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);
183                 if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)
184                         ret |= savage_verify_texaddr(dev_priv, 0,
185                                                 dev_priv->state.s4.texaddr0);
186                 if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)
187                         ret |= savage_verify_texaddr(dev_priv, 1,
188                                                 dev_priv->state.s4.texaddr1);
189         }
190
191         return ret;
192 }
193
194 #undef SAVE_STATE
195 #undef SAVE_STATE_MASK
196
197 static int savage_dispatch_state(drm_savage_private_t * dev_priv,
198                                  const drm_savage_cmd_header_t * cmd_header,
199                                  const uint32_t *regs)
200 {
201         unsigned int count = cmd_header->state.count;
202         unsigned int start = cmd_header->state.start;
203         unsigned int count2 = 0;
204         unsigned int bci_size;
205         int ret;
206         DMA_LOCALS;
207
208         if (!count)
209                 return 0;
210
211         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
212                 ret = savage_verify_state_s3d(dev_priv, start, count, regs);
213                 if (ret != 0)
214                         return ret;
215                 /* scissor regs are emitted in savage_dispatch_draw */
216                 if (start < SAVAGE_SCSTART_S3D) {
217                         if (start + count > SAVAGE_SCEND_S3D + 1)
218                                 count2 = count - (SAVAGE_SCEND_S3D + 1 - start);
219                         if (start + count > SAVAGE_SCSTART_S3D)
220                                 count = SAVAGE_SCSTART_S3D - start;
221                 } else if (start <= SAVAGE_SCEND_S3D) {
222                         if (start + count > SAVAGE_SCEND_S3D + 1) {
223                                 count -= SAVAGE_SCEND_S3D + 1 - start;
224                                 start = SAVAGE_SCEND_S3D + 1;
225                         } else
226                                 return 0;
227                 }
228         } else {
229                 ret = savage_verify_state_s4(dev_priv, start, count, regs);
230                 if (ret != 0)
231                         return ret;
232                 /* scissor regs are emitted in savage_dispatch_draw */
233                 if (start < SAVAGE_DRAWCTRL0_S4) {
234                         if (start + count > SAVAGE_DRAWCTRL1_S4 + 1)
235                                 count2 = count -
236                                          (SAVAGE_DRAWCTRL1_S4 + 1 - start);
237                         if (start + count > SAVAGE_DRAWCTRL0_S4)
238                                 count = SAVAGE_DRAWCTRL0_S4 - start;
239                 } else if (start <= SAVAGE_DRAWCTRL1_S4) {
240                         if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) {
241                                 count -= SAVAGE_DRAWCTRL1_S4 + 1 - start;
242                                 start = SAVAGE_DRAWCTRL1_S4 + 1;
243                         } else
244                                 return 0;
245                 }
246         }
247
248         bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255;
249
250         if (cmd_header->state.global) {
251                 BEGIN_DMA(bci_size + 1);
252                 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
253                 dev_priv->waiting = 1;
254         } else {
255                 BEGIN_DMA(bci_size);
256         }
257
258         do {
259                 while (count > 0) {
260                         unsigned int n = count < 255 ? count : 255;
261                         DMA_SET_REGISTERS(start, n);
262                         DMA_COPY(regs, n);
263                         count -= n;
264                         start += n;
265                         regs += n;
266                 }
267                 start += 2;
268                 regs += 2;
269                 count = count2;
270                 count2 = 0;
271         } while (count);
272
273         DMA_COMMIT();
274
275         return 0;
276 }
277
278 static int savage_dispatch_dma_prim(drm_savage_private_t * dev_priv,
279                                     const drm_savage_cmd_header_t * cmd_header,
280                                     const struct drm_buf * dmabuf)
281 {
282         unsigned char reorder = 0;
283         unsigned int prim = cmd_header->prim.prim;
284         unsigned int skip = cmd_header->prim.skip;
285         unsigned int n = cmd_header->prim.count;
286         unsigned int start = cmd_header->prim.start;
287         unsigned int i;
288         BCI_LOCALS;
289
290         if (!dmabuf) {
291                 DRM_ERROR("called without dma buffers!\n");
292                 return -EINVAL;
293         }
294
295         if (!n)
296                 return 0;
297
298         switch (prim) {
299         case SAVAGE_PRIM_TRILIST_201:
300                 reorder = 1;
301                 prim = SAVAGE_PRIM_TRILIST;
302         case SAVAGE_PRIM_TRILIST:
303                 if (n % 3 != 0) {
304                         DRM_ERROR("wrong number of vertices %u in TRILIST\n",
305                                   n);
306                         return -EINVAL;
307                 }
308                 break;
309         case SAVAGE_PRIM_TRISTRIP:
310         case SAVAGE_PRIM_TRIFAN:
311                 if (n < 3) {
312                         DRM_ERROR
313                             ("wrong number of vertices %u in TRIFAN/STRIP\n",
314                              n);
315                         return -EINVAL;
316                 }
317                 break;
318         default:
319                 DRM_ERROR("invalid primitive type %u\n", prim);
320                 return -EINVAL;
321         }
322
323         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
324                 if (skip != 0) {
325                         DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
326                         return -EINVAL;
327                 }
328         } else {
329                 unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
330                     (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
331                     (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
332                 if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
333                         DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
334                         return -EINVAL;
335                 }
336                 if (reorder) {
337                         DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
338                         return -EINVAL;
339                 }
340         }
341
342         if (start + n > dmabuf->total / 32) {
343                 DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
344                           start, start + n - 1, dmabuf->total / 32);
345                 return -EINVAL;
346         }
347
348         /* Vertex DMA doesn't work with command DMA at the same time,
349          * so we use BCI_... to submit commands here. Flush buffered
350          * faked DMA first. */
351         DMA_FLUSH();
352
353         if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
354                 BEGIN_BCI(2);
355                 BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
356                 BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
357                 dev_priv->state.common.vbaddr = dmabuf->bus_address;
358         }
359         if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
360                 /* Workaround for what looks like a hardware bug. If a
361                  * WAIT_3D_IDLE was emitted some time before the
362                  * indexed drawing command then the engine will lock
363                  * up. There are two known workarounds:
364                  * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
365                 BEGIN_BCI(63);
366                 for (i = 0; i < 63; ++i)
367                         BCI_WRITE(BCI_CMD_WAIT);
368                 dev_priv->waiting = 0;
369         }
370
371         prim <<= 25;
372         while (n != 0) {
373                 /* Can emit up to 255 indices (85 triangles) at once. */
374                 unsigned int count = n > 255 ? 255 : n;
375                 if (reorder) {
376                         /* Need to reorder indices for correct flat
377                          * shading while preserving the clock sense
378                          * for correct culling. Only on Savage3D. */
379                         int reorder[3] = { -1, -1, -1 };
380                         reorder[start % 3] = 2;
381
382                         BEGIN_BCI((count + 1 + 1) / 2);
383                         BCI_DRAW_INDICES_S3D(count, prim, start + 2);
384
385                         for (i = start + 1; i + 1 < start + count; i += 2)
386                                 BCI_WRITE((i + reorder[i % 3]) |
387                                           ((i + 1 +
388                                             reorder[(i + 1) % 3]) << 16));
389                         if (i < start + count)
390                                 BCI_WRITE(i + reorder[i % 3]);
391                 } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
392                         BEGIN_BCI((count + 1 + 1) / 2);
393                         BCI_DRAW_INDICES_S3D(count, prim, start);
394
395                         for (i = start + 1; i + 1 < start + count; i += 2)
396                                 BCI_WRITE(i | ((i + 1) << 16));
397                         if (i < start + count)
398                                 BCI_WRITE(i);
399                 } else {
400                         BEGIN_BCI((count + 2 + 1) / 2);
401                         BCI_DRAW_INDICES_S4(count, prim, skip);
402
403                         for (i = start; i + 1 < start + count; i += 2)
404                                 BCI_WRITE(i | ((i + 1) << 16));
405                         if (i < start + count)
406                                 BCI_WRITE(i);
407                 }
408
409                 start += count;
410                 n -= count;
411
412                 prim |= BCI_CMD_DRAW_CONT;
413         }
414
415         return 0;
416 }
417
418 static int savage_dispatch_vb_prim(drm_savage_private_t * dev_priv,
419                                    const drm_savage_cmd_header_t * cmd_header,
420                                    const uint32_t *vtxbuf, unsigned int vb_size,
421                                    unsigned int vb_stride)
422 {
423         unsigned char reorder = 0;
424         unsigned int prim = cmd_header->prim.prim;
425         unsigned int skip = cmd_header->prim.skip;
426         unsigned int n = cmd_header->prim.count;
427         unsigned int start = cmd_header->prim.start;
428         unsigned int vtx_size;
429         unsigned int i;
430         DMA_LOCALS;
431
432         if (!n)
433                 return 0;
434
435         switch (prim) {
436         case SAVAGE_PRIM_TRILIST_201:
437                 reorder = 1;
438                 prim = SAVAGE_PRIM_TRILIST;
439         case SAVAGE_PRIM_TRILIST:
440                 if (n % 3 != 0) {
441                         DRM_ERROR("wrong number of vertices %u in TRILIST\n",
442                                   n);
443                         return -EINVAL;
444                 }
445                 break;
446         case SAVAGE_PRIM_TRISTRIP:
447         case SAVAGE_PRIM_TRIFAN:
448                 if (n < 3) {
449                         DRM_ERROR
450                             ("wrong number of vertices %u in TRIFAN/STRIP\n",
451                              n);
452                         return -EINVAL;
453                 }
454                 break;
455         default:
456                 DRM_ERROR("invalid primitive type %u\n", prim);
457                 return -EINVAL;
458         }
459
460         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
461                 if (skip > SAVAGE_SKIP_ALL_S3D) {
462                         DRM_ERROR("invalid skip flags 0x%04x\n", skip);
463                         return -EINVAL;
464                 }
465                 vtx_size = 8;   /* full vertex */
466         } else {
467                 if (skip > SAVAGE_SKIP_ALL_S4) {
468                         DRM_ERROR("invalid skip flags 0x%04x\n", skip);
469                         return -EINVAL;
470                 }
471                 vtx_size = 10;  /* full vertex */
472         }
473
474         vtx_size -= (skip & 1) + (skip >> 1 & 1) +
475             (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
476             (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
477
478         if (vtx_size > vb_stride) {
479                 DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
480                           vtx_size, vb_stride);
481                 return -EINVAL;
482         }
483
484         if (start + n > vb_size / (vb_stride * 4)) {
485                 DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
486                           start, start + n - 1, vb_size / (vb_stride * 4));
487                 return -EINVAL;
488         }
489
490         prim <<= 25;
491         while (n != 0) {
492                 /* Can emit up to 255 vertices (85 triangles) at once. */
493                 unsigned int count = n > 255 ? 255 : n;
494                 if (reorder) {
495                         /* Need to reorder vertices for correct flat
496                          * shading while preserving the clock sense
497                          * for correct culling. Only on Savage3D. */
498                         int reorder[3] = { -1, -1, -1 };
499                         reorder[start % 3] = 2;
500
501                         BEGIN_DMA(count * vtx_size + 1);
502                         DMA_DRAW_PRIMITIVE(count, prim, skip);
503
504                         for (i = start; i < start + count; ++i) {
505                                 unsigned int j = i + reorder[i % 3];
506                                 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
507                         }
508
509                         DMA_COMMIT();
510                 } else {
511                         BEGIN_DMA(count * vtx_size + 1);
512                         DMA_DRAW_PRIMITIVE(count, prim, skip);
513
514                         if (vb_stride == vtx_size) {
515                                 DMA_COPY(&vtxbuf[vb_stride * start],
516                                          vtx_size * count);
517                         } else {
518                                 for (i = start; i < start + count; ++i) {
519                                         DMA_COPY(&vtxbuf [vb_stride * i],
520                                                  vtx_size);
521                                 }
522                         }
523
524                         DMA_COMMIT();
525                 }
526
527                 start += count;
528                 n -= count;
529
530                 prim |= BCI_CMD_DRAW_CONT;
531         }
532
533         return 0;
534 }
535
536 static int savage_dispatch_dma_idx(drm_savage_private_t * dev_priv,
537                                    const drm_savage_cmd_header_t * cmd_header,
538                                    const uint16_t *idx,
539                                    const struct drm_buf * dmabuf)
540 {
541         unsigned char reorder = 0;
542         unsigned int prim = cmd_header->idx.prim;
543         unsigned int skip = cmd_header->idx.skip;
544         unsigned int n = cmd_header->idx.count;
545         unsigned int i;
546         BCI_LOCALS;
547
548         if (!dmabuf) {
549                 DRM_ERROR("called without dma buffers!\n");
550                 return -EINVAL;
551         }
552
553         if (!n)
554                 return 0;
555
556         switch (prim) {
557         case SAVAGE_PRIM_TRILIST_201:
558                 reorder = 1;
559                 prim = SAVAGE_PRIM_TRILIST;
560         case SAVAGE_PRIM_TRILIST:
561                 if (n % 3 != 0) {
562                         DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
563                         return -EINVAL;
564                 }
565                 break;
566         case SAVAGE_PRIM_TRISTRIP:
567         case SAVAGE_PRIM_TRIFAN:
568                 if (n < 3) {
569                         DRM_ERROR
570                             ("wrong number of indices %u in TRIFAN/STRIP\n", n);
571                         return -EINVAL;
572                 }
573                 break;
574         default:
575                 DRM_ERROR("invalid primitive type %u\n", prim);
576                 return -EINVAL;
577         }
578
579         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
580                 if (skip != 0) {
581                         DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
582                         return -EINVAL;
583                 }
584         } else {
585                 unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
586                     (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
587                     (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
588                 if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
589                         DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
590                         return -EINVAL;
591                 }
592                 if (reorder) {
593                         DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
594                         return -EINVAL;
595                 }
596         }
597
598         /* Vertex DMA doesn't work with command DMA at the same time,
599          * so we use BCI_... to submit commands here. Flush buffered
600          * faked DMA first. */
601         DMA_FLUSH();
602
603         if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
604                 BEGIN_BCI(2);
605                 BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
606                 BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
607                 dev_priv->state.common.vbaddr = dmabuf->bus_address;
608         }
609         if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
610                 /* Workaround for what looks like a hardware bug. If a
611                  * WAIT_3D_IDLE was emitted some time before the
612                  * indexed drawing command then the engine will lock
613                  * up. There are two known workarounds:
614                  * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
615                 BEGIN_BCI(63);
616                 for (i = 0; i < 63; ++i)
617                         BCI_WRITE(BCI_CMD_WAIT);
618                 dev_priv->waiting = 0;
619         }
620
621         prim <<= 25;
622         while (n != 0) {
623                 /* Can emit up to 255 indices (85 triangles) at once. */
624                 unsigned int count = n > 255 ? 255 : n;
625
626                 /* check indices */
627                 for (i = 0; i < count; ++i) {
628                         if (idx[i] > dmabuf->total / 32) {
629                                 DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
630                                           i, idx[i], dmabuf->total / 32);
631                                 return -EINVAL;
632                         }
633                 }
634
635                 if (reorder) {
636                         /* Need to reorder indices for correct flat
637                          * shading while preserving the clock sense
638                          * for correct culling. Only on Savage3D. */
639                         int reorder[3] = { 2, -1, -1 };
640
641                         BEGIN_BCI((count + 1 + 1) / 2);
642                         BCI_DRAW_INDICES_S3D(count, prim, idx[2]);
643
644                         for (i = 1; i + 1 < count; i += 2)
645                                 BCI_WRITE(idx[i + reorder[i % 3]] |
646                                           (idx[i + 1 +
647                                            reorder[(i + 1) % 3]] << 16));
648                         if (i < count)
649                                 BCI_WRITE(idx[i + reorder[i % 3]]);
650                 } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
651                         BEGIN_BCI((count + 1 + 1) / 2);
652                         BCI_DRAW_INDICES_S3D(count, prim, idx[0]);
653
654                         for (i = 1; i + 1 < count; i += 2)
655                                 BCI_WRITE(idx[i] | (idx[i + 1] << 16));
656                         if (i < count)
657                                 BCI_WRITE(idx[i]);
658                 } else {
659                         BEGIN_BCI((count + 2 + 1) / 2);
660                         BCI_DRAW_INDICES_S4(count, prim, skip);
661
662                         for (i = 0; i + 1 < count; i += 2)
663                                 BCI_WRITE(idx[i] | (idx[i + 1] << 16));
664                         if (i < count)
665                                 BCI_WRITE(idx[i]);
666                 }
667
668                 idx += count;
669                 n -= count;
670
671                 prim |= BCI_CMD_DRAW_CONT;
672         }
673
674         return 0;
675 }
676
677 static int savage_dispatch_vb_idx(drm_savage_private_t * dev_priv,
678                                   const drm_savage_cmd_header_t * cmd_header,
679                                   const uint16_t *idx,
680                                   const uint32_t *vtxbuf,
681                                   unsigned int vb_size, unsigned int vb_stride)
682 {
683         unsigned char reorder = 0;
684         unsigned int prim = cmd_header->idx.prim;
685         unsigned int skip = cmd_header->idx.skip;
686         unsigned int n = cmd_header->idx.count;
687         unsigned int vtx_size;
688         unsigned int i;
689         DMA_LOCALS;
690
691         if (!n)
692                 return 0;
693
694         switch (prim) {
695         case SAVAGE_PRIM_TRILIST_201:
696                 reorder = 1;
697                 prim = SAVAGE_PRIM_TRILIST;
698         case SAVAGE_PRIM_TRILIST:
699                 if (n % 3 != 0) {
700                         DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
701                         return -EINVAL;
702                 }
703                 break;
704         case SAVAGE_PRIM_TRISTRIP:
705         case SAVAGE_PRIM_TRIFAN:
706                 if (n < 3) {
707                         DRM_ERROR
708                             ("wrong number of indices %u in TRIFAN/STRIP\n", n);
709                         return -EINVAL;
710                 }
711                 break;
712         default:
713                 DRM_ERROR("invalid primitive type %u\n", prim);
714                 return -EINVAL;
715         }
716
717         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
718                 if (skip > SAVAGE_SKIP_ALL_S3D) {
719                         DRM_ERROR("invalid skip flags 0x%04x\n", skip);
720                         return -EINVAL;
721                 }
722                 vtx_size = 8;   /* full vertex */
723         } else {
724                 if (skip > SAVAGE_SKIP_ALL_S4) {
725                         DRM_ERROR("invalid skip flags 0x%04x\n", skip);
726                         return -EINVAL;
727                 }
728                 vtx_size = 10;  /* full vertex */
729         }
730
731         vtx_size -= (skip & 1) + (skip >> 1 & 1) +
732             (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
733             (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
734
735         if (vtx_size > vb_stride) {
736                 DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
737                           vtx_size, vb_stride);
738                 return -EINVAL;
739         }
740
741         prim <<= 25;
742         while (n != 0) {
743                 /* Can emit up to 255 vertices (85 triangles) at once. */
744                 unsigned int count = n > 255 ? 255 : n;
745
746                 /* Check indices */
747                 for (i = 0; i < count; ++i) {
748                         if (idx[i] > vb_size / (vb_stride * 4)) {
749                                 DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
750                                           i, idx[i], vb_size / (vb_stride * 4));
751                                 return -EINVAL;
752                         }
753                 }
754
755                 if (reorder) {
756                         /* Need to reorder vertices for correct flat
757                          * shading while preserving the clock sense
758                          * for correct culling. Only on Savage3D. */
759                         int reorder[3] = { 2, -1, -1 };
760
761                         BEGIN_DMA(count * vtx_size + 1);
762                         DMA_DRAW_PRIMITIVE(count, prim, skip);
763
764                         for (i = 0; i < count; ++i) {
765                                 unsigned int j = idx[i + reorder[i % 3]];
766                                 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
767                         }
768
769                         DMA_COMMIT();
770                 } else {
771                         BEGIN_DMA(count * vtx_size + 1);
772                         DMA_DRAW_PRIMITIVE(count, prim, skip);
773
774                         for (i = 0; i < count; ++i) {
775                                 unsigned int j = idx[i];
776                                 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
777                         }
778
779                         DMA_COMMIT();
780                 }
781
782                 idx += count;
783                 n -= count;
784
785                 prim |= BCI_CMD_DRAW_CONT;
786         }
787
788         return 0;
789 }
790
791 static int savage_dispatch_clear(drm_savage_private_t * dev_priv,
792                                  const drm_savage_cmd_header_t * cmd_header,
793                                  const drm_savage_cmd_header_t *data,
794                                  unsigned int nbox,
795                                  const struct drm_clip_rect *boxes)
796 {
797         unsigned int flags = cmd_header->clear0.flags;
798         unsigned int clear_cmd;
799         unsigned int i, nbufs;
800         DMA_LOCALS;
801
802         if (nbox == 0)
803                 return 0;
804
805         clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
806             BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;
807         BCI_CMD_SET_ROP(clear_cmd, 0xCC);
808
809         nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) +
810             ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ? 1 : 0);
811         if (nbufs == 0)
812                 return 0;
813
814         if (data->clear1.mask != 0xffffffff) {
815                 /* set mask */
816                 BEGIN_DMA(2);
817                 DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
818                 DMA_WRITE(data->clear1.mask);
819                 DMA_COMMIT();
820         }
821         for (i = 0; i < nbox; ++i) {
822                 unsigned int x, y, w, h;
823                 unsigned int buf;
824                 x = boxes[i].x1, y = boxes[i].y1;
825                 w = boxes[i].x2 - boxes[i].x1;
826                 h = boxes[i].y2 - boxes[i].y1;
827                 BEGIN_DMA(nbufs * 6);
828                 for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
829                         if (!(flags & buf))
830                                 continue;
831                         DMA_WRITE(clear_cmd);
832                         switch (buf) {
833                         case SAVAGE_FRONT:
834                                 DMA_WRITE(dev_priv->front_offset);
835                                 DMA_WRITE(dev_priv->front_bd);
836                                 break;
837                         case SAVAGE_BACK:
838                                 DMA_WRITE(dev_priv->back_offset);
839                                 DMA_WRITE(dev_priv->back_bd);
840                                 break;
841                         case SAVAGE_DEPTH:
842                                 DMA_WRITE(dev_priv->depth_offset);
843                                 DMA_WRITE(dev_priv->depth_bd);
844                                 break;
845                         }
846                         DMA_WRITE(data->clear1.value);
847                         DMA_WRITE(BCI_X_Y(x, y));
848                         DMA_WRITE(BCI_W_H(w, h));
849                 }
850                 DMA_COMMIT();
851         }
852         if (data->clear1.mask != 0xffffffff) {
853                 /* reset mask */
854                 BEGIN_DMA(2);
855                 DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
856                 DMA_WRITE(0xffffffff);
857                 DMA_COMMIT();
858         }
859
860         return 0;
861 }
862
863 static int savage_dispatch_swap(drm_savage_private_t * dev_priv,
864                                 unsigned int nbox, const struct drm_clip_rect *boxes)
865 {
866         unsigned int swap_cmd;
867         unsigned int i;
868         DMA_LOCALS;
869
870         if (nbox == 0)
871                 return 0;
872
873         swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
874             BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;
875         BCI_CMD_SET_ROP(swap_cmd, 0xCC);
876
877         for (i = 0; i < nbox; ++i) {
878                 BEGIN_DMA(6);
879                 DMA_WRITE(swap_cmd);
880                 DMA_WRITE(dev_priv->back_offset);
881                 DMA_WRITE(dev_priv->back_bd);
882                 DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
883                 DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
884                 DMA_WRITE(BCI_W_H(boxes[i].x2 - boxes[i].x1,
885                                   boxes[i].y2 - boxes[i].y1));
886                 DMA_COMMIT();
887         }
888
889         return 0;
890 }
891
892 static int savage_dispatch_draw(drm_savage_private_t * dev_priv,
893                                 const drm_savage_cmd_header_t *start,
894                                 const drm_savage_cmd_header_t *end,
895                                 const struct drm_buf * dmabuf,
896                                 const unsigned int *vtxbuf,
897                                 unsigned int vb_size, unsigned int vb_stride,
898                                 unsigned int nbox,
899                                 const struct drm_clip_rect *boxes)
900 {
901         unsigned int i, j;
902         int ret;
903
904         for (i = 0; i < nbox; ++i) {
905                 const drm_savage_cmd_header_t *cmdbuf;
906                 dev_priv->emit_clip_rect(dev_priv, &boxes[i]);
907
908                 cmdbuf = start;
909                 while (cmdbuf < end) {
910                         drm_savage_cmd_header_t cmd_header;
911                         cmd_header = *cmdbuf;
912                         cmdbuf++;
913                         switch (cmd_header.cmd.cmd) {
914                         case SAVAGE_CMD_DMA_PRIM:
915                                 ret = savage_dispatch_dma_prim(
916                                         dev_priv, &cmd_header, dmabuf);
917                                 break;
918                         case SAVAGE_CMD_VB_PRIM:
919                                 ret = savage_dispatch_vb_prim(
920                                         dev_priv, &cmd_header,
921                                         vtxbuf, vb_size, vb_stride);
922                                 break;
923                         case SAVAGE_CMD_DMA_IDX:
924                                 j = (cmd_header.idx.count + 3) / 4;
925                                 /* j was check in savage_bci_cmdbuf */
926                                 ret = savage_dispatch_dma_idx(dev_priv,
927                                         &cmd_header, (const uint16_t *)cmdbuf,
928                                         dmabuf);
929                                 cmdbuf += j;
930                                 break;
931                         case SAVAGE_CMD_VB_IDX:
932                                 j = (cmd_header.idx.count + 3) / 4;
933                                 /* j was check in savage_bci_cmdbuf */
934                                 ret = savage_dispatch_vb_idx(dev_priv,
935                                         &cmd_header, (const uint16_t *)cmdbuf,
936                                         (const uint32_t *)vtxbuf, vb_size,
937                                         vb_stride);
938                                 cmdbuf += j;
939                                 break;
940                         default:
941                                 /* What's the best return code? EFAULT? */
942                                 DRM_ERROR("IMPLEMENTATION ERROR: "
943                                           "non-drawing-command %d\n",
944                                           cmd_header.cmd.cmd);
945                                 return -EINVAL;
946                         }
947
948                         if (ret != 0)
949                                 return ret;
950                 }
951         }
952
953         return 0;
954 }
955
956 int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
957 {
958         drm_savage_private_t *dev_priv = dev->dev_private;
959         struct drm_device_dma *dma = dev->dma;
960         struct drm_buf *dmabuf;
961         drm_savage_cmdbuf_t *cmdbuf = data;
962         drm_savage_cmd_header_t *kcmd_addr = NULL;
963         drm_savage_cmd_header_t *first_draw_cmd;
964         unsigned int *kvb_addr = NULL;
965         struct drm_clip_rect *kbox_addr = NULL;
966         unsigned int i, j;
967         int ret = 0;
968
969         DRM_DEBUG("\n");
970
971         LOCK_TEST_WITH_RETURN(dev, file_priv);
972
973         if (dma && dma->buflist) {
974                 if (cmdbuf->dma_idx > dma->buf_count) {
975                         DRM_ERROR
976                             ("vertex buffer index %u out of range (0-%u)\n",
977                              cmdbuf->dma_idx, dma->buf_count - 1);
978                         return -EINVAL;
979                 }
980                 dmabuf = dma->buflist[cmdbuf->dma_idx];
981         } else {
982                 dmabuf = NULL;
983         }
984
985         /* Copy the user buffers into kernel temporary areas.  This hasn't been
986          * a performance loss compared to VERIFYAREA_READ/
987          * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct
988          * for locking on FreeBSD.
989          */
990         if (cmdbuf->size) {
991                 kcmd_addr = drm_alloc(cmdbuf->size * 8, DRM_MEM_DRIVER);
992                 if (kcmd_addr == NULL)
993                         return -ENOMEM;
994
995                 if (DRM_COPY_FROM_USER(kcmd_addr, cmdbuf->cmd_addr,
996                                        cmdbuf->size * 8))
997                 {
998                         drm_free(kcmd_addr, cmdbuf->size * 8, DRM_MEM_DRIVER);
999                         return -EFAULT;
1000                 }
1001                 cmdbuf->cmd_addr = kcmd_addr;
1002         }
1003         if (cmdbuf->vb_size) {
1004                 kvb_addr = drm_alloc(cmdbuf->vb_size, DRM_MEM_DRIVER);
1005                 if (kvb_addr == NULL) {
1006                         ret = -ENOMEM;
1007                         goto done;
1008                 }
1009
1010                 if (DRM_COPY_FROM_USER(kvb_addr, cmdbuf->vb_addr,
1011                                        cmdbuf->vb_size)) {
1012                         ret = -EFAULT;
1013                         goto done;
1014                 }
1015                 cmdbuf->vb_addr = kvb_addr;
1016         }
1017         if (cmdbuf->nbox) {
1018                 kbox_addr = drm_alloc(cmdbuf->nbox * sizeof(struct drm_clip_rect),
1019                                        DRM_MEM_DRIVER);
1020                 if (kbox_addr == NULL) {
1021                         ret = -ENOMEM;
1022                         goto done;
1023                 }
1024
1025                 if (DRM_COPY_FROM_USER(kbox_addr, cmdbuf->box_addr,
1026                                        cmdbuf->nbox * sizeof(struct drm_clip_rect))) {
1027                         ret = -EFAULT;
1028                         goto done;
1029                 }
1030         cmdbuf->box_addr = kbox_addr;
1031         }
1032
1033         /* Make sure writes to DMA buffers are finished before sending
1034          * DMA commands to the graphics hardware. */
1035         DRM_MEMORYBARRIER();
1036
1037         /* Coming from user space. Don't know if the Xserver has
1038          * emitted wait commands. Assuming the worst. */
1039         dev_priv->waiting = 1;
1040
1041         i = 0;
1042         first_draw_cmd = NULL;
1043         while (i < cmdbuf->size) {
1044                 drm_savage_cmd_header_t cmd_header;
1045                 cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr;
1046                 cmdbuf->cmd_addr++;
1047                 i++;
1048
1049                 /* Group drawing commands with same state to minimize
1050                  * iterations over clip rects. */
1051                 j = 0;
1052                 switch (cmd_header.cmd.cmd) {
1053                 case SAVAGE_CMD_DMA_IDX:
1054                 case SAVAGE_CMD_VB_IDX:
1055                         j = (cmd_header.idx.count + 3) / 4;
1056                         if (i + j > cmdbuf->size) {
1057                                 DRM_ERROR("indexed drawing command extends "
1058                                           "beyond end of command buffer\n");
1059                                 DMA_FLUSH();
1060                                 return -EINVAL;
1061                         }
1062                         /* fall through */
1063                 case SAVAGE_CMD_DMA_PRIM:
1064                 case SAVAGE_CMD_VB_PRIM:
1065                         if (!first_draw_cmd)
1066                                 first_draw_cmd = cmdbuf->cmd_addr - 1;
1067                         cmdbuf->cmd_addr += j;
1068                         i += j;
1069                         break;
1070                 default:
1071                         if (first_draw_cmd) {
1072                                 ret = savage_dispatch_draw(
1073                                       dev_priv, first_draw_cmd,
1074                                       cmdbuf->cmd_addr - 1,
1075                                       dmabuf, cmdbuf->vb_addr, cmdbuf->vb_size,
1076                                       cmdbuf->vb_stride,
1077                                       cmdbuf->nbox, cmdbuf->box_addr);
1078                                 if (ret != 0)
1079                                         return ret;
1080                                 first_draw_cmd = NULL;
1081                         }
1082                 }
1083                 if (first_draw_cmd)
1084                         continue;
1085
1086                 switch (cmd_header.cmd.cmd) {
1087                 case SAVAGE_CMD_STATE:
1088                         j = (cmd_header.state.count + 1) / 2;
1089                         if (i + j > cmdbuf->size) {
1090                                 DRM_ERROR("command SAVAGE_CMD_STATE extends "
1091                                           "beyond end of command buffer\n");
1092                                 DMA_FLUSH();
1093                                 ret = -EINVAL;
1094                                 goto done;
1095                         }
1096                         ret = savage_dispatch_state(dev_priv, &cmd_header,
1097                                 (const uint32_t *)cmdbuf->cmd_addr);
1098                         cmdbuf->cmd_addr += j;
1099                         i += j;
1100                         break;
1101                 case SAVAGE_CMD_CLEAR:
1102                         if (i + 1 > cmdbuf->size) {
1103                                 DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
1104                                           "beyond end of command buffer\n");
1105                                 DMA_FLUSH();
1106                                 ret = -EINVAL;
1107                                 goto done;
1108                         }
1109                         ret = savage_dispatch_clear(dev_priv, &cmd_header,
1110                                                     cmdbuf->cmd_addr,
1111                                                     cmdbuf->nbox,
1112                                                     cmdbuf->box_addr);
1113                         cmdbuf->cmd_addr++;
1114                         i++;
1115                         break;
1116                 case SAVAGE_CMD_SWAP:
1117                         ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox,
1118                                                    cmdbuf->box_addr);
1119                         break;
1120                 default:
1121                         DRM_ERROR("invalid command 0x%x\n",
1122                                   cmd_header.cmd.cmd);
1123                         DMA_FLUSH();
1124                         ret = -EINVAL;
1125                         goto done;
1126                 }
1127
1128                 if (ret != 0) {
1129                         DMA_FLUSH();
1130                         goto done;
1131                 }
1132         }
1133
1134         if (first_draw_cmd) {
1135                 ret = savage_dispatch_draw (
1136                         dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf,
1137                         cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride,
1138                         cmdbuf->nbox, cmdbuf->box_addr);
1139                 if (ret != 0) {
1140                         DMA_FLUSH();
1141                         goto done;
1142                 }
1143         }
1144
1145         DMA_FLUSH();
1146
1147         if (dmabuf && cmdbuf->discard) {
1148                 drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
1149                 uint16_t event;
1150                 event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
1151                 SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
1152                 savage_freelist_put(dev, dmabuf);
1153         }
1154
1155 done:
1156         /* If we didn't need to allocate them, these'll be NULL */
1157         drm_free(kcmd_addr, cmdbuf->size * 8, DRM_MEM_DRIVER);
1158         drm_free(kvb_addr, cmdbuf->vb_size, DRM_MEM_DRIVER);
1159         drm_free(kbox_addr, cmdbuf->nbox * sizeof(struct drm_clip_rect),
1160                  DRM_MEM_DRIVER);
1161
1162         return ret;
1163 }