[media] coda: free context buffers under buffer mutex
[pandora-kernel.git] / drivers / media / platform / coda / coda-bit.c
1 /*
2  * Coda multi-standard codec IP - BIT processor functions
3  *
4  * Copyright (C) 2012 Vista Silicon S.L.
5  *    Javier Martin, <javier.martin@vista-silicon.com>
6  *    Xavier Duret
7  * Copyright (C) 2012-2014 Philipp Zabel, Pengutronix
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  */
14
15 #include <linux/clk.h>
16 #include <linux/irqreturn.h>
17 #include <linux/kernel.h>
18 #include <linux/platform_device.h>
19 #include <linux/reset.h>
20 #include <linux/slab.h>
21 #include <linux/videodev2.h>
22
23 #include <media/v4l2-common.h>
24 #include <media/v4l2-ctrls.h>
25 #include <media/v4l2-fh.h>
26 #include <media/v4l2-mem2mem.h>
27 #include <media/videobuf2-core.h>
28 #include <media/videobuf2-dma-contig.h>
29 #include <media/videobuf2-vmalloc.h>
30
31 #include "coda.h"
32
33 #define CODA7_PS_BUF_SIZE       0x28000
34 #define CODA9_PS_SAVE_SIZE      (512 * 1024)
35
36 #define CODA_DEFAULT_GAMMA      4096
37 #define CODA9_DEFAULT_GAMMA     24576   /* 0.75 * 32768 */
38
39 static inline int coda_is_initialized(struct coda_dev *dev)
40 {
41         return coda_read(dev, CODA_REG_BIT_CUR_PC) != 0;
42 }
43
44 static inline unsigned long coda_isbusy(struct coda_dev *dev)
45 {
46         return coda_read(dev, CODA_REG_BIT_BUSY);
47 }
48
49 static int coda_wait_timeout(struct coda_dev *dev)
50 {
51         unsigned long timeout = jiffies + msecs_to_jiffies(1000);
52
53         while (coda_isbusy(dev)) {
54                 if (time_after(jiffies, timeout))
55                         return -ETIMEDOUT;
56         }
57         return 0;
58 }
59
60 static void coda_command_async(struct coda_ctx *ctx, int cmd)
61 {
62         struct coda_dev *dev = ctx->dev;
63
64         if (dev->devtype->product == CODA_960 ||
65             dev->devtype->product == CODA_7541) {
66                 /* Restore context related registers to CODA */
67                 coda_write(dev, ctx->bit_stream_param,
68                                 CODA_REG_BIT_BIT_STREAM_PARAM);
69                 coda_write(dev, ctx->frm_dis_flg,
70                                 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
71                 coda_write(dev, ctx->frame_mem_ctrl,
72                                 CODA_REG_BIT_FRAME_MEM_CTRL);
73                 coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR);
74         }
75
76         if (dev->devtype->product == CODA_960) {
77                 coda_write(dev, 1, CODA9_GDI_WPROT_ERR_CLR);
78                 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
79         }
80
81         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
82
83         coda_write(dev, ctx->idx, CODA_REG_BIT_RUN_INDEX);
84         coda_write(dev, ctx->params.codec_mode, CODA_REG_BIT_RUN_COD_STD);
85         coda_write(dev, ctx->params.codec_mode_aux, CODA7_REG_BIT_RUN_AUX_STD);
86
87         coda_write(dev, cmd, CODA_REG_BIT_RUN_COMMAND);
88 }
89
90 static int coda_command_sync(struct coda_ctx *ctx, int cmd)
91 {
92         struct coda_dev *dev = ctx->dev;
93
94         coda_command_async(ctx, cmd);
95         return coda_wait_timeout(dev);
96 }
97
98 int coda_hw_reset(struct coda_ctx *ctx)
99 {
100         struct coda_dev *dev = ctx->dev;
101         unsigned long timeout;
102         unsigned int idx;
103         int ret;
104
105         if (!dev->rstc)
106                 return -ENOENT;
107
108         idx = coda_read(dev, CODA_REG_BIT_RUN_INDEX);
109
110         if (dev->devtype->product == CODA_960) {
111                 timeout = jiffies + msecs_to_jiffies(100);
112                 coda_write(dev, 0x11, CODA9_GDI_BUS_CTRL);
113                 while (coda_read(dev, CODA9_GDI_BUS_STATUS) != 0x77) {
114                         if (time_after(jiffies, timeout))
115                                 return -ETIME;
116                         cpu_relax();
117                 }
118         }
119
120         ret = reset_control_reset(dev->rstc);
121         if (ret < 0)
122                 return ret;
123
124         if (dev->devtype->product == CODA_960)
125                 coda_write(dev, 0x00, CODA9_GDI_BUS_CTRL);
126         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
127         coda_write(dev, CODA_REG_RUN_ENABLE, CODA_REG_BIT_CODE_RUN);
128         ret = coda_wait_timeout(dev);
129         coda_write(dev, idx, CODA_REG_BIT_RUN_INDEX);
130
131         return ret;
132 }
133
134 static void coda_kfifo_sync_from_device(struct coda_ctx *ctx)
135 {
136         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
137         struct coda_dev *dev = ctx->dev;
138         u32 rd_ptr;
139
140         rd_ptr = coda_read(dev, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
141         kfifo->out = (kfifo->in & ~kfifo->mask) |
142                       (rd_ptr - ctx->bitstream.paddr);
143         if (kfifo->out > kfifo->in)
144                 kfifo->out -= kfifo->mask + 1;
145 }
146
147 static void coda_kfifo_sync_to_device_full(struct coda_ctx *ctx)
148 {
149         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
150         struct coda_dev *dev = ctx->dev;
151         u32 rd_ptr, wr_ptr;
152
153         rd_ptr = ctx->bitstream.paddr + (kfifo->out & kfifo->mask);
154         coda_write(dev, rd_ptr, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
155         wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
156         coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
157 }
158
159 static void coda_kfifo_sync_to_device_write(struct coda_ctx *ctx)
160 {
161         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
162         struct coda_dev *dev = ctx->dev;
163         u32 wr_ptr;
164
165         wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
166         coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
167 }
168
169 static int coda_bitstream_queue(struct coda_ctx *ctx,
170                                 struct vb2_buffer *src_buf)
171 {
172         u32 src_size = vb2_get_plane_payload(src_buf, 0);
173         u32 n;
174
175         n = kfifo_in(&ctx->bitstream_fifo, vb2_plane_vaddr(src_buf, 0),
176                      src_size);
177         if (n < src_size)
178                 return -ENOSPC;
179
180         dma_sync_single_for_device(&ctx->dev->plat_dev->dev,
181                                    ctx->bitstream.paddr, ctx->bitstream.size,
182                                    DMA_TO_DEVICE);
183
184         src_buf->v4l2_buf.sequence = ctx->qsequence++;
185
186         return 0;
187 }
188
189 static bool coda_bitstream_try_queue(struct coda_ctx *ctx,
190                                      struct vb2_buffer *src_buf)
191 {
192         int ret;
193
194         if (coda_get_bitstream_payload(ctx) +
195             vb2_get_plane_payload(src_buf, 0) + 512 >= ctx->bitstream.size)
196                 return false;
197
198         if (vb2_plane_vaddr(src_buf, 0) == NULL) {
199                 v4l2_err(&ctx->dev->v4l2_dev, "trying to queue empty buffer\n");
200                 return true;
201         }
202
203         ret = coda_bitstream_queue(ctx, src_buf);
204         if (ret < 0) {
205                 v4l2_err(&ctx->dev->v4l2_dev, "bitstream buffer overflow\n");
206                 return false;
207         }
208         /* Sync read pointer to device */
209         if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev))
210                 coda_kfifo_sync_to_device_write(ctx);
211
212         ctx->hold = false;
213
214         return true;
215 }
216
217 void coda_fill_bitstream(struct coda_ctx *ctx)
218 {
219         struct vb2_buffer *src_buf;
220         struct coda_buffer_meta *meta;
221         u32 start;
222
223         while (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) > 0) {
224                 /*
225                  * Only queue a single JPEG into the bitstream buffer, except
226                  * to increase payload over 512 bytes or if in hold state.
227                  */
228                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
229                     (coda_get_bitstream_payload(ctx) >= 512) && !ctx->hold)
230                         break;
231
232                 src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
233
234                 /* Drop frames that do not start/end with a SOI/EOI markers */
235                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
236                     !coda_jpeg_check_buffer(ctx, src_buf)) {
237                         v4l2_err(&ctx->dev->v4l2_dev,
238                                  "dropping invalid JPEG frame\n");
239                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
240                         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
241                         continue;
242                 }
243
244                 /* Buffer start position */
245                 start = ctx->bitstream_fifo.kfifo.in &
246                         ctx->bitstream_fifo.kfifo.mask;
247
248                 if (coda_bitstream_try_queue(ctx, src_buf)) {
249                         /*
250                          * Source buffer is queued in the bitstream ringbuffer;
251                          * queue the timestamp and mark source buffer as done
252                          */
253                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
254
255                         meta = kmalloc(sizeof(*meta), GFP_KERNEL);
256                         if (meta) {
257                                 meta->sequence = src_buf->v4l2_buf.sequence;
258                                 meta->timecode = src_buf->v4l2_buf.timecode;
259                                 meta->timestamp = src_buf->v4l2_buf.timestamp;
260                                 meta->start = start;
261                                 meta->end = ctx->bitstream_fifo.kfifo.in &
262                                             ctx->bitstream_fifo.kfifo.mask;
263                                 list_add_tail(&meta->list,
264                                               &ctx->buffer_meta_list);
265                         }
266
267                         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
268                 } else {
269                         break;
270                 }
271         }
272 }
273
274 void coda_bit_stream_end_flag(struct coda_ctx *ctx)
275 {
276         struct coda_dev *dev = ctx->dev;
277
278         ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
279
280         /* If this context is currently running, update the hardware flag */
281         if ((dev->devtype->product == CODA_960) &&
282             coda_isbusy(dev) &&
283             (ctx->idx == coda_read(dev, CODA_REG_BIT_RUN_INDEX))) {
284                 coda_write(dev, ctx->bit_stream_param,
285                            CODA_REG_BIT_BIT_STREAM_PARAM);
286         }
287 }
288
289 static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value)
290 {
291         struct coda_dev *dev = ctx->dev;
292         u32 *p = ctx->parabuf.vaddr;
293
294         if (dev->devtype->product == CODA_DX6)
295                 p[index] = value;
296         else
297                 p[index ^ 1] = value;
298 }
299
300 static void coda_free_framebuffers(struct coda_ctx *ctx)
301 {
302         int i;
303
304         for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++)
305                 coda_free_aux_buf(ctx->dev, &ctx->internal_frames[i]);
306 }
307
308 static int coda_alloc_framebuffers(struct coda_ctx *ctx,
309                                    struct coda_q_data *q_data, u32 fourcc)
310 {
311         struct coda_dev *dev = ctx->dev;
312         int width, height;
313         dma_addr_t paddr;
314         int ysize;
315         int ret;
316         int i;
317
318         if (ctx->codec && (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 ||
319              ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264)) {
320                 width = round_up(q_data->width, 16);
321                 height = round_up(q_data->height, 16);
322         } else {
323                 width = round_up(q_data->width, 8);
324                 height = q_data->height;
325         }
326         ysize = width * height;
327
328         /* Allocate frame buffers */
329         for (i = 0; i < ctx->num_internal_frames; i++) {
330                 size_t size;
331                 char *name;
332
333                 size = ysize + ysize / 2;
334                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 &&
335                     dev->devtype->product != CODA_DX6)
336                         size += ysize / 4;
337                 name = kasprintf(GFP_KERNEL, "fb%d", i);
338                 ret = coda_alloc_context_buf(ctx, &ctx->internal_frames[i],
339                                              size, name);
340                 kfree(name);
341                 if (ret < 0) {
342                         coda_free_framebuffers(ctx);
343                         return ret;
344                 }
345         }
346
347         /* Register frame buffers in the parameter buffer */
348         for (i = 0; i < ctx->num_internal_frames; i++) {
349                 paddr = ctx->internal_frames[i].paddr;
350                 /* Start addresses of Y, Cb, Cr planes */
351                 coda_parabuf_write(ctx, i * 3 + 0, paddr);
352                 coda_parabuf_write(ctx, i * 3 + 1, paddr + ysize);
353                 coda_parabuf_write(ctx, i * 3 + 2, paddr + ysize + ysize / 4);
354
355                 /* mvcol buffer for h.264 */
356                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 &&
357                     dev->devtype->product != CODA_DX6)
358                         coda_parabuf_write(ctx, 96 + i,
359                                            ctx->internal_frames[i].paddr +
360                                            ysize + ysize/4 + ysize/4);
361         }
362
363         /* mvcol buffer for mpeg4 */
364         if ((dev->devtype->product != CODA_DX6) &&
365             (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4))
366                 coda_parabuf_write(ctx, 97, ctx->internal_frames[i].paddr +
367                                             ysize + ysize/4 + ysize/4);
368
369         return 0;
370 }
371
372 static void coda_free_context_buffers(struct coda_ctx *ctx)
373 {
374         struct coda_dev *dev = ctx->dev;
375
376         coda_free_aux_buf(dev, &ctx->slicebuf);
377         coda_free_aux_buf(dev, &ctx->psbuf);
378         if (dev->devtype->product != CODA_DX6)
379                 coda_free_aux_buf(dev, &ctx->workbuf);
380 }
381
382 static int coda_alloc_context_buffers(struct coda_ctx *ctx,
383                                       struct coda_q_data *q_data)
384 {
385         struct coda_dev *dev = ctx->dev;
386         size_t size;
387         int ret;
388
389         if (dev->devtype->product == CODA_DX6)
390                 return 0;
391
392         if (ctx->psbuf.vaddr) {
393                 v4l2_err(&dev->v4l2_dev, "psmembuf still allocated\n");
394                 return -EBUSY;
395         }
396         if (ctx->slicebuf.vaddr) {
397                 v4l2_err(&dev->v4l2_dev, "slicebuf still allocated\n");
398                 return -EBUSY;
399         }
400         if (ctx->workbuf.vaddr) {
401                 v4l2_err(&dev->v4l2_dev, "context buffer still allocated\n");
402                 ret = -EBUSY;
403                 return -ENOMEM;
404         }
405
406         if (q_data->fourcc == V4L2_PIX_FMT_H264) {
407                 /* worst case slice size */
408                 size = (DIV_ROUND_UP(q_data->width, 16) *
409                         DIV_ROUND_UP(q_data->height, 16)) * 3200 / 8 + 512;
410                 ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size,
411                                              "slicebuf");
412                 if (ret < 0) {
413                         v4l2_err(&dev->v4l2_dev,
414                                  "failed to allocate %d byte slice buffer",
415                                  ctx->slicebuf.size);
416                         return ret;
417                 }
418         }
419
420         if (dev->devtype->product == CODA_7541) {
421                 ret = coda_alloc_context_buf(ctx, &ctx->psbuf,
422                                              CODA7_PS_BUF_SIZE, "psbuf");
423                 if (ret < 0) {
424                         v4l2_err(&dev->v4l2_dev,
425                                  "failed to allocate psmem buffer");
426                         goto err;
427                 }
428         }
429
430         size = dev->devtype->workbuf_size;
431         if (dev->devtype->product == CODA_960 &&
432             q_data->fourcc == V4L2_PIX_FMT_H264)
433                 size += CODA9_PS_SAVE_SIZE;
434         ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size, "workbuf");
435         if (ret < 0) {
436                 v4l2_err(&dev->v4l2_dev,
437                          "failed to allocate %d byte context buffer",
438                          ctx->workbuf.size);
439                 goto err;
440         }
441
442         return 0;
443
444 err:
445         coda_free_context_buffers(ctx);
446         return ret;
447 }
448
449 static int coda_encode_header(struct coda_ctx *ctx, struct vb2_buffer *buf,
450                               int header_code, u8 *header, int *size)
451 {
452         struct coda_dev *dev = ctx->dev;
453         size_t bufsize;
454         int ret;
455         int i;
456
457         if (dev->devtype->product == CODA_960)
458                 memset(vb2_plane_vaddr(buf, 0), 0, 64);
459
460         coda_write(dev, vb2_dma_contig_plane_dma_addr(buf, 0),
461                    CODA_CMD_ENC_HEADER_BB_START);
462         bufsize = vb2_plane_size(buf, 0);
463         if (dev->devtype->product == CODA_960)
464                 bufsize /= 1024;
465         coda_write(dev, bufsize, CODA_CMD_ENC_HEADER_BB_SIZE);
466         coda_write(dev, header_code, CODA_CMD_ENC_HEADER_CODE);
467         ret = coda_command_sync(ctx, CODA_COMMAND_ENCODE_HEADER);
468         if (ret < 0) {
469                 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_ENCODE_HEADER timeout\n");
470                 return ret;
471         }
472
473         if (dev->devtype->product == CODA_960) {
474                 for (i = 63; i > 0; i--)
475                         if (((char *)vb2_plane_vaddr(buf, 0))[i] != 0)
476                                 break;
477                 *size = i + 1;
478         } else {
479                 *size = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)) -
480                         coda_read(dev, CODA_CMD_ENC_HEADER_BB_START);
481         }
482         memcpy(header, vb2_plane_vaddr(buf, 0), *size);
483
484         return 0;
485 }
486
487 static phys_addr_t coda_iram_alloc(struct coda_iram_info *iram, size_t size)
488 {
489         phys_addr_t ret;
490
491         size = round_up(size, 1024);
492         if (size > iram->remaining)
493                 return 0;
494         iram->remaining -= size;
495
496         ret = iram->next_paddr;
497         iram->next_paddr += size;
498
499         return ret;
500 }
501
502 static void coda_setup_iram(struct coda_ctx *ctx)
503 {
504         struct coda_iram_info *iram_info = &ctx->iram_info;
505         struct coda_dev *dev = ctx->dev;
506         int w64, w128;
507         int mb_width;
508         int dbk_bits;
509         int bit_bits;
510         int ip_bits;
511
512         memset(iram_info, 0, sizeof(*iram_info));
513         iram_info->next_paddr = dev->iram.paddr;
514         iram_info->remaining = dev->iram.size;
515
516         if (!dev->iram.vaddr)
517                 return;
518
519         switch (dev->devtype->product) {
520         case CODA_7541:
521                 dbk_bits = CODA7_USE_HOST_DBK_ENABLE | CODA7_USE_DBK_ENABLE;
522                 bit_bits = CODA7_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
523                 ip_bits = CODA7_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
524                 break;
525         case CODA_960:
526                 dbk_bits = CODA9_USE_HOST_DBK_ENABLE | CODA9_USE_DBK_ENABLE;
527                 bit_bits = CODA9_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
528                 ip_bits = CODA9_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
529                 break;
530         default: /* CODA_DX6 */
531                 return;
532         }
533
534         if (ctx->inst_type == CODA_INST_ENCODER) {
535                 struct coda_q_data *q_data_src;
536
537                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
538                 mb_width = DIV_ROUND_UP(q_data_src->width, 16);
539                 w128 = mb_width * 128;
540                 w64 = mb_width * 64;
541
542                 /* Prioritize in case IRAM is too small for everything */
543                 if (dev->devtype->product == CODA_7541) {
544                         iram_info->search_ram_size = round_up(mb_width * 16 *
545                                                               36 + 2048, 1024);
546                         iram_info->search_ram_paddr = coda_iram_alloc(iram_info,
547                                                 iram_info->search_ram_size);
548                         if (!iram_info->search_ram_paddr) {
549                                 pr_err("IRAM is smaller than the search ram size\n");
550                                 goto out;
551                         }
552                         iram_info->axi_sram_use |= CODA7_USE_HOST_ME_ENABLE |
553                                                    CODA7_USE_ME_ENABLE;
554                 }
555
556                 /* Only H.264BP and H.263P3 are considered */
557                 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w64);
558                 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w64);
559                 if (!iram_info->buf_dbk_c_use)
560                         goto out;
561                 iram_info->axi_sram_use |= dbk_bits;
562
563                 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
564                 if (!iram_info->buf_bit_use)
565                         goto out;
566                 iram_info->axi_sram_use |= bit_bits;
567
568                 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
569                 if (!iram_info->buf_ip_ac_dc_use)
570                         goto out;
571                 iram_info->axi_sram_use |= ip_bits;
572
573                 /* OVL and BTP disabled for encoder */
574         } else if (ctx->inst_type == CODA_INST_DECODER) {
575                 struct coda_q_data *q_data_dst;
576
577                 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
578                 mb_width = DIV_ROUND_UP(q_data_dst->width, 16);
579                 w128 = mb_width * 128;
580
581                 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w128);
582                 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w128);
583                 if (!iram_info->buf_dbk_c_use)
584                         goto out;
585                 iram_info->axi_sram_use |= dbk_bits;
586
587                 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
588                 if (!iram_info->buf_bit_use)
589                         goto out;
590                 iram_info->axi_sram_use |= bit_bits;
591
592                 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
593                 if (!iram_info->buf_ip_ac_dc_use)
594                         goto out;
595                 iram_info->axi_sram_use |= ip_bits;
596
597                 /* OVL and BTP unused as there is no VC1 support yet */
598         }
599
600 out:
601         if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE))
602                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
603                          "IRAM smaller than needed\n");
604
605         if (dev->devtype->product == CODA_7541) {
606                 /* TODO - Enabling these causes picture errors on CODA7541 */
607                 if (ctx->inst_type == CODA_INST_DECODER) {
608                         /* fw 1.4.50 */
609                         iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
610                                                      CODA7_USE_IP_ENABLE);
611                 } else {
612                         /* fw 13.4.29 */
613                         iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
614                                                      CODA7_USE_HOST_DBK_ENABLE |
615                                                      CODA7_USE_IP_ENABLE |
616                                                      CODA7_USE_DBK_ENABLE);
617                 }
618         }
619 }
620
621 static u32 coda_supported_firmwares[] = {
622         CODA_FIRMWARE_VERNUM(CODA_DX6, 2, 2, 5),
623         CODA_FIRMWARE_VERNUM(CODA_7541, 1, 4, 50),
624         CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 5),
625 };
626
627 static bool coda_firmware_supported(u32 vernum)
628 {
629         int i;
630
631         for (i = 0; i < ARRAY_SIZE(coda_supported_firmwares); i++)
632                 if (vernum == coda_supported_firmwares[i])
633                         return true;
634         return false;
635 }
636
637 int coda_check_firmware(struct coda_dev *dev)
638 {
639         u16 product, major, minor, release;
640         u32 data;
641         int ret;
642
643         ret = clk_prepare_enable(dev->clk_per);
644         if (ret)
645                 goto err_clk_per;
646
647         ret = clk_prepare_enable(dev->clk_ahb);
648         if (ret)
649                 goto err_clk_ahb;
650
651         coda_write(dev, 0, CODA_CMD_FIRMWARE_VERNUM);
652         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
653         coda_write(dev, 0, CODA_REG_BIT_RUN_INDEX);
654         coda_write(dev, 0, CODA_REG_BIT_RUN_COD_STD);
655         coda_write(dev, CODA_COMMAND_FIRMWARE_GET, CODA_REG_BIT_RUN_COMMAND);
656         if (coda_wait_timeout(dev)) {
657                 v4l2_err(&dev->v4l2_dev, "firmware get command error\n");
658                 ret = -EIO;
659                 goto err_run_cmd;
660         }
661
662         if (dev->devtype->product == CODA_960) {
663                 data = coda_read(dev, CODA9_CMD_FIRMWARE_CODE_REV);
664                 v4l2_info(&dev->v4l2_dev, "Firmware code revision: %d\n",
665                           data);
666         }
667
668         /* Check we are compatible with the loaded firmware */
669         data = coda_read(dev, CODA_CMD_FIRMWARE_VERNUM);
670         product = CODA_FIRMWARE_PRODUCT(data);
671         major = CODA_FIRMWARE_MAJOR(data);
672         minor = CODA_FIRMWARE_MINOR(data);
673         release = CODA_FIRMWARE_RELEASE(data);
674
675         clk_disable_unprepare(dev->clk_per);
676         clk_disable_unprepare(dev->clk_ahb);
677
678         if (product != dev->devtype->product) {
679                 v4l2_err(&dev->v4l2_dev,
680                          "Wrong firmware. Hw: %s, Fw: %s, Version: %u.%u.%u\n",
681                          coda_product_name(dev->devtype->product),
682                          coda_product_name(product), major, minor, release);
683                 return -EINVAL;
684         }
685
686         v4l2_info(&dev->v4l2_dev, "Initialized %s.\n",
687                   coda_product_name(product));
688
689         if (coda_firmware_supported(data)) {
690                 v4l2_info(&dev->v4l2_dev, "Firmware version: %u.%u.%u\n",
691                           major, minor, release);
692         } else {
693                 v4l2_warn(&dev->v4l2_dev,
694                           "Unsupported firmware version: %u.%u.%u\n",
695                           major, minor, release);
696         }
697
698         return 0;
699
700 err_run_cmd:
701         clk_disable_unprepare(dev->clk_ahb);
702 err_clk_ahb:
703         clk_disable_unprepare(dev->clk_per);
704 err_clk_per:
705         return ret;
706 }
707
708 /*
709  * Encoder context operations
710  */
711
712 static int coda_start_encoding(struct coda_ctx *ctx)
713 {
714         struct coda_dev *dev = ctx->dev;
715         struct v4l2_device *v4l2_dev = &dev->v4l2_dev;
716         struct coda_q_data *q_data_src, *q_data_dst;
717         u32 bitstream_buf, bitstream_size;
718         struct vb2_buffer *buf;
719         int gamma, ret, value;
720         u32 dst_fourcc;
721         int num_fb;
722         u32 stride;
723
724         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
725         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
726         dst_fourcc = q_data_dst->fourcc;
727
728         /* Allocate per-instance buffers */
729         ret = coda_alloc_context_buffers(ctx, q_data_src);
730         if (ret < 0)
731                 return ret;
732
733         buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
734         bitstream_buf = vb2_dma_contig_plane_dma_addr(buf, 0);
735         bitstream_size = q_data_dst->sizeimage;
736
737         if (!coda_is_initialized(dev)) {
738                 v4l2_err(v4l2_dev, "coda is not initialized.\n");
739                 return -EFAULT;
740         }
741
742         if (dst_fourcc == V4L2_PIX_FMT_JPEG) {
743                 if (!ctx->params.jpeg_qmat_tab[0])
744                         ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL);
745                 if (!ctx->params.jpeg_qmat_tab[1])
746                         ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL);
747                 coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality);
748         }
749
750         mutex_lock(&dev->coda_mutex);
751
752         coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
753         coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
754         coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
755         switch (dev->devtype->product) {
756         case CODA_DX6:
757                 coda_write(dev, CODADX6_STREAM_BUF_DYNALLOC_EN |
758                         CODADX6_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
759                 break;
760         case CODA_960:
761                 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
762                 /* fallthrough */
763         case CODA_7541:
764                 coda_write(dev, CODA7_STREAM_BUF_DYNALLOC_EN |
765                         CODA7_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
766                 break;
767         }
768
769         ctx->frame_mem_ctrl &= ~CODA_FRAME_CHROMA_INTERLEAVE;
770         if (q_data_src->fourcc == V4L2_PIX_FMT_NV12)
771                 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
772         coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
773
774         if (dev->devtype->product == CODA_DX6) {
775                 /* Configure the coda */
776                 coda_write(dev, dev->iram.paddr,
777                            CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR);
778         }
779
780         /* Could set rotation here if needed */
781         value = 0;
782         switch (dev->devtype->product) {
783         case CODA_DX6:
784                 value = (q_data_src->width & CODADX6_PICWIDTH_MASK)
785                         << CODADX6_PICWIDTH_OFFSET;
786                 value |= (q_data_src->height & CODADX6_PICHEIGHT_MASK)
787                          << CODA_PICHEIGHT_OFFSET;
788                 break;
789         case CODA_7541:
790                 if (dst_fourcc == V4L2_PIX_FMT_H264) {
791                         value = (round_up(q_data_src->width, 16) &
792                                  CODA7_PICWIDTH_MASK) << CODA7_PICWIDTH_OFFSET;
793                         value |= (round_up(q_data_src->height, 16) &
794                                  CODA7_PICHEIGHT_MASK) << CODA_PICHEIGHT_OFFSET;
795                         break;
796                 }
797                 /* fallthrough */
798         case CODA_960:
799                 value = (q_data_src->width & CODA7_PICWIDTH_MASK)
800                         << CODA7_PICWIDTH_OFFSET;
801                 value |= (q_data_src->height & CODA7_PICHEIGHT_MASK)
802                          << CODA_PICHEIGHT_OFFSET;
803         }
804         coda_write(dev, value, CODA_CMD_ENC_SEQ_SRC_SIZE);
805         if (dst_fourcc == V4L2_PIX_FMT_JPEG)
806                 ctx->params.framerate = 0;
807         coda_write(dev, ctx->params.framerate,
808                    CODA_CMD_ENC_SEQ_SRC_F_RATE);
809
810         ctx->params.codec_mode = ctx->codec->mode;
811         switch (dst_fourcc) {
812         case V4L2_PIX_FMT_MPEG4:
813                 if (dev->devtype->product == CODA_960)
814                         coda_write(dev, CODA9_STD_MPEG4,
815                                    CODA_CMD_ENC_SEQ_COD_STD);
816                 else
817                         coda_write(dev, CODA_STD_MPEG4,
818                                    CODA_CMD_ENC_SEQ_COD_STD);
819                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_MP4_PARA);
820                 break;
821         case V4L2_PIX_FMT_H264:
822                 if (dev->devtype->product == CODA_960)
823                         coda_write(dev, CODA9_STD_H264,
824                                    CODA_CMD_ENC_SEQ_COD_STD);
825                 else
826                         coda_write(dev, CODA_STD_H264,
827                                    CODA_CMD_ENC_SEQ_COD_STD);
828                 if (ctx->params.h264_deblk_enabled) {
829                         value = ((ctx->params.h264_deblk_alpha &
830                                   CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) <<
831                                  CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) |
832                                 ((ctx->params.h264_deblk_beta &
833                                   CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) <<
834                                  CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET);
835                 } else {
836                         value = 1 << CODA_264PARAM_DISABLEDEBLK_OFFSET;
837                 }
838                 coda_write(dev, value, CODA_CMD_ENC_SEQ_264_PARA);
839                 break;
840         case V4L2_PIX_FMT_JPEG:
841                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_PARA);
842                 coda_write(dev, ctx->params.jpeg_restart_interval,
843                                 CODA_CMD_ENC_SEQ_JPG_RST_INTERVAL);
844                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_EN);
845                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_SIZE);
846                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_OFFSET);
847
848                 coda_jpeg_write_tables(ctx);
849                 break;
850         default:
851                 v4l2_err(v4l2_dev,
852                          "dst format (0x%08x) invalid.\n", dst_fourcc);
853                 ret = -EINVAL;
854                 goto out;
855         }
856
857         /*
858          * slice mode and GOP size registers are used for thumb size/offset
859          * in JPEG mode
860          */
861         if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
862                 switch (ctx->params.slice_mode) {
863                 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE:
864                         value = 0;
865                         break;
866                 case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_MB:
867                         value  = (ctx->params.slice_max_mb &
868                                   CODA_SLICING_SIZE_MASK)
869                                  << CODA_SLICING_SIZE_OFFSET;
870                         value |= (1 & CODA_SLICING_UNIT_MASK)
871                                  << CODA_SLICING_UNIT_OFFSET;
872                         value |=  1 & CODA_SLICING_MODE_MASK;
873                         break;
874                 case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_BYTES:
875                         value  = (ctx->params.slice_max_bits &
876                                   CODA_SLICING_SIZE_MASK)
877                                  << CODA_SLICING_SIZE_OFFSET;
878                         value |= (0 & CODA_SLICING_UNIT_MASK)
879                                  << CODA_SLICING_UNIT_OFFSET;
880                         value |=  1 & CODA_SLICING_MODE_MASK;
881                         break;
882                 }
883                 coda_write(dev, value, CODA_CMD_ENC_SEQ_SLICE_MODE);
884                 value = ctx->params.gop_size & CODA_GOP_SIZE_MASK;
885                 coda_write(dev, value, CODA_CMD_ENC_SEQ_GOP_SIZE);
886         }
887
888         if (ctx->params.bitrate) {
889                 /* Rate control enabled */
890                 value = (ctx->params.bitrate & CODA_RATECONTROL_BITRATE_MASK)
891                         << CODA_RATECONTROL_BITRATE_OFFSET;
892                 value |=  1 & CODA_RATECONTROL_ENABLE_MASK;
893                 if (dev->devtype->product == CODA_960)
894                         value |= BIT(31); /* disable autoskip */
895         } else {
896                 value = 0;
897         }
898         coda_write(dev, value, CODA_CMD_ENC_SEQ_RC_PARA);
899
900         coda_write(dev, 0, CODA_CMD_ENC_SEQ_RC_BUF_SIZE);
901         coda_write(dev, ctx->params.intra_refresh,
902                    CODA_CMD_ENC_SEQ_INTRA_REFRESH);
903
904         coda_write(dev, bitstream_buf, CODA_CMD_ENC_SEQ_BB_START);
905         coda_write(dev, bitstream_size / 1024, CODA_CMD_ENC_SEQ_BB_SIZE);
906
907
908         value = 0;
909         if (dev->devtype->product == CODA_960)
910                 gamma = CODA9_DEFAULT_GAMMA;
911         else
912                 gamma = CODA_DEFAULT_GAMMA;
913         if (gamma > 0) {
914                 coda_write(dev, (gamma & CODA_GAMMA_MASK) << CODA_GAMMA_OFFSET,
915                            CODA_CMD_ENC_SEQ_RC_GAMMA);
916         }
917
918         if (ctx->params.h264_min_qp || ctx->params.h264_max_qp) {
919                 coda_write(dev,
920                            ctx->params.h264_min_qp << CODA_QPMIN_OFFSET |
921                            ctx->params.h264_max_qp << CODA_QPMAX_OFFSET,
922                            CODA_CMD_ENC_SEQ_RC_QP_MIN_MAX);
923         }
924         if (dev->devtype->product == CODA_960) {
925                 if (ctx->params.h264_max_qp)
926                         value |= 1 << CODA9_OPTION_RCQPMAX_OFFSET;
927                 if (CODA_DEFAULT_GAMMA > 0)
928                         value |= 1 << CODA9_OPTION_GAMMA_OFFSET;
929         } else {
930                 if (CODA_DEFAULT_GAMMA > 0) {
931                         if (dev->devtype->product == CODA_DX6)
932                                 value |= 1 << CODADX6_OPTION_GAMMA_OFFSET;
933                         else
934                                 value |= 1 << CODA7_OPTION_GAMMA_OFFSET;
935                 }
936                 if (ctx->params.h264_min_qp)
937                         value |= 1 << CODA7_OPTION_RCQPMIN_OFFSET;
938                 if (ctx->params.h264_max_qp)
939                         value |= 1 << CODA7_OPTION_RCQPMAX_OFFSET;
940         }
941         coda_write(dev, value, CODA_CMD_ENC_SEQ_OPTION);
942
943         coda_write(dev, 0, CODA_CMD_ENC_SEQ_RC_INTERVAL_MODE);
944
945         coda_setup_iram(ctx);
946
947         if (dst_fourcc == V4L2_PIX_FMT_H264) {
948                 switch (dev->devtype->product) {
949                 case CODA_DX6:
950                         value = FMO_SLICE_SAVE_BUF_SIZE << 7;
951                         coda_write(dev, value, CODADX6_CMD_ENC_SEQ_FMO);
952                         break;
953                 case CODA_7541:
954                         coda_write(dev, ctx->iram_info.search_ram_paddr,
955                                         CODA7_CMD_ENC_SEQ_SEARCH_BASE);
956                         coda_write(dev, ctx->iram_info.search_ram_size,
957                                         CODA7_CMD_ENC_SEQ_SEARCH_SIZE);
958                         break;
959                 case CODA_960:
960                         coda_write(dev, 0, CODA9_CMD_ENC_SEQ_ME_OPTION);
961                         coda_write(dev, 0, CODA9_CMD_ENC_SEQ_INTRA_WEIGHT);
962                 }
963         }
964
965         ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
966         if (ret < 0) {
967                 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
968                 goto out;
969         }
970
971         if (coda_read(dev, CODA_RET_ENC_SEQ_SUCCESS) == 0) {
972                 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT failed\n");
973                 ret = -EFAULT;
974                 goto out;
975         }
976
977         if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
978                 if (dev->devtype->product == CODA_960)
979                         ctx->num_internal_frames = 4;
980                 else
981                         ctx->num_internal_frames = 2;
982                 ret = coda_alloc_framebuffers(ctx, q_data_src, dst_fourcc);
983                 if (ret < 0) {
984                         v4l2_err(v4l2_dev, "failed to allocate framebuffers\n");
985                         goto out;
986                 }
987                 num_fb = 2;
988                 stride = q_data_src->bytesperline;
989         } else {
990                 ctx->num_internal_frames = 0;
991                 num_fb = 0;
992                 stride = 0;
993         }
994         coda_write(dev, num_fb, CODA_CMD_SET_FRAME_BUF_NUM);
995         coda_write(dev, stride, CODA_CMD_SET_FRAME_BUF_STRIDE);
996
997         if (dev->devtype->product == CODA_7541) {
998                 coda_write(dev, q_data_src->bytesperline,
999                                 CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE);
1000         }
1001         if (dev->devtype->product != CODA_DX6) {
1002                 coda_write(dev, ctx->iram_info.buf_bit_use,
1003                                 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1004                 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1005                                 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1006                 coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1007                                 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1008                 coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1009                                 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1010                 coda_write(dev, ctx->iram_info.buf_ovl_use,
1011                                 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1012                 if (dev->devtype->product == CODA_960) {
1013                         coda_write(dev, ctx->iram_info.buf_btp_use,
1014                                         CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1015
1016                         /* FIXME */
1017                         coda_write(dev, ctx->internal_frames[2].paddr,
1018                                    CODA9_CMD_SET_FRAME_SUBSAMP_A);
1019                         coda_write(dev, ctx->internal_frames[3].paddr,
1020                                    CODA9_CMD_SET_FRAME_SUBSAMP_B);
1021                 }
1022         }
1023
1024         ret = coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF);
1025         if (ret < 0) {
1026                 v4l2_err(v4l2_dev, "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1027                 goto out;
1028         }
1029
1030         /* Save stream headers */
1031         buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1032         switch (dst_fourcc) {
1033         case V4L2_PIX_FMT_H264:
1034                 /*
1035                  * Get SPS in the first frame and copy it to an
1036                  * intermediate buffer.
1037                  */
1038                 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_SPS,
1039                                          &ctx->vpu_header[0][0],
1040                                          &ctx->vpu_header_size[0]);
1041                 if (ret < 0)
1042                         goto out;
1043
1044                 /*
1045                  * Get PPS in the first frame and copy it to an
1046                  * intermediate buffer.
1047                  */
1048                 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_PPS,
1049                                          &ctx->vpu_header[1][0],
1050                                          &ctx->vpu_header_size[1]);
1051                 if (ret < 0)
1052                         goto out;
1053
1054                 /*
1055                  * Length of H.264 headers is variable and thus it might not be
1056                  * aligned for the coda to append the encoded frame. If that is
1057                  * the case a filler NAL must be added to header 2.
1058                  */
1059                 ctx->vpu_header_size[2] = coda_h264_padding(
1060                                         (ctx->vpu_header_size[0] +
1061                                          ctx->vpu_header_size[1]),
1062                                          ctx->vpu_header[2]);
1063                 break;
1064         case V4L2_PIX_FMT_MPEG4:
1065                 /*
1066                  * Get VOS in the first frame and copy it to an
1067                  * intermediate buffer
1068                  */
1069                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOS,
1070                                          &ctx->vpu_header[0][0],
1071                                          &ctx->vpu_header_size[0]);
1072                 if (ret < 0)
1073                         goto out;
1074
1075                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VIS,
1076                                          &ctx->vpu_header[1][0],
1077                                          &ctx->vpu_header_size[1]);
1078                 if (ret < 0)
1079                         goto out;
1080
1081                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOL,
1082                                          &ctx->vpu_header[2][0],
1083                                          &ctx->vpu_header_size[2]);
1084                 if (ret < 0)
1085                         goto out;
1086                 break;
1087         default:
1088                 /* No more formats need to save headers at the moment */
1089                 break;
1090         }
1091
1092 out:
1093         mutex_unlock(&dev->coda_mutex);
1094         return ret;
1095 }
1096
1097 static int coda_prepare_encode(struct coda_ctx *ctx)
1098 {
1099         struct coda_q_data *q_data_src, *q_data_dst;
1100         struct vb2_buffer *src_buf, *dst_buf;
1101         struct coda_dev *dev = ctx->dev;
1102         int force_ipicture;
1103         int quant_param = 0;
1104         u32 pic_stream_buffer_addr, pic_stream_buffer_size;
1105         u32 rot_mode = 0;
1106         u32 dst_fourcc;
1107         u32 reg;
1108
1109         src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
1110         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1111         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1112         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1113         dst_fourcc = q_data_dst->fourcc;
1114
1115         src_buf->v4l2_buf.sequence = ctx->osequence;
1116         dst_buf->v4l2_buf.sequence = ctx->osequence;
1117         ctx->osequence++;
1118
1119         /*
1120          * Workaround coda firmware BUG that only marks the first
1121          * frame as IDR. This is a problem for some decoders that can't
1122          * recover when a frame is lost.
1123          */
1124         if (src_buf->v4l2_buf.sequence % ctx->params.gop_size) {
1125                 src_buf->v4l2_buf.flags |= V4L2_BUF_FLAG_PFRAME;
1126                 src_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1127         } else {
1128                 src_buf->v4l2_buf.flags |= V4L2_BUF_FLAG_KEYFRAME;
1129                 src_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_PFRAME;
1130         }
1131
1132         if (dev->devtype->product == CODA_960)
1133                 coda_set_gdi_regs(ctx);
1134
1135         /*
1136          * Copy headers at the beginning of the first frame for H.264 only.
1137          * In MPEG4 they are already copied by the coda.
1138          */
1139         if (src_buf->v4l2_buf.sequence == 0) {
1140                 pic_stream_buffer_addr =
1141                         vb2_dma_contig_plane_dma_addr(dst_buf, 0) +
1142                         ctx->vpu_header_size[0] +
1143                         ctx->vpu_header_size[1] +
1144                         ctx->vpu_header_size[2];
1145                 pic_stream_buffer_size = q_data_dst->sizeimage -
1146                         ctx->vpu_header_size[0] -
1147                         ctx->vpu_header_size[1] -
1148                         ctx->vpu_header_size[2];
1149                 memcpy(vb2_plane_vaddr(dst_buf, 0),
1150                        &ctx->vpu_header[0][0], ctx->vpu_header_size[0]);
1151                 memcpy(vb2_plane_vaddr(dst_buf, 0) + ctx->vpu_header_size[0],
1152                        &ctx->vpu_header[1][0], ctx->vpu_header_size[1]);
1153                 memcpy(vb2_plane_vaddr(dst_buf, 0) + ctx->vpu_header_size[0] +
1154                         ctx->vpu_header_size[1], &ctx->vpu_header[2][0],
1155                         ctx->vpu_header_size[2]);
1156         } else {
1157                 pic_stream_buffer_addr =
1158                         vb2_dma_contig_plane_dma_addr(dst_buf, 0);
1159                 pic_stream_buffer_size = q_data_dst->sizeimage;
1160         }
1161
1162         if (src_buf->v4l2_buf.flags & V4L2_BUF_FLAG_KEYFRAME) {
1163                 force_ipicture = 1;
1164                 switch (dst_fourcc) {
1165                 case V4L2_PIX_FMT_H264:
1166                         quant_param = ctx->params.h264_intra_qp;
1167                         break;
1168                 case V4L2_PIX_FMT_MPEG4:
1169                         quant_param = ctx->params.mpeg4_intra_qp;
1170                         break;
1171                 case V4L2_PIX_FMT_JPEG:
1172                         quant_param = 30;
1173                         break;
1174                 default:
1175                         v4l2_warn(&ctx->dev->v4l2_dev,
1176                                 "cannot set intra qp, fmt not supported\n");
1177                         break;
1178                 }
1179         } else {
1180                 force_ipicture = 0;
1181                 switch (dst_fourcc) {
1182                 case V4L2_PIX_FMT_H264:
1183                         quant_param = ctx->params.h264_inter_qp;
1184                         break;
1185                 case V4L2_PIX_FMT_MPEG4:
1186                         quant_param = ctx->params.mpeg4_inter_qp;
1187                         break;
1188                 default:
1189                         v4l2_warn(&ctx->dev->v4l2_dev,
1190                                 "cannot set inter qp, fmt not supported\n");
1191                         break;
1192                 }
1193         }
1194
1195         /* submit */
1196         if (ctx->params.rot_mode)
1197                 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
1198         coda_write(dev, rot_mode, CODA_CMD_ENC_PIC_ROT_MODE);
1199         coda_write(dev, quant_param, CODA_CMD_ENC_PIC_QS);
1200
1201         if (dev->devtype->product == CODA_960) {
1202                 coda_write(dev, 4/*FIXME: 0*/, CODA9_CMD_ENC_PIC_SRC_INDEX);
1203                 coda_write(dev, q_data_src->width, CODA9_CMD_ENC_PIC_SRC_STRIDE);
1204                 coda_write(dev, 0, CODA9_CMD_ENC_PIC_SUB_FRAME_SYNC);
1205
1206                 reg = CODA9_CMD_ENC_PIC_SRC_ADDR_Y;
1207         } else {
1208                 reg = CODA_CMD_ENC_PIC_SRC_ADDR_Y;
1209         }
1210         coda_write_base(ctx, q_data_src, src_buf, reg);
1211
1212         coda_write(dev, force_ipicture << 1 & 0x2,
1213                    CODA_CMD_ENC_PIC_OPTION);
1214
1215         coda_write(dev, pic_stream_buffer_addr, CODA_CMD_ENC_PIC_BB_START);
1216         coda_write(dev, pic_stream_buffer_size / 1024,
1217                    CODA_CMD_ENC_PIC_BB_SIZE);
1218
1219         if (!ctx->streamon_out) {
1220                 /* After streamoff on the output side, set stream end flag */
1221                 ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
1222                 coda_write(dev, ctx->bit_stream_param,
1223                            CODA_REG_BIT_BIT_STREAM_PARAM);
1224         }
1225
1226         if (dev->devtype->product != CODA_DX6)
1227                 coda_write(dev, ctx->iram_info.axi_sram_use,
1228                                 CODA7_REG_BIT_AXI_SRAM_USE);
1229
1230         coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
1231
1232         return 0;
1233 }
1234
1235 static void coda_finish_encode(struct coda_ctx *ctx)
1236 {
1237         struct vb2_buffer *src_buf, *dst_buf;
1238         struct coda_dev *dev = ctx->dev;
1239         u32 wr_ptr, start_ptr;
1240
1241         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1242         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1243
1244         /* Get results from the coda */
1245         start_ptr = coda_read(dev, CODA_CMD_ENC_PIC_BB_START);
1246         wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
1247
1248         /* Calculate bytesused field */
1249         if (dst_buf->v4l2_buf.sequence == 0) {
1250                 vb2_set_plane_payload(dst_buf, 0, wr_ptr - start_ptr +
1251                                         ctx->vpu_header_size[0] +
1252                                         ctx->vpu_header_size[1] +
1253                                         ctx->vpu_header_size[2]);
1254         } else {
1255                 vb2_set_plane_payload(dst_buf, 0, wr_ptr - start_ptr);
1256         }
1257
1258         v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "frame size = %u\n",
1259                  wr_ptr - start_ptr);
1260
1261         coda_read(dev, CODA_RET_ENC_PIC_SLICE_NUM);
1262         coda_read(dev, CODA_RET_ENC_PIC_FLAG);
1263
1264         if (coda_read(dev, CODA_RET_ENC_PIC_TYPE) == 0) {
1265                 dst_buf->v4l2_buf.flags |= V4L2_BUF_FLAG_KEYFRAME;
1266                 dst_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_PFRAME;
1267         } else {
1268                 dst_buf->v4l2_buf.flags |= V4L2_BUF_FLAG_PFRAME;
1269                 dst_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1270         }
1271
1272         dst_buf->v4l2_buf.timestamp = src_buf->v4l2_buf.timestamp;
1273         dst_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1274         dst_buf->v4l2_buf.flags |=
1275                 src_buf->v4l2_buf.flags & V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1276         dst_buf->v4l2_buf.timecode = src_buf->v4l2_buf.timecode;
1277
1278         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
1279
1280         dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1281         v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_DONE);
1282
1283         ctx->gopcounter--;
1284         if (ctx->gopcounter < 0)
1285                 ctx->gopcounter = ctx->params.gop_size - 1;
1286
1287         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1288                 "job finished: encoding frame (%d) (%s)\n",
1289                 dst_buf->v4l2_buf.sequence,
1290                 (dst_buf->v4l2_buf.flags & V4L2_BUF_FLAG_KEYFRAME) ?
1291                 "KEYFRAME" : "PFRAME");
1292 }
1293
1294 static void coda_seq_end_work(struct work_struct *work)
1295 {
1296         struct coda_ctx *ctx = container_of(work, struct coda_ctx, seq_end_work);
1297         struct coda_dev *dev = ctx->dev;
1298
1299         mutex_lock(&ctx->buffer_mutex);
1300         mutex_lock(&dev->coda_mutex);
1301
1302         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1303                  "%d: %s: sent command 'SEQ_END' to coda\n", ctx->idx,
1304                  __func__);
1305         if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
1306                 v4l2_err(&dev->v4l2_dev,
1307                          "CODA_COMMAND_SEQ_END failed\n");
1308         }
1309
1310         kfifo_init(&ctx->bitstream_fifo,
1311                 ctx->bitstream.vaddr, ctx->bitstream.size);
1312
1313         coda_free_framebuffers(ctx);
1314         coda_free_context_buffers(ctx);
1315
1316         mutex_unlock(&dev->coda_mutex);
1317         mutex_unlock(&ctx->buffer_mutex);
1318 }
1319
1320 static void coda_bit_release(struct coda_ctx *ctx)
1321 {
1322         mutex_lock(&ctx->buffer_mutex);
1323         coda_free_framebuffers(ctx);
1324         coda_free_context_buffers(ctx);
1325         mutex_unlock(&ctx->buffer_mutex);
1326 }
1327
1328 const struct coda_context_ops coda_bit_encode_ops = {
1329         .queue_init = coda_encoder_queue_init,
1330         .start_streaming = coda_start_encoding,
1331         .prepare_run = coda_prepare_encode,
1332         .finish_run = coda_finish_encode,
1333         .seq_end_work = coda_seq_end_work,
1334         .release = coda_bit_release,
1335 };
1336
1337 /*
1338  * Decoder context operations
1339  */
1340
1341 static int __coda_start_decoding(struct coda_ctx *ctx)
1342 {
1343         struct coda_q_data *q_data_src, *q_data_dst;
1344         u32 bitstream_buf, bitstream_size;
1345         struct coda_dev *dev = ctx->dev;
1346         int width, height;
1347         u32 src_fourcc, dst_fourcc;
1348         u32 val;
1349         int ret;
1350
1351         /* Start decoding */
1352         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1353         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1354         bitstream_buf = ctx->bitstream.paddr;
1355         bitstream_size = ctx->bitstream.size;
1356         src_fourcc = q_data_src->fourcc;
1357         dst_fourcc = q_data_dst->fourcc;
1358
1359         /* Allocate per-instance buffers */
1360         ret = coda_alloc_context_buffers(ctx, q_data_src);
1361         if (ret < 0)
1362                 return ret;
1363
1364         coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
1365
1366         /* Update coda bitstream read and write pointers from kfifo */
1367         coda_kfifo_sync_to_device_full(ctx);
1368
1369         ctx->frame_mem_ctrl &= ~CODA_FRAME_CHROMA_INTERLEAVE;
1370         if (dst_fourcc == V4L2_PIX_FMT_NV12)
1371                 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
1372         coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
1373
1374         ctx->display_idx = -1;
1375         ctx->frm_dis_flg = 0;
1376         coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
1377
1378         coda_write(dev, CODA_BIT_DEC_SEQ_INIT_ESCAPE,
1379                         CODA_REG_BIT_BIT_STREAM_PARAM);
1380
1381         coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START);
1382         coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE);
1383         val = 0;
1384         if ((dev->devtype->product == CODA_7541) ||
1385             (dev->devtype->product == CODA_960))
1386                 val |= CODA_REORDER_ENABLE;
1387         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
1388                 val |= CODA_NO_INT_ENABLE;
1389         coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION);
1390
1391         ctx->params.codec_mode = ctx->codec->mode;
1392         if (dev->devtype->product == CODA_960 &&
1393             src_fourcc == V4L2_PIX_FMT_MPEG4)
1394                 ctx->params.codec_mode_aux = CODA_MP4_AUX_MPEG4;
1395         else
1396                 ctx->params.codec_mode_aux = 0;
1397         if (src_fourcc == V4L2_PIX_FMT_H264) {
1398                 if (dev->devtype->product == CODA_7541) {
1399                         coda_write(dev, ctx->psbuf.paddr,
1400                                         CODA_CMD_DEC_SEQ_PS_BB_START);
1401                         coda_write(dev, (CODA7_PS_BUF_SIZE / 1024),
1402                                         CODA_CMD_DEC_SEQ_PS_BB_SIZE);
1403                 }
1404                 if (dev->devtype->product == CODA_960) {
1405                         coda_write(dev, 0, CODA_CMD_DEC_SEQ_X264_MV_EN);
1406                         coda_write(dev, 512, CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE);
1407                 }
1408         }
1409         if (dev->devtype->product != CODA_960)
1410                 coda_write(dev, 0, CODA_CMD_DEC_SEQ_SRC_SIZE);
1411
1412         if (coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT)) {
1413                 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
1414                 coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);
1415                 return -ETIMEDOUT;
1416         }
1417
1418         /* Update kfifo out pointer from coda bitstream read pointer */
1419         coda_kfifo_sync_from_device(ctx);
1420
1421         coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);
1422
1423         if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) {
1424                 v4l2_err(&dev->v4l2_dev,
1425                         "CODA_COMMAND_SEQ_INIT failed, error code = %d\n",
1426                         coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON));
1427                 return -EAGAIN;
1428         }
1429
1430         val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE);
1431         if (dev->devtype->product == CODA_DX6) {
1432                 width = (val >> CODADX6_PICWIDTH_OFFSET) & CODADX6_PICWIDTH_MASK;
1433                 height = val & CODADX6_PICHEIGHT_MASK;
1434         } else {
1435                 width = (val >> CODA7_PICWIDTH_OFFSET) & CODA7_PICWIDTH_MASK;
1436                 height = val & CODA7_PICHEIGHT_MASK;
1437         }
1438
1439         if (width > q_data_dst->bytesperline || height > q_data_dst->height) {
1440                 v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n",
1441                          width, height, q_data_dst->bytesperline,
1442                          q_data_dst->height);
1443                 return -EINVAL;
1444         }
1445
1446         width = round_up(width, 16);
1447         height = round_up(height, 16);
1448
1449         v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "%s instance %d now: %dx%d\n",
1450                  __func__, ctx->idx, width, height);
1451
1452         ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED);
1453         if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) {
1454                 v4l2_err(&dev->v4l2_dev,
1455                          "not enough framebuffers to decode (%d < %d)\n",
1456                          CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames);
1457                 return -EINVAL;
1458         }
1459
1460         if (src_fourcc == V4L2_PIX_FMT_H264) {
1461                 u32 left_right;
1462                 u32 top_bottom;
1463
1464                 left_right = coda_read(dev, CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT);
1465                 top_bottom = coda_read(dev, CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM);
1466
1467                 q_data_dst->rect.left = (left_right >> 10) & 0x3ff;
1468                 q_data_dst->rect.top = (top_bottom >> 10) & 0x3ff;
1469                 q_data_dst->rect.width = width - q_data_dst->rect.left -
1470                                          (left_right & 0x3ff);
1471                 q_data_dst->rect.height = height - q_data_dst->rect.top -
1472                                           (top_bottom & 0x3ff);
1473         }
1474
1475         ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc);
1476         if (ret < 0) {
1477                 v4l2_err(&dev->v4l2_dev, "failed to allocate framebuffers\n");
1478                 return ret;
1479         }
1480
1481         /* Tell the decoder how many frame buffers we allocated. */
1482         coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM);
1483         coda_write(dev, width, CODA_CMD_SET_FRAME_BUF_STRIDE);
1484
1485         if (dev->devtype->product != CODA_DX6) {
1486                 /* Set secondary AXI IRAM */
1487                 coda_setup_iram(ctx);
1488
1489                 coda_write(dev, ctx->iram_info.buf_bit_use,
1490                                 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1491                 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1492                                 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1493                 coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1494                                 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1495                 coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1496                                 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1497                 coda_write(dev, ctx->iram_info.buf_ovl_use,
1498                                 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1499                 if (dev->devtype->product == CODA_960)
1500                         coda_write(dev, ctx->iram_info.buf_btp_use,
1501                                         CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1502         }
1503
1504         if (dev->devtype->product == CODA_960) {
1505                 int cbb_size, crb_size;
1506
1507                 coda_write(dev, -1, CODA9_CMD_SET_FRAME_DELAY);
1508                 /* Luma 2x0 page, 2x6 cache, chroma 2x0 page, 2x4 cache size */
1509                 coda_write(dev, 0x20262024, CODA9_CMD_SET_FRAME_CACHE_SIZE);
1510
1511                 if (dst_fourcc == V4L2_PIX_FMT_NV12) {
1512                         cbb_size = 0;
1513                         crb_size = 16;
1514                 } else {
1515                         cbb_size = 8;
1516                         crb_size = 8;
1517                 }
1518                 coda_write(dev, 2 << CODA9_CACHE_PAGEMERGE_OFFSET |
1519                                 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
1520                                 cbb_size << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET |
1521                                 crb_size << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET,
1522                                 CODA9_CMD_SET_FRAME_CACHE_CONFIG);
1523         }
1524
1525         if (src_fourcc == V4L2_PIX_FMT_H264) {
1526                 coda_write(dev, ctx->slicebuf.paddr,
1527                                 CODA_CMD_SET_FRAME_SLICE_BB_START);
1528                 coda_write(dev, ctx->slicebuf.size / 1024,
1529                                 CODA_CMD_SET_FRAME_SLICE_BB_SIZE);
1530         }
1531
1532         if (dev->devtype->product == CODA_7541) {
1533                 int max_mb_x = 1920 / 16;
1534                 int max_mb_y = 1088 / 16;
1535                 int max_mb_num = max_mb_x * max_mb_y;
1536
1537                 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
1538                                 CODA7_CMD_SET_FRAME_MAX_DEC_SIZE);
1539         } else if (dev->devtype->product == CODA_960) {
1540                 int max_mb_x = 1920 / 16;
1541                 int max_mb_y = 1088 / 16;
1542                 int max_mb_num = max_mb_x * max_mb_y;
1543
1544                 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
1545                                 CODA9_CMD_SET_FRAME_MAX_DEC_SIZE);
1546         }
1547
1548         if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) {
1549                 v4l2_err(&ctx->dev->v4l2_dev,
1550                          "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1551                 return -ETIMEDOUT;
1552         }
1553
1554         return 0;
1555 }
1556
1557 static int coda_start_decoding(struct coda_ctx *ctx)
1558 {
1559         struct coda_dev *dev = ctx->dev;
1560         int ret;
1561
1562         mutex_lock(&dev->coda_mutex);
1563         ret = __coda_start_decoding(ctx);
1564         mutex_unlock(&dev->coda_mutex);
1565
1566         return ret;
1567 }
1568
1569 static int coda_prepare_decode(struct coda_ctx *ctx)
1570 {
1571         struct vb2_buffer *dst_buf;
1572         struct coda_dev *dev = ctx->dev;
1573         struct coda_q_data *q_data_dst;
1574         struct coda_buffer_meta *meta;
1575         u32 reg_addr, reg_stride;
1576
1577         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1578         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1579
1580         /* Try to copy source buffer contents into the bitstream ringbuffer */
1581         mutex_lock(&ctx->bitstream_mutex);
1582         coda_fill_bitstream(ctx);
1583         mutex_unlock(&ctx->bitstream_mutex);
1584
1585         if (coda_get_bitstream_payload(ctx) < 512 &&
1586             (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
1587                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1588                          "bitstream payload: %d, skipping\n",
1589                          coda_get_bitstream_payload(ctx));
1590                 v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1591                 return -EAGAIN;
1592         }
1593
1594         /* Run coda_start_decoding (again) if not yet initialized */
1595         if (!ctx->initialized) {
1596                 int ret = __coda_start_decoding(ctx);
1597
1598                 if (ret < 0) {
1599                         v4l2_err(&dev->v4l2_dev, "failed to start decoding\n");
1600                         v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1601                         return -EAGAIN;
1602                 } else {
1603                         ctx->initialized = 1;
1604                 }
1605         }
1606
1607         if (dev->devtype->product == CODA_960)
1608                 coda_set_gdi_regs(ctx);
1609
1610         if (dev->devtype->product == CODA_960) {
1611                 /*
1612                  * The CODA960 seems to have an internal list of buffers with
1613                  * 64 entries that includes the registered frame buffers as
1614                  * well as the rotator buffer output.
1615                  * ROT_INDEX needs to be < 0x40, but > ctx->num_internal_frames.
1616                  */
1617                 coda_write(dev, CODA_MAX_FRAMEBUFFERS + dst_buf->v4l2_buf.index,
1618                                 CODA9_CMD_DEC_PIC_ROT_INDEX);
1619
1620                 reg_addr = CODA9_CMD_DEC_PIC_ROT_ADDR_Y;
1621                 reg_stride = CODA9_CMD_DEC_PIC_ROT_STRIDE;
1622         } else {
1623                 reg_addr = CODA_CMD_DEC_PIC_ROT_ADDR_Y;
1624                 reg_stride = CODA_CMD_DEC_PIC_ROT_STRIDE;
1625         }
1626         coda_write_base(ctx, q_data_dst, dst_buf, reg_addr);
1627         coda_write(dev, q_data_dst->bytesperline, reg_stride);
1628
1629         coda_write(dev, CODA_ROT_MIR_ENABLE | ctx->params.rot_mode,
1630                         CODA_CMD_DEC_PIC_ROT_MODE);
1631
1632         switch (dev->devtype->product) {
1633         case CODA_DX6:
1634                 /* TBD */
1635         case CODA_7541:
1636                 coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION);
1637                 break;
1638         case CODA_960:
1639                 /* 'hardcode to use interrupt disable mode'? */
1640                 coda_write(dev, (1 << 10), CODA_CMD_DEC_PIC_OPTION);
1641                 break;
1642         }
1643
1644         coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM);
1645
1646         coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START);
1647         coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE);
1648
1649         if (dev->devtype->product != CODA_DX6)
1650                 coda_write(dev, ctx->iram_info.axi_sram_use,
1651                                 CODA7_REG_BIT_AXI_SRAM_USE);
1652
1653         meta = list_first_entry_or_null(&ctx->buffer_meta_list,
1654                                         struct coda_buffer_meta, list);
1655
1656         if (meta && ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) {
1657
1658                 /* If this is the last buffer in the bitstream, add padding */
1659                 if (meta->end == (ctx->bitstream_fifo.kfifo.in &
1660                                   ctx->bitstream_fifo.kfifo.mask)) {
1661                         static unsigned char buf[512];
1662                         unsigned int pad;
1663
1664                         /* Pad to multiple of 256 and then add 256 more */
1665                         pad = ((0 - meta->end) & 0xff) + 256;
1666
1667                         memset(buf, 0xff, sizeof(buf));
1668
1669                         kfifo_in(&ctx->bitstream_fifo, buf, pad);
1670                 }
1671         }
1672
1673         coda_kfifo_sync_to_device_full(ctx);
1674
1675         /* Clear decode success flag */
1676         coda_write(dev, 0, CODA_RET_DEC_PIC_SUCCESS);
1677
1678         coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
1679
1680         return 0;
1681 }
1682
1683 static void coda_finish_decode(struct coda_ctx *ctx)
1684 {
1685         struct coda_dev *dev = ctx->dev;
1686         struct coda_q_data *q_data_src;
1687         struct coda_q_data *q_data_dst;
1688         struct vb2_buffer *dst_buf;
1689         struct coda_buffer_meta *meta;
1690         unsigned long payload;
1691         int width, height;
1692         int decoded_idx;
1693         int display_idx;
1694         u32 src_fourcc;
1695         int success;
1696         u32 err_mb;
1697         u32 val;
1698
1699         /* Update kfifo out pointer from coda bitstream read pointer */
1700         coda_kfifo_sync_from_device(ctx);
1701
1702         /*
1703          * in stream-end mode, the read pointer can overshoot the write pointer
1704          * by up to 512 bytes
1705          */
1706         if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) {
1707                 if (coda_get_bitstream_payload(ctx) >= CODA_MAX_FRAME_SIZE - 512)
1708                         kfifo_init(&ctx->bitstream_fifo,
1709                                 ctx->bitstream.vaddr, ctx->bitstream.size);
1710         }
1711
1712         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1713         src_fourcc = q_data_src->fourcc;
1714
1715         val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS);
1716         if (val != 1)
1717                 pr_err("DEC_PIC_SUCCESS = %d\n", val);
1718
1719         success = val & 0x1;
1720         if (!success)
1721                 v4l2_err(&dev->v4l2_dev, "decode failed\n");
1722
1723         if (src_fourcc == V4L2_PIX_FMT_H264) {
1724                 if (val & (1 << 3))
1725                         v4l2_err(&dev->v4l2_dev,
1726                                  "insufficient PS buffer space (%d bytes)\n",
1727                                  ctx->psbuf.size);
1728                 if (val & (1 << 2))
1729                         v4l2_err(&dev->v4l2_dev,
1730                                  "insufficient slice buffer space (%d bytes)\n",
1731                                  ctx->slicebuf.size);
1732         }
1733
1734         val = coda_read(dev, CODA_RET_DEC_PIC_SIZE);
1735         width = (val >> 16) & 0xffff;
1736         height = val & 0xffff;
1737
1738         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1739
1740         /* frame crop information */
1741         if (src_fourcc == V4L2_PIX_FMT_H264) {
1742                 u32 left_right;
1743                 u32 top_bottom;
1744
1745                 left_right = coda_read(dev, CODA_RET_DEC_PIC_CROP_LEFT_RIGHT);
1746                 top_bottom = coda_read(dev, CODA_RET_DEC_PIC_CROP_TOP_BOTTOM);
1747
1748                 if (left_right == 0xffffffff && top_bottom == 0xffffffff) {
1749                         /* Keep current crop information */
1750                 } else {
1751                         struct v4l2_rect *rect = &q_data_dst->rect;
1752
1753                         rect->left = left_right >> 16 & 0xffff;
1754                         rect->top = top_bottom >> 16 & 0xffff;
1755                         rect->width = width - rect->left -
1756                                       (left_right & 0xffff);
1757                         rect->height = height - rect->top -
1758                                        (top_bottom & 0xffff);
1759                 }
1760         } else {
1761                 /* no cropping */
1762         }
1763
1764         err_mb = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB);
1765         if (err_mb > 0)
1766                 v4l2_err(&dev->v4l2_dev,
1767                          "errors in %d macroblocks\n", err_mb);
1768
1769         if (dev->devtype->product == CODA_7541) {
1770                 val = coda_read(dev, CODA_RET_DEC_PIC_OPTION);
1771                 if (val == 0) {
1772                         /* not enough bitstream data */
1773                         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1774                                  "prescan failed: %d\n", val);
1775                         ctx->hold = true;
1776                         return;
1777                 }
1778         }
1779
1780         ctx->frm_dis_flg = coda_read(dev,
1781                                      CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
1782
1783         /*
1784          * The previous display frame was copied out by the rotator,
1785          * now it can be overwritten again
1786          */
1787         if (ctx->display_idx >= 0 &&
1788             ctx->display_idx < ctx->num_internal_frames) {
1789                 ctx->frm_dis_flg &= ~(1 << ctx->display_idx);
1790                 coda_write(dev, ctx->frm_dis_flg,
1791                                 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
1792         }
1793
1794         /*
1795          * The index of the last decoded frame, not necessarily in
1796          * display order, and the index of the next display frame.
1797          * The latter could have been decoded in a previous run.
1798          */
1799         decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX);
1800         display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX);
1801
1802         if (decoded_idx == -1) {
1803                 /* no frame was decoded, but we might have a display frame */
1804                 if (display_idx >= 0 && display_idx < ctx->num_internal_frames)
1805                         ctx->sequence_offset++;
1806                 else if (ctx->display_idx < 0)
1807                         ctx->hold = true;
1808         } else if (decoded_idx == -2) {
1809                 /* no frame was decoded, we still return remaining buffers */
1810         } else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) {
1811                 v4l2_err(&dev->v4l2_dev,
1812                          "decoded frame index out of range: %d\n", decoded_idx);
1813         } else {
1814                 val = coda_read(dev, CODA_RET_DEC_PIC_FRAME_NUM) - 1;
1815                 val -= ctx->sequence_offset;
1816                 mutex_lock(&ctx->bitstream_mutex);
1817                 if (!list_empty(&ctx->buffer_meta_list)) {
1818                         meta = list_first_entry(&ctx->buffer_meta_list,
1819                                               struct coda_buffer_meta, list);
1820                         list_del(&meta->list);
1821                         if (val != (meta->sequence & 0xffff)) {
1822                                 v4l2_err(&dev->v4l2_dev,
1823                                          "sequence number mismatch (%d(%d) != %d)\n",
1824                                          val, ctx->sequence_offset,
1825                                          meta->sequence);
1826                         }
1827                         ctx->frame_metas[decoded_idx] = *meta;
1828                         kfree(meta);
1829                 } else {
1830                         v4l2_err(&dev->v4l2_dev, "empty timestamp list!\n");
1831                         memset(&ctx->frame_metas[decoded_idx], 0,
1832                                sizeof(struct coda_buffer_meta));
1833                         ctx->frame_metas[decoded_idx].sequence = val;
1834                         ctx->sequence_offset++;
1835                 }
1836                 mutex_unlock(&ctx->bitstream_mutex);
1837
1838                 val = coda_read(dev, CODA_RET_DEC_PIC_TYPE) & 0x7;
1839                 if (val == 0)
1840                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_KEYFRAME;
1841                 else if (val == 1)
1842                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_PFRAME;
1843                 else
1844                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_BFRAME;
1845
1846                 ctx->frame_errors[decoded_idx] = err_mb;
1847         }
1848
1849         if (display_idx == -1) {
1850                 /*
1851                  * no more frames to be decoded, but there could still
1852                  * be rotator output to dequeue
1853                  */
1854                 ctx->hold = true;
1855         } else if (display_idx == -3) {
1856                 /* possibly prescan failure */
1857         } else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) {
1858                 v4l2_err(&dev->v4l2_dev,
1859                          "presentation frame index out of range: %d\n",
1860                          display_idx);
1861         }
1862
1863         /* If a frame was copied out, return it */
1864         if (ctx->display_idx >= 0 &&
1865             ctx->display_idx < ctx->num_internal_frames) {
1866                 dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1867                 dst_buf->v4l2_buf.sequence = ctx->osequence++;
1868
1869                 dst_buf->v4l2_buf.flags &= ~(V4L2_BUF_FLAG_KEYFRAME |
1870                                              V4L2_BUF_FLAG_PFRAME |
1871                                              V4L2_BUF_FLAG_BFRAME);
1872                 dst_buf->v4l2_buf.flags |= ctx->frame_types[ctx->display_idx];
1873                 meta = &ctx->frame_metas[ctx->display_idx];
1874                 dst_buf->v4l2_buf.timecode = meta->timecode;
1875                 dst_buf->v4l2_buf.timestamp = meta->timestamp;
1876
1877                 switch (q_data_dst->fourcc) {
1878                 case V4L2_PIX_FMT_YUV420:
1879                 case V4L2_PIX_FMT_YVU420:
1880                 case V4L2_PIX_FMT_NV12:
1881                 default:
1882                         payload = width * height * 3 / 2;
1883                         break;
1884                 case V4L2_PIX_FMT_YUV422P:
1885                         payload = width * height * 2;
1886                         break;
1887                 }
1888                 vb2_set_plane_payload(dst_buf, 0, payload);
1889
1890                 v4l2_m2m_buf_done(dst_buf, ctx->frame_errors[display_idx] ?
1891                                   VB2_BUF_STATE_ERROR : VB2_BUF_STATE_DONE);
1892
1893                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1894                         "job finished: decoding frame (%d) (%s)\n",
1895                         dst_buf->v4l2_buf.sequence,
1896                         (dst_buf->v4l2_buf.flags & V4L2_BUF_FLAG_KEYFRAME) ?
1897                         "KEYFRAME" : "PFRAME");
1898         } else {
1899                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1900                         "job finished: no frame decoded\n");
1901         }
1902
1903         /* The rotator will copy the current display frame next time */
1904         ctx->display_idx = display_idx;
1905 }
1906
1907 const struct coda_context_ops coda_bit_decode_ops = {
1908         .queue_init = coda_decoder_queue_init,
1909         .start_streaming = coda_start_decoding,
1910         .prepare_run = coda_prepare_decode,
1911         .finish_run = coda_finish_decode,
1912         .seq_end_work = coda_seq_end_work,
1913         .release = coda_bit_release,
1914 };
1915
1916 irqreturn_t coda_irq_handler(int irq, void *data)
1917 {
1918         struct coda_dev *dev = data;
1919         struct coda_ctx *ctx;
1920
1921         /* read status register to attend the IRQ */
1922         coda_read(dev, CODA_REG_BIT_INT_STATUS);
1923         coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET,
1924                       CODA_REG_BIT_INT_CLEAR);
1925
1926         ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
1927         if (ctx == NULL) {
1928                 v4l2_err(&dev->v4l2_dev,
1929                          "Instance released before the end of transaction\n");
1930                 mutex_unlock(&dev->coda_mutex);
1931                 return IRQ_HANDLED;
1932         }
1933
1934         if (ctx->aborting) {
1935                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
1936                          "task has been aborted\n");
1937         }
1938
1939         if (coda_isbusy(ctx->dev)) {
1940                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
1941                          "coda is still busy!!!!\n");
1942                 return IRQ_NONE;
1943         }
1944
1945         complete(&ctx->completion);
1946
1947         return IRQ_HANDLED;
1948 }