GNU Linux-libre 4.14.266-gnu1
[releases.git] / drivers / media / platform / coda / coda-bit.c
1 /*
2  * Coda multi-standard codec IP - BIT processor functions
3  *
4  * Copyright (C) 2012 Vista Silicon S.L.
5  *    Javier Martin, <javier.martin@vista-silicon.com>
6  *    Xavier Duret
7  * Copyright (C) 2012-2014 Philipp Zabel, Pengutronix
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  */
14
15 #include <linux/clk.h>
16 #include <linux/irqreturn.h>
17 #include <linux/kernel.h>
18 #include <linux/log2.h>
19 #include <linux/platform_device.h>
20 #include <linux/reset.h>
21 #include <linux/slab.h>
22 #include <linux/videodev2.h>
23
24 #include <media/v4l2-common.h>
25 #include <media/v4l2-ctrls.h>
26 #include <media/v4l2-fh.h>
27 #include <media/v4l2-mem2mem.h>
28 #include <media/videobuf2-v4l2.h>
29 #include <media/videobuf2-dma-contig.h>
30 #include <media/videobuf2-vmalloc.h>
31
32 #include "coda.h"
33 #include "imx-vdoa.h"
34 #define CREATE_TRACE_POINTS
35 #include "trace.h"
36
/* Size of the per-context "parabuf" parameter buffer */
#define CODA_PARA_BUF_SIZE      (10 * 1024)
/* Size of the per-context "psbuf" allocated on CODA7541 */
#define CODA7_PS_BUF_SIZE       0x28000
/* Extra room added to the CODA960 work buffer for H.264 contexts */
#define CODA9_PS_SAVE_SIZE      (512 * 1024)

/*
 * NOTE(review): presumably the default rate control gamma values for
 * pre-CODA960 and CODA960 devices - confirm at the point of use.
 */
#define CODA_DEFAULT_GAMMA      4096
#define CODA9_DEFAULT_GAMMA     24576   /* 0.75 * 32768 */

/* Forward declaration; the definition is not in this part of the file */
static void coda_free_bitstream_buffer(struct coda_ctx *ctx);
45
46 static inline int coda_is_initialized(struct coda_dev *dev)
47 {
48         return coda_read(dev, CODA_REG_BIT_CUR_PC) != 0;
49 }
50
51 static inline unsigned long coda_isbusy(struct coda_dev *dev)
52 {
53         return coda_read(dev, CODA_REG_BIT_BUSY);
54 }
55
56 static int coda_wait_timeout(struct coda_dev *dev)
57 {
58         unsigned long timeout = jiffies + msecs_to_jiffies(1000);
59
60         while (coda_isbusy(dev)) {
61                 if (time_after(jiffies, timeout))
62                         return -ETIMEDOUT;
63         }
64         return 0;
65 }
66
67 static void coda_command_async(struct coda_ctx *ctx, int cmd)
68 {
69         struct coda_dev *dev = ctx->dev;
70
71         if (dev->devtype->product == CODA_960 ||
72             dev->devtype->product == CODA_7541) {
73                 /* Restore context related registers to CODA */
74                 coda_write(dev, ctx->bit_stream_param,
75                                 CODA_REG_BIT_BIT_STREAM_PARAM);
76                 coda_write(dev, ctx->frm_dis_flg,
77                                 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
78                 coda_write(dev, ctx->frame_mem_ctrl,
79                                 CODA_REG_BIT_FRAME_MEM_CTRL);
80                 coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR);
81         }
82
83         if (dev->devtype->product == CODA_960) {
84                 coda_write(dev, 1, CODA9_GDI_WPROT_ERR_CLR);
85                 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
86         }
87
88         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
89
90         coda_write(dev, ctx->idx, CODA_REG_BIT_RUN_INDEX);
91         coda_write(dev, ctx->params.codec_mode, CODA_REG_BIT_RUN_COD_STD);
92         coda_write(dev, ctx->params.codec_mode_aux, CODA7_REG_BIT_RUN_AUX_STD);
93
94         trace_coda_bit_run(ctx, cmd);
95
96         coda_write(dev, cmd, CODA_REG_BIT_RUN_COMMAND);
97 }
98
99 static int coda_command_sync(struct coda_ctx *ctx, int cmd)
100 {
101         struct coda_dev *dev = ctx->dev;
102         int ret;
103
104         coda_command_async(ctx, cmd);
105         ret = coda_wait_timeout(dev);
106         trace_coda_bit_done(ctx);
107
108         return ret;
109 }
110
111 int coda_hw_reset(struct coda_ctx *ctx)
112 {
113         struct coda_dev *dev = ctx->dev;
114         unsigned long timeout;
115         unsigned int idx;
116         int ret;
117
118         if (!dev->rstc)
119                 return -ENOENT;
120
121         idx = coda_read(dev, CODA_REG_BIT_RUN_INDEX);
122
123         if (dev->devtype->product == CODA_960) {
124                 timeout = jiffies + msecs_to_jiffies(100);
125                 coda_write(dev, 0x11, CODA9_GDI_BUS_CTRL);
126                 while (coda_read(dev, CODA9_GDI_BUS_STATUS) != 0x77) {
127                         if (time_after(jiffies, timeout))
128                                 return -ETIME;
129                         cpu_relax();
130                 }
131         }
132
133         ret = reset_control_reset(dev->rstc);
134         if (ret < 0)
135                 return ret;
136
137         if (dev->devtype->product == CODA_960)
138                 coda_write(dev, 0x00, CODA9_GDI_BUS_CTRL);
139         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
140         coda_write(dev, CODA_REG_RUN_ENABLE, CODA_REG_BIT_CODE_RUN);
141         ret = coda_wait_timeout(dev);
142         coda_write(dev, idx, CODA_REG_BIT_RUN_INDEX);
143
144         return ret;
145 }
146
147 static void coda_kfifo_sync_from_device(struct coda_ctx *ctx)
148 {
149         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
150         struct coda_dev *dev = ctx->dev;
151         u32 rd_ptr;
152
153         rd_ptr = coda_read(dev, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
154         kfifo->out = (kfifo->in & ~kfifo->mask) |
155                       (rd_ptr - ctx->bitstream.paddr);
156         if (kfifo->out > kfifo->in)
157                 kfifo->out -= kfifo->mask + 1;
158 }
159
160 static void coda_kfifo_sync_to_device_full(struct coda_ctx *ctx)
161 {
162         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
163         struct coda_dev *dev = ctx->dev;
164         u32 rd_ptr, wr_ptr;
165
166         rd_ptr = ctx->bitstream.paddr + (kfifo->out & kfifo->mask);
167         coda_write(dev, rd_ptr, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
168         wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
169         coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
170 }
171
172 static void coda_kfifo_sync_to_device_write(struct coda_ctx *ctx)
173 {
174         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
175         struct coda_dev *dev = ctx->dev;
176         u32 wr_ptr;
177
178         wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
179         coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
180 }
181
182 static int coda_bitstream_pad(struct coda_ctx *ctx, u32 size)
183 {
184         unsigned char *buf;
185         u32 n;
186
187         if (size < 6)
188                 size = 6;
189
190         buf = kmalloc(size, GFP_KERNEL);
191         if (!buf)
192                 return -ENOMEM;
193
194         coda_h264_filler_nal(size, buf);
195         n = kfifo_in(&ctx->bitstream_fifo, buf, size);
196         kfree(buf);
197
198         return (n < size) ? -ENOSPC : 0;
199 }
200
201 static int coda_bitstream_queue(struct coda_ctx *ctx,
202                                 struct vb2_v4l2_buffer *src_buf)
203 {
204         u32 src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
205         u32 n;
206
207         n = kfifo_in(&ctx->bitstream_fifo,
208                         vb2_plane_vaddr(&src_buf->vb2_buf, 0), src_size);
209         if (n < src_size)
210                 return -ENOSPC;
211
212         src_buf->sequence = ctx->qsequence++;
213
214         return 0;
215 }
216
217 static bool coda_bitstream_try_queue(struct coda_ctx *ctx,
218                                      struct vb2_v4l2_buffer *src_buf)
219 {
220         unsigned long payload = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
221         int ret;
222
223         if (coda_get_bitstream_payload(ctx) + payload + 512 >=
224             ctx->bitstream.size)
225                 return false;
226
227         if (vb2_plane_vaddr(&src_buf->vb2_buf, 0) == NULL) {
228                 v4l2_err(&ctx->dev->v4l2_dev, "trying to queue empty buffer\n");
229                 return true;
230         }
231
232         /* Add zero padding before the first H.264 buffer, if it is too small */
233         if (ctx->qsequence == 0 && payload < 512 &&
234             ctx->codec->src_fourcc == V4L2_PIX_FMT_H264)
235                 coda_bitstream_pad(ctx, 512 - payload);
236
237         ret = coda_bitstream_queue(ctx, src_buf);
238         if (ret < 0) {
239                 v4l2_err(&ctx->dev->v4l2_dev, "bitstream buffer overflow\n");
240                 return false;
241         }
242         /* Sync read pointer to device */
243         if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev))
244                 coda_kfifo_sync_to_device_write(ctx);
245
246         ctx->hold = false;
247
248         return true;
249 }
250
251 void coda_fill_bitstream(struct coda_ctx *ctx, struct list_head *buffer_list)
252 {
253         struct vb2_v4l2_buffer *src_buf;
254         struct coda_buffer_meta *meta;
255         unsigned long flags;
256         u32 start;
257
258         if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG)
259                 return;
260
261         while (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) > 0) {
262                 /*
263                  * Only queue a single JPEG into the bitstream buffer, except
264                  * to increase payload over 512 bytes or if in hold state.
265                  */
266                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
267                     (coda_get_bitstream_payload(ctx) >= 512) && !ctx->hold)
268                         break;
269
270                 src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
271
272                 /* Drop frames that do not start/end with a SOI/EOI markers */
273                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
274                     !coda_jpeg_check_buffer(ctx, &src_buf->vb2_buf)) {
275                         v4l2_err(&ctx->dev->v4l2_dev,
276                                  "dropping invalid JPEG frame %d\n",
277                                  ctx->qsequence);
278                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
279                         if (buffer_list) {
280                                 struct v4l2_m2m_buffer *m2m_buf;
281
282                                 m2m_buf = container_of(src_buf,
283                                                        struct v4l2_m2m_buffer,
284                                                        vb);
285                                 list_add_tail(&m2m_buf->list, buffer_list);
286                         } else {
287                                 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
288                         }
289                         continue;
290                 }
291
292                 /* Dump empty buffers */
293                 if (!vb2_get_plane_payload(&src_buf->vb2_buf, 0)) {
294                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
295                         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
296                         continue;
297                 }
298
299                 /* Buffer start position */
300                 start = ctx->bitstream_fifo.kfifo.in &
301                         ctx->bitstream_fifo.kfifo.mask;
302
303                 if (coda_bitstream_try_queue(ctx, src_buf)) {
304                         /*
305                          * Source buffer is queued in the bitstream ringbuffer;
306                          * queue the timestamp and mark source buffer as done
307                          */
308                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
309
310                         meta = kmalloc(sizeof(*meta), GFP_KERNEL);
311                         if (meta) {
312                                 meta->sequence = src_buf->sequence;
313                                 meta->timecode = src_buf->timecode;
314                                 meta->timestamp = src_buf->vb2_buf.timestamp;
315                                 meta->start = start;
316                                 meta->end = ctx->bitstream_fifo.kfifo.in &
317                                             ctx->bitstream_fifo.kfifo.mask;
318                                 spin_lock_irqsave(&ctx->buffer_meta_lock,
319                                                   flags);
320                                 list_add_tail(&meta->list,
321                                               &ctx->buffer_meta_list);
322                                 ctx->num_metas++;
323                                 spin_unlock_irqrestore(&ctx->buffer_meta_lock,
324                                                        flags);
325
326                                 trace_coda_bit_queue(ctx, src_buf, meta);
327                         }
328
329                         if (buffer_list) {
330                                 struct v4l2_m2m_buffer *m2m_buf;
331
332                                 m2m_buf = container_of(src_buf,
333                                                        struct v4l2_m2m_buffer,
334                                                        vb);
335                                 list_add_tail(&m2m_buf->list, buffer_list);
336                         } else {
337                                 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
338                         }
339                 } else {
340                         break;
341                 }
342         }
343 }
344
345 void coda_bit_stream_end_flag(struct coda_ctx *ctx)
346 {
347         struct coda_dev *dev = ctx->dev;
348
349         ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
350
351         /* If this context is currently running, update the hardware flag */
352         if ((dev->devtype->product == CODA_960) &&
353             coda_isbusy(dev) &&
354             (ctx->idx == coda_read(dev, CODA_REG_BIT_RUN_INDEX))) {
355                 coda_write(dev, ctx->bit_stream_param,
356                            CODA_REG_BIT_BIT_STREAM_PARAM);
357         }
358 }
359
360 static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value)
361 {
362         struct coda_dev *dev = ctx->dev;
363         u32 *p = ctx->parabuf.vaddr;
364
365         if (dev->devtype->product == CODA_DX6)
366                 p[index] = value;
367         else
368                 p[index ^ 1] = value;
369 }
370
371 static inline int coda_alloc_context_buf(struct coda_ctx *ctx,
372                                          struct coda_aux_buf *buf, size_t size,
373                                          const char *name)
374 {
375         return coda_alloc_aux_buf(ctx->dev, buf, size, name, ctx->debugfs_entry);
376 }
377
378
379 static void coda_free_framebuffers(struct coda_ctx *ctx)
380 {
381         int i;
382
383         for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++)
384                 coda_free_aux_buf(ctx->dev, &ctx->internal_frames[i]);
385 }
386
387 static int coda_alloc_framebuffers(struct coda_ctx *ctx,
388                                    struct coda_q_data *q_data, u32 fourcc)
389 {
390         struct coda_dev *dev = ctx->dev;
391         int width, height;
392         int ysize;
393         int ret;
394         int i;
395
396         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 ||
397             ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 ||
398             ctx->codec->dst_fourcc == V4L2_PIX_FMT_MPEG4) {
399                 width = round_up(q_data->width, 16);
400                 height = round_up(q_data->height, 16);
401         } else {
402                 width = round_up(q_data->width, 8);
403                 height = q_data->height;
404         }
405         ysize = width * height;
406
407         /* Allocate frame buffers */
408         for (i = 0; i < ctx->num_internal_frames; i++) {
409                 size_t size;
410                 char *name;
411
412                 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
413                         size = round_up(ysize, 4096) + ysize / 2;
414                 else
415                         size = ysize + ysize / 2;
416                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 &&
417                     dev->devtype->product != CODA_DX6)
418                         size += ysize / 4;
419                 name = kasprintf(GFP_KERNEL, "fb%d", i);
420                 ret = coda_alloc_context_buf(ctx, &ctx->internal_frames[i],
421                                              size, name);
422                 kfree(name);
423                 if (ret < 0) {
424                         coda_free_framebuffers(ctx);
425                         return ret;
426                 }
427         }
428
429         /* Register frame buffers in the parameter buffer */
430         for (i = 0; i < ctx->num_internal_frames; i++) {
431                 u32 y, cb, cr, mvcol;
432
433                 /* Start addresses of Y, Cb, Cr planes */
434                 y = ctx->internal_frames[i].paddr;
435                 cb = y + ysize;
436                 cr = y + ysize + ysize/4;
437                 mvcol = y + ysize + ysize/4 + ysize/4;
438                 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) {
439                         cb = round_up(cb, 4096);
440                         mvcol = cb + ysize/2;
441                         cr = 0;
442                         /* Packed 20-bit MSB of base addresses */
443                         /* YYYYYCCC, CCyyyyyc, cccc.... */
444                         y = (y & 0xfffff000) | cb >> 20;
445                         cb = (cb & 0x000ff000) << 12;
446                 }
447                 coda_parabuf_write(ctx, i * 3 + 0, y);
448                 coda_parabuf_write(ctx, i * 3 + 1, cb);
449                 coda_parabuf_write(ctx, i * 3 + 2, cr);
450
451                 /* mvcol buffer for h.264 */
452                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 &&
453                     dev->devtype->product != CODA_DX6)
454                         coda_parabuf_write(ctx, 96 + i, mvcol);
455         }
456
457         /* mvcol buffer for mpeg4 */
458         if ((dev->devtype->product != CODA_DX6) &&
459             (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4))
460                 coda_parabuf_write(ctx, 97, ctx->internal_frames[0].paddr +
461                                             ysize + ysize/4 + ysize/4);
462
463         return 0;
464 }
465
466 static void coda_free_context_buffers(struct coda_ctx *ctx)
467 {
468         struct coda_dev *dev = ctx->dev;
469
470         coda_free_aux_buf(dev, &ctx->slicebuf);
471         coda_free_aux_buf(dev, &ctx->psbuf);
472         if (dev->devtype->product != CODA_DX6)
473                 coda_free_aux_buf(dev, &ctx->workbuf);
474         coda_free_aux_buf(dev, &ctx->parabuf);
475 }
476
477 static int coda_alloc_context_buffers(struct coda_ctx *ctx,
478                                       struct coda_q_data *q_data)
479 {
480         struct coda_dev *dev = ctx->dev;
481         size_t size;
482         int ret;
483
484         if (!ctx->parabuf.vaddr) {
485                 ret = coda_alloc_context_buf(ctx, &ctx->parabuf,
486                                              CODA_PARA_BUF_SIZE, "parabuf");
487                 if (ret < 0)
488                         return ret;
489         }
490
491         if (dev->devtype->product == CODA_DX6)
492                 return 0;
493
494         if (!ctx->slicebuf.vaddr && q_data->fourcc == V4L2_PIX_FMT_H264) {
495                 /* worst case slice size */
496                 size = (DIV_ROUND_UP(q_data->width, 16) *
497                         DIV_ROUND_UP(q_data->height, 16)) * 3200 / 8 + 512;
498                 ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size,
499                                              "slicebuf");
500                 if (ret < 0)
501                         goto err;
502         }
503
504         if (!ctx->psbuf.vaddr && dev->devtype->product == CODA_7541) {
505                 ret = coda_alloc_context_buf(ctx, &ctx->psbuf,
506                                              CODA7_PS_BUF_SIZE, "psbuf");
507                 if (ret < 0)
508                         goto err;
509         }
510
511         if (!ctx->workbuf.vaddr) {
512                 size = dev->devtype->workbuf_size;
513                 if (dev->devtype->product == CODA_960 &&
514                     q_data->fourcc == V4L2_PIX_FMT_H264)
515                         size += CODA9_PS_SAVE_SIZE;
516                 ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size,
517                                              "workbuf");
518                 if (ret < 0)
519                         goto err;
520         }
521
522         return 0;
523
524 err:
525         coda_free_context_buffers(ctx);
526         return ret;
527 }
528
529 static int coda_encode_header(struct coda_ctx *ctx, struct vb2_v4l2_buffer *buf,
530                               int header_code, u8 *header, int *size)
531 {
532         struct vb2_buffer *vb = &buf->vb2_buf;
533         struct coda_dev *dev = ctx->dev;
534         size_t bufsize;
535         int ret;
536         int i;
537
538         if (dev->devtype->product == CODA_960)
539                 memset(vb2_plane_vaddr(vb, 0), 0, 64);
540
541         coda_write(dev, vb2_dma_contig_plane_dma_addr(vb, 0),
542                    CODA_CMD_ENC_HEADER_BB_START);
543         bufsize = vb2_plane_size(vb, 0);
544         if (dev->devtype->product == CODA_960)
545                 bufsize /= 1024;
546         coda_write(dev, bufsize, CODA_CMD_ENC_HEADER_BB_SIZE);
547         coda_write(dev, header_code, CODA_CMD_ENC_HEADER_CODE);
548         ret = coda_command_sync(ctx, CODA_COMMAND_ENCODE_HEADER);
549         if (ret < 0) {
550                 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_ENCODE_HEADER timeout\n");
551                 return ret;
552         }
553
554         if (dev->devtype->product == CODA_960) {
555                 for (i = 63; i > 0; i--)
556                         if (((char *)vb2_plane_vaddr(vb, 0))[i] != 0)
557                                 break;
558                 *size = i + 1;
559         } else {
560                 *size = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)) -
561                         coda_read(dev, CODA_CMD_ENC_HEADER_BB_START);
562         }
563         memcpy(header, vb2_plane_vaddr(vb, 0), *size);
564
565         return 0;
566 }
567
568 static phys_addr_t coda_iram_alloc(struct coda_iram_info *iram, size_t size)
569 {
570         phys_addr_t ret;
571
572         size = round_up(size, 1024);
573         if (size > iram->remaining)
574                 return 0;
575         iram->remaining -= size;
576
577         ret = iram->next_paddr;
578         iram->next_paddr += size;
579
580         return ret;
581 }
582
583 static void coda_setup_iram(struct coda_ctx *ctx)
584 {
585         struct coda_iram_info *iram_info = &ctx->iram_info;
586         struct coda_dev *dev = ctx->dev;
587         int w64, w128;
588         int mb_width;
589         int dbk_bits;
590         int bit_bits;
591         int ip_bits;
592
593         memset(iram_info, 0, sizeof(*iram_info));
594         iram_info->next_paddr = dev->iram.paddr;
595         iram_info->remaining = dev->iram.size;
596
597         if (!dev->iram.vaddr)
598                 return;
599
600         switch (dev->devtype->product) {
601         case CODA_7541:
602                 dbk_bits = CODA7_USE_HOST_DBK_ENABLE | CODA7_USE_DBK_ENABLE;
603                 bit_bits = CODA7_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
604                 ip_bits = CODA7_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
605                 break;
606         case CODA_960:
607                 dbk_bits = CODA9_USE_HOST_DBK_ENABLE | CODA9_USE_DBK_ENABLE;
608                 bit_bits = CODA9_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
609                 ip_bits = CODA9_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
610                 break;
611         default: /* CODA_DX6 */
612                 return;
613         }
614
615         if (ctx->inst_type == CODA_INST_ENCODER) {
616                 struct coda_q_data *q_data_src;
617
618                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
619                 mb_width = DIV_ROUND_UP(q_data_src->width, 16);
620                 w128 = mb_width * 128;
621                 w64 = mb_width * 64;
622
623                 /* Prioritize in case IRAM is too small for everything */
624                 if (dev->devtype->product == CODA_7541) {
625                         iram_info->search_ram_size = round_up(mb_width * 16 *
626                                                               36 + 2048, 1024);
627                         iram_info->search_ram_paddr = coda_iram_alloc(iram_info,
628                                                 iram_info->search_ram_size);
629                         if (!iram_info->search_ram_paddr) {
630                                 pr_err("IRAM is smaller than the search ram size\n");
631                                 goto out;
632                         }
633                         iram_info->axi_sram_use |= CODA7_USE_HOST_ME_ENABLE |
634                                                    CODA7_USE_ME_ENABLE;
635                 }
636
637                 /* Only H.264BP and H.263P3 are considered */
638                 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w64);
639                 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w64);
640                 if (!iram_info->buf_dbk_c_use)
641                         goto out;
642                 iram_info->axi_sram_use |= dbk_bits;
643
644                 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
645                 if (!iram_info->buf_bit_use)
646                         goto out;
647                 iram_info->axi_sram_use |= bit_bits;
648
649                 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
650                 if (!iram_info->buf_ip_ac_dc_use)
651                         goto out;
652                 iram_info->axi_sram_use |= ip_bits;
653
654                 /* OVL and BTP disabled for encoder */
655         } else if (ctx->inst_type == CODA_INST_DECODER) {
656                 struct coda_q_data *q_data_dst;
657
658                 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
659                 mb_width = DIV_ROUND_UP(q_data_dst->width, 16);
660                 w128 = mb_width * 128;
661
662                 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w128);
663                 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w128);
664                 if (!iram_info->buf_dbk_c_use)
665                         goto out;
666                 iram_info->axi_sram_use |= dbk_bits;
667
668                 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
669                 if (!iram_info->buf_bit_use)
670                         goto out;
671                 iram_info->axi_sram_use |= bit_bits;
672
673                 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
674                 if (!iram_info->buf_ip_ac_dc_use)
675                         goto out;
676                 iram_info->axi_sram_use |= ip_bits;
677
678                 /* OVL and BTP unused as there is no VC1 support yet */
679         }
680
681 out:
682         if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE))
683                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
684                          "IRAM smaller than needed\n");
685
686         if (dev->devtype->product == CODA_7541) {
687                 /* TODO - Enabling these causes picture errors on CODA7541 */
688                 if (ctx->inst_type == CODA_INST_DECODER) {
689                         /* fw 1.4.50 */
690                         iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
691                                                      CODA7_USE_IP_ENABLE);
692                 } else {
693                         /* fw 13.4.29 */
694                         iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
695                                                      CODA7_USE_HOST_DBK_ENABLE |
696                                                      CODA7_USE_IP_ENABLE |
697                                                      CODA7_USE_DBK_ENABLE);
698                 }
699         }
700 }
701
702 static u32 coda_supported_firmwares[] = {
703         CODA_FIRMWARE_VERNUM(CODA_DX6, 2, 2, 5),
704         CODA_FIRMWARE_VERNUM(CODA_7541, 1, 4, 50),
705         CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 5),
706         CODA_FIRMWARE_VERNUM(CODA_960, 2, 3, 10),
707         CODA_FIRMWARE_VERNUM(CODA_960, 3, 1, 1),
708 };
709
710 static bool coda_firmware_supported(u32 vernum)
711 {
712         int i;
713
714         for (i = 0; i < ARRAY_SIZE(coda_supported_firmwares); i++)
715                 if (vernum == coda_supported_firmwares[i])
716                         return true;
717         return false;
718 }
719
720 int coda_check_firmware(struct coda_dev *dev)
721 {
722         u16 product, major, minor, release;
723         u32 data;
724         int ret;
725
726         ret = clk_prepare_enable(dev->clk_per);
727         if (ret)
728                 goto err_clk_per;
729
730         ret = clk_prepare_enable(dev->clk_ahb);
731         if (ret)
732                 goto err_clk_ahb;
733
734         coda_write(dev, 0, CODA_CMD_FIRMWARE_VERNUM);
735         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
736         coda_write(dev, 0, CODA_REG_BIT_RUN_INDEX);
737         coda_write(dev, 0, CODA_REG_BIT_RUN_COD_STD);
738         coda_write(dev, CODA_COMMAND_FIRMWARE_GET, CODA_REG_BIT_RUN_COMMAND);
739         if (coda_wait_timeout(dev)) {
740                 v4l2_err(&dev->v4l2_dev, "firmware get command error\n");
741                 ret = -EIO;
742                 goto err_run_cmd;
743         }
744
745         if (dev->devtype->product == CODA_960) {
746                 data = coda_read(dev, CODA9_CMD_FIRMWARE_CODE_REV);
747                 v4l2_info(&dev->v4l2_dev, "Firmware code revision: %d\n",
748                           data);
749         }
750
751         /* Check we are compatible with the loaded firmware */
752         data = coda_read(dev, CODA_CMD_FIRMWARE_VERNUM);
753         product = CODA_FIRMWARE_PRODUCT(data);
754         major = CODA_FIRMWARE_MAJOR(data);
755         minor = CODA_FIRMWARE_MINOR(data);
756         release = CODA_FIRMWARE_RELEASE(data);
757
758         clk_disable_unprepare(dev->clk_per);
759         clk_disable_unprepare(dev->clk_ahb);
760
761         if (product != dev->devtype->product) {
762                 v4l2_err(&dev->v4l2_dev,
763                          "Wrong firmware. Hw: %s, Fw: %s, Version: %u.%u.%u\n",
764                          coda_product_name(dev->devtype->product),
765                          coda_product_name(product), major, minor, release);
766                 return -EINVAL;
767         }
768
769         v4l2_info(&dev->v4l2_dev, "Initialized %s.\n",
770                   coda_product_name(product));
771
772         if (coda_firmware_supported(data)) {
773                 v4l2_info(&dev->v4l2_dev, "Firmware version: %u.%u.%u\n",
774                           major, minor, release);
775         } else {
776                 v4l2_warn(&dev->v4l2_dev,
777                           "Unsupported firmware version: %u.%u.%u\n",
778                           major, minor, release);
779         }
780
781         return 0;
782
783 err_run_cmd:
784         clk_disable_unprepare(dev->clk_ahb);
785 err_clk_ahb:
786         clk_disable_unprepare(dev->clk_per);
787 err_clk_per:
788         return ret;
789 }
790
791 static void coda9_set_frame_cache(struct coda_ctx *ctx, u32 fourcc)
792 {
793         u32 cache_size, cache_config;
794
795         if (ctx->tiled_map_type == GDI_LINEAR_FRAME_MAP) {
796                 /* Luma 2x0 page, 2x6 cache, chroma 2x0 page, 2x4 cache size */
797                 cache_size = 0x20262024;
798                 cache_config = 2 << CODA9_CACHE_PAGEMERGE_OFFSET;
799         } else {
800                 /* Luma 0x2 page, 4x4 cache, chroma 0x2 page, 4x3 cache size */
801                 cache_size = 0x02440243;
802                 cache_config = 1 << CODA9_CACHE_PAGEMERGE_OFFSET;
803         }
804         coda_write(ctx->dev, cache_size, CODA9_CMD_SET_FRAME_CACHE_SIZE);
805         if (fourcc == V4L2_PIX_FMT_NV12 || fourcc == V4L2_PIX_FMT_YUYV) {
806                 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
807                                 16 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET |
808                                 0 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET;
809         } else {
810                 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
811                                 8 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET |
812                                 8 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET;
813         }
814         coda_write(ctx->dev, cache_config, CODA9_CMD_SET_FRAME_CACHE_CONFIG);
815 }
816
817 /*
818  * Encoder context operations
819  */
820
821 static int coda_encoder_reqbufs(struct coda_ctx *ctx,
822                                 struct v4l2_requestbuffers *rb)
823 {
824         struct coda_q_data *q_data_src;
825         int ret;
826
827         if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
828                 return 0;
829
830         if (rb->count) {
831                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
832                 ret = coda_alloc_context_buffers(ctx, q_data_src);
833                 if (ret < 0)
834                         return ret;
835         } else {
836                 coda_free_context_buffers(ctx);
837         }
838
839         return 0;
840 }
841
842 static int coda_start_encoding(struct coda_ctx *ctx)
843 {
844         struct coda_dev *dev = ctx->dev;
845         struct v4l2_device *v4l2_dev = &dev->v4l2_dev;
846         struct coda_q_data *q_data_src, *q_data_dst;
847         u32 bitstream_buf, bitstream_size;
848         struct vb2_v4l2_buffer *buf;
849         int gamma, ret, value;
850         u32 dst_fourcc;
851         int num_fb;
852         u32 stride;
853
854         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
855         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
856         dst_fourcc = q_data_dst->fourcc;
857
858         buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
859         bitstream_buf = vb2_dma_contig_plane_dma_addr(&buf->vb2_buf, 0);
860         bitstream_size = q_data_dst->sizeimage;
861
862         if (!coda_is_initialized(dev)) {
863                 v4l2_err(v4l2_dev, "coda is not initialized.\n");
864                 return -EFAULT;
865         }
866
867         if (dst_fourcc == V4L2_PIX_FMT_JPEG) {
868                 if (!ctx->params.jpeg_qmat_tab[0])
869                         ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL);
870                 if (!ctx->params.jpeg_qmat_tab[1])
871                         ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL);
872                 coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality);
873         }
874
875         mutex_lock(&dev->coda_mutex);
876
877         coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
878         coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
879         coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
880         switch (dev->devtype->product) {
881         case CODA_DX6:
882                 coda_write(dev, CODADX6_STREAM_BUF_DYNALLOC_EN |
883                         CODADX6_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
884                 break;
885         case CODA_960:
886                 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
887                 /* fallthrough */
888         case CODA_7541:
889                 coda_write(dev, CODA7_STREAM_BUF_DYNALLOC_EN |
890                         CODA7_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
891                 break;
892         }
893
894         ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
895                                  CODA9_FRAME_TILED2LINEAR);
896         if (q_data_src->fourcc == V4L2_PIX_FMT_NV12)
897                 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
898         if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
899                 ctx->frame_mem_ctrl |= (0x3 << 9) | CODA9_FRAME_TILED2LINEAR;
900         coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
901
902         if (dev->devtype->product == CODA_DX6) {
903                 /* Configure the coda */
904                 coda_write(dev, dev->iram.paddr,
905                            CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR);
906         }
907
908         /* Could set rotation here if needed */
909         value = 0;
910         switch (dev->devtype->product) {
911         case CODA_DX6:
912                 value = (q_data_src->width & CODADX6_PICWIDTH_MASK)
913                         << CODADX6_PICWIDTH_OFFSET;
914                 value |= (q_data_src->height & CODADX6_PICHEIGHT_MASK)
915                          << CODA_PICHEIGHT_OFFSET;
916                 break;
917         case CODA_7541:
918                 if (dst_fourcc == V4L2_PIX_FMT_H264) {
919                         value = (round_up(q_data_src->width, 16) &
920                                  CODA7_PICWIDTH_MASK) << CODA7_PICWIDTH_OFFSET;
921                         value |= (round_up(q_data_src->height, 16) &
922                                  CODA7_PICHEIGHT_MASK) << CODA_PICHEIGHT_OFFSET;
923                         break;
924                 }
925                 /* fallthrough */
926         case CODA_960:
927                 value = (q_data_src->width & CODA7_PICWIDTH_MASK)
928                         << CODA7_PICWIDTH_OFFSET;
929                 value |= (q_data_src->height & CODA7_PICHEIGHT_MASK)
930                          << CODA_PICHEIGHT_OFFSET;
931         }
932         coda_write(dev, value, CODA_CMD_ENC_SEQ_SRC_SIZE);
933         if (dst_fourcc == V4L2_PIX_FMT_JPEG)
934                 ctx->params.framerate = 0;
935         coda_write(dev, ctx->params.framerate,
936                    CODA_CMD_ENC_SEQ_SRC_F_RATE);
937
938         ctx->params.codec_mode = ctx->codec->mode;
939         switch (dst_fourcc) {
940         case V4L2_PIX_FMT_MPEG4:
941                 if (dev->devtype->product == CODA_960)
942                         coda_write(dev, CODA9_STD_MPEG4,
943                                    CODA_CMD_ENC_SEQ_COD_STD);
944                 else
945                         coda_write(dev, CODA_STD_MPEG4,
946                                    CODA_CMD_ENC_SEQ_COD_STD);
947                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_MP4_PARA);
948                 break;
949         case V4L2_PIX_FMT_H264:
950                 if (dev->devtype->product == CODA_960)
951                         coda_write(dev, CODA9_STD_H264,
952                                    CODA_CMD_ENC_SEQ_COD_STD);
953                 else
954                         coda_write(dev, CODA_STD_H264,
955                                    CODA_CMD_ENC_SEQ_COD_STD);
956                 value = ((ctx->params.h264_disable_deblocking_filter_idc &
957                           CODA_264PARAM_DISABLEDEBLK_MASK) <<
958                          CODA_264PARAM_DISABLEDEBLK_OFFSET) |
959                         ((ctx->params.h264_slice_alpha_c0_offset_div2 &
960                           CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) <<
961                          CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) |
962                         ((ctx->params.h264_slice_beta_offset_div2 &
963                           CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) <<
964                          CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET);
965                 coda_write(dev, value, CODA_CMD_ENC_SEQ_264_PARA);
966                 break;
967         case V4L2_PIX_FMT_JPEG:
968                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_PARA);
969                 coda_write(dev, ctx->params.jpeg_restart_interval,
970                                 CODA_CMD_ENC_SEQ_JPG_RST_INTERVAL);
971                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_EN);
972                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_SIZE);
973                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_OFFSET);
974
975                 coda_jpeg_write_tables(ctx);
976                 break;
977         default:
978                 v4l2_err(v4l2_dev,
979                          "dst format (0x%08x) invalid.\n", dst_fourcc);
980                 ret = -EINVAL;
981                 goto out;
982         }
983
984         /*
985          * slice mode and GOP size registers are used for thumb size/offset
986          * in JPEG mode
987          */
988         if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
989                 switch (ctx->params.slice_mode) {
990                 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE:
991                         value = 0;
992                         break;
993                 case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_MB:
994                         value  = (ctx->params.slice_max_mb &
995                                   CODA_SLICING_SIZE_MASK)
996                                  << CODA_SLICING_SIZE_OFFSET;
997                         value |= (1 & CODA_SLICING_UNIT_MASK)
998                                  << CODA_SLICING_UNIT_OFFSET;
999                         value |=  1 & CODA_SLICING_MODE_MASK;
1000                         break;
1001                 case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_BYTES:
1002                         value  = (ctx->params.slice_max_bits &
1003                                   CODA_SLICING_SIZE_MASK)
1004                                  << CODA_SLICING_SIZE_OFFSET;
1005                         value |= (0 & CODA_SLICING_UNIT_MASK)
1006                                  << CODA_SLICING_UNIT_OFFSET;
1007                         value |=  1 & CODA_SLICING_MODE_MASK;
1008                         break;
1009                 }
1010                 coda_write(dev, value, CODA_CMD_ENC_SEQ_SLICE_MODE);
1011                 value = ctx->params.gop_size;
1012                 coda_write(dev, value, CODA_CMD_ENC_SEQ_GOP_SIZE);
1013         }
1014
1015         if (ctx->params.bitrate) {
1016                 /* Rate control enabled */
1017                 value = (ctx->params.bitrate & CODA_RATECONTROL_BITRATE_MASK)
1018                         << CODA_RATECONTROL_BITRATE_OFFSET;
1019                 value |=  1 & CODA_RATECONTROL_ENABLE_MASK;
1020                 value |= (ctx->params.vbv_delay &
1021                           CODA_RATECONTROL_INITIALDELAY_MASK)
1022                          << CODA_RATECONTROL_INITIALDELAY_OFFSET;
1023                 if (dev->devtype->product == CODA_960)
1024                         value |= BIT(31); /* disable autoskip */
1025         } else {
1026                 value = 0;
1027         }
1028         coda_write(dev, value, CODA_CMD_ENC_SEQ_RC_PARA);
1029
1030         coda_write(dev, ctx->params.vbv_size, CODA_CMD_ENC_SEQ_RC_BUF_SIZE);
1031         coda_write(dev, ctx->params.intra_refresh,
1032                    CODA_CMD_ENC_SEQ_INTRA_REFRESH);
1033
1034         coda_write(dev, bitstream_buf, CODA_CMD_ENC_SEQ_BB_START);
1035         coda_write(dev, bitstream_size / 1024, CODA_CMD_ENC_SEQ_BB_SIZE);
1036
1037
1038         value = 0;
1039         if (dev->devtype->product == CODA_960)
1040                 gamma = CODA9_DEFAULT_GAMMA;
1041         else
1042                 gamma = CODA_DEFAULT_GAMMA;
1043         if (gamma > 0) {
1044                 coda_write(dev, (gamma & CODA_GAMMA_MASK) << CODA_GAMMA_OFFSET,
1045                            CODA_CMD_ENC_SEQ_RC_GAMMA);
1046         }
1047
1048         if (ctx->params.h264_min_qp || ctx->params.h264_max_qp) {
1049                 coda_write(dev,
1050                            ctx->params.h264_min_qp << CODA_QPMIN_OFFSET |
1051                            ctx->params.h264_max_qp << CODA_QPMAX_OFFSET,
1052                            CODA_CMD_ENC_SEQ_RC_QP_MIN_MAX);
1053         }
1054         if (dev->devtype->product == CODA_960) {
1055                 if (ctx->params.h264_max_qp)
1056                         value |= 1 << CODA9_OPTION_RCQPMAX_OFFSET;
1057                 if (CODA_DEFAULT_GAMMA > 0)
1058                         value |= 1 << CODA9_OPTION_GAMMA_OFFSET;
1059         } else {
1060                 if (CODA_DEFAULT_GAMMA > 0) {
1061                         if (dev->devtype->product == CODA_DX6)
1062                                 value |= 1 << CODADX6_OPTION_GAMMA_OFFSET;
1063                         else
1064                                 value |= 1 << CODA7_OPTION_GAMMA_OFFSET;
1065                 }
1066                 if (ctx->params.h264_min_qp)
1067                         value |= 1 << CODA7_OPTION_RCQPMIN_OFFSET;
1068                 if (ctx->params.h264_max_qp)
1069                         value |= 1 << CODA7_OPTION_RCQPMAX_OFFSET;
1070         }
1071         coda_write(dev, value, CODA_CMD_ENC_SEQ_OPTION);
1072
1073         coda_write(dev, 0, CODA_CMD_ENC_SEQ_RC_INTERVAL_MODE);
1074
1075         coda_setup_iram(ctx);
1076
1077         if (dst_fourcc == V4L2_PIX_FMT_H264) {
1078                 switch (dev->devtype->product) {
1079                 case CODA_DX6:
1080                         value = FMO_SLICE_SAVE_BUF_SIZE << 7;
1081                         coda_write(dev, value, CODADX6_CMD_ENC_SEQ_FMO);
1082                         break;
1083                 case CODA_7541:
1084                         coda_write(dev, ctx->iram_info.search_ram_paddr,
1085                                         CODA7_CMD_ENC_SEQ_SEARCH_BASE);
1086                         coda_write(dev, ctx->iram_info.search_ram_size,
1087                                         CODA7_CMD_ENC_SEQ_SEARCH_SIZE);
1088                         break;
1089                 case CODA_960:
1090                         coda_write(dev, 0, CODA9_CMD_ENC_SEQ_ME_OPTION);
1091                         coda_write(dev, 0, CODA9_CMD_ENC_SEQ_INTRA_WEIGHT);
1092                 }
1093         }
1094
1095         ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
1096         if (ret < 0) {
1097                 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
1098                 goto out;
1099         }
1100
1101         if (coda_read(dev, CODA_RET_ENC_SEQ_SUCCESS) == 0) {
1102                 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT failed\n");
1103                 ret = -EFAULT;
1104                 goto out;
1105         }
1106         ctx->initialized = 1;
1107
1108         if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
1109                 if (dev->devtype->product == CODA_960)
1110                         ctx->num_internal_frames = 4;
1111                 else
1112                         ctx->num_internal_frames = 2;
1113                 ret = coda_alloc_framebuffers(ctx, q_data_src, dst_fourcc);
1114                 if (ret < 0) {
1115                         v4l2_err(v4l2_dev, "failed to allocate framebuffers\n");
1116                         goto out;
1117                 }
1118                 num_fb = 2;
1119                 stride = q_data_src->bytesperline;
1120         } else {
1121                 ctx->num_internal_frames = 0;
1122                 num_fb = 0;
1123                 stride = 0;
1124         }
1125         coda_write(dev, num_fb, CODA_CMD_SET_FRAME_BUF_NUM);
1126         coda_write(dev, stride, CODA_CMD_SET_FRAME_BUF_STRIDE);
1127
1128         if (dev->devtype->product == CODA_7541) {
1129                 coda_write(dev, q_data_src->bytesperline,
1130                                 CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE);
1131         }
1132         if (dev->devtype->product != CODA_DX6) {
1133                 coda_write(dev, ctx->iram_info.buf_bit_use,
1134                                 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1135                 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1136                                 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1137                 coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1138                                 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1139                 coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1140                                 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1141                 coda_write(dev, ctx->iram_info.buf_ovl_use,
1142                                 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1143                 if (dev->devtype->product == CODA_960) {
1144                         coda_write(dev, ctx->iram_info.buf_btp_use,
1145                                         CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1146
1147                         coda9_set_frame_cache(ctx, q_data_src->fourcc);
1148
1149                         /* FIXME */
1150                         coda_write(dev, ctx->internal_frames[2].paddr,
1151                                    CODA9_CMD_SET_FRAME_SUBSAMP_A);
1152                         coda_write(dev, ctx->internal_frames[3].paddr,
1153                                    CODA9_CMD_SET_FRAME_SUBSAMP_B);
1154                 }
1155         }
1156
1157         ret = coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF);
1158         if (ret < 0) {
1159                 v4l2_err(v4l2_dev, "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1160                 goto out;
1161         }
1162
1163         /* Save stream headers */
1164         buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1165         switch (dst_fourcc) {
1166         case V4L2_PIX_FMT_H264:
1167                 /*
1168                  * Get SPS in the first frame and copy it to an
1169                  * intermediate buffer.
1170                  */
1171                 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_SPS,
1172                                          &ctx->vpu_header[0][0],
1173                                          &ctx->vpu_header_size[0]);
1174                 if (ret < 0)
1175                         goto out;
1176
1177                 /*
1178                  * Get PPS in the first frame and copy it to an
1179                  * intermediate buffer.
1180                  */
1181                 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_PPS,
1182                                          &ctx->vpu_header[1][0],
1183                                          &ctx->vpu_header_size[1]);
1184                 if (ret < 0)
1185                         goto out;
1186
1187                 /*
1188                  * Length of H.264 headers is variable and thus it might not be
1189                  * aligned for the coda to append the encoded frame. In that is
1190                  * the case a filler NAL must be added to header 2.
1191                  */
1192                 ctx->vpu_header_size[2] = coda_h264_padding(
1193                                         (ctx->vpu_header_size[0] +
1194                                          ctx->vpu_header_size[1]),
1195                                          ctx->vpu_header[2]);
1196                 break;
1197         case V4L2_PIX_FMT_MPEG4:
1198                 /*
1199                  * Get VOS in the first frame and copy it to an
1200                  * intermediate buffer
1201                  */
1202                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOS,
1203                                          &ctx->vpu_header[0][0],
1204                                          &ctx->vpu_header_size[0]);
1205                 if (ret < 0)
1206                         goto out;
1207
1208                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VIS,
1209                                          &ctx->vpu_header[1][0],
1210                                          &ctx->vpu_header_size[1]);
1211                 if (ret < 0)
1212                         goto out;
1213
1214                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOL,
1215                                          &ctx->vpu_header[2][0],
1216                                          &ctx->vpu_header_size[2]);
1217                 if (ret < 0)
1218                         goto out;
1219                 break;
1220         default:
1221                 /* No more formats need to save headers at the moment */
1222                 break;
1223         }
1224
1225 out:
1226         mutex_unlock(&dev->coda_mutex);
1227         return ret;
1228 }
1229
1230 static int coda_prepare_encode(struct coda_ctx *ctx)
1231 {
1232         struct coda_q_data *q_data_src, *q_data_dst;
1233         struct vb2_v4l2_buffer *src_buf, *dst_buf;
1234         struct coda_dev *dev = ctx->dev;
1235         int force_ipicture;
1236         int quant_param = 0;
1237         u32 pic_stream_buffer_addr, pic_stream_buffer_size;
1238         u32 rot_mode = 0;
1239         u32 dst_fourcc;
1240         u32 reg;
1241
1242         src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
1243         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1244         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1245         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1246         dst_fourcc = q_data_dst->fourcc;
1247
1248         src_buf->sequence = ctx->osequence;
1249         dst_buf->sequence = ctx->osequence;
1250         ctx->osequence++;
1251
1252         force_ipicture = ctx->params.force_ipicture;
1253         if (force_ipicture)
1254                 ctx->params.force_ipicture = false;
1255         else if (ctx->params.gop_size != 0 &&
1256                  (src_buf->sequence % ctx->params.gop_size) == 0)
1257                 force_ipicture = 1;
1258
1259         /*
1260          * Workaround coda firmware BUG that only marks the first
1261          * frame as IDR. This is a problem for some decoders that can't
1262          * recover when a frame is lost.
1263          */
1264         if (!force_ipicture) {
1265                 src_buf->flags |= V4L2_BUF_FLAG_PFRAME;
1266                 src_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1267         } else {
1268                 src_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1269                 src_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1270         }
1271
1272         if (dev->devtype->product == CODA_960)
1273                 coda_set_gdi_regs(ctx);
1274
1275         /*
1276          * Copy headers in front of the first frame and forced I frames for
1277          * H.264 only. In MPEG4 they are already copied by the CODA.
1278          */
1279         if (src_buf->sequence == 0 || force_ipicture) {
1280                 pic_stream_buffer_addr =
1281                         vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0) +
1282                         ctx->vpu_header_size[0] +
1283                         ctx->vpu_header_size[1] +
1284                         ctx->vpu_header_size[2];
1285                 pic_stream_buffer_size = q_data_dst->sizeimage -
1286                         ctx->vpu_header_size[0] -
1287                         ctx->vpu_header_size[1] -
1288                         ctx->vpu_header_size[2];
1289                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0),
1290                        &ctx->vpu_header[0][0], ctx->vpu_header_size[0]);
1291                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0)
1292                         + ctx->vpu_header_size[0], &ctx->vpu_header[1][0],
1293                         ctx->vpu_header_size[1]);
1294                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0)
1295                         + ctx->vpu_header_size[0] + ctx->vpu_header_size[1],
1296                         &ctx->vpu_header[2][0], ctx->vpu_header_size[2]);
1297         } else {
1298                 pic_stream_buffer_addr =
1299                         vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
1300                 pic_stream_buffer_size = q_data_dst->sizeimage;
1301         }
1302
1303         if (force_ipicture) {
1304                 switch (dst_fourcc) {
1305                 case V4L2_PIX_FMT_H264:
1306                         quant_param = ctx->params.h264_intra_qp;
1307                         break;
1308                 case V4L2_PIX_FMT_MPEG4:
1309                         quant_param = ctx->params.mpeg4_intra_qp;
1310                         break;
1311                 case V4L2_PIX_FMT_JPEG:
1312                         quant_param = 30;
1313                         break;
1314                 default:
1315                         v4l2_warn(&ctx->dev->v4l2_dev,
1316                                 "cannot set intra qp, fmt not supported\n");
1317                         break;
1318                 }
1319         } else {
1320                 switch (dst_fourcc) {
1321                 case V4L2_PIX_FMT_H264:
1322                         quant_param = ctx->params.h264_inter_qp;
1323                         break;
1324                 case V4L2_PIX_FMT_MPEG4:
1325                         quant_param = ctx->params.mpeg4_inter_qp;
1326                         break;
1327                 default:
1328                         v4l2_warn(&ctx->dev->v4l2_dev,
1329                                 "cannot set inter qp, fmt not supported\n");
1330                         break;
1331                 }
1332         }
1333
1334         /* submit */
1335         if (ctx->params.rot_mode)
1336                 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
1337         coda_write(dev, rot_mode, CODA_CMD_ENC_PIC_ROT_MODE);
1338         coda_write(dev, quant_param, CODA_CMD_ENC_PIC_QS);
1339
1340         if (dev->devtype->product == CODA_960) {
1341                 coda_write(dev, 4/*FIXME: 0*/, CODA9_CMD_ENC_PIC_SRC_INDEX);
1342                 coda_write(dev, q_data_src->width, CODA9_CMD_ENC_PIC_SRC_STRIDE);
1343                 coda_write(dev, 0, CODA9_CMD_ENC_PIC_SUB_FRAME_SYNC);
1344
1345                 reg = CODA9_CMD_ENC_PIC_SRC_ADDR_Y;
1346         } else {
1347                 reg = CODA_CMD_ENC_PIC_SRC_ADDR_Y;
1348         }
1349         coda_write_base(ctx, q_data_src, src_buf, reg);
1350
1351         coda_write(dev, force_ipicture << 1 & 0x2,
1352                    CODA_CMD_ENC_PIC_OPTION);
1353
1354         coda_write(dev, pic_stream_buffer_addr, CODA_CMD_ENC_PIC_BB_START);
1355         coda_write(dev, pic_stream_buffer_size / 1024,
1356                    CODA_CMD_ENC_PIC_BB_SIZE);
1357
1358         if (!ctx->streamon_out) {
1359                 /* After streamoff on the output side, set stream end flag */
1360                 ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
1361                 coda_write(dev, ctx->bit_stream_param,
1362                            CODA_REG_BIT_BIT_STREAM_PARAM);
1363         }
1364
1365         if (dev->devtype->product != CODA_DX6)
1366                 coda_write(dev, ctx->iram_info.axi_sram_use,
1367                                 CODA7_REG_BIT_AXI_SRAM_USE);
1368
1369         trace_coda_enc_pic_run(ctx, src_buf);
1370
1371         coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
1372
1373         return 0;
1374 }
1375
1376 static void coda_finish_encode(struct coda_ctx *ctx)
1377 {
1378         struct vb2_v4l2_buffer *src_buf, *dst_buf;
1379         struct coda_dev *dev = ctx->dev;
1380         u32 wr_ptr, start_ptr;
1381
1382         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1383         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1384
1385         trace_coda_enc_pic_done(ctx, dst_buf);
1386
1387         /* Get results from the coda */
1388         start_ptr = coda_read(dev, CODA_CMD_ENC_PIC_BB_START);
1389         wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
1390
1391         /* Calculate bytesused field */
1392         if (dst_buf->sequence == 0 ||
1393             src_buf->flags & V4L2_BUF_FLAG_KEYFRAME) {
1394                 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr +
1395                                         ctx->vpu_header_size[0] +
1396                                         ctx->vpu_header_size[1] +
1397                                         ctx->vpu_header_size[2]);
1398         } else {
1399                 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr);
1400         }
1401
1402         v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "frame size = %u\n",
1403                  wr_ptr - start_ptr);
1404
1405         coda_read(dev, CODA_RET_ENC_PIC_SLICE_NUM);
1406         coda_read(dev, CODA_RET_ENC_PIC_FLAG);
1407
1408         if (coda_read(dev, CODA_RET_ENC_PIC_TYPE) == 0) {
1409                 dst_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1410                 dst_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1411         } else {
1412                 dst_buf->flags |= V4L2_BUF_FLAG_PFRAME;
1413                 dst_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1414         }
1415
1416         dst_buf->vb2_buf.timestamp = src_buf->vb2_buf.timestamp;
1417         dst_buf->field = src_buf->field;
1418         dst_buf->flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1419         dst_buf->flags |=
1420                 src_buf->flags & V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1421         dst_buf->timecode = src_buf->timecode;
1422
1423         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
1424
1425         dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1426         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE);
1427
1428         ctx->gopcounter--;
1429         if (ctx->gopcounter < 0)
1430                 ctx->gopcounter = ctx->params.gop_size - 1;
1431
1432         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1433                 "job finished: encoding frame (%d) (%s)\n",
1434                 dst_buf->sequence,
1435                 (dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
1436                 "KEYFRAME" : "PFRAME");
1437 }
1438
1439 static void coda_seq_end_work(struct work_struct *work)
1440 {
1441         struct coda_ctx *ctx = container_of(work, struct coda_ctx, seq_end_work);
1442         struct coda_dev *dev = ctx->dev;
1443
1444         mutex_lock(&ctx->buffer_mutex);
1445         mutex_lock(&dev->coda_mutex);
1446
1447         if (ctx->initialized == 0)
1448                 goto out;
1449
1450         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1451                  "%d: %s: sent command 'SEQ_END' to coda\n", ctx->idx,
1452                  __func__);
1453         if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
1454                 v4l2_err(&dev->v4l2_dev,
1455                          "CODA_COMMAND_SEQ_END failed\n");
1456         }
1457
1458         /*
1459          * FIXME: Sometimes h.264 encoding fails with 8-byte sequences missing
1460          * from the output stream after the h.264 decoder has run. Resetting the
1461          * hardware after the decoder has finished seems to help.
1462          */
1463         if (dev->devtype->product == CODA_960)
1464                 coda_hw_reset(ctx);
1465
1466         kfifo_init(&ctx->bitstream_fifo,
1467                 ctx->bitstream.vaddr, ctx->bitstream.size);
1468
1469         coda_free_framebuffers(ctx);
1470
1471         ctx->initialized = 0;
1472
1473 out:
1474         mutex_unlock(&dev->coda_mutex);
1475         mutex_unlock(&ctx->buffer_mutex);
1476 }
1477
1478 static void coda_bit_release(struct coda_ctx *ctx)
1479 {
1480         mutex_lock(&ctx->buffer_mutex);
1481         coda_free_framebuffers(ctx);
1482         coda_free_context_buffers(ctx);
1483         coda_free_bitstream_buffer(ctx);
1484         mutex_unlock(&ctx->buffer_mutex);
1485 }
1486
1487 const struct coda_context_ops coda_bit_encode_ops = {
1488         .queue_init = coda_encoder_queue_init,
1489         .reqbufs = coda_encoder_reqbufs,
1490         .start_streaming = coda_start_encoding,
1491         .prepare_run = coda_prepare_encode,
1492         .finish_run = coda_finish_encode,
1493         .seq_end_work = coda_seq_end_work,
1494         .release = coda_bit_release,
1495 };
1496
1497 /*
1498  * Decoder context operations
1499  */
1500
1501 static int coda_alloc_bitstream_buffer(struct coda_ctx *ctx,
1502                                        struct coda_q_data *q_data)
1503 {
1504         if (ctx->bitstream.vaddr)
1505                 return 0;
1506
1507         ctx->bitstream.size = roundup_pow_of_two(q_data->sizeimage * 2);
1508         ctx->bitstream.vaddr = dma_alloc_wc(&ctx->dev->plat_dev->dev,
1509                                             ctx->bitstream.size,
1510                                             &ctx->bitstream.paddr, GFP_KERNEL);
1511         if (!ctx->bitstream.vaddr) {
1512                 v4l2_err(&ctx->dev->v4l2_dev,
1513                          "failed to allocate bitstream ringbuffer");
1514                 return -ENOMEM;
1515         }
1516         kfifo_init(&ctx->bitstream_fifo,
1517                    ctx->bitstream.vaddr, ctx->bitstream.size);
1518
1519         return 0;
1520 }
1521
1522 static void coda_free_bitstream_buffer(struct coda_ctx *ctx)
1523 {
1524         if (ctx->bitstream.vaddr == NULL)
1525                 return;
1526
1527         dma_free_wc(&ctx->dev->plat_dev->dev, ctx->bitstream.size,
1528                     ctx->bitstream.vaddr, ctx->bitstream.paddr);
1529         ctx->bitstream.vaddr = NULL;
1530         kfifo_init(&ctx->bitstream_fifo, NULL, 0);
1531 }
1532
1533 static int coda_decoder_reqbufs(struct coda_ctx *ctx,
1534                                 struct v4l2_requestbuffers *rb)
1535 {
1536         struct coda_q_data *q_data_src;
1537         int ret;
1538
1539         if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
1540                 return 0;
1541
1542         if (rb->count) {
1543                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1544                 ret = coda_alloc_context_buffers(ctx, q_data_src);
1545                 if (ret < 0)
1546                         return ret;
1547                 ret = coda_alloc_bitstream_buffer(ctx, q_data_src);
1548                 if (ret < 0) {
1549                         coda_free_context_buffers(ctx);
1550                         return ret;
1551                 }
1552         } else {
1553                 coda_free_bitstream_buffer(ctx);
1554                 coda_free_context_buffers(ctx);
1555         }
1556
1557         return 0;
1558 }
1559
1560 static bool coda_reorder_enable(struct coda_ctx *ctx)
1561 {
1562         const char * const *profile_names;
1563         const char * const *level_names;
1564         struct coda_dev *dev = ctx->dev;
1565         int profile, level;
1566
1567         if (dev->devtype->product != CODA_7541 &&
1568             dev->devtype->product != CODA_960)
1569                 return false;
1570
1571         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
1572                 return false;
1573
1574         if (ctx->codec->src_fourcc != V4L2_PIX_FMT_H264)
1575                 return true;
1576
1577         profile = coda_h264_profile(ctx->params.h264_profile_idc);
1578         if (profile < 0) {
1579                 v4l2_warn(&dev->v4l2_dev, "Invalid H264 Profile: %d\n",
1580                          ctx->params.h264_profile_idc);
1581                 return false;
1582         }
1583
1584         level = coda_h264_level(ctx->params.h264_level_idc);
1585         if (level < 0) {
1586                 v4l2_warn(&dev->v4l2_dev, "Invalid H264 Level: %d\n",
1587                          ctx->params.h264_level_idc);
1588                 return false;
1589         }
1590
1591         profile_names = v4l2_ctrl_get_menu(V4L2_CID_MPEG_VIDEO_H264_PROFILE);
1592         level_names = v4l2_ctrl_get_menu(V4L2_CID_MPEG_VIDEO_H264_LEVEL);
1593
1594         v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "H264 Profile/Level: %s L%s\n",
1595                  profile_names[profile], level_names[level]);
1596
1597         /* Baseline profile does not support reordering */
1598         return profile > V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE;
1599 }
1600
/*
 * Run the decoder sequence initialization for a context.
 *
 * Programs the bitstream buffer and sequence options, issues SEQ_INIT, reads
 * back the stream dimensions and the number of frames the decoder needs,
 * allocates the internal framebuffers and registers them with SET_FRAME_BUF.
 *
 * This function does not take dev->coda_mutex itself; coda_start_decoding()
 * wraps it with that mutex held.
 *
 * Returns 0 on success, or a negative error code:
 *   - the coda_command_sync() result if SEQ_INIT does not complete,
 *   - -EAGAIN if SEQ_INIT completed but did not report success,
 *   - -EINVAL if the stream exceeds the capture format or needs more than
 *     CODA_MAX_FRAMEBUFFERS frames,
 *   - the coda_alloc_framebuffers() result if that fails,
 *   - -ETIMEDOUT if SET_FRAME_BUF fails.
 */
static int __coda_start_decoding(struct coda_ctx *ctx)
{
        struct coda_q_data *q_data_src, *q_data_dst;
        u32 bitstream_buf, bitstream_size;
        struct coda_dev *dev = ctx->dev;
        int width, height;
        u32 src_fourcc, dst_fourcc;
        u32 val;
        int ret;

        v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
                 "Video Data Order Adapter: %s\n",
                 ctx->use_vdoa ? "Enabled" : "Disabled");

        /* Start decoding */
        q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
        q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
        bitstream_buf = ctx->bitstream.paddr;
        bitstream_size = ctx->bitstream.size;
        src_fourcc = q_data_src->fourcc;
        dst_fourcc = q_data_dst->fourcc;

        coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);

        /* Update coda bitstream read and write pointers from kfifo */
        coda_kfifo_sync_to_device_full(ctx);

        /*
         * Rebuild the chroma interleave and tiling bits of the frame memory
         * control value from the capture format and the tiled map type.
         */
        ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
                                 CODA9_FRAME_TILED2LINEAR);
        if (dst_fourcc == V4L2_PIX_FMT_NV12 || dst_fourcc == V4L2_PIX_FMT_YUYV)
                ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
        if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
                ctx->frame_mem_ctrl |= (0x3 << 9) |
                        ((ctx->use_vdoa) ? 0 : CODA9_FRAME_TILED2LINEAR);
        coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);

        /* No display frame yet; clear the display flags in ctx and hardware */
        ctx->display_idx = -1;
        ctx->frm_dis_flg = 0;
        coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));

        /* The bitstream buffer size register is written in units of 1 KiB */
        coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START);
        coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE);
        val = 0;
        if (coda_reorder_enable(ctx))
                val |= CODA_REORDER_ENABLE;
        if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
                val |= CODA_NO_INT_ENABLE;
        coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION);

        ctx->params.codec_mode = ctx->codec->mode;
        if (dev->devtype->product == CODA_960 &&
            src_fourcc == V4L2_PIX_FMT_MPEG4)
                ctx->params.codec_mode_aux = CODA_MP4_AUX_MPEG4;
        else
                ctx->params.codec_mode_aux = 0;
        if (src_fourcc == V4L2_PIX_FMT_MPEG4) {
                coda_write(dev, CODA_MP4_CLASS_MPEG4,
                           CODA_CMD_DEC_SEQ_MP4_ASP_CLASS);
        }
        if (src_fourcc == V4L2_PIX_FMT_H264) {
                if (dev->devtype->product == CODA_7541) {
                        coda_write(dev, ctx->psbuf.paddr,
                                        CODA_CMD_DEC_SEQ_PS_BB_START);
                        coda_write(dev, (CODA7_PS_BUF_SIZE / 1024),
                                        CODA_CMD_DEC_SEQ_PS_BB_SIZE);
                }
                if (dev->devtype->product == CODA_960) {
                        coda_write(dev, 0, CODA_CMD_DEC_SEQ_X264_MV_EN);
                        coda_write(dev, 512, CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE);
                }
        }
        if (dev->devtype->product != CODA_960)
                coda_write(dev, 0, CODA_CMD_DEC_SEQ_SRC_SIZE);

        /*
         * bit_stream_param carries the SEQ_INIT escape flag only while the
         * SEQ_INIT command runs and is cleared again right afterwards.
         */
        ctx->bit_stream_param = CODA_BIT_DEC_SEQ_INIT_ESCAPE;
        ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
        ctx->bit_stream_param = 0;
        if (ret) {
                v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
                return ret;
        }
        /*
         * Note: the context is marked initialized as soon as SEQ_INIT has
         * completed, i.e. before the success flag is evaluated below.
         */
        ctx->sequence_offset = ~0U;
        ctx->initialized = 1;

        /* Update kfifo out pointer from coda bitstream read pointer */
        coda_kfifo_sync_from_device(ctx);

        if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) {
                v4l2_err(&dev->v4l2_dev,
                        "CODA_COMMAND_SEQ_INIT failed, error code = %d\n",
                        coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON));
                return -EAGAIN;
        }

        /* The packing of width and height in this register is per product */
        val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE);
        if (dev->devtype->product == CODA_DX6) {
                width = (val >> CODADX6_PICWIDTH_OFFSET) & CODADX6_PICWIDTH_MASK;
                height = val & CODADX6_PICHEIGHT_MASK;
        } else {
                width = (val >> CODA7_PICWIDTH_OFFSET) & CODA7_PICWIDTH_MASK;
                height = val & CODA7_PICHEIGHT_MASK;
        }

        /*
         * Reject streams larger than the capture format: the width is
         * compared against bytesperline, the height against the height.
         */
        if (width > q_data_dst->bytesperline || height > q_data_dst->height) {
                v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n",
                         width, height, q_data_dst->bytesperline,
                         q_data_dst->height);
                return -EINVAL;
        }

        /* From here on the dimensions are rounded up to multiples of 16 */
        width = round_up(width, 16);
        height = round_up(height, 16);

        v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "%s instance %d now: %dx%d\n",
                 __func__, ctx->idx, width, height);

        ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED);
        /*
         * If the VDOA is used, the decoder needs one additional frame,
         * because the frames are freed when the next frame is decoded.
         * Otherwise there are visible errors in the decoded frames (green
         * regions in displayed frames) and a broken order of frames (earlier
         * frames are sporadically displayed after later frames).
         */
        if (ctx->use_vdoa)
                ctx->num_internal_frames += 1;
        if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) {
                v4l2_err(&dev->v4l2_dev,
                         "not enough framebuffers to decode (%d < %d)\n",
                         CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames);
                return -EINVAL;
        }

        if (src_fourcc == V4L2_PIX_FMT_H264) {
                u32 left_right;
                u32 top_bottom;

                left_right = coda_read(dev, CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT);
                top_bottom = coda_read(dev, CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM);

                /*
                 * Each register holds two 10-bit fields: the upper one is
                 * used as the left/top offset, the lower one is subtracted
                 * from the rounded-up width/height.
                 */
                q_data_dst->rect.left = (left_right >> 10) & 0x3ff;
                q_data_dst->rect.top = (top_bottom >> 10) & 0x3ff;
                q_data_dst->rect.width = width - q_data_dst->rect.left -
                                         (left_right & 0x3ff);
                q_data_dst->rect.height = height - q_data_dst->rect.top -
                                          (top_bottom & 0x3ff);
        }

        ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc);
        if (ret < 0) {
                v4l2_err(&dev->v4l2_dev, "failed to allocate framebuffers\n");
                return ret;
        }

        /* Tell the decoder how many frame buffers we allocated. */
        coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM);
        coda_write(dev, width, CODA_CMD_SET_FRAME_BUF_STRIDE);

        if (dev->devtype->product != CODA_DX6) {
                /* Set secondary AXI IRAM */
                coda_setup_iram(ctx);

                coda_write(dev, ctx->iram_info.buf_bit_use,
                                CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
                coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
                                CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
                coda_write(dev, ctx->iram_info.buf_dbk_y_use,
                                CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
                coda_write(dev, ctx->iram_info.buf_dbk_c_use,
                                CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
                coda_write(dev, ctx->iram_info.buf_ovl_use,
                                CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
                if (dev->devtype->product == CODA_960) {
                        coda_write(dev, ctx->iram_info.buf_btp_use,
                                        CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);

                        coda_write(dev, -1, CODA9_CMD_SET_FRAME_DELAY);
                        coda9_set_frame_cache(ctx, dst_fourcc);
                }
        }

        if (src_fourcc == V4L2_PIX_FMT_H264) {
                coda_write(dev, ctx->slicebuf.paddr,
                                CODA_CMD_SET_FRAME_SLICE_BB_START);
                coda_write(dev, ctx->slicebuf.size / 1024,
                                CODA_CMD_SET_FRAME_SLICE_BB_SIZE);
        }

        /*
         * CODA_7541 and CODA_960 are given the same limit (1920x1088,
         * expressed in 16x16 macroblocks); only the target register differs.
         */
        if (dev->devtype->product == CODA_7541) {
                int max_mb_x = 1920 / 16;
                int max_mb_y = 1088 / 16;
                int max_mb_num = max_mb_x * max_mb_y;

                coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
                                CODA7_CMD_SET_FRAME_MAX_DEC_SIZE);
        } else if (dev->devtype->product == CODA_960) {
                int max_mb_x = 1920 / 16;
                int max_mb_y = 1088 / 16;
                int max_mb_num = max_mb_x * max_mb_y;

                coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
                                CODA9_CMD_SET_FRAME_MAX_DEC_SIZE);
        }

        /* Any failure of SET_FRAME_BUF is reported to the caller as a timeout */
        if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) {
                v4l2_err(&ctx->dev->v4l2_dev,
                         "CODA_COMMAND_SET_FRAME_BUF timeout\n");
                return -ETIMEDOUT;
        }

        return 0;
}
1813
1814 static int coda_start_decoding(struct coda_ctx *ctx)
1815 {
1816         struct coda_dev *dev = ctx->dev;
1817         int ret;
1818
1819         mutex_lock(&dev->coda_mutex);
1820         ret = __coda_start_decoding(ctx);
1821         mutex_unlock(&dev->coda_mutex);
1822
1823         return ret;
1824 }
1825
/*
 * prepare_run hook of the decoder ops.
 *
 * Copies queued source data into the bitstream ring buffer, runs the sequence
 * initialization if the context is not initialized yet, programs the output
 * path (VDOA or rotator) for the next capture buffer and starts a PIC_RUN
 * command asynchronously.
 *
 * Returns 0 once PIC_RUN has been issued. Returns -EAGAIN, after finishing
 * the mem2mem job, when there is too little bitstream data to decode or when
 * the sequence initialization fails.
 */
static int coda_prepare_decode(struct coda_ctx *ctx)
{
        struct vb2_v4l2_buffer *dst_buf;
        struct coda_dev *dev = ctx->dev;
        struct coda_q_data *q_data_dst;
        struct coda_buffer_meta *meta;
        unsigned long flags;
        u32 rot_mode = 0;
        u32 reg_addr, reg_stride;

        dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
        q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);

        /* Try to copy source buffer contents into the bitstream ringbuffer */
        mutex_lock(&ctx->bitstream_mutex);
        coda_fill_bitstream(ctx, NULL);
        mutex_unlock(&ctx->bitstream_mutex);

        /*
         * With less than 512 bytes of payload the run is skipped, unless the
         * stream end flag is set, in which case the remainder is decoded.
         */
        if (coda_get_bitstream_payload(ctx) < 512 &&
            (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
                v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
                         "bitstream payload: %d, skipping\n",
                         coda_get_bitstream_payload(ctx));
                v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
                return -EAGAIN;
        }

        /* Run coda_start_decoding (again) if not yet initialized */
        if (!ctx->initialized) {
                int ret = __coda_start_decoding(ctx);

                /* Every failure is reported as -EAGAIN, whatever ret is */
                if (ret < 0) {
                        v4l2_err(&dev->v4l2_dev, "failed to start decoding\n");
                        v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
                        return -EAGAIN;
                } else {
                        ctx->initialized = 1;
                }
        }

        if (dev->devtype->product == CODA_960)
                coda_set_gdi_regs(ctx);

        /*
         * If the VDOA is in use and a valid display frame exists, the VDOA
         * is run from that internal frame into the capture buffer and
         * rot_mode stays 0. Otherwise the rotator is pointed at the capture
         * buffer and enabled via rot_mode.
         */
        if (ctx->use_vdoa &&
            ctx->display_idx >= 0 &&
            ctx->display_idx < ctx->num_internal_frames) {
                vdoa_device_run(ctx->vdoa,
                                vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0),
                                ctx->internal_frames[ctx->display_idx].paddr);
        } else {
                if (dev->devtype->product == CODA_960) {
                        /*
                         * The CODA960 seems to have an internal list of
                         * buffers with 64 entries that includes the
                         * registered frame buffers as well as the rotator
                         * buffer output.
                         *
                         * ROT_INDEX needs to be < 0x40, but >
                         * ctx->num_internal_frames.
                         */
                        coda_write(dev,
                                   CODA_MAX_FRAMEBUFFERS + dst_buf->vb2_buf.index,
                                   CODA9_CMD_DEC_PIC_ROT_INDEX);

                        reg_addr = CODA9_CMD_DEC_PIC_ROT_ADDR_Y;
                        reg_stride = CODA9_CMD_DEC_PIC_ROT_STRIDE;
                } else {
                        reg_addr = CODA_CMD_DEC_PIC_ROT_ADDR_Y;
                        reg_stride = CODA_CMD_DEC_PIC_ROT_STRIDE;
                }
                coda_write_base(ctx, q_data_dst, dst_buf, reg_addr);
                coda_write(dev, q_data_dst->bytesperline, reg_stride);

                rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
        }

        coda_write(dev, rot_mode, CODA_CMD_DEC_PIC_ROT_MODE);

        /* CODA_DX6 has no setting of its own and falls through to CODA_7541 */
        switch (dev->devtype->product) {
        case CODA_DX6:
                /* TBD */
        case CODA_7541:
                coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION);
                break;
        case CODA_960:
                /* 'hardcode to use interrupt disable mode'? */
                coda_write(dev, (1 << 10), CODA_CMD_DEC_PIC_OPTION);
                break;
        }

        coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM);

        coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START);
        coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE);

        if (dev->devtype->product != CODA_DX6)
                coda_write(dev, ctx->iram_info.axi_sram_use,
                                CODA7_REG_BIT_AXI_SRAM_USE);

        /* Peek at the oldest buffer meta; it stays on the list */
        spin_lock_irqsave(&ctx->buffer_meta_lock, flags);
        meta = list_first_entry_or_null(&ctx->buffer_meta_list,
                                        struct coda_buffer_meta, list);

        if (meta && ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) {

                /* If this is the last buffer in the bitstream, add padding */
                if (meta->end == (ctx->bitstream_fifo.kfifo.in &
                                  ctx->bitstream_fifo.kfifo.mask)) {
                        static unsigned char buf[512];
                        unsigned int pad;

                        /* Pad to multiple of 256 and then add 256 more */
                        /* pad is in the range 256..511 and so fits into buf */
                        pad = ((0 - meta->end) & 0xff) + 256;

                        memset(buf, 0xff, sizeof(buf));

                        kfifo_in(&ctx->bitstream_fifo, buf, pad);
                }
        }
        spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);

        coda_kfifo_sync_to_device_full(ctx);

        /* Clear decode success flag */
        coda_write(dev, 0, CODA_RET_DEC_PIC_SUCCESS);

        /* Clear error return value */
        coda_write(dev, 0, CODA_RET_DEC_PIC_ERR_MB);

        trace_coda_dec_pic_run(ctx, meta);

        /* PIC_RUN is only started here; this function does not wait for it */
        coda_command_async(ctx, CODA_COMMAND_PIC_RUN);

        return 0;
}
1961
/*
 * finish_run hook of the decoder ops.
 *
 * Evaluates the result registers of the last PIC_RUN: logs decode errors,
 * updates the crop rectangle for H.264, records meta data, frame type and
 * error count for the frame that was just decoded, and hands the capture
 * buffer holding the previous display frame back with the stored meta data.
 * Finally remembers the new display index for the next run.
 *
 * ctx->hold is set when the hardware reports that it has nothing more to
 * decode or display (see the individual branches below).
 */
static void coda_finish_decode(struct coda_ctx *ctx)
{
        struct coda_dev *dev = ctx->dev;
        struct coda_q_data *q_data_src;
        struct coda_q_data *q_data_dst;
        struct vb2_v4l2_buffer *dst_buf;
        struct coda_buffer_meta *meta;
        unsigned long payload;
        unsigned long flags;
        int width, height;
        int decoded_idx;
        int display_idx;
        u32 src_fourcc;
        int success;
        u32 err_mb;
        int err_vdoa = 0;
        u32 val;

        /* Update kfifo out pointer from coda bitstream read pointer */
        coda_kfifo_sync_from_device(ctx);

        /*
         * in stream-end mode, the read pointer can overshoot the write pointer
         * by up to 512 bytes
         */
        if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) {
                if (coda_get_bitstream_payload(ctx) >= ctx->bitstream.size - 512)
                        kfifo_init(&ctx->bitstream_fifo,
                                ctx->bitstream.vaddr, ctx->bitstream.size);
        }

        q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
        src_fourcc = q_data_src->fourcc;

        /* Bit 0 is the success flag; any value other than 1 is logged */
        val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS);
        if (val != 1)
                pr_err("DEC_PIC_SUCCESS = %d\n", val);

        success = val & 0x1;
        if (!success)
                v4l2_err(&dev->v4l2_dev, "decode failed\n");

        if (src_fourcc == V4L2_PIX_FMT_H264) {
                if (val & (1 << 3))
                        v4l2_err(&dev->v4l2_dev,
                                 "insufficient PS buffer space (%d bytes)\n",
                                 ctx->psbuf.size);
                if (val & (1 << 2))
                        v4l2_err(&dev->v4l2_dev,
                                 "insufficient slice buffer space (%d bytes)\n",
                                 ctx->slicebuf.size);
        }

        /* Width is taken from the upper, height from the lower 16 bits */
        val = coda_read(dev, CODA_RET_DEC_PIC_SIZE);
        width = (val >> 16) & 0xffff;
        height = val & 0xffff;

        q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);

        /* frame crop information */
        if (src_fourcc == V4L2_PIX_FMT_H264) {
                u32 left_right;
                u32 top_bottom;

                left_right = coda_read(dev, CODA_RET_DEC_PIC_CROP_LEFT_RIGHT);
                top_bottom = coda_read(dev, CODA_RET_DEC_PIC_CROP_TOP_BOTTOM);

                if (left_right == 0xffffffff && top_bottom == 0xffffffff) {
                        /* Keep current crop information */
                } else {
                        struct v4l2_rect *rect = &q_data_dst->rect;

                        /*
                         * The upper 16 bits are used as left/top offset, the
                         * lower 16 bits are subtracted from width/height.
                         */
                        rect->left = left_right >> 16 & 0xffff;
                        rect->top = top_bottom >> 16 & 0xffff;
                        rect->width = width - rect->left -
                                      (left_right & 0xffff);
                        rect->height = height - rect->top -
                                       (top_bottom & 0xffff);
                }
        } else {
                /* no cropping */
        }

        err_mb = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB);
        if (err_mb > 0)
                v4l2_err(&dev->v4l2_dev,
                         "errors in %d macroblocks\n", err_mb);

        /*
         * On CODA_7541 a zero option return value is treated as a failed
         * prescan: the context is put on hold and nothing else is evaluated.
         */
        if (dev->devtype->product == CODA_7541) {
                val = coda_read(dev, CODA_RET_DEC_PIC_OPTION);
                if (val == 0) {
                        /* not enough bitstream data */
                        v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
                                 "prescan failed: %d\n", val);
                        ctx->hold = true;
                        return;
                }
        }

        /* Wait until the VDOA finished writing the previous display frame */
        if (ctx->use_vdoa &&
            ctx->display_idx >= 0 &&
            ctx->display_idx < ctx->num_internal_frames) {
                err_vdoa = vdoa_wait_for_completion(ctx->vdoa);
        }

        ctx->frm_dis_flg = coda_read(dev,
                                     CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));

        /* The previous display frame was copied out and can be overwritten */
        if (ctx->display_idx >= 0 &&
            ctx->display_idx < ctx->num_internal_frames) {
                ctx->frm_dis_flg &= ~(1 << ctx->display_idx);
                coda_write(dev, ctx->frm_dis_flg,
                                CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
        }

        /*
         * The index of the last decoded frame, not necessarily in
         * display order, and the index of the next display frame.
         * The latter could have been decoded in a previous run.
         */
        decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX);
        display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX);

        if (decoded_idx == -1) {
                /* no frame was decoded, but we might have a display frame */
                if (display_idx >= 0 && display_idx < ctx->num_internal_frames)
                        ctx->sequence_offset++;
                else if (ctx->display_idx < 0)
                        ctx->hold = true;
        } else if (decoded_idx == -2) {
                if (ctx->display_idx >= 0 &&
                    ctx->display_idx < ctx->num_internal_frames)
                        ctx->sequence_offset++;
                /* no frame was decoded, we still return remaining buffers */
        } else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) {
                v4l2_err(&dev->v4l2_dev,
                         "decoded frame index out of range: %d\n", decoded_idx);
        } else {
                /*
                 * A frame was decoded. A sequence_offset of -1 (all ones)
                 * means no offset has been latched yet, so the first
                 * reported frame number becomes the offset.
                 */
                val = coda_read(dev, CODA_RET_DEC_PIC_FRAME_NUM);
                if (ctx->sequence_offset == -1)
                        ctx->sequence_offset = val;
                val -= ctx->sequence_offset;
                spin_lock_irqsave(&ctx->buffer_meta_lock, flags);
                if (!list_empty(&ctx->buffer_meta_list)) {
                        /* Take the oldest meta off the list for this frame */
                        meta = list_first_entry(&ctx->buffer_meta_list,
                                              struct coda_buffer_meta, list);
                        list_del(&meta->list);
                        ctx->num_metas--;
                        spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
                        /*
                         * Clamp counters to 16 bits for comparison, as the HW
                         * counter rolls over at this point for h.264. This
                         * may be different for other formats, but using 16 bits
                         * should be enough to detect most errors and saves us
                         * from doing different things based on the format.
                         */
                        if ((val & 0xffff) != (meta->sequence & 0xffff)) {
                                v4l2_err(&dev->v4l2_dev,
                                         "sequence number mismatch (%d(%d) != %d)\n",
                                         val, ctx->sequence_offset,
                                         meta->sequence);
                        }
                        /* Keep a copy per internal frame, then free the entry */
                        ctx->frame_metas[decoded_idx] = *meta;
                        kfree(meta);
                } else {
                        spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
                        v4l2_err(&dev->v4l2_dev, "empty timestamp list!\n");
                        memset(&ctx->frame_metas[decoded_idx], 0,
                               sizeof(struct coda_buffer_meta));
                        ctx->frame_metas[decoded_idx].sequence = val;
                        ctx->sequence_offset++;
                }

                trace_coda_dec_pic_done(ctx, &ctx->frame_metas[decoded_idx]);

                /* Picture type 0 maps to key frame, 1 to P, the rest to B */
                val = coda_read(dev, CODA_RET_DEC_PIC_TYPE) & 0x7;
                if (val == 0)
                        ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_KEYFRAME;
                else if (val == 1)
                        ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_PFRAME;
                else
                        ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_BFRAME;

                ctx->frame_errors[decoded_idx] = err_mb;
        }

        if (display_idx == -1) {
                /*
                 * no more frames to be decoded, but there could still
                 * be rotator output to dequeue
                 */
                ctx->hold = true;
        } else if (display_idx == -3) {
                /* possibly prescan failure */
        } else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) {
                v4l2_err(&dev->v4l2_dev,
                         "presentation frame index out of range: %d\n",
                         display_idx);
        }

        /* If a frame was copied out, return it */
        if (ctx->display_idx >= 0 &&
            ctx->display_idx < ctx->num_internal_frames) {
                dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
                dst_buf->sequence = ctx->osequence++;

                dst_buf->field = V4L2_FIELD_NONE;
                dst_buf->flags &= ~(V4L2_BUF_FLAG_KEYFRAME |
                                             V4L2_BUF_FLAG_PFRAME |
                                             V4L2_BUF_FLAG_BFRAME);
                dst_buf->flags |= ctx->frame_types[ctx->display_idx];
                meta = &ctx->frame_metas[ctx->display_idx];
                dst_buf->timecode = meta->timecode;
                dst_buf->vb2_buf.timestamp = meta->timestamp;

                trace_coda_dec_rot_done(ctx, dst_buf, meta);

                /*
                 * YUYV and YUV422P are counted with 2 bytes per pixel, every
                 * other format with 1.5 bytes per pixel. width and height
                 * are the values read from CODA_RET_DEC_PIC_SIZE above.
                 */
                switch (q_data_dst->fourcc) {
                case V4L2_PIX_FMT_YUYV:
                        payload = width * height * 2;
                        break;
                case V4L2_PIX_FMT_YUV420:
                case V4L2_PIX_FMT_YVU420:
                case V4L2_PIX_FMT_NV12:
                default:
                        payload = width * height * 3 / 2;
                        break;
                case V4L2_PIX_FMT_YUV422P:
                        payload = width * height * 2;
                        break;
                }
                vb2_set_plane_payload(&dst_buf->vb2_buf, 0, payload);

                /* Macroblock errors or a VDOA error mark the buffer as bad */
                if (ctx->frame_errors[ctx->display_idx] || err_vdoa)
                        coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR);
                else
                        coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE);

                /*
                 * NOTE(review): dst_buf is still read here after it was
                 * passed to coda_m2m_buf_done() - confirm this is safe.
                 * The message also labels every non-key frame as "PFRAME".
                 */
                v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
                        "job finished: decoding frame (%d) (%s)\n",
                        dst_buf->sequence,
                        (dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
                        "KEYFRAME" : "PFRAME");
        } else {
                v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
                        "job finished: no frame decoded\n");
        }

        /* The rotator will copy the current display frame next time */
        ctx->display_idx = display_idx;
}
2215
2216 static void coda_decode_timeout(struct coda_ctx *ctx)
2217 {
2218         struct vb2_v4l2_buffer *dst_buf;
2219
2220         /*
2221          * For now this only handles the case where we would deadlock with
2222          * userspace, i.e. userspace issued DEC_CMD_STOP and waits for EOS,
2223          * but after a failed decode run we would hold the context and wait for
2224          * userspace to queue more buffers.
2225          */
2226         if (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))
2227                 return;
2228
2229         dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
2230         dst_buf->sequence = ctx->qsequence - 1;
2231
2232         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR);
2233 }
2234
2235 const struct coda_context_ops coda_bit_decode_ops = {
2236         .queue_init = coda_decoder_queue_init,
2237         .reqbufs = coda_decoder_reqbufs,
2238         .start_streaming = coda_start_decoding,
2239         .prepare_run = coda_prepare_decode,
2240         .finish_run = coda_finish_decode,
2241         .run_timeout = coda_decode_timeout,
2242         .seq_end_work = coda_seq_end_work,
2243         .release = coda_bit_release,
2244 };
2245
2246 irqreturn_t coda_irq_handler(int irq, void *data)
2247 {
2248         struct coda_dev *dev = data;
2249         struct coda_ctx *ctx;
2250
2251         /* read status register to attend the IRQ */
2252         coda_read(dev, CODA_REG_BIT_INT_STATUS);
2253         coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET,
2254                       CODA_REG_BIT_INT_CLEAR);
2255
2256         ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
2257         if (ctx == NULL) {
2258                 v4l2_err(&dev->v4l2_dev,
2259                          "Instance released before the end of transaction\n");
2260                 return IRQ_HANDLED;
2261         }
2262
2263         trace_coda_bit_done(ctx);
2264
2265         if (ctx->aborting) {
2266                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
2267                          "task has been aborted\n");
2268         }
2269
2270         if (coda_isbusy(ctx->dev)) {
2271                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
2272                          "coda is still busy!!!!\n");
2273                 return IRQ_NONE;
2274         }
2275
2276         complete(&ctx->completion);
2277
2278         return IRQ_HANDLED;
2279 }