GNU Linux-libre 4.19.286-gnu1
[releases.git] / drivers / media / platform / coda / coda-bit.c
1 /*
2  * Coda multi-standard codec IP - BIT processor functions
3  *
4  * Copyright (C) 2012 Vista Silicon S.L.
5  *    Javier Martin, <javier.martin@vista-silicon.com>
6  *    Xavier Duret
7  * Copyright (C) 2012-2014 Philipp Zabel, Pengutronix
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  */
14
15 #include <linux/clk.h>
16 #include <linux/irqreturn.h>
17 #include <linux/kernel.h>
18 #include <linux/log2.h>
19 #include <linux/platform_device.h>
20 #include <linux/reset.h>
21 #include <linux/slab.h>
22 #include <linux/videodev2.h>
23
24 #include <media/v4l2-common.h>
25 #include <media/v4l2-ctrls.h>
26 #include <media/v4l2-fh.h>
27 #include <media/v4l2-mem2mem.h>
28 #include <media/videobuf2-v4l2.h>
29 #include <media/videobuf2-dma-contig.h>
30 #include <media/videobuf2-vmalloc.h>
31
32 #include "coda.h"
33 #include "imx-vdoa.h"
34 #define CREATE_TRACE_POINTS
35 #include "trace.h"
36
37 #define CODA_PARA_BUF_SIZE      (10 * 1024)
38 #define CODA7_PS_BUF_SIZE       0x28000
39 #define CODA9_PS_SAVE_SIZE      (512 * 1024)
40
41 #define CODA_DEFAULT_GAMMA      4096
42 #define CODA9_DEFAULT_GAMMA     24576   /* 0.75 * 32768 */
43
44 static void coda_free_bitstream_buffer(struct coda_ctx *ctx);
45
46 static inline int coda_is_initialized(struct coda_dev *dev)
47 {
48         return coda_read(dev, CODA_REG_BIT_CUR_PC) != 0;
49 }
50
51 static inline unsigned long coda_isbusy(struct coda_dev *dev)
52 {
53         return coda_read(dev, CODA_REG_BIT_BUSY);
54 }
55
56 static int coda_wait_timeout(struct coda_dev *dev)
57 {
58         unsigned long timeout = jiffies + msecs_to_jiffies(1000);
59
60         while (coda_isbusy(dev)) {
61                 if (time_after(jiffies, timeout))
62                         return -ETIMEDOUT;
63         }
64         return 0;
65 }
66
67 static void coda_command_async(struct coda_ctx *ctx, int cmd)
68 {
69         struct coda_dev *dev = ctx->dev;
70
71         if (dev->devtype->product == CODA_HX4 ||
72             dev->devtype->product == CODA_7541 ||
73             dev->devtype->product == CODA_960) {
74                 /* Restore context related registers to CODA */
75                 coda_write(dev, ctx->bit_stream_param,
76                                 CODA_REG_BIT_BIT_STREAM_PARAM);
77                 coda_write(dev, ctx->frm_dis_flg,
78                                 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
79                 coda_write(dev, ctx->frame_mem_ctrl,
80                                 CODA_REG_BIT_FRAME_MEM_CTRL);
81                 coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR);
82         }
83
84         if (dev->devtype->product == CODA_960) {
85                 coda_write(dev, 1, CODA9_GDI_WPROT_ERR_CLR);
86                 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
87         }
88
89         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
90
91         coda_write(dev, ctx->idx, CODA_REG_BIT_RUN_INDEX);
92         coda_write(dev, ctx->params.codec_mode, CODA_REG_BIT_RUN_COD_STD);
93         coda_write(dev, ctx->params.codec_mode_aux, CODA7_REG_BIT_RUN_AUX_STD);
94
95         trace_coda_bit_run(ctx, cmd);
96
97         coda_write(dev, cmd, CODA_REG_BIT_RUN_COMMAND);
98 }
99
100 static int coda_command_sync(struct coda_ctx *ctx, int cmd)
101 {
102         struct coda_dev *dev = ctx->dev;
103         int ret;
104
105         coda_command_async(ctx, cmd);
106         ret = coda_wait_timeout(dev);
107         trace_coda_bit_done(ctx);
108
109         return ret;
110 }
111
112 int coda_hw_reset(struct coda_ctx *ctx)
113 {
114         struct coda_dev *dev = ctx->dev;
115         unsigned long timeout;
116         unsigned int idx;
117         int ret;
118
119         if (!dev->rstc)
120                 return -ENOENT;
121
122         idx = coda_read(dev, CODA_REG_BIT_RUN_INDEX);
123
124         if (dev->devtype->product == CODA_960) {
125                 timeout = jiffies + msecs_to_jiffies(100);
126                 coda_write(dev, 0x11, CODA9_GDI_BUS_CTRL);
127                 while (coda_read(dev, CODA9_GDI_BUS_STATUS) != 0x77) {
128                         if (time_after(jiffies, timeout))
129                                 return -ETIME;
130                         cpu_relax();
131                 }
132         }
133
134         ret = reset_control_reset(dev->rstc);
135         if (ret < 0)
136                 return ret;
137
138         if (dev->devtype->product == CODA_960)
139                 coda_write(dev, 0x00, CODA9_GDI_BUS_CTRL);
140         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
141         coda_write(dev, CODA_REG_RUN_ENABLE, CODA_REG_BIT_CODE_RUN);
142         ret = coda_wait_timeout(dev);
143         coda_write(dev, idx, CODA_REG_BIT_RUN_INDEX);
144
145         return ret;
146 }
147
148 static void coda_kfifo_sync_from_device(struct coda_ctx *ctx)
149 {
150         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
151         struct coda_dev *dev = ctx->dev;
152         u32 rd_ptr;
153
154         rd_ptr = coda_read(dev, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
155         kfifo->out = (kfifo->in & ~kfifo->mask) |
156                       (rd_ptr - ctx->bitstream.paddr);
157         if (kfifo->out > kfifo->in)
158                 kfifo->out -= kfifo->mask + 1;
159 }
160
161 static void coda_kfifo_sync_to_device_full(struct coda_ctx *ctx)
162 {
163         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
164         struct coda_dev *dev = ctx->dev;
165         u32 rd_ptr, wr_ptr;
166
167         rd_ptr = ctx->bitstream.paddr + (kfifo->out & kfifo->mask);
168         coda_write(dev, rd_ptr, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
169         wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
170         coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
171 }
172
173 static void coda_kfifo_sync_to_device_write(struct coda_ctx *ctx)
174 {
175         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
176         struct coda_dev *dev = ctx->dev;
177         u32 wr_ptr;
178
179         wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
180         coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
181 }
182
183 static int coda_bitstream_pad(struct coda_ctx *ctx, u32 size)
184 {
185         unsigned char *buf;
186         u32 n;
187
188         if (size < 6)
189                 size = 6;
190
191         buf = kmalloc(size, GFP_KERNEL);
192         if (!buf)
193                 return -ENOMEM;
194
195         coda_h264_filler_nal(size, buf);
196         n = kfifo_in(&ctx->bitstream_fifo, buf, size);
197         kfree(buf);
198
199         return (n < size) ? -ENOSPC : 0;
200 }
201
202 static int coda_bitstream_queue(struct coda_ctx *ctx,
203                                 struct vb2_v4l2_buffer *src_buf)
204 {
205         u32 src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
206         u32 n;
207
208         n = kfifo_in(&ctx->bitstream_fifo,
209                         vb2_plane_vaddr(&src_buf->vb2_buf, 0), src_size);
210         if (n < src_size)
211                 return -ENOSPC;
212
213         src_buf->sequence = ctx->qsequence++;
214
215         return 0;
216 }
217
218 static bool coda_bitstream_try_queue(struct coda_ctx *ctx,
219                                      struct vb2_v4l2_buffer *src_buf)
220 {
221         unsigned long payload = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
222         int ret;
223
224         if (coda_get_bitstream_payload(ctx) + payload + 512 >=
225             ctx->bitstream.size)
226                 return false;
227
228         if (vb2_plane_vaddr(&src_buf->vb2_buf, 0) == NULL) {
229                 v4l2_err(&ctx->dev->v4l2_dev, "trying to queue empty buffer\n");
230                 return true;
231         }
232
233         /* Add zero padding before the first H.264 buffer, if it is too small */
234         if (ctx->qsequence == 0 && payload < 512 &&
235             ctx->codec->src_fourcc == V4L2_PIX_FMT_H264)
236                 coda_bitstream_pad(ctx, 512 - payload);
237
238         ret = coda_bitstream_queue(ctx, src_buf);
239         if (ret < 0) {
240                 v4l2_err(&ctx->dev->v4l2_dev, "bitstream buffer overflow\n");
241                 return false;
242         }
243         /* Sync read pointer to device */
244         if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev))
245                 coda_kfifo_sync_to_device_write(ctx);
246
247         ctx->hold = false;
248
249         return true;
250 }
251
252 void coda_fill_bitstream(struct coda_ctx *ctx, struct list_head *buffer_list)
253 {
254         struct vb2_v4l2_buffer *src_buf;
255         struct coda_buffer_meta *meta;
256         unsigned long flags;
257         u32 start;
258
259         if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG)
260                 return;
261
262         while (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) > 0) {
263                 /*
264                  * Only queue two JPEGs into the bitstream buffer to keep
265                  * latency low. We need at least one complete buffer and the
266                  * header of another buffer (for prescan) in the bitstream.
267                  */
268                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
269                     ctx->num_metas > 1)
270                         break;
271
272                 src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
273
274                 /* Drop frames that do not start/end with a SOI/EOI markers */
275                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
276                     !coda_jpeg_check_buffer(ctx, &src_buf->vb2_buf)) {
277                         v4l2_err(&ctx->dev->v4l2_dev,
278                                  "dropping invalid JPEG frame %d\n",
279                                  ctx->qsequence);
280                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
281                         if (buffer_list) {
282                                 struct v4l2_m2m_buffer *m2m_buf;
283
284                                 m2m_buf = container_of(src_buf,
285                                                        struct v4l2_m2m_buffer,
286                                                        vb);
287                                 list_add_tail(&m2m_buf->list, buffer_list);
288                         } else {
289                                 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
290                         }
291                         continue;
292                 }
293
294                 /* Dump empty buffers */
295                 if (!vb2_get_plane_payload(&src_buf->vb2_buf, 0)) {
296                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
297                         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
298                         continue;
299                 }
300
301                 /* Buffer start position */
302                 start = ctx->bitstream_fifo.kfifo.in &
303                         ctx->bitstream_fifo.kfifo.mask;
304
305                 if (coda_bitstream_try_queue(ctx, src_buf)) {
306                         /*
307                          * Source buffer is queued in the bitstream ringbuffer;
308                          * queue the timestamp and mark source buffer as done
309                          */
310                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
311
312                         meta = kmalloc(sizeof(*meta), GFP_KERNEL);
313                         if (meta) {
314                                 meta->sequence = src_buf->sequence;
315                                 meta->timecode = src_buf->timecode;
316                                 meta->timestamp = src_buf->vb2_buf.timestamp;
317                                 meta->start = start;
318                                 meta->end = ctx->bitstream_fifo.kfifo.in &
319                                             ctx->bitstream_fifo.kfifo.mask;
320                                 spin_lock_irqsave(&ctx->buffer_meta_lock,
321                                                   flags);
322                                 list_add_tail(&meta->list,
323                                               &ctx->buffer_meta_list);
324                                 ctx->num_metas++;
325                                 spin_unlock_irqrestore(&ctx->buffer_meta_lock,
326                                                        flags);
327
328                                 trace_coda_bit_queue(ctx, src_buf, meta);
329                         }
330
331                         if (buffer_list) {
332                                 struct v4l2_m2m_buffer *m2m_buf;
333
334                                 m2m_buf = container_of(src_buf,
335                                                        struct v4l2_m2m_buffer,
336                                                        vb);
337                                 list_add_tail(&m2m_buf->list, buffer_list);
338                         } else {
339                                 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
340                         }
341                 } else {
342                         break;
343                 }
344         }
345 }
346
347 void coda_bit_stream_end_flag(struct coda_ctx *ctx)
348 {
349         struct coda_dev *dev = ctx->dev;
350
351         ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
352
353         /* If this context is currently running, update the hardware flag */
354         if ((dev->devtype->product == CODA_960) &&
355             coda_isbusy(dev) &&
356             (ctx->idx == coda_read(dev, CODA_REG_BIT_RUN_INDEX))) {
357                 coda_write(dev, ctx->bit_stream_param,
358                            CODA_REG_BIT_BIT_STREAM_PARAM);
359         }
360 }
361
362 static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value)
363 {
364         struct coda_dev *dev = ctx->dev;
365         u32 *p = ctx->parabuf.vaddr;
366
367         if (dev->devtype->product == CODA_DX6)
368                 p[index] = value;
369         else
370                 p[index ^ 1] = value;
371 }
372
373 static inline int coda_alloc_context_buf(struct coda_ctx *ctx,
374                                          struct coda_aux_buf *buf, size_t size,
375                                          const char *name)
376 {
377         return coda_alloc_aux_buf(ctx->dev, buf, size, name, ctx->debugfs_entry);
378 }
379
380
381 static void coda_free_framebuffers(struct coda_ctx *ctx)
382 {
383         int i;
384
385         for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++)
386                 coda_free_aux_buf(ctx->dev, &ctx->internal_frames[i]);
387 }
388
389 static int coda_alloc_framebuffers(struct coda_ctx *ctx,
390                                    struct coda_q_data *q_data, u32 fourcc)
391 {
392         struct coda_dev *dev = ctx->dev;
393         unsigned int ysize, ycbcr_size;
394         int ret;
395         int i;
396
397         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 ||
398             ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 ||
399             ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 ||
400             ctx->codec->dst_fourcc == V4L2_PIX_FMT_MPEG4)
401                 ysize = round_up(q_data->rect.width, 16) *
402                         round_up(q_data->rect.height, 16);
403         else
404                 ysize = round_up(q_data->rect.width, 8) * q_data->rect.height;
405
406         if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
407                 ycbcr_size = round_up(ysize, 4096) + ysize / 2;
408         else
409                 ycbcr_size = ysize + ysize / 2;
410
411         /* Allocate frame buffers */
412         for (i = 0; i < ctx->num_internal_frames; i++) {
413                 size_t size = ycbcr_size;
414                 char *name;
415
416                 /* Add space for mvcol buffers */
417                 if (dev->devtype->product != CODA_DX6 &&
418                     (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 ||
419                      (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 && i == 0)))
420                         size += ysize / 4;
421                 name = kasprintf(GFP_KERNEL, "fb%d", i);
422                 if (!name) {
423                         coda_free_framebuffers(ctx);
424                         return -ENOMEM;
425                 }
426                 ret = coda_alloc_context_buf(ctx, &ctx->internal_frames[i],
427                                              size, name);
428                 kfree(name);
429                 if (ret < 0) {
430                         coda_free_framebuffers(ctx);
431                         return ret;
432                 }
433         }
434
435         /* Register frame buffers in the parameter buffer */
436         for (i = 0; i < ctx->num_internal_frames; i++) {
437                 u32 y, cb, cr, mvcol;
438
439                 /* Start addresses of Y, Cb, Cr planes */
440                 y = ctx->internal_frames[i].paddr;
441                 cb = y + ysize;
442                 cr = y + ysize + ysize/4;
443                 mvcol = y + ysize + ysize/4 + ysize/4;
444                 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) {
445                         cb = round_up(cb, 4096);
446                         mvcol = cb + ysize/2;
447                         cr = 0;
448                         /* Packed 20-bit MSB of base addresses */
449                         /* YYYYYCCC, CCyyyyyc, cccc.... */
450                         y = (y & 0xfffff000) | cb >> 20;
451                         cb = (cb & 0x000ff000) << 12;
452                 }
453                 coda_parabuf_write(ctx, i * 3 + 0, y);
454                 coda_parabuf_write(ctx, i * 3 + 1, cb);
455                 coda_parabuf_write(ctx, i * 3 + 2, cr);
456
457                 if (dev->devtype->product == CODA_DX6)
458                         continue;
459
460                 /* mvcol buffer for h.264 and mpeg4 */
461                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264)
462                         coda_parabuf_write(ctx, 96 + i, mvcol);
463                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 && i == 0)
464                         coda_parabuf_write(ctx, 97, mvcol);
465         }
466
467         return 0;
468 }
469
470 static void coda_free_context_buffers(struct coda_ctx *ctx)
471 {
472         struct coda_dev *dev = ctx->dev;
473
474         coda_free_aux_buf(dev, &ctx->slicebuf);
475         coda_free_aux_buf(dev, &ctx->psbuf);
476         if (dev->devtype->product != CODA_DX6)
477                 coda_free_aux_buf(dev, &ctx->workbuf);
478         coda_free_aux_buf(dev, &ctx->parabuf);
479 }
480
481 static int coda_alloc_context_buffers(struct coda_ctx *ctx,
482                                       struct coda_q_data *q_data)
483 {
484         struct coda_dev *dev = ctx->dev;
485         size_t size;
486         int ret;
487
488         if (!ctx->parabuf.vaddr) {
489                 ret = coda_alloc_context_buf(ctx, &ctx->parabuf,
490                                              CODA_PARA_BUF_SIZE, "parabuf");
491                 if (ret < 0)
492                         return ret;
493         }
494
495         if (dev->devtype->product == CODA_DX6)
496                 return 0;
497
498         if (!ctx->slicebuf.vaddr && q_data->fourcc == V4L2_PIX_FMT_H264) {
499                 /* worst case slice size */
500                 size = (DIV_ROUND_UP(q_data->rect.width, 16) *
501                         DIV_ROUND_UP(q_data->rect.height, 16)) * 3200 / 8 + 512;
502                 ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size,
503                                              "slicebuf");
504                 if (ret < 0)
505                         goto err;
506         }
507
508         if (!ctx->psbuf.vaddr && (dev->devtype->product == CODA_HX4 ||
509                                   dev->devtype->product == CODA_7541)) {
510                 ret = coda_alloc_context_buf(ctx, &ctx->psbuf,
511                                              CODA7_PS_BUF_SIZE, "psbuf");
512                 if (ret < 0)
513                         goto err;
514         }
515
516         if (!ctx->workbuf.vaddr) {
517                 size = dev->devtype->workbuf_size;
518                 if (dev->devtype->product == CODA_960 &&
519                     q_data->fourcc == V4L2_PIX_FMT_H264)
520                         size += CODA9_PS_SAVE_SIZE;
521                 ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size,
522                                              "workbuf");
523                 if (ret < 0)
524                         goto err;
525         }
526
527         return 0;
528
529 err:
530         coda_free_context_buffers(ctx);
531         return ret;
532 }
533
534 static int coda_encode_header(struct coda_ctx *ctx, struct vb2_v4l2_buffer *buf,
535                               int header_code, u8 *header, int *size)
536 {
537         struct vb2_buffer *vb = &buf->vb2_buf;
538         struct coda_dev *dev = ctx->dev;
539         struct coda_q_data *q_data_src;
540         struct v4l2_rect *r;
541         size_t bufsize;
542         int ret;
543         int i;
544
545         if (dev->devtype->product == CODA_960)
546                 memset(vb2_plane_vaddr(vb, 0), 0, 64);
547
548         coda_write(dev, vb2_dma_contig_plane_dma_addr(vb, 0),
549                    CODA_CMD_ENC_HEADER_BB_START);
550         bufsize = vb2_plane_size(vb, 0);
551         if (dev->devtype->product == CODA_960)
552                 bufsize /= 1024;
553         coda_write(dev, bufsize, CODA_CMD_ENC_HEADER_BB_SIZE);
554         if (dev->devtype->product == CODA_960 &&
555             ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 &&
556             header_code == CODA_HEADER_H264_SPS) {
557                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
558                 r = &q_data_src->rect;
559
560                 if (r->width % 16 || r->height % 16) {
561                         u32 crop_right = round_up(r->width, 16) -  r->width;
562                         u32 crop_bottom = round_up(r->height, 16) - r->height;
563
564                         coda_write(dev, crop_right,
565                                    CODA9_CMD_ENC_HEADER_FRAME_CROP_H);
566                         coda_write(dev, crop_bottom,
567                                    CODA9_CMD_ENC_HEADER_FRAME_CROP_V);
568                         header_code |= CODA9_HEADER_FRAME_CROP;
569                 }
570         }
571         coda_write(dev, header_code, CODA_CMD_ENC_HEADER_CODE);
572         ret = coda_command_sync(ctx, CODA_COMMAND_ENCODE_HEADER);
573         if (ret < 0) {
574                 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_ENCODE_HEADER timeout\n");
575                 return ret;
576         }
577
578         if (dev->devtype->product == CODA_960) {
579                 for (i = 63; i > 0; i--)
580                         if (((char *)vb2_plane_vaddr(vb, 0))[i] != 0)
581                                 break;
582                 *size = i + 1;
583         } else {
584                 *size = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)) -
585                         coda_read(dev, CODA_CMD_ENC_HEADER_BB_START);
586         }
587         memcpy(header, vb2_plane_vaddr(vb, 0), *size);
588
589         return 0;
590 }
591
592 static phys_addr_t coda_iram_alloc(struct coda_iram_info *iram, size_t size)
593 {
594         phys_addr_t ret;
595
596         size = round_up(size, 1024);
597         if (size > iram->remaining)
598                 return 0;
599         iram->remaining -= size;
600
601         ret = iram->next_paddr;
602         iram->next_paddr += size;
603
604         return ret;
605 }
606
607 static void coda_setup_iram(struct coda_ctx *ctx)
608 {
609         struct coda_iram_info *iram_info = &ctx->iram_info;
610         struct coda_dev *dev = ctx->dev;
611         int w64, w128;
612         int mb_width;
613         int dbk_bits;
614         int bit_bits;
615         int ip_bits;
616         int me_bits;
617
618         memset(iram_info, 0, sizeof(*iram_info));
619         iram_info->next_paddr = dev->iram.paddr;
620         iram_info->remaining = dev->iram.size;
621
622         if (!dev->iram.vaddr)
623                 return;
624
625         switch (dev->devtype->product) {
626         case CODA_HX4:
627                 dbk_bits = CODA7_USE_HOST_DBK_ENABLE;
628                 bit_bits = CODA7_USE_HOST_BIT_ENABLE;
629                 ip_bits = CODA7_USE_HOST_IP_ENABLE;
630                 me_bits = CODA7_USE_HOST_ME_ENABLE;
631                 break;
632         case CODA_7541:
633                 dbk_bits = CODA7_USE_HOST_DBK_ENABLE | CODA7_USE_DBK_ENABLE;
634                 bit_bits = CODA7_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
635                 ip_bits = CODA7_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
636                 me_bits = CODA7_USE_HOST_ME_ENABLE | CODA7_USE_ME_ENABLE;
637                 break;
638         case CODA_960:
639                 dbk_bits = CODA9_USE_HOST_DBK_ENABLE | CODA9_USE_DBK_ENABLE;
640                 bit_bits = CODA9_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
641                 ip_bits = CODA9_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
642                 me_bits = 0;
643                 break;
644         default: /* CODA_DX6 */
645                 return;
646         }
647
648         if (ctx->inst_type == CODA_INST_ENCODER) {
649                 struct coda_q_data *q_data_src;
650
651                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
652                 mb_width = DIV_ROUND_UP(q_data_src->rect.width, 16);
653                 w128 = mb_width * 128;
654                 w64 = mb_width * 64;
655
656                 /* Prioritize in case IRAM is too small for everything */
657                 if (dev->devtype->product == CODA_HX4 ||
658                     dev->devtype->product == CODA_7541) {
659                         iram_info->search_ram_size = round_up(mb_width * 16 *
660                                                               36 + 2048, 1024);
661                         iram_info->search_ram_paddr = coda_iram_alloc(iram_info,
662                                                 iram_info->search_ram_size);
663                         if (!iram_info->search_ram_paddr) {
664                                 pr_err("IRAM is smaller than the search ram size\n");
665                                 goto out;
666                         }
667                         iram_info->axi_sram_use |= me_bits;
668                 }
669
670                 /* Only H.264BP and H.263P3 are considered */
671                 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w64);
672                 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w64);
673                 if (!iram_info->buf_dbk_y_use || !iram_info->buf_dbk_c_use)
674                         goto out;
675                 iram_info->axi_sram_use |= dbk_bits;
676
677                 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
678                 if (!iram_info->buf_bit_use)
679                         goto out;
680                 iram_info->axi_sram_use |= bit_bits;
681
682                 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
683                 if (!iram_info->buf_ip_ac_dc_use)
684                         goto out;
685                 iram_info->axi_sram_use |= ip_bits;
686
687                 /* OVL and BTP disabled for encoder */
688         } else if (ctx->inst_type == CODA_INST_DECODER) {
689                 struct coda_q_data *q_data_dst;
690
691                 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
692                 mb_width = DIV_ROUND_UP(q_data_dst->width, 16);
693                 w128 = mb_width * 128;
694
695                 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w128);
696                 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w128);
697                 if (!iram_info->buf_dbk_y_use || !iram_info->buf_dbk_c_use)
698                         goto out;
699                 iram_info->axi_sram_use |= dbk_bits;
700
701                 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
702                 if (!iram_info->buf_bit_use)
703                         goto out;
704                 iram_info->axi_sram_use |= bit_bits;
705
706                 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
707                 if (!iram_info->buf_ip_ac_dc_use)
708                         goto out;
709                 iram_info->axi_sram_use |= ip_bits;
710
711                 /* OVL and BTP unused as there is no VC1 support yet */
712         }
713
714 out:
715         if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE))
716                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
717                          "IRAM smaller than needed\n");
718
719         if (dev->devtype->product == CODA_HX4 ||
720             dev->devtype->product == CODA_7541) {
721                 /* TODO - Enabling these causes picture errors on CODA7541 */
722                 if (ctx->inst_type == CODA_INST_DECODER) {
723                         /* fw 1.4.50 */
724                         iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
725                                                      CODA7_USE_IP_ENABLE);
726                 } else {
727                         /* fw 13.4.29 */
728                         iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
729                                                      CODA7_USE_HOST_DBK_ENABLE |
730                                                      CODA7_USE_IP_ENABLE |
731                                                      CODA7_USE_DBK_ENABLE);
732                 }
733         }
734 }
735
736 static u32 coda_supported_firmwares[] = {
737         CODA_FIRMWARE_VERNUM(CODA_DX6, 2, 2, 5),
738         CODA_FIRMWARE_VERNUM(CODA_HX4, 1, 4, 50),
739         CODA_FIRMWARE_VERNUM(CODA_7541, 1, 4, 50),
740         CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 5),
741         CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 9),
742         CODA_FIRMWARE_VERNUM(CODA_960, 2, 3, 10),
743         CODA_FIRMWARE_VERNUM(CODA_960, 3, 1, 1),
744 };
745
746 static bool coda_firmware_supported(u32 vernum)
747 {
748         int i;
749
750         for (i = 0; i < ARRAY_SIZE(coda_supported_firmwares); i++)
751                 if (vernum == coda_supported_firmwares[i])
752                         return true;
753         return false;
754 }
755
756 int coda_check_firmware(struct coda_dev *dev)
757 {
758         u16 product, major, minor, release;
759         u32 data;
760         int ret;
761
762         ret = clk_prepare_enable(dev->clk_per);
763         if (ret)
764                 goto err_clk_per;
765
766         ret = clk_prepare_enable(dev->clk_ahb);
767         if (ret)
768                 goto err_clk_ahb;
769
770         coda_write(dev, 0, CODA_CMD_FIRMWARE_VERNUM);
771         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
772         coda_write(dev, 0, CODA_REG_BIT_RUN_INDEX);
773         coda_write(dev, 0, CODA_REG_BIT_RUN_COD_STD);
774         coda_write(dev, CODA_COMMAND_FIRMWARE_GET, CODA_REG_BIT_RUN_COMMAND);
775         if (coda_wait_timeout(dev)) {
776                 v4l2_err(&dev->v4l2_dev, "firmware get command error\n");
777                 ret = -EIO;
778                 goto err_run_cmd;
779         }
780
781         if (dev->devtype->product == CODA_960) {
782                 data = coda_read(dev, CODA9_CMD_FIRMWARE_CODE_REV);
783                 v4l2_info(&dev->v4l2_dev, "Firmware code revision: %d\n",
784                           data);
785         }
786
787         /* Check we are compatible with the loaded firmware */
788         data = coda_read(dev, CODA_CMD_FIRMWARE_VERNUM);
789         product = CODA_FIRMWARE_PRODUCT(data);
790         major = CODA_FIRMWARE_MAJOR(data);
791         minor = CODA_FIRMWARE_MINOR(data);
792         release = CODA_FIRMWARE_RELEASE(data);
793
794         clk_disable_unprepare(dev->clk_per);
795         clk_disable_unprepare(dev->clk_ahb);
796
797         if (product != dev->devtype->product) {
798                 v4l2_err(&dev->v4l2_dev,
799                          "Wrong firmware. Hw: %s, Fw: %s, Version: %u.%u.%u\n",
800                          coda_product_name(dev->devtype->product),
801                          coda_product_name(product), major, minor, release);
802                 return -EINVAL;
803         }
804
805         v4l2_info(&dev->v4l2_dev, "Initialized %s.\n",
806                   coda_product_name(product));
807
808         if (coda_firmware_supported(data)) {
809                 v4l2_info(&dev->v4l2_dev, "Firmware version: %u.%u.%u\n",
810                           major, minor, release);
811         } else {
812                 v4l2_warn(&dev->v4l2_dev,
813                           "Unsupported firmware version: %u.%u.%u\n",
814                           major, minor, release);
815         }
816
817         return 0;
818
819 err_run_cmd:
820         clk_disable_unprepare(dev->clk_ahb);
821 err_clk_ahb:
822         clk_disable_unprepare(dev->clk_per);
823 err_clk_per:
824         return ret;
825 }
826
827 static void coda9_set_frame_cache(struct coda_ctx *ctx, u32 fourcc)
828 {
829         u32 cache_size, cache_config;
830
831         if (ctx->tiled_map_type == GDI_LINEAR_FRAME_MAP) {
832                 /* Luma 2x0 page, 2x6 cache, chroma 2x0 page, 2x4 cache size */
833                 cache_size = 0x20262024;
834                 cache_config = 2 << CODA9_CACHE_PAGEMERGE_OFFSET;
835         } else {
836                 /* Luma 0x2 page, 4x4 cache, chroma 0x2 page, 4x3 cache size */
837                 cache_size = 0x02440243;
838                 cache_config = 1 << CODA9_CACHE_PAGEMERGE_OFFSET;
839         }
840         coda_write(ctx->dev, cache_size, CODA9_CMD_SET_FRAME_CACHE_SIZE);
841         if (fourcc == V4L2_PIX_FMT_NV12 || fourcc == V4L2_PIX_FMT_YUYV) {
842                 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
843                                 16 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET |
844                                 0 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET;
845         } else {
846                 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
847                                 8 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET |
848                                 8 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET;
849         }
850         coda_write(ctx->dev, cache_config, CODA9_CMD_SET_FRAME_CACHE_CONFIG);
851 }
852
853 /*
854  * Encoder context operations
855  */
856
857 static int coda_encoder_reqbufs(struct coda_ctx *ctx,
858                                 struct v4l2_requestbuffers *rb)
859 {
860         struct coda_q_data *q_data_src;
861         int ret;
862
863         if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
864                 return 0;
865
866         if (rb->count) {
867                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
868                 ret = coda_alloc_context_buffers(ctx, q_data_src);
869                 if (ret < 0)
870                         return ret;
871         } else {
872                 coda_free_context_buffers(ctx);
873         }
874
875         return 0;
876 }
877
878 static int coda_start_encoding(struct coda_ctx *ctx)
879 {
880         struct coda_dev *dev = ctx->dev;
881         struct v4l2_device *v4l2_dev = &dev->v4l2_dev;
882         struct coda_q_data *q_data_src, *q_data_dst;
883         u32 bitstream_buf, bitstream_size;
884         struct vb2_v4l2_buffer *buf;
885         int gamma, ret, value;
886         u32 dst_fourcc;
887         int num_fb;
888         u32 stride;
889
890         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
891         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
892         dst_fourcc = q_data_dst->fourcc;
893
894         buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
895         bitstream_buf = vb2_dma_contig_plane_dma_addr(&buf->vb2_buf, 0);
896         bitstream_size = q_data_dst->sizeimage;
897
898         if (!coda_is_initialized(dev)) {
899                 v4l2_err(v4l2_dev, "coda is not initialized.\n");
900                 return -EFAULT;
901         }
902
903         if (dst_fourcc == V4L2_PIX_FMT_JPEG) {
904                 if (!ctx->params.jpeg_qmat_tab[0]) {
905                         ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL);
906                         if (!ctx->params.jpeg_qmat_tab[0])
907                                 return -ENOMEM;
908                 }
909                 if (!ctx->params.jpeg_qmat_tab[1]) {
910                         ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL);
911                         if (!ctx->params.jpeg_qmat_tab[1])
912                                 return -ENOMEM;
913                 }
914                 coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality);
915         }
916
917         mutex_lock(&dev->coda_mutex);
918
919         coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
920         coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
921         coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
922         switch (dev->devtype->product) {
923         case CODA_DX6:
924                 coda_write(dev, CODADX6_STREAM_BUF_DYNALLOC_EN |
925                         CODADX6_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
926                 break;
927         case CODA_960:
928                 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
929                 /* fallthrough */
930         case CODA_HX4:
931         case CODA_7541:
932                 coda_write(dev, CODA7_STREAM_BUF_DYNALLOC_EN |
933                         CODA7_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
934                 break;
935         }
936
937         ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
938                                  CODA9_FRAME_TILED2LINEAR);
939         if (q_data_src->fourcc == V4L2_PIX_FMT_NV12)
940                 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
941         if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
942                 ctx->frame_mem_ctrl |= (0x3 << 9) | CODA9_FRAME_TILED2LINEAR;
943         coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
944
945         if (dev->devtype->product == CODA_DX6) {
946                 /* Configure the coda */
947                 coda_write(dev, dev->iram.paddr,
948                            CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR);
949         }
950
951         /* Could set rotation here if needed */
952         value = 0;
953         switch (dev->devtype->product) {
954         case CODA_DX6:
955                 value = (q_data_src->rect.width & CODADX6_PICWIDTH_MASK)
956                         << CODADX6_PICWIDTH_OFFSET;
957                 value |= (q_data_src->rect.height & CODADX6_PICHEIGHT_MASK)
958                          << CODA_PICHEIGHT_OFFSET;
959                 break;
960         case CODA_HX4:
961         case CODA_7541:
962                 if (dst_fourcc == V4L2_PIX_FMT_H264) {
963                         value = (round_up(q_data_src->rect.width, 16) &
964                                  CODA7_PICWIDTH_MASK) << CODA7_PICWIDTH_OFFSET;
965                         value |= (round_up(q_data_src->rect.height, 16) &
966                                  CODA7_PICHEIGHT_MASK) << CODA_PICHEIGHT_OFFSET;
967                         break;
968                 }
969                 /* fallthrough */
970         case CODA_960:
971                 value = (q_data_src->rect.width & CODA7_PICWIDTH_MASK)
972                         << CODA7_PICWIDTH_OFFSET;
973                 value |= (q_data_src->rect.height & CODA7_PICHEIGHT_MASK)
974                          << CODA_PICHEIGHT_OFFSET;
975         }
976         coda_write(dev, value, CODA_CMD_ENC_SEQ_SRC_SIZE);
977         if (dst_fourcc == V4L2_PIX_FMT_JPEG)
978                 ctx->params.framerate = 0;
979         coda_write(dev, ctx->params.framerate,
980                    CODA_CMD_ENC_SEQ_SRC_F_RATE);
981
982         ctx->params.codec_mode = ctx->codec->mode;
983         switch (dst_fourcc) {
984         case V4L2_PIX_FMT_MPEG4:
985                 if (dev->devtype->product == CODA_960)
986                         coda_write(dev, CODA9_STD_MPEG4,
987                                    CODA_CMD_ENC_SEQ_COD_STD);
988                 else
989                         coda_write(dev, CODA_STD_MPEG4,
990                                    CODA_CMD_ENC_SEQ_COD_STD);
991                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_MP4_PARA);
992                 break;
993         case V4L2_PIX_FMT_H264:
994                 if (dev->devtype->product == CODA_960)
995                         coda_write(dev, CODA9_STD_H264,
996                                    CODA_CMD_ENC_SEQ_COD_STD);
997                 else
998                         coda_write(dev, CODA_STD_H264,
999                                    CODA_CMD_ENC_SEQ_COD_STD);
1000                 value = ((ctx->params.h264_disable_deblocking_filter_idc &
1001                           CODA_264PARAM_DISABLEDEBLK_MASK) <<
1002                          CODA_264PARAM_DISABLEDEBLK_OFFSET) |
1003                         ((ctx->params.h264_slice_alpha_c0_offset_div2 &
1004                           CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) <<
1005                          CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) |
1006                         ((ctx->params.h264_slice_beta_offset_div2 &
1007                           CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) <<
1008                          CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET);
1009                 coda_write(dev, value, CODA_CMD_ENC_SEQ_264_PARA);
1010                 break;
1011         case V4L2_PIX_FMT_JPEG:
1012                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_PARA);
1013                 coda_write(dev, ctx->params.jpeg_restart_interval,
1014                                 CODA_CMD_ENC_SEQ_JPG_RST_INTERVAL);
1015                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_EN);
1016                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_SIZE);
1017                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_OFFSET);
1018
1019                 coda_jpeg_write_tables(ctx);
1020                 break;
1021         default:
1022                 v4l2_err(v4l2_dev,
1023                          "dst format (0x%08x) invalid.\n", dst_fourcc);
1024                 ret = -EINVAL;
1025                 goto out;
1026         }
1027
1028         /*
1029          * slice mode and GOP size registers are used for thumb size/offset
1030          * in JPEG mode
1031          */
1032         if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
1033                 switch (ctx->params.slice_mode) {
1034                 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE:
1035                         value = 0;
1036                         break;
1037                 case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_MB:
1038                         value  = (ctx->params.slice_max_mb &
1039                                   CODA_SLICING_SIZE_MASK)
1040                                  << CODA_SLICING_SIZE_OFFSET;
1041                         value |= (1 & CODA_SLICING_UNIT_MASK)
1042                                  << CODA_SLICING_UNIT_OFFSET;
1043                         value |=  1 & CODA_SLICING_MODE_MASK;
1044                         break;
1045                 case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_BYTES:
1046                         value  = (ctx->params.slice_max_bits &
1047                                   CODA_SLICING_SIZE_MASK)
1048                                  << CODA_SLICING_SIZE_OFFSET;
1049                         value |= (0 & CODA_SLICING_UNIT_MASK)
1050                                  << CODA_SLICING_UNIT_OFFSET;
1051                         value |=  1 & CODA_SLICING_MODE_MASK;
1052                         break;
1053                 }
1054                 coda_write(dev, value, CODA_CMD_ENC_SEQ_SLICE_MODE);
1055                 value = ctx->params.gop_size;
1056                 coda_write(dev, value, CODA_CMD_ENC_SEQ_GOP_SIZE);
1057         }
1058
1059         if (ctx->params.bitrate) {
1060                 /* Rate control enabled */
1061                 value = (ctx->params.bitrate & CODA_RATECONTROL_BITRATE_MASK)
1062                         << CODA_RATECONTROL_BITRATE_OFFSET;
1063                 value |=  1 & CODA_RATECONTROL_ENABLE_MASK;
1064                 value |= (ctx->params.vbv_delay &
1065                           CODA_RATECONTROL_INITIALDELAY_MASK)
1066                          << CODA_RATECONTROL_INITIALDELAY_OFFSET;
1067                 if (dev->devtype->product == CODA_960)
1068                         value |= BIT(31); /* disable autoskip */
1069         } else {
1070                 value = 0;
1071         }
1072         coda_write(dev, value, CODA_CMD_ENC_SEQ_RC_PARA);
1073
1074         coda_write(dev, ctx->params.vbv_size, CODA_CMD_ENC_SEQ_RC_BUF_SIZE);
1075         coda_write(dev, ctx->params.intra_refresh,
1076                    CODA_CMD_ENC_SEQ_INTRA_REFRESH);
1077
1078         coda_write(dev, bitstream_buf, CODA_CMD_ENC_SEQ_BB_START);
1079         coda_write(dev, bitstream_size / 1024, CODA_CMD_ENC_SEQ_BB_SIZE);
1080
1081
1082         value = 0;
1083         if (dev->devtype->product == CODA_960)
1084                 gamma = CODA9_DEFAULT_GAMMA;
1085         else
1086                 gamma = CODA_DEFAULT_GAMMA;
1087         if (gamma > 0) {
1088                 coda_write(dev, (gamma & CODA_GAMMA_MASK) << CODA_GAMMA_OFFSET,
1089                            CODA_CMD_ENC_SEQ_RC_GAMMA);
1090         }
1091
1092         if (ctx->params.h264_min_qp || ctx->params.h264_max_qp) {
1093                 coda_write(dev,
1094                            ctx->params.h264_min_qp << CODA_QPMIN_OFFSET |
1095                            ctx->params.h264_max_qp << CODA_QPMAX_OFFSET,
1096                            CODA_CMD_ENC_SEQ_RC_QP_MIN_MAX);
1097         }
1098         if (dev->devtype->product == CODA_960) {
1099                 if (ctx->params.h264_max_qp)
1100                         value |= 1 << CODA9_OPTION_RCQPMAX_OFFSET;
1101                 if (CODA_DEFAULT_GAMMA > 0)
1102                         value |= 1 << CODA9_OPTION_GAMMA_OFFSET;
1103         } else {
1104                 if (CODA_DEFAULT_GAMMA > 0) {
1105                         if (dev->devtype->product == CODA_DX6)
1106                                 value |= 1 << CODADX6_OPTION_GAMMA_OFFSET;
1107                         else
1108                                 value |= 1 << CODA7_OPTION_GAMMA_OFFSET;
1109                 }
1110                 if (ctx->params.h264_min_qp)
1111                         value |= 1 << CODA7_OPTION_RCQPMIN_OFFSET;
1112                 if (ctx->params.h264_max_qp)
1113                         value |= 1 << CODA7_OPTION_RCQPMAX_OFFSET;
1114         }
1115         coda_write(dev, value, CODA_CMD_ENC_SEQ_OPTION);
1116
1117         coda_write(dev, 0, CODA_CMD_ENC_SEQ_RC_INTERVAL_MODE);
1118
1119         coda_setup_iram(ctx);
1120
1121         if (dst_fourcc == V4L2_PIX_FMT_H264) {
1122                 switch (dev->devtype->product) {
1123                 case CODA_DX6:
1124                         value = FMO_SLICE_SAVE_BUF_SIZE << 7;
1125                         coda_write(dev, value, CODADX6_CMD_ENC_SEQ_FMO);
1126                         break;
1127                 case CODA_HX4:
1128                 case CODA_7541:
1129                         coda_write(dev, ctx->iram_info.search_ram_paddr,
1130                                         CODA7_CMD_ENC_SEQ_SEARCH_BASE);
1131                         coda_write(dev, ctx->iram_info.search_ram_size,
1132                                         CODA7_CMD_ENC_SEQ_SEARCH_SIZE);
1133                         break;
1134                 case CODA_960:
1135                         coda_write(dev, 0, CODA9_CMD_ENC_SEQ_ME_OPTION);
1136                         coda_write(dev, 0, CODA9_CMD_ENC_SEQ_INTRA_WEIGHT);
1137                 }
1138         }
1139
1140         ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
1141         if (ret < 0) {
1142                 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
1143                 goto out;
1144         }
1145
1146         if (coda_read(dev, CODA_RET_ENC_SEQ_SUCCESS) == 0) {
1147                 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT failed\n");
1148                 ret = -EFAULT;
1149                 goto out;
1150         }
1151         ctx->initialized = 1;
1152
1153         if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
1154                 if (dev->devtype->product == CODA_960)
1155                         ctx->num_internal_frames = 4;
1156                 else
1157                         ctx->num_internal_frames = 2;
1158                 ret = coda_alloc_framebuffers(ctx, q_data_src, dst_fourcc);
1159                 if (ret < 0) {
1160                         v4l2_err(v4l2_dev, "failed to allocate framebuffers\n");
1161                         goto out;
1162                 }
1163                 num_fb = 2;
1164                 stride = q_data_src->bytesperline;
1165         } else {
1166                 ctx->num_internal_frames = 0;
1167                 num_fb = 0;
1168                 stride = 0;
1169         }
1170         coda_write(dev, num_fb, CODA_CMD_SET_FRAME_BUF_NUM);
1171         coda_write(dev, stride, CODA_CMD_SET_FRAME_BUF_STRIDE);
1172
1173         if (dev->devtype->product == CODA_HX4 ||
1174             dev->devtype->product == CODA_7541) {
1175                 coda_write(dev, q_data_src->bytesperline,
1176                                 CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE);
1177         }
1178         if (dev->devtype->product != CODA_DX6) {
1179                 coda_write(dev, ctx->iram_info.buf_bit_use,
1180                                 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1181                 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1182                                 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1183                 coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1184                                 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1185                 coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1186                                 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1187                 coda_write(dev, ctx->iram_info.buf_ovl_use,
1188                                 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1189                 if (dev->devtype->product == CODA_960) {
1190                         coda_write(dev, ctx->iram_info.buf_btp_use,
1191                                         CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1192
1193                         coda9_set_frame_cache(ctx, q_data_src->fourcc);
1194
1195                         /* FIXME */
1196                         coda_write(dev, ctx->internal_frames[2].paddr,
1197                                    CODA9_CMD_SET_FRAME_SUBSAMP_A);
1198                         coda_write(dev, ctx->internal_frames[3].paddr,
1199                                    CODA9_CMD_SET_FRAME_SUBSAMP_B);
1200                 }
1201         }
1202
1203         ret = coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF);
1204         if (ret < 0) {
1205                 v4l2_err(v4l2_dev, "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1206                 goto out;
1207         }
1208
1209         /* Save stream headers */
1210         buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1211         switch (dst_fourcc) {
1212         case V4L2_PIX_FMT_H264:
1213                 /*
1214                  * Get SPS in the first frame and copy it to an
1215                  * intermediate buffer.
1216                  */
1217                 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_SPS,
1218                                          &ctx->vpu_header[0][0],
1219                                          &ctx->vpu_header_size[0]);
1220                 if (ret < 0)
1221                         goto out;
1222
1223                 /*
1224                  * If visible width or height are not aligned to macroblock
1225                  * size, the crop_right and crop_bottom SPS fields must be set
1226                  * to the difference between visible and coded size.  This is
1227                  * only supported by CODA960 firmware. All others do not allow
1228                  * writing frame cropping parameters, so we have to manually
1229                  * fix up the SPS RBSP (Sequence Parameter Set Raw Byte
1230                  * Sequence Payload) ourselves.
1231                  */
1232                 if (ctx->dev->devtype->product != CODA_960 &&
1233                     ((q_data_src->rect.width % 16) ||
1234                      (q_data_src->rect.height % 16))) {
1235                         ret = coda_h264_sps_fixup(ctx, q_data_src->rect.width,
1236                                                   q_data_src->rect.height,
1237                                                   &ctx->vpu_header[0][0],
1238                                                   &ctx->vpu_header_size[0],
1239                                                   sizeof(ctx->vpu_header[0]));
1240                         if (ret < 0)
1241                                 goto out;
1242                 }
1243
1244                 /*
1245                  * Get PPS in the first frame and copy it to an
1246                  * intermediate buffer.
1247                  */
1248                 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_PPS,
1249                                          &ctx->vpu_header[1][0],
1250                                          &ctx->vpu_header_size[1]);
1251                 if (ret < 0)
1252                         goto out;
1253
1254                 /*
1255                  * Length of H.264 headers is variable and thus it might not be
1256                  * aligned for the coda to append the encoded frame. In that is
1257                  * the case a filler NAL must be added to header 2.
1258                  */
1259                 ctx->vpu_header_size[2] = coda_h264_padding(
1260                                         (ctx->vpu_header_size[0] +
1261                                          ctx->vpu_header_size[1]),
1262                                          ctx->vpu_header[2]);
1263                 break;
1264         case V4L2_PIX_FMT_MPEG4:
1265                 /*
1266                  * Get VOS in the first frame and copy it to an
1267                  * intermediate buffer
1268                  */
1269                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOS,
1270                                          &ctx->vpu_header[0][0],
1271                                          &ctx->vpu_header_size[0]);
1272                 if (ret < 0)
1273                         goto out;
1274
1275                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VIS,
1276                                          &ctx->vpu_header[1][0],
1277                                          &ctx->vpu_header_size[1]);
1278                 if (ret < 0)
1279                         goto out;
1280
1281                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOL,
1282                                          &ctx->vpu_header[2][0],
1283                                          &ctx->vpu_header_size[2]);
1284                 if (ret < 0)
1285                         goto out;
1286                 break;
1287         default:
1288                 /* No more formats need to save headers at the moment */
1289                 break;
1290         }
1291
1292 out:
1293         mutex_unlock(&dev->coda_mutex);
1294         return ret;
1295 }
1296
1297 static int coda_prepare_encode(struct coda_ctx *ctx)
1298 {
1299         struct coda_q_data *q_data_src, *q_data_dst;
1300         struct vb2_v4l2_buffer *src_buf, *dst_buf;
1301         struct coda_dev *dev = ctx->dev;
1302         int force_ipicture;
1303         int quant_param = 0;
1304         u32 pic_stream_buffer_addr, pic_stream_buffer_size;
1305         u32 rot_mode = 0;
1306         u32 dst_fourcc;
1307         u32 reg;
1308
1309         src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
1310         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1311         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1312         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1313         dst_fourcc = q_data_dst->fourcc;
1314
1315         src_buf->sequence = ctx->osequence;
1316         dst_buf->sequence = ctx->osequence;
1317         ctx->osequence++;
1318
1319         force_ipicture = ctx->params.force_ipicture;
1320         if (force_ipicture)
1321                 ctx->params.force_ipicture = false;
1322         else if (ctx->params.gop_size != 0 &&
1323                  (src_buf->sequence % ctx->params.gop_size) == 0)
1324                 force_ipicture = 1;
1325
1326         /*
1327          * Workaround coda firmware BUG that only marks the first
1328          * frame as IDR. This is a problem for some decoders that can't
1329          * recover when a frame is lost.
1330          */
1331         if (!force_ipicture) {
1332                 src_buf->flags |= V4L2_BUF_FLAG_PFRAME;
1333                 src_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1334         } else {
1335                 src_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1336                 src_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1337         }
1338
1339         if (dev->devtype->product == CODA_960)
1340                 coda_set_gdi_regs(ctx);
1341
1342         /*
1343          * Copy headers in front of the first frame and forced I frames for
1344          * H.264 only. In MPEG4 they are already copied by the CODA.
1345          */
1346         if (src_buf->sequence == 0 || force_ipicture) {
1347                 pic_stream_buffer_addr =
1348                         vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0) +
1349                         ctx->vpu_header_size[0] +
1350                         ctx->vpu_header_size[1] +
1351                         ctx->vpu_header_size[2];
1352                 pic_stream_buffer_size = q_data_dst->sizeimage -
1353                         ctx->vpu_header_size[0] -
1354                         ctx->vpu_header_size[1] -
1355                         ctx->vpu_header_size[2];
1356                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0),
1357                        &ctx->vpu_header[0][0], ctx->vpu_header_size[0]);
1358                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0)
1359                         + ctx->vpu_header_size[0], &ctx->vpu_header[1][0],
1360                         ctx->vpu_header_size[1]);
1361                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0)
1362                         + ctx->vpu_header_size[0] + ctx->vpu_header_size[1],
1363                         &ctx->vpu_header[2][0], ctx->vpu_header_size[2]);
1364         } else {
1365                 pic_stream_buffer_addr =
1366                         vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
1367                 pic_stream_buffer_size = q_data_dst->sizeimage;
1368         }
1369
1370         if (force_ipicture) {
1371                 switch (dst_fourcc) {
1372                 case V4L2_PIX_FMT_H264:
1373                         quant_param = ctx->params.h264_intra_qp;
1374                         break;
1375                 case V4L2_PIX_FMT_MPEG4:
1376                         quant_param = ctx->params.mpeg4_intra_qp;
1377                         break;
1378                 case V4L2_PIX_FMT_JPEG:
1379                         quant_param = 30;
1380                         break;
1381                 default:
1382                         v4l2_warn(&ctx->dev->v4l2_dev,
1383                                 "cannot set intra qp, fmt not supported\n");
1384                         break;
1385                 }
1386         } else {
1387                 switch (dst_fourcc) {
1388                 case V4L2_PIX_FMT_H264:
1389                         quant_param = ctx->params.h264_inter_qp;
1390                         break;
1391                 case V4L2_PIX_FMT_MPEG4:
1392                         quant_param = ctx->params.mpeg4_inter_qp;
1393                         break;
1394                 default:
1395                         v4l2_warn(&ctx->dev->v4l2_dev,
1396                                 "cannot set inter qp, fmt not supported\n");
1397                         break;
1398                 }
1399         }
1400
1401         /* submit */
1402         if (ctx->params.rot_mode)
1403                 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
1404         coda_write(dev, rot_mode, CODA_CMD_ENC_PIC_ROT_MODE);
1405         coda_write(dev, quant_param, CODA_CMD_ENC_PIC_QS);
1406
1407         if (dev->devtype->product == CODA_960) {
1408                 coda_write(dev, 4/*FIXME: 0*/, CODA9_CMD_ENC_PIC_SRC_INDEX);
1409                 coda_write(dev, q_data_src->bytesperline,
1410                            CODA9_CMD_ENC_PIC_SRC_STRIDE);
1411                 coda_write(dev, 0, CODA9_CMD_ENC_PIC_SUB_FRAME_SYNC);
1412
1413                 reg = CODA9_CMD_ENC_PIC_SRC_ADDR_Y;
1414         } else {
1415                 reg = CODA_CMD_ENC_PIC_SRC_ADDR_Y;
1416         }
1417         coda_write_base(ctx, q_data_src, src_buf, reg);
1418
1419         coda_write(dev, force_ipicture << 1 & 0x2,
1420                    CODA_CMD_ENC_PIC_OPTION);
1421
1422         coda_write(dev, pic_stream_buffer_addr, CODA_CMD_ENC_PIC_BB_START);
1423         coda_write(dev, pic_stream_buffer_size / 1024,
1424                    CODA_CMD_ENC_PIC_BB_SIZE);
1425
1426         if (!ctx->streamon_out) {
1427                 /* After streamoff on the output side, set stream end flag */
1428                 ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
1429                 coda_write(dev, ctx->bit_stream_param,
1430                            CODA_REG_BIT_BIT_STREAM_PARAM);
1431         }
1432
1433         if (dev->devtype->product != CODA_DX6)
1434                 coda_write(dev, ctx->iram_info.axi_sram_use,
1435                                 CODA7_REG_BIT_AXI_SRAM_USE);
1436
1437         trace_coda_enc_pic_run(ctx, src_buf);
1438
1439         coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
1440
1441         return 0;
1442 }
1443
1444 static void coda_finish_encode(struct coda_ctx *ctx)
1445 {
1446         struct vb2_v4l2_buffer *src_buf, *dst_buf;
1447         struct coda_dev *dev = ctx->dev;
1448         u32 wr_ptr, start_ptr;
1449
1450         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1451         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1452
1453         trace_coda_enc_pic_done(ctx, dst_buf);
1454
1455         /* Get results from the coda */
1456         start_ptr = coda_read(dev, CODA_CMD_ENC_PIC_BB_START);
1457         wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
1458
1459         /* Calculate bytesused field */
1460         if (dst_buf->sequence == 0 ||
1461             src_buf->flags & V4L2_BUF_FLAG_KEYFRAME) {
1462                 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr +
1463                                         ctx->vpu_header_size[0] +
1464                                         ctx->vpu_header_size[1] +
1465                                         ctx->vpu_header_size[2]);
1466         } else {
1467                 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr);
1468         }
1469
1470         v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "frame size = %u\n",
1471                  wr_ptr - start_ptr);
1472
1473         coda_read(dev, CODA_RET_ENC_PIC_SLICE_NUM);
1474         coda_read(dev, CODA_RET_ENC_PIC_FLAG);
1475
1476         if (coda_read(dev, CODA_RET_ENC_PIC_TYPE) == 0) {
1477                 dst_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1478                 dst_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1479         } else {
1480                 dst_buf->flags |= V4L2_BUF_FLAG_PFRAME;
1481                 dst_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1482         }
1483
1484         dst_buf->vb2_buf.timestamp = src_buf->vb2_buf.timestamp;
1485         dst_buf->field = src_buf->field;
1486         dst_buf->flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1487         dst_buf->flags |=
1488                 src_buf->flags & V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1489         dst_buf->timecode = src_buf->timecode;
1490
1491         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
1492
1493         dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1494         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE);
1495
1496         ctx->gopcounter--;
1497         if (ctx->gopcounter < 0)
1498                 ctx->gopcounter = ctx->params.gop_size - 1;
1499
1500         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1501                 "job finished: encoding frame (%d) (%s)\n",
1502                 dst_buf->sequence,
1503                 (dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
1504                 "KEYFRAME" : "PFRAME");
1505 }
1506
1507 static void coda_seq_end_work(struct work_struct *work)
1508 {
1509         struct coda_ctx *ctx = container_of(work, struct coda_ctx, seq_end_work);
1510         struct coda_dev *dev = ctx->dev;
1511
1512         mutex_lock(&ctx->buffer_mutex);
1513         mutex_lock(&dev->coda_mutex);
1514
1515         if (ctx->initialized == 0)
1516                 goto out;
1517
1518         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1519                  "%d: %s: sent command 'SEQ_END' to coda\n", ctx->idx,
1520                  __func__);
1521         if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
1522                 v4l2_err(&dev->v4l2_dev,
1523                          "CODA_COMMAND_SEQ_END failed\n");
1524         }
1525
1526         /*
1527          * FIXME: Sometimes h.264 encoding fails with 8-byte sequences missing
1528          * from the output stream after the h.264 decoder has run. Resetting the
1529          * hardware after the decoder has finished seems to help.
1530          */
1531         if (dev->devtype->product == CODA_960)
1532                 coda_hw_reset(ctx);
1533
1534         kfifo_init(&ctx->bitstream_fifo,
1535                 ctx->bitstream.vaddr, ctx->bitstream.size);
1536
1537         coda_free_framebuffers(ctx);
1538
1539         ctx->initialized = 0;
1540
1541 out:
1542         mutex_unlock(&dev->coda_mutex);
1543         mutex_unlock(&ctx->buffer_mutex);
1544 }
1545
1546 static void coda_bit_release(struct coda_ctx *ctx)
1547 {
1548         mutex_lock(&ctx->buffer_mutex);
1549         coda_free_framebuffers(ctx);
1550         coda_free_context_buffers(ctx);
1551         coda_free_bitstream_buffer(ctx);
1552         mutex_unlock(&ctx->buffer_mutex);
1553 }
1554
1555 const struct coda_context_ops coda_bit_encode_ops = {
1556         .queue_init = coda_encoder_queue_init,
1557         .reqbufs = coda_encoder_reqbufs,
1558         .start_streaming = coda_start_encoding,
1559         .prepare_run = coda_prepare_encode,
1560         .finish_run = coda_finish_encode,
1561         .seq_end_work = coda_seq_end_work,
1562         .release = coda_bit_release,
1563 };
1564
1565 /*
1566  * Decoder context operations
1567  */
1568
1569 static int coda_alloc_bitstream_buffer(struct coda_ctx *ctx,
1570                                        struct coda_q_data *q_data)
1571 {
1572         if (ctx->bitstream.vaddr)
1573                 return 0;
1574
1575         ctx->bitstream.size = roundup_pow_of_two(q_data->sizeimage * 2);
1576         ctx->bitstream.vaddr = dma_alloc_wc(&ctx->dev->plat_dev->dev,
1577                                             ctx->bitstream.size,
1578                                             &ctx->bitstream.paddr, GFP_KERNEL);
1579         if (!ctx->bitstream.vaddr) {
1580                 v4l2_err(&ctx->dev->v4l2_dev,
1581                          "failed to allocate bitstream ringbuffer");
1582                 return -ENOMEM;
1583         }
1584         kfifo_init(&ctx->bitstream_fifo,
1585                    ctx->bitstream.vaddr, ctx->bitstream.size);
1586
1587         return 0;
1588 }
1589
1590 static void coda_free_bitstream_buffer(struct coda_ctx *ctx)
1591 {
1592         if (ctx->bitstream.vaddr == NULL)
1593                 return;
1594
1595         dma_free_wc(&ctx->dev->plat_dev->dev, ctx->bitstream.size,
1596                     ctx->bitstream.vaddr, ctx->bitstream.paddr);
1597         ctx->bitstream.vaddr = NULL;
1598         kfifo_init(&ctx->bitstream_fifo, NULL, 0);
1599 }
1600
1601 static int coda_decoder_reqbufs(struct coda_ctx *ctx,
1602                                 struct v4l2_requestbuffers *rb)
1603 {
1604         struct coda_q_data *q_data_src;
1605         int ret;
1606
1607         if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
1608                 return 0;
1609
1610         if (rb->count) {
1611                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1612                 ret = coda_alloc_context_buffers(ctx, q_data_src);
1613                 if (ret < 0)
1614                         return ret;
1615                 ret = coda_alloc_bitstream_buffer(ctx, q_data_src);
1616                 if (ret < 0) {
1617                         coda_free_context_buffers(ctx);
1618                         return ret;
1619                 }
1620         } else {
1621                 coda_free_bitstream_buffer(ctx);
1622                 coda_free_context_buffers(ctx);
1623         }
1624
1625         return 0;
1626 }
1627
1628 static bool coda_reorder_enable(struct coda_ctx *ctx)
1629 {
1630         struct coda_dev *dev = ctx->dev;
1631         int profile;
1632
1633         if (dev->devtype->product != CODA_HX4 &&
1634             dev->devtype->product != CODA_7541 &&
1635             dev->devtype->product != CODA_960)
1636                 return false;
1637
1638         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
1639                 return false;
1640
1641         if (ctx->codec->src_fourcc != V4L2_PIX_FMT_H264)
1642                 return true;
1643
1644         profile = coda_h264_profile(ctx->params.h264_profile_idc);
1645         if (profile < 0)
1646                 v4l2_warn(&dev->v4l2_dev, "Unknown H264 Profile: %u\n",
1647                           ctx->params.h264_profile_idc);
1648
1649         /* Baseline profile does not support reordering */
1650         return profile > V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE;
1651 }
1652
/*
 * __coda_start_decoding() - run decoder sequence initialization
 * @ctx: decoder context
 *
 * Programs the bitstream buffer, frame memory control and codec specific
 * sequence registers, runs CODA_COMMAND_SEQ_INIT synchronously, validates the
 * reported picture size against the capture queue format, allocates the
 * internal framebuffers and registers them with CODA_COMMAND_SET_FRAME_BUF.
 *
 * This function does not take dev->coda_mutex itself; coda_start_decoding()
 * wraps it with that lock held.
 *
 * Return: 0 on success, the error from coda_command_sync() for SEQ_INIT or
 * from coda_alloc_framebuffers(), -EAGAIN if the hardware reports that
 * sequence initialization did not succeed, -EINVAL if the stream is larger
 * than the capture format or needs more than CODA_MAX_FRAMEBUFFERS frames,
 * or -ETIMEDOUT if SET_FRAME_BUF fails.
 */
1653 static int __coda_start_decoding(struct coda_ctx *ctx)
1654 {
1655         struct coda_q_data *q_data_src, *q_data_dst;
1656         u32 bitstream_buf, bitstream_size;
1657         struct coda_dev *dev = ctx->dev;
1658         int width, height;
1659         u32 src_fourcc, dst_fourcc;
1660         u32 val;
1661         int ret;
1662
1663         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1664                  "Video Data Order Adapter: %s\n",
1665                  ctx->use_vdoa ? "Enabled" : "Disabled");
1666
1667         /* Start decoding */
1668         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1669         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1670         bitstream_buf = ctx->bitstream.paddr;
1671         bitstream_size = ctx->bitstream.size;
1672         src_fourcc = q_data_src->fourcc;
1673         dst_fourcc = q_data_dst->fourcc;
1674
1675         coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
1676
1677         /* Update coda bitstream read and write pointers from kfifo */
1678         coda_kfifo_sync_to_device_full(ctx);
1679
             /*
              * Rebuild the chroma interleave and tiled map bits of the frame
              * memory control value from the current capture format, tiled
              * map type and VDOA usage before writing it to the hardware.
              */
1680         ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
1681                                  CODA9_FRAME_TILED2LINEAR);
1682         if (dst_fourcc == V4L2_PIX_FMT_NV12 || dst_fourcc == V4L2_PIX_FMT_YUYV)
1683                 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
1684         if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
1685                 ctx->frame_mem_ctrl |= (0x3 << 9) |
1686                         ((ctx->use_vdoa) ? 0 : CODA9_FRAME_TILED2LINEAR);
1687         coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
1688
             /* No display frame yet and no frame marked as in use for display */
1689         ctx->display_idx = -1;
1690         ctx->frm_dis_flg = 0;
1691         coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
1692
             /* The bitstream buffer size register takes the size in KiB */
1693         coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START);
1694         coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE);
1695         val = 0;
1696         if (coda_reorder_enable(ctx))
1697                 val |= CODA_REORDER_ENABLE;
1698         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
1699                 val |= CODA_NO_INT_ENABLE;
1700         coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION);
1701
1702         ctx->params.codec_mode = ctx->codec->mode;
1703         if (dev->devtype->product == CODA_960 &&
1704             src_fourcc == V4L2_PIX_FMT_MPEG4)
1705                 ctx->params.codec_mode_aux = CODA_MP4_AUX_MPEG4;
1706         else
1707                 ctx->params.codec_mode_aux = 0;
1708         if (src_fourcc == V4L2_PIX_FMT_MPEG4) {
1709                 coda_write(dev, CODA_MP4_CLASS_MPEG4,
1710                            CODA_CMD_DEC_SEQ_MP4_ASP_CLASS);
1711         }
1712         if (src_fourcc == V4L2_PIX_FMT_H264) {
1713                 if (dev->devtype->product == CODA_HX4 ||
1714                     dev->devtype->product == CODA_7541) {
1715                         coda_write(dev, ctx->psbuf.paddr,
1716                                         CODA_CMD_DEC_SEQ_PS_BB_START);
1717                         coda_write(dev, (CODA7_PS_BUF_SIZE / 1024),
1718                                         CODA_CMD_DEC_SEQ_PS_BB_SIZE);
1719                 }
1720                 if (dev->devtype->product == CODA_960) {
1721                         coda_write(dev, 0, CODA_CMD_DEC_SEQ_X264_MV_EN);
1722                         coda_write(dev, 512, CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE);
1723                 }
1724         }
1725         if (src_fourcc == V4L2_PIX_FMT_JPEG)
1726                 coda_write(dev, 0, CODA_CMD_DEC_SEQ_JPG_THUMB_EN);
1727         if (dev->devtype->product != CODA_960)
1728                 coda_write(dev, 0, CODA_CMD_DEC_SEQ_SRC_SIZE);
1729
             /*
              * The sequence init escape flag is kept in bit_stream_param only
              * while the SEQ_INIT command runs and is cleared again right
              * after, whether or not the command succeeded.
              */
1730         ctx->bit_stream_param = CODA_BIT_DEC_SEQ_INIT_ESCAPE;
1731         ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
1732         ctx->bit_stream_param = 0;
1733         if (ret) {
1734                 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
1735                 return ret;
1736         }
             /*
              * NOTE(review): initialized is set as soon as the command has
              * completed, before CODA_RET_DEC_SEQ_SUCCESS is checked, so all
              * error returns below leave it at 1 - confirm this is intended.
              */
1737         ctx->sequence_offset = ~0U;
1738         ctx->initialized = 1;
1739
1740         /* Update kfifo out pointer from coda bitstream read pointer */
1741         coda_kfifo_sync_from_device(ctx);
1742
1743         if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) {
1744                 v4l2_err(&dev->v4l2_dev,
1745                         "CODA_COMMAND_SEQ_INIT failed, error code = %d\n",
1746                         coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON));
1747                 return -EAGAIN;
1748         }
1749
             /* Width and height share one register, split per product family */
1750         val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE);
1751         if (dev->devtype->product == CODA_DX6) {
1752                 width = (val >> CODADX6_PICWIDTH_OFFSET) & CODADX6_PICWIDTH_MASK;
1753                 height = val & CODADX6_PICHEIGHT_MASK;
1754         } else {
1755                 width = (val >> CODA7_PICWIDTH_OFFSET) & CODA7_PICWIDTH_MASK;
1756                 height = val & CODA7_PICHEIGHT_MASK;
1757         }
1758
             /*
              * NOTE(review): the stream width is checked against the capture
              * bytesperline value, not against a width in pixels - confirm
              * this is the intended limit for all capture formats.
              */
1759         if (width > q_data_dst->bytesperline || height > q_data_dst->height) {
1760                 v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n",
1761                          width, height, q_data_dst->bytesperline,
1762                          q_data_dst->height);
1763                 return -EINVAL;
1764         }
1765
             /* From here on width and height are rounded up to multiples of 16 */
1766         width = round_up(width, 16);
1767         height = round_up(height, 16);
1768
1769         v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "%s instance %d now: %dx%d\n",
1770                  __func__, ctx->idx, width, height);
1771
1772         ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED);
1773         /*
1774          * If the VDOA is used, the decoder needs one additional frame,
1775          * because the frames are freed when the next frame is decoded.
1776          * Otherwise there are visible errors in the decoded frames (green
1777          * regions in displayed frames) and a broken order of frames (earlier
1778          * frames are sporadically displayed after later frames).
1779          */
1780         if (ctx->use_vdoa)
1781                 ctx->num_internal_frames += 1;
1782         if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) {
1783                 v4l2_err(&dev->v4l2_dev,
1784                          "not enough framebuffers to decode (%d < %d)\n",
1785                          CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames);
1786                 return -EINVAL;
1787         }
1788
             /*
              * For H.264, derive the capture crop rectangle: each crop
              * register holds the left/top offset in bits 19:10 and the
              * right/bottom offset in bits 9:0, both subtracted from the
              * rounded-up frame size.
              */
1789         if (src_fourcc == V4L2_PIX_FMT_H264) {
1790                 u32 left_right;
1791                 u32 top_bottom;
1792
1793                 left_right = coda_read(dev, CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT);
1794                 top_bottom = coda_read(dev, CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM);
1795
1796                 q_data_dst->rect.left = (left_right >> 10) & 0x3ff;
1797                 q_data_dst->rect.top = (top_bottom >> 10) & 0x3ff;
1798                 q_data_dst->rect.width = width - q_data_dst->rect.left -
1799                                          (left_right & 0x3ff);
1800                 q_data_dst->rect.height = height - q_data_dst->rect.top -
1801                                           (top_bottom & 0x3ff);
1802         }
1803
1804         ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc);
1805         if (ret < 0) {
1806                 v4l2_err(&dev->v4l2_dev, "failed to allocate framebuffers\n");
1807                 return ret;
1808         }
1809
1810         /* Tell the decoder how many frame buffers we allocated. */
1811         coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM);
1812         coda_write(dev, width, CODA_CMD_SET_FRAME_BUF_STRIDE);
1813
1814         if (dev->devtype->product != CODA_DX6) {
1815                 /* Set secondary AXI IRAM */
1816                 coda_setup_iram(ctx);
1817
1818                 coda_write(dev, ctx->iram_info.buf_bit_use,
1819                                 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1820                 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1821                                 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1822                 coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1823                                 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1824                 coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1825                                 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1826                 coda_write(dev, ctx->iram_info.buf_ovl_use,
1827                                 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1828                 if (dev->devtype->product == CODA_960) {
1829                         coda_write(dev, ctx->iram_info.buf_btp_use,
1830                                         CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1831
1832                         coda_write(dev, -1, CODA9_CMD_SET_FRAME_DELAY);
1833                         coda9_set_frame_cache(ctx, dst_fourcc);
1834                 }
1835         }
1836
             /* The slice buffer size register takes the size in KiB */
1837         if (src_fourcc == V4L2_PIX_FMT_H264) {
1838                 coda_write(dev, ctx->slicebuf.paddr,
1839                                 CODA_CMD_SET_FRAME_SLICE_BB_START);
1840                 coda_write(dev, ctx->slicebuf.size / 1024,
1841                                 CODA_CMD_SET_FRAME_SLICE_BB_SIZE);
1842         }
1843
             /*
              * The maximum decode size is fixed at 1920x1088, expressed in
              * 16x16 macroblocks; both branches compute the same value and
              * differ only in the register it is written to.
              */
1844         if (dev->devtype->product == CODA_HX4 ||
1845             dev->devtype->product == CODA_7541) {
1846                 int max_mb_x = 1920 / 16;
1847                 int max_mb_y = 1088 / 16;
1848                 int max_mb_num = max_mb_x * max_mb_y;
1849
1850                 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
1851                                 CODA7_CMD_SET_FRAME_MAX_DEC_SIZE);
1852         } else if (dev->devtype->product == CODA_960) {
1853                 int max_mb_x = 1920 / 16;
1854                 int max_mb_y = 1088 / 16;
1855                 int max_mb_num = max_mb_x * max_mb_y;
1856
1857                 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
1858                                 CODA9_CMD_SET_FRAME_MAX_DEC_SIZE);
1859         }
1860
             /* Any failure of this command is reported as -ETIMEDOUT */
1861         if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) {
1862                 v4l2_err(&ctx->dev->v4l2_dev,
1863                          "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1864                 return -ETIMEDOUT;
1865         }
1866
1867         return 0;
1868 }
1869
1870 static int coda_start_decoding(struct coda_ctx *ctx)
1871 {
1872         struct coda_dev *dev = ctx->dev;
1873         int ret;
1874
1875         mutex_lock(&dev->coda_mutex);
1876         ret = __coda_start_decoding(ctx);
1877         mutex_unlock(&dev->coda_mutex);
1878
1879         return ret;
1880 }
1881
/*
 * coda_prepare_decode() - set up and start one decoder PIC_RUN
 * @ctx: decoder context
 *
 * Refills the bitstream ringbuffer, (re)runs sequence initialization if the
 * context is not initialized, programs the output path (VDOA or rotator) and
 * the per-picture registers and issues CODA_COMMAND_PIC_RUN asynchronously.
 *
 * Return: 0 if the PIC_RUN command was issued, -EAGAIN if the m2m job was
 * finished early, either because fewer than 512 bytes of bitstream are
 * available while the stream end flag is not set, or because
 * __coda_start_decoding() failed.
 */
1882 static int coda_prepare_decode(struct coda_ctx *ctx)
1883 {
1884         struct vb2_v4l2_buffer *dst_buf;
1885         struct coda_dev *dev = ctx->dev;
1886         struct coda_q_data *q_data_dst;
1887         struct coda_buffer_meta *meta;
1888         unsigned long flags;
1889         u32 rot_mode = 0;
1890         u32 reg_addr, reg_stride;
1891
1892         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1893         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1894
1895         /* Try to copy source buffer contents into the bitstream ringbuffer */
1896         mutex_lock(&ctx->bitstream_mutex);
1897         coda_fill_bitstream(ctx, NULL);
1898         mutex_unlock(&ctx->bitstream_mutex);
1899
             /*
              * Without the stream end flag, a run is only started once at
              * least 512 bytes of bitstream are available; otherwise the job
              * is finished here and the caller is told to try again.
              */
1900         if (coda_get_bitstream_payload(ctx) < 512 &&
1901             (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
1902                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1903                          "bitstream payload: %d, skipping\n",
1904                          coda_get_bitstream_payload(ctx));
1905                 v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1906                 return -EAGAIN;
1907         }
1908
1909         /* Run coda_start_decoding (again) if not yet initialized */
1910         if (!ctx->initialized) {
1911                 int ret = __coda_start_decoding(ctx);
1912
1913                 if (ret < 0) {
1914                         v4l2_err(&dev->v4l2_dev, "failed to start decoding\n");
1915                         v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1916                         return -EAGAIN;
1917                 } else {
                             /*
                              * __coda_start_decoding() has already set this
                              * flag by the time it returns success.
                              */
1918                         ctx->initialized = 1;
1919                 }
1920         }
1921
1922         if (dev->devtype->product == CODA_960)
1923                 coda_set_gdi_regs(ctx);
1924
             /*
              * With the VDOA in use and a valid display frame, the VDOA is
              * started on that internal frame and rot_mode stays 0. In all
              * other cases the rotator registers are pointed at the capture
              * buffer and the rotator is enabled.
              */
1925         if (ctx->use_vdoa &&
1926             ctx->display_idx >= 0 &&
1927             ctx->display_idx < ctx->num_internal_frames) {
1928                 vdoa_device_run(ctx->vdoa,
1929                                 vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0),
1930                                 ctx->internal_frames[ctx->display_idx].paddr);
1931         } else {
1932                 if (dev->devtype->product == CODA_960) {
1933                         /*
1934                          * The CODA960 seems to have an internal list of
1935                          * buffers with 64 entries that includes the
1936                          * registered frame buffers as well as the rotator
1937                          * buffer output.
1938                          *
1939                          * ROT_INDEX needs to be < 0x40, but >
1940                          * ctx->num_internal_frames.
1941                          */
1942                         coda_write(dev,
1943                                    CODA_MAX_FRAMEBUFFERS + dst_buf->vb2_buf.index,
1944                                    CODA9_CMD_DEC_PIC_ROT_INDEX);
1945
1946                         reg_addr = CODA9_CMD_DEC_PIC_ROT_ADDR_Y;
1947                         reg_stride = CODA9_CMD_DEC_PIC_ROT_STRIDE;
1948                 } else {
1949                         reg_addr = CODA_CMD_DEC_PIC_ROT_ADDR_Y;
1950                         reg_stride = CODA_CMD_DEC_PIC_ROT_STRIDE;
1951                 }
1952                 coda_write_base(ctx, q_data_dst, dst_buf, reg_addr);
1953                 coda_write(dev, q_data_dst->bytesperline, reg_stride);
1954
1955                 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
1956         }
1957
1958         coda_write(dev, rot_mode, CODA_CMD_DEC_PIC_ROT_MODE);
1959
1960         switch (dev->devtype->product) {
1961         case CODA_DX6:
1962                 /* TBD */
                     /* fall through: CODA_DX6 currently shares the setting below */
1963         case CODA_HX4:
1964         case CODA_7541:
1965                 coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION);
1966                 break;
1967         case CODA_960:
1968                 /* 'hardcode to use interrupt disable mode'? */
1969                 coda_write(dev, (1 << 10), CODA_CMD_DEC_PIC_OPTION);
1970                 break;
1971         }
1972
1973         coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM);
1974
1975         coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START);
1976         coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE);
1977
1978         if (dev->devtype->product != CODA_DX6)
1979                 coda_write(dev, ctx->iram_info.axi_sram_use,
1980                                 CODA7_REG_BIT_AXI_SRAM_USE);
1981
             /* Peek at the oldest queued buffer meta; it may be NULL */
1982         spin_lock_irqsave(&ctx->buffer_meta_lock, flags);
1983         meta = list_first_entry_or_null(&ctx->buffer_meta_list,
1984                                         struct coda_buffer_meta, list);
1985
1986         if (meta && ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) {
1987
1988                 /* If this is the last buffer in the bitstream, add padding */
1989                 if (meta->end == (ctx->bitstream_fifo.kfifo.in &
1990                                   ctx->bitstream_fifo.kfifo.mask)) {
                             /*
                              * NOTE(review): buf is a function-static scratch
                              * buffer shared by all callers; presumably calls
                              * are serialized by a lock held by the caller -
                              * confirm.
                              */
1991                         static unsigned char buf[512];
1992                         unsigned int pad;
1993
1994                         /* Pad to multiple of 256 and then add 256 more */
                             /* pad ends up in 256..511, so it fits into buf */
1995                         pad = ((0 - meta->end) & 0xff) + 256;
1996
1997                         memset(buf, 0xff, sizeof(buf));
1998
1999                         kfifo_in(&ctx->bitstream_fifo, buf, pad);
2000                 }
2001         }
2002         spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
2003
             /* Publish the FIFO state, including any padding, to the device */
2004         coda_kfifo_sync_to_device_full(ctx);
2005
2006         /* Clear decode success flag */
2007         coda_write(dev, 0, CODA_RET_DEC_PIC_SUCCESS);
2008
2009         /* Clear error return value */
2010         coda_write(dev, 0, CODA_RET_DEC_PIC_ERR_MB);
2011
             /*
              * NOTE(review): meta is passed on after buffer_meta_lock has
              * been dropped and is still linked in buffer_meta_list - confirm
              * that nothing can free it concurrently.
              */
2012         trace_coda_dec_pic_run(ctx, meta);
2013
2014         coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
2015
2016         return 0;
2017 }
2018
2019 static void coda_finish_decode(struct coda_ctx *ctx)
2020 {
2021         struct coda_dev *dev = ctx->dev;
2022         struct coda_q_data *q_data_src;
2023         struct coda_q_data *q_data_dst;
2024         struct vb2_v4l2_buffer *dst_buf;
2025         struct coda_buffer_meta *meta;
2026         unsigned long payload;
2027         unsigned long flags;
2028         int width, height;
2029         int decoded_idx;
2030         int display_idx;
2031         u32 src_fourcc;
2032         int success;
2033         u32 err_mb;
2034         int err_vdoa = 0;
2035         u32 val;
2036
2037         /* Update kfifo out pointer from coda bitstream read pointer */
2038         coda_kfifo_sync_from_device(ctx);
2039
2040         /*
2041          * in stream-end mode, the read pointer can overshoot the write pointer
2042          * by up to 512 bytes
2043          */
2044         if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) {
2045                 if (coda_get_bitstream_payload(ctx) >= ctx->bitstream.size - 512)
2046                         kfifo_init(&ctx->bitstream_fifo,
2047                                 ctx->bitstream.vaddr, ctx->bitstream.size);
2048         }
2049
2050         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
2051         src_fourcc = q_data_src->fourcc;
2052
2053         val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS);
2054         if (val != 1)
2055                 pr_err("DEC_PIC_SUCCESS = %d\n", val);
2056
2057         success = val & 0x1;
2058         if (!success)
2059                 v4l2_err(&dev->v4l2_dev, "decode failed\n");
2060
2061         if (src_fourcc == V4L2_PIX_FMT_H264) {
2062                 if (val & (1 << 3))
2063                         v4l2_err(&dev->v4l2_dev,
2064                                  "insufficient PS buffer space (%d bytes)\n",
2065                                  ctx->psbuf.size);
2066                 if (val & (1 << 2))
2067                         v4l2_err(&dev->v4l2_dev,
2068                                  "insufficient slice buffer space (%d bytes)\n",
2069                                  ctx->slicebuf.size);
2070         }
2071
2072         val = coda_read(dev, CODA_RET_DEC_PIC_SIZE);
2073         width = (val >> 16) & 0xffff;
2074         height = val & 0xffff;
2075
2076         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
2077
2078         /* frame crop information */
2079         if (src_fourcc == V4L2_PIX_FMT_H264) {
2080                 u32 left_right;
2081                 u32 top_bottom;
2082
2083                 left_right = coda_read(dev, CODA_RET_DEC_PIC_CROP_LEFT_RIGHT);
2084                 top_bottom = coda_read(dev, CODA_RET_DEC_PIC_CROP_TOP_BOTTOM);
2085
2086                 if (left_right == 0xffffffff && top_bottom == 0xffffffff) {
2087                         /* Keep current crop information */
2088                 } else {
2089                         struct v4l2_rect *rect = &q_data_dst->rect;
2090
2091                         rect->left = left_right >> 16 & 0xffff;
2092                         rect->top = top_bottom >> 16 & 0xffff;
2093                         rect->width = width - rect->left -
2094                                       (left_right & 0xffff);
2095                         rect->height = height - rect->top -
2096                                        (top_bottom & 0xffff);
2097                 }
2098         } else {
2099                 /* no cropping */
2100         }
2101
2102         err_mb = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB);
2103         if (err_mb > 0)
2104                 v4l2_err(&dev->v4l2_dev,
2105                          "errors in %d macroblocks\n", err_mb);
2106
2107         if (dev->devtype->product == CODA_HX4 ||
2108             dev->devtype->product == CODA_7541) {
2109                 val = coda_read(dev, CODA_RET_DEC_PIC_OPTION);
2110                 if (val == 0) {
2111                         /* not enough bitstream data */
2112                         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
2113                                  "prescan failed: %d\n", val);
2114                         ctx->hold = true;
2115                         return;
2116                 }
2117         }
2118
2119         /* Wait until the VDOA finished writing the previous display frame */
2120         if (ctx->use_vdoa &&
2121             ctx->display_idx >= 0 &&
2122             ctx->display_idx < ctx->num_internal_frames) {
2123                 err_vdoa = vdoa_wait_for_completion(ctx->vdoa);
2124         }
2125
2126         ctx->frm_dis_flg = coda_read(dev,
2127                                      CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
2128
2129         /* The previous display frame was copied out and can be overwritten */
2130         if (ctx->display_idx >= 0 &&
2131             ctx->display_idx < ctx->num_internal_frames) {
2132                 ctx->frm_dis_flg &= ~(1 << ctx->display_idx);
2133                 coda_write(dev, ctx->frm_dis_flg,
2134                                 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
2135         }
2136
2137         /*
2138          * The index of the last decoded frame, not necessarily in
2139          * display order, and the index of the next display frame.
2140          * The latter could have been decoded in a previous run.
2141          */
2142         decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX);
2143         display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX);
2144
2145         if (decoded_idx == -1) {
2146                 /* no frame was decoded, but we might have a display frame */
2147                 if (display_idx >= 0 && display_idx < ctx->num_internal_frames)
2148                         ctx->sequence_offset++;
2149                 else if (ctx->display_idx < 0)
2150                         ctx->hold = true;
2151         } else if (decoded_idx == -2) {
2152                 if (ctx->display_idx >= 0 &&
2153                     ctx->display_idx < ctx->num_internal_frames)
2154                         ctx->sequence_offset++;
2155                 /* no frame was decoded, we still return remaining buffers */
2156         } else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) {
2157                 v4l2_err(&dev->v4l2_dev,
2158                          "decoded frame index out of range: %d\n", decoded_idx);
2159         } else {
2160                 val = coda_read(dev, CODA_RET_DEC_PIC_FRAME_NUM);
2161                 if (ctx->sequence_offset == -1)
2162                         ctx->sequence_offset = val;
2163                 val -= ctx->sequence_offset;
2164                 spin_lock_irqsave(&ctx->buffer_meta_lock, flags);
2165                 if (!list_empty(&ctx->buffer_meta_list)) {
2166                         meta = list_first_entry(&ctx->buffer_meta_list,
2167                                               struct coda_buffer_meta, list);
2168                         list_del(&meta->list);
2169                         ctx->num_metas--;
2170                         spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
2171                         /*
2172                          * Clamp counters to 16 bits for comparison, as the HW
2173                          * counter rolls over at this point for h.264. This
2174                          * may be different for other formats, but using 16 bits
2175                          * should be enough to detect most errors and saves us
2176                          * from doing different things based on the format.
2177                          */
2178                         if ((val & 0xffff) != (meta->sequence & 0xffff)) {
2179                                 v4l2_err(&dev->v4l2_dev,
2180                                          "sequence number mismatch (%d(%d) != %d)\n",
2181                                          val, ctx->sequence_offset,
2182                                          meta->sequence);
2183                         }
2184                         ctx->frame_metas[decoded_idx] = *meta;
2185                         kfree(meta);
2186                 } else {
2187                         spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
2188                         v4l2_err(&dev->v4l2_dev, "empty timestamp list!\n");
2189                         memset(&ctx->frame_metas[decoded_idx], 0,
2190                                sizeof(struct coda_buffer_meta));
2191                         ctx->frame_metas[decoded_idx].sequence = val;
2192                         ctx->sequence_offset++;
2193                 }
2194
2195                 trace_coda_dec_pic_done(ctx, &ctx->frame_metas[decoded_idx]);
2196
2197                 val = coda_read(dev, CODA_RET_DEC_PIC_TYPE) & 0x7;
2198                 if (val == 0)
2199                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_KEYFRAME;
2200                 else if (val == 1)
2201                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_PFRAME;
2202                 else
2203                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_BFRAME;
2204
2205                 ctx->frame_errors[decoded_idx] = err_mb;
2206         }
2207
2208         if (display_idx == -1) {
2209                 /*
2210                  * no more frames to be decoded, but there could still
2211                  * be rotator output to dequeue
2212                  */
2213                 ctx->hold = true;
2214         } else if (display_idx == -3) {
2215                 /* possibly prescan failure */
2216         } else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) {
2217                 v4l2_err(&dev->v4l2_dev,
2218                          "presentation frame index out of range: %d\n",
2219                          display_idx);
2220         }
2221
2222         /* If a frame was copied out, return it */
2223         if (ctx->display_idx >= 0 &&
2224             ctx->display_idx < ctx->num_internal_frames) {
2225                 dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
2226                 dst_buf->sequence = ctx->osequence++;
2227
2228                 dst_buf->field = V4L2_FIELD_NONE;
2229                 dst_buf->flags &= ~(V4L2_BUF_FLAG_KEYFRAME |
2230                                              V4L2_BUF_FLAG_PFRAME |
2231                                              V4L2_BUF_FLAG_BFRAME);
2232                 dst_buf->flags |= ctx->frame_types[ctx->display_idx];
2233                 meta = &ctx->frame_metas[ctx->display_idx];
2234                 dst_buf->timecode = meta->timecode;
2235                 dst_buf->vb2_buf.timestamp = meta->timestamp;
2236
2237                 trace_coda_dec_rot_done(ctx, dst_buf, meta);
2238
2239                 switch (q_data_dst->fourcc) {
2240                 case V4L2_PIX_FMT_YUYV:
2241                         payload = width * height * 2;
2242                         break;
2243                 case V4L2_PIX_FMT_YUV420:
2244                 case V4L2_PIX_FMT_YVU420:
2245                 case V4L2_PIX_FMT_NV12:
2246                 default:
2247                         payload = width * height * 3 / 2;
2248                         break;
2249                 case V4L2_PIX_FMT_YUV422P:
2250                         payload = width * height * 2;
2251                         break;
2252                 }
2253                 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, payload);
2254
2255                 if (ctx->frame_errors[ctx->display_idx] || err_vdoa)
2256                         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR);
2257                 else
2258                         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE);
2259
2260                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
2261                         "job finished: decoding frame (%d) (%s)\n",
2262                         dst_buf->sequence,
2263                         (dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
2264                         "KEYFRAME" : "PFRAME");
2265         } else {
2266                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
2267                         "job finished: no frame decoded\n");
2268         }
2269
2270         /* The rotator will copy the current display frame next time */
2271         ctx->display_idx = display_idx;
2272 }
2273
2274 static void coda_decode_timeout(struct coda_ctx *ctx)
2275 {
2276         struct vb2_v4l2_buffer *dst_buf;
2277
2278         /*
2279          * For now this only handles the case where we would deadlock with
2280          * userspace, i.e. userspace issued DEC_CMD_STOP and waits for EOS,
2281          * but after a failed decode run we would hold the context and wait for
2282          * userspace to queue more buffers.
2283          */
2284         if (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))
2285                 return;
2286
2287         dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
2288         dst_buf->sequence = ctx->qsequence - 1;
2289
2290         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR);
2291 }
2292
2293 const struct coda_context_ops coda_bit_decode_ops = {
2294         .queue_init = coda_decoder_queue_init,
2295         .reqbufs = coda_decoder_reqbufs,
2296         .start_streaming = coda_start_decoding,
2297         .prepare_run = coda_prepare_decode,
2298         .finish_run = coda_finish_decode,
2299         .run_timeout = coda_decode_timeout,
2300         .seq_end_work = coda_seq_end_work,
2301         .release = coda_bit_release,
2302 };
2303
2304 irqreturn_t coda_irq_handler(int irq, void *data)
2305 {
2306         struct coda_dev *dev = data;
2307         struct coda_ctx *ctx;
2308
2309         /* read status register to attend the IRQ */
2310         coda_read(dev, CODA_REG_BIT_INT_STATUS);
2311         coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET,
2312                       CODA_REG_BIT_INT_CLEAR);
2313
2314         ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
2315         if (ctx == NULL) {
2316                 v4l2_err(&dev->v4l2_dev,
2317                          "Instance released before the end of transaction\n");
2318                 return IRQ_HANDLED;
2319         }
2320
2321         trace_coda_bit_done(ctx);
2322
2323         if (ctx->aborting) {
2324                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
2325                          "task has been aborted\n");
2326         }
2327
2328         if (coda_isbusy(ctx->dev)) {
2329                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
2330                          "coda is still busy!!!!\n");
2331                 return IRQ_NONE;
2332         }
2333
2334         complete(&ctx->completion);
2335
2336         return IRQ_HANDLED;
2337 }