diff options
Diffstat (limited to 'drivers/media/platform/coda.c')
-rw-r--r-- | drivers/media/platform/coda.c | 1484 |
1 files changed, 1305 insertions, 179 deletions
diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c index df4ada880e42..449d2fec9e87 100644 --- a/drivers/media/platform/coda.c +++ b/drivers/media/platform/coda.c @@ -18,6 +18,7 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/irq.h> +#include <linux/kfifo.h> #include <linux/module.h> #include <linux/of_device.h> #include <linux/platform_device.h> @@ -28,6 +29,7 @@ #include <media/v4l2-ctrls.h> #include <media/v4l2-device.h> +#include <media/v4l2-event.h> #include <media/v4l2-ioctl.h> #include <media/v4l2-mem2mem.h> #include <media/videobuf2-core.h> @@ -41,13 +43,16 @@ #define CODA_FMO_BUF_SIZE 32 #define CODADX6_WORK_BUF_SIZE (288 * 1024 + CODA_FMO_BUF_SIZE * 8 * 1024) -#define CODA7_WORK_BUF_SIZE (512 * 1024 + CODA_FMO_BUF_SIZE * 8 * 1024) +#define CODA7_WORK_BUF_SIZE (128 * 1024) +#define CODA7_TEMP_BUF_SIZE (304 * 1024) #define CODA_PARA_BUF_SIZE (10 * 1024) #define CODA_ISRAM_SIZE (2048 * 2) #define CODADX6_IRAM_SIZE 0xb000 -#define CODA7_IRAM_SIZE 0x14000 /* 81920 bytes */ +#define CODA7_IRAM_SIZE 0x14000 -#define CODA_MAX_FRAMEBUFFERS 2 +#define CODA7_PS_BUF_SIZE 0x28000 + +#define CODA_MAX_FRAMEBUFFERS 8 #define MAX_W 8192 #define MAX_H 8192 @@ -129,6 +134,7 @@ struct coda_dev { struct clk *clk_ahb; struct coda_aux_buf codebuf; + struct coda_aux_buf tempbuf; struct coda_aux_buf workbuf; struct gen_pool *iram_pool; long unsigned int iram_vaddr; @@ -153,6 +159,7 @@ struct coda_params { u8 mpeg4_inter_qp; u8 gop_size; int codec_mode; + int codec_mode_aux; enum v4l2_mpeg_video_multi_slice_mode slice_mode; u32 framerate; u16 bitrate; @@ -160,13 +167,30 @@ struct coda_params { u32 slice_max_mb; }; +struct coda_iram_info { + u32 axi_sram_use; + phys_addr_t buf_bit_use; + phys_addr_t buf_ip_ac_dc_use; + phys_addr_t buf_dbk_y_use; + phys_addr_t buf_dbk_c_use; + phys_addr_t buf_ovl_use; + phys_addr_t buf_btp_use; + phys_addr_t search_ram_paddr; + int search_ram_size; +}; + struct coda_ctx { struct coda_dev *dev; + struct mutex buffer_mutex; struct list_head list; + struct work_struct skip_run; int aborting; + int initialized; int streamon_out; int streamon_cap; u32 isequence; + u32 qsequence; + u32 osequence; struct coda_q_data q_data[2]; enum coda_inst_type inst_type; struct coda_codec *codec; @@ -176,12 +200,25 @@ struct coda_ctx { struct v4l2_ctrl_handler ctrls; struct v4l2_fh fh; int gopcounter; + int runcounter; char vpu_header[3][64]; int vpu_header_size[3]; + struct kfifo bitstream_fifo; + struct mutex bitstream_mutex; + struct coda_aux_buf bitstream; + bool prescan_failed; struct coda_aux_buf parabuf; + struct coda_aux_buf psbuf; + struct coda_aux_buf slicebuf; struct coda_aux_buf internal_frames[CODA_MAX_FRAMEBUFFERS]; + struct coda_aux_buf workbuf; int num_internal_frames; int idx; + int reg_idx; + struct coda_iram_info iram_info; + u32 bit_stream_param; + u32 frm_dis_flg; + int display_idx; }; static const u8 coda_filler_nal[14] = { 0x00, 0x00, 0x00, 0x01, 0x0c, 0xff, @@ -228,10 +265,22 @@ static int coda_wait_timeout(struct coda_dev *dev) static void coda_command_async(struct coda_ctx *ctx, int cmd) { struct coda_dev *dev = ctx->dev; + + if (dev->devtype->product == CODA_7541) { + /* Restore context related registers to CODA */ + coda_write(dev, ctx->bit_stream_param, + CODA_REG_BIT_BIT_STREAM_PARAM); + coda_write(dev, ctx->frm_dis_flg, + CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); + coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR); + } + coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY); coda_write(dev, ctx->idx, CODA_REG_BIT_RUN_INDEX); coda_write(dev, ctx->params.codec_mode, CODA_REG_BIT_RUN_COD_STD); + coda_write(dev, ctx->params.codec_mode_aux, CODA7_REG_BIT_RUN_AUX_STD); + coda_write(dev, cmd, CODA_REG_BIT_RUN_COMMAND); } @@ -297,6 +346,8 @@ static struct coda_codec codadx6_codecs[] = { static struct coda_codec coda7_codecs[] = { CODA_CODEC(CODA7_MODE_ENCODE_H264, V4L2_PIX_FMT_YUV420, V4L2_PIX_FMT_H264, 1280, 720), CODA_CODEC(CODA7_MODE_ENCODE_MP4, V4L2_PIX_FMT_YUV420, V4L2_PIX_FMT_MPEG4, 1280, 720), + CODA_CODEC(CODA7_MODE_DECODE_H264, V4L2_PIX_FMT_H264, V4L2_PIX_FMT_YUV420, 1920, 1080), + CODA_CODEC(CODA7_MODE_DECODE_MP4, V4L2_PIX_FMT_MPEG4, V4L2_PIX_FMT_YUV420, 1920, 1080), }; static bool coda_format_is_yuv(u32 fourcc) @@ -365,7 +416,7 @@ static int vidioc_querycap(struct file *file, void *priv, } static int enum_fmt(void *priv, struct v4l2_fmtdesc *f, - enum v4l2_buf_type type) + enum v4l2_buf_type type, int src_fourcc) { struct coda_ctx *ctx = fh_to_ctx(priv); struct coda_codec *codecs = ctx->dev->devtype->codecs; @@ -377,7 +428,8 @@ static int enum_fmt(void *priv, struct v4l2_fmtdesc *f, for (i = 0; i < num_formats; i++) { /* Both uncompressed formats are always supported */ - if (coda_format_is_yuv(formats[i].fourcc)) { + if (coda_format_is_yuv(formats[i].fourcc) && + !coda_format_is_yuv(src_fourcc)) { if (num == f->index) break; ++num; @@ -385,8 +437,10 @@ static int enum_fmt(void *priv, struct v4l2_fmtdesc *f, } /* Compressed formats may be supported, check the codec list */ for (k = 0; k < num_codecs; k++) { + /* if src_fourcc is set, only consider matching codecs */ if (type == V4L2_BUF_TYPE_VIDEO_CAPTURE && - formats[i].fourcc == codecs[k].dst_fourcc) + formats[i].fourcc == codecs[k].dst_fourcc && + (!src_fourcc || src_fourcc == codecs[k].src_fourcc)) break; if (type == V4L2_BUF_TYPE_VIDEO_OUTPUT && formats[i].fourcc == codecs[k].src_fourcc) @@ -413,13 +467,26 @@ static int enum_fmt(void *priv, struct v4l2_fmtdesc *f, static int vidioc_enum_fmt_vid_cap(struct file *file, void *priv, struct v4l2_fmtdesc *f) { - return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_CAPTURE); + struct coda_ctx *ctx = fh_to_ctx(priv); + struct vb2_queue *src_vq; + struct coda_q_data *q_data_src; + + /* If the source format is already fixed, only list matching formats */ + src_vq = v4l2_m2m_get_vq(ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); + if (vb2_is_streaming(src_vq)) { + q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); + + return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_CAPTURE, + q_data_src->fourcc); + } + + return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_CAPTURE, 0); } static int vidioc_enum_fmt_vid_out(struct file *file, void *priv, struct v4l2_fmtdesc *f) { - return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_OUTPUT); + return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_OUTPUT, 0); } static int vidioc_g_fmt(struct file *file, void *priv, struct v4l2_format *f) @@ -492,15 +559,45 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct coda_ctx *ctx = fh_to_ctx(priv); - struct coda_codec *codec = NULL; + struct coda_codec *codec; + struct vb2_queue *src_vq; + int ret; - /* Determine codec by the encoded format */ - codec = coda_find_codec(ctx->dev, V4L2_PIX_FMT_YUV420, - f->fmt.pix.pixelformat); + /* + * If the source format is already fixed, try to find a codec that + * converts to the given destination format + */ + src_vq = v4l2_m2m_get_vq(ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); + if (vb2_is_streaming(src_vq)) { + struct coda_q_data *q_data_src; + + q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); + codec = coda_find_codec(ctx->dev, q_data_src->fourcc, + f->fmt.pix.pixelformat); + if (!codec) + return -EINVAL; + } else { + /* Otherwise determine codec by encoded format, if possible */ + codec = coda_find_codec(ctx->dev, V4L2_PIX_FMT_YUV420, + f->fmt.pix.pixelformat); + } f->fmt.pix.colorspace = ctx->colorspace; - return vidioc_try_fmt(codec, f); + ret = vidioc_try_fmt(codec, f); + if (ret < 0) + return ret; + + /* The h.264 decoder only returns complete 16x16 macroblocks */ + if (codec && codec->src_fourcc == V4L2_PIX_FMT_H264) { + f->fmt.pix.width = round_up(f->fmt.pix.width, 16); + f->fmt.pix.height = round_up(f->fmt.pix.height, 16); + f->fmt.pix.bytesperline = f->fmt.pix.width; + f->fmt.pix.sizeimage = f->fmt.pix.bytesperline * + f->fmt.pix.height * 3 / 2; + } + + return 0; } static int vidioc_try_fmt_vid_out(struct file *file, void *priv, @@ -610,11 +707,35 @@ static int vidioc_expbuf(struct file *file, void *priv, return v4l2_m2m_expbuf(file, ctx->m2m_ctx, eb); } +static bool coda_buf_is_end_of_stream(struct coda_ctx *ctx, + struct v4l2_buffer *buf) +{ + struct vb2_queue *src_vq; + + src_vq = v4l2_m2m_get_vq(ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); + + return ((ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) && + (buf->sequence == (ctx->qsequence - 1))); +} + static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *buf) { struct coda_ctx *ctx = fh_to_ctx(priv); + int ret; + + ret = v4l2_m2m_dqbuf(file, ctx->m2m_ctx, buf); + + /* If this is the last capture buffer, emit an end-of-stream event */ + if (buf->type == V4L2_BUF_TYPE_VIDEO_CAPTURE && + coda_buf_is_end_of_stream(ctx, buf)) { + const struct v4l2_event eos_event = { + .type = V4L2_EVENT_EOS + }; - return v4l2_m2m_dqbuf(file, ctx->m2m_ctx, buf); + v4l2_event_queue_fh(&ctx->fh, &eos_event); + } + + return ret; } static int vidioc_create_bufs(struct file *file, void *priv, @@ -637,8 +758,53 @@ static int vidioc_streamoff(struct file *file, void *priv, enum v4l2_buf_type type) { struct coda_ctx *ctx = fh_to_ctx(priv); + int ret; - return v4l2_m2m_streamoff(file, ctx->m2m_ctx, type); + /* + * This indirectly calls __vb2_queue_cancel, which dequeues all buffers. + * We therefore have to lock it against running hardware in this context, + * which still needs the buffers. + */ + mutex_lock(&ctx->buffer_mutex); + ret = v4l2_m2m_streamoff(file, ctx->m2m_ctx, type); + mutex_unlock(&ctx->buffer_mutex); + + return ret; +} + +static int vidioc_decoder_cmd(struct file *file, void *fh, + struct v4l2_decoder_cmd *dc) +{ + struct coda_ctx *ctx = fh_to_ctx(fh); + + if (dc->cmd != V4L2_DEC_CMD_STOP) + return -EINVAL; + + if ((dc->flags & V4L2_DEC_CMD_STOP_TO_BLACK) || + (dc->flags & V4L2_DEC_CMD_STOP_IMMEDIATELY)) + return -EINVAL; + + if (dc->stop.pts != 0) + return -EINVAL; + + if (ctx->inst_type != CODA_INST_DECODER) + return -EINVAL; + + /* Set the strem-end flag on this context */ + ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG; + + return 0; +} + +static int vidioc_subscribe_event(struct v4l2_fh *fh, + const struct v4l2_event_subscription *sub) +{ + switch (sub->type) { + case V4L2_EVENT_EOS: + return v4l2_event_subscribe(fh, sub, 0, NULL); + default: + return v4l2_ctrl_subscribe_event(fh, sub); + } } static const struct v4l2_ioctl_ops coda_ioctl_ops = { @@ -664,14 +830,206 @@ static const struct v4l2_ioctl_ops coda_ioctl_ops = { .vidioc_streamon = vidioc_streamon, .vidioc_streamoff = vidioc_streamoff, + + .vidioc_decoder_cmd = vidioc_decoder_cmd, + + .vidioc_subscribe_event = vidioc_subscribe_event, + .vidioc_unsubscribe_event = v4l2_event_unsubscribe, }; +static int coda_start_decoding(struct coda_ctx *ctx); + +static void coda_skip_run(struct work_struct *work) +{ + struct coda_ctx *ctx = container_of(work, struct coda_ctx, skip_run); + + v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->m2m_ctx); +} + +static inline int coda_get_bitstream_payload(struct coda_ctx *ctx) +{ + return kfifo_len(&ctx->bitstream_fifo); +} + +static void coda_kfifo_sync_from_device(struct coda_ctx *ctx) +{ + struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo; + struct coda_dev *dev = ctx->dev; + u32 rd_ptr; + + rd_ptr = coda_read(dev, CODA_REG_BIT_RD_PTR(ctx->reg_idx)); + kfifo->out = (kfifo->in & ~kfifo->mask) | + (rd_ptr - ctx->bitstream.paddr); + if (kfifo->out > kfifo->in) + kfifo->out -= kfifo->mask + 1; +} + +static void coda_kfifo_sync_to_device_full(struct coda_ctx *ctx) +{ + struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo; + struct coda_dev *dev = ctx->dev; + u32 rd_ptr, wr_ptr; + + rd_ptr = ctx->bitstream.paddr + (kfifo->out & kfifo->mask); + coda_write(dev, rd_ptr, CODA_REG_BIT_RD_PTR(ctx->reg_idx)); + wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask); + coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); +} + +static void coda_kfifo_sync_to_device_write(struct coda_ctx *ctx) +{ + struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo; + struct coda_dev *dev = ctx->dev; + u32 wr_ptr; + + wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask); + coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); +} + +static int coda_bitstream_queue(struct coda_ctx *ctx, struct vb2_buffer *src_buf) +{ + u32 src_size = vb2_get_plane_payload(src_buf, 0); + u32 n; + + n = kfifo_in(&ctx->bitstream_fifo, vb2_plane_vaddr(src_buf, 0), src_size); + if (n < src_size) + return -ENOSPC; + + dma_sync_single_for_device(&ctx->dev->plat_dev->dev, ctx->bitstream.paddr, + ctx->bitstream.size, DMA_TO_DEVICE); + + ctx->qsequence++; + + return 0; +} + +static bool coda_bitstream_try_queue(struct coda_ctx *ctx, + struct vb2_buffer *src_buf) +{ + int ret; + + if (coda_get_bitstream_payload(ctx) + + vb2_get_plane_payload(src_buf, 0) + 512 >= ctx->bitstream.size) + return false; + + if (vb2_plane_vaddr(src_buf, 0) == NULL) { + v4l2_err(&ctx->dev->v4l2_dev, "trying to queue empty buffer\n"); + return true; + } + + ret = coda_bitstream_queue(ctx, src_buf); + if (ret < 0) { + v4l2_err(&ctx->dev->v4l2_dev, "bitstream buffer overflow\n"); + return false; + } + /* Sync read pointer to device */ + if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev)) + coda_kfifo_sync_to_device_write(ctx); + + ctx->prescan_failed = false; + + return true; +} + +static void coda_fill_bitstream(struct coda_ctx *ctx) +{ + struct vb2_buffer *src_buf; + + while (v4l2_m2m_num_src_bufs_ready(ctx->m2m_ctx) > 0) { + src_buf = v4l2_m2m_next_src_buf(ctx->m2m_ctx); + + if (coda_bitstream_try_queue(ctx, src_buf)) { + src_buf = v4l2_m2m_src_buf_remove(ctx->m2m_ctx); + v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE); + } else { + break; + } + } +} + /* * Mem-to-mem operations. */ -static void coda_device_run(void *m2m_priv) +static int coda_prepare_decode(struct coda_ctx *ctx) +{ + struct vb2_buffer *dst_buf; + struct coda_dev *dev = ctx->dev; + struct coda_q_data *q_data_dst; + u32 stridey, height; + u32 picture_y, picture_cb, picture_cr; + + dst_buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx); + q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + + if (ctx->params.rot_mode & CODA_ROT_90) { + stridey = q_data_dst->height; + height = q_data_dst->width; + } else { + stridey = q_data_dst->width; + height = q_data_dst->height; + } + + /* Try to copy source buffer contents into the bitstream ringbuffer */ + mutex_lock(&ctx->bitstream_mutex); + coda_fill_bitstream(ctx); + mutex_unlock(&ctx->bitstream_mutex); + + if (coda_get_bitstream_payload(ctx) < 512 && + (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) { + v4l2_dbg(1, coda_debug, &dev->v4l2_dev, + "bitstream payload: %d, skipping\n", + coda_get_bitstream_payload(ctx)); + schedule_work(&ctx->skip_run); + return -EAGAIN; + } + + /* Run coda_start_decoding (again) if not yet initialized */ + if (!ctx->initialized) { + int ret = coda_start_decoding(ctx); + if (ret < 0) { + v4l2_err(&dev->v4l2_dev, "failed to start decoding\n"); + schedule_work(&ctx->skip_run); + return -EAGAIN; + } else { + ctx->initialized = 1; + } + } + + /* Set rotator output */ + picture_y = vb2_dma_contig_plane_dma_addr(dst_buf, 0); + if (q_data_dst->fourcc == V4L2_PIX_FMT_YVU420) { + /* Switch Cr and Cb for YVU420 format */ + picture_cr = picture_y + stridey * height; + picture_cb = picture_cr + stridey / 2 * height / 2; + } else { + picture_cb = picture_y + stridey * height; + picture_cr = picture_cb + stridey / 2 * height / 2; + } + coda_write(dev, picture_y, CODA_CMD_DEC_PIC_ROT_ADDR_Y); + coda_write(dev, picture_cb, CODA_CMD_DEC_PIC_ROT_ADDR_CB); + coda_write(dev, picture_cr, CODA_CMD_DEC_PIC_ROT_ADDR_CR); + coda_write(dev, stridey, CODA_CMD_DEC_PIC_ROT_STRIDE); + coda_write(dev, CODA_ROT_MIR_ENABLE | ctx->params.rot_mode, + CODA_CMD_DEC_PIC_ROT_MODE); + + switch (dev->devtype->product) { + case CODA_DX6: + /* TBD */ + case CODA_7541: + coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION); + break; + } + + coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM); + + coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START); + coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE); + + return 0; +} + +static void coda_prepare_encode(struct coda_ctx *ctx) { - struct coda_ctx *ctx = m2m_priv; struct coda_q_data *q_data_src, *q_data_dst; struct vb2_buffer *src_buf, *dst_buf; struct coda_dev *dev = ctx->dev; @@ -681,17 +1039,15 @@ static void coda_device_run(void *m2m_priv) u32 pic_stream_buffer_addr, pic_stream_buffer_size; u32 dst_fourcc; - mutex_lock(&dev->coda_mutex); - src_buf = v4l2_m2m_next_src_buf(ctx->m2m_ctx); dst_buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx); q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); dst_fourcc = q_data_dst->fourcc; - src_buf->v4l2_buf.sequence = ctx->isequence; - dst_buf->v4l2_buf.sequence = ctx->isequence; - ctx->isequence++; + src_buf->v4l2_buf.sequence = ctx->osequence; + dst_buf->v4l2_buf.sequence = ctx->osequence; + ctx->osequence++; /* * Workaround coda firmware BUG that only marks the first @@ -793,16 +1149,53 @@ static void coda_device_run(void *m2m_priv) coda_write(dev, pic_stream_buffer_addr, CODA_CMD_ENC_PIC_BB_START); coda_write(dev, pic_stream_buffer_size / 1024, CODA_CMD_ENC_PIC_BB_SIZE); +} - if (dev->devtype->product == CODA_7541) { - coda_write(dev, CODA7_USE_BIT_ENABLE | CODA7_USE_HOST_BIT_ENABLE | - CODA7_USE_ME_ENABLE | CODA7_USE_HOST_ME_ENABLE, - CODA7_REG_BIT_AXI_SRAM_USE); +static void coda_device_run(void *m2m_priv) +{ + struct coda_ctx *ctx = m2m_priv; + struct coda_dev *dev = ctx->dev; + int ret; + + mutex_lock(&ctx->buffer_mutex); + + /* + * If streamoff dequeued all buffers before we could get the lock, + * just bail out immediately. + */ + if ((!v4l2_m2m_num_src_bufs_ready(ctx->m2m_ctx) && + ctx->inst_type != CODA_INST_DECODER) || + !v4l2_m2m_num_dst_bufs_ready(ctx->m2m_ctx)) { + v4l2_dbg(1, coda_debug, &dev->v4l2_dev, + "%d: device_run without buffers\n", ctx->idx); + mutex_unlock(&ctx->buffer_mutex); + schedule_work(&ctx->skip_run); + return; } + mutex_lock(&dev->coda_mutex); + + if (ctx->inst_type == CODA_INST_DECODER) { + ret = coda_prepare_decode(ctx); + if (ret < 0) { + mutex_unlock(&dev->coda_mutex); + mutex_unlock(&ctx->buffer_mutex); + /* job_finish scheduled by prepare_decode */ + return; + } + } else { + coda_prepare_encode(ctx); + } + + if (dev->devtype->product != CODA_DX6) + coda_write(dev, ctx->iram_info.axi_sram_use, + CODA7_REG_BIT_AXI_SRAM_USE); + /* 1 second timeout in case CODA locks up */ schedule_delayed_work(&dev->timeout, HZ); + if (ctx->inst_type == CODA_INST_DECODER) + coda_kfifo_sync_to_device_full(ctx); coda_command_async(ctx, CODA_COMMAND_PIC_RUN); } @@ -812,15 +1205,32 @@ static int coda_job_ready(void *m2m_priv) /* * For both 'P' and 'key' frame cases 1 picture - * and 1 frame are needed. + * and 1 frame are needed. In the decoder case, + * the compressed frame can be in the bitstream. */ - if (!v4l2_m2m_num_src_bufs_ready(ctx->m2m_ctx) || - !v4l2_m2m_num_dst_bufs_ready(ctx->m2m_ctx)) { + if (!v4l2_m2m_num_src_bufs_ready(ctx->m2m_ctx) && + ctx->inst_type != CODA_INST_DECODER) { v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "not ready: not enough video buffers.\n"); return 0; } + if (!v4l2_m2m_num_dst_bufs_ready(ctx->m2m_ctx)) { + v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, + "not ready: not enough video capture buffers.\n"); + return 0; + } + + if (ctx->prescan_failed || + ((ctx->inst_type == CODA_INST_DECODER) && + (coda_get_bitstream_payload(ctx) < 512) && + !(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) { + v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, + "%d: not ready: not enough bitstream data.\n", + ctx->idx); + return 0; + } + if (ctx->aborting) { v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "not ready: aborting\n"); @@ -936,7 +1346,29 @@ static int coda_buf_prepare(struct vb2_buffer *vb) static void coda_buf_queue(struct vb2_buffer *vb) { struct coda_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); - v4l2_m2m_buf_queue(ctx->m2m_ctx, vb); + struct coda_q_data *q_data; + + q_data = get_q_data(ctx, vb->vb2_queue->type); + + /* + * In the decoder case, immediately try to copy the buffer into the + * bitstream ringbuffer and mark it as ready to be dequeued. + */ + if (q_data->fourcc == V4L2_PIX_FMT_H264 && + vb->vb2_queue->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) { + /* + * For backwards compatiblity, queuing an empty buffer marks + * the stream end + */ + if (vb2_get_plane_payload(vb, 0) == 0) + ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG; + mutex_lock(&ctx->bitstream_mutex); + v4l2_m2m_buf_queue(ctx->m2m_ctx, vb); + coda_fill_bitstream(ctx); + mutex_unlock(&ctx->bitstream_mutex); + } else { + v4l2_m2m_buf_queue(ctx->m2m_ctx, vb); + } } static void coda_wait_prepare(struct vb2_queue *q) @@ -951,21 +1383,6 @@ static void coda_wait_finish(struct vb2_queue *q) coda_lock(ctx); } -static void coda_free_framebuffers(struct coda_ctx *ctx) -{ - int i; - - for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++) { - if (ctx->internal_frames[i].vaddr) { - dma_free_coherent(&ctx->dev->plat_dev->dev, - ctx->internal_frames[i].size, - ctx->internal_frames[i].vaddr, - ctx->internal_frames[i].paddr); - ctx->internal_frames[i].vaddr = NULL; - } - } -} - static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value) { struct coda_dev *dev = ctx->dev; @@ -977,29 +1394,69 @@ static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value) p[index ^ 1] = value; } +static int coda_alloc_aux_buf(struct coda_dev *dev, + struct coda_aux_buf *buf, size_t size) +{ + buf->vaddr = dma_alloc_coherent(&dev->plat_dev->dev, size, &buf->paddr, + GFP_KERNEL); + if (!buf->vaddr) + return -ENOMEM; + + buf->size = size; + + return 0; +} + +static inline int coda_alloc_context_buf(struct coda_ctx *ctx, + struct coda_aux_buf *buf, size_t size) +{ + return coda_alloc_aux_buf(ctx->dev, buf, size); +} + +static void coda_free_aux_buf(struct coda_dev *dev, + struct coda_aux_buf *buf) +{ + if (buf->vaddr) { + dma_free_coherent(&dev->plat_dev->dev, buf->size, + buf->vaddr, buf->paddr); + buf->vaddr = NULL; + buf->size = 0; + } +} + +static void coda_free_framebuffers(struct coda_ctx *ctx) +{ + int i; + + for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++) + coda_free_aux_buf(ctx->dev, &ctx->internal_frames[i]); +} + static int coda_alloc_framebuffers(struct coda_ctx *ctx, struct coda_q_data *q_data, u32 fourcc) { struct coda_dev *dev = ctx->dev; - int height = q_data->height; dma_addr_t paddr; int ysize; + int ret; int i; + if (ctx->codec && ctx->codec->src_fourcc == V4L2_PIX_FMT_H264) + height = round_up(height, 16); ysize = round_up(q_data->width, 8) * height; /* Allocate frame buffers */ - ctx->num_internal_frames = CODA_MAX_FRAMEBUFFERS; for (i = 0; i < ctx->num_internal_frames; i++) { - ctx->internal_frames[i].size = q_data->sizeimage; - if (fourcc == V4L2_PIX_FMT_H264 && dev->devtype->product != CODA_DX6) + size_t size; + + size = q_data->sizeimage; + if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 && + dev->devtype->product != CODA_DX6) ctx->internal_frames[i].size += ysize/4; - ctx->internal_frames[i].vaddr = dma_alloc_coherent( - &dev->plat_dev->dev, ctx->internal_frames[i].size, - &ctx->internal_frames[i].paddr, GFP_KERNEL); - if (!ctx->internal_frames[i].vaddr) { + ret = coda_alloc_context_buf(ctx, &ctx->internal_frames[i], size); + if (ret < 0) { coda_free_framebuffers(ctx); - return -ENOMEM; + return ret; } } @@ -1010,10 +1467,20 @@ static int coda_alloc_framebuffers(struct coda_ctx *ctx, struct coda_q_data *q_d coda_parabuf_write(ctx, i * 3 + 1, paddr + ysize); /* Cb */ coda_parabuf_write(ctx, i * 3 + 2, paddr + ysize + ysize/4); /* Cr */ - if (dev->devtype->product != CODA_DX6 && fourcc == V4L2_PIX_FMT_H264) - coda_parabuf_write(ctx, 96 + i, ctx->internal_frames[i].paddr + ysize + ysize/4 + ysize/4); + /* mvcol buffer for h.264 */ + if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 && + dev->devtype->product != CODA_DX6) + coda_parabuf_write(ctx, 96 + i, + ctx->internal_frames[i].paddr + + ysize + ysize/4 + ysize/4); } + /* mvcol buffer for mpeg4 */ + if ((dev->devtype->product != CODA_DX6) && + (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4)) + coda_parabuf_write(ctx, 97, ctx->internal_frames[i].paddr + + ysize + ysize/4 + ysize/4); + return 0; } @@ -1035,6 +1502,371 @@ static int coda_h264_padding(int size, char *p) return nal_size; } +static void coda_setup_iram(struct coda_ctx *ctx) +{ + struct coda_iram_info *iram_info = &ctx->iram_info; + struct coda_dev *dev = ctx->dev; + int ipacdc_size; + int bitram_size; + int dbk_size; + int ovl_size; + int mb_width; + int me_size; + int size; + + memset(iram_info, 0, sizeof(*iram_info)); + size = dev->iram_size; + + if (dev->devtype->product == CODA_DX6) + return; + + if (ctx->inst_type == CODA_INST_ENCODER) { + struct coda_q_data *q_data_src; + + q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); + mb_width = DIV_ROUND_UP(q_data_src->width, 16); + + /* Prioritize in case IRAM is too small for everything */ + me_size = round_up(round_up(q_data_src->width, 16) * 36 + 2048, + 1024); + iram_info->search_ram_size = me_size; + if (size >= iram_info->search_ram_size) { + if (dev->devtype->product == CODA_7541) + iram_info->axi_sram_use |= CODA7_USE_HOST_ME_ENABLE; + iram_info->search_ram_paddr = dev->iram_paddr; + size -= iram_info->search_ram_size; + } else { + pr_err("IRAM is smaller than the search ram size\n"); + goto out; + } + + /* Only H.264BP and H.263P3 are considered */ + dbk_size = round_up(128 * mb_width, 1024); + if (size >= dbk_size) { + iram_info->axi_sram_use |= CODA7_USE_HOST_DBK_ENABLE; + iram_info->buf_dbk_y_use = dev->iram_paddr + + iram_info->search_ram_size; + iram_info->buf_dbk_c_use = iram_info->buf_dbk_y_use + + dbk_size / 2; + size -= dbk_size; + } else { + goto out; + } + + bitram_size = round_up(128 * mb_width, 1024); + if (size >= bitram_size) { + iram_info->axi_sram_use |= CODA7_USE_HOST_BIT_ENABLE; + iram_info->buf_bit_use = iram_info->buf_dbk_c_use + + dbk_size / 2; + size -= bitram_size; + } else { + goto out; + } + + ipacdc_size = round_up(128 * mb_width, 1024); + if (size >= ipacdc_size) { + iram_info->axi_sram_use |= CODA7_USE_HOST_IP_ENABLE; + iram_info->buf_ip_ac_dc_use = iram_info->buf_bit_use + + bitram_size; + size -= ipacdc_size; + } + + /* OVL and BTP disabled for encoder */ + } else if (ctx->inst_type == CODA_INST_DECODER) { + struct coda_q_data *q_data_dst; + int mb_height; + + q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + mb_width = DIV_ROUND_UP(q_data_dst->width, 16); + mb_height = DIV_ROUND_UP(q_data_dst->height, 16); + + dbk_size = round_up(256 * mb_width, 1024); + if (size >= dbk_size) { + iram_info->axi_sram_use |= CODA7_USE_HOST_DBK_ENABLE; + iram_info->buf_dbk_y_use = dev->iram_paddr; + iram_info->buf_dbk_c_use = dev->iram_paddr + + dbk_size / 2; + size -= dbk_size; + } else { + goto out; + } + + bitram_size = round_up(128 * mb_width, 1024); + if (size >= bitram_size) { + iram_info->axi_sram_use |= CODA7_USE_HOST_BIT_ENABLE; + iram_info->buf_bit_use = iram_info->buf_dbk_c_use + + dbk_size / 2; + size -= bitram_size; + } else { + goto out; + } + + ipacdc_size = round_up(128 * mb_width, 1024); + if (size >= ipacdc_size) { + iram_info->axi_sram_use |= CODA7_USE_HOST_IP_ENABLE; + iram_info->buf_ip_ac_dc_use = iram_info->buf_bit_use + + bitram_size; + size -= ipacdc_size; + } else { + goto out; + } + + ovl_size = round_up(80 * mb_width, 1024); + } + +out: + switch (dev->devtype->product) { + case CODA_DX6: + break; + case CODA_7541: + /* i.MX53 uses secondary AXI for IRAM access */ + if (iram_info->axi_sram_use & CODA7_USE_HOST_BIT_ENABLE) + iram_info->axi_sram_use |= CODA7_USE_BIT_ENABLE; + if (iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE) + iram_info->axi_sram_use |= CODA7_USE_IP_ENABLE; + if (iram_info->axi_sram_use & CODA7_USE_HOST_DBK_ENABLE) + iram_info->axi_sram_use |= CODA7_USE_DBK_ENABLE; + if (iram_info->axi_sram_use & CODA7_USE_HOST_OVL_ENABLE) + iram_info->axi_sram_use |= CODA7_USE_OVL_ENABLE; + if (iram_info->axi_sram_use & CODA7_USE_HOST_ME_ENABLE) + iram_info->axi_sram_use |= CODA7_USE_ME_ENABLE; + } + + if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE)) + v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, + "IRAM smaller than needed\n"); + + if (dev->devtype->product == CODA_7541) { + /* TODO - Enabling these causes picture errors on CODA7541 */ + if (ctx->inst_type == CODA_INST_DECODER) { + /* fw 1.4.50 */ + iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE | + CODA7_USE_IP_ENABLE); + } else { + /* fw 13.4.29 */ + iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE | + CODA7_USE_HOST_DBK_ENABLE | + CODA7_USE_IP_ENABLE | + CODA7_USE_DBK_ENABLE); + } + } +} + +static void coda_free_context_buffers(struct coda_ctx *ctx) +{ + struct coda_dev *dev = ctx->dev; + + coda_free_aux_buf(dev, &ctx->slicebuf); + coda_free_aux_buf(dev, &ctx->psbuf); + if (dev->devtype->product != CODA_DX6) + coda_free_aux_buf(dev, &ctx->workbuf); +} + +static int coda_alloc_context_buffers(struct coda_ctx *ctx, + struct coda_q_data *q_data) +{ + struct coda_dev *dev = ctx->dev; + size_t size; + int ret; + + switch (dev->devtype->product) { + case CODA_7541: + size = CODA7_WORK_BUF_SIZE; + break; + default: + return 0; + } + + if (ctx->psbuf.vaddr) { + v4l2_err(&dev->v4l2_dev, "psmembuf still allocated\n"); + return -EBUSY; + } + if (ctx->slicebuf.vaddr) { + v4l2_err(&dev->v4l2_dev, "slicebuf still allocated\n"); + return -EBUSY; + } + if (ctx->workbuf.vaddr) { + v4l2_err(&dev->v4l2_dev, "context buffer still allocated\n"); + ret = -EBUSY; + return -ENOMEM; + } + + if (q_data->fourcc == V4L2_PIX_FMT_H264) { + /* worst case slice size */ + size = (DIV_ROUND_UP(q_data->width, 16) * + DIV_ROUND_UP(q_data->height, 16)) * 3200 / 8 + 512; + ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size); + if (ret < 0) { + v4l2_err(&dev->v4l2_dev, "failed to allocate %d byte slice buffer", + ctx->slicebuf.size); + return ret; + } + } + + if (dev->devtype->product == CODA_7541) { + ret = coda_alloc_context_buf(ctx, &ctx->psbuf, CODA7_PS_BUF_SIZE); + if (ret < 0) { + v4l2_err(&dev->v4l2_dev, "failed to allocate psmem buffer"); + goto err; + } + } + + ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size); + if (ret < 0) { + v4l2_err(&dev->v4l2_dev, "failed to allocate %d byte context buffer", + ctx->workbuf.size); + goto err; + } + + return 0; + +err: + coda_free_context_buffers(ctx); + return ret; +} + +static int coda_start_decoding(struct coda_ctx *ctx) +{ + struct coda_q_data *q_data_src, *q_data_dst; + u32 bitstream_buf, bitstream_size; + struct coda_dev *dev = ctx->dev; + int width, height; + u32 src_fourcc; + u32 val; + int ret; + + /* Start decoding */ + q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); + q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + bitstream_buf = ctx->bitstream.paddr; + bitstream_size = ctx->bitstream.size; + src_fourcc = q_data_src->fourcc; + + coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR); + + /* Update coda bitstream read and write pointers from kfifo */ + coda_kfifo_sync_to_device_full(ctx); + + ctx->display_idx = -1; + ctx->frm_dis_flg = 0; + coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); + + coda_write(dev, CODA_BIT_DEC_SEQ_INIT_ESCAPE, + CODA_REG_BIT_BIT_STREAM_PARAM); + + coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START); + coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE); + val = 0; + if (dev->devtype->product == CODA_7541) + val |= CODA_REORDER_ENABLE; + coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION); + + ctx->params.codec_mode = ctx->codec->mode; + ctx->params.codec_mode_aux = 0; + if (src_fourcc == V4L2_PIX_FMT_H264) { + if (dev->devtype->product == CODA_7541) { + coda_write(dev, ctx->psbuf.paddr, + CODA_CMD_DEC_SEQ_PS_BB_START); + coda_write(dev, (CODA7_PS_BUF_SIZE / 1024), + CODA_CMD_DEC_SEQ_PS_BB_SIZE); + } + } + + if (coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT)) { + v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n"); + coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM); + return -ETIMEDOUT; + } + + /* Update kfifo out pointer from coda bitstream read pointer */ + coda_kfifo_sync_from_device(ctx); + + coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM); + + if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) { + v4l2_err(&dev->v4l2_dev, + "CODA_COMMAND_SEQ_INIT failed, error code = %d\n", + coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON)); + return -EAGAIN; + } + + val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE); + if (dev->devtype->product == CODA_DX6) { + width = (val >> CODADX6_PICWIDTH_OFFSET) & CODADX6_PICWIDTH_MASK; + height = val & CODADX6_PICHEIGHT_MASK; + } else { + width = (val >> CODA7_PICWIDTH_OFFSET) & CODA7_PICWIDTH_MASK; + height = val & CODA7_PICHEIGHT_MASK; + } + + if (width > q_data_dst->width || height > q_data_dst->height) { + v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n", + width, height, q_data_dst->width, q_data_dst->height); + return -EINVAL; + } + + width = round_up(width, 16); + height = round_up(height, 16); + + v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "%s instance %d now: %dx%d\n", + __func__, ctx->idx, width, height); + + ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED) + 1; + if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) { + v4l2_err(&dev->v4l2_dev, + "not enough framebuffers to decode (%d < %d)\n", + CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames); + return -EINVAL; + } + + ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc); + if (ret < 0) + return ret; + + /* Tell the decoder how many frame buffers we allocated. */ + coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM); + coda_write(dev, width, CODA_CMD_SET_FRAME_BUF_STRIDE); + + if (dev->devtype->product != CODA_DX6) { + /* Set secondary AXI IRAM */ + coda_setup_iram(ctx); + + coda_write(dev, ctx->iram_info.buf_bit_use, + CODA7_CMD_SET_FRAME_AXI_BIT_ADDR); + coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use, + CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR); + coda_write(dev, ctx->iram_info.buf_dbk_y_use, + CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR); + coda_write(dev, ctx->iram_info.buf_dbk_c_use, + CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR); + coda_write(dev, ctx->iram_info.buf_ovl_use, + CODA7_CMD_SET_FRAME_AXI_OVL_ADDR); + } + + if (src_fourcc == V4L2_PIX_FMT_H264) { + coda_write(dev, ctx->slicebuf.paddr, + CODA_CMD_SET_FRAME_SLICE_BB_START); + coda_write(dev, ctx->slicebuf.size / 1024, + CODA_CMD_SET_FRAME_SLICE_BB_SIZE); + } + + if (dev->devtype->product == CODA_7541) { + int max_mb_x = 1920 / 16; + int max_mb_y = 1088 / 16; + int max_mb_num = max_mb_x * max_mb_y; + coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y, + CODA7_CMD_SET_FRAME_MAX_DEC_SIZE); + } + + if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) { + v4l2_err(&ctx->dev->v4l2_dev, + "CODA_COMMAND_SET_FRAME_BUF timeout\n"); + return -ETIMEDOUT; + } + + return 0; +} + static int coda_encode_header(struct coda_ctx *ctx, struct vb2_buffer *buf, int header_code, u8 *header, int *size) { @@ -1050,7 +1882,7 @@ static int coda_encode_header(struct coda_ctx *ctx, struct vb2_buffer *buf, v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_ENCODE_HEADER timeout\n"); return ret; } - *size = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->idx)) - + *size = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)) - coda_read(dev, CODA_CMD_ENC_HEADER_BB_START); memcpy(header, vb2_plane_vaddr(buf, 0), *size); @@ -1069,26 +1901,36 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count) u32 value; int ret = 0; - if (count < 1) - return -EINVAL; + q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); + if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) { + if (q_data_src->fourcc == V4L2_PIX_FMT_H264) { + if (coda_get_bitstream_payload(ctx) < 512) + return -EINVAL; + } else { + if (count < 1) + return -EINVAL; + } - if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) ctx->streamon_out = 1; - else - ctx->streamon_cap = 1; - q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); - if (ctx->streamon_out) { if (coda_format_is_yuv(q_data_src->fourcc)) ctx->inst_type = CODA_INST_ENCODER; else ctx->inst_type = CODA_INST_DECODER; + } else { + if (count < 1) + return -EINVAL; + + ctx->streamon_cap = 1; } /* Don't start the coda unless both queues are on */ if (!(ctx->streamon_out & ctx->streamon_cap)) return 0; + /* Allow device_run with no buffers queued and after streamoff */ + v4l2_m2m_set_src_buffered(ctx->m2m_ctx, true); + ctx->gopcounter = ctx->params.gop_size - 1; buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx); bitstream_buf = vb2_dma_contig_plane_dma_addr(buf, 0); @@ -1103,6 +1945,25 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count) return -EINVAL; } + /* Allocate per-instance buffers */ + ret = coda_alloc_context_buffers(ctx, q_data_src); + if (ret < 0) + return ret; + + if (ctx->inst_type == CODA_INST_DECODER) { + mutex_lock(&dev->coda_mutex); + ret = coda_start_decoding(ctx); + mutex_unlock(&dev->coda_mutex); + if (ret == -EAGAIN) { + return 0; + } else if (ret < 0) { + return ret; + } else { + ctx->initialized = 1; + return 0; + } + } + if (!coda_is_initialized(dev)) { v4l2_err(v4l2_dev, "coda is not initialized.\n"); return -EFAULT; @@ -1111,8 +1972,8 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count) mutex_lock(&dev->coda_mutex); coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR); - coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->idx)); - coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->idx)); + coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->reg_idx)); + coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); switch (dev->devtype->product) { case CODA_DX6: coda_write(dev, CODADX6_STREAM_BUF_DYNALLOC_EN | @@ -1207,6 +2068,8 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count) } coda_write(dev, value, CODA_CMD_ENC_SEQ_OPTION); + coda_setup_iram(ctx); + if (dst_fourcc == V4L2_PIX_FMT_H264) { value = (FMO_SLICE_SAVE_BUF_SIZE << 7); value |= (0 & CODA_FMOPARAM_TYPE_MASK) << CODA_FMOPARAM_TYPE_OFFSET; @@ -1214,8 +2077,10 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count) if (dev->devtype->product == CODA_DX6) { coda_write(dev, value, CODADX6_CMD_ENC_SEQ_FMO); } else { - coda_write(dev, dev->iram_paddr, CODA7_CMD_ENC_SEQ_SEARCH_BASE); - coda_write(dev, 48 * 1024, CODA7_CMD_ENC_SEQ_SEARCH_SIZE); + coda_write(dev, ctx->iram_info.search_ram_paddr, + CODA7_CMD_ENC_SEQ_SEARCH_BASE); + coda_write(dev, ctx->iram_info.search_ram_size, + CODA7_CMD_ENC_SEQ_SEARCH_SIZE); } } @@ -1231,6 +2096,7 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count) goto out; } + ctx->num_internal_frames = 2; ret = coda_alloc_framebuffers(ctx, q_data_src, dst_fourcc); if (ret < 0) { v4l2_err(v4l2_dev, "failed to allocate framebuffers\n"); @@ -1239,13 +2105,20 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count) coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM); coda_write(dev, round_up(q_data_src->width, 8), CODA_CMD_SET_FRAME_BUF_STRIDE); + if (dev->devtype->product == CODA_7541) + coda_write(dev, round_up(q_data_src->width, 8), + CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE); if (dev->devtype->product != CODA_DX6) { - coda_write(dev, round_up(q_data_src->width, 8), CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE); - coda_write(dev, dev->iram_paddr + 48 * 1024, CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR); - coda_write(dev, dev->iram_paddr + 53 * 1024, CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR); - coda_write(dev, dev->iram_paddr + 58 * 1024, CODA7_CMD_SET_FRAME_AXI_BIT_ADDR); - coda_write(dev, dev->iram_paddr + 68 * 1024, CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR); - coda_write(dev, 0x0, CODA7_CMD_SET_FRAME_AXI_OVL_ADDR); + coda_write(dev, ctx->iram_info.buf_bit_use, + CODA7_CMD_SET_FRAME_AXI_BIT_ADDR); + coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use, + CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR); + coda_write(dev, ctx->iram_info.buf_dbk_y_use, + CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR); + coda_write(dev, ctx->iram_info.buf_dbk_c_use, + CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR); + coda_write(dev, ctx->iram_info.buf_ovl_use, + CODA7_CMD_SET_FRAME_AXI_OVL_ADDR); } ret = coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF); if (ret < 0) { @@ -1326,32 +2199,26 @@ static int coda_stop_streaming(struct vb2_queue *q) struct coda_dev *dev = ctx->dev; if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) { - v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, + v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "%s: output\n", __func__); ctx->streamon_out = 0; + + ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG; + + ctx->isequence = 0; } else { - v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, + v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "%s: capture\n", __func__); ctx->streamon_cap = 0; - } - - /* Don't stop the coda unless both queues are off */ - if (ctx->streamon_out || ctx->streamon_cap) - return 0; - cancel_delayed_work(&dev->timeout); - - mutex_lock(&dev->coda_mutex); - v4l2_dbg(1, coda_debug, &dev->v4l2_dev, - "%s: sent command 'SEQ_END' to coda\n", __func__); - if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) { - v4l2_err(&dev->v4l2_dev, - "CODA_COMMAND_SEQ_END failed\n"); - return -ETIMEDOUT; + ctx->osequence = 0; } - mutex_unlock(&dev->coda_mutex); - coda_free_framebuffers(ctx); + if (!ctx->streamon_out && !ctx->streamon_cap) { + kfifo_init(&ctx->bitstream_fifo, + ctx->bitstream.vaddr, ctx->bitstream.size); + ctx->runcounter = 0; + } return 0; } @@ -1511,23 +2378,41 @@ static int coda_open(struct file *file) { struct coda_dev *dev = video_drvdata(file); struct coda_ctx *ctx = NULL; - int ret = 0; + int ret; int idx; - idx = coda_next_free_instance(dev); - if (idx >= CODA_MAX_INSTANCES) - return -EBUSY; - set_bit(idx, &dev->instance_mask); - ctx = kzalloc(sizeof *ctx, GFP_KERNEL); if (!ctx) return -ENOMEM; + idx = coda_next_free_instance(dev); + if (idx >= CODA_MAX_INSTANCES) { + ret = -EBUSY; + goto err_coda_max; + } + set_bit(idx, &dev->instance_mask); + + INIT_WORK(&ctx->skip_run, coda_skip_run); v4l2_fh_init(&ctx->fh, video_devdata(file)); file->private_data = &ctx->fh; v4l2_fh_add(&ctx->fh); ctx->dev = dev; ctx->idx = idx; + switch (dev->devtype->product) { + case CODA_7541: + ctx->reg_idx = 0; + break; + default: + ctx->reg_idx = idx; + } + + ret = clk_prepare_enable(dev->clk_per); + if (ret) + goto err_clk_per; + + ret = clk_prepare_enable(dev->clk_ahb); + if (ret) + goto err_clk_ahb; set_default_params(ctx); ctx->m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx, @@ -1537,39 +2422,62 @@ static int coda_open(struct file *file) v4l2_err(&dev->v4l2_dev, "%s return error (%d)\n", __func__, ret); - goto err; + goto err_ctx_init; } ret = coda_ctrls_setup(ctx); if (ret) { v4l2_err(&dev->v4l2_dev, "failed to setup coda controls\n"); - goto err; + goto err_ctrls_setup; } ctx->fh.ctrl_handler = &ctx->ctrls; - ctx->parabuf.vaddr = dma_alloc_coherent(&dev->plat_dev->dev, - CODA_PARA_BUF_SIZE, &ctx->parabuf.paddr, GFP_KERNEL); - if (!ctx->parabuf.vaddr) { + ret = coda_alloc_context_buf(ctx, &ctx->parabuf, CODA_PARA_BUF_SIZE); + if (ret < 0) { v4l2_err(&dev->v4l2_dev, "failed to allocate parabuf"); + goto err_dma_alloc; + } + + ctx->bitstream.size = CODA_MAX_FRAME_SIZE; + ctx->bitstream.vaddr = dma_alloc_writecombine(&dev->plat_dev->dev, + ctx->bitstream.size, &ctx->bitstream.paddr, GFP_KERNEL); + if (!ctx->bitstream.vaddr) { + v4l2_err(&dev->v4l2_dev, "failed to allocate bitstream ringbuffer"); ret = -ENOMEM; - goto err; + goto err_dma_writecombine; } + kfifo_init(&ctx->bitstream_fifo, + ctx->bitstream.vaddr, ctx->bitstream.size); + mutex_init(&ctx->bitstream_mutex); + mutex_init(&ctx->buffer_mutex); coda_lock(ctx); list_add(&ctx->list, &dev->instances); coda_unlock(ctx); - clk_prepare_enable(dev->clk_per); - clk_prepare_enable(dev->clk_ahb); - v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "Created instance %d (%p)\n", ctx->idx, ctx); return 0; -err: +err_dma_writecombine: + coda_free_context_buffers(ctx); + if (ctx->dev->devtype->product == CODA_DX6) + coda_free_aux_buf(dev, &ctx->workbuf); + coda_free_aux_buf(dev, &ctx->parabuf); +err_dma_alloc: + v4l2_ctrl_handler_free(&ctx->ctrls); +err_ctrls_setup: + v4l2_m2m_ctx_release(ctx->m2m_ctx); +err_ctx_init: + clk_disable_unprepare(dev->clk_ahb); +err_clk_ahb: + clk_disable_unprepare(dev->clk_per); +err_clk_per: v4l2_fh_del(&ctx->fh); v4l2_fh_exit(&ctx->fh); + clear_bit(ctx->idx, &dev->instance_mask); +err_coda_max: kfree(ctx); return ret; } @@ -1582,16 +2490,37 @@ static int coda_release(struct file *file) v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "Releasing instance %p\n", ctx); + /* If this instance is running, call .job_abort and wait for it to end */ + v4l2_m2m_ctx_release(ctx->m2m_ctx); + + /* In case the instance was not running, we still need to call SEQ_END */ + mutex_lock(&dev->coda_mutex); + v4l2_dbg(1, coda_debug, &dev->v4l2_dev, + "%s: sent command 'SEQ_END' to coda\n", __func__); + if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) { + v4l2_err(&dev->v4l2_dev, + "CODA_COMMAND_SEQ_END failed\n"); + mutex_unlock(&dev->coda_mutex); + return -ETIMEDOUT; + } + mutex_unlock(&dev->coda_mutex); + + coda_free_framebuffers(ctx); + coda_lock(ctx); list_del(&ctx->list); coda_unlock(ctx); - dma_free_coherent(&dev->plat_dev->dev, CODA_PARA_BUF_SIZE, - ctx->parabuf.vaddr, ctx->parabuf.paddr); - v4l2_m2m_ctx_release(ctx->m2m_ctx); + dma_free_writecombine(&dev->plat_dev->dev, ctx->bitstream.size, + ctx->bitstream.vaddr, ctx->bitstream.paddr); + coda_free_context_buffers(ctx); + if (ctx->dev->devtype->product == CODA_DX6) + coda_free_aux_buf(dev, &ctx->workbuf); + + coda_free_aux_buf(dev, &ctx->parabuf); v4l2_ctrl_handler_free(&ctx->ctrls); - clk_disable_unprepare(dev->clk_per); clk_disable_unprepare(dev->clk_ahb); + clk_disable_unprepare(dev->clk_per); v4l2_fh_del(&ctx->fh); v4l2_fh_exit(&ctx->fh); clear_bit(ctx->idx, &dev->instance_mask); @@ -1628,55 +2557,180 @@ static const struct v4l2_file_operations coda_fops = { .mmap = coda_mmap, }; -static irqreturn_t coda_irq_handler(int irq, void *data) +static void coda_finish_decode(struct coda_ctx *ctx) { - struct vb2_buffer *src_buf, *dst_buf; - struct coda_dev *dev = data; - u32 wr_ptr, start_ptr; - struct coda_ctx *ctx; + struct coda_dev *dev = ctx->dev; + struct coda_q_data *q_data_src; + struct coda_q_data *q_data_dst; + struct vb2_buffer *dst_buf; + int width, height; + int decoded_idx; + int display_idx; + u32 src_fourcc; + int success; + u32 val; - cancel_delayed_work(&dev->timeout); + dst_buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx); - /* read status register to attend the IRQ */ - coda_read(dev, CODA_REG_BIT_INT_STATUS); - coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET, - CODA_REG_BIT_INT_CLEAR); + /* Update kfifo out pointer from coda bitstream read pointer */ + coda_kfifo_sync_from_device(ctx); - ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev); - if (ctx == NULL) { - v4l2_err(&dev->v4l2_dev, "Instance released before the end of transaction\n"); - mutex_unlock(&dev->coda_mutex); - return IRQ_HANDLED; + /* + * in stream-end mode, the read pointer can overshoot the write pointer + * by up to 512 bytes + */ + if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) { + if (coda_get_bitstream_payload(ctx) >= 0x100000 - 512) + kfifo_init(&ctx->bitstream_fifo, + ctx->bitstream.vaddr, ctx->bitstream.size); } - if (ctx->aborting) { - v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, - "task has been aborted\n"); - mutex_unlock(&dev->coda_mutex); - return IRQ_HANDLED; + q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); + src_fourcc = q_data_src->fourcc; + + val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS); + if (val != 1) + pr_err("DEC_PIC_SUCCESS = %d\n", val); + + success = val & 0x1; + if (!success) + v4l2_err(&dev->v4l2_dev, "decode failed\n"); + + if (src_fourcc == V4L2_PIX_FMT_H264) { + if (val & (1 << 3)) + v4l2_err(&dev->v4l2_dev, + "insufficient PS buffer space (%d bytes)\n", + ctx->psbuf.size); + if (val & (1 << 2)) + v4l2_err(&dev->v4l2_dev, + "insufficient slice buffer space (%d bytes)\n", + ctx->slicebuf.size); } - if (coda_isbusy(ctx->dev)) { - v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, - "coda is still busy!!!!\n"); - return IRQ_NONE; + val = coda_read(dev, CODA_RET_DEC_PIC_SIZE); + width = (val >> 16) & 0xffff; + height = val & 0xffff; + + q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + + val = coda_read(dev, CODA_RET_DEC_PIC_TYPE); + if ((val & 0x7) == 0) { + dst_buf->v4l2_buf.flags |= V4L2_BUF_FLAG_KEYFRAME; + dst_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_PFRAME; + } else { + dst_buf->v4l2_buf.flags |= V4L2_BUF_FLAG_PFRAME; + dst_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_KEYFRAME; + } + + val = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB); + if (val > 0) + v4l2_err(&dev->v4l2_dev, + "errors in %d macroblocks\n", val); + + if (dev->devtype->product == CODA_7541) { + val = coda_read(dev, CODA_RET_DEC_PIC_OPTION); + if (val == 0) { + /* not enough bitstream data */ + v4l2_dbg(1, coda_debug, &dev->v4l2_dev, + "prescan failed: %d\n", val); + ctx->prescan_failed = true; + return; + } + } + + ctx->frm_dis_flg = coda_read(dev, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); + + /* + * The previous display frame was copied out by the rotator, + * now it can be overwritten again + */ + if (ctx->display_idx >= 0 && + ctx->display_idx < ctx->num_internal_frames) { + ctx->frm_dis_flg &= ~(1 << ctx->display_idx); + coda_write(dev, ctx->frm_dis_flg, + CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); + } + + /* + * The index of the last decoded frame, not necessarily in + * display order, and the index of the next display frame. + * The latter could have been decoded in a previous run. + */ + decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX); + display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX); + + if (decoded_idx == -1) { + /* no frame was decoded, but we might have a display frame */ + if (display_idx < 0 && ctx->display_idx < 0) + ctx->prescan_failed = true; + } else if (decoded_idx == -2) { + /* no frame was decoded, we still return the remaining buffers */ + } else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) { + v4l2_err(&dev->v4l2_dev, + "decoded frame index out of range: %d\n", decoded_idx); } + if (display_idx == -1) { + /* + * no more frames to be decoded, but there could still + * be rotator output to dequeue + */ + ctx->prescan_failed = true; + } else if (display_idx == -3) { + /* possibly prescan failure */ + } else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) { + v4l2_err(&dev->v4l2_dev, + "presentation frame index out of range: %d\n", + display_idx); + } + + /* If a frame was copied out, return it */ + if (ctx->display_idx >= 0 && + ctx->display_idx < ctx->num_internal_frames) { + dst_buf = v4l2_m2m_dst_buf_remove(ctx->m2m_ctx); + dst_buf->v4l2_buf.sequence = ctx->osequence++; + + vb2_set_plane_payload(dst_buf, 0, width * height * 3 / 2); + + v4l2_m2m_buf_done(dst_buf, success ? VB2_BUF_STATE_DONE : + VB2_BUF_STATE_ERROR); + + v4l2_dbg(1, coda_debug, &dev->v4l2_dev, + "job finished: decoding frame (%d) (%s)\n", + dst_buf->v4l2_buf.sequence, + (dst_buf->v4l2_buf.flags & V4L2_BUF_FLAG_KEYFRAME) ? + "KEYFRAME" : "PFRAME"); + } else { + v4l2_dbg(1, coda_debug, &dev->v4l2_dev, + "job finished: no frame decoded\n"); + } + + /* The rotator will copy the current display frame next time */ + ctx->display_idx = display_idx; +} + +static void coda_finish_encode(struct coda_ctx *ctx) +{ + struct vb2_buffer *src_buf, *dst_buf; + struct coda_dev *dev = ctx->dev; + u32 wr_ptr, start_ptr; + src_buf = v4l2_m2m_src_buf_remove(ctx->m2m_ctx); dst_buf = v4l2_m2m_dst_buf_remove(ctx->m2m_ctx); /* Get results from the coda */ coda_read(dev, CODA_RET_ENC_PIC_TYPE); start_ptr = coda_read(dev, CODA_CMD_ENC_PIC_BB_START); - wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->idx)); + wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); + /* Calculate bytesused field */ if (dst_buf->v4l2_buf.sequence == 0) { - dst_buf->v4l2_planes[0].bytesused = (wr_ptr - start_ptr) + - ctx->vpu_header_size[0] + - ctx->vpu_header_size[1] + - ctx->vpu_header_size[2]; + vb2_set_plane_payload(dst_buf, 0, wr_ptr - start_ptr + + ctx->vpu_header_size[0] + + ctx->vpu_header_size[1] + + ctx->vpu_header_size[2]); } else { - dst_buf->v4l2_planes[0].bytesused = (wr_ptr - start_ptr); + vb2_set_plane_payload(dst_buf, 0, wr_ptr - start_ptr); } v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "frame size = %u\n", @@ -1708,8 +2762,62 @@ static irqreturn_t coda_irq_handler(int irq, void *data) dst_buf->v4l2_buf.sequence, (dst_buf->v4l2_buf.flags & V4L2_BUF_FLAG_KEYFRAME) ? "KEYFRAME" : "PFRAME"); +} + +static irqreturn_t coda_irq_handler(int irq, void *data) +{ + struct coda_dev *dev = data; + struct coda_ctx *ctx; + + cancel_delayed_work(&dev->timeout); + + /* read status register to attend the IRQ */ + coda_read(dev, CODA_REG_BIT_INT_STATUS); + coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET, + CODA_REG_BIT_INT_CLEAR); + + ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev); + if (ctx == NULL) { + v4l2_err(&dev->v4l2_dev, "Instance released before the end of transaction\n"); + mutex_unlock(&dev->coda_mutex); + return IRQ_HANDLED; + } + + if (ctx->aborting) { + v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, + "task has been aborted\n"); + goto out; + } + + if (coda_isbusy(ctx->dev)) { + v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, + "coda is still busy!!!!\n"); + return IRQ_NONE; + } + + if (ctx->inst_type == CODA_INST_DECODER) + coda_finish_decode(ctx); + else + coda_finish_encode(ctx); + +out: + if (ctx->aborting || (!ctx->streamon_cap && !ctx->streamon_out)) { + v4l2_dbg(1, coda_debug, &dev->v4l2_dev, + "%s: sent command 'SEQ_END' to coda\n", __func__); + if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) { + v4l2_err(&dev->v4l2_dev, + "CODA_COMMAND_SEQ_END failed\n"); + } + + kfifo_init(&ctx->bitstream_fifo, + ctx->bitstream.vaddr, ctx->bitstream.size); + + coda_free_framebuffers(ctx); + coda_free_context_buffers(ctx); + } mutex_unlock(&dev->coda_mutex); + mutex_unlock(&ctx->buffer_mutex); v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->m2m_ctx); @@ -1726,6 +2834,8 @@ static void coda_timeout(struct work_struct *work) mutex_lock(&dev->dev_mutex); list_for_each_entry(ctx, &dev->instances, list) { + if (mutex_is_locked(&ctx->buffer_mutex)) + mutex_unlock(&ctx->buffer_mutex); v4l2_m2m_streamoff(NULL, ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); v4l2_m2m_streamoff(NULL, ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); } @@ -1738,7 +2848,7 @@ static void coda_timeout(struct work_struct *work) static u32 coda_supported_firmwares[] = { CODA_FIRMWARE_VERNUM(CODA_DX6, 2, 2, 5), - CODA_FIRMWARE_VERNUM(CODA_7541, 13, 4, 29), + CODA_FIRMWARE_VERNUM(CODA_7541, 1, 4, 50), }; static bool coda_firmware_supported(u32 vernum) @@ -1771,10 +2881,15 @@ static int coda_hw_init(struct coda_dev *dev) u16 product, major, minor, release; u32 data; u16 *p; - int i; + int i, ret; - clk_prepare_enable(dev->clk_per); - clk_prepare_enable(dev->clk_ahb); + ret = clk_prepare_enable(dev->clk_per); + if (ret) + return ret; + + ret = clk_prepare_enable(dev->clk_ahb); + if (ret) + goto err_clk_ahb; /* * Copy the first CODA_ISRAM_SIZE in the internal SRAM. @@ -1803,8 +2918,14 @@ static int coda_hw_init(struct coda_dev *dev) coda_write(dev, 0, CODA_REG_BIT_CODE_BUF_ADDR + i * 4); /* Tell the BIT where to find everything it needs */ - coda_write(dev, dev->workbuf.paddr, - CODA_REG_BIT_WORK_BUF_ADDR); + if (dev->devtype->product == CODA_7541) { + coda_write(dev, dev->tempbuf.paddr, + CODA_REG_BIT_TEMP_BUF_ADDR); + coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM); + } else { + coda_write(dev, dev->workbuf.paddr, + CODA_REG_BIT_WORK_BUF_ADDR); + } coda_write(dev, dev->codebuf.paddr, CODA_REG_BIT_CODE_BUF_ADDR); coda_write(dev, 0, CODA_REG_BIT_CODE_RUN); @@ -1877,6 +2998,10 @@ static int coda_hw_init(struct coda_dev *dev) } return 0; + +err_clk_ahb: + clk_disable_unprepare(dev->clk_per); + return ret; } static void coda_fw_callback(const struct firmware *fw, void *context) @@ -1891,11 +3016,8 @@ static void coda_fw_callback(const struct firmware *fw, void *context) } /* allocate auxiliary per-device code buffer for the BIT processor */ - dev->codebuf.size = fw->size; - dev->codebuf.vaddr = dma_alloc_coherent(&pdev->dev, fw->size, - &dev->codebuf.paddr, - GFP_KERNEL); - if (!dev->codebuf.vaddr) { + ret = coda_alloc_aux_buf(dev, &dev->codebuf, fw->size); + if (ret < 0) { dev_err(&pdev->dev, "failed to allocate code buffer\n"); return; } @@ -1987,7 +3109,7 @@ MODULE_DEVICE_TABLE(platform, coda_platform_ids); #ifdef CONFIG_OF static const struct of_device_id coda_dt_ids[] = { - { .compatible = "fsl,imx27-vpu", .data = &coda_platform_ids[CODA_IMX27] }, + { .compatible = "fsl,imx27-vpu", .data = &coda_devdata[CODA_IMX27] }, { .compatible = "fsl,imx53-vpu", .data = &coda_devdata[CODA_IMX53] }, { /* sentinel */ } }; @@ -2032,11 +3154,6 @@ static int coda_probe(struct platform_device *pdev) /* Get memory for physical registers */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res == NULL) { - dev_err(&pdev->dev, "failed to get memory region resource\n"); - return -ENOENT; - } - dev->regs_base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(dev->regs_base)) return PTR_ERR(dev->regs_base); @@ -2048,8 +3165,8 @@ static int coda_probe(struct platform_device *pdev) return -ENOENT; } - if (devm_request_irq(&pdev->dev, irq, coda_irq_handler, - 0, CODA_NAME, dev) < 0) { + if (devm_request_threaded_irq(&pdev->dev, irq, NULL, coda_irq_handler, + IRQF_ONESHOT, CODA_NAME, dev) < 0) { dev_err(&pdev->dev, "failed to request irq\n"); return -ENOENT; } @@ -2085,24 +3202,36 @@ static int coda_probe(struct platform_device *pdev) /* allocate auxiliary per-device buffers for the BIT processor */ switch (dev->devtype->product) { case CODA_DX6: - dev->workbuf.size = CODADX6_WORK_BUF_SIZE; + ret = coda_alloc_aux_buf(dev, &dev->workbuf, + CODADX6_WORK_BUF_SIZE); + if (ret < 0) { + dev_err(&pdev->dev, "failed to allocate work buffer\n"); + v4l2_device_unregister(&dev->v4l2_dev); + return ret; + } + break; + case CODA_7541: + dev->tempbuf.size = CODA7_TEMP_BUF_SIZE; break; - default: - dev->workbuf.size = CODA7_WORK_BUF_SIZE; } - dev->workbuf.vaddr = dma_alloc_coherent(&pdev->dev, dev->workbuf.size, - &dev->workbuf.paddr, - GFP_KERNEL); - if (!dev->workbuf.vaddr) { - dev_err(&pdev->dev, "failed to allocate work buffer\n"); - v4l2_device_unregister(&dev->v4l2_dev); - return -ENOMEM; + if (dev->tempbuf.size) { + ret = coda_alloc_aux_buf(dev, &dev->tempbuf, + dev->tempbuf.size); + if (ret < 0) { + dev_err(&pdev->dev, "failed to allocate temp buffer\n"); + v4l2_device_unregister(&dev->v4l2_dev); + return ret; + } } - if (dev->devtype->product == CODA_DX6) + switch (dev->devtype->product) { + case CODA_DX6: dev->iram_size = CODADX6_IRAM_SIZE; - else + break; + case CODA_7541: dev->iram_size = CODA7_IRAM_SIZE; + break; + } dev->iram_vaddr = gen_pool_alloc(dev->iram_pool, dev->iram_size); if (!dev->iram_vaddr) { dev_err(&pdev->dev, "unable to alloc iram\n"); @@ -2128,12 +3257,9 @@ static int coda_remove(struct platform_device *pdev) v4l2_device_unregister(&dev->v4l2_dev); if (dev->iram_vaddr) gen_pool_free(dev->iram_pool, dev->iram_vaddr, dev->iram_size); - if (dev->codebuf.vaddr) - dma_free_coherent(&pdev->dev, dev->codebuf.size, - &dev->codebuf.vaddr, dev->codebuf.paddr); - if (dev->workbuf.vaddr) - dma_free_coherent(&pdev->dev, dev->workbuf.size, &dev->workbuf.vaddr, - dev->workbuf.paddr); + coda_free_aux_buf(dev, &dev->codebuf); + coda_free_aux_buf(dev, &dev->tempbuf); + coda_free_aux_buf(dev, &dev->workbuf); return 0; } |