diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c b/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c
index 07b3e8a44baf2a1e53151442e82909d3955d19d3..ef62be2aaa71ccda36237925658bddf78b1081cd 100644
--- a/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c
+++ b/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c
@@ -328,27 +328,34 @@ static int start_decode(struct vpu_instance *inst, u32 *fail_res)
 	return ret;
 }
 
-static void flag_last_done_buffer(struct vb2_queue *q)
+/* called with state_spinlock */
+static void flag_last_buffer_done(struct vpu_instance *inst)
 {
-	struct vb2_buffer *b;
-	unsigned long flags;
+	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
+	struct vb2_v4l2_buffer *vb;
 
-	spin_lock_irqsave(&q->done_lock, flags);
+	vb = v4l2_m2m_dst_buf_remove(m2m_ctx);
+	if (!vb) {
+		m2m_ctx->is_draining = true;
+		m2m_ctx->next_buf_last = true;
+		return;
+	}
 
-	if (list_empty(&q->done_list)) {
-		if (vb2_set_last_buffer_dequeued(q))
-			wake_up(&q->done_wq);
-	} else {
-		struct vb2_v4l2_buffer *vb;
+	v4l2_m2m_last_buffer_done(m2m_ctx, vb);
+}
 
-		b = list_last_entry(&q->done_list, struct vb2_buffer, done_entry);
-		vb = to_vb2_v4l2_buffer(b);
-		vb->flags |= V4L2_BUF_FLAG_LAST;
-	}
+/* called with state_spinlock */
+static void send_eos_event(struct vpu_instance *inst)
+{
+	static const struct v4l2_event vpu_event_eos = {
+		.type = V4L2_EVENT_EOS
+	};
 
-	spin_unlock_irqrestore(&q->done_lock, flags);
+	v4l2_event_queue_fh(&inst->v4l2_fh, &vpu_event_eos);
+	inst->eos = false;
 }
 
+/* called with state_spinlock */
 static int handle_dynamic_resolution_change(struct vpu_instance *inst)
 {
 	struct v4l2_fh *fh = &inst->v4l2_fh;
@@ -491,21 +498,21 @@ static void wave5_vpu_dec_finish_decode(struct vpu_instance *inst)
 	}
 
 	if ((dec_info.index_frame_display == DISPLAY_IDX_FLAG_SEQ_END ||
-	     dec_info.sequence_changed) && !v4l2_m2m_has_stopped(m2m_ctx)) {
-		static const struct v4l2_event vpu_event_eos = {
-			.type = V4L2_EVENT_EOS
-		};
+	     dec_info.sequence_changed)) {
+		unsigned long flags;
+		spin_lock_irqsave(&inst->state_spinlock, flags);
+		if (!v4l2_m2m_has_stopped(m2m_ctx)) {
+			switch_state(inst, VPU_INST_STATE_STOP);
 
-		switch_state(inst, VPU_INST_STATE_STOP);
-		flag_last_done_buffer(dst_vq);
-		v4l2_m2m_mark_stopped(m2m_ctx);
+			if (dec_info.sequence_changed) {
+				handle_dynamic_resolution_change(inst);
+			} else {
+				send_eos_event(inst);
+			}
 
-		if (dec_info.sequence_changed) {
-			handle_dynamic_resolution_change(inst);
-		} else {
-			v4l2_event_queue_fh(&inst->v4l2_fh, &vpu_event_eos);
-			inst->eos = false;
+			flag_last_buffer_done(inst);
 		}
+		spin_unlock_irqrestore(&inst->state_spinlock, flags);
 	}
 
 	/*
@@ -843,11 +850,74 @@ static int wave5_vpu_dec_s_selection(struct file *file, void *fh, struct v4l2_se
 	return 0;
 }
 
+/* called with state_spinlock */
+static int wave5_vpu_dec_stop_unlocked(struct vpu_instance *inst)
+{
+	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
+
+	if (m2m_ctx->is_draining)
+		return -EBUSY;
+
+	if (inst->state != VPU_INST_STATE_NONE) {
+		int ret;
+
+		ret = wave5_vpu_dec_set_eos_on_firmware(inst);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * Used to remember the EOS state across the streamoff/streamon
+	 * transition on the capture queue.
+	 */
+	inst->eos = true;
+
+	if (m2m_ctx->has_stopped)
+		return 0;
+
+	m2m_ctx->last_src_buf = v4l2_m2m_last_src_buf(m2m_ctx);
+	m2m_ctx->is_draining = true;
+
+	/*
+	 * Deferred to device run in case the last buffer hasn't reached
+	 * the ring buffer yet. Otherwise, we have to send the EOS signal
+	 * to the firmware so that any pending PIC_RUN returns without a
+	 * new bitstream buffer.
+	 */
+	if (m2m_ctx->last_src_buf)
+		return 0;
+
+	if (inst->state == VPU_INST_STATE_NONE) {
+		send_eos_event(inst);
+		flag_last_buffer_done(inst);
+	}
+
+	return 0;
+}
+
+/* called with state_spinlock */
+static int wave5_vpu_dec_start_unlocked(struct vpu_instance *inst)
+{
+	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
+	struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
+
+	if (m2m_ctx->is_draining)
+		return -EBUSY;
+
+	if (m2m_ctx->has_stopped)
+		m2m_ctx->has_stopped = false;
+
+	vb2_clear_last_buffer_dequeued(dst_vq);
+	inst->eos = false;
+
+	return 0;
+}
+
 static int wave5_vpu_dec_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *dc)
 {
 	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
+	unsigned long flags;
 	int ret;
 
 	dev_dbg(inst->dev->dev, "decoder command: %u\n", dc->cmd);
@@ -857,54 +927,23 @@ static int wave5_vpu_dec_decoder_cmd(struct file *file, void *fh, struct v4l2_de
 
 	switch (dc->cmd) {
 	case V4L2_DEC_CMD_STOP:
-		if (m2m_ctx->is_draining)
-			return -EBUSY;
-
-
-		if (inst->state != VPU_INST_STATE_NONE) {
-			ret = wave5_vpu_dec_set_eos_on_firmware(inst);
-			if (ret)
-				return ret;
-		}
-
-		/*
-		 * used to remember this state passed streamoff/on transition on
-		 * capture queue
-		 */
-		inst->eos = true;
-
-		if (m2m_ctx->has_stopped)
-			return 0;
-
-		m2m_ctx->last_src_buf = v4l2_m2m_last_src_buf(m2m_ctx);
-		m2m_ctx->is_draining = true;
-
-		/*
-		 * deferred to device run in case it wasn't in the ring buffer
-		 * yet. In other case, we have to send the EOS signal to the
-		 * firmware so that any pending PIC_RUN returns without new
-		 * bitstream buffer.
-		 */
-		if (m2m_ctx->last_src_buf)
-			return 0;
+		spin_lock_irqsave(&inst->state_spinlock, flags);
+		ret = wave5_vpu_dec_stop_unlocked(inst);
+		spin_unlock_irqrestore(&inst->state_spinlock, flags);
 
 		/* Just in case we don't have anything to decode anymore */
-		v4l2_m2m_try_schedule (m2m_ctx);
+		v4l2_m2m_try_schedule(m2m_ctx);
 		break;
 	case V4L2_DEC_CMD_START:
-		/* FIXME this could call v4l2_m2m_decoder_cmd */
-		if (m2m_ctx->is_draining)
-			return -EBUSY;
-
-		m2m_ctx->has_stopped = false;
-		vb2_clear_last_buffer_dequeued(v4l2_m2m_get_dst_vq(m2m_ctx));
-		inst->eos = false;
+		spin_lock_irqsave(&inst->state_spinlock, flags);
+		ret = wave5_vpu_dec_start_unlocked(inst);
+		spin_unlock_irqrestore(&inst->state_spinlock, flags);
 		break;
 	default:
-		return -EINVAL;
+		ret = -EINVAL;
 	}
 
-	return 0;
+	return ret;
 }
 
 static const struct v4l2_ioctl_ops wave5_vpu_dec_ioctl_ops = {
@@ -1301,17 +1340,21 @@ static int fill_ringbuffer(struct vpu_instance *inst)
 static void wave5_vpu_dec_buf_queue_src(struct vb2_buffer *vb)
 {
 	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
+	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
 	struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
 
 	vpu_buf->consumed = false;
 	vbuf->sequence = inst->queued_src_buf_num++;
+
+	v4l2_m2m_buf_queue(m2m_ctx, vbuf);
 }
 
 static void wave5_vpu_dec_buf_queue_dst(struct vb2_buffer *vb)
 {
 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
 	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
+	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
 
 	vbuf->sequence = inst->queued_dst_buf_num++;
 
@@ -1327,28 +1370,35 @@ static void wave5_vpu_dec_buf_queue_dst(struct vb2_buffer *vb)
 		ret = wave5_vpu_dec_clr_disp_flag(inst, vb->index);
 		if (ret) {
 			dev_dbg(inst->dev->dev,
-					"%s: Clearing the display flag of buffer index: %u, fail: %d\n",
-					__func__, vb->index, ret);
+				"%s: Clearing the display flag of buffer index: %u, fail: %d\n",
+				__func__, vb->index, ret);
 		}
 	}
+
+	if (vb2_is_streaming(vb->vb2_queue) && v4l2_m2m_dst_buf_is_last(m2m_ctx)) {
+		unsigned int i;
+
+		for (i = 0; i < vb->num_planes; i++)
+			vb2_set_plane_payload(vb, i, 0);
+
+		vbuf->field = V4L2_FIELD_NONE;
+
+		send_eos_event(inst);
+		v4l2_m2m_last_buffer_done(m2m_ctx, vbuf);
+	} else {
+		v4l2_m2m_buf_queue(m2m_ctx, vbuf);
+	}
 }
 
 static void wave5_vpu_dec_buf_queue(struct vb2_buffer *vb)
 {
 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
 	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
-	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
 
 	dev_dbg(inst->dev->dev, "%s: type: %4u index: %4u size: ([0]=%4lu, [1]=%4lu, [2]=%4lu)\n",
 		__func__, vb->type, vb->index, vb2_plane_size(&vbuf->vb2_buf, 0),
 		vb2_plane_size(&vbuf->vb2_buf, 1), vb2_plane_size(&vbuf->vb2_buf, 2));
 
-	/*
-	 * Do this first so we won't race with the IRQ that needs the buffer in
-	 * the rdy_list.
-	 */
-	v4l2_m2m_buf_queue(m2m_ctx, vbuf);
-
 	if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
 		wave5_vpu_dec_buf_queue_src(vb);
 	else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
@@ -1469,8 +1519,10 @@ static int streamoff_output(struct vb2_queue *q)
 	inst->codec_info->dec_info.stream_rd_ptr = new_rd_ptr;
 	inst->codec_info->dec_info.stream_wr_ptr = new_rd_ptr;
 
+	if (v4l2_m2m_has_stopped(m2m_ctx))
+		send_eos_event(inst);
+
 	/* streamoff on output cancels any draining operation */
-	v4l2_m2m_clear_state(m2m_ctx);
 	inst->eos = false;
 
 	return ret;
@@ -1509,7 +1561,6 @@ static int streamoff_capture(struct vb2_queue *q)
 	}
 
 	if (v4l2_m2m_has_stopped(m2m_ctx)) {
-		v4l2_m2m_clear_state(m2m_ctx);
 		ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
 		if (ret)
 			return ret;
@@ -1521,6 +1572,7 @@ static int streamoff_capture(struct vb2_queue *q)
 static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q)
 {
 	struct vpu_instance *inst = vb2_get_drv_priv(q);
+	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
 	bool check_cmd = TRUE;
 
 	dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
@@ -1541,6 +1593,8 @@ static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q)
 			dev_dbg(inst->dev->dev, "Getting decoding results from fw, fail\n");
 	}
 
+	v4l2_m2m_update_stop_streaming_state(m2m_ctx, q);
+
 	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
 		streamoff_output(q);
 	else
@@ -1613,6 +1667,7 @@ static int initialize_sequence(struct vpu_instance *inst)
 	return ret;
 }
 
+/* called with state_spinlock */
 static bool wave5_is_draining_or_eos(struct vpu_instance *inst)
 {
 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
@@ -1643,22 +1698,21 @@ static void wave5_vpu_dec_device_run(void *priv)
 	case VPU_INST_STATE_OPEN:
 		ret = initialize_sequence(inst);
 		if (ret) {
+			unsigned long flags;
+			spin_lock_irqsave(&inst->state_spinlock, flags);
 			if (wave5_is_draining_or_eos(inst) && wave5_last_src_buffer_consumed(m2m_ctx)) {
 				struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
+				switch_state(inst, VPU_INST_STATE_STOP);
 
 				if (vb2_is_streaming(dst_vq)) {
-					static const struct v4l2_event vpu_event_eos = {
-						.type = V4L2_EVENT_EOS
-					};
-					v4l2_event_queue_fh(&inst->v4l2_fh, &vpu_event_eos);
-					inst->eos = false;
+					send_eos_event(inst);
 				} else {
 					handle_dynamic_resolution_change(inst);
 				}
 
-				flag_last_done_buffer(dst_vq);
-				v4l2_m2m_mark_stopped(m2m_ctx);
+				flag_last_buffer_done(inst);
 			}
+			spin_unlock_irqrestore(&inst->state_spinlock, flags);
 		} else {
 			switch_state(inst, VPU_INST_STATE_INIT_SEQ);
 		}
@@ -1737,38 +1791,49 @@ static int wave5_vpu_dec_job_ready(void *priv)
 {
 	struct vpu_instance *inst = priv;
 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
+	unsigned long flags;
+	bool ret = false;
+
+	spin_lock_irqsave(&inst->state_spinlock, flags);
 
 	switch (inst->state) {
 	case VPU_INST_STATE_NONE:
 		dev_dbg(inst->dev->dev, "Decoder must be open to start queueing M2M jobs!\n");
-		return false;
+		break;
 	case VPU_INST_STATE_OPEN:
 		if (wave5_is_draining_or_eos(inst) || !v4l2_m2m_has_stopped(m2m_ctx) ||
-		    v4l2_m2m_num_src_bufs_ready(m2m_ctx) > 0)
-			return true;
+		    v4l2_m2m_num_src_bufs_ready(m2m_ctx) > 0) {
+			ret = true;
+			break;
+		}
+
 		dev_dbg(inst->dev->dev,
 			"Decoder must be draining or >= 1 OUTPUT queue buffer must be queued!\n");
-		return false;
+		break;
 	case VPU_INST_STATE_INIT_SEQ:
 	case VPU_INST_STATE_PIC_RUN:
 		if (!m2m_ctx->cap_q_ctx.q.streaming) {
 			dev_dbg(inst->dev->dev, "CAPTURE queue must be streaming to queue jobs!\n");
-			return false;
+			break;
 		} else if (v4l2_m2m_num_dst_bufs_ready(m2m_ctx) < (inst->fbc_buf_count - 1)) {
 			dev_dbg(inst->dev->dev,
 				"No capture buffer ready to decode!\n");
-			return false;
+			break;
 		} else if (!wave5_is_draining_or_eos(inst) && !v4l2_m2m_num_src_bufs_ready(m2m_ctx)) {
 			dev_dbg(inst->dev->dev,
-				"No bitstream data to decode! %i\n", inst->eos);
-			return false;
+				"No bitstream data to decode!\n");
+			break;
 		}
-		return true;
+		ret = true;
+		break;
 	case VPU_INST_STATE_STOP:
-		/* TODO */
+		dev_dbg(inst->dev->dev, "Decoder is stopped, not running.\n");
 		break;
 	}
-	return false;
+
+	spin_unlock_irqrestore(&inst->state_spinlock, flags);
+
+	return ret;
 }
 
 static const struct v4l2_m2m_ops wave5_vpu_dec_m2m_ops = {
@@ -1793,6 +1858,9 @@ static int wave5_vpu_open_dec(struct file *filp)
 	inst->type = VPU_INST_TYPE_DEC;
 	inst->ops = &wave5_vpu_dec_inst_ops;
 
+	inst->state = VPU_INST_STATE_NONE;
+	spin_lock_init(&inst->state_spinlock);
+
 	inst->codec_info = kzalloc(sizeof(*inst->codec_info), GFP_KERNEL);
 	if (!inst->codec_info)
 		return -ENOMEM;
diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c b/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c
index fa7ed52677458ebbb1627fdaa328963bf440c812..dbe36d0d139bf51dd5f9f1803e308315ca63af71 100644
--- a/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c
+++ b/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c
@@ -1581,6 +1581,8 @@ static int wave5_vpu_open_enc(struct file *filp)
 	inst->type = VPU_INST_TYPE_ENC;
 	inst->ops = &wave5_vpu_enc_inst_ops;
 
+	spin_lock_init(&inst->state_spinlock);
+
 	inst->codec_info = kzalloc(sizeof(*inst->codec_info), GFP_KERNEL);
 	if (!inst->codec_info)
 		return -ENOMEM;
diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu.c b/drivers/media/platform/chips-media/wave5/wave5-vpu.c
index 1d0d81994571530df2b68e0f6b847d039f07e0c9..f493bc17c25c4902c53b26547a5d46934247114d 100644
--- a/drivers/media/platform/chips-media/wave5/wave5-vpu.c
+++ b/drivers/media/platform/chips-media/wave5/wave5-vpu.c
@@ -294,6 +294,8 @@ static int wave5_vpu_remove(struct platform_device *pdev)
 {
 	struct vpu_device *dev = dev_get_drvdata(&pdev->dev);
 
+	mutex_destroy(&dev->dev_lock);
+	mutex_destroy(&dev->hw_lock);
 	clk_bulk_disable_unprepare(dev->num_clks, dev->clks);
 	wave5_vpu_enc_unregister_device(dev);
 	wave5_vpu_dec_unregister_device(dev);
diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpuapi.h b/drivers/media/platform/chips-media/wave5/wave5-vpuapi.h
index bc0b3636170b13d4ac587d46c7cad41b1b131580..a3b26af9e79b3db97606003ed2088f9d158136ad 100644
--- a/drivers/media/platform/chips-media/wave5/wave5-vpuapi.h
+++ b/drivers/media/platform/chips-media/wave5/wave5-vpuapi.h
@@ -1062,6 +1062,7 @@ struct vpu_instance {
 	enum vpu_instance_state state;
 	enum vpu_instance_type type;
 	const struct vpu_instance_ops *ops;
+	spinlock_t state_spinlock;
 
 	enum wave_std std;
 	s32 id;