From 962ec95e3811915535bea62e10372c6b35b68bb0 Mon Sep 17 00:00:00 2001 From: Detlev Casanova <detlev.casanova@collabora.com> Date: Thu, 27 Jun 2024 12:39:28 -0400 Subject: [PATCH] media: rkvdec2: Add iommu support This allows using all available memory. The IOMMU is part of the decoder core but can be driven by the rockchip iommu driver as a separate device. The only issue with this is that the IOMMU hardware is reset when the decoder is reset because of an error. In this case, the IOMMU hardware looses all mappings but the mapped buffer still retain their mapped address, cached in the IOMMU and decoder drivers. To avoid that, when a decoding error occurs, the iommu_flush_iotlb_all callback is used on the IOMMU to reconfigure the HW before continuing. Signed-off-by: Detlev Casanova <detlev.casanova@collabora.com> --- arch/arm64/boot/dts/rockchip/rk3588-base.dtsi | 22 +++++ .../platform/rockchip/rkvdec2/rkvdec2-h264.c | 10 ++- .../platform/rockchip/rkvdec2/rkvdec2-regs.h | 1 + .../media/platform/rockchip/rkvdec2/rkvdec2.c | 90 +++++++++++++++---- .../media/platform/rockchip/rkvdec2/rkvdec2.h | 2 + 5 files changed, 105 insertions(+), 20 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi index 5c5a186f59a09..8075b0bcf1d97 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi @@ -1253,9 +1253,20 @@ vdec0: video-decoder@fdc38000 { <&cru SRST_RKVDEC0_CORE>, <&cru SRST_RKVDEC0_HEVC_CA>; reset-names = "axi", "ahb", "cabac", "core", "hevc_cabac"; power-domains = <&power RK3588_PD_RKVDEC0>; + iommus = <&vdec0_mmu>; sram = <&vdec0_sram>; }; + vdec0_mmu: iommu@fdc38700 { + compatible = "rockchip,rk3588-iommu", "rockchip,rk3568-iommu"; + reg = <0x0 0xfdc38700 0x0 0x40>, <0x0 0xfdc38740 0x0 0x40>; + interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH 0>; + clocks = <&cru ACLK_RKVDEC0>, <&cru HCLK_RKVDEC0>; + clock-names = "aclk", "iface"; + #iommu-cells = <0>; + power-domains = <&power RK3588_PD_RKVDEC0>; + }; + vdec1: video-decoder@fdc40000 { compatible = "rockchip,rk3588-vdec"; reg = <0x0 0xfdc40000 0x0 0x100>, <0x0 0xfdc40100 0x0 0x500>, <0x0 0xfdc40600 0x0 0x100>; @@ -1272,9 +1283,20 @@ vdec1: video-decoder@fdc40000 { <&cru SRST_RKVDEC1_CORE>, <&cru SRST_RKVDEC1_HEVC_CA>; reset-names = "axi", "ahb", "cabac", "core", "hevc_cabac"; power-domains = <&power RK3588_PD_RKVDEC1>; + iommus = <&vdec1_mmu>; sram = <&vdec1_sram>; }; + vdec1_mmu: iommu@fdc40700 { + compatible = "rockchip,rk3588-iommu", "rockchip,rk3568-iommu"; + reg = <0x0 0xfdc40700 0x0 0x40>, <0x0 0xfdc40740 0x0 0x40>; + interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH 0>; + clocks = <&cru ACLK_RKVDEC0>, <&cru HCLK_RKVDEC0>; + clock-names = "aclk", "iface"; + #iommu-cells = <0>; + power-domains = <&power RK3588_PD_RKVDEC0>; + }; + av1d: video-codec@fdc70000 { compatible = "rockchip,rk3588-av1-vpu"; reg = <0x0 0xfdc70000 0x0 0x800>; diff --git a/drivers/media/platform/rockchip/rkvdec2/rkvdec2-h264.c b/drivers/media/platform/rockchip/rkvdec2/rkvdec2-h264.c index 2e991c85d90eb..9f022a16e03ed 100644 --- a/drivers/media/platform/rockchip/rkvdec2/rkvdec2-h264.c +++ b/drivers/media/platform/rockchip/rkvdec2/rkvdec2-h264.c @@ -718,8 +718,14 @@ static int rkvdec2_h264_run(struct rkvdec2_ctx *ctx) /* Set watchdog at 2 times the hardware timeout threshold */ u64 timeout_threshold = h264_ctx->regs.common.timeout_threshold; - watchdog_time = 2 * (1000 * timeout_threshold) / clk_get_rate(rkvdec->clocks[0].clk); - schedule_delayed_work(&rkvdec->watchdog_work, msecs_to_jiffies(watchdog_time)); + unsigned long axi_rate = clk_get_rate(rkvdec->clocks[0].clk); + + if (axi_rate) + watchdog_time = 2 * (1000 * timeout_threshold) / axi_rate; + else + watchdog_time = 2000; + schedule_delayed_work(&rkvdec->watchdog_work, + msecs_to_jiffies(watchdog_time)); /* Start decoding! */ writel(RKVDEC2_REG_DEC_E_BIT, rkvdec->regs + RKVDEC2_REG_DEC_E); diff --git a/drivers/media/platform/rockchip/rkvdec2/rkvdec2-regs.h b/drivers/media/platform/rockchip/rkvdec2/rkvdec2-regs.h index 2a432fc0bf527..7e4a54b5fe948 100644 --- a/drivers/media/platform/rockchip/rkvdec2/rkvdec2-regs.h +++ b/drivers/media/platform/rockchip/rkvdec2/rkvdec2-regs.h @@ -38,6 +38,7 @@ #define RKVDEC2_REG_STA_INT 0x380 #define STA_INT_DEC_RDY_STA BIT(2) +#define STA_INT_SOFTRESET_RDY BIT(9) /* base: OFFSET_COMMON_REGS */ struct rkvdec2_regs_common { diff --git a/drivers/media/platform/rockchip/rkvdec2/rkvdec2.c b/drivers/media/platform/rockchip/rkvdec2/rkvdec2.c index 852e88fc0ea17..85d91e6253b09 100644 --- a/drivers/media/platform/rockchip/rkvdec2/rkvdec2.c +++ b/drivers/media/platform/rockchip/rkvdec2/rkvdec2.c @@ -14,6 +14,7 @@ #include <linux/iommu.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/pm.h> #include <linux/pm_runtime.h> @@ -556,20 +557,23 @@ static void rkvdec2_free_rcb(struct rkvdec2_ctx *ctx) u32 width, height; int i; + struct rkvdec2_dev *rkvdec = ctx->dev; + width = ctx->decoded_fmt.fmt.pix_mp.width; height = ctx->decoded_fmt.fmt.pix_mp.height; for (i = 0; i < RKVDEC2_RCB_COUNT; i++) { - size_t rcb_size = RCB_SIZE(i, width, height); + size_t rcb_size = ctx->rcb_bufs[i].size; if (!ctx->rcb_bufs[i].cpu) continue; switch (ctx->rcb_bufs[i].type) { case RKVDEC2_ALLOC_SRAM: - gen_pool_free(ctx->dev->sram_pool, - (unsigned long)ctx->rcb_bufs[i].cpu, - rcb_size); + unsigned long virt_addr = (unsigned long)ctx->rcb_bufs[i].cpu; + + iommu_unmap(rkvdec->iommu_domain, virt_addr, rcb_size); + gen_pool_free(ctx->dev->sram_pool, virt_addr, rcb_size); break; case RKVDEC2_ALLOC_DMA: dma_free_coherent(ctx->dev->dev, @@ -585,6 +589,7 @@ static int rkvdec2_allocate_rcb(struct rkvdec2_ctx *ctx) { int ret, i; u32 width, height; + struct rkvdec2_dev *rkvdec = ctx->dev; memset(ctx->rcb_bufs, 0, sizeof(*ctx->rcb_bufs)); @@ -597,15 +602,43 @@ static int rkvdec2_allocate_rcb(struct rkvdec2_ctx *ctx) size_t rcb_size = RCB_SIZE(i, width, height); enum rkvdec2_alloc_type alloc_type = RKVDEC2_ALLOC_SRAM; + /* Try allocating an SRAM buffer */ if (ctx->dev->sram_pool) { + if (rkvdec->iommu_domain) + rcb_size = ALIGN(rcb_size, 0x1000); + cpu = gen_pool_dma_zalloc_align(ctx->dev->sram_pool, - rcb_size, - &dma, - 64); + rcb_size, + &dma, + 0x1000); } + /* If an IOMMU is used, map the SRAM address through it */ + if (cpu && rkvdec->iommu_domain) { + unsigned long virt_addr = (unsigned long)cpu; + phys_addr_t phys_addr = dma; + + ret = iommu_map(rkvdec->iommu_domain, virt_addr, phys_addr, + rcb_size, IOMMU_READ | IOMMU_WRITE, 0); + if (ret) { + gen_pool_free(ctx->dev->sram_pool, + (unsigned long)cpu, + rcb_size); + cpu = NULL; + goto ram_fallback; + } + + /* + * The registers will be configured with the virtual + * address so that it goes through the IOMMU + */ + dma = virt_addr; + } + +ram_fallback: /* Fallback to RAM */ if (!cpu) { + rcb_size = RCB_SIZE(i, width, height); cpu = dma_alloc_coherent(ctx->dev->dev, rcb_size, &dma, @@ -755,6 +788,20 @@ static void rkvdec2_job_finish(struct rkvdec2_ctx *ctx, pm_runtime_mark_last_busy(rkvdec->dev); pm_runtime_put_autosuspend(rkvdec->dev); + + /* + * The hw decoder block may reset IOMMU when there is an error, + * flush entire iotlb to reconfigure the IOMMU. + */ + if (rkvdec->need_reset) { + struct iommu_domain *domain; + + domain = iommu_get_domain_for_dev(rkvdec->dev); + if (domain) + iommu_flush_iotlb_all(domain); + rkvdec->need_reset = false; + } + rkvdec2_job_finish_no_pm(ctx, result); } @@ -1050,6 +1097,9 @@ static irqreturn_t rkvdec2_irq_handler(int irq, void *priv) state = (status & STA_INT_DEC_RDY_STA) ? VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR; + rkvdec->need_reset = state != VB2_BUF_STATE_DONE || + (status & STA_INT_SOFTRESET_RDY); + /* Clear interrupt status */ writel(0, rkvdec->regs + RKVDEC2_REG_STA_INT); if (cancel_delayed_work(&rkvdec->watchdog_work)) { @@ -1127,6 +1177,7 @@ static int rkvdec2_disable_multicore(struct rkvdec2_dev *rkvdec) static int rkvdec2_probe(struct platform_device *pdev) { struct rkvdec2_dev *rkvdec; + unsigned int dma_bit_mask = 40; unsigned int i; int ret, irq; @@ -1161,17 +1212,6 @@ static int rkvdec2_probe(struct platform_device *pdev) if (IS_ERR(rkvdec->regs)) return PTR_ERR(rkvdec->regs); - /* - * Without IOMMU support, keep DMA in the lower 32 bits. - */ - ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); - if (ret) { - dev_err(&pdev->dev, "Could not set DMA coherent mask.\n"); - return ret; - } - - vb2_dma_contig_set_max_seg_size(&pdev->dev, DMA_BIT_MASK(32)); - irq = platform_get_irq(pdev, 0); if (irq <= 0) return -ENXIO; @@ -1184,6 +1224,20 @@ static int rkvdec2_probe(struct platform_device *pdev) return ret; } + rkvdec->iommu_domain = iommu_get_domain_for_dev(&pdev->dev); + if (!rkvdec->iommu_domain) { + /* Without iommu, only the lower 32 bits of ram can be used */ + dma_bit_mask = 32; + vb2_dma_contig_set_max_seg_size(&pdev->dev, DMA_BIT_MASK(dma_bit_mask)); + dev_info(&pdev->dev, "No IOMMU domain found\n"); + } + + ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(dma_bit_mask)); + if (ret) { + dev_err(&pdev->dev, "Could not set DMA coherent mask.\n"); + return ret; + } + rkvdec->sram_pool = of_gen_pool_get(pdev->dev.of_node, "sram", 0); if (!rkvdec->sram_pool) dev_info(&pdev->dev, "No sram node, RCB will be stored in RAM\n"); diff --git a/drivers/media/platform/rockchip/rkvdec2/rkvdec2.h b/drivers/media/platform/rockchip/rkvdec2/rkvdec2.h index 8613051775e94..f46088d1d775e 100644 --- a/drivers/media/platform/rockchip/rkvdec2/rkvdec2.h +++ b/drivers/media/platform/rockchip/rkvdec2/rkvdec2.h @@ -101,6 +101,8 @@ struct rkvdec2_dev { struct gen_pool *sram_pool; struct mutex vdev_lock; /* serializes ioctls */ struct delayed_work watchdog_work; + struct iommu_domain *iommu_domain; + bool need_reset; }; struct rkvdec2_ctx { -- GitLab