Commit 1ada9010 authored by Linus Torvalds

Merge tag 'char-misc-5.8-rc7' of...

Merge tag 'char-misc-5.8-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc into master

Pull char/misc driver fixes from Greg KH:
 "Here are a few small driver fixes for 5.8-rc7

  They include:

   - habanalabs fixes

   - tiny fpga driver fixes

   - /dev/mem fixup from previous changes

   - interconnect driver fixes

   - binder fix

  All of these have been in linux-next for a while with no reported
  issues"

* tag 'char-misc-5.8-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc:
  interconnect: msm8916: Fix buswidth of pcnoc_s nodes
  interconnect: Do not skip aggregation for disabled paths
  /dev/mem: Add missing memory barriers for devmem_inode
  binder: Don't use mmput() from shrinker function.
  habanalabs: prevent possible out-of-bounds array access
  fpga: dfl: fix bug in port reset handshake
  fpga: dfl: pci: reduce the scope of variable 'ret'
  habanalabs: set 4s timeout for message to device CPU
  habanalabs: set clock gating per engine
  habanalabs: block WREG_BULK packet on PDMA
parents 7f2e231c 92d232d1
......@@ -16,7 +16,16 @@ Description: Allow the root user to disable/enable in runtime the clock
gating mechanism in Gaudi. Due to how Gaudi is built, the
clock gating needs to be disabled in order to access the
registers of the TPC and MME engines. This is sometimes needed
during debug by the user and hence the user needs this option
during debug by the user and hence the user needs this option.
The user can supply a bitmask value in which each bit represents
a different engine whose clock gating feature is to be disabled/enabled.
The bitmask is composed of 20 bits:
0 - 7 : DMA channels
8 - 11 : MME engines
12 - 19 : TPC engines
The bit position of a specific engine can be determined
using (1 << GAUDI_ENGINE_ID_*). The GAUDI_ENGINE_ID_* values
are defined in the uapi habanalabs.h file, in enum gaudi_engine_id.
What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers
Date: Jan 2019
......
......@@ -947,7 +947,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item,
trace_binder_unmap_user_end(alloc, index);
}
mmap_read_unlock(mm);
mmput(mm);
mmput_async(mm);
trace_binder_unmap_kernel_start(alloc, index);
......
......@@ -814,7 +814,8 @@ static struct inode *devmem_inode;
#ifdef CONFIG_IO_STRICT_DEVMEM
void revoke_devmem(struct resource *res)
{
struct inode *inode = READ_ONCE(devmem_inode);
/* pairs with smp_store_release() in devmem_init_inode() */
struct inode *inode = smp_load_acquire(&devmem_inode);
/*
* Check that the initialization has completed. Losing the race
......@@ -1028,8 +1029,11 @@ static int devmem_init_inode(void)
return rc;
}
/* publish /dev/mem initialized */
WRITE_ONCE(devmem_inode, inode);
/*
* Publish /dev/mem initialized.
* Pairs with smp_load_acquire() in revoke_devmem().
*/
smp_store_release(&devmem_inode, inode);
return 0;
}
......
......@@ -83,7 +83,8 @@ int __afu_port_disable(struct platform_device *pdev)
* on this port and minimum soft reset pulse width has elapsed.
* Driver polls port_soft_reset_ack to determine if reset done by HW.
*/
if (readq_poll_timeout(base + PORT_HDR_CTRL, v, v & PORT_CTRL_SFTRST,
if (readq_poll_timeout(base + PORT_HDR_CTRL, v,
v & PORT_CTRL_SFTRST_ACK,
RST_POLL_INVL, RST_POLL_TIMEOUT)) {
dev_err(&pdev->dev, "timeout, fail to reset device\n");
return -ETIMEDOUT;
......
......@@ -227,7 +227,6 @@ static int cci_pci_sriov_configure(struct pci_dev *pcidev, int num_vfs)
{
struct cci_drvdata *drvdata = pci_get_drvdata(pcidev);
struct dfl_fpga_cdev *cdev = drvdata->cdev;
int ret = 0;
if (!num_vfs) {
/*
......@@ -239,6 +238,8 @@ static int cci_pci_sriov_configure(struct pci_dev *pcidev, int num_vfs)
dfl_fpga_cdev_config_ports_pf(cdev);
} else {
int ret;
/*
* before enable SRIOV, put released ports into VF access mode
* first of all.
......
......@@ -243,6 +243,7 @@ static int aggregate_requests(struct icc_node *node)
{
struct icc_provider *p = node->provider;
struct icc_req *r;
u32 avg_bw, peak_bw;
node->avg_bw = 0;
node->peak_bw = 0;
......@@ -251,9 +252,14 @@ static int aggregate_requests(struct icc_node *node)
p->pre_aggregate(node);
hlist_for_each_entry(r, &node->req_list, req_node) {
if (!r->enabled)
continue;
p->aggregate(node, r->tag, r->avg_bw, r->peak_bw,
if (r->enabled) {
avg_bw = r->avg_bw;
peak_bw = r->peak_bw;
} else {
avg_bw = 0;
peak_bw = 0;
}
p->aggregate(node, r->tag, avg_bw, peak_bw,
&node->avg_bw, &node->peak_bw);
}
......
......@@ -197,13 +197,13 @@ DEFINE_QNODE(pcnoc_int_0, MSM8916_PNOC_INT_0, 8, -1, -1, MSM8916_PNOC_SNOC_MAS,
DEFINE_QNODE(pcnoc_int_1, MSM8916_PNOC_INT_1, 8, -1, -1, MSM8916_PNOC_SNOC_MAS);
DEFINE_QNODE(pcnoc_m_0, MSM8916_PNOC_MAS_0, 8, -1, -1, MSM8916_PNOC_INT_0);
DEFINE_QNODE(pcnoc_m_1, MSM8916_PNOC_MAS_1, 8, -1, -1, MSM8916_PNOC_SNOC_MAS);
DEFINE_QNODE(pcnoc_s_0, MSM8916_PNOC_SLV_0, 8, -1, -1, MSM8916_SLAVE_CLK_CTL, MSM8916_SLAVE_TLMM, MSM8916_SLAVE_TCSR, MSM8916_SLAVE_SECURITY, MSM8916_SLAVE_MSS);
DEFINE_QNODE(pcnoc_s_1, MSM8916_PNOC_SLV_1, 8, -1, -1, MSM8916_SLAVE_IMEM_CFG, MSM8916_SLAVE_CRYPTO_0_CFG, MSM8916_SLAVE_MSG_RAM, MSM8916_SLAVE_PDM, MSM8916_SLAVE_PRNG);
DEFINE_QNODE(pcnoc_s_2, MSM8916_PNOC_SLV_2, 8, -1, -1, MSM8916_SLAVE_SPDM, MSM8916_SLAVE_BOOT_ROM, MSM8916_SLAVE_BIMC_CFG, MSM8916_SLAVE_PNOC_CFG, MSM8916_SLAVE_PMIC_ARB);
DEFINE_QNODE(pcnoc_s_3, MSM8916_PNOC_SLV_3, 8, -1, -1, MSM8916_SLAVE_MPM, MSM8916_SLAVE_SNOC_CFG, MSM8916_SLAVE_RBCPR_CFG, MSM8916_SLAVE_QDSS_CFG, MSM8916_SLAVE_DEHR_CFG);
DEFINE_QNODE(pcnoc_s_4, MSM8916_PNOC_SLV_4, 8, -1, -1, MSM8916_SLAVE_VENUS_CFG, MSM8916_SLAVE_CAMERA_CFG, MSM8916_SLAVE_DISPLAY_CFG);
DEFINE_QNODE(pcnoc_s_8, MSM8916_PNOC_SLV_8, 8, -1, -1, MSM8916_SLAVE_USB_HS, MSM8916_SLAVE_SDCC_1, MSM8916_SLAVE_BLSP_1);
DEFINE_QNODE(pcnoc_s_9, MSM8916_PNOC_SLV_9, 8, -1, -1, MSM8916_SLAVE_SDCC_2, MSM8916_SLAVE_LPASS, MSM8916_SLAVE_GRAPHICS_3D_CFG);
DEFINE_QNODE(pcnoc_s_0, MSM8916_PNOC_SLV_0, 4, -1, -1, MSM8916_SLAVE_CLK_CTL, MSM8916_SLAVE_TLMM, MSM8916_SLAVE_TCSR, MSM8916_SLAVE_SECURITY, MSM8916_SLAVE_MSS);
DEFINE_QNODE(pcnoc_s_1, MSM8916_PNOC_SLV_1, 4, -1, -1, MSM8916_SLAVE_IMEM_CFG, MSM8916_SLAVE_CRYPTO_0_CFG, MSM8916_SLAVE_MSG_RAM, MSM8916_SLAVE_PDM, MSM8916_SLAVE_PRNG);
DEFINE_QNODE(pcnoc_s_2, MSM8916_PNOC_SLV_2, 4, -1, -1, MSM8916_SLAVE_SPDM, MSM8916_SLAVE_BOOT_ROM, MSM8916_SLAVE_BIMC_CFG, MSM8916_SLAVE_PNOC_CFG, MSM8916_SLAVE_PMIC_ARB);
DEFINE_QNODE(pcnoc_s_3, MSM8916_PNOC_SLV_3, 4, -1, -1, MSM8916_SLAVE_MPM, MSM8916_SLAVE_SNOC_CFG, MSM8916_SLAVE_RBCPR_CFG, MSM8916_SLAVE_QDSS_CFG, MSM8916_SLAVE_DEHR_CFG);
DEFINE_QNODE(pcnoc_s_4, MSM8916_PNOC_SLV_4, 4, -1, -1, MSM8916_SLAVE_VENUS_CFG, MSM8916_SLAVE_CAMERA_CFG, MSM8916_SLAVE_DISPLAY_CFG);
DEFINE_QNODE(pcnoc_s_8, MSM8916_PNOC_SLV_8, 4, -1, -1, MSM8916_SLAVE_USB_HS, MSM8916_SLAVE_SDCC_1, MSM8916_SLAVE_BLSP_1);
DEFINE_QNODE(pcnoc_s_9, MSM8916_PNOC_SLV_9, 4, -1, -1, MSM8916_SLAVE_SDCC_2, MSM8916_SLAVE_LPASS, MSM8916_SLAVE_GRAPHICS_3D_CFG);
DEFINE_QNODE(pcnoc_snoc_mas, MSM8916_PNOC_SNOC_MAS, 8, 29, -1, MSM8916_PNOC_SNOC_SLV);
DEFINE_QNODE(pcnoc_snoc_slv, MSM8916_PNOC_SNOC_SLV, 8, -1, 45, MSM8916_SNOC_INT_0, MSM8916_SNOC_INT_BIMC, MSM8916_SNOC_INT_1);
DEFINE_QNODE(qdss_int, MSM8916_SNOC_QDSS_INT, 8, -1, -1, MSM8916_SNOC_INT_0, MSM8916_SNOC_INT_BIMC);
......
......@@ -499,11 +499,19 @@ static int validate_queue_index(struct hl_device *hdev,
struct asic_fixed_properties *asic = &hdev->asic_prop;
struct hw_queue_properties *hw_queue_prop;
/* This must be checked here to prevent out-of-bounds access to
* hw_queues_props array
*/
if (chunk->queue_index >= HL_MAX_QUEUES) {
dev_err(hdev->dev, "Queue index %d is invalid\n",
chunk->queue_index);
return -EINVAL;
}
hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
if ((chunk->queue_index >= HL_MAX_QUEUES) ||
(hw_queue_prop->type == QUEUE_TYPE_NA)) {
dev_err(hdev->dev, "Queue index %d is invalid\n",
if (hw_queue_prop->type == QUEUE_TYPE_NA) {
dev_err(hdev->dev, "Queue index %d is not applicable\n",
chunk->queue_index);
return -EINVAL;
}
......
......@@ -36,7 +36,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
pkt.i2c_reg = i2c_reg;
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_DEVICE_TIMEOUT_USEC, (long *) val);
0, (long *) val);
if (rc)
dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc);
......@@ -63,7 +63,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
pkt.value = cpu_to_le64(val);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_DEVICE_TIMEOUT_USEC, NULL);
0, NULL);
if (rc)
dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc);
......@@ -87,7 +87,7 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
pkt.value = cpu_to_le64(state);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_DEVICE_TIMEOUT_USEC, NULL);
0, NULL);
if (rc)
dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc);
......@@ -981,7 +981,7 @@ static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,
if (*ppos)
return 0;
sprintf(tmp_buf, "%d\n", hdev->clock_gating);
sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask);
rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
strlen(tmp_buf) + 1);
......@@ -993,7 +993,7 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
u32 value;
u64 value;
ssize_t rc;
if (atomic_read(&hdev->in_reset)) {
......@@ -1002,19 +1002,12 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
return 0;
}
rc = kstrtouint_from_user(buf, count, 10, &value);
rc = kstrtoull_from_user(buf, count, 16, &value);
if (rc)
return rc;
if (value) {
hdev->clock_gating = 1;
if (hdev->asic_funcs->enable_clock_gating)
hdev->asic_funcs->enable_clock_gating(hdev);
} else {
if (hdev->asic_funcs->disable_clock_gating)
hdev->asic_funcs->disable_clock_gating(hdev);
hdev->clock_gating = 0;
}
hdev->clock_gating_mask = value;
hdev->asic_funcs->set_clock_gating(hdev);
return count;
}
......
......@@ -608,7 +608,7 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
hdev->in_debug = 0;
if (!hdev->hard_reset_pending)
hdev->asic_funcs->enable_clock_gating(hdev);
hdev->asic_funcs->set_clock_gating(hdev);
goto out;
}
......
......@@ -61,7 +61,7 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT);
return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL);
sizeof(pkt), 0, NULL);
}
int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
......@@ -144,7 +144,7 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
pkt.value = cpu_to_le64(event_type);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_DEVICE_TIMEOUT_USEC, &result);
0, &result);
if (rc)
dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
......@@ -183,7 +183,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
ARMCP_PKT_CTL_OPCODE_SHIFT);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result);
total_pkt_size, 0, &result);
if (rc)
dev_err(hdev->dev, "failed to unmask IRQ array\n");
......@@ -204,7 +204,7 @@ int hl_fw_test_cpu_queue(struct hl_device *hdev)
test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
sizeof(test_pkt), 0, &result);
if (!rc) {
if (result != ARMCP_PACKET_FENCE_VAL)
......@@ -248,7 +248,7 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)
hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
sizeof(hb_pkt), 0, &result);
if ((rc) || (result != ARMCP_PACKET_FENCE_VAL))
rc = -EIO;
......
......@@ -80,6 +80,7 @@
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
#define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
......@@ -98,6 +99,11 @@
#define GAUDI_ARB_WDT_TIMEOUT 0x1000000
#define GAUDI_CLK_GATE_DEBUGFS_MASK (\
BIT(GAUDI_ENGINE_ID_MME_0) |\
BIT(GAUDI_ENGINE_ID_MME_2) |\
GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
......@@ -106,14 +112,14 @@ static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
};
static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
[GAUDI_PCI_DMA_1] = 0,
[GAUDI_PCI_DMA_2] = 1,
[GAUDI_PCI_DMA_3] = 5,
[GAUDI_HBM_DMA_1] = 2,
[GAUDI_HBM_DMA_2] = 3,
[GAUDI_HBM_DMA_3] = 4,
[GAUDI_HBM_DMA_4] = 6,
[GAUDI_HBM_DMA_5] = 7
[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
[GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
};
static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
......@@ -1819,7 +1825,7 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
gaudi_init_rate_limiter(hdev);
gaudi_disable_clock_gating(hdev);
hdev->asic_funcs->disable_clock_gating(hdev);
for (tpc_id = 0, tpc_offset = 0;
tpc_id < TPC_NUMBER_OF_ENGINES;
......@@ -2531,46 +2537,55 @@ static void gaudi_tpc_stall(struct hl_device *hdev)
WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
}
static void gaudi_enable_clock_gating(struct hl_device *hdev)
static void gaudi_set_clock_gating(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
u32 qman_offset;
int i;
if (!hdev->clock_gating)
return;
if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE)
return;
/* In case we are during debug session, don't enable the clock gate
* as it may interfere
*/
if (hdev->in_debug)
return;
for (i = 0, qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
if (!(hdev->clock_gating_mask &
(BIT_ULL(gaudi_dma_assignment[i]))))
continue;
qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
QMAN_UPPER_CP_CGM_PWR_GATE_EN);
}
for (; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
if (!(hdev->clock_gating_mask &
(BIT_ULL(gaudi_dma_assignment[i]))))
continue;
qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
QMAN_COMMON_CP_CGM_PWR_GATE_EN);
}
WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
WREG32(mmMME0_QM_CGM_CFG,
QMAN_COMMON_CP_CGM_PWR_GATE_EN);
WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
WREG32(mmMME2_QM_CGM_CFG,
QMAN_COMMON_CP_CGM_PWR_GATE_EN);
if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0))) {
WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
WREG32(mmMME0_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN);
}
if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2))) {
WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
WREG32(mmMME2_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN);
}
for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
if (!(hdev->clock_gating_mask &
(BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i))))
continue;
WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
QMAN_CGM1_PWR_GATE_EN);
WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
......@@ -2663,7 +2678,7 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
gaudi_stop_hbm_dma_qmans(hdev);
gaudi_stop_pci_dma_qmans(hdev);
gaudi_disable_clock_gating(hdev);
hdev->asic_funcs->disable_clock_gating(hdev);
msleep(wait_timeout_ms);
......@@ -3003,7 +3018,7 @@ static int gaudi_hw_init(struct hl_device *hdev)
gaudi_init_tpc_qmans(hdev);
gaudi_enable_clock_gating(hdev);
hdev->asic_funcs->set_clock_gating(hdev);
gaudi_enable_timestamp(hdev);
......@@ -3112,7 +3127,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
HW_CAP_HBM_DMA | HW_CAP_PLL |
HW_CAP_MMU |
HW_CAP_SRAM_SCRAMBLER |
HW_CAP_HBM_SCRAMBLER);
HW_CAP_HBM_SCRAMBLER |
HW_CAP_CLK_GATE);
memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
}
......@@ -3463,6 +3480,9 @@ static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
return 0;
}
if (!timeout)
timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
timeout, result);
}
......@@ -3865,6 +3885,12 @@ static int gaudi_validate_cb(struct hl_device *hdev,
rc = -EPERM;
break;
case PACKET_WREG_BULK:
dev_err(hdev->dev,
"User not allowed to use WREG_BULK\n");
rc = -EPERM;
break;
case PACKET_LOAD_AND_EXE:
rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
(struct packet_load_and_exe *) user_pkt);
......@@ -3880,7 +3906,6 @@ static int gaudi_validate_cb(struct hl_device *hdev,
break;
case PACKET_WREG_32:
case PACKET_WREG_BULK:
case PACKET_MSG_LONG:
case PACKET_MSG_SHORT:
case PACKET_REPEAT:
......@@ -4521,13 +4546,18 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
int rc = 0;
if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
(hdev->clock_gating_mask &
GAUDI_CLK_GATE_DEBUGFS_MASK)) {
dev_err_ratelimited(hdev->dev,
"Can't read register - clock gating is enabled!\n");
rc = -EFAULT;
} else {
*val = RREG32(addr - CFG_BASE);
}
} else if ((addr >= SRAM_BASE_ADDR) &&
(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
......@@ -4563,13 +4593,18 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
int rc = 0;
if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
(hdev->clock_gating_mask &
GAUDI_CLK_GATE_DEBUGFS_MASK)) {
dev_err_ratelimited(hdev->dev,
"Can't write register - clock gating is enabled!\n");
rc = -EFAULT;
} else {
WREG32(addr - CFG_BASE, val);
}
} else if ((addr >= SRAM_BASE_ADDR) &&
(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
......@@ -4605,7 +4640,11 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
int rc = 0;
if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
(hdev->clock_gating_mask &
GAUDI_CLK_GATE_DEBUGFS_MASK)) {
dev_err_ratelimited(hdev->dev,
"Can't read register - clock gating is enabled!\n");
rc = -EFAULT;
......@@ -4615,6 +4654,7 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
*val = (((u64) val_h) << 32) | val_l;
}
} else if ((addr >= SRAM_BASE_ADDR) &&
(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
......@@ -4651,7 +4691,11 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
int rc = 0;
if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
(hdev->clock_gating_mask &
GAUDI_CLK_GATE_DEBUGFS_MASK)) {
dev_err_ratelimited(hdev->dev,
"Can't write register - clock gating is enabled!\n");
rc = -EFAULT;
......@@ -4660,6 +4704,7 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
WREG32(addr + sizeof(u32) - CFG_BASE,
upper_32_bits(val));
}
} else if ((addr >= SRAM_BASE_ADDR) &&
(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
......@@ -4881,7 +4926,7 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
hdev->asic_funcs->enable_clock_gating(hdev);
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
}
......@@ -5262,7 +5307,7 @@ static void gaudi_print_ecc_info_generic(struct hl_device *hdev,
}
if (disable_clock_gating) {
hdev->asic_funcs->enable_clock_gating(hdev);
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
}
}
......@@ -5749,7 +5794,7 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
/* Clear interrupts */
WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
hdev->asic_funcs->enable_clock_gating(hdev);
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
......@@ -6265,7 +6310,7 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
if (s)
seq_puts(s, "\n");
hdev->asic_funcs->enable_clock_gating(hdev);
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
......@@ -6366,7 +6411,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
dev_err(hdev->dev,
"Timeout while waiting for TPC%d icache prefetch\n",
tpc_id);
hdev->asic_funcs->enable_clock_gating(hdev);
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
return -EIO;
}
......@@ -6395,7 +6440,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
1000,
kernel_timeout);
hdev->asic_funcs->enable_clock_gating(hdev);
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
if (rc) {
......@@ -6736,7 +6781,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
.send_heartbeat = gaudi_send_heartbeat,
.enable_clock_gating = gaudi_enable_clock_gating,
.set_clock_gating = gaudi_set_clock_gating,
.disable_clock_gating = gaudi_disable_clock_gating,
.debug_coresight = gaudi_debug_coresight,
.is_device_idle = gaudi_is_device_idle,
......
......@@ -88,6 +88,7 @@
#define GOYA_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
#define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
#define GOYA_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
#define GOYA_QMAN0_FENCE_VAL 0xD169B243
......@@ -2830,6 +2831,9 @@ int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
return 0;
}
if (!timeout)
timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC;
return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
timeout, result);
}