diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 603a114b3dfe13deaecf1ab8c3f9855e2d832fcd..658a622791fb24ba32bbac9016959eaa50acdd1c 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -505,7 +505,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 	} else
 		tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
 
-	dqpn = dev->dev->caps.sqp_start + 8 * slave + port + (dest_qpt * 2) - 1;
+	dqpn = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave + port + (dest_qpt * 2) - 1;
 
 	/* get tunnel tx data buf for slave */
 	src_qp = tun_qp->qp;
@@ -1074,9 +1074,9 @@ static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
 
 static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
 {
-	int slave_start = dev->dev->caps.sqp_start + 8 * slave;
+	int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
 
-	return (qpn >= slave_start && qpn <= slave_start + 1);
+	return (qpn >= proxy_start && qpn <= proxy_start + 1);
 }
 
 
@@ -1191,14 +1191,14 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
 	int slave;
 
 	/* Get slave that sent this packet */
-	if (wc->src_qp < dev->dev->caps.sqp_start ||
-	    wc->src_qp >= dev->dev->caps.base_tunnel_sqpn ||
+	if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
+	    wc->src_qp >= dev->dev->phys_caps.base_proxy_sqpn + 8 * MLX4_MFUNC_MAX ||
 	    (wc->src_qp & 0x1) != ctx->port - 1 ||
 	    wc->src_qp & 0x4) {
 		mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp);
 		return;
 	}
-	slave = ((wc->src_qp & ~0x7) - dev->dev->caps.sqp_start) / 8;
+	slave = ((wc->src_qp & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8;
 	if (slave != ctx->slave) {
 		mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
 			     "belongs to another slave\n", wc->src_qp);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index a8622510de42ad700b9c71c9474e700a1cab80c6..96fe103f5973f64f108ab25bea3d215ace3024b8 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -116,33 +116,57 @@ static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
 	if (!mlx4_is_master(dev->dev))
 		return 0;
 
-	return qp->mqp.qpn >= dev->dev->caps.base_sqpn &&
-	       qp->mqp.qpn < dev->dev->caps.base_sqpn +
-	       8 + 16 * MLX4_MFUNC_MAX;
+	return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn &&
+	       qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn +
+		8 * MLX4_MFUNC_MAX;
 }
 
 static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
 {
-	return ((mlx4_is_master(dev->dev) &&
-		 qp->mqp.qpn >= dev->dev->caps.base_sqpn &&
-		 qp->mqp.qpn <= dev->dev->caps.base_sqpn + 3) ||
-		(qp->mqp.qpn >= dev->dev->caps.sqp_start &&
-		 qp->mqp.qpn <= dev->dev->caps.sqp_start + 3));
+	int proxy_sqp = 0;
+	int real_sqp = 0;
+	int i;
+	/* PPF or Native -- real SQP */
+	real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
+		    qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
+		    qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3);
+	if (real_sqp)
+		return 1;
+	/* VF or PF -- proxy SQP */
+	if (mlx4_is_mfunc(dev->dev)) {
+		for (i = 0; i < dev->dev->caps.num_ports; i++) {
+			if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] ||
+			    qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
+				proxy_sqp = 1;
+				break;
+			}
+		}
+	}
+	return proxy_sqp;
 }
 
 /* used for INIT/CLOSE port logic */
 static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
 {
-	int qp0;
-
-	/* qp0 is either the proxy qp0, or the real qp0 */
-	qp0 = (qp->mqp.qpn >= dev->dev->caps.sqp_start &&
-		qp->mqp.qpn <= dev->dev->caps.sqp_start + 1) ||
-		(mlx4_is_master(dev->dev) &&
-		 qp->mqp.qpn >= dev->dev->caps.base_sqpn &&
-		 qp->mqp.qpn <= dev->dev->caps.base_sqpn + 1);
-
-	return qp0;
+	int proxy_qp0 = 0;
+	int real_qp0 = 0;
+	int i;
+	/* PPF or Native -- real QP0 */
+	real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
+		    qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
+		    qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1);
+	if (real_qp0)
+		return 1;
+	/* VF or PF -- proxy QP0 */
+	if (mlx4_is_mfunc(dev->dev)) {
+		for (i = 0; i < dev->dev->caps.num_ports; i++) {
+			if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
+				proxy_qp0 = 1;
+				break;
+			}
+		}
+	}
+	return proxy_qp0;
 }
 
 static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
@@ -607,8 +631,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
 		else
 			qp_type = MLX4_IB_QPT_TUN_SMI;
-		qpn = dev->dev->caps.base_tunnel_sqpn + 8 * tnl_init->slave +
-		      tnl_init->proxy_qp_type * 2 + tnl_init->port - 1;
+		/* we are definitely in the PPF here, since we are creating
+		 * tunnel QPs. base_tunnel_sqpn is therefore valid. */
+		qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave
+			+ tnl_init->proxy_qp_type * 2 + tnl_init->port - 1;
 		sqpn = qpn;
 	}
 
@@ -630,12 +656,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 
 	qp->mlx4_ib_qp_type = qp_type;
 
-	if (mlx4_is_mfunc(dev->dev) &&
-	    (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI)) {
-		qpn -= 8;
-		sqpn -= 8;
-	}
-
 	mutex_init(&qp->mutex);
 	spin_lock_init(&qp->sq.lock);
 	spin_lock_init(&qp->rq.lock);
@@ -935,6 +955,23 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
 	del_gid_entries(qp);
 }
 
+static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
+{
+	/* Native or PPF */
+	if (!mlx4_is_mfunc(dev->dev) ||
+	    (mlx4_is_master(dev->dev) &&
+	     attr->create_flags & MLX4_IB_SRIOV_SQP)) {
+		return  dev->dev->phys_caps.base_sqpn +
+			(attr->qp_type == IB_QPT_SMI ? 0 : 2) +
+			attr->port_num - 1;
+	}
+	/* PF or VF -- creating proxies */
+	if (attr->qp_type == IB_QPT_SMI)
+		return dev->dev->caps.qp0_proxy[attr->port_num - 1];
+	else
+		return dev->dev->caps.qp1_proxy[attr->port_num - 1];
+}
+
 struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 				struct ib_qp_init_attr *init_attr,
 				struct ib_udata *udata)
@@ -998,9 +1035,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 			return ERR_PTR(-EINVAL);
 
 		err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
-				       to_mdev(pd->device)->dev->caps.sqp_start +
-				       (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
-				       init_attr->port_num - 1,
+				       get_sqp_num(to_mdev(pd->device), init_attr),
 				       &qp);
 		if (err)
 			return ERR_PTR(err);
@@ -1643,8 +1678,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
 		sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
 	else
 		sqp->ud_header.bth.destination_qpn =
-			cpu_to_be32(mdev->dev->caps.base_tunnel_sqpn +
-				    sqp->qp.port - 1);
+			cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
 
 	sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
 	if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
@@ -2012,10 +2046,10 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
 			cpu_to_be32(0xf0000000);
 
 	memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
-	dseg->dqpn = cpu_to_be32(dev->dev->caps.base_tunnel_sqpn +
-				 qpt * 2 + port - 1);
-	/* use well-known qkey from the QPC */
-	dseg->qkey = cpu_to_be32(0x80000000);
+	/* This function used only for sending on QP1 proxies */
+	dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+	/* Use QKEY from the QP context, which is set by master */
+	dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
 }
 
 static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 1d74e8541146a2bac5ccf1d6422e01d4916f5120..e2ed36e2fb291effa9c060ce9202599ad5cf6a58 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -184,8 +184,6 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 #define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET		0x28
 #define QUERY_FUNC_CAP_MAX_EQ_OFFSET		0x2c
 #define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET	0X30
-#define QUERY_FUNC_CAP_BASE_TUNNEL_QPN_OFFSET	0x44
-#define QUERY_FUNC_CAP_BASE_PROXY_QPN_OFFSET	0x48
 
 #define QUERY_FUNC_CAP_FMR_FLAG			0x80
 #define QUERY_FUNC_CAP_FLAG_RDMA		0x40
@@ -196,21 +194,39 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 #define QUERY_FUNC_CAP_RDMA_PROPS_OFFSET	0x8
 #define QUERY_FUNC_CAP_ETH_PROPS_OFFSET		0xc
 
+#define QUERY_FUNC_CAP_QP0_TUNNEL		0x10
+#define QUERY_FUNC_CAP_QP0_PROXY		0x14
+#define QUERY_FUNC_CAP_QP1_TUNNEL		0x18
+#define QUERY_FUNC_CAP_QP1_PROXY		0x1c
+
 #define QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC	0x40
 #define QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN	0x80
 
 #define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80
 
 	if (vhcr->op_modifier == 1) {
-		field = vhcr->in_modifier;
-		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
-
 		field = 0;
 		/* ensure force vlan and force mac bits are not set */
 		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_ETH_PROPS_OFFSET);
 		/* ensure that phy_wqe_gid bit is not set */
 		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET);
 
+		field = vhcr->in_modifier; /* phys-port = logical-port */
+		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
+
+		/* size is now the QP number */
+		size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1;
+		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL);
+
+		size += 2;
+		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL);
+
+		size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1;
+		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY);
+
+		size += 2;
+		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY);
+
 	} else if (vhcr->op_modifier == 0) {
 		/* enable rdma and ethernet interfaces */
 		field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA);
@@ -249,117 +265,124 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 		size = dev->caps.num_mgms + dev->caps.num_amgms;
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET);
 
-		size = dev->caps.base_tunnel_sqpn + 8 * slave;
-		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_BASE_TUNNEL_QPN_OFFSET);
-
-		size = dev->caps.sqp_start + 8 * slave;
-		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_BASE_PROXY_QPN_OFFSET);
-
 	} else
 		err = -EINVAL;
 
 	return err;
 }
 
-int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap)
+int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
+			struct mlx4_func_cap *func_cap)
 {
 	struct mlx4_cmd_mailbox *mailbox;
 	u32			*outbox;
-	u8			field;
+	u8			field, op_modifier;
 	u32			size;
-	int			i;
 	int			err = 0;
 
+	op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
 		return PTR_ERR(mailbox);
 
-	err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FUNC_CAP,
+	err = mlx4_cmd_box(dev, 0, mailbox->dma, gen_or_port, op_modifier,
+			   MLX4_CMD_QUERY_FUNC_CAP,
 			   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 	if (err)
 		goto out;
 
 	outbox = mailbox->buf;
 
-	MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET);
-	if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) {
-		mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n");
-		err = -EPROTONOSUPPORT;
-		goto out;
-	}
-	func_cap->flags = field;
-
-	MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
-	func_cap->num_ports = field;
+	if (!op_modifier) {
+		MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET);
+		if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) {
+			mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n");
+			err = -EPROTONOSUPPORT;
+			goto out;
+		}
+		func_cap->flags = field;
 
-	MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
-	func_cap->pf_context_behaviour = size;
+		MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
+		func_cap->num_ports = field;
 
-	MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET);
-	func_cap->qp_quota = size & 0xFFFFFF;
+		MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
+		func_cap->pf_context_behaviour = size;
 
-	MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET);
-	func_cap->srq_quota = size & 0xFFFFFF;
+		MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET);
+		func_cap->qp_quota = size & 0xFFFFFF;
 
-	MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET);
-	func_cap->cq_quota = size & 0xFFFFFF;
+		MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET);
+		func_cap->srq_quota = size & 0xFFFFFF;
 
-	MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
-	func_cap->max_eq = size & 0xFFFFFF;
+		MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET);
+		func_cap->cq_quota = size & 0xFFFFFF;
 
-	MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
-	func_cap->reserved_eq = size & 0xFFFFFF;
+		MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
+		func_cap->max_eq = size & 0xFFFFFF;
 
-	MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
-	func_cap->mpt_quota = size & 0xFFFFFF;
+		MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
+		func_cap->reserved_eq = size & 0xFFFFFF;
 
-	MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET);
-	func_cap->mtt_quota = size & 0xFFFFFF;
+		MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
+		func_cap->mpt_quota = size & 0xFFFFFF;
 
-	MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET);
-	func_cap->mcg_quota = size & 0xFFFFFF;
+		MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET);
+		func_cap->mtt_quota = size & 0xFFFFFF;
 
-	MLX4_GET(size, outbox, QUERY_FUNC_CAP_BASE_TUNNEL_QPN_OFFSET);
-	func_cap->base_tunnel_qpn = size & 0xFFFFFF;
+		MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET);
+		func_cap->mcg_quota = size & 0xFFFFFF;
+		goto out;
+	}
 
-	MLX4_GET(size, outbox, QUERY_FUNC_CAP_BASE_PROXY_QPN_OFFSET);
-	func_cap->base_proxy_qpn = size & 0xFFFFFF;
+	/* logical port query */
+	if (gen_or_port > dev->caps.num_ports) {
+		err = -EINVAL;
+		goto out;
+	}
 
-	for (i = 1; i <= func_cap->num_ports; ++i) {
-		err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 1,
-				   MLX4_CMD_QUERY_FUNC_CAP,
-				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
-		if (err)
+	if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) {
+		MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET);
+		if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) {
+			mlx4_err(dev, "VLAN is enforced on this port\n");
+			err = -EPROTONOSUPPORT;
 			goto out;
+		}
 
-		if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) {
-			MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET);
-			if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) {
-				mlx4_err(dev, "VLAN is enforced on this port\n");
-				err = -EPROTONOSUPPORT;
-				goto out;
-			}
-
-			if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) {
-				mlx4_err(dev, "Force mac is enabled on this port\n");
-				err = -EPROTONOSUPPORT;
-				goto out;
-			}
-		} else if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB) {
-			MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET);
-			if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) {
-				mlx4_err(dev, "phy_wqe_gid is "
-					 "enforced on this ib port\n");
-				err = -EPROTONOSUPPORT;
-				goto out;
-			}
+		if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) {
+			mlx4_err(dev, "Force mac is enabled on this port\n");
+			err = -EPROTONOSUPPORT;
+			goto out;
+		}
+	} else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) {
+		MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET);
+		if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) {
+			mlx4_err(dev, "phy_wqe_gid is "
+				 "enforced on this ib port\n");
+			err = -EPROTONOSUPPORT;
+			goto out;
 		}
+	}
 
-		MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
-		func_cap->physical_port[i] = field;
+	MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
+	func_cap->physical_port = field;
+	if (func_cap->physical_port != gen_or_port) {
+		err = -ENOSYS;
+		goto out;
 	}
 
+	MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL);
+	func_cap->qp0_tunnel_qpn = size & 0xFFFFFF;
+
+	MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY);
+	func_cap->qp0_proxy_qpn = size & 0xFFFFFF;
+
+	MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL);
+	func_cap->qp1_tunnel_qpn = size & 0xFFFFFF;
+
+	MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY);
+	func_cap->qp1_proxy_qpn = size & 0xFFFFFF;
+
 	/* All other resources are allocated by the master, but we still report
 	 * 'num' and 'reserved' capabilities as follows:
 	 * - num remains the maximum resource index
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index ced1de57c81868120f26b993764d0f031d4d46ca..85abe9c11a2262e199682f33f6ec77ac92062624 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -134,11 +134,12 @@ struct mlx4_func_cap {
 	int	max_eq;
 	int	reserved_eq;
 	int	mcg_quota;
-	u32	base_qpn;
-	u32	base_tunnel_qpn;
-	u32	base_proxy_qpn;
-	u8	physical_port[MLX4_MAX_PORTS + 1];
-	u8	port_flags[MLX4_MAX_PORTS + 1];
+	u32	qp0_tunnel_qpn;
+	u32	qp0_proxy_qpn;
+	u32	qp1_tunnel_qpn;
+	u32	qp1_proxy_qpn;
+	u8	physical_port;
+	u8	port_flags;
 };
 
 struct mlx4_adapter {
@@ -195,7 +196,8 @@ struct mlx4_set_ib_param {
 };
 
 int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
-int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap);
+int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
+			struct mlx4_func_cap *func_cap);
 int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 				struct mlx4_vhcr *vhcr,
 				struct mlx4_cmd_mailbox *inbox,
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 76f69fdd01d5930cb207beb88bc5a9b96528fa63..1e2ab9d00f1c72965a70d43f7d95466fc088e3b1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -410,15 +410,16 @@ static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
 int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
 {
 	u32 qk = MLX4_RESERVED_QKEY_BASE;
-	if (qpn >= dev->caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
-	    qpn < dev->caps.sqp_start)
+
+	if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
+	    qpn < dev->phys_caps.base_proxy_sqpn)
 		return -EINVAL;
 
-	if (qpn >= dev->caps.base_tunnel_sqpn)
+	if (qpn >= dev->phys_caps.base_tunnel_sqpn)
 		/* tunnel qp */
-		qk += qpn - dev->caps.base_tunnel_sqpn;
+		qk += qpn - dev->phys_caps.base_tunnel_sqpn;
 	else
-		qk += qpn - dev->caps.sqp_start;
+		qk += qpn - dev->phys_caps.base_proxy_sqpn;
 	*qkey = qk;
 	return 0;
 }
@@ -527,9 +528,10 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 	}
 
 	memset(&func_cap, 0, sizeof(func_cap));
-	err = mlx4_QUERY_FUNC_CAP(dev, &func_cap);
+	err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
 	if (err) {
-		mlx4_err(dev, "QUERY_FUNC_CAP command failed, aborting.\n");
+		mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n",
+			  err);
 		return err;
 	}
 
@@ -557,12 +559,33 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 		return -ENODEV;
 	}
 
+	dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+	dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+	dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+	dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+
+	if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
+	    !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
+		err = -ENOMEM;
+		goto err_mem;
+	}
+
 	for (i = 1; i <= dev->caps.num_ports; ++i) {
+		err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap);
+		if (err) {
+			mlx4_err(dev, "QUERY_FUNC_CAP port command failed for"
+				 " port %d, aborting (%d).\n", i, err);
+			goto err_mem;
+		}
+		dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
+		dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
+		dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
+		dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
 		dev->caps.port_mask[i] = dev->caps.port_type[i];
 		if (mlx4_get_slave_pkey_gid_tbl_len(dev, i,
 						    &dev->caps.gid_table_len[i],
 						    &dev->caps.pkey_table_len[i]))
-			return -ENODEV;
+			goto err_mem;
 	}
 
 	if (dev->caps.uar_page_size * (dev->caps.num_uars -
@@ -572,14 +595,20 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 			 "PCI resource 2 size of 0x%llx, aborting.\n",
 			 dev->caps.uar_page_size * dev->caps.num_uars,
 			 (unsigned long long) pci_resource_len(dev->pdev, 2));
-		return -ENODEV;
+		goto err_mem;
 	}
 
-	/* Calculate our sqp_start */
-	dev->caps.sqp_start = func_cap.base_proxy_qpn;
-	dev->caps.base_tunnel_sqpn = func_cap.base_tunnel_qpn;
-
 	return 0;
+
+err_mem:
+	kfree(dev->caps.qp0_tunnel);
+	kfree(dev->caps.qp0_proxy);
+	kfree(dev->caps.qp1_tunnel);
+	kfree(dev->caps.qp1_proxy);
+	dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
+		dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
+
+	return err;
 }
 
 /*
@@ -2261,6 +2290,12 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 
 		if (!mlx4_is_slave(dev))
 			mlx4_free_ownership(dev);
+
+		kfree(dev->caps.qp0_tunnel);
+		kfree(dev->caps.qp0_proxy);
+		kfree(dev->caps.qp1_tunnel);
+		kfree(dev->caps.qp1_proxy);
+
 		kfree(priv);
 		pci_release_regions(pdev);
 		pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 436ef6c69add5f3b21208c312c0b8fceb716a677..81e2abe07bbbf656689ba327ab0d4d22368961df 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -68,15 +68,15 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
 }
 
 /* used for INIT/CLOSE port logic */
-static int is_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0)
+static int is_master_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0)
 {
+	/* this procedure is called after we already know we are on the master */
 	/* qp0 is either the proxy qp0, or the real qp0 */
-	*proxy_qp0 = qp->qpn >= dev->caps.sqp_start &&
-		qp->qpn <= dev->caps.sqp_start + 1;
+	u32 pf_proxy_offset = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev);
+	*proxy_qp0 = qp->qpn >= pf_proxy_offset && qp->qpn <= pf_proxy_offset + 1;
 
-	*real_qp0 = mlx4_is_master(dev) &&
-		qp->qpn >= dev->caps.base_sqpn &&
-		qp->qpn <= dev->caps.base_sqpn + 1;
+	*real_qp0 = qp->qpn >= dev->phys_caps.base_sqpn &&
+		qp->qpn <= dev->phys_caps.base_sqpn + 1;
 
 	return *real_qp0 || *proxy_qp0;
 }
@@ -143,7 +143,7 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 			MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native);
 		if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR &&
 		    cur_state != MLX4_QP_STATE_RST &&
-		    is_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
+		    is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
 			port = (qp->qpn & 1) + 1;
 			if (proxy_qp0)
 				priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
@@ -175,7 +175,7 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		       new_state == MLX4_QP_STATE_RST ? 2 : 0,
 		       op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native);
 
-	if (mlx4_is_master(dev) && is_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
+	if (mlx4_is_master(dev) && is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
 		port = (qp->qpn & 1) + 1;
 		if (cur_state != MLX4_QP_STATE_ERR &&
 		    cur_state != MLX4_QP_STATE_RST &&
@@ -422,6 +422,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
 	struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
 	int err;
 	int reserved_from_top = 0;
+	int k;
 
 	spin_lock_init(&qp_table->lock);
 	INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC);
@@ -436,7 +437,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
 	 * We also reserve the MSB of the 24-bit QP number to indicate
 	 * that a QP is an XRC QP.
 	 */
-	dev->caps.base_sqpn =
+	dev->phys_caps.base_sqpn =
 		ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8);
 
 	{
@@ -479,24 +480,54 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
 	*/
 
 	err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps,
-			       (1 << 23) - 1, dev->caps.base_sqpn + 8 +
+			       (1 << 23) - 1, dev->phys_caps.base_sqpn + 8 +
 			       16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev),
 			       reserved_from_top);
+	if (err)
+		return err;
 
-	/* In mfunc, sqp_start is the base of the proxy SQPs, since the PF also
-	 * uses paravirtualized SQPs.
-	 * In native mode, sqp_start is the base of the real SQPs. */
 	if (mlx4_is_mfunc(dev)) {
-		dev->caps.sqp_start = dev->caps.base_sqpn +
-			8 * (mlx4_master_func_num(dev) + 1);
-		dev->caps.base_tunnel_sqpn = dev->caps.sqp_start + 8 * MLX4_MFUNC_MAX;
-	} else
-		dev->caps.sqp_start = dev->caps.base_sqpn;
+		/* for PPF use */
+		dev->phys_caps.base_proxy_sqpn = dev->phys_caps.base_sqpn + 8;
+		dev->phys_caps.base_tunnel_sqpn = dev->phys_caps.base_sqpn + 8 + 8 * MLX4_MFUNC_MAX;
+
+		/* In mfunc, calculate proxy and tunnel qp offsets for the PF here,
+		 * since the PF does not call mlx4_slave_caps */
+		dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+		dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+		dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+		dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+
+		if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
+		    !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
+			err = -ENOMEM;
+			goto err_mem;
+		}
 
+		for (k = 0; k < dev->caps.num_ports; k++) {
+			dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn +
+				8 * mlx4_master_func_num(dev) + k;
+			dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX;
+			dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn +
+				8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k;
+			dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX;
+		}
+	}
+
+
+	err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn);
 	if (err)
-		return err;
+		goto err_mem;
+	return 0;
 
-	return mlx4_CONF_SPECIAL_QP(dev, dev->caps.base_sqpn);
+err_mem:
+	kfree(dev->caps.qp0_tunnel);
+	kfree(dev->caps.qp0_proxy);
+	kfree(dev->caps.qp1_tunnel);
+	kfree(dev->caps.qp1_proxy);
+	dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
+		dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
+	return err;
 }
 
 void mlx4_cleanup_qp_table(struct mlx4_dev *dev)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index c0f8f74a0a5eaeefb1cab38e1093b7819a2662d9..6d1acb04cd17c1babf6d477c0554156afd6d5950 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -328,6 +328,9 @@ struct mlx4_phys_caps {
 	u32			gid_phys_table_len[MLX4_MAX_PORTS + 1];
 	u32			pkey_phys_table_len[MLX4_MAX_PORTS + 1];
 	u32			num_phys_eqs;
+	u32			base_sqpn;
+	u32			base_proxy_sqpn;
+	u32			base_tunnel_sqpn;
 };
 
 struct mlx4_caps {
@@ -358,9 +361,10 @@ struct mlx4_caps {
 	int			max_rq_desc_sz;
 	int			max_qp_init_rdma;
 	int			max_qp_dest_rdma;
-	int			sqp_start;
-	u32			base_sqpn;
-	u32			base_tunnel_sqpn;
+	u32			*qp0_proxy;
+	u32			*qp1_proxy;
+	u32			*qp0_tunnel;
+	u32			*qp1_tunnel;
 	int			num_srqs;
 	int			max_srq_wqes;
 	int			max_srq_sge;
@@ -722,15 +726,15 @@ static inline int mlx4_is_master(struct mlx4_dev *dev)
 
 static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn)
 {
-	return (qpn < dev->caps.base_sqpn + 8 +
+	return (qpn < dev->phys_caps.base_sqpn + 8 +
 		16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev));
 }
 
 static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn)
 {
-	int base = dev->caps.sqp_start + slave * 8;
+	int guest_proxy_base = dev->phys_caps.base_proxy_sqpn + slave * 8;
 
-	if (qpn >= base && qpn < base + 8)
+	if (qpn >= guest_proxy_base && qpn < guest_proxy_base + 8)
 		return 1;
 
 	return 0;