isert: Allocate sge and wr structures dynamically

In order to be able to support large block sizes (larger than 512K),
we allocate the required structures in a lazy fashion. This way we support
large block sizes (over 512K and even over 1M) and also lower the memory
footprint when smaller block sizes are used.

Signed-off-by: Yan Burman <yanb@mellanox.com>


git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@6418 d57e44dd-8a1f-0410-8b47-8ef2f437770f
This commit is contained in:
Bart Van Assche
2015-07-11 03:41:15 +00:00
parent d1c2869292
commit ebcfc73439
5 changed files with 88 additions and 42 deletions

View File

@@ -24,7 +24,6 @@ NUMA node, you want the HCA to be close to the same node.
Limitations:
-------------
* Bidirectional commands are not supported
* Block size over 512KB is not supported
* Maximum number of concurrent login requests that can be handled is 127 by default.
Note that there may be more connections, but only up to 127 login requests
can be handled at the same time. If you wish to increase this, load isert_scst with

View File

@@ -1,6 +1,5 @@
* Add support for immediate data in iSER
* Add support for data-out in iSER
* Look into allocating wr and sg entries dynamically from kmem_cache instead of embedding them into iser_cmnd
* Look into separating between RX pdu and TX pdu
* Do not signal every "response sent" notification
* Make the code NUMA aware

View File

@@ -102,9 +102,6 @@ struct isert_wr {
};
} ____cacheline_aligned;
#define ISER_MAX_SGE 128
#define ISER_MAX_RDMAS 5
#define ISER_SQ_SIZE 128
#define ISER_MAX_WCE 2048
@@ -115,8 +112,10 @@ struct isert_cmnd {
struct isert_buf buf;
struct isert_buf rdma_buf;
struct isert_wr wr[ISER_MAX_RDMAS];
struct ib_sge sg_pool[ISER_MAX_SGE];
struct isert_wr *wr;
struct ib_sge *sg_pool;
int n_wr;
int n_sge;
struct isert_hdr *isert_hdr ____cacheline_aligned;
struct iscsi_hdr *bhs;

View File

@@ -166,6 +166,50 @@ static inline int isert_pdu_prepare_send(struct isert_connection *isert_conn,
return sg_cnt;
}
/*
 * isert_alloc_for_rdma - (re)allocate the SGE pool and WR array of a PDU.
 * @pdu:        command whose wr / sg_pool arrays are replaced
 * @sge_cnt:    number of ib_sge entries the new pool must hold
 * @isert_conn: connection supplying max_sge and the lkey of its device MR
 *
 * Allocates sge_cnt scatter/gather entries and
 * DIV_ROUND_UP(sge_cnt, isert_conn->max_sge) work requests with GFP_KERNEL.
 * Only after both allocations have succeeded are the arrays previously
 * attached to @pdu freed and replaced, so on failure @pdu is left untouched.
 * Every new work request is initialised through isert_wr_set_fields() and
 * every new SGE gets the lkey of the connection's device memory region.
 *
 * Returns 0 on success or -ENOMEM if either allocation fails.
 */
static int isert_alloc_for_rdma(struct isert_cmnd *pdu, int sge_cnt,
				struct isert_connection *isert_conn)
{
	struct ib_sge *new_sges;
	struct isert_wr *new_wrs;
	int n_wrs;
	int idx;

	new_sges = kmalloc(sizeof(*new_sges) * sge_cnt, GFP_KERNEL);
	if (unlikely(!new_sges))
		return -ENOMEM;

	/* Enough work requests to spread sge_cnt entries, max_sge per request. */
	n_wrs = DIV_ROUND_UP(sge_cnt, isert_conn->max_sge);
	new_wrs = kmalloc(sizeof(*new_wrs) * n_wrs, GFP_KERNEL);
	if (unlikely(!new_wrs)) {
		kfree(new_sges);
		return -ENOMEM;
	}

	/*
	 * Swap in the new arrays; kfree() of a NULL pointer is a no-op.
	 * NOTE(review): the old work requests are freed without going through
	 * isert_wr_release() - confirm that is intended.
	 */
	kfree(pdu->wr);
	kfree(pdu->sg_pool);
	pdu->wr = new_wrs;
	pdu->sg_pool = new_sges;
	pdu->n_wr = n_wrs;
	pdu->n_sge = sge_cnt;

	for (idx = 0; idx < n_wrs; ++idx)
		isert_wr_set_fields(&pdu->wr[idx], isert_conn, pdu);

	for (idx = 0; idx < sge_cnt; ++idx)
		pdu->sg_pool[idx].lkey = isert_conn->isert_dev->mr->lkey;

	return 0;
}
static inline void isert_link_send_wrs(struct isert_wr *from_wr,
struct isert_wr *to_wr)
{
@@ -204,10 +248,10 @@ int isert_prepare_rdma(struct isert_cmnd *isert_pdu,
else
isert_buf->dma_dir = DMA_FROM_DEVICE;
if (unlikely(isert_buf->sg_cnt > ISER_MAX_SGE)) {
pr_err("Scatterlist too large: %d\n", isert_buf->sg_cnt);
wr_cnt = -EOPNOTSUPP;
goto out;
if (unlikely(isert_buf->sg_cnt > isert_pdu->n_sge)) {
wr_cnt = isert_alloc_for_rdma(isert_pdu, isert_buf->sg_cnt, isert_conn);
if (unlikely(wr_cnt))
goto out;
}
err = ib_dma_map_sg(ib_dev, isert_buf->sg, isert_buf->sg_cnt,
@@ -245,12 +289,18 @@ out:
void isert_pdu_free(struct isert_cmnd *pdu)
{
unsigned int i;
int i;
list_del(&pdu->pool_node);
for (i = 0; i < ARRAY_SIZE(pdu->wr); ++i)
for (i = 0; i < pdu->n_wr; ++i)
isert_wr_release(&pdu->wr[i]);
kfree(pdu->wr);
pdu->wr = NULL;
kfree(pdu->sg_pool);
pdu->sg_pool = NULL;
isert_pdu_kfree(pdu);
}
@@ -259,7 +309,6 @@ struct isert_cmnd *isert_rx_pdu_alloc(struct isert_connection *isert_conn,
{
struct isert_cmnd *pdu = NULL;
int err;
unsigned int i;
TRACE_ENTRY();
@@ -269,6 +318,12 @@ struct isert_cmnd *isert_rx_pdu_alloc(struct isert_connection *isert_conn,
goto out;
}
err = isert_alloc_for_rdma(pdu, 4, isert_conn);
if (unlikely(err)) {
pr_err("Failed to alloc sge and wr for rx pdu\n");
goto out;
}
err = isert_buf_alloc_data_buf(isert_conn->isert_dev->ib_dev,
&pdu->buf, size, DMA_FROM_DEVICE);
if (unlikely(err)) {
@@ -283,12 +338,6 @@ struct isert_cmnd *isert_rx_pdu_alloc(struct isert_connection *isert_conn,
goto pdu_init_failed;
}
for (i = 0; i < ARRAY_SIZE(pdu->wr); ++i)
isert_wr_set_fields(&pdu->wr[i], isert_conn, pdu);
for (i = 0; i < ARRAY_SIZE(pdu->sg_pool); ++i)
pdu->sg_pool[i].lkey = isert_conn->isert_dev->mr->lkey;
list_add_tail(&pdu->pool_node, &isert_conn->rx_buf_list);
goto out;
@@ -308,7 +357,6 @@ struct isert_cmnd *isert_tx_pdu_alloc(struct isert_connection *isert_conn,
{
struct isert_cmnd *pdu = NULL;
int err;
unsigned int i;
TRACE_ENTRY();
@@ -318,6 +366,12 @@ struct isert_cmnd *isert_tx_pdu_alloc(struct isert_connection *isert_conn,
goto out;
}
err = isert_alloc_for_rdma(pdu, 4, isert_conn);
if (unlikely(err)) {
pr_err("Failed to alloc sge and wr for tx pdu\n");
goto out;
}
err = isert_buf_alloc_data_buf(isert_conn->isert_dev->ib_dev,
&pdu->buf, size, DMA_TO_DEVICE);
if (unlikely(err)) {
@@ -331,14 +385,9 @@ struct isert_cmnd *isert_tx_pdu_alloc(struct isert_connection *isert_conn,
&pdu->wr, size, err);
goto buf_init_failed;
}
isert_tx_pdu_init(pdu, isert_conn);
for (i = 0; i < ARRAY_SIZE(pdu->wr); ++i)
isert_wr_set_fields(&pdu->wr[i], isert_conn, pdu);
for (i = 0; i < ARRAY_SIZE(pdu->sg_pool); ++i)
pdu->sg_pool[i].lkey = isert_conn->isert_dev->mr->lkey;
isert_pdu_set_hdr_plain(pdu);
list_add_tail(&pdu->pool_node, &isert_conn->tx_free_list);

View File

@@ -1011,21 +1011,6 @@ static int isert_conn_qp_create(struct isert_connection *isert_conn)
isert_conn->cq_desc = &isert_dev->cq_desc[cq_idx];
/*
* A quote from the OFED 1.5.3.1 release notes
* (docs/release_notes/mthca_release_notes.txt), section "Known Issues":
* In mem-free devices, RC QPs can be created with a maximum of
* (max_sge - 1) entries only; UD QPs can be created with a maximum of
* (max_sge - 3) entries.
* A quote from the OFED 1.2.5 release notes
* (docs/mthca_release_notes.txt), section "Known Issues":
* In mem-free devices, RC QPs can be created with a maximum of
* (max_sge - 3) entries only.
*/
isert_conn->max_sge = isert_dev->device_attr.max_sge - 3;
WARN_ON(isert_conn->max_sge < 1);
qp_attr.cap.max_send_sge = isert_conn->max_sge;
qp_attr.cap.max_recv_sge = 3;
qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -1089,6 +1074,21 @@ static struct isert_connection *isert_conn_create(struct rdma_cm_id *cm_id,
isert_conn->cm_id = cm_id;
isert_conn->isert_dev = isert_dev;
/*
* A quote from the OFED 1.5.3.1 release notes
* (docs/release_notes/mthca_release_notes.txt), section "Known Issues":
* In mem-free devices, RC QPs can be created with a maximum of
* (max_sge - 1) entries only; UD QPs can be created with a maximum of
* (max_sge - 3) entries.
* A quote from the OFED 1.2.5 release notes
* (docs/mthca_release_notes.txt), section "Known Issues":
* In mem-free devices, RC QPs can be created with a maximum of
* (max_sge - 3) entries only.
*/
isert_conn->max_sge = isert_dev->device_attr.max_sge - 3;
WARN_ON(isert_conn->max_sge < 1);
INIT_LIST_HEAD(&isert_conn->rx_buf_list);
INIT_LIST_HEAD(&isert_conn->tx_free_list);
INIT_LIST_HEAD(&isert_conn->tx_busy_list);