diff --git a/scst/README b/scst/README index 8771a5038..e156470ac 100644 --- a/scst/README +++ b/scst/README @@ -624,6 +624,10 @@ applications, so, if you experience large transfers stalls, you should check documentation for your application how to limit the transfer sizes. +Another way to solve this issue is to build SG entries with more than 1 +page each. See the following patch as an example: +http://scst.sf.net/sgv_big_order_alloc.diff + User space mode using scst_user dev handler ------------------------------------------- diff --git a/www/contributing.html b/www/contributing.html index 11f9e8b2f..adf70162b 100644 --- a/www/contributing.html +++ b/www/contributing.html @@ -308,6 +308,21 @@ { return vdisk_exec_fns[cmd->cdb[0]](cmd); }
+ +In the pass-through mode (i.e. using the pass-through device handlers + scst_disk, scst_tape, etc) SCSI commands, coming from remote initiators, + are passed to local SCSI hardware on target as is, without any + modifications. As any other hardware, the local SCSI hardware can not + handle commands with amount of data and/or segments count in + scatter-gather array bigger some values. If you have this issue you will see + symptoms like small transfers work well, but large ones stall and + messages like: "Unable to complete command due to SG IO count + limitation" are printed in the kernel logs.
+ +In sgv_big_order_alloc.diff you + can find a possible way to solve this issue.
diff --git a/www/sgv_big_order_alloc.diff b/www/sgv_big_order_alloc.diff new file mode 100644 index 000000000..46d24c6a4 --- /dev/null +++ b/www/sgv_big_order_alloc.diff @@ -0,0 +1,445 @@ +In the pass-through mode (i.e. using the pass-through device handlers +scst_disk, scst_tape, etc) SCSI commands, coming from remote initiators, +are passed to local SCSI hardware on target as is, without any +modifications. As any other hardware, the local SCSI hardware can not +handle commands with amount of data and/or segments count in +scatter-gather array bigger some values. If you have this issue you will +see symptoms like small transfers work well, but large ones stall and +messages like: "Unable to complete command due to SG IO count +limitation" are printed in the kernel logs. + +This is proposed patch to solve that. It allows SGV cache do allocation +of pages with order > 0, i.e. more than 1 page per SG entry. + +Compile tested only. + +Index: scst/include/scst.h +=================================================================== +--- scst/include/scst.h (revision 558) ++++ scst/include/scst.h (working copy) +@@ -2649,12 +2649,13 @@ struct sgv_pool *sgv_pool_create(const c + void sgv_pool_destroy(struct sgv_pool *pool); + + void sgv_pool_set_allocator(struct sgv_pool *pool, +- struct page *(*alloc_pages_fn)(struct scatterlist *, gfp_t, void *), +- void (*free_pages_fn)(struct scatterlist *, int, void *)); ++ struct page *(*alloc_pages_fn)(struct scatterlist *, gfp_t, void *, int), ++ void (*free_pages_fn)(struct scatterlist *, int, void *, int)); + + struct scatterlist *sgv_pool_alloc(struct sgv_pool *pool, unsigned int size, + gfp_t gfp_mask, int flags, int *count, +- struct sgv_pool_obj **sgv, struct scst_mem_lim *mem_lim, void *priv); ++ struct sgv_pool_obj **sgv, struct scst_mem_lim *mem_lim, void *priv, ++ int max_sg_count); + void sgv_pool_free(struct sgv_pool_obj *sgv, struct scst_mem_lim *mem_lim); + + void *sgv_get_priv(struct sgv_pool_obj *sgv); +Index: scst/src/scst_mem.h +=================================================================== +--- scst/src/scst_mem.h (revision 558) ++++ scst/src/scst_mem.h (working copy) +@@ -36,6 +36,8 @@ struct sgv_pool_obj { + /* if <0 - pages, >0 - order */ + int order_or_pages; + ++ int alloc_order; ++ + struct { + /* jiffies, protected by pool_mgr_lock */ + unsigned long time_stamp; +@@ -67,9 +69,9 @@ struct sgv_pool_cache_acc { + + struct sgv_pool_alloc_fns { + struct page *(*alloc_pages_fn)(struct scatterlist *sg, gfp_t gfp_mask, +- void *priv); ++ void *priv, int alloc_order); + void (*free_pages_fn)(struct scatterlist *sg, int sg_count, +- void *priv); ++ void *priv, int alloc_order); + }; + + struct sgv_pool { +Index: scst/src/scst_lib.c +=================================================================== +--- scst/src/scst_lib.c (revision 558) ++++ scst/src/scst_lib.c (working copy) +@@ -1663,34 +1663,18 @@ int scst_alloc_space(struct scst_cmd *cm + flags |= SCST_POOL_ALLOC_NO_CACHED; + + cmd->sg = sgv_pool_alloc(tgt_dev->pool, cmd->bufflen, gfp_mask, flags, +- &cmd->sg_cnt, &cmd->sgv, &cmd->dev->dev_mem_lim, NULL); ++ &cmd->sg_cnt, &cmd->sgv, &cmd->dev->dev_mem_lim, NULL, ++ tgt_dev->max_sg_cnt); + if (cmd->sg == NULL) + goto out; + +- if (unlikely(cmd->sg_cnt > tgt_dev->max_sg_cnt)) { +- static int ll; +- if (ll < 10) { +- PRINT_INFO("Unable to complete command due to " +- "SG IO count limitation (requested %d, " +- "available %d, tgt lim %d)", cmd->sg_cnt, +- tgt_dev->max_sg_cnt, cmd->tgt->sg_tablesize); +- ll++; +- } +- goto out_sg_free; +- } ++ EXTRACHECKS_BUG_ON(cmd->sg_cnt > tgt_dev->max_sg_cnt); + + res = 0; + + out: + TRACE_EXIT(); + return res; +- +-out_sg_free: +- sgv_pool_free(cmd->sgv, &cmd->dev->dev_mem_lim); +- cmd->sgv = NULL; +- cmd->sg = NULL; +- cmd->sg_cnt = 0; +- goto out; + } + + void scst_release_space(struct scst_cmd *cmd) +Index: scst/src/scst_mem.c +=================================================================== +--- scst/src/scst_mem.c (revision 558) ++++ scst/src/scst_mem.c (working copy) +@@ -118,7 +118,7 @@ out_head: + } + + static void scst_free_sys_sg_entries(struct scatterlist *sg, int sg_count, +- void *priv) ++ void *priv, int alloc_order) + { + int i; + +@@ -134,7 +134,7 @@ static void scst_free_sys_sg_entries(str + (unsigned long)p, len, pages); + + while (pages > 0) { +- int order = 0; ++ int order = alloc_order; + + /* + * __free_pages() doesn't like freeing pages with not that order with +@@ -159,9 +159,9 @@ static void scst_free_sys_sg_entries(str + } + + static struct page *scst_alloc_sys_pages(struct scatterlist *sg, +- gfp_t gfp_mask, void *priv) ++ gfp_t gfp_mask, void *priv, int alloc_order) + { +- struct page *page = alloc_pages(gfp_mask, 0); ++ struct page *page = alloc_pages(gfp_mask, alloc_order); + + sg_set_page(sg, page, PAGE_SIZE, 0); + TRACE_MEM("page=%p, sg=%p, priv=%p", page, sg, priv); +@@ -174,10 +174,10 @@ static struct page *scst_alloc_sys_pages + + static int scst_alloc_sg_entries(struct scatterlist *sg, int pages, + gfp_t gfp_mask, int clustered, struct trans_tbl_ent *trans_tbl, +- const struct sgv_pool_alloc_fns *alloc_fns, void *priv) ++ const struct sgv_pool_alloc_fns *alloc_fns, void *priv, int alloc_order) + { + int sg_count = 0; +- int pg, i, j; ++ int pg, i, j, pg_inc = 1 << alloc_order; + int merged = -1; + + TRACE_MEM("pages=%d, clustered=%d", pages, clustered); +@@ -189,7 +189,7 @@ static int scst_alloc_sg_entries(struct + gfp_mask |= __GFP_ZERO; + #endif + +- for (pg = 0; pg < pages; pg++) { ++ for (pg = 0; pg < pages; pg += pg_inc) { + void *rc; + #ifdef CONFIG_SCST_DEBUG_OOM + if (((gfp_mask & __GFP_NOFAIL) != __GFP_NOFAIL) && +@@ -198,7 +198,7 @@ static int scst_alloc_sg_entries(struct + else + #endif + rc = alloc_fns->alloc_pages_fn(&sg[sg_count], gfp_mask, +- priv); ++ priv, alloc_order); + if (rc == NULL) + goto out_no_mem; + if (clustered) { +@@ -229,7 +229,7 @@ out: + return sg_count; + + out_no_mem: +- alloc_fns->free_pages_fn(sg, sg_count, priv); ++ alloc_fns->free_pages_fn(sg, sg_count, priv, alloc_order); + sg_count = 0; + goto out; + } +@@ -292,7 +292,7 @@ static void sgv_dtor_and_free(struct sgv + { + if (obj->sg_count != 0) { + obj->owner_pool->alloc_fns.free_pages_fn(obj->sg_entries, +- obj->sg_count, obj->allocator_priv); ++ obj->sg_count, obj->allocator_priv, obj->alloc_order); + } + if (obj->sg_entries != obj->sg_entries_data) { + if (obj->trans_tbl != +@@ -308,6 +308,36 @@ static void sgv_dtor_and_free(struct sgv + return; + } + ++static struct sgv_pool_obj *sgv_pool_cached_create(struct sgv_pool *pool, ++ int order, gfp_t gfp_mask, bool locked) ++{ ++ struct sgv_pool_obj *obj; ++ int pages = 1 << order; ++ ++ if (!locked) ++ spin_lock_bh(&sgv_pools_mgr.mgr.pool_mgr_lock); ++ ++ pool->acc.cached_entries++; ++ pool->acc.cached_pages += pages; ++ ++ spin_unlock_bh(&sgv_pools_mgr.mgr.pool_mgr_lock); ++ ++ obj = kmem_cache_alloc(pool->caches[order], ++ gfp_mask & ~(__GFP_HIGHMEM|GFP_DMA)); ++ if (likely(obj)) { ++ memset(obj, 0, sizeof(*obj)); ++ obj->order_or_pages = order; ++ obj->owner_pool = pool; ++ } else { ++ spin_lock_bh(&sgv_pools_mgr.mgr.pool_mgr_lock); ++ pool->acc.cached_entries--; ++ pool->acc.cached_pages -= pages; ++ spin_unlock_bh(&sgv_pools_mgr.mgr.pool_mgr_lock); ++ } ++ ++ return obj; ++} ++ + static struct sgv_pool_obj *sgv_pool_cached_get(struct sgv_pool *pool, + int order, gfp_t gfp_mask) + { +@@ -332,23 +362,7 @@ static struct sgv_pool_obj *sgv_pool_cac + goto out; + } + +- pool->acc.cached_entries++; +- pool->acc.cached_pages += pages; +- +- spin_unlock_bh(&sgv_pools_mgr.mgr.pool_mgr_lock); +- +- obj = kmem_cache_alloc(pool->caches[order], +- gfp_mask & ~(__GFP_HIGHMEM|GFP_DMA)); +- if (likely(obj)) { +- memset(obj, 0, sizeof(*obj)); +- obj->order_or_pages = order; +- obj->owner_pool = pool; +- } else { +- spin_lock_bh(&sgv_pools_mgr.mgr.pool_mgr_lock); +- pool->acc.cached_entries--; +- pool->acc.cached_pages -= pages; +- spin_unlock_bh(&sgv_pools_mgr.mgr.pool_mgr_lock); +- } ++ obj = sgv_pool_cached_create(pool, order, gfp_mask, true); + + out: + return obj; +@@ -546,12 +560,13 @@ static void scst_uncheck_allowed_mem(str + + struct scatterlist *sgv_pool_alloc(struct sgv_pool *pool, unsigned int size, + gfp_t gfp_mask, int flags, int *count, +- struct sgv_pool_obj **sgv, struct scst_mem_lim *mem_lim, void *priv) ++ struct sgv_pool_obj **sgv, struct scst_mem_lim *mem_lim, void *priv, ++ int max_sg_count) + { + struct sgv_pool_obj *obj; + int order, pages, cnt; + struct scatterlist *res = NULL; +- int pages_to_alloc; ++ int pages_to_alloc, alloc_order; + struct kmem_cache *cache; + int no_cached = flags & SCST_POOL_ALLOC_NO_CACHED; + bool allowed_mem_checked = false, hiwmk_checked = false; +@@ -605,7 +620,23 @@ struct scatterlist *sgv_pool_alloc(struc + if (obj->sg_count != 0) { + TRACE_MEM("Cached sgv_obj %p", obj); + EXTRACHECKS_BUG_ON(obj->order_or_pages != order); +- atomic_inc(&pool->cache_acc[order].hit_alloc); ++ ++ if (unlikely(max_sg_count < obj->sg_count)) { ++ TRACE_MEM("Too many SG entries %d (max %d)", ++ obj->sg_count, max_sg_count); ++ ++ sgv_pool_cached_put(obj); ++ ++ obj = sgv_pool_cached_create(pool, order, ++ gfp_mask, false); ++ if (obj == NULL) { ++ TRACE(TRACE_OUT_OF_MEM, "Allocation of " ++ "sgv_pool_obj failed (size %d)", ++ size); ++ goto out_fail; ++ } ++ } else ++ atomic_inc(&pool->cache_acc[order].hit_alloc); + goto success; + } + +@@ -682,15 +713,27 @@ struct scatterlist *sgv_pool_alloc(struc + TRACE_MEM("Big or no_cached sgv_obj %p (size %d)", obj, sz); + } + +- obj->sg_count = scst_alloc_sg_entries(obj->sg_entries, +- pages_to_alloc, gfp_mask, pool->clustered, obj->trans_tbl, +- &pool->alloc_fns, priv); +- if (unlikely(obj->sg_count <= 0)) { +- obj->sg_count = 0; +- if ((flags & SCST_POOL_RETURN_OBJ_ON_ALLOC_FAIL) && cache) +- goto out_return1; +- else +- goto out_fail_free_sg_entries; ++ alloc_order = 0; ++ while (1) { ++ obj->sg_count = scst_alloc_sg_entries(obj->sg_entries, ++ pages_to_alloc, gfp_mask, pool->clustered, ++ obj->trans_tbl, &pool->alloc_fns, priv, alloc_order); ++ if (unlikely(obj->sg_count <= 0)) { ++ obj->sg_count = 0; ++ if ((flags & SCST_POOL_RETURN_OBJ_ON_ALLOC_FAIL) && cache) ++ goto out_return1; ++ else ++ goto out_fail_free_sg_entries; ++ } ++ obj->alloc_order = alloc_order; ++ ++ if (max_sg_count >= obj->sg_count) ++ break; ++ ++ obj->owner_pool->alloc_fns.free_pages_fn(obj->sg_entries, ++ obj->sg_count, obj->allocator_priv, ++ obj->alloc_order); ++ alloc_order++; + } + + if (cache) { +@@ -815,7 +858,7 @@ void sgv_pool_free(struct sgv_pool_obj * + sgv_pool_cached_put(sgv); + } else { + sgv->owner_pool->alloc_fns.free_pages_fn(sgv->sg_entries, +- sgv->sg_count, sgv->allocator_priv); ++ sgv->sg_count, sgv->allocator_priv, sgv->alloc_order); + pages = (sgv->sg_count != 0) ? -sgv->order_or_pages : 0; + kfree(sgv); + sgv_pool_hiwmk_uncheck(pages); +@@ -861,7 +904,7 @@ struct scatterlist *scst_alloc(int size, + * So, always don't use clustering. + */ + *count = scst_alloc_sg_entries(res, pages, gfp_mask, 0, NULL, +- &sys_alloc_fns, NULL); ++ &sys_alloc_fns, NULL, 0); + if (*count <= 0) + goto out_free; + +@@ -888,7 +931,7 @@ void scst_free(struct scatterlist *sg, i + + sgv_pool_hiwmk_uncheck(count); + +- scst_free_sys_sg_entries(sg, count, NULL); ++ scst_free_sys_sg_entries(sg, count, NULL, 0); + kfree(sg); + return; + } +@@ -1060,8 +1103,8 @@ void sgv_pool_deinit(struct sgv_pool *po + } + + void sgv_pool_set_allocator(struct sgv_pool *pool, +- struct page *(*alloc_pages_fn)(struct scatterlist *, gfp_t, void *), +- void (*free_pages_fn)(struct scatterlist *, int, void *)) ++ struct page *(*alloc_pages_fn)(struct scatterlist *, gfp_t, void *, int), ++ void (*free_pages_fn)(struct scatterlist *, int, void *, int)) + { + pool->alloc_fns.alloc_pages_fn = alloc_pages_fn; + pool->alloc_fns.free_pages_fn = free_pages_fn; +Index: scst/src/dev_handlers/scst_user.c +=================================================================== +--- scst/src/dev_handlers/scst_user.c (revision 559) ++++ scst/src/dev_handlers/scst_user.c (working copy) +@@ -168,9 +168,9 @@ static int dev_user_disk_done(struct scs + static int dev_user_tape_done(struct scst_cmd *cmd); + + static struct page *dev_user_alloc_pages(struct scatterlist *sg, +- gfp_t gfp_mask, void *priv); ++ gfp_t gfp_mask, void *priv, int alloc_order); + static void dev_user_free_sg_entries(struct scatterlist *sg, int sg_count, +- void *priv); ++ void *priv, int alloc_order); + + static void dev_user_add_to_ready(struct scst_user_cmd *ucmd); + +@@ -368,7 +368,7 @@ static void dev_user_free_ucmd(struct sc + } + + static struct page *dev_user_alloc_pages(struct scatterlist *sg, +- gfp_t gfp_mask, void *priv) ++ gfp_t gfp_mask, void *priv, int alloc_order) + { + struct scst_user_cmd *ucmd = (struct scst_user_cmd *)priv; + int offset = 0; +@@ -377,8 +377,11 @@ static struct page *dev_user_alloc_pages + + /* *sg supposed to be zeroed */ + +- TRACE_MEM("ucmd %p, ubuff %lx, ucmd->cur_data_page %d", ucmd, +- ucmd->ubuff, ucmd->cur_data_page); ++ TRACE_MEM("ucmd %p, ubuff %lx, ucmd->cur_data_page %d, alloc_order %d", ++ ucmd, ucmd->ubuff, ucmd->cur_data_page, alloc_order); ++ ++ if (unlikely(alloc_order != 0)) ++ goto out; + + if (ucmd->cur_data_page == 0) { + TRACE_MEM("ucmd->first_page_offset %d", +@@ -468,7 +471,7 @@ static void __dev_user_free_sg_entries(s + } + + static void dev_user_free_sg_entries(struct scatterlist *sg, int sg_count, +- void *priv) ++ void *priv, int alloc_order) + { + struct scst_user_cmd *ucmd = (struct scst_user_cmd *)priv; + +@@ -537,7 +540,8 @@ static int dev_user_alloc_sg(struct scst + ucmd->buff_cached = cached_buff; + + cmd->sg = sgv_pool_alloc(dev->pool, bufflen, gfp_mask, flags, +- &cmd->sg_cnt, &ucmd->sgv, &dev->udev_mem_lim, ucmd); ++ &cmd->sg_cnt, &ucmd->sgv, &dev->udev_mem_lim, ucmd, ++ cmd->tgt_dev->max_sg_cnt); + if (cmd->sg != NULL) { + struct scst_user_cmd *buf_ucmd = + (struct scst_user_cmd *)sgv_get_priv(ucmd->sgv); +@@ -559,21 +563,7 @@ static int dev_user_alloc_sg(struct scst + "last_len %d, l %d)", ucmd, cached_buff, ucmd->ubuff, + last_len, cmd->sg[cmd->sg_cnt-1].length); + +- if (unlikely(cmd->sg_cnt > cmd->tgt_dev->max_sg_cnt)) { +- static int ll; +- if (ll < 10) { +- PRINT_INFO("Unable to complete command due to " +- "SG IO count limitation (requested %d, " +- "available %d, tgt lim %d)", +- cmd->sg_cnt, +- cmd->tgt_dev->max_sg_cnt, +- cmd->tgt->sg_tablesize); +- ll++; +- } +- cmd->sg = NULL; +- /* sgv will be freed in dev_user_free_sgv() */ +- res = -1; +- } ++ EXTRACHECKS_BUG_ON(cmd->sg_cnt > cmd->tgt_dev->max_sg_cnt); + } else { + TRACE_MEM("Buf not alloced (ucmd %p, h %d, buff_cached, %d, " + "sg_cnt %d, ubuff %lx, sgv %p", ucmd, ucmd->h,