From 94e0d33e4faed36eb32b5d2a8b07d380095290a7 Mon Sep 17 00:00:00 2001 From: Vladislav Bolkhovitin Date: Sat, 20 Aug 2016 02:05:18 +0000 Subject: [PATCH] Merged revisions 6930-6933,6960-6961 via svnmerge from svn://svn.code.sf.net/p/scst/svn/trunk ........ r6930 | vlnb | 2016-08-02 19:49:31 -0700 (Tue, 02 Aug 2016) | 3 lines scst_vdisk: don't report unmap alignment, if it is equal to unmap granularity ........ r6931 | bvassche | 2016-08-08 13:52:00 -0700 (Mon, 08 Aug 2016) | 1 line scst/README.dlm: Update startup sequence information ........ r6932 | bvassche | 2016-08-09 13:18:03 -0700 (Tue, 09 Aug 2016) | 5 lines srpt/Testing.txt: Leave out --norandommap --verify and --norandommap are mutually exclusive. Hence leave out --norandommap. ........ r6933 | bvassche | 2016-08-11 11:36:48 -0700 (Thu, 11 Aug 2016) | 1 line scst/README.dlm: Correct instructions for waiting for logout ........ r6960 | vlnb | 2016-08-19 18:24:03 -0700 (Fri, 19 Aug 2016) | 3 lines scst: fix compilation on older kernels ........ r6961 | vlnb | 2016-08-19 18:33:47 -0700 (Fri, 19 Aug 2016) | 5 lines scst README: add section about VMware and Ceph RBD space reclaim ........ git-svn-id: http://svn.code.sf.net/p/scst/svn/branches/3.2.x@6964 d57e44dd-8a1f-0410-8b47-8ef2f437770f --- scst/README | 14 ++++++++++++++ scst/README.dlm | 19 ++++++++++++++----- scst/README_in-tree | 14 ++++++++++++++ scst/include/backport.h | 15 +++++++++++++++ scst/src/dev_handlers/scst_vdisk.c | 2 ++ scst/src/scst_main.c | 2 ++ scst/src/scst_priv.h | 2 ++ scst/src/scst_sysfs.c | 6 ++++++ scst/src/scst_targ.c | 2 ++ srpt/Testing.txt | 4 ++-- 10 files changed, 73 insertions(+), 7 deletions(-) diff --git a/scst/README b/scst/README index bd37eeb11..38ab73d5c 100644 --- a/scst/README +++ b/scst/README @@ -1952,6 +1952,20 @@ context switch is natural for such potentially long operation as EXTENDED COPY. 
+VMware and Ceph RBD space reclaim +--------------------------------- + +VMware with the VMFS5 filesystem ignores UNMAP alignment, so if you use 4 MB +Ceph RBD objects with VMFS5, only some discards will reclaim RBD space, +because a 1 MB discard does not often hit the tail of an object. + +Thus, for efficient ESXi space reclamation with RBD and VMFS5, it is +recommended to use a 1 MB object size in Ceph. + +See the https://sourceforge.net/p/scst/mailman/message/35287598 thread for +details. + + Caching ------- diff --git a/scst/README.dlm b/scst/README.dlm index 56597a149..7ca663c0f 100644 --- a/scst/README.dlm +++ b/scst/README.dlm @@ -85,8 +85,9 @@ Startup and Shutdown -------------------- The startup sequence is as follows: -* Load and configure SCST with cluster_mode = 0 and with all target ports - disabled. +* Load the DLM kernel module. If not loaded explicitly, "modprobe scst" will + load the DLM kernel module implicitly. +* Load and configure SCST with all target ports disabled. * Enable cluster mode for all SCST devices that can be accessed through more than one cluster node: for x in /sys/kernel/scst_tgt/handlers/*/*/; do @@ -103,10 +104,17 @@ The startup sequence is as follows: crmadmin -S "$dc" 2>/dev/null | sed 's/^Status of crmd@[^[:blank:]]*:[[:blank:]]\([^[:blank:]]*\).*/\1/' } - for ((i=0;i<300;i++)); do - [ "$(pacemaker_dc_status)" = "S_IDLE" ] && break + timeout=300 + for ((i=0;i $x & done wait - while ls -Ad /sys/kernel/scst_tgt/targets/*/*/sessions/* >/dev/null 2>&1; do + while ls -Ad /sys/kernel/scst_tgt/targets/*/*/sessions/* 2>&1 | + grep -vE '/sys/kernel/scst_tgt/targets/(copy_manager|scst_local)/'; do sleep 1 done * Tell SCST to release the DLM lockspaces: diff --git a/scst/README_in-tree b/scst/README_in-tree index 78e94401d..65053019d 100644 --- a/scst/README_in-tree +++ b/scst/README_in-tree @@ -1805,6 +1805,20 @@ context switch is natural for such potentially long operation as EXTENDED COPY. 
+VMware and Ceph RBD space reclaim +--------------------------------- + +VMware with the VMFS5 filesystem ignores UNMAP alignment, so if you use 4 MB +Ceph RBD objects with VMFS5, only some discards will reclaim RBD space, +because a 1 MB discard does not often hit the tail of an object. + +Thus, for efficient ESXi space reclamation with RBD and VMFS5, it is +recommended to use a 1 MB object size in Ceph. + +See the https://sourceforge.net/p/scst/mailman/message/35287598 thread for +details. + + Caching ------- diff --git a/scst/include/backport.h b/scst/include/backport.h index c7c923eae..eec927283 100644 --- a/scst/include/backport.h +++ b/scst/include/backport.h @@ -678,4 +678,19 @@ static inline int scsi_bidi_cmnd(struct scsi_cmnd *cmd) } #endif +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)) +/** + * ktime_before - Compare if a ktime_t value is smaller than another one. + * @cmp1: comparable1 + * @cmp2: comparable2 + * + * Return: true if cmp1 happened before cmp2. 
+ */ +static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2) +{ + return ktime_compare(cmp1, cmp2) < 0; +} +#endif /* (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) && (LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)) */ + #endif /* _SCST_BACKPORT_H_ */ diff --git a/scst/src/dev_handlers/scst_vdisk.c b/scst/src/dev_handlers/scst_vdisk.c index c18a0ad6f..a91ce88ce 100644 --- a/scst/src/dev_handlers/scst_vdisk.c +++ b/scst/src/dev_handlers/scst_vdisk.c @@ -1062,6 +1062,8 @@ check: (defined(RHEL_MAJOR) && RHEL_MAJOR -0 >= 6) virt_dev->unmap_opt_gran = q->limits.discard_granularity >> block_shift; virt_dev->unmap_align = q->limits.discard_alignment >> block_shift; + if (virt_dev->unmap_opt_gran == virt_dev->unmap_align) + virt_dev->unmap_align = 0; virt_dev->unmap_max_lba_cnt = q->limits.max_discard_sectors >> (block_shift - 9); virt_dev->discard_zeroes_data = q->limits.discard_zeroes_data; #else diff --git a/scst/src/scst_main.c b/scst/src/scst_main.c index 0ee18e6be..8c569d274 100644 --- a/scst/src/scst_main.c +++ b/scst/src/scst_main.c @@ -121,7 +121,9 @@ unsigned long scst_trace_flag; unsigned long scst_flags; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) unsigned long scst_poll_ns = SCST_DEF_POLL_NS; +#endif int scst_max_tasklet_cmd = SCST_DEF_MAX_TASKLET_CMD; diff --git a/scst/src/scst_priv.h b/scst/src/scst_priv.h index b3bbc5471..9c5638ba3 100644 --- a/scst/src/scst_priv.h +++ b/scst/src/scst_priv.h @@ -183,8 +183,10 @@ extern unsigned int scst_setup_id; #define SCST_DEF_MAX_TASKLET_CMD 10 extern int scst_max_tasklet_cmd; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) #define SCST_DEF_POLL_NS 0 extern unsigned long scst_poll_ns; +#endif extern spinlock_t scst_init_lock; extern struct list_head scst_init_cmd_list; diff --git a/scst/src/scst_sysfs.c b/scst/src/scst_sysfs.c index 1cb3a59de..7e2985b45 100644 --- a/scst/src/scst_sysfs.c +++ b/scst/src/scst_sysfs.c @@ -7077,6 +7077,8 @@ static struct kobj_attribute scst_max_tasklet_cmd_attr = 
__ATTR(max_tasklet_cmd, S_IRUGO | S_IWUSR, scst_max_tasklet_cmd_show, scst_max_tasklet_cmd_store); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) + static ssize_t scst_poll_us_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -7124,6 +7126,8 @@ static struct kobj_attribute scst_poll_us_attr = __ATTR(poll_us, S_IRUGO | S_IWUSR, scst_poll_us_show, scst_poll_us_store); +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */ + static ssize_t scst_suspend_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -7398,7 +7402,9 @@ static struct attribute *scst_sysfs_root_default_attrs[] = { &scst_threads_attr.attr, &scst_setup_id_attr.attr, &scst_max_tasklet_cmd_attr.attr, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) &scst_poll_us_attr.attr, +#endif &scst_suspend_attr.attr, #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING) &scst_main_trace_level_attr.attr, diff --git a/scst/src/scst_targ.c b/scst/src/scst_targ.c index 507c71b62..cb1d06939 100644 --- a/scst/src/scst_targ.c +++ b/scst/src/scst_targ.c @@ -5620,6 +5620,7 @@ again: thr_locked = false; } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) if (scst_poll_ns > 0) { struct timespec ts; ktime_t end, kt; @@ -5652,6 +5653,7 @@ again: } go: +#endif spin_lock_irq(&p_cmd_threads->cmd_list_lock); spin_lock(&thr->thr_cmd_list_lock); } diff --git a/srpt/Testing.txt b/srpt/Testing.txt index cfd6d09f9..86dfe40ac 100644 --- a/srpt/Testing.txt +++ b/srpt/Testing.txt @@ -37,8 +37,8 @@ At least the following tests must be run before releasing a new SRPT version: rm -rf /mnt/test* && \ fio --verify=md5 -rw=randwrite --size=10m --bs=4k \ --loops=1000000 --iodepth=64 --group_reporting --sync=1 --direct=1 \ - --norandommap --ioengine=aio --directory=/mnt --name=test --thread \ - --numjobs=80 --runtime=30 && \ + --ioengine=aio --directory=/mnt --name=test --thread --numjobs=80 \ + --runtime=30 && \ fsck -N $dev do true