diff --git a/iscsi-scst/Makefile b/iscsi-scst/Makefile index 28df16de5..f5f5b2a04 100644 --- a/iscsi-scst/Makefile +++ b/iscsi-scst/Makefile @@ -124,6 +124,8 @@ install: all modules_install $(MAKE) -C $(KDIR) SCST_INC_DIR=$(SCST_INC_DIR) SUBDIRS=$(ISERTMOD) \ modules_install + $(MAKE) -C $(KDIR) SCST_INC_DIR=$(SCST_INC_DIR) SUBDIRS=$(ISERTMOD) \ + modules_install uninstall: rm -f $(DESTDIR)$(SBINDIR)/iscsi-scstd \ diff --git a/iscsi-scst/README.iser b/iscsi-scst/README.iser new file mode 100644 index 000000000..819b1b574 --- /dev/null +++ b/iscsi-scst/README.iser @@ -0,0 +1,108 @@ +iSCSI extensions for RDMA driver: +================================== + +Installation & Configuration: +--------------------------- +For installation and configuration, see iscsi README. +There are no specific configuration options for iSER. +See below for performance optimizations as well as troubleshooting. + + +Performance considerations: +--------------------------- + +In order to achieve better performance, it is recommended to specify +"QueuedCommands 128" parameter per iSER target, since the transport +is very fast and you usually want to connect it to fast backstorage. + + +Troubleshooting: +----------------- +* Initiator fails to connect to target. The following message is seen in dmesg: + Failed to accept conn request, err: -22 + The cause of this is often compilation issues if you have OFED or MLNX_OFED installed: + If you are compiling for OFED/MLNX_OFED, make sure OFED is installed for + the kernel you are running. Also, make sure you followed ALL steps described + in README.iser_ofed including patching the kernel. + If you are compiling for non-OFED kernel, make sure you don't have + OFED/MLNX_OFED installed. + + +* Discovery of iSER targets takes a long time or login to all discovered targets fails. + iSCSI discovery does not have a way to determine between iSCSI and iSER + enabled portals. 
Thus, initiator tries to connect to all interfaces it + discovered (by default discovery is done over iSCSI TCP). + In order to prevent this behaviour, you should specify + "allowed_portal IP_ADDRESS" parameter for each target you want + to export through specific RDMA capable adapters. + + +* Initiator keeps connecting and disconnecting from target in a loop + with constant interval after target reboot. + The problem may be that connection requests from initiator are received + on wrong port/HCA. This can be due to one (or both) of the following issues: + 1) net.ipv4.conf.all.arp_ignore sysctl is not set to 2 + rdma-cm relies on ARP responses being received on the same interface + that sent the request. Linux default does not do that. + In order to make Linux behave well for rdma-cm, you _MUST_ add + "net.ipv4.conf.all.arp_ignore = 2" to /etc/sysctl.conf + 2) You have more than 1 HCA and PCI mappings to netdev devices are not + persistent between reboots. Possible solution is to have udev rules + for mapping the ibX devices in a persistent way. + See below for udev scripts example: + +/lib/udev/net.sh +------------------- +#!/bin/sh + +. /etc/sysconfig/net.conf + +type_fd="/sys/${DEVPATH}/type" +if [ ! -f $type_fd ]; then + exit +fi +type=`cat /sys/${DEVPATH}/type` + +if [ "$type" = "32" ]; then # IPoIB interface + i=0 + CONFDEV="DEV${i}" + CONFPCI=${!CONFDEV} + PCI=`basename $PHYSDEVPATH` + while [ -n "$CONFPCI" ]; do + if [ "$CONFPCI" = "$PCI" ]; then + devid=$(printf "%d\n" `cat /sys/$DEVPATH/dev_id`) + let id=$i*2+$devid + DEV="ib$id" + echo "$DEV" + exit + fi + let i=i+1 + CONFDEV="DEV$i" + CONFPCI=${!CONFDEV} + done +fi + +/etc/sysconfig/net.conf +----------------------- +DEV0="0000:01:00.0" +DEV1="0000:02:00.0" + +/etc/udev/rules.d/90-network.rules +------------------------------------- +ACTION=="add", SUBSYSTEM=="net", PROGRAM="/lib/udev/net.sh", RESULT=="?*", NAME="$result" + + +* Login to all targets from initiator sometimes times out.
+ It may be a network problem (try running tools like ibdiagnet + and rping between target and initiator hosts). The description of those tools + is beyond the scope of this readme. + Another issue may be that you failed to set net.ipv4.conf.all.arp_ignore sysctl + to the value of 2 (see the problem above for a more detailed explanation). + + +* When running IO, latency is getting higher and higher all the time. + If you have enabled intel_iommu either in kernel command line or in + kernel config (it may be enabled by default), you should specify + iommu=pt on kernel command line to avoid the latency issue. + + diff --git a/iscsi-scst/README.iser_ofed b/iscsi-scst/README.iser_ofed new file mode 100644 index 000000000..dd2b1d563 --- /dev/null +++ b/iscsi-scst/README.iser_ofed @@ -0,0 +1,125 @@ +iSCSI Extensions for RDMA (iSER) Target driver for Linux +================================================= + +Introduction +------------ + +The iSER target driver has been designed to work on top of the Linux +InfiniBand kernel drivers. While all recent Linux distributions +include recent versions of the InfiniBand drivers, the only way to +obtain the latest available InfiniBand drivers is by installing the +OFED or MLNX_OFED (for Mellanox drivers) software stack. + +The OFED stack is distributed by the OpenFabrics Alliance (OFA). The +mission of the OpenFabrics Alliance is to develop, distribute +and promote a unified, transport-independent, open-source software +stack for RDMA-capable fabrics and networks, including InfiniBand and +Ethernet. + +The MLNX_OFED is distributed by Mellanox and can be obtained from +http://www.mellanox.com/page/products_dyn?product_family=26 + +Note: because during OFED installation the distro-provided InfiniBand +kernel drivers are replaced, doing so voids the support contract +offered by your Linux distributor. + +Please follow the instructions below carefully.
Skipping a step may +result in kernel modules that fail to load, a kernel oops or even a +system that no longer boots. + + +Verifying the kernel version +---------------------------- + +Before installing the OFED distribution, it is very important to check +the OFED release notes. Each OFED distribution has been tested +carefully, but only against the kernel versions specified in +docs/OFED_release_notes.txt (you can find this document in the OFED +distribution). Make sure that you are using a supported kernel / OFED +combination. As an example, if you want to use OFED 1.5.1 on an Ubuntu +system, you will have to start with replacing the Ubuntu kernel by a +kernel from kernel.org since OFED 1.5.1 has not been tested on any +Ubuntu kernel. + + +Compiling iSER against OFED +-------------------------- + +Make sure that all necessary packages needed for kernel compilation +have been installed (kernel headers, gcc, binutils, ...). + +Unload any loaded InfiniBand drivers: + + /etc/init.d/opensmd stop + /etc/init.d/openibd stop + +Remove any distro-provided InfiniBand drivers: + + rm -rf /lib/modules/$(uname -r)/kernel/drivers/infiniband + rm -rf /lib/modules/$(uname -r)/kernel/drivers/net/mlx4 + +Now locate the file Makefile.lib and patch it such that it supports +the variable PRE_CFLAGS: + + if [ -e /lib/modules/$(uname -r)/build/scripts/Makefile.lib ]; then + cd /lib/modules/$(uname -r)/build + else + cd /usr/src/linux-$(uname -r) + fi + patch -p1 < ${SCST_DIR}/srpt/patches/kernel-${KV}-pre-cflags.patch + +Next, download and install an OFED package. + +For MLNX_OFED, just run the mlnxofedinstall script inside the MLNX_OFED directory. + +NOTE TO ADVANCED USERS: +------------------------ +If you are installing MLNX_OFED by manually selecting which RPMs/DEBs to install, +make sure the ofed_scripts package is one of them, since it is required for correct OFED +version detection by the iscsi-scst makefile.
+ + +For the OFED package, make sure to enable +at least the kernel-ib and kernel-ib-devel packages (compat-rdma and compat-rdma-devel for OFED 3.5 and above). +An example: + + wget http://www.openfabrics.org/downloads/OFED/ofed-1.5.1/OFED-1.5.1.tgz + tar xzf OFED-1.5.1.tgz + cd OFED-1.5.1 + cat <<EOF >ofed.conf + libibverbs=y + libibverbs-utils=y + libmthca=y + libmlx4=y + libcxgb3=y + libnes=y + libipathverbs=y + librdmacm=y + librdmacm-utils=y + mstflint=y + ofed-docs=y + ofed-scripts=y + kernel-ib=y + kernel-ib-devel=y + ibvexdmtools=y + qlgc_vnic_daemon=y + core=y + mthca=y + mlx4=y + mlx4_en=y + cxgb3=y + nes=y + ipath=y + ipoib=y + opensm=y + opensm-libs=y + srpt=n + srptools=y + perftest=y + EOF + ./install.pl -c ofed.conf + +Now continue with the installation instructions you can find in the +ISCSI-SCST README file. The Makefile included with ISCSI-SCST detects +whether OFED has been installed, and if so, compiles ISCSI-SCST with +the OFED kernel headers instead of with the regular kernel headers.
diff --git a/iscsi-scst/include/iscsit_transport.h b/iscsi-scst/include/iscsit_transport.h
new file mode 100644
index 000000000..6cbcedbcc
--- /dev/null
+++ b/iscsi-scst/include/iscsit_transport.h
@@ -0,0 +1,78 @@
+
+#ifndef __ISCSI_TRANSPORT_H__
+#define __ISCSI_TRANSPORT_H__
+
+#include
+#include
+
+#ifdef INSIDE_KERNEL_TREE
+#include
+#else
+#include
+#endif
+
+/* Forward declarations */
+struct iscsi_session;
+struct iscsi_kern_conn_info;
+struct iscsi_conn;
+
+/* Kind of transport an iscsit_transport instance provides. */
+enum iscsit_transport_type {
+	ISCSI_TCP,
+	ISCSI_RDMA,
+};
+
+/*
+ * Callback table and identification of one iSCSI transport
+ * implementation. transport_list_entry links the structure into the list
+ * maintained by iscsit_register_transport(), which accepts a single
+ * instance per transport_type.
+ */
+struct iscsit_transport {
+	struct iscsi_cmnd* (*iscsit_alloc_cmd)(struct iscsi_conn *conn,
+					       struct iscsi_cmnd *parent);
+	void (*iscsit_preprocessing_done)(struct iscsi_cmnd *cmnd);
+	void (*iscsit_send_data_rsp)(struct iscsi_cmnd *req, u8 *sense,
+				     int sense_len, u8 status,
+				     int send_status);
+	int (*iscsit_send_locally)(struct iscsi_cmnd *cmnd,
+				   unsigned int cmd_count);
+	void (*iscsit_set_sense_data)(struct iscsi_cmnd *rsp,
+				      const u8 *sense_buf, int sense_len);
+	int (*iscsit_receive_cmnd_data)(struct iscsi_cmnd *cmnd);
+	void (*iscsit_make_conn_wr_active)(struct iscsi_conn *conn);
+	void (*iscsit_free_cmd)(struct iscsi_cmnd *cmnd);
+
+	void (*iscsit_set_req_data)(struct iscsi_cmnd *req,
+				    struct iscsi_cmnd *rsp);
+
+	int (*iscsit_conn_alloc)(struct iscsi_session *session,
+				 struct iscsi_kern_conn_info *info,
+				 struct iscsi_conn **new_conn,
+				 struct iscsit_transport *transport);
+	int (*iscsit_conn_activate)(struct iscsi_conn *conn);
+	void (*iscsit_conn_free)(struct iscsi_conn *conn);
+	void (*iscsit_conn_close)(struct iscsi_conn *conn, int flags);
+	void (*iscsit_mark_conn_closed)(struct iscsi_conn *conn, int flags);
+
+	ssize_t (*iscsit_get_initiator_ip)(struct iscsi_conn *conn, char *buf,
+					   int size);
+
+	void (*iscsit_close_all_portals)(void);
+
+#if !defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+	unsigned int need_alloc_write_buf:1;
+#endif
+
+	struct module *owner;
+	const char name[SCST_MAX_NAME];
+	enum iscsit_transport_type transport_type;
+	struct list_head transport_list_entry;
+} ____cacheline_aligned;
+
+extern int iscsit_register_transport(struct iscsit_transport *t);
+extern void iscsit_unregister_transport(struct iscsit_transport *t);
+extern struct iscsit_transport *iscsit_get_transport(enum iscsit_transport_type type);
+
+#endif /* __ISCSI_TRANSPORT_H__ */
+
diff --git a/iscsi-scst/include/isert_scst.h b/iscsi-scst/include/isert_scst.h
new file mode 100644
index 000000000..1477cfdfd
--- /dev/null
+++ b/iscsi-scst/include/isert_scst.h
@@ -0,0 +1,24 @@
+#ifndef _ISERT_SCST_U_H
+#define _ISERT_SCST_U_H
+
+#ifdef __KERNEL__
+#include
+#include
+#else
+#include
+#include
+#endif
+
+struct isert_addr_info {
+	struct sockaddr_storage addr;
+	size_t addr_len;
+};
+
+#define ISERT_MAX_PORTALS 32
+
+#define SET_LISTEN_ADDR _IOW('y', 0, struct isert_addr_info)
+#define RDMA_CORK _IOW('y', 1, int)
+#define GET_PORTAL_ADDR _IOW('y', 2, struct isert_addr_info)
+#define DISCOVERY_SESSION _IOW('y', 3, int)
+
+#endif
diff --git a/iscsi-scst/kernel/iscsit_transport.c b/iscsi-scst/kernel/iscsit_transport.c
new file mode 100644
index 000000000..d913e3889
--- /dev/null
+++ b/iscsi-scst/kernel/iscsit_transport.c
@@ -0,0 +1,80 @@
+
+#include
+#include "iscsit_transport.h"
+#include "iscsi.h"
+
+static LIST_HEAD(transport_list);
+static DEFINE_MUTEX(transport_mutex);
+
+/*
+ * Look up the registered transport of the given type. Returns NULL when no
+ * such transport is registered. Both callers in this file hold
+ * transport_mutex around the call.
+ */
+static struct iscsit_transport *__iscsit_get_transport(enum iscsit_transport_type type)
+{
+	struct iscsit_transport *t;
+
+	list_for_each_entry(t, &transport_list, transport_list_entry) {
+		if (t->transport_type == type)
+			return t;
+	}
+
+	return NULL;
+}
+
+/*
+ * Locked lookup of a registered transport by type. Returns NULL when no
+ * transport of that type is registered. No reference is taken on the
+ * returned transport.
+ */
+struct iscsit_transport *iscsit_get_transport(enum iscsit_transport_type type)
+{
+	struct iscsit_transport *t;
+
+	mutex_lock(&transport_mutex);
+	t = __iscsit_get_transport(type);
+	mutex_unlock(&transport_mutex);
+
+	return t;
+}
+
+/*
+ * Add @t to the list of known transports. Only one transport per
+ * transport_type is accepted: returns 0 on success and -EEXIST when a
+ * transport of the same type is already registered.
+ */
+int iscsit_register_transport(struct iscsit_transport *t)
+{
+	struct iscsit_transport *tmp;
+	int ret = 0;
+
+	INIT_LIST_HEAD(&t->transport_list_entry);
+
+	mutex_lock(&transport_mutex);
+	tmp = __iscsit_get_transport(t->transport_type);
+	if (tmp) {
+		PRINT_ERROR("Unable to register transport type %d - Already registered\n",
+			    t->transport_type);
+		ret = -EEXIST;
+	} else {
+		list_add_tail(&t->transport_list_entry, &transport_list);
+		PRINT_INFO("Registered iSCSI transport: %s\n", t->name);
+	}
+	mutex_unlock(&transport_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL(iscsit_register_transport);
+
+/* Remove @t from the list of known transports. */
+void iscsit_unregister_transport(struct iscsit_transport *t)
+{
+	mutex_lock(&transport_mutex);
+	list_del(&t->transport_list_entry);
+	mutex_unlock(&transport_mutex);
+
+	PRINT_INFO("Unregistered iSCSI transport: %s\n", t->name);
+}
+EXPORT_SYMBOL(iscsit_unregister_transport);
+
diff --git a/iscsi-scst/kernel/isert-scst/Kconfig b/iscsi-scst/kernel/isert-scst/Kconfig
new file mode 100644
index 000000000..99ff7a97f
--- /dev/null
+++ b/iscsi-scst/kernel/isert-scst/Kconfig
@@ -0,0 +1,8 @@
+config SCST_ISER
+	tristate "iSER Target"
+	depends on SCST && SCST_ISCSI
+	default SCST
+	help
+	  ISER target driver for SCST framework. The iSCSI iSER extension
+	  has been defined in RFC 5046.
+
diff --git a/iscsi-scst/kernel/isert-scst/Makefile b/iscsi-scst/kernel/isert-scst/Makefile
new file mode 100644
index 000000000..5c3cebda8
--- /dev/null
+++ b/iscsi-scst/kernel/isert-scst/Makefile
@@ -0,0 +1,39 @@
+#
+# Makefile for the kernel part of iSER-SCST.
+#
+# Copyright (C) 2007 - 2014 Vladislav Bolkhovitin
+# Copyright (C) 2007 - 2014 Fusion-io, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, version 2
+# of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+# +# Note! Dependencies are done automatically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile. + +cc-option = $(shell if $(CC) $(CFLAGS) $(1) -S -o /dev/null -xc /dev/null \ + > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi ;) +enable-Wextra = $(shell uname_r="$$(uname -r)"; if [ "$${uname_r%.el5}" = "$${uname_r}" ]; then echo "$(1)"; fi) + +EXTRA_CFLAGS += -I$(src)/../../include -I$(src)/../ -I$(SCST_INC_DIR) +EXTRA_CFLAGS += $(call enable-Wextra,-Wextra \ + $(call cc-option,-Wno-old-style-declaration) \ + -Wno-unused-parameter -Wno-missing-field-initializers) + +EXTRA_CFLAGS += -DCONFIG_SCST_EXTRACHECKS +#EXTRA_CFLAGS += -DCONFIG_SCST_TRACING +EXTRA_CFLAGS += -DCONFIG_SCST_DEBUG -g -fno-inline -fno-inline-functions + +obj-m += isert-scst.o +isert-scst-objs := isert.o isert_login.o \ + iser_datamover.o iser_rdma.o iser_buf.o iser_pdu.o iser_global.o + diff --git a/iscsi-scst/kernel/isert-scst/Makefile.in-kernel b/iscsi-scst/kernel/isert-scst/Makefile.in-kernel new file mode 100644 index 000000000..e65970072 --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/Makefile.in-kernel @@ -0,0 +1,4 @@ +isert-scst-y := isert.o isert_login.o \ + iser_datamover.o iser_rdma.o iser_buf.o iser_pdu.o iser_global.o + +obj-$(CONFIG_SCST_ISER) += isert-scst.o diff --git a/iscsi-scst/kernel/isert-scst/TODO b/iscsi-scst/kernel/isert-scst/TODO new file mode 100644 index 000000000..479c6b76b --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/TODO @@ -0,0 +1,9 @@ +* Add support for immediate data in iSER +* Add support for data-out in iSER +* Look into allocating wr and sg entries dynamically from kmem_cache instead of embedding them into iser_cmnd +* Look into separating between RX pdu and TX pdu +* Do not signal every "response sent" notification +* Make the code NUMA aware +* Add support for AHS +* Add support for
bidi commands
+
diff --git a/iscsi-scst/kernel/isert-scst/iser.h b/iscsi-scst/kernel/isert-scst/iser.h
new file mode 100644
index 000000000..8c1883c39
--- /dev/null
+++ b/iscsi-scst/kernel/isert-scst/iser.h
@@ -0,0 +1,322 @@
+#ifndef __ISER_H__
+#define __ISER_H__
+
+#include
+#include
+#include
+#include
+#include
+
+#include "iser_hdr.h"
+
+struct isert_portal {
+	struct rdma_cm_id *cm_id;
+	struct sockaddr_storage addr;
+	struct list_head list_node; /* in portals list */
+	/* protected by dev_list_mutex */
+	struct list_head conn_list; /* head of conns list */
+};
+
+/*
+ * Data buffer described by a scatterlist. The is_* flags record how the
+ * buffer was obtained so that isert_buf_release() can free it accordingly.
+ */
+struct isert_buf {
+	int sg_cnt ____cacheline_aligned;
+	struct scatterlist *sg;
+	u8 *addr;
+	size_t size;
+	enum dma_data_direction dma_dir;
+	unsigned int is_alloced:1;
+	unsigned int is_pgalloced:1;
+	unsigned int is_malloced:1;
+};
+
+enum isert_wr_op {
+	ISER_WR_RECV,
+	ISER_WR_SEND,
+	ISER_WR_RDMA_WRITE,
+	ISER_WR_RDMA_READ,
+};
+
+struct isert_device;
+struct isert_connection;
+
+struct isert_wr {
+	enum isert_wr_op wr_op;
+	struct isert_buf *buf;
+
+	struct isert_connection *conn;
+	struct isert_cmnd *pdu;
+
+	struct isert_device *isert_dev;
+
+	struct ib_sge *sge_list;
+	union {
+		struct ib_recv_wr recv_wr;
+		struct ib_send_wr send_wr;
+	};
+} ____cacheline_aligned;
+
+#define ISER_MAX_SGE	128
+#define ISER_MAX_RDMAS	5
+
+#define ISER_SQ_SIZE	128
+#define ISER_MAX_WCE	2048
+
+struct isert_cmnd {
+	struct iscsi_cmnd iscsi ____cacheline_aligned;
+
+	struct isert_buf buf;
+	struct isert_buf rdma_buf;
+	struct isert_wr wr[ISER_MAX_RDMAS];
+	struct ib_sge sg_pool[ISER_MAX_SGE];
+
+	struct isert_hdr *isert_hdr ____cacheline_aligned;
+	struct iscsi_hdr *bhs;
+	void *ahs;
+	void *data;
+
+	u8 isert_opcode;
+	u8 iscsi_opcode;
+	u8 is_rstag_valid;
+	u8 is_wstag_valid;
+
+	u32 rem_write_stag; /* write rkey */
+	u64 rem_write_va;
+	u32 rem_read_stag; /* read rkey */
+	u64 rem_read_va;
+
+	int is_fake_rx;
+	struct list_head pool_node; /* pool list */
+};
+
+enum isert_conn_state {
+	ISER_CONN_INIT = 0,
+	ISER_CONN_HANDSHAKE,
+	ISER_CONN_ACTIVE,
+	ISER_CONN_CLOSING,
+};
+
+struct isert_cq {
+	struct ib_cq *cq ____cacheline_aligned;
+	struct ib_wc wc[ISER_SQ_SIZE];
+	struct isert_device *dev;
+	struct workqueue_struct *cq_workqueue;
+	struct work_struct cq_comp_work;
+	int idx;
+};
+
+#define ISERT_CONNECTION_ABORTED	0
+
+struct isert_connection {
+	struct iscsi_conn iscsi ____cacheline_aligned;
+
+	int repost_threshold ____cacheline_aligned;
+	/* access to the following 3 fields is guarded by post_recv_lock */
+	int to_post_recv;
+	struct isert_wr *post_recv_first;
+	struct isert_wr *post_recv_curr;
+
+	spinlock_t post_recv_lock;
+
+
+	spinlock_t tx_lock ____cacheline_aligned;
+
+	/* Following two protected by tx_lock */
+	struct list_head tx_free_list;
+	struct list_head tx_busy_list;
+
+	struct rdma_cm_id *cm_id;
+	struct isert_device *isert_dev;
+	struct ib_qp *qp;
+	struct isert_cq *cq_desc;
+
+	enum isert_conn_state state;
+
+	u32 responder_resources;
+	u32 initiator_depth;
+	u32 max_sge;
+
+	/*
+	 * Unprotected. Accessed only before login response is sent and when
+	 * freeing connection
+	 */
+	struct list_head rx_buf_list;
+
+	struct isert_cmnd *login_req_pdu;
+	struct isert_cmnd *login_rsp_pdu;
+	struct isert_wr *saved_wr;
+
+	int queue_depth;
+	int immediate_data;
+	unsigned int target_recv_data_length;
+	int initiator_recv_data_length;
+	int initial_r2t;
+	unsigned int first_burst_length;
+	struct sockaddr_storage peer_addr;
+	size_t peer_addrsz;
+	struct sockaddr_storage self_addr;
+
+	struct list_head dev_node;
+	struct list_head portal_node;
+
+	unsigned long flags;
+	struct work_struct close_work;
+	struct kref kref;
+
+	void *priv_data; /* for connection tracking */
+};
+
+struct isert_device {
+	struct ib_device *ib_dev;
+	struct ib_pd *pd;
+	struct ib_mr *mr;
+
+	struct list_head devs_node;
+	/* conn_list and refcnt protected by dev_list_mutex */
+	struct list_head conn_list;
+	int refcnt;
+	struct ib_device_attr device_attr;
+
+	int num_cqs;
+	int *cq_qps;
+	struct isert_cq *cq_desc;
+};
+
+struct isert_global {
+	spinlock_t portal_lock;
+	/* protected by portal_lock */
+	struct list_head portal_list;
+	/* protected by dev_list_mutex */
+	struct list_head dev_list;
+	struct workqueue_struct *conn_wq;
+};
+
+/*
+ * Pointer/u64 conversions, used for the wr_id of work requests. The
+ * expansions are fully parenthesized so that they remain one expression
+ * when a postfix operator or member access follows the invocation.
+ */
+#define _ptr_to_u64(p)		((u64)(unsigned long)(p))
+#define _u64_to_ptr(v)		((void *)(unsigned long)(v))
+
+/* global iser scope */
+int isert_global_init(void);
+int isert_datamover_cleanup(void);
+
+void isert_portal_list_add(struct isert_portal *portal);
+void isert_portal_list_remove(struct isert_portal *portal);
+
+void isert_dev_list_add(struct isert_device *isert_dev);
+void isert_dev_list_remove(struct isert_device *isert_dev);
+struct isert_device *isert_device_find(struct ib_device *ib_dev);
+
+void isert_conn_queue_work(struct work_struct *w);
+
+extern struct kmem_cache *isert_cmnd_cache;
+extern struct kmem_cache *isert_conn_cache;
+
+/* iser portal */
+struct isert_portal *isert_portal_create(void);
+int isert_portal_listen(struct isert_portal *portal,
+			struct sockaddr *sa,
+			size_t addr_len);
+void isert_portal_release(struct isert_portal *portal);
+void isert_portal_list_release_all(void);
+struct isert_portal *isert_portal_start(struct sockaddr *sa, size_t addr_len);
+
+/* iser connection */
+int isert_post_recv(struct isert_connection *isert_conn,
+		    struct isert_wr *first_wr, int num_wr);
+int isert_post_send(struct isert_connection *isert_conn,
+		    struct isert_wr *first_wr, int num_wr);
+
+int isert_alloc_conn_resources(struct isert_connection *isert_conn);
+void isert_free_conn_resources(struct isert_connection *isert_conn);
+void isert_conn_free(struct isert_connection *isert_conn);
+void isert_conn_disconnect(struct isert_connection *isert_conn);
+
+static inline struct isert_connection *isert_conn_alloc(void)
+{
+	return kmem_cache_zalloc(isert_conn_cache, GFP_KERNEL);
+}
+
+static inline void isert_conn_kfree(struct isert_connection *isert_conn)
+{
+	kmem_cache_free(isert_conn_cache, isert_conn);
+}
+
+/* iser buf */
+int isert_buf_alloc_data_buf(struct ib_device *ib_dev,
+			     struct isert_buf *isert_buf, size_t size,
+			     enum dma_data_direction dma_dir);
+void isert_wr_set_fields(struct isert_wr *wr,
+			 struct isert_connection *isert_conn,
+			 struct isert_cmnd *pdu);
+int isert_wr_init(struct isert_wr *wr,
+		  enum isert_wr_op wr_op,
+		  struct isert_buf *isert_buf,
+		  struct isert_connection *isert_conn,
+		  struct isert_cmnd *pdu,
+		  struct ib_sge *sge,
+		  int sg_offset,
+		  int sg_cnt,
+		  int buff_offset);
+void isert_wr_release(struct isert_wr *wr);
+
+void isert_buf_release(struct isert_buf *isert_buf);
+
+static inline void isert_buf_init_sg(struct isert_buf *isert_buf,
+				     struct scatterlist *sg,
+				     int sg_cnt, size_t size)
+{
+	isert_buf->sg_cnt = sg_cnt;
+	isert_buf->sg = sg;
+	isert_buf->size = size;
+}
+
+/* iser pdu */
+static inline struct isert_cmnd *isert_pdu_alloc(void)
+{
+	return kmem_cache_zalloc(isert_cmnd_cache, GFP_KERNEL);
+}
+
+static inline void isert_pdu_kfree(struct isert_cmnd *cmnd)
+{
+	kmem_cache_free(isert_cmnd_cache, cmnd);
+}
+
+struct isert_cmnd *isert_rx_pdu_alloc(struct isert_connection *isert_conn,
+				      size_t size);
+struct isert_cmnd *isert_tx_pdu_alloc(struct isert_connection *isert_conn,
+				      size_t size);
+void isert_tx_pdu_init(struct isert_cmnd *isert_pdu,
+		       struct isert_connection *isert_conn);
+int isert_pdu_send(struct isert_connection *isert_conn,
+		   struct isert_cmnd *tx_pdu);
+
+int isert_prepare_rdma(struct isert_cmnd *isert_pdu,
+		       struct isert_connection *isert_conn,
+		       enum isert_wr_op op);
+int isert_pdu_post_rdma_write(struct isert_connection *isert_conn,
+			      struct isert_cmnd *isert_cmd,
+			      struct isert_cmnd *isert_rsp,
+			      int wr_cnt);
+int isert_pdu_post_rdma_read(struct isert_connection *isert_conn,
+			     struct isert_cmnd *isert_cmd,
+			     int wr_cnt);
+
+void isert_pdu_free(struct isert_cmnd *pdu);
+int isert_rx_pdu_done(struct isert_cmnd *pdu);
+
+void isert_tx_pdu_convert_from_iscsi(struct isert_cmnd *isert_cmnd,
+				     struct iscsi_cmnd *iscsi_cmnd);
+
+void isert_tx_pdu_init_iscsi(struct isert_cmnd *isert_pdu);
+
+/* global */
+void isert_global_cleanup(void);
+int isert_get_addr_size(struct sockaddr *sa, size_t *size);
+
+#endif
diff --git a/iscsi-scst/kernel/isert-scst/iser_buf.c b/iscsi-scst/kernel/isert-scst/iser_buf.c
new file mode 100644
index 000000000..8d74f1bff
--- /dev/null
+++ b/iscsi-scst/kernel/isert-scst/iser_buf.c
@@ -0,0 +1,303 @@
+/*
+* isert_buf.c
+*
+* This file is part of iser target kernel module.
+*
+* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved.
+* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com)
+*
+* This software is available to you under a choice of one of two
+* licenses.
You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. 
+*/
+
+#include
+
+#include "iser.h"
+
+/*
+ * Build isert_buf->sg from individually allocated pages covering @size
+ * bytes and DMA-map it on @ib_dev in direction @dma_dir. isert_buf->addr is
+ * set to the virtual address of the first page.
+ *
+ * Returns 0 on success or -ENOMEM. On failure every page allocated so far
+ * and the SG table are freed and isert_buf->sg is reset to NULL.
+ */
+static int isert_buf_alloc_pg(struct ib_device *ib_dev,
+			      struct isert_buf *isert_buf, size_t size,
+			      enum dma_data_direction dma_dir)
+{
+	int res = 0;
+	int i;
+	struct page *page;
+
+	isert_buf->sg_cnt = DIV_ROUND_UP(size, PAGE_SIZE);
+	isert_buf->sg = kmalloc(sizeof(*isert_buf->sg) * isert_buf->sg_cnt,
+				GFP_KERNEL);
+	if (unlikely(!isert_buf->sg)) {
+		pr_err("Failed to allocate buffer SG\n");
+		res = -ENOMEM;
+		goto out;
+	}
+
+	sg_init_table(isert_buf->sg, isert_buf->sg_cnt);
+	for (i = 0; i < isert_buf->sg_cnt; ++i) {
+		size_t page_len = min_t(size_t, size, PAGE_SIZE);
+
+		page = alloc_page(GFP_KERNEL);
+		if (!page) {
+			pr_err("Failed to allocate page\n");
+			res = -ENOMEM;
+			goto out_map_failed;
+		}
+		sg_set_page(&isert_buf->sg[i], page, page_len, 0);
+		size -= page_len;
+	}
+
+	res = ib_dma_map_sg(ib_dev, isert_buf->sg, isert_buf->sg_cnt, dma_dir);
+	if (unlikely(!res)) {
+		pr_err("Failed to DMA map iser sg:%p len:%d\n",
+		       isert_buf->sg, isert_buf->sg_cnt);
+		res = -ENOMEM;
+		goto out_map_failed;
+	}
+
+	isert_buf->addr = sg_virt(&isert_buf->sg[0]);
+
+	res = 0;
+	goto out;
+
+out_map_failed:
+	/*
+	 * Only entries 0 .. i-1 hold a page here: i is the index of the
+	 * entry whose allocation failed, or sg_cnt when the DMA mapping
+	 * failed. Walk back from i-1 down to 0 so that entry 0 is freed and
+	 * entry i itself is never touched.
+	 */
+	while (i-- > 0)
+		__free_page(sg_page(&isert_buf->sg[i]));
+	kfree(isert_buf->sg);
+	isert_buf->sg = NULL;
+out:
+	return res;
+}
+
+/* Free the pages referenced by isert_buf->sg; the SG table itself is kept. */
+static void isert_buf_release_pg(struct isert_buf *isert_buf)
+{
+	int i;
+
+	for (i = 0; i < isert_buf->sg_cnt; ++i)
+		__free_page(sg_page(&isert_buf->sg[i]));
+}
+
+/*
+ * Allocate a kmalloc'ed data buffer of @size bytes described by a
+ * single-entry SG list and DMA-map it on @ib_dev in direction @dma_dir.
+ *
+ * Returns 0 on success or -ENOMEM. On failure both the data buffer and the
+ * SG entry are freed and the corresponding pointers are reset to NULL.
+ */
+static int isert_buf_malloc(struct ib_device *ib_dev,
+			    struct isert_buf *isert_buf, size_t size,
+			    enum dma_data_direction dma_dir)
+{
+	int res = 0;
+
+	isert_buf->sg_cnt = 1;
+	isert_buf->sg = kmalloc(sizeof(isert_buf->sg[0]), GFP_KERNEL);
+	if (unlikely(!isert_buf->sg)) {
+		pr_err("Failed to allocate buffer SG\n");
+		res = -ENOMEM;
+		goto out;
+	}
+
+	isert_buf->addr = kmalloc(size, GFP_KERNEL);
+	if (!isert_buf->addr) {
+		pr_err("Failed to allocate data buffer\n");
+		res = -ENOMEM;
+		goto data_malloc_failed;
+	}
+
+	sg_init_one(&isert_buf->sg[0], isert_buf->addr, size);
+
+	res = ib_dma_map_sg(ib_dev, isert_buf->sg, isert_buf->sg_cnt, dma_dir);
+	if (unlikely(!res)) {
+		pr_err("Failed to DMA map iser sg:%p len:%d\n",
+		       isert_buf->sg, isert_buf->sg_cnt);
+		res = -ENOMEM;
+		goto out_map_failed;
+	}
+
+	res = 0;
+	goto out;
+
+out_map_failed:
+	kfree(isert_buf->addr);
+	isert_buf->addr = NULL;
+data_malloc_failed:
+	/* The SG entry was allocated first, so it is released last. */
+	kfree(isert_buf->sg);
+	isert_buf->sg = NULL;
+out:
+	return res;
+}
+
+/* Free the kmalloc'ed data buffer; the SG entry itself is kept. */
+static void isert_buf_release_kmalloc(struct isert_buf *isert_buf)
+{
+	kfree(isert_buf->addr);
+	isert_buf->addr = NULL;
+}
+
+/*
+ * Allocate and DMA-map a data buffer of @size bytes for @isert_buf. Sizes
+ * of at least PAGE_SIZE use page-based allocation, smaller non-zero sizes
+ * use kmalloc, and a zero size allocates nothing (is_alloced stays 0).
+ *
+ * Returns 0 on success or the error of the failing allocator.
+ */
+int isert_buf_alloc_data_buf(struct ib_device *ib_dev,
+			     struct isert_buf *isert_buf, size_t size,
+			     enum dma_data_direction dma_dir)
+{
+	int res = 0;
+
+	isert_buf->is_alloced = 0;
+	if (size >= PAGE_SIZE) {
+		res = isert_buf_alloc_pg(ib_dev, isert_buf, size, dma_dir);
+		if (unlikely(res))
+			goto out;
+		isert_buf->is_pgalloced = 1;
+		isert_buf->is_malloced = 0;
+		isert_buf->is_alloced = 1;
+	} else if (size) {
+		res = isert_buf_malloc(ib_dev, isert_buf, size, dma_dir);
+		if (unlikely(res))
+			goto out;
+		isert_buf->is_pgalloced = 0;
+		isert_buf->is_malloced = 1;
+		isert_buf->is_alloced = 1;
+	}
+
+	isert_buf->size = size;
+	isert_buf->dma_dir = dma_dir;
+out:
+	return res;
+}
+
+/*
+ * Free the memory of a buffer obtained from isert_buf_alloc_data_buf(),
+ * including its SG table. Does nothing when is_alloced is not set. DMA
+ * unmapping is not done here.
+ */
+void isert_buf_release(struct isert_buf *isert_buf)
+{
+	if (isert_buf->is_alloced) {
+		if (isert_buf->is_pgalloced)
+			isert_buf_release_pg(isert_buf);
+
+		if (isert_buf->is_malloced)
+			isert_buf_release_kmalloc(isert_buf);
+
+		isert_buf->is_alloced = 0;
+		kfree(isert_buf->sg);
+		isert_buf->sg = NULL;
+	}
+}
+
+/* Bind @wr to its connection, PDU and the connection's device. */
+void isert_wr_set_fields(struct isert_wr *wr,
+			 struct isert_connection *isert_conn,
+			 struct isert_cmnd *pdu)
+{
+	struct isert_device *isert_dev = isert_conn->isert_dev;
+
+	wr->conn = isert_conn;
+	wr->pdu = pdu;
+	wr->isert_dev = isert_dev;
+}
+
+/*
+ * Prepare @wr as a receive, send or RDMA work request over @sg_cnt entries
+ * of @isert_buf starting at entry @sg_offset, using @sge (at the same
+ * offset) as the SGE array.
+ *
+ * For RDMA operations the remote address is the PDU's remote VA plus
+ * @buff_offset. Returns @buff_offset advanced by the DMA length of the
+ * entries used, or -EFAULT when the PDU carries no valid remote tag for
+ * the requested RDMA operation; @isert_buf is released in that case.
+ */
+int isert_wr_init(struct isert_wr *wr,
+		  enum isert_wr_op wr_op,
+		  struct isert_buf *isert_buf,
+		  struct isert_connection *isert_conn,
+		  struct isert_cmnd *pdu,
+		  struct ib_sge *sge,
+		  int sg_offset,
+		  int sg_cnt,
+		  int buff_offset)
+{
+	enum ib_wr_opcode send_wr_op = IB_WR_SEND;
+	struct scatterlist *sg_tmp;
+	int i;
+
+	TRACE_ENTRY();
+
+	switch (wr_op) {
+	case ISER_WR_RECV:
+	case ISER_WR_SEND:
+		break;
+	case ISER_WR_RDMA_READ:
+		send_wr_op = IB_WR_RDMA_READ;
+		if (unlikely(!pdu->is_wstag_valid)) {
+			pr_err("No write tag/va specified for RDMA op\n");
+			isert_buf_release(isert_buf);
+			buff_offset = -EFAULT;
+			goto out;
+		}
+		wr->send_wr.wr.rdma.remote_addr = pdu->rem_write_va +
+						  buff_offset;
+		wr->send_wr.wr.rdma.rkey = pdu->rem_write_stag;
+		break;
+	case ISER_WR_RDMA_WRITE:
+		send_wr_op = IB_WR_RDMA_WRITE;
+		if (unlikely(!pdu->is_rstag_valid)) {
+			pr_err("No read tag/va specified for RDMA op\n");
+			isert_buf_release(isert_buf);
+			buff_offset = -EFAULT;
+			goto out;
+		}
+		wr->send_wr.wr.rdma.remote_addr = pdu->rem_read_va +
+						  buff_offset;
+		wr->send_wr.wr.rdma.rkey = pdu->rem_read_stag;
+		break;
+	default:
+		BUG();
+	}
+
+	EXTRACHECKS_BUG_ON(isert_buf->sg_cnt == 0);
+
+	wr->wr_op = wr_op;
+	wr->buf = isert_buf;
+
+	wr->sge_list = sge + sg_offset;
+
+	sg_tmp = &isert_buf->sg[sg_offset];
+	for (i = 0; i < sg_cnt; i++, sg_tmp++) {
+		wr->sge_list[i].addr = sg_dma_address(sg_tmp);
+		wr->sge_list[i].length = sg_dma_len(sg_tmp);
+		buff_offset += wr->sge_list[i].length;
+	}
+
+	if (wr_op == ISER_WR_RECV) {
+		wr->recv_wr.next = NULL;
+		wr->recv_wr.wr_id = _ptr_to_u64(wr);
+		wr->recv_wr.sg_list = wr->sge_list;
+		wr->recv_wr.num_sge = sg_cnt;
+	} else {
+		wr->send_wr.next = NULL;
+		wr->send_wr.wr_id = _ptr_to_u64(wr);
+		wr->send_wr.sg_list = wr->sge_list;
+		wr->send_wr.num_sge = sg_cnt;
+		wr->send_wr.opcode = send_wr_op;
+		wr->send_wr.send_flags = IB_SEND_SIGNALED;
+	}
+
+out:
+	TRACE_EXIT_RES(buff_offset);
+	return buff_offset;
+}
+
+/*
+ * DMA-unmap and free the buffer attached to @wr when that buffer is marked
+ * is_alloced, then zero the whole work request.
+ */
+void isert_wr_release(struct isert_wr *wr)
+{
+	struct isert_buf *isert_buf = wr->buf;
+	if (isert_buf && isert_buf->is_alloced) {
+		struct isert_device *isert_dev = wr->isert_dev;
+		struct ib_device *ib_dev;
+
+		ib_dev = isert_dev->ib_dev;
+		ib_dma_unmap_sg(ib_dev, isert_buf->sg, isert_buf->sg_cnt,
+				isert_buf->dma_dir);
+		isert_buf_release(isert_buf);
+	}
+	memset(wr, 0, sizeof(*wr));
+}
+
diff --git a/iscsi-scst/kernel/isert-scst/iser_datamover.c b/iscsi-scst/kernel/isert-scst/iser_datamover.c
new file mode 100644
index 000000000..f46e7285e
--- /dev/null
+++ b/iscsi-scst/kernel/isert-scst/iser_datamover.c
@@ -0,0 +1,286 @@
+/*
+* isert_datamover.c
+*
+* This file is part of iser target kernel module.
+*
+* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved.
+* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com)
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenIB.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*/ + +#include + +#include "iser.h" +#include "iser_datamover.h" + +int isert_datamover_init(void) +{ + int err; + + err = isert_global_init(); + if (err) { + pr_err("iser datamover init failed, err:%d\n", err); + return err; + } + return 0; +} + +int isert_datamover_cleanup(void) +{ + isert_global_cleanup(); + return 0; +} + +int isert_get_peer_addr(struct iscsi_conn *iscsi_conn, struct sockaddr *sa, + size_t *addr_len) +{ + int ret; + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + struct sockaddr *peer_sa = (struct sockaddr *)&isert_conn->peer_addr; + + ret = isert_get_addr_size(peer_sa, addr_len); + if (unlikely(ret)) + goto out; + + memcpy(sa, peer_sa, *addr_len); +out: + return ret; +} + +int isert_get_target_addr(struct iscsi_conn *iscsi_conn, struct sockaddr *sa, + size_t *addr_len) +{ + int ret; + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + struct sockaddr *self_sa = (struct sockaddr *)&isert_conn->self_addr; + + ret = isert_get_addr_size(self_sa, addr_len); + if (unlikely(ret)) + goto out; + + memcpy(sa, self_sa, *addr_len); +out: + return ret; +} + +void *isert_portal_add(struct sockaddr *saddr, size_t addr_len) +{ + struct isert_portal *portal = isert_portal_start(saddr, addr_len); + + if (IS_ERR(portal)) + portal = NULL; + + return portal; +} + +int isert_portal_remove(void *portal_h) +{ + struct isert_portal *portal = portal_h; + + isert_portal_release(portal); + return 0; +} + +void isert_free_connection(struct iscsi_conn *iscsi_conn) +{ + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + isert_conn_free(isert_conn); +} + +struct iscsi_cmnd *isert_alloc_login_rsp_pdu(struct iscsi_conn *iscsi_conn) +{ + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + struct isert_cmnd *isert_pdu = isert_conn->login_rsp_pdu; + + isert_tx_pdu_init(isert_pdu, isert_conn); + return &isert_pdu->iscsi; +} + +static struct iscsi_cmnd 
*isert_alloc_scsi_pdu(struct iscsi_conn *iscsi_conn, + int fake) +{ + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + struct isert_cmnd *isert_pdu; + + /* + * Move the first PDU from tx_free_list to tx_busy_list under tx_lock + * and tag it with the caller's is_fake_rx value. + * + * list_first_entry() on an empty list yields a pointer computed from + * the list head itself, and list_move() would then relink that head + * into tx_busy_list. Pool exhaustion is therefore caught before the + * lookup instead of silently corrupting both lists. The function + * still never returns NULL. + */ + spin_lock(&isert_conn->tx_lock); + BUG_ON(list_empty(&isert_conn->tx_free_list)); + isert_pdu = list_first_entry(&isert_conn->tx_free_list, + struct isert_cmnd, pool_node); + list_move(&isert_pdu->pool_node, &isert_conn->tx_busy_list); + spin_unlock(&isert_conn->tx_lock); + + isert_pdu->is_fake_rx = fake; + return &isert_pdu->iscsi; +} + +/* Take a PDU from the tx pool with is_fake_rx cleared. */ +struct iscsi_cmnd *isert_alloc_scsi_rsp_pdu(struct iscsi_conn *iscsi_conn) +{ + return isert_alloc_scsi_pdu(iscsi_conn, 0); +} + +/* Take a PDU from the tx pool with is_fake_rx set. */ +struct iscsi_cmnd *isert_alloc_scsi_fake_pdu(struct iscsi_conn *iscsi_conn) +{ + return isert_alloc_scsi_pdu(iscsi_conn, 1); +} + +/* + * Re-initialise the embedded iscsi_cmnd and put the PDU back on + * tx_free_list. The connection pointer is read from iscsi_pdu->conn + * before the re-initialisation call. + */ +void isert_release_tx_pdu(struct iscsi_cmnd *iscsi_pdu) +{ + struct isert_cmnd *isert_pdu = (struct isert_cmnd *)iscsi_pdu; + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_pdu->conn; + + isert_tx_pdu_init_iscsi(isert_pdu); + + spin_lock(&isert_conn->tx_lock); + list_move(&isert_pdu->pool_node, &isert_conn->tx_free_list); + spin_unlock(&isert_conn->tx_lock); +} + +/* PDUs flagged is_fake_rx are skipped; all others go to isert_rx_pdu_done(). */ +void isert_release_rx_pdu(struct iscsi_cmnd *iscsi_pdu) +{ + struct isert_cmnd *isert_pdu = (struct isert_cmnd *)iscsi_pdu; + + if (likely(!isert_pdu->is_fake_rx)) + isert_rx_pdu_done(isert_pdu); +} + +/* if last transition into FF (Fully Featured) state */ +int isert_login_rsp_tx(struct iscsi_cmnd *login_rsp, int last, int discovery) +{ + struct isert_connection *isert_conn = (struct isert_connection *)login_rsp->conn; + + if (last && !discovery) { + int err = isert_alloc_conn_resources(isert_conn); + if (err) { + pr_err("Failed to init conn resources\n"); + return err; + } + isert_pdu_free(isert_conn->login_req_pdu); + isert_conn->login_req_pdu = NULL; + } else { + int err = isert_post_recv(isert_conn, + &isert_conn->login_req_pdu->wr[0], + 1); + if (unlikely(err)) { + pr_err("Failed to post recv login req rx buf, err:%d\n", err); + return err; + } + } + + 
return isert_pdu_tx(login_rsp); +} + +int isert_set_session_params(struct iscsi_conn *iscsi_conn, + struct iscsi_sess_params *sess_params, + struct iscsi_tgt_params *tgt_params) +{ + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + + isert_conn->queue_depth = tgt_params->queued_cmnds; + + isert_conn->immediate_data = sess_params->immediate_data; + isert_conn->target_recv_data_length = sess_params->target_recv_data_length; + isert_conn->initial_r2t = sess_params->initial_r2t; + isert_conn->first_burst_length = sess_params->first_burst_length; + isert_conn->initiator_recv_data_length = sess_params->initiator_recv_data_length; + + return 0; +} + +int isert_pdu_tx(struct iscsi_cmnd *iscsi_cmnd) +{ + struct isert_cmnd *isert_cmnd = (struct isert_cmnd *)iscsi_cmnd; + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_cmnd->conn; + int err; + + isert_tx_pdu_convert_from_iscsi(isert_cmnd, iscsi_cmnd); + err = isert_pdu_send(isert_conn, isert_cmnd); + + return err; +} + +int isert_request_data_out(struct iscsi_cmnd *iscsi_cmnd) +{ + struct isert_cmnd *isert_cmnd = (struct isert_cmnd *)iscsi_cmnd; + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_cmnd->conn; + int ret; + + ret = isert_prepare_rdma(isert_cmnd, isert_conn, ISER_WR_RDMA_READ); + if (unlikely(ret < 0)) + return ret; + + ret = isert_pdu_post_rdma_read(isert_conn, isert_cmnd, ret); + + return ret; +} + +int isert_send_data_in(struct iscsi_cmnd *iscsi_cmnd, + struct iscsi_cmnd *iscsi_rsp) +{ + struct isert_cmnd *isert_cmnd = (struct isert_cmnd *)iscsi_cmnd; + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_cmnd->conn; + struct isert_cmnd *isert_rsp = (struct isert_cmnd *)iscsi_rsp; + int ret; + + ret = isert_prepare_rdma(isert_cmnd, isert_conn, ISER_WR_RDMA_WRITE); + if (unlikely(ret < 0)) + return ret; + + isert_tx_pdu_convert_from_iscsi(isert_rsp, iscsi_rsp); + ret = isert_pdu_post_rdma_write(isert_conn, isert_cmnd, 
isert_rsp, ret); + + return ret; +} + +int isert_close_connection(struct iscsi_conn *iscsi_conn) +{ + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + + isert_conn_disconnect(isert_conn); + + return 0; +} + +int isert_task_abort(struct iscsi_cmnd *cmnd) +{ + return 0; +} + +void *isert_get_priv(struct iscsi_conn *iscsi_conn) +{ + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + + return isert_conn->priv_data; +} + +void isert_set_priv(struct iscsi_conn *iscsi_conn, void *priv) +{ + struct isert_connection *isert_conn = (struct isert_connection *)iscsi_conn; + + isert_conn->priv_data = priv; +} diff --git a/iscsi-scst/kernel/isert-scst/iser_datamover.h b/iscsi-scst/kernel/isert-scst/iser_datamover.h new file mode 100644 index 000000000..f403ce509 --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/iser_datamover.h @@ -0,0 +1,59 @@ +#ifndef __ISER_DATAMOVER_H__ +#define __ISER_DATAMOVER_H__ + +#include "iscsi.h" + +/* iscsi layer calling iser */ +int isert_datamover_init(void); +int isert_datamover_cleanup(void); + +void *isert_portal_add(struct sockaddr *sa, size_t addr_len); +int isert_portal_remove(void *portal_h); + +struct iscsi_cmnd *isert_alloc_login_rsp_pdu(struct iscsi_conn *iscsi_conn); + +int isert_get_peer_addr(struct iscsi_conn *iscsi_conn, struct sockaddr *sa, + size_t *addr_len); + +int isert_get_target_addr(struct iscsi_conn *iscsi_conn, struct sockaddr *sa, + size_t *addr_len); + + /* last: if last transition into FF (Fully Featured) state */ +int isert_login_rsp_tx(struct iscsi_cmnd *login_rsp, + int last, int discovery); +int isert_set_session_params(struct iscsi_conn *iscsi_conn, + struct iscsi_sess_params *sess_params, + struct iscsi_tgt_params *tgt_params); + +struct iscsi_cmnd *isert_alloc_scsi_rsp_pdu(struct iscsi_conn *iscsi_conn); +struct iscsi_cmnd *isert_alloc_scsi_fake_pdu(struct iscsi_conn *iscsi_conn); + +int isert_pdu_tx(struct iscsi_cmnd *pdu); + +int 
isert_request_data_out(struct iscsi_cmnd *cmd); +int isert_send_data_in(struct iscsi_cmnd *cmd, struct iscsi_cmnd *rsp); +int isert_send_status(struct iscsi_cmnd *rsp); + +int isert_close_connection(struct iscsi_conn *iscsi_conn); +int isert_task_abort(struct iscsi_cmnd *cmnd); +void isert_free_connection(struct iscsi_conn *iscsi_conn); + +void isert_release_tx_pdu(struct iscsi_cmnd *iscsi_pdu); +void isert_release_rx_pdu(struct iscsi_cmnd *cmnd); + +/* iser calling iscsi layer */ +int isert_conn_established(struct iscsi_conn *iscsi_conn, + struct sockaddr *from_addr, int addr_len); +int isert_login_req_rx(struct iscsi_cmnd *login_req); +int isert_pdu_rx(struct iscsi_cmnd *pdu); +int isert_data_out_ready(struct iscsi_cmnd *cmd); +int isert_data_in_sent(struct iscsi_cmnd *cmd); +int isert_pdu_sent(struct iscsi_cmnd *pdu); +void isert_pdu_err(struct iscsi_cmnd *pdu); + +int isert_connection_closed(struct iscsi_conn *iscsi_conn); + +void *isert_get_priv(struct iscsi_conn *iscsi_conn); +void isert_set_priv(struct iscsi_conn *iscsi_conn, void *priv); + +#endif diff --git a/iscsi-scst/kernel/isert-scst/iser_global.c b/iscsi-scst/kernel/isert-scst/iser_global.c new file mode 100644 index 000000000..71f85cd6e --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/iser_global.c @@ -0,0 +1,161 @@ +/* +* isert_global.c +* +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. +* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. 
You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. 
+*/ + +#include + +#include "iser.h" + +static struct isert_global isert_glob; + +struct kmem_cache *isert_cmnd_cache; +struct kmem_cache *isert_conn_cache; + +void isert_portal_list_add(struct isert_portal *portal) +{ + spin_lock(&isert_glob.portal_lock); + list_add_tail(&portal->list_node, &isert_glob.portal_list); + spin_unlock(&isert_glob.portal_lock); +} + +void isert_portal_list_remove(struct isert_portal *portal) +{ + spin_lock(&isert_glob.portal_lock); + list_del_init(&portal->list_node); + spin_unlock(&isert_glob.portal_lock); +} + +void isert_dev_list_add(struct isert_device *isert_dev) +{ + list_add_tail(&isert_dev->devs_node, &isert_glob.dev_list); +} + +void isert_dev_list_remove(struct isert_device *isert_dev) +{ + list_del_init(&isert_dev->devs_node); +} + +struct isert_device *isert_device_find(struct ib_device *ib_dev) +{ + struct isert_device *isert_dev; + struct isert_device *res = NULL; + + list_for_each_entry(isert_dev, &isert_glob.dev_list, devs_node) { + if (isert_dev->ib_dev == ib_dev) { + res = isert_dev; + break; + } + } + + return res; +} + +void isert_portal_list_release_all(void) +{ + struct isert_portal *portal, *n; + + list_for_each_entry_safe(portal, n, &isert_glob.portal_list, list_node) + isert_portal_release(portal); +} + +void isert_conn_queue_work(struct work_struct *w) +{ + queue_work(isert_glob.conn_wq, w); +} + +int isert_global_init(void) +{ + INIT_LIST_HEAD(&isert_glob.portal_list); + INIT_LIST_HEAD(&isert_glob.dev_list); + + spin_lock_init(&isert_glob.portal_lock); + + isert_glob.conn_wq = create_workqueue("isert_conn_wq"); + if (!isert_glob.conn_wq) { + pr_err("Failed to alloc iser conn work queue\n"); + return -ENOMEM; + } + + isert_cmnd_cache = KMEM_CACHE(isert_cmnd, + SCST_SLAB_FLAGS|SLAB_HWCACHE_ALIGN); + if (!isert_cmnd_cache) { + destroy_workqueue(isert_glob.conn_wq); + pr_err("Failed to alloc iser command cache\n"); + return -ENOMEM; + } + + isert_conn_cache = KMEM_CACHE(isert_connection, + 
SCST_SLAB_FLAGS|SLAB_HWCACHE_ALIGN); + if (!isert_conn_cache) { + destroy_workqueue(isert_glob.conn_wq); + kmem_cache_destroy(isert_cmnd_cache); + pr_err("Failed to alloc iser connection cache\n"); + return -ENOMEM; + } + + return 0; +} + +void isert_global_cleanup(void) +{ + isert_portal_list_release_all(); + if (isert_glob.conn_wq) + destroy_workqueue(isert_glob.conn_wq); + if (isert_cmnd_cache) + kmem_cache_destroy(isert_cmnd_cache); + if (isert_conn_cache) + kmem_cache_destroy(isert_conn_cache); +} + +int isert_get_addr_size(struct sockaddr *sa, size_t *addr_len) +{ + int ret = 0; + + switch (sa->sa_family) { + case AF_INET: + *addr_len = sizeof(struct sockaddr_in); + break; + case AF_INET6: + *addr_len = sizeof(struct sockaddr_in6); + break; + default: + pr_err("Unknown address family\n"); + ret = -EINVAL; + goto out; + } +out: + return ret; +} diff --git a/iscsi-scst/kernel/isert-scst/iser_hdr.h b/iscsi-scst/kernel/isert-scst/iser_hdr.h new file mode 100644 index 000000000..bcaf64905 --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/iser_hdr.h @@ -0,0 +1,27 @@ +#ifndef __ISER_HDR_H__ +#define __ISER_HDR_H__ + +#include "iscsi.h" + +#define ISCSI_LOGIN_MAX_RDSL (8 * 1024) + +struct isert_hdr { + u8 flags; + u8 rsvd[3]; + __be32 write_stag; /* write rkey */ + __be64 write_va; + __be32 read_stag; /* read rkey */ + __be64 read_va; +} __packed; + +#define ISER_WSV 0x08 +#define ISER_RSV 0x04 + +#define ISER_ISCSI_CTRL 0x10 +#define ISER_HELLO 0x20 +#define ISER_HELLORPLY 0x30 + +#define ISER_HDRS_SZ (sizeof(struct isert_hdr) + sizeof(struct iscsi_hdr)) + +#endif + diff --git a/iscsi-scst/kernel/isert-scst/iser_pdu.c b/iscsi-scst/kernel/isert-scst/iser_pdu.c new file mode 100644 index 000000000..eeecc2f2e --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/iser_pdu.c @@ -0,0 +1,575 @@ +/* +* isert_pdu.c +* +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. 
+* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. 
+*/ + +#include + +#include "iser.h" +#include "iscsi.h" +#include "iser_datamover.h" + +static inline int isert_pdu_rx_buf_init(struct isert_cmnd *isert_pdu, + struct isert_connection *isert_conn) +{ + struct isert_buf *isert_buf = &isert_pdu->buf; + + return isert_wr_init(&isert_pdu->wr[0], ISER_WR_RECV, isert_buf, + isert_conn, isert_pdu, isert_pdu->sg_pool, + 0, isert_buf->sg_cnt, 0); +} + +static inline int isert_pdu_tx_buf_init(struct isert_cmnd *isert_pdu, + struct isert_connection *isert_conn) +{ + struct isert_buf *isert_buf = &isert_pdu->buf; + + return isert_wr_init(&isert_pdu->wr[0], ISER_WR_SEND, isert_buf, + isert_conn, isert_pdu, isert_pdu->sg_pool, + 0, isert_buf->sg_cnt, 0); +} + +static inline void isert_pdu_set_hdr_plain(struct isert_cmnd *isert_pdu) +{ + struct isert_hdr *isert_hdr = isert_pdu->isert_hdr; + + isert_hdr->flags = ISER_ISCSI_CTRL; + isert_hdr->write_stag = 0; + isert_hdr->write_va = 0; + isert_hdr->read_stag = 0; + isert_hdr->read_va = 0; +} + +/* rx pdu should be initialized to get the posted buffer and + * the associated pointers right; after a pdu is received + * it should be parsed to setup isert_cmnd + iscsi_cmnd in full + */ +static int isert_rx_pdu_init(struct isert_cmnd *isert_pdu, + struct isert_connection *isert_conn) +{ + struct iscsi_cmnd *iscsi_cmnd = &isert_pdu->iscsi; + int err = isert_pdu_rx_buf_init(isert_pdu, isert_conn); + if (unlikely(err < 0)) + return err; + iscsi_cmnd->conn = &isert_conn->iscsi; + return 0; +} + +void isert_tx_pdu_init_iscsi(struct isert_cmnd *isert_pdu) +{ + struct iscsi_cmnd *iscsi_cmnd = &isert_pdu->iscsi; + struct isert_buf *isert_buf = &isert_pdu->buf; + + memset(iscsi_cmnd, 0, sizeof(*iscsi_cmnd)); + + iscsi_cmnd->sg_cnt = isert_buf->sg_cnt; + iscsi_cmnd->sg = isert_buf->sg; + iscsi_cmnd->bufflen = isert_buf->size; +} + +/* tx pdu should set most of the pointers to enable filling out + * of the iscsi pdu struct + */ +void isert_tx_pdu_init(struct isert_cmnd *isert_pdu, + struct 
isert_connection *isert_conn) +{ + struct iscsi_cmnd *iscsi_cmnd = &isert_pdu->iscsi; + struct isert_buf *isert_buf = &isert_pdu->buf; + void *addr = isert_buf->addr; + struct iscsi_hdr *bhs = (struct iscsi_hdr *)(addr + sizeof(struct isert_hdr)); + + isert_pdu->isert_hdr = (struct isert_hdr *)addr; + isert_pdu->bhs = bhs; + isert_pdu->ahs = NULL; + + isert_tx_pdu_init_iscsi(isert_pdu); + iscsi_cmnd->conn = &isert_conn->iscsi; +} + +void isert_tx_pdu_convert_from_iscsi(struct isert_cmnd *isert_cmnd, + struct iscsi_cmnd *iscsi_cmnd) +{ + struct iscsi_pdu *iscsi_pdu = &iscsi_cmnd->pdu; + + TRACE_ENTRY(); + + memcpy(isert_cmnd->bhs, &iscsi_pdu->bhs, sizeof(*isert_cmnd->bhs)); + if (unlikely(iscsi_pdu->ahssize)) { + isert_cmnd->ahs = isert_cmnd->bhs + 1; + memcpy(isert_cmnd->ahs, iscsi_pdu->ahs, iscsi_pdu->ahssize); + } + +#ifdef CONFIG_SCST_EXTRACHECKS + if (iscsi_cmnd->bufflen) + EXTRACHECKS_BUG_ON(!iscsi_cmnd->sg); +#endif + + TRACE_EXIT(); + return; +} + +static inline int isert_pdu_prepare_send(struct isert_connection *isert_conn, + struct isert_cmnd *tx_pdu) +{ + struct isert_device *isert_dev = isert_conn->isert_dev; + struct ib_sge *sge = tx_pdu->wr[0].sge_list; + size_t to_sync, size; + int sg_cnt = 0; + + size = ISER_HDRS_SZ + tx_pdu->iscsi.pdu.ahssize + + tx_pdu->iscsi.pdu.datasize; + while (size) { + to_sync = size > PAGE_SIZE ? 
PAGE_SIZE : size; + ib_dma_sync_single_for_device(isert_dev->ib_dev, sge->addr, + to_sync, + DMA_TO_DEVICE); + + sge->length = to_sync; + size -= to_sync; + ++sge; + ++sg_cnt; + } + + return sg_cnt; +} + +static inline void isert_link_send_wrs(struct isert_wr *from_wr, + struct isert_wr *to_wr) +{ + from_wr->send_wr.next = &to_wr->send_wr; + from_wr->send_wr.send_flags = 0; /* not signaled */ + + to_wr->send_wr.next = NULL; + to_wr->send_wr.send_flags = IB_SEND_SIGNALED; +} + +static inline void isert_link_send_pdu_wrs(struct isert_cmnd *from_pdu, + struct isert_cmnd *to_pdu, + int wr_cnt) +{ + isert_link_send_wrs(&from_pdu->wr[wr_cnt - 1], &to_pdu->wr[0]); +} + +int isert_prepare_rdma(struct isert_cmnd *isert_pdu, + struct isert_connection *isert_conn, + enum isert_wr_op op) +{ + struct isert_buf *isert_buf = &isert_pdu->rdma_buf; + struct isert_device *isert_dev = isert_conn->isert_dev; + struct ib_device *ib_dev = isert_dev->ib_dev; + int err; + int buff_offset; + int sg_offset, sg_cnt; + int wr_cnt, i; + + isert_buf_init_sg(isert_buf, isert_pdu->iscsi.sg, + isert_pdu->iscsi.sg_cnt, + isert_pdu->iscsi.bufflen); + + if (op == ISER_WR_RDMA_WRITE) + isert_buf->dma_dir = DMA_TO_DEVICE; + else + isert_buf->dma_dir = DMA_FROM_DEVICE; + + if (unlikely(isert_buf->sg_cnt > ISER_MAX_SGE)) { + pr_err("Scatterlist too large: %d\n", isert_buf->sg_cnt); + wr_cnt = -EOPNOTSUPP; + goto out; + } + + err = ib_dma_map_sg(ib_dev, isert_buf->sg, isert_buf->sg_cnt, + isert_buf->dma_dir); + if (unlikely(!err)) { + pr_err("Failed to DMA map iser sg:%p len:%d\n", + isert_buf->sg, isert_buf->sg_cnt); + wr_cnt = -EFAULT; + goto out; + } + + buff_offset = 0; + sg_cnt = 0; + for (wr_cnt = 0, sg_offset = 0; sg_offset < isert_buf->sg_cnt; ++wr_cnt) { + sg_cnt = min((int)isert_conn->max_sge, + isert_buf->sg_cnt - sg_offset); + err = isert_wr_init(&isert_pdu->wr[wr_cnt], op, isert_buf, + isert_conn, isert_pdu, isert_pdu->sg_pool, + sg_offset, sg_cnt, buff_offset); + if (unlikely(err < 0)) { + 
wr_cnt = err; + goto out; + } + buff_offset = err; + sg_offset += sg_cnt; + } + + for (i = 1; i < wr_cnt; ++i) + isert_link_send_wrs(&isert_pdu->wr[i - 1], &isert_pdu->wr[i]); + +out: + TRACE_EXIT_RES(wr_cnt); + return wr_cnt; +} + +void isert_pdu_free(struct isert_cmnd *pdu) +{ + unsigned int i; + + list_del(&pdu->pool_node); + for (i = 0; i < ARRAY_SIZE(pdu->wr); ++i) + isert_wr_release(&pdu->wr[i]); + + isert_pdu_kfree(pdu); +} + +struct isert_cmnd *isert_rx_pdu_alloc(struct isert_connection *isert_conn, + size_t size) +{ + struct isert_cmnd *pdu = NULL; + int err; + unsigned int i; + + TRACE_ENTRY(); + + pdu = isert_pdu_alloc(); + if (unlikely(!pdu)) { + pr_err("Failed to alloc pdu\n"); + goto out; + } + + err = isert_buf_alloc_data_buf(isert_conn->isert_dev->ib_dev, + &pdu->buf, size, DMA_FROM_DEVICE); + if (unlikely(err)) { + pr_err("Failed to alloc rx pdu buf sz:%zd\n", size); + goto buf_alloc_failed; + } + + err = isert_rx_pdu_init(pdu, isert_conn); + if (unlikely(err)) { + pr_err("Failed to init rx pdu wr:%p size:%zd err:%d\n", + &pdu->wr, size, err); + goto pdu_init_failed; + } + + for (i = 0; i < ARRAY_SIZE(pdu->wr); ++i) + isert_wr_set_fields(&pdu->wr[i], isert_conn, pdu); + + for (i = 0; i < ARRAY_SIZE(pdu->sg_pool); ++i) + pdu->sg_pool[i].lkey = isert_conn->isert_dev->mr->lkey; + + list_add_tail(&pdu->pool_node, &isert_conn->rx_buf_list); + + goto out; + +pdu_init_failed: + isert_buf_release(&pdu->buf); +buf_alloc_failed: + isert_pdu_kfree(pdu); + pdu = NULL; +out: + TRACE_EXIT(); + return pdu; +} + +struct isert_cmnd *isert_tx_pdu_alloc(struct isert_connection *isert_conn, + size_t size) +{ + struct isert_cmnd *pdu = NULL; + int err; + unsigned int i; + + TRACE_ENTRY(); + + pdu = isert_pdu_alloc(); + if (unlikely(!pdu)) { + pr_err("Failed to alloc pdu\n"); + goto out; + } + + err = isert_buf_alloc_data_buf(isert_conn->isert_dev->ib_dev, + &pdu->buf, size, DMA_TO_DEVICE); + if (unlikely(err)) { + pr_err("Failed to alloc tx pdu buf sz:%zd\n", size); + 
goto buf_alloc_failed; + } + + err = isert_pdu_tx_buf_init(pdu, isert_conn); + if (unlikely(err < 0)) { + pr_err("Failed to init tx pdu wr:%p size:%zd err:%d\n", + &pdu->wr, size, err); + goto buf_init_failed; + } + isert_tx_pdu_init(pdu, isert_conn); + + for (i = 0; i < ARRAY_SIZE(pdu->wr); ++i) + isert_wr_set_fields(&pdu->wr[i], isert_conn, pdu); + + for (i = 0; i < ARRAY_SIZE(pdu->sg_pool); ++i) + pdu->sg_pool[i].lkey = isert_conn->isert_dev->mr->lkey; + + isert_pdu_set_hdr_plain(pdu); + + list_add_tail(&pdu->pool_node, &isert_conn->tx_free_list); + + goto out; + +buf_init_failed: + isert_buf_release(&pdu->buf); +buf_alloc_failed: + isert_pdu_kfree(pdu); + pdu = NULL; +out: + TRACE_EXIT(); + return pdu; +} + +static inline void isert_link_recv_wrs(struct isert_wr *from_wr, + struct isert_wr *to_wr) +{ + from_wr->recv_wr.next = &to_wr->recv_wr; + + to_wr->recv_wr.next = NULL; +} + +static inline void isert_link_recv_pdu_wrs(struct isert_cmnd *from_pdu, + struct isert_cmnd *to_pdu) +{ + isert_link_recv_wrs(&from_pdu->wr[0], &to_pdu->wr[0]); +} + +int isert_alloc_conn_resources(struct isert_connection *isert_conn) +{ + struct isert_cmnd *pdu, *prev_pdu = NULL, *first_pdu = NULL; + int t_datasz = 512; /* RFC states that minimum receive data size is 512 */ + int i_datasz = ISER_HDRS_SZ + SCST_SENSE_BUFFERSIZE; + int i, err = 0; + int to_alloc; + + TRACE_ENTRY(); + + isert_conn->repost_threshold = 32; + to_alloc = isert_conn->queue_depth * 2 + isert_conn->repost_threshold; + + if (unlikely(to_alloc > ISER_MAX_WCE)) { + pr_err("QueuedCommands larger than %d not supported\n", + (ISER_MAX_WCE - isert_conn->repost_threshold) / 2); + err = -EINVAL; + goto out; + } + + for (i = 0; i < to_alloc; i++) { + pdu = isert_rx_pdu_alloc(isert_conn, t_datasz); + if (unlikely(!pdu)) { + err = -ENOMEM; + goto clean_pdus; + } + + if (unlikely(first_pdu == NULL)) + first_pdu = pdu; + else + isert_link_recv_pdu_wrs(prev_pdu, pdu); + + prev_pdu = pdu; + + pdu = 
isert_tx_pdu_alloc(isert_conn, i_datasz); + if (unlikely(!pdu)) { + err = -ENOMEM; + goto clean_pdus; + } + } + + err = isert_post_recv(isert_conn, &first_pdu->wr[0], to_alloc); + if (unlikely(err)) { + pr_err("Failed to post recv err:%d\n", err); + goto clean_pdus; + } + +out: + TRACE_EXIT_RES(err); + return err; + +clean_pdus: + isert_free_conn_resources(isert_conn); + goto out; +} + +static int isert_reinit_rx_pdu(struct isert_cmnd *pdu) +{ + struct isert_connection *isert_conn = (struct isert_connection *)pdu->iscsi.conn; + + pdu->is_rstag_valid = 0; + pdu->is_wstag_valid = 0; + + memset(&pdu->iscsi, 0, sizeof(pdu->iscsi)); + + return isert_rx_pdu_init(pdu, isert_conn); +} + +int isert_rx_pdu_done(struct isert_cmnd *pdu) +{ + int err; + struct isert_connection *isert_conn = (struct isert_connection *)pdu->iscsi.conn; + + TRACE_ENTRY(); + + err = isert_reinit_rx_pdu(pdu); + if (unlikely(err)) + goto out; + + spin_lock(&isert_conn->post_recv_lock); + if (unlikely(isert_conn->to_post_recv == 0)) + isert_conn->post_recv_first = &pdu->wr[0]; + else + isert_link_recv_wrs(isert_conn->post_recv_curr, &pdu->wr[0]); + + isert_conn->post_recv_curr = &pdu->wr[0]; + + if (++isert_conn->to_post_recv > isert_conn->repost_threshold) { + err = isert_post_recv(isert_conn, isert_conn->post_recv_first, + isert_conn->to_post_recv); + if (unlikely(err)) + pr_err("Failed to post recv err:%d\n", err); + + isert_conn->to_post_recv = 0; + } + spin_unlock(&isert_conn->post_recv_lock); + +out: + TRACE_EXIT_RES(err); + return err; +} + +void isert_free_conn_resources(struct isert_connection *isert_conn) +{ + struct isert_cmnd *pdu; + + TRACE_ENTRY(); + + if (isert_conn->login_rsp_pdu) { + isert_pdu_free(isert_conn->login_rsp_pdu); + isert_conn->login_rsp_pdu = NULL; + } + if (isert_conn->login_req_pdu) { + isert_pdu_free(isert_conn->login_req_pdu); + isert_conn->login_req_pdu = NULL; + } + + while (!list_empty(&isert_conn->rx_buf_list)) { + pdu = list_first_entry(&isert_conn->rx_buf_list, 
+ struct isert_cmnd, pool_node); + isert_pdu_free(pdu); /* releases buffer as well */ + } + + spin_lock(&isert_conn->tx_lock); + while (!list_empty(&isert_conn->tx_free_list)) { + pdu = list_first_entry(&isert_conn->tx_free_list, + struct isert_cmnd, pool_node); + isert_pdu_free(pdu); /* releases buffer as well */ + } + + while (!list_empty(&isert_conn->tx_busy_list)) { + pdu = list_first_entry(&isert_conn->tx_busy_list, + struct isert_cmnd, pool_node); + isert_pdu_free(pdu); /* releases buffer as well */ + } + spin_unlock(&isert_conn->tx_lock); + + TRACE_EXIT(); +} + +int isert_pdu_send(struct isert_connection *isert_conn, + struct isert_cmnd *tx_pdu) +{ + int err; + struct isert_wr *wr; + + TRACE_ENTRY(); + +#ifdef CONFIG_SCST_EXTRACHECKS + EXTRACHECKS_BUG_ON(!isert_conn); + EXTRACHECKS_BUG_ON(!tx_pdu); +#endif + + wr = &tx_pdu->wr[0]; + wr->send_wr.num_sge = isert_pdu_prepare_send(isert_conn, tx_pdu); + + err = isert_post_send(isert_conn, wr, 1); + if (unlikely(err)) { + pr_err("Failed to send pdu conn:%p pdu:%p err:%d\n", + isert_conn, tx_pdu, err); + } + + TRACE_EXIT_RES(err); + return err; +} + +int isert_pdu_post_rdma_write(struct isert_connection *isert_conn, + struct isert_cmnd *isert_cmd, + struct isert_cmnd *isert_rsp, + int wr_cnt) +{ + int err; + + TRACE_ENTRY(); + + isert_rsp->wr[0].send_wr.num_sge = isert_pdu_prepare_send(isert_conn, + isert_rsp); + isert_link_send_pdu_wrs(isert_cmd, isert_rsp, wr_cnt); + err = isert_post_send(isert_conn, &isert_cmd->wr[0], wr_cnt + 1); + if (unlikely(err)) { + pr_err("Failed to send pdu conn:%p pdu:%p err:%d\n", + isert_conn, isert_cmd, err); + } + + TRACE_EXIT_RES(err); + return err; +} + +int isert_pdu_post_rdma_read(struct isert_connection *isert_conn, + struct isert_cmnd *isert_cmd, int wr_cnt) +{ + int err; + + TRACE_ENTRY(); + + err = isert_post_send(isert_conn, &isert_cmd->wr[0], wr_cnt); + if (unlikely(err)) { + pr_err("Failed to send pdu conn:%p pdu:%p err:%d\n", + isert_conn, isert_cmd, err); + } + + 
TRACE_EXIT_RES(err); + return err; +} + diff --git a/iscsi-scst/kernel/isert-scst/iser_rdma.c b/iscsi-scst/kernel/isert-scst/iser_rdma.c new file mode 100644 index 000000000..8140bf6a9 --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/iser_rdma.c @@ -0,0 +1,1569 @@ +/* +* isert_rdma.c +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. +* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. 
+*/ + +#include +#include + +#include "iser.h" +#include "iser_datamover.h" + +#define ISER_CQ_ENTRIES (128 * 1024) +#define ISER_LISTEN_BACKLOG 8 + +static DEFINE_MUTEX(dev_list_mutex); + +static int isert_num_recv_posted_on_err(struct ib_recv_wr *first_ib_wr, + struct ib_recv_wr *bad_wr) +{ + struct ib_recv_wr *wr; + int num_posted = 0; + + for (wr = first_ib_wr; wr != NULL && wr != bad_wr; wr = wr->next) + num_posted++; + + return num_posted; +} + +int isert_post_recv(struct isert_connection *isert_conn, + struct isert_wr *first_wr, + int num_wr) +{ + struct ib_recv_wr *first_ib_wr = &first_wr->recv_wr; + struct ib_recv_wr *bad_wr; + int num_posted; + int err; + + TRACE_ENTRY(); + + err = ib_post_recv(isert_conn->qp, first_ib_wr, &bad_wr); + if (unlikely(err)) { + num_posted = isert_num_recv_posted_on_err(first_ib_wr, bad_wr); + + pr_err("conn:%p recv posted:%d/%d 1st wr_id:0x%llx sz:%d err:%d\n", + isert_conn, num_posted, num_wr, first_ib_wr->wr_id, + first_ib_wr->sg_list->length, err); + } + + TRACE_EXIT_RES(err); + return err; +} + +static int isert_num_send_posted_on_err(struct ib_send_wr *first_ib_wr, + struct ib_send_wr *bad_wr) +{ + struct ib_send_wr *wr; + int num_posted = 0; + + for (wr = first_ib_wr; wr != NULL && wr != bad_wr; wr = wr->next) + num_posted++; + + return num_posted; +} + +int isert_post_send(struct isert_connection *isert_conn, + struct isert_wr *first_wr, + int num_wr) +{ + struct ib_send_wr *first_ib_wr = &first_wr->send_wr; + struct ib_send_wr *bad_wr; + int num_posted; + int err; + + TRACE_ENTRY(); + + err = ib_post_send(isert_conn->qp, first_ib_wr, &bad_wr); + if (unlikely(err)) { + num_posted = isert_num_send_posted_on_err(first_ib_wr, bad_wr); + + pr_err("conn:%p send posted:%d/%d bad wr_id:0x%llx sz:%d num_sge: %d err:%d\n", + isert_conn, num_posted, num_wr, bad_wr->wr_id, + bad_wr->sg_list->length, bad_wr->num_sge, err); + } + + TRACE_EXIT_RES(err); + return err; +} + +void isert_conn_disconnect(struct isert_connection 
*isert_conn) +{ + int err = rdma_disconnect(isert_conn->cm_id); + if (unlikely(err)) + pr_err("Failed to rdma disconnect, err:%d\n", err); +} + +static int isert_pdu_handle_hello_req(struct isert_cmnd *pdu) +{ + pr_info("iSER Hello not supported\n"); + return -EINVAL; /* meanwhile disconnect immediately */ +} + +static int isert_pdu_handle_login_req(struct isert_cmnd *isert_pdu) +{ + return isert_login_req_rx(&isert_pdu->iscsi); +} + +static int isert_pdu_handle_text(struct isert_cmnd *pdu) +{ + return isert_login_req_rx(&pdu->iscsi); +} + +static int isert_pdu_handle_nop_out(struct isert_cmnd *pdu) +{ + return isert_pdu_rx(&pdu->iscsi); +} + +static int isert_pdu_handle_scsi_cmd(struct isert_cmnd *pdu) +{ + return isert_pdu_rx(&pdu->iscsi); +} + +static int isert_pdu_handle_tm_func(struct isert_cmnd *pdu) +{ + return isert_pdu_rx(&pdu->iscsi); +} + +static int isert_pdu_handle_data_out(struct isert_cmnd *pdu) +{ + pr_info("iser iscsi data out not supported\n"); + return -EINVAL; /* meanwhile disconnect immediately */ +} + +static int isert_pdu_handle_logout(struct isert_cmnd *pdu) +{ + return isert_pdu_rx(&pdu->iscsi); +} + +static int isert_pdu_handle_snack(struct isert_cmnd *pdu) +{ + pr_info("iser iscsi SNACK not supported\n"); + return -EINVAL; /* meanwhile disconnect immediately */ +} + +static void isert_rx_pdu_parse_headers(struct isert_cmnd *isert_pdu) +{ + struct iscsi_cmnd *iscsi_cmnd = &isert_pdu->iscsi; + struct isert_buf *isert_buf = &isert_pdu->buf; + u8 *addr = isert_buf->addr; + struct isert_hdr *isert_hdr = (struct isert_hdr *)addr; + struct iscsi_hdr *bhs = (struct iscsi_hdr *)(addr + sizeof(*isert_hdr)); + unsigned int data_offset = ISER_HDRS_SZ; + unsigned int ahssize; + + TRACE_ENTRY(); + + isert_pdu->isert_hdr = isert_hdr; + isert_pdu->isert_opcode = isert_hdr->flags & 0xf0; + isert_pdu->is_rstag_valid = isert_hdr->flags & ISER_RSV ? 1 : 0; + isert_pdu->is_wstag_valid = isert_hdr->flags & ISER_WSV ? 
1 : 0; + + if (isert_pdu->is_rstag_valid) { + isert_pdu->rem_read_stag = be32_to_cpu(isert_hdr->read_stag); + isert_pdu->rem_read_va = be64_to_cpu(isert_hdr->read_va); + } + + if (isert_pdu->is_wstag_valid) { + isert_pdu->rem_write_stag = be32_to_cpu(isert_hdr->write_stag); + isert_pdu->rem_write_va = be64_to_cpu(isert_hdr->write_va); + } + + isert_pdu->bhs = bhs; + isert_pdu->iscsi_opcode = bhs->opcode & ISCSI_OPCODE_MASK; + + memcpy(&iscsi_cmnd->pdu.bhs, bhs, sizeof(iscsi_cmnd->pdu.bhs)); + iscsi_cmnd_get_length(&iscsi_cmnd->pdu); /* get ahssize and datasize */ + + ahssize = isert_pdu->iscsi.pdu.ahssize; + if (likely(!ahssize)) { + isert_pdu->ahs = NULL; + } else { + isert_pdu->ahs = addr + ISER_HDRS_SZ; + data_offset += ahssize; + } + iscsi_cmnd->pdu.ahs = isert_pdu->ahs; + + iscsi_cmnd->bufflen = iscsi_cmnd->pdu.datasize; + iscsi_cmnd->bufflen = (iscsi_cmnd->bufflen + 3) & ~3; + if (iscsi_cmnd->bufflen) { + iscsi_cmnd->sg_cnt = isert_pdu->buf.sg_cnt; + iscsi_cmnd->sg = isert_pdu->buf.sg; + } else { + iscsi_cmnd->sg = NULL; + } + + TRACE_EXIT(); +} + +static void isert_dma_sync_data_for_cpu(struct ib_device *ib_dev, + struct ib_sge *sge, size_t size) +{ + size_t to_sync = size > (PAGE_SIZE - ISER_HDRS_SZ) ? + (PAGE_SIZE - ISER_HDRS_SZ) : size; + ib_dma_sync_single_for_cpu(ib_dev, sge->addr + ISER_HDRS_SZ, + to_sync, + DMA_FROM_DEVICE); + + size -= to_sync; + while (size) { + ++sge; + to_sync = size > PAGE_SIZE ? 
PAGE_SIZE : size; + ib_dma_sync_single_for_cpu(ib_dev, sge->addr, + to_sync, + DMA_FROM_DEVICE); + + size -= to_sync; + } +} + +static void isert_recv_completion_handler(struct isert_wr *wr) +{ + struct isert_cmnd *pdu = wr->pdu; + struct ib_sge *sge = wr->sge_list; + struct ib_device *ib_dev = wr->isert_dev->ib_dev; + int err; + + TRACE_ENTRY(); + + ib_dma_sync_single_for_cpu(ib_dev, sge->addr, + ISER_HDRS_SZ, + DMA_FROM_DEVICE); + isert_rx_pdu_parse_headers(pdu); + isert_dma_sync_data_for_cpu(ib_dev, sge, + pdu->iscsi.pdu.datasize + pdu->iscsi.pdu.ahssize); + + switch (pdu->isert_opcode) { + case ISER_ISCSI_CTRL: + switch (pdu->iscsi_opcode) { + case ISCSI_OP_NOP_OUT: + err = isert_pdu_handle_nop_out(pdu); + break; + case ISCSI_OP_SCSI_CMD: + err = isert_pdu_handle_scsi_cmd(pdu); + break; + case ISCSI_OP_SCSI_TASK_MGT_MSG: + err = isert_pdu_handle_tm_func(pdu); + break; + case ISCSI_OP_LOGIN_CMD: + err = isert_pdu_handle_login_req(pdu); + break; + case ISCSI_OP_TEXT_CMD: + err = isert_pdu_handle_text(pdu); + break; + case ISCSI_OP_SCSI_DATA_OUT: + err = isert_pdu_handle_data_out(pdu); + break; + case ISCSI_OP_LOGOUT_CMD: + err = isert_pdu_handle_logout(pdu); + break; + case ISCSI_OP_SNACK_CMD: + err = isert_pdu_handle_snack(pdu); + break; + default: + pr_err("Unexpected iscsi opcode:0x%x\n", + pdu->iscsi_opcode); + err = -EINVAL; + break; + } + break; + case ISER_HELLO: + err = isert_pdu_handle_hello_req(pdu); + break; + default: + pr_err("malformed isert_hdr, iser op:%x flags 0x%02x\n", + pdu->isert_opcode, pdu->isert_hdr->flags); + err = -EINVAL; + break; + } + + if (unlikely(err)) { + pr_err("err:%d while handling iser pdu\n", err); + isert_conn_disconnect(wr->conn); + } + + TRACE_EXIT(); +} + +static void isert_send_completion_handler(struct isert_wr *wr) +{ + struct isert_cmnd *isert_pdu = wr->pdu; + struct iscsi_cmnd *iscsi_pdu = &isert_pdu->iscsi; + struct iscsi_cmnd *iscsi_req_pdu = iscsi_pdu->parent_req; + struct isert_cmnd *isert_req_pdu = (struct 
isert_cmnd *)iscsi_req_pdu; + + TRACE_ENTRY(); + + if (iscsi_req_pdu && iscsi_req_pdu->bufflen && + isert_req_pdu->is_rstag_valid) + isert_data_in_sent(iscsi_req_pdu); + + isert_pdu_sent(iscsi_pdu); + + TRACE_EXIT(); +} + +static void isert_rdma_rd_completion_handler(struct isert_wr *wr) +{ + struct isert_buf *isert_buf = wr->buf; + struct isert_device *isert_dev = wr->isert_dev; + struct ib_device *ib_dev = isert_dev->ib_dev; + + ib_dma_unmap_sg(ib_dev, isert_buf->sg, isert_buf->sg_cnt, + isert_buf->dma_dir); + + isert_data_out_ready(&wr->pdu->iscsi); +} + +static void isert_rdma_wr_completion_handler(struct isert_wr *wr) +{ + struct isert_buf *isert_buf = wr->buf; + struct isert_device *isert_dev = wr->isert_dev; + struct ib_device *ib_dev = isert_dev->ib_dev; + + ib_dma_unmap_sg(ib_dev, isert_buf->sg, isert_buf->sg_cnt, + isert_buf->dma_dir); + + isert_data_in_sent(&wr->pdu->iscsi); +} + +static void isert_handle_wc(struct ib_wc *wc) +{ + struct isert_wr *wr = _u64_to_ptr(wc->wr_id); + struct isert_connection *isert_conn; + + TRACE_ENTRY(); + + switch (wr->wr_op) { + case ISER_WR_RECV: + isert_conn = wr->conn; + if (unlikely(isert_conn->state == ISER_CONN_HANDSHAKE)) { + isert_conn->state = ISER_CONN_ACTIVE; + isert_conn->saved_wr = wr; + pr_info("iser rx pdu before conn established, pdu saved\n"); + break; + } + isert_recv_completion_handler(wr); + break; + case ISER_WR_SEND: + isert_send_completion_handler(wr); + break; + case ISER_WR_RDMA_WRITE: + isert_rdma_wr_completion_handler(wr); + break; + case ISER_WR_RDMA_READ: + isert_rdma_rd_completion_handler(wr); + break; + default: + isert_conn = wr->conn; + pr_err("unexpected work req op:%d, wc op:%d, wc:%p wr_id:%p conn:%p\n", + wr->wr_op, wc->opcode, wc, wr, isert_conn); + if (isert_conn) + isert_conn_disconnect(isert_conn); + break; + } + + TRACE_EXIT(); +} + +static const char *wr_status_str(enum ib_wc_status status) +{ + switch (status) { + case IB_WC_SUCCESS: + return "WC_SUCCESS"; + + case 
IB_WC_LOC_LEN_ERR: + return "WC_LOC_LEN_ERR"; + + case IB_WC_LOC_QP_OP_ERR: + return "WC_LOC_QP_OP_ERR"; + + case IB_WC_LOC_EEC_OP_ERR: + return "WC_LOC_EEC_OP_ERR"; + + case IB_WC_LOC_PROT_ERR: + return "WC_LOC_PROT_ERR"; + + case IB_WC_WR_FLUSH_ERR: + return "WC_WR_FLUSH_ERR"; + + case IB_WC_MW_BIND_ERR: + return "WC_MW_BIND_ERR"; + + case IB_WC_BAD_RESP_ERR: + return "WC_BAD_RESP_ERR"; + + case IB_WC_LOC_ACCESS_ERR: + return "WC_LOC_ACCESS_ERR"; + + case IB_WC_REM_INV_REQ_ERR: + return "WC_REM_INV_REQ_ERR"; + + case IB_WC_REM_ACCESS_ERR: + return "WC_REM_ACCESS_ERR"; + + case IB_WC_REM_OP_ERR: + return "WC_REM_OP_ERR"; + + case IB_WC_RETRY_EXC_ERR: + return "WC_RETRY_EXC_ERR"; + + case IB_WC_RNR_RETRY_EXC_ERR: + return "WC_RNR_RETRY_EXC_ERR"; + + case IB_WC_LOC_RDD_VIOL_ERR: + return "WC_LOC_RDD_VIOL_ERR"; + + case IB_WC_REM_INV_RD_REQ_ERR: + return "WC_REM_INV_RD_REQ_ERR"; + + case IB_WC_REM_ABORT_ERR: + return "WC_REM_ABORT_ERR"; + + case IB_WC_INV_EECN_ERR: + return "WC_INV_EECN_ERR"; + + case IB_WC_INV_EEC_STATE_ERR: + return "WC_INV_EEC_STATE_ERR"; + + case IB_WC_FATAL_ERR: + return "WC_FATAL_ERR"; + + case IB_WC_RESP_TIMEOUT_ERR: + return "WC_RESP_TIMEOUT_ERR"; + + case IB_WC_GENERAL_ERR: + return "WC_GENERAL_ERR"; + + default: + return "UNKNOWN"; + } +} + +static void isert_handle_wc_error(struct ib_wc *wc) +{ + struct isert_wr *wr = _u64_to_ptr(wc->wr_id); + struct isert_cmnd *isert_pdu = wr->pdu; + struct isert_connection *isert_conn = wr->conn; + + TRACE_ENTRY(); + + if (wc->status != IB_WC_WR_FLUSH_ERR) + pr_err("conn:%p wr_id:0x%p status:%s vendor_err:0x%0x\n", + isert_conn, wr, wr_status_str(wc->status), + wc->vendor_err); + + switch (wr->wr_op) { + case ISER_WR_SEND: + isert_pdu_err(&isert_pdu->iscsi); + break; + case ISER_WR_RDMA_READ: + isert_pdu_err(&isert_pdu->iscsi); + break; + case ISER_WR_RECV: + /* this should be the Flush, no task has been created yet */ + case ISER_WR_RDMA_WRITE: + /* RDMA-WR and SEND response of a READ task + are sent 
together, so when receiving RDMA-WR error, + wait until SEND error arrives to complete the task */ + break; + default: + pr_err("unexpected opcode %d, wc:%p wr_id:%p conn:%p\n", + wr->wr_op, wc, wr, isert_conn); + break; + } + + TRACE_EXIT(); +} + +static int isert_poll_cq(struct isert_cq *cq) +{ + int err, i; + + TRACE_ENTRY(); + + do { + err = ib_poll_cq(cq->cq, ARRAY_SIZE(cq->wc), cq->wc); + + for (i = 0; i < err; ++i) { + if (likely(cq->wc[i].status == IB_WC_SUCCESS)) + isert_handle_wc(&cq->wc[i]); + else + isert_handle_wc_error(&cq->wc[i]); + } + + } while (err > 0); + + TRACE_EXIT_RES(err); + return err; +} + +/* callback function for isert_dev->[cq]->cq_comp_work */ +static void isert_cq_comp_work_cb(struct work_struct *work) +{ + struct isert_cq *cq_desc; + struct isert_device *isert_dev; + int ret; + + TRACE_ENTRY(); + + cq_desc = container_of(work, struct isert_cq, cq_comp_work); + isert_dev = cq_desc->dev; + ret = isert_poll_cq(cq_desc); + if (unlikely(ret < 0)) { /* poll error */ + pr_err("ib_poll_cq failed\n"); + goto out; + } + + ib_req_notify_cq(cq_desc->cq, + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); + /* + * not all HCAs support IB_CQ_REPORT_MISSED_EVENTS, + * so we need to make sure we don't miss any events between + * last call to ib_poll_cq() and ib_req_notify_cq() + */ + isert_poll_cq(cq_desc); + +out: + TRACE_EXIT(); + return; +} + +static void isert_cq_comp_handler(struct ib_cq *cq, void *context) +{ + struct isert_cq *cq_desc = context; + + queue_work_on(smp_processor_id(), cq_desc->cq_workqueue, + &cq_desc->cq_comp_work); +} + +static const char *ib_event_type_str(enum ib_event_type ev_type) +{ + switch (ev_type) { + case IB_EVENT_COMM_EST: + return "COMM_EST"; + case IB_EVENT_QP_FATAL: + return "QP_FATAL"; + case IB_EVENT_QP_REQ_ERR: + return "QP_REQ_ERR"; + case IB_EVENT_QP_ACCESS_ERR: + return "QP_ACCESS_ERR"; + case IB_EVENT_SQ_DRAINED: + return "SQ_DRAINED"; + case IB_EVENT_PATH_MIG: + return "PATH_MIG"; + case 
IB_EVENT_PATH_MIG_ERR: + return "PATH_MIG_ERR"; + case IB_EVENT_QP_LAST_WQE_REACHED: + return "QP_LAST_WQE_REACHED"; + case IB_EVENT_CQ_ERR: + return "CQ_ERR"; + case IB_EVENT_SRQ_ERR: + return "SRQ_ERR"; + case IB_EVENT_SRQ_LIMIT_REACHED: + return "SRQ_LIMIT_REACHED"; + case IB_EVENT_PORT_ACTIVE: + return "PORT_ACTIVE"; + case IB_EVENT_PORT_ERR: + return "PORT_ERR"; + case IB_EVENT_LID_CHANGE: + return "LID_CHANGE"; + case IB_EVENT_PKEY_CHANGE: + return "PKEY_CHANGE"; + case IB_EVENT_SM_CHANGE: + return "SM_CHANGE"; + case IB_EVENT_CLIENT_REREGISTER: + return "CLIENT_REREGISTER"; + case IB_EVENT_DEVICE_FATAL: + return "DEVICE_FATAL"; + default: + return "UNKNOWN"; + } +} + +static void isert_async_evt_handler(struct ib_event *async_ev, void *context) +{ + struct isert_cq *cq = context; + struct isert_device *isert_dev = cq->dev; + struct ib_device *ib_dev = isert_dev->ib_dev; + char *dev_name = ib_dev->name; + enum ib_event_type ev_type = async_ev->event; + struct isert_connection *isert_conn; + + TRACE_ENTRY(); + + switch (ev_type) { + case IB_EVENT_COMM_EST: + isert_conn = async_ev->element.qp->qp_context; + pr_info("conn:0x%p cm_id:0x%p dev:%s, QP evt: %s\n", + isert_conn, isert_conn->cm_id, dev_name, + ib_event_type_str(IB_EVENT_COMM_EST)); + /* force "connection established" event */ + rdma_notify(isert_conn->cm_id, IB_EVENT_COMM_EST); + break; + + /* rest of QP-related events */ + case IB_EVENT_QP_FATAL: + case IB_EVENT_QP_REQ_ERR: + case IB_EVENT_QP_ACCESS_ERR: + case IB_EVENT_SQ_DRAINED: + case IB_EVENT_PATH_MIG: + case IB_EVENT_PATH_MIG_ERR: + case IB_EVENT_QP_LAST_WQE_REACHED: + isert_conn = async_ev->element.qp->qp_context; + pr_err("conn:0x%p cm_id:0x%p dev:%s, QP evt: %s\n", + isert_conn, isert_conn->cm_id, dev_name, + ib_event_type_str(ev_type)); + break; + + /* CQ-related events */ + case IB_EVENT_CQ_ERR: + pr_err("dev:%s CQ evt: %s\n", dev_name, + ib_event_type_str(ev_type)); + break; + + /* SRQ events */ + case IB_EVENT_SRQ_ERR: + case 
IB_EVENT_SRQ_LIMIT_REACHED: + pr_err("dev:%s SRQ evt: %s\n", dev_name, + ib_event_type_str(ev_type)); + break; + + /* Port events */ + case IB_EVENT_PORT_ACTIVE: + case IB_EVENT_PORT_ERR: + case IB_EVENT_LID_CHANGE: + case IB_EVENT_PKEY_CHANGE: + case IB_EVENT_SM_CHANGE: + case IB_EVENT_CLIENT_REREGISTER: + pr_err("dev:%s port:%d evt: %s\n", + dev_name, async_ev->element.port_num, + ib_event_type_str(ev_type)); + break; + + /* HCA events */ + case IB_EVENT_DEVICE_FATAL: + pr_err("dev:%s HCA evt: %s\n", dev_name, + ib_event_type_str(ev_type)); + break; + + default: + pr_err("dev:%s evt: %s\n", dev_name, + ib_event_type_str(ev_type)); + break; + } + + TRACE_EXIT(); +} + +static struct isert_device *isert_device_create(struct ib_device *ib_dev) +{ + struct isert_device *isert_dev; + struct ib_device_attr *dev_attr; + int cqe_num, err; + struct ib_pd *pd; + struct ib_mr *mr; + struct ib_cq *cq; + char wq_name[64]; + int i, j; + + TRACE_ENTRY(); + + isert_dev = kzalloc(sizeof(*isert_dev), GFP_KERNEL); + if (isert_dev == NULL) { + pr_err("Failed to allocate iser dev\n"); + err = -ENOMEM; + goto out; + } + + dev_attr = &isert_dev->device_attr; + err = ib_query_device(ib_dev, dev_attr); + if (err) { + pr_err("Failed to query device, err: %d\n", err); + goto fail_query; + } + + isert_dev->num_cqs = min_t(int, num_online_cpus(), + ib_dev->num_comp_vectors); + + isert_dev->cq_qps = kzalloc(sizeof(*isert_dev->cq_qps) * isert_dev->num_cqs, + GFP_KERNEL); + if (isert_dev->cq_qps == NULL) { + pr_err("Failed to allocate iser cq_qps\n"); + err = -ENOMEM; + goto fail_cq_qps; + } + + isert_dev->cq_desc = vmalloc(sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs); + if (isert_dev->cq_desc == NULL) { + pr_err("Failed to allocate %ld bytes for iser cq_desc\n", + sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs); + err = -ENOMEM; + goto fail_alloc_cq_desc; + } + + pd = ib_alloc_pd(ib_dev); + if (IS_ERR(pd)) { + err = PTR_ERR(pd); + pr_err("Failed to alloc iser dev pd, err:%d\n", err); + 
goto fail_pd; + } + + mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + pr_err("Failed to get dma mr, err: %d\n", err); + goto fail_mr; + } + + cqe_num = min(isert_dev->device_attr.max_cqe, ISER_CQ_ENTRIES); + cqe_num = cqe_num / isert_dev->num_cqs; + + for (i = 0; i < isert_dev->num_cqs; ++i) { + struct isert_cq *cq_desc = &isert_dev->cq_desc[i]; + + cq_desc->dev = isert_dev; + cq_desc->idx = i; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) + INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb, NULL); +#else + INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb); +#endif + + snprintf(wq_name, sizeof(wq_name), "isert_cq_%p", cq_desc); +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) + cq_desc->cq_workqueue = create_singlethread_workqueue(wq_name); +#else +#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 36) + cq_desc->cq_workqueue = alloc_workqueue(wq_name, + WQ_CPU_INTENSIVE| + WQ_RESCUER, 1); +#else + cq_desc->cq_workqueue = alloc_workqueue(wq_name, + WQ_CPU_INTENSIVE| + WQ_MEM_RECLAIM, 1); +#endif +#endif + if (!cq_desc->cq_workqueue) { + pr_err("Failed to alloc iser cq work queue for dev:%s\n", + ib_dev->name); + err = -ENOMEM; + goto fail_cq; + } + + cq = ib_create_cq(ib_dev, + isert_cq_comp_handler, + isert_async_evt_handler, + cq_desc, /* context */ + cqe_num, + i); /* completion vector */ + if (IS_ERR(cq)) { + err = PTR_ERR(cq); + pr_err("Failed to create iser dev cq, err:%d\n", err); + goto fail_cq; + } + + cq_desc->cq = cq; + err = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); + if (err) { + pr_err("Failed to request notify cq, err: %d\n", err); + goto fail_cq; + } + } + + isert_dev->ib_dev = ib_dev; + isert_dev->pd = pd; + isert_dev->mr = mr; + + INIT_LIST_HEAD(&isert_dev->conn_list); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32) + lockdep_assert_held(&dev_list_mutex); +#endif + isert_dev_list_add(isert_dev); + + pr_info("iser created device:%p\n", isert_dev); + return 
isert_dev; + +fail_cq: + for (j = 0; j < i; ++j) { + if (isert_dev->cq_desc[j].cq) + ib_destroy_cq(isert_dev->cq_desc[j].cq); + if (isert_dev->cq_desc[j].cq_workqueue) + destroy_workqueue(isert_dev->cq_desc[j].cq_workqueue); + } + ib_dereg_mr(mr); +fail_mr: + ib_dealloc_pd(pd); +fail_pd: + vfree(isert_dev->cq_desc); +fail_alloc_cq_desc: + kfree(isert_dev->cq_qps); +fail_cq_qps: +fail_query: + kfree(isert_dev); +out: + TRACE_EXIT_RES(err); + return ERR_PTR(err); +} + +static void isert_device_release(struct isert_device *isert_dev) +{ + int err, i; + + TRACE_ENTRY(); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32) + lockdep_assert_held(&dev_list_mutex); +#endif + isert_dev_list_remove(isert_dev); /* remove from global list */ + + for (i = 0; i < isert_dev->num_cqs; ++i) { + struct isert_cq *cq_desc = &isert_dev->cq_desc[i]; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22) + /* + * cancel_work_sync() was introduced in 2.6.22. We can + * only wait until all scheduled work is done. 
+ */ + flush_workqueue(cq_desc->cq_workqueue); +#else + cancel_work_sync(&cq_desc->cq_comp_work); +#endif + + err = ib_destroy_cq(cq_desc->cq); + if (err) + pr_err("Failed to destroy cq, err:%d\n", err); + + destroy_workqueue(cq_desc->cq_workqueue); + } + + err = ib_dereg_mr(isert_dev->mr); + if (err) + pr_err("Failed to destroy mr, err:%d\n", err); + err = ib_dealloc_pd(isert_dev->pd); + if (err) + pr_err("Failed to destroy pd, err:%d\n", err); + + vfree(isert_dev->cq_desc); + isert_dev->cq_desc = NULL; + + kfree(isert_dev->cq_qps); + isert_dev->cq_qps = NULL; + + kfree(isert_dev); + + TRACE_EXIT(); +} + +static int isert_get_cq_idx(struct isert_device *isert_dev) +{ + int i, min_idx; + + min_idx = 0; + mutex_lock(&dev_list_mutex); + for (i = 0; i < isert_dev->num_cqs; ++i) + if (isert_dev->cq_qps[i] < isert_dev->cq_qps[min_idx]) + min_idx = i; + isert_dev->cq_qps[min_idx]++; + mutex_unlock(&dev_list_mutex); + + return min_idx; +} + +static int isert_conn_qp_create(struct isert_connection *isert_conn) +{ + struct rdma_cm_id *cm_id = isert_conn->cm_id; + struct isert_device *isert_dev = isert_conn->isert_dev; + struct ib_qp_init_attr qp_attr; + int err; + int cq_idx; + + TRACE_ENTRY(); + + cq_idx = isert_get_cq_idx(isert_dev); + + memset(&qp_attr, 0, sizeof(qp_attr)); + + qp_attr.event_handler = isert_async_evt_handler; + qp_attr.qp_context = isert_conn; + qp_attr.send_cq = isert_dev->cq_desc[cq_idx].cq; + qp_attr.recv_cq = isert_dev->cq_desc[cq_idx].cq; + qp_attr.cap.max_send_wr = ISER_MAX_WCE; + qp_attr.cap.max_recv_wr = ISER_MAX_WCE; + + isert_conn->cq_desc = &isert_dev->cq_desc[cq_idx]; + + /* + * A quote from the OFED 1.5.3.1 release notes + * (docs/release_notes/mthca_release_notes.txt), section "Known Issues": + * In mem-free devices, RC QPs can be created with a maximum of + * (max_sge - 1) entries only; UD QPs can be created with a maximum of + * (max_sge - 3) entries. 
+ * A quote from the OFED 1.2.5 release notes + * (docs/mthca_release_notes.txt), section "Known Issues": + * In mem-free devices, RC QPs can be created with a maximum of + * (max_sge - 3) entries only. + */ + isert_conn->max_sge = isert_dev->device_attr.max_sge - 3; + + WARN_ON(isert_conn->max_sge < 1); + + qp_attr.cap.max_send_sge = isert_conn->max_sge; + qp_attr.cap.max_recv_sge = 2; + qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; + qp_attr.qp_type = IB_QPT_RC; + + err = rdma_create_qp(cm_id, isert_dev->pd, &qp_attr); + if (unlikely(err)) { + pr_err("Failed to create qp, err:%d\n", err); + goto out; + } + isert_conn->qp = cm_id->qp; + + pr_info("iser created cm_id:%p qp:0x%X\n", cm_id, cm_id->qp->qp_num); + +out: + TRACE_EXIT_RES(err); + return err; +} + +static void isert_conn_qp_destroy(struct isert_connection *isert_conn) +{ + rdma_destroy_qp(isert_conn->cm_id); + isert_conn->qp = NULL; +} + +static struct isert_connection *isert_conn_create(struct rdma_cm_id *cm_id, + struct isert_device *isert_dev) +{ + struct isert_connection *isert_conn; + int err; + + TRACE_ENTRY(); + + if (!try_module_get(THIS_MODULE)) { + err = -EINVAL; + goto fail_get; + } + + isert_conn = isert_conn_alloc(); + if (unlikely(!isert_conn)) { + pr_err("Unable to allocate iser conn, cm_id:%p\n", cm_id); + err = -ENOMEM; + goto fail_alloc; + } + isert_conn->state = ISER_CONN_INIT; + isert_conn->cm_id = cm_id; + isert_conn->isert_dev = isert_dev; + + INIT_LIST_HEAD(&isert_conn->rx_buf_list); + INIT_LIST_HEAD(&isert_conn->tx_free_list); + INIT_LIST_HEAD(&isert_conn->tx_busy_list); + spin_lock_init(&isert_conn->tx_lock); + spin_lock_init(&isert_conn->post_recv_lock); + + isert_conn->login_req_pdu = isert_rx_pdu_alloc(isert_conn, + ISCSI_LOGIN_MAX_RDSL); + if (unlikely(!isert_conn->login_req_pdu)) { + pr_err("Failed to init login req rx pdu\n"); + err = -ENOMEM; + goto fail_login_req_pdu; + } + + isert_conn->login_rsp_pdu = isert_tx_pdu_alloc(isert_conn, + ISCSI_LOGIN_MAX_RDSL); + if 
(unlikely(!isert_conn->login_rsp_pdu)) { + pr_err("Failed to init login rsp tx pdu\n"); + err = -ENOMEM; + goto fail_login_rsp_pdu; + } + + err = isert_conn_qp_create(isert_conn); + if (unlikely(err)) + goto fail_qp; + + err = isert_post_recv(isert_conn, &isert_conn->login_req_pdu->wr[0], 1); + if (unlikely(err)) { + pr_err("Failed to post recv login req rx buf, err:%d\n", err); + goto fail_post_recv; + } + + kref_init(&isert_conn->kref); + + TRACE_EXIT(); + return isert_conn; + +fail_post_recv: + isert_conn_qp_destroy(isert_conn); +fail_qp: + isert_pdu_free(isert_conn->login_rsp_pdu); +fail_login_rsp_pdu: + isert_pdu_free(isert_conn->login_req_pdu); +fail_login_req_pdu: + isert_conn_kfree(isert_conn); +fail_alloc: + module_put(THIS_MODULE); +fail_get: + TRACE_EXIT_RES(err); + return ERR_PTR(err); +} + +/* start closing process; + * only when all buffers released, can free */ +static void isert_kref_free(struct kref *kref) +{ + struct isert_connection *isert_conn = container_of(kref, + struct isert_connection, + kref); + struct isert_device *isert_dev = isert_conn->isert_dev; + struct isert_cq *cq = isert_conn->qp->recv_cq->cq_context; + + TRACE_ENTRY(); + + pr_info("isert_conn_free conn:%p\n", isert_conn); + + flush_workqueue(isert_conn->cq_desc->cq_workqueue); + + isert_free_conn_resources(isert_conn); + + isert_conn_qp_destroy(isert_conn); + + mutex_lock(&dev_list_mutex); + isert_dev->cq_qps[cq->idx]--; + list_del(&isert_conn->portal_node); + list_del(&isert_conn->dev_node); + isert_dev->refcnt--; + if (isert_dev->refcnt == 0) + isert_device_release(isert_dev); + mutex_unlock(&dev_list_mutex); + + rdma_destroy_id(isert_conn->cm_id); + + isert_conn_kfree(isert_conn); + + module_put(THIS_MODULE); + + TRACE_EXIT(); +} + +void isert_conn_free(struct isert_connection *isert_conn) +{ + kref_put(&isert_conn->kref, isert_kref_free); +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) +static void isert_conn_closed_do_work(void *ctx) +#else +static void 
isert_conn_closed_do_work(struct work_struct *work) +#endif +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) + struct isert_connection *isert_conn = ctx; +#else + struct isert_connection *isert_conn = + container_of(work, struct isert_connection, close_work); +#endif + + /* notify upper layer */ + if (!test_bit(ISERT_CONNECTION_ABORTED, &isert_conn->flags)) + isert_connection_closed(&isert_conn->iscsi); + + isert_conn_free(isert_conn); +} + +static void isert_sched_conn_closed(struct isert_connection *isert_conn) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) + INIT_WORK(&isert_conn->close_work, isert_conn_closed_do_work, isert_conn); +#else + INIT_WORK(&isert_conn->close_work, isert_conn_closed_do_work); +#endif + isert_conn_queue_work(&isert_conn->close_work); +} + +static int isert_cm_timewait_exit_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + struct isert_connection *isert_conn = cm_id->qp->qp_context; + + isert_sched_conn_closed(isert_conn); + return 0; +} + +static int isert_cm_conn_req_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + /* passed in rdma_create_id */ + struct isert_portal *portal = cm_id->context; + struct ib_device *ib_dev = cm_id->device; + struct isert_device *new_isert_dev = NULL; + struct isert_device *isert_dev; + struct isert_connection *isert_conn; + struct rdma_conn_param *ini_conn_param; + struct rdma_conn_param tgt_conn_param; + int err; + + TRACE_ENTRY(); + + mutex_lock(&dev_list_mutex); + isert_dev = isert_device_find(ib_dev); + if (!isert_dev) { + new_isert_dev = isert_device_create(ib_dev); + if (unlikely(IS_ERR(new_isert_dev))) { + err = PTR_ERR(new_isert_dev); + mutex_unlock(&dev_list_mutex); + goto fail_dev_create; + } + isert_dev = new_isert_dev; + } + isert_dev->refcnt++; + mutex_unlock(&dev_list_mutex); + + isert_conn = isert_conn_create(cm_id, isert_dev); + if (unlikely(IS_ERR(isert_conn))) { + err = PTR_ERR(isert_conn); + goto fail_conn_create; + } + + 
isert_conn->state = ISER_CONN_HANDSHAKE; + + mutex_lock(&dev_list_mutex); + list_add_tail(&isert_conn->portal_node, &portal->conn_list); + list_add_tail(&isert_conn->dev_node, &isert_dev->conn_list); + mutex_unlock(&dev_list_mutex); + + /* initiator is dst, target is src */ + memcpy(&isert_conn->peer_addr, &cm_id->route.addr.dst_addr, + sizeof(isert_conn->peer_addr)); + memcpy(&isert_conn->self_addr, &cm_id->route.addr.src_addr, + sizeof(isert_conn->self_addr)); + + ini_conn_param = &event->param.conn; + memset(&tgt_conn_param, 0, sizeof(tgt_conn_param)); + tgt_conn_param.flow_control = + ini_conn_param->flow_control; + tgt_conn_param.rnr_retry_count = + ini_conn_param->rnr_retry_count; + + tgt_conn_param.initiator_depth = isert_dev->device_attr.max_qp_init_rd_atom; + if (tgt_conn_param.initiator_depth > ini_conn_param->initiator_depth) + tgt_conn_param.initiator_depth = ini_conn_param->initiator_depth; + + err = rdma_accept(cm_id, &tgt_conn_param); + if (unlikely(err)) { + pr_err("Failed to accept conn request, err:%d\n", err); + goto fail_accept; + } + + switch (isert_conn->peer_addr.ss_family) { + case AF_INET: +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) + pr_info("iser accepted connection cm_id:%p " + NIPQUAD_FMT "->" NIPQUAD_FMT "\n", cm_id, + NIPQUAD(((struct sockaddr_in *)&isert_conn->peer_addr)->sin_addr.s_addr), + NIPQUAD(((struct sockaddr_in *)&isert_conn->self_addr)->sin_addr.s_addr)); +#else + pr_info("iser accepted connection cm_id:%p " + "%pI4->%pI4\n", cm_id, + &((struct sockaddr_in *)&isert_conn->peer_addr)->sin_addr.s_addr, + &((struct sockaddr_in *)&isert_conn->self_addr)->sin_addr.s_addr); +#endif + break; + case AF_INET6: +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29) + pr_info("iser accepted connection cm_id:%p " + NIP6_FMT "->" NIP6_FMT "\n", cm_id, + NIP6(((struct sockaddr_in6 *)&isert_conn->peer_addr)->sin6_addr.s_addr), + NIP6(((struct sockaddr_in6 *)&isert_conn->self_addr)->sin6_addr.s_addr)); +#else + pr_info("iser accepted 
connection cm_id:%p " + "%pI6->%pI6\n", cm_id, + &((struct sockaddr_in6 *)&isert_conn->peer_addr)->sin6_addr, + &((struct sockaddr_in6 *)&isert_conn->self_addr)->sin6_addr); +#endif + break; + default: + pr_info("iser accepted connection cm_id:%p\n", cm_id); + } + +out: + TRACE_EXIT_RES(err); + return err; + +fail_accept: + set_bit(ISERT_CONNECTION_ABORTED, &isert_conn->flags); + isert_cm_timewait_exit_handler(cm_id, NULL); + err = 0; + goto out; + +fail_conn_create: + if (new_isert_dev) { + mutex_lock(&dev_list_mutex); + new_isert_dev->refcnt--; + if (new_isert_dev->refcnt == 0) + isert_device_release(new_isert_dev); + mutex_unlock(&dev_list_mutex); + } +fail_dev_create: + rdma_reject(cm_id, NULL, 0); + goto out; +} + +static int isert_cm_connect_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + struct isert_connection *isert_conn = cm_id->qp->qp_context; + int push_saved_pdu = 0; + int ret; + + TRACE_ENTRY(); + + if (isert_conn->state == ISER_CONN_HANDSHAKE) + isert_conn->state = ISER_CONN_ACTIVE; + else if (isert_conn->state == ISER_CONN_ACTIVE) + push_saved_pdu = 1; + + ret = isert_get_addr_size((struct sockaddr *)&isert_conn->peer_addr, + &isert_conn->peer_addrsz); + if (unlikely(ret)) + goto out; + + kref_get(&isert_conn->kref); + /* notify upper layer */ + ret = isert_conn_established(&isert_conn->iscsi, + (struct sockaddr *)&isert_conn->peer_addr, + isert_conn->peer_addrsz); + if (unlikely(ret)) { + set_bit(ISERT_CONNECTION_ABORTED, &isert_conn->flags); + isert_conn_free(isert_conn); + goto out; + } + + if (push_saved_pdu) { + pr_info("iser push saved rx pdu\n"); + isert_recv_completion_handler(isert_conn->saved_wr); + isert_conn->saved_wr = NULL; + } + +out: + TRACE_EXIT_RES(ret); + return ret; +} + +static int isert_cm_disconnect_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + struct isert_connection *isert_conn = cm_id->qp->qp_context; + + isert_conn_disconnect(isert_conn); + + return 0; +} + +static const char 
*cm_event_type_str(enum rdma_cm_event_type ev_type) /* printable name of a CM event, used in log messages */ +{ + switch (ev_type) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + return "ADDRESS_RESOLVED"; + case RDMA_CM_EVENT_ADDR_ERROR: + return "ADDRESS_ERROR"; + case RDMA_CM_EVENT_ROUTE_RESOLVED: + return "ROUTE_RESOLVED"; + case RDMA_CM_EVENT_ROUTE_ERROR: + return "ROUTE_ERROR"; + case RDMA_CM_EVENT_CONNECT_REQUEST: + return "CONNECT_REQUEST"; + case RDMA_CM_EVENT_CONNECT_RESPONSE: + return "CONNECT_RESPONSE"; + case RDMA_CM_EVENT_CONNECT_ERROR: + return "CONNECT_ERROR"; + case RDMA_CM_EVENT_UNREACHABLE: + return "UNREACHABLE"; + case RDMA_CM_EVENT_REJECTED: + return "REJECTED"; + case RDMA_CM_EVENT_ESTABLISHED: + return "ESTABLISHED"; + case RDMA_CM_EVENT_DISCONNECTED: + return "DISCONNECTED"; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + return "DEVICE_REMOVAL"; + case RDMA_CM_EVENT_MULTICAST_JOIN: + return "MULTICAST_JOIN"; + case RDMA_CM_EVENT_MULTICAST_ERROR: + return "MULTICAST_ERROR"; + case RDMA_CM_EVENT_ADDR_CHANGE: + return "ADDR_CHANGE"; + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + return "TIMEWAIT_EXIT"; + default: + return "UNKNOWN"; + } +} + +static int isert_handle_failure(struct isert_connection *conn) +{ + isert_conn_disconnect(conn); + return 0; +} + +static int isert_cm_evt_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *cm_ev) +{ + enum rdma_cm_event_type ev_type; + struct isert_portal *portal; + int err = -EINVAL; + + TRACE_ENTRY(); + + if (unlikely(IS_ERR(cm_id))) { + pr_err("isert_cm_evt invalid cm_id:%p\n", cm_id); + goto out; + } + ev_type = cm_ev->event; + portal = cm_id->context; + pr_info("isert_cm_evt:%s(%d) status:%d portal:%p cm_id:%p\n", + cm_event_type_str(ev_type), ev_type, cm_ev->status, + portal, cm_id); + + switch (ev_type) { + case RDMA_CM_EVENT_CONNECT_REQUEST: + err = isert_cm_conn_req_handler(cm_id, cm_ev); + break; + + case RDMA_CM_EVENT_ESTABLISHED: + err = isert_cm_connect_handler(cm_id, cm_ev); + if (unlikely(err)) + err = isert_handle_failure(cm_id->qp->qp_context); + break; + + case
RDMA_CM_EVENT_CONNECT_ERROR: + case RDMA_CM_EVENT_REJECTED: + case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_DISCONNECTED: + err = isert_cm_disconnect_handler(cm_id, cm_ev); + break; + + case RDMA_CM_EVENT_DEVICE_REMOVAL: + isert_cm_disconnect_handler(cm_id, cm_ev); + + case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* fall through */ + err = isert_cm_timewait_exit_handler(cm_id, cm_ev); + break; + + case RDMA_CM_EVENT_MULTICAST_JOIN: + case RDMA_CM_EVENT_MULTICAST_ERROR: + pr_err("UD-related event:%d, ignored\n", ev_type); + break; + + case RDMA_CM_EVENT_ADDR_RESOLVED: + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_ROUTE_RESOLVED: + case RDMA_CM_EVENT_ROUTE_ERROR: + case RDMA_CM_EVENT_CONNECT_RESPONSE: + pr_err("Active side event:%d, ignored\n", ev_type); + break; + + /* We can receive this instead of RDMA_CM_EVENT_ESTABLISHED */ + case RDMA_CM_EVENT_UNREACHABLE: + { + struct isert_connection *isert_conn; + + isert_conn = cm_id->qp->qp_context; + set_bit(ISERT_CONNECTION_ABORTED, &isert_conn->flags); + isert_sched_conn_closed(isert_conn); + err = 0; + } + break; + + default: + pr_err("Illegal event:%d, ignored\n", ev_type); + break; + } + + if (unlikely(err)) + pr_err("Failed to handle rdma cm evt:%d, err:%d\n", + ev_type, err); + +out: + TRACE_EXIT_RES(err); + return err; +} + +/* create a portal, after listening starts all events + * are received in isert_cm_evt_handler() + */ +struct isert_portal *isert_portal_create(void) +{ + struct isert_portal *portal; + struct rdma_cm_id *cm_id; + int err; + + if (!try_module_get(THIS_MODULE)) { + pr_err("Unable increment module reference\n"); + portal = ERR_PTR(-EINVAL); + goto out; + } + + portal = kzalloc(sizeof(*portal), GFP_KERNEL); + if (unlikely(!portal)) { + pr_err("Unable to allocate struct portal\n"); + portal = ERR_PTR(-ENOMEM); + goto err_alloc; + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) && !defined(RHEL_MAJOR) + cm_id = rdma_create_id(isert_cm_evt_handler, portal, RDMA_PS_TCP); +#else + cm_id = 
rdma_create_id(isert_cm_evt_handler, portal, RDMA_PS_TCP, + IB_QPT_RC); +#endif + if (unlikely(IS_ERR(cm_id))) { + err = PTR_ERR(cm_id); + pr_err("Failed to create rdma id, err:%d\n", err); + goto create_id_err; + } + portal->cm_id = cm_id; + + INIT_LIST_HEAD(&portal->conn_list); + isert_portal_list_add(portal); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) + rdma_set_afonly(cm_id, 1); +#endif + + pr_info("Created iser portal cm_id:%p\n", cm_id); +out: + return portal; + +create_id_err: + kfree(portal); + portal = ERR_PTR(err); +err_alloc: + module_put(THIS_MODULE); + goto out; +} + +int isert_portal_listen(struct isert_portal *portal, + struct sockaddr *sa, + size_t addr_len) +{ + int err; + + TRACE_ENTRY(); + err = rdma_bind_addr(portal->cm_id, sa); + if (err) { + pr_warn("Failed to bind rdma addr, err:%d\n", err); + goto out; + } + err = rdma_listen(portal->cm_id, ISER_LISTEN_BACKLOG); + if (err) { + pr_err("Failed rdma listen, err:%d\n", err); + goto out; + } + memcpy(&portal->addr, sa, addr_len); + + switch (sa->sa_family) { + case AF_INET: +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) + pr_info("iser portal cm_id:%p listens on: " + NIPQUAD_FMT ":%d\n", portal->cm_id, + NIPQUAD(((struct sockaddr_in *)sa)->sin_addr.s_addr), + (int)ntohs(((struct sockaddr_in *)sa)->sin_port)); +#else + pr_info("iser portal cm_id:%p listens on: " + "%pI4:%d\n", portal->cm_id, + &((struct sockaddr_in *)sa)->sin_addr.s_addr, + (int)ntohs(((struct sockaddr_in *)sa)->sin_port)); +#endif + break; + case AF_INET6: +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29) + pr_info("iser portal cm_id:%p listens on: " + NIP6_FMT " %d\n", + portal->cm_id, + NIP6(((struct sockaddr_in6 *)sa)->sin6_addr.s_addr), + (int)ntohs(((struct sockaddr_in6 *)sa)->sin6_port)); +#else + pr_info("iser portal cm_id:%p listens on: " + "%pI6 %d\n", portal->cm_id, + &((struct sockaddr_in6 *)sa)->sin6_addr, + (int)ntohs(((struct sockaddr_in6 *)sa)->sin6_port)); +#endif + break; + default: + 
pr_err("Unknown address family\n"); + err = -EINVAL; + goto out; + } + +out: + TRACE_EXIT_RES(err); + return err; +} + +void isert_portal_release(struct isert_portal *portal) +{ + struct isert_connection *conn; + + pr_info("iser portal cm_id:%p releasing\n", portal->cm_id); + + rdma_destroy_id(portal->cm_id); + + mutex_lock(&dev_list_mutex); + list_for_each_entry(conn, &portal->conn_list, portal_node) + isert_conn_disconnect(conn); + mutex_unlock(&dev_list_mutex); + + isert_portal_list_remove(portal); + + module_put(THIS_MODULE); +} + +struct isert_portal *isert_portal_start(struct sockaddr *sa, size_t addr_len) +{ + struct isert_portal *portal; + int err; + + portal = isert_portal_create(); + if (IS_ERR(portal)) + return portal; + + err = isert_portal_listen(portal, sa, addr_len); + if (err) { + isert_portal_release(portal); + portal = ERR_PTR(err); + } + return portal; +} + diff --git a/iscsi-scst/kernel/isert-scst/isert.c b/iscsi-scst/kernel/isert-scst/isert.c new file mode 100644 index 000000000..844fba8ff --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/isert.c @@ -0,0 +1,495 @@ +/* +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. +* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. 
+* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#include +#include +#include + +#include "isert.h" +#include "isert_dbg.h" +#include "iscsit_transport.h" +#include "iser_datamover.h" + +#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING) +unsigned long isert_trace_flag = ISERT_DEFAULT_LOG_FLAGS; +unsigned long iscsi_trace_flag = ISERT_DEFAULT_LOG_FLAGS; +#endif + +static unsigned int isert_nr_devs = ISERT_NR_DEVS; +module_param(isert_nr_devs, uint, S_IRUGO); +MODULE_PARM_DESC(isert_nr_devs, + "Maximum concurrent number of connection requests to handle."); + +static void isert_mark_conn_closed(struct iscsi_conn *conn, int flags) +{ + TRACE_ENTRY(); + if (flags & ISCSI_CONN_ACTIVE_CLOSE) + conn->active_close = 1; + if (flags & ISCSI_CONN_DELETING) + conn->deleting = 1; + + conn->read_state = 0; + + if (!conn->closing) { + conn->closing = 1; + schedule_work(&conn->close_work); + } + + TRACE_EXIT(); +} + +static void isert_close_conn(struct iscsi_conn *conn, int flags) +{ +} + +static int isert_receive_cmnd_data(struct iscsi_cmnd *cmnd) +{ +#ifdef CONFIG_SCST_EXTRACHECKS + if (cmnd->scst_state == ISCSI_CMD_STATE_RX_CMD) + TRACE_DBG("cmnd %p is still in RX_CMD state", + cmnd); +#endif + EXTRACHECKS_BUG_ON(cmnd->scst_state != ISCSI_CMD_STATE_AFTER_PREPROC); + return 0; +} + +static void 
isert_update_len_sn(struct iscsi_cmnd *cmnd) +{ + TRACE_ENTRY(); + + iscsi_cmnd_set_length(&cmnd->pdu); + switch (cmnd_opcode(cmnd)) { + case ISCSI_OP_NOP_IN: + if (cmnd->pdu.bhs.itt == ISCSI_RESERVED_TAG) + cmnd->pdu.bhs.sn = (__force u32)cmnd_set_sn(cmnd, 0); + else + cmnd_set_sn(cmnd, 1); + break; + case ISCSI_OP_SCSI_RSP: + cmnd_set_sn(cmnd, 1); + break; + case ISCSI_OP_SCSI_TASK_MGT_RSP: + cmnd_set_sn(cmnd, 1); + break; + case ISCSI_OP_TEXT_RSP: + cmnd_set_sn(cmnd, 1); + break; + case ISCSI_OP_SCSI_DATA_IN: + { + struct iscsi_data_in_hdr *rsp = + (struct iscsi_data_in_hdr *)&cmnd->pdu.bhs; + + cmnd_set_sn(cmnd, (rsp->flags & ISCSI_FLG_FINAL) ? 1 : 0); + break; + } + case ISCSI_OP_LOGOUT_RSP: + cmnd_set_sn(cmnd, 1); + break; + case ISCSI_OP_R2T: + cmnd->pdu.bhs.sn = (__force u32)cmnd_set_sn(cmnd, 0); + break; + case ISCSI_OP_ASYNC_MSG: + cmnd_set_sn(cmnd, 1); + break; + case ISCSI_OP_REJECT: + cmnd_set_sn(cmnd, 1); + break; + default: + PRINT_ERROR("Unexpected cmnd op %x", cmnd_opcode(cmnd)); + break; + } + + TRACE_EXIT(); +} + +static int isert_process_all_writes(struct iscsi_conn *conn) +{ + struct iscsi_cmnd *cmnd; + int res = 0; + + TRACE_ENTRY(); + + while ((cmnd = iscsi_get_send_cmnd(conn)) != NULL) { + isert_update_len_sn(cmnd); + conn_get(conn); + isert_pdu_tx(cmnd); + } + + TRACE_EXIT_RES(res); + return res; +} + +static int isert_send_locally(struct iscsi_cmnd *req, unsigned int cmd_count) +{ + int res = 0; + + TRACE_ENTRY(); + + req_cmnd_pre_release(req); + res = isert_process_all_writes(req->conn); + cmnd_put(req); + + TRACE_EXIT_RES(res); + return res; +} + +static struct iscsi_cmnd *isert_cmnd_alloc(struct iscsi_conn *conn, + struct iscsi_cmnd *parent) +{ + struct iscsi_cmnd *cmnd; + + TRACE_ENTRY(); + + if (likely(parent)) + cmnd = isert_alloc_scsi_rsp_pdu(conn); + else + cmnd = isert_alloc_scsi_fake_pdu(conn); + + iscsi_cmnd_init(conn, cmnd, parent); + + TRACE_EXIT(); + return cmnd; +} + +static void isert_cmnd_free(struct iscsi_cmnd *cmnd) +{ + 
TRACE_ENTRY(); + +#ifdef CONFIG_SCST_EXTRACHECKS + if (unlikely(cmnd->on_write_list || cmnd->on_write_timeout_list)) { + struct iscsi_scsi_cmd_hdr *req = cmnd_hdr(cmnd); + + PRINT_CRIT_ERROR("cmnd %p still on some list?, %x, %x, %x, " + "%x, %x, %x, %x", cmnd, req->opcode, req->scb[0], + req->flags, req->itt, be32_to_cpu(req->data_length), + req->cmd_sn, + be32_to_cpu((__force __be32)(cmnd->pdu.datasize))); + + if (unlikely(cmnd->parent_req)) { + struct iscsi_scsi_cmd_hdr *preq = + cmnd_hdr(cmnd->parent_req); + PRINT_CRIT_ERROR("%p %x %u", preq, preq->opcode, + preq->scb[0]); + } + sBUG(); + } +#endif + if (cmnd->parent_req) + isert_release_tx_pdu(cmnd); + else + isert_release_rx_pdu(cmnd); + + TRACE_EXIT(); +} + +static void isert_preprocessing_done(struct iscsi_cmnd *req) +{ + req->scst_state = ISCSI_CMD_STATE_AFTER_PREPROC; +} + +static void isert_set_sense_data(struct iscsi_cmnd *rsp, + const u8 *sense_buf, int sense_len) +{ + u8 *buf; + + buf = sg_virt(rsp->sg) + ISER_HDRS_SZ; + + memcpy(buf, &rsp->sense_hdr, sizeof(rsp->sense_hdr)); + memcpy(&buf[sizeof(rsp->sense_hdr)], sense_buf, sense_len); +} + +static void isert_set_req_data(struct iscsi_cmnd *req, struct iscsi_cmnd *rsp) +{ + memcpy(sg_virt(rsp->sg) + ISER_HDRS_SZ, + sg_virt(req->sg) + ISER_HDRS_SZ, req->bufflen); + rsp->bufflen = req->bufflen; +} + +static void isert_send_data_rsp(struct iscsi_cmnd *req, u8 *sense, + int sense_len, u8 status, int is_send_status) +{ + struct iscsi_cmnd *rsp; + + TRACE_ENTRY(); + + sBUG_ON(!is_send_status); + + rsp = create_status_rsp(req, status, sense, sense_len); + + isert_update_len_sn(rsp); + + conn_get(rsp->conn); + if (status != SAM_STAT_CHECK_CONDITION) + isert_send_data_in(req, rsp); + else + isert_pdu_tx(rsp); + + TRACE_EXIT(); +} + +static void isert_make_conn_wr_active(struct iscsi_conn *conn) +{ + isert_process_all_writes(conn); +} + +static int isert_conn_activate(struct iscsi_conn *conn) +{ + return 0; +} + +static void isert_conn_free(struct iscsi_conn 
*conn) +{ + isert_free_connection(conn); +} + +int isert_handle_close_connection(struct iscsi_conn *conn) +{ + isert_mark_conn_closed(conn, 0); + /* Take care of case where our connection is being closed + * without being connected to a session - if connection allocation + * failed for some reason */ + if (unlikely(!conn->session)) + isert_free_connection(conn); + else + start_close_conn(conn); + return 0; +} + +int isert_pdu_rx(struct iscsi_cmnd *cmnd) +{ + int res = 0; + scst_data_direction dir; + + TRACE_ENTRY(); + +#ifdef CONFIG_SCST_EXTRACHECKS + cmnd->conn->rd_task = current; +#endif + iscsi_cmnd_init(cmnd->conn, cmnd, NULL); + cmnd_rx_start(cmnd); + + if (unlikely(!cmnd->scst_cmd)) { + cmnd_rx_end(cmnd); + goto out; + } + + if (unlikely(scst_cmd_prelim_completed(cmnd->scst_cmd) || + unlikely(cmnd->prelim_compl_flags != 0))) { + set_bit(ISCSI_CMD_PRELIM_COMPLETED, &cmnd->prelim_compl_flags); + cmnd_rx_end(cmnd); + goto out; + } + + dir = scst_cmd_get_data_direction(cmnd->scst_cmd); + + if (dir & SCST_DATA_WRITE) { + res = iscsi_cmnd_set_write_buf(cmnd); + if (unlikely(res)) + goto out; + res = isert_request_data_out(cmnd); + cmnd->r2t_len_to_receive = 0; + cmnd->r2t_len_to_send = 0; + cmnd->outstanding_r2t = 0; + } else { + cmnd_rx_end(cmnd); + } + +out: + TRACE_EXIT_RES(res); + return res; +} + +int isert_data_out_ready(struct iscsi_cmnd *cmnd) +{ + int res = 0; + + TRACE_ENTRY(); +#ifdef CONFIG_SCST_EXTRACHECKS + cmnd->conn->rd_task = current; +#endif + cmnd_rx_end(cmnd); + + TRACE_EXIT_RES(res); + return res; +} + +int isert_data_in_sent(struct iscsi_cmnd *din) +{ + return 0; +} + +void isert_pdu_err(struct iscsi_cmnd *pdu) +{ + struct iscsi_conn *conn = pdu->conn; + + if (!conn->session) /* we are still in login phase */ + return; + + if (pdu->parent_req) { + rsp_cmnd_release(pdu); + conn_put(conn); + } else { + /* + * we will get multiple pdu errors + * for same PDU with multiple RDMAs case + */ + if (pdu->on_write_timeout_list) + req_cmnd_release(pdu); 
+ } +} + +int isert_pdu_sent(struct iscsi_cmnd *pdu) +{ + struct iscsi_conn *conn = pdu->conn; + int res = 0; + + TRACE_ENTRY(); + + if (unlikely(pdu->should_close_conn)) { + if (pdu->should_close_all_conn) { + struct iscsi_target *target = pdu->conn->session->target; + + PRINT_INFO("Closing all connections for target %x at " + "initiator's %s request", target->tid, + conn->session->initiator_name); + mutex_lock(&target->target_mutex); + target_del_all_sess(target, 0); + mutex_unlock(&target->target_mutex); + } else { + PRINT_INFO("Closing connection at initiator's %s " + "request", conn->session->initiator_name); + mark_conn_closed(conn); + } + } + + /* we may get NULL parent req for login response */ + if (likely(pdu->parent_req)) { + rsp_cmnd_release(pdu); + conn_put(conn); + } + + TRACE_EXIT_RES(res); + return res; +} + +static ssize_t isert_get_initiator_ip(struct iscsi_conn *conn, + char *buf, int size) /* formats the peer address into buf, returns the scnprintf() length */ +{ + int pos; + struct sockaddr_storage ss; + size_t addr_len; + + TRACE_ENTRY(); + + isert_get_peer_addr(conn, (struct sockaddr *)&ss, &addr_len); + + switch (ss.ss_family) { + case AF_INET: +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) + pos = scnprintf(buf, size, + "%u.%u.%u.%u", + NIPQUAD(((struct sockaddr_in *)&ss)->sin_addr.s_addr)); +#else + pos = scnprintf(buf, size, + "%pI4", &((struct sockaddr_in *)&ss)->sin_addr.s_addr); +#endif + break; + case AF_INET6: +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29) + pos = scnprintf(buf, size, + "[%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]", + NIP6(((struct sockaddr_in6 *)&ss)->sin6_addr)); /* NIP6() takes the struct in6_addr itself */ +#else + pos = scnprintf(buf, size, "[%pI6]", /* %pI6 prints the IPv6 address; "%p6" printed a pointer */ + &((struct sockaddr_in6 *)&ss)->sin6_addr); +#endif + break; + default: + pos = scnprintf(buf, size, "Unknown family %d", + ss.ss_family); + break; + } + + TRACE_EXIT_RES(pos); + return pos; +} + +static struct iscsit_transport isert_transport = { + .owner = THIS_MODULE, + .name = "iSER", + .transport_type = ISCSI_RDMA, + .iscsit_conn_alloc = isert_conn_alloc, +
.iscsit_conn_activate = isert_conn_activate, + .iscsit_conn_free = isert_conn_free, + .iscsit_alloc_cmd = isert_cmnd_alloc, + .iscsit_free_cmd = isert_cmnd_free, + .iscsit_preprocessing_done = isert_preprocessing_done, + .iscsit_send_data_rsp = isert_send_data_rsp, + .iscsit_make_conn_wr_active = isert_make_conn_wr_active, + .iscsit_get_initiator_ip = isert_get_initiator_ip, + .iscsit_send_locally = isert_send_locally, + .iscsit_mark_conn_closed = isert_mark_conn_closed, + .iscsit_conn_close = isert_close_conn, + .iscsit_set_sense_data = isert_set_sense_data, + .iscsit_set_req_data = isert_set_req_data, + .iscsit_receive_cmnd_data = isert_receive_cmnd_data, + .iscsit_close_all_portals = isert_close_all_portals, +}; + +static void isert_cleanup_module(void) +{ + iscsit_unregister_transport(&isert_transport); + isert_cleanup_login_devs(); +} + +static int __init isert_init_module(void) +{ + int ret; + + ret = iscsit_register_transport(&isert_transport); + if (ret) + return ret; + + ret = isert_init_login_devs(isert_nr_devs); + + return ret; +} + +MODULE_AUTHOR("Yan Burman"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("iSER target transport driver"); + +module_init(isert_init_module); +module_exit(isert_cleanup_module); diff --git a/iscsi-scst/kernel/isert-scst/isert.h b/iscsi-scst/kernel/isert-scst/isert.h new file mode 100644 index 000000000..9570f030c --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/isert.h @@ -0,0 +1,133 @@ +/* +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. +* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. 
You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. 
+*/ + +#ifndef __ISERT_H__ +#define __ISERT_H__ + +#include +#include +#include +#include /* size_t, dev_t */ +#include +#include +#include +#include + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37) +#include +#else +#include +#endif + +#ifdef INSIDE_KERNEL_TREE +#include +#include +#include +#else +#include "isert_scst.h" +#include "iscsi_scst.h" +#include "iscsi.h" +#endif + +#include "iser_hdr.h" + +struct iscsi_conn; + +#define ISERT_NR_DEVS 64 + +struct isert_listener_dev { + struct device *dev; + struct cdev cdev; + dev_t devno; + wait_queue_head_t waitqueue; + spinlock_t conn_lock; + struct list_head new_conn_list; + struct list_head curr_conn_list; + struct isert_addr_info info; + atomic_t available; + void *portal_h[ISERT_MAX_PORTALS]; + int free_portal_idx; +}; + +enum isert_conn_dev_state { + CS_INIT, + CS_REQ_BHS, + CS_REQ_DATA, + CS_REQ_FINISHED, + CS_RSP_BHS, + CS_RSP_DATA, + CS_RSP_FINISHED, + CS_DISCONNECTED, +}; + +struct isert_conn_dev { + struct device *dev; + struct cdev cdev; + dev_t devno; + wait_queue_head_t waitqueue; + struct list_head conn_list_entry; + struct iscsi_conn *conn; + unsigned int idx; + int occupied; + spinlock_t pdu_lock; + struct iscsi_cmnd *login_req; + struct iscsi_cmnd *login_rsp; + atomic_t available; + size_t read_len; + char *read_buf; + size_t write_len; + char *write_buf; + void *sg_virt; + struct page *pages[DIV_ROUND_UP(ISCSI_LOGIN_MAX_RDSL, PAGE_SIZE)]; + enum isert_conn_dev_state state; + int is_discovery; + struct timer_list tmo_timer; + int timer_active; + struct kref kref; +}; + +#define ISER_CONN_DEV_PREFIX "isert/conn" + +/* isert_login.c */ +int __init isert_init_login_devs(unsigned int ndevs); +void isert_cleanup_login_devs(void); +int isert_conn_alloc(struct iscsi_session *session, + struct iscsi_kern_conn_info *info, + struct iscsi_conn **new_conn, + struct iscsit_transport *t); +int isert_handle_close_connection(struct iscsi_conn *conn); +void isert_close_all_portals(void); + +#endif /* __ISERT_H__ 
*/ diff --git a/iscsi-scst/kernel/isert-scst/isert_dbg.h b/iscsi-scst/kernel/isert-scst/isert_dbg.h new file mode 100644 index 000000000..064c714bf --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/isert_dbg.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2007 - 2014 Vladislav Bolkhovitin + * Copyright (C) 2007 - 2014 Fusion-io, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2 + * of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef ISERT_DBG_H +#define ISERT_DBG_H + +#include + +#ifdef LOG_PREFIX +#undef LOG_PREFIX +#endif + +#define LOG_PREFIX "isert" /* Prefix for SCST tracing macros. */ + +#ifdef INSIDE_KERNEL_TREE +#include +#else +#include +#endif + +#ifdef CONFIG_SCST_DEBUG +#define ISERT_DEFAULT_LOG_FLAGS (TRACE_FUNCTION | TRACE_LINE | TRACE_PID | \ + TRACE_OUT_OF_MEM | TRACE_MGMT | TRACE_MGMT_DEBUG | \ + TRACE_MINOR | TRACE_SPECIAL) +#else +#define ISERT_DEFAULT_LOG_FLAGS (TRACE_OUT_OF_MEM | TRACE_MGMT | \ + TRACE_SPECIAL) +#endif + +#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING) +extern unsigned long isert_trace_flag; +#ifdef trace_flag +#undef trace_flag +#endif +#define trace_flag isert_trace_flag +#endif + +#endif diff --git a/iscsi-scst/kernel/isert-scst/isert_login.c b/iscsi-scst/kernel/isert-scst/isert_login.c new file mode 100644 index 000000000..fae0963e3 --- /dev/null +++ b/iscsi-scst/kernel/isert-scst/isert_login.c @@ -0,0 +1,977 @@ +/* +* This file is part of iser target kernel module. +* +* Copyright (c) 2013 - 2014 Mellanox Technologies. All rights reserved. 
+* Copyright (c) 2013 - 2014 Yan Burman (yanb@mellanox.com) +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#include +#include +#include /* everything... 
*/ +#include /* error codes */ +#include +#include +#include + +#ifdef INSIDE_KERNEL_TREE +#include +#else +#include "iscsi.h" +#endif + +#include "isert.h" +#include "isert_dbg.h" +#include "iser_datamover.h" + +static unsigned int n_devs; + +static int isert_major; + +static struct isert_conn_dev *isert_conn_devices; + +static struct isert_listener_dev isert_listen_dev; + +static struct class *isert_class; + +static struct isert_conn_dev *get_available_dev(struct isert_listener_dev *dev, + struct iscsi_conn *conn) +{ + unsigned int i; + struct isert_conn_dev *res = NULL; + + spin_lock(&dev->conn_lock); + for (i = 0; i < n_devs; ++i) { + if (!isert_conn_devices[i].occupied) { + res = &isert_conn_devices[i]; + res->occupied = 1; + res->conn = conn; + isert_set_priv(conn, res); + list_add(&res->conn_list_entry, &dev->new_conn_list); + break; + } + } + spin_unlock(&dev->conn_lock); + + return res; +} + +static void isert_del_timer(struct isert_conn_dev *dev) +{ + if (dev->timer_active) { + del_timer_sync(&dev->tmo_timer); + dev->timer_active = 0; + } +} + +static void release_dev(struct isert_conn_dev *dev) +{ + kref_init(&dev->kref); + + spin_lock(&isert_listen_dev.conn_lock); + dev->occupied = 0; + list_del_init(&dev->conn_list_entry); + dev->state = CS_INIT; + atomic_set(&dev->available, 1); + spin_unlock(&isert_listen_dev.conn_lock); +} + +static void isert_kref_release_dev(struct kref *kref) +{ + struct isert_conn_dev *dev = container_of(kref, + struct isert_conn_dev, + kref); + release_dev(dev); +} + +static void isert_dev_release(struct isert_conn_dev *dev) +{ + kref_put(&dev->kref, isert_kref_release_dev); +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) +static void isert_close_conn_fn(void *ctx) +#else +static void isert_close_conn_fn(struct work_struct *work) +#endif +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) + struct iscsi_conn *conn = ctx; +#else + struct iscsi_conn *conn = container_of(work, + struct iscsi_conn, close_work); +#endif + 
+ isert_close_connection(conn); +} + +static void isert_conn_timer_fn(unsigned long arg) +{ + struct isert_conn_dev *conn_dev = (struct isert_conn_dev *)arg; + struct iscsi_conn *conn = conn_dev->conn; + + TRACE_ENTRY(); + + conn_dev->timer_active = 0; + + PRINT_ERROR("Timeout on connection %p\n", conn_dev->conn); + + schedule_work(&conn->close_work); + + TRACE_EXIT(); +} + +static int add_new_connection(struct isert_listener_dev *dev, + struct iscsi_conn *conn) +{ + struct isert_conn_dev *conn_dev = get_available_dev(dev, conn); + int res = 0; + + TRACE_ENTRY(); + + if (!conn_dev) { + PRINT_WARNING("%s", "Unable to allocate new connection"); + res = -ENOSPC; + goto out; + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) + INIT_WORK(&conn->close_work, isert_close_conn_fn, conn); +#else + INIT_WORK(&conn->close_work, isert_close_conn_fn); +#endif + + init_timer(&conn_dev->tmo_timer); + conn_dev->tmo_timer.function = isert_conn_timer_fn; + conn_dev->tmo_timer.expires = jiffies + 120 * HZ; + conn_dev->tmo_timer.data = (unsigned long)conn_dev; + add_timer(&conn_dev->tmo_timer); + conn_dev->timer_active = 1; + wake_up(&dev->waitqueue); + +out: + TRACE_EXIT_RES(res); + return res; +} + +static bool have_new_connection(struct isert_listener_dev *dev) +{ + bool ret; + + spin_lock(&dev->conn_lock); + ret = !list_empty(&dev->new_conn_list); + spin_unlock(&dev->conn_lock); + + return ret; +} + +int isert_conn_alloc(struct iscsi_session *session, + struct iscsi_kern_conn_info *info, + struct iscsi_conn **new_conn, + struct iscsit_transport *t) +{ + int res = 0; + struct isert_conn_dev *dev; + struct iscsi_conn *conn; + struct iscsi_cmnd *cmnd; + struct file *filp = fget(info->fd); + + TRACE_ENTRY(); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32) + lockdep_assert_held(&session->target->target_mutex); +#endif + + if (unlikely(!filp)) { + res = -EBADF; + goto out; + } + + dev = filp->private_data; + + cmnd = dev->login_rsp; + + sBUG_ON(cmnd == NULL); + dev->login_rsp 
= NULL; + + *new_conn = dev->conn; + res = isert_set_session_params(dev->conn, &session->sess_params, + &session->tgt_params); + + if (!res) + dev->conn = NULL; + + fput(filp); + + conn = *new_conn; + + if (unlikely(res)) + goto cleanup_conn; + + conn->transport = t; + + res = iscsi_init_conn(session, info, conn); + if (unlikely(res)) + goto cleanup_conn; + + conn->rd_state = 1; + isert_dev_release(dev); + isert_set_priv(conn, NULL); + + res = isert_login_rsp_tx(cmnd, true, false); + vunmap(dev->sg_virt); + dev->sg_virt = NULL; + + if (unlikely(res)) + goto cleanup_iscsi_conn; + +#ifndef CONFIG_SCST_PROC + res = conn_sysfs_add(conn); + if (unlikely(res)) + goto cleanup_iscsi_conn; +#endif + + list_add_tail(&conn->conn_list_entry, &session->conn_list); + + goto out; + +cleanup_iscsi_conn: + conn->rd_state = 0; + if (conn->nop_in_interval > 0) + cancel_delayed_work_sync(&conn->nop_in_delayed_work); +cleanup_conn: + conn->session = NULL; + isert_close_connection(conn); +out: + TRACE_EXIT_RES(res); + return res; +} + +static unsigned int isert_listen_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct isert_listener_dev *dev = filp->private_data; + unsigned int mask = 0; + + poll_wait(filp, &dev->waitqueue, wait); + + if (have_new_connection(dev)) + mask |= POLLIN | POLLRDNORM; + + return mask; +} + +static int isert_listen_open(struct inode *inode, struct file *filp) +{ + struct isert_listener_dev *dev; + + dev = container_of(inode->i_cdev, struct isert_listener_dev, cdev); + + if (!atomic_dec_and_test(&dev->available)) { + atomic_inc(&dev->available); + return -EBUSY; /* already open */ + } + + filp->private_data = dev; /* for other methods */ + + return 0; +} + +static int isert_listen_release(struct inode *inode, struct file *filp) +{ + struct isert_listener_dev *dev = filp->private_data; + struct isert_conn_dev *conn_dev; + + /* No need for locking here, since the chardev is being closed */ + while (!list_empty(&dev->new_conn_list)) { + conn_dev 
= list_first_entry(&dev->new_conn_list, + struct isert_conn_dev, + conn_list_entry); + + isert_del_timer(conn_dev); + if (conn_dev->conn) { + isert_close_connection(conn_dev->conn); + conn_dev->conn = NULL; + } + list_del(&conn_dev->conn_list_entry); + } + + atomic_inc(&dev->available); + return 0; +} + +static ssize_t isert_listen_read(struct file *filp, char __user *buf, + size_t count, loff_t *f_pos) +{ + struct isert_listener_dev *dev = filp->private_data; + struct isert_conn_dev *conn_dev; + int res = 0; + char k_buff[sizeof("/dev/") + sizeof(ISER_CONN_DEV_PREFIX) + 3 + 1]; + + TRACE_ENTRY(); + + if (!have_new_connection(dev)) { + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + res = wait_event_freezable(dev->waitqueue, + !have_new_connection(dev)); + if (res < 0) + goto out; + } + + sBUG_ON(list_empty(&dev->new_conn_list)); + + spin_lock(&dev->conn_lock); + conn_dev = list_first_entry(&dev->new_conn_list, struct isert_conn_dev, + conn_list_entry); + list_move(&conn_dev->conn_list_entry, &dev->curr_conn_list); + kref_get(&conn_dev->kref); + spin_unlock(&dev->conn_lock); + + res = snprintf(k_buff, sizeof(k_buff), "/dev/"ISER_CONN_DEV_PREFIX"%d", + conn_dev->idx); + ++res; /* copy trailing \0 as well */ + + if (copy_to_user(buf, k_buff, res)) + res = -EFAULT; + +out: + TRACE_EXIT_RES(res); + return res; +} + +static long isert_listen_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct isert_listener_dev *dev = filp->private_data; + int res = 0, rc; + void __user *ptr = (void __user *)arg; + void *portal; + + TRACE_ENTRY(); + + switch (cmd) { + case SET_LISTEN_ADDR: + rc = copy_from_user(&dev->info, ptr, sizeof(dev->info)); + if (rc != 0) { + PRINT_ERROR("Failed to copy %d user's bytes\n", rc); + res = -EFAULT; + goto out; + } + + if (dev->free_portal_idx >= ISERT_MAX_PORTALS) { + PRINT_ERROR("Maximum number of portals exceeded: %d\n", + ISERT_MAX_PORTALS); + res = -EINVAL; + goto out; + } + + portal = isert_portal_add((struct sockaddr 
*)&dev->info.addr, + dev->info.addr_len); + if (!portal) { + PRINT_ERROR("Unable to add portal of size %zu\n", + dev->info.addr_len); + res = -EINVAL; + goto out; + } + dev->portal_h[dev->free_portal_idx++] = portal; + break; + + default: + PRINT_ERROR("Invalid ioctl cmd %x", cmd); + res = -EINVAL; + } + +out: + TRACE_EXIT_RES(res); + return res; +} + +int isert_conn_established(struct iscsi_conn *iscsi_conn, + struct sockaddr *from_addr, int addr_len) +{ + return add_new_connection(&isert_listen_dev, iscsi_conn); +} + +int isert_connection_closed(struct iscsi_conn *iscsi_conn) +{ + int res = 0; + + TRACE_ENTRY(); + + if (iscsi_conn->rd_state) { + res = isert_handle_close_connection(iscsi_conn); + } else { + struct isert_conn_dev *dev = isert_get_priv(iscsi_conn); + + if (dev) { + isert_del_timer(dev); + dev->state = CS_DISCONNECTED; + if (dev->login_req) { + res = isert_task_abort(dev->login_req); + dev->login_req = NULL; + } + + dev->conn = NULL; + wake_up(&dev->waitqueue); + isert_dev_release(dev); + } + + isert_free_connection(iscsi_conn); + } + + TRACE_EXIT_RES(res); + return res; +} + +static bool will_read_block(struct isert_conn_dev *dev) +{ + bool res; + + spin_lock(&dev->pdu_lock); + res = (dev->login_req == NULL) && (dev->state != CS_DISCONNECTED); + spin_unlock(&dev->pdu_lock); + + return res; +} + +static int isert_open(struct inode *inode, struct file *filp) +{ + struct isert_conn_dev *dev; /* device information */ + int res = 0; + + TRACE_ENTRY(); + + dev = container_of(inode->i_cdev, struct isert_conn_dev, cdev); + + if (!atomic_dec_and_test(&dev->available)) { + atomic_inc(&dev->available); + res = -EBUSY; /* already open */ + goto out; + } + + filp->private_data = dev; /* for other methods */ + +out: + TRACE_EXIT_RES(res); + return res; +} + +static int isert_release(struct inode *inode, struct file *filp) +{ + struct isert_conn_dev *dev = filp->private_data; + int res = 0; + + TRACE_ENTRY(); + + vunmap(dev->sg_virt); + dev->sg_virt = NULL; + 
dev->is_discovery = 0; + + if (dev->conn) { + isert_close_connection(dev->conn); + dev->conn = NULL; + } + + isert_del_timer(dev); + + isert_dev_release(dev); + + TRACE_EXIT_RES(res); + return res; +} + +static char *isert_vmap_sg(struct page **pages, struct scatterlist *sgl, + int n_ents) +{ + unsigned int i; + struct scatterlist *sg; + void *vaddr; + + for_each_sg(sgl, sg, n_ents, i) + pages[i] = sg_page(sg); + + vaddr = vmap(pages, n_ents, 0, PAGE_KERNEL); + + return vaddr; +} + +static ssize_t isert_read(struct file *filp, char __user *buf, size_t count, + loff_t *f_pos) +{ + struct isert_conn_dev *dev = filp->private_data; + size_t to_read; + + if (will_read_block(dev)) { + int ret; + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + ret = wait_event_freezable(dev->waitqueue, + !will_read_block(dev)); + if (ret < 0) + return ret; + } + + if (dev->state == CS_DISCONNECTED) + return -EPIPE; + + to_read = min(count, dev->read_len); + if (copy_to_user(buf, dev->read_buf, to_read)) + return -EFAULT; + + dev->read_len -= to_read; + dev->read_buf += to_read; + + switch (dev->state) { + case CS_REQ_BHS: + if (dev->read_len == 0) { + dev->read_len = dev->login_req->bufflen; + dev->sg_virt = isert_vmap_sg(dev->pages, + dev->login_req->sg, + dev->login_req->sg_cnt); + if (!dev->sg_virt) + return -ENOMEM; + dev->read_buf = dev->sg_virt + ISER_HDRS_SZ; + dev->state = CS_REQ_DATA; + } + break; + + case CS_REQ_DATA: + if (dev->read_len == 0) { + vunmap(dev->sg_virt); + dev->sg_virt = NULL; + + spin_lock(&dev->pdu_lock); + dev->login_req = NULL; + dev->state = CS_REQ_FINISHED; + spin_unlock(&dev->pdu_lock); + } + break; + + default: + sBUG(); + } + + return to_read; +} + +static ssize_t isert_write(struct file *filp, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct isert_conn_dev *dev = filp->private_data; + size_t to_write; + + if (dev->state == CS_DISCONNECTED) + return -EPIPE; + + to_write = min(count, dev->write_len); + if 
(copy_from_user(dev->write_buf, buf, to_write)) + return -EFAULT; + + dev->write_len -= to_write; + dev->write_buf += to_write; + + switch (dev->state) { + case CS_RSP_BHS: + if (dev->write_len == 0) { + dev->state = CS_RSP_DATA; + dev->sg_virt = isert_vmap_sg(dev->pages, + dev->login_rsp->sg, + dev->login_rsp->sg_cnt); + if (!dev->sg_virt) + return -ENOMEM; + dev->write_buf = dev->sg_virt + ISER_HDRS_SZ; + dev->write_len = dev->login_rsp->bufflen - + sizeof(dev->login_rsp->pdu.bhs); + iscsi_cmnd_get_length(&dev->login_rsp->pdu); + } + break; + + case CS_RSP_DATA: + break; + + default: + sBUG(); + } + + return to_write; +} + +static bool is_last_login_rsp(struct iscsi_login_rsp_hdr *rsp) +{ + return (rsp->flags & ISCSI_FLG_TRANSIT) && + ((rsp->flags & ISCSI_FLG_NSG_MASK) == ISCSI_FLG_NSG_FULL_FEATURE); +} + +static long isert_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct isert_conn_dev *dev = filp->private_data; + int res = 0, rc; + int val; + void __user *ptr = (void __user *)arg; + struct iscsi_cmnd *cmnd; + + TRACE_ENTRY(); + + if (dev->state == CS_DISCONNECTED) { + res = -EPIPE; + goto out; + } + + switch (cmd) { + case RDMA_CORK: + rc = copy_from_user(&val, ptr, sizeof(val)); + if (unlikely(rc != 0)) { + PRINT_ERROR("Failed to copy %d user's bytes", rc); + res = -EFAULT; + goto out; + } + if (val) { + if (!dev->login_rsp) { + cmnd = isert_alloc_login_rsp_pdu(dev->conn); + if (!cmnd) { + res = -ENOMEM; + goto out; + } + dev->login_rsp = cmnd; + dev->write_buf = (char *)&cmnd->pdu.bhs; + dev->write_len = sizeof(cmnd->pdu.bhs); + dev->state = CS_RSP_BHS; + } + } else { + struct iscsi_login_rsp_hdr *rsp; + bool last; + + if (!dev->login_rsp) { + res = -EINVAL; + goto out; + } + + dev->state = CS_RSP_FINISHED; + rsp = (struct iscsi_login_rsp_hdr *)(&dev->login_rsp->pdu.bhs); + last = is_last_login_rsp(rsp); + + dev->login_rsp->bufflen -= dev->write_len; + + if (!last || dev->is_discovery) { + res = isert_login_rsp_tx(dev->login_rsp, + 
last, + dev->is_discovery); + vunmap(dev->sg_virt); + dev->sg_virt = NULL; + dev->login_rsp = NULL; + } + } + break; + + case GET_PORTAL_ADDR: + { + struct isert_addr_info addr; + + res = isert_get_target_addr(dev->conn, + (struct sockaddr *)&addr.addr, + &addr.addr_len); + if (unlikely(res)) + goto out; + + rc = copy_to_user(ptr, &addr, sizeof(addr)); + if (rc) + res = -EFAULT; + } + break; + + case DISCOVERY_SESSION: + rc = copy_from_user(&val, ptr, sizeof(val)); + if (unlikely(rc != 0)) { + PRINT_ERROR("Failed to copy %d user's bytes", rc); + res = -EFAULT; + goto out; + } + dev->is_discovery = val; + break; + + default: + PRINT_ERROR("Invalid ioctl cmd %x", cmd); + res = -EINVAL; + } + +out: + TRACE_EXIT_RES(res); + return res; +} + +static unsigned int isert_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct isert_conn_dev *dev = filp->private_data; + unsigned int mask = 0; + + poll_wait(filp, &dev->waitqueue, wait); + + if (!dev->conn) + mask |= POLLHUP | POLLERR; + if (!will_read_block(dev)) + mask |= POLLIN | POLLRDNORM; + + mask |= POLLOUT | POLLWRNORM; + + return mask; +} + +int isert_login_req_rx(struct iscsi_cmnd *login_req) +{ + struct isert_conn_dev *dev = isert_get_priv(login_req->conn); + int res = 0; + + TRACE_ENTRY(); + + if (!dev) { + PRINT_ERROR("Received PDU %p on invalid connection", + login_req); + res = -EINVAL; + goto out; + } + + switch (dev->state) { + case CS_INIT: + case CS_RSP_FINISHED: + if (dev->login_req != NULL) { + sBUG(); + res = -EINVAL; + goto out; + } + break; + + case CS_REQ_BHS: /* Got login request before done handling old one */ + break; + + case CS_REQ_DATA: + case CS_REQ_FINISHED: + case CS_RSP_BHS: + case CS_RSP_DATA: + PRINT_WARNING("Received login PDU while handling previous one. 
State:%d", + dev->state); + res = -EINVAL; + goto out; + + default: + sBUG(); + res = -EINVAL; + goto out; + } + + + spin_lock(&dev->pdu_lock); + dev->login_req = login_req; + dev->read_len = sizeof(login_req->pdu.bhs); + dev->read_buf = (char *)&login_req->pdu.bhs; + dev->state = CS_REQ_BHS; + spin_unlock(&dev->pdu_lock); + + wake_up(&dev->waitqueue); + +out: + TRACE_EXIT_RES(res); + return res; +} + +static dev_t devno; + +static const struct file_operations listener_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = isert_listen_read, + .unlocked_ioctl = isert_listen_ioctl, + .compat_ioctl = isert_listen_ioctl, + .poll = isert_listen_poll, + .open = isert_listen_open, + .release = isert_listen_release, +}; + +static const struct file_operations conn_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = isert_read, + .write = isert_write, + .unlocked_ioctl = isert_ioctl, + .compat_ioctl = isert_ioctl, + .poll = isert_poll, + .open = isert_open, + .release = isert_release, +}; + +static void __init isert_setup_cdev(struct isert_conn_dev *dev, + unsigned int index) +{ + int err; + + TRACE_ENTRY(); + + dev->devno = MKDEV(isert_major, index + 1); + + cdev_init(&dev->cdev, &conn_fops); + dev->cdev.owner = THIS_MODULE; + dev->cdev.ops = &conn_fops; + dev->idx = index; + init_waitqueue_head(&dev->waitqueue); + dev->login_req = NULL; + dev->login_rsp = NULL; + spin_lock_init(&dev->pdu_lock); + atomic_set(&dev->available, 1); + kref_init(&dev->kref); + dev->state = CS_INIT; + err = cdev_add(&dev->cdev, dev->devno, 1); + /* Fail gracefully if need be */ + if (err) + PRINT_ERROR("Error %d adding "ISER_CONN_DEV_PREFIX"%d", err, + index); + + dev->dev = device_create(isert_class, NULL, dev->devno, NULL, + ISER_CONN_DEV_PREFIX"%d", index); + + TRACE_EXIT(); +} + +static void __init isert_setup_listener_cdev(struct isert_listener_dev *dev) +{ + int err; + + TRACE_ENTRY(); + + dev->devno = MKDEV(isert_major, 0); + + cdev_init(&dev->cdev, 
&listener_fops); + dev->cdev.owner = THIS_MODULE; + dev->cdev.ops = &listener_fops; + init_waitqueue_head(&dev->waitqueue); + INIT_LIST_HEAD(&dev->new_conn_list); + INIT_LIST_HEAD(&dev->curr_conn_list); + spin_lock_init(&dev->conn_lock); + atomic_set(&dev->available, 1); + err = cdev_add(&dev->cdev, dev->devno, 1); + /* Fail gracefully if need be */ + if (err) + PRINT_ERROR("Error %d adding isert_scst", err); + + dev->dev = device_create(isert_class, NULL, dev->devno, NULL, + "isert_scst"); + + TRACE_EXIT(); +} + +int __init isert_init_login_devs(unsigned int ndevs) +{ + int res; + unsigned int i; + + TRACE_ENTRY(); + + n_devs = ndevs; + + res = alloc_chrdev_region(&devno, 0, n_devs, + "isert_scst"); + isert_major = MAJOR(devno); + + if (res < 0) { + PRINT_ERROR("isert: can't get major %d\n", isert_major); + goto out; + } + + /* + * allocate the devices -- we can't have them static, as the number + * can be specified at load time + */ + isert_conn_devices = kzalloc(n_devs * sizeof(struct isert_conn_dev), + GFP_KERNEL); + if (!isert_conn_devices) { + res = -ENOMEM; + goto fail; /* Make this more graceful */ + } + + isert_class = class_create(THIS_MODULE, "isert_scst"); + + isert_setup_listener_cdev(&isert_listen_dev); + + /* Initialize each device. 
*/ + for (i = 0; i < n_devs; i++) + isert_setup_cdev(&isert_conn_devices[i], i); + + res = isert_datamover_init(); + if (res) { + PRINT_ERROR("Unable to initialize datamover: %d\n", res); + goto fail; + } + +out: + TRACE_EXIT_RES(res); + return res; +fail: + isert_cleanup_login_devs(); + goto out; +} + +void isert_close_all_portals(void) +{ + int i; + + for (i = 0; i < isert_listen_dev.free_portal_idx; ++i) + isert_portal_remove(isert_listen_dev.portal_h[i]); + isert_listen_dev.free_portal_idx = 0; +} + +void isert_cleanup_login_devs(void) +{ + int i; + + TRACE_ENTRY(); + + isert_close_all_portals(); + + isert_datamover_cleanup(); + + if (isert_conn_devices) { + for (i = 0; i < n_devs; i++) { + device_destroy(isert_class, + isert_conn_devices[i].devno); + cdev_del(&isert_conn_devices[i].cdev); + } + kfree(isert_conn_devices); + } + + device_destroy(isert_class, isert_listen_dev.devno); + cdev_del(&isert_listen_dev.cdev); + + if (isert_class) + class_destroy(isert_class); + + unregister_chrdev_region(devno, n_devs); + + TRACE_EXIT(); +}