[lustre-devel] [PATCH 19/24] lustre: lnet: add "lnetctl peer list"
James Simmons
jsimmons at infradead.org
Sun Oct 14 16:38:52 PDT 2018
> From: Olaf Weber <olaf at sgi.com>
>
> Add IOC_LIBCFS_GET_PEER_LIST to obtain a list of the primary
> NIDs of all peers known to the system. The list is written
> into a userspace buffer by the kernel. The typical usage is
> to make a first call to determine the required buffer size,
> then a second call to obtain the list.
>
> Extend the "lnetctl peer" set of commands with a "list"
> subcommand that uses this interface.
>
> Modify the IOC_LIBCFS_GET_PEER_NI ioctl (which is new in the
> Multi-Rail code) to use a NID to indicate the peer to look
> up, and then pass out the data for all NIDs of that peer.
>
> Re-implement "lnetctl peer show" to obtain the list of NIDs
> using IOC_LIBCFS_GET_PEER_LIST followed by one or more
> IOC_LIBCFS_GET_PEER_NI calls to get information for each
> peer.
>
> Make sure to copy the structure from kernel space to
> user space even if the ioctl handler returns an error.
> This is needed because if the buffer passed in by
> user space is not big enough to hold the data, we want
> to pass the required size back to user space in the
> structure that was passed in. The return code in this case is -E2BIG.
Reviewed-by: James Simmons <jsimmons at infradead.org>
> WC-bug-id: https://jira.whamcloud.com/browse/LU-9480
> Signed-off-by: Olaf Weber <olaf at sgi.com>
> Reviewed-on: https://review.whamcloud.com/25790
> Reviewed-by: Amir Shehata <amir.shehata at intel.com>
> Tested-by: Amir Shehata <amir.shehata at intel.com>
> Signed-off-by: NeilBrown <neilb at suse.com>
> ---
> .../staging/lustre/include/linux/lnet/lib-lnet.h | 9 -
> .../staging/lustre/include/linux/lnet/lib-types.h | 3
> .../lustre/include/uapi/linux/lnet/libcfs_ioctl.h | 3
> drivers/staging/lustre/lnet/lnet/api-ni.c | 30 ++-
> drivers/staging/lustre/lnet/lnet/peer.c | 222 +++++++++++++-------
> 5 files changed, 169 insertions(+), 98 deletions(-)
>
> diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
> index f82a699371f2..58e3a9c4e39f 100644
> --- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
> +++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
> @@ -462,6 +462,8 @@ int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg);
> struct lnet_ni *lnet_get_next_ni_locked(struct lnet_net *mynet,
> struct lnet_ni *prev);
> struct lnet_ni *lnet_get_ni_idx_locked(int idx);
> +int lnet_get_peer_list(__u32 *countp, __u32 *sizep,
> + struct lnet_process_id __user *ids);
>
> void lnet_router_debugfs_init(void);
> void lnet_router_debugfs_fini(void);
> @@ -730,10 +732,9 @@ bool lnet_peer_is_pref_nid_locked(struct lnet_peer_ni *lpni, lnet_nid_t nid);
> int lnet_peer_ni_set_non_mr_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid);
> int lnet_add_peer_ni(lnet_nid_t key_nid, lnet_nid_t nid, bool mr);
> int lnet_del_peer_ni(lnet_nid_t key_nid, lnet_nid_t nid);
> -int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid,
> - bool *mr,
> - struct lnet_peer_ni_credit_info __user *peer_ni_info,
> - struct lnet_ioctl_element_stats __user *peer_ni_stats);
> +int lnet_get_peer_info(lnet_nid_t *primary_nid, lnet_nid_t *nid,
> + __u32 *nnis, bool *mr, __u32 *sizep,
> + void __user *bulk);
> int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid,
> char alivness[LNET_MAX_STR_LEN],
> __u32 *cpt_iter, __u32 *refcount,
> diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h
> index 07baa86e61ab..8543a67420d7 100644
> --- a/drivers/staging/lustre/include/linux/lnet/lib-types.h
> +++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h
> @@ -651,7 +651,6 @@ struct lnet_peer_net {
> * pt_hash[...]
> * pt_peer_list
> * pt_peers
> - * pt_peer_nnids
> * protected by pt_zombie_lock:
> * pt_zombie_list
> * pt_zombies
> @@ -667,8 +666,6 @@ struct lnet_peer_table {
> struct list_head pt_peer_list;
> /* # peers */
> int pt_peers;
> - /* # NIDS on listed peers */
> - int pt_peer_nnids;
> /* # zombies to go to deathrow (and not there yet) */
> int pt_zombies;
> /* zombie peers_ni */
> diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h b/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h
> index 2a9beed23985..2607620e8ef8 100644
> --- a/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h
> +++ b/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h
> @@ -144,6 +144,7 @@ struct libcfs_debug_ioctl_data {
> #define IOC_LIBCFS_GET_LOCAL_NI _IOWR(IOC_LIBCFS_TYPE, 97, IOCTL_CONFIG_SIZE)
> #define IOC_LIBCFS_SET_NUMA_RANGE _IOWR(IOC_LIBCFS_TYPE, 98, IOCTL_CONFIG_SIZE)
> #define IOC_LIBCFS_GET_NUMA_RANGE _IOWR(IOC_LIBCFS_TYPE, 99, IOCTL_CONFIG_SIZE)
> -#define IOC_LIBCFS_MAX_NR 99
> +#define IOC_LIBCFS_GET_PEER_LIST _IOWR(IOC_LIBCFS_TYPE, 100, IOCTL_CONFIG_SIZE)
> +#define IOC_LIBCFS_MAX_NR 100
>
> #endif /* __LIBCFS_IOCTL_H__ */
> diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
> index 955d1711eda4..f624abe7db80 100644
> --- a/drivers/staging/lustre/lnet/lnet/api-ni.c
> +++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
> @@ -3117,21 +3117,31 @@ LNetCtl(unsigned int cmd, void *arg)
>
> case IOC_LIBCFS_GET_PEER_NI: {
> struct lnet_ioctl_peer_cfg *cfg = arg;
> - struct lnet_peer_ni_credit_info __user *lpni_cri;
> - struct lnet_ioctl_element_stats __user *lpni_stats;
> - size_t usr_size = sizeof(*lpni_cri) + sizeof(*lpni_stats);
>
> - if ((cfg->prcfg_hdr.ioc_len != sizeof(*cfg)) ||
> - (cfg->prcfg_size != usr_size))
> + if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
> return -EINVAL;
>
> - lpni_cri = cfg->prcfg_bulk;
> - lpni_stats = cfg->prcfg_bulk + sizeof(*lpni_cri);
> + mutex_lock(&the_lnet.ln_api_mutex);
> + rc = lnet_get_peer_info(&cfg->prcfg_prim_nid,
> + &cfg->prcfg_cfg_nid,
> + &cfg->prcfg_count,
> + &cfg->prcfg_mr,
> + &cfg->prcfg_size,
> + (void __user *)cfg->prcfg_bulk);
> + mutex_unlock(&the_lnet.ln_api_mutex);
> + return rc;
> + }
> +
> + case IOC_LIBCFS_GET_PEER_LIST: {
> + struct lnet_ioctl_peer_cfg *cfg = arg;
> +
> + if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
> + return -EINVAL;
>
> mutex_lock(&the_lnet.ln_api_mutex);
> - rc = lnet_get_peer_info(cfg->prcfg_count, &cfg->prcfg_prim_nid,
> - &cfg->prcfg_cfg_nid, &cfg->prcfg_mr,
> - lpni_cri, lpni_stats);
> + rc = lnet_get_peer_list(&cfg->prcfg_count, &cfg->prcfg_size,
> + (struct lnet_process_id __user *)
> + cfg->prcfg_bulk);
> mutex_unlock(&the_lnet.ln_api_mutex);
> return rc;
> }
> diff --git a/drivers/staging/lustre/lnet/lnet/peer.c b/drivers/staging/lustre/lnet/lnet/peer.c
> index 1ef4a44e752e..8dff3b767577 100644
> --- a/drivers/staging/lustre/lnet/lnet/peer.c
> +++ b/drivers/staging/lustre/lnet/lnet/peer.c
> @@ -263,9 +263,7 @@ lnet_peer_detach_peer_ni_locked(struct lnet_peer_ni *lpni)
>
> /* Update peer NID count. */
> lp = lpn->lpn_peer;
> - ptable = the_lnet.ln_peer_tables[lp->lp_cpt];
> lp->lp_nnis--;
> - ptable->pt_peer_nnids--;
>
> /*
> * If there are no more peer nets, make the peer unfindable
> @@ -277,6 +275,7 @@ lnet_peer_detach_peer_ni_locked(struct lnet_peer_ni *lpni)
> */
> if (list_empty(&lp->lp_peer_nets)) {
> list_del_init(&lp->lp_peer_list);
> + ptable = the_lnet.ln_peer_tables[lp->lp_cpt];
> ptable->pt_peers--;
> } else if (the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING) {
> /* Discovery isn't running, nothing to do here. */
> @@ -637,44 +636,6 @@ lnet_find_peer(lnet_nid_t nid)
> return lp;
> }
>
> -struct lnet_peer_ni *
> -lnet_get_peer_ni_idx_locked(int idx, struct lnet_peer_net **lpn,
> - struct lnet_peer **lp)
> -{
> - struct lnet_peer_table *ptable;
> - struct lnet_peer_ni *lpni;
> - int lncpt;
> - int cpt;
> -
> - lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
> -
> - for (cpt = 0; cpt < lncpt; cpt++) {
> - ptable = the_lnet.ln_peer_tables[cpt];
> - if (ptable->pt_peer_nnids > idx)
> - break;
> - idx -= ptable->pt_peer_nnids;
> - }
> - if (cpt >= lncpt)
> - return NULL;
> -
> - list_for_each_entry((*lp), &ptable->pt_peer_list, lp_peer_list) {
> - if ((*lp)->lp_nnis <= idx) {
> - idx -= (*lp)->lp_nnis;
> - continue;
> - }
> - list_for_each_entry((*lpn), &((*lp)->lp_peer_nets),
> - lpn_peer_nets) {
> - list_for_each_entry(lpni, &((*lpn)->lpn_peer_nis),
> - lpni_peer_nis) {
> - if (idx-- == 0)
> - return lpni;
> - }
> - }
> - }
> -
> - return NULL;
> -}
> -
> struct lnet_peer_ni *
> lnet_get_next_peer_ni_locked(struct lnet_peer *peer,
> struct lnet_peer_net *peer_net,
> @@ -734,6 +695,69 @@ lnet_get_next_peer_ni_locked(struct lnet_peer *peer,
> return lpni;
> }
>
> +/* Call with the ln_api_mutex held */
> +int
> +lnet_get_peer_list(__u32 *countp, __u32 *sizep,
> + struct lnet_process_id __user *ids)
> +{
> + struct lnet_process_id id;
> + struct lnet_peer_table *ptable;
> + struct lnet_peer *lp;
> + __u32 count = 0;
> + __u32 size = 0;
> + int lncpt;
> + int cpt;
> + __u32 i;
> + int rc;
> +
> + rc = -ESHUTDOWN;
> + if (the_lnet.ln_state == LNET_STATE_SHUTDOWN)
> + goto done;
> +
> + lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
> +
> + /*
> + * Count the number of peers, and return E2BIG if the buffer
> + * is too small. We'll also return the desired size.
> + */
> + rc = -E2BIG;
> + for (cpt = 0; cpt < lncpt; cpt++) {
> + ptable = the_lnet.ln_peer_tables[cpt];
> + count += ptable->pt_peers;
> + }
> + size = count * sizeof(*ids);
> + if (size > *sizep)
> + goto done;
> +
> + /*
> + * Walk the peer lists and copy out the primary nids.
> + * This is safe because the peer lists are only modified
> + * while the ln_api_mutex is held. So we don't need to
> + * hold the lnet_net_lock as well, and can therefore
> + * directly call copy_to_user().
> + */
> + rc = -EFAULT;
> + memset(&id, 0, sizeof(id));
> + id.pid = LNET_PID_LUSTRE;
> + i = 0;
> + for (cpt = 0; cpt < lncpt; cpt++) {
> + ptable = the_lnet.ln_peer_tables[cpt];
> + list_for_each_entry(lp, &ptable->pt_peer_list, lp_peer_list) {
> + if (i >= count)
> + goto done;
> + id.nid = lp->lp_primary_nid;
> + if (copy_to_user(&ids[i], &id, sizeof(id)))
> + goto done;
> + i++;
> + }
> + }
> + rc = 0;
> +done:
> + *countp = count;
> + *sizep = size;
> + return rc;
> +}
> +
> /*
> * Start pushes to peers that need to be updated for a configuration
> * change on this node.
> @@ -1128,7 +1152,6 @@ lnet_peer_attach_peer_ni(struct lnet_peer *lp,
> spin_unlock(&lp->lp_lock);
>
> lp->lp_nnis++;
> - the_lnet.ln_peer_tables[lp->lp_cpt]->pt_peer_nnids++;
> lnet_net_unlock(LNET_LOCK_EX);
>
> CDEBUG(D_NET, "peer %s NID %s flags %#x\n",
> @@ -3273,55 +3296,94 @@ lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid,
> }
>
> /* ln_api_mutex is held, which keeps the peer list stable */
> -int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid,
> - bool *mr,
> - struct lnet_peer_ni_credit_info __user *peer_ni_info,
> - struct lnet_ioctl_element_stats __user *peer_ni_stats)
> +int lnet_get_peer_info(lnet_nid_t *primary_nid, lnet_nid_t *nidp,
> + __u32 *nnis, bool *mr, __u32 *sizep,
> + void __user *bulk)
> {
> - struct lnet_ioctl_element_stats ni_stats;
> - struct lnet_peer_ni_credit_info ni_info;
> - struct lnet_peer_ni *lpni = NULL;
> - struct lnet_peer_net *lpn = NULL;
> - struct lnet_peer *lp = NULL;
> + struct lnet_ioctl_element_stats *lpni_stats;
> + struct lnet_peer_ni_credit_info *lpni_info;
> + struct lnet_peer_ni *lpni;
> + struct lnet_peer *lp;
> + lnet_nid_t nid;
> + __u32 size;
> int rc;
>
> - lpni = lnet_get_peer_ni_idx_locked(idx, &lpn, &lp);
> + lp = lnet_find_peer(*primary_nid);
>
> - if (!lpni)
> - return -ENOENT;
> + if (!lp) {
> + rc = -ENOENT;
> + goto out;
> + }
> +
> + size = sizeof(nid) + sizeof(*lpni_info) + sizeof(*lpni_stats);
> + size *= lp->lp_nnis;
> + if (size > *sizep) {
> + *sizep = size;
> + rc = -E2BIG;
> + goto out_lp_decref;
> + }
>
> *primary_nid = lp->lp_primary_nid;
> *mr = lnet_peer_is_multi_rail(lp);
> - *nid = lpni->lpni_nid;
> - snprintf(ni_info.cr_aliveness, LNET_MAX_STR_LEN, "NA");
> - if (lnet_isrouter(lpni) ||
> - lnet_peer_aliveness_enabled(lpni))
> - snprintf(ni_info.cr_aliveness, LNET_MAX_STR_LEN,
> - lpni->lpni_alive ? "up" : "down");
> -
> - ni_info.cr_refcount = atomic_read(&lpni->lpni_refcount);
> - ni_info.cr_ni_peer_tx_credits = lpni->lpni_net ?
> - lpni->lpni_net->net_tunables.lct_peer_tx_credits : 0;
> - ni_info.cr_peer_tx_credits = lpni->lpni_txcredits;
> - ni_info.cr_peer_rtr_credits = lpni->lpni_rtrcredits;
> - ni_info.cr_peer_min_rtr_credits = lpni->lpni_minrtrcredits;
> - ni_info.cr_peer_min_tx_credits = lpni->lpni_mintxcredits;
> - ni_info.cr_peer_tx_qnob = lpni->lpni_txqnob;
> -
> - ni_stats.iel_send_count = atomic_read(&lpni->lpni_stats.send_count);
> - ni_stats.iel_recv_count = atomic_read(&lpni->lpni_stats.recv_count);
> - ni_stats.iel_drop_count = atomic_read(&lpni->lpni_stats.drop_count);
> -
> - /* If copy_to_user fails */
> - rc = -EFAULT;
> - if (copy_to_user(peer_ni_info, &ni_info, sizeof(ni_info)))
> - goto copy_failed;
> + *nidp = lp->lp_primary_nid;
> + *nnis = lp->lp_nnis;
> + *sizep = size;
>
> - if (copy_to_user(peer_ni_stats, &ni_stats, sizeof(ni_stats)))
> - goto copy_failed;
> + /* Allocate helper buffers. */
> + rc = -ENOMEM;
> + lpni_info = kzalloc(sizeof(*lpni_info), GFP_KERNEL);
> + if (!lpni_info)
> + goto out_lp_decref;
> + lpni_stats = kzalloc(sizeof(*lpni_stats), GFP_KERNEL);
> + if (!lpni_stats)
> + goto out_free_info;
>
> + lpni = NULL;
> + rc = -EFAULT;
> + while ((lpni = lnet_get_next_peer_ni_locked(lp, NULL, lpni)) != NULL) {
> + nid = lpni->lpni_nid;
> + if (copy_to_user(bulk, &nid, sizeof(nid)))
> + goto out_free_stats;
> + bulk += sizeof(nid);
> +
> + memset(lpni_info, 0, sizeof(*lpni_info));
> + snprintf(lpni_info->cr_aliveness, LNET_MAX_STR_LEN, "NA");
> + if (lnet_isrouter(lpni) ||
> + lnet_peer_aliveness_enabled(lpni))
> + snprintf(lpni_info->cr_aliveness, LNET_MAX_STR_LEN,
> + lpni->lpni_alive ? "up" : "down");
> +
> + lpni_info->cr_refcount = atomic_read(&lpni->lpni_refcount);
> + lpni_info->cr_ni_peer_tx_credits = lpni->lpni_net ?
> + lpni->lpni_net->net_tunables.lct_peer_tx_credits : 0;
> + lpni_info->cr_peer_tx_credits = lpni->lpni_txcredits;
> + lpni_info->cr_peer_rtr_credits = lpni->lpni_rtrcredits;
> + lpni_info->cr_peer_min_rtr_credits = lpni->lpni_minrtrcredits;
> + lpni_info->cr_peer_min_tx_credits = lpni->lpni_mintxcredits;
> + lpni_info->cr_peer_tx_qnob = lpni->lpni_txqnob;
> + if (copy_to_user(bulk, lpni_info, sizeof(*lpni_info)))
> + goto out_free_stats;
> + bulk += sizeof(*lpni_info);
> +
> + memset(lpni_stats, 0, sizeof(*lpni_stats));
> + lpni_stats->iel_send_count =
> + atomic_read(&lpni->lpni_stats.send_count);
> + lpni_stats->iel_recv_count =
> + atomic_read(&lpni->lpni_stats.recv_count);
> + lpni_stats->iel_drop_count =
> + atomic_read(&lpni->lpni_stats.drop_count);
> + if (copy_to_user(bulk, lpni_stats, sizeof(*lpni_stats)))
> + goto out_free_stats;
> + bulk += sizeof(*lpni_stats);
> + }
> rc = 0;
>
> -copy_failed:
> +out_free_stats:
> + kfree(lpni_stats);
> +out_free_info:
> + kfree(lpni_info);
> +out_lp_decref:
> + lnet_peer_decref_locked(lp);
> +out:
> return rc;
> }
>
>
>
More information about the lustre-devel
mailing list