[lustre-devel] [PATCH 10/24] lustre: lnet: refactor lnet_add_peer_ni()

James Simmons jsimmons at infradead.org
Sun Oct 14 13:02:56 PDT 2018


> From: Olaf Weber <olaf at sgi.com>
> 
> Refactor lnet_add_peer_ni() and the functions called by it. In
> particular, lnet_peer_add_nid() adds an lnet_peer_ni to an
> existing lnet_peer, lnet_peer_add() adds a new lnet_peer.
> 
> lnet_find_or_create_peer_locked() is removed.

Reviewed-by: James Simmons <jsimmons at infradead.org>
 
> WC-bug-id: https://jira.whamcloud.com/browse/LU-9480
> Signed-off-by: Olaf Weber <olaf at sgi.com>
> Reviewed-on: https://review.whamcloud.com/25780
> Reviewed-by: Olaf Weber <olaf.weber at hpe.com>
> Reviewed-by: Amir Shehata <amir.shehata at intel.com>
> Tested-by: Amir Shehata <amir.shehata at intel.com>
> Signed-off-by: NeilBrown <neilb at suse.com>
> ---
>  .../staging/lustre/include/linux/lnet/lib-lnet.h   |    1 
>  drivers/staging/lustre/lnet/lnet/lib-move.c        |   13 +
>  drivers/staging/lustre/lnet/lnet/peer.c            |  230 +++++++-------------
>  3 files changed, 92 insertions(+), 152 deletions(-)
> 
> diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
> index 69f45a76f1cc..fc748ffa251d 100644
> --- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
> +++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
> @@ -668,7 +668,6 @@ u32 lnet_get_dlc_seq_locked(void);
>  struct lnet_peer_ni *lnet_get_next_peer_ni_locked(struct lnet_peer *peer,
>  						  struct lnet_peer_net *peer_net,
>  						  struct lnet_peer_ni *prev);
> -struct lnet_peer *lnet_find_or_create_peer_locked(lnet_nid_t dst_nid, int cpt);
>  struct lnet_peer_ni *lnet_nid2peerni_locked(lnet_nid_t nid, int cpt);
>  struct lnet_peer_ni *lnet_nid2peerni_ex(lnet_nid_t nid, int cpt);
>  struct lnet_peer_ni *lnet_find_peer_ni_locked(lnet_nid_t nid);
> diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
> index e8c021622f91..59ae8d0649e5 100644
> --- a/drivers/staging/lustre/lnet/lnet/lib-move.c
> +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c
> @@ -1262,11 +1262,18 @@ lnet_select_pathway(lnet_nid_t src_nid, lnet_nid_t dst_nid,
>  		return -ESHUTDOWN;
>  	}
>  
> -	peer = lnet_find_or_create_peer_locked(dst_nid, cpt);
> -	if (IS_ERR(peer)) {
> +	/*
> +	 * lnet_nid2peerni_locked() is the path that will find an
> +	 * existing peer_ni, or create one and mark it as having been
> +	 * created due to network traffic.
> +	 */
> +	lpni = lnet_nid2peerni_locked(dst_nid, cpt);
> +	if (IS_ERR(lpni)) {
>  		lnet_net_unlock(cpt);
> -		return PTR_ERR(peer);
> +		return PTR_ERR(lpni);
>  	}
> +	peer = lpni->lpni_peer_net->lpn_peer;
> +	lnet_peer_ni_decref_locked(lpni);
>  
>  	/* If peer is not healthy then can not send anything to it */
>  	if (!lnet_is_peer_healthy_locked(peer)) {
> diff --git a/drivers/staging/lustre/lnet/lnet/peer.c b/drivers/staging/lustre/lnet/lnet/peer.c
> index 30a2486712e4..6b7ca5c361b8 100644
> --- a/drivers/staging/lustre/lnet/lnet/peer.c
> +++ b/drivers/staging/lustre/lnet/lnet/peer.c
> @@ -541,25 +541,6 @@ lnet_find_peer_ni_locked(lnet_nid_t nid)
>  	return lpni;
>  }
>  
> -struct lnet_peer *
> -lnet_find_or_create_peer_locked(lnet_nid_t dst_nid, int cpt)
> -{
> -	struct lnet_peer_ni *lpni;
> -	struct lnet_peer *lp;
> -
> -	lpni = lnet_find_peer_ni_locked(dst_nid);
> -	if (!lpni) {
> -		lpni = lnet_nid2peerni_locked(dst_nid, cpt);
> -		if (IS_ERR(lpni))
> -			return ERR_CAST(lpni);
> -	}
> -
> -	lp = lpni->lpni_peer_net->lpn_peer;
> -	lnet_peer_ni_decref_locked(lpni);
> -
> -	return lp;
> -}
> -
>  struct lnet_peer_ni *
>  lnet_get_peer_ni_idx_locked(int idx, struct lnet_peer_net **lpn,
>  			    struct lnet_peer **lp)
> @@ -774,131 +755,95 @@ lnet_peer_setup_hierarchy(struct lnet_peer *lp, struct lnet_peer_ni
>  	return -ENOMEM;
>  }
>  
> +/*
> + * Create a new peer, with nid as its primary nid.
> + *
> + * It is not an error if the peer already exists, provided that the
> + * given nid is the primary NID.
> + *
> + * Call with the lnet_api_mutex held.
> + */
>  static int
> -lnet_add_prim_lpni(lnet_nid_t nid)
> +lnet_peer_add(lnet_nid_t nid, bool mr)
>  {
> -	int rc;
> -	struct lnet_peer *peer;
> +	struct lnet_peer *lp;
>  	struct lnet_peer_ni *lpni;
>  
>  	LASSERT(nid != LNET_NID_ANY);
>  
>  	/*
> -	 * lookup the NID and its peer
> -	 *  if the peer doesn't exist, create it.
> -	 *  if this is a non-MR peer then change its state to MR and exit.
> -	 *  if this is an MR peer and it's a primary NI: NO-OP.
> -	 *  if this is an MR peer and it's not a primary NI. Operation not
> -	 *     allowed.
> -	 *
> -	 * The adding and deleting of peer nis is being serialized through
> -	 * the api_mutex. So we can look up peers with the mutex locked
> -	 * safely. Only when we need to change the ptable, do we need to
> -	 * exclusively lock the lnet_net_lock()
> +	 * No need for the lnet_net_lock here, because the
> +	 * lnet_api_mutex is held.
>  	 */
>  	lpni = lnet_find_peer_ni_locked(nid);
>  	if (!lpni) {
> -		rc = lnet_peer_setup_hierarchy(NULL, NULL, nid);
> +		int rc = lnet_peer_setup_hierarchy(NULL, NULL, nid);
>  		if (rc != 0)
>  			return rc;
>  		lpni = lnet_find_peer_ni_locked(nid);
> +		LASSERT(lpni);
>  	}
> -
> -	LASSERT(lpni);
> -
> +	lp = lpni->lpni_peer_net->lpn_peer;
>  	lnet_peer_ni_decref_locked(lpni);
>  
> -	peer = lpni->lpni_peer_net->lpn_peer;
> -
> -	/*
> -	 * If we found a lpni with the same nid as the NID we're trying to
> -	 * create, then we're trying to create an already existing lpni
> -	 * that belongs to a different peer
> -	 */
> -	if (peer->lp_primary_nid != nid)
> +	/* A found peer must have this primary NID */
> +	if (lp->lp_primary_nid != nid)
>  		return -EEXIST;
>  
>  	/*
> -	 * if we found an lpni that is not a multi-rail, which could occur
> +	 * If we found an lpni that is not a multi-rail, which could occur
>  	 * if lpni is already created as a non-mr lpni or we just created
>  	 * it, then make sure you indicate that this lpni is a primary mr
>  	 * capable peer.
>  	 *
>  	 * TODO: update flags if necessary
>  	 */
> -	if (!peer->lp_multi_rail && peer->lp_primary_nid == nid)
> -		peer->lp_multi_rail = true;
> +	if (mr && !lp->lp_multi_rail) {
> +		lp->lp_multi_rail = true;
> +	} else if (!mr && lp->lp_multi_rail) {
> +		/* The mr state is sticky. */
> +		CDEBUG(D_NET, "Cannot clear multi-flag from peer %s\n",
> +		       libcfs_nid2str(nid));
> +	}
>  
> -	return rc;
> +	return 0;
>  }
>  
>  static int
> -lnet_add_peer_ni_to_prim_lpni(lnet_nid_t prim_nid, lnet_nid_t nid)
> +lnet_peer_add_nid(struct lnet_peer *lp, lnet_nid_t nid, bool mr)
>  {
> -	struct lnet_peer *peer, *primary_peer;
> -	struct lnet_peer_ni *lpni = NULL, *klpni = NULL;
> -
> -	LASSERT(prim_nid != LNET_NID_ANY && nid != LNET_NID_ANY);
> +	struct lnet_peer_ni *lpni;
>  
> -	/*
> -	 * key nid must be created by this point. If not then this
> -	 * operation is not permitted
> -	 */
> -	klpni = lnet_find_peer_ni_locked(prim_nid);
> -	if (!klpni)
> -		return -ENOENT;
> +	LASSERT(lp);
> +	LASSERT(nid != LNET_NID_ANY);
>  
> -	lnet_peer_ni_decref_locked(klpni);
> +	if (!mr && !lp->lp_multi_rail) {
> +		CERROR("Cannot add nid %s to non-multi-rail peer %s\n",
> +		       libcfs_nid2str(nid),
> +		       libcfs_nid2str(lp->lp_primary_nid));
> +		return -EPERM;
> +	}
>  
> -	primary_peer = klpni->lpni_peer_net->lpn_peer;
> +	if (!lp->lp_multi_rail)
> +		lp->lp_multi_rail = true;
>  
>  	lpni = lnet_find_peer_ni_locked(nid);
> -	if (lpni) {
> -		lnet_peer_ni_decref_locked(lpni);
> -
> -		peer = lpni->lpni_peer_net->lpn_peer;
> -		/*
> -		 * lpni already exists in the system but it belongs to
> -		 * a different peer. We can't re-added it
> -		 */
> -		if (peer->lp_primary_nid != prim_nid && peer->lp_multi_rail) {
> -			CERROR("Cannot add NID %s owned by peer %s to peer %s\n",
> -			       libcfs_nid2str(lpni->lpni_nid),
> -			       libcfs_nid2str(peer->lp_primary_nid),
> -			       libcfs_nid2str(prim_nid));
> -			return -EEXIST;
> -		} else if (peer->lp_primary_nid == prim_nid) {
> -			/*
> -			 * found a peer_ni that is already part of the
> -			 * peer. This is a no-op operation.
> -			 */
> -			return 0;
> -		}
> -
> -		/*
> -		 * TODO: else if (peer->lp_primary_nid != prim_nid &&
> -		 *		  !peer->lp_multi_rail)
> -		 * peer is not an MR peer and it will be moved in the next
> -		 * step to klpni, so update its flags accordingly.
> -		 * lnet_move_peer_ni()
> -		 */
> -
> -		/*
> -		 * TODO: call lnet_update_peer() from here to update the
> -		 * flags. This is the case when the lpni you're trying to
> -		 * add is already part of the peer. This could've been
> -		 * added by the DD previously, so go ahead and do any
> -		 * updates to the state if necessary
> -		 */
> +	if (!lpni)
> +		return lnet_peer_setup_hierarchy(lp, NULL, nid);
>  
> +	if (lpni->lpni_peer_net->lpn_peer != lp) {
> +		struct lnet_peer *lp2 = lpni->lpni_peer_net->lpn_peer;
> +		CERROR("Cannot add NID %s owned by peer %s to peer %s\n",
> +		       libcfs_nid2str(lpni->lpni_nid),
> +		       libcfs_nid2str(lp2->lp_primary_nid),
> +		       libcfs_nid2str(lp->lp_primary_nid));
> +		return -EEXIST;
>  	}
>  
> -	/*
> -	 * When we get here we either have found an existing lpni, which
> -	 * we can switch to the new peer. Or we need to create one and
> -	 * add it to the new peer
> -	 */
> -	return lnet_peer_setup_hierarchy(primary_peer, lpni, nid);
> +	CDEBUG(D_NET, "NID %s is already owned by peer %s\n",
> +	       libcfs_nid2str(lpni->lpni_nid),
> +	       libcfs_nid2str(lp->lp_primary_nid));
> +	return 0;
>  }
>  
>  /*
> @@ -929,61 +874,50 @@ lnet_peer_ni_traffic_add(lnet_nid_t nid)
>  	return rc;
>  }
>  
> -static int
> -lnet_peer_ni_add_non_mr(lnet_nid_t nid)
> -{
> -	struct lnet_peer_ni *lpni;
> -
> -	lpni = lnet_find_peer_ni_locked(nid);
> -	if (lpni) {
> -		CERROR("Cannot add %s as non-mr when it already exists\n",
> -		       libcfs_nid2str(nid));
> -		lnet_peer_ni_decref_locked(lpni);
> -		return -EEXIST;
> -	}
> -
> -	return lnet_peer_setup_hierarchy(NULL, NULL, nid);
> -}
> -
>  /*
>   * Implementation of IOC_LIBCFS_ADD_PEER_NI.
>   *
>   * This API handles the following combinations:
> - *   Create a primary NI if only the prim_nid is provided
> - *   Create or add an lpni to a primary NI. Primary NI must've already
> - *   been created
> - *   Create a non-MR peer.
> + *   Create a peer with its primary NI if only the prim_nid is provided
> + *   Add a NID to a peer identified by the prim_nid. The peer identified
> + *   by the prim_nid must already exist.
> + *   The peer being created may be non-MR.
> + *
> + * The caller must hold ln_api_mutex. This prevents the peer from
> + * being created/modified/deleted by a different thread.
>   */
>  int
>  lnet_add_peer_ni(lnet_nid_t prim_nid, lnet_nid_t nid, bool mr)
>  {
> +	struct lnet_peer *lp = NULL;
> +	struct lnet_peer_ni *lpni;
> +
> +	/* The prim_nid must always be specified */
> +	if (prim_nid == LNET_NID_ANY)
> +		return -EINVAL;
> +
>  	/*
> -	 * Caller trying to setup an MR like peer hierarchy but
> -	 * specifying it to be non-MR. This is not allowed.
> +	 * If nid isn't specified, we must create a new peer with
> +	 * prim_nid as its primary nid.
>  	 */
> -	if (prim_nid != LNET_NID_ANY &&
> -	    nid != LNET_NID_ANY && !mr)
> -		return -EPERM;
> -
> -	/* Add the primary NID of a peer */
> -	if (prim_nid != LNET_NID_ANY &&
> -	    nid == LNET_NID_ANY && mr)
> -		return lnet_add_prim_lpni(prim_nid);
> +	if (nid == LNET_NID_ANY)
> +		return lnet_peer_add(prim_nid, mr);
>  
> -	/* Add a NID to an existing peer */
> -	if (prim_nid != LNET_NID_ANY &&
> -	    nid != LNET_NID_ANY && mr)
> -		return lnet_add_peer_ni_to_prim_lpni(prim_nid, nid);
> +	/* Look up the prim_nid, which must exist. */
> +	lpni = lnet_find_peer_ni_locked(prim_nid);
> +	if (!lpni)
> +		return -ENOENT;
> +	lnet_peer_ni_decref_locked(lpni);
> +	lp = lpni->lpni_peer_net->lpn_peer;
>  
> -	/* Add a non-MR peer NI */
> -	if (((prim_nid != LNET_NID_ANY &&
> -	      nid == LNET_NID_ANY) ||
> -	     (prim_nid == LNET_NID_ANY &&
> -	      nid != LNET_NID_ANY)) && !mr)
> -		return lnet_peer_ni_add_non_mr(prim_nid != LNET_NID_ANY ?
> -							 prim_nid : nid);
> +	if (lp->lp_primary_nid != prim_nid) {
> +		CDEBUG(D_NET, "prim_nid %s is not primary for peer %s\n",
> +		       libcfs_nid2str(prim_nid),
> +		       libcfs_nid2str(lp->lp_primary_nid));
> +		return -ENODEV;
> +	}
>  
> -	return 0;
> +	return lnet_peer_add_nid(lp, nid, mr);
>  }
>  
>  /*
> 
> 
> 


More information about the lustre-devel mailing list