[lustre-devel] [PATCH 14/24] lustre: lnet: reference counts on lnet_peer/lnet_peer_net
NeilBrown
neilb at suse.com
Tue Oct 16 22:16:51 PDT 2018
On Sun, Oct 14 2018, James Simmons wrote:
>> From: Olaf Weber <olaf at sgi.com>
>>
>> Peer discovery will be keeping track of lnet_peer structures,
>> so there will be references to an lnet_peer independent of
>> the references implied by lnet_peer_ni structures. Manage
>> this by adding explicit reference counts to lnet_peer_net and
>> lnet_peer.
>>
>> Each lnet_peer_net has a hold on the lnet_peer it links to
>> with its lpn_peer pointer. This hold is only removed when that
>> pointer is assigned a new value or the lnet_peer_net is freed.
>> Just removing an lnet_peer_net from the lp_peer_nets list does
>> not release this hold, it just prevents new lookups of the
>> lnet_peer_net via the lnet_peer.
>>
>> Each lnet_peer_ni has a hold on the lnet_peer_net it links to
>> with its lpni_peer_net pointer. This hold is only removed when
>> that pointer is assigned a new value or the lnet_peer_ni is
>> freed. Just removing an lnet_peer_ni from the lpn_peer_nis
>> list does not release this hold, it just prevents new lookups
>> of the lnet_peer_ni via the lnet_peer_net.
>>
>> This ensures that given a lnet_peer_ni *lpni, we can rely on
>> lpni->lpni_peer_net->lpn_peer pointing to a valid lnet_peer.
>>
>> Keep a count of the total number of lnet_peer_ni attached to
>> an lnet_peer in lp_nnis.
>>
>> Split the global ln_peers list into per-lnet_peer_table lists.
>> The CPT of the peer table in which the lnet_peer is linked is
>> stored in lp_cpt.
>>
>> WC-bug-id: https://jira.whamcloud.com/browse/LU-9480
>> Signed-off-by: Olaf Weber <olaf at sgi.com>
>> Reviewed-on: https://review.whamcloud.com/25784
>> Reviewed-by: Olaf Weber <olaf.weber at hpe.com>
>> Reviewed-by: Amir Shehata <amir.shehata at intel.com>
>> Tested-by: Amir Shehata <amir.shehata at intel.com>
>> Signed-off-by: NeilBrown <neilb at suse.com>
>> ---
>> .../staging/lustre/include/linux/lnet/lib-lnet.h | 49 +++--
>> .../staging/lustre/include/linux/lnet/lib-types.h | 50 ++++-
>> drivers/staging/lustre/lnet/lnet/api-ni.c | 1
>> drivers/staging/lustre/lnet/lnet/lib-move.c | 8 -
>> drivers/staging/lustre/lnet/lnet/peer.c | 210 ++++++++++++++------
>> 5 files changed, 227 insertions(+), 91 deletions(-)
>>
>> diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
>> index 563417510722..aad25eb0011b 100644
>> --- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
>> +++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
>> @@ -310,6 +310,36 @@ lnet_handle2me(struct lnet_handle_me *handle)
>> return lh_entry(lh, struct lnet_me, me_lh);
>> }
>>
>> +static inline void
>> +lnet_peer_net_addref_locked(struct lnet_peer_net *lpn)
>> +{
>> + atomic_inc(&lpn->lpn_refcount);
>> +}
>> +
>> +void lnet_destroy_peer_net_locked(struct lnet_peer_net *lpn);
>> +
>> +static inline void
>> +lnet_peer_net_decref_locked(struct lnet_peer_net *lpn)
>> +{
>> + if (atomic_dec_and_test(&lpn->lpn_refcount))
>> + lnet_destroy_peer_net_locked(lpn);
>> +}
>> +
>> +static inline void
>> +lnet_peer_addref_locked(struct lnet_peer *lp)
>> +{
>> + atomic_inc(&lp->lp_refcount);
>> +}
>> +
>> +void lnet_destroy_peer_locked(struct lnet_peer *lp);
>> +
>> +static inline void
>> +lnet_peer_decref_locked(struct lnet_peer *lp)
>> +{
>> + if (atomic_dec_and_test(&lp->lp_refcount))
>> + lnet_destroy_peer_locked(lp);
>> +}
>> +
>> static inline void
>> lnet_peer_ni_addref_locked(struct lnet_peer_ni *lp)
>> {
>> @@ -695,21 +725,6 @@ int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid,
>> __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credtis,
>> __u32 *peer_tx_qnob);
>>
>> -static inline __u32
>> -lnet_get_num_peer_nis(struct lnet_peer *peer)
>> -{
>> - struct lnet_peer_net *lpn;
>> - struct lnet_peer_ni *lpni;
>> - __u32 count = 0;
>> -
>> - list_for_each_entry(lpn, &peer->lp_peer_nets, lpn_on_peer_list)
>> - list_for_each_entry(lpni, &lpn->lpn_peer_nis,
>> - lpni_on_peer_net_list)
>> - count++;
>> -
>> - return count;
>> -}
>> -
>> static inline bool
>> lnet_is_peer_ni_healthy_locked(struct lnet_peer_ni *lpni)
>> {
>> @@ -728,7 +743,7 @@ lnet_is_peer_net_healthy_locked(struct lnet_peer_net *peer_net)
>> struct lnet_peer_ni *lpni;
>>
>> list_for_each_entry(lpni, &peer_net->lpn_peer_nis,
>> - lpni_on_peer_net_list) {
>> + lpni_peer_nis) {
>> if (lnet_is_peer_ni_healthy_locked(lpni))
>> return true;
>> }
>> @@ -741,7 +756,7 @@ lnet_is_peer_healthy_locked(struct lnet_peer *peer)
>> {
>> struct lnet_peer_net *peer_net;
>>
>> - list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_on_peer_list) {
>> + list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_peer_nets) {
>> if (lnet_is_peer_net_healthy_locked(peer_net))
>> return true;
>> }
>> diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h
>> index d1721fd01d93..260619e19bde 100644
>> --- a/drivers/staging/lustre/include/linux/lnet/lib-types.h
>> +++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h
>> @@ -411,7 +411,8 @@ struct lnet_rc_data {
>> };
>>
>> struct lnet_peer_ni {
>> - struct list_head lpni_on_peer_net_list;
>> + /* chain on lpn_peer_nis */
>> + struct list_head lpni_peer_nis;
>> /* chain on remote peer list */
>> struct list_head lpni_on_remote_peer_ni_list;
>> /* chain on peer hash */
>> @@ -496,8 +497,8 @@ struct lnet_peer_ni {
>> #define LNET_PEER_NI_NON_MR_PREF BIT(0)
>>
>> struct lnet_peer {
>> - /* chain on global peer list */
>> - struct list_head lp_on_lnet_peer_list;
>> + /* chain on pt_peer_list */
>> + struct list_head lp_peer_list;
>>
>> /* list of peer nets */
>> struct list_head lp_peer_nets;
>> @@ -505,6 +506,15 @@ struct lnet_peer {
>> /* primary NID of the peer */
>> lnet_nid_t lp_primary_nid;
>>
>> + /* CPT of peer_table */
>> + int lp_cpt;
>> +
>> + /* number of NIDs on this peer */
>> + int lp_nnis;
>> +
>> + /* reference count */
>> + atomic_t lp_refcount;
>> +
>> /* lock protecting peer state flags */
>> spinlock_t lp_lock;
>>
>> @@ -516,8 +526,8 @@ struct lnet_peer {
>> #define LNET_PEER_CONFIGURED BIT(1)
>>
>> struct lnet_peer_net {
>> - /* chain on peer block */
>> - struct list_head lpn_on_peer_list;
>> + /* chain on lp_peer_nets */
>> + struct list_head lpn_peer_nets;
>>
>> /* list of peer_nis on this network */
>> struct list_head lpn_peer_nis;
>> @@ -527,21 +537,45 @@ struct lnet_peer_net {
>>
>> /* Net ID */
>> __u32 lpn_net_id;
>> +
>> + /* reference count */
>> + atomic_t lpn_refcount;
>> };
>>
>> /* peer hash size */
>> #define LNET_PEER_HASH_BITS 9
>> #define LNET_PEER_HASH_SIZE (1 << LNET_PEER_HASH_BITS)
>>
>> -/* peer hash table */
>> +/*
>> + * peer hash table - one per CPT
>> + *
>> + * protected by lnet_net_lock/EX for update
>> + * pt_version
>> + * pt_number
>> + * pt_hash[...]
>> + * pt_peer_list
>> + * pt_peers
>> + * pt_peer_nnids
>> + * protected by pt_zombie_lock:
>> + * pt_zombie_list
>> + * pt_zombies
>> + *
>> + * pt_zombie lock nests inside lnet_net_lock
>> + */
>> struct lnet_peer_table {
>> /* /proc validity stamp */
>> int pt_version;
>> /* # peers extant */
>> atomic_t pt_number;
>> + /* peers */
>> + struct list_head pt_peer_list;
>> + /* # peers */
>> + int pt_peers;
>> + /* # NIDS on listed peers */
>> + int pt_peer_nnids;
>> /* # zombies to go to deathrow (and not there yet) */
>> int pt_zombies;
>> - /* zombie peers */
>> + /* zombie peers_ni */
>> struct list_head pt_zombie_list;
>> /* protect list and count */
>> spinlock_t pt_zombie_lock;
>> @@ -785,8 +819,6 @@ struct lnet {
>> struct lnet_msg_container **ln_msg_containers;
>> struct lnet_counters **ln_counters;
>> struct lnet_peer_table **ln_peer_tables;
>> - /* list of configured or discovered peers */
>> - struct list_head ln_peers;
>> /* list of peer nis not on a local network */
>> struct list_head ln_remote_peer_ni_list;
>> /* failure simulation */
>> diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
>> index d64ae2939abc..c48bcb8722a0 100644
>> --- a/drivers/staging/lustre/lnet/lnet/api-ni.c
>> +++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
>> @@ -625,7 +625,6 @@ lnet_prepare(lnet_pid_t requested_pid)
>> the_lnet.ln_pid = requested_pid;
>>
>> INIT_LIST_HEAD(&the_lnet.ln_test_peers);
>> - INIT_LIST_HEAD(&the_lnet.ln_peers);
>> INIT_LIST_HEAD(&the_lnet.ln_remote_peer_ni_list);
>> INIT_LIST_HEAD(&the_lnet.ln_nets);
>> INIT_LIST_HEAD(&the_lnet.ln_routers);
>> diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
>> index 99d8b22356bb..4c1eef907dc7 100644
>> --- a/drivers/staging/lustre/lnet/lnet/lib-move.c
>> +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c
>> @@ -1388,7 +1388,7 @@ lnet_select_pathway(lnet_nid_t src_nid, lnet_nid_t dst_nid,
>> peer_net = lnet_peer_get_net_locked(
>> peer, LNET_NIDNET(best_lpni->lpni_nid));
>> list_for_each_entry(lpni, &peer_net->lpn_peer_nis,
>> - lpni_on_peer_net_list) {
>> + lpni_peer_nis) {
>> if (lpni->lpni_pref_nnids == 0)
>> continue;
>> LASSERT(lpni->lpni_pref_nnids == 1);
>> @@ -1411,7 +1411,7 @@ lnet_select_pathway(lnet_nid_t src_nid, lnet_nid_t dst_nid,
>> }
>> lpni = list_entry(peer_net->lpn_peer_nis.next,
>> struct lnet_peer_ni,
>> - lpni_on_peer_net_list);
>> + lpni_peer_nis);
>> }
>> /* Set preferred NI if necessary. */
>> if (lpni->lpni_pref_nnids == 0)
>> @@ -1443,7 +1443,7 @@ lnet_select_pathway(lnet_nid_t src_nid, lnet_nid_t dst_nid,
>> * then the best route is chosen. If all routes are equal then
>> * they are used in round robin.
>> */
>> - list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_on_peer_list) {
>> + list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_peer_nets) {
>> if (!lnet_is_peer_net_healthy_locked(peer_net))
>> continue;
>>
>> @@ -1453,7 +1453,7 @@ lnet_select_pathway(lnet_nid_t src_nid, lnet_nid_t dst_nid,
>>
>> lpni = list_entry(peer_net->lpn_peer_nis.next,
>> struct lnet_peer_ni,
>> - lpni_on_peer_net_list);
>> + lpni_peer_nis);
>>
>> net_gw = lnet_find_route_locked(NULL,
>> lpni->lpni_nid,
>> diff --git a/drivers/staging/lustre/lnet/lnet/peer.c b/drivers/staging/lustre/lnet/lnet/peer.c
>> index 09c1b5516f6b..d7a0a2f3bdd9 100644
>> --- a/drivers/staging/lustre/lnet/lnet/peer.c
>> +++ b/drivers/staging/lustre/lnet/lnet/peer.c
>
> INIT_LIST_HEAD(&ptable->pt_peer_list); seems to be missing from
> lnet_peer_tables_create(). This is in the patch merged into
> lustre-testing. Other than that it looks okay.
No, it is there. It is just the the lnet_peer_tables_create() has moved,
so it isn't in the same place in the patch.
..snip..
>> @@ -319,6 +358,8 @@ lnet_peer_tables_create(void)
>> spin_lock_init(&ptable->pt_zombie_lock);
>> INIT_LIST_HEAD(&ptable->pt_zombie_list);
>>
>> + INIT_LIST_HEAD(&ptable->pt_peer_list);
>> +
>> for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
>> INIT_LIST_HEAD(&hash[j]);
>> ptable->pt_hash = hash; /* sign of initialization */
here it is.
Thanks,
NeilBrown
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 832 bytes
Desc: not available
URL: <http://lists.lustre.org/pipermail/lustre-devel-lustre.org/attachments/20181017/8e4d4715/attachment.sig>
More information about the lustre-devel
mailing list