[lustre-devel] [PATCH 19/22] lnet: find correct primary for peer
James Simmons
jsimmons at infradead.org
Sun Nov 20 06:17:05 PST 2022
From: Mr NeilBrown <neilb at suse.de>
If the peer advertises a large-address NID as its primary NID, that primary can now be found; previously only the nid4 entry at pi_ni[1] was considered.
WC-bug-id: https://jira.whamcloud.com/browse/LU-10391
Lustre-commit: 022b46d887603f703 ("LU-10391 lnet: find correct primary for peer")
Signed-off-by: Mr NeilBrown <neilb at suse.de>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/44632
Reviewed-by: Serguei Smirnov <ssmirnov at whamcloud.com>
Reviewed-by: Frank Sehr <fsehr at whamcloud.com>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Reviewed-by: James Simmons <jsimmons at infradead.org>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
net/lnet/lnet/peer.c | 41 ++++++++++++++++++++++++++++++++++-------
1 file changed, 34 insertions(+), 7 deletions(-)
diff --git a/net/lnet/lnet/peer.c b/net/lnet/lnet/peer.c
index b33d6ac..a1305b6 100644
--- a/net/lnet/lnet/peer.c
+++ b/net/lnet/lnet/peer.c
@@ -2585,11 +2585,40 @@ static void lnet_peer_clear_discovery_error(struct lnet_peer *lp)
libcfs_nidstr(&lp->lp_primary_nid), ev->status);
}
+static bool find_primary(struct lnet_nid *nid,
+ struct lnet_ping_buffer *pbuf)
+{
+ struct lnet_ping_info *pi = &pbuf->pb_info;
+ struct lnet_ping_iter piter;
+ u32 *stp;
+
+ if (pi->pi_features & LNET_PING_FEAT_PRIMARY_LARGE) {
+ /* Primary is the first iterated NID that is not a nid4 */
+ for (stp = ping_iter_first(&piter, pbuf, nid);
+ stp;
+ stp = ping_iter_next(&piter, nid)) {
+ if (nid_is_nid4(nid))
+ continue;
+ /* nid was handed to the iterator and has already been copied in */
+ return true;
+ }
+ /* no large nids ... weird ... ignore the flag
+ * and fall back to pi_ni[1] below.
+ */
+ }
+ /* pi_ni[1] is primary; report failure when fewer than 2 entries exist */
+ if (pi->pi_nnis < 2)
+ return false;
+ lnet_nid4_to_nid(pbuf->pb_info.pi_ni[1].ns_nid, nid);
+ return true;
+}
+
/* Handle a Reply message. This is the reply to a Ping message. */
static void
lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev)
{
struct lnet_ping_buffer *pbuf;
+ struct lnet_nid primary;
int infobytes;
int rc;
bool ping_feat_disc;
@@ -2731,9 +2760,8 @@ static void lnet_peer_clear_discovery_error(struct lnet_peer *lp)
* available if the reply came from a Multi-Rail peer.
*/
if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL &&
- pbuf->pb_info.pi_nnis > 1 &&
- lnet_nid_to_nid4(&lp->lp_primary_nid) ==
- pbuf->pb_info.pi_ni[1].ns_nid) {
+ find_primary(&primary, pbuf) &&
+ nid_same(&lp->lp_primary_nid, &primary)) {
if (LNET_PING_BUFFER_SEQNO(pbuf) < lp->lp_peer_seqno)
CDEBUG(D_NET,
"peer %s: seq# got %u have %u. peer rebooted?\n",
@@ -3081,11 +3109,11 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
* peer's lp_peer_nets list, and the peer NI for the primary NID should
* be the first entry in its peer net's lpn_peer_nis list.
*/
- lnet_nid4_to_nid(pbuf->pb_info.pi_ni[1].ns_nid, &nid);
+ find_primary(&nid, pbuf);
lpni = lnet_peer_ni_find_locked(&nid);
if (!lpni) {
CERROR("Internal error: Failed to lookup peer NI for primary NID: %s\n",
- libcfs_nid2str(pbuf->pb_info.pi_ni[1].ns_nid));
+ libcfs_nidstr(&nid));
goto out;
}
@@ -3341,11 +3369,10 @@ static int lnet_peer_data_present(struct lnet_peer *lp)
* primary NID to the correct value here. Moreover, this peer
* can show up with only the loopback NID in the ping buffer.
*/
- if (pbuf->pb_info.pi_nnis <= 1) {
+ if (!find_primary(&nid, pbuf)) {
lnet_ping_buffer_decref(pbuf);
goto out;
}
- lnet_nid4_to_nid(pbuf->pb_info.pi_ni[1].ns_nid, &nid);
if (nid_is_lo0(&lp->lp_primary_nid)) {
rc = lnet_peer_set_primary_nid(lp, &nid, flags);
if (rc)
--
1.8.3.1
More information about the lustre-devel
mailing list