[lustre-devel] [PATCH 09/12] lustre: obdclass: obd_device improvement

NeilBrown neilb at suse.com
Mon Nov 26 20:01:03 PST 2018


On Sun, Nov 25 2018, James Simmons wrote:

> From: Alexander Boyko <c17825 at cray.com>
>
> The patch removes self exports from obd's reference counting which
> allows to avoid freeing of self exports by zombie thread.
> A pair of functions class_register_device()/class_unregister_device()
> is to make sure that an obd can not be referenced again once its
> refcount reached 0.
>
> Signed-off-by: Vladimir Saveliev Vladimir Saveliev <c17830 at cray.com>
> Signed-off-by: Alexey Lyashkov <c17817 at cray.com>
> Signed-off-by: Alexander Boyko <c17825 at cray.com>
> Signed-off-by: Yang Sheng <ys at whamcloud.com>
> WC-bug-id: https://jira.whamcloud.com/browse/LU-4134
> Seagate-bug-id: MRP-2139 MRP-3267
> Reviewed-on: https://review.whamcloud.com/8045
> Reviewed-on: https://review.whamcloud.com/29967
> Reviewed-by: James Simmons <uja.ornl at yahoo.com>
> Reviewed-by: Alexey Lyashkov <c17817 at cray.com>
> Reviewed-by: Oleg Drokin <green at whamcloud.com>
> Signed-off-by: James Simmons <jsimmons at infradead.org>
> ---
>  drivers/staging/lustre/lustre/include/obd_class.h  |   9 +-
>  drivers/staging/lustre/lustre/obdclass/genops.c    | 284 +++++++++++++++------
>  .../staging/lustre/lustre/obdclass/obd_config.c    | 143 ++++-------
>  drivers/staging/lustre/lustre/obdclass/obd_mount.c |   6 +-
>  4 files changed, 261 insertions(+), 181 deletions(-)
>
> diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h
> index 567189c..cc00915 100644
> --- a/drivers/staging/lustre/lustre/include/obd_class.h
> +++ b/drivers/staging/lustre/lustre/include/obd_class.h
> @@ -65,8 +65,11 @@ int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
>  			const char *name, struct lu_device_type *ldt);
>  int class_unregister_type(const char *name);
>  
> -struct obd_device *class_newdev(const char *type_name, const char *name);
> -void class_release_dev(struct obd_device *obd);
> +struct obd_device *class_newdev(const char *type_name, const char *name,
> +				const char *uuid);
> +int class_register_device(struct obd_device *obd);
> +void class_unregister_device(struct obd_device *obd);
> +void class_free_dev(struct obd_device *obd);
>  
>  int class_name2dev(const char *name);
>  struct obd_device *class_name2obd(const char *name);
> @@ -230,6 +233,8 @@ void __class_export_del_lock_ref(struct obd_export *exp,
>  void class_export_put(struct obd_export *exp);
>  struct obd_export *class_new_export(struct obd_device *obddev,
>  				    struct obd_uuid *cluuid);
> +struct obd_export *class_new_export_self(struct obd_device *obd,
> +					 struct obd_uuid *uuid);
>  void class_unlink_export(struct obd_export *exp);
>  
>  struct obd_import *class_import_get(struct obd_import *imp);
> diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c
> index 59891a8..cdd44f7 100644
> --- a/drivers/staging/lustre/lustre/obdclass/genops.c
> +++ b/drivers/staging/lustre/lustre/obdclass/genops.c
> @@ -38,6 +38,7 @@
>  
>  #define DEBUG_SUBSYSTEM S_CLASS
>  #include <obd_class.h>
> +#include <lustre_log.h>
>  #include <lprocfs_status.h>
>  #include <lustre_kernelcomm.h>
>  
> @@ -273,21 +274,20 @@ int class_unregister_type(const char *name)
>  /**
>   * Create a new obd device.
>   *
> - * Find an empty slot in ::obd_devs[], create a new obd device in it.
> + * Allocate the new obd_device and initialize it.
>   *
>   * \param[in] type_name obd device type string.
>   * \param[in] name      obd device name.
> + * @uuid		obd device UUID.
>   *
> - * \retval NULL if create fails, otherwise return the obd device
> - *	 pointer created.
> + * RETURN newdev	 pointer to created obd_device
> + * RETURN ERR_PTR(errno) on error
>   */
> -struct obd_device *class_newdev(const char *type_name, const char *name)
> +struct obd_device *class_newdev(const char *type_name, const char *name,
> +				const char *uuid)
>  {
> -	struct obd_device *result = NULL;
>  	struct obd_device *newdev;
>  	struct obd_type *type = NULL;
> -	int i;
> -	int new_obd_minor = 0;
>  
>  	if (strlen(name) >= MAX_OBD_NAME) {
>  		CERROR("name/uuid must be < %u bytes long\n", MAX_OBD_NAME);
> @@ -302,87 +302,167 @@ struct obd_device *class_newdev(const char *type_name, const char *name)
>  
>  	newdev = obd_device_alloc();
>  	if (!newdev) {
> -		result = ERR_PTR(-ENOMEM);
> -		goto out_type;
> +		class_put_type(type);
> +		return ERR_PTR(-ENOMEM);
>  	}
>  
>  	LASSERT(newdev->obd_magic == OBD_DEVICE_MAGIC);
> +	strncpy(newdev->obd_name, name, sizeof(newdev->obd_name) - 1);
> +	newdev->obd_type = type;
> +	newdev->obd_minor = -1;
> +
> +	rwlock_init(&newdev->obd_pool_lock);
> +	newdev->obd_pool_limit = 0;
> +	newdev->obd_pool_slv = 0;
> +
> +	INIT_LIST_HEAD(&newdev->obd_exports);
> +	INIT_LIST_HEAD(&newdev->obd_unlinked_exports);
> +	INIT_LIST_HEAD(&newdev->obd_delayed_exports);
> +	spin_lock_init(&newdev->obd_nid_lock);
> +	spin_lock_init(&newdev->obd_dev_lock);
> +	mutex_init(&newdev->obd_dev_mutex);
> +	spin_lock_init(&newdev->obd_osfs_lock);
> +	/* newdev->obd_osfs_age must be set to a value in the distant
> +	 * past to guarantee a fresh statfs is fetched on mount.
> +	 */
> +	newdev->obd_osfs_age = get_jiffies_64() - 1000 * HZ;
>  
> -	write_lock(&obd_dev_lock);
> -	for (i = 0; i < class_devno_max(); i++) {
> -		struct obd_device *obd = class_num2obd(i);
> +	/* XXX belongs in setup not attach  */
> +	init_rwsem(&newdev->obd_observer_link_sem);
> +	/* recovery data */
> +	init_waitqueue_head(&newdev->obd_evict_inprogress_waitq);
>  
> -		if (obd && (strcmp(name, obd->obd_name) == 0)) {
> -			CERROR("Device %s already exists at %d, won't add\n",
> -			       name, i);
> -			if (result) {
> -				LASSERTF(result->obd_magic == OBD_DEVICE_MAGIC,
> -					 "%p obd_magic %08x != %08x\n", result,
> -					 result->obd_magic, OBD_DEVICE_MAGIC);
> -				LASSERTF(result->obd_minor == new_obd_minor,
> -					 "%p obd_minor %d != %d\n", result,
> -					 result->obd_minor, new_obd_minor);
> -
> -				obd_devs[result->obd_minor] = NULL;
> -				result->obd_name[0] = '\0';
> -			 }
> -			result = ERR_PTR(-EEXIST);
> -			break;
> -		}
> -		if (!result && !obd) {
> -			result = newdev;
> -			result->obd_minor = i;
> -			new_obd_minor = i;
> -			result->obd_type = type;
> -			strncpy(result->obd_name, name,
> -				sizeof(result->obd_name) - 1);
> -			obd_devs[i] = result;
> -		}
> -	}
> -	write_unlock(&obd_dev_lock);
> +	llog_group_init(&newdev->obd_olg);
> +	/* Detach drops this */
> +        atomic_set(&newdev->obd_refcount, 1);
> +        lu_ref_init(&newdev->obd_reference);
> +        lu_ref_add(&newdev->obd_reference, "newdev", newdev);
>  
> -	if (!result && i >= class_devno_max()) {
> -		CERROR("all %u OBD devices used, increase MAX_OBD_DEVICES\n",
> -		       class_devno_max());
> -		result = ERR_PTR(-EOVERFLOW);
> -		goto out;
> -	}
> +	newdev->obd_conn_inprogress = 0;
>  
> -	if (IS_ERR(result))
> -		goto out;
> +	strncpy(newdev->obd_uuid.uuid, uuid, strlen(uuid));
>  
> -	CDEBUG(D_IOCTL, "Adding new device %s (%p)\n",
> -	       result->obd_name, result);
> +	CDEBUG(D_IOCTL, "Allocate new device %s (%p)\n",
> +	       newdev->obd_name, newdev);
>  
> -	return result;
> -out:
> -	obd_device_free(newdev);
> -out_type:
> -	class_put_type(type);
> -	return result;
> +	return newdev;
>  }
>  
> -void class_release_dev(struct obd_device *obd)
> +/**
> + * Free obd device.
> + *
> + * @obd obd_device to be freed
> + *
> + * RETURN none
> + */
> +void class_free_dev(struct obd_device *obd)
>  {
>  	struct obd_type *obd_type = obd->obd_type;
>  
>  	LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "%p obd_magic %08x != %08x\n",
>  		 obd, obd->obd_magic, OBD_DEVICE_MAGIC);
> -	LASSERTF(obd == obd_devs[obd->obd_minor], "obd %p != obd_devs[%d] %p\n",
> +	LASSERTF(obd->obd_minor == -1 || obd == obd_devs[obd->obd_minor],
> +		 "obd %p != obd_devs[%d] %p\n",
>  		 obd, obd->obd_minor, obd_devs[obd->obd_minor]);
> +	LASSERTF(atomic_read(&obd->obd_refcount) == 0,
> +		 "obd_refcount should be 0, not %d\n",
> +		 atomic_read(&obd->obd_refcount));
>  	LASSERT(obd_type);
>  
> -	CDEBUG(D_INFO, "Release obd device %s at %d obd_type name =%s\n",
> -	       obd->obd_name, obd->obd_minor, obd->obd_type->typ_name);
> +	CDEBUG(D_INFO, "Release obd device %s obd_type name =%s\n",
> +	       obd->obd_name, obd->obd_type->typ_name);
> +
> +	CDEBUG(D_CONFIG, "finishing cleanup of obd %s (%s)\n",
> +	       obd->obd_name, obd->obd_uuid.uuid);
> +	if (obd->obd_stopping) {
> +		int err;
> +
> +		/* If we're not stopping, we were never set up */
> +		err = obd_cleanup(obd);
> +		if (err)
> +			CERROR("Cleanup %s returned %d\n",
> +			       obd->obd_name, err);
> +	}
>  
> -	write_lock(&obd_dev_lock);
> -	obd_devs[obd->obd_minor] = NULL;
> -	write_unlock(&obd_dev_lock);
>  	obd_device_free(obd);
>  
>  	class_put_type(obd_type);
>  }
>  
> +/**
> + * Unregister obd device.
> + *
> + * Free slot in obd_dev[] used by \a obd.
> + *
> + * @new_obd	obd_device to be unregistered
> + *
> + * RETURN	none
> + */
> +void class_unregister_device(struct obd_device *obd)
> +{
> +	write_lock(&obd_dev_lock);
> +	if (obd->obd_minor >= 0) {
> +		LASSERT(obd_devs[obd->obd_minor] == obd);
> +		obd_devs[obd->obd_minor] = NULL;
> +		obd->obd_minor = -1;
> +	}
> +	write_unlock(&obd_dev_lock);
> +}
> +
> +/**
> + * Register obd device.
> + *
> + * Find free slot in obd_devs[], fills it with \a new_obd.
> + *
> + * @new_obd	obd_device to be registered
> + *
> + * RETURN	0		success
> + *		-EEXIST		device with this name is registered
> + *		-EOVERFLOW	obd_devs[] is full
> + */
> +int class_register_device(struct obd_device *new_obd)
> +{
> +	int new_obd_minor = 0;
> +	bool minor_assign = false;
> +	int ret = 0;
> +	int i;
> +
> +	write_lock(&obd_dev_lock);
> +	for (i = 0; i < class_devno_max(); i++) {
> +		struct obd_device *obd = class_num2obd(i);
> +
> +		if (obd && (strcmp(new_obd->obd_name, obd->obd_name) == 0)) {
> +			CERROR("%s: already exists, won't add\n",
> +			       obd->obd_name);
> +			/* in case we found a free slot before duplicate */
> +			minor_assign = false;
> +			ret = -EEXIST;
> +			break;
> +		}
> +		if (!minor_assign && !obd) {
> +			new_obd_minor = i;
> +			minor_assign = true;
> +		}
> +	}
> +
> +	if (minor_assign) {
> +		new_obd->obd_minor = new_obd_minor;
> +		LASSERTF(!obd_devs[new_obd_minor], "obd_devs[%d] %p\n",
> +			 new_obd_minor, obd_devs[new_obd_minor]);
> +		obd_devs[new_obd_minor] = new_obd;
> +	} else {
> +		if (ret == 0) {
> +			ret = -EOVERFLOW;
> +			CERROR("%s: all %u/%u devices used, increase MAX_OBD_DEVICES: rc = %d\n",
> +			       new_obd->obd_name, i, class_devno_max(), ret);
> +		}
> +	}
> +	write_unlock(&obd_dev_lock);
> +
> +	return ret;
> +}
> +
> +
>  int class_name2dev(const char *name)
>  {
>  	int i;
> @@ -677,7 +757,11 @@ static void class_export_destroy(struct obd_export *exp)
>  	LASSERT(list_empty(&exp->exp_req_replay_queue));
>  	LASSERT(list_empty(&exp->exp_hp_rpcs));
>  	obd_destroy_export(exp);
> -	class_decref(obd, "export", exp);
> +	/* self export doesn't hold a reference to an obd, although it
> +	 * exists until freeing of the obd
> +	 */
> +	if (exp != obd->obd_self_export)
> +		class_decref(obd, "export", exp);
>  
>  	OBD_FREE_RCU(exp, sizeof(*exp), &exp->exp_handle);
>  }
> @@ -708,11 +792,27 @@ void class_export_put(struct obd_export *exp)
>  	       atomic_read(&exp->exp_refcount) - 1);
>  
>  	if (atomic_dec_and_test(&exp->exp_refcount)) {
> -		LASSERT(!list_empty(&exp->exp_obd_chain));
> +		struct obd_device *obd = exp->exp_obd;
> +
>  		CDEBUG(D_IOCTL, "final put %p/%s\n",
>  		       exp, exp->exp_client_uuid.uuid);
>  
> -		obd_zombie_export_add(exp);
> +		if (exp == obd->obd_self_export) {
> +			/* self export should be destroyed without
> +			 * zombie thread as it doesn't hold a
> +			 * reference to obd and doesn't hold any
> +			 * resources
> +			 */
> +			class_export_destroy(exp);
> +			/* self export is destroyed, no class
> +			 * references exist and it is safe to free
> +			 * obd
> +			 */
> +			class_free_dev(obd);
> +		} else {
> +			LASSERT(!list_empty(&exp->exp_obd_chain));
> +			obd_zombie_export_add(exp);
> +		}
>  	}
>  }
>  EXPORT_SYMBOL(class_export_put);
> @@ -728,8 +828,9 @@ static void obd_zombie_exp_cull(struct work_struct *ws)
>   * pointer to it. The refcount is 2: one for the hash reference, and
>   * one for the pointer returned by this function.
>   */
> -struct obd_export *class_new_export(struct obd_device *obd,
> -				    struct obd_uuid *cluuid)
> +static struct obd_export *__class_new_export(struct obd_device *obd,
> +					     struct obd_uuid *cluuid,
> +					     bool is_self)
>  {
>  	struct obd_export *export;
>  	int rc = 0;
> @@ -739,6 +840,7 @@ struct obd_export *class_new_export(struct obd_device *obd,
>  		return ERR_PTR(-ENOMEM);
>  
>  	export->exp_conn_cnt = 0;
> +	/* 2 = class_handle_hash + last */
>  	atomic_set(&export->exp_refcount, 2);
>  	atomic_set(&export->exp_rpc_count, 0);
>  	atomic_set(&export->exp_cb_count, 0);
> @@ -767,41 +869,65 @@ struct obd_export *class_new_export(struct obd_device *obd,
>  	export->exp_client_uuid = *cluuid;
>  	obd_init_export(export);
>  
> -	spin_lock(&obd->obd_dev_lock);
> -	/* shouldn't happen, but might race */
> -	if (obd->obd_stopping) {
> -		rc = -ENODEV;
> -		goto exit_unlock;
> -	}
> -
>  	if (!obd_uuid_equals(cluuid, &obd->obd_uuid)) {
> +		spin_lock(&obd->obd_dev_lock);
> +		/* shouldn't happen, but might race */
> +		if (obd->obd_stopping) {
> +			rc = -ENODEV;
> +			goto exit_unlock;
> +		}
> +		spin_unlock(&obd->obd_dev_lock);

This is wrong.  The lock previously protected obd_uuid_add(), now it
doesn't.
This probably happened because the locking was simplified when I
converted to rhashtables so the OpenSFS patch did quite different things
with locking.

I've applied the following incremental patch to fix up the locking.

Thanks,
NeilBrown

diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c
index cdd44f72403f..76bc73fd79c8 100644
--- a/drivers/staging/lustre/lustre/obdclass/genops.c
+++ b/drivers/staging/lustre/lustre/obdclass/genops.c
@@ -869,24 +869,22 @@ static struct obd_export *__class_new_export(struct obd_device *obd,
 	export->exp_client_uuid = *cluuid;
 	obd_init_export(export);
 
+	spin_lock(&obd->obd_dev_lock);
 	if (!obd_uuid_equals(cluuid, &obd->obd_uuid)) {
-		spin_lock(&obd->obd_dev_lock);
 		/* shouldn't happen, but might race */
 		if (obd->obd_stopping) {
 			rc = -ENODEV;
 			goto exit_unlock;
 		}
-		spin_unlock(&obd->obd_dev_lock);
 
 		rc = obd_uuid_add(obd, export);
 		if (rc) {
 			LCONSOLE_WARN("%s: denying duplicate export for %s, %d\n",
 				      obd->obd_name, cluuid->uuid, rc);
-			goto exit_err;
+			goto exit_unlock;
 		}
 	}
 
-	spin_lock(&obd->obd_dev_lock);
 	if (!is_self) {
 		class_incref(obd, "export", export);
 		list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports);
@@ -899,7 +897,6 @@ static struct obd_export *__class_new_export(struct obd_device *obd,
 
 exit_unlock:
 	spin_unlock(&obd->obd_dev_lock);
-exit_err:
 	class_handle_unhash(&export->exp_handle);
 	obd_destroy_export(export);
 	kfree(export);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 832 bytes
Desc: not available
URL: <http://lists.lustre.org/pipermail/lustre-devel-lustre.org/attachments/20181127/d8edc8f4/attachment.sig>


More information about the lustre-devel mailing list