[lustre-devel] [PATCH 14/24] lustre: llite: Add D_IOTRACE

James Simmons jsimmons at infradead.org
Thu Jan 13 17:37:53 PST 2022


From: Patrick Farrell <pfarrell at whamcloud.com>

In looking in to performance problems, it's very important
to be able to trace the I/O patterns from userspace in to
Lustre, and also understand the key basics of how Lustre
handles that I/O (readahead, RPC generation).

This is best done with a dedicated debug flag - No
userspace tool can provide all this information, and
existing debug flags collect a huge number of unrelated
pieces of, well, debug information.

The goal is for customers to be able to quickly gather log
files of a reasonable size which contain the necessary
information and which can easily be interpreted by
engineering.  This is not possible if the information is
spread out across a number of heavyweight debug flags.

This is a first pass at adding the flag and the debug
required to track basic data I/O.  One significant
omission in the first patch is RPC generation - I have not
decided how best to do that yet.  That will be added in a
future patch.

WC-bug-id: https://jira.whamcloud.com/browse/LU-15137
Lustre-commit: 40d286e11138fc67f ("LU-15317 llite: Add D_IOTRACE")
Signed-off-by: Patrick Farrell <pfarrell at whamcloud.com>
Reviewed-on: https://review.whamcloud.com/45752
Reviewed-by: Sebastien Buisson <sbuisson at ddn.com>
Reviewed-by: Andreas Dilger <adilger at whamcloud.com>
Reviewed-by: Oleg Drokin <green at whamcloud.com>
Signed-off-by: James Simmons <jsimmons at infradead.org>
---
 fs/lustre/llite/file.c                 | 10 ++++++++++
 fs/lustre/llite/llite_internal.h       |  1 +
 fs/lustre/llite/llite_mmap.c           | 13 ++++++++++---
 fs/lustre/llite/rw.c                   | 10 ++++++++--
 include/uapi/linux/lnet/libcfs_debug.h |  3 ++-
 5 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/fs/lustre/llite/file.c b/fs/lustre/llite/file.c
index 05f2f1a..dec0109 100644
--- a/fs/lustre/llite/file.c
+++ b/fs/lustre/llite/file.c
@@ -1954,6 +1954,11 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	ktime_t kstart = ktime_get();
 	bool cached;
 
+	CDEBUG(D_VFSTRACE|D_IOTRACE, "file %s:"DFID", ppos: %lld, count: %zu\n",
+	       file_dentry(file)->d_name.name,
+	       PFID(ll_inode2fid(file_inode(file))), iocb->ki_pos,
+	       iov_iter_count(to));
+
 	if (!iov_iter_count(to))
 		return 0;
 
@@ -2075,6 +2080,11 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	ktime_t kstart = ktime_get();
 	int result;
 
+	CDEBUG(D_VFSTRACE|D_IOTRACE, "file %s:"DFID", ppos: %lld, count: %zu\n",
+	       file_dentry(file)->d_name.name,
+	       PFID(ll_inode2fid(file_inode(file))), iocb->ki_pos,
+	       iov_iter_count(from));
+
 	if (!iov_iter_count(from)) {
 		rc_normal = 0;
 		goto out;
diff --git a/fs/lustre/llite/llite_internal.h b/fs/lustre/llite/llite_internal.h
index 54f0218..8c7361a 100644
--- a/fs/lustre/llite/llite_internal.h
+++ b/fs/lustre/llite/llite_internal.h
@@ -885,6 +885,7 @@ struct ll_readahead_work {
 	struct file			*lrw_file;
 	pgoff_t				 lrw_start_idx;
 	pgoff_t				 lrw_end_idx;
+	pid_t				 lrw_user_pid;
 
 	/* async worker to handler read */
 	struct work_struct		 lrw_readahead_work;
diff --git a/fs/lustre/llite/llite_mmap.c b/fs/lustre/llite/llite_mmap.c
index 0009c5f..d87a68d 100644
--- a/fs/lustre/llite/llite_mmap.c
+++ b/fs/lustre/llite/llite_mmap.c
@@ -377,9 +377,10 @@ static vm_fault_t ll_fault(struct vm_fault *vmf)
 	if (cached)
 		goto out;
 
-	CDEBUG(D_MMAP, DFID": vma=%p start=%#lx end=%#lx vm_flags=%#lx\n",
+	CDEBUG(D_MMAP|D_IOTRACE,
+	       DFID": vma=%p start=%#lx end=%#lx vm_flags=%#lx idx=%lu\n",
 	       PFID(&ll_i2info(file_inode(vma->vm_file))->lli_fid),
-	       vma, vma->vm_start, vma->vm_end, vma->vm_flags);
+	       vma, vma->vm_start, vma->vm_end, vma->vm_flags, vmf->pgoff);
 
 	/* Only SIGKILL and SIGTERM are allowed for fault/nopage/mkwrite
 	 * so that it can be killed by admin but not cause segfault by
@@ -440,8 +441,14 @@ static vm_fault_t ll_page_mkwrite(struct vm_fault *vmf)
 	bool retry;
 	bool cached;
 	int err;
-	vm_fault_t ret;
 	ktime_t kstart = ktime_get();
+	vm_fault_t ret;
+
+	CDEBUG(D_MMAP|D_IOTRACE,
+	       DFID": vma=%p start=%#lx end=%#lx vm_flags=%#lx idx=%lu\n",
+	       PFID(&ll_i2info(file_inode(vma->vm_file))->lli_fid),
+	       vma, vma->vm_start, vma->vm_end, vma->vm_flags,
+	       vmf->page->index);
 
 	ret = pcc_page_mkwrite(vma, vmf, &cached);
 	if (cached)
diff --git a/fs/lustre/llite/rw.c b/fs/lustre/llite/rw.c
index c9f29ef..9f6e140 100644
--- a/fs/lustre/llite/rw.c
+++ b/fs/lustre/llite/rw.c
@@ -595,6 +595,11 @@ static void ll_readahead_handle_work(struct work_struct *wq)
 	inode = file_inode(file);
 	sbi = ll_i2sbi(inode);
 
+	CDEBUG(D_READA|D_IOTRACE,
+	       "%s: async ra from %lu to %lu triggered by user pid %d\n",
+	       file_dentry(file)->d_name.name, work->lrw_start_idx,
+	       work->lrw_end_idx, work->lrw_user_pid);
+
 	env = cl_env_alloc(&refcheck, LCT_NOREF);
 	if (IS_ERR(env)) {
 		rc = PTR_ERR(env);
@@ -1301,7 +1306,7 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 	spin_lock(&ras->ras_lock);
 
 	if (!hit)
-		CDEBUG(D_READA, DFID " pages at %lu miss.\n",
+		CDEBUG(D_READA|D_IOTRACE, DFID " pages at %lu miss.\n",
 		       PFID(ll_inode2fid(inode)), index);
 	ll_ra_stats_inc_sbi(sbi, hit ? RA_STAT_HIT : RA_STAT_MISS);
 
@@ -1670,7 +1675,7 @@ int ll_io_read_page(const struct lu_env *env, struct cl_io *io,
 			skip_index = vvp_index(vpg);
 		rc2 = ll_readahead(env, io, &queue->c2_qin, ras,
 				   uptodate, file, skip_index);
-		CDEBUG(D_READA, DFID " %d pages read ahead at %lu\n",
+		CDEBUG(D_READA|D_IOTRACE, DFID " %d pages read ahead at %lu\n",
 		       PFID(ll_inode2fid(inode)), rc2, vvp_index(vpg));
 	} else if (vvp_index(vpg) == io_start_index &&
 		   io_end_index - io_start_index > 0) {
@@ -1770,6 +1775,7 @@ static int kickoff_async_readahead(struct file *file, unsigned long pages)
 		lrw->lrw_file = get_file(file);
 		lrw->lrw_start_idx = start_idx;
 		lrw->lrw_end_idx = end_idx;
+		lrw->lrw_user_pid = current->pid;
 		spin_lock(&ras->ras_lock);
 		ras->ras_next_readahead_idx = end_idx + 1;
 		ras->ras_async_last_readpage_idx = start_idx;
diff --git a/include/uapi/linux/lnet/libcfs_debug.h b/include/uapi/linux/lnet/libcfs_debug.h
index 4cb6594..bbd9f25 100644
--- a/include/uapi/linux/lnet/libcfs_debug.h
+++ b/include/uapi/linux/lnet/libcfs_debug.h
@@ -106,6 +106,7 @@ struct ptldebug_header {
 #define D_TRACE		0x00000001 /* ENTRY/EXIT markers */
 #define D_INODE		0x00000002
 #define D_SUPER		0x00000004
+#define D_IOTRACE	0x00000008 /* simple, low overhead io tracing */
 #define D_MALLOC	0x00000010 /* print malloc, free information */
 #define D_CACHE		0x00000020 /* cache-related items */
 #define D_INFO		0x00000040 /* general information */
@@ -136,7 +137,7 @@ struct ptldebug_header {
 #define D_LAYOUT	0x80000000
 
 #define LIBCFS_DEBUG_MASKS_NAMES {					\
-	"trace", "inode", "super", "tty", "malloc", "cache", "info",	\
+	"trace", "inode", "super", "iotrace", "malloc", "cache", "info",\
 	"ioctl", "neterror", "net", "warning", "buffs", "other",	\
 	"dentry", "nettrace", "page", "dlmtrace", "error", "emerg",	\
 	"ha", "rpctrace", "vfstrace", "reada", "mmap", "config",	\
-- 
1.8.3.1



More information about the lustre-devel mailing list