[1/2] kernel/latencytop: Add non-scheduler interface for latency reporting

Submitted by Daniel Vetter on Dec. 1, 2015, 3:29 p.m.

Details

Message ID 1448983768-22324-1-git-send-email-daniel.vetter@ffwll.ch
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Daniel Vetter Dec. 1, 2015, 3:29 p.m.
Some sources of significant amounts of latency aren't simple sleeps
but instead busy-loops or a series of hundreds of small sleeps simply
because the hardware can't do better. Unfortunately latencytop doesn't
register these and so they slip under the radar. Hence expose a
simplified interface to report additional latencies and export the
underlying function so that modules can use this.

The example I have in mind is EDID reads. The drm subsystem exposes
both an interface to do a full probe and one to just get at the cached
state from the last probe, and userspace developers often don't know
about the difference and so incur unnecessarily big latencies. Usually
the i2c transfer is done with busy-looping, or if there is a hw engine it
might only be able to transfer a few bytes per sleep/irq cycle. EDID
reads take at least 12ms and with crappy hw can easily take a few
hundred ms.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
---
 include/linux/latencytop.h | 15 +++++++++++++++
 kernel/latencytop.c        |  2 ++
 2 files changed, 17 insertions(+)

Patch hide | download patch | download mbox

diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h
index e23121f9d82a..46b69bc35f02 100644
--- a/include/linux/latencytop.h
+++ b/include/linux/latencytop.h
@@ -10,6 +10,9 @@ 
 #define _INCLUDE_GUARD_LATENCYTOP_H_
 
 #include <linux/compiler.h>
+
+#include <asm/current.h>
+
 struct task_struct;
 
 #ifdef CONFIG_LATENCYTOP
@@ -35,6 +38,13 @@  account_scheduler_latency(struct task_struct *task, int usecs, int inter)
 		__account_scheduler_latency(task, usecs, inter);
 }
 
+static inline void
+account_latency(int usecs)
+{
+	if (unlikely(latencytop_enabled))
+		__account_scheduler_latency(current, usecs, 0);
+}
+
 void clear_all_latency_tracing(struct task_struct *p);
 
 #else
@@ -44,6 +54,11 @@  account_scheduler_latency(struct task_struct *task, int usecs, int inter)
 {
 }
 
+static inline void
+account_latency(int usecs)
+{
+}
+
 static inline void clear_all_latency_tracing(struct task_struct *p)
 {
 }
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index a02812743a7e..b066a19fc52a 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -64,6 +64,7 @@  static DEFINE_RAW_SPINLOCK(latency_lock);
 static struct latency_record latency_record[MAXLR];
 
 int latencytop_enabled;
+EXPORT_SYMBOL_GPL(latencytop_enabled);
 
 void clear_all_latency_tracing(struct task_struct *p)
 {
@@ -234,6 +235,7 @@  __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
 out_unlock:
 	raw_spin_unlock_irqrestore(&latency_lock, flags);
 }
+EXPORT_SYMBOL_GPL(__account_scheduler_latency);
 
 static int lstats_show(struct seq_file *m, void *v)
 {

Comments

On Tue, Dec 01, 2015 at 04:29:27PM +0100, Daniel Vetter wrote:
> Some sources of significant amounts of latency aren't simple sleeps
> but instead busy-loops or a series of hundreds of small sleeps simply
> because the hardware can't do better. Unfortunately latencytop doesn't
> register these and so they slip under the radar. Hence expose a
> simplified interface to report additional latencies and export the
> underlying function so that modules can use this.
> 
> The example I have in mind are edid reads. The drm subsystem exposes
> both interfaces to do full probes and to just get at the cached state
> from the last probe and often userspace developers don't know about
> the difference and incur unnecessary big latencies. And usually the i2c
> transfer is done with busy-looping or if there is a hw engine it might
> only be able to transfer a few bytes per sleep/irq cycle. And edid
> reads take at least 12ms and with crappy hw can easily be a few
> hundred ms.
> 
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Arjan van de Ven <arjan@linux.intel.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> ---
>  include/linux/latencytop.h | 15 +++++++++++++++
>  kernel/latencytop.c        |  2 ++
>  2 files changed, 17 insertions(+)
> 
> diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h
> index e23121f9d82a..46b69bc35f02 100644
> --- a/include/linux/latencytop.h
> +++ b/include/linux/latencytop.h
> @@ -10,6 +10,9 @@
>  #define _INCLUDE_GUARD_LATENCYTOP_H_
>  
>  #include <linux/compiler.h>
> +
> +#include <asm/current.h>
> +
>  struct task_struct;
>  
>  #ifdef CONFIG_LATENCYTOP
> @@ -35,6 +38,13 @@ account_scheduler_latency(struct task_struct *task, int usecs, int inter)
>  		__account_scheduler_latency(task, usecs, inter);
>  }
>  
> +static inline void
> +account_latency(int usecs)
> +{
> +	if (unlikely(latencytop_enabled))
> +		__account_scheduler_latency(current, usecs, 0);

Just call the existing wrapper here:

	account_scheduler_latency(current, usecs, 0);
> +}

A single definition of account_latency() then works for both #ifdef paths, i.e. move it to after the #endif.
-Chris