[33/39] drm/i915/perf: add a parameter to control the size of OA buffer

Submitted by Lucas De Marchi on Aug. 16, 2019, 8:04 a.m.

Details

Message ID 20190816080503.28594-34-lucas.demarchi@intel.com
State New
Headers show
Series "Tiger Lake batch 3" ( rev: 1 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Lucas De Marchi Aug. 16, 2019, 8:04 a.m.
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

The way our hardware is designed doesn't seem to let us use the
MI_RECORD_PERF_COUNT command without setting up a circular buffer.

In the case where the user didn't request OA reports to be available
through the i915 perf stream, we can set the OA buffer to the minimum
size to avoid consuming memory which won't be used by the driver.

Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 98 +++++++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_reg.h  |  2 +
 include/uapi/drm/i915_drm.h      |  7 +++
 3 files changed, 74 insertions(+), 33 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 2c9f46e12622..9386d9c82930 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -216,13 +216,7 @@ 
 #include "oa/i915_oa_cnl.h"
 #include "oa/i915_oa_icl.h"
 
-/* HW requires this to be a power of two, between 128k and 16M, though driver
- * is currently generally designed assuming the largest 16M size is used such
- * that the overflow cases are unlikely in normal operation.
- */
-#define OA_BUFFER_SIZE		SZ_16M
-
-#define OA_TAKEN(tail, head)	((tail - head) & (OA_BUFFER_SIZE - 1))
+#define OA_TAKEN(tail, head)	(((tail) - (head)) & (stream->oa_buffer.vma->size - 1))
 
 /**
  * DOC: OA Tail Pointer Race
@@ -347,6 +341,7 @@  static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
  * @oa_format: An OA unit HW report format
  * @oa_periodic: Whether to enable periodic OA unit sampling
  * @oa_period_exponent: The OA unit sampling period is derived from this
+ * @oa_buffer_size_exponent: The OA buffer size is derived from this
  *
  * As read_properties_unlocked() enumerates and validates the properties given
  * to open a stream of metrics the configuration is built up in the structure
@@ -363,6 +358,7 @@  struct perf_open_properties {
 	int oa_format;
 	bool oa_periodic;
 	int oa_period_exponent;
+	u32 oa_buffer_size_exponent;
 };
 
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
@@ -531,7 +527,7 @@  static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
 		 * could put the tail out of bounds...
 		 */
 		if (hw_tail >= gtt_offset &&
-		    hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
+		    hw_tail < (gtt_offset + stream->oa_buffer.vma->size)) {
 			stream->oa_buffer.tails[!aged_idx].offset =
 				aging_tail = hw_tail;
 			stream->oa_buffer.aging_timestamp = now;
@@ -659,7 +655,7 @@  static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 	int report_size = stream->oa_buffer.format_size;
 	u8 *oa_buf_base = stream->oa_buffer.vaddr;
 	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
-	u32 mask = (OA_BUFFER_SIZE - 1);
+	u32 mask = (stream->oa_buffer.vma->size - 1);
 	size_t start_offset = *offset;
 	unsigned long flags;
 	unsigned int aged_tail_idx;
@@ -699,8 +695,8 @@  static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 	 * only be incremented by multiples of the report size (notably also
 	 * all a power of two).
 	 */
-	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
-		      tail > OA_BUFFER_SIZE || tail % report_size,
+	if (WARN_ONCE(head > stream->oa_buffer.vma->size || head % report_size ||
+		      tail > stream->oa_buffer.vma->size || tail % report_size,
 		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
 		      head, tail))
 		return -EIO;
@@ -723,7 +719,7 @@  static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		 * here would imply a driver bug that would result
 		 * in an overrun.
 		 */
-		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
+		if (WARN_ON((stream->oa_buffer.vma->size - head) < report_size)) {
 			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
 			break;
 		}
@@ -881,11 +877,6 @@  static int gen8_oa_read(struct i915_perf_stream *stream,
 	 * automatically triggered reports in this condition and so we
 	 * have to assume that old reports are now being trampled
 	 * over.
-	 *
-	 * Considering how we don't currently give userspace control
-	 * over the OA buffer size and always configure a large 16MB
-	 * buffer, then a buffer overflow does anyway likely indicate
-	 * that something has gone quite badly wrong.
 	 */
 	if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
 		ret = append_oa_status(stream, buf, count, offset,
@@ -947,7 +938,7 @@  static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 	int report_size = stream->oa_buffer.format_size;
 	u8 *oa_buf_base = stream->oa_buffer.vaddr;
 	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
-	u32 mask = (OA_BUFFER_SIZE - 1);
+	u32 mask = (stream->oa_buffer.vma->size - 1);
 	size_t start_offset = *offset;
 	unsigned long flags;
 	unsigned int aged_tail_idx;
@@ -984,8 +975,8 @@  static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 	 * only be incremented by multiples of the report size (notably also
 	 * all a power of two).
 	 */
-	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
-		      tail > OA_BUFFER_SIZE || tail % report_size,
+	if (WARN_ONCE(head > stream->oa_buffer.vma->size || head % report_size ||
+		      tail > stream->oa_buffer.vma->size || tail % report_size,
 		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
 		      head, tail))
 		return -EIO;
@@ -1005,7 +996,7 @@  static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 		 * here would imply a driver bug that would result
 		 * in an overrun.
 		 */
-		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
+		if (WARN_ON((stream->oa_buffer.vma->size - head) < report_size)) {
 			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
 			break;
 		}
@@ -1408,7 +1399,9 @@  static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
 
 	I915_WRITE(GEN7_OABUFFER, gtt_offset);
 
-	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
+	I915_WRITE(GEN7_OASTATUS1, gtt_offset |
+		   ((stream->oa_buffer.size_exponent - 17) <<
+		    GEN7_OASTATUS1_BUFFER_SIZE_SHIFT)); /* tail */
 
 	/* Mark that we need updated tail pointers to read from... */
 	stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
@@ -1433,7 +1426,7 @@  static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
 	 * the assumption that new reports are being written to zeroed
 	 * memory...
 	 */
-	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
+	memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.vma->size);
 
 	/* Maybe make ->pollin per-stream state if we support multiple
 	 * concurrent streams in the future.
@@ -1464,7 +1457,9 @@  static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
 	 *  bit."
 	 */
 	I915_WRITE(GEN8_OABUFFER, gtt_offset |
-		   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
+		   ((stream->oa_buffer.size_exponent - 17) <<
+		    GEN8_OABUFFER_BUFFER_SIZE_SHIFT) |
+		   GEN8_OABUFFER_MEM_SELECT_GGTT);
 	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
 
 	/* Mark that we need updated tail pointers to read from... */
@@ -1492,7 +1487,7 @@  static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
 	 * the assumption that new reports are being written to zeroed
 	 * memory...
 	 */
-	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
+	memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.vma->size);
 
 	/*
 	 * Maybe make ->pollin per-stream state if we support multiple
@@ -1501,24 +1496,25 @@  static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
 	stream->pollin = false;
 }
 
-static int alloc_oa_buffer(struct i915_perf_stream *stream)
+static int alloc_oa_buffer(struct i915_perf_stream *stream, int size_exponent)
 {
 	struct drm_i915_gem_object *bo;
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 	struct i915_vma *vma;
+	size_t size = 1U << size_exponent;
 	int ret;
 
 	if (WARN_ON(stream->oa_buffer.vma))
 		return -ENODEV;
 
+	if (WARN_ON(size < SZ_128K || size > SZ_16M))
+		return -EINVAL;
+
 	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
 	if (ret)
 		return ret;
 
-	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
-	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
-
-	bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
+	bo = i915_gem_object_create_shmem(dev_priv, size);
 	if (IS_ERR(bo)) {
 		DRM_ERROR("Failed to allocate OA buffer\n");
 		ret = PTR_ERR(bo);
@@ -1534,6 +1530,7 @@  static int alloc_oa_buffer(struct i915_perf_stream *stream)
 		goto err_unref;
 	}
 	stream->oa_buffer.vma = vma;
+	stream->oa_buffer.size_exponent = size_exponent;
 
 	stream->oa_buffer.vaddr =
 		i915_gem_object_pin_map(bo, I915_MAP_WB);
@@ -1542,9 +1539,10 @@  static int alloc_oa_buffer(struct i915_perf_stream *stream)
 		goto err_unpin;
 	}
 
-	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
+	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p, size = %llu\n",
 			 i915_ggtt_offset(stream->oa_buffer.vma),
-			 stream->oa_buffer.vaddr);
+			 stream->oa_buffer.vaddr,
+			 stream->oa_buffer.vma->size);
 
 	goto unlock;
 
@@ -2251,7 +2249,7 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
 
-	ret = alloc_oa_buffer(stream);
+	ret = alloc_oa_buffer(stream, props->oa_buffer_size_exponent);
 	if (ret)
 		goto err_oa_buf_alloc;
 
@@ -2823,6 +2821,26 @@  static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
 			 1000ULL * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz);
 }
 
+static int
+select_oa_buffer_exponent(struct drm_i915_private *i915,
+			  u64 requested_size)
+{
+	int order;
+
+	/*
+	 * When no size is specified, use the largest size supported by all
+	 * generations.
+	 */
+	if (!requested_size)
+		return order_base_2(SZ_16M);
+
+	order = order_base_2(clamp_t(u64, requested_size, SZ_128K, SZ_16M));
+	if (requested_size != (1UL << order))
+		return -EINVAL;
+
+	return order;
+}
+
 /**
  * read_properties_unlocked - validate + copy userspace stream open properties
  * @dev_priv: i915 device instance
@@ -2950,6 +2968,14 @@  static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 			props->oa_periodic = true;
 			props->oa_period_exponent = value;
 			break;
+		case DRM_I915_PERF_PROP_OA_BUFFER_SIZE:
+			ret = select_oa_buffer_exponent(dev_priv, value);
+			if (ret < 0) {
+				DRM_DEBUG("OA buffer size invalid %llu\n", value);
+				return ret;
+			}
+			props->oa_buffer_size_exponent = ret;
+			break;
 		case DRM_I915_PERF_PROP_MAX:
 			MISSING_CASE(id);
 			return -EINVAL;
@@ -2958,6 +2984,12 @@  static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 		uprop += 2;
 	}
 
+	/* If no buffer size was requested, select the default one. */
+	if (!props->oa_buffer_size_exponent) {
+		props->oa_buffer_size_exponent =
+			select_oa_buffer_exponent(dev_priv, 0);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 34d83e3a51a7..d35f385f8288 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -675,12 +675,14 @@  static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define GEN8_OABUFFER_UDW _MMIO(0x23b4)
 #define GEN8_OABUFFER _MMIO(0x2b14)
 #define  GEN8_OABUFFER_MEM_SELECT_GGTT      (1 << 0)  /* 0: PPGTT, 1: GGTT */
+#define  GEN8_OABUFFER_BUFFER_SIZE_SHIFT    3
 
 #define GEN7_OASTATUS1 _MMIO(0x2364)
 #define  GEN7_OASTATUS1_TAIL_MASK	    0xffffffc0
 #define  GEN7_OASTATUS1_COUNTER_OVERFLOW    (1 << 2)
 #define  GEN7_OASTATUS1_OABUFFER_OVERFLOW   (1 << 1)
 #define  GEN7_OASTATUS1_REPORT_LOST	    (1 << 0)
+#define  GEN7_OASTATUS1_BUFFER_SIZE_SHIFT   3
 
 #define GEN7_OASTATUS2 _MMIO(0x2368)
 #define  GEN7_OASTATUS2_HEAD_MASK           0xffffffc0
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 469dc512cca3..f662c534de0a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1873,6 +1873,13 @@  enum drm_i915_perf_property_id {
 	 */
 	DRM_I915_PERF_PROP_OA_EXPONENT,
 
+	/**
+	 * Specify a global OA buffer size to be allocated in bytes. The size
+	 * specified must be supported by HW (currently supported sizes are
+	 * powers of 2 ranging from 128Kb to 16Mb).
+	 */
+	DRM_I915_PERF_PROP_OA_BUFFER_SIZE,
+
 	DRM_I915_PERF_PROP_MAX /* non-ABI */
 };
 

Comments

Looks good.

Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>

On Fri, Aug 16, 2019 at 01:04:57AM -0700, Lucas De Marchi wrote:
>From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>
>The way our hardware is designed doesn't seem to let us use the
>MI_RECORD_PERF_COUNT command without setting up a circular buffer.
>
>In the case where the user didn't request OA reports to be available
>through the i915 perf stream, we can set the OA buffer to the minimum
>size to avoid consuming memory which won't be used by the driver.
>
>Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>Cc: Matthew Auld <matthew.auld@intel.com>
>Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
>---
> drivers/gpu/drm/i915/i915_perf.c | 98 +++++++++++++++++++++-----------
> drivers/gpu/drm/i915/i915_reg.h  |  2 +
> include/uapi/drm/i915_drm.h      |  7 +++
> 3 files changed, 74 insertions(+), 33 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>index 2c9f46e12622..9386d9c82930 100644
>--- a/drivers/gpu/drm/i915/i915_perf.c
>+++ b/drivers/gpu/drm/i915/i915_perf.c
>@@ -216,13 +216,7 @@
> #include "oa/i915_oa_cnl.h"
> #include "oa/i915_oa_icl.h"
>
>-/* HW requires this to be a power of two, between 128k and 16M, though driver
>- * is currently generally designed assuming the largest 16M size is used such
>- * that the overflow cases are unlikely in normal operation.
>- */
>-#define OA_BUFFER_SIZE		SZ_16M
>-
>-#define OA_TAKEN(tail, head)	((tail - head) & (OA_BUFFER_SIZE - 1))
>+#define OA_TAKEN(tail, head)	(((tail) - (head)) & (stream->oa_buffer.vma->size - 1))
>
> /**
>  * DOC: OA Tail Pointer Race
>@@ -347,6 +341,7 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
>  * @oa_format: An OA unit HW report format
>  * @oa_periodic: Whether to enable periodic OA unit sampling
>  * @oa_period_exponent: The OA unit sampling period is derived from this
>+ * @oa_buffer_size_exponent: The OA buffer size is derived from this
>  *
>  * As read_properties_unlocked() enumerates and validates the properties given
>  * to open a stream of metrics the configuration is built up in the structure
>@@ -363,6 +358,7 @@ struct perf_open_properties {
> 	int oa_format;
> 	bool oa_periodic;
> 	int oa_period_exponent;
>+	u32 oa_buffer_size_exponent;
> };
>
> static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
>@@ -531,7 +527,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
> 		 * could put the tail out of bounds...
> 		 */
> 		if (hw_tail >= gtt_offset &&
>-		    hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
>+		    hw_tail < (gtt_offset + stream->oa_buffer.vma->size)) {
> 			stream->oa_buffer.tails[!aged_idx].offset =
> 				aging_tail = hw_tail;
> 			stream->oa_buffer.aging_timestamp = now;
>@@ -659,7 +655,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
> 	int report_size = stream->oa_buffer.format_size;
> 	u8 *oa_buf_base = stream->oa_buffer.vaddr;
> 	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
>-	u32 mask = (OA_BUFFER_SIZE - 1);
>+	u32 mask = (stream->oa_buffer.vma->size - 1);
> 	size_t start_offset = *offset;
> 	unsigned long flags;
> 	unsigned int aged_tail_idx;
>@@ -699,8 +695,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
> 	 * only be incremented by multiples of the report size (notably also
> 	 * all a power of two).
> 	 */
>-	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
>-		      tail > OA_BUFFER_SIZE || tail % report_size,
>+	if (WARN_ONCE(head > stream->oa_buffer.vma->size || head % report_size ||
>+		      tail > stream->oa_buffer.vma->size || tail % report_size,
> 		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
> 		      head, tail))
> 		return -EIO;
>@@ -723,7 +719,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
> 		 * here would imply a driver bug that would result
> 		 * in an overrun.
> 		 */
>-		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
>+		if (WARN_ON((stream->oa_buffer.vma->size - head) < report_size)) {
> 			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
> 			break;
> 		}
>@@ -881,11 +877,6 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
> 	 * automatically triggered reports in this condition and so we
> 	 * have to assume that old reports are now being trampled
> 	 * over.
>-	 *
>-	 * Considering how we don't currently give userspace control
>-	 * over the OA buffer size and always configure a large 16MB
>-	 * buffer, then a buffer overflow does anyway likely indicate
>-	 * that something has gone quite badly wrong.
> 	 */
> 	if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
> 		ret = append_oa_status(stream, buf, count, offset,
>@@ -947,7 +938,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
> 	int report_size = stream->oa_buffer.format_size;
> 	u8 *oa_buf_base = stream->oa_buffer.vaddr;
> 	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
>-	u32 mask = (OA_BUFFER_SIZE - 1);
>+	u32 mask = (stream->oa_buffer.vma->size - 1);
> 	size_t start_offset = *offset;
> 	unsigned long flags;
> 	unsigned int aged_tail_idx;
>@@ -984,8 +975,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
> 	 * only be incremented by multiples of the report size (notably also
> 	 * all a power of two).
> 	 */
>-	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
>-		      tail > OA_BUFFER_SIZE || tail % report_size,
>+	if (WARN_ONCE(head > stream->oa_buffer.vma->size || head % report_size ||
>+		      tail > stream->oa_buffer.vma->size || tail % report_size,
> 		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
> 		      head, tail))
> 		return -EIO;
>@@ -1005,7 +996,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
> 		 * here would imply a driver bug that would result
> 		 * in an overrun.
> 		 */
>-		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
>+		if (WARN_ON((stream->oa_buffer.vma->size - head) < report_size)) {
> 			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
> 			break;
> 		}
>@@ -1408,7 +1399,9 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
>
> 	I915_WRITE(GEN7_OABUFFER, gtt_offset);
>
>-	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
>+	I915_WRITE(GEN7_OASTATUS1, gtt_offset |
>+		   ((stream->oa_buffer.size_exponent - 17) <<
>+		    GEN7_OASTATUS1_BUFFER_SIZE_SHIFT)); /* tail */
>
> 	/* Mark that we need updated tail pointers to read from... */
> 	stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
>@@ -1433,7 +1426,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
> 	 * the assumption that new reports are being written to zeroed
> 	 * memory...
> 	 */
>-	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
>+	memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.vma->size);
>
> 	/* Maybe make ->pollin per-stream state if we support multiple
> 	 * concurrent streams in the future.
>@@ -1464,7 +1457,9 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
> 	 *  bit."
> 	 */
> 	I915_WRITE(GEN8_OABUFFER, gtt_offset |
>-		   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
>+		   ((stream->oa_buffer.size_exponent - 17) <<
>+		    GEN8_OABUFFER_BUFFER_SIZE_SHIFT) |
>+		   GEN8_OABUFFER_MEM_SELECT_GGTT);
> 	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
>
> 	/* Mark that we need updated tail pointers to read from... */
>@@ -1492,7 +1487,7 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
> 	 * the assumption that new reports are being written to zeroed
> 	 * memory...
> 	 */
>-	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
>+	memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.vma->size);
>
> 	/*
> 	 * Maybe make ->pollin per-stream state if we support multiple
>@@ -1501,24 +1496,25 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
> 	stream->pollin = false;
> }
>
>-static int alloc_oa_buffer(struct i915_perf_stream *stream)
>+static int alloc_oa_buffer(struct i915_perf_stream *stream, int size_exponent)
> {
> 	struct drm_i915_gem_object *bo;
> 	struct drm_i915_private *dev_priv = stream->dev_priv;
> 	struct i915_vma *vma;
>+	size_t size = 1U << size_exponent;
> 	int ret;
>
> 	if (WARN_ON(stream->oa_buffer.vma))
> 		return -ENODEV;
>
>+	if (WARN_ON(size < SZ_128K || size > SZ_16M))
>+		return -EINVAL;
>+
> 	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
> 	if (ret)
> 		return ret;
>
>-	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
>-	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
>-
>-	bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
>+	bo = i915_gem_object_create_shmem(dev_priv, size);
> 	if (IS_ERR(bo)) {
> 		DRM_ERROR("Failed to allocate OA buffer\n");
> 		ret = PTR_ERR(bo);
>@@ -1534,6 +1530,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
> 		goto err_unref;
> 	}
> 	stream->oa_buffer.vma = vma;
>+	stream->oa_buffer.size_exponent = size_exponent;
>
> 	stream->oa_buffer.vaddr =
> 		i915_gem_object_pin_map(bo, I915_MAP_WB);
>@@ -1542,9 +1539,10 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
> 		goto err_unpin;
> 	}
>
>-	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
>+	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p, size = %llu\n",
> 			 i915_ggtt_offset(stream->oa_buffer.vma),
>-			 stream->oa_buffer.vaddr);
>+			 stream->oa_buffer.vaddr,
>+			 stream->oa_buffer.vma->size);
>
> 	goto unlock;
>
>@@ -2251,7 +2249,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
> 	stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
> 	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
>
>-	ret = alloc_oa_buffer(stream);
>+	ret = alloc_oa_buffer(stream, props->oa_buffer_size_exponent);
> 	if (ret)
> 		goto err_oa_buf_alloc;
>
>@@ -2823,6 +2821,26 @@ static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
> 			 1000ULL * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz);
> }
>
>+static int
>+select_oa_buffer_exponent(struct drm_i915_private *i915,
>+			  u64 requested_size)
>+{
>+	int order;
>+
>+	/*
>+	 * When no size is specified, use the largest size supported by all
>+	 * generations.
>+	 */
>+	if (!requested_size)
>+		return order_base_2(SZ_16M);
>+
>+	order = order_base_2(clamp_t(u64, requested_size, SZ_128K, SZ_16M));
>+	if (requested_size != (1UL << order))
>+		return -EINVAL;
>+
>+	return order;
>+}
>+
> /**
>  * read_properties_unlocked - validate + copy userspace stream open properties
>  * @dev_priv: i915 device instance
>@@ -2950,6 +2968,14 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
> 			props->oa_periodic = true;
> 			props->oa_period_exponent = value;
> 			break;
>+		case DRM_I915_PERF_PROP_OA_BUFFER_SIZE:
>+			ret = select_oa_buffer_exponent(dev_priv, value);
>+			if (ret < 0) {
>+				DRM_DEBUG("OA buffer size invalid %llu\n", value);
>+				return ret;
>+			}
>+			props->oa_buffer_size_exponent = ret;
>+			break;
> 		case DRM_I915_PERF_PROP_MAX:
> 			MISSING_CASE(id);
> 			return -EINVAL;
>@@ -2958,6 +2984,12 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
> 		uprop += 2;
> 	}
>
>+	/* If no buffer size was requested, select the default one. */
>+	if (!props->oa_buffer_size_exponent) {
>+		props->oa_buffer_size_exponent =
>+			select_oa_buffer_exponent(dev_priv, 0);
>+	}
>+
> 	return 0;
> }
>
>diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>index 34d83e3a51a7..d35f385f8288 100644
>--- a/drivers/gpu/drm/i915/i915_reg.h
>+++ b/drivers/gpu/drm/i915/i915_reg.h
>@@ -675,12 +675,14 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
> #define GEN8_OABUFFER_UDW _MMIO(0x23b4)
> #define GEN8_OABUFFER _MMIO(0x2b14)
> #define  GEN8_OABUFFER_MEM_SELECT_GGTT      (1 << 0)  /* 0: PPGTT, 1: GGTT */
>+#define  GEN8_OABUFFER_BUFFER_SIZE_SHIFT    3
>
> #define GEN7_OASTATUS1 _MMIO(0x2364)
> #define  GEN7_OASTATUS1_TAIL_MASK	    0xffffffc0
> #define  GEN7_OASTATUS1_COUNTER_OVERFLOW    (1 << 2)
> #define  GEN7_OASTATUS1_OABUFFER_OVERFLOW   (1 << 1)
> #define  GEN7_OASTATUS1_REPORT_LOST	    (1 << 0)
>+#define  GEN7_OASTATUS1_BUFFER_SIZE_SHIFT   3
>
> #define GEN7_OASTATUS2 _MMIO(0x2368)
> #define  GEN7_OASTATUS2_HEAD_MASK           0xffffffc0
>diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>index 469dc512cca3..f662c534de0a 100644
>--- a/include/uapi/drm/i915_drm.h
>+++ b/include/uapi/drm/i915_drm.h
>@@ -1873,6 +1873,13 @@ enum drm_i915_perf_property_id {
> 	 */
> 	DRM_I915_PERF_PROP_OA_EXPONENT,
>
>+	/**
>+	 * Specify a global OA buffer size to be allocated in bytes. The size
>+	 * specified must be supported by HW (currently supported sizes are
>+	 * powers of 2 ranging from 128Kb to 16Mb).
>+	 */
>+	DRM_I915_PERF_PROP_OA_BUFFER_SIZE,
>+
> 	DRM_I915_PERF_PROP_MAX /* non-ABI */
> };
>
>-- 
>2.21.0
>
>_______________________________________________
>Intel-gfx mailing list
>Intel-gfx@lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/intel-gfx
I think this patch needs to be landed after 
https://patchwork.freedesktop.org/patch/319815/?series=60916&rev=10
As recommended by Joonas.

It should also bump the revision number of the perf API so that the 
application can detect this feature is available.

Thanks,

-Lionel

On 16/08/2019 10:04, Lucas De Marchi wrote:
> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>
> The way our hardware is designed doesn't seem to let us use the
> MI_RECORD_PERF_COUNT command without setting up a circular buffer.
>
> In the case where the user didn't request OA reports to be available
> through the i915 perf stream, we can set the OA buffer to the minimum
> size to avoid consuming memory which won't be used by the driver.
>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_perf.c | 98 +++++++++++++++++++++-----------
>   drivers/gpu/drm/i915/i915_reg.h  |  2 +
>   include/uapi/drm/i915_drm.h      |  7 +++
>   3 files changed, 74 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 2c9f46e12622..9386d9c82930 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -216,13 +216,7 @@
>   #include "oa/i915_oa_cnl.h"
>   #include "oa/i915_oa_icl.h"
>   
> -/* HW requires this to be a power of two, between 128k and 16M, though driver
> - * is currently generally designed assuming the largest 16M size is used such
> - * that the overflow cases are unlikely in normal operation.
> - */
> -#define OA_BUFFER_SIZE		SZ_16M
> -
> -#define OA_TAKEN(tail, head)	((tail - head) & (OA_BUFFER_SIZE - 1))
> +#define OA_TAKEN(tail, head)	(((tail) - (head)) & (stream->oa_buffer.vma->size - 1))
>   
>   /**
>    * DOC: OA Tail Pointer Race
> @@ -347,6 +341,7 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
>    * @oa_format: An OA unit HW report format
>    * @oa_periodic: Whether to enable periodic OA unit sampling
>    * @oa_period_exponent: The OA unit sampling period is derived from this
> + * @oa_buffer_size_exponent: The OA buffer size is derived from this
>    *
>    * As read_properties_unlocked() enumerates and validates the properties given
>    * to open a stream of metrics the configuration is built up in the structure
> @@ -363,6 +358,7 @@ struct perf_open_properties {
>   	int oa_format;
>   	bool oa_periodic;
>   	int oa_period_exponent;
> +	u32 oa_buffer_size_exponent;
>   };
>   
>   static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
> @@ -531,7 +527,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>   		 * could put the tail out of bounds...
>   		 */
>   		if (hw_tail >= gtt_offset &&
> -		    hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
> +		    hw_tail < (gtt_offset + stream->oa_buffer.vma->size)) {
>   			stream->oa_buffer.tails[!aged_idx].offset =
>   				aging_tail = hw_tail;
>   			stream->oa_buffer.aging_timestamp = now;
> @@ -659,7 +655,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>   	int report_size = stream->oa_buffer.format_size;
>   	u8 *oa_buf_base = stream->oa_buffer.vaddr;
>   	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
> -	u32 mask = (OA_BUFFER_SIZE - 1);
> +	u32 mask = (stream->oa_buffer.vma->size - 1);
>   	size_t start_offset = *offset;
>   	unsigned long flags;
>   	unsigned int aged_tail_idx;
> @@ -699,8 +695,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>   	 * only be incremented by multiples of the report size (notably also
>   	 * all a power of two).
>   	 */
> -	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
> -		      tail > OA_BUFFER_SIZE || tail % report_size,
> +	if (WARN_ONCE(head > stream->oa_buffer.vma->size || head % report_size ||
> +		      tail > stream->oa_buffer.vma->size || tail % report_size,
>   		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
>   		      head, tail))
>   		return -EIO;
> @@ -723,7 +719,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>   		 * here would imply a driver bug that would result
>   		 * in an overrun.
>   		 */
> -		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
> +		if (WARN_ON((stream->oa_buffer.vma->size - head) < report_size)) {
>   			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
>   			break;
>   		}
> @@ -881,11 +877,6 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
>   	 * automatically triggered reports in this condition and so we
>   	 * have to assume that old reports are now being trampled
>   	 * over.
> -	 *
> -	 * Considering how we don't currently give userspace control
> -	 * over the OA buffer size and always configure a large 16MB
> -	 * buffer, then a buffer overflow does anyway likely indicate
> -	 * that something has gone quite badly wrong.
>   	 */
>   	if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
>   		ret = append_oa_status(stream, buf, count, offset,
> @@ -947,7 +938,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>   	int report_size = stream->oa_buffer.format_size;
>   	u8 *oa_buf_base = stream->oa_buffer.vaddr;
>   	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
> -	u32 mask = (OA_BUFFER_SIZE - 1);
> +	u32 mask = (stream->oa_buffer.vma->size - 1);
>   	size_t start_offset = *offset;
>   	unsigned long flags;
>   	unsigned int aged_tail_idx;
> @@ -984,8 +975,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>   	 * only be incremented by multiples of the report size (notably also
>   	 * all a power of two).
>   	 */
> -	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
> -		      tail > OA_BUFFER_SIZE || tail % report_size,
> +	if (WARN_ONCE(head > stream->oa_buffer.vma->size || head % report_size ||
> +		      tail > stream->oa_buffer.vma->size || tail % report_size,
>   		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
>   		      head, tail))
>   		return -EIO;
> @@ -1005,7 +996,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>   		 * here would imply a driver bug that would result
>   		 * in an overrun.
>   		 */
> -		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
> +		if (WARN_ON((stream->oa_buffer.vma->size - head) < report_size)) {
>   			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
>   			break;
>   		}
> @@ -1408,7 +1399,9 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
>   
>   	I915_WRITE(GEN7_OABUFFER, gtt_offset);
>   
> -	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
> +	I915_WRITE(GEN7_OASTATUS1, gtt_offset |
> +		   ((stream->oa_buffer.size_exponent - 17) <<
> +		    GEN7_OASTATUS1_BUFFER_SIZE_SHIFT)); /* tail */
>   
>   	/* Mark that we need updated tail pointers to read from... */
>   	stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
> @@ -1433,7 +1426,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
>   	 * the assumption that new reports are being written to zeroed
>   	 * memory...
>   	 */
> -	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
> +	memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.vma->size);
>   
>   	/* Maybe make ->pollin per-stream state if we support multiple
>   	 * concurrent streams in the future.
> @@ -1464,7 +1457,9 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
>   	 *  bit."
>   	 */
>   	I915_WRITE(GEN8_OABUFFER, gtt_offset |
> -		   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
> +		   ((stream->oa_buffer.size_exponent - 17) <<
> +		    GEN8_OABUFFER_BUFFER_SIZE_SHIFT) |
> +		   GEN8_OABUFFER_MEM_SELECT_GGTT);
>   	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
>   
>   	/* Mark that we need updated tail pointers to read from... */
> @@ -1492,7 +1487,7 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
>   	 * the assumption that new reports are being written to zeroed
>   	 * memory...
>   	 */
> -	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
> +	memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.vma->size);
>   
>   	/*
>   	 * Maybe make ->pollin per-stream state if we support multiple
> @@ -1501,24 +1496,25 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
>   	stream->pollin = false;
>   }
>   
> -static int alloc_oa_buffer(struct i915_perf_stream *stream)
> +static int alloc_oa_buffer(struct i915_perf_stream *stream, int size_exponent)
>   {
>   	struct drm_i915_gem_object *bo;
>   	struct drm_i915_private *dev_priv = stream->dev_priv;
>   	struct i915_vma *vma;
> +	size_t size = 1U << size_exponent;
>   	int ret;
>   
>   	if (WARN_ON(stream->oa_buffer.vma))
>   		return -ENODEV;
>   
> +	if (WARN_ON(size < SZ_128K || size > SZ_16M))
> +		return -EINVAL;
> +
>   	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
>   	if (ret)
>   		return ret;
>   
> -	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
> -	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
> -
> -	bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
> +	bo = i915_gem_object_create_shmem(dev_priv, size);
>   	if (IS_ERR(bo)) {
>   		DRM_ERROR("Failed to allocate OA buffer\n");
>   		ret = PTR_ERR(bo);
> @@ -1534,6 +1530,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
>   		goto err_unref;
>   	}
>   	stream->oa_buffer.vma = vma;
> +	stream->oa_buffer.size_exponent = size_exponent;
>   
>   	stream->oa_buffer.vaddr =
>   		i915_gem_object_pin_map(bo, I915_MAP_WB);
> @@ -1542,9 +1539,10 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
>   		goto err_unpin;
>   	}
>   
> -	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
> +	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p, size = %llu\n",
>   			 i915_ggtt_offset(stream->oa_buffer.vma),
> -			 stream->oa_buffer.vaddr);
> +			 stream->oa_buffer.vaddr,
> +			 stream->oa_buffer.vma->size);
>   
>   	goto unlock;
>   
> @@ -2251,7 +2249,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>   	stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
>   	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
>   
> -	ret = alloc_oa_buffer(stream);
> +	ret = alloc_oa_buffer(stream, props->oa_buffer_size_exponent);
>   	if (ret)
>   		goto err_oa_buf_alloc;
>   
> @@ -2823,6 +2821,26 @@ static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
>   			 1000ULL * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz);
>   }
>   
> +static int
> +select_oa_buffer_exponent(struct drm_i915_private *i915,
> +			  u64 requested_size)
> +{
> +	int order;
> +
> +	/*
> +	 * When no size is specified, use the largest size supported by all
> +	 * generations.
> +	 */
> +	if (!requested_size)
> +		return order_base_2(SZ_16M);
> +
> +	order = order_base_2(clamp_t(u64, requested_size, SZ_128K, SZ_16M));
> +	if (requested_size != (1UL << order))
> +		return -EINVAL;
> +
> +	return order;
> +}
> +
>   /**
>    * read_properties_unlocked - validate + copy userspace stream open properties
>    * @dev_priv: i915 device instance
> @@ -2950,6 +2968,14 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
>   			props->oa_periodic = true;
>   			props->oa_period_exponent = value;
>   			break;
> +		case DRM_I915_PERF_PROP_OA_BUFFER_SIZE:
> +			ret = select_oa_buffer_exponent(dev_priv, value);
> +			if (ret < 0) {
> +				DRM_DEBUG("OA buffer size invalid %llu\n", value);
> +				return ret;
> +			}
> +			props->oa_buffer_size_exponent = ret;
> +			break;
>   		case DRM_I915_PERF_PROP_MAX:
>   			MISSING_CASE(id);
>   			return -EINVAL;
> @@ -2958,6 +2984,12 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
>   		uprop += 2;
>   	}
>   
> +	/* If no buffer size was requested, select the default one. */
> +	if (!props->oa_buffer_size_exponent) {
> +		props->oa_buffer_size_exponent =
> +			select_oa_buffer_exponent(dev_priv, 0);
> +	}
> +
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 34d83e3a51a7..d35f385f8288 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -675,12 +675,14 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>   #define GEN8_OABUFFER_UDW _MMIO(0x23b4)
>   #define GEN8_OABUFFER _MMIO(0x2b14)
>   #define  GEN8_OABUFFER_MEM_SELECT_GGTT      (1 << 0)  /* 0: PPGTT, 1: GGTT */
> +#define  GEN8_OABUFFER_BUFFER_SIZE_SHIFT    3
>   
>   #define GEN7_OASTATUS1 _MMIO(0x2364)
>   #define  GEN7_OASTATUS1_TAIL_MASK	    0xffffffc0
>   #define  GEN7_OASTATUS1_COUNTER_OVERFLOW    (1 << 2)
>   #define  GEN7_OASTATUS1_OABUFFER_OVERFLOW   (1 << 1)
>   #define  GEN7_OASTATUS1_REPORT_LOST	    (1 << 0)
> +#define  GEN7_OASTATUS1_BUFFER_SIZE_SHIFT   3
>   
>   #define GEN7_OASTATUS2 _MMIO(0x2368)
>   #define  GEN7_OASTATUS2_HEAD_MASK           0xffffffc0
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 469dc512cca3..f662c534de0a 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1873,6 +1873,13 @@ enum drm_i915_perf_property_id {
>   	 */
>   	DRM_I915_PERF_PROP_OA_EXPONENT,
>   
> +	/**
> +	 * Specify a global OA buffer size to be allocated in bytes. The size
> +	 * specified must be supported by HW (currently supported sizes are
> +	 * powers of 2 ranging from 128Kb to 16Mb).
> +	 */
> +	DRM_I915_PERF_PROP_OA_BUFFER_SIZE,
> +
>   	DRM_I915_PERF_PROP_MAX /* non-ABI */
>   };
>
Quoting Lucas De Marchi (2019-08-16 11:04:57)
> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> 
> The way our hardware is designed doesn't seem to let us use the
> MI_RECORD_PERF_COUNT command without setting up a circular buffer.
> 
> In the case where the user didn't request OA reports to be available
> through the i915 perf stream, we can set the OA buffer to the minimum
> size to avoid consuming memory which won't be used by the driver.
> 
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_perf.c | 98 +++++++++++++++++++++-----------
>  drivers/gpu/drm/i915/i915_reg.h  |  2 +
>  include/uapi/drm/i915_drm.h      |  7 +++

Any patch touching i915_drm.h should have a link to the corresponding
userspace series.

Regards, Joonas
On 22/08/2019 12:43, Joonas Lahtinen wrote:
> Quoting Lucas De Marchi (2019-08-16 11:04:57)
>> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>
>> The way our hardware is designed doesn't seem to let us use the
>> MI_RECORD_PERF_COUNT command without setting up a circular buffer.
>>
>> In the case where the user didn't request OA reports to be available
>> through the i915 perf stream, we can set the OA buffer to the minimum
>> size to avoid consuming memory which won't be used by the driver.
>>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Cc: Matthew Auld <matthew.auld@intel.com>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_perf.c | 98 +++++++++++++++++++++-----------
>>   drivers/gpu/drm/i915/i915_reg.h  |  2 +
>>   include/uapi/drm/i915_drm.h      |  7 +++
> Any patch touching i915_drm.h should have a link to the corresponding
> userspace series.
>
> Regards, Joonas
>
Might be worth dropping this patch from the series then.

I don't think we have anything public or any coming userspace release 
making use of this.


Cheers,


-Lionel
On Fri, Aug 23, 2019 at 1:13 AM Lionel Landwerlin
<lionel.g.landwerlin@intel.com> wrote:
>
> On 22/08/2019 12:43, Joonas Lahtinen wrote:
> > Quoting Lucas De Marchi (2019-08-16 11:04:57)
> >> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> >>
> >> The way our hardware is designed doesn't seem to let us use the
> >> MI_RECORD_PERF_COUNT command without setting up a circular buffer.
> >>
> >> In the case where the user didn't request OA reports to be available
> >> through the i915 perf stream, we can set the OA buffer to the minimum
> >> size to avoid consuming memory which won't be used by the driver.
> >>
> >> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >> Cc: Matthew Auld <matthew.auld@intel.com>
> >> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> >> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> >> ---
> >>   drivers/gpu/drm/i915/i915_perf.c | 98 +++++++++++++++++++++-----------
> >>   drivers/gpu/drm/i915/i915_reg.h  |  2 +
> >>   include/uapi/drm/i915_drm.h      |  7 +++
> > Any patch touching i915_drm.h should have a link to the corresponding
> > userspace series.
> >
> > Regards, Joonas
> >
> Might be worth dropping this patch from the series then.
>
> I don't think we have anything public or any coming userspace release
> making use of this.

This is dropped from v3.

Lucas De Marchi

>
>
> Cheers,
>
>
> -Lionel
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx