[06/19] drm/i915: Handle log buffer flush interrupt event from GuC

Submitted by Akash Goel on Aug. 17, 2016, 10:14 a.m.

Details

Message ID 1471428859-10339-7-git-send-email-akash.goel@intel.com
State New
Headers show
Series "Support for sustained capturing of GuC firmware logs" ( rev: 8 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Akash Goel Aug. 17, 2016, 10:14 a.m.
From: Sagar Arun Kamble <sagar.a.kamble@intel.com>

GuC ukernel sends an interrupt to Host to flush the log buffer
and expects Host to correspondingly update the read pointer
information in the state structure, once it has consumed the
log buffer contents by copying them to a file or buffer.
Even if Host couldn't copy the contents, it can still update the
read pointer so that logging state is not disturbed on GuC side.

v2:
- Use a dedicated workqueue for handling flush interrupt. (Tvrtko)
- Reduce the overall log buffer copying time by skipping the copy of
  crash buffer area for regular cases and copying only the state
  structure data in first page.

v3:
 - Create a vmalloc mapping of log buffer. (Chris)
 - Cover the flush acknowledgment under rpm get & put.(Chris)
 - Revert the change of skipping the copy of crash dump area, as
   not really needed, will be covered by subsequent patch.

v4:
 - Destroy the wq under the same condition in which it was created,
   pass dev_piv pointer instead of dev to newly added GuC function,
   add more comments & rename variable for clarity. (Tvrtko)

v5:
- Allocate & destroy the dedicated wq, for handling flush interrupt,
  from the setup/teardown routines of GuC logging. (Chris)
- Validate the log buffer size value retrieved from state structure
  and do some minor cleanup. (Tvrtko)
- Fix error/warnings reported by checkpatch. (Tvrtko)
- Rebase.

v6:
 - Remove the interrupts_enabled check from guc_capture_logs_work, need
   to process that last work item also, queued just before disabling the
   interrupt as log buffer flush interrupt handling is a bit different
   case where GuC is actually expecting an ACK from host, which should be
   provided to keep the logging going.
   Sync against the work will be done by caller disabling the interrupt.
 - Don't sample the log buffer size value from state structure, directly
   use the expected value to move the pointer & do the copy and that cannot
   go wrong (out of bounds) as Driver only allocated the log buffer and the
   relay buffers. Driver should refrain from interpreting the log packet,
   as much possible and let Userspace parser detect the anomaly. (Chris)

Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 186 +++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_irq.c            |  28 ++++-
 drivers/gpu/drm/i915/intel_guc.h           |   4 +
 3 files changed, 217 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index b062da6..ade51cb 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -172,6 +172,15 @@  static int host2guc_sample_forcewake(struct intel_guc *guc,
 	return host2guc_action(guc, data, ARRAY_SIZE(data));
 }
 
+static int host2guc_logbuffer_flush_complete(struct intel_guc *guc)
+{
+	u32 data[1];
+
+	data[0] = HOST2GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE;
+
+	return host2guc_action(guc, data, 1);
+}
+
 /*
  * Initialise, update, or clear doorbell data shared with the GuC
  *
@@ -828,6 +837,163 @@  err:
 	return NULL;
 }
 
+static void guc_move_to_next_buf(struct intel_guc *guc)
+{
+}
+
+static void *guc_get_write_buffer(struct intel_guc *guc)
+{
+	return NULL;
+}
+
+static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type)
+{
+	if (type == GUC_ISR_LOG_BUFFER)
+		return (GUC_LOG_ISR_PAGES + 1) * PAGE_SIZE;
+	else if (type == GUC_DPC_LOG_BUFFER)
+		return (GUC_LOG_DPC_PAGES + 1) * PAGE_SIZE;
+	else
+		return (GUC_LOG_CRASH_PAGES + 1) * PAGE_SIZE;
+}
+
+static void guc_read_update_log_buffer(struct intel_guc *guc)
+{
+	struct guc_log_buffer_state *log_buffer_state, *log_buffer_snapshot_state;
+	struct guc_log_buffer_state log_buffer_state_local;
+	void *src_data_ptr, *dst_data_ptr;
+	unsigned int buffer_size;
+	enum guc_log_buffer_type type;
+
+	if (WARN_ON(!guc->log.buf_addr))
+		return;
+
+	/* Get the pointer to shared GuC log buffer */
+	log_buffer_state = src_data_ptr = guc->log.buf_addr;
+
+	/* Get the pointer to local buffer to store the logs */
+	dst_data_ptr = log_buffer_snapshot_state = guc_get_write_buffer(guc);
+
+	/* Actual logs are present from the 2nd page */
+	src_data_ptr += PAGE_SIZE;
+	dst_data_ptr += PAGE_SIZE;
+
+	for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) {
+		/* Make a copy of the state structure in GuC log buffer (which
+		 * is uncached mapped) on the stack to avoid reading from it
+		 * multiple times.
+		 */
+		memcpy(&log_buffer_state_local, log_buffer_state,
+		       sizeof(struct guc_log_buffer_state));
+		buffer_size = guc_get_log_buffer_size(type);
+
+		if (log_buffer_snapshot_state) {
+			/* First copy the state structure in snapshot buffer */
+			memcpy(log_buffer_snapshot_state, &log_buffer_state_local,
+			       sizeof(struct guc_log_buffer_state));
+
+			/* The write pointer could have been updated by the GuC
+			 * firmware, after sending the flush interrupt to Host,
+			 * for consistency set the write pointer value to same
+			 * value of sampled_write_ptr in the snapshot buffer.
+			 */
+			log_buffer_snapshot_state->write_ptr =
+				log_buffer_snapshot_state->sampled_write_ptr;
+
+			log_buffer_snapshot_state++;
+
+			/* Now copy the actual logs. */
+			memcpy(dst_data_ptr, src_data_ptr, buffer_size);
+
+			src_data_ptr += buffer_size;
+			dst_data_ptr += buffer_size;
+		}
+
+		/* FIXME: invalidate/flush for log buffer needed */
+
+		/* Update the read pointer in the shared log buffer */
+		log_buffer_state->read_ptr =
+			log_buffer_state_local.sampled_write_ptr;
+
+		/* Clear the 'flush to file' flag */
+		log_buffer_state->flush_to_file = 0;
+		log_buffer_state++;
+	}
+
+	if (log_buffer_snapshot_state)
+		guc_move_to_next_buf(guc);
+}
+
+static void guc_capture_logs_work(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, struct drm_i915_private, guc.log.flush_work);
+
+	i915_guc_capture_logs(dev_priv);
+}
+
+static void guc_log_cleanup(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	if (i915.guc_log_level < 0)
+		return;
+
+	/* First disable the flush interrupt */
+	gen9_disable_guc_interrupts(dev_priv);
+
+	if (guc->log.flush_wq)
+		destroy_workqueue(guc->log.flush_wq);
+
+	guc->log.flush_wq = NULL;
+
+	if (guc->log.buf_addr)
+		i915_gem_object_unpin_map(guc->log.vma->obj);
+
+	guc->log.buf_addr = NULL;
+}
+
+static int guc_create_log_extras(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	void *vaddr;
+	int ret;
+
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	/* Nothing to do */
+	if (i915.guc_log_level < 0)
+		return 0;
+
+	if (!guc->log.buf_addr) {
+		/* Create a vmalloc mapping of log buffer pages */
+		vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WB);
+		if (IS_ERR(vaddr)) {
+			ret = PTR_ERR(vaddr);
+			DRM_ERROR("Couldn't map log buffer pages %d\n", ret);
+			return ret;
+		}
+
+		guc->log.buf_addr = vaddr;
+	}
+
+	if (!guc->log.flush_wq) {
+		INIT_WORK(&guc->log.flush_work, guc_capture_logs_work);
+
+		/* Need a dedicated wq to process log buffer flush interrupts
+		 * from GuC without much delay so as to avoid any loss of logs.
+		 */
+		guc->log.flush_wq = alloc_ordered_workqueue("i915-guc_log", 0);
+		if (guc->log.flush_wq == NULL) {
+			DRM_ERROR("Couldn't allocate the wq for GuC logging\n");
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
 static void guc_create_log(struct intel_guc *guc)
 {
 	struct i915_vma *vma;
@@ -853,6 +1019,13 @@  static void guc_create_log(struct intel_guc *guc)
 		}
 
 		guc->log.vma = vma;
+
+		if (guc_create_log_extras(guc)) {
+			guc_log_cleanup(guc);
+			i915_vma_unpin_and_release(&guc->log.vma);
+			i915.guc_log_level = -1;
+			return;
+		}
 	}
 
 	/* each allocated unit is a page */
@@ -1034,6 +1207,7 @@  void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
 	struct intel_guc *guc = &dev_priv->guc;
 
 	i915_vma_unpin_and_release(&guc->ads_vma);
+	guc_log_cleanup(guc);
 	i915_vma_unpin_and_release(&guc->log.vma);
 
 	if (guc->ctx_pool_vma)
@@ -1095,3 +1269,15 @@  int intel_guc_resume(struct drm_device *dev)
 
 	return host2guc_action(guc, data, ARRAY_SIZE(data));
 }
+
+void i915_guc_capture_logs(struct drm_i915_private *dev_priv)
+{
+	guc_read_update_log_buffer(&dev_priv->guc);
+
+	/* Generally device is expected to be active only at this
+	 * time, so get/put should be really quick.
+	 */
+	intel_runtime_pm_get(dev_priv);
+	host2guc_logbuffer_flush_complete(&dev_priv->guc);
+	intel_runtime_pm_put(dev_priv);
+}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index fc1fe72..19c0078 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1662,7 +1662,33 @@  static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir)
 {
 	if (gt_iir & GEN9_GUC_TO_HOST_INT_EVENT) {
-		/* TODO: Handle events for which GuC interrupted host */
+		/* Sample the log buffer flush related bits & clear them out now
+		 * itself from the message identity register to minimize the
+		 * probability of losing a flush interrupt, when there are back
+		 * to back flush interrupts.
+		 * There can be a new flush interrupt, for different log buffer
+		 * type (like for ISR), whilst Host is handling one (for DPC).
+		 * Since same bit is used in message register for ISR & DPC, it
+		 * could happen that GuC sets the bit for 2nd interrupt but Host
+		 * clears out the bit on handling the 1st interrupt.
+		 */
+		u32 msg, flush;
+
+		msg = I915_READ(SOFT_SCRATCH(15));
+		flush = msg & (GUC2HOST_MSG_CRASH_DUMP_POSTED |
+			       GUC2HOST_MSG_FLUSH_LOG_BUFFER);
+		if (flush) {
+			/* Clear the message bits that are handled */
+			I915_WRITE(SOFT_SCRATCH(15), msg & ~flush);
+
+			/* Handle flush interrupt in bottom half */
+			queue_work(dev_priv->guc.log.flush_wq,
+				   &dev_priv->guc.log.flush_work);
+		} else {
+			/* Not clearing of unhandled event bits won't result in
+			 * re-triggering of the interrupt.
+			 */
+		}
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 1fc63fe..d053a18 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -124,6 +124,9 @@  struct intel_guc_fw {
 struct intel_guc_log {
 	uint32_t flags;
 	struct i915_vma *vma;
+	void *buf_addr;
+	struct workqueue_struct *flush_wq;
+	struct work_struct flush_work;
 };
 
 struct intel_guc {
@@ -167,5 +170,6 @@  int i915_guc_submission_enable(struct drm_i915_private *dev_priv);
 int i915_guc_wq_check_space(struct drm_i915_gem_request *rq);
 void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
 void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
+void i915_guc_capture_logs(struct drm_i915_private *dev_priv);
 
 #endif

Comments

On 17/08/16 11:14, akash.goel@intel.com wrote:
> From: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>
> GuC ukernel sends an interrupt to Host to flush the log buffer
> and expects Host to correspondingly update the read pointer
> information in the state structure, once it has consumed the
> log buffer contents by copying them to a file or buffer.
> Even if Host couldn't copy the contents, it can still update the
> read pointer so that logging state is not disturbed on GuC side.
>
> v2:
> - Use a dedicated workqueue for handling flush interrupt. (Tvrtko)
> - Reduce the overall log buffer copying time by skipping the copy of
>    crash buffer area for regular cases and copying only the state
>    structure data in first page.
>
> v3:
>   - Create a vmalloc mapping of log buffer. (Chris)
>   - Cover the flush acknowledgment under rpm get & put.(Chris)
>   - Revert the change of skipping the copy of crash dump area, as
>     not really needed, will be covered by subsequent patch.
>
> v4:
>   - Destroy the wq under the same condition in which it was created,
>     pass dev_piv pointer instead of dev to newly added GuC function,
>     add more comments & rename variable for clarity. (Tvrtko)
>
> v5:
> - Allocate & destroy the dedicated wq, for handling flush interrupt,
>    from the setup/teardown routines of GuC logging. (Chris)
> - Validate the log buffer size value retrieved from state structure
>    and do some minor cleanup. (Tvrtko)
> - Fix error/warnings reported by checkpatch. (Tvrtko)
> - Rebase.
>
> v6:
>   - Remove the interrupts_enabled check from guc_capture_logs_work, need
>     to process that last work item also, queued just before disabling the
>     interrupt as log buffer flush interrupt handling is a bit different
>     case where GuC is actually expecting an ACK from host, which should be
>     provided to keep the logging going.
>     Sync against the work will be done by caller disabling the interrupt.
>   - Don't sample the log buffer size value from state structure, directly
>     use the expected value to move the pointer & do the copy and that cannot
>     go wrong (out of bounds) as Driver only allocated the log buffer and the
>     relay buffers. Driver should refrain from interpreting the log packet,
>     as much possible and let Userspace parser detect the anomaly. (Chris)
>
> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> Signed-off-by: Akash Goel <akash.goel@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_guc_submission.c | 186 +++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_irq.c            |  28 ++++-
>   drivers/gpu/drm/i915/intel_guc.h           |   4 +
>   3 files changed, 217 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index b062da6..ade51cb 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -172,6 +172,15 @@ static int host2guc_sample_forcewake(struct intel_guc *guc,
>   	return host2guc_action(guc, data, ARRAY_SIZE(data));
>   }
>
> +static int host2guc_logbuffer_flush_complete(struct intel_guc *guc)
> +{
> +	u32 data[1];
> +
> +	data[0] = HOST2GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE;
> +
> +	return host2guc_action(guc, data, 1);
> +}
> +
>   /*
>    * Initialise, update, or clear doorbell data shared with the GuC
>    *
> @@ -828,6 +837,163 @@ err:
>   	return NULL;
>   }
>
> +static void guc_move_to_next_buf(struct intel_guc *guc)
> +{
> +}
> +
> +static void *guc_get_write_buffer(struct intel_guc *guc)
> +{
> +	return NULL;
> +}
> +
> +static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type)
> +{
> +	if (type == GUC_ISR_LOG_BUFFER)
> +		return (GUC_LOG_ISR_PAGES + 1) * PAGE_SIZE;
> +	else if (type == GUC_DPC_LOG_BUFFER)
> +		return (GUC_LOG_DPC_PAGES + 1) * PAGE_SIZE;
> +	else
> +		return (GUC_LOG_CRASH_PAGES + 1) * PAGE_SIZE;
> +}

Could do it with a switch statement to get automatic reminder of size 
not being handled if some day a new log buffer type gets added. It would 
probably more in the style of the rest of the driver as well.

> +
> +static void guc_read_update_log_buffer(struct intel_guc *guc)
> +{
> +	struct guc_log_buffer_state *log_buffer_state, *log_buffer_snapshot_state;
> +	struct guc_log_buffer_state log_buffer_state_local;
> +	void *src_data_ptr, *dst_data_ptr;
> +	unsigned int buffer_size;
> +	enum guc_log_buffer_type type;
> +
> +	if (WARN_ON(!guc->log.buf_addr))
> +		return;
> +
> +	/* Get the pointer to shared GuC log buffer */
> +	log_buffer_state = src_data_ptr = guc->log.buf_addr;
> +
> +	/* Get the pointer to local buffer to store the logs */
> +	dst_data_ptr = log_buffer_snapshot_state = guc_get_write_buffer(guc);
> +
> +	/* Actual logs are present from the 2nd page */
> +	src_data_ptr += PAGE_SIZE;
> +	dst_data_ptr += PAGE_SIZE;
> +
> +	for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) {
> +		/* Make a copy of the state structure in GuC log buffer (which
> +		 * is uncached mapped) on the stack to avoid reading from it
> +		 * multiple times.
> +		 */
> +		memcpy(&log_buffer_state_local, log_buffer_state,
> +		       sizeof(struct guc_log_buffer_state));
> +		buffer_size = guc_get_log_buffer_size(type);
> +
> +		if (log_buffer_snapshot_state) {
> +			/* First copy the state structure in snapshot buffer */
> +			memcpy(log_buffer_snapshot_state, &log_buffer_state_local,
> +			       sizeof(struct guc_log_buffer_state));

I've noticed now log_buffer_state_local is used only for the 
sample_write_ptr below and otherwise it causes two copies of the same data.

Since this branch is an interesting one, you could avoid one copy by 
updating the log_buffer_state->read_ptr from the 
log_buffer_snapshot_state in this branch, and add an else branch to 
update it directly from log_buffer_state when log_buffer_snapshot_state 
is not available.

Sounds like it would be worth eliminating double memcpy, what do you think>

> +
> +			/* The write pointer could have been updated by the GuC
> +			 * firmware, after sending the flush interrupt to Host,
> +			 * for consistency set the write pointer value to same
> +			 * value of sampled_write_ptr in the snapshot buffer.
> +			 */
> +			log_buffer_snapshot_state->write_ptr =
> +				log_buffer_snapshot_state->sampled_write_ptr;
> +
> +			log_buffer_snapshot_state++;
> +
> +			/* Now copy the actual logs. */
> +			memcpy(dst_data_ptr, src_data_ptr, buffer_size);
> +
> +			src_data_ptr += buffer_size;
> +			dst_data_ptr += buffer_size;
> +		}
> +
> +		/* FIXME: invalidate/flush for log buffer needed */
> +
> +		/* Update the read pointer in the shared log buffer */
> +		log_buffer_state->read_ptr =
> +			log_buffer_state_local.sampled_write_ptr;
> +
> +		/* Clear the 'flush to file' flag */
> +		log_buffer_state->flush_to_file = 0;
> +		log_buffer_state++;
> +	}
> +
> +	if (log_buffer_snapshot_state)
> +		guc_move_to_next_buf(guc);
> +}
> +
> +static void guc_capture_logs_work(struct work_struct *work)
> +{
> +	struct drm_i915_private *dev_priv =
> +		container_of(work, struct drm_i915_private, guc.log.flush_work);
> +
> +	i915_guc_capture_logs(dev_priv);
> +}
> +
> +static void guc_log_cleanup(struct intel_guc *guc)
> +{
> +	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +
> +	lockdep_assert_held(&dev_priv->drm.struct_mutex);
> +
> +	if (i915.guc_log_level < 0)
> +		return;
> +
> +	/* First disable the flush interrupt */
> +	gen9_disable_guc_interrupts(dev_priv);
> +
> +	if (guc->log.flush_wq)
> +		destroy_workqueue(guc->log.flush_wq);
> +
> +	guc->log.flush_wq = NULL;
> +
> +	if (guc->log.buf_addr)
> +		i915_gem_object_unpin_map(guc->log.vma->obj);
> +
> +	guc->log.buf_addr = NULL;
> +}
> +
> +static int guc_create_log_extras(struct intel_guc *guc)
> +{
> +	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +	void *vaddr;
> +	int ret;
> +
> +	lockdep_assert_held(&dev_priv->drm.struct_mutex);
> +
> +	/* Nothing to do */
> +	if (i915.guc_log_level < 0)
> +		return 0;
> +
> +	if (!guc->log.buf_addr) {
> +		/* Create a vmalloc mapping of log buffer pages */
> +		vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WB);
> +		if (IS_ERR(vaddr)) {
> +			ret = PTR_ERR(vaddr);
> +			DRM_ERROR("Couldn't map log buffer pages %d\n", ret);
> +			return ret;
> +		}
> +
> +		guc->log.buf_addr = vaddr;
> +	}
> +
> +	if (!guc->log.flush_wq) {
> +		INIT_WORK(&guc->log.flush_work, guc_capture_logs_work);
> +
> +		/* Need a dedicated wq to process log buffer flush interrupts
> +		 * from GuC without much delay so as to avoid any loss of logs.
> +		 */
> +		guc->log.flush_wq = alloc_ordered_workqueue("i915-guc_log", 0);
> +		if (guc->log.flush_wq == NULL) {
> +			DRM_ERROR("Couldn't allocate the wq for GuC logging\n");
> +			return -ENOMEM;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   static void guc_create_log(struct intel_guc *guc)
>   {
>   	struct i915_vma *vma;
> @@ -853,6 +1019,13 @@ static void guc_create_log(struct intel_guc *guc)
>   		}
>
>   		guc->log.vma = vma;
> +
> +		if (guc_create_log_extras(guc)) {
> +			guc_log_cleanup(guc);
> +			i915_vma_unpin_and_release(&guc->log.vma);
> +			i915.guc_log_level = -1;
> +			return;
> +		}
>   	}
>
>   	/* each allocated unit is a page */
> @@ -1034,6 +1207,7 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
>   	struct intel_guc *guc = &dev_priv->guc;
>
>   	i915_vma_unpin_and_release(&guc->ads_vma);
> +	guc_log_cleanup(guc);
>   	i915_vma_unpin_and_release(&guc->log.vma);
>
>   	if (guc->ctx_pool_vma)
> @@ -1095,3 +1269,15 @@ int intel_guc_resume(struct drm_device *dev)
>
>   	return host2guc_action(guc, data, ARRAY_SIZE(data));
>   }
> +
> +void i915_guc_capture_logs(struct drm_i915_private *dev_priv)
> +{
> +	guc_read_update_log_buffer(&dev_priv->guc);
> +
> +	/* Generally device is expected to be active only at this
> +	 * time, so get/put should be really quick.
> +	 */
> +	intel_runtime_pm_get(dev_priv);
> +	host2guc_logbuffer_flush_complete(&dev_priv->guc);
> +	intel_runtime_pm_put(dev_priv);
> +}
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index fc1fe72..19c0078 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1662,7 +1662,33 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
>   static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir)
>   {
>   	if (gt_iir & GEN9_GUC_TO_HOST_INT_EVENT) {
> -		/* TODO: Handle events for which GuC interrupted host */
> +		/* Sample the log buffer flush related bits & clear them out now
> +		 * itself from the message identity register to minimize the
> +		 * probability of losing a flush interrupt, when there are back
> +		 * to back flush interrupts.
> +		 * There can be a new flush interrupt, for different log buffer
> +		 * type (like for ISR), whilst Host is handling one (for DPC).
> +		 * Since same bit is used in message register for ISR & DPC, it
> +		 * could happen that GuC sets the bit for 2nd interrupt but Host
> +		 * clears out the bit on handling the 1st interrupt.
> +		 */
> +		u32 msg, flush;
> +
> +		msg = I915_READ(SOFT_SCRATCH(15));
> +		flush = msg & (GUC2HOST_MSG_CRASH_DUMP_POSTED |
> +			       GUC2HOST_MSG_FLUSH_LOG_BUFFER);
> +		if (flush) {
> +			/* Clear the message bits that are handled */
> +			I915_WRITE(SOFT_SCRATCH(15), msg & ~flush);
> +
> +			/* Handle flush interrupt in bottom half */
> +			queue_work(dev_priv->guc.log.flush_wq,
> +				   &dev_priv->guc.log.flush_work);
> +		} else {
> +			/* Not clearing of unhandled event bits won't result in
> +			 * re-triggering of the interrupt.
> +			 */
> +		}
>   	}
>   }
>
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index 1fc63fe..d053a18 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -124,6 +124,9 @@ struct intel_guc_fw {
>   struct intel_guc_log {
>   	uint32_t flags;
>   	struct i915_vma *vma;
> +	void *buf_addr;
> +	struct workqueue_struct *flush_wq;
> +	struct work_struct flush_work;
>   };
>
>   struct intel_guc {
> @@ -167,5 +170,6 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv);
>   int i915_guc_wq_check_space(struct drm_i915_gem_request *rq);
>   void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
>   void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
> +void i915_guc_capture_logs(struct drm_i915_private *dev_priv);
>
>   #endif
>

Otherwise looks OK to me.

Regards,

Tvrtko
On 8/17/2016 4:37 PM, Tvrtko Ursulin wrote:
>
> On 17/08/16 11:14, akash.goel@intel.com wrote:
>> From: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>>
>> GuC ukernel sends an interrupt to Host to flush the log buffer
>> and expects Host to correspondingly update the read pointer
>> information in the state structure, once it has consumed the
>> log buffer contents by copying them to a file or buffer.
>> Even if Host couldn't copy the contents, it can still update the
>> read pointer so that logging state is not disturbed on GuC side.
>>
>> v2:
>> - Use a dedicated workqueue for handling flush interrupt. (Tvrtko)
>> - Reduce the overall log buffer copying time by skipping the copy of
>>    crash buffer area for regular cases and copying only the state
>>    structure data in first page.
>>
>> v3:
>>   - Create a vmalloc mapping of log buffer. (Chris)
>>   - Cover the flush acknowledgment under rpm get & put.(Chris)
>>   - Revert the change of skipping the copy of crash dump area, as
>>     not really needed, will be covered by subsequent patch.
>>
>> v4:
>>   - Destroy the wq under the same condition in which it was created,
>>     pass dev_piv pointer instead of dev to newly added GuC function,
>>     add more comments & rename variable for clarity. (Tvrtko)
>>
>> v5:
>> - Allocate & destroy the dedicated wq, for handling flush interrupt,
>>    from the setup/teardown routines of GuC logging. (Chris)
>> - Validate the log buffer size value retrieved from state structure
>>    and do some minor cleanup. (Tvrtko)
>> - Fix error/warnings reported by checkpatch. (Tvrtko)
>> - Rebase.
>>
>> v6:
>>   - Remove the interrupts_enabled check from guc_capture_logs_work, need
>>     to process that last work item also, queued just before disabling the
>>     interrupt as log buffer flush interrupt handling is a bit different
>>     case where GuC is actually expecting an ACK from host, which
>> should be
>>     provided to keep the logging going.
>>     Sync against the work will be done by caller disabling the interrupt.
>>   - Don't sample the log buffer size value from state structure, directly
>>     use the expected value to move the pointer & do the copy and that
>> cannot
>>     go wrong (out of bounds) as Driver only allocated the log buffer
>> and the
>>     relay buffers. Driver should refrain from interpreting the log
>> packet,
>>     as much possible and let Userspace parser detect the anomaly. (Chris)
>>
>> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>> Signed-off-by: Akash Goel <akash.goel@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_guc_submission.c | 186
>> +++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_irq.c            |  28 ++++-
>>   drivers/gpu/drm/i915/intel_guc.h           |   4 +
>>   3 files changed, 217 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index b062da6..ade51cb 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -172,6 +172,15 @@ static int host2guc_sample_forcewake(struct
>> intel_guc *guc,
>>       return host2guc_action(guc, data, ARRAY_SIZE(data));
>>   }
>>
>> +static int host2guc_logbuffer_flush_complete(struct intel_guc *guc)
>> +{
>> +    u32 data[1];
>> +
>> +    data[0] = HOST2GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE;
>> +
>> +    return host2guc_action(guc, data, 1);
>> +}
>> +
>>   /*
>>    * Initialise, update, or clear doorbell data shared with the GuC
>>    *
>> @@ -828,6 +837,163 @@ err:
>>       return NULL;
>>   }
>>
>> +static void guc_move_to_next_buf(struct intel_guc *guc)
>> +{
>> +}
>> +
>> +static void *guc_get_write_buffer(struct intel_guc *guc)
>> +{
>> +    return NULL;
>> +}
>> +
>> +static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type
>> type)
>> +{
>> +    if (type == GUC_ISR_LOG_BUFFER)
>> +        return (GUC_LOG_ISR_PAGES + 1) * PAGE_SIZE;
>> +    else if (type == GUC_DPC_LOG_BUFFER)
>> +        return (GUC_LOG_DPC_PAGES + 1) * PAGE_SIZE;
>> +    else
>> +        return (GUC_LOG_CRASH_PAGES + 1) * PAGE_SIZE;
>> +}
>
> Could do it with a switch statement to get automatic reminder of size
> not being handled if some day a new log buffer type gets added. It would
> probably more in the style of the rest of the driver as well.
>
Fine will use switch statement here.

Should I use BUG_ON for the default/unhandled case ?
	case GUC_ISR_LOG_BUFFER
	
	case GUC_DPC_LOG_BUFFER

	case GUC_LOG_CRASH_PAGES

	default
		BUG_ON(1)

>> +
>> +static void guc_read_update_log_buffer(struct intel_guc *guc)
>> +{
>> +    struct guc_log_buffer_state *log_buffer_state,
>> *log_buffer_snapshot_state;
>> +    struct guc_log_buffer_state log_buffer_state_local;
>> +    void *src_data_ptr, *dst_data_ptr;
>> +    unsigned int buffer_size;
>> +    enum guc_log_buffer_type type;
>> +
>> +    if (WARN_ON(!guc->log.buf_addr))
>> +        return;
>> +
>> +    /* Get the pointer to shared GuC log buffer */
>> +    log_buffer_state = src_data_ptr = guc->log.buf_addr;
>> +
>> +    /* Get the pointer to local buffer to store the logs */
>> +    dst_data_ptr = log_buffer_snapshot_state =
>> guc_get_write_buffer(guc);
>> +
>> +    /* Actual logs are present from the 2nd page */
>> +    src_data_ptr += PAGE_SIZE;
>> +    dst_data_ptr += PAGE_SIZE;
>> +
>> +    for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) {
>> +        /* Make a copy of the state structure in GuC log buffer (which
>> +         * is uncached mapped) on the stack to avoid reading from it
>> +         * multiple times.
>> +         */
>> +        memcpy(&log_buffer_state_local, log_buffer_state,
>> +               sizeof(struct guc_log_buffer_state));
>> +        buffer_size = guc_get_log_buffer_size(type);
>> +
>> +        if (log_buffer_snapshot_state) {
>> +            /* First copy the state structure in snapshot buffer */
>> +            memcpy(log_buffer_snapshot_state, &log_buffer_state_local,
>> +                   sizeof(struct guc_log_buffer_state));
>
> I've noticed now log_buffer_state_local is used only for the
> sample_write_ptr below and otherwise it causes two copies of the same data.
>
Actually log_buffer_state_local gets used more in the subsequent 
patches, where we do the bookkeeping and optimize the copying.

[PATCH 10/19] drm/i915: Add stats for GuC log buffer flush interrupts
[PATCH 11/19] drm/i915: Optimization to reduce the sampling time of GuC 
log buffer

> Since this branch is an interesting one, you could avoid one copy by
> updating the log_buffer_state->read_ptr from the
> log_buffer_snapshot_state in this branch, and add an else branch to
> update it directly from log_buffer_state when log_buffer_snapshot_state
> is not available.
>
> Sounds like it would be worth eliminating double memcpy, what do you think>
>
Won't the 2nd memcpy (from the copy on stack to the relay buffer) be 
really fast ?
The copy on stack (16 bytes) will most likely be in the CPU cache and 
the same area is used for all 3 buffer types.

Best regards
Akash

>> +
>> +            /* The write pointer could have been updated by the GuC
>> +             * firmware, after sending the flush interrupt to Host,
>> +             * for consistency set the write pointer value to same
>> +             * value of sampled_write_ptr in the snapshot buffer.
>> +             */
>> +            log_buffer_snapshot_state->write_ptr =
>> +                log_buffer_snapshot_state->sampled_write_ptr;
>> +
>> +            log_buffer_snapshot_state++;
>> +
>> +            /* Now copy the actual logs. */
>> +            memcpy(dst_data_ptr, src_data_ptr, buffer_size);
>> +
>> +            src_data_ptr += buffer_size;
>> +            dst_data_ptr += buffer_size;
>> +        }
>> +
>> +        /* FIXME: invalidate/flush for log buffer needed */
>> +
>> +        /* Update the read pointer in the shared log buffer */
>> +        log_buffer_state->read_ptr =
>> +            log_buffer_state_local.sampled_write_ptr;
>> +
>> +        /* Clear the 'flush to file' flag */
>> +        log_buffer_state->flush_to_file = 0;
>> +        log_buffer_state++;
>> +    }
>> +
>> +    if (log_buffer_snapshot_state)
>> +        guc_move_to_next_buf(guc);
>> +}
>> +
>> +static void guc_capture_logs_work(struct work_struct *work)
>> +{
>> +    struct drm_i915_private *dev_priv =
>> +        container_of(work, struct drm_i915_private, guc.log.flush_work);
>> +
>> +    i915_guc_capture_logs(dev_priv);
>> +}
>> +
>> +static void guc_log_cleanup(struct intel_guc *guc)
>> +{
>> +    struct drm_i915_private *dev_priv = guc_to_i915(guc);
>> +
>> +    lockdep_assert_held(&dev_priv->drm.struct_mutex);
>> +
>> +    if (i915.guc_log_level < 0)
>> +        return;
>> +
>> +    /* First disable the flush interrupt */
>> +    gen9_disable_guc_interrupts(dev_priv);
>> +
>> +    if (guc->log.flush_wq)
>> +        destroy_workqueue(guc->log.flush_wq);
>> +
>> +    guc->log.flush_wq = NULL;
>> +
>> +    if (guc->log.buf_addr)
>> +        i915_gem_object_unpin_map(guc->log.vma->obj);
>> +
>> +    guc->log.buf_addr = NULL;
>> +}
>> +
>> +static int guc_create_log_extras(struct intel_guc *guc)
>> +{
>> +    struct drm_i915_private *dev_priv = guc_to_i915(guc);
>> +    void *vaddr;
>> +    int ret;
>> +
>> +    lockdep_assert_held(&dev_priv->drm.struct_mutex);
>> +
>> +    /* Nothing to do */
>> +    if (i915.guc_log_level < 0)
>> +        return 0;
>> +
>> +    if (!guc->log.buf_addr) {
>> +        /* Create a vmalloc mapping of log buffer pages */
>> +        vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WB);
>> +        if (IS_ERR(vaddr)) {
>> +            ret = PTR_ERR(vaddr);
>> +            DRM_ERROR("Couldn't map log buffer pages %d\n", ret);
>> +            return ret;
>> +        }
>> +
>> +        guc->log.buf_addr = vaddr;
>> +    }
>> +
>> +    if (!guc->log.flush_wq) {
>> +        INIT_WORK(&guc->log.flush_work, guc_capture_logs_work);
>> +
>> +        /* Need a dedicated wq to process log buffer flush interrupts
>> +         * from GuC without much delay so as to avoid any loss of logs.
>> +         */
>> +        guc->log.flush_wq = alloc_ordered_workqueue("i915-guc_log", 0);
>> +        if (guc->log.flush_wq == NULL) {
>> +            DRM_ERROR("Couldn't allocate the wq for GuC logging\n");
>> +            return -ENOMEM;
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>   static void guc_create_log(struct intel_guc *guc)
>>   {
>>       struct i915_vma *vma;
>> @@ -853,6 +1019,13 @@ static void guc_create_log(struct intel_guc *guc)
>>           }
>>
>>           guc->log.vma = vma;
>> +
>> +        if (guc_create_log_extras(guc)) {
>> +            guc_log_cleanup(guc);
>> +            i915_vma_unpin_and_release(&guc->log.vma);
>> +            i915.guc_log_level = -1;
>> +            return;
>> +        }
>>       }
>>
>>       /* each allocated unit is a page */
>> @@ -1034,6 +1207,7 @@ void i915_guc_submission_fini(struct
>> drm_i915_private *dev_priv)
>>       struct intel_guc *guc = &dev_priv->guc;
>>
>>       i915_vma_unpin_and_release(&guc->ads_vma);
>> +    guc_log_cleanup(guc);
>>       i915_vma_unpin_and_release(&guc->log.vma);
>>
>>       if (guc->ctx_pool_vma)
>> @@ -1095,3 +1269,15 @@ int intel_guc_resume(struct drm_device *dev)
>>
>>       return host2guc_action(guc, data, ARRAY_SIZE(data));
>>   }
>> +
>> +void i915_guc_capture_logs(struct drm_i915_private *dev_priv)
>> +{
>> +    guc_read_update_log_buffer(&dev_priv->guc);
>> +
>> +    /* Generally device is expected to be active only at this
>> +     * time, so get/put should be really quick.
>> +     */
>> +    intel_runtime_pm_get(dev_priv);
>> +    host2guc_logbuffer_flush_complete(&dev_priv->guc);
>> +    intel_runtime_pm_put(dev_priv);
>> +}
>> diff --git a/drivers/gpu/drm/i915/i915_irq.c
>> b/drivers/gpu/drm/i915/i915_irq.c
>> index fc1fe72..19c0078 100644
>> --- a/drivers/gpu/drm/i915/i915_irq.c
>> +++ b/drivers/gpu/drm/i915/i915_irq.c
>> @@ -1662,7 +1662,33 @@ static void gen6_rps_irq_handler(struct
>> drm_i915_private *dev_priv, u32 pm_iir)
>>   static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv,
>> u32 gt_iir)
>>   {
>>       if (gt_iir & GEN9_GUC_TO_HOST_INT_EVENT) {
>> -        /* TODO: Handle events for which GuC interrupted host */
>> +        /* Sample the log buffer flush related bits & clear them out now
>> +         * itself from the message identity register to minimize the
>> +         * probability of losing a flush interrupt, when there are back
>> +         * to back flush interrupts.
>> +         * There can be a new flush interrupt, for different log buffer
>> +         * type (like for ISR), whilst Host is handling one (for DPC).
>> +         * Since same bit is used in message register for ISR & DPC, it
>> +         * could happen that GuC sets the bit for 2nd interrupt but Host
>> +         * clears out the bit on handling the 1st interrupt.
>> +         */
>> +        u32 msg, flush;
>> +
>> +        msg = I915_READ(SOFT_SCRATCH(15));
>> +        flush = msg & (GUC2HOST_MSG_CRASH_DUMP_POSTED |
>> +                   GUC2HOST_MSG_FLUSH_LOG_BUFFER);
>> +        if (flush) {
>> +            /* Clear the message bits that are handled */
>> +            I915_WRITE(SOFT_SCRATCH(15), msg & ~flush);
>> +
>> +            /* Handle flush interrupt in bottom half */
>> +            queue_work(dev_priv->guc.log.flush_wq,
>> +                   &dev_priv->guc.log.flush_work);
>> +        } else {
>> +            /* Not clearing of unhandled event bits won't result in
>> +             * re-triggering of the interrupt.
>> +             */
>> +        }
>>       }
>>   }
>>
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index 1fc63fe..d053a18 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -124,6 +124,9 @@ struct intel_guc_fw {
>>   struct intel_guc_log {
>>       uint32_t flags;
>>       struct i915_vma *vma;
>> +    void *buf_addr;
>> +    struct workqueue_struct *flush_wq;
>> +    struct work_struct flush_work;
>>   };
>>
>>   struct intel_guc {
>> @@ -167,5 +170,6 @@ int i915_guc_submission_enable(struct
>> drm_i915_private *dev_priv);
>>   int i915_guc_wq_check_space(struct drm_i915_gem_request *rq);
>>   void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
>>   void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
>> +void i915_guc_capture_logs(struct drm_i915_private *dev_priv);
>>
>>   #endif
>>
>
> Otherwise looks OK to me.
>
> Regards,
>
> Tvrtko
On 17/08/16 12:24, Goel, Akash wrote:
> On 8/17/2016 4:37 PM, Tvrtko Ursulin wrote:
>>
>> On 17/08/16 11:14, akash.goel@intel.com wrote:
>>> From: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>>>
>>> GuC ukernel sends an interrupt to Host to flush the log buffer
>>> and expects Host to correspondingly update the read pointer
>>> information in the state structure, once it has consumed the
>>> log buffer contents by copying them to a file or buffer.
>>> Even if Host couldn't copy the contents, it can still update the
>>> read pointer so that logging state is not disturbed on GuC side.
>>>
>>> v2:
>>> - Use a dedicated workqueue for handling flush interrupt. (Tvrtko)
>>> - Reduce the overall log buffer copying time by skipping the copy of
>>>    crash buffer area for regular cases and copying only the state
>>>    structure data in first page.
>>>
>>> v3:
>>>   - Create a vmalloc mapping of log buffer. (Chris)
>>>   - Cover the flush acknowledgment under rpm get & put.(Chris)
>>>   - Revert the change of skipping the copy of crash dump area, as
>>>     not really needed, will be covered by subsequent patch.
>>>
>>> v4:
>>>   - Destroy the wq under the same condition in which it was created,
>>>     pass dev_piv pointer instead of dev to newly added GuC function,
>>>     add more comments & rename variable for clarity. (Tvrtko)
>>>
>>> v5:
>>> - Allocate & destroy the dedicated wq, for handling flush interrupt,
>>>    from the setup/teardown routines of GuC logging. (Chris)
>>> - Validate the log buffer size value retrieved from state structure
>>>    and do some minor cleanup. (Tvrtko)
>>> - Fix error/warnings reported by checkpatch. (Tvrtko)
>>> - Rebase.
>>>
>>> v6:
>>>   - Remove the interrupts_enabled check from guc_capture_logs_work, need
>>>     to process that last work item also, queued just before disabling
>>> the
>>>     interrupt as log buffer flush interrupt handling is a bit different
>>>     case where GuC is actually expecting an ACK from host, which
>>> should be
>>>     provided to keep the logging going.
>>>     Sync against the work will be done by caller disabling the
>>> interrupt.
>>>   - Don't sample the log buffer size value from state structure,
>>> directly
>>>     use the expected value to move the pointer & do the copy and that
>>> cannot
>>>     go wrong (out of bounds) as Driver only allocated the log buffer
>>> and the
>>>     relay buffers. Driver should refrain from interpreting the log
>>> packet,
>>>     as much possible and let Userspace parser detect the anomaly.
>>> (Chris)
>>>
>>> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>>> Signed-off-by: Akash Goel <akash.goel@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_guc_submission.c | 186
>>> +++++++++++++++++++++++++++++
>>>   drivers/gpu/drm/i915/i915_irq.c            |  28 ++++-
>>>   drivers/gpu/drm/i915/intel_guc.h           |   4 +
>>>   3 files changed, 217 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> index b062da6..ade51cb 100644
>>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> @@ -172,6 +172,15 @@ static int host2guc_sample_forcewake(struct
>>> intel_guc *guc,
>>>       return host2guc_action(guc, data, ARRAY_SIZE(data));
>>>   }
>>>
>>> +static int host2guc_logbuffer_flush_complete(struct intel_guc *guc)
>>> +{
>>> +    u32 data[1];
>>> +
>>> +    data[0] = HOST2GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE;
>>> +
>>> +    return host2guc_action(guc, data, 1);
>>> +}
>>> +
>>>   /*
>>>    * Initialise, update, or clear doorbell data shared with the GuC
>>>    *
>>> @@ -828,6 +837,163 @@ err:
>>>       return NULL;
>>>   }
>>>
>>> +static void guc_move_to_next_buf(struct intel_guc *guc)
>>> +{
>>> +}
>>> +
>>> +static void *guc_get_write_buffer(struct intel_guc *guc)
>>> +{
>>> +    return NULL;
>>> +}
>>> +
>>> +static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type
>>> type)
>>> +{
>>> +    if (type == GUC_ISR_LOG_BUFFER)
>>> +        return (GUC_LOG_ISR_PAGES + 1) * PAGE_SIZE;
>>> +    else if (type == GUC_DPC_LOG_BUFFER)
>>> +        return (GUC_LOG_DPC_PAGES + 1) * PAGE_SIZE;
>>> +    else
>>> +        return (GUC_LOG_CRASH_PAGES + 1) * PAGE_SIZE;
>>> +}
>>
>> Could do it with a switch statement to get automatic reminder of size
>> not being handled if some day a new log buffer type gets added. It would
>> probably more in the style of the rest of the driver as well.
>>
> Fine will use switch statement here.
>
> Should I use BUG_ON for the default/unhandled case ?
>      case GUC_ISR_LOG_BUFFER
>
>      case GUC_DPC_LOG_BUFFER
>
>      case GUC_LOG_CRASH_PAGES
>
>      default
>          BUG_ON(1)

You could use MISSING_CASE, but I think compiler should complain as 
well. Suppose MISSING_CASE is safer.

>>> +
>>> +static void guc_read_update_log_buffer(struct intel_guc *guc)
>>> +{
>>> +    struct guc_log_buffer_state *log_buffer_state,
>>> *log_buffer_snapshot_state;
>>> +    struct guc_log_buffer_state log_buffer_state_local;
>>> +    void *src_data_ptr, *dst_data_ptr;
>>> +    unsigned int buffer_size;
>>> +    enum guc_log_buffer_type type;
>>> +
>>> +    if (WARN_ON(!guc->log.buf_addr))
>>> +        return;
>>> +
>>> +    /* Get the pointer to shared GuC log buffer */
>>> +    log_buffer_state = src_data_ptr = guc->log.buf_addr;
>>> +
>>> +    /* Get the pointer to local buffer to store the logs */
>>> +    dst_data_ptr = log_buffer_snapshot_state =
>>> guc_get_write_buffer(guc);
>>> +
>>> +    /* Actual logs are present from the 2nd page */
>>> +    src_data_ptr += PAGE_SIZE;
>>> +    dst_data_ptr += PAGE_SIZE;
>>> +
>>> +    for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER;
>>> type++) {
>>> +        /* Make a copy of the state structure in GuC log buffer (which
>>> +         * is uncached mapped) on the stack to avoid reading from it
>>> +         * multiple times.
>>> +         */
>>> +        memcpy(&log_buffer_state_local, log_buffer_state,
>>> +               sizeof(struct guc_log_buffer_state));
>>> +        buffer_size = guc_get_log_buffer_size(type);
>>> +
>>> +        if (log_buffer_snapshot_state) {
>>> +            /* First copy the state structure in snapshot buffer */
>>> +            memcpy(log_buffer_snapshot_state, &log_buffer_state_local,
>>> +                   sizeof(struct guc_log_buffer_state));
>>
>> I've noticed now log_buffer_state_local is used only for the
>> sample_write_ptr below and otherwise it causes two copies of the same
>> data.
>>
> Actually log_buffer_state_local gets used more in the subsequent
> patches, where we do the bookkeeping and optimize the copying.
>
> [PATCH 10/19] drm/i915: Add stats for GuC log buffer flush interrupts
> [PATCH 11/19] drm/i915: Optimization to reduce the sampling time of GuC
> log buffer
>
>> Since this branch is an interesting one, you could avoid one copy by
>> updating the log_buffer_state->read_ptr from the
>> log_buffer_snapshot_state in this branch, and add an else branch to
>> update it directly from log_buffer_state when log_buffer_snapshot_state
>> is not available.
>>
>> Sounds like it would be worth eliminating double memcpy, what do you
>> think>
>>
> Won't the 2nd memcpy (from the copy on stack to the relay buffer) be
> really fast ?
> The copy on stack (16 bytes) will most likely be in the CPU cache and
> the same area is used for all 3 buffer types.

Yes I realized later you use it more in later patches.

I don't think it is slow but was just wondering if it could be made 
tidier by getting rid of one copy.

Would have to apply the series to see how the final loop looks like, but 
would something like the below be possible:

	if (!log_buffer_snapshot_state) {
		.. read_ptr update from wc ..
		continue;
	}

	memcpy from wc to final buffer

	.. the rest of processing you do reading from the copy ..

?

Regards,

Tvrtko
On 17/08/16 12:35, Tvrtko Ursulin wrote:
> On 17/08/16 12:24, Goel, Akash wrote:

[snip]

>> Won't the 2nd memcpy (from the copy on stack to the relay buffer) be
>> really fast ?
>> The copy on stack (16 bytes) will most likely be in the CPU cache and
>> the same area is used for all 3 buffer types.
> 
> Yes I realized later you use it more in later patches.
> 
> I don't think it is slow but was just wondering if it could be made 
> tidier by getting rid of one copy.
> 
> Would have to apply the series to see how the final loop looks like, but 
> would something like the below be possible:
> 
>      if (!log_buffer_snapshot_state) {
>          .. read_ptr update from wc ..
>          continue;
>      }
> 
>      memcpy from wc to final buffer
> 
>      .. the rest of processing you do reading from the copy ..
> 
> ?

Not even compile tested and possibly incorrectly refactored,
but what do you think of:

static bool
guc_log_check_overflow(struct intel_guc *guc,
		       enum guc_log_buffer_type type,
		       struct guc_log_buffer *snapshot)
{
	unsigned int full_cnt, prev_full_cnt;
	bool overflow = false;

	guc->log.flush_count[type]++;

	full_cnt = snapshot.buffer_full_cnt;
	prev_full_cnt = guc->log.prev_overflow_count[type];

	if (full_cnt != prev_full_cnt) {
		overflow = true;

		guc->log.prev_overflow_count[type] = full_cnt;
		guc->log.total_overflow_count[type] += full_cnt - prev_full_cnt;

		if (full_cnt < prev_full_cnt) {
			/* buffer_full_cnt is a 4 bit counter */
			guc->log.total_overflow_count[type] += 16;
		}
	}

	return overflow;
}

static void guc_read_update_log_buffer(struct intel_guc *guc)
{
	struct guc_log_buffer *log_buffer, *snapshot;
	void *src_data, *dst_data;
	enum guc_log_buffer_type type;

	if (WARN_ON(!guc->log.buf_addr))
		return;

	/* Get the pointer to shared GuC log buffer */
	log_buffer = src_data = guc->log.buf_addr;

	/* Get the pointer to local buffer to store the logs */
	dst_data = snapshot = guc_get_write_buffer(guc);

	/* Actual logs are present from the 2nd page */
	src_data += PAGE_SIZE;
	dst_data += PAGE_SIZE;

	for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER;
	     type++, log_buffer++) {
		unsigned int buffer_size, bytes_to_copy;
		unsigned int read_offset, write_offset;
		bool new_overflow;

		/* Clear the 'flush to file' flag */
		log_buffer->flush_to_file = 0;

		if (unlikely(!snapshot)) {
			/* Update the read pointer in the shared log buffer */
			log_buffer->read_ptr = log_buffer.sampled_write_ptr;
			continue;
		}

		memcpy(snapshot, &log_buffer, sizeof(*snapshot));

		buffer_size = guc_get_log_buffer_size(type);
		read_offset = snapshot.read_ptr;
		write_offset = snapshot.sampled_write_ptr;

		/* The write pointer could have been updated by the GuC
		 * firmware, after sending the flush interrupt to Host,
		 * for consistency set the write pointer value to same
		 * value of sampled_write_ptr in the snapshot buffer.
		 */
		snapshot->write_ptr = write_offset;

		new_overflow = guc_log_check_overflow(guc, type, snapshot);
		if (unlikely(new_overflow)) {
			DRM_ERROR_RATELIMITED("GuC log buffer overflow\n");
			/* copy the whole buffer in case of overflow */
			read_offset = 0;
			write_offset = buffer_size;
		} else if (unlikely((read_offset > buffer_size) ||
		           (write_offset > buffer_size))) {
			DRM_ERROR("invalid log buffer state\n");
			/* copy whole buffer as offsets are unreliable */
			read_offset = 0;
			write_offset = buffer_size;
		}

		/* Just copy the newly written data */
		if (read_offset > write_offset) {
			bytes_to_copy = buffer_size - read_offset;

			i915_memcpy_from_wc(dst_data, src_data, write_offset);
		} else {
			bytes_to_copy = write_offset - read_offset;
		}
		i915_memcpy_from_wc(dst_data + read_offset,
				    src_data + read_offset, bytes_to_copy);

		src_data += buffer_size;
		dst_data += buffer_size;

		/* Update the read pointer in the shared log buffer */
		log_buffer->read_ptr = snapshot.sampled_write_ptr;

		snapshot++;
	}

	if (snapshot) {
		guc_move_to_next_buf(guc);
	} else {
		/* Used rate limited to avoid deluge of messages, logs might be
		 * getting consumed by User at a slow rate.
		 */
		DRM_ERROR_RATELIMITED("no sub-buffer to capture log buffer\n");
		guc->log.capture_miss_count++;
	}
}

Removes the double memcpy and shortens some variable names in order
to make the whole thing more readable.

Regards,

Tvrtko