drm/i915/gvt: keep oa config in shadow ctx

Submitted by He, Min on March 2, 2018, 2 a.m.

Details

Message ID 1519956025-32516-1-git-send-email-min.he@intel.com
State New
Series "drm/i915/gvt: keep oa config in shadow ctx" ( rev: 1 ) in Intel GVT devel


Commit Message

He, Min March 2, 2018, 2 a.m.
When populating the shadow ctx from the guest, we should handle the
OA-related registers in the HW ctx, so that they will not be overwritten
by the guest OA configs. This patch makes it possible to capture OA data
from the host for both the host and guests.

Signed-off-by: Min He <min.he@intel.com>
---
 drivers/gpu/drm/i915/gvt/scheduler.c | 50 ++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/gvt/scheduler.h |  4 +++
 2 files changed, 54 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 9b92b4e..6a1f7ed 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -52,6 +52,54 @@  static void set_context_pdp_root_pointer(
 		pdp_pair[i].val = pdp[7 - i];
 }
 
+/*
+ * When populating the shadow ctx from the guest, we should not override
+ * OA-related registers, so they won't be overwritten by the guest OA configs.
+ * This makes it possible to capture OA data from the host for host and guests.
+ */
+static void sr_oa_regs(struct intel_vgpu_workload *workload,
+		u32 *reg_state, bool save)
+{
+	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
+	u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
+	u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
+	int i = 0;
+	u32 flex_mmio[] = {
+		i915_mmio_reg_offset(EU_PERF_CNTL0),
+		i915_mmio_reg_offset(EU_PERF_CNTL1),
+		i915_mmio_reg_offset(EU_PERF_CNTL2),
+		i915_mmio_reg_offset(EU_PERF_CNTL3),
+		i915_mmio_reg_offset(EU_PERF_CNTL4),
+		i915_mmio_reg_offset(EU_PERF_CNTL5),
+		i915_mmio_reg_offset(EU_PERF_CNTL6),
+	};
+
+	if (!workload || !reg_state || workload->ring_id != RCS)
+		return;
+
+	if (save) {
+		workload->oactxctrl = reg_state[ctx_oactxctrl + 1];
+
+		for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+			u32 state_offset = ctx_flexeu0 + i * 2;
+
+			workload->flex_mmio[i] = reg_state[state_offset + 1];
+		}
+	} else {
+		reg_state[ctx_oactxctrl] =
+			i915_mmio_reg_offset(GEN8_OACTXCONTROL);
+		reg_state[ctx_oactxctrl + 1] = workload->oactxctrl;
+
+		for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+			u32 state_offset = ctx_flexeu0 + i * 2;
+			u32 mmio = flex_mmio[i];
+
+			reg_state[state_offset] = mmio;
+			reg_state[state_offset + 1] = workload->flex_mmio[i];
+		}
+	}
+}
+
 static int populate_shadow_context(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
@@ -98,6 +146,7 @@  static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
 	shadow_ring_context = kmap(page);
 
+	sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
 #define COPY_REG(name) \
 	intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
 		+ RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
@@ -122,6 +171,7 @@  static int populate_shadow_context(struct intel_vgpu_workload *workload)
 			sizeof(*shadow_ring_context),
 			I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
 
+	sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
 	kunmap(page);
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index bab4097..2cfc639 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -110,6 +110,10 @@  struct intel_vgpu_workload {
 	/* shadow batch buffer */
 	struct list_head shadow_bb;
 	struct intel_shadow_wa_ctx wa_ctx;
+
+	/* oa registers */
+	u32 oactxctrl;
+	u32 flex_mmio[7];
 };
 
 struct intel_vgpu_shadow_bb {
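
The context image that sr_oa_regs() walks stores registers as (MMIO offset,
value) pairs of 32-bit dwords, which is why the loop steps by 2 per FLEX_EU
register and the value lives at state_offset + 1. The standalone sketch below
models just that pair layout and the save/restore round trip; the indices and
register addresses here are made up for illustration and are not the real
context image offsets (those come from dev_priv->perf.oa at runtime).

/* Illustrative model of the (offset, value) pair layout used by
 * sr_oa_regs(); not kernel code, all constants are placeholders. */
#include <stdio.h>

#define CTX_FLEXEU0   12     /* hypothetical dword index of the first pair */
#define NUM_FLEX_REGS  7

int main(void)
{
	unsigned int reg_state[64] = { 0 };  /* stand-in for the context image */
	unsigned int saved[NUM_FLEX_REGS];
	int i;

	/* save: only the values (odd dword of each pair) need remembering */
	for (i = 0; i < NUM_FLEX_REGS; i++)
		saved[i] = reg_state[CTX_FLEXEU0 + i * 2 + 1];

	/* ... the guest context would be copied over reg_state here ... */

	/* restore: rewrite both the offset and the value of each pair */
	for (i = 0; i < NUM_FLEX_REGS; i++) {
		reg_state[CTX_FLEXEU0 + i * 2] = 0x1000 + i * 4; /* fake offsets */
		reg_state[CTX_FLEXEU0 + i * 2 + 1] = saved[i];
	}

	printf("first flex value: %#x\n", reg_state[CTX_FLEXEU0 + 1]);
	return 0;
}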

Comments

On 2018.03.02 10:00:25 +0800, Min He wrote:
> [...]
> @@ -98,6 +146,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
>  	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
>  	shadow_ring_context = kmap(page);
>  
> +	sr_oa_regs(workload, (u32 *)shadow_ring_context, true);

hmm, looks like this could still save/restore guest-modified state? I think we
should save the OA state in the shadow ctx before handing it back to the guest
after the request, and restore it before the next submission in this
populate_shadow_context().

> [...]
He, Min March 5, 2018, 3:25 a.m.

> On 2018.03.02 10:00:25 +0800, Min He wrote:
> > [...]
> > +	sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
> 
> hmm, looks like this could still save/restore guest-modified state? I think
> we should save the OA state in the shadow ctx before handing it back to the
> guest after the request, and restore it before the next submission in this
> populate_shadow_context().

No, it's to save the original shadow ctx state. After populating the guest ctx
into the shadow ctx, we restore the original OA regs, so the shadow ctx will
always keep the host configuration.
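
(To make this ordering concrete, here is a standalone model of the
populate_shadow_context() flow described above: save the host OA slots, copy
the guest image over the shadow image, then restore the OA slots. The array,
index, and values are illustrative only, not the real context layout.)

/* Standalone demo, not kernel code: the save/copy/restore ordering
 * guarantees the host OA config survives the guest context copy. */
#include <string.h>
#include <assert.h>

#define CTX_DWORDS 16
#define OA_SLOT     5               /* hypothetical OACTXCONTROL value slot */

int main(void)
{
	unsigned int shadow[CTX_DWORDS] = { 0 };
	unsigned int guest[CTX_DWORDS];
	unsigned int saved;

	memset(guest, 0xAA, sizeof(guest));     /* guest's own OA config      */
	shadow[OA_SLOT] = 0x1234;               /* host OA config             */

	saved = shadow[OA_SLOT];                /* sr_oa_regs(..., true)      */
	memcpy(shadow, guest, sizeof(shadow));  /* guest ctx copied over      */
	shadow[OA_SLOT] = saved;                /* sr_oa_regs(..., false)     */

	assert(shadow[OA_SLOT] == 0x1234);      /* host config preserved      */
	return 0;
}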

On 2018.03.05 03:25:45 +0000, He, Min wrote:
> [...]
> > hmm, looks like this could still save/restore guest-modified state? I think
> > we should save the OA state in the shadow ctx before handing it back to the
> > guest after the request, and restore it before the next submission in this
> > populate_shadow_context().
> 
> No, it's to save the original shadow ctx state. After populating the guest ctx
> into the shadow ctx, we restore the original OA regs, so the shadow ctx will
> always keep the host configuration.
>

oh, yeah, this is for the shadow ctx only and is not touched by the guest. It
also correctly covers the case where the OA config might change after the ctx
is given back to the guest and before the next submission, so your change
ensures a sane shadow state. Looks like this is the missing fix to make OA
actually work correctly with a guest.

On 2018.03.02 10:00:25 +0800, Min He wrote:
> When populating the shadow ctx from the guest, we should handle the
> OA-related registers in the HW ctx, so that they will not be overwritten
> by the guest OA configs. This patch makes it possible to capture OA data
> from the host for both the host and guests.
> 
> Signed-off-by: Min He <min.he@intel.com>
> ---

Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>

cc Zhi, I think this one should be pushed to 4.16 to fix guest perf profiling.
