drm/i915/gvt: scan non-privileged batch buffer for debug purpose

Submitted by Zhao, Yan Y on March 14, 2018, 12:43 a.m.

Details

Message ID 1520988193-16891-1-git-send-email-yan.y.zhao@intel.com
State New
Headers show
Series "drm/i915/gvt: scan non-privileged batch buffer for debug purpose" ( rev: 1 ) in Intel GVT devel

Not browsing as part of any series.

Commit Message

Zhao, Yan Y March 14, 2018, 12:43 a.m.
For performance reasons, scanning of non-privileged batch buffers is turned
off by default. But for debugging purposes, it can be turned on via debugfs.
After scanning, we submit the original non-privileged batch buffer into
hardware, so that the scanning is only a peeking window of guest submitted
commands and will not affect the execution results.

Signed-off-by: Zhao Yan <yan.y.zhao@intel.com>
---
 drivers/gpu/drm/i915/gvt/cmd_parser.c | 42 ++++++++++++++++--------
 drivers/gpu/drm/i915/gvt/debugfs.c    |  6 ++++
 drivers/gpu/drm/i915/gvt/gvt.h        |  1 +
 drivers/gpu/drm/i915/gvt/scheduler.c  | 61 +++++++++++++++++++++++------------
 drivers/gpu/drm/i915/gvt/scheduler.h  |  1 +
 drivers/gpu/drm/i915/gvt/trace.h      | 24 +++++++++++---
 6 files changed, 97 insertions(+), 38 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index c8454ac..713f5de 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1602,7 +1602,7 @@  static int batch_buffer_needs_scan(struct parser_exec_state *s)
 	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
 		|| IS_KABYLAKE(gvt->dev_priv)) {
 		/* BDW decides privilege based on address space */
-		if (cmd_val(s, 0) & (1 << 8))
+		if (cmd_val(s, 0) & (1 << 8) && !s->vgpu->scan_nonprivbb)
 			return 0;
 	}
 	return 1;
@@ -1616,6 +1616,8 @@  static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size)
 	bool bb_end = false;
 	struct intel_vgpu *vgpu = s->vgpu;
 	u32 cmd;
+	struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ?
+		s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm;
 
 	*bb_size = 0;
 
@@ -1627,18 +1629,22 @@  static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size)
 	cmd = cmd_val(s, 0);
 	info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 	if (info == NULL) {
-		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
-				cmd, get_opcode(cmd, s->ring_id));
+		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n",
+				cmd, get_opcode(cmd, s->ring_id),
+				(s->buf_addr_type == PPGTT_BUFFER) ?
+				"ppgtt" : "ggtt", s->ring_id, s->workload);
 		return -EBADRQC;
 	}
 	do {
-		if (copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
+		if (copy_gma_to_hva(s->vgpu, mm,
 				gma, gma + 4, &cmd) < 0)
 			return -EFAULT;
 		info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 		if (info == NULL) {
-			gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
-				cmd, get_opcode(cmd, s->ring_id));
+			gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n",
+				cmd, get_opcode(cmd, s->ring_id),
+				(s->buf_addr_type == PPGTT_BUFFER) ?
+				"ppgtt" : "ggtt", s->ring_id, s->workload);
 			return -EBADRQC;
 		}
 
@@ -1664,6 +1670,9 @@  static int perform_bb_shadow(struct parser_exec_state *s)
 	unsigned long gma = 0;
 	unsigned long bb_size;
 	int ret = 0;
+	unsigned long bb_offset = 0;
+	struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ?
+		s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm;
 
 	/* get the start gm address of the batch buffer */
 	gma = get_gma_bb_from_cmd(s, 1);
@@ -1678,8 +1687,12 @@  static int perform_bb_shadow(struct parser_exec_state *s)
 	if (!bb)
 		return -ENOMEM;
 
+	bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? false : true;
+	if (bb->ppgtt)
+		bb_offset = gma & ~I915_GTT_PAGE_MASK;
+
 	bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv,
-					 roundup(bb_size, PAGE_SIZE));
+				 roundup(bb_size + bb_offset, PAGE_SIZE));
 	if (IS_ERR(bb->obj)) {
 		ret = PTR_ERR(bb->obj);
 		goto err_free_bb;
@@ -1700,9 +1713,9 @@  static int perform_bb_shadow(struct parser_exec_state *s)
 		bb->clflush &= ~CLFLUSH_BEFORE;
 	}
 
-	ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
+	ret = copy_gma_to_hva(s->vgpu, mm,
 			      gma, gma + bb_size,
-			      bb->va);
+			      bb->va + bb_offset);
 	if (ret < 0) {
 		gvt_vgpu_err("fail to copy guest ring buffer\n");
 		ret = -EFAULT;
@@ -1723,7 +1736,7 @@  static int perform_bb_shadow(struct parser_exec_state *s)
 	 * buffer's gma in pair. After all, we don't want to pin the shadow
 	 * buffer here (too early).
 	 */
-	s->ip_va = bb->va;
+	s->ip_va = bb->va + bb_offset;
 	s->ip_gma = gma;
 	return 0;
 err_unmap:
@@ -2462,15 +2475,18 @@  static int cmd_parser_exec(struct parser_exec_state *s)
 
 	info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 	if (info == NULL) {
-		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
-				cmd, get_opcode(cmd, s->ring_id));
+		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n",
+				cmd, get_opcode(cmd, s->ring_id),
+				(s->buf_addr_type == PPGTT_BUFFER) ?
+				"ppgtt" : "ggtt", s->ring_id, s->workload);
 		return -EBADRQC;
 	}
 
 	s->info = info;
 
 	trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va,
-			  cmd_length(s), s->buf_type);
+			  cmd_length(s), s->buf_type, s->buf_addr_type,
+			  s->workload, info->name);
 
 	if (info->handler) {
 		ret = info->handler(s);
diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c
index 32a66df..44ffe81 100644
--- a/drivers/gpu/drm/i915/gvt/debugfs.c
+++ b/drivers/gpu/drm/i915/gvt/debugfs.c
@@ -162,6 +162,12 @@  int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu)
 	if (!ent)
 		return -ENOMEM;
 
+	ent = debugfs_create_bool("scan_nonprivbb", 0644, vgpu->debugfs,
+				  &vgpu->scan_nonprivbb);
+	if (!ent)
+		return -ENOMEM;
+
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index efacd8a..b52b133 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -226,6 +226,7 @@  struct intel_vgpu {
 
 	struct completion vblank_done;
 
+	bool scan_nonprivbb;
 };
 
 /* validating GM healthy status*/
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 6a1f7ed..c4ded4e 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -425,31 +425,52 @@  static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 	int ret;
 
 	list_for_each_entry(bb, &workload->shadow_bb, list) {
-		bb->vma = i915_gem_object_ggtt_pin(bb->obj, NULL, 0, 0, 0);
-		if (IS_ERR(bb->vma)) {
-			ret = PTR_ERR(bb->vma);
-			goto err;
-		}
+		if (bb->ppgtt) {
+			/* for non-priv bb, scan&shadow is only for
+			 * debugging purpose, so the content of shadow bb
+			 * is the same as original bb. Therefore,
+			 * here, rather than switch to shadow bb's gma
+			 * address, we directly use original batch buffer's
+			 * gma address, and send original bb to hardware
+			 * directly
+			 */
+			if (bb->clflush & CLFLUSH_AFTER) {
+				drm_clflush_virt_range(bb->va,
+						bb->obj->base.size);
+				bb->clflush &= ~CLFLUSH_AFTER;
+			}
+			i915_gem_obj_finish_shmem_access(bb->obj);
+			bb->accessing = false;
+		} else {
+			bb->vma = i915_gem_object_ggtt_pin(bb->obj,
+					NULL, 0, 0, 0);
+			if (IS_ERR(bb->vma)) {
+				ret = PTR_ERR(bb->vma);
+				goto err;
+			}
 
-		/* relocate shadow batch buffer */
-		bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
-		if (gmadr_bytes == 8)
-			bb->bb_start_cmd_va[2] = 0;
+			/* relocate shadow batch buffer */
+			bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
+			if (gmadr_bytes == 8)
+				bb->bb_start_cmd_va[2] = 0;
 
-		/* No one is going to touch shadow bb from now on. */
-		if (bb->clflush & CLFLUSH_AFTER) {
-			drm_clflush_virt_range(bb->va, bb->obj->base.size);
-			bb->clflush &= ~CLFLUSH_AFTER;
-		}
+			/* No one is going to touch shadow bb from now on. */
+			if (bb->clflush & CLFLUSH_AFTER) {
+				drm_clflush_virt_range(bb->va,
+						bb->obj->base.size);
+				bb->clflush &= ~CLFLUSH_AFTER;
+			}
 
-		ret = i915_gem_object_set_to_gtt_domain(bb->obj, false);
-		if (ret)
-			goto err;
+			ret = i915_gem_object_set_to_gtt_domain(bb->obj,
+					false);
+			if (ret)
+				goto err;
 
-		i915_gem_obj_finish_shmem_access(bb->obj);
-		bb->accessing = false;
+			i915_gem_obj_finish_shmem_access(bb->obj);
+			bb->accessing = false;
 
-		i915_vma_move_to_active(bb->vma, workload->req, 0);
+			i915_vma_move_to_active(bb->vma, workload->req, 0);
+		}
 	}
 	return 0;
 err:
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index 2cfc639..2932186 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -124,6 +124,7 @@  struct intel_vgpu_shadow_bb {
 	u32 *bb_start_cmd_va;
 	unsigned int clflush;
 	bool accessing;
+	bool ppgtt;
 };
 
 #define workload_q_head(vgpu, ring_id) \
diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h
index 82093f1..1fd6420 100644
--- a/drivers/gpu/drm/i915/gvt/trace.h
+++ b/drivers/gpu/drm/i915/gvt/trace.h
@@ -224,19 +224,25 @@ 
 	TP_printk("%s", __entry->buf)
 );
 
+#define GVT_CMD_STR_LEN 40
 TRACE_EVENT(gvt_command,
-	TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len,
-		 u32 buf_type),
+	TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va,
+		u32 cmd_len,  u32 buf_type, u32 buf_addr_type,
+		void *workload, char *cmd_name),
 
-	TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type),
+	TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type,
+		buf_addr_type, workload, cmd_name),
 
 	TP_STRUCT__entry(
 		__field(u8, vgpu_id)
 		__field(u8, ring_id)
 		__field(u32, ip_gma)
 		__field(u32, buf_type)
+		__field(u32, buf_addr_type)
 		__field(u32, cmd_len)
+		__field(void*, workload)
 		__dynamic_array(u32, raw_cmd, cmd_len)
+		__array(char, cmd_name, GVT_CMD_STR_LEN)
 	),
 
 	TP_fast_assign(
@@ -244,17 +250,25 @@ 
 		__entry->ring_id = ring_id;
 		__entry->ip_gma = ip_gma;
 		__entry->buf_type = buf_type;
+		__entry->buf_addr_type = buf_addr_type;
 		__entry->cmd_len = cmd_len;
+		__entry->workload = workload;
+		snprintf(__entry->cmd_name, GVT_CMD_STR_LEN, "%s", cmd_name);
 		memcpy(__get_dynamic_array(raw_cmd), cmd_va, cmd_len * sizeof(*cmd_va));
 	),
 
 
-	TP_printk("vgpu%d ring %d: buf_type %u, ip_gma %08x, raw cmd %s",
+	TP_printk("vgpu%d ring %d: address_type %u, buf_type %u, ip_gma %08x,cmd (name=%s,len=%u,raw cmd=%s), workload=%p\n",
 		__entry->vgpu_id,
 		__entry->ring_id,
+		__entry->buf_addr_type,
 		__entry->buf_type,
 		__entry->ip_gma,
-		__print_array(__get_dynamic_array(raw_cmd), __entry->cmd_len, 4))
+		__entry->cmd_name,
+		__entry->cmd_len,
+		__print_array(__get_dynamic_array(raw_cmd),
+			__entry->cmd_len, 4),
+		__entry->workload)
 );
 
 #define GVT_TEMP_STR_LEN 10

Comments

On Wed, Mar 14, 2018 at 08:43:13AM +0800, Zhao Yan wrote:
>  
> @@ -1664,6 +1670,9 @@ static int perform_bb_shadow(struct parser_exec_state *s)
>  	unsigned long gma = 0;
>  	unsigned long bb_size;
>  	int ret = 0;
> +	unsigned long bb_offset = 0;
> +	struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ?
> +		s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm;
>  
>  	/* get the start gm address of the batch buffer */
>  	gma = get_gma_bb_from_cmd(s, 1);
> @@ -1678,8 +1687,12 @@ static int perform_bb_shadow(struct parser_exec_state *s)
>  	if (!bb)
>  		return -ENOMEM;
>  
> +	bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? false : true;
> +	if (bb->ppgtt)
> +		bb_offset = gma & ~I915_GTT_PAGE_MASK;
> +
>  	bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv,
> -					 roundup(bb_size, PAGE_SIZE));
> +				 roundup(bb_size + bb_offset, PAGE_SIZE));
Just curious why 'bb_offset' size of space is allocated but looks like not used later?

>  	if (IS_ERR(bb->obj)) {
>  		ret = PTR_ERR(bb->obj);
>  		goto err_free_bb;
> @@ -1700,9 +1713,9 @@ static int perform_bb_shadow(struct parser_exec_state *s)
>  		bb->clflush &= ~CLFLUSH_BEFORE;
>  	}
>  
> -	ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
> +	ret = copy_gma_to_hva(s->vgpu, mm,
>  			      gma, gma + bb_size,
> -			      bb->va);
> +			      bb->va + bb_offset);
>  	if (ret < 0) {
>  		gvt_vgpu_err("fail to copy guest ring buffer\n");
>  		ret = -EFAULT;
On 3/14/2018 4:17 PM, Hang Yuan wrote:
> On Wed, Mar 14, 2018 at 08:43:13AM +0800, Zhao Yan wrote:
>>   
>> @@ -1664,6 +1670,9 @@ static int perform_bb_shadow(struct parser_exec_state *s)
>>   	unsigned long gma = 0;
>>   	unsigned long bb_size;
>>   	int ret = 0;
>> +	unsigned long bb_offset = 0;
>> +	struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ?
>> +		s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm;
>>   
>>   	/* get the start gm address of the batch buffer */
>>   	gma = get_gma_bb_from_cmd(s, 1);
>> @@ -1678,8 +1687,12 @@ static int perform_bb_shadow(struct parser_exec_state *s)
>>   	if (!bb)
>>   		return -ENOMEM;
>>   
>> +	bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? false : true;
>> +	if (bb->ppgtt)
>> +		bb_offset = gma & ~I915_GTT_PAGE_MASK;
>> +
>>   	bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv,
>> -					 roundup(bb_size, PAGE_SIZE));
>> +				 roundup(bb_size + bb_offset, PAGE_SIZE));
> Just curious why 'bb_offset' size of space is allocated but looks like not used later?
or do you mean why bb->obj is not used later? Because it's referenced by 
bb->va. For scanning, we use s->ip_va and s->ip_gma.
>
>>   	if (IS_ERR(bb->obj)) {
>>   		ret = PTR_ERR(bb->obj);
>>   		goto err_free_bb;
>> @@ -1700,9 +1713,9 @@ static int perform_bb_shadow(struct parser_exec_state *s)
>>   		bb->clflush &= ~CLFLUSH_BEFORE;
>>   	}
>>   
>> -	ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
>> +	ret = copy_gma_to_hva(s->vgpu, mm,
>>   			      gma, gma + bb_size,
>> -			      bb->va);
>> +			      bb->va + bb_offset);
bb_offset is used here. I added bb_offset to make the shadowed BB look the 
same as the original BB, so that if later we want to change the PPGTT's 
mapping of the BB's gma to the shadowed BB's physical address, it still works.
>>   	if (ret < 0) {
>>   		gvt_vgpu_err("fail to copy guest ring buffer\n");
>>   		ret = -EFAULT;
On Thu, Mar 15, 2018 at 09:47:19AM +0800, Zhao, Yan Y wrote:
> 
> 
> On 3/14/2018 4:17 PM, Hang Yuan wrote:
> >On Wed, Mar 14, 2018 at 08:43:13AM +0800, Zhao Yan wrote:
> >>@@ -1664,6 +1670,9 @@ static int perform_bb_shadow(struct parser_exec_state *s)
> >>  	unsigned long gma = 0;
> >>  	unsigned long bb_size;
> >>  	int ret = 0;
> >>+	unsigned long bb_offset = 0;
> >>+	struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ?
> >>+		s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm;
> >>  	/* get the start gm address of the batch buffer */
> >>  	gma = get_gma_bb_from_cmd(s, 1);
> >>@@ -1678,8 +1687,12 @@ static int perform_bb_shadow(struct parser_exec_state *s)
> >>  	if (!bb)
> >>  		return -ENOMEM;
> >>+	bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? false : true;
> >>+	if (bb->ppgtt)
> >>+		bb_offset = gma & ~I915_GTT_PAGE_MASK;
> >>+
> >>  	bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv,
> >>-					 roundup(bb_size, PAGE_SIZE));
> >>+				 roundup(bb_size + bb_offset, PAGE_SIZE));
> >Just curious why 'bb_offset' size of space is allocated but looks like not used later?
> or you mean why bb->obj is not used later? because it's referenced by
> bb->va. For scan it, we use s->ip_va, and s->ip_gma
I'm wondering why you allocate additional bb_offset size of buffer for non-pri BB. You have explained later. Thanks.

> >
> >>  	if (IS_ERR(bb->obj)) {
> >>  		ret = PTR_ERR(bb->obj);
> >>  		goto err_free_bb;
> >>@@ -1700,9 +1713,9 @@ static int perform_bb_shadow(struct parser_exec_state *s)
> >>  		bb->clflush &= ~CLFLUSH_BEFORE;
> >>  	}
> >>-	ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
> >>+	ret = copy_gma_to_hva(s->vgpu, mm,
> >>  			      gma, gma + bb_size,
> >>-			      bb->va);
> >>+			      bb->va + bb_offset);
> bb_offset is used here. I add bb_offset is to make shadowed BB looks the
> same as original BB, so if later we want to change PPGTT's mapping of the
> BB's gma to shadowed BB's physical address, it still works.
> >>  	if (ret < 0) {
> >>  		gvt_vgpu_err("fail to copy guest ring buffer\n");
> >>  		ret = -EFAULT;
> 
> _______________________________________________
> intel-gvt-dev mailing list
> intel-gvt-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev