[v3] drm/amdgpu: Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES

Submitted by Grodzovsky, Andrey on Feb. 1, 2019, 9:41 p.m.

Details

Message ID 1549057286-31201-1-git-send-email-andrey.grodzovsky@amd.com
State New
Series "drm/amdgpu: Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES"
Headers show

Commit Message

Grodzovsky, Andrey Feb. 1, 2019, 9:41 p.m.
New chunk for dependency on start of job's execution instead on
the end. This is used for GPU deadlock prevention when
userspace uses mid-IB fences to wait for mid-IB work on other rings.

v2: Fix typo in AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
v3: Bump KMS version

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 12 +++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
 include/uapi/drm/amdgpu_drm.h           |  1 +
 3 files changed, 14 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1c49b82..3f21eca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -214,6 +214,7 @@  static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
 			break;
 
 		default:
@@ -1090,6 +1091,14 @@  static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 
 		fence = amdgpu_ctx_get_fence(ctx, entity,
 					     deps[i].handle);
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
+			struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
+
+			dma_fence_put(fence);
+			fence = dma_fence_get(&s_fence->scheduled);
+		}
+
 		if (IS_ERR(fence)) {
 			r = PTR_ERR(fence);
 			amdgpu_ctx_put(ctx);
@@ -1177,7 +1186,8 @@  static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 
 		chunk = &p->chunks[i];
 
-		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES ||
+		    chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
 			r = amdgpu_cs_process_fence_dep(p, chunk);
 			if (r)
 				return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index c806f98..1158a6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -71,9 +71,10 @@ 
  * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
  * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
  * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
+ * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	27
+#define KMS_DRIVER_MINOR	28
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index faaad04..43d03a2 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -526,6 +526,7 @@  struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
 #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
 #define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
+#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES	0x07
 
 struct drm_amdgpu_cs_chunk {
 	__u32		chunk_id;

Comments

Koenig, Christian Feb. 3, 2019, 3:51 p.m.
Am 01.02.19 um 22:41 schrieb Andrey Grodzovsky:
> New chunk for dependency on start of job's execution instead on

> the end. This is used for GPU deadlock prevention when

> userspace uses mid-IB fences to wait for mid-IB work on other rings.

>

> v2: Fix typo in AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES

> v3: Bump KMS version

>

> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

> Suggested-by: Christian Koenig <Christian.Koenig@amd.com>

> ---

>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 12 +++++++++++-

>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-

>   include/uapi/drm/amdgpu_drm.h           |  1 +

>   3 files changed, 14 insertions(+), 2 deletions(-)

>

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

> index 1c49b82..3f21eca 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

> @@ -214,6 +214,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs

>   		case AMDGPU_CHUNK_ID_DEPENDENCIES:

>   		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:

>   		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:

> +		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:

>   			break;

>   

>   		default:

> @@ -1090,6 +1091,14 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,

>   

>   		fence = amdgpu_ctx_get_fence(ctx, entity,

>   					     deps[i].handle);

> +

> +		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {

> +			struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);

> +

> +			dma_fence_put(fence);

> +			fence = dma_fence_get(&s_fence->scheduled);


You need to change the order here or otherwise the dma_fence_put could 
destroy the fence we want.

Apart from that looks really good to me,
Christian.

> +		}

> +

>   		if (IS_ERR(fence)) {

>   			r = PTR_ERR(fence);

>   			amdgpu_ctx_put(ctx);

> @@ -1177,7 +1186,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,

>   

>   		chunk = &p->chunks[i];

>   

> -		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {

> +		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES ||

> +		    chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {

>   			r = amdgpu_cs_process_fence_dep(p, chunk);

>   			if (r)

>   				return r;

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

> index c806f98..1158a6f 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

> @@ -71,9 +71,10 @@

>    * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).

>    * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.

>    * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.

> + * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES

>    */

>   #define KMS_DRIVER_MAJOR	3

> -#define KMS_DRIVER_MINOR	27

> +#define KMS_DRIVER_MINOR	28

>   #define KMS_DRIVER_PATCHLEVEL	0

>   

>   int amdgpu_vram_limit = 0;

> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h

> index faaad04..43d03a2 100644

> --- a/include/uapi/drm/amdgpu_drm.h

> +++ b/include/uapi/drm/amdgpu_drm.h

> @@ -526,6 +526,7 @@ struct drm_amdgpu_gem_va {

>   #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04

>   #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05

>   #define AMDGPU_CHUNK_ID_BO_HANDLES      0x06

> +#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES	0x07

>   

>   struct drm_amdgpu_cs_chunk {

>   	__u32		chunk_id;
Grodzovsky, Andrey Feb. 4, 2019, 3:45 p.m.
Thanks, I will update, add your RB and push.

Andrey

On 2/3/19 10:51 AM, Koenig, Christian wrote:

@@ -1090,6 +1091,14 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,

                fence = amdgpu_ctx_get_fence(ctx, entity,
                                             deps[i].handle);
+
+               if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
+                       struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
+
+                       dma_fence_put(fence);
+                       fence = dma_fence_get(&s_fence->scheduled);


You need to change the order here or otherwise the dma_fence_put could
destroy the fence we want.

Apart from that looks really good to me,
Christian.