[v2,8/8] panfrost: Add backend targeting the DRM driver

Submitted by Tomeu Vizoso on March 8, 2019, 9:27 a.m.

Details

Message ID 20190308092713.9307-9-tomeu.vizoso@collabora.com
State New
Headers show
Series "panfrost: Add DRM backend" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Tomeu Vizoso March 8, 2019, 9:27 a.m.
This backend interacts with the new DRM driver for Midgard GPUs which is
currently in development.

When using this backend, Panfrost has roughly on-par functionality as
when using the non-DRM driver from Arm.

Signed-off-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>

---

v2: - Adapt to changes in the UAPI, mostly one atom per job.
    - Mmap imported buffers, needed for kmscube -M rgba
---
 include/drm-uapi/panfrost_drm.h        | 141 ++++++++++
 src/gallium/drivers/panfrost/pan_drm.c | 363 +++++++++++++++++++++++++
 2 files changed, 504 insertions(+)
 create mode 100644 include/drm-uapi/panfrost_drm.h

Patch hide | download patch | download mbox

diff --git a/include/drm-uapi/panfrost_drm.h b/include/drm-uapi/panfrost_drm.h
new file mode 100644
index 000000000000..7618f14f9e26
--- /dev/null
+++ b/include/drm-uapi/panfrost_drm.h
@@ -0,0 +1,141 @@ 
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2018 Broadcom
+ * Copyright © 2019 Collabora ltd.
+ */
+#ifndef _PANFROST_DRM_H_
+#define _PANFROST_DRM_H_
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_PANFROST_SUBMIT			0x00
+#define DRM_PANFROST_WAIT_BO			0x01
+#define DRM_PANFROST_CREATE_BO			0x02
+#define DRM_PANFROST_MMAP_BO			0x03
+#define DRM_PANFROST_GET_PARAM			0x04
+#define DRM_PANFROST_GET_BO_OFFSET		0x05
+
+#define DRM_IOCTL_PANFROST_SUBMIT		DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit)
+#define DRM_IOCTL_PANFROST_WAIT_BO		DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo)
+#define DRM_IOCTL_PANFROST_CREATE_BO		DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_CREATE_BO, struct drm_panfrost_create_bo)
+#define DRM_IOCTL_PANFROST_MMAP_BO		DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_MMAP_BO, struct drm_panfrost_mmap_bo)
+#define DRM_IOCTL_PANFROST_GET_PARAM		DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_PARAM, struct drm_panfrost_get_param)
+#define DRM_IOCTL_PANFROST_GET_BO_OFFSET	DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_BO_OFFSET, struct drm_panfrost_get_bo_offset)
+
+#define PANFROST_JD_REQ_FS (1 << 0)
+
+/**
+ * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D
+ * engine.
+ *
+ * This asks the kernel to have the GPU execute a render command list.
+ */
+struct drm_panfrost_submit {
+
+	/** Address to GPU mapping of job descriptor */
+	__u64 jc;
+
+	/** An optional array of sync objects to wait on before starting this job. */
+	__u64 in_syncs;
+
+	/** Number of sync objects to wait on before starting this job. */
+	__u32 in_sync_count;
+
+	/** An optional sync object to place the completion fence in. */
+	__u32 out_sync;
+
+	/** Pointer to a u32 array of the BOs that are referenced by the job. */
+	__u64 bo_handles;
+
+	/** Number of BO handles passed in (size is that times 4). */
+	__u32 bo_handle_count;
+
+	/** A combination of PANFROST_JD_REQ_* */
+	__u32 requirements;
+};
+
+/**
+ * struct drm_panfrost_wait_bo - ioctl argument for waiting for
+ * completion of the last DRM_PANFROST_SUBMIT_CL on a BO.
+ *
+ * This is useful for cases where multiple processes might be
+ * rendering to a BO and you want to wait for all rendering to be
+ * completed.
+ */
+struct drm_panfrost_wait_bo {
+	__u32 handle;
+	__u32 pad;
+	__s64 timeout_ns;	/* absolute */
+};
+
+/**
+ * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_panfrost_create_bo {
+	__u32 size;
+	__u32 flags;
+	/** Returned GEM handle for the BO. */
+	__u32 handle;
+	/**
+	 * Returned offset for the BO in the GPU address space.  This offset
+	 * is private to the DRM fd and is valid for the lifetime of the GEM
+	 * handle.
+	 *
+	 * This offset value will always be nonzero, since various HW
+	 * units treat 0 specially.
+	 */
+	__u64 offset;
+};
+
+/**
+ * struct drm_panfrost_mmap_bo - ioctl argument for mapping Panfrost BOs.
+ *
+ * This doesn't actually perform an mmap.  Instead, it returns the
+ * offset you need to use in an mmap on the DRM device node.  This
+ * means that tools like valgrind end up knowing about the mapped
+ * memory.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_panfrost_mmap_bo {
+	/** Handle for the object being mapped. */
+	__u32 handle;
+	__u32 flags;
+	/** offset into the drm node to use for subsequent mmap call. */
+	__u64 offset;
+};
+
+enum drm_panfrost_param {
+	DRM_PANFROST_PARAM_GPU_ID,
+};
+
+struct drm_panfrost_get_param {
+	__u32 param;
+	__u32 pad;
+	__u64 value;
+};
+
+/**
+ * Returns the offset for the BO in the GPU address space for this DRM fd.
+ * This is the same value returned by drm_panfrost_create_bo, if that was called
+ * from this DRM fd.
+ */
+struct drm_panfrost_get_bo_offset {
+	__u32 handle;
+	__u32 pad;
+	__u64 offset;
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _PANFROST_DRM_H_ */
diff --git a/src/gallium/drivers/panfrost/pan_drm.c b/src/gallium/drivers/panfrost/pan_drm.c
index cb01cce634f7..a4018a1c74af 100644
--- a/src/gallium/drivers/panfrost/pan_drm.c
+++ b/src/gallium/drivers/panfrost/pan_drm.c
@@ -21,16 +21,365 @@ 
  * SOFTWARE.
  *
  */
+
+#include <fcntl.h>
+#include <xf86drm.h>
+
+#include "drm-uapi/panfrost_drm.h"
+
 #include "util/u_memory.h"
+#include "util/os_time.h"
 
 #include "pan_screen.h"
+#include "pan_resource.h"
+#include "pan_context.h"
 #include "pan_drm.h"
+#include "pan_trace.h"
 
 struct panfrost_drm {
 	struct panfrost_driver base;
 	int fd;
 };
 
+static void
+panfrost_drm_allocate_slab(struct panfrost_screen *screen,
+		           struct panfrost_memory *mem,
+		           size_t pages,
+		           bool same_va,
+		           int extra_flags,
+		           int commit_count,
+		           int extent)
+{
+	struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
+	struct drm_panfrost_create_bo create_bo = {
+		        .size = pages * 4096,
+		        .flags = 0,  // TODO figure out proper flags..
+	};
+	struct drm_panfrost_mmap_bo mmap_bo = {0,};
+	int ret;
+
+	// TODO cache allocations
+	// TODO properly handle errors
+	// TODO take into account extra_flags
+
+	ret = drmIoctl(drm->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
+	if (ret) {
+                fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %d\n", ret);
+		assert(0);
+	}
+
+	mem->gpu = create_bo.offset;
+	mem->gem_handle = create_bo.handle;
+        mem->stack_bottom = 0;
+        mem->size = create_bo.size;
+
+	// TODO map and unmap on demand?
+	mmap_bo.handle = create_bo.handle;
+	ret = drmIoctl(drm->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
+	if (ret) {
+                fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %d\n", ret);
+		assert(0);
+	}
+
+        mem->cpu = mmap(NULL, mem->size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                       drm->fd, mmap_bo.offset);
+        if (mem->cpu == MAP_FAILED) {
+                fprintf(stderr, "mmap failed: %p\n", mem->cpu);
+		assert(0);
+	}
+
+        /* Record the mmap if we're tracing */
+        if (!(extra_flags & PAN_ALLOCATE_GROWABLE))
+                pantrace_mmap(mem->gpu, mem->cpu, mem->size, NULL);
+}
+
+static void
+panfrost_drm_free_slab(struct panfrost_screen *screen, struct panfrost_memory *mem)
+{
+	struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
+	struct drm_gem_close gem_close = {
+		.handle = mem->gem_handle,
+	};
+	int ret;
+
+        if (munmap((void *) (uintptr_t) mem->cpu, mem->size)) {
+                perror("munmap");
+                abort();
+        }
+
+	mem->cpu = NULL;
+
+	ret = drmIoctl(drm->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
+	if (ret) {
+                fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %d\n", ret);
+		assert(0);
+	}
+
+	mem->gem_handle = -1;
+}
+
+static struct panfrost_bo *
+panfrost_drm_import_bo(struct panfrost_screen *screen, struct winsys_handle *whandle)
+{
+	struct panfrost_bo *bo = CALLOC_STRUCT(panfrost_bo);
+	struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
+        struct drm_panfrost_get_bo_offset get_bo_offset = {0,};
+	struct drm_panfrost_mmap_bo mmap_bo = {0,};
+        int ret, size;
+        unsigned gem_handle;
+
+	ret = drmPrimeFDToHandle(drm->fd, whandle->handle, &gem_handle);
+	assert(!ret);
+
+	get_bo_offset.handle = gem_handle;
+        ret = drmIoctl(drm->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
+        assert(!ret);
+
+	bo->gem_handle = gem_handle;
+        bo->gpu[0] = (mali_ptr) get_bo_offset.offset;
+
+	// TODO map and unmap on demand?
+	mmap_bo.handle = gem_handle;
+	ret = drmIoctl(drm->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
+	if (ret) {
+                fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %d\n", ret);
+		assert(0);
+	}
+
+	size = lseek(whandle->handle, 0, SEEK_END);
+	assert(size > 0);
+        bo->cpu[0] = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                       drm->fd, mmap_bo.offset);
+        if (bo->cpu[0] == MAP_FAILED) {
+                fprintf(stderr, "mmap failed: %p\n", bo->cpu[0]);
+		assert(0);
+	}
+
+        /* Record the mmap if we're tracing */
+        pantrace_mmap(bo->gpu[0], bo->cpu[0], size, NULL);
+
+        return bo;
+}
+
+static int
+panfrost_drm_export_bo(struct panfrost_screen *screen, int gem_handle, struct winsys_handle *whandle)
+{
+	struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
+        struct drm_prime_handle args = {
+                .handle = gem_handle,
+                .flags = DRM_CLOEXEC,
+        };
+
+        int ret = drmIoctl(drm->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
+        if (ret == -1)
+                return FALSE;
+
+        whandle->handle = args.fd;
+
+        return TRUE;
+}
+
+static void
+panfrost_drm_free_imported_bo(struct panfrost_screen *screen, struct panfrost_bo *bo) 
+{
+	struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
+	struct drm_gem_close gem_close = {
+		.handle = bo->gem_handle,
+	};
+	int ret;
+
+	ret = drmIoctl(drm->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
+	if (ret) {
+                fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %d\n", ret);
+		assert(0);
+	}
+
+	bo->gem_handle = -1;
+	bo->gpu[0] = (mali_ptr)NULL;
+}
+
+static int
+panfrost_drm_submit_job(struct panfrost_context *ctx, u64 job_desc, int reqs, struct pipe_surface *surf)
+{
+        struct pipe_context *gallium = (struct pipe_context *) ctx;
+        struct panfrost_screen *screen = pan_screen(gallium->screen);
+	struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
+        struct drm_panfrost_submit submit = {0,};
+
+        submit.in_syncs = &ctx->out_sync;
+        submit.in_sync_count = 1;
+
+        submit.out_sync = ctx->out_sync;
+
+	submit.jc = job_desc;
+	submit.requirements = reqs;
+
+	if (surf) {
+		struct panfrost_resource *res = pan_resource(surf->texture);
+		submit.bo_handles = (u64) &res->bo->gem_handle;
+		submit.bo_handle_count = 1;
+	}
+
+        /* Dump memory _before_ submitting so we're not corrupted with actual GPU results */
+        pantrace_dump_memory();
+
+	if (drmIoctl(drm->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit)) {
+	        fprintf(stderr, "Error submitting: %m\n");
+	        return errno;
+	}
+
+        /* Trace the job if we're doing that and do a memory dump. We may
+         * want to adjust this logic once we're ready to trace FBOs */
+        pantrace_submit_job(submit.jc, submit.requirements, FALSE);
+
+	return 0;
+}
+
+static int
+panfrost_drm_submit_vs_fs_job(struct panfrost_context *ctx, bool has_draws, bool is_scanout)
+{
+        struct pipe_surface *surf = ctx->pipe_framebuffer.cbufs[0];
+	int ret;
+
+        if (has_draws) {
+		ret = panfrost_drm_submit_job(ctx, ctx->set_value_job, 0, NULL);
+		assert(!ret);
+	}
+
+	ret = panfrost_drm_submit_job(ctx, panfrost_fragment_job(ctx), PANFROST_JD_REQ_FS, surf);
+	assert(!ret);
+
+        return 0;
+}
+
+static struct panfrost_fence *
+panfrost_fence_create(struct panfrost_context *ctx)
+{
+        struct pipe_context *gallium = (struct pipe_context *) ctx;
+        struct panfrost_screen *screen = pan_screen(gallium->screen);
+	struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
+        struct panfrost_fence *f = calloc(1, sizeof(*f));
+        if (!f)
+                return NULL;
+
+        /* Snapshot the last Panfrost's rendering's out fence.  We'd rather have
+         * another syncobj instead of a sync file, but this is all we get.
+         * (HandleToFD/FDToHandle just gives you another syncobj ID for the
+         * same syncobj).
+         */
+        drmSyncobjExportSyncFile(drm->fd, ctx->out_sync, &f->fd);
+        if (f->fd == -1) {
+                fprintf(stderr, "export failed\n");
+                free(f);
+                return NULL;
+        }
+
+        pipe_reference_init(&f->reference, 1);
+
+        return f;
+}
+
+static void
+panfrost_drm_force_flush_fragment(struct panfrost_context *ctx,
+				  struct pipe_fence_handle **fence)
+{
+        struct pipe_context *gallium = (struct pipe_context *) ctx;
+
+        if (fence) {
+                struct panfrost_fence *f = panfrost_fence_create(ctx);
+                gallium->screen->fence_reference(gallium->screen, fence, NULL);
+                *fence = (struct pipe_fence_handle *)f;
+        }
+}
+
+static void
+panfrost_drm_enable_counters(struct panfrost_screen *screen)
+{
+	fprintf(stderr, "unimplemented: %s\n", __func__);
+}
+
+static void
+panfrost_drm_dump_counters(struct panfrost_screen *screen)
+{
+	fprintf(stderr, "unimplemented: %s\n", __func__);
+}
+
+static unsigned
+panfrost_drm_query_gpu_version(struct panfrost_screen *screen)
+{
+	struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
+        struct drm_panfrost_get_param get_param = {0,};
+        int ret;
+
+	get_param.param = DRM_PANFROST_PARAM_GPU_ID;
+        ret = drmIoctl(drm->fd, DRM_IOCTL_PANFROST_GET_PARAM, &get_param);
+        assert(!ret);
+
+	return get_param.value;
+}
+
+static int
+panfrost_drm_init_context(struct panfrost_context *ctx)
+{
+        struct pipe_context *gallium = (struct pipe_context *) ctx;
+        struct panfrost_screen *screen = pan_screen(gallium->screen);
+	struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
+
+        return drmSyncobjCreate(drm->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
+                                &ctx->out_sync);
+}
+
+static void
+panfrost_drm_fence_reference(struct pipe_screen *screen,
+                         struct pipe_fence_handle **ptr,
+                         struct pipe_fence_handle *fence)
+{
+        struct panfrost_fence **p = (struct panfrost_fence **)ptr;
+        struct panfrost_fence *f = (struct panfrost_fence *)fence;
+        struct panfrost_fence *old = *p;
+
+        if (pipe_reference(&(*p)->reference, &f->reference)) {
+                close(old->fd);
+                free(old);
+        }
+        *p = f;
+}
+
+static boolean
+panfrost_drm_fence_finish(struct pipe_screen *pscreen,
+                      struct pipe_context *ctx,
+                      struct pipe_fence_handle *fence,
+                      uint64_t timeout)
+{
+        struct panfrost_screen *screen = pan_screen(pscreen);
+	struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
+        struct panfrost_fence *f = (struct panfrost_fence *)fence;
+        int ret;
+
+        unsigned syncobj;
+        ret = drmSyncobjCreate(drm->fd, 0, &syncobj);
+        if (ret) {
+                fprintf(stderr, "Failed to create syncobj to wait on: %m\n");
+                return false;
+        }
+
+        drmSyncobjImportSyncFile(drm->fd, syncobj, f->fd);
+        if (ret) {
+                fprintf(stderr, "Failed to import fence to syncobj: %m\n");
+                return false;
+        }
+
+        uint64_t abs_timeout = os_time_get_absolute_timeout(timeout);
+        if (abs_timeout == OS_TIMEOUT_INFINITE)
+                abs_timeout = INT64_MAX;
+
+        ret = drmSyncobjWait(drm->fd, &syncobj, 1, abs_timeout, 0, NULL);
+
+        drmSyncobjDestroy(drm->fd, syncobj);
+
+        return ret >= 0;
+}
+
 struct panfrost_driver *
 panfrost_create_drm_driver(int fd)
 {
@@ -38,5 +387,19 @@  panfrost_create_drm_driver(int fd)
 
 	driver->fd = fd;
 
+	driver->base.import_bo = panfrost_drm_import_bo;
+	driver->base.export_bo = panfrost_drm_export_bo;
+	driver->base.free_imported_bo = panfrost_drm_free_imported_bo;
+	driver->base.submit_vs_fs_job = panfrost_drm_submit_vs_fs_job;
+	driver->base.force_flush_fragment = panfrost_drm_force_flush_fragment;
+	driver->base.allocate_slab = panfrost_drm_allocate_slab;
+	driver->base.free_slab = panfrost_drm_free_slab;
+	driver->base.enable_counters = panfrost_drm_enable_counters;
+	driver->base.query_gpu_version = panfrost_drm_query_gpu_version;
+	driver->base.init_context = panfrost_drm_init_context;
+	driver->base.fence_reference = panfrost_drm_fence_reference;
+	driver->base.fence_finish = panfrost_drm_fence_finish;
+	driver->base.dump_counters = panfrost_drm_dump_counters;
+
         return &driver->base;
 }

Comments


On Fri, Mar 8, 2019 at 3:59 PM Alyssa Rosenzweig <alyssa@rosenzweig.io> wrote:
>
> > +/**
> > + * struct drm_panfrost_wait_bo - ioctl argument for waiting for
> > + * completion of the last DRM_PANFROST_SUBMIT_CL on a BO.
>
> Nit: Should be plain DRM_PANFROST_SUBMIT, there is no CL for us.
>
> > +     __s64 timeout_ns;       /* absolute */
>
> Erm, why is this signed? Semantically, what does a negative timestamp
> mean? Seems suspect. The comment /* absolute */ seems to underscore that
> we really do want an unsigned value, perhaps ascribing a special meaning
> to 0/~0 for "nonblocking" and "block indefinitely" if needed. Of course,
> "(2^64)-1 ns" is essentially indefinite, so the latter need not be a
> special case.

Signed is convention and used internally in the kernel (ktime_t). I
checked that signed is correct with Arnd Bergmann who is leading the
2038 work.

> * It's 585 years, according to a back of the envelope calculation.
>   Panfrost will be obsolete many times over by the time that timeout
>   elapses ;)

I'm pretty sure it will be obsolete in only 240 years too. :)

Rob
> I'm pretty sure it will be obsolete in only 240 years too. :)

Ecclesiastes 1:9, alas :P