[libdrm,4/9] amdgpu: Add struct amdgpu_core_device and amdgpu_core_bo

Submitted by Michel Dänzer on June 24, 2019, 4:54 p.m.

Details

Message ID 20190624165406.13682-5-michel@daenzer.net
State Rejected
Headers show
Series "amdgpu:" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Michel Dänzer June 24, 2019, 4:54 p.m.
From: Michel Dänzer <michel.daenzer@amd.com>

The new core structs (struct amdgpu_core_device and struct amdgpu_core_bo)
can be referenced by any number of struct amdgpu_device/bo, which
are used for amdgpu_device/bo_handle in the public API.

This allows keeping track of the DRM file descriptor passed to
amdgpu_device_initialize and the one used for CS submission etc.
separately. The core structs hold the information relevant for the
latter.

Because we now always keep a duplicate of the file descriptor passed to
amdgpu_device_initialize, we can use that for flink, and we no longer
need to check its authentication status (flink could never be expected
to work after passing an unauthenticated file descriptor to
amdgpu_device_initialize).

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
---
 amdgpu/amdgpu_asic_id.c       |   4 +-
 amdgpu/amdgpu_bo.c            | 251 ++++++++++++++++++++++------------
 amdgpu/amdgpu_cs.c            |  64 +++++----
 amdgpu/amdgpu_device.c        | 219 ++++++++++++-----------------
 amdgpu/amdgpu_gpu_info.c      |  35 ++---
 amdgpu/amdgpu_internal.h      |  27 ++--
 amdgpu/amdgpu_vamgr.c         |   9 +-
 amdgpu/amdgpu_vm.c            |   4 +-
 tests/amdgpu/amdgpu_test.c    |   2 +-
 tests/amdgpu/bo_tests.c       |   2 +-
 tests/amdgpu/cs_tests.c       |   8 +-
 tests/amdgpu/deadlock_tests.c |   8 +-
 tests/amdgpu/uvd_enc_tests.c  |   2 +-
 tests/amdgpu/vce_tests.c      |  12 +-
 tests/amdgpu/vcn_tests.c      |   4 +-
 tests/amdgpu/vm_tests.c       |   2 +-
 16 files changed, 360 insertions(+), 293 deletions(-)

Patch hide | download patch | download mbox

diff --git a/amdgpu/amdgpu_asic_id.c b/amdgpu/amdgpu_asic_id.c
index a5007ffc..356c8a59 100644
--- a/amdgpu/amdgpu_asic_id.c
+++ b/amdgpu/amdgpu_asic_id.c
@@ -34,7 +34,7 @@ 
 #include "amdgpu_drm.h"
 #include "amdgpu_internal.h"
 
-static int parse_one_line(struct amdgpu_device *dev, const char *line)
+static int parse_one_line(struct amdgpu_core_device *dev, const char *line)
 {
 	char *buf, *saveptr;
 	char *s_did;
@@ -104,7 +104,7 @@  out:
 	return r;
 }
 
-void amdgpu_parse_asic_ids(struct amdgpu_device *dev)
+void amdgpu_parse_asic_ids(struct amdgpu_core_device *dev)
 {
 	FILE *fp;
 	char *line = NULL;
diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
index 5bdb8fe8..7fec1f15 100644
--- a/amdgpu/amdgpu_bo.c
+++ b/amdgpu/amdgpu_bo.c
@@ -47,15 +47,15 @@  static int amdgpu_close_kms_handle(int fd, uint32_t handle)
 	return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &args);
 }
 
-static int amdgpu_bo_create(amdgpu_device_handle dev,
-			    uint64_t size,
-			    uint32_t handle,
-			    amdgpu_bo_handle *buf_handle)
+static int amdgpu_core_bo_create(struct amdgpu_core_device *dev,
+				 uint64_t size,
+				 uint32_t handle,
+				 struct amdgpu_core_bo **out_bo)
 {
-	struct amdgpu_bo *bo;
+	struct amdgpu_core_bo *bo;
 	int r;
 
-	bo = calloc(1, sizeof(struct amdgpu_bo));
+	bo = calloc(1, sizeof(struct amdgpu_core_bo));
 	if (!bo)
 		return -ENOMEM;
 
@@ -66,19 +66,64 @@  static int amdgpu_bo_create(amdgpu_device_handle dev,
 	}
 
 	atomic_set(&bo->refcount, 1);
-	bo->dev = dev;
 	bo->alloc_size = size;
 	bo->handle = handle;
 	pthread_mutex_init(&bo->cpu_access_mutex, NULL);
 
-	*buf_handle = bo;
+	*out_bo = bo;
 	return 0;
 }
 
-drm_public int amdgpu_bo_alloc(amdgpu_device_handle dev,
+static int amdgpu_bo_create(amdgpu_device_handle user_dev,
+			    uint64_t size,
+			    uint32_t handle,
+			    amdgpu_bo_handle *buf_handle)
+{
+	struct amdgpu_core_device *dev = user_dev->core;
+	struct amdgpu_bo *user_bo = NULL;
+	struct amdgpu_core_bo *bo;
+	int r = 0; /* stays 0 on every success path that reaches "out" */
+
+	/* Find or create the core BO for handle, then user_dev's wrapper for it */
+	bo = handle_table_lookup(&dev->bo_handles, handle);
+	if (bo) {
+		for (user_bo = bo->user_bos; user_bo; user_bo = user_bo->next) {
+			if (user_bo->dev == user_dev) {
+				/* Re-use existing buffer */
+				atomic_inc(&user_bo->refcount);
+				goto out;
+			}
+		}
+		atomic_inc(&bo->refcount); /* reference held by the new wrapper */
+	} else {
+		r = amdgpu_core_bo_create(dev, size, handle, &bo);
+		if (r)
+			goto out;
+	}
+
+	/* NOTE(review): failure here does not release the core BO / ref above */
+	user_bo = calloc(1, sizeof(struct amdgpu_bo));
+	if (!user_bo) {
+		r = -ENOMEM;
+		goto out;
+	}
+
+	atomic_set(&user_bo->refcount, 1);
+	user_bo->next = bo->user_bos;
+	bo->user_bos = user_bo;
+	user_bo->core = bo;
+	user_bo->dev = user_dev;
+
+out:
+	*buf_handle = user_bo;
+	return r;
+}
+
+drm_public int amdgpu_bo_alloc(amdgpu_device_handle user_dev,
 			       struct amdgpu_bo_alloc_request *alloc_buffer,
 			       amdgpu_bo_handle *buf_handle)
 {
+	struct amdgpu_core_device *dev = user_dev->core;
 	union drm_amdgpu_gem_create args;
 	int r;
 
@@ -97,8 +142,8 @@  drm_public int amdgpu_bo_alloc(amdgpu_device_handle dev,
 		goto out;
 
 	pthread_mutex_lock(&dev->bo_table_mutex);
-	r = amdgpu_bo_create(dev, alloc_buffer->alloc_size, args.out.handle,
-			     buf_handle);
+	r = amdgpu_bo_create(user_dev, alloc_buffer->alloc_size,
+			     args.out.handle, buf_handle);
 	pthread_mutex_unlock(&dev->bo_table_mutex);
 	if (r) {
 		amdgpu_close_kms_handle(dev->fd, args.out.handle);
@@ -108,9 +153,10 @@  out:
 	return r;
 }
 
-drm_public int amdgpu_bo_set_metadata(amdgpu_bo_handle bo,
+drm_public int amdgpu_bo_set_metadata(amdgpu_bo_handle user_bo,
 				      struct amdgpu_bo_metadata *info)
 {
+	struct amdgpu_core_bo *bo = user_bo->core;
 	struct drm_amdgpu_gem_metadata args = {};
 
 	args.handle = bo->handle;
@@ -126,17 +172,19 @@  drm_public int amdgpu_bo_set_metadata(amdgpu_bo_handle bo,
 		memcpy(args.data.data, info->umd_metadata, info->size_metadata);
 	}
 
-	return drmCommandWriteRead(bo->dev->fd,
+	return drmCommandWriteRead(user_bo->dev->core->fd,
 				   DRM_AMDGPU_GEM_METADATA,
 				   &args, sizeof(args));
 }
 
-drm_public int amdgpu_bo_query_info(amdgpu_bo_handle bo,
+drm_public int amdgpu_bo_query_info(amdgpu_bo_handle user_bo,
 				    struct amdgpu_bo_info *info)
 {
+	struct amdgpu_core_bo *bo = user_bo->core;
 	struct drm_amdgpu_gem_metadata metadata = {};
 	struct drm_amdgpu_gem_create_in bo_info = {};
 	struct drm_amdgpu_gem_op gem_op = {};
+	int fd = user_bo->dev->core->fd;
 	int r;
 
 	/* Validate the BO passed in */
@@ -147,8 +195,8 @@  drm_public int amdgpu_bo_query_info(amdgpu_bo_handle bo,
 	metadata.handle = bo->handle;
 	metadata.op = AMDGPU_GEM_METADATA_OP_GET_METADATA;
 
-	r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_METADATA,
-				&metadata, sizeof(metadata));
+	r = drmCommandWriteRead(fd, DRM_AMDGPU_GEM_METADATA, &metadata,
+				sizeof(metadata));
 	if (r)
 		return r;
 
@@ -161,8 +209,7 @@  drm_public int amdgpu_bo_query_info(amdgpu_bo_handle bo,
 	gem_op.op = AMDGPU_GEM_OP_GET_GEM_CREATE_INFO;
 	gem_op.value = (uintptr_t)&bo_info;
 
-	r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_OP,
-				&gem_op, sizeof(gem_op));
+	r = drmCommandWriteRead(fd, DRM_AMDGPU_GEM_OP, &gem_op, sizeof(gem_op));
 	if (r)
 		return r;
 
@@ -182,29 +229,30 @@  drm_public int amdgpu_bo_query_info(amdgpu_bo_handle bo,
 	return 0;
 }
 
-static int amdgpu_bo_export_flink(amdgpu_bo_handle bo)
+static int amdgpu_bo_export_flink(amdgpu_bo_handle user_bo)
 {
+	struct amdgpu_core_device *dev = user_bo->dev->core;
+	struct amdgpu_core_bo *bo = user_bo->core;
+	int user_fd = user_bo->dev->user_fd;
 	struct drm_gem_flink flink;
 	int fd, dma_fd;
 	uint32_t handle;
 	int r;
 
-	fd = bo->dev->fd;
+	fd = dev->fd;
 	handle = bo->handle;
 	if (bo->flink_name)
 		return 0;
 
-
-	if (bo->dev->flink_fd != bo->dev->fd) {
-		r = drmPrimeHandleToFD(bo->dev->fd, bo->handle, DRM_CLOEXEC,
-				       &dma_fd);
+	if (user_fd != fd) {
+		r = drmPrimeHandleToFD(fd, bo->handle, DRM_CLOEXEC, &dma_fd);
 		if (!r) {
-			r = drmPrimeFDToHandle(bo->dev->flink_fd, dma_fd, &handle);
+			r = drmPrimeFDToHandle(user_fd, dma_fd, &handle);
 			close(dma_fd);
 		}
 		if (r)
 			return r;
-		fd = bo->dev->flink_fd;
+		fd = user_fd;
 	}
 	memset(&flink, 0, sizeof(flink));
 	flink.handle = handle;
@@ -215,25 +263,26 @@  static int amdgpu_bo_export_flink(amdgpu_bo_handle bo)
 
 	bo->flink_name = flink.name;
 
-	if (bo->dev->flink_fd != bo->dev->fd)
-		amdgpu_close_kms_handle(bo->dev->flink_fd, handle);
+	if (user_fd != dev->fd)
+		amdgpu_close_kms_handle(user_fd, handle);
 
-	pthread_mutex_lock(&bo->dev->bo_table_mutex);
-	r = handle_table_insert(&bo->dev->bo_flink_names, bo->flink_name, bo);
-	pthread_mutex_unlock(&bo->dev->bo_table_mutex);
+	pthread_mutex_lock(&dev->bo_table_mutex);
+	r = handle_table_insert(&dev->bo_flink_names, bo->flink_name, bo);
+	pthread_mutex_unlock(&dev->bo_table_mutex);
 
 	return r;
 }
 
-drm_public int amdgpu_bo_export(amdgpu_bo_handle bo,
+drm_public int amdgpu_bo_export(amdgpu_bo_handle user_bo,
 				enum amdgpu_bo_handle_type type,
 				uint32_t *shared_handle)
 {
+	struct amdgpu_core_bo *bo = user_bo->core;
 	int r;
 
 	switch (type) {
 	case amdgpu_bo_handle_type_gem_flink_name:
-		r = amdgpu_bo_export_flink(bo);
+		r = amdgpu_bo_export_flink(user_bo);
 		if (r)
 			return r;
 
@@ -246,21 +295,24 @@  drm_public int amdgpu_bo_export(amdgpu_bo_handle bo,
 		return 0;
 
 	case amdgpu_bo_handle_type_dma_buf_fd:
-		return drmPrimeHandleToFD(bo->dev->fd, bo->handle,
+		return drmPrimeHandleToFD(user_bo->dev->core->fd, bo->handle,
 					  DRM_CLOEXEC | DRM_RDWR,
 					  (int*)shared_handle);
 	}
 	return -EINVAL;
 }
 
-drm_public int amdgpu_bo_import(amdgpu_device_handle dev,
+drm_public int amdgpu_bo_import(amdgpu_device_handle user_dev,
 				enum amdgpu_bo_handle_type type,
 				uint32_t shared_handle,
-		     struct amdgpu_bo_import_result *output)
+				struct amdgpu_bo_import_result *output)
 {
+	struct amdgpu_core_device *dev = user_dev->core;
 	struct drm_gem_open open_arg = {};
-	struct amdgpu_bo *bo = NULL;
+	struct amdgpu_bo *user_bo = NULL;
 	uint32_t handle = 0, flink_name = 0;
+	int user_fd = user_dev->user_fd;
+	struct amdgpu_core_bo *bo;
 	uint64_t alloc_size = 0;
 	int r = 0;
 	int dma_fd;
@@ -313,37 +365,32 @@  drm_public int amdgpu_bo_import(amdgpu_device_handle dev,
 	}
 
 	if (bo) {
-		/* The buffer already exists, just bump the refcount. */
-		atomic_inc(&bo->refcount);
-		pthread_mutex_unlock(&dev->bo_table_mutex);
-
-		output->buf_handle = bo;
-		output->alloc_size = bo->alloc_size;
-		return 0;
+		handle = bo->handle;
+		alloc_size = bo->alloc_size;
+		goto bo_create;
 	}
 
 	/* Open the handle. */
 	switch (type) {
 	case amdgpu_bo_handle_type_gem_flink_name:
 		open_arg.name = shared_handle;
-		r = drmIoctl(dev->flink_fd, DRM_IOCTL_GEM_OPEN, &open_arg);
+		r = drmIoctl(user_fd, DRM_IOCTL_GEM_OPEN, &open_arg);
 		if (r)
 			goto unlock;
 
 		flink_name = shared_handle;
 		handle = open_arg.handle;
 		alloc_size = open_arg.size;
-		if (dev->flink_fd != dev->fd) {
-			r = drmPrimeHandleToFD(dev->flink_fd, handle,
-					       DRM_CLOEXEC, &dma_fd);
+		if (user_fd != dev->fd) {
+			r = drmPrimeHandleToFD(user_fd, handle, DRM_CLOEXEC,
+					       &dma_fd);
 			if (r)
 				goto free_bo_handle;
 			r = drmPrimeFDToHandle(dev->fd, dma_fd, &handle);
 			close(dma_fd);
 			if (r)
 				goto free_bo_handle;
-			r = amdgpu_close_kms_handle(dev->flink_fd,
-						    open_arg.handle);
+			r = amdgpu_close_kms_handle(user_fd, open_arg.handle);
 			if (r)
 				goto free_bo_handle;
 		}
@@ -360,11 +407,13 @@  drm_public int amdgpu_bo_import(amdgpu_device_handle dev,
 		assert(0); /* unreachable */
 	}
 
+bo_create:
 	/* Initialize it. */
-	r = amdgpu_bo_create(dev, alloc_size, handle, &bo);
+	r = amdgpu_bo_create(user_dev, alloc_size, handle, &user_bo);
 	if (r)
 		goto free_bo_handle;
 
+	bo = user_bo->core;
 	if (flink_name) {
 		bo->flink_name = flink_name;
 		r = handle_table_insert(&dev->bo_flink_names, flink_name,
@@ -374,17 +423,17 @@  drm_public int amdgpu_bo_import(amdgpu_device_handle dev,
 
 	}
 
-	output->buf_handle = bo;
+	output->buf_handle = user_bo;
 	output->alloc_size = bo->alloc_size;
 	pthread_mutex_unlock(&dev->bo_table_mutex);
 	return 0;
 
 free_bo_handle:
 	if (flink_name && open_arg.handle)
-		amdgpu_close_kms_handle(dev->flink_fd, open_arg.handle);
+		amdgpu_close_kms_handle(user_fd, open_arg.handle);
 
-	if (bo)
-		amdgpu_bo_free(bo);
+	if (user_bo)
+		amdgpu_bo_free(user_bo);
 	else
 		amdgpu_close_kms_handle(dev->fd, handle);
 unlock:
@@ -392,14 +441,10 @@  unlock:
 	return r;
 }
 
-drm_public int amdgpu_bo_free(amdgpu_bo_handle buf_handle)
+static void amdgpu_core_bo_free(struct amdgpu_bo *user_bo)
 {
-	struct amdgpu_device *dev;
-	struct amdgpu_bo *bo = buf_handle;
-
-	assert(bo != NULL);
-	dev = bo->dev;
-	pthread_mutex_lock(&dev->bo_table_mutex);
+	struct amdgpu_core_device *dev = user_bo->dev->core;
+	struct amdgpu_core_bo *bo = user_bo->core;
 
 	if (update_references(&bo->refcount, NULL)) {
 		/* Remove the buffer from the hash tables. */
@@ -412,12 +457,39 @@  drm_public int amdgpu_bo_free(amdgpu_bo_handle buf_handle)
 		/* Release CPU access. */
 		if (bo->cpu_map_count > 0) {
 			bo->cpu_map_count = 1;
-			amdgpu_bo_cpu_unmap(bo);
+			amdgpu_bo_cpu_unmap(user_bo);
 		}
 
 		amdgpu_close_kms_handle(dev->fd, bo->handle);
 		pthread_mutex_destroy(&bo->cpu_access_mutex);
 		free(bo);
+	} else if (bo->user_bos == user_bo) {
+		bo->user_bos = user_bo->next;
+	} else {
+		struct amdgpu_bo *iter;
+
+		for (iter = bo->user_bos; iter->next; iter = iter->next) {
+			if (iter->next == user_bo) {
+				iter->next = user_bo->next;
+				break;
+			}
+		}
+	}
+}
+
+drm_public int amdgpu_bo_free(amdgpu_bo_handle buf_handle)
+{
+	struct amdgpu_bo *user_bo = buf_handle;
+	struct amdgpu_core_device *dev;
+
+	assert(user_bo != NULL);
+	dev = user_bo->dev->core;
+
+	pthread_mutex_lock(&dev->bo_table_mutex);
+
+	if (update_references(&user_bo->refcount, NULL)) {
+		amdgpu_core_bo_free(user_bo);
+		free(user_bo);
 	}
 
 	pthread_mutex_unlock(&dev->bo_table_mutex);
@@ -430,8 +502,10 @@  drm_public void amdgpu_bo_inc_ref(amdgpu_bo_handle bo)
 	atomic_inc(&bo->refcount);
 }
 
-drm_public int amdgpu_bo_cpu_map(amdgpu_bo_handle bo, void **cpu)
+drm_public int amdgpu_bo_cpu_map(amdgpu_bo_handle user_bo, void **cpu)
 {
+	struct amdgpu_core_bo *bo = user_bo->core;
+	int fd = user_bo->dev->core->fd;
 	union drm_amdgpu_gem_mmap args;
 	void *ptr;
 	int r;
@@ -455,8 +529,7 @@  drm_public int amdgpu_bo_cpu_map(amdgpu_bo_handle bo, void **cpu)
 	 * The kernel driver ignores the offset and size parameters. */
 	args.in.handle = bo->handle;
 
-	r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_MMAP, &args,
-				sizeof(args));
+	r = drmCommandWriteRead(fd, DRM_AMDGPU_GEM_MMAP, &args, sizeof(args));
 	if (r) {
 		pthread_mutex_unlock(&bo->cpu_access_mutex);
 		return r;
@@ -464,7 +537,7 @@  drm_public int amdgpu_bo_cpu_map(amdgpu_bo_handle bo, void **cpu)
 
 	/* Map the buffer. */
 	ptr = drm_mmap(NULL, bo->alloc_size, PROT_READ | PROT_WRITE, MAP_SHARED,
-		       bo->dev->fd, args.out.addr_ptr);
+		       fd, args.out.addr_ptr);
 	if (ptr == MAP_FAILED) {
 		pthread_mutex_unlock(&bo->cpu_access_mutex);
 		return -errno;
@@ -478,8 +551,9 @@  drm_public int amdgpu_bo_cpu_map(amdgpu_bo_handle bo, void **cpu)
 	return 0;
 }
 
-drm_public int amdgpu_bo_cpu_unmap(amdgpu_bo_handle bo)
+drm_public int amdgpu_bo_cpu_unmap(amdgpu_bo_handle user_bo)
 {
+	struct amdgpu_core_bo *bo = user_bo->core;
 	int r;
 
 	pthread_mutex_lock(&bo->cpu_access_mutex);
@@ -504,18 +578,21 @@  drm_public int amdgpu_bo_cpu_unmap(amdgpu_bo_handle bo)
 	return r;
 }
 
-drm_public int amdgpu_query_buffer_size_alignment(amdgpu_device_handle dev,
+drm_public int amdgpu_query_buffer_size_alignment(amdgpu_device_handle user_dev,
 				struct amdgpu_buffer_size_alignments *info)
 {
+	struct amdgpu_core_device *dev = user_dev->core;
+
 	info->size_local = dev->dev_info.pte_fragment_size;
 	info->size_remote = dev->dev_info.gart_page_size;
 	return 0;
 }
 
-drm_public int amdgpu_bo_wait_for_idle(amdgpu_bo_handle bo,
+drm_public int amdgpu_bo_wait_for_idle(amdgpu_bo_handle user_bo,
 				       uint64_t timeout_ns,
 			    bool *busy)
 {
+	struct amdgpu_core_bo *bo = user_bo->core;
 	union drm_amdgpu_gem_wait_idle args;
 	int r;
 
@@ -523,7 +600,7 @@  drm_public int amdgpu_bo_wait_for_idle(amdgpu_bo_handle bo,
 	args.in.handle = bo->handle;
 	args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);
 
-	r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_WAIT_IDLE,
+	r = drmCommandWriteRead(user_bo->dev->core->fd, DRM_AMDGPU_GEM_WAIT_IDLE,
 				&args, sizeof(args));
 
 	if (r == 0) {
@@ -535,13 +612,14 @@  drm_public int amdgpu_bo_wait_for_idle(amdgpu_bo_handle bo,
 	}
 }
 
-drm_public int amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle dev,
+drm_public int amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle user_dev,
 					     void *cpu,
 					     uint64_t size,
 					     amdgpu_bo_handle *buf_handle,
 					     uint64_t *offset_in_bo)
 {
-	struct amdgpu_bo *bo;
+	struct amdgpu_core_device *dev = user_dev->core;
+	struct amdgpu_core_bo *bo;
 	uint32_t i;
 	int r = 0;
 
@@ -564,8 +642,8 @@  drm_public int amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle dev,
 	}
 
 	if (i < dev->bo_handles.max_key) {
-		atomic_inc(&bo->refcount);
-		*buf_handle = bo;
+		r = amdgpu_bo_create(user_dev, bo->alloc_size, bo->handle,
+				     buf_handle);
 		*offset_in_bo = (uintptr_t)cpu - (uintptr_t)bo->cpu_ptr;
 	} else {
 		*buf_handle = NULL;
@@ -577,11 +655,12 @@  drm_public int amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle dev,
 	return r;
 }
 
-drm_public int amdgpu_create_bo_from_user_mem(amdgpu_device_handle dev,
+drm_public int amdgpu_create_bo_from_user_mem(amdgpu_device_handle user_dev,
 					      void *cpu,
 					      uint64_t size,
 					      amdgpu_bo_handle *buf_handle)
 {
+	struct amdgpu_core_device *dev = user_dev->core;
 	int r;
 	struct drm_amdgpu_gem_userptr args;
 
@@ -595,7 +674,7 @@  drm_public int amdgpu_create_bo_from_user_mem(amdgpu_device_handle dev,
 		goto out;
 
 	pthread_mutex_lock(&dev->bo_table_mutex);
-	r = amdgpu_bo_create(dev, size, args.handle, buf_handle);
+	r = amdgpu_bo_create(user_dev, size, args.handle, buf_handle);
 	pthread_mutex_unlock(&dev->bo_table_mutex);
 	if (r) {
 		amdgpu_close_kms_handle(dev->fd, args.handle);
@@ -605,11 +684,12 @@  out:
 	return r;
 }
 
-drm_public int amdgpu_bo_list_create_raw(amdgpu_device_handle dev,
+drm_public int amdgpu_bo_list_create_raw(amdgpu_device_handle user_dev,
 					 uint32_t number_of_buffers,
 					 struct drm_amdgpu_bo_list_entry *buffers,
 					 uint32_t *result)
 {
+	struct amdgpu_core_device *dev = user_dev->core;
 	union drm_amdgpu_bo_list args;
 	int r;
 
@@ -626,9 +706,10 @@  drm_public int amdgpu_bo_list_create_raw(amdgpu_device_handle dev,
 	return r;
 }
 
-drm_public int amdgpu_bo_list_destroy_raw(amdgpu_device_handle dev,
+drm_public int amdgpu_bo_list_destroy_raw(amdgpu_device_handle user_dev,
 					  uint32_t bo_list)
 {
+	struct amdgpu_core_device *dev = user_dev->core;
 	union drm_amdgpu_bo_list args;
 
 	memset(&args, 0, sizeof(args));
@@ -639,12 +720,13 @@  drm_public int amdgpu_bo_list_destroy_raw(amdgpu_device_handle dev,
 				   &args, sizeof(args));
 }
 
-drm_public int amdgpu_bo_list_create(amdgpu_device_handle dev,
+drm_public int amdgpu_bo_list_create(amdgpu_device_handle user_dev,
 				     uint32_t number_of_resources,
 				     amdgpu_bo_handle *resources,
 				     uint8_t *resource_prios,
 				     amdgpu_bo_list_handle *result)
 {
+	struct amdgpu_core_device *dev = user_dev->core;
 	struct drm_amdgpu_bo_list_entry *list;
 	union drm_amdgpu_bo_list args;
 	unsigned i;
@@ -674,7 +756,7 @@  drm_public int amdgpu_bo_list_create(amdgpu_device_handle dev,
 	args.in.bo_info_ptr = (uint64_t)(uintptr_t)list;
 
 	for (i = 0; i < number_of_resources; i++) {
-		list[i].bo_handle = resources[i]->handle;
+		list[i].bo_handle = resources[i]->core->handle;
 		if (resource_prios)
 			list[i].bo_priority = resource_prios[i];
 		else
@@ -740,7 +822,7 @@  drm_public int amdgpu_bo_list_update(amdgpu_bo_list_handle handle,
 	args.in.bo_info_ptr = (uintptr_t)list;
 
 	for (i = 0; i < number_of_resources; i++) {
-		list[i].bo_handle = resources[i]->handle;
+		list[i].bo_handle = resources[i]->core->handle;
 		if (resource_prios)
 			list[i].bo_priority = resource_prios[i];
 		else
@@ -770,7 +852,7 @@  drm_public int amdgpu_bo_va_op(amdgpu_bo_handle bo,
 				   AMDGPU_VM_PAGE_EXECUTABLE, ops);
 }
 
-drm_public int amdgpu_bo_va_op_raw(amdgpu_device_handle dev,
+drm_public int amdgpu_bo_va_op_raw(amdgpu_device_handle user_dev,
 				   amdgpu_bo_handle bo,
 				   uint64_t offset,
 				   uint64_t size,
@@ -778,6 +860,7 @@  drm_public int amdgpu_bo_va_op_raw(amdgpu_device_handle dev,
 				   uint64_t flags,
 				   uint32_t ops)
 {
+	struct amdgpu_core_device *dev = user_dev->core;
 	struct drm_amdgpu_gem_va va;
 	int r;
 
@@ -786,7 +869,7 @@  drm_public int amdgpu_bo_va_op_raw(amdgpu_device_handle dev,
 		return -EINVAL;
 
 	memset(&va, 0, sizeof(va));
-	va.handle = bo ? bo->handle : 0;
+	va.handle = bo ? bo->core->handle : 0;
 	va.operation = ops;
 	va.flags = flags;
 	va.va_address = addr;
diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
index 20d5aef2..98130105 100644
--- a/amdgpu/amdgpu_cs.c
+++ b/amdgpu/amdgpu_cs.c
@@ -48,22 +48,24 @@  static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem);
  *
  * \return  0 on success otherwise POSIX Error code
 */
-drm_public int amdgpu_cs_ctx_create2(amdgpu_device_handle dev,
+drm_public int amdgpu_cs_ctx_create2(amdgpu_device_handle user_dev,
 				     uint32_t priority,
 				     amdgpu_context_handle *context)
 {
 	struct amdgpu_context *gpu_context;
+	struct amdgpu_core_device *dev;
 	union drm_amdgpu_ctx args;
 	int i, j, k;
 	int r;
 
-	if (!dev || !context)
+	if (!user_dev || !context)
 		return -EINVAL;
 
 	gpu_context = calloc(1, sizeof(struct amdgpu_context));
 	if (!gpu_context)
 		return -ENOMEM;
 
+	dev = user_dev->core;
 	gpu_context->dev = dev;
 
 	r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL);
@@ -156,7 +158,7 @@  drm_public int amdgpu_cs_ctx_override_priority(amdgpu_device_handle dev,
 	memset(&args, 0, sizeof(args));
 
 	args.in.op = AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE;
-	args.in.fd = dev->fd;
+	args.in.fd = dev->core->fd;
 	args.in.priority = priority;
 	args.in.ctx_id = context->id;
 
@@ -269,7 +271,7 @@  static int amdgpu_cs_submit_one(amdgpu_context_handle context,
 		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
 
 		/* fence bo handle */
-		chunk_data[i].fence_data.handle = ibs_request->fence_info.handle->handle;
+		chunk_data[i].fence_data.handle = ibs_request->fence_info.handle->core->handle;
 		/* offset */
 		chunk_data[i].fence_data.offset = 
 			ibs_request->fence_info.offset * sizeof(uint64_t);
@@ -409,7 +411,7 @@  static int amdgpu_ioctl_wait_cs(amdgpu_context_handle context,
 				uint64_t flags,
 				bool *busy)
 {
-	amdgpu_device_handle dev = context->dev;
+	struct amdgpu_core_device *dev = context->dev;
 	union drm_amdgpu_wait_cs args;
 	int r;
 
@@ -471,8 +473,8 @@  static int amdgpu_ioctl_wait_fences(struct amdgpu_cs_fence *fences,
 				    uint32_t *status,
 				    uint32_t *first)
 {
+	struct amdgpu_core_device *dev = fences[0].context->dev;
 	struct drm_amdgpu_fence *drm_fences;
-	amdgpu_device_handle dev = fences[0].context->dev;
 	union drm_amdgpu_wait_fences args;
 	int r;
 	uint32_t i;
@@ -633,7 +635,7 @@  drm_public int amdgpu_cs_create_syncobj2(amdgpu_device_handle dev,
 	if (NULL == dev)
 		return -EINVAL;
 
-	return drmSyncobjCreate(dev->fd, flags, handle);
+	return drmSyncobjCreate(dev->core->fd, flags, handle);
 }
 
 drm_public int amdgpu_cs_create_syncobj(amdgpu_device_handle dev,
@@ -642,7 +644,7 @@  drm_public int amdgpu_cs_create_syncobj(amdgpu_device_handle dev,
 	if (NULL == dev)
 		return -EINVAL;
 
-	return drmSyncobjCreate(dev->fd, 0, handle);
+	return drmSyncobjCreate(dev->core->fd, 0, handle);
 }
 
 drm_public int amdgpu_cs_destroy_syncobj(amdgpu_device_handle dev,
@@ -651,7 +653,7 @@  drm_public int amdgpu_cs_destroy_syncobj(amdgpu_device_handle dev,
 	if (NULL == dev)
 		return -EINVAL;
 
-	return drmSyncobjDestroy(dev->fd, handle);
+	return drmSyncobjDestroy(dev->core->fd, handle);
 }
 
 drm_public int amdgpu_cs_syncobj_reset(amdgpu_device_handle dev,
@@ -661,7 +663,7 @@  drm_public int amdgpu_cs_syncobj_reset(amdgpu_device_handle dev,
 	if (NULL == dev)
 		return -EINVAL;
 
-	return drmSyncobjReset(dev->fd, syncobjs, syncobj_count);
+	return drmSyncobjReset(dev->core->fd, syncobjs, syncobj_count);
 }
 
 drm_public int amdgpu_cs_syncobj_signal(amdgpu_device_handle dev,
@@ -671,7 +673,7 @@  drm_public int amdgpu_cs_syncobj_signal(amdgpu_device_handle dev,
 	if (NULL == dev)
 		return -EINVAL;
 
-	return drmSyncobjSignal(dev->fd, syncobjs, syncobj_count);
+	return drmSyncobjSignal(dev->core->fd, syncobjs, syncobj_count);
 }
 
 drm_public int amdgpu_cs_syncobj_timeline_signal(amdgpu_device_handle dev,
@@ -682,7 +684,7 @@  drm_public int amdgpu_cs_syncobj_timeline_signal(amdgpu_device_handle dev,
 	if (NULL == dev)
 		return -EINVAL;
 
-	return drmSyncobjTimelineSignal(dev->fd, syncobjs,
+	return drmSyncobjTimelineSignal(dev->core->fd, syncobjs,
 					points, syncobj_count);
 }
 
@@ -694,7 +696,7 @@  drm_public int amdgpu_cs_syncobj_wait(amdgpu_device_handle dev,
 	if (NULL == dev)
 		return -EINVAL;
 
-	return drmSyncobjWait(dev->fd, handles, num_handles, timeout_nsec,
+	return drmSyncobjWait(dev->core->fd, handles, num_handles, timeout_nsec,
 			      flags, first_signaled);
 }
 
@@ -707,7 +709,7 @@  drm_public int amdgpu_cs_syncobj_timeline_wait(amdgpu_device_handle dev,
 	if (NULL == dev)
 		return -EINVAL;
 
-	return drmSyncobjTimelineWait(dev->fd, handles, points, num_handles,
+	return drmSyncobjTimelineWait(dev->core->fd, handles, points, num_handles,
 				      timeout_nsec, flags, first_signaled);
 }
 
@@ -718,7 +720,7 @@  drm_public int amdgpu_cs_syncobj_query(amdgpu_device_handle dev,
 	if (NULL == dev)
 		return -EINVAL;
 
-	return drmSyncobjQuery(dev->fd, handles, points, num_handles);
+	return drmSyncobjQuery(dev->core->fd, handles, points, num_handles);
 }
 
 drm_public int amdgpu_cs_export_syncobj(amdgpu_device_handle dev,
@@ -728,7 +730,7 @@  drm_public int amdgpu_cs_export_syncobj(amdgpu_device_handle dev,
 	if (NULL == dev)
 		return -EINVAL;
 
-	return drmSyncobjHandleToFD(dev->fd, handle, shared_fd);
+	return drmSyncobjHandleToFD(dev->core->fd, handle, shared_fd);
 }
 
 drm_public int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
@@ -738,7 +740,7 @@  drm_public int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
 	if (NULL == dev)
 		return -EINVAL;
 
-	return drmSyncobjFDToHandle(dev->fd, shared_fd, handle);
+	return drmSyncobjFDToHandle(dev->core->fd, shared_fd, handle);
 }
 
 drm_public int amdgpu_cs_syncobj_export_sync_file(amdgpu_device_handle dev,
@@ -748,7 +750,7 @@  drm_public int amdgpu_cs_syncobj_export_sync_file(amdgpu_device_handle dev,
 	if (NULL == dev)
 		return -EINVAL;
 
-	return drmSyncobjExportSyncFile(dev->fd, syncobj, sync_file_fd);
+	return drmSyncobjExportSyncFile(dev->core->fd, syncobj, sync_file_fd);
 }
 
 drm_public int amdgpu_cs_syncobj_import_sync_file(amdgpu_device_handle dev,
@@ -758,21 +760,24 @@  drm_public int amdgpu_cs_syncobj_import_sync_file(amdgpu_device_handle dev,
 	if (NULL == dev)
 		return -EINVAL;
 
-	return drmSyncobjImportSyncFile(dev->fd, syncobj, sync_file_fd);
+	return drmSyncobjImportSyncFile(dev->core->fd, syncobj, sync_file_fd);
 }
 
-drm_public int amdgpu_cs_syncobj_export_sync_file2(amdgpu_device_handle dev,
+drm_public int amdgpu_cs_syncobj_export_sync_file2(amdgpu_device_handle user_dev,
 						   uint32_t syncobj,
 						   uint64_t point,
 						   uint32_t flags,
 						   int *sync_file_fd)
 {
+	struct amdgpu_core_device *dev;
 	uint32_t binary_handle;
 	int ret;
 
-	if (NULL == dev)
+	if (!user_dev)
 		return -EINVAL;
 
+	dev = user_dev->core;
+
 	if (!point)
 		return drmSyncobjExportSyncFile(dev->fd, syncobj, sync_file_fd);
 
@@ -790,17 +795,20 @@  out:
 	return ret;
 }
 
-drm_public int amdgpu_cs_syncobj_import_sync_file2(amdgpu_device_handle dev,
+drm_public int amdgpu_cs_syncobj_import_sync_file2(amdgpu_device_handle user_dev,
 						   uint32_t syncobj,
 						   uint64_t point,
 						   int sync_file_fd)
 {
+	struct amdgpu_core_device *dev;
 	uint32_t binary_handle;
 	int ret;
 
-	if (NULL == dev)
+	if (!user_dev)
 		return -EINVAL;
 
+	dev = user_dev->core;
+
 	if (!point)
 		return drmSyncobjImportSyncFile(dev->fd, syncobj, sync_file_fd);
 
@@ -827,7 +835,7 @@  drm_public int amdgpu_cs_syncobj_transfer(amdgpu_device_handle dev,
 	if (NULL == dev)
 		return -EINVAL;
 
-	return drmSyncobjTransfer(dev->fd,
+	return drmSyncobjTransfer(dev->core->fd,
 				  dst_handle, dst_point,
 				  src_handle, src_point,
 				  flags);
@@ -854,7 +862,7 @@  drm_public int amdgpu_cs_submit_raw(amdgpu_device_handle dev,
 	cs.in.ctx_id = context->id;
 	cs.in.bo_list_handle = bo_list_handle ? bo_list_handle->handle : 0;
 	cs.in.num_chunks = num_chunks;
-	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CS,
+	r = drmCommandWriteRead(dev->core->fd, DRM_AMDGPU_CS,
 				&cs, sizeof(cs));
 	if (r)
 		return r;
@@ -883,7 +891,7 @@  drm_public int amdgpu_cs_submit_raw2(amdgpu_device_handle dev,
 	cs.in.ctx_id = context->id;
 	cs.in.bo_list_handle = bo_list_handle;
 	cs.in.num_chunks = num_chunks;
-	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CS,
+	r = drmCommandWriteRead(dev->core->fd, DRM_AMDGPU_CS,
 				&cs, sizeof(cs));
 	if (!r && seq_no)
 		*seq_no = cs.out.handle;
@@ -893,7 +901,7 @@  drm_public int amdgpu_cs_submit_raw2(amdgpu_device_handle dev,
 drm_public void amdgpu_cs_chunk_fence_info_to_data(struct amdgpu_cs_fence_info *fence_info,
 					struct drm_amdgpu_cs_chunk_data *data)
 {
-	data->fence_data.handle = fence_info->handle->handle;
+	data->fence_data.handle = fence_info->handle->core->handle;
 	data->fence_data.offset = fence_info->offset * sizeof(uint64_t);
 }
 
@@ -923,7 +931,7 @@  drm_public int amdgpu_cs_fence_to_handle(amdgpu_device_handle dev,
 	fth.in.fence.seq_no = fence->fence;
 	fth.in.what = what;
 
-	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_FENCE_TO_HANDLE,
+	r = drmCommandWriteRead(dev->core->fd, DRM_AMDGPU_FENCE_TO_HANDLE,
 				&fth, sizeof(fth));
 	if (r == 0)
 		*out_handle = fth.out.handle;
diff --git a/amdgpu/amdgpu_device.c b/amdgpu/amdgpu_device.c
index 76b4e5eb..abf5f942 100644
--- a/amdgpu/amdgpu_device.c
+++ b/amdgpu/amdgpu_device.c
@@ -44,7 +44,7 @@ 
 #define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
 
 static pthread_mutex_t dev_mutex = PTHREAD_MUTEX_INITIALIZER;
-static amdgpu_device_handle dev_list;
+static struct amdgpu_core_device *dev_list;
 
 static int fd_compare(int fd1, int fd2)
 {
@@ -65,47 +65,15 @@  static int fd_compare(int fd1, int fd2)
 	return result;
 }
 
-/**
-* Get the authenticated form fd,
-*
-* \param   fd   - \c [in]  File descriptor for AMD GPU device
-* \param   auth - \c [out] Pointer to output the fd is authenticated or not
-*                          A render node fd, output auth = 0
-*                          A legacy fd, get the authenticated for compatibility root
-*
-* \return   0 on success\n
-*          >0 - AMD specific error code\n
-*          <0 - Negative POSIX Error code
-*/
-static int amdgpu_get_auth(int fd, int *auth)
+static void amdgpu_device_free(struct amdgpu_core_device *dev)
 {
-	int r = 0;
-	drm_client_t client = {};
-
-	if (drmGetNodeTypeFromFd(fd) == DRM_NODE_RENDER)
-		*auth = 0;
-	else {
-		client.idx = 0;
-		r = drmIoctl(fd, DRM_IOCTL_GET_CLIENT, &client);
-		if (!r)
-			*auth = client.auth;
-	}
-	return r;
-}
+	struct amdgpu_core_device **node = &dev_list;
 
-static void amdgpu_device_free_internal(amdgpu_device_handle dev)
-{
-	amdgpu_device_handle *node = &dev_list;
-
-	pthread_mutex_lock(&dev_mutex);
 	while (*node != dev && (*node)->next)
 		node = &(*node)->next;
 	*node = (*node)->next;
-	pthread_mutex_unlock(&dev_mutex);
 
 	close(dev->fd);
-	if ((dev->flink_fd >= 0) && (dev->fd != dev->flink_fd))
-		close(dev->flink_fd);
 
 	amdgpu_vamgr_deinit(&dev->vamgr_32);
 	amdgpu_vamgr_deinit(&dev->vamgr);
@@ -118,87 +86,39 @@  static void amdgpu_device_free_internal(amdgpu_device_handle dev)
 	free(dev);
 }
 
-/**
- * Assignment between two amdgpu_device pointers with reference counting.
- *
- * Usage:
- *    struct amdgpu_device *dst = ... , *src = ...;
- *
- *    dst = src;
- *    // No reference counting. Only use this when you need to move
- *    // a reference from one pointer to another.
- *
- *    amdgpu_device_reference(&dst, src);
- *    // Reference counters are updated. dst is decremented and src is
- *    // incremented. dst is freed if its reference counter is 0.
- */
-static void amdgpu_device_reference(struct amdgpu_device **dst,
-				    struct amdgpu_device *src)
+static int amdgpu_device_init(amdgpu_device_handle user_dev)
 {
-	if (update_references(&(*dst)->refcount, &src->refcount))
-		amdgpu_device_free_internal(*dst);
-	*dst = src;
-}
-
-drm_public int amdgpu_device_initialize(int fd,
-					uint32_t *major_version,
-					uint32_t *minor_version,
-					amdgpu_device_handle *device_handle)
-{
-	struct amdgpu_device *dev;
+	struct amdgpu_core_device *dev;
 	drmVersionPtr version;
-	int r;
-	int flag_auth = 0;
-	int flag_authexist=0;
-	uint32_t accel_working = 0;
 	uint64_t start, max;
-
-	*device_handle = NULL;
-
-	pthread_mutex_lock(&dev_mutex);
-	r = amdgpu_get_auth(fd, &flag_auth);
-	if (r) {
-		fprintf(stderr, "%s: amdgpu_get_auth (1) failed (%i)\n",
-			__func__, r);
-		pthread_mutex_unlock(&dev_mutex);
-		return r;
-	}
+	int r;
 
 	for (dev = dev_list; dev; dev = dev->next)
-		if (fd_compare(dev->fd, fd) == 0)
+		if (fd_compare(dev->fd, user_dev->user_fd) == 0)
 			break;
 
 	if (dev) {
-		r = amdgpu_get_auth(dev->fd, &flag_authexist);
-		if (r) {
-			fprintf(stderr, "%s: amdgpu_get_auth (2) failed (%i)\n",
-				__func__, r);
-			pthread_mutex_unlock(&dev_mutex);
-			return r;
-		}
-		if ((flag_auth) && (!flag_authexist)) {
-			dev->flink_fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
-		}
-		*major_version = dev->major_version;
-		*minor_version = dev->minor_version;
-		amdgpu_device_reference(device_handle, dev);
-		pthread_mutex_unlock(&dev_mutex);
+		atomic_inc(&dev->refcount);
+		user_dev->core = dev;
 		return 0;
 	}
 
-	dev = calloc(1, sizeof(struct amdgpu_device));
+	dev = calloc(1, sizeof(struct amdgpu_core_device));
 	if (!dev) {
 		fprintf(stderr, "%s: calloc failed\n", __func__);
-		pthread_mutex_unlock(&dev_mutex);
 		return -ENOMEM;
 	}
 
-	dev->fd = -1;
-	dev->flink_fd = -1;
-
 	atomic_set(&dev->refcount, 1);
+	pthread_mutex_init(&dev->bo_table_mutex, NULL);
+
+	dev->fd = user_dev->user_fd;
+	user_dev->core = dev;
+
+	dev->next = dev_list;
+	dev_list = dev;
 
-	version = drmGetVersion(fd);
+	version = drmGetVersion(dev->fd);
 	if (version->version_major != 3) {
 		fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is "
 			"only compatible with 3.x.x.\n",
@@ -211,28 +131,11 @@  drm_public int amdgpu_device_initialize(int fd,
 		goto cleanup;
 	}
 
-	dev->fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
-	dev->flink_fd = dev->fd;
 	dev->major_version = version->version_major;
 	dev->minor_version = version->version_minor;
 	drmFreeVersion(version);
 
-	pthread_mutex_init(&dev->bo_table_mutex, NULL);
-
-	/* Check if acceleration is working. */
-	r = amdgpu_query_info(dev, AMDGPU_INFO_ACCEL_WORKING, 4, &accel_working);
-	if (r) {
-		fprintf(stderr, "%s: amdgpu_query_info(ACCEL_WORKING) failed (%i)\n",
-			__func__, r);
-		goto cleanup;
-	}
-	if (!accel_working) {
-		fprintf(stderr, "%s: AMDGPU_INFO_ACCEL_WORKING = 0\n", __func__);
-		r = -EBADF;
-		goto cleanup;
-	}
-
-	r = amdgpu_query_gpu_info_init(dev);
+	r = amdgpu_query_gpu_info_init(user_dev);
 	if (r) {
 		fprintf(stderr, "%s: amdgpu_query_gpu_info_init failed\n", __func__);
 		goto cleanup;
@@ -261,39 +164,97 @@  drm_public int amdgpu_device_initialize(int fd,
 			  dev->dev_info.virtual_address_alignment);
 
 	amdgpu_parse_asic_ids(dev);
+	return 0;
 
-	*major_version = dev->major_version;
-	*minor_version = dev->minor_version;
-	*device_handle = dev;
-	dev->next = dev_list;
-	dev_list = dev;
+cleanup:
+	user_dev->core = NULL; /* caller then closes user_fd, which is dev->fd */
+	dev_list = dev->next; /* unlink dev: it was pushed at the list head */
+	free(dev);
+	return r;
+}
+
+drm_public int amdgpu_device_initialize(int fd,
+					uint32_t *major_version,
+					uint32_t *minor_version,
+					amdgpu_device_handle *device_handle)
+{
+	struct amdgpu_device *user_dev;
+	uint32_t accel_working = 0;
+	int r;
+
+	*device_handle = NULL;
+
+	user_dev = calloc(1, sizeof(struct amdgpu_device));
+	if (!user_dev) {
+		fprintf(stderr, "%s: calloc failed\n", __func__);
+		return -ENOMEM;
+	}
+
+	user_dev->user_fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
+
+	pthread_mutex_lock(&dev_mutex);
+
+	r = amdgpu_device_init(user_dev);
+	if (r != 0)
+		goto cleanup;
+
+	/* Check if acceleration is working. */
+	r = amdgpu_query_info(user_dev, AMDGPU_INFO_ACCEL_WORKING, 4, &accel_working);
+	if (r) {
+		fprintf(stderr, "%s: amdgpu_query_info(ACCEL_WORKING) failed (%i)\n",
+			__func__, r);
+		goto cleanup;
+	}
+	if (!accel_working) {
+		fprintf(stderr, "%s: AMDGPU_INFO_ACCEL_WORKING = 0\n", __func__);
+		r = -EBADF;
+		goto cleanup;
+	}
+
+	*major_version = user_dev->core->major_version;
+	*minor_version = user_dev->core->minor_version;
+	*device_handle = user_dev;
 	pthread_mutex_unlock(&dev_mutex);
 
 	return 0;
 
 cleanup:
-	if (dev->fd >= 0)
-		close(dev->fd);
-	free(dev);
+	if (!user_dev->core || user_dev->user_fd != user_dev->core->fd)
+		close(user_dev->user_fd);
+	if (user_dev->core && update_references(&user_dev->core->refcount, NULL))
+		amdgpu_device_free(user_dev->core);
+	free(user_dev);
 	pthread_mutex_unlock(&dev_mutex);
 	return r;
 }
 
-drm_public int amdgpu_device_deinitialize(amdgpu_device_handle dev)
+drm_public int amdgpu_device_deinitialize(amdgpu_device_handle user_dev)
 {
-	amdgpu_device_reference(&dev, NULL);
+	struct amdgpu_core_device *dev = user_dev->core;
+
+	pthread_mutex_lock(&dev_mutex);
+
+	if (user_dev->user_fd != dev->fd)
+		close(user_dev->user_fd);
+
+	if (update_references(&dev->refcount, NULL))
+		amdgpu_device_free(dev);
+
+	pthread_mutex_unlock(&dev_mutex);
+	free(user_dev);
 	return 0;
 }
 
-drm_public const char *amdgpu_get_marketing_name(amdgpu_device_handle dev)
+drm_public const char *amdgpu_get_marketing_name(amdgpu_device_handle user_dev)
 {
-	return dev->marketing_name;
+	return user_dev->core->marketing_name;
 }
 
-drm_public int amdgpu_query_sw_info(amdgpu_device_handle dev,
+drm_public int amdgpu_query_sw_info(amdgpu_device_handle user_dev,
 				    enum amdgpu_sw_info info,
 				    void *value)
 {
+	struct amdgpu_core_device *dev = user_dev->core;
 	uint32_t *val32 = (uint32_t*)value;
 
 	switch (info) {
diff --git a/amdgpu/amdgpu_gpu_info.c b/amdgpu/amdgpu_gpu_info.c
index 777087f2..7253fbea 100644
--- a/amdgpu/amdgpu_gpu_info.c
+++ b/amdgpu/amdgpu_gpu_info.c
@@ -40,7 +40,7 @@  drm_public int amdgpu_query_info(amdgpu_device_handle dev, unsigned info_id,
 	request.return_size = size;
 	request.query = info_id;
 
-	return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request,
+	return drmCommandWrite(dev->core->fd, DRM_AMDGPU_INFO, &request,
 			       sizeof(struct drm_amdgpu_info));
 }
 
@@ -55,7 +55,7 @@  drm_public int amdgpu_query_crtc_from_id(amdgpu_device_handle dev, unsigned id,
 	request.query = AMDGPU_INFO_CRTC_FROM_ID;
 	request.mode_crtc.id = id;
 
-	return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request,
+	return drmCommandWrite(dev->core->fd, DRM_AMDGPU_INFO, &request,
 			       sizeof(struct drm_amdgpu_info));
 }
 
@@ -74,7 +74,7 @@  drm_public int amdgpu_read_mm_registers(amdgpu_device_handle dev,
 	request.read_mmr_reg.instance = instance;
 	request.read_mmr_reg.flags = flags;
 
-	return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request,
+	return drmCommandWrite(dev->core->fd, DRM_AMDGPU_INFO, &request,
 			       sizeof(struct drm_amdgpu_info));
 }
 
@@ -90,7 +90,7 @@  drm_public int amdgpu_query_hw_ip_count(amdgpu_device_handle dev,
 	request.query = AMDGPU_INFO_HW_IP_COUNT;
 	request.query_hw_ip.type = type;
 
-	return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request,
+	return drmCommandWrite(dev->core->fd, DRM_AMDGPU_INFO, &request,
 			       sizeof(struct drm_amdgpu_info));
 }
 
@@ -107,7 +107,7 @@  drm_public int amdgpu_query_hw_ip_info(amdgpu_device_handle dev, unsigned type,
 	request.query_hw_ip.type = type;
 	request.query_hw_ip.ip_instance = ip_instance;
 
-	return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request,
+	return drmCommandWrite(dev->core->fd, DRM_AMDGPU_INFO, &request,
 			       sizeof(struct drm_amdgpu_info));
 }
 
@@ -127,7 +127,7 @@  drm_public int amdgpu_query_firmware_version(amdgpu_device_handle dev,
 	request.query_fw.ip_instance = ip_instance;
 	request.query_fw.index = index;
 
-	r = drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request,
+	r = drmCommandWrite(dev->core->fd, DRM_AMDGPU_INFO, &request,
 			    sizeof(struct drm_amdgpu_info));
 	if (r)
 		return r;
@@ -137,11 +137,12 @@  drm_public int amdgpu_query_firmware_version(amdgpu_device_handle dev,
 	return 0;
 }
 
-drm_private int amdgpu_query_gpu_info_init(amdgpu_device_handle dev)
+drm_private int amdgpu_query_gpu_info_init(amdgpu_device_handle user_dev)
 {
+	struct amdgpu_core_device *dev = user_dev->core;
 	int r, i;
 
-	r = amdgpu_query_info(dev, AMDGPU_INFO_DEV_INFO, sizeof(dev->dev_info),
+	r = amdgpu_query_info(user_dev, AMDGPU_INFO_DEV_INFO, sizeof(dev->dev_info),
 			      &dev->dev_info);
 	if (r)
 		return r;
@@ -172,7 +173,7 @@  drm_private int amdgpu_query_gpu_info_init(amdgpu_device_handle dev)
 					    (AMDGPU_INFO_MMR_SH_INDEX_MASK <<
 					     AMDGPU_INFO_MMR_SH_INDEX_SHIFT);
 
-			r = amdgpu_read_mm_registers(dev, 0x263d, 1, instance, 0,
+			r = amdgpu_read_mm_registers(user_dev, 0x263d, 1, instance, 0,
 						     &dev->info.backend_disable[i]);
 			if (r)
 				return r;
@@ -180,13 +181,13 @@  drm_private int amdgpu_query_gpu_info_init(amdgpu_device_handle dev)
 			dev->info.backend_disable[i] =
 				(dev->info.backend_disable[i] >> 16) & 0xff;
 
-			r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0,
+			r = amdgpu_read_mm_registers(user_dev, 0xa0d4, 1, instance, 0,
 						     &dev->info.pa_sc_raster_cfg[i]);
 			if (r)
 				return r;
 
 			if (dev->info.family_id >= AMDGPU_FAMILY_CI) {
-				r = amdgpu_read_mm_registers(dev, 0xa0d5, 1, instance, 0,
+				r = amdgpu_read_mm_registers(user_dev, 0xa0d5, 1, instance, 0,
 						     &dev->info.pa_sc_raster_cfg1[i]);
 				if (r)
 					return r;
@@ -194,25 +195,25 @@  drm_private int amdgpu_query_gpu_info_init(amdgpu_device_handle dev)
 		}
 	}
 
-	r = amdgpu_read_mm_registers(dev, 0x263e, 1, 0xffffffff, 0,
+	r = amdgpu_read_mm_registers(user_dev, 0x263e, 1, 0xffffffff, 0,
 					     &dev->info.gb_addr_cfg);
 	if (r)
 		return r;
 
 	if (dev->info.family_id < AMDGPU_FAMILY_AI) {
-		r = amdgpu_read_mm_registers(dev, 0x2644, 32, 0xffffffff, 0,
+		r = amdgpu_read_mm_registers(user_dev, 0x2644, 32, 0xffffffff, 0,
 					     dev->info.gb_tile_mode);
 		if (r)
 			return r;
 
 		if (dev->info.family_id >= AMDGPU_FAMILY_CI) {
-			r = amdgpu_read_mm_registers(dev, 0x2664, 16, 0xffffffff, 0,
+			r = amdgpu_read_mm_registers(user_dev, 0x2664, 16, 0xffffffff, 0,
 						     dev->info.gb_macro_tile_mode);
 			if (r)
 				return r;
 		}
 
-		r = amdgpu_read_mm_registers(dev, 0x9d8, 1, 0xffffffff, 0,
+		r = amdgpu_read_mm_registers(user_dev, 0x9d8, 1, 0xffffffff, 0,
 					     &dev->info.mc_arb_ramcfg);
 		if (r)
 			return r;
@@ -235,7 +236,7 @@  drm_public int amdgpu_query_gpu_info(amdgpu_device_handle dev,
 		return -EINVAL;
 
 	/* Get ASIC info*/
-	*info = dev->info;
+	*info = dev->core->info;
 
 	return 0;
 }
@@ -328,6 +329,6 @@  drm_public int amdgpu_query_sensor_info(amdgpu_device_handle dev, unsigned senso
 	request.query = AMDGPU_INFO_SENSOR;
 	request.sensor_info.type = sensor_type;
 
-	return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request,
+	return drmCommandWrite(dev->core->fd, DRM_AMDGPU_INFO, &request,
 			       sizeof(struct drm_amdgpu_info));
 }
diff --git a/amdgpu/amdgpu_internal.h b/amdgpu/amdgpu_internal.h
index a340abbd..3a2ab74c 100644
--- a/amdgpu/amdgpu_internal.h
+++ b/amdgpu/amdgpu_internal.h
@@ -64,14 +64,13 @@  struct amdgpu_va {
 	struct amdgpu_bo_va_mgr *vamgr;
 };
 
-struct amdgpu_device {
+struct amdgpu_core_device {
 	atomic_t refcount;
-	struct amdgpu_device *next;
 	int fd;
-	int flink_fd;
 	unsigned major_version;
 	unsigned minor_version;
 
+	struct amdgpu_core_device *next;
 	char *marketing_name;
 	/** List of buffer handles. Protected by bo_table_mutex. */
 	struct handle_table bo_handles;
@@ -91,9 +90,14 @@  struct amdgpu_device {
 	struct amdgpu_bo_va_mgr vamgr_high_32;
 };
 
-struct amdgpu_bo {
+struct amdgpu_device {
+	int user_fd; /* dup of the fd given to amdgpu_device_initialize */
+	struct amdgpu_core_device *core; /* shared, refcounted core device */
+};
+
+struct amdgpu_core_bo {
 	atomic_t refcount;
-	struct amdgpu_device *dev;
+	amdgpu_bo_handle user_bos;
 
 	uint64_t alloc_size;
 
@@ -105,14 +109,21 @@  struct amdgpu_bo {
 	int cpu_map_count;
 };
 
-struct amdgpu_bo_list {
+struct amdgpu_bo {
+	atomic_t refcount;
+	struct amdgpu_bo *next;
+	struct amdgpu_core_bo *core;
 	struct amdgpu_device *dev;
+};
+
+struct amdgpu_bo_list {
+	struct amdgpu_core_device *dev;
 
 	uint32_t handle;
 };
 
 struct amdgpu_context {
-	struct amdgpu_device *dev;
+	struct amdgpu_core_device *dev;
 	/** Mutex for accessing fences and to maintain command submissions
 	    in good sequence. */
 	pthread_mutex_t sequence_mutex;
@@ -141,7 +152,7 @@  drm_private void amdgpu_vamgr_init(struct amdgpu_bo_va_mgr *mgr, uint64_t start,
 
 drm_private void amdgpu_vamgr_deinit(struct amdgpu_bo_va_mgr *mgr);
 
-drm_private void amdgpu_parse_asic_ids(struct amdgpu_device *dev);
+drm_private void amdgpu_parse_asic_ids(struct amdgpu_core_device *dev);
 
 drm_private int amdgpu_query_gpu_info_init(amdgpu_device_handle dev);
 
diff --git a/amdgpu/amdgpu_vamgr.c b/amdgpu/amdgpu_vamgr.c
index d25d4216..560b84e7 100644
--- a/amdgpu/amdgpu_vamgr.c
+++ b/amdgpu/amdgpu_vamgr.c
@@ -29,10 +29,12 @@ 
 #include "amdgpu_internal.h"
 #include "util_math.h"
 
-drm_public int amdgpu_va_range_query(amdgpu_device_handle dev,
+drm_public int amdgpu_va_range_query(amdgpu_device_handle user_dev,
 				     enum amdgpu_gpu_va_range type,
 				     uint64_t *start, uint64_t *end)
 {
+	struct amdgpu_core_device *dev = user_dev->core;
+
 	if (type != amdgpu_gpu_va_range_general)
 		return -EINVAL;
 
@@ -186,7 +188,7 @@  out:
 	pthread_mutex_unlock(&mgr->bo_va_mutex);
 }
 
-drm_public int amdgpu_va_range_alloc(amdgpu_device_handle dev,
+drm_public int amdgpu_va_range_alloc(amdgpu_device_handle user_dev,
 				     enum amdgpu_gpu_va_range va_range_type,
 				     uint64_t size,
 				     uint64_t va_base_alignment,
@@ -195,6 +197,7 @@  drm_public int amdgpu_va_range_alloc(amdgpu_device_handle dev,
 				     amdgpu_va_handle *va_range_handle,
 				     uint64_t flags)
 {
+	struct amdgpu_core_device *dev = user_dev->core;
 	struct amdgpu_bo_va_mgr *vamgr;
 
 	/* Clear the flag when the high VA manager is not initialized */
@@ -237,7 +240,7 @@  drm_public int amdgpu_va_range_alloc(amdgpu_device_handle dev,
 			amdgpu_vamgr_free_va(vamgr, *va_base_allocated, size);
 			return -ENOMEM;
 		}
-		va->dev = dev;
+		va->dev = user_dev;
 		va->address = *va_base_allocated;
 		va->size = size;
 		va->range = va_range_type;
diff --git a/amdgpu/amdgpu_vm.c b/amdgpu/amdgpu_vm.c
index 7e6e28f0..de44e6c2 100644
--- a/amdgpu/amdgpu_vm.c
+++ b/amdgpu/amdgpu_vm.c
@@ -33,7 +33,7 @@  drm_public int amdgpu_vm_reserve_vmid(amdgpu_device_handle dev, uint32_t flags)
 	vm.in.op = AMDGPU_VM_OP_RESERVE_VMID;
 	vm.in.flags = flags;
 
-	return drmCommandWriteRead(dev->fd, DRM_AMDGPU_VM,
+	return drmCommandWriteRead(dev->core->fd, DRM_AMDGPU_VM,
 				   &vm, sizeof(vm));
 }
 
@@ -45,6 +45,6 @@  drm_public int amdgpu_vm_unreserve_vmid(amdgpu_device_handle dev,
 	vm.in.op = AMDGPU_VM_OP_UNRESERVE_VMID;
 	vm.in.flags = flags;
 
-	return drmCommandWriteRead(dev->fd, DRM_AMDGPU_VM,
+	return drmCommandWriteRead(dev->core->fd, DRM_AMDGPU_VM,
 				   &vm, sizeof(vm));
 }
diff --git a/tests/amdgpu/amdgpu_test.c b/tests/amdgpu/amdgpu_test.c
index 73403fb4..7095c4e4 100644
--- a/tests/amdgpu/amdgpu_test.c
+++ b/tests/amdgpu/amdgpu_test.c
@@ -428,7 +428,7 @@  static void amdgpu_disable_suites()
 				   &minor_version, &device_handle))
 		return;
 
-	family_id = device_handle->info.family_id;
+	family_id = device_handle->core->info.family_id;
 
 	if (amdgpu_device_deinitialize(device_handle))
 		return;
diff --git a/tests/amdgpu/bo_tests.c b/tests/amdgpu/bo_tests.c
index 7cff4cf7..d89c944d 100644
--- a/tests/amdgpu/bo_tests.c
+++ b/tests/amdgpu/bo_tests.c
@@ -309,7 +309,7 @@  static void amdgpu_bo_find_by_cpu_mapping(void)
 					  &offset);
 	CU_ASSERT_EQUAL(r, 0);
 	CU_ASSERT_EQUAL(offset, 0);
-	CU_ASSERT_EQUAL(bo_handle->handle, find_bo_handle->handle);
+	CU_ASSERT_EQUAL(bo_handle->core->handle, find_bo_handle->core->handle);
 
 	atomic_dec(&find_bo_handle->refcount, 1);
 	r = amdgpu_bo_unmap_and_free(bo_handle, va_handle,
diff --git a/tests/amdgpu/cs_tests.c b/tests/amdgpu/cs_tests.c
index 7ad0f0dc..8a5f6ed3 100644
--- a/tests/amdgpu/cs_tests.c
+++ b/tests/amdgpu/cs_tests.c
@@ -68,7 +68,7 @@  CU_BOOL suite_cs_tests_enable(void)
 					     &minor_version, &device_handle))
 		return CU_FALSE;
 
-	family_id = device_handle->info.family_id;
+	family_id = device_handle->core->info.family_id;
 
 	if (amdgpu_device_deinitialize(device_handle))
 		return CU_FALSE;
@@ -101,10 +101,10 @@  int suite_cs_tests_init(void)
 		return CUE_SINIT_FAILED;
 	}
 
-	family_id = device_handle->info.family_id;
+	family_id = device_handle->core->info.family_id;
 	/* VI asic POLARIS10/11 have specific external_rev_id */
-	chip_rev = device_handle->info.chip_rev;
-	chip_id = device_handle->info.chip_external_rev;
+	chip_rev = device_handle->core->info.chip_rev;
+	chip_id = device_handle->core->info.chip_external_rev;
 
 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
 	if (r)
diff --git a/tests/amdgpu/deadlock_tests.c b/tests/amdgpu/deadlock_tests.c
index 91368c15..8526bae7 100644
--- a/tests/amdgpu/deadlock_tests.c
+++ b/tests/amdgpu/deadlock_tests.c
@@ -127,14 +127,14 @@  CU_BOOL suite_deadlock_tests_enable(void)
 	 * Only enable for ASICs supporting GPU reset and for which it's enabled
 	 * by default (currently GFX8/9 dGPUS)
 	 */
-	if (device_handle->info.family_id != AMDGPU_FAMILY_VI &&
-	    device_handle->info.family_id != AMDGPU_FAMILY_AI &&
-	    device_handle->info.family_id != AMDGPU_FAMILY_CI) {
+	if (device_handle->core->info.family_id != AMDGPU_FAMILY_VI &&
+	    device_handle->core->info.family_id != AMDGPU_FAMILY_AI &&
+	    device_handle->core->info.family_id != AMDGPU_FAMILY_CI) {
 		printf("\n\nGPU reset is not enabled for the ASIC, deadlock suite disabled\n");
 		enable = CU_FALSE;
 	}
 
-	if (device_handle->info.family_id >= AMDGPU_FAMILY_AI)
+	if (device_handle->core->info.family_id >= AMDGPU_FAMILY_AI)
 		use_uc_mtype = 1;
 
 	if (amdgpu_device_deinitialize(device_handle))
diff --git a/tests/amdgpu/uvd_enc_tests.c b/tests/amdgpu/uvd_enc_tests.c
index b4251bcf..856e7ae1 100644
--- a/tests/amdgpu/uvd_enc_tests.c
+++ b/tests/amdgpu/uvd_enc_tests.c
@@ -114,7 +114,7 @@  int suite_uvd_enc_tests_init(void)
 	if (r)
 		return CUE_SINIT_FAILED;
 
-	family_id = device_handle->info.family_id;
+	family_id = device_handle->core->info.family_id;
 
 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
 	if (r)
diff --git a/tests/amdgpu/vce_tests.c b/tests/amdgpu/vce_tests.c
index 0026826e..05d9ef57 100644
--- a/tests/amdgpu/vce_tests.c
+++ b/tests/amdgpu/vce_tests.c
@@ -103,10 +103,10 @@  CU_BOOL suite_vce_tests_enable(void)
 					     &minor_version, &device_handle))
 		return CU_FALSE;
 
-	family_id = device_handle->info.family_id;
-	chip_rev = device_handle->info.chip_rev;
-	chip_id = device_handle->info.chip_external_rev;
-	ids_flags = device_handle->info.ids_flags;
+	family_id = device_handle->core->info.family_id;
+	chip_rev = device_handle->core->info.chip_rev;
+	chip_id = device_handle->core->info.chip_external_rev;
+	ids_flags = device_handle->core->info.ids_flags;
 
 	amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
 					  0, &version, &feature);
@@ -153,8 +153,8 @@  int suite_vce_tests_init(void)
 		return CUE_SINIT_FAILED;
 	}
 
-	family_id = device_handle->info.family_id;
-	vce_harvest_config = device_handle->info.vce_harvest_config;
+	family_id = device_handle->core->info.family_id;
+	vce_harvest_config = device_handle->core->info.vce_harvest_config;
 
 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
 	if (r)
diff --git a/tests/amdgpu/vcn_tests.c b/tests/amdgpu/vcn_tests.c
index ad438f35..17248444 100644
--- a/tests/amdgpu/vcn_tests.c
+++ b/tests/amdgpu/vcn_tests.c
@@ -94,7 +94,7 @@  CU_BOOL suite_vcn_tests_enable(void)
 				   &minor_version, &device_handle))
 		return CU_FALSE;
 
-	family_id = device_handle->info.family_id;
+	family_id = device_handle->core->info.family_id;
 
 	if (amdgpu_device_deinitialize(device_handle))
 			return CU_FALSE;
@@ -132,7 +132,7 @@  int suite_vcn_tests_init(void)
 	if (r)
 		return CUE_SINIT_FAILED;
 
-	family_id = device_handle->info.family_id;
+	family_id = device_handle->core->info.family_id;
 
 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
 	if (r)
diff --git a/tests/amdgpu/vm_tests.c b/tests/amdgpu/vm_tests.c
index 69bc4683..e52aef25 100644
--- a/tests/amdgpu/vm_tests.c
+++ b/tests/amdgpu/vm_tests.c
@@ -43,7 +43,7 @@  CU_BOOL suite_vm_tests_enable(void)
 				     &minor_version, &device_handle))
 		return CU_FALSE;
 
-	if (device_handle->info.family_id == AMDGPU_FAMILY_SI) {
+	if (device_handle->core->info.family_id == AMDGPU_FAMILY_SI) {
 		printf("\n\nCurrently hangs the CP on this ASIC, VM suite disabled\n");
 		enable = CU_FALSE;
 	}