[libdrm] intel: Query full context GTT sizes for use with execbuffer

Submitted by Chris Wilson on Oct. 19, 2015, 12:45 p.m.

Details

Message ID 1445258740-9223-1-git-send-email-chris@chris-wilson.co.uk
State New
Headers show

Commit Message

Chris Wilson Oct. 19, 2015, 12:45 p.m.
With the advent of full per-process GTT, the per-context GTT may be a
different size from the global GTT reported by the get_aperture ioctl.
It is also likely to be 4GiB or larger, exposing some fragility in the
code for summing batch sizes.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 intel/intel_bufmgr_gem.c | 118 +++++++++++++++++++++++++++--------------------
 1 file changed, 69 insertions(+), 49 deletions(-)

Patch hide | download patch | download mbox

diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index a5549a6..82673b6 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -249,7 +249,7 @@  struct _drm_intel_bo_gem {
 	 * Used to avoid costly tree walking in
 	 * drm_intel_bufmgr_check_aperture in the common case.
 	 */
-	int reloc_tree_size;
+	uint64_t reloc_tree_size;
 
 	/**
 	 * Number of potential fence registers required by this buffer and its
@@ -261,10 +261,10 @@  struct _drm_intel_bo_gem {
 	bool mapped_cpu_write;
 };
 
-static unsigned int
+static uint64_t
 drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
 
-static unsigned int
+static uint64_t
 drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);
 
 static int
@@ -288,12 +288,13 @@  static inline drm_intel_bo_gem *to_bo_gem(drm_intel_bo *bo)
         return (drm_intel_bo_gem *)bo;
 }
 
-static unsigned long
-drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
+static uint64_t
+drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem,
+			   uint64_t size,
 			   uint32_t *tiling_mode)
 {
-	unsigned long min_size, max_size;
-	unsigned long i;
+	uint64_t min_size, max_size;
+	uint64_t i;
 
 	if (*tiling_mode == I915_TILING_NONE)
 		return size;
@@ -372,7 +373,7 @@  drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
 
 static struct drm_intel_gem_bo_bucket *
 drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
-				 unsigned long size)
+				 uint64_t size)
 {
 	int i;
 
@@ -531,7 +532,7 @@  drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
 				      drm_intel_bo_gem *bo_gem,
 				      unsigned int alignment)
 {
-	unsigned int size;
+	uint64_t size;
 
 	assert(!bo_gem->used_as_reloc_target);
 
@@ -663,7 +664,7 @@  drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
 static drm_intel_bo *
 drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
 				const char *name,
-				unsigned long size,
+				uint64_t size,
 				unsigned long flags,
 				uint32_t tiling_mode,
 				unsigned long stride,
@@ -673,7 +674,7 @@  drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
 	drm_intel_bo_gem *bo_gem;
 	struct drm_intel_gem_bo_bucket *bucket;
 	bool alloc_from_cache;
-	unsigned long bo_size;
+	uint64_t bo_size;
 	int ret;
 
 	/* Round the allocated size up to a power of two number of pages. */
@@ -825,7 +826,7 @@  drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
 			     unsigned long *pitch, unsigned long flags)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
-	unsigned long size, stride;
+	uint64_t size, stride;
 	uint32_t tiling;
 
 	do {
@@ -2247,14 +2248,14 @@  drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
 		ret = -errno;
 		if (errno == ENOSPC) {
 			DBG("Execbuffer fails to pin. "
-			    "Estimate: %u. Actual: %u. Available: %u\n",
-			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
+			    "Estimate: %llu. Actual: %llu. Available: %llu\n",
+			    (long long)drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
 							       bufmgr_gem->
 							       exec_count),
-			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
+			    (long long)drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
 							      bufmgr_gem->
 							      exec_count),
-			    (unsigned int)bufmgr_gem->gtt_size);
+			    (long long)bufmgr_gem->gtt_size);
 		}
 	}
 	drm_intel_update_buffer_offsets(bufmgr_gem);
@@ -2345,12 +2346,12 @@  do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
 		ret = -errno;
 		if (ret == -ENOSPC) {
 			DBG("Execbuffer fails to pin. "
-			    "Estimate: %u. Actual: %u. Available: %u\n",
-			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
+			    "Estimate: %llu. Actual: %llu. Available: %llu\n",
+			    (long long)drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
 							       bufmgr_gem->exec_count),
-			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
+			    (long long)drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
 							      bufmgr_gem->exec_count),
-			    (unsigned int) bufmgr_gem->gtt_size);
+			    (long long)bufmgr_gem->gtt_size);
 		}
 	}
 	drm_intel_update_buffer_offsets2(bufmgr_gem);
@@ -2681,12 +2682,12 @@  drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
  * Return the additional aperture space required by the tree of buffer objects
  * rooted at bo.
  */
-static int
+static uint64_t
 drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
 {
 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	uint64_t total = 0;
 	int i;
-	int total = 0;
 
 	if (bo == NULL || bo_gem->included_in_check_aperture)
 		return 0;
@@ -2751,11 +2752,11 @@  drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
  * Return a conservative estimate for the amount of aperture required
  * for a collection of buffers. This may double-count some buffers.
  */
-static unsigned int
+static uint64_t
 drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
 {
+	uint64_t total = 0;
 	int i;
-	unsigned int total = 0;
 
 	for (i = 0; i < count; i++) {
 		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
@@ -2770,11 +2771,11 @@  drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
  * This avoids double counting any buffers, at the cost of looking
  * at every buffer in the set.
  */
-static unsigned int
+static uint64_t
 drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
 {
+	uint64_t total = 0;
 	int i;
-	unsigned int total = 0;
 
 	for (i = 0; i < count; i++) {
 		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
@@ -2820,8 +2821,8 @@  drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem =
 	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
-	unsigned int total = 0;
-	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
+	uint64_t threshold = bufmgr_gem->gtt_size * 3 / 4;
+	uint64_t total = 0;
 	int total_fences;
 
 	/* Check for fence reg constraints if necessary */
@@ -2839,11 +2840,13 @@  drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
 	if (total > threshold) {
 		DBG("check_space: overflowed available aperture, "
 		    "%dkb vs %dkb\n",
-		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
+		    (int)(total / 1024),
+		    (int)(bufmgr_gem->gtt_size / 1024));
 		return -ENOSPC;
 	} else {
-		DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
-		    (int)bufmgr_gem->gtt_size / 1024);
+		DBG("drm_check_space: total %dkb vs bufgr %dkb\n",
+		    (int)(total / 1024),
+		    (int)(bufmgr_gem->gtt_size / 1024));
 		return 0;
 	}
 }
@@ -3345,6 +3348,40 @@  void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo)
 	return bo_gem->wc_virtual;
 }
 
+static uint64_t get_gtt_size(int fd) /* GTT size in bytes to assume for fd; never returns 0 */
+{
+	struct drm_i915_gem_get_aperture aperture;
+	struct local_i915_gem_context_param { /* argument block for the GETPARAM ioctl defined below */
+		uint32_t context;
+		uint32_t size;
+		uint64_t param;
+#define LOCAL_CONTEXT_PARAM_BAN_PERIOD  0x1
+#define LOCAL_CONTEXT_PARAM_NO_ZEROMAP  0x2
+#define LOCAL_CONTEXT_PARAM_GTT_SIZE    0x3 /* the only param this function queries */
+		uint64_t value; /* read back as the GTT size when the ioctl succeeds */
+	} p;
+#define LOCAL_I915_GEM_CONTEXT_GETPARAM       0x34
+#define LOCAL_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_CONTEXT_GETPARAM, struct local_i915_gem_context_param)
+
+	memclear(aperture); /* presumably zeroes it, so aper_size == 0 means "not known yet" - confirm memclear */
+
+	memclear(p);
+	p.param = LOCAL_CONTEXT_PARAM_GTT_SIZE;
+	if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p) == 0) /* first choice: context GTT size */
+		aperture.aper_size = p.value;
+	if (aperture.aper_size == 0) /* GETPARAM failed or reported 0: try the aperture ioctl */
+		(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); /* result ignored; aper_size is re-checked below */
+	if (aperture.aper_size == 0) { /* neither query produced a size: assume 128MiB and warn */
+		aperture.aper_size = 128 * 1024 * 1024;
+		fprintf(stderr,
+		       	"Unable to query GTT size, assuming %dkB available aperture size.\n"
+			"May lead to reduced performance or incorrect rendering.\n",
+			(int)(aperture.aper_size / 1024));
+	}
+
+	return aperture.aper_size;
+}
+
 /**
  * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
  * and manage map buffer objections.
@@ -3355,7 +3392,6 @@  drm_intel_bufmgr *
 drm_intel_bufmgr_gem_init(int fd, int batch_size)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem;
-	struct drm_i915_gem_get_aperture aperture;
 	drm_i915_getparam_t gp;
 	int ret, tmp;
 	bool exec2 = false;
@@ -3379,23 +3415,7 @@  drm_intel_bufmgr_gem_init(int fd, int batch_size)
 		goto exit;
 	}
 
-	memclear(aperture);
-	ret = drmIoctl(bufmgr_gem->fd,
-		       DRM_IOCTL_I915_GEM_GET_APERTURE,
-		       &aperture);
-
-	if (ret == 0)
-		bufmgr_gem->gtt_size = aperture.aper_available_size;
-	else {
-		fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n",
-			strerror(errno));
-		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
-		fprintf(stderr, "Assuming %dkB available aperture size.\n"
-			"May lead to reduced performance or incorrect "
-			"rendering.\n",
-			(int)bufmgr_gem->gtt_size / 1024);
-	}
-
+	bufmgr_gem->gtt_size = get_gtt_size(bufmgr_gem->fd);
 	bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);
 
 	if (IS_GEN2(bufmgr_gem->pci_device))