radv: implement VK_EXT_sample_locations

Submitted by Samuel Pitoiset on Dec. 7, 2018, 4:21 p.m.

Details

Message ID 20181207162117.3951-1-samuel.pitoiset@gmail.com
State New
Headers show
Series "radv: implement VK_EXT_sample_locations" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Samuel Pitoiset Dec. 7, 2018, 4:21 p.m.
Basically, this extension allows applications to use custom
sample locations. This only implements the bare minimum.
It doesn't support variable sample locations during a subpass.

Most of the dEQP-VK.pipeline.multisample.sample_locations_ext.*
CTS tests now pass.

Only enabled on VI+ because it's untested on older chips.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/amd/vulkan/radv_cmd_buffer.c  | 177 +++++++++++++++++++++++++++++-
 src/amd/vulkan/radv_device.c      |  27 +++++
 src/amd/vulkan/radv_extensions.py |   1 +
 src/amd/vulkan/radv_pipeline.c    |  30 +++++
 src/amd/vulkan/radv_private.h     |  26 +++--
 5 files changed, 253 insertions(+), 8 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index b4aea5bc898..c4bebeda0ce 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -105,6 +105,7 @@  radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
 	dest->viewport.count = src->viewport.count;
 	dest->scissor.count = src->scissor.count;
 	dest->discard_rectangle.count = src->discard_rectangle.count;
+	dest->sample_location.count = src->sample_location.count;
 
 	if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
 		if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
@@ -192,6 +193,22 @@  radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
 		}
 	}
 
+	if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
+		if (dest->sample_location.per_pixel != src->sample_location.per_pixel ||
+		    dest->sample_location.grid_size.width != src->sample_location.grid_size.width ||
+		    dest->sample_location.grid_size.height != src->sample_location.grid_size.height ||
+		    memcmp(&dest->sample_location.locations,
+			   &src->sample_location.locations,
+			   src->sample_location.count * sizeof(VkSampleLocationEXT))) {
+			dest->sample_location.per_pixel = src->sample_location.per_pixel;
+			dest->sample_location.grid_size = src->sample_location.grid_size;
+			typed_memcpy(dest->sample_location.locations,
+				     src->sample_location.locations,
+				     src->sample_location.count);
+			dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
+		}
+	}
+
 	cmd_buffer->state.dirty |= dest_mask;
 }
 
@@ -634,6 +651,135 @@  radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer,
 	}
 }
 
+/**
+ * Convert the user sample locations to hardware sample locations (the values
+ * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
+ */
+static void
+radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
+			      uint32_t x, uint32_t y, VkOffset2D *sample_locs)
+{
+	uint32_t x_offset = x % state->grid_size.width;
+	uint32_t y_offset = y % state->grid_size.height;
+	uint32_t num_samples = (uint32_t)state->per_pixel;
+	VkSampleLocationEXT *user_locs;
+	uint32_t pixel_offset;
+
+	pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples;
+
+	assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
+	user_locs = &state->locations[pixel_offset];
+
+	for (uint32_t i = 0; i < num_samples; i++) {
+		float shifted_pos_x = user_locs[i].x - 0.5;
+		float shifted_pos_y = user_locs[i].y - 0.5;
+
+		int32_t scaled_pos_x = floor(shifted_pos_x * 16);
+		int32_t scaled_pos_y = floor(shifted_pos_y * 16);
+
+		sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
+		sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
+	}
+}
+
+/**
+ * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample
+ * locations.
+ */
+static void
+radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
+			       uint32_t *sample_locs_pixel)
+{
+	for (uint32_t i = 0; i < num_samples; ++i) {
+		uint32_t sample_reg_idx = i / 4;
+		uint32_t sample_loc_idx = i % 4;
+		int32_t pos_x = sample_locs[i].x;
+		int32_t pos_y = sample_locs[i].y;
+
+		uint32_t shift_x = 8 * sample_loc_idx;
+		uint32_t shift_y = shift_x + 4;
+
+		sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x;
+		sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y;
+	}
+}
+
+/**
+ * Emit the sample locations that are specified with VK_EXT_sample_locations.
+ */
+static void
+radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+	struct radv_multisample_state *ms = &pipeline->graphics.ms;
+	struct radv_sample_locations_state *sample_location =
+		&cmd_buffer->state.dynamic.sample_location;
+	uint32_t num_samples = (uint32_t)sample_location->per_pixel;
+	struct radeon_cmdbuf *cs = cmd_buffer->cs;
+	uint32_t sample_locs_pixel[4][2] = {};
+	VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */
+	uint32_t max_sample_dist = 0;
+
+	/* Convert the user sample locations to hardware sample locations. */
+	radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]);
+	radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]);
+	radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]);
+	radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]);
+
+	/* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */
+	for (uint32_t i = 0; i < 4; i++) {
+		radv_compute_sample_locs_pixel(num_samples, sample_locs[i],
+					       sample_locs_pixel[i]);
+	}
+
+	/* Emit the specified user sample locations. */
+	switch (num_samples) {
+	case 2:
+	case 4:
+		radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
+		radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
+		radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
+		radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
+		break;
+	case 8:
+		radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
+		radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
+		radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
+		radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
+		radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, sample_locs_pixel[0][1]);
+		radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, sample_locs_pixel[1][1]);
+		radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, sample_locs_pixel[2][1]);
+		radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, sample_locs_pixel[3][1]);
+		break;
+	default:
+		unreachable("Unsupported number of samples!");
+	}
+
+	/* Compute the maximum sample distance from the specified locations. */
+	for (uint32_t i = 0; i < num_samples; i++) {
+		VkOffset2D offset = sample_locs[0][i];
+		max_sample_dist = MAX2(max_sample_dist,
+				       MAX2(abs(offset.x), abs(offset.y)));
+	}
+
+	/* Emit the maximum sample distance if different. */
+	if (G_028BE0_MAX_SAMPLE_DIST(ms->pa_sc_aa_config) != max_sample_dist) {
+		uint32_t pa_sc_aa_config = ms->pa_sc_aa_config;
+
+		pa_sc_aa_config &= C_028BE0_MAX_SAMPLE_DIST;
+		pa_sc_aa_config |= S_028BE0_MAX_SAMPLE_DIST(max_sample_dist);
+
+		radeon_set_context_reg_seq(cs, R_028BE0_PA_SC_AA_CONFIG, 1);
+		radeon_emit(cs, pa_sc_aa_config);
+
+		/* GFX9: Flush DFSM when the AA mode changes. */
+		if (cmd_buffer->device->dfsm_allowed) {
+			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+			radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
+		}
+	}
+}
+
 static void
 radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
 			      struct radv_pipeline *pipeline)
@@ -645,7 +791,14 @@  radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
 	if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions)
 		cmd_buffer->sample_positions_needed = true;
 
-	if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples)
+	/* Emit the multisample state (including sample locations) only if:
+	 * - it's the first bound pipeline in the command buffer
+	 * - the number of samples of this pipeline is different
+	 * - the previous pipeline used custom sample locations
+	 */
+	if (old_pipeline &&
+	    num_samples == old_pipeline->graphics.ms.num_samples &&
+	    !old_pipeline->dynamic_state.sample_location.count)
 		return;
 
 	radeon_set_context_reg_seq(cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL, 2);
@@ -1711,6 +1864,9 @@  radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
 	if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE)
 		radv_emit_discard_rectangle(cmd_buffer);
 
+	if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)
+		radv_emit_sample_locations(cmd_buffer);
+
 	cmd_buffer->state.dirty &= ~states;
 }
 
@@ -3050,6 +3206,25 @@  void radv_CmdSetDiscardRectangleEXT(
 	state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE;
 }
 
+void radv_CmdSetSampleLocationsEXT(
+	VkCommandBuffer                             commandBuffer,
+	const VkSampleLocationsInfoEXT*             pSampleLocationsInfo)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	struct radv_cmd_state *state = &cmd_buffer->state;
+
+	assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
+
+	state->dynamic.sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
+	state->dynamic.sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
+	state->dynamic.sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
+	typed_memcpy(&state->dynamic.sample_location.locations[0],
+		     pSampleLocationsInfo->pSampleLocations,
+		     pSampleLocationsInfo->sampleLocationsCount);
+
+	state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
+}
+
 void radv_CmdExecuteCommands(
 	VkCommandBuffer                             commandBuffer,
 	uint32_t                                    commandBufferCount,
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index cb51ee44e58..6b19641f66d 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1238,6 +1238,19 @@  void radv_GetPhysicalDeviceProperties2(
 			properties->transformFeedbackDraw = true;
 			break;
 		}
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
+			VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
+				(VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
+			properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT |
+								 VK_SAMPLE_COUNT_4_BIT |
+								 VK_SAMPLE_COUNT_8_BIT;
+			properties->maxSampleLocationGridSize = (VkExtent2D){ 2 , 2 };
+			properties->sampleLocationCoordinateRange[0] = 0.0f;
+			properties->sampleLocationCoordinateRange[1] = 1.0f;
+			properties->sampleLocationSubPixelBits = 4;
+			properties->variableSampleLocations = VK_FALSE;
+			break;
+		}
 		default:
 			break;
 		}
@@ -5111,3 +5124,17 @@  VkResult radv_GetCalibratedTimestampsEXT(
 
 	return VK_SUCCESS;
 }
+
+void radv_GetPhysicalDeviceMultisamplePropertiesEXT(
+    VkPhysicalDevice                            physicalDevice,
+    VkSampleCountFlagBits                       samples,
+    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
+{
+	if (samples & (VK_SAMPLE_COUNT_2_BIT |
+		       VK_SAMPLE_COUNT_4_BIT |
+		       VK_SAMPLE_COUNT_8_BIT)) {
+		pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 2, 2 };
+	} else {
+		pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
+	}
+}
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index d14169144f7..19b24ac4157 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -106,6 +106,7 @@  EXTENSIONS = [
     Extension('VK_EXT_external_memory_host',              1, 'device->rad_info.has_userptr'),
     Extension('VK_EXT_global_priority',                   1, 'device->rad_info.has_ctx_priority'),
     Extension('VK_EXT_pci_bus_info',                      1, True),
+    Extension('VK_EXT_sample_locations',                  1, 'device->rad_info.chip_class >= VI'),
     Extension('VK_EXT_sampler_filter_minmax',             1, 'device->rad_info.chip_class >= CIK'),
     Extension('VK_EXT_scalar_block_layout',               1, 'device->rad_info.chip_class >= CIK'),
     Extension('VK_EXT_shader_viewport_index_layer',       1, True),
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 33076cc2bd2..266fdb43367 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1276,6 +1276,8 @@  static unsigned radv_dynamic_state_mask(VkDynamicState state)
 		return RADV_DYNAMIC_STENCIL_REFERENCE;
 	case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT:
 		return RADV_DYNAMIC_DISCARD_RECTANGLE;
+	case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
+		return RADV_DYNAMIC_SAMPLE_LOCATIONS;
 	default:
 		unreachable("Unhandled dynamic state");
 	}
@@ -1306,6 +1308,11 @@  static uint32_t radv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreat
 	if (!vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT))
 		states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE;
 
+	if (!pCreateInfo->pMultisampleState ||
+	    !vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
+				  PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT))
+		states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS;
+
 	/* TODO: blend constants & line width. */
 
 	return states;
@@ -1442,6 +1449,29 @@  radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 		             discard_rectangle_info->discardRectangleCount);
 	}
 
+	if (states & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
+		const VkPipelineSampleLocationsStateCreateInfoEXT *sample_location_info =
+			vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
+					     PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
+		/* If sampleLocationsEnable is VK_FALSE, the default sample
+		 * locations are used and the values specified in
+		 * sampleLocationsInfo are ignored.
+		 */
+		if (sample_location_info->sampleLocationsEnable) {
+			const VkSampleLocationsInfoEXT *pSampleLocationsInfo =
+				&sample_location_info->sampleLocationsInfo;
+
+			assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
+
+			dynamic->sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
+			dynamic->sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
+			dynamic->sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
+			typed_memcpy(&dynamic->sample_location.locations[0],
+				     pSampleLocationsInfo->pSampleLocations,
+				     pSampleLocationsInfo->sampleLocationsCount);
+		}
+	}
+
 	pipeline->dynamic_state.mask = states;
 }
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index e3dd301ee8f..4139a2911aa 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -90,6 +90,7 @@  typedef uint32_t xcb_window_t;
 #define MAX_VIEWPORTS   16
 #define MAX_SCISSORS    16
 #define MAX_DISCARD_RECTANGLES 4
+#define MAX_SAMPLE_LOCATIONS 32
 #define MAX_PUSH_CONSTANTS_SIZE 128
 #define MAX_PUSH_DESCRIPTORS 32
 #define MAX_DYNAMIC_UNIFORM_BUFFERS 16
@@ -829,7 +830,8 @@  enum radv_dynamic_state_bits {
 	RADV_DYNAMIC_STENCIL_WRITE_MASK   = 1 << 7,
 	RADV_DYNAMIC_STENCIL_REFERENCE    = 1 << 8,
 	RADV_DYNAMIC_DISCARD_RECTANGLE    = 1 << 9,
-	RADV_DYNAMIC_ALL                  = (1 << 10) - 1,
+	RADV_DYNAMIC_SAMPLE_LOCATIONS     = 1 << 10,
+	RADV_DYNAMIC_ALL                  = (1 << 11) - 1,
 };
 
 enum radv_cmd_dirty_bits {
@@ -845,12 +847,13 @@  enum radv_cmd_dirty_bits {
 	RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK        = 1 << 7,
 	RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE         = 1 << 8,
 	RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE         = 1 << 9,
-	RADV_CMD_DIRTY_DYNAMIC_ALL                       = (1 << 10) - 1,
-	RADV_CMD_DIRTY_PIPELINE                          = 1 << 10,
-	RADV_CMD_DIRTY_INDEX_BUFFER                      = 1 << 11,
-	RADV_CMD_DIRTY_FRAMEBUFFER                       = 1 << 12,
-	RADV_CMD_DIRTY_VERTEX_BUFFER                     = 1 << 13,
-	RADV_CMD_DIRTY_STREAMOUT_BUFFER                  = 1 << 14,
+	RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS          = 1 << 10,
+	RADV_CMD_DIRTY_DYNAMIC_ALL                       = (1 << 11) - 1,
+	RADV_CMD_DIRTY_PIPELINE                          = 1 << 11,
+	RADV_CMD_DIRTY_INDEX_BUFFER                      = 1 << 12,
+	RADV_CMD_DIRTY_FRAMEBUFFER                       = 1 << 13,
+	RADV_CMD_DIRTY_VERTEX_BUFFER                     = 1 << 14,
+	RADV_CMD_DIRTY_STREAMOUT_BUFFER                  = 1 << 15,
 };
 
 enum radv_cmd_flush_bits {
@@ -927,6 +930,13 @@  struct radv_discard_rectangle_state {
 	VkRect2D                                          rectangles[MAX_DISCARD_RECTANGLES];
 };
 
+struct radv_sample_locations_state {
+	VkSampleCountFlagBits per_pixel;
+	VkExtent2D grid_size;
+	uint32_t count;
+	VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
+};
+
 struct radv_dynamic_state {
 	/**
 	 * Bitmask of (1 << VK_DYNAMIC_STATE_*).
@@ -969,6 +979,8 @@  struct radv_dynamic_state {
 	} stencil_reference;
 
 	struct radv_discard_rectangle_state               discard_rectangle;
+
+	struct radv_sample_locations_state                sample_location;
 };
 
 extern const struct radv_dynamic_state default_dynamic_state;

Comments

A small number of questions/concerns:

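- sampleLocationCoordinateRange[1] should probably be set to 0.9375
  because of how the sample locations are encoded: each coordinate is
  stored as a signed 4-bit offset from the pixel center in 1/16 pixel
  units and clamped to [-8, 7], so the largest representable position
  is 0.5 + 7/16 = 0.9375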
- it doesn't look like gl_SamplePosition would return the new sample
  locations
- R_028BD4_PA_SC_CENTROID_PRIORITY_{0,1} isn't updated. I'm not sure if
  this is required, but it's probably best to do so.
- I think it can pointlessly call radv_cayman_emit_msaa_sample_locs()
  before radv_emit_sample_locations()
- unlike AMDVLK, this doesn't seem to make use of sample location
  information during layout transitions?

You said that this implements the bare minimum, so you might already know
about some of these (unless you were only referring to the lack of
variableSampleLocations support).
On Fri, 7 Dec 2018 at 16:19, Samuel Pitoiset <samuel.pitoiset@gmail.com> wrote:
>
> Basically, this extension allows applications to use custom
> sample locations. This only implements the barely minimum.
> It doesn't support variable sample locations during subpass.
>
> Most of the dEQP-VK.pipeline.multisample.sample_locations_ext.*
> CTS now pass.
>
> Only enabled on VI+ because it's untested on older chips.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
> ---
>  src/amd/vulkan/radv_cmd_buffer.c  | 177 +++++++++++++++++++++++++++++-
>  src/amd/vulkan/radv_device.c      |  27 +++++
>  src/amd/vulkan/radv_extensions.py |   1 +
>  src/amd/vulkan/radv_pipeline.c    |  30 +++++
>  src/amd/vulkan/radv_private.h     |  26 +++--
>  5 files changed, 253 insertions(+), 8 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index b4aea5bc898..c4bebeda0ce 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
>         dest->viewport.count = src->viewport.count;
>         dest->scissor.count = src->scissor.count;
>         dest->discard_rectangle.count = src->discard_rectangle.count;
> +       dest->sample_location.count = src->sample_location.count;
>
>         if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
>                 if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
> @@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
>                 }
>         }
>
> +       if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
> +               if (dest->sample_location.per_pixel != src->sample_location.per_pixel ||
> +                   dest->sample_location.grid_size.width != src->sample_location.grid_size.width ||
> +                   dest->sample_location.grid_size.height != src->sample_location.grid_size.height ||
> +                   memcmp(&dest->sample_location.locations,
> +                          &src->sample_location.locations,
> +                          src->sample_location.count * sizeof(VkSampleLocationEXT))) {
> +                       dest->sample_location.per_pixel = src->sample_location.per_pixel;
> +                       dest->sample_location.grid_size = src->sample_location.grid_size;
> +                       typed_memcpy(dest->sample_location.locations,
> +                                    src->sample_location.locations,
> +                                    src->sample_location.count);
> +                       dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
> +               }
> +       }
> +
>         cmd_buffer->state.dirty |= dest_mask;
>  }
>
> @@ -634,6 +651,135 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer,
>         }
>  }
>
> +/**
> + * Convert the user sample locations to hardware sample locations (the values
> + * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
> + */
> +static void
> +radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
> +                             uint32_t x, uint32_t y, VkOffset2D *sample_locs)
> +{
> +       uint32_t x_offset = x % state->grid_size.width;
> +       uint32_t y_offset = y % state->grid_size.height;
> +       uint32_t num_samples = (uint32_t)state->per_pixel;
> +       VkSampleLocationEXT *user_locs;
> +       uint32_t pixel_offset;
> +
> +       pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples;
> +
> +       assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
> +       user_locs = &state->locations[pixel_offset];
> +
> +       for (uint32_t i = 0; i < num_samples; i++) {
> +               float shifted_pos_x = user_locs[i].x - 0.5;
> +               float shifted_pos_y = user_locs[i].y - 0.5;
> +
> +               int32_t scaled_pos_x = floor(shifted_pos_x * 16);
> +               int32_t scaled_pos_y = floor(shifted_pos_y * 16);
> +
> +               sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
> +               sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
> +       }
> +}
> +
> +/**
> + * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample
> + * locations.
> + */
> +static void
> +radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
> +                              uint32_t *sample_locs_pixel)
> +{
> +       for (uint32_t i = 0; i < num_samples; ++i) {
> +               uint32_t sample_reg_idx = i / 4;
> +               uint32_t sample_loc_idx = i % 4;
> +               int32_t pos_x = sample_locs[i].x;
> +               int32_t pos_y = sample_locs[i].y;
> +
> +               uint32_t shift_x = 8 * sample_loc_idx;
> +               uint32_t shift_y = shift_x + 4;
> +
> +               sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x;
> +               sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y;
> +       }
> +}
> +
> +/**
> + * Emit the sample locations that are specified with VK_EXT_sample_locations.
> + */
> +static void
> +radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
> +{
> +       struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
> +       struct radv_multisample_state *ms = &pipeline->graphics.ms;
> +       struct radv_sample_locations_state *sample_location =
> +               &cmd_buffer->state.dynamic.sample_location;
> +       uint32_t num_samples = (uint32_t)sample_location->per_pixel;
> +       struct radeon_cmdbuf *cs = cmd_buffer->cs;
> +       uint32_t sample_locs_pixel[4][2] = {};
> +       VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */
> +       uint32_t max_sample_dist = 0;
> +
> +       /* Convert the user sample locations to hardware sample locations. */
> +       radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]);
> +       radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]);
> +       radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]);
> +       radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]);
> +
> +       /* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */
> +       for (uint32_t i = 0; i < 4; i++) {
> +               radv_compute_sample_locs_pixel(num_samples, sample_locs[i],
> +                                              sample_locs_pixel[i]);
> +       }
> +
> +       /* Emit the specified user sample locations. */
> +       switch (num_samples) {
> +       case 2:
> +       case 4:
> +               radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
> +               radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
> +               radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
> +               radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
> +               break;
> +       case 8:
> +               radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
> +               radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
> +               radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
> +               radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
> +               radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, sample_locs_pixel[0][1]);
> +               radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, sample_locs_pixel[1][1]);
> +               radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, sample_locs_pixel[2][1]);
> +               radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, sample_locs_pixel[3][1]);
> +               break;
> +       default:
> +               unreachable("Unsupported number of samples!");
> +       }
> +
> +       /* Compute the maximum sample distance from the specified locations. */
> +       for (uint32_t i = 0; i < num_samples; i++) {
> +               VkOffset2D offset = sample_locs[0][i];
> +               max_sample_dist = MAX2(max_sample_dist,
> +                                      MAX2(abs(offset.x), abs(offset.y)));
> +       }
> +
> +       /* Emit the maximum sample distance if different. */
> +       if (G_028BE0_MAX_SAMPLE_DIST(ms->pa_sc_aa_config) != max_sample_dist) {
> +               uint32_t pa_sc_aa_config = ms->pa_sc_aa_config;
> +
> +               pa_sc_aa_config &= C_028BE0_MAX_SAMPLE_DIST;
> +               pa_sc_aa_config |= S_028BE0_MAX_SAMPLE_DIST(max_sample_dist);
> +
> +               radeon_set_context_reg_seq(cs, R_028BE0_PA_SC_AA_CONFIG, 1);
> +               radeon_emit(cs, pa_sc_aa_config);
> +
> +               /* GFX9: Flush DFSM when the AA mode changes. */
> +               if (cmd_buffer->device->dfsm_allowed) {
> +                       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
> +                       radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
> +               }
> +       }
> +}
> +
>  static void
>  radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
>                               struct radv_pipeline *pipeline)
> @@ -645,7 +791,14 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
>         if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions)
>                 cmd_buffer->sample_positions_needed = true;
>
> -       if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples)
> +       /* Emit the multisample state (including sample locations) only if:
> +        * - it's the first bound pipeline in the command buffer
> +        * - the number of samples of this pipeline is different
> +        * - the previous pipeline used custom sample locations
> +        */
> +       if (old_pipeline &&
> +           num_samples == old_pipeline->graphics.ms.num_samples &&
> +           !old_pipeline->dynamic_state.sample_location.count)
>                 return;
>
>         radeon_set_context_reg_seq(cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL, 2);
> @@ -1711,6 +1864,9 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
>         if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE)
>                 radv_emit_discard_rectangle(cmd_buffer);
>
> +       if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)
> +               radv_emit_sample_locations(cmd_buffer);
> +
>         cmd_buffer->state.dirty &= ~states;
>  }
>
> @@ -3050,6 +3206,25 @@ void radv_CmdSetDiscardRectangleEXT(
>         state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE;
>  }
>
> +void radv_CmdSetSampleLocationsEXT(
> +       VkCommandBuffer                             commandBuffer,
> +       const VkSampleLocationsInfoEXT*             pSampleLocationsInfo)
> +{
> +       RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
> +       struct radv_cmd_state *state = &cmd_buffer->state;
> +
> +       assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
> +
> +       state->dynamic.sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
> +       state->dynamic.sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
> +       state->dynamic.sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
> +       typed_memcpy(&state->dynamic.sample_location.locations[0],
> +                    pSampleLocationsInfo->pSampleLocations,
> +                    pSampleLocationsInfo->sampleLocationsCount);
> +
> +       state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
> +}
> +
>  void radv_CmdExecuteCommands(
>         VkCommandBuffer                             commandBuffer,
>         uint32_t                                    commandBufferCount,
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index cb51ee44e58..6b19641f66d 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -1238,6 +1238,19 @@ void radv_GetPhysicalDeviceProperties2(
>                         properties->transformFeedbackDraw = true;
>                         break;
>                 }
> +               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
> +                       VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
> +                               (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
> +                       properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT |
> +                                                                VK_SAMPLE_COUNT_4_BIT |
> +                                                                VK_SAMPLE_COUNT_8_BIT;
> +                       properties->maxSampleLocationGridSize = (VkExtent2D){ 2 , 2 };
> +                       properties->sampleLocationCoordinateRange[0] = 0.0f;
> +                       properties->sampleLocationCoordinateRange[1] = 1.0f;
> +                       properties->sampleLocationSubPixelBits = 4;
> +                       properties->variableSampleLocations = VK_FALSE;
> +                       break;
> +               }
>                 default:
>                         break;
>                 }
> @@ -5111,3 +5124,17 @@ VkResult radv_GetCalibratedTimestampsEXT(
>
>         return VK_SUCCESS;
>  }
> +
> +void radv_GetPhysicalDeviceMultisamplePropertiesEXT(
> +    VkPhysicalDevice                            physicalDevice,
> +    VkSampleCountFlagBits                       samples,
> +    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
> +{
> +       if (samples & (VK_SAMPLE_COUNT_2_BIT |
> +                      VK_SAMPLE_COUNT_4_BIT |
> +                      VK_SAMPLE_COUNT_8_BIT)) {
> +               pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 2, 2 };
> +       } else {
> +               pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
> +       }
> +}
> diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
> index d14169144f7..19b24ac4157 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -106,6 +106,7 @@ EXTENSIONS = [
>      Extension('VK_EXT_external_memory_host',              1, 'device->rad_info.has_userptr'),
>      Extension('VK_EXT_global_priority',                   1, 'device->rad_info.has_ctx_priority'),
>      Extension('VK_EXT_pci_bus_info',                      1, True),
> +    Extension('VK_EXT_sample_locations',                  1, 'device->rad_info.chip_class >= VI'),
>      Extension('VK_EXT_sampler_filter_minmax',             1, 'device->rad_info.chip_class >= CIK'),
>      Extension('VK_EXT_scalar_block_layout',               1, 'device->rad_info.chip_class >= CIK'),
>      Extension('VK_EXT_shader_viewport_index_layer',       1, True),
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index 33076cc2bd2..266fdb43367 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -1276,6 +1276,8 @@ static unsigned radv_dynamic_state_mask(VkDynamicState state)
>                 return RADV_DYNAMIC_STENCIL_REFERENCE;
>         case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT:
>                 return RADV_DYNAMIC_DISCARD_RECTANGLE;
> +       case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
> +               return RADV_DYNAMIC_SAMPLE_LOCATIONS;
>         default:
>                 unreachable("Unhandled dynamic state");
>         }
> @@ -1306,6 +1308,11 @@ static uint32_t radv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreat
>         if (!vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT))
>                 states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE;
>
> +       if (!pCreateInfo->pMultisampleState ||
> +           !vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
> +                                 PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT))
> +               states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS;
> +
>         /* TODO: blend constants & line width. */
>
>         return states;
> @@ -1442,6 +1449,29 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
>                              discard_rectangle_info->discardRectangleCount);
>         }
>
> +       if (states & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
> +               const VkPipelineSampleLocationsStateCreateInfoEXT *sample_location_info =
> +                       vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
> +                                            PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
> +               /* If sampleLocationsEnable is VK_FALSE, the default sample
> +                * locations are used and the values specified in
> +                * sampleLocationsInfo are ignored.
> +                */
> +               if (sample_location_info->sampleLocationsEnable) {
> +                       const VkSampleLocationsInfoEXT *pSampleLocationsInfo =
> +                               &sample_location_info->sampleLocationsInfo;
> +
> +                       assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
> +
> +                       dynamic->sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
> +                       dynamic->sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
> +                       dynamic->sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
> +                       typed_memcpy(&dynamic->sample_location.locations[0],
> +                                    pSampleLocationsInfo->pSampleLocations,
> +                                    pSampleLocationsInfo->sampleLocationsCount);
> +               }
> +       }
> +
>         pipeline->dynamic_state.mask = states;
>  }
>
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index e3dd301ee8f..4139a2911aa 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -90,6 +90,7 @@ typedef uint32_t xcb_window_t;
>  #define MAX_VIEWPORTS   16
>  #define MAX_SCISSORS    16
>  #define MAX_DISCARD_RECTANGLES 4
> +#define MAX_SAMPLE_LOCATIONS 32
>  #define MAX_PUSH_CONSTANTS_SIZE 128
>  #define MAX_PUSH_DESCRIPTORS 32
>  #define MAX_DYNAMIC_UNIFORM_BUFFERS 16
> @@ -829,7 +830,8 @@ enum radv_dynamic_state_bits {
>         RADV_DYNAMIC_STENCIL_WRITE_MASK   = 1 << 7,
>         RADV_DYNAMIC_STENCIL_REFERENCE    = 1 << 8,
>         RADV_DYNAMIC_DISCARD_RECTANGLE    = 1 << 9,
> -       RADV_DYNAMIC_ALL                  = (1 << 10) - 1,
> +       RADV_DYNAMIC_SAMPLE_LOCATIONS     = 1 << 10,
> +       RADV_DYNAMIC_ALL                  = (1 << 11) - 1,
>  };
>
>  enum radv_cmd_dirty_bits {
> @@ -845,12 +847,13 @@ enum radv_cmd_dirty_bits {
>         RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK        = 1 << 7,
>         RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE         = 1 << 8,
>         RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE         = 1 << 9,
> -       RADV_CMD_DIRTY_DYNAMIC_ALL                       = (1 << 10) - 1,
> -       RADV_CMD_DIRTY_PIPELINE                          = 1 << 10,
> -       RADV_CMD_DIRTY_INDEX_BUFFER                      = 1 << 11,
> -       RADV_CMD_DIRTY_FRAMEBUFFER                       = 1 << 12,
> -       RADV_CMD_DIRTY_VERTEX_BUFFER                     = 1 << 13,
> -       RADV_CMD_DIRTY_STREAMOUT_BUFFER                  = 1 << 14,
> +       RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS          = 1 << 10,
> +       RADV_CMD_DIRTY_DYNAMIC_ALL                       = (1 << 11) - 1,
> +       RADV_CMD_DIRTY_PIPELINE                          = 1 << 11,
> +       RADV_CMD_DIRTY_INDEX_BUFFER                      = 1 << 12,
> +       RADV_CMD_DIRTY_FRAMEBUFFER                       = 1 << 13,
> +       RADV_CMD_DIRTY_VERTEX_BUFFER                     = 1 << 14,
> +       RADV_CMD_DIRTY_STREAMOUT_BUFFER                  = 1 << 15,
>  };
>
>  enum radv_cmd_flush_bits {
> @@ -927,6 +930,13 @@ struct radv_discard_rectangle_state {
>         VkRect2D                                          rectangles[MAX_DISCARD_RECTANGLES];
>  };
>
> +struct radv_sample_locations_state {
> +       VkSampleCountFlagBits per_pixel;
> +       VkExtent2D grid_size;
> +       uint32_t count;
> +       VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
> +};
> +
>  struct radv_dynamic_state {
>         /**
>          * Bitmask of (1 << VK_DYNAMIC_STATE_*).
> @@ -969,6 +979,8 @@ struct radv_dynamic_state {
>         } stencil_reference;
>
>         struct radv_discard_rectangle_state               discard_rectangle;
> +
> +       struct radv_sample_locations_state                sample_location;
>  };
>
>  extern const struct radv_dynamic_state default_dynamic_state;
> --
> 2.19.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
On Sat, Dec 8, 2018 at 7:03 PM Rhys Perry <pendingchaos02@gmail.com> wrote:
>
> A small number of questions/concerns:
>
> - sampleLocationCoordinateRange[1] should probably be set to 0.9375,
>   because of how the sample locations are encoded
> - gl_SamplePosition doesn't seem like it would return the new sample
>   locations
> - R_028BD4_PA_SC_CENTROID_PRIORITY_{0,1} isn't updated. I'm not sure if
>   this is required, but it's probably best to do so.
> - I think it can pointlessly call radv_cayman_emit_msaa_sample_locs()
>   before radv_emit_sample_locations()
> - unlike AMDVLK, this doesn't seem to make use of sample location
>   information during layout transitions?

AFAIU having the correct sample locations might be necessary during
HTILE decompression.

>
> You said that this implements the bare minimum, so you might already know
> about some of these though (unless you were just talking about the
> variableSampleLocations thing).
> On Fri, 7 Dec 2018 at 16:19, Samuel Pitoiset <samuel.pitoiset@gmail.com> wrote:
> >
> > Basically, this extension allows applications to use custom
> > sample locations. This only implements the bare minimum.
> > It doesn't support variable sample locations during subpass.
> >
> > Most of the dEQP-VK.pipeline.multisample.sample_locations_ext.*
> > CTS now pass.
> >
> > Only enabled on VI+ because it's untested on older chips.
> >
> > Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
> > ---
> >  src/amd/vulkan/radv_cmd_buffer.c  | 177 +++++++++++++++++++++++++++++-
> >  src/amd/vulkan/radv_device.c      |  27 +++++
> >  src/amd/vulkan/radv_extensions.py |   1 +
> >  src/amd/vulkan/radv_pipeline.c    |  30 +++++
> >  src/amd/vulkan/radv_private.h     |  26 +++--
> >  5 files changed, 253 insertions(+), 8 deletions(-)
> >
> > diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> > index b4aea5bc898..c4bebeda0ce 100644
> > --- a/src/amd/vulkan/radv_cmd_buffer.c
> > +++ b/src/amd/vulkan/radv_cmd_buffer.c
> > @@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
> >         dest->viewport.count = src->viewport.count;
> >         dest->scissor.count = src->scissor.count;
> >         dest->discard_rectangle.count = src->discard_rectangle.count;
> > +       dest->sample_location.count = src->sample_location.count;
> >
> >         if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
> >                 if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
> > @@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
> >                 }
> >         }
> >
> > +       if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
> > +               if (dest->sample_location.per_pixel != src->sample_location.per_pixel ||
> > +                   dest->sample_location.grid_size.width != src->sample_location.grid_size.width ||
> > +                   dest->sample_location.grid_size.height != src->sample_location.grid_size.height ||
> > +                   memcmp(&dest->sample_location.locations,
> > +                          &src->sample_location.locations,
> > +                          src->sample_location.count * sizeof(VkSampleLocationEXT))) {
> > +                       dest->sample_location.per_pixel = src->sample_location.per_pixel;
> > +                       dest->sample_location.grid_size = src->sample_location.grid_size;
> > +                       typed_memcpy(dest->sample_location.locations,
> > +                                    src->sample_location.locations,
> > +                                    src->sample_location.count);
> > +                       dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
> > +               }
> > +       }
> > +
> >         cmd_buffer->state.dirty |= dest_mask;
> >  }
> >
> > @@ -634,6 +651,135 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer,
> >         }
> >  }
> >
> > +/**
> > + * Convert the user sample locations to hardware sample locations (the values
> > + * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
> > + */
> > +static void
> > +radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
> > +                             uint32_t x, uint32_t y, VkOffset2D *sample_locs)
> > +{
> > +       uint32_t x_offset = x % state->grid_size.width;
> > +       uint32_t y_offset = y % state->grid_size.height;
> > +       uint32_t num_samples = (uint32_t)state->per_pixel;
> > +       VkSampleLocationEXT *user_locs;
> > +       uint32_t pixel_offset;
> > +
> > +       pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples;
> > +
> > +       assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
> > +       user_locs = &state->locations[pixel_offset];
> > +
> > +       for (uint32_t i = 0; i < num_samples; i++) {
> > +               float shifted_pos_x = user_locs[i].x - 0.5;
> > +               float shifted_pos_y = user_locs[i].y - 0.5;
> > +
> > +               int32_t scaled_pos_x = floor(shifted_pos_x * 16);
> > +               int32_t scaled_pos_y = floor(shifted_pos_y * 16);
> > +
> > +               sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
> > +               sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
> > +       }
> > +}
> > +
> > +/**
> > + * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample
> > + * locations.
> > + */
> > +static void
> > +radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
> > +                              uint32_t *sample_locs_pixel)
> > +{
> > +       for (uint32_t i = 0; i < num_samples; ++i) {
> > +               uint32_t sample_reg_idx = i / 4;
> > +               uint32_t sample_loc_idx = i % 4;
> > +               int32_t pos_x = sample_locs[i].x;
> > +               int32_t pos_y = sample_locs[i].y;
> > +
> > +               uint32_t shift_x = 8 * sample_loc_idx;
> > +               uint32_t shift_y = shift_x + 4;
> > +
> > +               sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x;
> > +               sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y;
> > +       }
> > +}
> > +
> > +/**
> > + * Emit the sample locations that are specified with VK_EXT_sample_locations.
> > + */
> > +static void
> > +radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
> > +{
> > +       struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
> > +       struct radv_multisample_state *ms = &pipeline->graphics.ms;
> > +       struct radv_sample_locations_state *sample_location =
> > +               &cmd_buffer->state.dynamic.sample_location;
> > +       uint32_t num_samples = (uint32_t)sample_location->per_pixel;
> > +       struct radeon_cmdbuf *cs = cmd_buffer->cs;
> > +       uint32_t sample_locs_pixel[4][2] = {};
> > +       VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */
> > +       uint32_t max_sample_dist = 0;
> > +
> > +       /* Convert the user sample locations to hardware sample locations. */
> > +       radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]);
> > +       radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]);
> > +       radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]);
> > +       radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]);
> > +
> > +       /* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */
> > +       for (uint32_t i = 0; i < 4; i++) {
> > +               radv_compute_sample_locs_pixel(num_samples, sample_locs[i],
> > +                                              sample_locs_pixel[i]);
> > +       }
> > +
> > +       /* Emit the specified user sample locations. */
> > +       switch (num_samples) {
> > +       case 2:
> > +       case 4:
> > +               radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
> > +               radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
> > +               radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
> > +               radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
> > +               break;
> > +       case 8:
> > +               radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
> > +               radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
> > +               radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
> > +               radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
> > +               radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, sample_locs_pixel[0][1]);
> > +               radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, sample_locs_pixel[1][1]);
> > +               radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, sample_locs_pixel[2][1]);
> > +               radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, sample_locs_pixel[3][1]);
> > +               break;
> > +       default:
> > +               unreachable("Unsupported number of samples!");
> > +       }
> > +
> > +       /* Compute the maximum sample distance from the specified locations. */
> > +       for (uint32_t i = 0; i < num_samples; i++) {
> > +               VkOffset2D offset = sample_locs[0][i];
> > +               max_sample_dist = MAX2(max_sample_dist,
> > +                                      MAX2(abs(offset.x), abs(offset.y)));
> > +       }
> > +
> > +       /* Emit the maximum sample distance if different. */
> > +       if (G_028BE0_MAX_SAMPLE_DIST(ms->pa_sc_aa_config) != max_sample_dist) {
> > +               uint32_t pa_sc_aa_config = ms->pa_sc_aa_config;
> > +
> > +               pa_sc_aa_config &= C_028BE0_MAX_SAMPLE_DIST;
> > +               pa_sc_aa_config |= S_028BE0_MAX_SAMPLE_DIST(max_sample_dist);
> > +
> > +               radeon_set_context_reg_seq(cs, R_028BE0_PA_SC_AA_CONFIG, 1);
> > +               radeon_emit(cs, pa_sc_aa_config);
> > +
> > +               /* GFX9: Flush DFSM when the AA mode changes. */
> > +               if (cmd_buffer->device->dfsm_allowed) {
> > +                       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
> > +                       radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
> > +               }
> > +       }
> > +}
> > +
> >  static void
> >  radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
> >                               struct radv_pipeline *pipeline)
> > @@ -645,7 +791,14 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
> >         if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions)
> >                 cmd_buffer->sample_positions_needed = true;
> >
> > -       if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples)
> > +       /* Emit the multisample state (including sample locations) only if:
> > +        * - it's the first bound pipeline in the command buffer
> > +        * - the number of samples of this pipeline is different
> > +        * - the previous pipeline used custom sample locations
> > +        */
> > +       if (old_pipeline &&
> > +           num_samples == old_pipeline->graphics.ms.num_samples &&
> > +           !old_pipeline->dynamic_state.sample_location.count)
> >                 return;
> >
> >         radeon_set_context_reg_seq(cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL, 2);
> > @@ -1711,6 +1864,9 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
> >         if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE)
> >                 radv_emit_discard_rectangle(cmd_buffer);
> >
> > +       if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)
> > +               radv_emit_sample_locations(cmd_buffer);
> > +
> >         cmd_buffer->state.dirty &= ~states;
> >  }
> >
> > @@ -3050,6 +3206,25 @@ void radv_CmdSetDiscardRectangleEXT(
> >         state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE;
> >  }
> >
> > +void radv_CmdSetSampleLocationsEXT(
> > +       VkCommandBuffer                             commandBuffer,
> > +       const VkSampleLocationsInfoEXT*             pSampleLocationsInfo)
> > +{
> > +       RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
> > +       struct radv_cmd_state *state = &cmd_buffer->state;
> > +
> > +       assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
> > +
> > +       state->dynamic.sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
> > +       state->dynamic.sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
> > +       state->dynamic.sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
> > +       typed_memcpy(&state->dynamic.sample_location.locations[0],
> > +                    pSampleLocationsInfo->pSampleLocations,
> > +                    pSampleLocationsInfo->sampleLocationsCount);
> > +
> > +       state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
> > +}
> > +
> >  void radv_CmdExecuteCommands(
> >         VkCommandBuffer                             commandBuffer,
> >         uint32_t                                    commandBufferCount,
> > diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> > index cb51ee44e58..6b19641f66d 100644
> > --- a/src/amd/vulkan/radv_device.c
> > +++ b/src/amd/vulkan/radv_device.c
> > @@ -1238,6 +1238,19 @@ void radv_GetPhysicalDeviceProperties2(
> >                         properties->transformFeedbackDraw = true;
> >                         break;
> >                 }
> > +               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
> > +                       VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
> > +                               (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
> > +                       properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT |
> > +                                                                VK_SAMPLE_COUNT_4_BIT |
> > +                                                                VK_SAMPLE_COUNT_8_BIT;
> > +                       properties->maxSampleLocationGridSize = (VkExtent2D){ 2 , 2 };
> > +                       properties->sampleLocationCoordinateRange[0] = 0.0f;
> > +                       properties->sampleLocationCoordinateRange[1] = 1.0f;
> > +                       properties->sampleLocationSubPixelBits = 4;
> > +                       properties->variableSampleLocations = VK_FALSE;
> > +                       break;
> > +               }
> >                 default:
> >                         break;
> >                 }
> > @@ -5111,3 +5124,17 @@ VkResult radv_GetCalibratedTimestampsEXT(
> >
> >         return VK_SUCCESS;
> >  }
> > +
> > +void radv_GetPhysicalDeviceMultisamplePropertiesEXT(
> > +    VkPhysicalDevice                            physicalDevice,
> > +    VkSampleCountFlagBits                       samples,
> > +    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
> > +{
> > +       if (samples & (VK_SAMPLE_COUNT_2_BIT |
> > +                      VK_SAMPLE_COUNT_4_BIT |
> > +                      VK_SAMPLE_COUNT_8_BIT)) {
> > +               pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 2, 2 };
> > +       } else {
> > +               pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
> > +       }
> > +}
> > diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
> > index d14169144f7..19b24ac4157 100644
> > --- a/src/amd/vulkan/radv_extensions.py
> > +++ b/src/amd/vulkan/radv_extensions.py
> > @@ -106,6 +106,7 @@ EXTENSIONS = [
> >      Extension('VK_EXT_external_memory_host',              1, 'device->rad_info.has_userptr'),
> >      Extension('VK_EXT_global_priority',                   1, 'device->rad_info.has_ctx_priority'),
> >      Extension('VK_EXT_pci_bus_info',                      1, True),
> > +    Extension('VK_EXT_sample_locations',                  1, 'device->rad_info.chip_class >= VI'),
> >      Extension('VK_EXT_sampler_filter_minmax',             1, 'device->rad_info.chip_class >= CIK'),
> >      Extension('VK_EXT_scalar_block_layout',               1, 'device->rad_info.chip_class >= CIK'),
> >      Extension('VK_EXT_shader_viewport_index_layer',       1, True),
> > diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> > index 33076cc2bd2..266fdb43367 100644
> > --- a/src/amd/vulkan/radv_pipeline.c
> > +++ b/src/amd/vulkan/radv_pipeline.c
> > @@ -1276,6 +1276,8 @@ static unsigned radv_dynamic_state_mask(VkDynamicState state)
> >                 return RADV_DYNAMIC_STENCIL_REFERENCE;
> >         case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT:
> >                 return RADV_DYNAMIC_DISCARD_RECTANGLE;
> > +       case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
> > +               return RADV_DYNAMIC_SAMPLE_LOCATIONS;
> >         default:
> >                 unreachable("Unhandled dynamic state");
> >         }
> > @@ -1306,6 +1308,11 @@ static uint32_t radv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreat
> >         if (!vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT))
> >                 states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE;
> >
> > +       if (!pCreateInfo->pMultisampleState ||
> > +           !vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
> > +                                 PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT))
> > +               states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS;
> > +
> >         /* TODO: blend constants & line width. */
> >
> >         return states;
> > @@ -1442,6 +1449,29 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
> >                              discard_rectangle_info->discardRectangleCount);
> >         }
> >
> > +       if (states & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
> > +               const VkPipelineSampleLocationsStateCreateInfoEXT *sample_location_info =
> > +                       vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
> > +                                            PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
> > +               /* If sampleLocationsEnable is VK_FALSE, the default sample
> > +                * locations are used and the values specified in
> > +                * sampleLocationsInfo are ignored.
> > +                */
> > +               if (sample_location_info->sampleLocationsEnable) {
> > +                       const VkSampleLocationsInfoEXT *pSampleLocationsInfo =
> > +                               &sample_location_info->sampleLocationsInfo;
> > +
> > +                       assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
> > +
> > +                       dynamic->sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
> > +                       dynamic->sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
> > +                       dynamic->sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
> > +                       typed_memcpy(&dynamic->sample_location.locations[0],
> > +                                    pSampleLocationsInfo->pSampleLocations,
> > +                                    pSampleLocationsInfo->sampleLocationsCount);
> > +               }
> > +       }
> > +
> >         pipeline->dynamic_state.mask = states;
> >  }
> >
> > diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> > index e3dd301ee8f..4139a2911aa 100644
> > --- a/src/amd/vulkan/radv_private.h
> > +++ b/src/amd/vulkan/radv_private.h
> > @@ -90,6 +90,7 @@ typedef uint32_t xcb_window_t;
> >  #define MAX_VIEWPORTS   16
> >  #define MAX_SCISSORS    16
> >  #define MAX_DISCARD_RECTANGLES 4
> > +#define MAX_SAMPLE_LOCATIONS 32
> >  #define MAX_PUSH_CONSTANTS_SIZE 128
> >  #define MAX_PUSH_DESCRIPTORS 32
> >  #define MAX_DYNAMIC_UNIFORM_BUFFERS 16
> > @@ -829,7 +830,8 @@ enum radv_dynamic_state_bits {
> >         RADV_DYNAMIC_STENCIL_WRITE_MASK   = 1 << 7,
> >         RADV_DYNAMIC_STENCIL_REFERENCE    = 1 << 8,
> >         RADV_DYNAMIC_DISCARD_RECTANGLE    = 1 << 9,
> > -       RADV_DYNAMIC_ALL                  = (1 << 10) - 1,
> > +       RADV_DYNAMIC_SAMPLE_LOCATIONS     = 1 << 10,
> > +       RADV_DYNAMIC_ALL                  = (1 << 11) - 1,
> >  };
> >
> >  enum radv_cmd_dirty_bits {
> > @@ -845,12 +847,13 @@ enum radv_cmd_dirty_bits {
> >         RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK        = 1 << 7,
> >         RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE         = 1 << 8,
> >         RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE         = 1 << 9,
> > -       RADV_CMD_DIRTY_DYNAMIC_ALL                       = (1 << 10) - 1,
> > -       RADV_CMD_DIRTY_PIPELINE                          = 1 << 10,
> > -       RADV_CMD_DIRTY_INDEX_BUFFER                      = 1 << 11,
> > -       RADV_CMD_DIRTY_FRAMEBUFFER                       = 1 << 12,
> > -       RADV_CMD_DIRTY_VERTEX_BUFFER                     = 1 << 13,
> > -       RADV_CMD_DIRTY_STREAMOUT_BUFFER                  = 1 << 14,
> > +       RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS          = 1 << 10,
> > +       RADV_CMD_DIRTY_DYNAMIC_ALL                       = (1 << 11) - 1,
> > +       RADV_CMD_DIRTY_PIPELINE                          = 1 << 11,
> > +       RADV_CMD_DIRTY_INDEX_BUFFER                      = 1 << 12,
> > +       RADV_CMD_DIRTY_FRAMEBUFFER                       = 1 << 13,
> > +       RADV_CMD_DIRTY_VERTEX_BUFFER                     = 1 << 14,
> > +       RADV_CMD_DIRTY_STREAMOUT_BUFFER                  = 1 << 15,
> >  };
> >
> >  enum radv_cmd_flush_bits {
> > @@ -927,6 +930,13 @@ struct radv_discard_rectangle_state {
> >         VkRect2D                                          rectangles[MAX_DISCARD_RECTANGLES];
> >  };
> >
> > +struct radv_sample_locations_state {
> > +       VkSampleCountFlagBits per_pixel;
> > +       VkExtent2D grid_size;
> > +       uint32_t count;
> > +       VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
> > +};
> > +
> >  struct radv_dynamic_state {
> >         /**
> >          * Bitmask of (1 << VK_DYNAMIC_STATE_*).
> > @@ -969,6 +979,8 @@ struct radv_dynamic_state {
> >         } stencil_reference;
> >
> >         struct radv_discard_rectangle_state               discard_rectangle;
> > +
> > +       struct radv_sample_locations_state                sample_location;
> >  };
> >
> >  extern const struct radv_dynamic_state default_dynamic_state;
> > --
> > 2.19.2
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
On 12/8/18 7:03 PM, Rhys Perry wrote:
> A small number of questions/concerns:
> 
> - sampleLocationCoordinateRange[1] should probably be set to 0.9375,
>    because of how the sample locations are encoded

That doesn't matter much, but we can update it, yeah.

> - gl_SamplePosition doesn't seem like it would return the new sample
>    locations

Oh right, and that's a huge pain to change...

> - R_028BD4_PA_SC_CENTROID_PRIORITY_{0,1} isn't updated. I'm not sure if
>    this is required, but it's probably best to do so.

Not sure if that's really important to set.

> - I think it can pointlessly call radv_cayman_emit_msaa_sample_locs()
>    before radv_emit_sample_locations()

That's an optimization.

> - unlike AMDVLK, this doesn't seem to make use of sample location
>    information during layout transitions?

No variableSampleLocations support.

> 
> You said that this implements the bare minimum, so you might already know
> about some of these though (unless you were just talking about the
> variableSampleLocations thing).
> On Fri, 7 Dec 2018 at 16:19, Samuel Pitoiset <samuel.pitoiset@gmail.com> wrote:
>>
>> Basically, this extension allows applications to use custom
>> sample locations. This only implements the bare minimum.
>> It doesn't support variable sample locations during subpass.
>>
>> Most of the dEQP-VK.pipeline.multisample.sample_locations_ext.*
>> CTS now pass.
>>
>> Only enabled on VI+ because it's untested on older chips.
>>
>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
>> ---
>>   src/amd/vulkan/radv_cmd_buffer.c  | 177 +++++++++++++++++++++++++++++-
>>   src/amd/vulkan/radv_device.c      |  27 +++++
>>   src/amd/vulkan/radv_extensions.py |   1 +
>>   src/amd/vulkan/radv_pipeline.c    |  30 +++++
>>   src/amd/vulkan/radv_private.h     |  26 +++--
>>   5 files changed, 253 insertions(+), 8 deletions(-)
>>
>> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
>> index b4aea5bc898..c4bebeda0ce 100644
>> --- a/src/amd/vulkan/radv_cmd_buffer.c
>> +++ b/src/amd/vulkan/radv_cmd_buffer.c
>> @@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
>>          dest->viewport.count = src->viewport.count;
>>          dest->scissor.count = src->scissor.count;
>>          dest->discard_rectangle.count = src->discard_rectangle.count;
>> +       dest->sample_location.count = src->sample_location.count;
>>
>>          if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
>>                  if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
>> @@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
>>                  }
>>          }
>>
>> +       if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
>> +               if (dest->sample_location.per_pixel != src->sample_location.per_pixel ||
>> +                   dest->sample_location.grid_size.width != src->sample_location.grid_size.width ||
>> +                   dest->sample_location.grid_size.height != src->sample_location.grid_size.height ||
>> +                   memcmp(&dest->sample_location.locations,
>> +                          &src->sample_location.locations,
>> +                          src->sample_location.count * sizeof(VkSampleLocationEXT))) {
>> +                       dest->sample_location.per_pixel = src->sample_location.per_pixel;
>> +                       dest->sample_location.grid_size = src->sample_location.grid_size;
>> +                       typed_memcpy(dest->sample_location.locations,
>> +                                    src->sample_location.locations,
>> +                                    src->sample_location.count);
>> +                       dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
>> +               }
>> +       }
>> +
>>          cmd_buffer->state.dirty |= dest_mask;
>>   }
>>
>> @@ -634,6 +651,135 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer,
>>          }
>>   }
>>
>> +/**
>> + * Convert the user sample locations to hardware sample locations (the values
>> + * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
>> + */
>> +static void
>> +radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
>> +                             uint32_t x, uint32_t y, VkOffset2D *sample_locs)
>> +{
>> +       uint32_t x_offset = x % state->grid_size.width;
>> +       uint32_t y_offset = y % state->grid_size.height;
>> +       uint32_t num_samples = (uint32_t)state->per_pixel;
>> +       VkSampleLocationEXT *user_locs;
>> +       uint32_t pixel_offset;
>> +
>> +       pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples;
>> +
>> +       assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
>> +       user_locs = &state->locations[pixel_offset];
>> +
>> +       for (uint32_t i = 0; i < num_samples; i++) {
>> +               float shifted_pos_x = user_locs[i].x - 0.5;
>> +               float shifted_pos_y = user_locs[i].y - 0.5;
>> +
>> +               int32_t scaled_pos_x = floor(shifted_pos_x * 16);
>> +               int32_t scaled_pos_y = floor(shifted_pos_y * 16);
>> +
>> +               sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
>> +               sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
>> +       }
>> +}
>> +
>> +/**
>> + * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample
>> + * locations.
>> + */
>> +static void
>> +radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
>> +                              uint32_t *sample_locs_pixel)
>> +{
>> +       for (uint32_t i = 0; i < num_samples; ++i) {
>> +               uint32_t sample_reg_idx = i / 4;
>> +               uint32_t sample_loc_idx = i % 4;
>> +               int32_t pos_x = sample_locs[i].x;
>> +               int32_t pos_y = sample_locs[i].y;
>> +
>> +               uint32_t shift_x = 8 * sample_loc_idx;
>> +               uint32_t shift_y = shift_x + 4;
>> +
>> +               sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x;
>> +               sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y;
>> +       }
>> +}
>> +
>> +/**
>> + * Emit the sample locations that are specified with VK_EXT_sample_locations.
>> + */
>> +static void
>> +radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
>> +{
>> +       struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
>> +       struct radv_multisample_state *ms = &pipeline->graphics.ms;
>> +       struct radv_sample_locations_state *sample_location =
>> +               &cmd_buffer->state.dynamic.sample_location;
>> +       uint32_t num_samples = (uint32_t)sample_location->per_pixel;
>> +       struct radeon_cmdbuf *cs = cmd_buffer->cs;
>> +       uint32_t sample_locs_pixel[4][2] = {};
>> +       VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */
>> +       uint32_t max_sample_dist = 0;
>> +
>> +       /* Convert the user sample locations to hardware sample locations. */
>> +       radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]);
>> +       radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]);
>> +       radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]);
>> +       radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]);
>> +
>> +       /* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */
>> +       for (uint32_t i = 0; i < 4; i++) {
>> +               radv_compute_sample_locs_pixel(num_samples, sample_locs[i],
>> +                                              sample_locs_pixel[i]);
>> +       }
>> +
>> +       /* Emit the specified user sample locations. */
>> +       switch (num_samples) {
>> +       case 2:
>> +       case 4:
>> +               radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
>> +               radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
>> +               radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
>> +               radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
>> +               break;
>> +       case 8:
>> +               radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
>> +               radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
>> +               radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
>> +               radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
>> +               radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, sample_locs_pixel[0][1]);
>> +               radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, sample_locs_pixel[1][1]);
>> +               radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, sample_locs_pixel[2][1]);
>> +               radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, sample_locs_pixel[3][1]);
>> +               break;
>> +       default:
>> +               unreachable("Unsupported number of samples!");
>> +       }
>> +
>> +       /* Compute the maximum sample distance from the specified locations. */
>> +       for (uint32_t i = 0; i < num_samples; i++) {
>> +               VkOffset2D offset = sample_locs[0][i];
>> +               max_sample_dist = MAX2(max_sample_dist,
>> +                                      MAX2(abs(offset.x), abs(offset.y)));
>> +       }
>> +
>> +       /* Emit the maximum sample distance if different. */
>> +       if (G_028BE0_MAX_SAMPLE_DIST(ms->pa_sc_aa_config) != max_sample_dist) {
>> +               uint32_t pa_sc_aa_config = ms->pa_sc_aa_config;
>> +
>> +               pa_sc_aa_config &= C_028BE0_MAX_SAMPLE_DIST;
>> +               pa_sc_aa_config |= S_028BE0_MAX_SAMPLE_DIST(max_sample_dist);
>> +
>> +               radeon_set_context_reg_seq(cs, R_028BE0_PA_SC_AA_CONFIG, 1);
>> +               radeon_emit(cs, pa_sc_aa_config);
>> +
>> +               /* GFX9: Flush DFSM when the AA mode changes. */
>> +               if (cmd_buffer->device->dfsm_allowed) {
>> +                       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
>> +                       radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
>> +               }
>> +       }
>> +}
>> +
>>   static void
>>   radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
>>                                struct radv_pipeline *pipeline)
>> @@ -645,7 +791,14 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
>>          if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions)
>>                  cmd_buffer->sample_positions_needed = true;
>>
>> -       if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples)
>> +       /* Emit the multisample state (including sample locations) only if:
>> +        * - it's the first bound pipeline in the command buffer
>> +        * - the number of samples of this pipeline is different
>> +        * - the previous pipeline used custom sample locations
>> +        */
>> +       if (old_pipeline &&
>> +           num_samples == old_pipeline->graphics.ms.num_samples &&
>> +           !old_pipeline->dynamic_state.sample_location.count)
>>                  return;
>>
>>          radeon_set_context_reg_seq(cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL, 2);
>> @@ -1711,6 +1864,9 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
>>          if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE)
>>                  radv_emit_discard_rectangle(cmd_buffer);
>>
>> +       if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)
>> +               radv_emit_sample_locations(cmd_buffer);
>> +
>>          cmd_buffer->state.dirty &= ~states;
>>   }
>>
>> @@ -3050,6 +3206,25 @@ void radv_CmdSetDiscardRectangleEXT(
>>          state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE;
>>   }
>>
>> +void radv_CmdSetSampleLocationsEXT(
>> +       VkCommandBuffer                             commandBuffer,
>> +       const VkSampleLocationsInfoEXT*             pSampleLocationsInfo)
>> +{
>> +       RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
>> +       struct radv_cmd_state *state = &cmd_buffer->state;
>> +
>> +       assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
>> +
>> +       state->dynamic.sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
>> +       state->dynamic.sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
>> +       state->dynamic.sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
>> +       typed_memcpy(&state->dynamic.sample_location.locations[0],
>> +                    pSampleLocationsInfo->pSampleLocations,
>> +                    pSampleLocationsInfo->sampleLocationsCount);
>> +
>> +       state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
>> +}
>> +
>>   void radv_CmdExecuteCommands(
>>          VkCommandBuffer                             commandBuffer,
>>          uint32_t                                    commandBufferCount,
>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>> index cb51ee44e58..6b19641f66d 100644
>> --- a/src/amd/vulkan/radv_device.c
>> +++ b/src/amd/vulkan/radv_device.c
>> @@ -1238,6 +1238,19 @@ void radv_GetPhysicalDeviceProperties2(
>>                          properties->transformFeedbackDraw = true;
>>                          break;
>>                  }
>> +               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
>> +                       VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
>> +                               (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
>> +                       properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT |
>> +                                                                VK_SAMPLE_COUNT_4_BIT |
>> +                                                                VK_SAMPLE_COUNT_8_BIT;
>> +                       properties->maxSampleLocationGridSize = (VkExtent2D){ 2 , 2 };
>> +                       properties->sampleLocationCoordinateRange[0] = 0.0f;
>> +                       properties->sampleLocationCoordinateRange[1] = 1.0f;
>> +                       properties->sampleLocationSubPixelBits = 4;
>> +                       properties->variableSampleLocations = VK_FALSE;
>> +                       break;
>> +               }
>>                  default:
>>                          break;
>>                  }
>> @@ -5111,3 +5124,17 @@ VkResult radv_GetCalibratedTimestampsEXT(
>>
>>          return VK_SUCCESS;
>>   }
>> +
>> +void radv_GetPhysicalDeviceMultisamplePropertiesEXT(
>> +    VkPhysicalDevice                            physicalDevice,
>> +    VkSampleCountFlagBits                       samples,
>> +    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
>> +{
>> +       if (samples & (VK_SAMPLE_COUNT_2_BIT |
>> +                      VK_SAMPLE_COUNT_4_BIT |
>> +                      VK_SAMPLE_COUNT_8_BIT)) {
>> +               pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 2, 2 };
>> +       } else {
>> +               pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
>> +       }
>> +}
>> diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
>> index d14169144f7..19b24ac4157 100644
>> --- a/src/amd/vulkan/radv_extensions.py
>> +++ b/src/amd/vulkan/radv_extensions.py
>> @@ -106,6 +106,7 @@ EXTENSIONS = [
>>       Extension('VK_EXT_external_memory_host',              1, 'device->rad_info.has_userptr'),
>>       Extension('VK_EXT_global_priority',                   1, 'device->rad_info.has_ctx_priority'),
>>       Extension('VK_EXT_pci_bus_info',                      1, True),
>> +    Extension('VK_EXT_sample_locations',                  1, 'device->rad_info.chip_class >= VI'),
>>       Extension('VK_EXT_sampler_filter_minmax',             1, 'device->rad_info.chip_class >= CIK'),
>>       Extension('VK_EXT_scalar_block_layout',               1, 'device->rad_info.chip_class >= CIK'),
>>       Extension('VK_EXT_shader_viewport_index_layer',       1, True),
>> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
>> index 33076cc2bd2..266fdb43367 100644
>> --- a/src/amd/vulkan/radv_pipeline.c
>> +++ b/src/amd/vulkan/radv_pipeline.c
>> @@ -1276,6 +1276,8 @@ static unsigned radv_dynamic_state_mask(VkDynamicState state)
>>                  return RADV_DYNAMIC_STENCIL_REFERENCE;
>>          case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT:
>>                  return RADV_DYNAMIC_DISCARD_RECTANGLE;
>> +       case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
>> +               return RADV_DYNAMIC_SAMPLE_LOCATIONS;
>>          default:
>>                  unreachable("Unhandled dynamic state");
>>          }
>> @@ -1306,6 +1308,11 @@ static uint32_t radv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreat
>>          if (!vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT))
>>                  states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE;
>>
>> +       if (!pCreateInfo->pMultisampleState ||
>> +           !vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
>> +                                 PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT))
>> +               states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS;
>> +
>>          /* TODO: blend constants & line width. */
>>
>>          return states;
>> @@ -1442,6 +1449,29 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
>>                               discard_rectangle_info->discardRectangleCount);
>>          }
>>
>> +       if (states & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
>> +               const VkPipelineSampleLocationsStateCreateInfoEXT *sample_location_info =
>> +                       vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
>> +                                            PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
>> +               /* If sampleLocationsEnable is VK_FALSE, the default sample
>> +                * locations are used and the values specified in
>> +                * sampleLocationsInfo are ignored.
>> +                */
>> +               if (sample_location_info->sampleLocationsEnable) {
>> +                       const VkSampleLocationsInfoEXT *pSampleLocationsInfo =
>> +                               &sample_location_info->sampleLocationsInfo;
>> +
>> +                       assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
>> +
>> +                       dynamic->sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
>> +                       dynamic->sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
>> +                       dynamic->sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
>> +                       typed_memcpy(&dynamic->sample_location.locations[0],
>> +                                    pSampleLocationsInfo->pSampleLocations,
>> +                                    pSampleLocationsInfo->sampleLocationsCount);
>> +               }
>> +       }
>> +
>>          pipeline->dynamic_state.mask = states;
>>   }
>>
>> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
>> index e3dd301ee8f..4139a2911aa 100644
>> --- a/src/amd/vulkan/radv_private.h
>> +++ b/src/amd/vulkan/radv_private.h
>> @@ -90,6 +90,7 @@ typedef uint32_t xcb_window_t;
>>   #define MAX_VIEWPORTS   16
>>   #define MAX_SCISSORS    16
>>   #define MAX_DISCARD_RECTANGLES 4
>> +#define MAX_SAMPLE_LOCATIONS 32
>>   #define MAX_PUSH_CONSTANTS_SIZE 128
>>   #define MAX_PUSH_DESCRIPTORS 32
>>   #define MAX_DYNAMIC_UNIFORM_BUFFERS 16
>> @@ -829,7 +830,8 @@ enum radv_dynamic_state_bits {
>>          RADV_DYNAMIC_STENCIL_WRITE_MASK   = 1 << 7,
>>          RADV_DYNAMIC_STENCIL_REFERENCE    = 1 << 8,
>>          RADV_DYNAMIC_DISCARD_RECTANGLE    = 1 << 9,
>> -       RADV_DYNAMIC_ALL                  = (1 << 10) - 1,
>> +       RADV_DYNAMIC_SAMPLE_LOCATIONS     = 1 << 10,
>> +       RADV_DYNAMIC_ALL                  = (1 << 11) - 1,
>>   };
>>
>>   enum radv_cmd_dirty_bits {
>> @@ -845,12 +847,13 @@ enum radv_cmd_dirty_bits {
>>          RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK        = 1 << 7,
>>          RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE         = 1 << 8,
>>          RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE         = 1 << 9,
>> -       RADV_CMD_DIRTY_DYNAMIC_ALL                       = (1 << 10) - 1,
>> -       RADV_CMD_DIRTY_PIPELINE                          = 1 << 10,
>> -       RADV_CMD_DIRTY_INDEX_BUFFER                      = 1 << 11,
>> -       RADV_CMD_DIRTY_FRAMEBUFFER                       = 1 << 12,
>> -       RADV_CMD_DIRTY_VERTEX_BUFFER                     = 1 << 13,
>> -       RADV_CMD_DIRTY_STREAMOUT_BUFFER                  = 1 << 14,
>> +       RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS          = 1 << 10,
>> +       RADV_CMD_DIRTY_DYNAMIC_ALL                       = (1 << 11) - 1,
>> +       RADV_CMD_DIRTY_PIPELINE                          = 1 << 11,
>> +       RADV_CMD_DIRTY_INDEX_BUFFER                      = 1 << 12,
>> +       RADV_CMD_DIRTY_FRAMEBUFFER                       = 1 << 13,
>> +       RADV_CMD_DIRTY_VERTEX_BUFFER                     = 1 << 14,
>> +       RADV_CMD_DIRTY_STREAMOUT_BUFFER                  = 1 << 15,
>>   };
>>
>>   enum radv_cmd_flush_bits {
>> @@ -927,6 +930,13 @@ struct radv_discard_rectangle_state {
>>          VkRect2D                                          rectangles[MAX_DISCARD_RECTANGLES];
>>   };
>>
>> +struct radv_sample_locations_state {
>> +       VkSampleCountFlagBits per_pixel;
>> +       VkExtent2D grid_size;
>> +       uint32_t count;
>> +       VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
>> +};
>> +
>>   struct radv_dynamic_state {
>>          /**
>>           * Bitmask of (1 << VK_DYNAMIC_STATE_*).
>> @@ -969,6 +979,8 @@ struct radv_dynamic_state {
>>          } stencil_reference;
>>
>>          struct radv_discard_rectangle_state               discard_rectangle;
>> +
>> +       struct radv_sample_locations_state                sample_location;
>>   };
>>
>>   extern const struct radv_dynamic_state default_dynamic_state;
>> --
>> 2.19.2
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev