[dri3proto,v2] Add modifier/multi-plane requests, bump to v1.1

Submitted by Daniel Stone on July 20, 2017, 5:08 p.m.

Details

Message ID 20170720170813.26162-1-daniels@collabora.com
State New
Headers show
Series "DRI4096: modifiers and multi-plane" ( rev: 2 ) in X.org

Not browsing as part of any series.

Commit Message

Daniel Stone July 20, 2017, 5:08 p.m.
DRI3 version 1.1 adds support for explicit format modifiers, including
multi-planar buffers.

Signed-off-by: Daniel Stone <daniels@collabora.com>
---
 dri3proto.h   | 142 ++++++++++++++++++++++++++-
 dri3proto.txt | 300 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 438 insertions(+), 4 deletions(-)

Sorry, this was supposed to get sent out with the revised patchset.

Now with actual descriptive text.

Patch hide | download patch | download mbox

diff --git a/dri3proto.h b/dri3proto.h
index ceddee8..442b714 100644
--- a/dri3proto.h
+++ b/dri3proto.h
@@ -25,7 +25,7 @@ 
 
 #define DRI3_NAME			"DRI3"
 #define DRI3_MAJOR			1
-#define DRI3_MINOR			0
+#define DRI3_MINOR			1
 
 #define DRI3NumberErrors		0
 #define DRI3NumberEvents		0
@@ -37,7 +37,15 @@ 
 #define X_DRI3FenceFromFD               4
 #define X_DRI3FDFromFence               5
 
-#define DRI3NumberRequests		6
+/* v1.1 */
+#define xDRI3GetSupportedFormats	6
+#define xDRI3GetSupportedModifiers	7
+#define xDRI3PixmapFromBuffers		8
+#define xDRI3BuffersFromPixmap		9
+#define xDRI3FenceFromDMAFenceFD        10
+#define xDRI3DMAFenceFDFromFrence       11
+
+#define DRI3NumberRequests		12
 
 typedef struct {
     CARD8   reqType;
@@ -164,4 +172,134 @@  typedef struct {
 
 #define sz_xDRI3FDFromFenceReply   32
 
+/* v1.1 */
+
+typedef struct {
+    CARD8   reqType;
+    CARD8   dri3ReqType;
+    CARD16  length B16;
+    CARD32  window B32;
+} xDRI3GetSupportedFormatsReq;
+#define sz_xDRI3GetSupportedFormatsReq     8
+
+typedef struct {
+    BYTE    type;   /* X_Reply */
+    CARD8   pad1;
+    CARD16  sequenceNumber B16;
+    CARD32  length B32;
+    CARD32  numFormats B32;
+    CARD32  pad12 B32;
+    CARD32  pad16 B32;
+    CARD32  pad20 B32;
+    CARD32  pad24 B32;
+    CARD32  pad28 B32;
+} xDRI3GetSupportedFormatsReply;
+#define sz_xDRI3GetSupportedFormatsReply   32
+
+typedef struct {
+    CARD8   reqType;
+    CARD8   dri3ReqType;
+    CARD16  length B16;
+    CARD32  window B32;
+    CARD32  format B32;
+} xDRI3GetSupportedModifiersReq;
+#define sz_xDRI3GetSupportedModifiersReq     12
+
+typedef struct {
+    BYTE    type;   /* X_Reply */
+    CARD8   pad1;
+    CARD16  sequenceNumber B16;
+    CARD32  length B32;
+    CARD32  format B32;
+    CARD32  numModifiers B32;
+    CARD32  pad16 B32;
+    CARD32  pad20 B32;
+    CARD32  pad24 B32;
+    CARD32  pad28 B32;
+} xDRI3GetSupportedModifiersReply;
+#define sz_xDRI3GetSupportedModifiersReply   32
+
+typedef struct {
+    CARD8   reqType;
+    CARD8   dri3ReqType;
+    CARD16  length B16;
+    CARD32  pixmap B32;
+    CARD32  drawable B32;
+    CARD8   num_buffers; /* Number of file descriptors passed */
+    CARD8   pad13;
+    CARD16  pad14 B16;
+    CARD16  width B16;
+    CARD16  height B16;
+    CARD32  stride0 B32;
+    CARD32  offset0 B32;
+    CARD32  stride1 B32;
+    CARD32  offset1 B32;
+    CARD32  stride2 B32;
+    CARD32  offset2 B32;
+    CARD32  stride3 B32;
+    CARD32  offset3 B32;
+    CARD32  format B32;
+    CARD32  modifier_hi B32;
+    CARD32  modifier_lo B32;
+} xDRI3PixmapFromBuffersReq;
+#define sz_xDRI3PixmapFromBuffersReq 64
+
+typedef struct {
+    CARD8   reqType;
+    CARD8   dri3ReqType;
+    CARD16  length B16;
+    CARD32  pixmap B32;
+} xDRI3BuffersFromPixmapReq;
+#define sz_xDRI3BuffersFromPixmapReq     8
+
+typedef struct {
+    BYTE    type;   /* X_Reply */
+    CARD8   nfd;    /* Number of file descriptors returned */
+    CARD16  sequenceNumber B16;
+    CARD32  length B32;
+    CARD16  width B16;
+    CARD16  height B16;
+    CARD32  format B32;
+    CARD32  modifier_hi B32;
+    CARD32  modifier_lo B32;
+    CARD32  pad24 B32;
+    CARD32  pad28 B32;
+} xDRI3BuffersFromPixmapReply;
+#define sz_xDRI3BuffersFromPixmapReply   32
+
+typedef struct {
+    CARD8   reqType;
+    CARD8   dri3ReqType;
+    CARD16  length B16;
+    CARD32  drawable B32;
+    CARD32  fence B32;
+} xDRI3FenceFromDMAFenceFDReq;
+
+#define sz_xDRI3FenceFromDMAFenceFDReq  12
+
+typedef struct {
+    CARD8   reqType;
+    CARD8   dri3ReqType;
+    CARD16  length B16;
+    CARD32  drawable B32;
+    CARD32  fence B32;
+} xDRI3DMAFenceFDFromFenceReq;
+
+#define sz_xDRI3DMAFenceFDFromFenceReq  12
+
+typedef struct {
+    BYTE    type;   /* X_Reply */
+    CARD8   nfd;    /* Number of file descriptors returned (1) */
+    CARD16  sequenceNumber B16;
+    CARD32  length B32;
+    CARD32  pad08 B32;
+    CARD32  pad12 B32;
+    CARD32  pad16 B32;
+    CARD32  pad20 B32;
+    CARD32  pad24 B32;
+    CARD32  pad28 B32;
+} xDRI3DMAFenceFDFromFenceReply;
+
+#define sz_xDRI3DMAFenceFDFromFenceReply   32
+
 #endif
diff --git a/dri3proto.txt b/dri3proto.txt
index dac11d3..e906419 100644
--- a/dri3proto.txt
+++ b/dri3proto.txt
@@ -1,11 +1,12 @@ 
 			  The DRI3 Extension
-			     Version 1.0
-			       2013-6-4
+			     Version 1.1
+			      2017-06-27
       
 			    Keith Packard
 			  keithp@keithp.com
 			  Intel Corporation
 
+
 1. Introduction
 
 The DRI3 extension provides mechanisms to translate between direct
@@ -27,6 +28,8 @@  Dave Airlie <airlied@redhat.com>
 Kristian Høgsberg <krh@bitplanet.net>
 James Jones <janomes@nvidia.com>
 Arthur Huillet <arthur.huillet@free.fr>
+Louis-Francis Ratté-Boulianne <lfrb@collabora.com>
+Daniel Stone <daniels@collabora.com>
 
 			     ❄ ❄ ❄  ❄  ❄ ❄ ❄ 
 
@@ -199,6 +202,182 @@  The name of this extension is "DRI3"
 	associated with a direct rendering device that 'fence' can
 	work with, otherwise a Match error results.
 
+┌───
+    DRI3GetSupportedFormats
+	window: WINDOW
+      ▶
+	num_formats: CARD32
+	formats: ListOfCARD32
+└───
+	Errors: Window, Match
+
+	For the Screen associated with 'window', return a list of
+	supported DRM FourCC formats, as defined in drm_fourcc.h,
+	supported as formats for DRI3 pixmap/buffer interchange.
+	The length of the list, in number of CARD32 elements,
+	is returned in 'num_formats'.
+
+┌───
+    DRI3GetSupportedModifiers
+	window: WINDOW
+	format: CARD32
+      ▶
+	num_modifiers: CARD32
+	modifiers: ListOfCARD32
+└───
+	Errors: Window, Match
+
+	For the Screen associated with 'window', return a list of
+	supported DRM FourCC modifiers, as defined in drm_fourcc.h,
+	supported as formats for DRI3 pixmap/buffer interchange.
+	Each modifier is returned as returned as a CARD32
+	containing the most significant 32 bits, followed by a
+	CARD32 containing the least significant 32 bits. The hi/lo
+	pattern repeats 'num_modifiers' times, thus there are
+	'2 * num_modifiers' CARD32 elements returned.
+
+┌───
+    DRI3PixmapFromBuffers
+	pixmap: PIXMAP
+	drawable: DRAWABLE
+	num_buffers: CARD8
+	width, height: CARD16
+	stride0, offset0: CARD32
+	stride1, offset1: CARD32
+	stride2, offset2: CARD32
+	stride3, offset3: CARD32
+	format, modifier_hi, modifier_lo: CARD32
+	buffers: ListOfFD
+└───
+	Errors: Alloc, Drawable, IDChoice, Value, Match
+
+	Creates a pixmap for the direct rendering object associated
+	with 'buffers'. Changes to pixmap will be visible in that
+	direct rendered object and changes to the direct rendered
+	object will be visible in the pixmap.
+
+	In contrast to PixmapFromBuffers, multiple buffers may be
+	combined to specify a single logical source for pixel
+	sampling: 'num_buffers' may be set from 1 (single buffer,
+	akin to PixmapFromBuffer) to 4. This is the number of file
+	descriptors which will be sent with this request; one per
+	buffer.
+	
+	The exact configuration of the buffer is specified by 'format',
+	a DRM FourCC format token as defined in that project's
+	drm_fourcc.h header, in combination with the modifier.
+
+	Modifiers allow explicit specification of non-linear sources,
+	such as tiled or compressed buffers. 'modifier_hi' (the most
+	significant 32 bits of a 64-bit value) and 'modifier_lo' are
+	combined to produce a single DRM format modifier token, again
+	as defined in drm_fourcc.h. The combination of format and
+	modifier allows unambiguous declaration of the buffer layout
+	in a manner defined by the DRM tokens.
+
+	DRM_FORMAT_MOD_INVALID may be passed for 'modifier', in which
+	case the driver may make its own inference as to the exact
+	layout of the buffer(s).
+
+	'width' and 'height' describe the geometry (in pixels) of the
+	logical pixel-sample source.
+
+	'strideN' and 'offsetN' define the number of bytes per logical
+	scanline, and the distance in bytes from the beginning of the
+	buffer passed for that plane until the start of the sample
+	source for that plane, respectively for plane N. If the plane
+	is not used according to the format and modifier specification,
+	both values for that plane must be zero.
+
+	Precisely how any additional information about the buffer is
+	shared is outside the scope of this extension.
+
+	If the buffer(s) cannot be used with the screen associated with
+	'pixmap', a Match error is returned.
+
+	If the format and modifier combination is not supported by the
+	screen, a Value error is returned.
+
+┌───
+    DRI3BuffersFromPixmap
+	pixmap: PIXMAP
+      ▶
+	nfd: CARD8
+	width, height: CARD16
+	format, modifier_hi, modifier_lo: CARD32
+	strides: ListOfCARD32
+	offsets: ListOfCARD32
+	buffers: ListOfFD
+└───
+	Errors: Pixmap, Match
+
+	Pass back direct rendering objects associated with 'pixmap'.
+	Changes to 'pixmap' will be visible in the direct rendered
+	objects and changes to the direct rendered objects will be
+	visible in 'pixmap' after flushing and synchronization.
+
+	'width' and 'height' describe the geometry (in pixels) of the
+	logical pixel-sample source from combining the direct rendering
+	objects.
+
+	'format' describes a DRM FourCC format token, with
+	'modifier_hi' (most significant 32 bits) and 'modifier_lo'
+	(least significant 32 bits) being combined together to produce
+	a single unsigned 64-bit 'modifier' token. See
+	PixmapFromBuffers for more details on DRM format definitions.
+
+	'nfd' describes the number of buffers returned for the pixmap,
+	which must be combined together according to 'format' and
+	'modifier'.
+
+	For each buffer, there is an entry in the 'strides',
+	'offsets', and 'buffers' list. 'buffer' contains a single file
+	descriptor referring to the buffer. 'stride' specifies the
+	number of bytes per logical scanline for this plane, and
+	'offset' specifies the distance in bytes from the beginning
+	of 'buffer' until the start of the sample source for that
+	plane.
+
+	Precisely how any additional information about the buffer is
+	shared is outside the scope of this extension.
+
+	If buffers cannot be exported from the the screen associated
+	with 'pixmap', a Match error is returned.
+
+┌───
+    DRI3FenceFromDMAFenceFD
+	drawable: DRAWABLE
+	fence: FENCE
+	fd: FD
+└───
+	Errors: IDChoice, Drawable
+
+	Creates a Sync extension Fence that provides the regular Sync
+	extension semantics. The Fence will begin untriggered, and
+	become triggered when the underlying dma-fence FD signals.
+	The resulting Sync Fence is a one-shot, and may not be
+	manually triggered, reset, or reused until it is destroyed.
+	Details about the mechanism used with this file descriptor are
+	outside the scope of the DRI3 extension.
+
+┌───
+    DRI3DMAFenceFDFromFence
+	drawable: DRAWABLE
+	fence: FENCE
+      ▶
+	fd: FD
+└───
+	Errors: IDChoice, Drawable, Match
+
+	Given a Sync extension Fence originally created by the
+	DRI3FenceFromDMAFenceFD request, return the underlying
+	dma-fence FD to the client. Details about the mechanism used
+	with this file descriptor are outside the scope of the DRI3
+	extension. 'drawable' must be associated with a direct
+	rendering device that 'fence' can work with, otherwise a Match
+	error results. NB: it is quite likely this will be forever
+	unused, and may be removed in later revisions.
+
 
 			     ❄ ❄ ❄  ❄  ❄ ❄ ❄ 
 
@@ -214,6 +393,10 @@  The DRI3 extension is adapted from the DRI2 extension.
 
 	1.0: First published version
 
+	1.1: Add GetSupportedFormats, GetSupportedModifiers,
+	     PixmapFromBuffers, BuffersFromPixmap,
+	     FenceFromDMAFenceFD, and DMAFenceFDFromFence requests.
+
 			     ❄ ❄ ❄  ❄  ❄ ❄ ❄
 
 
@@ -367,6 +550,119 @@  A.2 Protocol Requests
 	0	FD			fence fd
 └───
 
+┌───
+    DRI3GetSupportedFormats
+	1	CARD8			major opcode
+	1	6			DRI3 opcode
+	2	2			length
+	4	Window			window
+      ▶	
+	1	1			Reply
+        1	0			unused
+	2	CARD16			sequence number
+	4	CARD32			reply length
+	4	CARD32			num_formats
+	20				unused
+
+	4	ListOfCARD32		formats[num_formats]
+└───
+
+┌───
+    DRI3GetSupportedModifiers
+	1	CARD8			major opcode
+	1	7			DRI3 opcode
+	2	3			length
+	4	Window			window
+	4	CARD32			format
+      ▶	
+	1	1			Reply
+        1	0			unused
+	2	CARD16			sequence number
+	4	CARD32			reply length
+	4	CARD32			num_modifiers
+	20				unused
+
+	4	ListOfCARD32		formats[2*num_modifiers]
+└───
+
+┌───
+    DRI3PixmapFromBuffers
+	1	CARD8			major opcode
+	1	8			DRI3 opcode
+	2	8			length
+	4	Pixmap			pixmap
+	4	Drawable		drawable
+	1	CARD8			num_buffers
+	3				unused
+	2	CARD16			width
+	2	CARD16			height
+	4	CARD32			stride0
+	4	CARD32			offset0
+	4	CARD32			stride1
+	4	CARD32			offset1
+	4	CARD32			stride2
+	4	CARD32			offset2
+	4	CARD32			stride3
+	4	CARD32			offset3
+	4	CARD32			format
+	4	CARD32			modifier_hi
+	4	CARD32			modifier_lo
+
+	0	ListOfFD		buffers[num_buffers]
+└───
+
+┌───
+    DRI3BuffersFromPixmap
+	1	CARD8			major opcode
+	1	9			DRI3 opcode
+	2	2			length
+	4	Pixmap			pixmap
+      ▶	
+	1	1			Reply
+        1	CARD8			nfd
+	2	CARD16			sequence number
+	4	CARD32			reply length
+	2	CARD16			width
+	2	CARD16			height
+	4	CARD32			format
+	4	CARD32			modifier_hi
+	4	CARD32			modifier_lo
+	8				unused
+
+	0	ListOfFD		buffer[num_buffers]
+	4	ListOfCARD32		strides[num_buffers]
+	4	ListOfCARD32		offsets[num_buffers]
+└───
+
+┌───
+    DRI3FenceFromDMAFenceFD
+	1	CARD8			major opcode
+	1	10			DRI3 opcode
+	2	4			length
+	4	Drawable		drawable
+	4	Fence			fence
+	4				unused
+
+	0	FD			fence fd
+└───
+
+┌───
+    DRI3DMAFenceFDFromFence
+	1	CARD8			major opcode
+	1	11			DRI3 opcode
+	2	3			length
+	4	Drawable		drawable
+	4	Fence			fence
+      ▶	
+	1	1			Reply
+        1	1			nfd
+	2	CARD16			sequence number
+	4	0			reply length
+	24				unused
+
+	0	FD			fence fd
+└───
+
 A.3 Protocol Events
 
 The DRI3 extension defines no events.

Comments

On 20/07/17 01:08 PM, Daniel Stone wrote:
> DRI3 version 1.1 adds support for explicit format modifiers, including
> multi-planar buffers.

Adding mesa-dev, Nicolai and Marek.

We just ran into an issue which might mean that there's still something 
missing in this v2 proposal:

The context is DRI3 PRIME render offloading of glxgears (not useful in 
practice, but bear with me). The display GPU is Raven Ridge, which 
requires that the stride even of linear textures is a multiple of 256 
bytes. The renderer GPU is Polaris12, which still supports smaller 
alignment of the stride. With the glxgears window of width 300, the 
renderer GPU driver chooses a stride of 304 (* 4 / 256 = 4.75), whereas 
the display GPU would require 320 (* 4 / 256 = 5). This cannot work.


I see two basic approaches to solve this:

1. A protocol request for the client to retrieve the display
    GPU constraints on the stride (and possibly other parameters) for a
    given format and modifier.

2. A protocol request which allows the creation of a pixmap with
    given format and modifier. The renderer GPU driver needs to pass in
    the stride it would choose, then the display GPU driver can choose a
    stride satisfying the constraints on both sides.

Maybe there are other possible approaches I'm missing? Other comments?


Relevant proposed new protocol requests quoted below for the benefit of 
mesa-dev readers:

> @@ -199,6 +202,182 @@ The name of this extension is "DRI3"
>   	associated with a direct rendering device that 'fence' can
>   	work with, otherwise a Match error results.
>   
> +┌───
> +    DRI3GetSupportedFormats
> +	window: WINDOW
> +      ▶
> +	num_formats: CARD32
> +	formats: ListOfCARD32
> +└───
> +	Errors: Window, Match
> +
> +	For the Screen associated with 'window', return a list of
> +	supported DRM FourCC formats, as defined in drm_fourcc.h,
> +	supported as formats for DRI3 pixmap/buffer interchange.
> +	The length of the list, in number of CARD32 elements,
> +	is returned in 'num_formats'.
> +
> +┌───
> +    DRI3GetSupportedModifiers
> +	window: WINDOW
> +	format: CARD32
> +      ▶
> +	num_modifiers: CARD32
> +	modifiers: ListOfCARD32
> +└───
> +	Errors: Window, Match
> +
> +	For the Screen associated with 'window', return a list of
> +	supported DRM FourCC modifiers, as defined in drm_fourcc.h,
> +	supported as formats for DRI3 pixmap/buffer interchange.
> +	Each modifier is returned as returned as a CARD32
> +	containing the most significant 32 bits, followed by a
> +	CARD32 containing the least significant 32 bits. The hi/lo
> +	pattern repeats 'num_modifiers' times, thus there are
> +	'2 * num_modifiers' CARD32 elements returned.
> +
> +┌───
> +    DRI3PixmapFromBuffers
> +	pixmap: PIXMAP
> +	drawable: DRAWABLE
> +	num_buffers: CARD8
> +	width, height: CARD16
> +	stride0, offset0: CARD32
> +	stride1, offset1: CARD32
> +	stride2, offset2: CARD32
> +	stride3, offset3: CARD32
> +	format, modifier_hi, modifier_lo: CARD32
> +	buffers: ListOfFD
> +└───
> +	Errors: Alloc, Drawable, IDChoice, Value, Match
> +
> +	Creates a pixmap for the direct rendering object associated
> +	with 'buffers'. Changes to pixmap will be visible in that
> +	direct rendered object and changes to the direct rendered
> +	object will be visible in the pixmap.
> +
> +	In contrast to PixmapFromBuffers, multiple buffers may be
> +	combined to specify a single logical source for pixel
> +	sampling: 'num_buffers' may be set from 1 (single buffer,
> +	akin to PixmapFromBuffer) to 4. This is the number of file
> +	descriptors which will be sent with this request; one per
> +	buffer.
> +	
> +	The exact configuration of the buffer is specified by 'format',
> +	a DRM FourCC format token as defined in that project's
> +	drm_fourcc.h header, in combination with the modifier.
> +
> +	Modifiers allow explicit specification of non-linear sources,
> +	such as tiled or compressed buffers. 'modifier_hi' (the most
> +	significant 32 bits of a 64-bit value) and 'modifier_lo' are
> +	combined to produce a single DRM format modifier token, again
> +	as defined in drm_fourcc.h. The combination of format and
> +	modifier allows unambiguous declaration of the buffer layout
> +	in a manner defined by the DRM tokens.
> +
> +	DRM_FORMAT_MOD_INVALID may be passed for 'modifier', in which
> +	case the driver may make its own inference as to the exact
> +	layout of the buffer(s).
> +
> +	'width' and 'height' describe the geometry (in pixels) of the
> +	logical pixel-sample source.
> +
> +	'strideN' and 'offsetN' define the number of bytes per logical
> +	scanline, and the distance in bytes from the beginning of the
> +	buffer passed for that plane until the start of the sample
> +	source for that plane, respectively for plane N. If the plane
> +	is not used according to the format and modifier specification,
> +	both values for that plane must be zero.
> +
> +	Precisely how any additional information about the buffer is
> +	shared is outside the scope of this extension.
> +
> +	If the buffer(s) cannot be used with the screen associated with
> +	'pixmap', a Match error is returned.
> +
> +	If the format and modifier combination is not supported by the
> +	screen, a Value error is returned.
> +
> +┌───
> +    DRI3BuffersFromPixmap
> +	pixmap: PIXMAP
> +      ▶
> +	nfd: CARD8
> +	width, height: CARD16
> +	format, modifier_hi, modifier_lo: CARD32
> +	strides: ListOfCARD32
> +	offsets: ListOfCARD32
> +	buffers: ListOfFD
> +└───
> +	Errors: Pixmap, Match
> +
> +	Pass back direct rendering objects associated with 'pixmap'.
> +	Changes to 'pixmap' will be visible in the direct rendered
> +	objects and changes to the direct rendered objects will be
> +	visible in 'pixmap' after flushing and synchronization.
> +
> +	'width' and 'height' describe the geometry (in pixels) of the
> +	logical pixel-sample source from combining the direct rendering
> +	objects.
> +
> +	'format' describes a DRM FourCC format token, with
> +	'modifier_hi' (most significant 32 bits) and 'modifier_lo'
> +	(least significant 32 bits) being combined together to produce
> +	a single unsigned 64-bit 'modifier' token. See
> +	PixmapFromBuffers for more details on DRM format definitions.
> +
> +	'nfd' describes the number of buffers returned for the pixmap,
> +	which must be combined together according to 'format' and
> +	'modifier'.
> +
> +	For each buffer, there is an entry in the 'strides',
> +	'offsets', and 'buffers' list. 'buffer' contains a single file
> +	descriptor referring to the buffer. 'stride' specifies the
> +	number of bytes per logical scanline for this plane, and
> +	'offset' specifies the distance in bytes from the beginning
> +	of 'buffer' until the start of the sample source for that
> +	plane.
> +
> +	Precisely how any additional information about the buffer is
> +	shared is outside the scope of this extension.
> +
> +	If buffers cannot be exported from the the screen associated
> +	with 'pixmap', a Match error is returned.
Hi Michel,

On 21 July 2017 at 18:32, Michel Dänzer <michel@daenzer.net> wrote:
> On 20/07/17 01:08 PM, Daniel Stone wrote:
>> DRI3 version 1.1 adds support for explicit format modifiers, including
>> multi-planar buffers.
>
> Adding mesa-dev, Nicolai and Marek.
>
> We just ran into an issue which might mean that there's still something
> missing in this v2 proposal:
>
> The context is DRI3 PRIME render offloading of glxgears (not useful in
> practice, but bear with me). The display GPU is Raven Ridge, which requires
> that the stride even of linear textures is a multiple of 256 bytes. The
> renderer GPU is Polaris12, which still supports smaller alignment of the
> stride. With the glxgears window of width 300, the renderer GPU driver
> chooses a stride of 304 (* 4 / 256 = 4.75), whereas the display GPU would
> require 320 (* 4 / 256 = 5). This cannot work.

The obvious answer is just to increase padding on external/winsys
surfaces? Increasing it for all allocations would probably be a
non-starter, but winsys surfaces are rare enough that you could
probably afford to take the hit, I guess.

> I see two basic approaches to solve this:
>
> 1. A protocol request for the client to retrieve the display
>    GPU constraints on the stride (and possibly other parameters) for a
>    given format and modifier.

+ corresponding new EGL request and new GBM/KMS API :\

> 2. A protocol request which allows the creation of a pixmap with
>    given format and modifier. The renderer GPU driver needs to pass in
>    the stride it would choose, then the display GPU driver can choose a
>    stride satisfying the constraints on both sides.

Heh, that sounds familiar - DRI2!

> Maybe there are other possible approaches I'm missing? Other comments?

I don't have any great solution off the top of my head, but I'd be
inclined to bundle stride in with placement. TTBOMK (from having
looked at radv), buffers shared cross-GPU also need to be allocated
from a separate externally-visible memory heap. And at the moment,
lacking placement information at allocation time (at least for EGL
allocations, via DRIImage), that happens via transparent migration at
import time I think. Placement restrictions would probably also
involve communicating base address alignment requirements.

Given that, I'm fairly inclined to punt those until we have the grand
glorious allocator, rather than trying to add it to EGL/GBM
separately. The modifiers stuff was a fairly obvious augmentation -
EGL already had no-modifier format import but no query as to which
formats it would accept, and modifiers are a logical extension of
format - but adding the other restrictions is a bigger step forward.

Cheers,
Daniel
On 22.07.2017 14:00, Daniel Stone wrote:
> On 21 July 2017 at 18:32, Michel Dänzer <michel@daenzer.net> wrote:
>> On 20/07/17 01:08 PM, Daniel Stone wrote:
>>> DRI3 version 1.1 adds support for explicit format modifiers, including
>>> multi-planar buffers.
>>
>> Adding mesa-dev, Nicolai and Marek.
>>
>> We just ran into an issue which might mean that there's still something
>> missing in this v2 proposal:
>>
>> The context is DRI3 PRIME render offloading of glxgears (not useful in
>> practice, but bear with me). The display GPU is Raven Ridge, which requires
>> that the stride even of linear textures is a multiple of 256 bytes. The
>> renderer GPU is Polaris12, which still supports smaller alignment of the
>> stride. With the glxgears window of width 300, the renderer GPU driver
>> chooses a stride of 304 (* 4 / 256 = 4.75), whereas the display GPU would
>> require 320 (* 4 / 256 = 5). This cannot work.
> 
> The obvious answer is just to increase padding on external/winsys
> surfaces? Increasing it for all allocations would probably be a
> non-starter, but winsys surfaces are rare enough that you could
> probably afford to take the hit, I guess.
> 
>> I see two basic approaches to solve this:
>>
>> 1. A protocol request for the client to retrieve the display
>>     GPU constraints on the stride (and possibly other parameters) for a
>>     given format and modifier.
> 
> + corresponding new EGL request and new GBM/KMS API :\
> 
>> 2. A protocol request which allows the creation of a pixmap with
>>     given format and modifier. The renderer GPU driver needs to pass in
>>     the stride it would choose, then the display GPU driver can choose a
>>     stride satisfying the constraints on both sides.
> 
> Heh, that sounds familiar - DRI2!
> 
>> Maybe there are other possible approaches I'm missing? Other comments?
> 
> I don't have any great solution off the top of my head, but I'd be
> inclined to bundle stride in with placement. TTBOMK (from having
> looked at radv), buffers shared cross-GPU also need to be allocated
> from a separate externally-visible memory heap. And at the moment,
> lacking placement information at allocation time (at least for EGL
> allocations, via DRIImage), that happens via transparent migration at
> import time I think. Placement restrictions would probably also
> involve communicating base address alignment requirements.

Stride isn't really in the same category as placement and base address 
alignment, though.

Placement and base address alignment requirements can apply to all 
possible texture layouts, while the concept of stride is specific to 
linear layouts.


> Given that, I'm fairly inclined to punt those until we have the grand
> glorious allocator, rather than trying to add it to EGL/GBM
> separately. The modifiers stuff was a fairly obvious augmentation -
> EGL already had no-modifier format import but no query as to which
> formats it would accept, and modifiers are a logical extension of
> format - but adding the other restrictions is a bigger step forward.

Perhaps a third option would be to encode the required pitch_in_bytes 
alignment as part of the modifier?

So DRI3GetSupportedModifiers would return DRM_FORMAT_MOD_LINEAR_256B 
when the display GPU is a Raven Ridge.

More generally, we could say that fourcc_mod_code(NONE, k) means that 
the pitch_in_bytes has to be a multiple of 2**k for e.g. k <= 31. Or if 
you prefer, we could have a stride requirement in elements or pixels 
instead of bytes.

Cheers,
Nicolai




> 
> Cheers,
> Daniel
>
Daniel Stone <daniels@collabora.com> writes:

> DRI3 version 1.1 adds support for explicit format modifiers, including
> multi-planar buffers.

I still want proper 64-bit values, and I don't think the little XSync
mess will be much of a blocker.

> Signed-off-by: Daniel Stone <daniels@collabora.com>
> ---
>  dri3proto.h   | 142 ++++++++++++++++++++++++++-
>  dri3proto.txt | 300 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 438 insertions(+), 4 deletions(-)
>
> Sorry, this was supposed to get sent out with the revised patchset.
>
> Now with actual descriptive text.
>
> diff --git a/dri3proto.h b/dri3proto.h
> index ceddee8..442b714 100644
> --- a/dri3proto.h
> +++ b/dri3proto.h
> @@ -25,7 +25,7 @@
>  
>  #define DRI3_NAME			"DRI3"
>  #define DRI3_MAJOR			1
> -#define DRI3_MINOR			0
> +#define DRI3_MINOR			1
>  
>  #define DRI3NumberErrors		0
>  #define DRI3NumberEvents		0
> @@ -37,7 +37,15 @@
>  #define X_DRI3FenceFromFD               4
>  #define X_DRI3FDFromFence               5
>  
> -#define DRI3NumberRequests		6
> +/* v1.1 */
> +#define xDRI3GetSupportedFormats	6
> +#define xDRI3GetSupportedModifiers	7
> +#define xDRI3PixmapFromBuffers		8
> +#define xDRI3BuffersFromPixmap		9
> +#define xDRI3FenceFromDMAFenceFD        10
> +#define xDRI3DMAFenceFDFromFrence       11
> +
> +#define DRI3NumberRequests		12
>  
>  typedef struct {
>      CARD8   reqType;
> @@ -164,4 +172,134 @@ typedef struct {
>  
>  #define sz_xDRI3FDFromFenceReply   32
>  
> +/* v1.1 */
> +
> +typedef struct {
> +    CARD8   reqType;
> +    CARD8   dri3ReqType;
> +    CARD16  length B16;
> +    CARD32  window B32;
> +} xDRI3GetSupportedFormatsReq;
> +#define sz_xDRI3GetSupportedFormatsReq     8
> +
> +typedef struct {
> +    BYTE    type;   /* X_Reply */
> +    CARD8   pad1;
> +    CARD16  sequenceNumber B16;
> +    CARD32  length B32;
> +    CARD32  numFormats B32;
> +    CARD32  pad12 B32;
> +    CARD32  pad16 B32;
> +    CARD32  pad20 B32;
> +    CARD32  pad24 B32;
> +    CARD32  pad28 B32;
> +} xDRI3GetSupportedFormatsReply;
> +#define sz_xDRI3GetSupportedFormatsReply   32
> +
> +typedef struct {
> +    CARD8   reqType;
> +    CARD8   dri3ReqType;
> +    CARD16  length B16;
> +    CARD32  window B32;
> +    CARD32  format B32;
> +} xDRI3GetSupportedModifiersReq;
> +#define sz_xDRI3GetSupportedModifiersReq     12
> +
> +typedef struct {
> +    BYTE    type;   /* X_Reply */
> +    CARD8   pad1;
> +    CARD16  sequenceNumber B16;
> +    CARD32  length B32;
> +    CARD32  format B32;
> +    CARD32  numModifiers B32;
> +    CARD32  pad16 B32;
> +    CARD32  pad20 B32;
> +    CARD32  pad24 B32;
> +    CARD32  pad28 B32;
> +} xDRI3GetSupportedModifiersReply;
> +#define sz_xDRI3GetSupportedModifiersReply   32
> +
> +typedef struct {
> +    CARD8   reqType;
> +    CARD8   dri3ReqType;
> +    CARD16  length B16;
> +    CARD32  pixmap B32;
> +    CARD32  drawable B32;
> +    CARD8   num_buffers; /* Number of file descriptors passed */
> +    CARD8   pad13;
> +    CARD16  pad14 B16;
> +    CARD16  width B16;
> +    CARD16  height B16;
> +    CARD32  stride0 B32;
> +    CARD32  offset0 B32;
> +    CARD32  stride1 B32;
> +    CARD32  offset1 B32;
> +    CARD32  stride2 B32;
> +    CARD32  offset2 B32;
> +    CARD32  stride3 B32;
> +    CARD32  offset3 B32;
> +    CARD32  format B32;
> +    CARD32  modifier_hi B32;
> +    CARD32  modifier_lo B32;
> +} xDRI3PixmapFromBuffersReq;
> +#define sz_xDRI3PixmapFromBuffersReq 64
> +
> +typedef struct {
> +    CARD8   reqType;
> +    CARD8   dri3ReqType;
> +    CARD16  length B16;
> +    CARD32  pixmap B32;
> +} xDRI3BuffersFromPixmapReq;
> +#define sz_xDRI3BuffersFromPixmapReq     8
> +
> +typedef struct {
> +    BYTE    type;   /* X_Reply */
> +    CARD8   nfd;    /* Number of file descriptors returned */
> +    CARD16  sequenceNumber B16;
> +    CARD32  length B32;
> +    CARD16  width B16;
> +    CARD16  height B16;
> +    CARD32  format B32;
> +    CARD32  modifier_hi B32;
> +    CARD32  modifier_lo B32;
> +    CARD32  pad24 B32;
> +    CARD32  pad28 B32;
> +} xDRI3BuffersFromPixmapReply;
> +#define sz_xDRI3BuffersFromPixmapReply   32
> +
> +typedef struct {
> +    CARD8   reqType;
> +    CARD8   dri3ReqType;
> +    CARD16  length B16;
> +    CARD32  drawable B32;
> +    CARD32  fence B32;
> +} xDRI3FenceFromDMAFenceFDReq;
> +
> +#define sz_xDRI3FenceFromDMAFenceFDReq  12
> +
> +typedef struct {
> +    CARD8   reqType;
> +    CARD8   dri3ReqType;
> +    CARD16  length B16;
> +    CARD32  drawable B32;
> +    CARD32  fence B32;
> +} xDRI3DMAFenceFDFromFenceReq;
> +
> +#define sz_xDRI3DMAFenceFDFromFenceReq  12
> +
> +typedef struct {
> +    BYTE    type;   /* X_Reply */
> +    CARD8   nfd;    /* Number of file descriptors returned (1) */
> +    CARD16  sequenceNumber B16;
> +    CARD32  length B32;
> +    CARD32  pad08 B32;
> +    CARD32  pad12 B32;
> +    CARD32  pad16 B32;
> +    CARD32  pad20 B32;
> +    CARD32  pad24 B32;
> +    CARD32  pad28 B32;
> +} xDRI3DMAFenceFDFromFenceReply;
> +
> +#define sz_xDRI3DMAFenceFDFromFenceReply   32
> +
>  #endif
> diff --git a/dri3proto.txt b/dri3proto.txt
> index dac11d3..e906419 100644
> --- a/dri3proto.txt
> +++ b/dri3proto.txt
> @@ -1,11 +1,12 @@
>  			  The DRI3 Extension
> -			     Version 1.0
> -			       2013-6-4
> +			     Version 1.1
> +			      2017-06-27
>        
>  			    Keith Packard
>  			  keithp@keithp.com
>  			  Intel Corporation
>  
> +
>  1. Introduction
>  
>  The DRI3 extension provides mechanisms to translate between direct
> @@ -27,6 +28,8 @@ Dave Airlie <airlied@redhat.com>
>  Kristian Høgsberg <krh@bitplanet.net>
>  James Jones <janomes@nvidia.com>
>  Arthur Huillet <arthur.huillet@free.fr>
> +Louis-Francis Ratté-Boulianne <lfrb@collabora.com>
> +Daniel Stone <daniels@collabora.com>
>  
>  			     ❄ ❄ ❄  ❄  ❄ ❄ ❄ 
>  
> @@ -199,6 +202,182 @@ The name of this extension is "DRI3"
>  	associated with a direct rendering device that 'fence' can
>  	work with, otherwise a Match error results.
>  
> +┌───
> +    DRI3GetSupportedFormats
> +	window: WINDOW
> +      ▶
> +	num_formats: CARD32
> +	formats: ListOfCARD32
> +└───
> +	Errors: Window, Match
> +
> +	For the Screen associated with 'window', return a list of
> +	supported DRM FourCC formats, as defined in drm_fourcc.h,
> +	supported as formats for DRI3 pixmap/buffer interchange.
> +	The length of the list, in number of CARD32 elements,
> +	is returned in 'num_formats'.
> +
> +┌───
> +    DRI3GetSupportedModifiers
> +	window: WINDOW
> +	format: CARD32
> +      ▶
> +	num_modifiers: CARD32
> +	modifiers: ListOfCARD32
> +└───
> +	Errors: Window, Match
> +
> +	For the Screen associated with 'window', return a list of
> +	supported DRM FourCC modifiers, as defined in drm_fourcc.h,
> +	supported as formats for DRI3 pixmap/buffer interchange.
> +	Each modifier is returned as returned as a CARD32
> +	containing the most significant 32 bits, followed by a
> +	CARD32 containing the least significant 32 bits. The hi/lo
> +	pattern repeats 'num_modifiers' times, thus there are
> +	'2 * num_modifiers' CARD32 elements returned.

Should any meaning be assumed from the ordering of modifiers?

> +┌───
> +    DRI3PixmapFromBuffers
> +	pixmap: PIXMAP
> +	drawable: DRAWABLE
> +	num_buffers: CARD8
> +	width, height: CARD16
> +	stride0, offset0: CARD32
> +	stride1, offset1: CARD32
> +	stride2, offset2: CARD32
> +	stride3, offset3: CARD32
> +	format, modifier_hi, modifier_lo: CARD32
> +	buffers: ListOfFD
> +└───
> +	Errors: Alloc, Drawable, IDChoice, Value, Match
> +
> +	Creates a pixmap for the direct rendering object associated
> +	with 'buffers'. Changes to pixmap will be visible in that
> +	direct rendered object and changes to the direct rendered
> +	object will be visible in the pixmap.
> +
> +	In contrast to PixmapFromBuffers, multiple buffers may be

I think you meant PixmapFromBuffer

> +	combined to specify a single logical source for pixel
> +	sampling: 'num_buffers' may be set from 1 (single buffer,
> +	akin to PixmapFromBuffer) to 4. This is the number of file
> +	descriptors which will be sent with this request; one per
> +	buffer.
> +	
> +	The exact configuration of the buffer is specified by 'format',
> +	a DRM FourCC format token as defined in that project's
> +	drm_fourcc.h header, in combination with the modifier.
> +
> +	Modifiers allow explicit specification of non-linear sources,
> +	such as tiled or compressed buffers. 'modifier_hi' (the most
> +	significant 32 bits of a 64-bit value) and 'modifier_lo' are
> +	combined to produce a single DRM format modifier token, again
> +	as defined in drm_fourcc.h. The combination of format and
> +	modifier allows unambiguous declaration of the buffer layout
> +	in a manner defined by the DRM tokens.
> +
> +	DRM_FORMAT_MOD_INVALID may be passed for 'modifier', in which
> +	case the driver may make its own inference as to the exact
> +	layout of the buffer(s).
> +
> +	'width' and 'height' describe the geometry (in pixels) of the
> +	logical pixel-sample source.
> +
> +	'strideN' and 'offsetN' define the number of bytes per logical
> +	scanline, and the distance in bytes from the beginning of the
> +	buffer passed for that plane until the start of the sample
> +	source for that plane, respectively for plane N. If the plane
> +	is not used according to the format and modifier specification,
> +	both values for that plane must be zero.
> +
> +	Precisely how any additional information about the buffer is
> +	shared is outside the scope of this extension.
> +
> +	If the buffer(s) cannot be used with the screen associated with
> +	'pixmap', a Match error is returned.
> +
> +	If the format and modifier combination is not supported by the
> +	screen, a Value error is returned.

Should we be specifying how the depth of the Pixmap is determined from
the fourcc?  Should we be specifying if X11 rendering works on various
fourccs, and between pixmaps of different fourccs?  It's not clear to me
what glamor would need to be able to do with these pixmaps (can I
CopyArea between XRGB888 and BGRX8888?  What does that even mean?)

> +┌───
> +    DRI3BuffersFromPixmap
> +	pixmap: PIXMAP
> +      ▶
> +	nfd: CARD8
> +	width, height: CARD16
> +	format, modifier_hi, modifier_lo: CARD32
> +	strides: ListOfCARD32
> +	offsets: ListOfCARD32
> +	buffers: ListOfFD
> +└───
> +	Errors: Pixmap, Match
> +
> +	Pass back direct rendering objects associated with 'pixmap'.

Maybe "Returns direct rendering objects..."

> +	Changes to 'pixmap' will be visible in the direct rendered
> +	objects and changes to the direct rendered objects will be
> +	visible in 'pixmap' after flushing and synchronization.
> +
> +	'width' and 'height' describe the geometry (in pixels) of the
> +	logical pixel-sample source from combining the direct rendering
> +	objects.
> +
> +	'format' describes a DRM FourCC format token, with
> +	'modifier_hi' (most significant 32 bits) and 'modifier_lo'
> +	(least significant 32 bits) being combined together to produce
> +	a single unsigned 64-bit 'modifier' token. See
> +	PixmapFromBuffers for more details on DRM format definitions.
> +
> +	'nfd' describes the number of buffers returned for the pixmap,
> +	which must be combined together according to 'format' and
> +	'modifier'.
> +
> +	For each buffer, there is an entry in the 'strides',
> +	'offsets', and 'buffers' list. 'buffer' contains a single file
> +	descriptor referring to the buffer. 'stride' specifies the
> +	number of bytes per logical scanline for this plane, and
> +	'offset' specifies the distance in bytes from the beginning
> +	of 'buffer' until the start of the sample source for that
> +	plane.
> +
> +	Precisely how any additional information about the buffer is
> +	shared is outside the scope of this extension.
> +
> +	If buffers cannot be exported from the the screen associated
> +	with 'pixmap', a Match error is returned.
> +
> +┌───
> +    DRI3FenceFromDMAFenceFD
> +	drawable: DRAWABLE
> +	fence: FENCE
> +	fd: FD
> +└───
> +	Errors: IDChoice, Drawable
> +
> +	Creates a Sync extension Fence that provides the regular Sync
> +	extension semantics. The Fence will begin untriggered, and
> +	become triggered when the underlying dma-fence FD signals.
> +	The resulting Sync Fence is a one-shot, and may not be
> +	manually triggered, reset, or reused until it is destroyed.
> +	Details about the mechanism used with this file descriptor are
> +	outside the scope of the DRI3 extension.

I was surprised to find this lumped in with a commit about
multi-planar/buffer support -- is it actually related, and is it used?

Must an implementation supporting 1.1 support this?  dma-fences seem
like a pretty recent kernel feature.

> +┌───
> +    DRI3DMAFenceFDFromFence
> +	drawable: DRAWABLE
> +	fence: FENCE
> +      ▶
> +	fd: FD
> +└───
> +	Errors: IDChoice, Drawable, Match
> +
> +	Given a Sync extension Fence originally created by the
> +	DRI3FenceFromDMAFenceFD request, return the underlying
> +	dma-fence FD to the client. Details about the mechanism used
> +	with this file descriptor are outside the scope of the DRI3
> +	extension. 'drawable' must be associated with a direct
> +	rendering device that 'fence' can work with, otherwise a Match
> +	error results. NB: it is quite likely this will be forever
> +	unused, and may be removed in later revisions.
> +

Let's not introduce protocol if we can't come up with a use for it.
On 26/07/17 06:20 AM, Eric Anholt wrote:
> Daniel Stone <daniels@collabora.com> writes:
> 
>> DRI3 version 1.1 adds support for explicit format modifiers, including
>> multi-planar buffers.
> 
> I still want proper 64-bit values, and I don't think the little XSync
> mess will be much of a blocker.
> 
>> Signed-off-by: Daniel Stone <daniels@collabora.com>

[...]

>> +	combined to specify a single logical source for pixel
>> +	sampling: 'num_buffers' may be set from 1 (single buffer,
>> +	akin to PixmapFromBuffer) to 4. This is the number of file
>> +	descriptors which will be sent with this request; one per
>> +	buffer.
>> +	
>> +	The exact configuration of the buffer is specified by 'format',
>> +	a DRM FourCC format token as defined in that project's
>> +	drm_fourcc.h header, in combination with the modifier.
>> +
>> +	Modifiers allow explicit specification of non-linear sources,
>> +	such as tiled or compressed buffers. 'modifier_hi' (the most
>> +	significant 32 bits of a 64-bit value) and 'modifier_lo' are
>> +	combined to produce a single DRM format modifier token, again
>> +	as defined in drm_fourcc.h. The combination of format and
>> +	modifier allows unambiguous declaration of the buffer layout
>> +	in a manner defined by the DRM tokens.
>> +
>> +	DRM_FORMAT_MOD_INVALID may be passed for 'modifier', in which
>> +	case the driver may make its own inference as to the exact
>> +	layout of the buffer(s).
>> +
>> +	'width' and 'height' describe the geometry (in pixels) of the
>> +	logical pixel-sample source.
>> +
>> +	'strideN' and 'offsetN' define the number of bytes per logical
>> +	scanline, and the distance in bytes from the beginning of the
>> +	buffer passed for that plane until the start of the sample
>> +	source for that plane, respectively for plane N. If the plane
>> +	is not used according to the format and modifier specification,
>> +	both values for that plane must be zero.
>> +
>> +	Precisely how any additional information about the buffer is
>> +	shared is outside the scope of this extension.
>> +
>> +	If the buffer(s) cannot be used with the screen associated with
>> +	'pixmap', a Match error is returned.
>> +
>> +	If the format and modifier combination is not supported by the
>> +	screen, a Value error is returned.
> 
> Should we be specifying how the depth of the Pixmap is determined from
> the fourcc?  Should we be specifying if X11 rendering works on various
> fourccs, and between pixmaps of different fourccs?  It's not clear to me
> what glamor would need to be able to do with these pixmaps (can I
> CopyArea between XRGB888 and BGRX8888?  What does that even mean?)

X11 pixmaps provide storage for n bits per pixel, where n is the pixmap
depth. CopyArea between pixmaps just copies the bits in the source to
the destination verbatim. Note that this is only possible if both
pixmaps have the same depth.
On 26/07/17 10:48 AM, Michel Dänzer wrote:
> On 26/07/17 06:20 AM, Eric Anholt wrote:
>> Daniel Stone <daniels@collabora.com> writes:
>>
>>> +	combined to specify a single logical source for pixel
>>> +	sampling: 'num_buffers' may be set from 1 (single buffer,
>>> +	akin to PixmapFromBuffer) to 4. This is the number of file
>>> +	descriptors which will be sent with this request; one per
>>> +	buffer.
>>> +	
>>> +	The exact configuration of the buffer is specified by 'format',
>>> +	a DRM FourCC format token as defined in that project's
>>> +	drm_fourcc.h header, in combination with the modifier.
>>> +
>>> +	Modifiers allow explicit specification of non-linear sources,
>>> +	such as tiled or compressed buffers. 'modifier_hi' (the most
>>> +	significant 32 bits of a 64-bit value) and 'modifier_lo' are
>>> +	combined to produce a single DRM format modifier token, again
>>> +	as defined in drm_fourcc.h. The combination of format and
>>> +	modifier allows unambiguous declaration of the buffer layout
>>> +	in a manner defined by the DRM tokens.
>>> +
>>> +	DRM_FORMAT_MOD_INVALID may be passed for 'modifier', in which
>>> +	case the driver may make its own inference as to the exact
>>> +	layout of the buffer(s).
>>> +
>>> +	'width' and 'height' describe the geometry (in pixels) of the
>>> +	logical pixel-sample source.
>>> +
>>> +	'strideN' and 'offsetN' define the number of bytes per logical
>>> +	scanline, and the distance in bytes from the beginning of the
>>> +	buffer passed for that plane until the start of the sample
>>> +	source for that plane, respectively for plane N. If the plane
>>> +	is not used according to the format and modifier specification,
>>> +	both values for that plane must be zero.
>>> +
>>> +	Precisely how any additional information about the buffer is
>>> +	shared is outside the scope of this extension.
>>> +
>>> +	If the buffer(s) cannot be used with the screen associated with
>>> +	'pixmap', a Match error is returned.
>>> +
>>> +	If the format and modifier combination is not supported by the
>>> +	screen, a Value error is returned.
>>
>> Should we be specifying how the depth of the Pixmap is determined from
>> the fourcc?  Should we be specifying if X11 rendering works on various
>> fourccs, and between pixmaps of different fourccs?  It's not clear to me
>> what glamor would need to be able to do with these pixmaps (can I
>> CopyArea between XRGB888 and BGRX8888?  What does that even mean?)
> 
> X11 pixmaps provide storage for n bits per pixel, where n is the pixmap
> depth. CopyArea between pixmaps just copies the bits in the source to
> the destination verbatim. Note that this is only possible if both
> pixmaps have the same depth.

This raises a question about multi-plane (e.g. YUV) formats, where
different planes may have different resolutions. Is this functionality
intended to be used for such formats? If so, how are X11 drawing
operations to/from pixmaps created from such formats envisioned to work?
On 25/07/17 05:28 PM, Nicolai Hähnle wrote:
> On 22.07.2017 14:00, Daniel Stone wrote:
>>
>> I don't have any great solution off the top of my head, but I'd be
>> inclined to bundle stride in with placement. TTBOMK (from having
>> looked at radv), buffers shared cross-GPU also need to be allocated
>> from a separate externally-visible memory heap. And at the moment,
>> lacking placement information at allocation time (at least for EGL
>> allocations, via DRIImage), that happens via transparent migration at
>> import time I think. Placement restrictions would probably also
>> involve communicating base address alignment requirements.
> 
> Stride isn't really in the same category as placement and base address
> alignment, though.
> 
> Placement and base address alignment requirements can apply to all
> possible texture layouts, while the concept of stride is specific to
> linear layouts.

Also, the starting address of shareable buffers is generally aligned to
at least the CPU page size anyway. Do we know of any cases requiring
higher alignment than that, and if so, which address space does the
requirement apply to?


>> Given that, I'm fairly inclined to punt those until we have the grand
>> glorious allocator, rather than trying to add it to EGL/GBM
>> separately. The modifiers stuff was a fairly obvious augmentation -
>> EGL already had no-modifier format import but no query as to which
>> formats it would accept, and modifiers are a logical extension of
>> format - but adding the other restrictions is a bigger step forward.
> 
> Perhaps a third option would be to encode the required pitch_in_bytes
> alignment as part of the modifier?
> 
> So DRI3GetSupportedModifiers would return DRM_FORMAT_MOD_LINEAR_256B
> when the display GPU is a Raven Ridge.
> 
> More generally, we could say that fourcc_mod_code(NONE, k) means that
> the pitch_in_bytes has to be a multiple of 2**k for e.g. k <= 31. Or if
> you prefer, we could have a stride requirement in elements or pixels
> instead of bytes.

Interesting idea. FWIW, I suspect we'd need to support specifying the
requirement in both bytes or pixels, since one or the other alone may
not be sufficient to describe the constraints of all hardware.
On Wed, Jul 26, 2017 at 8:29 AM, Michel Dänzer <michel@daenzer.net> wrote:
> On 25/07/17 05:28 PM, Nicolai Hähnle wrote:
>> On 22.07.2017 14:00, Daniel Stone wrote:
>>>
>>> I don't have any great solution off the top of my head, but I'd be
>>> inclined to bundle stride in with placement. TTBOMK (from having
>>> looked at radv), buffers shared cross-GPU also need to be allocated
>>> from a separate externally-visible memory heap. And at the moment,
>>> lacking placement information at allocation time (at least for EGL
>>> allocations, via DRIImage), that happens via transparent migration at
>>> import time I think. Placement restrictions would probably also
>>> involve communicating base address alignment requirements.
>>
>> Stride isn't really in the same category as placement and base address
>> alignment, though.
>>
>> Placement and base address alignment requirements can apply to all
>> possible texture layouts, while the concept of stride is specific to
>> linear layouts.
>
> Also, the starting address of shareable buffers is generally aligned to
> at least the CPU page size anyway. Do we know of any cases requiring
> higher alignment than that, and if so, which address space does the
> requirement apply to?

The highest base address alignment I know of is 2D tiling on Fiji =
256KB alignment.

Marek
On 26.07.2017 08:29, Michel Dänzer wrote:
> On 25/07/17 05:28 PM, Nicolai Hähnle wrote:
>> On 22.07.2017 14:00, Daniel Stone wrote:
>>>
>>> I don't have any great solution off the top of my head, but I'd be
>>> inclined to bundle stride in with placement. TTBOMK (from having
>>> looked at radv), buffers shared cross-GPU also need to be allocated
>>> from a separate externally-visible memory heap. And at the moment,
>>> lacking placement information at allocation time (at least for EGL
>>> allocations, via DRIImage), that happens via transparent migration at
>>> import time I think. Placement restrictions would probably also
>>> involve communicating base address alignment requirements.
>>
>> Stride isn't really in the same category as placement and base address
>> alignment, though.
>>
>> Placement and base address alignment requirements can apply to all
>> possible texture layouts, while the concept of stride is specific to
>> linear layouts.
> 
> Also, the starting address of shareable buffers is generally aligned to
> at least the CPU page size anyway. Do we know of any cases requiring
> higher alignment than that, and if so, which address space does the
> requirement apply to?

Only tiling modes, as Marek mentioned. We don't do tiling shares across 
different GPUs right now.

Maybe we can do it in the future with gfx9 GPUs. But then the alignment 
requirements should be known on both sides based on the tiling mode 
anyway -- if they even apply for non-VRAM textures.


>>> Given that, I'm fairly inclined to punt those until we have the grand
>>> glorious allocator, rather than trying to add it to EGL/GBM
>>> separately. The modifiers stuff was a fairly obvious augmentation -
>>> EGL already had no-modifier format import but no query as to which
>>> formats it would accept, and modifiers are a logical extension of
>>> format - but adding the other restrictions is a bigger step forward.
>>
>> Perhaps a third option would be to encode the required pitch_in_bytes
>> alignment as part of the modifier?
>>
>> So DRI3GetSupportedModifiers would return DRM_FORMAT_MOD_LINEAR_256B
>> when the display GPU is a Raven Ridge.
>>
>> More generally, we could say that fourcc_mod_code(NONE, k) means that
>> the pitch_in_bytes has to be a multiple of 2**k for e.g. k <= 31. Or if
>> you prefer, we could have a stride requirement in elements or pixels
>> instead of bytes.
> 
> Interesting idea. FWIW, I suspect we'd need to support specifying the
> requirement in both bytes or pixels, since one or the other alone may
> not be sufficient to describe the constraints of all hardware.

 From what I've seen, modifiers are always specified together with one 
specific format, so the bytes-per-pixel are always known, so I don't 
think we need both. Specifying it in bytes is a bit more natural for our 
hardware, that's all.

Cheers,
Nicolai
On Wed, Jul 26, 2017 at 8:15 AM, Nicolai Hähnle <nhaehnle@gmail.com> wrote:
> On 26.07.2017 08:29, Michel Dänzer wrote:
>>
>> On 25/07/17 05:28 PM, Nicolai Hähnle wrote:
>>>
>>> On 22.07.2017 14:00, Daniel Stone wrote:
>>>>
>>>>
>>>> I don't have any great solution off the top of my head, but I'd be
>>>> inclined to bundle stride in with placement. TTBOMK (from having
>>>> looked at radv), buffers shared cross-GPU also need to be allocated
>>>> from a separate externally-visible memory heap. And at the moment,
>>>> lacking placement information at allocation time (at least for EGL
>>>> allocations, via DRIImage), that happens via transparent migration at
>>>> import time I think. Placement restrictions would probably also
>>>> involve communicating base address alignment requirements.
>>>
>>>
>>> Stride isn't really in the same category as placement and base address
>>> alignment, though.
>>>
>>> Placement and base address alignment requirements can apply to all
>>> possible texture layouts, while the concept of stride is specific to
>>> linear layouts.
>>
>>
>> Also, the starting address of shareable buffers is generally aligned to
>> at least the CPU page size anyway. Do we know of any cases requiring
>> higher alignment than that, and if so, which address space does the
>> requirement apply to?
>
>
> Only tiling modes, as Marek mentioned. We don't do tiling shares across
> different GPUs right now.
>
> Maybe we can do it in the future with gfx9 GPUs. But then the alignment
> requirements should be known on both sides based on the tiling mode anyway
> -- if they even apply for non-VRAM textures.

We should be able to do some 1D tiling modes.  That doesn't have any
per sku alignment dependencies.

Alex

>
>
>>>> Given that, I'm fairly inclined to punt those until we have the grand
>>>> glorious allocator, rather than trying to add it to EGL/GBM
>>>> separately. The modifiers stuff was a fairly obvious augmentation -
>>>> EGL already had no-modifier format import but no query as to which
>>>> formats it would accept, and modifiers are a logical extension of
>>>> format - but adding the other restrictions is a bigger step forward.
>>>
>>>
>>> Perhaps a third option would be to encode the required pitch_in_bytes
>>> alignment as part of the modifier?
>>>
>>> So DRI3GetSupportedModifiers would return DRM_FORMAT_MOD_LINEAR_256B
>>> when the display GPU is a Raven Ridge.
>>>
>>> More generally, we could say that fourcc_mod_code(NONE, k) means that
>>> the pitch_in_bytes has to be a multiple of 2**k for e.g. k <= 31. Or if
>>> you prefer, we could have a stride requirement in elements or pixels
>>> instead of bytes.
>>
>>
>> Interesting idea. FWIW, I suspect we'd need to support specifying the
>> requirement in both bytes or pixels, since one or the other alone may
>> not be sufficient to describe the constraints of all hardware.
>
>
> From what I've seen, modifiers are always specified together with one
> specific format, so the bytes-per-pixel are always known, so I don't think
> we need both. Specifying it in bytes is a bit more natural for our hardware,
> that's all.
>
> Cheers,
> Nicolai
> --
> Lerne, wie die Welt wirklich ist,
> Aber vergiss niemals, wie sie sein sollte.
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
On Wed, Jul 26, 2017 at 7:05 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
> On Wed, Jul 26, 2017 at 8:15 AM, Nicolai Hähnle <nhaehnle@gmail.com> wrote:
>> On 26.07.2017 08:29, Michel Dänzer wrote:
>>>
>>> On 25/07/17 05:28 PM, Nicolai Hähnle wrote:
>>>>
>>>> On 22.07.2017 14:00, Daniel Stone wrote:
>>>>>
>>>>>
>>>>> I don't have any great solution off the top of my head, but I'd be
>>>>> inclined to bundle stride in with placement. TTBOMK (from having
>>>>> looked at radv), buffers shared cross-GPU also need to be allocated
>>>>> from a separate externally-visible memory heap. And at the moment,
>>>>> lacking placement information at allocation time (at least for EGL
>>>>> allocations, via DRIImage), that happens via transparent migration at
>>>>> import time I think. Placement restrictions would probably also
>>>>> involve communicating base address alignment requirements.
>>>>
>>>>
>>>> Stride isn't really in the same category as placement and base address
>>>> alignment, though.
>>>>
>>>> Placement and base address alignment requirements can apply to all
>>>> possible texture layouts, while the concept of stride is specific to
>>>> linear layouts.
>>>
>>>
>>> Also, the starting address of shareable buffers is generally aligned to
>>> at least the CPU page size anyway. Do we know of any cases requiring
>>> higher alignment than that, and if so, which address space does the
>>> requirement apply to?
>>
>>
>> Only tiling modes, as Marek mentioned. We don't do tiling shares across
>> different GPUs right now.
>>
>> Maybe we can do it in the future with gfx9 GPUs. But then the alignment
>> requirements should be known on both sides based on the tiling mode anyway
>> -- if they even apply for non-VRAM textures.
>
> We should be able to do some 1D tiling modes.  That doesn't have any
> per sku alignment dependencies.

Yeah, I think 1D tiling for displayable 32bpp is compatible across all
radeon GPUs newer than R600.

All non-X non-VAR tiling modes on Radeon/GFX9 (Vega, Raven) are the
same on all GFX9 GPUs and might be the same on all future products.
The only catch is that X modes are better optimized for the memory
config, so non-X modes can be slower. I think the non-X modes might
also be compatible with Intel (the first 12 at least), so some
cross-vendor interface might be possible. All GFX9 tiling modes:

SW_LINEAR (256B pitch alignment)
SW_256B_S
SW_256B_D (compatible with older Radeons if bpp == 32)
SW_256B_R (compatible with older Radeons if bpp == 32)
SW_4KB_Z (Z = depth/stencil sample order)
SW_4KB_S (S = standard)
SW_4KB_D (D = displayable)
SW_4KB_R (R = displayable rotated)
SW_64KB_Z
SW_64KB_S
SW_64KB_D
SW_64KB_R
SW_VAR_Z (VAR = tile size depends on memory config)
SW_VAR_S
SW_VAR_D
SW_VAR_R
SW_64KB_Z_T
SW_64KB_S_T
SW_64KB_D_T
SW_64KB_R_T
SW_4KB_Z_X (X = optimized for memory config)
SW_4KB_S_X
SW_4KB_D_X
SW_4KB_R_X
SW_64KB_Z_X
SW_64KB_S_X
SW_64KB_D_X
SW_64KB_R_X
SW_VAR_Z_X
SW_VAR_S_X
SW_VAR_D_X
SW_VAR_R_X

Marek
On 26.07.2017 19:42, Marek Olšák wrote:
> On Wed, Jul 26, 2017 at 7:05 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
>> On Wed, Jul 26, 2017 at 8:15 AM, Nicolai Hähnle <nhaehnle@gmail.com> wrote:
>>> On 26.07.2017 08:29, Michel Dänzer wrote:
>>>>
>>>> On 25/07/17 05:28 PM, Nicolai Hähnle wrote:
>>>>>
>>>>> On 22.07.2017 14:00, Daniel Stone wrote:
>>>>>>
>>>>>>
>>>>>> I don't have any great solution off the top of my head, but I'd be
>>>>>> inclined to bundle stride in with placement. TTBOMK (from having
>>>>>> looked at radv), buffers shared cross-GPU also need to be allocated
>>>>>> from a separate externally-visible memory heap. And at the moment,
>>>>>> lacking placement information at allocation time (at least for EGL
>>>>>> allocations, via DRIImage), that happens via transparent migration at
>>>>>> import time I think. Placement restrictions would probably also
>>>>>> involve communicating base address alignment requirements.
>>>>>
>>>>>
>>>>> Stride isn't really in the same category as placement and base address
>>>>> alignment, though.
>>>>>
>>>>> Placement and base address alignment requirements can apply to all
>>>>> possible texture layouts, while the concept of stride is specific to
>>>>> linear layouts.
>>>>
>>>>
>>>> Also, the starting address of shareable buffers is generally aligned to
>>>> at least the CPU page size anyway. Do we know of any cases requiring
>>>> higher alignment than that, and if so, which address space does the
>>>> requirement apply to?
>>>
>>>
>>> Only tiling modes, as Marek mentioned. We don't do tiling shares across
>>> different GPUs right now.
>>>
>>> Maybe we can do it in the future with gfx9 GPUs. But then the alignment
>>> requirements should be known on both sides based on the tiling mode anyway
>>> -- if they even apply for non-VRAM textures.
>>
>> We should be able to do some 1D tiling modes.  That doesn't have any
>> per sku alignment dependencies.
> 
> Yeah, I think 1D tiling for displayable 32bpp is compatible across all
> radeon GPUs newer than R600.
> 
> All non-X non-VAR tiling modes on Radeon/GFX9 (Vega, Raven) are the
> same on all GFX9 GPUs and might be the same on all future products.
> The only catch is that X modes are better optimized for the memory
> config, so non-X modes can be slower. I think the non-X modes might
> also be compatible with Intel (the first 12 at least), so some
> cross-vendor interface might be possible. All GFX9 tiling modes:

Right. It might be worth it to try to use some of these tiling modes to 
make PRIME a bit more efficient in some cases. Non-X modes may be 
non-optimal, but certainly they're better than linear :)

Cheers,
Nicolai


> 
> SW_LINEAR (256B pitch alignment)
> SW_256B_S
> SW_256B_D (compatible with older Radeons if bpp == 32)
> SW_256B_R (compatible with older Radeons if bpp == 32)
> SW_4KB_Z (Z = depth/stencil sample order)
> SW_4KB_S (S = standard)
> SW_4KB_D (D = displayable)
> SW_4KB_R (R = displayable rotated)
> SW_64KB_Z
> SW_64KB_S
> SW_64KB_D
> SW_64KB_R
> SW_VAR_Z (VAR = tile size depends on memory config)
> SW_VAR_S
> SW_VAR_D
> SW_VAR_R
> SW_64KB_Z_T
> SW_64KB_S_T
> SW_64KB_D_T
> SW_64KB_R_T
> SW_4KB_Z_X (X = optimized for memory config)
> SW_4KB_S_X
> SW_4KB_D_X
> SW_4KB_R_X
> SW_64KB_Z_X
> SW_64KB_S_X
> SW_64KB_D_X
> SW_64KB_R_X
> SW_VAR_Z_X
> SW_VAR_S_X
> SW_VAR_D_X
> SW_VAR_R_X
> 
> Marek
>
Michel Dänzer <michel@daenzer.net> writes:

> [ Unknown signature status ]
> On 26/07/17 06:20 AM, Eric Anholt wrote:
>> Daniel Stone <daniels@collabora.com> writes:
>> 
>>> DRI3 version 1.1 adds support for explicit format modifiers, including
>>> multi-planar buffers.
>> 
>> I still want proper 64-bit values, and I don't think the little XSync
>> mess will be much of a blocker.
>> 
>>> Signed-off-by: Daniel Stone <daniels@collabora.com>
>
> [...]
>
>>> +	combined to specify a single logical source for pixel
>>> +	sampling: 'num_buffers' may be set from 1 (single buffer,
>>> +	akin to PixmapFromBuffer) to 4. This is the number of file
>>> +	descriptors which will be sent with this request; one per
>>> +	buffer.
>>> +	
>>> +	The exact configuration of the buffer is specified by 'format',
>>> +	a DRM FourCC format token as defined in that project's
>>> +	drm_fourcc.h header, in combination with the modifier.
>>> +
>>> +	Modifiers allow explicit specification of non-linear sources,
>>> +	such as tiled or compressed buffers. 'modifier_hi' (the most
>>> +	significant 32 bits of a 64-bit value) and 'modifier_lo' are
>>> +	combined to produce a single DRM format modifier token, again
>>> +	as defined in drm_fourcc.h. The combination of format and
>>> +	modifier allows unambiguous declaration of the buffer layout
>>> +	in a manner defined by the DRM tokens.
>>> +
>>> +	DRM_FORMAT_MOD_INVALID may be passed for 'modifier', in which
>>> +	case the driver may make its own inference as to the exact
>>> +	layout of the buffer(s).
>>> +
>>> +	'width' and 'height' describe the geometry (in pixels) of the
>>> +	logical pixel-sample source.
>>> +
>>> +	'strideN' and 'offsetN' define the number of bytes per logical
>>> +	scanline, and the distance in bytes from the beginning of the
>>> +	buffer passed for that plane until the start of the sample
>>> +	source for that plane, respectively for plane N. If the plane
>>> +	is not used according to the format and modifier specification,
>>> +	both values for that plane must be zero.
>>> +
>>> +	Precisely how any additional information about the buffer is
>>> +	shared is outside the scope of this extension.
>>> +
>>> +	If the buffer(s) cannot be used with the screen associated with
>>> +	'pixmap', a Match error is returned.
>>> +
>>> +	If the format and modifier combination is not supported by the
>>> +	screen, a Value error is returned.
>> 
>> Should we be specifying how the depth of the Pixmap is determined from
>> the fourcc?  Should we be specifying if X11 rendering works on various
>> fourccs, and between pixmaps of different fourccs?  It's not clear to me
>> what glamor would need to be able to do with these pixmaps (can I
>> CopyArea between XRGB888 and BGRX8888?  What does that even mean?)
>
> X11 pixmaps provide storage for n bits per pixel, where n is the pixmap
> depth. CopyArea between pixmaps just copies the bits in the source to
> the destination verbatim. Note that this is only possible if both
> pixmaps have the same depth.

Right.  So is the plan that XRGB888 to BGRX8888 (both depth 24) will
actually shift around the bits during the copy?
On 26/07/17 09:15 PM, Nicolai Hähnle wrote:
> On 26.07.2017 08:29, Michel Dänzer wrote:
>> On 25/07/17 05:28 PM, Nicolai Hähnle wrote:
>>> On 22.07.2017 14:00, Daniel Stone wrote:
>>>>
>>>> Given that, I'm fairly inclined to punt those until we have the grand
>>>> glorious allocator, rather than trying to add it to EGL/GBM
>>>> separately. The modifiers stuff was a fairly obvious augmentation -
>>>> EGL already had no-modifier format import but no query as to which
>>>> formats it would accept, and modifiers are a logical extension of
>>>> format - but adding the other restrictions is a bigger step forward.
>>>
>>> Perhaps a third option would be to encode the required pitch_in_bytes
>>> alignment as part of the modifier?
>>>
>>> So DRI3GetSupportedModifiers would return DRM_FORMAT_MOD_LINEAR_256B
>>> when the display GPU is a Raven Ridge.
>>>
>>> More generally, we could say that fourcc_mod_code(NONE, k) means that
>>> the pitch_in_bytes has to be a multiple of 2**k for e.g. k <= 31. Or if
>>> you prefer, we could have a stride requirement in elements or pixels
>>> instead of bytes.
>>
>> Interesting idea. FWIW, I suspect we'd need to support specifying the
>> requirement in both bytes or pixels, since one or the other alone may
>> not be sufficient to describe the constraints of all hardware.
> 
> From what I've seen, modifiers are always specified together with one
> specific format, so the bytes-per-pixel are always known, so I don't
> think we need both.

The proposal adds two DRI3 extension requests for querying the list of
supported formats and modifiers, respectively. This suggests that the
supported formats and modifiers can be freely combined.
On 27/07/17 07:38 AM, Eric Anholt wrote:
> Michel Dänzer <michel@daenzer.net> writes:
> 
>> [ Unknown signature status ]
>> On 26/07/17 06:20 AM, Eric Anholt wrote:
>>> Daniel Stone <daniels@collabora.com> writes:
>>>
>>>> DRI3 version 1.1 adds support for explicit format modifiers, including
>>>> multi-planar buffers.
>>>
>>> I still want proper 64-bit values, and I don't think the little XSync
>>> mess will be much of a blocker.
>>>
>>>> Signed-off-by: Daniel Stone <daniels@collabora.com>
>>
>> [...]
>>
>>>> +	combined to specify a single logical source for pixel
>>>> +	sampling: 'num_buffers' may be set from 1 (single buffer,
>>>> +	akin to PixmapFromBuffer) to 4. This is the number of file
>>>> +	descriptors which will be sent with this request; one per
>>>> +	buffer.
>>>> +	
>>>> +	The exact configuration of the buffer is specified by 'format',
>>>> +	a DRM FourCC format token as defined in that project's
>>>> +	drm_fourcc.h header, in combination with the modifier.
>>>> +
>>>> +	Modifiers allow explicit specification of non-linear sources,
>>>> +	such as tiled or compressed buffers. 'modifier_hi' (the most
>>>> +	significant 32 bits of a 64-bit value) and 'modifier_lo' are
>>>> +	combined to produce a single DRM format modifier token, again
>>>> +	as defined in drm_fourcc.h. The combination of format and
>>>> +	modifier allows unambiguous declaration of the buffer layout
>>>> +	in a manner defined by the DRM tokens.
>>>> +
>>>> +	DRM_FORMAT_MOD_INVALID may be passed for 'modifier', in which
>>>> +	case the driver may make its own inference as to the exact
>>>> +	layout of the buffer(s).
>>>> +
>>>> +	'width' and 'height' describe the geometry (in pixels) of the
>>>> +	logical pixel-sample source.
>>>> +
>>>> +	'strideN' and 'offsetN' define the number of bytes per logical
>>>> +	scanline, and the distance in bytes from the beginning of the
>>>> +	buffer passed for that plane until the start of the sample
>>>> +	source for that plane, respectively for plane N. If the plane
>>>> +	is not used according to the format and modifier specification,
>>>> +	both values for that plane must be zero.
>>>> +
>>>> +	Precisely how any additional information about the buffer is
>>>> +	shared is outside the scope of this extension.
>>>> +
>>>> +	If the buffer(s) cannot be used with the screen associated with
>>>> +	'pixmap', a Match error is returned.
>>>> +
>>>> +	If the format and modifier combination is not supported by the
>>>> +	screen, a Value error is returned.
>>>
>>> Should we be specifying how the depth of the Pixmap is determined from
>>> the fourcc?  Should we be specifying if X11 rendering works on various
>>> fourccs, and between pixmaps of different fourccs?  It's not clear to me
>>> what glamor would need to be able to do with these pixmaps (can I
>>> CopyArea between XRGB888 and BGRX8888?  What does that even mean?)
>>
>> X11 pixmaps provide storage for n bits per pixel, where n is the pixmap
>> depth. CopyArea between pixmaps just copies the bits in the source to
>> the destination verbatim. Note that this is only possible if both
>> pixmaps have the same depth.
> 
> Right.  So is the plan that XRGB888 to BGRX8888 (both depth 24)

I don't see how those can both be depth 24.

> will actually shift around the bits during the copy?

Assuming they were the same depth, since CopyArea just copies the bits
verbatim, it will shift around the components for other uses which apply
the different formats to interpret the bits.
On 27.07.2017 03:14, Michel Dänzer wrote:
> On 26/07/17 09:15 PM, Nicolai Hähnle wrote:
>> On 26.07.2017 08:29, Michel Dänzer wrote:
>>> On 25/07/17 05:28 PM, Nicolai Hähnle wrote:
>>>> On 22.07.2017 14:00, Daniel Stone wrote:
>>>>>
>>>>> Given that, I'm fairly inclined to punt those until we have the grand
>>>>> glorious allocator, rather than trying to add it to EGL/GBM
>>>>> separately. The modifiers stuff was a fairly obvious augmentation -
>>>>> EGL already had no-modifier format import but no query as to which
>>>>> formats it would accept, and modifiers are a logical extension of
>>>>> format - but adding the other restrictions is a bigger step forward.
>>>>
>>>> Perhaps a third option would be to encode the required pitch_in_bytes
>>>> alignment as part of the modifier?
>>>>
>>>> So DRI3GetSupportedModifiers would return DRM_FORMAT_MOD_LINEAR_256B
>>>> when the display GPU is a Raven Ridge.
>>>>
>>>> More generally, we could say that fourcc_mod_code(NONE, k) means that
>>>> the pitch_in_bytes has to be a multiple of 2**k for e.g. k <= 31. Or if
>>>> you prefer, we could have a stride requirement in elements or pixels
>>>> instead of bytes.
>>>
>>> Interesting idea. FWIW, I suspect we'd need to support specifying the
>>> requirement in both bytes or pixels, since one or the other alone may
>>> not be sufficient to describe the constraints of all hardware.
>>
>>  From what I've seen, modifiers are always specified together with one
>> specific format, so the bytes-per-pixel are always known, so I don't
>> think we need both.
> 
> The proposal adds two DRI3 extension requests for querying the list of
> supported formats and modifiers, respectively. This suggests that the
> supported formats and modifiers can be freely combined.

Which are these? I only saw DRI3GetSupportedModifiers, which takes both 
a window and a format argument.

Cheers,
Nicolai
On 27/07/17 04:08 PM, Nicolai Hähnle wrote:
> On 27.07.2017 03:14, Michel Dänzer wrote:
>> On 26/07/17 09:15 PM, Nicolai Hähnle wrote:
>>> On 26.07.2017 08:29, Michel Dänzer wrote:
>>>> On 25/07/17 05:28 PM, Nicolai Hähnle wrote:
>>>>> On 22.07.2017 14:00, Daniel Stone wrote:
>>>>>>
>>>>>> Given that, I'm fairly inclined to punt those until we have the grand
>>>>>> glorious allocator, rather than trying to add it to EGL/GBM
>>>>>> separately. The modifiers stuff was a fairly obvious augmentation -
>>>>>> EGL already had no-modifier format import but no query as to which
>>>>>> formats it would accept, and modifiers are a logical extension of
>>>>>> format - but adding the other restrictions is a bigger step forward.
>>>>>
>>>>> Perhaps a third option would be to encode the required pitch_in_bytes
>>>>> alignment as part of the modifier?
>>>>>
>>>>> So DRI3GetSupportedModifiers would return DRM_FORMAT_MOD_LINEAR_256B
>>>>> when the display GPU is a Raven Ridge.
>>>>>
>>>>> More generally, we could say that fourcc_mod_code(NONE, k) means that
>>>>> the pitch_in_bytes has to be a multiple of 2**k for e.g. k <= 31.
>>>>> Or if
>>>>> you prefer, we could have a stride requirement in elements or pixels
>>>>> instead of bytes.
>>>>
>>>> Interesting idea. FWIW, I suspect we'd need to support specifying the
>>>> requirement in both bytes or pixels, since one or the other alone may
>>>> not be sufficient to describe the constraints of all hardware.
>>>
>>>  From what I've seen, modifiers are always specified together with one
>>> specific format, so the bytes-per-pixel are always known, so I don't
>>> think we need both.
>>
>> The proposal adds two DRI3 extension requests for querying the list of
>> supported formats and modifiers, respectively. This suggests that the
>> supported formats and modifiers can be freely combined.
> 
> Which are these? I only saw DRI3GetSupportedModifiers, which takes both
> a window and a format argument.

You're right, I missed the format argument.
Michel Dänzer <michel@daenzer.net> writes:

> [ Unknown signature status ]
> On 27/07/17 07:38 AM, Eric Anholt wrote:
>> Michel Dänzer <michel@daenzer.net> writes:
>> 
>>> [ Unknown signature status ]
>>> On 26/07/17 06:20 AM, Eric Anholt wrote:
>>>> Daniel Stone <daniels@collabora.com> writes:
>>>>
>>>>> DRI3 version 1.1 adds support for explicit format modifiers, including
>>>>> multi-planar buffers.
>>>>
>>>> I still want proper 64-bit values, and I don't think the little XSync
>>>> mess will be much of a blocker.
>>>>
>>>>> Signed-off-by: Daniel Stone <daniels@collabora.com>
>>>
>>> [...]
>>>
>>>>> +	combined to specify a single logical source for pixel
>>>>> +	sampling: 'num_buffers' may be set from 1 (single buffer,
>>>>> +	akin to PixmapFromBuffer) to 4. This is the number of file
>>>>> +	descriptors which will be sent with this request; one per
>>>>> +	buffer.
>>>>> +	
>>>>> +	The exact configuration of the buffer is specified by 'format',
>>>>> +	a DRM FourCC format token as defined in that project's
>>>>> +	drm_fourcc.h header, in combination with the modifier.
>>>>> +
>>>>> +	Modifiers allow explicit specification of non-linear sources,
>>>>> +	such as tiled or compressed buffers. 'modifier_hi' (the most
>>>>> +	significant 32 bits of a 64-bit value) and 'modifier_lo' are
>>>>> +	combined to produce a single DRM format modifier token, again
>>>>> +	as defined in drm_fourcc.h. The combination of format and
>>>>> +	modifier allows unambiguous declaration of the buffer layout
>>>>> +	in a manner defined by the DRM tokens.
>>>>> +
>>>>> +	DRM_FORMAT_MOD_INVALID may be passed for 'modifier', in which
>>>>> +	case the driver may make its own inference as to the exact
>>>>> +	layout of the buffer(s).
>>>>> +
>>>>> +	'width' and 'height' describe the geometry (in pixels) of the
>>>>> +	logical pixel-sample source.
>>>>> +
>>>>> +	'strideN' and 'offsetN' define the number of bytes per logical
>>>>> +	scanline, and the distance in bytes from the beginning of the
>>>>> +	buffer passed for that plane until the start of the sample
>>>>> +	source for that plane, respectively for plane N. If the plane
>>>>> +	is not used according to the format and modifier specification,
>>>>> +	both values for that plane must be zero.
>>>>> +
>>>>> +	Precisely how any additional information about the buffer is
>>>>> +	shared is outside the scope of this extension.
>>>>> +
>>>>> +	If the buffer(s) cannot be used with the screen associated with
>>>>> +	'pixmap', a Match error is returned.
>>>>> +
>>>>> +	If the format and modifier combination is not supported by the
>>>>> +	screen, a Value error is returned.
>>>>
>>>> Should we be specifying how the depth of the Pixmap is determined from
>>>> the fourcc?  Should we be specifying if X11 rendering works on various
>>>> fourccs, and between pixmaps of different fourccs?  It's not clear to me
>>>> what glamor would need to be able to do with these pixmaps (can I
>>>> CopyArea between XRGB888 and BGRX8888?  What does that even mean?)
>>>
>>> X11 pixmaps provide storage for n bits per pixel, where n is the pixmap
>>> depth. CopyArea between pixmaps just copies the bits in the source to
>>> the destination verbatim. Note that this is only possible if both
>>> pixmaps have the same depth.
>> 
>> Right.  So is the plan that XRGB888 to BGRX8888 (both depth 24)
>
> I don't see how those can both be depth 24.

That's what both patch 5 of this series and pixman say the depth is for
bgrx8888.  I think things have only worked because nobody uses bgrx8888
with Render and also tries to do core operations with those contents,
but I think we should figure out what we actually want the behavior to
be here.

I don't think we want to define that CopyArea from XRGB8888 to BGRX8888
shifts the 24 bits of depth up by 8 within the 32bpp pixels.  One of the
options might be to say something along the lines of "core rendering
between pixmaps of different drm_fourccs is undefined".  I think
figuring out what the exact requirement would be takes a bit more
thought, though.
Eric Anholt <eric@anholt.net> writes:

> That's what both patch 5 of this series and pixman say the depth is for
> bgrx8888.  I think things have only worked because nobody uses bgrx8888
> with Render and also tries to do core operations with those contents,
> but I think we should figure out what we actually want the behavior to
> be here.

The core protocol uses the low bits in a pixel; is there actually
translation code which shifts things around as necessary to make this
work?

> I don't think we want to define that CopyArea from XRGB8888 to BGRX8888
> shifts the 24 bits of depth up by 8 within the 32bpp pixels.

I don't think you have a choice if you actually report BGRX8888 as depth
24 -- you're going to have depth 24 pixmaps and they'll have to work
with both.
Keith Packard <keithp@keithp.com> writes:

> [ Unknown signature status ]
> Eric Anholt <eric@anholt.net> writes:
>
>> That's what both patch 5 of this series and pixman say the depth is for
>> bgrx8888.  I think things have only worked because nobody uses bgrx8888
>> with Render and also tries to do core operations with those contents,
>> but I think we should figure out what we actually want the behavior to
>> be here.
>
> The core protocol uses the low bits in a pixel; is there actually
> translation code which shifts things around as necessary to make this
> work?

Ah, I was misled by looking at Pixman.  Render handles the depth of
formats in an interesting way.  From a quick hack of rendercheck:

Found server-supported format: a8 8 depth
Found server-supported format: a4 4 depth
Found server-supported format: a8r8g8b8 32 depth
Found server-supported format: x8r8g8b8 32 depth
Found server-supported format: b8g8r8a8 32 depth
Found server-supported format: b8g8r8x8 32 depth
Found server-supported format: r8g8b8 24 depth
Found server-supported format: b8g8r8 24 depth
Found server-supported format: x3r4g4b4 15 depth
Found server-supported format: x3b4g4r4 15 depth
Found server-supported format: r5g5b5 15 depth
Found server-supported format: b5g5r5 15 depth
Found server-supported format: x4r4g4b4 16 depth
Found server-supported format: x4b4g4r4 16 depth
Found server-supported format: x1r5g5b5 16 depth
Found server-supported format: x1b5g5r5 16 depth
Found server-supported format: r5g6b5 16 depth
Found server-supported format: b5g6r5 16 depth
Found server-supported format: a4r4g4b4 16 depth
Found server-supported format: a4b4g4r4 16 depth
Found server-supported format: x8b8g8r8 32 depth
Found server-supported format: x2r10g10b10 32 depth
Found server-supported format: x2b10g10r10 32 depth

Note here that b8g8r8x8 couldn't be used with a depth 24 pixmap -- it's
only advertised under 32 depth.  The r8g8b8 and x8r8g8b8 I believe
internally are the same PictFormat, it's just advertised under both 24
and 32bpp.  (picture.c's PictureCreateDefaultFormats() for reference)

>> I don't think we want to define that CopyArea from XRGB8888 to BGRX8888
>> shifts the 24 bits of depth up by 8 within the 32bpp pixels.
>
> I don't think you have a choice if you actually report BGRX8888 as depth
> 24 -- you're going to have depth 24 pixmaps and they'll have to work
> with both.

I think one option would be to have this extension create pixmaps with a
depth equal to the highest populated bit of the fourcc plus one.  Sure,
it's weird that rgbx8888 and xrgb888 have a different depth, but "depth"
is a pretty awful concept at this point.
Eric Anholt <eric@anholt.net> writes:

> Note here that b8g8r8x8 couldn't be used with a depth 24 pixmap -- it's
> only advertised under 32 depth.  The r8g8b8 and x8r8g8b8 I believe
> internally are the same PictFormat, it's just advertised under both 24
> and 32bpp.  (picture.c's PictureCreateDefaultFormats() for reference)

Check xdpyinfo -ext RENDER:

  pict format:
	format id:    0x2b
	type:         Direct
	depth:        24
	alpha:         0 mask 0x0
	red:           0 mask 0xff
	green:         8 mask 0xff
	blue:         16 mask 0xff

...

  pict format:
	format id:    0x3a
	type:         Direct
	depth:        32
	alpha:         0 mask 0x0
	red:           0 mask 0xff
	green:         8 mask 0xff
	blue:         16 mask 0xff

And, of course all of these are 32bpp:

    depth 24, bits_per_pixel 32, scanline_pad 32
    depth 32, bits_per_pixel 32, scanline_pad 32

I'm not sure in what way these are the same?

> I think one option would be to have this extension create pixmaps with a
> depth equal to the highest populated bit of the fourcc plus one.  Sure,
> it's weird that rgbx8888 and xrgb888 have a different depth, but "depth"
> is a pretty awful concept at this point.

It's just supposed to express which bits in the pixel contribute to
generating the resulting color; bits outside of depth 'don't matter',
and may not even be retained. Of course, for fourcc values which spread
'pixel' data across multiple storage units.

Sorry for not reading the whole proposal up front; I've been out
crabbing in the San Juan's for a week and trying to catch up on email in
the last few days...

When I was doing Present, what I figured was that we'd define new kinds
of read-only picture which had image storage data associated with it
that could be in a non-pixel format -- various fourcc formats using
multiple buffers, jpeg, png or whatever. You could use those with Render
or Present to get data into RGB format or onto the screen. Trying to
mash them into 'pixmaps' makes little sense.

In this case, I'd imagine we'd add fourcc pictures, and a new
DRI3PictureFromBuffers request. Adding a PresentPicture request would be
a nice bonus feature to make sure the copy was synchronized with vblank.
Hi,
Sorry, I've been sick the past couple of days - exactly when this
thread exploded ...

On 25 July 2017 at 22:20, Eric Anholt <eric@anholt.net> wrote:
> Daniel Stone <daniels@collabora.com> writes:
>> DRI3 version 1.1 adds support for explicit format modifiers, including
>> multi-planar buffers.
>
> I still want proper 64-bit values, and I don't think the little XSync
> mess will be much of a blocker.

Cool, I'll happily review the CARD64 bits and flick the switch on the
protocol when those land.

>> +┌───
>> +    DRI3GetSupportedModifiers
>> +     window: WINDOW
>> +     format: CARD32
>> +      ▶
>> +     num_modifiers: CARD32
>> +     modifiers: ListOfCARD32
>> +└───
>> +     Errors: Window, Match
>> +
>> +     For the Screen associated with 'window', return a list of
>> +     supported DRM FourCC modifiers, as defined in drm_fourcc.h,
>> +     supported as formats for DRI3 pixmap/buffer interchange.
>> +     Each modifier is returned as returned as a CARD32
>> +     containing the most significant 32 bits, followed by a
>> +     CARD32 containing the least significant 32 bits. The hi/lo
>> +     pattern repeats 'num_modifiers' times, thus there are
>> +     '2 * num_modifiers' CARD32 elements returned.
>
> Should any meaning be assumed from the ordering of modifiers?

Nope, arbitrary order. In practice, the client does its own sort
across the list anyway, selecting for local optimality.

>> +     Precisely how any additional information about the buffer is
>> +     shared is outside the scope of this extension.
>
> Should we be specifying how the depth of the Pixmap is determined from
> the fourcc?  Should we be specifying if X11 rendering works on various
> fourccs, and between pixmaps of different fourccs?  It's not clear to me
> what glamor would need to be able to do with these pixmaps (can I
> CopyArea between XRGB888 and BGRX8888?  What does that even mean?)

I'll come back to that in the subthread.

>> +┌───
>> +    DRI3FenceFromDMAFenceFD
>> +     drawable: DRAWABLE
>> +     fence: FENCE
>> +     fd: FD
>> +└───
>> +     Errors: IDChoice, Drawable
>> +
>> +     Creates a Sync extension Fence that provides the regular Sync
>> +     extension semantics. The Fence will begin untriggered, and
>> +     become triggered when the underlying dma-fence FD signals.
>> +     The resulting Sync Fence is a one-shot, and may not be
>> +     manually triggered, reset, or reused until it is destroyed.
>> +     Details about the mechanism used with this file descriptor are
>> +     outside the scope of the DRI3 extension.
>
> I was surprised to find this lumped in with a commit about
> multi-planar/buffer support -- is it actually related, and is it used?

Related, no. Used, not right now, but there'll be patches out to
implement explicit fencing for Vulkan clients next week. It's only
lumped in to save doing two version bumps at the exact same time.

> Must an implementation supporting 1.1 support this?  dma-fences seem
> like a pretty recent kernel feature.

You're right, a capability query would be better here.

>> +┌───
>> +    DRI3DMAFenceFDFromFence
>> +     drawable: DRAWABLE
>> +     fence: FENCE
>> +      ▶
>> +     fd: FD
>> +└───
>> +     Errors: IDChoice, Drawable, Match
>> +
>> +     Given a Sync extension Fence originally created by the
>> +     DRI3FenceFromDMAFenceFD request, return the underlying
>> +     dma-fence FD to the client. Details about the mechanism used
>> +     with this file descriptor are outside the scope of the DRI3
>> +     extension. 'drawable' must be associated with a direct
>> +     rendering device that 'fence' can work with, otherwise a Match
>> +     error results. NB: it is quite likely this will be forever
>> +     unused, and may be removed in later revisions.
>> +
>
> Let's not introduce protocol if we can't come up with a use for it.

Happily, it is actually used, after a bit of back-and-forth on the
implementation. lfrb is at SIGGRAPH this week, but he has some
branches which work but are in need of cleanup here:
https://git.collabora.com/cgit/user/lfrb/xserver.git/log/?h=x11-fences
https://git.collabora.com/cgit/user/lfrb/mesa.git/log/?h=wip/2017-07/vulkan-fences

The idea is that when a VkSemaphore is passed into vkQueuePresentKHR,
the implementation extracts a dma-fence from the semaphore, creates an
X11 fence directly wrapping that, and passes that in as the wait_fence
to PresentPixmap. The server then inserts a hardware-side wait (either
EGL_ANDROID_native_fence_fd + eglWaitSyncKHR for Glamor, or
IN_FENCE_FD when directly flipping with KMS). On the converse side,
out-fences are implemented by creating an 'empty' DMAFence object,
passing that as the idle_fence to PresentPixmap, calling
DMAFenceFDFromFence when the PresentIdlePixmap event comes through,
then wrapping that into a VkSemaphore/VkFence returned via
vkAcquireNextImageKHR.

We'll do some cleanup across the branch - and this protocol text -
before sending it out though.

Cheers,
Daniel
Hi Michel,

On 26 July 2017 at 07:21, Michel Dänzer <michel@daenzer.net> wrote:
> On 26/07/17 10:48 AM, Michel Dänzer wrote:
>> On 26/07/17 06:20 AM, Eric Anholt wrote:
>>> Daniel Stone <daniels@collabora.com> writes:
>>>> +   The exact configuration of the buffer is specified by 'format',
>>>> +   a DRM FourCC format token as defined in that project's
>>>> +   drm_fourcc.h header, in combination with the modifier.
>>>
>>> Should we be specifying how the depth of the Pixmap is determined from
>>> the fourcc?  Should we be specifying if X11 rendering works on various
>>> fourccs, and between pixmaps of different fourccs?  It's not clear to me
>>> what glamor would need to be able to do with these pixmaps (can I
>>> CopyArea between XRGB888 and BGRX8888?  What does that even mean?)
>>
>> X11 pixmaps provide storage for n bits per pixel, where n is the pixmap
>> depth. CopyArea between pixmaps just copies the bits in the source to
>> the destination verbatim. Note that this is only possible if both
>> pixmaps have the same depth.
>
> This raises a question about multi-plane (e.g. YUV) formats, where
> different planes may have different resolutions. Is this functionality
> intended to be used for such formats? If so, how are X11 drawing
> operations to/from pixmaps created from such formats envisioned to work?

Honestly, I wasn't planning on attacking YUV right now, especially
render-to-YUV. Not least since that brings the complexity of wide vs.
narrow range and selection of primaries into play. I wouldn't expect
any server to advertise that as a supported format; one which would
had better have that figured out ...

Should we perhaps insert text specifically enforcing only RGB formats?

Cheers,
Daniel
Hi Nicolai,
Trying to tackle the stride subthread in one go ...

On 25 July 2017 at 09:28, Nicolai Hähnle <nhaehnle@gmail.com> wrote:
> On 22.07.2017 14:00, Daniel Stone wrote:
>> On 21 July 2017 at 18:32, Michel Dänzer <michel@daenzer.net> wrote:
>>> We just ran into an issue which might mean that there's still something
>>> missing in this v2 proposal:
>>>
>>> The context is DRI3 PRIME render offloading of glxgears (not useful in
>>> practice, but bear with me). The display GPU is Raven Ridge, which
>>> requires
>>> that the stride even of linear textures is a multiple of 256 bytes. The
>>> renderer GPU is Polaris12, which still supports smaller alignment of the
>>> stride. With the glxgears window of width 300, the renderer GPU driver
>>> chooses a stride of 304 (* 4 / 256 = 4.75), whereas the display GPU would
>>> require 320 (* 4 / 256 = 5). This cannot work.
>>
>>
>> The obvious answer is just to increase padding on external/winsys
>> surfaces? Increasing it for all allocations would probably be a
>> non-starter, but winsys surfaces are rare enough that you could
>> probably afford to take the hit, I guess.
>>
>>> I see two basic approaches to solve this:
>>> [...]
>>> Maybe there are other possible approaches I'm missing? Other comments?
>>
>> I don't have any great solution off the top of my head, but I'd be
>> inclined to bundle stride in with placement. TTBOMK (from having
>> looked at radv), buffers shared cross-GPU also need to be allocated
>> from a separate externally-visible memory heap. And at the moment,
>> lacking placement information at allocation time (at least for EGL
>> allocations, via DRIImage), that happens via transparent migration at
>> import time I think. Placement restrictions would probably also
>> involve communicating base address alignment requirements.
>
> Stride isn't really in the same category as placement and base address
> alignment, though.
>
> Placement and base address alignment requirements can apply to all possible
> texture layouts, while the concept of stride is specific to linear layouts.

No, I don't think it is. Tiled layouts still have a stride: if you
look at i915 X/Y/Yf/Y_CCS/Yf_CCS (the latter two containing an
auxiliary compression/fast-clear buffer), iMX/etnaviv
tiled/supertiled, or VC4 T-tiled modifiers and how they're handled
both for DRIImage and KMS interchange, they all specify a stride which
is conceptually the same as linear, if you imagine linear to be 1x1
tiled.

Most tiling users accept any integer units of tiles (buffer width
aligned to tile width), but not always. The NV12MT format used by
Samsung media decoders (also shipped in some Qualcomm SoCs) is
particularly special, IIRC requiring height to be aligned to a
multiple of two tiles.

>> Given that, I'm fairly inclined to punt those until we have the grand
>> glorious allocator, rather than trying to add it to EGL/GBM
>> separately. The modifiers stuff was a fairly obvious augmentation -
>> EGL already had no-modifier format import but no query as to which
>> formats it would accept, and modifiers are a logical extension of
>> format - but adding the other restrictions is a bigger step forward.
>
> Perhaps a third option would be to encode the required pitch_in_bytes
> alignment as part of the modifier?
>
> So DRI3GetSupportedModifiers would return DRM_FORMAT_MOD_LINEAR_256B when
> the display GPU is a Raven Ridge.
>
> More generally, we could say that fourcc_mod_code(NONE, k) means that the
> pitch_in_bytes has to be a multiple of 2**k for e.g. k <= 31. Or if you
> prefer, we could have a stride requirement in elements or pixels instead of
> bytes.

I've been thinking this over for the past couple of days, and we could
make it work in clients. But we already have DRM_FORMAT_MOD_LINEAR
with a well-defined meaning, implemented in quite a few places. I
think special-casing linear to encode stride alignment is something
we'd regret in the long run, especially given that some hardware
_does_ have more strict alignment requirements for tiled modes than
just an integer number of tiles allocated.

AFAICT, both AMD and NVIDIA are both going to use a fair bit of the
tiling enum space to encode tile size as well as layout. If allocation
alignment requirements (in both dimensions) needed to be added to
that, it's entirely likely that there wouldn't be enough space and
you'd need to put it somewhere else than the modifier, in which case
we've not even really solved the problem.

It definitely seems attractive to kill two birds with one stone, but
I'd really much rather not conflate format description/advertisement,
and allocation restriction, into one enum. I'm still on the side of
saying that this is a problem modifiers do not solve, deferring to the
allocator we need anyway in order to determine things like placement
and global optimality (e.g. rotated scanout placing further
restrictions on allocation).

Cheers,
Daniel
Hi,

On 28 July 2017 at 01:11, Keith Packard <keithp@keithp.com> wrote:
> Eric Anholt <eric@anholt.net> writes:
>> I think one option would be to have this extension create pixmaps with a
>> depth equal to the highest populated bit of the fourcc plus one.  Sure,
>> it's weird that rgbx8888 and xrgb888 have a different depth, but "depth"
>> is a pretty awful concept at this point.
>
> It's just supposed to express which bits in the pixel contribute to
> generating the resulting color; bits outside of depth 'don't matter',
> and may not even be retained. Of course, for fourcc values which spread
> 'pixel' data across multiple storage units.
>
> Sorry for not reading the whole proposal up front; I've been out
> crabbing in the San Juan's for a week and trying to catch up on email in
> the last few days...

Really can't fault your choices there.

> When I was doing Present, what I figured was that we'd define new kinds
> of read-only picture which had image storage data associated with it
> that could be in a non-pixel format -- various fourcc formats using
> multiple buffers, jpeg, png or whatever. You could use those with Render
> or Present to get data into RGB format or onto the screen. Trying to
> mash them into 'pixmaps' makes little sense.
>
> In this case, I'd imagine we'd add fourcc pictures, and a new
> DRI3PictureFromBuffers request. Adding a PresentPicture request would be
> a nice bonus feature to make sure the copy was synchronized with vblank.

Hm. I think I prefer Eric's suggestion, of just declaring this to be
undefined behaviour.

You're right that it makes little to no sense to mix the two, but I'm
not sure what practical gain we get from expressing things as
Pictures. Ultimately, the server still deals in Window Pixmaps and
Screen Pixmaps for backing storage, and the compositor interface is
NameWindowPixmap. Your suggestion of another type seems nicer, but it
doesn't look like we can avoid Pixmaps as the lowest common
denominator anyway.

By implementation if not spec, DRI3 v1.0 enforces that depth 16 is
RGB565, depth 24 is XRGB8888, and depth 32 is ARGB8888. Or at least in
Mesa: the server only supports the latter two with glamor_egl. It
seems like this has served well enough for long enough, so equally we
could just eliminate the FourCC from the protocol, stick with the
fixed depth <-> base format mapping, and encode that in the protocol
text as well.

That would much more clearly match the intention of the spec
additions, which was to allow non-linearly-addressed (Intel X/Y*,
Vivante tiled/supertiled, VC4 T-tiled, the infinite AMD/NV tiling
modes) buffers, as well as losslessly-compressed buffers (Intel CCS,
whatever Tegra calls its mode which similarly has an auxiliary buffer
to interpret the tiled primary colour buffer, ARM AFBC which is just
an inline block format). Allowing YUV buffers, attaching colourspace
information (e.g. converting between primaries/EOTF), replacing the
use of Pixmaps with another buffer type (Readable as a counterpart to
Drawable ... ?), would all be objectively good things to have, but
equally I don't think trying to fix them in a protocol which is really
just a better version of PutImage for Pixmaps is the best thing.

tl;dr: we could replace FourCC with depth as the format determinant to
better match DRI3 v1.0, or just declare that doing anything with those
Pixmaps other than displaying them to a Window with a compatible
Visual results in undefined behaviour

Cheers,
Daniel
On 28/07/17 05:03 PM, Daniel Stone wrote:
> On 28 July 2017 at 01:11, Keith Packard <keithp@keithp.com> wrote:
>> Eric Anholt <eric@anholt.net> writes:
>>> I think one option would be to have this extension create pixmaps with a
>>> depth equal to the highest populated bit of the fourcc plus one.  Sure,
>>> it's weird that rgbx8888 and xrgb888 have a different depth, but "depth"
>>> is a pretty awful concept at this point.
>>
>> It's just supposed to express which bits in the pixel contribute to
>> generating the resulting color; bits outside of depth 'don't matter',
>> and may not even be retained. Of course, for fourcc values which spread
>> 'pixel' data across multiple storage units.
>>
>> Sorry for not reading the whole proposal up front; I've been out
>> crabbing in the San Juan's for a week and trying to catch up on email in
>> the last few days...
> 
> Really can't fault your choices there.
> 
>> When I was doing Present, what I figured was that we'd define new kinds
>> of read-only picture which had image storage data associated with it
>> that could be in a non-pixel format -- various fourcc formats using
>> multiple buffers, jpeg, png or whatever. You could use those with Render
>> or Present to get data into RGB format or onto the screen. Trying to
>> mash them into 'pixmaps' makes little sense.
>>
>> In this case, I'd imagine we'd add fourcc pictures, and a new
>> DRI3PictureFromBuffers request. Adding a PresentPicture request would be
>> a nice bonus feature to make sure the copy was synchronized with vblank.
> 
> Hm. I think I prefer Eric's suggestion, of just declaring this to be
> undefined behaviour.

Declaring where? Once a pixmap is created, it only has a depth, no
format, so there's nothing to base on that e.g. CopyArea between two
pixmaps of the same depth is undefined.


> You're right that it makes little to no sense to mix the two, but I'm
> not sure what practical gain we get from expressing things as
> Pictures. Ultimately, the server still deals in Window Pixmaps and
> Screen Pixmaps for backing storage, and the compositor interface is
> NameWindowPixmap. Your suggestion of another type seems nicer, but it
> doesn't look like we can avoid Pixmaps as the lowest common
> denominator anyway.
> 
> By implementation if not spec, DRI3 v1.0 enforces that depth 16 is
> RGB565, depth 24 is XRGB8888, and depth 32 is ARGB8888.

No, it doesn't. How the bits stored in a pixmap are interpreted is
outside of the scope of DRI3 1.0.


> Or at least in Mesa: the server only supports the latter two with
> glamor_egl. It seems like this has served well enough for long enough,
> so equally we could just eliminate the FourCC from the protocol, stick
> with the fixed depth <-> base format mapping, and encode that in the
> protocol text as well.
> 
> That would much more clearly match the intention of the spec
> additions, which was to allow non-linearly-addressed (Intel X/Y*,
> Vivante tiled/supertiled, VC4 T-tiled, the infinite AMD/NV tiling
> modes) buffers, as well as losslessly-compressed buffers (Intel CCS,
> whatever Tegra calls its mode which similarly has an auxiliary buffer
> to interpret the tiled primary colour buffer, ARM AFBC which is just
> an inline block format). Allowing YUV buffers, attaching colourspace
> information (e.g. converting between primaries/EOTF), replacing the
> use of Pixmaps with another buffer type (Readable as a counterpart to
> Drawable ... ?), would all be objectively good things to have, but
> equally I don't think trying to fix them in a protocol which is really
> just a better version of PutImage for Pixmaps is the best thing.
> 
> tl;dr: we could replace FourCC with depth as the format determinant to
> better match DRI3 v1.0, or just declare that doing anything with those
> Pixmaps other than displaying them to a Window with a compatible
> Visual results in undefined behaviour

I'm getting less and less convinced that any reference at all to formats
in the context of pixmaps in the DRI3 specification is a good idea.
On 28/07/17 04:14 PM, Daniel Stone wrote:
> On 26 July 2017 at 07:21, Michel Dänzer <michel@daenzer.net> wrote:
>> On 26/07/17 10:48 AM, Michel Dänzer wrote:
>>> On 26/07/17 06:20 AM, Eric Anholt wrote:
>>>> Daniel Stone <daniels@collabora.com> writes:
>>>>> +   The exact configuration of the buffer is specified by 'format',
>>>>> +   a DRM FourCC format token as defined in that project's
>>>>> +   drm_fourcc.h header, in combination with the modifier.
>>>>
>>>> Should we be specifying how the depth of the Pixmap is determined from
>>>> the fourcc?  Should we be specifying if X11 rendering works on various
>>>> fourccs, and between pixmaps of different fourccs?  It's not clear to me
>>>> what glamor would need to be able to do with these pixmaps (can I
>>>> CopyArea between XRGB888 and BGRX8888?  What does that even mean?)
>>>
>>> X11 pixmaps provide storage for n bits per pixel, where n is the pixmap
>>> depth. CopyArea between pixmaps just copies the bits in the source to
>>> the destination verbatim. Note that this is only possible if both
>>> pixmaps have the same depth.
>>
>> This raises a question about multi-plane (e.g. YUV) formats, where
>> different planes may have different resolutions. Is this functionality
>> intended to be used for such formats? If so, how are X11 drawing
>> operations to/from pixmaps created from such formats envisioned to work?
> 
> Honestly, I wasn't planning on attacking YUV right now, especially
> render-to-YUV. Not least since that brings the complexity of wide vs.
> narrow range and selection of primaries into play. I wouldn't expect
> any server to advertise that as a supported format; one which would
> had better have that figured out ...
> 
> Should we perhaps insert text specifically enforcing only RGB formats?

Probably, assuming there should be any reference to formats at all.


BTW, what's the purpose of supporting four buffers per pixmap, if not
for multi-plane formats?
On 28.07.2017 09:44, Daniel Stone wrote:
> Hi Nicolai,
> Trying to tackle the stride subthread in one go ...
> 
> On 25 July 2017 at 09:28, Nicolai Hähnle <nhaehnle@gmail.com> wrote:
>> On 22.07.2017 14:00, Daniel Stone wrote:
>>> On 21 July 2017 at 18:32, Michel Dänzer <michel@daenzer.net> wrote:
>>>> We just ran into an issue which might mean that there's still something
>>>> missing in this v2 proposal:
>>>>
>>>> The context is DRI3 PRIME render offloading of glxgears (not useful in
>>>> practice, but bear with me). The display GPU is Raven Ridge, which
>>>> requires
>>>> that the stride even of linear textures is a multiple of 256 bytes. The
>>>> renderer GPU is Polaris12, which still supports smaller alignment of the
>>>> stride. With the glxgears window of width 300, the renderer GPU driver
>>>> chooses a stride of 304 (* 4 / 256 = 4.75), whereas the display GPU would
>>>> require 320 (* 4 / 256 = 5). This cannot work.
>>>
>>>
>>> The obvious answer is just to increase padding on external/winsys
>>> surfaces? Increasing it for all allocations would probably be a
>>> non-starter, but winsys surfaces are rare enough that you could
>>> probably afford to take the hit, I guess.
>>>
>>>> I see two basic approaches to solve this:
>>>> [...]
>>>> Maybe there are other possible approaches I'm missing? Other comments?
>>>
>>> I don't have any great solution off the top of my head, but I'd be
>>> inclined to bundle stride in with placement. TTBOMK (from having
>>> looked at radv), buffers shared cross-GPU also need to be allocated
>>> from a separate externally-visible memory heap. And at the moment,
>>> lacking placement information at allocation time (at least for EGL
>>> allocations, via DRIImage), that happens via transparent migration at
>>> import time I think. Placement restrictions would probably also
>>> involve communicating base address alignment requirements.
>>
>> Stride isn't really in the same category as placement and base address
>> alignment, though.
>>
>> Placement and base address alignment requirements can apply to all possible
>> texture layouts, while the concept of stride is specific to linear layouts.
> 
> No, I don't think it is. Tiled layouts still have a stride: if you
> look at i915 X/Y/Yf/Y_CCS/Yf_CCS (the latter two containing an
> auxiliary compression/fast-clear buffer), iMX/etnaviv
> tiled/supertiled, or VC4 T-tiled modifiers and how they're handled
> both for DRIImage and KMS interchange, they all specify a stride which
> is conceptually the same as linear, if you imagine linear to be 1x1
> tiled.
> 
> Most tiling users accept any integer units of tiles (buffer width
> aligned to tile width), but not always. The NV12MT format used by
> Samsung media decoders (also shipped in some Qualcomm SoCs) is
> particularly special, IIRC requiring height to be aligned to a
> multiple of two tiles.

Fair enough, but I think you need to distinguish between the chosen 
stride and stride *requirements*. I do think it makes sense to consider 
the stride requirement as part of the format/layout description, but 
more below.


>>> Given that, I'm fairly inclined to punt those until we have the grand
>>> glorious allocator, rather than trying to add it to EGL/GBM
>>> separately. The modifiers stuff was a fairly obvious augmentation -
>>> EGL already had no-modifier format import but no query as to which
>>> formats it would accept, and modifiers are a logical extension of
>>> format - but adding the other restrictions is a bigger step forward.
>>
>> Perhaps a third option would be to encode the required pitch_in_bytes
>> alignment as part of the modifier?
>>
>> So DRI3GetSupportedModifiers would return DRM_FORMAT_MOD_LINEAR_256B when
>> the display GPU is a Raven Ridge.
>>
>> More generally, we could say that fourcc_mod_code(NONE, k) means that the
>> pitch_in_bytes has to be a multiple of 2**k for e.g. k <= 31. Or if you
>> prefer, we could have a stride requirement in elements or pixels instead of
>> bytes.
> 
> I've been thinking this over for the past couple of days, and we could
> make it work in clients. But we already have DRM_FORMAT_MOD_LINEAR
> with a well-defined meaning, implemented in quite a few places. I
> think special-casing linear to encode stride alignment is something
> we'd regret in the long run, especially given that some hardware
> _does_ have more strict alignment requirements for tiled modes than
> just an integer number of tiles allocated.
> 
> AFAICT, both AMD and NVIDIA are both going to use a fair bit of the
> tiling enum space to encode tile size as well as layout. If allocation
> alignment requirements (in both dimensions) needed to be added to
> that, it's entirely likely that there wouldn't be enough space and
> you'd need to put it somewhere else than the modifier, in which case
> we've not even really solved the problem.

At least for AMD, the alignment requirements are de facto part of the 
tiling description, so I don't think it makes a difference.


> It definitely seems attractive to kill two birds with one stone, but
> I'd really much rather not conflate format description/advertisement,
> and allocation restriction, into one enum. I'm still on the side of
> saying that this is a problem modifiers do not solve, deferring to the
> allocator we need anyway in order to determine things like placement
> and global optimality (e.g. rotated scanout placing further
> restrictions on allocation).

Okay, the original issue here is that the allocator *cannot* determine 
the alignment requirement in the use case that prompted this sub-thread.

The use case is PRIME off-loading, where the rendering GPU supports 
linear layouts with a 64 byte stride, while the display GPU requires a 
256 byte stride.

The allocator *cannot* solve this issue, because the allocation happens 
on the rendering GPU. We need to communicate somehow what the display 
GPU's stride requirements are.

How do you propose to do that?

Cheers,
Nicolai
Hi,

On 28 July 2017 at 09:54, Michel Dänzer <michel@daenzer.net> wrote:
> On 28/07/17 05:03 PM, Daniel Stone wrote:
>> By implementation if not spec, DRI3 v1.0 enforces that depth 16 is
>> RGB565, depth 24 is XRGB8888, and depth 32 is ARGB8888.
>
> No, it doesn't. How the bits stored in a pixmap are interpreted is
> outside of the scope of DRI3 1.0.

I mean that, when using glamor_egl and Mesa, PixmapFromBuffer
translates XRGB8888 to depth 24, and BufferFromPixmap translates depth
24 to XRGB8888. So, de facto, these are the established translations
between depth and format required to use DRI3.

>> tl;dr: we could replace FourCC with depth as the format determinant to
>> better match DRI3 v1.0, or just declare that doing anything with those
>> Pixmaps other than displaying them to a Window with a compatible
>> Visual results in undefined behaviour
>
> I'm getting less and less convinced that any reference at all to formats
> in the context of pixmaps in the DRI3 specification is a good idea.

I'd be fine with that, but at least documenting the implemented
behaviour would probably be reasonable, to save other implementations
from having to reverse-engineer the actual semantics.

Cheers,
Daniel
Hi,

On 28 July 2017 at 10:24, Nicolai Hähnle <nhaehnle@gmail.com> wrote:
> On 28.07.2017 09:44, Daniel Stone wrote:
>> No, I don't think it is. Tiled layouts still have a stride: if you
>> look at i915 X/Y/Yf/Y_CCS/Yf_CCS (the latter two containing an
>> auxiliary compression/fast-clear buffer), iMX/etnaviv
>> tiled/supertiled, or VC4 T-tiled modifiers and how they're handled
>> both for DRIImage and KMS interchange, they all specify a stride which
>> is conceptually the same as linear, if you imagine linear to be 1x1
>> tiled.
>>
>> Most tiling users accept any integer units of tiles (buffer width
>> aligned to tile width), but not always. The NV12MT format used by
>> Samsung media decoders (also shipped in some Qualcomm SoCs) is
>> particularly special, IIRC requiring height to be aligned to a
>> multiple of two tiles.
>
> Fair enough, but I think you need to distinguish between the chosen stride
> and stride *requirements*. I do think it makes sense to consider the stride
> requirement as part of the format/layout description, but more below.

Right. Stride is a property of one buffer, stride requirements are a
property of the users of that buffer (GPU, display control, media
encode, etc). The requirements also depend on use, e.g. trying to do
rotation through your scanout engine can change those requirements.

>> It definitely seems attractive to kill two birds with one stone, but
>> I'd really much rather not conflate format description/advertisement,
>> and allocation restriction, into one enum. I'm still on the side of
>> saying that this is a problem modifiers do not solve, deferring to the
>> allocator we need anyway in order to determine things like placement
>> and global optimality (e.g. rotated scanout placing further
>> restrictions on allocation).
>
> Okay, the original issue here is that the allocator *cannot* determine the
> alignment requirement in the use case that prompted this sub-thread.
>
> The use case is PRIME off-loading, where the rendering GPU supports linear
> layouts with a 64 byte stride, while the display GPU requires a 256 byte
> stride.
>
> The allocator *cannot* solve this issue, because the allocation happens on
> the rendering GPU. We need to communicate somehow what the display GPU's
> stride requirements are.
>
> How do you propose to do that?

The allocator[0] in itself can't magically reach across processes to
determine desired usage and resolve dependencies. But the entire
design behind it was to be able to solve cross-device usage: between
GPU and scanout, between both of those and media encode/decode, etc.
Obviously it can't do that without help, so winsys will need to gain
protocol in order to express those in terms the allocator will
understand.

The idea was to split information into positive capabilities and
negative constraints. Modifier queries fall into the same boat as
format queries: you're expressing an additional capability ('I can
speak tiled'). Stride alignment, for me, falls into a negative
constraint ('linear allocations must have stride aligned to 256
bytes'). Similarly, placement constraints (VRAM possibly only
accessible to SLI-type paired GPU vs. GTT vs. pure system RAM, etc)
are in the same boat AFAICT. So this helps solve one side of the
equation, but not the other.

Cheers,
Daniel

[0]: Read as, 'the design we discussed for the allocator at XDC'.
Hi Daniel,

On 28.07.2017 12:46, Daniel Stone wrote:
> On 28 July 2017 at 10:24, Nicolai Hähnle <nhaehnle@gmail.com> wrote:
>> On 28.07.2017 09:44, Daniel Stone wrote:
>>> No, I don't think it is. Tiled layouts still have a stride: if you
>>> look at i915 X/Y/Yf/Y_CCS/Yf_CCS (the latter two containing an
>>> auxiliary compression/fast-clear buffer), iMX/etnaviv
>>> tiled/supertiled, or VC4 T-tiled modifiers and how they're handled
>>> both for DRIImage and KMS interchange, they all specify a stride which
>>> is conceptually the same as linear, if you imagine linear to be 1x1
>>> tiled.
>>>
>>> Most tiling users accept any integer units of tiles (buffer width
>>> aligned to tile width), but not always. The NV12MT format used by
>>> Samsung media decoders (also shipped in some Qualcomm SoCs) is
>>> particularly special, IIRC requiring height to be aligned to a
>>> multiple of two tiles.
>>
>> Fair enough, but I think you need to distinguish between the chosen stride
>> and stride *requirements*. I do think it makes sense to consider the stride
>> requirement as part of the format/layout description, but more below.
> 
> Right. Stride is a property of one buffer, stride requirements are a
> property of the users of that buffer (GPU, display control, media
> encode, etc). The requirements also depend on use, e.g. trying to do
> rotation through your scanout engine can change those requirements.

Right.


>>> It definitely seems attractive to kill two birds with one stone, but
>>> I'd really much rather not conflate format description/advertisement,
>>> and allocation restriction, into one enum. I'm still on the side of
>>> saying that this is a problem modifiers do not solve, deferring to the
>>> allocator we need anyway in order to determine things like placement
>>> and global optimality (e.g. rotated scanout placing further
>>> restrictions on allocation).
>>
>> Okay, the original issue here is that the allocator *cannot* determine the
>> alignment requirement in the use case that prompted this sub-thread.
>>
>> The use case is PRIME off-loading, where the rendering GPU supports linear
>> layouts with a 64 byte stride, while the display GPU requires a 256 byte
>> stride.
>>
>> The allocator *cannot* solve this issue, because the allocation happens on
>> the rendering GPU. We need to communicate somehow what the display GPU's
>> stride requirements are.
>>
>> How do you propose to do that?
> 
> The allocator[0] in itself can't magically reach across processes to
> determine desired usage and resolve dependencies. But the entire
> design behind it was to be able to solve cross-device usage: between
> GPU and scanout, between both of those and media encode/decode, etc.
> Obviously it can't do that without help, so winsys will need to gain
> protocol in order to express those in terms the allocator will
> understand.
> 
> The idea was to split information into positive capabilities and
> negative constraints. Modifier queries fall into the same boat as
> format queries: you're expressing an additional capability ('I can
> speak tiled'). Stride alignment, for me, falls into a negative
> constraint ('linear allocations must have stride aligned to 256
> bytes'). Similarly, placement constraints (VRAM possibly only
> accessible to SLI-type paired GPU vs. GTT vs. pure system RAM, etc)
> are in the same boat AFAICT. So this helps solve one side of the
> equation, but not the other.

I've been thinking about this some more, and I can see now that the 
changed modifier scheme that I originally proposed does not fit well 
into places where modifiers are used to express buffer properties (e.g. 
DRI3PixmapFromBuffers, DRI3BuffersFromPixmap).

But I see no proposal on how to fix the issue so far. You cannot fully 
separate capabilities from constraints. As is, we (AMD) cannot properly 
implement the proposed DRI3 v1.1: what would we return in 
DRI3GetSupportedModifiers?

The natural option is to return (at least) DRM_FORMAT_MOD_LINEAR, but 
that would be a lie, because we *don't* speak arbitrary linear formats.

I don't think this is difficult to fix in terms of protocol, although 
there's plenty of opportunity for bike-shedding :)

I see roughly two options:

1. Make the constraints per-modifier, and add a "constraints: 
ListOfCard32" (or 64) to the response to DRI3GetSupportedModifiers. We 
can then reserve some bits for global constraints (e.g. placement) and 
some bits on a per-modifier basis (e.g. stride alignment for linear). 
You could build constraints like DRM_CONSTRAINT_PLACEMENT_SYSTEM | 
DRM_CONSTRAINT_LINEAR_STRIDE_256B.

2. Make the constraints global, and add a DRI3GetConstraints protocol 
with the same signature as DRI3GetSupportedModifiers. We'd need vendor 
namespaces for the constraint defines, to support constraints that are 
specific to vendor-specific modifiers. You could have entries like 
DRM_CONSTRAINT_PLACEMENT(DRM_CONSTRAINT_PLACEMENT_SYSTEM) and, as a 
separate list entry, DRM_CONSTRAINT_LINEAR_STRIDE(256).

Cheers,
Nicolai
Daniel Stone <daniel@fooishbar.org> writes:

> You're right that it makes little to no sense to mix the two, but I'm
> not sure what practical gain we get from expressing things as
> Pictures.

Well, abstractly, a Picture can represent any of the formats you've
talked about -- they have explicit knowledge of color and translucency,
and already provide a notion of 'source only' objects which need not
store ARGB values for each pixel.

> Ultimately, the server still deals in Window Pixmaps and
> Screen Pixmaps for backing storage, and the compositor interface is
> NameWindowPixmap. Your suggestion of another type seems nicer, but it
> doesn't look like we can avoid Pixmaps as the lowest common
> denominator anyway.

What's the plan for compositors which just want to turn window contents
into textures? I'd assume you'd want to update them to understand the
new format bits, which leaves us free to do something sensible here.

For X-based compositors (are there any left?), we can fake out the
pixmap easily enough (NameWindowPixmap creates a suitable 'real' pixmap,
CopyArea/Composite update the source region before the copy happens).

> That would much more clearly match the intention of the spec
> additions, which was to allow non-linearly-addressed (Intel X/Y*,
> Vivante tiled/supertiled, VC4 T-tiled, the infinite AMD/NV tiling
> modes) buffers.

I'm confused -- I can't see how tiling is relevant here; if you're
storing actual ARGB pixels, even in some crazy order, then you've got a
pixmap. I have to admit that all I know about is Intel X/Y tiling
though; are there ways in which the other tiling formats don't just
store ARGB pixels?

> tl;dr: we could replace FourCC with depth as the format determinant to
> better match DRI3 v1.0, or just declare that doing anything with those
> Pixmaps other than displaying them to a Window with a compatible
> Visual results in undefined behaviour

Hrm. I think I'm stuck on the notion that Pixmaps are concrete 2D
arrays of multi-bit values.

What about calling them 'source pixmaps' and offering extended requests
to determine the precise nature of the data within? Make core requests
report correct width and height values, and then fix them at depth 24
for RGB or depth 32 for ARGB. Using them as a rendering target could
generate BadMatch, or be ignored (I'd prefer the former).

DRI3BufferFromPixmap could create a shadow 'normal' pixmap and use
Damage to sync it with the source pixmap.

Creating a Picture from a 'source pixmap' could dtrt (at least at some
point; it could also just use a shadow pixmap).

A new DRI3BufferFromSourcePixmap request would be used to get the list
of buffers and appropriate format data so that updated compositors could
get improved results.
Michel Dänzer <michel@daenzer.net> writes:

> Declaring where? Once a pixmap is created, it only has a depth, no
> format, so there's nothing to base on that e.g. CopyArea between two
> pixmaps of the same depth is undefined.

I think we'd need some extension request which provides the format data
and other attributes. We can define the transformation of underlying
data into depth-based pixel data to match core drawing.

> I'm getting less and less convinced that any reference at all to formats
> in the context of pixmaps in the DRI3 specification is a good idea.

Well, if we want to deal with YUV or compressed data, then we'd better
find a way to describe the contents of the buffer returned by
DRI3BufferFromPixmap.

However, it would also be 'nice' if the underlying raw data could be
accessed over the wire (ala PutImage/GetImage). Perhaps a different
extension which talks about new formats (including deep color?) and
provides for Get/Put semantics?
Hi,

On 28 July 2017 at 16:44, Keith Packard <keithp@keithp.com> wrote:
> Daniel Stone <daniel@fooishbar.org> writes:
>> You're right that it makes little to no sense to mix the two, but I'm
>> not sure what practical gain we get from expressing things as
>> Pictures.
>
> Well, abstractly, a Picture can represent any of the formats you've
> talked about -- they have explicit knowledge of color and translucency,
> and already provide a notion of 'source only' objects which need not
> store ARGB values for each pixel.

Sure, but we've already managed to get this far with DRI3 and Present
only speaking Pixmaps. The point of modifiers isn't to deal with
colourspaces or anything, it's just to allow non-linear expression of
the existing formats we have today ...

>> Ultimately, the server still deals in Window Pixmaps and
>> Screen Pixmaps for backing storage, and the compositor interface is
>> NameWindowPixmap. Your suggestion of another type seems nicer, but it
>> doesn't look like we can avoid Pixmaps as the lowest common
>> denominator anyway.
>
> What's the plan for compositors which just want to turn window contents
> into textures? I'd assume you'd want to update them to understand the
> new format bits, which leaves us free to do something sensible here.

Same as before: they call BuffersFromPixmap, instead of BufferFromPixmap.

> For X-based compositors (are there any left?), we can fake out the
> pixmap easily enough (NameWindowPixmap creates a suitable 'real' pixmap,
> CopyArea/Composite update the source region before the copy happens).

So, if we take Michel's suggestion and ditch the FourCC format
completely and go back to depth, it's the same as DRI3 v1.0 in this
respect.

>> That would much more clearly match the intention of the spec
>> additions, which was to allow non-linearly-addressed (Intel X/Y*,
>> Vivante tiled/supertiled, VC4 T-tiled, the infinite AMD/NV tiling
>> modes) buffers.
>
> I'm confused -- I can't see how tiling is relevant here; if you're
> storing actual ARGB pixels, even in some crazy order, then you've got a
> pixmap. I have to admit that all I know about is Intel X/Y tiling
> though; are there ways in which the other tiling formats don't just
> store ARGB pixels?

They do just store ARGB pixels. It's purely the storage and addressing
that's different, requiring either detiling or decompression to
texture from.

I'm also confused here. What I was saying is:
  - DRI3 v1.0 seems to be sufficient for linear buffers so far
  - the addition of modifiers allows explicit expression of non-linear
storage, and nothing else
  - the proposed DRI3 v1.1 spec replaced 'depth' as the determinant of
format with an explicit FourCC enum to make things more explicit
  - doing that seems to have just confused everyone, so we can make it
a lot more clear by going back to using depth to determine the format
(i.e. depth 24 == XRGB8888 storage), and only adding modifiers on top
of that

The point of these protocol additions was to allow servers to
advertise their supported storage modes to clients, so e.g. using
tiling modes between multiple GPUs becomes possible. Adding the
explicit modifier token and support for auxiliary buffers also means
that you can use lossless compression on Intel/Tegra (implemented as
tiled colour buffer + auxiliary block compression status buffer), or
other block compression modes.

>> tl;dr: we could replace FourCC with depth as the format determinant to
>> better match DRI3 v1.0, or just declare that doing anything with those
>> Pixmaps other than displaying them to a Window with a compatible
>> Visual results in undefined behaviour
>
> Hrm. I think I'm stuck on the notion that Pixmaps are concrete 2D
> arrays of multi-bit values.
>
> What about calling them 'source pixmaps' and offering extended requests
> to determine the precise nature of the data within? Make core requests
> report correct width and height values, and then fix them at depth 24
> for RGB or depth 32 for ARGB. Using them as a rendering target could
> generate BadMatch, or be ignored (I'd prefer the former).
>
> DRI3BufferFromPixmap could create a shadow 'normal' pixmap and use
> Damage to sync it with the source pixmap.
>
> Creating a Picture from a 'source pixmap' could dtrt (at least at some
> point; it could also just use a shadow pixmap).
>
> A new DRI3BufferFromSourcePixmap request would be used to get the list
> of buffers and appropriate format data so that updated compositors could
> get improved results.

I'm not sure what benefit this would bring. The reason I changed depth
to a FourCC wasn't because I'm desperate to support XBGR as well as
XRGB, but because a) drm_fourcc.h modifier tokens are usually paired
with drm_fourcc.h format tokens, and b) currently with DRI3 v1.0,
there is an implicit hardcoded mapping between depth and format, and
being more explicit seemed better than being implicit. Those two are
purely cosmetic though, so going back to the DRI3 v1.0 scheme of just
using depth rather than a FourCC format would perhaps make the intent
a bit more clear. Also, the XRGB8888/ARGB8888 format restrictions
don't seem to be biting anyone: I haven't seen any demand for extended
formats.

As far as creating new SourcePixmaps with shadow Pixmaps etc goes, I'm
not sure what we gain by the type indirection. If you want to forbid
rendering to client-created buffers passed to the server over DRI3
(currently just Pixmaps), why not just document that doing so results
in undefined behaviour? That can be BadMatch, that can be rendering
ignored, that can be your Pixmap/buffer full of junk. (Also, I'm not
sure why we'd hook up Damage if we're to forbid using it as a
rendering target ... ?)

Present would also need to gain a PresentSourcePixmap (or
PresentPicture, or?) request here. Again I'm not sure what gain we
really get from all the typing ... am I missing something?

Cheers,
Daniel
On 28 July 2017 at 16:51, Keith Packard <keithp@keithp.com> wrote:
> Michel Dänzer <michel@daenzer.net> writes:
>> Declaring where? Once a pixmap is created, it only has a depth, no
>> format, so there's nothing to base on that e.g. CopyArea between two
>> pixmaps of the same depth is undefined.
>
> I think we'd need some extension request which provides the format data
> and other attributes. We can define the transformation of underlying
> data into depth-based pixel data to match core drawing.
>
>> I'm getting less and less convinced that any reference at all to formats
>> in the context of pixmaps in the DRI3 specification is a good idea.
>
> Well, if we want to deal with YUV or compressed data, then we'd better
> find a way to describe the contents of the buffer returned by
> DRI3BufferFromPixmap.
>
> However, it would also be 'nice' if the underlying raw data could be
> accessed over the wire (ala PutImage/GetImage). Perhaps a different
> extension which talks about new formats (including deep color?) and
> provides for Get/Put semantics?

Yeah. Support for YUV in all its varied and wonderful forms, extended
RGB primaries/encodings (DCI.P3/etc/etc) would sure be nice to have at
some stage, but conflating it with a specific buffer-creation
mechanism seems to be the wrong place to do it. (See, e.g. the HDR
work.)

I think Michel is right here, and we're best off keeping the DRI3
extensions as a very pure addition of the buffer storage details
(tiling/compression). How to then interpret the Pixmap that creates
can be a separate extension which deals with colourspaces, in a way
which also works for SHM content, works for NVIDIA, etc.

Cheers,
Daniel
On 28/07/17 07:39 PM, Daniel Stone wrote:
> On 28 July 2017 at 09:54, Michel Dänzer <michel@daenzer.net> wrote:
>> On 28/07/17 05:03 PM, Daniel Stone wrote:
>>> By implementation if not spec, DRI3 v1.0 enforces that depth 16 is
>>> RGB565, depth 24 is XRGB8888, and depth 32 is ARGB8888.
>>
>> No, it doesn't. How the bits stored in a pixmap are interpreted is
>> outside of the scope of DRI3 1.0.
> 
> I mean that, when using glamor_egl and Mesa, PixmapFromBuffer
> translates XRGB8888 to depth 24, and BufferFromPixmap translates depth
> 24 to XRGB8888. So, de facto, these are the established translations
> between depth and format required to use DRI3.

That's just a glamor implementation detail, nothing more.

At the risk of sounding like a broken record, a pixmap is merely a
container for storing an integer of n bits per pixel, where n is the
pixmap depth. How those integer values are interpreted isn't defined by
the pixmap itself but only by something else, e.g. the visual of a
window or the format of a picture.

A client which uses the DRI3 and Present extensions for direct rendering
has to use the format matching the window visual for rendering to the
back buffer shared with the server, otherwise the window will display
garbage. But as long as the client uses the correct format, things will
work out correctly no matter which format glamor (or any other
implementation) uses for X11 core drawing with drawables of that depth.


>>> tl;dr: we could replace FourCC with depth as the format determinant to
>>> better match DRI3 v1.0, or just declare that doing anything with those
>>> Pixmaps other than displaying them to a Window with a compatible
>>> Visual results in undefined behaviour
>>
>> I'm getting less and less convinced that any reference at all to formats
>> in the context of pixmaps in the DRI3 specification is a good idea.
> 
> I'd be fine with that, but at least documenting the implemented
> behaviour would probably be reasonable, to save other implementations
> from having to reverse-engineer the actual semantics.

I think it's pretty clear by now from this thread that talking about
formats at all in the context of pixmaps rather confuses things (and
even people who have been working with X for a long time).
On 29/07/17 11:08 PM, Daniel Stone wrote:
> On 28 July 2017 at 16:44, Keith Packard <keithp@keithp.com> wrote:
>> Daniel Stone <daniel@fooishbar.org> writes:
> 
>   - the proposed DRI3 v1.1 spec replaced 'depth' as the determinant of
> format with an explicit FourCC enum to make things more explicit
>   - doing that seems to have just confused everyone, so we can make it
> a lot more clear by going back to using depth to determine the format
> (i.e. depth 24 == XRGB8888 storage), and only adding modifiers on top
> of that

[...]

> The reason I changed depth to a FourCC wasn't because I'm desperate to
> support XBGR as well as XRGB, but because a) drm_fourcc.h modifier
> tokens are usually paired with drm_fourcc.h format tokens, and b)
> currently with DRI3 v1.0, there is an implicit hardcoded mapping
> between depth and format, and being more explicit seemed better than
> being implicit. Those two are purely cosmetic though, so going back to
> the DRI3 v1.0 scheme of just using depth rather than a FourCC format
> would perhaps make the intent a bit more clear. Also, the
> XRGB8888/ARGB8888 format restrictions don't seem to be biting anyone:
> I haven't seen any demand for extended formats.

As explained in other posts, there is no fixed format
mapping/restriction with DRI3 v1.0 anyway. It can support all formats
that can be stored as integer values of n bits, where n matches (or is
even just <= ?) any supported pixmap depth.
Daniel Stone <daniel@fooishbar.org> writes:

> They do just store ARGB pixels. It's purely the storage and addressing
> that's different, requiring either detiling or decompression to
> texture from.

You had me all confused with talk of fourcc formats.

If you want to just talk about different ways of storing bits in
DRI3-visible pixmaps/buffers, and if those pixmaps are still just
'regular' X pixmaps in the rest of the system, then it sounds like you
just need a DRI3-specific way to talk about how to pack pixels
the buffers so that they appear in the right layout when you use the
pixmap on the other side.

Do we even need to mention ARGB in this context? I mean, pixmaps just
contain bits, and presumably there's some fixed mapping from bits in the
magic format to bits as seen by GetImage? If that happens to line up
with how a visual or pictformat is going to interpret those bits for
presentation or Render, then that's a nice bonus, but not exactly
germane to the basic plan of describing where to stick each bit in the
buffer?
Daniel Stone <daniel@fooishbar.org> writes:

> On 28 July 2017 at 16:51, Keith Packard <keithp@keithp.com> wrote:
>> Michel Dänzer <michel@daenzer.net> writes:
>>> Declaring where? Once a pixmap is created, it only has a depth, no
>>> format, so there's nothing to base on that e.g. CopyArea between two
>>> pixmaps of the same depth is undefined.
>>
>> I think we'd need some extension request which provides the format data
>> and other attributes. We can define the transformation of underlying
>> data into depth-based pixel data to match core drawing.
>>
>>> I'm getting less and less convinced that any reference at all to formats
>>> in the context of pixmaps in the DRI3 specification is a good idea.
>>
>> Well, if we want to deal with YUV or compressed data, then we'd better
>> find a way to describe the contents of the buffer returned by
>> DRI3BufferFromPixmap.
>>
>> However, it would also be 'nice' if the underlying raw data could be
>> accessed over the wire (ala PutImage/GetImage). Perhaps a different
>> extension which talks about new formats (including deep color?) and
>> provides for Get/Put semantics?
>
> Yeah. Support for YUV in all its varied and wonderful forms, extended
> RGB primaries/encodings (DCI.P3/etc/etc) would sure be nice to have at
> some stage, but conflating it with a specific buffer-creation
> mechanism seems to be the wrong place to do it. (See, e.g. the HDR
> work.)
>
> I think Michel is right here, and we're best off keeping the DRI3
> extensions as a very pure addition of the buffer storage details
> (tiling/compression). How to then interpret the Pixmap that creates
> can be a separate extension which deals with colourspaces, in a way
> which also works for SHM content, works for NVIDIA, etc.

If we can successfully talk modifiers and the aux planes just using
depth and bpp without a fourcc, I agree that this is the right way to go
for now.

VC4's modifier is fine and well-defined based just on bpp (and this
should be the case for VC5 as well).