Use finer-grained pointer types in mem copying functions

Submitted by Jochen Rollwagen on Nov. 25, 2016, 10 a.m.

Details

Message ID 58380BA2.3060006@t-online.de
State New
Headers show
Series "xf86-video-ati: Fix build for xserver < 1.13" ( rev: 2 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Jochen Rollwagen Nov. 25, 2016, 10 a.m.
This commit modifies some pointer definitions in functions copying 
memory corresponding to those in memcpy.
That should enable a compiler to produce better code (though i haven't 
checked whether that's the case).
---
  src/radeon.h       |    2 +-
  src/radeon_accel.c |   10 +++++-----
  src/radeon_dri2.c  |   14 +++++++-------
  3 files changed, 13 insertions(+), 13 deletions(-)

      GCPtr gc;
      RADEONInfoPtr info = RADEONPTR(pScrn);
@@ -435,7 +435,7 @@ radeon_dri2_copy_region2(ScreenPtr pScreen,

  void
  radeon_dri2_copy_region(DrawablePtr pDraw, RegionPtr pRegion,
-                        DRI2BufferPtr pDstBuffer, DRI2BufferPtr pSrcBuffer)
+                        DRI2BufferPtr __restrict pDstBuffer, 
DRI2BufferPtr __restrict pSrcBuffer)
  {
      return radeon_dri2_copy_region2(pDraw->pScreen, pDraw, pRegion,
                                      pDstBuffer, pSrcBuffer);

Patch hide | download patch | download mbox

diff --git a/src/radeon.h b/src/radeon.h
index ad7e69c..cbc7866 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -599,7 +599,7 @@  typedef struct {
  /* radeon_accel.c */
  extern Bool RADEONAccelInit(ScreenPtr pScreen);
  extern void RADEONEngineInit(ScrnInfoPtr pScrn);
-extern void  RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int 
size, int swap);
+extern void  RADEONCopySwap(uint8_t * __restrict dst, const uint8_t * 
__restrict src, unsigned int size, int swap);
  extern void RADEONInit3DEngine(ScrnInfoPtr pScrn);
  extern int radeon_cs_space_remaining(ScrnInfoPtr pScrn);

diff --git a/src/radeon_accel.c b/src/radeon_accel.c
index af2fc99..8c748f2 100644
--- a/src/radeon_accel.c
+++ b/src/radeon_accel.c
@@ -128,13 +128,13 @@  int radeon_cs_space_remaining(ScrnInfoPtr pScrn)
      return (info->cs->ndw - info->cs->cdw);
  }

-void RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int 
swap)
+void RADEONCopySwap(uint8_t * __restrict dst, const uint8_t * 
__restrict src, unsigned int size, int swap)
  {
      switch(swap) {
      case RADEON_HOST_DATA_SWAP_32BIT:
          {
-           unsigned int *d = (unsigned int *)dst;
-           unsigned int *s = (unsigned int *)src;
+           unsigned int * __restrict d = (unsigned int *)dst;
+           unsigned int * __restrict s = (unsigned int *)src;
             unsigned int nwords = size >> 2;

             for (; nwords > 0; --nwords, ++d, ++s)
@@ -148,8 +148,8 @@  void RADEONCopySwap(uint8_t *dst, uint8_t *src, 
unsigned int size, int swap)
          }
      case RADEON_HOST_DATA_SWAP_16BIT:
          {
-           unsigned short *d = (unsigned short *)dst;
-           unsigned short *s = (unsigned short *)src;
+           unsigned short * __restrict d = (unsigned short *)dst;
+           unsigned short * __restrict s = (unsigned short *)src;
             unsigned int nwords = size >> 1;

             for (; nwords > 0; --nwords, ++d, ++s)
diff --git a/src/radeon_dri2.c b/src/radeon_dri2.c
index 860ff29..0ff42e0 100644
--- a/src/radeon_dri2.c
+++ b/src/radeon_dri2.c
@@ -339,14 +339,14 @@  static void
  radeon_dri2_copy_region2(ScreenPtr pScreen,
                          DrawablePtr drawable,
                          RegionPtr region,
-                        BufferPtr dest_buffer,
-                        BufferPtr src_buffer)
+                        BufferPtr __restrict dest_buffer,
+                        const BufferPtr __restrict src_buffer)
  {
-    struct dri2_buffer_priv *src_private = src_buffer->driverPrivate;
-    struct dri2_buffer_priv *dst_private = dest_buffer->driverPrivate;
+    struct dri2_buffer_priv * __restrict src_private = 
src_buffer->driverPrivate;
+    struct dri2_buffer_priv * __restrict dst_private = 
dest_buffer->driverPrivate;
      ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
-    DrawablePtr src_drawable;
-    DrawablePtr dst_drawable;
+    DrawablePtr __restrict src_drawable;
+    DrawablePtr __restrict dst_drawable;
      RegionPtr copy_clip;

Comments

On 25/11/16 07:00 PM, Jochen Rollwagen wrote:
> This commit modifies some pointer definitions in functions copying
> memory corresponding to those in memcpy.
> That should enable a compiler to produce better code (though i haven't
> checked whether that's the case).

Please always check that patches actually have an effect before
submitting them. :)

With gcc, the stripped radeon_accel.o and radeon_dri2.o object files are
identical for me with and without this patch. With clang, the patch does
seem to make the code generated for RADEONCopySwap slightly smaller, but
I'm afraid I'm wary of applying this kind of micro-optimization without
justification such as benchmark numbers showing significant gains.


> @@ -128,13 +128,13 @@ int radeon_cs_space_remaining(ScrnInfoPtr pScrn)
>      return (info->cs->ndw - info->cs->cdw);
>  }
> 
> -void RADEONCopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int
> swap)
> +void RADEONCopySwap(uint8_t * __restrict dst, const uint8_t *
> __restrict src, unsigned int size, int swap)
>  {
>      switch(swap) {
>      case RADEON_HOST_DATA_SWAP_32BIT:
>          {
> -           unsigned int *d = (unsigned int *)dst;
> -           unsigned int *s = (unsigned int *)src;
> +           unsigned int * __restrict d = (unsigned int *)dst;
> +           unsigned int * __restrict s = (unsigned int *)src;
>             unsigned int nwords = size >> 2;
> 
>             for (; nwords > 0; --nwords, ++d, ++s)

BTW, this hunk wouldn't apply to current master, looks like it's on top
of your previous patch removing the RADEON_HOST_DATA_SWAP_HDW case.
Please always either make sure patches you submit apply to current
master, or explicitly state dependencies on other patches.