Adding infrastructure to permit future AVX2 implementations

Submitted by Devulapalli, Raghuveer on Aug. 22, 2018, 5:02 p.m.

Details

Message ID 1534957322-22474-1-git-send-email-raghuveer.devulapalli@intel.com
State New
Headers show
Series "Adding infrastructure to permit future AVX2 implementations" ( rev: 1 ) in Pixman

Not browsing as part of any series.

Commit Message

Devulapalli, Raghuveer Aug. 22, 2018, 5:02 p.m.
---
 configure.ac            | 44 ++++++++++++++++++++++++++++++++++++++++++++
 pixman/Makefile.am      | 12 ++++++++++++
 pixman/pixman-avx2.c    | 32 ++++++++++++++++++++++++++++++++
 pixman/pixman-private.h |  5 +++++
 pixman/pixman-x86.c     | 15 +++++++++++++--
 5 files changed, 106 insertions(+), 2 deletions(-)
 create mode 100644 pixman/pixman-avx2.c

Patch hide | download patch | download mbox

diff --git a/configure.ac b/configure.ac
index e833e45..27f4305 100644
--- a/configure.ac
+++ b/configure.ac
@@ -503,6 +503,48 @@  fi
 AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_intrinsics = yes)
 
 dnl ===========================================================================
+dnl Check for AVX2 
+
+if test "x$AVX2_CFLAGS" = "x" ; then
+    AVX2_CFLAGS="-mavx2 -Winline"
+fi
+
+have_avx2_intrinsics=no
+AC_MSG_CHECKING(whether to use AVX2 intrinsics)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$AVX2_CFLAGS $CFLAGS"
+
+AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+#include <immintrin.h>
+int param;
+int main () {
+    __m256i a = _mm256_set1_epi32 (param), b = _mm256_set1_epi32 (param + 1), c;
+    c = _mm256_maddubs_epi16 (a, b);
+    return _mm256_cvtsi256_si32(c);
+}]])], have_avx2_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+
+AC_ARG_ENABLE(avx2,
+   [AC_HELP_STRING([--disable-avx2],
+                   [disable AVX2 fast paths])],
+   [enable_avx2=$enableval], [enable_avx2=auto])
+
+if test $enable_avx2 = no ; then
+   have_avx2_intrinsics=disabled
+fi
+
+if test $have_avx2_intrinsics = yes ; then
+   AC_DEFINE(USE_AVX2, 1, [use AVX2 compiler intrinsics])
+fi
+
+AC_MSG_RESULT($have_avx2_intrinsics)
+if test $enable_avx2 = yes && test $have_avx2_intrinsics = no ; then
+   AC_MSG_ERROR([AVX2 intrinsics not detected])
+fi
+
+AM_CONDITIONAL(USE_AVX2, test $have_avx2_intrinsics = yes)
+
+dnl ===========================================================================
 dnl Other special flags needed when building code using MMX or SSE instructions
 case $host_os in
    solaris*)
@@ -538,6 +580,8 @@  AC_SUBST(MMX_LDFLAGS)
 AC_SUBST(SSE2_CFLAGS)
 AC_SUBST(SSE2_LDFLAGS)
 AC_SUBST(SSSE3_CFLAGS)
+AC_SUBST(AVX2_CFLAGS)
+AC_SUBST(AVX2_LDFLAGS)
 
 dnl ===========================================================================
 dnl Check for VMX/Altivec
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 581b6f6..7204621 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -64,6 +64,18 @@  libpixman_1_la_LIBADD += libpixman-ssse3.la
 ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS)
 endif
 
+# avx2 code
+if USE_AVX2
+noinst_LTLIBRARIES += libpixman-avx2.la
+libpixman_avx2_la_SOURCES = \
+	pixman-avx2.c
+libpixman_avx2_la_CFLAGS = $(AVX2_CFLAGS)
+libpixman_1_la_LDFLAGS += $(AVX2_LDFLAGS)
+libpixman_1_la_LIBADD += libpixman-avx2.la
+
+ASM_CFLAGS_avx2=$(AVX2_CFLAGS)
+endif
+
 # arm simd code
 if USE_ARM_SIMD
 noinst_LTLIBRARIES += libpixman-arm-simd.la
diff --git a/pixman/pixman-avx2.c b/pixman/pixman-avx2.c
new file mode 100644
index 0000000..d860d67
--- /dev/null
+++ b/pixman/pixman-avx2.c
@@ -0,0 +1,32 @@ 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <immintrin.h> /* for AVX2 intrinsics */
+#include "pixman-private.h"
+#include "pixman-combine32.h"
+#include "pixman-inlines.h"
+
+static const pixman_fast_path_t avx2_fast_paths[] =
+{
+    { PIXMAN_OP_NONE },
+};
+
+static const pixman_iter_info_t avx2_iters[] = 
+{
+    { PIXMAN_null },
+};
+
+#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+__attribute__((__force_align_arg_pointer__))
+#endif
+pixman_implementation_t *
+_pixman_implementation_create_avx2 (pixman_implementation_t *fallback)
+{
+    pixman_implementation_t *imp = _pixman_implementation_create (fallback, avx2_fast_paths);
+
+    /* Set up function pointers */
+    imp->iter_info = avx2_iters;
+
+    return imp;
+}
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 73a5414..b6b15df 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -597,6 +597,11 @@  pixman_implementation_t *
 _pixman_implementation_create_ssse3 (pixman_implementation_t *fallback);
 #endif
 
+#ifdef USE_AVX2
+pixman_implementation_t *
+_pixman_implementation_create_avx2 (pixman_implementation_t *fallback);
+#endif
+
 #ifdef USE_ARM_SIMD
 pixman_implementation_t *
 _pixman_implementation_create_arm_simd (pixman_implementation_t *fallback);
diff --git a/pixman/pixman-x86.c b/pixman/pixman-x86.c
index 05297c4..687c83b 100644
--- a/pixman/pixman-x86.c
+++ b/pixman/pixman-x86.c
@@ -40,7 +40,8 @@  typedef enum
     X86_SSE			= (1 << 2) | X86_MMX_EXTENSIONS,
     X86_SSE2			= (1 << 3),
     X86_CMOV			= (1 << 4),
-    X86_SSSE3			= (1 << 5)
+    X86_SSSE3			= (1 << 5),
+    X86_AVX2			= (1 << 6),
 } cpu_features_t;
 
 #ifdef HAVE_GETISAX
@@ -119,7 +120,7 @@  pixman_cpuid (uint32_t feature,
     __asm__ volatile (
         "cpuid"				"\n\t"
 	: "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
-	: "a" (feature));
+	: "a" (feature), "c" (0));
 #else
     /* On x86-32 we need to be careful about the handling of %ebx
      * and %esp. We can't declare either one as clobbered
@@ -172,6 +173,10 @@  detect_cpu_features (void)
 	features |= X86_SSE2;
     if (c & (1 << 9))
 	features |= X86_SSSE3;
+    
+    pixman_cpuid (0x07, &a, &b, &c, &d);
+    if (b & (1 << 5))
+	features |= X86_AVX2;
 
     /* Check for AMD specific features */
     if ((features & X86_MMX) && !(features & X86_SSE))
@@ -228,6 +233,7 @@  _pixman_x86_get_implementations (pixman_implementation_t *imp)
 #define MMX_BITS  (X86_MMX | X86_MMX_EXTENSIONS)
 #define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2)
 #define SSSE3_BITS (X86_SSE | X86_SSE2 | X86_SSSE3)
+#define AVX2_BITS (X86_AVX2)
 
 #ifdef USE_X86_MMX
     if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS))
@@ -244,5 +250,10 @@  _pixman_x86_get_implementations (pixman_implementation_t *imp)
 	imp = _pixman_implementation_create_ssse3 (imp);
 #endif
 
+#if (defined USE_AVX2 && defined USE_SSE2)
+    if (!_pixman_disabled ("avx2") && have_feature (AVX2_BITS))
+	imp = _pixman_implementation_create_avx2(imp);
+#endif
+
     return imp;
 }

Comments

Quoting raghuveer devulapalli (2018-08-22 18:02:02)
>  #ifdef HAVE_GETISAX
> @@ -119,7 +120,7 @@ pixman_cpuid (uint32_t feature,
>      __asm__ volatile (
>          "cpuid"                                "\n\t"
>         : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
> -       : "a" (feature));
> +       : "a" (feature), "c" (0));
>  #else
>      /* On x86-32 we need to be careful about the handling of %ebx
>       * and %esp. We can't declare either one as clobbered
> @@ -172,6 +173,10 @@ detect_cpu_features (void)
>         features |= X86_SSE2;
>      if (c & (1 << 9))
>         features |= X86_SSSE3;
> +    
> +    pixman_cpuid (0x07, &a, &b, &c, &d);
> +    if (b & (1 << 5))
> +       features |= X86_AVX2;

It's not enough to check for the cpu feature, you need to also check for
os support. Something like:

#define xgetbv(index,eax,edx)                                   \
        __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index))

#define has_YMM 0x1

unsigned cpu_detect(void)
{
        unsigned max = __get_cpuid_max(BASIC_CPUID, NULL);
        unsigned eax, ebx, ecx, edx;
        unsigned features = 0;
        unsigned extra = 0;

        if (max >= 1) {
                __cpuid(1, eax, ebx, ecx, edx);

		/* snip all the usual sse features */

                if (ecx & bit_OSXSAVE) {
                        unsigned int bv_eax, bv_ecx;
                        xgetbv(0, bv_eax, bv_ecx);
                        if ((bv_eax & 6) == 6)
                                extra |= has_YMM;
                }

                if ((extra & has_YMM) && (ecx & bit_AVX))
                        features |= AVX;
        }

        if (max >= 7) {
                __cpuid_count(7, 0, eax, ebx, ecx, edx);

                if ((extra & has_YMM) && (ebx & bit_AVX2))
                        features |= AVX2;
        }

        return features;
}
Thank you for the patches! Some comments inline.

On Wed, Aug 22, 2018 at 10:03 AM raghuveer devulapalli
<raghuveer.devulapalli@intel.com> wrote:
>
> ---
>  configure.ac            | 44 ++++++++++++++++++++++++++++++++++++++++++++
>  pixman/Makefile.am      | 12 ++++++++++++
>  pixman/pixman-avx2.c    | 32 ++++++++++++++++++++++++++++++++
>  pixman/pixman-private.h |  5 +++++
>  pixman/pixman-x86.c     | 15 +++++++++++++--
>  5 files changed, 106 insertions(+), 2 deletions(-)
>  create mode 100644 pixman/pixman-avx2.c
>
> diff --git a/configure.ac b/configure.ac
> index e833e45..27f4305 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -503,6 +503,48 @@ fi
>  AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_intrinsics = yes)
>
>  dnl ===========================================================================
> +dnl Check for AVX2

Trailing whitespace

> +
> +if test "x$AVX2_CFLAGS" = "x" ; then
> +    AVX2_CFLAGS="-mavx2 -Winline"
> +fi
> +
> +have_avx2_intrinsics=no
> +AC_MSG_CHECKING(whether to use AVX2 intrinsics)
> +xserver_save_CFLAGS=$CFLAGS
> +CFLAGS="$AVX2_CFLAGS $CFLAGS"
> +
> +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
> +#include <immintrin.h>
> +int param;
> +int main () {
> +    __m256i a = _mm256_set1_epi32 (param), b = _mm256_set1_epi32 (param + 1), c;
> +    c = _mm256_maddubs_epi16 (a, b);
> +    return _mm256_cvtsi256_si32(c);
> +}]])], have_avx2_intrinsics=yes)
> +CFLAGS=$xserver_save_CFLAGS
> +
> +AC_ARG_ENABLE(avx2,
> +   [AC_HELP_STRING([--disable-avx2],
> +                   [disable AVX2 fast paths])],
> +   [enable_avx2=$enableval], [enable_avx2=auto])
> +
> +if test $enable_avx2 = no ; then
> +   have_avx2_intrinsics=disabled
> +fi
> +
> +if test $have_avx2_intrinsics = yes ; then
> +   AC_DEFINE(USE_AVX2, 1, [use AVX2 compiler intrinsics])
> +fi
> +
> +AC_MSG_RESULT($have_avx2_intrinsics)
> +if test $enable_avx2 = yes && test $have_avx2_intrinsics = no ; then
> +   AC_MSG_ERROR([AVX2 intrinsics not detected])
> +fi
> +
> +AM_CONDITIONAL(USE_AVX2, test $have_avx2_intrinsics = yes)
> +
> +dnl ===========================================================================
>  dnl Other special flags needed when building code using MMX or SSE instructions
>  case $host_os in
>     solaris*)
> @@ -538,6 +580,8 @@ AC_SUBST(MMX_LDFLAGS)
>  AC_SUBST(SSE2_CFLAGS)
>  AC_SUBST(SSE2_LDFLAGS)
>  AC_SUBST(SSSE3_CFLAGS)
> +AC_SUBST(AVX2_CFLAGS)
> +AC_SUBST(AVX2_LDFLAGS)
>
>  dnl ===========================================================================
>  dnl Check for VMX/Altivec
> diff --git a/pixman/Makefile.am b/pixman/Makefile.am
> index 581b6f6..7204621 100644
> --- a/pixman/Makefile.am
> +++ b/pixman/Makefile.am
> @@ -64,6 +64,18 @@ libpixman_1_la_LIBADD += libpixman-ssse3.la
>  ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS)
>  endif
>
> +# avx2 code
> +if USE_AVX2
> +noinst_LTLIBRARIES += libpixman-avx2.la
> +libpixman_avx2_la_SOURCES = \
> +       pixman-avx2.c
> +libpixman_avx2_la_CFLAGS = $(AVX2_CFLAGS)
> +libpixman_1_la_LDFLAGS += $(AVX2_LDFLAGS)
> +libpixman_1_la_LIBADD += libpixman-avx2.la
> +
> +ASM_CFLAGS_avx2=$(AVX2_CFLAGS)
> +endif
> +
>  # arm simd code
>  if USE_ARM_SIMD
>  noinst_LTLIBRARIES += libpixman-arm-simd.la
> diff --git a/pixman/pixman-avx2.c b/pixman/pixman-avx2.c
> new file mode 100644
> index 0000000..d860d67
> --- /dev/null
> +++ b/pixman/pixman-avx2.c
> @@ -0,0 +1,32 @@
> +#ifdef HAVE_CONFIG_H
> +#include <config.h>
> +#endif
> +
> +#include <immintrin.h> /* for AVX2 intrinsics */
> +#include "pixman-private.h"
> +#include "pixman-combine32.h"
> +#include "pixman-inlines.h"
> +
> +static const pixman_fast_path_t avx2_fast_paths[] =
> +{
> +    { PIXMAN_OP_NONE },
> +};
> +
> +static const pixman_iter_info_t avx2_iters[] =

Trailing whitespace

> +{
> +    { PIXMAN_null },
> +};
> +
> +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
> +__attribute__((__force_align_arg_pointer__))
> +#endif
> +pixman_implementation_t *
> +_pixman_implementation_create_avx2 (pixman_implementation_t *fallback)
> +{
> +    pixman_implementation_t *imp = _pixman_implementation_create (fallback, avx2_fast_paths);
> +
> +    /* Set up function pointers */
> +    imp->iter_info = avx2_iters;
> +
> +    return imp;
> +}
> diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
> index 73a5414..b6b15df 100644
> --- a/pixman/pixman-private.h
> +++ b/pixman/pixman-private.h
> @@ -597,6 +597,11 @@ pixman_implementation_t *
>  _pixman_implementation_create_ssse3 (pixman_implementation_t *fallback);
>  #endif
>
> +#ifdef USE_AVX2
> +pixman_implementation_t *
> +_pixman_implementation_create_avx2 (pixman_implementation_t *fallback);
> +#endif
> +
>  #ifdef USE_ARM_SIMD
>  pixman_implementation_t *
>  _pixman_implementation_create_arm_simd (pixman_implementation_t *fallback);
> diff --git a/pixman/pixman-x86.c b/pixman/pixman-x86.c
> index 05297c4..687c83b 100644
> --- a/pixman/pixman-x86.c
> +++ b/pixman/pixman-x86.c

At the top of this file there is a preprocessor check:

#if defined(USE_X86_MMX) || defined (USE_SSE2) || defined (USE_SSSE3)

I think || defined (USE_AVX2) should be added here.

> @@ -40,7 +40,8 @@ typedef enum
>      X86_SSE                    = (1 << 2) | X86_MMX_EXTENSIONS,
>      X86_SSE2                   = (1 << 3),
>      X86_CMOV                   = (1 << 4),
> -    X86_SSSE3                  = (1 << 5)
> +    X86_SSSE3                  = (1 << 5),
> +    X86_AVX2                   = (1 << 6),

I'm not 100% we can use trailing commas in pixman due to MSVC.
Probably safer just to leave it off.

>  } cpu_features_t;
>
>  #ifdef HAVE_GETISAX
> @@ -119,7 +120,7 @@ pixman_cpuid (uint32_t feature,
>      __asm__ volatile (
>          "cpuid"                                "\n\t"
>         : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
> -       : "a" (feature));
> +       : "a" (feature), "c" (0));

Just to make sure I'm understanding: cpuid returns AVX2 presence in
bit 5 of ebx when it is executed with eax=7 and ecx=0, so we need to
ensure ecx is set to 0?

I think that's fine. It seems like ecx isn't required to be any
particular value for the other cases. Perhaps a comment would help
future readers understand.

>  #else
>      /* On x86-32 we need to be careful about the handling of %ebx
>       * and %esp. We can't declare either one as clobbered
> @@ -172,6 +173,10 @@ detect_cpu_features (void)
>         features |= X86_SSE2;
>      if (c & (1 << 9))
>         features |= X86_SSSE3;
> +

Spurious whitespace

> +    pixman_cpuid (0x07, &a, &b, &c, &d);
> +    if (b & (1 << 5))
> +       features |= X86_AVX2;
>
>      /* Check for AMD specific features */
>      if ((features & X86_MMX) && !(features & X86_SSE))
> @@ -228,6 +233,7 @@ _pixman_x86_get_implementations (pixman_implementation_t *imp)
>  #define MMX_BITS  (X86_MMX | X86_MMX_EXTENSIONS)
>  #define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2)
>  #define SSSE3_BITS (X86_SSE | X86_SSE2 | X86_SSSE3)
> +#define AVX2_BITS (X86_AVX2)
>
>  #ifdef USE_X86_MMX
>      if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS))
> @@ -244,5 +250,10 @@ _pixman_x86_get_implementations (pixman_implementation_t *imp)
>         imp = _pixman_implementation_create_ssse3 (imp);
>  #endif
>
> +#if (defined USE_AVX2 && defined USE_SSE2)
> +    if (!_pixman_disabled ("avx2") && have_feature (AVX2_BITS))
> +       imp = _pixman_implementation_create_avx2(imp);
> +#endif
> +
>      return imp;
>  }
> --
> 2.7.4
Thanks Matt and Chris for your valuable feedback. I am incorporating your feedback and preparing updated patches. Unfortunately I am out of office for the next 2-3 weeks, but I will post them shortly after I get back. 

Thanks, 
Raghuveer 

-----Original Message-----
From: Matt Turner [mailto:mattst88@gmail.com] 

Sent: Wednesday, August 29, 2018 11:43 AM
To: Devulapalli, Raghuveer <raghuveer.devulapalli@intel.com>
Cc: pixman@lists.freedesktop.org
Subject: Re: [Pixman] [PATCH] Adding infrastructure to permit future AVX2 implementations

Thank you for the patches! Some comments inline.

On Wed, Aug 22, 2018 at 10:03 AM raghuveer devulapalli <raghuveer.devulapalli@intel.com> wrote:
>

> ---

>  configure.ac            | 44 ++++++++++++++++++++++++++++++++++++++++++++

>  pixman/Makefile.am      | 12 ++++++++++++

>  pixman/pixman-avx2.c    | 32 ++++++++++++++++++++++++++++++++

>  pixman/pixman-private.h |  5 +++++

>  pixman/pixman-x86.c     | 15 +++++++++++++--

>  5 files changed, 106 insertions(+), 2 deletions(-)  create mode 

> 100644 pixman/pixman-avx2.c

>

> diff --git a/configure.ac b/configure.ac index e833e45..27f4305 100644

> --- a/configure.ac

> +++ b/configure.ac

> @@ -503,6 +503,48 @@ fi

>  AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_intrinsics = yes)

>

>  dnl 

> ======================================================================

> =====

> +dnl Check for AVX2


Trailing whitespace

> +

> +if test "x$AVX2_CFLAGS" = "x" ; then

> +    AVX2_CFLAGS="-mavx2 -Winline"

> +fi

> +

> +have_avx2_intrinsics=no

> +AC_MSG_CHECKING(whether to use AVX2 intrinsics) 

> +xserver_save_CFLAGS=$CFLAGS CFLAGS="$AVX2_CFLAGS $CFLAGS"

> +

> +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[

> +#include <immintrin.h>

> +int param;

> +int main () {

> +    __m256i a = _mm256_set1_epi32 (param), b = _mm256_set1_epi32 (param + 1), c;

> +    c = _mm256_maddubs_epi16 (a, b);

> +    return _mm256_cvtsi256_si32(c);

> +}]])], have_avx2_intrinsics=yes)

> +CFLAGS=$xserver_save_CFLAGS

> +

> +AC_ARG_ENABLE(avx2,

> +   [AC_HELP_STRING([--disable-avx2],

> +                   [disable AVX2 fast paths])],

> +   [enable_avx2=$enableval], [enable_avx2=auto])

> +

> +if test $enable_avx2 = no ; then

> +   have_avx2_intrinsics=disabled

> +fi

> +

> +if test $have_avx2_intrinsics = yes ; then

> +   AC_DEFINE(USE_AVX2, 1, [use AVX2 compiler intrinsics]) fi

> +

> +AC_MSG_RESULT($have_avx2_intrinsics)

> +if test $enable_avx2 = yes && test $have_avx2_intrinsics = no ; then

> +   AC_MSG_ERROR([AVX2 intrinsics not detected]) fi

> +

> +AM_CONDITIONAL(USE_AVX2, test $have_avx2_intrinsics = yes)

> +

> +dnl 

> +=====================================================================

> +======

>  dnl Other special flags needed when building code using MMX or SSE 

> instructions  case $host_os in

>     solaris*)

> @@ -538,6 +580,8 @@ AC_SUBST(MMX_LDFLAGS)

>  AC_SUBST(SSE2_CFLAGS)

>  AC_SUBST(SSE2_LDFLAGS)

>  AC_SUBST(SSSE3_CFLAGS)

> +AC_SUBST(AVX2_CFLAGS)

> +AC_SUBST(AVX2_LDFLAGS)

>

>  dnl 

> ======================================================================

> =====

>  dnl Check for VMX/Altivec

> diff --git a/pixman/Makefile.am b/pixman/Makefile.am index 

> 581b6f6..7204621 100644

> --- a/pixman/Makefile.am

> +++ b/pixman/Makefile.am

> @@ -64,6 +64,18 @@ libpixman_1_la_LIBADD += libpixman-ssse3.la

>  ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS)

>  endif

>

> +# avx2 code

> +if USE_AVX2

> +noinst_LTLIBRARIES += libpixman-avx2.la libpixman_avx2_la_SOURCES = \

> +       pixman-avx2.c

> +libpixman_avx2_la_CFLAGS = $(AVX2_CFLAGS) libpixman_1_la_LDFLAGS += 

> +$(AVX2_LDFLAGS) libpixman_1_la_LIBADD += libpixman-avx2.la

> +

> +ASM_CFLAGS_avx2=$(AVX2_CFLAGS)

> +endif

> +

>  # arm simd code

>  if USE_ARM_SIMD

>  noinst_LTLIBRARIES += libpixman-arm-simd.la diff --git 

> a/pixman/pixman-avx2.c b/pixman/pixman-avx2.c new file mode 100644 

> index 0000000..d860d67

> --- /dev/null

> +++ b/pixman/pixman-avx2.c

> @@ -0,0 +1,32 @@

> +#ifdef HAVE_CONFIG_H

> +#include <config.h>

> +#endif

> +

> +#include <immintrin.h> /* for AVX2 intrinsics */ #include 

> +"pixman-private.h"

> +#include "pixman-combine32.h"

> +#include "pixman-inlines.h"

> +

> +static const pixman_fast_path_t avx2_fast_paths[] = {

> +    { PIXMAN_OP_NONE },

> +};

> +

> +static const pixman_iter_info_t avx2_iters[] =


Trailing whitespace

> +{

> +    { PIXMAN_null },

> +};

> +

> +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)

> +__attribute__((__force_align_arg_pointer__))

> +#endif

> +pixman_implementation_t *

> +_pixman_implementation_create_avx2 (pixman_implementation_t 

> +*fallback) {

> +    pixman_implementation_t *imp = _pixman_implementation_create 

> +(fallback, avx2_fast_paths);

> +

> +    /* Set up function pointers */

> +    imp->iter_info = avx2_iters;

> +

> +    return imp;

> +}

> diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h index 

> 73a5414..b6b15df 100644

> --- a/pixman/pixman-private.h

> +++ b/pixman/pixman-private.h

> @@ -597,6 +597,11 @@ pixman_implementation_t *

>  _pixman_implementation_create_ssse3 (pixman_implementation_t 

> *fallback);  #endif

>

> +#ifdef USE_AVX2

> +pixman_implementation_t *

> +_pixman_implementation_create_avx2 (pixman_implementation_t 

> +*fallback); #endif

> +

>  #ifdef USE_ARM_SIMD

>  pixman_implementation_t *

>  _pixman_implementation_create_arm_simd (pixman_implementation_t 

> *fallback); diff --git a/pixman/pixman-x86.c b/pixman/pixman-x86.c 

> index 05297c4..687c83b 100644

> --- a/pixman/pixman-x86.c

> +++ b/pixman/pixman-x86.c


At the top of this file there is a preprocessor check:

#if defined(USE_X86_MMX) || defined (USE_SSE2) || defined (USE_SSSE3)

I think || defined (USE_AVX2) should be added here.

> @@ -40,7 +40,8 @@ typedef enum

>      X86_SSE                    = (1 << 2) | X86_MMX_EXTENSIONS,

>      X86_SSE2                   = (1 << 3),

>      X86_CMOV                   = (1 << 4),

> -    X86_SSSE3                  = (1 << 5)

> +    X86_SSSE3                  = (1 << 5),

> +    X86_AVX2                   = (1 << 6),


I'm not 100% we can use trailing commas in pixman due to MSVC.
Probably safer just to leave it off.

>  } cpu_features_t;

>

>  #ifdef HAVE_GETISAX

> @@ -119,7 +120,7 @@ pixman_cpuid (uint32_t feature,

>      __asm__ volatile (

>          "cpuid"                                "\n\t"

>         : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)

> -       : "a" (feature));

> +       : "a" (feature), "c" (0));


Just to make sure I'm understanding: cpuid returns AVX2 presence in bit 5 of ebx when it is executed with eax=7 and ecx=0, so we need to ensure ecx is set to 0?

I think that's fine. It seems like ecx isn't required to be any particular value for the other cases. Perhaps a comment would help future readers understand.

>  #else

>      /* On x86-32 we need to be careful about the handling of %ebx

>       * and %esp. We can't declare either one as clobbered @@ -172,6 

> +173,10 @@ detect_cpu_features (void)

>         features |= X86_SSE2;

>      if (c & (1 << 9))

>         features |= X86_SSSE3;

> +


Spurious whitespace

> +    pixman_cpuid (0x07, &a, &b, &c, &d);

> +    if (b & (1 << 5))

> +       features |= X86_AVX2;

>

>      /* Check for AMD specific features */

>      if ((features & X86_MMX) && !(features & X86_SSE)) @@ -228,6 

> +233,7 @@ _pixman_x86_get_implementations (pixman_implementation_t 

> *imp)  #define MMX_BITS  (X86_MMX | X86_MMX_EXTENSIONS)  #define 

> SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2)  #define 

> SSSE3_BITS (X86_SSE | X86_SSE2 | X86_SSSE3)

> +#define AVX2_BITS (X86_AVX2)

>

>  #ifdef USE_X86_MMX

>      if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS)) @@ 

> -244,5 +250,10 @@ _pixman_x86_get_implementations (pixman_implementation_t *imp)

>         imp = _pixman_implementation_create_ssse3 (imp);  #endif

>

> +#if (defined USE_AVX2 && defined USE_SSE2)

> +    if (!_pixman_disabled ("avx2") && have_feature (AVX2_BITS))

> +       imp = _pixman_implementation_create_avx2(imp);

> +#endif

> +

>      return imp;

>  }

> --

> 2.7.4