[2/2] pixman: Use maximum precision for pixman-bits-image

Submitted by Maarten Lankhorst on Jan. 3, 2019, 11:18 a.m.

Details

Message ID 20190103111847.29685-2-maarten.lankhorst@linux.intel.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Pixman

Not browsing as part of any series.

Commit Message

Maarten Lankhorst Jan. 3, 2019, 11:18 a.m.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 pixman/pixman-bits-image.c | 478 ++++++++++++++++++++++++++++++-------
 pixman/pixman-inlines.h    |  25 ++
 2 files changed, 418 insertions(+), 85 deletions(-)

Patch hide | download patch | download mbox

diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
index 9fb91ff5831d..cb1c5ce12d46 100644
--- a/pixman/pixman-bits-image.c
+++ b/pixman/pixman-bits-image.c
@@ -36,43 +36,45 @@ 
 #include "pixman-combine32.h"
 #include "pixman-inlines.h"
 
-static uint32_t *
-_pixman_image_get_scanline_generic_float (pixman_iter_t * iter,
-					  const uint32_t *mask)
-{
-    pixman_iter_get_scanline_t fetch_32 = iter->data;
-    uint32_t *buffer = iter->buffer;
-
-    fetch_32 (iter, NULL);
+/* Fetch functions */
 
-    pixman_expand_to_float ((argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
+static force_inline void
+fetch_pixel_no_alpha_32 (bits_image_t *image,
+			 int x, int y, pixman_bool_t check_bounds,
+			 void *out)
+{
+    uint32_t *ret = out;
 
-    return iter->buffer;
+    if (check_bounds &&
+	(x < 0 || x >= image->width || y < 0 || y >= image->height))
+	*ret = 0;
+    else
+	*ret = image->fetch_pixel_32 (image, x, y);
 }
 
-/* Fetch functions */
-
-static force_inline uint32_t
-fetch_pixel_no_alpha (bits_image_t *image,
-		      int x, int y, pixman_bool_t check_bounds)
+static force_inline void
+fetch_pixel_no_alpha_float (bits_image_t *image,
+			    int x, int y, pixman_bool_t check_bounds,
+			    void *out)
 {
+    argb_t *ret = out;
+
     if (check_bounds &&
 	(x < 0 || x >= image->width || y < 0 || y >= image->height))
-    {
-	return 0;
-    }
-
-    return image->fetch_pixel_32 (image, x, y);
+	ret->a = ret->r = ret->g = ret->b = 0.f;
+    else
+	*ret = image->fetch_pixel_float (image, x, y);
 }
 
-typedef uint32_t (* get_pixel_t) (bits_image_t *image,
-				  int x, int y, pixman_bool_t check_bounds);
+typedef void (* get_pixel_t) (bits_image_t *image,
+			      int x, int y, pixman_bool_t check_bounds, void *out);
 
-static force_inline uint32_t
+static force_inline void
 bits_image_fetch_pixel_nearest (bits_image_t   *image,
 				pixman_fixed_t  x,
 				pixman_fixed_t  y,
-				get_pixel_t	get_pixel)
+				get_pixel_t	get_pixel,
+				void	       *out)
 {
     int x0 = pixman_fixed_to_int (x - pixman_fixed_e);
     int y0 = pixman_fixed_to_int (y - pixman_fixed_e);
@@ -82,19 +84,20 @@  bits_image_fetch_pixel_nearest (bits_image_t   *image,
 	repeat (image->common.repeat, &x0, image->width);
 	repeat (image->common.repeat, &y0, image->height);
 
-	return get_pixel (image, x0, y0, FALSE);
+	get_pixel (image, x0, y0, FALSE, out);
     }
     else
     {
-	return get_pixel (image, x0, y0, TRUE);
+	get_pixel (image, x0, y0, TRUE, out);
     }
 }
 
-static force_inline uint32_t
-bits_image_fetch_pixel_bilinear (bits_image_t   *image,
-				 pixman_fixed_t  x,
-				 pixman_fixed_t  y,
-				 get_pixel_t	 get_pixel)
+static force_inline void
+bits_image_fetch_pixel_bilinear_32 (bits_image_t   *image,
+				    pixman_fixed_t  x,
+				    pixman_fixed_t  y,
+				    get_pixel_t	    get_pixel,
+				    void	   *out)
 {
     pixman_repeat_t repeat_mode = image->common.repeat;
     int width = image->width;
@@ -102,6 +105,7 @@  bits_image_fetch_pixel_bilinear (bits_image_t   *image,
     int x1, y1, x2, y2;
     uint32_t tl, tr, bl, br;
     int32_t distx, disty;
+    uint32_t *ret = out;
 
     x1 = x - pixman_fixed_1 / 2;
     y1 = y - pixman_fixed_1 / 2;
@@ -121,27 +125,77 @@  bits_image_fetch_pixel_bilinear (bits_image_t   *image,
 	repeat (repeat_mode, &x2, width);
 	repeat (repeat_mode, &y2, height);
 
-	tl = get_pixel (image, x1, y1, FALSE);
-	bl = get_pixel (image, x1, y2, FALSE);
-	tr = get_pixel (image, x2, y1, FALSE);
-	br = get_pixel (image, x2, y2, FALSE);
+	get_pixel (image, x1, y1, FALSE, &tl);
+	get_pixel (image, x2, y1, FALSE, &tr);
+	get_pixel (image, x1, y2, FALSE, &bl);
+	get_pixel (image, x2, y2, FALSE, &br);
     }
     else
     {
-	tl = get_pixel (image, x1, y1, TRUE);
-	tr = get_pixel (image, x2, y1, TRUE);
-	bl = get_pixel (image, x1, y2, TRUE);
-	br = get_pixel (image, x2, y2, TRUE);
+	get_pixel (image, x1, y1, TRUE, &tl);
+	get_pixel (image, x2, y1, TRUE, &tr);
+	get_pixel (image, x1, y2, TRUE, &bl);
+	get_pixel (image, x2, y2, TRUE, &br);
     }
 
-    return bilinear_interpolation (tl, tr, bl, br, distx, disty);
+    *ret = bilinear_interpolation (tl, tr, bl, br, distx, disty);
 }
 
-static force_inline uint32_t
-bits_image_fetch_pixel_convolution (bits_image_t   *image,
-				    pixman_fixed_t  x,
-				    pixman_fixed_t  y,
-				    get_pixel_t     get_pixel)
+static force_inline void
+bits_image_fetch_pixel_bilinear_float (bits_image_t   *image,
+				       pixman_fixed_t  x,
+				       pixman_fixed_t  y,
+				       get_pixel_t     get_pixel,
+				       void	      *out)
+{
+    pixman_repeat_t repeat_mode = image->common.repeat;
+    int width = image->width;
+    int height = image->height;
+    int x1, y1, x2, y2;
+    argb_t tl, tr, bl, br;
+    float distx, disty;
+    argb_t *ret = out;
+
+    x1 = x - pixman_fixed_1 / 2;
+    y1 = y - pixman_fixed_1 / 2;
+
+    distx = ((float)pixman_fixed_fraction(x1)) / 65536.f;
+    disty = ((float)pixman_fixed_fraction(y1)) / 65536.f;
+
+    x1 = pixman_fixed_to_int (x1);
+    y1 = pixman_fixed_to_int (y1);
+    x2 = x1 + 1;
+    y2 = y1 + 1;
+
+    if (repeat_mode != PIXMAN_REPEAT_NONE)
+    {
+	repeat (repeat_mode, &x1, width);
+	repeat (repeat_mode, &y1, height);
+	repeat (repeat_mode, &x2, width);
+	repeat (repeat_mode, &y2, height);
+
+	get_pixel (image, x1, y1, FALSE, &tl);
+	get_pixel (image, x2, y1, FALSE, &tr);
+	get_pixel (image, x1, y2, FALSE, &bl);
+	get_pixel (image, x2, y2, FALSE, &br);
+    }
+    else
+    {
+	get_pixel (image, x1, y1, TRUE, &tl);
+	get_pixel (image, x2, y1, TRUE, &tr);
+	get_pixel (image, x1, y2, TRUE, &bl);
+	get_pixel (image, x2, y2, TRUE, &br);
+    }
+
+    *ret = bilinear_interpolation_float (tl, tr, bl, br, distx, disty);
+}
+
+static force_inline void
+bits_image_fetch_pixel_convolution_32 (bits_image_t   *image,
+				       pixman_fixed_t  x,
+				       pixman_fixed_t  y,
+				       get_pixel_t     get_pixel,
+				       void	      *out)
 {
     pixman_fixed_t *params = image->common.filter_params;
     int x_off = (params[0] - pixman_fixed_1) >> 1;
@@ -153,6 +207,7 @@  bits_image_fetch_pixel_convolution (bits_image_t   *image,
     int width = image->width;
     int height = image->height;
     int srtot, sgtot, sbtot, satot;
+    uint32_t *ret = out;
 
     params += 2;
 
@@ -181,11 +236,11 @@  bits_image_fetch_pixel_convolution (bits_image_t   *image,
 		    repeat (repeat_mode, &rx, width);
 		    repeat (repeat_mode, &ry, height);
 
-		    pixel = get_pixel (image, rx, ry, FALSE);
+		    get_pixel (image, rx, ry, FALSE, &pixel);
 		}
 		else
 		{
-		    pixel = get_pixel (image, rx, ry, TRUE);
+		    get_pixel (image, rx, ry, TRUE, &pixel);
 		}
 
 		srtot += (int)RED_8 (pixel) * f;
@@ -208,14 +263,84 @@  bits_image_fetch_pixel_convolution (bits_image_t   *image,
     sgtot = CLIP (sgtot, 0, 0xff);
     sbtot = CLIP (sbtot, 0, 0xff);
 
-    return ((satot << 24) | (srtot << 16) | (sgtot <<  8) | (sbtot));
+    *ret = ((satot << 24) | (srtot << 16) | (sgtot <<  8) | (sbtot));
+}
+
+static force_inline void
+bits_image_fetch_pixel_convolution_float (bits_image_t   *image,
+					  pixman_fixed_t  x,
+					  pixman_fixed_t  y,
+					  get_pixel_t     get_pixel,
+					  void	      *out)
+{
+    pixman_fixed_t *params = image->common.filter_params;
+    int x_off = (params[0] - pixman_fixed_1) >> 1;
+    int y_off = (params[1] - pixman_fixed_1) >> 1;
+    int32_t cwidth = pixman_fixed_to_int (params[0]);
+    int32_t cheight = pixman_fixed_to_int (params[1]);
+    int32_t i, j, x1, x2, y1, y2;
+    pixman_repeat_t repeat_mode = image->common.repeat;
+    int width = image->width;
+    int height = image->height;
+    int srtot, sgtot, sbtot, satot;
+    argb_t *ret = out;
+
+    params += 2;
+
+    x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
+    y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
+    x2 = x1 + cwidth;
+    y2 = y1 + cheight;
+
+    srtot = sgtot = sbtot = satot = 0;
+
+    for (i = y1; i < y2; ++i)
+    {
+	for (j = x1; j < x2; ++j)
+	{
+	    int rx = j;
+	    int ry = i;
+
+	    pixman_fixed_t f = *params;
+
+	    if (f)
+	    {
+		argb_t pixel;
+
+		if (repeat_mode != PIXMAN_REPEAT_NONE)
+		{
+		    repeat (repeat_mode, &rx, width);
+		    repeat (repeat_mode, &ry, height);
+
+		    get_pixel (image, rx, ry, FALSE, &pixel);
+		}
+		else
+		{
+		    get_pixel (image, rx, ry, TRUE, &pixel);
+		}
+
+		satot += pixel.a * f;
+		srtot += pixel.r * f;
+		sgtot += pixel.g * f;
+		sbtot += pixel.b * f;
+	    }
+
+	    params++;
+	}
+    }
+
+    ret->a = CLIP (satot / 65536.f, 0.f, 1.f);
+    ret->r = CLIP (srtot / 65536.f, 0.f, 1.f);
+    ret->g = CLIP (sgtot / 65536.f, 0.f, 1.f);
+    ret->b = CLIP (sbtot / 65536.f, 0.f, 1.f);
 }
 
-static uint32_t
-bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
-                                              pixman_fixed_t x,
-                                              pixman_fixed_t y,
-                                              get_pixel_t    get_pixel)
+static void
+bits_image_fetch_pixel_separable_convolution_32 (bits_image_t  *image,
+						 pixman_fixed_t x,
+						 pixman_fixed_t y,
+						 get_pixel_t    get_pixel,
+						 void	       *out)
 {
     pixman_fixed_t *params = image->common.filter_params;
     pixman_repeat_t repeat_mode = image->common.repeat;
@@ -234,6 +359,7 @@  bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
     int32_t x1, x2, y1, y2;
     int32_t px, py;
     int i, j;
+    uint32_t *ret = out;
 
     /* Round x and y to the middle of the closest phase before continuing. This
      * ensures that the convolution matrix is aligned right, since it was
@@ -278,11 +404,11 @@  bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
                         repeat (repeat_mode, &rx, width);
                         repeat (repeat_mode, &ry, height);
 
-                        pixel = get_pixel (image, rx, ry, FALSE);
+                        get_pixel (image, rx, ry, FALSE, &pixel);
                     }
                     else
                     {
-                        pixel = get_pixel (image, rx, ry, TRUE);
+                        get_pixel (image, rx, ry, TRUE, &pixel);
 		    }
 
                     f = (fy * fx + 0x8000) >> 16;
@@ -306,46 +432,153 @@  bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
     sgtot = CLIP (sgtot, 0, 0xff);
     sbtot = CLIP (sbtot, 0, 0xff);
 
-    return ((satot << 24) | (srtot << 16) | (sgtot <<  8) | (sbtot));
+    *ret = ((satot << 24) | (srtot << 16) | (sgtot <<  8) | (sbtot));
 }
 
-static force_inline uint32_t
-bits_image_fetch_pixel_filtered (bits_image_t *image,
+static void
+bits_image_fetch_pixel_separable_convolution_float (bits_image_t  *image,
+						    pixman_fixed_t x,
+						    pixman_fixed_t y,
+						    get_pixel_t    get_pixel,
+						    void	  *out)
+{
+    pixman_fixed_t *params = image->common.filter_params;
+    pixman_repeat_t repeat_mode = image->common.repeat;
+    int width = image->width;
+    int height = image->height;
+    int cwidth = pixman_fixed_to_int (params[0]);
+    int cheight = pixman_fixed_to_int (params[1]);
+    int x_phase_bits = pixman_fixed_to_int (params[2]);
+    int y_phase_bits = pixman_fixed_to_int (params[3]);
+    int x_phase_shift = 16 - x_phase_bits;
+    int y_phase_shift = 16 - y_phase_bits;
+    int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
+    int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
+    pixman_fixed_t *y_params;
+    int srtot, sgtot, sbtot, satot;
+    int32_t x1, x2, y1, y2;
+    int32_t px, py;
+    int i, j;
+    argb_t *ret = out;
+
+    /* Round x and y to the middle of the closest phase before continuing. This
+     * ensures that the convolution matrix is aligned right, since it was
+     * positioned relative to a particular phase (and not relative to whatever
+     * exact fraction we happen to get here).
+     */
+    x = ((x >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
+    y = ((y >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);
+
+    px = (x & 0xffff) >> x_phase_shift;
+    py = (y & 0xffff) >> y_phase_shift;
+
+    y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;
+
+    x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
+    y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
+    x2 = x1 + cwidth;
+    y2 = y1 + cheight;
+
+    srtot = sgtot = sbtot = satot = 0;
+
+    for (i = y1; i < y2; ++i)
+    {
+        pixman_fixed_48_16_t fy = *y_params++;
+        pixman_fixed_t *x_params = params + 4 + px * cwidth;
+
+        if (fy)
+        {
+            for (j = x1; j < x2; ++j)
+            {
+                pixman_fixed_t fx = *x_params++;
+		int rx = j;
+		int ry = i;
+
+                if (fx)
+                {
+                    pixman_fixed_t f;
+                    argb_t pixel;
+
+                    if (repeat_mode != PIXMAN_REPEAT_NONE)
+                    {
+                        repeat (repeat_mode, &rx, width);
+                        repeat (repeat_mode, &ry, height);
+
+                        get_pixel (image, rx, ry, FALSE, &pixel);
+                    }
+                    else
+                    {
+                        get_pixel (image, rx, ry, TRUE, &pixel);
+		    }
+
+                    f = (fy * fx + 0x8000) >> 16;
+
+		    satot += pixel.a * f;
+		    srtot += pixel.r * f;
+		    sgtot += pixel.g * f;
+		    sbtot += pixel.b * f;
+                }
+            }
+	}
+    }
+
+    ret->a = CLIP (satot / 65536.f, 0.f, 1.f);
+    ret->r = CLIP (srtot / 65536.f, 0.f, 1.f);
+    ret->g = CLIP (sgtot / 65536.f, 0.f, 1.f);
+    ret->b = CLIP (sbtot / 65536.f, 0.f, 1.f);
+}
+
+static force_inline void
+bits_image_fetch_pixel_filtered (bits_image_t  *image,
+				 pixman_bool_t  wide,
 				 pixman_fixed_t x,
 				 pixman_fixed_t y,
-				 get_pixel_t    get_pixel)
+				 get_pixel_t    get_pixel,
+				 void          *out)
 {
     switch (image->common.filter)
     {
     case PIXMAN_FILTER_NEAREST:
     case PIXMAN_FILTER_FAST:
-	return bits_image_fetch_pixel_nearest (image, x, y, get_pixel);
+	bits_image_fetch_pixel_nearest (image, x, y, get_pixel, out);
 	break;
 
     case PIXMAN_FILTER_BILINEAR:
     case PIXMAN_FILTER_GOOD:
     case PIXMAN_FILTER_BEST:
-	return bits_image_fetch_pixel_bilinear (image, x, y, get_pixel);
+	if (wide)
+	    bits_image_fetch_pixel_bilinear_float (image, x, y, get_pixel, out);
+	else
+	    bits_image_fetch_pixel_bilinear_32 (image, x, y, get_pixel, out);
 	break;
 
     case PIXMAN_FILTER_CONVOLUTION:
-	return bits_image_fetch_pixel_convolution (image, x, y, get_pixel);
+	if (wide)
+	    bits_image_fetch_pixel_convolution_float (image, x, y,
+						      get_pixel, out);
+	else
+	    bits_image_fetch_pixel_convolution_32 (image, x, y,
+						   get_pixel, out);
 	break;
 
     case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
-        return bits_image_fetch_pixel_separable_convolution (image, x, y, get_pixel);
+	if (wide)
+	    bits_image_fetch_pixel_separable_convolution_float (image, x, y,
+								get_pixel, out);
+	else
+	    bits_image_fetch_pixel_separable_convolution_32 (image, x, y,
+							     get_pixel, out);
         break;
 
     default:
         break;
     }
-
-    return 0;
 }
 
 static uint32_t *
-bits_image_fetch_affine_no_alpha (pixman_iter_t *  iter,
-				  const uint32_t * mask)
+__bits_image_fetch_affine_no_alpha (pixman_iter_t *  iter,
+				    pixman_bool_t    wide,
+				    const uint32_t * mask)
 {
     pixman_image_t *image  = iter->image;
     int             offset = iter->x;
@@ -357,6 +590,8 @@  bits_image_fetch_affine_no_alpha (pixman_iter_t *  iter,
     pixman_fixed_t ux, uy;
     pixman_vector_t v;
     int i;
+    get_pixel_t get_pixel =
+	wide ? fetch_pixel_no_alpha_float : fetch_pixel_no_alpha_32;
 
     /* reference point is the center of the pixel */
     v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
@@ -384,27 +619,45 @@  bits_image_fetch_affine_no_alpha (pixman_iter_t *  iter,
     {
 	if (!mask || mask[i])
 	{
-	    buffer[i] = bits_image_fetch_pixel_filtered (
-		&image->bits, x, y, fetch_pixel_no_alpha);
+	    bits_image_fetch_pixel_filtered (
+		&image->bits, wide, x, y, get_pixel, buffer);
 	}
 
 	x += ux;
 	y += uy;
+	buffer += wide ? 4 : 1;
     }
 
-    return buffer;
+    return iter->buffer;
+}
+
+static uint32_t *
+bits_image_fetch_affine_no_alpha_32 (pixman_iter_t  *iter,
+				     const uint32_t *mask)
+{
+    return __bits_image_fetch_affine_no_alpha(iter, FALSE, mask);
+}
+
+static uint32_t *
+bits_image_fetch_affine_no_alpha_float (pixman_iter_t  *iter,
+					const uint32_t *mask)
+{
+    return __bits_image_fetch_affine_no_alpha(iter, TRUE, mask);
 }
 
 /* General fetcher */
-static force_inline uint32_t
-fetch_pixel_general (bits_image_t *image, int x, int y, pixman_bool_t check_bounds)
+static force_inline void
+fetch_pixel_general_32 (bits_image_t *image,
+			int x, int y, pixman_bool_t check_bounds,
+			void *out)
 {
-    uint32_t pixel;
+    uint32_t pixel, *ret = out;
 
     if (check_bounds &&
 	(x < 0 || x >= image->width || y < 0 || y >= image->height))
     {
-	return 0;
+	*ret = 0;
+	return;
     }
 
     pixel = image->fetch_pixel_32 (image, x, y);
@@ -433,18 +686,59 @@  fetch_pixel_general (bits_image_t *image, int x, int y, pixman_bool_t check_boun
 	pixel |= (pixel_a << 24);
     }
 
-    return pixel;
+    *ret = pixel;
+}
+
+static force_inline void
+fetch_pixel_general_float (bits_image_t *image,
+			int x, int y, pixman_bool_t check_bounds,
+			void *out)
+{
+    argb_t *ret = out;
+
+    if (check_bounds &&
+	(x < 0 || x >= image->width || y < 0 || y >= image->height))
+    {
+	ret->a = ret->r = ret->g = ret->b = 0;
+	return;
+    }
+
+    *ret = image->fetch_pixel_float (image, x, y);
+
+    if (image->common.alpha_map)
+    {
+	x -= image->common.alpha_origin_x;
+	y -= image->common.alpha_origin_y;
+
+	if (x < 0 || x >= image->common.alpha_map->width ||
+	    y < 0 || y >= image->common.alpha_map->height)
+	{
+	    ret->a = 0.f;
+	}
+	else
+	{
+	    argb_t alpha;
+
+	    alpha = image->common.alpha_map->fetch_pixel_float (
+		    image->common.alpha_map, x, y);
+
+	    ret->a = alpha.a;
+	}
+    }
 }
 
 static uint32_t *
-bits_image_fetch_general (pixman_iter_t  *iter,
-			  const uint32_t *mask)
+__bits_image_fetch_general (pixman_iter_t  *iter,
+			    pixman_bool_t wide,
+			    const uint32_t *mask)
 {
     pixman_image_t *image  = iter->image;
     int             offset = iter->x;
     int             line   = iter->y++;
     int             width  = iter->width;
     uint32_t *      buffer = iter->buffer;
+    get_pixel_t     get_pixel =
+	wide ? fetch_pixel_general_float : fetch_pixel_general_32;
 
     pixman_fixed_t x, y, w;
     pixman_fixed_t ux, uy, uw;
@@ -493,16 +787,31 @@  bits_image_fetch_general (pixman_iter_t  *iter,
 		y0 = 0;
 	    }
 
-	    buffer[i] = bits_image_fetch_pixel_filtered (
-		&image->bits, x0, y0, fetch_pixel_general);
+	    bits_image_fetch_pixel_filtered (
+		&image->bits, wide, x0, y0, get_pixel, buffer);
 	}
 
 	x += ux;
 	y += uy;
 	w += uw;
+	buffer += wide ? 4 : 1;
     }
 
-    return buffer;
+    return iter->buffer;
+}
+
+static uint32_t *
+bits_image_fetch_general_32 (pixman_iter_t  *iter,
+			     const uint32_t *mask)
+{
+    return __bits_image_fetch_general(iter, FALSE, mask);
+}
+
+static uint32_t *
+bits_image_fetch_general_float (pixman_iter_t  *iter,
+				const uint32_t *mask)
+{
+    return __bits_image_fetch_general(iter, TRUE, mask);
 }
 
 static void
@@ -703,15 +1012,15 @@  static const fetcher_info_t fetcher_info[] =
     /* Affine, no alpha */
     { PIXMAN_any,
       (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM),
-      bits_image_fetch_affine_no_alpha,
-      _pixman_image_get_scanline_generic_float
+      bits_image_fetch_affine_no_alpha_32,
+      bits_image_fetch_affine_no_alpha_float,
     },
 
     /* General */
     { PIXMAN_any,
       0,
-      bits_image_fetch_general,
-      _pixman_image_get_scanline_generic_float
+      bits_image_fetch_general_32,
+      bits_image_fetch_general_float,
     },
 
     { PIXMAN_null },
@@ -741,7 +1050,6 @@  _pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter)
 	    }
 	    else
 	    {
-		iter->data = info->get_scanline_32;
 		iter->get_scanline = info->get_scanline_float;
 	    }
 	    return;
diff --git a/pixman/pixman-inlines.h b/pixman/pixman-inlines.h
index 1c8441d6dabe..332e208140a0 100644
--- a/pixman/pixman-inlines.h
+++ b/pixman/pixman-inlines.h
@@ -222,6 +222,31 @@  bilinear_interpolation (uint32_t tl, uint32_t tr,
 #endif
 #endif // BILINEAR_INTERPOLATION_BITS <= 4
 
+static force_inline argb_t
+bilinear_interpolation_float (argb_t tl, argb_t tr,
+			      argb_t bl, argb_t br,
+			      float distx, float disty)
+{
+    float distxy, distxiy, distixy, distixiy;
+    argb_t r;
+
+    distxy = distx * disty;
+    distxiy = distx - (1.f - distxy);
+    distixy = (1.f - distx) * disty;
+    distixiy = (1.f - distx) * (1.f - disty);
+
+    r.a = tl.a * distixiy + tr.a * distxiy +
+          bl.a * distixy  + br.a * distxy;
+    r.r = tl.r * distixiy + tr.r * distxiy +
+          bl.r * distixy  + br.r * distxy;
+    r.g = tl.g * distixiy + tr.g * distxiy +
+          bl.g * distixy  + br.g * distxy;
+    r.b = tl.b * distixiy + tr.b * distxiy +
+          bl.b * distixy  + br.b * distxy;
+
+    return r;
+}
+
 /*
  * For each scanline fetched from source image with PAD repeat:
  * - calculate how many pixels need to be padded on the left side

Comments

Oops, missing commit description..

pixman-bits-image's wide helpers first obtains the 8-bits image,
then converts it to float. This destroys all the precision that
the wide path was offering.

Fix this by making get_pixel() take a pointer instead of returning
a value. Floating point will fill in a argb_t, while the 8-bits path
will fill a 32-bits ARGB value. This also requires writing a floating
point bilinear interpolator. With this change pixman can use the full
floating point precision internally in all paths.

Op 03-01-2019 om 12:18 schreef Maarten Lankhorst:
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>  pixman/pixman-bits-image.c | 478 ++++++++++++++++++++++++++++++-------
>  pixman/pixman-inlines.h    |  25 ++
>  2 files changed, 418 insertions(+), 85 deletions(-)
>
> diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
> index 9fb91ff5831d..cb1c5ce12d46 100644
> --- a/pixman/pixman-bits-image.c
> +++ b/pixman/pixman-bits-image.c
> @@ -36,43 +36,45 @@
>  #include "pixman-combine32.h"
>  #include "pixman-inlines.h"
>  
> -static uint32_t *
> -_pixman_image_get_scanline_generic_float (pixman_iter_t * iter,
> -					  const uint32_t *mask)
> -{
> -    pixman_iter_get_scanline_t fetch_32 = iter->data;
> -    uint32_t *buffer = iter->buffer;
> -
> -    fetch_32 (iter, NULL);
> +/* Fetch functions */
>  
> -    pixman_expand_to_float ((argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
> +static force_inline void
> +fetch_pixel_no_alpha_32 (bits_image_t *image,
> +			 int x, int y, pixman_bool_t check_bounds,
> +			 void *out)
> +{
> +    uint32_t *ret = out;
>  
> -    return iter->buffer;
> +    if (check_bounds &&
> +	(x < 0 || x >= image->width || y < 0 || y >= image->height))
> +	*ret = 0;
> +    else
> +	*ret = image->fetch_pixel_32 (image, x, y);
>  }
>  
> -/* Fetch functions */
> -
> -static force_inline uint32_t
> -fetch_pixel_no_alpha (bits_image_t *image,
> -		      int x, int y, pixman_bool_t check_bounds)
> +static force_inline void
> +fetch_pixel_no_alpha_float (bits_image_t *image,
> +			    int x, int y, pixman_bool_t check_bounds,
> +			    void *out)
>  {
> +    argb_t *ret = out;
> +
>      if (check_bounds &&
>  	(x < 0 || x >= image->width || y < 0 || y >= image->height))
> -    {
> -	return 0;
> -    }
> -
> -    return image->fetch_pixel_32 (image, x, y);
> +	ret->a = ret->r = ret->g = ret->b = 0.f;
> +    else
> +	*ret = image->fetch_pixel_float (image, x, y);
>  }
>  
> -typedef uint32_t (* get_pixel_t) (bits_image_t *image,
> -				  int x, int y, pixman_bool_t check_bounds);
> +typedef void (* get_pixel_t) (bits_image_t *image,
> +			      int x, int y, pixman_bool_t check_bounds, void *out);
>  
> -static force_inline uint32_t
> +static force_inline void
>  bits_image_fetch_pixel_nearest (bits_image_t   *image,
>  				pixman_fixed_t  x,
>  				pixman_fixed_t  y,
> -				get_pixel_t	get_pixel)
> +				get_pixel_t	get_pixel,
> +				void	       *out)
>  {
>      int x0 = pixman_fixed_to_int (x - pixman_fixed_e);
>      int y0 = pixman_fixed_to_int (y - pixman_fixed_e);
> @@ -82,19 +84,20 @@ bits_image_fetch_pixel_nearest (bits_image_t   *image,
>  	repeat (image->common.repeat, &x0, image->width);
>  	repeat (image->common.repeat, &y0, image->height);
>  
> -	return get_pixel (image, x0, y0, FALSE);
> +	get_pixel (image, x0, y0, FALSE, out);
>      }
>      else
>      {
> -	return get_pixel (image, x0, y0, TRUE);
> +	get_pixel (image, x0, y0, TRUE, out);
>      }
>  }
>  
> -static force_inline uint32_t
> -bits_image_fetch_pixel_bilinear (bits_image_t   *image,
> -				 pixman_fixed_t  x,
> -				 pixman_fixed_t  y,
> -				 get_pixel_t	 get_pixel)
> +static force_inline void
> +bits_image_fetch_pixel_bilinear_32 (bits_image_t   *image,
> +				    pixman_fixed_t  x,
> +				    pixman_fixed_t  y,
> +				    get_pixel_t	    get_pixel,
> +				    void	   *out)
>  {
>      pixman_repeat_t repeat_mode = image->common.repeat;
>      int width = image->width;
> @@ -102,6 +105,7 @@ bits_image_fetch_pixel_bilinear (bits_image_t   *image,
>      int x1, y1, x2, y2;
>      uint32_t tl, tr, bl, br;
>      int32_t distx, disty;
> +    uint32_t *ret = out;
>  
>      x1 = x - pixman_fixed_1 / 2;
>      y1 = y - pixman_fixed_1 / 2;
> @@ -121,27 +125,77 @@ bits_image_fetch_pixel_bilinear (bits_image_t   *image,
>  	repeat (repeat_mode, &x2, width);
>  	repeat (repeat_mode, &y2, height);
>  
> -	tl = get_pixel (image, x1, y1, FALSE);
> -	bl = get_pixel (image, x1, y2, FALSE);
> -	tr = get_pixel (image, x2, y1, FALSE);
> -	br = get_pixel (image, x2, y2, FALSE);
> +	get_pixel (image, x1, y1, FALSE, &tl);
> +	get_pixel (image, x2, y1, FALSE, &tr);
> +	get_pixel (image, x1, y2, FALSE, &bl);
> +	get_pixel (image, x2, y2, FALSE, &br);
>      }
>      else
>      {
> -	tl = get_pixel (image, x1, y1, TRUE);
> -	tr = get_pixel (image, x2, y1, TRUE);
> -	bl = get_pixel (image, x1, y2, TRUE);
> -	br = get_pixel (image, x2, y2, TRUE);
> +	get_pixel (image, x1, y1, TRUE, &tl);
> +	get_pixel (image, x2, y1, TRUE, &tr);
> +	get_pixel (image, x1, y2, TRUE, &bl);
> +	get_pixel (image, x2, y2, TRUE, &br);
>      }
>  
> -    return bilinear_interpolation (tl, tr, bl, br, distx, disty);
> +    *ret = bilinear_interpolation (tl, tr, bl, br, distx, disty);
>  }
>  
> -static force_inline uint32_t
> -bits_image_fetch_pixel_convolution (bits_image_t   *image,
> -				    pixman_fixed_t  x,
> -				    pixman_fixed_t  y,
> -				    get_pixel_t     get_pixel)
> +static force_inline void
> +bits_image_fetch_pixel_bilinear_float (bits_image_t   *image,
> +				       pixman_fixed_t  x,
> +				       pixman_fixed_t  y,
> +				       get_pixel_t     get_pixel,
> +				       void	      *out)
> +{
> +    pixman_repeat_t repeat_mode = image->common.repeat;
> +    int width = image->width;
> +    int height = image->height;
> +    int x1, y1, x2, y2;
> +    argb_t tl, tr, bl, br;
> +    float distx, disty;
> +    argb_t *ret = out;
> +
> +    x1 = x - pixman_fixed_1 / 2;
> +    y1 = y - pixman_fixed_1 / 2;
> +
> +    distx = ((float)pixman_fixed_fraction(x1)) / 65536.f;
> +    disty = ((float)pixman_fixed_fraction(y1)) / 65536.f;
> +
> +    x1 = pixman_fixed_to_int (x1);
> +    y1 = pixman_fixed_to_int (y1);
> +    x2 = x1 + 1;
> +    y2 = y1 + 1;
> +
> +    if (repeat_mode != PIXMAN_REPEAT_NONE)
> +    {
> +	repeat (repeat_mode, &x1, width);
> +	repeat (repeat_mode, &y1, height);
> +	repeat (repeat_mode, &x2, width);
> +	repeat (repeat_mode, &y2, height);
> +
> +	get_pixel (image, x1, y1, FALSE, &tl);
> +	get_pixel (image, x2, y1, FALSE, &tr);
> +	get_pixel (image, x1, y2, FALSE, &bl);
> +	get_pixel (image, x2, y2, FALSE, &br);
> +    }
> +    else
> +    {
> +	get_pixel (image, x1, y1, TRUE, &tl);
> +	get_pixel (image, x2, y1, TRUE, &tr);
> +	get_pixel (image, x1, y2, TRUE, &bl);
> +	get_pixel (image, x2, y2, TRUE, &br);
> +    }
> +
> +    *ret = bilinear_interpolation_float (tl, tr, bl, br, distx, disty);
> +}
> +
> +static force_inline void
> +bits_image_fetch_pixel_convolution_32 (bits_image_t   *image,
> +				       pixman_fixed_t  x,
> +				       pixman_fixed_t  y,
> +				       get_pixel_t     get_pixel,
> +				       void	      *out)
>  {
>      pixman_fixed_t *params = image->common.filter_params;
>      int x_off = (params[0] - pixman_fixed_1) >> 1;
> @@ -153,6 +207,7 @@ bits_image_fetch_pixel_convolution (bits_image_t   *image,
>      int width = image->width;
>      int height = image->height;
>      int srtot, sgtot, sbtot, satot;
> +    uint32_t *ret = out;
>  
>      params += 2;
>  
> @@ -181,11 +236,11 @@ bits_image_fetch_pixel_convolution (bits_image_t   *image,
>  		    repeat (repeat_mode, &rx, width);
>  		    repeat (repeat_mode, &ry, height);
>  
> -		    pixel = get_pixel (image, rx, ry, FALSE);
> +		    get_pixel (image, rx, ry, FALSE, &pixel);
>  		}
>  		else
>  		{
> -		    pixel = get_pixel (image, rx, ry, TRUE);
> +		    get_pixel (image, rx, ry, TRUE, &pixel);
>  		}
>  
>  		srtot += (int)RED_8 (pixel) * f;
> @@ -208,14 +263,84 @@ bits_image_fetch_pixel_convolution (bits_image_t   *image,
>      sgtot = CLIP (sgtot, 0, 0xff);
>      sbtot = CLIP (sbtot, 0, 0xff);
>  
> -    return ((satot << 24) | (srtot << 16) | (sgtot <<  8) | (sbtot));
> +    *ret = ((satot << 24) | (srtot << 16) | (sgtot <<  8) | (sbtot));
> +}
> +
> +static force_inline void
> +bits_image_fetch_pixel_convolution_float (bits_image_t   *image,
> +					  pixman_fixed_t  x,
> +					  pixman_fixed_t  y,
> +					  get_pixel_t     get_pixel,
> +					  void	      *out)
> +{
> +    pixman_fixed_t *params = image->common.filter_params;
> +    int x_off = (params[0] - pixman_fixed_1) >> 1;
> +    int y_off = (params[1] - pixman_fixed_1) >> 1;
> +    int32_t cwidth = pixman_fixed_to_int (params[0]);
> +    int32_t cheight = pixman_fixed_to_int (params[1]);
> +    int32_t i, j, x1, x2, y1, y2;
> +    pixman_repeat_t repeat_mode = image->common.repeat;
> +    int width = image->width;
> +    int height = image->height;
> +    int srtot, sgtot, sbtot, satot;
> +    argb_t *ret = out;
> +
> +    params += 2;
> +
> +    x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
> +    y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
> +    x2 = x1 + cwidth;
> +    y2 = y1 + cheight;
> +
> +    srtot = sgtot = sbtot = satot = 0;
> +
> +    for (i = y1; i < y2; ++i)
> +    {
> +	for (j = x1; j < x2; ++j)
> +	{
> +	    int rx = j;
> +	    int ry = i;
> +
> +	    pixman_fixed_t f = *params;
> +
> +	    if (f)
> +	    {
> +		argb_t pixel;
> +
> +		if (repeat_mode != PIXMAN_REPEAT_NONE)
> +		{
> +		    repeat (repeat_mode, &rx, width);
> +		    repeat (repeat_mode, &ry, height);
> +
> +		    get_pixel (image, rx, ry, FALSE, &pixel);
> +		}
> +		else
> +		{
> +		    get_pixel (image, rx, ry, TRUE, &pixel);
> +		}
> +
> +		satot += pixel.a * f;
> +		srtot += pixel.r * f;
> +		sgtot += pixel.g * f;
> +		sbtot += pixel.b * f;
> +	    }
> +
> +	    params++;
> +	}
> +    }
> +
> +    ret->a = CLIP (satot / 65536.f, 0.f, 1.f);
> +    ret->r = CLIP (srtot / 65536.f, 0.f, 1.f);
> +    ret->g = CLIP (sgtot / 65536.f, 0.f, 1.f);
> +    ret->b = CLIP (sbtot / 65536.f, 0.f, 1.f);
>  }
>  
> -static uint32_t
> -bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
> -                                              pixman_fixed_t x,
> -                                              pixman_fixed_t y,
> -                                              get_pixel_t    get_pixel)
> +static void
> +bits_image_fetch_pixel_separable_convolution_32 (bits_image_t  *image,
> +						 pixman_fixed_t x,
> +						 pixman_fixed_t y,
> +						 get_pixel_t    get_pixel,
> +						 void	       *out)
>  {
>      pixman_fixed_t *params = image->common.filter_params;
>      pixman_repeat_t repeat_mode = image->common.repeat;
> @@ -234,6 +359,7 @@ bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
>      int32_t x1, x2, y1, y2;
>      int32_t px, py;
>      int i, j;
> +    uint32_t *ret = out;
>  
>      /* Round x and y to the middle of the closest phase before continuing. This
>       * ensures that the convolution matrix is aligned right, since it was
> @@ -278,11 +404,11 @@ bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
>                          repeat (repeat_mode, &rx, width);
>                          repeat (repeat_mode, &ry, height);
>  
> -                        pixel = get_pixel (image, rx, ry, FALSE);
> +                        get_pixel (image, rx, ry, FALSE, &pixel);
>                      }
>                      else
>                      {
> -                        pixel = get_pixel (image, rx, ry, TRUE);
> +                        get_pixel (image, rx, ry, TRUE, &pixel);
>  		    }
>  
>                      f = (fy * fx + 0x8000) >> 16;
> @@ -306,46 +432,153 @@ bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
>      sgtot = CLIP (sgtot, 0, 0xff);
>      sbtot = CLIP (sbtot, 0, 0xff);
>  
> -    return ((satot << 24) | (srtot << 16) | (sgtot <<  8) | (sbtot));
> +    *ret = ((satot << 24) | (srtot << 16) | (sgtot <<  8) | (sbtot));
>  }
>  
> -static force_inline uint32_t
> -bits_image_fetch_pixel_filtered (bits_image_t *image,
> +static void
> +bits_image_fetch_pixel_separable_convolution_float (bits_image_t  *image,
> +						    pixman_fixed_t x,
> +						    pixman_fixed_t y,
> +						    get_pixel_t    get_pixel,
> +						    void	  *out)
> +{
> +    pixman_fixed_t *params = image->common.filter_params;
> +    pixman_repeat_t repeat_mode = image->common.repeat;
> +    int width = image->width;
> +    int height = image->height;
> +    int cwidth = pixman_fixed_to_int (params[0]);
> +    int cheight = pixman_fixed_to_int (params[1]);
> +    int x_phase_bits = pixman_fixed_to_int (params[2]);
> +    int y_phase_bits = pixman_fixed_to_int (params[3]);
> +    int x_phase_shift = 16 - x_phase_bits;
> +    int y_phase_shift = 16 - y_phase_bits;
> +    int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
> +    int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
> +    pixman_fixed_t *y_params;
> +    int srtot, sgtot, sbtot, satot;
> +    int32_t x1, x2, y1, y2;
> +    int32_t px, py;
> +    int i, j;
> +    argb_t *ret = out;
> +
> +    /* Round x and y to the middle of the closest phase before continuing. This
> +     * ensures that the convolution matrix is aligned right, since it was
> +     * positioned relative to a particular phase (and not relative to whatever
> +     * exact fraction we happen to get here).
> +     */
> +    x = ((x >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
> +    y = ((y >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);
> +
> +    px = (x & 0xffff) >> x_phase_shift;
> +    py = (y & 0xffff) >> y_phase_shift;
> +
> +    y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;
> +
> +    x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
> +    y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
> +    x2 = x1 + cwidth;
> +    y2 = y1 + cheight;
> +
> +    srtot = sgtot = sbtot = satot = 0;
> +
> +    for (i = y1; i < y2; ++i)
> +    {
> +        pixman_fixed_48_16_t fy = *y_params++;
> +        pixman_fixed_t *x_params = params + 4 + px * cwidth;
> +
> +        if (fy)
> +        {
> +            for (j = x1; j < x2; ++j)
> +            {
> +                pixman_fixed_t fx = *x_params++;
> +		int rx = j;
> +		int ry = i;
> +
> +                if (fx)
> +                {
> +                    pixman_fixed_t f;
> +                    argb_t pixel;
> +
> +                    if (repeat_mode != PIXMAN_REPEAT_NONE)
> +                    {
> +                        repeat (repeat_mode, &rx, width);
> +                        repeat (repeat_mode, &ry, height);
> +
> +                        get_pixel (image, rx, ry, FALSE, &pixel);
> +                    }
> +                    else
> +                    {
> +                        get_pixel (image, rx, ry, TRUE, &pixel);
> +		    }
> +
> +                    f = (fy * fx + 0x8000) >> 16;
> +
> +		    satot += pixel.a * f;
> +		    srtot += pixel.r * f;
> +		    sgtot += pixel.g * f;
> +		    sbtot += pixel.b * f;
> +                }
> +            }
> +	}
> +    }
> +
> +    ret->a = CLIP (satot / 65536.f, 0.f, 1.f);
> +    ret->r = CLIP (srtot / 65536.f, 0.f, 1.f);
> +    ret->g = CLIP (sgtot / 65536.f, 0.f, 1.f);
> +    ret->b = CLIP (sbtot / 65536.f, 0.f, 1.f);
> +}
> +
> +static force_inline void
> +bits_image_fetch_pixel_filtered (bits_image_t  *image,
> +				 pixman_bool_t  wide,
>  				 pixman_fixed_t x,
>  				 pixman_fixed_t y,
> -				 get_pixel_t    get_pixel)
> +				 get_pixel_t    get_pixel,
> +				 void          *out)
>  {
>      switch (image->common.filter)
>      {
>      case PIXMAN_FILTER_NEAREST:
>      case PIXMAN_FILTER_FAST:
> -	return bits_image_fetch_pixel_nearest (image, x, y, get_pixel);
> +	bits_image_fetch_pixel_nearest (image, x, y, get_pixel, out);
>  	break;
>  
>      case PIXMAN_FILTER_BILINEAR:
>      case PIXMAN_FILTER_GOOD:
>      case PIXMAN_FILTER_BEST:
> -	return bits_image_fetch_pixel_bilinear (image, x, y, get_pixel);
> +	if (wide)
> +	    bits_image_fetch_pixel_bilinear_float (image, x, y, get_pixel, out);
> +	else
> +	    bits_image_fetch_pixel_bilinear_32 (image, x, y, get_pixel, out);
>  	break;
>  
>      case PIXMAN_FILTER_CONVOLUTION:
> -	return bits_image_fetch_pixel_convolution (image, x, y, get_pixel);
> +	if (wide)
> +	    bits_image_fetch_pixel_convolution_float (image, x, y,
> +						      get_pixel, out);
> +	else
> +	    bits_image_fetch_pixel_convolution_32 (image, x, y,
> +						   get_pixel, out);
>  	break;
>  
>      case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
> -        return bits_image_fetch_pixel_separable_convolution (image, x, y, get_pixel);
> +	if (wide)
> +	    bits_image_fetch_pixel_separable_convolution_float (image, x, y,
> +								get_pixel, out);
> +	else
> +	    bits_image_fetch_pixel_separable_convolution_32 (image, x, y,
> +							     get_pixel, out);
>          break;
>  
>      default:
>          break;
>      }
> -
> -    return 0;
>  }
>  
>  static uint32_t *
> -bits_image_fetch_affine_no_alpha (pixman_iter_t *  iter,
> -				  const uint32_t * mask)
> +__bits_image_fetch_affine_no_alpha (pixman_iter_t *  iter,
> +				    pixman_bool_t    wide,
> +				    const uint32_t * mask)
>  {
>      pixman_image_t *image  = iter->image;
>      int             offset = iter->x;
> @@ -357,6 +590,8 @@ bits_image_fetch_affine_no_alpha (pixman_iter_t *  iter,
>      pixman_fixed_t ux, uy;
>      pixman_vector_t v;
>      int i;
> +    get_pixel_t get_pixel =
> +	wide ? fetch_pixel_no_alpha_float : fetch_pixel_no_alpha_32;
>  
>      /* reference point is the center of the pixel */
>      v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
> @@ -384,27 +619,45 @@ bits_image_fetch_affine_no_alpha (pixman_iter_t *  iter,
>      {
>  	if (!mask || mask[i])
>  	{
> -	    buffer[i] = bits_image_fetch_pixel_filtered (
> -		&image->bits, x, y, fetch_pixel_no_alpha);
> +	    bits_image_fetch_pixel_filtered (
> +		&image->bits, wide, x, y, get_pixel, buffer);
>  	}
>  
>  	x += ux;
>  	y += uy;
> +	buffer += wide ? 4 : 1;
>      }
>  
> -    return buffer;
> +    return iter->buffer;
> +}
> +
> +static uint32_t *
> +bits_image_fetch_affine_no_alpha_32 (pixman_iter_t  *iter,
> +				     const uint32_t *mask)
> +{
> +    return __bits_image_fetch_affine_no_alpha(iter, FALSE, mask);
> +}
> +
> +static uint32_t *
> +bits_image_fetch_affine_no_alpha_float (pixman_iter_t  *iter,
> +					const uint32_t *mask)
> +{
> +    return __bits_image_fetch_affine_no_alpha(iter, TRUE, mask);
>  }
>  
>  /* General fetcher */
> -static force_inline uint32_t
> -fetch_pixel_general (bits_image_t *image, int x, int y, pixman_bool_t check_bounds)
> +static force_inline void
> +fetch_pixel_general_32 (bits_image_t *image,
> +			int x, int y, pixman_bool_t check_bounds,
> +			void *out)
>  {
> -    uint32_t pixel;
> +    uint32_t pixel, *ret = out;
>  
>      if (check_bounds &&
>  	(x < 0 || x >= image->width || y < 0 || y >= image->height))
>      {
> -	return 0;
> +	*ret = 0;
> +	return;
>      }
>  
>      pixel = image->fetch_pixel_32 (image, x, y);
> @@ -433,18 +686,59 @@ fetch_pixel_general (bits_image_t *image, int x, int y, pixman_bool_t check_boun
>  	pixel |= (pixel_a << 24);
>      }
>  
> -    return pixel;
> +    *ret = pixel;
> +}
> +
> +static force_inline void
> +fetch_pixel_general_float (bits_image_t *image,
> +			int x, int y, pixman_bool_t check_bounds,
> +			void *out)
> +{
> +    argb_t *ret = out;
> +
> +    if (check_bounds &&
> +	(x < 0 || x >= image->width || y < 0 || y >= image->height))
> +    {
> +	ret->a = ret->r = ret->g = ret->b = 0;
> +	return;
> +    }
> +
> +    *ret = image->fetch_pixel_float (image, x, y);
> +
> +    if (image->common.alpha_map)
> +    {
> +	x -= image->common.alpha_origin_x;
> +	y -= image->common.alpha_origin_y;
> +
> +	if (x < 0 || x >= image->common.alpha_map->width ||
> +	    y < 0 || y >= image->common.alpha_map->height)
> +	{
> +	    ret->a = 0.f;
> +	}
> +	else
> +	{
> +	    argb_t alpha;
> +
> +	    alpha = image->common.alpha_map->fetch_pixel_float (
> +		    image->common.alpha_map, x, y);
> +
> +	    ret->a = alpha.a;
> +	}
> +    }
>  }
>  
>  static uint32_t *
> -bits_image_fetch_general (pixman_iter_t  *iter,
> -			  const uint32_t *mask)
> +__bits_image_fetch_general (pixman_iter_t  *iter,
> +			    pixman_bool_t wide,
> +			    const uint32_t *mask)
>  {
>      pixman_image_t *image  = iter->image;
>      int             offset = iter->x;
>      int             line   = iter->y++;
>      int             width  = iter->width;
>      uint32_t *      buffer = iter->buffer;
> +    get_pixel_t     get_pixel =
> +	wide ? fetch_pixel_general_float : fetch_pixel_general_32;
>  
>      pixman_fixed_t x, y, w;
>      pixman_fixed_t ux, uy, uw;
> @@ -493,16 +787,31 @@ bits_image_fetch_general (pixman_iter_t  *iter,
>  		y0 = 0;
>  	    }
>  
> -	    buffer[i] = bits_image_fetch_pixel_filtered (
> -		&image->bits, x0, y0, fetch_pixel_general);
> +	    bits_image_fetch_pixel_filtered (
> +		&image->bits, wide, x0, y0, get_pixel, buffer);
>  	}
>  
>  	x += ux;
>  	y += uy;
>  	w += uw;
> +	buffer += wide ? 4 : 1;
>      }
>  
> -    return buffer;
> +    return iter->buffer;
> +}
> +
> +static uint32_t *
> +bits_image_fetch_general_32 (pixman_iter_t  *iter,
> +			     const uint32_t *mask)
> +{
> +    return __bits_image_fetch_general(iter, FALSE, mask);
> +}
> +
> +static uint32_t *
> +bits_image_fetch_general_float (pixman_iter_t  *iter,
> +				const uint32_t *mask)
> +{
> +    return __bits_image_fetch_general(iter, TRUE, mask);
>  }
>  
>  static void
> @@ -703,15 +1012,15 @@ static const fetcher_info_t fetcher_info[] =
>      /* Affine, no alpha */
>      { PIXMAN_any,
>        (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM),
> -      bits_image_fetch_affine_no_alpha,
> -      _pixman_image_get_scanline_generic_float
> +      bits_image_fetch_affine_no_alpha_32,
> +      bits_image_fetch_affine_no_alpha_float,
>      },
>  
>      /* General */
>      { PIXMAN_any,
>        0,
> -      bits_image_fetch_general,
> -      _pixman_image_get_scanline_generic_float
> +      bits_image_fetch_general_32,
> +      bits_image_fetch_general_float,
>      },
>  
>      { PIXMAN_null },
> @@ -741,7 +1050,6 @@ _pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter)
>  	    }
>  	    else
>  	    {
> -		iter->data = info->get_scanline_32;
>  		iter->get_scanline = info->get_scanline_float;
>  	    }
>  	    return;
> diff --git a/pixman/pixman-inlines.h b/pixman/pixman-inlines.h
> index 1c8441d6dabe..332e208140a0 100644
> --- a/pixman/pixman-inlines.h
> +++ b/pixman/pixman-inlines.h
> @@ -222,6 +222,31 @@ bilinear_interpolation (uint32_t tl, uint32_t tr,
>  #endif
>  #endif // BILINEAR_INTERPOLATION_BITS <= 4
>  
> +static force_inline argb_t
> +bilinear_interpolation_float (argb_t tl, argb_t tr,
> +			      argb_t bl, argb_t br,
> +			      float distx, float disty)
> +{
> +    float distxy, distxiy, distixy, distixiy;
> +    argb_t r;
> +
> +    distxy = distx * disty;
> +    distxiy = distx - (1.f - distxy);
> +    distixy = (1.f - distx) * disty;
> +    distixiy = (1.f - distx) * (1.f - disty);
> +
> +    r.a = tl.a * distixiy + tr.a * distxiy +
> +          bl.a * distixy  + br.a * distxy;
> +    r.r = tl.r * distixiy + tr.r * distxiy +
> +          bl.r * distixy  + br.r * distxy;
> +    r.g = tl.g * distixiy + tr.g * distxiy +
> +          bl.g * distixy  + br.g * distxy;
> +    r.b = tl.b * distixiy + tr.b * distxiy +
> +          bl.b * distixy  + br.b * distxy;
> +
> +    return r;
> +}
> +
>  /*
>   * For each scanline fetched from source image with PAD repeat:
>   * - calculate how many pixels need to be padded on the left side
I am far from qualified for doing a proper review on this, but this
looks reasonable overall.  I only have a few concerns outlined below.

Is there a way to visually test this?  Adding an example in demos/ would
be great, and would help ensuring there are no scaling issues in the
conv/sepconv filters (see inline comments below).

I am also a bit concerned by the duplication in the convolution and
separable_convolution functions.  I understand why it's there, but
(unlike the bilinear case) those functions are quite large by themselves
already.  If not too complex, I'd merge them by using type-erased
`accumulate(int *satot, int *srtot, int *sgtot, int *sbtot, void *pixel,
pixman_fixed_t params)` and `reduce(int *satot, int *srtot, int *sgtot,
int *sbtot, void *out)` argument functions.  They should get inlined by
the compiler anyways.

- Basile

On 1/3/19 12:18 PM, Maarten Lankhorst wrote:
> +static force_inline void
> +bits_image_fetch_pixel_convolution_float (bits_image_t   *image,
> +					  pixman_fixed_t  x,
> +					  pixman_fixed_t  y,
> +					  get_pixel_t     get_pixel,
> +					  void	      *out)
> +{
> +    pixman_fixed_t *params = image->common.filter_params;
> +    int x_off = (params[0] - pixman_fixed_1) >> 1;
> +    int y_off = (params[1] - pixman_fixed_1) >> 1;
> +    int32_t cwidth = pixman_fixed_to_int (params[0]);
> +    int32_t cheight = pixman_fixed_to_int (params[1]);
> +    int32_t i, j, x1, x2, y1, y2;
> +    pixman_repeat_t repeat_mode = image->common.repeat;
> +    int width = image->width;
> +    int height = image->height;
> +    int srtot, sgtot, sbtot, satot;
> +    argb_t *ret = out;
> +
> +    params += 2;
> +
> +    x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
> +    y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
> +    x2 = x1 + cwidth;
> +    y2 = y1 + cheight;
> +
> +    srtot = sgtot = sbtot = satot = 0;
> +
> +    for (i = y1; i < y2; ++i)
> +    {
> +	for (j = x1; j < x2; ++j)
> +	{
> +	    int rx = j;
> +	    int ry = i;
> +
> +	    pixman_fixed_t f = *params;
> +
> +	    if (f)
> +	    {
> +		argb_t pixel;
> +
> +		if (repeat_mode != PIXMAN_REPEAT_NONE)
> +		{
> +		    repeat (repeat_mode, &rx, width);
> +		    repeat (repeat_mode, &ry, height);
> +
> +		    get_pixel (image, rx, ry, FALSE, &pixel);
> +		}
> +		else
> +		{
> +		    get_pixel (image, rx, ry, TRUE, &pixel);
> +		}
> +
> +		satot += pixel.a * f;
> +		srtot += pixel.r * f;
> +		sgtot += pixel.g * f;
> +		sbtot += pixel.b * f;

I am concerned with those lines (and the corresponding lines in
separable_convolution).  Unless I am mistaken, `sXtot` and `f` are
integer variables while `pixel.X` are floating point variables in 0-1,
so it looks like more truncation than wanted may be occuring here.

> +	    }
> +
> +	    params++;
> +	}
> +    }
> +
> +    ret->a = CLIP (satot / 65536.f, 0.f, 1.f);
> +    ret->r = CLIP (srtot / 65536.f, 0.f, 1.f);
> +    ret->g = CLIP (sgtot / 65536.f, 0.f, 1.f);
> +    ret->b = CLIP (sbtot / 65536.f, 0.f, 1.f);
>  }
>  
> -static uint32_t
> -bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
> -                                              pixman_fixed_t x,
> -                                              pixman_fixed_t y,
> -                                              get_pixel_t    get_pixel)
> +static void
> +bits_image_fetch_pixel_separable_convolution_32 (bits_image_t  *image,
> +						 pixman_fixed_t x,
> +						 pixman_fixed_t y,
> +						 get_pixel_t    get_pixel,
> +						 void	       *out)
>  {
>      pixman_fixed_t *params = image->common.filter_params;
>      pixman_repeat_t repeat_mode = image->common.repeat;
> @@ -234,6 +359,7 @@ bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
>      int32_t x1, x2, y1, y2;
>      int32_t px, py;
>      int i, j;
> +    uint32_t *ret = out;
>  
>      /* Round x and y to the middle of the closest phase before continuing. This
>       * ensures that the convolution matrix is aligned right, since it was
> @@ -278,11 +404,11 @@ bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
>                          repeat (repeat_mode, &rx, width);
>                          repeat (repeat_mode, &ry, height);
>  
> -                        pixel = get_pixel (image, rx, ry, FALSE);
> +                        get_pixel (image, rx, ry, FALSE, &pixel);
>                      }
>                      else
>                      {
> -                        pixel = get_pixel (image, rx, ry, TRUE);
> +                        get_pixel (image, rx, ry, TRUE, &pixel);
>  		    }
>  
>                      f = (fy * fx + 0x8000) >> 16;
> @@ -306,46 +432,153 @@ bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
>      sgtot = CLIP (sgtot, 0, 0xff);
>      sbtot = CLIP (sbtot, 0, 0xff);
>  
> -    return ((satot << 24) | (srtot << 16) | (sgtot <<  8) | (sbtot));
> +    *ret = ((satot << 24) | (srtot << 16) | (sgtot <<  8) | (sbtot));
>  }
>  
> -static force_inline uint32_t
> -bits_image_fetch_pixel_filtered (bits_image_t *image,
> +static void
> +bits_image_fetch_pixel_separable_convolution_float (bits_image_t  *image,
> +						    pixman_fixed_t x,
> +						    pixman_fixed_t y,
> +						    get_pixel_t    get_pixel,
> +						    void	  *out)
> +{
> +    pixman_fixed_t *params = image->common.filter_params;
> +    pixman_repeat_t repeat_mode = image->common.repeat;
> +    int width = image->width;
> +    int height = image->height;
> +    int cwidth = pixman_fixed_to_int (params[0]);
> +    int cheight = pixman_fixed_to_int (params[1]);
> +    int x_phase_bits = pixman_fixed_to_int (params[2]);
> +    int y_phase_bits = pixman_fixed_to_int (params[3]);
> +    int x_phase_shift = 16 - x_phase_bits;
> +    int y_phase_shift = 16 - y_phase_bits;
> +    int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
> +    int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
> +    pixman_fixed_t *y_params;
> +    int srtot, sgtot, sbtot, satot;
> +    int32_t x1, x2, y1, y2;
> +    int32_t px, py;
> +    int i, j;
> +    argb_t *ret = out;
> +
> +    /* Round x and y to the middle of the closest phase before continuing. This
> +     * ensures that the convolution matrix is aligned right, since it was
> +     * positioned relative to a particular phase (and not relative to whatever
> +     * exact fraction we happen to get here).
> +     */
> +    x = ((x >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
> +    y = ((y >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);
> +
> +    px = (x & 0xffff) >> x_phase_shift;
> +    py = (y & 0xffff) >> y_phase_shift;
> +
> +    y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;
> +
> +    x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
> +    y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
> +    x2 = x1 + cwidth;
> +    y2 = y1 + cheight;
> +
> +    srtot = sgtot = sbtot = satot = 0;
> +
> +    for (i = y1; i < y2; ++i)
> +    {
> +        pixman_fixed_48_16_t fy = *y_params++;
> +        pixman_fixed_t *x_params = params + 4 + px * cwidth;
> +
> +        if (fy)
> +        {
> +            for (j = x1; j < x2; ++j)
> +            {
> +                pixman_fixed_t fx = *x_params++;
> +		int rx = j;
> +		int ry = i;
> +
> +                if (fx)
> +                {
> +                    pixman_fixed_t f;
> +                    argb_t pixel;
> +
> +                    if (repeat_mode != PIXMAN_REPEAT_NONE)
> +                    {
> +                        repeat (repeat_mode, &rx, width);
> +                        repeat (repeat_mode, &ry, height);
> +
> +                        get_pixel (image, rx, ry, FALSE, &pixel);
> +                    }
> +                    else
> +                    {
> +                        get_pixel (image, rx, ry, TRUE, &pixel);
> +		    }
> +
> +                    f = (fy * fx + 0x8000) >> 16;
> +
> +		    satot += pixel.a * f;
> +		    srtot += pixel.r * f;
> +		    sgtot += pixel.g * f;
> +		    sbtot += pixel.b * f;
> +                }
> +            }
> +	}
> +    }
> +
> +    ret->a = CLIP (satot / 65536.f, 0.f, 1.f);
> +    ret->r = CLIP (srtot / 65536.f, 0.f, 1.f);
> +    ret->g = CLIP (sgtot / 65536.f, 0.f, 1.f);
> +    ret->b = CLIP (sbtot / 65536.f, 0.f, 1.f);
> +}
> +
> +static force_inline void
> +bits_image_fetch_pixel_filtered (bits_image_t  *image,
> +				 pixman_bool_t  wide,
>  				 pixman_fixed_t x,
>  				 pixman_fixed_t y,
> -				 get_pixel_t    get_pixel)
> +				 get_pixel_t    get_pixel,
> +				 void          *out)
>  {
>      switch (image->common.filter)
>      {
>      case PIXMAN_FILTER_NEAREST:
>      case PIXMAN_FILTER_FAST:
> -	return bits_image_fetch_pixel_nearest (image, x, y, get_pixel);
> +	bits_image_fetch_pixel_nearest (image, x, y, get_pixel, out);
>  	break;
>  
>      case PIXMAN_FILTER_BILINEAR:
>      case PIXMAN_FILTER_GOOD:
>      case PIXMAN_FILTER_BEST:
> -	return bits_image_fetch_pixel_bilinear (image, x, y, get_pixel);
> +	if (wide)
> +	    bits_image_fetch_pixel_bilinear_float (image, x, y, get_pixel, out);
> +	else
> +	    bits_image_fetch_pixel_bilinear_32 (image, x, y, get_pixel, out);
>  	break;
>  
>      case PIXMAN_FILTER_CONVOLUTION:
> -	return bits_image_fetch_pixel_convolution (image, x, y, get_pixel);
> +	if (wide)
> +	    bits_image_fetch_pixel_convolution_float (image, x, y,
> +						      get_pixel, out);
> +	else
> +	    bits_image_fetch_pixel_convolution_32 (image, x, y,
> +						   get_pixel, out);
>  	break;
>  
>      case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
> -        return bits_image_fetch_pixel_separable_convolution (image, x, y, get_pixel);
> +	if (wide)
> +	    bits_image_fetch_pixel_separable_convolution_float (image, x, y,
> +								get_pixel, out);
> +	else
> +	    bits_image_fetch_pixel_separable_convolution_32 (image, x, y,
> +							     get_pixel, out);
>          break;
>  
>      default:
>          break;
>      }
> -
> -    return 0;
>  }
>  
>  static uint32_t *
> -bits_image_fetch_affine_no_alpha (pixman_iter_t *  iter,
> -				  const uint32_t * mask)
> +__bits_image_fetch_affine_no_alpha (pixman_iter_t *  iter,
> +				    pixman_bool_t    wide,
> +				    const uint32_t * mask)
>  {
>      pixman_image_t *image  = iter->image;
>      int             offset = iter->x;
> @@ -357,6 +590,8 @@ bits_image_fetch_affine_no_alpha (pixman_iter_t *  iter,
>      pixman_fixed_t ux, uy;
>      pixman_vector_t v;
>      int i;
> +    get_pixel_t get_pixel =
> +	wide ? fetch_pixel_no_alpha_float : fetch_pixel_no_alpha_32;
>  
>      /* reference point is the center of the pixel */
>      v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
> @@ -384,27 +619,45 @@ bits_image_fetch_affine_no_alpha (pixman_iter_t *  iter,
>      {
>  	if (!mask || mask[i])
>  	{
> -	    buffer[i] = bits_image_fetch_pixel_filtered (
> -		&image->bits, x, y, fetch_pixel_no_alpha);
> +	    bits_image_fetch_pixel_filtered (
> +		&image->bits, wide, x, y, get_pixel, buffer);
>  	}
>  
>  	x += ux;
>  	y += uy;
> +	buffer += wide ? 4 : 1;
>      }
>  
> -    return buffer;
> +    return iter->buffer;
> +}
> +
> +static uint32_t *
> +bits_image_fetch_affine_no_alpha_32 (pixman_iter_t  *iter,
> +				     const uint32_t *mask)
> +{
> +    return __bits_image_fetch_affine_no_alpha(iter, FALSE, mask);
> +}
> +
> +static uint32_t *
> +bits_image_fetch_affine_no_alpha_float (pixman_iter_t  *iter,
> +					const uint32_t *mask)
> +{
> +    return __bits_image_fetch_affine_no_alpha(iter, TRUE, mask);
>  }
>  
>  /* General fetcher */
> -static force_inline uint32_t
> -fetch_pixel_general (bits_image_t *image, int x, int y, pixman_bool_t check_bounds)
> +static force_inline void
> +fetch_pixel_general_32 (bits_image_t *image,
> +			int x, int y, pixman_bool_t check_bounds,
> +			void *out)
>  {
> -    uint32_t pixel;
> +    uint32_t pixel, *ret = out;
>  
>      if (check_bounds &&
>  	(x < 0 || x >= image->width || y < 0 || y >= image->height))
>      {
> -	return 0;
> +	*ret = 0;
> +	return;
>      }
>  
>      pixel = image->fetch_pixel_32 (image, x, y);
> @@ -433,18 +686,59 @@ fetch_pixel_general (bits_image_t *image, int x, int y, pixman_bool_t check_boun
>  	pixel |= (pixel_a << 24);
>      }
>  
> -    return pixel;
> +    *ret = pixel;
> +}
> +
> +static force_inline void
> +fetch_pixel_general_float (bits_image_t *image,
> +			int x, int y, pixman_bool_t check_bounds,
> +			void *out)
> +{
> +    argb_t *ret = out;
> +
> +    if (check_bounds &&
> +	(x < 0 || x >= image->width || y < 0 || y >= image->height))
> +    {
> +	ret->a = ret->r = ret->g = ret->b = 0;
> +	return;
> +    }
> +
> +    *ret = image->fetch_pixel_float (image, x, y);
> +
> +    if (image->common.alpha_map)
> +    {
> +	x -= image->common.alpha_origin_x;
> +	y -= image->common.alpha_origin_y;
> +
> +	if (x < 0 || x >= image->common.alpha_map->width ||
> +	    y < 0 || y >= image->common.alpha_map->height)
> +	{
> +	    ret->a = 0.f;
> +	}
> +	else
> +	{
> +	    argb_t alpha;
> +
> +	    alpha = image->common.alpha_map->fetch_pixel_float (
> +		    image->common.alpha_map, x, y);
> +
> +	    ret->a = alpha.a;
> +	}
> +    }
>  }
>  
>  static uint32_t *
> -bits_image_fetch_general (pixman_iter_t  *iter,
> -			  const uint32_t *mask)
> +__bits_image_fetch_general (pixman_iter_t  *iter,
> +			    pixman_bool_t wide,
> +			    const uint32_t *mask)
>  {
>      pixman_image_t *image  = iter->image;
>      int             offset = iter->x;
>      int             line   = iter->y++;
>      int             width  = iter->width;
>      uint32_t *      buffer = iter->buffer;
> +    get_pixel_t     get_pixel =
> +	wide ? fetch_pixel_general_float : fetch_pixel_general_32;
>  
>      pixman_fixed_t x, y, w;
>      pixman_fixed_t ux, uy, uw;
> @@ -493,16 +787,31 @@ bits_image_fetch_general (pixman_iter_t  *iter,
>  		y0 = 0;
>  	    }
>  
> -	    buffer[i] = bits_image_fetch_pixel_filtered (
> -		&image->bits, x0, y0, fetch_pixel_general);
> +	    bits_image_fetch_pixel_filtered (
> +		&image->bits, wide, x0, y0, get_pixel, buffer);
>  	}
>  
>  	x += ux;
>  	y += uy;
>  	w += uw;
> +	buffer += wide ? 4 : 1;
>      }
>  
> -    return buffer;
> +    return iter->buffer;
> +}
> +
> +static uint32_t *
> +bits_image_fetch_general_32 (pixman_iter_t  *iter,
> +			     const uint32_t *mask)
> +{
> +    return __bits_image_fetch_general(iter, FALSE, mask);
> +}
> +
> +static uint32_t *
> +bits_image_fetch_general_float (pixman_iter_t  *iter,
> +				const uint32_t *mask)
> +{
> +    return __bits_image_fetch_general(iter, TRUE, mask);
>  }
>  
>  static void
> @@ -703,15 +1012,15 @@ static const fetcher_info_t fetcher_info[] =
>      /* Affine, no alpha */
>      { PIXMAN_any,
>        (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM),
> -      bits_image_fetch_affine_no_alpha,
> -      _pixman_image_get_scanline_generic_float
> +      bits_image_fetch_affine_no_alpha_32,
> +      bits_image_fetch_affine_no_alpha_float,
>      },
>  
>      /* General */
>      { PIXMAN_any,
>        0,
> -      bits_image_fetch_general,
> -      _pixman_image_get_scanline_generic_float
> +      bits_image_fetch_general_32,
> +      bits_image_fetch_general_float,
>      },
>  
>      { PIXMAN_null },
> @@ -741,7 +1050,6 @@ _pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter)
>  	    }
>  	    else
>  	    {
> -		iter->data = info->get_scanline_32;
>  		iter->get_scanline = info->get_scanline_float;
>  	    }
>  	    return;
> diff --git a/pixman/pixman-inlines.h b/pixman/pixman-inlines.h
> index 1c8441d6dabe..332e208140a0 100644
> --- a/pixman/pixman-inlines.h
> +++ b/pixman/pixman-inlines.h
> @@ -222,6 +222,31 @@ bilinear_interpolation (uint32_t tl, uint32_t tr,
>  #endif
>  #endif // BILINEAR_INTERPOLATION_BITS <= 4
>  
> +static force_inline argb_t
> +bilinear_interpolation_float (argb_t tl, argb_t tr,
> +			      argb_t bl, argb_t br,
> +			      float distx, float disty)
> +{
> +    float distxy, distxiy, distixy, distixiy;
> +    argb_t r;
> +
> +    distxy = distx * disty;
> +    distxiy = distx - (1.f - distxy);
> +    distixy = (1.f - distx) * disty;
> +    distixiy = (1.f - distx) * (1.f - disty);
> +
> +    r.a = tl.a * distixiy + tr.a * distxiy +
> +          bl.a * distixy  + br.a * distxy;
> +    r.r = tl.r * distixiy + tr.r * distxiy +
> +          bl.r * distixy  + br.r * distxy;
> +    r.g = tl.g * distixiy + tr.g * distxiy +
> +          bl.g * distixy  + br.g * distxy;
> +    r.b = tl.b * distixiy + tr.b * distxiy +
> +          bl.b * distixy  + br.b * distxy;
> +
> +    return r;
> +}
> +
>  /*
>   * For each scanline fetched from source image with PAD repeat:
>   * - calculate how many pixels need to be padded on the left side