[Spice-devel,12/12] qxl-wddm-dod: Non-forced memory allocations with VSync

Submitted by Yuri Benditovich on March 12, 2017, 8:45 a.m.

Details

Message ID 1489308309-9728-13-git-send-email-yuri.benditovich@daynix.com
State New
Headers show
Series "Set of patches for further support of VSync" ( rev: 1 ) in Spice

Not browsing as part of any series.

Commit Message

Yuri Benditovich March 12, 2017, 8:45 a.m.
In case of VSync is active allocate bitmaps for drawable objects
using non-forced requests. If immediate allocation is not possible,
place entire bitmap into memory chunk allocated from the OS.
If bitmap is allocated from device memory, but one of later
chunks can't be allocated, allocate this and further chunks from
OS memory. All these 'delayed' allocations placed into linked list
which root entry is part of QXLOutput structure.
From separated thread, before send drawable objects down, we review
the list of delayed chunks and allocate device memory (forced) to
all of them.
The cost is 2 pointers added to each drawable or cursor object.
Cursor commands currently do not make use of them; in future
we have an option to offload also cursor commands.

Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
---
 qxldod/QxlDod.cpp | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 qxldod/QxlDod.h   |   3 ++
 2 files changed, 108 insertions(+), 5 deletions(-)

Patch hide | download patch | download mbox

diff --git a/qxldod/QxlDod.cpp b/qxldod/QxlDod.cpp
index efc1fb3..0061d4c 100755
--- a/qxldod/QxlDod.cpp
+++ b/qxldod/QxlDod.cpp
@@ -4263,6 +4263,13 @@  void QxlDevice::DrawableAddRes(QXLDrawable *drawable, Resource *res)
     AddRes(output, res);
 }
 
+static FORCEINLINE PLIST_ENTRY DelayedList(QXLDrawable *pd)
+{
+    QXLOutput *output;
+    output = (QXLOutput *)((UINT8 *)pd - sizeof(QXLOutput));
+    return &output->list;
+}
+
 void QxlDevice::CursorCmdAddRes(QXLCursorCmd *cmd, Resource *res)
 {
     PAGED_CODE();
@@ -4371,6 +4378,7 @@  QXLDrawable *QxlDevice::Drawable(UINT8 type, CONST RECT *area, CONST RECT *clip,
     drawable->surfaces_dest[1] = - 1;
     drawable->surfaces_dest[2] = -1;
     CopyRect(&drawable->bbox, area);
+    InitializeListHead(DelayedList(drawable));
 
     if (!SetClip(clip, drawable)) {
         DbgPrint(TRACE_LEVEL_VERBOSE, ("%s: set clip failed\n", __FUNCTION__));
@@ -4464,7 +4472,7 @@  BOOLEAN QxlDevice::AttachNewBitmap(QXLDrawable *drawable, UINT8 *src, UINT8 *src
     Resource *image_res;
     InternalImage *internal;
     QXLDataChunk *chunk;
-    PLIST_ENTRY pDelayedList = NULL;
+    PLIST_ENTRY pDelayedList = bForce ? NULL : DelayedList(drawable);
     UINT8* dest, *dest_end;
 
     height = drawable->u.copy.src_area.bottom;
@@ -4544,8 +4552,17 @@  BOOLEAN QxlDevice::AttachNewBitmap(QXLDrawable *drawable, UINT8 *src, UINT8 *src
         BOOLEAN b = PutBytesAlign(&chunk, &dest, &dest_end, src, line_size, alloc_size, pDelayedList);
         if (!b)
         {
-            DbgPrint(TRACE_LEVEL_WARNING, ("%s: aborting copy of lines\n", __FUNCTION__));
-            return FALSE;
+            if (pitch < 0 && bForce)
+            {
+                DbgPrint(TRACE_LEVEL_WARNING, ("%s: aborting copy of lines (forced)\n", __FUNCTION__));
+                return FALSE;
+            }
+            else
+            {
+                DbgPrint(TRACE_LEVEL_WARNING, ("%s: unexpected aborting copy of lines (force %d, pitch %d)\n", __FUNCTION__, bForce, pitch));
+                return FALSE;
+            }
+            break;
         }
     }
     return TRUE;
@@ -4599,7 +4616,7 @@  QXLDrawable *QxlDevice::BltBits (
     UINT8* src_end = src - pSrc->Pitch;
     src += pSrc->Pitch * (height - 1);
 
-    if (!AttachNewBitmap(drawable, src, src_end, (INT)pSrc->Pitch, TRUE))
+    if (!AttachNewBitmap(drawable, src, src_end, (INT)pSrc->Pitch, !g_bSupportVSync))
     {
         ReleaseOutputUnderLock(drawable->release_info.id);
         drawable = NULL;
@@ -5239,11 +5256,81 @@  void QxlDevice::StopPresentThread()
     }
 }
 
+QXLDataChunk *QxlDevice::MakeChunk(QXL_DELAYED_CHUNK *pdc)
+{
+    QXLDataChunk *chunk = (QXLDataChunk *)AllocMem(MSPACE_TYPE_VRAM, pdc->chunk.data_size + sizeof(QXLDataChunk), TRUE);
+    if (chunk)
+    {
+        chunk->data_size = pdc->chunk.data_size;
+        chunk->next_chunk = 0;
+        RtlCopyMemory(chunk->data, pdc->chunk.data, chunk->data_size);
+    }
+    return chunk;
+}
+
+ULONG QxlDevice::PrepareDrawable(QXLDrawable*& drawable)
+{
+    ULONG n = 0;
+    BOOLEAN bFail = FALSE;
+    PLIST_ENTRY pe = DelayedList(drawable);
+    QXLDataChunk *chunk, *lastchunk = NULL;
+    if (IsListEmpty(pe))
+        return 0;
+
+    while (!IsListEmpty(pe))
+    {
+        QXL_DELAYED_CHUNK *pdc = (QXL_DELAYED_CHUNK *)RemoveHeadList(pe);
+        if (!lastchunk)
+            lastchunk = (QXLDataChunk *)pdc->chunk.prev_chunk;
+        if (!lastchunk)
+        {
+            // bitmap was not allocated, this is single delayed chunk
+            if (AttachNewBitmap(
+                drawable,
+                pdc->chunk.data,
+                pdc->chunk.data + pdc->chunk.data_size,
+                -(drawable->u.copy.src_area.right * 4),
+                TRUE))
+            {
+                ++n;
+            }
+            else
+            {
+                bFail = TRUE;
+            }
+        }
+        if (lastchunk)
+        {
+            // some chunks were not allocated
+            chunk = MakeChunk(pdc);
+            if (chunk)
+            {
+                chunk->prev_chunk = PA(lastchunk, m_SurfaceMemSlot);
+                lastchunk->next_chunk = PA(chunk, m_SurfaceMemSlot);
+                lastchunk = chunk;
+                ++n;
+            }
+            else
+            {
+                bFail = TRUE;
+            }
+        }
+        delete[] reinterpret_cast<BYTE*>(pdc);
+    }
+    if (bFail)
+    {
+        ReleaseOutputUnderLock(drawable->release_info.id);
+        drawable = NULL;
+    }
+    return n;
+}
+
 void QxlDevice::PresentThreadRoutine()
 {
     PAGED_CODE();
     int wait;
     int notify;
+    ULONG delayed = 0;
     QXLDrawable** drawables;
 
     DbgPrint(TRACE_LEVEL_INFORMATION, ("--->%s\n", __FUNCTION__));
@@ -5268,13 +5355,26 @@  void QxlDevice::PresentThreadRoutine()
 
         if (drawables) {
             for (UINT i = 0; drawables[i]; ++i)
-                PushDrawable(drawables[i]);
+            {
+                ULONG n = PrepareDrawable(drawables[i]);
+                // only reason why drawables[i] is zeroed is stop flow
+                if (drawables[i])
+                {
+                    delayed += n;
+                    PushDrawable(drawables[i]);
+                }
+            }
             delete[] reinterpret_cast<BYTE*>(drawables);
         }
         else {
             DbgPrint(TRACE_LEVEL_WARNING, ("%s is being terminated\n", __FUNCTION__));
             break;
         }
+        if (delayed)
+        {
+            DbgPrint(TRACE_LEVEL_WARNING, ("%s: %d delayed chunks\n", __FUNCTION__, delayed));
+            delayed = 0;
+        }
     }
 }
 
diff --git a/qxldod/QxlDod.h b/qxldod/QxlDod.h
index b2f0e0c..5544a8c 100755
--- a/qxldod/QxlDod.h
+++ b/qxldod/QxlDod.h
@@ -474,6 +474,7 @@  ReleaseMutex(
 #define MAX_OUTPUT_RES 6
 
 typedef struct QXLOutput {
+    LIST_ENTRY list;
     UINT32 num_res;
 #ifdef DBG
     UINT32 type;
@@ -613,6 +614,8 @@  private:
     BOOLEAN PutBytesAlign(QXLDataChunk **chunk_ptr, UINT8 **now_ptr,
                             UINT8 **end_ptr, UINT8 *src, int size,
                             size_t alloc_size, PLIST_ENTRY pDelayed);
+    QXLDataChunk *MakeChunk(QXL_DELAYED_CHUNK *pdc);
+    ULONG PrepareDrawable(QXLDrawable*& drawable);
     void AsyncIo(UCHAR  Port, UCHAR Value);
     void SyncIo(UCHAR  Port, UCHAR Value);
     NTSTATUS UpdateChildStatus(BOOLEAN connect);

Comments

On Sun, 2017-03-12 at 10:45 +0200, Yuri Benditovich wrote:
> In case of VSync is active allocate bitmaps for drawable objects
> using non-forced requests. If immediate allocation is not possible,
> place entire bitmap into memory chunk allocated from the OS.
> If bitmap is allocated from device memory, but one of later
> chunks can't be allocated, allocate this and further chunks from
> OS memory. All these 'delayed' allocations placed into linked list
> which root entry is part of QXLOutput structure.
> From separated thread, before send drawable objects down, we review

separated -> separate
send -> sending

> the list of delayed chunks and allocate device memory (forced) to
> all of them.
> The cost is 2 pointers added to each drawable or cursor object.
> Cursor commands currently do not make use of them; in future
> we have an option to offload also cursor commands.
> 
> Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
> ---
>  qxldod/QxlDod.cpp | 110
> +++++++++++++++++++++++++++++++++++++++++++++++++++---
>  qxldod/QxlDod.h   |   3 ++
>  2 files changed, 108 insertions(+), 5 deletions(-)
> 
> diff --git a/qxldod/QxlDod.cpp b/qxldod/QxlDod.cpp
> index efc1fb3..0061d4c 100755
> --- a/qxldod/QxlDod.cpp
> +++ b/qxldod/QxlDod.cpp
> @@ -4263,6 +4263,13 @@ void QxlDevice::DrawableAddRes(QXLDrawable
> *drawable, Resource *res)
>      AddRes(output, res);
>  }
>  
> +static FORCEINLINE PLIST_ENTRY DelayedList(QXLDrawable *pd)
> +{
> +    QXLOutput *output;
> +    output = (QXLOutput *)((UINT8 *)pd - sizeof(QXLOutput));
> +    return &output->list;
> +}
> +
>  void QxlDevice::CursorCmdAddRes(QXLCursorCmd *cmd, Resource *res)
>  {
>      PAGED_CODE();
> @@ -4371,6 +4378,7 @@ QXLDrawable *QxlDevice::Drawable(UINT8 type,
> CONST RECT *area, CONST RECT *clip,
>      drawable->surfaces_dest[1] = - 1;
>      drawable->surfaces_dest[2] = -1;
>      CopyRect(&drawable->bbox, area);
> +    InitializeListHead(DelayedList(drawable));
>  
>      if (!SetClip(clip, drawable)) {
>          DbgPrint(TRACE_LEVEL_VERBOSE, ("%s: set clip failed\n",
> __FUNCTION__));
> @@ -4464,7 +4472,7 @@ BOOLEAN QxlDevice::AttachNewBitmap(QXLDrawable
> *drawable, UINT8 *src, UINT8 *src
>      Resource *image_res;
>      InternalImage *internal;
>      QXLDataChunk *chunk;
> -    PLIST_ENTRY pDelayedList = NULL;
> +    PLIST_ENTRY pDelayedList = bForce ? NULL :
> DelayedList(drawable);
>      UINT8* dest, *dest_end;
>  
>      height = drawable->u.copy.src_area.bottom;
> @@ -4544,8 +4552,17 @@ BOOLEAN QxlDevice::AttachNewBitmap(QXLDrawable
> *drawable, UINT8 *src, UINT8 *src
>          BOOLEAN b = PutBytesAlign(&chunk, &dest, &dest_end, src,
> line_size, alloc_size, pDelayedList);
>          if (!b)
>          {
> -            DbgPrint(TRACE_LEVEL_WARNING, ("%s: aborting copy of
> lines\n", __FUNCTION__));
> -            return FALSE;
> +            if (pitch < 0 && bForce)
> +            {
> +                DbgPrint(TRACE_LEVEL_WARNING, ("%s: aborting copy of
> lines (forced)\n", __FUNCTION__));
> +                return FALSE;
> +            }
> +            else
> +            {
> +                DbgPrint(TRACE_LEVEL_WARNING, ("%s: unexpected
> aborting copy of lines (force %d, pitch %d)\n", __FUNCTION__, bForce,
> pitch));
> +                return FALSE;
> +            }
> +            break;
>          }
>      }
>      return TRUE;
> @@ -4599,7 +4616,7 @@ QXLDrawable *QxlDevice::BltBits (
>      UINT8* src_end = src - pSrc->Pitch;
>      src += pSrc->Pitch * (height - 1);
>  
> -    if (!AttachNewBitmap(drawable, src, src_end, (INT)pSrc->Pitch,
> TRUE))
> +    if (!AttachNewBitmap(drawable, src, src_end, (INT)pSrc->Pitch,
> !g_bSupportVSync))
>      {
>          ReleaseOutputUnderLock(drawable->release_info.id);
>          drawable = NULL;
> @@ -5239,11 +5256,81 @@ void QxlDevice::StopPresentThread()
>      }
>  }
>  
> +QXLDataChunk *QxlDevice::MakeChunk(QXL_DELAYED_CHUNK *pdc)
> +{
> +    QXLDataChunk *chunk = (QXLDataChunk *)AllocMem(MSPACE_TYPE_VRAM,
> pdc->chunk.data_size + sizeof(QXLDataChunk), TRUE);
> +    if (chunk)
> +    {
> +        chunk->data_size = pdc->chunk.data_size;
> +        chunk->next_chunk = 0;
> +        RtlCopyMemory(chunk->data, pdc->chunk.data, chunk-
> >data_size);
> +    }
> +    return chunk;
> +}
> +
> +ULONG QxlDevice::PrepareDrawable(QXLDrawable*& drawable)
> +{
> +    ULONG n = 0;
> +    BOOLEAN bFail = FALSE;
> +    PLIST_ENTRY pe = DelayedList(drawable);
> +    QXLDataChunk *chunk, *lastchunk = NULL;
> +    if (IsListEmpty(pe))
> +        return 0;
> +
> +    while (!IsListEmpty(pe))
> +    {
> +        QXL_DELAYED_CHUNK *pdc = (QXL_DELAYED_CHUNK
> *)RemoveHeadList(pe);
> +        if (!lastchunk)
> +            lastchunk = (QXLDataChunk *)pdc->chunk.prev_chunk;
> +        if (!lastchunk)
> +        {
> +            // bitmap was not allocated, this is single delayed
> chunk
> +            if (AttachNewBitmap(
> +                drawable,
> +                pdc->chunk.data,
> +                pdc->chunk.data + pdc->chunk.data_size,
> +                -(drawable->u.copy.src_area.right * 4),
> +                TRUE))
> +            {
> +                ++n;
> +            }
> +            else
> +            {
> +                bFail = TRUE;
> +            }
> +        }
> +        if (lastchunk)
> +        {
> +            // some chunks were not allocated
> +            chunk = MakeChunk(pdc);
> +            if (chunk)
> +            {
> +                chunk->prev_chunk = PA(lastchunk, m_SurfaceMemSlot);
> +                lastchunk->next_chunk = PA(chunk, m_SurfaceMemSlot);
> +                lastchunk = chunk;
> +                ++n;
> +            }
> +            else
> +            {
> +                bFail = TRUE;
> +            }
> +        }
> +        delete[] reinterpret_cast<BYTE*>(pdc);
> +    }
> +    if (bFail)
> +    {
> +        ReleaseOutputUnderLock(drawable->release_info.id);
> +        drawable = NULL;
> +    }
> +    return n;
> +}
> +
>  void QxlDevice::PresentThreadRoutine()
>  {
>      PAGED_CODE();
>      int wait;
>      int notify;
> +    ULONG delayed = 0;
>      QXLDrawable** drawables;
>  
>      DbgPrint(TRACE_LEVEL_INFORMATION, ("--->%s\n", __FUNCTION__));
> @@ -5268,13 +5355,26 @@ void QxlDevice::PresentThreadRoutine()
>  
>          if (drawables) {
>              for (UINT i = 0; drawables[i]; ++i)
> -                PushDrawable(drawables[i]);
> +            {
> +                ULONG n = PrepareDrawable(drawables[i]);
> +                // only reason why drawables[i] is zeroed is stop
> flow
> +                if (drawables[i])
> +                {
> +                    delayed += n;
> +                    PushDrawable(drawables[i]);
> +                }
> +            }
>              delete[] reinterpret_cast<BYTE*>(drawables);
>          }
>          else {
>              DbgPrint(TRACE_LEVEL_WARNING, ("%s is being
> terminated\n", __FUNCTION__));
>              break;
>          }
> +        if (delayed)
> +        {
> +            DbgPrint(TRACE_LEVEL_WARNING, ("%s: %d delayed
> chunks\n", __FUNCTION__, delayed));
> +            delayed = 0;
> +        }
>      }
>  }
>  
> diff --git a/qxldod/QxlDod.h b/qxldod/QxlDod.h
> index b2f0e0c..5544a8c 100755
> --- a/qxldod/QxlDod.h
> +++ b/qxldod/QxlDod.h
> @@ -474,6 +474,7 @@ ReleaseMutex(
>  #define MAX_OUTPUT_RES 6
>  
>  typedef struct QXLOutput {
> +    LIST_ENTRY list;
>      UINT32 num_res;
>  #ifdef DBG
>      UINT32 type;
> @@ -613,6 +614,8 @@ private:
>      BOOLEAN PutBytesAlign(QXLDataChunk **chunk_ptr, UINT8 **now_ptr,
>                              UINT8 **end_ptr, UINT8 *src, int size,
>                              size_t alloc_size, PLIST_ENTRY
> pDelayed);
> +    QXLDataChunk *MakeChunk(QXL_DELAYED_CHUNK *pdc);
> +    ULONG PrepareDrawable(QXLDrawable*& drawable);
>      void AsyncIo(UCHAR  Port, UCHAR Value);
>      void SyncIo(UCHAR  Port, UCHAR Value);
>      NTSTATUS UpdateChildStatus(BOOLEAN connect);