[Spice-devel,v2,09/12] qxl-wddm-dod: PutBytesAlign supports non-forced allocation

Submitted by Yuri Benditovich on April 1, 2017, 4:40 p.m.

Details

Message ID 1491064839-13612-10-git-send-email-yuri.benditovich@daynix.com
State New
Headers show
Series "Set of patches for further support of VSync" ( rev: 2 ) in Spice

Not browsing as part of any series.

Commit Message

Yuri Benditovich April 1, 2017, 4:40 p.m.
From: "yuri.benditovich@daynix.com" <yuri.benditovich@daynix.com>

Preparation for offload of allocations from device's memory
to separate thread. Procedure PutBytesAlign is called to
allocate memory chunk from device memory and copy data to it.
With current commit the procedure (if called with non-NULL
linked list root entry parameter) can use non-forced allocation.
If such allocation fails, it allocates 'delayed' chunk from
OS memory and copies data to it. Later before sending drawable command
this chunk shall be processed, storage allocated from device memory
(forced) and data copied to it.

Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
---
 qxldod/QxlDod.cpp | 74 ++++++++++++++++++++++++++++++++++++-------------------
 qxldod/QxlDod.h   |  9 ++++++-
 2 files changed, 57 insertions(+), 26 deletions(-)

Patch hide | download patch | download mbox

diff --git a/qxldod/QxlDod.cpp b/qxldod/QxlDod.cpp
index f8440d4..19efacc 100755
--- a/qxldod/QxlDod.cpp
+++ b/qxldod/QxlDod.cpp
@@ -4490,7 +4490,8 @@  QXLDrawable *QxlDevice::PrepareBltBits (
 
     for (; src != src_end; src -= pSrc->Pitch, alloc_size -= line_size) {
         if (!PutBytesAlign(&chunk, &dest, &dest_end, src,
-            line_size, alloc_size, line_size)) {
+            line_size, alloc_size, NULL)) {
+            // not reachable when forced allocation used
             ReleaseOutput(drawable->release_info.id);
             DbgPrint(TRACE_LEVEL_ERROR, ("Cannot allocate additional bitmap for drawable\n"));
             return NULL;
@@ -4515,36 +4516,60 @@  QXLDrawable *QxlDevice::PrepareBltBits (
     return drawable;
 }
 
+// can work in 2 modes:
+// forced - as before, when pDelayed not provided or if VSync is not in use
+// non-forced, if VSync is active and pDelayed provided. In this case, if memory
+// can't be allocated immediately, allocates 'delayed chunk' and copies data
+// to it. Further, before send to the device, this 'delayed chunk' should be processed,
+// regular chunk allocated from device memory and the data copied to it
 BOOLEAN QxlDevice::PutBytesAlign(QXLDataChunk **chunk_ptr, UINT8 **now_ptr,
                             UINT8 **end_ptr, UINT8 *src, int size,
-                            size_t alloc_size, uint32_t alignment)
+                            size_t alloc_size, PLIST_ENTRY pDelayed)
 {
     PAGED_CODE();
+    BOOLEAN bResult = TRUE;
+    BOOLEAN bForced = !g_bSupportVSync || !pDelayed;
     QXLDataChunk *chunk = *chunk_ptr;
     UINT8 *now = *now_ptr;
     UINT8 *end = *end_ptr;
+    size_t maxAllocSize = BITS_BUF_MAX - BITS_BUF_MAX % size;
+    alloc_size = MIN(size, maxAllocSize);
     DbgPrint(TRACE_LEVEL_VERBOSE, ("---> %s\n", __FUNCTION__));
 
     while (size) {
         int cp_size = (int)MIN(end - now, size);
         if (!cp_size) {
-            size_t aligned_size;
-            aligned_size = (int)MIN(alloc_size + alignment - 1, BITS_BUF_MAX);
-            aligned_size -=  aligned_size % alignment;
-
-            void *ptr = AllocMem(MSPACE_TYPE_VRAM, size + sizeof(QXLDataChunk), TRUE);
-            if (!ptr) {
-                return FALSE;
+            void *ptr = (bForced || IsListEmpty(pDelayed)) ? AllocMem(MSPACE_TYPE_VRAM, alloc_size + sizeof(QXLDataChunk), bForced) : NULL;
+            if (ptr) {
+                chunk->next_chunk = PA(ptr, m_SurfaceMemSlot);
+                ((QXLDataChunk *)ptr)->prev_chunk = PA(chunk, m_SurfaceMemSlot);
+                chunk = (QXLDataChunk *)ptr;
+                chunk->data_size = 0;
+                chunk->next_chunk = 0;
+                now = chunk->data;
+                end = now + alloc_size;
+                cp_size = (int)MIN(end - now, size);
+            }
+            else if (pDelayed) {
+                ptr = new (PagedPool)BYTE[alloc_size + sizeof(QXL_DELAYED_CHUNK)];
+                if (ptr) {
+                    QXL_DELAYED_CHUNK *pChunk = (QXL_DELAYED_CHUNK *)ptr;
+                    InsertTailList(pDelayed, &pChunk->list);
+                    pChunk->chunk.prev_chunk = (QXLPHYSICAL)chunk;
+                    chunk = &pChunk->chunk;
+                    chunk->data_size = 0;
+                    now = chunk->data;
+                    end = now + alloc_size;
+                    cp_size = (int)MIN(end - now, size);
+                } else {
+                    bResult = FALSE;
+                    break;
+                }
+            }
+            else {
+                bResult = FALSE;
+                break;
             }
-            chunk->next_chunk = PA(ptr, m_SurfaceMemSlot);
-            ((QXLDataChunk *)ptr)->prev_chunk = PA(chunk, m_SurfaceMemSlot);
-            chunk = (QXLDataChunk *)ptr;
-            chunk->data_size = 0;
-            chunk->next_chunk = 0;
-            now = chunk->data;
-            end = now + size;
-
-            cp_size = (int)MIN(end - now, size);
         }
         RtlCopyMemory(now, src, cp_size);
         src += cp_size;
@@ -4556,7 +4581,7 @@  BOOLEAN QxlDevice::PutBytesAlign(QXLDataChunk **chunk_ptr, UINT8 **now_ptr,
     *now_ptr = now;
     *end_ptr = end;
     DbgPrint(TRACE_LEVEL_VERBOSE, ("<--- %s\n", __FUNCTION__));
-    return TRUE;
+    return bResult;
 }
 
 VOID QxlDevice::BlackOutScreen(CURRENT_BDD_MODE* pCurrentBddMod)
@@ -4674,12 +4699,11 @@  NTSTATUS  QxlDevice::SetPointerShape(_In_ CONST DXGKARG_SETPOINTERSHAPE* pSetPoi
     end = (UINT8 *)res + CURSOR_ALLOC_SIZE;
     src_end = src + (pSetPointerShape->Pitch * pSetPointerShape->Height * num_images);
     for (; src != src_end; src += pSetPointerShape->Pitch) {
-        if (!PutBytesAlign(&chunk, &now, &end, src, line_size,
-                 PAGE_SIZE, 1))
-        {
-            DbgPrint(TRACE_LEVEL_ERROR, ("%s: Failed to allocate cursor bitmap\n", __FUNCTION__));
-            ReleaseOutput(cursor_cmd->release_info.id);
-            return STATUS_INSUFFICIENT_RESOURCES;
+        if (!PutBytesAlign(&chunk, &now, &end, src, line_size, PAGE_SIZE - PAGE_SIZE % line_size, NULL)) {
+            // we have a chance to get here only with color cursor bigger than 45*45
+            // and only if we modify this procedure to use non-forced allocation  
+            DbgPrint(TRACE_LEVEL_ERROR, ("%s: failed to push part of shape\n", __FUNCTION__));
+            break;
         }
     }
     CursorCmdAddRes(cursor_cmd, res);
diff --git a/qxldod/QxlDod.h b/qxldod/QxlDod.h
index cbcd11e..ca93322 100755
--- a/qxldod/QxlDod.h
+++ b/qxldod/QxlDod.h
@@ -260,6 +260,13 @@  public:
 };
 #endif
 
+typedef struct _QXL_DELAYED_CHUNK
+{
+    LIST_ENTRY list;
+    UINT32 type;
+    QXLDataChunk chunk;
+} QXL_DELAYED_CHUNK;
+
 class QxlDod;
 
 class HwDeviceInterface {
@@ -603,7 +610,7 @@  private:
     void PushCursor(void);
     BOOLEAN PutBytesAlign(QXLDataChunk **chunk_ptr, UINT8 **now_ptr,
                             UINT8 **end_ptr, UINT8 *src, int size,
-                            size_t alloc_size, uint32_t alignment);
+                            size_t alloc_size, PLIST_ENTRY pDelayed);
     void AsyncIo(UCHAR  Port, UCHAR Value);
     void SyncIo(UCHAR  Port, UCHAR Value);
     NTSTATUS UpdateChildStatus(BOOLEAN connect);

Comments

> 
> From: "yuri.benditovich@daynix.com" <yuri.benditovich@daynix.com>
> 
> Preparation for offload of allocations from device's memory
> to separate thread. Procedure PutBytesAlign is called to
> allocate memory chunk from device memory and copy data to it.
> With current commit the procedure (if called with non-NULL
> linked list root entry parameter) can use non-forced allocation.
> If such allocation fails, it allocates 'delayed' chunk from
> OS memory and copies data to it. Later before sending drawable command
> this chunk shall be processed, storage allocated from device memory
> (forced) and data copied to it.
> 
> Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
> ---
>  qxldod/QxlDod.cpp | 74
>  ++++++++++++++++++++++++++++++++++++-------------------
>  qxldod/QxlDod.h   |  9 ++++++-
>  2 files changed, 57 insertions(+), 26 deletions(-)
> 
> diff --git a/qxldod/QxlDod.cpp b/qxldod/QxlDod.cpp
> index f8440d4..19efacc 100755
> --- a/qxldod/QxlDod.cpp
> +++ b/qxldod/QxlDod.cpp
> @@ -4490,7 +4490,8 @@ QXLDrawable *QxlDevice::PrepareBltBits (
>  
>      for (; src != src_end; src -= pSrc->Pitch, alloc_size -= line_size) {
>          if (!PutBytesAlign(&chunk, &dest, &dest_end, src,
> -            line_size, alloc_size, line_size)) {
> +            line_size, alloc_size, NULL)) {
> +            // not reachable when forced allocation used
>              ReleaseOutput(drawable->release_info.id);
>              DbgPrint(TRACE_LEVEL_ERROR, ("Cannot allocate additional bitmap
>              for drawable\n"));
>              return NULL;
> @@ -4515,36 +4516,60 @@ QXLDrawable *QxlDevice::PrepareBltBits (
>      return drawable;
>  }
>  
> +// can work in 2 modes:
> +// forced - as before, when pDelayed not provided or if VSync is not in use
> +// non-forced, if VSync is active and pDelayed provided. In this case, if
> memory
> +// can't be allocated immediately, allocates 'delayed chunk' and copies data
> +// to it. Further, before send to the device, this 'delayed chunk' should be
> processed,
> +// regular chunk allocated from device memory and the data copied to it
>  BOOLEAN QxlDevice::PutBytesAlign(QXLDataChunk **chunk_ptr, UINT8 **now_ptr,
>                              UINT8 **end_ptr, UINT8 *src, int size,
> -                            size_t alloc_size, uint32_t alignment)
> +                            size_t alloc_size, PLIST_ENTRY pDelayed)
>  {
>      PAGED_CODE();
> +    BOOLEAN bResult = TRUE;
> +    BOOLEAN bForced = !g_bSupportVSync || !pDelayed;
>      QXLDataChunk *chunk = *chunk_ptr;
>      UINT8 *now = *now_ptr;
>      UINT8 *end = *end_ptr;
> +    size_t maxAllocSize = BITS_BUF_MAX - BITS_BUF_MAX % size;
> +    alloc_size = MIN(size, maxAllocSize);
>      DbgPrint(TRACE_LEVEL_VERBOSE, ("---> %s\n", __FUNCTION__));
>  
>      while (size) {
>          int cp_size = (int)MIN(end - now, size);
>          if (!cp_size) {
> -            size_t aligned_size;
> -            aligned_size = (int)MIN(alloc_size + alignment - 1,
> BITS_BUF_MAX);
> -            aligned_size -=  aligned_size % alignment;
> -
> -            void *ptr = AllocMem(MSPACE_TYPE_VRAM, size +
> sizeof(QXLDataChunk), TRUE);
> -            if (!ptr) {
> -                return FALSE;
> +            void *ptr = (bForced || IsListEmpty(pDelayed)) ?
> AllocMem(MSPACE_TYPE_VRAM, alloc_size + sizeof(QXLDataChunk), bForced) :
> NULL;
> +            if (ptr) {
> +                chunk->next_chunk = PA(ptr, m_SurfaceMemSlot);
> +                ((QXLDataChunk *)ptr)->prev_chunk = PA(chunk,
> m_SurfaceMemSlot);
> +                chunk = (QXLDataChunk *)ptr;
> +                chunk->data_size = 0;
> +                chunk->next_chunk = 0;
> +                now = chunk->data;
> +                end = now + alloc_size;
> +                cp_size = (int)MIN(end - now, size);

this ...

> +            }
> +            else if (pDelayed) {
> +                ptr = new (PagedPool)BYTE[alloc_size +
> sizeof(QXL_DELAYED_CHUNK)];
> +                if (ptr) {
> +                    QXL_DELAYED_CHUNK *pChunk = (QXL_DELAYED_CHUNK *)ptr;
> +                    InsertTailList(pDelayed, &pChunk->list);
> +                    pChunk->chunk.prev_chunk = (QXLPHYSICAL)chunk;
> +                    chunk = &pChunk->chunk;
> +                    chunk->data_size = 0;
> +                    now = chunk->data;
> +                    end = now + alloc_size;
> +                    cp_size = (int)MIN(end - now, size);

.. and this ...

> +                } else {
> +                    bResult = FALSE;
> +                    break;
> +                }
> +            }
> +            else {
> +                bResult = FALSE;
> +                break;
>              }
> -            chunk->next_chunk = PA(ptr, m_SurfaceMemSlot);
> -            ((QXLDataChunk *)ptr)->prev_chunk = PA(chunk, m_SurfaceMemSlot);
> -            chunk = (QXLDataChunk *)ptr;


> -            chunk->data_size = 0;
> -            chunk->next_chunk = 0;
> -            now = chunk->data;
> -            end = now + size;
> -
> -            cp_size = (int)MIN(end - now, size);

.. are very similar to this. I think would be better
to set chunk variable and initialize the common fields, also
to minimize patch size.

>          }
>          RtlCopyMemory(now, src, cp_size);
>          src += cp_size;
> @@ -4556,7 +4581,7 @@ BOOLEAN QxlDevice::PutBytesAlign(QXLDataChunk
> **chunk_ptr, UINT8 **now_ptr,
>      *now_ptr = now;
>      *end_ptr = end;
>      DbgPrint(TRACE_LEVEL_VERBOSE, ("<--- %s\n", __FUNCTION__));
> -    return TRUE;
> +    return bResult;
>  }
>  
>  VOID QxlDevice::BlackOutScreen(CURRENT_BDD_MODE* pCurrentBddMod)
> @@ -4674,12 +4699,11 @@ NTSTATUS  QxlDevice::SetPointerShape(_In_ CONST
> DXGKARG_SETPOINTERSHAPE* pSetPoi
>      end = (UINT8 *)res + CURSOR_ALLOC_SIZE;
>      src_end = src + (pSetPointerShape->Pitch * pSetPointerShape->Height *
>      num_images);
>      for (; src != src_end; src += pSetPointerShape->Pitch) {
> -        if (!PutBytesAlign(&chunk, &now, &end, src, line_size,
> -                 PAGE_SIZE, 1))
> -        {
> -            DbgPrint(TRACE_LEVEL_ERROR, ("%s: Failed to allocate cursor
> bitmap\n", __FUNCTION__));
> -            ReleaseOutput(cursor_cmd->release_info.id);
> -            return STATUS_INSUFFICIENT_RESOURCES;
> +        if (!PutBytesAlign(&chunk, &now, &end, src, line_size, PAGE_SIZE -
> PAGE_SIZE % line_size, NULL)) {
> +            // we have a chance to get here only with color cursor bigger
> than 45*45
> +            // and only if we modify this procedure to use non-forced
> allocation
> +            DbgPrint(TRACE_LEVEL_ERROR, ("%s: failed to push part of
> shape\n", __FUNCTION__));
> +            break;
>          }
>      }
>      CursorCmdAddRes(cursor_cmd, res);
> diff --git a/qxldod/QxlDod.h b/qxldod/QxlDod.h
> index cbcd11e..ca93322 100755
> --- a/qxldod/QxlDod.h
> +++ b/qxldod/QxlDod.h
> @@ -260,6 +260,13 @@ public:
>  };
>  #endif
>  
> +typedef struct _QXL_DELAYED_CHUNK
> +{
> +    LIST_ENTRY list;
> +    UINT32 type;
> +    QXLDataChunk chunk;
> +} QXL_DELAYED_CHUNK;
> +
>  class QxlDod;
>  
>  class HwDeviceInterface {
> @@ -603,7 +610,7 @@ private:
>      void PushCursor(void);
>      BOOLEAN PutBytesAlign(QXLDataChunk **chunk_ptr, UINT8 **now_ptr,
>                              UINT8 **end_ptr, UINT8 *src, int size,
> -                            size_t alloc_size, uint32_t alignment);
> +                            size_t alloc_size, PLIST_ENTRY pDelayed);
>      void AsyncIo(UCHAR  Port, UCHAR Value);
>      void SyncIo(UCHAR  Port, UCHAR Value);
>      NTSTATUS UpdateChildStatus(BOOLEAN connect);

Frediano