[Spice-devel,09/12] qxl-wddm-dod: PutBytesAlign supports non-forced allocation

Submitted by Yuri Benditovich on March 12, 2017, 8:45 a.m.

Details

Message ID 1489308309-9728-10-git-send-email-yuri.benditovich@daynix.com
State New
Headers show
Series "Set of patches for further support of VSync" ( rev: 1 ) in Spice

Not browsing as part of any series.

Commit Message

Yuri Benditovich March 12, 2017, 8:45 a.m.
Preparation for allocation offload. PutBytesAlign called to
allocate memory chunk from device memory and copy data to it.
Now, if called with linked list root entry, able to use non-
forced allocation and if it fails, allocate delayed chunk from
OS memory and copy data to it. Later before send drawable command,
this chunk shall be processed, storage allocated from device memory
(forced) and data copied to it.

Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
---
 qxldod/QxlDod.cpp | 78 ++++++++++++++++++++++++++++++++++++-------------------
 qxldod/QxlDod.h   |  9 ++++++-
 2 files changed, 60 insertions(+), 27 deletions(-)

Patch hide | download patch | download mbox

diff --git a/qxldod/QxlDod.cpp b/qxldod/QxlDod.cpp
index 9a3803a..56a4cf2 100755
--- a/qxldod/QxlDod.cpp
+++ b/qxldod/QxlDod.cpp
@@ -4542,8 +4542,9 @@  QXLDrawable *QxlDevice::BltBits (
 
     for (; src != src_end; src -= pSrc->Pitch, alloc_size -= line_size) {
         if (!PutBytesAlign(&chunk, &dest, &dest_end, src,
-            line_size, alloc_size, line_size))
+            line_size, alloc_size, NULL))
         {
+            // not reachable when forced allocation used
             ReleaseOutputUnderLock(drawable->release_info.id);
             DbgPrint(TRACE_LEVEL_ERROR, ("Cannot allocate additional bitmap for drawable\n"));
             return NULL;
@@ -4568,37 +4569,63 @@  QXLDrawable *QxlDevice::BltBits (
     return drawable;
 }
 
+// can work in 2 modes:
+// forced - as before, when pDelayed not provided or if VSync is not in use
+// non-forced, if VSync is active and pDelayed provided. In this case, if memory
+// can't be allocated immediately, allocates 'delayed chunk' and copies data
+// to it. Further, before send to the device, this 'delayed chunk' should be processed,
+// regular chunk allocated from device memory and the data copied to it
 BOOLEAN QxlDevice::PutBytesAlign(QXLDataChunk **chunk_ptr, UINT8 **now_ptr,
                             UINT8 **end_ptr, UINT8 *src, int size,
-                            size_t alloc_size, uint32_t alignment)
+                            size_t alloc_size, PLIST_ENTRY pDelayed)
 {
     PAGED_CODE();
+    BOOLEAN bResult = TRUE;
+    BOOLEAN bForced = !g_bSupportVSync || !pDelayed;
     QXLDataChunk *chunk = *chunk_ptr;
     UINT8 *now = *now_ptr;
     UINT8 *end = *end_ptr;
+    size_t maxAllocSize = BITS_BUF_MAX - BITS_BUF_MAX % size;
+    alloc_size = MIN(size, maxAllocSize);
     DbgPrint(TRACE_LEVEL_VERBOSE, ("---> %s\n", __FUNCTION__));
 
     while (size) {
         int cp_size = (int)MIN(end - now, size);
         if (!cp_size) {
-            size_t aligned_size;
-            aligned_size = (int)MIN(alloc_size + alignment - 1, BITS_BUF_MAX);
-            aligned_size -=  aligned_size % alignment;
-
-            void *ptr = AllocMem(MSPACE_TYPE_VRAM, size + sizeof(QXLDataChunk), TRUE);
-            if (!ptr)
-            {
-                return FALSE;
+            void *ptr = (bForced || IsListEmpty(pDelayed)) ? AllocMem(MSPACE_TYPE_VRAM, alloc_size + sizeof(QXLDataChunk), bForced) : NULL;
+            if (ptr) {
+                chunk->next_chunk = PA(ptr, m_SurfaceMemSlot);
+                ((QXLDataChunk *)ptr)->prev_chunk = PA(chunk, m_SurfaceMemSlot);
+                chunk = (QXLDataChunk *)ptr;
+                chunk->data_size = 0;
+                chunk->next_chunk = 0;
+                now = chunk->data;
+                end = now + alloc_size;
+                cp_size = (int)MIN(end - now, size);
+            }
+            else if (pDelayed) {
+                ptr = new (PagedPool)BYTE[alloc_size + sizeof(QXL_DELAYED_CHUNK)];
+                if (ptr)
+                {
+                    QXL_DELAYED_CHUNK *pChunk = (QXL_DELAYED_CHUNK *)ptr;
+                    InsertTailList(pDelayed, &pChunk->list);
+                    pChunk->chunk.prev_chunk = (QXLPHYSICAL)chunk;
+                    chunk = &pChunk->chunk;
+                    chunk->data_size = 0;
+                    now = chunk->data;
+                    end = now + alloc_size;
+                    cp_size = (int)MIN(end - now, size);
+                }
+                else
+                {
+                    bResult = FALSE;
+                    break;
+                }
+            }
+            else {
+                bResult = FALSE;
+                break;
             }
-            chunk->next_chunk = PA(ptr, m_SurfaceMemSlot);
-            ((QXLDataChunk *)ptr)->prev_chunk = PA(chunk, m_SurfaceMemSlot);
-            chunk = (QXLDataChunk *)ptr;
-            chunk->data_size = 0;
-            chunk->next_chunk = 0;
-            now = chunk->data;
-            end = now + size;
-
-            cp_size = (int)MIN(end - now, size);
         }
         RtlCopyMemory(now, src, cp_size);
         src += cp_size;
@@ -4610,7 +4637,7 @@  BOOLEAN QxlDevice::PutBytesAlign(QXLDataChunk **chunk_ptr, UINT8 **now_ptr,
     *now_ptr = now;
     *end_ptr = end;
     DbgPrint(TRACE_LEVEL_VERBOSE, ("<--- %s\n", __FUNCTION__));
-    return TRUE;
+    return bResult;
 }
 
 VOID QxlDevice::BlackOutScreen(CURRENT_BDD_MODE* pCurrentBddMod)
@@ -4728,12 +4755,11 @@  NTSTATUS  QxlDevice::SetPointerShape(_In_ CONST DXGKARG_SETPOINTERSHAPE* pSetPoi
     end = (UINT8 *)res + CURSOR_ALLOC_SIZE;
     src_end = src + (pSetPointerShape->Pitch * pSetPointerShape->Height * num_images);
     for (; src != src_end; src += pSetPointerShape->Pitch) {
-        if (!PutBytesAlign(&chunk, &now, &end, src, line_size,
-                 PAGE_SIZE, 1))
-        {
-            DbgPrint(TRACE_LEVEL_ERROR, ("%s: Failed to allocate cursor bitmap\n", __FUNCTION__));
-            ReleaseOutputUnderLock(cursor_cmd->release_info.id);
-            return STATUS_INSUFFICIENT_RESOURCES;
+        if (!PutBytesAlign(&chunk, &now, &end, src, line_size, PAGE_SIZE - PAGE_SIZE % line_size, NULL)) {
+            // we have a chance to get here only with color cursor bigger than 45*45
+            // and only if we modify this procedure to use non-forced allocation  
+            DbgPrint(TRACE_LEVEL_ERROR, ("%s: failed to push part of shape\n", __FUNCTION__));
+            break;
         }
     }
     CursorCmdAddRes(cursor_cmd, res);
diff --git a/qxldod/QxlDod.h b/qxldod/QxlDod.h
index 3ee16ae..983bc29 100755
--- a/qxldod/QxlDod.h
+++ b/qxldod/QxlDod.h
@@ -260,6 +260,13 @@  public:
 };
 #endif
 
+typedef struct _QXL_DELAYED_CHUNK
+{
+    LIST_ENTRY list;
+    UINT32 type;
+    QXLDataChunk chunk;
+} QXL_DELAYED_CHUNK;
+
 class QxlDod;
 
 class HwDeviceInterface {
@@ -604,7 +611,7 @@  private:
     void PushCursor(void);
     BOOLEAN PutBytesAlign(QXLDataChunk **chunk_ptr, UINT8 **now_ptr,
                             UINT8 **end_ptr, UINT8 *src, int size,
-                            size_t alloc_size, uint32_t alignment);
+                            size_t alloc_size, PLIST_ENTRY pDelayed);
     void AsyncIo(UCHAR  Port, UCHAR Value);
     void SyncIo(UCHAR  Port, UCHAR Value);
     NTSTATUS UpdateChildStatus(BOOLEAN connect);

Comments

On Sun, 2017-03-12 at 10:45 +0200, Yuri Benditovich wrote:
> Preparation for allocation offload. 

Can you clarify what is meant by "allocation offload"? 

> PutBytesAlign called to
> allocate memory chunk from device memory and copy data to it.

The previous sentence is missing a verb. This requires the reader to
make an assumption about was was intended. I think it should be
"PutBytesAlign *is* called to ..." (but considering that the next
sentence starts with "Now, ...", perhaps the previous sentence was
intended to say "PutBytesAlign *was* called to..."?). A minor issue,
perhaps, but it helps with understanability.

> Now, if called with linked list root entry, able to use non-
> forced allocation and if it fails, allocate delayed chunk from

Again missing some verbs here. Perhaps better as (additions marked like
*this*):

"Now, if *PutBytesAlign() is* called with *a* linked list root entry,
*it is* able to use..."

> OS memory and copy data to it. Later before send drawable command,

send -> sending

> this chunk shall be processed, storage allocated from device memory
> (forced) and data copied to it.
> 
> Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
> ---
>  qxldod/QxlDod.cpp | 78 ++++++++++++++++++++++++++++++++++++---------
> ----------
>  qxldod/QxlDod.h   |  9 ++++++-
>  2 files changed, 60 insertions(+), 27 deletions(-)
> 
> diff --git a/qxldod/QxlDod.cpp b/qxldod/QxlDod.cpp
> index 9a3803a..56a4cf2 100755
> --- a/qxldod/QxlDod.cpp
> +++ b/qxldod/QxlDod.cpp
> @@ -4542,8 +4542,9 @@ QXLDrawable *QxlDevice::BltBits (
>  
>      for (; src != src_end; src -= pSrc->Pitch, alloc_size -=
> line_size) {
>          if (!PutBytesAlign(&chunk, &dest, &dest_end, src,
> -            line_size, alloc_size, line_size))
> +            line_size, alloc_size, NULL))
>          {
> +            // not reachable when forced allocation used
>              ReleaseOutputUnderLock(drawable->release_info.id);
>              DbgPrint(TRACE_LEVEL_ERROR, ("Cannot allocate additional
> bitmap for drawable\n"));
>              return NULL;
> @@ -4568,37 +4569,63 @@ QXLDrawable *QxlDevice::BltBits (
>      return drawable;
>  }
>  
> +// can work in 2 modes:
> +// forced - as before, when pDelayed not provided or if VSync is not
> in use
> +// non-forced, if VSync is active and pDelayed provided. In this
> case, if memory
> +// can't be allocated immediately, allocates 'delayed chunk' and
> copies data
> +// to it. Further, before send to the device, this 'delayed chunk'
> should be processed,
> +// regular chunk allocated from device memory and the data copied to
> it
>  BOOLEAN QxlDevice::PutBytesAlign(QXLDataChunk **chunk_ptr, UINT8
> **now_ptr,
>                              UINT8 **end_ptr, UINT8 *src, int size,
> -                            size_t alloc_size, uint32_t alignment)
> +                            size_t alloc_size, PLIST_ENTRY pDelayed)
>  {
>      PAGED_CODE();
> +    BOOLEAN bResult = TRUE;
> +    BOOLEAN bForced = !g_bSupportVSync || !pDelayed;
>      QXLDataChunk *chunk = *chunk_ptr;
>      UINT8 *now = *now_ptr;
>      UINT8 *end = *end_ptr;
> +    size_t maxAllocSize = BITS_BUF_MAX - BITS_BUF_MAX % size;
> +    alloc_size = MIN(size, maxAllocSize);
>      DbgPrint(TRACE_LEVEL_VERBOSE, ("---> %s\n", __FUNCTION__));
>  
>      while (size) {
>          int cp_size = (int)MIN(end - now, size);
>          if (!cp_size) {
> -            size_t aligned_size;
> -            aligned_size = (int)MIN(alloc_size + alignment - 1,
> BITS_BUF_MAX);
> -            aligned_size -=  aligned_size % alignment;
> -
> -            void *ptr = AllocMem(MSPACE_TYPE_VRAM, size +
> sizeof(QXLDataChunk), TRUE);
> -            if (!ptr)
> -            {
> -                return FALSE;
> +            void *ptr = (bForced || IsListEmpty(pDelayed)) ?
> AllocMem(MSPACE_TYPE_VRAM, alloc_size + sizeof(QXLDataChunk),
> bForced) : NULL;
> +            if (ptr) {
> +                chunk->next_chunk = PA(ptr, m_SurfaceMemSlot);
> +                ((QXLDataChunk *)ptr)->prev_chunk = PA(chunk,
> m_SurfaceMemSlot);
> +                chunk = (QXLDataChunk *)ptr;
> +                chunk->data_size = 0;
> +                chunk->next_chunk = 0;
> +                now = chunk->data;
> +                end = now + alloc_size;
> +                cp_size = (int)MIN(end - now, size);
> +            }
> +            else if (pDelayed) {
> +                ptr = new (PagedPool)BYTE[alloc_size +
> sizeof(QXL_DELAYED_CHUNK)];
> +                if (ptr)
> +                {
> +                    QXL_DELAYED_CHUNK *pChunk = (QXL_DELAYED_CHUNK
> *)ptr;
> +                    InsertTailList(pDelayed, &pChunk->list);
> +                    pChunk->chunk.prev_chunk = (QXLPHYSICAL)chunk;
> +                    chunk = &pChunk->chunk;
> +                    chunk->data_size = 0;
> +                    now = chunk->data;
> +                    end = now + alloc_size;
> +                    cp_size = (int)MIN(end - now, size);
> +                }
> +                else
> +                {
> +                    bResult = FALSE;
> +                    break;
> +                }
> +            }
> +            else {
> +                bResult = FALSE;
> +                break;
>              }
> -            chunk->next_chunk = PA(ptr, m_SurfaceMemSlot);
> -            ((QXLDataChunk *)ptr)->prev_chunk = PA(chunk,
> m_SurfaceMemSlot);
> -            chunk = (QXLDataChunk *)ptr;
> -            chunk->data_size = 0;
> -            chunk->next_chunk = 0;
> -            now = chunk->data;
> -            end = now + size;
> -
> -            cp_size = (int)MIN(end - now, size);
>          }
>          RtlCopyMemory(now, src, cp_size);
>          src += cp_size;
> @@ -4610,7 +4637,7 @@ BOOLEAN QxlDevice::PutBytesAlign(QXLDataChunk
> **chunk_ptr, UINT8 **now_ptr,
>      *now_ptr = now;
>      *end_ptr = end;
>      DbgPrint(TRACE_LEVEL_VERBOSE, ("<--- %s\n", __FUNCTION__));
> -    return TRUE;
> +    return bResult;
>  }
>  
>  VOID QxlDevice::BlackOutScreen(CURRENT_BDD_MODE* pCurrentBddMod)
> @@ -4728,12 +4755,11 @@ NTSTATUS  QxlDevice::SetPointerShape(_In_
> CONST DXGKARG_SETPOINTERSHAPE* pSetPoi
>      end = (UINT8 *)res + CURSOR_ALLOC_SIZE;
>      src_end = src + (pSetPointerShape->Pitch * pSetPointerShape-
> >Height * num_images);
>      for (; src != src_end; src += pSetPointerShape->Pitch) {
> -        if (!PutBytesAlign(&chunk, &now, &end, src, line_size,
> -                 PAGE_SIZE, 1))
> -        {
> -            DbgPrint(TRACE_LEVEL_ERROR, ("%s: Failed to allocate
> cursor bitmap\n", __FUNCTION__));
> -            ReleaseOutputUnderLock(cursor_cmd->release_info.id);
> -            return STATUS_INSUFFICIENT_RESOURCES;
> +        if (!PutBytesAlign(&chunk, &now, &end, src, line_size,
> PAGE_SIZE - PAGE_SIZE % line_size, NULL)) {
> +            // we have a chance to get here only with color cursor
> bigger than 45*45
> +            // and only if we modify this procedure to use non-
> forced allocation  
> +            DbgPrint(TRACE_LEVEL_ERROR, ("%s: failed to push part of
> shape\n", __FUNCTION__));
> +            break;
>          }
>      }
>      CursorCmdAddRes(cursor_cmd, res);
> diff --git a/qxldod/QxlDod.h b/qxldod/QxlDod.h
> index 3ee16ae..983bc29 100755
> --- a/qxldod/QxlDod.h
> +++ b/qxldod/QxlDod.h
> @@ -260,6 +260,13 @@ public:
>  };
>  #endif
>  
> +typedef struct _QXL_DELAYED_CHUNK
> +{
> +    LIST_ENTRY list;
> +    UINT32 type;
> +    QXLDataChunk chunk;
> +} QXL_DELAYED_CHUNK;
> +
>  class QxlDod;
>  
>  class HwDeviceInterface {
> @@ -604,7 +611,7 @@ private:
>      void PushCursor(void);
>      BOOLEAN PutBytesAlign(QXLDataChunk **chunk_ptr, UINT8 **now_ptr,
>                              UINT8 **end_ptr, UINT8 *src, int size,
> -                            size_t alloc_size, uint32_t alignment);
> +                            size_t alloc_size, PLIST_ENTRY
> pDelayed);
>      void AsyncIo(UCHAR  Port, UCHAR Value);
>      void SyncIo(UCHAR  Port, UCHAR Value);
>      NTSTATUS UpdateChildStatus(BOOLEAN connect);