[Spice-devel,qxl-win,3/4] display: support clearing the pci-bar when the pdev is disabled for revision<3 as well, RHBZ #731644

Submitted by Yonit Halperin on Aug. 24, 2011, 7:05 p.m.

Details

Message ID 1314187529-13633-4-git-send-email-yhalperi@redhat.com
State New, archived
Headers show

Not browsing as part of any series.

Commit Message

Yonit Halperin Aug. 24, 2011, 7:05 p.m.
see also commit 3218f6cdb95a73.

In revision < 3, objects (surfaces and other devram objects) stayed alive in the pci ram
after DrvAssertMode(Disable) was called. Then, when another display driver session started (upon switch user),
the driver had newly initiated mspace, but an old release ring (with objects from the older session's mspace).
This state led to a crash.
---
 display/driver.c  |   98 +++++++++++++++++++++++++++++++++++++----------------
 display/res.c     |    3 +-
 display/surface.c |    6 +++
 3 files changed, 77 insertions(+), 30 deletions(-)

Patch hide | download patch | download mbox

diff --git a/display/driver.c b/display/driver.c
index 252d429..0c6fe32 100644
--- a/display/driver.c
+++ b/display/driver.c
@@ -957,24 +957,83 @@  VOID DrvDisableSurface(DHPDEV in_pdev)
     DEBUG_PRINT((pdev, 1, "%s: 0x%lx exit\n", __FUNCTION__, pdev));
 }
 
+static void FlushSurfaces(PDev *pdev)
+{
+    UINT32 surface_id;
+    SurfaceInfo *surface_info;
+    SURFOBJ *surf_obj;
+    RECTL area = {0, 0, 0, 0};
+
+    if (pdev->pci_revision < QXL_REVISION_STABLE_V10) {
+        DEBUG_PRINT((pdev, 1, "%s: revision too old for QXL_IO_FLUSH_SURFACES", __FUNCTION__));
+        for (surface_id = pdev->n_surfaces - 1; surface_id >  0 ; --surface_id) {
+            surface_info = GetSurfaceInfo(pdev, surface_id);
+            // TODO: check what this condinitions means.
+            if (!surface_info->draw_area.base_mem) {
+                continue;
+            }
+            surf_obj = surface_info->draw_area.surf_obj;
+            if (!surf_obj) {
+                continue;
+            }
+            area.right = surf_obj->sizlBitmap.cx;
+            area.bottom = surf_obj->sizlBitmap.cy;
+            UpdateArea(pdev,&area, surface_id);
+        }
+    } else {
+        async_io(pdev, ASYNCABLE_FLUSH_SURFACES, 0);
+    }
+}
+
+static BOOL FlushRelease(PDev *pdev)
+{
+    // TODO: why didn't we use reset for revision 3?
+    if (pdev->pci_revision<  QXL_REVISION_STABLE_V10) {
+        DWORD length;
+
+        DEBUG_PRINT((pdev, 1, "%s: revision too old for QXL_IO_FLUSH_RELEASE", __FUNCTION__));
+        if (EngDeviceIoControl(pdev->driver, IOCTL_VIDEO_RESET_DEVICE,
+                               NULL, 0, NULL, 0, &length)) {
+            DEBUG_PRINT((NULL, 0, "%s: reset failed 0x%lx\n", __FUNCTION__, pdev));
+            return FALSE;
+        }
+    } else {
+        /* Free release ring contents */
+        ReleaseCacheDeviceMemoryResources(pdev);
+        EmptyReleaseRing(pdev);
+        /* Get the last free list onto the release ring */
+        sync_io(pdev, pdev->flush_release_port, 0);
+        DEBUG_PRINT((pdev, 4, "%s after FLUSH_RELEASE\n", __FUNCTION__));
+        /* And release that. mspace allocators should be clean after. */
+        EmptyReleaseRing(pdev);
+    }
+    return TRUE;
+}
+
 static BOOL AssertModeDisable(PDev *pdev)
 {
     DEBUG_PRINT((pdev, 3, "%s entry\n", __FUNCTION__));
     /* flush command ring and update all surfaces */
-    async_io(pdev, ASYNCABLE_FLUSH_SURFACES, 0);
+    FlushSurfaces(pdev);
+    DebugCountAliveSurfaces(pdev);
+    /*
+     * this call is redundant for
+     * pci_revision <  QXL_REVISION_STABLE_V10, due to the
+     * IOCTL_VIDEO_RESET_DEVICE in FlushRelease. However,
+     * MoveAllSurfacesToRam depends on destroy_all_surfaces
+     * in case of failure.
+     * TODO: make MoveAllSurfacesToRam send destroy_surface
+     * commands instead of create_surface commands in case
+     * of failure
+     */
     async_io(pdev, ASYNCABLE_DESTROY_ALL_SURFACES, 0);
     /* move all surfaces from device to system memory */
     if (!MoveAllSurfacesToRam(pdev)) {
         return FALSE;
     }
-    /* Free release ring contents */
-    ReleaseCacheDeviceMemoryResources(pdev);
-    EmptyReleaseRing(pdev);
-    /* Get the last free list onto the release ring */
-    sync_io(pdev, pdev->flush_release_port, 0);
-    DEBUG_PRINT((pdev, 4, "%s after FLUSH_RELEASE\n", __FUNCTION__));
-    /* And release that. mspace allocators should be clean after. */
-    EmptyReleaseRing(pdev);
+    if (!FlushRelease(pdev)) {
+        return FALSE;
+    }
     RemoveVRamSlot(pdev);
     DebugCountAliveSurfaces(pdev);
     DEBUG_PRINT((pdev, 4, "%s: [%d,%d] [%d,%d] [%d,%d] %lx\n", __FUNCTION__,
@@ -1002,31 +1061,12 @@  static void AssertModeEnable(PDev *pdev)
     DebugCountAliveSurfaces(pdev);
 }
 
-BOOL DrvAssertModeOld(PDev *pdev, BOOL enable)
-{
-    if (enable) {
-        InitResources(pdev);
-        EnableQXLPrimarySurface(pdev);
-        CreateVRamSlot(pdev);
-    } else {
-        DisableQXLPrimarySurface(pdev, 0);
-        RemoveVRamSlot(pdev);
-    }
-    DEBUG_PRINT((pdev, 1, "%s: 0x%lx exit %d\n", __FUNCTION__, pdev, enable));
-    return TRUE;
-}
-
 BOOL DrvAssertMode(DHPDEV in_pdev, BOOL enable)
 {
     PDev* pdev = (PDev*)in_pdev;
     BOOL ret = TRUE;
 
-    DEBUG_PRINT((pdev, 1, "%s: 0x%lx %d\n", __FUNCTION__, pdev, enable));
-    if (pdev->pci_revision < QXL_REVISION_STABLE_V10) {
-        return DrvAssertModeOld(pdev, enable);
-    }
-    /* new implementation that works correctly only with newer devices
-     * that implement QXL_IO_FLUSH_RELEASE */
+    DEBUG_PRINT((pdev, 1, "%s: 0x%lx revision %d enable %d\n", __FUNCTION__, pdev, pdev->pci_revision, enable));
     if (pdev->enabled == enable) {
         DEBUG_PRINT((pdev, 1, "%s: called twice with same argument (%d)\n", __FUNCTION__,
             enable));
diff --git a/display/res.c b/display/res.c
index 850d35a..1274f43 100644
--- a/display/res.c
+++ b/display/res.c
@@ -676,10 +676,11 @@  void InitResources(PDev *pdev)
         pdev->Res = global_res[id];
         DEBUG_PRINT((pdev, 3, "%s: calling InitRes (id == %d)\n", __FUNCTION__, id));
         InitRes(pdev);
+         DEBUG_PRINT((pdev, 1, "%s: called InitRes (id == %d)\n", __FUNCTION__, id));
     } else {
         pdev->Res = global_res[id];
     }
-    DEBUG_PRINT((pdev, 3, "%s: exit\n", __FUNCTION__));
+    DEBUG_PRINT((pdev, 1, "%s: exit\n", __FUNCTION__));
     EngReleaseSemaphore(res_sem);
 }
 
diff --git a/display/surface.c b/display/surface.c
index b40721d..8422cd3 100644
--- a/display/surface.c
+++ b/display/surface.c
@@ -359,6 +359,12 @@  BOOL MoveAllSurfacesToRam(PDev *pdev)
             DEBUG_PRINT((pdev, 0, "%s: %d: EngModifySurface failed, sending create\n",
                          __FUNCTION__, surface_id));
             phys_mem = SurfaceToPhysical(pdev, surface_info->draw_area.base_mem);
+            /*
+             * TODO: bug. create_command should be send for all surfaces >= surface_id
+             *       since they stay in the pci-bar. Alternatively,
+             *       don't call destroy_all_surfaces, instead send destroy commands
+             *       for all surfaces with id < surface_id.
+             */
             SendSurfaceCreateCommand(pdev, surface_id, surf_obj->sizlBitmap,
                                      surface_info->bitmap_format, -surf_obj->lDelta, phys_mem,
                                      /* the surface is still there, tell server not to erase */

Comments

On Wed, Aug 24, 2011 at 03:05:28PM +0300, Yonit Halperin wrote:

See comments below.

> see also commit 3218f6cdb95a73.
> 
> In revision < 3, objects (surfaces and other devram objects) stayed alive in the pci ram
> after DrvAssertMode(Disable) was called. Then, when another display driver session started (upon switch user),
> the driver had newly initiated mspace, but an old release ring (with objects from the older session's mspace).
> This state led to a crash.
> ---
>  display/driver.c  |   98 +++++++++++++++++++++++++++++++++++++----------------
>  display/res.c     |    3 +-
>  display/surface.c |    6 +++
>  3 files changed, 77 insertions(+), 30 deletions(-)
> 
> diff --git a/display/driver.c b/display/driver.c
> index 252d429..0c6fe32 100644
> --- a/display/driver.c
> +++ b/display/driver.c
> @@ -957,24 +957,83 @@ VOID DrvDisableSurface(DHPDEV in_pdev)
>      DEBUG_PRINT((pdev, 1, "%s: 0x%lx exit\n", __FUNCTION__, pdev));
>  }
>  
> +static void FlushSurfaces(PDev *pdev)
> +{
> +    UINT32 surface_id;
> +    SurfaceInfo *surface_info;
> +    SURFOBJ *surf_obj;
> +    RECTL area = {0, 0, 0, 0};
> +
> +    if (pdev->pci_revision < QXL_REVISION_STABLE_V10) {
> +        DEBUG_PRINT((pdev, 1, "%s: revision too old for QXL_IO_FLUSH_SURFACES", __FUNCTION__));
> +        for (surface_id = pdev->n_surfaces - 1; surface_id >  0 ; --surface_id) {
> +            surface_info = GetSurfaceInfo(pdev, surface_id);
> +            // TODO: check what this condinitions means.
remove TODO?

> +            if (!surface_info->draw_area.base_mem) {
> +                continue;
> +            }
> +            surf_obj = surface_info->draw_area.surf_obj;
> +            if (!surf_obj) {
> +                continue;
> +            }
> +            area.right = surf_obj->sizlBitmap.cx;
> +            area.bottom = surf_obj->sizlBitmap.cy;
> +            UpdateArea(pdev,&area, surface_id);
> +        }
> +    } else {
> +        async_io(pdev, ASYNCABLE_FLUSH_SURFACES, 0);
> +    }
> +}
> +
> +static BOOL FlushRelease(PDev *pdev)
> +{
> +    // TODO: why didn't we use reset for revision 3?
To make sure we don't do any accounting errors, and to use our
existing release paths (via the release ring). Reset + calling release function
for each object (that today just unallocates it from the mspace iirc) should be
equivalent. Just resetting should be the same today.

> +    if (pdev->pci_revision<  QXL_REVISION_STABLE_V10) {
> +        DWORD length;
> +
> +        DEBUG_PRINT((pdev, 1, "%s: revision too old for QXL_IO_FLUSH_RELEASE", __FUNCTION__));
> +        if (EngDeviceIoControl(pdev->driver, IOCTL_VIDEO_RESET_DEVICE,
> +                               NULL, 0, NULL, 0, &length)) {
> +            DEBUG_PRINT((NULL, 0, "%s: reset failed 0x%lx\n", __FUNCTION__, pdev));
> +            return FALSE;
> +        }
> +    } else {
> +        /* Free release ring contents */
> +        ReleaseCacheDeviceMemoryResources(pdev);
> +        EmptyReleaseRing(pdev);
> +        /* Get the last free list onto the release ring */
> +        sync_io(pdev, pdev->flush_release_port, 0);
Hmm, I see it was sync_io before too, I guess that's ok since the implementation
in qemu doesn't block on the spice worker thread but returns immediately.

> +        DEBUG_PRINT((pdev, 4, "%s after FLUSH_RELEASE\n", __FUNCTION__));
> +        /* And release that. mspace allocators should be clean after. */
> +        EmptyReleaseRing(pdev);
> +    }
> +    return TRUE;
> +}
> +
>  static BOOL AssertModeDisable(PDev *pdev)
>  {
>      DEBUG_PRINT((pdev, 3, "%s entry\n", __FUNCTION__));
>      /* flush command ring and update all surfaces */
> -    async_io(pdev, ASYNCABLE_FLUSH_SURFACES, 0);
> +    FlushSurfaces(pdev);
> +    DebugCountAliveSurfaces(pdev);
> +    /*
> +     * this call is redundant for
> +     * pci_revision <  QXL_REVISION_STABLE_V10, due to the
> +     * IOCTL_VIDEO_RESET_DEVICE in FlushRelease. However,
> +     * MoveAllSurfacesToRam depends on destroy_all_surfaces
> +     * in case of failure.
> +     * TODO: make MoveAllSurfacesToRam send destroy_surface
> +     * commands instead of create_surface commands in case
> +     * of failure
> +     */
>      async_io(pdev, ASYNCABLE_DESTROY_ALL_SURFACES, 0);
>      /* move all surfaces from device to system memory */
>      if (!MoveAllSurfacesToRam(pdev)) {
>          return FALSE;
>      }
> -    /* Free release ring contents */
> -    ReleaseCacheDeviceMemoryResources(pdev);
> -    EmptyReleaseRing(pdev);
> -    /* Get the last free list onto the release ring */
> -    sync_io(pdev, pdev->flush_release_port, 0);
> -    DEBUG_PRINT((pdev, 4, "%s after FLUSH_RELEASE\n", __FUNCTION__));
> -    /* And release that. mspace allocators should be clean after. */
> -    EmptyReleaseRing(pdev);
> +    if (!FlushRelease(pdev)) {
> +        return FALSE;
> +    }
>      RemoveVRamSlot(pdev);
>      DebugCountAliveSurfaces(pdev);
>      DEBUG_PRINT((pdev, 4, "%s: [%d,%d] [%d,%d] [%d,%d] %lx\n", __FUNCTION__,
> @@ -1002,31 +1061,12 @@ static void AssertModeEnable(PDev *pdev)
>      DebugCountAliveSurfaces(pdev);
>  }
>  
> -BOOL DrvAssertModeOld(PDev *pdev, BOOL enable)
> -{
> -    if (enable) {
> -        InitResources(pdev);
> -        EnableQXLPrimarySurface(pdev);
> -        CreateVRamSlot(pdev);
> -    } else {
> -        DisableQXLPrimarySurface(pdev, 0);
> -        RemoveVRamSlot(pdev);
> -    }
> -    DEBUG_PRINT((pdev, 1, "%s: 0x%lx exit %d\n", __FUNCTION__, pdev, enable));
> -    return TRUE;
> -}
> -
>  BOOL DrvAssertMode(DHPDEV in_pdev, BOOL enable)
>  {
>      PDev* pdev = (PDev*)in_pdev;
>      BOOL ret = TRUE;
>  
> -    DEBUG_PRINT((pdev, 1, "%s: 0x%lx %d\n", __FUNCTION__, pdev, enable));
> -    if (pdev->pci_revision < QXL_REVISION_STABLE_V10) {
> -        return DrvAssertModeOld(pdev, enable);
> -    }
> -    /* new implementation that works correctly only with newer devices
> -     * that implement QXL_IO_FLUSH_RELEASE */
> +    DEBUG_PRINT((pdev, 1, "%s: 0x%lx revision %d enable %d\n", __FUNCTION__, pdev, pdev->pci_revision, enable));
>      if (pdev->enabled == enable) {
>          DEBUG_PRINT((pdev, 1, "%s: called twice with same argument (%d)\n", __FUNCTION__,
>              enable));
> diff --git a/display/res.c b/display/res.c
> index 850d35a..1274f43 100644
> --- a/display/res.c
> +++ b/display/res.c
> @@ -676,10 +676,11 @@ void InitResources(PDev *pdev)
>          pdev->Res = global_res[id];
>          DEBUG_PRINT((pdev, 3, "%s: calling InitRes (id == %d)\n", __FUNCTION__, id));
>          InitRes(pdev);
> +         DEBUG_PRINT((pdev, 1, "%s: called InitRes (id == %d)\n", __FUNCTION__, id));
>      } else {
>          pdev->Res = global_res[id];
>      }
> -    DEBUG_PRINT((pdev, 3, "%s: exit\n", __FUNCTION__));
> +    DEBUG_PRINT((pdev, 1, "%s: exit\n", __FUNCTION__));
>      EngReleaseSemaphore(res_sem);
>  }
>  
> diff --git a/display/surface.c b/display/surface.c
> index b40721d..8422cd3 100644
> --- a/display/surface.c
> +++ b/display/surface.c
> @@ -359,6 +359,12 @@ BOOL MoveAllSurfacesToRam(PDev *pdev)
>              DEBUG_PRINT((pdev, 0, "%s: %d: EngModifySurface failed, sending create\n",
>                           __FUNCTION__, surface_id));
>              phys_mem = SurfaceToPhysical(pdev, surface_info->draw_area.base_mem);
> +            /*
> +             * TODO: bug. create_command should be send for all surfaces >= surface_id
> +             *       since they stay in the pci-bar. Alternatively,
> +             *       don't call destroy_all_surfaces, instead send destroy commands
> +             *       for all surfaces with id < surface_id.
> +             */
>              SendSurfaceCreateCommand(pdev, surface_id, surf_obj->sizlBitmap,
>                                       surface_info->bitmap_format, -surf_obj->lDelta, phys_mem,
>                                       /* the surface is still there, tell server not to erase */
> -- 
> 1.7.4.4
>