[v3,4/4] drm/i915: Make sure we have enough memory bandwidth on ICL

Submitted by Ville Syrjälä on March 27, 2019, 8:22 p.m.

Details

Message ID 20190327202258.21966-1-ville.syrjala@linux.intel.com
State New
Headers show
Series "Series without cover letter" ( rev: 3 ) in Intel GFX - Try Bot

Not browsing as part of any series.

Commit Message

Ville Syrjälä March 27, 2019, 8:22 p.m.
From: Ville Syrjälä <ville.syrjala@linux.intel.com>

ICL has so many planes that it can easily exceed the maximum
effective memory bandwidth of the system. We must therefore check
that we don't exceed that limit.

The algorithm is very magic number heavy and lacks sufficient
explanation for now. We also have no sane way to query the
memory clock and timings, so we must rely on a combination of
raw readout from the memory controller and hardcoded assumptions.
The memory controller values obviously change as the system
jumps between the different SAGV points, so we try to stabilize
it first by disabling SAGV for the duration of the readout.

The utilized bandwidth is tracked via a device wide atomic
private object. That is actually not robust because we can't
afford to enforce strict global ordering between the pipes.
Thus I think I'll need to change this to simply chop up the
available bandwidth between all the active pipes. Each pipe
can then do whatever it wants as long as it doesn't exceed
its budget. That scheme will also require that we assume that
any number of planes could be active at any time.

TODO: make it robust and deal with all the open questions

v2: Sleep longer after disabling SAGV
v3: Poll for the dclk to get raised (seen it take 250ms!)
    If the system has 2133MT/s memory then we pointlessly
    wait one full second :(

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/Makefile             |   1 +
 drivers/gpu/drm/i915/i915_drv.c           | 338 ++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h           |  10 +
 drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
 drivers/gpu/drm/i915/intel_bw.c           | 190 ++++++++++++
 drivers/gpu/drm/i915/intel_display.c      |  39 ++-
 drivers/gpu/drm/i915/intel_drv.h          |  32 ++
 7 files changed, 629 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/intel_bw.c

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 60de05f3fa60..1e1318e9e72b 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -127,6 +127,7 @@  i915-y += intel_audio.o \
 	  intel_atomic.o \
 	  intel_atomic_plane.o \
 	  intel_bios.o \
+	  intel_bw.o \
 	  intel_cdclk.o \
 	  intel_color.o \
 	  intel_combo_phy.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 2ac7d793a314..64668f127f23 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1399,6 +1399,340 @@  bxt_get_dram_info(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
+#define SA_PERF_STATUS_0_0_0_MCHBAR_PC _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5918)
+#define  SKL_QCLK_RATIO_MASK (0x7f << 0)
+#define  SKL_QCLK_RATIO_SHIFT 0
+#define  SKL_QCLK_REFERENCE (1 << 7)
+#define  CNL_QCLK_RATIO_MASK (0x7f << 2)
+#define  CNL_QCLK_RATIO_SHIFT 2
+#define  CNL_QCLK_REFERENCE (1 << 9)
+#define  ICL_QCLK_RATIO_MASK (0xff << 2)
+#define  ICL_QCLK_RATIO_SHIFT 2
+#define  ICL_QCLK_REFERENCE (1 << 10)
+
+#define MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x4000)
+#define MCHBAR_CH1_CR_TC_PRE_0_0_0_MCHBAR _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x4400)
+#define  SKL_DRAM_T_WRPRE_MASK (0x7f << 24)
+#define  SKL_DRAM_T_WRPRE_SHIFT 24
+#define  SKL_DRAM_T_RDPRE_MASK (0xf << 16)
+#define  SKL_DRAM_T_RDPRE_SHIFT 16
+#define  SKL_DRAM_T_RAS_MASK (0x7f << 8)
+#define  SKL_DRAM_T_RAS_SHIFT 8
+#define  SKL_DRAM_T_RPAB_EXT_MASK (0x3 << 6)
+#define  SKL_DRAM_T_RPAB_EXT_SHIFT 6
+#define  SKL_DRAM_T_RP_MASK (0x3f << 0)
+#define  SKL_DRAM_T_RP_SHIFT 0
+#define  CNL_DRAM_T_WRPRE_MASK (0xff << 24)
+#define  CNL_DRAM_T_WRPRE_SHIFT 24
+#define  CNL_DRAM_T_PPD_MASK (0x7 << 21)
+#define  CNL_DRAM_T_PPD_SHIFT 21
+#define  CNL_DRAM_T_RDPRE_MASK (0x1f << 16)
+#define  CNL_DRAM_T_RDPRE_SHIFT 16
+#define  CNL_DRAM_T_RAS_MASK (0x7f << 9)
+#define  CNL_DRAM_T_RAS_SHIFT 9
+#define  CNL_DRAM_T_RPAB_EXT_MASK (0x7 << 6)
+#define  CNL_DRAM_T_RPAB_EXT_SHIFT 6
+#define  CNL_DRAM_T_RP_MASK (0x3f << 0)
+#define  CNL_DRAM_T_RP_SHIFT 0
+
+struct intel_dram_timings {
+	u8 t_rp, t_rdpre, t_ras, t_bl;
+};
+
+static int icl_get_dclk(struct drm_i915_private *dev_priv)
+{
+	int ratio, ref;
+	u32 val;
+
+	val = I915_READ(SA_PERF_STATUS_0_0_0_MCHBAR_PC);
+
+	DRM_DEBUG_KMS("SA_PERF = 0x%x\n", val);
+	DRM_DEBUG_KMS("BIOS_DATA = 0x%x\n",
+		      I915_READ(SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU));
+
+	ratio = (val & ICL_QCLK_RATIO_MASK) >> ICL_QCLK_RATIO_SHIFT;
+
+	if (val & ICL_QCLK_REFERENCE)
+		ref = 6; /* 6 * 16.666 MHz = 100 MHz */
+	else
+		ref = 8; /* 8 * 16.666 MHz = 133 MHz */
+
+	return ratio * ref; /* in multiples of 16.666 MHz */
+}
+
+#if 0
+static void skl_get_dram_ch_timings(struct intel_dram_timings *t,
+				    int channel, enum intel_dram_type type,
+				    u32 val)
+{
+	t->t_rp = (val & SKL_DRAM_T_RP_MASK) >> SKL_DRAM_T_RP_SHIFT;
+	t->t_rdpre = (val & SKL_DRAM_T_RDPRE_MASK) >> SKL_DRAM_T_RDPRE_SHIFT;
+	t->t_ras = (val & SKL_DRAM_T_RAS_MASK) >> SKL_DRAM_T_RAS_SHIFT;
+	t->t_bl = type == INTEL_DRAM_DDR4 ? 4 : 8;
+
+	DRM_DEBUG_KMS("CH%d tRP=%d tRDPRE=%d tRAS=%d tBL=%d\n",
+		      channel, t->t_rp, t->t_rdpre, t->t_ras, t->t_bl);
+}
+
+static void skl_get_dram_timings(struct drm_i915_private *dev_priv,
+				 const struct dram_info *dram,
+				 struct intel_dram_timings *t)
+{
+	if (dram->channels & BIT(0)) {
+		u32 val = I915_READ(MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR);
+
+		skl_get_dram_ch_timings(t, 0, dram->type, val);
+	} else if (dram->channels & BIT(1)) {
+		u32 val = I915_READ(MCHBAR_CH1_CR_TC_PRE_0_0_0_MCHBAR);
+
+		skl_get_dram_ch_timings(t, 1, dram->type, val);
+	}
+}
+#endif
+
+static void cnl_get_dram_ch_timings(struct intel_dram_timings *t,
+				    int channel, enum intel_dram_type type,
+				    u32 val)
+{
+	t->t_rp = (val & CNL_DRAM_T_RP_MASK) >> CNL_DRAM_T_RP_SHIFT;
+	t->t_rdpre = (val & CNL_DRAM_T_RDPRE_MASK) >> CNL_DRAM_T_RDPRE_SHIFT;
+	t->t_ras = (val & CNL_DRAM_T_RAS_MASK) >> CNL_DRAM_T_RAS_SHIFT;
+	t->t_bl = type == INTEL_DRAM_DDR4 ? 4 : 8;
+
+	DRM_DEBUG_KMS("CH%d tRP=%d tRDPRE=%d tRAS=%d tBL=%d\n",
+		      channel, t->t_rp, t->t_rdpre, t->t_ras, t->t_bl);
+}
+
+static void cnl_get_dram_timings(struct drm_i915_private *dev_priv,
+				 const struct dram_info *dram,
+				 struct intel_dram_timings *t)
+{
+	u32 val;
+
+	if (dram->channels & BIT(0)) {
+		val = I915_READ(MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR);
+		cnl_get_dram_ch_timings(t, 0, dram->type, val);
+	} else if (dram->channels & BIT(1)) {
+		val = I915_READ(MCHBAR_CH1_CR_TC_PRE_0_0_0_MCHBAR);
+		cnl_get_dram_ch_timings(t, 1, dram->type, val);
+	}
+}
+
+struct intel_sagv_point {
+	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
+};
+
+struct intel_sagv_info {
+	struct intel_sagv_point points[3];
+	int num_points;
+};
+
+static void icl_get_sagv_points(struct drm_i915_private *dev_priv,
+				struct intel_sagv_info *si,
+				const struct intel_dram_timings *t)
+{
+	int dclk, i;
+
+	dclk = icl_get_dclk(dev_priv);
+
+	si->num_points = 3;
+
+	/*
+	 * ICL Hardcoded
+	 * Name  Description         MC clock(MHz)     DDR data rate(MT / s)  Gear
+	 * Low   Min voltage point   1066              2133                   2
+	 * Med   Max DDR rate point  Max DDR freq / 2  Max DDR freq           2
+	 * High  Min latency point   2667              Same as MC clock       1
+	 */
+	si->points[0].dclk = min(64, dclk);
+	si->points[1].dclk = dclk;
+	si->points[2].dclk = min(80, dclk);
+
+	for (i = 0; i < si->num_points; i++) {
+		struct intel_sagv_point *sp = &si->points[i];
+
+		/*
+		 * We assume these scale linearly.
+		 * Seems to match observed behaviour.
+		 */
+		sp->t_rp = DIV_ROUND_UP(t->t_rp * sp->dclk, dclk);
+		sp->t_rdpre = DIV_ROUND_UP(t->t_rdpre * sp->dclk, dclk);
+		sp->t_ras = DIV_ROUND_UP(t->t_ras * sp->dclk, dclk);
+
+		sp->t_rcd = sp->t_rp;
+		sp->t_rc = sp->t_rp + sp->t_ras;
+
+		DRM_DEBUG_KMS("SAGV %d DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
+			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
+			      sp->t_rcd, sp->t_rc);
+	}
+}
+
+static int icl_calc_bw(int dclk, int num, int den)
+{
+	/* dclk is in multiples of 16.666MHz (100/6), counted twice here */
+	return DIV_ROUND_CLOSEST(200 * dclk * num, 6 * den);
+}
+
+static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
+{
+	u16 highest = 0;
+	int point;
+
+	/* Pick the largest dclk among the populated SAGV points. */
+	for (point = 0; point < si->num_points; point++)
+		highest = max(highest, si->points[point].dclk);
+	return highest;
+}
+
+struct intel_sa_info {
+	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
+};
+
+static const struct intel_sa_info icl_sa_info = {
+	.deburst = 8,
+	.mpagesize = 16,
+	.deprogbwlimit = 25, /* GB/s */
+	.displayrtids = 128,
+};
+
+static void icl_get_bw_info(struct drm_i915_private *dev_priv)
+{
+	const struct dram_info *dram = &dev_priv->dram_info;
+	struct intel_sagv_info si = {};
+	struct intel_dram_timings t = {};
+	const struct intel_sa_info *sa = &icl_sa_info;
+	bool is_y_tile = true; /* assume y tile may be used */
+	int num_channels = hweight8(dram->channels);
+	int deinterleave;
+#if 0
+	int clpchpblock;
+	int pagelimit;
+#endif
+	int ipqdepth, ipqdepthpch;
+	int dclk_max;
+	int maxdebw;
+	int i;
+
+	/*
+	 * Try to muzzle SAGV to prevent it from
+	 * messing up the memory controller readout.
+	 */
+	intel_disable_sagv(dev_priv);
+
+	/*
+	 * FIXME Pcode takes an inordinate amount of time
+	 * (~250 ms observed) to crank up the memory clock
+	 * after SAGV was disabled. What is going on?
+	 */
+	wait_for(icl_get_dclk(dev_priv) != 64, 1000);
+
+	cnl_get_dram_timings(dev_priv, dram, &t);
+
+	icl_get_sagv_points(dev_priv, &si, &t);
+
+	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
+	dclk_max = icl_sagv_max_dclk(&si);
+
+	ipqdepthpch = 16;
+
+	maxdebw = min(sa->deprogbwlimit * 1000,
+		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
+#if 0
+	clpchpblock = deinterleave * 8 / num_channels;
+	pagelimit = sa->mpagesize * deinterleave * 2 / num_channels;
+#endif
+	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
+
+	DRM_DEBUG_KMS("maxdebw = %d\n", maxdebw);
+	DRM_DEBUG_KMS("ipqdepth = %d\n", ipqdepth);
+	DRM_DEBUG_KMS("deinterleave = %d\n", deinterleave);
+	DRM_DEBUG_KMS("dclk_max = %d\n", dclk_max);
+
+	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
+		struct intel_bw_info *bi = &dev_priv->max_bw[i];
+		int clpchgroup;
+		int j;
+
+		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
+		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
+
+		DRM_DEBUG_KMS("clpchgroup = %d\n", clpchgroup);
+		DRM_DEBUG_KMS("num_planes = %d\n", bi->num_planes);
+
+		for (j = 0; j < si.num_points; j++) {
+			const struct intel_sagv_point *sp = &si.points[j];
+			int ct, bw;
+
+			/*
+			 * Max row cycle time
+			 *
+			 * FIXME what is the logic behind the
+			 * assumed burst length?
+			 */
+			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
+				   (clpchgroup - 1) * t.t_bl + sp->t_rdpre);
+			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
+
+			DRM_DEBUG_KMS("ct = %d\n", ct);
+			DRM_DEBUG_KMS("bw = %d\n", bw);
+
+			bi->deratedbw[j] = min(maxdebw,
+					       bw * 9 / 10); /* 90% */
+		}
+
+		if (bi->num_planes == 1)
+			break;
+	}
+}
+
+static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
+			       int num_planes, int sagv)
+{
+	int level;
+
+	/* Use the first level whose plane threshold is met. */
+	for (level = 0; level < ARRAY_SIZE(dev_priv->max_bw); level++) {
+		if (num_planes < dev_priv->max_bw[level].num_planes)
+			continue;
+
+		return dev_priv->max_bw[level].deratedbw[sagv];
+	}
+
+	return 0;
+}
+
+unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
+				 int num_planes)
+{
+	if (!IS_ICELAKE(dev_priv))
+		return UINT_MAX;
+
+	/*
+	 * FIXME with SAGV disabled maybe we can assume
+	 * point 1 will always be used? Seems to match
+	 * the behaviour observed in the wild.
+	 */
+	return min3(icl_max_bw(dev_priv, num_planes, 0),
+		    icl_max_bw(dev_priv, num_planes, 1),
+		    icl_max_bw(dev_priv, num_planes, 2));
+}
+
+static void icl_dump_max_bw(struct drm_i915_private *dev_priv)
+{
+	int level, point; /* max_bw[] index, deratedbw[] index */
+
+	for (level = 0; level < ARRAY_SIZE(dev_priv->max_bw); level++) {
+		const struct intel_bw_info *bi = &dev_priv->max_bw[level];
+
+		for (point = 0; point < ARRAY_SIZE(bi->deratedbw); point++) {
+			DRM_DEBUG_KMS("BW%d SAGV%d: num_planes=%d deratedbw=%d\n",
+				      level, point, bi->num_planes,
+				      bi->deratedbw[point]);
+		}
+	}
+}
+
 static void
 intel_get_dram_info(struct drm_i915_private *dev_priv)
 {
@@ -1577,6 +1911,10 @@  static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 	 */
 	intel_get_dram_info(dev_priv);
 
+	if (INTEL_GEN(dev_priv) >= 11) {
+		icl_get_bw_info(dev_priv);
+		icl_dump_max_bw(dev_priv);
+	}
 
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 387e5dcfe3e8..d87c9e82d107 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -54,6 +54,7 @@ 
 #include <drm/drm_cache.h>
 #include <drm/drm_util.h>
 #include <drm/drm_dsc.h>
+#include <drm/drm_atomic.h>
 #include <drm/drm_connector.h>
 #include <drm/i915_mei_hdcp_interface.h>
 
@@ -1848,6 +1849,13 @@  struct drm_i915_private {
 		} type;
 	} dram_info;
 
+	struct intel_bw_info {
+		int num_planes;
+		int deratedbw[3];
+	} max_bw[6];
+
+	struct drm_private_obj bw_obj;
+
 	struct i915_runtime_pm runtime_pm;
 
 	struct {
@@ -2663,6 +2671,8 @@  extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
 extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
 extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
+unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
+				 int num_planes);
 
 int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
 int intel_engines_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index 9d32a6fcf840..de6b23ee6306 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -111,6 +111,22 @@  intel_plane_destroy_state(struct drm_plane *plane,
 	drm_atomic_helper_plane_destroy_state(plane, state);
 }
 
+unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
+				   const struct intel_plane_state *plane_state)
+{
+	const struct drm_framebuffer *fb = plane_state->base.fb;
+	unsigned int bytes_per_pixel = 0;
+	int n;
+
+	if (!plane_state->base.visible)
+		return 0;
+
+	/* Add up the bytes per pixel of every colour plane of the fb. */
+	for (n = 0; n < fb->format->num_planes; n++)
+		bytes_per_pixel += fb->format->cpp[n];
+	return bytes_per_pixel * crtc_state->pixel_rate;
+}
+
 int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
 					struct intel_crtc_state *new_crtc_state,
 					const struct intel_plane_state *old_plane_state,
@@ -122,6 +138,7 @@  int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
 	new_crtc_state->active_planes &= ~BIT(plane->id);
 	new_crtc_state->nv12_planes &= ~BIT(plane->id);
 	new_crtc_state->c8_planes &= ~BIT(plane->id);
+	new_crtc_state->data_rate[plane->id] = 0;
 	new_plane_state->base.visible = false;
 
 	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
@@ -146,6 +163,9 @@  int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
 	if (new_plane_state->base.visible || old_plane_state->base.visible)
 		new_crtc_state->update_planes |= BIT(plane->id);
 
+	new_crtc_state->data_rate[plane->id] =
+		intel_plane_data_rate(new_crtc_state, new_plane_state);
+
 	return intel_plane_atomic_calc_changes(old_crtc_state,
 					       &new_crtc_state->base,
 					       old_plane_state,
diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
new file mode 100644
index 000000000000..bd722fe5fccb
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_bw.c
@@ -0,0 +1,190 @@ 
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <drm/drm_atomic_state_helper.h>
+
+#include "intel_drv.h"
+
+static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
+{
+	/*
+	 * Cursors are left out of the count: we assume they are
+	 * small enough to not cause bandwidth problems.
+	 */
+	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
+}
+
+static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	unsigned int data_rate = 0;
+	enum plane_id plane_id;
+
+	for_each_plane_id_on_crtc(crtc, plane_id) {
+		/*
+		 * We assume cursors are small enough
+		 * to not cause bandwidth problems.
+		 */
+		if (plane_id == PLANE_CURSOR)
+			continue;
+
+		data_rate += crtc_state->data_rate[plane_id];
+	}
+
+	return data_rate;
+}
+
+void intel_bw_crtc_update(struct intel_bw_state *bw_state,
+			  const struct intel_crtc_state *crtc_state)
+{
+	enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe;
+	unsigned int rate = intel_bw_crtc_data_rate(crtc_state);
+	unsigned int planes = intel_bw_crtc_num_active_planes(crtc_state);
+
+	bw_state->data_rate[pipe] = rate;
+	bw_state->num_active_planes[pipe] = planes;
+
+	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
+		      pipe_name(pipe),
+		      bw_state->data_rate[pipe],
+		      bw_state->num_active_planes[pipe]);
+}
+
+static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
+					       const struct intel_bw_state *bw_state)
+{
+	unsigned int total = 0;
+	enum pipe pipe;
+
+	/* Sum the per-pipe counts, logging each contribution. */
+	for_each_pipe(dev_priv, pipe) {
+		DRM_DEBUG_KMS("pipe %c num active planes %u\n",
+			      pipe_name(pipe),
+			      bw_state->num_active_planes[pipe]);
+		total += bw_state->num_active_planes[pipe];
+	}
+
+	return total;
+}
+
+static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
+				       const struct intel_bw_state *bw_state)
+{
+	unsigned int total = 0;
+	enum pipe pipe;
+
+	/* Sum the per-pipe data rates, logging each contribution. */
+	for_each_pipe(dev_priv, pipe) {
+		DRM_DEBUG_KMS("pipe %c data rate %u\n",
+			      pipe_name(pipe),
+			      bw_state->data_rate[pipe]);
+		total += bw_state->data_rate[pipe];
+	}
+
+	return total;
+}
+
+int intel_bw_atomic_check(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
+	struct intel_bw_state *bw_state = NULL;
+	unsigned int data_rate, max_data_rate;
+	unsigned int num_active_planes;
+	struct intel_crtc *crtc;
+	int i;
+
+	/* FIXME earlier gens need some checks too */
+	if (INTEL_GEN(dev_priv) < 11)
+		return 0;
+
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		unsigned int old_data_rate =
+			intel_bw_crtc_data_rate(old_crtc_state);
+		unsigned int new_data_rate =
+			intel_bw_crtc_data_rate(new_crtc_state);
+		unsigned int old_active_planes =
+			intel_bw_crtc_num_active_planes(old_crtc_state);
+		unsigned int new_active_planes =
+			intel_bw_crtc_num_active_planes(new_crtc_state);
+
+		/*
+		 * Avoid locking the bw state when
+		 * nothing significant has changed.
+		 */
+		if (old_data_rate == new_data_rate &&
+		    old_active_planes == new_active_planes)
+			continue;
+
+		bw_state = intel_atomic_get_bw_state(state);
+		if (IS_ERR(bw_state))
+			return PTR_ERR(bw_state);
+
+		bw_state->data_rate[crtc->pipe] = new_data_rate;
+		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
+
+		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
+			      pipe_name(crtc->pipe),
+			      bw_state->data_rate[crtc->pipe],
+			      bw_state->num_active_planes[crtc->pipe]);
+	}
+
+	if (!bw_state) /* no crtc in this state changed its needs */
+		return 0;
+
+	data_rate = intel_bw_data_rate(dev_priv, bw_state);
+	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
+
+	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
+
+	data_rate = DIV_ROUND_UP(data_rate, 1000); /* compared in MB/s */
+
+	if (data_rate > max_data_rate) {
+		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %u MB/s (%u active planes)\n",
+			      data_rate, max_data_rate, num_active_planes);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
+{
+	struct intel_bw_state *state;
+
+	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return NULL;
+
+	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
+
+	return &state->base;
+}
+
+static void intel_bw_destroy_state(struct drm_private_obj *obj,
+				   struct drm_private_state *state)
+{
+	kfree(state);
+}
+
+static const struct drm_private_state_funcs intel_bw_funcs = {
+	.atomic_duplicate_state = intel_bw_duplicate_state,
+	.atomic_destroy_state = intel_bw_destroy_state,
+};
+
+int intel_bw_init(struct drm_i915_private *dev_priv)
+{
+	struct intel_bw_state *state;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
+				    &state->base, &intel_bw_funcs);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 8576a7f799f2..3e70009b0510 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2828,6 +2828,7 @@  static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
 
 	intel_set_plane_visible(crtc_state, plane_state, false);
 	fixup_active_planes(crtc_state);
+	crtc_state->data_rate[plane->id] = 0;
 
 	if (plane->id == PLANE_PRIMARY)
 		intel_pre_disable_primary_noatomic(&crtc->base);
@@ -6502,6 +6503,8 @@  static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
 	struct intel_encoder *encoder;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+	struct intel_bw_state *bw_state =
+		to_intel_bw_state(dev_priv->bw_obj.state);
 	enum intel_display_power_domain domain;
 	struct intel_plane *plane;
 	u64 domains;
@@ -6564,6 +6567,9 @@  static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
 	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
 	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
 	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
+
+	bw_state->data_rate[intel_crtc->pipe] = 0;
+	bw_state->num_active_planes[intel_crtc->pipe] = 0;
 }
 
 /*
@@ -11067,6 +11073,7 @@  int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
 	if (!is_crtc_enabled) {
 		plane_state->visible = visible = false;
 		to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id);
+		to_intel_crtc_state(crtc_state)->data_rate[plane->id] = 0;
 	}
 
 	if (!was_visible && !visible)
@@ -13157,7 +13164,15 @@  static int intel_atomic_check(struct drm_device *dev,
 		return ret;
 
 	intel_fbc_choose_crtc(dev_priv, intel_state);
-	return calc_watermark_data(intel_state);
+	ret = calc_watermark_data(intel_state);
+	if (ret)
+		return ret;
+
+	ret = intel_bw_atomic_check(intel_state);
+	if (ret)
+		return ret;
+
+	return 0;
 }
 
 static int intel_atomic_prepare_commit(struct drm_device *dev,
@@ -15539,6 +15554,10 @@  int intel_modeset_init(struct drm_device *dev)
 
 	drm_mode_config_init(dev);
 
+	ret = intel_bw_init(dev_priv);
+	if (ret)
+		return ret;
+
 	dev->mode_config.min_width = 0;
 	dev->mode_config.min_height = 0;
 
@@ -16161,8 +16180,11 @@  static void intel_modeset_readout_hw_state(struct drm_device *dev)
 	drm_connector_list_iter_end(&conn_iter);
 
 	for_each_intel_crtc(dev, crtc) {
+		struct intel_bw_state *bw_state =
+			to_intel_bw_state(dev_priv->bw_obj.state);
 		struct intel_crtc_state *crtc_state =
 			to_intel_crtc_state(crtc->base.state);
+		struct intel_plane *plane;
 		int min_cdclk = 0;
 
 		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
@@ -16201,6 +16223,21 @@  static void intel_modeset_readout_hw_state(struct drm_device *dev)
 		dev_priv->min_voltage_level[crtc->pipe] =
 			crtc_state->min_voltage_level;
 
+		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
+			const struct intel_plane_state *plane_state =
+				to_intel_plane_state(plane->base.state);
+
+			/*
+			 * FIXME don't have the fb yet, so can't
+			 * use intel_plane_data_rate() :(
+			 */
+			if (plane_state->base.visible)
+				crtc_state->data_rate[plane->id] =
+					4 * crtc_state->pixel_rate;
+		}
+
+		intel_bw_crtc_update(bw_state, crtc_state);
+
 		intel_pipe_config_sanity_check(dev_priv, crtc_state);
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index f8c7b291fdc3..e33fa580a093 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1018,6 +1018,8 @@  struct intel_crtc_state {
 
 	struct intel_crtc_wm_state wm;
 
+	u32 data_rate[I915_MAX_PLANES];
+
 	/* Gamma mode programmed on the pipe */
 	u32 gamma_mode;
 
@@ -1181,6 +1183,7 @@  struct cxsr_latency {
 #define to_intel_plane(x) container_of(x, struct intel_plane, base)
 #define to_intel_plane_state(x) container_of(x, struct intel_plane_state, base)
 #define intel_fb_obj(x) ((x) ? to_intel_bo((x)->obj[0]) : NULL)
+#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
 
 struct intel_hdmi {
 	i915_reg_t hdmi_reg;
@@ -2503,11 +2506,34 @@  intel_atomic_get_crtc_state(struct drm_atomic_state *state,
 	return to_intel_crtc_state(crtc_state);
 }
 
+struct intel_bw_state {
+	struct drm_private_state base;
+
+	unsigned int data_rate[I915_MAX_PIPES];
+	u8 num_active_planes[I915_MAX_PIPES];
+};
+
+static inline struct intel_bw_state *
+intel_atomic_get_bw_state(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct drm_private_state *bw_state;
+
+	bw_state = drm_atomic_get_private_obj_state(&state->base,
+						    &dev_priv->bw_obj);
+	if (IS_ERR(bw_state))
+		return ERR_CAST(bw_state);
+
+	return to_intel_bw_state(bw_state);
+}
+
 int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
 			       struct intel_crtc *intel_crtc,
 			       struct intel_crtc_state *crtc_state);
 
 /* intel_atomic_plane.c */
+unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
+				   const struct intel_plane_state *plane_state);
 void intel_update_plane(struct intel_plane *plane,
 			const struct intel_crtc_state *crtc_state,
 			const struct intel_plane_state *plane_state);
@@ -2531,6 +2557,12 @@  int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
 					const struct intel_plane_state *old_plane_state,
 					struct intel_plane_state *intel_state);
 
+/* intel_bw.c */
+int intel_bw_init(struct drm_i915_private *dev_priv);
+int intel_bw_atomic_check(struct intel_atomic_state *state);
+void intel_bw_crtc_update(struct intel_bw_state *bw_state,
+			  const struct intel_crtc_state *crtc_state);
+
 /* intel_color.c */
 void intel_color_init(struct intel_crtc *crtc);
 int intel_color_check(struct intel_crtc_state *crtc_state);