From ae80152ddad252f33893b92dd69f00cc53c5949f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 5 Mar 2015 21:19:49 +0200 Subject: [PATCH] drm/i915: Rewrite VLV/CHV watermark code MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Assuming the PND deadline mechanism works reasonably we should do memory requests as early as possible so that PND has schedule the requests more intelligently. Currently we're still calculating the watermarks as if VLV/CHV are identical to g4x, which isn't the case. The current code also seems to calculate insufficient watermarks and hence we're seeing some underruns, especially on high resolution displays. To fix it just rip out the current code and replace is with something that tries to utilize PND as efficiently as possible. We now calculate the WM watermark to trigger when the FIFO still has 256us worth of data. 256us is the maximum deadline value supoorted by PND, so issuing memory requests earlier would mean we probably couldn't utilize the full FIFO as PND would attempt to return the data at least in at least 256us. We also clamp the watermark to at least 8 cachelines as that's the magic watermark that enabling trickle feed would also impose. I'm assuming it matches some burst size. In theory we could just enable trickle feed and ignore the WM values, except trickle feed doesn't work with max fifo mode anyway, so we'd still need to calculate the SR watermarks. It seems cleaner to just disable trickle feed and calculate all watermarks the same way. Also trickle feed wouldn't account for the 256us max deadline value, thoguh that may be a moot point in non-max fifo mode sicne the FIFOs are fairly small. On VLV max fifo mode can be used with either primary or sprite planes. So the code now also checks all the planes (apart from the cursor) when calculating the SR plane watermark. We don't have to worry about the WM1 watermarks since we're using the PND deadline scheme which means the hardware ignores WM1 values. v2: Use plane->state->fb instead of plane->fb Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 11 ++ drivers/gpu/drm/i915/i915_reg.h | 4 +- drivers/gpu/drm/i915/intel_pm.c | 300 +++++++++++++++++--------------- 3 files changed, 173 insertions(+), 142 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5462cbf2fd56..c2a1a450b21b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1408,6 +1408,17 @@ struct ilk_wm_values { }; struct vlv_wm_values { + struct { + uint16_t primary; + uint16_t sprite[2]; + uint8_t cursor; + } pipe[3]; + + struct { + uint16_t plane; + uint8_t cursor; + } sr; + struct { uint8_t cursor; uint8_t sprite[2]; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5f9a91f8fbc7..ff60166875f3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4172,7 +4172,7 @@ enum skl_disp_power_wells { /* vlv/chv high order bits */ #define DSPHOWM (VLV_DISPLAY_BASE + 0x70064) #define DSPFW_SR_HI_SHIFT 24 -#define DSPFW_SR_HI_MASK (1<<24) +#define DSPFW_SR_HI_MASK (3<<24) /* 2 bits for chv, 1 for vlv */ #define DSPFW_SPRITEF_HI_SHIFT 23 #define DSPFW_SPRITEF_HI_MASK (1<<23) #define DSPFW_SPRITEE_HI_SHIFT 22 @@ -4193,7 +4193,7 @@ enum skl_disp_power_wells { #define DSPFW_PLANEA_HI_MASK (1<<0) #define DSPHOWM1 (VLV_DISPLAY_BASE + 0x70068) #define DSPFW_SR_WM1_HI_SHIFT 24 -#define DSPFW_SR_WM1_HI_MASK (1<<24) +#define DSPFW_SR_WM1_HI_MASK (3<<24) /* 2 bits for chv, 1 for vlv */ #define DSPFW_SPRITEF_WM1_HI_SHIFT 23 #define DSPFW_SPRITEF_WM1_HI_MASK (1<<23) #define DSPFW_SPRITEE_WM1_HI_SHIFT 22 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a06a2c73b514..d277d93cffbf 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -806,6 +806,55 @@ static void vlv_write_wm_values(struct intel_crtc *crtc, (wm->ddl[pipe].sprite[0] << DDL_SPRITE_SHIFT(0)) | (wm->ddl[pipe].primary << DDL_PLANE_SHIFT)); + I915_WRITE(DSPFW1, + ((wm->sr.plane << DSPFW_SR_SHIFT) & DSPFW_SR_MASK) | + ((wm->pipe[PIPE_B].cursor << DSPFW_CURSORB_SHIFT) & DSPFW_CURSORB_MASK) | + ((wm->pipe[PIPE_B].primary << DSPFW_PLANEB_SHIFT) & DSPFW_PLANEB_MASK_VLV) | + ((wm->pipe[PIPE_A].primary << DSPFW_PLANEA_SHIFT) & DSPFW_PLANEA_MASK_VLV)); + I915_WRITE(DSPFW2, + ((wm->pipe[PIPE_A].sprite[1] << DSPFW_SPRITEB_SHIFT) & DSPFW_SPRITEB_MASK_VLV) | + ((wm->pipe[PIPE_A].cursor << DSPFW_CURSORA_SHIFT) & DSPFW_CURSORA_MASK) | + ((wm->pipe[PIPE_A].sprite[0] << DSPFW_SPRITEA_SHIFT) & DSPFW_SPRITEA_MASK_VLV)); + I915_WRITE(DSPFW3, + ((wm->sr.cursor << DSPFW_CURSOR_SR_SHIFT) & DSPFW_CURSOR_SR_MASK)); + + if (IS_CHERRYVIEW(dev_priv)) { + I915_WRITE(DSPFW7_CHV, + ((wm->pipe[PIPE_B].sprite[1] << DSPFW_SPRITED_SHIFT) & DSPFW_SPRITED_MASK) | + ((wm->pipe[PIPE_B].sprite[0] << DSPFW_SPRITEC_SHIFT) & DSPFW_SPRITEC_MASK)); + I915_WRITE(DSPFW8_CHV, + ((wm->pipe[PIPE_C].sprite[1] << DSPFW_SPRITEF_SHIFT) & DSPFW_SPRITEF_MASK) | + ((wm->pipe[PIPE_C].sprite[0] << DSPFW_SPRITEE_SHIFT) & DSPFW_SPRITEE_MASK)); + I915_WRITE(DSPFW9_CHV, + ((wm->pipe[PIPE_C].primary << DSPFW_PLANEC_SHIFT) & DSPFW_PLANEC_MASK) | + ((wm->pipe[PIPE_C].cursor << DSPFW_CURSORC_SHIFT) & DSPFW_CURSORC_MASK)); + I915_WRITE(DSPHOWM, + (((wm->sr.plane >> 9) << DSPFW_SR_HI_SHIFT) & DSPFW_SR_HI_MASK) | + (((wm->pipe[PIPE_C].sprite[1] >> 8) << DSPFW_SPRITEF_HI_SHIFT) & DSPFW_SPRITEF_HI_MASK) | + (((wm->pipe[PIPE_C].sprite[0] >> 8) << DSPFW_SPRITEE_HI_SHIFT) & DSPFW_SPRITEE_HI_MASK) | + (((wm->pipe[PIPE_C].primary >> 8) << DSPFW_PLANEC_HI_SHIFT) & DSPFW_PLANEC_HI_MASK) | + (((wm->pipe[PIPE_B].sprite[1] >> 8) << DSPFW_SPRITED_HI_SHIFT) & DSPFW_SPRITED_HI_MASK) | + (((wm->pipe[PIPE_B].sprite[0] >> 8) << DSPFW_SPRITEC_HI_SHIFT) & DSPFW_SPRITEC_HI_MASK) | + (((wm->pipe[PIPE_B].primary >> 8) << DSPFW_PLANEB_HI_SHIFT) & DSPFW_PLANEB_HI_MASK) | + (((wm->pipe[PIPE_A].sprite[1] >> 8) << DSPFW_SPRITEB_HI_SHIFT) & DSPFW_SPRITEB_HI_MASK) | + (((wm->pipe[PIPE_A].sprite[0] >> 8) << DSPFW_SPRITEA_HI_SHIFT) & DSPFW_SPRITEA_HI_MASK) | + (((wm->pipe[PIPE_A].primary >> 8) << DSPFW_PLANEA_HI_SHIFT) & DSPFW_PLANEA_HI_MASK)); + } else { + I915_WRITE(DSPFW7, + ((wm->pipe[PIPE_B].sprite[1] << DSPFW_SPRITED_SHIFT) & DSPFW_SPRITED_MASK) | + ((wm->pipe[PIPE_B].sprite[0] << DSPFW_SPRITEC_SHIFT) & DSPFW_SPRITEC_MASK)); + I915_WRITE(DSPHOWM, + (((wm->sr.plane >> 9) << DSPFW_SR_HI_SHIFT) & DSPFW_SR_HI_MASK) | + (((wm->pipe[PIPE_B].sprite[1] >> 8) << DSPFW_SPRITED_HI_SHIFT) & DSPFW_SPRITED_HI_MASK) | + (((wm->pipe[PIPE_B].sprite[0] >> 8) << DSPFW_SPRITEC_HI_SHIFT) & DSPFW_SPRITEC_HI_MASK) | + (((wm->pipe[PIPE_B].primary >> 8) << DSPFW_PLANEB_HI_SHIFT) & DSPFW_PLANEB_HI_MASK) | + (((wm->pipe[PIPE_A].sprite[1] >> 8) << DSPFW_SPRITEB_HI_SHIFT) & DSPFW_SPRITEB_HI_MASK) | + (((wm->pipe[PIPE_A].sprite[0] >> 8) << DSPFW_SPRITEA_HI_SHIFT) & DSPFW_SPRITEA_HI_MASK) | + (((wm->pipe[PIPE_A].primary >> 8) << DSPFW_PLANEA_HI_SHIFT) & DSPFW_PLANEA_HI_MASK)); + } + + POSTING_READ(DSPFW1); + dev_priv->wm.vlv = *wm; } @@ -850,169 +899,113 @@ static uint8_t vlv_compute_drain_latency(struct drm_crtc *crtc, DDL_PRECISION_HIGH : DDL_PRECISION_LOW); } -/* - * Update drain latency registers of memory arbiter - * - * Valleyview SoC has a new memory arbiter and needs drain latency registers - * to be programmed. Each plane has a drain latency multiplier and a drain - * latency value. - */ - -static void vlv_update_drain_latency(struct drm_crtc *crtc) +static int vlv_compute_wm(struct intel_crtc *crtc, + struct intel_plane *plane, + int fifo_size) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum pipe pipe = intel_crtc->pipe; - struct vlv_wm_values wm = dev_priv->wm.vlv; + int clock, entries, pixel_size; - wm.ddl[pipe].primary = vlv_compute_drain_latency(crtc, crtc->primary); - wm.ddl[pipe].cursor = vlv_compute_drain_latency(crtc, crtc->cursor); + /* + * FIXME the plane might have an fb + * but be invisible (eg. due to clipping) + */ + if (!crtc->active || !plane->base.state->fb) + return 0; - vlv_write_wm_values(intel_crtc, &wm); -} + pixel_size = drm_format_plane_cpp(plane->base.state->fb->pixel_format, 0); + clock = crtc->config->base.adjusted_mode.crtc_clock; -#define single_plane_enabled(mask) is_power_of_2(mask) + entries = DIV_ROUND_UP(clock, 1000) * pixel_size; -static void valleyview_update_wm(struct drm_crtc *crtc) + /* + * Set up the watermark such that we don't start issuing memory + * requests until we are within PND's max deadline value (256us). + * Idea being to be idle as long as possible while still taking + * advatange of PND's deadline scheduling. The limit of 8 + * cachelines (used when the FIFO will anyway drain in less time + * than 256us) should match what we would be done if trickle + * feed were enabled. + */ + return fifo_size - clamp(DIV_ROUND_UP(256 * entries, 64), 0, fifo_size - 8); +} + +static bool vlv_compute_sr_wm(struct drm_device *dev, + struct vlv_wm_values *wm) { - struct drm_device *dev = crtc->dev; - static const int sr_latency_ns = 12000; - struct drm_i915_private *dev_priv = dev->dev_private; - int planea_wm, planeb_wm, cursora_wm, cursorb_wm; - int plane_sr, cursor_sr; - int ignore_plane_sr, ignore_cursor_sr; - unsigned int enabled = 0; - bool cxsr_enabled; + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_crtc *crtc; + enum pipe pipe = INVALID_PIPE; + int num_planes = 0; + int fifo_size = 0; + struct intel_plane *plane; - vlv_update_drain_latency(crtc); + wm->sr.cursor = wm->sr.plane = 0; - if (g4x_compute_wm0(dev, PIPE_A, - &valleyview_wm_info, pessimal_latency_ns, - &valleyview_cursor_wm_info, pessimal_latency_ns, - &planea_wm, &cursora_wm)) - enabled |= 1 << PIPE_A; + crtc = single_enabled_crtc(dev); + /* maxfifo not supported on pipe C */ + if (crtc && to_intel_crtc(crtc)->pipe != PIPE_C) { + pipe = to_intel_crtc(crtc)->pipe; + num_planes = !!wm->pipe[pipe].primary + + !!wm->pipe[pipe].sprite[0] + + !!wm->pipe[pipe].sprite[1]; + fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; + } - if (g4x_compute_wm0(dev, PIPE_B, - &valleyview_wm_info, pessimal_latency_ns, - &valleyview_cursor_wm_info, pessimal_latency_ns, - &planeb_wm, &cursorb_wm)) - enabled |= 1 << PIPE_B; + if (fifo_size == 0 || num_planes > 1) + return false; - if (single_plane_enabled(enabled) && - g4x_compute_srwm(dev, ffs(enabled) - 1, - sr_latency_ns, - &valleyview_wm_info, - &valleyview_cursor_wm_info, - &plane_sr, &ignore_cursor_sr) && - g4x_compute_srwm(dev, ffs(enabled) - 1, - 2*sr_latency_ns, - &valleyview_wm_info, - &valleyview_cursor_wm_info, - &ignore_plane_sr, &cursor_sr)) { - cxsr_enabled = true; - } else { - cxsr_enabled = false; - intel_set_memory_cxsr(dev_priv, false); - plane_sr = cursor_sr = 0; - } + wm->sr.cursor = vlv_compute_wm(to_intel_crtc(crtc), + to_intel_plane(crtc->cursor), 0x3f); - DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, " - "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", - planea_wm, cursora_wm, - planeb_wm, cursorb_wm, - plane_sr, cursor_sr); + list_for_each_entry(plane, &dev->mode_config.plane_list, base.head) { + if (plane->base.type == DRM_PLANE_TYPE_CURSOR) + continue; - I915_WRITE(DSPFW1, - (plane_sr << DSPFW_SR_SHIFT) | - (cursorb_wm << DSPFW_CURSORB_SHIFT) | - (planeb_wm << DSPFW_PLANEB_SHIFT) | - (planea_wm << DSPFW_PLANEA_SHIFT)); - I915_WRITE(DSPFW2, - (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) | - (cursora_wm << DSPFW_CURSORA_SHIFT)); - I915_WRITE(DSPFW3, - (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) | - (cursor_sr << DSPFW_CURSOR_SR_SHIFT)); + if (plane->pipe != pipe) + continue; - if (cxsr_enabled) - intel_set_memory_cxsr(dev_priv, true); + wm->sr.plane = vlv_compute_wm(to_intel_crtc(crtc), + plane, fifo_size); + if (wm->sr.plane != 0) + break; + } + + return true; } -static void cherryview_update_wm(struct drm_crtc *crtc) +static void valleyview_update_wm(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - static const int sr_latency_ns = 12000; struct drm_i915_private *dev_priv = dev->dev_private; - int planea_wm, planeb_wm, planec_wm; - int cursora_wm, cursorb_wm, cursorc_wm; - int plane_sr, cursor_sr; - int ignore_plane_sr, ignore_cursor_sr; - unsigned int enabled = 0; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + enum pipe pipe = intel_crtc->pipe; bool cxsr_enabled; + struct vlv_wm_values wm = dev_priv->wm.vlv; - vlv_update_drain_latency(crtc); + wm.ddl[pipe].primary = vlv_compute_drain_latency(crtc, crtc->primary); + wm.pipe[pipe].primary = vlv_compute_wm(intel_crtc, + to_intel_plane(crtc->primary), + vlv_get_fifo_size(dev, pipe, 0)); - if (g4x_compute_wm0(dev, PIPE_A, - &valleyview_wm_info, pessimal_latency_ns, - &valleyview_cursor_wm_info, pessimal_latency_ns, - &planea_wm, &cursora_wm)) - enabled |= 1 << PIPE_A; + wm.ddl[pipe].cursor = vlv_compute_drain_latency(crtc, crtc->cursor); + wm.pipe[pipe].cursor = vlv_compute_wm(intel_crtc, + to_intel_plane(crtc->cursor), + 0x3f); - if (g4x_compute_wm0(dev, PIPE_B, - &valleyview_wm_info, pessimal_latency_ns, - &valleyview_cursor_wm_info, pessimal_latency_ns, - &planeb_wm, &cursorb_wm)) - enabled |= 1 << PIPE_B; + cxsr_enabled = vlv_compute_sr_wm(dev, &wm); - if (g4x_compute_wm0(dev, PIPE_C, - &valleyview_wm_info, pessimal_latency_ns, - &valleyview_cursor_wm_info, pessimal_latency_ns, - &planec_wm, &cursorc_wm)) - enabled |= 1 << PIPE_C; + if (memcmp(&wm, &dev_priv->wm.vlv, sizeof(wm)) == 0) + return; - if (single_plane_enabled(enabled) && - g4x_compute_srwm(dev, ffs(enabled) - 1, - sr_latency_ns, - &valleyview_wm_info, - &valleyview_cursor_wm_info, - &plane_sr, &ignore_cursor_sr) && - g4x_compute_srwm(dev, ffs(enabled) - 1, - 2*sr_latency_ns, - &valleyview_wm_info, - &valleyview_cursor_wm_info, - &ignore_plane_sr, &cursor_sr)) { - cxsr_enabled = true; - } else { - cxsr_enabled = false; - intel_set_memory_cxsr(dev_priv, false); - plane_sr = cursor_sr = 0; - } + DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, " + "SR: plane=%d, cursor=%d\n", pipe_name(pipe), + wm.pipe[pipe].primary, wm.pipe[pipe].cursor, + wm.sr.plane, wm.sr.cursor); - DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, " - "B: plane=%d, cursor=%d, C: plane=%d, cursor=%d, " - "SR: plane=%d, cursor=%d\n", - planea_wm, cursora_wm, - planeb_wm, cursorb_wm, - planec_wm, cursorc_wm, - plane_sr, cursor_sr); + if (!cxsr_enabled) + intel_set_memory_cxsr(dev_priv, false); - I915_WRITE(DSPFW1, - (plane_sr << DSPFW_SR_SHIFT) | - (cursorb_wm << DSPFW_CURSORB_SHIFT) | - (planeb_wm << DSPFW_PLANEB_SHIFT) | - (planea_wm << DSPFW_PLANEA_SHIFT)); - I915_WRITE(DSPFW2, - (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) | - (cursora_wm << DSPFW_CURSORA_SHIFT)); - I915_WRITE(DSPFW3, - (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) | - (cursor_sr << DSPFW_CURSOR_SR_SHIFT)); - I915_WRITE(DSPFW9_CHV, - (I915_READ(DSPFW9_CHV) & ~(DSPFW_PLANEC_MASK | - DSPFW_CURSORC_MASK)) | - (planec_wm << DSPFW_PLANEC_SHIFT) | - (cursorc_wm << DSPFW_CURSORC_SHIFT)); + vlv_write_wm_values(intel_crtc, &wm); if (cxsr_enabled) intel_set_memory_cxsr(dev_priv, true); @@ -1030,17 +1023,44 @@ static void valleyview_update_sprite_wm(struct drm_plane *plane, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; int sprite = to_intel_plane(plane)->plane; + bool cxsr_enabled; struct vlv_wm_values wm = dev_priv->wm.vlv; - if (enabled) + if (enabled) { wm.ddl[pipe].sprite[sprite] = vlv_compute_drain_latency(crtc, plane); - else + + wm.pipe[pipe].sprite[sprite] = + vlv_compute_wm(intel_crtc, + to_intel_plane(plane), + vlv_get_fifo_size(dev, pipe, sprite+1)); + } else { wm.ddl[pipe].sprite[sprite] = 0; + wm.pipe[pipe].sprite[sprite] = 0; + } + + cxsr_enabled = vlv_compute_sr_wm(dev, &wm); + + if (memcmp(&wm, &dev_priv->wm.vlv, sizeof(wm)) == 0) + return; + + DRM_DEBUG_KMS("Setting FIFO watermarks - %c: sprite %c=%d, " + "SR: plane=%d, cursor=%d\n", pipe_name(pipe), + sprite_name(pipe, sprite), + wm.pipe[pipe].sprite[sprite], + wm.sr.plane, wm.sr.cursor); + + if (!cxsr_enabled) + intel_set_memory_cxsr(dev_priv, false); vlv_write_wm_values(intel_crtc, &wm); + + if (cxsr_enabled) + intel_set_memory_cxsr(dev_priv, true); } +#define single_plane_enabled(mask) is_power_of_2(mask) + static void g4x_update_wm(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; @@ -6430,7 +6450,7 @@ void intel_init_pm(struct drm_device *dev) else if (INTEL_INFO(dev)->gen == 8) dev_priv->display.init_clock_gating = broadwell_init_clock_gating; } else if (IS_CHERRYVIEW(dev)) { - dev_priv->display.update_wm = cherryview_update_wm; + dev_priv->display.update_wm = valleyview_update_wm; dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm; dev_priv->display.init_clock_gating = cherryview_init_clock_gating; -- 2.39.5