/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eugeni Dodonov <eugeni.dodonov@intel.com>
 *
 */

#include <linux/cpufreq.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include "../../../platform/x86/intel_ips.h"
#include <linux/module.h>
#include <drm/i915_powerwell.h>
#include <linux/pm_runtime.h>

/**
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, drawing as little as 0V in this stage.  When
 * RC6 support is enabled, this stage is entered automatically whenever the
 * GPU is idle, and the GPU wakes up automatically as soon as a new workload
 * arises.
 *
 * There are different RC6 modes available in Intel GPUs, which differ in the
 * latency required to enter and leave RC6 and in the voltage consumed by the
 * GPU in the different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
 * RC6pp is the deepest RC6. Their support by hardware varies according to the
 * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
 * which brings the most power savings; deeper states save more power, but
 * require higher latency to switch to and wake up.
 */
#define INTEL_RC6_ENABLE                        (1<<0)
#define INTEL_RC6p_ENABLE                       (1<<1)
#define INTEL_RC6pp_ENABLE                      (1<<2)

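/*
 * Illustrative example (not from the original source): a platform that
 * supports plain RC6 and deep RC6, but not the deepest state, would be
 * described by the mask (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE).
 */
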
/* FBC, or Frame Buffer Compression, is a technique employed to compress the
 * framebuffer contents in-memory, aiming at reducing the required bandwidth
 * during in-memory transfers and, therefore, reducing power consumption.
 *
 * The benefits of FBC are mostly visible with solid backgrounds and
 * variation-less patterns.
 *
 * FBC-related functionality can be enabled by means of the
 * i915.i915_enable_fbc parameter.
 */

static void i8xx_disable_fbc(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 fbc_ctl;

        /* Disable compression */
        fbc_ctl = I915_READ(FBC_CONTROL);
        if ((fbc_ctl & FBC_CTL_EN) == 0)
                return;

        fbc_ctl &= ~FBC_CTL_EN;
        I915_WRITE(FBC_CONTROL, fbc_ctl);

        /* Wait for compressing bit to clear */
        if (wait_for((I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING) == 0, 10)) {
                DRM_DEBUG_KMS("FBC idle timed out\n");
                return;
        }

        DRM_DEBUG_KMS("disabled FBC\n");
}

static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
{
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_framebuffer *fb = crtc->fb;
        struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
        struct drm_i915_gem_object *obj = intel_fb->obj;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        int cfb_pitch;
        int plane, i;
        u32 fbc_ctl, fbc_ctl2;

        cfb_pitch = dev_priv->fbc.size / FBC_LL_SIZE;
        if (fb->pitches[0] < cfb_pitch)
                cfb_pitch = fb->pitches[0];

        /* FBC_CTL wants 64B units */
        cfb_pitch = (cfb_pitch / 64) - 1;
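        /*
         * Worked example (illustrative values, not from the original
         * source): a 4096-byte pitch that fits in the CFB gives
         * (4096 / 64) - 1 = 63, i.e. the field is programmed in units
         * of 64 bytes, minus one.
         */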
        plane = intel_crtc->plane == 0 ? FBC_CTL_PLANEA : FBC_CTL_PLANEB;

        /* Clear old tags */
        for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++)
                I915_WRITE(FBC_TAG + (i * 4), 0);

        /* Set it up... */
        fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | FBC_CTL_CPU_FENCE;
        fbc_ctl2 |= plane;
        I915_WRITE(FBC_CONTROL2, fbc_ctl2);
        I915_WRITE(FBC_FENCE_OFF, crtc->y);

        /* enable it... */
        fbc_ctl = FBC_CTL_EN | FBC_CTL_PERIODIC;
        if (IS_I945GM(dev))
                fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */
        fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
        fbc_ctl |= (interval & 0x2fff) << FBC_CTL_INTERVAL_SHIFT;
        fbc_ctl |= obj->fence_reg;
        I915_WRITE(FBC_CONTROL, fbc_ctl);
        DRM_DEBUG_KMS("enabled FBC, pitch %d, yoff %d, plane %c\n",
                      cfb_pitch, crtc->y, plane_name(intel_crtc->plane));
}

static bool i8xx_fbc_enabled(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        return I915_READ(FBC_CONTROL) & FBC_CTL_EN;
}

static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
{
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_framebuffer *fb = crtc->fb;
        struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
        struct drm_i915_gem_object *obj = intel_fb->obj;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        int plane = intel_crtc->plane == 0 ? DPFC_CTL_PLANEA : DPFC_CTL_PLANEB;
        unsigned long stall_watermark = 200;
        u32 dpfc_ctl;

        dpfc_ctl = plane | DPFC_SR_EN | DPFC_CTL_LIMIT_1X;
        dpfc_ctl |= DPFC_CTL_FENCE_EN | obj->fence_reg;
        I915_WRITE(DPFC_CHICKEN, DPFC_HT_MODIFY);

        I915_WRITE(DPFC_RECOMP_CTL, DPFC_RECOMP_STALL_EN |
                   (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) |
                   (interval << DPFC_RECOMP_TIMER_COUNT_SHIFT));
        I915_WRITE(DPFC_FENCE_YOFF, crtc->y);

        /* enable it... */
        I915_WRITE(DPFC_CONTROL, I915_READ(DPFC_CONTROL) | DPFC_CTL_EN);

        DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
}

static void g4x_disable_fbc(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 dpfc_ctl;

        /* Disable compression */
        dpfc_ctl = I915_READ(DPFC_CONTROL);
        if (dpfc_ctl & DPFC_CTL_EN) {
                dpfc_ctl &= ~DPFC_CTL_EN;
                I915_WRITE(DPFC_CONTROL, dpfc_ctl);

                DRM_DEBUG_KMS("disabled FBC\n");
        }
}

static bool g4x_fbc_enabled(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
}

static void sandybridge_blit_fbc_update(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 blt_ecoskpd;

        /* Make sure blitter notifies FBC of writes */

        /* The blitter is part of the Media power well on VLV. This
         * parameter has no effect on other platforms for now. */
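        /*
         * As we read the sequence below (our interpretation, not stated in
         * the original source): ECOSKPD appears to be a masked register, so
         * FBC_NOTIFY is first written into the high (lock) half to unmask
         * the bit, the bit itself is then set, and the mask half is cleared
         * again before the final posting read flushes everything out.
         */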
        gen6_gt_force_wake_get(dev_priv, FORCEWAKE_MEDIA);

        blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
        blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
                GEN6_BLITTER_LOCK_SHIFT;
        I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
        blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY;
        I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
        blt_ecoskpd &= ~(GEN6_BLITTER_FBC_NOTIFY <<
                         GEN6_BLITTER_LOCK_SHIFT);
        I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
        POSTING_READ(GEN6_BLITTER_ECOSKPD);

        gen6_gt_force_wake_put(dev_priv, FORCEWAKE_MEDIA);
}

static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
{
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_framebuffer *fb = crtc->fb;
        struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
        struct drm_i915_gem_object *obj = intel_fb->obj;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        int plane = intel_crtc->plane == 0 ? DPFC_CTL_PLANEA : DPFC_CTL_PLANEB;
        unsigned long stall_watermark = 200;
        u32 dpfc_ctl;

        dpfc_ctl = I915_READ(ILK_DPFC_CONTROL);
        dpfc_ctl &= DPFC_RESERVED;
        dpfc_ctl |= (plane | DPFC_CTL_LIMIT_1X);
        /* Set persistent mode for front-buffer rendering, ala X. */
        dpfc_ctl |= DPFC_CTL_PERSISTENT_MODE;
        dpfc_ctl |= DPFC_CTL_FENCE_EN;
        if (IS_GEN5(dev))
                dpfc_ctl |= obj->fence_reg;
        I915_WRITE(ILK_DPFC_CHICKEN, DPFC_HT_MODIFY);

        I915_WRITE(ILK_DPFC_RECOMP_CTL, DPFC_RECOMP_STALL_EN |
                   (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) |
                   (interval << DPFC_RECOMP_TIMER_COUNT_SHIFT));
        I915_WRITE(ILK_DPFC_FENCE_YOFF, crtc->y);
        I915_WRITE(ILK_FBC_RT_BASE, i915_gem_obj_ggtt_offset(obj) | ILK_FBC_RT_VALID);
        /* enable it... */
        I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);

        if (IS_GEN6(dev)) {
                I915_WRITE(SNB_DPFC_CTL_SA,
                           SNB_CPU_FENCE_ENABLE | obj->fence_reg);
                I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
                sandybridge_blit_fbc_update(dev);
        }

        DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
}

static void ironlake_disable_fbc(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 dpfc_ctl;

        /* Disable compression */
        dpfc_ctl = I915_READ(ILK_DPFC_CONTROL);
        if (dpfc_ctl & DPFC_CTL_EN) {
                dpfc_ctl &= ~DPFC_CTL_EN;
                I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl);

                DRM_DEBUG_KMS("disabled FBC\n");
        }
}

static bool ironlake_fbc_enabled(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        return I915_READ(ILK_DPFC_CONTROL) & DPFC_CTL_EN;
}

static void gen7_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
{
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_framebuffer *fb = crtc->fb;
        struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
        struct drm_i915_gem_object *obj = intel_fb->obj;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);

        I915_WRITE(IVB_FBC_RT_BASE, i915_gem_obj_ggtt_offset(obj));

        I915_WRITE(ILK_DPFC_CONTROL, DPFC_CTL_EN | DPFC_CTL_LIMIT_1X |
                   IVB_DPFC_CTL_FENCE_EN |
                   intel_crtc->plane << IVB_DPFC_CTL_PLANE_SHIFT);

        if (IS_IVYBRIDGE(dev)) {
                /* WaFbcAsynchFlipDisableFbcQueue:ivb */
                I915_WRITE(ILK_DISPLAY_CHICKEN1, ILK_FBCQ_DIS);
        } else {
                /* WaFbcAsynchFlipDisableFbcQueue:hsw */
                I915_WRITE(HSW_PIPE_SLICE_CHICKEN_1(intel_crtc->pipe),
                           HSW_BYPASS_FBC_QUEUE);
        }

        I915_WRITE(SNB_DPFC_CTL_SA,
                   SNB_CPU_FENCE_ENABLE | obj->fence_reg);
        I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);

        sandybridge_blit_fbc_update(dev);

        DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
}

bool intel_fbc_enabled(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        if (!dev_priv->display.fbc_enabled)
                return false;

        return dev_priv->display.fbc_enabled(dev);
}

static void intel_fbc_work_fn(struct work_struct *__work)
{
        struct intel_fbc_work *work =
                container_of(to_delayed_work(__work),
                             struct intel_fbc_work, work);
        struct drm_device *dev = work->crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;

        mutex_lock(&dev->struct_mutex);
        if (work == dev_priv->fbc.fbc_work) {
                /* Double check that we haven't switched fb without cancelling
                 * the prior work.
                 */
                if (work->crtc->fb == work->fb) {
                        dev_priv->display.enable_fbc(work->crtc,
                                                     work->interval);

                        dev_priv->fbc.plane = to_intel_crtc(work->crtc)->plane;
                        dev_priv->fbc.fb_id = work->crtc->fb->base.id;
                        dev_priv->fbc.y = work->crtc->y;
                }

                dev_priv->fbc.fbc_work = NULL;
        }
        mutex_unlock(&dev->struct_mutex);

        kfree(work);
}

static void intel_cancel_fbc_work(struct drm_i915_private *dev_priv)
{
        if (dev_priv->fbc.fbc_work == NULL)
                return;

        DRM_DEBUG_KMS("cancelling pending FBC enable\n");

        /* Synchronisation is provided by struct_mutex and checking of
         * dev_priv->fbc.fbc_work, so we can perform the cancellation
         * entirely asynchronously.
         */
        if (cancel_delayed_work(&dev_priv->fbc.fbc_work->work))
                /* tasklet was killed before being run, clean up */
                kfree(dev_priv->fbc.fbc_work);

        /* Mark the work as no longer wanted so that if it does
         * wake up (because the work was already running and waiting
         * for our mutex), it will discover that it is no longer
         * necessary to run.
         */
        dev_priv->fbc.fbc_work = NULL;
}

static void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
{
        struct intel_fbc_work *work;
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;

        if (!dev_priv->display.enable_fbc)
                return;

        intel_cancel_fbc_work(dev_priv);

        work = kzalloc(sizeof(*work), GFP_KERNEL);
        if (work == NULL) {
                DRM_ERROR("Failed to allocate FBC work structure\n");
                dev_priv->display.enable_fbc(crtc, interval);
                return;
        }

        work->crtc = crtc;
        work->fb = crtc->fb;
        work->interval = interval;
        INIT_DELAYED_WORK(&work->work, intel_fbc_work_fn);

        dev_priv->fbc.fbc_work = work;

        /* Delay the actual enabling to let pageflipping cease and the
         * display to settle before starting the compression. Note that
         * this delay also serves a second purpose: it allows for a
         * vblank to pass after disabling the FBC before we attempt
         * to modify the control registers.
         *
         * A more complicated solution would involve tracking vblanks
         * following the termination of the page-flipping sequence
         * and indeed performing the enable as a co-routine and not
         * waiting synchronously upon the vblank.
         *
         * WaFbcWaitForVBlankBeforeEnable:ilk,snb
         */
        schedule_delayed_work(&work->work, msecs_to_jiffies(50));
}

void intel_disable_fbc(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        intel_cancel_fbc_work(dev_priv);

        if (!dev_priv->display.disable_fbc)
                return;

        dev_priv->display.disable_fbc(dev);
        dev_priv->fbc.plane = -1;
}

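/*
 * Record why FBC is off.  Returns true only when the reason actually
 * changes, so callers can use the return value to avoid re-emitting the
 * same debug message on every intel_update_fbc() pass.
 */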
static bool set_no_fbc_reason(struct drm_i915_private *dev_priv,
                              enum no_fbc_reason reason)
{
        if (dev_priv->fbc.no_fbc_reason == reason)
                return false;

        dev_priv->fbc.no_fbc_reason = reason;
        return true;
}

/**
 * intel_update_fbc - enable/disable FBC as needed
 * @dev: the drm_device
 *
 * Set up the framebuffer compression hardware at mode set time.  We
 * enable it if possible:
 *   - plane A only (on pre-965)
 *   - no pixel multiply/line duplication
 *   - no alpha buffer discard
 *   - no dual wide
 *   - framebuffer <= max_hdisplay in width, max_vdisplay in height
 *
 * We can't assume that any compression will take place (worst case),
 * so the compressed buffer has to be the same size as the uncompressed
 * one.  It also must reside (along with the line length buffer) in
 * stolen memory.
 *
 * We need to enable/disable FBC on a global basis.
 */
void intel_update_fbc(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc = NULL, *tmp_crtc;
        struct intel_crtc *intel_crtc;
        struct drm_framebuffer *fb;
        struct intel_framebuffer *intel_fb;
        struct drm_i915_gem_object *obj;
        const struct drm_display_mode *adjusted_mode;
        unsigned int max_width, max_height;

        if (!I915_HAS_FBC(dev)) {
                set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED);
                return;
        }

        if (!i915_powersave) {
                if (set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM))
                        DRM_DEBUG_KMS("fbc disabled per module param\n");
                return;
        }

        /*
         * If FBC is already on, we just have to verify that we can
         * keep it that way...
         * Need to disable if:
         *   - more than one pipe is active
         *   - changing FBC params (stride, fence, mode)
         *   - new fb is too large to fit in compressed buffer
         *   - going to an unsupported config (interlace, pixel multiply, etc.)
         */
        list_for_each_entry(tmp_crtc, &dev->mode_config.crtc_list, head) {
                if (intel_crtc_active(tmp_crtc) &&
                    to_intel_crtc(tmp_crtc)->primary_enabled) {
                        if (crtc) {
                                if (set_no_fbc_reason(dev_priv, FBC_MULTIPLE_PIPES))
                                        DRM_DEBUG_KMS("more than one pipe active, disabling compression\n");
                                goto out_disable;
                        }
                        crtc = tmp_crtc;
                }
        }

        if (!crtc || crtc->fb == NULL) {
                if (set_no_fbc_reason(dev_priv, FBC_NO_OUTPUT))
                        DRM_DEBUG_KMS("no output, disabling\n");
                goto out_disable;
        }

        intel_crtc = to_intel_crtc(crtc);
        fb = crtc->fb;
        intel_fb = to_intel_framebuffer(fb);
        obj = intel_fb->obj;
        adjusted_mode = &intel_crtc->config.adjusted_mode;

        if (i915_enable_fbc < 0 &&
            INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev)) {
                if (set_no_fbc_reason(dev_priv, FBC_CHIP_DEFAULT))
                        DRM_DEBUG_KMS("disabled per chip default\n");
                goto out_disable;
        }
        if (!i915_enable_fbc) {
                if (set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM))
                        DRM_DEBUG_KMS("fbc disabled per module param\n");
                goto out_disable;
        }
        if ((adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) ||
            (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)) {
                if (set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED_MODE))
                        DRM_DEBUG_KMS("mode incompatible with compression, "
                                      "disabling\n");
                goto out_disable;
        }

        if (IS_G4X(dev) || INTEL_INFO(dev)->gen >= 5) {
                max_width = 4096;
                max_height = 2048;
        } else {
                max_width = 2048;
                max_height = 1536;
        }
        if (intel_crtc->config.pipe_src_w > max_width ||
            intel_crtc->config.pipe_src_h > max_height) {
                if (set_no_fbc_reason(dev_priv, FBC_MODE_TOO_LARGE))
                        DRM_DEBUG_KMS("mode too large for compression, disabling\n");
                goto out_disable;
        }
        if ((INTEL_INFO(dev)->gen < 4 || IS_HASWELL(dev)) &&
            intel_crtc->plane != PLANE_A) {
                if (set_no_fbc_reason(dev_priv, FBC_BAD_PLANE))
                        DRM_DEBUG_KMS("plane not A, disabling compression\n");
                goto out_disable;
        }

        /* The use of a CPU fence is mandatory in order to detect writes
         * by the CPU to the scanout and trigger updates to the FBC.
         */
        if (obj->tiling_mode != I915_TILING_X ||
            obj->fence_reg == I915_FENCE_REG_NONE) {
                if (set_no_fbc_reason(dev_priv, FBC_NOT_TILED))
                        DRM_DEBUG_KMS("framebuffer not tiled or fenced, disabling compression\n");
                goto out_disable;
        }

        /* If the kernel debugger is active, always disable compression */
        if (in_dbg_master())
                goto out_disable;

        if (i915_gem_stolen_setup_compression(dev, intel_fb->obj->base.size)) {
                if (set_no_fbc_reason(dev_priv, FBC_STOLEN_TOO_SMALL))
                        DRM_DEBUG_KMS("framebuffer too large, disabling compression\n");
                goto out_disable;
        }

        /* If the scanout has not changed, don't modify the FBC settings.
         * Note that we make the fundamental assumption that the fb->obj
         * cannot be unpinned (and have its GTT offset and fence revoked)
         * without first being decoupled from the scanout and FBC disabled.
         */
        if (dev_priv->fbc.plane == intel_crtc->plane &&
            dev_priv->fbc.fb_id == fb->base.id &&
            dev_priv->fbc.y == crtc->y)
                return;

        if (intel_fbc_enabled(dev)) {
                /* We update FBC along two paths, after changing fb/crtc
                 * configuration (modeswitching) and after page-flipping
                 * finishes. For the latter, we know that not only did
                 * we disable the FBC at the start of the page-flip
                 * sequence, but also more than one vblank has passed.
                 *
                 * For the former case of modeswitching, it is possible
                 * to switch between two FBC valid configurations
                 * instantaneously so we do need to disable the FBC
                 * before we can modify its control registers. We also
                 * have to wait for the next vblank for that to take
                 * effect. However, since we delay enabling FBC we can
                 * assume that a vblank has passed since disabling and
                 * that we can safely alter the registers in the deferred
                 * callback.
                 *
                 * In the scenario that we go from a valid to invalid
                 * and then back to valid FBC configuration we have
                 * no strict enforcement that a vblank occurred since
                 * disabling the FBC. However, along all current pipe
                 * disabling paths we do need to wait for a vblank at
                 * some point. And we wait before enabling FBC anyway.
                 */
                DRM_DEBUG_KMS("disabling active FBC for update\n");
                intel_disable_fbc(dev);
        }

        intel_enable_fbc(crtc, 500);
        dev_priv->fbc.no_fbc_reason = FBC_OK;
        return;

out_disable:
        /* Multiple disables should be harmless */
        if (intel_fbc_enabled(dev)) {
                DRM_DEBUG_KMS("unsupported config, disabling FBC\n");
                intel_disable_fbc(dev);
        }
        i915_gem_stolen_cleanup_compression(dev);
}

static void i915_pineview_get_mem_freq(struct drm_device *dev)
{
        drm_i915_private_t *dev_priv = dev->dev_private;
        u32 tmp;

        tmp = I915_READ(CLKCFG);

        switch (tmp & CLKCFG_FSB_MASK) {
        case CLKCFG_FSB_533:
                dev_priv->fsb_freq = 533; /* 133*4 */
                break;
        case CLKCFG_FSB_800:
                dev_priv->fsb_freq = 800; /* 200*4 */
                break;
        case CLKCFG_FSB_667:
                dev_priv->fsb_freq = 667; /* 167*4 */
                break;
        case CLKCFG_FSB_400:
                dev_priv->fsb_freq = 400; /* 100*4 */
                break;
        }

        switch (tmp & CLKCFG_MEM_MASK) {
        case CLKCFG_MEM_533:
                dev_priv->mem_freq = 533;
                break;
        case CLKCFG_MEM_667:
                dev_priv->mem_freq = 667;
                break;
        case CLKCFG_MEM_800:
                dev_priv->mem_freq = 800;
                break;
        }

        /* detect pineview DDR3 setting */
        tmp = I915_READ(CSHRDDR3CTL);
        dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
}

static void i915_ironlake_get_mem_freq(struct drm_device *dev)
{
        drm_i915_private_t *dev_priv = dev->dev_private;
        u16 ddrpll, csipll;

        ddrpll = I915_READ16(DDRMPLL1);
        csipll = I915_READ16(CSIPLL0);

        switch (ddrpll & 0xff) {
        case 0xc:
                dev_priv->mem_freq = 800;
                break;
        case 0x10:
                dev_priv->mem_freq = 1066;
                break;
        case 0x14:
                dev_priv->mem_freq = 1333;
                break;
        case 0x18:
                dev_priv->mem_freq = 1600;
                break;
        default:
                DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
                                 ddrpll & 0xff);
                dev_priv->mem_freq = 0;
                break;
        }

        dev_priv->ips.r_t = dev_priv->mem_freq;

        switch (csipll & 0x3ff) {
        case 0x00c:
                dev_priv->fsb_freq = 3200;
                break;
        case 0x00e:
                dev_priv->fsb_freq = 3733;
                break;
        case 0x010:
                dev_priv->fsb_freq = 4266;
                break;
        case 0x012:
                dev_priv->fsb_freq = 4800;
                break;
        case 0x014:
                dev_priv->fsb_freq = 5333;
                break;
        case 0x016:
                dev_priv->fsb_freq = 5866;
                break;
        case 0x018:
                dev_priv->fsb_freq = 6400;
                break;
        default:
                DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
                                 csipll & 0x3ff);
                dev_priv->fsb_freq = 0;
                break;
        }

        if (dev_priv->fsb_freq == 3200) {
                dev_priv->ips.c_m = 0;
        } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
                dev_priv->ips.c_m = 1;
        } else {
                dev_priv->ips.c_m = 2;
        }
}

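/*
 * Each row is a struct cxsr_latency; as used by intel_get_cxsr_latency()
 * below, the first four columns are { is_desktop, is_ddr3, fsb_freq,
 * mem_freq } and the remaining four are the self-refresh latencies (in ns)
 * consumed by pineview_update_wm().  (Column naming inferred from the
 * lookups in this file, not stated in the original.)
 */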
static const struct cxsr_latency cxsr_latency_table[] = {
        {1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
        {1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
        {1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
        {1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
        {1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */

        {1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
        {1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
        {1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
        {1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
        {1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */

        {1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
        {1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
        {1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
        {1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
        {1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */

        {0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
        {0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
        {0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
        {0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
        {0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */

        {0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
        {0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
        {0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
        {0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
        {0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */

        {0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
        {0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
        {0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
        {0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
        {0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
};

static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop,
                                                         int is_ddr3,
                                                         int fsb,
                                                         int mem)
{
        const struct cxsr_latency *latency;
        int i;

        if (fsb == 0 || mem == 0)
                return NULL;

        for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
                latency = &cxsr_latency_table[i];
                if (is_desktop == latency->is_desktop &&
                    is_ddr3 == latency->is_ddr3 &&
                    fsb == latency->fsb_freq && mem == latency->mem_freq)
                        return latency;
        }

        DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");

        return NULL;
}

static void pineview_disable_cxsr(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        /* deactivate cxsr */
        I915_WRITE(DSPFW3, I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN);
}

/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value.  It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (set it too low to save power and we might see
 * FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 */
static const int latency_ns = 5000;

static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x7f;
        if (plane)
                size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A", size);

        return size;
}

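/*
 * Example for i9xx_get_fifo_size() above (hypothetical register value):
 * if the low field of DSPARB reads 64 and the C-start field reads 96,
 * plane A gets 64 FIFO entries and plane B gets 96 - 64 = 32.
 */
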
static int i85x_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x1ff;
        if (plane)
                size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
        size >>= 1; /* Convert to cachelines */

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A", size);

        return size;
}

static int i845_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x7f;
        size >>= 2; /* Convert to cachelines */

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A",
                      size);

        return size;
}

static int i830_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x7f;
        size >>= 1; /* Convert to cachelines */

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A", size);

        return size;
}

/* Pineview has different values for various configs */
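/*
 * The positional initializers below follow struct intel_watermark_params:
 * { fifo_size, max_wm, default_wm, guard_size, cacheline_size } (field
 * order inferred from the PINEVIEW_* names and from the uses in
 * intel_calculate_wm(), not stated in the original).
 */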
static const struct intel_watermark_params pineview_display_wm = {
        PINEVIEW_DISPLAY_FIFO,
        PINEVIEW_MAX_WM,
        PINEVIEW_DFT_WM,
        PINEVIEW_GUARD_WM,
        PINEVIEW_FIFO_LINE_SIZE
};
static const struct intel_watermark_params pineview_display_hplloff_wm = {
        PINEVIEW_DISPLAY_FIFO,
        PINEVIEW_MAX_WM,
        PINEVIEW_DFT_HPLLOFF_WM,
        PINEVIEW_GUARD_WM,
        PINEVIEW_FIFO_LINE_SIZE
};
static const struct intel_watermark_params pineview_cursor_wm = {
        PINEVIEW_CURSOR_FIFO,
        PINEVIEW_CURSOR_MAX_WM,
        PINEVIEW_CURSOR_DFT_WM,
        PINEVIEW_CURSOR_GUARD_WM,
        PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
        PINEVIEW_CURSOR_FIFO,
        PINEVIEW_CURSOR_MAX_WM,
        PINEVIEW_CURSOR_DFT_WM,
        PINEVIEW_CURSOR_GUARD_WM,
        PINEVIEW_FIFO_LINE_SIZE
};
static const struct intel_watermark_params g4x_wm_info = {
        G4X_FIFO_SIZE,
        G4X_MAX_WM,
        G4X_MAX_WM,
        2,
        G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_cursor_wm_info = {
        I965_CURSOR_FIFO,
        I965_CURSOR_MAX_WM,
        I965_CURSOR_DFT_WM,
        2,
        G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params valleyview_wm_info = {
        VALLEYVIEW_FIFO_SIZE,
        VALLEYVIEW_MAX_WM,
        VALLEYVIEW_MAX_WM,
        2,
        G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params valleyview_cursor_wm_info = {
        I965_CURSOR_FIFO,
        VALLEYVIEW_CURSOR_MAX_WM,
        I965_CURSOR_DFT_WM,
        2,
        G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i965_cursor_wm_info = {
        I965_CURSOR_FIFO,
        I965_CURSOR_MAX_WM,
        I965_CURSOR_DFT_WM,
        2,
        I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i945_wm_info = {
        I945_FIFO_SIZE,
        I915_MAX_WM,
        1,
        2,
        I915_FIFO_LINE_SIZE
};
static const struct intel_watermark_params i915_wm_info = {
        I915_FIFO_SIZE,
        I915_MAX_WM,
        1,
        2,
        I915_FIFO_LINE_SIZE
};
static const struct intel_watermark_params i855_wm_info = {
        I855GM_FIFO_SIZE,
        I915_MAX_WM,
        1,
        2,
        I830_FIFO_LINE_SIZE
};
static const struct intel_watermark_params i830_wm_info = {
        I830_FIFO_SIZE,
        I915_MAX_WM,
        1,
        2,
        I830_FIFO_LINE_SIZE
};

static const struct intel_watermark_params ironlake_display_wm_info = {
        ILK_DISPLAY_FIFO,
        ILK_DISPLAY_MAXWM,
        ILK_DISPLAY_DFTWM,
        2,
        ILK_FIFO_LINE_SIZE
};
static const struct intel_watermark_params ironlake_cursor_wm_info = {
        ILK_CURSOR_FIFO,
        ILK_CURSOR_MAXWM,
        ILK_CURSOR_DFTWM,
        2,
        ILK_FIFO_LINE_SIZE
};
static const struct intel_watermark_params ironlake_display_srwm_info = {
        ILK_DISPLAY_SR_FIFO,
        ILK_DISPLAY_MAX_SRWM,
        ILK_DISPLAY_DFT_SRWM,
        2,
        ILK_FIFO_LINE_SIZE
};
static const struct intel_watermark_params ironlake_cursor_srwm_info = {
        ILK_CURSOR_SR_FIFO,
        ILK_CURSOR_MAX_SRWM,
        ILK_CURSOR_DFT_SRWM,
        2,
        ILK_FIFO_LINE_SIZE
};

static const struct intel_watermark_params sandybridge_display_wm_info = {
        SNB_DISPLAY_FIFO,
        SNB_DISPLAY_MAXWM,
        SNB_DISPLAY_DFTWM,
        2,
        SNB_FIFO_LINE_SIZE
};
static const struct intel_watermark_params sandybridge_cursor_wm_info = {
        SNB_CURSOR_FIFO,
        SNB_CURSOR_MAXWM,
        SNB_CURSOR_DFTWM,
        2,
        SNB_FIFO_LINE_SIZE
};
static const struct intel_watermark_params sandybridge_display_srwm_info = {
        SNB_DISPLAY_SR_FIFO,
        SNB_DISPLAY_MAX_SRWM,
        SNB_DISPLAY_DFT_SRWM,
        2,
        SNB_FIFO_LINE_SIZE
};
static const struct intel_watermark_params sandybridge_cursor_srwm_info = {
        SNB_CURSOR_SR_FIFO,
        SNB_CURSOR_MAX_SRWM,
        SNB_CURSOR_DFT_SRWM,
        2,
        SNB_FIFO_LINE_SIZE
};

/**
 * intel_calculate_wm - calculate watermark level
 * @clock_in_khz: pixel clock
 * @wm: chip FIFO params
 * @pixel_size: display pixel size
 * @latency_ns: memory latency for the platform
 *
 * Calculate the watermark level (the level at which the display plane will
 * start fetching from memory again).  Each chip has a different display
 * FIFO size and allocation, so the caller needs to figure that out and pass
 * in the correct intel_watermark_params structure.
 *
 * As the pixel clock runs, the FIFO will be drained at a rate that depends
 * on the pixel size.  When it reaches the watermark level, it'll start
 * fetching FIFO-line-sized chunks from memory until the FIFO fills past
 * the watermark point.  If the FIFO drains completely, a FIFO underrun
 * will occur, and a display engine hang could result.
 */
static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
                                        const struct intel_watermark_params *wm,
                                        int fifo_size,
                                        int pixel_size,
                                        unsigned long latency_ns)
{
        long entries_required, wm_size;

        /*
         * Note: we need to make sure we don't overflow for various clock &
         * latency values.
         * clocks go from a few thousand to several hundred thousand.
         * latency is usually a few thousand.
         */
        entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
                1000;
        entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);

        DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);

        wm_size = fifo_size - (entries_required + wm->guard_size);

        DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);

        /* Don't promote wm_size to unsigned... */
        if (wm_size > (long)wm->max_wm)
                wm_size = wm->max_wm;
        if (wm_size <= 0)
                wm_size = wm->default_wm;
        return wm_size;
}
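
/*
 * Worked example for intel_calculate_wm() (illustrative numbers only):
 * with a 108000 kHz pixel clock, 4 bytes per pixel, 5000 ns latency and
 * 64-byte cachelines, entries_required = 108 * 4 * 5000 / 1000 = 2160
 * bytes = 34 cachelines; a hypothetical 96-entry FIFO with a guard size
 * of 2 would then yield a watermark of 96 - 36 = 60.
 */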

static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
{
        struct drm_crtc *crtc, *enabled = NULL;

        list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
                if (intel_crtc_active(crtc)) {
                        if (enabled)
                                return NULL;
                        enabled = crtc;
                }
        }

        return enabled;
}

static void pineview_update_wm(struct drm_crtc *unused_crtc)
{
        struct drm_device *dev = unused_crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
        const struct cxsr_latency *latency;
        u32 reg;
        unsigned long wm;

        latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3,
                                         dev_priv->fsb_freq, dev_priv->mem_freq);
        if (!latency) {
                DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
                pineview_disable_cxsr(dev);
                return;
        }

        crtc = single_enabled_crtc(dev);
        if (crtc) {
                const struct drm_display_mode *adjusted_mode;
                int pixel_size = crtc->fb->bits_per_pixel / 8;
                int clock;

                adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
                clock = adjusted_mode->crtc_clock;

                /* Display SR */
                wm = intel_calculate_wm(clock, &pineview_display_wm,
                                        pineview_display_wm.fifo_size,
                                        pixel_size, latency->display_sr);
                reg = I915_READ(DSPFW1);
                reg &= ~DSPFW_SR_MASK;
                reg |= wm << DSPFW_SR_SHIFT;
                I915_WRITE(DSPFW1, reg);
                DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);

                /* cursor SR */
                wm = intel_calculate_wm(clock, &pineview_cursor_wm,
                                        pineview_display_wm.fifo_size,
                                        pixel_size, latency->cursor_sr);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_CURSOR_SR_MASK;
                reg |= (wm & 0x3f) << DSPFW_CURSOR_SR_SHIFT;
                I915_WRITE(DSPFW3, reg);

                /* Display HPLL off SR */
                wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
                                        pineview_display_hplloff_wm.fifo_size,
                                        pixel_size, latency->display_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_SR_MASK;
                reg |= wm & DSPFW_HPLL_SR_MASK;
                I915_WRITE(DSPFW3, reg);

                /* cursor HPLL off SR */
                wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
                                        pineview_display_hplloff_wm.fifo_size,
                                        pixel_size, latency->cursor_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_CURSOR_MASK;
                reg |= (wm & 0x3f) << DSPFW_HPLL_CURSOR_SHIFT;
                I915_WRITE(DSPFW3, reg);
                DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);

                /* activate cxsr */
                I915_WRITE(DSPFW3,
                           I915_READ(DSPFW3) | PINEVIEW_SELF_REFRESH_EN);
                DRM_DEBUG_KMS("Self-refresh is enabled\n");
        } else {
                pineview_disable_cxsr(dev);
                DRM_DEBUG_KMS("Self-refresh is disabled\n");
        }
}

static bool g4x_compute_wm0(struct drm_device *dev,
                            int plane,
                            const struct intel_watermark_params *display,
                            int display_latency_ns,
                            const struct intel_watermark_params *cursor,
                            int cursor_latency_ns,
                            int *plane_wm,
                            int *cursor_wm)
{
        struct drm_crtc *crtc;
        const struct drm_display_mode *adjusted_mode;
        int htotal, hdisplay, clock, pixel_size;
        int line_time_us, line_count;
        int entries, tlb_miss;

        crtc = intel_get_crtc_for_plane(dev, plane);
        if (!intel_crtc_active(crtc)) {
                *cursor_wm = cursor->guard_size;
                *plane_wm = display->guard_size;
                return false;
        }

        adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
        clock = adjusted_mode->crtc_clock;
        htotal = adjusted_mode->crtc_htotal;
        hdisplay = to_intel_crtc(crtc)->config.pipe_src_w;
        pixel_size = crtc->fb->bits_per_pixel / 8;

        /* Use the small buffer method to calculate plane watermark */
        entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000;
        tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
        if (tlb_miss > 0)
                entries += tlb_miss;
        entries = DIV_ROUND_UP(entries, display->cacheline_size);
        *plane_wm = entries + display->guard_size;
        if (*plane_wm > (int)display->max_wm)
                *plane_wm = display->max_wm;

        /* Use the large buffer method to calculate cursor watermark */
        line_time_us = ((htotal * 1000) / clock);
        line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
        entries = line_count * 64 * pixel_size;
        tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
        if (tlb_miss > 0)
                entries += tlb_miss;
        entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
        *cursor_wm = entries + cursor->guard_size;
        if (*cursor_wm > (int)cursor->max_wm)
                *cursor_wm = (int)cursor->max_wm;

        return true;
}

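/*
 * Worked example for the two methods in g4x_compute_wm0() (illustrative
 * numbers only, assuming a 5000 ns latency): at a 148500 kHz clock and 4
 * bytes per pixel, the small-buffer method gives (148500 * 4 / 1000) *
 * 5000 / 1000 = 2970 bytes = 47 cachelines of 64 bytes, so plane_wm is 47
 * plus the guard size; the large-buffer cursor method with htotal = 2200
 * gives line_time_us = 2200000 / 148500 = 14 and line_count =
 * (5000 / 14 + 1000) / 1000 = 1.
 */
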
/*
 * Check the wm result.
 *
 * If any calculated watermark value is larger than the maximum value that
 * can be programmed into the associated watermark register, that watermark
 * must be disabled.
 */
static bool g4x_check_srwm(struct drm_device *dev,
                           int display_wm, int cursor_wm,
                           const struct intel_watermark_params *display,
                           const struct intel_watermark_params *cursor)
{
        DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
                      display_wm, cursor_wm);

        if (display_wm > display->max_wm) {
                DRM_DEBUG_KMS("display watermark is too large(%d/%ld), disabling\n",
                              display_wm, display->max_wm);
                return false;
        }

        if (cursor_wm > cursor->max_wm) {
                DRM_DEBUG_KMS("cursor watermark is too large(%d/%ld), disabling\n",
                              cursor_wm, cursor->max_wm);
                return false;
        }

        if (!(display_wm || cursor_wm)) {
                DRM_DEBUG_KMS("SR latency is 0, disabling\n");
                return false;
        }

        return true;
}

static bool g4x_compute_srwm(struct drm_device *dev,
                             int plane,
                             int latency_ns,
                             const struct intel_watermark_params *display,
                             const struct intel_watermark_params *cursor,
                             int *display_wm, int *cursor_wm)
{
        struct drm_crtc *crtc;
        const struct drm_display_mode *adjusted_mode;
        int hdisplay, htotal, pixel_size, clock;
        unsigned long line_time_us;
        int line_count, line_size;
        int small, large;
        int entries;

        if (!latency_ns) {
                *display_wm = *cursor_wm = 0;
                return false;
        }

        crtc = intel_get_crtc_for_plane(dev, plane);
        adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
        clock = adjusted_mode->crtc_clock;
        htotal = adjusted_mode->crtc_htotal;
        hdisplay = to_intel_crtc(crtc)->config.pipe_src_w;
        pixel_size = crtc->fb->bits_per_pixel / 8;

        line_time_us = (htotal * 1000) / clock;
        line_count = (latency_ns / line_time_us + 1000) / 1000;
        line_size = hdisplay * pixel_size;

        /* Use the minimum of the small and large buffer method for primary */
        small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
        large = line_count * line_size;

        entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
        *display_wm = entries + display->guard_size;

        /* calculate the self-refresh watermark for display cursor */
        entries = line_count * pixel_size * 64;
        entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
        *cursor_wm = entries + cursor->guard_size;

        return g4x_check_srwm(dev,
                              *display_wm, *cursor_wm,
                              display, cursor);
}

static bool vlv_compute_drain_latency(struct drm_device *dev,
                                      int plane,
                                      int *plane_prec_mult,
                                      int *plane_dl,
                                      int *cursor_prec_mult,
                                      int *cursor_dl)
{
        struct drm_crtc *crtc;
        int clock, pixel_size;
        int entries;

        crtc = intel_get_crtc_for_plane(dev, plane);
        if (!intel_crtc_active(crtc))
                return false;

        clock = to_intel_crtc(crtc)->config.adjusted_mode.crtc_clock;
        pixel_size = crtc->fb->bits_per_pixel / 8;      /* BPP */

        entries = (clock / 1000) * pixel_size;
        *plane_prec_mult = (entries > 256) ?
                DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_16;
        *plane_dl = (64 * (*plane_prec_mult) * 4) / ((clock / 1000) *
                                                     pixel_size);

        entries = (clock / 1000) * 4;   /* BPP is always 4 for cursor */
        *cursor_prec_mult = (entries > 256) ?
                DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_16;
        *cursor_dl = (64 * (*cursor_prec_mult) * 4) / ((clock / 1000) * 4);

        return true;
}

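/*
 * Worked example for vlv_compute_drain_latency() (illustrative numbers
 * only): at a 148500 kHz clock and 4 bytes per pixel, entries =
 * 148 * 4 = 592 > 256, so the 32x precision multiplier is chosen and
 * plane_dl = (64 * 32 * 4) / 592 = 13.
 */
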
/*
 * Update drain latency registers of memory arbiter
 *
 * Valleyview SoC has a new memory arbiter and needs drain latency registers
 * to be programmed. Each plane has a drain latency multiplier and a drain
 * latency value.
 */

static void vlv_update_drain_latency(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        int planea_prec, planea_dl, planeb_prec, planeb_dl;
        int cursora_prec, cursora_dl, cursorb_prec, cursorb_dl;
        int plane_prec_mult, cursor_prec_mult; /* Precision multiplier is
                                                  either 16 or 32 */

        /* For plane A, Cursor A */
        if (vlv_compute_drain_latency(dev, 0, &plane_prec_mult, &planea_dl,
                                      &cursor_prec_mult, &cursora_dl)) {
                cursora_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
                        DDL_CURSORA_PRECISION_32 : DDL_CURSORA_PRECISION_16;
                planea_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
                        DDL_PLANEA_PRECISION_32 : DDL_PLANEA_PRECISION_16;

                I915_WRITE(VLV_DDL1, cursora_prec |
                                (cursora_dl << DDL_CURSORA_SHIFT) |
                                planea_prec | planea_dl);
        }

        /* For plane B, Cursor B */
        if (vlv_compute_drain_latency(dev, 1, &plane_prec_mult, &planeb_dl,
                                      &cursor_prec_mult, &cursorb_dl)) {
                cursorb_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
                        DDL_CURSORB_PRECISION_32 : DDL_CURSORB_PRECISION_16;
                planeb_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
                        DDL_PLANEB_PRECISION_32 : DDL_PLANEB_PRECISION_16;

                I915_WRITE(VLV_DDL2, cursorb_prec |
                                (cursorb_dl << DDL_CURSORB_SHIFT) |
                                planeb_prec | planeb_dl);
        }
}

#define single_plane_enabled(mask) is_power_of_2(mask)
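/*
 * e.g. single_plane_enabled(0x2) is true (only pipe B enabled), while
 * single_plane_enabled(0x3) is false (both pipes enabled).
 */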
1379
1380 static void valleyview_update_wm(struct drm_crtc *crtc)
1381 {
1382         struct drm_device *dev = crtc->dev;
1383         static const int sr_latency_ns = 12000;
1384         struct drm_i915_private *dev_priv = dev->dev_private;
1385         int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
1386         int plane_sr, cursor_sr;
1387         int ignore_plane_sr, ignore_cursor_sr;
1388         unsigned int enabled = 0;
1389
1390         vlv_update_drain_latency(dev);
1391
1392         if (g4x_compute_wm0(dev, PIPE_A,
1393                             &valleyview_wm_info, latency_ns,
1394                             &valleyview_cursor_wm_info, latency_ns,
1395                             &planea_wm, &cursora_wm))
1396                 enabled |= 1 << PIPE_A;
1397
1398         if (g4x_compute_wm0(dev, PIPE_B,
1399                             &valleyview_wm_info, latency_ns,
1400                             &valleyview_cursor_wm_info, latency_ns,
1401                             &planeb_wm, &cursorb_wm))
1402                 enabled |= 1 << PIPE_B;
1403
1404         if (single_plane_enabled(enabled) &&
1405             g4x_compute_srwm(dev, ffs(enabled) - 1,
1406                              sr_latency_ns,
1407                              &valleyview_wm_info,
1408                              &valleyview_cursor_wm_info,
1409                              &plane_sr, &ignore_cursor_sr) &&
1410             g4x_compute_srwm(dev, ffs(enabled) - 1,
1411                              2*sr_latency_ns,
1412                              &valleyview_wm_info,
1413                              &valleyview_cursor_wm_info,
1414                              &ignore_plane_sr, &cursor_sr)) {
1415                 I915_WRITE(FW_BLC_SELF_VLV, FW_CSPWRDWNEN);
1416         } else {
1417                 I915_WRITE(FW_BLC_SELF_VLV,
1418                            I915_READ(FW_BLC_SELF_VLV) & ~FW_CSPWRDWNEN);
1419                 plane_sr = cursor_sr = 0;
1420         }
1421
1422         DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
1423                       planea_wm, cursora_wm,
1424                       planeb_wm, cursorb_wm,
1425                       plane_sr, cursor_sr);
1426
1427         I915_WRITE(DSPFW1,
1428                    (plane_sr << DSPFW_SR_SHIFT) |
1429                    (cursorb_wm << DSPFW_CURSORB_SHIFT) |
1430                    (planeb_wm << DSPFW_PLANEB_SHIFT) |
1431                    planea_wm);
1432         I915_WRITE(DSPFW2,
1433                    (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
1434                    (cursora_wm << DSPFW_CURSORA_SHIFT));
1435         I915_WRITE(DSPFW3,
1436                    (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) |
1437                    (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
1438 }
1439
1440 static void g4x_update_wm(struct drm_crtc *crtc)
1441 {
1442         struct drm_device *dev = crtc->dev;
1443         static const int sr_latency_ns = 12000;
1444         struct drm_i915_private *dev_priv = dev->dev_private;
1445         int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
1446         int plane_sr, cursor_sr;
1447         unsigned int enabled = 0;
1448
1449         if (g4x_compute_wm0(dev, PIPE_A,
1450                             &g4x_wm_info, latency_ns,
1451                             &g4x_cursor_wm_info, latency_ns,
1452                             &planea_wm, &cursora_wm))
1453                 enabled |= 1 << PIPE_A;
1454
1455         if (g4x_compute_wm0(dev, PIPE_B,
1456                             &g4x_wm_info, latency_ns,
1457                             &g4x_cursor_wm_info, latency_ns,
1458                             &planeb_wm, &cursorb_wm))
1459                 enabled |= 1 << PIPE_B;
1460
1461         if (single_plane_enabled(enabled) &&
1462             g4x_compute_srwm(dev, ffs(enabled) - 1,
1463                              sr_latency_ns,
1464                              &g4x_wm_info,
1465                              &g4x_cursor_wm_info,
1466                              &plane_sr, &cursor_sr)) {
1467                 I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN);
1468         } else {
1469                 I915_WRITE(FW_BLC_SELF,
1470                            I915_READ(FW_BLC_SELF) & ~FW_BLC_SELF_EN);
1471                 plane_sr = cursor_sr = 0;
1472         }
1473
1474         DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
1475                       planea_wm, cursora_wm,
1476                       planeb_wm, cursorb_wm,
1477                       plane_sr, cursor_sr);
1478
1479         I915_WRITE(DSPFW1,
1480                    (plane_sr << DSPFW_SR_SHIFT) |
1481                    (cursorb_wm << DSPFW_CURSORB_SHIFT) |
1482                    (planeb_wm << DSPFW_PLANEB_SHIFT) |
1483                    planea_wm);
1484         I915_WRITE(DSPFW2,
1485                    (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
1486                    (cursora_wm << DSPFW_CURSORA_SHIFT));
1487         /* HPLL off in SR has some issues on G4x... disable it */
1488         I915_WRITE(DSPFW3,
1489                    (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
1490                    (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
1491 }
1492
1493 static void i965_update_wm(struct drm_crtc *unused_crtc)
1494 {
1495         struct drm_device *dev = unused_crtc->dev;
1496         struct drm_i915_private *dev_priv = dev->dev_private;
1497         struct drm_crtc *crtc;
1498         int srwm = 1;
1499         int cursor_sr = 16;
1500
1501         /* Calc SR entries for single-plane configs */
1502         crtc = single_enabled_crtc(dev);
1503         if (crtc) {
1504                 /* self-refresh has much higher latency */
1505                 static const int sr_latency_ns = 12000;
1506                 const struct drm_display_mode *adjusted_mode =
1507                         &to_intel_crtc(crtc)->config.adjusted_mode;
1508                 int clock = adjusted_mode->crtc_clock;
1509                 int htotal = adjusted_mode->crtc_htotal;
1510                 int hdisplay = to_intel_crtc(crtc)->config.pipe_src_w;
1511                 int pixel_size = crtc->fb->bits_per_pixel / 8;
1512                 unsigned long line_time_us;
1513                 int entries;
1514
1515                 line_time_us = ((htotal * 1000) / clock);
1516
1517                 /* Use ns/us then divide to preserve precision */
1518                 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1519                         pixel_size * hdisplay;
1520                 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
1521                 srwm = I965_FIFO_SIZE - entries;
1522                 if (srwm < 0)
1523                         srwm = 1;
1524                 srwm &= 0x1ff;
1525                 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
1526                               entries, srwm);
1527
1528                 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1529                         pixel_size * 64;
1530                 entries = DIV_ROUND_UP(entries,
1531                                           i965_cursor_wm_info.cacheline_size);
1532                 cursor_sr = i965_cursor_wm_info.fifo_size -
1533                         (entries + i965_cursor_wm_info.guard_size);
1534
1535                 if (cursor_sr > i965_cursor_wm_info.max_wm)
1536                         cursor_sr = i965_cursor_wm_info.max_wm;
1537
1538                 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
1539                               "cursor %d\n", srwm, cursor_sr);
1540
1541                 if (IS_CRESTLINE(dev))
1542                         I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN);
1543         } else {
1544                 /* Turn off self refresh if both pipes are enabled */
1545                 if (IS_CRESTLINE(dev))
1546                         I915_WRITE(FW_BLC_SELF, I915_READ(FW_BLC_SELF)
1547                                    & ~FW_BLC_SELF_EN);
1548         }
1549
1550         DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
1551                       srwm);
1552
1553         /* 965 has limitations... */
1554         I915_WRITE(DSPFW1, (srwm << DSPFW_SR_SHIFT) |
1555                    (8 << 16) | (8 << 8) | (8 << 0));
1556         I915_WRITE(DSPFW2, (8 << 8) | (8 << 0));
1557         /* update cursor SR watermark */
1558         I915_WRITE(DSPFW3, (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
1559 }
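
/*
 * Worked self-refresh example (hypothetical mode; assumes I965_FIFO_SIZE
 * == 512 and I915_FIFO_LINE_SIZE == 64): crtc_clock == 148500,
 * htotal == 2200, hdisplay == 1920, 32bpp:
 *
 *   line_time_us = (2200 * 1000) / 148500                  = 14
 *   entries      = ((12000 / 14 + 1000) / 1000) * 4 * 1920 = 7680
 *   entries      = DIV_ROUND_UP(7680, 64)                  = 120
 *   srwm         = 512 - 120                               = 392
 */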
1560
1561 static void i9xx_update_wm(struct drm_crtc *unused_crtc)
1562 {
1563         struct drm_device *dev = unused_crtc->dev;
1564         struct drm_i915_private *dev_priv = dev->dev_private;
1565         const struct intel_watermark_params *wm_info;
1566         uint32_t fwater_lo;
1567         uint32_t fwater_hi;
1568         int cwm, srwm = 1;
1569         int fifo_size;
1570         int planea_wm, planeb_wm;
1571         struct drm_crtc *crtc, *enabled = NULL;
1572
1573         if (IS_I945GM(dev))
1574                 wm_info = &i945_wm_info;
1575         else if (!IS_GEN2(dev))
1576                 wm_info = &i915_wm_info;
1577         else
1578                 wm_info = &i855_wm_info;
1579
1580         fifo_size = dev_priv->display.get_fifo_size(dev, 0);
1581         crtc = intel_get_crtc_for_plane(dev, 0);
1582         if (intel_crtc_active(crtc)) {
1583                 const struct drm_display_mode *adjusted_mode;
1584                 int cpp = crtc->fb->bits_per_pixel / 8;
1585                 if (IS_GEN2(dev))
1586                         cpp = 4;
1587
1588                 adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
1589                 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1590                                                wm_info, fifo_size, cpp,
1591                                                latency_ns);
1592                 enabled = crtc;
1593         } else
1594                 planea_wm = fifo_size - wm_info->guard_size;
1595
1596         fifo_size = dev_priv->display.get_fifo_size(dev, 1);
1597         crtc = intel_get_crtc_for_plane(dev, 1);
1598         if (intel_crtc_active(crtc)) {
1599                 const struct drm_display_mode *adjusted_mode;
1600                 int cpp = crtc->fb->bits_per_pixel / 8;
1601                 if (IS_GEN2(dev))
1602                         cpp = 4;
1603
1604                 adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
1605                 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1606                                                wm_info, fifo_size, cpp,
1607                                                latency_ns);
1608                 if (enabled == NULL)
1609                         enabled = crtc;
1610                 else
1611                         enabled = NULL; /* both planes active: no single-plane SR */
1612         } else
1613                 planeb_wm = fifo_size - wm_info->guard_size;
1614
1615         DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
1616
1617         /*
1618          * Overlay gets an aggressive default since video jitter is bad.
1619          */
1620         cwm = 2;
1621
1622         /* Play safe and disable self-refresh before adjusting watermarks. */
1623         if (IS_I945G(dev) || IS_I945GM(dev))
1624                 I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN_MASK | 0);
1625         else if (IS_I915GM(dev))
1626                 I915_WRITE(INSTPM, I915_READ(INSTPM) & ~INSTPM_SELF_EN);
1627
1628         /* Calc SR entries for single-plane configs */
1629         if (HAS_FW_BLC(dev) && enabled) {
1630                 /* self-refresh has much higher latency */
1631                 static const int sr_latency_ns = 6000;
1632                 const struct drm_display_mode *adjusted_mode =
1633                         &to_intel_crtc(enabled)->config.adjusted_mode;
1634                 int clock = adjusted_mode->crtc_clock;
1635                 int htotal = adjusted_mode->crtc_htotal;
1636                 int hdisplay = to_intel_crtc(enabled)->config.pipe_src_w;
1637                 int pixel_size = enabled->fb->bits_per_pixel / 8;
1638                 unsigned long line_time_us;
1639                 int entries;
1640
1641                 line_time_us = (htotal * 1000) / clock;
1642
1643                 /* Use ns/us then divide to preserve precision */
1644                 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1645                         pixel_size * hdisplay;
1646                 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
1647                 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
1648                 srwm = wm_info->fifo_size - entries;
1649                 if (srwm < 0)
1650                         srwm = 1;
1651
1652                 if (IS_I945G(dev) || IS_I945GM(dev))
1653                         I915_WRITE(FW_BLC_SELF,
1654                                    FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
1655                 else if (IS_I915GM(dev))
1656                         I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
1657         }
1658
1659         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
1660                       planea_wm, planeb_wm, cwm, srwm);
1661
1662         fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
1663         fwater_hi = (cwm & 0x1f);
1664
1665         /* Set request length to 8 cachelines per fetch */
1666         fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
1667         fwater_hi = fwater_hi | (1 << 8);
1668
1669         I915_WRITE(FW_BLC, fwater_lo);
1670         I915_WRITE(FW_BLC2, fwater_hi);
1671
1672         if (HAS_FW_BLC(dev)) {
1673                 if (enabled) {
1674                         if (IS_I945G(dev) || IS_I945GM(dev))
1675                                 I915_WRITE(FW_BLC_SELF,
1676                                            FW_BLC_SELF_EN_MASK | FW_BLC_SELF_EN);
1677                         else if (IS_I915GM(dev))
1678                                 I915_WRITE(INSTPM, I915_READ(INSTPM) | INSTPM_SELF_EN);
1679                         DRM_DEBUG_KMS("memory self refresh enabled\n");
1680                 } else
1681                         DRM_DEBUG_KMS("memory self refresh disabled\n");
1682         }
1683 }
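
/*
 * Example FW_BLC encoding (hypothetical watermarks planea_wm == 20,
 * planeb_wm == 24, cwm == 2):
 *
 *   fwater_lo = (1 << 24) | (24 << 16) | (1 << 8) | 20 = 0x01180114
 *   fwater_hi = (1 << 8) | 2                           = 0x00000102
 *
 * where the (1 << 24) and (1 << 8) bits are the 8-cacheline request
 * length selects mentioned above.
 */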
1684
1685 static void i830_update_wm(struct drm_crtc *unused_crtc)
1686 {
1687         struct drm_device *dev = unused_crtc->dev;
1688         struct drm_i915_private *dev_priv = dev->dev_private;
1689         struct drm_crtc *crtc;
1690         const struct drm_display_mode *adjusted_mode;
1691         uint32_t fwater_lo;
1692         int planea_wm;
1693
1694         crtc = single_enabled_crtc(dev);
1695         if (crtc == NULL)
1696                 return;
1697
1698         adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
1699         planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1700                                        &i830_wm_info,
1701                                        dev_priv->display.get_fifo_size(dev, 0),
1702                                        4, latency_ns);
1703         fwater_lo = I915_READ(FW_BLC) & ~0xfff;
1704         fwater_lo |= (3<<8) | planea_wm;
1705
1706         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
1707
1708         I915_WRITE(FW_BLC, fwater_lo);
1709 }
1710
1711 /*
1712  * Check the wm result.
1713  *
1714  * If any calculated watermark value is larger than the maximum value that
1715  * can be programmed into the associated watermark register, that watermark
1716  * must be disabled.
1717  */
1718 static bool ironlake_check_srwm(struct drm_device *dev, int level,
1719                                 int fbc_wm, int display_wm, int cursor_wm,
1720                                 const struct intel_watermark_params *display,
1721                                 const struct intel_watermark_params *cursor)
1722 {
1723         struct drm_i915_private *dev_priv = dev->dev_private;
1724
1725         DRM_DEBUG_KMS("watermark %d: display plane %d, fbc lines %d,"
1726                       " cursor %d\n", level, display_wm, fbc_wm, cursor_wm);
1727
1728         if (fbc_wm > SNB_FBC_MAX_SRWM) {
1729                 DRM_DEBUG_KMS("fbc watermark(%d) is too large(%d), disabling wm%d+\n",
1730                               fbc_wm, SNB_FBC_MAX_SRWM, level);
1731
1732                 /* FBC has its own way to disable the FBC WM */
1733                 I915_WRITE(DISP_ARB_CTL,
1734                            I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS);
1735                 return false;
1736         } else if (INTEL_INFO(dev)->gen >= 6) {
1737                 /* enable FBC WM (except on ILK, where it must remain off) */
1738                 I915_WRITE(DISP_ARB_CTL,
1739                            I915_READ(DISP_ARB_CTL) & ~DISP_FBC_WM_DIS);
1740         }
1741
1742         if (display_wm > display->max_wm) {
1743                 DRM_DEBUG_KMS("display watermark(%d) is too large(%d), disabling wm%d+\n",
1744                               display_wm, SNB_DISPLAY_MAX_SRWM, level);
1745                 return false;
1746         }
1747
1748         if (cursor_wm > cursor->max_wm) {
1749                 DRM_DEBUG_KMS("cursor watermark(%d) is too large(%d), disabling wm%d+\n",
1750                               cursor_wm, SNB_CURSOR_MAX_SRWM, level);
1751                 return false;
1752         }
1753
1754         if (!(fbc_wm || display_wm || cursor_wm)) {
1755                 DRM_DEBUG_KMS("latency %d is 0, disabling wm%d+\n", level, level);
1756                 return false;
1757         }
1758
1759         return true;
1760 }
1761
1762 /*
1763  * Compute watermark values for WM[1-3].
1764  */
1765 static bool ironlake_compute_srwm(struct drm_device *dev, int level, int plane,
1766                                   int latency_ns,
1767                                   const struct intel_watermark_params *display,
1768                                   const struct intel_watermark_params *cursor,
1769                                   int *fbc_wm, int *display_wm, int *cursor_wm)
1770 {
1771         struct drm_crtc *crtc;
1772         const struct drm_display_mode *adjusted_mode;
1773         unsigned long line_time_us;
1774         int hdisplay, htotal, pixel_size, clock;
1775         int line_count, line_size;
1776         int small, large;
1777         int entries;
1778
1779         if (!latency_ns) {
1780                 *fbc_wm = *display_wm = *cursor_wm = 0;
1781                 return false;
1782         }
1783
1784         crtc = intel_get_crtc_for_plane(dev, plane);
1785         adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
1786         clock = adjusted_mode->crtc_clock;
1787         htotal = adjusted_mode->crtc_htotal;
1788         hdisplay = to_intel_crtc(crtc)->config.pipe_src_w;
1789         pixel_size = crtc->fb->bits_per_pixel / 8;
1790
1791         line_time_us = (htotal * 1000) / clock;
1792         line_count = (latency_ns / line_time_us + 1000) / 1000;
1793         line_size = hdisplay * pixel_size;
1794
1795         /* Use the minimum of the small and large buffer method for primary */
1796         small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
1797         large = line_count * line_size;
1798
1799         entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
1800         *display_wm = entries + display->guard_size;
1801
1802         /*
1803          * Spec says:
1804          * FBC WM = ((Final Primary WM * 64) / number of bytes per line) + 2
1805          */
1806         *fbc_wm = DIV_ROUND_UP(*display_wm * 64, line_size) + 2;
1807
1808         /* calculate the self-refresh watermark for display cursor */
1809         entries = line_count * pixel_size * 64;
1810         entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
1811         *cursor_wm = entries + cursor->guard_size;
1812
1813         return ironlake_check_srwm(dev, level,
1814                                    *fbc_wm, *display_wm, *cursor_wm,
1815                                    display, cursor);
1816 }
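
/*
 * Worked example (hypothetical numbers; assumes a 64 byte cacheline and
 * a guard size of 2): latency_ns == 6000, crtc_clock == 148500,
 * htotal == 2200, hdisplay == 1920, 32bpp:
 *
 *   line_time_us = (2200 * 1000) / 148500              = 14
 *   line_count   = (6000 / 14 + 1000) / 1000           = 1
 *   line_size    = 1920 * 4                            = 7680
 *   small        = ((148500 * 4 / 1000) * 6000) / 1000 = 3564
 *   large        = 1 * 7680                            = 7680
 *   *display_wm  = DIV_ROUND_UP(3564, 64) + 2          = 58
 *   *fbc_wm      = DIV_ROUND_UP(58 * 64, 7680) + 2     = 3
 *   *cursor_wm   = DIV_ROUND_UP(1 * 4 * 64, 64) + 2    = 6
 */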
1817
1818 static void ironlake_update_wm(struct drm_crtc *crtc)
1819 {
1820         struct drm_device *dev = crtc->dev;
1821         struct drm_i915_private *dev_priv = dev->dev_private;
1822         int fbc_wm, plane_wm, cursor_wm;
1823         unsigned int enabled;
1824
1825         enabled = 0;
1826         if (g4x_compute_wm0(dev, PIPE_A,
1827                             &ironlake_display_wm_info,
1828                             dev_priv->wm.pri_latency[0] * 100,
1829                             &ironlake_cursor_wm_info,
1830                             dev_priv->wm.cur_latency[0] * 100,
1831                             &plane_wm, &cursor_wm)) {
1832                 I915_WRITE(WM0_PIPEA_ILK,
1833                            (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
1834                 DRM_DEBUG_KMS("FIFO watermarks for pipe A -"
1835                               " plane %d, cursor: %d\n",
1836                               plane_wm, cursor_wm);
1837                 enabled |= 1 << PIPE_A;
1838         }
1839
1840         if (g4x_compute_wm0(dev, PIPE_B,
1841                             &ironlake_display_wm_info,
1842                             dev_priv->wm.pri_latency[0] * 100,
1843                             &ironlake_cursor_wm_info,
1844                             dev_priv->wm.cur_latency[0] * 100,
1845                             &plane_wm, &cursor_wm)) {
1846                 I915_WRITE(WM0_PIPEB_ILK,
1847                            (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
1848                 DRM_DEBUG_KMS("FIFO watermarks for pipe B -"
1849                               " plane %d, cursor: %d\n",
1850                               plane_wm, cursor_wm);
1851                 enabled |= 1 << PIPE_B;
1852         }
1853
1854         /*
1855          * Calculate and update the self-refresh watermark only when one
1856          * display plane is used.
1857          */
1858         I915_WRITE(WM3_LP_ILK, 0);
1859         I915_WRITE(WM2_LP_ILK, 0);
1860         I915_WRITE(WM1_LP_ILK, 0);
1861
1862         if (!single_plane_enabled(enabled))
1863                 return;
1864         enabled = ffs(enabled) - 1;
1865
1866         /* WM1 */
1867         if (!ironlake_compute_srwm(dev, 1, enabled,
1868                                    dev_priv->wm.pri_latency[1] * 500,
1869                                    &ironlake_display_srwm_info,
1870                                    &ironlake_cursor_srwm_info,
1871                                    &fbc_wm, &plane_wm, &cursor_wm))
1872                 return;
1873
1874         I915_WRITE(WM1_LP_ILK,
1875                    WM1_LP_SR_EN |
1876                    (dev_priv->wm.pri_latency[1] << WM1_LP_LATENCY_SHIFT) |
1877                    (fbc_wm << WM1_LP_FBC_SHIFT) |
1878                    (plane_wm << WM1_LP_SR_SHIFT) |
1879                    cursor_wm);
1880
1881         /* WM2 */
1882         if (!ironlake_compute_srwm(dev, 2, enabled,
1883                                    dev_priv->wm.pri_latency[2] * 500,
1884                                    &ironlake_display_srwm_info,
1885                                    &ironlake_cursor_srwm_info,
1886                                    &fbc_wm, &plane_wm, &cursor_wm))
1887                 return;
1888
1889         I915_WRITE(WM2_LP_ILK,
1890                    WM2_LP_EN |
1891                    (dev_priv->wm.pri_latency[2] << WM1_LP_LATENCY_SHIFT) |
1892                    (fbc_wm << WM1_LP_FBC_SHIFT) |
1893                    (plane_wm << WM1_LP_SR_SHIFT) |
1894                    cursor_wm);
1895
1896         /*
1897          * WM3 is unsupported on ILK, probably because we don't have latency
1898          * data for that power state
1899          */
1900 }
1901
1902 static void sandybridge_update_wm(struct drm_crtc *crtc)
1903 {
1904         struct drm_device *dev = crtc->dev;
1905         struct drm_i915_private *dev_priv = dev->dev_private;
1906         int latency = dev_priv->wm.pri_latency[0] * 100;        /* In unit 0.1us */
1907         u32 val;
1908         int fbc_wm, plane_wm, cursor_wm;
1909         unsigned int enabled;
1910
1911         enabled = 0;
1912         if (g4x_compute_wm0(dev, PIPE_A,
1913                             &sandybridge_display_wm_info, latency,
1914                             &sandybridge_cursor_wm_info, latency,
1915                             &plane_wm, &cursor_wm)) {
1916                 val = I915_READ(WM0_PIPEA_ILK);
1917                 val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
1918                 I915_WRITE(WM0_PIPEA_ILK, val |
1919                            ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
1920                 DRM_DEBUG_KMS("FIFO watermarks for pipe A -"
1921                               " plane %d, cursor: %d\n",
1922                               plane_wm, cursor_wm);
1923                 enabled |= 1 << PIPE_A;
1924         }
1925
1926         if (g4x_compute_wm0(dev, PIPE_B,
1927                             &sandybridge_display_wm_info, latency,
1928                             &sandybridge_cursor_wm_info, latency,
1929                             &plane_wm, &cursor_wm)) {
1930                 val = I915_READ(WM0_PIPEB_ILK);
1931                 val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
1932                 I915_WRITE(WM0_PIPEB_ILK, val |
1933                            ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
1934                 DRM_DEBUG_KMS("FIFO watermarks for pipe B -"
1935                               " plane %d, cursor: %d\n",
1936                               plane_wm, cursor_wm);
1937                 enabled |= 1 << PIPE_B;
1938         }
1939
1940         /*
1941          * Calculate and update the self-refresh watermark only when one
1942          * display plane is used.
1943          *
1944          * SNB supports 3 levels of watermarks.
1945          *
1946          * WM1/WM2/WM3 watermarks have to be enabled in ascending order
1947          * and disabled in descending order.
1948          *
1949          */
1950         I915_WRITE(WM3_LP_ILK, 0);
1951         I915_WRITE(WM2_LP_ILK, 0);
1952         I915_WRITE(WM1_LP_ILK, 0);
1953
1954         if (!single_plane_enabled(enabled) ||
1955             dev_priv->sprite_scaling_enabled)
1956                 return;
1957         enabled = ffs(enabled) - 1;
1958
1959         /* WM1 */
1960         if (!ironlake_compute_srwm(dev, 1, enabled,
1961                                    dev_priv->wm.pri_latency[1] * 500,
1962                                    &sandybridge_display_srwm_info,
1963                                    &sandybridge_cursor_srwm_info,
1964                                    &fbc_wm, &plane_wm, &cursor_wm))
1965                 return;
1966
1967         I915_WRITE(WM1_LP_ILK,
1968                    WM1_LP_SR_EN |
1969                    (dev_priv->wm.pri_latency[1] << WM1_LP_LATENCY_SHIFT) |
1970                    (fbc_wm << WM1_LP_FBC_SHIFT) |
1971                    (plane_wm << WM1_LP_SR_SHIFT) |
1972                    cursor_wm);
1973
1974         /* WM2 */
1975         if (!ironlake_compute_srwm(dev, 2, enabled,
1976                                    dev_priv->wm.pri_latency[2] * 500,
1977                                    &sandybridge_display_srwm_info,
1978                                    &sandybridge_cursor_srwm_info,
1979                                    &fbc_wm, &plane_wm, &cursor_wm))
1980                 return;
1981
1982         I915_WRITE(WM2_LP_ILK,
1983                    WM2_LP_EN |
1984                    (dev_priv->wm.pri_latency[2] << WM1_LP_LATENCY_SHIFT) |
1985                    (fbc_wm << WM1_LP_FBC_SHIFT) |
1986                    (plane_wm << WM1_LP_SR_SHIFT) |
1987                    cursor_wm);
1988
1989         /* WM3 */
1990         if (!ironlake_compute_srwm(dev, 3, enabled,
1991                                    dev_priv->wm.pri_latency[3] * 500,
1992                                    &sandybridge_display_srwm_info,
1993                                    &sandybridge_cursor_srwm_info,
1994                                    &fbc_wm, &plane_wm, &cursor_wm))
1995                 return;
1996
1997         I915_WRITE(WM3_LP_ILK,
1998                    WM3_LP_EN |
1999                    (dev_priv->wm.pri_latency[3] << WM1_LP_LATENCY_SHIFT) |
2000                    (fbc_wm << WM1_LP_FBC_SHIFT) |
2001                    (plane_wm << WM1_LP_SR_SHIFT) |
2002                    cursor_wm);
2003 }
2004
2005 static void ivybridge_update_wm(struct drm_crtc *crtc)
2006 {
2007         struct drm_device *dev = crtc->dev;
2008         struct drm_i915_private *dev_priv = dev->dev_private;
2009         int latency = dev_priv->wm.pri_latency[0] * 100;        /* In unit 0.1us */
2010         u32 val;
2011         int fbc_wm, plane_wm, cursor_wm;
2012         int ignore_fbc_wm, ignore_plane_wm, ignore_cursor_wm;
2013         unsigned int enabled;
2014
2015         enabled = 0;
2016         if (g4x_compute_wm0(dev, PIPE_A,
2017                             &sandybridge_display_wm_info, latency,
2018                             &sandybridge_cursor_wm_info, latency,
2019                             &plane_wm, &cursor_wm)) {
2020                 val = I915_READ(WM0_PIPEA_ILK);
2021                 val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
2022                 I915_WRITE(WM0_PIPEA_ILK, val |
2023                            ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
2024                 DRM_DEBUG_KMS("FIFO watermarks for pipe A -"
2025                               " plane %d, cursor: %d\n",
2026                               plane_wm, cursor_wm);
2027                 enabled |= 1 << PIPE_A;
2028         }
2029
2030         if (g4x_compute_wm0(dev, PIPE_B,
2031                             &sandybridge_display_wm_info, latency,
2032                             &sandybridge_cursor_wm_info, latency,
2033                             &plane_wm, &cursor_wm)) {
2034                 val = I915_READ(WM0_PIPEB_ILK);
2035                 val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
2036                 I915_WRITE(WM0_PIPEB_ILK, val |
2037                            ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
2038                 DRM_DEBUG_KMS("FIFO watermarks for pipe B -"
2039                               " plane %d, cursor: %d\n",
2040                               plane_wm, cursor_wm);
2041                 enabled |= 1 << PIPE_B;
2042         }
2043
2044         if (g4x_compute_wm0(dev, PIPE_C,
2045                             &sandybridge_display_wm_info, latency,
2046                             &sandybridge_cursor_wm_info, latency,
2047                             &plane_wm, &cursor_wm)) {
2048                 val = I915_READ(WM0_PIPEC_IVB);
2049                 val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
2050                 I915_WRITE(WM0_PIPEC_IVB, val |
2051                            ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
2052                 DRM_DEBUG_KMS("FIFO watermarks for pipe C -"
2053                               " plane %d, cursor: %d\n",
2054                               plane_wm, cursor_wm);
2055                 enabled |= 1 << PIPE_C;
2056         }
2057
2058         /*
2059          * Calculate and update the self-refresh watermark only when one
2060          * display plane is used.
2061          *
2062          * IVB, like SNB, supports 3 levels of watermarks.
2063          *
2064          * WM1/WM2/WM3 watermarks have to be enabled in ascending order
2065          * and disabled in descending order.
2066          *
2067          */
2068         I915_WRITE(WM3_LP_ILK, 0);
2069         I915_WRITE(WM2_LP_ILK, 0);
2070         I915_WRITE(WM1_LP_ILK, 0);
2071
2072         if (!single_plane_enabled(enabled) ||
2073             dev_priv->sprite_scaling_enabled)
2074                 return;
2075         enabled = ffs(enabled) - 1;
2076
2077         /* WM1 */
2078         if (!ironlake_compute_srwm(dev, 1, enabled,
2079                                    dev_priv->wm.pri_latency[1] * 500,
2080                                    &sandybridge_display_srwm_info,
2081                                    &sandybridge_cursor_srwm_info,
2082                                    &fbc_wm, &plane_wm, &cursor_wm))
2083                 return;
2084
2085         I915_WRITE(WM1_LP_ILK,
2086                    WM1_LP_SR_EN |
2087                    (dev_priv->wm.pri_latency[1] << WM1_LP_LATENCY_SHIFT) |
2088                    (fbc_wm << WM1_LP_FBC_SHIFT) |
2089                    (plane_wm << WM1_LP_SR_SHIFT) |
2090                    cursor_wm);
2091
2092         /* WM2 */
2093         if (!ironlake_compute_srwm(dev, 2, enabled,
2094                                    dev_priv->wm.pri_latency[2] * 500,
2095                                    &sandybridge_display_srwm_info,
2096                                    &sandybridge_cursor_srwm_info,
2097                                    &fbc_wm, &plane_wm, &cursor_wm))
2098                 return;
2099
2100         I915_WRITE(WM2_LP_ILK,
2101                    WM2_LP_EN |
2102                    (dev_priv->wm.pri_latency[2] << WM1_LP_LATENCY_SHIFT) |
2103                    (fbc_wm << WM1_LP_FBC_SHIFT) |
2104                    (plane_wm << WM1_LP_SR_SHIFT) |
2105                    cursor_wm);
2106
2107         /* WM3, note we have to correct the cursor latency */
2108         if (!ironlake_compute_srwm(dev, 3, enabled,
2109                                    dev_priv->wm.pri_latency[3] * 500,
2110                                    &sandybridge_display_srwm_info,
2111                                    &sandybridge_cursor_srwm_info,
2112                                    &fbc_wm, &plane_wm, &ignore_cursor_wm) ||
2113             !ironlake_compute_srwm(dev, 3, enabled,
2114                                    dev_priv->wm.cur_latency[3] * 500,
2115                                    &sandybridge_display_srwm_info,
2116                                    &sandybridge_cursor_srwm_info,
2117                                    &ignore_fbc_wm, &ignore_plane_wm, &cursor_wm))
2118                 return;
2119
2120         I915_WRITE(WM3_LP_ILK,
2121                    WM3_LP_EN |
2122                    (dev_priv->wm.pri_latency[3] << WM1_LP_LATENCY_SHIFT) |
2123                    (fbc_wm << WM1_LP_FBC_SHIFT) |
2124                    (plane_wm << WM1_LP_SR_SHIFT) |
2125                    cursor_wm);
2126 }
2127
2128 static uint32_t ilk_pipe_pixel_rate(struct drm_device *dev,
2129                                     struct drm_crtc *crtc)
2130 {
2131         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2132         uint32_t pixel_rate;
2133
2134         pixel_rate = intel_crtc->config.adjusted_mode.crtc_clock;
2135
2136         /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
2137          * adjust the pixel_rate here. */
2138
2139         if (intel_crtc->config.pch_pfit.enabled) {
2140                 uint64_t pipe_w, pipe_h, pfit_w, pfit_h;
2141                 uint32_t pfit_size = intel_crtc->config.pch_pfit.size;
2142
2143                 pipe_w = intel_crtc->config.pipe_src_w;
2144                 pipe_h = intel_crtc->config.pipe_src_h;
2145                 pfit_w = (pfit_size >> 16) & 0xFFFF;
2146                 pfit_h = pfit_size & 0xFFFF;
2147                 if (pipe_w < pfit_w)
2148                         pipe_w = pfit_w;
2149                 if (pipe_h < pfit_h)
2150                         pipe_h = pfit_h;
2151
2152                 pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h,
2153                                      pfit_w * pfit_h);
2154         }
2155
2156         return pixel_rate;
2157 }
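
/*
 * Example: a 1920x1200 pipe downscaled by the panel fitter to 1920x1080
 * at crtc_clock == 148500 yields
 *
 *   148500 * (1920 * 1200) / (1920 * 1080) = 165000
 *
 * The clamps above keep the ratio >= 1, so the effective pixel rate
 * only ever increases: upscaling does not raise the fetch rate,
 * downscaling does.
 */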
2158
2159 /* latency must be in 0.1us units. */
2160 static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
2161                                uint32_t latency)
2162 {
2163         uint64_t ret;
2164
2165         if (WARN(latency == 0, "Latency value missing\n"))
2166                 return UINT_MAX;
2167
2168         ret = (uint64_t) pixel_rate * bytes_per_pixel * latency;
2169         ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;
2170
2171         return ret;
2172 }
2173
2174 /* latency must be in 0.1us units. */
2175 static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
2176                                uint32_t horiz_pixels, uint8_t bytes_per_pixel,
2177                                uint32_t latency)
2178 {
2179         uint32_t ret;
2180
2181         if (WARN(latency == 0, "Latency value missing\n"))
2182                 return UINT_MAX;
2183
2184         ret = (latency * pixel_rate) / (pipe_htotal * 10000);
2185         ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
2186         ret = DIV_ROUND_UP(ret, 64) + 2;
2187         return ret;
2188 }
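
/*
 * Hypothetical comparison of the two methods: pixel_rate == 148500 (kHz),
 * 32bpp, latency == 30 (3.0 us), pipe_htotal == 2200, horiz_pixels == 1920:
 *
 *   method1 = DIV_ROUND_UP_ULL(148500 * 4 * 30, 64 * 10000) + 2 = 30
 *   method2 = ((30 * 148500) / (2200 * 10000) + 1) * 1920 * 4
 *             -> DIV_ROUND_UP(7680, 64) + 2                     = 122
 *
 * so a LP primary watermark would use min(30, 122) == 30.
 */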
2189
2190 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
2191                            uint8_t bytes_per_pixel)
2192 {
2193         return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
2194 }
2195
2196 struct hsw_pipe_wm_parameters {
2197         bool active;
2198         uint32_t pipe_htotal;
2199         uint32_t pixel_rate;
2200         struct intel_plane_wm_parameters pri;
2201         struct intel_plane_wm_parameters spr;
2202         struct intel_plane_wm_parameters cur;
2203 };
2204
2205 struct hsw_wm_maximums {
2206         uint16_t pri;
2207         uint16_t spr;
2208         uint16_t cur;
2209         uint16_t fbc;
2210 };
2211
2212 /* used in computing the new watermark state */
2213 struct intel_wm_config {
2214         unsigned int num_pipes_active;
2215         bool sprites_enabled;
2216         bool sprites_scaled;
2217 };
2218
2219 /*
2220  * For both WM_PIPE and WM_LP.
2221  * mem_value must be in 0.1us units.
2222  */
2223 static uint32_t ilk_compute_pri_wm(const struct hsw_pipe_wm_parameters *params,
2224                                    uint32_t mem_value,
2225                                    bool is_lp)
2226 {
2227         uint32_t method1, method2;
2228
2229         if (!params->active || !params->pri.enabled)
2230                 return 0;
2231
2232         method1 = ilk_wm_method1(params->pixel_rate,
2233                                  params->pri.bytes_per_pixel,
2234                                  mem_value);
2235
2236         if (!is_lp)
2237                 return method1;
2238
2239         method2 = ilk_wm_method2(params->pixel_rate,
2240                                  params->pipe_htotal,
2241                                  params->pri.horiz_pixels,
2242                                  params->pri.bytes_per_pixel,
2243                                  mem_value);
2244
2245         return min(method1, method2);
2246 }
2247
2248 /*
2249  * For both WM_PIPE and WM_LP.
2250  * mem_value must be in 0.1us units.
2251  */
2252 static uint32_t ilk_compute_spr_wm(const struct hsw_pipe_wm_parameters *params,
2253                                    uint32_t mem_value)
2254 {
2255         uint32_t method1, method2;
2256
2257         if (!params->active || !params->spr.enabled)
2258                 return 0;
2259
2260         method1 = ilk_wm_method1(params->pixel_rate,
2261                                  params->spr.bytes_per_pixel,
2262                                  mem_value);
2263         method2 = ilk_wm_method2(params->pixel_rate,
2264                                  params->pipe_htotal,
2265                                  params->spr.horiz_pixels,
2266                                  params->spr.bytes_per_pixel,
2267                                  mem_value);
2268         return min(method1, method2);
2269 }
2270
2271 /*
2272  * For both WM_PIPE and WM_LP.
2273  * mem_value must be in 0.1us units.
2274  */
2275 static uint32_t ilk_compute_cur_wm(const struct hsw_pipe_wm_parameters *params,
2276                                    uint32_t mem_value)
2277 {
2278         if (!params->active || !params->cur.enabled)
2279                 return 0;
2280
2281         return ilk_wm_method2(params->pixel_rate,
2282                               params->pipe_htotal,
2283                               params->cur.horiz_pixels,
2284                               params->cur.bytes_per_pixel,
2285                               mem_value);
2286 }
2287
2288 /* Only for WM_LP. */
2289 static uint32_t ilk_compute_fbc_wm(const struct hsw_pipe_wm_parameters *params,
2290                                    uint32_t pri_val)
2291 {
2292         if (!params->active || !params->pri.enabled)
2293                 return 0;
2294
2295         return ilk_wm_fbc(pri_val,
2296                           params->pri.horiz_pixels,
2297                           params->pri.bytes_per_pixel);
2298 }
2299
2300 static unsigned int ilk_display_fifo_size(const struct drm_device *dev)
2301 {
2302         if (INTEL_INFO(dev)->gen >= 8)
2303                 return 3072;
2304         else if (INTEL_INFO(dev)->gen >= 7)
2305                 return 768;
2306         else
2307                 return 512;
2308 }
2309
2310 /* Calculate the maximum primary/sprite plane watermark */
2311 static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
2312                                      int level,
2313                                      const struct intel_wm_config *config,
2314                                      enum intel_ddb_partitioning ddb_partitioning,
2315                                      bool is_sprite)
2316 {
2317         unsigned int fifo_size = ilk_display_fifo_size(dev);
2318         unsigned int max;
2319
2320         /* if sprites aren't enabled, sprites get nothing */
2321         if (is_sprite && !config->sprites_enabled)
2322                 return 0;
2323
2324         /* HSW allows LP1+ watermarks even with multiple pipes */
2325         if (level == 0 || config->num_pipes_active > 1) {
2326                 fifo_size /= INTEL_INFO(dev)->num_pipes;
2327
2328                 /*
2329                  * For some reason the non self refresh
2330                  * FIFO size is only half of the self
2331                  * refresh FIFO size on ILK/SNB.
2332                  */
2333                 if (INTEL_INFO(dev)->gen <= 6)
2334                         fifo_size /= 2;
2335         }
2336
2337         if (config->sprites_enabled) {
2338                 /* level 0 is always calculated with 1:1 split */
2339                 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
2340                         if (is_sprite)
2341                                 fifo_size *= 5;
2342                         fifo_size /= 6;
2343                 } else {
2344                         fifo_size /= 2;
2345                 }
2346         }
2347
2348         /* clamp to max that the registers can hold */
2349         if (INTEL_INFO(dev)->gen >= 8)
2350                 max = level == 0 ? 255 : 2047;
2351         else if (INTEL_INFO(dev)->gen >= 7)
2352                 /* IVB/HSW primary/sprite plane watermarks */
2353                 max = level == 0 ? 127 : 1023;
2354         else if (!is_sprite)
2355                 /* ILK/SNB primary plane watermarks */
2356                 max = level == 0 ? 127 : 511;
2357         else
2358                 /* ILK/SNB sprite plane watermarks */
2359                 max = level == 0 ? 63 : 255;
2360
2361         return min(fifo_size, max);
2362 }
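
/*
 * Example (IVB, one active pipe, sprites enabled): the 768 entry FIFO at
 * a 5:6 split for LP1+ gives the primary plane 768 / 6 = 128 entries and
 * the sprite 768 * 5 / 6 = 640 entries, both under the gen7 LP1+ register
 * limit of 1023; the default 1:1 split gives 384 each.
 */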
2363
2364 /* Calculate the maximum cursor plane watermark */
2365 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
2366                                       int level,
2367                                       const struct intel_wm_config *config)
2368 {
2369         /* HSW LP1+ watermarks w/ multiple pipes */
2370         if (level > 0 && config->num_pipes_active > 1)
2371                 return 64;
2372
2373         /* otherwise just report max that registers can hold */
2374         if (INTEL_INFO(dev)->gen >= 7)
2375                 return level == 0 ? 63 : 255;
2376         else
2377                 return level == 0 ? 31 : 63;
2378 }
2379
2380 /* Calculate the maximum FBC watermark */
2381 static unsigned int ilk_fbc_wm_max(struct drm_device *dev)
2382 {
2383         /* max that registers can hold */
2384         if (INTEL_INFO(dev)->gen >= 8)
2385                 return 31;
2386         else
2387                 return 15;
2388 }
2389
2390 static void ilk_compute_wm_maximums(struct drm_device *dev,
2391                                     int level,
2392                                     const struct intel_wm_config *config,
2393                                     enum intel_ddb_partitioning ddb_partitioning,
2394                                     struct hsw_wm_maximums *max)
2395 {
2396         max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
2397         max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
2398         max->cur = ilk_cursor_wm_max(dev, level, config);
2399         max->fbc = ilk_fbc_wm_max(dev);
2400 }
2401
2402 static bool ilk_validate_wm_level(int level,
2403                                   const struct hsw_wm_maximums *max,
2404                                   struct intel_wm_level *result)
2405 {
2406         bool ret;
2407
2408         /* already determined to be invalid? */
2409         if (!result->enable)
2410                 return false;
2411
2412         result->enable = result->pri_val <= max->pri &&
2413                          result->spr_val <= max->spr &&
2414                          result->cur_val <= max->cur;
2415
2416         ret = result->enable;
2417
2418         /*
2419          * HACK until we can pre-compute everything,
2420          * and thus fail gracefully if LP0 watermarks
2421          * are exceeded...
2422          */
2423         if (level == 0 && !result->enable) {
2424                 if (result->pri_val > max->pri)
2425                         DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
2426                                       level, result->pri_val, max->pri);
2427                 if (result->spr_val > max->spr)
2428                         DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
2429                                       level, result->spr_val, max->spr);
2430                 if (result->cur_val > max->cur)
2431                         DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
2432                                       level, result->cur_val, max->cur);
2433
2434                 result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
2435                 result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
2436                 result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
2437                 result->enable = true;
2438         }
2439
2440         return ret;
2441 }
2442
2443 static void ilk_compute_wm_level(struct drm_i915_private *dev_priv,
2444                                  int level,
2445                                  const struct hsw_pipe_wm_parameters *p,
2446                                  struct intel_wm_level *result)
2447 {
2448         uint16_t pri_latency = dev_priv->wm.pri_latency[level];
2449         uint16_t spr_latency = dev_priv->wm.spr_latency[level];
2450         uint16_t cur_latency = dev_priv->wm.cur_latency[level];
2451
2452         /* WM1+ latency values stored in 0.5us units */
2453         if (level > 0) {
2454                 pri_latency *= 5;
2455                 spr_latency *= 5;
2456                 cur_latency *= 5;
2457         }
2458
2459         result->pri_val = ilk_compute_pri_wm(p, pri_latency, level);
2460         result->spr_val = ilk_compute_spr_wm(p, spr_latency);
2461         result->cur_val = ilk_compute_cur_wm(p, cur_latency);
2462         result->fbc_val = ilk_compute_fbc_wm(p, result->pri_val);
2463         result->enable = true;
2464 }
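
/*
 * E.g. a stored WM2 primary latency of 12 means 6.0 us (0.5 us units);
 * multiplying by 5 converts it to 60 in the 0.1 us units expected by
 * the ilk_wm_method*() helpers above.
 */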
2465
2466 static uint32_t
2467 hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
2468 {
2469         struct drm_i915_private *dev_priv = dev->dev_private;
2470         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2471         struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode;
2472         u32 linetime, ips_linetime;
2473
2474         if (!intel_crtc_active(crtc))
2475                 return 0;
2476
2477         /* The WMs are computed based on how long it takes to fill a single
2478          * row at the given clock rate, multiplied by 8.
2479          */
2480         linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
2481                                      mode->crtc_clock);
2482         ips_linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
2483                                          intel_ddi_get_cdclk_freq(dev_priv));
2484
2485         return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2486                PIPE_WM_LINETIME_TIME(linetime);
2487 }
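
/*
 * Example: htotal == 2200 at crtc_clock == 148500 (kHz) gives
 * DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500) == 119, i.e. a ~14.8 us
 * line time expressed in 1/8 us units.
 */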
2488
2489 static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[5])
2490 {
2491         struct drm_i915_private *dev_priv = dev->dev_private;
2492
2493         if (IS_HASWELL(dev)) {
2494                 uint64_t sskpd = I915_READ64(MCH_SSKPD);
2495
2496                 wm[0] = (sskpd >> 56) & 0xFF;
2497                 if (wm[0] == 0)
2498                         wm[0] = sskpd & 0xF;
2499                 wm[1] = (sskpd >> 4) & 0xFF;
2500                 wm[2] = (sskpd >> 12) & 0xFF;
2501                 wm[3] = (sskpd >> 20) & 0x1FF;
2502                 wm[4] = (sskpd >> 32) & 0x1FF;
2503         } else if (INTEL_INFO(dev)->gen >= 6) {
2504                 uint32_t sskpd = I915_READ(MCH_SSKPD);
2505
2506                 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2507                 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2508                 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2509                 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2510         } else if (INTEL_INFO(dev)->gen >= 5) {
2511                 uint32_t mltr = I915_READ(MLTR_ILK);
2512
2513                 /* ILK primary LP0 latency is 700 ns */
2514                 wm[0] = 7;
2515                 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2516                 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2517         }
2518 }
2519
2520 static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5])
2521 {
2522         /* ILK sprite LP0 latency is 1300 ns */
2523         if (INTEL_INFO(dev)->gen == 5)
2524                 wm[0] = 13;
2525 }
2526
2527 static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
2528 {
2529         /* ILK cursor LP0 latency is 1300 ns */
2530         if (INTEL_INFO(dev)->gen == 5)
2531                 wm[0] = 13;
2532
2533         /* WaDoubleCursorLP3Latency:ivb */
2534         if (IS_IVYBRIDGE(dev))
2535                 wm[3] *= 2;
2536 }
2537
2538 static int ilk_wm_max_level(const struct drm_device *dev)
2539 {
2540         /* how many WM levels are we expecting */
2541         if (IS_HASWELL(dev))
2542                 return 4;
2543         else if (INTEL_INFO(dev)->gen >= 6)
2544                 return 3;
2545         else
2546                 return 2;
2547 }
2548
2549 static void intel_print_wm_latency(struct drm_device *dev,
2550                                    const char *name,
2551                                    const uint16_t wm[5])
2552 {
2553         int level, max_level = ilk_wm_max_level(dev);
2554
2555         for (level = 0; level <= max_level; level++) {
2556                 unsigned int latency = wm[level];
2557
2558                 if (latency == 0) {
2559                         DRM_ERROR("%s WM%d latency not provided\n",
2560                                   name, level);
2561                         continue;
2562                 }
2563
2564                 /* WM1+ latency values in 0.5us units */
2565                 if (level > 0)
2566                         latency *= 5;
2567
2568                 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2569                               name, level, wm[level],
2570                               latency / 10, latency % 10);
2571         }
2572 }
2573
2574 static void intel_setup_wm_latency(struct drm_device *dev)
2575 {
2576         struct drm_i915_private *dev_priv = dev->dev_private;
2577
2578         intel_read_wm_latency(dev, dev_priv->wm.pri_latency);
2579
2580         memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
2581                sizeof(dev_priv->wm.pri_latency));
2582         memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
2583                sizeof(dev_priv->wm.pri_latency));
2584
2585         intel_fixup_spr_wm_latency(dev, dev_priv->wm.spr_latency);
2586         intel_fixup_cur_wm_latency(dev, dev_priv->wm.cur_latency);
2587
2588         intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
2589         intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
2590         intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
2591 }
2592
2593 static void hsw_compute_wm_parameters(struct drm_crtc *crtc,
2594                                       struct hsw_pipe_wm_parameters *p,
2595                                       struct intel_wm_config *config)
2596 {
2597         struct drm_device *dev = crtc->dev;
2598         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2599         enum pipe pipe = intel_crtc->pipe;
2600         struct drm_plane *plane;
2601
2602         p->active = intel_crtc_active(crtc);
2603         if (p->active) {
2604                 p->pipe_htotal = intel_crtc->config.adjusted_mode.htotal;
2605                 p->pixel_rate = ilk_pipe_pixel_rate(dev, crtc);
2606                 p->pri.bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
2607                 p->cur.bytes_per_pixel = 4;
2608                 p->pri.horiz_pixels = intel_crtc->config.pipe_src_w;
2609                 p->cur.horiz_pixels = 64;
2610                 /* TODO: for now, assume primary and cursor planes are always enabled. */
2611                 p->pri.enabled = true;
2612                 p->cur.enabled = true;
2613         }
2614
2615         list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
2616                 config->num_pipes_active += intel_crtc_active(crtc);
2617
2618         list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
2619                 struct intel_plane *intel_plane = to_intel_plane(plane);
2620
2621                 if (intel_plane->pipe == pipe)
2622                         p->spr = intel_plane->wm;
2623
2624                 config->sprites_enabled |= intel_plane->wm.enabled;
2625                 config->sprites_scaled |= intel_plane->wm.scaled;
2626         }
2627 }
2628
2629 /* Compute new watermarks for the pipe */
2630 static bool intel_compute_pipe_wm(struct drm_crtc *crtc,
2631                                   const struct hsw_pipe_wm_parameters *params,
2632                                   struct intel_pipe_wm *pipe_wm)
2633 {
2634         struct drm_device *dev = crtc->dev;
2635         struct drm_i915_private *dev_priv = dev->dev_private;
2636         int level, max_level = ilk_wm_max_level(dev);
2637         /* LP0 watermark maximums depend on this pipe alone */
2638         struct intel_wm_config config = {
2639                 .num_pipes_active = 1,
2640                 .sprites_enabled = params->spr.enabled,
2641                 .sprites_scaled = params->spr.scaled,
2642         };
2643         struct hsw_wm_maximums max;
2644
2645         /* LP0 watermarks always use 1/2 DDB partitioning */
2646         ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
2647
2648         for (level = 0; level <= max_level; level++)
2649                 ilk_compute_wm_level(dev_priv, level, params,
2650                                      &pipe_wm->wm[level]);
2651
2652         pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc);
2653
2654         /* At least LP0 must be valid */
2655         return ilk_validate_wm_level(0, &max, &pipe_wm->wm[0]);
2656 }
2657
2658 /*
2659  * Merge the watermarks from all active pipes for a specific level.
2660  */
2661 static void ilk_merge_wm_level(struct drm_device *dev,
2662                                int level,
2663                                struct intel_wm_level *ret_wm)
2664 {
2665         const struct intel_crtc *intel_crtc;
2666
2667         list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head) {
2668                 const struct intel_wm_level *wm =
2669                         &intel_crtc->wm.active.wm[level];
2670
2671                 if (!wm->enable)
2672                         return;
2673
2674                 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
2675                 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
2676                 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
2677                 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
2678         }
2679
2680         ret_wm->enable = true;
2681 }
2682
2683 /*
2684  * Merge all low power watermarks for all active pipes.
2685  */
2686 static void ilk_wm_merge(struct drm_device *dev,
2687                          const struct hsw_wm_maximums *max,
2688                          struct intel_pipe_wm *merged)
2689 {
2690         int level, max_level = ilk_wm_max_level(dev);
2691
2692         merged->fbc_wm_enabled = true;
2693
2694         /* merge each WM1+ level */
2695         for (level = 1; level <= max_level; level++) {
2696                 struct intel_wm_level *wm = &merged->wm[level];
2697
2698                 ilk_merge_wm_level(dev, level, wm);
2699
2700                 if (!ilk_validate_wm_level(level, max, wm))
2701                         break;
2702
2703                 /*
2704                  * The spec says it is preferred to disable
2705                  * FBC WMs instead of disabling a WM level.
2706                  */
2707                 if (wm->fbc_val > max->fbc) {
2708                         merged->fbc_wm_enabled = false;
2709                         wm->fbc_val = 0;
2710                 }
2711         }
2712 }
2713
2714 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
2715 {
2716         /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
2717         return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
2718 }
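/*
 * Worked example (derived from the expression above): with five valid
 * levels (wm[4].enable set) LP1/LP2/LP3 map to levels 1/3/4, skipping
 * level 2; otherwise they map straight to 1/2/3.  E.g. for wm_lp == 2
 * with wm[4] enabled: 2 + (2 >= 2 && 1) == 3.
 */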
2719
2720 static void hsw_compute_wm_results(struct drm_device *dev,
2721                                    const struct intel_pipe_wm *merged,
2722                                    enum intel_ddb_partitioning partitioning,
2723                                    struct hsw_wm_values *results)
2724 {
2725         struct intel_crtc *intel_crtc;
2726         int level, wm_lp;
2727
2728         results->enable_fbc_wm = merged->fbc_wm_enabled;
2729         results->partitioning = partitioning;
2730
2731         /* LP1+ register values */
2732         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2733                 const struct intel_wm_level *r;
2734
2735                 level = ilk_wm_lp_to_level(wm_lp, merged);
2736
2737                 r = &merged->wm[level];
2738                 if (!r->enable)
2739                         break;
2740
2741                 results->wm_lp[wm_lp - 1] = WM3_LP_EN |
2742                         ((level * 2) << WM1_LP_LATENCY_SHIFT) |
2743                         (r->pri_val << WM1_LP_SR_SHIFT) |
2744                         r->cur_val;
2745
2746                 if (INTEL_INFO(dev)->gen >= 8)
2747                         results->wm_lp[wm_lp - 1] |=
2748                                 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
2749                 else
2750                         results->wm_lp[wm_lp - 1] |=
2751                                 r->fbc_val << WM1_LP_FBC_SHIFT;
2752
2753                 results->wm_lp_spr[wm_lp - 1] = r->spr_val;
2754         }
2755
2756         /* LP0 register values */
2757         list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head) {
2758                 enum pipe pipe = intel_crtc->pipe;
2759                 const struct intel_wm_level *r =
2760                         &intel_crtc->wm.active.wm[0];
2761
2762                 if (WARN_ON(!r->enable))
2763                         continue;
2764
2765                 results->wm_linetime[pipe] = intel_crtc->wm.active.linetime;
2766
2767                 results->wm_pipe[pipe] =
2768                         (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
2769                         (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
2770                         r->cur_val;
2771         }
2772 }
2773
2774 /* Find the result with the highest enabled level. If both top out at the same
2775  * level, prefer the one with fbc_wm_enabled set; prefer r1 on a full tie. */
2776 static struct intel_pipe_wm *hsw_find_best_result(struct drm_device *dev,
2777                                                   struct intel_pipe_wm *r1,
2778                                                   struct intel_pipe_wm *r2)
2779 {
2780         int level, max_level = ilk_wm_max_level(dev);
2781         int level1 = 0, level2 = 0;
2782
2783         for (level = 1; level <= max_level; level++) {
2784                 if (r1->wm[level].enable)
2785                         level1 = level;
2786                 if (r2->wm[level].enable)
2787                         level2 = level;
2788         }
2789
2790         if (level1 == level2) {
2791                 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
2792                         return r2;
2793                 else
2794                         return r1;
2795         } else if (level1 > level2) {
2796                 return r1;
2797         } else {
2798                 return r2;
2799         }
2800 }
2801
2802 /* dirty bits used to track which watermarks need changes */
2803 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
2804 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
2805 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
2806 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
2807 #define WM_DIRTY_FBC (1 << 24)
2808 #define WM_DIRTY_DDB (1 << 25)
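/*
 * Resulting bit layout: bits 0-2 are the per-pipe WM0 values, bits 8-10 the
 * per-pipe linetimes, bits 16-18 the LP1-LP3 watermarks (WM_DIRTY_LP(1) is
 * 1 << 16), bit 24 the FBC WM enable and bit 25 the DDB partitioning.
 */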
2809
2810 static unsigned int ilk_compute_wm_dirty(struct drm_device *dev,
2811                                          const struct hsw_wm_values *old,
2812                                          const struct hsw_wm_values *new)
2813 {
2814         unsigned int dirty = 0;
2815         enum pipe pipe;
2816         int wm_lp;
2817
2818         for_each_pipe(pipe) {
2819                 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
2820                         dirty |= WM_DIRTY_LINETIME(pipe);
2821                         /* Must disable LP1+ watermarks too */
2822                         dirty |= WM_DIRTY_LP_ALL;
2823                 }
2824
2825                 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
2826                         dirty |= WM_DIRTY_PIPE(pipe);
2827                         /* Must disable LP1+ watermarks too */
2828                         dirty |= WM_DIRTY_LP_ALL;
2829                 }
2830         }
2831
2832         if (old->enable_fbc_wm != new->enable_fbc_wm) {
2833                 dirty |= WM_DIRTY_FBC;
2834                 /* Must disable LP1+ watermarks too */
2835                 dirty |= WM_DIRTY_LP_ALL;
2836         }
2837
2838         if (old->partitioning != new->partitioning) {
2839                 dirty |= WM_DIRTY_DDB;
2840                 /* Must disable LP1+ watermarks too */
2841                 dirty |= WM_DIRTY_LP_ALL;
2842         }
2843
2844         /* LP1+ watermarks already deemed dirty, no need to continue */
2845         if (dirty & WM_DIRTY_LP_ALL)
2846                 return dirty;
2847
2848         /* Find the lowest numbered LP1+ watermark in need of an update... */
2849         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2850                 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
2851                     old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
2852                         break;
2853         }
2854
2855         /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
2856         for (; wm_lp <= 3; wm_lp++)
2857                 dirty |= WM_DIRTY_LP(wm_lp);
2858
2859         return dirty;
2860 }
2861
2862 /*
2863  * The spec says we shouldn't write when we don't need to, because every
2864  * write causes the WMs to be re-evaluated, expending some power.
2865  */
2866 static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
2867                                 struct hsw_wm_values *results)
2868 {
2869         struct hsw_wm_values *previous = &dev_priv->wm.hw;
2870         unsigned int dirty;
2871         uint32_t val;
2872
2873         dirty = ilk_compute_wm_dirty(dev_priv->dev, previous, results);
2874         if (!dirty)
2875                 return;
2876
2877         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != 0)
2878                 I915_WRITE(WM3_LP_ILK, 0);
2879         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != 0)
2880                 I915_WRITE(WM2_LP_ILK, 0);
2881         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != 0)
2882                 I915_WRITE(WM1_LP_ILK, 0);
2883
2884         if (dirty & WM_DIRTY_PIPE(PIPE_A))
2885                 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
2886         if (dirty & WM_DIRTY_PIPE(PIPE_B))
2887                 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
2888         if (dirty & WM_DIRTY_PIPE(PIPE_C))
2889                 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
2890
2891         if (dirty & WM_DIRTY_LINETIME(PIPE_A))
2892                 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
2893         if (dirty & WM_DIRTY_LINETIME(PIPE_B))
2894                 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
2895         if (dirty & WM_DIRTY_LINETIME(PIPE_C))
2896                 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
2897
2898         if (dirty & WM_DIRTY_DDB) {
2899                 val = I915_READ(WM_MISC);
2900                 if (results->partitioning == INTEL_DDB_PART_1_2)
2901                         val &= ~WM_MISC_DATA_PARTITION_5_6;
2902                 else
2903                         val |= WM_MISC_DATA_PARTITION_5_6;
2904                 I915_WRITE(WM_MISC, val);
2905         }
2906
2907         if (dirty & WM_DIRTY_FBC) {
2908                 val = I915_READ(DISP_ARB_CTL);
2909                 if (results->enable_fbc_wm)
2910                         val &= ~DISP_FBC_WM_DIS;
2911                 else
2912                         val |= DISP_FBC_WM_DIS;
2913                 I915_WRITE(DISP_ARB_CTL, val);
2914         }
2915
2916         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp_spr[0] != results->wm_lp_spr[0])
2917                 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
2918         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
2919                 I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
2920         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
2921                 I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
2922
2923         if (dirty & WM_DIRTY_LP(1) && results->wm_lp[0] != 0)
2924                 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
2925         if (dirty & WM_DIRTY_LP(2) && results->wm_lp[1] != 0)
2926                 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
2927         if (dirty & WM_DIRTY_LP(3) && results->wm_lp[2] != 0)
2928                 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
2929
2930         dev_priv->wm.hw = *results;
2931 }
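/*
 * The write ordering above is deliberate: every LP1+ watermark that changes
 * is first disabled (written as 0) if it was previously enabled, then the
 * WM0/linetime/DDB/FBC state is updated, and only then are the new LP1+
 * values written, so the hardware never applies an LP watermark that is
 * inconsistent with the rest of the state.
 */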
2932
2933 static void haswell_update_wm(struct drm_crtc *crtc)
2934 {
2935         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2936         struct drm_device *dev = crtc->dev;
2937         struct drm_i915_private *dev_priv = dev->dev_private;
2938         struct hsw_wm_maximums max;
2939         struct hsw_pipe_wm_parameters params = {};
2940         struct hsw_wm_values results = {};
2941         enum intel_ddb_partitioning partitioning;
2942         struct intel_pipe_wm pipe_wm = {};
2943         struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
2944         struct intel_wm_config config = {};
2945
2946         hsw_compute_wm_parameters(crtc, &params, &config);
2947
2948         intel_compute_pipe_wm(crtc, &params, &pipe_wm);
2949
2950         if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm)))
2951                 return;
2952
2953         intel_crtc->wm.active = pipe_wm;
2954
2955         ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
2956         ilk_wm_merge(dev, &max, &lp_wm_1_2);
2957
2958         /* 5/6 split only in single pipe config on IVB+ */
2959         if (INTEL_INFO(dev)->gen >= 7 &&
2960             config.num_pipes_active == 1 && config.sprites_enabled) {
2961                 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
2962                 ilk_wm_merge(dev, &max, &lp_wm_5_6);
2963
2964                 best_lp_wm = hsw_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
2965         } else {
2966                 best_lp_wm = &lp_wm_1_2;
2967         }
2968
2969         partitioning = (best_lp_wm == &lp_wm_1_2) ?
2970                        INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
2971
2972         hsw_compute_wm_results(dev, best_lp_wm, partitioning, &results);
2973
2974         hsw_write_wm_values(dev_priv, &results);
2975 }
2976
2977 static void haswell_update_sprite_wm(struct drm_plane *plane,
2978                                      struct drm_crtc *crtc,
2979                                      uint32_t sprite_width, int pixel_size,
2980                                      bool enabled, bool scaled)
2981 {
2982         struct intel_plane *intel_plane = to_intel_plane(plane);
2983
2984         intel_plane->wm.enabled = enabled;
2985         intel_plane->wm.scaled = scaled;
2986         intel_plane->wm.horiz_pixels = sprite_width;
2987         intel_plane->wm.bytes_per_pixel = pixel_size;
2988
2989         haswell_update_wm(crtc);
2990 }
2991
2992 static bool
2993 sandybridge_compute_sprite_wm(struct drm_device *dev, int plane,
2994                               uint32_t sprite_width, int pixel_size,
2995                               const struct intel_watermark_params *display,
2996                               int display_latency_ns, int *sprite_wm)
2997 {
2998         struct drm_crtc *crtc;
2999         int clock;
3000         int entries, tlb_miss;
3001
3002         crtc = intel_get_crtc_for_plane(dev, plane);
3003         if (!intel_crtc_active(crtc)) {
3004                 *sprite_wm = display->guard_size;
3005                 return false;
3006         }
3007
3008         clock = to_intel_crtc(crtc)->config.adjusted_mode.crtc_clock;
3009
3010         /* Use the small buffer method to calculate the sprite watermark */
3011         entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000;
3012         tlb_miss = display->fifo_size*display->cacheline_size -
3013                 sprite_width * 8;
3014         if (tlb_miss > 0)
3015                 entries += tlb_miss;
3016         entries = DIV_ROUND_UP(entries, display->cacheline_size);
3017         *sprite_wm = entries + display->guard_size;
3018         if (*sprite_wm > (int)display->max_wm)
3019                 *sprite_wm = display->max_wm;
3020
3021         return true;
3022 }
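/*
 * Worked example with illustrative numbers: for a 148500 kHz pixel clock at
 * 4 bytes per pixel, clock * pixel_size / 1000 is 594 bytes per usec; with
 * display_latency_ns == 700 that is 415 bytes which, ignoring the tlb_miss
 * term and assuming a 64-byte cacheline, rounds up to 7 entries plus the
 * guard size.
 */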
3023
3024 static bool
3025 sandybridge_compute_sprite_srwm(struct drm_device *dev, int plane,
3026                                 uint32_t sprite_width, int pixel_size,
3027                                 const struct intel_watermark_params *display,
3028                                 int latency_ns, int *sprite_wm)
3029 {
3030         struct drm_crtc *crtc;
3031         unsigned long line_time_us;
3032         int clock;
3033         int line_count, line_size;
3034         int small, large;
3035         int entries;
3036
3037         if (!latency_ns) {
3038                 *sprite_wm = 0;
3039                 return false;
3040         }
3041
3042         crtc = intel_get_crtc_for_plane(dev, plane);
3043         clock = to_intel_crtc(crtc)->config.adjusted_mode.crtc_clock;
3044         if (!clock) {
3045                 *sprite_wm = 0;
3046                 return false;
3047         }
3048
3049         line_time_us = (sprite_width * 1000) / clock;
3050         if (!line_time_us) {
3051                 *sprite_wm = 0;
3052                 return false;
3053         }
3054
3055         line_count = (latency_ns / line_time_us + 1000) / 1000;
3056         line_size = sprite_width * pixel_size;
3057
3058         /* Use the minimum of the small and large buffer method for the sprite */
3059         small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
3060         large = line_count * line_size;
3061
3062         entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
3063         *sprite_wm = entries + display->guard_size;
3064
3065         return *sprite_wm <= 0x3ff;
3066 }
3067
3068 static void sandybridge_update_sprite_wm(struct drm_plane *plane,
3069                                          struct drm_crtc *crtc,
3070                                          uint32_t sprite_width, int pixel_size,
3071                                          bool enabled, bool scaled)
3072 {
3073         struct drm_device *dev = plane->dev;
3074         struct drm_i915_private *dev_priv = dev->dev_private;
3075         int pipe = to_intel_plane(plane)->pipe;
3076         int latency = dev_priv->wm.spr_latency[0] * 100;        /* In unit 0.1us */
3077         u32 val;
3078         int sprite_wm, reg;
3079         int ret;
3080
3081         if (!enabled)
3082                 return;
3083
3084         switch (pipe) {
3085         case 0:
3086                 reg = WM0_PIPEA_ILK;
3087                 break;
3088         case 1:
3089                 reg = WM0_PIPEB_ILK;
3090                 break;
3091         case 2:
3092                 reg = WM0_PIPEC_IVB;
3093                 break;
3094         default:
3095                 return; /* bad pipe */
3096         }
3097
3098         ret = sandybridge_compute_sprite_wm(dev, pipe, sprite_width, pixel_size,
3099                                             &sandybridge_display_wm_info,
3100                                             latency, &sprite_wm);
3101         if (!ret) {
3102                 DRM_DEBUG_KMS("failed to compute sprite wm for pipe %c\n",
3103                               pipe_name(pipe));
3104                 return;
3105         }
3106
3107         val = I915_READ(reg);
3108         val &= ~WM0_PIPE_SPRITE_MASK;
3109         I915_WRITE(reg, val | (sprite_wm << WM0_PIPE_SPRITE_SHIFT));
3110         DRM_DEBUG_KMS("sprite watermarks for pipe %c - %d\n", pipe_name(pipe), sprite_wm);
3111
3112
3113         ret = sandybridge_compute_sprite_srwm(dev, pipe, sprite_width,
3114                                               pixel_size,
3115                                               &sandybridge_display_srwm_info,
3116                                               dev_priv->wm.spr_latency[1] * 500,
3117                                               &sprite_wm);
3118         if (!ret) {
3119                 DRM_DEBUG_KMS("failed to compute sprite lp1 wm on pipe %c\n",
3120                               pipe_name(pipe));
3121                 return;
3122         }
3123         I915_WRITE(WM1S_LP_ILK, sprite_wm);
3124
3125         /* Only IVB has two more LP watermarks for sprite */
3126         if (!IS_IVYBRIDGE(dev))
3127                 return;
3128
3129         ret = sandybridge_compute_sprite_srwm(dev, pipe, sprite_width,
3130                                               pixel_size,
3131                                               &sandybridge_display_srwm_info,
3132                                               dev_priv->wm.spr_latency[2] * 500,
3133                                               &sprite_wm);
3134         if (!ret) {
3135                 DRM_DEBUG_KMS("failed to compute sprite lp2 wm on pipe %c\n",
3136                               pipe_name(pipe));
3137                 return;
3138         }
3139         I915_WRITE(WM2S_LP_IVB, sprite_wm);
3140
3141         ret = sandybridge_compute_sprite_srwm(dev, pipe, sprite_width,
3142                                               pixel_size,
3143                                               &sandybridge_display_srwm_info,
3144                                               dev_priv->wm.spr_latency[3] * 500,
3145                                               &sprite_wm);
3146         if (!ret) {
3147                 DRM_DEBUG_KMS("failed to compute sprite lp3 wm on pipe %c\n",
3148                               pipe_name(pipe));
3149                 return;
3150         }
3151         I915_WRITE(WM3S_LP_IVB, sprite_wm);
3152 }
3153
3154 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3155 {
3156         struct drm_device *dev = crtc->dev;
3157         struct drm_i915_private *dev_priv = dev->dev_private;
3158         struct hsw_wm_values *hw = &dev_priv->wm.hw;
3159         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3160         struct intel_pipe_wm *active = &intel_crtc->wm.active;
3161         enum pipe pipe = intel_crtc->pipe;
3162         static const unsigned int wm0_pipe_reg[] = {
3163                 [PIPE_A] = WM0_PIPEA_ILK,
3164                 [PIPE_B] = WM0_PIPEB_ILK,
3165                 [PIPE_C] = WM0_PIPEC_IVB,
3166         };
3167
3168         hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
3169         hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3170
3171         if (intel_crtc_active(crtc)) {
3172                 u32 tmp = hw->wm_pipe[pipe];
3173
3174                 /*
3175                  * For active pipes LP0 watermark is marked as
3176                  * enabled, and LP1+ watermarks as disabled since
3177                  * we can't really reverse compute them in case
3178                  * multiple pipes are active.
3179                  */
3180                 active->wm[0].enable = true;
3181                 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
3182                 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
3183                 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
3184                 active->linetime = hw->wm_linetime[pipe];
3185         } else {
3186                 int level, max_level = ilk_wm_max_level(dev);
3187
3188                 /*
3189                  * For inactive pipes, all watermark levels
3190                  * should be marked as enabled but zeroed,
3191                  * which is what we'd compute them to be.
3192                  */
3193                 for (level = 0; level <= max_level; level++)
3194                         active->wm[level].enable = true;
3195         }
3196 }
3197
3198 void ilk_wm_get_hw_state(struct drm_device *dev)
3199 {
3200         struct drm_i915_private *dev_priv = dev->dev_private;
3201         struct hsw_wm_values *hw = &dev_priv->wm.hw;
3202         struct drm_crtc *crtc;
3203
3204         list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
3205                 ilk_pipe_wm_get_hw_state(crtc);
3206
3207         hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
3208         hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
3209         hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
3210
3211         hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
3212         hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
3213         hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
3214
3215         hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
3216                 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
3217
3218         hw->enable_fbc_wm =
3219                 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
3220 }
3221
3222 /**
3223  * intel_update_watermarks - update FIFO watermark values based on current modes
3224  *
3225  * Calculate watermark values for the various WM regs based on current mode
3226  * and plane configuration.
3227  *
3228  * There are several cases to deal with here:
3229  *   - normal (i.e. non-self-refresh)
3230  *   - self-refresh (SR) mode
3231  *   - lines are large relative to FIFO size (buffer can hold up to 2)
3232  *   - lines are small relative to FIFO size (buffer can hold more than 2
3233  *     lines), so need to account for TLB latency
3234  *
3235  *   The normal calculation is:
3236  *     watermark = dotclock * bytes per pixel * latency
3237  *   where latency is platform & configuration dependent (we assume pessimal
3238  *   values here).
3239  *
3240  *   The SR calculation is:
3241  *     watermark = (trunc(latency/line time)+1) * surface width *
3242  *       bytes per pixel
3243  *   where
3244  *     line time = htotal / dotclock
3245  *     surface width = hdisplay for normal plane and 64 for cursor
3246  *   and latency is assumed to be high, as above.
3247  *
3248  * The final value programmed to the register should always be rounded up,
3249  * and include an extra 2 entries to account for clock crossings.
3250  *
3251  * We don't use the sprite, so we can ignore that.  And on Crestline we have
3252  * to set the non-SR watermarks to 8.
3253  */
3254 void intel_update_watermarks(struct drm_crtc *crtc)
3255 {
3256         struct drm_i915_private *dev_priv = crtc->dev->dev_private;
3257
3258         if (dev_priv->display.update_wm)
3259                 dev_priv->display.update_wm(crtc);
3260 }
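/*
 * Illustrative numbers for the normal calculation documented above: a
 * 148500 kHz dotclock at 4 bytes per pixel with 2 usec of latency needs
 * about 148500 * 4 / 1000 * 2 = 1188 bytes of FIFO headroom, before
 * rounding up and adding the two extra entries for clock crossings.
 */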
3261
3262 void intel_update_sprite_watermarks(struct drm_plane *plane,
3263                                     struct drm_crtc *crtc,
3264                                     uint32_t sprite_width, int pixel_size,
3265                                     bool enabled, bool scaled)
3266 {
3267         struct drm_i915_private *dev_priv = plane->dev->dev_private;
3268
3269         if (dev_priv->display.update_sprite_wm)
3270                 dev_priv->display.update_sprite_wm(plane, crtc, sprite_width,
3271                                                    pixel_size, enabled, scaled);
3272 }
3273
3274 static struct drm_i915_gem_object *
3275 intel_alloc_context_page(struct drm_device *dev)
3276 {
3277         struct drm_i915_gem_object *ctx;
3278         int ret;
3279
3280         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
3281
3282         ctx = i915_gem_alloc_object(dev, 4096);
3283         if (!ctx) {
3284                 DRM_DEBUG("failed to alloc power context, RC6 disabled\n");
3285                 return NULL;
3286         }
3287
3288         ret = i915_gem_obj_ggtt_pin(ctx, 4096, true, false);
3289         if (ret) {
3290                 DRM_ERROR("failed to pin power context: %d\n", ret);
3291                 goto err_unref;
3292         }
3293
3294         ret = i915_gem_object_set_to_gtt_domain(ctx, 1);
3295         if (ret) {
3296                 DRM_ERROR("failed to set-domain on power context: %d\n", ret);
3297                 goto err_unpin;
3298         }
3299
3300         return ctx;
3301
3302 err_unpin:
3303         i915_gem_object_unpin(ctx);
3304 err_unref:
3305         drm_gem_object_unreference(&ctx->base);
3306         return NULL;
3307 }
3308
3309 /**
3310  * Lock protecting IPS-related data structures
3311  */
3312 DEFINE_SPINLOCK(mchdev_lock);
3313
3314 /* Global for IPS driver to get at the current i915 device. Protected by
3315  * mchdev_lock. */
3316 static struct drm_i915_private *i915_mch_dev;
3317
3318 bool ironlake_set_drps(struct drm_device *dev, u8 val)
3319 {
3320         struct drm_i915_private *dev_priv = dev->dev_private;
3321         u16 rgvswctl;
3322
3323         assert_spin_locked(&mchdev_lock);
3324
3325         rgvswctl = I915_READ16(MEMSWCTL);
3326         if (rgvswctl & MEMCTL_CMD_STS) {
3327                 DRM_DEBUG("gpu busy, RCS change rejected\n");
3328                 return false; /* still busy with another command */
3329         }
3330
3331         rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
3332                 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
3333         I915_WRITE16(MEMSWCTL, rgvswctl);
3334         POSTING_READ16(MEMSWCTL);
3335
3336         rgvswctl |= MEMCTL_CMD_STS;
3337         I915_WRITE16(MEMSWCTL, rgvswctl);
3338
3339         return true;
3340 }
3341
3342 static void ironlake_enable_drps(struct drm_device *dev)
3343 {
3344         struct drm_i915_private *dev_priv = dev->dev_private;
3345         u32 rgvmodectl = I915_READ(MEMMODECTL);
3346         u8 fmax, fmin, fstart, vstart;
3347
3348         spin_lock_irq(&mchdev_lock);
3349
3350         /* Enable temp reporting */
3351         I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
3352         I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
3353
3354         /* 100ms RC evaluation intervals */
3355         I915_WRITE(RCUPEI, 100000);
3356         I915_WRITE(RCDNEI, 100000);
3357
3358         /* Set max/min thresholds to 90ms and 80ms respectively */
3359         I915_WRITE(RCBMAXAVG, 90000);
3360         I915_WRITE(RCBMINAVG, 80000);
3361
3362         I915_WRITE(MEMIHYST, 1);
3363
3364         /* Set up min, max, and cur for interrupt handling */
3365         fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
3366         fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
3367         fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
3368                 MEMMODE_FSTART_SHIFT;
3369
3370         vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >>
3371                 PXVFREQ_PX_SHIFT;
3372
3373         dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
3374         dev_priv->ips.fstart = fstart;
3375
3376         dev_priv->ips.max_delay = fstart;
3377         dev_priv->ips.min_delay = fmin;
3378         dev_priv->ips.cur_delay = fstart;
3379
3380         DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
3381                          fmax, fmin, fstart);
3382
3383         I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
3384
3385         /*
3386          * Interrupts will be enabled in ironlake_irq_postinstall
3387          */
3388
3389         I915_WRITE(VIDSTART, vstart);
3390         POSTING_READ(VIDSTART);
3391
3392         rgvmodectl |= MEMMODE_SWMODE_EN;
3393         I915_WRITE(MEMMODECTL, rgvmodectl);
3394
3395         if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
3396                 DRM_ERROR("stuck trying to change perf mode\n");
3397         mdelay(1);
3398
3399         ironlake_set_drps(dev, fstart);
3400
3401         dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) +
3402                 I915_READ(0x112e0);
3403         dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
3404         dev_priv->ips.last_count2 = I915_READ(0x112f4);
3405         getrawmonotonic(&dev_priv->ips.last_time2);
3406
3407         spin_unlock_irq(&mchdev_lock);
3408 }
3409
3410 static void ironlake_disable_drps(struct drm_device *dev)
3411 {
3412         struct drm_i915_private *dev_priv = dev->dev_private;
3413         u16 rgvswctl;
3414
3415         spin_lock_irq(&mchdev_lock);
3416
3417         rgvswctl = I915_READ16(MEMSWCTL);
3418
3419         /* Ack interrupts, disable EFC interrupt */
3420         I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
3421         I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
3422         I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
3423         I915_WRITE(DEIIR, DE_PCU_EVENT);
3424         I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
3425
3426         /* Go back to the starting frequency */
3427         ironlake_set_drps(dev, dev_priv->ips.fstart);
3428         mdelay(1);
3429         rgvswctl |= MEMCTL_CMD_STS;
3430         I915_WRITE(MEMSWCTL, rgvswctl);
3431         mdelay(1);
3432
3433         spin_unlock_irq(&mchdev_lock);
3434 }
3435
3436 /* There's a funny hw issue where the hw returns all 0 when reading from
3437  * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
3438  * ourselves, instead of doing a read-modify-write cycle (which might result
3439  * in us clearing all limits and leaving the GPU stuck at its current frequency).
3440  */
3441 static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 val)
3442 {
3443         u32 limits;
3444
3445         /* Only set the down limit when we've reached the lowest level to avoid
3446          * getting more interrupts, otherwise leave this clear. This prevents a
3447          * race in the hw when coming out of rc6: There's a tiny window where
3448          * the hw runs at the minimal clock before selecting the desired
3449          * frequency, if the down threshold expires in that window we will not
3450          * receive a down interrupt. */
3451         limits = dev_priv->rps.max_delay << 24;
3452         if (val <= dev_priv->rps.min_delay)
3453                 limits |= dev_priv->rps.min_delay << 16;
3454
3455         return limits;
3456 }
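/*
 * Layout sketch: the maximum frequency is placed in bits 31:24 and, only
 * once we are already at the floor, the minimum in bits 23:16.  E.g. with
 * max_delay == 0x10 and val above the floor, limits == 0x10000000.
 */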
3457
3458 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
3459 {
3460         int new_power;
3461
3462         new_power = dev_priv->rps.power;
3463         switch (dev_priv->rps.power) {
3464         case LOW_POWER:
3465                 if (val > dev_priv->rps.rpe_delay + 1 && val > dev_priv->rps.cur_delay)
3466                         new_power = BETWEEN;
3467                 break;
3468
3469         case BETWEEN:
3470                 if (val <= dev_priv->rps.rpe_delay && val < dev_priv->rps.cur_delay)
3471                         new_power = LOW_POWER;
3472                 else if (val >= dev_priv->rps.rp0_delay && val > dev_priv->rps.cur_delay)
3473                         new_power = HIGH_POWER;
3474                 break;
3475
3476         case HIGH_POWER:
3477                 if (val < (dev_priv->rps.rp1_delay + dev_priv->rps.rp0_delay) >> 1 && val < dev_priv->rps.cur_delay)
3478                         new_power = BETWEEN;
3479                 break;
3480         }
3481         /* Max/min bins are special */
3482         if (val == dev_priv->rps.min_delay)
3483                 new_power = LOW_POWER;
3484         if (val == dev_priv->rps.max_delay)
3485                 new_power = HIGH_POWER;
3486         if (new_power == dev_priv->rps.power)
3487                 return;
3488
3489         /* Note the units here are not exactly 1us, but 1280ns. */
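        /*
         * E.g. for LOW_POWER below: an up EI of 12500 units is
         * 12500 * 1.28us = 16ms, and an up threshold of 11800 units is
         * 11800 / 12500 = 94.4% busy, i.e. roughly the "95% over 16ms"
         * stated in the comments.
         */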
3490         switch (new_power) {
3491         case LOW_POWER:
3492                 /* Upclock if more than 95% busy over 16ms */
3493                 I915_WRITE(GEN6_RP_UP_EI, 12500);
3494                 I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800);
3495
3496                 /* Downclock if less than 85% busy over 32ms */
3497                 I915_WRITE(GEN6_RP_DOWN_EI, 25000);
3498                 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250);
3499
3500                 I915_WRITE(GEN6_RP_CONTROL,
3501                            GEN6_RP_MEDIA_TURBO |
3502                            GEN6_RP_MEDIA_HW_NORMAL_MODE |
3503                            GEN6_RP_MEDIA_IS_GFX |
3504                            GEN6_RP_ENABLE |
3505                            GEN6_RP_UP_BUSY_AVG |
3506                            GEN6_RP_DOWN_IDLE_AVG);
3507                 break;
3508
3509         case BETWEEN:
3510                 /* Upclock if more than 90% busy over 13ms */
3511                 I915_WRITE(GEN6_RP_UP_EI, 10250);
3512                 I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225);
3513
3514                 /* Downclock if less than 75% busy over 32ms */
3515                 I915_WRITE(GEN6_RP_DOWN_EI, 25000);
3516                 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750);
3517
3518                 I915_WRITE(GEN6_RP_CONTROL,
3519                            GEN6_RP_MEDIA_TURBO |
3520                            GEN6_RP_MEDIA_HW_NORMAL_MODE |
3521                            GEN6_RP_MEDIA_IS_GFX |
3522                            GEN6_RP_ENABLE |
3523                            GEN6_RP_UP_BUSY_AVG |
3524                            GEN6_RP_DOWN_IDLE_AVG);
3525                 break;
3526
3527         case HIGH_POWER:
3528                 /* Upclock if more than 85% busy over 10ms */
3529                 I915_WRITE(GEN6_RP_UP_EI, 8000);
3530                 I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800);
3531
3532                 /* Downclock if less than 60% busy over 32ms */
3533                 I915_WRITE(GEN6_RP_DOWN_EI, 25000);
3534                 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000);
3535
3536                 I915_WRITE(GEN6_RP_CONTROL,
3537                            GEN6_RP_MEDIA_TURBO |
3538                            GEN6_RP_MEDIA_HW_NORMAL_MODE |
3539                            GEN6_RP_MEDIA_IS_GFX |
3540                            GEN6_RP_ENABLE |
3541                            GEN6_RP_UP_BUSY_AVG |
3542                            GEN6_RP_DOWN_IDLE_AVG);
3543                 break;
3544         }
3545
3546         dev_priv->rps.power = new_power;
3547         dev_priv->rps.last_adj = 0;
3548 }
3549
3550 void gen6_set_rps(struct drm_device *dev, u8 val)
3551 {
3552         struct drm_i915_private *dev_priv = dev->dev_private;
3553
3554         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3555         WARN_ON(val > dev_priv->rps.max_delay);
3556         WARN_ON(val < dev_priv->rps.min_delay);
3557
3558         if (val == dev_priv->rps.cur_delay)
3559                 return;
3560
3561         gen6_set_rps_thresholds(dev_priv, val);
3562
3563         if (IS_HASWELL(dev))
3564                 I915_WRITE(GEN6_RPNSWREQ,
3565                            HSW_FREQUENCY(val));
3566         else
3567                 I915_WRITE(GEN6_RPNSWREQ,
3568                            GEN6_FREQUENCY(val) |
3569                            GEN6_OFFSET(0) |
3570                            GEN6_AGGRESSIVE_TURBO);
3571
3572         /* Make sure we continue to get interrupts
3573          * until we hit the minimum or maximum frequencies.
3574          */
3575         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
3576                    gen6_rps_limits(dev_priv, val));
3577
3578         POSTING_READ(GEN6_RPNSWREQ);
3579
3580         dev_priv->rps.cur_delay = val;
3581
3582         trace_intel_gpu_freq_change(val * 50);
3583 }
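/*
 * Units note: on these parts the requested frequency is programmed in 50MHz
 * steps, which is why the tracepoint above reports val * 50; e.g. val == 16
 * requests 800MHz.
 */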
3584
3585 void gen6_rps_idle(struct drm_i915_private *dev_priv)
3586 {
3587         mutex_lock(&dev_priv->rps.hw_lock);
3588         if (dev_priv->rps.enabled) {
3589                 if (dev_priv->info->is_valleyview)
3590                         valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
3591                 else
3592                         gen6_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
3593                 dev_priv->rps.last_adj = 0;
3594         }
3595         mutex_unlock(&dev_priv->rps.hw_lock);
3596 }
3597
3598 void gen6_rps_boost(struct drm_i915_private *dev_priv)
3599 {
3600         mutex_lock(&dev_priv->rps.hw_lock);
3601         if (dev_priv->rps.enabled) {
3602                 if (dev_priv->info->is_valleyview)
3603                         valleyview_set_rps(dev_priv->dev, dev_priv->rps.max_delay);
3604                 else
3605                         gen6_set_rps(dev_priv->dev, dev_priv->rps.max_delay);
3606                 dev_priv->rps.last_adj = 0;
3607         }
3608         mutex_unlock(&dev_priv->rps.hw_lock);
3609 }
3610
3611 void valleyview_set_rps(struct drm_device *dev, u8 val)
3612 {
3613         struct drm_i915_private *dev_priv = dev->dev_private;
3614
3615         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3616         WARN_ON(val > dev_priv->rps.max_delay);
3617         WARN_ON(val < dev_priv->rps.min_delay);
3618
3619         DRM_DEBUG_DRIVER("GPU freq request from %d MHz (%u) to %d MHz (%u)\n",
3620                          vlv_gpu_freq(dev_priv, dev_priv->rps.cur_delay),
3621                          dev_priv->rps.cur_delay,
3622                          vlv_gpu_freq(dev_priv, val), val);
3623
3624         if (val == dev_priv->rps.cur_delay)
3625                 return;
3626
3627         vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
3628
3629         dev_priv->rps.cur_delay = val;
3630
3631         trace_intel_gpu_freq_change(vlv_gpu_freq(dev_priv, val));
3632 }
3633
3634 static void gen6_disable_rps_interrupts(struct drm_device *dev)
3635 {
3636         struct drm_i915_private *dev_priv = dev->dev_private;
3637
3638         I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
3639         I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS);
3640         /* Completely masking the PM interrupts here doesn't race with the rps
3641          * work item unmasking them again, because the work item uses a different
3642          * register (PMIMR) for its masking. The only risk is leaving stale bits
3643          * in PMIIR and PMIMR, which gen6_enable_rps will clean up. */
3644
3645         spin_lock_irq(&dev_priv->irq_lock);
3646         dev_priv->rps.pm_iir = 0;
3647         spin_unlock_irq(&dev_priv->irq_lock);
3648
3649         I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
3650 }
3651
3652 static void gen6_disable_rps(struct drm_device *dev)
3653 {
3654         struct drm_i915_private *dev_priv = dev->dev_private;
3655
3656         I915_WRITE(GEN6_RC_CONTROL, 0);
3657         I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
3658
3659         gen6_disable_rps_interrupts(dev);
3660 }
3661
3662 static void valleyview_disable_rps(struct drm_device *dev)
3663 {
3664         struct drm_i915_private *dev_priv = dev->dev_private;
3665
3666         I915_WRITE(GEN6_RC_CONTROL, 0);
3667
3668         gen6_disable_rps_interrupts(dev);
3669
3670         if (dev_priv->vlv_pctx) {
3671                 drm_gem_object_unreference(&dev_priv->vlv_pctx->base);
3672                 dev_priv->vlv_pctx = NULL;
3673         }
3674 }
3675
3676 static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
3677 {
3678         if (IS_GEN6(dev))
3679                 DRM_DEBUG_DRIVER("Sandybridge: deep RC6 disabled\n");
3680
3681         if (IS_HASWELL(dev))
3682                 DRM_DEBUG_DRIVER("Haswell: only RC6 available\n");
3683
3684         DRM_INFO("Enabling RC6 states: RC6 %s, RC6p %s, RC6pp %s\n",
3685                         (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
3686                         (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
3687                         (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
3688 }
3689
3690 int intel_enable_rc6(const struct drm_device *dev)
3691 {
3692         /* No RC6 before Ironlake */
3693         if (INTEL_INFO(dev)->gen < 5)
3694                 return 0;
3695
3696         /* Respect the kernel parameter if it is set */
3697         if (i915_enable_rc6 >= 0)
3698                 return i915_enable_rc6;
3699
3700         /* Disable RC6 on Ironlake */
3701         if (INTEL_INFO(dev)->gen == 5)
3702                 return 0;
3703
3704         if (IS_HASWELL(dev))
3705                 return INTEL_RC6_ENABLE;
3706
3707         /* snb/ivb have more than one rc6 state. */
3708         if (INTEL_INFO(dev)->gen == 6)
3709                 return INTEL_RC6_ENABLE;
3710
3711         return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
3712 }
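/*
 * The i915_enable_rc6 module parameter is treated as a mask of the
 * INTEL_RC6*_ENABLE flags, so e.g. booting with i915.i915_enable_rc6=1
 * allows plain RC6 only, while 3 would also allow deep RC6 (RC6p) where
 * the hardware supports it.
 */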
3713
3714 static void gen6_enable_rps_interrupts(struct drm_device *dev)
3715 {
3716         struct drm_i915_private *dev_priv = dev->dev_private;
3717         u32 enabled_intrs;
3718
3719         spin_lock_irq(&dev_priv->irq_lock);
3720         WARN_ON(dev_priv->rps.pm_iir);
3721         snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
3722         I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
3723         spin_unlock_irq(&dev_priv->irq_lock);
3724
3725         /* only unmask PM interrupts we need. Mask all others. */
3726         enabled_intrs = GEN6_PM_RPS_EVENTS;
3727
3728         /* IVB and SNB hard-hang on a looping batchbuffer
3729          * if GEN6_PM_UP_EI_EXPIRED is masked.
3730          */
3731         if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev))
3732                 enabled_intrs |= GEN6_PM_RP_UP_EI_EXPIRED;
3733
3734         I915_WRITE(GEN6_PMINTRMSK, ~enabled_intrs);
3735 }
3736
3737 static void gen8_enable_rps(struct drm_device *dev)
3738 {
3739         struct drm_i915_private *dev_priv = dev->dev_private;
3740         struct intel_ring_buffer *ring;
3741         uint32_t rc6_mask = 0, rp_state_cap;
3742         int unused;
3743
3744         /* 1a: Software RC state - RC0 */
3745         I915_WRITE(GEN6_RC_STATE, 0);
3746
3747         /* 1c & 1d: Get forcewake during program sequence. Although the driver
3748          * hasn't enabled a state yet where we need forcewake, BIOS may have. */
3749         gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
3750
3751         /* 2a: Disable RC states. */
3752         I915_WRITE(GEN6_RC_CONTROL, 0);
3753
3754         rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
3755
3756         /* 2b: Program RC6 thresholds.*/
3757         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
3758         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns = 160ms */
3759         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
3760         for_each_ring(ring, dev_priv, unused)
3761                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
3762         I915_WRITE(GEN6_RC_SLEEP, 0);
3763         I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
3764
3765         /* 3: Enable RC6 */
3766         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
3767                 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
3768         DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off");
3769         I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
3770                         GEN6_RC_CTL_EI_MODE(1) |
3771                         rc6_mask);
3772
3773         /* 4: Program defaults and thresholds for RPS */
3774         I915_WRITE(GEN6_RPNSWREQ, HSW_FREQUENCY(10)); /* Request 500 MHz */
3775         I915_WRITE(GEN6_RC_VIDEO_FREQ, HSW_FREQUENCY(12)); /* Request 600 MHz */
3776         /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
3777         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
3778
3779         /* Docs recommend 900MHz, and 300 MHz respectively */
3780         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
3781                    dev_priv->rps.max_delay << 24 |
3782                    dev_priv->rps.min_delay << 16);
3783
3784         I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
3785         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70% */
3786         I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
3787         I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
3788
3789         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
3790
3791         /* 5: Enable RPS */
3792         I915_WRITE(GEN6_RP_CONTROL,
3793                    GEN6_RP_MEDIA_TURBO |
3794                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
3795                    GEN6_RP_MEDIA_IS_GFX |
3796                    GEN6_RP_ENABLE |
3797                    GEN6_RP_UP_BUSY_AVG |
3798                    GEN6_RP_DOWN_IDLE_AVG);
3799
3800         /* 6: Ring frequency + overclocking (our driver does this later) */
3801
3802         gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8);
3803
3804         gen6_enable_rps_interrupts(dev);
3805
3806         gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
3807 }
3808
3809 static void gen6_enable_rps(struct drm_device *dev)
3810 {
3811         struct drm_i915_private *dev_priv = dev->dev_private;
3812         struct intel_ring_buffer *ring;
3813         u32 rp_state_cap;
3814         u32 gt_perf_status;
3815         u32 rc6vids, pcu_mbox, rc6_mask = 0;
3816         u32 gtfifodbg;
3817         int rc6_mode;
3818         int i, ret;
3819
3820         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3821
3822         /* Here begins a magic sequence of register writes to enable
3823          * auto-downclocking.
3824          *
3825          * Perhaps there might be some value in exposing these to
3826          * userspace...
3827          */
3828         I915_WRITE(GEN6_RC_STATE, 0);
3829
3830         /* Clear GTFIFODBG now so stale errors aren't confused with new ones */
3831         if ((gtfifodbg = I915_READ(GTFIFODBG))) {
3832                 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
3833                 I915_WRITE(GTFIFODBG, gtfifodbg);
3834         }
3835
3836         gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
3837
3838         rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
3839         gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
3840
3841         /* In units of 50MHz */
3842         dev_priv->rps.hw_max = dev_priv->rps.max_delay = rp_state_cap & 0xff;
3843         dev_priv->rps.min_delay = (rp_state_cap >> 16) & 0xff;
3844         dev_priv->rps.rp1_delay = (rp_state_cap >>  8) & 0xff;
3845         dev_priv->rps.rp0_delay = (rp_state_cap >>  0) & 0xff;
3846         dev_priv->rps.rpe_delay = dev_priv->rps.rp1_delay;
3847         dev_priv->rps.cur_delay = 0;
3848
3849         /* disable the counters and set deterministic thresholds */
3850         I915_WRITE(GEN6_RC_CONTROL, 0);
3851
3852         I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
3853         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
3854         I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
3855         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
3856         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
3857
3858         for_each_ring(ring, dev_priv, i)
3859                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
3860
3861         I915_WRITE(GEN6_RC_SLEEP, 0);
3862         I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
3863         if (IS_IVYBRIDGE(dev))
3864                 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
3865         else
3866                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
3867         I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
3868         I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
3869
3870         /* Check if we are enabling RC6 */
3871         rc6_mode = intel_enable_rc6(dev_priv->dev);
3872         if (rc6_mode & INTEL_RC6_ENABLE)
3873                 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
3874
3875         /* We don't use those on Haswell */
3876         if (!IS_HASWELL(dev)) {
3877                 if (rc6_mode & INTEL_RC6p_ENABLE)
3878                         rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
3879
3880                 if (rc6_mode & INTEL_RC6pp_ENABLE)
3881                         rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
3882         }
3883
3884         intel_print_rc6_info(dev, rc6_mask);
3885
3886         I915_WRITE(GEN6_RC_CONTROL,
3887                    rc6_mask |
3888                    GEN6_RC_CTL_EI_MODE(1) |
3889                    GEN6_RC_CTL_HW_ENABLE);
3890
3891         /* Power down if completely idle for over 50ms */
3892         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
3893         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
3894
3895         ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
3896         if (!ret) {
3897                 pcu_mbox = 0;
3898                 ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
3899                 if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
3900                         DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
3901                                          (dev_priv->rps.max_delay & 0xff) * 50,
3902                                          (pcu_mbox & 0xff) * 50);
3903                         dev_priv->rps.hw_max = pcu_mbox & 0xff;
3904                 }
3905         } else {
3906                 DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
3907         }
3908
3909         dev_priv->rps.power = HIGH_POWER; /* force a reset */
3910         gen6_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
3911
3912         gen6_enable_rps_interrupts(dev);
3913
3914         rc6vids = 0;
3915         ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
3916         if (IS_GEN6(dev) && ret) {
3917                 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
3918         } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
3919                 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
3920                           GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
3921                 rc6vids &= 0xffff00;
3922                 rc6vids |= GEN6_ENCODE_RC6_VID(450);
3923                 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
3924                 if (ret)
3925                         DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
3926         }
3927
3928         gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
3929 }
3930
3931 void gen6_update_ring_freq(struct drm_device *dev)
3932 {
3933         struct drm_i915_private *dev_priv = dev->dev_private;
3934         int min_freq = 15;
3935         unsigned int gpu_freq;
3936         unsigned int max_ia_freq, min_ring_freq;
3937         int scaling_factor = 180;
3938         struct cpufreq_policy *policy;
3939
3940         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3941
3942         policy = cpufreq_cpu_get(0);
3943         if (policy) {
3944                 max_ia_freq = policy->cpuinfo.max_freq;
3945                 cpufreq_cpu_put(policy);
3946         } else {
3947                 /*
3948                  * Default to measured freq if none found, PCU will ensure we
3949                  * don't go over
3950                  */
3951                 max_ia_freq = tsc_khz;
3952         }
3953
3954         /* Convert from kHz to MHz */
3955         max_ia_freq /= 1000;
3956
3957         min_ring_freq = I915_READ(DCLK) & 0xf;
3958         /* convert DDR frequency from units of 266.6MHz to bandwidth */
3959         min_ring_freq = mult_frac(min_ring_freq, 8, 3);
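        /*
         * I.e. the raw DCLK count in 266.67MHz units is rescaled by 8/3
         * into 100MHz units (266.67 * 3 / 8 == 100); e.g. a raw value of 6
         * (roughly 1600MHz DDR) becomes 16.
         */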
3960
3961         /*
3962          * For each potential GPU frequency, load a ring frequency we'd like
3963          * to use for memory access.  We do this by specifying the IA frequency
3964          * the PCU should use as a reference to determine the ring frequency.
3965          */
3966         for (gpu_freq = dev_priv->rps.max_delay; gpu_freq >= dev_priv->rps.min_delay;
3967              gpu_freq--) {
3968                 int diff = dev_priv->rps.max_delay - gpu_freq;
3969                 unsigned int ia_freq = 0, ring_freq = 0;
3970
3971                 if (INTEL_INFO(dev)->gen >= 8) {
3972                         /* max(2 * GT, DDR). NB: GT is 50MHz units */
3973                         ring_freq = max(min_ring_freq, gpu_freq);
3974                 } else if (IS_HASWELL(dev)) {
3975                         ring_freq = mult_frac(gpu_freq, 5, 4);
3976                         ring_freq = max(min_ring_freq, ring_freq);
3977                         /* leave ia_freq as the default, chosen by cpufreq */
3978                 } else {
3979                         /* On older processors, there is no separate ring
3980                          * clock domain, so in order to boost the bandwidth
3981                          * of the ring, we need to upclock the CPU (ia_freq).
3982                          *
3983                          * For GPU frequencies less than 750MHz,
3984                          * just use the lowest ring freq.
3985                          */
3986                         if (gpu_freq < min_freq)
3987                                 ia_freq = 800;
3988                         else
3989                                 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
3990                         ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
3991                 }
3992
3993                 sandybridge_pcode_write(dev_priv,
3994                                         GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
3995                                         ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
3996                                         ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
3997                                         gpu_freq);
3998         }
3999 }
4000
4001 int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
4002 {
4003         u32 val, rp0;
4004
4005         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
4006
4007         rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
4008         /* Clamp to max */
4009         rp0 = min_t(u32, rp0, 0xea);
4010
4011         return rp0;
4012 }
4013
4014 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
4015 {
4016         u32 val, rpe;
4017
4018         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
4019         rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
4020         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
4021         rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
4022
4023         return rpe;
4024 }
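/*
 * I.e. the RPe (most efficient) frequency fuse is split across two
 * registers: FUSE_LO carries the low 5 bits and FUSE_HI the remaining
 * upper bits, merged by the << 5 above.
 */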
4025
4026 int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
4027 {
4028         return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
4029 }
4030
4031 static void valleyview_setup_pctx(struct drm_device *dev)
4032 {
4033         struct drm_i915_private *dev_priv = dev->dev_private;
4034         struct drm_i915_gem_object *pctx;
4035         unsigned long pctx_paddr;
4036         u32 pcbr;
4037         int pctx_size = 24*1024;
4038
4039         pcbr = I915_READ(VLV_PCBR);
4040         if (pcbr) {
4041                 /* BIOS set it up already, grab the pre-alloc'd space */
4042                 int pcbr_offset;
4043
4044                 pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
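                /*
                 * pcbr & ~4095 rounds the register value down to its 4KiB
                 * page boundary, recovering the physical base of the
                 * BIOS-allocated context area; subtracting stolen_base turns
                 * that into the offset within stolen memory that the
                 * preallocated-object helper expects (illustrative numbers:
                 * pcbr = 0x7bfff001 with stolen_base = 0x7b000000 gives an
                 * offset of 0xfff000).
                 */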
4045                 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev,
4046                                                                       pcbr_offset,
4047                                                                       I915_GTT_OFFSET_NONE,
4048                                                                       pctx_size);
4049                 goto out;
4050         }
4051
4052         /*
4053          * From the Gunit register HAS:
4054          * The Gfx driver is expected to program this register and ensure
4055          * proper allocation within Gfx stolen memory.  For example, this
4056          * register should be programmed such that the PCBR range does not
4057          * overlap with other ranges, such as the frame buffer, protected
4058          * memory, or any other relevant ranges.
4059          */
4060         pctx = i915_gem_object_create_stolen(dev, pctx_size);
4061         if (!pctx) {
4062                 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
4063                 return;
4064         }
4065
4066         pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
4067         I915_WRITE(VLV_PCBR, pctx_paddr);
4068
4069 out:
4070         dev_priv->vlv_pctx = pctx;
4071 }
4072
4073 static void valleyview_enable_rps(struct drm_device *dev)
4074 {
4075         struct drm_i915_private *dev_priv = dev->dev_private;
4076         struct intel_ring_buffer *ring;
4077         u32 gtfifodbg, val, rc6_mode = 0;
4078         int i;
4079
4080         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4081
4082         if ((gtfifodbg = I915_READ(GTFIFODBG))) {
4083                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
4084                                  gtfifodbg);
4085                 I915_WRITE(GTFIFODBG, gtfifodbg);
4086         }
4087
4088         valleyview_setup_pctx(dev);
4089
4090         /* On VLV, force wake all wells; elsewhere this redirects to the regular path */
4091         gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
4092
4093         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
4094         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
4095         I915_WRITE(GEN6_RP_UP_EI, 66000);
4096         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
4097
4098         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
4099
4100         I915_WRITE(GEN6_RP_CONTROL,
4101                    GEN6_RP_MEDIA_TURBO |
4102                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
4103                    GEN6_RP_MEDIA_IS_GFX |
4104                    GEN6_RP_ENABLE |
4105                    GEN6_RP_UP_BUSY_AVG |
4106                    GEN6_RP_DOWN_IDLE_CONT);
4107
4108         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
4109         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
4110         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
4111
4112         for_each_ring(ring, dev_priv, i)
4113                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4114
4115         I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
4116
4117         /* allows RC6 residency counter to work */
4118         I915_WRITE(VLV_COUNTER_CONTROL,
4119                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
4120                                       VLV_MEDIA_RC6_COUNT_EN |
4121                                       VLV_RENDER_RC6_COUNT_EN));
4122         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4123                 rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
4124
4125         intel_print_rc6_info(dev, rc6_mode);
4126
4127         I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
4128
4129         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
4130
4131         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & 0x10 ? "yes" : "no");
4132         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
4133
4134         dev_priv->rps.cur_delay = (val >> 8) & 0xff;
4135         DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
4136                          vlv_gpu_freq(dev_priv, dev_priv->rps.cur_delay),
4137                          dev_priv->rps.cur_delay);
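        /*
         * Bit layout inferred from the reads above: bit 4 of
         * PUNIT_REG_GPU_FREQ_STS reports whether the GPLL is enabled and
         * bits 15:8 carry the current frequency code, which vlv_gpu_freq()
         * converts to MHz for the debug output.
         */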
4138
4139         dev_priv->rps.max_delay = valleyview_rps_max_freq(dev_priv);
4140         dev_priv->rps.hw_max = dev_priv->rps.max_delay;
4141         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
4142                          vlv_gpu_freq(dev_priv, dev_priv->rps.max_delay),
4143                          dev_priv->rps.max_delay);
4144
4145         dev_priv->rps.rpe_delay = valleyview_rps_rpe_freq(dev_priv);
4146         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
4147                          vlv_gpu_freq(dev_priv, dev_priv->rps.rpe_delay),
4148                          dev_priv->rps.rpe_delay);
4149
4150         dev_priv->rps.min_delay = valleyview_rps_min_freq(dev_priv);
4151         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
4152                          vlv_gpu_freq(dev_priv, dev_priv->rps.min_delay),
4153                          dev_priv->rps.min_delay);
4154
4155         DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
4156                          vlv_gpu_freq(dev_priv, dev_priv->rps.rpe_delay),
4157                          dev_priv->rps.rpe_delay);
4158
4159         valleyview_set_rps(dev_priv->dev, dev_priv->rps.rpe_delay);
4160
4161         gen6_enable_rps_interrupts(dev);
4162
4163         gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
4164 }
4165
4166 void ironlake_teardown_rc6(struct drm_device *dev)
4167 {
4168         struct drm_i915_private *dev_priv = dev->dev_private;
4169
4170         if (dev_priv->ips.renderctx) {
4171                 i915_gem_object_unpin(dev_priv->ips.renderctx);
4172                 drm_gem_object_unreference(&dev_priv->ips.renderctx->base);
4173                 dev_priv->ips.renderctx = NULL;
4174         }
4175
4176         if (dev_priv->ips.pwrctx) {
4177                 i915_gem_object_unpin(dev_priv->ips.pwrctx);
4178                 drm_gem_object_unreference(&dev_priv->ips.pwrctx->base);
4179                 dev_priv->ips.pwrctx = NULL;
4180         }
4181 }
4182
4183 static void ironlake_disable_rc6(struct drm_device *dev)
4184 {
4185         struct drm_i915_private *dev_priv = dev->dev_private;
4186
4187         if (I915_READ(PWRCTXA)) {
4188                 /* Wake the GPU, prevent RC6, then restore RSTDBYCTL */
4189                 I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) | RCX_SW_EXIT);
4190                 wait_for(((I915_READ(RSTDBYCTL) & RSX_STATUS_MASK) == RSX_STATUS_ON),
4191                          50);
4192
4193                 I915_WRITE(PWRCTXA, 0);
4194                 POSTING_READ(PWRCTXA);
4195
4196                 I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
4197                 POSTING_READ(RSTDBYCTL);
4198         }
4199 }
4200
4201 static int ironlake_setup_rc6(struct drm_device *dev)
4202 {
4203         struct drm_i915_private *dev_priv = dev->dev_private;
4204
4205         if (dev_priv->ips.renderctx == NULL)
4206                 dev_priv->ips.renderctx = intel_alloc_context_page(dev);
4207         if (!dev_priv->ips.renderctx)
4208                 return -ENOMEM;
4209
4210         if (dev_priv->ips.pwrctx == NULL)
4211                 dev_priv->ips.pwrctx = intel_alloc_context_page(dev);
4212         if (!dev_priv->ips.pwrctx) {
4213                 ironlake_teardown_rc6(dev);
4214                 return -ENOMEM;
4215         }
4216
4217         return 0;
4218 }
4219
4220 static void ironlake_enable_rc6(struct drm_device *dev)
4221 {
4222         struct drm_i915_private *dev_priv = dev->dev_private;
4223         struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
4224         bool was_interruptible;
4225         int ret;
4226
4227         /* rc6 disabled by default due to repeated reports of hanging during
4228          * boot and resume.
4229          */
4230         if (!intel_enable_rc6(dev))
4231                 return;
4232
4233         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
4234
4235         ret = ironlake_setup_rc6(dev);
4236         if (ret)
4237                 return;
4238
4239         was_interruptible = dev_priv->mm.interruptible;
4240         dev_priv->mm.interruptible = false;
4241
4242         /*
4243          * GPU can automatically power down the render unit if given a page
4244          * to save state.
4245          */
4246         ret = intel_ring_begin(ring, 6);
4247         if (ret) {
4248                 ironlake_teardown_rc6(dev);
4249                 dev_priv->mm.interruptible = was_interruptible;
4250                 return;
4251         }
4252
4253         intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
4254         intel_ring_emit(ring, MI_SET_CONTEXT);
4255         intel_ring_emit(ring, i915_gem_obj_ggtt_offset(dev_priv->ips.renderctx) |
4256                         MI_MM_SPACE_GTT |
4257                         MI_SAVE_EXT_STATE_EN |
4258                         MI_RESTORE_EXT_STATE_EN |
4259                         MI_RESTORE_INHIBIT);
4260         intel_ring_emit(ring, MI_SUSPEND_FLUSH);
4261         intel_ring_emit(ring, MI_NOOP);
4262         intel_ring_emit(ring, MI_FLUSH);
4263         intel_ring_advance(ring);
4264
4265         /*
4266          * Wait for the command parser to advance past MI_SET_CONTEXT. The HW
4267          * does an implicit flush; combined with the MI_FLUSH above, it
4268          * should be safe to assume that renderctx is valid.
4269          */
4270         ret = intel_ring_idle(ring);
4271         dev_priv->mm.interruptible = was_interruptible;
4272         if (ret) {
4273                 DRM_ERROR("failed to enable ironlake power savings\n");
4274                 ironlake_teardown_rc6(dev);
4275                 return;
4276         }
4277
4278         I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN);
4279         I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
4280
4281         intel_print_rc6_info(dev, INTEL_RC6_ENABLE);
4282 }
4283
4284 static unsigned long intel_pxfreq(u32 vidfreq)
4285 {
4286         unsigned long freq;
4287         int div = (vidfreq & 0x3f0000) >> 16;
4288         int post = (vidfreq & 0x3000) >> 12;
4289         int pre = (vidfreq & 0x7);
4290
4291         if (!pre)
4292                 return 0;
4293
4294         freq = ((div * 133333) / ((1<<post) * pre));
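        /*
         * A worked example, assuming the 133333 constant is the 133.33MHz
         * reference clock expressed in kHz (which would make freq kHz as
         * well): div = 16, post = 1, pre = 1 gives
         * 16 * 133333 / 2 = 1066664, i.e. roughly 1066 MHz.
         */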
4295
4296         return freq;
4297 }
4298
4299 static const struct cparams {
4300         u16 i;
4301         u16 t;
4302         u16 m;
4303         u16 c;
4304 } cparams[] = {
4305         { 1, 1333, 301, 28664 },
4306         { 1, 1066, 294, 24460 },
4307         { 1, 800, 294, 25192 },
4308         { 0, 1333, 276, 27605 },
4309         { 0, 1066, 276, 27605 },
4310         { 0, 800, 231, 23784 },
4311 };
4312
4313 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
4314 {
4315         u64 total_count, diff, ret;
4316         u32 count1, count2, count3, m = 0, c = 0;
4317         unsigned long now = jiffies_to_msecs(jiffies), diff1;
4318         int i;
4319
4320         assert_spin_locked(&mchdev_lock);
4321
4322         diff1 = now - dev_priv->ips.last_time1;
4323
4324         /* Prevent division-by-zero if we are asking too fast.
4325          * Also, we don't get interesting results if we are polling
4326          * faster than once in 10ms, so just return the saved value
4327          * in such cases.
4328          */
4329         if (diff1 <= 10)
4330                 return dev_priv->ips.chipset_power;
4331
4332         count1 = I915_READ(DMIEC);
4333         count2 = I915_READ(DDREC);
4334         count3 = I915_READ(CSIEC);
4335
4336         total_count = count1 + count2 + count3;
4337
4338         /* FIXME: handle per-counter overflow */
4339         if (total_count < dev_priv->ips.last_count1) {
4340                 diff = ~0UL - dev_priv->ips.last_count1;
4341                 diff += total_count;
4342         } else {
4343                 diff = total_count - dev_priv->ips.last_count1;
4344         }
4345
4346         for (i = 0; i < ARRAY_SIZE(cparams); i++) {
4347                 if (cparams[i].i == dev_priv->ips.c_m &&
4348                     cparams[i].t == dev_priv->ips.r_t) {
4349                         m = cparams[i].m;
4350                         c = cparams[i].c;
4351                         break;
4352                 }
4353         }
4354
4355         diff = div_u64(diff, diff1);
4356         ret = ((m * diff) + c);
4357         ret = div_u64(ret, 10);
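        /*
         * Illustrative numbers: with the 1066MHz mobile row above
         * (m = 294, c = 24460), 50000 counts accumulated over a 100ms
         * window give diff = 500 counts/ms and
         * ret = (294 * 500 + 24460) / 10 = 17146, presumably milliwatts by
         * analogy with the explicit mW conversion in the gfx path below.
         */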
4358
4359         dev_priv->ips.last_count1 = total_count;
4360         dev_priv->ips.last_time1 = now;
4361
4362         dev_priv->ips.chipset_power = ret;
4363
4364         return ret;
4365 }
4366
4367 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
4368 {
4369         unsigned long val;
4370
4371         if (dev_priv->info->gen != 5)
4372                 return 0;
4373
4374         spin_lock_irq(&mchdev_lock);
4375
4376         val = __i915_chipset_val(dev_priv);
4377
4378         spin_unlock_irq(&mchdev_lock);
4379
4380         return val;
4381 }
4382
4383 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
4384 {
4385         unsigned long m, x, b;
4386         u32 tsfs;
4387
4388         tsfs = I915_READ(TSFS);
4389
4390         m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
4391         x = I915_READ8(TR1);
4392
4393         b = tsfs & TSFS_INTR_MASK;
4394
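        /*
         * Linear fit: slope m and intercept b come from the TSFS fuse
         * fields and x is the raw TR1 thermal reading.  Hypothetical
         * values m = 100, x = 80, b = 10 give (100 * 80) / 127 - 10 = 52;
         * the 50/80 thresholds in __i915_gfx_val() suggest the result is
         * in degrees Celsius.
         */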
4395         return ((m * x) / 127) - b;
4396 }
4397
4398 static u16 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
4399 {
4400         static const struct v_table {
4401                 u16 vd; /* in .1 mil */
4402                 u16 vm; /* in .1 mil */
4403         } v_table[] = {
4404                 { 0, 0, },
4405                 { 375, 0, },
4406                 { 500, 0, },
4407                 { 625, 0, },
4408                 { 750, 0, },
4409                 { 875, 0, },
4410                 { 1000, 0, },
4411                 { 1125, 0, },
4412                 { 4125, 3000, },
4413                 { 4125, 3000, },
4414                 { 4125, 3000, },
4415                 { 4125, 3000, },
4416                 { 4125, 3000, },
4417                 { 4125, 3000, },
4418                 { 4125, 3000, },
4419                 { 4125, 3000, },
4420                 { 4125, 3000, },
4421                 { 4125, 3000, },
4422                 { 4125, 3000, },
4423                 { 4125, 3000, },
4424                 { 4125, 3000, },
4425                 { 4125, 3000, },
4426                 { 4125, 3000, },
4427                 { 4125, 3000, },
4428                 { 4125, 3000, },
4429                 { 4125, 3000, },
4430                 { 4125, 3000, },
4431                 { 4125, 3000, },
4432                 { 4125, 3000, },
4433                 { 4125, 3000, },
4434                 { 4125, 3000, },
4435                 { 4125, 3000, },
4436                 { 4250, 3125, },
4437                 { 4375, 3250, },
4438                 { 4500, 3375, },
4439                 { 4625, 3500, },
4440                 { 4750, 3625, },
4441                 { 4875, 3750, },
4442                 { 5000, 3875, },
4443                 { 5125, 4000, },
4444                 { 5250, 4125, },
4445                 { 5375, 4250, },
4446                 { 5500, 4375, },
4447                 { 5625, 4500, },
4448                 { 5750, 4625, },
4449                 { 5875, 4750, },
4450                 { 6000, 4875, },
4451                 { 6125, 5000, },
4452                 { 6250, 5125, },
4453                 { 6375, 5250, },
4454                 { 6500, 5375, },
4455                 { 6625, 5500, },
4456                 { 6750, 5625, },
4457                 { 6875, 5750, },
4458                 { 7000, 5875, },
4459                 { 7125, 6000, },
4460                 { 7250, 6125, },
4461                 { 7375, 6250, },
4462                 { 7500, 6375, },
4463                 { 7625, 6500, },
4464                 { 7750, 6625, },
4465                 { 7875, 6750, },
4466                 { 8000, 6875, },
4467                 { 8125, 7000, },
4468                 { 8250, 7125, },
4469                 { 8375, 7250, },
4470                 { 8500, 7375, },
4471                 { 8625, 7500, },
4472                 { 8750, 7625, },
4473                 { 8875, 7750, },
4474                 { 9000, 7875, },
4475                 { 9125, 8000, },
4476                 { 9250, 8125, },
4477                 { 9375, 8250, },
4478                 { 9500, 8375, },
4479                 { 9625, 8500, },
4480                 { 9750, 8625, },
4481                 { 9875, 8750, },
4482                 { 10000, 8875, },
4483                 { 10125, 9000, },
4484                 { 10250, 9125, },
4485                 { 10375, 9250, },
4486                 { 10500, 9375, },
4487                 { 10625, 9500, },
4488                 { 10750, 9625, },
4489                 { 10875, 9750, },
4490                 { 11000, 9875, },
4491                 { 11125, 10000, },
4492                 { 11250, 10125, },
4493                 { 11375, 10250, },
4494                 { 11500, 10375, },
4495                 { 11625, 10500, },
4496                 { 11750, 10625, },
4497                 { 11875, 10750, },
4498                 { 12000, 10875, },
4499                 { 12125, 11000, },
4500                 { 12250, 11125, },
4501                 { 12375, 11250, },
4502                 { 12500, 11375, },
4503                 { 12625, 11500, },
4504                 { 12750, 11625, },
4505                 { 12875, 11750, },
4506                 { 13000, 11875, },
4507                 { 13125, 12000, },
4508                 { 13250, 12125, },
4509                 { 13375, 12250, },
4510                 { 13500, 12375, },
4511                 { 13625, 12500, },
4512                 { 13750, 12625, },
4513                 { 13875, 12750, },
4514                 { 14000, 12875, },
4515                 { 14125, 13000, },
4516                 { 14250, 13125, },
4517                 { 14375, 13250, },
4518                 { 14500, 13375, },
4519                 { 14625, 13500, },
4520                 { 14750, 13625, },
4521                 { 14875, 13750, },
4522                 { 15000, 13875, },
4523                 { 15125, 14000, },
4524                 { 15250, 14125, },
4525                 { 15375, 14250, },
4526                 { 15500, 14375, },
4527                 { 15625, 14500, },
4528                 { 15750, 14625, },
4529                 { 15875, 14750, },
4530                 { 16000, 14875, },
4531                 { 16125, 15000, },
4532         };
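        /*
         * pxvid is masked to 7 bits by the caller, indexing this 128-entry
         * table; vd is the desktop voltage and vm the mobile voltage for a
         * given PXVID code.  If ".1 mil" above means tenths of a millivolt,
         * an entry like { 11250, 10125 } reads as 1.1250V desktop and
         * 1.0125V mobile.
         */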
4533         if (dev_priv->info->is_mobile)
4534                 return v_table[pxvid].vm;
4535         else
4536                 return v_table[pxvid].vd;
4537 }
4538
4539 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
4540 {
4541         struct timespec now, diff1;
4542         u64 diff;
4543         unsigned long diffms;
4544         u32 count;
4545
4546         assert_spin_locked(&mchdev_lock);
4547
4548         getrawmonotonic(&now);
4549         diff1 = timespec_sub(now, dev_priv->ips.last_time2);
4550
4551         /* Don't divide by 0 */
4552         diffms = diff1.tv_sec * 1000 + diff1.tv_nsec / 1000000;
4553         if (!diffms)
4554                 return;
4555
4556         count = I915_READ(GFXEC);
4557
4558         if (count < dev_priv->ips.last_count2) {
4559                 diff = ~0UL - dev_priv->ips.last_count2;
4560                 diff += count;
4561         } else {
4562                 diff = count - dev_priv->ips.last_count2;
4563         }
4564
4565         dev_priv->ips.last_count2 = count;
4566         dev_priv->ips.last_time2 = now;
4567
4568         /* More magic constants... */
4569         diff = diff * 1181;
4570         diff = div_u64(diff, diffms * 10);
4571         dev_priv->ips.gfx_power = diff;
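        /*
         * With the magic 1181 scale factor: a delta of 100000 counts over a
         * 100ms window yields 100000 * 1181 / (100 * 10) = 118100 in
         * whatever fixed-point unit gfx_power uses (the code never names
         * it; __i915_gfx_val() adds it directly to a mW figure, which
         * suggests mW here too).
         */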
4572 }
4573
4574 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
4575 {
4576         if (dev_priv->info->gen != 5)
4577                 return;
4578
4579         spin_lock_irq(&mchdev_lock);
4580
4581         __i915_update_gfx_val(dev_priv);
4582
4583         spin_unlock_irq(&mchdev_lock);
4584 }
4585
4586 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
4587 {
4588         unsigned long t, corr, state1, corr2, state2;
4589         u32 pxvid, ext_v;
4590
4591         assert_spin_locked(&mchdev_lock);
4592
4593         pxvid = I915_READ(PXVFREQ_BASE + (dev_priv->rps.cur_delay * 4));
4594         pxvid = (pxvid >> 24) & 0x7f;
4595         ext_v = pvid_to_extvid(dev_priv, pxvid);
4596
4597         state1 = ext_v;
4598
4599         t = i915_mch_val(dev_priv);
4600
4601         /* Revel in the empirically derived constants */
4602
4603         /* Correction factor in 1/100000 units */
4604         if (t > 80)
4605                 corr = ((t * 2349) + 135940);
4606         else if (t >= 50)
4607                 corr = ((t * 964) + 29317);
4608         else /* < 50 */
4609                 corr = ((t * 301) + 1004);
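        /*
         * Example: t = 60 takes the middle branch, so
         * corr = 60 * 964 + 29317 = 87157, i.e. a correction factor of
         * about 0.87 in the 1/100000 fixed-point units noted above.
         */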
4610
4611         corr = corr * ((150142 * state1) / 10000 - 78642);
4612         corr /= 100000;
4613         corr2 = (corr * dev_priv->ips.corr);
4614
4615         state2 = (corr2 * state1) / 10000;
4616         state2 /= 100; /* convert to mW */
4617
4618         __i915_update_gfx_val(dev_priv);
4619
4620         return dev_priv->ips.gfx_power + state2;
4621 }
4622
4623 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
4624 {
4625         unsigned long val;
4626
4627         if (dev_priv->info->gen != 5)
4628                 return 0;
4629
4630         spin_lock_irq(&mchdev_lock);
4631
4632         val = __i915_gfx_val(dev_priv);
4633
4634         spin_unlock_irq(&mchdev_lock);
4635
4636         return val;
4637 }
4638
4639 /**
4640  * i915_read_mch_val - return value for IPS use
4641  *
4642  * Calculate and return a value for the IPS driver to use when deciding whether
4643  * we have thermal and power headroom to increase CPU or GPU power budget.
4644  */
4645 unsigned long i915_read_mch_val(void)
4646 {
4647         struct drm_i915_private *dev_priv;
4648         unsigned long chipset_val, graphics_val, ret = 0;
4649
4650         spin_lock_irq(&mchdev_lock);
4651         if (!i915_mch_dev)
4652                 goto out_unlock;
4653         dev_priv = i915_mch_dev;
4654
4655         chipset_val = __i915_chipset_val(dev_priv);
4656         graphics_val = __i915_gfx_val(dev_priv);
4657
4658         ret = chipset_val + graphics_val;
4659
4660 out_unlock:
4661         spin_unlock_irq(&mchdev_lock);
4662
4663         return ret;
4664 }
4665 EXPORT_SYMBOL_GPL(i915_read_mch_val);
4666
4667 /**
4668  * i915_gpu_raise - raise GPU frequency limit
4669  *
4670  * Raise the limit; IPS indicates we have thermal headroom.
4671  */
4672 bool i915_gpu_raise(void)
4673 {
4674         struct drm_i915_private *dev_priv;
4675         bool ret = true;
4676
4677         spin_lock_irq(&mchdev_lock);
4678         if (!i915_mch_dev) {
4679                 ret = false;
4680                 goto out_unlock;
4681         }
4682         dev_priv = i915_mch_dev;
4683
4684         if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
4685                 dev_priv->ips.max_delay--;
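        /*
         * In the ips bookkeeping a smaller delay value means a higher
         * frequency, so raising the limit walks max_delay down towards
         * fmax (and i915_gpu_lower() below walks it up towards min_delay).
         */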
4686
4687 out_unlock:
4688         spin_unlock_irq(&mchdev_lock);
4689
4690         return ret;
4691 }
4692 EXPORT_SYMBOL_GPL(i915_gpu_raise);
4693
4694 /**
4695  * i915_gpu_lower - lower GPU frequency limit
4696  *
4697  * IPS indicates we're close to a thermal limit, so throttle back the GPU
4698  * frequency maximum.
4699  */
4700 bool i915_gpu_lower(void)
4701 {
4702         struct drm_i915_private *dev_priv;
4703         bool ret = true;
4704
4705         spin_lock_irq(&mchdev_lock);
4706         if (!i915_mch_dev) {
4707                 ret = false;
4708                 goto out_unlock;
4709         }
4710         dev_priv = i915_mch_dev;
4711
4712         if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
4713                 dev_priv->ips.max_delay++;
4714
4715 out_unlock:
4716         spin_unlock_irq(&mchdev_lock);
4717
4718         return ret;
4719 }
4720 EXPORT_SYMBOL_GPL(i915_gpu_lower);
4721
4722 /**
4723  * i915_gpu_busy - indicate GPU business to IPS
4724  *
4725  * Tell the IPS driver whether or not the GPU is busy.
4726  */
4727 bool i915_gpu_busy(void)
4728 {
4729         struct drm_i915_private *dev_priv;
4730         struct intel_ring_buffer *ring;
4731         bool ret = false;
4732         int i;
4733
4734         spin_lock_irq(&mchdev_lock);
4735         if (!i915_mch_dev)
4736                 goto out_unlock;
4737         dev_priv = i915_mch_dev;
4738
4739         for_each_ring(ring, dev_priv, i)
4740                 ret |= !list_empty(&ring->request_list);
4741
4742 out_unlock:
4743         spin_unlock_irq(&mchdev_lock);
4744
4745         return ret;
4746 }
4747 EXPORT_SYMBOL_GPL(i915_gpu_busy);
4748
4749 /**
4750  * i915_gpu_turbo_disable - disable graphics turbo
4751  *
4752  * Disable graphics turbo by resetting the max frequency and setting the
4753  * current frequency to the default.
4754  */
4755 bool i915_gpu_turbo_disable(void)
4756 {
4757         struct drm_i915_private *dev_priv;
4758         bool ret = true;
4759
4760         spin_lock_irq(&mchdev_lock);
4761         if (!i915_mch_dev) {
4762                 ret = false;
4763                 goto out_unlock;
4764         }
4765         dev_priv = i915_mch_dev;
4766
4767         dev_priv->ips.max_delay = dev_priv->ips.fstart;
4768
4769         if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart))
4770                 ret = false;
4771
4772 out_unlock:
4773         spin_unlock_irq(&mchdev_lock);
4774
4775         return ret;
4776 }
4777 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
4778
4779 /**
4780  * Tells the intel_ips driver that the i915 driver is now loaded, if
4781  * IPS got loaded first.
4782  *
4783  * This awkward dance is so that neither module has to depend on the
4784  * other in order for IPS to do the appropriate communication of
4785  * GPU turbo limits to i915.
4786  */
4787 static void
4788 ips_ping_for_i915_load(void)
4789 {
4790         void (*link)(void);
4791
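        /*
         * symbol_get() resolves the symbol and takes a reference on the
         * module that exports it (intel_ips here), returning NULL if that
         * module is not loaded; symbol_put() drops the reference.  This is
         * what lets the two drivers find each other without a hard module
         * dependency.
         */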
4792         link = symbol_get(ips_link_to_i915_driver);
4793         if (link) {
4794                 link();
4795                 symbol_put(ips_link_to_i915_driver);
4796         }
4797 }
4798
4799 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
4800 {
4801         /* We only register the i915 ips part with intel-ips once everything is
4802          * set up, to avoid intel-ips sneaking in and reading bogus values. */
4803         spin_lock_irq(&mchdev_lock);
4804         i915_mch_dev = dev_priv;
4805         spin_unlock_irq(&mchdev_lock);
4806
4807         ips_ping_for_i915_load();
4808 }
4809
4810 void intel_gpu_ips_teardown(void)
4811 {
4812         spin_lock_irq(&mchdev_lock);
4813         i915_mch_dev = NULL;
4814         spin_unlock_irq(&mchdev_lock);
4815 }
4816 static void intel_init_emon(struct drm_device *dev)
4817 {
4818         struct drm_i915_private *dev_priv = dev->dev_private;
4819         u32 lcfuse;
4820         u8 pxw[16];
4821         int i;
4822
4823         /* Disable the energy monitor while we program it */
4824         I915_WRITE(ECR, 0);
4825         POSTING_READ(ECR);
4826
4827         /* Program energy weights for various events */
4828         I915_WRITE(SDEW, 0x15040d00);
4829         I915_WRITE(CSIEW0, 0x007f0000);
4830         I915_WRITE(CSIEW1, 0x1e220004);
4831         I915_WRITE(CSIEW2, 0x04000004);
4832
4833         for (i = 0; i < 5; i++)
4834                 I915_WRITE(PEW + (i * 4), 0);
4835         for (i = 0; i < 3; i++)
4836                 I915_WRITE(DEW + (i * 4), 0);
4837
4838         /* Program P-state weights to account for frequency power adjustment */
4839         for (i = 0; i < 16; i++) {
4840                 u32 pxvidfreq = I915_READ(PXVFREQ_BASE + (i * 4));
4841                 unsigned long freq = intel_pxfreq(pxvidfreq);
4842                 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
4843                         PXVFREQ_PX_SHIFT;
4844                 unsigned long val;
4845
4846                 val = vid * vid;
4847                 val *= (freq / 1000);
4848                 val *= 255;
4849                 val /= (127*127*900);
4850                 if (val > 0xff)
4851                         DRM_ERROR("bad pxval: %ld\n", val);
4852                 pxw[i] = val;
4853         }
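        /*
         * Full-scale check of the weight formula, assuming intel_pxfreq()
         * returns kHz: vid = 127 at freq = 900000 (900MHz) gives
         * val = 127 * 127 * 900 * 255 / (127 * 127 * 900) = 255, so the
         * highest representable voltage/frequency point maps exactly onto
         * the 8-bit weight ceiling.
         */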
4854         /* Render standby states get 0 weight */
4855         pxw[14] = 0;
4856         pxw[15] = 0;
4857
4858         for (i = 0; i < 4; i++) {
4859                 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
4860                         (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
4861                 I915_WRITE(PXW + (i * 4), val);
4862         }
4863
4864         /* Adjust magic regs to magic values (more experimental results) */
4865         I915_WRITE(OGW0, 0);
4866         I915_WRITE(OGW1, 0);
4867         I915_WRITE(EG0, 0x00007f00);
4868         I915_WRITE(EG1, 0x0000000e);
4869         I915_WRITE(EG2, 0x000e0000);
4870         I915_WRITE(EG3, 0x68000300);
4871         I915_WRITE(EG4, 0x42000000);
4872         I915_WRITE(EG5, 0x00140031);
4873         I915_WRITE(EG6, 0);
4874         I915_WRITE(EG7, 0);
4875
4876         for (i = 0; i < 8; i++)
4877                 I915_WRITE(PXWL + (i * 4), 0);
4878
4879         /* Enable PMON + select events */
4880         I915_WRITE(ECR, 0x80000019);
4881
4882         lcfuse = I915_READ(LCFUSE02);
4883
4884         dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
4885 }
4886
4887 void intel_disable_gt_powersave(struct drm_device *dev)
4888 {
4889         struct drm_i915_private *dev_priv = dev->dev_private;
4890
4891         /* Interrupts should be disabled already to avoid re-arming. */
4892         WARN_ON(dev->irq_enabled);
4893
4894         if (IS_IRONLAKE_M(dev)) {
4895                 ironlake_disable_drps(dev);
4896                 ironlake_disable_rc6(dev);
4897         } else if (INTEL_INFO(dev)->gen >= 6) {
4898                 cancel_delayed_work_sync(&dev_priv->rps.delayed_resume_work);
4899                 cancel_work_sync(&dev_priv->rps.work);
4900                 mutex_lock(&dev_priv->rps.hw_lock);
4901                 if (IS_VALLEYVIEW(dev))
4902                         valleyview_disable_rps(dev);
4903                 else
4904                         gen6_disable_rps(dev);
4905                 dev_priv->rps.enabled = false;
4906                 mutex_unlock(&dev_priv->rps.hw_lock);
4907         }
4908 }
4909
4910 static void intel_gen6_powersave_work(struct work_struct *work)
4911 {
4912         struct drm_i915_private *dev_priv =
4913                 container_of(work, struct drm_i915_private,
4914                              rps.delayed_resume_work.work);
4915         struct drm_device *dev = dev_priv->dev;
4916
4917         mutex_lock(&dev_priv->rps.hw_lock);
4918
4919         if (IS_VALLEYVIEW(dev)) {
4920                 valleyview_enable_rps(dev);
4921         } else if (IS_BROADWELL(dev)) {
4922                 gen8_enable_rps(dev);
4923                 gen6_update_ring_freq(dev);
4924         } else {
4925                 gen6_enable_rps(dev);
4926                 gen6_update_ring_freq(dev);
4927         }
4928         dev_priv->rps.enabled = true;
4929         mutex_unlock(&dev_priv->rps.hw_lock);
4930 }
4931
4932 void intel_enable_gt_powersave(struct drm_device *dev)
4933 {
4934         struct drm_i915_private *dev_priv = dev->dev_private;
4935
4936         if (IS_IRONLAKE_M(dev)) {
4937                 ironlake_enable_drps(dev);
4938                 ironlake_enable_rc6(dev);
4939                 intel_init_emon(dev);
4940         } else if (IS_GEN6(dev) || IS_GEN7(dev)) {
4941                 /*
4942                  * PCU communication is slow and this doesn't need to be
4943                  * done at any specific time, so do this out of our fast path
4944                  * to make resume and init faster.
4945                  */
4946                 schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
4947                                       round_jiffies_up_relative(HZ));
4948         }
4949 }
4950
4951 static void ibx_init_clock_gating(struct drm_device *dev)
4952 {
4953         struct drm_i915_private *dev_priv = dev->dev_private;
4954
4955         /*
4956          * On Ibex Peak and Cougar Point, we need to disable clock
4957          * gating for the panel power sequencer or it will fail to
4958          * start up when no ports are active.
4959          */
4960         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
4961 }
4962
4963 static void g4x_disable_trickle_feed(struct drm_device *dev)
4964 {
4965         struct drm_i915_private *dev_priv = dev->dev_private;
4966         int pipe;
4967
4968         for_each_pipe(pipe) {
4969                 I915_WRITE(DSPCNTR(pipe),
4970                            I915_READ(DSPCNTR(pipe)) |
4971                            DISPPLANE_TRICKLE_FEED_DISABLE);
4972                 intel_flush_primary_plane(dev_priv, pipe);
4973         }
4974 }
4975
4976 static void ironlake_init_clock_gating(struct drm_device *dev)
4977 {
4978         struct drm_i915_private *dev_priv = dev->dev_private;
4979         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
4980
4981         /*
4982          * Required for FBC
4983          * WaFbcDisableDpfcClockGating:ilk
4984          */
4985         dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
4986                    ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
4987                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
4988
4989         I915_WRITE(PCH_3DCGDIS0,
4990                    MARIUNIT_CLOCK_GATE_DISABLE |
4991                    SVSMUNIT_CLOCK_GATE_DISABLE);
4992         I915_WRITE(PCH_3DCGDIS1,
4993                    VFMUNIT_CLOCK_GATE_DISABLE);
4994
4995         /*
4996          * According to the spec, the following bits should be set in
4997          * order to enable memory self-refresh:
4998          * The bit 22/21 of 0x42004
4999          * The bit 5 of 0x42020
5000          * The bit 15 of 0x45000
5001          */
5002         I915_WRITE(ILK_DISPLAY_CHICKEN2,
5003                    (I915_READ(ILK_DISPLAY_CHICKEN2) |
5004                     ILK_DPARB_GATE | ILK_VSDPFD_FULL));
5005         dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
5006         I915_WRITE(DISP_ARB_CTL,
5007                    (I915_READ(DISP_ARB_CTL) |
5008                     DISP_FBC_WM_DIS));
5009         I915_WRITE(WM3_LP_ILK, 0);
5010         I915_WRITE(WM2_LP_ILK, 0);
5011         I915_WRITE(WM1_LP_ILK, 0);
5012
5013         /*
5014          * Based on the documentation from the hardware team, the following
5015          * bits should be set unconditionally in order to enable FBC:
5016          * The bit 22 of 0x42000
5017          * The bit 22 of 0x42004
5018          * The bit 7,8,9 of 0x42020.
5019          */
5020         if (IS_IRONLAKE_M(dev)) {
5021                 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
5022                 I915_WRITE(ILK_DISPLAY_CHICKEN1,
5023                            I915_READ(ILK_DISPLAY_CHICKEN1) |
5024                            ILK_FBCQ_DIS);
5025                 I915_WRITE(ILK_DISPLAY_CHICKEN2,
5026                            I915_READ(ILK_DISPLAY_CHICKEN2) |
5027                            ILK_DPARB_GATE);
5028         }
5029
5030         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
5031
5032         I915_WRITE(ILK_DISPLAY_CHICKEN2,
5033                    I915_READ(ILK_DISPLAY_CHICKEN2) |
5034                    ILK_ELPIN_409_SELECT);
5035         I915_WRITE(_3D_CHICKEN2,
5036                    _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
5037                    _3D_CHICKEN2_WM_READ_PIPELINED);
5038
5039         /* WaDisableRenderCachePipelinedFlush:ilk */
5040         I915_WRITE(CACHE_MODE_0,
5041                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
5042
5043         g4x_disable_trickle_feed(dev);
5044
5045         ibx_init_clock_gating(dev);
5046 }
5047
5048 static void cpt_init_clock_gating(struct drm_device *dev)
5049 {
5050         struct drm_i915_private *dev_priv = dev->dev_private;
5051         int pipe;
5052         uint32_t val;
5053
5054         /*
5055          * On Ibex Peak and Cougar Point, we need to disable clock
5056          * gating for the panel power sequencer or it will fail to
5057          * start up when no ports are active.
5058          */
5059         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
5060                    PCH_DPLUNIT_CLOCK_GATE_DISABLE |
5061                    PCH_CPUNIT_CLOCK_GATE_DISABLE);
5062         I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
5063                    DPLS_EDP_PPS_FIX_DIS);
5064         /* The below fixes a weird display corruption (a few pixels shifted
5065          * downward) seen only on the LVDS panels of some HP laptops with IVY.
5066          */
5067         for_each_pipe(pipe) {
5068                 val = I915_READ(TRANS_CHICKEN2(pipe));
5069                 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
5070                 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
5071                 if (dev_priv->vbt.fdi_rx_polarity_inverted)
5072                         val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
5073                 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
5074                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
5075                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
5076                 I915_WRITE(TRANS_CHICKEN2(pipe), val);
5077         }
5078         /* WADP0ClockGatingDisable */
5079         for_each_pipe(pipe) {
5080                 I915_WRITE(TRANS_CHICKEN1(pipe),
5081                            TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
5082         }
5083 }
5084
5085 static void gen6_check_mch_setup(struct drm_device *dev)
5086 {
5087         struct drm_i915_private *dev_priv = dev->dev_private;
5088         uint32_t tmp;
5089
5090         tmp = I915_READ(MCH_SSKPD);
5091         if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) {
5092                 DRM_INFO("Wrong MCH_SSKPD value: 0x%08x\n", tmp);
5093                 DRM_INFO("This can cause pipe underruns and display issues.\n");
5094                 DRM_INFO("Please upgrade your BIOS to fix this.\n");
5095         }
5096 }
5097
5098 static void gen6_init_clock_gating(struct drm_device *dev)
5099 {
5100         struct drm_i915_private *dev_priv = dev->dev_private;
5101         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
5102
5103         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
5104
5105         I915_WRITE(ILK_DISPLAY_CHICKEN2,
5106                    I915_READ(ILK_DISPLAY_CHICKEN2) |
5107                    ILK_ELPIN_409_SELECT);
5108
5109         /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
5110         I915_WRITE(_3D_CHICKEN,
5111                    _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
5112
5113         /* WaSetupGtModeTdRowDispatch:snb */
5114         if (IS_SNB_GT1(dev))
5115                 I915_WRITE(GEN6_GT_MODE,
5116                            _MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE));
5117
5118         I915_WRITE(WM3_LP_ILK, 0);
5119         I915_WRITE(WM2_LP_ILK, 0);
5120         I915_WRITE(WM1_LP_ILK, 0);
5121
5122         I915_WRITE(CACHE_MODE_0,
5123                    _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
5124
5125         I915_WRITE(GEN6_UCGCTL1,
5126                    I915_READ(GEN6_UCGCTL1) |
5127                    GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
5128                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
5129
5130         /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
5131          * gating disable must be set.  Failure to set it results in
5132          * flickering pixels due to Z write ordering failures after
5133          * some amount of runtime in the Mesa "fire" demo, and Unigine
5134          * Sanctuary and Tropics, and apparently anything else with
5135          * alpha test or pixel discard.
5136          *
5137          * According to the spec, bit 11 (RCCUNIT) must also be set,
5138          * but we didn't debug actual testcases to find it out.
5139          *
5140          * Also apply WaDisableVDSUnitClockGating:snb and
5141          * WaDisableRCPBUnitClockGating:snb.
5142          */
5143         I915_WRITE(GEN6_UCGCTL2,
5144                    GEN7_VDSUNIT_CLOCK_GATE_DISABLE |
5145                    GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
5146                    GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
5147
5148         /* Bspec says we need to always set all mask bits. */
5149         I915_WRITE(_3D_CHICKEN3, (0xFFFF << 16) |
5150                    _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL);
5151
5152         /*
5153          * According to the spec, the following bits should be
5154          * set in order to enable memory self-refresh and fbc:
5155          * The bit21 and bit22 of 0x42000
5156          * The bit21 and bit22 of 0x42004
5157          * The bit5 and bit7 of 0x42020
5158          * The bit14 of 0x70180
5159          * The bit14 of 0x71180
5160          *
5161          * WaFbcAsynchFlipDisableFbcQueue:snb
5162          */
5163         I915_WRITE(ILK_DISPLAY_CHICKEN1,
5164                    I915_READ(ILK_DISPLAY_CHICKEN1) |
5165                    ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
5166         I915_WRITE(ILK_DISPLAY_CHICKEN2,
5167                    I915_READ(ILK_DISPLAY_CHICKEN2) |
5168                    ILK_DPARB_GATE | ILK_VSDPFD_FULL);
5169         I915_WRITE(ILK_DSPCLK_GATE_D,
5170                    I915_READ(ILK_DSPCLK_GATE_D) |
5171                    ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
5172                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
5173
5174         g4x_disable_trickle_feed(dev);
5175
5176         /* The default value should be 0x200 according to docs, but the two
5177          * platforms I checked have a 0 for this. (Maybe BIOS overrides?) */
5178         I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_DISABLE(0xffff));
5179         I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_ENABLE(GEN6_GT_MODE_HI));
5180
5181         cpt_init_clock_gating(dev);
5182
5183         gen6_check_mch_setup(dev);
5184 }
5185
5186 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
5187 {
5188         uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
5189
5190         reg &= ~GEN7_FF_SCHED_MASK;
5191         reg |= GEN7_FF_TS_SCHED_HW;
5192         reg |= GEN7_FF_VS_SCHED_HW;
5193         reg |= GEN7_FF_DS_SCHED_HW;
5194
5195         if (IS_HASWELL(dev_priv->dev))
5196                 reg &= ~GEN7_FF_VS_REF_CNT_FFME;
5197
5198         I915_WRITE(GEN7_FF_THREAD_MODE, reg);
5199 }
5200
5201 static void lpt_init_clock_gating(struct drm_device *dev)
5202 {
5203         struct drm_i915_private *dev_priv = dev->dev_private;
5204
5205         /*
5206          * TODO: this bit should only be enabled when really needed, then
5207          * disabled when not needed anymore in order to save power.
5208          */
5209         if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)
5210                 I915_WRITE(SOUTH_DSPCLK_GATE_D,
5211                            I915_READ(SOUTH_DSPCLK_GATE_D) |
5212                            PCH_LP_PARTITION_LEVEL_DISABLE);
5213
5214         /* WADPOClockGatingDisable:hsw */
5215         I915_WRITE(_TRANSA_CHICKEN1,
5216                    I915_READ(_TRANSA_CHICKEN1) |
5217                    TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
5218 }
5219
5220 static void lpt_suspend_hw(struct drm_device *dev)
5221 {
5222         struct drm_i915_private *dev_priv = dev->dev_private;
5223
5224         if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
5225                 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
5226
5227                 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
5228                 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
5229         }
5230 }
5231
5232 static void gen8_init_clock_gating(struct drm_device *dev)
5233 {
5234         struct drm_i915_private *dev_priv = dev->dev_private;
5235         enum pipe i;
5236
5237         I915_WRITE(WM3_LP_ILK, 0);
5238         I915_WRITE(WM2_LP_ILK, 0);
5239         I915_WRITE(WM1_LP_ILK, 0);
5240
5241         /* FIXME(BDW): Check all the w/a, some might only apply to
5242          * pre-production hw. */
5243
5244         WARN(!i915_preliminary_hw_support,
5245              "GEN8_CENTROID_PIXEL_OPT_DIS should not be needed for production\n");
5246         I915_WRITE(HALF_SLICE_CHICKEN3,
5247                    _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
5248         I915_WRITE(HALF_SLICE_CHICKEN3,
5249                    _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
5250         I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
5251
5252         I915_WRITE(_3D_CHICKEN3,
5253                    _3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2));
5254
5255         I915_WRITE(COMMON_SLICE_CHICKEN2,
5256                    _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
5257
5258         I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
5259                    _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
5260
5261         /* WaSwitchSolVfFArbitrationPriority */
5262         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
5263
5264         /* WaPsrDPAMaskVBlankInSRD */
5265         I915_WRITE(CHICKEN_PAR1_1,
5266                    I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
5267
5268         /* WaPsrDPRSUnmaskVBlankInSRD */
5269         for_each_pipe(i) {
5270                 I915_WRITE(CHICKEN_PIPESL_1(i),
5271                            I915_READ(CHICKEN_PIPESL_1(i)) |
5272                            DPRS_MASK_VBLANK_SRD);
5273         }
5274 }
5275
5276 static void haswell_init_clock_gating(struct drm_device *dev)
5277 {
5278         struct drm_i915_private *dev_priv = dev->dev_private;
5279
5280         I915_WRITE(WM3_LP_ILK, 0);
5281         I915_WRITE(WM2_LP_ILK, 0);
5282         I915_WRITE(WM1_LP_ILK, 0);
5283
5284         /* According to the spec, bit 13 (RCZUNIT) must be set on IVB.
5285          * This implements the WaDisableRCZUnitClockGating:hsw workaround.
5286          */
5287         I915_WRITE(GEN6_UCGCTL2, GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
5288
5289         /* Apply the WaDisableRHWOOptimizationForRenderHang:hsw workaround. */
5290         I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
5291                    GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
5292
5293         /* WaApplyL3ControlAndL3ChickenMode:hsw */
5294         I915_WRITE(GEN7_L3CNTLREG1,
5295                         GEN7_WA_FOR_GEN7_L3_CONTROL);
5296         I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
5297                         GEN7_WA_L3_CHICKEN_MODE);
5298
5299         /* L3 caching of data atomics doesn't work -- disable it. */
5300         I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
5301         I915_WRITE(HSW_ROW_CHICKEN3,
5302                    _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
5303
5304         /* This is required by WaCatErrorRejectionIssue:hsw */
5305         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
5306                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
5307                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
5308
5309         /* WaVSRefCountFullforceMissDisable:hsw */
5310         gen7_setup_fixed_func_scheduler(dev_priv);
5311
5312         /* WaDisable4x2SubspanOptimization:hsw */
5313         I915_WRITE(CACHE_MODE_1,
5314                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
5315
5316         /* WaSwitchSolVfFArbitrationPriority:hsw */
5317         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
5318
5319         /* WaRsPkgCStateDisplayPMReq:hsw */
5320         I915_WRITE(CHICKEN_PAR1_1,
5321                    I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
5322
5323         lpt_init_clock_gating(dev);
5324 }
5325
5326 static void ivybridge_init_clock_gating(struct drm_device *dev)
5327 {
5328         struct drm_i915_private *dev_priv = dev->dev_private;
5329         uint32_t snpcr;
5330
5331         I915_WRITE(WM3_LP_ILK, 0);
5332         I915_WRITE(WM2_LP_ILK, 0);
5333         I915_WRITE(WM1_LP_ILK, 0);
5334
5335         I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
5336
5337         /* WaDisableEarlyCull:ivb */
5338         I915_WRITE(_3D_CHICKEN3,
5339                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
5340
5341         /* WaDisableBackToBackFlipFix:ivb */
5342         I915_WRITE(IVB_CHICKEN3,
5343                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
5344                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
5345
5346         /* WaDisablePSDDualDispatchEnable:ivb */
5347         if (IS_IVB_GT1(dev))
5348                 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
5349                            _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
5350         else
5351                 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1_GT2,
5352                            _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
5353
5354         /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
5355         I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
5356                    GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
5357
5358         /* WaApplyL3ControlAndL3ChickenMode:ivb */
5359         I915_WRITE(GEN7_L3CNTLREG1,
5360                         GEN7_WA_FOR_GEN7_L3_CONTROL);
5361         I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
5362                    GEN7_WA_L3_CHICKEN_MODE);
5363         if (IS_IVB_GT1(dev))
5364                 I915_WRITE(GEN7_ROW_CHICKEN2,
5365                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
5366         else
5367                 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
5368                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
5369
5370
5371         /* WaForceL3Serialization:ivb */
5372         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
5373                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
5374
5375         /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
5376          * gating disable must be set.  Failure to set it results in
5377          * flickering pixels due to Z write ordering failures after
5378          * some amount of runtime in the Mesa "fire" demo, and Unigine
5379          * Sanctuary and Tropics, and apparently anything else with
5380          * alpha test or pixel discard.
5381          *
5382          * According to the spec, bit 11 (RCCUNIT) must also be set,
5383          * but we didn't debug actual testcases to find it out.
5384          *
5385          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
5386          * This implements the WaDisableRCZUnitClockGating:ivb workaround.
5387          */
5388         I915_WRITE(GEN6_UCGCTL2,
5389                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE |
5390                    GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
5391
5392         /* This is required by WaCatErrorRejectionIssue:ivb */
5393         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
5394                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
5395                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
5396
5397         g4x_disable_trickle_feed(dev);
5398
5399         /* WaVSRefCountFullforceMissDisable:ivb */
5400         gen7_setup_fixed_func_scheduler(dev_priv);
5401
5402         /* WaDisable4x2SubspanOptimization:ivb */
5403         I915_WRITE(CACHE_MODE_1,
5404                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
5405
5406         snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
5407         snpcr &= ~GEN6_MBC_SNPCR_MASK;
5408         snpcr |= GEN6_MBC_SNPCR_MED;
5409         I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
5410
5411         if (!HAS_PCH_NOP(dev))
5412                 cpt_init_clock_gating(dev);
5413
5414         gen6_check_mch_setup(dev);
5415 }
5416
5417 static void valleyview_init_clock_gating(struct drm_device *dev)
5418 {
5419         struct drm_i915_private *dev_priv = dev->dev_private;
5420         u32 val;
5421
5422         mutex_lock(&dev_priv->rps.hw_lock);
5423         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5424         mutex_unlock(&dev_priv->rps.hw_lock);
5425         switch ((val >> 6) & 3) {
5426         case 0:
5427                 dev_priv->mem_freq = 800;
5428                 break;
5429         case 1:
5430                 dev_priv->mem_freq = 1066;
5431                 break;
5432         case 2:
5433                 dev_priv->mem_freq = 1333;
5434                 break;
5435         case 3:
5436                 dev_priv->mem_freq = 1333;
5437                 break;
5438         }
5439         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
5440
5441         I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
5442
5443         /* WaDisableEarlyCull:vlv */
5444         I915_WRITE(_3D_CHICKEN3,
5445                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
5446
5447         /* WaDisableBackToBackFlipFix:vlv */
5448         I915_WRITE(IVB_CHICKEN3,
5449                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
5450                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
5451
5452         /* WaDisablePSDDualDispatchEnable:vlv */
5453         I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
5454                    _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
5455                                       GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
5456
5457         /* Apply the WaDisableRHWOOptimizationForRenderHang:vlv workaround. */
5458         I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
5459                    GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
5460
5461         /* WaApplyL3ControlAndL3ChickenMode:vlv */
5462         I915_WRITE(GEN7_L3CNTLREG1, I915_READ(GEN7_L3CNTLREG1) | GEN7_L3AGDIS);
5463         I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
5464
5465         /* WaForceL3Serialization:vlv */
5466         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
5467                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
5468
5469         /* WaDisableDopClockGating:vlv */
5470         I915_WRITE(GEN7_ROW_CHICKEN2,
5471                    _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
5472
5473         /* This is required by WaCatErrorRejectionIssue:vlv */
5474         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
5475                    I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
5476                    GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
5477
5478         /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
5479          * gating disable must be set.  Failure to set it results in
5480          * flickering pixels due to Z write ordering failures after
5481          * some amount of runtime in the Mesa "fire" demo, and Unigine
5482          * Sanctuary and Tropics, and apparently anything else with
5483          * alpha test or pixel discard.
5484          *
5485          * According to the spec, bit 11 (RCCUNIT) must also be set,
5486          * but we didn't debug actual testcases to find it out.
5487          *
5488          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
5489          * This implements the WaDisableRCZUnitClockGating:vlv workaround.
5490          *
5491          * Also apply WaDisableVDSUnitClockGating:vlv and
5492          * WaDisableRCPBUnitClockGating:vlv.
5493          */
5494         I915_WRITE(GEN6_UCGCTL2,
5495                    GEN7_VDSUNIT_CLOCK_GATE_DISABLE |
5496                    GEN7_TDLUNIT_CLOCK_GATE_DISABLE |
5497                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE |
5498                    GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
5499                    GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
5500
5501         I915_WRITE(GEN7_UCGCTL4, GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
5502
5503         I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
5504
5505         I915_WRITE(CACHE_MODE_1,
5506                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
5507
5508         /*
5509          * WaDisableVLVClockGating_VBIIssue:vlv
5510          * Disable clock gating on the GCFG unit to prevent a delay
5511          * in the reporting of vblank events.
5512          */
5513         I915_WRITE(VLV_GUNIT_CLOCK_GATE, 0xffffffff);
5514
5515         /* Conservative clock gating settings for now */
5516         I915_WRITE(0x9400, 0xffffffff);
5517         I915_WRITE(0x9404, 0xffffffff);
5518         I915_WRITE(0x9408, 0xffffffff);
5519         I915_WRITE(0x940c, 0xffffffff);
5520         I915_WRITE(0x9410, 0xffffffff);
5521         I915_WRITE(0x9414, 0xffffffff);
5522         I915_WRITE(0x9418, 0xffffffff);
5523 }
5524
5525 static void g4x_init_clock_gating(struct drm_device *dev)
5526 {
5527         struct drm_i915_private *dev_priv = dev->dev_private;
5528         uint32_t dspclk_gate;
5529
5530         I915_WRITE(RENCLK_GATE_D1, 0);
5531         I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
5532                    GS_UNIT_CLOCK_GATE_DISABLE |
5533                    CL_UNIT_CLOCK_GATE_DISABLE);
5534         I915_WRITE(RAMCLK_GATE_D, 0);
5535         dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
5536                 OVRUNIT_CLOCK_GATE_DISABLE |
5537                 OVCUNIT_CLOCK_GATE_DISABLE;
5538         if (IS_GM45(dev))
5539                 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
5540         I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
5541
5542         /* WaDisableRenderCachePipelinedFlush */
5543         I915_WRITE(CACHE_MODE_0,
5544                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
5545
5546         g4x_disable_trickle_feed(dev);
5547 }
5548
5549 static void crestline_init_clock_gating(struct drm_device *dev)
5550 {
5551         struct drm_i915_private *dev_priv = dev->dev_private;
5552
5553         I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
5554         I915_WRITE(RENCLK_GATE_D2, 0);
5555         I915_WRITE(DSPCLK_GATE_D, 0);
5556         I915_WRITE(RAMCLK_GATE_D, 0);
5557         I915_WRITE16(DEUC, 0);
5558         I915_WRITE(MI_ARB_STATE,
5559                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
5560 }
5561
5562 static void broadwater_init_clock_gating(struct drm_device *dev)
5563 {
5564         struct drm_i915_private *dev_priv = dev->dev_private;
5565
5566         I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
5567                    I965_RCC_CLOCK_GATE_DISABLE |
5568                    I965_RCPB_CLOCK_GATE_DISABLE |
5569                    I965_ISC_CLOCK_GATE_DISABLE |
5570                    I965_FBC_CLOCK_GATE_DISABLE);
5571         I915_WRITE(RENCLK_GATE_D2, 0);
5572         I915_WRITE(MI_ARB_STATE,
5573                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
5574 }
5575
5576 static void gen3_init_clock_gating(struct drm_device *dev)
5577 {
5578         struct drm_i915_private *dev_priv = dev->dev_private;
5579         u32 dstate = I915_READ(D_STATE);
5580
5581         dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
5582                 DSTATE_DOT_CLOCK_GATING;
5583         I915_WRITE(D_STATE, dstate);
5584
5585         if (IS_PINEVIEW(dev))
5586                 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
5587
5588         /* IIR "flip pending" means done if this bit is set */
5589         I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
5590 }
5591
5592 static void i85x_init_clock_gating(struct drm_device *dev)
5593 {
5594         struct drm_i915_private *dev_priv = dev->dev_private;
5595
5596         I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
5597 }
5598
5599 static void i830_init_clock_gating(struct drm_device *dev)
5600 {
5601         struct drm_i915_private *dev_priv = dev->dev_private;
5602
5603         I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
5604 }
5605
5606 void intel_init_clock_gating(struct drm_device *dev)
5607 {
5608         struct drm_i915_private *dev_priv = dev->dev_private;
5609
5610         dev_priv->display.init_clock_gating(dev);
5611 }
5612
5613 void intel_suspend_hw(struct drm_device *dev)
5614 {
5615         if (HAS_PCH_LPT(dev))
5616                 lpt_suspend_hw(dev);
5617 }
5618
5619 #define for_each_power_well(i, power_well, domain_mask, power_domains)  \
5620         for (i = 0;                                                     \
5621              i < (power_domains)->power_well_count &&                   \
5622                  ((power_well) = &(power_domains)->power_wells[i]);     \
5623              i++)                                                       \
5624                 if ((power_well)->domains & (domain_mask))
5625
5626 #define for_each_power_well_rev(i, power_well, domain_mask, power_domains) \
5627         for (i = (power_domains)->power_well_count - 1;                  \
5628              i >= 0 && ((power_well) = &(power_domains)->power_wells[i]);\
5629              i--)                                                        \
5630                 if ((power_well)->domains & (domain_mask))
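
/*
 * Illustrative use of the iterators above (a hypothetical snippet, not part
 * of the original file): the trailing if filters wells by domain mask, so
 * the loop body only sees wells that feed the requested domain; the _rev
 * variant walks the same wells in power-off order.
 *
 *	for_each_power_well(i, power_well, BIT(POWER_DOMAIN_AUDIO),
 *			    power_domains)
 *		DRM_DEBUG_KMS("%s feeds the audio domain\n", power_well->name);
 */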
5631
5632 /*
5633  * We should only use the power well if we explicitly asked the hardware to
5634  * enable it, so check if it's enabled and also check if we've requested it to
5635  * be enabled.
5636  */
5637 static bool hsw_power_well_enabled(struct drm_device *dev,
5638                                    struct i915_power_well *power_well)
5639 {
5640         struct drm_i915_private *dev_priv = dev->dev_private;
5641
5642         return I915_READ(HSW_PWR_WELL_DRIVER) ==
5643                      (HSW_PWR_WELL_ENABLE_REQUEST | HSW_PWR_WELL_STATE_ENABLED);
5644 }
5645
5646 bool intel_display_power_enabled_sw(struct drm_device *dev,
5647                                     enum intel_display_power_domain domain)
5648 {
5649         struct drm_i915_private *dev_priv = dev->dev_private;
5650         struct i915_power_domains *power_domains;
5651
5652         power_domains = &dev_priv->power_domains;
5653
5654         return power_domains->domain_use_count[domain];
5655 }
5656
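/*
 * Unlike the _sw variant above, which only consults the driver's own
 * refcounts, this also queries the hardware state of every non-always-on
 * well feeding the domain.
 */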
5657 bool intel_display_power_enabled(struct drm_device *dev,
5658                                  enum intel_display_power_domain domain)
5659 {
5660         struct drm_i915_private *dev_priv = dev->dev_private;
5661         struct i915_power_domains *power_domains;
5662         struct i915_power_well *power_well;
5663         bool is_enabled;
5664         int i;
5665
5666         power_domains = &dev_priv->power_domains;
5667
5668         is_enabled = true;
5669
5670         mutex_lock(&power_domains->lock);
5671         for_each_power_well_rev(i, power_well, BIT(domain), power_domains) {
5672                 if (power_well->always_on)
5673                         continue;
5674
5675                 if (!power_well->is_enabled(dev, power_well)) {
5676                         is_enabled = false;
5677                         break;
5678                 }
5679         }
5680         mutex_unlock(&power_domains->lock);
5681
5682         return is_enabled;
5683 }
5684
5685 static void hsw_set_power_well(struct drm_device *dev,
5686                                struct i915_power_well *power_well, bool enable)
5687 {
5688         struct drm_i915_private *dev_priv = dev->dev_private;
5689         bool is_enabled, enable_requested;
5690         unsigned long irqflags;
5691         uint32_t tmp;
5692
5693         WARN_ON(dev_priv->pc8.enabled);
5694
5695         tmp = I915_READ(HSW_PWR_WELL_DRIVER);
5696         is_enabled = tmp & HSW_PWR_WELL_STATE_ENABLED;
5697         enable_requested = tmp & HSW_PWR_WELL_ENABLE_REQUEST;
5698
5699         if (enable) {
5700                 if (!enable_requested)
5701                         I915_WRITE(HSW_PWR_WELL_DRIVER,
5702                                    HSW_PWR_WELL_ENABLE_REQUEST);
5703
5704                 if (!is_enabled) {
5705                         DRM_DEBUG_KMS("Enabling power well\n");
5706                         if (wait_for((I915_READ(HSW_PWR_WELL_DRIVER) &
5707                                       HSW_PWR_WELL_STATE_ENABLED), 20))
5708                                 DRM_ERROR("Timeout enabling power well\n");
5709                 }
5710
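                /*
                 * The pipe B/C interrupt registers below are backed by the
                 * power well and lose their contents while it is off, so
                 * re-program IMR/IER from the cached masks now that the
                 * well is up again.
                 */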
5711                 if (IS_BROADWELL(dev)) {
5712                         spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
5713                         I915_WRITE(GEN8_DE_PIPE_IMR(PIPE_B),
5714                                    dev_priv->de_irq_mask[PIPE_B]);
5715                         I915_WRITE(GEN8_DE_PIPE_IER(PIPE_B),
5716                                    ~dev_priv->de_irq_mask[PIPE_B] |
5717                                    GEN8_PIPE_VBLANK);
5718                         I915_WRITE(GEN8_DE_PIPE_IMR(PIPE_C),
5719                                    dev_priv->de_irq_mask[PIPE_C]);
5720                         I915_WRITE(GEN8_DE_PIPE_IER(PIPE_C),
5721                                    ~dev_priv->de_irq_mask[PIPE_C] |
5722                                    GEN8_PIPE_VBLANK);
5723                         POSTING_READ(GEN8_DE_PIPE_IER(PIPE_C));
5724                         spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
5725                 }
5726         } else {
5727                 if (enable_requested) {
5728                         enum pipe p;
5729
5730                         I915_WRITE(HSW_PWR_WELL_DRIVER, 0);
5731                         POSTING_READ(HSW_PWR_WELL_DRIVER);
5732                         DRM_DEBUG_KMS("Requesting to disable the power well\n");
5733
5734                         /*
5735                          * After this, the registers on the pipes that are part
5736                          * of the power well will become zero, so we have to
5737                          * adjust our counters according to that.
5738                          *
5739                          * FIXME: Should we do this in general in
5740                          * drm_vblank_post_modeset?
5741                          */
5742                         spin_lock_irqsave(&dev->vbl_lock, irqflags);
5743                         for_each_pipe(p)
5744                                 if (p != PIPE_A)
5745                                         dev->vblank[p].last = 0;
5746                         spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
5747                 }
5748         }
5749 }
5750
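/*
 * Power wells are refcounted: the 0 -> 1 transition powers the well up
 * (first blocking package C8, since the request register can only be
 * written while the hardware is awake), and the 1 -> 0 transition in
 * __intel_power_well_put() powers it back down. Both run under
 * power_domains->lock, taken by the callers below.
 */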
5751 static void __intel_power_well_get(struct drm_device *dev,
5752                                    struct i915_power_well *power_well)
5753 {
5754         struct drm_i915_private *dev_priv = dev->dev_private;
5755
5756         if (!power_well->count++ && power_well->set) {
5757                 hsw_disable_package_c8(dev_priv);
5758                 power_well->set(dev, power_well, true);
5759         }
5760 }
5761
5762 static void __intel_power_well_put(struct drm_device *dev,
5763                                    struct i915_power_well *power_well)
5764 {
5765         struct drm_i915_private *dev_priv = dev->dev_private;
5766
5767         WARN_ON(!power_well->count);
5768
5769         if (!--power_well->count && power_well->set &&
5770             i915_disable_power_well) {
5771                 power_well->set(dev, power_well, false);
5772                 hsw_enable_package_c8(dev_priv);
5773         }
5774 }
5775
5776 void intel_display_power_get(struct drm_device *dev,
5777                              enum intel_display_power_domain domain)
5778 {
5779         struct drm_i915_private *dev_priv = dev->dev_private;
5780         struct i915_power_domains *power_domains;
5781         struct i915_power_well *power_well;
5782         int i;
5783
5784         power_domains = &dev_priv->power_domains;
5785
5786         mutex_lock(&power_domains->lock);
5787
5788         for_each_power_well(i, power_well, BIT(domain), power_domains)
5789                 __intel_power_well_get(dev, power_well);
5790
5791         power_domains->domain_use_count[domain]++;
5792
5793         mutex_unlock(&power_domains->lock);
5794 }
5795
5796 void intel_display_power_put(struct drm_device *dev,
5797                              enum intel_display_power_domain domain)
5798 {
5799         struct drm_i915_private *dev_priv = dev->dev_private;
5800         struct i915_power_domains *power_domains;
5801         struct i915_power_well *power_well;
5802         int i;
5803
5804         power_domains = &dev_priv->power_domains;
5805
5806         mutex_lock(&power_domains->lock);
5807
5808         WARN_ON(!power_domains->domain_use_count[domain]);
5809         power_domains->domain_use_count[domain]--;
5810
5811         for_each_power_well_rev(i, power_well, BIT(domain), power_domains)
5812                 __intel_power_well_put(dev, power_well);
5813
5814         mutex_unlock(&power_domains->lock);
5815 }
5816
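/*
 * Stashed by intel_power_domains_init() on HSW/BDW so the display audio
 * driver can toggle the power well through i915_request_power_well() and
 * i915_release_power_well() below without holding a struct drm_device
 * pointer.
 */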
5817 static struct i915_power_domains *hsw_pwr;
5818
5819 /* Display audio driver power well request */
5820 void i915_request_power_well(void)
5821 {
5822         struct drm_i915_private *dev_priv;
5823
5824         if (WARN_ON(!hsw_pwr))
5825                 return;
5826
5827         dev_priv = container_of(hsw_pwr, struct drm_i915_private,
5828                                 power_domains);
5829         intel_display_power_get(dev_priv->dev, POWER_DOMAIN_AUDIO);
5830 }
5831 EXPORT_SYMBOL_GPL(i915_request_power_well);
5832
5833 /* Display audio driver power well release */
5834 void i915_release_power_well(void)
5835 {
5836         struct drm_i915_private *dev_priv;
5837
5838         if (WARN_ON(!hsw_pwr))
5839                 return;
5840
5841         dev_priv = container_of(hsw_pwr, struct drm_i915_private,
5842                                 power_domains);
5843         intel_display_power_put(dev_priv->dev, POWER_DOMAIN_AUDIO);
5844 }
5845 EXPORT_SYMBOL_GPL(i915_release_power_well);
5846
5847 static struct i915_power_well i9xx_always_on_power_well[] = {
5848         {
5849                 .name = "always-on",
5850                 .always_on = 1,
5851                 .domains = POWER_DOMAIN_MASK,
5852         },
5853 };
5854
5855 static struct i915_power_well hsw_power_wells[] = {
5856         {
5857                 .name = "always-on",
5858                 .always_on = 1,
5859                 .domains = HSW_ALWAYS_ON_POWER_DOMAINS,
5860         },
5861         {
5862                 .name = "display",
5863                 .domains = POWER_DOMAIN_MASK & ~HSW_ALWAYS_ON_POWER_DOMAINS,
5864                 .is_enabled = hsw_power_well_enabled,
5865                 .set = hsw_set_power_well,
5866         },
5867 };
5868
5869 static struct i915_power_well bdw_power_wells[] = {
5870         {
5871                 .name = "always-on",
5872                 .always_on = 1,
5873                 .domains = BDW_ALWAYS_ON_POWER_DOMAINS,
5874         },
5875         {
5876                 .name = "display",
5877                 .domains = POWER_DOMAIN_MASK & ~BDW_ALWAYS_ON_POWER_DOMAINS,
5878                 .is_enabled = hsw_power_well_enabled,
5879                 .set = hsw_set_power_well,
5880         },
5881 };
5882
5883 #define set_power_wells(power_domains, __power_wells) ({                \
5884         (power_domains)->power_wells = (__power_wells);                 \
5885         (power_domains)->power_well_count = ARRAY_SIZE(__power_wells);  \
5886 })
5887
5888 int intel_power_domains_init(struct drm_device *dev)
5889 {
5890         struct drm_i915_private *dev_priv = dev->dev_private;
5891         struct i915_power_domains *power_domains = &dev_priv->power_domains;
5892
5893         mutex_init(&power_domains->lock);
5894
5895         /*
5896          * The enabling order will be from lower to higher indexed wells,
5897          * the disabling order is reversed.
5898          */
5899         if (IS_HASWELL(dev)) {
5900                 set_power_wells(power_domains, hsw_power_wells);
5901                 hsw_pwr = power_domains;
5902         } else if (IS_BROADWELL(dev)) {
5903                 set_power_wells(power_domains, bdw_power_wells);
5904                 hsw_pwr = power_domains;
5905         } else {
5906                 set_power_wells(power_domains, i9xx_always_on_power_well);
5907         }
5908
5909         return 0;
5910 }
5911
5912 void intel_power_domains_remove(struct drm_device *dev)
5913 {
5914         hsw_pwr = NULL;
5915 }
5916
5917 static void intel_power_domains_resume(struct drm_device *dev)
5918 {
5919         struct drm_i915_private *dev_priv = dev->dev_private;
5920         struct i915_power_domains *power_domains = &dev_priv->power_domains;
5921         struct i915_power_well *power_well;
5922         int i;
5923
5924         mutex_lock(&power_domains->lock);
5925         for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) {
5926                 if (power_well->set)
5927                         power_well->set(dev, power_well, power_well->count > 0);
5928         }
5929         mutex_unlock(&power_domains->lock);
5930 }
5931
5932 /*
5933  * Starting with Haswell, we have a "Power Down Well" that can be turned off
5934  * when not needed anymore. Four request registers (BIOS, driver, KVMr and
5935  * debug) can each ask for the power well to stay enabled, and the well is
5936  * only powered down once none of them requests it.
5937  */
5938 void intel_power_domains_init_hw(struct drm_device *dev)
5939 {
5940         struct drm_i915_private *dev_priv = dev->dev_private;
5941
5942         /* For now, we need the power well to be always enabled. */
5943         intel_display_set_init_power(dev, true);
5944         intel_power_domains_resume(dev);
5945
5946         if (!(IS_HASWELL(dev) || IS_BROADWELL(dev)))
5947                 return;
5948
5949         /* We're taking over from the BIOS, so clear any requests it made,
5950          * since the driver is in charge now. */
5951         if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST)
5952                 I915_WRITE(HSW_PWR_WELL_BIOS, 0);
5953 }
5954
5955 /* Disables PC8 so we can use the GMBUS and DP AUX interrupts. */
5956 void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv)
5957 {
5958         hsw_disable_package_c8(dev_priv);
5959 }
5960
5961 void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv)
5962 {
5963         hsw_enable_package_c8(dev_priv);
5964 }
5965
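/*
 * Runtime PM wrappers: intel_runtime_pm_get() resumes the device
 * synchronously if it was suspended (and may therefore sleep); each call
 * must be balanced by intel_runtime_pm_put(), which merely marks the
 * device busy and lets the autosuspend timer (10s, set up in
 * intel_init_runtime_pm() below) power it down later.
 */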
5966 void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
5967 {
5968         struct drm_device *dev = dev_priv->dev;
5969         struct device *device = &dev->pdev->dev;
5970
5971         if (!HAS_RUNTIME_PM(dev))
5972                 return;
5973
5974         pm_runtime_get_sync(device);
5975         WARN(dev_priv->pm.suspended, "Device still suspended.\n");
5976 }
5977
5978 void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
5979 {
5980         struct drm_device *dev = dev_priv->dev;
5981         struct device *device = &dev->pdev->dev;
5982
5983         if (!HAS_RUNTIME_PM(dev))
5984                 return;
5985
5986         pm_runtime_mark_last_busy(device);
5987         pm_runtime_put_autosuspend(device);
5988 }
5989
5990 void intel_init_runtime_pm(struct drm_i915_private *dev_priv)
5991 {
5992         struct drm_device *dev = dev_priv->dev;
5993         struct device *device = &dev->pdev->dev;
5994
5995         dev_priv->pm.suspended = false;
5996
5997         if (!HAS_RUNTIME_PM(dev))
5998                 return;
5999
6000         pm_runtime_set_active(device);
6001
6002         pm_runtime_set_autosuspend_delay(device, 10000); /* 10s */
6003         pm_runtime_mark_last_busy(device);
6004         pm_runtime_use_autosuspend(device);
6005 }
6006
6007 void intel_fini_runtime_pm(struct drm_i915_private *dev_priv)
6008 {
6009         struct drm_device *dev = dev_priv->dev;
6010         struct device *device = &dev->pdev->dev;
6011
6012         if (!HAS_RUNTIME_PM(dev))
6013                 return;
6014
6015         /* Make sure we're not suspended first. */
6016         pm_runtime_get_sync(device);
6017         pm_runtime_disable(device);
6018 }
6019
6020 /* Set up chip specific power management-related functions */
6021 void intel_init_pm(struct drm_device *dev)
6022 {
6023         struct drm_i915_private *dev_priv = dev->dev_private;
6024
6025         if (I915_HAS_FBC(dev)) {
6026                 if (INTEL_INFO(dev)->gen >= 7) {
6027                         dev_priv->display.fbc_enabled = ironlake_fbc_enabled;
6028                         dev_priv->display.enable_fbc = gen7_enable_fbc;
6029                         dev_priv->display.disable_fbc = ironlake_disable_fbc;
6030                 } else if (INTEL_INFO(dev)->gen >= 5) {
6031                         dev_priv->display.fbc_enabled = ironlake_fbc_enabled;
6032                         dev_priv->display.enable_fbc = ironlake_enable_fbc;
6033                         dev_priv->display.disable_fbc = ironlake_disable_fbc;
6034                 } else if (IS_GM45(dev)) {
6035                         dev_priv->display.fbc_enabled = g4x_fbc_enabled;
6036                         dev_priv->display.enable_fbc = g4x_enable_fbc;
6037                         dev_priv->display.disable_fbc = g4x_disable_fbc;
6038                 } else {
6039                         dev_priv->display.fbc_enabled = i8xx_fbc_enabled;
6040                         dev_priv->display.enable_fbc = i8xx_enable_fbc;
6041                         dev_priv->display.disable_fbc = i8xx_disable_fbc;
6042                 }
6043         }
6044
6045         /* For cxsr */
6046         if (IS_PINEVIEW(dev))
6047                 i915_pineview_get_mem_freq(dev);
6048         else if (IS_GEN5(dev))
6049                 i915_ironlake_get_mem_freq(dev);
6050
6051         /* For FIFO watermark updates */
6052         if (HAS_PCH_SPLIT(dev)) {
6053                 intel_setup_wm_latency(dev);
6054
6055                 if (IS_GEN5(dev)) {
6056                         if (dev_priv->wm.pri_latency[1] &&
6057                             dev_priv->wm.spr_latency[1] &&
6058                             dev_priv->wm.cur_latency[1]) {
6059                                 dev_priv->display.update_wm = ironlake_update_wm;
6060                         } else {
6061                                 DRM_DEBUG_KMS("Failed to get proper latency. "
6062                                               "Disable CxSR\n");
6063                                 dev_priv->display.update_wm = NULL;
6064                         }
6065                         dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
6066                 } else if (IS_GEN6(dev)) {
6067                         if (dev_priv->wm.pri_latency[0] &&
6068                             dev_priv->wm.spr_latency[0] &&
6069                             dev_priv->wm.cur_latency[0]) {
6070                                 dev_priv->display.update_wm = sandybridge_update_wm;
6071                                 dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
6072                         } else {
6073                                 DRM_DEBUG_KMS("Failed to read display plane latency. "
6074                                               "Disable CxSR\n");
6075                                 dev_priv->display.update_wm = NULL;
6076                         }
6077                         dev_priv->display.init_clock_gating = gen6_init_clock_gating;
6078                 } else if (IS_IVYBRIDGE(dev)) {
6079                         if (dev_priv->wm.pri_latency[0] &&
6080                             dev_priv->wm.spr_latency[0] &&
6081                             dev_priv->wm.cur_latency[0]) {
6082                                 dev_priv->display.update_wm = ivybridge_update_wm;
6083                                 dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
6084                         } else {
6085                                 DRM_DEBUG_KMS("Failed to read display plane latency. "
6086                                               "Disable CxSR\n");
6087                                 dev_priv->display.update_wm = NULL;
6088                         }
6089                         dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
6090                 } else if (IS_HASWELL(dev)) {
6091                         if (dev_priv->wm.pri_latency[0] &&
6092                             dev_priv->wm.spr_latency[0] &&
6093                             dev_priv->wm.cur_latency[0]) {
6094                                 dev_priv->display.update_wm = haswell_update_wm;
6095                                 dev_priv->display.update_sprite_wm =
6096                                         haswell_update_sprite_wm;
6097                         } else {
6098                                 DRM_DEBUG_KMS("Failed to read display plane latency. "
6099                                               "Disable CxSR\n");
6100                                 dev_priv->display.update_wm = NULL;
6101                         }
6102                         dev_priv->display.init_clock_gating = haswell_init_clock_gating;
6103                 } else if (INTEL_INFO(dev)->gen == 8) {
6104                         dev_priv->display.init_clock_gating = gen8_init_clock_gating;
6105                 } else
6106                         dev_priv->display.update_wm = NULL;
6107         } else if (IS_VALLEYVIEW(dev)) {
6108                 dev_priv->display.update_wm = valleyview_update_wm;
6109                 dev_priv->display.init_clock_gating =
6110                         valleyview_init_clock_gating;
6111         } else if (IS_PINEVIEW(dev)) {
6112                 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev),
6113                                             dev_priv->is_ddr3,
6114                                             dev_priv->fsb_freq,
6115                                             dev_priv->mem_freq)) {
6116                         DRM_INFO("failed to find known CxSR latency "
6117                                  "(found ddr%s fsb freq %d, mem freq %d), "
6118                                  "disabling CxSR\n",
6119                                  (dev_priv->is_ddr3 == 1) ? "3" : "2",
6120                                  dev_priv->fsb_freq, dev_priv->mem_freq);
6121                         /* Disable CxSR and never update its watermark again */
6122                         pineview_disable_cxsr(dev);
6123                         dev_priv->display.update_wm = NULL;
6124                 } else
6125                         dev_priv->display.update_wm = pineview_update_wm;
6126                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
6127         } else if (IS_G4X(dev)) {
6128                 dev_priv->display.update_wm = g4x_update_wm;
6129                 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
6130         } else if (IS_GEN4(dev)) {
6131                 dev_priv->display.update_wm = i965_update_wm;
6132                 if (IS_CRESTLINE(dev))
6133                         dev_priv->display.init_clock_gating = crestline_init_clock_gating;
6134                 else if (IS_BROADWATER(dev))
6135                         dev_priv->display.init_clock_gating = broadwater_init_clock_gating;
6136         } else if (IS_GEN3(dev)) {
6137                 dev_priv->display.update_wm = i9xx_update_wm;
6138                 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
6139                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
6140         } else if (IS_I865G(dev)) {
6141                 dev_priv->display.update_wm = i830_update_wm;
6142                 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
6143                 dev_priv->display.get_fifo_size = i830_get_fifo_size;
6144         } else if (IS_I85X(dev)) {
6145                 dev_priv->display.update_wm = i9xx_update_wm;
6146                 dev_priv->display.get_fifo_size = i85x_get_fifo_size;
6147                 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
6148         } else {
6149                 dev_priv->display.update_wm = i830_update_wm;
6150                 dev_priv->display.init_clock_gating = i830_init_clock_gating;
6151                 if (IS_845G(dev))
6152                         dev_priv->display.get_fifo_size = i845_get_fifo_size;
6153                 else
6154                         dev_priv->display.get_fifo_size = i830_get_fifo_size;
6155         }
6156 }
6157
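/*
 * GEN6+ pcode mailbox handshake, as implemented by the two helpers below:
 * with rps.hw_lock held, verify the mailbox is idle (READY clear), write
 * GEN6_PCODE_DATA (reads seed it with *val), latch the command by writing
 * GEN6_PCODE_READY | mbox, then poll up to 500ms for READY to clear before
 * fetching the result from GEN6_PCODE_DATA.
 */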
6158 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val)
6159 {
6160         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
6161
6162         if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
6163                 DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
6164                 return -EAGAIN;
6165         }
6166
6167         I915_WRITE(GEN6_PCODE_DATA, *val);
6168         I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
6169
6170         if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
6171                      500)) {
6172                 DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
6173                 return -ETIMEDOUT;
6174         }
6175
6176         *val = I915_READ(GEN6_PCODE_DATA);
6177         I915_WRITE(GEN6_PCODE_DATA, 0);
6178
6179         return 0;
6180 }
6181
6182 int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val)
6183 {
6184         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
6185
6186         if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
6187                 DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
6188                 return -EAGAIN;
6189         }
6190
6191         I915_WRITE(GEN6_PCODE_DATA, val);
6192         I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
6193
6194         if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
6195                      500)) {
6196                 DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
6197                 return -ETIMEDOUT;
6198         }
6199
6200         I915_WRITE(GEN6_PCODE_DATA, 0);
6201
6202         return 0;
6203 }
6204
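/*
 * VLV opcodes encode GPU frequency relative to a 0xbd baseline, scaled by
 * the memory clock: freq = mem_freq * (val + 6 - 0xbd) / (4 * div).
 * A worked example (illustrative numbers, not measured data): with
 * mem_freq = 1333 (div = 16), opcode 0xd0 yields
 * 1333 * (208 + 6 - 189) / 64 ~= 521 MHz, and vlv_freq_opcode() below maps
 * 521 MHz back to 0xd0.
 */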
6205 int vlv_gpu_freq(struct drm_i915_private *dev_priv, int val)
6206 {
6207         int div;
6208
6209         /* 4 x czclk */
6210         switch (dev_priv->mem_freq) {
6211         case 800:
6212                 div = 10;
6213                 break;
6214         case 1066:
6215                 div = 12;
6216                 break;
6217         case 1333:
6218                 div = 16;
6219                 break;
6220         default:
6221                 return -1;
6222         }
6223
6224         return DIV_ROUND_CLOSEST(dev_priv->mem_freq * (val + 6 - 0xbd), 4 * div);
6225 }
6226
6227 int vlv_freq_opcode(struct drm_i915_private *dev_priv, int val)
6228 {
6229         int mul;
6230
6231         /* 4 x czclk */
6232         switch (dev_priv->mem_freq) {
6233         case 800:
6234                 mul = 10;
6235                 break;
6236         case 1066:
6237                 mul = 12;
6238                 break;
6239         case 1333:
6240                 mul = 16;
6241                 break;
6242         default:
6243                 return -1;
6244         }
6245
6246         return DIV_ROUND_CLOSEST(4 * mul * val, dev_priv->mem_freq) + 0xbd - 6;
6247 }
6248
6249 void intel_pm_setup(struct drm_device *dev)
6250 {
6251         struct drm_i915_private *dev_priv = dev->dev_private;
6252
6253         mutex_init(&dev_priv->rps.hw_lock);
6254
6255         mutex_init(&dev_priv->pc8.lock);
6256         dev_priv->pc8.requirements_met = false;
6257         dev_priv->pc8.gpu_idle = false;
6258         dev_priv->pc8.irqs_disabled = false;
6259         dev_priv->pc8.enabled = false;
6260         dev_priv->pc8.disable_count = 2; /* requirements_met + gpu_idle */
6261         INIT_DELAYED_WORK(&dev_priv->pc8.enable_work, hsw_enable_pc8_work);
6262         INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
6263                           intel_gen6_powersave_work);
6264 }