]> git.karo-electronics.de Git - mv-sheeva.git/blobdiff - drivers/gpu/drm/radeon/r100.c
Merge branch 'master' into tk71
[mv-sheeva.git] / drivers / gpu / drm / radeon / r100.c
index e59422320bb6df9873fbf88f9e29d34fdc412110..e372f9e1e5ce10a56b0fc17e9ee9930f8719daf4 100644 (file)
@@ -68,6 +68,39 @@ MODULE_FIRMWARE(FIRMWARE_R520);
  * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
  */
 
+void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
+{
+       /* enable the pflip int */
+       radeon_irq_kms_pflip_irq_get(rdev, crtc);
+}
+
+void r100_post_page_flip(struct radeon_device *rdev, int crtc)
+{
+       /* disable the pflip int */
+       radeon_irq_kms_pflip_irq_put(rdev, crtc);
+}
+
+u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
+{
+       struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
+       u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK;
+
+       /* Lock the graphics update lock */
+       /* update the scanout addresses */
+       WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
+
+       /* Wait for update_pending to go high. */
+       while (!(RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET));
+       DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
+
+       /* Unlock the lock, so double-buffering can take place inside vblank */
+       tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK;
+       WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
+
+       /* Return current update_pending status: */
+       return RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET;
+}
+
 void r100_pm_get_dynpm_state(struct radeon_device *rdev)
 {
        int i;
@@ -442,7 +475,7 @@ int r100_pci_gart_init(struct radeon_device *rdev)
        int r;
 
        if (rdev->gart.table.ram.ptr) {
-               WARN(1, "R100 PCI GART already initialized.\n");
+               WARN(1, "R100 PCI GART already initialized\n");
                return 0;
        }
        /* Initialize common gart structure */
@@ -516,7 +549,7 @@ int r100_irq_set(struct radeon_device *rdev)
        uint32_t tmp = 0;
 
        if (!rdev->irq.installed) {
-               WARN(1, "Can't enable IRQ/MSI because no handler is installed.\n");
+               WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
                WREG32(R_000040_GEN_INT_CNTL, 0);
                return -EINVAL;
        }
@@ -526,10 +559,12 @@ int r100_irq_set(struct radeon_device *rdev)
        if (rdev->irq.gui_idle) {
                tmp |= RADEON_GUI_IDLE_MASK;
        }
-       if (rdev->irq.crtc_vblank_int[0]) {
+       if (rdev->irq.crtc_vblank_int[0] ||
+           rdev->irq.pflip[0]) {
                tmp |= RADEON_CRTC_VBLANK_MASK;
        }
-       if (rdev->irq.crtc_vblank_int[1]) {
+       if (rdev->irq.crtc_vblank_int[1] ||
+           rdev->irq.pflip[1]) {
                tmp |= RADEON_CRTC2_VBLANK_MASK;
        }
        if (rdev->irq.hpd[0]) {
@@ -600,14 +635,22 @@ int r100_irq_process(struct radeon_device *rdev)
                }
                /* Vertical blank interrupts */
                if (status & RADEON_CRTC_VBLANK_STAT) {
-                       drm_handle_vblank(rdev->ddev, 0);
-                       rdev->pm.vblank_sync = true;
-                       wake_up(&rdev->irq.vblank_queue);
+                       if (rdev->irq.crtc_vblank_int[0]) {
+                               drm_handle_vblank(rdev->ddev, 0);
+                               rdev->pm.vblank_sync = true;
+                               wake_up(&rdev->irq.vblank_queue);
+                       }
+                       if (rdev->irq.pflip[0])
+                               radeon_crtc_handle_flip(rdev, 0);
                }
                if (status & RADEON_CRTC2_VBLANK_STAT) {
-                       drm_handle_vblank(rdev->ddev, 1);
-                       rdev->pm.vblank_sync = true;
-                       wake_up(&rdev->irq.vblank_queue);
+                       if (rdev->irq.crtc_vblank_int[1]) {
+                               drm_handle_vblank(rdev->ddev, 1);
+                               rdev->pm.vblank_sync = true;
+                               wake_up(&rdev->irq.vblank_queue);
+                       }
+                       if (rdev->irq.pflip[1])
+                               radeon_crtc_handle_flip(rdev, 1);
                }
                if (status & RADEON_FP_DETECT_STAT) {
                        queue_hotplug = true;
@@ -622,7 +665,7 @@ int r100_irq_process(struct radeon_device *rdev)
        /* reset gui idle ack.  the status bit is broken */
        rdev->irq.gui_idle_acked = false;
        if (queue_hotplug)
-               queue_work(rdev->wq, &rdev->hotplug_work);
+               schedule_work(&rdev->hotplug_work);
        if (rdev->msi_enabled) {
                switch (rdev->family) {
                case CHIP_RS400:
@@ -675,67 +718,6 @@ void r100_fence_ring_emit(struct radeon_device *rdev,
        radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
 }
 
-int r100_wb_init(struct radeon_device *rdev)
-{
-       int r;
-
-       if (rdev->wb.wb_obj == NULL) {
-               r = radeon_bo_create(rdev, NULL, RADEON_GPU_PAGE_SIZE, true,
-                                       RADEON_GEM_DOMAIN_GTT,
-                                       &rdev->wb.wb_obj);
-               if (r) {
-                       dev_err(rdev->dev, "(%d) create WB buffer failed\n", r);
-                       return r;
-               }
-               r = radeon_bo_reserve(rdev->wb.wb_obj, false);
-               if (unlikely(r != 0))
-                       return r;
-               r = radeon_bo_pin(rdev->wb.wb_obj, RADEON_GEM_DOMAIN_GTT,
-                                       &rdev->wb.gpu_addr);
-               if (r) {
-                       dev_err(rdev->dev, "(%d) pin WB buffer failed\n", r);
-                       radeon_bo_unreserve(rdev->wb.wb_obj);
-                       return r;
-               }
-               r = radeon_bo_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
-               radeon_bo_unreserve(rdev->wb.wb_obj);
-               if (r) {
-                       dev_err(rdev->dev, "(%d) map WB buffer failed\n", r);
-                       return r;
-               }
-       }
-       WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr);
-       WREG32(R_00070C_CP_RB_RPTR_ADDR,
-               S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + 1024) >> 2));
-       WREG32(R_000770_SCRATCH_UMSK, 0xff);
-       return 0;
-}
-
-void r100_wb_disable(struct radeon_device *rdev)
-{
-       WREG32(R_000770_SCRATCH_UMSK, 0);
-}
-
-void r100_wb_fini(struct radeon_device *rdev)
-{
-       int r;
-
-       r100_wb_disable(rdev);
-       if (rdev->wb.wb_obj) {
-               r = radeon_bo_reserve(rdev->wb.wb_obj, false);
-               if (unlikely(r != 0)) {
-                       dev_err(rdev->dev, "(%d) can't finish WB\n", r);
-                       return;
-               }
-               radeon_bo_kunmap(rdev->wb.wb_obj);
-               radeon_bo_unpin(rdev->wb.wb_obj);
-               radeon_bo_unreserve(rdev->wb.wb_obj);
-               radeon_bo_unref(&rdev->wb.wb_obj);
-               rdev->wb.wb = NULL;
-               rdev->wb.wb_obj = NULL;
-       }
-}
-
 int r100_copy_blit(struct radeon_device *rdev,
                   uint64_t src_offset,
                   uint64_t dst_offset,
@@ -996,20 +978,32 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
        WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
        tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
               REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
-              REG_SET(RADEON_MAX_FETCH, max_fetch) |
-              RADEON_RB_NO_UPDATE);
+              REG_SET(RADEON_MAX_FETCH, max_fetch));
 #ifdef __BIG_ENDIAN
        tmp |= RADEON_BUF_SWAP_32BIT;
 #endif
-       WREG32(RADEON_CP_RB_CNTL, tmp);
+       WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE);
 
        /* Set ring address */
        DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr);
        WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr);
        /* Force read & write ptr to 0 */
-       WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
+       WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
        WREG32(RADEON_CP_RB_RPTR_WR, 0);
        WREG32(RADEON_CP_RB_WPTR, 0);
+
+       /* set the wb address whether it's enabled or not */
+       WREG32(R_00070C_CP_RB_RPTR_ADDR,
+               S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2));
+       WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET);
+
+       if (rdev->wb.enabled)
+               WREG32(R_000770_SCRATCH_UMSK, 0xff);
+       else {
+               tmp |= RADEON_RB_NO_UPDATE;
+               WREG32(R_000770_SCRATCH_UMSK, 0);
+       }
+
        WREG32(RADEON_CP_RB_CNTL, tmp);
        udelay(10);
        rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
@@ -1020,8 +1014,8 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
        WREG32(RADEON_CP_CSQ_MODE,
               REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
               REG_SET(RADEON_INDIRECT1_START, indirect1_start));
-       WREG32(0x718, 0);
-       WREG32(0x744, 0x00004D4D);
+       WREG32(RADEON_CP_RB_WPTR_DELAY, 0);
+       WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D);
        WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
        radeon_ring_start(rdev);
        r = radeon_ring_test(rdev);
@@ -1030,7 +1024,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
                return r;
        }
        rdev->cp.ready = true;
-       rdev->mc.active_vram_size = rdev->mc.real_vram_size;
+       radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
        return 0;
 }
 
@@ -1048,10 +1042,11 @@ void r100_cp_fini(struct radeon_device *rdev)
 void r100_cp_disable(struct radeon_device *rdev)
 {
        /* Disable ring */
-       rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
+       radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
        rdev->cp.ready = false;
        WREG32(RADEON_CP_CSQ_MODE, 0);
        WREG32(RADEON_CP_CSQ_CNTL, 0);
+       WREG32(R_000770_SCRATCH_UMSK, 0);
        if (r100_gui_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait GUI idle while "
                       "programming pipes. Bad things might happen.\n");
@@ -1415,6 +1410,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
                }
                track->zb.robj = reloc->robj;
                track->zb.offset = idx_value;
+               track->zb_dirty = true;
                ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
                break;
        case RADEON_RB3D_COLOROFFSET:
@@ -1427,6 +1423,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
                }
                track->cb[0].robj = reloc->robj;
                track->cb[0].offset = idx_value;
+               track->cb_dirty = true;
                ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
                break;
        case RADEON_PP_TXOFFSET_0:
@@ -1442,6 +1439,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
                }
                ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
                track->textures[i].robj = reloc->robj;
+               track->tex_dirty = true;
                break;
        case RADEON_PP_CUBIC_OFFSET_T0_0:
        case RADEON_PP_CUBIC_OFFSET_T0_1:
@@ -1459,6 +1457,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
                track->textures[0].cube_info[i].offset = idx_value;
                ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
                track->textures[0].cube_info[i].robj = reloc->robj;
+               track->tex_dirty = true;
                break;
        case RADEON_PP_CUBIC_OFFSET_T1_0:
        case RADEON_PP_CUBIC_OFFSET_T1_1:
@@ -1476,6 +1475,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
                track->textures[1].cube_info[i].offset = idx_value;
                ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
                track->textures[1].cube_info[i].robj = reloc->robj;
+               track->tex_dirty = true;
                break;
        case RADEON_PP_CUBIC_OFFSET_T2_0:
        case RADEON_PP_CUBIC_OFFSET_T2_1:
@@ -1493,9 +1493,12 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
                track->textures[2].cube_info[i].offset = idx_value;
                ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
                track->textures[2].cube_info[i].robj = reloc->robj;
+               track->tex_dirty = true;
                break;
        case RADEON_RE_WIDTH_HEIGHT:
                track->maxy = ((idx_value >> 16) & 0x7FF);
+               track->cb_dirty = true;
+               track->zb_dirty = true;
                break;
        case RADEON_RB3D_COLORPITCH:
                r = r100_cs_packet_next_reloc(p, &reloc);
@@ -1516,9 +1519,11 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
                ib[idx] = tmp;
 
                track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
+               track->cb_dirty = true;
                break;
        case RADEON_RB3D_DEPTHPITCH:
                track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
+               track->zb_dirty = true;
                break;
        case RADEON_RB3D_CNTL:
                switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
@@ -1543,6 +1548,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
                        return -EINVAL;
                }
                track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
+               track->cb_dirty = true;
+               track->zb_dirty = true;
                break;
        case RADEON_RB3D_ZSTENCILCNTL:
                switch (idx_value & 0xf) {
@@ -1560,6 +1567,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
                default:
                        break;
                }
+               track->zb_dirty = true;
                break;
        case RADEON_RB3D_ZPASS_ADDR:
                r = r100_cs_packet_next_reloc(p, &reloc);
@@ -1576,6 +1584,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
                        uint32_t temp = idx_value >> 4;
                        for (i = 0; i < track->num_texture; i++)
                                track->textures[i].enabled = !!(temp & (1 << i));
+                       track->tex_dirty = true;
                }
                break;
        case RADEON_SE_VF_CNTL:
@@ -1590,12 +1599,14 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
                i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
                track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
                track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
+               track->tex_dirty = true;
                break;
        case RADEON_PP_TEX_PITCH_0:
        case RADEON_PP_TEX_PITCH_1:
        case RADEON_PP_TEX_PITCH_2:
                i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
                track->textures[i].pitch = idx_value + 32;
+               track->tex_dirty = true;
                break;
        case RADEON_PP_TXFILTER_0:
        case RADEON_PP_TXFILTER_1:
@@ -1609,6 +1620,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
                tmp = (idx_value >> 27) & 0x7;
                if (tmp == 2 || tmp == 6)
                        track->textures[i].roundup_h = false;
+               track->tex_dirty = true;
                break;
        case RADEON_PP_TXFORMAT_0:
        case RADEON_PP_TXFORMAT_1:
@@ -1661,6 +1673,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
                }
                track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
                track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
+               track->tex_dirty = true;
                break;
        case RADEON_PP_CUBIC_FACES_0:
        case RADEON_PP_CUBIC_FACES_1:
@@ -1671,6 +1684,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
                        track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
                        track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
                }
+               track->tex_dirty = true;
                break;
        default:
                printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
@@ -2074,12 +2088,13 @@ int r100_asic_reset(struct radeon_device *rdev)
 {
        struct r100_mc_save save;
        u32 status, tmp;
+       int ret = 0;
 
-       r100_mc_stop(rdev, &save);
        status = RREG32(R_000E40_RBBM_STATUS);
        if (!G_000E40_GUI_ACTIVE(status)) {
                return 0;
        }
+       r100_mc_stop(rdev, &save);
        status = RREG32(R_000E40_RBBM_STATUS);
        dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
        /* stop CP */
@@ -2119,11 +2134,11 @@ int r100_asic_reset(struct radeon_device *rdev)
                G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
                dev_err(rdev->dev, "failed to reset GPU\n");
                rdev->gpu_lockup = true;
-               return -1;
-       }
+               ret = -1;
+       } else
+               dev_info(rdev->dev, "GPU reset succeed\n");
        r100_mc_resume(rdev, &save);
-       dev_info(rdev->dev, "GPU reset succeed\n");
-       return 0;
+       return ret;
 }
 
 void r100_set_common_regs(struct radeon_device *rdev)
@@ -2297,7 +2312,6 @@ void r100_vram_init_sizes(struct radeon_device *rdev)
        /* FIXME we don't use the second aperture yet when we could use it */
        if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
                rdev->mc.visible_vram_size = rdev->mc.aper_size;
-       rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
        config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
        if (rdev->flags & RADEON_IS_IGP) {
                uint32_t tom;
@@ -2318,6 +2332,9 @@ void r100_vram_init_sizes(struct radeon_device *rdev)
                /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - 
                 * Novell bug 204882 + along with lots of ubuntu ones
                 */
+               if (rdev->mc.aper_size > config_aper_size)
+                       config_aper_size = rdev->mc.aper_size;
+
                if (config_aper_size > rdev->mc.real_vram_size)
                        rdev->mc.mc_vram_size = config_aper_size;
                else
@@ -2331,10 +2348,10 @@ void r100_vga_set_state(struct radeon_device *rdev, bool state)
 
        temp = RREG32(RADEON_CONFIG_CNTL);
        if (state == false) {
-               temp &= ~(1<<8);
-               temp |= (1<<9);
+               temp &= ~RADEON_CFG_VGA_RAM_EN;
+               temp |= RADEON_CFG_VGA_IO_DIS;
        } else {
-               temp &= ~(1<<9);
+               temp &= ~RADEON_CFG_VGA_IO_DIS;
        }
        WREG32(RADEON_CONFIG_CNTL, temp);
 }
@@ -3225,6 +3242,8 @@ static int r100_cs_track_texture_check(struct radeon_device *rdev,
        for (u = 0; u < track->num_texture; u++) {
                if (!track->textures[u].enabled)
                        continue;
+               if (track->textures[u].lookup_disable)
+                       continue;
                robj = track->textures[u].robj;
                if (robj == NULL) {
                        DRM_ERROR("No texture bound to unit %u\n", u);
@@ -3300,9 +3319,9 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
        unsigned long size;
        unsigned prim_walk;
        unsigned nverts;
-       unsigned num_cb = track->num_cb;
+       unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
 
-       if (!track->zb_cb_clear && !track->color_channel_mask &&
+       if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
            !track->blend_read_enable)
                num_cb = 0;
 
@@ -3323,7 +3342,9 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
                        return -EINVAL;
                }
        }
-       if (track->z_enabled) {
+       track->cb_dirty = false;
+
+       if (track->zb_dirty && track->z_enabled) {
                if (track->zb.robj == NULL) {
                        DRM_ERROR("[drm] No buffer for z buffer !\n");
                        return -EINVAL;
@@ -3340,6 +3361,28 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
                        return -EINVAL;
                }
        }
+       track->zb_dirty = false;
+
+       if (track->aa_dirty && track->aaresolve) {
+               if (track->aa.robj == NULL) {
+                       DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
+                       return -EINVAL;
+               }
+               /* I believe the format comes from colorbuffer0. */
+               size = track->aa.pitch * track->cb[0].cpp * track->maxy;
+               size += track->aa.offset;
+               if (size > radeon_bo_size(track->aa.robj)) {
+                       DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
+                                 "(need %lu have %lu) !\n", i, size,
+                                 radeon_bo_size(track->aa.robj));
+                       DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
+                                 i, track->aa.pitch, track->cb[0].cpp,
+                                 track->aa.offset, track->maxy);
+                       return -EINVAL;
+               }
+       }
+       track->aa_dirty = false;
+
        prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
        if (track->vap_vf_cntl & (1 << 14)) {
                nverts = track->vap_alt_nverts;
@@ -3399,13 +3442,23 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
                          prim_walk);
                return -EINVAL;
        }
-       return r100_cs_track_texture_check(rdev, track);
+
+       if (track->tex_dirty) {
+               track->tex_dirty = false;
+               return r100_cs_track_texture_check(rdev, track);
+       }
+       return 0;
 }
 
 void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
 {
        unsigned i, face;
 
+       track->cb_dirty = true;
+       track->zb_dirty = true;
+       track->tex_dirty = true;
+       track->aa_dirty = true;
+
        if (rdev->family < CHIP_R300) {
                track->num_cb = 1;
                if (rdev->family <= CHIP_RS200)
@@ -3419,6 +3472,8 @@ void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track
                track->num_texture = 16;
                track->maxy = 4096;
                track->separate_cube = 0;
+               track->aaresolve = false;
+               track->aa.robj = NULL;
        }
 
        for (i = 0; i < track->num_cb; i++) {
@@ -3459,6 +3514,7 @@ void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track
                track->textures[i].robj = NULL;
                /* CS IB emission code makes sure texture unit are disabled */
                track->textures[i].enabled = false;
+               track->textures[i].lookup_disable = false;
                track->textures[i].roundup_w = true;
                track->textures[i].roundup_h = true;
                if (track->separate_cube)
@@ -3503,7 +3559,7 @@ int r100_ring_test(struct radeon_device *rdev)
        if (i < rdev->usec_timeout) {
                DRM_INFO("ring test succeeded in %d usecs\n", i);
        } else {
-               DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n",
+               DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
                          scratch, tmp);
                r = -EINVAL;
        }
@@ -3727,8 +3783,6 @@ static int r100_startup(struct radeon_device *rdev)
        r100_mc_program(rdev);
        /* Resume clock */
        r100_clock_startup(rdev);
-       /* Initialize GPU configuration (# pipes, ...) */
-//     r100_gpu_init(rdev);
        /* Initialize GART (initialize after TTM so we can allocate
         * memory through TTM but finalize after TTM) */
        r100_enable_bm(rdev);
@@ -3737,6 +3791,12 @@ static int r100_startup(struct radeon_device *rdev)
                if (r)
                        return r;
        }
+
+       /* allocate wb buffer */
+       r = radeon_wb_init(rdev);
+       if (r)
+               return r;
+
        /* Enable IRQ */
        r100_irq_set(rdev);
        rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
@@ -3746,9 +3806,6 @@ static int r100_startup(struct radeon_device *rdev)
                dev_err(rdev->dev, "failled initializing CP (%d).\n", r);
                return r;
        }
-       r = r100_wb_init(rdev);
-       if (r)
-               dev_err(rdev->dev, "failled initializing WB (%d).\n", r);
        r = r100_ib_init(rdev);
        if (r) {
                dev_err(rdev->dev, "failled initializing IB (%d).\n", r);
@@ -3782,7 +3839,7 @@ int r100_resume(struct radeon_device *rdev)
 int r100_suspend(struct radeon_device *rdev)
 {
        r100_cp_disable(rdev);
-       r100_wb_disable(rdev);
+       radeon_wb_disable(rdev);
        r100_irq_disable(rdev);
        if (rdev->flags & RADEON_IS_PCI)
                r100_pci_gart_disable(rdev);
@@ -3792,7 +3849,7 @@ int r100_suspend(struct radeon_device *rdev)
 void r100_fini(struct radeon_device *rdev)
 {
        r100_cp_fini(rdev);
-       r100_wb_fini(rdev);
+       radeon_wb_fini(rdev);
        r100_ib_fini(rdev);
        radeon_gem_fini(rdev);
        if (rdev->flags & RADEON_IS_PCI)
@@ -3905,7 +3962,7 @@ int r100_init(struct radeon_device *rdev)
                /* Somethings want wront with the accel init stop accel */
                dev_err(rdev->dev, "Disabling GPU acceleration\n");
                r100_cp_fini(rdev);
-               r100_wb_fini(rdev);
+               radeon_wb_fini(rdev);
                r100_ib_fini(rdev);
                radeon_irq_kms_fini(rdev);
                if (rdev->flags & RADEON_IS_PCI)