drm/radeon: fix the r100/r200 ums block 0 page fix
drivers/gpu/drm/radeon/radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
40 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
41                                                     dev_priv,
42                                                     struct drm_file * file_priv,
43                                                     u32 *offset)
44 {
45         u64 off = *offset;
46         u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
47         struct drm_radeon_driver_file_fields *radeon_priv;
48
49         /* Hrm ... the story of the offset ... So this function converts
50          * the various ideas of what userland clients might have for an
51          * offset in the card address space into an offset into the card
52          * address space :) So with a sane client, it should just keep
53          * the value intact and just do some boundary checking. However,
54          * not all clients are sane. Some older clients pass us 0 based
55          * offsets relative to the start of the framebuffer and some may
56          * assume the AGP aperture is appended to the framebuffer, so we
57          * try to detect those cases and fix them up.
58          *
59          * Note: It might be a good idea here to make sure the offset lands
60          * in some "allowed" area to protect things like the PCIE GART...
61          */
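        /*
         * Worked example with purely hypothetical values: if fb_location is
         * 0xe0000000, fb_size is 0x08000000 and gart_vm_start is 0xe8000000,
         * then fb_end is 0xe7ffffff.  A legacy client handing us the
         * zero-based offset 0x00100000 gets radeon_fb_delta (here assumed to
         * be fb_location, i.e. 0xe0000000) added and lands at 0xe0100000,
         * inside the framebuffer; a value just past fb_end is instead rebased
         * onto gart_vm_start by the check further down.
         */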
62
63         /* First, the best case, the offset already lands in either the
64          * framebuffer or the GART mapped space
65          */
66         if (radeon_check_offset(dev_priv, off))
67                 return 0;
68
69         /* Ok, that didn't happen... now check if we have a zero based
70          * offset that fits in the framebuffer + gart space, apply the
71          * magic offset we get from SETPARAM or calculated from fb_location
72          */
73         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
74                 radeon_priv = file_priv->driver_priv;
75                 off += radeon_priv->radeon_fb_delta;
76         }
77
78         /* Finally, assume we aimed at a GART offset if beyond the fb */
79         if (off > fb_end)
80                 off = off - fb_end - 1 + dev_priv->gart_vm_start;
81
82         /* Now recheck and fail if out of bounds */
83         if (radeon_check_offset(dev_priv, off)) {
84                 DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
85                 *offset = off;
86                 return 0;
87         }
88         return -EINVAL;
89 }
90
91 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
92                                                      dev_priv,
93                                                      struct drm_file *file_priv,
94                                                      int id, u32 *data)
95 {
96         switch (id) {
97
98         case RADEON_EMIT_PP_MISC:
99                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
100                     &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
101                         DRM_ERROR("Invalid depth buffer offset\n");
102                         return -EINVAL;
103                 }
104                 dev_priv->have_z_offset = 1;
105                 break;
106
107         case RADEON_EMIT_PP_CNTL:
108                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
109                     &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
110                         DRM_ERROR("Invalid colour buffer offset\n");
111                         return -EINVAL;
112                 }
113                 break;
114
115         case R200_EMIT_PP_TXOFFSET_0:
116         case R200_EMIT_PP_TXOFFSET_1:
117         case R200_EMIT_PP_TXOFFSET_2:
118         case R200_EMIT_PP_TXOFFSET_3:
119         case R200_EMIT_PP_TXOFFSET_4:
120         case R200_EMIT_PP_TXOFFSET_5:
121                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
122                                                   &data[0])) {
123                         DRM_ERROR("Invalid R200 texture offset\n");
124                         return -EINVAL;
125                 }
126                 break;
127
128         case RADEON_EMIT_PP_TXFILTER_0:
129         case RADEON_EMIT_PP_TXFILTER_1:
130         case RADEON_EMIT_PP_TXFILTER_2:
131                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
132                     &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
133                         DRM_ERROR("Invalid R100 texture offset\n");
134                         return -EINVAL;
135                 }
136                 break;
137
138         case R200_EMIT_PP_CUBIC_OFFSETS_0:
139         case R200_EMIT_PP_CUBIC_OFFSETS_1:
140         case R200_EMIT_PP_CUBIC_OFFSETS_2:
141         case R200_EMIT_PP_CUBIC_OFFSETS_3:
142         case R200_EMIT_PP_CUBIC_OFFSETS_4:
143         case R200_EMIT_PP_CUBIC_OFFSETS_5:{
144                         int i;
145                         for (i = 0; i < 5; i++) {
146                                 if (radeon_check_and_fixup_offset(dev_priv,
147                                                                   file_priv,
148                                                                   &data[i])) {
149                                         DRM_ERROR
150                                             ("Invalid R200 cubic texture offset\n");
151                                         return -EINVAL;
152                                 }
153                         }
154                         break;
155                 }
156
157         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
158         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
159         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
160                         int i;
161                         for (i = 0; i < 5; i++) {
162                                 if (radeon_check_and_fixup_offset(dev_priv,
163                                                                   file_priv,
164                                                                   &data[i])) {
165                                         DRM_ERROR
166                                             ("Invalid R100 cubic texture offset\n");
167                                         return -EINVAL;
168                                 }
169                         }
170                 }
171                 break;
172
173         case R200_EMIT_VAP_CTL:{
174                         RING_LOCALS;
175                         BEGIN_RING(2);
176                         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
177                         ADVANCE_RING();
178                 }
179                 break;
180
181         case RADEON_EMIT_RB3D_COLORPITCH:
182         case RADEON_EMIT_RE_LINE_PATTERN:
183         case RADEON_EMIT_SE_LINE_WIDTH:
184         case RADEON_EMIT_PP_LUM_MATRIX:
185         case RADEON_EMIT_PP_ROT_MATRIX_0:
186         case RADEON_EMIT_RB3D_STENCILREFMASK:
187         case RADEON_EMIT_SE_VPORT_XSCALE:
188         case RADEON_EMIT_SE_CNTL:
189         case RADEON_EMIT_SE_CNTL_STATUS:
190         case RADEON_EMIT_RE_MISC:
191         case RADEON_EMIT_PP_BORDER_COLOR_0:
192         case RADEON_EMIT_PP_BORDER_COLOR_1:
193         case RADEON_EMIT_PP_BORDER_COLOR_2:
194         case RADEON_EMIT_SE_ZBIAS_FACTOR:
195         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
196         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
197         case R200_EMIT_PP_TXCBLEND_0:
198         case R200_EMIT_PP_TXCBLEND_1:
199         case R200_EMIT_PP_TXCBLEND_2:
200         case R200_EMIT_PP_TXCBLEND_3:
201         case R200_EMIT_PP_TXCBLEND_4:
202         case R200_EMIT_PP_TXCBLEND_5:
203         case R200_EMIT_PP_TXCBLEND_6:
204         case R200_EMIT_PP_TXCBLEND_7:
205         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
206         case R200_EMIT_TFACTOR_0:
207         case R200_EMIT_VTX_FMT_0:
208         case R200_EMIT_MATRIX_SELECT_0:
209         case R200_EMIT_TEX_PROC_CTL_2:
210         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
211         case R200_EMIT_PP_TXFILTER_0:
212         case R200_EMIT_PP_TXFILTER_1:
213         case R200_EMIT_PP_TXFILTER_2:
214         case R200_EMIT_PP_TXFILTER_3:
215         case R200_EMIT_PP_TXFILTER_4:
216         case R200_EMIT_PP_TXFILTER_5:
217         case R200_EMIT_VTE_CNTL:
218         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
219         case R200_EMIT_PP_TAM_DEBUG3:
220         case R200_EMIT_PP_CNTL_X:
221         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
222         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
223         case R200_EMIT_RE_SCISSOR_TL_0:
224         case R200_EMIT_RE_SCISSOR_TL_1:
225         case R200_EMIT_RE_SCISSOR_TL_2:
226         case R200_EMIT_SE_VAP_CNTL_STATUS:
227         case R200_EMIT_SE_VTX_STATE_CNTL:
228         case R200_EMIT_RE_POINTSIZE:
229         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
230         case R200_EMIT_PP_CUBIC_FACES_0:
231         case R200_EMIT_PP_CUBIC_FACES_1:
232         case R200_EMIT_PP_CUBIC_FACES_2:
233         case R200_EMIT_PP_CUBIC_FACES_3:
234         case R200_EMIT_PP_CUBIC_FACES_4:
235         case R200_EMIT_PP_CUBIC_FACES_5:
236         case RADEON_EMIT_PP_TEX_SIZE_0:
237         case RADEON_EMIT_PP_TEX_SIZE_1:
238         case RADEON_EMIT_PP_TEX_SIZE_2:
239         case R200_EMIT_RB3D_BLENDCOLOR:
240         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
241         case RADEON_EMIT_PP_CUBIC_FACES_0:
242         case RADEON_EMIT_PP_CUBIC_FACES_1:
243         case RADEON_EMIT_PP_CUBIC_FACES_2:
244         case R200_EMIT_PP_TRI_PERF_CNTL:
245         case R200_EMIT_PP_AFS_0:
246         case R200_EMIT_PP_AFS_1:
247         case R200_EMIT_ATF_TFACTOR:
248         case R200_EMIT_PP_TXCTLALL_0:
249         case R200_EMIT_PP_TXCTLALL_1:
250         case R200_EMIT_PP_TXCTLALL_2:
251         case R200_EMIT_PP_TXCTLALL_3:
252         case R200_EMIT_PP_TXCTLALL_4:
253         case R200_EMIT_PP_TXCTLALL_5:
254         case R200_EMIT_VAP_PVS_CNTL:
255                 /* These packets don't contain memory offsets */
256                 break;
257
258         default:
259                 DRM_ERROR("Unknown state packet ID %d\n", id);
260                 return -EINVAL;
261         }
262
263         return 0;
264 }
265
266 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
267                                                      dev_priv,
268                                                      struct drm_file *file_priv,
269                                                      drm_radeon_kcmd_buffer_t *
270                                                      cmdbuf,
271                                                      unsigned int *cmdsz)
272 {
273         u32 *cmd = (u32 *) cmdbuf->buf;
274         u32 offset, narrays;
275         int count, i, k;
276
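        /* A type-3 CP packet starts with a header dword: bits 31:30 are the
         * packet type (0x3), bits 29:16 the count field and bits 15:8 the
         * opcode.  The count field is the number of payload dwords minus one,
         * so the full packet, header included, is count + 2 dwords; e.g.
         * CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4) is followed by 5 payload
         * dwords for a total of 6.
         */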
277         *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
278
279         if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
280                 DRM_ERROR("Not a type 3 packet\n");
281                 return -EINVAL;
282         }
283
284         if (4 * *cmdsz > cmdbuf->bufsz) {
285                 DRM_ERROR("Packet size larger than size of data provided\n");
286                 return -EINVAL;
287         }
288
289         switch(cmd[0] & 0xff00) {
290         /* XXX Are there old drivers needing other packets? */
291
292         case RADEON_3D_DRAW_IMMD:
293         case RADEON_3D_DRAW_VBUF:
294         case RADEON_3D_DRAW_INDX:
295         case RADEON_WAIT_FOR_IDLE:
296         case RADEON_CP_NOP:
297         case RADEON_3D_CLEAR_ZMASK:
298 /*      case RADEON_CP_NEXT_CHAR:
299         case RADEON_CP_PLY_NEXTSCAN:
300         case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
301                 /* these packets are safe */
302                 break;
303
304         case RADEON_CP_3D_DRAW_IMMD_2:
305         case RADEON_CP_3D_DRAW_VBUF_2:
306         case RADEON_CP_3D_DRAW_INDX_2:
307         case RADEON_3D_CLEAR_HIZ:
308                 /* safe but r200 only */
309                 if (dev_priv->microcode_version != UCODE_R200) {
310                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
311                         return -EINVAL;
312                 }
313                 break;
314
315         case RADEON_3D_LOAD_VBPNTR:
316                 count = (cmd[0] >> 16) & 0x3fff;
317
318                 if (count > 18) { /* 12 arrays max */
319                         DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
320                                   count);
321                         return -EINVAL;
322                 }
323
324                 /* carefully check packet contents */
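                /* Payload layout after the cmd[1] count/format dword: the
                 * vertex arrays come in pairs that share one attribute dword,
                 * i.e. [attr, addr0, addr1][attr, addr2, addr3]...  For
                 * narrays == 3 that is 1 + 5 dwords of payload, so the loop
                 * below finishes with k == 3 and i == count + 2.
                 */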
325                 narrays = cmd[1] & ~0xc000;
326                 k = 0;
327                 i = 2;
328                 while ((k < narrays) && (i < (count + 2))) {
329                         i++;            /* skip attribute field */
330                         if (radeon_check_and_fixup_offset(dev_priv, file_priv,
331                                                           &cmd[i])) {
332                                 DRM_ERROR
333                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
334                                      k, i);
335                                 return -EINVAL;
336                         }
337                         k++;
338                         i++;
339                         if (k == narrays)
340                                 break;
341                         /* have one more to process, they come in pairs */
342                         if (radeon_check_and_fixup_offset(dev_priv,
343                                                           file_priv, &cmd[i]))
344                         {
345                                 DRM_ERROR
346                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
347                                      k, i);
348                                 return -EINVAL;
349                         }
350                         k++;
351                         i++;
352                 }
353                 /* do the counts match what we expect ? */
354                 if ((k != narrays) || (i != (count + 2))) {
355                         DRM_ERROR
356                             ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
357                               k, i, narrays, count + 1);
358                         return -EINVAL;
359                 }
360                 break;
361
362         case RADEON_3D_RNDR_GEN_INDX_PRIM:
363                 if (dev_priv->microcode_version != UCODE_R100) {
364                         DRM_ERROR("Invalid 3d packet for r200-class chip\n");
365                         return -EINVAL;
366                 }
367                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) {
368                                 DRM_ERROR("Invalid rndr_gen_indx offset\n");
369                                 return -EINVAL;
370                 }
371                 break;
372
373         case RADEON_CP_INDX_BUFFER:
374                 if (dev_priv->microcode_version != UCODE_R200) {
375                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
376                         return -EINVAL;
377                 }
378                 if ((cmd[1] & 0x8000ffff) != 0x80000810) {
379                         DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
380                         return -EINVAL;
381                 }
382                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) {
383                         DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
384                         return -EINVAL;
385                 }
386                 break;
387
388         case RADEON_CNTL_HOSTDATA_BLT:
389         case RADEON_CNTL_PAINT_MULTI:
390         case RADEON_CNTL_BITBLT_MULTI:
391                 /* MSB of opcode: next DWORD GUI_CNTL */
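                /* The offsets here travel in the usual pitch:offset dword
                 * format: bits 31:22 hold the pitch (in 64-byte units) and
                 * bits 21:0 the offset shifted right by 10, which is why the
                 * fixup below works on cmd[n] << 10 and then packs the result
                 * back with (cmd[n] & 0xffc00000) | offset >> 10.
                 */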
392                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
393                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
394                         offset = cmd[2] << 10;
395                         if (radeon_check_and_fixup_offset
396                             (dev_priv, file_priv, &offset)) {
397                                 DRM_ERROR("Invalid first packet offset\n");
398                                 return -EINVAL;
399                         }
400                         cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
401                 }
402
403                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
404                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
405                         offset = cmd[3] << 10;
406                         if (radeon_check_and_fixup_offset
407                             (dev_priv, file_priv, &offset)) {
408                                 DRM_ERROR("Invalid second packet offset\n");
409                                 return -EINVAL;
410                         }
411                         cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
412                 }
413                 break;
414
415         default:
416                 DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
417                 return -EINVAL;
418         }
419
420         return 0;
421 }
422
423 /* ================================================================
424  * CP hardware state programming functions
425  */
426
427 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
428                                              struct drm_clip_rect * box)
429 {
430         RING_LOCALS;
431
432         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
433                   box->x1, box->y1, box->x2, box->y2);
434
435         BEGIN_RING(4);
436         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
437         OUT_RING((box->y1 << 16) | box->x1);
438         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
439         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
440         ADVANCE_RING();
441 }
442
443 /* Emit 1.1 state
444  */
445 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
446                              struct drm_file *file_priv,
447                              drm_radeon_context_regs_t * ctx,
448                              drm_radeon_texture_regs_t * tex,
449                              unsigned int dirty)
450 {
451         RING_LOCALS;
452         DRM_DEBUG("dirty=0x%08x\n", dirty);
453
454         if (dirty & RADEON_UPLOAD_CONTEXT) {
455                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
456                                                   &ctx->rb3d_depthoffset)) {
457                         DRM_ERROR("Invalid depth buffer offset\n");
458                         return -EINVAL;
459                 }
460
461                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
462                                                   &ctx->rb3d_coloroffset)) {
463                         DRM_ERROR("Invalid colour buffer offset\n");
464                         return -EINVAL;
465                 }
466
467                 BEGIN_RING(14);
468                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
469                 OUT_RING(ctx->pp_misc);
470                 OUT_RING(ctx->pp_fog_color);
471                 OUT_RING(ctx->re_solid_color);
472                 OUT_RING(ctx->rb3d_blendcntl);
473                 OUT_RING(ctx->rb3d_depthoffset);
474                 OUT_RING(ctx->rb3d_depthpitch);
475                 OUT_RING(ctx->rb3d_zstencilcntl);
476                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
477                 OUT_RING(ctx->pp_cntl);
478                 OUT_RING(ctx->rb3d_cntl);
479                 OUT_RING(ctx->rb3d_coloroffset);
480                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
481                 OUT_RING(ctx->rb3d_colorpitch);
482                 ADVANCE_RING();
483         }
484
485         if (dirty & RADEON_UPLOAD_VERTFMT) {
486                 BEGIN_RING(2);
487                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
488                 OUT_RING(ctx->se_coord_fmt);
489                 ADVANCE_RING();
490         }
491
492         if (dirty & RADEON_UPLOAD_LINE) {
493                 BEGIN_RING(5);
494                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
495                 OUT_RING(ctx->re_line_pattern);
496                 OUT_RING(ctx->re_line_state);
497                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
498                 OUT_RING(ctx->se_line_width);
499                 ADVANCE_RING();
500         }
501
502         if (dirty & RADEON_UPLOAD_BUMPMAP) {
503                 BEGIN_RING(5);
504                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
505                 OUT_RING(ctx->pp_lum_matrix);
506                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
507                 OUT_RING(ctx->pp_rot_matrix_0);
508                 OUT_RING(ctx->pp_rot_matrix_1);
509                 ADVANCE_RING();
510         }
511
512         if (dirty & RADEON_UPLOAD_MASKS) {
513                 BEGIN_RING(4);
514                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
515                 OUT_RING(ctx->rb3d_stencilrefmask);
516                 OUT_RING(ctx->rb3d_ropcntl);
517                 OUT_RING(ctx->rb3d_planemask);
518                 ADVANCE_RING();
519         }
520
521         if (dirty & RADEON_UPLOAD_VIEWPORT) {
522                 BEGIN_RING(7);
523                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
524                 OUT_RING(ctx->se_vport_xscale);
525                 OUT_RING(ctx->se_vport_xoffset);
526                 OUT_RING(ctx->se_vport_yscale);
527                 OUT_RING(ctx->se_vport_yoffset);
528                 OUT_RING(ctx->se_vport_zscale);
529                 OUT_RING(ctx->se_vport_zoffset);
530                 ADVANCE_RING();
531         }
532
533         if (dirty & RADEON_UPLOAD_SETUP) {
534                 BEGIN_RING(4);
535                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
536                 OUT_RING(ctx->se_cntl);
537                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
538                 OUT_RING(ctx->se_cntl_status);
539                 ADVANCE_RING();
540         }
541
542         if (dirty & RADEON_UPLOAD_MISC) {
543                 BEGIN_RING(2);
544                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
545                 OUT_RING(ctx->re_misc);
546                 ADVANCE_RING();
547         }
548
549         if (dirty & RADEON_UPLOAD_TEX0) {
550                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
551                                                   &tex[0].pp_txoffset)) {
552                         DRM_ERROR("Invalid texture offset for unit 0\n");
553                         return -EINVAL;
554                 }
555
556                 BEGIN_RING(9);
557                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
558                 OUT_RING(tex[0].pp_txfilter);
559                 OUT_RING(tex[0].pp_txformat);
560                 OUT_RING(tex[0].pp_txoffset);
561                 OUT_RING(tex[0].pp_txcblend);
562                 OUT_RING(tex[0].pp_txablend);
563                 OUT_RING(tex[0].pp_tfactor);
564                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
565                 OUT_RING(tex[0].pp_border_color);
566                 ADVANCE_RING();
567         }
568
569         if (dirty & RADEON_UPLOAD_TEX1) {
570                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
571                                                   &tex[1].pp_txoffset)) {
572                         DRM_ERROR("Invalid texture offset for unit 1\n");
573                         return -EINVAL;
574                 }
575
576                 BEGIN_RING(9);
577                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
578                 OUT_RING(tex[1].pp_txfilter);
579                 OUT_RING(tex[1].pp_txformat);
580                 OUT_RING(tex[1].pp_txoffset);
581                 OUT_RING(tex[1].pp_txcblend);
582                 OUT_RING(tex[1].pp_txablend);
583                 OUT_RING(tex[1].pp_tfactor);
584                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
585                 OUT_RING(tex[1].pp_border_color);
586                 ADVANCE_RING();
587         }
588
589         if (dirty & RADEON_UPLOAD_TEX2) {
590                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
591                                                   &tex[2].pp_txoffset)) {
592                         DRM_ERROR("Invalid texture offset for unit 2\n");
593                         return -EINVAL;
594                 }
595
596                 BEGIN_RING(9);
597                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
598                 OUT_RING(tex[2].pp_txfilter);
599                 OUT_RING(tex[2].pp_txformat);
600                 OUT_RING(tex[2].pp_txoffset);
601                 OUT_RING(tex[2].pp_txcblend);
602                 OUT_RING(tex[2].pp_txablend);
603                 OUT_RING(tex[2].pp_tfactor);
604                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
605                 OUT_RING(tex[2].pp_border_color);
606                 ADVANCE_RING();
607         }
608
609         return 0;
610 }
611
612 /* Emit 1.2 state
613  */
614 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
615                               struct drm_file *file_priv,
616                               drm_radeon_state_t * state)
617 {
618         RING_LOCALS;
619
620         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
621                 BEGIN_RING(3);
622                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
623                 OUT_RING(state->context2.se_zbias_factor);
624                 OUT_RING(state->context2.se_zbias_constant);
625                 ADVANCE_RING();
626         }
627
628         return radeon_emit_state(dev_priv, file_priv, &state->context,
629                                  state->tex, state->dirty);
630 }
631
632 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
633  * 1.3 cmdbuffers allow all previous state to be updated as well as
634  * the tcl scalar and vector areas.
635  */
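/* Each entry below names the first register of a range, how many consecutive
 * dwords a state packet writes there via CP_PACKET0, and a string used only
 * for error reporting.  The first entry, {RADEON_PP_MISC, 7, ...}, covers
 * PP_MISC through RB3D_ZSTENCILCNTL, matching the seven context registers
 * emitted by radeon_emit_state() above.
 */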
636 static struct {
637         int start;
638         int len;
639         const char *name;
640 } packet[RADEON_MAX_STATE_PACKETS] = {
641         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
642         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
643         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
644         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
645         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
646         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
647         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
648         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
649         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
650         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
651         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
652         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
653         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
654         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
655         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
656         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
657         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
658         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
659         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
660         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
661         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
662                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
663         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
664         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
665         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
666         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
667         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
668         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
669         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
670         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
671         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
672         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
673         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
674         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
675         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
676         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
677         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
678         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
679         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
680         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
681         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
682         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
683         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
684         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
685         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
686         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
687         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
688         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
689         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
690         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
691         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
692          "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
693         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
694         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
695         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
696         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
697         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
698         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
699         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
700         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
701         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
702         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
703         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
704                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
705         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
706         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
707         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
708         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
709         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
710         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
711         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
712         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
713         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
714         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
715         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
716         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
717         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
718         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
719         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
720         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
721         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
722         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
723         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
724         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
725         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
726         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
727         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
728         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
729         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
730         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
731         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
732         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
733         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
734         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
735         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
736         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
737         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
738         {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
739 };
740
741 /* ================================================================
742  * Performance monitoring functions
743  */
744
745 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
746                              struct drm_radeon_master_private *master_priv,
747                              int x, int y, int w, int h, int r, int g, int b)
748 {
749         u32 color;
750         RING_LOCALS;
751
752         x += master_priv->sarea_priv->boxes[0].x1;
753         y += master_priv->sarea_priv->boxes[0].y1;
754
755         switch (dev_priv->color_fmt) {
756         case RADEON_COLOR_FORMAT_RGB565:
757                 color = (((r & 0xf8) << 8) |
758                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
759                 break;
760         case RADEON_COLOR_FORMAT_ARGB8888:
761         default:
762                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
763                 break;
764         }
765
766         BEGIN_RING(4);
767         RADEON_WAIT_UNTIL_3D_IDLE();
768         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
769         OUT_RING(0xffffffff);
770         ADVANCE_RING();
771
772         BEGIN_RING(6);
773
774         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
775         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
776                  RADEON_GMC_BRUSH_SOLID_COLOR |
777                  (dev_priv->color_fmt << 8) |
778                  RADEON_GMC_SRC_DATATYPE_COLOR |
779                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
780
781         if (master_priv->sarea_priv->pfCurrentPage == 1) {
782                 OUT_RING(dev_priv->front_pitch_offset);
783         } else {
784                 OUT_RING(dev_priv->back_pitch_offset);
785         }
786
787         OUT_RING(color);
788
789         OUT_RING((x << 16) | y);
790         OUT_RING((w << 16) | h);
791
792         ADVANCE_RING();
793 }
794
795 static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv, struct drm_radeon_master_private *master_priv)
796 {
797         /* Collapse various things into a wait flag -- trying to
798          * guess if userspace slept -- better just to have them tell us.
799          */
800         if (dev_priv->stats.last_frame_reads > 1 ||
801             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
802                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
803         }
804
805         if (dev_priv->stats.freelist_loops) {
806                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
807         }
808
809         /* Purple box for page flipping
810          */
811         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
812                 radeon_clear_box(dev_priv, master_priv, 4, 4, 8, 8, 255, 0, 255);
813
814         /* Red box if we have to wait for idle at any point
815          */
816         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
817                 radeon_clear_box(dev_priv, master_priv, 16, 4, 8, 8, 255, 0, 0);
818
819         /* Blue box: lost context?
820          */
821
822         /* Yellow box for texture swaps
823          */
824         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
825                 radeon_clear_box(dev_priv, master_priv, 40, 4, 8, 8, 255, 255, 0);
826
827         /* Green box if hardware never idles (as far as we can tell)
828          */
829         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
830                 radeon_clear_box(dev_priv, master_priv, 64, 4, 8, 8, 0, 255, 0);
831
832         /* Draw bars indicating number of buffers allocated
833          * (not a great measure, easily confused)
834          */
835         if (dev_priv->stats.requested_bufs) {
836                 if (dev_priv->stats.requested_bufs > 100)
837                         dev_priv->stats.requested_bufs = 100;
838
839                 radeon_clear_box(dev_priv, master_priv, 4, 16,
840                                  dev_priv->stats.requested_bufs, 4,
841                                  196, 128, 128);
842         }
843
844         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
845
846 }
847
848 /* ================================================================
849  * CP command dispatch functions
850  */
851
852 static void radeon_cp_dispatch_clear(struct drm_device * dev,
853                                      struct drm_master *master,
854                                      drm_radeon_clear_t * clear,
855                                      drm_radeon_clear_rect_t * depth_boxes)
856 {
857         drm_radeon_private_t *dev_priv = dev->dev_private;
858         struct drm_radeon_master_private *master_priv = master->driver_priv;
859         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
860         drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
861         int nbox = sarea_priv->nbox;
862         struct drm_clip_rect *pbox = sarea_priv->boxes;
863         unsigned int flags = clear->flags;
864         u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
865         int i;
866         RING_LOCALS;
867         DRM_DEBUG("flags = 0x%x\n", flags);
868
869         dev_priv->stats.clears++;
870
871         if (sarea_priv->pfCurrentPage == 1) {
872                 unsigned int tmp = flags;
873
874                 flags &= ~(RADEON_FRONT | RADEON_BACK);
875                 if (tmp & RADEON_FRONT)
876                         flags |= RADEON_BACK;
877                 if (tmp & RADEON_BACK)
878                         flags |= RADEON_FRONT;
879         }
880         if (flags & (RADEON_DEPTH|RADEON_STENCIL)) {
881                 if (!dev_priv->have_z_offset) {
882                         printk_once(KERN_ERR "radeon: illegal depth clear request. Buggy mesa detected - please update.\n");
883                         flags &= ~(RADEON_DEPTH | RADEON_STENCIL);
884                 }
885         }
886
887         if (flags & (RADEON_FRONT | RADEON_BACK)) {
888
889                 BEGIN_RING(4);
890
891                 /* Ensure the 3D stream is idle before doing a
892                  * 2D fill to clear the front or back buffer.
893                  */
894                 RADEON_WAIT_UNTIL_3D_IDLE();
895
896                 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
897                 OUT_RING(clear->color_mask);
898
899                 ADVANCE_RING();
900
901                 /* Make sure we restore the 3D state next time.
902                  */
903                 sarea_priv->ctx_owner = 0;
904
905                 for (i = 0; i < nbox; i++) {
906                         int x = pbox[i].x1;
907                         int y = pbox[i].y1;
908                         int w = pbox[i].x2 - x;
909                         int h = pbox[i].y2 - y;
910
911                         DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
912                                   x, y, w, h, flags);
913
914                         if (flags & RADEON_FRONT) {
915                                 BEGIN_RING(6);
916
917                                 OUT_RING(CP_PACKET3
918                                          (RADEON_CNTL_PAINT_MULTI, 4));
919                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
920                                          RADEON_GMC_BRUSH_SOLID_COLOR |
921                                          (dev_priv->
922                                           color_fmt << 8) |
923                                          RADEON_GMC_SRC_DATATYPE_COLOR |
924                                          RADEON_ROP3_P |
925                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
926
927                                 OUT_RING(dev_priv->front_pitch_offset);
928                                 OUT_RING(clear->clear_color);
929
930                                 OUT_RING((x << 16) | y);
931                                 OUT_RING((w << 16) | h);
932
933                                 ADVANCE_RING();
934                         }
935
936                         if (flags & RADEON_BACK) {
937                                 BEGIN_RING(6);
938
939                                 OUT_RING(CP_PACKET3
940                                          (RADEON_CNTL_PAINT_MULTI, 4));
941                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
942                                          RADEON_GMC_BRUSH_SOLID_COLOR |
943                                          (dev_priv->
944                                           color_fmt << 8) |
945                                          RADEON_GMC_SRC_DATATYPE_COLOR |
946                                          RADEON_ROP3_P |
947                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
948
949                                 OUT_RING(dev_priv->back_pitch_offset);
950                                 OUT_RING(clear->clear_color);
951
952                                 OUT_RING((x << 16) | y);
953                                 OUT_RING((w << 16) | h);
954
955                                 ADVANCE_RING();
956                         }
957                 }
958         }
959
960         /* hyper z clear */
961         /* no docs available, based on reverse engineering by Stephane Marchesin */
962         if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
963             && (flags & RADEON_CLEAR_FASTZ)) {
964
965                 int i;
966                 int depthpixperline =
967                     dev_priv->depth_fmt ==
968                     RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
969                                                        2) : (dev_priv->
970                                                              depth_pitch / 4);
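                /* i.e. the pitch in bytes converted to pixels: hypothetically,
                 * a 16-bit Z buffer with depth_pitch == 2048 gives
                 * depthpixperline == 1024. */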
971
972                 u32 clearmask;
973
974                 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
975                     ((clear->depth_mask & 0xff) << 24);
976
977                 /* Make sure we restore the 3D state next time.
978                  * we haven't touched any "normal" state - still need this?
979                  */
980                 sarea_priv->ctx_owner = 0;
981
982                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
983                     && (flags & RADEON_USE_HIERZ)) {
984                         /* FIXME : reverse engineer that for Rx00 cards */
985                         /* FIXME : the mask supposedly contains low-res z values. So can't set
986                            just to the max (0xff? or actually 0x3fff?), need to take z clear
987                            value into account? */
988                         /* pattern seems to work for r100, though get slight
989                            rendering errors with glxgears. If hierz is not enabled for r100,
990                            only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
991                            other ones are ignored, and the same clear mask can be used. That's
992                            very different behaviour than R200 which needs different clear mask
993                            and different number of tiles to clear if hierz is enabled or not !?!
994                          */
995                         clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
996                 } else {
997                         /* clear mask : chooses the clearing pattern.
998                            rv250: could be used to clear only parts of macrotiles
999                            (but that would get really complicated...)?
1000                            bit 0 and 1 (either or both of them ?!?!) are used to
1001                            not clear tile (or maybe one of the bits indicates if the tile is
1002                            compressed or not), bit 2 and 3 to not clear tile 1,...,.
1003                            Pattern is as follows:
1004                            | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
1005                            bits -------------------------------------------------
1006                            | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
1007                            rv100: clearmask covers 2x8 4x1 tiles, but one clear still
1008                            covers 256 pixels ?!?
1009                          */
1010                         clearmask = 0x0;
1011                 }
1012
1013                 BEGIN_RING(8);
1014                 RADEON_WAIT_UNTIL_2D_IDLE();
1015                 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
1016                              tempRB3D_DEPTHCLEARVALUE);
1017                 /* what offset is this exactly ? */
1018                 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
1019                 /* need ctlstat, otherwise get some strange black flickering */
1020                 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
1021                              RADEON_RB3D_ZC_FLUSH_ALL);
1022                 ADVANCE_RING();
1023
1024                 for (i = 0; i < nbox; i++) {
1025                         int tileoffset, nrtilesx, nrtilesy, j;
1026                         /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
1027                         if ((dev_priv->flags & RADEON_HAS_HIERZ)
1028                             && !(dev_priv->microcode_version == UCODE_R200)) {
1029                                 /* FIXME : figure this out for r200 (when hierz is enabled). Or
1030                                    maybe r200 actually doesn't need to put the low-res z value into
1031                                    the tile cache like r100, but just needs to clear the hi-level z-buffer?
1032                                    Works for R100, both with hierz and without.
1033                                    R100 seems to operate on 2x1 8x8 tiles, but...
1034                                    odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
1035                                    problematic with resolutions which are not 64 pix aligned? */
1036                                 tileoffset =
1037                                     ((pbox[i].y1 >> 3) * depthpixperline +
1038                                      pbox[i].x1) >> 6;
1039                                 nrtilesx =
1040                                     ((pbox[i].x2 & ~63) -
1041                                      (pbox[i].x1 & ~63)) >> 4;
1042                                 nrtilesy =
1043                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1044                                 for (j = 0; j <= nrtilesy; j++) {
1045                                         BEGIN_RING(4);
1046                                         OUT_RING(CP_PACKET3
1047                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1048                                         /* first tile */
1049                                         OUT_RING(tileoffset * 8);
1050                                         /* the number of tiles to clear */
1051                                         OUT_RING(nrtilesx + 4);
1052                                         /* clear mask : chooses the clearing pattern. */
1053                                         OUT_RING(clearmask);
1054                                         ADVANCE_RING();
1055                                         tileoffset += depthpixperline >> 6;
1056                                 }
1057                         } else if (dev_priv->microcode_version == UCODE_R200) {
1058                                 /* works for rv250. */
1059                                 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
1060                                 tileoffset =
1061                                     ((pbox[i].y1 >> 3) * depthpixperline +
1062                                      pbox[i].x1) >> 5;
1063                                 nrtilesx =
1064                                     (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
1065                                 nrtilesy =
1066                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1067                                 for (j = 0; j <= nrtilesy; j++) {
1068                                         BEGIN_RING(4);
1069                                         OUT_RING(CP_PACKET3
1070                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1071                                         /* first tile */
1072                                         /* judging by the first tile offset needed, could possibly
1073                                            directly address/clear 4x4 tiles instead of 8x2 * 4x4
1074                                            macro tiles, though would still need clear mask for
1075                                            right/bottom if truly 4x4 granularity is desired ? */
1076                                         OUT_RING(tileoffset * 16);
1077                                         /* the number of tiles to clear */
1078                                         OUT_RING(nrtilesx + 1);
1079                                         /* clear mask : chooses the clearing pattern. */
1080                                         OUT_RING(clearmask);
1081                                         ADVANCE_RING();
1082                                         tileoffset += depthpixperline >> 5;
1083                                 }
1084                         } else {        /* rv100 */
1085                                 /* rv100 might not need 64 pix alignment, who knows */
1086                                 /* offsets are, hmm, weird */
1087                                 tileoffset =
1088                                     ((pbox[i].y1 >> 4) * depthpixperline +
1089                                      pbox[i].x1) >> 6;
1090                                 nrtilesx =
1091                                     ((pbox[i].x2 & ~63) -
1092                                      (pbox[i].x1 & ~63)) >> 4;
1093                                 nrtilesy =
1094                                     (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
1095                                 for (j = 0; j <= nrtilesy; j++) {
1096                                         BEGIN_RING(4);
1097                                         OUT_RING(CP_PACKET3
1098                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1099                                         OUT_RING(tileoffset * 128);
1100                                         /* the number of tiles to clear */
1101                                         OUT_RING(nrtilesx + 4);
1102                                         /* clear mask : chooses the clearing pattern. */
1103                                         OUT_RING(clearmask);
1104                                         ADVANCE_RING();
1105                                         tileoffset += depthpixperline >> 6;
1106                                 }
1107                         }
1108                 }
1109
1110                 /* TODO don't always clear all hi-level z tiles */
1111                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1112                     && (dev_priv->microcode_version == UCODE_R200)
1113                     && (flags & RADEON_USE_HIERZ))
1114                         /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
1115                         /* FIXME : the mask supposedly contains low-res z values. So can't set
1116                            just to the max (0xff? or actually 0x3fff?), need to take z clear
1117                            value into account? */
1118                 {
1119                         BEGIN_RING(4);
1120                         OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1121                         OUT_RING(0x0);  /* First tile */
1122                         OUT_RING(0x3cc0);
1123                         OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1124                         ADVANCE_RING();
1125                 }
1126         }
1127
1128         /* We have to clear the depth and/or stencil buffers by
1129          * rendering a quad into just those buffers.  Thus, we have to
1130          * make sure the 3D engine is configured correctly.
1131          */
1132         else if ((dev_priv->microcode_version == UCODE_R200) &&
1133                 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1134
1135                 int tempPP_CNTL;
1136                 int tempRE_CNTL;
1137                 int tempRB3D_CNTL;
1138                 int tempRB3D_ZSTENCILCNTL;
1139                 int tempRB3D_STENCILREFMASK;
1140                 int tempRB3D_PLANEMASK;
1141                 int tempSE_CNTL;
1142                 int tempSE_VTE_CNTL;
1143                 int tempSE_VTX_FMT_0;
1144                 int tempSE_VTX_FMT_1;
1145                 int tempSE_VAP_CNTL;
1146                 int tempRE_AUX_SCISSOR_CNTL;
1147
1148                 tempPP_CNTL = 0;
1149                 tempRE_CNTL = 0;
1150
1151                 tempRB3D_CNTL = depth_clear->rb3d_cntl;
1152
1153                 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1154                 tempRB3D_STENCILREFMASK = 0x0;
1155
1156                 tempSE_CNTL = depth_clear->se_cntl;
1157
1158                 /* Disable TCL */
1159
1160                 tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1161                                           (0x9 <<
1162                                            SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1163
1164                 tempRB3D_PLANEMASK = 0x0;
1165
1166                 tempRE_AUX_SCISSOR_CNTL = 0x0;
1167
1168                 tempSE_VTE_CNTL =
1169                     SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1170
1171                 /* Vertex format (X, Y, Z, W) */
1172                 tempSE_VTX_FMT_0 =
1173                     SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1174                     SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1175                 tempSE_VTX_FMT_1 = 0x0;
1176
1177                 /*
1178                  * Depth buffer specific enables
1179                  */
1180                 if (flags & RADEON_DEPTH) {
1181                         /* Enable depth buffer */
1182                         tempRB3D_CNTL |= RADEON_Z_ENABLE;
1183                 } else {
1184                         /* Disable depth buffer */
1185                         tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1186                 }
1187
1188                 /*
1189                  * Stencil buffer specific enables
1190                  */
1191                 if (flags & RADEON_STENCIL) {
1192                         tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1193                         tempRB3D_STENCILREFMASK = clear->depth_mask;
1194                 } else {
1195                         tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1196                         tempRB3D_STENCILREFMASK = 0x00000000;
1197                 }
1198
1199                 if (flags & RADEON_USE_COMP_ZBUF) {
1200                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1201                             RADEON_Z_DECOMPRESSION_ENABLE;
1202                 }
1203                 if (flags & RADEON_USE_HIERZ) {
1204                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1205                 }
1206
1207                 BEGIN_RING(26);
1208                 RADEON_WAIT_UNTIL_2D_IDLE();
1209
1210                 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1211                 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1212                 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1213                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1214                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1215                              tempRB3D_STENCILREFMASK);
1216                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1217                 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1218                 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1219                 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1220                 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1221                 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1222                 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1223                 ADVANCE_RING();
1224
1225                 /* Make sure we restore the 3D state next time.
1226                  */
1227                 sarea_priv->ctx_owner = 0;
1228
1229                 for (i = 0; i < nbox; i++) {
1230
1231                         /* Funny that this should be required --
1232                          *  sets top-left?
1233                          */
1234                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1235
1236                         BEGIN_RING(14);
1237                         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1238                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1239                                   RADEON_PRIM_WALK_RING |
1240                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1241                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1242                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1243                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1244                         OUT_RING(0x3f800000);
1245                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1246                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1247                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1248                         OUT_RING(0x3f800000);
1249                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1250                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1251                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1252                         OUT_RING(0x3f800000);
1253                         ADVANCE_RING();
1254                 }
1255         } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1256
1257                 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1258
1259                 rb3d_cntl = depth_clear->rb3d_cntl;
1260
1261                 if (flags & RADEON_DEPTH) {
1262                         rb3d_cntl |= RADEON_Z_ENABLE;
1263                 } else {
1264                         rb3d_cntl &= ~RADEON_Z_ENABLE;
1265                 }
1266
1267                 if (flags & RADEON_STENCIL) {
1268                         rb3d_cntl |= RADEON_STENCIL_ENABLE;
1269                         rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
1270                 } else {
1271                         rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1272                         rb3d_stencilrefmask = 0x00000000;
1273                 }
1274
1275                 if (flags & RADEON_USE_COMP_ZBUF) {
1276                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1277                             RADEON_Z_DECOMPRESSION_ENABLE;
1278                 }
1279                 if (flags & RADEON_USE_HIERZ) {
1280                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1281                 }
1282
1283                 BEGIN_RING(13);
1284                 RADEON_WAIT_UNTIL_2D_IDLE();
1285
1286                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1287                 OUT_RING(0x00000000);
1288                 OUT_RING(rb3d_cntl);
1289
1290                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1291                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1292                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1293                 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1294                 ADVANCE_RING();
1295
1296                 /* Make sure we restore the 3D state next time.
1297                  */
1298                 sarea_priv->ctx_owner = 0;
1299
1300                 for (i = 0; i < nbox; i++) {
1301
1302                         /* Funny that this should be required --
1303                          *  sets top-left?
1304                          */
1305                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1306
1307                         BEGIN_RING(15);
1308
1309                         OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1310                         OUT_RING(RADEON_VTX_Z_PRESENT |
1311                                  RADEON_VTX_PKCOLOR_PRESENT);
1312                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1313                                   RADEON_PRIM_WALK_RING |
1314                                   RADEON_MAOS_ENABLE |
1315                                   RADEON_VTX_FMT_RADEON_MODE |
1316                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1317
1318                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1319                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1320                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1321                         OUT_RING(0x0);
1322
1323                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1324                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1325                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1326                         OUT_RING(0x0);
1327
1328                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1329                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1330                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1331                         OUT_RING(0x0);
1332
1333                         ADVANCE_RING();
1334                 }
1335         }
1336
1337         /* Increment the clear counter.  The client-side 3D driver must
1338          * wait on this value before performing the clear ioctl.  We
1339          * need this because the card's so damned fast...
1340          */
1341         sarea_priv->last_clear++;
1342
1343         BEGIN_RING(4);
1344
1345         RADEON_CLEAR_AGE(sarea_priv->last_clear);
1346         RADEON_WAIT_UNTIL_IDLE();
1347
1348         ADVANCE_RING();
1349 }
1350
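/* Dispatch a buffer swap: wait for the 3D stream to idle, then blit the back
 * buffer into the front buffer (sources swapped when page-flipped) for every
 * pending cliprect, and finally bump the frame-age counter that clients
 * throttle on before the next swapbuffer ioctl.
 */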
1351 static void radeon_cp_dispatch_swap(struct drm_device *dev, struct drm_master *master)
1352 {
1353         drm_radeon_private_t *dev_priv = dev->dev_private;
1354         struct drm_radeon_master_private *master_priv = master->driver_priv;
1355         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1356         int nbox = sarea_priv->nbox;
1357         struct drm_clip_rect *pbox = sarea_priv->boxes;
1358         int i;
1359         RING_LOCALS;
1360         DRM_DEBUG("\n");
1361
1362         /* Do some trivial performance monitoring...
1363          */
1364         if (dev_priv->do_boxes)
1365                 radeon_cp_performance_boxes(dev_priv, master_priv);
1366
1367         /* Wait for the 3D stream to idle before dispatching the bitblt.
1368          * This will prevent data corruption between the two streams.
1369          */
1370         BEGIN_RING(2);
1371
1372         RADEON_WAIT_UNTIL_3D_IDLE();
1373
1374         ADVANCE_RING();
1375
1376         for (i = 0; i < nbox; i++) {
1377                 int x = pbox[i].x1;
1378                 int y = pbox[i].y1;
1379                 int w = pbox[i].x2 - x;
1380                 int h = pbox[i].y2 - y;
1381
1382                 DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1383
1384                 BEGIN_RING(9);
1385
1386                 OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1387                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1388                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1389                          RADEON_GMC_BRUSH_NONE |
1390                          (dev_priv->color_fmt << 8) |
1391                          RADEON_GMC_SRC_DATATYPE_COLOR |
1392                          RADEON_ROP3_S |
1393                          RADEON_DP_SRC_SOURCE_MEMORY |
1394                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1395
1396                 /* Make this work even if front & back are flipped:
1397                  */
1398                 OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1399                 if (sarea_priv->pfCurrentPage == 0) {
1400                         OUT_RING(dev_priv->back_pitch_offset);
1401                         OUT_RING(dev_priv->front_pitch_offset);
1402                 } else {
1403                         OUT_RING(dev_priv->front_pitch_offset);
1404                         OUT_RING(dev_priv->back_pitch_offset);
1405                 }
1406
1407                 OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1408                 OUT_RING((x << 16) | y);
1409                 OUT_RING((x << 16) | y);
1410                 OUT_RING((w << 16) | h);
1411
1412                 ADVANCE_RING();
1413         }
1414
1415         /* Increment the frame counter.  The client-side 3D driver must
1416          * throttle the framerate by waiting for this value before
1417          * performing the swapbuffer ioctl.
1418          */
1419         sarea_priv->last_frame++;
1420
1421         BEGIN_RING(4);
1422
1423         RADEON_FRAME_AGE(sarea_priv->last_frame);
1424         RADEON_WAIT_UNTIL_2D_IDLE();
1425
1426         ADVANCE_RING();
1427 }
1428
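/* Dispatch a page flip: point both CRTC offset registers at the other of the
 * two colour buffers, toggle pfCurrentPage, and emit a new frame age for
 * client-side throttling.
 */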
1429 void radeon_cp_dispatch_flip(struct drm_device *dev, struct drm_master *master)
1430 {
1431         drm_radeon_private_t *dev_priv = dev->dev_private;
1432         struct drm_radeon_master_private *master_priv = master->driver_priv;
1433         struct drm_sarea *sarea = (struct drm_sarea *)master_priv->sarea->handle;
1434         int offset = (master_priv->sarea_priv->pfCurrentPage == 1)
1435             ? dev_priv->front_offset : dev_priv->back_offset;
1436         RING_LOCALS;
1437         DRM_DEBUG("pfCurrentPage=%d\n",
1438                   master_priv->sarea_priv->pfCurrentPage);
1439
1440         /* Do some trivial performance monitoring...
1441          */
1442         if (dev_priv->do_boxes) {
1443                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1444                 radeon_cp_performance_boxes(dev_priv, master_priv);
1445         }
1446
1447         /* Update the frame offsets for both CRTCs
1448          */
1449         BEGIN_RING(6);
1450
1451         RADEON_WAIT_UNTIL_3D_IDLE();
1452         OUT_RING_REG(RADEON_CRTC_OFFSET,
1453                      ((sarea->frame.y * dev_priv->front_pitch +
1454                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1455                      + offset);
1456         OUT_RING_REG(RADEON_CRTC2_OFFSET, master_priv->sarea_priv->crtc2_base
1457                      + offset);
1458
1459         ADVANCE_RING();
1460
1461         /* Increment the frame counter.  The client-side 3D driver must
1462          * throttle the framerate by waiting for this value before
1463          * performing the swapbuffer ioctl.
1464          */
1465         master_priv->sarea_priv->last_frame++;
1466         master_priv->sarea_priv->pfCurrentPage =
1467                 1 - master_priv->sarea_priv->pfCurrentPage;
1468
1469         BEGIN_RING(2);
1470
1471         RADEON_FRAME_AGE(master_priv->sarea_priv->last_frame);
1472
1473         ADVANCE_RING();
1474 }
1475
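/* Returns nonzero if the vertex count is not valid for the given hardware
 * primitive type.  For example, a TRI_LIST with nr == 5 is rejected (not a
 * multiple of 3), while a TRI_STRIP with nr == 5 is accepted (only needs
 * nr >= 3).
 */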
1476 static int bad_prim_vertex_nr(int primitive, int nr)
1477 {
1478         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1479         case RADEON_PRIM_TYPE_NONE:
1480         case RADEON_PRIM_TYPE_POINT:
1481                 return nr < 1;
1482         case RADEON_PRIM_TYPE_LINE:
1483                 return (nr & 1) || nr == 0;
1484         case RADEON_PRIM_TYPE_LINE_STRIP:
1485                 return nr < 2;
1486         case RADEON_PRIM_TYPE_TRI_LIST:
1487         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1488         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1489         case RADEON_PRIM_TYPE_RECT_LIST:
1490                 return nr % 3 || nr == 0;
1491         case RADEON_PRIM_TYPE_TRI_FAN:
1492         case RADEON_PRIM_TYPE_TRI_STRIP:
1493                 return nr < 3;
1494         default:
1495                 return 1;
1496         }
1497 }
1498
1499 typedef struct {
1500         unsigned int start;
1501         unsigned int finish;
1502         unsigned int prim;
1503         unsigned int numverts;
1504         unsigned int offset;
1505         unsigned int vc_format;
1506 } drm_radeon_tcl_prim_t;
1507
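/* Emit a vertex-buffer draw: for each pending cliprect, emit the cliprect
 * followed by a 3D_RNDR_GEN_INDX_PRIM packet in PRIM_WALK_LIST mode, so the
 * CP reads the vertices directly from the client's GART buffer at
 * gart_buffers_offset + buf->offset + prim->start.
 */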
1508 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
1509                                       struct drm_file *file_priv,
1510                                       struct drm_buf * buf,
1511                                       drm_radeon_tcl_prim_t * prim)
1512 {
1513         drm_radeon_private_t *dev_priv = dev->dev_private;
1514         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
1515         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1516         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1517         int numverts = (int)prim->numverts;
1518         int nbox = sarea_priv->nbox;
1519         int i = 0;
1520         RING_LOCALS;
1521
1522         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1523                   prim->prim,
1524                   prim->vc_format, prim->start, prim->finish, prim->numverts);
1525
1526         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1527                 DRM_ERROR("bad prim %x numverts %d\n",
1528                           prim->prim, prim->numverts);
1529                 return;
1530         }
1531
1532         do {
1533                 /* Emit the next cliprect */
1534                 if (i < nbox) {
1535                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1536                 }
1537
1538                 /* Emit the vertex buffer rendering commands */
1539                 BEGIN_RING(5);
1540
1541                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1542                 OUT_RING(offset);
1543                 OUT_RING(numverts);
1544                 OUT_RING(prim->vc_format);
1545                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1546                          RADEON_COLOR_ORDER_RGBA |
1547                          RADEON_VTX_FMT_RADEON_MODE |
1548                          (numverts << RADEON_NUM_VERTICES_SHIFT));
1549
1550                 ADVANCE_RING();
1551
1552                 i++;
1553         } while (i < nbox);
1554 }
1555
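/* Retire a DMA buffer: stamp it with a new dispatch age and emit that age
 * through the ring, so the freelist can hand the buffer out again once the
 * engine has passed this point.
 */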
1556 void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
1557 {
1558         drm_radeon_private_t *dev_priv = dev->dev_private;
1559         struct drm_radeon_master_private *master_priv = master->driver_priv;
1560         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1561         RING_LOCALS;
1562
1563         buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
1564
1565         /* Emit the vertex buffer age */
1566         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
1567                 BEGIN_RING(3);
1568                 R600_DISPATCH_AGE(buf_priv->age);
1569                 ADVANCE_RING();
1570         } else {
1571                 BEGIN_RING(2);
1572                 RADEON_DISPATCH_AGE(buf_priv->age);
1573                 ADVANCE_RING();
1574         }
1575
1576         buf->pending = 1;
1577         buf->used = 0;
1578 }
1579
1580 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1581                                         struct drm_buf * buf, int start, int end)
1582 {
1583         drm_radeon_private_t *dev_priv = dev->dev_private;
1584         RING_LOCALS;
1585         DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1586
1587         if (start != end) {
1588                 int offset = (dev_priv->gart_buffers_offset
1589                               + buf->offset + start);
1590                 int dwords = (end - start + 3) / sizeof(u32);
1591
1592                 /* Indirect buffer data must be an even number of
1593                  * dwords, so if we've been given an odd number we must
1594                  * pad the data with a Type-2 CP packet.
1595                  */
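                /* Illustrative example: with start = 0 and end = 26 the
                 * buffer spans (26 + 3) / 4 = 7 dwords, so one
                 * RADEON_CP_PACKET2 (type-2 NOP) is appended and 8 dwords
                 * are dispatched.
                 */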
1596                 if (dwords & 1) {
1597                         u32 *data = (u32 *)
1598                             ((char *)dev->agp_buffer_map->handle
1599                              + buf->offset + start);
1600                         data[dwords++] = RADEON_CP_PACKET2;
1601                 }
1602
1603                 /* Fire off the indirect buffer */
1604                 BEGIN_RING(3);
1605
1606                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1607                 OUT_RING(offset);
1608                 OUT_RING(dwords);
1609
1610                 ADVANCE_RING();
1611         }
1612 }
1613
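/* Emit an indexed draw: the 3D_RNDR_GEN_INDX_PRIM header is written straight
 * into the element buffer in front of the indices (PRIM_WALK_IND), and the
 * resulting packet is then fired once per pending cliprect via
 * radeon_cp_dispatch_indirect().
 */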
1614 static void radeon_cp_dispatch_indices(struct drm_device *dev,
1615                                        struct drm_master *master,
1616                                        struct drm_buf * elt_buf,
1617                                        drm_radeon_tcl_prim_t * prim)
1618 {
1619         drm_radeon_private_t *dev_priv = dev->dev_private;
1620         struct drm_radeon_master_private *master_priv = master->driver_priv;
1621         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1622         int offset = dev_priv->gart_buffers_offset + prim->offset;
1623         u32 *data;
1624         int dwords;
1625         int i = 0;
1626         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1627         int count = (prim->finish - start) / sizeof(u16);
1628         int nbox = sarea_priv->nbox;
1629
1630         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1631                   prim->prim,
1632                   prim->vc_format,
1633                   prim->start, prim->finish, prim->offset, prim->numverts);
1634
1635         if (bad_prim_vertex_nr(prim->prim, count)) {
1636                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1637                 return;
1638         }
1639
1640         if (start >= prim->finish || (prim->start & 0x7)) {
1641                 DRM_ERROR("buffer prim %d\n", prim->prim);
1642                 return;
1643         }
1644
1645         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1646
1647         data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1648                         elt_buf->offset + prim->start);
1649
1650         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1651         data[1] = offset;
1652         data[2] = prim->numverts;
1653         data[3] = prim->vc_format;
1654         data[4] = (prim->prim |
1655                    RADEON_PRIM_WALK_IND |
1656                    RADEON_COLOR_ORDER_RGBA |
1657                    RADEON_VTX_FMT_RADEON_MODE |
1658                    (count << RADEON_NUM_VERTICES_SHIFT));
1659
1660         do {
1661                 if (i < nbox)
1662                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1663
1664                 radeon_cp_dispatch_indirect(dev, elt_buf,
1665                                             prim->start, prim->finish);
1666
1667                 i++;
1668         } while (i < nbox);
1669
1670 }
1671
1672 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1673
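/* Upload a texture image through GART DMA buffers.  The image is split into
 * passes of at most RADEON_MAX_TEXTURE_SIZE bytes: each pass copies the user
 * data into a free DMA buffer and blits it to the destination offset, after
 * which image->y, image->height and image->data are advanced.  If no buffer
 * is free, the updated image is copied back to user space and -EAGAIN is
 * returned so the client can retry.
 */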
1674 static int radeon_cp_dispatch_texture(struct drm_device * dev,
1675                                       struct drm_file *file_priv,
1676                                       drm_radeon_texture_t * tex,
1677                                       drm_radeon_tex_image_t * image)
1678 {
1679         drm_radeon_private_t *dev_priv = dev->dev_private;
1680         struct drm_buf *buf;
1681         u32 format;
1682         u32 *buffer;
1683         const u8 __user *data;
1684         int size, dwords, tex_width, blit_width, spitch;
1685         u32 height;
1686         int i;
1687         u32 texpitch, microtile;
1688         u32 offset, byte_offset;
1689         RING_LOCALS;
1690
1691         if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
1692                 DRM_ERROR("Invalid destination offset\n");
1693                 return -EINVAL;
1694         }
1695
1696         dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1697
1698         /* Flush the pixel cache.  This ensures no pixel data gets mixed
1699          * up with the texture data from the host data blit, otherwise
1700          * part of the texture image may be corrupted.
1701          */
1702         BEGIN_RING(4);
1703         RADEON_FLUSH_CACHE();
1704         RADEON_WAIT_UNTIL_IDLE();
1705         ADVANCE_RING();
1706
1707         /* The compiler won't optimize away a division by a variable,
1708          * even if the only legal values are powers of two.  Thus, we'll
1709          * use a shift instead.
1710          */
1711         switch (tex->format) {
1712         case RADEON_TXFORMAT_ARGB8888:
1713         case RADEON_TXFORMAT_RGBA8888:
1714                 format = RADEON_COLOR_FORMAT_ARGB8888;
1715                 tex_width = tex->width * 4;
1716                 blit_width = image->width * 4;
1717                 break;
1718         case RADEON_TXFORMAT_AI88:
1719         case RADEON_TXFORMAT_ARGB1555:
1720         case RADEON_TXFORMAT_RGB565:
1721         case RADEON_TXFORMAT_ARGB4444:
1722         case RADEON_TXFORMAT_VYUY422:
1723         case RADEON_TXFORMAT_YVYU422:
1724                 format = RADEON_COLOR_FORMAT_RGB565;
1725                 tex_width = tex->width * 2;
1726                 blit_width = image->width * 2;
1727                 break;
1728         case RADEON_TXFORMAT_I8:
1729         case RADEON_TXFORMAT_RGB332:
1730                 format = RADEON_COLOR_FORMAT_CI8;
1731                 tex_width = tex->width * 1;
1732                 blit_width = image->width * 1;
1733                 break;
1734         default:
1735                 DRM_ERROR("invalid texture format %d\n", tex->format);
1736                 return -EINVAL;
1737         }
1738         spitch = blit_width >> 6;
1739         if (spitch == 0 && image->height > 1)
1740                 return -EINVAL;
1741
1742         texpitch = tex->pitch;
1743         if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1744                 microtile = 1;
1745                 if (tex_width < 64) {
1746                         texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1747                         /* we got tiled coordinates, untile them */
1748                         image->x *= 2;
1749                 }
1750         } else
1751                 microtile = 0;
1752
1753         /* this might fail for zero-sized uploads - are those illegal? */
1754         if (!radeon_check_offset(dev_priv, tex->offset + image->height *
1755                                 blit_width - 1)) {
1756                 DRM_ERROR("Invalid final destination offset\n");
1757                 return -EINVAL;
1758         }
1759
1760         DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1761
1762         do {
1763                 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1764                           tex->offset >> 10, tex->pitch, tex->format,
1765                           image->x, image->y, image->width, image->height);
1766
1767                 /* Make a copy of some parameters in case we have to
1768                  * update them for a multi-pass texture blit.
1769                  */
1770                 height = image->height;
1771                 data = (const u8 __user *)image->data;
1772
1773                 size = height * blit_width;
1774
1775                 if (size > RADEON_MAX_TEXTURE_SIZE) {
1776                         height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1777                         size = height * blit_width;
1778                 } else if (size < 4 && size > 0) {
1779                         size = 4;
1780                 } else if (size == 0) {
1781                         return 0;
1782                 }
1783
1784                 buf = radeon_freelist_get(dev);
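                /* Note: the idle-and-retry fallback below is disabled by the
                 * "0 &&", so a failed allocation falls through to the -EAGAIN
                 * path instead of stalling the CP; this appears intentional.
                 */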
1785                 if (0 && !buf) {
1786                         radeon_do_cp_idle(dev_priv);
1787                         buf = radeon_freelist_get(dev);
1788                 }
1789                 if (!buf) {
1790                         DRM_DEBUG("EAGAIN\n");
1791                         if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1792                                 return -EFAULT;
1793                         return -EAGAIN;
1794                 }
1795
1796                 /* Dispatch the indirect buffer.
1797                  */
1798                 buffer =
1799                     (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1800                 dwords = size / 4;
1801
1802 #define RADEON_COPY_MT(_buf, _data, _width) \
1803         do { \
1804                 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1805                         DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1806                         return -EFAULT; \
1807                 } \
1808         } while(0)
1809
1810                 if (microtile) {
1811                         /* Texture micro-tiling is in use, so the minimum texture width is
1812                            16 bytes.  However, we cannot use the blitter directly for texture
1813                            widths < 64 bytes, since the minimum tex pitch is 64 bytes and it
1814                            must match the texture width, otherwise the blitter will tile it
1815                            wrong.  Thus we tile manually in that case.  Additionally, tex
1816                            height == 1 needs special-casing, since our actual image will have
1817                            height 2 and we must not read beyond the texture size supplied
1818                            from user space. */
1819                         if (tex->height == 1) {
1820                                 if (tex_width >= 64 || tex_width <= 16) {
1821                                         RADEON_COPY_MT(buffer, data,
1822                                                 (int)(tex_width * sizeof(u32)));
1823                                 } else if (tex_width == 32) {
1824                                         RADEON_COPY_MT(buffer, data, 16);
1825                                         RADEON_COPY_MT(buffer + 8,
1826                                                        data + 16, 16);
1827                                 }
1828                         } else if (tex_width >= 64 || tex_width == 16) {
1829                                 RADEON_COPY_MT(buffer, data,
1830                                                (int)(dwords * sizeof(u32)));
1831                         } else if (tex_width < 16) {
1832                                 for (i = 0; i < tex->height; i++) {
1833                                         RADEON_COPY_MT(buffer, data, tex_width);
1834                                         buffer += 4;
1835                                         data += tex_width;
1836                                 }
1837                         } else if (tex_width == 32) {
1838                                 /* TODO: make sure this works when not fitting in one buffer
1839                                    (i.e. 32bytes x 2048...) */
1840                                 for (i = 0; i < tex->height; i += 2) {
1841                                         RADEON_COPY_MT(buffer, data, 16);
1842                                         data += 16;
1843                                         RADEON_COPY_MT(buffer + 8, data, 16);
1844                                         data += 16;
1845                                         RADEON_COPY_MT(buffer + 4, data, 16);
1846                                         data += 16;
1847                                         RADEON_COPY_MT(buffer + 12, data, 16);
1848                                         data += 16;
1849                                         buffer += 16;
1850                                 }
1851                         }
1852                 } else {
1853                         if (tex_width >= 32) {
1854                                 /* Texture image width is larger than the minimum, so we
1855                                  * can upload it directly.
1856                                  */
1857                                 RADEON_COPY_MT(buffer, data,
1858                                                (int)(dwords * sizeof(u32)));
1859                         } else {
1860                                 /* Texture image width is less than the minimum, so we
1861                                  * need to pad out each image scanline to the minimum
1862                                  * width.
1863                                  */
1864                                 for (i = 0; i < tex->height; i++) {
1865                                         RADEON_COPY_MT(buffer, data, tex_width);
1866                                         buffer += 8;
1867                                         data += tex_width;
1868                                 }
1869                         }
1870                 }
1871
1872 #undef RADEON_COPY_MT
1873                 byte_offset = (image->y & ~2047) * blit_width;
1874                 buf->file_priv = file_priv;
1875                 buf->used = size;
1876                 offset = dev_priv->gart_buffers_offset + buf->offset;
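                /* One CNTL_BITBLT_MULTI per pass: the source is the DMA
                 * buffer just filled (spitch/offset), the destination is the
                 * user texture (texpitch, tex->offset + byte_offset), and
                 * image->width x height texels are blitted.
                 */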
1877                 BEGIN_RING(9);
1878                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1879                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1880                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1881                          RADEON_GMC_BRUSH_NONE |
1882                          (format << 8) |
1883                          RADEON_GMC_SRC_DATATYPE_COLOR |
1884                          RADEON_ROP3_S |
1885                          RADEON_DP_SRC_SOURCE_MEMORY |
1886                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1887                 OUT_RING((spitch << 22) | (offset >> 10));
1888                 OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
1889                 OUT_RING(0);
1890                 OUT_RING((image->x << 16) | (image->y % 2048));
1891                 OUT_RING((image->width << 16) | height);
1892                 RADEON_WAIT_UNTIL_2D_IDLE();
1893                 ADVANCE_RING();
1894                 COMMIT_RING();
1895
1896                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
1897
1898                 /* Update the input parameters for next time */
1899                 image->y += height;
1900                 image->height -= height;
1901                 image->data = (const u8 __user *)image->data + size;
1902         } while (image->height > 0);
1903
1904         /* Flush the pixel cache after the blit completes.  This ensures
1905          * the texture data is written out to memory before rendering
1906          * continues.
1907          */
1908         BEGIN_RING(4);
1909         RADEON_FLUSH_CACHE();
1910         RADEON_WAIT_UNTIL_2D_IDLE();
1911         ADVANCE_RING();
1912         COMMIT_RING();
1913
1914         return 0;
1915 }
1916
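/* Upload the 32x32 polygon stipple pattern: reset RE_STIPPLE_ADDR to 0, then
 * stream all 32 pattern dwords into RE_STIPPLE_DATA with a single PACKET0
 * table write (2 + 33 = 35 ring dwords, matching BEGIN_RING(35)).
 */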
1917 static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
1918 {
1919         drm_radeon_private_t *dev_priv = dev->dev_private;
1920         int i;
1921         RING_LOCALS;
1922         DRM_DEBUG("\n");
1923
1924         BEGIN_RING(35);
1925
1926         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1927         OUT_RING(0x00000000);
1928
1929         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1930         for (i = 0; i < 32; i++) {
1931                 OUT_RING(stipple[i]);
1932         }
1933
1934         ADVANCE_RING();
1935 }
1936
1937 static void radeon_apply_surface_regs(int surf_index,
1938                                       drm_radeon_private_t *dev_priv)
1939 {
1940         if (!dev_priv->mmio)
1941                 return;
1942
1943         radeon_do_cp_idle(dev_priv);
1944
1945         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1946                      dev_priv->surfaces[surf_index].flags);
1947         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1948                      dev_priv->surfaces[surf_index].lower);
1949         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1950                      dev_priv->surfaces[surf_index].upper);
1951 }
1952
1953 /* Allocates a virtual surface
1954  * doesn't always allocate a real surface, will stretch an existing
1955  * surface when possible.
1956  *
1957  * Note that the refcount can be at most 2: if refcount could reach 3, a
1958  * free might force us to allocate a new surface, which might not always
1959  * be available.
1960  * For example: we allocate three contiguous surfaces A, B and C. If B is
1961  * freed, we suddenly need two surfaces to store A and C, which might
1962  * not always be available.
1963  */
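/* Illustrative numbers: if a surface with identical flags and refcount 1
 * already covers 0x100000-0x1fffff and a new request covers
 * 0x200000-0x2fffff, the "extend after" case below stretches the existing
 * hardware surface instead of consuming a new one.
 */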
1964 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1965                          drm_radeon_private_t *dev_priv,
1966                          struct drm_file *file_priv)
1967 {
1968         struct radeon_virt_surface *s;
1969         int i;
1970         int virt_surface_index;
1971         uint32_t new_upper, new_lower;
1972
1973         new_lower = new->address;
1974         new_upper = new_lower + new->size - 1;
1975
1976         /* sanity check */
1977         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1978             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1979              RADEON_SURF_ADDRESS_FIXED_MASK)
1980             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1981                 return -1;
1982
1983         /* make sure there is no overlap with existing surfaces */
1984         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1985                 if ((dev_priv->surfaces[i].refcount != 0) &&
1986                     (((new_lower >= dev_priv->surfaces[i].lower) &&
1987                       (new_lower < dev_priv->surfaces[i].upper)) ||
1988                      ((new_lower < dev_priv->surfaces[i].lower) &&
1989                       (new_upper > dev_priv->surfaces[i].lower)))) {
1990                         return -1;
1991                 }
1992         }
1993
1994         /* find a virtual surface */
1995         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1996                 if (dev_priv->virt_surfaces[i].file_priv == NULL)
1997                         break;
1998         if (i == 2 * RADEON_MAX_SURFACES) {
1999                 return -1;
2000         }
2001         virt_surface_index = i;
2002
2003         /* try to reuse an existing surface */
2004         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2005                 /* extend before */
2006                 if ((dev_priv->surfaces[i].refcount == 1) &&
2007                     (new->flags == dev_priv->surfaces[i].flags) &&
2008                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
2009                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2010                         s->surface_index = i;
2011                         s->lower = new_lower;
2012                         s->upper = new_upper;
2013                         s->flags = new->flags;
2014                         s->file_priv = file_priv;
2015                         dev_priv->surfaces[i].refcount++;
2016                         dev_priv->surfaces[i].lower = s->lower;
2017                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2018                         return virt_surface_index;
2019                 }
2020
2021                 /* extend after */
2022                 if ((dev_priv->surfaces[i].refcount == 1) &&
2023                     (new->flags == dev_priv->surfaces[i].flags) &&
2024                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
2025                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2026                         s->surface_index = i;
2027                         s->lower = new_lower;
2028                         s->upper = new_upper;
2029                         s->flags = new->flags;
2030                         s->file_priv = file_priv;
2031                         dev_priv->surfaces[i].refcount++;
2032                         dev_priv->surfaces[i].upper = s->upper;
2033                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2034                         return virt_surface_index;
2035                 }
2036         }
2037
2038         /* okay, we need a new one */
2039         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2040                 if (dev_priv->surfaces[i].refcount == 0) {
2041                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2042                         s->surface_index = i;
2043                         s->lower = new_lower;
2044                         s->upper = new_upper;
2045                         s->flags = new->flags;
2046                         s->file_priv = file_priv;
2047                         dev_priv->surfaces[i].refcount = 1;
2048                         dev_priv->surfaces[i].lower = s->lower;
2049                         dev_priv->surfaces[i].upper = s->upper;
2050                         dev_priv->surfaces[i].flags = s->flags;
2051                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2052                         return virt_surface_index;
2053                 }
2054         }
2055
2056         /* we didn't find anything */
2057         return -1;
2058 }
2059
2060 static int free_surface(struct drm_file *file_priv,
2061                         drm_radeon_private_t * dev_priv,
2062                         int lower)
2063 {
2064         struct radeon_virt_surface *s;
2065         int i;
2066         /* find the virtual surface */
2067         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2068                 s = &(dev_priv->virt_surfaces[i]);
2069                 if (s->file_priv) {
2070                         if ((lower == s->lower) && (file_priv == s->file_priv))
2071                         {
2072                                 if (dev_priv->surfaces[s->surface_index].
2073                                     lower == s->lower)
2074                                         dev_priv->surfaces[s->surface_index].
2075                                             lower = s->upper;
2076
2077                                 if (dev_priv->surfaces[s->surface_index].
2078                                     upper == s->upper)
2079                                         dev_priv->surfaces[s->surface_index].
2080                                             upper = s->lower;
2081
2082                                 dev_priv->surfaces[s->surface_index].refcount--;
2083                                 if (dev_priv->surfaces[s->surface_index].
2084                                     refcount == 0)
2085                                         dev_priv->surfaces[s->surface_index].
2086                                             flags = 0;
2087                                 s->file_priv = NULL;
2088                                 radeon_apply_surface_regs(s->surface_index,
2089                                                           dev_priv);
2090                                 return 0;
2091                         }
2092                 }
2093         }
2094         return 1;
2095 }
2096
2097 static void radeon_surfaces_release(struct drm_file *file_priv,
2098                                     drm_radeon_private_t * dev_priv)
2099 {
2100         int i;
2101         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2102                 if (dev_priv->virt_surfaces[i].file_priv == file_priv)
2103                         free_surface(file_priv, dev_priv,
2104                                      dev_priv->virt_surfaces[i].lower);
2105         }
2106 }
2107
2108 /* ================================================================
2109  * IOCTL functions
2110  */
2111 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2112 {
2113         drm_radeon_private_t *dev_priv = dev->dev_private;
2114         drm_radeon_surface_alloc_t *alloc = data;
2115
2116         if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2117                 return -EINVAL;
2118         else
2119                 return 0;
2120 }
2121
2122 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2123 {
2124         drm_radeon_private_t *dev_priv = dev->dev_private;
2125         drm_radeon_surface_free_t *memfree = data;
2126
2127         if (free_surface(file_priv, dev_priv, memfree->address))
2128                 return -EINVAL;
2129         else
2130                 return 0;
2131 }
2132
2133 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2134 {
2135         drm_radeon_private_t *dev_priv = dev->dev_private;
2136         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2137         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2138         drm_radeon_clear_t *clear = data;
2139         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2140         DRM_DEBUG("\n");
2141
2142         LOCK_TEST_WITH_RETURN(dev, file_priv);
2143
2144         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2145
2146         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2147                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2148
2149         if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2150                                sarea_priv->nbox * sizeof(depth_boxes[0])))
2151                 return -EFAULT;
2152
2153         radeon_cp_dispatch_clear(dev, file_priv->master, clear, depth_boxes);
2154
2155         COMMIT_RING();
2156         return 0;
2157 }
2158
2159 /* Not sure why this isn't set all the time:
2160  */
2161 static int radeon_do_init_pageflip(struct drm_device *dev, struct drm_master *master)
2162 {
2163         drm_radeon_private_t *dev_priv = dev->dev_private;
2164         struct drm_radeon_master_private *master_priv = master->driver_priv;
2165         RING_LOCALS;
2166
2167         DRM_DEBUG("\n");
2168
2169         BEGIN_RING(6);
2170         RADEON_WAIT_UNTIL_3D_IDLE();
2171         OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2172         OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2173                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2174         OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2175         OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2176                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2177         ADVANCE_RING();
2178
2179         dev_priv->page_flipping = 1;
2180
2181         if (master_priv->sarea_priv->pfCurrentPage != 1)
2182                 master_priv->sarea_priv->pfCurrentPage = 0;
2183
2184         return 0;
2185 }
2186
2187 /* Swapping and flipping are different operations and need different ioctls.
2188  * They can and should be intermixed to support multiple 3D windows.
2189  */
2190 static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
2191 {
2192         drm_radeon_private_t *dev_priv = dev->dev_private;
2193         DRM_DEBUG("\n");
2194
2195         LOCK_TEST_WITH_RETURN(dev, file_priv);
2196
2197         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2198
2199         if (!dev_priv->page_flipping)
2200                 radeon_do_init_pageflip(dev, file_priv->master);
2201
2202         radeon_cp_dispatch_flip(dev, file_priv->master);
2203
2204         COMMIT_RING();
2205         return 0;
2206 }
2207
2208 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2209 {
2210         drm_radeon_private_t *dev_priv = dev->dev_private;
2211         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2212         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2213
2214         DRM_DEBUG("\n");
2215
2216         LOCK_TEST_WITH_RETURN(dev, file_priv);
2217
2218         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2219
2220         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2221                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2222
2223         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2224                 r600_cp_dispatch_swap(dev, file_priv);
2225         else
2226                 radeon_cp_dispatch_swap(dev, file_priv->master);
2227         sarea_priv->ctx_owner = 0;
2228
2229         COMMIT_RING();
2230         return 0;
2231 }
2232
2233 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
2234 {
2235         drm_radeon_private_t *dev_priv = dev->dev_private;
2236         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2237         drm_radeon_sarea_t *sarea_priv;
2238         struct drm_device_dma *dma = dev->dma;
2239         struct drm_buf *buf;
2240         drm_radeon_vertex_t *vertex = data;
2241         drm_radeon_tcl_prim_t prim;
2242
2243         LOCK_TEST_WITH_RETURN(dev, file_priv);
2244
2245         sarea_priv = master_priv->sarea_priv;
2246
2247         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2248                   DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
2249
2250         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2251                 DRM_ERROR("buffer index %d (of %d max)\n",
2252                           vertex->idx, dma->buf_count - 1);
2253                 return -EINVAL;
2254         }
2255         if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2256                 DRM_ERROR("buffer prim %d\n", vertex->prim);
2257                 return -EINVAL;
2258         }
2259
2260         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2261         VB_AGE_TEST_WITH_RETURN(dev_priv);
2262
2263         buf = dma->buflist[vertex->idx];
2264
2265         if (buf->file_priv != file_priv) {
2266                 DRM_ERROR("process %d using buffer owned by %p\n",
2267                           DRM_CURRENTPID, buf->file_priv);
2268                 return -EINVAL;
2269         }
2270         if (buf->pending) {
2271                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2272                 return -EINVAL;
2273         }
2274
2275         /* Build up a prim_t record:
2276          */
2277         if (vertex->count) {
2278                 buf->used = vertex->count;      /* not used? */
2279
2280                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2281                         if (radeon_emit_state(dev_priv, file_priv,
2282                                               &sarea_priv->context_state,
2283                                               sarea_priv->tex_state,
2284                                               sarea_priv->dirty)) {
2285                                 DRM_ERROR("radeon_emit_state failed\n");
2286                                 return -EINVAL;
2287                         }
2288
2289                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2290                                                RADEON_UPLOAD_TEX1IMAGES |
2291                                                RADEON_UPLOAD_TEX2IMAGES |
2292                                                RADEON_REQUIRE_QUIESCENCE);
2293                 }
2294
2295                 prim.start = 0;
2296                 prim.finish = vertex->count;    /* unused */
2297                 prim.prim = vertex->prim;
2298                 prim.numverts = vertex->count;
2299                 prim.vc_format = sarea_priv->vc_format;
2300
2301                 radeon_cp_dispatch_vertex(dev, file_priv, buf, &prim);
2302         }
2303
2304         if (vertex->discard) {
2305                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2306         }
2307
2308         COMMIT_RING();
2309         return 0;
2310 }
2311
2312 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
2313 {
2314         drm_radeon_private_t *dev_priv = dev->dev_private;
2315         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2316         drm_radeon_sarea_t *sarea_priv;
2317         struct drm_device_dma *dma = dev->dma;
2318         struct drm_buf *buf;
2319         drm_radeon_indices_t *elts = data;
2320         drm_radeon_tcl_prim_t prim;
2321         int count;
2322
2323         LOCK_TEST_WITH_RETURN(dev, file_priv);
2324
2325         sarea_priv = master_priv->sarea_priv;
2326
2327         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2328                   DRM_CURRENTPID, elts->idx, elts->start, elts->end,
2329                   elts->discard);
2330
2331         if (elts->idx < 0 || elts->idx >= dma->buf_count) {
2332                 DRM_ERROR("buffer index %d (of %d max)\n",
2333                           elts->idx, dma->buf_count - 1);
2334                 return -EINVAL;
2335         }
2336         if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2337                 DRM_ERROR("buffer prim %d\n", elts->prim);
2338                 return -EINVAL;
2339         }
2340
2341         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2342         VB_AGE_TEST_WITH_RETURN(dev_priv);
2343
2344         buf = dma->buflist[elts->idx];
2345
2346         if (buf->file_priv != file_priv) {
2347                 DRM_ERROR("process %d using buffer owned by %p\n",
2348                           DRM_CURRENTPID, buf->file_priv);
2349                 return -EINVAL;
2350         }
2351         if (buf->pending) {
2352                 DRM_ERROR("sending pending buffer %d\n", elts->idx);
2353                 return -EINVAL;
2354         }
2355
2356         count = (elts->end - elts->start) / sizeof(u16);
2357         elts->start -= RADEON_INDEX_PRIM_OFFSET;
2358
2359         if (elts->start & 0x7) {
2360                 DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
2361                 return -EINVAL;
2362         }
2363         if (elts->start < buf->used) {
2364                 DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
2365                 return -EINVAL;
2366         }
2367
2368         buf->used = elts->end;
2369
2370         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2371                 if (radeon_emit_state(dev_priv, file_priv,
2372                                       &sarea_priv->context_state,
2373                                       sarea_priv->tex_state,
2374                                       sarea_priv->dirty)) {
2375                         DRM_ERROR("radeon_emit_state failed\n");
2376                         return -EINVAL;
2377                 }
2378
2379                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2380                                        RADEON_UPLOAD_TEX1IMAGES |
2381                                        RADEON_UPLOAD_TEX2IMAGES |
2382                                        RADEON_REQUIRE_QUIESCENCE);
2383         }
2384
2385         /* Build up a prim_t record:
2386          */
2387         prim.start = elts->start;
2388         prim.finish = elts->end;
2389         prim.prim = elts->prim;
2390         prim.offset = 0;        /* offset from start of dma buffers */
2391         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2392         prim.vc_format = sarea_priv->vc_format;
2393
2394         radeon_cp_dispatch_indices(dev, file_priv->master, buf, &prim);
2395         if (elts->discard) {
2396                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2397         }
2398
2399         COMMIT_RING();
2400         return 0;
2401 }
2402
2403 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2404 {
2405         drm_radeon_private_t *dev_priv = dev->dev_private;
2406         drm_radeon_texture_t *tex = data;
2407         drm_radeon_tex_image_t image;
2408         int ret;
2409
2410         LOCK_TEST_WITH_RETURN(dev, file_priv);
2411
2412         if (tex->image == NULL) {
2413                 DRM_ERROR("null texture image!\n");
2414                 return -EINVAL;
2415         }
2416
2417         if (DRM_COPY_FROM_USER(&image,
2418                                (drm_radeon_tex_image_t __user *) tex->image,
2419                                sizeof(image)))
2420                 return -EFAULT;
2421
2422         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2423         VB_AGE_TEST_WITH_RETURN(dev_priv);
2424
2425         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2426                 ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image);
2427         else
2428                 ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2429
2430         return ret;
2431 }
2432
2433 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2434 {
2435         drm_radeon_private_t *dev_priv = dev->dev_private;
2436         drm_radeon_stipple_t *stipple = data;
2437         u32 mask[32];
2438
2439         LOCK_TEST_WITH_RETURN(dev, file_priv);
2440
2441         if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2442                 return -EFAULT;
2443
2444         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2445
2446         radeon_cp_dispatch_stipple(dev, mask);
2447
2448         COMMIT_RING();
2449         return 0;
2450 }
2451
2452 static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
2453 {
2454         drm_radeon_private_t *dev_priv = dev->dev_private;
2455         struct drm_device_dma *dma = dev->dma;
2456         struct drm_buf *buf;
2457         drm_radeon_indirect_t *indirect = data;
2458         RING_LOCALS;
2459
2460         LOCK_TEST_WITH_RETURN(dev, file_priv);
2461
2462         DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
2463                   indirect->idx, indirect->start, indirect->end,
2464                   indirect->discard);
2465
2466         if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
2467                 DRM_ERROR("buffer index %d (of %d max)\n",
2468                           indirect->idx, dma->buf_count - 1);
2469                 return -EINVAL;
2470         }
2471
2472         buf = dma->buflist[indirect->idx];
2473
2474         if (buf->file_priv != file_priv) {
2475                 DRM_ERROR("process %d using buffer owned by %p\n",
2476                           DRM_CURRENTPID, buf->file_priv);
2477                 return -EINVAL;
2478         }
2479         if (buf->pending) {
2480                 DRM_ERROR("sending pending buffer %d\n", indirect->idx);
2481                 return -EINVAL;
2482         }
2483
2484         if (indirect->start < buf->used) {
2485                 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2486                           indirect->start, buf->used);
2487                 return -EINVAL;
2488         }
2489
2490         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2491         VB_AGE_TEST_WITH_RETURN(dev_priv);
2492
2493         buf->used = indirect->end;
2494
2495         /* Dispatch the indirect buffer full of commands from the
2496          * X server.  This is insecure and is thus only available to
2497          * privileged clients.
2498          */
2499         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2500                 r600_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2501         else {
2502                 /* Wait for the 3D stream to idle before the indirect buffer
2503                  * containing 2D acceleration commands is processed.
2504                  */
2505                 BEGIN_RING(2);
2506                 RADEON_WAIT_UNTIL_3D_IDLE();
2507                 ADVANCE_RING();
2508                 radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2509         }
2510
2511         if (indirect->discard) {
2512                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2513         }
2514
2515         COMMIT_RING();
2516         return 0;
2517 }
2518
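     /* DRM_RADEON_VERTEX2 ioctl: emit a series of primitives from a single
      * DMA buffer, re-emitting client state whenever the state index
      * changes between primitives.
      */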
2519 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
2520 {
2521         drm_radeon_private_t *dev_priv = dev->dev_private;
2522         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2523         drm_radeon_sarea_t *sarea_priv;
2524         struct drm_device_dma *dma = dev->dma;
2525         struct drm_buf *buf;
2526         drm_radeon_vertex2_t *vertex = data;
2527         int i;
2528         unsigned char laststate;
2529
2530         LOCK_TEST_WITH_RETURN(dev, file_priv);
2531
2532         sarea_priv = master_priv->sarea_priv;
2533
2534         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2535                   DRM_CURRENTPID, vertex->idx, vertex->discard);
2536
2537         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2538                 DRM_ERROR("buffer index %d (of %d max)\n",
2539                           vertex->idx, dma->buf_count - 1);
2540                 return -EINVAL;
2541         }
2542
2543         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2544         VB_AGE_TEST_WITH_RETURN(dev_priv);
2545
2546         buf = dma->buflist[vertex->idx];
2547
2548         if (buf->file_priv != file_priv) {
2549                 DRM_ERROR("process %d using buffer owned by %p\n",
2550                           DRM_CURRENTPID, buf->file_priv);
2551                 return -EINVAL;
2552         }
2553
2554         if (buf->pending) {
2555                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2556                 return -EINVAL;
2557         }
2558
2559         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2560                 return -EINVAL;
2561
2562         for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
2563                 drm_radeon_prim_t prim;
2564                 drm_radeon_tcl_prim_t tclprim;
2565
2566                 if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
2567                         return -EFAULT;
2568
2569                 if (prim.stateidx != laststate) {
2570                         drm_radeon_state_t state;
2571
2572                         if (DRM_COPY_FROM_USER(&state,
2573                                                &vertex->state[prim.stateidx],
2574                                                sizeof(state)))
2575                                 return -EFAULT;
2576
2577                         if (radeon_emit_state2(dev_priv, file_priv, &state)) {
2578                                 DRM_ERROR("radeon_emit_state2 failed\n");
2579                                 return -EINVAL;
2580                         }
2581
2582                         laststate = prim.stateidx;
2583                 }
2584
2585                 tclprim.start = prim.start;
2586                 tclprim.finish = prim.finish;
2587                 tclprim.prim = prim.prim;
2588                 tclprim.vc_format = prim.vc_format;
2589
2590                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2591                         tclprim.offset = prim.numverts * 64;
2592                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* indexed walk: allow the full VB range */
2593
2594                         radeon_cp_dispatch_indices(dev, file_priv->master, buf, &tclprim);
2595                 } else {
2596                         tclprim.numverts = prim.numverts;
2597                         tclprim.offset = 0;     /* not used */
2598
2599                         radeon_cp_dispatch_vertex(dev, file_priv, buf, &tclprim);
2600                 }
2601
2602                 if (sarea_priv->nbox == 1)
2603                         sarea_priv->nbox = 0;
2604         }
2605
2606         if (vertex->discard) {
2607                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2608         }
2609
2610         COMMIT_RING();
2611         return 0;
2612 }
2613
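     /* ================================================================
      * Helpers for parsing and emitting the CMDBUF command stream
      */

     /* Verify a RADEON_CMD_PACKET register-write block against the packet
      * table and emit it as a type-0 CP packet.
      */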
2614 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2615                                struct drm_file *file_priv,
2616                                drm_radeon_cmd_header_t header,
2617                                drm_radeon_kcmd_buffer_t *cmdbuf)
2618 {
2619         int id = (int)header.packet.packet_id;
2620         int sz, reg;
2621         int *data = (int *)cmdbuf->buf;
2622         RING_LOCALS;
2623
2624         if (id >= RADEON_MAX_STATE_PACKETS)
2625                 return -EINVAL;
2626
2627         sz = packet[id].len;
2628         reg = packet[id].start;
2629
2630         if (sz * sizeof(int) > cmdbuf->bufsz) {
2631                 DRM_ERROR("Packet size larger than provided data\n");
2632                 return -EINVAL;
2633         }
2634
2635         if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, data)) {
2636                 DRM_ERROR("Packet verification failed\n");
2637                 return -EINVAL;
2638         }
2639
2640         BEGIN_RING(sz + 1);
2641         OUT_RING(CP_PACKET0(reg, (sz - 1)));
2642         OUT_RING_TABLE(data, sz);
2643         ADVANCE_RING();
2644
2645         cmdbuf->buf += sz * sizeof(int);
2646         cmdbuf->bufsz -= sz * sizeof(int);
2647         return 0;
2648 }
2649
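     /* Emit a run of TCL scalar state from the command buffer through the
      * scalar index/data registers.
      */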
2650 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2651                                           drm_radeon_cmd_header_t header,
2652                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2653 {
2654         int sz = header.scalars.count;
2655         int start = header.scalars.offset;
2656         int stride = header.scalars.stride;
2657         RING_LOCALS;
2658
2659         BEGIN_RING(3 + sz);
2660         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2661         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2662         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2663         OUT_RING_TABLE(cmdbuf->buf, sz);
2664         ADVANCE_RING();
2665         cmdbuf->buf += sz * sizeof(int);
2666         cmdbuf->bufsz -= sz * sizeof(int);
2667         return 0;
2668 }
2669
2670 /* God this is ugly: the 8-bit scalars.offset field cannot encode offsets
2671  * >= 0x100, so this variant adds 0x100 to reach the upper scalar range. */
2672 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2673                                            drm_radeon_cmd_header_t header,
2674                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2675 {
2676         int sz = header.scalars.count;
2677         int start = ((unsigned int)header.scalars.offset) + 0x100;
2678         int stride = header.scalars.stride;
2679         RING_LOCALS;
2680
2681         BEGIN_RING(3 + sz);
2682         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2683         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2684         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2685         OUT_RING_TABLE(cmdbuf->buf, sz);
2686         ADVANCE_RING();
2687         cmdbuf->buf += sz * sizeof(int);
2688         cmdbuf->bufsz -= sz * sizeof(int);
2689         return 0;
2690 }
2691
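     /* Emit a run of TCL vector state from the command buffer through the
      * vector index/data registers.
      */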
2692 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2693                                           drm_radeon_cmd_header_t header,
2694                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2695 {
2696         int sz = header.vectors.count;
2697         int start = header.vectors.offset;
2698         int stride = header.vectors.stride;
2699         RING_LOCALS;
2700
2701         BEGIN_RING(5 + sz);
2702         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2703         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2704         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2705         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2706         OUT_RING_TABLE(cmdbuf->buf, sz);
2707         ADVANCE_RING();
2708
2709         cmdbuf->buf += sz * sizeof(int);
2710         cmdbuf->bufsz -= sz * sizeof(int);
2711         return 0;
2712 }
2713
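     /* Emit a linear run of TCL vector state; the start index comes from
      * the veclinear header's lo/hi address fields.
      */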
2714 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2715                                           drm_radeon_cmd_header_t header,
2716                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2717 {
2718         int sz = header.veclinear.count * 4;
2719         int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2720         RING_LOCALS;
2721
2722         if (!sz)
2723                 return 0;
2724         if (sz * 4 > cmdbuf->bufsz)
2725                 return -EINVAL;
2726
2727         BEGIN_RING(5 + sz);
2728         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2729         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2730         OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2731         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2732         OUT_RING_TABLE(cmdbuf->buf, sz);
2733         ADVANCE_RING();
2734
2735         cmdbuf->buf += sz * sizeof(int);
2736         cmdbuf->bufsz -= sz * sizeof(int);
2737         return 0;
2738 }
2739
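     /* Verify a type-3 packet from the command buffer and emit it to the
      * ring unchanged.
      */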
2740 static int radeon_emit_packet3(struct drm_device * dev,
2741                                struct drm_file *file_priv,
2742                                drm_radeon_kcmd_buffer_t *cmdbuf)
2743 {
2744         drm_radeon_private_t *dev_priv = dev->dev_private;
2745         unsigned int cmdsz;
2746         int ret;
2747         RING_LOCALS;
2748
2749         DRM_DEBUG("\n");
2750
2751         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2752                                                   cmdbuf, &cmdsz))) {
2753                 DRM_ERROR("Packet verification failed\n");
2754                 return ret;
2755         }
2756
2757         BEGIN_RING(cmdsz);
2758         OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2759         ADVANCE_RING();
2760
2761         cmdbuf->buf += cmdsz * 4;
2762         cmdbuf->bufsz -= cmdsz * 4;
2763         return 0;
2764 }
2765
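     /* Verify a type-3 packet and emit it once per client cliprect,
      * programming the clip rectangle before each replay.
      */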
2766 static int radeon_emit_packet3_cliprect(struct drm_device *dev,
2767                                         struct drm_file *file_priv,
2768                                         drm_radeon_kcmd_buffer_t *cmdbuf,
2769                                         int orig_nbox)
2770 {
2771         drm_radeon_private_t *dev_priv = dev->dev_private;
2772         struct drm_clip_rect box;
2773         unsigned int cmdsz;
2774         int ret;
2775         struct drm_clip_rect __user *boxes = cmdbuf->boxes;
2776         int i = 0;
2777         RING_LOCALS;
2778
2779         DRM_DEBUG("\n");
2780
2781         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2782                                                   cmdbuf, &cmdsz))) {
2783                 DRM_ERROR("Packet verification failed\n");
2784                 return ret;
2785         }
2786
2787         if (!orig_nbox)
2788                 goto out;
2789
2790         do {
2791                 if (i < cmdbuf->nbox) {
2792                         if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2793                                 return -EFAULT;
2794                         /* FIXME: on the second and subsequent passes
2795                          * through this loop, emit a WAIT_UNTIL_3D_IDLE
2796                          * before calling emit_clip_rect().  This works
2797                          * around a lockup on fast machines when a
2798                          * cmdbuf carries several cliprects, e.g. when
2799                          * a 2D window is waved over a 3D window.
2800                          * Something in the commands coming from user
2801                          * space hangs the card when they are replayed
2802                          * several times in a row; user space would be
2803                          * the correct place to fix that, but this
2804                          * works around it until it is tracked down.
2805                          * - Tim Smith */
2806                         if (i) {
2807                                 BEGIN_RING(2);
2808                                 RADEON_WAIT_UNTIL_3D_IDLE();
2809                                 ADVANCE_RING();
2810                         }
2811                         radeon_emit_clip_rect(dev_priv, &box);
2812                 }
2813
2814                 BEGIN_RING(cmdsz);
2815                 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2816                 ADVANCE_RING();
2817
2818         } while (++i < cmdbuf->nbox);
2819         if (cmdbuf->nbox == 1)
2820                 cmdbuf->nbox = 0;
2821
2822       out:
2823         cmdbuf->buf += cmdsz * 4;
2824         cmdbuf->bufsz -= cmdsz * 4;
2825         return 0;
2826 }
2827
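     /* Emit an engine-idle wait (2D, 3D or both) for a RADEON_CMD_WAIT
      * command.
      */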
2828 static int radeon_emit_wait(struct drm_device * dev, int flags)
2829 {
2830         drm_radeon_private_t *dev_priv = dev->dev_private;
2831         RING_LOCALS;
2832
2833         DRM_DEBUG("%x\n", flags);
2834         switch (flags) {
2835         case RADEON_WAIT_2D:
2836                 BEGIN_RING(2);
2837                 RADEON_WAIT_UNTIL_2D_IDLE();
2838                 ADVANCE_RING();
2839                 break;
2840         case RADEON_WAIT_3D:
2841                 BEGIN_RING(2);
2842                 RADEON_WAIT_UNTIL_3D_IDLE();
2843                 ADVANCE_RING();
2844                 break;
2845         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2846                 BEGIN_RING(2);
2847                 RADEON_WAIT_UNTIL_IDLE();
2848                 ADVANCE_RING();
2849                 break;
2850         default:
2851                 return -EINVAL;
2852         }
2853
2854         return 0;
2855 }
2856
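     /* DRM_RADEON_CMDBUF ioctl: copy the client's command stream into a
      * kernel buffer, then verify and emit it one command at a time.  R300
      * class streams are handed off to r300_do_cp_cmdbuf().
      */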
2857 static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
2858 {
2859         drm_radeon_private_t *dev_priv = dev->dev_private;
2860         struct drm_device_dma *dma = dev->dma;
2861         struct drm_buf *buf = NULL;
2862         int idx;
2863         drm_radeon_kcmd_buffer_t *cmdbuf = data;
2864         drm_radeon_cmd_header_t header;
2865         int orig_nbox, orig_bufsz;
2866         char *kbuf = NULL;
2867
2868         LOCK_TEST_WITH_RETURN(dev, file_priv);
2869
2870         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2871         VB_AGE_TEST_WITH_RETURN(dev_priv);
2872
2873         if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
2874                 return -EINVAL;
2875         }
2876
2877         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2878          * races between checking values and using those values in other code,
2879          * and simply to avoid a lot of function calls to copy in data.
2880          */
2881         orig_bufsz = cmdbuf->bufsz;
2882         if (orig_bufsz != 0) {
2883                 kbuf = kmalloc(cmdbuf->bufsz, GFP_KERNEL);
2884                 if (kbuf == NULL)
2885                         return -ENOMEM;
2886                 if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
2887                                        cmdbuf->bufsz)) {
2888                         kfree(kbuf);
2889                         return -EFAULT;
2890                 }
2891                 cmdbuf->buf = kbuf;
2892         }
2893
2894         orig_nbox = cmdbuf->nbox;
2895
2896         if (dev_priv->microcode_version == UCODE_R300) {
2897                 int temp;
2898                 temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
2899
2900                 if (orig_bufsz != 0)
2901                         kfree(kbuf);
2902
2903                 return temp;
2904         }
2905
2906         /* microcode_version != r300 */
2907         while (cmdbuf->bufsz >= sizeof(header)) {
2908
2909                 header.i = *(int *)cmdbuf->buf;
2910                 cmdbuf->buf += sizeof(header);
2911                 cmdbuf->bufsz -= sizeof(header);
2912
2913                 switch (header.header.cmd_type) {
2914                 case RADEON_CMD_PACKET:
2915                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2916                         if (radeon_emit_packets
2917                             (dev_priv, file_priv, header, cmdbuf)) {
2918                                 DRM_ERROR("radeon_emit_packets failed\n");
2919                                 goto err;
2920                         }
2921                         break;
2922
2923                 case RADEON_CMD_SCALARS:
2924                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2925                         if (radeon_emit_scalars(dev_priv, header, cmdbuf)) {
2926                                 DRM_ERROR("radeon_emit_scalars failed\n");
2927                                 goto err;
2928                         }
2929                         break;
2930
2931                 case RADEON_CMD_VECTORS:
2932                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2933                         if (radeon_emit_vectors(dev_priv, header, cmdbuf)) {
2934                                 DRM_ERROR("radeon_emit_vectors failed\n");
2935                                 goto err;
2936                         }
2937                         break;
2938
2939                 case RADEON_CMD_DMA_DISCARD:
2940                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2941                         idx = header.dma.buf_idx;
2942                         if (idx < 0 || idx >= dma->buf_count) {
2943                                 DRM_ERROR("buffer index %d (of %d max)\n",
2944                                           idx, dma->buf_count - 1);
2945                                 goto err;
2946                         }
2947
2948                         buf = dma->buflist[idx];
2949                         if (buf->file_priv != file_priv || buf->pending) {
2950                                 DRM_ERROR("bad buffer %p %p %d\n",
2951                                           buf->file_priv, file_priv,
2952                                           buf->pending);
2953                                 goto err;
2954                         }
2955
2956                         radeon_cp_discard_buffer(dev, file_priv->master, buf);
2957                         break;
2958
2959                 case RADEON_CMD_PACKET3:
2960                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2961                         if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
2962                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2963                                 goto err;
2964                         }
2965                         break;
2966
2967                 case RADEON_CMD_PACKET3_CLIP:
2968                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2969                         if (radeon_emit_packet3_cliprect
2970                             (dev, file_priv, cmdbuf, orig_nbox)) {
2971                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2972                                 goto err;
2973                         }
2974                         break;
2975
2976                 case RADEON_CMD_SCALARS2:
2977                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2978                         if (radeon_emit_scalars2(dev_priv, header, cmdbuf)) {
2979                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2980                                 goto err;
2981                         }
2982                         break;
2983
2984                 case RADEON_CMD_WAIT:
2985                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2986                         if (radeon_emit_wait(dev, header.wait.flags)) {
2987                                 DRM_ERROR("radeon_emit_wait failed\n");
2988                                 goto err;
2989                         }
2990                         break;
2991                 case RADEON_CMD_VECLINEAR:
2992                         DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
2993                         if (radeon_emit_veclinear(dev_priv, header, cmdbuf)) {
2994                                 DRM_ERROR("radeon_emit_veclinear failed\n");
2995                                 goto err;
2996                         }
2997                         break;
2998
2999                 default:
3000                         DRM_ERROR("bad cmd_type %d at %p\n",
3001                                   header.header.cmd_type,
3002                                   cmdbuf->buf - sizeof(header));
3003                         goto err;
3004                 }
3005         }
3006
3007         if (orig_bufsz != 0)
3008                 kfree(kbuf);
3009
3010         DRM_DEBUG("DONE\n");
3011         COMMIT_RING();
3012         return 0;
3013
3014       err:
3015         if (orig_bufsz != 0)
3016                 kfree(kbuf);
3017         return -EINVAL;
3018 }
3019
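     /* DRM_RADEON_GETPARAM ioctl: report driver and hardware parameters
      * (memory offsets, scratch register values, card type, ...) to the
      * client.
      */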
3020 static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3021 {
3022         drm_radeon_private_t *dev_priv = dev->dev_private;
3023         drm_radeon_getparam_t *param = data;
3024         int value;
3025
3026         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3027
3028         switch (param->param) {
3029         case RADEON_PARAM_GART_BUFFER_OFFSET:
3030                 value = dev_priv->gart_buffers_offset;
3031                 break;
3032         case RADEON_PARAM_LAST_FRAME:
3033                 dev_priv->stats.last_frame_reads++;
3034                 value = GET_SCRATCH(dev_priv, 0);
3035                 break;
3036         case RADEON_PARAM_LAST_DISPATCH:
3037                 value = GET_SCRATCH(dev_priv, 1);
3038                 break;
3039         case RADEON_PARAM_LAST_CLEAR:
3040                 dev_priv->stats.last_clear_reads++;
3041                 value = GET_SCRATCH(dev_priv, 2);
3042                 break;
3043         case RADEON_PARAM_IRQ_NR:
3044                 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3045                         value = 0;
3046                 else
3047                         value = drm_dev_to_irq(dev);
3048                 break;
3049         case RADEON_PARAM_GART_BASE:
3050                 value = dev_priv->gart_vm_start;
3051                 break;
3052         case RADEON_PARAM_REGISTER_HANDLE:
3053                 value = dev_priv->mmio->offset;
3054                 break;
3055         case RADEON_PARAM_STATUS_HANDLE:
3056                 value = dev_priv->ring_rptr_offset;
3057                 break;
3058 #if BITS_PER_LONG == 32
3059                 /*
3060                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3061                  * pointer which can't fit into an int-sized variable.  According to
3062                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3063                  * not supporting it shouldn't be a problem.  If the same functionality
3064                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
3065                  * so backwards-compatibility for the embedded platforms can be
3066                  * maintained.  --davidm 4-Feb-2004.
3067                  */
3068         case RADEON_PARAM_SAREA_HANDLE:
3069                 /* The lock is the first dword in the sarea. */
3070                 /* no users of this parameter */
3071                 value = 0;      /* don't copy an uninitialized int to user space */
                break;
3072 #endif
3073         case RADEON_PARAM_GART_TEX_HANDLE:
3074                 value = dev_priv->gart_textures_offset;
3075                 break;
3076         case RADEON_PARAM_SCRATCH_OFFSET:
3077                 if (!dev_priv->writeback_works)
3078                         return -EINVAL;
3079                 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3080                         value = R600_SCRATCH_REG_OFFSET;
3081                 else
3082                         value = RADEON_SCRATCH_REG_OFFSET;
3083                 break;
3084         case RADEON_PARAM_CARD_TYPE:
3085                 if (dev_priv->flags & RADEON_IS_PCIE)
3086                         value = RADEON_CARD_PCIE;
3087                 else if (dev_priv->flags & RADEON_IS_AGP)
3088                         value = RADEON_CARD_AGP;
3089                 else
3090                         value = RADEON_CARD_PCI;
3091                 break;
3092         case RADEON_PARAM_VBLANK_CRTC:
3093                 value = radeon_vblank_crtc_get(dev);
3094                 break;
3095         case RADEON_PARAM_FB_LOCATION:
3096                 value = radeon_read_fb_location(dev_priv);
3097                 break;
3098         case RADEON_PARAM_NUM_GB_PIPES:
3099                 value = dev_priv->num_gb_pipes;
3100                 break;
3101         case RADEON_PARAM_NUM_Z_PIPES:
3102                 value = dev_priv->num_z_pipes;
3103                 break;
3104         default:
3105                 DRM_DEBUG("Invalid parameter %d\n", param->param);
3106                 return -EINVAL;
3107         }
3108
3109         if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
3110                 DRM_ERROR("copy_to_user\n");
3111                 return -EFAULT;
3112         }
3113
3114         return 0;
3115 }
3116
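     /* DRM_RADEON_SETPARAM ioctl: let the client update driver state such
      * as the client's framebuffer location, color tiling and PCI GART
      * placement.
      */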
3117 static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3118 {
3119         drm_radeon_private_t *dev_priv = dev->dev_private;
3120         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
3121         drm_radeon_setparam_t *sp = data;
3122         struct drm_radeon_driver_file_fields *radeon_priv;
3123
3124         switch (sp->param) {
3125         case RADEON_SETPARAM_FB_LOCATION:
3126                 radeon_priv = file_priv->driver_priv;
3127                 radeon_priv->radeon_fb_delta = dev_priv->fb_location -
3128                     sp->value;
3129                 break;
3130         case RADEON_SETPARAM_SWITCH_TILING:
3131                 if (sp->value == 0) {
3132                         DRM_DEBUG("color tiling disabled\n");
3133                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3134                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3135                         if (master_priv->sarea_priv)
3136                                 master_priv->sarea_priv->tiling_enabled = 0;
3137                 } else if (sp->value == 1) {
3138                         DRM_DEBUG("color tiling enabled\n");
3139                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3140                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3141                         if (master_priv->sarea_priv)
3142                                 master_priv->sarea_priv->tiling_enabled = 1;
3143                 }
3144                 break;
3145         case RADEON_SETPARAM_PCIGART_LOCATION:
3146                 dev_priv->pcigart_offset = sp->value;
3147                 dev_priv->pcigart_offset_set = 1;
3148                 break;
3149         case RADEON_SETPARAM_NEW_MEMMAP:
3150                 dev_priv->new_memmap = sp->value;
3151                 break;
3152         case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3153                 dev_priv->gart_info.table_size = sp->value;
3154                 if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3155                         dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3156                 break;
3157         case RADEON_SETPARAM_VBLANK_CRTC:
3158                 return radeon_vblank_crtc_set(dev, sp->value);
3160         default:
3161                 DRM_DEBUG("Invalid parameter %d\n", sp->param);
3162                 return -EINVAL;
3163         }
3164
3165         return 0;
3166 }
3167
3168 /* When a client dies:
3169  *    - Check for and clean up flipped page state
3170  *    - Free any alloced GART memory.
3171  *    - Free any alloced radeon surfaces.
3172  *
3173  * DRM infrastructure takes care of reclaiming dma buffers.
3174  */
3175 void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
3176 {
3177         if (dev->dev_private) {
3178                 drm_radeon_private_t *dev_priv = dev->dev_private;
3179                 dev_priv->page_flipping = 0;
3180                 radeon_mem_release(file_priv, dev_priv->gart_heap);
3181                 radeon_mem_release(file_priv, dev_priv->fb_heap);
3182                 radeon_surfaces_release(file_priv, dev_priv);
3183         }
3184 }
3185
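     /* Final cleanup when the last client closes the device. */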
3186 void radeon_driver_lastclose(struct drm_device *dev)
3187 {
3188         radeon_surfaces_release(PCIGART_FILE_PRIV, dev->dev_private);
3189         radeon_do_release(dev);
3190 }
3191
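     /* Allocate the per-file driver state when a client opens the device. */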
3192 int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
3193 {
3194         drm_radeon_private_t *dev_priv = dev->dev_private;
3195         struct drm_radeon_driver_file_fields *radeon_priv;
3196
3197         DRM_DEBUG("\n");
3198         radeon_priv = kmalloc(sizeof(*radeon_priv), GFP_KERNEL);
3199
3200         if (!radeon_priv)
3201                 return -ENOMEM;
3202
3203         file_priv->driver_priv = radeon_priv;
3204
3205         if (dev_priv)
3206                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3207         else
3208                 radeon_priv->radeon_fb_delta = 0;
3209         return 0;
3210 }
3211
3212 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
3213 {
3214         struct drm_radeon_driver_file_fields *radeon_priv =
3215             file_priv->driver_priv;
3216
3217         kfree(radeon_priv);
3218 }
3219
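     /* Ioctl table for the legacy (UMS) radeon interface. */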
3220 struct drm_ioctl_desc radeon_ioctls[] = {
3221         DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3222         DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3223         DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3224         DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3225         DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
3226         DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
3227         DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH),
3228         DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
3229         DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
3230         DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
3231         DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
3232         DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
3233         DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
3234         DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
3235         DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3236         DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
3237         DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
3238         DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
3239         DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
3240         DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
3241         DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH),
3242         DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3243         DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
3244         DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
3245         DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
3246         DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
3247         DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
3248         DRM_IOCTL_DEF(DRM_RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH)
3249 };
3250
3251 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);