]> git.karo-electronics.de Git - linux-beck.git/blob - drivers/char/drm/radeon_state.c
2eae60bd340fa11f3b28919b054b19298eb8a8c0
[linux-beck.git] / drivers / char / drm / radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*-
2  *
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
/* Validate a client-supplied GPU memory offset and, if it lies outside
 * the visible range, relocate it by the per-client framebuffer delta.
 *
 * On success *offset may have been rewritten in place.  Returns 0 on
 * success, DRM_ERR(EINVAL) if the offset is still out of range after
 * fixup.
 */
static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
						    dev_priv,
						    drm_file_t * filp_priv,
						    u32 * offset)
{
	u32 off = *offset;
	struct drm_radeon_driver_file_fields *radeon_priv;

	/* Fast path: already inside the accepted window.
	 * NOTE(review): this accepts anything in [fb_location,
	 * gart_vm_start + gart_size), including any hole between the
	 * framebuffer and the GART aperture -- confirm that is intended.
	 */
	if (off >= dev_priv->fb_location &&
	    off < (dev_priv->gart_vm_start + dev_priv->gart_size))
		return 0;

	/* Translate by the delta recorded for this client. */
	radeon_priv = filp_priv->driver_priv;
	off += radeon_priv->radeon_fb_delta;

	DRM_DEBUG("offset fixed up to 0x%x\n", off);

	/* Still out of range after fixup -> reject. */
	if (off < dev_priv->fb_location ||
	    off >= (dev_priv->gart_vm_start + dev_priv->gart_size))
		return DRM_ERR(EINVAL);

	*offset = off;

	return 0;
}
65
/* Sanitize the GPU memory offsets embedded in a client-supplied state
 * packet identified by @id.  Packets that carry offsets have each one
 * run through radeon_check_and_fixup_offset(); packets known to carry
 * no offsets fall through; unknown IDs are rejected.
 *
 * NOTE(review): @data is declared __user yet its elements are read and
 * written directly (&data[i]) instead of via copy_{from,to}_user --
 * confirm the caller has already copied the buffer into kernel space.
 *
 * Returns 0 on success, DRM_ERR(EINVAL) on a bad offset or unknown ID.
 */
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t * filp_priv,
						     int id, u32 __user * data)
{
	switch (id) {

	case RADEON_EMIT_PP_MISC:
		/* Depth buffer offset lives at a fixed dword within the
		 * PP_MISC register block. */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[(RADEON_RB3D_DEPTHOFFSET
							 -
							 RADEON_PP_MISC) /
							4])) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		/* Colour buffer offset within the PP_CNTL register block. */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[(RADEON_RB3D_COLOROFFSET
							 -
							 RADEON_PP_CNTL) /
							4])) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		/* R200 texture offset is the first (only) dword. */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[0])) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		/* R100 texture offset at its fixed position inside the
		 * TXFILTER register block. */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[(RADEON_PP_TXOFFSET_0
							 -
							 RADEON_PP_TXFILTER_0) /
							4])) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
			/* Five cubic face offsets per packet. */
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset
				    (dev_priv, filp_priv, &data[i])) {
					DRM_ERROR
					    ("Invalid R200 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
			break;
		}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			/* Five cubic face offsets per packet (R100). */
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
		}
		break;

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_VAP_CTL:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return DRM_ERR(EINVAL);
	}

	return 0;
}
239
/* Validate a client-supplied type-3 CP packet at the head of @cmdbuf
 * and fix up any blit source/destination offsets it carries.
 *
 * On success *cmdsz is set to the packet length in dwords (header plus
 * count field).  Returns 0 on success, DRM_ERR(EINVAL) if the packet is
 * not type 3, overruns the supplied buffer, or contains a bad offset.
 */
static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t * filp_priv,
						     drm_radeon_cmd_buffer_t *
						     cmdbuf,
						     unsigned int *cmdsz)
{
	u32 *cmd = (u32 *) cmdbuf->buf;

	/* Dword count: 2 header dwords + COUNT field from the header. */
	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR("Not a type 3 packet\n");
		return DRM_ERR(EINVAL);
	}

	/* The declared packet must fit inside the data the client gave us. */
	if (4 * *cmdsz > cmdbuf->bufsz) {
		DRM_ERROR("Packet size larger than size of data provided\n");
		return DRM_ERR(EINVAL);
	}

	/* Check client state and fix it up if necessary */
	if (cmd[0] & 0x8000) {	/* MSB of opcode: next DWORD GUI_CNTL */
		u32 offset;

		/* First pitch/offset dword: present if either src or dst
		 * pitch-offset control is set.  Offset is stored in 1KB
		 * units in the low 22 bits. */
		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[2] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid first packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
		}

		/* Second pitch/offset dword: only present when both src
		 * and dst controls are set. */
		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[3] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid second packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
		}
	}

	return 0;
}
290
291 /* ================================================================
292  * CP hardware state programming functions
293  */
294
/* Program the hardware scissor to the given clip rectangle.
 * RE_WIDTH_HEIGHT takes the bottom-right corner inclusive, hence the
 * "- 1" on both coordinates.
 */
static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
					     drm_clip_rect_t * box)
{
	RING_LOCALS;

	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING((box->y1 << 16) | box->x1);
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
	ADVANCE_RING();
}
310
311 /* Emit 1.1 state
312  */
313 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
314                              drm_file_t * filp_priv,
315                              drm_radeon_context_regs_t * ctx,
316                              drm_radeon_texture_regs_t * tex,
317                              unsigned int dirty)
318 {
319         RING_LOCALS;
320         DRM_DEBUG("dirty=0x%08x\n", dirty);
321
322         if (dirty & RADEON_UPLOAD_CONTEXT) {
323                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
324                                                   &ctx->rb3d_depthoffset)) {
325                         DRM_ERROR("Invalid depth buffer offset\n");
326                         return DRM_ERR(EINVAL);
327                 }
328
329                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
330                                                   &ctx->rb3d_coloroffset)) {
331                         DRM_ERROR("Invalid depth buffer offset\n");
332                         return DRM_ERR(EINVAL);
333                 }
334
335                 BEGIN_RING(14);
336                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
337                 OUT_RING(ctx->pp_misc);
338                 OUT_RING(ctx->pp_fog_color);
339                 OUT_RING(ctx->re_solid_color);
340                 OUT_RING(ctx->rb3d_blendcntl);
341                 OUT_RING(ctx->rb3d_depthoffset);
342                 OUT_RING(ctx->rb3d_depthpitch);
343                 OUT_RING(ctx->rb3d_zstencilcntl);
344                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
345                 OUT_RING(ctx->pp_cntl);
346                 OUT_RING(ctx->rb3d_cntl);
347                 OUT_RING(ctx->rb3d_coloroffset);
348                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
349                 OUT_RING(ctx->rb3d_colorpitch);
350                 ADVANCE_RING();
351         }
352
353         if (dirty & RADEON_UPLOAD_VERTFMT) {
354                 BEGIN_RING(2);
355                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
356                 OUT_RING(ctx->se_coord_fmt);
357                 ADVANCE_RING();
358         }
359
360         if (dirty & RADEON_UPLOAD_LINE) {
361                 BEGIN_RING(5);
362                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
363                 OUT_RING(ctx->re_line_pattern);
364                 OUT_RING(ctx->re_line_state);
365                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
366                 OUT_RING(ctx->se_line_width);
367                 ADVANCE_RING();
368         }
369
370         if (dirty & RADEON_UPLOAD_BUMPMAP) {
371                 BEGIN_RING(5);
372                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
373                 OUT_RING(ctx->pp_lum_matrix);
374                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
375                 OUT_RING(ctx->pp_rot_matrix_0);
376                 OUT_RING(ctx->pp_rot_matrix_1);
377                 ADVANCE_RING();
378         }
379
380         if (dirty & RADEON_UPLOAD_MASKS) {
381                 BEGIN_RING(4);
382                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
383                 OUT_RING(ctx->rb3d_stencilrefmask);
384                 OUT_RING(ctx->rb3d_ropcntl);
385                 OUT_RING(ctx->rb3d_planemask);
386                 ADVANCE_RING();
387         }
388
389         if (dirty & RADEON_UPLOAD_VIEWPORT) {
390                 BEGIN_RING(7);
391                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
392                 OUT_RING(ctx->se_vport_xscale);
393                 OUT_RING(ctx->se_vport_xoffset);
394                 OUT_RING(ctx->se_vport_yscale);
395                 OUT_RING(ctx->se_vport_yoffset);
396                 OUT_RING(ctx->se_vport_zscale);
397                 OUT_RING(ctx->se_vport_zoffset);
398                 ADVANCE_RING();
399         }
400
401         if (dirty & RADEON_UPLOAD_SETUP) {
402                 BEGIN_RING(4);
403                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
404                 OUT_RING(ctx->se_cntl);
405                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
406                 OUT_RING(ctx->se_cntl_status);
407                 ADVANCE_RING();
408         }
409
410         if (dirty & RADEON_UPLOAD_MISC) {
411                 BEGIN_RING(2);
412                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
413                 OUT_RING(ctx->re_misc);
414                 ADVANCE_RING();
415         }
416
417         if (dirty & RADEON_UPLOAD_TEX0) {
418                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
419                                                   &tex[0].pp_txoffset)) {
420                         DRM_ERROR("Invalid texture offset for unit 0\n");
421                         return DRM_ERR(EINVAL);
422                 }
423
424                 BEGIN_RING(9);
425                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
426                 OUT_RING(tex[0].pp_txfilter);
427                 OUT_RING(tex[0].pp_txformat);
428                 OUT_RING(tex[0].pp_txoffset);
429                 OUT_RING(tex[0].pp_txcblend);
430                 OUT_RING(tex[0].pp_txablend);
431                 OUT_RING(tex[0].pp_tfactor);
432                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
433                 OUT_RING(tex[0].pp_border_color);
434                 ADVANCE_RING();
435         }
436
437         if (dirty & RADEON_UPLOAD_TEX1) {
438                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
439                                                   &tex[1].pp_txoffset)) {
440                         DRM_ERROR("Invalid texture offset for unit 1\n");
441                         return DRM_ERR(EINVAL);
442                 }
443
444                 BEGIN_RING(9);
445                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
446                 OUT_RING(tex[1].pp_txfilter);
447                 OUT_RING(tex[1].pp_txformat);
448                 OUT_RING(tex[1].pp_txoffset);
449                 OUT_RING(tex[1].pp_txcblend);
450                 OUT_RING(tex[1].pp_txablend);
451                 OUT_RING(tex[1].pp_tfactor);
452                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
453                 OUT_RING(tex[1].pp_border_color);
454                 ADVANCE_RING();
455         }
456
457         if (dirty & RADEON_UPLOAD_TEX2) {
458                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
459                                                   &tex[2].pp_txoffset)) {
460                         DRM_ERROR("Invalid texture offset for unit 2\n");
461                         return DRM_ERR(EINVAL);
462                 }
463
464                 BEGIN_RING(9);
465                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
466                 OUT_RING(tex[2].pp_txfilter);
467                 OUT_RING(tex[2].pp_txformat);
468                 OUT_RING(tex[2].pp_txoffset);
469                 OUT_RING(tex[2].pp_txcblend);
470                 OUT_RING(tex[2].pp_txablend);
471                 OUT_RING(tex[2].pp_tfactor);
472                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
473                 OUT_RING(tex[2].pp_border_color);
474                 ADVANCE_RING();
475         }
476
477         return 0;
478 }
479
480 /* Emit 1.2 state
481  */
/* Emit 1.2 state
 *
 * Handles the 1.2-only zbias registers, then delegates everything else
 * to the 1.1 path in radeon_emit_state().  Returns its result.
 */
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
			      drm_file_t * filp_priv,
			      drm_radeon_state_t * state)
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
		OUT_RING(state->context2.se_zbias_factor);
		OUT_RING(state->context2.se_zbias_constant);
		ADVANCE_RING();
	}

	return radeon_emit_state(dev_priv, filp_priv, &state->context,
				 state->tex, state->dirty);
}
499
500 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
501  * 1.3 cmdbuffers allow all previous state to be updated as well as
502  * the tcl scalar and vector areas.
503  */
/* Register-group table for the 1.3 packet command: base register,
 * length in dwords, and a name for error reporting.
 *
 * NOTE: entries are indexed by the RADEON_EMIT_* / R200_EMIT_* packet
 * IDs, so the order here must match that ID enumeration exactly.
 */
static struct {
	int start;		/* first hardware register of the group */
	int len;		/* number of dwords written */
	const char *name;	/* for diagnostics */
} packet[RADEON_MAX_STATE_PACKETS] = {
	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"},	/* 62 */
	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},	/* 85 */
	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
};
606
607 /* ================================================================
608  * Performance monitoring functions
609  */
610
/* Fill a small rectangle (performance-box overlay) with a solid colour
 * using a 2D PAINT_MULTI blit.  x/y are relative to the first cliprect;
 * r/g/b are 8-bit components packed according to the current colour
 * format.  Draws into whichever buffer is currently the back buffer
 * when page flipping.
 */
static void radeon_clear_box(drm_radeon_private_t * dev_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 color;
	RING_LOCALS;

	/* Make coordinates relative to the first clip rectangle. */
	x += dev_priv->sarea_priv->boxes[0].x1;
	y += dev_priv->sarea_priv->boxes[0].y1;

	/* Pack the colour to match the framebuffer format. */
	switch (dev_priv->color_fmt) {
	case RADEON_COLOR_FORMAT_RGB565:
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
		break;
	}

	/* Let 3D work finish, then open the write mask fully. */
	BEGIN_RING(4);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
	OUT_RING(0xffffffff);
	ADVANCE_RING();

	BEGIN_RING(6);

	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		 RADEON_GMC_BRUSH_SOLID_COLOR |
		 (dev_priv->color_fmt << 8) |
		 RADEON_GMC_SRC_DATATYPE_COLOR |
		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

	/* When flipped, the "front" the user sees is our back buffer. */
	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		OUT_RING(dev_priv->front_pitch_offset);
	} else {
		OUT_RING(dev_priv->back_pitch_offset);
	}

	OUT_RING(color);

	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);

	ADVANCE_RING();
}
659
/* Draw the on-screen performance monitoring boxes from the per-frame
 * statistics gathered in dev_priv->stats, then reset the statistics.
 */
static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
{
	/* Collapse various things into a wait flag -- trying to
	 * guess if userspace slept -- better just to have them tell us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	if (dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple box for page flipping
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
		radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);

	/* Red box if we have to wait for idle at any point
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
		radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);

	/* Blue box: lost context?
	 */

	/* Yellow box for texture swaps
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
		radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);

	/* Green box if hardware never idles (as far as we can tell)
	 */
	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
		radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);

	/* Draw bars indicating number of buffers allocated
	 * (not a great measure, easily confused)
	 */
	if (dev_priv->stats.requested_bufs) {
		/* Clamp so the bar never runs off the screen. */
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box(dev_priv, 4, 16,
				 dev_priv->stats.requested_bufs, 4,
				 196, 128, 128);
	}

	/* Start the next frame's statistics from a clean slate. */
	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));

}
712
713 /* ================================================================
714  * CP command dispatch functions
715  */
716
/* Dispatch a buffer clear, replayed once per SAREA cliprect.
 *
 * Front/back color buffers are cleared with 2D solid fills.  Depth and
 * stencil are cleared by rendering quads into those buffers (with the
 * color planemask forced to zero), optionally preceded by a hyper-z
 * fast clear of the z-mask / hierarchical-z tiles.
 *
 * clear:       per-ioctl clear parameters (flags, masks, clear values).
 * depth_boxes: per-cliprect quad coordinates, pre-transformed by the
 *              client into the ui[CLEAR_*] slots.
 */
static void radeon_cp_dispatch_clear(drm_device_t * dev,
				     drm_radeon_clear_t * clear,
				     drm_radeon_clear_rect_t * depth_boxes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG("flags = 0x%x\n", flags);

	dev_priv->stats.clears++;

	/* When page flipping is active and the back buffer is currently
	 * being scanned out, swap the FRONT/BACK bits so we clear the
	 * surface the client actually means.
	 */
	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if (tmp & RADEON_FRONT)
			flags |= RADEON_BACK;
		if (tmp & RADEON_BACK)
			flags |= RADEON_FRONT;
	}

	if (flags & (RADEON_FRONT | RADEON_BACK)) {

		BEGIN_RING(4);

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
		OUT_RING(clear->color_mask);

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
				  x, y, w, h, flags);

			/* Solid fill of the cliprect in the front buffer. */
			if (flags & RADEON_FRONT) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->
					  color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->front_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}

			/* Same fill into the back buffer. */
			if (flags & RADEON_BACK) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->
					  color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->back_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}
		}
	}

	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
	    && (flags & RADEON_CLEAR_FASTZ)) {

		int i;
		/* z-pixels per scanline: 16-bit z packs two per dword,
		 * other formats one per dword.
		 */
		int depthpixperline =
		    dev_priv->depth_fmt ==
		    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
						       2) : (dev_priv->
							     depth_pitch / 4);

		u32 clearmask;

		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
		    ((clear->depth_mask & 0xff) << 24);

		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & CHIP_HAS_HIERZ)
		    && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
			   other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour than R200 which needs different clear mask
			   and different number of tiles to clear if hierz is enabled or not !?!
			 */
			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
		} else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bit 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile (or maybe one of the bits indicates if the tile is
			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
			   Pattern is as follows:
			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!?
			 */
			clearmask = 0x0;
		}

		/* Program the z clear value and flush the z cache before
		 * touching the z-mask tiles.
		 */
		BEGIN_RING(8);
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
			     tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
			     RADEON_RB3D_ZC_FLUSH_ALL);
		ADVANCE_RING();

		/* Clear the z-mask tiles covering each cliprect, one row of
		 * tiles per CLEAR_ZMASK packet.  Tile geometry differs per
		 * chip generation, hence the three variants below.
		 */
		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & CHIP_HAS_HIERZ)
			    && !(dev_priv->microcode_version == UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					OUT_RING(tileoffset * 8);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			} else if (dev_priv->microcode_version == UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 5;
				nrtilesx =
				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING(tileoffset * 16);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 1);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			} else {	/* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset =
				    ((pbox[i].y1 >> 4) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					OUT_RING(tileoffset * 128);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & CHIP_HAS_HIERZ)
		    && (dev_priv->microcode_version == UCODE_R200)
		    && (flags & RADEON_USE_HIERZ))
			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
		{
			BEGIN_RING(4);
			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
			OUT_RING(0x0);	/* First tile */
			OUT_RING(0x3cc0);
			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
			ADVANCE_RING();
		}
	}

	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	if ((dev_priv->microcode_version == UCODE_R200) &&
	    (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		/* R200 path: set up a minimal 3D state (TCL disabled,
		 * color planemask zero) and draw one quad per cliprect.
		 */
		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
					  (0x9 <<
					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;

		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if (flags & RADEON_STENCIL) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(26);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
			     tempRB3D_STENCILREFMASK);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			/* Three vertices (X, Y, Z, W) of a rect-list quad;
			 * 0x3f800000 is 1.0f for the W coordinate.
			 */
			BEGIN_RING(14);
			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			ADVANCE_RING();
		}
	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		/* R100-class path: same idea, older register set. */
		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if (flags & RADEON_DEPTH) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if (flags & RADEON_STENCIL) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(13);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
		OUT_RING(0x00000000);
		OUT_RING(rb3d_cntl);

		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(15);

			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
			OUT_RING(RADEON_VTX_Z_PRESENT |
				 RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  RADEON_MAOS_ENABLE |
				  RADEON_VTX_FMT_RADEON_MODE |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING(4);

	RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}
1207
1208 static void radeon_cp_dispatch_swap(drm_device_t * dev)
1209 {
1210         drm_radeon_private_t *dev_priv = dev->dev_private;
1211         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1212         int nbox = sarea_priv->nbox;
1213         drm_clip_rect_t *pbox = sarea_priv->boxes;
1214         int i;
1215         RING_LOCALS;
1216         DRM_DEBUG("\n");
1217
1218         /* Do some trivial performance monitoring...
1219          */
1220         if (dev_priv->do_boxes)
1221                 radeon_cp_performance_boxes(dev_priv);
1222
1223         /* Wait for the 3D stream to idle before dispatching the bitblt.
1224          * This will prevent data corruption between the two streams.
1225          */
1226         BEGIN_RING(2);
1227
1228         RADEON_WAIT_UNTIL_3D_IDLE();
1229
1230         ADVANCE_RING();
1231
1232         for (i = 0; i < nbox; i++) {
1233                 int x = pbox[i].x1;
1234                 int y = pbox[i].y1;
1235                 int w = pbox[i].x2 - x;
1236                 int h = pbox[i].y2 - y;
1237
1238                 DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);
1239
1240                 BEGIN_RING(7);
1241
1242                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1243                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1244                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1245                          RADEON_GMC_BRUSH_NONE |
1246                          (dev_priv->color_fmt << 8) |
1247                          RADEON_GMC_SRC_DATATYPE_COLOR |
1248                          RADEON_ROP3_S |
1249                          RADEON_DP_SRC_SOURCE_MEMORY |
1250                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1251
1252                 /* Make this work even if front & back are flipped:
1253                  */
1254                 if (dev_priv->current_page == 0) {
1255                         OUT_RING(dev_priv->back_pitch_offset);
1256                         OUT_RING(dev_priv->front_pitch_offset);
1257                 } else {
1258                         OUT_RING(dev_priv->front_pitch_offset);
1259                         OUT_RING(dev_priv->back_pitch_offset);
1260                 }
1261
1262                 OUT_RING((x << 16) | y);
1263                 OUT_RING((x << 16) | y);
1264                 OUT_RING((w << 16) | h);
1265
1266                 ADVANCE_RING();
1267         }
1268
1269         /* Increment the frame counter.  The client-side 3D driver must
1270          * throttle the framerate by waiting for this value before
1271          * performing the swapbuffer ioctl.
1272          */
1273         dev_priv->sarea_priv->last_frame++;
1274
1275         BEGIN_RING(4);
1276
1277         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1278         RADEON_WAIT_UNTIL_2D_IDLE();
1279
1280         ADVANCE_RING();
1281 }
1282
/* Perform a hardware page flip: point both CRTCs at the buffer that is
 * currently hidden, then toggle current_page and age the frame counter.
 */
static void radeon_cp_dispatch_flip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
	/* Base offset of the buffer we are flipping TO (the one not
	 * currently displayed).
	 */
	int offset = (dev_priv->current_page == 1)
	    ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
		  __FUNCTION__,
		  dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes(dev_priv);
	}

	/* Update the frame offsets for both CRTCs.  The scanout address
	 * is offset by the SAREA frame origin; (color_fmt - 2) is used
	 * as the bytes-per-pixel factor here and the result is 8-byte
	 * aligned -- NOTE(review): relies on the color_fmt encoding,
	 * confirm against radeon_drv.h.
	 */
	BEGIN_RING(6);

	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING_REG(RADEON_CRTC_OFFSET,
		     ((sarea->frame.y * dev_priv->front_pitch +
		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
		     + offset);
	OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
		     + offset);

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	/* Toggle the page and publish it to userspace via the SAREA. */
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
	    1 - dev_priv->current_page;

	BEGIN_RING(2);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}
1329
1330 static int bad_prim_vertex_nr(int primitive, int nr)
1331 {
1332         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1333         case RADEON_PRIM_TYPE_NONE:
1334         case RADEON_PRIM_TYPE_POINT:
1335                 return nr < 1;
1336         case RADEON_PRIM_TYPE_LINE:
1337                 return (nr & 1) || nr == 0;
1338         case RADEON_PRIM_TYPE_LINE_STRIP:
1339                 return nr < 2;
1340         case RADEON_PRIM_TYPE_TRI_LIST:
1341         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1342         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1343         case RADEON_PRIM_TYPE_RECT_LIST:
1344                 return nr % 3 || nr == 0;
1345         case RADEON_PRIM_TYPE_TRI_FAN:
1346         case RADEON_PRIM_TYPE_TRI_STRIP:
1347                 return nr < 3;
1348         default:
1349                 return 1;
1350         }
1351 }
1352
/* Description of one TCL primitive pulled from a client vertex buffer,
 * as consumed by radeon_cp_dispatch_vertex().
 */
typedef struct {
	unsigned int start;	/* byte offset of the first vertex in the buffer */
	unsigned int finish;	/* end offset (used only for debug output here) */
	unsigned int prim;	/* RADEON_PRIM_TYPE_* plus primitive flags */
	unsigned int numverts;	/* number of vertices to render */
	unsigned int offset;	/* NOTE(review): not referenced in this file's visible code */
	unsigned int vc_format;	/* vertex component format word emitted to the CP */
} drm_radeon_tcl_prim_t;
1361
/* Emit the rendering commands for one vertex-buffer primitive, replaying
 * the draw once per SAREA cliprect.  Rejects primitives whose vertex
 * count is invalid for the primitive type (see bad_prim_vertex_nr).
 */
static void radeon_cp_dispatch_vertex(drm_device_t * dev,
				      drm_buf_t * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GART address of the first vertex: buffer base + prim start */
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	/* do-while: the draw is emitted at least once even when
	 * nbox == 0 (no cliprects; the cliprect emit is simply skipped).
	 */
	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}
1407
1408 static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
1409 {
1410         drm_radeon_private_t *dev_priv = dev->dev_private;
1411         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1412         RING_LOCALS;
1413
1414         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1415
1416         /* Emit the vertex buffer age */
1417         BEGIN_RING(2);
1418         RADEON_DISPATCH_AGE(buf_priv->age);
1419         ADVANCE_RING();
1420
1421         buf->pending = 1;
1422         buf->used = 0;
1423 }
1424
/* Chain the byte range [start, end) of a DMA buffer into the main
 * ring as an indirect buffer.  A zero-length range is a no-op.
 */
static void radeon_cp_dispatch_indirect(drm_device_t * dev,
					drm_buf_t * buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		/* GART address the CP will fetch the commands from. */
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		/* Round the byte length up to whole 32-bit dwords
		 * (sizeof(u32) == 4, so +3 then divide).
		 */
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if (dwords & 1) {
			/* Write the NOP directly into the (CPU-mapped)
			 * buffer, just past the caller's data.
			 */
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}
1458
/* Fire an indexed primitive at the hardware.
 *
 * The CP packet header is built in place at the front of the element
 * buffer (the client left RADEON_INDEX_PRIM_OFFSET bytes of headroom
 * before the u16 indices), then the whole range is dispatched as an
 * indirect buffer once per SAREA cliprect.
 */
static void radeon_cp_dispatch_indices(drm_device_t * dev,
				       drm_buf_t * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GART address of the vertex array the indices refer to. */
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	/* First index starts after the packet-header headroom. */
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	/* Refuse index counts that are illegal for this primitive type. */
	if (bad_prim_vertex_nr(prim->prim, count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
		return;
	}

	/* The range must be non-empty and 8-byte aligned at its start. */
	if (start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	/* CPU view of the element buffer: write the packet header here. */
	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
			elt_buf->offset + prim->start);

	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	/* PRIM_WALK_IND: vertices are fetched through the index list. */
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT));

	/* Re-dispatch once per cliprect (at least once if nbox == 0),
	 * updating the scissor before each pass.
	 */
	do {
		if (i < nbox)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		i++;
	} while (i < nbox);

}
1514
1515 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1516
/* Upload a texture image from user space into video memory.
 *
 * The image is streamed through freelist DMA buffers and blitted to
 * tex->offset with CNTL_BITBLT_MULTI.  Large images are split into
 * multiple passes; image->y/height/data are advanced after each pass.
 * Returns 0 on success, EAGAIN (with the updated image copied back to
 * user space) when no DMA buffer is available, EINVAL/EFAULT on bad
 * parameters or failed user copies.
 */
static int radeon_cp_dispatch_texture(DRMFILE filp,
				      drm_device_t * dev,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	/* The destination must be a valid, client-accessible offset. */
	if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return DRM_ERR(EINVAL);
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		/* 32 bpp: 4 bytes per texel. */
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		/* 16 bpp: 2 bytes per texel. */
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		/* 8 bpp: 1 byte per texel. */
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return DRM_ERR(EINVAL);
	}
	/* Source pitch in 64-byte units; zero only fits a 1-row image. */
	spitch = blit_width >> 6;
	if (spitch == 0 && image->height > 1)
		return DRM_ERR(EINVAL);

	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

	do {
		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			  tex->offset >> 10, tex->pitch, tex->format,
			  image->x, image->y, image->width, image->height);

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		if (size > RADEON_MAX_TEXTURE_SIZE) {
			/* Too big for one DMA buffer: clamp to as many
			 * whole rows as fit; the loop handles the rest.
			 */
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if (size < 4 && size > 0) {
			size = 4;
		} else if (size == 0) {
			return 0;
		}

		buf = radeon_freelist_get(dev);
		/* NOTE(review): the "0 &&" disables the idle-and-retry
		 * path, making allocation single-shot before falling
		 * through to the EAGAIN return below.  Looks deliberate
		 * but undocumented -- confirm before re-enabling.
		 */
		if (0 && !buf) {
			radeon_do_cp_idle(dev_priv);
			buf = radeon_freelist_get(dev);
		}
		if (!buf) {
			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
			/* Hand the updated progress back so the client
			 * can simply retry the ioctl.
			 */
			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
				return DRM_ERR(EFAULT);
			return DRM_ERR(EAGAIN);
		}

		/* Dispatch the indirect buffer.
		 */
		buffer =
		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;

		if (microtile) {
			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
			   however, we cannot use blitter directly for texture width < 64 bytes,
			   since minimum tex pitch is 64 bytes and we need this to match
			   the texture width, otherwise the blitter will tile it wrong.
			   Thus, tiling manually in this case. Additionally, need to special
			   case tex height = 1, since our actual image will have height 2
			   and we need to ensure we don't read beyond the texture size
			   from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					/* NOTE(review): copies tex_width *
					 * sizeof(u32) bytes for a single
					 * row -- presumably to cover the
					 * padded 2-row tile; confirm
					 * against the userspace driver.
					 */
					if (DRM_COPY_FROM_USER(buffer, data,
							       tex_width *
							       sizeof(u32))) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
				} else if (tex_width == 32) {
					/* Split the 32-byte row into two
					 * 16-byte halves at tile offsets
					 * 0 and 32 bytes (buffer + 8).
					 */
					if (DRM_COPY_FROM_USER
					    (buffer, data, 16)) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
					if (DRM_COPY_FROM_USER
					    (buffer + 8, data + 16, 16)) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				/* Width matches the tile layout directly:
				 * one straight copy of the whole pass.
				 */
				if (DRM_COPY_FROM_USER(buffer, data,
						       dwords * sizeof(u32))) {
					DRM_ERROR("EFAULT on data, %d dwords\n",
						  dwords);
					return DRM_ERR(EFAULT);
				}
			} else if (tex_width < 16) {
				/* Pad each row out to a 16-byte tile row
				 * (buffer advances 4 dwords per row).
				 */
				for (i = 0; i < tex->height; i++) {
					if (DRM_COPY_FROM_USER
					    (buffer, data, tex_width)) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
				/* Interleave two source rows per 64-byte
				 * tile: halves land at dword offsets
				 * 0, 8, 4, 12 within the tile pair.
				 */
				for (i = 0; i < tex->height; i += 2) {
					if (DRM_COPY_FROM_USER
					    (buffer, data, 16)) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER
					    (buffer + 8, data, 16)) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER
					    (buffer + 4, data, 16)) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER
					    (buffer + 12, data, 16)) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					buffer += 16;
				}
			}
		} else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				if (DRM_COPY_FROM_USER(buffer, data,
						       dwords * sizeof(u32))) {
					DRM_ERROR("EFAULT on data, %d dwords\n",
						  dwords);
					return DRM_ERR(EFAULT);
				}
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0; i < tex->height; i++) {
					if (DRM_COPY_FROM_USER
					    (buffer, data, tex_width)) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
					buffer += 8;
					data += tex_width;
				}
			}
		}

		buf->filp = filp;
		buf->used = size;
		offset = dev_priv->gart_buffers_offset + buf->offset;
		/* Blit the staged data from the DMA buffer to the
		 * destination texture in video memory.
		 */
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | (tex->offset >> 10));
		OUT_RING(0);
		OUT_RING((image->x << 16) | image->y);
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();

		radeon_cp_discard_buffer(dev, buf);

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	return 0;
}
1803
1804 static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
1805 {
1806         drm_radeon_private_t *dev_priv = dev->dev_private;
1807         int i;
1808         RING_LOCALS;
1809         DRM_DEBUG("\n");
1810
1811         BEGIN_RING(35);
1812
1813         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1814         OUT_RING(0x00000000);
1815
1816         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1817         for (i = 0; i < 32; i++) {
1818                 OUT_RING(stipple[i]);
1819         }
1820
1821         ADVANCE_RING();
1822 }
1823
1824 static void radeon_apply_surface_regs(int surf_index,
1825                                       drm_radeon_private_t * dev_priv)
1826 {
1827         if (!dev_priv->mmio)
1828                 return;
1829
1830         radeon_do_cp_idle(dev_priv);
1831
1832         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1833                      dev_priv->surfaces[surf_index].flags);
1834         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1835                      dev_priv->surfaces[surf_index].lower);
1836         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1837                      dev_priv->surfaces[surf_index].upper);
1838 }
1839
1840 /* Allocates a virtual surface
1841  * doesn't always allocate a real surface, will stretch an existing
1842  * surface when possible.
1843  *
1844  * Note that refcount can be at most 2, since during a free refcount=3
1845  * might mean we have to allocate a new surface which might not always
1846  * be available.
 * For example: we allocate three contiguous surfaces ABC. If B is
1848  * freed, we suddenly need two surfaces to store A and C, which might
1849  * not always be available.
1850  */
1851 static int alloc_surface(drm_radeon_surface_alloc_t * new,
1852                          drm_radeon_private_t * dev_priv, DRMFILE filp)
1853 {
1854         struct radeon_virt_surface *s;
1855         int i;
1856         int virt_surface_index;
1857         uint32_t new_upper, new_lower;
1858
1859         new_lower = new->address;
1860         new_upper = new_lower + new->size - 1;
1861
1862         /* sanity check */
1863         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1864             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1865              RADEON_SURF_ADDRESS_FIXED_MASK)
1866             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1867                 return -1;
1868
1869         /* make sure there is no overlap with existing surfaces */
1870         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1871                 if ((dev_priv->surfaces[i].refcount != 0) &&
1872                     (((new_lower >= dev_priv->surfaces[i].lower) &&
1873                       (new_lower < dev_priv->surfaces[i].upper)) ||
1874                      ((new_lower < dev_priv->surfaces[i].lower) &&
1875                       (new_upper > dev_priv->surfaces[i].lower)))) {
1876                         return -1;
1877                 }
1878         }
1879
1880         /* find a virtual surface */
1881         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1882                 if (dev_priv->virt_surfaces[i].filp == 0)
1883                         break;
1884         if (i == 2 * RADEON_MAX_SURFACES) {
1885                 return -1;
1886         }
1887         virt_surface_index = i;
1888
1889         /* try to reuse an existing surface */
1890         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1891                 /* extend before */
1892                 if ((dev_priv->surfaces[i].refcount == 1) &&
1893                     (new->flags == dev_priv->surfaces[i].flags) &&
1894                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1895                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1896                         s->surface_index = i;
1897                         s->lower = new_lower;
1898                         s->upper = new_upper;
1899                         s->flags = new->flags;
1900                         s->filp = filp;
1901                         dev_priv->surfaces[i].refcount++;
1902                         dev_priv->surfaces[i].lower = s->lower;
1903                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1904                         return virt_surface_index;
1905                 }
1906
1907                 /* extend after */
1908                 if ((dev_priv->surfaces[i].refcount == 1) &&
1909                     (new->flags == dev_priv->surfaces[i].flags) &&
1910                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
1911                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1912                         s->surface_index = i;
1913                         s->lower = new_lower;
1914                         s->upper = new_upper;
1915                         s->flags = new->flags;
1916                         s->filp = filp;
1917                         dev_priv->surfaces[i].refcount++;
1918                         dev_priv->surfaces[i].upper = s->upper;
1919                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1920                         return virt_surface_index;
1921                 }
1922         }
1923
1924         /* okay, we need a new one */
1925         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1926                 if (dev_priv->surfaces[i].refcount == 0) {
1927                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1928                         s->surface_index = i;
1929                         s->lower = new_lower;
1930                         s->upper = new_upper;
1931                         s->flags = new->flags;
1932                         s->filp = filp;
1933                         dev_priv->surfaces[i].refcount = 1;
1934                         dev_priv->surfaces[i].lower = s->lower;
1935                         dev_priv->surfaces[i].upper = s->upper;
1936                         dev_priv->surfaces[i].flags = s->flags;
1937                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1938                         return virt_surface_index;
1939                 }
1940         }
1941
1942         /* we didn't find anything */
1943         return -1;
1944 }
1945
/* Release the virtual surface owned by @filp that starts at @lower.
 *
 * Shrinks the backing real surface's bounds where this virtual
 * surface defined them, drops its refcount (clearing the flags when
 * it reaches zero), and rewrites the hardware registers.
 * Returns 0 on success, 1 if no matching surface was found.
 */
static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
			int lower)
{
	struct radeon_virt_surface *s;
	int i;
	/* find the virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		s = &(dev_priv->virt_surfaces[i]);
		if (s->filp) {
			/* Match on start address AND owner, so one client
			 * cannot free another client's surface.
			 */
			if ((lower == s->lower) && (filp == s->filp)) {
				/* If this virt surface defined the real
				 * surface's lower bound, pull it up.
				 */
				if (dev_priv->surfaces[s->surface_index].
				    lower == s->lower)
					dev_priv->surfaces[s->surface_index].
					    lower = s->upper;

				/* Likewise push the upper bound down. */
				if (dev_priv->surfaces[s->surface_index].
				    upper == s->upper)
					dev_priv->surfaces[s->surface_index].
					    upper = s->lower;

				dev_priv->surfaces[s->surface_index].refcount--;
				if (dev_priv->surfaces[s->surface_index].
				    refcount == 0)
					dev_priv->surfaces[s->surface_index].
					    flags = 0;
				s->filp = NULL;
				radeon_apply_surface_regs(s->surface_index,
							  dev_priv);
				return 0;
			}
		}
	}
	return 1;
}
1980
1981 static void radeon_surfaces_release(DRMFILE filp,
1982                                     drm_radeon_private_t * dev_priv)
1983 {
1984         int i;
1985         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1986                 if (dev_priv->virt_surfaces[i].filp == filp)
1987                         free_surface(filp, dev_priv,
1988                                      dev_priv->virt_surfaces[i].lower);
1989         }
1990 }
1991
1992 /* ================================================================
1993  * IOCTL functions
1994  */
1995 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1996 {
1997         DRM_DEVICE;
1998         drm_radeon_private_t *dev_priv = dev->dev_private;
1999         drm_radeon_surface_alloc_t alloc;
2000
2001         if (!dev_priv) {
2002                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2003                 return DRM_ERR(EINVAL);
2004         }
2005
2006         DRM_COPY_FROM_USER_IOCTL(alloc,
2007                                  (drm_radeon_surface_alloc_t __user *) data,
2008                                  sizeof(alloc));
2009
2010         if (alloc_surface(&alloc, dev_priv, filp) == -1)
2011                 return DRM_ERR(EINVAL);
2012         else
2013                 return 0;
2014 }
2015
2016 static int radeon_surface_free(DRM_IOCTL_ARGS)
2017 {
2018         DRM_DEVICE;
2019         drm_radeon_private_t *dev_priv = dev->dev_private;
2020         drm_radeon_surface_free_t memfree;
2021
2022         if (!dev_priv) {
2023                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2024                 return DRM_ERR(EINVAL);
2025         }
2026
2027         DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_mem_free_t __user *) data,
2028                                  sizeof(memfree));
2029
2030         if (free_surface(filp, dev_priv, memfree.address))
2031                 return DRM_ERR(EINVAL);
2032         else
2033                 return 0;
2034 }
2035
/* Ioctl entry point: clear color/depth/stencil for the cliprects
 * currently posted in the SAREA.
 */
static int radeon_cp_clear(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_clear_t clear;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
				 sizeof(clear));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* Clamp the client-supplied box count before using it to size
	 * the copy below -- sarea_priv->nbox is client-writable.
	 */
	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
			       sarea_priv->nbox * sizeof(depth_boxes[0])))
		return DRM_ERR(EFAULT);

	radeon_cp_dispatch_clear(dev, &clear, depth_boxes);

	COMMIT_RING();
	return 0;
}
2064
/* Enable hardware page flipping on both CRTCs.
 *
 * Waits for 3D idle, then sets RADEON_CRTC_OFFSET_FLIP_CNTL in both
 * CRTC offset-control registers and records the flipping state in
 * dev_priv and the shared area (page 0 becomes the current page).
 * (Original author's note: not sure why this isn't set all the time.)
 */
static int radeon_do_init_pageflip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(6);
	RADEON_WAIT_UNTIL_3D_IDLE();
	/* Read-modify-write both CRTC offset controls through the ring. */
	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	ADVANCE_RING();

	dev_priv->page_flipping = 1;
	/* Start on page 0 and publish that to clients via the SAREA. */
	dev_priv->current_page = 0;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}
2090
2091 /* Called whenever a client dies, from drm_release.
2092  * NOTE:  Lock isn't necessarily held when this is called!
2093  */
2094 static int radeon_do_cleanup_pageflip(drm_device_t * dev)
2095 {
2096         drm_radeon_private_t *dev_priv = dev->dev_private;
2097         DRM_DEBUG("\n");
2098
2099         if (dev_priv->current_page != 0)
2100                 radeon_cp_dispatch_flip(dev);
2101
2102         dev_priv->page_flipping = 0;
2103         return 0;
2104 }
2105
2106 /* Swapping and flipping are different operations, need different ioctls.
2107  * They can & should be intermixed to support multiple 3d windows.
2108  */
/* Ioctl: flip between the two display pages.
 */
static int radeon_cp_flip(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* Lazily enable page flipping the first time a flip is requested. */
	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip(dev);

	radeon_cp_dispatch_flip(dev);

	COMMIT_RING();
	return 0;
}
2127
/* Ioctl: blit the back buffer to the front buffer, per cliprect.
 */
static int radeon_cp_swap(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* Clamp the client-supplied cliprect count in the shared area. */
	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	radeon_cp_dispatch_swap(dev);
	/* NOTE(review): clearing ctx_owner presumably forces the next
	 * client to re-emit its 3D context after the swap — confirm
	 * against radeon_cp_dispatch_swap().
	 */
	dev_priv->sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}
2148
/* Ioctl: submit a DMA buffer of vertex data for rendering.
 *
 * Validates the client-supplied buffer index and primitive type,
 * re-emits any engine state the client marked dirty in the SAREA, then
 * dispatches the buffer as a single TCL primitive.  The buffer may be
 * discarded (returned to the free pool) afterwards if requested.
 */
static int radeon_cp_vertex(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex_t vertex;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);

	/* Reject out-of-range buffer indices and primitive types before
	 * touching the buffer list.
	 */
	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", vertex.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	/* A client may only submit buffers it owns and that are not
	 * already queued to the hardware.
	 */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	/* Build up a prim_t record:
	 */
	if (vertex.count) {
		buf->used = vertex.count;	/* not used? */

		/* Flush dirty state (except cliprects, which are handled
		 * at dispatch time).  Texture images and quiescence only
		 * need emitting once, so clear those dirty bits here.
		 */
		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
			if (radeon_emit_state(dev_priv, filp_priv,
					      &sarea_priv->context_state,
					      sarea_priv->tex_state,
					      sarea_priv->dirty)) {
				DRM_ERROR("radeon_emit_state failed\n");
				return DRM_ERR(EINVAL);
			}

			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex.count;	/* unused */
		prim.prim = vertex.prim;
		prim.numverts = vertex.count;
		prim.vc_format = dev_priv->sarea_priv->vc_format;

		radeon_cp_dispatch_vertex(dev, buf, &prim);
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2231
2232 static int radeon_cp_indices(DRM_IOCTL_ARGS)
2233 {
2234         DRM_DEVICE;
2235         drm_radeon_private_t *dev_priv = dev->dev_private;
2236         drm_file_t *filp_priv;
2237         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2238         drm_device_dma_t *dma = dev->dma;
2239         drm_buf_t *buf;
2240         drm_radeon_indices_t elts;
2241         drm_radeon_tcl_prim_t prim;
2242         int count;
2243
2244         LOCK_TEST_WITH_RETURN(dev, filp);
2245
2246         if (!dev_priv) {
2247                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2248                 return DRM_ERR(EINVAL);
2249         }
2250
2251         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2252
2253         DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
2254                                  sizeof(elts));
2255
2256         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2257                   DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);
2258
2259         if (elts.idx < 0 || elts.idx >= dma->buf_count) {
2260                 DRM_ERROR("buffer index %d (of %d max)\n",
2261                           elts.idx, dma->buf_count - 1);
2262                 return DRM_ERR(EINVAL);
2263         }
2264         if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2265                 DRM_ERROR("buffer prim %d\n", elts.prim);
2266                 return DRM_ERR(EINVAL);
2267         }
2268
2269         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2270         VB_AGE_TEST_WITH_RETURN(dev_priv);
2271
2272         buf = dma->buflist[elts.idx];
2273
2274         if (buf->filp != filp) {
2275                 DRM_ERROR("process %d using buffer owned by %p\n",
2276                           DRM_CURRENTPID, buf->filp);
2277                 return DRM_ERR(EINVAL);
2278         }
2279         if (buf->pending) {
2280                 DRM_ERROR("sending pending buffer %d\n", elts.idx);
2281                 return DRM_ERR(EINVAL);
2282         }
2283
2284         count = (elts.end - elts.start) / sizeof(u16);
2285         elts.start -= RADEON_INDEX_PRIM_OFFSET;
2286
2287         if (elts.start & 0x7) {
2288                 DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
2289                 return DRM_ERR(EINVAL);
2290         }
2291         if (elts.start < buf->used) {
2292                 DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
2293                 return DRM_ERR(EINVAL);
2294         }
2295
2296         buf->used = elts.end;
2297
2298         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2299                 if (radeon_emit_state(dev_priv, filp_priv,
2300                                       &sarea_priv->context_state,
2301                                       sarea_priv->tex_state,
2302                                       sarea_priv->dirty)) {
2303                         DRM_ERROR("radeon_emit_state failed\n");
2304                         return DRM_ERR(EINVAL);
2305                 }
2306
2307                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2308                                        RADEON_UPLOAD_TEX1IMAGES |
2309                                        RADEON_UPLOAD_TEX2IMAGES |
2310                                        RADEON_REQUIRE_QUIESCENCE);
2311         }
2312
2313         /* Build up a prim_t record:
2314          */
2315         prim.start = elts.start;
2316         prim.finish = elts.end;
2317         prim.prim = elts.prim;
2318         prim.offset = 0;        /* offset from start of dma buffers */
2319         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2320         prim.vc_format = dev_priv->sarea_priv->vc_format;
2321
2322         radeon_cp_dispatch_indices(dev, buf, &prim);
2323         if (elts.discard) {
2324                 radeon_cp_discard_buffer(dev, buf);
2325         }
2326
2327         COMMIT_RING();
2328         return 0;
2329 }
2330
/* Ioctl: upload texture data to the card.
 *
 * Copies the texture request and the image descriptor it points at
 * from user space, then hands both to radeon_cp_dispatch_texture(),
 * whose result is returned to the caller.
 */
static int radeon_cp_texture(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_texture_t tex;
	drm_radeon_tex_image_t image;
	int ret;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
				 sizeof(tex));

	if (tex.image == NULL) {
		DRM_ERROR("null texture image!\n");
		return DRM_ERR(EINVAL);
	}

	if (DRM_COPY_FROM_USER(&image,
			       (drm_radeon_tex_image_t __user *) tex.image,
			       sizeof(image)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);

	COMMIT_RING();
	return ret;
}
2362
/* Ioctl: set the 32x32 polygon stipple pattern.
 *
 * Copies the 32-dword mask from user space and emits it to the
 * hardware via radeon_cp_dispatch_stipple().
 */
static int radeon_cp_stipple(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t stipple;
	u32 mask[32];

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
				 sizeof(stipple));

	if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	radeon_cp_dispatch_stipple(dev, mask);

	COMMIT_RING();
	return 0;
}
2385
/* Ioctl: execute a raw command buffer (privileged, typically the X
 * server).
 *
 * Validates buffer ownership and the requested [start, end) range,
 * waits for 3D idle, then dispatches the buffer's contents to the CP
 * unverified — which is why access to this ioctl must be restricted.
 */
static int radeon_cp_indirect(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, filp);

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL(indirect,
				 (drm_radeon_indirect_t __user *) data,
				 sizeof(indirect));

	DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
		  indirect.idx, indirect.start, indirect.end, indirect.discard);

	if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	/* Only the owning, non-pending buffer may be submitted. */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect.idx);
		return DRM_ERR(EINVAL);
	}

	/* Commands before buf->used were already dispatched; a start
	 * inside that region would replay them.
	 */
	if (indirect.start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
	if (indirect.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2459
2460 static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
2461 {
2462         DRM_DEVICE;
2463         drm_radeon_private_t *dev_priv = dev->dev_private;
2464         drm_file_t *filp_priv;
2465         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2466         drm_device_dma_t *dma = dev->dma;
2467         drm_buf_t *buf;
2468         drm_radeon_vertex2_t vertex;
2469         int i;
2470         unsigned char laststate;
2471
2472         LOCK_TEST_WITH_RETURN(dev, filp);
2473
2474         if (!dev_priv) {
2475                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2476                 return DRM_ERR(EINVAL);
2477         }
2478
2479         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2480
2481         DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
2482                                  sizeof(vertex));
2483
2484         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2485                   DRM_CURRENTPID, vertex.idx, vertex.discard);
2486
2487         if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2488                 DRM_ERROR("buffer index %d (of %d max)\n",
2489                           vertex.idx, dma->buf_count - 1);
2490                 return DRM_ERR(EINVAL);
2491         }
2492
2493         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2494         VB_AGE_TEST_WITH_RETURN(dev_priv);
2495
2496         buf = dma->buflist[vertex.idx];
2497
2498         if (buf->filp != filp) {
2499                 DRM_ERROR("process %d using buffer owned by %p\n",
2500                           DRM_CURRENTPID, buf->filp);
2501                 return DRM_ERR(EINVAL);
2502         }
2503
2504         if (buf->pending) {
2505                 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2506                 return DRM_ERR(EINVAL);
2507         }
2508
2509         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2510                 return DRM_ERR(EINVAL);
2511
2512         for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
2513                 drm_radeon_prim_t prim;
2514                 drm_radeon_tcl_prim_t tclprim;
2515
2516                 if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
2517                         return DRM_ERR(EFAULT);
2518
2519                 if (prim.stateidx != laststate) {
2520                         drm_radeon_state_t state;
2521
2522                         if (DRM_COPY_FROM_USER(&state,
2523                                                &vertex.state[prim.stateidx],
2524                                                sizeof(state)))
2525                                 return DRM_ERR(EFAULT);
2526
2527                         if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
2528                                 DRM_ERROR("radeon_emit_state2 failed\n");
2529                                 return DRM_ERR(EINVAL);
2530                         }
2531
2532                         laststate = prim.stateidx;
2533                 }
2534
2535                 tclprim.start = prim.start;
2536                 tclprim.finish = prim.finish;
2537                 tclprim.prim = prim.prim;
2538                 tclprim.vc_format = prim.vc_format;
2539
2540                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2541                         tclprim.offset = prim.numverts * 64;
2542                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2543
2544                         radeon_cp_dispatch_indices(dev, buf, &tclprim);
2545                 } else {
2546                         tclprim.numverts = prim.numverts;
2547                         tclprim.offset = 0;     /* not used */
2548
2549                         radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2550                 }
2551
2552                 if (sarea_priv->nbox == 1)
2553                         sarea_priv->nbox = 0;
2554         }
2555
2556         if (vertex.discard) {
2557                 radeon_cp_discard_buffer(dev, buf);
2558         }
2559
2560         COMMIT_RING();
2561         return 0;
2562 }
2563
/* Emit one register-write packet from the client's command stream.
 *
 * Looks up the packet's register range in the packet[] table by id,
 * bounds-checks the payload against the remaining command buffer,
 * verifies/fixes up the register values, then emits them as a CP
 * type-0 packet.  On success cmdbuf is advanced past the payload.
 */
static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       drm_file_t * filp_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_cmd_buffer_t * cmdbuf)
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	sz = packet[id].len;
	reg = packet[id].start;

	/* Don't read past the end of the copied-in command buffer. */
	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return DRM_ERR(EINVAL);
	}

	if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
		DRM_ERROR("Packet verification failed\n");
		return DRM_ERR(EINVAL);
	}

	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(reg, (sz - 1)));
	OUT_RING_TABLE(data, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2599
2600 static __inline__ int radeon_emit_scalars(drm_radeon_private_t * dev_priv,
2601                                           drm_radeon_cmd_header_t header,
2602                                           drm_radeon_cmd_buffer_t * cmdbuf)
2603 {
2604         int sz = header.scalars.count;
2605         int start = header.scalars.offset;
2606         int stride = header.scalars.stride;
2607         RING_LOCALS;
2608
2609         BEGIN_RING(3 + sz);
2610         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2611         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2612         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2613         OUT_RING_TABLE(cmdbuf->buf, sz);
2614         ADVANCE_RING();
2615         cmdbuf->buf += sz * sizeof(int);
2616         cmdbuf->bufsz -= sz * sizeof(int);
2617         return 0;
2618 }
2619
2620 /* God this is ugly
2621  */
2622 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t * dev_priv,
2623                                            drm_radeon_cmd_header_t header,
2624                                            drm_radeon_cmd_buffer_t * cmdbuf)
2625 {
2626         int sz = header.scalars.count;
2627         int start = ((unsigned int)header.scalars.offset) + 0x100;
2628         int stride = header.scalars.stride;
2629         RING_LOCALS;
2630
2631         BEGIN_RING(3 + sz);
2632         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2633         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2634         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2635         OUT_RING_TABLE(cmdbuf->buf, sz);
2636         ADVANCE_RING();
2637         cmdbuf->buf += sz * sizeof(int);
2638         cmdbuf->bufsz -= sz * sizeof(int);
2639         return 0;
2640 }
2641
2642 static __inline__ int radeon_emit_vectors(drm_radeon_private_t * dev_priv,
2643                                           drm_radeon_cmd_header_t header,
2644                                           drm_radeon_cmd_buffer_t * cmdbuf)
2645 {
2646         int sz = header.vectors.count;
2647         int start = header.vectors.offset;
2648         int stride = header.vectors.stride;
2649         RING_LOCALS;
2650
2651         BEGIN_RING(3 + sz);
2652         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2653         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2654         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2655         OUT_RING_TABLE(cmdbuf->buf, sz);
2656         ADVANCE_RING();
2657
2658         cmdbuf->buf += sz * sizeof(int);
2659         cmdbuf->bufsz -= sz * sizeof(int);
2660         return 0;
2661 }
2662
/* Emit a single type-3 packet from the client command stream.
 *
 * The packet is verified (and its offsets fixed up) by
 * radeon_check_and_fixup_packet3(), which also returns its size in
 * dwords; it is then copied to the ring and cmdbuf is advanced.
 */
static int radeon_emit_packet3(drm_device_t * dev,
			       drm_file_t * filp_priv,
			       drm_radeon_cmd_buffer_t * cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	BEGIN_RING(cmdsz);
	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
	ADVANCE_RING();

	/* cmdsz is in dwords; advance the byte-oriented buffer pointer. */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2688
/* Emit a type-3 packet once per cliprect.
 *
 * The packet is verified/fixed up once, then replayed for each
 * cliprect supplied with the command buffer, each preceded by the
 * matching scissor setup via radeon_emit_clip_rect().  With zero
 * original cliprects the packet is skipped entirely (only the buffer
 * pointer advances).
 */
static int radeon_emit_packet3_cliprect(drm_device_t * dev,
					drm_file_t * filp_priv,
					drm_radeon_cmd_buffer_t * cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		/* Replay the same packet under the current scissor. */
		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

      out:
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2750
/* Emit an engine-idle wait requested by the client command stream.
 *
 * flags selects waiting for 2D idle, 3D idle, or both; any other
 * combination is rejected with EINVAL.
 */
static int radeon_emit_wait(drm_device_t * dev, int flags)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
	switch (flags) {
	case RADEON_WAIT_2D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_3D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_2D | RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_IDLE();
		ADVANCE_RING();
		break;
	default:
		return DRM_ERR(EINVAL);
	}

	return 0;
}
2779
2780 static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
2781 {
2782         DRM_DEVICE;
2783         drm_radeon_private_t *dev_priv = dev->dev_private;
2784         drm_file_t *filp_priv;
2785         drm_device_dma_t *dma = dev->dma;
2786         drm_buf_t *buf = NULL;
2787         int idx;
2788         drm_radeon_cmd_buffer_t cmdbuf;
2789         drm_radeon_cmd_header_t header;
2790         int orig_nbox, orig_bufsz;
2791         char *kbuf = NULL;
2792
2793         LOCK_TEST_WITH_RETURN(dev, filp);
2794
2795         if (!dev_priv) {
2796                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2797                 return DRM_ERR(EINVAL);
2798         }
2799
2800         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2801
2802         DRM_COPY_FROM_USER_IOCTL(cmdbuf,
2803                                  (drm_radeon_cmd_buffer_t __user *) data,
2804                                  sizeof(cmdbuf));
2805
2806         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2807         VB_AGE_TEST_WITH_RETURN(dev_priv);
2808
2809         if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
2810                 return DRM_ERR(EINVAL);
2811         }
2812
2813         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2814          * races between checking values and using those values in other code,
2815          * and simply to avoid a lot of function calls to copy in data.
2816          */
2817         orig_bufsz = cmdbuf.bufsz;
2818         if (orig_bufsz != 0) {
2819                 kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
2820                 if (kbuf == NULL)
2821                         return DRM_ERR(ENOMEM);
2822                 if (DRM_COPY_FROM_USER(kbuf, cmdbuf.buf, cmdbuf.bufsz)) {
2823                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2824                         return DRM_ERR(EFAULT);
2825                 }
2826                 cmdbuf.buf = kbuf;
2827         }
2828
2829         orig_nbox = cmdbuf.nbox;
2830
2831         if (dev_priv->microcode_version == UCODE_R300) {
2832                 int temp;
2833                 temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);
2834
2835                 if (orig_bufsz != 0)
2836                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2837
2838                 return temp;
2839         }
2840
2841         /* microcode_version != r300 */
2842         while (cmdbuf.bufsz >= sizeof(header)) {
2843
2844                 header.i = *(int *)cmdbuf.buf;
2845                 cmdbuf.buf += sizeof(header);
2846                 cmdbuf.bufsz -= sizeof(header);
2847
2848                 switch (header.header.cmd_type) {
2849                 case RADEON_CMD_PACKET:
2850                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2851                         if (radeon_emit_packets
2852                             (dev_priv, filp_priv, header, &cmdbuf)) {
2853                                 DRM_ERROR("radeon_emit_packets failed\n");
2854                                 goto err;
2855                         }
2856                         break;
2857
2858                 case RADEON_CMD_SCALARS:
2859                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2860                         if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
2861                                 DRM_ERROR("radeon_emit_scalars failed\n");
2862                                 goto err;
2863                         }
2864                         break;
2865
2866                 case RADEON_CMD_VECTORS:
2867                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2868                         if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
2869                                 DRM_ERROR("radeon_emit_vectors failed\n");
2870                                 goto err;
2871                         }
2872                         break;
2873
2874                 case RADEON_CMD_DMA_DISCARD:
2875                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2876                         idx = header.dma.buf_idx;
2877                         if (idx < 0 || idx >= dma->buf_count) {
2878                                 DRM_ERROR("buffer index %d (of %d max)\n",
2879                                           idx, dma->buf_count - 1);
2880                                 goto err;
2881                         }
2882
2883                         buf = dma->buflist[idx];
2884                         if (buf->filp != filp || buf->pending) {
2885                                 DRM_ERROR("bad buffer %p %p %d\n",
2886                                           buf->filp, filp, buf->pending);
2887                                 goto err;
2888                         }
2889
2890                         radeon_cp_discard_buffer(dev, buf);
2891                         break;
2892
2893                 case RADEON_CMD_PACKET3:
2894                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2895                         if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
2896                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2897                                 goto err;
2898                         }
2899                         break;
2900
2901                 case RADEON_CMD_PACKET3_CLIP:
2902                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2903                         if (radeon_emit_packet3_cliprect
2904                             (dev, filp_priv, &cmdbuf, orig_nbox)) {
2905                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2906                                 goto err;
2907                         }
2908                         break;
2909
2910                 case RADEON_CMD_SCALARS2:
2911                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2912                         if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
2913                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2914                                 goto err;
2915                         }
2916                         break;
2917
2918                 case RADEON_CMD_WAIT:
2919                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2920                         if (radeon_emit_wait(dev, header.wait.flags)) {
2921                                 DRM_ERROR("radeon_emit_wait failed\n");
2922                                 goto err;
2923                         }
2924                         break;
2925                 default:
2926                         DRM_ERROR("bad cmd_type %d at %p\n",
2927                                   header.header.cmd_type,
2928                                   cmdbuf.buf - sizeof(header));
2929                         goto err;
2930                 }
2931         }
2932
2933         if (orig_bufsz != 0)
2934                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2935
2936         DRM_DEBUG("DONE\n");
2937         COMMIT_RING();
2938         return 0;
2939
2940       err:
2941         if (orig_bufsz != 0)
2942                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2943         return DRM_ERR(EINVAL);
2944 }
2945
/* DRM_RADEON_GETPARAM ioctl handler: read back a single driver/device
 * value selected by param.param and copy it out to the user-supplied
 * pointer param.value.
 *
 * Returns 0 on success, EINVAL for an unknown parameter or when the
 * device is uninitialized, EFAULT if the copy to userspace fails.
 */
static int radeon_cp_getparam(DRM_IOCTL_ARGS)
{
        DRM_DEVICE;
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_getparam_t param;
        int value;

        /* dev_priv is only set up after DRM_RADEON_CP_INIT has run. */
        if (!dev_priv) {
                DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
                return DRM_ERR(EINVAL);
        }

        DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
                                 sizeof(param));

        DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);

        switch (param.param) {
        case RADEON_PARAM_GART_BUFFER_OFFSET:
                value = dev_priv->gart_buffers_offset;
                break;
        case RADEON_PARAM_LAST_FRAME:
                /* Count reads so driver usage statistics reflect polling. */
                dev_priv->stats.last_frame_reads++;
                value = GET_SCRATCH(0);
                break;
        case RADEON_PARAM_LAST_DISPATCH:
                value = GET_SCRATCH(1);
                break;
        case RADEON_PARAM_LAST_CLEAR:
                dev_priv->stats.last_clear_reads++;
                value = GET_SCRATCH(2);
                break;
        case RADEON_PARAM_IRQ_NR:
                value = dev->irq;
                break;
        case RADEON_PARAM_GART_BASE:
                value = dev_priv->gart_vm_start;
                break;
        case RADEON_PARAM_REGISTER_HANDLE:
                value = dev_priv->mmio_offset;
                break;
        case RADEON_PARAM_STATUS_HANDLE:
                value = dev_priv->ring_rptr_offset;
                break;
#if BITS_PER_LONG == 32
                /*
                 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
                 * pointer which can't fit into an int-sized variable.  According to
                 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
                 * not supporting it shouldn't be a problem.  If the same functionality
                 * is needed on 64-bit platforms, a new ioctl() would have to be added,
                 * so backwards-compatibility for the embedded platforms can be
                 * maintained.  --davidm 4-Feb-2004.
                 */
        case RADEON_PARAM_SAREA_HANDLE:
                /* The lock is the first dword in the sarea. */
                value = (long)dev->lock.hw_lock;
                break;
#endif
        case RADEON_PARAM_GART_TEX_HANDLE:
                value = dev_priv->gart_textures_offset;
                break;
        default:
                return DRM_ERR(EINVAL);
        }

        if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
                DRM_ERROR("copy_to_user\n");
                return DRM_ERR(EFAULT);
        }

        return 0;
}
3019
3020 static int radeon_cp_setparam(DRM_IOCTL_ARGS)
3021 {
3022         DRM_DEVICE;
3023         drm_radeon_private_t *dev_priv = dev->dev_private;
3024         drm_file_t *filp_priv;
3025         drm_radeon_setparam_t sp;
3026         struct drm_radeon_driver_file_fields *radeon_priv;
3027
3028         if (!dev_priv) {
3029                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
3030                 return DRM_ERR(EINVAL);
3031         }
3032
3033         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
3034
3035         DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
3036                                  sizeof(sp));
3037
3038         switch (sp.param) {
3039         case RADEON_SETPARAM_FB_LOCATION:
3040                 radeon_priv = filp_priv->driver_priv;
3041                 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
3042                 break;
3043         case RADEON_SETPARAM_SWITCH_TILING:
3044                 if (sp.value == 0) {
3045                         DRM_DEBUG("color tiling disabled\n");
3046                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3047                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3048                         dev_priv->sarea_priv->tiling_enabled = 0;
3049                 } else if (sp.value == 1) {
3050                         DRM_DEBUG("color tiling enabled\n");
3051                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3052                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3053                         dev_priv->sarea_priv->tiling_enabled = 1;
3054                 }
3055                 break;
3056         case RADEON_SETPARAM_PCIGART_LOCATION:
3057                 dev_priv->pcigart_offset = sp.value;
3058                 break;
3059         default:
3060                 DRM_DEBUG("Invalid parameter %d\n", sp.param);
3061                 return DRM_ERR(EINVAL);
3062         }
3063
3064         return 0;
3065 }
3066
3067 /* When a client dies:
3068  *    - Check for and clean up flipped page state
3069  *    - Free any alloced GART memory.
3070  *
3071  * DRM infrastructure takes care of reclaiming dma buffers.
3072  */
3073 void radeon_driver_prerelease(drm_device_t * dev, DRMFILE filp)
3074 {
3075         if (dev->dev_private) {
3076                 drm_radeon_private_t *dev_priv = dev->dev_private;
3077                 if (dev_priv->page_flipping) {
3078                         radeon_do_cleanup_pageflip(dev);
3079                 }
3080                 radeon_mem_release(filp, dev_priv->gart_heap);
3081                 radeon_mem_release(filp, dev_priv->fb_heap);
3082                 radeon_surfaces_release(filp, dev_priv);
3083         }
3084 }
3085
/* Device teardown hook: delegate the full engine/CP shutdown to
 * radeon_do_release().
 */
void radeon_driver_pretakedown(drm_device_t * dev)
{
        radeon_do_release(dev);
}
3090
3091 int radeon_driver_open_helper(drm_device_t * dev, drm_file_t * filp_priv)
3092 {
3093         drm_radeon_private_t *dev_priv = dev->dev_private;
3094         struct drm_radeon_driver_file_fields *radeon_priv;
3095
3096         radeon_priv =
3097             (struct drm_radeon_driver_file_fields *)
3098             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3099
3100         if (!radeon_priv)
3101                 return -ENOMEM;
3102
3103         filp_priv->driver_priv = radeon_priv;
3104         if (dev_priv)
3105                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3106         else
3107                 radeon_priv->radeon_fb_delta = 0;
3108         return 0;
3109 }
3110
3111 void radeon_driver_free_filp_priv(drm_device_t * dev, drm_file_t * filp_priv)
3112 {
3113         struct drm_radeon_driver_file_fields *radeon_priv =
3114             filp_priv->driver_priv;
3115
3116         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3117 }
3118
/* Ioctl dispatch table for the radeon driver, indexed by ioctl number.
 * Each entry gives the handler plus two flags.
 * NOTE(review): flag semantics come from the drm_ioctl_desc_t definition
 * in drmP.h (not visible here); they appear to be (auth_needed,
 * root_only) -- confirm against the DRM core before relying on this.
 */
drm_ioctl_desc_t radeon_ioctls[] = {
        [DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, 1, 1},
        [DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, 1, 1},
        [DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, 1, 1},
        [DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, 1, 1},
        [DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, 1, 1},
        [DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, 1, 1},
        [DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, 1, 0},
        [DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, 1, 0}
};

/* Number of entries in the table above, exported for the DRM core. */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);