]> git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - drivers/gpu/drm/vc4/vc4_validate_shaders.c
drm/vc4: Add support for branching in shader validation.
[karo-tx-linux.git] / drivers / gpu / drm / vc4 / vc4_validate_shaders.c
index b984f123df09fdb18ae53df70998b152224c7cef..75c8e8fb182727f8b1e9796a8af31927eef0f67b 100644 (file)
@@ -39,6 +39,8 @@
 #include "vc4_drv.h"
 #include "vc4_qpu_defines.h"
 
+#define LIVE_REG_COUNT (32 + 32 + 4)
+
 struct vc4_shader_validation_state {
        /* Current IP being validated. */
        uint32_t ip;
@@ -57,8 +59,9 @@ struct vc4_shader_validation_state {
         *
         * This is used for the validation of direct address memory reads.
         */
-       uint32_t live_min_clamp_offsets[32 + 32 + 4];
-       bool live_max_clamp_regs[32 + 32 + 4];
+       uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
+       bool live_max_clamp_regs[LIVE_REG_COUNT];
+       uint32_t live_immediates[LIVE_REG_COUNT];
 
        /* Bitfield of which IPs are used as branch targets.
         *
@@ -66,6 +69,20 @@ struct vc4_shader_validation_state {
         * points and clearing the texturing/clamping state.
         */
        unsigned long *branch_targets;
+
+       /* Set when entering a basic block, and cleared when the uniform
+        * address update is found.  This is used to make sure that we don't
+        * read uniforms when the address is undefined.
+        */
+       bool needs_uniform_address_update;
+
+       /* Set when we find a backwards branch.  If the branch is backwards,
+        * the taraget is probably doing an address reset to read uniforms,
+        * and so we need to be sure that a uniforms address is present in the
+        * stream, even if the shader didn't need to read uniforms in later
+        * basic blocks.
+        */
+       bool needs_uniform_address_for_loop;
 };
 
 static uint32_t
@@ -227,8 +244,14 @@ check_tmu_write(struct vc4_validated_shader_info *validated_shader,
        /* Since direct uses a RADDR uniform reference, it will get counted in
         * check_instruction_reads()
         */
-       if (!is_direct)
+       if (!is_direct) {
+               if (validation_state->needs_uniform_address_update) {
+                       DRM_ERROR("Texturing with undefined uniform address\n");
+                       return false;
+               }
+
                validated_shader->uniforms_size += 4;
+       }
 
        if (submit) {
                if (!record_texture_sample(validated_shader,
@@ -242,6 +265,98 @@ check_tmu_write(struct vc4_validated_shader_info *validated_shader,
        return true;
 }
 
+static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader)
+{
+       uint32_t o = validated_shader->num_uniform_addr_offsets;
+       uint32_t num_uniforms = validated_shader->uniforms_size / 4;
+
+       validated_shader->uniform_addr_offsets =
+               krealloc(validated_shader->uniform_addr_offsets,
+                        (o + 1) *
+                        sizeof(*validated_shader->uniform_addr_offsets),
+                        GFP_KERNEL);
+       if (!validated_shader->uniform_addr_offsets)
+               return false;
+
+       validated_shader->uniform_addr_offsets[o] = num_uniforms;
+       validated_shader->num_uniform_addr_offsets++;
+
+       return true;
+}
+
+static bool
+validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
+                              struct vc4_shader_validation_state *validation_state,
+                              bool is_mul)
+{
+       uint64_t inst = validation_state->shader[validation_state->ip];
+       u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
+       u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+       u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+       u32 add_lri = raddr_add_a_to_live_reg_index(inst);
+       /* We want our reset to be pointing at whatever uniform follows the
+        * uniforms base address.
+        */
+       u32 expected_offset = validated_shader->uniforms_size + 4;
+
+       /* We only support absolute uniform address changes, and we
+        * require that they be in the current basic block before any
+        * of its uniform reads.
+        *
+        * One could potentially emit more efficient QPU code, by
+        * noticing that (say) an if statement does uniform control
+        * flow for all threads and that the if reads the same number
+        * of uniforms on each side.  However, this scheme is easy to
+        * validate so it's all we allow for now.
+        */
+
+       if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) {
+               DRM_ERROR("uniforms address change must be "
+                         "normal math\n");
+               return false;
+       }
+
+       if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
+               DRM_ERROR("Uniform address reset must be an ADD.\n");
+               return false;
+       }
+
+       if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
+               DRM_ERROR("Uniform address reset must be unconditional.\n");
+               return false;
+       }
+
+       if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
+           !(inst & QPU_PM)) {
+               DRM_ERROR("No packing allowed on uniforms reset\n");
+               return false;
+       }
+
+       if (add_lri == -1) {
+               DRM_ERROR("First argument of uniform address write must be "
+                         "an immediate value.\n");
+               return false;
+       }
+
+       if (validation_state->live_immediates[add_lri] != expected_offset) {
+               DRM_ERROR("Resetting uniforms with offset %db instead of %db\n",
+                         validation_state->live_immediates[add_lri],
+                         expected_offset);
+               return false;
+       }
+
+       if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
+           !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
+               DRM_ERROR("Second argument of uniform address write must be "
+                         "a uniform.\n");
+               return false;
+       }
+
+       validation_state->needs_uniform_address_update = false;
+       validation_state->needs_uniform_address_for_loop = false;
+       return require_uniform_address_uniform(validated_shader);
+}
+
 static bool
 check_reg_write(struct vc4_validated_shader_info *validated_shader,
                struct vc4_shader_validation_state *validation_state,
@@ -251,14 +366,37 @@ check_reg_write(struct vc4_validated_shader_info *validated_shader,
        uint32_t waddr = (is_mul ?
                          QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
                          QPU_GET_FIELD(inst, QPU_WADDR_ADD));
+       uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+       bool ws = inst & QPU_WS;
+       bool is_b = is_mul ^ ws;
+       u32 lri = waddr_to_live_reg_index(waddr, is_b);
+
+       if (lri != -1) {
+               uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
+               uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);
+
+               if (sig == QPU_SIG_LOAD_IMM &&
+                   QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
+                   ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
+                    (!is_mul && cond_add == QPU_COND_ALWAYS))) {
+                       validation_state->live_immediates[lri] =
+                               QPU_GET_FIELD(inst, QPU_LOAD_IMM);
+               } else {
+                       validation_state->live_immediates[lri] = ~0;
+               }
+       }
 
        switch (waddr) {
        case QPU_W_UNIFORMS_ADDRESS:
-               /* XXX: We'll probably need to support this for reladdr, but
-                * it's definitely a security-related one.
-                */
-               DRM_ERROR("uniforms address load unsupported\n");
-               return false;
+               if (is_b) {
+                       DRM_ERROR("relative uniforms address change "
+                                 "unsupported\n");
+                       return false;
+               }
+
+               return validate_uniform_address_write(validated_shader,
+                                                     validation_state,
+                                                     is_mul);
 
        case QPU_W_TLB_COLOR_MS:
        case QPU_W_TLB_COLOR_ALL:
@@ -406,9 +544,35 @@ check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
 }
 
 static bool
-check_instruction_reads(uint64_t inst,
-                       struct vc4_validated_shader_info *validated_shader)
+check_branch(uint64_t inst,
+            struct vc4_validated_shader_info *validated_shader,
+            struct vc4_shader_validation_state *validation_state,
+            int ip)
+{
+       int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
+       uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+       uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+
+       if ((int)branch_imm < 0)
+               validation_state->needs_uniform_address_for_loop = true;
+
+       /* We don't want to have to worry about validation of this, and
+        * there's no need for it.
+        */
+       if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
+               DRM_ERROR("branch instruction at %d wrote a register.\n",
+                         validation_state->ip);
+               return false;
+       }
+
+       return true;
+}
+
+static bool
+check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
+                       struct vc4_shader_validation_state *validation_state)
 {
+       uint64_t inst = validation_state->shader[validation_state->ip];
        uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
        uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
        uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
@@ -420,6 +584,12 @@ check_instruction_reads(uint64_t inst,
                 * already be OOM.
                 */
                validated_shader->uniforms_size += 4;
+
+               if (validation_state->needs_uniform_address_update) {
+                       DRM_ERROR("Uniform read with undefined uniform "
+                                 "address\n");
+                       return false;
+               }
        }
 
        return true;
@@ -516,6 +686,65 @@ vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
        return true;
 }
 
+/* Resets any known state for the shader, used when we may be branched to from
+ * multiple locations in the program (or at shader start).
+ */
+static void
+reset_validation_state(struct vc4_shader_validation_state *validation_state)
+{
+       int i;
+
+       for (i = 0; i < 8; i++)
+               validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;
+
+       for (i = 0; i < LIVE_REG_COUNT; i++) {
+               validation_state->live_min_clamp_offsets[i] = ~0;
+               validation_state->live_max_clamp_regs[i] = false;
+               validation_state->live_immediates[i] = ~0;
+       }
+}
+
+static bool
+texturing_in_progress(struct vc4_shader_validation_state *validation_state)
+{
+       return (validation_state->tmu_write_count[0] != 0 ||
+               validation_state->tmu_write_count[1] != 0);
+}
+
+static bool
+vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
+{
+       uint32_t ip = validation_state->ip;
+
+       if (!test_bit(ip, validation_state->branch_targets))
+               return true;
+
+       if (texturing_in_progress(validation_state)) {
+               DRM_ERROR("Branch target landed during TMU setup\n");
+               return false;
+       }
+
+       /* Reset our live values tracking, since this instruction may have
+        * multiple predecessors.
+        *
+        * One could potentially do analysis to determine that, for
+        * example, all predecessors have a live max clamp in the same
+        * register, but we don't bother with that.
+        */
+       reset_validation_state(validation_state);
+
+       /* Since we've entered a basic block from potentially multiple
+        * predecessors, we need the uniforms address to be updated before any
+        * unforms are read.  We require that after any branch point, the next
+        * uniform to be loaded is a uniform address offset.  That uniform's
+        * offset will be marked by the uniform address register write
+        * validation, or a one-off the end-of-program check.
+        */
+       validation_state->needs_uniform_address_update = true;
+
+       return true;
+}
+
 struct vc4_validated_shader_info *
 vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 {
@@ -524,16 +753,12 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
        uint32_t ip;
        struct vc4_validated_shader_info *validated_shader = NULL;
        struct vc4_shader_validation_state validation_state;
-       int i;
 
        memset(&validation_state, 0, sizeof(validation_state));
        validation_state.shader = shader_obj->vaddr;
        validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);
 
-       for (i = 0; i < 8; i++)
-               validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
-       for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
-               validation_state.live_min_clamp_offsets[i] = ~0;
+       reset_validation_state(&validation_state);
 
        validation_state.branch_targets =
                kcalloc(BITS_TO_LONGS(validation_state.max_ip),
@@ -554,6 +779,9 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 
                validation_state.ip = ip;
 
+               if (!vc4_handle_branch_target(&validation_state))
+                       goto fail;
+
                switch (sig) {
                case QPU_SIG_NONE:
                case QPU_SIG_WAIT_FOR_SCOREBOARD:
@@ -569,7 +797,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
                                goto fail;
                        }
 
-                       if (!check_instruction_reads(inst, validated_shader))
+                       if (!check_instruction_reads(validated_shader,
+                                                    &validation_state))
                                goto fail;
 
                        if (sig == QPU_SIG_PROG_END) {
@@ -587,6 +816,11 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
                        }
                        break;
 
+               case QPU_SIG_BRANCH:
+                       if (!check_branch(inst, validated_shader,
+                                         &validation_state, ip))
+                               goto fail;
+                       break;
                default:
                        DRM_ERROR("Unsupported QPU signal %d at "
                                  "instruction %d\n", sig, ip);
@@ -607,6 +841,21 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
                goto fail;
        }
 
+       /* If we did a backwards branch and we haven't emitted a uniforms
+        * reset since then, we still need the uniforms stream to have the
+        * uniforms address available so that the backwards branch can do its
+        * uniforms reset.
+        *
+        * We could potentially prove that the backwards branch doesn't
+        * contain any uses of uniforms until program exit, but that doesn't
+        * seem to be worth the trouble.
+        */
+       if (validation_state.needs_uniform_address_for_loop) {
+               if (!require_uniform_address_uniform(validated_shader))
+                       goto fail;
+               validated_shader->uniforms_size += 4;
+       }
+
        /* Again, no chance of integer overflow here because the worst case
         * scenario is 8 bytes of uniforms plus handles per 8-byte
         * instruction.