arch/tile/kernel/unaligned.c
1 /*
2  * Copyright 2013 Tilera Corporation. All Rights Reserved.
3  *
4  *   This program is free software; you can redistribute it and/or
5  *   modify it under the terms of the GNU General Public License
6  *   as published by the Free Software Foundation, version 2.
7  *
8  *   This program is distributed in the hope that it will be useful, but
9  *   WITHOUT ANY WARRANTY; without even the implied warranty of
10  *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11  *   NON INFRINGEMENT.  See the GNU General Public License for
12  *   more details.
13  *
14  * A code-rewriter that handles unaligned exceptions.
15  */
16
17 #include <linux/smp.h>
18 #include <linux/ptrace.h>
19 #include <linux/slab.h>
20 #include <linux/thread_info.h>
21 #include <linux/uaccess.h>
22 #include <linux/mman.h>
23 #include <linux/types.h>
24 #include <linux/err.h>
25 #include <linux/module.h>
26 #include <linux/compat.h>
27 #include <linux/prctl.h>
28 #include <asm/cacheflush.h>
29 #include <asm/traps.h>
30 #include <asm/uaccess.h>
31 #include <asm/unaligned.h>
32 #include <arch/abi.h>
33 #include <arch/spr_def.h>
34 #include <arch/opcode.h>
35
36
37 /*
38  * This file handles unaligned exceptions for tile-Gx. The tilepro's unaligned
39  * exceptions are handled in single_step.c.
40  */
41
42 int unaligned_printk;
43
44 static int __init setup_unaligned_printk(char *str)
45 {
46         long val;
47         if (kstrtol(str, 0, &val) != 0)
48                 return 0;
49         unaligned_printk = val;
50         pr_info("Printk for each unaligned data access is %s\n",
51                 unaligned_printk ? "enabled" : "disabled");
52         return 1;
53 }
54 __setup("unaligned_printk=", setup_unaligned_printk);
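/*
 * Illustrative usage (not part of the original source): booting with
 * "unaligned_printk=1" on the kernel command line enables a printk for
 * each fixed-up unaligned access; the default of 0 keeps it silent.
 */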
55
56 unsigned int unaligned_fixup_count;
57
58 #ifdef __tilegx__
59
60 /*
61  * Unaligned data JIT fixup code fragment. The reserved space is 128 bytes.
62  * The first 64-bit word saves the fault PC address, the second word is the
63  * fault instruction bundle, followed by 14 JIT bundles.
64  */
65
66 struct unaligned_jit_fragment {
67         unsigned long       pc;
68         tilegx_bundle_bits  bundle;
69         tilegx_bundle_bits  insn[14];
70 };
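/*
 * Layout sketch (derived from the struct above and the 128-byte note):
 *   offset  0: pc           8 bytes,  faulting PC
 *   offset  8: bundle       8 bytes,  faulting instruction bundle
 *   offset 16: insn[0..13]  112 bytes, up to 14 generated JIT bundles
 *   total: 128 bytes per fragment
 */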
71
72 /*
73  * Check if a nop or fnop is at the bundle's pipeline X0.
74  */
75
76 static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
77 {
78         return (((get_UnaryOpcodeExtension_X0(bundle) ==
79                   NOP_UNARY_OPCODE_X0) &&
80                  (get_RRROpcodeExtension_X0(bundle) ==
81                   UNARY_RRR_0_OPCODE_X0) &&
82                  (get_Opcode_X0(bundle) ==
83                   RRR_0_OPCODE_X0)) ||
84                 ((get_UnaryOpcodeExtension_X0(bundle) ==
85                   FNOP_UNARY_OPCODE_X0) &&
86                  (get_RRROpcodeExtension_X0(bundle) ==
87                   UNARY_RRR_0_OPCODE_X0) &&
88                  (get_Opcode_X0(bundle) ==
89                   RRR_0_OPCODE_X0)));
90 }
91
92 /*
93  * Check if a nop or fnop is at the bundle's pipeline X1.
94  */
95
96 static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
97 {
98         return (((get_UnaryOpcodeExtension_X1(bundle) ==
99                   NOP_UNARY_OPCODE_X1) &&
100                  (get_RRROpcodeExtension_X1(bundle) ==
101                   UNARY_RRR_0_OPCODE_X1) &&
102                  (get_Opcode_X1(bundle) ==
103                   RRR_0_OPCODE_X1)) ||
104                 ((get_UnaryOpcodeExtension_X1(bundle) ==
105                   FNOP_UNARY_OPCODE_X1) &&
106                  (get_RRROpcodeExtension_X1(bundle) ==
107                   UNARY_RRR_0_OPCODE_X1) &&
108                  (get_Opcode_X1(bundle) ==
109                   RRR_0_OPCODE_X1)));
110 }
111
112 /*
113  * Check if a nop or fnop is at the bundle's Y0 pipeline.
114  */
115
116 static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
117 {
118         return (((get_UnaryOpcodeExtension_Y0(bundle) ==
119                   NOP_UNARY_OPCODE_Y0) &&
120                  (get_RRROpcodeExtension_Y0(bundle) ==
121                   UNARY_RRR_1_OPCODE_Y0) &&
122                  (get_Opcode_Y0(bundle) ==
123                   RRR_1_OPCODE_Y0)) ||
124                 ((get_UnaryOpcodeExtension_Y0(bundle) ==
125                   FNOP_UNARY_OPCODE_Y0) &&
126                  (get_RRROpcodeExtension_Y0(bundle) ==
127                   UNARY_RRR_1_OPCODE_Y0) &&
128                  (get_Opcode_Y0(bundle) ==
129                   RRR_1_OPCODE_Y0)));
130 }
131
132 /*
133  * Check if nop or fnop at bundle's pipeline Y1.
134  * Check if a nop or fnop is at the bundle's pipeline Y1.
135
136 static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
137 {
138         return (((get_UnaryOpcodeExtension_Y1(bundle) ==
139                   NOP_UNARY_OPCODE_Y1) &&
140                  (get_RRROpcodeExtension_Y1(bundle) ==
141                   UNARY_RRR_1_OPCODE_Y1) &&
142                  (get_Opcode_Y1(bundle) ==
143                   RRR_1_OPCODE_Y1)) ||
144                 ((get_UnaryOpcodeExtension_Y1(bundle) ==
145                   FNOP_UNARY_OPCODE_Y1) &&
146                  (get_RRROpcodeExtension_Y1(bundle) ==
147                   UNARY_RRR_1_OPCODE_Y1) &&
148                  (get_Opcode_Y1(bundle) ==
149                   RRR_1_OPCODE_Y1)));
150 }
151
152 /*
153  * Test if a bundle's y0 and y1 pipelines are both nop or fnop.
154  */
155
156 static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
157 {
158         return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
159 }
160
161 /*
162  * Test if a bundle's x0 and x1 pipelines are both nop or fnop.
163  */
164
165 static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
166 {
167         return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
168 }
169
170 /*
171  * Find the destination and source registers of the faulting unaligned access
172  * instruction at X1 or Y2. Also allocate up to 3 scratch registers, clob1,
173  * clob2 and clob3, which are guaranteed to differ from any register used in
174  * the fault bundle. r_alias returns whether any instruction other than the
175  * unaligned load/store shares a register with ra, rb or rd.
176  */
177
178 static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
179                       uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
180                       uint64_t *clob3, bool *r_alias)
181 {
182         int i;
183         uint64_t reg;
184         uint64_t reg_map = 0, alias_reg_map = 0, map;
185         bool alias;
186
187         *ra = -1;
188         *rb = -1;
189
190         if (rd)
191                 *rd = -1;
192
193         *clob1 = -1;
194         *clob2 = -1;
195         *clob3 = -1;
196         alias = false;
197
198         /*
199          * Parse the fault bundle, find the potentially used registers and mark
200          * the corresponding bits in reg_map and alias_reg_map. These 2 bitmaps
201          * are used to find the scratch registers and determine if there
202          * is register aliasing.
203          */
204         if (bundle & TILEGX_BUNDLE_MODE_MASK) {  /* Y Mode Bundle. */
205
206                 reg = get_SrcA_Y2(bundle);
207                 reg_map |= 1ULL << reg;
208                 *ra = reg;
209                 reg = get_SrcBDest_Y2(bundle);
210                 reg_map |= 1ULL << reg;
211
212                 if (rd) {
213                         /* Load. */
214                         *rd = reg;
215                         alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
216                 } else {
217                         /* Store. */
218                         *rb = reg;
219                         alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
220                 }
221
222                 if (!is_bundle_y1_nop(bundle)) {
223                         reg = get_SrcA_Y1(bundle);
224                         reg_map |= (1ULL << reg);
225                         map = (1ULL << reg);
226
227                         reg = get_SrcB_Y1(bundle);
228                         reg_map |= (1ULL << reg);
229                         map |= (1ULL << reg);
230
231                         reg = get_Dest_Y1(bundle);
232                         reg_map |= (1ULL << reg);
233                         map |= (1ULL << reg);
234
235                         if (map & alias_reg_map)
236                                 alias = true;
237                 }
238
239                 if (!is_bundle_y0_nop(bundle)) {
240                         reg = get_SrcA_Y0(bundle);
241                         reg_map |= (1ULL << reg);
242                         map = (1ULL << reg);
243
244                         reg = get_SrcB_Y0(bundle);
245                         reg_map |= (1ULL << reg);
246                         map |= (1ULL << reg);
247
248                         reg = get_Dest_Y0(bundle);
249                         reg_map |= (1ULL << reg);
250                         map |= (1ULL << reg);
251
252                         if (map & alias_reg_map)
253                                 alias = true;
254                 }
255         } else  { /* X Mode Bundle. */
256
257                 reg = get_SrcA_X1(bundle);
258                 reg_map |= (1ULL << reg);
259                 *ra = reg;
260                 if (rd) {
261                         /* Load. */
262                         reg = get_Dest_X1(bundle);
263                         reg_map |= (1ULL << reg);
264                         *rd = reg;
265                         alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
266                 } else {
267                         /* Store. */
268                         reg = get_SrcB_X1(bundle);
269                         reg_map |= (1ULL << reg);
270                         *rb = reg;
271                         alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
272                 }
273
274                 if (!is_bundle_x0_nop(bundle)) {
275                         reg = get_SrcA_X0(bundle);
276                         reg_map |= (1ULL << reg);
277                         map = (1ULL << reg);
278
279                         reg = get_SrcB_X0(bundle);
280                         reg_map |= (1ULL << reg);
281                         map |= (1ULL << reg);
282
283                         reg = get_Dest_X0(bundle);
284                         reg_map |= (1ULL << reg);
285                         map |= (1ULL << reg);
286
287                         if (map & alias_reg_map)
288                                 alias = true;
289                 }
290         }
291
292         /*
293          * "alias" indicates whether the unaligned access registers collide
294          * with others in the same bundle. We simply test the all-register-
295          * operand case (RRR) and ignore the cases with immediates. If a bundle
296          * has no register alias, we may do the fixup in a simple, fast manner.
297          * So if an immediate field happens to collide with a register, we may
298          * end up falling back to the generic handling.
299          */
300
301         *r_alias = alias;
302
303         /* Flip bits on reg_map. */
304         reg_map ^= -1ULL;
305
306         /* Scan the lower 54 (TREG_SP) bits of reg_map to find 3 set bits. */
307         for (i = 0; i < TREG_SP; i++) {
308                 if (reg_map & (0x1ULL << i)) {
309                         if (*clob1 == -1) {
310                                 *clob1 = i;
311                         } else if (*clob2 == -1) {
312                                 *clob2 = i;
313                         } else if (*clob3 == -1) {
314                                 *clob3 = i;
315                                 return;
316                         }
317                 }
318         }
319 }
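/*
 * Worked example (hypothetical register numbers, for illustration only):
 * if the fault bundle uses only r0, r1 and r5, reg_map has bits 0, 1 and
 * 5 set.  After the flip, the scan over bits 0..53 returns the first
 * three unused registers, so clob1 = r2, clob2 = r3 and clob3 = r4, none
 * of which can alias anything in the fault bundle.
 */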
320
321 /*
322  * Sanity check for registers ra, rb, rd and clob1/2/3. Return true if any of
323  * them is unexpected.
324  */
325
326 static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
327                        uint64_t clob1, uint64_t clob2,  uint64_t clob3)
328 {
329         bool unexpected = false;
330         if ((ra >= 56) && (ra != TREG_ZERO))
331                 unexpected = true;
332
333         if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
334                 unexpected = true;
335
336         if (rd != -1) {
337                 if ((rd >= 56) && (rd != TREG_ZERO))
338                         unexpected = true;
339         } else {
340                 if ((rb >= 56) && (rb != TREG_ZERO))
341                         unexpected = true;
342         }
343         return unexpected;
344 }
345
346
347 #define  GX_INSN_X0_MASK   ((1ULL << 31) - 1)
348 #define  GX_INSN_X1_MASK   (((1ULL << 31) - 1) << 31)
349 #define  GX_INSN_Y0_MASK   ((0xFULL << 27) | (0xFFFFFULL))
350 #define  GX_INSN_Y1_MASK   (GX_INSN_Y0_MASK << 31)
351 #define  GX_INSN_Y2_MASK   ((0x7FULL << 51) | (0x7FULL << 20))
352
353 #ifdef __LITTLE_ENDIAN
354 #define  GX_INSN_BSWAP(_bundle_)    (_bundle_)
355 #else
356 #define  GX_INSN_BSWAP(_bundle_)    swab64(_bundle_)
357 #endif /* __LITTLE_ENDIAN */
358
359 /*
360  * __JIT_CODE(.) creates template bundles in the .rodata.unalign_data section.
361  * The corresponding static function jit_x#_###(.) generates a partial or
362  * whole bundle based on the template and the given arguments.
363  */
364
365 #define __JIT_CODE(_X_)                                         \
366         asm (".pushsection .rodata.unalign_data, \"a\"\n"       \
367              _X_"\n"                                            \
368              ".popsection\n")
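/*
 * Sketch of how the templates are used (a reading of the helpers below,
 * not new functionality): each jit_*() masks its template down to one
 * pipeline slot (e.g. GX_INSN_X1_MASK) and ORs in the operand fields via
 * the create_*() encoders.  A complete bundle is then formed by ORing an
 * X0 half with an X1 half, e.g.
 *
 *	frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
 *
 * as jit_bundle_gen() does further down.
 */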
369
370 __JIT_CODE("__unalign_jit_x1_mtspr:   {mtspr 0,  r0}");
371 static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
372 {
373         extern  tilegx_bundle_bits __unalign_jit_x1_mtspr;
374         return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
375                 create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
376 }
377
378 __JIT_CODE("__unalign_jit_x1_mfspr:   {mfspr r0, 0}");
379 static tilegx_bundle_bits  jit_x1_mfspr(int reg, int spr)
380 {
381         extern  tilegx_bundle_bits __unalign_jit_x1_mfspr;
382         return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
383                 create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
384 }
385
386 __JIT_CODE("__unalign_jit_x0_addi:   {addi  r0, r0, 0; iret}");
387 static tilegx_bundle_bits  jit_x0_addi(int rd, int ra, int imm8)
388 {
389         extern  tilegx_bundle_bits __unalign_jit_x0_addi;
390         return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
391                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
392                 create_Imm8_X0(imm8);
393 }
394
395 __JIT_CODE("__unalign_jit_x1_ldna:   {ldna  r0, r0}");
396 static tilegx_bundle_bits  jit_x1_ldna(int rd, int ra)
397 {
398         extern  tilegx_bundle_bits __unalign_jit_x1_ldna;
399         return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) &  GX_INSN_X1_MASK) |
400                 create_Dest_X1(rd) | create_SrcA_X1(ra);
401 }
402
403 __JIT_CODE("__unalign_jit_x0_dblalign:   {dblalign r0, r0 ,r0}");
404 static tilegx_bundle_bits  jit_x0_dblalign(int rd, int ra, int rb)
405 {
406         extern  tilegx_bundle_bits __unalign_jit_x0_dblalign;
407         return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
408                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
409                 create_SrcB_X0(rb);
410 }
411
412 __JIT_CODE("__unalign_jit_x1_iret:   {iret}");
413 static tilegx_bundle_bits  jit_x1_iret(void)
414 {
415         extern  tilegx_bundle_bits __unalign_jit_x1_iret;
416         return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
417 }
418
419 __JIT_CODE("__unalign_jit_x01_fnop:   {fnop;fnop}");
420 static tilegx_bundle_bits  jit_x0_fnop(void)
421 {
422         extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
423         return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
424 }
425
426 static tilegx_bundle_bits  jit_x1_fnop(void)
427 {
428         extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
429         return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
430 }
431
432 __JIT_CODE("__unalign_jit_y2_dummy:   {fnop; fnop; ld zero, sp}");
433 static tilegx_bundle_bits  jit_y2_dummy(void)
434 {
435         extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
436         return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
437 }
438
439 static tilegx_bundle_bits  jit_y1_fnop(void)
440 {
441         extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
442         return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
443 }
444
445 __JIT_CODE("__unalign_jit_x1_st1_add:  {st1_add r1, r0, 0}");
446 static tilegx_bundle_bits  jit_x1_st1_add(int ra, int rb, int imm8)
447 {
448         extern  tilegx_bundle_bits __unalign_jit_x1_st1_add;
449         return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
450                 (~create_SrcA_X1(-1)) &
451                 GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
452                 create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
453 }
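/*
 * The template above encodes "st1_add r1, r0, 0", so its SrcA field is
 * already r1; the "& ~create_SrcA_X1(-1)" term clears that field before
 * the caller's ra is ORed in.  Templates whose operand fields are all
 * r0/0 do not need this extra mask.
 */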
454
455 __JIT_CODE("__unalign_jit_x1_st:  {crc32_8 r1, r0, r0; st  r0, r0}");
456 static tilegx_bundle_bits  jit_x1_st(int ra, int rb)
457 {
458         extern  tilegx_bundle_bits __unalign_jit_x1_st;
459         return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
460                 create_SrcA_X1(ra) | create_SrcB_X1(rb);
461 }
462
463 __JIT_CODE("__unalign_jit_x1_st_add:  {st_add  r1, r0, 0}");
464 static tilegx_bundle_bits  jit_x1_st_add(int ra, int rb, int imm8)
465 {
466         extern  tilegx_bundle_bits __unalign_jit_x1_st_add;
467         return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
468                 (~create_SrcA_X1(-1)) &
469                 GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
470                 create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
471 }
472
473 __JIT_CODE("__unalign_jit_x1_ld:  {crc32_8 r1, r0, r0; ld  r0, r0}");
474 static tilegx_bundle_bits  jit_x1_ld(int rd, int ra)
475 {
476         extern  tilegx_bundle_bits __unalign_jit_x1_ld;
477         return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
478                 create_Dest_X1(rd) | create_SrcA_X1(ra);
479 }
480
481 __JIT_CODE("__unalign_jit_x1_ld_add:  {ld_add  r1, r0, 0}");
482 static tilegx_bundle_bits  jit_x1_ld_add(int rd, int ra, int imm8)
483 {
484         extern  tilegx_bundle_bits __unalign_jit_x1_ld_add;
485         return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
486                 (~create_Dest_X1(-1)) &
487                 GX_INSN_X1_MASK) | create_Dest_X1(rd) |
488                 create_SrcA_X1(ra) | create_Imm8_X1(imm8);
489 }
490
491 __JIT_CODE("__unalign_jit_x0_bfexts:  {bfexts r0, r0, 0, 0}");
492 static tilegx_bundle_bits  jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
493 {
494         extern  tilegx_bundle_bits __unalign_jit_x0_bfexts;
495         return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
496                 GX_INSN_X0_MASK) |
497                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
498                 create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
499 }
500
501 __JIT_CODE("__unalign_jit_x0_bfextu:  {bfextu r0, r0, 0, 0}");
502 static tilegx_bundle_bits  jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
503 {
504         extern  tilegx_bundle_bits __unalign_jit_x0_bfextu;
505         return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
506                 GX_INSN_X0_MASK) |
507                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
508                 create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
509 }
510
511 __JIT_CODE("__unalign_jit_x1_addi:  {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
512 static tilegx_bundle_bits  jit_x1_addi(int rd, int ra, int imm8)
513 {
514         extern  tilegx_bundle_bits __unalign_jit_x1_addi;
515         return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
516                 create_Dest_X1(rd) | create_SrcA_X1(ra) |
517                 create_Imm8_X1(imm8);
518 }
519
520 __JIT_CODE("__unalign_jit_x0_shrui:  {shrui r0, r0, 0; iret}");
521 static tilegx_bundle_bits  jit_x0_shrui(int rd, int ra, int imm6)
522 {
523         extern  tilegx_bundle_bits __unalign_jit_x0_shrui;
524         return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
525                 GX_INSN_X0_MASK) |
526                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
527                 create_ShAmt_X0(imm6);
528 }
529
530 __JIT_CODE("__unalign_jit_x0_rotli:  {rotli r0, r0, 0; iret}");
531 static tilegx_bundle_bits  jit_x0_rotli(int rd, int ra, int imm6)
532 {
533         extern  tilegx_bundle_bits __unalign_jit_x0_rotli;
534         return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
535                 GX_INSN_X0_MASK) |
536                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
537                 create_ShAmt_X0(imm6);
538 }
539
540 __JIT_CODE("__unalign_jit_x1_bnezt:  {bnezt r0, __unalign_jit_x1_bnezt}");
541 static tilegx_bundle_bits  jit_x1_bnezt(int ra, int broff)
542 {
543         extern  tilegx_bundle_bits __unalign_jit_x1_bnezt;
544         return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
545                 GX_INSN_X1_MASK) |
546                 create_SrcA_X1(ra) | create_BrOff_X1(broff);
547 }
548
549 #undef __JIT_CODE
550
551 /*
552  * This function generates the unaligned fixup JIT code.
553  *
554  * We first find the unaligned load/store instruction's destination and source
555  * registers (ra, rb and rd), plus 3 scratch registers, by calling find_regs().
556  * The 3 scratch clobbers must not alias any register used in the fault bundle.
557  * Then the fault bundle is analyzed to determine if it is a load or store,
558  * its operand width, and any branch or address increment. Finally, the
559  * generated JIT code is copied into the JIT code area in user space.
560  */
561
562 static
563 void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
564                     int align_ctl)
565 {
566         struct thread_info *info = current_thread_info();
567         struct unaligned_jit_fragment frag;
568         struct unaligned_jit_fragment *jit_code_area;
569         tilegx_bundle_bits bundle_2 = 0;
570         /* If bundle_2_enable = false, bundle_2 is fnop/nop operation. */
571         bool     bundle_2_enable = true;
572         uint64_t ra, rb, rd = -1, clob1, clob2, clob3;
573         /*
574          * Indicates whether the unaligned access
575          * instruction's registers collide with
576          * others in the same bundle.
577          */
578         bool     alias = false;
579         bool     load_n_store = true;
580         bool     load_store_signed = false;
581         unsigned int  load_store_size = 8;
582         bool     y1_br = false;  /* True for a branch in the same bundle at Y1. */
583         int      y1_br_reg = 0;
584         /* True for a link operation, i.e. jalr or lnk at Y1. */
585         bool     y1_lr = false;
586         int      y1_lr_reg = 0;
587         bool     x1_add = false; /* True for a load/store ADD instruction at X1. */
588         int      x1_add_imm8 = 0;
589         bool     unexpected = false;
590         int      n = 0, k;
591
592         jit_code_area =
593                 (struct unaligned_jit_fragment *)(info->unalign_jit_base);
594
595         memset((void *)&frag, 0, sizeof(frag));
596
597         /* 0: X mode, Otherwise: Y mode. */
598         if (bundle & TILEGX_BUNDLE_MODE_MASK) {
599                 unsigned int mod, opcode;
600
601                 if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
602                     get_RRROpcodeExtension_Y1(bundle) ==
603                     UNARY_RRR_1_OPCODE_Y1) {
604
605                         opcode = get_UnaryOpcodeExtension_Y1(bundle);
606
607                         /*
608                          * Test for a "jalr", "jalrp", "jr" or "jrp" instruction
609                          * in the Y1 pipeline.
610                          */
611                         switch (opcode) {
612                         case JALR_UNARY_OPCODE_Y1:
613                         case JALRP_UNARY_OPCODE_Y1:
614                                 y1_lr = true;
615                                 y1_lr_reg = 55; /* Link register. */
616                                 /* FALLTHROUGH */
617                         case JR_UNARY_OPCODE_Y1:
618                         case JRP_UNARY_OPCODE_Y1:
619                                 y1_br = true;
620                                 y1_br_reg = get_SrcA_Y1(bundle);
621                                 break;
622                         case LNK_UNARY_OPCODE_Y1:
623                                 /* "lnk" at Y1 pipeline. */
624                                 y1_lr = true;
625                                 y1_lr_reg = get_Dest_Y1(bundle);
626                                 break;
627                         }
628                 }
629
630                 opcode = get_Opcode_Y2(bundle);
631                 mod = get_Mode(bundle);
632
633                 /*
634                  *  bundle_2 is the bundle after making Y2 a dummy operation
635                  *  - ld zero, sp
636                  */
637                 bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();
638
639                 /* Make Y1 a fnop if Y1 is a branch or lnk operation. */
640                 if (y1_br || y1_lr) {
641                         bundle_2 &= ~(GX_INSN_Y1_MASK);
642                         bundle_2 |= jit_y1_fnop();
643                 }
644
645                 if (is_y0_y1_nop(bundle_2))
646                         bundle_2_enable = false;
647
648                 if (mod == MODE_OPCODE_YC2) {
649                         /* Store. */
650                         load_n_store = false;
651                         load_store_size = 1 << opcode;
652                         load_store_signed = false;
653                         find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
654                                   &clob3, &alias);
655                         if (load_store_size > 8)
656                                 unexpected = true;
657                 } else {
658                         /* Load. */
659                         load_n_store = true;
660                         if (mod == MODE_OPCODE_YB2) {
661                                 switch (opcode) {
662                                 case LD_OPCODE_Y2:
663                                         load_store_signed = false;
664                                         load_store_size = 8;
665                                         break;
666                                 case LD4S_OPCODE_Y2:
667                                         load_store_signed = true;
668                                         load_store_size = 4;
669                                         break;
670                                 case LD4U_OPCODE_Y2:
671                                         load_store_signed = false;
672                                         load_store_size = 4;
673                                         break;
674                                 default:
675                                         unexpected = true;
676                                 }
677                         } else if (mod == MODE_OPCODE_YA2) {
678                                 if (opcode == LD2S_OPCODE_Y2) {
679                                         load_store_signed = true;
680                                         load_store_size = 2;
681                                 } else if (opcode == LD2U_OPCODE_Y2) {
682                                         load_store_signed = false;
683                                         load_store_size = 2;
684                                 } else
685                                         unexpected = true;
686                         } else
687                                 unexpected = true;
688                         find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
689                                   &clob3, &alias);
690                 }
691         } else {
692                 unsigned int opcode;
693
694                 /* bundle_2 is the bundle after making X1 a "fnop". */
695                 bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();
696
697                 if (is_x0_x1_nop(bundle_2))
698                         bundle_2_enable = false;
699
700                 if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
701                         opcode = get_UnaryOpcodeExtension_X1(bundle);
702
703                         if (get_RRROpcodeExtension_X1(bundle) ==
704                             UNARY_RRR_0_OPCODE_X1) {
705                                 load_n_store = true;
706                                 find_regs(bundle, &rd, &ra, &rb, &clob1,
707                                           &clob2, &clob3, &alias);
708
709                                 switch (opcode) {
710                                 case LD_UNARY_OPCODE_X1:
711                                         load_store_signed = false;
712                                         load_store_size = 8;
713                                         break;
714                                 case LD4S_UNARY_OPCODE_X1:
715                                         load_store_signed = true;
716                                         /* FALLTHROUGH */
717                                 case LD4U_UNARY_OPCODE_X1:
718                                         load_store_size = 4;
719                                         break;
720
721                                 case LD2S_UNARY_OPCODE_X1:
722                                         load_store_signed = true;
723                                         /* FALLTHROUGH */
724                                 case LD2U_UNARY_OPCODE_X1:
725                                         load_store_size = 2;
726                                         break;
727                                 default:
728                                         unexpected = true;
729                                 }
730                         } else {
731                                 load_n_store = false;
732                                 load_store_signed = false;
733                                 find_regs(bundle, 0, &ra, &rb,
734                                           &clob1, &clob2, &clob3,
735                                           &alias);
736
737                                 opcode = get_RRROpcodeExtension_X1(bundle);
738                                 switch (opcode) {
739                                 case ST_RRR_0_OPCODE_X1:
740                                         load_store_size = 8;
741                                         break;
742                                 case ST4_RRR_0_OPCODE_X1:
743                                         load_store_size = 4;
744                                         break;
745                                 case ST2_RRR_0_OPCODE_X1:
746                                         load_store_size = 2;
747                                         break;
748                                 default:
749                                         unexpected = true;
750                                 }
751                         }
752                 } else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
753                         load_n_store = true;
754                         opcode = get_Imm8OpcodeExtension_X1(bundle);
755                         switch (opcode) {
756                         case LD_ADD_IMM8_OPCODE_X1:
757                                 load_store_size = 8;
758                                 break;
759
760                         case LD4S_ADD_IMM8_OPCODE_X1:
761                                 load_store_signed = true;
762                                 /* FALLTHROUGH */
763                         case LD4U_ADD_IMM8_OPCODE_X1:
764                                 load_store_size = 4;
765                                 break;
766
767                         case LD2S_ADD_IMM8_OPCODE_X1:
768                                 load_store_signed = true;
769                                 /* FALLTHROUGH */
770                         case LD2U_ADD_IMM8_OPCODE_X1:
771                                 load_store_size = 2;
772                                 break;
773
774                         case ST_ADD_IMM8_OPCODE_X1:
775                                 load_n_store = false;
776                                 load_store_size = 8;
777                                 break;
778                         case ST4_ADD_IMM8_OPCODE_X1:
779                                 load_n_store = false;
780                                 load_store_size = 4;
781                                 break;
782                         case ST2_ADD_IMM8_OPCODE_X1:
783                                 load_n_store = false;
784                                 load_store_size = 2;
785                                 break;
786                         default:
787                                 unexpected = true;
788                         }
789
790                         if (!unexpected) {
791                                 x1_add = true;
792                                 if (load_n_store)
793                                         x1_add_imm8 = get_Imm8_X1(bundle);
794                                 else
795                                         x1_add_imm8 = get_Dest_Imm8_X1(bundle);
796                         }
797
798                         find_regs(bundle, load_n_store ? (&rd) : NULL,
799                                   &ra, &rb, &clob1, &clob2, &clob3, &alias);
800                 } else
801                         unexpected = true;
802         }
803
804         /*
805          * Some sanity checks for the register numbers extracted from the fault bundle.
806          */
807         if (check_regs(rd, ra, rb, clob1, clob2, clob3) == true)
808                 unexpected = true;
809
810         /* Give warning if register ra has an aligned address. */
811         if (!unexpected)
812                 WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));
813
814
815         /*
816          * If the fault came from kernel space, we only need to take care of the
817          * unaligned "get_user/put_user" macros defined in "uaccess.h".
818          * Basically, we will handle a bundle like this:
819          * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
820          * (refer to "arch/tile/include/asm/uaccess.h" for details).
821          * For either load or store, the byte-wise operation is performed by
822          * calling get_user() or put_user(). If the macro returns a non-zero
823          * value, that value is set in rx, otherwise rx is set to zero. Finally,
824          * pc is made to point to the next bundle and we return.
825          */
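        /*
         * Illustrative example (hypothetical register numbers): for a
         * faulting get_user() bundle { ld4s r3, r4 ; movei r5, 0 } we have
         * rd = r3, ra = r4 and rx = r5.  On success r3 receives the
         * sign-extended value and r5 is set to 0; on failure r5 holds the
         * get_user() error and r3 is zeroed.
         */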
826
827         if (EX1_PL(regs->ex1) != USER_PL) {
828
829                 unsigned long rx = 0;
830                 unsigned long x = 0, ret = 0;
831
832                 if (y1_br || y1_lr || x1_add ||
833                     (load_store_signed !=
834                      (load_n_store && load_store_size == 4))) {
835                         /* Branches, links, load/store add, or wrong sign-ext are not handled. */
836                         unexpected = true;
837                 } else if (!unexpected) {
838                         if (bundle & TILEGX_BUNDLE_MODE_MASK) {
839                                 /*
840                                  * Fault bundle is Y mode.
841                                  * Check if Y1 and Y0 are of the form
842                                  * { movei rx, 0; nop/fnop }; if so,
843                                  * find rx.
844                                  */
845
846                                 if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
847                                     && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
848                                     (get_Imm8_Y1(bundle) == 0) &&
849                                     is_bundle_y0_nop(bundle)) {
850                                         rx = get_Dest_Y1(bundle);
851                                 } else if ((get_Opcode_Y0(bundle) ==
852                                             ADDI_OPCODE_Y0) &&
853                                            (get_SrcA_Y0(bundle) == TREG_ZERO) &&
854                                            (get_Imm8_Y0(bundle) == 0) &&
855                                            is_bundle_y1_nop(bundle)) {
856                                         rx = get_Dest_Y0(bundle);
857                                 } else {
858                                         unexpected = true;
859                                 }
860                         } else {
861                                 /*
862                                  * Fault bundle is X mode.
863                                  * Check if X0 is 'movei rx, 0';
864                                  * if so, find rx.
865                                  */
866
867                                 if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
868                                     && (get_Imm8OpcodeExtension_X0(bundle) ==
869                                         ADDI_IMM8_OPCODE_X0) &&
870                                     (get_SrcA_X0(bundle) == TREG_ZERO) &&
871                                     (get_Imm8_X0(bundle) == 0)) {
872                                         rx = get_Dest_X0(bundle);
873                                 } else {
874                                         unexpected = true;
875                                 }
876                         }
877
878                         /* rx should be less than 56. */
879                         if (!unexpected && (rx >= 56))
880                                 unexpected = true;
881                 }
882
883                 if (!search_exception_tables(regs->pc)) {
884                         /* No fixup in the exception tables for the pc. */
885                         unexpected = true;
886                 }
887
888                 if (unexpected) {
889                         /* Unexpected unalign kernel fault. */
890                         struct task_struct *tsk = validate_current();
891
892                         bust_spinlocks(1);
893
894                         show_regs(regs);
895
896                         if (unlikely(tsk->pid < 2)) {
897                                 panic("Kernel unalign fault running %s!",
898                                       tsk->pid ? "init" : "the idle task");
899                         }
900 #ifdef SUPPORT_DIE
901                         die("Oops", regs);
902 #endif
903                         bust_spinlocks(1);
904
905                         do_group_exit(SIGKILL);
906
907                 } else {
908                         unsigned long i, b = 0;
909                         unsigned char *ptr =
910                                 (unsigned char *)regs->regs[ra];
911                         if (load_n_store) {
912                                 /* handle get_user(x, ptr) */
913                                 for (i = 0; i < load_store_size; i++) {
914                                         ret = get_user(b, ptr++);
915                                         if (!ret) {
916                                                 /* Success! update x. */
917 #ifdef __LITTLE_ENDIAN
918                                                 x |= (b << (8 * i));
919 #else
920                                                 x <<= 8;
921                                                 x |= b;
922 #endif /* __LITTLE_ENDIAN */
923                                         } else {
924                                                 x = 0;
925                                                 break;
926                                         }
927                                 }
928
929                                 /* Sign-extend 4-byte loads. */
930                                 if (load_store_size == 4)
931                                         x = (long)(int)x;
932
933                                 /* Set register rd. */
934                                 regs->regs[rd] = x;
935
936                                 /* Set register rx. */
937                                 regs->regs[rx] = ret;
938
939                                 /* Bump pc. */
940                                 regs->pc += 8;
941
942                         } else {
943                                 /* Handle put_user(x, ptr) */
944                                 x = regs->regs[rb];
945 #ifdef __LITTLE_ENDIAN
946                                 b = x;
947 #else
948                                 /*
949                                  * Swap x in order to store it from low
950                                  * to high memory, the same as in the
951                                  * little-endian case.
952                                  */
953                                 switch (load_store_size) {
954                                 case 8:
955                                         b = swab64(x);
956                                         break;
957                                 case 4:
958                                         b = swab32(x);
959                                         break;
960                                 case 2:
961                                         b = swab16(x);
962                                         break;
963                                 }
964 #endif /* __LITTLE_ENDIAN */
965                                 for (i = 0; i < load_store_size; i++) {
966                                         ret = put_user(b, ptr++);
967                                         if (ret)
968                                                 break;
969                                         /* Success! shift 1 byte. */
970                                         b >>= 8;
971                                 }
972                                 /* Set register rx. */
973                                 regs->regs[rx] = ret;
974
975                                 /* Bump pc. */
976                                 regs->pc += 8;
977                         }
978                 }
979
980                 unaligned_fixup_count++;
981
982                 if (unaligned_printk) {
983                         pr_info("%s/%d. Unalign fixup for kernel access "
984                                 "to userspace %lx.",
985                                 current->comm, current->pid, regs->regs[ra]);
986                 }
987
988                 /* Done! Return to the exception handler. */
989                 return;
990         }
991
992         if ((align_ctl == 0) || unexpected) {
993                 siginfo_t info = {
994                         .si_signo = SIGBUS,
995                         .si_code = BUS_ADRALN,
996                         .si_addr = (unsigned char __user *)0
997                 };
998                 if (unaligned_printk)
999                         pr_info("Unalign bundle: unexp @%llx, %llx",
1000                                 (unsigned long long)regs->pc,
1001                                 (unsigned long long)bundle);
1002
1003                 if (ra < 56) {
1004                         unsigned long uaa = (unsigned long)regs->regs[ra];
1005                         /* Set bus Address. */
1006                         info.si_addr = (unsigned char __user *)uaa;
1007                 }
1008
1009                 unaligned_fixup_count++;
1010
1011                 trace_unhandled_signal("unaligned fixup trap", regs,
1012                                        (unsigned long)info.si_addr, SIGBUS);
1013                 force_sig_info(info.si_signo, &info, current);
1014                 return;
1015         }
1016
1017 #ifdef __LITTLE_ENDIAN
1018 #define UA_FIXUP_ADDR_DELTA          1
1019 #define UA_FIXUP_BFEXT_START(_B_)    0
1020 #define UA_FIXUP_BFEXT_END(_B_)     (8 * (_B_) - 1)
1021 #else /* __BIG_ENDIAN */
1022 #define UA_FIXUP_ADDR_DELTA          -1
1023 #define UA_FIXUP_BFEXT_START(_B_)   (64 - 8 * (_B_))
1024 #define UA_FIXUP_BFEXT_END(_B_)      63
1025 #endif /* __LITTLE_ENDIAN */
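/*
 * For example, with a 4-byte access these expand to a bit-field range of
 * 0..31 on little-endian (UA_FIXUP_BFEXT_START(4) == 0,
 * UA_FIXUP_BFEXT_END(4) == 31) and 32..63 on big-endian, which the
 * bfexts/bfextu bundles generated below use to sign- or zero-extend the
 * bytes that were actually loaded.
 */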
1026
1027
1028
1029         if ((ra != rb) && (rd != TREG_SP) && !alias &&
1030             !y1_br && !y1_lr && !x1_add) {
1031                 /*
1032                  * Simple case: ra != rb, no register alias found,
1033                  * and no branch or link. This will be the majority.
1034                  * We can do a little better for this simple case than
1035                  * with the generic scheme below.
1036                  */
1037                 if (!load_n_store) {
1038                         /*
1039                          * Simple store: ra != rb, no scratch register needed.
1040                          * Just store and rotate right byte by byte.
1041                          */
1042 #ifdef __BIG_ENDIAN
1043                         frag.insn[n++] =
1044                                 jit_x0_addi(ra, ra, load_store_size - 1) |
1045                                 jit_x1_fnop();
1046 #endif /* __BIG_ENDIAN */
1047                         for (k = 0; k < load_store_size; k++) {
1048                                 /* Store a byte. */
1049                                 frag.insn[n++] =
1050                                         jit_x0_rotli(rb, rb, 56) |
1051                                         jit_x1_st1_add(ra, rb,
1052                                                        UA_FIXUP_ADDR_DELTA);
1053                         }
1054 #ifdef __BIG_ENDIAN
1055                         frag.insn[n] = jit_x1_addi(ra, ra, 1);
1056 #else
1057                         frag.insn[n] = jit_x1_addi(ra, ra,
1058                                                    -1 * load_store_size);
1059 #endif /* __LITTLE_ENDIAN */
1060
1061                         if (load_store_size == 8) {
1062                                 frag.insn[n] |= jit_x0_fnop();
1063                         } else if (load_store_size == 4) {
1064                                 frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
1065                         } else { /* = 2 */
1066                                 frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
1067                         }
1068                         n++;
1069                         if (bundle_2_enable)
1070                                 frag.insn[n++] = bundle_2;
1071                         frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1072                 } else {
1073                         if (rd == ra) {
1074                                 /* Use two clobber registers: clob1/2. */
1075                                 frag.insn[n++] =
1076                                         jit_x0_addi(TREG_SP, TREG_SP, -16) |
1077                                         jit_x1_fnop();
1078                                 frag.insn[n++] =
1079                                         jit_x0_addi(clob1, ra, 7) |
1080                                         jit_x1_st_add(TREG_SP, clob1, -8);
1081                                 frag.insn[n++] =
1082                                         jit_x0_addi(clob2, ra, 0) |
1083                                         jit_x1_st(TREG_SP, clob2);
1084                                 frag.insn[n++] =
1085                                         jit_x0_fnop() |
1086                                         jit_x1_ldna(rd, ra);
1087                                 frag.insn[n++] =
1088                                         jit_x0_fnop() |
1089                                         jit_x1_ldna(clob1, clob1);
1090                                 /*
1091                                  * Note: we must make sure that rd is not
1092                                  * sp. Recover clob1/2 from the stack.
1093                                  */
1094                                 frag.insn[n++] =
1095                                         jit_x0_dblalign(rd, clob1, clob2) |
1096                                         jit_x1_ld_add(clob2, TREG_SP, 8);
1097                                 frag.insn[n++] =
1098                                         jit_x0_fnop() |
1099                                         jit_x1_ld_add(clob1, TREG_SP, 16);
1100                         } else {
1101                                 /* Use one clobber register: clob1 only. */
1102                                 frag.insn[n++] =
1103                                         jit_x0_addi(TREG_SP, TREG_SP, -16) |
1104                                         jit_x1_fnop();
1105                                 frag.insn[n++] =
1106                                         jit_x0_addi(clob1, ra, 7) |
1107                                         jit_x1_st(TREG_SP, clob1);
1108                                 frag.insn[n++] =
1109                                         jit_x0_fnop() |
1110                                         jit_x1_ldna(rd, ra);
1111                                 frag.insn[n++] =
1112                                         jit_x0_fnop() |
1113                                         jit_x1_ldna(clob1, clob1);
1114                                 /*
1115                                  * Note: we must make sure that rd is not
1116                                  * sp. Recover clob1 from the stack.
1117                                  */
1118                                 frag.insn[n++] =
1119                                         jit_x0_dblalign(rd, clob1, ra) |
1120                                         jit_x1_ld_add(clob1, TREG_SP, 16);
1121                         }
1122
1123                         if (bundle_2_enable)
1124                                 frag.insn[n++] = bundle_2;
1125                         /*
1126                          * For a non-8-byte load, extract the corresponding
1127                          * bytes and sign-extend.
1128                          */
1129                         if (load_store_size == 4) {
1130                                 if (load_store_signed)
1131                                         frag.insn[n++] =
1132                                                 jit_x0_bfexts(
1133                                                         rd, rd,
1134                                                         UA_FIXUP_BFEXT_START(4),
1135                                                         UA_FIXUP_BFEXT_END(4)) |
1136                                                 jit_x1_fnop();
1137                                 else
1138                                         frag.insn[n++] =
1139                                                 jit_x0_bfextu(
1140                                                         rd, rd,
1141                                                         UA_FIXUP_BFEXT_START(4),
1142                                                         UA_FIXUP_BFEXT_END(4)) |
1143                                                 jit_x1_fnop();
1144                         } else if (load_store_size == 2) {
1145                                 if (load_store_signed)
1146                                         frag.insn[n++] =
1147                                                 jit_x0_bfexts(
1148                                                         rd, rd,
1149                                                         UA_FIXUP_BFEXT_START(2),
1150                                                         UA_FIXUP_BFEXT_END(2)) |
1151                                                 jit_x1_fnop();
1152                                 else
1153                                         frag.insn[n++] =
1154                                                 jit_x0_bfextu(
1155                                                         rd, rd,
1156                                                         UA_FIXUP_BFEXT_START(2),
1157                                                         UA_FIXUP_BFEXT_END(2)) |
1158                                                 jit_x1_fnop();
1159                         }
1160
1161                         frag.insn[n++] =
1162                                 jit_x0_fnop()  |
1163                                 jit_x1_iret();
1164                 }
1165         } else if (!load_n_store) {
1166
1167                 /*
1168                  * Generic memory store cases: use 3 clobber registers.
1169                  *
1170                  * Allocate space for saving clob2, clob1 and clob3 on the user's
1171                  * stack. Register clob3 points to where clob2 is saved, followed
1172                  * by clob1 and clob3 from high to low memory.
1173                  */
1174                 frag.insn[n++] =
1175                         jit_x0_addi(TREG_SP, TREG_SP, -32)    |
1176                         jit_x1_fnop();
1177                 frag.insn[n++] =
1178                         jit_x0_addi(clob3, TREG_SP, 16)  |
1179                         jit_x1_st_add(TREG_SP, clob3, 8);
1180 #ifdef __LITTLE_ENDIAN
1181                 frag.insn[n++] =
1182                         jit_x0_addi(clob1, ra, 0)   |
1183                         jit_x1_st_add(TREG_SP, clob1, 8);
1184 #else
1185                 frag.insn[n++] =
1186                         jit_x0_addi(clob1, ra, load_store_size - 1)   |
1187                         jit_x1_st_add(TREG_SP, clob1, 8);
1188 #endif
1189                 if (load_store_size == 8) {
1190                         /*
1191                          * We store one byte at a time, not for speed but for
1192                          * compact code. After each store, the data source register
1193                          * is rotated right by one byte, so it is unchanged after 8 stores.
1194                          */
1195                         frag.insn[n++] =
1196                                 jit_x0_addi(clob2, TREG_ZERO, 7)     |
1197                                 jit_x1_st_add(TREG_SP, clob2, 16);
1198                         frag.insn[n++] =
1199                                 jit_x0_rotli(rb, rb, 56)      |
1200                                 jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1201                         frag.insn[n++] =
1202                                 jit_x0_addi(clob2, clob2, -1) |
1203                                 jit_x1_bnezt(clob2, -1);
1204                         frag.insn[n++] =
1205                                 jit_x0_fnop()                 |
1206                                 jit_x1_addi(clob2, y1_br_reg, 0);
1207                 } else if (load_store_size == 4) {
1208                         frag.insn[n++] =
1209                                 jit_x0_addi(clob2, TREG_ZERO, 3)     |
1210                                 jit_x1_st_add(TREG_SP, clob2, 16);
1211                         frag.insn[n++] =
1212                                 jit_x0_rotli(rb, rb, 56)      |
1213                                 jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1214                         frag.insn[n++] =
1215                                 jit_x0_addi(clob2, clob2, -1) |
1216                                 jit_x1_bnezt(clob2, -1);
1217                         /*
1218                          * Same as the 8-byte case, but we need to rotate another
1219                          * 4 bytes to recover rb for the 4-byte store.
1220                          */
1221                         frag.insn[n++] = jit_x0_rotli(rb, rb, 32)      |
1222                                 jit_x1_addi(clob2, y1_br_reg, 0);
1223                 } else { /* =2 */
1224                         frag.insn[n++] =
1225                                 jit_x0_addi(clob2, rb, 0)     |
1226                                 jit_x1_st_add(TREG_SP, clob2, 16);
1227                         for (k = 0; k < 2; k++) {
1228                                 frag.insn[n++] =
1229                                         jit_x0_shrui(rb, rb, 8)  |
1230                                         jit_x1_st1_add(clob1, rb,
1231                                                        UA_FIXUP_ADDR_DELTA);
1232                         }
1233                         frag.insn[n++] =
1234                                 jit_x0_addi(rb, clob2, 0)       |
1235                                 jit_x1_addi(clob2, y1_br_reg, 0);
1236                 }
1237
1238                 if (bundle_2_enable)
1239                         frag.insn[n++] = bundle_2;
1240
1241                 if (y1_lr) {
1242                         frag.insn[n++] =
1243                                 jit_x0_fnop()                    |
1244                                 jit_x1_mfspr(y1_lr_reg,
1245                                              SPR_EX_CONTEXT_0_0);
1246                 }
1247                 if (y1_br) {
1248                         frag.insn[n++] =
1249                                 jit_x0_fnop()                    |
1250                                 jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1251                                              clob2);
1252                 }
1253                 if (x1_add) {
1254                         frag.insn[n++] =
1255                                 jit_x0_addi(ra, ra, x1_add_imm8) |
1256                                 jit_x1_ld_add(clob2, clob3, -8);
1257                 } else {
1258                         frag.insn[n++] =
1259                                 jit_x0_fnop()                    |
1260                                 jit_x1_ld_add(clob2, clob3, -8);
1261                 }
1262                 frag.insn[n++] =
1263                         jit_x0_fnop()   |
1264                         jit_x1_ld_add(clob1, clob3, -8);
1265                 frag.insn[n++] = jit_x0_fnop()   | jit_x1_ld(clob3, clob3);
1266                 frag.insn[n++] = jit_x0_fnop()   | jit_x1_iret();
1267
1268         } else {
1269                 /*
1270                  * Generic memory load cases.
1271                  *
1272                  * Allocate space for saving clob1, clob2 and clob3 on the
1273                  * user's stack. Register clob3 points to where clob1 is
1274                  * saved, followed by clob2 and clob3 from high to low memory.
1275                  */
1276
1277                 frag.insn[n++] =
1278                         jit_x0_addi(TREG_SP, TREG_SP, -32) |
1279                         jit_x1_fnop();
1280                 frag.insn[n++] =
1281                         jit_x0_addi(clob3, TREG_SP, 16) |
1282                         jit_x1_st_add(TREG_SP, clob3, 8);
1283                 frag.insn[n++] =
1284                         jit_x0_addi(clob2, ra, 0) |
1285                         jit_x1_st_add(TREG_SP, clob2, 8);
1286
1287                 if (y1_br) {
1288                         frag.insn[n++] =
1289                                 jit_x0_addi(clob1, y1_br_reg, 0) |
1290                                 jit_x1_st_add(TREG_SP, clob1, 16);
1291                 } else {
1292                         frag.insn[n++] =
1293                                 jit_x0_fnop() |
1294                                 jit_x1_st_add(TREG_SP, clob1, 16);
1295                 }
1296
1297                 if (bundle_2_enable)
1298                         frag.insn[n++] = bundle_2;
1299
1300                 if (y1_lr) {
1301                         frag.insn[n++] =
1302                                 jit_x0_fnop()  |
1303                                 jit_x1_mfspr(y1_lr_reg,
1304                                              SPR_EX_CONTEXT_0_0);
1305                 }
1306
1307                 if (y1_br) {
1308                         frag.insn[n++] =
1309                                 jit_x0_fnop() |
1310                                 jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1311                                              clob1);
1312                 }
1313
1314                 frag.insn[n++] =
1315                         jit_x0_addi(clob1, clob2, 7)      |
1316                         jit_x1_ldna(rd, clob2);
1317                 frag.insn[n++] =
1318                         jit_x0_fnop()                     |
1319                         jit_x1_ldna(clob1, clob1);
1320                 frag.insn[n++] =
1321                         jit_x0_dblalign(rd, clob1, clob2) |
1322                         jit_x1_ld_add(clob1, clob3, -8);
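                /*
                 * The three bundles above are the usual unaligned-load idiom:
                 * two naturally-aligned loads plus a funnel shift. A rough,
                 * little-endian C sketch (addr is the value in clob2):
                 *
                 *     shift = 8 * (addr & 7);
                 *     lo = *(u64 *)(addr & ~7UL);           // ldna rd, addr
                 *     hi = *(u64 *)((addr + 7) & ~7UL);     // ldna clob1, addr+7
                 *     rd = shift ? (lo >> shift) | (hi << (64 - shift))
                 *                : lo;                      // dblalign
                 */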
1323                 if (x1_add) {
1324                         frag.insn[n++] =
1325                                 jit_x0_addi(ra, ra, x1_add_imm8) |
1326                                 jit_x1_ld_add(clob2, clob3, -8);
1327                 } else {
1328                         frag.insn[n++] =
1329                                 jit_x0_fnop()  |
1330                                 jit_x1_ld_add(clob2, clob3, -8);
1331                 }
1332
1333                 frag.insn[n++] =
1334                         jit_x0_fnop() |
1335                         jit_x1_ld(clob3, clob3);
1336
1337                 if (load_store_size == 4) {
1338                         if (load_store_signed)
1339                                 frag.insn[n++] =
1340                                         jit_x0_bfexts(
1341                                                 rd, rd,
1342                                                 UA_FIXUP_BFEXT_START(4),
1343                                                 UA_FIXUP_BFEXT_END(4)) |
1344                                         jit_x1_fnop();
1345                         else
1346                                 frag.insn[n++] =
1347                                         jit_x0_bfextu(
1348                                                 rd, rd,
1349                                                 UA_FIXUP_BFEXT_START(4),
1350                                                 UA_FIXUP_BFEXT_END(4)) |
1351                                         jit_x1_fnop();
1352                 } else if (load_store_size == 2) {
1353                         if (load_store_signed)
1354                                 frag.insn[n++] =
1355                                         jit_x0_bfexts(
1356                                                 rd, rd,
1357                                                 UA_FIXUP_BFEXT_START(2),
1358                                                 UA_FIXUP_BFEXT_END(2)) |
1359                                         jit_x1_fnop();
1360                         else
1361                                 frag.insn[n++] =
1362                                         jit_x0_bfextu(
1363                                                 rd, rd,
1364                                                 UA_FIXUP_BFEXT_START(2),
1365                                                 UA_FIXUP_BFEXT_END(2)) |
1366                                         jit_x1_fnop();
1367                 }
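                /*
                 * dblalign always produces a full 8-byte value, so narrow it
                 * to the actual access size here; conceptually rd = (s32)rd or
                 * (u32)rd for 4-byte loads and (s16)rd or (u16)rd for 2-byte
                 * loads, depending on load_store_signed.
                 */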
1368
1369                 frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1370         }
1371
1372         /* Max JIT bundle count is 14. */
1373         WARN_ON(n > 14);
1374
1375         if (!unexpected) {
1376                 int status = 0;
1377                 int idx = (regs->pc >> 3) &
1378                         ((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);
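                /*
                 * The JIT page acts as a direct-mapped cache of fragments:
                 * assuming UNALIGN_JIT_SHIFT is log2 of the 128-byte slot
                 * size, this is idx = (pc >> 3) % (slots per page), i.e. the
                 * bundle-aligned PC picks the slot, and colliding PCs simply
                 * overwrite each other's fragments.
                 */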
1379
1380                 frag.pc = regs->pc;
1381                 frag.bundle = bundle;
1382
1383                 if (unaligned_printk) {
1384                         pr_info("%s/%d, Unalign fixup: pc=%lx "
1385                                 "bundle=%lx %d %d %d %d %d %d %d %d.",
1386                                 current->comm, current->pid,
1387                                 (unsigned long)frag.pc,
1388                                 (unsigned long)frag.bundle,
1389                                 (int)alias, (int)rd, (int)ra,
1390                                 (int)rb, (int)bundle_2_enable,
1391                                 (int)y1_lr, (int)y1_br, (int)x1_add);
1392
1393                         for (k = 0; k < n; k += 2)
1394                                 pr_info("[%d] %016llx %016llx", k,
1395                                         (unsigned long long)frag.insn[k],
1396                                         (unsigned long long)frag.insn[k+1]);
1397                 }
1398
1399                 /* Swap the bundle byte order for big-endian systems. */
1400 #ifdef __BIG_ENDIAN
1401                 frag.bundle = GX_INSN_BSWAP(frag.bundle);
1402                 for (k = 0; k < n; k++)
1403                         frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
1404 #endif /* __BIG_ENDIAN */
1405
1406                 status = copy_to_user((void __user *)&jit_code_area[idx],
1407                                       &frag, sizeof(frag));
1408                 if (status) {
1409                         /* Failed to copy the JIT fragment to userland; send SIGSEGV. */
1410                         siginfo_t info = {
1411                                 .si_signo = SIGSEGV,
1412                                 .si_code = SEGV_MAPERR,
1413                                 .si_addr = (void __user *)&jit_code_area[idx]
1414                         };
1415
1416                         pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx",
1417                                 current->pid, current->comm,
1418                                 (unsigned long long)&jit_code_area[idx]);
1419
1420                         trace_unhandled_signal("segfault in unalign fixup",
1421                                                regs,
1422                                                (unsigned long)info.si_addr,
1423                                                SIGSEGV);
1424                         force_sig_info(info.si_signo, &info, current);
1425                         return;
1426                 }
1427
1428
1429                 /* Do a cheap increment; it need not be exact. */
1430                 unaligned_fixup_count++;
1431                 __flush_icache_range((unsigned long)&jit_code_area[idx],
1432                                      (unsigned long)&jit_code_area[idx] +
1433                                      sizeof(frag));
1434
1435                 /* Set up SPR_EX_CONTEXT_0_0/1 for returning to the user program. */
1436                 __insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
1437                 __insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));
1438
1439                 /* Point pc at the start of the new JIT fragment. */
1440                 regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
1441                 /* Set ICS in SPR_EX_CONTEXT_K_1. */
1442                 regs->ex1 = PL_ICS_EX1(USER_PL, 1);
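                /*
                 * The return path is thus a double iret: the handler irets
                 * into the JIT fragment (regs->pc, with ICS set so the
                 * fragment runs as an interrupt critical section), and the
                 * fragment's own final iret uses the SPR_EX_CONTEXT_0_0/1
                 * values programmed above to resume the user program at the
                 * bundle after the faulting one (pc + 8), at user PL with
                 * ICS clear.
                 */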
1443         }
1444 }
1445
1446
1447 /*
1448  * C function to generate the unaligned-data JIT. Called from the
1449  * unaligned-data interrupt handler.
1450  *
1451  * First check whether unaligned fixup is disabled, the exception did not
1452  * come from user space, or the sp register points to an unaligned
1453  * address; if so, generate a SIGBUS. Then map a page into user space as
1454  * the JIT area if it is not mapped yet. Generate the JIT code by calling
1455  * jit_bundle_gen(), and then return to the exception handler.
1456  *
1457  * The exception handler will "iret" into the newly generated JIT code
1458  * after restoring the caller-saved registers. The JIT code then performs
1459  * another "iret" to resume the user's program.
1460  */
1461
1462 void do_unaligned(struct pt_regs *regs, int vecnum)
1463 {
1464         tilegx_bundle_bits __user  *pc;
1465         tilegx_bundle_bits bundle;
1466         struct thread_info *info = current_thread_info();
1467         int align_ctl;
1468
1469         /* Check the per-process unaligned fixup control. */
1470         align_ctl = unaligned_fixup;
1471         switch (task_thread_info(current)->align_ctl) {
1472         case PR_UNALIGN_NOPRINT:
1473                 align_ctl = 1;
1474                 break;
1475         case PR_UNALIGN_SIGBUS:
1476                 align_ctl = 0;
1477                 break;
1478         }
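        /*
         * For reference, user space selects this per-task behaviour through
         * the generic prctl(2) interface; an illustrative snippet (not part
         * of this file):
         *
         *     #include <sys/prctl.h>
         *
         *     prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS);   // always SIGBUS
         *     prctl(PR_SET_UNALIGN, PR_UNALIGN_NOPRINT);  // fix up silently
         */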
1479
1480         /* Enable interrupts in order to access userland. */
1481         local_irq_enable();
1482
1483         /*
1484          * If the fault came from kernel space, there are two choices:
1485          * (a) unaligned_fixup < 1: first try the get/put_user exception
1486          *     fixup, which returns -EFAULT; if there is no fixup entry,
1487          *     simply panic the kernel.
1488          * (b) unaligned_fixup >= 1: try to fix the unaligned access if it
1489          *     was triggered by get_user/put_user(); panic if not fixable.
1490          */
1491
1492         if (EX1_PL(regs->ex1) != USER_PL) {
1493
1494                 if (align_ctl < 1) {
1495                         unaligned_fixup_count++;
1496                         /* The exception came from the kernel; try to fix it up. */
1497                         if (fixup_exception(regs)) {
1498                                 if (unaligned_printk)
1499                                         pr_info("Unalign fixup: %d %llx @%llx",
1500                                                 (int)unaligned_fixup,
1501                                                 (unsigned long long)regs->ex1,
1502                                                 (unsigned long long)regs->pc);
1503                                 return;
1504                         }
1505                         /* Not fixable. Go panic. */
1506                         panic("Unalign exception in Kernel. pc=%lx",
1507                               regs->pc);
1508                         return;
1509                 } else {
1510                         /*
1511                          * Try to fix the exception. If we can't, panic the
1512                          * kernel.
1513                          */
1514                         bundle = GX_INSN_BSWAP(
1515                                 *((tilegx_bundle_bits *)(regs->pc)));
1516                         jit_bundle_gen(regs, bundle, align_ctl);
1517                         return;
1518                 }
1519         }
1520
1521         /*
1522          * The fault came from user space with ICS set, the stack pointer
1523          * is not aligned, or unaligned fixups are disabled: send SIGBUS.
1524          */
1525         if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
1526                 siginfo_t info = {
1527                         .si_signo = SIGBUS,
1528                         .si_code = BUS_ADRALN,
1529                         .si_addr = (unsigned char __user *)0
1530                 };
1531
1532                 if (unaligned_printk)
1533                         pr_info("Unalign fixup: %d %llx @%llx",
1534                                 (int)unaligned_fixup,
1535                                 (unsigned long long)regs->ex1,
1536                                 (unsigned long long)regs->pc);
1537
1538                 unaligned_fixup_count++;
1539
1540                 trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
1541                 force_sig_info(info.si_signo, &info, current);
1542                 return;
1543         }
1544
1545
1546         /* Read the bundle that caused the exception. */
1547         pc = (tilegx_bundle_bits __user *)(regs->pc);
1548         if (get_user(bundle, pc) != 0) {
1549                 /* We should rarely get here, since pc is a valid user address. */
1550                 siginfo_t info = {
1551                         .si_signo = SIGSEGV,
1552                         .si_code = SEGV_MAPERR,
1553                         .si_addr = (void __user *)pc
1554                 };
1555                 pr_err("Couldn't read instruction at %p trying to step\n", pc);
1556                 trace_unhandled_signal("segfault in unalign fixup", regs,
1557                                        (unsigned long)info.si_addr, SIGSEGV);
1558                 force_sig_info(info.si_signo, &info, current);
1559                 return;
1560         }
1561
1562         if (!info->unalign_jit_base) {
1563                 void __user *user_page;
1564
1565                 /*
1566                  * Allocate a page in userland.
1567                  * For 64-bit processes we try to place the mapping far
1568                  * from anything else that might be going on (specifically
1569                  * 64 GB below the top of the user address space).  If it
1570                  * happens not to be possible to put it there, it's OK;
1571                  * the kernel will choose another location and we'll
1572                  * remember it for later.
1573                  */
1574                 if (is_compat_task())
1575                         user_page = NULL;
1576                 else
1577                         user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
1578                                 (current->pid << PAGE_SHIFT);
1579
1580                 user_page = (void __user *) vm_mmap(NULL,
1581                                                     (unsigned long)user_page,
1582                                                     PAGE_SIZE,
1583                                                     PROT_EXEC | PROT_READ |
1584                                                     PROT_WRITE,
1585 #ifdef CONFIG_HOMECACHE
1586                                                     MAP_CACHE_HOME_TASK |
1587 #endif
1588                                                     MAP_PRIVATE |
1589                                                     MAP_ANONYMOUS,
1590                                                     0);
1591
1592                 if (IS_ERR((void __force *)user_page)) {
1593                         pr_err("Out of kernel pages trying do_mmap.\n");
1594                         return;
1595                 }
1596
1597                 /* Save the address in the thread_info struct */
1598                 info->unalign_jit_base = user_page;
1599                 if (unaligned_printk)
1600                         pr_info("Unalign bundle: %d:%d, allocate page @%llx",
1601                                 raw_smp_processor_id(), current->pid,
1602                                 (unsigned long long)user_page);
1603         }
1604
1605         /* Generate the unaligned-access JIT. */
1606         jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
1607 }
1608
1609 #endif /* __tilegx__ */