]> git.karo-electronics.de Git - karo-tx-linux.git/blob - drivers/mxc/ipu3/ipu_calc_stripes_sizes.c
ENGR00290579 IPUv3: Fix split mode warnings
[karo-tx-linux.git] / drivers / mxc / ipu3 / ipu_calc_stripes_sizes.c
1 /*
2  * Copyright 2009-2013 Freescale Semiconductor, Inc. All Rights Reserved.
3  */
4
5 /*
6  * The code contained herein is licensed under the GNU General Public
7  * License. You may obtain a copy of the GNU General Public License
8  * Version 2 or later at the following locations:
9  *
10  * http://www.opensource.org/licenses/gpl-license.html
11  * http://www.gnu.org/copyleft/gpl.html
12  */
13
14 /*
15  * @file ipu_calc_stripes_sizes.c
16  *
17  * @brief IPU IC functions
18  *
19  * @ingroup IPU
20  */
21
22 #include <linux/ipu-v3.h>
23 #include <linux/module.h>
24 #include <linux/math64.h>
25
/*
 * Bits-per-pixel selector codes understood by f_calc(), listed in
 * ascending code order.
 * NOTE(review): the numbering looks like a hardware BPP field encoding -
 * confirm against the IPU documentation before relying on it.
 */
#define BPP_32 0
#define BPP_24 1
#define BPP_18 2
#define BPP_16 3
#define BPP_12 4
#define BPP_8 5
32
33 static u32 truncate(u32 up, /* 0: down; else: up */
34                                         u64 a, /* must be non-negative */
35                                         u32 b)
36 {
37         u32 d;
38         u64 div;
39         div = div_u64(a, b);
40         d = b * (div >> 32);
41         if (up && (a > (((u64)d) << 32)))
42                 return d+b;
43         else
44                 return d;
45 }
46
47 static unsigned int f_calc(unsigned int pfs, unsigned int bpp, unsigned int *write)
48 {/* return input_f */
49         unsigned int f_calculated = 0;
50         switch (pfs) {
51         case IPU_PIX_FMT_YVU422P:
52         case IPU_PIX_FMT_YUV422P:
53         case IPU_PIX_FMT_YUV420P2:
54         case IPU_PIX_FMT_YUV420P:
55         case IPU_PIX_FMT_YVU420P:
56         case IPU_PIX_FMT_YUV444P:
57                 f_calculated = 16;
58                 break;
59
60         case IPU_PIX_FMT_RGB565:
61         case IPU_PIX_FMT_YUYV:
62         case IPU_PIX_FMT_UYVY:
63                 f_calculated = 8;
64                 break;
65
66         case IPU_PIX_FMT_NV12:
67                 f_calculated = 8;
68                 break;
69
70         default:
71                 f_calculated = 0;
72                 break;
73
74         }
75         if (!f_calculated) {
76                 switch (bpp) {
77                 case BPP_32:
78                         f_calculated = 2;
79                         break;
80
81                 case BPP_16:
82                         f_calculated = 4;
83                         break;
84
85                 case BPP_8:
86                 case BPP_24:
87                         f_calculated = 8;
88                         break;
89
90                 case BPP_12:
91                         f_calculated = 16;
92                         break;
93
94                 case BPP_18:
95                         f_calculated = 32;
96                         break;
97
98                 default:
99                         f_calculated = 0;
100                         break;
101                         }
102                 }
103         return f_calculated;
104 }
105
106
107 static unsigned int m_calc(unsigned int pfs)
108 {
109         unsigned int m_calculated = 0;
110         switch (pfs) {
111         case IPU_PIX_FMT_YUV420P2:
112         case IPU_PIX_FMT_YUV420P:
113         case IPU_PIX_FMT_YVU422P:
114         case IPU_PIX_FMT_YUV422P:
115         case IPU_PIX_FMT_YVU420P:
116         case IPU_PIX_FMT_YUV444P:
117                 m_calculated = 16;
118                 break;
119
120         case IPU_PIX_FMT_NV12:
121         case IPU_PIX_FMT_YUYV:
122         case IPU_PIX_FMT_UYVY:
123                 m_calculated = 8;
124                 break;
125
126         default:
127                 m_calculated = 8;
128                 break;
129
130         }
131         return m_calculated;
132 }
133
/*
 * Compute the IC downsizing and resizing coefficients for one stripe.
 *
 * @inSize:        input size in pixels, at most 4096
 * @outSize:       output size in pixels, 2..1024, and no less than
 *                 @inSize / 8
 * @resizeCoeff:   receives 8192 * (SI - 1) / (SO - 1), where SI is the
 *                 input size after downsizing and SO is @outSize
 * @downsizeCoeff: receives the number of halvings (0..2) applied to
 *                 @inSize before resizing
 *
 * Returns 0 on success or -EINVAL when the sizes are out of range or the
 * resulting resize coefficient is 16384 or larger.  The outputs must not
 * be used when -EINVAL is returned; they may or may not have been written.
 */
static int calc_split_resize_coeffs(unsigned int inSize, unsigned int outSize,
				    unsigned int *resizeCoeff,
				    unsigned int *downsizeCoeff)
{
	uint32_t tempSize;
	uint32_t tempDownsize;

	if (inSize > 4096) {
		pr_debug("IC input size(%u) cannot exceed 4096\n",
			inSize);
		return -EINVAL;
	}

	if (outSize > 1024) {
		pr_debug("IC output size(%u) cannot exceed 1024\n",
			outSize);
		return -EINVAL;
	}

	/* (outSize - 1) is the divisor of the resize equation below */
	if (outSize < 2) {
		pr_debug("IC output size(%u) must be at least 2\n",
			outSize);
		return -EINVAL;
	}

	if ((outSize << 3) < inSize) {
		pr_debug("IC cannot downsize more than 8:1\n");
		return -EINVAL;
	}

	/* Compute downsizing coefficient */
	/* Output of downsizing unit cannot be more than 1024 */
	tempDownsize = 0;
	tempSize = inSize;
	while (((tempSize > 1024) || (tempSize >= outSize * 2)) &&
	       (tempDownsize < 2)) {
		tempSize >>= 1;
		tempDownsize++;
	}
	*downsizeCoeff = tempDownsize;

	/* compute resizing coefficient using the following equation:
	   resizeCoeff = M*(SI -1)/(SO - 1)
	   where M = 2^13, SI - input size, SO - output size    */
	*resizeCoeff = (8192L * (tempSize - 1)) / (outSize - 1);
	if (*resizeCoeff >= 16384L) {
		pr_debug("Overflow on IC resize coefficient.\n");
		return -EINVAL;
	}

	pr_debug("resizing from %u -> %u pixels, "
		"downsize=%u, resize=%u.%lu (reg=%u)\n", inSize, outSize,
		*downsizeCoeff, (*resizeCoeff >= 8192L) ? 1 : 0,
		((*resizeCoeff & 0x1FFF) * 10000L) / 8192L, *resizeCoeff);

	return 0;
}
185
186 /* Stripe parameters calculator */
187 /**************************************************************************
188 Notes:
189 MSW = the maximal width allowed for a stripe
190         i.MX31: 720, i.MX35: 800, i.MX37/51/53: 1024
191 cirr = the maximal inverse resizing ratio for which overlap in the input
192         is requested; typically cirr~2
193 flags
194         bit 0 - equal_stripes
195                 0  each stripe is allowed to have independent parameters
196                 for maximal image quality
197                 1  the stripes are requested to have identical parameters
198         (except the base address), for maximal performance
199         bit 1 - vertical/horizontal
200                 0 horizontal
201                 1 vertical
202
203 If performance is the top priority (above image quality)
204         Avoid overlap, by setting CIRR = 0
205                 This will also force effectively identical_stripes = 1
206         Choose IF & OF that corresponds to the same IOX/SX for both stripes
207         Choose IFW & OFW such that
208         IFW/IM, IFW/IF, OFW/OM, OFW/OF are even integers
209         The function returns an error status:
210         0: no error
211         1: invalid input parameters -> aborted without result
212                 Valid parameters should satisfy the following conditions
213                 IFW <= OFW, otherwise downsizing is required
214                                          - which is not supported yet
215                 4 <= IFW,OFW, so some interpolation may be needed even without overlap
216                 IM, OM, IF, OF should not vanish
217                 2*IF <= IFW
218                 so the frame can be split to two equal stripes, even without overlap
219                 2*(OF+IF/irr_opt) <= OFW
220                 so a valid positive INW exists even for equal stripes
221                 OF <= MSW, otherwise, the left stripe cannot be sufficiently large
222                 MSW < OFW, so splitting to stripes is required
223                 OFW <= 2*MSW, so two stripes are sufficient
224                 (this also implies that 2<=MSW)
225         2: OF is not a multiple of OM - not fully-supported yet
226         Output is produced but OW is not guaranited to be a multiple of OM
227         4: OFW reduced to be a multiple of OM
228         8: CIRR > 1: truncated to 1
229         Overlap is not supported (and not needed) y for upsizing)
230 **************************************************************************/
231 int ipu_calc_stripes_sizes(const unsigned int input_frame_width,
232                            /* input frame width;>1 */
233                            unsigned int output_frame_width, /* output frame width; >1 */
234                            const unsigned int maximal_stripe_width,
235                            /* the maximal width allowed for a stripe */
236                            const unsigned long long cirr, /* see above */
237                            const unsigned int flags, /* see above */
238                            u32 input_pixelformat,/* pixel format after of read channel*/
239                            u32 output_pixelformat,/* pixel format after of write channel*/
240                            struct stripe_param *left,
241                            struct stripe_param *right)
242 {
243         const unsigned int irr_frac_bits = 13;
244         const unsigned long irr_steps = 1 << irr_frac_bits;
245         const u64 dirr = ((u64)1) << (32 - 2);
246         /* The maximum relative difference allowed between the irrs */
247         const u64 cr = ((u64)4) << 32;
248         /* The importance ratio between the two terms in the cost function below */
249
250         unsigned int status;
251         unsigned int temp;
252         unsigned int onw_min;
253         unsigned int inw = 0, onw = 0, inw_best = 0;
254         /* number of pixels in the left stripe NOT hidden by the right stripe */
255         u64 irr_opt; /* the optimal inverse resizing ratio */
256         u64 rr_opt; /* the optimal resizing ratio = 1/irr_opt*/
257         u64 dinw; /* the misalignment between the stripes */
258         /* (measured in units of input columns) */
259         u64 difwl, difwr = 0;
260         /* The number of input columns not reflected in the output */
261         /* the resizing ratio used for the right stripe is */
262         /*   left->irr and right->irr respectively */
263         u64 cost, cost_min;
264         u64 div; /* result of division */
265         bool equal_stripes = (flags & 0x1) != 0;
266         bool vertical =      (flags & 0x2) != 0;
267
268         unsigned int input_m, input_f, output_m, output_f; /* parameters for upsizing by stripes */
269         unsigned int resize_coeff;
270         unsigned int downsize_coeff;
271
272         status = 0;
273
274         if (vertical) {
275                 input_f = 2;
276                 input_m = 8;
277                 output_f = 8;
278                 output_m = 2;
279         } else {
280                 input_f = f_calc(input_pixelformat, 0, NULL);
281                 input_m = m_calc(input_pixelformat);
282                 output_f = input_m;
283                 output_m = m_calc(output_pixelformat);
284         }
285         if ((input_frame_width < 4) || (output_frame_width < 4))
286                 return 1;
287
288         irr_opt = div_u64((((u64)(input_frame_width - 1)) << 32),
289                           (output_frame_width - 1));
290         rr_opt = div_u64((((u64)(output_frame_width - 1)) << 32),
291                          (input_frame_width - 1));
292
293         if ((input_m == 0) || (output_m == 0) || (input_f == 0) || (output_f == 0)
294             || (input_frame_width < (2 * input_f))
295             || ((((u64)output_frame_width) << 32) <
296                 (2 * ((((u64)output_f) << 32) + (input_f * rr_opt))))
297             || (maximal_stripe_width < output_f)
298             || (output_frame_width <= maximal_stripe_width)
299             || ((2 * maximal_stripe_width) < output_frame_width))
300                 return 1;
301
302         if (output_f % output_m)
303                 status += 2;
304
305         temp = truncate(0, (((u64)output_frame_width) << 32), output_m);
306         if (temp < output_frame_width) {
307                 output_frame_width = temp;
308                 status += 4;
309         }
310
311         pr_debug("---------------->\n"
312                    "if  = %d\n"
313                    "im  = %d\n"
314                    "of = %d\n"
315                    "om = %d\n"
316                    "irr_opt  = %llu\n"
317                    "rr_opt   = %llu\n"
318                    "cirr     = %llu\n"
319                    "pixel in  = %08x\n"
320                    "pixel out = %08x\n"
321                    "ifw = %d\n"
322                    "ofwidth = %d\n",
323                    input_f,
324                    input_m,
325                    output_f,
326                    output_m,
327                    irr_opt,
328                    rr_opt,
329                    cirr,
330                    input_pixelformat,
331                    output_pixelformat,
332                    input_frame_width,
333                    output_frame_width
334                    );
335
336         if (equal_stripes) {
337                 if ((irr_opt > cirr) /* overlap in the input is not requested */
338                     && ((input_frame_width % (input_m << 1)) == 0)
339                     && ((input_frame_width % (input_f << 1)) == 0)
340                     && ((output_frame_width % (output_m << 1)) == 0)
341                     && ((output_frame_width % (output_f << 1)) == 0)) {
342                         /* without overlap */
343                         left->input_width = right->input_width = right->input_column =
344                                 input_frame_width >> 1;
345                         left->output_width = right->output_width = right->output_column =
346                                 output_frame_width >> 1;
347                         left->input_column = 0;
348                         left->output_column = 0;
349                         div = div_u64(((((u64)irr_steps) << 32) *
350                                        (right->input_width - 1)), (right->output_width - 1));
351                         left->irr = right->irr = truncate(0, div, 1);
352                 } else { /* with overlap */
353                         onw = truncate(0, (((u64)output_frame_width - 1) << 32) >> 1,
354                                        output_f);
355                         inw = truncate(0, onw * irr_opt, input_f);
356                         /* this is the maximal inw which allows the same resizing ratio */
357                         /* in both stripes */
358                         onw = truncate(1, (inw * rr_opt), output_f);
359                         div = div_u64((((u64)(irr_steps * inw)) <<
360                                        32), onw);
361                         left->irr = right->irr = truncate(0, div, 1);
362                         left->output_width = right->output_width =
363                                 output_frame_width - onw;
364                         /* These are valid assignments for output_width, */
365                         /* assuming output_f is a multiple of output_m */
366                         div = (((u64)(left->output_width-1) * (left->irr)) << 32);
367                         div = (((u64)1) << 32) + div_u64(div, irr_steps);
368
369                         left->input_width = right->input_width = truncate(1, div, input_m);
370
371                         div = div_u64((((u64)((right->output_width - 1) * right->irr)) <<
372                                        32), irr_steps);
373                         difwr = (((u64)(input_frame_width - 1 - inw)) << 32) - div;
374                         div = div_u64((difwr + (((u64)input_f) << 32)), 2);
375                         left->input_column = truncate(0, div, input_f);
376
377
378                         /* This splits the truncated input columns evenly */
379                         /*    between the left and right margins */
380                         right->input_column = left->input_column + inw;
381                         left->output_column = 0;
382                         right->output_column = onw;
383                 }
384                 if (left->input_width > left->output_width) {
385                         if (calc_split_resize_coeffs(left->input_width,
386                                                      left->output_width,
387                                                      &resize_coeff,
388                                                      &downsize_coeff) < 0)
389                                 return -EINVAL;
390                         left->irr = right->irr =
391                                 (downsize_coeff << 14) | resize_coeff;
392                 }
393                 pr_debug("inw %d, onw %d, ilw %d, ilc %d, olw %d,"
394                          " irw %d, irc %d, orw %d, orc %d, "
395                          "difwr  %llu, lirr %u\n",
396                          inw, onw, left->input_width,
397                          left->input_column, left->output_width,
398                          right->input_width, right->input_column,
399                          right->output_width,
400                          right->output_column, difwr, left->irr);
401                 } else { /* independent stripes */
402                 onw_min = output_frame_width - maximal_stripe_width;
403                 /* onw is a multiple of output_f, in the range */
404                 /* [max(output_f,output_frame_width-maximal_stripe_width),*/
405                 /*min(output_frame_width-2,maximal_stripe_width)] */
406                 /* definitely beyond the cost of any valid setting */
407                 cost_min = (((u64)input_frame_width) << 32) + cr;
408                 onw = truncate(0, ((u64)maximal_stripe_width), output_f);
409                 if (output_frame_width - onw == 1)
410                         onw -= output_f; /*  => onw and output_frame_width-1-onw are positive */
411                 inw = truncate(0, onw * irr_opt, input_f);
412                 /* this is the maximal inw which allows the same resizing ratio */
413                 /* in both stripes */
414                 onw = truncate(1, inw * rr_opt, output_f);
415                 do {
416                         div = div_u64((((u64)(irr_steps * inw)) << 32), onw);
417                         left->irr = truncate(0, div, 1);
418                         div = div_u64((((u64)(onw * left->irr)) << 32),
419                                       irr_steps);
420                         dinw = (((u64)inw) << 32) - div;
421
422                         div = div_u64((((u64)((output_frame_width - 1 - onw) * left->irr)) <<
423                                        32), irr_steps);
424
425                         difwl = (((u64)(input_frame_width - 1 - inw)) << 32) - div;
426
427                         cost = difwl + (((u64)(cr * dinw)) >> 32);
428
429                         if (cost < cost_min) {
430                                 inw_best = inw;
431                                 cost_min = cost;
432                         }
433
434                         inw -= input_f;
435                         onw = truncate(1, inw * rr_opt, output_f);
436                         /* This is the minimal onw which allows the same resizing ratio */
437                         /*     in both stripes */
438                 } while (onw >= onw_min);
439
440                 inw = inw_best;
441                 onw = truncate(1, inw * rr_opt, output_f);
442                 div = div_u64((((u64)(irr_steps * inw)) << 32), onw);
443                 left->irr = truncate(0, div, 1);
444
445                 left->output_width = onw;
446                 right->output_width = output_frame_width - onw;
447                 /* These are valid assignments for output_width, */
448                 /* assuming output_f is a multiple of output_m */
449                 left->input_width = truncate(1, ((u64)(inw + 1)) << 32, input_m);
450                 right->input_width = truncate(1, ((u64)(input_frame_width - inw)) <<
451                                               32, input_m);
452
453                 div = div_u64((((u64)(irr_steps * (input_frame_width - 1 - inw))) <<
454                                32), (right->output_width - 1));
455                 right->irr = truncate(0, div, 1);
456                 temp = truncate(0, ((u64)left->irr) * ((((u64)1) << 32) + dirr), 1);
457                 if (temp < right->irr)
458                         right->irr = temp;
459                 div = div_u64(((u64)((right->output_width - 1) * right->irr) <<
460                                32), irr_steps);
461                 difwr = (u64)(input_frame_width - 1 - inw) - div;
462
463
464                 div = div_u64((difwr + (((u64)input_f) << 32)), 2);
465                 left->input_column = truncate(0, div, input_f);
466
467                 /* This splits the truncated input columns evenly */
468                 /*    between the left and right margins */
469                 right->input_column = left->input_column + inw;
470                 left->output_column = 0;
471                 right->output_column = onw;
472                 if (left->input_width > left->output_width) {
473                         if (calc_split_resize_coeffs(left->input_width,
474                                                      left->output_width,
475                                                      &resize_coeff,
476                                                      &downsize_coeff) < 0)
477                                 return -EINVAL;
478                         left->irr = (downsize_coeff << 14) | resize_coeff;
479                 }
480                 if (right->input_width > right->output_width) {
481                         if (calc_split_resize_coeffs(right->input_width,
482                                                      right->output_width,
483                                                      &resize_coeff,
484                                                      &downsize_coeff) < 0)
485                                 return -EINVAL;
486                         right->irr = (downsize_coeff << 14) | resize_coeff;
487                 }
488         }
489         return status;
490 }
491 EXPORT_SYMBOL(ipu_calc_stripes_sizes);