2 * Copyright 2009-2013 Freescale Semiconductor, Inc. All Rights Reserved.
6 * The code contained herein is licensed under the GNU General Public
7 * License. You may obtain a copy of the GNU General Public License
8 * Version 2 or later at the following locations:
10 * http://www.opensource.org/licenses/gpl-license.html
11 * http://www.gnu.org/copyleft/gpl.html
15 * @file ipu_calc_stripes_sizes.c
17 * @brief IPU IC functions
22 #include <linux/ipu-v3.h>
23 #include <linux/module.h>
24 #include <linux/math64.h>
/*
 * truncate() - rounding helper used by the stripe calculations.
 *
 * NOTE(review): only a fragment is visible. The embedded numbering jumps
 * from 34 to 41, so the remaining parameter(s), including whatever declares
 * d, and the rest of the body are not shown here.
 * Callers in this file pass a value shifted left by 32 bits as 'a' and store
 * the u32 result in pixel widths/columns; presumably this rounds 'a' down
 * (up == 0) or up (up != 0) to a multiple of d - confirm against the full body.
 */
33 static u32 truncate(u32 up, /* 0: down; else: up */
34 u64 a, /* must be non-negative */
/* Rounding up was requested and 'a' is larger than d shifted left by 32 bits. */
41 if (up && (a > (((u64)d) << 32)))
/*
 * f_calc() - select the "f" value for pixel format pfs.
 *
 * The only caller visible in this file, ipu_calc_stripes_sizes(), passes
 * bpp = 0 and write = NULL and treats a returned value of 0 as invalid.
 *
 * NOTE(review): the switch header, the statements executed for each group
 * of case labels and the return statement are not among the visible lines,
 * so the actual values chosen per format cannot be stated from here.
 */
47 static unsigned int f_calc(unsigned int pfs, unsigned int bpp, unsigned int *write)
/* Starts at 0, which the caller rejects as an invalid f value. */
49 unsigned int f_calculated = 0;
/* These six labels are consecutive, so the formats share one set of statements. */
51 case IPU_PIX_FMT_YVU422P:
52 case IPU_PIX_FMT_YUV422P:
53 case IPU_PIX_FMT_YUV420P2:
54 case IPU_PIX_FMT_YUV420P:
55 case IPU_PIX_FMT_YVU420P:
56 case IPU_PIX_FMT_YUV444P:
/* Second group of consecutive labels. */
60 case IPU_PIX_FMT_RGB565:
61 case IPU_PIX_FMT_YUYV:
62 case IPU_PIX_FMT_UYVY:
/* NV12 has its own case; the following lines are not visible. */
66 case IPU_PIX_FMT_NV12:
/*
 * m_calc() - select the "m" value for pixel format pfs.
 *
 * ipu_calc_stripes_sizes() calls this for both the input and the output
 * pixel format and treats a returned value of 0 as invalid.
 *
 * NOTE(review): the switch header, the per-group statements and the return
 * statement are not among the visible lines, so the values chosen per
 * format cannot be stated from here.
 */
107 static unsigned int m_calc(unsigned int pfs)
/* Starts at 0, which the caller rejects as an invalid m value. */
109 unsigned int m_calculated = 0;
/* These six labels are consecutive, so the formats share one set of statements. */
111 case IPU_PIX_FMT_YUV420P2:
112 case IPU_PIX_FMT_YUV420P:
113 case IPU_PIX_FMT_YVU422P:
114 case IPU_PIX_FMT_YUV422P:
115 case IPU_PIX_FMT_YVU420P:
116 case IPU_PIX_FMT_YUV444P:
/* Second group of consecutive labels (note: NV12 is grouped with YUYV/UYVY here). */
120 case IPU_PIX_FMT_NV12:
121 case IPU_PIX_FMT_YUYV:
122 case IPU_PIX_FMT_UYVY:
/*
 * calc_split_resize_coeffs() - derive the IC downsize and resize
 * coefficients needed to scale inSize pixels to outSize pixels.
 *
 * @inSize:        input size in pixels
 * @outSize:       output size in pixels
 * @resizeCoeff:   receives 8192 * (tempSize - 1) / (outSize - 1)
 * @downsizeCoeff: receives the downsizing coefficient found by the loop below
 *
 * Callers in this file treat a negative return value as failure.
 *
 * NOTE(review): the return statements, the initialisation of tempSize and
 * tempDownsize, the input-size condition and the loop body are not among
 * the visible lines.
 */
134 static int calc_split_resize_coeffs(unsigned int inSize, unsigned int outSize,
135 unsigned int *resizeCoeff,
136 unsigned int *downsizeCoeff)
139 uint32_t tempDownsize;
/* Rejected sizes are only reported through pr_debug. */
142 pr_debug("IC input size(%d) cannot exceed 4096\n",
147 if (outSize > 1024) {
148 pr_debug("IC output size(%d) cannot exceed 1024\n",
/* outSize * 8 smaller than inSize means a ratio beyond 8:1. */
153 if ((outSize << 3) < inSize) {
154 pr_debug("IC cannot downsize more than 8:1\n");
158 /* Compute downsizing coefficient */
159 /* Output of downsizing unit cannot be more than 1024 */
/*
 * Keep stepping while the intermediate size is still above 1024 or at least
 * twice the output size, but never beyond tempDownsize == 2.
 */
162 while (((tempSize > 1024) || (tempSize >= outSize * 2)) &&
163 (tempDownsize < 2)) {
167 *downsizeCoeff = tempDownsize;
169 /* compute resizing coefficient using the following equation:
170 resizeCoeff = M*(SI -1)/(SO - 1)
171 where M = 2^13, SI - input size, SO - output size */
/*
 * NOTE(review): divides by (outSize - 1); no guard against outSize == 1 is
 * visible in this listing - confirm callers never pass 1.
 */
172 *resizeCoeff = (8192L * (tempSize - 1)) / (outSize - 1);
/* 16384 = 2 * 8192, i.e. a resize ratio of 2.0 or more in units of 1/8192. */
173 if (*resizeCoeff >= 16384L) {
174 pr_debug("Overflow on IC resize coefficient.\n");
/* Prints the coefficient as integer part plus four decimal digits. */
178 pr_debug("resizing from %u -> %u pixels, "
179 "downsize=%u, resize=%u.%lu (reg=%u)\n", inSize, outSize,
180 *downsizeCoeff, (*resizeCoeff >= 8192L) ? 1 : 0,
181 ((*resizeCoeff & 0x1FFF) * 10000L) / 8192L, *resizeCoeff);
186 /* Stripe parameters calculator */
187 /**************************************************************************
189 MSW = the maximal width allowed for a stripe
190 i.MX31: 720, i.MX35: 800, i.MX37/51/53: 1024
191 cirr = the maximal inverse resizing ratio for which overlap in the input
192 is requested; typically cirr~2
194 bit 0 - equal_stripes
195 0 each stripe is allowed to have independent parameters
196 for maximal image quality
197 1 the stripes are requested to have identical parameters
198 (except the base address), for maximal performance
199 bit 1 - vertical/horizontal
203 If performance is the top priority (above image quality)
204 Avoid overlap, by setting CIRR = 0
205 This will also force effectively identical_stripes = 1
206 Choose IF & OF that corresponds to the same IOX/SX for both stripes
207 Choose IFW & OFW such that
208 IFW/IM, IFW/IF, OFW/OM, OFW/OF are even integers
209 The function returns an error status:
211 1: invalid input parameters -> aborted without result
212 Valid parameters should satisfy the following conditions
213 IFW <= OFW, otherwise downsizing is required
214 - which is not supported yet
215 4 <= IFW,OFW, so some interpolation may be needed even without overlap
216 IM, OM, IF, OF should not vanish
218 so the frame can be split to two equal stripes, even without overlap
219 2*(OF+IF/irr_opt) <= OFW
220 so a valid positive INW exists even for equal stripes
221 OF <= MSW, otherwise, the left stripe cannot be sufficiently large
222 MSW < OFW, so splitting to stripes is required
223 OFW <= 2*MSW, so two stripes are sufficient
224 (this also implies that 2<=MSW)
225 2: OF is not a multiple of OM - not fully-supported yet
226 Output is produced but OW is not guaranteed to be a multiple of OM
227 4: OFW reduced to be a multiple of OM
228 8: CIRR > 1: truncated to 1
229 Overlap is not supported (and not needed) for upsizing
230 **************************************************************************/
/*
 * ipu_calc_stripes_sizes() - split a frame into a left and a right stripe
 * and fill *left and *right with each stripe's input/output width, input/
 * output column and resizing ratio (irr).
 *
 * The meaning of cirr, flags and of the returned status bits is described
 * in the comment block preceding this function.
 *
 * NOTE(review): this listing has gaps (see the embedded line numbers), so
 * several return statements, continuation lines and assignments are not
 * visible; comments below only describe what the visible lines show.
 */
231 int ipu_calc_stripes_sizes(const unsigned int input_frame_width,
232 /* input frame width;>1 */
233 unsigned int output_frame_width, /* output frame width; >1 */
234 const unsigned int maximal_stripe_width,
235 /* the maximal width allowed for a stripe */
236 const unsigned long long cirr, /* see above */
237 const unsigned int flags, /* see above */
238 u32 input_pixelformat,/* pixel format after of read channel*/
239 u32 output_pixelformat,/* pixel format after of write channel*/
240 struct stripe_param *left,
241 struct stripe_param *right)
/* irr values carry 13 fractional bits, so irr_steps is 1 << 13 = 8192. */
243 const unsigned int irr_frac_bits = 13;
244 const unsigned long irr_steps = 1 << irr_frac_bits;
/* 1 << 30; relative to a 1 << 32 unit this is one quarter. */
245 const u64 dirr = ((u64)1) << (32 - 2);
246 /* The maximum relative difference allowed between the irrs */
247 const u64 cr = ((u64)4) << 32;
248 /* The importance ratio between the two terms in the cost function below */
252 unsigned int onw_min;
253 unsigned int inw = 0, onw = 0, inw_best = 0;
254 /* number of pixels in the left stripe NOT hidden by the right stripe */
255 u64 irr_opt; /* the optimal inverse resizing ratio */
256 u64 rr_opt; /* the optimal resizing ratio = 1/irr_opt*/
257 u64 dinw; /* the misalignment between the stripes */
258 /* (measured in units of input columns) */
259 u64 difwl, difwr = 0;
260 /* The number of input columns not reflected in the output */
261 /* the resizing ratio used for the right stripe is */
262 /* left->irr and right->irr respectively */
264 u64 div; /* result of division */
/* Decode the two documented flag bits. */
265 bool equal_stripes = (flags & 0x1) != 0;
266 bool vertical = (flags & 0x2) != 0;
268 unsigned int input_m, input_f, output_m, output_f; /* parameters for upsizing by stripes */
269 unsigned int resize_coeff;
270 unsigned int downsize_coeff;
/*
 * Derive the per-format f and m parameters.
 * NOTE(review): output_f is read below but its assignment is not among the
 * visible lines (embedded line 282 is missing) - confirm it is set there.
 */
280 input_f = f_calc(input_pixelformat, 0, NULL);
281 input_m = m_calc(input_pixelformat);
283 output_m = m_calc(output_pixelformat);
/* Both frame widths must be at least 4. */
285 if ((input_frame_width < 4) || (output_frame_width < 4))
/*
 * irr_opt = (input width - 1) / (output width - 1) and rr_opt is its
 * inverse; both numerators are shifted left by 32 bits before dividing, so
 * the quotients carry 32 fractional bits.
 */
288 irr_opt = div_u64((((u64)(input_frame_width - 1)) << 32),
289 (output_frame_width - 1));
290 rr_opt = div_u64((((u64)(output_frame_width - 1)) << 32),
291 (input_frame_width - 1));
/*
 * Parameter validation: non-zero m/f values, input wide enough for two
 * stripes of input_f, output wide enough for 2 * (output_f + input_f * rr_opt),
 * and maximal_stripe_width < output_frame_width <= 2 * maximal_stripe_width.
 */
293 if ((input_m == 0) || (output_m == 0) || (input_f == 0) || (output_f == 0)
294 || (input_frame_width < (2 * input_f))
295 || ((((u64)output_frame_width) << 32) <
296 (2 * ((((u64)output_f) << 32) + (input_f * rr_opt))))
297 || (maximal_stripe_width < output_f)
298 || (output_frame_width <= maximal_stripe_width)
299 || ((2 * maximal_stripe_width) < output_frame_width))
/* output_f not a multiple of output_m (status 2 in the comment above; the statement itself is not visible). */
302 if (output_f % output_m)
/* Shrink output_frame_width if truncate() returns a smaller value for it with output_m. */
305 temp = truncate(0, (((u64)output_frame_width) << 32), output_m);
306 if (temp < output_frame_width) {
307 output_frame_width = temp;
311 pr_debug("---------------->\n"
/*
 * NOTE(review): the "} else { independent stripes" further down suggests an
 * enclosing test (presumably on equal_stripes) on a line that is not
 * visible here.
 * No-overlap split: taken when irr_opt exceeds cirr and both frame widths
 * divide evenly by twice their m and f values.
 */
337 if ((irr_opt > cirr) /* overlap in the input is not requested */
338 && ((input_frame_width % (input_m << 1)) == 0)
339 && ((input_frame_width % (input_f << 1)) == 0)
340 && ((output_frame_width % (output_m << 1)) == 0)
341 && ((output_frame_width % (output_f << 1)) == 0)) {
342 /* without overlap */
/* Each stripe gets exactly half of the frame; the right one starts at the midpoint. */
343 left->input_width = right->input_width = right->input_column =
344 input_frame_width >> 1;
345 left->output_width = right->output_width = right->output_column =
346 output_frame_width >> 1;
347 left->input_column = 0;
348 left->output_column = 0;
/* Shared irr = irr_steps * (input width - 1) / (output width - 1). */
349 div = div_u64(((((u64)irr_steps) << 32) *
350 (right->input_width - 1)), (right->output_width - 1));
351 left->irr = right->irr = truncate(0, div, 1);
352 } else { /* with overlap */
353 onw = truncate(0, (((u64)output_frame_width - 1) << 32) >> 1,
355 inw = truncate(0, onw * irr_opt, input_f);
356 /* this is the maximal inw which allows the same resizing ratio */
357 /* in both stripes */
358 onw = truncate(1, (inw * rr_opt), output_f);
359 div = div_u64((((u64)(irr_steps * inw)) <<
361 left->irr = right->irr = truncate(0, div, 1);
/* Both stripes get the same output width: everything to the right of onw. */
362 left->output_width = right->output_width =
363 output_frame_width - onw;
364 /* These are valid assignments for output_width, */
365 /* assuming output_f is a multiple of output_m */
/* Input width = 1 + (output_width - 1) * irr / irr_steps, passed to truncate() with up = 1 and input_m. */
366 div = (((u64)(left->output_width-1) * (left->irr)) << 32);
367 div = (((u64)1) << 32) + div_u64(div, irr_steps);
369 left->input_width = right->input_width = truncate(1, div, input_m);
/*
 * NOTE(review): the product is formed before the cast to u64; if both
 * stripe_param fields are 32-bit it is computed in 32 bits - confirm range.
 */
371 div = div_u64((((u64)((right->output_width - 1) * right->irr)) <<
373 difwr = (((u64)(input_frame_width - 1 - inw)) << 32) - div;
374 div = div_u64((difwr + (((u64)input_f) << 32)), 2);
375 left->input_column = truncate(0, div, input_f);
378 /* This splits the truncated input columns evenly */
379 /* between the left and right margins */
380 right->input_column = left->input_column + inw;
381 left->output_column = 0;
382 right->output_column = onw;
/*
 * Stripe input wider than its output: replace irr with the packed value
 * (downsize_coeff << 14) | resize_coeff from calc_split_resize_coeffs().
 */
384 if (left->input_width > left->output_width) {
385 if (calc_split_resize_coeffs(left->input_width,
388 &downsize_coeff) < 0)
390 left->irr = right->irr =
391 (downsize_coeff << 14) | resize_coeff;
393 pr_debug("inw %d, onw %d, ilw %d, ilc %d, olw %d,"
394 " irw %d, irc %d, orw %d, orc %d, "
395 "difwr %llu, lirr %u\n",
396 inw, onw, left->input_width,
397 left->input_column, left->output_width,
398 right->input_width, right->input_column,
400 right->output_column, difwr, left->irr);
401 } else { /* independent stripes */
/* Smallest onw that still keeps the right stripe within maximal_stripe_width. */
402 onw_min = output_frame_width - maximal_stripe_width;
403 /* onw is a multiple of output_f, in the range */
404 /* [max(output_f,output_frame_width-maximal_stripe_width),*/
405 /*min(output_frame_width-2,maximal_stripe_width)] */
406 /* definitely beyond the cost of any valid setting */
407 cost_min = (((u64)input_frame_width) << 32) + cr;
/*
 * NOTE(review): unlike the other truncate() calls in this function, the
 * value passed here is not shifted left by 32 bits - confirm intended.
 */
408 onw = truncate(0, ((u64)maximal_stripe_width), output_f);
409 if (output_frame_width - onw == 1)
410 onw -= output_f; /* => onw and output_frame_width-1-onw are positive */
411 inw = truncate(0, onw * irr_opt, input_f);
412 /* this is the maximal inw which allows the same resizing ratio */
413 /* in both stripes */
414 onw = truncate(1, inw * rr_opt, output_f);
/* Candidate evaluation: irr for this (inw, onw) pair, then the two cost terms. */
416 div = div_u64((((u64)(irr_steps * inw)) << 32), onw);
417 left->irr = truncate(0, div, 1);
418 div = div_u64((((u64)(onw * left->irr)) << 32),
420 dinw = (((u64)inw) << 32) - div;
422 div = div_u64((((u64)((output_frame_width - 1 - onw) * left->irr)) <<
425 difwl = (((u64)(input_frame_width - 1 - inw)) << 32) - div;
/*
 * NOTE(review): cr is 4 << 32, so cr * dinw wraps in 64 bits once dinw
 * reaches 1 << 30 - confirm dinw stays below that.
 */
427 cost = difwl + (((u64)(cr * dinw)) >> 32);
/* Lower cost found (the statements that record it are not visible). */
429 if (cost < cost_min) {
435 onw = truncate(1, inw * rr_opt, output_f);
436 /* This is the minimal onw which allows the same resizing ratio */
437 /* in both stripes */
/* Keep trying candidates while the right stripe still fits. */
438 } while (onw >= onw_min);
/* Recompute onw and left->irr for the final inw (presumably inw_best; that assignment is not visible). */
441 onw = truncate(1, inw * rr_opt, output_f);
442 div = div_u64((((u64)(irr_steps * inw)) << 32), onw);
443 left->irr = truncate(0, div, 1);
445 left->output_width = onw;
446 right->output_width = output_frame_width - onw;
447 /* These are valid assignments for output_width, */
448 /* assuming output_f is a multiple of output_m */
449 left->input_width = truncate(1, ((u64)(inw + 1)) << 32, input_m);
450 right->input_width = truncate(1, ((u64)(input_frame_width - inw)) <<
/* The right stripe gets its own irr from the remaining input and output columns. */
453 div = div_u64((((u64)(irr_steps * (input_frame_width - 1 - inw))) <<
454 32), (right->output_width - 1));
455 right->irr = truncate(0, div, 1);
/* temp = left->irr scaled by (1 + dirr / 2^32); compared against right->irr (consequence not visible). */
456 temp = truncate(0, ((u64)left->irr) * ((((u64)1) << 32) + dirr), 1);
457 if (temp < right->irr)
459 div = div_u64(((u64)((right->output_width - 1) * right->irr) <<
/*
 * NOTE(review): at embedded lines 373 and 425 the width term is shifted left
 * by 32 bits before div is subtracted; here it is not - confirm whether the
 * different scaling is intended.
 */
461 difwr = (u64)(input_frame_width - 1 - inw) - div;
464 div = div_u64((difwr + (((u64)input_f) << 32)), 2);
465 left->input_column = truncate(0, div, input_f);
467 /* This splits the truncated input columns evenly */
468 /* between the left and right margins */
469 right->input_column = left->input_column + inw;
470 left->output_column = 0;
471 right->output_column = onw;
/*
 * For each stripe whose input is wider than its output, replace irr with
 * the packed value (downsize_coeff << 14) | resize_coeff.
 */
472 if (left->input_width > left->output_width) {
473 if (calc_split_resize_coeffs(left->input_width,
476 &downsize_coeff) < 0)
478 left->irr = (downsize_coeff << 14) | resize_coeff;
480 if (right->input_width > right->output_width) {
481 if (calc_split_resize_coeffs(right->input_width,
484 &downsize_coeff) < 0)
486 right->irr = (downsize_coeff << 14) | resize_coeff;
491 EXPORT_SYMBOL(ipu_calc_stripes_sizes);