LCOV - code coverage report
Current view: top level - media/libjpeg - jidctfst.c (source / functions)
Test: output.info
Date: 2017-07-14 16:53:18
              Hit   Total   Coverage
Lines:          0     118      0.0 %
Functions:      0       1      0.0 %
Legend: Lines: hit | not hit

          Line data    Source code
       1             : /*
       2             :  * jidctfst.c
       3             :  *
       4             :  * This file was part of the Independent JPEG Group's software:
       5             :  * Copyright (C) 1994-1998, Thomas G. Lane.
       6             :  * libjpeg-turbo Modifications:
       7             :  * Copyright (C) 2015, D. R. Commander.
       8             :  * For conditions of distribution and use, see the accompanying README.ijg
       9             :  * file.
      10             :  *
      11             :  * This file contains a fast, not so accurate integer implementation of the
      12             :  * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
      13             :  * must also perform dequantization of the input coefficients.
      14             :  *
      15             :  * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
      16             :  * on each row (or vice versa, but it's more convenient to emit a row at
      17             :  * a time).  Direct algorithms are also available, but they are much more
      18             :  * complex and seem not to be any faster when reduced to code.
      19             :  *
      20             :  * This implementation is based on Arai, Agui, and Nakajima's algorithm for
      21             :  * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
      22             :  * Japanese, but the algorithm is described in the Pennebaker & Mitchell
      23             :  * JPEG textbook (see REFERENCES section in file README.ijg).  The following
      24             :  * code is based directly on figure 4-8 in P&M.
      25             :  * While an 8-point DCT cannot be done in fewer than 11 multiplies, it is
      26             :  * possible to arrange the computation so that many of the multiplies are
      27             :  * simple scalings of the final outputs.  These multiplies can then be
      28             :  * folded into the multiplications or divisions by the JPEG quantization
      29             :  * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
      30             :  * to be done in the DCT itself.
      31             :  * The primary disadvantage of this method is that with fixed-point math,
      32             :  * accuracy is lost due to imprecise representation of the scaled
      33             :  * quantization values.  The smaller the quantization table entry, the less
      34             :  * precise the scaled value, so this implementation does worse with high-
      35             :  * quality-setting files than with low-quality ones.
      36             :  */
      37             : 
      38             : #define JPEG_INTERNALS
      39             : #include "jinclude.h"
      40             : #include "jpeglib.h"
      41             : #include "jdct.h"               /* Private declarations for DCT subsystem */
      42             : 
      43             : #ifdef DCT_IFAST_SUPPORTED
      44             : 
      45             : 
      46             : /*
      47             :  * This module is specialized to the case DCTSIZE = 8.
      48             :  */
      49             : 
      50             : #if DCTSIZE != 8
      51             :   Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
      52             : #endif
      53             : 
      54             : 
      55             : /* Scaling decisions are generally the same as in the LL&M algorithm;
      56             :  * see jidctint.c for more details.  However, we choose to descale
      57             :  * (right shift) multiplication products as soon as they are formed,
      58             :  * rather than carrying additional fractional bits into subsequent additions.
      59             :  * This compromises accuracy slightly, but it lets us save a few shifts.
      60             :  * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
      61             :  * everywhere except in the multiplications proper; this saves a good deal
      62             :  * of work on 16-bit-int machines.
      63             :  *
      64             :  * The dequantized coefficients are not integers because the AA&N scaling
      65             :  * factors have been incorporated.  We represent them scaled up by PASS1_BITS,
      66             :  * so that the first and second IDCT rounds have the same input scaling.
      67             :  * For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
      68             :  * avoid a descaling shift; this compromises accuracy rather drastically
      69             :  * for small quantization table entries, but it saves a lot of shifts.
      70             :  * For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,
      71             :  * so we use a much larger scaling factor to preserve accuracy.
      72             :  *
      73             :  * A final compromise is to represent the multiplicative constants to only
      74             :  * 8 fractional bits, rather than 13.  This saves some shifting work on some
      75             :  * machines, and may also reduce the cost of multiplication (since there
      76             :  * are fewer one-bits in the constants).
      77             :  */
      78             : 
      79             : #if BITS_IN_JSAMPLE == 8
      80             : #define CONST_BITS  8
      81             : #define PASS1_BITS  2
      82             : #else
      83             : #define CONST_BITS  8
      84             : #define PASS1_BITS  1           /* lose a little precision to avoid overflow */
      85             : #endif
      86             : 
      87             : /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
      88             :  * causing a lot of useless floating-point operations at run time.
      89             :  * To get around this we use the following pre-calculated constants.
      90             :  * If you change CONST_BITS you may want to add appropriate values.
      91             :  * (With a reasonable C compiler, you can just rely on the FIX() macro...)
      92             :  */
      93             : 
      94             : #if CONST_BITS == 8
      95             : #define FIX_1_082392200  ((JLONG)  277)         /* FIX(1.082392200) */
      96             : #define FIX_1_414213562  ((JLONG)  362)         /* FIX(1.414213562) */
      97             : #define FIX_1_847759065  ((JLONG)  473)         /* FIX(1.847759065) */
      98             : #define FIX_2_613125930  ((JLONG)  669)         /* FIX(2.613125930) */
      99             : #else
     100             : #define FIX_1_082392200  FIX(1.082392200)
     101             : #define FIX_1_414213562  FIX(1.414213562)
     102             : #define FIX_1_847759065  FIX(1.847759065)
     103             : #define FIX_2_613125930  FIX(2.613125930)
     104             : #endif
     105             : 
     106             : 
     107             : /* We can gain a little more speed, with a further compromise in accuracy,
     108             :  * by omitting the addition in a descaling shift.  This yields an incorrectly
     109             :  * rounded result half the time...
     110             :  */
     111             : 
     112             : #ifndef USE_ACCURATE_ROUNDING
     113             : #undef DESCALE
     114             : #define DESCALE(x,n)  RIGHT_SHIFT(x, n)
     115             : #endif
     116             : 
     117             : 
     118             : /* Multiply a DCTELEM variable by a JLONG constant, and immediately
     119             :  * descale to yield a DCTELEM result.
     120             :  */
     121             : 
     122             : #define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
     123             : 
     124             : 
     125             : /* Dequantize a coefficient by multiplying it by the multiplier-table
     126             :  * entry; produce a DCTELEM result.  For 8-bit data a 16x16->16
     127             :  * multiplication will do.  For 12-bit data, the multiplier table is
     128             :  * declared JLONG, so a 32-bit multiply will be used.
     129             :  */
     130             : 
     131             : #if BITS_IN_JSAMPLE == 8
     132             : #define DEQUANTIZE(coef,quantval)  (((IFAST_MULT_TYPE) (coef)) * (quantval))
     133             : #else
     134             : #define DEQUANTIZE(coef,quantval)  \
     135             :         DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)
     136             : #endif
     137             : 
     138             : 
     139             : /* Like DESCALE, but applies to a DCTELEM and produces an int.
     140             :  * We assume that int right shift is unsigned if JLONG right shift is.
     141             :  */
     142             : 
     143             : #ifdef RIGHT_SHIFT_IS_UNSIGNED
     144             : #define ISHIFT_TEMPS    DCTELEM ishift_temp;
     145             : #if BITS_IN_JSAMPLE == 8
     146             : #define DCTELEMBITS  16         /* DCTELEM may be 16 or 32 bits */
     147             : #else
     148             : #define DCTELEMBITS  32         /* DCTELEM must be 32 bits */
     149             : #endif
     150             : #define IRIGHT_SHIFT(x,shft)  \
     151             :     ((ishift_temp = (x)) < 0 ? \
     152             :      (ishift_temp >> (shft)) | ((~((DCTELEM) 0)) << (DCTELEMBITS-(shft))) : \
     153             :      (ishift_temp >> (shft)))
     154             : #else
     155             : #define ISHIFT_TEMPS
     156             : #define IRIGHT_SHIFT(x,shft)    ((x) >> (shft))
     157             : #endif
     158             : 
     159             : #ifdef USE_ACCURATE_ROUNDING
     160             : #define IDESCALE(x,n)  ((int) IRIGHT_SHIFT((x) + (1 << ((n)-1)), n))
     161             : #else
     162             : #define IDESCALE(x,n)  ((int) IRIGHT_SHIFT(x, n))
     163             : #endif
     164             : 
     165             : 
     166             : /*
     167             :  * Perform dequantization and inverse DCT on one block of coefficients.
     168             :  */
     169             : 
     170             : GLOBAL(void)
     171           0 : jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr,
     172             :                  JCOEFPTR coef_block,
     173             :                  JSAMPARRAY output_buf, JDIMENSION output_col)
     174             : {
     175             :   DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
     176             :   DCTELEM tmp10, tmp11, tmp12, tmp13;
     177             :   DCTELEM z5, z10, z11, z12, z13;
     178             :   JCOEFPTR inptr;
     179             :   IFAST_MULT_TYPE *quantptr;
     180             :   int *wsptr;
     181             :   JSAMPROW outptr;
     182           0 :   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
     183             :   int ctr;
     184             :   int workspace[DCTSIZE2];      /* buffers data between passes */
     185             :   SHIFT_TEMPS                   /* for DESCALE */
     186             :   ISHIFT_TEMPS                  /* for IDESCALE */
     187             : 
     188             :   /* Pass 1: process columns from input, store into work array. */
     189             : 
     190           0 :   inptr = coef_block;
     191           0 :   quantptr = (IFAST_MULT_TYPE *) compptr->dct_table;
     192           0 :   wsptr = workspace;
     193           0 :   for (ctr = DCTSIZE; ctr > 0; ctr--) {
     194             :     /* Due to quantization, we will usually find that many of the input
     195             :      * coefficients are zero, especially the AC terms.  We can exploit this
     196             :      * by short-circuiting the IDCT calculation for any column in which all
     197             :      * the AC terms are zero.  In that case each output is equal to the
     198             :      * DC coefficient (with scale factor as needed).
     199             :      * With typical images and quantization tables, half or more of the
     200             :      * column DCT calculations can be simplified this way.
     201             :      */
     202             : 
     203           0 :     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
     204           0 :         inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
     205           0 :         inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
     206           0 :         inptr[DCTSIZE*7] == 0) {
     207             :       /* AC terms all zero */
     208           0 :       int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     209             : 
     210           0 :       wsptr[DCTSIZE*0] = dcval;
     211           0 :       wsptr[DCTSIZE*1] = dcval;
     212           0 :       wsptr[DCTSIZE*2] = dcval;
     213           0 :       wsptr[DCTSIZE*3] = dcval;
     214           0 :       wsptr[DCTSIZE*4] = dcval;
     215           0 :       wsptr[DCTSIZE*5] = dcval;
     216           0 :       wsptr[DCTSIZE*6] = dcval;
     217           0 :       wsptr[DCTSIZE*7] = dcval;
     218             : 
     219           0 :       inptr++;                  /* advance pointers to next column */
     220           0 :       quantptr++;
     221           0 :       wsptr++;
     222           0 :       continue;
     223             :     }
     224             : 
     225             :     /* Even part */
     226             : 
     227           0 :     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     228           0 :     tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     229           0 :     tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     230           0 :     tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
     231             : 
     232           0 :     tmp10 = tmp0 + tmp2;        /* phase 3 */
     233           0 :     tmp11 = tmp0 - tmp2;
     234             : 
     235           0 :     tmp13 = tmp1 + tmp3;        /* phases 5-3 */
     236           0 :     tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
     237             : 
     238           0 :     tmp0 = tmp10 + tmp13;       /* phase 2 */
     239           0 :     tmp3 = tmp10 - tmp13;
     240           0 :     tmp1 = tmp11 + tmp12;
     241           0 :     tmp2 = tmp11 - tmp12;
     242             : 
     243             :     /* Odd part */
     244             : 
     245           0 :     tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
     246           0 :     tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
     247           0 :     tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     248           0 :     tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
     249             : 
     250           0 :     z13 = tmp6 + tmp5;          /* phase 6 */
     251           0 :     z10 = tmp6 - tmp5;
     252           0 :     z11 = tmp4 + tmp7;
     253           0 :     z12 = tmp4 - tmp7;
     254             : 
     255           0 :     tmp7 = z11 + z13;           /* phase 5 */
     256           0 :     tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
     257             : 
     258           0 :     z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
     259           0 :     tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
     260           0 :     tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
     261             : 
     262           0 :     tmp6 = tmp12 - tmp7;        /* phase 2 */
     263           0 :     tmp5 = tmp11 - tmp6;
     264           0 :     tmp4 = tmp10 + tmp5;
     265             : 
     266           0 :     wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
     267           0 :     wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
     268           0 :     wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
     269           0 :     wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
     270           0 :     wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5);
     271           0 :     wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
     272           0 :     wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
     273           0 :     wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
     274             : 
     275           0 :     inptr++;                    /* advance pointers to next column */
     276           0 :     quantptr++;
     277           0 :     wsptr++;
     278             :   }
     279             : 
     280             :   /* Pass 2: process rows from work array, store into output array. */
     281             :   /* Note that we must descale the results by a factor of 8 == 2**3, */
     282             :   /* and also undo the PASS1_BITS scaling. */
     283             : 
     284           0 :   wsptr = workspace;
     285           0 :   for (ctr = 0; ctr < DCTSIZE; ctr++) {
     286           0 :     outptr = output_buf[ctr] + output_col;
     287             :     /* Rows of zeroes can be exploited in the same way as we did with columns.
     288             :      * However, the column calculation has created many nonzero AC terms, so
     289             :      * the simplification applies less often (typically 5% to 10% of the time).
     290             :      * On machines with very fast multiplication, it's possible that the
     291             :      * test takes more time than it's worth.  In that case this section
     292             :      * may be commented out.
     293             :      */
     294             : 
     295             : #ifndef NO_ZERO_ROW_TEST
     296           0 :     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
     297           0 :         wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
     298             :       /* AC terms all zero */
     299           0 :       JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3)
     300           0 :                                   & RANGE_MASK];
     301             : 
     302           0 :       outptr[0] = dcval;
     303           0 :       outptr[1] = dcval;
     304           0 :       outptr[2] = dcval;
     305           0 :       outptr[3] = dcval;
     306           0 :       outptr[4] = dcval;
     307           0 :       outptr[5] = dcval;
     308           0 :       outptr[6] = dcval;
     309           0 :       outptr[7] = dcval;
     310             : 
     311           0 :       wsptr += DCTSIZE;         /* advance pointer to next row */
     312           0 :       continue;
     313             :     }
     314             : #endif
     315             : 
     316             :     /* Even part */
     317             : 
     318           0 :     tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
     319           0 :     tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
     320             : 
     321           0 :     tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
     322           0 :     tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562)
     323           0 :             - tmp13;
     324             : 
     325           0 :     tmp0 = tmp10 + tmp13;
     326           0 :     tmp3 = tmp10 - tmp13;
     327           0 :     tmp1 = tmp11 + tmp12;
     328           0 :     tmp2 = tmp11 - tmp12;
     329             : 
     330             :     /* Odd part */
     331             : 
     332           0 :     z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
     333           0 :     z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
     334           0 :     z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
     335           0 :     z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
     336             : 
     337           0 :     tmp7 = z11 + z13;           /* phase 5 */
     338           0 :     tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
     339             : 
     340           0 :     z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
     341           0 :     tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
     342           0 :     tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
     343             : 
     344           0 :     tmp6 = tmp12 - tmp7;        /* phase 2 */
     345           0 :     tmp5 = tmp11 - tmp6;
     346           0 :     tmp4 = tmp10 + tmp5;
     347             : 
     348             :     /* Final output stage: scale down by a factor of 8 and range-limit */
     349             : 
     350           0 :     outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
     351           0 :                             & RANGE_MASK];
     352           0 :     outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
     353           0 :                             & RANGE_MASK];
     354           0 :     outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
     355           0 :                             & RANGE_MASK];
     356           0 :     outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
     357           0 :                             & RANGE_MASK];
     358           0 :     outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
     359           0 :                             & RANGE_MASK];
     360           0 :     outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
     361           0 :                             & RANGE_MASK];
     362           0 :     outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
     363           0 :                             & RANGE_MASK];
     364           0 :     outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
     365           0 :                             & RANGE_MASK];
     366             : 
     367           0 :     wsptr += DCTSIZE;           /* advance pointer to next row */
     368             :   }
     369           0 : }
     370             : 
     371             : #endif /* DCT_IFAST_SUPPORTED */

Generated by: LCOV version 1.13