LCOV - code coverage report
Current view: top level - media/libjpeg - jidctint.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 1459 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 13 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * jidctint.c
       3             :  *
       4             :  * This file was part of the Independent JPEG Group's software.
       5             :  * Copyright (C) 1991-1998, Thomas G. Lane.
       6             :  * Modification developed 2002-2009 by Guido Vollbeding.
       7             :  * libjpeg-turbo Modifications:
       8             :  * Copyright (C) 2015, D. R. Commander.
       9             :  * For conditions of distribution and use, see the accompanying README.ijg
      10             :  * file.
      11             :  *
      12             :  * This file contains a slow-but-accurate integer implementation of the
      13             :  * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
      14             :  * must also perform dequantization of the input coefficients.
      15             :  *
      16             :  * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
      17             :  * on each row (or vice versa, but it's more convenient to emit a row at
      18             :  * a time).  Direct algorithms are also available, but they are much more
      19             :  * complex and seem not to be any faster when reduced to code.
      20             :  *
      21             :  * This implementation is based on an algorithm described in
      22             :  *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
      23             :  *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
      24             :  *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
      25             :  * The primary algorithm described there uses 11 multiplies and 29 adds.
      26             :  * We use their alternate method with 12 multiplies and 32 adds.
      27             :  * The advantage of this method is that no data path contains more than one
      28             :  * multiplication; this allows a very simple and accurate implementation in
      29             :  * scaled fixed-point arithmetic, with a minimal number of shifts.
      30             :  *
      31             :  * We also provide IDCT routines with various output sample block sizes for
      32             :  * direct resolution reduction or enlargement without additional resampling:
      33             :  * NxN (N=1...16) pixels for one 8x8 input DCT block.
      34             :  *
      35             :  * For N<8 we simply take the corresponding low-frequency coefficients of
      36             :  * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
      37             :  * to yield the downscaled outputs.
      38             :  * This can be seen as direct low-pass downsampling from the DCT domain
      39             :  * point of view rather than the usual spatial domain point of view,
      40             :  * yielding significant computational savings and results at least
      41             :  * as good as common bilinear (averaging) spatial downsampling.
      42             :  *
      43             :  * For N>8 we apply a partial NxN IDCT, treating the 8 input coefficients
      44             :  * as the lower frequencies and assuming the higher frequencies to be zero.
      45             :  * It turns out that the computational effort, relative to the output size,
      46             :  * is similar to that of the 8x8 IDCT.
      47             :  * Furthermore, the scaling and descaling are the same for all IDCT sizes.
      48             :  *
      49             :  * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
      50             :  * since there would be too many additional constants to pre-calculate.
      51             :  */
      52             : 
      53             : #define JPEG_INTERNALS
      54             : #include "jinclude.h"
      55             : #include "jpeglib.h"
      56             : #include "jdct.h"               /* Private declarations for DCT subsystem */
      57             : 
      58             : #ifdef DCT_ISLOW_SUPPORTED
      59             : 
      60             : 
      61             : /*
      62             :  * This module is specialized to the case DCTSIZE = 8.
      63             :  */
      64             : 
      65             : #if DCTSIZE != 8
      66             :   Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
      67             : #endif
      68             : 
      69             : 
      70             : /*
      71             :  * The poop on this scaling stuff is as follows:
      72             :  *
      73             :  * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
      74             :  * larger than the true IDCT outputs.  The final outputs are therefore
      75             :  * a factor of N larger than desired; since N=8 this can be cured by
      76             :  * a simple right shift at the end of the algorithm.  The advantage of
      77             :  * this arrangement is that we save two multiplications per 1-D IDCT,
      78             :  * because the y0 and y4 inputs need not be divided by sqrt(N).
      79             :  *
      80             :  * We have to do addition and subtraction of the integer inputs, which
      81             :  * is no problem, and multiplication by fractional constants, which is
      82             :  * a problem to do in integer arithmetic.  We multiply all the constants
      83             :  * by CONST_SCALE and convert them to integer constants (thus retaining
      84             :  * CONST_BITS bits of precision in the constants).  After doing a
      85             :  * multiplication we have to divide the product by CONST_SCALE, with proper
      86             :  * rounding, to produce the correct output.  This division can be done
      87             :  * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
      88             :  * as long as possible so that partial sums can be added together with
      89             :  * full fractional precision.
      90             :  *
      91             :  * The outputs of the first pass are scaled up by PASS1_BITS bits so that
      92             :  * they are represented to better-than-integral precision.  These outputs
      93             :  * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
      94             :  * with the recommended scaling.  (To scale up 12-bit sample data further, an
      95             :  * intermediate JLONG array would be needed.)
      96             :  *
      97             :  * To avoid overflow of the 32-bit intermediate results in pass 2, we must
      98             :  * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
      99             :  * shows that the values given below are the most effective.
     100             :  */
     101             : 
     102             : #if BITS_IN_JSAMPLE == 8
     103             : #define CONST_BITS  13
     104             : #define PASS1_BITS  2
     105             : #else
     106             : #define CONST_BITS  13
     107             : #define PASS1_BITS  1           /* lose a little precision to avoid overflow */
     108             : #endif
     109             : 
     110             : /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
     111             :  * causing a lot of useless floating-point operations at run time.
     112             :  * To get around this we use the following pre-calculated constants.
     113             :  * If you change CONST_BITS you may want to add appropriate values.
     114             :  * (With a reasonable C compiler, you can just rely on the FIX() macro...)
     115             :  */
     116             : 
     117             : #if CONST_BITS == 13
     118             : #define FIX_0_298631336  ((JLONG)  2446)        /* FIX(0.298631336) */
     119             : #define FIX_0_390180644  ((JLONG)  3196)        /* FIX(0.390180644) */
     120             : #define FIX_0_541196100  ((JLONG)  4433)        /* FIX(0.541196100) */
     121             : #define FIX_0_765366865  ((JLONG)  6270)        /* FIX(0.765366865) */
     122             : #define FIX_0_899976223  ((JLONG)  7373)        /* FIX(0.899976223) */
     123             : #define FIX_1_175875602  ((JLONG)  9633)        /* FIX(1.175875602) */
     124             : #define FIX_1_501321110  ((JLONG)  12299)       /* FIX(1.501321110) */
     125             : #define FIX_1_847759065  ((JLONG)  15137)       /* FIX(1.847759065) */
     126             : #define FIX_1_961570560  ((JLONG)  16069)       /* FIX(1.961570560) */
     127             : #define FIX_2_053119869  ((JLONG)  16819)       /* FIX(2.053119869) */
     128             : #define FIX_2_562915447  ((JLONG)  20995)       /* FIX(2.562915447) */
     129             : #define FIX_3_072711026  ((JLONG)  25172)       /* FIX(3.072711026) */
     130             : #else
     131             : #define FIX_0_298631336  FIX(0.298631336)
     132             : #define FIX_0_390180644  FIX(0.390180644)
     133             : #define FIX_0_541196100  FIX(0.541196100)
     134             : #define FIX_0_765366865  FIX(0.765366865)
     135             : #define FIX_0_899976223  FIX(0.899976223)
     136             : #define FIX_1_175875602  FIX(1.175875602)
     137             : #define FIX_1_501321110  FIX(1.501321110)
     138             : #define FIX_1_847759065  FIX(1.847759065)
     139             : #define FIX_1_961570560  FIX(1.961570560)
     140             : #define FIX_2_053119869  FIX(2.053119869)
     141             : #define FIX_2_562915447  FIX(2.562915447)
     142             : #define FIX_3_072711026  FIX(3.072711026)
     143             : #endif
     144             : 
     145             : 
     146             : /* Multiply a JLONG variable by a JLONG constant to yield a JLONG result.
     147             :  * For 8-bit samples with the recommended scaling, all the variable
     148             :  * and constant values involved are no more than 16 bits wide, so a
     149             :  * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
     150             :  * For 12-bit samples, a full 32-bit multiplication will be needed.
     151             :  */
     152             : 
     153             : #if BITS_IN_JSAMPLE == 8
     154             : #define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
     155             : #else
     156             : #define MULTIPLY(var,const)  ((var) * (const))
     157             : #endif
     158             : 
     159             : 
     160             : /* Dequantize a coefficient by multiplying it by the multiplier-table
     161             :  * entry; produce an int result.  In this module, both inputs and result
     162             :  * are 16 bits or less, so either int or short multiply will work.
     163             :  */
     164             : 
     165             : #define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
     166             : 
     167             : 
     168             : /*
     169             :  * Perform dequantization and inverse DCT on one block of coefficients.
     170             :  */
     171             : 
     172             : GLOBAL(void)
     173           0 : jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr,
     174             :                  JCOEFPTR coef_block,
     175             :                  JSAMPARRAY output_buf, JDIMENSION output_col)
     176             : {
     177             :   JLONG tmp0, tmp1, tmp2, tmp3;
     178             :   JLONG tmp10, tmp11, tmp12, tmp13;
     179             :   JLONG z1, z2, z3, z4, z5;
     180             :   JCOEFPTR inptr;
     181             :   ISLOW_MULT_TYPE *quantptr;
     182             :   int *wsptr;
     183             :   JSAMPROW outptr;
     184           0 :   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
     185             :   int ctr;
     186             :   int workspace[DCTSIZE2];      /* buffers data between passes */
     187             :   SHIFT_TEMPS
     188             : 
     189             :   /* Pass 1: process columns from input, store into work array. */
     190             :   /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
     191             :   /* furthermore, we scale the results by 2**PASS1_BITS. */
     192             : 
     193           0 :   inptr = coef_block;
     194           0 :   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
     195           0 :   wsptr = workspace;
     196           0 :   for (ctr = DCTSIZE; ctr > 0; ctr--) {
     197             :     /* Due to quantization, we will usually find that many of the input
     198             :      * coefficients are zero, especially the AC terms.  We can exploit this
     199             :      * by short-circuiting the IDCT calculation for any column in which all
     200             :      * the AC terms are zero.  In that case each output is equal to the
     201             :      * DC coefficient (with scale factor as needed).
     202             :      * With typical images and quantization tables, half or more of the
     203             :      * column DCT calculations can be simplified this way.
     204             :      */
     205             : 
     206           0 :     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
     207           0 :         inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
     208           0 :         inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
     209           0 :         inptr[DCTSIZE*7] == 0) {
     210             :       /* AC terms all zero */
     211           0 :       int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]),
     212             :                              PASS1_BITS);
     213             : 
     214           0 :       wsptr[DCTSIZE*0] = dcval;
     215           0 :       wsptr[DCTSIZE*1] = dcval;
     216           0 :       wsptr[DCTSIZE*2] = dcval;
     217           0 :       wsptr[DCTSIZE*3] = dcval;
     218           0 :       wsptr[DCTSIZE*4] = dcval;
     219           0 :       wsptr[DCTSIZE*5] = dcval;
     220           0 :       wsptr[DCTSIZE*6] = dcval;
     221           0 :       wsptr[DCTSIZE*7] = dcval;
     222             : 
     223           0 :       inptr++;                  /* advance pointers to next column */
     224           0 :       quantptr++;
     225           0 :       wsptr++;
     226           0 :       continue;
     227             :     }
     228             : 
     229             :     /* Even part: reverse the even part of the forward DCT. */
     230             :     /* The rotator is sqrt(2)*c(-6). */
     231             : 
     232           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     233           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
     234             : 
     235           0 :     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
     236           0 :     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
     237           0 :     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
     238             : 
     239           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     240           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     241             : 
     242           0 :     tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS);
     243           0 :     tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS);
     244             : 
     245           0 :     tmp10 = tmp0 + tmp3;
     246           0 :     tmp13 = tmp0 - tmp3;
     247           0 :     tmp11 = tmp1 + tmp2;
     248           0 :     tmp12 = tmp1 - tmp2;
     249             : 
     250             :     /* Odd part per figure 8; the matrix is unitary and hence its
     251             :      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
     252             :      */
     253             : 
     254           0 :     tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
     255           0 :     tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     256           0 :     tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
     257           0 :     tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
     258             : 
     259           0 :     z1 = tmp0 + tmp3;
     260           0 :     z2 = tmp1 + tmp2;
     261           0 :     z3 = tmp0 + tmp2;
     262           0 :     z4 = tmp1 + tmp3;
     263           0 :     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
     264             : 
     265           0 :     tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
     266           0 :     tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
     267           0 :     tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
     268           0 :     tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
     269           0 :     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
     270           0 :     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
     271           0 :     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
     272           0 :     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
     273             : 
     274           0 :     z3 += z5;
     275           0 :     z4 += z5;
     276             : 
     277           0 :     tmp0 += z1 + z3;
     278           0 :     tmp1 += z2 + z4;
     279           0 :     tmp2 += z2 + z3;
     280           0 :     tmp3 += z1 + z4;
     281             : 
     282             :     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
     283             : 
     284           0 :     wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
     285           0 :     wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
     286           0 :     wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
     287           0 :     wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
     288           0 :     wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
     289           0 :     wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
     290           0 :     wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
     291           0 :     wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
     292             : 
     293           0 :     inptr++;                    /* advance pointers to next column */
     294           0 :     quantptr++;
     295           0 :     wsptr++;
     296             :   }
     297             : 
     298             :   /* Pass 2: process rows from work array, store into output array. */
     299             :   /* Note that we must descale the results by a factor of 8 == 2**3, */
     300             :   /* and also undo the PASS1_BITS scaling. */
     301             : 
     302           0 :   wsptr = workspace;
     303           0 :   for (ctr = 0; ctr < DCTSIZE; ctr++) {
     304           0 :     outptr = output_buf[ctr] + output_col;
     305             :     /* Rows of zeroes can be exploited in the same way as we did with columns.
     306             :      * However, the column calculation has created many nonzero AC terms, so
     307             :      * the simplification applies less often (typically 5% to 10% of the time).
     308             :      * On machines with very fast multiplication, it's possible that the
     309             :      * test takes more time than it's worth.  In that case this section
     310             :      * may be commented out.
     311             :      */
     312             : 
     313             : #ifndef NO_ZERO_ROW_TEST
     314           0 :     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
     315           0 :         wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
     316             :       /* AC terms all zero */
     317           0 :       JSAMPLE dcval = range_limit[(int) DESCALE((JLONG) wsptr[0], PASS1_BITS+3)
     318           0 :                                   & RANGE_MASK];
     319             : 
     320           0 :       outptr[0] = dcval;
     321           0 :       outptr[1] = dcval;
     322           0 :       outptr[2] = dcval;
     323           0 :       outptr[3] = dcval;
     324           0 :       outptr[4] = dcval;
     325           0 :       outptr[5] = dcval;
     326           0 :       outptr[6] = dcval;
     327           0 :       outptr[7] = dcval;
     328             : 
     329           0 :       wsptr += DCTSIZE;         /* advance pointer to next row */
     330           0 :       continue;
     331             :     }
     332             : #endif
     333             : 
     334             :     /* Even part: reverse the even part of the forward DCT. */
     335             :     /* The rotator is sqrt(2)*c(-6). */
     336             : 
     337           0 :     z2 = (JLONG) wsptr[2];
     338           0 :     z3 = (JLONG) wsptr[6];
     339             : 
     340           0 :     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
     341           0 :     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
     342           0 :     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
     343             : 
     344           0 :     tmp0 = LEFT_SHIFT((JLONG) wsptr[0] + (JLONG) wsptr[4], CONST_BITS);
     345           0 :     tmp1 = LEFT_SHIFT((JLONG) wsptr[0] - (JLONG) wsptr[4], CONST_BITS);
     346             : 
     347           0 :     tmp10 = tmp0 + tmp3;
     348           0 :     tmp13 = tmp0 - tmp3;
     349           0 :     tmp11 = tmp1 + tmp2;
     350           0 :     tmp12 = tmp1 - tmp2;
     351             : 
     352             :     /* Odd part per figure 8; the matrix is unitary and hence its
     353             :      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
     354             :      */
     355             : 
     356           0 :     tmp0 = (JLONG) wsptr[7];
     357           0 :     tmp1 = (JLONG) wsptr[5];
     358           0 :     tmp2 = (JLONG) wsptr[3];
     359           0 :     tmp3 = (JLONG) wsptr[1];
     360             : 
     361           0 :     z1 = tmp0 + tmp3;
     362           0 :     z2 = tmp1 + tmp2;
     363           0 :     z3 = tmp0 + tmp2;
     364           0 :     z4 = tmp1 + tmp3;
     365           0 :     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
     366             : 
     367           0 :     tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
     368           0 :     tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
     369           0 :     tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
     370           0 :     tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
     371           0 :     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
     372           0 :     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
     373           0 :     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
     374           0 :     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
     375             : 
     376           0 :     z3 += z5;
     377           0 :     z4 += z5;
     378             : 
     379           0 :     tmp0 += z1 + z3;
     380           0 :     tmp1 += z2 + z4;
     381           0 :     tmp2 += z2 + z3;
     382           0 :     tmp3 += z1 + z4;
     383             : 
     384             :     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
     385             : 
     386           0 :     outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3,
     387             :                                           CONST_BITS+PASS1_BITS+3)
     388           0 :                             & RANGE_MASK];
     389           0 :     outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3,
     390             :                                           CONST_BITS+PASS1_BITS+3)
     391           0 :                             & RANGE_MASK];
     392           0 :     outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2,
     393             :                                           CONST_BITS+PASS1_BITS+3)
     394           0 :                             & RANGE_MASK];
     395           0 :     outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2,
     396             :                                           CONST_BITS+PASS1_BITS+3)
     397           0 :                             & RANGE_MASK];
     398           0 :     outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1,
     399             :                                           CONST_BITS+PASS1_BITS+3)
     400           0 :                             & RANGE_MASK];
     401           0 :     outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1,
     402             :                                           CONST_BITS+PASS1_BITS+3)
     403           0 :                             & RANGE_MASK];
     404           0 :     outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0,
     405             :                                           CONST_BITS+PASS1_BITS+3)
     406           0 :                             & RANGE_MASK];
     407           0 :     outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0,
     408             :                                           CONST_BITS+PASS1_BITS+3)
     409           0 :                             & RANGE_MASK];
     410             : 
     411           0 :     wsptr += DCTSIZE;           /* advance pointer to next row */
     412             :   }
     413           0 : }
     414             : 
     415             : #ifdef IDCT_SCALING_SUPPORTED
     416             : 
     417             : 
     418             : /*
     419             :  * Perform dequantization and inverse DCT on one block of coefficients,
     420             :  * producing a 7x7 output block.
     421             :  *
     422             :  * Optimized algorithm with 12 multiplications in the 1-D kernel.
     423             :  * cK represents sqrt(2) * cos(K*pi/14).
     424             :  */
     425             : 
     426             : GLOBAL(void)
     427           0 : jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
     428             :                JCOEFPTR coef_block,
     429             :                JSAMPARRAY output_buf, JDIMENSION output_col)
     430             : {
     431             :   JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
     432             :   JLONG z1, z2, z3;
     433             :   JCOEFPTR inptr;
     434             :   ISLOW_MULT_TYPE *quantptr;
     435             :   int *wsptr;
     436             :   JSAMPROW outptr;
     437           0 :   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
     438             :   int ctr;
     439             :   int workspace[7*7];   /* buffers data between passes */
     440             :   SHIFT_TEMPS
     441             : 
     442             :   /* Pass 1: process columns from input, store into work array. */
     443             : 
     444           0 :   inptr = coef_block;
     445           0 :   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
     446           0 :   wsptr = workspace;
     447           0 :   for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
     448             :     /* Even part */
     449             : 
     450           0 :     tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     451           0 :     tmp13 = LEFT_SHIFT(tmp13, CONST_BITS);
     452             :     /* Add fudge factor here for final descale. */
     453           0 :     tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
     454             : 
     455           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     456           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     457           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
     458             : 
     459           0 :     tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
     460           0 :     tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
     461           0 :     tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
     462           0 :     tmp0 = z1 + z3;
     463           0 :     z2 -= tmp0;
     464           0 :     tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
     465           0 :     tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
     466           0 :     tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
     467           0 :     tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
     468             : 
     469             :     /* Odd part */
     470             : 
     471           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
     472           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
     473           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     474             : 
     475           0 :     tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
     476           0 :     tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
     477           0 :     tmp0 = tmp1 - tmp2;
     478           0 :     tmp1 += tmp2;
     479           0 :     tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
     480           0 :     tmp1 += tmp2;
     481           0 :     z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
     482           0 :     tmp0 += z2;
     483           0 :     tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
     484             : 
     485             :     /* Final output stage */
     486             : 
     487           0 :     wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
     488           0 :     wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
     489           0 :     wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
     490           0 :     wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
     491           0 :     wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
     492           0 :     wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
     493           0 :     wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
     494             :   }
     495             : 
     496             :   /* Pass 2: process 7 rows from work array, store into output array. */
     497             : 
     498           0 :   wsptr = workspace;
     499           0 :   for (ctr = 0; ctr < 7; ctr++) {
     500           0 :     outptr = output_buf[ctr] + output_col;
     501             : 
     502             :     /* Even part */
     503             : 
     504             :     /* Add fudge factor here for final descale. */
     505           0 :     tmp13 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     506           0 :     tmp13 = LEFT_SHIFT(tmp13, CONST_BITS);
     507             : 
     508           0 :     z1 = (JLONG) wsptr[2];
     509           0 :     z2 = (JLONG) wsptr[4];
     510           0 :     z3 = (JLONG) wsptr[6];
     511             : 
     512           0 :     tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
     513           0 :     tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
     514           0 :     tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
     515           0 :     tmp0 = z1 + z3;
     516           0 :     z2 -= tmp0;
     517           0 :     tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
     518           0 :     tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
     519           0 :     tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
     520           0 :     tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
     521             : 
     522             :     /* Odd part */
     523             : 
     524           0 :     z1 = (JLONG) wsptr[1];
     525           0 :     z2 = (JLONG) wsptr[3];
     526           0 :     z3 = (JLONG) wsptr[5];
     527             : 
     528           0 :     tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
     529           0 :     tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
     530           0 :     tmp0 = tmp1 - tmp2;
     531           0 :     tmp1 += tmp2;
     532           0 :     tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
     533           0 :     tmp1 += tmp2;
     534           0 :     z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
     535           0 :     tmp0 += z2;
     536           0 :     tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
     537             : 
     538             :     /* Final output stage */
     539             : 
     540           0 :     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
     541             :                                               CONST_BITS+PASS1_BITS+3)
     542           0 :                             & RANGE_MASK];
     543           0 :     outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
     544             :                                               CONST_BITS+PASS1_BITS+3)
     545           0 :                             & RANGE_MASK];
     546           0 :     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
     547             :                                               CONST_BITS+PASS1_BITS+3)
     548           0 :                             & RANGE_MASK];
     549           0 :     outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
     550             :                                               CONST_BITS+PASS1_BITS+3)
     551           0 :                             & RANGE_MASK];
     552           0 :     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
     553             :                                               CONST_BITS+PASS1_BITS+3)
     554           0 :                             & RANGE_MASK];
     555           0 :     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
     556             :                                               CONST_BITS+PASS1_BITS+3)
     557           0 :                             & RANGE_MASK];
     558           0 :     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
     559             :                                               CONST_BITS+PASS1_BITS+3)
     560           0 :                             & RANGE_MASK];
     561             : 
     562           0 :     wsptr += 7;         /* advance pointer to next row */
     563             :   }
     564           0 : }
     565             : 
     566             : 
     567             : /*
     568             :  * Perform dequantization and inverse DCT on one block of coefficients,
     569             :  * producing a reduced-size 6x6 output block.
     570             :  *
     571             :  * Optimized algorithm with 3 multiplications in the 1-D kernel.
     572             :  * cK represents sqrt(2) * cos(K*pi/12).
                     :  *
                     :  * Parameters, as used by this routine:
                     :  *   cinfo       only passed to IDCT_range_limit() to get the range-limit
                     :  *               table.
                     :  *   compptr     its dct_table is read through an ISLOW_MULT_TYPE pointer
                     :  *               and handed to DEQUANTIZE() together with each coefficient.
                     :  *   coef_block  input coefficients; only rows 0..5 of columns 0..5 are
                     :  *               read (rows are DCTSIZE entries apart).
                     :  *   output_buf,
                     :  *   output_col  six samples are written to each of output_buf[0..5],
                     :  *               starting at column output_col.
                     :  *
                     :  * Pass 1 works on columns and stores its results in a local 6x6 int
                     :  * workspace after descaling by CONST_BITS-PASS1_BITS.  Pass 2 works on
                     :  * the workspace rows, descales by CONST_BITS+PASS1_BITS+3, masks the
                     :  * result with RANGE_MASK and maps it through the range-limit table.
     573             :  */
     574             : 
     575             : GLOBAL(void)
     576           0 : jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
     577             :                JCOEFPTR coef_block,
     578             :                JSAMPARRAY output_buf, JDIMENSION output_col)
     579             : {
     580             :   JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
     581             :   JLONG z1, z2, z3;
     582             :   JCOEFPTR inptr;
     583             :   ISLOW_MULT_TYPE *quantptr;
     584             :   int *wsptr;
     585             :   JSAMPROW outptr;
     586           0 :   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
     587             :   int ctr;
     588             :   int workspace[6*6];   /* buffers data between passes */
     589             :   SHIFT_TEMPS
     590             : 
     591             :   /* Pass 1: process columns from input, store into work array (6 columns, 6 outputs each, stride 6). */
     592             : 
     593           0 :   inptr = coef_block;
     594           0 :   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
     595           0 :   wsptr = workspace;
     596           0 :   for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
     597             :     /* Even part: tmp10, tmp11, tmp12 feed the output pairs (0,5), (1,4), (2,3). */
     598             : 
     599           0 :     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     600           0 :     tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
     601             :     /* Add fudge factor here for final descale: half of 1 << (CONST_BITS-PASS1_BITS), the amount shifted out below. */
     602           0 :     tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
     603           0 :     tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     604           0 :     tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
     605           0 :     tmp1 = tmp0 + tmp10;
     606           0 :     tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
     607           0 :     tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     608           0 :     tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
     609           0 :     tmp10 = tmp1 + tmp0;
     610           0 :     tmp12 = tmp1 - tmp0;
     611             : 
     612             :     /* Odd part: tmp0, tmp1, tmp2 are added to / subtracted from tmp10, tmp11, tmp12 below. */
     613             : 
     614           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
     615           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
     616           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     617           0 :     tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
     618           0 :     tmp0 = tmp1 + LEFT_SHIFT(z1 + z2, CONST_BITS);
     619           0 :     tmp2 = tmp1 + LEFT_SHIFT(z3 - z2, CONST_BITS);
     620           0 :     tmp1 = LEFT_SHIFT(z1 - z2 - z3, PASS1_BITS);
     621             : 
     622             :     /* Final output stage: rows 1 and 4 are stored unshifted because tmp11 was already descaled above and tmp1 was shifted up by PASS1_BITS only. */
     623             : 
     624           0 :     wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
     625           0 :     wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
     626           0 :     wsptr[6*1] = (int) (tmp11 + tmp1);
     627           0 :     wsptr[6*4] = (int) (tmp11 - tmp1);
     628           0 :     wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
     629           0 :     wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
     630             :   }
     631             : 
     632             :   /* Pass 2: process 6 rows from work array, store into output array. */
     633             : 
     634           0 :   wsptr = workspace;
     635           0 :   for (ctr = 0; ctr < 6; ctr++) {
     636           0 :     outptr = output_buf[ctr] + output_col;
     637             : 
     638             :     /* Even part: same structure as pass 1, but tmp11 is kept at full scale here. */
     639             : 
     640             :     /* Add fudge factor here for final descale: after the left shift below it is half of 1 << (CONST_BITS+PASS1_BITS+3). */
     641           0 :     tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     642           0 :     tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
     643           0 :     tmp2 = (JLONG) wsptr[4];
     644           0 :     tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
     645           0 :     tmp1 = tmp0 + tmp10;
     646           0 :     tmp11 = tmp0 - tmp10 - tmp10;
     647           0 :     tmp10 = (JLONG) wsptr[2];
     648           0 :     tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
     649           0 :     tmp10 = tmp1 + tmp0;
     650           0 :     tmp12 = tmp1 - tmp0;
     651             : 
     652             :     /* Odd part: unlike pass 1, tmp1 is shifted by CONST_BITS so all three terms share one scale. */
     653             : 
     654           0 :     z1 = (JLONG) wsptr[1];
     655           0 :     z2 = (JLONG) wsptr[3];
     656           0 :     z3 = (JLONG) wsptr[5];
     657           0 :     tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
     658           0 :     tmp0 = tmp1 + LEFT_SHIFT(z1 + z2, CONST_BITS);
     659           0 :     tmp2 = tmp1 + LEFT_SHIFT(z3 - z2, CONST_BITS);
     660           0 :     tmp1 = LEFT_SHIFT(z1 - z2 - z3, CONST_BITS);
     661             : 
     662             :     /* Final output stage: descale, mask with RANGE_MASK, then look up in range_limit[]. */
     663             : 
     664           0 :     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
     665             :                                               CONST_BITS+PASS1_BITS+3)
     666           0 :                             & RANGE_MASK];
     667           0 :     outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
     668             :                                               CONST_BITS+PASS1_BITS+3)
     669           0 :                             & RANGE_MASK];
     670           0 :     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
     671             :                                               CONST_BITS+PASS1_BITS+3)
     672           0 :                             & RANGE_MASK];
     673           0 :     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
     674             :                                               CONST_BITS+PASS1_BITS+3)
     675           0 :                             & RANGE_MASK];
     676           0 :     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
     677             :                                               CONST_BITS+PASS1_BITS+3)
     678           0 :                             & RANGE_MASK];
     679           0 :     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
     680             :                                               CONST_BITS+PASS1_BITS+3)
     681           0 :                             & RANGE_MASK];
     682             : 
     683           0 :     wsptr += 6;         /* advance pointer to next row */
     684             :   }
     685           0 : }
     686             : 
     687             : 
     688             : /*
     689             :  * Perform dequantization and inverse DCT on one block of coefficients,
     690             :  * producing a reduced-size 5x5 output block.
     691             :  *
     692             :  * Optimized algorithm with 5 multiplications in the 1-D kernel.
     693             :  * cK represents sqrt(2) * cos(K*pi/10).
                     :  *
                     :  * Parameters, as used by this routine:
                     :  *   cinfo       only passed to IDCT_range_limit() to get the range-limit
                     :  *               table.
                     :  *   compptr     its dct_table is read through an ISLOW_MULT_TYPE pointer
                     :  *               and handed to DEQUANTIZE() together with each coefficient.
                     :  *   coef_block  input coefficients; only rows 0..4 of columns 0..4 are
                     :  *               read (rows are DCTSIZE entries apart).
                     :  *   output_buf,
                     :  *   output_col  five samples are written to each of output_buf[0..4],
                     :  *               starting at column output_col.
                     :  *
                     :  * Pass 1 works on columns and stores its results in a local 5x5 int
                     :  * workspace after descaling by CONST_BITS-PASS1_BITS.  Pass 2 works on
                     :  * the workspace rows, descales by CONST_BITS+PASS1_BITS+3, masks the
                     :  * result with RANGE_MASK and maps it through the range-limit table.
     694             :  */
     695             : 
     696             : GLOBAL(void)
     697           0 : jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
     698             :                JCOEFPTR coef_block,
     699             :                JSAMPARRAY output_buf, JDIMENSION output_col)
     700             : {
     701             :   JLONG tmp0, tmp1, tmp10, tmp11, tmp12;
     702             :   JLONG z1, z2, z3;
     703             :   JCOEFPTR inptr;
     704             :   ISLOW_MULT_TYPE *quantptr;
     705             :   int *wsptr;
     706             :   JSAMPROW outptr;
     707           0 :   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
     708             :   int ctr;
     709             :   int workspace[5*5];   /* buffers data between passes */
     710             :   SHIFT_TEMPS
     711             : 
     712             :   /* Pass 1: process columns from input, store into work array (5 columns, 5 outputs each, stride 5). */
     713             : 
     714           0 :   inptr = coef_block;
     715           0 :   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
     716           0 :   wsptr = workspace;
     717           0 :   for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
     718             :     /* Even part: tmp10 and tmp11 feed the output pairs (0,4) and (1,3); tmp12 alone becomes the centre output 2. */
     719             : 
     720           0 :     tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     721           0 :     tmp12 = LEFT_SHIFT(tmp12, CONST_BITS);
     722             :     /* Add fudge factor here for final descale: half of 1 << (CONST_BITS-PASS1_BITS), the amount shifted out below. */
     723           0 :     tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
     724           0 :     tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     725           0 :     tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     726           0 :     z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
     727           0 :     z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
     728           0 :     z3 = tmp12 + z2;
     729           0 :     tmp10 = z3 + z1;
     730           0 :     tmp11 = z3 - z1;
     731           0 :     tmp12 -= LEFT_SHIFT(z2, 2);
     732             : 
     733             :     /* Odd part: z2, z3, tmp0, tmp1 are reused here; their even-part values are no longer needed. */
     734             : 
     735           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
     736           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
     737             : 
     738           0 :     z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
     739           0 :     tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
     740           0 :     tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
     741             : 
     742             :     /* Final output stage: combine even and odd parts and descale by CONST_BITS-PASS1_BITS. */
     743             : 
     744           0 :     wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
     745           0 :     wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
     746           0 :     wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
     747           0 :     wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
     748           0 :     wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
     749             :   }
     750             : 
     751             :   /* Pass 2: process 5 rows from work array, store into output array. */
     752             : 
     753           0 :   wsptr = workspace;
     754           0 :   for (ctr = 0; ctr < 5; ctr++) {
     755           0 :     outptr = output_buf[ctr] + output_col;
     756             : 
     757             :     /* Even part: same arithmetic as pass 1, reading workspace values instead of dequantized coefficients. */
     758             : 
     759             :     /* Add fudge factor here for final descale: after the left shift below it is half of 1 << (CONST_BITS+PASS1_BITS+3). */
     760           0 :     tmp12 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     761           0 :     tmp12 = LEFT_SHIFT(tmp12, CONST_BITS);
     762           0 :     tmp0 = (JLONG) wsptr[2];
     763           0 :     tmp1 = (JLONG) wsptr[4];
     764           0 :     z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
     765           0 :     z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
     766           0 :     z3 = tmp12 + z2;
     767           0 :     tmp10 = z3 + z1;
     768           0 :     tmp11 = z3 - z1;
     769           0 :     tmp12 -= LEFT_SHIFT(z2, 2);
     770             : 
     771             :     /* Odd part */
     772             : 
     773           0 :     z2 = (JLONG) wsptr[1];
     774           0 :     z3 = (JLONG) wsptr[3];
     775             : 
     776           0 :     z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
     777           0 :     tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
     778           0 :     tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
     779             : 
     780             :     /* Final output stage: descale, mask with RANGE_MASK, then look up in range_limit[]. */
     781             : 
     782           0 :     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
     783             :                                               CONST_BITS+PASS1_BITS+3)
     784           0 :                             & RANGE_MASK];
     785           0 :     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
     786             :                                               CONST_BITS+PASS1_BITS+3)
     787           0 :                             & RANGE_MASK];
     788           0 :     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
     789             :                                               CONST_BITS+PASS1_BITS+3)
     790           0 :                             & RANGE_MASK];
     791           0 :     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
     792             :                                               CONST_BITS+PASS1_BITS+3)
     793           0 :                             & RANGE_MASK];
     794           0 :     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
     795             :                                               CONST_BITS+PASS1_BITS+3)
     796           0 :                             & RANGE_MASK];
     797             : 
     798           0 :     wsptr += 5;         /* advance pointer to next row */
     799             :   }
     800           0 : }
     801             : 
     802             : 
     803             : /*
     804             :  * Perform dequantization and inverse DCT on one block of coefficients,
     805             :  * producing a reduced-size 3x3 output block.
     806             :  *
     807             :  * Optimized algorithm with 2 multiplications in the 1-D kernel.
     808             :  * cK represents sqrt(2) * cos(K*pi/6).
                     :  *
                     :  * Parameters, as used by this routine:
                     :  *   cinfo       only passed to IDCT_range_limit() to get the range-limit
                     :  *               table.
                     :  *   compptr     its dct_table is read through an ISLOW_MULT_TYPE pointer
                     :  *               and handed to DEQUANTIZE() together with each coefficient.
                     :  *   coef_block  input coefficients; only rows 0..2 of columns 0..2 are
                     :  *               read (rows are DCTSIZE entries apart).
                     :  *   output_buf,
                     :  *   output_col  three samples are written to each of output_buf[0..2],
                     :  *               starting at column output_col.
                     :  *
                     :  * Pass 1 works on columns and stores its results in a local 3x3 int
                     :  * workspace after descaling by CONST_BITS-PASS1_BITS.  Pass 2 works on
                     :  * the workspace rows, descales by CONST_BITS+PASS1_BITS+3, masks the
                     :  * result with RANGE_MASK and maps it through the range-limit table.
     809             :  */
     810             : 
     811             : GLOBAL(void)
     812           0 : jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
     813             :                JCOEFPTR coef_block,
     814             :                JSAMPARRAY output_buf, JDIMENSION output_col)
     815             : {
     816             :   JLONG tmp0, tmp2, tmp10, tmp12;
     817             :   JCOEFPTR inptr;
     818             :   ISLOW_MULT_TYPE *quantptr;
     819             :   int *wsptr;
     820             :   JSAMPROW outptr;
     821           0 :   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
     822             :   int ctr;
     823             :   int workspace[3*3];   /* buffers data between passes */
     824             :   SHIFT_TEMPS
     825             : 
     826             :   /* Pass 1: process columns from input, store into work array (3 columns, 3 outputs each, stride 3). */
     827             : 
     828           0 :   inptr = coef_block;
     829           0 :   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
     830           0 :   wsptr = workspace;
     831           0 :   for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
     832             :     /* Even part: tmp10 feeds outputs 0 and 2; tmp2 alone becomes the centre output 1. */
     833             : 
     834           0 :     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     835           0 :     tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
     836             :     /* Add fudge factor here for final descale: half of 1 << (CONST_BITS-PASS1_BITS), the amount shifted out below. */
     837           0 :     tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
     838           0 :     tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     839           0 :     tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
     840           0 :     tmp10 = tmp0 + tmp12;
     841           0 :     tmp2 = tmp0 - tmp12 - tmp12;
     842             : 
     843             :     /* Odd part: tmp12 and tmp0 are reused; their even-part values are no longer needed. */
     844             : 
     845           0 :     tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
     846           0 :     tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
     847             : 
     848             :     /* Final output stage: combine even and odd parts and descale by CONST_BITS-PASS1_BITS. */
     849             : 
     850           0 :     wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
     851           0 :     wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
     852           0 :     wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
     853             :   }
     854             : 
     855             :   /* Pass 2: process 3 rows from work array, store into output array. */
     856             : 
     857           0 :   wsptr = workspace;
     858           0 :   for (ctr = 0; ctr < 3; ctr++) {
     859           0 :     outptr = output_buf[ctr] + output_col;
     860             : 
     861             :     /* Even part: same arithmetic as pass 1, reading workspace values instead of dequantized coefficients. */
     862             : 
     863             :     /* Add fudge factor here for final descale: after the left shift below it is half of 1 << (CONST_BITS+PASS1_BITS+3). */
     864           0 :     tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     865           0 :     tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
     866           0 :     tmp2 = (JLONG) wsptr[2];
     867           0 :     tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
     868           0 :     tmp10 = tmp0 + tmp12;
     869           0 :     tmp2 = tmp0 - tmp12 - tmp12;
     870             : 
     871             :     /* Odd part */
     872             : 
     873           0 :     tmp12 = (JLONG) wsptr[1];
     874           0 :     tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
     875             : 
     876             :     /* Final output stage: descale, mask with RANGE_MASK, then look up in range_limit[]. */
     877             : 
     878           0 :     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
     879             :                                               CONST_BITS+PASS1_BITS+3)
     880           0 :                             & RANGE_MASK];
     881           0 :     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
     882             :                                               CONST_BITS+PASS1_BITS+3)
     883           0 :                             & RANGE_MASK];
     884           0 :     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
     885             :                                               CONST_BITS+PASS1_BITS+3)
     886           0 :                             & RANGE_MASK];
     887             : 
     888           0 :     wsptr += 3;         /* advance pointer to next row */
     889             :   }
     890           0 : }
     891             : 
     892             : 
     893             : /*
     894             :  * Perform dequantization and inverse DCT on one block of coefficients,
     895             :  * producing a 9x9 output block.
     896             :  *
     897             :  * Optimized algorithm with 10 multiplications in the 1-D kernel.
     898             :  * cK represents sqrt(2) * cos(K*pi/18).
                     :  *
                     :  * Parameters, as used by this routine:
                     :  *   cinfo       only passed to IDCT_range_limit() to get the range-limit
                     :  *               table.
                     :  *   compptr     its dct_table is read through an ISLOW_MULT_TYPE pointer
                     :  *               and handed to DEQUANTIZE() together with each coefficient.
                     :  *   coef_block  input coefficients; rows 0..7 of columns 0..7 are read
                     :  *               (rows are DCTSIZE entries apart).
                     :  *   output_buf,
                     :  *   output_col  nine samples are written to each of output_buf[0..8],
                     :  *               starting at column output_col.
                     :  *
                     :  * Pass 1 turns each of the 8 input columns into 9 values, stored in a
                     :  * local int workspace of 9 rows by 8 columns after descaling by
                     :  * CONST_BITS-PASS1_BITS.  Pass 2 turns each 8-entry workspace row into
                     :  * 9 output samples: it descales by CONST_BITS+PASS1_BITS+3, masks the
                     :  * result with RANGE_MASK and maps it through the range-limit table.
     899             :  */
     900             : 
     901             : GLOBAL(void)
     902           0 : jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
     903             :                JCOEFPTR coef_block,
     904             :                JSAMPARRAY output_buf, JDIMENSION output_col)
     905             : {
     906             :   JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
     907             :   JLONG z1, z2, z3, z4;
     908             :   JCOEFPTR inptr;
     909             :   ISLOW_MULT_TYPE *quantptr;
     910             :   int *wsptr;
     911             :   JSAMPROW outptr;
     912           0 :   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
     913             :   int ctr;
     914             :   int workspace[8*9];   /* buffers data between passes */
     915             :   SHIFT_TEMPS
     916             : 
     917             :   /* Pass 1: process columns from input, store into work array (8 columns, 9 outputs each, stride 8). */
     918             : 
     919           0 :   inptr = coef_block;
     920           0 :   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
     921           0 :   wsptr = workspace;
     922           0 :   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
     923             :     /* Even part: tmp10..tmp13 feed the output pairs (0,8), (1,7), (2,6), (3,5); tmp14 alone becomes the centre output 4. */
     924             : 
     925           0 :     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     926           0 :     tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
     927             :     /* Add fudge factor here for final descale: half of 1 << (CONST_BITS-PASS1_BITS), the amount shifted out below. */
     928           0 :     tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
     929             : 
     930           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     931           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     932           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
     933             : 
     934           0 :     tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
     935           0 :     tmp1 = tmp0 + tmp3;
     936           0 :     tmp2 = tmp0 - tmp3 - tmp3;
     937             : 
     938           0 :     tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
     939           0 :     tmp11 = tmp2 + tmp0;
     940           0 :     tmp14 = tmp2 - tmp0 - tmp0;
     941             : 
     942           0 :     tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
     943           0 :     tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
     944           0 :     tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
     945             : 
     946           0 :     tmp10 = tmp1 + tmp0 - tmp3;
     947           0 :     tmp12 = tmp1 - tmp0 + tmp2;
     948           0 :     tmp13 = tmp1 - tmp2 + tmp3;
     949             : 
     950             :     /* Odd part: tmp0..tmp3 and z1..z3 are reused; their even-part values are no longer needed. */
     951             : 
     952           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
     953           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
     954           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     955           0 :     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
     956             : 
     957           0 :     z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
     958             : 
     959           0 :     tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
     960           0 :     tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
     961           0 :     tmp0 = tmp2 + tmp3 - z2;
     962           0 :     tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
     963           0 :     tmp2 += z2 - tmp1;
     964           0 :     tmp3 += z2 + tmp1;
     965           0 :     tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
     966             : 
     967             :     /* Final output stage: combine even and odd parts and descale by CONST_BITS-PASS1_BITS. */
     968             : 
     969           0 :     wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
     970           0 :     wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
     971           0 :     wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
     972           0 :     wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
     973           0 :     wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
     974           0 :     wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
     975           0 :     wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
     976           0 :     wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
     977           0 :     wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
     978             :   }
     979             : 
     980             :   /* Pass 2: process 9 rows from work array, store into output array. */
     981             : 
     982           0 :   wsptr = workspace;
     983           0 :   for (ctr = 0; ctr < 9; ctr++) {
     984           0 :     outptr = output_buf[ctr] + output_col;
     985             : 
     986             :     /* Even part: same arithmetic as pass 1, reading workspace values instead of dequantized coefficients. */
     987             : 
     988             :     /* Add fudge factor here for final descale: after the left shift below it is half of 1 << (CONST_BITS+PASS1_BITS+3). */
     989           0 :     tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     990           0 :     tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
     991             : 
     992           0 :     z1 = (JLONG) wsptr[2];
     993           0 :     z2 = (JLONG) wsptr[4];
     994           0 :     z3 = (JLONG) wsptr[6];
     995             : 
     996           0 :     tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
     997           0 :     tmp1 = tmp0 + tmp3;
     998           0 :     tmp2 = tmp0 - tmp3 - tmp3;
     999             : 
    1000           0 :     tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
    1001           0 :     tmp11 = tmp2 + tmp0;
    1002           0 :     tmp14 = tmp2 - tmp0 - tmp0;
    1003             : 
    1004           0 :     tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
    1005           0 :     tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
    1006           0 :     tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
    1007             : 
    1008           0 :     tmp10 = tmp1 + tmp0 - tmp3;
    1009           0 :     tmp12 = tmp1 - tmp0 + tmp2;
    1010           0 :     tmp13 = tmp1 - tmp2 + tmp3;
    1011             : 
    1012             :     /* Odd part */
    1013             : 
    1014           0 :     z1 = (JLONG) wsptr[1];
    1015           0 :     z2 = (JLONG) wsptr[3];
    1016           0 :     z3 = (JLONG) wsptr[5];
    1017           0 :     z4 = (JLONG) wsptr[7];
    1018             : 
    1019           0 :     z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
    1020             : 
    1021           0 :     tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
    1022           0 :     tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
    1023           0 :     tmp0 = tmp2 + tmp3 - z2;
    1024           0 :     tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
    1025           0 :     tmp2 += z2 - tmp1;
    1026           0 :     tmp3 += z2 + tmp1;
    1027           0 :     tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
    1028             : 
    1029             :     /* Final output stage: descale, mask with RANGE_MASK, then look up in range_limit[]. */
    1030             : 
    1031           0 :     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
    1032             :                                               CONST_BITS+PASS1_BITS+3)
    1033           0 :                             & RANGE_MASK];
    1034           0 :     outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
    1035             :                                               CONST_BITS+PASS1_BITS+3)
    1036           0 :                             & RANGE_MASK];
    1037           0 :     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
    1038             :                                               CONST_BITS+PASS1_BITS+3)
    1039           0 :                             & RANGE_MASK];
    1040           0 :     outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
    1041             :                                               CONST_BITS+PASS1_BITS+3)
    1042           0 :                             & RANGE_MASK];
    1043           0 :     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
    1044             :                                               CONST_BITS+PASS1_BITS+3)
    1045           0 :                             & RANGE_MASK];
    1046           0 :     outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
    1047             :                                               CONST_BITS+PASS1_BITS+3)
    1048           0 :                             & RANGE_MASK];
    1049           0 :     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
    1050             :                                               CONST_BITS+PASS1_BITS+3)
    1051           0 :                             & RANGE_MASK];
    1052           0 :     outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
    1053             :                                               CONST_BITS+PASS1_BITS+3)
    1054           0 :                             & RANGE_MASK];
    1055           0 :     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
    1056             :                                               CONST_BITS+PASS1_BITS+3)
    1057           0 :                             & RANGE_MASK];
    1058             : 
    1059           0 :     wsptr += 8;         /* advance pointer to next row */
    1060             :   }
    1061           0 : }
    1062             : 
    1063             : 
    1064             : /*
    1065             :  * Perform dequantization and inverse DCT on one block of coefficients,
    1066             :  * producing a 10x10 output block (rows 0..9 of output_buf, at output_col).
    1067             :  *
    1068             :  * Optimized algorithm with 12 multiplications in the 1-D kernel.
    1069             :  * cK represents sqrt(2) * cos(K*pi/20).
    1070             :  */
    1071             : 
    1072             : GLOBAL(void)
    1073           0 : jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    1074             :                  JCOEFPTR coef_block,
    1075             :                  JSAMPARRAY output_buf, JDIMENSION output_col)
    1076             : {
    1077             :   JLONG tmp10, tmp11, tmp12, tmp13, tmp14;
    1078             :   JLONG tmp20, tmp21, tmp22, tmp23, tmp24;
    1079             :   JLONG z1, z2, z3, z4, z5;
    1080             :   JCOEFPTR inptr;
    1081             :   ISLOW_MULT_TYPE *quantptr;
    1082             :   int *wsptr;
    1083             :   JSAMPROW outptr;
    1084           0 :   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
    1085             :   int ctr;
    1086             :   int workspace[8*10];  /* buffers data between passes: 10 rows of 8 values */
    1087             :   SHIFT_TEMPS
    1088             : 
    1089             :   /* Pass 1: process 8 columns from input, store 10 values per column into work array (row stride 8). */
    1090             : 
    1091           0 :   inptr = coef_block;
    1092           0 :   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
    1093           0 :   wsptr = workspace;
    1094           0 :   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
    1095             :     /* Even part */
    1096             : 
    1097           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    1098           0 :     z3 = LEFT_SHIFT(z3, CONST_BITS);
    1099             :     /* Add fudge factor here for final descale: half of the 2^(CONST_BITS-PASS1_BITS) divisor (assuming ONE is 1), so the later shift rounds. */
    1100           0 :     z3 += ONE << (CONST_BITS-PASS1_BITS-1);
    1101           0 :     z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    1102           0 :     z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
    1103           0 :     z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
    1104           0 :     tmp10 = z3 + z1;
    1105           0 :     tmp11 = z3 - z2;
    1106             : 
    1107           0 :     tmp22 = RIGHT_SHIFT(z3 - LEFT_SHIFT(z1 - z2, 1),
    1108             :                         CONST_BITS-PASS1_BITS);  /* c0 = (c4-c8)*2; descaled here, not in the output stage */
    1109             : 
    1110           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    1111           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
    1112             : 
    1113           0 :     z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
    1114           0 :     tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
    1115           0 :     tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
    1116             : 
    1117           0 :     tmp20 = tmp10 + tmp12;
    1118           0 :     tmp24 = tmp10 - tmp12;
    1119           0 :     tmp21 = tmp11 + tmp13;
    1120           0 :     tmp23 = tmp11 - tmp13;
    1121             : 
    1122             :     /* Odd part */
    1123             : 
    1124           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    1125           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    1126           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    1127           0 :     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
    1128             : 
    1129           0 :     tmp11 = z2 + z4;
    1130           0 :     tmp13 = z2 - z4;
    1131             : 
    1132           0 :     tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
    1133           0 :     z5 = LEFT_SHIFT(z3, CONST_BITS);  /* scaled copy of z3; unscaled z3 is still needed for tmp12 below */
    1134             : 
    1135           0 :     z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
    1136           0 :     z4 = z5 + tmp12;
    1137             : 
    1138           0 :     tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
    1139           0 :     tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
    1140             : 
    1141           0 :     z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
    1142           0 :     z4 = z5 - tmp12 - LEFT_SHIFT(tmp13, CONST_BITS - 1);
    1143             : 
    1144           0 :     tmp12 = LEFT_SHIFT(z1 - tmp13 - z3, PASS1_BITS);  /* built directly at PASS1_BITS scale to pair with tmp22 */
    1145             : 
    1146           0 :     tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
    1147           0 :     tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
    1148             : 
    1149             :     /* Final output stage: even part +/- odd part gives the mirrored workspace rows k and 9-k. */
    1150             : 
    1151           0 :     wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
    1152           0 :     wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
    1153           0 :     wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
    1154           0 :     wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
    1155           0 :     wsptr[8*2] = (int) (tmp22 + tmp12);  /* no shift: both terms are already at PASS1_BITS scale */
    1156           0 :     wsptr[8*7] = (int) (tmp22 - tmp12);
    1157           0 :     wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
    1158           0 :     wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
    1159           0 :     wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
    1160           0 :     wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
    1161             :   }
    1162             : 
    1163             :   /* Pass 2: process 10 rows from work array, store into output array; each row supplies 8 values and yields 10 samples. */
    1164             : 
    1165           0 :   wsptr = workspace;
    1166           0 :   for (ctr = 0; ctr < 10; ctr++) {
    1167           0 :     outptr = output_buf[ctr] + output_col;
    1168             : 
    1169             :     /* Even part */
    1170             : 
    1171             :     /* Add fudge factor here for final descale: after the CONST_BITS shift below it is half of 2^(CONST_BITS+PASS1_BITS+3) (assuming ONE is 1). */
    1172           0 :     z3 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
    1173           0 :     z3 = LEFT_SHIFT(z3, CONST_BITS);
    1174           0 :     z4 = (JLONG) wsptr[4];
    1175           0 :     z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
    1176           0 :     z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
    1177           0 :     tmp10 = z3 + z1;
    1178           0 :     tmp11 = z3 - z2;
    1179             : 
    1180           0 :     tmp22 = z3 - LEFT_SHIFT(z1 - z2, 1);         /* c0 = (c4-c8)*2 */
    1181             : 
    1182           0 :     z2 = (JLONG) wsptr[2];
    1183           0 :     z3 = (JLONG) wsptr[6];
    1184             : 
    1185           0 :     z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
    1186           0 :     tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
    1187           0 :     tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
    1188             : 
    1189           0 :     tmp20 = tmp10 + tmp12;
    1190           0 :     tmp24 = tmp10 - tmp12;
    1191           0 :     tmp21 = tmp11 + tmp13;
    1192           0 :     tmp23 = tmp11 - tmp13;
    1193             : 
    1194             :     /* Odd part */
    1195             : 
    1196           0 :     z1 = (JLONG) wsptr[1];
    1197           0 :     z2 = (JLONG) wsptr[3];
    1198           0 :     z3 = (JLONG) wsptr[5];
    1199           0 :     z3 = LEFT_SHIFT(z3, CONST_BITS);  /* pre-scale: z3 is only added/subtracted below, never multiplied */
    1200           0 :     z4 = (JLONG) wsptr[7];
    1201             : 
    1202           0 :     tmp11 = z2 + z4;
    1203           0 :     tmp13 = z2 - z4;
    1204             : 
    1205           0 :     tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
    1206             : 
    1207           0 :     z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
    1208           0 :     z4 = z3 + tmp12;
    1209             : 
    1210           0 :     tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
    1211           0 :     tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
    1212             : 
    1213           0 :     z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
    1214           0 :     z4 = z3 - tmp12 - LEFT_SHIFT(tmp13, CONST_BITS - 1);
    1215             : 
    1216           0 :     tmp12 = LEFT_SHIFT(z1 - tmp13, CONST_BITS) - z3;  /* z3 already carries the CONST_BITS scale */
    1217             : 
    1218           0 :     tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
    1219           0 :     tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
    1220             : 
    1221             :     /* Final output stage: descale, then look up in range_limit with the index masked by RANGE_MASK; samples k and 9-k are mirrored pairs. */
    1222             : 
    1223           0 :     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
    1224             :                                               CONST_BITS+PASS1_BITS+3)
    1225           0 :                             & RANGE_MASK];
    1226           0 :     outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
    1227             :                                               CONST_BITS+PASS1_BITS+3)
    1228           0 :                             & RANGE_MASK];
    1229           0 :     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
    1230             :                                               CONST_BITS+PASS1_BITS+3)
    1231           0 :                             & RANGE_MASK];
    1232           0 :     outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
    1233             :                                               CONST_BITS+PASS1_BITS+3)
    1234           0 :                             & RANGE_MASK];
    1235           0 :     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
    1236             :                                               CONST_BITS+PASS1_BITS+3)
    1237           0 :                             & RANGE_MASK];
    1238           0 :     outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
    1239             :                                               CONST_BITS+PASS1_BITS+3)
    1240           0 :                             & RANGE_MASK];
    1241           0 :     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
    1242             :                                               CONST_BITS+PASS1_BITS+3)
    1243           0 :                             & RANGE_MASK];
    1244           0 :     outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
    1245             :                                               CONST_BITS+PASS1_BITS+3)
    1246           0 :                             & RANGE_MASK];
    1247           0 :     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
    1248             :                                               CONST_BITS+PASS1_BITS+3)
    1249           0 :                             & RANGE_MASK];
    1250           0 :     outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
    1251             :                                               CONST_BITS+PASS1_BITS+3)
    1252           0 :                             & RANGE_MASK];
    1253             : 
    1254           0 :     wsptr += 8;         /* advance pointer to next row */
    1255             :   }
    1256           0 : }
    1257             : 
    1258             : 
    1259             : /*
    1260             :  * Perform dequantization and inverse DCT on one block of coefficients,
    1261             :  * producing an 11x11 output block (rows 0..10 of output_buf, at output_col).
    1262             :  *
    1263             :  * Optimized algorithm with 24 multiplications in the 1-D kernel.
    1264             :  * cK represents sqrt(2) * cos(K*pi/22).
    1265             :  */
    1266             : 
    1267             : GLOBAL(void)
    1268           0 : jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    1269             :                  JCOEFPTR coef_block,
    1270             :                  JSAMPARRAY output_buf, JDIMENSION output_col)
    1271             : {
    1272             :   JLONG tmp10, tmp11, tmp12, tmp13, tmp14;
    1273             :   JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
    1274             :   JLONG z1, z2, z3, z4;
    1275             :   JCOEFPTR inptr;
    1276             :   ISLOW_MULT_TYPE *quantptr;
    1277             :   int *wsptr;
    1278             :   JSAMPROW outptr;
    1279           0 :   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
    1280             :   int ctr;
    1281             :   int workspace[8*11];  /* buffers data between passes: 11 rows of 8 values */
    1282             :   SHIFT_TEMPS
    1283             : 
    1284             :   /* Pass 1: process 8 columns from input, store 11 values per column into work array (row stride 8). */
    1285             : 
    1286           0 :   inptr = coef_block;
    1287           0 :   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
    1288           0 :   wsptr = workspace;
    1289           0 :   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
    1290             :     /* Even part */
    1291             : 
    1292           0 :     tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    1293           0 :     tmp10 = LEFT_SHIFT(tmp10, CONST_BITS);
    1294             :     /* Add fudge factor here for final descale: half of the 2^(CONST_BITS-PASS1_BITS) divisor (assuming ONE is 1), so the later shift rounds. */
    1295           0 :     tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
    1296             : 
    1297           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    1298           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    1299           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
    1300             : 
    1301           0 :     tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
    1302           0 :     tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
    1303           0 :     z4 = z1 + z3;
    1304           0 :     tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
    1305           0 :     z4 -= z2;
    1306           0 :     tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
    1307           0 :     tmp21 = tmp20 + tmp23 + tmp25 -
    1308           0 :             MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
    1309           0 :     tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
    1310           0 :     tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
    1311           0 :     tmp24 += tmp25;
    1312           0 :     tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
    1313           0 :     tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
    1314           0 :              MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
    1315           0 :     tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0; overwrites the shared partial sum now that tmp20..tmp24 are final */
    1316             : 
    1317             :     /* Odd part */
    1318             : 
    1319           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    1320           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    1321           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    1322           0 :     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
    1323             : 
    1324           0 :     tmp11 = z1 + z2;
    1325           0 :     tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
    1326           0 :     tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
    1327           0 :     tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
    1328           0 :     tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
    1329           0 :     tmp10 = tmp11 + tmp12 + tmp13 -
    1330           0 :             MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
    1331           0 :     z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9; z1 is reused as scratch from here on */
    1332           0 :     tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
    1333           0 :     tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
    1334           0 :     z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
    1335           0 :     tmp11 += z1;
    1336           0 :     tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
    1337           0 :     tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
    1338           0 :              MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
    1339           0 :              MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
    1340             : 
    1341             :     /* Final output stage: even part +/- odd part gives the mirrored workspace rows k and 10-k; the centre row 5 takes the even term tmp25 alone. */
    1342             : 
    1343           0 :     wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
    1344           0 :     wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
    1345           0 :     wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
    1346           0 :     wsptr[8*9]  = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
    1347           0 :     wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
    1348           0 :     wsptr[8*8]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
    1349           0 :     wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
    1350           0 :     wsptr[8*7]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
    1351           0 :     wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
    1352           0 :     wsptr[8*6]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
    1353           0 :     wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
    1354             :   }
    1355             : 
    1356             :   /* Pass 2: process 11 rows from work array, store into output array; each row supplies 8 values and yields 11 samples. */
    1357             : 
    1358           0 :   wsptr = workspace;
    1359           0 :   for (ctr = 0; ctr < 11; ctr++) {
    1360           0 :     outptr = output_buf[ctr] + output_col;
    1361             : 
    1362             :     /* Even part */
    1363             : 
    1364             :     /* Add fudge factor here for final descale: after the CONST_BITS shift below it is half of 2^(CONST_BITS+PASS1_BITS+3) (assuming ONE is 1). */
    1365           0 :     tmp10 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
    1366           0 :     tmp10 = LEFT_SHIFT(tmp10, CONST_BITS);
    1367             : 
    1368           0 :     z1 = (JLONG) wsptr[2];
    1369           0 :     z2 = (JLONG) wsptr[4];
    1370           0 :     z3 = (JLONG) wsptr[6];
    1371             : 
    1372           0 :     tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
    1373           0 :     tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
    1374           0 :     z4 = z1 + z3;
    1375           0 :     tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
    1376           0 :     z4 -= z2;
    1377           0 :     tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
    1378           0 :     tmp21 = tmp20 + tmp23 + tmp25 -
    1379           0 :             MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
    1380           0 :     tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
    1381           0 :     tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
    1382           0 :     tmp24 += tmp25;
    1383           0 :     tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
    1384           0 :     tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
    1385           0 :              MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
    1386           0 :     tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0; overwrites the shared partial sum now that tmp20..tmp24 are final */
    1387             : 
    1388             :     /* Odd part */
    1389             : 
    1390           0 :     z1 = (JLONG) wsptr[1];
    1391           0 :     z2 = (JLONG) wsptr[3];
    1392           0 :     z3 = (JLONG) wsptr[5];
    1393           0 :     z4 = (JLONG) wsptr[7];
    1394             : 
    1395           0 :     tmp11 = z1 + z2;
    1396           0 :     tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
    1397           0 :     tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
    1398           0 :     tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
    1399           0 :     tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
    1400           0 :     tmp10 = tmp11 + tmp12 + tmp13 -
    1401           0 :             MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
    1402           0 :     z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9; z1 is reused as scratch from here on */
    1403           0 :     tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
    1404           0 :     tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
    1405           0 :     z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
    1406           0 :     tmp11 += z1;
    1407           0 :     tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
    1408           0 :     tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
    1409           0 :              MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
    1410           0 :              MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
    1411             : 
    1412             :     /* Final output stage: descale, then look up in range_limit with the index masked by RANGE_MASK; samples k and 10-k are mirrored pairs, sample 5 uses tmp25 alone. */
    1413             : 
    1414           0 :     outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
    1415             :                                                CONST_BITS+PASS1_BITS+3)
    1416           0 :                              & RANGE_MASK];
    1417           0 :     outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
    1418             :                                                CONST_BITS+PASS1_BITS+3)
    1419           0 :                              & RANGE_MASK];
    1420           0 :     outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
    1421             :                                                CONST_BITS+PASS1_BITS+3)
    1422           0 :                              & RANGE_MASK];
    1423           0 :     outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
    1424             :                                                CONST_BITS+PASS1_BITS+3)
    1425           0 :                              & RANGE_MASK];
    1426           0 :     outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
    1427             :                                                CONST_BITS+PASS1_BITS+3)
    1428           0 :                              & RANGE_MASK];
    1429           0 :     outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
    1430             :                                                CONST_BITS+PASS1_BITS+3)
    1431           0 :                              & RANGE_MASK];
    1432           0 :     outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
    1433             :                                                CONST_BITS+PASS1_BITS+3)
    1434           0 :                              & RANGE_MASK];
    1435           0 :     outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
    1436             :                                                CONST_BITS+PASS1_BITS+3)
    1437           0 :                              & RANGE_MASK];
    1438           0 :     outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
    1439             :                                                CONST_BITS+PASS1_BITS+3)
    1440           0 :                              & RANGE_MASK];
    1441           0 :     outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
    1442             :                                                CONST_BITS+PASS1_BITS+3)
    1443           0 :                              & RANGE_MASK];
    1444           0 :     outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25,
    1445             :                                                CONST_BITS+PASS1_BITS+3)
    1446           0 :                              & RANGE_MASK];
    1447             : 
    1448           0 :     wsptr += 8;         /* advance pointer to next row */
    1449             :   }
    1450           0 : }
    1451             : 
    1452             : 
    1453             : /*
    1454             :  * Perform dequantization and inverse DCT on one block of coefficients,
    1455             :  * producing a 12x12 output block.
    1456             :  *
    1457             :  * Optimized algorithm with 15 multiplications in the 1-D kernel.
    1458             :  * cK represents sqrt(2) * cos(K*pi/24).
    1459             :  */
    1460             : 
    1461             : GLOBAL(void)
    1462           0 : jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    1463             :                  JCOEFPTR coef_block,
    1464             :                  JSAMPARRAY output_buf, JDIMENSION output_col)
    1465             : {
    1466             :   JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
    1467             :   JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
    1468             :   JLONG z1, z2, z3, z4;
    1469             :   JCOEFPTR inptr;
    1470             :   ISLOW_MULT_TYPE *quantptr;
    1471             :   int *wsptr;
    1472             :   JSAMPROW outptr;
    1473           0 :   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
    1474             :   int ctr;
    1475             :   int workspace[8*12];  /* buffers data between passes */
    1476             :   SHIFT_TEMPS
    1477             : 
    1478             :   /* Pass 1: process columns from input, store into work array. */
    1479             : 
    1480           0 :   inptr = coef_block;
    1481           0 :   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
    1482           0 :   wsptr = workspace;
    1483           0 :   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
    1484             :     /* Even part */
    1485             : 
    1486           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    1487           0 :     z3 = LEFT_SHIFT(z3, CONST_BITS);
    1488             :     /* Add fudge factor here for final descale. */
    1489           0 :     z3 += ONE << (CONST_BITS-PASS1_BITS-1);
    1490             : 
    1491           0 :     z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    1492           0 :     z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
    1493             : 
    1494           0 :     tmp10 = z3 + z4;
    1495           0 :     tmp11 = z3 - z4;
    1496             : 
    1497           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    1498           0 :     z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
    1499           0 :     z1 = LEFT_SHIFT(z1, CONST_BITS);
    1500           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
    1501           0 :     z2 = LEFT_SHIFT(z2, CONST_BITS);
    1502             : 
    1503           0 :     tmp12 = z1 - z2;
    1504             : 
    1505           0 :     tmp21 = z3 + tmp12;
    1506           0 :     tmp24 = z3 - tmp12;
    1507             : 
    1508           0 :     tmp12 = z4 + z2;
    1509             : 
    1510           0 :     tmp20 = tmp10 + tmp12;
    1511           0 :     tmp25 = tmp10 - tmp12;
    1512             : 
    1513           0 :     tmp12 = z4 - z1 - z2;
    1514             : 
    1515           0 :     tmp22 = tmp11 + tmp12;
    1516           0 :     tmp23 = tmp11 - tmp12;
    1517             : 
    1518             :     /* Odd part */
    1519             : 
    1520           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    1521           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    1522           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    1523           0 :     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
    1524             : 
    1525           0 :     tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
    1526           0 :     tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
    1527             : 
    1528           0 :     tmp10 = z1 + z3;
    1529           0 :     tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
    1530           0 :     tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
    1531           0 :     tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
    1532           0 :     tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
    1533           0 :     tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
    1534           0 :     tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
    1535           0 :     tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
    1536           0 :              MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
    1537             : 
    1538           0 :     z1 -= z4;
    1539           0 :     z2 -= z3;
    1540           0 :     z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
    1541           0 :     tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
    1542           0 :     tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
    1543             : 
    1544             :     /* Final output stage */
    1545             : 
    1546           0 :     wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
    1547           0 :     wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
    1548           0 :     wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
    1549           0 :     wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
    1550           0 :     wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
    1551           0 :     wsptr[8*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
    1552           0 :     wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
    1553           0 :     wsptr[8*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
    1554           0 :     wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
    1555           0 :     wsptr[8*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
    1556           0 :     wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
    1557           0 :     wsptr[8*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
    1558             :   }
    1559             : 
    1560             :   /* Pass 2: process 12 rows from work array, store into output array. */
    1561             : 
    1562           0 :   wsptr = workspace;
    1563           0 :   for (ctr = 0; ctr < 12; ctr++) {
    1564           0 :     outptr = output_buf[ctr] + output_col;
    1565             : 
    1566             :     /* Even part */
    1567             : 
    1568             :     /* Add fudge factor here for final descale. */
    1569           0 :     z3 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
    1570           0 :     z3 = LEFT_SHIFT(z3, CONST_BITS);
    1571             : 
    1572           0 :     z4 = (JLONG) wsptr[4];
    1573           0 :     z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
    1574             : 
    1575           0 :     tmp10 = z3 + z4;
    1576           0 :     tmp11 = z3 - z4;
    1577             : 
    1578           0 :     z1 = (JLONG) wsptr[2];
    1579           0 :     z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
    1580           0 :     z1 = LEFT_SHIFT(z1, CONST_BITS);
    1581           0 :     z2 = (JLONG) wsptr[6];
    1582           0 :     z2 = LEFT_SHIFT(z2, CONST_BITS);
    1583             : 
    1584           0 :     tmp12 = z1 - z2;
    1585             : 
    1586           0 :     tmp21 = z3 + tmp12;
    1587           0 :     tmp24 = z3 - tmp12;
    1588             : 
    1589           0 :     tmp12 = z4 + z2;
    1590             : 
    1591           0 :     tmp20 = tmp10 + tmp12;
    1592           0 :     tmp25 = tmp10 - tmp12;
    1593             : 
    1594           0 :     tmp12 = z4 - z1 - z2;
    1595             : 
    1596           0 :     tmp22 = tmp11 + tmp12;
    1597           0 :     tmp23 = tmp11 - tmp12;
    1598             : 
    1599             :     /* Odd part */
    1600             : 
    1601           0 :     z1 = (JLONG) wsptr[1];
    1602           0 :     z2 = (JLONG) wsptr[3];
    1603           0 :     z3 = (JLONG) wsptr[5];
    1604           0 :     z4 = (JLONG) wsptr[7];
    1605             : 
    1606           0 :     tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
    1607           0 :     tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
    1608             : 
    1609           0 :     tmp10 = z1 + z3;
    1610           0 :     tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
    1611           0 :     tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
    1612           0 :     tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
    1613           0 :     tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
    1614           0 :     tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
    1615           0 :     tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
    1616           0 :     tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
    1617           0 :              MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
    1618             : 
    1619           0 :     z1 -= z4;
    1620           0 :     z2 -= z3;
    1621           0 :     z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
    1622           0 :     tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
    1623           0 :     tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
    1624             : 
    1625             :     /* Final output stage */
    1626             : 
    1627           0 :     outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
    1628             :                                                CONST_BITS+PASS1_BITS+3)
    1629           0 :                              & RANGE_MASK];
    1630           0 :     outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
    1631             :                                                CONST_BITS+PASS1_BITS+3)
    1632           0 :                              & RANGE_MASK];
    1633           0 :     outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
    1634             :                                                CONST_BITS+PASS1_BITS+3)
    1635           0 :                              & RANGE_MASK];
    1636           0 :     outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
    1637             :                                                CONST_BITS+PASS1_BITS+3)
    1638           0 :                              & RANGE_MASK];
    1639           0 :     outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
    1640             :                                                CONST_BITS+PASS1_BITS+3)
    1641           0 :                              & RANGE_MASK];
    1642           0 :     outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
    1643             :                                                CONST_BITS+PASS1_BITS+3)
    1644           0 :                              & RANGE_MASK];
    1645           0 :     outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
    1646             :                                                CONST_BITS+PASS1_BITS+3)
    1647           0 :                              & RANGE_MASK];
    1648           0 :     outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
    1649             :                                                CONST_BITS+PASS1_BITS+3)
    1650           0 :                              & RANGE_MASK];
    1651           0 :     outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
    1652             :                                                CONST_BITS+PASS1_BITS+3)
    1653           0 :                              & RANGE_MASK];
    1654           0 :     outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
    1655             :                                                CONST_BITS+PASS1_BITS+3)
    1656           0 :                              & RANGE_MASK];
    1657           0 :     outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
    1658             :                                                CONST_BITS+PASS1_BITS+3)
    1659           0 :                              & RANGE_MASK];
    1660           0 :     outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
    1661             :                                                CONST_BITS+PASS1_BITS+3)
    1662           0 :                              & RANGE_MASK];
    1663             : 
    1664           0 :     wsptr += 8;         /* advance pointer to next row */
    1665             :   }
    1666           0 : }
    1667             : 
    1668             : 
    1669             : /*
    1670             :  * Perform dequantization and inverse DCT on one block of coefficients,
    1671             :  * producing a 13x13 output block.
    1672             :  *
    1673             :  * Optimized algorithm with 29 multiplications in the 1-D kernel.
    1674             :  * cK represents sqrt(2) * cos(K*pi/26).
                     :  *
                     :  * Parameters, as used by the body:
                     :  *   cinfo      - passed to IDCT_range_limit() to get the output lookup
                     :  *                table.
                     :  *   compptr    - compptr->dct_table supplies the multipliers handed to
                     :  *                DEQUANTIZE (read as ISLOW_MULT_TYPE).
                     :  *   coef_block - input coefficients, read at inptr[DCTSIZE*r] for
                     :  *                r = 0..7 in each of 8 columns.
                     :  *   output_buf - 13 output rows; row ctr is written at
                     :  *                output_buf[ctr] + output_col, 13 samples wide.
                     :  *
                     :  * The same 1-D kernel runs twice: pass 1 expands each 8-coefficient
                     :  * input column to 13 values in the workspace, pass 2 expands each
                     :  * 8-value workspace row to 13 output samples.
    1675             :  */
    1676             : 
    1677             : GLOBAL(void)
    1678           0 : jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    1679             :                  JCOEFPTR coef_block,
    1680             :                  JSAMPARRAY output_buf, JDIMENSION output_col)
    1681             : {
    1682             :   JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; /* odd-part results; tmp10-tmp13 double as even-part scratch */
    1683             :   JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; /* even-part results */
    1684             :   JLONG z1, z2, z3, z4; /* the four inputs of the current half (even or odd) */
    1685             :   JCOEFPTR inptr;
    1686             :   ISLOW_MULT_TYPE *quantptr;
    1687             :   int *wsptr;
    1688             :   JSAMPROW outptr;
    1689           0 :   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
    1690             :   int ctr;
    1691             :   int workspace[8*13];  /* buffers data between passes: 13 rows of 8 ints */
    1692             :   SHIFT_TEMPS
    1693             : 
    1694             :   /* Pass 1: process columns from input, store into work array.
                     :    * Each of the 8 iterations reads one column of 8 coefficients
                     :    * (stride DCTSIZE), passes each through DEQUANTIZE together with the
                     :    * matching dct_table entry, and writes 13 values to
                     :    * wsptr[8*0] .. wsptr[8*12].
                     :    */
    1695             : 
    1696           0 :   inptr = coef_block;
    1697           0 :   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
    1698           0 :   wsptr = workspace;
    1699           0 :   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
    1700             :     /* Even part: input rows 0, 2, 4 and 6 */
    1701             : 
    1702           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    1703           0 :     z1 = LEFT_SHIFT(z1, CONST_BITS);
    1704             :     /* Add fudge factor here for final descale.
                     :      * Taking ONE as 1, this is half of the 2^(CONST_BITS-PASS1_BITS)
                     :      * divisor applied by the output stage below.  z1 enters each of
                     :      * tmp20..tmp26 exactly once, so every output carries this offset.
                     :      */
    1705           0 :     z1 += ONE << (CONST_BITS-PASS1_BITS-1);
    1706             : 
    1707           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    1708           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    1709           0 :     z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
    1710             : 
    1711           0 :     tmp10 = z3 + z4;
    1712           0 :     tmp11 = z3 - z4;
    1713             : 
    1714           0 :     tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
    1715           0 :     tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
    1716             : 
    1717           0 :     tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
    1718           0 :     tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
    1719             : 
    1720           0 :     tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
    1721           0 :     tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
    1722             : 
    1723           0 :     tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
    1724           0 :     tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
    1725             : 
    1726           0 :     tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
    1727           0 :     tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
    1728             : 
    1729           0 :     tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
    1730           0 :     tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
    1731             : 
    1732           0 :     tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
    1733             : 
    1734             :     /* Odd part: input rows 1, 3, 5 and 7 */
    1735             : 
    1736           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    1737           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    1738           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    1739           0 :     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
    1740             : 
    1741           0 :     tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
    1742           0 :     tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
    1743           0 :     tmp15 = z1 + z4;                                 /* feeds both the c7 and c11 products */
    1744           0 :     tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
    1745           0 :     tmp10 = tmp11 + tmp12 + tmp13 -
    1746           0 :             MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
    1747           0 :     tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
    1748           0 :     tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
    1749           0 :     tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
    1750           0 :     tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
    1751           0 :     tmp11 += tmp14;
    1752           0 :     tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
    1753           0 :     tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
    1754           0 :     tmp12 += tmp14;
    1755           0 :     tmp13 += tmp14;
    1756           0 :     tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
    1757           0 :     tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
    1758           0 :             MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
    1759           0 :     z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7; z1 is scratch from here on */
    1760           0 :     tmp14 += z1;
    1761           0 :     tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
    1762           0 :              MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
    1763             : 
    1764             :     /* Final output stage
                     :      * Rows k and 12-k are the sum and difference of even term tmp2k and
                     :      * odd term tmp1k (k = 0..5); row 6 takes the even term tmp26 alone.
                     :      * Every value is shifted right by CONST_BITS-PASS1_BITS.
                     :      */
    1765             : 
    1766           0 :     wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
    1767           0 :     wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
    1768           0 :     wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
    1769           0 :     wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
    1770           0 :     wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
    1771           0 :     wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
    1772           0 :     wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
    1773           0 :     wsptr[8*9]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
    1774           0 :     wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
    1775           0 :     wsptr[8*8]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
    1776           0 :     wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
    1777           0 :     wsptr[8*7]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
    1778           0 :     wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
    1779             :   }
    1780             : 
    1781             :   /* Pass 2: process 13 rows from work array, store into output array.
                     :    * Each workspace row holds 8 ints (wsptr advances by 8 per
                     :    * iteration) and yields 13 samples at outptr[0] .. outptr[12].
                     :    */
    1782             : 
    1783           0 :   wsptr = workspace;
    1784           0 :   for (ctr = 0; ctr < 13; ctr++) {
    1785           0 :     outptr = output_buf[ctr] + output_col;
    1786             : 
    1787             :     /* Even part: entries 0, 2, 4 and 6 of the current workspace row */
    1788             : 
    1789             :     /* Add fudge factor here for final descale.
                     :      * Taking ONE as 1, after the shift by CONST_BITS below this is
                     :      * 2^(CONST_BITS+PASS1_BITS+2), i.e. half of the
                     :      * 2^(CONST_BITS+PASS1_BITS+3) divisor applied by the output stage.
                     :      */
    1790           0 :     z1 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
    1791           0 :     z1 = LEFT_SHIFT(z1, CONST_BITS);
    1792             : 
    1793           0 :     z2 = (JLONG) wsptr[2];
    1794           0 :     z3 = (JLONG) wsptr[4];
    1795           0 :     z4 = (JLONG) wsptr[6];
    1796             : 
    1797           0 :     tmp10 = z3 + z4;
    1798           0 :     tmp11 = z3 - z4;
    1799             : 
    1800           0 :     tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
    1801           0 :     tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
    1802             : 
    1803           0 :     tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
    1804           0 :     tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
    1805             : 
    1806           0 :     tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
    1807           0 :     tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
    1808             : 
    1809           0 :     tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
    1810           0 :     tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
    1811             : 
    1812           0 :     tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
    1813           0 :     tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
    1814             : 
    1815           0 :     tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
    1816           0 :     tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
    1817             : 
    1818           0 :     tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
    1819             : 
    1820             :     /* Odd part: entries 1, 3, 5 and 7 of the current workspace row */
    1821             : 
    1822           0 :     z1 = (JLONG) wsptr[1];
    1823           0 :     z2 = (JLONG) wsptr[3];
    1824           0 :     z3 = (JLONG) wsptr[5];
    1825           0 :     z4 = (JLONG) wsptr[7];
    1826             : 
    1827           0 :     tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
    1828           0 :     tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
    1829           0 :     tmp15 = z1 + z4;                                 /* feeds both the c7 and c11 products */
    1830           0 :     tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
    1831           0 :     tmp10 = tmp11 + tmp12 + tmp13 -
    1832           0 :             MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
    1833           0 :     tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
    1834           0 :     tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
    1835           0 :     tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
    1836           0 :     tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
    1837           0 :     tmp11 += tmp14;
    1838           0 :     tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
    1839           0 :     tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
    1840           0 :     tmp12 += tmp14;
    1841           0 :     tmp13 += tmp14;
    1842           0 :     tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
    1843           0 :     tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
    1844           0 :             MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
    1845           0 :     z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7; z1 is scratch from here on */
    1846           0 :     tmp14 += z1;
    1847           0 :     tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
    1848           0 :              MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
    1849             : 
    1850             :     /* Final output stage
                     :      * Same k / 12-k pairing as pass 1, with outptr[6] taking tmp26
                     :      * alone.  Each value is shifted right by CONST_BITS+PASS1_BITS+3,
                     :      * masked with RANGE_MASK, and used as an index into range_limit to
                     :      * obtain the stored sample.
                     :      */
    1851             : 
    1852           0 :     outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
    1853             :                                                CONST_BITS+PASS1_BITS+3)
    1854           0 :                              & RANGE_MASK];
    1855           0 :     outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
    1856             :                                                CONST_BITS+PASS1_BITS+3)
    1857           0 :                              & RANGE_MASK];
    1858           0 :     outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
    1859             :                                                CONST_BITS+PASS1_BITS+3)
    1860           0 :                              & RANGE_MASK];
    1861           0 :     outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
    1862             :                                                CONST_BITS+PASS1_BITS+3)
    1863           0 :                              & RANGE_MASK];
    1864           0 :     outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
    1865             :                                                CONST_BITS+PASS1_BITS+3)
    1866           0 :                              & RANGE_MASK];
    1867           0 :     outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
    1868             :                                                CONST_BITS+PASS1_BITS+3)
    1869           0 :                              & RANGE_MASK];
    1870           0 :     outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
    1871             :                                                CONST_BITS+PASS1_BITS+3)
    1872           0 :                              & RANGE_MASK];
    1873           0 :     outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
    1874             :                                                CONST_BITS+PASS1_BITS+3)
    1875           0 :                              & RANGE_MASK];
    1876           0 :     outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
    1877             :                                                CONST_BITS+PASS1_BITS+3)
    1878           0 :                              & RANGE_MASK];
    1879           0 :     outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
    1880             :                                                CONST_BITS+PASS1_BITS+3)
    1881           0 :                              & RANGE_MASK];
    1882           0 :     outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
    1883             :                                                CONST_BITS+PASS1_BITS+3)
    1884           0 :                              & RANGE_MASK];
    1885           0 :     outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
    1886             :                                                CONST_BITS+PASS1_BITS+3)
    1887           0 :                              & RANGE_MASK];
    1888           0 :     outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26,
    1889             :                                                CONST_BITS+PASS1_BITS+3)
    1890           0 :                              & RANGE_MASK];
    1891             : 
    1892           0 :     wsptr += 8;         /* advance pointer to next row */
    1893             :   }
    1894           0 : }
    1895             : 
    1896             : 
    1897             : /*
    1898             :  * Perform dequantization and inverse DCT on one block of coefficients,
    1899             :  * producing a 14x14 output block.
    1900             :  *
    1901             :  * Optimized algorithm with 20 multiplications in the 1-D kernel.
    1902             :  * cK represents sqrt(2) * cos(K*pi/28).
    1903             :  */
    1904             : 
    1905             : GLOBAL(void)
    1906           0 : jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    1907             :                  JCOEFPTR coef_block,
    1908             :                  JSAMPARRAY output_buf, JDIMENSION output_col)
    1909             : {
    1910             :   JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
    1911             :   JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
    1912             :   JLONG z1, z2, z3, z4;
    1913             :   JCOEFPTR inptr;
    1914             :   ISLOW_MULT_TYPE *quantptr;
    1915             :   int *wsptr;
    1916             :   JSAMPROW outptr;
    1917           0 :   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
    1918             :   int ctr;
    1919             :   int workspace[8*14];  /* buffers data between passes */
    1920             :   SHIFT_TEMPS
    1921             : 
    1922             :   /* Pass 1: process columns from input, store into work array. */
    1923             : 
    1924           0 :   inptr = coef_block;
    1925           0 :   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
    1926           0 :   wsptr = workspace;
    1927           0 :   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
    1928             :     /* Even part */
    1929             : 
    1930           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    1931           0 :     z1 = LEFT_SHIFT(z1, CONST_BITS);
    1932             :     /* Add fudge factor here for final descale. */
    1933           0 :     z1 += ONE << (CONST_BITS-PASS1_BITS-1);
    1934           0 :     z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    1935           0 :     z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
    1936           0 :     z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
    1937           0 :     z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
    1938             : 
    1939           0 :     tmp10 = z1 + z2;
    1940           0 :     tmp11 = z1 + z3;
    1941           0 :     tmp12 = z1 - z4;
    1942             : 
    1943           0 :     tmp23 = RIGHT_SHIFT(z1 - LEFT_SHIFT(z2 + z3 - z4, 1),
    1944             :                         CONST_BITS-PASS1_BITS);  /* c0 = (c4+c12-c8)*2 */
    1945             : 
    1946           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    1947           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
    1948             : 
    1949           0 :     z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
    1950             : 
    1951           0 :     tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
    1952           0 :     tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
    1953           0 :     tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
    1954           0 :             MULTIPLY(z2, FIX(1.378756276));      /* c2 */
    1955             : 
    1956           0 :     tmp20 = tmp10 + tmp13;
    1957           0 :     tmp26 = tmp10 - tmp13;
    1958           0 :     tmp21 = tmp11 + tmp14;
    1959           0 :     tmp25 = tmp11 - tmp14;
    1960           0 :     tmp22 = tmp12 + tmp15;
    1961           0 :     tmp24 = tmp12 - tmp15;
    1962             : 
    1963             :     /* Odd part */
    1964             : 
    1965           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    1966           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    1967           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    1968           0 :     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
    1969           0 :     tmp13 = LEFT_SHIFT(z4, CONST_BITS);
    1970             : 
    1971           0 :     tmp14 = z1 + z3;
    1972           0 :     tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
    1973           0 :     tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
    1974           0 :     tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
    1975           0 :     tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
    1976           0 :     tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
    1977           0 :     z1    -= z2;
    1978           0 :     tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
    1979           0 :     tmp16 += tmp15;
    1980           0 :     z1    += z4;
    1981           0 :     z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
    1982           0 :     tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
    1983           0 :     tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
    1984           0 :     z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
    1985           0 :     tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
    1986           0 :     tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
    1987             : 
    1988           0 :     tmp13 = LEFT_SHIFT(z1 - z3, PASS1_BITS);
    1989             : 
    1990             :     /* Final output stage */
    1991             : 
    1992           0 :     wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
    1993           0 :     wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
    1994           0 :     wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
    1995           0 :     wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
    1996           0 :     wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
    1997           0 :     wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
    1998           0 :     wsptr[8*3]  = (int) (tmp23 + tmp13);
    1999           0 :     wsptr[8*10] = (int) (tmp23 - tmp13);
    2000           0 :     wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
    2001           0 :     wsptr[8*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
    2002           0 :     wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
    2003           0 :     wsptr[8*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
    2004           0 :     wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
    2005           0 :     wsptr[8*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
    2006             :   }
    2007             : 
    2008             :   /* Pass 2: process 14 rows from work array, store into output array. */
    2009             : 
    2010           0 :   wsptr = workspace;
    2011           0 :   for (ctr = 0; ctr < 14; ctr++) {
    2012           0 :     outptr = output_buf[ctr] + output_col;
    2013             : 
    2014             :     /* Even part */
    2015             : 
    2016             :     /* Add fudge factor here for final descale. */
    2017           0 :     z1 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
    2018           0 :     z1 = LEFT_SHIFT(z1, CONST_BITS);
    2019           0 :     z4 = (JLONG) wsptr[4];
    2020           0 :     z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
    2021           0 :     z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
    2022           0 :     z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
    2023             : 
    2024           0 :     tmp10 = z1 + z2;
    2025           0 :     tmp11 = z1 + z3;
    2026           0 :     tmp12 = z1 - z4;
    2027             : 
    2028           0 :     tmp23 = z1 - LEFT_SHIFT(z2 + z3 - z4, 1);    /* c0 = (c4+c12-c8)*2 */
    2029             : 
    2030           0 :     z1 = (JLONG) wsptr[2];
    2031           0 :     z2 = (JLONG) wsptr[6];
    2032             : 
    2033           0 :     z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
    2034             : 
    2035           0 :     tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
    2036           0 :     tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
    2037           0 :     tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
    2038           0 :             MULTIPLY(z2, FIX(1.378756276));      /* c2 */
    2039             : 
    2040           0 :     tmp20 = tmp10 + tmp13;
    2041           0 :     tmp26 = tmp10 - tmp13;
    2042           0 :     tmp21 = tmp11 + tmp14;
    2043           0 :     tmp25 = tmp11 - tmp14;
    2044           0 :     tmp22 = tmp12 + tmp15;
    2045           0 :     tmp24 = tmp12 - tmp15;
    2046             : 
    2047             :     /* Odd part */
    2048             : 
    2049           0 :     z1 = (JLONG) wsptr[1];
    2050           0 :     z2 = (JLONG) wsptr[3];
    2051           0 :     z3 = (JLONG) wsptr[5];
    2052           0 :     z4 = (JLONG) wsptr[7];
    2053           0 :     z4 = LEFT_SHIFT(z4, CONST_BITS);
    2054             : 
    2055           0 :     tmp14 = z1 + z3;
    2056           0 :     tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
    2057           0 :     tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
    2058           0 :     tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
    2059           0 :     tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
    2060           0 :     tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
    2061           0 :     z1    -= z2;
    2062           0 :     tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
    2063           0 :     tmp16 += tmp15;
    2064           0 :     tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
    2065           0 :     tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
    2066           0 :     tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
    2067           0 :     tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
    2068           0 :     tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
    2069           0 :     tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
    2070             : 
    2071           0 :     tmp13 = LEFT_SHIFT(z1 - z3, CONST_BITS) + z4;
    2072             : 
    2073             :     /* Final output stage */
    2074             : 
    2075           0 :     outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
    2076             :                                                CONST_BITS+PASS1_BITS+3)
    2077           0 :                              & RANGE_MASK];
    2078           0 :     outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
    2079             :                                                CONST_BITS+PASS1_BITS+3)
    2080           0 :                              & RANGE_MASK];
    2081           0 :     outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
    2082             :                                                CONST_BITS+PASS1_BITS+3)
    2083           0 :                              & RANGE_MASK];
    2084           0 :     outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
    2085             :                                                CONST_BITS+PASS1_BITS+3)
    2086           0 :                              & RANGE_MASK];
    2087           0 :     outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
    2088             :                                                CONST_BITS+PASS1_BITS+3)
    2089           0 :                              & RANGE_MASK];
    2090           0 :     outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
    2091             :                                                CONST_BITS+PASS1_BITS+3)
    2092           0 :                              & RANGE_MASK];
    2093           0 :     outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
    2094             :                                                CONST_BITS+PASS1_BITS+3)
    2095           0 :                              & RANGE_MASK];
    2096           0 :     outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
    2097             :                                                CONST_BITS+PASS1_BITS+3)
    2098           0 :                              & RANGE_MASK];
    2099           0 :     outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
    2100             :                                                CONST_BITS+PASS1_BITS+3)
    2101           0 :                              & RANGE_MASK];
    2102           0 :     outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
    2103             :                                                CONST_BITS+PASS1_BITS+3)
    2104           0 :                              & RANGE_MASK];
    2105           0 :     outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
    2106             :                                                CONST_BITS+PASS1_BITS+3)
    2107           0 :                              & RANGE_MASK];
    2108           0 :     outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
    2109             :                                                CONST_BITS+PASS1_BITS+3)
    2110           0 :                              & RANGE_MASK];
    2111           0 :     outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
    2112             :                                                CONST_BITS+PASS1_BITS+3)
    2113           0 :                              & RANGE_MASK];
    2114           0 :     outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
    2115             :                                                CONST_BITS+PASS1_BITS+3)
    2116           0 :                              & RANGE_MASK];
    2117             : 
    2118           0 :     wsptr += 8;         /* advance pointer to next row */
    2119             :   }
    2120           0 : }
    2121             : 
    2122             : 
    2123             : /*
    2124             :  * Perform dequantization and inverse DCT on one block of coefficients,
    2125             :  * producing a 15x15 output block.
    2126             :  *
    2127             :  * Optimized algorithm with 22 multiplications in the 1-D kernel.
    2128             :  * cK represents sqrt(2) * cos(K*pi/30).
                     :  *
                     :  * Parameters, as used by the code below:
                     :  *   cinfo      - only passed to IDCT_range_limit() to obtain the
                     :  *                range_limit lookup table.
                     :  *   compptr    - compptr->dct_table is read as an array of
                     :  *                ISLOW_MULT_TYPE dequantization multipliers.
                     :  *   coef_block - input coefficients; entries DCTSIZE*0 .. DCTSIZE*7 of
                     :  *                each of 8 consecutive columns are read.
                     :  *   output_buf - output_buf[0] .. output_buf[14] are the destination rows.
                     :  *   output_col - offset added to every row pointer; 15 samples are
                     :  *                written per row starting there.
                     :  *
                     :  * Pass 1 runs the 1-D kernel on each of the 8 input columns and stores 15
                     :  * values per column in an 8-wide workspace.  Pass 2 runs the same kernel
                     :  * on each of the 15 workspace rows and stores 15 samples per row.  In both
                     :  * passes outputs k and 14-k are formed as (even part +/- odd part), and
                     :  * the centre output 7 is the even term tmp27 alone.
    2129             :  */
    2130             : 
    2131             : GLOBAL(void)
    2132           0 : jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    2133             :                  JCOEFPTR coef_block,
    2134             :                  JSAMPARRAY output_buf, JDIMENSION output_col)
    2135             : {
    2136             :   JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
    2137             :   JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
    2138             :   JLONG z1, z2, z3, z4;
    2139             :   JCOEFPTR inptr;
    2140             :   ISLOW_MULT_TYPE *quantptr;
    2141             :   int *wsptr;
    2142             :   JSAMPROW outptr;
    2143           0 :   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
    2144             :   int ctr;
    2145             :   int workspace[8*15];  /* buffers data between passes: 15 rows of 8 ints */
    2146             :   SHIFT_TEMPS
    2147             : 
    2148             :   /* Pass 1: process columns from input, store into work array.
                     :    * Each iteration handles one input column (inptr, quantptr and wsptr
                     :    * advance together) and writes workspace entries 8*0 .. 8*14 of that
                     :    * column.
                     :    */
    2149             : 
    2150           0 :   inptr = coef_block;
    2151           0 :   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
    2152           0 :   wsptr = workspace;
    2153           0 :   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
    2154             :     /* Even part: uses dequantized coefficients 0, 2, 4 and 6 of this column
                     :      * and produces tmp20 .. tmp27.
                     :      */
    2155             : 
    2156           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    2157           0 :     z1 = LEFT_SHIFT(z1, CONST_BITS);
    2158             :     /* Add fudge factor here for final descale.
                     :      * The value is half of the divisor implied by the RIGHT_SHIFT by
                     :      * CONST_BITS-PASS1_BITS in the output stage, and z1 feeds every one of
                     :      * tmp20 .. tmp27, so each output gets the same rounding bias.
                     :      * NOTE(review): assumes ONE is the constant 1 and RIGHT_SHIFT is an
                     :      * arithmetic shift; both are defined outside this view.
                     :      */
    2159           0 :     z1 += ONE << (CONST_BITS-PASS1_BITS-1);
    2160             : 
    2161           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    2162           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    2163           0 :     z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
    2164             : 
    2165           0 :     tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
    2166           0 :     tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
    2167             : 
    2168           0 :     tmp12 = z1 - tmp10;
    2169           0 :     tmp13 = z1 + tmp11;
    2170           0 :     z1 -= LEFT_SHIFT(tmp11 - tmp10, 1);     /* c0 = (c6-c12)*2 */
    2171             : 
    2172           0 :     z4 = z2 - z3;                           /* z4 now holds coef2 - coef4 */
    2173           0 :     z3 += z2;                               /* z3 now holds coef2 + coef4 */
    2174           0 :     tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
    2175           0 :     tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
    2176           0 :     z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
    2177             : 
    2178           0 :     tmp20 = tmp13 + tmp10 + tmp11;
    2179           0 :     tmp23 = tmp12 - tmp10 + tmp11 + z2;
    2180             : 
    2181           0 :     tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
    2182           0 :     tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
    2183             : 
    2184           0 :     tmp25 = tmp13 - tmp10 - tmp11;
    2185           0 :     tmp26 = tmp12 + tmp10 - tmp11 - z2;
    2186             : 
    2187           0 :     tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
    2188           0 :     tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
    2189             : 
    2190           0 :     tmp21 = tmp12 + tmp10 + tmp11;
    2191           0 :     tmp24 = tmp13 - tmp10 + tmp11;
    2192           0 :     tmp11 += tmp11;                         /* doubled before use in tmp22/tmp27 */
    2193           0 :     tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
    2194           0 :     tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
    2195             : 
    2196             :     /* Odd part: uses dequantized coefficients 1, 3, 5 and 7 and produces
                     :      * tmp10 .. tmp16.  z4 briefly holds coefficient 5 (scaled into z3)
                     :      * before it is reloaded with coefficient 7.
                     :      */
    2197             : 
    2198           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    2199           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    2200           0 :     z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    2201           0 :     z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
    2202           0 :     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
    2203             : 
    2204           0 :     tmp13 = z2 - z4;
    2205           0 :     tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
    2206           0 :     tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
    2207           0 :     tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
    2208             : 
    2209           0 :     tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
    2210           0 :     tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
    2211           0 :     z2 = z1 - z4;
    2212           0 :     tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
    2213             : 
    2214           0 :     tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
    2215           0 :     tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
    2216           0 :     tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
    2217           0 :     z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
    2218           0 :     tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
    2219           0 :     tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
    2220             : 
    2221             :     /* Final output stage: workspace rows k and 14-k receive (even + odd)
                     :      * and (even - odd); row 7 receives the even term tmp27 alone.  Every
                     :      * value is shifted right by CONST_BITS-PASS1_BITS before being stored.
                     :      */
    2222             : 
    2223           0 :     wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
    2224           0 :     wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
    2225           0 :     wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
    2226           0 :     wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
    2227           0 :     wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
    2228           0 :     wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
    2229           0 :     wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
    2230           0 :     wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
    2231           0 :     wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
    2232           0 :     wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
    2233           0 :     wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
    2234           0 :     wsptr[8*9]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
    2235           0 :     wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
    2236           0 :     wsptr[8*8]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
    2237           0 :     wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
    2238             :   }
    2239             : 
    2240             :   /* Pass 2: process 15 rows from work array, store into output array.
                     :    * Each iteration reads 8 consecutive workspace ints (wsptr[0..7]) and
                     :    * writes 15 samples to output_buf[ctr] starting at output_col.
                     :    */
    2241             : 
    2242           0 :   wsptr = workspace;
    2243           0 :   for (ctr = 0; ctr < 15; ctr++) {
    2244           0 :     outptr = output_buf[ctr] + output_col;
    2245             : 
    2246             :     /* Even part: same arithmetic as pass 1, applied to wsptr[0], [2], [4]
                     :      * and [6] instead of dequantized coefficients.
                     :      */
    2247             : 
    2248             :     /* Add fudge factor here for final descale.
                     :      * It is added before the left shift by CONST_BITS, so after that
                     :      * shift it equals half of the divisor implied by the final
                     :      * RIGHT_SHIFT by CONST_BITS+PASS1_BITS+3.
                     :      * NOTE(review): assumes ONE is the constant 1; defined outside this
                     :      * view.
                     :      */
    2249           0 :     z1 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
    2250           0 :     z1 = LEFT_SHIFT(z1, CONST_BITS);
    2251             : 
    2252           0 :     z2 = (JLONG) wsptr[2];
    2253           0 :     z3 = (JLONG) wsptr[4];
    2254           0 :     z4 = (JLONG) wsptr[6];
    2255             : 
    2256           0 :     tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
    2257           0 :     tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
    2258             : 
    2259           0 :     tmp12 = z1 - tmp10;
    2260           0 :     tmp13 = z1 + tmp11;
    2261           0 :     z1 -= LEFT_SHIFT(tmp11 - tmp10, 1);     /* c0 = (c6-c12)*2 */
    2262             : 
    2263           0 :     z4 = z2 - z3;                           /* z4 now holds ws[2] - ws[4] */
    2264           0 :     z3 += z2;                               /* z3 now holds ws[2] + ws[4] */
    2265           0 :     tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
    2266           0 :     tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
    2267           0 :     z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
    2268             : 
    2269           0 :     tmp20 = tmp13 + tmp10 + tmp11;
    2270           0 :     tmp23 = tmp12 - tmp10 + tmp11 + z2;
    2271             : 
    2272           0 :     tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
    2273           0 :     tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
    2274             : 
    2275           0 :     tmp25 = tmp13 - tmp10 - tmp11;
    2276           0 :     tmp26 = tmp12 + tmp10 - tmp11 - z2;
    2277             : 
    2278           0 :     tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
    2279           0 :     tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
    2280             : 
    2281           0 :     tmp21 = tmp12 + tmp10 + tmp11;
    2282           0 :     tmp24 = tmp13 - tmp10 + tmp11;
    2283           0 :     tmp11 += tmp11;                         /* doubled before use in tmp22/tmp27 */
    2284           0 :     tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
    2285           0 :     tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
    2286             : 
    2287             :     /* Odd part: same arithmetic as pass 1, applied to wsptr[1], [3], [5]
                     :      * and [7].  z4 briefly holds wsptr[5] (scaled into z3) before it is
                     :      * reloaded with wsptr[7].
                     :      */
    2288             : 
    2289           0 :     z1 = (JLONG) wsptr[1];
    2290           0 :     z2 = (JLONG) wsptr[3];
    2291           0 :     z4 = (JLONG) wsptr[5];
    2292           0 :     z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
    2293           0 :     z4 = (JLONG) wsptr[7];
    2294             : 
    2295           0 :     tmp13 = z2 - z4;
    2296           0 :     tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
    2297           0 :     tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
    2298           0 :     tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
    2299             : 
    2300           0 :     tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
    2301           0 :     tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
    2302           0 :     z2 = z1 - z4;
    2303           0 :     tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
    2304             : 
    2305           0 :     tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
    2306           0 :     tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
    2307           0 :     tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
    2308           0 :     z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
    2309           0 :     tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
    2310           0 :     tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
    2311             : 
    2312             :     /* Final output stage: samples k and 14-k receive (even + odd) and
                     :      * (even - odd); sample 7 receives the even term tmp27 alone.  Each
                     :      * value is shifted right by CONST_BITS+PASS1_BITS+3, masked with
                     :      * RANGE_MASK and used as an index into range_limit.
                     :      */
    2313             : 
    2314           0 :     outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
    2315             :                                                CONST_BITS+PASS1_BITS+3)
    2316           0 :                              & RANGE_MASK];
    2317           0 :     outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
    2318             :                                                CONST_BITS+PASS1_BITS+3)
    2319           0 :                              & RANGE_MASK];
    2320           0 :     outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
    2321             :                                                CONST_BITS+PASS1_BITS+3)
    2322           0 :                              & RANGE_MASK];
    2323           0 :     outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
    2324             :                                                CONST_BITS+PASS1_BITS+3)
    2325           0 :                              & RANGE_MASK];
    2326           0 :     outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
    2327             :                                                CONST_BITS+PASS1_BITS+3)
    2328           0 :                              & RANGE_MASK];
    2329           0 :     outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
    2330             :                                                CONST_BITS+PASS1_BITS+3)
    2331           0 :                              & RANGE_MASK];
    2332           0 :     outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
    2333             :                                                CONST_BITS+PASS1_BITS+3)
    2334           0 :                              & RANGE_MASK];
    2335           0 :     outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
    2336             :                                                CONST_BITS+PASS1_BITS+3)
    2337           0 :                              & RANGE_MASK];
    2338           0 :     outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
    2339             :                                                CONST_BITS+PASS1_BITS+3)
    2340           0 :                              & RANGE_MASK];
    2341           0 :     outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
    2342             :                                                CONST_BITS+PASS1_BITS+3)
    2343           0 :                              & RANGE_MASK];
    2344           0 :     outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
    2345             :                                                CONST_BITS+PASS1_BITS+3)
    2346           0 :                              & RANGE_MASK];
    2347           0 :     outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
    2348             :                                                CONST_BITS+PASS1_BITS+3)
    2349           0 :                              & RANGE_MASK];
    2350           0 :     outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
    2351             :                                                CONST_BITS+PASS1_BITS+3)
    2352           0 :                              & RANGE_MASK];
    2353           0 :     outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
    2354             :                                                CONST_BITS+PASS1_BITS+3)
    2355           0 :                              & RANGE_MASK];
    2356           0 :     outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27,
    2357             :                                                CONST_BITS+PASS1_BITS+3)
    2358           0 :                              & RANGE_MASK];
    2359             : 
    2360           0 :     wsptr += 8;         /* advance pointer to next row */
    2361             :   }
    2362           0 : }
    2363             : 
    2364             : 
    2365             : /*
    2366             :  * Perform dequantization and inverse DCT on one block of coefficients,
    2367             :  * producing a 16x16 output block.
    2368             :  *
    2369             :  * Optimized algorithm with 28 multiplications in the 1-D kernel.
    2370             :  * cK represents sqrt(2) * cos(K*pi/32).
    2371             :  */
    2372             : 
    2373             : GLOBAL(void)
    2374           0 : jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
    2375             :                  JCOEFPTR coef_block,
    2376             :                  JSAMPARRAY output_buf, JDIMENSION output_col)
    2377             : {
    2378             :   JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
    2379             :   JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
    2380             :   JLONG z1, z2, z3, z4;
    2381             :   JCOEFPTR inptr;
    2382             :   ISLOW_MULT_TYPE *quantptr;
    2383             :   int *wsptr;
    2384             :   JSAMPROW outptr;
    2385           0 :   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
    2386             :   int ctr;
    2387             :   int workspace[8*16];  /* buffers data between passes */
    2388             :   SHIFT_TEMPS
    2389             : 
    2390             :   /* Pass 1: process columns from input, store into work array. */
    2391             : 
    2392           0 :   inptr = coef_block;
    2393           0 :   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
    2394           0 :   wsptr = workspace;
    2395           0 :   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
    2396             :     /* Even part */
    2397             : 
    2398           0 :     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    2399           0 :     tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
    2400             :     /* Add fudge factor here for final descale. */
    2401           0 :     tmp0 += 1 << (CONST_BITS-PASS1_BITS-1);
    2402             : 
    2403           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    2404           0 :     tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
    2405           0 :     tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
    2406             : 
    2407           0 :     tmp10 = tmp0 + tmp1;
    2408           0 :     tmp11 = tmp0 - tmp1;
    2409           0 :     tmp12 = tmp0 + tmp2;
    2410           0 :     tmp13 = tmp0 - tmp2;
    2411             : 
    2412           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    2413           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
    2414           0 :     z3 = z1 - z2;
    2415           0 :     z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
    2416           0 :     z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
    2417             : 
    2418           0 :     tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
    2419           0 :     tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
    2420           0 :     tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
    2421           0 :     tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
    2422             : 
    2423           0 :     tmp20 = tmp10 + tmp0;
    2424           0 :     tmp27 = tmp10 - tmp0;
    2425           0 :     tmp21 = tmp12 + tmp1;
    2426           0 :     tmp26 = tmp12 - tmp1;
    2427           0 :     tmp22 = tmp13 + tmp2;
    2428           0 :     tmp25 = tmp13 - tmp2;
    2429           0 :     tmp23 = tmp11 + tmp3;
    2430           0 :     tmp24 = tmp11 - tmp3;
    2431             : 
    2432             :     /* Odd part */
    2433             : 
    2434           0 :     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    2435           0 :     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    2436           0 :     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    2437           0 :     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
    2438             : 
    2439           0 :     tmp11 = z1 + z3;
    2440             : 
    2441           0 :     tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
    2442           0 :     tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
    2443           0 :     tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
    2444           0 :     tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
    2445           0 :     tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
    2446           0 :     tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
    2447           0 :     tmp0  = tmp1 + tmp2 + tmp3 -
    2448           0 :             MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
    2449           0 :     tmp13 = tmp10 + tmp11 + tmp12 -
    2450           0 :             MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
    2451           0 :     z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
    2452           0 :     tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
    2453           0 :     tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
    2454           0 :     z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
    2455           0 :     tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
    2456           0 :     tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
    2457           0 :     z2    += z4;
    2458           0 :     z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
    2459           0 :     tmp1  += z1;
    2460           0 :     tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
    2461           0 :     z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
    2462           0 :     tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
    2463           0 :     tmp12 += z2;
    2464           0 :     z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
    2465           0 :     tmp2  += z2;
    2466           0 :     tmp3  += z2;
    2467           0 :     z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
    2468           0 :     tmp10 += z2;
    2469           0 :     tmp11 += z2;
    2470             : 
    2471             :     /* Final output stage */
    2472             : 
    2473           0 :     wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
    2474           0 :     wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
    2475           0 :     wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
    2476           0 :     wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
    2477           0 :     wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
    2478           0 :     wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
    2479           0 :     wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
    2480           0 :     wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
    2481           0 :     wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
    2482           0 :     wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
    2483           0 :     wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
    2484           0 :     wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
    2485           0 :     wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
    2486           0 :     wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
    2487           0 :     wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
    2488           0 :     wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
    2489             :   }
    2490             : 
    2491             :   /* Pass 2: process 16 rows from work array, store into output array. */
    2492             : 
    2493           0 :   wsptr = workspace;
    2494           0 :   for (ctr = 0; ctr < 16; ctr++) {
    2495           0 :     outptr = output_buf[ctr] + output_col;
    2496             : 
    2497             :     /* Even part */
    2498             : 
    2499             :     /* Add fudge factor here for final descale. */
    2500           0 :     tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
    2501           0 :     tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
    2502             : 
    2503           0 :     z1 = (JLONG) wsptr[4];
    2504           0 :     tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
    2505           0 :     tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
    2506             : 
    2507           0 :     tmp10 = tmp0 + tmp1;
    2508           0 :     tmp11 = tmp0 - tmp1;
    2509           0 :     tmp12 = tmp0 + tmp2;
    2510           0 :     tmp13 = tmp0 - tmp2;
    2511             : 
    2512           0 :     z1 = (JLONG) wsptr[2];
    2513           0 :     z2 = (JLONG) wsptr[6];
    2514           0 :     z3 = z1 - z2;
    2515           0 :     z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
    2516           0 :     z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
    2517             : 
    2518           0 :     tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
    2519           0 :     tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
    2520           0 :     tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
    2521           0 :     tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
    2522             : 
    2523           0 :     tmp20 = tmp10 + tmp0;
    2524           0 :     tmp27 = tmp10 - tmp0;
    2525           0 :     tmp21 = tmp12 + tmp1;
    2526           0 :     tmp26 = tmp12 - tmp1;
    2527           0 :     tmp22 = tmp13 + tmp2;
    2528           0 :     tmp25 = tmp13 - tmp2;
    2529           0 :     tmp23 = tmp11 + tmp3;
    2530           0 :     tmp24 = tmp11 - tmp3;
    2531             : 
    2532             :     /* Odd part */
    2533             : 
    2534           0 :     z1 = (JLONG) wsptr[1];
    2535           0 :     z2 = (JLONG) wsptr[3];
    2536           0 :     z3 = (JLONG) wsptr[5];
    2537           0 :     z4 = (JLONG) wsptr[7];
    2538             : 
    2539           0 :     tmp11 = z1 + z3;
    2540             : 
    2541           0 :     tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
    2542           0 :     tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
    2543           0 :     tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
    2544           0 :     tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
    2545           0 :     tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
    2546           0 :     tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
    2547           0 :     tmp0  = tmp1 + tmp2 + tmp3 -
    2548           0 :             MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
    2549           0 :     tmp13 = tmp10 + tmp11 + tmp12 -
    2550           0 :             MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
    2551           0 :     z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
    2552           0 :     tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
    2553           0 :     tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
    2554           0 :     z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
    2555           0 :     tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
    2556           0 :     tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
    2557           0 :     z2    += z4;
    2558           0 :     z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
    2559           0 :     tmp1  += z1;
    2560           0 :     tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
    2561           0 :     z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
    2562           0 :     tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
    2563           0 :     tmp12 += z2;
    2564           0 :     z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
    2565           0 :     tmp2  += z2;
    2566           0 :     tmp3  += z2;
    2567           0 :     z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
    2568           0 :     tmp10 += z2;
    2569           0 :     tmp11 += z2;
    2570             : 
    2571             :     /* Final output stage */
    2572             : 
    2573           0 :     outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
    2574             :                                                CONST_BITS+PASS1_BITS+3)
    2575           0 :                              & RANGE_MASK];
    2576           0 :     outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
    2577             :                                                CONST_BITS+PASS1_BITS+3)
    2578           0 :                              & RANGE_MASK];
    2579           0 :     outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
    2580             :                                                CONST_BITS+PASS1_BITS+3)
    2581           0 :                              & RANGE_MASK];
    2582           0 :     outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
    2583             :                                                CONST_BITS+PASS1_BITS+3)
    2584           0 :                              & RANGE_MASK];
    2585           0 :     outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
    2586             :                                                CONST_BITS+PASS1_BITS+3)
    2587           0 :                              & RANGE_MASK];
    2588           0 :     outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
    2589             :                                                CONST_BITS+PASS1_BITS+3)
    2590           0 :                              & RANGE_MASK];
    2591           0 :     outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
    2592             :                                                CONST_BITS+PASS1_BITS+3)
    2593           0 :                              & RANGE_MASK];
    2594           0 :     outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
    2595             :                                                CONST_BITS+PASS1_BITS+3)
    2596           0 :                              & RANGE_MASK];
    2597           0 :     outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
    2598             :                                                CONST_BITS+PASS1_BITS+3)
    2599           0 :                              & RANGE_MASK];
    2600           0 :     outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
    2601             :                                                CONST_BITS+PASS1_BITS+3)
    2602           0 :                              & RANGE_MASK];
    2603           0 :     outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
    2604             :                                                CONST_BITS+PASS1_BITS+3)
    2605           0 :                              & RANGE_MASK];
    2606           0 :     outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
    2607             :                                                CONST_BITS+PASS1_BITS+3)
    2608           0 :                              & RANGE_MASK];
    2609           0 :     outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
    2610             :                                                CONST_BITS+PASS1_BITS+3)
    2611           0 :                              & RANGE_MASK];
    2612           0 :     outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
    2613             :                                                CONST_BITS+PASS1_BITS+3)
    2614           0 :                              & RANGE_MASK];
    2615           0 :     outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
    2616             :                                                CONST_BITS+PASS1_BITS+3)
    2617           0 :                              & RANGE_MASK];
    2618           0 :     outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
    2619             :                                                CONST_BITS+PASS1_BITS+3)
    2620           0 :                              & RANGE_MASK];
    2621             : 
    2622           0 :     wsptr += 8;         /* advance pointer to next row */
    2623             :   }
    2624           0 : }
    2625             : 
    2626             : #endif /* IDCT_SCALING_SUPPORTED */
    2627             : #endif /* DCT_ISLOW_SUPPORTED */

Generated by: LCOV version 1.13