LCOV - code coverage report
Current view: top level - media/libjpeg - jcdctmgr.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 211 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 10 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * jcdctmgr.c
       3             :  *
       4             :  * This file was part of the Independent JPEG Group's software:
       5             :  * Copyright (C) 1994-1996, Thomas G. Lane.
       6             :  * libjpeg-turbo Modifications:
       7             :  * Copyright (C) 1999-2006, MIYASAKA Masaru.
       8             :  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
       9             :  * Copyright (C) 2011, 2014-2015, D. R. Commander.
      10             :  * For conditions of distribution and use, see the accompanying README.ijg
      11             :  * file.
      12             :  *
      13             :  * This file contains the forward-DCT management logic.
      14             :  * This code selects a particular DCT implementation to be used,
      15             :  * and it performs related housekeeping chores including coefficient
      16             :  * quantization.
      17             :  */
      18             : 
      19             : #define JPEG_INTERNALS
      20             : #include "jinclude.h"
      21             : #include "jpeglib.h"
      22             : #include "jdct.h"               /* Private declarations for DCT subsystem */
      23             : #include "jsimddct.h"
      24             : 
      25             : 
      26             : /* Private subobject for this module */
      27             : 
      28             : typedef void (*forward_DCT_method_ptr) (DCTELEM *data);
      29             : typedef void (*float_DCT_method_ptr) (FAST_FLOAT *data);
      30             : 
      31             : typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
      32             :                                      JDIMENSION start_col,
      33             :                                      DCTELEM *workspace);
      34             : typedef void (*float_convsamp_method_ptr) (JSAMPARRAY sample_data,
      35             :                                            JDIMENSION start_col,
      36             :                                            FAST_FLOAT *workspace);
      37             : 
      38             : typedef void (*quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM *divisors,
      39             :                                      DCTELEM *workspace);
      40             : typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
      41             :                                            FAST_FLOAT *divisors,
      42             :                                            FAST_FLOAT *workspace);
      43             : 
      44             : METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
      45             : 
      46             : typedef struct {
      47             :   struct jpeg_forward_dct pub;  /* public fields */
      48             : 
      49             :   /* Pointer to the DCT routine actually in use */
      50             :   forward_DCT_method_ptr dct;
      51             :   convsamp_method_ptr convsamp;
      52             :   quantize_method_ptr quantize;
      53             : 
      54             :   /* The actual post-DCT divisors --- not identical to the quant table
      55             :    * entries, because of scaling (especially for an unnormalized DCT).
      56             :    * Each table is given in normal array order.
      57             :    */
      58             :   DCTELEM *divisors[NUM_QUANT_TBLS];
      59             : 
      60             :   /* work area for FDCT subroutine */
      61             :   DCTELEM *workspace;
      62             : 
      63             : #ifdef DCT_FLOAT_SUPPORTED
      64             :   /* Same as above for the floating-point case. */
      65             :   float_DCT_method_ptr float_dct;
      66             :   float_convsamp_method_ptr float_convsamp;
      67             :   float_quantize_method_ptr float_quantize;
      68             :   FAST_FLOAT *float_divisors[NUM_QUANT_TBLS];
      69             :   FAST_FLOAT *float_workspace;
      70             : #endif
      71             : } my_fdct_controller;
      72             : 
      73             : typedef my_fdct_controller *my_fdct_ptr;
      74             : 
      75             : 
      76             : #if BITS_IN_JSAMPLE == 8
      77             : 
      78             : /*
      79             :  * Find the highest bit in an integer through binary search.
      80             :  */
      81             : 
      82             : LOCAL(int)
      83           0 : flss (UINT16 val)
      84             : {
      85             :   int bit;
      86             : 
      87           0 :   bit = 16;
      88             : 
      89           0 :   if (!val)
      90           0 :     return 0;
      91             : 
      92           0 :   if (!(val & 0xff00)) {
      93           0 :     bit -= 8;
      94           0 :     val <<= 8;
      95             :   }
      96           0 :   if (!(val & 0xf000)) {
      97           0 :     bit -= 4;
      98           0 :     val <<= 4;
      99             :   }
     100           0 :   if (!(val & 0xc000)) {
     101           0 :     bit -= 2;
     102           0 :     val <<= 2;
     103             :   }
     104           0 :   if (!(val & 0x8000)) {
     105           0 :     bit -= 1;
     106           0 :     val <<= 1;
     107             :   }
     108             : 
     109           0 :   return bit;
     110             : }
     111             : 
     112             : 
     113             : /*
     114             :  * Compute values to do a division using reciprocal.
     115             :  *
     116             :  * This implementation is based on an algorithm described in
     117             :  *   "How to optimize for the Pentium family of microprocessors"
     118             :  *   (http://www.agner.org/assem/).
     119             :  * More information about the basic algorithm can be found in
     120             :  * the paper "Integer Division Using Reciprocals" by Robert Alverson.
     121             :  *
     122             :  * The basic idea is to replace x/d by x * d^-1. In order to store
     123             :  * d^-1 with enough precision we shift it left a few places. It turns
     124             :  * out that this algorithm gives just enough precision, and also fits
     125             :  * into DCTELEM:
     126             :  *
     127             :  *   b = (the number of significant bits in divisor) - 1
     128             :  *   r = (word size) + b
     129             :  *   f = 2^r / divisor
     130             :  *
     131             :  * f will not be an integer for most cases, so we need to compensate
     132             :  * for the rounding error introduced:
     133             :  *
     134             :  *   no fractional part:
     135             :  *
     136             :  *       result = input >> r
     137             :  *
     138             :  *   fractional part of f < 0.5:
     139             :  *
     140             :  *       round f down to nearest integer
     141             :  *       result = ((input + 1) * f) >> r
     142             :  *
     143             :  *   fractional part of f > 0.5:
     144             :  *
     145             :  *       round f up to nearest integer
     146             :  *       result = (input * f) >> r
     147             :  *
     148             :  * This is the original algorithm that gives truncated results. But we
     149             :  * want properly rounded results, so we replace "input" with
     150             :  * "input + divisor/2".
     151             :  *
     152             :  * In order to allow SIMD implementations we also tweak the values to
     153             :  * allow the same calculation to be made at all times:
     154             :  *
     155             :  *   dctbl[0] = f rounded to nearest integer
     156             :  *   dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
     157             :  *   dctbl[2] = 1 << ((word size) * 2 - r)
     158             :  *   dctbl[3] = r - (word size)
     159             :  *
     160             :  * dctbl[2] is for stupid instruction sets where the shift operation
     161             :  * isn't member wise (e.g. MMX).
     162             :  *
     163             :  * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
     164             :  * is that most SIMD implementations have a "multiply and store top
     165             :  * half" operation.
     166             :  *
     167             :  * Lastly, we store each of the values in their own table instead
     168             :  * of in a consecutive manner, yet again in order to allow SIMD
     169             :  * routines.
     170             :  */
     171             : 
     172             : LOCAL(int)
     173           0 : compute_reciprocal (UINT16 divisor, DCTELEM *dtbl)
     174             : {
     175             :   UDCTELEM2 fq, fr;
     176             :   UDCTELEM c;
     177             :   int b, r;
     178             : 
     179           0 :   if (divisor == 1) {
     180             :     /* divisor == 1 means unquantized, so these reciprocal/correction/shift
     181             :      * values will cause the C quantization algorithm to act like the
     182             :      * identity function.  Since only the C quantization algorithm is used in
     183             :      * these cases, the scale value is irrelevant.
     184             :      */
     185           0 :     dtbl[DCTSIZE2 * 0] = (DCTELEM) 1;                       /* reciprocal */
     186           0 :     dtbl[DCTSIZE2 * 1] = (DCTELEM) 0;                       /* correction */
     187           0 :     dtbl[DCTSIZE2 * 2] = (DCTELEM) 1;                       /* scale */
     188           0 :     dtbl[DCTSIZE2 * 3] = -(DCTELEM) (sizeof(DCTELEM) * 8);  /* shift */
     189           0 :     return 0;
     190             :   }
     191             : 
     192           0 :   b = flss(divisor) - 1;
     193           0 :   r  = sizeof(DCTELEM) * 8 + b;
     194             : 
     195           0 :   fq = ((UDCTELEM2)1 << r) / divisor;
     196           0 :   fr = ((UDCTELEM2)1 << r) % divisor;
     197             : 
     198           0 :   c = divisor / 2; /* for rounding */
     199             : 
     200           0 :   if (fr == 0) { /* divisor is power of two */
     201             :     /* fq will be one bit too large to fit in DCTELEM, so adjust */
     202           0 :     fq >>= 1;
     203           0 :     r--;
     204           0 :   } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
     205           0 :     c++;
     206             :   } else { /* fractional part is > 0.5 */
     207           0 :     fq++;
     208             :   }
     209             : 
     210           0 :   dtbl[DCTSIZE2 * 0] = (DCTELEM) fq;      /* reciprocal */
     211           0 :   dtbl[DCTSIZE2 * 1] = (DCTELEM) c;       /* correction + roundfactor */
     212             : #ifdef WITH_SIMD
     213           0 :   dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r));  /* scale */
     214             : #else
     215             :   dtbl[DCTSIZE2 * 2] = 1;
     216             : #endif
     217           0 :   dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
     218             : 
     219           0 :   if(r <= 16) return 0;
     220           0 :   else return 1;
     221             : }
     222             : 
     223             : #endif
     224             : 
     225             : 
     226             : /*
     227             :  * Initialize for a processing pass.
     228             :  * Verify that all referenced Q-tables are present, and set up
     229             :  * the divisor table for each one.
     230             :  * In the current implementation, DCT of all components is done during
     231             :  * the first pass, even if only some components will be output in the
     232             :  * first scan.  Hence all components should be examined here.
     233             :  */
     234             : 
     235             : METHODDEF(void)
     236           0 : start_pass_fdctmgr (j_compress_ptr cinfo)
     237             : {
     238           0 :   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
     239             :   int ci, qtblno, i;
     240             :   jpeg_component_info *compptr;
     241             :   JQUANT_TBL *qtbl;
     242             :   DCTELEM *dtbl;
     243             : 
     244           0 :   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
     245           0 :        ci++, compptr++) {
     246           0 :     qtblno = compptr->quant_tbl_no;
     247             :     /* Make sure specified quantization table is present */
     248           0 :     if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
     249           0 :         cinfo->quant_tbl_ptrs[qtblno] == NULL)
     250           0 :       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
     251           0 :     qtbl = cinfo->quant_tbl_ptrs[qtblno];
     252             :     /* Compute divisors for this quant table */
     253             :     /* We may do this more than once for same table, but it's not a big deal */
     254           0 :     switch (cinfo->dct_method) {
     255             : #ifdef DCT_ISLOW_SUPPORTED
     256             :     case JDCT_ISLOW:
     257             :       /* For LL&M forward DCT method, divisors are equal to raw quantization
     258             :        * coefficients multiplied by 8 (to counteract scaling).
     259             :        */
     260           0 :       if (fdct->divisors[qtblno] == NULL) {
     261           0 :         fdct->divisors[qtblno] = (DCTELEM *)
     262           0 :           (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
     263             :                                       (DCTSIZE2 * 4) * sizeof(DCTELEM));
     264             :       }
     265           0 :       dtbl = fdct->divisors[qtblno];
     266           0 :       for (i = 0; i < DCTSIZE2; i++) {
     267             : #if BITS_IN_JSAMPLE == 8
     268           0 :         if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
     269           0 :             fdct->quantize == jsimd_quantize)
     270           0 :           fdct->quantize = quantize;
     271             : #else
     272             :         dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
     273             : #endif
     274             :       }
     275           0 :       break;
     276             : #endif
     277             : #ifdef DCT_IFAST_SUPPORTED
     278             :     case JDCT_IFAST:
     279             :       {
     280             :         /* For AA&N forward DCT method, divisors are equal to quantization
     281             :          * coefficients scaled by scalefactor[row]*scalefactor[col], where
     282             :          *   scalefactor[0] = 1
     283             :          *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
     284             :          * We apply a further scale factor of 8.
     285             :          */
     286             : #define CONST_BITS 14
     287             :         static const INT16 aanscales[DCTSIZE2] = {
     288             :           /* precomputed values scaled up by 14 bits */
     289             :           16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
     290             :           22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
     291             :           21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
     292             :           19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
     293             :           16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
     294             :           12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
     295             :            8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
     296             :            4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
     297             :         };
     298             :         SHIFT_TEMPS
     299             : 
     300           0 :         if (fdct->divisors[qtblno] == NULL) {
     301           0 :           fdct->divisors[qtblno] = (DCTELEM *)
     302           0 :             (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
     303             :                                         (DCTSIZE2 * 4) * sizeof(DCTELEM));
     304             :         }
     305           0 :         dtbl = fdct->divisors[qtblno];
     306           0 :         for (i = 0; i < DCTSIZE2; i++) {
     307             : #if BITS_IN_JSAMPLE == 8
     308           0 :           if (!compute_reciprocal(
     309           0 :                 DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i],
     310             :                                       (JLONG) aanscales[i]),
     311           0 :                         CONST_BITS-3), &dtbl[i]) &&
     312           0 :               fdct->quantize == jsimd_quantize)
     313           0 :             fdct->quantize = quantize;
     314             : #else
     315             :            dtbl[i] = (DCTELEM)
     316             :              DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i],
     317             :                                    (JLONG) aanscales[i]),
     318             :                      CONST_BITS-3);
     319             : #endif
     320             :         }
     321             :       }
     322           0 :       break;
     323             : #endif
     324             : #ifdef DCT_FLOAT_SUPPORTED
     325             :     case JDCT_FLOAT:
     326             :       {
     327             :         /* For float AA&N forward DCT method, divisors are equal to quantization
     328             :          * coefficients scaled by scalefactor[row]*scalefactor[col], where
     329             :          *   scalefactor[0] = 1
     330             :          *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
     331             :          * We apply a further scale factor of 8.
     332             :          * What's actually stored is 1/divisor so that the inner loop can
     333             :          * use a multiplication rather than a division.
     334             :          */
     335             :         FAST_FLOAT *fdtbl;
     336             :         int row, col;
     337             :         static const double aanscalefactor[DCTSIZE] = {
     338             :           1.0, 1.387039845, 1.306562965, 1.175875602,
     339             :           1.0, 0.785694958, 0.541196100, 0.275899379
     340             :         };
     341             : 
     342           0 :         if (fdct->float_divisors[qtblno] == NULL) {
     343           0 :           fdct->float_divisors[qtblno] = (FAST_FLOAT *)
     344           0 :             (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
     345             :                                         DCTSIZE2 * sizeof(FAST_FLOAT));
     346             :         }
     347           0 :         fdtbl = fdct->float_divisors[qtblno];
     348           0 :         i = 0;
     349           0 :         for (row = 0; row < DCTSIZE; row++) {
     350           0 :           for (col = 0; col < DCTSIZE; col++) {
     351           0 :             fdtbl[i] = (FAST_FLOAT)
     352           0 :               (1.0 / (((double) qtbl->quantval[i] *
     353           0 :                        aanscalefactor[row] * aanscalefactor[col] * 8.0)));
     354           0 :             i++;
     355             :           }
     356             :         }
     357             :       }
     358           0 :       break;
     359             : #endif
     360             :     default:
     361           0 :       ERREXIT(cinfo, JERR_NOT_COMPILED);
     362           0 :       break;
     363             :     }
     364             :   }
     365           0 : }
     366             : 
     367             : 
     368             : /*
     369             :  * Load data into workspace, applying unsigned->signed conversion.
     370             :  */
     371             : 
     372             : METHODDEF(void)
     373           0 : convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
     374             : {
     375             :   register DCTELEM *workspaceptr;
     376             :   register JSAMPROW elemptr;
     377             :   register int elemr;
     378             : 
     379           0 :   workspaceptr = workspace;
     380           0 :   for (elemr = 0; elemr < DCTSIZE; elemr++) {
     381           0 :     elemptr = sample_data[elemr] + start_col;
     382             : 
     383             : #if DCTSIZE == 8                /* unroll the inner loop */
     384           0 :     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
     385           0 :     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
     386           0 :     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
     387           0 :     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
     388           0 :     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
     389           0 :     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
     390           0 :     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
     391           0 :     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
     392             : #else
     393             :     {
     394             :       register int elemc;
     395             :       for (elemc = DCTSIZE; elemc > 0; elemc--)
     396             :         *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
     397             :     }
     398             : #endif
     399             :   }
     400           0 : }
     401             : 
     402             : 
     403             : /*
     404             :  * Quantize/descale the coefficients, and store into coef_blocks[].
     405             :  */
     406             : 
     407             : METHODDEF(void)
     408           0 : quantize (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
     409             : {
     410             :   int i;
     411             :   DCTELEM temp;
     412           0 :   JCOEFPTR output_ptr = coef_block;
     413             : 
     414             : #if BITS_IN_JSAMPLE == 8
     415             : 
     416             :   UDCTELEM recip, corr;
     417             :   int shift;
     418             :   UDCTELEM2 product;
     419             : 
     420           0 :   for (i = 0; i < DCTSIZE2; i++) {
     421           0 :     temp = workspace[i];
     422           0 :     recip = divisors[i + DCTSIZE2 * 0];
     423           0 :     corr =  divisors[i + DCTSIZE2 * 1];
     424           0 :     shift = divisors[i + DCTSIZE2 * 3];
     425             : 
     426           0 :     if (temp < 0) {
     427           0 :       temp = -temp;
     428           0 :       product = (UDCTELEM2)(temp + corr) * recip;
     429           0 :       product >>= shift + sizeof(DCTELEM)*8;
     430           0 :       temp = (DCTELEM)product;
     431           0 :       temp = -temp;
     432             :     } else {
     433           0 :       product = (UDCTELEM2)(temp + corr) * recip;
     434           0 :       product >>= shift + sizeof(DCTELEM)*8;
     435           0 :       temp = (DCTELEM)product;
     436             :     }
     437           0 :     output_ptr[i] = (JCOEF) temp;
     438             :   }
     439             : 
     440             : #else
     441             : 
     442             :   register DCTELEM qval;
     443             : 
     444             :   for (i = 0; i < DCTSIZE2; i++) {
     445             :     qval = divisors[i];
     446             :     temp = workspace[i];
     447             :     /* Divide the coefficient value by qval, ensuring proper rounding.
     448             :      * Since C does not specify the direction of rounding for negative
     449             :      * quotients, we have to force the dividend positive for portability.
     450             :      *
     451             :      * In most files, at least half of the output values will be zero
     452             :      * (at default quantization settings, more like three-quarters...)
     453             :      * so we should ensure that this case is fast.  On many machines,
     454             :      * a comparison is enough cheaper than a divide to make a special test
     455             :      * a win.  Since both inputs will be nonnegative, we need only test
     456             :      * for a < b to discover whether a/b is 0.
     457             :      * If your machine's division is fast enough, define FAST_DIVIDE.
     458             :      */
     459             : #ifdef FAST_DIVIDE
     460             : #define DIVIDE_BY(a,b)  a /= b
     461             : #else
     462             : #define DIVIDE_BY(a,b)  if (a >= b) a /= b; else a = 0
     463             : #endif
     464             :     if (temp < 0) {
     465             :       temp = -temp;
     466             :       temp += qval>>1;  /* for rounding */
     467             :       DIVIDE_BY(temp, qval);
     468             :       temp = -temp;
     469             :     } else {
     470             :       temp += qval>>1;  /* for rounding */
     471             :       DIVIDE_BY(temp, qval);
     472             :     }
     473             :     output_ptr[i] = (JCOEF) temp;
     474             :   }
     475             : 
     476             : #endif
     477             : 
     478           0 : }
     479             : 
     480             : 
     481             : /*
     482             :  * Perform forward DCT on one or more blocks of a component.
     483             :  *
     484             :  * The input samples are taken from the sample_data[] array starting at
     485             :  * position start_row/start_col, and moving to the right for any additional
     486             :  * blocks. The quantized coefficients are returned in coef_blocks[].
     487             :  */
     488             : 
     489             : METHODDEF(void)
     490           0 : forward_DCT (j_compress_ptr cinfo, jpeg_component_info *compptr,
     491             :              JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
     492             :              JDIMENSION start_row, JDIMENSION start_col,
     493             :              JDIMENSION num_blocks)
     494             : /* This version is used for integer DCT implementations. */
     495             : {
     496             :   /* This routine is heavily used, so it's worth coding it tightly. */
     497           0 :   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
     498           0 :   DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
     499             :   DCTELEM *workspace;
     500             :   JDIMENSION bi;
     501             : 
     502             :   /* Make sure the compiler doesn't look up these every pass */
     503           0 :   forward_DCT_method_ptr do_dct = fdct->dct;
     504           0 :   convsamp_method_ptr do_convsamp = fdct->convsamp;
     505           0 :   quantize_method_ptr do_quantize = fdct->quantize;
     506           0 :   workspace = fdct->workspace;
     507             : 
     508           0 :   sample_data += start_row;     /* fold in the vertical offset once */
     509             : 
     510           0 :   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
     511             :     /* Load data into workspace, applying unsigned->signed conversion */
     512           0 :     (*do_convsamp) (sample_data, start_col, workspace);
     513             : 
     514             :     /* Perform the DCT */
     515           0 :     (*do_dct) (workspace);
     516             : 
     517             :     /* Quantize/descale the coefficients, and store into coef_blocks[] */
     518           0 :     (*do_quantize) (coef_blocks[bi], divisors, workspace);
     519             :   }
     520           0 : }
     521             : 
     522             : 
     523             : #ifdef DCT_FLOAT_SUPPORTED
     524             : 
     525             : 
     526             : METHODDEF(void)
     527           0 : convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace)
     528             : {
     529             :   register FAST_FLOAT *workspaceptr;
     530             :   register JSAMPROW elemptr;
     531             :   register int elemr;
     532             : 
     533           0 :   workspaceptr = workspace;
     534           0 :   for (elemr = 0; elemr < DCTSIZE; elemr++) {
     535           0 :     elemptr = sample_data[elemr] + start_col;
     536             : #if DCTSIZE == 8                /* unroll the inner loop */
     537           0 :     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
     538           0 :     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
     539           0 :     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
     540           0 :     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
     541           0 :     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
     542           0 :     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
     543           0 :     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
     544           0 :     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
     545             : #else
     546             :     {
     547             :       register int elemc;
     548             :       for (elemc = DCTSIZE; elemc > 0; elemc--)
     549             :         *workspaceptr++ = (FAST_FLOAT)
     550             :                           (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
     551             :     }
     552             : #endif
     553             :   }
     554           0 : }
     555             : 
     556             : 
     557             : METHODDEF(void)
     558           0 : quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace)
     559             : {
     560             :   register FAST_FLOAT temp;
     561             :   register int i;
     562           0 :   register JCOEFPTR output_ptr = coef_block;
     563             : 
     564           0 :   for (i = 0; i < DCTSIZE2; i++) {
     565             :     /* Apply the quantization and scaling factor */
     566           0 :     temp = workspace[i] * divisors[i];
     567             : 
     568             :     /* Round to nearest integer.
     569             :      * Since C does not specify the direction of rounding for negative
     570             :      * quotients, we have to force the dividend positive for portability.
     571             :      * The maximum coefficient size is +-16K (for 12-bit data), so this
     572             :      * code should work for either 16-bit or 32-bit ints.
     573             :      */
     574           0 :     output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
     575             :   }
     576           0 : }
     577             : 
     578             : 
     579             : METHODDEF(void)
     580           0 : forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info *compptr,
     581             :                    JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
     582             :                    JDIMENSION start_row, JDIMENSION start_col,
     583             :                    JDIMENSION num_blocks)
     584             : /* This version is used for floating-point DCT implementations.
                     :  * For each of num_blocks blocks it calls the controller's
                     :  * float_convsamp, float_dct and float_quantize methods in turn,
                     :  * reusing the single fdct->float_workspace buffer, and stores the
                     :  * result for block bi in coef_blocks[bi].  start_row is added to
                     :  * sample_data once up front; start_col advances by DCTSIZE per block.
                     :  * The divisor table used is fdct->float_divisors[compptr->quant_tbl_no].
                     :  */
     585             : {
     586             :   /* This routine is heavily used, so it's worth coding it tightly. */
     587           0 :   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
     588           0 :   FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];
     589             :   FAST_FLOAT *workspace;
     590             :   JDIMENSION bi;
     591             : 
     592             : 
     593             :   /* Make sure the compiler doesn't look up these every pass */
     594           0 :   float_DCT_method_ptr do_dct = fdct->float_dct;
     595           0 :   float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
     596           0 :   float_quantize_method_ptr do_quantize = fdct->float_quantize;
     597           0 :   workspace = fdct->float_workspace;
     598             : 
     599           0 :   sample_data += start_row;     /* fold in the vertical offset once */
     600             : 
     601           0 :   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
     602             :     /* Load data into workspace, applying unsigned->signed conversion */
     603           0 :     (*do_convsamp) (sample_data, start_col, workspace);
     604             : 
     605             :     /* Perform the DCT */
     606           0 :     (*do_dct) (workspace);
     607             : 
     608             :     /* Quantize/descale the coefficients, and store into coef_blocks[] */
     609           0 :     (*do_quantize) (coef_blocks[bi], divisors, workspace);
     610             :   }
     611           0 : }
     612             : 
     613             : #endif /* DCT_FLOAT_SUPPORTED */
     614             : 
     615             : 
     616             : /*
     617             :  * Initialize FDCT manager.
                     :  *
                     :  * Allocates a my_fdct_controller from the JPOOL_IMAGE pool, installs it
                     :  * as cinfo->fdct and sets its start_pass method to start_pass_fdctmgr.
                     :  * Based on cinfo->dct_method it then selects the forward_DCT entry
                     :  * point, the DCT routine, and the sample-conversion and quantization
                     :  * routines; for each one the jsimd_* variant is chosen when the matching
                     :  * jsimd_can_*() query returns nonzero, and the fallback routine
                     :  * otherwise.  A dct_method with no compiled-in case reaches ERREXIT with
                     :  * JERR_NOT_COMPILED.  Finally a DCTSIZE2-element workspace is allocated
                     :  * (FAST_FLOAT for JDCT_FLOAT, DCTELEM otherwise) and every divisor table
                     :  * pointer is set to NULL.
     618             :  */
     619             : 
     620             : GLOBAL(void)
     621           0 : jinit_forward_dct (j_compress_ptr cinfo)
     622             : {
     623             :   my_fdct_ptr fdct;
     624             :   int i;
     625             : 
     626           0 :   fdct = (my_fdct_ptr)
     627           0 :     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
     628             :                                 sizeof(my_fdct_controller));
     629           0 :   cinfo->fdct = (struct jpeg_forward_dct *) fdct;
     630           0 :   fdct->pub.start_pass = start_pass_fdctmgr;
     631             : 
     632             :   /* First determine the DCT... */
     633           0 :   switch (cinfo->dct_method) {
     634             : #ifdef DCT_ISLOW_SUPPORTED
     635             :   case JDCT_ISLOW:
     636           0 :     fdct->pub.forward_DCT = forward_DCT;
     637           0 :     if (jsimd_can_fdct_islow())
     638           0 :       fdct->dct = jsimd_fdct_islow;
     639             :     else
     640           0 :       fdct->dct = jpeg_fdct_islow;
     641           0 :     break;
     642             : #endif
     643             : #ifdef DCT_IFAST_SUPPORTED
     644             :   case JDCT_IFAST:
     645           0 :     fdct->pub.forward_DCT = forward_DCT;
     646           0 :     if (jsimd_can_fdct_ifast())
     647           0 :       fdct->dct = jsimd_fdct_ifast;
     648             :     else
     649           0 :       fdct->dct = jpeg_fdct_ifast;
     650           0 :     break;
     651             : #endif
     652             : #ifdef DCT_FLOAT_SUPPORTED
     653             :   case JDCT_FLOAT:
     654           0 :     fdct->pub.forward_DCT = forward_DCT_float;
     655           0 :     if (jsimd_can_fdct_float())
     656           0 :       fdct->float_dct = jsimd_fdct_float;
     657             :     else
     658           0 :       fdct->float_dct = jpeg_fdct_float;
     659           0 :     break;
     660             : #endif
     661             :   default:
     662           0 :     ERREXIT(cinfo, JERR_NOT_COMPILED);
     663           0 :     break;
     664             :   }
     665             : 
     666             :   /* ...then the supporting stages.
                     :    * JDCT_ISLOW and JDCT_IFAST share one case body (the integer convsamp
                     :    * and quantize methods); JDCT_FLOAT selects the float counterparts.
                     :    */
     667           0 :   switch (cinfo->dct_method) {
     668             : #ifdef DCT_ISLOW_SUPPORTED
     669             :   case JDCT_ISLOW:
     670             : #endif
     671             : #ifdef DCT_IFAST_SUPPORTED
     672             :   case JDCT_IFAST:
     673             : #endif
     674             : #if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
     675           0 :     if (jsimd_can_convsamp())
     676           0 :       fdct->convsamp = jsimd_convsamp;
     677             :     else
     678           0 :       fdct->convsamp = convsamp;
     679           0 :     if (jsimd_can_quantize())
     680           0 :       fdct->quantize = jsimd_quantize;
     681             :     else
     682           0 :       fdct->quantize = quantize;
     683           0 :     break;
     684             : #endif
     685             : #ifdef DCT_FLOAT_SUPPORTED
     686             :   case JDCT_FLOAT:
     687           0 :     if (jsimd_can_convsamp_float())
     688           0 :       fdct->float_convsamp = jsimd_convsamp_float;
     689             :     else
     690           0 :       fdct->float_convsamp = convsamp_float;
     691           0 :     if (jsimd_can_quantize_float())
     692           0 :       fdct->float_quantize = jsimd_quantize_float;
     693             :     else
     694           0 :       fdct->float_quantize = quantize_float;
     695           0 :     break;
     696             : #endif
     697             :   default:
     698           0 :     ERREXIT(cinfo, JERR_NOT_COMPILED);
     699           0 :     break;
     700             :   }
     701             : 
     702             :   /* Allocate workspace memory */
     703             : #ifdef DCT_FLOAT_SUPPORTED
     704           0 :   if (cinfo->dct_method == JDCT_FLOAT)
     705           0 :     fdct->float_workspace = (FAST_FLOAT *)
     706           0 :       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
     707             :                                   sizeof(FAST_FLOAT) * DCTSIZE2);
     708             :   else
     709             : #endif
     710           0 :     fdct->workspace = (DCTELEM *)
     711           0 :       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
     712             :                                   sizeof(DCTELEM) * DCTSIZE2);
     713             : 
     714             :   /* Mark divisor tables unallocated */
     715           0 :   for (i = 0; i < NUM_QUANT_TBLS; i++) {
     716           0 :     fdct->divisors[i] = NULL;
     717             : #ifdef DCT_FLOAT_SUPPORTED
     718           0 :     fdct->float_divisors[i] = NULL;
     719             : #endif
     720             :   }
     721           0 : }

Generated by: LCOV version 1.13