Line data Source code
1 : /*
2 : * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "./vpx_dsp_rtcd.h"
12 :
13 : #include "vpx_config.h"
14 : #include "vp8_rtcd.h"
15 : #include "encodemb.h"
16 : #include "vp8/common/reconinter.h"
17 : #include "vp8/encoder/quantize.h"
18 : #include "tokenize.h"
19 : #include "vp8/common/invtrans.h"
20 : #include "vpx_mem/vpx_mem.h"
21 : #include "rdopt.h"
22 :
23 0 : void vp8_subtract_b(BLOCK *be, BLOCKD *bd, int pitch) {
24 0 : unsigned char *src_ptr = (*(be->base_src) + be->src);
25 0 : short *diff_ptr = be->src_diff;
26 0 : unsigned char *pred_ptr = bd->predictor;
27 0 : int src_stride = be->src_stride;
28 :
29 0 : vpx_subtract_block(4, 4, diff_ptr, pitch, src_ptr, src_stride, pred_ptr,
30 : pitch);
31 0 : }
32 :
/* Compute the two 8x8 chroma residuals of a macroblock.  Within the
 * macroblock diff buffer, U starts at offset 256 (after the 16x16 luma)
 * and V at offset 320. */
void vp8_subtract_mbuv(short *diff, unsigned char *usrc, unsigned char *vsrc,
                       int src_stride, unsigned char *upred,
                       unsigned char *vpred, int pred_stride) {
  vpx_subtract_block(8, 8, diff + 256, 8, usrc, src_stride, upred, pred_stride);
  vpx_subtract_block(8, 8, diff + 320, 8, vsrc, src_stride, vpred, pred_stride);
}
42 :
/* Compute the 16x16 luma residual of a macroblock. */
void vp8_subtract_mby(short *diff, unsigned char *src, int src_stride,
                      unsigned char *pred, int pred_stride) {
  const int size = 16; /* luma block dimension; also the diff pitch */
  vpx_subtract_block(size, size, diff, size, src, src_stride, pred,
                     pred_stride);
}
47 :
48 0 : static void vp8_subtract_mb(MACROBLOCK *x) {
49 0 : BLOCK *b = &x->block[0];
50 :
51 0 : vp8_subtract_mby(x->src_diff, *(b->base_src), b->src_stride,
52 0 : x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride);
53 0 : vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
54 0 : x->src.uv_stride, x->e_mbd.dst.u_buffer,
55 0 : x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride);
56 0 : }
57 :
58 0 : static void build_dcblock(MACROBLOCK *x) {
59 0 : short *src_diff_ptr = &x->src_diff[384];
60 : int i;
61 :
62 0 : for (i = 0; i < 16; ++i) {
63 0 : src_diff_ptr[i] = x->coeff[i * 16];
64 : }
65 0 : }
66 :
67 0 : void vp8_transform_mbuv(MACROBLOCK *x) {
68 : int i;
69 :
70 0 : for (i = 16; i < 24; i += 2) {
71 0 : x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
72 : }
73 0 : }
74 :
75 0 : void vp8_transform_intra_mby(MACROBLOCK *x) {
76 : int i;
77 :
78 0 : for (i = 0; i < 16; i += 2) {
79 0 : x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
80 : }
81 :
82 : /* build dc block from 16 y dc values */
83 0 : build_dcblock(x);
84 :
85 : /* do 2nd order transform on the dc block */
86 0 : x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
87 0 : }
88 :
89 0 : static void transform_mb(MACROBLOCK *x) {
90 : int i;
91 :
92 0 : for (i = 0; i < 16; i += 2) {
93 0 : x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
94 : }
95 :
96 : /* build dc block from 16 y dc values */
97 0 : if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) build_dcblock(x);
98 :
99 0 : for (i = 16; i < 24; i += 2) {
100 0 : x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
101 : }
102 :
103 : /* do 2nd order transform on the dc block */
104 0 : if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) {
105 0 : x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
106 : }
107 0 : }
108 :
109 0 : static void transform_mby(MACROBLOCK *x) {
110 : int i;
111 :
112 0 : for (i = 0; i < 16; i += 2) {
113 0 : x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
114 : }
115 :
116 : /* build dc block from 16 y dc values */
117 0 : if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) {
118 0 : build_dcblock(x);
119 0 : x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
120 : }
121 0 : }
122 :
/* Tie-breaker used when two trellis paths have identical RDCOST: compare
 * the low byte of the rounded rate term.  DM and D are unused but kept so
 * the argument list mirrors RDCOST. */
#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)

typedef struct vp8_token_state vp8_token_state;

/* One trellis node in optimize_b(): the best-so-far cost of coding the
 * remaining coefficients starting from this position. */
struct vp8_token_state {
  int rate;          /* total rate from here to the end of block */
  int error;         /* total squared error from here to the end of block */
  signed char next;  /* position of the next coded coefficient (16 = none) */
  signed char token; /* token emitted for this coefficient */
  short qc;          /* quantized coefficient value on this path */
};

/* TODO: experiments to find optimal multiple numbers */
#define Y1_RD_MULT 4
#define UV_RD_MULT 2
#define Y2_RD_MULT 16

/* Rate-distortion error multiplier, indexed by plane type
 * (Y no-DC, Y2, UV, Y with-DC). */
static const int plane_rd_mult[4] = { Y1_RD_MULT, Y2_RD_MULT, UV_RD_MULT,
                                      Y1_RD_MULT };
142 :
/* Trellis (Viterbi) optimization of one block's quantized coefficients.
 *
 * Working backward from the end of block, each nonzero coefficient gets a
 * trellis state with two alternatives: [0] keep the quantizer's output, and
 * [1] move it one step toward zero (when that rounding is plausible).  The
 * minimum rate-distortion path through the trellis determines the final
 * qcoeff/dqcoeff values and the new end-of-block position.
 *
 * ib:   block index within the macroblock (0..24).
 * type: plane type used to select token costs; !type also gives the first
 *       coefficient position (DC is skipped when a Y2 block carries it).
 * a, l: above/left entropy contexts; both are set to whether the block
 *       still codes any coefficients after optimization.
 */
static void optimize_b(MACROBLOCK *mb, int ib, int type, ENTROPY_CONTEXT *a,
                       ENTROPY_CONTEXT *l) {
  BLOCK *b;
  BLOCKD *d;
  vp8_token_state tokens[17][2];
  unsigned best_mask[2];
  const short *dequant_ptr;
  const short *coeff_ptr;
  short *qcoeff_ptr;
  short *dqcoeff_ptr;
  int eob;
  int i0;
  int rc;
  int x;
  int sz = 0;
  int next;
  int rdmult;
  int rddiv;
  int final_eob;
  int rd_cost0;
  int rd_cost1;
  int rate0;
  int rate1;
  int error0;
  int error1;
  int t0;
  int t1;
  int best;
  int band;
  int pt;
  int i;
  int err_mult = plane_rd_mult[type];

  b = &mb->block[ib];
  d = &mb->e_mbd.block[ib];

  dequant_ptr = d->dequant;
  coeff_ptr = b->coeff;
  qcoeff_ptr = d->qcoeff;
  dqcoeff_ptr = d->dqcoeff;
  i0 = !type; /* first coefficient position: 1 when DC lives in the Y2 block */
  eob = *d->eob;

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  rdmult = mb->rdmult * err_mult;
  /* Weight distortion slightly higher (9/16 rate weighting) for intra. */
  if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
    rdmult = (rdmult * 9) >> 4;
  }

  rddiv = mb->rddiv;
  best_mask[0] = best_mask[1] = 0;
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
  tokens[eob][0].next = 16;
  tokens[eob][0].token = DCT_EOB_TOKEN;
  tokens[eob][0].qc = 0;
  *(tokens[eob] + 1) = *(tokens[eob] + 0);
  next = eob;
  /* Walk the coefficients in reverse zig-zag order, from eob-1 down to i0. */
  for (i = eob; i-- > i0;) {
    int base_bits;
    int d2;
    int dx;

    rc = vp8_default_zig_zag1d[i];
    x = qcoeff_ptr[rc];
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
      t0 = (vp8_dct_value_tokens_ptr + x)->Token;
      /* Consider both possible successor states. */
      if (next < 16) {
        band = vp8_coef_bands[i + 1];
        pt = vp8_prev_token_class[t0];
        rate0 += mb->token_costs[type][band][pt][tokens[next][0].token];
        rate1 += mb->token_costs[type][band][pt][tokens[next][1].token];
      }
      rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
      rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
      if (rd_cost0 == rd_cost1) {
        rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
        rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
      }
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp8_dct_value_cost_ptr + x);
      dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
      best_mask[0] |= best << i;
      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

      /* The one-step-toward-zero alternative is only worth trying when the
       * unquantized value sits in the upper half of the quantizer bin. */
      if ((abs(x) * dequant_ptr[rc] > abs(coeff_ptr[rc])) &&
          (abs(x) * dequant_ptr[rc] < abs(coeff_ptr[rc]) + dequant_ptr[rc])) {
        shortcut = 1;
      } else {
        shortcut = 0;
      }

      if (shortcut) {
        /* Move x one step toward zero, preserving its sign. */
        sz = -(x < 0);
        x -= 2 * sz + 1;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         * we need to move the EOB back here.
         */
        t0 =
            tokens[next][0].token == DCT_EOB_TOKEN ? DCT_EOB_TOKEN : ZERO_TOKEN;
        t1 =
            tokens[next][1].token == DCT_EOB_TOKEN ? DCT_EOB_TOKEN : ZERO_TOKEN;
      } else {
        t0 = t1 = (vp8_dct_value_tokens_ptr + x)->Token;
      }
      if (next < 16) {
        band = vp8_coef_bands[i + 1];
        if (t0 != DCT_EOB_TOKEN) {
          pt = vp8_prev_token_class[t0];
          rate0 += mb->token_costs[type][band][pt][tokens[next][0].token];
        }
        if (t1 != DCT_EOB_TOKEN) {
          pt = vp8_prev_token_class[t1];
          rate1 += mb->token_costs[type][band][pt][tokens[next][1].token];
        }
      }

      rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
      rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
      if (rd_cost0 == rd_cost1) {
        rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
        rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
      }
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp8_dct_value_cost_ptr + x);

      if (shortcut) {
        /* Adjust the distortion for the step toward zero: the dequantized
         * value moved by dequant_ptr[rc] in the direction of sz. */
        dx -= (dequant_ptr[rc] + sz) ^ sz;
        d2 = dx * dx;
      }
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
      best_mask[1] |= best << i;
      /* Finally, make this the new head of the trellis. */
      next = i;
    }
    /* There's no choice to make for a zero coefficient, so we don't
     * add a new trellis node, but we do need to update the costs.
     */
    else {
      band = vp8_coef_bands[i + 1];
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != DCT_EOB_TOKEN) {
        tokens[next][0].rate += mb->token_costs[type][band][0][t0];
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != DCT_EOB_TOKEN) {
        tokens[next][1].rate += mb->token_costs[type][band][0][t1];
        tokens[next][1].token = ZERO_TOKEN;
      }
      /* Don't update next, because we didn't add a new node. */
    }
  }

  /* Now pick the best path through the whole trellis.
   * The loop above left i == i0 - 1, so i + 1 is the first coded position. */
  band = vp8_coef_bands[i + 1];
  VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
  rate0 += mb->token_costs[type][band][pt][t0];
  rate1 += mb->token_costs[type][band][pt][t1];
  rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
  rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
  if (rd_cost0 == rd_cost1) {
    rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
    rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
  }
  best = rd_cost1 < rd_cost0;
  final_eob = i0 - 1;
  /* Walk the winning path forward, writing back the chosen coefficients. */
  for (i = next; i < eob; i = next) {
    x = tokens[i][best].qc;
    if (x) final_eob = i;
    rc = vp8_default_zig_zag1d[i];
    qcoeff_ptr[rc] = x;
    dqcoeff_ptr[rc] = x * dequant_ptr[rc];
    next = tokens[i][best].next;
    best = (best_mask[best] >> i) & 1;
  }
  final_eob++;

  /* Context is "has coefficients": nonzero eob (or, for Y-no-DC, eob > 1). */
  *a = *l = (final_eob != !type);
  *d->eob = (char)final_eob;
}
358 0 : static void check_reset_2nd_coeffs(MACROBLOCKD *x, int type, ENTROPY_CONTEXT *a,
359 : ENTROPY_CONTEXT *l) {
360 0 : int sum = 0;
361 : int i;
362 0 : BLOCKD *bd = &x->block[24];
363 :
364 0 : if (bd->dequant[0] >= 35 && bd->dequant[1] >= 35) return;
365 :
366 0 : for (i = 0; i < (*bd->eob); ++i) {
367 0 : int coef = bd->dqcoeff[vp8_default_zig_zag1d[i]];
368 0 : sum += (coef >= 0) ? coef : -coef;
369 0 : if (sum >= 35) return;
370 : }
371 : /**************************************************************************
372 : our inverse hadamard transform effectively is weighted sum of all 16 inputs
373 : with weight either 1 or -1. It has a last stage scaling of (sum+3)>>3. And
374 : dc only idct is (dc+4)>>3. So if all the sums are between -35 and 29, the
375 : output after inverse wht and idct will be all zero. A sum of absolute value
376 : smaller than 35 guarantees all 16 different (+1/-1) weighted sums in wht
377 : fall between -35 and +35.
378 : **************************************************************************/
379 0 : if (sum < 35) {
380 0 : for (i = 0; i < (*bd->eob); ++i) {
381 0 : int rc = vp8_default_zig_zag1d[i];
382 0 : bd->qcoeff[rc] = 0;
383 0 : bd->dqcoeff[rc] = 0;
384 : }
385 0 : *bd->eob = 0;
386 0 : *a = *l = (*bd->eob != !type);
387 : }
388 : }
389 :
390 0 : static void optimize_mb(MACROBLOCK *x) {
391 : int b;
392 : int type;
393 : int has_2nd_order;
394 :
395 : ENTROPY_CONTEXT_PLANES t_above, t_left;
396 : ENTROPY_CONTEXT *ta;
397 : ENTROPY_CONTEXT *tl;
398 :
399 0 : memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
400 0 : memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
401 :
402 0 : ta = (ENTROPY_CONTEXT *)&t_above;
403 0 : tl = (ENTROPY_CONTEXT *)&t_left;
404 :
405 0 : has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED &&
406 0 : x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
407 0 : type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
408 :
409 0 : for (b = 0; b < 16; ++b) {
410 0 : optimize_b(x, b, type, ta + vp8_block2above[b], tl + vp8_block2left[b]);
411 : }
412 :
413 0 : for (b = 16; b < 24; ++b) {
414 0 : optimize_b(x, b, PLANE_TYPE_UV, ta + vp8_block2above[b],
415 0 : tl + vp8_block2left[b]);
416 : }
417 :
418 0 : if (has_2nd_order) {
419 0 : b = 24;
420 0 : optimize_b(x, b, PLANE_TYPE_Y2, ta + vp8_block2above[b],
421 0 : tl + vp8_block2left[b]);
422 0 : check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2, ta + vp8_block2above[b],
423 0 : tl + vp8_block2left[b]);
424 : }
425 0 : }
426 :
427 0 : void vp8_optimize_mby(MACROBLOCK *x) {
428 : int b;
429 : int type;
430 : int has_2nd_order;
431 :
432 : ENTROPY_CONTEXT_PLANES t_above, t_left;
433 : ENTROPY_CONTEXT *ta;
434 : ENTROPY_CONTEXT *tl;
435 :
436 0 : if (!x->e_mbd.above_context) return;
437 :
438 0 : if (!x->e_mbd.left_context) return;
439 :
440 0 : memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
441 0 : memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
442 :
443 0 : ta = (ENTROPY_CONTEXT *)&t_above;
444 0 : tl = (ENTROPY_CONTEXT *)&t_left;
445 :
446 0 : has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED &&
447 0 : x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
448 0 : type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
449 :
450 0 : for (b = 0; b < 16; ++b) {
451 0 : optimize_b(x, b, type, ta + vp8_block2above[b], tl + vp8_block2left[b]);
452 : }
453 :
454 0 : if (has_2nd_order) {
455 0 : b = 24;
456 0 : optimize_b(x, b, PLANE_TYPE_Y2, ta + vp8_block2above[b],
457 0 : tl + vp8_block2left[b]);
458 0 : check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2, ta + vp8_block2above[b],
459 0 : tl + vp8_block2left[b]);
460 : }
461 : }
462 :
463 0 : void vp8_optimize_mbuv(MACROBLOCK *x) {
464 : int b;
465 : ENTROPY_CONTEXT_PLANES t_above, t_left;
466 : ENTROPY_CONTEXT *ta;
467 : ENTROPY_CONTEXT *tl;
468 :
469 0 : if (!x->e_mbd.above_context) return;
470 :
471 0 : if (!x->e_mbd.left_context) return;
472 :
473 0 : memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
474 0 : memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
475 :
476 0 : ta = (ENTROPY_CONTEXT *)&t_above;
477 0 : tl = (ENTROPY_CONTEXT *)&t_left;
478 :
479 0 : for (b = 16; b < 24; ++b) {
480 0 : optimize_b(x, b, PLANE_TYPE_UV, ta + vp8_block2above[b],
481 0 : tl + vp8_block2left[b]);
482 : }
483 : }
484 :
485 0 : void vp8_encode_inter16x16(MACROBLOCK *x) {
486 0 : vp8_build_inter_predictors_mb(&x->e_mbd);
487 :
488 0 : vp8_subtract_mb(x);
489 :
490 0 : transform_mb(x);
491 :
492 0 : vp8_quantize_mb(x);
493 :
494 0 : if (x->optimize) optimize_mb(x);
495 0 : }
496 :
497 : /* this funciton is used by first pass only */
498 0 : void vp8_encode_inter16x16y(MACROBLOCK *x) {
499 0 : BLOCK *b = &x->block[0];
500 :
501 0 : vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.dst.y_buffer,
502 : x->e_mbd.dst.y_stride);
503 :
504 0 : vp8_subtract_mby(x->src_diff, *(b->base_src), b->src_stride,
505 0 : x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride);
506 :
507 0 : transform_mby(x);
508 :
509 0 : vp8_quantize_mby(x);
510 :
511 0 : vp8_inverse_transform_mby(&x->e_mbd);
512 0 : }
|