Line data Source code
1 : /*
2 : * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include <math.h>
12 : #include <limits.h>
13 :
14 : #include "vp9/common/vp9_alloccommon.h"
15 : #include "vp9/common/vp9_onyxc_int.h"
16 : #include "vp9/common/vp9_quant_common.h"
17 : #include "vp9/common/vp9_reconinter.h"
18 : #include "vp9/encoder/vp9_extend.h"
19 : #include "vp9/encoder/vp9_firstpass.h"
20 : #include "vp9/encoder/vp9_mcomp.h"
21 : #include "vp9/encoder/vp9_encoder.h"
22 : #include "vp9/encoder/vp9_quantize.h"
23 : #include "vp9/encoder/vp9_ratectrl.h"
24 : #include "vp9/encoder/vp9_segmentation.h"
25 : #include "vp9/encoder/vp9_temporal_filter.h"
26 : #include "vpx_dsp/vpx_dsp_common.h"
27 : #include "vpx_mem/vpx_mem.h"
28 : #include "vpx_ports/mem.h"
29 : #include "vpx_ports/vpx_timer.h"
30 : #include "vpx_scale/vpx_scale.h"
31 :
// Q19 reciprocal table: fixed_divide[d] == (1 << 19) / d for d in [1, 511],
// with entry 0 left as 0.  Filled in by vp9_temporal_filter_init() and used
// to replace per-pixel division when normalizing the filter accumulator
// (multiply by the table entry, then >> 19).
static int fixed_divide[512];
33 :
// Builds motion-compensated predictors for one 16x16 macroblock: the Y
// predictor is written to pred[0..255], U to pred[256..] and V to pred[512..],
// using the (mv_row, mv_col) motion vector and an 8-tap sharp interpolation
// kernel.  (x, y) is the block position used by the scaled-prediction path.
static void temporal_filter_predictors_mb_c(
    MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr,
    int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col,
    uint8_t *pred, struct scale_factors *scale, int x, int y) {
  const int which_mv = 0;
  const MV mv = { mv_row, mv_col };
  const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP_SHARP];

  enum mv_precision mv_precision_uv;
  int uv_stride;
  // An 8-wide chroma block means the chroma planes are subsampled relative to
  // luma: halve the stride (rounding up) and bump the mv precision to 1/16-pel
  // so the luma-domain mv lands correctly on the subsampled plane.
  if (uv_block_width == 8) {
    uv_stride = (stride + 1) >> 1;
    mv_precision_uv = MV_PRECISION_Q4;
  } else {
    uv_stride = stride;
    mv_precision_uv = MV_PRECISION_Q3;
  }

#if CONFIG_VP9_HIGHBITDEPTH
  // High-bitdepth frames take the highbd prediction path and return early.
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vp9_highbd_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale,
                                     16, 16, which_mv, kernel, MV_PRECISION_Q3,
                                     x, y, xd->bd);

    vp9_highbd_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256],
                                     uv_block_width, &mv, scale, uv_block_width,
                                     uv_block_height, which_mv, kernel,
                                     mv_precision_uv, x, y, xd->bd);

    vp9_highbd_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512],
                                     uv_block_width, &mv, scale, uv_block_width,
                                     uv_block_height, which_mv, kernel,
                                     mv_precision_uv, x, y, xd->bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // xd is only needed on the high-bitdepth path above.
  (void)xd;
  vp9_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale, 16, 16,
                            which_mv, kernel, MV_PRECISION_Q3, x, y);

  vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_width,
                            &mv, scale, uv_block_width, uv_block_height,
                            which_mv, kernel, mv_precision_uv, x, y);

  vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_width,
                            &mv, scale, uv_block_width, uv_block_height,
                            which_mv, kernel, mv_precision_uv, x, y);
}
82 :
83 0 : void vp9_temporal_filter_init(void) {
84 : int i;
85 :
86 0 : fixed_divide[0] = 0;
87 0 : for (i = 1; i < 512; ++i) fixed_divide[i] = 0x80000 / i;
88 0 : }
89 :
// Non-local-means temporal filter for one block.
//
// For each pixel of the predictor |frame2|, computes the mean squared
// difference against the reference |frame1| over the 3x3 neighbourhood
// (clipped at block edges), maps it through the strength curve to a weight
// in [0, 16], scales by |filter_weight|, and accumulates
// weight * predictor_pixel into |accumulator| and weight into |count|.
//
// frame1        reference pixels (read 3x3 around each position), row pitch
//               |stride|.
// frame2        predictor pixels, densely packed at |block_width| pitch.
// strength      right-shift applied to the error term (with round-to-nearest);
//               larger strength -> flatter weighting.
// filter_weight per-frame weight multiplier (0..2 at the call sites here).
void vp9_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride,
                                 uint8_t *frame2, unsigned int block_width,
                                 unsigned int block_height, int strength,
                                 int filter_weight, unsigned int *accumulator,
                                 uint16_t *count) {
  const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
  unsigned int row, col;
  unsigned int pos = 0;    // linear index into accumulator[]/count[]
  int src_offset = 0;      // offset of the current pixel within frame1

  for (row = 0; row < block_height; ++row) {
    for (col = 0; col < block_width; ++col, ++pos, ++src_offset) {
      const int pixel_value = *frame2;
      int sum_sse = 0;
      int num_valid = 0;
      int dy, dx;
      int modifier;

      // Non-local mean: sum squared differences over the 3x3 window,
      // skipping taps that fall outside the block.
      for (dy = -1; dy <= 1; ++dy) {
        for (dx = -1; dx <= 1; ++dx) {
          const int r = (int)row + dy;
          const int c = (int)col + dx;

          if (r < 0 || r >= (int)block_height || c < 0 ||
              c >= (int)block_width) {
            continue;
          }

          {
            const int diff = frame1[src_offset + dy * (int)stride + dx] -
                             frame2[dy * (int)block_width + dx];
            sum_sse += diff * diff;
            ++num_valid;
          }
        }
      }

      // At least the centre tap is always in range.
      assert(num_valid > 0);

      // Average the error over the valid taps (x3 to match the tuned curve).
      modifier = (sum_sse * 3) / num_valid;

      ++frame2;

      // Shift by strength with round-to-nearest, clamp, and invert so that
      // small errors yield large weights.
      modifier = (modifier + rounding) >> strength;
      if (modifier > 16) modifier = 16;
      modifier = (16 - modifier) * filter_weight;

      count[pos] += modifier;
      accumulator[pos] += modifier * pixel_value;
    }

    src_offset += stride - block_width;
  }
}
150 :
151 : #if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth counterpart of vp9_temporal_filter_apply_c(): identical
// non-local-means weighting, but the pixel buffers are uint16_t samples
// reached through CONVERT_TO_SHORTPTR on the uint8_t-typed handles.
void vp9_highbd_temporal_filter_apply_c(
    uint8_t *frame1_8, unsigned int stride, uint8_t *frame2_8,
    unsigned int block_width, unsigned int block_height, int strength,
    int filter_weight, unsigned int *accumulator, uint16_t *count) {
  uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8);
  uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8);
  const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
  unsigned int row, col;
  unsigned int pos = 0;    // linear index into accumulator[]/count[]
  int src_offset = 0;      // offset of the current pixel within frame1

  for (row = 0; row < block_height; ++row) {
    for (col = 0; col < block_width; ++col, ++pos, ++src_offset) {
      const int pixel_value = *frame2;
      int sum_sse = 0;
      int num_valid = 0;
      int dy, dx;
      int modifier;

      // Sum squared differences over the 3x3 window, skipping taps that
      // fall outside the block.
      for (dy = -1; dy <= 1; ++dy) {
        for (dx = -1; dx <= 1; ++dx) {
          const int r = (int)row + dy;
          const int c = (int)col + dx;

          if (r < 0 || r >= (int)block_height || c < 0 ||
              c >= (int)block_width) {
            continue;
          }

          {
            const int diff = frame1[src_offset + dy * (int)stride + dx] -
                             frame2[dy * (int)block_width + dx];
            sum_sse += diff * diff;
            ++num_valid;
          }
        }
      }

      // At least the centre tap is always in range.
      assert(num_valid > 0);

      modifier = (sum_sse * 3) / num_valid;

      ++frame2;

      // Shift by strength with round-to-nearest, clamp, and invert so that
      // small errors yield large weights.
      modifier = (modifier + rounding) >> strength;
      if (modifier > 16) modifier = 16;
      modifier = (16 - modifier) * filter_weight;

      count[pos] += modifier;
      accumulator[pos] += modifier * pixel_value;
    }

    src_offset += stride - block_width;
  }
}
209 : #endif // CONFIG_VP9_HIGHBITDEPTH
210 :
// Motion-searches |frame_ptr_buf| for the 16x16 block best matching the
// co-located block in |arf_frame_buf| (both buffers share |stride|).  A
// zero-centered full-pel HEX search is followed by sub-pel refinement with
// mv costing disabled; the winning mv is left in mbd->mi[0]->bmi[0] (read by
// the caller) and the error score reported by the sub-pel search is returned.
static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
                                                   uint8_t *arf_frame_buf,
                                                   uint8_t *frame_ptr_buf,
                                                   int stride) {
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  const SEARCH_METHODS old_search_method = mv_sf->search_method;
  int step_param;
  int sadpb = x->sadperbit16;
  uint32_t bestsme = UINT_MAX;
  uint32_t distortion;
  uint32_t sse;
  int cost_list[5];

  MV best_ref_mv1 = { 0, 0 };
  MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
  // The searched mv is written in place; the caller reads it back from here.
  MV *ref_mv = &x->e_mbd.mi[0]->bmi[0].as_mv[0].as_mv;

  // Save input state (src/pre buffers are temporarily repointed below).
  struct buf_2d src = x->plane[0].src;
  struct buf_2d pre = xd->plane[0].pre[0];

  // Convert the 1/8-pel reference mv to full-pel units for the first stage.
  best_ref_mv1_full.col = best_ref_mv1.col >> 3;
  best_ref_mv1_full.row = best_ref_mv1.row >> 3;

  // Setup frame pointers
  x->plane[0].src.buf = arf_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = frame_ptr_buf;
  xd->plane[0].pre[0].stride = stride;

  step_param = mv_sf->reduce_first_step_size;
  step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);

  // Temporarily force the HEX pattern for the full-pel stage; restored below.
  mv_sf->search_method = HEX;
  vp9_full_pixel_search(cpi, x, BLOCK_16X16, &best_ref_mv1_full, step_param,
                        sadpb, cond_cost_list(cpi, cost_list), &best_ref_mv1,
                        ref_mv, 0, 0);
  mv_sf->search_method = old_search_method;

  // Ignore mv costing by sending NULL pointer instead of cost array
  bestsme = cpi->find_fractional_mv_step(
      x, ref_mv, &best_ref_mv1, cpi->common.allow_high_precision_mv,
      x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 0,
      mv_sf->subpel_iters_per_step, cond_cost_list(cpi, cost_list), NULL, NULL,
      &distortion, &sse, NULL, 0, 0);

  // Restore input state
  x->plane[0].src = src;
  xd->plane[0].pre[0] = pre;

  return bestsme;
}
265 :
// Runs the ARNR temporal filter over every 16x16 macroblock of the frame at
// frames[alt_ref_index].  For each MB and each candidate frame a motion
// search picks the best-matching block, a weight (0/1/2) is assigned from the
// match error, and the motion-compensated pixels are blended into per-pixel
// accumulators.  The normalized result is written to cpi->alt_ref_buffer.
static void temporal_filter_iterate_c(VP9_COMP *cpi,
                                      YV12_BUFFER_CONFIG **frames,
                                      int frame_count, int alt_ref_index,
                                      int strength,
                                      struct scale_factors *scale) {
  int byte;
  int frame;
  int mb_col, mb_row;
  unsigned int filter_weight;
  int mb_cols = (frames[alt_ref_index]->y_crop_width + 15) >> 4;
  int mb_rows = (frames[alt_ref_index]->y_crop_height + 15) >> 4;
  int mb_y_offset = 0;
  int mb_uv_offset = 0;
  // Per-MB running sums, laid out [0..255] Y, [256..511] U, [512..767] V.
  DECLARE_ALIGNED(16, unsigned int, accumulator[16 * 16 * 3]);
  DECLARE_ALIGNED(16, uint16_t, count[16 * 16 * 3]);
  MACROBLOCKD *mbd = &cpi->td.mb.e_mbd;
  YV12_BUFFER_CONFIG *f = frames[alt_ref_index];
  uint8_t *dst1, *dst2;
#if CONFIG_VP9_HIGHBITDEPTH
  // For high bitdepth the predictor is a uint16_t buffer accessed through a
  // converted uint8_t handle; otherwise a plain byte buffer is used.
  DECLARE_ALIGNED(16, uint16_t, predictor16[16 * 16 * 3]);
  DECLARE_ALIGNED(16, uint8_t, predictor8[16 * 16 * 3]);
  uint8_t *predictor;
#else
  DECLARE_ALIGNED(16, uint8_t, predictor[16 * 16 * 3]);
#endif
  const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y;
  const int mb_uv_width = 16 >> mbd->plane[1].subsampling_x;

  // Save input state
  uint8_t *input_buffer[MAX_MB_PLANE];
  int i;
#if CONFIG_VP9_HIGHBITDEPTH
  if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    predictor = CONVERT_TO_BYTEPTR(predictor16);
  } else {
    predictor = predictor8;
  }
#endif

  for (i = 0; i < MAX_MB_PLANE; i++) input_buffer[i] = mbd->plane[i].pre[0].buf;

  for (mb_row = 0; mb_row < mb_rows; mb_row++) {
    // Source frames are extended to 16 pixels. This is different than
    // L/A/G reference frames that have a border of 32 (VP9ENCBORDERINPIXELS)
    // A 6/8 tap filter is used for motion search. This requires 2 pixels
    // before and 3 pixels after. So the largest Y mv on a border would
    // then be 16 - VP9_INTERP_EXTEND. The UV blocks are half the size of the
    // Y and therefore only extended by 8. The largest mv that a UV block
    // can support is 8 - VP9_INTERP_EXTEND. A UV mv is half of a Y mv.
    // (16 - VP9_INTERP_EXTEND) >> 1 which is greater than
    // 8 - VP9_INTERP_EXTEND.
    // To keep the mv in play for both Y and UV planes the max that it
    // can be on a border is therefore 16 - (2*VP9_INTERP_EXTEND+1).
    cpi->td.mb.mv_limits.row_min =
        -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND));
    cpi->td.mb.mv_limits.row_max =
        ((mb_rows - 1 - mb_row) * 16) + (17 - 2 * VP9_INTERP_EXTEND);

    for (mb_col = 0; mb_col < mb_cols; mb_col++) {
      int i, j, k;
      int stride;

      // Reset the blend sums for this macroblock.
      memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0]));
      memset(count, 0, 16 * 16 * 3 * sizeof(count[0]));

      cpi->td.mb.mv_limits.col_min =
          -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND));
      cpi->td.mb.mv_limits.col_max =
          ((mb_cols - 1 - mb_col) * 16) + (17 - 2 * VP9_INTERP_EXTEND);

      for (frame = 0; frame < frame_count; frame++) {
        // Motion-search error thresholds separating weights 2 / 1 / 0.
        const uint32_t thresh_low = 10000;
        const uint32_t thresh_high = 20000;

        if (frames[frame] == NULL) continue;

        // Start each search from the zero mv.
        mbd->mi[0]->bmi[0].as_mv[0].as_mv.row = 0;
        mbd->mi[0]->bmi[0].as_mv[0].as_mv.col = 0;

        if (frame == alt_ref_index) {
          // The ARF itself always gets full weight (it trivially matches).
          filter_weight = 2;
        } else {
          // Find best match in this frame by MC
          uint32_t err = temporal_filter_find_matching_mb_c(
              cpi, frames[alt_ref_index]->y_buffer + mb_y_offset,
              frames[frame]->y_buffer + mb_y_offset, frames[frame]->y_stride);

          // Assign higher weight to matching MB if its error
          // score is lower. If not applying MC default behavior
          // is to weight all MBs equal.
          filter_weight = err < thresh_low ? 2 : err < thresh_high ? 1 : 0;
        }

        if (filter_weight != 0) {
          // Construct the predictors (mv left behind by the search above).
          temporal_filter_predictors_mb_c(
              mbd, frames[frame]->y_buffer + mb_y_offset,
              frames[frame]->u_buffer + mb_uv_offset,
              frames[frame]->v_buffer + mb_uv_offset, frames[frame]->y_stride,
              mb_uv_width, mb_uv_height, mbd->mi[0]->bmi[0].as_mv[0].as_mv.row,
              mbd->mi[0]->bmi[0].as_mv[0].as_mv.col, predictor, scale,
              mb_col * 16, mb_row * 16);

#if CONFIG_VP9_HIGHBITDEPTH
          if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
            // Higher bit depth widens the error range; scale strength up.
            int adj_strength = strength + 2 * (mbd->bd - 8);
            // Apply the filter (YUV)
            vp9_highbd_temporal_filter_apply_c(
                f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16,
                adj_strength, filter_weight, accumulator, count);
            vp9_highbd_temporal_filter_apply_c(
                f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
                mb_uv_width, mb_uv_height, adj_strength, filter_weight,
                accumulator + 256, count + 256);
            vp9_highbd_temporal_filter_apply_c(
                f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
                mb_uv_width, mb_uv_height, adj_strength, filter_weight,
                accumulator + 512, count + 512);
          } else {
            // Apply the filter (YUV)
            vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
                                        predictor, 16, 16, strength,
                                        filter_weight, accumulator, count);
            vp9_temporal_filter_apply_c(
                f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
                mb_uv_width, mb_uv_height, strength, filter_weight,
                accumulator + 256, count + 256);
            vp9_temporal_filter_apply_c(
                f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
                mb_uv_width, mb_uv_height, strength, filter_weight,
                accumulator + 512, count + 512);
          }
#else
          // Apply the filter (YUV)
          // TODO(jingning): Need SIMD optimization for this.
          vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
                                      predictor, 16, 16, strength,
                                      filter_weight, accumulator, count);
          vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
                                      predictor + 256, mb_uv_width,
                                      mb_uv_height, strength, filter_weight,
                                      accumulator + 256, count + 256);
          vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
                                      predictor + 512, mb_uv_width,
                                      mb_uv_height, strength, filter_weight,
                                      accumulator + 512, count + 512);
#endif  // CONFIG_VP9_HIGHBITDEPTH
        }
      }

#if CONFIG_VP9_HIGHBITDEPTH
      if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        uint16_t *dst1_16;
        uint16_t *dst2_16;
        // Normalize filter output to produce AltRef frame
        dst1 = cpi->alt_ref_buffer.y_buffer;
        dst1_16 = CONVERT_TO_SHORTPTR(dst1);
        stride = cpi->alt_ref_buffer.y_stride;
        byte = mb_y_offset;
        for (i = 0, k = 0; i < 16; i++) {
          for (j = 0; j < 16; j++, k++) {
            // pval = accumulator / count, via the Q19 reciprocal table with
            // round-to-nearest (the +count/2 term).
            unsigned int pval = accumulator[k] + (count[k] >> 1);
            pval *= fixed_divide[count[k]];
            pval >>= 19;

            dst1_16[byte] = (uint16_t)pval;

            // move to next pixel
            byte++;
          }

          byte += stride - 16;
        }

        dst1 = cpi->alt_ref_buffer.u_buffer;
        dst2 = cpi->alt_ref_buffer.v_buffer;
        dst1_16 = CONVERT_TO_SHORTPTR(dst1);
        dst2_16 = CONVERT_TO_SHORTPTR(dst2);
        stride = cpi->alt_ref_buffer.uv_stride;
        byte = mb_uv_offset;
        for (i = 0, k = 256; i < mb_uv_height; i++) {
          for (j = 0; j < mb_uv_width; j++, k++) {
            int m = k + 256;  // V plane entry paired with U entry k

            // U
            unsigned int pval = accumulator[k] + (count[k] >> 1);
            pval *= fixed_divide[count[k]];
            pval >>= 19;
            dst1_16[byte] = (uint16_t)pval;

            // V
            pval = accumulator[m] + (count[m] >> 1);
            pval *= fixed_divide[count[m]];
            pval >>= 19;
            dst2_16[byte] = (uint16_t)pval;

            // move to next pixel
            byte++;
          }

          byte += stride - mb_uv_width;
        }
      } else {
        // Normalize filter output to produce AltRef frame
        dst1 = cpi->alt_ref_buffer.y_buffer;
        stride = cpi->alt_ref_buffer.y_stride;
        byte = mb_y_offset;
        for (i = 0, k = 0; i < 16; i++) {
          for (j = 0; j < 16; j++, k++) {
            unsigned int pval = accumulator[k] + (count[k] >> 1);
            pval *= fixed_divide[count[k]];
            pval >>= 19;

            dst1[byte] = (uint8_t)pval;

            // move to next pixel
            byte++;
          }
          byte += stride - 16;
        }

        dst1 = cpi->alt_ref_buffer.u_buffer;
        dst2 = cpi->alt_ref_buffer.v_buffer;
        stride = cpi->alt_ref_buffer.uv_stride;
        byte = mb_uv_offset;
        for (i = 0, k = 256; i < mb_uv_height; i++) {
          for (j = 0; j < mb_uv_width; j++, k++) {
            int m = k + 256;

            // U
            unsigned int pval = accumulator[k] + (count[k] >> 1);
            pval *= fixed_divide[count[k]];
            pval >>= 19;
            dst1[byte] = (uint8_t)pval;

            // V
            pval = accumulator[m] + (count[m] >> 1);
            pval *= fixed_divide[count[m]];
            pval >>= 19;
            dst2[byte] = (uint8_t)pval;

            // move to next pixel
            byte++;
          }
          byte += stride - mb_uv_width;
        }
      }
#else
      // Normalize filter output to produce AltRef frame
      dst1 = cpi->alt_ref_buffer.y_buffer;
      stride = cpi->alt_ref_buffer.y_stride;
      byte = mb_y_offset;
      for (i = 0, k = 0; i < 16; i++) {
        for (j = 0; j < 16; j++, k++) {
          // pval = accumulator / count, via the Q19 reciprocal table with
          // round-to-nearest (the +count/2 term).
          unsigned int pval = accumulator[k] + (count[k] >> 1);
          pval *= fixed_divide[count[k]];
          pval >>= 19;

          dst1[byte] = (uint8_t)pval;

          // move to next pixel
          byte++;
        }
        byte += stride - 16;
      }

      dst1 = cpi->alt_ref_buffer.u_buffer;
      dst2 = cpi->alt_ref_buffer.v_buffer;
      stride = cpi->alt_ref_buffer.uv_stride;
      byte = mb_uv_offset;
      for (i = 0, k = 256; i < mb_uv_height; i++) {
        for (j = 0; j < mb_uv_width; j++, k++) {
          int m = k + 256;  // V plane entry paired with U entry k

          // U
          unsigned int pval = accumulator[k] + (count[k] >> 1);
          pval *= fixed_divide[count[k]];
          pval >>= 19;
          dst1[byte] = (uint8_t)pval;

          // V
          pval = accumulator[m] + (count[m] >> 1);
          pval *= fixed_divide[count[m]];
          pval >>= 19;
          dst2[byte] = (uint8_t)pval;

          // move to next pixel
          byte++;
        }
        byte += stride - mb_uv_width;
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
      // Advance to the next MB column.
      mb_y_offset += 16;
      mb_uv_offset += mb_uv_width;
    }
    // Advance to the next MB row.
    // NOTE(review): these offsets use f's (the ARF frame's) strides but are
    // also applied to the other frames' buffers above — this assumes all
    // frames in |frames| share the same y/uv strides; confirm at call sites.
    mb_y_offset += 16 * (f->y_stride - mb_cols);
    mb_uv_offset += mb_uv_height * f->uv_stride - mb_uv_width * mb_cols;
  }

  // Restore input state
  for (i = 0; i < MAX_MB_PLANE; i++) mbd->plane[i].pre[0].buf = input_buffer[i];
}
568 :
569 : // Apply buffer limits and context specific adjustments to arnr filter.
// Apply buffer limits and context specific adjustments to arnr filter.
//
// Given the ARF's distance from the end of the lookahead queue and the boost
// of its GF group, computes the number of frames to blur (*arnr_frames) and
// the filter strength (*arnr_strength).  Strength is reduced at low Q, capped
// by the group boost, and (in two-pass multi-arf mode) halved for non-primary
// ARFs.
static void adjust_arnr_filter(VP9_COMP *cpi, int distance, int group_boost,
                               int *arnr_frames, int *arnr_strength) {
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  const int frames_after_arf =
      vp9_lookahead_depth(cpi->lookahead) - distance - 1;
  int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1;
  int frames_bwd;
  int q, frames, base_strength, strength;

  // Context dependent two pass adjustment to strength.
  if (oxcf->pass == 2) {
    base_strength = oxcf->arnr_strength + cpi->twopass.arnr_strength_adjustment;
    // Clip to allowed range.
    base_strength = VPXMIN(6, VPXMAX(0, base_strength));
  } else {
    base_strength = oxcf->arnr_strength;
  }

  // Define the forward and backwards filter limits for this arnr group.
  if (frames_fwd > frames_after_arf) frames_fwd = frames_after_arf;
  if (frames_fwd > distance) frames_fwd = distance;

  frames_bwd = frames_fwd;

  // For even length filter there is one more frame backward
  // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
  if (frames_bwd < distance) frames_bwd += (oxcf->arnr_max_frames + 1) & 0x1;

  // Set the baseline active filter size.
  frames = frames_bwd + 1 + frames_fwd;

  // Adjust the strength based on active max q.
  if (cpi->common.current_video_frame > 1)
    q = ((int)vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME],
                                      cpi->common.bit_depth));
  else
    q = ((int)vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[KEY_FRAME],
                                      cpi->common.bit_depth));
  if (q > 16) {
    strength = base_strength;
  } else {
    // At very low Q the source is near-lossless; soften the filter.
    strength = base_strength - ((16 - q) / 2);
    if (strength < 0) strength = 0;
  }

  // Adjust number of frames in filter and strength based on gf boost level.
  if (frames > group_boost / 150) {
    frames = group_boost / 150;
    frames += !(frames & 1);  // keep the filter length odd
  }

  if (strength > group_boost / 300) {
    strength = group_boost / 300;
  }

  // Adjustments for second level arf in multi arf case.
  if (cpi->oxcf.pass == 2 && cpi->multi_arf_allowed) {
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    if (gf_group->rf_level[gf_group->index] != GF_ARF_STD) {
      strength >>= 1;
    }
  }

  *arnr_frames = frames;
  *arnr_strength = strength;
}
636 :
// Entry point for ARNR temporal filtering: picks the frames around the ARF at
// lookahead position |distance|, sets up scaling factors (non-normative
// rescaling for spatial SVC), and runs temporal_filter_iterate_c() to produce
// cpi->alt_ref_buffer.
void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
  VP9_COMMON *const cm = &cpi->common;
  RATE_CONTROL *const rc = &cpi->rc;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  int frame;
  int frames_to_blur;
  int start_frame;
  int strength;
  int frames_to_blur_backward;
  int frames_to_blur_forward;
  struct scale_factors sf;
  YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };

  // Apply context specific adjustments to the arnr filter parameters.
  adjust_arnr_filter(cpi, distance, rc->gfu_boost, &frames_to_blur, &strength);
  // The ARF sits at index frames_to_blur_backward within frames[].
  frames_to_blur_backward = (frames_to_blur / 2);
  frames_to_blur_forward = ((frames_to_blur - 1) / 2);
  start_frame = distance + frames_to_blur_forward;

  // Setup frame pointers, NULL indicates frame not included in filter.
  // frames[] ends up in display order: oldest first, ARF in the middle.
  for (frame = 0; frame < frames_to_blur; ++frame) {
    const int which_buffer = start_frame - frame;
    struct lookahead_entry *buf =
        vp9_lookahead_peek(cpi->lookahead, which_buffer);
    frames[frames_to_blur - 1 - frame] = &buf->img;
  }

  // NOTE(review): if adjust_arnr_filter() ever produced 0 frames, |sf| would
  // be passed uninitialized to temporal_filter_iterate_c() below; this looks
  // unreachable for sane configs — confirm.
  if (frames_to_blur > 0) {
    // Setup scaling factors. Scaling on each of the arnr frames is not
    // supported.
    if (cpi->use_svc) {
      // In spatial svc the scaling factors might be less then 1/2.
      // So we will use non-normative scaling.
      int frame_used = 0;
#if CONFIG_VP9_HIGHBITDEPTH
      vp9_setup_scale_factors_for_frame(
          &sf, get_frame_new_buffer(cm)->y_crop_width,
          get_frame_new_buffer(cm)->y_crop_height,
          get_frame_new_buffer(cm)->y_crop_width,
          get_frame_new_buffer(cm)->y_crop_height, cm->use_highbitdepth);
#else
      vp9_setup_scale_factors_for_frame(
          &sf, get_frame_new_buffer(cm)->y_crop_width,
          get_frame_new_buffer(cm)->y_crop_height,
          get_frame_new_buffer(cm)->y_crop_width,
          get_frame_new_buffer(cm)->y_crop_height);
#endif  // CONFIG_VP9_HIGHBITDEPTH

      // Rescale any frame whose dimensions differ from the coded size.
      for (frame = 0; frame < frames_to_blur; ++frame) {
        if (cm->mi_cols * MI_SIZE != frames[frame]->y_width ||
            cm->mi_rows * MI_SIZE != frames[frame]->y_height) {
          if (vpx_realloc_frame_buffer(&cpi->svc.scaled_frames[frame_used],
                                       cm->width, cm->height, cm->subsampling_x,
                                       cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                                       cm->use_highbitdepth,
#endif
                                       VP9_ENC_BORDER_IN_PIXELS,
                                       cm->byte_alignment, NULL, NULL, NULL)) {
            vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                               "Failed to reallocate alt_ref_buffer");
          }
          frames[frame] = vp9_scale_if_required(
              cm, frames[frame], &cpi->svc.scaled_frames[frame_used], 0);
          ++frame_used;
        }
      }
      // Point the mode-info grid at the visible area for the filter pass.
      cm->mi = cm->mip + cm->mi_stride + 1;
      xd->mi = cm->mi_grid_visible;
      xd->mi[0] = cm->mi;
    } else {
      // ARF is produced at the native frame size and resized when coded.
#if CONFIG_VP9_HIGHBITDEPTH
      vp9_setup_scale_factors_for_frame(
          &sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
          frames[0]->y_crop_width, frames[0]->y_crop_height,
          cm->use_highbitdepth);
#else
      vp9_setup_scale_factors_for_frame(
          &sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
          frames[0]->y_crop_width, frames[0]->y_crop_height);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    }
  }

  temporal_filter_iterate_c(cpi, frames, frames_to_blur,
                            frames_to_blur_backward, strength, &sf);
}
|