Line data Source code
1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
#include <assert.h>
#include <limits.h>
#include <math.h>

#include "./aom_config.h"
#include "av1/common/alloccommon.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/odintrin.h"
#include "av1/encoder/av1_quantize.h"
#include "av1/encoder/extend.h"
#include "av1/encoder/firstpass.h"
#include "av1/encoder/mcomp.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/segmentation.h"
#include "av1/encoder/temporal_filter.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"
#include "aom_ports/aom_timer.h"
#include "aom_scale/aom_scale.h"
34 :
// Build the motion-compensated predictors for one 16x16 macroblock from a
// reference frame: the 16x16 luma block plus both chroma blocks, written into
// the packed buffer `pred` (Y at pred[0], U at pred[256], V at pred[512]).
// (mv_row, mv_col) is the motion vector found by the ME search; (x, y) is the
// macroblock's pixel position in the frame.
static void temporal_filter_predictors_mb_c(
    MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr,
    int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col,
    uint8_t *pred, struct scale_factors *scale, int x, int y) {
  const int which_mv = 0;  // single reference; always list 0
  const MV mv = { mv_row, mv_col };
  enum mv_precision mv_precision_uv;
  int uv_stride;
  // TODO(angiebird): change plane setting accordingly
  ConvolveParams conv_params = get_conv_params(which_mv, 0);

#if USE_TEMPORALFILTER_12TAP
#if CONFIG_DUAL_FILTER
  // Dedicated 12-tap filter for temporal filtering, one entry per direction
  // when dual filters are enabled.
  const InterpFilter interp_filter[4] = { TEMPORALFILTER_12TAP,
                                          TEMPORALFILTER_12TAP,
                                          TEMPORALFILTER_12TAP,
                                          TEMPORALFILTER_12TAP };
#else
  const InterpFilter interp_filter = TEMPORALFILTER_12TAP;
#endif
  (void)xd;
#else
  const InterpFilter interp_filter = xd->mi[0]->mbmi.interp_filter;
#endif  // USE_TEMPORALFILTER_12TAP
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
  // Zeroed: no global/warped motion is applied during temporal filtering.
  WarpTypesAllowed warp_types;
  memset(&warp_types, 0, sizeof(WarpTypesAllowed));
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION

  // uv_block_width == 8 corresponds to subsampled chroma: halve the stride
  // and use Q4 MV precision so the half-resolution chroma MV stays exact.
  if (uv_block_width == 8) {
    uv_stride = (stride + 1) >> 1;
    mv_precision_uv = MV_PRECISION_Q4;
  } else {
    uv_stride = stride;
    mv_precision_uv = MV_PRECISION_Q3;
  }

#if CONFIG_HIGHBITDEPTH
  // High-bit-depth path: same three predictor builds, then early return.
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    av1_highbd_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale,
                                     16, 16, which_mv, interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                                     &warp_types, x, y,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                                     0, MV_PRECISION_Q3, x, y, xd);

    av1_highbd_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256],
                                     uv_block_width, &mv, scale, uv_block_width,
                                     uv_block_height, which_mv, interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                                     &warp_types, x, y,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                                     1, mv_precision_uv, x, y, xd);

    av1_highbd_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512],
                                     uv_block_width, &mv, scale, uv_block_width,
                                     uv_block_height, which_mv, interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                                     &warp_types, x, y,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                                     2, mv_precision_uv, x, y, xd);
    return;
  }
#endif  // CONFIG_HIGHBITDEPTH
  // 8-bit path: Y predictor at 16x16, then U and V at chroma dimensions.
  av1_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale, 16, 16,
                            &conv_params, interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                            &warp_types, x, y, 0, 0,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                            MV_PRECISION_Q3, x, y, xd);

  av1_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_width,
                            &mv, scale, uv_block_width, uv_block_height,
                            &conv_params, interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                            &warp_types, x, y, 1, 0,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                            mv_precision_uv, x, y, xd);

  av1_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_width,
                            &mv, scale, uv_block_width, uv_block_height,
                            &conv_params, interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                            &warp_types, x, y, 2, 0,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                            mv_precision_uv, x, y, xd);
}
122 :
123 0 : void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride,
124 : uint8_t *frame2, unsigned int block_width,
125 : unsigned int block_height, int strength,
126 : int filter_weight, unsigned int *accumulator,
127 : uint16_t *count) {
128 : unsigned int i, j, k;
129 : int modifier;
130 0 : int byte = 0;
131 0 : const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
132 :
133 0 : for (i = 0, k = 0; i < block_height; i++) {
134 0 : for (j = 0; j < block_width; j++, k++) {
135 0 : int pixel_value = *frame2;
136 :
137 : // non-local mean approach
138 0 : int diff_sse[9] = { 0 };
139 0 : int idx, idy, index = 0;
140 :
141 0 : for (idy = -1; idy <= 1; ++idy) {
142 0 : for (idx = -1; idx <= 1; ++idx) {
143 0 : int row = (int)i + idy;
144 0 : int col = (int)j + idx;
145 :
146 0 : if (row >= 0 && row < (int)block_height && col >= 0 &&
147 0 : col < (int)block_width) {
148 0 : int diff = frame1[byte + idy * (int)stride + idx] -
149 0 : frame2[idy * (int)block_width + idx];
150 0 : diff_sse[index] = diff * diff;
151 0 : ++index;
152 : }
153 : }
154 : }
155 :
156 0 : assert(index > 0);
157 :
158 0 : modifier = 0;
159 0 : for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx];
160 :
161 0 : modifier *= 3;
162 0 : modifier /= index;
163 :
164 0 : ++frame2;
165 :
166 0 : modifier += rounding;
167 0 : modifier >>= strength;
168 :
169 0 : if (modifier > 16) modifier = 16;
170 :
171 0 : modifier = 16 - modifier;
172 0 : modifier *= filter_weight;
173 :
174 0 : count[k] += modifier;
175 0 : accumulator[k] += modifier * pixel_value;
176 :
177 0 : byte++;
178 : }
179 :
180 0 : byte += stride - block_width;
181 : }
182 0 : }
183 :
184 : #if CONFIG_HIGHBITDEPTH
// High-bit-depth counterpart of av1_temporal_filter_apply_c: identical
// non-local-mean weighting, operating on 16-bit samples reached through the
// CONVERT_TO_SHORTPTR aliases of the 8-bit pointer arguments.
void av1_highbd_temporal_filter_apply_c(
    uint8_t *frame1_8, unsigned int stride, uint8_t *frame2_8,
    unsigned int block_width, unsigned int block_height, int strength,
    int filter_weight, unsigned int *accumulator, uint16_t *count) {
  uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8);
  uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8);
  const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
  const int h = (int)block_height;
  const int w = (int)block_width;
  int r, c;

  for (r = 0; r < h; ++r) {
    for (c = 0; c < w; ++c) {
      const int k = r * w + c;                // index into the packed block
      const int src = r * (int)stride + c;    // index into the strided source
      const int pixel_value = frame2[k];
      int sum_sse = 0;
      int taps = 0;
      int dr, dc;
      int modifier;

      // Sum squared differences over the 3x3 window, clipped at the block
      // borders; `taps` counts how many neighbors were actually in range.
      for (dr = -1; dr <= 1; ++dr) {
        for (dc = -1; dc <= 1; ++dc) {
          const int rr = r + dr;
          const int cc = c + dc;
          if (rr < 0 || rr >= h || cc < 0 || cc >= w) continue;
          {
            const int diff =
                frame1[src + dr * (int)stride + dc] - frame2[k + dr * w + dc];
            sum_sse += diff * diff;
            ++taps;
          }
        }
      }

      // The center tap is always in range.
      assert(taps > 0);

      // Average SSE (x3) -> rounded shift by strength -> clamp to [0, 16],
      // then invert so lower error gives higher weight.
      modifier = (sum_sse * 3) / taps;
      modifier = (modifier + rounding) >> strength;
      if (modifier > 16) modifier = 16;
      modifier = (16 - modifier) * filter_weight;

      count[k] += modifier;
      accumulator[k] += modifier * pixel_value;
    }
  }
}
246 : #endif // CONFIG_HIGHBITDEPTH
247 :
// Motion-search for the 16x16 block in `frame_ptr_buf` that best matches the
// ARF block at `arf_frame_buf` (both at `stride`). Runs a zero-centered hex
// search followed by subpel refinement, stores the winning MV in
// mi[0]->bmi[0].as_mv[0], and returns the best error score (INT_MAX if the
// search never improved it). Temporarily repoints the encoder's src/pre
// buffers; both are restored before returning.
static int temporal_filter_find_matching_mb_c(AV1_COMP *cpi,
                                              uint8_t *arf_frame_buf,
                                              uint8_t *frame_ptr_buf,
                                              int stride) {
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  int step_param;
  int sadpb = x->sadperbit16;
  int bestsme = INT_MAX;
  int distortion;
  unsigned int sse;
  int cost_list[5];
  // Saved so the MV range clamp below can be undone.
  MvLimits tmp_mv_limits = x->mv_limits;

  MV best_ref_mv1 = { 0, 0 };  // search is centered on the zero MV
  MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */

  // Save input state
  struct buf_2d src = x->plane[0].src;
  struct buf_2d pre = xd->plane[0].pre[0];

  // Convert the 1/8-pel MV to full-pel units for the integer search.
  best_ref_mv1_full.col = best_ref_mv1.col >> 3;
  best_ref_mv1_full.row = best_ref_mv1.row >> 3;

  // Setup frame pointers
  x->plane[0].src.buf = arf_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = frame_ptr_buf;
  xd->plane[0].pre[0].stride = stride;

  step_param = mv_sf->reduce_first_step_size;
  step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 2);

  av1_set_mv_search_range(&x->mv_limits, &best_ref_mv1);

  x->mvcost = x->mv_cost_stack[0];
  x->nmvjointcost = x->nmv_vec_cost[0];

  // Use mv costing from x->mvcost directly
  av1_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,
                 cond_cost_list(cpi, cost_list), &cpi->fn_ptr[BLOCK_16X16], 0,
                 &best_ref_mv1);

  // Restore limits before subpel refinement (which clamps internally).
  x->mv_limits = tmp_mv_limits;

  // Ignore mv costing by sending NULL pointer instead of cost array
  bestsme = cpi->find_fractional_mv_step(
      x, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
      &cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_iters_per_step,
      cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL,
#if CONFIG_EXT_INTER
      NULL, 0, 0,
#endif
      0, 0, 0);

  // Publish the result where the caller reads it.
  x->e_mbd.mi[0]->bmi[0].as_mv[0] = x->best_mv;

  // Restore input state
  x->plane[0].src = src;
  xd->plane[0].pre[0] = pre;

  return bestsme;
}
312 :
// Core temporal-filter loop. For every 16x16 macroblock of the ARF
// (frames[alt_ref_index]), motion-search each candidate frame, build its
// predictors, accumulate the non-local-mean weighted samples for Y/U/V into
// (accumulator, count), then normalize the result into cpi->alt_ref_buffer.
// NULL entries in `frames` are skipped.
static void temporal_filter_iterate_c(AV1_COMP *cpi,
                                      YV12_BUFFER_CONFIG **frames,
                                      int frame_count, int alt_ref_index,
                                      int strength,
                                      struct scale_factors *scale) {
  int byte;
  int frame;
  int mb_col, mb_row;
  unsigned int filter_weight;
  // Frame size in macroblocks, rounded up.
  int mb_cols = (frames[alt_ref_index]->y_crop_width + 15) >> 4;
  int mb_rows = (frames[alt_ref_index]->y_crop_height + 15) >> 4;
  int mb_y_offset = 0;
  int mb_uv_offset = 0;
  // Packed per-MB running sums: Y at [0], U at [256], V at [512].
  DECLARE_ALIGNED(16, unsigned int, accumulator[16 * 16 * 3]);
  DECLARE_ALIGNED(16, uint16_t, count[16 * 16 * 3]);
  MACROBLOCKD *mbd = &cpi->td.mb.e_mbd;
  YV12_BUFFER_CONFIG *f = frames[alt_ref_index];
  uint8_t *dst1, *dst2;
#if CONFIG_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, predictor16[16 * 16 * 3]);
  DECLARE_ALIGNED(16, uint8_t, predictor8[16 * 16 * 3]);
  uint8_t *predictor;
#else
  DECLARE_ALIGNED(16, uint8_t, predictor[16 * 16 * 3]);
#endif
  // Chroma block dimensions derived from the subsampling of plane 1.
  const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y;
  const int mb_uv_width = 16 >> mbd->plane[1].subsampling_x;

  // Save input state
  uint8_t *input_buffer[MAX_MB_PLANE];
  int i;
#if CONFIG_HIGHBITDEPTH
  // Pick the 8-bit or 16-bit predictor scratch buffer to match the source.
  if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    predictor = CONVERT_TO_BYTEPTR(predictor16);
  } else {
    predictor = predictor8;
  }
#endif

  for (i = 0; i < MAX_MB_PLANE; i++) input_buffer[i] = mbd->plane[i].pre[0].buf;

  for (mb_row = 0; mb_row < mb_rows; mb_row++) {
    // Source frames are extended to 16 pixels. This is different than
    // L/A/G reference frames that have a border of 32 (AV1ENCBORDERINPIXELS)
    // A 6/8 tap filter is used for motion search. This requires 2 pixels
    // before and 3 pixels after. So the largest Y mv on a border would
    // then be 16 - AOM_INTERP_EXTEND. The UV blocks are half the size of the
    // Y and therefore only extended by 8. The largest mv that a UV block
    // can support is 8 - AOM_INTERP_EXTEND. A UV mv is half of a Y mv.
    // (16 - AOM_INTERP_EXTEND) >> 1 which is greater than
    // 8 - AOM_INTERP_EXTEND.
    // To keep the mv in play for both Y and UV planes the max that it
    // can be on a border is therefore 16 - (2*AOM_INTERP_EXTEND+1).
    cpi->td.mb.mv_limits.row_min =
        -((mb_row * 16) + (17 - 2 * AOM_INTERP_EXTEND));
    cpi->td.mb.mv_limits.row_max =
        ((mb_rows - 1 - mb_row) * 16) + (17 - 2 * AOM_INTERP_EXTEND);

    for (mb_col = 0; mb_col < mb_cols; mb_col++) {
      int j, k;
      int stride;

      // Fresh sums for this macroblock.
      memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0]));
      memset(count, 0, 16 * 16 * 3 * sizeof(count[0]));

      cpi->td.mb.mv_limits.col_min =
          -((mb_col * 16) + (17 - 2 * AOM_INTERP_EXTEND));
      cpi->td.mb.mv_limits.col_max =
          ((mb_cols - 1 - mb_col) * 16) + (17 - 2 * AOM_INTERP_EXTEND);

      for (frame = 0; frame < frame_count; frame++) {
        // Error thresholds that map to filter weights 2 / 1 / 0.
        const int thresh_low = 10000;
        const int thresh_high = 20000;

        if (frames[frame] == NULL) continue;

        // Reset the MV; the search below may overwrite it.
        mbd->mi[0]->bmi[0].as_mv[0].as_mv.row = 0;
        mbd->mi[0]->bmi[0].as_mv[0].as_mv.col = 0;

        if (frame == alt_ref_index) {
          // The ARF itself always gets full weight (and the zero MV).
          filter_weight = 2;
        } else {
          // Find best match in this frame by MC
          int err = temporal_filter_find_matching_mb_c(
              cpi, frames[alt_ref_index]->y_buffer + mb_y_offset,
              frames[frame]->y_buffer + mb_y_offset, frames[frame]->y_stride);

          // Assign higher weight to matching MB if it's error
          // score is lower. If not applying MC default behavior
          // is to weight all MBs equal.
          filter_weight = err < thresh_low ? 2 : err < thresh_high ? 1 : 0;
        }

        if (filter_weight != 0) {
          // Construct the predictors
          temporal_filter_predictors_mb_c(
              mbd, frames[frame]->y_buffer + mb_y_offset,
              frames[frame]->u_buffer + mb_uv_offset,
              frames[frame]->v_buffer + mb_uv_offset, frames[frame]->y_stride,
              mb_uv_width, mb_uv_height, mbd->mi[0]->bmi[0].as_mv[0].as_mv.row,
              mbd->mi[0]->bmi[0].as_mv[0].as_mv.col, predictor, scale,
              mb_col * 16, mb_row * 16);

#if CONFIG_HIGHBITDEPTH
          if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
            // Scale strength with bit depth so the SSE threshold keeps the
            // same relative meaning at 10/12 bits.
            int adj_strength = strength + 2 * (mbd->bd - 8);
            // Apply the filter (YUV)
            av1_highbd_temporal_filter_apply(
                f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16,
                adj_strength, filter_weight, accumulator, count);
            av1_highbd_temporal_filter_apply(
                f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
                mb_uv_width, mb_uv_height, adj_strength, filter_weight,
                accumulator + 256, count + 256);
            av1_highbd_temporal_filter_apply(
                f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
                mb_uv_width, mb_uv_height, adj_strength, filter_weight,
                accumulator + 512, count + 512);
          } else {
            // Apply the filter (YUV)
            av1_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
                                        predictor, 16, 16, strength,
                                        filter_weight, accumulator, count);
            av1_temporal_filter_apply_c(
                f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
                mb_uv_width, mb_uv_height, strength, filter_weight,
                accumulator + 256, count + 256);
            av1_temporal_filter_apply_c(
                f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
                mb_uv_width, mb_uv_height, strength, filter_weight,
                accumulator + 512, count + 512);
          }
#else
          // Apply the filter (YUV)
          av1_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
                                      predictor, 16, 16, strength,
                                      filter_weight, accumulator, count);
          av1_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
                                      predictor + 256, mb_uv_width,
                                      mb_uv_height, strength, filter_weight,
                                      accumulator + 256, count + 256);
          av1_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
                                      predictor + 512, mb_uv_width,
                                      mb_uv_height, strength, filter_weight,
                                      accumulator + 512, count + 512);
#endif  // CONFIG_HIGHBITDEPTH
        }
      }

#if CONFIG_HIGHBITDEPTH
      if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        uint16_t *dst1_16;
        uint16_t *dst2_16;
        // Normalize filter output to produce AltRef frame
        // (rounded divide: accumulator / count per pixel).
        dst1 = cpi->alt_ref_buffer.y_buffer;
        dst1_16 = CONVERT_TO_SHORTPTR(dst1);
        stride = cpi->alt_ref_buffer.y_stride;
        byte = mb_y_offset;
        for (i = 0, k = 0; i < 16; i++) {
          for (j = 0; j < 16; j++, k++) {
            dst1_16[byte] =
                (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);

            // move to next pixel
            byte++;
          }

          byte += stride - 16;
        }

        dst1 = cpi->alt_ref_buffer.u_buffer;
        dst2 = cpi->alt_ref_buffer.v_buffer;
        dst1_16 = CONVERT_TO_SHORTPTR(dst1);
        dst2_16 = CONVERT_TO_SHORTPTR(dst2);
        stride = cpi->alt_ref_buffer.uv_stride;
        byte = mb_uv_offset;
        for (i = 0, k = 256; i < mb_uv_height; i++) {
          for (j = 0; j < mb_uv_width; j++, k++) {
            int m = k + 256;  // V lives 256 entries after U in the packing

            // U
            dst1_16[byte] =
                (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);

            // V
            dst2_16[byte] =
                (uint16_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);

            // move to next pixel
            byte++;
          }

          byte += stride - mb_uv_width;
        }
      } else {
        // Normalize filter output to produce AltRef frame
        dst1 = cpi->alt_ref_buffer.y_buffer;
        stride = cpi->alt_ref_buffer.y_stride;
        byte = mb_y_offset;
        for (i = 0, k = 0; i < 16; i++) {
          for (j = 0; j < 16; j++, k++) {
            dst1[byte] =
                (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);

            // move to next pixel
            byte++;
          }
          byte += stride - 16;
        }

        dst1 = cpi->alt_ref_buffer.u_buffer;
        dst2 = cpi->alt_ref_buffer.v_buffer;
        stride = cpi->alt_ref_buffer.uv_stride;
        byte = mb_uv_offset;
        for (i = 0, k = 256; i < mb_uv_height; i++) {
          for (j = 0; j < mb_uv_width; j++, k++) {
            int m = k + 256;  // V lives 256 entries after U in the packing

            // U
            dst1[byte] =
                (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);

            // V
            dst2[byte] =
                (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);

            // move to next pixel
            byte++;
          }
          byte += stride - mb_uv_width;
        }
      }
#else
      // Normalize filter output to produce AltRef frame
      dst1 = cpi->alt_ref_buffer.y_buffer;
      stride = cpi->alt_ref_buffer.y_stride;
      byte = mb_y_offset;
      for (i = 0, k = 0; i < 16; i++) {
        for (j = 0; j < 16; j++, k++) {
          dst1[byte] =
              (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);

          // move to next pixel
          byte++;
        }
        byte += stride - 16;
      }

      dst1 = cpi->alt_ref_buffer.u_buffer;
      dst2 = cpi->alt_ref_buffer.v_buffer;
      stride = cpi->alt_ref_buffer.uv_stride;
      byte = mb_uv_offset;
      for (i = 0, k = 256; i < mb_uv_height; i++) {
        for (j = 0; j < mb_uv_width; j++, k++) {
          int m = k + 256;  // V lives 256 entries after U in the packing

          // U
          dst1[byte] =
              (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);

          // V
          dst2[byte] =
              (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);

          // move to next pixel
          byte++;
        }
        byte += stride - mb_uv_width;
      }
#endif  // CONFIG_HIGHBITDEPTH
      // Advance the per-plane offsets to the next macroblock column.
      mb_y_offset += 16;
      mb_uv_offset += mb_uv_width;
    }
    // Advance to the next macroblock row (stride minus what the columns
    // consumed).
    mb_y_offset += 16 * (f->y_stride - mb_cols);
    mb_uv_offset += mb_uv_height * f->uv_stride - mb_uv_width * mb_cols;
  }

  // Restore input state
  for (i = 0; i < MAX_MB_PLANE; i++) mbd->plane[i].pre[0].buf = input_buffer[i];
}
593 :
594 : // Apply buffer limits and context specific adjustments to arnr filter.
595 0 : static void adjust_arnr_filter(AV1_COMP *cpi, int distance, int group_boost,
596 : int *arnr_frames, int *arnr_strength) {
597 0 : const AV1EncoderConfig *const oxcf = &cpi->oxcf;
598 0 : const int frames_after_arf =
599 0 : av1_lookahead_depth(cpi->lookahead) - distance - 1;
600 0 : int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1;
601 : int frames_bwd;
602 : int q, frames, strength;
603 :
604 : // Define the forward and backwards filter limits for this arnr group.
605 0 : if (frames_fwd > frames_after_arf) frames_fwd = frames_after_arf;
606 0 : if (frames_fwd > distance) frames_fwd = distance;
607 :
608 0 : frames_bwd = frames_fwd;
609 :
610 : // For even length filter there is one more frame backward
611 : // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
612 0 : if (frames_bwd < distance) frames_bwd += (oxcf->arnr_max_frames + 1) & 0x1;
613 :
614 : // Set the baseline active filter size.
615 0 : frames = frames_bwd + 1 + frames_fwd;
616 :
617 : // Adjust the strength based on active max q.
618 0 : if (cpi->common.current_video_frame > 1)
619 0 : q = ((int)av1_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME],
620 : cpi->common.bit_depth));
621 : else
622 0 : q = ((int)av1_convert_qindex_to_q(cpi->rc.avg_frame_qindex[KEY_FRAME],
623 : cpi->common.bit_depth));
624 0 : if (q > 16) {
625 0 : strength = oxcf->arnr_strength;
626 : } else {
627 0 : strength = oxcf->arnr_strength - ((16 - q) / 2);
628 0 : if (strength < 0) strength = 0;
629 : }
630 :
631 : // Adjust number of frames in filter and strength based on gf boost level.
632 0 : if (frames > group_boost / 150) {
633 0 : frames = group_boost / 150;
634 0 : frames += !(frames & 1);
635 : }
636 :
637 0 : if (strength > group_boost / 300) {
638 0 : strength = group_boost / 300;
639 : }
640 :
641 : // Adjustments for second level arf in multi arf case.
642 0 : if (cpi->oxcf.pass == 2 && cpi->multi_arf_allowed) {
643 0 : const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
644 0 : if (gf_group->rf_level[gf_group->index] != GF_ARF_STD) {
645 0 : strength >>= 1;
646 : }
647 : }
648 :
649 0 : *arnr_frames = frames;
650 0 : *arnr_strength = strength;
651 0 : }
652 :
// Entry point: temporally filter the frames around the ARF located `distance`
// frames into the lookahead, producing the blurred ARF in
// cpi->alt_ref_buffer via temporal_filter_iterate_c().
void av1_temporal_filter(AV1_COMP *cpi, int distance) {
  RATE_CONTROL *const rc = &cpi->rc;
  int frame;
  int frames_to_blur;
  int start_frame;
  int strength;
  int frames_to_blur_backward;
  int frames_to_blur_forward;
  // NOTE(review): sf is only initialized inside the `frames_to_blur > 0`
  // branch below but is passed to temporal_filter_iterate_c unconditionally;
  // adjust_arnr_filter appears to always yield frames_to_blur >= 1 — confirm.
  struct scale_factors sf;
  YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
#if CONFIG_EXT_REFS
  const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
#endif

  // Apply context specific adjustments to the arnr filter parameters.
  adjust_arnr_filter(cpi, distance, rc->gfu_boost, &frames_to_blur, &strength);
// TODO(weitinglin): Currently, we enforce the filtering strength on
//                   extra ARFs' to be zeros. We should investigate in which
//                   case it is more beneficial to use non-zero strength
//                   filtering.
#if CONFIG_EXT_REFS
  if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW) {
    strength = 0;
    frames_to_blur = 1;
  }
#endif

#if CONFIG_EXT_REFS
  // Record whether filtering is effectively disabled for this ARF slot.
  if (strength == 0 && frames_to_blur == 1) {
    cpi->is_arf_filter_off[gf_group->arf_update_idx[gf_group->index]] = 1;
  } else {
    cpi->is_arf_filter_off[gf_group->arf_update_idx[gf_group->index]] = 0;
  }
#endif

  frames_to_blur_backward = (frames_to_blur / 2);
  frames_to_blur_forward = ((frames_to_blur - 1) / 2);
  start_frame = distance + frames_to_blur_forward;

  // Setup frame pointers, NULL indicates frame not included in filter.
  // NOTE(review): av1_lookahead_peek's result is dereferenced without a NULL
  // check — presumably adjust_arnr_filter keeps which_buffer in range; verify.
  for (frame = 0; frame < frames_to_blur; ++frame) {
    const int which_buffer = start_frame - frame;
    struct lookahead_entry *buf =
        av1_lookahead_peek(cpi->lookahead, which_buffer);
    frames[frames_to_blur - 1 - frame] = &buf->img;
  }

  if (frames_to_blur > 0) {
    // Setup scaling factors. Scaling on each of the arnr frames is not
    // supported.
    // ARF is produced at the native frame size and resized when coded.
#if CONFIG_HIGHBITDEPTH
    av1_setup_scale_factors_for_frame(
        &sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
        frames[0]->y_crop_width, frames[0]->y_crop_height,
        cpi->common.use_highbitdepth);
#else
    av1_setup_scale_factors_for_frame(
        &sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
        frames[0]->y_crop_width, frames[0]->y_crop_height);
#endif  // CONFIG_HIGHBITDEPTH
  }

  temporal_filter_iterate_c(cpi, frames, frames_to_blur,
                            frames_to_blur_backward, strength, &sf);
}
|