Line data Source code
1 : /*
2 : * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "./vpx_config.h"
12 : #include "./vp9_rtcd.h"
13 : #include "./vpx_dsp_rtcd.h"
14 : #include "./vpx_scale_rtcd.h"
15 :
16 : #include "vp9/common/vp9_onyxc_int.h"
17 : #include "vp9/common/vp9_postproc.h"
18 :
19 : // TODO(jackychen): Replace this function with SSE2 code. There is
20 : // one SSE2 implementation in vp8, so will consider how to share it
21 : // between vp8 and vp9.
22 0 : static void filter_by_weight(const uint8_t *src, int src_stride, uint8_t *dst,
23 : int dst_stride, int block_size, int src_weight) {
24 0 : const int dst_weight = (1 << MFQE_PRECISION) - src_weight;
25 0 : const int rounding_bit = 1 << (MFQE_PRECISION - 1);
26 : int r, c;
27 :
28 0 : for (r = 0; r < block_size; r++) {
29 0 : for (c = 0; c < block_size; c++) {
30 0 : dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit) >>
31 : MFQE_PRECISION;
32 : }
33 0 : src += src_stride;
34 0 : dst += dst_stride;
35 : }
36 0 : }
37 :
// C reference implementation of the rtcd entry point: blend an 8x8 source
// block into dst, giving src a weight of src_weight/(1 << MFQE_PRECISION).
void vp9_filter_by_weight8x8_c(const uint8_t *src, int src_stride, uint8_t *dst,
                               int dst_stride, int src_weight) {
  filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
}
42 :
// C reference implementation of the rtcd entry point: blend a 16x16 source
// block into dst, giving src a weight of src_weight/(1 << MFQE_PRECISION).
void vp9_filter_by_weight16x16_c(const uint8_t *src, int src_stride,
                                 uint8_t *dst, int dst_stride, int src_weight) {
  filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
}
47 :
48 0 : static void filter_by_weight32x32(const uint8_t *src, int src_stride,
49 : uint8_t *dst, int dst_stride, int weight) {
50 0 : vp9_filter_by_weight16x16(src, src_stride, dst, dst_stride, weight);
51 0 : vp9_filter_by_weight16x16(src + 16, src_stride, dst + 16, dst_stride, weight);
52 0 : vp9_filter_by_weight16x16(src + src_stride * 16, src_stride,
53 0 : dst + dst_stride * 16, dst_stride, weight);
54 0 : vp9_filter_by_weight16x16(src + src_stride * 16 + 16, src_stride,
55 0 : dst + dst_stride * 16 + 16, dst_stride, weight);
56 0 : }
57 :
58 0 : static void filter_by_weight64x64(const uint8_t *src, int src_stride,
59 : uint8_t *dst, int dst_stride, int weight) {
60 0 : filter_by_weight32x32(src, src_stride, dst, dst_stride, weight);
61 0 : filter_by_weight32x32(src + 32, src_stride, dst + 32, dst_stride, weight);
62 0 : filter_by_weight32x32(src + src_stride * 32, src_stride,
63 0 : dst + dst_stride * 32, dst_stride, weight);
64 0 : filter_by_weight32x32(src + src_stride * 32 + 32, src_stride,
65 0 : dst + dst_stride * 32 + 32, dst_stride, weight);
66 0 : }
67 :
68 0 : static void apply_ifactor(const uint8_t *y, int y_stride, uint8_t *yd,
69 : int yd_stride, const uint8_t *u, const uint8_t *v,
70 : int uv_stride, uint8_t *ud, uint8_t *vd,
71 : int uvd_stride, BLOCK_SIZE block_size, int weight) {
72 0 : if (block_size == BLOCK_16X16) {
73 0 : vp9_filter_by_weight16x16(y, y_stride, yd, yd_stride, weight);
74 0 : vp9_filter_by_weight8x8(u, uv_stride, ud, uvd_stride, weight);
75 0 : vp9_filter_by_weight8x8(v, uv_stride, vd, uvd_stride, weight);
76 0 : } else if (block_size == BLOCK_32X32) {
77 0 : filter_by_weight32x32(y, y_stride, yd, yd_stride, weight);
78 0 : vp9_filter_by_weight16x16(u, uv_stride, ud, uvd_stride, weight);
79 0 : vp9_filter_by_weight16x16(v, uv_stride, vd, uvd_stride, weight);
80 0 : } else if (block_size == BLOCK_64X64) {
81 0 : filter_by_weight64x64(y, y_stride, yd, yd_stride, weight);
82 0 : filter_by_weight32x32(u, uv_stride, ud, uvd_stride, weight);
83 0 : filter_by_weight32x32(v, uv_stride, vd, uvd_stride, weight);
84 : }
85 0 : }
86 :
87 : // TODO(jackychen): Determine whether replace it with assembly code.
88 0 : static void copy_mem8x8(const uint8_t *src, int src_stride, uint8_t *dst,
89 : int dst_stride) {
90 : int r;
91 0 : for (r = 0; r < 8; r++) {
92 0 : memcpy(dst, src, 8);
93 0 : src += src_stride;
94 0 : dst += dst_stride;
95 : }
96 0 : }
97 :
98 0 : static void copy_mem16x16(const uint8_t *src, int src_stride, uint8_t *dst,
99 : int dst_stride) {
100 : int r;
101 0 : for (r = 0; r < 16; r++) {
102 0 : memcpy(dst, src, 16);
103 0 : src += src_stride;
104 0 : dst += dst_stride;
105 : }
106 0 : }
107 :
108 0 : static void copy_mem32x32(const uint8_t *src, int src_stride, uint8_t *dst,
109 : int dst_stride) {
110 0 : copy_mem16x16(src, src_stride, dst, dst_stride);
111 0 : copy_mem16x16(src + 16, src_stride, dst + 16, dst_stride);
112 0 : copy_mem16x16(src + src_stride * 16, src_stride, dst + dst_stride * 16,
113 : dst_stride);
114 0 : copy_mem16x16(src + src_stride * 16 + 16, src_stride,
115 0 : dst + dst_stride * 16 + 16, dst_stride);
116 0 : }
117 :
118 0 : static void copy_mem64x64(const uint8_t *src, int src_stride, uint8_t *dst,
119 : int dst_stride) {
120 0 : copy_mem32x32(src, src_stride, dst, dst_stride);
121 0 : copy_mem32x32(src + 32, src_stride, dst + 32, dst_stride);
122 0 : copy_mem32x32(src + src_stride * 32, src_stride, dst + src_stride * 32,
123 : dst_stride);
124 0 : copy_mem32x32(src + src_stride * 32 + 32, src_stride,
125 0 : dst + src_stride * 32 + 32, dst_stride);
126 0 : }
127 :
128 0 : static void copy_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
129 : int y_stride, int uv_stride, uint8_t *yd, uint8_t *ud,
130 : uint8_t *vd, int yd_stride, int uvd_stride,
131 : BLOCK_SIZE bs) {
132 0 : if (bs == BLOCK_16X16) {
133 0 : copy_mem16x16(y, y_stride, yd, yd_stride);
134 0 : copy_mem8x8(u, uv_stride, ud, uvd_stride);
135 0 : copy_mem8x8(v, uv_stride, vd, uvd_stride);
136 0 : } else if (bs == BLOCK_32X32) {
137 0 : copy_mem32x32(y, y_stride, yd, yd_stride);
138 0 : copy_mem16x16(u, uv_stride, ud, uvd_stride);
139 0 : copy_mem16x16(v, uv_stride, vd, uvd_stride);
140 : } else {
141 0 : copy_mem64x64(y, y_stride, yd, yd_stride);
142 0 : copy_mem32x32(u, uv_stride, ud, uvd_stride);
143 0 : copy_mem32x32(v, uv_stride, vd, uvd_stride);
144 : }
145 0 : }
146 :
147 0 : static void get_thr(BLOCK_SIZE bs, int qdiff, int *sad_thr, int *vdiff_thr) {
148 0 : const int adj = qdiff >> MFQE_PRECISION;
149 0 : if (bs == BLOCK_16X16) {
150 0 : *sad_thr = 7 + adj;
151 0 : } else if (bs == BLOCK_32X32) {
152 0 : *sad_thr = 6 + adj;
153 : } else { // BLOCK_64X64
154 0 : *sad_thr = 5 + adj;
155 : }
156 0 : *vdiff_thr = 125 + qdiff;
157 0 : }
158 :
159 0 : static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u,
160 : const uint8_t *v, int y_stride, int uv_stride,
161 : uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride,
162 : int uvd_stride, int qdiff) {
163 : int sad, sad_thr, vdiff, vdiff_thr;
164 : uint32_t sse;
165 :
166 0 : get_thr(bs, qdiff, &sad_thr, &vdiff_thr);
167 :
168 0 : if (bs == BLOCK_16X16) {
169 0 : vdiff = (vpx_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8;
170 0 : sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
171 0 : } else if (bs == BLOCK_32X32) {
172 0 : vdiff = (vpx_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10;
173 0 : sad = (vpx_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10;
174 : } else /* if (bs == BLOCK_64X64) */ {
175 0 : vdiff = (vpx_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12;
176 0 : sad = (vpx_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12;
177 : }
178 :
179 : // vdiff > sad * 3 means vdiff should not be too small, otherwise,
180 : // it might be a lighting change in smooth area. When there is a
181 : // lighting change in smooth area, it is dangerous to do MFQE.
182 0 : if (sad > 1 && vdiff > sad * 3) {
183 0 : const int weight = 1 << MFQE_PRECISION;
184 0 : int ifactor = weight * sad * vdiff / (sad_thr * vdiff_thr);
185 : // When ifactor equals weight, no MFQE is done.
186 0 : if (ifactor > weight) {
187 0 : ifactor = weight;
188 : }
189 0 : apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd,
190 : uvd_stride, bs, ifactor);
191 : } else {
192 : // Copy the block from current frame (i.e., no mfqe is done).
193 0 : copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride, uvd_stride,
194 : bs);
195 : }
196 0 : }
197 :
198 0 : static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) {
199 : // Check the motion in current block(for inter frame),
200 : // or check the motion in the correlated block in last frame (for keyframe).
201 0 : const int mv_len_square = mi->mv[0].as_mv.row * mi->mv[0].as_mv.row +
202 0 : mi->mv[0].as_mv.col * mi->mv[0].as_mv.col;
203 0 : const int mv_threshold = 100;
204 0 : return mi->mode >= NEARESTMV && // Not an intra block
205 0 : cur_bs >= BLOCK_16X16 && mv_len_square <= mv_threshold;
206 : }
207 :
208 : // Process each partiton in a super block, recursively.
209 0 : static void mfqe_partition(VP9_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs,
210 : const uint8_t *y, const uint8_t *u, const uint8_t *v,
211 : int y_stride, int uv_stride, uint8_t *yd,
212 : uint8_t *ud, uint8_t *vd, int yd_stride,
213 : int uvd_stride) {
214 : int mi_offset, y_offset, uv_offset;
215 0 : const BLOCK_SIZE cur_bs = mi->sb_type;
216 0 : const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex;
217 0 : const int bsl = b_width_log2_lookup[bs];
218 0 : PARTITION_TYPE partition = partition_lookup[bsl][cur_bs];
219 0 : const BLOCK_SIZE subsize = get_subsize(bs, partition);
220 :
221 0 : if (cur_bs < BLOCK_8X8) {
222 : // If there are blocks smaller than 8x8, it must be on the boundary.
223 0 : return;
224 : }
225 : // No MFQE on blocks smaller than 16x16
226 0 : if (bs == BLOCK_16X16) {
227 0 : partition = PARTITION_NONE;
228 : }
229 0 : if (bs == BLOCK_64X64) {
230 0 : mi_offset = 4;
231 0 : y_offset = 32;
232 0 : uv_offset = 16;
233 : } else {
234 0 : mi_offset = 2;
235 0 : y_offset = 16;
236 0 : uv_offset = 8;
237 : }
238 0 : switch (partition) {
239 : BLOCK_SIZE mfqe_bs, bs_tmp;
240 : case PARTITION_HORZ:
241 0 : if (bs == BLOCK_64X64) {
242 0 : mfqe_bs = BLOCK_64X32;
243 0 : bs_tmp = BLOCK_32X32;
244 : } else {
245 0 : mfqe_bs = BLOCK_32X16;
246 0 : bs_tmp = BLOCK_16X16;
247 : }
248 0 : if (mfqe_decision(mi, mfqe_bs)) {
249 : // Do mfqe on the first square partition.
250 0 : mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride,
251 : uvd_stride, qdiff);
252 : // Do mfqe on the second square partition.
253 0 : mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset, y_stride,
254 : uv_stride, yd + y_offset, ud + uv_offset, vd + uv_offset,
255 : yd_stride, uvd_stride, qdiff);
256 : }
257 0 : if (mfqe_decision(mi + mi_offset * cm->mi_stride, mfqe_bs)) {
258 : // Do mfqe on the first square partition.
259 0 : mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
260 0 : v + uv_offset * uv_stride, y_stride, uv_stride,
261 0 : yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
262 0 : vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
263 : // Do mfqe on the second square partition.
264 0 : mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
265 0 : u + uv_offset * uv_stride + uv_offset,
266 0 : v + uv_offset * uv_stride + uv_offset, y_stride, uv_stride,
267 0 : yd + y_offset * yd_stride + y_offset,
268 0 : ud + uv_offset * uvd_stride + uv_offset,
269 0 : vd + uv_offset * uvd_stride + uv_offset, yd_stride,
270 : uvd_stride, qdiff);
271 : }
272 0 : break;
273 : case PARTITION_VERT:
274 0 : if (bs == BLOCK_64X64) {
275 0 : mfqe_bs = BLOCK_32X64;
276 0 : bs_tmp = BLOCK_32X32;
277 : } else {
278 0 : mfqe_bs = BLOCK_16X32;
279 0 : bs_tmp = BLOCK_16X16;
280 : }
281 0 : if (mfqe_decision(mi, mfqe_bs)) {
282 : // Do mfqe on the first square partition.
283 0 : mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride,
284 : uvd_stride, qdiff);
285 : // Do mfqe on the second square partition.
286 0 : mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
287 0 : v + uv_offset * uv_stride, y_stride, uv_stride,
288 0 : yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
289 0 : vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
290 : }
291 0 : if (mfqe_decision(mi + mi_offset, mfqe_bs)) {
292 : // Do mfqe on the first square partition.
293 0 : mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset, y_stride,
294 : uv_stride, yd + y_offset, ud + uv_offset, vd + uv_offset,
295 : yd_stride, uvd_stride, qdiff);
296 : // Do mfqe on the second square partition.
297 0 : mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
298 0 : u + uv_offset * uv_stride + uv_offset,
299 0 : v + uv_offset * uv_stride + uv_offset, y_stride, uv_stride,
300 0 : yd + y_offset * yd_stride + y_offset,
301 0 : ud + uv_offset * uvd_stride + uv_offset,
302 0 : vd + uv_offset * uvd_stride + uv_offset, yd_stride,
303 : uvd_stride, qdiff);
304 : }
305 0 : break;
306 : case PARTITION_NONE:
307 0 : if (mfqe_decision(mi, cur_bs)) {
308 : // Do mfqe on this partition.
309 0 : mfqe_block(cur_bs, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride,
310 : uvd_stride, qdiff);
311 : } else {
312 : // Copy the block from current frame(i.e., no mfqe is done).
313 0 : copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride,
314 : uvd_stride, bs);
315 : }
316 0 : break;
317 : case PARTITION_SPLIT:
318 : // Recursion on four square partitions, e.g. if bs is 64X64,
319 : // then look into four 32X32 blocks in it.
320 0 : mfqe_partition(cm, mi, subsize, y, u, v, y_stride, uv_stride, yd, ud, vd,
321 : yd_stride, uvd_stride);
322 0 : mfqe_partition(cm, mi + mi_offset, subsize, y + y_offset, u + uv_offset,
323 : v + uv_offset, y_stride, uv_stride, yd + y_offset,
324 : ud + uv_offset, vd + uv_offset, yd_stride, uvd_stride);
325 0 : mfqe_partition(cm, mi + mi_offset * cm->mi_stride, subsize,
326 0 : y + y_offset * y_stride, u + uv_offset * uv_stride,
327 0 : v + uv_offset * uv_stride, y_stride, uv_stride,
328 0 : yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
329 0 : vd + uv_offset * uvd_stride, yd_stride, uvd_stride);
330 0 : mfqe_partition(cm, mi + mi_offset * cm->mi_stride + mi_offset, subsize,
331 0 : y + y_offset * y_stride + y_offset,
332 0 : u + uv_offset * uv_stride + uv_offset,
333 0 : v + uv_offset * uv_stride + uv_offset, y_stride, uv_stride,
334 0 : yd + y_offset * yd_stride + y_offset,
335 0 : ud + uv_offset * uvd_stride + uv_offset,
336 0 : vd + uv_offset * uvd_stride + uv_offset, yd_stride,
337 : uvd_stride);
338 0 : break;
339 0 : default: assert(0);
340 : }
341 : }
342 :
343 0 : void vp9_mfqe(VP9_COMMON *cm) {
344 : int mi_row, mi_col;
345 : // Current decoded frame.
346 0 : const YV12_BUFFER_CONFIG *show = cm->frame_to_show;
347 : // Last decoded frame and will store the MFQE result.
348 0 : YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
349 : // Loop through each super block.
350 0 : for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) {
351 0 : for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
352 : MODE_INFO *mi;
353 0 : MODE_INFO *mi_local = cm->mi + (mi_row * cm->mi_stride + mi_col);
354 : // Motion Info in last frame.
355 0 : MODE_INFO *mi_prev =
356 0 : cm->postproc_state.prev_mi + (mi_row * cm->mi_stride + mi_col);
357 0 : const uint32_t y_stride = show->y_stride;
358 0 : const uint32_t uv_stride = show->uv_stride;
359 0 : const uint32_t yd_stride = dest->y_stride;
360 0 : const uint32_t uvd_stride = dest->uv_stride;
361 0 : const uint32_t row_offset_y = mi_row << 3;
362 0 : const uint32_t row_offset_uv = mi_row << 2;
363 0 : const uint32_t col_offset_y = mi_col << 3;
364 0 : const uint32_t col_offset_uv = mi_col << 2;
365 0 : const uint8_t *y =
366 0 : show->y_buffer + row_offset_y * y_stride + col_offset_y;
367 0 : const uint8_t *u =
368 0 : show->u_buffer + row_offset_uv * uv_stride + col_offset_uv;
369 0 : const uint8_t *v =
370 0 : show->v_buffer + row_offset_uv * uv_stride + col_offset_uv;
371 0 : uint8_t *yd = dest->y_buffer + row_offset_y * yd_stride + col_offset_y;
372 0 : uint8_t *ud = dest->u_buffer + row_offset_uv * uvd_stride + col_offset_uv;
373 0 : uint8_t *vd = dest->v_buffer + row_offset_uv * uvd_stride + col_offset_uv;
374 0 : if (frame_is_intra_only(cm)) {
375 0 : mi = mi_prev;
376 : } else {
377 0 : mi = mi_local;
378 : }
379 0 : mfqe_partition(cm, mi, BLOCK_64X64, y, u, v, y_stride, uv_stride, yd, ud,
380 : vd, yd_stride, uvd_stride);
381 : }
382 : }
383 0 : }
|