Line data Source code
1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include <assert.h>
13 :
14 : #include "./aom_scale_rtcd.h"
15 : #include "./aom_dsp_rtcd.h"
16 : #include "./aom_config.h"
17 :
18 : #include "aom/aom_integer.h"
19 : #include "aom_dsp/blend.h"
20 :
21 : #include "av1/common/blockd.h"
22 : #include "av1/common/reconinter.h"
23 : #include "av1/common/reconintra.h"
24 : #if CONFIG_MOTION_VAR
25 : #include "av1/common/onyxc_int.h"
26 : #endif // CONFIG_MOTION_VAR
27 :
28 : #if CONFIG_EXT_INTER
29 :
30 : #define NSMOOTHERS 1
31 :
32 : // [smoother][negative][direction]
33 : DECLARE_ALIGNED(16, static uint8_t,
34 : wedge_mask_obl[NSMOOTHERS][2][WEDGE_DIRECTIONS]
35 : [MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
36 :
37 : DECLARE_ALIGNED(16, static uint8_t,
38 : wedge_signflip_lookup[BLOCK_SIZES][MAX_WEDGE_TYPES]);
39 :
40 : // 3 * MAX_WEDGE_SQUARE is an easy-to-compute and fairly tight upper bound
41 : // on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
42 : DECLARE_ALIGNED(16, static uint8_t,
43 : wedge_mask_buf[2 * MAX_WEDGE_TYPES * 3 * MAX_WEDGE_SQUARE]);
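/* Worked check (illustrative note, not from the original source; assumes
 * MAX_WEDGE_SIZE == 32 so MAX_WEDGE_SQUARE == 1024): per wedge_params_lookup
 * below, the block sizes that carry wedges are 8x8 through 32x32, so the
 * mask area summed over all sizes is
 *   64 + 128 + 128 + 256 + 512 + 512 + 1024 = 2624 <= 3 * 1024,
 * and each of the MAX_WEDGE_TYPES codes stores both a mask and its
 * complement, hence the factor of 2 in the buffer size above.
 */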
44 :
45 : static wedge_masks_type wedge_masks[BLOCK_SIZES][2];
46 :
47 : // Some unused wedge codebooks are left temporarily to facilitate
48 : // To be removed when settled.
49 : /*
50 : static wedge_code_type wedge_codebook_8_hgtw[8] = {
51 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
52 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
53 : { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
54 : { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
55 : };
56 :
57 : static wedge_code_type wedge_codebook_8_hltw[8] = {
58 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
59 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
60 : { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
61 : { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
62 : };
63 :
64 : static wedge_code_type wedge_codebook_8_heqw[8] = {
65 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
66 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
67 : { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
68 : { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 },
69 : };
70 :
71 : static const wedge_code_type wedge_codebook_32_hgtw[32] = {
72 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
73 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
74 : { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
75 : { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
76 : { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 },
77 : { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 },
78 : { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 },
79 : { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
80 : { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
81 : { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
82 : { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 },
83 : { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 },
84 : { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 },
85 : { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
86 : { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
87 : { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
88 : };
89 :
90 : static const wedge_code_type wedge_codebook_32_hltw[32] = {
91 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
92 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
93 : { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 },
94 : { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 },
95 : { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 },
96 : { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 },
97 : { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 },
98 : { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
99 : { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
100 : { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
101 : { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 },
102 : { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 },
103 : { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 },
104 : { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
105 : { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
106 : { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
107 : };
108 :
109 : static const wedge_code_type wedge_codebook_32_heqw[32] = {
110 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
111 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
112 : { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
113 : { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 },
114 : { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 },
115 : { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 },
116 : { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 },
117 : { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
118 : { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
119 : { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
120 : { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 },
121 : { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 },
122 : { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 },
123 : { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
124 : { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
125 : { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
126 : };
127 : */
128 :
129 : static const wedge_code_type wedge_codebook_16_hgtw[16] = {
130 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
131 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
132 : { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
133 : { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
134 : { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
135 : { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
136 : { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
137 : { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
138 : };
139 :
140 : static const wedge_code_type wedge_codebook_16_hltw[16] = {
141 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
142 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
143 : { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 },
144 : { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 },
145 : { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
146 : { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
147 : { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
148 : { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
149 : };
150 :
151 : static const wedge_code_type wedge_codebook_16_heqw[16] = {
152 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
153 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
154 : { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
155 : { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 },
156 : { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
157 : { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
158 : { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
159 : { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
160 : };
161 :
162 : const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = {
163 : #if CONFIG_CB4X4
164 : { 0, NULL, NULL, 0, NULL },
165 : { 0, NULL, NULL, 0, NULL },
166 : { 0, NULL, NULL, 0, NULL },
167 : #endif // CONFIG_CB4X4
168 : { 0, NULL, NULL, 0, NULL },
169 : { 0, NULL, NULL, 0, NULL },
170 : { 0, NULL, NULL, 0, NULL },
171 : #if CONFIG_WEDGE
172 : { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], 0,
173 : wedge_masks[BLOCK_8X8] },
174 : { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16], 0,
175 : wedge_masks[BLOCK_8X16] },
176 : { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8], 0,
177 : wedge_masks[BLOCK_16X8] },
178 : { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16], 0,
179 : wedge_masks[BLOCK_16X16] },
180 : { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32], 0,
181 : wedge_masks[BLOCK_16X32] },
182 : { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16], 0,
183 : wedge_masks[BLOCK_32X16] },
184 : { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], 0,
185 : wedge_masks[BLOCK_32X32] },
186 : { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_32X64], 0,
187 : wedge_masks[BLOCK_32X64] },
188 : { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_64X32], 0,
189 : wedge_masks[BLOCK_64X32] },
190 : { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_64X64], 0,
191 : wedge_masks[BLOCK_64X64] },
192 : #else
193 : { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], 0,
194 : wedge_masks[BLOCK_8X8] },
195 : { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16], 0,
196 : wedge_masks[BLOCK_8X16] },
197 : { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8], 0,
198 : wedge_masks[BLOCK_16X8] },
199 : { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16], 0,
200 : wedge_masks[BLOCK_16X16] },
201 : { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32], 0,
202 : wedge_masks[BLOCK_16X32] },
203 : { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16], 0,
204 : wedge_masks[BLOCK_32X16] },
205 : { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], 0,
206 : wedge_masks[BLOCK_32X32] },
207 : { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_32X64], 0,
208 : wedge_masks[BLOCK_32X64] },
209 : { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_64X32], 0,
210 : wedge_masks[BLOCK_64X32] },
211 : { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_64X64], 0,
212 : wedge_masks[BLOCK_64X64] },
213 : #endif // CONFIG_WEDGE
214 : #if CONFIG_EXT_PARTITION
215 : { 0, NULL, NULL, 0, NULL },
216 : { 0, NULL, NULL, 0, NULL },
217 : { 0, NULL, NULL, 0, NULL },
218 : #endif // CONFIG_EXT_PARTITION
219 : };
220 :
221 0 : static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
222 : BLOCK_SIZE sb_type) {
223 : const uint8_t *master;
224 0 : const int bh = block_size_high[sb_type];
225 0 : const int bw = block_size_wide[sb_type];
226 0 : const wedge_code_type *a =
227 0 : wedge_params_lookup[sb_type].codebook + wedge_index;
228 0 : const int smoother = wedge_params_lookup[sb_type].smoother;
229 : int woff, hoff;
230 0 : const uint8_t wsignflip = wedge_params_lookup[sb_type].signflip[wedge_index];
231 :
232 0 : assert(wedge_index >= 0 &&
233 : wedge_index < (1 << get_wedge_bits_lookup(sb_type)));
234 0 : woff = (a->x_offset * bw) >> 3;
235 0 : hoff = (a->y_offset * bh) >> 3;
236 0 : master = wedge_mask_obl[smoother][neg ^ wsignflip][a->direction] +
237 0 : MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
238 0 : MASK_MASTER_SIZE / 2 - woff;
239 0 : return master;
240 : }
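/* Pointer-arithmetic sketch (illustrative, assuming MASK_MASTER_SIZE == 64):
 * the master mask is one MASK_MASTER_SIZE x MASK_MASTER_SIZE plane, and the
 * returned pointer is just an interior offset into it. E.g. for BLOCK_32X32
 * with a->x_offset == 2 and a->y_offset == 4 (offsets are in eighths of the
 * block):
 *   woff = (2 * 32) >> 3 = 8,   hoff = (4 * 32) >> 3 = 16,
 * so reading bw x bh bytes starting at
 *   master + MASK_MASTER_STRIDE * (32 - 16) + (32 - 8)
 * yields a 32x32 window whose wedge boundary is shifted accordingly.
 */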
241 :
242 0 : const uint8_t *av1_get_soft_mask(int wedge_index, int wedge_sign,
243 : BLOCK_SIZE sb_type, int offset_x,
244 : int offset_y) {
245 0 : const uint8_t *mask =
246 0 : get_wedge_mask_inplace(wedge_index, wedge_sign, sb_type);
247 0 : if (mask) mask -= (offset_x + offset_y * MASK_MASTER_STRIDE);
248 0 : return mask;
249 : }
250 :
251 : #if CONFIG_COMPOUND_SEGMENT
252 0 : static uint8_t *invert_mask(uint8_t *mask_inv_buffer, const uint8_t *const mask,
253 : int h, int w, int stride) {
254 : int i, j;
255 :
256 0 : for (i = 0; i < h; ++i)
257 0 : for (j = 0; j < w; ++j) {
258 0 : mask_inv_buffer[i * stride + j] =
259 0 : AOM_BLEND_A64_MAX_ALPHA - mask[i * stride + j];
260 : }
261 0 : return mask_inv_buffer;
262 : }
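/* Note (illustrative): since AOM_BLEND_A64_MAX_ALPHA == 64, the inverted
 * mask satisfies mask[i] + mask_inv[i] == 64 at every pixel, so blending
 * (src0, src1) with the inverse is exactly blending (src1, src0) with the
 * original mask:
 *   ((64 - m) * s0 + m * s1 + 32) >> 6  ==  (m * s1 + (64 - m) * s0 + 32) >> 6
 */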
263 : #endif // CONFIG_COMPOUND_SEGMENT
264 :
265 0 : const uint8_t *av1_get_compound_type_mask_inverse(
266 : const INTERINTER_COMPOUND_DATA *const comp_data,
267 : #if CONFIG_COMPOUND_SEGMENT
268 : uint8_t *mask_buffer, int h, int w, int stride,
269 : #endif
270 : BLOCK_SIZE sb_type) {
271 0 : assert(is_masked_compound_type(comp_data->interinter_compound_type));
272 : (void)sb_type;
273 0 : switch (comp_data->interinter_compound_type) {
274 : #if CONFIG_WEDGE
275 : case COMPOUND_WEDGE:
276 0 : return av1_get_contiguous_soft_mask(comp_data->wedge_index,
277 0 : !comp_data->wedge_sign, sb_type);
278 : #endif // CONFIG_WEDGE
279 : #if CONFIG_COMPOUND_SEGMENT
280 : case COMPOUND_SEG:
281 0 : return invert_mask(mask_buffer, comp_data->seg_mask, h, w, stride);
282 : #endif // CONFIG_COMPOUND_SEGMENT
283 0 : default: assert(0); return NULL;
284 : }
285 : }
286 :
287 0 : const uint8_t *av1_get_compound_type_mask(
288 : const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
289 0 : assert(is_masked_compound_type(comp_data->interinter_compound_type));
290 : (void)sb_type;
291 0 : switch (comp_data->interinter_compound_type) {
292 : #if CONFIG_WEDGE
293 : case COMPOUND_WEDGE:
294 0 : return av1_get_contiguous_soft_mask(comp_data->wedge_index,
295 : comp_data->wedge_sign, sb_type);
296 : #endif // CONFIG_WEDGE
297 : #if CONFIG_COMPOUND_SEGMENT
298 0 : case COMPOUND_SEG: return comp_data->seg_mask;
299 : #endif // CONFIG_COMPOUND_SEGMENT
300 0 : default: assert(0); return NULL;
301 : }
302 : }
303 :
304 : #if CONFIG_COMPOUND_SEGMENT
305 : #if COMPOUND_SEGMENT_TYPE == 0
306 : static void uniform_mask(uint8_t *mask, int which_inverse, BLOCK_SIZE sb_type,
307 : int h, int w, int mask_val) {
308 : int i, j;
309 : int block_stride = block_size_wide[sb_type];
310 : for (i = 0; i < h; ++i)
311 : for (j = 0; j < w; ++j) {
312 : mask[i * block_stride + j] =
313 : which_inverse ? AOM_BLEND_A64_MAX_ALPHA - mask_val : mask_val;
314 : }
315 : }
316 :
317 : void build_compound_seg_mask(uint8_t *mask, SEG_MASK_TYPE mask_type,
318 : const uint8_t *src0, int src0_stride,
319 : const uint8_t *src1, int src1_stride,
320 : BLOCK_SIZE sb_type, int h, int w) {
321 : (void)src0;
322 : (void)src1;
323 : (void)src0_stride;
324 : (void)src1_stride;
325 : switch (mask_type) {
326 : case UNIFORM_45: uniform_mask(mask, 0, sb_type, h, w, 45); break;
327 : case UNIFORM_45_INV: uniform_mask(mask, 1, sb_type, h, w, 45); break;
328 : default: assert(0);
329 : }
330 : }
331 :
332 : #if CONFIG_HIGHBITDEPTH
333 : void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
334 : const uint8_t *src0, int src0_stride,
335 : const uint8_t *src1, int src1_stride,
336 : BLOCK_SIZE sb_type, int h, int w, int bd) {
337 : (void)src0;
338 : (void)src1;
339 : (void)src0_stride;
340 : (void)src1_stride;
341 : (void)bd;
342 : switch (mask_type) {
343 : case UNIFORM_45: uniform_mask(mask, 0, sb_type, h, w, 45); break;
344 : case UNIFORM_45_INV: uniform_mask(mask, 1, sb_type, h, w, 45); break;
345 : default: assert(0);
346 : }
347 : }
348 : #endif // CONFIG_HIGHBITDEPTH
349 :
350 : #elif COMPOUND_SEGMENT_TYPE == 1
351 : #define DIFF_FACTOR 16
352 0 : static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
353 : const uint8_t *src0, int src0_stride,
354 : const uint8_t *src1, int src1_stride,
355 : BLOCK_SIZE sb_type, int h, int w) {
356 : int i, j, m, diff;
357 0 : int block_stride = block_size_wide[sb_type];
358 0 : for (i = 0; i < h; ++i) {
359 0 : for (j = 0; j < w; ++j) {
360 0 : diff =
361 0 : abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
362 0 : m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
363 0 : mask[i * block_stride + j] =
364 0 : which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
365 : }
366 : }
367 0 : }
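/* Worked example (illustrative): with mask_base == 38 and DIFF_FACTOR == 16,
 *   |src0 - src1| == 0    ->  m = 38
 *   |src0 - src1| == 100  ->  m = 38 + 100 / 16 = 44
 *   |src0 - src1| >= 416  ->  m = 64   (clamped to AOM_BLEND_A64_MAX_ALPHA)
 * so the mask leans harder toward the first predictor wherever the two
 * predictions disagree; which_inverse == 1 mirrors this toward src1.
 */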
368 :
369 0 : void build_compound_seg_mask(uint8_t *mask, SEG_MASK_TYPE mask_type,
370 : const uint8_t *src0, int src0_stride,
371 : const uint8_t *src1, int src1_stride,
372 : BLOCK_SIZE sb_type, int h, int w) {
373 0 : switch (mask_type) {
374 : case DIFFWTD_38:
375 0 : diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, sb_type,
376 : h, w);
377 0 : break;
378 : case DIFFWTD_38_INV:
379 0 : diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, sb_type,
380 : h, w);
381 0 : break;
382 0 : default: assert(0);
383 : }
384 0 : }
385 :
386 : #if CONFIG_HIGHBITDEPTH
387 0 : static void diffwtd_mask_highbd(uint8_t *mask, int which_inverse, int mask_base,
388 : const uint16_t *src0, int src0_stride,
389 : const uint16_t *src1, int src1_stride,
390 : BLOCK_SIZE sb_type, int h, int w, int bd) {
391 : int i, j, m, diff;
392 0 : int block_stride = block_size_wide[sb_type];
393 0 : for (i = 0; i < h; ++i) {
394 0 : for (j = 0; j < w; ++j) {
395 0 : diff = abs((int)src0[i * src0_stride + j] -
396 0 : (int)src1[i * src1_stride + j]) >>
397 0 : (bd - 8);
398 0 : m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
399 0 : mask[i * block_stride + j] =
400 0 : which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
401 : }
402 : }
403 0 : }
404 :
405 0 : void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
406 : const uint8_t *src0, int src0_stride,
407 : const uint8_t *src1, int src1_stride,
408 : BLOCK_SIZE sb_type, int h, int w, int bd) {
409 0 : switch (mask_type) {
410 : case DIFFWTD_38:
411 0 : diffwtd_mask_highbd(mask, 0, 42, CONVERT_TO_SHORTPTR(src0), src0_stride,
412 0 : CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w,
413 : bd);
414 0 : break;
415 : case DIFFWTD_38_INV:
416 0 : diffwtd_mask_highbd(mask, 1, 42, CONVERT_TO_SHORTPTR(src0), src0_stride,
417 0 : CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w,
418 : bd);
419 0 : break;
420 0 : default: assert(0);
421 : }
422 0 : }
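/* Note (illustrative): the high-bit-depth variant shifts the pixel
 * difference down by (bd - 8) so that 10- and 12-bit residuals are mapped
 * onto the same 8-bit scale before the DIFF_FACTOR division. Also note
 * that, as written, these call sites pass 42 rather than 38 as the mask
 * base for DIFFWTD_38 / DIFFWTD_38_INV.
 */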
423 : #endif // CONFIG_HIGHBITDEPTH
424 : #endif // COMPOUND_SEGMENT_TYPE
425 : #endif // CONFIG_COMPOUND_SEGMENT
426 :
427 : #if MASK_MASTER_SIZE == 64
428 : static const uint8_t wedge_master_oblique_odd[NSMOOTHERS][MASK_MASTER_SIZE] = {
429 : {
430 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
431 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 6, 18,
432 : 37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
433 : 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
434 : }
435 : };
436 : static const uint8_t wedge_master_oblique_even[NSMOOTHERS][MASK_MASTER_SIZE] = {
437 : {
438 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 11, 27,
440 : 46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
441 : 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
442 : }
443 : };
444 : static const uint8_t wedge_master_vertical[NSMOOTHERS][MASK_MASTER_SIZE] = { {
445 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 7, 21,
447 : 43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
448 : 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
449 : } };
450 :
451 0 : static void shift_copy(const uint8_t *src, uint8_t *dst, int shift, int width) {
452 0 : if (shift >= 0) {
453 0 : memcpy(dst + shift, src, width - shift);
454 0 : memset(dst, src[0], shift);
455 : } else {
456 0 : shift = -shift;
457 0 : memcpy(dst, src + shift, width - shift);
458 0 : memset(dst + width - shift, src[width - 1], shift);
459 : }
460 0 : }
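/* Worked example (illustrative): shift_copy() shifts one mask row and
 * replicates the edge sample into the vacated positions. With
 * src = {0, 0, 64, 64} and width == 4:
 *   shift == +1  ->  dst = {0, 0, 0, 64}    (src[0] fills the front)
 *   shift == -1  ->  dst = {0, 64, 64, 64}  (src[3] fills the back)
 */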
461 : #else
462 : static const double smoother_param[NSMOOTHERS] = { 3.0 };
463 : #endif // MASK_MASTER_SIZE == 64
464 :
465 0 : static void init_wedge_master_masks() {
466 : int i, j, s;
467 0 : const int w = MASK_MASTER_SIZE;
468 0 : const int h = MASK_MASTER_SIZE;
469 0 : const int stride = MASK_MASTER_STRIDE;
470 0 : for (s = 0; s < NSMOOTHERS; s++) {
471 : // Note: index [0] stores the masters, and [1] their complements.
472 : #if MASK_MASTER_SIZE == 64
473 : // Generate prototype by shifting the masters
474 0 : int shift = h / 4;
475 0 : for (i = 0; i < h; i += 2) {
476 0 : shift_copy(wedge_master_oblique_even[s],
477 0 : &wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride], shift,
478 : MASK_MASTER_SIZE);
479 0 : shift--;
480 0 : shift_copy(wedge_master_oblique_odd[s],
481 0 : &wedge_mask_obl[s][0][WEDGE_OBLIQUE63][(i + 1) * stride],
482 : shift, MASK_MASTER_SIZE);
483 0 : memcpy(&wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride],
484 0 : wedge_master_vertical[s],
485 : MASK_MASTER_SIZE * sizeof(wedge_master_vertical[s][0]));
486 0 : memcpy(&wedge_mask_obl[s][0][WEDGE_VERTICAL][(i + 1) * stride],
487 0 : wedge_master_vertical[s],
488 : MASK_MASTER_SIZE * sizeof(wedge_master_vertical[s][0]));
489 : }
490 : #else
491 : const int a[2] = { 2, 1 };
492 : const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
493 : for (i = 0; i < h; i++) {
494 : for (j = 0; j < w; ++j) {
495 : int x = (2 * j + 1 - w);
496 : int y = (2 * i + 1 - h);
497 : double d = (a[0] * x + a[1] * y) / asqrt;
498 : const int msk = (int)rint((1.0 + tanh(d / smoother_param[s])) * 32);
499 : wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j] = msk;
500 : const int mskx = (int)rint((1.0 + tanh(x / smoother_param[s])) * 32);
501 : wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j] = mskx;
502 : }
503 : }
504 : #endif // MASK_MASTER_SIZE == 64
505 0 : for (i = 0; i < h; ++i) {
506 0 : for (j = 0; j < w; ++j) {
507 0 : const int msk = wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j];
508 0 : wedge_mask_obl[s][0][WEDGE_OBLIQUE27][j * stride + i] = msk;
509 0 : wedge_mask_obl[s][0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
510 0 : wedge_mask_obl[s][0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
511 0 : (1 << WEDGE_WEIGHT_BITS) - msk;
512 0 : wedge_mask_obl[s][1][WEDGE_OBLIQUE63][i * stride + j] =
513 0 : wedge_mask_obl[s][1][WEDGE_OBLIQUE27][j * stride + i] =
514 0 : (1 << WEDGE_WEIGHT_BITS) - msk;
515 0 : wedge_mask_obl[s][1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
516 0 : wedge_mask_obl[s][1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
517 : msk;
518 0 : const int mskx = wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j];
519 0 : wedge_mask_obl[s][0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
520 0 : wedge_mask_obl[s][1][WEDGE_VERTICAL][i * stride + j] =
521 0 : wedge_mask_obl[s][1][WEDGE_HORIZONTAL][j * stride + i] =
522 0 : (1 << WEDGE_WEIGHT_BITS) - mskx;
523 : }
524 : }
525 : }
526 0 : }
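/* Symmetry sketch (illustrative): only the WEDGE_OBLIQUE63 and
 * WEDGE_VERTICAL masters are stored explicitly; the final loop above derives
 * the rest, with (1 << WEDGE_WEIGHT_BITS) == 64 when WEDGE_WEIGHT_BITS == 6:
 *   OBLIQUE27[j][i]        = OBLIQUE63[i][j]         (transpose)
 *   OBLIQUE117[i][w-1-j]   = 64 - OBLIQUE63[i][j]    (mirror + complement)
 *   OBLIQUE153[w-1-j][i]   = 64 - OBLIQUE63[i][j]    (transpose of 117)
 *   HORIZONTAL[j][i]       = VERTICAL[i][j]          (transpose)
 * and index [1] of each direction is the 64 - x complement of index [0].
 */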
527 :
528 : // If the signs of the wedges for the various block sizes are
529 : // inconsistent, flip the sign flag. Do it only once for every
530 : // wedge codebook.
531 0 : static void init_wedge_signs() {
532 : BLOCK_SIZE sb_type;
533 0 : memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup));
534 0 : for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES; ++sb_type) {
535 0 : const int bw = block_size_wide[sb_type];
536 0 : const int bh = block_size_high[sb_type];
537 0 : const wedge_params_type wedge_params = wedge_params_lookup[sb_type];
538 0 : const int wbits = wedge_params.bits;
539 0 : const int wtypes = 1 << wbits;
540 : int i, w;
541 0 : if (wbits == 0) continue;
542 0 : for (w = 0; w < wtypes; ++w) {
543 : // Get the mask master, i.e. index [0]
544 0 : const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type);
545 0 : int avg = 0;
546 0 : for (i = 0; i < bw; ++i) avg += mask[i];
547 0 : for (i = 1; i < bh; ++i) avg += mask[i * MASK_MASTER_STRIDE];
548 0 : avg = (avg + (bw + bh - 1) / 2) / (bw + bh - 1);
549 : // The default sign of this wedge is 1 if the average < 32, 0 otherwise.
550 : // If the default sign is 1:
551 : // If the requested sign is 0, we need to flip the sign and return
552 : // the complement, i.e. index [1], instead; if the requested sign is 1,
553 : // we need to flip the sign and return index [0] instead.
554 : // If the default sign is 0:
555 : // If the requested sign is 0, we need to return index [0], the master;
556 : // if the requested sign is 1, we need to return the complement,
557 : // index [1], instead.
558 0 : wedge_params.signflip[w] = (avg < 32);
559 : // printf("%d[%d] = %d\n", sb_type, w, wedge_params.signflip[w]);
560 : }
561 : }
562 0 : }
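/* Worked example (illustrative): the sign is decided from the mask average
 * over the top row and left column only (bw + bh - 1 samples, with rounding
 * in the division). For a vertical wedge splitting an 8x8 block down the
 * middle, the top row averages near 32 while the left column sits near 0,
 * so avg < 32 and signflip is set; requesting sign 0 for that wedge then
 * returns the complement mask instead of the master.
 */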
563 :
564 0 : static void init_wedge_masks() {
565 0 : uint8_t *dst = wedge_mask_buf;
566 : BLOCK_SIZE bsize;
567 0 : memset(wedge_masks, 0, sizeof(wedge_masks));
568 0 : for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES; ++bsize) {
569 : const uint8_t *mask;
570 0 : const int bw = block_size_wide[bsize];
571 0 : const int bh = block_size_high[bsize];
572 0 : const wedge_params_type *wedge_params = &wedge_params_lookup[bsize];
573 0 : const int wbits = wedge_params->bits;
574 0 : const int wtypes = 1 << wbits;
575 : int w;
576 0 : if (wbits == 0) continue;
577 0 : for (w = 0; w < wtypes; ++w) {
578 0 : mask = get_wedge_mask_inplace(w, 0, bsize);
579 0 : aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
580 : bh);
581 0 : wedge_params->masks[0][w] = dst;
582 0 : dst += bw * bh;
583 :
584 0 : mask = get_wedge_mask_inplace(w, 1, bsize);
585 0 : aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
586 : bh);
587 0 : wedge_params->masks[1][w] = dst;
588 0 : dst += bw * bh;
589 : }
590 0 : assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
591 : }
592 0 : }
593 :
594 : // Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
595 0 : void av1_init_wedge_masks() {
596 0 : init_wedge_master_masks();
597 0 : init_wedge_signs();
598 0 : init_wedge_masks();
599 0 : }
600 :
601 : #if CONFIG_SUPERTX
602 : static void build_masked_compound_wedge_extend(
603 : uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
604 : const uint8_t *src1, int src1_stride,
605 : const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type,
606 : int wedge_offset_x, int wedge_offset_y, int h, int w) {
607 : const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
608 : const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
609 : const uint8_t *mask;
610 : size_t mask_stride;
611 : switch (comp_data->interinter_compound_type) {
612 : case COMPOUND_WEDGE:
613 : mask = av1_get_soft_mask(comp_data->wedge_index, comp_data->wedge_sign,
614 : sb_type, wedge_offset_x, wedge_offset_y);
615 : mask_stride = MASK_MASTER_STRIDE;
616 : break;
617 : #if CONFIG_COMPOUND_SEGMENT
618 : case COMPOUND_SEG:
619 : mask = comp_data->seg_mask;
620 : mask_stride = block_size_wide[sb_type];
621 : break;
622 : #endif
623 : default: assert(0); return;
624 : }
625 : aom_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
626 : mask, (int)mask_stride, h, w, subh, subw);
627 : }
628 :
629 : #if CONFIG_HIGHBITDEPTH
630 : static void build_masked_compound_wedge_extend_highbd(
631 : uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
632 : const uint8_t *src1_8, int src1_stride,
633 : const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type,
634 : int wedge_offset_x, int wedge_offset_y, int h, int w, int bd) {
635 : const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
636 : const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
637 : const uint8_t *mask;
638 : size_t mask_stride;
639 : switch (comp_data->interinter_compound_type) {
640 : case COMPOUND_WEDGE:
641 : mask = av1_get_soft_mask(comp_data->wedge_index, comp_data->wedge_sign,
642 : sb_type, wedge_offset_x, wedge_offset_y);
643 : mask_stride = MASK_MASTER_STRIDE;
644 : break;
645 : #if CONFIG_COMPOUND_SEGMENT
646 : case COMPOUND_SEG:
647 : mask = comp_data->seg_mask;
648 : mask_stride = block_size_wide[sb_type];
649 : break;
650 : #endif
651 : default: assert(0); return;
652 : }
653 : aom_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8,
654 : src1_stride, mask, (int)mask_stride, h, w, subh,
655 : subw, bd);
656 : }
657 : #endif // CONFIG_HIGHBITDEPTH
658 : #else
659 0 : static void build_masked_compound(
660 : uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
661 : const uint8_t *src1, int src1_stride,
662 : const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
663 : int w) {
664 : // Derive subsampling from h and w passed in. May be refactored to
665 : // pass in subsampling factors directly.
666 0 : const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
667 0 : const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
668 0 : const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
669 0 : aom_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
670 0 : mask, block_size_wide[sb_type], h, w, subh, subw);
671 0 : }
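/* Subsampling sketch (illustrative): (2 << b_height_log2_lookup[sb_type])
 * is half the luma block height, so subh == 1 exactly when the plane passed
 * in is vertically subsampled. E.g. for BLOCK_8X8, 2 << 1 == 4: the luma
 * call (h == 8) gives subh == 0, while a 4:2:0 chroma call (h == 4) gives
 * subh == 1, telling aom_blend_a64_mask() to downsample the luma-resolution
 * mask as it blends. The same reasoning applies to subw and w.
 */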
672 :
673 : #if CONFIG_HIGHBITDEPTH
674 0 : static void build_masked_compound_highbd(
675 : uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
676 : const uint8_t *src1_8, int src1_stride,
677 : const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
678 : int w, int bd) {
679 : // Derive subsampling from h and w passed in. May be refactored to
680 : // pass in subsampling factors directly.
681 0 : const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
682 0 : const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
683 0 : const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
684 : // const uint8_t *mask =
685 : // av1_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type);
686 0 : aom_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8,
687 0 : src1_stride, mask, block_size_wide[sb_type], h, w,
688 : subh, subw, bd);
689 0 : }
690 : #endif // CONFIG_HIGHBITDEPTH
691 : #endif // CONFIG_SUPERTX
692 :
693 0 : void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
694 : uint8_t *dst, int dst_stride,
695 : const int subpel_x, const int subpel_y,
696 : const struct scale_factors *sf, int w,
697 : int h,
698 : #if CONFIG_DUAL_FILTER
699 : const InterpFilter *interp_filter,
700 : #else
701 : const InterpFilter interp_filter,
702 : #endif
703 : int xs, int ys,
704 : #if CONFIG_SUPERTX
705 : int wedge_offset_x, int wedge_offset_y,
706 : #endif // CONFIG_SUPERTX
707 : int plane,
708 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
709 : const WarpTypesAllowed *warp_types,
710 : int p_col, int p_row, int ref,
711 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
712 : MACROBLOCKD *xd) {
713 0 : MODE_INFO *mi = xd->mi[0];
714 0 : const INTERINTER_COMPOUND_DATA comp_data = {
715 : #if CONFIG_WEDGE
716 0 : mi->mbmi.wedge_index,
717 0 : mi->mbmi.wedge_sign,
718 : #endif // CONFIG_WEDGE
719 : #if CONFIG_COMPOUND_SEGMENT
720 0 : mi->mbmi.mask_type,
721 0 : xd->seg_mask,
722 : #endif // CONFIG_COMPOUND_SEGMENT
723 0 : mi->mbmi.interinter_compound_type
724 : };
725 : // The prediction filter types used here should be those for
726 : // the second reference block.
727 : #if CONFIG_DUAL_FILTER
728 0 : InterpFilter tmp_ipf[4] = {
729 : interp_filter[2], interp_filter[3], interp_filter[2], interp_filter[3],
730 : };
731 : #else
732 : InterpFilter tmp_ipf = interp_filter;
733 : #endif // CONFIG_DUAL_FILTER
734 0 : ConvolveParams conv_params = get_conv_params(0, plane);
735 :
736 : #if CONFIG_HIGHBITDEPTH
737 : DECLARE_ALIGNED(16, uint8_t, tmp_dst_[2 * MAX_SB_SQUARE]);
738 0 : uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
739 0 : ? CONVERT_TO_BYTEPTR(tmp_dst_)
740 0 : : tmp_dst_;
741 0 : av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
742 : subpel_y, sf, w, h, &conv_params, tmp_ipf,
743 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
744 : warp_types, p_col, p_row, plane, ref,
745 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
746 : #if CONFIG_MOTION_VAR
747 : 0, 0,
748 : #endif
749 : xs, ys, xd);
750 : #if CONFIG_COMPOUND_SEGMENT
751 0 : if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
752 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
753 0 : build_compound_seg_mask_highbd(comp_data.seg_mask, comp_data.mask_type,
754 : dst, dst_stride, tmp_dst, MAX_SB_SIZE,
755 0 : mi->mbmi.sb_type, h, w, xd->bd);
756 : else
757 0 : build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
758 : dst_stride, tmp_dst, MAX_SB_SIZE,
759 0 : mi->mbmi.sb_type, h, w);
760 : }
761 : #endif // CONFIG_COMPOUND_SEGMENT
762 :
763 : #if CONFIG_SUPERTX
764 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
765 : build_masked_compound_wedge_extend_highbd(
766 : dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, &comp_data,
767 : mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w, xd->bd);
768 : else
769 : build_masked_compound_wedge_extend(
770 : dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, &comp_data,
771 : mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w);
772 : #else
773 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
774 0 : build_masked_compound_highbd(dst, dst_stride, dst, dst_stride, tmp_dst,
775 0 : MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h,
776 : w, xd->bd);
777 : else
778 0 : build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
779 0 : MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
780 : #endif // CONFIG_SUPERTX
781 :
782 : #else // CONFIG_HIGHBITDEPTH
783 : DECLARE_ALIGNED(16, uint8_t, tmp_dst[MAX_SB_SQUARE]);
784 : av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
785 : subpel_y, sf, w, h, &conv_params, tmp_ipf,
786 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
787 : warp_types, p_col, p_row, plane, ref,
788 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
789 : #if CONFIG_MOTION_VAR
790 : 0, 0,
791 : #endif
792 : xs, ys, xd);
793 : #if CONFIG_COMPOUND_SEGMENT
794 : if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG)
795 : build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
796 : dst_stride, tmp_dst, MAX_SB_SIZE, mi->mbmi.sb_type,
797 : h, w);
798 : #endif // CONFIG_COMPOUND_SEGMENT
799 : #if CONFIG_SUPERTX
800 : build_masked_compound_wedge_extend(dst, dst_stride, dst, dst_stride, tmp_dst,
801 : MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type,
802 : wedge_offset_x, wedge_offset_y, h, w);
803 : #else
804 : build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE,
805 : &comp_data, mi->mbmi.sb_type, h, w);
806 : #endif // CONFIG_SUPERTX
807 : #endif // CONFIG_HIGHBITDEPTH
808 : #if CONFIG_COMPOUND_SEGMENT
809 : (void)plane;
810 : #endif // CONFIG_COMPOUND_SEGMENT
811 0 : }
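/* Flow summary (illustrative): the masked predictor is built in three steps:
 * (1) predict the second reference into tmp_dst using the second ref's
 *     filter types (dst already holds the first reference's prediction);
 * (2) for COMPOUND_SEG on plane 0, derive the segmentation mask from the
 *     two predictions themselves (dst vs tmp_dst);
 * (3) blend dst and tmp_dst through the compound mask back into dst.
 */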
812 : #endif // CONFIG_EXT_INTER
813 :
814 : // TODO(sarahparker) av1_highbd_build_inter_predictor and
815 : // av1_build_inter_predictor should be combined with
816 : // av1_make_inter_predictor
817 : #if CONFIG_HIGHBITDEPTH
818 0 : void av1_highbd_build_inter_predictor(
819 : const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
820 : const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref,
821 : #if CONFIG_DUAL_FILTER
822 : const InterpFilter *interp_filter,
823 : #else
824 : const InterpFilter interp_filter,
825 : #endif
826 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
827 : const WarpTypesAllowed *warp_types, int p_col, int p_row,
828 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
829 : int plane, enum mv_precision precision, int x, int y,
830 : const MACROBLOCKD *xd) {
831 0 : const int is_q4 = precision == MV_PRECISION_Q4;
832 0 : const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
833 0 : is_q4 ? src_mv->col : src_mv->col * 2 };
834 0 : MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
835 0 : const int subpel_x = mv.col & SUBPEL_MASK;
836 0 : const int subpel_y = mv.row & SUBPEL_MASK;
837 0 : ConvolveParams conv_params = get_conv_params(ref, plane);
838 :
839 0 : src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
840 :
841 0 : av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
842 : sf, w, h, &conv_params, interp_filter,
843 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
844 : warp_types, p_col, p_row, plane, ref,
845 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
846 : #if CONFIG_MOTION_VAR
847 : 0, 0,
848 : #endif
849 : sf->x_step_q4, sf->y_step_q4, xd);
850 0 : }
851 : #endif // CONFIG_HIGHBITDEPTH
852 :
853 0 : void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
854 : int dst_stride, const MV *src_mv,
855 : const struct scale_factors *sf, int w, int h,
856 : ConvolveParams *conv_params,
857 : #if CONFIG_DUAL_FILTER
858 : const InterpFilter *interp_filter,
859 : #else
860 : const InterpFilter interp_filter,
861 : #endif
862 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
863 : const WarpTypesAllowed *warp_types, int p_col,
864 : int p_row, int plane, int ref,
865 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
866 : enum mv_precision precision, int x, int y,
867 : const MACROBLOCKD *xd) {
868 0 : const int is_q4 = precision == MV_PRECISION_Q4;
869 0 : const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
870 0 : is_q4 ? src_mv->col : src_mv->col * 2 };
871 0 : MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
872 0 : const int subpel_x = mv.col & SUBPEL_MASK;
873 0 : const int subpel_y = mv.row & SUBPEL_MASK;
874 :
875 0 : src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
876 :
877 0 : av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
878 : sf, w, h, conv_params, interp_filter,
879 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
880 : warp_types, p_col, p_row, plane, ref,
881 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
882 : #if CONFIG_MOTION_VAR
883 : 0, 0,
884 : #endif
885 : sf->x_step_q4, sf->y_step_q4, xd);
886 0 : }
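/* Worked example (illustrative, assuming SUBPEL_BITS == 4 as elsewhere in
 * libaom): with precision == MV_PRECISION_Q3 and src_mv->col == 19, the q4
 * value is 19 * 2 == 38, so the integer part 38 >> SUBPEL_BITS == 2 advances
 * src by two pixels and subpel_x == (38 & SUBPEL_MASK) == 6 selects the
 * 6/16-pel interpolation filter phase.
 */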
887 :
888 : typedef struct SubpelParams {
889 : int xs;
890 : int ys;
891 : int subpel_x;
892 : int subpel_y;
893 : } SubpelParams;
894 :
895 0 : void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
896 : #if CONFIG_MOTION_VAR
897 : int mi_col_offset, int mi_row_offset,
898 : #endif // CONFIG_MOTION_VAR
899 : int block, int bw, int bh, int x, int y, int w,
900 : int h,
901 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
902 : int wedge_offset_x, int wedge_offset_y,
903 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
904 : int mi_x, int mi_y) {
905 0 : struct macroblockd_plane *const pd = &xd->plane[plane];
906 : #if CONFIG_MOTION_VAR
907 0 : const MODE_INFO *mi = xd->mi[mi_col_offset + xd->mi_stride * mi_row_offset];
908 : #else
909 : const MODE_INFO *mi = xd->mi[0];
910 : #endif // CONFIG_MOTION_VAR
911 0 : int is_compound = has_second_ref(&mi->mbmi);
912 : int ref;
913 : #if CONFIG_INTRABC
914 : const int is_intrabc = is_intrabc_block(&mi->mbmi);
915 : assert(IMPLIES(is_intrabc, !is_compound));
916 : #endif // CONFIG_INTRABC
917 : #if CONFIG_GLOBAL_MOTION
918 0 : int is_global[2] = { 0, 0 };
919 0 : for (ref = 0; ref < 1 + is_compound; ++ref) {
920 0 : WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]];
921 0 : is_global[ref] = is_global_mv_block(mi, block, wm->wmtype);
922 : }
923 : #endif // CONFIG_GLOBAL_MOTION
924 :
925 : #if CONFIG_CB4X4
926 : (void)block;
927 : (void)cm;
928 : #endif
929 :
930 : #if CONFIG_MOTION_VAR && (CONFIG_CHROMA_SUB8X8 || !CONFIG_CB4X4)
931 0 : const int build_for_obmc = !(mi_col_offset == 0 && mi_row_offset == 0);
932 : #endif // CONFIG_MOTION_VAR && (CONFIG_CHROMA_SUB8X8 || !CONFIG_CB4X4)
933 :
934 : #if CONFIG_CHROMA_SUB8X8
935 0 : const BLOCK_SIZE bsize = mi->mbmi.sb_type;
936 0 : const int ss_x = pd->subsampling_x;
937 0 : const int ss_y = pd->subsampling_y;
938 0 : int sub8x8_inter = bsize < BLOCK_8X8 && (ss_x || ss_y);
939 0 : const int row_start = (block_size_high[bsize] == 4) && ss_y ? -1 : 0;
940 0 : const int col_start = (block_size_wide[bsize] == 4) && ss_x ? -1 : 0;
941 :
942 : #if CONFIG_MOTION_VAR
943 0 : if (!build_for_obmc && sub8x8_inter) {
944 : #else
945 : if (sub8x8_inter) {
946 : #endif // CONFIG_MOTION_VAR
947 0 : for (int row = row_start; row <= 0 && sub8x8_inter; ++row)
948 0 : for (int col = col_start; col <= 0; ++col)
949 0 : if (!is_inter_block(&xd->mi[row * xd->mi_stride + col]->mbmi))
950 0 : sub8x8_inter = 0;
951 : }
952 :
953 : #if CONFIG_MOTION_VAR
954 0 : if (!build_for_obmc && sub8x8_inter) {
955 : #else
956 : if (sub8x8_inter) {
957 : #endif // CONFIG_MOTION_VAR
958 : // Sub-block (b4) and covering block (b8) dimensions in this plane
959 0 : const int b4_w = block_size_wide[bsize] >> ss_x;
960 0 : const int b4_h = block_size_high[bsize] >> ss_y;
961 0 : const BLOCK_SIZE plane_bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
962 0 : const int b8_w = block_size_wide[plane_bsize] >> ss_x;
963 0 : const int b8_h = block_size_high[plane_bsize] >> ss_y;
964 : int idx, idy;
965 :
966 0 : const int x_base = x;
967 0 : const int y_base = y;
968 :
969 0 : const struct buf_2d orig_pred_buf[2] = { pd->pre[0], pd->pre[1] };
970 :
971 0 : int row = row_start;
972 0 : for (idy = 0; idy < b8_h; idy += b4_h) {
973 0 : int col = col_start;
974 0 : for (idx = 0; idx < b8_w; idx += b4_w) {
975 0 : MB_MODE_INFO *this_mbmi = &xd->mi[row * xd->mi_stride + col]->mbmi;
976 0 : is_compound = has_second_ref(this_mbmi);
977 :
978 0 : for (ref = 0; ref < 1 + is_compound; ++ref) {
979 0 : struct buf_2d *const dst_buf = &pd->dst;
980 :
981 0 : const RefBuffer *ref_buf =
982 0 : &cm->frame_refs[this_mbmi->ref_frame[ref] - LAST_FRAME];
983 :
984 0 : const int c_offset = (mi_x + MI_SIZE * col_start) >> ss_x;
985 0 : const int r_offset = (mi_y + MI_SIZE * row_start) >> ss_y;
986 0 : pd->pre[ref].buf0 =
987 0 : (plane == 1) ? ref_buf->buf->u_buffer : ref_buf->buf->v_buffer;
988 0 : pd->pre[ref].buf =
989 0 : pd->pre[ref].buf0 + scaled_buffer_offset(c_offset, r_offset,
990 0 : ref_buf->buf->uv_stride,
991 : &ref_buf->sf);
992 0 : pd->pre[ref].width = ref_buf->buf->uv_crop_width;
993 0 : pd->pre[ref].height = ref_buf->buf->uv_crop_height;
994 0 : pd->pre[ref].stride = ref_buf->buf->uv_stride;
995 :
996 : #if CONFIG_INTRABC
997 : const struct scale_factors *const sf =
998 : is_intrabc ? &xd->sf_identity : &xd->block_refs[ref]->sf;
999 : struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
1000 : #else
1001 0 : const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
1002 0 : struct buf_2d *const pre_buf = &pd->pre[ref];
1003 : #endif // CONFIG_INTRABC
1004 0 : uint8_t *dst = dst_buf->buf;
1005 :
1006 0 : const MV mv = this_mbmi->mv[ref].as_mv;
1007 :
1008 0 : const MV mv_q4 = clamp_mv_to_umv_border_sb(
1009 : xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
1010 : uint8_t *pre;
1011 : MV32 scaled_mv;
1012 : int xs, ys, subpel_x, subpel_y;
1013 0 : const int is_scaled = av1_is_scaled(sf);
1014 0 : ConvolveParams conv_params = get_conv_params(ref, plane);
1015 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1016 : WarpTypesAllowed warp_types;
1017 : #if CONFIG_GLOBAL_MOTION
1018 0 : warp_types.global_warp_allowed = is_global[ref];
1019 : #endif // CONFIG_GLOBAL_MOTION
1020 : #if CONFIG_WARPED_MOTION
1021 0 : warp_types.local_warp_allowed =
1022 0 : this_mbmi->motion_mode == WARPED_CAUSAL;
1023 : #endif // CONFIG_WARPED_MOTION
1024 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1025 :
1026 0 : x = x_base + idx;
1027 0 : y = y_base + idy;
1028 :
1029 0 : dst += dst_buf->stride * y + x;
1030 :
1031 0 : if (is_scaled) {
1032 0 : pre =
1033 0 : pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
1034 0 : scaled_mv = av1_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
1035 0 : xs = sf->x_step_q4;
1036 0 : ys = sf->y_step_q4;
1037 : } else {
1038 0 : pre = pre_buf->buf + y * pre_buf->stride + x;
1039 0 : scaled_mv.row = mv_q4.row;
1040 0 : scaled_mv.col = mv_q4.col;
1041 0 : xs = ys = 16;
1042 : }
1043 :
1044 0 : subpel_x = scaled_mv.col & SUBPEL_MASK;
1045 0 : subpel_y = scaled_mv.row & SUBPEL_MASK;
1046 0 : pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride +
1047 0 : (scaled_mv.col >> SUBPEL_BITS);
1048 :
1049 : #if CONFIG_EXT_INTER
1050 0 : if (ref && is_masked_compound_type(mi->mbmi.interinter_compound_type))
1051 0 : av1_make_masked_inter_predictor(
1052 : pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y,
1053 0 : sf, w, h, mi->mbmi.interp_filter, xs, ys,
1054 : #if CONFIG_SUPERTX
1055 : wedge_offset_x, wedge_offset_y,
1056 : #endif // CONFIG_SUPERTX
1057 : plane,
1058 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1059 0 : &warp_types, (mi_x >> pd->subsampling_x) + x,
1060 0 : (mi_y >> pd->subsampling_y) + y, ref,
1061 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1062 : xd);
1063 : else
1064 : #endif // CONFIG_EXT_INTER
1065 0 : av1_make_inter_predictor(
1066 : pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y,
1067 0 : sf, b4_w, b4_h, &conv_params, this_mbmi->interp_filter,
1068 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1069 0 : &warp_types, (mi_x >> pd->subsampling_x) + x,
1070 0 : (mi_y >> pd->subsampling_y) + y, plane, ref,
1071 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1072 : #if CONFIG_MOTION_VAR
1073 : mi_col_offset, mi_row_offset,
1074 : #endif
1075 : xs, ys, xd);
1076 : }
1077 0 : ++col;
1078 : }
1079 0 : ++row;
1080 : }
1081 :
1082 0 : for (ref = 0; ref < 2; ++ref) pd->pre[ref] = orig_pred_buf[ref];
1083 0 : return;
1084 : }
1085 : #else
1086 : (void)cm;
1087 : #endif // CONFIG_CHROMA_SUB8X8
1088 :
1089 : {
1090 0 : struct buf_2d *const dst_buf = &pd->dst;
1091 0 : uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
1092 : uint8_t *pre[2];
1093 : MV32 scaled_mv[2];
1094 : SubpelParams subpel_params[2];
1095 : #if CONFIG_CONVOLVE_ROUND
1096 : DECLARE_ALIGNED(16, int32_t, tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE]);
1097 : av1_zero(tmp_dst);
1098 : #endif // CONFIG_CONVOLVE_ROUND
1099 :
1100 0 : for (ref = 0; ref < 1 + is_compound; ++ref) {
1101 : #if CONFIG_INTRABC
1102 : const struct scale_factors *const sf =
1103 : is_intrabc ? &xd->sf_identity : &xd->block_refs[ref]->sf;
1104 : struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
1105 : #else
1106 0 : const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
1107 0 : struct buf_2d *const pre_buf = &pd->pre[ref];
1108 : #endif // CONFIG_INTRABC
1109 : #if CONFIG_CB4X4
1110 0 : const MV mv = mi->mbmi.mv[ref].as_mv;
1111 : #else
1112 : const MV mv =
1113 : #if CONFIG_MOTION_VAR
1114 : (mi->mbmi.sb_type < BLOCK_8X8 && !build_for_obmc)
1115 : ?
1116 : #else
1117 : mi->mbmi.sb_type < BLOCK_8X8 ?
1118 : #endif
1119 : average_split_mvs(pd, mi, ref, block)
1120 : : mi->mbmi.mv[ref].as_mv;
1121 : #endif
1122 :
1123 : // TODO(jkoleszar): This clamping is done in the incorrect place for the
1124 : // scaling case. It needs to be done on the scaled MV, not the pre-scaling
1125 : // MV. Note however that it performs the subsampling aware scaling so
1126 : // that the result is always q4.
1127 : // mv_precision precision is MV_PRECISION_Q4.
1128 0 : const MV mv_q4 = clamp_mv_to_umv_border_sb(
1129 : xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
1130 :
1131 0 : const int is_scaled = av1_is_scaled(sf);
1132 0 : if (is_scaled) {
1133 0 : pre[ref] =
1134 0 : pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
1135 0 : scaled_mv[ref] = av1_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
1136 0 : subpel_params[ref].xs = sf->x_step_q4;
1137 0 : subpel_params[ref].ys = sf->y_step_q4;
1138 : } else {
1139 0 : pre[ref] = pre_buf->buf + (y * pre_buf->stride + x);
1140 0 : scaled_mv[ref].row = mv_q4.row;
1141 0 : scaled_mv[ref].col = mv_q4.col;
1142 0 : subpel_params[ref].xs = 16;
1143 0 : subpel_params[ref].ys = 16;
1144 : }
1145 :
1146 0 : subpel_params[ref].subpel_x = scaled_mv[ref].col & SUBPEL_MASK;
1147 0 : subpel_params[ref].subpel_y = scaled_mv[ref].row & SUBPEL_MASK;
1148 0 : pre[ref] += (scaled_mv[ref].row >> SUBPEL_BITS) * pre_buf->stride +
1149 0 : (scaled_mv[ref].col >> SUBPEL_BITS);
1150 : }
1151 :
1152 : #if CONFIG_CONVOLVE_ROUND
1153 : ConvolveParams conv_params =
1154 : get_conv_params_no_round(ref, plane, tmp_dst, MAX_SB_SIZE);
1155 : #else
1156 0 : ConvolveParams conv_params = get_conv_params(ref, plane);
1157 : #endif // CONFIG_CONVOLVE_ROUND
1158 0 : for (ref = 0; ref < 1 + is_compound; ++ref) {
1159 : #if CONFIG_INTRABC
1160 : const struct scale_factors *const sf =
1161 : is_intrabc ? &xd->sf_identity : &xd->block_refs[ref]->sf;
1162 : struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
1163 : #else
1164 0 : const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
1165 0 : struct buf_2d *const pre_buf = &pd->pre[ref];
1166 : #endif // CONFIG_INTRABC
1167 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1168 : WarpTypesAllowed warp_types;
1169 : #if CONFIG_GLOBAL_MOTION
1170 0 : warp_types.global_warp_allowed = is_global[ref];
1171 : #endif // CONFIG_GLOBAL_MOTION
1172 : #if CONFIG_WARPED_MOTION
1173 0 : warp_types.local_warp_allowed = mi->mbmi.motion_mode == WARPED_CAUSAL;
1174 : #endif // CONFIG_WARPED_MOTION
1175 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1176 0 : conv_params.ref = ref;
1177 : #if CONFIG_EXT_INTER
1178 0 : if (ref && is_masked_compound_type(mi->mbmi.interinter_compound_type))
1179 0 : av1_make_masked_inter_predictor(
1180 0 : pre[ref], pre_buf->stride, dst, dst_buf->stride,
1181 : subpel_params[ref].subpel_x, subpel_params[ref].subpel_y, sf, w, h,
1182 0 : mi->mbmi.interp_filter, subpel_params[ref].xs,
1183 : subpel_params[ref].ys,
1184 : #if CONFIG_SUPERTX
1185 : wedge_offset_x, wedge_offset_y,
1186 : #endif // CONFIG_SUPERTX
1187 : plane,
1188 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1189 0 : &warp_types, (mi_x >> pd->subsampling_x) + x,
1190 0 : (mi_y >> pd->subsampling_y) + y, ref,
1191 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1192 : xd);
1193 : else
1194 : #endif // CONFIG_EXT_INTER
1195 0 : av1_make_inter_predictor(
1196 0 : pre[ref], pre_buf->stride, dst, dst_buf->stride,
1197 : subpel_params[ref].subpel_x, subpel_params[ref].subpel_y, sf, w, h,
1198 0 : &conv_params, mi->mbmi.interp_filter,
1199 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1200 0 : &warp_types, (mi_x >> pd->subsampling_x) + x,
1201 0 : (mi_y >> pd->subsampling_y) + y, plane, ref,
1202 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1203 : #if CONFIG_MOTION_VAR
1204 : mi_col_offset, mi_row_offset,
1205 : #endif
1206 : subpel_params[ref].xs, subpel_params[ref].ys, xd);
1207 : }
1208 :
1209 : #if CONFIG_CONVOLVE_ROUND
1210 : // TODO(angiebird): This part needs optimization
1211 : if (conv_params.do_post_rounding) {
1212 : #if CONFIG_HIGHBITDEPTH
1213 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1214 : av1_highbd_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride,
1215 : w, h, FILTER_BITS * 2 + is_compound -
1216 : conv_params.round_0 -
1217 : conv_params.round_1,
1218 : xd->bd);
1219 : else
1220 : #endif // CONFIG_HIGHBITDEPTH
1221 : av1_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h,
1222 : FILTER_BITS * 2 + is_compound -
1223 : conv_params.round_0 - conv_params.round_1);
1224 : }
1225 : #endif // CONFIG_CONVOLVE_ROUND
1226 : }
1227 : }
1228 :
1229 0 : void av1_build_inter_predictor_sub8x8(const AV1_COMMON *cm, MACROBLOCKD *xd,
1230 : int plane, int i, int ir, int ic,
1231 : int mi_row, int mi_col) {
1232 0 : struct macroblockd_plane *const pd = &xd->plane[plane];
1233 0 : MODE_INFO *const mi = xd->mi[0];
1234 0 : const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
1235 0 : const int width = block_size_wide[plane_bsize];
1236 0 : const int height = block_size_high[plane_bsize];
1237 0 : uint8_t *const dst = &pd->dst.buf[(ir * pd->dst.stride + ic) << 2];
1238 : int ref;
1239 0 : const int is_compound = has_second_ref(&mi->mbmi);
1240 : (void)cm;
1241 :
1242 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1243 : WarpTypesAllowed warp_types;
1244 0 : const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
1245 0 : const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
1246 : #if CONFIG_GLOBAL_MOTION
1247 : int is_global[2];
1248 0 : for (ref = 0; ref < 1 + is_compound; ++ref) {
1249 0 : WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]];
1250 0 : is_global[ref] = is_global_mv_block(mi, i, wm->wmtype);
1251 : }
1252 : #endif // CONFIG_GLOBAL_MOTION
1253 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1254 :
1255 0 : for (ref = 0; ref < 1 + is_compound; ++ref) {
1256 0 : ConvolveParams conv_params = get_conv_params(ref, plane);
1257 0 : const uint8_t *pre =
1258 0 : &pd->pre[ref].buf[(ir * pd->pre[ref].stride + ic) << 2];
1259 : #if CONFIG_GLOBAL_MOTION
1260 0 : warp_types.global_warp_allowed = is_global[ref];
1261 : #endif // CONFIG_GLOBAL_MOTION
1262 : #if CONFIG_WARPED_MOTION
1263 0 : warp_types.local_warp_allowed = mi->mbmi.motion_mode == WARPED_CAUSAL;
1264 : #endif // CONFIG_WARPED_MOTION
1265 :
1266 : #if CONFIG_HIGHBITDEPTH
1267 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1268 0 : av1_highbd_build_inter_predictor(
1269 : pre, pd->pre[ref].stride, dst, pd->dst.stride,
1270 0 : &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height,
1271 0 : ref, mi->mbmi.interp_filter,
1272 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1273 : &warp_types, p_col, p_row,
1274 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1275 0 : plane, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * ic,
1276 0 : mi_row * MI_SIZE + 4 * ir, xd);
1277 : else
1278 : #endif // CONFIG_HIGHBITDEPTH
1279 0 : av1_build_inter_predictor(pre, pd->pre[ref].stride, dst, pd->dst.stride,
1280 0 : &mi->bmi[i].as_mv[ref].as_mv,
1281 0 : &xd->block_refs[ref]->sf, width, height,
1282 0 : &conv_params, mi->mbmi.interp_filter,
1283 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1284 : &warp_types, p_col, p_row, plane, ref,
1285 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1286 0 : MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * ic,
1287 0 : mi_row * MI_SIZE + 4 * ir, xd);
1288 : }
1289 0 : }
1290 :
1291 0 : static void build_inter_predictors_for_planes(const AV1_COMMON *cm,
1292 : MACROBLOCKD *xd, BLOCK_SIZE bsize,
1293 : int mi_row, int mi_col,
1294 : int plane_from, int plane_to) {
1295 : int plane;
1296 0 : const int mi_x = mi_col * MI_SIZE;
1297 0 : const int mi_y = mi_row * MI_SIZE;
1298 : #if CONFIG_CB4X4
1299 0 : const int unify_bsize = 1;
1300 : #else
1301 : const int unify_bsize = 0;
1302 : #endif
1303 0 : for (plane = plane_from; plane <= plane_to; ++plane) {
1304 0 : const struct macroblockd_plane *pd = &xd->plane[plane];
1305 0 : const int bw = pd->width;
1306 0 : const int bh = pd->height;
1307 :
1308 : #if CONFIG_CB4X4
1309 0 : if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
1310 : pd->subsampling_y))
1311 0 : continue;
1312 : #endif
1313 :
1314 0 : if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8 && !unify_bsize) {
1315 0 : const PARTITION_TYPE bp = bsize - xd->mi[0]->mbmi.sb_type;
1316 0 : const int have_vsplit = bp != PARTITION_HORZ;
1317 0 : const int have_hsplit = bp != PARTITION_VERT;
1318 0 : const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
1319 0 : const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
1320 0 : const int pw = 8 >> (have_vsplit | pd->subsampling_x);
1321 0 : const int ph = 8 >> (have_hsplit | pd->subsampling_y);
1322 : int x, y;
1323 0 : assert(bp != PARTITION_NONE && bp < PARTITION_TYPES);
1324 0 : assert(bsize == BLOCK_8X8);
1325 0 : assert(pw * num_4x4_w == bw && ph * num_4x4_h == bh);
1326 0 : for (y = 0; y < num_4x4_h; ++y)
1327 0 : for (x = 0; x < num_4x4_w; ++x)
1328 0 : build_inter_predictors(cm, xd, plane,
1329 : #if CONFIG_MOTION_VAR
1330 : 0, 0,
1331 : #endif // CONFIG_MOTION_VAR
1332 0 : y * 2 + x, bw, bh, 4 * x, 4 * y, pw, ph,
1333 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
1334 : 0, 0,
1335 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
1336 : mi_x, mi_y);
1337 : } else {
1338 0 : build_inter_predictors(cm, xd, plane,
1339 : #if CONFIG_MOTION_VAR
1340 : 0, 0,
1341 : #endif // CONFIG_MOTION_VAR
1342 : 0, bw, bh, 0, 0, bw, bh,
1343 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
1344 : 0, 0,
1345 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
1346 : mi_x, mi_y);
1347 : }
1348 : }
1349 0 : }
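/* Partition-arithmetic note (illustrative): in the !unify_bsize branch the
 * assert pins bsize to BLOCK_8X8, so bp = bsize - xd->mi[0]->mbmi.sb_type
 * maps the sub-8x8 block size directly onto a PARTITION_TYPE:
 *   BLOCK_8X4 -> PARTITION_HORZ, BLOCK_4X8 -> PARTITION_VERT,
 *   BLOCK_4X4 -> PARTITION_SPLIT,
 * from which the num_4x4_w/num_4x4_h unit counts and the pw/ph predictor
 * sizes follow.
 */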
1350 :
1351 0 : void av1_build_inter_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
1352 : int mi_row, int mi_col, BUFFER_SET *ctx,
1353 : BLOCK_SIZE bsize) {
1354 0 : build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 0, 0);
1355 : #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
1356 0 : if (is_interintra_pred(&xd->mi[0]->mbmi)) {
1357 0 : BUFFER_SET default_ctx = { { xd->plane[0].dst.buf, NULL, NULL },
1358 0 : { xd->plane[0].dst.stride, 0, 0 } };
1359 0 : if (!ctx) ctx = &default_ctx;
1360 0 : av1_build_interintra_predictors_sby(xd, xd->plane[0].dst.buf,
1361 : xd->plane[0].dst.stride, ctx, bsize);
1362 : }
1363 : #else
1364 : (void)ctx;
1365 : #endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
1366 0 : }
1367 :
1368 0 : void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
1369 : int mi_row, int mi_col, BUFFER_SET *ctx,
1370 : BLOCK_SIZE bsize) {
1371 0 : build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 1,
1372 : MAX_MB_PLANE - 1);
1373 : #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
1374 0 : if (is_interintra_pred(&xd->mi[0]->mbmi)) {
1375 0 : BUFFER_SET default_ctx = {
1376 0 : { NULL, xd->plane[1].dst.buf, xd->plane[2].dst.buf },
1377 0 : { 0, xd->plane[1].dst.stride, xd->plane[2].dst.stride }
1378 : };
1379 0 : if (!ctx) ctx = &default_ctx;
1380 0 : av1_build_interintra_predictors_sbuv(
1381 : xd, xd->plane[1].dst.buf, xd->plane[2].dst.buf, xd->plane[1].dst.stride,
1382 : xd->plane[2].dst.stride, ctx, bsize);
1383 : }
1384 : #else
1385 : (void)ctx;
1386 : #endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
1387 0 : }
1388 :
1389 : // TODO(afergs): Check if ctx can be made constant
1390 0 : void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
1391 : int mi_row, int mi_col, BUFFER_SET *ctx,
1392 : BLOCK_SIZE bsize) {
1393 0 : build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 0,
1394 : MAX_MB_PLANE - 1);
1395 : #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
1396 0 : if (is_interintra_pred(&xd->mi[0]->mbmi)) {
1397 0 : BUFFER_SET default_ctx = {
1398 0 : { xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf },
1399 0 : { xd->plane[0].dst.stride, xd->plane[1].dst.stride,
1400 0 : xd->plane[2].dst.stride }
1401 : };
1402 0 : if (!ctx) ctx = &default_ctx;
1403 0 : av1_build_interintra_predictors(
1404 : xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf,
1405 : xd->plane[0].dst.stride, xd->plane[1].dst.stride,
1406 : xd->plane[2].dst.stride, ctx, bsize);
1407 : }
1408 : #else
1409 : (void)ctx;
1410 : #endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
1411 0 : }
1412 :
1413 0 : void av1_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
1414 : BLOCK_SIZE bsize, const YV12_BUFFER_CONFIG *src,
1415 : int mi_row, int mi_col) {
1416 0 : uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer,
1417 0 : src->v_buffer };
1418 0 : const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width,
1419 0 : src->uv_crop_width };
1420 0 : const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height,
1421 0 : src->uv_crop_height };
1422 0 : const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride,
1423 0 : src->uv_stride };
1424 : int i;
1425 :
1426 0 : for (i = 0; i < MAX_MB_PLANE; ++i) {
1427 0 : struct macroblockd_plane *const pd = &planes[i];
1428 0 : setup_pred_plane(&pd->dst, bsize, buffers[i], widths[i], heights[i],
1429 : strides[i], mi_row, mi_col, NULL, pd->subsampling_x,
1430 : pd->subsampling_y);
1431 : }
1432 0 : }
1433 :
1434 0 : void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
1435 : const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
1436 : const struct scale_factors *sf) {
1437 0 : if (src != NULL) {
1438 : int i;
1439 0 : uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer,
1440 0 : src->v_buffer };
1441 0 : const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width,
1442 0 : src->uv_crop_width };
1443 0 : const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height,
1444 0 : src->uv_crop_height };
1445 0 : const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride,
1446 0 : src->uv_stride };
1447 0 : for (i = 0; i < MAX_MB_PLANE; ++i) {
1448 0 : struct macroblockd_plane *const pd = &xd->plane[i];
1449 0 : setup_pred_plane(&pd->pre[idx], xd->mi[0]->mbmi.sb_type, buffers[i],
1450 : widths[i], heights[i], strides[i], mi_row, mi_col, sf,
1451 : pd->subsampling_x, pd->subsampling_y);
1452 : }
1453 : }
1454 0 : }
1455 :
1456 : #if CONFIG_SUPERTX
1457 : #if CONFIG_CB4X4
1458 : static const uint8_t mask_4[4] = { 64, 52, 12, 0 };
1459 : static const uint8_t mask_4_uv[4] = { 64, 52, 12, 0 };
1460 : #endif // CONFIG_CB4X4
1461 : static const uint8_t mask_8[8] = { 64, 64, 62, 52, 12, 2, 0, 0 };
1462 :
1463 : static const uint8_t mask_16[16] = { 63, 62, 60, 58, 55, 50, 43, 36,
1464 : 28, 21, 14, 9, 6, 4, 2, 1 };
1465 :
1466 : static const uint8_t mask_32[32] = { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63,
1467 : 61, 57, 52, 45, 36, 28, 19, 12, 7, 3, 1,
1468 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1469 :
1470 : static const uint8_t mask_8_uv[8] = { 64, 64, 62, 52, 12, 2, 0, 0 };
1471 :
1472 : static const uint8_t mask_16_uv[16] = { 64, 64, 64, 64, 61, 53, 45, 36,
1473 : 28, 19, 11, 3, 0, 0, 0, 0 };
1474 :
1475 : static const uint8_t mask_32_uv[32] = { 64, 64, 64, 64, 64, 64, 64, 64,
1476 : 64, 64, 64, 64, 60, 54, 46, 36,
1477 : 28, 18, 10, 4, 0, 0, 0, 0,
1478 : 0, 0, 0, 0, 0, 0, 0, 0 };
1479 :
1480 : static const uint8_t *get_supertx_mask(int length, int plane) {
1481 : switch (length) {
1482 : #if CONFIG_CB4X4
1483 : case 4: return plane ? mask_4_uv : mask_4;
1484 : #endif // CONFIG_CB4X4
1485 : case 8: return plane ? mask_8_uv : mask_8;
1486 : case 16: return plane ? mask_16_uv : mask_16;
1487 : case 32: return plane ? mask_32_uv : mask_32;
1488 : default: assert(0);
1489 : }
1490 : return NULL;
1491 : }
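
The 0..64 entries in these supertx mask tables are alpha weights consumed by the aom_blend_a64_{v,h}mask calls below. As a minimal sketch of the per-pixel arithmetic (my reading of the AOM_BLEND_A64 macro in aom_dsp/blend.h; the helper name here is illustrative, not part of the library):

    #include <stdint.h>

    // Weighted average on a 6-bit alpha scale: m weights v0, (64 - m) weights
    // v1, and the +32 rounds to nearest before the shift.
    static uint8_t blend_a64(int m, uint8_t v0, uint8_t v1) {
      return (uint8_t)((m * v0 + (64 - m) * v1 + 32) >> 6);
    }

For example, blend_a64(52, 100, 20) = (5200 + 240 + 32) >> 6 = 85, i.e. a mask value of 52 moves the result 52/64 of the way from the second source toward the first.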
1492 :
1493 : void av1_build_masked_inter_predictor_complex(
1494 : MACROBLOCKD *xd, uint8_t *dst, int dst_stride, const uint8_t *pre,
1495 : int pre_stride, int mi_row, int mi_col, int mi_row_ori, int mi_col_ori,
1496 : BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, PARTITION_TYPE partition,
1497 : int plane) {
1498 : const struct macroblockd_plane *pd = &xd->plane[plane];
1499 : const int ssx = pd->subsampling_x;
1500 : const int ssy = pd->subsampling_y;
1501 : const int top_w = block_size_wide[top_bsize] >> ssx;
1502 : const int top_h = block_size_high[top_bsize] >> ssy;
1503 : const int w = block_size_wide[bsize] >> ssx;
1504 : const int h = block_size_high[bsize] >> ssy;
1505 : const int w_offset = ((mi_col - mi_col_ori) * MI_SIZE) >> ssx;
1506 : const int h_offset = ((mi_row - mi_row_ori) * MI_SIZE) >> ssy;
1507 :
1508 : int w_remain, h_remain;
1509 :
1510 : #if CONFIG_HIGHBITDEPTH
1511 : const int is_hdb = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
1512 : #endif // CONFIG_HIGHBITDEPTH
1513 :
1514 : assert(bsize <= BLOCK_32X32);
1515 : assert(IMPLIES(plane == 0, ssx == 0));
1516 : assert(IMPLIES(plane == 0, ssy == 0));
1517 :
1518 : switch (partition) {
1519 : case PARTITION_HORZ: {
1520 : const uint8_t *const mask = get_supertx_mask(h, ssy);
1521 :
1522 : w_remain = top_w;
1523 : h_remain = top_h - h_offset - h;
1524 : dst += h_offset * dst_stride;
1525 : pre += h_offset * pre_stride;
1526 :
1527 : #if CONFIG_HIGHBITDEPTH
1528 : if (is_hdb)
1529 : aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, pre,
1530 : pre_stride, mask, h, top_w, xd->bd);
1531 : else
1532 : #endif // CONFIG_HIGHBITDEPTH
1533 : aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, pre, pre_stride,
1534 : mask, h, top_w);
1535 :
1536 : dst += h * dst_stride;
1537 : pre += h * pre_stride;
1538 : break;
1539 : }
1540 : case PARTITION_VERT: {
1541 : const uint8_t *const mask = get_supertx_mask(w, ssx);
1542 :
1543 : w_remain = top_w - w_offset - w;
1544 : h_remain = top_h;
1545 : dst += w_offset;
1546 : pre += w_offset;
1547 :
1548 : #if CONFIG_HIGHBITDEPTH
1549 : if (is_hdb)
1550 : aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, pre,
1551 : pre_stride, mask, top_h, w, xd->bd);
1552 : else
1553 : #endif // CONFIG_HIGHBITDEPTH
1554 : aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, pre, pre_stride,
1555 : mask, top_h, w);
1556 :
1557 : dst += w;
1558 : pre += w;
1559 : break;
1560 : }
1561 : default: {
1562 : assert(0);
1563 : return;
1564 : }
1565 : }
1566 :
1567 : if (w_remain == 0 || h_remain == 0) {
1568 : return;
1569 : }
1570 :
1571 : #if CONFIG_HIGHBITDEPTH
1572 : if (is_hdb) {
1573 : dst = (uint8_t *)CONVERT_TO_SHORTPTR(dst);
1574 : pre = (const uint8_t *)CONVERT_TO_SHORTPTR(pre);
1575 : dst_stride *= 2;
1576 : pre_stride *= 2;
1577 : w_remain *= 2;
1578 : }
1579 : #endif // CONFIG_HIGHBITDEPTH
1580 :
1581 : do {
1582 : memcpy(dst, pre, w_remain * sizeof(uint8_t));
1583 : dst += dst_stride;
1584 : pre += pre_stride;
1585 : } while (--h_remain);
1586 : }
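
For PARTITION_HORZ this routine blends one mask value per row (vmask) across the full top_w, then memcpy's whatever rows remain below the blended band straight from pre; PARTITION_VERT is the transpose, with one mask value per column (hmask). A self-contained reference loop for the vertical-mask case, under the blend_a64 semantics sketched earlier (reference-only; the real aom_blend_a64_vmask is SIMD-optimized, and here src0 is dst itself, matching the call pattern above):

    #include <stdint.h>

    // mask[r] weights the existing dst row; (64 - mask[r]) weights src1.
    static void blend_a64_vmask_ref(uint8_t *dst, int dst_stride,
                                    const uint8_t *src1, int src1_stride,
                                    const uint8_t *mask, int h, int w) {
      for (int r = 0; r < h; ++r) {
        for (int c = 0; c < w; ++c) {
          const uint8_t d = dst[r * dst_stride + c];
          const uint8_t s = src1[r * src1_stride + c];
          dst[r * dst_stride + c] =
              (uint8_t)((mask[r] * d + (64 - mask[r]) * s + 32) >> 6);
        }
      }
    }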
1587 :
1588 : void av1_build_inter_predictors_sb_sub8x8_extend(const AV1_COMMON *cm,
1589 : MACROBLOCKD *xd,
1590 : #if CONFIG_EXT_INTER
1591 : int mi_row_ori, int mi_col_ori,
1592 : #endif // CONFIG_EXT_INTER
1593 : int mi_row, int mi_col,
1594 : BLOCK_SIZE bsize, int block) {
1595 :   // Prediction function used in supertx:
1596 :   // Use the mv of the current block (which is smaller than 8x8) to get the
1597 :   // prediction of a block located at (mi_row, mi_col) with size bsize;
1598 :   // bsize can be larger than 8x8.
1599 :   // block (0-3): the sub8x8 location of the current block.
1600 : int plane;
1601 : const int mi_x = mi_col * MI_SIZE;
1602 : const int mi_y = mi_row * MI_SIZE;
1603 : #if CONFIG_EXT_INTER
1604 : const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
1605 : const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
1606 : #endif // CONFIG_EXT_INTER
1607 :
1608 : // For sub8x8 uv:
1609 :   // Skip uv prediction in supertx except for the first block (block == 0).
1610 : int max_plane = block ? 1 : MAX_MB_PLANE;
1611 :
1612 : for (plane = 0; plane < max_plane; plane++) {
1613 : const BLOCK_SIZE plane_bsize =
1614 : get_plane_block_size(bsize, &xd->plane[plane]);
1615 : const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
1616 : const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
1617 : const int bw = 4 * num_4x4_w;
1618 : const int bh = 4 * num_4x4_h;
1619 :
1620 : build_inter_predictors(cm, xd, plane,
1621 : #if CONFIG_MOTION_VAR
1622 : 0, 0,
1623 : #endif // CONFIG_MOTION_VAR
1624 : block, bw, bh, 0, 0, bw, bh,
1625 : #if CONFIG_EXT_INTER
1626 : wedge_offset_x, wedge_offset_y,
1627 : #endif // CONFIG_EXT_INTER
1628 : mi_x, mi_y);
1629 : }
1630 : #if CONFIG_EXT_INTER
1631 : if (is_interintra_pred(&xd->mi[0]->mbmi)) {
1632 : BUFFER_SET ctx = { { xd->plane[0].dst.buf, xd->plane[1].dst.buf,
1633 : xd->plane[2].dst.buf },
1634 : { xd->plane[0].dst.stride, xd->plane[1].dst.stride,
1635 : xd->plane[2].dst.stride } };
1636 : av1_build_interintra_predictors(
1637 : xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf,
1638 : xd->plane[0].dst.stride, xd->plane[1].dst.stride,
1639 : xd->plane[2].dst.stride, &ctx, bsize);
1640 : }
1641 : #endif // CONFIG_EXT_INTER
1642 : }
1643 :
1644 : void av1_build_inter_predictors_sb_extend(const AV1_COMMON *cm, MACROBLOCKD *xd,
1645 : #if CONFIG_EXT_INTER
1646 : int mi_row_ori, int mi_col_ori,
1647 : #endif // CONFIG_EXT_INTER
1648 : int mi_row, int mi_col,
1649 : BLOCK_SIZE bsize) {
1650 : int plane;
1651 : const int mi_x = mi_col * MI_SIZE;
1652 : const int mi_y = mi_row * MI_SIZE;
1653 : #if CONFIG_EXT_INTER
1654 : const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
1655 : const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
1656 : #endif // CONFIG_EXT_INTER
1657 : for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
1658 : const BLOCK_SIZE plane_bsize =
1659 : get_plane_block_size(bsize, &xd->plane[plane]);
1660 : const int bw = block_size_wide[plane_bsize];
1661 : const int bh = block_size_high[plane_bsize];
1662 :
1663 : build_inter_predictors(cm, xd, plane,
1664 : #if CONFIG_MOTION_VAR
1665 : 0, 0,
1666 : #endif // CONFIG_MOTION_VAR
1667 : 0, bw, bh, 0, 0, bw, bh,
1668 : #if CONFIG_EXT_INTER
1669 : wedge_offset_x, wedge_offset_y,
1670 : #endif // CONFIG_EXT_INTER
1671 : mi_x, mi_y);
1672 : }
1673 : }
1674 : #endif // CONFIG_SUPERTX
1675 :
1676 : #if CONFIG_MOTION_VAR
1677 : // obmc_mask_N[overlap_position]
1678 : static const uint8_t obmc_mask_1[1] = { 64 };
1679 :
1680 : static const uint8_t obmc_mask_2[2] = { 45, 64 };
1681 :
1682 : static const uint8_t obmc_mask_4[4] = { 39, 50, 59, 64 };
1683 :
1684 : static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };
1685 :
1686 : static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
1687 : 56, 58, 60, 61, 64, 64, 64, 64 };
1688 :
1689 : static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
1690 : 45, 47, 48, 50, 51, 52, 53, 55,
1691 : 56, 57, 58, 59, 60, 60, 61, 62,
1692 : 64, 64, 64, 64, 64, 64, 64, 64 };
1693 :
1694 : #if CONFIG_EXT_PARTITION
1695 : static const uint8_t obmc_mask_64[64] = {
1696 : 33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
1697 : 45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
1698 : 56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
1699 : 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
1700 : };
1701 : #endif // CONFIG_EXT_PARTITION
1702 :
1703 0 : const uint8_t *av1_get_obmc_mask(int length) {
1704 0 : switch (length) {
1705 0 : case 1: return obmc_mask_1;
1706 0 : case 2: return obmc_mask_2;
1707 0 : case 4: return obmc_mask_4;
1708 0 : case 8: return obmc_mask_8;
1709 0 : case 16: return obmc_mask_16;
1710 0 : case 32: return obmc_mask_32;
1711 : #if CONFIG_EXT_PARTITION
1712 : case 64: return obmc_mask_64;
1713 : #endif // CONFIG_EXT_PARTITION
1714 0 : default: assert(0); return NULL;
1715 : }
1716 : }
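
The obmc masks ramp from mostly-neighbor toward the current prediction as the overlap position moves away from the shared edge, saturating at 64 (keep the current prediction unchanged). A toy check of that ramp for a 4-row overlap, using the same rounded 6-bit blend sketched earlier (pixel values here are made up for illustration):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      static const uint8_t mask4[4] = { 39, 50, 59, 64 };  // obmc_mask_4
      const int cur = 120, nbr = 40;  // current vs. above-neighbor pixel value
      for (int r = 0; r < 4; ++r)
        printf("row %d -> %d\n", r,
               (mask4[r] * cur + (64 - mask4[r]) * nbr + 32) >> 6);
      return 0;  // prints 89, 103, 114, 120
    }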
1717 :
1718 : #if CONFIG_NCOBMC
1719 : // obmc_mask_flipN[overlap_position]
1720 : static const uint8_t obmc_mask_flip1[1] = { 55 };
1721 :
1722 : static const uint8_t obmc_mask_flip2[2] = { 62, 45 };
1723 :
1724 : static const uint8_t obmc_mask_flip4[4] = { 64, 59, 50, 39 };
1725 :
1726 : static const uint8_t obmc_mask_flip8[8] = { 64, 63, 61, 57, 53, 48, 42, 36 };
1727 :
1728 : static const uint8_t obmc_mask_flip16[16] = { 64, 64, 64, 63, 61, 60, 58, 56,
1729 : 54, 52, 49, 46, 43, 40, 37, 34 };
1730 :
1731 : static const uint8_t obmc_mask_flip32[32] = { 64, 64, 64, 64, 64, 63, 63, 62,
1732 : 62, 61, 60, 60, 59, 58, 57, 56,
1733 : 55, 53, 52, 51, 50, 48, 47, 45,
1734 : 44, 43, 41, 40, 38, 36, 35, 33 };
1735 :
1736 : #if CONFIG_EXT_PARTITION
1737 : static const uint8_t obmc_mask_flip64[64] = {
1738 : 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 62, 62,
1739 : 62, 62, 62, 61, 60, 60, 60, 60, 60, 59, 58, 58, 57, 57, 56, 56,
1740 : 56, 55, 54, 53, 52, 52, 51, 51, 51, 50, 49, 48, 47, 47, 46, 45,
1741 : 44, 44, 44, 43, 42, 41, 40, 40, 39, 38, 37, 36, 35, 35, 34, 33,
1742 : };
1743 : #endif // CONFIG_EXT_PARTITION
1744 :
1745 : const uint8_t *av1_get_obmc_mask_flipped(int length) {
1746 : switch (length) {
1747 : case 1: return obmc_mask_flip1;
1748 : case 2: return obmc_mask_flip2;
1749 : case 4: return obmc_mask_flip4;
1750 : case 8: return obmc_mask_flip8;
1751 : case 16: return obmc_mask_flip16;
1752 : case 32: return obmc_mask_flip32;
1753 : #if CONFIG_EXT_PARTITION
1754 : case 64: return obmc_mask_flip64;
1755 : #endif // CONFIG_EXT_PARTITION
1756 : default: assert(0); return NULL;
1757 : }
1758 : }
1759 : #endif // CONFIG_NCOBMC
1760 :
1761 0 : void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd,
1762 : int mi_row, int mi_col) {
1763 : int i, mi_step;
1764 0 : MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
1765 :
1766 0 : xd->mi[0]->mbmi.overlappable_neighbors[0] = 0;
1767 0 : xd->mi[0]->mbmi.overlappable_neighbors[1] = 0;
1768 :
1769 0 : if (!is_motion_variation_allowed_bsize(mbmi->sb_type)) return;
1770 :
1771 0 : if (xd->up_available) {
1772 0 : const int ilimit = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
1773 0 : for (i = 0; i < ilimit; i += mi_step) {
1774 0 : int mi_row_offset = -1;
1775 0 : int mi_col_offset = i;
1776 0 : MODE_INFO *above_mi =
1777 0 : xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
1778 0 : MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
1779 : #if CONFIG_CHROMA_SUB8X8
1780 0 : if (above_mbmi->sb_type < BLOCK_8X8) {
1781 0 : ++mi_col_offset;
1782 0 : above_mbmi =
1783 0 : &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
1784 : }
1785 : #endif
1786 0 : BLOCK_SIZE above_bsize = AOMMAX(above_mbmi->sb_type, BLOCK_8X8);
1787 0 : mi_step = AOMMIN(xd->n8_w, mi_size_wide[above_bsize]);
1788 :
1789 0 : if (is_neighbor_overlappable(above_mbmi))
1790 0 : xd->mi[0]->mbmi.overlappable_neighbors[0]++;
1791 : }
1792 : }
1793 :
1794 0 : if (xd->left_available) {
1795 0 : const int ilimit = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
1796 0 : for (i = 0; i < ilimit; i += mi_step) {
1797 0 : int mi_row_offset = i;
1798 0 : int mi_col_offset = -1;
1799 0 : MODE_INFO *left_mi =
1800 0 : xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
1801 0 : MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
1802 :
1803 : #if CONFIG_CHROMA_SUB8X8
1804 0 : if (left_mbmi->sb_type < BLOCK_8X8) {
1805 0 : ++mi_row_offset;
1806 0 : left_mbmi =
1807 0 : &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
1808 : }
1809 : #endif
1810 0 : BLOCK_SIZE left_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
1811 0 : mi_step = AOMMIN(xd->n8_h, mi_size_high[left_bsize]);
1812 :
1813 0 : if (is_neighbor_overlappable(left_mbmi))
1814 0 : xd->mi[0]->mbmi.overlappable_neighbors[1]++;
1815 : }
1816 : }
1817 : }
1818 :
1819 : // HW does not support < 4x4 prediction. To limit the bandwidth requirement,
1820 : // small blocks only blend with neighbors from one side. If the block size of
1821 : // the current plane is 4x4 or 8x4, the above neighbor (dir = 0) is skipped; if
1822 : // it is 4x8, the left neighbor (dir = 1) is skipped.
1823 : #define DISABLE_CHROMA_U8X8_OBMC 0 // 0: one-sided obmc; 1: disable
1824 :
1825 0 : int skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize, const struct macroblockd_plane *pd,
1826 : int dir) {
1827 0 : assert(is_motion_variation_allowed_bsize(bsize));
1828 :
1829 0 : BLOCK_SIZE bsize_plane =
1830 0 : ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y];
1831 : #if CONFIG_CB4X4
1832 0 : if (bsize_plane < BLOCK_4X4) return 1;
1833 : #endif
1834 0 : switch (bsize_plane) {
1835 : #if DISABLE_CHROMA_U8X8_OBMC
1836 : case BLOCK_4X4:
1837 : case BLOCK_8X4:
1838 : case BLOCK_4X8: return 1; break;
1839 : #else
1840 :     case BLOCK_4X4:
1841 :     case BLOCK_8X4: return dir == 0; break;
1842 0 :     case BLOCK_4X8: return dir == 1; break;
1843 : #endif
1844 0 : default: return 0;
1845 : }
1846 : }
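
As a concrete case: for a BLOCK_8X8 luma block with 4:2:0 subsampling, ss_size_lookup maps the chroma planes to BLOCK_4X4, so dir == 0 returns 1 and the chroma planes blend only with the left neighbor, while the luma plane (BLOCK_8X8) falls through to the default and blends on both sides.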
1847 :
1848 : // This function combines the motion-compensated predictions generated by the
1849 : // top/left neighboring blocks' inter predictors with the regular inter
1850 : // prediction. We assume the original prediction (bmc) is stored in
1851 : // xd->plane[].dst.buf.
1852 0 : void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
1853 : int mi_row, int mi_col,
1854 : uint8_t *above[MAX_MB_PLANE],
1855 : int above_stride[MAX_MB_PLANE],
1856 : uint8_t *left[MAX_MB_PLANE],
1857 : int left_stride[MAX_MB_PLANE]) {
1858 0 : const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
1859 : int plane, i;
1860 : #if CONFIG_HIGHBITDEPTH
1861 0 : const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
1862 : #endif // CONFIG_HIGHBITDEPTH
1863 :
1864 : // handle above row
1865 0 : if (xd->up_available) {
1866 0 : const int overlap = num_4x4_blocks_high_lookup[bsize] * 2;
1867 0 : const int miw = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
1868 0 : const int mi_row_offset = -1;
1869 0 : const int neighbor_limit = max_neighbor_obmc[b_width_log2_lookup[bsize]];
1870 0 : int neighbor_count = 0;
1871 :
1872 0 : assert(miw > 0);
1873 :
1874 0 : i = 0;
1875 : do { // for each mi in the above row
1876 0 : int mi_col_offset = i;
1877 0 : MB_MODE_INFO *above_mbmi =
1878 0 : &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
1879 : #if CONFIG_CHROMA_SUB8X8
1880 0 : if (above_mbmi->sb_type < BLOCK_8X8) {
1881 0 : ++mi_col_offset;
1882 0 : above_mbmi =
1883 0 : &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
1884 : }
1885 : #endif
1886 :
1887 0 : const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type);
1888 0 : const int mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]);
1889 :
1890 0 : if (is_neighbor_overlappable(above_mbmi)) {
1891 0 : neighbor_count++;
1892 0 : if (neighbor_count > neighbor_limit) break;
1893 0 : for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
1894 0 : const struct macroblockd_plane *pd = &xd->plane[plane];
1895 0 : const int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
1896 0 : const int bh = overlap >> pd->subsampling_y;
1897 :
1898 0 : if (skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
1899 :
1900 0 : const int dst_stride = pd->dst.stride;
1901 0 : uint8_t *const dst = &pd->dst.buf[(i * MI_SIZE) >> pd->subsampling_x];
1902 0 : const int tmp_stride = above_stride[plane];
1903 0 : const uint8_t *const tmp =
1904 0 : &above[plane][(i * MI_SIZE) >> pd->subsampling_x];
1905 0 : const uint8_t *const mask = av1_get_obmc_mask(bh);
1906 :
1907 : #if CONFIG_HIGHBITDEPTH
1908 0 : if (is_hbd)
1909 0 : aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
1910 : tmp_stride, mask, bh, bw, xd->bd);
1911 : else
1912 : #endif // CONFIG_HIGHBITDEPTH
1913 0 : aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
1914 : tmp_stride, mask, bh, bw);
1915 : }
1916 : }
1917 0 : i += mi_step;
1918 0 : } while (i < miw);
1919 : }
1920 :
1921 : // handle left column
1922 0 : if (xd->left_available) {
1923 0 : const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2;
1924 0 : const int mih = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
1925 0 : const int mi_col_offset = -1;
1926 0 : const int neighbor_limit = max_neighbor_obmc[b_height_log2_lookup[bsize]];
1927 0 : int neighbor_count = 0;
1928 :
1929 0 : assert(mih > 0);
1930 :
1931 0 : i = 0;
1932 : do { // for each mi in the left column
1933 0 : int mi_row_offset = i;
1934 0 : MB_MODE_INFO *left_mbmi =
1935 0 : &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
1936 : #if CONFIG_CHROMA_SUB8X8
1937 0 : if (left_mbmi->sb_type < BLOCK_8X8) {
1938 0 : ++mi_row_offset;
1939 0 : left_mbmi =
1940 0 : &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
1941 : }
1942 : #endif
1943 :
1944 0 : const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->sb_type);
1945 0 : const int mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]);
1946 :
1947 0 : if (is_neighbor_overlappable(left_mbmi)) {
1948 0 : neighbor_count++;
1949 0 : if (neighbor_count > neighbor_limit) break;
1950 0 : for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
1951 0 : const struct macroblockd_plane *pd = &xd->plane[plane];
1952 0 : const int bw = overlap >> pd->subsampling_x;
1953 0 : const int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
1954 :
1955 0 : if (skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
1956 :
1957 0 : const int dst_stride = pd->dst.stride;
1958 0 : uint8_t *const dst =
1959 0 : &pd->dst.buf[(i * MI_SIZE * dst_stride) >> pd->subsampling_y];
1960 0 : const int tmp_stride = left_stride[plane];
1961 0 : const uint8_t *const tmp =
1962 0 : &left[plane][(i * MI_SIZE * tmp_stride) >> pd->subsampling_y];
1963 0 : const uint8_t *const mask = av1_get_obmc_mask(bw);
1964 :
1965 : #if CONFIG_HIGHBITDEPTH
1966 0 : if (is_hbd)
1967 0 : aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
1968 : tmp_stride, mask, bh, bw, xd->bd);
1969 : else
1970 : #endif // CONFIG_HIGHBITDEPTH
1971 0 : aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
1972 : tmp_stride, mask, bh, bw);
1973 : }
1974 : }
1975 0 : i += mi_step;
1976 0 : } while (i < mih);
1977 : }
1978 0 : }
1979 :
1980 0 : void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
1981 : #if CONFIG_EXT_INTER
1982 0 : if (is_interintra_pred(mbmi)) {
1983 0 : mbmi->ref_frame[1] = NONE_FRAME;
1984 0 : } else if (has_second_ref(mbmi) &&
1985 0 : is_masked_compound_type(mbmi->interinter_compound_type)) {
1986 0 : mbmi->interinter_compound_type = COMPOUND_AVERAGE;
1987 0 : mbmi->ref_frame[1] = NONE_FRAME;
1988 : }
1989 : #endif // CONFIG_EXT_INTER
1990 0 : if (has_second_ref(mbmi)) mbmi->ref_frame[1] = NONE_FRAME;
1991 0 : return;
1992 : }
1993 :
1994 0 : void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
1995 : int mi_row, int mi_col,
1996 : uint8_t *tmp_buf[MAX_MB_PLANE],
1997 : int tmp_width[MAX_MB_PLANE],
1998 : int tmp_height[MAX_MB_PLANE],
1999 : int tmp_stride[MAX_MB_PLANE]) {
2000 0 : const TileInfo *const tile = &xd->tile;
2001 0 : BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
2002 : int i, j, mi_step, ref;
2003 0 : const int ilimit = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
2004 0 : int mb_to_right_edge_base = xd->mb_to_right_edge;
2005 0 : const int neighbor_limit = max_neighbor_obmc[b_width_log2_lookup[bsize]];
2006 0 : int neighbor_count = 0;
2007 :
2008 0 : if (mi_row <= tile->mi_row_start) return;
2009 :
2010 0 : xd->mb_to_bottom_edge += xd->n8_h * 32;
2011 0 : for (i = 0; i < ilimit; i += mi_step) {
2012 0 : int mi_row_offset = -1;
2013 0 : int mi_col_offset = i;
2014 : int mi_x, mi_y, bw, bh;
2015 0 : MODE_INFO *above_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
2016 0 : MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
2017 :
2018 : #if CONFIG_CHROMA_SUB8X8
2019 0 : if (above_mbmi->sb_type < BLOCK_8X8) {
2020 0 : ++mi_col_offset;
2021 0 : above_mbmi = &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
2022 : }
2023 : #endif
2024 :
2025 0 : const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type);
2026 : MB_MODE_INFO backup_mbmi;
2027 :
2028 0 : mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]);
2029 :
2030 0 : if (!is_neighbor_overlappable(above_mbmi)) continue;
2031 :
2032 0 : neighbor_count++;
2033 0 : if (neighbor_count > neighbor_limit) break;
2034 :
2035 0 : backup_mbmi = *above_mbmi;
2036 0 : modify_neighbor_predictor_for_obmc(above_mbmi);
2037 :
2038 0 : for (j = 0; j < MAX_MB_PLANE; ++j) {
2039 0 : struct macroblockd_plane *const pd = &xd->plane[j];
2040 0 : setup_pred_plane(&pd->dst, a_bsize, tmp_buf[j], tmp_width[j],
2041 0 : tmp_height[j], tmp_stride[j], 0, i, NULL,
2042 : pd->subsampling_x, pd->subsampling_y);
2043 : }
2044 0 : for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
2045 0 : const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
2046 0 : const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
2047 :
2048 0 : xd->block_refs[ref] = ref_buf;
2049 0 : if ((!av1_is_valid_scale(&ref_buf->sf)))
2050 0 : aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
2051 : "Reference frame has invalid dimensions");
2052 0 : av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col + i,
2053 : &ref_buf->sf);
2054 : }
2055 :
2056 0 : xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
2057 0 : xd->mb_to_right_edge =
2058 0 : mb_to_right_edge_base + (xd->n8_w - i - mi_step) * 64;
2059 0 : mi_x = (mi_col + i) << MI_SIZE_LOG2;
2060 0 : mi_y = mi_row << MI_SIZE_LOG2;
2061 :
2062 0 : for (j = 0; j < MAX_MB_PLANE; ++j) {
2063 0 : const struct macroblockd_plane *pd = &xd->plane[j];
2064 0 : bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
2065 0 : bh = AOMMAX((num_4x4_blocks_high_lookup[bsize] * 2) >> pd->subsampling_y,
2066 : 4);
2067 :
2068 0 : if (skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
2069 0 : build_inter_predictors(cm, xd, j, mi_col_offset, mi_row_offset, 0, bw, bh,
2070 : 0, 0, bw, bh,
2071 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
2072 : 0, 0,
2073 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
2074 : mi_x, mi_y);
2075 : }
2076 0 : *above_mbmi = backup_mbmi;
2077 : }
2078 0 : xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
2079 0 : xd->mb_to_right_edge = mb_to_right_edge_base;
2080 0 : xd->mb_to_bottom_edge -= xd->n8_h * 32;
2081 : }
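
A note on units (my reading of the MACROBLOCKD fields, stated as an assumption): the xd->mb_to_{left,right,top,bottom}_edge values are kept in 1/8-pel units, so expressions like -(((mi_col + i) * MI_SIZE) * 8) convert a pixel position into that scale, and xd->n8_h * 32 loosens the bottom edge by xd->n8_h * 4 pixels so the neighbor's predictor can be built slightly past the current block's boundary before the edges are restored at the end of the function.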
2082 :
2083 0 : void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
2084 : int mi_row, int mi_col,
2085 : uint8_t *tmp_buf[MAX_MB_PLANE],
2086 : int tmp_width[MAX_MB_PLANE],
2087 : int tmp_height[MAX_MB_PLANE],
2088 : int tmp_stride[MAX_MB_PLANE]) {
2089 0 : const TileInfo *const tile = &xd->tile;
2090 0 : BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
2091 : int i, j, mi_step, ref;
2092 0 : const int ilimit = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
2093 0 : int mb_to_bottom_edge_base = xd->mb_to_bottom_edge;
2094 0 : const int neighbor_limit = max_neighbor_obmc[b_height_log2_lookup[bsize]];
2095 0 : int neighbor_count = 0;
2096 :
2097 0 : if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start)) return;
2098 :
2099 0 : xd->mb_to_right_edge += xd->n8_w * 32;
2100 0 : for (i = 0; i < ilimit; i += mi_step) {
2101 0 : int mi_row_offset = i;
2102 0 : int mi_col_offset = -1;
2103 : int mi_x, mi_y, bw, bh;
2104 0 : MODE_INFO *left_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
2105 0 : MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
2106 :
2107 : #if CONFIG_CHROMA_SUB8X8
2108 0 : if (left_mbmi->sb_type < BLOCK_8X8) {
2109 0 : ++mi_row_offset;
2110 0 : left_mbmi = &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
2111 : }
2112 : #endif
2113 :
2114 0 : const BLOCK_SIZE l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
2115 : MB_MODE_INFO backup_mbmi;
2116 :
2117 0 : mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]);
2118 :
2119 0 : if (!is_neighbor_overlappable(left_mbmi)) continue;
2120 :
2121 0 : neighbor_count++;
2122 0 : if (neighbor_count > neighbor_limit) break;
2123 :
2124 0 : backup_mbmi = *left_mbmi;
2125 0 : modify_neighbor_predictor_for_obmc(left_mbmi);
2126 :
2127 0 : for (j = 0; j < MAX_MB_PLANE; ++j) {
2128 0 : struct macroblockd_plane *const pd = &xd->plane[j];
2129 0 : setup_pred_plane(&pd->dst, l_bsize, tmp_buf[j], tmp_width[j],
2130 0 : tmp_height[j], tmp_stride[j], i, 0, NULL,
2131 : pd->subsampling_x, pd->subsampling_y);
2132 : }
2133 0 : for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) {
2134 0 : const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
2135 0 : const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
2136 :
2137 0 : xd->block_refs[ref] = ref_buf;
2138 0 : if ((!av1_is_valid_scale(&ref_buf->sf)))
2139 0 : aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
2140 : "Reference frame has invalid dimensions");
2141 0 : av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i, mi_col,
2142 : &ref_buf->sf);
2143 : }
2144 :
2145 0 : xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8);
2146 0 : xd->mb_to_bottom_edge =
2147 0 : mb_to_bottom_edge_base + (xd->n8_h - i - mi_step) * 64;
2148 0 : mi_x = mi_col << MI_SIZE_LOG2;
2149 0 : mi_y = (mi_row + i) << MI_SIZE_LOG2;
2150 :
2151 0 : for (j = 0; j < MAX_MB_PLANE; ++j) {
2152 0 : const struct macroblockd_plane *pd = &xd->plane[j];
2153 0 : bw = AOMMAX((num_4x4_blocks_wide_lookup[bsize] * 2) >> pd->subsampling_x,
2154 : 4);
2155 0 : bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
2156 :
2157 0 : if (skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
2158 0 : build_inter_predictors(cm, xd, j, mi_col_offset, mi_row_offset, 0, bw, bh,
2159 : 0, 0, bw, bh,
2160 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
2161 : 0, 0,
2162 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
2163 : mi_x, mi_y);
2164 : }
2165 0 : *left_mbmi = backup_mbmi;
2166 : }
2167 0 : xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
2168 0 : xd->mb_to_bottom_edge = mb_to_bottom_edge_base;
2169 0 : xd->mb_to_right_edge -= xd->n8_w * 32;
2170 : }
2171 :
2172 0 : void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
2173 : int mi_row, int mi_col) {
2174 : #if CONFIG_HIGHBITDEPTH
2175 : DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
2176 : DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
2177 : #else
2178 : DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
2179 : DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
2180 : #endif // CONFIG_HIGHBITDEPTH
2181 : uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
2182 0 : int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2183 0 : int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2184 0 : int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2185 0 : int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2186 0 : int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2187 0 : int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2188 :
2189 : #if CONFIG_HIGHBITDEPTH
2190 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2191 0 : int len = sizeof(uint16_t);
2192 0 : dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
2193 0 : dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len);
2194 0 : dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len);
2195 0 : dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
2196 0 : dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len);
2197 0 : dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len);
2198 : } else {
2199 : #endif // CONFIG_HIGHBITDEPTH
2200 0 : dst_buf1[0] = tmp_buf1;
2201 0 : dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE;
2202 0 : dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2;
2203 0 : dst_buf2[0] = tmp_buf2;
2204 0 : dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE;
2205 0 : dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2;
2206 : #if CONFIG_HIGHBITDEPTH
2207 : }
2208 : #endif // CONFIG_HIGHBITDEPTH
2209 0 : av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, dst_buf1,
2210 : dst_width1, dst_height1, dst_stride1);
2211 0 : av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, dst_buf2,
2212 : dst_width2, dst_height2, dst_stride2);
2213 0 : av1_setup_dst_planes(xd->plane, xd->mi[0]->mbmi.sb_type,
2214 0 : get_frame_new_buffer(cm), mi_row, mi_col);
2215 0 : av1_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, dst_buf1, dst_stride1,
2216 : dst_buf2, dst_stride2);
2217 0 : }
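
The tmp_buf1/tmp_buf2 scratch buffers are twice as large under CONFIG_HIGHBITDEPTH because high-bit-depth samples are stored as uint16_t (2 bytes each); CONVERT_TO_BYTEPTR then presents that uint16_t storage through the uint8_t-pointer convention used throughout the predictor code.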
2218 :
2219 : #if CONFIG_NCOBMC
2220 : void av1_build_prediction_by_bottom_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
2221 : int mi_row, int mi_col,
2222 : uint8_t *tmp_buf[MAX_MB_PLANE],
2223 : int tmp_width[MAX_MB_PLANE],
2224 : int tmp_height[MAX_MB_PLANE],
2225 : int tmp_stride[MAX_MB_PLANE]) {
2226 : const TileInfo *const tile = &xd->tile;
2227 : BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
2228 : int i, j, mi_step, ref;
2229 : const int ilimit = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
2230 : int mb_to_right_edge_base = xd->mb_to_right_edge;
2231 :
2232 : if (mi_row + xd->n8_h >= tile->mi_row_end ||
2233 : (mi_row + xd->n8_h) % MI_SIZE == 0 || (mi_row + xd->n8_h) >= cm->mi_rows)
2234 : return;
2235 : assert(bsize >= BLOCK_8X8);
2236 :
2237 : xd->mb_to_top_edge -= xd->n8_h * 32;
2238 : for (i = 0; i < ilimit; i += mi_step) {
2239 : int mi_row_offset = xd->n8_h;
2240 : int mi_col_offset = i;
2241 : int mi_x, mi_y, bw, bh;
2242 : MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
2243 : MB_MODE_INFO *mbmi = &mi->mbmi;
2244 : #if CONFIG_EXT_INTER
2245 : MB_MODE_INFO backup_mbmi;
2246 : #endif // CONFIG_EXT_INTER
2247 :
2248 : mi_step = AOMMIN(xd->n8_w, mi_size_wide[mbmi->sb_type]);
2249 :
2250 : if (!is_neighbor_overlappable(mbmi)) continue;
2251 :
2252 : #if CONFIG_EXT_INTER
2253 : backup_mbmi = *mbmi;
2254 : modify_neighbor_predictor_for_obmc(mbmi);
2255 : #endif // CONFIG_EXT_INTER
2256 :
2257 : for (j = 0; j < MAX_MB_PLANE; ++j) {
2258 : struct macroblockd_plane *const pd = &xd->plane[j];
2259 : setup_pred_plane(&pd->dst, AOMMAX(mbmi->sb_type, BLOCK_8X8), tmp_buf[j],
2260 : tmp_width[j], tmp_height[j], tmp_stride[j],
2261 : (xd->n8_h >> 1), i, NULL, pd->subsampling_x,
2262 : pd->subsampling_y);
2263 : }
2264 : for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
2265 : const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
2266 : const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
2267 :
2268 : xd->block_refs[ref] = ref_buf;
2269 : if ((!av1_is_valid_scale(&ref_buf->sf)))
2270 : aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
2271 : "Reference frame has invalid dimensions");
2272 : av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + (xd->n8_h >> 1),
2273 : mi_col + i, &ref_buf->sf);
2274 : }
2275 :
2276 : xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
2277 : xd->mb_to_right_edge =
2278 : mb_to_right_edge_base + (xd->n8_w - i - mi_step) * 64;
2279 : mi_x = (mi_col + i) << MI_SIZE_LOG2;
2280 : mi_y = (mi_row << MI_SIZE_LOG2) + xd->n8_h * 4;
2281 :
2282 : for (j = 0; j < MAX_MB_PLANE; ++j) {
2283 : const struct macroblockd_plane *pd = &xd->plane[j];
2284 : bw = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_x;
2285 : bh = (num_4x4_blocks_high_lookup[bsize] << 1) >> pd->subsampling_y;
2286 :
2287 : if (mbmi->sb_type < BLOCK_8X8 && !CONFIG_CB4X4) {
2288 : const PARTITION_TYPE bp = BLOCK_8X8 - mbmi->sb_type;
2289 : const int have_vsplit = bp != PARTITION_HORZ;
2290 : const int have_hsplit = bp != PARTITION_VERT;
2291 : const int num_4x4_w = 2 >> (!have_vsplit);
2292 : const int num_4x4_h = 2 >> (!have_hsplit);
2293 : const int pw = 8 >> (have_vsplit + pd->subsampling_x);
2294 : int x, y;
2295 :
2296 : for (y = 0; y < num_4x4_h; ++y)
2297 : for (x = 0; x < num_4x4_w; ++x) {
2298 : if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT) && y != 0)
2299 : continue;
2300 :
2301 : build_inter_predictors(
2302 : cm, xd, j, mi_col_offset, mi_row_offset, y * 2 + x, bw, bh,
2303 : (4 * x) >> pd->subsampling_x,
2304 : xd->n8_h == 1 ? (4 >> pd->subsampling_y) : 0, pw, bh,
2305 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
2306 : 0, 0,
2307 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
2308 : mi_x, mi_y);
2309 : }
2310 : } else {
2311 : build_inter_predictors(
2312 : cm, xd, j, mi_col_offset, mi_row_offset, 0, bw, bh, 0,
2313 : xd->n8_h == 1 ? (4 >> pd->subsampling_y) : 0, bw, bh,
2314 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
2315 : 0, 0,
2316 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
2317 : mi_x, mi_y);
2318 : }
2319 : }
2320 : #if CONFIG_EXT_INTER
2321 : *mbmi = backup_mbmi;
2322 : #endif // CONFIG_EXT_INTER
2323 : }
2324 : xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
2325 : xd->mb_to_right_edge = mb_to_right_edge_base;
2326 : xd->mb_to_top_edge += xd->n8_h * 32;
2327 : }
2328 :
2329 : void av1_build_prediction_by_right_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
2330 : int mi_row, int mi_col,
2331 : uint8_t *tmp_buf[MAX_MB_PLANE],
2332 : int tmp_width[MAX_MB_PLANE],
2333 : int tmp_height[MAX_MB_PLANE],
2334 : const int tmp_stride[MAX_MB_PLANE]) {
2335 : const TileInfo *const tile = &xd->tile;
2336 : BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
2337 : int i, j, mi_step, ref;
2338 : const int ilimit = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
2339 : int mb_to_bottom_edge_base = xd->mb_to_bottom_edge;
2340 :
2341 : if (mi_col + xd->n8_w >= tile->mi_col_end ||
2342 : (mi_col + xd->n8_w) % MI_SIZE == 0 || (mi_col + xd->n8_w) >= cm->mi_cols)
2343 : return;
2344 :
2345 : xd->mb_to_left_edge -= xd->n8_w * 32;
2346 : for (i = 0; i < ilimit; i += mi_step) {
2347 : int mi_row_offset = i;
2348 : int mi_col_offset = xd->n8_w;
2349 : int mi_x, mi_y, bw, bh;
2350 : MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
2351 : MB_MODE_INFO *mbmi = &mi->mbmi;
2352 : #if CONFIG_EXT_INTER
2353 : MB_MODE_INFO backup_mbmi;
2354 : #endif // CONFIG_EXT_INTER
2355 :
2356 : mi_step = AOMMIN(xd->n8_h, mi_size_high[mbmi->sb_type]);
2357 :
2358 : if (!is_neighbor_overlappable(mbmi)) continue;
2359 :
2360 : #if CONFIG_EXT_INTER
2361 : backup_mbmi = *mbmi;
2362 : modify_neighbor_predictor_for_obmc(mbmi);
2363 : #endif // CONFIG_EXT_INTER
2364 :
2365 : for (j = 0; j < MAX_MB_PLANE; ++j) {
2366 : struct macroblockd_plane *const pd = &xd->plane[j];
2367 : setup_pred_plane(&pd->dst, AOMMAX(mbmi->sb_type, BLOCK_8X8), tmp_buf[j],
2368 : tmp_width[j], tmp_height[j], tmp_stride[j], i,
2369 : xd->n8_w >> 1, NULL, pd->subsampling_x,
2370 : pd->subsampling_y);
2371 : }
2372 : for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
2373 : const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
2374 : const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
2375 :
2376 : xd->block_refs[ref] = ref_buf;
2377 : if ((!av1_is_valid_scale(&ref_buf->sf)))
2378 : aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
2379 : "Reference frame has invalid dimensions");
2380 : av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i,
2381 : mi_col + (xd->n8_w >> 1), &ref_buf->sf);
2382 : }
2383 :
2384 : xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8);
2385 : xd->mb_to_bottom_edge =
2386 : mb_to_bottom_edge_base + (xd->n8_h - i - mi_step) * 64;
2387 : mi_x = (mi_col << MI_SIZE_LOG2) + xd->n8_w * 4;
2388 : mi_y = (mi_row + i) << MI_SIZE_LOG2;
2389 :
2390 : for (j = 0; j < MAX_MB_PLANE; ++j) {
2391 : const struct macroblockd_plane *pd = &xd->plane[j];
2392 : bw = (num_4x4_blocks_wide_lookup[bsize] << 1) >> pd->subsampling_x;
2393 : bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
2394 :
2395 : if (mbmi->sb_type < BLOCK_8X8 && !CONFIG_CB4X4) {
2396 : const PARTITION_TYPE bp = BLOCK_8X8 - mbmi->sb_type;
2397 : const int have_vsplit = bp != PARTITION_HORZ;
2398 : const int have_hsplit = bp != PARTITION_VERT;
2399 : const int num_4x4_w = 2 >> (!have_vsplit);
2400 : const int num_4x4_h = 2 >> (!have_hsplit);
2401 : const int ph = 8 >> (have_hsplit + pd->subsampling_y);
2402 : int x, y;
2403 :
2404 : for (y = 0; y < num_4x4_h; ++y)
2405 : for (x = 0; x < num_4x4_w; ++x) {
2406 : if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT) && x != 0)
2407 : continue;
2408 :
2409 : build_inter_predictors(cm, xd, j, mi_col_offset, mi_row_offset,
2410 : y * 2 + x, bw, bh,
2411 : xd->n8_w == 1 ? 4 >> pd->subsampling_x : 0,
2412 : (4 * y) >> pd->subsampling_y, bw, ph,
2413 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
2414 : 0, 0,
2415 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
2416 : mi_x, mi_y);
2417 : }
2418 : } else {
2419 : build_inter_predictors(cm, xd, j, mi_col_offset, mi_row_offset, 0, bw,
2420 : bh, xd->n8_w == 1 ? 4 >> pd->subsampling_x : 0,
2421 : 0, bw, bh,
2422 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
2423 : 0, 0,
2424 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
2425 : mi_x, mi_y);
2426 : }
2427 : }
2428 : #if CONFIG_EXT_INTER
2429 : *mbmi = backup_mbmi;
2430 : #endif // CONFIG_EXT_INTER
2431 : }
2432 : xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
2433 : xd->mb_to_bottom_edge = mb_to_bottom_edge_base;
2434 : xd->mb_to_left_edge += xd->n8_w * 32;
2435 : }
2436 :
2437 : // This function combines the motion-compensated predictions generated by the
2438 : // bottom/right neighboring blocks' inter predictors with the prediction in the
2439 : // dst buffer.
2440 : void av1_merge_dst_bottom_right_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
2441 : int mi_row, int mi_col,
2442 : uint8_t *bottom[MAX_MB_PLANE],
2443 : const int bottom_stride[MAX_MB_PLANE],
2444 : uint8_t *right[MAX_MB_PLANE],
2445 : const int right_stride[MAX_MB_PLANE]) {
2446 : const TileInfo *const tile = &xd->tile;
2447 : BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
2448 : int plane, i, mi_step;
2449 : const int bottom_available = mi_row + xd->n8_h < tile->mi_row_end &&
2450 : (mi_row + xd->n8_h) % MI_SIZE != 0 &&
2451 : (mi_row + xd->n8_h) < cm->mi_rows;
2452 : #if CONFIG_HIGHBITDEPTH
2453 : int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
2454 : #endif // CONFIG_HIGHBITDEPTH
2455 :
2456 : // handle bottom row
2457 : for (i = 0; bottom_available && i < AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
2458 : i += mi_step) {
2459 : int mi_row_offset = xd->n8_h;
2460 : int mi_col_offset = i;
2461 : MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
2462 : MB_MODE_INFO *mbmi = &mi->mbmi;
2463 : int overlap;
2464 :
2465 : mi_step = AOMMIN(xd->n8_w, mi_size_wide[mbmi->sb_type]);
2466 :
2467 : if (!is_neighbor_overlappable(mbmi)) continue;
2468 :
2469 : overlap = num_4x4_blocks_high_lookup[bsize] << 1;
2470 :
2471 : for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
2472 : const struct macroblockd_plane *pd = &xd->plane[plane];
2473 : const int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
2474 : const int bh = overlap >> pd->subsampling_y;
2475 : const int dst_stride = pd->dst.stride;
2476 : uint8_t *dst =
2477 : &pd->dst.buf[((i * MI_SIZE) >> pd->subsampling_x) +
2478 : (((xd->n8_h * MI_SIZE - overlap) * dst_stride) >>
2479 : pd->subsampling_y)];
2480 : const int tmp_stride = bottom_stride[plane];
2481 : const uint8_t *const tmp =
2482 : &bottom[plane][((i * MI_SIZE) >> pd->subsampling_x) +
2483 : (((xd->n8_h * MI_SIZE - overlap) * tmp_stride) >>
2484 : pd->subsampling_y)];
2485 : const uint8_t *const mask = av1_get_obmc_mask_flipped(bh);
2486 :
2487 : #if CONFIG_HIGHBITDEPTH
2488 : if (is_hbd)
2489 : aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
2490 : tmp_stride, mask, bh, bw, xd->bd);
2491 : else
2492 : #endif // CONFIG_HIGHBITDEPTH
2493 : aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
2494 : mask, bh, bw);
2495 : }
2496 : } // each mi in the bottom row
2497 :
2498 : // handle right column
2499 : if (mi_col + xd->n8_w >= tile->mi_col_end ||
2500 : (mi_col + xd->n8_w) % MI_SIZE == 0 || (mi_col + xd->n8_w) >= cm->mi_cols)
2501 : return;
2502 :
2503 : for (i = 0; i < AOMMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
2504 : int mi_row_offset = i;
2505 : int mi_col_offset = xd->n8_w;
2506 : int overlap;
2507 : MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
2508 : MB_MODE_INFO *mbmi = &mi->mbmi;
2509 :
2510 : mi_step = AOMMIN(xd->n8_h, mi_size_high[mbmi->sb_type]);
2511 :
2512 : if (!is_neighbor_overlappable(mbmi)) continue;
2513 :
2514 : overlap = num_4x4_blocks_wide_lookup[bsize] << 1;
2515 :
2516 : for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
2517 : const struct macroblockd_plane *pd = &xd->plane[plane];
2518 : const int bw = overlap >> pd->subsampling_x;
2519 : const int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
2520 : const int dst_stride = pd->dst.stride;
2521 : uint8_t *dst =
2522 : &pd->dst.buf[((i * MI_SIZE * dst_stride) >> pd->subsampling_y) +
2523 : ((xd->n8_w * MI_SIZE - overlap) >> pd->subsampling_x)];
2524 : const int tmp_stride = right_stride[plane];
2525 : const uint8_t *const tmp =
2526 : &right[plane][((i * MI_SIZE * tmp_stride) >> pd->subsampling_y) +
2527 : ((xd->n8_w * MI_SIZE - overlap) >> pd->subsampling_x)];
2528 : const uint8_t *const mask = av1_get_obmc_mask_flipped(bw);
2529 :
2530 : #if CONFIG_HIGHBITDEPTH
2531 : if (is_hbd)
2532 : aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
2533 : tmp_stride, mask, bh, bw, xd->bd);
2534 : else
2535 : #endif // CONFIG_HIGHBITDEPTH
2536 : aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
2537 : mask, bh, bw);
2538 : }
2539 : } // each mi in the right column
2540 : }
2541 :
2542 : // This function generates 4-sided obmc: (1) calculate the prediction blocks
2543 : // generated by the bottom and right motion vectors; (2) combine them with the
2544 : // original prediction block (which should be pre-stored in xd->plane[].dst.buf
2545 : // before calling this function); the result is updated in xd->plane[].dst.buf;
2546 : // (3) call the causal obmc prediction function, which generates the left and
2547 : // above preds and then merges them with xd->plane[].dst.buf.
2548 : void av1_build_ncobmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
2549 : int mi_row, int mi_col) {
2550 : #if CONFIG_HIGHBITDEPTH
2551 : DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
2552 : DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
2553 : #else
2554 : DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
2555 : DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
2556 : #endif // CONFIG_HIGHBITDEPTH
2557 : uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
2558 : int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2559 : int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2560 : int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2561 : int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2562 : int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2563 : int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2564 :
2565 : #if CONFIG_HIGHBITDEPTH
2566 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2567 : int len = sizeof(uint16_t);
2568 : dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
2569 : dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len);
2570 : dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len);
2571 : dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
2572 : dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len);
2573 : dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len);
2574 : } else {
2575 : #endif // CONFIG_HIGHBITDEPTH
2576 : dst_buf1[0] = tmp_buf1;
2577 : dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE;
2578 : dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2;
2579 : dst_buf2[0] = tmp_buf2;
2580 : dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE;
2581 : dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2;
2582 : #if CONFIG_HIGHBITDEPTH
2583 : }
2584 : #endif // CONFIG_HIGHBITDEPTH
2585 :
2586 : const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
2587 : av1_build_prediction_by_bottom_preds(cm, xd, mi_row, mi_col, dst_buf1,
2588 : dst_width1, dst_height1, dst_stride1);
2589 : av1_build_prediction_by_right_preds(cm, xd, mi_row, mi_col, dst_buf2,
2590 : dst_width2, dst_height2, dst_stride2);
2591 : av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
2592 : mi_col);
2593 : av1_merge_dst_bottom_right_preds(cm, xd, mi_row, mi_col, dst_buf1,
2594 : dst_stride1, dst_buf2, dst_stride2);
2595 : av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
2596 : mi_col);
2597 : av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
2598 : av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
2599 : mi_col);
2600 : }
2601 : #endif // CONFIG_NCOBMC
2602 : #endif // CONFIG_MOTION_VAR
2603 :
2604 : #if CONFIG_EXT_INTER
2605 : /* clang-format off */
2606 : #if CONFIG_INTERINTRA
2607 : #if CONFIG_EXT_PARTITION
2608 : static const int ii_weights1d[MAX_SB_SIZE] = {
2609 : 60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
2610 : 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
2611 : 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 8,
2612 : 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 4, 4,
2613 : 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2,
2614 : 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
2615 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2616 : };
2617 : static const int ii_size_scales[BLOCK_SIZES] = {
2618 : #if CONFIG_CB4X4
2619 : 32, 32, 32,
2620 : #endif
2621 : 32, 16, 16, 16, 8, 8, 8, 4,
2622 : 4, 4, 2, 2, 2, 1, 1, 1,
2623 : };
2624 : #else
2625 : static const int ii_weights1d[MAX_SB_SIZE] = {
2626 : 60, 56, 52, 48, 45, 42, 39, 37, 34, 32, 30, 28, 26, 24, 22, 21,
2627 : 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 10, 9, 8, 8, 7, 7,
2628 : 6, 6, 6, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2,
2629 : 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2630 : };
2631 : static const int ii_size_scales[BLOCK_SIZES] = {
2632 : #if CONFIG_CB4X4
2633 : 16, 16, 16,
2634 : #endif
2635 : 16, 8, 8, 8, 4, 4, 4,
2636 : 2, 2, 2, 1, 1, 1,
2637 : };
2638 : /* clang-format on */
2639 : #endif // CONFIG_EXT_PARTITION
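
ii_weights1d is a decaying ramp (60 down to 1) indexed by distance from the block edge being predicted, and ii_size_scales scales the index so that smaller blocks step through the full ramp. A sketch of how the II_V_PRED case below consumes them (the function name is illustrative; the weight is applied to the intra prediction, so rows near the top edge stay mostly intra and decay toward pure inter):

    #include <stdint.h>

    static void interintra_v_blend_ref(uint8_t *comp, int comp_stride,
                                       const uint8_t *inter, int inter_stride,
                                       const uint8_t *intra, int intra_stride,
                                       const int *weights1d, int size_scale,
                                       int bw, int bh) {
      for (int i = 0; i < bh; ++i) {
        const int scale = weights1d[i * size_scale];  // weight of intra pred
        for (int j = 0; j < bw; ++j)
          comp[i * comp_stride + j] =
              (uint8_t)((scale * intra[i * intra_stride + j] +
                         (64 - scale) * inter[i * inter_stride + j] + 32) >> 6);
      }
    }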
2640 :
2641 0 : static void combine_interintra(INTERINTRA_MODE mode, int use_wedge_interintra,
2642 : int wedge_index, int wedge_sign,
2643 : BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
2644 : uint8_t *comppred, int compstride,
2645 : const uint8_t *interpred, int interstride,
2646 : const uint8_t *intrapred, int intrastride) {
2647 0 : const int bw = block_size_wide[plane_bsize];
2648 0 : const int bh = block_size_high[plane_bsize];
2649 0 : const int size_scale = ii_size_scales[plane_bsize];
2650 : int i, j;
2651 :
2652 0 : if (use_wedge_interintra) {
2653 0 : if (is_interintra_wedge_used(bsize)) {
2654 0 : const uint8_t *mask =
2655 0 : av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
2656 0 : const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;
2657 0 : const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;
2658 0 : aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
2659 0 : interpred, interstride, mask, block_size_wide[bsize],
2660 : bh, bw, subh, subw);
2661 : }
2662 0 : return;
2663 : }
2664 :
2665 0 : switch (mode) {
2666 : case II_V_PRED:
2667 0 : for (i = 0; i < bh; ++i) {
2668 0 : for (j = 0; j < bw; ++j) {
2669 0 : int scale = ii_weights1d[i * size_scale];
2670 0 : comppred[i * compstride + j] =
2671 0 : AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
2672 : interpred[i * interstride + j]);
2673 : }
2674 : }
2675 0 : break;
2676 :
2677 : case II_H_PRED:
2678 0 : for (i = 0; i < bh; ++i) {
2679 0 : for (j = 0; j < bw; ++j) {
2680 0 : int scale = ii_weights1d[j * size_scale];
2681 0 : comppred[i * compstride + j] =
2682 0 : AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
2683 : interpred[i * interstride + j]);
2684 : }
2685 : }
2686 0 : break;
2687 :
2688 : #if CONFIG_ALT_INTRA
2689 : case II_SMOOTH_PRED:
2690 0 : for (i = 0; i < bh; ++i) {
2691 0 : for (j = 0; j < bw; ++j) {
2692 0 : int scale = ii_weights1d[(i < j ? i : j) * size_scale];
2693 0 : comppred[i * compstride + j] =
2694 0 : AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
2695 : interpred[i * interstride + j]);
2696 : }
2697 : }
2698 0 : break;
2699 : #endif
2700 :
2701 : #if !CONFIG_ALT_INTRA
2702 : case II_TM_PRED:
2703 : #endif
2704 : case II_DC_PRED:
2705 : default:
2706 0 : for (i = 0; i < bh; ++i) {
2707 0 : for (j = 0; j < bw; ++j) {
2708 0 : comppred[i * compstride + j] = AOM_BLEND_AVG(
2709 : intrapred[i * intrastride + j], interpred[i * interstride + j]);
2710 : }
2711 : }
2712 0 : break;
2713 : }
2714 : }
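
Worked numbers for the paths above: with scale = 60, intra = 100, inter = 50, AOM_BLEND_A64 gives (60 * 100 + 4 * 50 + 32) >> 6 = 6232 >> 6 = 97, i.e. almost entirely intra; the II_DC_PRED/default path uses a plain rounded average, AOM_BLEND_AVG(100, 50) = (100 + 50 + 1) >> 1 = 75 (my reading of the macro in aom_dsp/blend.h). The II_SMOOTH_PRED case keys the weight off min(i, j), so the intra contribution stays strong near both the top and left edges.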
2715 :
2716 : #if CONFIG_HIGHBITDEPTH
2717 0 : static void combine_interintra_highbd(
2718 : INTERINTRA_MODE mode, int use_wedge_interintra, int wedge_index,
2719 : int wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
2720 : uint8_t *comppred8, int compstride, const uint8_t *interpred8,
2721 : int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
2722 0 : const int bw = block_size_wide[plane_bsize];
2723 0 : const int bh = block_size_high[plane_bsize];
2724 0 : const int size_scale = ii_size_scales[plane_bsize];
2725 : int i, j;
2726 :
2727 0 : uint16_t *comppred = CONVERT_TO_SHORTPTR(comppred8);
2728 0 : const uint16_t *interpred = CONVERT_TO_SHORTPTR(interpred8);
2729 0 : const uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8);
2730 :
2731 0 : if (use_wedge_interintra) {
2732 0 : if (is_interintra_wedge_used(bsize)) {
2733 0 : const uint8_t *mask =
2734 0 : av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
2735 0 : const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;
2736 0 : const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;
2737 0 : aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
2738 : interpred8, interstride, mask, bw, bh, bw, subh,
2739 : subw, bd);
2740 : }
2741 0 : return;
2742 : }
2743 :
2744 0 : switch (mode) {
2745 : case II_V_PRED:
2746 0 : for (i = 0; i < bh; ++i) {
2747 0 : for (j = 0; j < bw; ++j) {
2748 0 : int scale = ii_weights1d[i * size_scale];
2749 0 : comppred[i * compstride + j] =
2750 0 : AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
2751 : interpred[i * interstride + j]);
2752 : }
2753 : }
2754 0 : break;
2755 :
2756 : case II_H_PRED:
2757 0 : for (i = 0; i < bh; ++i) {
2758 0 : for (j = 0; j < bw; ++j) {
2759 0 : int scale = ii_weights1d[j * size_scale];
2760 0 : comppred[i * compstride + j] =
2761 0 : AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
2762 : interpred[i * interstride + j]);
2763 : }
2764 : }
2765 0 : break;
2766 :
2767 : #if CONFIG_ALT_INTRA
2768 : case II_SMOOTH_PRED:
2769 0 : for (i = 0; i < bh; ++i) {
2770 0 : for (j = 0; j < bw; ++j) {
2771 0 : int scale = ii_weights1d[(i < j ? i : j) * size_scale];
2772 0 : comppred[i * compstride + j] =
2773 0 : AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
2774 : interpred[i * interstride + j]);
2775 : }
2776 : }
2777 0 : break;
2778 : #endif
2779 :
2780 : #if !CONFIG_ALT_INTRA
2781 : case II_TM_PRED:
2782 : #endif
2783 : case II_DC_PRED:
2784 : default:
2785 0 : for (i = 0; i < bh; ++i) {
2786 0 : for (j = 0; j < bw; ++j) {
2787 0 : comppred[i * compstride + j] = AOM_BLEND_AVG(
2788 : interpred[i * interstride + j], intrapred[i * intrastride + j]);
2789 : }
2790 : }
2791 0 : break;
2792 : }
2793 : }
2794 : #endif // CONFIG_HIGHBITDEPTH
2795 :
2796 0 : void av1_build_intra_predictors_for_interintra(MACROBLOCKD *xd,
2797 : BLOCK_SIZE bsize, int plane,
2798 : BUFFER_SET *ctx, uint8_t *dst,
2799 : int dst_stride) {
2800 0 : struct macroblockd_plane *const pd = &xd->plane[plane];
2801 0 : BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
2802 0 : PREDICTION_MODE mode =
2803 0 : interintra_to_intra_mode[xd->mi[0]->mbmi.interintra_mode];
2804 :
2805 0 : av1_predict_intra_block(xd, pd->width, pd->height, plane_bsize, mode,
2806 0 : ctx->plane[plane], ctx->stride[plane], dst,
2807 : dst_stride, 0, 0, plane);
2808 0 : }
2809 :
2810 0 : void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
2811 : const uint8_t *inter_pred, int inter_stride,
2812 : const uint8_t *intra_pred, int intra_stride) {
2813 0 : const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
2814 : #if CONFIG_HIGHBITDEPTH
2815 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2816 0 : combine_interintra_highbd(
2817 0 : xd->mi[0]->mbmi.interintra_mode, xd->mi[0]->mbmi.use_wedge_interintra,
2818 0 : xd->mi[0]->mbmi.interintra_wedge_index,
2819 0 : xd->mi[0]->mbmi.interintra_wedge_sign, bsize, plane_bsize,
2820 : xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, inter_pred,
2821 : inter_stride, intra_pred, intra_stride, xd->bd);
2822 0 : return;
2823 : }
2824 : #endif // CONFIG_HIGHBITDEPTH
2825 0 : combine_interintra(xd->mi[0]->mbmi.interintra_mode,
2826 0 : xd->mi[0]->mbmi.use_wedge_interintra,
2827 0 : xd->mi[0]->mbmi.interintra_wedge_index,
2828 0 : xd->mi[0]->mbmi.interintra_wedge_sign, bsize, plane_bsize,
2829 : xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
2830 : inter_pred, inter_stride, intra_pred, intra_stride);
2831 : }
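/*
 * Hedged sketch of the CONVERT_TO_BYTEPTR / CONVERT_TO_SHORTPTR convention
 * used by the high-bitdepth branch above: uint16_t pixels travel through
 * uint8_t * interfaces by halving/doubling the address. The definitions
 * below mirror my reading of aom_dsp; treat them as illustrative.
 */
#include <stdint.h>

#define EX_CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1))
#define EX_CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1))
/* Round-trip: EX_CONVERT_TO_SHORTPTR(EX_CONVERT_TO_BYTEPTR(p)) == p for a
 * uint16_t *p, so a highbd buffer can be threaded through byte-pointer
 * call chains and recovered in the leaf routine. */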
2832 :
2833 0 : void av1_build_interintra_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred,
2834 : int ystride, BUFFER_SET *ctx,
2835 : BLOCK_SIZE bsize) {
2836 : #if CONFIG_HIGHBITDEPTH
2837 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2838 : DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
2839 0 : av1_build_intra_predictors_for_interintra(
2840 0 : xd, bsize, 0, ctx, CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
2841 0 : av1_combine_interintra(xd, bsize, 0, ypred, ystride,
2842 0 : CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
2843 0 : return;
2844 : }
2845 : #endif // CONFIG_HIGHBITDEPTH
2846 : {
2847 : DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
2848 0 : av1_build_intra_predictors_for_interintra(xd, bsize, 0, ctx, intrapredictor,
2849 : MAX_SB_SIZE);
2850 0 : av1_combine_interintra(xd, bsize, 0, ypred, ystride, intrapredictor,
2851 : MAX_SB_SIZE);
2852 : }
2853 : }
2854 :
2855 0 : void av1_build_interintra_predictors_sbc(MACROBLOCKD *xd, uint8_t *upred,
2856 : int ustride, BUFFER_SET *ctx,
2857 : int plane, BLOCK_SIZE bsize) {
2858 : #if CONFIG_HIGHBITDEPTH
2859 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2860 : DECLARE_ALIGNED(16, uint16_t, uintrapredictor[MAX_SB_SQUARE]);
2861 0 : av1_build_intra_predictors_for_interintra(
2862 0 : xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(uintrapredictor),
2863 : MAX_SB_SIZE);
2864 0 : av1_combine_interintra(xd, bsize, plane, upred, ustride,
2865 0 : CONVERT_TO_BYTEPTR(uintrapredictor), MAX_SB_SIZE);
2866 0 : return;
2867 : }
2868 : #endif // CONFIG_HIGHBITDEPTH
2869 : {
2870 : DECLARE_ALIGNED(16, uint8_t, uintrapredictor[MAX_SB_SQUARE]);
2871 0 : av1_build_intra_predictors_for_interintra(xd, bsize, plane, ctx,
2872 : uintrapredictor, MAX_SB_SIZE);
2873 0 : av1_combine_interintra(xd, bsize, plane, upred, ustride, uintrapredictor,
2874 : MAX_SB_SIZE);
2875 : }
2876 : }
2877 :
2878 0 : void av1_build_interintra_predictors_sbuv(MACROBLOCKD *xd, uint8_t *upred,
2879 : uint8_t *vpred, int ustride,
2880 : int vstride, BUFFER_SET *ctx,
2881 : BLOCK_SIZE bsize) {
2882 0 : av1_build_interintra_predictors_sbc(xd, upred, ustride, ctx, 1, bsize);
2883 0 : av1_build_interintra_predictors_sbc(xd, vpred, vstride, ctx, 2, bsize);
2884 0 : }
2885 :
2886 0 : void av1_build_interintra_predictors(MACROBLOCKD *xd, uint8_t *ypred,
2887 : uint8_t *upred, uint8_t *vpred,
2888 : int ystride, int ustride, int vstride,
2889 : BUFFER_SET *ctx, BLOCK_SIZE bsize) {
2890 0 : av1_build_interintra_predictors_sby(xd, ypred, ystride, ctx, bsize);
2891 0 : av1_build_interintra_predictors_sbuv(xd, upred, vpred, ustride, vstride, ctx,
2892 : bsize);
2893 0 : }
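/*
 * Hedged usage sketch: once the ordinary inter predictors for all three
 * planes sit in the dst buffers, one call blends them in place with freshly
 * built intra predictors (ctx supplies the reconstructed border pixels).
 * All identifiers come from the functions above; the wrapper name is
 * illustrative.
 */
static void apply_interintra_sketch(MACROBLOCKD *xd, BUFFER_SET *ctx,
                                    BLOCK_SIZE bsize) {
  av1_build_interintra_predictors(
      xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf,
      xd->plane[0].dst.stride, xd->plane[1].dst.stride,
      xd->plane[2].dst.stride, ctx, bsize);
}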
2894 : #endif // CONFIG_INTERINTRA
2895 :
2896 : // Builds the inter-predictor for the single-reference case, for use
2897 : // in the encoder to search the wedges efficiently.
2898 0 : static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane,
2899 : int block, int bw, int bh, int x,
2900 : int y, int w, int h, int mi_x,
2901 : int mi_y, int ref,
2902 : uint8_t *const ext_dst,
2903 : int ext_dst_stride) {
2904 0 : struct macroblockd_plane *const pd = &xd->plane[plane];
2905 0 : const MODE_INFO *mi = xd->mi[0];
2906 :
2907 0 : const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
2908 0 : struct buf_2d *const pre_buf = &pd->pre[ref];
2909 : #if CONFIG_HIGHBITDEPTH
2910 0 : uint8_t *const dst =
2911 0 : (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? CONVERT_TO_BYTEPTR(ext_dst)
2912 0 : : ext_dst) +
2913 0 : ext_dst_stride * y + x;
2914 : #else
2915 : uint8_t *const dst = ext_dst + ext_dst_stride * y + x;
2916 : #endif
2917 0 : const MV mv = mi->mbmi.sb_type < BLOCK_8X8
2918 : ? average_split_mvs(pd, mi, ref, block)
2919 : : mi->mbmi.mv[ref].as_mv;
2920 :
2921 : // TODO(jkoleszar): This clamping is done in the incorrect place for the
2922 : // scaling case. It needs to be done on the scaled MV, not the pre-scaling
2923 : // MV. Note however that it performs the subsampling-aware scaling so
2924 : // that the result is always q4.
2925 : // The MV precision here is MV_PRECISION_Q4.
2926 0 : const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, pd->subsampling_x,
2927 : pd->subsampling_y);
2928 :
2929 : uint8_t *pre;
2930 : MV32 scaled_mv;
2931 : int xs, ys, subpel_x, subpel_y;
2932 0 : const int is_scaled = av1_is_scaled(sf);
2933 0 : ConvolveParams conv_params = get_conv_params(0, plane);
2934 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
2935 : WarpTypesAllowed warp_types;
2936 : #if CONFIG_GLOBAL_MOTION
2937 0 : WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]];
2938 0 : warp_types.global_warp_allowed = is_global_mv_block(mi, block, wm->wmtype);
2939 : #endif // CONFIG_GLOBAL_MOTION
2940 : #if CONFIG_WARPED_MOTION
2941 0 : warp_types.local_warp_allowed = mi->mbmi.motion_mode == WARPED_CAUSAL;
2942 : #endif // CONFIG_WARPED_MOTION
2943 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
2944 :
2945 0 : if (is_scaled) {
2946 0 : pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
2947 0 : scaled_mv = av1_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
2948 0 : xs = sf->x_step_q4;
2949 0 : ys = sf->y_step_q4;
2950 : } else {
2951 0 : pre = pre_buf->buf + (y * pre_buf->stride + x);
2952 0 : scaled_mv.row = mv_q4.row;
2953 0 : scaled_mv.col = mv_q4.col;
2954 0 : xs = ys = 16;
2955 : }
2956 :
2957 0 : subpel_x = scaled_mv.col & SUBPEL_MASK;
2958 0 : subpel_y = scaled_mv.row & SUBPEL_MASK;
2959 0 : pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride +
2960 0 : (scaled_mv.col >> SUBPEL_BITS);
2961 :
2962 0 : av1_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride, subpel_x,
2963 : subpel_y, sf, w, h, &conv_params,
2964 0 : mi->mbmi.interp_filter,
2965 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
2966 0 : &warp_types, (mi_x >> pd->subsampling_x) + x,
2967 0 : (mi_y >> pd->subsampling_y) + y, plane, ref,
2968 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
2969 : #if CONFIG_MOTION_VAR
2970 : 0, 0,
2971 : #endif
2972 : xs, ys, xd);
2973 0 : }
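/*
 * Standalone sketch of the subpel split performed above: a q4 (1/16-pel)
 * motion component divides into an integer-pel pointer offset and a 4-bit
 * fractional phase that selects the interpolation filter. SUBPEL_BITS is 4
 * and SUBPEL_MASK is 15 in the real headers; the function name is
 * illustrative.
 */
static void split_mv_q4_sketch(int mv_q4, int *fullpel, int *subpel) {
  *fullpel = mv_q4 >> 4; /* arithmetic shift keeps the sign of negative MVs */
  *subpel = mv_q4 & 15;  /* phase 0..15, picks one of 16 subpel filters */
}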
2974 :
2975 0 : void av1_build_inter_predictors_for_planes_single_buf(
2976 : MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row,
2977 : int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3]) {
2978 : int plane;
2979 0 : const int mi_x = mi_col * MI_SIZE;
2980 0 : const int mi_y = mi_row * MI_SIZE;
2981 0 : for (plane = plane_from; plane <= plane_to; ++plane) {
2982 0 : const BLOCK_SIZE plane_bsize =
2983 0 : get_plane_block_size(bsize, &xd->plane[plane]);
2984 0 : const int bw = block_size_wide[plane_bsize];
2985 0 : const int bh = block_size_high[plane_bsize];
2986 :
2987 : if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8 && !CONFIG_CB4X4) {
2988 : int x, y;
2989 : const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
2990 : const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
2991 : assert(bsize == BLOCK_8X8);
2992 : for (y = 0; y < num_4x4_h; ++y)
2993 : for (x = 0; x < num_4x4_w; ++x)
2994 : build_inter_predictors_single_buf(
2995 : xd, plane, y * 2 + x, bw, bh, 4 * x, 4 * y, 4, 4, mi_x, mi_y, ref,
2996 : ext_dst[plane], ext_dst_stride[plane]);
2997 : } else {
2998 0 : build_inter_predictors_single_buf(xd, plane, 0, bw, bh, 0, 0, bw, bh,
2999 0 : mi_x, mi_y, ref, ext_dst[plane],
3000 0 : ext_dst_stride[plane]);
3001 : }
3002 : }
3003 0 : }
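/*
 * Hedged usage sketch for the encoder's wedge search: motion compensation
 * for each single reference runs once into scratch buffers, after which
 * every candidate mask can be evaluated by re-blending those buffers
 * without redoing interpolation. The wrapper name and buffer ownership are
 * illustrative.
 */
static void render_single_refs_sketch(MACROBLOCKD *xd, BLOCK_SIZE bsize,
                                      int mi_row, int mi_col,
                                      uint8_t *pred0[3], int stride0[3],
                                      uint8_t *pred1[3], int stride1[3]) {
  av1_build_inter_predictors_for_planes_single_buf(xd, bsize, 0, 2, mi_row,
                                                   mi_col, 0, pred0, stride0);
  av1_build_inter_predictors_for_planes_single_buf(xd, bsize, 0, 2, mi_row,
                                                   mi_col, 1, pred1, stride1);
}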
3004 :
3005 0 : static void build_wedge_inter_predictor_from_buf(
3006 : MACROBLOCKD *xd, int plane, int x, int y, int w, int h,
3007 : #if CONFIG_SUPERTX
3008 : int wedge_offset_x, int wedge_offset_y,
3009 : #endif // CONFIG_SUPERTX
3010 : uint8_t *ext_dst0, int ext_dst_stride0, uint8_t *ext_dst1,
3011 : int ext_dst_stride1) {
3012 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3013 0 : const int is_compound = has_second_ref(mbmi);
3014 0 : MACROBLOCKD_PLANE *const pd = &xd->plane[plane];
3015 0 : struct buf_2d *const dst_buf = &pd->dst;
3016 0 : uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
3017 0 : const INTERINTER_COMPOUND_DATA comp_data = {
3018 : #if CONFIG_WEDGE
3019 0 : mbmi->wedge_index,
3020 0 : mbmi->wedge_sign,
3021 : #endif // CONFIG_WEDGE
3022 : #if CONFIG_COMPOUND_SEGMENT
3023 0 : mbmi->mask_type,
3024 0 : xd->seg_mask,
3025 : #endif // CONFIG_COMPOUND_SEGMENT
3026 0 : mbmi->interinter_compound_type
3027 : };
3028 :
3029 0 : if (is_compound && is_masked_compound_type(mbmi->interinter_compound_type)) {
3030 : #if CONFIG_COMPOUND_SEGMENT
3031 0 : if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
3032 : #if CONFIG_HIGHBITDEPTH
3033 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3034 0 : build_compound_seg_mask_highbd(
3035 : comp_data.seg_mask, comp_data.mask_type,
3036 0 : CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
3037 0 : CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, mbmi->sb_type, h, w,
3038 : xd->bd);
3039 : else
3040 : #endif // CONFIG_HIGHBITDEPTH
3041 0 : build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type,
3042 : ext_dst0, ext_dst_stride0, ext_dst1,
3043 0 : ext_dst_stride1, mbmi->sb_type, h, w);
3044 : }
3045 : #endif // CONFIG_COMPOUND_SEGMENT
3046 :
3047 : #if CONFIG_SUPERTX
3048 : #if CONFIG_HIGHBITDEPTH
3049 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3050 : build_masked_compound_wedge_extend_highbd(
3051 : dst, dst_buf->stride, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
3052 : CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, &comp_data,
3053 : mbmi->sb_type, wedge_offset_x, wedge_offset_y, h, w, xd->bd);
3054 : else
3055 : #endif // CONFIG_HIGHBITDEPTH
3056 : build_masked_compound_wedge_extend(
3057 : dst, dst_buf->stride, ext_dst0, ext_dst_stride0, ext_dst1,
3058 : ext_dst_stride1, &comp_data, mbmi->sb_type, wedge_offset_x,
3059 : wedge_offset_y, h, w);
3060 : #else
3061 : #if CONFIG_HIGHBITDEPTH
3062 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3063 0 : build_masked_compound_highbd(
3064 0 : dst, dst_buf->stride, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
3065 0 : CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, &comp_data,
3066 0 : mbmi->sb_type, h, w, xd->bd);
3067 : else
3068 : #endif // CONFIG_HIGHBITDEPTH
3069 0 : build_masked_compound(dst, dst_buf->stride, ext_dst0, ext_dst_stride0,
3070 : ext_dst1, ext_dst_stride1, &comp_data,
3071 0 : mbmi->sb_type, h, w);
3072 : #endif // CONFIG_SUPERTX
3073 : } else {
3074 : #if CONFIG_HIGHBITDEPTH
3075 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3076 0 : aom_highbd_convolve_copy(CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
3077 0 : dst, dst_buf->stride, NULL, 0, NULL, 0, w, h,
3078 : xd->bd);
3079 : else
3080 : #endif // CONFIG_HIGHBITDEPTH
3081 0 : aom_convolve_copy(ext_dst0, ext_dst_stride0, dst, dst_buf->stride, NULL,
3082 : 0, NULL, 0, w, h);
3083 : }
3084 0 : }
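/*
 * Conceptual sketch of what build_masked_compound() does in the
 * unsubsampled case: a per-pixel A64 blend of the two single-ref
 * predictions under a soft 0..64 mask. Illustrative code, not the library
 * routine (which also upsamples the mask for subsampled planes).
 */
static void masked_blend_sketch(uint8_t *dst, int dst_stride,
                                const uint8_t *p0, int stride0,
                                const uint8_t *p1, int stride1,
                                const uint8_t *mask, int mask_stride,
                                int h, int w) {
  int i, j;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int m = mask[i * mask_stride + j]; /* 0..64 */
      dst[i * dst_stride + j] =
          (uint8_t)((m * p0[i * stride0 + j] +
                     (64 - m) * p1[i * stride1 + j] + 32) >> 6);
    }
  }
}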
3085 :
3086 0 : void av1_build_wedge_inter_predictor_from_buf(
3087 : MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to,
3088 : #if CONFIG_SUPERTX
3089 : int wedge_offset_x, int wedge_offset_y,
3090 : #endif // CONFIG_SUPERTX
3091 : uint8_t *ext_dst0[3], int ext_dst_stride0[3], uint8_t *ext_dst1[3],
3092 : int ext_dst_stride1[3]) {
3093 : int plane;
3094 0 : for (plane = plane_from; plane <= plane_to; ++plane) {
3095 0 : const BLOCK_SIZE plane_bsize =
3096 0 : get_plane_block_size(bsize, &xd->plane[plane]);
3097 :
3098 : if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8 && !CONFIG_CB4X4) {
3099 : int x, y;
3100 : const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
3101 : const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
3102 : assert(bsize == BLOCK_8X8);
3103 : for (y = 0; y < num_4x4_h; ++y)
3104 : for (x = 0; x < num_4x4_w; ++x)
3105 : build_wedge_inter_predictor_from_buf(
3106 : xd, plane, 4 * x, 4 * y, 4, 4,
3107 : #if CONFIG_SUPERTX
3108 : wedge_offset_x, wedge_offset_y,
3109 : #endif // CONFIG_SUPERTX
3110 : ext_dst0[plane], ext_dst_stride0[plane], ext_dst1[plane],
3111 : ext_dst_stride1[plane]);
3112 : } else {
3113 0 : const int bw = block_size_wide[plane_bsize];
3114 0 : const int bh = block_size_high[plane_bsize];
3115 0 : build_wedge_inter_predictor_from_buf(
3116 : xd, plane, 0, 0, bw, bh,
3117 : #if CONFIG_SUPERTX
3118 : wedge_offset_x, wedge_offset_y,
3119 : #endif // CONFIG_SUPERTX
3120 0 : ext_dst0[plane], ext_dst_stride0[plane], ext_dst1[plane],
3121 0 : ext_dst_stride1[plane]);
3122 : }
3123 : }
3124 0 : }
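/*
 * Hedged end-to-end sketch of the search this file enables: with both
 * single-ref predictions cached once (see
 * av1_build_inter_predictors_for_planes_single_buf above), each candidate
 * wedge is tried by updating mbmi and re-blending the cached buffers. The
 * wrapper name and the elided RD bookkeeping are illustrative; the caller
 * is assumed to have set interinter_compound_type to a wedge type.
 */
static void try_wedge_sketch(MACROBLOCKD *xd, BLOCK_SIZE bsize,
                             int wedge_index, int wedge_sign,
                             uint8_t *pred0[3], int stride0[3],
                             uint8_t *pred1[3], int stride1[3]) {
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
#if CONFIG_WEDGE
  mbmi->wedge_index = wedge_index;
  mbmi->wedge_sign = wedge_sign;
#endif  // CONFIG_WEDGE
  av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0,
#if CONFIG_SUPERTX
                                           0, 0,
#endif  // CONFIG_SUPERTX
                                           pred0, stride0, pred1, stride1);
  /* ...rate/distortion evaluation of xd->plane[0].dst would follow... */
}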
3125 : #endif // CONFIG_EXT_INTER