1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include <stdio.h>
13 : #include <stdlib.h>
14 : #include <memory.h>
15 : #include <math.h>
16 : #include <assert.h>
17 :
18 : #include "./av1_rtcd.h"
19 : #include "av1/common/warped_motion.h"
20 :
21 : /* clang-format off */
22 : static const int error_measure_lut[512] = {
23 : // pow 0.7
24 : 16384, 16339, 16294, 16249, 16204, 16158, 16113, 16068,
25 : 16022, 15977, 15932, 15886, 15840, 15795, 15749, 15703,
26 : 15657, 15612, 15566, 15520, 15474, 15427, 15381, 15335,
27 : 15289, 15242, 15196, 15149, 15103, 15056, 15010, 14963,
28 : 14916, 14869, 14822, 14775, 14728, 14681, 14634, 14587,
29 : 14539, 14492, 14445, 14397, 14350, 14302, 14254, 14206,
30 : 14159, 14111, 14063, 14015, 13967, 13918, 13870, 13822,
31 : 13773, 13725, 13676, 13628, 13579, 13530, 13481, 13432,
32 : 13383, 13334, 13285, 13236, 13187, 13137, 13088, 13038,
33 : 12988, 12939, 12889, 12839, 12789, 12739, 12689, 12639,
34 : 12588, 12538, 12487, 12437, 12386, 12335, 12285, 12234,
35 : 12183, 12132, 12080, 12029, 11978, 11926, 11875, 11823,
36 : 11771, 11719, 11667, 11615, 11563, 11511, 11458, 11406,
37 : 11353, 11301, 11248, 11195, 11142, 11089, 11036, 10982,
38 : 10929, 10875, 10822, 10768, 10714, 10660, 10606, 10552,
39 : 10497, 10443, 10388, 10333, 10279, 10224, 10168, 10113,
40 : 10058, 10002, 9947, 9891, 9835, 9779, 9723, 9666,
41 : 9610, 9553, 9497, 9440, 9383, 9326, 9268, 9211,
42 : 9153, 9095, 9037, 8979, 8921, 8862, 8804, 8745,
43 : 8686, 8627, 8568, 8508, 8449, 8389, 8329, 8269,
44 : 8208, 8148, 8087, 8026, 7965, 7903, 7842, 7780,
45 : 7718, 7656, 7593, 7531, 7468, 7405, 7341, 7278,
46 : 7214, 7150, 7086, 7021, 6956, 6891, 6826, 6760,
47 : 6695, 6628, 6562, 6495, 6428, 6361, 6293, 6225,
48 : 6157, 6089, 6020, 5950, 5881, 5811, 5741, 5670,
49 : 5599, 5527, 5456, 5383, 5311, 5237, 5164, 5090,
50 : 5015, 4941, 4865, 4789, 4713, 4636, 4558, 4480,
51 : 4401, 4322, 4242, 4162, 4080, 3998, 3916, 3832,
52 : 3748, 3663, 3577, 3490, 3402, 3314, 3224, 3133,
53 : 3041, 2948, 2854, 2758, 2661, 2562, 2461, 2359,
54 : 2255, 2148, 2040, 1929, 1815, 1698, 1577, 1452,
55 : 1323, 1187, 1045, 894, 731, 550, 339, 0,
56 : 339, 550, 731, 894, 1045, 1187, 1323, 1452,
57 : 1577, 1698, 1815, 1929, 2040, 2148, 2255, 2359,
58 : 2461, 2562, 2661, 2758, 2854, 2948, 3041, 3133,
59 : 3224, 3314, 3402, 3490, 3577, 3663, 3748, 3832,
60 : 3916, 3998, 4080, 4162, 4242, 4322, 4401, 4480,
61 : 4558, 4636, 4713, 4789, 4865, 4941, 5015, 5090,
62 : 5164, 5237, 5311, 5383, 5456, 5527, 5599, 5670,
63 : 5741, 5811, 5881, 5950, 6020, 6089, 6157, 6225,
64 : 6293, 6361, 6428, 6495, 6562, 6628, 6695, 6760,
65 : 6826, 6891, 6956, 7021, 7086, 7150, 7214, 7278,
66 : 7341, 7405, 7468, 7531, 7593, 7656, 7718, 7780,
67 : 7842, 7903, 7965, 8026, 8087, 8148, 8208, 8269,
68 : 8329, 8389, 8449, 8508, 8568, 8627, 8686, 8745,
69 : 8804, 8862, 8921, 8979, 9037, 9095, 9153, 9211,
70 : 9268, 9326, 9383, 9440, 9497, 9553, 9610, 9666,
71 : 9723, 9779, 9835, 9891, 9947, 10002, 10058, 10113,
72 : 10168, 10224, 10279, 10333, 10388, 10443, 10497, 10552,
73 : 10606, 10660, 10714, 10768, 10822, 10875, 10929, 10982,
74 : 11036, 11089, 11142, 11195, 11248, 11301, 11353, 11406,
75 : 11458, 11511, 11563, 11615, 11667, 11719, 11771, 11823,
76 : 11875, 11926, 11978, 12029, 12080, 12132, 12183, 12234,
77 : 12285, 12335, 12386, 12437, 12487, 12538, 12588, 12639,
78 : 12689, 12739, 12789, 12839, 12889, 12939, 12988, 13038,
79 : 13088, 13137, 13187, 13236, 13285, 13334, 13383, 13432,
80 : 13481, 13530, 13579, 13628, 13676, 13725, 13773, 13822,
81 : 13870, 13918, 13967, 14015, 14063, 14111, 14159, 14206,
82 : 14254, 14302, 14350, 14397, 14445, 14492, 14539, 14587,
83 : 14634, 14681, 14728, 14775, 14822, 14869, 14916, 14963,
84 : 15010, 15056, 15103, 15149, 15196, 15242, 15289, 15335,
85 : 15381, 15427, 15474, 15520, 15566, 15612, 15657, 15703,
86 : 15749, 15795, 15840, 15886, 15932, 15977, 16022, 16068,
87 : 16113, 16158, 16204, 16249, 16294, 16339, 16384, 16384,
88 : };
89 : /* clang-format on */
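/* A hedged note on where these values appear to come from (an assumption
   based on the "pow 0.7" comment above plus a few spot checks, not taken
   from any authoritative reference): entry [255 + err] matches
   round((1 << 14) * pow(|err| / 255.0, 0.7)) for err in [-255, 255], with
   the final entry replicated so that err == 256 reads the same value as
   err == 255. For example, err == 1 gives round(16384 * pow(1 / 255.0, 0.7))
   == 339 and err == 255 gives 16384. An illustrative (unused) regeneration
   sketch under that assumption:

     static void build_error_measure_lut(int lut[512]) {
       for (int idx = 0; idx < 512; ++idx) {
         int e = abs(idx - 255);
         if (e > 255) e = 255;  // replicate the tail entry
         lut[idx] = (int)lrint(16384.0 * pow(e / 255.0, 0.7));
       }
     }
*/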
90 :
91 0 : static ProjectPointsFunc get_project_points_type(TransformationType type) {
92 0 : switch (type) {
93 0 : case HOMOGRAPHY: return project_points_homography;
94 0 : case AFFINE: return project_points_affine;
95 0 : case ROTZOOM: return project_points_rotzoom;
96 0 : case TRANSLATION: return project_points_translation;
97 0 : default: assert(0); return NULL;
98 : }
99 : }
100 :
101 0 : void project_points_translation(const int32_t *mat, int *points, int *proj,
102 : const int n, const int stride_points,
103 : const int stride_proj, const int subsampling_x,
104 : const int subsampling_y) {
105 : int i;
106 0 : for (i = 0; i < n; ++i) {
107 0 : const int x = *(points++), y = *(points++);
108 0 : if (subsampling_x)
109 0 : *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
110 : ((x * (1 << (WARPEDMODEL_PREC_BITS + 1))) + mat[0]),
111 : WARPEDDIFF_PREC_BITS + 1);
112 : else
113 0 : *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
114 : ((x * (1 << WARPEDMODEL_PREC_BITS)) + mat[0]), WARPEDDIFF_PREC_BITS);
115 0 : if (subsampling_y)
116 0 : *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
117 : ((y * (1 << (WARPEDMODEL_PREC_BITS + 1))) + mat[1]),
118 : WARPEDDIFF_PREC_BITS + 1);
119 : else
120 0 : *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
121 : ((y * (1 << WARPEDMODEL_PREC_BITS))) + mat[1], WARPEDDIFF_PREC_BITS);
122 0 : points += stride_points - 2;
123 0 : proj += stride_proj - 2;
124 : }
125 0 : }
126 :
127 0 : void project_points_rotzoom(const int32_t *mat, int *points, int *proj,
128 : const int n, const int stride_points,
129 : const int stride_proj, const int subsampling_x,
130 : const int subsampling_y) {
131 : int i;
132 0 : for (i = 0; i < n; ++i) {
133 0 : const int x = *(points++), y = *(points++);
134 0 : if (subsampling_x)
135 0 : *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
136 : mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
137 : (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
138 : WARPEDDIFF_PREC_BITS + 1);
139 : else
140 0 : *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
141 : WARPEDDIFF_PREC_BITS);
142 0 : if (subsampling_y)
143 0 : *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
144 : -mat[3] * 2 * x + mat[2] * 2 * y + mat[1] +
145 : (-mat[3] + mat[2] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
146 : WARPEDDIFF_PREC_BITS + 1);
147 : else
148 0 : *(proj++) = ROUND_POWER_OF_TWO_SIGNED(-mat[3] * x + mat[2] * y + mat[1],
149 : WARPEDDIFF_PREC_BITS);
150 0 : points += stride_points - 2;
151 0 : proj += stride_proj - 2;
152 : }
153 0 : }
154 :
155 0 : void project_points_affine(const int32_t *mat, int *points, int *proj,
156 : const int n, const int stride_points,
157 : const int stride_proj, const int subsampling_x,
158 : const int subsampling_y) {
159 : int i;
160 0 : for (i = 0; i < n; ++i) {
161 0 : const int x = *(points++), y = *(points++);
162 0 : if (subsampling_x)
163 0 : *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
164 : mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
165 : (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
166 : WARPEDDIFF_PREC_BITS + 1);
167 : else
168 0 : *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
169 : WARPEDDIFF_PREC_BITS);
170 0 : if (subsampling_y)
171 0 : *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
172 : mat[4] * 2 * x + mat[5] * 2 * y + mat[1] +
173 : (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
174 : WARPEDDIFF_PREC_BITS + 1);
175 : else
176 0 : *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[4] * x + mat[5] * y + mat[1],
177 : WARPEDDIFF_PREC_BITS);
178 0 : points += stride_points - 2;
179 0 : proj += stride_proj - 2;
180 : }
181 0 : }
182 :
183 0 : void project_points_hortrapezoid(const int32_t *mat, int *points, int *proj,
184 : const int n, const int stride_points,
185 : const int stride_proj, const int subsampling_x,
186 : const int subsampling_y) {
187 : int i;
188 : int64_t x, y, Z;
189 : int64_t xp, yp;
190 0 : for (i = 0; i < n; ++i) {
191 0 : x = *(points++), y = *(points++);
192 0 : x = (subsampling_x ? 4 * x + 1 : 2 * x);
193 0 : y = (subsampling_y ? 4 * y + 1 : 2 * y);
194 :
195 0 : Z = (mat[7] * y + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
196 0 : xp = (mat[2] * x + mat[3] * y + 2 * mat[0]) *
197 : (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
198 : WARPEDMODEL_PREC_BITS));
199 0 : yp = (mat[5] * y + 2 * mat[1]) *
200 : (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
201 : WARPEDMODEL_PREC_BITS));
202 :
203 0 : xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
204 0 : yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;
205 :
206 0 : if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
207 0 : if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
208 0 : *(proj++) = (int)xp;
209 0 : *(proj++) = (int)yp;
210 :
211 0 : points += stride_points - 2;
212 0 : proj += stride_proj - 2;
213 : }
214 0 : }
215 :
216 0 : void project_points_vertrapezoid(const int32_t *mat, int *points, int *proj,
217 : const int n, const int stride_points,
218 : const int stride_proj, const int subsampling_x,
219 : const int subsampling_y) {
220 : int i;
221 : int64_t x, y, Z;
222 : int64_t xp, yp;
223 0 : for (i = 0; i < n; ++i) {
224 0 : x = *(points++), y = *(points++);
225 0 : x = (subsampling_x ? 4 * x + 1 : 2 * x);
226 0 : y = (subsampling_y ? 4 * y + 1 : 2 * y);
227 :
228 0 : Z = (mat[6] * x + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
229 0 : xp = (mat[2] * x + 2 * mat[0]) *
230 : (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
231 : WARPEDMODEL_PREC_BITS));
232 0 : yp = (mat[4] * x + mat[5] * y + 2 * mat[1]) *
233 : (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
234 : WARPEDMODEL_PREC_BITS));
235 :
236 0 : xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
237 0 : yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;
238 :
239 0 : if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
240 0 : if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
241 0 : *(proj++) = (int)xp;
242 0 : *(proj++) = (int)yp;
243 :
244 0 : points += stride_points - 2;
245 0 : proj += stride_proj - 2;
246 : }
247 0 : }
248 :
249 0 : void project_points_homography(const int32_t *mat, int *points, int *proj,
250 : const int n, const int stride_points,
251 : const int stride_proj, const int subsampling_x,
252 : const int subsampling_y) {
253 : int i;
254 : int64_t x, y, Z;
255 : int64_t xp, yp;
256 0 : for (i = 0; i < n; ++i) {
257 0 : x = *(points++), y = *(points++);
258 0 : x = (subsampling_x ? 4 * x + 1 : 2 * x);
259 0 : y = (subsampling_y ? 4 * y + 1 : 2 * y);
260 :
261 0 : Z = (mat[6] * x + mat[7] * y + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
262 0 : xp = (mat[2] * x + mat[3] * y + 2 * mat[0]) *
263 : (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
264 : WARPEDMODEL_PREC_BITS));
265 0 : yp = (mat[4] * x + mat[5] * y + 2 * mat[1]) *
266 : (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
267 : WARPEDMODEL_PREC_BITS));
268 :
269 0 : xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
270 0 : yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;
271 :
272 0 : if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
273 0 : if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
274 0 : *(proj++) = (int)xp;
275 0 : *(proj++) = (int)yp;
276 :
277 0 : points += stride_points - 2;
278 0 : proj += stride_proj - 2;
279 : }
280 0 : }
281 :
282 : // 'points' are at the original scale; the output 'proj' values are scaled
283 : // up by 1 << WARPEDPIXEL_PREC_BITS.
284 0 : void project_points(const WarpedMotionParams *wm_params, int *points, int *proj,
285 : const int n, const int stride_points, const int stride_proj,
286 : const int subsampling_x, const int subsampling_y) {
287 0 : switch (wm_params->wmtype) {
288 : case AFFINE:
289 0 : project_points_affine(wm_params->wmmat, points, proj, n, stride_points,
290 : stride_proj, subsampling_x, subsampling_y);
291 0 : break;
292 : case ROTZOOM:
293 0 : project_points_rotzoom(wm_params->wmmat, points, proj, n, stride_points,
294 : stride_proj, subsampling_x, subsampling_y);
295 0 : break;
296 : case HOMOGRAPHY:
297 0 : project_points_homography(wm_params->wmmat, points, proj, n,
298 : stride_points, stride_proj, subsampling_x,
299 : subsampling_y);
300 0 : break;
301 0 : default: assert(0 && "Invalid warped motion type!"); return;
302 : }
303 0 : }
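/* Illustrative use (a sketch of the calling convention, mirroring how
   warp_plane_old() below drives the per-type project functions): to project
   a single luma position (col, row) with no chroma subsampling one could
   write
     int in[2] = { col, row }, out[2];
     project_points(wm, in, out, 1, 2, 2, 0, 0);
   after which out[0] and out[1] hold the warped position in units of
   1/(1 << WARPEDPIXEL_PREC_BITS) of a pixel. */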
304 :
305 : static const int16_t
306 : filter_ntap[WARPEDPIXEL_PREC_SHIFTS][WARPEDPIXEL_FILTER_TAPS] = {
307 : #if WARPEDPIXEL_PREC_BITS == 6
308 : { 0, 0, 128, 0, 0, 0 }, { 0, -1, 128, 2, -1, 0 },
309 : { 1, -3, 127, 4, -1, 0 }, { 1, -4, 126, 6, -2, 1 },
310 : { 1, -5, 126, 8, -3, 1 }, { 1, -6, 125, 11, -4, 1 },
311 : { 1, -7, 124, 13, -4, 1 }, { 2, -8, 123, 15, -5, 1 },
312 : { 2, -9, 122, 18, -6, 1 }, { 2, -10, 121, 20, -6, 1 },
313 : { 2, -11, 120, 22, -7, 2 }, { 2, -12, 119, 25, -8, 2 },
314 : { 3, -13, 117, 27, -8, 2 }, { 3, -13, 116, 29, -9, 2 },
315 : { 3, -14, 114, 32, -10, 3 }, { 3, -15, 113, 35, -10, 2 },
316 : { 3, -15, 111, 37, -11, 3 }, { 3, -16, 109, 40, -11, 3 },
317 : { 3, -16, 108, 42, -12, 3 }, { 4, -17, 106, 45, -13, 3 },
318 : { 4, -17, 104, 47, -13, 3 }, { 4, -17, 102, 50, -14, 3 },
319 : { 4, -17, 100, 52, -14, 3 }, { 4, -18, 98, 55, -15, 4 },
320 : { 4, -18, 96, 58, -15, 3 }, { 4, -18, 94, 60, -16, 4 },
321 : { 4, -18, 91, 63, -16, 4 }, { 4, -18, 89, 65, -16, 4 },
322 : { 4, -18, 87, 68, -17, 4 }, { 4, -18, 85, 70, -17, 4 },
323 : { 4, -18, 82, 73, -17, 4 }, { 4, -18, 80, 75, -17, 4 },
324 : { 4, -18, 78, 78, -18, 4 }, { 4, -17, 75, 80, -18, 4 },
325 : { 4, -17, 73, 82, -18, 4 }, { 4, -17, 70, 85, -18, 4 },
326 : { 4, -17, 68, 87, -18, 4 }, { 4, -16, 65, 89, -18, 4 },
327 : { 4, -16, 63, 91, -18, 4 }, { 4, -16, 60, 94, -18, 4 },
328 : { 3, -15, 58, 96, -18, 4 }, { 4, -15, 55, 98, -18, 4 },
329 : { 3, -14, 52, 100, -17, 4 }, { 3, -14, 50, 102, -17, 4 },
330 : { 3, -13, 47, 104, -17, 4 }, { 3, -13, 45, 106, -17, 4 },
331 : { 3, -12, 42, 108, -16, 3 }, { 3, -11, 40, 109, -16, 3 },
332 : { 3, -11, 37, 111, -15, 3 }, { 2, -10, 35, 113, -15, 3 },
333 : { 3, -10, 32, 114, -14, 3 }, { 2, -9, 29, 116, -13, 3 },
334 : { 2, -8, 27, 117, -13, 3 }, { 2, -8, 25, 119, -12, 2 },
335 : { 2, -7, 22, 120, -11, 2 }, { 1, -6, 20, 121, -10, 2 },
336 : { 1, -6, 18, 122, -9, 2 }, { 1, -5, 15, 123, -8, 2 },
337 : { 1, -4, 13, 124, -7, 1 }, { 1, -4, 11, 125, -6, 1 },
338 : { 1, -3, 8, 126, -5, 1 }, { 1, -2, 6, 126, -4, 1 },
339 : { 0, -1, 4, 127, -3, 1 }, { 0, -1, 2, 128, -1, 0 },
340 : #elif WARPEDPIXEL_PREC_BITS == 5
341 : { 0, 0, 128, 0, 0, 0 }, { 1, -3, 127, 4, -1, 0 },
342 : { 1, -5, 126, 8, -3, 1 }, { 1, -7, 124, 13, -4, 1 },
343 : { 2, -9, 122, 18, -6, 1 }, { 2, -11, 120, 22, -7, 2 },
344 : { 3, -13, 117, 27, -8, 2 }, { 3, -14, 114, 32, -10, 3 },
345 : { 3, -15, 111, 37, -11, 3 }, { 3, -16, 108, 42, -12, 3 },
346 : { 4, -17, 104, 47, -13, 3 }, { 4, -17, 100, 52, -14, 3 },
347 : { 4, -18, 96, 58, -15, 3 }, { 4, -18, 91, 63, -16, 4 },
348 : { 4, -18, 87, 68, -17, 4 }, { 4, -18, 82, 73, -17, 4 },
349 : { 4, -18, 78, 78, -18, 4 }, { 4, -17, 73, 82, -18, 4 },
350 : { 4, -17, 68, 87, -18, 4 }, { 4, -16, 63, 91, -18, 4 },
351 : { 3, -15, 58, 96, -18, 4 }, { 3, -14, 52, 100, -17, 4 },
352 : { 3, -13, 47, 104, -17, 4 }, { 3, -12, 42, 108, -16, 3 },
353 : { 3, -11, 37, 111, -15, 3 }, { 3, -10, 32, 114, -14, 3 },
354 : { 2, -8, 27, 117, -13, 3 }, { 2, -7, 22, 120, -11, 2 },
355 : { 1, -6, 18, 122, -9, 2 }, { 1, -4, 13, 124, -7, 1 },
356 : { 1, -3, 8, 126, -5, 1 }, { 0, -1, 4, 127, -3, 1 },
357 : #endif // WARPEDPIXEL_PREC_BITS == 6
358 : };
359 :
360 0 : static int32_t do_ntap_filter(const int32_t *const p, int x) {
361 : int i;
362 0 : int32_t sum = 0;
363 0 : for (i = 0; i < WARPEDPIXEL_FILTER_TAPS; ++i) {
364 0 : sum += p[i - WARPEDPIXEL_FILTER_TAPS / 2 + 1] * filter_ntap[x][i];
365 : }
366 0 : return sum;
367 : }
368 :
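// Cubic (Catmull-Rom) interpolation between p[0] and p[1] at fractional
// position x / (1 << WARPEDPIXEL_PREC_BITS), also using the neighbours p[-1]
// and p[2]; the result is scaled up by 1 << WARPEDPIXEL_FILTER_BITS.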
369 0 : static int32_t do_cubic_filter(const int32_t *const p, int x) {
370 0 : if (x == 0) {
371 0 : return p[0] * (1 << WARPEDPIXEL_FILTER_BITS);
372 0 : } else if (x == (1 << WARPEDPIXEL_PREC_BITS)) {
373 0 : return p[1] * (1 << WARPEDPIXEL_FILTER_BITS);
374 : } else {
375 0 : const int64_t v1 = (int64_t)x * x * x * (3 * (p[0] - p[1]) + p[2] - p[-1]);
376 0 : const int64_t v2 =
377 0 : (int64_t)x * x * (2 * p[-1] - 5 * p[0] + 4 * p[1] - p[2]);
378 0 : const int64_t v3 = x * (p[1] - p[-1]);
379 0 : const int64_t v4 = 2 * p[0];
380 0 : return (int32_t)ROUND_POWER_OF_TWO_SIGNED(
381 : (v4 * (1 << (3 * WARPEDPIXEL_PREC_BITS))) +
382 : (v3 * (1 << (2 * WARPEDPIXEL_PREC_BITS))) +
383 : (v2 * (1 << WARPEDPIXEL_PREC_BITS)) + v1,
384 : 3 * WARPEDPIXEL_PREC_BITS + 1 - WARPEDPIXEL_FILTER_BITS);
385 : }
386 : }
387 :
388 0 : static INLINE void get_subcolumn(int taps, const uint8_t *const ref,
389 : int32_t *col, int stride, int x, int y_start) {
390 : int i;
391 0 : for (i = 0; i < taps; ++i) {
392 0 : col[i] = ref[(i + y_start) * stride + x];
393 : }
394 0 : }
395 :
396 0 : static uint8_t bi_ntap_filter(const uint8_t *const ref, int x, int y,
397 : int stride) {
398 : int32_t val, arr[WARPEDPIXEL_FILTER_TAPS];
399 : int k;
400 0 : const int i = (int)x >> WARPEDPIXEL_PREC_BITS;
401 0 : const int j = (int)y >> WARPEDPIXEL_PREC_BITS;
402 0 : for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) {
403 : int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS];
404 0 : get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride,
405 0 : i + k + 1 - WARPEDPIXEL_FILTER_TAPS / 2,
406 : j + 1 - WARPEDPIXEL_FILTER_TAPS / 2);
407 0 : arr[k] = do_ntap_filter(arr_temp + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
408 0 : y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
409 : }
410 0 : val = do_ntap_filter(arr + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
411 0 : x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
412 0 : val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
413 0 : return (uint8_t)clip_pixel(val);
414 : }
415 :
416 0 : static uint8_t bi_cubic_filter(const uint8_t *const ref, int x, int y,
417 : int stride) {
418 : int32_t val, arr[4];
419 : int k;
420 0 : const int i = (int)x >> WARPEDPIXEL_PREC_BITS;
421 0 : const int j = (int)y >> WARPEDPIXEL_PREC_BITS;
422 0 : for (k = 0; k < 4; ++k) {
423 : int32_t arr_temp[4];
424 0 : get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1);
425 0 : arr[k] =
426 0 : do_cubic_filter(arr_temp + 1, y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
427 : }
428 0 : val = do_cubic_filter(arr + 1, x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
429 0 : val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
430 0 : return (uint8_t)clip_pixel(val);
431 : }
432 :
433 0 : static uint8_t bi_linear_filter(const uint8_t *const ref, int x, int y,
434 : int stride) {
435 0 : const int ix = x >> WARPEDPIXEL_PREC_BITS;
436 0 : const int iy = y >> WARPEDPIXEL_PREC_BITS;
437 0 : const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
438 0 : const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
439 : int32_t val;
440 0 : val = ROUND_POWER_OF_TWO_SIGNED(
441 : ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
442 : (WARPEDPIXEL_PREC_SHIFTS - sx) +
443 : ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
444 : ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
445 : ref[(iy + 1) * stride + ix + 1] * sy * sx,
446 : WARPEDPIXEL_PREC_BITS * 2);
447 0 : return (uint8_t)clip_pixel(val);
448 : }
449 :
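// Interpolates the reference at position (x, y), given in units of
// 1/(1 << WARPEDPIXEL_PREC_BITS) of a pixel. Samples whose support would
// read past the frame border are clamped to the border (corner sample or
// 1-D linear interpolation along an edge); interior positions use the n-tap
// filter when there is enough support, otherwise bicubic and finally
// bilinear interpolation.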
450 0 : static uint8_t warp_interpolate(const uint8_t *const ref, int x, int y,
451 : int width, int height, int stride) {
452 0 : const int ix = x >> WARPEDPIXEL_PREC_BITS;
453 0 : const int iy = y >> WARPEDPIXEL_PREC_BITS;
454 0 : const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
455 0 : const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
456 : int32_t v;
457 :
458 0 : if (ix < 0 && iy < 0)
459 0 : return ref[0];
460 0 : else if (ix < 0 && iy >= height - 1)
461 0 : return ref[(height - 1) * stride];
462 0 : else if (ix >= width - 1 && iy < 0)
463 0 : return ref[width - 1];
464 0 : else if (ix >= width - 1 && iy >= height - 1)
465 0 : return ref[(height - 1) * stride + (width - 1)];
466 0 : else if (ix < 0) {
467 0 : v = ROUND_POWER_OF_TWO_SIGNED(
468 : ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
469 : ref[(iy + 1) * stride] * sy,
470 : WARPEDPIXEL_PREC_BITS);
471 0 : return clip_pixel(v);
472 0 : } else if (iy < 0) {
473 0 : v = ROUND_POWER_OF_TWO_SIGNED(
474 : ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
475 : WARPEDPIXEL_PREC_BITS);
476 0 : return clip_pixel(v);
477 0 : } else if (ix >= width - 1) {
478 0 : v = ROUND_POWER_OF_TWO_SIGNED(
479 : ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
480 : ref[(iy + 1) * stride + width - 1] * sy,
481 : WARPEDPIXEL_PREC_BITS);
482 0 : return clip_pixel(v);
483 0 : } else if (iy >= height - 1) {
484 0 : v = ROUND_POWER_OF_TWO_SIGNED(
485 : ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
486 : ref[(height - 1) * stride + ix + 1] * sx,
487 : WARPEDPIXEL_PREC_BITS);
488 0 : return clip_pixel(v);
489 0 : } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
490 0 : iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
491 0 : ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
492 0 : iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
493 0 : return bi_ntap_filter(ref, x, y, stride);
494 0 : } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
495 0 : return bi_cubic_filter(ref, x, y, stride);
496 : } else {
497 0 : return bi_linear_filter(ref, x, y, stride);
498 : }
499 : }
500 :
501 : // For warping, we really use a 6-tap filter, but we do blocks of 8 pixels
502 : // at a time. The zoom/rotation/shear in the model are applied to the
503 : // "fractional" position of each pixel, which therefore varies within
504 : // [-1, 2) * WARPEDPIXEL_PREC_SHIFTS.
505 : // We need an extra 2 taps to fit this in, for a total of 8 taps.
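// With WARPEDPIXEL_PREC_BITS == 6 (so WARPEDPIXEL_PREC_SHIFTS == 64), rows
// 0..63 below cover fractional offsets in [-1, 0), rows 64..127 cover
// [0, 1), rows 128..191 cover [1, 2) and row 192 duplicates row 191; the
// warp filters index this table with
// ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) + WARPEDPIXEL_PREC_SHIFTS.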
506 : /* clang-format off */
507 : const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8] = {
508 : #if WARPEDPIXEL_PREC_BITS == 6
509 : // [-1, 0)
510 : { 0, 0, 127, 1, 0, 0, 0, 0 }, { 0, - 1, 127, 2, 0, 0, 0, 0 },
511 : { 1, - 3, 127, 4, - 1, 0, 0, 0 }, { 1, - 4, 126, 6, - 2, 1, 0, 0 },
512 : { 1, - 5, 126, 8, - 3, 1, 0, 0 }, { 1, - 6, 125, 11, - 4, 1, 0, 0 },
513 : { 1, - 7, 124, 13, - 4, 1, 0, 0 }, { 2, - 8, 123, 15, - 5, 1, 0, 0 },
514 : { 2, - 9, 122, 18, - 6, 1, 0, 0 }, { 2, -10, 121, 20, - 6, 1, 0, 0 },
515 : { 2, -11, 120, 22, - 7, 2, 0, 0 }, { 2, -12, 119, 25, - 8, 2, 0, 0 },
516 : { 3, -13, 117, 27, - 8, 2, 0, 0 }, { 3, -13, 116, 29, - 9, 2, 0, 0 },
517 : { 3, -14, 114, 32, -10, 3, 0, 0 }, { 3, -15, 113, 35, -10, 2, 0, 0 },
518 : { 3, -15, 111, 37, -11, 3, 0, 0 }, { 3, -16, 109, 40, -11, 3, 0, 0 },
519 : { 3, -16, 108, 42, -12, 3, 0, 0 }, { 4, -17, 106, 45, -13, 3, 0, 0 },
520 : { 4, -17, 104, 47, -13, 3, 0, 0 }, { 4, -17, 102, 50, -14, 3, 0, 0 },
521 : { 4, -17, 100, 52, -14, 3, 0, 0 }, { 4, -18, 98, 55, -15, 4, 0, 0 },
522 : { 4, -18, 96, 58, -15, 3, 0, 0 }, { 4, -18, 94, 60, -16, 4, 0, 0 },
523 : { 4, -18, 91, 63, -16, 4, 0, 0 }, { 4, -18, 89, 65, -16, 4, 0, 0 },
524 : { 4, -18, 87, 68, -17, 4, 0, 0 }, { 4, -18, 85, 70, -17, 4, 0, 0 },
525 : { 4, -18, 82, 73, -17, 4, 0, 0 }, { 4, -18, 80, 75, -17, 4, 0, 0 },
526 : { 4, -18, 78, 78, -18, 4, 0, 0 }, { 4, -17, 75, 80, -18, 4, 0, 0 },
527 : { 4, -17, 73, 82, -18, 4, 0, 0 }, { 4, -17, 70, 85, -18, 4, 0, 0 },
528 : { 4, -17, 68, 87, -18, 4, 0, 0 }, { 4, -16, 65, 89, -18, 4, 0, 0 },
529 : { 4, -16, 63, 91, -18, 4, 0, 0 }, { 4, -16, 60, 94, -18, 4, 0, 0 },
530 : { 3, -15, 58, 96, -18, 4, 0, 0 }, { 4, -15, 55, 98, -18, 4, 0, 0 },
531 : { 3, -14, 52, 100, -17, 4, 0, 0 }, { 3, -14, 50, 102, -17, 4, 0, 0 },
532 : { 3, -13, 47, 104, -17, 4, 0, 0 }, { 3, -13, 45, 106, -17, 4, 0, 0 },
533 : { 3, -12, 42, 108, -16, 3, 0, 0 }, { 3, -11, 40, 109, -16, 3, 0, 0 },
534 : { 3, -11, 37, 111, -15, 3, 0, 0 }, { 2, -10, 35, 113, -15, 3, 0, 0 },
535 : { 3, -10, 32, 114, -14, 3, 0, 0 }, { 2, - 9, 29, 116, -13, 3, 0, 0 },
536 : { 2, - 8, 27, 117, -13, 3, 0, 0 }, { 2, - 8, 25, 119, -12, 2, 0, 0 },
537 : { 2, - 7, 22, 120, -11, 2, 0, 0 }, { 1, - 6, 20, 121, -10, 2, 0, 0 },
538 : { 1, - 6, 18, 122, - 9, 2, 0, 0 }, { 1, - 5, 15, 123, - 8, 2, 0, 0 },
539 : { 1, - 4, 13, 124, - 7, 1, 0, 0 }, { 1, - 4, 11, 125, - 6, 1, 0, 0 },
540 : { 1, - 3, 8, 126, - 5, 1, 0, 0 }, { 1, - 2, 6, 126, - 4, 1, 0, 0 },
541 : { 0, - 1, 4, 127, - 3, 1, 0, 0 }, { 0, 0, 2, 127, - 1, 0, 0, 0 },
542 :
543 : // [0, 1)
544 : { 0, 0, 0, 127, 1, 0, 0, 0}, { 0, 0, -1, 127, 2, 0, 0, 0},
545 : { 0, 1, -3, 127, 4, -2, 1, 0}, { 0, 1, -5, 127, 6, -2, 1, 0},
546 : { 0, 2, -6, 126, 8, -3, 1, 0}, {-1, 2, -7, 126, 11, -4, 2, -1},
547 : {-1, 3, -8, 125, 13, -5, 2, -1}, {-1, 3, -10, 124, 16, -6, 3, -1},
548 : {-1, 4, -11, 123, 18, -7, 3, -1}, {-1, 4, -12, 122, 20, -7, 3, -1},
549 : {-1, 4, -13, 121, 23, -8, 3, -1}, {-2, 5, -14, 120, 25, -9, 4, -1},
550 : {-1, 5, -15, 119, 27, -10, 4, -1}, {-1, 5, -16, 118, 30, -11, 4, -1},
551 : {-2, 6, -17, 116, 33, -12, 5, -1}, {-2, 6, -17, 114, 35, -12, 5, -1},
552 : {-2, 6, -18, 113, 38, -13, 5, -1}, {-2, 7, -19, 111, 41, -14, 6, -2},
553 : {-2, 7, -19, 110, 43, -15, 6, -2}, {-2, 7, -20, 108, 46, -15, 6, -2},
554 : {-2, 7, -20, 106, 49, -16, 6, -2}, {-2, 7, -21, 104, 51, -16, 7, -2},
555 : {-2, 7, -21, 102, 54, -17, 7, -2}, {-2, 8, -21, 100, 56, -18, 7, -2},
556 : {-2, 8, -22, 98, 59, -18, 7, -2}, {-2, 8, -22, 96, 62, -19, 7, -2},
557 : {-2, 8, -22, 94, 64, -19, 7, -2}, {-2, 8, -22, 91, 67, -20, 8, -2},
558 : {-2, 8, -22, 89, 69, -20, 8, -2}, {-2, 8, -22, 87, 72, -21, 8, -2},
559 : {-2, 8, -21, 84, 74, -21, 8, -2}, {-2, 8, -22, 82, 77, -21, 8, -2},
560 : {-2, 8, -21, 79, 79, -21, 8, -2}, {-2, 8, -21, 77, 82, -22, 8, -2},
561 : {-2, 8, -21, 74, 84, -21, 8, -2}, {-2, 8, -21, 72, 87, -22, 8, -2},
562 : {-2, 8, -20, 69, 89, -22, 8, -2}, {-2, 8, -20, 67, 91, -22, 8, -2},
563 : {-2, 7, -19, 64, 94, -22, 8, -2}, {-2, 7, -19, 62, 96, -22, 8, -2},
564 : {-2, 7, -18, 59, 98, -22, 8, -2}, {-2, 7, -18, 56, 100, -21, 8, -2},
565 : {-2, 7, -17, 54, 102, -21, 7, -2}, {-2, 7, -16, 51, 104, -21, 7, -2},
566 : {-2, 6, -16, 49, 106, -20, 7, -2}, {-2, 6, -15, 46, 108, -20, 7, -2},
567 : {-2, 6, -15, 43, 110, -19, 7, -2}, {-2, 6, -14, 41, 111, -19, 7, -2},
568 : {-1, 5, -13, 38, 113, -18, 6, -2}, {-1, 5, -12, 35, 114, -17, 6, -2},
569 : {-1, 5, -12, 33, 116, -17, 6, -2}, {-1, 4, -11, 30, 118, -16, 5, -1},
570 : {-1, 4, -10, 27, 119, -15, 5, -1}, {-1, 4, -9, 25, 120, -14, 5, -2},
571 : {-1, 3, -8, 23, 121, -13, 4, -1}, {-1, 3, -7, 20, 122, -12, 4, -1},
572 : {-1, 3, -7, 18, 123, -11, 4, -1}, {-1, 3, -6, 16, 124, -10, 3, -1},
573 : {-1, 2, -5, 13, 125, -8, 3, -1}, {-1, 2, -4, 11, 126, -7, 2, -1},
574 : { 0, 1, -3, 8, 126, -6, 2, 0}, { 0, 1, -2, 6, 127, -5, 1, 0},
575 : { 0, 1, -2, 4, 127, -3, 1, 0}, { 0, 0, 0, 2, 127, -1, 0, 0},
576 :
577 : // [1, 2)
578 : { 0, 0, 0, 1, 127, 0, 0, 0 }, { 0, 0, 0, - 1, 127, 2, 0, 0 },
579 : { 0, 0, 1, - 3, 127, 4, - 1, 0 }, { 0, 0, 1, - 4, 126, 6, - 2, 1 },
580 : { 0, 0, 1, - 5, 126, 8, - 3, 1 }, { 0, 0, 1, - 6, 125, 11, - 4, 1 },
581 : { 0, 0, 1, - 7, 124, 13, - 4, 1 }, { 0, 0, 2, - 8, 123, 15, - 5, 1 },
582 : { 0, 0, 2, - 9, 122, 18, - 6, 1 }, { 0, 0, 2, -10, 121, 20, - 6, 1 },
583 : { 0, 0, 2, -11, 120, 22, - 7, 2 }, { 0, 0, 2, -12, 119, 25, - 8, 2 },
584 : { 0, 0, 3, -13, 117, 27, - 8, 2 }, { 0, 0, 3, -13, 116, 29, - 9, 2 },
585 : { 0, 0, 3, -14, 114, 32, -10, 3 }, { 0, 0, 3, -15, 113, 35, -10, 2 },
586 : { 0, 0, 3, -15, 111, 37, -11, 3 }, { 0, 0, 3, -16, 109, 40, -11, 3 },
587 : { 0, 0, 3, -16, 108, 42, -12, 3 }, { 0, 0, 4, -17, 106, 45, -13, 3 },
588 : { 0, 0, 4, -17, 104, 47, -13, 3 }, { 0, 0, 4, -17, 102, 50, -14, 3 },
589 : { 0, 0, 4, -17, 100, 52, -14, 3 }, { 0, 0, 4, -18, 98, 55, -15, 4 },
590 : { 0, 0, 4, -18, 96, 58, -15, 3 }, { 0, 0, 4, -18, 94, 60, -16, 4 },
591 : { 0, 0, 4, -18, 91, 63, -16, 4 }, { 0, 0, 4, -18, 89, 65, -16, 4 },
592 : { 0, 0, 4, -18, 87, 68, -17, 4 }, { 0, 0, 4, -18, 85, 70, -17, 4 },
593 : { 0, 0, 4, -18, 82, 73, -17, 4 }, { 0, 0, 4, -18, 80, 75, -17, 4 },
594 : { 0, 0, 4, -18, 78, 78, -18, 4 }, { 0, 0, 4, -17, 75, 80, -18, 4 },
595 : { 0, 0, 4, -17, 73, 82, -18, 4 }, { 0, 0, 4, -17, 70, 85, -18, 4 },
596 : { 0, 0, 4, -17, 68, 87, -18, 4 }, { 0, 0, 4, -16, 65, 89, -18, 4 },
597 : { 0, 0, 4, -16, 63, 91, -18, 4 }, { 0, 0, 4, -16, 60, 94, -18, 4 },
598 : { 0, 0, 3, -15, 58, 96, -18, 4 }, { 0, 0, 4, -15, 55, 98, -18, 4 },
599 : { 0, 0, 3, -14, 52, 100, -17, 4 }, { 0, 0, 3, -14, 50, 102, -17, 4 },
600 : { 0, 0, 3, -13, 47, 104, -17, 4 }, { 0, 0, 3, -13, 45, 106, -17, 4 },
601 : { 0, 0, 3, -12, 42, 108, -16, 3 }, { 0, 0, 3, -11, 40, 109, -16, 3 },
602 : { 0, 0, 3, -11, 37, 111, -15, 3 }, { 0, 0, 2, -10, 35, 113, -15, 3 },
603 : { 0, 0, 3, -10, 32, 114, -14, 3 }, { 0, 0, 2, - 9, 29, 116, -13, 3 },
604 : { 0, 0, 2, - 8, 27, 117, -13, 3 }, { 0, 0, 2, - 8, 25, 119, -12, 2 },
605 : { 0, 0, 2, - 7, 22, 120, -11, 2 }, { 0, 0, 1, - 6, 20, 121, -10, 2 },
606 : { 0, 0, 1, - 6, 18, 122, - 9, 2 }, { 0, 0, 1, - 5, 15, 123, - 8, 2 },
607 : { 0, 0, 1, - 4, 13, 124, - 7, 1 }, { 0, 0, 1, - 4, 11, 125, - 6, 1 },
608 : { 0, 0, 1, - 3, 8, 126, - 5, 1 }, { 0, 0, 1, - 2, 6, 126, - 4, 1 },
609 : { 0, 0, 0, - 1, 4, 127, - 3, 1 }, { 0, 0, 0, 0, 2, 127, - 1, 0 },
610 : // dummy (replicate row index 191)
611 : { 0, 0, 0, 0, 2, 127, - 1, 0 },
612 :
613 : #elif WARPEDPIXEL_PREC_BITS == 5
614 : // [-1, 0)
615 : {0, 0, 127, 1, 0, 0, 0, 0}, {1, -3, 127, 4, -1, 0, 0, 0},
616 : {1, -5, 126, 8, -3, 1, 0, 0}, {1, -7, 124, 13, -4, 1, 0, 0},
617 : {2, -9, 122, 18, -6, 1, 0, 0}, {2, -11, 120, 22, -7, 2, 0, 0},
618 : {3, -13, 117, 27, -8, 2, 0, 0}, {3, -14, 114, 32, -10, 3, 0, 0},
619 : {3, -15, 111, 37, -11, 3, 0, 0}, {3, -16, 108, 42, -12, 3, 0, 0},
620 : {4, -17, 104, 47, -13, 3, 0, 0}, {4, -17, 100, 52, -14, 3, 0, 0},
621 : {4, -18, 96, 58, -15, 3, 0, 0}, {4, -18, 91, 63, -16, 4, 0, 0},
622 : {4, -18, 87, 68, -17, 4, 0, 0}, {4, -18, 82, 73, -17, 4, 0, 0},
623 : {4, -18, 78, 78, -18, 4, 0, 0}, {4, -17, 73, 82, -18, 4, 0, 0},
624 : {4, -17, 68, 87, -18, 4, 0, 0}, {4, -16, 63, 91, -18, 4, 0, 0},
625 : {3, -15, 58, 96, -18, 4, 0, 0}, {3, -14, 52, 100, -17, 4, 0, 0},
626 : {3, -13, 47, 104, -17, 4, 0, 0}, {3, -12, 42, 108, -16, 3, 0, 0},
627 : {3, -11, 37, 111, -15, 3, 0, 0}, {3, -10, 32, 114, -14, 3, 0, 0},
628 : {2, -8, 27, 117, -13, 3, 0, 0}, {2, -7, 22, 120, -11, 2, 0, 0},
629 : {1, -6, 18, 122, -9, 2, 0, 0}, {1, -4, 13, 124, -7, 1, 0, 0},
630 : {1, -3, 8, 126, -5, 1, 0, 0}, {0, -1, 4, 127, -3, 1, 0, 0},
631 : // [0, 1)
632 : { 0, 0, 0, 127, 1, 0, 0, 0}, { 0, 1, -3, 127, 4, -2, 1, 0},
633 : { 0, 2, -6, 126, 8, -3, 1, 0}, {-1, 3, -8, 125, 13, -5, 2, -1},
634 : {-1, 4, -11, 123, 18, -7, 3, -1}, {-1, 4, -13, 121, 23, -8, 3, -1},
635 : {-1, 5, -15, 119, 27, -10, 4, -1}, {-2, 6, -17, 116, 33, -12, 5, -1},
636 : {-2, 6, -18, 113, 38, -13, 5, -1}, {-2, 7, -19, 110, 43, -15, 6, -2},
637 : {-2, 7, -20, 106, 49, -16, 6, -2}, {-2, 7, -21, 102, 54, -17, 7, -2},
638 : {-2, 8, -22, 98, 59, -18, 7, -2}, {-2, 8, -22, 94, 64, -19, 7, -2},
639 : {-2, 8, -22, 89, 69, -20, 8, -2}, {-2, 8, -21, 84, 74, -21, 8, -2},
640 : {-2, 8, -21, 79, 79, -21, 8, -2}, {-2, 8, -21, 74, 84, -21, 8, -2},
641 : {-2, 8, -20, 69, 89, -22, 8, -2}, {-2, 7, -19, 64, 94, -22, 8, -2},
642 : {-2, 7, -18, 59, 98, -22, 8, -2}, {-2, 7, -17, 54, 102, -21, 7, -2},
643 : {-2, 6, -16, 49, 106, -20, 7, -2}, {-2, 6, -15, 43, 110, -19, 7, -2},
644 : {-1, 5, -13, 38, 113, -18, 6, -2}, {-1, 5, -12, 33, 116, -17, 6, -2},
645 : {-1, 4, -10, 27, 119, -15, 5, -1}, {-1, 3, -8, 23, 121, -13, 4, -1},
646 : {-1, 3, -7, 18, 123, -11, 4, -1}, {-1, 2, -5, 13, 125, -8, 3, -1},
647 : { 0, 1, -3, 8, 126, -6, 2, 0}, { 0, 1, -2, 4, 127, -3, 1, 0},
648 : // [1, 2)
649 : {0, 0, 0, 1, 127, 0, 0, 0}, {0, 0, 1, -3, 127, 4, -1, 0},
650 : {0, 0, 1, -5, 126, 8, -3, 1}, {0, 0, 1, -7, 124, 13, -4, 1},
651 : {0, 0, 2, -9, 122, 18, -6, 1}, {0, 0, 2, -11, 120, 22, -7, 2},
652 : {0, 0, 3, -13, 117, 27, -8, 2}, {0, 0, 3, -14, 114, 32, -10, 3},
653 : {0, 0, 3, -15, 111, 37, -11, 3}, {0, 0, 3, -16, 108, 42, -12, 3},
654 : {0, 0, 4, -17, 104, 47, -13, 3}, {0, 0, 4, -17, 100, 52, -14, 3},
655 : {0, 0, 4, -18, 96, 58, -15, 3}, {0, 0, 4, -18, 91, 63, -16, 4},
656 : {0, 0, 4, -18, 87, 68, -17, 4}, {0, 0, 4, -18, 82, 73, -17, 4},
657 : {0, 0, 4, -18, 78, 78, -18, 4}, {0, 0, 4, -17, 73, 82, -18, 4},
658 : {0, 0, 4, -17, 68, 87, -18, 4}, {0, 0, 4, -16, 63, 91, -18, 4},
659 : {0, 0, 3, -15, 58, 96, -18, 4}, {0, 0, 3, -14, 52, 100, -17, 4},
660 : {0, 0, 3, -13, 47, 104, -17, 4}, {0, 0, 3, -12, 42, 108, -16, 3},
661 : {0, 0, 3, -11, 37, 111, -15, 3}, {0, 0, 3, -10, 32, 114, -14, 3},
662 : {0, 0, 2, -8, 27, 117, -13, 3}, {0, 0, 2, -7, 22, 120, -11, 2},
663 : {0, 0, 1, -6, 18, 122, -9, 2}, {0, 0, 1, -4, 13, 124, -7, 1},
664 : {0, 0, 1, -3, 8, 126, -5, 1}, {0, 0, 0, -1, 4, 127, -3, 1},
665 : // dummy (replicate row index 95)
666 : {0, 0, 0, -1, 4, 127, -3, 1},
667 :
668 : #endif // WARPEDPIXEL_PREC_BITS == 6
669 : };
670 :
671 : /* clang-format on */
672 :
673 : #define DIV_LUT_PREC_BITS 14
674 : #define DIV_LUT_BITS 8
675 : #define DIV_LUT_NUM (1 << DIV_LUT_BITS)
676 :
677 : static const uint16_t div_lut[DIV_LUT_NUM + 1] = {
678 : 16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
679 : 15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
680 : 15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
681 : 14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
682 : 13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
683 : 13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
684 : 13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
685 : 12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
686 : 12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
687 : 11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
688 : 11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
689 : 11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
690 : 10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
691 : 10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
692 : 10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
693 : 9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732,
694 : 9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489,
695 : 9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259,
696 : 9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039,
697 : 9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830,
698 : 8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630,
699 : 8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439,
700 : 8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257,
701 : 8240, 8224, 8208, 8192,
702 : };
703 :
704 : #if CONFIG_WARPED_MOTION
705 : // Decomposes a divisor D such that 1/D = y/2^shift, where y is returned
706 : // at precision of DIV_LUT_PREC_BITS along with the shift.
707 0 : static int16_t resolve_divisor_64(uint64_t D, int16_t *shift) {
708 : int64_t e, f;
709 0 : *shift = (int16_t)((D >> 32) ? get_msb((unsigned int)(D >> 32)) + 32
710 0 : : get_msb((unsigned int)D));
711 : // e is obtained from D after resetting the most significant 1 bit.
712 0 : e = D - ((uint64_t)1 << *shift);
713 : // Get the most significant DIV_LUT_BITS (8) bits of e into f
714 0 : if (*shift > DIV_LUT_BITS)
715 0 : f = ROUND_POWER_OF_TWO_64(e, *shift - DIV_LUT_BITS);
716 : else
717 0 : f = e << (DIV_LUT_BITS - *shift);
718 0 : assert(f <= DIV_LUT_NUM);
719 0 : *shift += DIV_LUT_PREC_BITS;
720 : // Use f as lookup into the precomputed table of multipliers
721 0 : return div_lut[f];
722 : }
723 : #endif // CONFIG_WARPED_MOTION
724 :
725 0 : static int16_t resolve_divisor_32(uint32_t D, int16_t *shift) {
726 : int32_t e, f;
727 0 : *shift = get_msb(D);
728 : // e is obtained from D after resetting the most significant 1 bit.
729 0 : e = D - ((uint32_t)1 << *shift);
730 : // Get the most significant DIV_LUT_BITS (8) bits of e into f
731 0 : if (*shift > DIV_LUT_BITS)
732 0 : f = ROUND_POWER_OF_TWO(e, *shift - DIV_LUT_BITS);
733 : else
734 0 : f = e << (DIV_LUT_BITS - *shift);
735 0 : assert(f <= DIV_LUT_NUM);
736 0 : *shift += DIV_LUT_PREC_BITS;
737 : // Use f as lookup into the precomputed table of multipliers
738 0 : return div_lut[f];
739 : }
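/* Worked example (illustrative): resolve_divisor_32(100, &shift) finds
   msb(100) == 6, so e = 100 - 64 = 36 and, since 6 <= DIV_LUT_BITS,
   f = 36 << 2 == 144. The function returns div_lut[144] == 10486 with
   shift == 6 + DIV_LUT_PREC_BITS == 20; indeed 2^20 / 100 = 10485.76, i.e.
   1/100 ~= 10486 / 2^20. More generally the table entries appear to satisfy
   div_lut[i] == round(2^14 * 256 / (256 + i)). */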
740 :
741 0 : static int is_affine_valid(const WarpedMotionParams *const wm) {
742 0 : const int32_t *mat = wm->wmmat;
743 0 : return (mat[2] > 0);
744 : }
745 :
746 0 : static int is_affine_shear_allowed(int16_t alpha, int16_t beta, int16_t gamma,
747 : int16_t delta) {
748 0 : if ((4 * abs(alpha) + 7 * abs(beta) >= (1 << WARPEDMODEL_PREC_BITS)) ||
749 0 : (4 * abs(gamma) + 4 * abs(delta) >= (1 << WARPEDMODEL_PREC_BITS)))
750 0 : return 0;
751 : else
752 0 : return 1;
753 : }
754 :
755 : // Returns 1 on success or 0 on an invalid affine set
756 0 : int get_shear_params(WarpedMotionParams *wm) {
757 0 : const int32_t *mat = wm->wmmat;
758 0 : if (!is_affine_valid(wm)) return 0;
759 0 : wm->alpha =
760 0 : clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
761 0 : wm->beta = clamp(mat[3], INT16_MIN, INT16_MAX);
762 : int16_t shift;
763 0 : int16_t y = resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
764 : int64_t v;
765 0 : v = ((int64_t)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
766 0 : wm->gamma =
767 0 : clamp((int)ROUND_POWER_OF_TWO_SIGNED_64(v, shift), INT16_MIN, INT16_MAX);
768 0 : v = ((int64_t)mat[3] * mat[4]) * y;
769 0 : wm->delta = clamp(mat[5] - (int)ROUND_POWER_OF_TWO_SIGNED_64(v, shift) -
770 : (1 << WARPEDMODEL_PREC_BITS),
771 : INT16_MIN, INT16_MAX);
772 0 : if (!is_affine_shear_allowed(wm->alpha, wm->beta, wm->gamma, wm->delta))
773 0 : return 0;
774 :
775 0 : wm->alpha = ROUND_POWER_OF_TWO_SIGNED(wm->alpha, WARP_PARAM_REDUCE_BITS) *
776 : (1 << WARP_PARAM_REDUCE_BITS);
777 0 : wm->beta = ROUND_POWER_OF_TWO_SIGNED(wm->beta, WARP_PARAM_REDUCE_BITS) *
778 : (1 << WARP_PARAM_REDUCE_BITS);
779 0 : wm->gamma = ROUND_POWER_OF_TWO_SIGNED(wm->gamma, WARP_PARAM_REDUCE_BITS) *
780 : (1 << WARP_PARAM_REDUCE_BITS);
781 0 : wm->delta = ROUND_POWER_OF_TWO_SIGNED(wm->delta, WARP_PARAM_REDUCE_BITS) *
782 : (1 << WARP_PARAM_REDUCE_BITS);
783 0 : return 1;
784 : }
785 :
786 : #if CONFIG_HIGHBITDEPTH
787 0 : static INLINE void highbd_get_subcolumn(int taps, const uint16_t *const ref,
788 : int32_t *col, int stride, int x,
789 : int y_start) {
790 : int i;
791 0 : for (i = 0; i < taps; ++i) {
792 0 : col[i] = ref[(i + y_start) * stride + x];
793 : }
794 0 : }
795 :
796 0 : static uint16_t highbd_bi_ntap_filter(const uint16_t *const ref, int x, int y,
797 : int stride, int bd) {
798 : int32_t val, arr[WARPEDPIXEL_FILTER_TAPS];
799 : int k;
800 0 : const int i = (int)x >> WARPEDPIXEL_PREC_BITS;
801 0 : const int j = (int)y >> WARPEDPIXEL_PREC_BITS;
802 0 : for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) {
803 : int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS];
804 0 : highbd_get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride,
805 0 : i + k + 1 - WARPEDPIXEL_FILTER_TAPS / 2,
806 : j + 1 - WARPEDPIXEL_FILTER_TAPS / 2);
807 0 : arr[k] = do_ntap_filter(arr_temp + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
808 0 : y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
809 : }
810 0 : val = do_ntap_filter(arr + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
811 0 : x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
812 0 : val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
813 0 : return (uint16_t)clip_pixel_highbd(val, bd);
814 : }
815 :
816 0 : static uint16_t highbd_bi_cubic_filter(const uint16_t *const ref, int x, int y,
817 : int stride, int bd) {
818 : int32_t val, arr[4];
819 : int k;
820 0 : const int i = (int)x >> WARPEDPIXEL_PREC_BITS;
821 0 : const int j = (int)y >> WARPEDPIXEL_PREC_BITS;
822 0 : for (k = 0; k < 4; ++k) {
823 : int32_t arr_temp[4];
824 0 : highbd_get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1);
825 0 : arr[k] =
826 0 : do_cubic_filter(arr_temp + 1, y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
827 : }
828 0 : val = do_cubic_filter(arr + 1, x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
829 0 : val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
830 0 : return (uint16_t)clip_pixel_highbd(val, bd);
831 : }
832 :
833 0 : static uint16_t highbd_bi_linear_filter(const uint16_t *const ref, int x, int y,
834 : int stride, int bd) {
835 0 : const int ix = x >> WARPEDPIXEL_PREC_BITS;
836 0 : const int iy = y >> WARPEDPIXEL_PREC_BITS;
837 0 : const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
838 0 : const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
839 : int32_t val;
840 0 : val = ROUND_POWER_OF_TWO_SIGNED(
841 : ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
842 : (WARPEDPIXEL_PREC_SHIFTS - sx) +
843 : ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
844 : ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
845 : ref[(iy + 1) * stride + ix + 1] * sy * sx,
846 : WARPEDPIXEL_PREC_BITS * 2);
847 0 : return (uint16_t)clip_pixel_highbd(val, bd);
848 : }
849 :
850 0 : static uint16_t highbd_warp_interpolate(const uint16_t *const ref, int x, int y,
851 : int width, int height, int stride,
852 : int bd) {
853 0 : const int ix = x >> WARPEDPIXEL_PREC_BITS;
854 0 : const int iy = y >> WARPEDPIXEL_PREC_BITS;
855 0 : const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
856 0 : const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
857 : int32_t v;
858 :
859 0 : if (ix < 0 && iy < 0)
860 0 : return ref[0];
861 0 : else if (ix < 0 && iy > height - 1)
862 0 : return ref[(height - 1) * stride];
863 0 : else if (ix > width - 1 && iy < 0)
864 0 : return ref[width - 1];
865 0 : else if (ix > width - 1 && iy > height - 1)
866 0 : return ref[(height - 1) * stride + (width - 1)];
867 0 : else if (ix < 0) {
868 0 : v = ROUND_POWER_OF_TWO_SIGNED(
869 : ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
870 : ref[(iy + 1) * stride] * sy,
871 : WARPEDPIXEL_PREC_BITS);
872 0 : return clip_pixel_highbd(v, bd);
873 0 : } else if (iy < 0) {
874 0 : v = ROUND_POWER_OF_TWO_SIGNED(
875 : ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
876 : WARPEDPIXEL_PREC_BITS);
877 0 : return clip_pixel_highbd(v, bd);
878 0 : } else if (ix > width - 1) {
879 0 : v = ROUND_POWER_OF_TWO_SIGNED(
880 : ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
881 : ref[(iy + 1) * stride + width - 1] * sy,
882 : WARPEDPIXEL_PREC_BITS);
883 0 : return clip_pixel_highbd(v, bd);
884 0 : } else if (iy > height - 1) {
885 0 : v = ROUND_POWER_OF_TWO_SIGNED(
886 : ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
887 : ref[(height - 1) * stride + ix + 1] * sx,
888 : WARPEDPIXEL_PREC_BITS);
889 0 : return clip_pixel_highbd(v, bd);
890 0 : } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
891 0 : iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
892 0 : ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
893 0 : iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
894 0 : return highbd_bi_ntap_filter(ref, x, y, stride, bd);
895 0 : } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
896 0 : return highbd_bi_cubic_filter(ref, x, y, stride, bd);
897 : } else {
898 0 : return highbd_bi_linear_filter(ref, x, y, stride, bd);
899 : }
900 : }
901 :
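// High bit-depth counterpart of error_measure() below: the absolute error is
// split into its top 8 bits (e1) and its low (bd - 8) bits (e2), and the
// result is a linear interpolation between error_measure_lut[255 + e1] and
// the next entry, scaled up by 1 << (bd - 8).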
902 0 : static INLINE int highbd_error_measure(int err, int bd) {
903 0 : const int b = bd - 8;
904 0 : const int bmask = (1 << b) - 1;
905 0 : const int v = (1 << b);
906 : int e1, e2;
907 0 : err = abs(err);
908 0 : e1 = err >> b;
909 0 : e2 = err & bmask;
910 0 : return error_measure_lut[255 + e1] * (v - e2) +
911 0 : error_measure_lut[256 + e1] * e2;
912 : }
913 :
914 0 : static void highbd_warp_plane_old(
915 : const WarpedMotionParams *const wm, const uint8_t *const ref8, int width,
916 : int height, int stride, const uint8_t *const pred8, int p_col, int p_row,
917 : int p_width, int p_height, int p_stride, int subsampling_x,
918 : int subsampling_y, int x_scale, int y_scale, int bd, int comp_avg) {
919 : int i, j;
920 0 : ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
921 0 : uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
922 0 : const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
923 0 : if (projectpoints == NULL) return;
924 0 : for (i = p_row; i < p_row + p_height; ++i) {
925 0 : for (j = p_col; j < p_col + p_width; ++j) {
926 : int in[2], out[2];
927 0 : in[0] = j;
928 0 : in[1] = i;
929 0 : projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
930 0 : out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
931 0 : out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
932 0 : if (comp_avg)
933 0 : pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
934 : pred[(j - p_col) + (i - p_row) * p_stride] +
935 : highbd_warp_interpolate(ref, out[0], out[1], width, height,
936 : stride, bd),
937 : 1);
938 : else
939 0 : pred[(j - p_col) + (i - p_row) * p_stride] = highbd_warp_interpolate(
940 : ref, out[0], out[1], width, height, stride, bd);
941 : }
942 : }
943 : }
944 :
945 : /* Note: For an explanation of the warp algorithm, and some notes on bit widths
946 : for hardware implementations, see the comments above av1_warp_affine_c
947 : */
948 0 : void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
949 : int width, int height, int stride, uint16_t *pred,
950 : int p_col, int p_row, int p_width, int p_height,
951 : int p_stride, int subsampling_x,
952 : int subsampling_y, int bd, int comp_avg,
953 : int16_t alpha, int16_t beta, int16_t gamma,
954 : int16_t delta) {
955 : uint32_t tmp[15 * 8];
956 : int i, j, k, l, m;
957 :
958 0 : for (i = p_row; i < p_row + p_height; i += 8) {
959 0 : for (j = p_col; j < p_col + p_width; j += 8) {
960 : int32_t x4, y4, ix4, sx4, iy4, sy4;
961 0 : if (subsampling_x)
962 0 : x4 = (mat[2] * 4 * (j + 4) + mat[3] * 4 * (i + 4) + mat[0] * 2 +
963 0 : (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
964 : 4;
965 : else
966 0 : x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
967 :
968 0 : if (subsampling_y)
969 0 : y4 = (mat[4] * 4 * (j + 4) + mat[5] * 4 * (i + 4) + mat[1] * 2 +
970 0 : (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
971 : 4;
972 : else
973 0 : y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];
974 :
975 0 : ix4 = x4 >> WARPEDMODEL_PREC_BITS;
976 0 : sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
977 0 : iy4 = y4 >> WARPEDMODEL_PREC_BITS;
978 0 : sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
979 :
980 0 : sx4 += alpha * (-4) + beta * (-4);
981 0 : sy4 += gamma * (-4) + delta * (-4);
982 :
983 0 : sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
984 0 : sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
985 :
986 : // Horizontal filter
987 0 : for (k = -7; k < 8; ++k) {
988 0 : int iy = iy4 + k;
989 0 : if (iy < 0)
990 0 : iy = 0;
991 0 : else if (iy > height - 1)
992 0 : iy = height - 1;
993 :
994 0 : int sx = sx4 + beta * (k + 4);
995 0 : for (l = -4; l < 4; ++l) {
996 0 : int ix = ix4 + l - 3;
997 0 : const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
998 : WARPEDPIXEL_PREC_SHIFTS;
999 0 : assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
1000 0 : const int16_t *coeffs = warped_filter[offs];
1001 :
1002 0 : int32_t sum = 1 << (bd + WARPEDPIXEL_FILTER_BITS - 1);
1003 0 : for (m = 0; m < 8; ++m) {
1004 0 : int sample_x = ix + m;
1005 0 : if (sample_x < 0)
1006 0 : sample_x = 0;
1007 0 : else if (sample_x > width - 1)
1008 0 : sample_x = width - 1;
1009 0 : sum += ref[iy * stride + sample_x] * coeffs[m];
1010 : }
1011 0 : sum = ROUND_POWER_OF_TWO(sum, HORSHEAR_REDUCE_PREC_BITS);
1012 0 : assert(0 <= sum &&
1013 : sum < (1 << (bd + WARPEDPIXEL_FILTER_BITS + 1 -
1014 : HORSHEAR_REDUCE_PREC_BITS)));
1015 0 : tmp[(k + 7) * 8 + (l + 4)] = sum;
1016 0 : sx += alpha;
1017 : }
1018 : }
1019 :
1020 : // Vertical filter
1021 0 : for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
1022 0 : int sy = sy4 + delta * (k + 4);
1023 0 : for (l = -4; l < 4; ++l) {
1024 0 : uint16_t *p =
1025 0 : &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
1026 0 : const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
1027 : WARPEDPIXEL_PREC_SHIFTS;
1028 0 : assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
1029 0 : const int16_t *coeffs = warped_filter[offs];
1030 :
1031 0 : int32_t sum = 1 << (bd + 2 * WARPEDPIXEL_FILTER_BITS -
1032 : HORSHEAR_REDUCE_PREC_BITS);
1033 0 : for (m = 0; m < 8; ++m) {
1034 0 : sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
1035 : }
1036 0 : sum = ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS);
1037 0 : assert(0 <= sum && sum < (1 << (bd + 2)));
1038 0 : uint16_t px =
1039 0 : clip_pixel_highbd(sum - (1 << (bd - 1)) - (1 << bd), bd);
1040 0 : if (comp_avg)
1041 0 : *p = ROUND_POWER_OF_TWO(*p + px, 1);
1042 : else
1043 0 : *p = px;
1044 0 : sy += gamma;
1045 : }
1046 : }
1047 : }
1048 : }
1049 0 : }
1050 :
1051 0 : static void highbd_warp_plane(WarpedMotionParams *wm, const uint8_t *const ref8,
1052 : int width, int height, int stride,
1053 : const uint8_t *const pred8, int p_col, int p_row,
1054 : int p_width, int p_height, int p_stride,
1055 : int subsampling_x, int subsampling_y, int x_scale,
1056 : int y_scale, int bd, int comp_avg) {
1057 0 : if (wm->wmtype == ROTZOOM) {
1058 0 : wm->wmmat[5] = wm->wmmat[2];
1059 0 : wm->wmmat[4] = -wm->wmmat[3];
1060 : }
1061 0 : if ((wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) && x_scale == 16 &&
1062 0 : y_scale == 16) {
1063 0 : const int32_t *const mat = wm->wmmat;
1064 0 : const int16_t alpha = wm->alpha;
1065 0 : const int16_t beta = wm->beta;
1066 0 : const int16_t gamma = wm->gamma;
1067 0 : const int16_t delta = wm->delta;
1068 :
1069 0 : const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
1070 0 : uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
1071 0 : av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
1072 : p_width, p_height, p_stride, subsampling_x,
1073 : subsampling_y, bd, comp_avg, alpha, beta, gamma,
1074 : delta);
1075 : } else {
1076 0 : highbd_warp_plane_old(wm, ref8, width, height, stride, pred8, p_col, p_row,
1077 : p_width, p_height, p_stride, subsampling_x,
1078 : subsampling_y, x_scale, y_scale, bd, comp_avg);
1079 : }
1080 0 : }
1081 :
1082 0 : static int64_t highbd_frame_error(const uint16_t *const ref, int stride,
1083 : const uint16_t *const dst, int p_col,
1084 : int p_row, int p_width, int p_height,
1085 : int p_stride, int bd) {
1086 0 : int64_t sum_error = 0;
1087 0 : for (int i = 0; i < p_height; ++i) {
1088 0 : for (int j = 0; j < p_width; ++j) {
1089 0 : sum_error += highbd_error_measure(
1090 0 : dst[j + i * p_stride] - ref[(j + p_col) + (i + p_row) * stride], bd);
1091 : }
1092 : }
1093 0 : return sum_error;
1094 : }
1095 :
1096 0 : static int64_t highbd_warp_error(
1097 : WarpedMotionParams *wm, const uint8_t *const ref8, int width, int height,
1098 : int stride, const uint8_t *const dst8, int p_col, int p_row, int p_width,
1099 : int p_height, int p_stride, int subsampling_x, int subsampling_y,
1100 : int x_scale, int y_scale, int bd) {
1101 0 : int64_t gm_sumerr = 0;
1102 0 : uint16_t *tmp = aom_malloc(p_width * p_height * sizeof(*tmp));
1103 0 : if (!tmp) return INT64_MAX;
1104 :
1105 0 : highbd_warp_plane(wm, ref8, width, height, stride, CONVERT_TO_BYTEPTR(tmp),
1106 : p_col, p_row, p_width, p_height, p_width, subsampling_x,
1107 : subsampling_y, x_scale, y_scale, bd, 0);
1108 :
1109 0 : gm_sumerr = highbd_frame_error(tmp, p_width, CONVERT_TO_SHORTPTR(dst8), p_col,
1110 : p_row, p_width, p_height, p_stride, bd);
1111 :
1112 0 : aom_free(tmp);
1113 0 : return gm_sumerr;
1114 : }
1115 : #endif // CONFIG_HIGHBITDEPTH
1116 :
1117 0 : static INLINE int error_measure(int err) {
1118 0 : return error_measure_lut[255 + err];
1119 : }
1120 :
1121 0 : static void warp_plane_old(const WarpedMotionParams *const wm,
1122 : const uint8_t *const ref, int width, int height,
1123 : int stride, uint8_t *pred, int p_col, int p_row,
1124 : int p_width, int p_height, int p_stride,
1125 : int subsampling_x, int subsampling_y, int x_scale,
1126 : int y_scale, int comp_avg) {
1127 : int i, j;
1128 0 : ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
1129 0 : if (projectpoints == NULL) return;
1130 0 : for (i = p_row; i < p_row + p_height; ++i) {
1131 0 : for (j = p_col; j < p_col + p_width; ++j) {
1132 : int in[2], out[2];
1133 0 : in[0] = j;
1134 0 : in[1] = i;
1135 0 : projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
1136 0 : out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
1137 0 : out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
1138 0 : if (comp_avg)
1139 0 : pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
1140 : pred[(j - p_col) + (i - p_row) * p_stride] +
1141 : warp_interpolate(ref, out[0], out[1], width, height, stride),
1142 : 1);
1143 : else
1144 0 : pred[(j - p_col) + (i - p_row) * p_stride] =
1145 0 : warp_interpolate(ref, out[0], out[1], width, height, stride);
1146 : }
1147 : }
1148 : }
1149 :
1150 : /* The warp filter for ROTZOOM and AFFINE models works as follows:
1151 : * Split the input into 8x8 blocks
1152 : * For each block, project the point (4, 4) within the block, to get the
1153 : overall block position. Split into integer and fractional coordinates,
1154 : maintaining full WARPEDMODEL precision
1155 : * Filter horizontally: Generate 15 rows of 8 pixels each. Each pixel gets a
1156 : variable horizontal offset. This means that, while the rows of the
1157 : intermediate buffer align with the rows of the *reference* image, the
1158 : columns align with the columns of the *destination* image.
1159 : * Filter vertically: Generate the output block (up to 8x8 pixels, but if the
1160 : destination is too small we crop the output at this stage). Each pixel has
1161 : a variable vertical offset, so that the resulting rows are aligned with
1162 : the rows of the destination image.
1163 :
1164 : To accomplish these alignments, we factor the warp matrix as a
1165 : product of two shear / asymmetric zoom matrices:
1166 : / a b \ = / 1 0 \ * / 1+alpha beta \
1167 : \ c d / \ gamma 1+delta / \ 0 1 /
1168 : where a, b, c, d are wmmat[2], wmmat[3], wmmat[4], wmmat[5] respectively.
1169 : The horizontal shear (with alpha and beta) is applied first,
1170 : then the vertical shear (with gamma and delta) is applied second.
1171 :
1172 : The only limitation is that, to fit this in a fixed 8-tap filter size,
1173 : the fractional pixel offsets must be at most +-1. Since the horizontal filter
1174 : generates 15 rows of 8 columns, and the initial point we project is at (4, 4)
1175 : within the block, the parameters must satisfy
1176 : 4 * |alpha| + 7 * |beta| < 1 and 4 * |gamma| + 4 * |delta| < 1
1177 : for this filter to be applicable.
1178 :
1179 : Note: This function assumes that the caller has done all of the relevant
1180 : checks, i.e. that we have a ROTZOOM or AFFINE model, that wmmat[4] and wmmat[5]
1181 : are set appropriately (if using a ROTZOOM model), and that alpha, beta,
1182 : gamma, delta are all in range.
1183 :
1184 : TODO(david.barker): Maybe support scaled references?
1185 : */
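/* For reference, multiplying out the factorization above gives
     a = 1 + alpha                  b = beta
     c = gamma * (1 + alpha)        d = gamma * beta + 1 + delta
   so get_shear_params() recovers
     alpha = a - 1, beta = b, gamma = c / a, delta = d - (b * c) / a - 1,
   which is why gamma and delta need the fixed-point division by wmmat[2]
   performed via resolve_divisor_32(). */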
1186 : /* A note on hardware implementation:
1187 : The warp filter is intended to be implementable using the same hardware as
1188 : the high-precision convolve filters from the loop-restoration and
1189 : convolve-round experiments.
1190 :
1191 : For a single filter stage, considering all of the coefficient sets for the
1192 : warp filter and the regular convolution filter, an input in the range
1193 : [0, 2^k - 1] is mapped into the range [-56 * (2^k - 1), 184 * (2^k - 1)]
1194 : before rounding.
1195 :
1196 : Allowing for some changes to the filter coefficient sets, call the range
1197 : [-64 * 2^k, 192 * 2^k]. Then, if we initialize the accumulator to 64 * 2^k,
1198 : we can replace this by the range [0, 256 * 2^k], which can be stored in an
1199 : unsigned value with 8 + k bits.
1200 :
1201 : This allows the derivation of the appropriate bit widths and offsets for
1202 : the various intermediate values: If
1203 :
1204 : F := WARPEDPIXEL_FILTER_BITS = 7 (or else the above ranges need adjusting)
1205 : So a *single* filter stage maps a k-bit input to a (k + F + 1)-bit
1206 : intermediate value.
1207 : H := HORSHEAR_REDUCE_PREC_BITS
1208 : V := VERSHEAR_REDUCE_PREC_BITS
1209 : (and note that we must have H + V = 2*F for the output to have the same
1210 : scale as the input)
1211 :
1212 : then we end up with the following offsets and ranges:
1213 : Horizontal filter: Apply an offset of 1 << (bd + F - 1), sum fits into a
1214 : uint{bd + F + 1}
1215 : After rounding: The values stored in 'tmp' fit into a uint{bd + F + 1 - H}.
1216 : Vertical filter: Apply an offset of 1 << (bd + 2*F - H), sum fits into a
1217 : uint{bd + 2*F + 2 - H}
1218 : After rounding: The final value, before undoing the offset, fits into a
1219 : uint{bd + 2}.
1220 :
1221 : Then we need to undo the offsets before clamping to a pixel. Note that,
1222 : if we do this at the end, the amount to subtract is actually independent
1223 : of H and V:
1224 :
1225 : offset to subtract = (1 << ((bd + F - 1) - H + F - V)) +
1226 : (1 << ((bd + 2*F - H) - V))
1227 : == (1 << (bd - 1)) + (1 << bd)
1228 :
1229 : This allows us to entirely avoid clamping in both the warp filter and
1230 : the convolve-round experiment. As of the time of writing, the Wiener filter
1231 : from loop-restoration can encode a central coefficient up to 216, which
1232 : leads to a maximum value of about 282 * 2^k after applying the offset.
1233 : So in that case we still need to clamp.
1234 : */
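/* Editor's sketch (illustrative only, not part of the instrumented source):
 * a quick arithmetic check of the "offset to subtract" identity derived in
 * the note above. With F = WARPEDPIXEL_FILTER_BITS and H + V = 2*F, the two
 * offset terms collapse to (1 << (bd - 1)) + (1 << bd) no matter how the
 * precision reduction is split between H and V. The H value below is just an
 * example split, not necessarily the configured HORSHEAR_REDUCE_PREC_BITS. */
static void warp_offset_identity_sketch(void) {
  const int bd = 8;                      /* 8-bit pixels */
  const int F = WARPEDPIXEL_FILTER_BITS; /* 7, per the note above */
  const int H = 5;                       /* example horizontal reduction */
  const int V = 2 * F - H;               /* chosen so that H + V == 2*F */

  const int offset = (1 << ((bd + F - 1) - H + F - V)) +
                     (1 << ((bd + 2 * F - H) - V));
  /* 128 + 256 == 384 for bd == 8, independent of the H/V split. */
  assert(offset == (1 << (bd - 1)) + (1 << bd));
  (void)offset;
}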
1235 0 : void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
1236 : int height, int stride, uint8_t *pred, int p_col,
1237 : int p_row, int p_width, int p_height, int p_stride,
1238 : int subsampling_x, int subsampling_y, int comp_avg,
1239 : int16_t alpha, int16_t beta, int16_t gamma,
1240 : int16_t delta) {
1241 : uint16_t tmp[15 * 8];
1242 : int i, j, k, l, m;
1243 0 : const int bd = 8;
1244 :
1245 0 : for (i = p_row; i < p_row + p_height; i += 8) {
1246 0 : for (j = p_col; j < p_col + p_width; j += 8) {
1247 : int32_t x4, y4, ix4, sx4, iy4, sy4;
1248 0 : if (subsampling_x)
1249 0 : x4 = (mat[2] * 4 * (j + 4) + mat[3] * 4 * (i + 4) + mat[0] * 2 +
1250 0 : (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
1251 : 4;
1252 : else
1253 0 : x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
1254 :
1255 0 : if (subsampling_y)
1256 0 : y4 = (mat[4] * 4 * (j + 4) + mat[5] * 4 * (i + 4) + mat[1] * 2 +
1257 0 : (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
1258 : 4;
1259 : else
1260 0 : y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];
1261 :
1262 0 : ix4 = x4 >> WARPEDMODEL_PREC_BITS;
1263 0 : sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
1264 0 : iy4 = y4 >> WARPEDMODEL_PREC_BITS;
1265 0 : sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
1266 :
1267 0 : sx4 += alpha * (-4) + beta * (-4);
1268 0 : sy4 += gamma * (-4) + delta * (-4);
1269 :
1270 0 : sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
1271 0 : sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
1272 :
1273 : // Horizontal filter
1274 0 : for (k = -7; k < 8; ++k) {
1275 : // Clamp to top/bottom edge of the frame
1276 0 : int iy = iy4 + k;
1277 0 : if (iy < 0)
1278 0 : iy = 0;
1279 0 : else if (iy > height - 1)
1280 0 : iy = height - 1;
1281 :
1282 0 : int sx = sx4 + beta * (k + 4);
1283 :
1284 0 : for (l = -4; l < 4; ++l) {
1285 0 : int ix = ix4 + l - 3;
1286 : // At this point, sx = sx4 + alpha * l + beta * k
1287 0 : const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
1288 : WARPEDPIXEL_PREC_SHIFTS;
1289 0 : assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
1290 0 : const int16_t *coeffs = warped_filter[offs];
1291 :
1292 0 : int32_t sum = 1 << (bd + WARPEDPIXEL_FILTER_BITS - 1);
1293 0 : for (m = 0; m < 8; ++m) {
1294 : // Clamp to left/right edge of the frame
1295 0 : int sample_x = ix + m;
1296 0 : if (sample_x < 0)
1297 0 : sample_x = 0;
1298 0 : else if (sample_x > width - 1)
1299 0 : sample_x = width - 1;
1300 :
1301 0 : sum += ref[iy * stride + sample_x] * coeffs[m];
1302 : }
1303 0 : sum = ROUND_POWER_OF_TWO(sum, HORSHEAR_REDUCE_PREC_BITS);
1304 0 : assert(0 <= sum &&
1305 : sum < (1 << (bd + WARPEDPIXEL_FILTER_BITS + 1 -
1306 : HORSHEAR_REDUCE_PREC_BITS)));
1307 0 : tmp[(k + 7) * 8 + (l + 4)] = sum;
1308 0 : sx += alpha;
1309 : }
1310 : }
1311 :
1312 : // Vertical filter
1313 0 : for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
1314 0 : int sy = sy4 + delta * (k + 4);
1315 0 : for (l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
1316 0 : uint8_t *p =
1317 0 : &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
1318 : // At this point, sy = sy4 + gamma * l + delta * k
1319 0 : const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
1320 : WARPEDPIXEL_PREC_SHIFTS;
1321 0 : assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
1322 0 : const int16_t *coeffs = warped_filter[offs];
1323 :
1324 0 : int32_t sum = 1 << (bd + 2 * WARPEDPIXEL_FILTER_BITS -
1325 : HORSHEAR_REDUCE_PREC_BITS);
1326 0 : for (m = 0; m < 8; ++m) {
1327 0 : sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
1328 : }
1329 0 : sum = ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS);
1330 0 : assert(0 <= sum && sum < (1 << (bd + 2)));
1331 0 : uint8_t px = clip_pixel(sum - (1 << (bd - 1)) - (1 << bd));
1332 0 : if (comp_avg)
1333 0 : *p = ROUND_POWER_OF_TWO(*p + px, 1);
1334 : else
1335 0 : *p = px;
1336 0 : sy += gamma;
1337 : }
1338 : }
1339 : }
1340 : }
1341 0 : }
1342 :
1343 0 : static void warp_plane(WarpedMotionParams *wm, const uint8_t *const ref,
1344 : int width, int height, int stride, uint8_t *pred,
1345 : int p_col, int p_row, int p_width, int p_height,
1346 : int p_stride, int subsampling_x, int subsampling_y,
1347 : int x_scale, int y_scale, int comp_avg) {
1348 0 : if (wm->wmtype == ROTZOOM) {
1349 0 : wm->wmmat[5] = wm->wmmat[2];
1350 0 : wm->wmmat[4] = -wm->wmmat[3];
1351 : }
1352 0 : if ((wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) && x_scale == 16 &&
1353 0 : y_scale == 16) {
1354 0 : const int32_t *const mat = wm->wmmat;
1355 0 : const int16_t alpha = wm->alpha;
1356 0 : const int16_t beta = wm->beta;
1357 0 : const int16_t gamma = wm->gamma;
1358 0 : const int16_t delta = wm->delta;
1359 :
1360 0 : av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
1361 : p_width, p_height, p_stride, subsampling_x, subsampling_y,
1362 : comp_avg, alpha, beta, gamma, delta);
1363 : } else {
1364 0 : warp_plane_old(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
1365 : p_height, p_stride, subsampling_x, subsampling_y, x_scale,
1366 : y_scale, comp_avg);
1367 : }
1368 0 : }
1369 :
1370 0 : static int64_t frame_error(const uint8_t *const ref, int stride,
1371 : const uint8_t *const dst, int p_col, int p_row,
1372 : int p_width, int p_height, int p_stride) {
1373 0 : int64_t sum_error = 0;
1374 0 : for (int i = 0; i < p_height; ++i) {
1375 0 : for (int j = 0; j < p_width; ++j) {
1376 0 : sum_error += (int64_t)error_measure(
1377 0 : dst[j + i * p_stride] - ref[(j + p_col) + (i + p_row) * stride]);
1378 : }
1379 : }
1380 0 : return sum_error;
1381 : }
1382 :
1383 0 : static int64_t warp_error(WarpedMotionParams *wm, const uint8_t *const ref,
1384 : int width, int height, int stride,
1385 : const uint8_t *const dst, int p_col, int p_row,
1386 : int p_width, int p_height, int p_stride,
1387 : int subsampling_x, int subsampling_y, int x_scale,
1388 : int y_scale) {
1389 0 : int64_t gm_sumerr = 0;
1390 0 : uint8_t *tmp = aom_malloc(p_width * p_height);
1391 0 : if (!tmp) return INT64_MAX;
1392 :
1393 0 : warp_plane(wm, ref, width, height, stride, tmp, p_col, p_row, p_width,
1394 : p_height, p_width, subsampling_x, subsampling_y, x_scale, y_scale,
1395 : 0);
1396 :
1397 0 : gm_sumerr =
1398 : frame_error(tmp, p_width, dst, p_col, p_row, p_width, p_height, p_stride);
1399 :
1400 0 : aom_free(tmp);
1401 0 : return gm_sumerr;
1402 : }
1403 :
1404 0 : int64_t av1_frame_error(
1405 : #if CONFIG_HIGHBITDEPTH
1406 : int use_hbd, int bd,
1407 : #endif // CONFIG_HIGHBITDEPTH
1408 : const uint8_t *ref, int stride, uint8_t *dst, int p_col, int p_row,
1409 : int p_width, int p_height, int p_stride) {
1410 : #if CONFIG_HIGHBITDEPTH
1411 0 : if (use_hbd) {
1412 0 : return highbd_frame_error(CONVERT_TO_SHORTPTR(ref), stride,
1413 0 : CONVERT_TO_SHORTPTR(dst), p_col, p_row, p_width,
1414 : p_height, p_stride, bd);
1415 : }
1416 : #endif // CONFIG_HIGHBITDEPTH
1417 0 : return frame_error(ref, stride, dst, p_col, p_row, p_width, p_height,
1418 : p_stride);
1419 : }
1420 :
1421 0 : int64_t av1_warp_error(WarpedMotionParams *wm,
1422 : #if CONFIG_HIGHBITDEPTH
1423 : int use_hbd, int bd,
1424 : #endif // CONFIG_HIGHBITDEPTH
1425 : const uint8_t *ref, int width, int height, int stride,
1426 : uint8_t *dst, int p_col, int p_row, int p_width,
1427 : int p_height, int p_stride, int subsampling_x,
1428 : int subsampling_y, int x_scale, int y_scale) {
1429 0 : if (wm->wmtype <= AFFINE)
1430 0 : if (!get_shear_params(wm)) return 1;
1431 : #if CONFIG_HIGHBITDEPTH
1432 0 : if (use_hbd)
1433 0 : return highbd_warp_error(wm, ref, width, height, stride, dst, p_col, p_row,
1434 : p_width, p_height, p_stride, subsampling_x,
1435 : subsampling_y, x_scale, y_scale, bd);
1436 : #endif // CONFIG_HIGHBITDEPTH
1437 0 : return warp_error(wm, ref, width, height, stride, dst, p_col, p_row, p_width,
1438 : p_height, p_stride, subsampling_x, subsampling_y, x_scale,
1439 : y_scale);
1440 : }
1441 :
1442 0 : void av1_warp_plane(WarpedMotionParams *wm,
1443 : #if CONFIG_HIGHBITDEPTH
1444 : int use_hbd, int bd,
1445 : #endif // CONFIG_HIGHBITDEPTH
1446 : const uint8_t *ref, int width, int height, int stride,
1447 : uint8_t *pred, int p_col, int p_row, int p_width,
1448 : int p_height, int p_stride, int subsampling_x,
1449 : int subsampling_y, int x_scale, int y_scale, int comp_avg) {
1450 : #if CONFIG_HIGHBITDEPTH
1451 0 : if (use_hbd)
1452 0 : highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row,
1453 : p_width, p_height, p_stride, subsampling_x, subsampling_y,
1454 : x_scale, y_scale, bd, comp_avg);
1455 : else
1456 : #endif // CONFIG_HIGHBITDEPTH
1457 0 : warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
1458 : p_height, p_stride, subsampling_x, subsampling_y, x_scale,
1459 : y_scale, comp_avg);
1460 0 : }
1461 :
1462 : #if CONFIG_WARPED_MOTION
1463 : #define LEAST_SQUARES_ORDER 2
1464 :
1465 : #define LS_MV_MAX 256 // max mv in 1/8-pel
1466 : #define LS_STEP 2
1467 :
1468 : // Assuming LS_MV_MAX is < MAX_SB_SIZE * 8,
1469 : // the precision needed is:
1470 : // (MAX_SB_SIZE_LOG2 + 3) [for sx * sx magnitude] +
1471 : // (MAX_SB_SIZE_LOG2 + 4) [for sx * dx magnitude] +
1472 : // 1 [for sign] +
1473 : // LEAST_SQUARES_SAMPLES_MAX_BITS
1474 : // [for adding up to LEAST_SQUARES_SAMPLES_MAX samples]
1475 : // The value is 23
1476 : #define LS_MAT_RANGE_BITS \
1477 : ((MAX_SB_SIZE_LOG2 + 4) * 2 + LEAST_SQUARES_SAMPLES_MAX_BITS)
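/* Editor's note (illustrative only): assuming MAX_SB_SIZE_LOG2 == 6 and
 * LEAST_SQUARES_SAMPLES_MAX_BITS == 3 (i.e. up to 8 samples), the macro
 * expands to (6 + 4) * 2 + 3 = 23, matching "The value is 23" above. This is
 * the same total as the line-by-line breakdown (6 + 3) + (6 + 4) + 1 + 3.
 * A larger superblock size would raise the bound accordingly. */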
1478 :
1479 : // Bit-depth reduction from the full-range
1480 : #define LS_MAT_DOWN_BITS 2
1481 :
1482 : // Bit range of A, Bx and By after downshifting
1483 : #define LS_MAT_BITS (LS_MAT_RANGE_BITS - LS_MAT_DOWN_BITS)
1484 : #define LS_MAT_MIN (-(1 << (LS_MAT_BITS - 1)))
1485 : #define LS_MAT_MAX ((1 << (LS_MAT_BITS - 1)) - 1)
1486 :
1487 : #define LS_SUM(a) ((a)*4 + LS_STEP * 2)
1488 : #define LS_SQUARE(a) \
1489 : (((a) * (a)*4 + (a)*4 * LS_STEP + LS_STEP * LS_STEP * 2) >> 2)
1490 : #define LS_PRODUCT1(a, b) \
1491 : (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP) >> 2)
1492 : #define LS_PRODUCT2(a, b) \
1493 : (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP * 2) >> 2)
1494 :
1495 0 : static int find_affine_int(int np, int *pts1, int *pts2, BLOCK_SIZE bsize,
1496 : int mvy, int mvx, WarpedMotionParams *wm, int mi_row,
1497 : int mi_col) {
1498 0 : int32_t A[2][2] = { { 0, 0 }, { 0, 0 } };
1499 0 : int32_t Bx[2] = { 0, 0 };
1500 0 : int32_t By[2] = { 0, 0 };
1501 0 : int i, n = 0;
1502 :
1503 0 : const int bw = block_size_wide[bsize];
1504 0 : const int bh = block_size_high[bsize];
1505 0 : const int suy = (mi_row * MI_SIZE + AOMMAX(bh, MI_SIZE) / 2 - 1) * 8;
1506 0 : const int sux = (mi_col * MI_SIZE + AOMMAX(bw, MI_SIZE) / 2 - 1) * 8;
1507 0 : const int duy = suy + mvy;
1508 0 : const int dux = sux + mvx;
1509 :
1510 : // Assume the center pixel of the block has exactly the same motion vector
1511 : // as transmitted for the block. First shift the origin of the source
1512 : // points to the block center, and the origin of the destination points to
1513 : // the block center added to the motion vector transmitted.
1514 : // Let (xi, yi) denote the source points and (xi', yi') denote destination
1515 : // points after origin shifting, for i = 0, 1, 2, ..., n-1.
1516 : // Then if P = [x0, y0,
1517 : // x1, y1
1518 : // x2, y2,
1519 : // ....
1520 : // ]
1521 : // q = [x0', x1', x2', ... ]'
1522 : // r = [y0', y1', y2', ... ]'
1523 : // the least squares problems that need to be solved are:
1524 : // [h1, h2]' = inv(P'P)P'q and
1525 : // [h3, h4]' = inv(P'P)P'r
1526 : // where the affine transformation is given by:
1527 : // x' = h1.x + h2.y
1528 : // y' = h3.x + h4.y
1529 : //
1530 : // The loop below computes: A = P'P, Bx = P'q, By = P'r
1531 : // We then just need to compute inv(A).Bx and inv(A).By for the solutions.
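  // Editor's note (illustrative only): since A = P'P is a symmetric 2x2
  // matrix, its inverse has the closed form
  //   inv(A) = (1 / Det) * [  A[1][1]  -A[0][1] ]
  //                        [ -A[0][1]   A[0][0] ]
  // with Det = A[0][0] * A[1][1] - A[0][1] * A[0][1]. The Px[], Py[] and Det
  // computations further below evaluate exactly inv(A).Bx and inv(A).By,
  // with the division by Det carried out via resolve_divisor_64().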
1532 : int sx, sy, dx, dy;
1533 : // Contribution from neighbor block
1534 0 : for (i = 0; i < np && n < LEAST_SQUARES_SAMPLES_MAX; i++) {
1535 0 : dx = pts2[i * 2] - dux;
1536 0 : dy = pts2[i * 2 + 1] - duy;
1537 0 : sx = pts1[i * 2] - sux;
1538 0 : sy = pts1[i * 2 + 1] - suy;
1539 0 : if (abs(sx - dx) < LS_MV_MAX && abs(sy - dy) < LS_MV_MAX) {
1540 0 : A[0][0] += LS_SQUARE(sx);
1541 0 : A[0][1] += LS_PRODUCT1(sx, sy);
1542 0 : A[1][1] += LS_SQUARE(sy);
1543 0 : Bx[0] += LS_PRODUCT2(sx, dx);
1544 0 : Bx[1] += LS_PRODUCT1(sy, dx);
1545 0 : By[0] += LS_PRODUCT1(sx, dy);
1546 0 : By[1] += LS_PRODUCT2(sy, dy);
1547 0 : n++;
1548 : }
1549 : }
1550 : int downshift;
1551 0 : if (n >= 4)
1552 0 : downshift = LS_MAT_DOWN_BITS;
1553 0 : else if (n >= 2)
1554 0 : downshift = LS_MAT_DOWN_BITS - 1;
1555 : else
1556 0 : downshift = LS_MAT_DOWN_BITS - 2;
1557 :
1558 : // Reduce precision by downshift bits
1559 0 : A[0][0] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[0][0], downshift), LS_MAT_MIN,
1560 : LS_MAT_MAX);
1561 0 : A[0][1] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[0][1], downshift), LS_MAT_MIN,
1562 : LS_MAT_MAX);
1563 0 : A[1][1] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[1][1], downshift), LS_MAT_MIN,
1564 : LS_MAT_MAX);
1565 0 : Bx[0] = clamp(ROUND_POWER_OF_TWO_SIGNED(Bx[0], downshift), LS_MAT_MIN,
1566 : LS_MAT_MAX);
1567 0 : Bx[1] = clamp(ROUND_POWER_OF_TWO_SIGNED(Bx[1], downshift), LS_MAT_MIN,
1568 : LS_MAT_MAX);
1569 0 : By[0] = clamp(ROUND_POWER_OF_TWO_SIGNED(By[0], downshift), LS_MAT_MIN,
1570 : LS_MAT_MAX);
1571 0 : By[1] = clamp(ROUND_POWER_OF_TWO_SIGNED(By[1], downshift), LS_MAT_MIN,
1572 : LS_MAT_MAX);
1573 :
1574 : int64_t Px[2], Py[2], Det;
1575 : int16_t iDet, shift;
1576 :
1577 : // These divided by the Det, are the least squares solutions
1578 0 : Px[0] = (int64_t)A[1][1] * Bx[0] - (int64_t)A[0][1] * Bx[1];
1579 0 : Px[1] = -(int64_t)A[0][1] * Bx[0] + (int64_t)A[0][0] * Bx[1];
1580 0 : Py[0] = (int64_t)A[1][1] * By[0] - (int64_t)A[0][1] * By[1];
1581 0 : Py[1] = -(int64_t)A[0][1] * By[0] + (int64_t)A[0][0] * By[1];
1582 :
1583 : // Compute Determinant of A
1584 0 : Det = (int64_t)A[0][0] * A[1][1] - (int64_t)A[0][1] * A[0][1];
1585 0 : if (Det == 0) return 1;
1586 0 : iDet = resolve_divisor_64(llabs(Det), &shift) * (Det < 0 ? -1 : 1);
1587 0 : shift -= WARPEDMODEL_PREC_BITS;
1588 0 : if (shift < 0) {
1589 0 : iDet <<= (-shift);
1590 0 : shift = 0;
1591 : }
1592 :
1593 : int64_t v;
1594 0 : v = Px[0] * (int64_t)iDet;
1595 0 : wm->wmmat[2] = (int32_t)(ROUND_POWER_OF_TWO_SIGNED_64(v, shift));
1596 0 : v = Px[1] * (int64_t)iDet;
1597 0 : wm->wmmat[3] = (int32_t)(ROUND_POWER_OF_TWO_SIGNED_64(v, shift));
1598 0 : v = ((int64_t)dux * (1 << WARPEDMODEL_PREC_BITS)) -
1599 0 : (int64_t)sux * wm->wmmat[2] - (int64_t)suy * wm->wmmat[3];
1600 0 : wm->wmmat[0] = (int32_t)(ROUND_POWER_OF_TWO_SIGNED(v, 3));
1601 :
1602 0 : v = Py[0] * (int64_t)iDet;
1603 0 : wm->wmmat[4] = (int32_t)(ROUND_POWER_OF_TWO_SIGNED_64(v, shift));
1604 0 : v = Py[1] * (int64_t)iDet;
1605 0 : wm->wmmat[5] = (int32_t)(ROUND_POWER_OF_TWO_SIGNED_64(v, shift));
1606 0 : v = ((int64_t)duy * (1 << WARPEDMODEL_PREC_BITS)) -
1607 0 : (int64_t)sux * wm->wmmat[4] - (int64_t)suy * wm->wmmat[5];
1608 0 : wm->wmmat[1] = (int32_t)(ROUND_POWER_OF_TWO_SIGNED(v, 3));
1609 :
1610 0 : wm->wmmat[6] = wm->wmmat[7] = 0;
1611 :
1612 : // Clamp values
1613 0 : wm->wmmat[0] = clamp(wm->wmmat[0], -WARPEDMODEL_TRANS_CLAMP,
1614 : WARPEDMODEL_TRANS_CLAMP - 1);
1615 0 : wm->wmmat[1] = clamp(wm->wmmat[1], -WARPEDMODEL_TRANS_CLAMP,
1616 : WARPEDMODEL_TRANS_CLAMP - 1);
1617 0 : wm->wmmat[2] = clamp(wm->wmmat[2], -WARPEDMODEL_DIAGAFFINE_CLAMP,
1618 : WARPEDMODEL_DIAGAFFINE_CLAMP - 1);
1619 0 : wm->wmmat[5] = clamp(wm->wmmat[5], -WARPEDMODEL_DIAGAFFINE_CLAMP,
1620 : WARPEDMODEL_DIAGAFFINE_CLAMP - 1);
1621 0 : wm->wmmat[3] = clamp(wm->wmmat[3], -WARPEDMODEL_NONDIAGAFFINE_CLAMP,
1622 : WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
1623 0 : wm->wmmat[4] = clamp(wm->wmmat[4], -WARPEDMODEL_NONDIAGAFFINE_CLAMP,
1624 : WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
1625 0 : return 0;
1626 : }
1627 :
1628 0 : int find_projection(int np, int *pts1, int *pts2, BLOCK_SIZE bsize, int mvy,
1629 : int mvx, WarpedMotionParams *wm_params, int mi_row,
1630 : int mi_col) {
1631 0 : int result = 1;
1632 0 : switch (wm_params->wmtype) {
1633 : case AFFINE:
1634 0 : result = find_affine_int(np, pts1, pts2, bsize, mvy, mvx, wm_params,
1635 : mi_row, mi_col);
1636 0 : break;
1637 0 : default: assert(0 && "Invalid warped motion type!"); return 1;
1638 : }
1639 0 : if (result == 0) {
1640 0 : if (wm_params->wmtype == ROTZOOM) {
1641 0 : wm_params->wmmat[5] = wm_params->wmmat[2];
1642 0 : wm_params->wmmat[4] = -wm_params->wmmat[3];
1643 : }
1644 0 : if (wm_params->wmtype == AFFINE || wm_params->wmtype == ROTZOOM) {
1645 : // check compatibility with the fast warp filter
1646 0 : if (!get_shear_params(wm_params)) return 1;
1647 : }
1648 : }
1649 :
1650 0 : return result;
1651 : }
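/* Editor's sketch (illustrative only, not part of the instrumented source):
 * one hypothetical encoder-side wrapper around find_projection(). The helper
 * name and the fallback convention are assumptions made for illustration.
 * pts_src/pts_dst hold np matched (x, y) positions in 1/8-pel units, and
 * mvx/mvy is the block's transmitted motion vector in the same units,
 * consistent with the * 8 scaling of sux/suy in find_affine_int(). */
static int fit_affine_model_sketch(int np, int *pts_src, int *pts_dst,
                                   BLOCK_SIZE bsize, int mvy, int mvx,
                                   WarpedMotionParams *wm, int mi_row,
                                   int mi_col) {
  wm->wmtype = AFFINE;
  /* find_projection() returns 0 on success; it also rejects models that the
     fast warp filter cannot handle, via get_shear_params(). */
  if (find_projection(np, pts_src, pts_dst, bsize, mvy, mvx, wm, mi_row,
                      mi_col))
    return 0; /* caller would fall back to a simpler motion model */
  return 1;   /* wm->wmmat[] and wm->alpha/beta/gamma/delta are now valid */
}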
1652 : #endif // CONFIG_WARPED_MOTION