Line data Source code
1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include <assert.h>
13 :
14 : #include "aom/aom_integer.h"
15 :
16 : #include "aom_ports/mem.h"
17 :
18 : #include "aom_dsp/aom_dsp_common.h"
19 :
20 : #include "av1/common/reconinter.h"
21 :
22 : #define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
23 :
24 : /**
25 : * Computes SSE of a compound predictor constructed from 2 fundamental
26 : * predictors p0 and p1 using blending with mask.
27 : *
28 : * r1: Residuals of p1.
29 : * (source - p1)
30 : * d: Difference of p1 and p0.
31 : * (p1 - p0)
32 : * m: The blending mask
33 : * N: Number of pixels
34 : *
35 : * 'r1', 'd', and 'm' are contiguous.
36 : *
37 : * Computes:
38 : * Sum((MAX_MASK_VALUE*r1 + mask*d)**2), which is equivalent to:
39 : * Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2),
40 : * where r0 is (source - p0), and r1 is (source - p1), which is in turn
41 : * is equivalent to:
42 : * Sum((source*MAX_MASK_VALUE - (mask*p0 + (MAX_MASK_VALUE-mask)*p1))**2),
43 : * which is the SSE of the residuals of the compound predictor scaled up by
44 : * MAX_MASK_VALUE**2.
45 : *
46 : * Note that we clamp the partial term in the loop to 16 bits signed. This is
47 : * to facilitate equivalent SIMD implementation. It should have no effect if
48 : * residuals are within 16 - WEDGE_WEIGHT_BITS (=10) signed, which always
49 : * holds for 8 bit input, and on real input, it should hold practically always,
50 : * as residuals are expected to be small.
51 : */
52 0 : uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d,
53 : const uint8_t *m, int N) {
54 0 : uint64_t csse = 0;
55 : int i;
56 :
57 0 : for (i = 0; i < N; i++) {
58 0 : int32_t t = MAX_MASK_VALUE * r1[i] + m[i] * d[i];
59 0 : t = clamp(t, INT16_MIN, INT16_MAX);
60 0 : csse += t * t;
61 : }
62 0 : return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS);
63 : }
64 :
/**
 * Choose the mask sign for a compound predictor.
 *
 * ds:    Difference of the squares of the residuals.
 *        r0**2 - r1**2
 * m:     The blending mask
 * N:     Number of pixels
 * limit: Pre-computed threshold value.
 *        MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
 *
 * 'ds' and 'm' are contiguous arrays of N elements.
 *
 * Returns non-zero (1) if the negated mask has lower SSE compared to the
 * positive mask, 0 otherwise. Computation is based on:
 *  Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2)
 *                                     >
 *                                Sum(((MAX_MASK_VALUE-mask)*r0 + mask*r1)**2)
 *
 * which can be simplified to:
 *
 *  Sum(mask*(r0**2 - r1**2)) > MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
 *
 * The right hand side does not depend on the mask, and needs to be passed as
 * the 'limit' parameter.
 *
 * After pre-computing (r0**2 - r1**2), which is passed in as 'ds', the left
 * hand side is simply a scalar product between an int16_t and uint8_t vector.
 *
 * Note that for efficiency, ds is stored on 16 bits. Real input residuals
 * being small, this should not cause a noticeable issue.
 *
 * If N is zero or negative no element is read: the scalar product is taken
 * to be 0 and the result is simply (0 > limit).
 */
int av1_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m, int N,
                                    int64_t limit) {
  int64_t acc = 0;
  int i;

  /* The count is tested before the first access, so N <= 0 never
   * dereferences 'ds' or 'm'. */
  for (i = 0; i < N; i++) {
    /* int16_t * uint8_t is evaluated in int and cannot overflow. */
    acc += ds[i] * m[i];
  }

  return acc > limit;
}
106 :
/**
 * Element-wise difference of squares of two arrays.
 *
 * d: Output array; d[i] receives a[i]**2 - b[i]**2
 * a: First input array
 * b: Second input array
 * N: Number of elements
 *
 * 'd', 'a', and 'b' are contiguous arrays of N elements.
 *
 * Each result is saturated to the signed 16 bit range before it is stored.
 */
void av1_wedge_compute_delta_squares_c(int16_t *d, const int16_t *a,
                                       const int16_t *b, int N) {
  int k = 0;

  while (k < N) {
    /* Squares are formed in int, so the subtraction cannot overflow. */
    const int a_sq = a[k] * a[k];
    const int b_sq = b[k] * b[k];

    d[k] = clamp(a_sq - b_sq, INT16_MIN, INT16_MAX);
    ++k;
  }
}
|