Line data Source code
1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include <assert.h>
13 : #include <math.h>
14 :
15 : #include "./aom_dsp_rtcd.h"
16 : #include "aom_dsp/psnr.h"
17 : #include "aom_scale/yv12config.h"
18 :
19 0 : double aom_sse_to_psnr(double samples, double peak, double sse) {
20 0 : if (sse > 0.0) {
21 0 : const double psnr = 10.0 * log10(samples * peak * peak / sse);
22 0 : return psnr > MAX_PSNR ? MAX_PSNR : psnr;
23 : } else {
24 0 : return MAX_PSNR;
25 : }
26 : }
27 :
28 : /* TODO(yaowu): The block_variance calls the unoptimized versions of variance()
29 : * and highbd_8_variance(). It should not.
30 : */
// Computes the sum of differences (a - b) and the sum of squared differences
// over a w x h region of two 8-bit planes.
//
// a_stride/b_stride are the distances, in bytes, between the starts of
// consecutive rows. *sum and *sse are overwritten with the results.
static void encoder_variance(const uint8_t *a, int a_stride, const uint8_t *b,
                             int b_stride, int w, int h, unsigned int *sse,
                             int *sum) {
  int diff_total = 0;
  unsigned int sq_total = 0;
  int row;

  for (row = 0; row < h; ++row, a += a_stride, b += b_stride) {
    int col;
    for (col = 0; col < w; ++col) {
      const int d = (int)a[col] - (int)b[col];
      diff_total += d;
      sq_total += (unsigned int)(d * d);
    }
  }

  *sum = diff_total;
  *sse = sq_total;
}
50 :
51 : #if CONFIG_HIGHBITDEPTH
// Computes the sum of differences (a - b) and the sum of squared differences
// over a w x h region of two high-bitdepth planes, using 64-bit accumulators.
//
// a8/b8 are passed through CONVERT_TO_SHORTPTR() and then read as uint16_t
// samples; a_stride/b_stride are added to those uint16_t pointers once per
// row. *sum and *sse are overwritten with the results.
static void encoder_highbd_variance64(const uint8_t *a8, int a_stride,
                                      const uint8_t *b8, int b_stride, int w,
                                      int h, uint64_t *sse, int64_t *sum) {
  int i, j;

  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      // Widen before squaring: two uint16_t samples can differ by up to
      // 65535, and squaring that in int would overflow.
      const int64_t diff = (int64_t)a[j] - (int64_t)b[j];
      *sum += diff;
      *sse += (uint64_t)(diff * diff);
    }
    a += a_stride;
    b += b_stride;
  }
}
72 :
// Wrapper around encoder_highbd_variance64() that reports its results through
// unsigned int / int outputs. The 64-bit totals are converted with plain
// casts, so values that do not fit are not preserved.
static void encoder_highbd_8_variance(const uint8_t *a8, int a_stride,
                                      const uint8_t *b8, int b_stride, int w,
                                      int h, unsigned int *sse, int *sum) {
  uint64_t wide_sse = 0;
  int64_t wide_sum = 0;

  encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h, &wide_sse,
                            &wide_sum);

  *sum = (int)wide_sum;
  *sse = (unsigned int)wide_sse;
}
83 : #endif // CONFIG_HIGHBITDEPTH
84 :
// Returns the sum of squared differences between two 8-bit planes over a
// width x height region.
//
// Whole 16x16 blocks are measured with aom_mse16x16(). The leftover right
// edge (width % 16 columns, every row) and bottom edge (height % 16 rows,
// excluding the right-edge columns so no sample is counted twice) are
// measured with encoder_variance().
static int64_t get_sse(const uint8_t *a, int a_stride, const uint8_t *b,
                       int b_stride, int width, int height) {
  // encoder_variance() accumulates into an unsigned int. An edge is at most
  // 15 samples across, so limiting each call to 256 samples along the edge
  // bounds its result by 15 * 256 * 255^2, which is below 2^32. Without the
  // limit a long edge with large differences could wrap the accumulator.
  const int strip = 256;
  const int dw = width % 16;
  const int dh = height % 16;
  int64_t total_sse = 0;
  unsigned int sse = 0;
  int sum = 0;  // Output required by encoder_variance(); not used here.
  int x, y;

  if (dw > 0) {
    // Right edge, walked top to bottom in strips of rows.
    for (y = 0; y < height; y += strip) {
      const int rows = (height - y < strip) ? height - y : strip;
      encoder_variance(&a[y * a_stride + width - dw], a_stride,
                       &b[y * b_stride + width - dw], b_stride, dw, rows,
                       &sse, &sum);
      total_sse += sse;
    }
  }

  if (dh > 0) {
    // Bottom edge, walked left to right in strips of columns.
    const uint8_t *const edge_a = &a[(height - dh) * a_stride];
    const uint8_t *const edge_b = &b[(height - dh) * b_stride];
    const int edge_w = width - dw;
    for (x = 0; x < edge_w; x += strip) {
      const int cols = (edge_w - x < strip) ? edge_w - x : strip;
      encoder_variance(edge_a + x, a_stride, edge_b + x, b_stride, cols, dh,
                       &sse, &sum);
      total_sse += sse;
    }
  }

  for (y = 0; y < height / 16; ++y) {
    const uint8_t *pa = a;
    const uint8_t *pb = b;
    for (x = 0; x < width / 16; ++x) {
      aom_mse16x16(pa, a_stride, pb, b_stride, &sse);
      total_sse += sse;

      pa += 16;
      pb += 16;
    }

    a += 16 * a_stride;
    b += 16 * b_stride;
  }

  return total_sse;
}
124 :
125 : #if CONFIG_HIGHBITDEPTH
// Returns the sum of squared differences between two high-bitdepth planes
// over a width x height region, with every sample shifted right by
// input_shift bits before the difference is taken.
//
// a8/b8 are passed through CONVERT_TO_SHORTPTR() and then read as uint16_t
// samples; the strides are added to those uint16_t pointers once per row.
static int64_t highbd_get_sse_shift(const uint8_t *a8, int a_stride,
                                    const uint8_t *b8, int b_stride, int width,
                                    int height, unsigned int input_shift) {
  const uint16_t *row_a = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *row_b = CONVERT_TO_SHORTPTR(b8);
  int64_t acc = 0;
  int row;

  for (row = 0; row < height; ++row) {
    int col;
    for (col = 0; col < width; ++col) {
      const int64_t d =
          (row_a[col] >> input_shift) - (row_b[col] >> input_shift);
      acc += d * d;
    }
    row_a += a_stride;
    row_b += b_stride;
  }

  return acc;
}
144 :
// Returns the sum of squared differences between two high-bitdepth planes
// over a width x height region.
//
// Whole 16x16 blocks are measured with aom_highbd_8_mse16x16(). The leftover
// right edge (width % 16 columns, every row) and bottom edge (height % 16
// rows, excluding the right-edge columns so no sample is counted twice) are
// measured with encoder_highbd_8_variance().
//
// a and b are forwarded unchanged to those helpers; offsets are applied to
// them here in the same units the original buffers use.
static int64_t highbd_get_sse(const uint8_t *a, int a_stride, const uint8_t *b,
                              int b_stride, int width, int height) {
  // encoder_highbd_8_variance() narrows its 64-bit total to unsigned int.
  // Edges are therefore measured in tiles of at most 15 x 16 samples, never
  // larger than the 16x16 blocks of the main loop. For differences of up to
  // 12 bits that bounds a tile by 240 * 4095^2, which is below 2^32
  // (assumes samples are at most 12 bits - TODO confirm against callers).
  const int tile = 16;
  int64_t total_sse = 0;
  int x, y;
  const int dw = width % 16;
  const int dh = height % 16;
  unsigned int sse = 0;
  int sum = 0;  // Output required by the helper; not used here.
  if (dw > 0) {
    // Right edge, walked top to bottom in tiles of rows.
    for (y = 0; y < height; y += tile) {
      const int rows = (height - y < tile) ? height - y : tile;
      encoder_highbd_8_variance(&a[y * a_stride + width - dw], a_stride,
                                &b[y * b_stride + width - dw], b_stride, dw,
                                rows, &sse, &sum);
      total_sse += sse;
    }
  }
  if (dh > 0) {
    // Bottom edge, walked left to right. width - dw is a multiple of 16, so
    // every tile here is exactly 16 columns wide.
    const uint8_t *const edge_a = &a[(height - dh) * a_stride];
    const uint8_t *const edge_b = &b[(height - dh) * b_stride];
    for (x = 0; x < width - dw; x += tile) {
      encoder_highbd_8_variance(edge_a + x, a_stride, edge_b + x, b_stride,
                                tile, dh, &sse, &sum);
      total_sse += sse;
    }
  }
  for (y = 0; y < height / 16; ++y) {
    const uint8_t *pa = a;
    const uint8_t *pb = b;
    for (x = 0; x < width / 16; ++x) {
      aom_highbd_8_mse16x16(pa, a_stride, pb, b_stride, &sse);
      total_sse += sse;
      pa += 16;
      pb += 16;
    }
    a += 16 * a_stride;
    b += 16 * b_stride;
  }
  return total_sse;
}
178 : #endif // CONFIG_HIGHBITDEPTH
179 :
180 0 : int64_t aom_get_y_sse_part(const YV12_BUFFER_CONFIG *a,
181 : const YV12_BUFFER_CONFIG *b, int hstart, int width,
182 : int vstart, int height) {
183 0 : return get_sse(a->y_buffer + vstart * a->y_stride + hstart, a->y_stride,
184 0 : b->y_buffer + vstart * b->y_stride + hstart, b->y_stride,
185 : width, height);
186 : }
187 :
188 0 : int64_t aom_get_y_sse(const YV12_BUFFER_CONFIG *a,
189 : const YV12_BUFFER_CONFIG *b) {
190 0 : assert(a->y_crop_width == b->y_crop_width);
191 0 : assert(a->y_crop_height == b->y_crop_height);
192 :
193 0 : return get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
194 : a->y_crop_width, a->y_crop_height);
195 : }
196 :
197 0 : int64_t aom_get_u_sse_part(const YV12_BUFFER_CONFIG *a,
198 : const YV12_BUFFER_CONFIG *b, int hstart, int width,
199 : int vstart, int height) {
200 0 : return get_sse(a->u_buffer + vstart * a->uv_stride + hstart, a->uv_stride,
201 0 : b->u_buffer + vstart * b->uv_stride + hstart, b->uv_stride,
202 : width, height);
203 : }
204 :
205 0 : int64_t aom_get_u_sse(const YV12_BUFFER_CONFIG *a,
206 : const YV12_BUFFER_CONFIG *b) {
207 0 : assert(a->uv_crop_width == b->uv_crop_width);
208 0 : assert(a->uv_crop_height == b->uv_crop_height);
209 :
210 0 : return get_sse(a->u_buffer, a->uv_stride, b->u_buffer, b->uv_stride,
211 : a->uv_crop_width, a->uv_crop_height);
212 : }
213 :
214 0 : int64_t aom_get_v_sse_part(const YV12_BUFFER_CONFIG *a,
215 : const YV12_BUFFER_CONFIG *b, int hstart, int width,
216 : int vstart, int height) {
217 0 : return get_sse(a->v_buffer + vstart * a->uv_stride + hstart, a->uv_stride,
218 0 : b->v_buffer + vstart * b->uv_stride + hstart, b->uv_stride,
219 : width, height);
220 : }
221 :
222 0 : int64_t aom_get_v_sse(const YV12_BUFFER_CONFIG *a,
223 : const YV12_BUFFER_CONFIG *b) {
224 0 : assert(a->uv_crop_width == b->uv_crop_width);
225 0 : assert(a->uv_crop_height == b->uv_crop_height);
226 :
227 0 : return get_sse(a->v_buffer, a->uv_stride, b->v_buffer, b->uv_stride,
228 : a->uv_crop_width, a->uv_crop_height);
229 : }
230 :
231 : #if CONFIG_HIGHBITDEPTH
232 0 : int64_t aom_highbd_get_y_sse_part(const YV12_BUFFER_CONFIG *a,
233 : const YV12_BUFFER_CONFIG *b, int hstart,
234 : int width, int vstart, int height) {
235 0 : return highbd_get_sse(
236 0 : a->y_buffer + vstart * a->y_stride + hstart, a->y_stride,
237 0 : b->y_buffer + vstart * b->y_stride + hstart, b->y_stride, width, height);
238 : }
239 :
240 0 : int64_t aom_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
241 : const YV12_BUFFER_CONFIG *b) {
242 0 : assert(a->y_crop_width == b->y_crop_width);
243 0 : assert(a->y_crop_height == b->y_crop_height);
244 0 : assert((a->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
245 0 : assert((b->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
246 :
247 0 : return highbd_get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
248 : a->y_crop_width, a->y_crop_height);
249 : }
250 :
251 0 : int64_t aom_highbd_get_u_sse_part(const YV12_BUFFER_CONFIG *a,
252 : const YV12_BUFFER_CONFIG *b, int hstart,
253 : int width, int vstart, int height) {
254 0 : return highbd_get_sse(a->u_buffer + vstart * a->uv_stride + hstart,
255 : a->uv_stride,
256 0 : b->u_buffer + vstart * b->uv_stride + hstart,
257 : b->uv_stride, width, height);
258 : }
259 :
260 0 : int64_t aom_highbd_get_u_sse(const YV12_BUFFER_CONFIG *a,
261 : const YV12_BUFFER_CONFIG *b) {
262 0 : assert(a->uv_crop_width == b->uv_crop_width);
263 0 : assert(a->uv_crop_height == b->uv_crop_height);
264 0 : assert((a->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
265 0 : assert((b->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
266 :
267 0 : return highbd_get_sse(a->u_buffer, a->uv_stride, b->u_buffer, b->uv_stride,
268 : a->uv_crop_width, a->uv_crop_height);
269 : }
270 :
271 0 : int64_t aom_highbd_get_v_sse_part(const YV12_BUFFER_CONFIG *a,
272 : const YV12_BUFFER_CONFIG *b, int hstart,
273 : int width, int vstart, int height) {
274 0 : return highbd_get_sse(a->v_buffer + vstart * a->uv_stride + hstart,
275 : a->uv_stride,
276 0 : b->v_buffer + vstart * b->uv_stride + hstart,
277 : b->uv_stride, width, height);
278 : }
279 :
280 0 : int64_t aom_highbd_get_v_sse(const YV12_BUFFER_CONFIG *a,
281 : const YV12_BUFFER_CONFIG *b) {
282 0 : assert(a->uv_crop_width == b->uv_crop_width);
283 0 : assert(a->uv_crop_height == b->uv_crop_height);
284 0 : assert((a->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
285 0 : assert((b->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
286 :
287 0 : return highbd_get_sse(a->v_buffer, a->uv_stride, b->v_buffer, b->uv_stride,
288 : a->uv_crop_width, a->uv_crop_height);
289 : }
290 : #endif // CONFIG_HIGHBITDEPTH
291 :
292 : #if CONFIG_HIGHBITDEPTH
293 0 : void aom_calc_highbd_psnr(const YV12_BUFFER_CONFIG *a,
294 : const YV12_BUFFER_CONFIG *b, PSNR_STATS *psnr,
295 : uint32_t bit_depth, uint32_t in_bit_depth) {
296 0 : const int widths[3] = { a->y_crop_width, a->uv_crop_width, a->uv_crop_width };
297 0 : const int heights[3] = { a->y_crop_height, a->uv_crop_height,
298 0 : a->uv_crop_height };
299 0 : const uint8_t *a_planes[3] = { a->y_buffer, a->u_buffer, a->v_buffer };
300 0 : const int a_strides[3] = { a->y_stride, a->uv_stride, a->uv_stride };
301 0 : const uint8_t *b_planes[3] = { b->y_buffer, b->u_buffer, b->v_buffer };
302 0 : const int b_strides[3] = { b->y_stride, b->uv_stride, b->uv_stride };
303 : int i;
304 0 : uint64_t total_sse = 0;
305 0 : uint32_t total_samples = 0;
306 0 : const double peak = (double)((1 << in_bit_depth) - 1);
307 0 : const unsigned int input_shift = bit_depth - in_bit_depth;
308 :
309 0 : for (i = 0; i < 3; ++i) {
310 0 : const int w = widths[i];
311 0 : const int h = heights[i];
312 0 : const uint32_t samples = w * h;
313 : uint64_t sse;
314 0 : if (a->flags & YV12_FLAG_HIGHBITDEPTH) {
315 0 : if (input_shift) {
316 0 : sse = highbd_get_sse_shift(a_planes[i], a_strides[i], b_planes[i],
317 : b_strides[i], w, h, input_shift);
318 : } else {
319 0 : sse = highbd_get_sse(a_planes[i], a_strides[i], b_planes[i],
320 : b_strides[i], w, h);
321 : }
322 : } else {
323 0 : sse = get_sse(a_planes[i], a_strides[i], b_planes[i], b_strides[i], w, h);
324 : }
325 0 : psnr->sse[1 + i] = sse;
326 0 : psnr->samples[1 + i] = samples;
327 0 : psnr->psnr[1 + i] = aom_sse_to_psnr(samples, peak, (double)sse);
328 :
329 0 : total_sse += sse;
330 0 : total_samples += samples;
331 : }
332 :
333 0 : psnr->sse[0] = total_sse;
334 0 : psnr->samples[0] = total_samples;
335 0 : psnr->psnr[0] =
336 0 : aom_sse_to_psnr((double)total_samples, peak, (double)total_sse);
337 0 : }
338 :
#endif  // CONFIG_HIGHBITDEPTH
340 :
341 0 : void aom_calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
342 : PSNR_STATS *psnr) {
343 : static const double peak = 255.0;
344 0 : const int widths[3] = { a->y_crop_width, a->uv_crop_width, a->uv_crop_width };
345 0 : const int heights[3] = { a->y_crop_height, a->uv_crop_height,
346 0 : a->uv_crop_height };
347 0 : const uint8_t *a_planes[3] = { a->y_buffer, a->u_buffer, a->v_buffer };
348 0 : const int a_strides[3] = { a->y_stride, a->uv_stride, a->uv_stride };
349 0 : const uint8_t *b_planes[3] = { b->y_buffer, b->u_buffer, b->v_buffer };
350 0 : const int b_strides[3] = { b->y_stride, b->uv_stride, b->uv_stride };
351 : int i;
352 0 : uint64_t total_sse = 0;
353 0 : uint32_t total_samples = 0;
354 :
355 0 : for (i = 0; i < 3; ++i) {
356 0 : const int w = widths[i];
357 0 : const int h = heights[i];
358 0 : const uint32_t samples = w * h;
359 0 : const uint64_t sse =
360 0 : get_sse(a_planes[i], a_strides[i], b_planes[i], b_strides[i], w, h);
361 0 : psnr->sse[1 + i] = sse;
362 0 : psnr->samples[1 + i] = samples;
363 0 : psnr->psnr[1 + i] = aom_sse_to_psnr(samples, peak, (double)sse);
364 :
365 0 : total_sse += sse;
366 0 : total_samples += samples;
367 : }
368 :
369 0 : psnr->sse[0] = total_sse;
370 0 : psnr->samples[0] = total_samples;
371 0 : psnr->psnr[0] =
372 0 : aom_sse_to_psnr((double)total_samples, peak, (double)total_sse);
373 0 : }
|