Line data Source code
/*
 *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 :
11 : #include "webrtc/modules/desktop_capture/differ_vector_sse2.h"
12 :
13 : #if defined(_MSC_VER)
14 : #include <intrin.h>
15 : #else
16 : #include <mmintrin.h>
17 : #include <emmintrin.h>
18 : #endif
19 :
namespace webrtc {

namespace {

// Returns true if the first |kVectors| * 16 bytes at |image1| differ in any
// byte from the first |kVectors| * 16 bytes at |image2|.
//
// Both buffers are read with unaligned loads, so no particular alignment is
// required; each must be readable for at least |kVectors| * 16 bytes.
//
// The trip count is a compile-time constant so that an optimizing compiler
// can fully unroll the loop, matching the hand-unrolled code this replaces.
template <int kVectors>
inline bool VectorDifference_SSE2(const uint8_t* image1,
                                  const uint8_t* image2) {
  const __m128i* i1 = reinterpret_cast<const __m128i*>(image1);
  const __m128i* i2 = reinterpret_cast<const __m128i*>(image2);

  // Running sum of absolute byte differences. _mm_sad_epu8 yields one
  // partial sum in the low 16 bits of each 64-bit half, so |acc| carries two
  // independent partial sums.
  __m128i acc = _mm_setzero_si128();
  for (int i = 0; i < kVectors; ++i) {
    const __m128i v0 = _mm_loadu_si128(i1 + i);
    const __m128i v1 = _mm_loadu_si128(i2 + i);
    // Saturating add: a non-zero partial sum can never wrap back to zero.
    acc = _mm_adds_epu16(acc, _mm_sad_epu8(v0, v1));
  }

  // Fold the two halves together: shuffle control 0xEE copies the upper
  // 64 bits of |acc| into the lower 64 bits, which is then added to |acc|.
  // Only the low 16 bits of the result carry the total, and the bits above
  // them within the extracted 32-bit word are zero.
  const __m128i upper = _mm_shuffle_epi32(acc, 0xEE);
  const __m128i total = _mm_adds_epu16(upper, acc);
  return _mm_cvtsi128_si32(total) != 0;
}

}  // namespace

// Compares 64 bytes (four 16-byte vectors) starting at |image1| and |image2|.
// Returns true if any byte differs, false if all 64 bytes are identical.
// NOTE(review): "W16" presumably means a 16-pixel-wide row at 4 bytes per
// pixel - confirm against the callers.
extern bool VectorDifference_SSE2_W16(const uint8_t* image1,
                                      const uint8_t* image2) {
  return VectorDifference_SSE2<4>(image1, image2);
}

// Compares 128 bytes (eight 16-byte vectors) starting at |image1| and
// |image2|. Returns true if any byte differs, false if all 128 bytes are
// identical.
// NOTE(review): "W32" presumably means a 32-pixel-wide row at 4 bytes per
// pixel - confirm against the callers.
extern bool VectorDifference_SSE2_W32(const uint8_t* image1,
                                      const uint8_t* image2) {
  return VectorDifference_SSE2<8>(image1, image2);
}

}  // namespace webrtc
|