Line data Source code
1 : // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include <emmintrin.h>
6 : #include "yuv_row.h"
7 :
8 : namespace mozilla {
9 : namespace gfx {
10 :
// FilterRows combines two rows of the image using linear interpolation.
//
// For every i in [0, source_width):
//   ybuf[i] = (y0_ptr[i] * (256 - source_y_fraction) +
//              y1_ptr[i] * source_y_fraction) >> 8
//
// ybuf              - destination row; exactly source_width bytes are written.
// y0_ptr, y1_ptr    - the two source rows; exactly source_width bytes are read
//                     from each.
// source_width      - number of pixels to produce; values <= 0 are a no-op.
// source_y_fraction - weight of the second row in 1/256 units. The arithmetic
//                     is carried out in 16-bit lanes, so the result is an
//                     exact blend only while the weight is within [0, 256].
//
// The SSE2 loop handles 16 pixels at a time; any remaining pixels (fewer than
// 16) are produced by a scalar loop, so nothing outside the requested range is
// touched. None of the pointers needs to be 16-byte aligned.
void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
                     int source_width, int source_y_fraction) {
  if (source_width <= 0) {
    return;
  }

  const __m128i zero = _mm_setzero_si128();
  const __m128i y1_fraction =
      _mm_set1_epi16(static_cast<short>(source_y_fraction));
  const __m128i y0_fraction =
      _mm_set1_epi16(static_cast<short>(256 - source_y_fraction));

  int x = 0;

  // Vector part: only whole 16-pixel chunks, so no read or write ever goes
  // beyond source_width.
  for (; source_width - x >= 16; x += 16) {
    const __m128i row0 =
        _mm_loadu_si128(reinterpret_cast<const __m128i*>(y0_ptr + x));
    const __m128i row1 =
        _mm_loadu_si128(reinterpret_cast<const __m128i*>(y1_ptr + x));

    // Widen the 16 bytes of each row into two vectors of eight 16-bit lanes.
    const __m128i row0_lo = _mm_unpacklo_epi8(row0, zero);
    const __m128i row0_hi = _mm_unpackhi_epi8(row0, zero);
    const __m128i row1_lo = _mm_unpacklo_epi8(row1, zero);
    const __m128i row1_hi = _mm_unpackhi_epi8(row1, zero);

    // Weighted sum per lane; with weights summing to 256 the total is at most
    // 255 * 256, which still fits an unsigned 16-bit lane.
    __m128i lo = _mm_add_epi16(_mm_mullo_epi16(row0_lo, y0_fraction),
                               _mm_mullo_epi16(row1_lo, y1_fraction));
    __m128i hi = _mm_add_epi16(_mm_mullo_epi16(row0_hi, y0_fraction),
                               _mm_mullo_epi16(row1_hi, y1_fraction));

    // Divide by 256 (logical shift) and narrow back to bytes.
    lo = _mm_srli_epi16(lo, 8);
    hi = _mm_srli_epi16(hi, 8);

    // Unaligned store: the loads already tolerate unaligned rows, and this
    // keeps the function safe for an unaligned destination as well.
    _mm_storeu_si128(reinterpret_cast<__m128i*>(ybuf + x),
                     _mm_packus_epi16(lo, hi));
  }

  // Scalar tail for the last (source_width % 16) pixels. The sum is reduced
  // modulo 2^16 so it matches the 16-bit lane arithmetic above bit for bit.
  const unsigned w1 = static_cast<unsigned>(source_y_fraction);
  const unsigned w0 = 256u - w1;
  for (; x < source_width; ++x) {
    const unsigned sum = (y0_ptr[x] * w0 + y1_ptr[x] * w1) & 0xFFFFu;
    ybuf[x] = static_cast<uint8>(sum >> 8);
  }
}
45 :
46 : } // namespace gfx
47 : } // namespace mozilla
|