Line data Source code
1 : /*
2 : * Copyright 2009 The Android Open Source Project
3 : *
4 : * Use of this source code is governed by a BSD-style license that can be
5 : * found in the LICENSE file.
6 : */
7 :
8 : #include "SkBitmapProcState_opts_SSE2.h"
9 : #include "SkBitmapProcState_opts_SSSE3.h"
10 : #include "SkBitmapScaler.h"
11 : #include "SkBlitMask.h"
12 : #include "SkBlitRow.h"
13 : #include "SkBlitRow_opts_SSE2.h"
14 : #include "SkCpu.h"
15 :
16 :
17 : /*
18 : *****************************************
19 : *********This file is deprecated*********
20 : *****************************************
21 : * New CPU-specific work should be done in
22 : * SkOpts framework. Run-time detection of
23 : * available instruction set extensions is
24 : * implemented in src/core/SkOpts.cpp file
25 : *****************************************
26 : */
27 :
28 :
29 : /* This file must *not* be compiled with -msse or any other optional SIMD
30 : extension, otherwise gcc may generate SIMD instructions even for scalar ops
31 : (and thus give an invalid instruction on Pentium3 on the code below).
32 : For example, only files named *_SSE2.cpp in this directory should be
33 : compiled with -msse2 or higher. */
34 :
35 : ////////////////////////////////////////////////////////////////////////////////
36 :
37 141 : void SkBitmapProcState::platformProcs() {
38 : /* Every optimization in the function requires at least SSE2 */
39 141 : if (!SkCpu::Supports(SkCpu::SSE2)) {
40 0 : return;
41 : }
42 141 : const bool ssse3 = SkCpu::Supports(SkCpu::SSSE3);
43 :
44 : /* Check fSampleProc32 */
45 141 : if (fSampleProc32 == S32_opaque_D32_filter_DX) {
46 42 : if (ssse3) {
47 42 : fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
48 : } else {
49 0 : fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
50 : }
51 99 : } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
52 0 : if (ssse3) {
53 0 : fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
54 : }
55 99 : } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
56 0 : if (ssse3) {
57 0 : fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
58 : } else {
59 0 : fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
60 : }
61 99 : } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
62 0 : if (ssse3) {
63 0 : fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
64 : }
65 : }
66 :
67 : /* Check fMatrixProc */
68 141 : if (fMatrixProc == ClampX_ClampY_filter_scale) {
69 25 : fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
70 116 : } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
71 0 : fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
72 116 : } else if (fMatrixProc == ClampX_ClampY_filter_affine) {
73 0 : fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
74 116 : } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
75 0 : fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
76 : }
77 : }
78 :
79 : ////////////////////////////////////////////////////////////////////////////////
80 :
81 : static const SkBlitRow::Proc16 platform_16_procs[] = {
82 : S32_D565_Opaque_SSE2, // S32_D565_Opaque
83 : nullptr, // S32_D565_Blend
84 : S32A_D565_Opaque_SSE2, // S32A_D565_Opaque
85 : nullptr, // S32A_D565_Blend
86 : S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither
87 : nullptr, // S32_D565_Blend_Dither
88 : S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither
89 : nullptr, // S32A_D565_Blend_Dither
90 : };
91 :
92 0 : SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) {
93 0 : if (SkCpu::Supports(SkCpu::SSE2)) {
94 0 : return platform_16_procs[flags];
95 : } else {
96 0 : return nullptr;
97 : }
98 : }
99 :
100 : static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE2[] = {
101 : Color32A_D565_SSE2, // Color32A_D565,
102 : nullptr, // Color32A_D565_Dither
103 : };
104 :
105 0 : SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) {
106 : /* If you're thinking about writing an SSE4 version of this, do check it's
107 : * actually faster on Atom. Our original SSE4 version was slower than this
108 : * SSE2 version on Silvermont, and only marginally faster on a Core i7,
109 : * mainly due to the MULLD timings.
110 : */
111 0 : if (SkCpu::Supports(SkCpu::SSE2)) {
112 0 : return platform_565_colorprocs_SSE2[flags];
113 : } else {
114 0 : return nullptr;
115 : }
116 : }
117 :
118 : static const SkBlitRow::Proc32 platform_32_procs_SSE2[] = {
119 : nullptr, // S32_Opaque,
120 : S32_Blend_BlitRow32_SSE2, // S32_Blend,
121 : nullptr, // Ported to SkOpts
122 : S32A_Blend_BlitRow32_SSE2, // S32A_Blend,
123 : };
124 :
125 204 : SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
126 204 : if (SkCpu::Supports(SkCpu::SSE2)) {
127 204 : return platform_32_procs_SSE2[flags];
128 : } else {
129 0 : return nullptr;
130 : }
131 : }
132 :
133 : ////////////////////////////////////////////////////////////////////////////////
134 :
135 419 : SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
136 419 : if (SkCpu::Supports(SkCpu::SSE2)) {
137 419 : if (isOpaque) {
138 315 : return SkBlitLCD16OpaqueRow_SSE2;
139 : } else {
140 104 : return SkBlitLCD16Row_SSE2;
141 : }
142 : } else {
143 0 : return nullptr;
144 : }
145 :
146 : }
147 :
148 61 : SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkColorType, SkMask::Format, RowFlags) {
149 61 : return nullptr;
150 : }
|