// Shared SSE2 helper: takes four float lanes, applies an sRGB-style encoding
// (linear segment below a threshold, approximated power curve above it) and
// returns the results as four 32-bit integer lanes.
// NOTE(review): this view carries stray line-number tokens and is missing
// some original lines (for example the second operand of xunder); the
// comments below describe only the operations that are visible.
19 #include "convert_srgb.h" 22 #if defined(__SSE2__) || (_M_IX86_FP >= 2) || defined(_M_X64) || defined(_M_AMD64) 24 #include <xmmintrin.h> 25 #include <emmintrin.h> 27 static INLINE __m128i _encode_sRGB_sse2_mul255(__m128 val) {
// Clamp every lane to the range [0, 1].
38 val = _mm_max_ps(val, _mm_set1_ps(0.0f));
39 val = _mm_min_ps(val, _mm_set1_ps(1.0f));
// Pre-scale by a large constant ahead of the bit-level trick below.
// NOTE(review): presumably compensates for the float exponent bias - confirm.
42 __m128 xf = _mm_mul_ps(val, _mm_set1_ps(6.3307e18f));
// Reinterpret the float bits as integers and convert that integer to float,
// scale it by 2/3, then convert back to integer and reinterpret the bits as
// float again.
// NOTE(review): looks like a fast approximate power computed through the
// exponent bits - confirm the intended exponent against the derivation.
45 xf = _mm_cvtepi32_ps(_mm_castps_si128(xf));
48 xf = _mm_mul_ps(xf, _mm_set1_ps(2.0f / 3.0f));
51 xf = _mm_castsi128_ps(_mm_cvtps_epi32(xf));
// Two estimates that are blended below.  The names suggest one overshoots and
// one undershoots; the rest of the xunder expression is not visible here.
54 __m128 xover = _mm_mul_ps(val, xf);
55 __m128 xunder = _mm_mul_ps(_mm_mul_ps(val, val),
// Sum of the two estimates scaled by 0.5286098.
59 __m128 xavg = _mm_mul_ps(_mm_add_ps(xover, xunder),
60 _mm_set1_ps(0.5286098f));
// x * rsqrt(x) approximates sqrt(x); doing it twice approximates the fourth
// root of the blended estimate.
64 xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg));
65 xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg));
// Scale and offset the curve result: xavg * 269.122 - 13.55.
70 xavg = _mm_mul_ps(xavg, _mm_set1_ps(269.122f));
71 xavg = _mm_sub_ps(xavg, _mm_set1_ps(13.55f));
// Linear segment.  _mm_set_ps lists lanes from highest to lowest, so lanes
// 0-2 are scaled by 3294.6 and lane 3 by 255.  Adding 0.5 makes the final
// truncating conversion round to nearest.
76 __m128 lval = _mm_mul_ps(val,
77 _mm_set_ps(255.0f, 3294.6f, 3294.6f, 3294.6f));
79 lval = _mm_add_ps(lval, _mm_set1_ps(0.5f));
// Lanes 0-2 select the curve result when val >= 0.0031308.  Lane 3 compares
// against 2.0, which a value clamped to [0, 1] never reaches, so lane 3
// always takes the linear result.
84 __m128 mask = _mm_cmpge_ps(val,
85 _mm_set_ps(2.0f, 0.0031308f, 0.0031308f, 0.0031308f));
// Per-lane select between the curve and linear results, then convert to
// 32-bit integers with truncation.
88 return _mm_cvttps_epi32(_mm_or_ps(
89 _mm_and_ps(mask, xavg),
90 _mm_andnot_ps(mask, lval)));
// Encodes a single float value: broadcasts val to all four lanes, runs the
// shared SSE2 helper, and returns the low 16 bits of lane 0 cast to
// unsigned char.
// NOTE(review): the declared return type and the closing brace are not
// visible in this view.
94 encode_sRGB_uchar_sse2(
float val) {
97 return (
unsigned char)
98 _mm_extract_epi16(_encode_sRGB_sse2_mul255(_mm_set1_ps(val)), 0);
// Encodes the first three components of color and stores them in into.r,
// into.g and into.b.  The fourth lane is computed by the helper but is not
// stored by this overload.
102 encode_sRGB_uchar_sse2(
const LColorf &color,
xel &into) {
// Two alternative loads of the four floats at color.get_data(): an aligned
// load (_mm_load_ps) and an unaligned load (_mm_loadu_ps).
// NOTE(review): the preprocessor conditional that selects between them is
// not visible in this view - confirm which condition guards the aligned load.
104 __m128 vec = _mm_load_ps(color.
get_data());
106 __m128 vec = _mm_loadu_ps(color.
get_data());
// Each result occupies one 32-bit lane, so the low 16-bit halves sit at the
// even 16-bit indices 0, 2 and 4.
109 __m128i vals = _encode_sRGB_sse2_mul255(vec);
110 into.r = _mm_extract_epi16(vals, 0);
111 into.g = _mm_extract_epi16(vals, 2);
112 into.b = _mm_extract_epi16(vals, 4);
// Encodes all four components of color: the first three are stored in
// into.r, into.g and into.b, and the fourth in into_alpha.  The helper scales
// the fourth lane linearly by 255 rather than applying the curve.
116 encode_sRGB_uchar_sse2(
const LColorf &color,
xel &into, xelval &into_alpha) {
// Two alternative loads of the four floats at color.get_data(): an aligned
// load (_mm_load_ps) and an unaligned load (_mm_loadu_ps).
// NOTE(review): the preprocessor conditional that selects between them is
// not visible in this view - confirm which condition guards the aligned load.
118 __m128 vec = _mm_load_ps(color.
get_data());
120 __m128 vec = _mm_loadu_ps(color.
get_data());
// Each result occupies one 32-bit lane, so the low 16-bit halves sit at the
// even 16-bit indices 0, 2, 4 and 6.
123 __m128i vals = _encode_sRGB_sse2_mul255(vec);
124 into.r = _mm_extract_epi16(vals, 0);
125 into.g = _mm_extract_epi16(vals, 2);
126 into.b = _mm_extract_epi16(vals, 4);
127 into_alpha = _mm_extract_epi16(vals, 6);
// Fallback overload for a single float: emits a compile-time warning and
// forwards to encode_sRGB_uchar.
// NOTE(review): presumably this is the branch taken when the SSE2 check at
// the top of the file fails; the #else directive is not visible in this view.
134 #warning convert_srgb_sse2.cxx is being compiled without SSE2 support! 137 encode_sRGB_uchar_sse2(
float val) {
138 return encode_sRGB_uchar(val);
// Fallback overload for color without alpha: forwards both arguments
// unchanged to encode_sRGB_uchar, which writes the result into `into`.
142 encode_sRGB_uchar_sse2(
const LColorf &color,
xel &into) {
143 encode_sRGB_uchar(color, into);
// Fallback overload for color with alpha: forwards all three arguments
// unchanged to encode_sRGB_uchar, which writes `into` and `into_alpha`.
147 encode_sRGB_uchar_sse2(
const LColorf &color,
xel &into, xelval &into_alpha) {
148 encode_sRGB_uchar(color, into, into_alpha);
const float * get_data() const
Returns the address of the first of the four data elements in the vector.
This is the base class for all three-component vectors and points.