mirror of https://github.com/OpenTTD/OpenTTD
(svn r26255) -Codechange: improve performance of brightness adjustment (MJP)
parent
2f7c4f6d12
commit
70901e04c5
|
@ -294,7 +294,7 @@ inline Colour Blitter_32bppSSE2::AdjustBrightness(Colour colour, uint8 brightnes
|
||||||
}
|
}
|
||||||
|
|
||||||
IGNORE_UNINITIALIZED_WARNING_START
|
IGNORE_UNINITIALIZED_WARNING_START
|
||||||
/* static */ Colour Blitter_32bppSSE2::ReallyAdjustBrightness(Colour colour, uint8 brightness)
|
Colour Blitter_32bppSSE2::ReallyAdjustBrightness(Colour colour, uint8 brightness)
|
||||||
{
|
{
|
||||||
uint64 c16 = colour.b | (uint64) colour.g << 16 | (uint64) colour.r << 32;
|
uint64 c16 = colour.b | (uint64) colour.g << 16 | (uint64) colour.r << 32;
|
||||||
c16 *= brightness;
|
c16 *= brightness;
|
||||||
|
@ -304,24 +304,14 @@ IGNORE_UNINITIALIZED_WARNING_START
|
||||||
|
|
||||||
/* Sum overbright (maximum for each rgb is 508, 9 bits, -255 is changed in -256 so we just have to take the 8 lower bits into account). */
|
/* Sum overbright (maximum for each rgb is 508, 9 bits, -255 is changed in -256 so we just have to take the 8 lower bits into account). */
|
||||||
c16_ob = (((c16_ob >> (8 + 7)) & 0x0100010001) * 0xFF) & c16;
|
c16_ob = (((c16_ob >> (8 + 7)) & 0x0100010001) * 0xFF) & c16;
|
||||||
uint64 ob = (uint16) c16_ob + (uint16) (c16_ob >> 16) + (uint16) (c16_ob >> 32);
|
const uint ob = ((uint16) c16_ob + (uint16) (c16_ob >> 16) + (uint16) (c16_ob >> 32)) / 2;
|
||||||
|
|
||||||
const uint32 alpha32 = colour.data & 0xFF000000;
|
const uint32 alpha32 = colour.data & 0xFF000000;
|
||||||
__m128i ret;
|
__m128i ret;
|
||||||
#ifdef _SQ64
|
LOAD64(c16, ret);
|
||||||
ret = _mm_cvtsi64_si128(c16);
|
|
||||||
#else
|
|
||||||
INSR64(c16, ret, 0);
|
|
||||||
#endif
|
|
||||||
if (ob != 0) {
|
if (ob != 0) {
|
||||||
/* Reduce overbright strength. */
|
__m128i ob128 = _mm_cvtsi32_si128(ob);
|
||||||
ob /= 2;
|
ob128 = _mm_shufflelo_epi16(ob128, 0xC0);
|
||||||
__m128i ob128;
|
|
||||||
#ifdef _SQ64
|
|
||||||
ob128 = _mm_cvtsi64_si128(ob | ob << 16 | ob << 32);
|
|
||||||
#else
|
|
||||||
INSR64(ob | ob << 16 | ob << 32, ob128, 0);
|
|
||||||
#endif
|
|
||||||
__m128i white = OVERBRIGHT_VALUE_MASK;
|
__m128i white = OVERBRIGHT_VALUE_MASK;
|
||||||
__m128i c128 = ret;
|
__m128i c128 = ret;
|
||||||
ret = _mm_subs_epu16(white, c128); /* PSUBUSW, (255 - rgb) */
|
ret = _mm_subs_epu16(white, c128); /* PSUBUSW, (255 - rgb) */
|
||||||
|
|
|
@ -54,6 +54,12 @@ typedef union ALIGN(16) um128i {
|
||||||
}
|
}
|
||||||
#define INSR64(m_val, m_into, m_rank) (*(um128i*) &m_into).m128i_u64[m_rank] = (m_val)
|
#define INSR64(m_val, m_into, m_rank) (*(um128i*) &m_into).m128i_u64[m_rank] = (m_val)
|
||||||
|
|
||||||
|
#ifdef _SQ64
|
||||||
|
/* 64-bit path (_SQ64 defined): load m_val into m_into through _mm_cvtsi64_si128.
 * Deliberately no trailing semicolon: the call site's own ';' ends the statement,
 * so the macro stays usable in an unbraced if/else and matches the INSR64-based
 * variant in the #else branch. */
#define LOAD64(m_val, m_into) m_into = _mm_cvtsi64_si128(m_val)
|
||||||
|
#else
|
||||||
|
/* Fallback when _SQ64 is not defined: forward to INSR64, which assigns m_val
 * to the 64-bit element 0 of m_into. */
#define LOAD64(m_val, m_into) INSR64(m_val, m_into, 0)
|
||||||
|
#endif
|
||||||
|
|
||||||
/* PUT_ALPHA_IN_FRONT_OF_RGB is redefined in 32bpp_ssse3.hpp. */
|
/* PUT_ALPHA_IN_FRONT_OF_RGB is redefined in 32bpp_ssse3.hpp. */
|
||||||
#define PUT_ALPHA_IN_FRONT_OF_RGB(m_from, m_into) \
|
#define PUT_ALPHA_IN_FRONT_OF_RGB(m_from, m_into) \
|
||||||
m_into = _mm_shufflelo_epi16(m_from, 0x3F); /* PSHUFLW, put alpha1 in front of each rgb1 */ \
|
m_into = _mm_shufflelo_epi16(m_from, 0x3F); /* PSHUFLW, put alpha1 in front of each rgb1 */ \
|
||||||
|
|
|
@ -232,7 +232,7 @@ inline Colour Blitter_32bppSSE4::AdjustBrightness(Colour colour, uint8 brightnes
|
||||||
}
|
}
|
||||||
|
|
||||||
IGNORE_UNINITIALIZED_WARNING_START
|
IGNORE_UNINITIALIZED_WARNING_START
|
||||||
/* static */ Colour Blitter_32bppSSE4::ReallyAdjustBrightness(Colour colour, uint8 brightness)
|
Colour Blitter_32bppSSE4::ReallyAdjustBrightness(Colour colour, uint8 brightness)
|
||||||
{
|
{
|
||||||
uint64 c16 = colour.b | (uint64) colour.g << 16 | (uint64) colour.r << 32;
|
uint64 c16 = colour.b | (uint64) colour.g << 16 | (uint64) colour.r << 32;
|
||||||
c16 *= brightness;
|
c16 *= brightness;
|
||||||
|
@ -242,16 +242,14 @@ IGNORE_UNINITIALIZED_WARNING_START
|
||||||
|
|
||||||
/* Sum overbright (maximum for each rgb is 508, 9 bits, -255 is changed in -256 so we just have to take the 8 lower bits into account). */
|
/* Sum overbright (maximum for each rgb is 508, 9 bits, -255 is changed in -256 so we just have to take the 8 lower bits into account). */
|
||||||
c16_ob = (((c16_ob >> (8 + 7)) & 0x0100010001) * 0xFF) & c16;
|
c16_ob = (((c16_ob >> (8 + 7)) & 0x0100010001) * 0xFF) & c16;
|
||||||
uint64 ob = (uint16) c16_ob + (uint16) (c16_ob >> 16) + (uint16) (c16_ob >> 32);
|
const uint ob = ((uint16) c16_ob + (uint16) (c16_ob >> 16) + (uint16) (c16_ob >> 32)) / 2;
|
||||||
|
|
||||||
const uint32 alpha32 = colour.data & 0xFF000000;
|
const uint32 alpha32 = colour.data & 0xFF000000;
|
||||||
__m128i ret;
|
__m128i ret;
|
||||||
INSR64(c16, ret, 0);
|
LOAD64(c16, ret);
|
||||||
if (ob != 0) {
|
if (ob != 0) {
|
||||||
/* Reduce overbright strength. */
|
__m128i ob128 = _mm_cvtsi32_si128(ob);
|
||||||
ob /= 2;
|
ob128 = _mm_shufflelo_epi16(ob128, 0xC0);
|
||||||
__m128i ob128;
|
|
||||||
INSR64(ob | ob << 16 | ob << 32, ob128, 0);
|
|
||||||
__m128i white = OVERBRIGHT_VALUE_MASK;
|
__m128i white = OVERBRIGHT_VALUE_MASK;
|
||||||
__m128i c128 = ret;
|
__m128i c128 = ret;
|
||||||
ret = _mm_subs_epu16(white, c128); /* PSUBUSW, (255 - rgb) */
|
ret = _mm_subs_epu16(white, c128); /* PSUBUSW, (255 - rgb) */
|
||||||
|
|
|
@ -35,6 +35,11 @@ IGNORE_UNINITIALIZED_WARNING_START
|
||||||
(*(um128i*) &m_into).m128i = _mm_insert_epi32((*(um128i*) &m_into).m128i, v.u32.low, (m_rank)*2); \
|
(*(um128i*) &m_into).m128i = _mm_insert_epi32((*(um128i*) &m_into).m128i, v.u32.low, (m_rank)*2); \
|
||||||
(*(um128i*) &m_into).m128i = _mm_insert_epi32((*(um128i*) &m_into).m128i, v.u32.high, (m_rank)*2 + 1); \
|
(*(um128i*) &m_into).m128i = _mm_insert_epi32((*(um128i*) &m_into).m128i, v.u32.high, (m_rank)*2 + 1); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#undef LOAD64
|
||||||
|
/* Replacement LOAD64: start m_into from m_val truncated to 32 bits via
 * _mm_cvtsi32_si128, then insert the upper 32 bits at 32-bit rank 1 with INSR32.
 * Wrapped in do { } while (0) so both steps form one statement, e.g. when the
 * macro is the body of an unbraced if/else; the call site supplies the ';'. */
#define LOAD64(m_val, m_into) \
	do { \
		m_into = _mm_cvtsi32_si128((int) (m_val)); \
		INSR32((m_val) >> 32, m_into, 1); \
	} while (0)
|
||||||
#endif
|
#endif
|
||||||
IGNORE_UNINITIALIZED_WARNING_STOP
|
IGNORE_UNINITIALIZED_WARNING_STOP
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue