32bpp_ssse3.hpp

Go to the documentation of this file.
00001 /* $Id: 32bpp_ssse3.hpp 26214 2014-01-02 23:52:13Z rubidium $ */
00002 
00003 /*
00004  * This file is part of OpenTTD.
00005  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
00006  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00007  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
00008  */
00009 
00012 #ifndef BLITTER_32BPP_SSSE3_HPP
00013 #define BLITTER_32BPP_SSSE3_HPP
00014 
00015 #ifdef WITH_SSE
00016 
00017 #include "32bpp_sse2.hpp"
00018 #include "tmmintrin.h"
00019 
00020 /* Alpha blend 2 pixels: the low 8 bytes of srcABCD are blended over the low 8 bytes of dstABCD and srcABCD is overwritten with the result, packed as selected by m_pack_mask. Expects srcABCD, dstABCD and a_cm to be visible where the macro is expanded. */
00021 #undef ALPHA_BLEND_2 /* Drop any earlier definition (presumably the one from 32bpp_sse2.hpp). */
00022 #define ALPHA_BLEND_2(m_pack_mask) { \
00023   __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128()); /* PUNPCKLBW, expand each uint8 into uint16 */ \
00024   __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128()); /* PUNPCKLBW, same expansion for the destination */ \
00025   \
00026   __m128i alphaAB = _mm_cmpgt_epi16(srcAB, _mm_setzero_si128());   /* PCMPGTW, if (alpha > 0) a++; */ \
00027   alphaAB = _mm_srli_epi16(alphaAB, 15);            /* PSRLW,  0xFFFF -> 1 in every word that compared > 0 */ \
00028   alphaAB = _mm_add_epi16(alphaAB, srcAB);          /* PADDW,  source words, each non-zero one incremented by 1 */ \
00029   alphaAB = _mm_shuffle_epi8(alphaAB, a_cm);        /* PSHUFB, put alpha in front of each rgb */ \
00030   \
00031   srcAB = _mm_sub_epi16(srcAB, dstAB);              /* PSUBW,    (r - Cr) */ \
00032   srcAB = _mm_mullo_epi16(srcAB, alphaAB);          /* PMULLW, a*(r - Cr) */ \
00033   srcAB = _mm_srli_epi16(srcAB, 8);                 /* PSRLW,  a*(r - Cr)/256 */ \
00034   srcAB = _mm_add_epi16(srcAB, dstAB);              /* PADDW,  a*(r - Cr)/256 + Cr */ \
00035   srcABCD = _mm_shuffle_epi8(srcAB, m_pack_mask);   /* PSHUFB, pack 2 Colour (without saturation) */ \
00036 }
00037 
00038 /* Adjust brightness of 2 pixels: the low 8 bytes of colourX2 are scaled by brightnessX2 and colourX2 is overwritten with the result (duplicated in both 64-bit halves); brightnessX2 is modified as well. Expects briAB_cm, div_cleaner, ob_check, ob_mask and ob_cm to be visible where the macro is expanded. */
00039 #define ADJUST_BRIGHTNESS_2(colourX2, brightnessX2) { \
00040   /* The following dataflow differs from the one of AdjustBrightness() only for alpha.
00041    * In order to keep alpha in colAB, insert a 1 in an unused brightness byte (a*1->a).
00042    * OK, not a 1 but DEFAULT_BRIGHTNESS to compensate the div.
00043    * The body is a braced block (like ALPHA_BLEND_2) so that its temporaries do not leak into the caller. */ \
00044   brightnessX2 &= 0xFF00FF00;         /* Keep bytes 1 and 3, clear bytes 0 and 2. */ \
00045   brightnessX2 += DEFAULT_BRIGHTNESS; /* The factor that will be applied to alpha. */ \
00046   \
00047   __m128i zero = _mm_setzero_si128(); \
00048   __m128i colAB = _mm_unpacklo_epi8(colourX2, zero); /* Expand each uint8 of the low 8 bytes into uint16. */ \
00049   \
00050   __m128i briAB; \
00051   INSR64(brightnessX2, briAB, 0);            /* Presumably inserts brightnessX2 at position 0 of briAB; INSR64 is defined elsewhere. */ \
00052   briAB = _mm_shuffle_epi8(briAB, briAB_cm); /* DEFAULT_BRIGHTNESS in 0, 0x00 in 2. */ \
00053   colAB = _mm_mullo_epi16(colAB, briAB);     /* colour * brightness, low 16 bits of each product. */ \
00054   __m128i colAB_ob = _mm_srli_epi16(colAB, 8+7); /* Highest bit of each product. */ \
00055   colAB = _mm_srli_epi16(colAB, 7);              /* colour * brightness / 128 */ \
00056   \
00057   /* Sum overbright.
00058    * Maximum for each rgb is 508 => 9 bits. The highest bit tells if there is overbright.
00059    * -255 is changed into -256 so we just have to take the 8 lower bits into account.
00060    */ \
00061   colAB = _mm_and_si128(colAB, div_cleaner); \
00062   colAB_ob = _mm_and_si128(colAB_ob, ob_check); \
00063   colAB_ob = _mm_mullo_epi16(colAB_ob, ob_mask); \
00064   colAB_ob = _mm_and_si128(colAB_ob, colAB); \
00065   __m128i obAB = _mm_hadd_epi16(_mm_hadd_epi16(colAB_ob, zero), zero); /* Word 0 = sum of words 0-3, word 1 = sum of words 4-7. */ \
00066   \
00067   obAB = _mm_srli_epi16(obAB, 1);       /* Reduce overbright strength. */ \
00068   obAB = _mm_shuffle_epi8(obAB, ob_cm); \
00069   __m128i retAB = ob_mask;              /* ob_mask is equal to white. */ \
00070   retAB = _mm_subs_epu16(retAB, colAB); /*    (255 - rgb) */ \
00071   retAB = _mm_mullo_epi16(retAB, obAB); /* ob*(255 - rgb) */ \
00072   retAB = _mm_srli_epi16(retAB, 8);     /* ob*(255 - rgb)/256 */ \
00073   retAB = _mm_add_epi16(retAB, colAB);  /* ob*(255 - rgb)/256 + rgb */ \
00074   \
00075   colourX2 = _mm_packus_epi16(retAB, retAB); /* Pack the uint16 back into uint8 with unsigned saturation. */ \
00076 }
00077 /** 32bpp blitter named "32bpp-ssse3"; it derives from the SSE2 blitter and declares its own Draw(). */
00078 class Blitter_32bppSSSE3 : public Blitter_32bppSSE2 {
00079 public:
00080   /* virtual */ const char *GetName() { return "32bpp-ssse3"; }
00081   /* virtual */ void Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom);
00082   template <BlitterMode mode, Blitter_32bppSSE_Base::ReadMode read_mode, Blitter_32bppSSE_Base::BlockType bt_last>
00083   void Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom); /* Variant selected at compile time by the template arguments. */
00084 };
00085 
00086 /** Factory creating Blitter_32bppSSSE3 instances; HasCPUIDFlag(1, 2, 9) is presumably the SSSE3 feature test (CPUID leaf 1, ECX bit 9) - confirm against HasCPUIDFlag(). */
00087 class FBlitter_32bppSSSE3 : public BlitterFactory {
00088 public:
00089   /* virtual */ Blitter *CreateInstance() { return new Blitter_32bppSSSE3(); }
00090   FBlitter_32bppSSSE3() : BlitterFactory("32bpp-ssse3", "32bpp SSSE3 Blitter (no palette animation)", HasCPUIDFlag(1, 2, 9)) {}
00091 };
00092 
00093 #endif /* WITH_SSE */
00094 #endif /* BLITTER_32BPP_SSSE3_HPP */