32bpp_sse2.hpp

Go to the documentation of this file.
00001 /* $Id: 32bpp_sse2.hpp 26214 2014-01-02 23:52:13Z rubidium $ */
00002 
00003 /*
00004  * This file is part of OpenTTD.
00005  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
00006  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00007  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
00008  */
00009 
00012 #ifndef BLITTER_32BPP_SSE2_HPP
00013 #define BLITTER_32BPP_SSE2_HPP
00014 
00015 #ifdef WITH_SSE
00016 
00017 #include "32bpp_simple.hpp"
00018 #include "emmintrin.h"
00019 
/** Number of uint32 inserted before each line of pixels in a sprite. */
#define META_LENGTH 2

/**
 * Minimum width to use margins with BM_NORMAL.
 * @note Expands to an expression that reads a variable named \c zoom at the
 *       place of use, so it can only be used where such a variable is in scope.
 */
#define MARGIN_NORMAL_THRESHOLD (zoom == ZOOM_LVL_OUT_32X ? 8 : 4)

/** Minimum width to use margins with BM_COLOUR_REMAP. */
#define MARGIN_REMAP_THRESHOLD 4
00023 
/**
 * Compiler-neutral way to request an alignment of \a n bytes for a type or
 * variable: the Microsoft spelling when _MSC_VER is defined, the GNU attribute
 * spelling otherwise.
 */
#if defined(_MSC_VER)
  #define ALIGN(n) __declspec(align(n))
#else
  #define ALIGN(n) __attribute__ ((aligned (n)))
#endif
00029 
00030 typedef union ALIGN(16) um128i {
00031   __m128i m128i;
00032   uint8 m128i_u8[16];
00033   uint16 m128i_u16[8];
00034   uint32 m128i_u32[4];
00035   uint64 m128i_u64[2];
00036 } um128i;
00037 
/*
 * 128-bit constants, each written as its low 8 bytes (first row) followed by
 * its high 8 bytes (second row); _mm_setr_epi8 takes bytes in memory order.
 * Only the role of CLEAR_HIGH_BYTE_MASK can be read off this header (see the
 * 'clear_hi' operand of ALPHA_BLEND_2).
 * NOTE(review): the *_CONTROL_MASK values look like byte-shuffle selectors and
 * the other masks like helpers for brightness handling used by related
 * blitters -- confirm against the code that references them.
 */

/** 0x00FF in every 16-bit lane: keeps the low byte, wipes the high byte. */
#define CLEAR_HIGH_BYTE_MASK _mm_setr_epi8( \
    -1,  0, -1,  0, -1,  0, -1,  0, \
    -1,  0, -1,  0, -1,  0, -1,  0)

/** Selects bytes 6/7 three times per low half and bytes 14/15 three times per high half; -1 elsewhere. */
#define ALPHA_CONTROL_MASK _mm_setr_epi8( \
     6,  7,  6,  7,  6,  7, -1, -1, \
    14, 15, 14, 15, 14, 15, -1, -1)

/** Selects the even bytes 0, 2, 4 and 8, 10, 12 into the low half; -1 elsewhere. */
#define PACK_LOW_CONTROL_MASK _mm_setr_epi8( \
     0,  2,  4, -1,  8, 10, 12, -1, \
    -1, -1, -1, -1, -1, -1, -1, -1)

/** Selects the even bytes 0, 2, 4 and 8, 10, 12 into the high half; -1 elsewhere. */
#define PACK_HIGH_CONTROL_MASK _mm_setr_epi8( \
    -1, -1, -1, -1, -1, -1, -1, -1, \
     0,  2,  4, -1,  8, 10, 12, -1)

#define BRIGHTNESS_LOW_CONTROL_MASK _mm_setr_epi8( \
     1,  2,  1,  2,  1,  2,  0,  2, \
     3,  2,  3,  2,  3,  2,  0,  2)

#define BRIGHTNESS_DIV_CLEANER _mm_setr_epi8( \
    -1,  1, -1,  1, -1,  1, -1,  0, \
    -1,  1, -1,  1, -1,  1, -1,  0)

/** 0x0001 in the first three 16-bit lanes of each half, 0 in the fourth. */
#define OVERBRIGHT_PRESENCE_MASK _mm_setr_epi8( \
     1,  0,  1,  0,  1,  0,  0,  0, \
     1,  0,  1,  0,  1,  0,  0,  0)

/** 0x00FF in the first three 16-bit lanes of each half, 0 in the fourth. */
#define OVERBRIGHT_VALUE_MASK _mm_setr_epi8( \
    -1,  0, -1,  0, -1,  0,  0,  0, \
    -1,  0, -1,  0, -1,  0,  0,  0)

#define OVERBRIGHT_CONTROL_MASK _mm_setr_epi8( \
     0,  1,  0,  1,  0,  1,  7,  7, \
     2,  3,  2,  3,  2,  3,  7,  7)

/** The value 256 in each of the eight 16-bit lanes. */
#define TRANSPARENT_NOM_BASE _mm_setr_epi16( \
    256, 256, 256, 256, \
    256, 256, 256, 256)
00048 
/*
 * Lane access helpers for 128-bit values. They reinterpret the storage of
 * 'from'/'into' as an um128i, so the argument must be an lvalue holding a
 * 128-bit value. All macro arguments are parenthesised so that compound
 * expressions (e.g. 'cond ? a : b' or 'i + 1') expand as intended.
 */

/** Access (read or write) 32-bit lane \a rank (0..3) of \a from. */
#define EXTR32(from, rank) ((*(um128i*) &(from)).m128i_u32[(rank)])

/** Access (read or write) 64-bit lane \a rank (0..1) of \a from. */
#define EXTR64(from, rank) ((*(um128i*) &(from)).m128i_u64[(rank)])

/**
 * Store the 32-bit value \a val in 32-bit lane \a rank (0..3) of \a into,
 * using two 16-bit inserts (low half first, then the high half).
 * @note \a rank ends up as the immediate operand of _mm_insert_epi16 and must
 *       therefore be a compile-time constant.
 * @note \a val and \a into are evaluated more than once; do not pass
 *       expressions with side effects.
 * @note Expands to a brace-enclosed block, not an expression.
 */
#define INSR32(val, into, rank) { \
  (*(um128i*) &(into)).m128i = _mm_insert_epi16((*(um128i*) &(into)).m128i, (val), (rank)*2); \
  (*(um128i*) &(into)).m128i = _mm_insert_epi16((*(um128i*) &(into)).m128i, (val) >> 16, (rank)*2 + 1); \
}

/** Store the 64-bit value \a val in 64-bit lane \a rank (0..1) of \a into. */
#define INSR64(val, into, rank) ((*(um128i*) &(into)).m128i_u64[(rank)] = (val))
00056 
/**
 * Alpha blend 2 pixels.
 *
 * Works on names taken from the scope where the macro is expanded:
 *  - srcABCD  (__m128i, read and written): source pixels; the low 8 bytes are used.
 *  - dstABCD  (__m128i, read only): destination pixels; the low 8 bytes are used.
 *  - clear_hi (__m128i, read only): mask applied to every 16-bit lane before packing.
 *
 * On exit the low 8 bytes of srcABCD hold the blended result; the high 8 bytes
 * hold a copy of the same 8 bytes.
 */
#define ALPHA_BLEND_2() { \
  const __m128i zero  = _mm_setzero_si128(); \
  const __m128i src16 = _mm_unpacklo_epi8(srcABCD, zero);  /* PUNPCKLBW, widen each uint8 to uint16 */ \
  const __m128i dst16 = _mm_unpacklo_epi8(dstABCD, zero); \
  \
  /* Per-lane weight: value + 1 for non-zero lanes, 0 for zero lanes. */ \
  __m128i weight = _mm_cmpgt_epi16(src16, zero);           /* PCMPGTW, all ones where lane > 0 */ \
  weight = _mm_srli_epi16(weight, 15);                     /* PSRLW, all ones -> 1 */ \
  weight = _mm_add_epi16(weight, src16);                   /* PADDW */ \
  /* 0x3F: lanes 0..2 of each half take lane 3 (the alpha lane), lane 3 takes lane 0. */ \
  weight = _mm_shufflelo_epi16(weight, 0x3F);              /* PSHUFLW, first pixel */ \
  weight = _mm_shufflehi_epi16(weight, 0x3F);              /* PSHUFHW, second pixel */ \
  \
  __m128i mix = _mm_sub_epi16(src16, dst16);               /* PSUBW,    (r - Cr) */ \
  mix = _mm_mullo_epi16(mix, weight);                      /* PMULLW, a*(r - Cr) */ \
  mix = _mm_srli_epi16(mix, 8);                            /* PSRLW,  a*(r - Cr)/256 */ \
  mix = _mm_add_epi16(mix, dst16);                         /* PADDW,  a*(r - Cr)/256 + Cr */ \
  mix = _mm_and_si128(mix, clear_hi);                      /* PAND, keep only the bytes selected by clear_hi */ \
  srcABCD = _mm_packus_epi16(mix, mix);                    /* PACKUSWB, back to uint8 (with saturation) */ \
}
00075 
00077 class Blitter_32bppSSE_Base {
00078 public:
00079   virtual ~Blitter_32bppSSE_Base() {}
00080 
00081   struct MapValue {
00082     uint8 m;
00083     uint8 v;
00084   };
00085   assert_compile(sizeof(MapValue) == 2);
00086 
00088   enum ReadMode {
00089     RM_WITH_SKIP,   
00090     RM_WITH_MARGIN, 
00091     RM_NONE,        
00092   };
00093 
00095   enum BlockType {
00096     BT_EVEN, 
00097     BT_ODD,  
00098     BT_NONE, 
00099   };
00100 
00102   struct SpriteInfo {
00103     uint32 sprite_offset;    
00104     uint32 mv_offset;        
00105     uint16 sprite_line_size; 
00106     uint16 sprite_width;     
00107   };
00108   struct SpriteData {
00109     SpriteInfo infos[ZOOM_LVL_COUNT];
00110     byte data[]; 
00111   };
00112 
00113   Sprite *Encode(const SpriteLoader::Sprite *sprite, AllocatorProc *allocator);
00114   virtual Colour AdjustBrightness(Colour colour, uint8 brightness) = 0;
00115 };
00116 
00118 class Blitter_32bppSSE2 : public Blitter_32bppSimple, public Blitter_32bppSSE_Base {
00119 public:
00120   virtual Colour AdjustBrightness(Colour colour, uint8 brightness);
00121   static Colour ReallyAdjustBrightness(Colour colour, uint8 brightness);
00122   /* virtual */ void Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom);
00123   template <BlitterMode mode, Blitter_32bppSSE_Base::ReadMode read_mode, Blitter_32bppSSE_Base::BlockType bt_last>
00124   void Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom);
00125 
00126   /* virtual */ Sprite *Encode(const SpriteLoader::Sprite *sprite, AllocatorProc *allocator) {
00127     return Blitter_32bppSSE_Base::Encode(sprite, allocator);
00128   }
00129 
00130   /* virtual */ const char *GetName() { return "32bpp-sse2"; }
00131 };
00132 
00134 class FBlitter_32bppSSE2 : public BlitterFactory {
00135 public:
00136   FBlitter_32bppSSE2() : BlitterFactory("32bpp-sse2", "32bpp SSE2 Blitter (no palette animation)", HasCPUIDFlag(1, 3, 26)) {}
00137   /* virtual */ Blitter *CreateInstance() { return new Blitter_32bppSSE2(); }
00138 };
00139 
00140 #endif /* WITH_SSE */
00141 #endif /* BLITTER_32BPP_SSE2_HPP */