00001
00002
00003
00004
00005
00006
00007
00008
00009
00012 #ifdef WITH_SSE
00013
00014 #include "../stdafx.h"
00015 #include "../zoom_func.h"
00016 #include "../settings_type.h"
00017 #include "32bpp_sse4.hpp"
00018
/* File-local instance of FBlitter_32bppSSE4, constructed during static initialisation.
 * Presumably this is the factory object that registers the SSE4 blitter — confirm in 32bpp_sse4.hpp. */
00020 static FBlitter_32bppSSE4 iFBlitter_32bppSSE4;
00021
/**
 * Draw a sprite into a 32bpp destination buffer, processing two pixels per SSE step.
 *
 * The blitter mode, the way source rows are read and the handling of a trailing odd
 * pixel are template parameters, so the switches on them below test compile-time constants.
 *
 * @tparam mode      Blitter mode. BM_COLOUR_REMAP and BM_TRANSPARENT have dedicated paths;
 *                   every other mode takes the plain alpha-blend path (the 'default' label).
 * @tparam read_mode RM_WITH_MARGIN takes per-row margins from the metadata at the start of each
 *                   RGBA row; for any other value bp->skip_left is applied once before the row loop.
 * @tparam bt_last   Only consulted by the plain alpha-blend path: BT_ODD draws one extra pixel after the pairs.
 * @param bp   Blitting parameters; this function reads remap, dst, top, left, pitch, width, height,
 *             skip_top, skip_left and sprite from it.
 * @param zoom Zoom level, used as index into the sprite's 'infos' array.
 *
 * NOTE(review): every _mm_loadu_si128/_mm_storeu_si128 below touches 16 bytes although only two
 * pixels are consumed per step, and one more load is issued after the last pair. This relies on
 * the source and destination buffers being accessible a little past the end of the row — confirm.
 */
00029 IGNORE_UNINITIALIZED_WARNING_START
00030 template <BlitterMode mode, Blitter_32bppSSE2::ReadMode read_mode, Blitter_32bppSSE2::BlockType bt_last>
00031 inline void Blitter_32bppSSE4::Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom)
00032 {
00033 const byte * const remap = bp->remap;
/* First destination pixel: row bp->top, column bp->left, with rows bp->pitch pixels apart. */
00034 Colour *dst_line = (Colour *) bp->dst + bp->top * bp->pitch + bp->left;
/* Number of pixels drawn per row; recomputed for every row in RM_WITH_MARGIN mode. */
00035 int effective_width = bp->width;
00036
00037
/* Find where to start reading in the sprite: both the map-value plane and the RGBA plane are
 * advanced by bp->skip_top rows (sprite_width map values resp. sprite_line_size bytes per row). */
00038 const SpriteData * const sd = (const SpriteData *) bp->sprite;
00039 const SpriteInfo * const si = &sd->infos[zoom];
00040 const MapValue *src_mv_line = (const MapValue *) &sd->data[si->mv_offset] + bp->skip_top * si->sprite_width;
00041 const Colour *src_rgba_line = (const Colour *) ((const byte *) &sd->data[si->sprite_offset] + bp->skip_top * si->sprite_line_size);
00042
/* When the per-row margins are not used, skip the clipped left columns once, here. */
00043 if (read_mode != RM_WITH_MARGIN) {
00044 src_rgba_line += bp->skip_left;
00045 src_mv_line += bp->skip_left;
00046 }
00047
00048
/* Put the SSE constants into locals before the row loop. Only a_cm, pack_low_cm and tr_nom_base
 * are referenced by name below; the others are presumably picked up by name inside the
 * ALPHA_BLEND_2 / ADJUST_BRIGHTNESS_2 macros, which are defined elsewhere — confirm. */
00049 const __m128i a_cm = ALPHA_CONTROL_MASK;
00050 const __m128i pack_low_cm = PACK_LOW_CONTROL_MASK;
00051 const __m128i briAB_cm = BRIGHTNESS_LOW_CONTROL_MASK;
00052 const __m128i div_cleaner = BRIGHTNESS_DIV_CLEANER;
00053 const __m128i ob_check = OVERBRIGHT_PRESENCE_MASK;
00054 const __m128i ob_mask = OVERBRIGHT_VALUE_MASK;
00055 const __m128i ob_cm = OVERBRIGHT_CONTROL_MASK;
00056 const __m128i tr_nom_base = TRANSPARENT_NOM_BASE;
00057
/* One iteration per destination row. */
00058 for (int y = bp->height; y != 0; y--) {
/* Each RGBA row starts with META_LENGTH metadata entries; the pixels follow them. */
00059 const Colour *src = src_rgba_line + META_LENGTH;
00060 Colour *dst = dst_line;
00061 const MapValue *src_mv = src_mv_line;
00062
00063 switch (mode) {
/* Plain alpha blending: every mode without a dedicated case below. */
00064 default: {
00065 switch (read_mode) {
00066 case RM_WITH_MARGIN: {
/* Metadata entry 0 is used as the number of leading pixels to step over in both source and
 * destination (presumably fully transparent pixels — confirm against the sprite encoder). */
00067 src += src_rgba_line[0].data;
00068 dst += src_rgba_line[0].data;
/* Metadata entry 1 is used as a right-hand margin. Only the part of it that exceeds the
 * columns already cut off by clipping (sprite_width - bp->width) shortens the row, and that
 * part is rounded down to an even count (delta_diff & ~1).
 * NOTE(review): bt_last is fixed by the caller from bp->width, so the odd-pixel handling below
 * only matches effective_width if the left margin is even — confirm in the encoder. */
00069 const int width_diff = si->sprite_width - bp->width;
00070 effective_width = bp->width - (int) src_rgba_line[0].data;
00071 const int delta_diff = (int) src_rgba_line[1].data - width_diff;
00072 const int new_width = effective_width - (delta_diff & ~1);
00073 effective_width = delta_diff > 0 ? new_width : effective_width;
/* Nothing left to draw on this row. */
00074 if (effective_width <= 0) break;
/* FALLTHROUGH: the pixel loop is shared with RM_WITH_SKIP. */
00075
00076 }
00077
00078 case RM_WITH_SKIP: {
00079 __m128i srcABCD = _mm_loadu_si128((const __m128i*) src);
00080 __m128i dstABCD = _mm_loadu_si128((__m128i*) dst);
/* Two pixels per iteration. */
00081 for (uint x = (uint) effective_width / 2; x > 0; x--) {
/* ALPHA_BLEND_2 is a macro defined elsewhere; it is expected to leave the two blended
 * pixels in the low 64 bits of srcABCD — confirm. */
00082 ALPHA_BLEND_2(pack_low_cm);
/* Mask 0xF0 takes the upper four 16-bit words (pixels 2 and 3) from dstABCD, so the full
 * 16-byte store below writes those two destination pixels back unchanged. */
00083 srcABCD = _mm_blend_epi16(srcABCD, dstABCD, 0xF0);
00084 Colour *old_dst = dst;
00085 src += 2;
00086 dst += 2;
00087
00088
00089
/* The next destination pair is loaded before the current result is stored, and the next
 * source pair right after it. */
00090 dstABCD = _mm_loadu_si128((__m128i*) dst);
00091 _mm_storeu_si128((__m128i *) old_dst, srcABCD);
00092 srcABCD = _mm_loadu_si128((const __m128i*) src);
00093 }
/* Trailing pixel of an odd-width row: blend once more but write only the first 32-bit lane. */
00094 if (bt_last == BT_ODD) {
00095 ALPHA_BLEND_2(pack_low_cm);
00096 *dst = (Colour) EXTR32(srcABCD, 0);
00097 }
00098 break;
00099 }
00100
00101 default: NOT_REACHED();
00102 }
00103 break;
00104 }
00105
/* Colour remapping: pixels with a nonzero map value take their colour from the palette via bp->remap. */
00106 case BM_COLOUR_REMAP: {
00107 switch (read_mode) {
00108 case RM_WITH_MARGIN: {
/* Same margin handling as in the plain path, except that the map-value pointer is advanced
 * as well and the right-hand reduction is not rounded to an even count. */
00109 src += src_rgba_line[0].data;
00110 src_mv += src_rgba_line[0].data;
00111 dst += src_rgba_line[0].data;
00112 const int width_diff = si->sprite_width - bp->width;
00113 effective_width = bp->width - (int) src_rgba_line[0].data;
00114 const int delta_diff = (int) src_rgba_line[1].data - width_diff;
00115 const int nd = effective_width - delta_diff;
00116 effective_width = delta_diff > 0 ? nd : effective_width;
/* Nothing left to draw; this only leaves the inner switch, so the map-value row pointer is
 * still advanced after it. */
00117 if (effective_width <= 0) break;
/* FALLTHROUGH: the pixel loop is shared with RM_WITH_SKIP. */
00118
00119 }
00120
00121 case RM_WITH_SKIP: {
00122 __m128i srcABCD = _mm_loadu_si128((const __m128i*) src);
00123 __m128i dstABCD = _mm_loadu_si128((__m128i*) dst);
/* Fetch the map values of two pixels with a single 32-bit read. This relies on MapValue being
 * 16 bits wide: low byte = index into 'remap', high byte = brightness, as used below — confirm
 * the layout in the header. */
00124 uint32 mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
00125
00126 for (uint x = (uint) effective_width / 2; x > 0; x--) {
00127
/* Remap work is only needed when at least one of the two pixels has a nonzero remap index. */
00128 if (mvX2 & 0x00FF00FF) {
00129
/* First pixel: a zero index keeps the source pixel; otherwise a remap result of 0 yields an
 * all-zero colour (alpha 0), and any other result yields the palette RGB with the source alpha. */
00130 const Colour src0 = src[0];
00131 const uint m0 = (byte) mvX2;
00132 const uint r0 = remap[m0];
00133 const Colour c0map = (this->LookupColourInPalette(r0).data & 0x00FFFFFF) | (src0.data & 0xFF000000);
00134 Colour c0 = 0;
00135 c0 = r0 == 0 ? c0 : c0map;
00136 c0 = m0 != 0 ? c0 : src0;
00137 INSR32(c0.data, srcABCD, 0);
00138
/* Second pixel: same selection, using the map value in the upper 16 bits of mvX2. */
00139 const Colour src1 = src[1];
00140 const uint m1 = (byte) (mvX2 >> 16);
00141 const uint r1 = remap[m1];
00142 const Colour c1map = (this->LookupColourInPalette(r1).data & 0x00FFFFFF) | (src1.data & 0xFF000000);
00143 Colour c1 = 0;
00144 c1 = r1 == 0 ? c1 : c1map;
00145 c1 = m1 != 0 ? c1 : src1;
00146 INSR32(c1.data, srcABCD, 1);
00147
/* Skip the brightness adjustment when both brightness bytes are 0x80
 * (presumably the value of DEFAULT_BRIGHTNESS — confirm). */
00148 if ((mvX2 & 0xFF00FF00) != 0x80008000) {
00149 ADJUST_BRIGHTNESS_2(srcABCD, mvX2);
00150 }
00151 }
00152
00153
/* Blend and store exactly as in the plain path, additionally stepping the map-value pointer. */
00154 ALPHA_BLEND_2(pack_low_cm);
00155 srcABCD = _mm_blend_epi16(srcABCD, dstABCD, 0xF0);
00156 Colour *old_dst = dst;
00157 dst += 2;
00158 src += 2;
00159 src_mv += 2;
00160 dstABCD = _mm_loadu_si128((__m128i*) dst);
00161 _mm_storeu_si128((__m128i *) old_dst, srcABCD);
00162 mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
00163 srcABCD = _mm_loadu_si128((const __m128i*) src);
00164 }
00165
/* Trailing pixel of an odd-width row; decided at run time from effective_width, not from bt_last. */
00166 if (effective_width & 1) {
00167
00168 if ((byte) mvX2 == 0) {
/* Not remapped: blend a translucent source pixel, copy an opaque one as is. */
00169 if (src->a < 255) {
00170 ALPHA_BLEND_2(pack_low_cm);
00171 (*dst).data = EXTR32(srcABCD, 0);
00172 } else
00173 *dst = *src;
00174 } else {
/* Remapped: a remap result of 0 leaves the destination pixel untouched. */
00175 const uint r = remap[(byte) mvX2];
00176 if (r != 0) {
00177 Colour remapped_colour = AdjustBrightness(this->LookupColourInPalette(r), (byte) (mvX2 >> 8));
00178 if (src->a == 255) {
00179 *dst = remapped_colour;
00180 } else {
/* Translucent: give the remapped colour the source alpha, then blend it like any other pixel. */
00181 remapped_colour.a = src->a;
00182 INSR32(remapped_colour.data, srcABCD, 0);
00183 ALPHA_BLEND_2(pack_low_cm);
00184 (*dst).data = EXTR32(srcABCD, 0);
00185 }
00186 }
00187 }
00188 }
00189 break;
00190 }
00191
00192 default: NOT_REACHED();
00193 }
/* The map-value plane is only walked in this mode; advance it by one sprite row. */
00194 src_mv_line += si->sprite_width;
00195 break;
00196 }
00197
/* Transparency: the destination is scaled per channel by (tr_nom_base - alpha / 4) / 256.
 * Of the source only the bytes selected by a_cm are used (presumably the alpha channel — confirm);
 * the source colour itself does not contribute. This path always uses bp->width and ignores read_mode. */
00198 case BM_TRANSPARENT: {
00199
00200
00201
00202 __m128i srcABCD = _mm_loadu_si128((const __m128i*) src);
00203 __m128i dstABCD = _mm_loadu_si128((__m128i*) dst);
00204 for (uint x = (uint) bp->width / 2; x > 0; x--) {
/* Widen the low two pixels to 16 bits per channel; dstCD holds destination pixels 2 and 3,
 * which are only carried along so that they can be written back unchanged. */
00205 __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128());
00206 __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128());
00207 __m128i dstCD = _mm_unpackhi_epi8(dstABCD, _mm_setzero_si128());
00208 __m128i alphaAB = _mm_shuffle_epi8(srcAB, a_cm);
00209 alphaAB = _mm_srli_epi16(alphaAB, 2);
00210 __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
00211 dstAB = _mm_mullo_epi16(dstAB, nom);
00212 dstAB = _mm_srli_epi16(dstAB, 8);
/* Narrow back to 8 bits: the two scaled pixels in the low half, pixels 2 and 3 in the high half. */
00213 dstAB = _mm_packus_epi16(dstAB, dstCD);
00214 Colour *old_dst = dst;
00215 src += 2;
00216 dst += 2;
00217 dstABCD = _mm_loadu_si128((__m128i*) dst);
00218 _mm_storeu_si128((__m128i *) old_dst, dstAB);
00219 srcABCD = _mm_loadu_si128((const __m128i*) src);
00220 }
/* Trailing pixel of an odd-width row: same computation, but only the first 32-bit lane is written. */
00221 if (bp->width & 1) {
00222 __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128());
00223 __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128());
00224 __m128i alphaAB = _mm_shuffle_epi8(srcAB, a_cm);
00225 alphaAB = _mm_srli_epi16(alphaAB, 2);
00226 __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
00227 dstAB = _mm_mullo_epi16(dstAB, nom);
00228 dstAB = _mm_srli_epi16(dstAB, 8);
00229 dstAB = _mm_packus_epi16(dstAB, dstAB);
00230 (*dst).data = EXTR32(dstAB, 0);
00231 }
00232
00233 break;
00234 }
00235 }
00236
/* Advance to the next row: the RGBA plane by sprite_line_size bytes, the destination by pitch pixels. */
00237 src_rgba_line = (const Colour*) ((const byte*) src_rgba_line + si->sprite_line_size);
00238 dst_line += bp->pitch;
00239 }
00240 }
00241 IGNORE_UNINITIALIZED_WARNING_STOP
00242
00250 void Blitter_32bppSSE4::Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom)
00251 {
00252 const BlockType bt_last = (BlockType) (bp->width & 1);
00253 switch (mode) {
00254 case BM_NORMAL: {
00255 if (bp->skip_left != 0 || bp->width <= MARGIN_NORMAL_THRESHOLD) {
00256 switch (bt_last) {
00257 case BT_EVEN: Draw<BM_NORMAL, RM_WITH_SKIP, BT_EVEN>(bp, zoom); return;
00258 case BT_ODD: Draw<BM_NORMAL, RM_WITH_SKIP, BT_ODD>(bp, zoom); return;
00259 default: NOT_REACHED();
00260 }
00261 } else {
00262 switch (bt_last) {
00263 case BT_EVEN: Draw<BM_NORMAL, RM_WITH_MARGIN, BT_EVEN>(bp, zoom); return;
00264 case BT_ODD: Draw<BM_NORMAL, RM_WITH_MARGIN, BT_ODD>(bp, zoom); return;
00265 default: NOT_REACHED();
00266 }
00267 }
00268 break;
00269 }
00270 case BM_COLOUR_REMAP:
00271 if (bp->skip_left != 0 || bp->width <= MARGIN_REMAP_THRESHOLD) {
00272 Draw<BM_COLOUR_REMAP, RM_WITH_SKIP, BT_NONE>(bp, zoom); return;
00273 } else {
00274 Draw<BM_COLOUR_REMAP, RM_WITH_MARGIN, BT_NONE>(bp, zoom); return;
00275 }
00276 case BM_TRANSPARENT: Draw<BM_TRANSPARENT, RM_NONE, BT_NONE>(bp, zoom); return;
00277 default: NOT_REACHED();
00278 }
00279 }
00280
00282 inline Colour Blitter_32bppSSE4::AdjustBrightness(Colour colour, uint8 brightness)
00283 {
00284
00285 if (brightness == DEFAULT_BRIGHTNESS) return colour;
00286
00287 return Blitter_32bppSSE4::ReallyAdjustBrightness(colour, brightness);
00288 }
00289
00290 IGNORE_UNINITIALIZED_WARNING_START
00291 Colour Blitter_32bppSSE4::ReallyAdjustBrightness(Colour colour, uint8 brightness)
00292 {
00293 ALIGN(16) uint64 c16 = colour.b | (uint64) colour.g << 16 | (uint64) colour.r << 32;
00294 c16 *= brightness;
00295 uint64 c16_ob = c16;
00296 c16 /= DEFAULT_BRIGHTNESS;
00297 c16 &= 0x01FF01FF01FF;
00298
00299
00300 c16_ob = (((c16_ob >> (8 + 7)) & 0x0100010001) * 0xFF) & c16;
00301 uint64 ob = (uint16) c16_ob + (uint16) (c16_ob >> 16) + (uint16) (c16_ob >> 32);
00302
00303 const uint32 alpha32 = colour.data & 0xFF000000;
00304 __m128i ret;
00305 INSR64(c16, ret, 0);
00306 if (ob != 0) {
00307
00308 ob /= 2;
00309 __m128i ob128;
00310 INSR64(ob | ob << 16 | ob << 32, ob128, 0);
00311 __m128i white = OVERBRIGHT_VALUE_MASK;
00312 __m128i c128 = ret;
00313 ret = _mm_subs_epu16(white, c128);
00314 ret = _mm_mullo_epi16(ret, ob128);
00315 ret = _mm_srli_epi16(ret, 8);
00316 ret = _mm_add_epi16(ret, c128);
00317 }
00318
00319 ret = _mm_packus_epi16(ret, ret);
00320 return alpha32 | EXTR32(ret, 0);
00321 }
00322 IGNORE_UNINITIALIZED_WARNING_STOP
00323
00324 #endif