00001
00002
00003
00004
00005
00006
00007
00008
00009
00012 #ifdef WITH_SSE
00013
00014 #include "../stdafx.h"
00015 #include "../video/video_driver.hpp"
00016 #include "../table/sprites.h"
00017 #include "32bpp_anim_sse4.hpp"
00018
00020 static FBlitter_32bppSSE4_Anim iFBlitter_32bppSSE4_Anim;
00021
00029 IGNORE_UNINITIALIZED_WARNING_START
/**
 * Draws a sprite to the destination buffer and keeps the animation buffer in step with it.
 *
 * Pixels are processed two at a time with SSE; per sprite line the code path is chosen
 * by the template parameters.
 *
 * @tparam mode      Blitter mode. BM_COLOUR_REMAP and BM_TRANSPARENT have explicit cases,
 *                   every other value takes the 'default' (normal) path.
 * @tparam read_mode RM_WITH_MARGIN reads two per-line values stored at the start of each
 *                   RGBA line to trim the line; otherwise bp->skip_left is applied once up front.
 * @tparam bt_last   Only consulted on the normal path: BT_ODD enables the trailing single-pixel handling.
 * @param bp   Blitting parameters (destination, clipping, remap table, sprite data).
 * @param zoom Zoom level; used as index into the sprite's infos array.
 */
00030 template <BlitterMode mode, Blitter_32bppSSE2::ReadMode read_mode, Blitter_32bppSSE2::BlockType bt_last>
00031 inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom)
00032 {
/* First destination pixel, and the animation buffer entry at the same position
 * (offset of bp->dst relative to the screen pointer, plus the top/left offset). */
00033 Colour *dst_line = (Colour *) bp->dst + bp->top * bp->pitch + bp->left;
00034 uint16 *anim_line = this->anim_buf + ((uint32 *)bp->dst - (uint32 *)_screen.dst_ptr) + bp->top * this->anim_buf_width + bp->left;
00035 int effective_width = bp->width;
00036
00037
/* Locate the map-value and RGBA data of the first sprite line to draw (bp->skip_top lines are skipped). */
00038 const byte * const remap = bp->remap;
00039 const Blitter_32bppSSE_Base::SpriteData * const sd = (const Blitter_32bppSSE_Base::SpriteData *) bp->sprite;
00040 const SpriteInfo * const si = &sd->infos[zoom];
00041 const MapValue *src_mv_line = (const MapValue *) &sd->data[si->mv_offset] + bp->skip_top * si->sprite_width;
00042 const Colour *src_rgba_line = (const Colour *) ((const byte *) &sd->data[si->sprite_offset] + bp->skip_top * si->sprite_line_size);
00043
/* In margin mode the horizontal start is derived per line from the line's own
 * leading values instead (see the RM_WITH_MARGIN cases below). */
00044 if (read_mode != RM_WITH_MARGIN) {
00045 src_rgba_line += bp->skip_left;
00046 src_mv_line += bp->skip_left;
00047 }
00048
00049
/* SSE constants. Only a_cm, pack_low_cm and tr_nom_base are referenced by name in this
 * function; the others are presumably picked up by the ALPHA_BLEND_2 / ADJUST_BRIGHTNESS_2
 * macros defined elsewhere -- TODO confirm. */
00050 const __m128i a_cm = ALPHA_CONTROL_MASK;
00051 const __m128i pack_low_cm = PACK_LOW_CONTROL_MASK;
00052 const __m128i briAB_cm = BRIGHTNESS_LOW_CONTROL_MASK;
00053 const __m128i div_cleaner = BRIGHTNESS_DIV_CLEANER;
00054 const __m128i ob_check = OVERBRIGHT_PRESENCE_MASK;
00055 const __m128i ob_mask = OVERBRIGHT_VALUE_MASK;
00056 const __m128i ob_cm = OVERBRIGHT_CONTROL_MASK;
00057 const __m128i tr_nom_base = TRANSPARENT_NOM_BASE;
00058
/* One iteration per sprite line. */
00059 for (int y = bp->height; y != 0; y--) {
00060 Colour *dst = dst_line;
/* The first META_LENGTH entries of an RGBA line are not pixels; skip them. */
00061 const Colour *src = src_rgba_line + META_LENGTH;
00062 const MapValue *src_mv = src_mv_line;
00063 uint16 *anim = anim_line;
00064
00065 switch (mode) {
/* Normal drawing: every mode without an explicit case below ends up here. */
00066 default: {
00067 switch (read_mode) {
00068 case RM_WITH_MARGIN: {
/* Advance all four cursors by the first per-line value and shorten the line accordingly.
 * NOTE(review): the two values look like the left/right transparent margins of the line -- confirm. */
00069 src += src_rgba_line[0].data;
00070 dst += src_rgba_line[0].data;
00071 src_mv += src_rgba_line[0].data;
00072 anim += src_rgba_line[0].data;
00073 const int width_diff = si->sprite_width - bp->width;
00074 effective_width = bp->width - (int) src_rgba_line[0].data;
/* If the second per-line value exceeds the part already clipped away (width_diff),
 * shrink the width by the excess, rounded down to an even number of pixels. */
00075 const int delta_diff = (int) src_rgba_line[1].data - width_diff;
00076 const int new_width = effective_width - (delta_diff & ~1);
00077 effective_width = delta_diff > 0 ? new_width : effective_width;
/* Nothing left to draw on this line. */
00078 if (effective_width <= 0) break;
00079
/* No break: deliberately falls through into RM_WITH_SKIP. */
00080 }
00081
00082 case RM_WITH_SKIP: {
/* Two consecutive map values are read as one 32-bit word: bits 0-7 and 16-23 are used as
 * the palette index 'm' of pixel 0 and 1, bits 8-15 and 24-31 as their brightness argument. */
00083 uint32 mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
00084 __m128i srcABCD = _mm_loadu_si128((const __m128i*) src);
00085 __m128i dstABCD = _mm_loadu_si128((__m128i*) dst);
/* Process the line two pixels per iteration. */
00086 for (uint x = (uint) effective_width/2; x != 0; x--) {
00087
/* For indices at or above PALETTE_ANIM_START, replace the source RGB with the
 * brightness-adjusted palette colour, keeping the source alpha. */
00088 const byte m0 = mvX2;
00089 if (m0 >= PALETTE_ANIM_START) {
00090 const Colour c0 = (this->LookupColourInPalette(m0).data & 0x00FFFFFF) | (src[0].data & 0xFF000000);
00091 INSR32(AdjustBrightness(c0, (byte) (mvX2 >> 8)).data, srcABCD, 0);
00092 }
00093 const byte m1 = mvX2 >> 16;
00094 if (m1 >= PALETTE_ANIM_START) {
00095 const Colour c1 = (this->LookupColourInPalette(m1).data & 0x00FFFFFF) | (src[1].data & 0xFF000000);
00096 INSR32(AdjustBrightness(c1, (byte) (mvX2 >> 24)).data, srcABCD, 1);
00097 }
00098
00099
/* Update the animation buffer and pick the cheapest path based on both source alphas. */
00100 const byte a0 = src[0].a;
00101 const byte a1 = src[1].a;
00102 uint32 anim01 = 0;
00103 if (a0 == 255) {
00104 if (a1 == 255) {
/* Both opaque: store both map values with one 32-bit write and skip the blending. */
00105 *(uint32*) anim = mvX2;
00106 goto bmno_full_opacity;
00107 }
/* Only pixel 0 opaque: remember its map value for the store below. */
00108 anim01 = (uint16) mvX2;
00109 } else if (a0 == 0) {
00110 if (a1 == 0) {
/* Both fully transparent: neither dst nor anim is written. */
00111 goto bmno_full_transparency;
00112 } else {
/* Pixel 0 transparent: anim[0] stays untouched; anim[1] is only written when pixel 1 is opaque. */
00113 if (a1 == 255) anim[1] = (uint16) (mvX2 >> 16);
00114 goto bmno_alpha_blend;
00115 }
00116 }
/* Reached when pixel 0 is not fully transparent and the pair is not fully opaque. */
00117 if (a1 > 0) {
00118 if (a1 == 255) anim01 |= mvX2 & 0xFFFF0000;
00119 *(uint32*) anim = anim01;
00120 } else {
/* Pixel 1 fully transparent: only anim[0] is written. */
00121 anim[0] = (uint16) anim01;
00122 }
00123
00124
/* ALPHA_BLEND_2 is a macro defined elsewhere; presumably it blends the two low pixels
 * of srcABCD with dstABCD -- TODO confirm. */
00125 bmno_alpha_blend:
00126 ALPHA_BLEND_2(pack_low_cm);
00127 bmno_full_opacity:
/* Keep the low 64 bits (the two pixels just handled) and take the high 64 bits
 * from the destination copy, so the 128-bit store below leaves dst[2] and dst[3] as they were. */
00128 srcABCD = _mm_blend_epi16(srcABCD, dstABCD, 0xF0);
00129
/* Advance by two pixels; the next destination pair is loaded before the current result is stored. */
00130 src_mv += 2;
00131 mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
00132 src += 2;
00133 anim += 2;
00134 dstABCD = _mm_loadu_si128((__m128i*) (dst+2));
00135 _mm_storeu_si128((__m128i *) dst, srcABCD);
00136 srcABCD = _mm_loadu_si128((const __m128i*) src);
00137 dst += 2;
00138 continue;
00139
/* Both pixels transparent: just advance all cursors and reload, nothing is stored. */
00140 bmno_full_transparency:
00141 src_mv += 2;
00142 mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
00143 dst += 2;
00144 src += 2;
00145 anim += 2;
00146 dstABCD = _mm_loadu_si128((__m128i*) dst);
00147 srcABCD = _mm_loadu_si128((const __m128i*) src);
00148 }
00149
/* Trailing single pixel; only compiled in for the BT_ODD instantiations. */
00150 if (bt_last == BT_ODD) {
00151 if (src->a == 0) {
/* Fully transparent: nothing to do. */
00152 } else if (src->a == 255) {
/* Opaque: record the map value and write either the adjusted palette colour or the source colour. */
00153 *anim = (uint16) mvX2;
00154 *dst = ((byte) mvX2 >= PALETTE_ANIM_START) ? AdjustBrightness(LookupColourInPalette((byte) mvX2), (byte) (mvX2 >> 8)) : *src;
00155 } else {
/* Semi-transparent: clear the anim entry and blend (with the palette colour if the index is in the animated range). */
00156 *anim = 0;
00157 if ((byte) mvX2 >= PALETTE_ANIM_START) {
00158 ALIGN(16) Colour colour = AdjustBrightness(LookupColourInPalette((byte) mvX2), (byte) (mvX2 >> 8));
00159 colour.a = src->a;
00160 srcABCD = _mm_load_si128((__m128i*) &colour);
00161 }
00162 ALPHA_BLEND_2(pack_low_cm);
00163 (*dst).data = EXTR32(srcABCD, 0);
00164 }
00165 }
00166 break;
00167 }
00168
00169 default: NOT_REACHED();
00170 }
00171 break;
00172 }
00173
/* Drawing with a colour remap table (bp->remap). */
00174 case BM_COLOUR_REMAP: {
00175 switch (read_mode) {
00176 case RM_WITH_MARGIN: {
/* Same trimming as on the normal path, except that the excess (delta_diff)
 * is subtracted as-is, without rounding down to an even value. */
00177 src += src_rgba_line[0].data;
00178 src_mv += src_rgba_line[0].data;
00179 dst += src_rgba_line[0].data;
00180 anim += src_rgba_line[0].data;
00181 const int width_diff = si->sprite_width - bp->width;
00182 effective_width = bp->width - (int) src_rgba_line[0].data;
00183 const int delta_diff = (int) src_rgba_line[1].data - width_diff;
00184 const int nd = effective_width - delta_diff;
00185 effective_width = delta_diff > 0 ? nd : effective_width;
00186 if (effective_width <= 0) break;
00187
/* No break: deliberately falls through into RM_WITH_SKIP. */
00188 }
00189
00190 case RM_WITH_SKIP: {
/* As above, two map values are fetched as a single 32-bit word. */
00191 uint32 mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
00192 __m128i srcABCD = _mm_loadu_si128((const __m128i*) src);
00193 __m128i dstABCD = _mm_loadu_si128((__m128i*) dst);
00194
00195 for (uint x = (uint) effective_width / 2; x != 0; x--) {
00196
/* m = palette index from the map value, r = that index after remapping. */
00197 const uint m0 = (byte) mvX2;
00198 const uint r0 = remap[m0];
00199 const uint m1 = (byte) (mvX2 >> 16);
00200 const uint r1 = remap[m1];
/* Only needed when at least one of the two pixels has a non-zero index. */
00201 if (mvX2 & 0x00FF00FF) {
00202
/* Per pixel: m == 0 keeps the source colour; m != 0 with r == 0 takes the current
 * destination pixel; otherwise the remapped palette colour with the source alpha is used. */
00203 const Colour src0 = src[0];
00204 const Colour c0map = (this->LookupColourInPalette(r0).data & 0x00FFFFFF) | (src0.data & 0xFF000000);
00205 Colour c0 = dst[0];
00206 c0 = r0 == 0 ? c0 : c0map;
00207 c0 = m0 != 0 ? c0 : src0;
00208 INSR32(c0.data, srcABCD, 0);
00209
00210 const Colour src1 = src[1];
00211 const Colour c1map = (this->LookupColourInPalette(r1).data & 0x00FFFFFF) | (src1.data & 0xFF000000);
00212 Colour c1 = dst[1];
00213 c1 = r1 == 0 ? c1 : c1map;
00214 c1 = m1 != 0 ? c1 : src1;
00215 INSR32(c1.data, srcABCD, 1);
00216
/* Skip the brightness adjustment when both brightness bytes equal 0x80
 * (presumably the default brightness -- TODO confirm). */
00217 if ((mvX2 & 0xFF00FF00) != 0x80008000) {
00218 ADJUST_BRIGHTNESS_2(srcABCD, mvX2);
00219 }
00220 }
00221
00222
/* Update the animation buffer: entries combine the brightness byte from the map value
 * with the remapped index; the alpha-based case split mirrors the normal path. */
00223 const byte a0 = src[0].a;
00224 const byte a1 = src[1].a;
00225 uint32 anim01 = mvX2 & 0xFF00FF00;
00226 if (a0 == 255) {
00227 anim01 |= r0;
00228 if (a1 == 255) {
/* Both opaque: write both entries at once and skip the blending. */
00229 *(uint32*) anim = anim01 | (r1 << 16);
00230 goto bmcr_full_opacity;
00231 }
00232 } else if (a0 == 0) {
00233 if (a1 == 0) {
/* Both fully transparent: neither dst nor anim is written. */
00234 goto bmcr_full_transparency;
00235 } else {
/* Pixel 0 transparent: anim[0] stays untouched; anim[1] is only written when pixel 1 is opaque. */
00236 if (a1 == 255) {
00237 anim[1] = r1 | (anim01 >> 16);
00238 }
00239 goto bmcr_alpha_blend;
00240 }
00241 }
/* Reached when pixel 0 is not fully transparent and the pair is not fully opaque. */
00242 if (a1 > 0) {
00243 if (a1 == 255) anim01 |= r1 << 16;
00244 *(uint32*) anim = anim01;
00245 } else {
00246 anim[0] = (uint16) anim01;
00247 }
00248
00249
00250 bmcr_alpha_blend:
00251 ALPHA_BLEND_2(pack_low_cm);
00252 bmcr_full_opacity:
/* Low 64 bits: the two handled pixels; high 64 bits: unchanged destination data. */
00253 srcABCD = _mm_blend_epi16(srcABCD, dstABCD, 0xF0);
00254
/* Advance by two pixels; the next destination pair is loaded before the current result is stored. */
00255 src += 2;
00256 src_mv += 2;
00257 anim += 2;
00258 mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
00259 dstABCD = _mm_loadu_si128((__m128i*) (dst+2));
00260 _mm_storeu_si128((__m128i *) dst, srcABCD);
00261 srcABCD = _mm_loadu_si128((const __m128i*) src);
00262 dst += 2;
00263 continue;
00264
/* Both pixels transparent: just advance all cursors and reload, nothing is stored. */
00265 bmcr_full_transparency:
00266 src_mv += 2;
00267 mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
00268 dst += 2;
00269 src += 2;
00270 anim += 2;
00271 dstABCD = _mm_loadu_si128((__m128i*) dst);
00272 srcABCD = _mm_loadu_si128((const __m128i*) src);
00273 }
00274
/* Trailing single pixel; decided at run time here, as the trimmed width may differ per line. */
00275 if (effective_width & 1) {
00276
00277 if (src->a == 0) {
/* Fully transparent: nothing to do. */
00278 } else if ((byte) mvX2 != 0) {
/* Pixel has a palette index: the anim entry gets remapped index + brightness when opaque, 0 otherwise. */
00279 const uint r = remap[(byte) mvX2];
00280 *anim = (src->a == 255) ? (r | ((uint16) mvX2 & 0xFF00)) : 0;
/* A remap result of 0 leaves the destination pixel untouched. */
00281 if (r != 0) {
00282 Colour remapped_colour = AdjustBrightness(LookupColourInPalette(r), (byte) (mvX2 >> 8));
00283 if (src->a == 255) {
00284 *dst = remapped_colour;
00285 } else {
/* Semi-transparent: blend the remapped colour using the source alpha (shared code below). */
00286 remapped_colour.a = src->a;
00287 INSR32(remapped_colour.data, srcABCD, 0);
00288 goto bmcr_alpha_blend_single;
00289 }
00290 }
00291 } else {
/* No palette index: plain 32bpp pixel, copied when opaque and blended otherwise. */
00292 *anim = 0;
00293 if (src->a == 255) {
00294 *dst = *src;
00295 } else {
00296 bmcr_alpha_blend_single:
00297 ALPHA_BLEND_2(pack_low_cm);
00298 (*dst).data = EXTR32(srcABCD, 0);
00299 }
00300 }
00301 }
00302 break;
00303 }
00304
00305 default: NOT_REACHED();
00306 }
00307 break;
00308 }
00309
/* Transparency effect: the destination is darkened depending on the source alpha;
 * the source colour itself is not drawn. Always uses the full bp->width. */
00310 case BM_TRANSPARENT: {
00311
00312 __m128i srcABCD = _mm_loadu_si128((const __m128i*) src);
00313 __m128i dstABCD = _mm_loadu_si128((__m128i*) dst);
00314 for (uint x = (uint) bp->width / 2; x > 0; x--) {
/* Widen the two low pixels to 16 bits per channel; dstCD (the two high pixels) is only
 * carried along so it can be packed back unchanged. */
00315 __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128());
00316 __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128());
00317 __m128i dstCD = _mm_unpackhi_epi8(dstABCD, _mm_setzero_si128());
/* dst = dst * (tr_nom_base - alpha / 4) >> 8; a_cm presumably broadcasts each pixel's
 * alpha to all of its channels -- TODO confirm. */
00318 __m128i alphaAB = _mm_shuffle_epi8(srcAB, a_cm);
00319 alphaAB = _mm_srli_epi16(alphaAB, 2);
00320 __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
00321 dstAB = _mm_mullo_epi16(dstAB, nom);
00322 dstAB = _mm_srli_epi16(dstAB, 8);
00323 dstAB = _mm_packus_epi16(dstAB, dstCD);
/* Advance first, load the next destination pair, then store the result at the old position. */
00324 Colour *old_dst = dst;
00325 src += 2;
00326 dst += 2;
00327 anim += 2;
00328 dstABCD = _mm_loadu_si128((__m128i*) dst);
00329 _mm_storeu_si128((__m128i *) old_dst, dstAB);
00330 srcABCD = _mm_loadu_si128((const __m128i*) src);
/* Clear the anim entries of the two pixels just handled wherever the source was not fully transparent. */
00331 if (src[-2].a) anim[-2] = 0;
00332 if (src[-1].a) anim[-1] = 0;
00333 }
/* Trailing single pixel: same computation, but only one 32-bit value is written back. */
00334 if (bp->width & 1) {
00335 __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128());
00336 __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128());
00337 __m128i alphaAB = _mm_shuffle_epi8(srcAB, a_cm);
00338 alphaAB = _mm_srli_epi16(alphaAB, 2);
00339 __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
00340 dstAB = _mm_mullo_epi16(dstAB, nom);
00341 dstAB = _mm_srli_epi16(dstAB, 8);
00342 dstAB = _mm_packus_epi16(dstAB, dstAB);
00343 (*dst).data = EXTR32(dstAB, 0);
00344 if (src[0].a) anim[0] = 0;
00345 }
00346 break;
00347 }
00348 }
00349
/* Move all line cursors to the next sprite / destination / animation buffer line. */
00350 src_mv_line += si->sprite_width;
00351 src_rgba_line = (const Colour*) ((const byte*) src_rgba_line + si->sprite_line_size);
00352 dst_line += bp->pitch;
00353 anim_line += this->anim_buf_width;
00354 }
00355 }
00356 IGNORE_UNINITIALIZED_WARNING_STOP
00357
00365 void Blitter_32bppSSE4_Anim::Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom)
00366 {
00367 switch (mode) {
00368 case BM_NORMAL: {
00369 const BlockType bt_last = (BlockType) (bp->width & 1);
00370 if (bp->skip_left != 0 || bp->width <= MARGIN_NORMAL_THRESHOLD) {
00371 switch (bt_last) {
00372 case BT_EVEN: Draw<BM_NORMAL, RM_WITH_SKIP, BT_EVEN>(bp, zoom); return;
00373 case BT_ODD: Draw<BM_NORMAL, RM_WITH_SKIP, BT_ODD>(bp, zoom); return;
00374 default: NOT_REACHED();
00375 }
00376 } else {
00377 switch (bt_last) {
00378 case BT_EVEN: Draw<BM_NORMAL, RM_WITH_MARGIN, BT_EVEN>(bp, zoom); return;
00379 case BT_ODD: Draw<BM_NORMAL, RM_WITH_MARGIN, BT_ODD>(bp, zoom); return;
00380 default: NOT_REACHED();
00381 }
00382 }
00383 break;
00384 }
00385 case BM_COLOUR_REMAP:
00386 if (bp->skip_left != 0 || bp->width <= MARGIN_REMAP_THRESHOLD) {
00387 Draw<BM_COLOUR_REMAP, RM_WITH_SKIP, BT_NONE>(bp, zoom); return;
00388 } else {
00389 Draw<BM_COLOUR_REMAP, RM_WITH_MARGIN, BT_NONE>(bp, zoom); return;
00390 }
00391 case BM_TRANSPARENT: Draw<BM_TRANSPARENT, RM_NONE, BT_NONE>(bp, zoom); return;
00392 default: NOT_REACHED();
00393 }
00394 }
00395
00397 inline Colour Blitter_32bppSSE4_Anim::AdjustBrightness(Colour colour, uint8 brightness)
00398 {
00399
00400 if (brightness == DEFAULT_BRIGHTNESS) return colour;
00401
00402 return Blitter_32bppSSE4::ReallyAdjustBrightness(colour, brightness);
00403 }
00404
00405 #endif