00001
00002
00003
00004
00005
00006
00007
00008
00009
00012 #ifdef WITH_SSE
00013
00014 #include "../stdafx.h"
00015 #include "../zoom_func.h"
00016 #include "../settings_type.h"
00017 #include "32bpp_sse2.hpp"
00018
/* File-local static instance of FBlitter_32bppSSE2, constructed during static initialisation.
 * NOTE(review): presumably constructing it is what makes the SSE2 blitter selectable;
 * confirm against the FBlitter_32bppSSE2 declaration in 32bpp_sse2.hpp. */
00020 static FBlitter_32bppSSE2 iFBlitter_32bppSSE2;
00021
00029 IGNORE_UNINITIALIZED_WARNING_START
/**
 * Draws (part of) an encoded sprite into the destination buffer described by \a bp.
 * The blend mode, the way the source is read and the parity of the drawn width are
 * template parameters, so every combination is compiled as its own specialisation.
 *
 * @tparam mode BM_COLOUR_REMAP and BM_TRANSPARENT have dedicated branches; every other
 *              value takes the default branch (plain alpha blend of the sprite).
 * @tparam read_mode With any value other than RM_WITH_MARGIN, bp->skip_left is applied once
 *                   up-front. With RM_WITH_MARGIN the two metadata values stored in front of
 *                   every pixel line are used to skip that line's transparent left/right margins.
 * @tparam bt_last BT_ODD makes the default branch blend one extra pixel after its two-pixel loop.
 *                 Only read by the default branch.
 * @param bp Blitter parameters; this function reads dst, top, left, pitch, width, height,
 *           skip_top, skip_left, sprite and remap.
 * @param zoom Index into the sprite's per-zoom info table (sd->infos).
 *
 * NOTE(review): ALPHA_BLEND_2() is invoked without arguments, so it presumably works on the
 * locals srcABCD / dstABCD by name (and possibly clear_hi, which is never referenced in the
 * visible text of this function). Check the macro in 32bpp_sse2.hpp before renaming or
 * removing any of these locals.
 */
00030 template <BlitterMode mode, Blitter_32bppSSE2::ReadMode read_mode, Blitter_32bppSSE2::BlockType bt_last>
00031 inline void Blitter_32bppSSE2::Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom)
00032 {
/* First destination pixel: row bp->top, column bp->left, rows are bp->pitch pixels apart. */
00033 Colour *dst_line = (Colour *) bp->dst + bp->top * bp->pitch + bp->left;
00034 int effective_width = bp->width;
00035
00036
/* Locate the data of the requested zoom level and move both source pointers
 * (map values and RGBA) down by bp->skip_top lines. RGBA lines are sprite_line_size
 * bytes apart, map-value lines are sprite_width entries apart. */
00037 const SpriteData * const sd = (const SpriteData *) bp->sprite;
00038 const SpriteInfo * const si = &sd->infos[zoom];
00039 const MapValue *src_mv_line = (const MapValue *) &sd->data[si->mv_offset] + bp->skip_top * si->sprite_width;
00040 const Colour *src_rgba_line = (const Colour *) ((const byte *) &sd->data[si->sprite_offset] + bp->skip_top * si->sprite_line_size);
00041
/* Outside margin mode the horizontal skip is the same for every line, so apply it once. */
00042 if (read_mode != RM_WITH_MARGIN) {
00043 src_rgba_line += bp->skip_left;
00044 src_mv_line += bp->skip_left;
00045 }
00046
00047
/* Constants kept in locals for the whole sprite. tr_nom_base is used by the BM_TRANSPARENT
 * branch; clear_hi is presumably consumed inside ALPHA_BLEND_2() (see NOTE in the header). */
00048 const __m128i clear_hi = CLEAR_HIGH_BYTE_MASK;
00049 const __m128i tr_nom_base = TRANSPARENT_NOM_BASE;
00050
/* One iteration per destination line. */
00051 for (int y = bp->height; y != 0; y--) {
00052 Colour *dst = dst_line;
/* Every RGBA line starts with META_LENGTH metadata entries; the pixels follow them. */
00053 const Colour *src = src_rgba_line + META_LENGTH;
00054 const MapValue *src_mv = src_mv_line;
00055
00056 switch (mode) {
/* Plain alpha blend of the sprite onto the destination. */
00057 default: {
00058 switch (read_mode) {
00059 case RM_WITH_MARGIN: {
/* Metadata entry 0 of the line: number of leading pixels to skip. Encode() writes the
 * count of fully transparent pixels at the left there, rounded down to an even number. */
00060 src += src_rgba_line[0].data;
00061 dst += src_rgba_line[0].data;
/* width_diff: sprite columns at the right that were not requested at all. */
00062 const int width_diff = si->sprite_width - bp->width;
00063 effective_width = bp->width - (int) src_rgba_line[0].data;
/* Metadata entry 1: transparent pixels at the right of the line (written by Encode()).
 * delta_diff is the part of that margin lying inside the requested width. */
00064 const int delta_diff = (int) src_rgba_line[1].data - width_diff;
/* Only an even amount is cut off, so the parity of effective_width stays that of
 * bp->width, which is what the caller derived bt_last from. */
00065 const int new_width = effective_width - (delta_diff & ~1);
00066 effective_width = delta_diff > 0 ? new_width : effective_width;
/* Nothing visible on this line: leave the inner switch. */
00067 if (effective_width <= 0) break;
00068
/* No break here: execution deliberately falls through into the shared pixel loop below. */
00069 }
00070
00071 case RM_WITH_SKIP: {
/* Two pixels per iteration. Each _mm_loadu_si128 reads 16 bytes from src and dst,
 * but only the low 64 bits of the blended result are written back.
 * NOTE(review): the 16-byte loads reach past the pixels actually processed; confirm that
 * sprite data and destination buffer are readable that far at the end of a line. */
00072 for (uint x = (uint) effective_width / 2; x > 0; x--) {
00073 __m128i srcABCD = _mm_loadu_si128((const __m128i*) src);
00074 __m128i dstABCD = _mm_loadu_si128((__m128i*) dst);
00075 ALPHA_BLEND_2();
00076 *(uint64*) dst = EXTR64(srcABCD, 0);
00077 src += 2;
00078 dst += 2;
00079 }
/* Odd width: blend the one remaining pixel and store only its 32 bits. */
00080 if (bt_last == BT_ODD) {
00081 __m128i srcABCD = _mm_loadu_si128((const __m128i*) src);
00082 __m128i dstABCD = _mm_loadu_si128((__m128i*) dst);
00083 ALPHA_BLEND_2();
00084 (*dst).data = EXTR32(srcABCD, 0);
00085 }
00086 break;
00087 }
00088
00089 default: NOT_REACHED();
00090 }
00091 break;
00092 }
/* Colour remapping: pixels are handled one at a time because each may need a palette lookup. */
00093 case BM_COLOUR_REMAP: {
00094 switch (read_mode) {
00095 case RM_WITH_MARGIN: {
/* Same margin handling as in the default branch, except that the map-value pointer is
 * advanced as well and the right margin is cut off completely (no rounding to even,
 * since this branch does not depend on the parity of the width). */
00096 src += src_rgba_line[0].data;
00097 src_mv += src_rgba_line[0].data;
00098 dst += src_rgba_line[0].data;
00099 const int width_diff = si->sprite_width - bp->width;
00100 effective_width = bp->width - (int) src_rgba_line[0].data;
00101 const int delta_diff = (int) src_rgba_line[1].data - width_diff;
00102 const int nd = effective_width - delta_diff;
00103 effective_width = delta_diff > 0 ? nd : effective_width;
00104 if (effective_width <= 0) break;
00105
/* No break here: execution deliberately falls through into the shared pixel loop below. */
00106 }
00107
00108 case RM_WITH_SKIP: {
00109 const byte *remap = bp->remap;
00110 for (uint x = (uint) effective_width; x != 0; x--) {
00111
/* m == 0: the pixel has no mapping index, so its stored RGBA is drawn as is:
 * blended when alpha is below 255, copied otherwise. */
00112 if (src_mv->m == 0) {
00113 if (src->a < 255) {
00114 __m128i srcABCD = _mm_loadu_si128((const __m128i*) src);
00115 __m128i dstABCD = _mm_loadu_si128((__m128i*) dst);
00116 ALPHA_BLEND_2();
00117 (*dst).data = EXTR32(srcABCD, 0);
00118 } else {
00119 *dst = src->data;
00120 }
00121 } else {
/* m != 0: translate the index through the remap table. A remapped index of 0
 * leaves the destination pixel untouched. */
00122 const uint r = remap[src_mv->m];
00123 if (r != 0) {
/* Palette colour of the remapped index, adjusted by the brightness stored for this pixel. */
00124 Colour remapped_colour = AdjustBrightness(this->LookupColourInPalette(r), src_mv->v);
00125 if (src->a < 255) {
/* srcABCD is intentionally left uninitialised: only its 32-bit element 0 is set (INSR32)
 * and only element 0 of the blend result is read back afterwards. This is presumably why
 * the function is wrapped in IGNORE_UNINITIALIZED_WARNING_START/STOP. */
00126 __m128i srcABCD;
00127 __m128i dstABCD = _mm_loadu_si128((__m128i*) dst);
00128 remapped_colour.a = src->a;
00129 INSR32(remapped_colour.data, srcABCD, 0);
00130 ALPHA_BLEND_2();
00131 (*dst).data = EXTR32(srcABCD, 0);
00132 } else
00133 *dst = remapped_colour;
00134 }
00135 }
00136 src_mv++;
00137 dst++;
00138 src++;
00139 }
00140 break;
00141 }
00142
00143 default: NOT_REACHED();
00144 }
/* The map-value line pointer is only advanced here because only this branch reads it. */
00145 src_mv_line += si->sprite_width;
00146 break;
00147 }
/* Transparency: the destination is scaled per 16-bit lane by a factor derived from the source
 * pixel; the source colour itself is not written. Uses bp->width, not effective_width. */
00148 case BM_TRANSPARENT: {
00149
00150
00151
/* Prime the loop with the first 16 bytes of source and destination. */
00152 __m128i srcABCD = _mm_loadu_si128((const __m128i*) src);
00153 __m128i dstABCD = _mm_loadu_si128((__m128i*) dst);
00154 for (uint x = (uint) bp->width / 2; x > 0; x--) {
/* Widen the low two pixels of src and dst to 16 bits per byte; dstCD keeps the
 * following two destination pixels so they can be written back unchanged. */
00155 __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128());
00156 __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128());
00157 __m128i dstCD = _mm_unpackhi_epi8(dstABCD, _mm_setzero_si128());
/* Shuffle 0x3F copies the top 16-bit lane of each source pixel into that pixel's three
 * lower lanes (presumably the alpha value; Colour's byte layout is not visible here). */
00158 __m128i alphaAB = _mm_shufflelo_epi16(srcAB, 0x3F);
00159 alphaAB = _mm_shufflehi_epi16(alphaAB, 0x3F);
/* factor = tr_nom_base - value / 4; dst = dst * factor / 256. */
00160 alphaAB = _mm_srli_epi16(alphaAB, 2);
00161 __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
00162 dstAB = _mm_mullo_epi16(dstAB, nom);
00163 dstAB = _mm_srli_epi16(dstAB, 8);
/* Low 8 bytes: the two processed pixels; high 8 bytes: the next two destination pixels as loaded. */
00164 dstAB = _mm_packus_epi16(dstAB, dstCD);
00165 Colour *old_dst = dst;
00166 src += 2;
00167 dst += 2;
/* The next destination block is loaded before the current one is stored. The two overlap
 * in pixels that the store writes back unchanged, so the loaded values are the same either way.
 * NOTE(review): the 16-byte store covers two pixels beyond the pair being processed; confirm
 * the destination buffer is writable that far at the end of a line. */
00168 dstABCD = _mm_loadu_si128((__m128i*) dst);
00169 _mm_storeu_si128((__m128i *) old_dst, dstAB);
00170 srcABCD = _mm_loadu_si128((const __m128i*) src);
00171 }
/* Odd width: same computation for the last pixel, storing 32 bits only. */
00172 if (bp->width & 1) {
00173 __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128());
00174 __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128());
00175 __m128i alphaAB = _mm_shufflelo_epi16(srcAB, 0x3F);
00176 alphaAB = _mm_shufflehi_epi16(alphaAB, 0x3F);
00177 alphaAB = _mm_srli_epi16(alphaAB, 2);
00178 __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
00179 dstAB = _mm_mullo_epi16(dstAB, nom);
00180 dstAB = _mm_srli_epi16(dstAB, 8);
00181 dstAB = _mm_packus_epi16(dstAB, dstAB);
00182 (*dst).data = EXTR32(dstAB, 0);
00183 }
00184 break;
00185 }
00186 }
00187
/* Advance to the next source line (byte stride) and the next destination line (pixel stride). */
00188 src_rgba_line = (const Colour*) ((const byte*) src_rgba_line + si->sprite_line_size);
00189 dst_line += bp->pitch;
00190 }
00191 }
00192 IGNORE_UNINITIALIZED_WARNING_STOP
00193
00201 void Blitter_32bppSSE2::Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom)
00202 {
00203 switch (mode) {
00204 case BM_NORMAL: {
00205 const BlockType bt_last = (BlockType) (bp->width & 1);
00206 if (bp->skip_left != 0 || bp->width <= MARGIN_NORMAL_THRESHOLD) {
00207 switch (bt_last) {
00208 case BT_EVEN: Draw<BM_NORMAL, RM_WITH_SKIP, BT_EVEN>(bp, zoom); return;
00209 case BT_ODD: Draw<BM_NORMAL, RM_WITH_SKIP, BT_ODD>(bp, zoom); return;
00210 default: NOT_REACHED();
00211 }
00212 } else {
00213 switch (bt_last) {
00214 case BT_EVEN: Draw<BM_NORMAL, RM_WITH_MARGIN, BT_EVEN>(bp, zoom); return;
00215 case BT_ODD: Draw<BM_NORMAL, RM_WITH_MARGIN, BT_ODD>(bp, zoom); return;
00216 default: NOT_REACHED();
00217 }
00218 }
00219 break;
00220 }
00221 case BM_COLOUR_REMAP:
00222 if (bp->skip_left != 0 || bp->width <= MARGIN_REMAP_THRESHOLD) {
00223 Draw<BM_COLOUR_REMAP, RM_WITH_SKIP, BT_NONE>(bp, zoom); return;
00224 } else {
00225 Draw<BM_COLOUR_REMAP, RM_WITH_MARGIN, BT_NONE>(bp, zoom); return;
00226 }
00227 case BM_TRANSPARENT: Draw<BM_TRANSPARENT, RM_NONE, BT_NONE>(bp, zoom); return;
00228 default: NOT_REACHED();
00229 }
00230 }
00231
00232 Sprite *Blitter_32bppSSE_Base::Encode(const SpriteLoader::Sprite *sprite, AllocatorProc *allocator)
00233 {
00234
00235
00236
00237
00238 ZoomLevel zoom_min = ZOOM_LVL_NORMAL;
00239 ZoomLevel zoom_max = ZOOM_LVL_NORMAL;
00240 if (sprite->type != ST_FONT) {
00241 zoom_min = _settings_client.gui.zoom_min;
00242 zoom_max = _settings_client.gui.zoom_max;
00243 if (zoom_max == zoom_min) zoom_max = ZOOM_LVL_MAX;
00244 }
00245
00246
00247 SpriteData sd;
00248 uint all_sprites_size = 0;
00249 for (ZoomLevel z = zoom_min; z <= zoom_max; z++) {
00250 const SpriteLoader::Sprite *src_sprite = &sprite[z];
00251 sd.infos[z].sprite_width = src_sprite->width;
00252 sd.infos[z].sprite_offset = all_sprites_size;
00253 sd.infos[z].sprite_line_size = sizeof(Colour) * src_sprite->width + sizeof(uint32) * META_LENGTH;
00254
00255 const uint rgba_size = sd.infos[z].sprite_line_size * src_sprite->height;
00256 sd.infos[z].mv_offset = all_sprites_size + rgba_size;
00257
00258 const uint mv_size = sizeof(MapValue) * src_sprite->width * src_sprite->height;
00259 all_sprites_size += rgba_size + mv_size;
00260 }
00261
00262 Sprite *dst_sprite = (Sprite *) allocator(sizeof(Sprite) + sizeof(SpriteData) + all_sprites_size);
00263 dst_sprite->height = sprite->height;
00264 dst_sprite->width = sprite->width;
00265 dst_sprite->x_offs = sprite->x_offs;
00266 dst_sprite->y_offs = sprite->y_offs;
00267 memcpy(dst_sprite->data, &sd, sizeof(SpriteData));
00268
00269
00270 for (ZoomLevel z = zoom_min; z <= zoom_max; z++) {
00271 const SpriteLoader::Sprite *src_sprite = &sprite[z];
00272 const SpriteLoader::CommonPixel *src = (const SpriteLoader::CommonPixel *) src_sprite->data;
00273 Colour *dst_rgba_line = (Colour *) &dst_sprite->data[sizeof(SpriteData) + sd.infos[z].sprite_offset];
00274 MapValue *dst_mv = (MapValue *) &dst_sprite->data[sizeof(SpriteData) + sd.infos[z].mv_offset];
00275 for (uint y = src_sprite->height; y != 0; y--) {
00276 Colour *dst_rgba = dst_rgba_line + META_LENGTH;
00277 for (uint x = src_sprite->width; x != 0; x--) {
00278 if (src->a != 0) {
00279 dst_rgba->a = src->a;
00280 dst_mv->m = src->m;
00281 if (src->m != 0) {
00282
00283 const uint8 rgb_max = max(src->r, max(src->g, src->b));
00284 dst_mv->v = (rgb_max == 0) ? Blitter_32bppBase::DEFAULT_BRIGHTNESS : rgb_max;
00285
00286
00287 const Colour colour = AdjustBrightness(Blitter_32bppBase::LookupColourInPalette(src->m), dst_mv->v);
00288 dst_rgba->r = colour.r;
00289 dst_rgba->g = colour.g;
00290 dst_rgba->b = colour.b;
00291 } else {
00292 dst_rgba->r = src->r;
00293 dst_rgba->g = src->g;
00294 dst_rgba->b = src->b;
00295 dst_mv->v = Blitter_32bppBase::DEFAULT_BRIGHTNESS;
00296 }
00297 } else {
00298 dst_rgba->data = 0;
00299 *(uint16*) dst_mv = 0;
00300 }
00301 dst_rgba++;
00302 dst_mv++;
00303 src++;
00304 }
00305
00306
00307 dst_rgba = dst_rgba_line + META_LENGTH;
00308 uint32 nb_pix_transp = 0;
00309 for (uint x = src_sprite->width; x != 0; x--) {
00310 if (dst_rgba->a == 0) nb_pix_transp++;
00311 else break;
00312 dst_rgba++;
00313 }
00314 (*dst_rgba_line).data = nb_pix_transp & ~1;
00315
00316 Colour *nb_right = dst_rgba_line + 1;
00317 dst_rgba_line = (Colour*) ((byte*) dst_rgba_line + sd.infos[z].sprite_line_size);
00318
00319
00320 dst_rgba = dst_rgba_line - 1;
00321 nb_pix_transp = 0;
00322 for (uint x = src_sprite->width; x != 0; x--) {
00323 if (dst_rgba->a == 0) nb_pix_transp++;
00324 else break;
00325 dst_rgba--;
00326 }
00327 (*nb_right).data = nb_pix_transp;
00328 }
00329 }
00330
00331 return dst_sprite;
00332 }
00333
00337 inline Colour Blitter_32bppSSE2::AdjustBrightness(Colour colour, uint8 brightness)
00338 {
00339
00340 if (brightness == DEFAULT_BRIGHTNESS) return colour;
00341
00342 return Blitter_32bppSSE2::ReallyAdjustBrightness(colour, brightness);
00343 }
00344
00345 IGNORE_UNINITIALIZED_WARNING_START
00346 Colour Blitter_32bppSSE2::ReallyAdjustBrightness(Colour colour, uint8 brightness)
00347 {
00348 ALIGN(16) uint64 c16 = colour.b | (uint64) colour.g << 16 | (uint64) colour.r << 32;
00349 c16 *= brightness;
00350 uint64 c16_ob = c16;
00351 c16 /= DEFAULT_BRIGHTNESS;
00352 c16 &= 0x01FF01FF01FF;
00353
00354
00355 c16_ob = (((c16_ob >> (8 + 7)) & 0x0100010001) * 0xFF) & c16;
00356 uint64 ob = (uint16) c16_ob + (uint16) (c16_ob >> 16) + (uint16) (c16_ob >> 32);
00357
00358 const uint32 alpha32 = colour.data & 0xFF000000;
00359 __m128i ret;
00360 INSR64(c16, ret, 0);
00361 if (ob != 0) {
00362
00363 ob /= 2;
00364 __m128i ob128;
00365 INSR64(ob | ob << 16 | ob << 32, ob128, 0);
00366 __m128i white = OVERBRIGHT_VALUE_MASK;
00367 __m128i c128 = ret;
00368 ret = _mm_subs_epu16(white, c128);
00369 ret = _mm_mullo_epi16(ret, ob128);
00370 ret = _mm_srli_epi16(ret, 8);
00371 ret = _mm_add_epi16(ret, c128);
00372 }
00373
00374 ret = _mm_packus_epi16(ret, ret);
00375 return alpha32 | EXTR32(ret, 0);
00376 }
00377 IGNORE_UNINITIALIZED_WARNING_STOP
00378
00379 #endif