viewport_sprite_sorter_sse4.cpp

00001 /* $Id: viewport_sprite_sorter_sse4.cpp 26207 2014-01-02 18:52:54Z rubidium $ */
00002 
00003 /*
00004  * This file is part of OpenTTD.
00005  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
00006  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00007  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
00008  */
00009 
00012 #ifdef WITH_SSE
00013 
00014 #include "stdafx.h"
00015 #include "cpu.h"
00016 #include "smmintrin.h"
00017 #include "viewport_sprite_sorter.h"
00018 
/*
 * LOAD_128 is the intrinsic used to read 16 bytes of a ParentSpriteToDraw
 * into an SSE register.
 * _mm_load_si128 requires a 16 byte aligned address, _mm_loadu_si128 does not.
 * NOTE(review): _SQ64 presumably marks 64 bit builds, in which the sprites are
 * expected to be 16 byte aligned - confirm. The assertion only verifies that
 * the structure size is a multiple of 16 bytes.
 */
#if defined(_SQ64)
  assert_compile(0 == (sizeof(ParentSpriteToDraw) % 16));
  #define LOAD_128 _mm_load_si128
#else
  #define LOAD_128 _mm_loadu_si128
#endif
00025 
00027 void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv)
00028 {
00029   const __m128i mask_ptest = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  0,  0,  0);
00030   ParentSpriteToDraw ** const psdvend = psdv->End();
00031   ParentSpriteToDraw **psd = psdv->Begin();
00032   while (psd != psdvend) {
00033     ParentSpriteToDraw * const ps = *psd;
00034 
00035     if (ps->comparison_done) {
00036       psd++;
00037       continue;
00038     }
00039 
00040     ps->comparison_done = true;
00041 
00042     for (ParentSpriteToDraw **psd2 = psd + 1; psd2 != psdvend; psd2++) {
00043       ParentSpriteToDraw * const ps2 = *psd2;
00044 
00045       if (ps2->comparison_done) continue;
00046 
00047       /*
00048        * Decide which comparator to use, based on whether the bounding boxes overlap
00049        *
00050        * Original code:
00051        * if (ps->xmax >= ps2->xmin && ps->xmin <= ps2->xmax && // overlap in X?
00052        *     ps->ymax >= ps2->ymin && ps->ymin <= ps2->ymax && // overlap in Y?
00053        *     ps->zmax >= ps2->zmin && ps->zmin <= ps2->zmax) { // overlap in Z?
00054        *
00055        * Above conditions are equivalent to:
00056        * 1/    !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin)   &&    (ps->xmin <= ps2->xmax) && (ps->ymin <= ps2->ymax) && (ps->zmin <= ps2->zmax) )
00057        * 2/    !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin)   &&    (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) )
00058        * 3/  !( ( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) ) &&  ( (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) ) )
00059        * 4/ !( !( (ps->xmax <  ps2->xmin) || (ps->ymax <  ps2->ymin) || (ps->zmax <  ps2->zmin) ) && !( (ps2->xmax <  ps->xmin) || (ps2->ymax <  ps->ymin) || (ps2->zmax <  ps->zmin) ) )
00060        * 5/ PTEST <---------------------------------- rslt1 ---------------------------------->         <------------------------------ rslt2 -------------------------------------->
00061        */
00062       __m128i ps1_max = LOAD_128((__m128i*) &ps->xmax);
00063       __m128i ps2_min = LOAD_128((__m128i*) &ps2->xmin);
00064       __m128i rslt1 = _mm_cmplt_epi32(ps1_max, ps2_min);
00065       if (!_mm_testz_si128(mask_ptest, rslt1))
00066         continue;
00067 
00068       __m128i ps1_min = LOAD_128((__m128i*) &ps->xmin);
00069       __m128i ps2_max = LOAD_128((__m128i*) &ps2->xmax);
00070       __m128i rslt2 = _mm_cmplt_epi32(ps2_max, ps1_min);
00071       if (_mm_testz_si128(mask_ptest, rslt2)) {
00072         /* Use X+Y+Z as the sorting order, so sprites closer to the bottom of
00073          * the screen and with higher Z elevation, are drawn in front.
00074          * Here X,Y,Z are the coordinates of the "center of mass" of the sprite,
00075          * i.e. X=(left+right)/2, etc.
00076          * However, since we only care about order, don't actually divide / 2
00077          */
00078         if (ps->xmin + ps->xmax + ps->ymin + ps->ymax + ps->zmin + ps->zmax <=
00079             ps2->xmin + ps2->xmax + ps2->ymin + ps2->ymax + ps2->zmin + ps2->zmax) {
00080           continue;
00081         }
00082       }
00083 
00084       /* Move ps2 in front of ps */
00085       ParentSpriteToDraw * const temp = ps2;
00086       for (ParentSpriteToDraw **psd3 = psd2; psd3 > psd; psd3--) {
00087         *psd3 = *(psd3 - 1);
00088       }
00089       *psd = temp;
00090     }
00091   }
00092 }
00093 
00098 bool ViewportSortParentSpritesSSE41Checker()
00099 {
00100   return HasCPUIDFlag(1, 2, 19);
00101 }
00102 
00103 #endif /* WITH_SSE */