diff --git a/src/core/cpu/naive.hpp b/src/core/cpu/naive.hpp
index 32457c8..02c6519 100644
--- a/src/core/cpu/naive.hpp
+++ b/src/core/cpu/naive.hpp
@@ -12,10 +12,15 @@ namespace core {
             static const unsigned int MASK_32=0xFFFFFFFF;
         public:
             //Attributes
-            static const unsigned char LEN_64 = 2;
-            static const unsigned char LEN_128 = 4;
-            static const unsigned char LEN_256 = 8;
-            static const unsigned char LEN_512 = 8;
+            static const unsigned char INT_LEN_64 = 2;
+            static const unsigned char INT_LEN_128 = 4;
+            static const unsigned char INT_LEN_256 = 8;
+            static const unsigned char INT_LEN_512 = 16;
+
+            static const unsigned char LONG_LEN_64 = 1;
+            static const unsigned char LONG_LEN_128 = 2;
+            static const unsigned char LONG_LEN_256 = 4;
+            static const unsigned char LONG_LEN_512 = 8;
 
             //Methods
             static void sum_128_long(int* a, int* b);
diff --git a/src/core/cpu/sse.cpp b/src/core/cpu/sse.cpp
index 8054d70..e0ff2c8 100644
--- a/src/core/cpu/sse.cpp
+++ b/src/core/cpu/sse.cpp
@@ -1,7 +1,7 @@
 #include "sse.hpp"
 #include
 
-//X86 Assembly to add two 128 bit numbers
+//X86 Assembly to add two 128 bit numbers in the form of packed integers 32bit
 void core::cpu::SSE::sum_128(int *a,int *b) {
     asm(
         "movdqa %0, %%xmm1\n"
@@ -10,4 +10,22 @@ void core::cpu::SSE::sum_128(int *a,int *b) {
         : "=m"(*a)
         : "m"(*b)
     );
-}
\ No newline at end of file
+}
+
+
+//X86 Assembly to add two 128 bit numbers in the form of packed long 64bit.
+//Adds the two 64-bit lanes of b to the two lanes of a, in place (paddq, so
+//carries propagate across each full 64-bit lane).
+//movdqa faults on unaligned addresses: a and b must each be 16-byte aligned.
+//Assumes long is 64 bits wide (LP64) - TODO confirm for every target.
+void core::cpu::SSE::sum_128(long *a,long *b) {
+    asm(
+        "movdqa %0, %%xmm1\n"
+        "paddq %1, %%xmm1\n"
+        "movdqa %%xmm1, %0"
+        : "+m"(*a)
+        : "m"(*b)
+        : "xmm1", "memory"
+    );
+}
+
diff --git a/src/core/cpu/sse.hpp b/src/core/cpu/sse.hpp
index fd56426..da11868 100644
--- a/src/core/cpu/sse.hpp
+++ b/src/core/cpu/sse.hpp
@@ -10,6 +10,7 @@ namespace core {
         class SSE {
             public:
                 static void sum_128(int *a,int *b);
+                static void sum_128(long *a,long *b);
         };
     };
 };
diff --git a/sse.cpp b/sse.cpp
index a0f7446..2d0813a 100644
--- a/sse.cpp
+++ b/sse.cpp
@@ -1,24 +1,58 @@
 #include
 #include
#include +#include #include "src/core/cpu/naive.hpp" #include "src/core/cpu/sse.hpp" using namespace core::cpu; +long int gettime(){ + struct timeval tp; + gettimeofday(&tp, NULL); + long int ms = tp.tv_sec * 1000 + tp.tv_usec / 1000; + return ms; +}; + +#define MAX_ITER 1000*1000*100 + int main(int argc, char** argcv){ - int v1_128[Naive::LEN_128] = { 0x1, 0x2,0x3,0x4 }; - int v2_128[Naive::LEN_128] = { 0x1, 0x2,0x3,0x4 }; + int v1_128[Naive::INT_LEN_128] = { 0x1, 0x2,0x3,0x4 }; + int v2_128[Naive::INT_LEN_128] = { 0x1, 0x2,0x3,0x4 }; - Naive::print(v1_128,Naive::LEN_128); - Naive::print(v2_128,Naive::LEN_128); + long v1_128_l[Naive::LONG_LEN_128] = { (long)0x2<<32 | 0x2, (long)0x4<<32 | 0x3 }; + long v2_128_l[Naive::LONG_LEN_128] = { (long)0x2<<32 | 0x2, (long)0x4<<32 | 0x3 }; - Naive::sum_128_long(v1_128,v2_128); - SSE::sum_128(v1_128,v2_128); + long int start,end; - Naive::print(v1_128,Naive::LEN_128); - Naive::print(v2_128,Naive::LEN_128); + Naive::print(v1_128,Naive::INT_LEN_128); + Naive::print(v2_128,Naive::INT_LEN_128); + + start = gettime(); + for(int i=0;i