Added SSE assembly instructions for adding packed long and int values
This commit is contained in:
parent
2db72850bc
commit
2ae4f28e7d
4 changed files with 67 additions and 14 deletions
|
@ -12,10 +12,15 @@ namespace core {
|
|||
static const unsigned int MASK_32=0xFFFFFFFF;
|
||||
public:
|
||||
//Attributes
|
||||
static const unsigned char LEN_64 = 2;
|
||||
static const unsigned char LEN_128 = 4;
|
||||
static const unsigned char LEN_256 = 8;
|
||||
static const unsigned char LEN_512 = 8;
|
||||
static const unsigned char INT_LEN_64 = 2;
|
||||
static const unsigned char INT_LEN_128 = 4;
|
||||
static const unsigned char INT_LEN_256 = 8;
|
||||
static const unsigned char INT_LEN_512 = 16;
|
||||
|
||||
static const unsigned char LONG_LEN_64 = 1;
|
||||
static const unsigned char LONG_LEN_128 = 2;
|
||||
static const unsigned char LONG_LEN_256 = 4;
|
||||
static const unsigned char LONG_LEN_512 = 8;
|
||||
|
||||
//Methods
|
||||
static void sum_128_long(int* a, int* b);
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#include "sse.hpp"
|
||||
#include <iostream>
|
||||
|
||||
//X86 Assembly to add two 128 bit numbers
|
||||
// x86 assembly to add two 128-bit numbers in the form of packed 32-bit integers
|
||||
void core::cpu::SSE::sum_128(int *a,int *b) {
|
||||
asm(
|
||||
"movdqa %0, %%xmm1\n"
|
||||
|
@ -10,4 +10,17 @@ void core::cpu::SSE::sum_128(int *a,int *b) {
|
|||
: "=m"(*a)
|
||||
: "m"(*b)
|
||||
);
|
||||
};
|
||||
|
||||
|
||||
//X86 Assembly to add two 128 bit numbers in the form of packed long 64bit
|
||||
void core::cpu::SSE::sum_128(long *a,long *b) {
|
||||
asm(
|
||||
"movdqa %0, %%xmm1\n"
|
||||
"paddd %1, %%xmm1\n"
|
||||
"movdqa %%xmm1, %0"
|
||||
: "=m"(*a)
|
||||
: "m"(*b)
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ namespace core {
|
|||
// Groups static routines that add two 128-bit operands in place. The
// out-of-line definitions load the first operand into an XMM register with
// inline assembly, add the second operand, and store the result back.
class SSE {
|
||||
public:
|
||||
// Adds the operand at `b` to the operand at `a`; the result is written
// through `a`, while `b` is only read.
// NOTE(review): 128 bits corresponds to four ints only if int is 32 bits
// wide - confirm for the target platform.
static void sum_128(int *a,int *b);
|
||||
// Overload for long operands; the result is written through `a`, while `b`
// is only read.
// NOTE(review): 128 bits corresponds to two longs only if long is 64 bits
// wide - confirm for the target platform.
static void sum_128(long *a,long *b);
|
||||
};
|
||||
};
|
||||
};
|
||||
|
|
48
sse.cpp
48
sse.cpp
|
@ -1,24 +1,58 @@
|
|||
#include <iostream>
|
||||
#include <string>
|
||||
#include <iomanip>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "src/core/cpu/naive.hpp"
|
||||
#include "src/core/cpu/sse.hpp"
|
||||
|
||||
using namespace core::cpu;
|
||||
|
||||
// Returns the current wall-clock time in milliseconds, as reported by
// gettimeofday(). Callers subtract two readings to obtain an elapsed time.
long int gettime(){
    struct timeval tp;
    gettimeofday(&tp, NULL);
    // Widen before multiplying so that seconds * 1000 cannot overflow on
    // platforms where time_t or long is only 32 bits wide.
    const long long ms = static_cast<long long>(tp.tv_sec) * 1000LL
                       + static_cast<long long>(tp.tv_usec) / 1000LL;
    return static_cast<long int>(ms);
}
|
||||
|
||||
#define MAX_ITER 1000*1000*100
|
||||
|
||||
int main(int argc, char** argcv){
|
||||
int v1_128[Naive::LEN_128] = { 0x1, 0x2,0x3,0x4 };
|
||||
int v2_128[Naive::LEN_128] = { 0x1, 0x2,0x3,0x4 };
|
||||
int v1_128[Naive::INT_LEN_128] = { 0x1, 0x2,0x3,0x4 };
|
||||
int v2_128[Naive::INT_LEN_128] = { 0x1, 0x2,0x3,0x4 };
|
||||
|
||||
Naive::print(v1_128,Naive::LEN_128);
|
||||
Naive::print(v2_128,Naive::LEN_128);
|
||||
long v1_128_l[Naive::LONG_LEN_128] = { (long)0x2<<32 | 0x2, (long)0x4<<32 | 0x3 };
|
||||
long v2_128_l[Naive::LONG_LEN_128] = { (long)0x2<<32 | 0x2, (long)0x4<<32 | 0x3 };
|
||||
|
||||
long int start,end;
|
||||
|
||||
Naive::print(v1_128,Naive::INT_LEN_128);
|
||||
Naive::print(v2_128,Naive::INT_LEN_128);
|
||||
|
||||
start = gettime();
|
||||
for(int i=0;i<MAX_ITER;i++){
|
||||
Naive::sum_128_long(v1_128,v2_128);
|
||||
SSE::sum_128(v1_128,v2_128);
|
||||
}
|
||||
end = gettime();
|
||||
cout << "Naive Approach: " << end-start << endl;
|
||||
|
||||
Naive::print(v1_128,Naive::LEN_128);
|
||||
Naive::print(v2_128,Naive::LEN_128);
|
||||
start = gettime();
|
||||
for(int i=0;i<MAX_ITER;i++){
|
||||
SSE::sum_128(v1_128,v2_128);
|
||||
}
|
||||
end = gettime();
|
||||
cout << "SSE Approach paddw: " << end-start << endl;
|
||||
|
||||
start = gettime();
|
||||
for(int i=0;i<MAX_ITER;i++){
|
||||
SSE::sum_128(v1_128_l,v2_128_l);
|
||||
}
|
||||
end = gettime();
|
||||
cout << "SSE Approach paddd: " << end-start << endl;
|
||||
|
||||
|
||||
//Naive::print(v1_128,Naive::LEN_128);
|
||||
//Naive::print(v2_128,Naive::LEN_128);
|
||||
|
||||
|
||||
}
|
Loading…
Reference in a new issue