Added assembly sse instruction for long and int adding
This commit is contained in:
parent
2db72850bc
commit
2ae4f28e7d
4 changed files with 67 additions and 14 deletions
|
@ -12,10 +12,15 @@ namespace core {
|
||||||
static const unsigned int MASK_32=0xFFFFFFFF;
|
static const unsigned int MASK_32=0xFFFFFFFF;
|
||||||
public:
|
public:
|
||||||
//Attributes
|
//Attributes
|
||||||
static const unsigned char LEN_64 = 2;
|
static const unsigned char INT_LEN_64 = 2;
|
||||||
static const unsigned char LEN_128 = 4;
|
static const unsigned char INT_LEN_128 = 4;
|
||||||
static const unsigned char LEN_256 = 8;
|
static const unsigned char INT_LEN_256 = 8;
|
||||||
static const unsigned char LEN_512 = 8;
|
static const unsigned char INT_LEN_512 = 16;
|
||||||
|
|
||||||
|
static const unsigned char LONG_LEN_64 = 1;
|
||||||
|
static const unsigned char LONG_LEN_128 = 2;
|
||||||
|
static const unsigned char LONG_LEN_256 = 4;
|
||||||
|
static const unsigned char LONG_LEN_512 = 8;
|
||||||
|
|
||||||
//Methods
|
//Methods
|
||||||
static void sum_128_long(int* a, int* b);
|
static void sum_128_long(int* a, int* b);
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
#include "sse.hpp"
|
#include "sse.hpp"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
//X86 Assembly to add two 128 bit numbers
|
//X86 Assembly to add two 128 bit numbers in the form of packed integers 32bit
|
||||||
void core::cpu::SSE::sum_128(int *a,int *b) {
|
void core::cpu::SSE::sum_128(int *a,int *b) {
|
||||||
asm(
|
asm(
|
||||||
"movdqa %0, %%xmm1\n"
|
"movdqa %0, %%xmm1\n"
|
||||||
|
@ -10,4 +10,17 @@ void core::cpu::SSE::sum_128(int *a,int *b) {
|
||||||
: "=m"(*a)
|
: "=m"(*a)
|
||||||
: "m"(*b)
|
: "m"(*b)
|
||||||
);
|
);
|
||||||
}
|
};
|
||||||
|
|
||||||
|
|
||||||
|
//X86 Assembly to add two 128 bit numbers in the form of packed long 64bit
|
||||||
|
void core::cpu::SSE::sum_128(long *a,long *b) {
|
||||||
|
asm(
|
||||||
|
"movdqa %0, %%xmm1\n"
|
||||||
|
"paddd %1, %%xmm1\n"
|
||||||
|
"movdqa %%xmm1, %0"
|
||||||
|
: "=m"(*a)
|
||||||
|
: "m"(*b)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,7 @@ namespace core {
|
||||||
class SSE {
|
class SSE {
|
||||||
public:
|
public:
|
||||||
static void sum_128(int *a,int *b);
|
static void sum_128(int *a,int *b);
|
||||||
|
static void sum_128(long *a,long *b);
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
50
sse.cpp
50
sse.cpp
|
@ -1,24 +1,58 @@
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
|
#include <sys/time.h>
|
||||||
|
|
||||||
#include "src/core/cpu/naive.hpp"
|
#include "src/core/cpu/naive.hpp"
|
||||||
#include "src/core/cpu/sse.hpp"
|
#include "src/core/cpu/sse.hpp"
|
||||||
|
|
||||||
using namespace core::cpu;
|
using namespace core::cpu;
|
||||||
|
|
||||||
|
// Reads the current time with gettimeofday() and returns it as a count of
// whole milliseconds (seconds scaled up, microseconds scaled down and truncated).
long int gettime(){
    struct timeval now;
    gettimeofday(&now, NULL);
    // Seconds contribute 1000 ms each; the microsecond part is truncated to ms.
    return now.tv_sec * 1000 + now.tv_usec / 1000;
}
|
||||||
|
|
||||||
|
// Number of iterations for each timed loop (100 million). Parenthesized so the
// product stays a single operand wherever the macro is expanded.
#define MAX_ITER (1000*1000*100)
|
||||||
|
|
||||||
int main(int argc, char** argcv){
|
int main(int argc, char** argcv){
|
||||||
int v1_128[Naive::LEN_128] = { 0x1, 0x2,0x3,0x4 };
|
int v1_128[Naive::INT_LEN_128] = { 0x1, 0x2,0x3,0x4 };
|
||||||
int v2_128[Naive::LEN_128] = { 0x1, 0x2,0x3,0x4 };
|
int v2_128[Naive::INT_LEN_128] = { 0x1, 0x2,0x3,0x4 };
|
||||||
|
|
||||||
Naive::print(v1_128,Naive::LEN_128);
|
long v1_128_l[Naive::LONG_LEN_128] = { (long)0x2<<32 | 0x2, (long)0x4<<32 | 0x3 };
|
||||||
Naive::print(v2_128,Naive::LEN_128);
|
long v2_128_l[Naive::LONG_LEN_128] = { (long)0x2<<32 | 0x2, (long)0x4<<32 | 0x3 };
|
||||||
|
|
||||||
Naive::sum_128_long(v1_128,v2_128);
|
long int start,end;
|
||||||
SSE::sum_128(v1_128,v2_128);
|
|
||||||
|
|
||||||
Naive::print(v1_128,Naive::LEN_128);
|
Naive::print(v1_128,Naive::INT_LEN_128);
|
||||||
Naive::print(v2_128,Naive::LEN_128);
|
Naive::print(v2_128,Naive::INT_LEN_128);
|
||||||
|
|
||||||
|
start = gettime();
|
||||||
|
for(int i=0;i<MAX_ITER;i++){
|
||||||
|
Naive::sum_128_long(v1_128,v2_128);
|
||||||
|
}
|
||||||
|
end = gettime();
|
||||||
|
cout << "Naive Approach: " << end-start << endl;
|
||||||
|
|
||||||
|
start = gettime();
|
||||||
|
for(int i=0;i<MAX_ITER;i++){
|
||||||
|
SSE::sum_128(v1_128,v2_128);
|
||||||
|
}
|
||||||
|
end = gettime();
|
||||||
|
cout << "SSE Approach paddw: " << end-start << endl;
|
||||||
|
|
||||||
|
start = gettime();
|
||||||
|
for(int i=0;i<MAX_ITER;i++){
|
||||||
|
SSE::sum_128(v1_128_l,v2_128_l);
|
||||||
|
}
|
||||||
|
end = gettime();
|
||||||
|
cout << "SSE Approach paddd: " << end-start << endl;
|
||||||
|
|
||||||
|
|
||||||
|
//Naive::print(v1_128,Naive::LEN_128);
|
||||||
|
//Naive::print(v2_128,Naive::LEN_128);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
Loading…
Reference in a new issue