cpplab/app/sse.cpp

130 lines
No EOL
3.4 KiB
C++

#include <sys/time.h>
#include <time.h>

#include <iostream>

#include "../src/cpu/utils.hpp"
#include "../src/cpu/x86/sse.hpp"
#include "../src/cpu/naive.hpp"
using namespace cpu;
using namespace std;
/**
 * Returns a timestamp in milliseconds for measuring elapsed time.
 *
 * The value is read from the monotonic clock, so it is only meaningful as a
 * difference between two calls (end - start); it is not a calendar time.
 * A monotonic source is used because the wall clock can jump (NTP, manual
 * adjustment) in the middle of a benchmark and corrupt the measured interval.
 *
 * @return milliseconds on the monotonic clock, or milliseconds since the Unix
 *         epoch if the monotonic clock cannot be read.
 */
long int gettime()
{
    struct timespec ts;
    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0)
    {
        // Monotonic clock unavailable: fall back to the wall clock.
        struct timeval tp;
        gettimeofday(&tp, NULL);
        return static_cast<long int>(tp.tv_sec) * 1000 + tp.tv_usec / 1000;
    }
    // Seconds to milliseconds, plus nanoseconds to milliseconds.
    return static_cast<long int>(ts.tv_sec) * 1000 + ts.tv_nsec / 1000000;
}
// Number of iterations each timed loop runs (100 million).
// Parenthesized so the macro expands to a single value inside larger
// expressions such as `x / MAX_ITER` or `x % MAX_ITER`.
#define MAX_ITER (1000 * 1000 * 100)
int main(int argc, char **argcv)
{
SSE sse = SSE();
UChar c1_128[CHAR_LEN_128] = {0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10};
UChar c2_128[CHAR_LEN_128] = {0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10};
UInt v1_128[INT_LEN_128] = {0x1, 0x2, 0x3, 0x4};
UInt v2_128[INT_LEN_128] = {0x1, 0x2, 0x3, 0x4};
ULong v1_128_l[LONG_LEN_128];
ULong v2_128_l[LONG_LEN_128];
UChar c1_256[CHAR_LEN_256] = {
0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10
};
UChar c2_256[CHAR_LEN_256] = {
0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10
};
UInt v1_256[INT_LEN_256] = {0x1, 0x2, 0x3, 0x4,0x1, 0x2, 0x3, 0x4};
UInt v2_256[INT_LEN_256] = {0x1, 0x2, 0x3, 0x4,0x1, 0x2, 0x3, 0x4};
ULong v1_256_l[LONG_LEN_256];
ULong v2_256_l[LONG_LEN_256];
Utils::int128ToLong(v1_128, v1_128_l);
Utils::int128ToLong(v2_128, v2_128_l);
Utils::int256ToLong(v1_256,v1_256_l);
Utils::int256ToLong(v2_256,v2_256_l);
sse.add_128(v1_128, v2_128);
long int start, end;
Utils::printHex(v1_128, INT_LEN_128);
Utils::printHex(v2_128, INT_LEN_128);
Utils::printHex(v1_128_l, LONG_LEN_128);
Utils::printHex(v2_128_l, LONG_LEN_128);
start = gettime();
for (int i = 0; i < MAX_ITER; i++)
{
Naive::add_128(v1_128, v2_128);
}
end = gettime();
cout << "Naive Approach 128: " << end - start << endl;
start = gettime();
for (int i = 0; i < MAX_ITER; i++)
{
sse.add_128(v1_128, v2_128);
}
end = gettime();
cout << "128 Approach paddw: " << end - start << endl;
start = gettime();
for (int i = 0; i < MAX_ITER; i++)
{
sse.add_128(v1_128_l, v2_128_l);
}
end = gettime();
cout << "128 Approach paddd: " << end - start << endl;
start = gettime();
for (int i = 0; i < MAX_ITER; i++)
{
sse.add_128(c1_128, c2_128);
}
end = gettime();
cout << "128 Approach paddb: " << end - start << endl;
start = gettime();
for (int i = 0; i < MAX_ITER; i++)
{
Naive::add_256(v1_256, v2_256);
}
end = gettime();
cout << "Naive Approach 256: " << end - start << endl;
start = gettime();
for (int i = 0; i < MAX_ITER; i++)
{
sse.add_256(v1_256, v2_256);
}
end = gettime();
cout << "256 Approach vpaddw: " << end - start << endl;
start = gettime();
for (int i = 0; i < MAX_ITER; i++)
{
sse.add_256(v1_256_l, v2_256_l);
}
end = gettime();
cout << "256 Approach vpaddd: " << end - start << endl;
start = gettime();
for (int i = 0; i < MAX_ITER; i++)
{
sse.add_256(c1_256, c2_256);
}
end = gettime();
cout << "256 Approach vpaddb: " << end - start << endl;
}