Replaced vmovdqa with vmovdqu because the memory is not aligned to 256 bits. Next step: align the memory, then switch back to the aligned instruction (vmovdqa).
This commit is contained in:
parent
cbf7ecee92
commit
7345dd2665
5 changed files with 36 additions and 35 deletions
|
@ -3,7 +3,6 @@
|
|||
#include "../src/cpu/x86/sse.hpp"
|
||||
#include "../src/cpu/naive.hpp"
|
||||
#include "../src/platform/timer.hpp"
|
||||
#include <sys/time.h>
|
||||
|
||||
using namespace cpu;
|
||||
using namespace platform;
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
typedef unsigned char UChar;
|
||||
typedef unsigned short UShort;
|
||||
typedef unsigned int UInt;
|
||||
typedef unsigned long ULong;
|
||||
typedef unsigned long int ULong;
|
||||
|
||||
// SSE DataTypes
|
||||
#define CHAR_LEN_128 16
|
||||
|
|
|
@ -50,7 +50,7 @@ namespace cpu
|
|||
packedULong[0] = (ULong)packedUInteger[1] << 32 | packedUInteger[0];
|
||||
packedULong[1] = (ULong)packedUInteger[3] << 32 | packedUInteger[2];
|
||||
packedULong[2] = (ULong)packedUInteger[5] << 32 | packedUInteger[4];
|
||||
packedULong[3] = (ULong)packedUInteger[7] << 32 | packedUInteger[7];
|
||||
packedULong[3] = (ULong)packedUInteger[7] << 32 | packedUInteger[6];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
#include "sse.hpp"
|
||||
|
||||
|
||||
#ifdef ARCH_X86
|
||||
#include <iostream>
|
||||
|
||||
void cpu::SSE::add_128(UChar *a,UChar *b){
|
||||
__asm__ __volatile__ (
|
||||
asm volatile (
|
||||
"movdqa %0,%%xmm1\n"
|
||||
"paddb %1,%%xmm1\n"
|
||||
"movdqa %%xmm1,%0"
|
||||
|
@ -14,9 +16,9 @@
|
|||
|
||||
//X86 Assembly to add two 128 bit numbers in the form of packed integers 32bit
|
||||
void cpu::SSE::add_128(UInt *a,UInt *b) {
|
||||
__asm__ __volatile__ (
|
||||
"movdqa %0, %%xmm1\n"
|
||||
"paddw %1, %%xmm1\n"
|
||||
asm volatile (
|
||||
"movdqa %0,%%xmm1\n"
|
||||
"paddw %1,%%xmm1\n"
|
||||
"movdqa %%xmm1, %0"
|
||||
: "=m"(*a)
|
||||
: "m"(*b)
|
||||
|
@ -26,9 +28,9 @@
|
|||
|
||||
//X86 Assembly to add two 128 bit numbers in the form of packed long 64bit
|
||||
void cpu::SSE::add_128(ULong *a,ULong *b) {
|
||||
__asm__ __volatile__ (
|
||||
"movdqa %0, %%xmm1\n"
|
||||
"paddd %1, %%xmm1\n"
|
||||
asm volatile (
|
||||
"movdqa %0,%%xmm1\n"
|
||||
"paddd %1,%%xmm1\n"
|
||||
"movdqa %%xmm1, %0"
|
||||
: "=m"(*a)
|
||||
: "m"(*b)
|
||||
|
@ -37,35 +39,35 @@
|
|||
|
||||
//X86 Assembly to add two 256 bit numbers in the form of packed byte vector
|
||||
// Adds two 256-bit vectors of 32 packed unsigned bytes in place: a[i] += b[i]
// for i in [0, 32). Each byte lane wraps around independently (vpaddb).
//
// a: in/out, must point to at least 32 readable and writable bytes.
// b: in, must point to at least 32 readable bytes.
//
// Unaligned moves (vmovdqu) are used, so neither pointer has to be 32-byte
// aligned. The 256-bit integer add requires a CPU with AVX2.
void cpu::SSE::add_256(UChar *a, UChar *b) {
    asm volatile (
        "vmovdqu %0, %%ymm1\n"
        "vmovdqu %1, %%ymm2\n"
        "vpaddb %%ymm1, %%ymm2, %%ymm1\n"   // both sources are the registers loaded above
        "vmovdqu %%ymm1, %0"
        // "+m": the destination is read before it is written. The array casts
        // tell the compiler that all 32 bytes are accessed, not just a[0]/b[0].
        : "+m"(*reinterpret_cast<UChar (*)[32]>(a))
        : "m"(*reinterpret_cast<const UChar (*)[32]>(b))
        // ymm1/ymm2 are scratch; listed by their xmm names, which alias the
        // same physical registers.
        : "xmm1", "xmm2"
    );
}
|
||||
|
||||
//X86 Assembly to add two 128 bit numbers in the form of packed long 32bit
|
||||
//X86 Assembly to add two 256 bit numbers in the form of packed int 32bit
|
||||
// Adds two 256-bit vectors of 8 packed 32-bit unsigned integers in place:
// a[i] += b[i] for i in [0, 8). Each 32-bit lane wraps around independently.
//
// a: in/out, must point to at least 8 readable and writable UInt values.
// b: in, must point to at least 8 readable UInt values.
//
// Unaligned moves (vmovdqu) are used, so neither pointer has to be 32-byte
// aligned. The 256-bit integer add requires a CPU with AVX2.
void cpu::SSE::add_256(UInt *a, UInt *b) {
    asm volatile (
        "vmovdqu %0, %%ymm1\n"
        "vmovdqu %1, %%ymm2\n"
        // vpaddd = 32-bit lanes. vpaddw would add 16-bit halves separately and
        // drop the carry from the low half of every element.
        "vpaddd %%ymm1, %%ymm2, %%ymm1\n"
        "vmovdqu %%ymm1, %0"
        // "+m": the destination is read before it is written. The array casts
        // tell the compiler that all 8 elements are accessed, not just a[0]/b[0].
        : "+m"(*reinterpret_cast<UInt (*)[8]>(a))
        : "m"(*reinterpret_cast<const UInt (*)[8]>(b))
        // ymm1/ymm2 are scratch; listed by their xmm names, which alias the
        // same physical registers.
        : "xmm1", "xmm2"
    );
}
|
||||
|
||||
//X86 Assembly to add two 128 bit numbers in the form of packed long 64bit
|
||||
//X86 Assembly to add two 256 bit numbers in the form of packed long 64bit
|
||||
void cpu::SSE::add_256(ULong *a,ULong *b) {
|
||||
__asm__ __volatile__(
|
||||
"vmovdqa %0, %%ymm1\n"
|
||||
"vmovdqa %1, %%ymm2\n"
|
||||
"vpaddd %%ymm1, %%ymm1, %%ymm2\n"
|
||||
"vmovdqa %%ymm1, %0"
|
||||
asm volatile(
|
||||
"vmovdqu %0, %%ymm1\n"
|
||||
"vmovdqu %1, %%ymm2\n"
|
||||
"vpaddd %%ymm1,%%ymm2,%%ymm1\n"
|
||||
"vmovdqu %%ymm1, %0"
|
||||
: "=m"(*a)
|
||||
: "m"(*b)
|
||||
);
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include <iostream>
|
||||
#include <sys/time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
namespace platform
|
||||
{
|
||||
|
@ -11,26 +12,25 @@ namespace platform
|
|||
std::string label;
|
||||
long int start;
|
||||
long int end;
|
||||
|
||||
|
||||
// Returns the current time reported by gettimeofday() in whole milliseconds:
// seconds * 1000 + microseconds / 1000 (the sub-millisecond part is dropped).
// NOTE(review): a present-day millisecond timestamp does not fit in a 32-bit
// long int - confirm the target's long is 64 bits wide.
long int gettime()
{
    struct timeval tp;
    gettimeofday(&tp, NULL);
    // The return type is integral, so the sum is computed in integer
    // arithmetic; going through double only to truncate again adds nothing.
    return tp.tv_sec * 1000 + tp.tv_usec / 1000;
}
|
||||
|
||||
public:
|
||||
Timer(const char* lbl) : label(lbl)
|
||||
Timer(const char *lbl)
|
||||
{
|
||||
this->label=std::string(lbl);
|
||||
this->start = gettime();
|
||||
this->label = std::string(lbl);
|
||||
this->start = this->gettime();
|
||||
}
|
||||
|
||||
// Stops the timer: records the current gettime() value as the end time and
// writes the label immediately followed by (end - start), in the milliseconds
// gettime() reports, to std::cout, ending the line with a flush.
~Timer()
{
    end = gettime();
    const unsigned long elapsed = end - start;
    std::cout << label << elapsed << std::endl;
}
|
||||
};
|
||||
|
|
Loading…
Reference in a new issue