Replaced vmovdqa with vmovdqu because the memory is not aligned to 256 bits (32 bytes). Next step: align the memory and then switch back to the aligned instruction (vmovdqa).

This commit is contained in:
balhau@balhau.net 2021-12-25 19:14:49 +00:00
parent cbf7ecee92
commit 7345dd2665
No known key found for this signature in database
GPG key ID: 1E666F326A121830
5 changed files with 36 additions and 35 deletions

View file

@ -3,7 +3,6 @@
#include "../src/cpu/x86/sse.hpp"
#include "../src/cpu/naive.hpp"
#include "../src/platform/timer.hpp"
#include <sys/time.h>
using namespace cpu;
using namespace platform;

View file

@ -38,7 +38,7 @@
typedef unsigned char UChar;
typedef unsigned short UShort;
typedef unsigned int UInt;
typedef unsigned long ULong;
typedef unsigned long int ULong;
// SSE DataTypes
#define CHAR_LEN_128 16

View file

@ -50,7 +50,7 @@ namespace cpu
packedULong[0] = (ULong)packedUInteger[1] << 32 | packedUInteger[0];
packedULong[1] = (ULong)packedUInteger[3] << 32 | packedUInteger[2];
packedULong[2] = (ULong)packedUInteger[5] << 32 | packedUInteger[4];
packedULong[3] = (ULong)packedUInteger[7] << 32 | packedUInteger[7];
packedULong[3] = (ULong)packedUInteger[7] << 32 | packedUInteger[6];
}
/**

View file

@ -1,9 +1,11 @@
#include "sse.hpp"
#ifdef ARCH_X86
#include <iostream>
void cpu::SSE::add_128(UChar *a,UChar *b){
__asm__ __volatile__ (
asm volatile (
"movdqa %0,%%xmm1\n"
"paddb %1,%%xmm1\n"
"movdqa %%xmm1,%0"
@ -14,9 +16,9 @@
//X86 Assembly to add two 128 bit numbers in the form of packed integers 32bit
void cpu::SSE::add_128(UInt *a,UInt *b) {
__asm__ __volatile__ (
"movdqa %0, %%xmm1\n"
"paddw %1, %%xmm1\n"
asm volatile (
"movdqa %0,%%xmm1\n"
"paddw %1,%%xmm1\n"
"movdqa %%xmm1, %0"
: "=m"(*a)
: "m"(*b)
@ -26,9 +28,9 @@
//X86 Assembly to add two 128 bit numbers in the form of packed long 64bit
void cpu::SSE::add_128(ULong *a,ULong *b) {
__asm__ __volatile__ (
"movdqa %0, %%xmm1\n"
"paddd %1, %%xmm1\n"
asm volatile (
"movdqa %0,%%xmm1\n"
"paddd %1,%%xmm1\n"
"movdqa %%xmm1, %0"
: "=m"(*a)
: "m"(*b)
@ -37,35 +39,35 @@
//X86 Assembly to add two 256 bit numbers in the form of packed byte vector
void cpu::SSE::add_256(UChar *a,UChar *b) {
__asm__ __volatile__ (
"vmovdqa %0, %%ymm1\n"
"vmovdqa %1, %%ymm2\n"
"vpaddb %%ymm1, %%ymm1, %%ymm2\n"
"vmovdqa %%ymm1, %0"
asm volatile (
"vmovdqu %0,%%ymm1\n"
"vmovdqu %1,%%ymm2\n"
"vpaddb %%ymm3,%%ymm2,%%ymm1\n"
"vmovdqu %%ymm1,%0"
: "=m"(*a)
: "m"(*b)
);
};
//X86 Assembly to add two 128 bit numbers in the form of packed long 32bit
//X86 Assembly to add two 256 bit numbers in the form of packed int 32bit
void cpu::SSE::add_256(UInt *a,UInt *b) {
__asm__ __volatile__(
"vmovdqa %0, %%xmm1\n"
"vmovdqa %1, %%ymm2\n"
"vpaddw %%ymm1, %%ymm1, %%ymm2\n"
"vmovdqa %%ymm1, %0"
asm volatile(
"vmovdqu %0,%%ymm1\n"
"vmovdqu %1,%%ymm2\n"
"vpaddw %%ymm1, %%ymm2, %%ymm1\n"
"vmovdqu %%ymm1,%0"
: "=m"(*a)
: "m"(*b)
);
};
//X86 Assembly to add two 128 bit numbers in the form of packed long 64bit
//X86 Assembly to add two 256 bit numbers in the form of packed long 64bit
void cpu::SSE::add_256(ULong *a,ULong *b) {
__asm__ __volatile__(
"vmovdqa %0, %%ymm1\n"
"vmovdqa %1, %%ymm2\n"
"vpaddd %%ymm1, %%ymm1, %%ymm2\n"
"vmovdqa %%ymm1, %0"
asm volatile(
"vmovdqu %0, %%ymm1\n"
"vmovdqu %1, %%ymm2\n"
"vpaddd %%ymm1,%%ymm2,%%ymm1\n"
"vmovdqu %%ymm1, %0"
: "=m"(*a)
: "m"(*b)
);

View file

@ -2,6 +2,7 @@
#include <iostream>
#include <sys/time.h>
#include <unistd.h>
namespace platform
{
@ -16,20 +17,19 @@ namespace platform
{
struct timeval tp;
gettimeofday(&tp, NULL);
long int ms = tp.tv_sec * 1000 + tp.tv_usec / 1000;
return ms;
return (double)(tp.tv_sec * 1000 + (double)tp.tv_usec / 1000);
};
public:
Timer(const char* lbl) : label(lbl)
Timer(const char *lbl)
{
this->label=std::string(lbl);
this->start = gettime();
this->label = std::string(lbl);
this->start = this->gettime();
}
~Timer()
{
this->end=this->gettime();
this->end = this->gettime();
unsigned long diff = this->end - this->start;
std::cout << this->label << diff << std::endl;
}