From 7345dd26653198c8ed5b7f8f331baeb1425ace7a Mon Sep 17 00:00:00 2001 From: "balhau@balhau.net" Date: Sat, 25 Dec 2021 19:14:49 +0000 Subject: [PATCH] Replaced vmovdqa with vmovdqu because the memory is not aligned to 256 bits. Next align the memory and then use the aligned assembly instruction --- app/sse.cpp | 1 - src/cpu/types.hpp | 2 +- src/cpu/utils.hpp | 2 +- src/cpu/x86/sse.cpp | 50 ++++++++++++++++++++++-------------------- src/platform/timer.hpp | 16 +++++++------- 5 files changed, 36 insertions(+), 35 deletions(-) diff --git a/app/sse.cpp b/app/sse.cpp index 3d6a864..226dec7 100644 --- a/app/sse.cpp +++ b/app/sse.cpp @@ -3,7 +3,6 @@ #include "../src/cpu/x86/sse.hpp" #include "../src/cpu/naive.hpp" #include "../src/platform/timer.hpp" -#include using namespace cpu; using namespace platform; diff --git a/src/cpu/types.hpp b/src/cpu/types.hpp index 4c7d471..4d8ca14 100644 --- a/src/cpu/types.hpp +++ b/src/cpu/types.hpp @@ -38,7 +38,7 @@ typedef unsigned char UChar; typedef unsigned short UShort; typedef unsigned int UInt; - typedef unsigned long ULong; + typedef unsigned long int ULong; // SSE DataTypes #define CHAR_LEN_128 16 diff --git a/src/cpu/utils.hpp b/src/cpu/utils.hpp index 87e1ed1..cf73dc5 100644 --- a/src/cpu/utils.hpp +++ b/src/cpu/utils.hpp @@ -50,7 +50,7 @@ namespace cpu packedULong[0] = (ULong)packedUInteger[1] << 32 | packedUInteger[0]; packedULong[1] = (ULong)packedUInteger[3] << 32 | packedUInteger[2]; packedULong[2] = (ULong)packedUInteger[5] << 32 | packedUInteger[4]; - packedULong[3] = (ULong)packedUInteger[7] << 32 | packedUInteger[7]; + packedULong[3] = (ULong)packedUInteger[7] << 32 | packedUInteger[6]; } /** diff --git a/src/cpu/x86/sse.cpp b/src/cpu/x86/sse.cpp index b7e8384..62c25ad 100644 --- a/src/cpu/x86/sse.cpp +++ b/src/cpu/x86/sse.cpp @@ -1,9 +1,11 @@ #include "sse.hpp" + + #ifdef ARCH_X86 #include void cpu::SSE::add_128(UChar *a,UChar *b){ - __asm__ __volatile__ ( + asm volatile ( "movdqa %0,%%xmm1\n" "paddb %1,%%xmm1\n" "movdqa %%xmm1,%0" @@ -14,9 +16,9 @@ //X86 Assembly to add two 128 bit numbers in the form of packed integers 32bit void cpu::SSE::add_128(UInt *a,UInt *b) { - __asm__ __volatile__ ( - "movdqa %0, %%xmm1\n" - "paddw %1, %%xmm1\n" + asm volatile ( + "movdqa %0,%%xmm1\n" + "paddw %1,%%xmm1\n" "movdqa %%xmm1, %0" : "=m"(*a) : "m"(*b) @@ -26,9 +28,9 @@ //X86 Assembly to add two 128 bit numbers in the form of packed long 64bit void cpu::SSE::add_128(ULong *a,ULong *b) { - __asm__ __volatile__ ( - "movdqa %0, %%xmm1\n" - "paddd %1, %%xmm1\n" + asm volatile ( + "movdqa %0,%%xmm1\n" + "paddd %1,%%xmm1\n" "movdqa %%xmm1, %0" : "=m"(*a) : "m"(*b) @@ -37,35 +39,35 @@ //X86 Assembly to add two 256 bit numbers in the form of packed byte vector void cpu::SSE::add_256(UChar *a,UChar *b) { - __asm__ __volatile__ ( - "vmovdqa %0, %%ymm1\n" - "vmovdqa %1, %%ymm2\n" - "vpaddb %%ymm1, %%ymm1, %%ymm2\n" - "vmovdqa %%ymm1, %0" + asm volatile ( + "vmovdqu %0,%%ymm1\n" + "vmovdqu %1,%%ymm2\n" + "vpaddb %%ymm3,%%ymm2,%%ymm1\n" + "vmovdqu %%ymm1,%0" : "=m"(*a) : "m"(*b) ); }; - //X86 Assembly to add two 128 bit numbers in the form of packed long 32bit + //X86 Assembly to add two 256 bit numbers in the form of packed int 32bit void cpu::SSE::add_256(UInt *a,UInt *b) { - __asm__ __volatile__( - "vmovdqa %0, %%xmm1\n" - "vmovdqa %1, %%ymm2\n" - "vpaddw %%ymm1, %%ymm1, %%ymm2\n" - "vmovdqa %%ymm1, %0" + asm volatile( + "vmovdqu %0,%%ymm1\n" + "vmovdqu %1,%%ymm2\n" + "vpaddw %%ymm1, %%ymm2, %%ymm1\n" + "vmovdqu %%ymm1,%0" : "=m"(*a) : "m"(*b) ); }; - //X86 Assembly to add two 128 bit numbers in the form of packed long 64bit + //X86 Assembly to add two 256 bit numbers in the form of packed long 64bit void cpu::SSE::add_256(ULong *a,ULong *b) { - __asm__ __volatile__( - "vmovdqa %0, %%ymm1\n" - "vmovdqa %1, %%ymm2\n" - "vpaddd %%ymm1, %%ymm1, %%ymm2\n" - "vmovdqa %%ymm1, %0" + asm volatile( + "vmovdqu %0, %%ymm1\n" + "vmovdqu %1, %%ymm2\n" + "vpaddd %%ymm1,%%ymm2,%%ymm1\n" + "vmovdqu %%ymm1, %0" : "=m"(*a) : "m"(*b) ); diff --git a/src/platform/timer.hpp b/src/platform/timer.hpp index fd93431..b2b1543 100644 --- a/src/platform/timer.hpp +++ b/src/platform/timer.hpp @@ -2,6 +2,7 @@ #include #include +#include namespace platform { @@ -11,26 +12,25 @@ namespace platform std::string label; long int start; long int end; - + long int gettime() { struct timeval tp; gettimeofday(&tp, NULL); - long int ms = tp.tv_sec * 1000 + tp.tv_usec / 1000; - return ms; + return (double)(tp.tv_sec * 1000 + (double)tp.tv_usec / 1000); }; public: - Timer(const char* lbl) : label(lbl) + Timer(const char *lbl) { - this->label=std::string(lbl); - this->start = gettime(); + this->label = std::string(lbl); + this->start = this->gettime(); } ~Timer() { - this->end=this->gettime(); - unsigned long diff = this->end - this->start; + this->end = this->gettime(); + unsigned long diff = this->end - this->start; std::cout << this->label << diff << std::endl; } };