dogfood fmadd* update
This commit is contained in:
Binary file not shown.
@@ -131,8 +131,9 @@ void kernel_fmadd(void* arg) {
|
|||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
float a = src0_ptr[offset+i];
|
float a = src0_ptr[offset+i];
|
||||||
float b = src1_ptr[offset+i];
|
float b = src1_ptr[offset+i];
|
||||||
float c = a * b + 0.5f;
|
float c = a - b;
|
||||||
dst_ptr[offset+i] = c;
|
float d = a * b + c;
|
||||||
|
dst_ptr[offset+i] = d;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -147,8 +148,9 @@ void kernel_fmsub(void* arg) {
|
|||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
float a = src0_ptr[offset+i];
|
float a = src0_ptr[offset+i];
|
||||||
float b = src1_ptr[offset+i];
|
float b = src1_ptr[offset+i];
|
||||||
float c = a * b - 0.5f;
|
float c = a - b;
|
||||||
dst_ptr[offset+i] = c;
|
float d = a * b - c;
|
||||||
|
dst_ptr[offset+i] = d;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -163,8 +165,9 @@ void kernel_fnmadd(void* arg) {
|
|||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
float a = src0_ptr[offset+i];
|
float a = src0_ptr[offset+i];
|
||||||
float b = src1_ptr[offset+i];
|
float b = src1_ptr[offset+i];
|
||||||
float c = -a * b - 0.5f;
|
float c = a - b;
|
||||||
dst_ptr[offset+i] = c;
|
float d =-a * b - c;
|
||||||
|
dst_ptr[offset+i] = d;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -179,8 +182,9 @@ void kernel_fnmsub(void* arg) {
|
|||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
float a = src0_ptr[offset+i];
|
float a = src0_ptr[offset+i];
|
||||||
float b = src1_ptr[offset+i];
|
float b = src1_ptr[offset+i];
|
||||||
float c = -a * b + 0.5f;
|
float c = a - b;
|
||||||
dst_ptr[offset+i] = c;
|
float d =-a * b + c;
|
||||||
|
dst_ptr[offset+i] = d;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -195,10 +199,11 @@ void kernel_fnmadd_madd(void* arg) {
|
|||||||
for (uint32_t i = 0; i < count; ++i) {
|
for (uint32_t i = 0; i < count; ++i) {
|
||||||
float a = src0_ptr[offset+i];
|
float a = src0_ptr[offset+i];
|
||||||
float b = src1_ptr[offset+i];
|
float b = src1_ptr[offset+i];
|
||||||
float c =-a * b - 0.5f;
|
float c = a - b;
|
||||||
float d = a * b + 0.5f;
|
float d =-a * b - c;
|
||||||
float e = c + d;
|
float e = a * b + c;
|
||||||
dst_ptr[offset+i] = e;
|
float f = d + e;
|
||||||
|
dst_ptr[offset+i] = f;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -19,7 +19,7 @@ inline bool almost_equal_eps(float a, float b, float eps = std::numeric_limits<f
|
|||||||
return fabs(a - b) <= tolerance;
|
return fabs(a - b) <= tolerance;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 4) {
|
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 5) {
|
||||||
Float_t fa{a}, fb{b};
|
Float_t fa{a}, fb{b};
|
||||||
return std::abs(fa.i - fb.i) <= ulp;
|
return std::abs(fa.i - fb.i) <= ulp;
|
||||||
}
|
}
|
||||||
@@ -253,7 +253,8 @@ public:
|
|||||||
auto b = (float*)src2;
|
auto b = (float*)src2;
|
||||||
auto c = (float*)dst;
|
auto c = (float*)dst;
|
||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
auto ref = a[i] * b[i] + 0.5f;
|
auto x = a[i] - b[i];
|
||||||
|
auto ref = a[i] * b[i] + x;
|
||||||
if (!almost_equal(c[i], ref)) {
|
if (!almost_equal(c[i], ref)) {
|
||||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||||
++errors;
|
++errors;
|
||||||
@@ -281,7 +282,8 @@ public:
|
|||||||
auto b = (float*)src2;
|
auto b = (float*)src2;
|
||||||
auto c = (float*)dst;
|
auto c = (float*)dst;
|
||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
auto ref = a[i] * b[i] - 0.5f;
|
auto x = a[i] - b[i];
|
||||||
|
auto ref = a[i] * b[i] - x;
|
||||||
if (!almost_equal(c[i], ref)) {
|
if (!almost_equal(c[i], ref)) {
|
||||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||||
++errors;
|
++errors;
|
||||||
@@ -309,7 +311,8 @@ public:
|
|||||||
auto b = (float*)src2;
|
auto b = (float*)src2;
|
||||||
auto c = (float*)dst;
|
auto c = (float*)dst;
|
||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
auto ref = -a[i] * b[i] - 0.5f;
|
auto x = a[i] - b[i];
|
||||||
|
auto ref = -a[i] * b[i] - x;
|
||||||
if (!almost_equal(c[i], ref)) {
|
if (!almost_equal(c[i], ref)) {
|
||||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||||
++errors;
|
++errors;
|
||||||
@@ -337,7 +340,8 @@ public:
|
|||||||
auto b = (float*)src2;
|
auto b = (float*)src2;
|
||||||
auto c = (float*)dst;
|
auto c = (float*)dst;
|
||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
auto ref = -a[i] * b[i] + 0.5f;
|
auto x = a[i] - b[i];
|
||||||
|
auto ref = -a[i] * b[i] + x;
|
||||||
if (!almost_equal(c[i], ref)) {
|
if (!almost_equal(c[i], ref)) {
|
||||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||||
++errors;
|
++errors;
|
||||||
@@ -365,9 +369,10 @@ public:
|
|||||||
auto b = (float*)src2;
|
auto b = (float*)src2;
|
||||||
auto c = (float*)dst;
|
auto c = (float*)dst;
|
||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
auto x = -a[i] * b[i] - 0.5f;
|
auto x = a[i] - b[i];
|
||||||
auto y = a[i] * b[i] + 0.5f;
|
auto y = -a[i] * b[i] - x;
|
||||||
auto ref = x + y;
|
auto z = a[i] * b[i] + x;
|
||||||
|
auto ref = y + z;
|
||||||
if (!almost_equal(c[i], ref)) {
|
if (!almost_equal(c[i], ref)) {
|
||||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||||
++errors;
|
++errors;
|
||||||
|
|||||||
Reference in New Issue
Block a user