Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: mockingbirdnest/Principia
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 53ce1608bf2e
Choose a base ref
...
head repository: mockingbirdnest/Principia
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 6b28e2ca4eef
Choose a head ref
  • 2 commits
  • 4 files changed
  • 1 contributor

Commits on May 31, 2021

  1. No FMA with clang

    eggrobin committed May 31, 2021
    Copy the full SHA
    2c5fd8f View commit details

Commits on Jun 1, 2021

  1. Merge pull request #3020 from eggrobin/no-vex

    No FMA with clang
    eggrobin authored Jun 1, 2021
    Copy the full SHA
    6b28e2c View commit details
Showing with 37 additions and 11 deletions.
  1. +0 −1 Makefile
  2. +10 −1 numerics/fma.hpp
  3. +26 −8 numerics/fma_body.hpp
  4. +1 −1 numerics/fma_test.cpp
1 change: 0 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -91,7 +91,6 @@ INCLUDES := -I. -I$(DEP_DIR)glog/src \
-I$(DEP_DIR)zfp/include
SHARED_ARGS := \
-std=c++1z -stdlib=libc++ -O3 -g \
-mfma \
-fPIC -fexceptions -ferror-limit=1000 -fno-omit-frame-pointer \
-Wall -Wpedantic \
-Wno-char-subscripts \
11 changes: 10 additions & 1 deletion numerics/fma.hpp
Original file line number Diff line number Diff line change
@@ -11,10 +11,18 @@ namespace internal_fma {

using base::CPUFeatureFlags;

// Whether the compiler is allowed to emit FMA instructions.  With clang, using
// FMA requires VEX-encoding everything (see #3019), so this is only true when
// |PRINCIPIA_COMPILER_MSVC| is set.
constexpr bool CanEmitFMAInstructions =
#if PRINCIPIA_COMPILER_MSVC
    true;
#else
    false;
#endif

// The functions in this file do not themselves check whether the CPU supports
// FMA.  The caller may only use them if |UseHardwareFMA| is true.
// |UseHardwareFMA| is defined exactly once per configuration, and it also
// requires |CanEmitFMAInstructions|, so it is never true in a build where the
// FMA intrinsics cannot be emitted.
#if PRINCIPIA_USE_FMA_IF_AVAILABLE
inline bool const UseHardwareFMA =
    CanEmitFMAInstructions && HasCPUFeatures(CPUFeatureFlags::FMA);
#else
inline bool const UseHardwareFMA = false;
#endif
@@ -33,6 +41,7 @@ inline double FusedNegatedMultiplySubtract(double a, double b, double c);

} // namespace internal_fma

using internal_fma::CanEmitFMAInstructions;
using internal_fma::FusedMultiplyAdd;
using internal_fma::FusedMultiplySubtract;
using internal_fma::FusedNegatedMultiplyAdd;
34 changes: 26 additions & 8 deletions numerics/fma_body.hpp
Original file line number Diff line number Diff line change
@@ -2,33 +2,51 @@

#include "numerics/fma.hpp"

#include "glog/logging.h"

namespace principia {
namespace numerics {
namespace internal_fma {

// Wraps the |_mm_fmadd_sd| intrinsic on scalar doubles: a * b + c.
// The intrinsic is only referenced when |CanEmitFMAInstructions| is true;
// otherwise calling this function is a fatal error.  There must be no
// unconditional use of the intrinsic before the |if constexpr|, as that would
// make the guard unreachable and force FMA code generation on every compiler.
inline double FusedMultiplyAdd(double const a, double const b, double const c) {
  if constexpr (CanEmitFMAInstructions) {
    return _mm_cvtsd_f64(
        _mm_fmadd_sd(_mm_set_sd(a), _mm_set_sd(b), _mm_set_sd(c)));
  } else {
    LOG(FATAL) << "Clang cannot use FMA without VEX-encoding everything";
  }
}

// Wraps the |_mm_fmsub_sd| intrinsic on scalar doubles: a * b - c.
// The intrinsic is only referenced when |CanEmitFMAInstructions| is true;
// otherwise calling this function is a fatal error.  There must be no
// unconditional use of the intrinsic before the |if constexpr|, as that would
// make the guard unreachable.
inline double FusedMultiplySubtract(double const a,
                                    double const b,
                                    double const c) {
  if constexpr (CanEmitFMAInstructions) {
    return _mm_cvtsd_f64(
        _mm_fmsub_sd(_mm_set_sd(a), _mm_set_sd(b), _mm_set_sd(c)));
  } else {
    LOG(FATAL) << "Clang cannot use FMA without VEX-encoding everything";
  }
}
// Wraps the |_mm_fnmadd_sd| intrinsic on scalar doubles: -(a * b) + c.
// The intrinsic is only referenced when |CanEmitFMAInstructions| is true;
// otherwise calling this function is a fatal error.  There must be no
// unconditional use of the intrinsic before the |if constexpr|, as that would
// make the guard unreachable.
inline double FusedNegatedMultiplyAdd(double const a,
                                      double const b,
                                      double const c) {
  if constexpr (CanEmitFMAInstructions) {
    return _mm_cvtsd_f64(
        _mm_fnmadd_sd(_mm_set_sd(a), _mm_set_sd(b), _mm_set_sd(c)));
  } else {
    LOG(FATAL) << "Clang cannot use FMA without VEX-encoding everything";
  }
}

// Wraps the |_mm_fnmsub_sd| intrinsic on scalar doubles: -(a * b) - c.
// The intrinsic is only referenced when |CanEmitFMAInstructions| is true;
// otherwise calling this function is a fatal error.  There must be no
// unconditional use of the intrinsic before the |if constexpr|, as that would
// make the guard unreachable.
inline double FusedNegatedMultiplySubtract(double const a,
                                           double const b,
                                           double const c) {
  if constexpr (CanEmitFMAInstructions) {
    return _mm_cvtsd_f64(
        _mm_fnmsub_sd(_mm_set_sd(a), _mm_set_sd(b), _mm_set_sd(c)));
  } else {
    LOG(FATAL) << "Clang cannot use FMA without VEX-encoding everything";
  }
}

} // namespace internal_fma
2 changes: 1 addition & 1 deletion numerics/fma_test.cpp
Original file line number Diff line number Diff line change
@@ -17,7 +17,7 @@ class FMATest : public testing::Test {};

TEST_F(FMATest, FMA) {
// Note that we test even if |UseHardwareFMA| is false, i.e., even in debug.
if (!HasCPUFeatures(CPUFeatureFlags::FMA)) {
if (!CanEmitFMAInstructions || !HasCPUFeatures(CPUFeatureFlags::FMA)) {
LOG(ERROR) << "Cannot test FMA on a machine without FMA";
return;
}