#!/bin/sh
# Detect platform-specific compiler flags for robscale

# Probe whether the C++ compiler (${CXX}, defaulting to c++) accepts
# -fopenmp-simd by compiling a trivial translation unit with that flag.
# Results:
#   OPENMP_SIMD_FLAG   - "-fopenmp-simd" when accepted, empty otherwise
#   OPENMP_SIMD_DEFINE - "-DROBSCALE_HAS_OMP_SIMD" when accepted, empty otherwise
printf '%s\n' "int main(){return 0;}" > conftest.cpp
# ${CXX:-c++} is intentionally unquoted so a CXX such as "g++ -m64" still works.
if ! ${CXX:-c++} -fopenmp-simd -c conftest.cpp -o conftest.o 2>/dev/null; then
  OPENMP_SIMD_FLAG=""
  OPENMP_SIMD_DEFINE=""
  echo "  -fopenmp-simd not supported, skipping"
else
  OPENMP_SIMD_FLAG="-fopenmp-simd"
  OPENMP_SIMD_DEFINE="-DROBSCALE_HAS_OMP_SIMD"
  echo "  -fopenmp-simd supported"
fi
# Remove the probe artefacts whichever branch was taken.
rm -f conftest.o conftest.cpp

# SLEEF detection.
# Results:
#   HAS_SLEEF    - "yes" when a sleef.h header or a pkg-config entry was found
#   SLEEF_CFLAGS - include path plus -DROBSCALE_HAS_SLEEF, or empty
#   SLEEF_LIBS   - linker flags for libsleef, or empty
# All three are initialised explicitly so that values inherited from the
# caller's environment cannot leak into the generated Makevars when SLEEF is
# skipped or not found.
HAS_SLEEF="no"
SLEEF_CFLAGS=""
SLEEF_LIBS=""

# Skip SLEEF on macOS as Accelerate is faster and preferred
if [ "$(uname -s)" != "Darwin" ]; then
  # Check standard paths first; the first prefix that has the header wins.
  for prefix in /usr /usr/local /opt/homebrew; do
    if [ -f "${prefix}/include/sleef.h" ]; then
      SLEEF_CFLAGS="-I${prefix}/include -DROBSCALE_HAS_SLEEF"
      SLEEF_LIBS="-L${prefix}/lib -lsleef"
      HAS_SLEEF="yes"
      break
    fi
  done

  # Fallback to pkg-config if not found in standard paths.
  # stderr is silenced so a missing pkg-config binary is treated as "not found".
  if [ "${HAS_SLEEF}" = "no" ]; then
    if pkg-config --exists sleef 2>/dev/null; then
      SLEEF_CFLAGS="$(pkg-config --cflags sleef) -DROBSCALE_HAS_SLEEF"
      SLEEF_LIBS=$(pkg-config --libs sleef)
      HAS_SLEEF="yes"
    fi
  fi
fi

if [ "${HAS_SLEEF}" = "yes" ]; then
  echo "  SLEEF detected"
else
  echo "  SLEEF not detected, falling back to Accelerate/OpenMP"
fi

# glibc libmvec probe for _ZGVdN4v_tanh (Linux x86_64, glibc >= 2.35).
# libmvec is 25-50% faster than SLEEF for 4-wide double tanh on Zen/Skylake.
# The probe is independent of SLEEF: libmvec ships with glibc and needs no
# third-party library.  ROBSCALE_HAS_AVX2_DISPATCH (compiler-derived in
# robscale_config.h) provides the target-attribute infrastructure.
# Results:
#   HAS_GLIBC_MVEC    - "yes" only when both probes below succeed
#   GLIBC_MVEC_CFLAGS - "-DROBSCALE_HAS_GLIBC_MVEC" or empty
#   GLIBC_MVEC_LIBS   - "-lmvec" or empty
HAS_GLIBC_MVEC="no"
GLIBC_MVEC_CFLAGS=""
GLIBC_MVEC_LIBS=""
case "$(uname -s)" in
Linux)
  # Probe 1: the preprocessor rejects any glibc older than 2.35
  # (the versioned symbol _ZGVdN4v_tanh@GLIBC_2.35 requires it).
  cat > conftest_glibc.c << 'CFEOF'
#include <features.h>
#if !(__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 35))
#error "glibc < 2.35"
#endif
int main(void) { return 0; }
CFEOF
  if ! ${CC:-gcc} conftest_glibc.c -o conftest_glibc 2>/dev/null; then
    echo "  glibc < 2.35, libmvec not available"
  else
    # Probe 2: the vector tanh symbol must actually link against -lmvec.
    cat > conftest_mvec.c << 'CFEOF'
typedef double v4d __attribute__((vector_size(32)));
extern v4d _ZGVdN4v_tanh(v4d);
int main(void) { v4d x = {0.5,0.5,0.5,0.5}; v4d r = _ZGVdN4v_tanh(x); return (int)r[0]; }
CFEOF
    if ${CC:-gcc} conftest_mvec.c -lmvec -o conftest_mvec 2>/dev/null; then
      GLIBC_MVEC_LIBS="-lmvec"
      GLIBC_MVEC_CFLAGS="-DROBSCALE_HAS_GLIBC_MVEC"
      HAS_GLIBC_MVEC="yes"
      echo "  glibc libmvec _ZGVdN4v_tanh detected (preferred AVX2 tanh)"
    else
      echo "  libmvec not available"
    fi
    rm -f conftest_mvec conftest_mvec.c
  fi
  rm -f conftest_glibc conftest_glibc.c
  ;;
esac

# macOS Accelerate framework (vForce for vectorized tanh).
# ACCELERATE_LIBS carries the framework link flag on Darwin and stays empty
# on every other kernel.
ACCELERATE_LIBS=""
case "$(uname -s)" in
  Darwin)
    ACCELERATE_LIBS="-framework Accelerate"
    echo "  macOS detected, linking Accelerate framework"
    ;;
esac

# ── TBB backend detection ────────────────────────────────────────────────────
# Backends are tried in this order:
#   1. system oneTBB  (explicit -ltbb, modern scheduler)
#   2. RcppParallel bundled TBB  (explicit link to its libtbb.so)
#   3. OpenMP thread-level parallel (#pragma omp parallel fallback)
# This stage resets the output variables and performs the manual path search
# for a system oneTBB; it leaves SYSTEM_TBB_LIB / SYSTEM_TBB_HEADER set to the
# directories found, or empty.
TBB_DEFINE=""
TBB_LIBS=""
OMP_PARALLEL_CFLAGS=""
OMP_PARALLEL_LIBS=""

# First library directory that holds a shared libtbb (.so or .dylib).
SYSTEM_TBB_LIB=""
for tbb_libdir in /usr/lib /usr/lib64 /usr/lib/x86_64-linux-gnu \
                  /usr/lib/aarch64-linux-gnu /usr/lib/arm-linux-gnueabihf \
                  /usr/local/lib /opt/homebrew/lib; do
  [ -f "${tbb_libdir}/libtbb.so" ] || [ -f "${tbb_libdir}/libtbb.dylib" ] || continue
  SYSTEM_TBB_LIB="${tbb_libdir}"
  break
done

# First include directory that holds the oneapi/tbb header layout.
SYSTEM_TBB_HEADER=""
for tbb_incdir in /usr/include /usr/local/include /opt/homebrew/include; do
  [ -f "${tbb_incdir}/oneapi/tbb/parallel_reduce.h" ] || continue
  SYSTEM_TBB_HEADER="${tbb_incdir}"
  break
done

# API validation: only when both pieces were found, build a tiny program that
# uses the lambda form of tbb::parallel_reduce to confirm the headers and the
# library expose the oneTBB 2021 interface.
if [ -n "${SYSTEM_TBB_HEADER}" ] && [ -n "${SYSTEM_TBB_LIB}" ]; then
  cat > conftest_tbb.cpp << 'TBBEOF'
#include <oneapi/tbb/parallel_reduce.h>
#include <oneapi/tbb/blocked_range.h>
int main() {
  double r = tbb::parallel_reduce(
    tbb::blocked_range<int>(0,4), 0.0,
    [](const tbb::blocked_range<int>& b, double s){
      for(int i=b.begin();i<b.end();++i) s+=i; return s; },
    [](double a, double b){ return a+b; });
  return (int)r;
}
TBBEOF
  if ! ${CXX:-c++} -std=c++11 -I${SYSTEM_TBB_HEADER} conftest_tbb.cpp \
      -L${SYSTEM_TBB_LIB} -ltbb -o conftest_tbb 2>/dev/null; then
    echo "  Manual TBB found but oneTBB 2021 API check failed, trying pkg-config"
    # Clearing both variables is what sends later stages to the fallbacks.
    SYSTEM_TBB_HEADER=""
    SYSTEM_TBB_LIB=""
  else
    echo "  Manual oneTBB 2021 API verified (${SYSTEM_TBB_LIB})"
  fi
  rm -f conftest_tbb conftest_tbb.cpp
fi

# pkg-config fallback: runs when the manual path search did not yield both a
# library directory and a header directory (or API validation cleared them)
# and pkg-config knows a "tbb" module.
# On success:
#   SYSTEM_TBB_LIB / SYSTEM_TBB_HEADER - set to the marker string "pkg-config"
#   TBB_DEFINE - -DROBSCALE_HAS_SYSTEM_TBB plus the module's cflags
#   TBB_LIBS   - the module's libs, plus an rpath when pkg-config reports a libdir
# pkg-config is queried once per value so that the flags recorded in
# TBB_DEFINE / TBB_LIBS are exactly the flags the probe was built with.
if { [ -z "${SYSTEM_TBB_LIB}" ] || [ -z "${SYSTEM_TBB_HEADER}" ]; } \
    && pkg-config --exists tbb 2>/dev/null; then
  # Same oneTBB 2021 API probe program as used for the manual search.
  cat > conftest_tbb.cpp << 'TBBEOF'
#include <oneapi/tbb/parallel_reduce.h>
#include <oneapi/tbb/blocked_range.h>
int main() {
  double r = tbb::parallel_reduce(
    tbb::blocked_range<int>(0,4), 0.0,
    [](const tbb::blocked_range<int>& b, double s){
      for(int i=b.begin();i<b.end();++i) s+=i; return s; },
    [](double a, double b){ return a+b; });
  return (int)r;
}
TBBEOF
  _tbb_libdir=$(pkg-config --variable=libdir tbb 2>/dev/null)
  _tbb_pc_cflags=$(pkg-config --cflags tbb)
  _tbb_pc_libs=$(pkg-config --libs tbb)
  # The two flag variables are intentionally unquoted: each may hold several
  # space-separated options that must reach the compiler as separate words.
  if ${CXX:-c++} -std=c++11 ${_tbb_pc_cflags} conftest_tbb.cpp \
      ${_tbb_pc_libs} -o conftest_tbb 2>/dev/null; then
    SYSTEM_TBB_LIB="pkg-config"
    SYSTEM_TBB_HEADER="pkg-config"
    TBB_DEFINE="-DROBSCALE_HAS_SYSTEM_TBB ${_tbb_pc_cflags}"
    if [ -n "${_tbb_libdir}" ]; then
      TBB_LIBS="${_tbb_pc_libs} -Wl,-rpath,${_tbb_libdir}"
    else
      TBB_LIBS="${_tbb_pc_libs}"
    fi
    echo "  System oneTBB detected via pkg-config"
  else
    echo "  pkg-config TBB found but oneTBB 2021 API check failed"
  fi
  rm -f conftest_tbb.cpp conftest_tbb
fi

# Final backend selection.  When SYSTEM_TBB_LIB holds the marker "pkg-config",
# TBB_DEFINE and TBB_LIBS were already filled in by the pkg-config stage and
# nothing is done here.  Otherwise the choice is, in order: manually located
# system oneTBB, the TBB library shipped inside RcppParallel, OpenMP.
if [ "${SYSTEM_TBB_LIB}" != "pkg-config" ]; then
  if [ -n "${SYSTEM_TBB_LIB}" ] && [ -n "${SYSTEM_TBB_HEADER}" ]; then
    TBB_DEFINE="-DROBSCALE_HAS_SYSTEM_TBB -I${SYSTEM_TBB_HEADER}"
    TBB_LIBS="-L${SYSTEM_TBB_LIB} -ltbb -Wl,-rpath,${SYSTEM_TBB_LIB}"
    echo "  System oneTBB detected (${SYSTEM_TBB_LIB}/libtbb.so), using -ltbb"
  else
    # Ask R where RcppParallel keeps its 'lib' directory.  Only possible when
    # R_HOME is set; any Rscript failure is ignored and leaves the path empty.
    RCPP_PAR_LIB=""
    [ -z "${R_HOME}" ] || RCPP_PAR_LIB=$("${R_HOME}/bin/Rscript" --no-save -e \
      "cat(system.file('lib', package='RcppParallel', mustWork=FALSE))" \
      2>/dev/null || true)

    # The bundled TBB counts only if a shared libtbb is present in that directory.
    rcpp_tbb_found="no"
    if [ -n "${RCPP_PAR_LIB}" ]; then
      for rcpp_tbb_name in libtbb.so libtbb.dylib; do
        if [ -f "${RCPP_PAR_LIB}/${rcpp_tbb_name}" ]; then
          rcpp_tbb_found="yes"
          break
        fi
      done
    fi

    case "${rcpp_tbb_found}" in
      yes)
        TBB_DEFINE="-DUSE_DIRECT_TBB"
        TBB_LIBS="-L${RCPP_PAR_LIB} -ltbb -Wl,-rpath,${RCPP_PAR_LIB}"
        echo "  RcppParallel TBB detected at ${RCPP_PAR_LIB}, using -ltbb"
        ;;
      *)
        # Single quotes keep $(SHLIB_OPENMP_CXXFLAGS) literal so that it is
        # written to the Makevars as a make variable reference.
        TBB_DEFINE="-DROBSCALE_HAS_OMP_PARALLEL"
        OMP_PARALLEL_CFLAGS='$(SHLIB_OPENMP_CXXFLAGS)'
        OMP_PARALLEL_LIBS='$(SHLIB_OPENMP_CXXFLAGS)'
        echo "  No TBB found, using OpenMP parallel fallback"
        ;;
    esac
  fi
fi

# Force recompilation when flags change (make doesn't track PKG_CXXFLAGS)
rm -f src/*.o src/robscale.so

# robscale_sed_escape VALUE
# Prints VALUE with a backslash in front of every character that is special
# in the replacement half of a sed "s!...!...!" command: "&" (whole match),
# "\" (escape character) and "!" (the delimiter used below).
robscale_sed_escape() {
  printf '%s' "$1" | sed -e 's/[&!\]/\\&/g'
}

# generate_makevars
# Writes src/Makevars from the template src/Makevars.in, replacing every
# @NAME@ placeholder with the current value of the shell variable NAME.
# Values are escaped first so that paths or flags containing "&", "\" or "!"
# are inserted literally, and the "g" flag replaces every occurrence on a line.
# Returns non-zero (and removes any partially written src/Makevars) when the
# template cannot be read or the output cannot be written.
generate_makevars() {
  sed -e "s!@OPENMP_SIMD_FLAG@!$(robscale_sed_escape "${OPENMP_SIMD_FLAG}")!g" \
      -e "s!@OPENMP_SIMD_DEFINE@!$(robscale_sed_escape "${OPENMP_SIMD_DEFINE}")!g" \
      -e "s!@ACCELERATE_LIBS@!$(robscale_sed_escape "${ACCELERATE_LIBS}")!g" \
      -e "s!@SLEEF_CFLAGS@!$(robscale_sed_escape "${SLEEF_CFLAGS}")!g" \
      -e "s!@SLEEF_LIBS@!$(robscale_sed_escape "${SLEEF_LIBS}")!g" \
      -e "s!@TBB_DEFINE@!$(robscale_sed_escape "${TBB_DEFINE}")!g" \
      -e "s!@TBB_LIBS@!$(robscale_sed_escape "${TBB_LIBS}")!g" \
      -e "s!@OMP_PARALLEL_CFLAGS@!$(robscale_sed_escape "${OMP_PARALLEL_CFLAGS}")!g" \
      -e "s!@OMP_PARALLEL_LIBS@!$(robscale_sed_escape "${OMP_PARALLEL_LIBS}")!g" \
      -e "s!@GLIBC_MVEC_CFLAGS@!$(robscale_sed_escape "${GLIBC_MVEC_CFLAGS}")!g" \
      -e "s!@GLIBC_MVEC_LIBS@!$(robscale_sed_escape "${GLIBC_MVEC_LIBS}")!g" \
      src/Makevars.in > src/Makevars && return 0
  # Do not leave an empty or truncated Makevars behind for make to pick up.
  rm -f src/Makevars
  return 1
}

# Generate Makevars from template; report success only when it really happened.
if generate_makevars; then
  echo "  Generated src/Makevars"
else
  echo "  WARNING: could not generate src/Makevars from src/Makevars.in" >&2
fi
