Local build adjustments for the LLAMA_CUBLAS path.

Makefile, hunk 1: drops -fPIC from the base CFLAGS and CXXFLAGS.
Makefile, hunk 2: points the CUDA target include/lib directories at
aarch64-linux instead of x86_64-linux, sets NVCC to
/usr/local/cuda/bin/nvcc, and drops -arch=native from NVCCFLAGS.
ggml.h: tests a never-defined macro instead of __ARM_NEON, so the
`typedef __fp16 ggml_fp16_t;` branch is never selected.

NOTE(review): this patch was recovered from a copy whose line breaks had
been collapsed. Blank context lines were placed from the hunk line counts;
intra-line spacing and tab indentation could not be recovered exactly.
Confirm against the target tree, or apply with
`git apply --ignore-whitespace`.

diff --git a/Makefile b/Makefile
index fff4c11..6d8ed76 100644
--- a/Makefile
+++ b/Makefile
@@ -34,8 +34,8 @@ endif
 #
 
 # keep standard at C11 and C++11
-CFLAGS = -I. -O3 -std=c11 -fPIC
-CXXFLAGS = -I. -I./examples -O3 -std=c++11 -fPIC
+CFLAGS = -I. -O3 -std=c11
+CXXFLAGS = -I. -I./examples -O3 -std=c++11
 LDFLAGS =
 
 ifndef LLAMA_DEBUG
@@ -123,12 +123,12 @@ ifdef LLAMA_OPENBLAS
 	endif
 endif
 ifdef LLAMA_CUBLAS
-	CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
-	CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
-	LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
+	CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/aarch64-linux/include
+	CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/aarch64-linux/include
+	LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/aarch64-linux/lib
 	OBJS += ggml-cuda.o
-	NVCC = nvcc
-	NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native
+	NVCC = /usr/local/cuda/bin/nvcc
+	NVCCFLAGS = --forward-unknown-to-host-compiler
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
 	$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@
 endif
diff --git a/ggml.h b/ggml.h
index 255541d..d4f7e50 100644
--- a/ggml.h
+++ b/ggml.h
@@ -212,7 +212,7 @@
 extern "C" {
 #endif
 
-#ifdef __ARM_NEON
+#ifdef ____undefined //__ARM_NEON
 // we use the built-in 16-bit float type
 typedef __fp16 ggml_fp16_t;
 #else