2 files changed: +8 -12 lines changed
Original file line number | Diff line number | Diff line change
@@ -365,11 +365,9 @@ if (LLAMA_HIPBLAS)
365365 add_compile_definitions (GGML_USE_HIPBLAS GGML_USE_CUBLAS)
366366 add_library (ggml-rocm OBJECT ggml-cuda.cu ggml-cuda.h)
367367 target_compile_definitions (ggml-rocm PRIVATE GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X} )
368- target_compile_definitions (ggml-rocm PRIVATE GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y} )
368+ target_compile_definitions (ggml-rocm PRIVATE GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y} )
369369 target_compile_definitions (ggml-rocm PRIVATE K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER} )
370- if (LLAMA_CUDA_FORCE_DMMV)
371- target_compile_definitions (ggml-rocm PRIVATE GGML_CUDA_FORCE_DMMV)
372- endif ()
370+ target_compile_definitions (ggml-rocm PRIVATE GGML_CUDA_FORCE_DMMV)
373371 set_source_files_properties (ggml-cuda.cu PROPERTIES LANGUAGE CXX)
374372 target_link_libraries (ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::hipblas)
375373
Original file line number | Diff line number | Diff line change
@@ -226,20 +226,18 @@ ifdef LLAMA_HIPBLAS
226226 CC := $(ROCM_PATH)/llvm/bin/clang
227227 CXX := $(ROCM_PATH)/llvm/bin/clang++
228228 GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100
229- LLAMA_CUDA_DMMV_X ?= 32
230- LLAMA_CUDA_DMMV_Y ?= 1
229+ LLAMA_CUDA_DMMV_X ?= 32
230+ LLAMA_CUDA_MMV_Y ?= 1
231+ LLAMA_CUDA_KQUANTS_ITER ?= 2
231232 CFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
232233 CXXFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
233234 LDFLAGS += -L/opt/rocm/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64
234235 OBJS += ggml-cuda.o
235- ifdef LLAMA_CUDA_KQUANTS_ITER
236- CXXFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
237- else
238- CXXFLAGS += -DK_QUANTS_PER_ITERATION=2
239- endif
240236 ggml-cuda.o : CXXFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS))
241237 ggml-cuda.o : CXXFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
242- ggml-cuda.o : CXXFLAGS += -DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y)
238+ ggml-cuda.o : CXXFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
239+ ggml-cuda.o : CXXFLAGS += -DGGML_CUDA_FORCE_DMMV
240+ ggml-cuda.o : CXXFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
243241 ggml-cuda.o : ggml-cuda.cu ggml-cuda.h
244242 $(CXX) $(CXXFLAGS) -x hip -c -o $@ $<
245243 endif # LLAMA_HIPBLAS
You can’t perform that action at this time.
0 commit comments