Description
LLVM Versions Tested: 18, 19, 20, current tip
Issue Summary
LLVM components incorrectly detect which CPU instructions are supported, generating illegal instructions and therefore binaries that fault at runtime.
Detailed Description
The compiler infrastructure appears to use static CPU model mappings to determine available instruction sets, instead of querying what the CPU and kernel actually support (via /proc/cpuinfo or HWCAP on AArch64). This causes particular problems on:
- Armv9 CPUs in Qualcomm SoCs that do not implement SVE, despite the Armv9 specification requiring it
- Potentially any system whose Linux kernel is not configured with CONFIG_ARM64_SVE=y

The problem is not necessarily limited to AArch64 or the cases above. It affects instruction selection, code generation, and runtime dispatching across all of LLVM, including Clang, Flang, OpenMP, and ORC JIT, as the sketch below illustrates for host detection.
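A minimal diagnostic sketch of the mismatch (assumptions: Linux on AArch64, LLVM 19 or newer; LLVM 18's sys::getHostCPUFeatures instead fills a StringMap out-parameter and returns bool):

```cpp
// Compare the kernel's HWCAP view of SVE with LLVM's host detection.
#include <sys/auxv.h>   // getauxval
#include <asm/hwcap.h>  // HWCAP_SVE (AArch64 only)
#include <cstdio>
#include "llvm/ADT/StringMap.h"
#include "llvm/TargetParser/Host.h"

int main() {
  // SVE is usable only when the hardware implements it AND the kernel
  // enables it (CONFIG_ARM64_SVE=y); HWCAP reflects both conditions.
  bool KernelSVE = getauxval(AT_HWCAP) & HWCAP_SVE;

  // LLVM's view of the host.
  llvm::StringMap<bool> Features = llvm::sys::getHostCPUFeatures();
  bool LLVMSVE = Features.lookup("sve");

  std::printf("host CPU per LLVM: %s\n",
              llvm::sys::getHostCPUName().str().c_str());
  std::printf("SVE per HWCAP: %d, SVE per LLVM: %d\n", KernelSVE, LLVMSVE);
  // Codegen that assumes SVE while HWCAP says otherwise produces the
  // illegal-instruction faults described in this report.
  return 0;
}
```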
Reproduction Steps
Below is a relatively minimal test case using ORC JIT that demonstrates the issue. A Termux environment on an Android device with a Qualcomm chip is likely the easiest target for reproduction. The issue can also be reproduced by compiling a vectorizable loop with Clang using the -march=native flag; a sketch of that variant comes first, followed by the full ORC JIT test case.
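A hypothetical sketch of the Clang-only variant (the file name, flags, and kernel are illustrative assumptions):

```cpp
// clang-repro.cpp -- build with: clang++ -O3 -march=native clang-repro.cpp
// On an affected device, host detection enables SVE and the vectorized
// loop below may be lowered to SVE instructions that fault at runtime.
#include <cstdio>
#include <vector>

void vector_op(const float *a, const float *b, float *r, int n) {
  for (int i = 0; i < n; i++)
    r[i] = a[i] * a[i] + b[i]; // same a*a + b kernel as the JIT test below
}

int main() {
  const int N = 1024;
  std::vector<float> A(N), B(N), R(N);
  for (int i = 0; i < N; i++) {
    A[i] = static_cast<float>(i);
    B[i] = 2.0f * i;
  }
  vector_op(A.data(), B.data(), R.data(), N);
  std::printf("R[10] = %g\n", R[10]); // expect 120 when it does not fault
  return 0;
}
```

The full ORC JIT test case: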
```cpp
#include <iostream>
#include <vector>
#include <string>
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/TargetParser/Host.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
using namespace llvm::orc;
// Diagnostic handler to suppress remarks
class SilenceRemarksHandler : public DiagnosticHandler {
public:
bool handleDiagnostics(const DiagnosticInfo &DI) override {
// Ignore remarks, pass through other diagnostics
if (DI.getSeverity() == DS_Remark) {
return true;
}
return false;
}
};
std::unique_ptr<Module> createVectorModule(LLVMContext &Context) {
auto M = std::make_unique<Module>("VecTest", Context);
auto *FloatTy = Type::getFloatTy(Context);
auto *FloatPtrTy = PointerType::get(FloatTy, 0);
auto *Int32Ty = Type::getInt32Ty(Context);
FunctionType *FT = FunctionType::get(
Type::getVoidTy(Context),
{FloatPtrTy, FloatPtrTy, FloatPtrTy, Int32Ty},
false);
Function *F = Function::Create(FT, Function::ExternalLinkage, "vector_op", M.get());
F->addFnAttr(Attribute::NoUnwind);
auto Args = F->arg_begin();
Value *A = &*Args++;
Value *B = &*Args++;
Value *Result = &*Args++;
Value *Length = &*Args++;
BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", F);
BasicBlock *LoopBB = BasicBlock::Create(Context, "loop", F);
BasicBlock *ExitBB = BasicBlock::Create(Context, "exit", F);
IRBuilder<> Builder(Context);
Builder.SetInsertPoint(EntryBB);
Value *IndexAlloca = Builder.CreateAlloca(Int32Ty, nullptr, "i");
Builder.CreateStore(ConstantInt::get(Int32Ty, 0), IndexAlloca);
Builder.CreateBr(LoopBB);
Builder.SetInsertPoint(LoopBB);
Value *Index = Builder.CreateLoad(Int32Ty, IndexAlloca, "idx");
Value *APtr = Builder.CreateGEP(FloatTy, A, Index, "a_ptr");
Value *BPtr = Builder.CreateGEP(FloatTy, B, Index, "b_ptr");
Value *ResultPtr = Builder.CreateGEP(FloatTy, Result, Index, "result_ptr");
MDNode *AccessGroup = MDNode::get(Context, {});
Value *AVal = Builder.CreateLoad(FloatTy, APtr, "a_val");
Value *BVal = Builder.CreateLoad(FloatTy, BPtr, "b_val");
cast<Instruction>(AVal)->setMetadata("llvm.mem.parallel_loop_access", AccessGroup);
cast<Instruction>(BVal)->setMetadata("llvm.mem.parallel_loop_access", AccessGroup);
Value *Square = Builder.CreateFMul(AVal, AVal, "square");
Value *AddResult = Builder.CreateFAdd(Square, BVal, "add");
auto *Store = Builder.CreateStore(AddResult, ResultPtr);
Store->setMetadata("llvm.mem.parallel_loop_access", AccessGroup);
Value *NextIndex = Builder.CreateAdd(Index, ConstantInt::get(Int32Ty, 1), "next_idx");
Builder.CreateStore(NextIndex, IndexAlloca);
// Compare against the incremented index so the body never runs with
// idx == Length; comparing the pre-increment index at the top of the
// block executed one extra iteration, accessing one element out of bounds.
Value *LoopCond = Builder.CreateICmpSLT(NextIndex, Length, "cond");
// Loop metadata to force vectorization
MDNode *ForcedVec = MDNode::get(Context, {
MDString::get(Context, "llvm.loop.vectorize.enable"),
ConstantAsMetadata::get(ConstantInt::get(Type::getInt1Ty(Context), 1))
});
MDNode *LoopID = MDNode::get(Context, {MDNode::get(Context, {}), ForcedVec});
LoopID->replaceOperandWith(0, LoopID);
Builder.CreateCondBr(LoopCond, LoopBB, ExitBB)->setMetadata("llvm.loop", LoopID);
Builder.SetInsertPoint(ExitBB);
Builder.CreateRetVoid();
verifyFunction(*F);
return M;
}
// Apply optimization passes to force vectorization
void optimizeModule(Module &M, TargetMachine *TM) {
PassBuilder PB;
LoopAnalysisManager LAM;
FunctionAnalysisManager FAM;
CGSCCAnalysisManager CGAM;
ModuleAnalysisManager MAM;
FAM.registerPass([&] { return TM->getTargetIRAnalysis(); });
PB.registerModuleAnalyses(MAM);
PB.registerCGSCCAnalyses(CGAM);
PB.registerFunctionAnalyses(FAM);
PB.registerLoopAnalyses(LAM);
PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
ModulePassManager MPM = PB.buildPerModuleDefaultPipeline(OptimizationLevel::O3);
MPM.run(M, MAM);
}
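// Note: constructing the PassBuilder with the target machine
// (PassBuilder PB(TM);) would register target-specific analyses
// automatically; the manual TargetIRAnalysis registration above achieves
// the same thing for this test.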
int main(int argc, char** argv) {
// Parse command line arguments
bool useNoSVE = false;
for (int i = 1; i < argc; i++) {
if (std::string(argv[i]) == "--use-nosve") {
useNoSVE = true;
}
}
InitializeNativeTarget();
InitializeNativeTargetAsmPrinter();
InitializeNativeTargetAsmParser();
// Silence remarks
LLVMContext Context;
Context.setDiagnosticHandler(std::make_unique<SilenceRemarksHandler>());
auto JTMB = cantFail(JITTargetMachineBuilder::detectHost());
JTMB.setCodeGenOptLevel(CodeGenOptLevel::Aggressive);
if (useNoSVE) {
JTMB.addFeatures(std::vector<std::string>{"-sve"});
}
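// "-sve" uses LLVM's backend feature-string syntax: a leading '-'
// disables a feature and a leading '+' enables one.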
std::unique_ptr<TargetMachine> TM(cantFail(JTMB.createTargetMachine()));
auto M = createVectorModule(Context);
M->setDataLayout(TM->createDataLayout());
// Run the O3 pipeline so the forced-vectorization metadata takes effect
optimizeModule(*M, TM.get());
// Set up the JIT and look up the compiled function
auto JIT = cantFail(LLJITBuilder().setJITTargetMachineBuilder(std::move(JTMB)).create());
cantFail(JIT->addIRModule(ThreadSafeModule(std::move(M), std::make_unique<LLVMContext>())));
auto VecOpAddr = cantFail(JIT->lookup("vector_op"));
auto *VectorOp = VecOpAddr.toPtr<void (*)(float *, float *, float *, int)>();
const int Length = 1024;
std::vector<float> A(Length), B(Length), Result(Length);
for (int i = 0; i < Length; i++) {
A[i] = i;
B[i] = i * 2;
}
// Execute the JIT-compiled function; on affected devices it should
// fault with an illegal instruction (SIGILL)
VectorOp(A.data(), B.data(), Result.data(), Length);
// Will only reach here if execution succeeds
std::cout << "Result[10]: " << Result[10] << std::endl;
return 0;
}
```
When executed normally, the program generates illegal instructions on hardware meeting the conditions above. It also accepts a --use-nosve argument, which adds -sve to the JIT's feature list and should prevent the crash.
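For reference: with --use-nosve (or on an unaffected configuration) the expected output is Result[10]: 120, since A[10] = 10 and B[10] = 20, so 10 * 10 + 20 = 120. The default run on an affected device should instead die with SIGILL inside the JIT-compiled vector_op.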
Additional Context
Attempting to work around this issue locally revealed frustrating inconsistencies in how CPU features are specified across the different LLVM interfaces:
- -march=
- -mcpu=
- -Xclang -target-feature
- llvm::orc::JITTargetMachineBuilder::addFeatures()

Each of these accepts a different set of feature flags, with inconsistent naming conventions and limited documentation.
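For example, expressing the single intent "disable SVE" appears to require a different spelling at each layer (the first spelling is an assumption based on Clang's documented -march extension syntax): -march=armv8.2-a+nosve for the driver, -Xclang -target-feature -Xclang -sve to pass the backend feature string through the driver, and JTMB.addFeatures({"-sve"}) (the same backend string) in ORC, while -mcpu= selects a CPU model whose feature set is implied rather than stated.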