Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 88 additions & 88 deletions slang.sln

Large diffs are not rendered by default.

9 changes: 7 additions & 2 deletions source/slang/hlsl.meta.slang
Original file line number Diff line number Diff line change
Expand Up @@ -2528,12 +2528,17 @@ __target_intrinsic(cuda, "__activemask()")
__target_intrinsic(hlsl, "WaveActiveBallot(true).x")
WaveMask WaveGetConvergedMask();

__intrinsic_op($(kIROp_WaveGetActiveMask))
WaveMask __WaveGetActiveMask();

__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
__target_intrinsic(glsl, "subgroupBallot(true).x")
__target_intrinsic(hlsl, "WaveActiveBallot(true).x")
__target_intrinsic(cuda, "__activemask()") // Note: semantically incorrect, but best we can do for now.
WaveMask WaveGetActiveMask();
WaveMask WaveGetActiveMask()
{
return __WaveGetActiveMask();
}

__glsl_extension(GL_KHR_shader_subgroup_basic)
__spirv_version(1.3)
Expand Down
25 changes: 25 additions & 0 deletions source/slang/slang-emit-cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,31 @@ bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
m_writer->emit("\n}");
return true;
}
case kIROp_WaveMaskBallot:
{
m_writer->emit("__ballot_sync(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
m_writer->emit(")");
return true;
}
case kIROp_WaveMaskMatch:
{
SemanticVersion version;
version.set(7, 0);
if (version > m_extensionTracker->m_smVersion)
{
m_extensionTracker->m_smVersion = version;
}

m_writer->emit("__match_any_sync(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
m_writer->emit(")");
return true;
}
default: break;
}

Expand Down
26 changes: 26 additions & 0 deletions source/slang/slang-emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "slang-ir-specialize-resources.h"
#include "slang-ir-ssa.h"
#include "slang-ir-strip-witness-tables.h"
#include "slang-ir-synthesize-active-mask.h"
#include "slang-ir-union.h"
#include "slang-ir-validate.h"
#include "slang-ir-wrap-structured-buffers.h"
Expand Down Expand Up @@ -500,6 +501,31 @@ Result linkAndOptimizeIR(
legalizeByteAddressBufferOps(session, irModule, byteAddressBufferOptions);
}

// For CUDA targets only, we will need to turn operations
// that implicitly reference the "active mask" into ones
// that use (and pass around) an explicit mask instead.
//
switch(target)
{
case CodeGenTarget::CUDASource:
case CodeGenTarget::PTX:
{
synthesizeActiveMask(
irModule,
compileRequest->getSink());

#if 0
dumpIRIfEnabled(compileRequest, irModule, "AFTER synthesizeActiveMask");
#endif
validateIRModuleIfEnabled(compileRequest, irModule);

}
break;

default:
break;
}

// For GLSL only, we will need to perform "legalization" of
// the entry point and any entry-point parameters.
//
Expand Down
50 changes: 50 additions & 0 deletions source/slang/slang-ir-dominators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,27 @@ bool IRDominatorTree::immediatelyDominates(IRBlock* dominator, IRBlock* dominate

bool IRDominatorTree::properlyDominates(IRBlock* dominator, IRBlock* dominated)
{
// We need to deal with the cases where `dominator` and/or
// `dominated` are unreachable, and thus not represented
// in the nodes of the dominator tree we constructed.
//
// If `dominated` is unreachable, then there are zero
// control flow paths that can reach it, so that *all*
// of those (zero) control flow paths go through
// `dominator`.
//
if(isUnreachable(dominated))
return true;

// If `dominated` is reachable then there must exist at least
// one control-flow path to it. Thus if `dominator` is not
// reachable, it cannot be on that path, and thus must
// not be a dominator.
//
if(isUnreachable(dominator))
return false;


// Because of how we laid out the tree, we can test if one node
// properly dominates another in constant time.
//
Expand Down Expand Up @@ -67,6 +88,11 @@ bool IRDominatorTree::dominates(IRBlock* dominator, IRBlock* dominated)

IRBlock* IRDominatorTree::getImmediateDominator(IRBlock* block)
{
// An unreachable block has no immediate dominator.
//
if(isUnreachable(block))
return nullptr;

// The immediate dominator of a block is its parent
// in the dominator tree. Looking this up is straightforward,
// and we just need to be a bit careful to deal with
Expand All @@ -83,6 +109,11 @@ IRBlock* IRDominatorTree::getImmediateDominator(IRBlock* block)

IRDominatorTree::DominatedList IRDominatorTree::getImmediatelyDominatedBlocks(IRBlock* block)
{
// An unreachable block doesn't immediately dominate anything.
//
if(isUnreachable(block))
return DominatedList();

// Because of our representation, the immediately dominated blocks
// for a node are contiguous, and we store their range in the
// node already.
Expand All @@ -99,6 +130,13 @@ IRDominatorTree::DominatedList IRDominatorTree::getImmediatelyDominatedBlocks(IR

IRDominatorTree::DominatedList IRDominatorTree::getProperlyDominatedBlocks(IRBlock* block)
{
// Technically each unreachable block dominates all the other
// unreachable blocks, but setting things up to answer that
// query "correctly" would be a hassle.
//
if(isUnreachable(block))
return DominatedList();

// Because of our representation, the properly dominated blocks
// for a node are contiguous, and we store their range in the
// node already.
Expand All @@ -123,6 +161,12 @@ Int IRDominatorTree::getBlockIndex(IRBlock* block)
return index;
}

/// Report whether `block` is considered unreachable by this dominator tree.
///
/// A block is treated as unreachable exactly when it has no entry in
/// `mapBlockToIndex`.
bool IRDominatorTree::isUnreachable(IRBlock* block)
{
    const bool hasIndex = mapBlockToIndex.ContainsKey(block);
    return !hasIndex;
}


// IRDominatorTree::DominatedList

IRDominatorTree::DominatedList::DominatedList()
Expand Down Expand Up @@ -181,6 +225,12 @@ bool IRDominatorTree::DominatedList::Iterator::operator==(Iterator const& that)
return mIndex == that.mIndex;
}

/// Inequality comparison for iterators over a `DominatedList`.
///
/// Both iterators are expected to refer to the same tree (checked with an
/// assertion); they compare unequal when their indices differ.
bool IRDominatorTree::DominatedList::Iterator::operator!=(Iterator const& that) const
{
    // Comparing iterators that belong to different trees is a caller error.
    SLANG_ASSERT(mTree == that.mTree);

    const bool sameIndex = (mIndex == that.mIndex);
    return !sameIndex;
}

//
// The dominance computation algorithm we are using relies on being able to compute
// a reverse postorder traversal of the nodes in the CFG, which is done using a depth-first
Expand Down
4 changes: 4 additions & 0 deletions source/slang/slang-ir-dominators.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ namespace Slang
/// These are the descendents of the block in the dominator tree.
DominatedList getProperlyDominatedBlocks(IRBlock* block);

/// Is `block` unreachable in the control flow graph?
bool isUnreachable(IRBlock* block);

struct DominatedList
{
public:
Expand All @@ -67,6 +70,7 @@ namespace Slang
IRBlock* operator*() const;
void operator++();
bool operator==(Iterator const& that) const;
bool operator!=(Iterator const& that) const;

private:
friend struct DominatedList;
Expand Down
8 changes: 8 additions & 0 deletions source/slang/slang-ir-inst-defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,14 @@ INST(Dot, dot, 2, 0)

INST(GetStringHash, getStringHash, 1, 0)

INST(WaveGetActiveMask, waveGetActiveMask, 0, 0)

/// trueMask = waveMaskBallot(mask, condition)
INST(WaveMaskBallot, waveMaskBallot, 2, 0)

/// matchMask = waveMaskMatch(mask, value)
INST(WaveMaskMatch, waveMaskMatch, 2, 0)

// Texture sampling operation of the form `t.Sample(s,u)`
INST(Sample, sample, 3, 0)

Expand Down
9 changes: 9 additions & 0 deletions source/slang/slang-ir-insts.h
Original file line number Diff line number Diff line change
Expand Up @@ -1509,6 +1509,9 @@ struct SharedIRBuilder

// TODO: We probably shouldn't use this in the long run.
Dictionary<void*, IRLayout*> layoutMap;


void insertBlockAlongEdge(IREdge const& edge);
};

struct IRBuilderSourceLocRAII;
Expand Down Expand Up @@ -2019,6 +2022,12 @@ struct IRBuilder
IRType* type,
IRInst* val);

IRInst* emitWaveMaskBallot(IRType* type, IRInst* mask, IRInst* condition);
IRInst* emitWaveMaskMatch(IRType* type, IRInst* mask, IRInst* value);

IRInst* emitBitAnd(IRType* type, IRInst* left, IRInst* right);
IRInst* emitBitNot(IRType* type, IRInst* value);

//
// Decorations
//
Expand Down
Loading