Skip to content

feat: concurrent sum #1360

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 26 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apps/typegpu-docs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"@tailwindcss/vite": "^4.1.6",
"@typegpu/color": "workspace:*",
"@typegpu/noise": "workspace:*",
"@typegpu/concurrent-sum": "workspace:*",
"@types/dom-mediacapture-transform": "^0.1.9",
"@types/react": "^19.0.10",
"@types/react-dom": "^19.0.4",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<!--
  Concurrent-sum example UI: a comma-separated number input, a trigger
  button, a container for the computed result, and the <canvas> the example
  script acquires its WebGPU context from.
  NOTE(review): the example's index.ts does not currently read #array-input,
  wire up #calculate-btn, or write into #result-container — confirm whether
  these controls are intended for a follow-up commit.
-->
<div>
  <div>
    <label for="array-input">Enter array values (comma separated):</label>
    <input type="text" id="array-input" placeholder="1,2,3,4,5" />
    <button id="calculate-btn">Calculate Sum</button>
  </div>
  <div id="result-container">Result will appear here</div>
  <canvas></canvas>
</div>
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Concurrent-sum example: fills a GPU storage buffer with the values
// 0 .. fixedArrayLength-1 and runs the concurrent (prefix) sum over it.
//
// NOTE(review): `fixedArrayLength` was previously imported through a deep
// relative path into packages/typegpu-concurrent-sum/src/schemas.ts. That
// path is not rerouted by the docs sandbox (sandboxModules.ts only maps
// '@typegpu/concurrent-sum' to the package index) and breaks once the
// package publishes from dist/. Import it from the package entry point
// instead — confirm src/index.ts re-exports it from schemas.ts.
import { currentSum, fixedArrayLength } from '@typegpu/concurrent-sum';
import tgpu from 'typegpu';
import * as d from 'typegpu/data';

const presentationFormat = navigator.gpu.getPreferredCanvasFormat();
const canvas = document.querySelector('canvas') as HTMLCanvasElement;
const context = canvas.getContext('webgpu') as GPUCanvasContext;

// Prefer the discrete GPU and enable 'timestamp-query' — presumably for
// GPU-side timing of the sum pass; confirm currentSum actually uses it.
const root = await tgpu.init({
  adapter: {
    powerPreference: 'high-performance',
  },
  device: {
    requiredFeatures: [
      'timestamp-query',
    ],
  },
});

context.configure({
  device: root.device,
  format: presentationFormat,
  alphaMode: 'premultiplied',
});

// Input buffer of f32 values 0, 1, 2, ..., fixedArrayLength - 1.
const buffer = root.createBuffer(
  d.arrayOf(d.f32, fixedArrayLength),
  Array.from({ length: fixedArrayLength }, (_, k) => k),
).$usage('storage');

currentSum(root, buffer);

// Called by the examples harness when this example is torn down.
export function onCleanup() {
  root.destroy();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"title": "Concurrent Sum",
"category": "simple",
"tags": ["experimental"]
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions apps/typegpu-docs/src/utils/examples/sandboxModules.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,7 @@ export const SANDBOX_MODULES: Record<string, SandboxModuleDefinition> = {
'@typegpu/color': {
typeDef: { reroute: ['typegpu-color/src/index.ts'] },
},
'@typegpu/concurrent-sum': {
typeDef: { reroute: ['typegpu-concurrent-sum/src/index.ts'] },
},
};
9 changes: 9 additions & 0 deletions packages/typegpu-concurrent-sum/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<div align="center">

# @typegpu/concurrent-sum

🚧 **Under Construction** 🚧

</div>

A concurrent sum module for use in WebGPU/TypeGPU apps.
12 changes: 12 additions & 0 deletions packages/typegpu-concurrent-sum/build.config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { type BuildConfig, defineBuildConfig } from 'unbuild';
import typegpu from 'unplugin-typegpu/rollup';

// unbuild configuration: hooks into rollup option resolution so the TypeGPU
// rollup plugin transforms every .ts module during bundling.
const buildConfig: BuildConfig[] = defineBuildConfig({
  hooks: {
    'rollup:options': (_buildContext, rollupConfig) => {
      // Register the TypeGPU transform for all TypeScript sources.
      const tgpuPlugin = typegpu({ include: [/\.ts$/] });
      rollupConfig.plugins.push(tgpuPlugin);
    },
  },
});

export default buildConfig;
7 changes: 7 additions & 0 deletions packages/typegpu-concurrent-sum/deno.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"exclude": ["."],
"fmt": {
"exclude": ["!."],
"singleQuote": true
}
}
45 changes: 45 additions & 0 deletions packages/typegpu-concurrent-sum/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"name": "@typegpu/concurrent-sum",
"type": "module",
"version": "0.0.8",
"description": "A concurrent sum module.",
"exports": {
".": "./src/index.ts",
"./package.json": "./package.json"
},
"publishConfig": {
"directory": "dist",
"linkDirectory": false,
"main": "./dist/index.mjs",
"types": "./dist/index.d.ts",
"exports": {
"./package.json": "./dist/package.json",
".": {
"types": "./dist/index.d.ts",
"module": "./dist/index.mjs",
"import": "./dist/index.mjs",
"default": "./dist/index.cjs"
}
}
},
"sideEffects": false,
"scripts": {
"build": "unbuild",
"test:types": "pnpm tsc --p ./tsconfig.json --noEmit",
"prepublishOnly": "tgpu-dev-cli prepack"
},
"keywords": [],
"license": "MIT",
"peerDependencies": {
"typegpu": "^0.5.8"
},
"devDependencies": {
"@typegpu/tgpu-dev-cli": "workspace:*",
"@types/node": "^22.13.14",
"@webgpu/types": "catalog:",
"unbuild": "catalog:",
"typegpu": "workspace:*",
"typescript": "catalog:",
"unplugin-typegpu": "workspace:*"
}
}
65 changes: 65 additions & 0 deletions packages/typegpu-concurrent-sum/src/compute.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import tgpu from 'typegpu';
import * as d from 'typegpu/data';
import * as std from 'typegpu/std';
import {
dataBindGroupLayout as layout,
fixedArrayLength,
workgroupSize,
} from './schemas.ts';

// Blelloch-style exclusive prefix sum ("concurrent sum"): copies inputArray
// into workArray, then scans workArray in place with an up-sweep (reduce)
// followed by a down-sweep. After the down-sweep, workArray[i] holds the sum
// of all inputs strictly before index i.
//
// NOTE(review): std.workgroupBarrier() only synchronizes threads within a
// single workgroup, yet indexing uses the *global* invocation id. This is
// only correct when the whole array is handled by one workgroup — confirm
// the dispatch size against fixedArrayLength / workgroupSize.
export const computeShader = tgpu['~unstable'].computeFn({
  in: { in: d.builtin.globalInvocationId },
  workgroupSize: [workgroupSize],
})((input) => {
  const threadId = input.in.x;
  // Element count; std.log2 below is exact only when this is a power of
  // two — TODO confirm fixedArrayLength is always a power of 2.
  const length = d.u32(fixedArrayLength);
  const log2Length = d.i32(std.log2(d.f32(length)));

  // Copy the read-only input into the scratch buffer that the scan mutates.
  if (threadId < length) {
    layout.$.workArray[threadId] = layout.$.inputArray[threadId] as number;
  }

  std.workgroupBarrier();
  // Up-sweep phase: build partial sums in a binary tree, doubling the window
  // each level; the last element ends up holding the grand total.
  for (let dLevel = 0; dLevel < log2Length; dLevel++) {
    const windowSize = d.u32(std.exp2(d.f32(dLevel + 1))); // window size == step
    const offset = d.u32(std.exp2(d.f32(dLevel))); // offset for the window

    // One thread per window: fold the left half's partial sum into the
    // window's last slot.
    if (threadId < length / windowSize) {
      const i = threadId * windowSize;
      const leftIdx = i + offset - 1;
      const rightIdx = i + windowSize - 1;

      layout.$.workArray[rightIdx] = (layout.$.workArray[leftIdx] as number) +
        (layout.$.workArray[rightIdx] as number);
    }

    std.workgroupBarrier();
  }

  // Zero the tree root so the down-sweep yields an *exclusive* scan.
  if (threadId === 0) {
    layout.$.workArray[length - 1] = 0;
  }

  std.workgroupBarrier();

  // Down-sweep phase: walk the tree back down (largest window first),
  // swapping left/right and accumulating so each slot receives the sum of
  // all elements before it.
  for (let k = 0; k < log2Length; k++) {
    const dLevel = log2Length - 1 - k;
    const windowSize = d.u32(std.exp2(d.f32(dLevel + 1))); // window size == step
    const offset = d.u32(std.exp2(d.f32(dLevel))); // offset for the window

    if (threadId < length / windowSize) {
      const i = threadId * windowSize;
      const leftIdx = i + offset - 1;
      const rightIdx = i + windowSize - 1;

      // Classic Blelloch swap-and-add step.
      const temp = layout.$.workArray[leftIdx] as number;
      layout.$.workArray[leftIdx] = layout.$.workArray[rightIdx] as number;
      layout.$.workArray[rightIdx] = temp +
        (layout.$.workArray[rightIdx] as number);
    }

    std.workgroupBarrier();
  }
});
61 changes: 61 additions & 0 deletions packages/typegpu-concurrent-sum/src/compute/computeInPlace.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import tgpu from 'typegpu';
import * as d from 'typegpu/data';
import * as std from 'typegpu/std';
import {
dataBindGroupLayout as layout,
fixedArrayLength,
workgroupSize,
} from '../schemas.ts';

// Destructive variant of the Blelloch exclusive scan: unlike computeShader,
// there is no copy step — inputArray itself is overwritten with the scan
// result, so the original values are lost.
//
// NOTE(review): std.workgroupBarrier() only synchronizes within one
// workgroup while indexing uses the global invocation id; correct only when
// a single workgroup covers the array — confirm dispatch size.
export const computeShaderInPlace = tgpu['~unstable'].computeFn({
  in: { in: d.builtin.globalInvocationId },
  workgroupSize: [workgroupSize],
})((input) => {
  const threadId = input.in.x;
  // std.log2 is exact only for power-of-two lengths — TODO confirm
  // fixedArrayLength is always a power of 2.
  const length = d.u32(fixedArrayLength);
  const log2Length = d.i32(std.log2(d.f32(length)));

  std.workgroupBarrier();
  // Up-sweep phase: tree reduction, doubling the window each level; the
  // last element accumulates the grand total.
  for (let dLevel = 0; dLevel < log2Length; dLevel++) {
    const windowSize = d.u32(std.exp2(d.f32(dLevel + 1))); // window size == step
    const offset = d.u32(std.exp2(d.f32(dLevel))); // offset for the window

    // One thread per window folds the left half into the window's last slot.
    if (threadId < length / windowSize) {
      const i = threadId * windowSize;
      const leftIdx = i + offset - 1;
      const rightIdx = i + windowSize - 1;

      layout.$.inputArray[rightIdx] = (layout.$.inputArray[leftIdx] as number) +
        (layout.$.inputArray[rightIdx] as number);
    }

    std.workgroupBarrier();
  }

  // Zero the root so the down-sweep produces an *exclusive* scan.
  if (threadId === 0) {
    layout.$.inputArray[length - 1] = 0;
  }

  std.workgroupBarrier();

  // Down-sweep phase: walk back down the tree, swapping and accumulating so
  // each slot receives the sum of all elements strictly before it.
  for (let k = 0; k < log2Length; k++) {
    const dLevel = log2Length - 1 - k;
    const windowSize = d.u32(std.exp2(d.f32(dLevel + 1))); // window size == step
    const offset = d.u32(std.exp2(d.f32(dLevel))); // offset for the window

    if (threadId < length / windowSize) {
      const i = threadId * windowSize;
      const leftIdx = i + offset - 1;
      const rightIdx = i + windowSize - 1;

      // Classic Blelloch swap-and-add step.
      const temp = layout.$.inputArray[leftIdx] as number;
      layout.$.inputArray[leftIdx] = layout.$.inputArray[rightIdx] as number;
      layout.$.inputArray[rightIdx] = temp +
        (layout.$.inputArray[rightIdx] as number);
    }

    std.workgroupBarrier();
  }
});
88 changes: 88 additions & 0 deletions packages/typegpu-concurrent-sum/src/compute/computeShared.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import tgpu from 'typegpu';
import * as d from 'typegpu/data';
import * as std from 'typegpu/std';
import {
dataBindGroupLayout as layout,
fixedArrayLength,
workgroupSize,
} from '../schemas.ts';

// Workgroup-shared scratch space: 2 elements per thread, so one workgroup
// can scan a tile of workgroupSize * 2 values.
const sharedMem = tgpu['~unstable'].workgroupVar(
  d.arrayOf(d.f32, workgroupSize * 2),
);

// Shared-memory variant of the scan: each thread loads two elements of
// inputArray into workgroup memory, the up-sweep runs over the shared tile,
// and the (partial) results are written to workArray.
//
// WORK IN PROGRESS — the down-sweep phase is commented out below, so the
// output currently holds up-sweep partial sums, NOT the final exclusive
// prefix sum.
export const computeShaderShared = tgpu['~unstable'].computeFn({
  in: {
    lid: d.builtin.localInvocationIndex,
    gid: d.builtin.globalInvocationId,
  },
  workgroupSize: [workgroupSize],
})((input) => {
  const lId = input.lid;
  const gId = input.gid.x;
  // Here `length` is the thread count; the shared tile holds 2 * length
  // elements. std.log2 is exact only for power-of-two workgroupSize.
  const length = d.u32(workgroupSize);
  const log2Length = d.i32(std.log2(d.f32(length)));

  // copy — each thread stages its two input elements into shared memory,
  // guarding against reads past the end of the input.
  const idx0 = gId * 2;
  const idx1 = gId * 2 + 1;
  if (idx0 < d.u32(fixedArrayLength)) {
    sharedMem.value[lId * 2] = layout.$.inputArray[idx0] as number;
  }
  if (idx1 < d.u32(fixedArrayLength)) {
    sharedMem.value[lId * 2 + 1] = layout.$.inputArray[idx1] as number;
  }
  std.workgroupBarrier();

  // Up-sweep phase over the 2*length shared elements.
  // NOTE(review): a full up-sweep of 2*length elements needs
  // log2(length) + 1 levels; this loop runs only log2(length), so the final
  // combine (windowSize == 2*length) appears to be missing — confirm
  // whether that is intentional while the down-sweep is disabled.
  for (let dLevel = 0; dLevel < log2Length; dLevel++) {
    const windowSize = d.u32(std.exp2(d.f32(dLevel + 1))); // window size == step
    const offset = d.u32(std.exp2(d.f32(dLevel))); // offset for the window

    // Active threads: one per window across the 2*length-element tile
    // ((2*length) / windowSize, written as length / (windowSize / 2)).
    if (lId < (length / (windowSize / 2))) { //workgroup length
      const i = lId * windowSize;
      const leftIdx = i + offset - 1;
      const rightIdx = i + windowSize - 1;

      (sharedMem.value[rightIdx] as number) += sharedMem
        .value[leftIdx] as number;
    }

    std.workgroupBarrier();
  }
  std.workgroupBarrier();

  // Down-sweep phase, currently disabled (see WIP note above):
  // if (lId === 0) {
  //   sharedMem.value[length - 1] = 0;
  // }

  // std.workgroupBarrier();

  // // Down-sweep phase
  // for (let k = 0; k < log2Length; k++) {
  //   const dLevel = log2Length - 1 - k;
  //   const windowSize = d.u32(std.exp2(d.f32(dLevel + 1))); // window size == step
  //   const offset = d.u32(std.exp2(d.f32(dLevel))); // offset for the window

  //   if (lId < length / windowSize) {
  //     const i = lId * windowSize;
  //     const leftIdx = (i + offset - 1) % (length * 2);
  //     const rightIdx = (i + windowSize - 1) % (length * 2);

  //     const temp = sharedMem.value[leftIdx] as number;
  //     sharedMem.value[leftIdx] = sharedMem.value[rightIdx] as number;
  //     sharedMem.value[rightIdx] = temp +
  //       (sharedMem.value[rightIdx] as number);
  //   }

  //   std.workgroupBarrier();
  // }

  // copy back — mirror of the staging loads, bounds-guarded the same way.
  if (idx0 < d.u32(fixedArrayLength)) {
    layout.$.workArray[idx0] = sharedMem.value[lId * 2] as number;
  }
  if (idx1 < d.u32(fixedArrayLength)) {
    layout.$.workArray[idx1] = sharedMem.value[lId * 2 + 1] as number;
  }
});
Loading