Skip to content

Commit cdb4e9b

Browse files
committed
Reimport base65536 patch: gchq/CyberChef#2146
1 parent 8596e61 commit cdb4e9b

File tree

1 file changed

+293
-0
lines changed

1 file changed

+293
-0
lines changed

voo7ieX9-base65536.patch

Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
commit 40f58640d27577ffcdcd2ad28d30893f5cc23e80
2+
Author: voo7ieX9 <[email protected]>
3+
Date: Mon Dec 15 23:13:53 2025 +0100
4+
5+
Added base65536 encoder and decoder
6+
7+
diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json
8+
index 434c8bb6..0dcb75cd 100644
9+
--- a/src/core/config/Categories.json
10+
+++ b/src/core/config/Categories.json
11+
@@ -31,6 +31,8 @@
12+
"To Base64",
13+
"From Base64",
14+
"Show Base64 offsets",
15+
+ "To Base65536",
16+
+ "From Base65536",
17+
"To Base92",
18+
"From Base92",
19+
"To Base85",
20+
diff --git a/src/core/lib/Base65536.mjs b/src/core/lib/Base65536.mjs
21+
new file mode 100644
22+
index 00000000..eed3d6c9
23+
--- /dev/null
24+
+++ b/src/core/lib/Base65536.mjs
25+
@@ -0,0 +1,172 @@
26+
+/**
27+
+ * Base65536 resources.
28+
+ *
29+
+ * @author voo7ieX9
30+
+ * @copyright Crown Copyright 2025
31+
+ * @license Apache-2.0
32+
+ */
33+
+
34+
/**
 * Encoding table: second byte of a pair -> first code point of the
 * 256-code-point block that represents it.
 *
 * Keys 0-255 are the possible values of the second byte. The extra key
 * "-1" is the block used when there is no second byte (odd-length input).
 * The first byte of the pair is added to the block start to obtain the
 * final code point.
 */
const BLOCK_START = (() => {
    // Each entry is [start of first block, number of consecutive blocks].
    // Blocks inside a run are 256 code points apart.
    const runs = [
        [13312, 25],
        [19968, 81],
        [41216, 3],
        [42240, 1],
        [67072, 1],
        [73728, 3],
        [77824, 4],
        [82944, 2],
        [92160, 2],
        [131072, 134]
    ];

    const table = {};
    let secondByte = 0;
    for (const [firstStart, blockCount] of runs) {
        for (let k = 0; k < blockCount; k++) {
            table[secondByte] = firstStart + k * 256;
            secondByte++;
        }
    }

    // Padding block: used for a trailing byte that has no partner.
    table["-1"] = 5376;
    return table;
})();
84+
+
85+
/**
 * Decoding table: block start (code point with its low 8 bits cleared)
 * -> second byte of the pair.
 *
 * The value -1 marks the padding block, i.e. a character that carries
 * only a single, final byte.
 */
const B2 = (() => {
    // Padding block first; it carries no second byte.
    const table = {5376: -1};

    // Each entry is [start of first block, number of consecutive blocks].
    // Blocks inside a run are 256 code points apart, and second-byte
    // values are handed out in order across the runs.
    const runs = [
        [13312, 25],
        [19968, 81],
        [41216, 3],
        [42240, 1],
        [67072, 1],
        [73728, 3],
        [77824, 4],
        [82944, 2],
        [92160, 2],
        [131072, 134]
    ];

    let secondByte = 0;
    runs.forEach(([firstStart, blockCount]) => {
        const end = firstStart + blockCount * 256;
        for (let start = firstStart; start < end; start += 256) {
            table[start] = secondByte++;
        }
    });

    return table;
})();
135+
+
136+
/**
 * Convert a byte sequence into its Base65536 text form.
 *
 * Bytes are consumed two at a time and each pair becomes one Unicode
 * character. A trailing unpaired byte is emitted using the padding
 * block (the "-1" entry of BLOCK_START).
 *
 * @param {Uint8Array} data
 * @returns {string}
 */
export function encode(data) {
    const total = data.length;
    const chars = [];

    for (let pos = 0; pos < total; pos += 2) {
        const first = data[pos];
        // -1 selects the padding block when there is no second byte.
        let second = -1;
        if (pos + 1 < total) {
            second = data[pos + 1];
        }
        chars.push(String.fromCodePoint(BLOCK_START[second] + first));
    }

    return chars.join("");
}
155+
+
156+
/**
 * Decode Base65536 string to bytes.
 *
 * Every character carries two bytes, except a character from the padding
 * block (B2 value -1), which carries a single byte and must be the last
 * character of the input.
 *
 * @param {string} str
 * @returns {Uint8Array}
 * @throws {Error} if a character is not part of the Base65536 alphabet,
 *     or if any character follows the single-byte (final) character.
 */
export function decode(str) {
    const result = [];
    let done = false;

    // for...of walks the string by code point, so characters outside the
    // BMP (surrogate pairs) are seen once, as a whole.
    for (const chr of str) {
        const codePoint = chr.codePointAt(0);

        // Low 8 bits hold the first byte; the rest identifies the block.
        const b1 = codePoint & 0xFF;
        const blockStart = codePoint - b1;

        if (!Object.prototype.hasOwnProperty.call(B2, blockStart)) {
            throw new Error(`Invalid base65536 code point: ${codePoint}`);
        }

        // Once the single-byte character has been seen, nothing at all may
        // follow it - not even an otherwise valid two-byte character.
        if (done) {
            throw new Error("base65536 sequence continued after final byte");
        }

        const b2 = B2[blockStart];
        result.push(b1);

        if (b2 === -1) {
            // Padding block: this character carried only the final byte.
            done = true;
        } else {
            result.push(b2);
        }
    }

    return new Uint8Array(result);
}
198+
diff --git a/src/core/operations/FromBase65536.mjs b/src/core/operations/FromBase65536.mjs
199+
new file mode 100644
200+
index 00000000..5eabf4e3
201+
--- /dev/null
202+
+++ b/src/core/operations/FromBase65536.mjs
203+
@@ -0,0 +1,42 @@
204+
+/**
205+
+ * @author voo7ieX9
206+
+ * @copyright Crown Copyright 2025
207+
+ * @license Apache-2.0
208+
+ */
209+
+
210+
+import Operation from "../Operation.mjs";
211+
+import {decode} from "../lib/Base65536.mjs";
212+
+
213+
/**
 * Operation that converts Base65536 text back into bytes.
 */
class FromBase65536 extends Operation {

    /**
     * Registers the operation's metadata: display name, module,
     * description, reference URL, input/output types and (empty) arguments.
     */
    constructor() {
        super();

        Object.assign(this, {
            name: "From Base65536",
            module: "Default",
            description: "Base65536 is a binary encoding that uses Unicode code points to represent data. This operation decodes a Base65536 string back into its original form.<br><br>e.g. <code>驨ꍬ啯𒁷ꍲᕤ</code> becomes <code>hello world</code>",
            infoURL: "https://github.com/qntm/base65536",
            inputType: "string",
            outputType: "byteArray",
            args: []
        });
    }

    /**
     * Decodes the input and hands the bytes back as a plain array.
     *
     * @param {string} input
     * @param {Object[]} args
     * @returns {byteArray}
     */
    run(input, args) {
        // decode() yields a Uint8Array; copy it into an ordinary array.
        return Array.from(decode(input));
    }

}
244+
+
245+
+export default FromBase65536;
246+
diff --git a/src/core/operations/ToBase65536.mjs b/src/core/operations/ToBase65536.mjs
247+
new file mode 100644
248+
index 00000000..51b80ced
249+
--- /dev/null
250+
+++ b/src/core/operations/ToBase65536.mjs
251+
@@ -0,0 +1,42 @@
252+
+/**
253+
+ * @author voo7ieX9
254+
+ * @copyright Crown Copyright 2025
255+
+ * @license Apache-2.0
256+
+ */
257+
+
258+
+import Operation from "../Operation.mjs";
259+
+import {encode} from "../lib/Base65536.mjs";
260+
+
261+
/**
 * Operation that converts raw bytes into Base65536 text.
 */
class ToBase65536 extends Operation {

    /**
     * Registers the operation's metadata: display name, module,
     * description, reference URL, input/output types and (empty) arguments.
     */
    constructor() {
        super();

        Object.assign(this, {
            name: "To Base65536",
            module: "Default",
            description: "Base65536 is a binary encoding that uses Unicode code points to represent data. This operation encodes data into a Unicode string.<br><br>e.g. <code>hello world</code> becomes <code>驨ꍬ啯𒁷ꍲᕤ</code>",
            infoURL: "https://github.com/qntm/base65536",
            inputType: "ArrayBuffer",
            outputType: "string",
            args: []
        });
    }

    /**
     * Wraps the input buffer in a byte view and encodes it.
     *
     * @param {ArrayBuffer} input
     * @param {Object[]} args
     * @returns {string}
     */
    run(input, args) {
        return encode(new Uint8Array(input));
    }

}
292+
+
293+
+export default ToBase65536;

0 commit comments

Comments
 (0)