Skip to content

Commit 1cdfb2a

Browse files
committed
fix
1 parent aef1fde commit 1cdfb2a

File tree

1 file changed

+14
-108
lines changed

1 file changed

+14
-108
lines changed

src/json.nr

Lines changed: 14 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
134134
}
135135

136136
let old_transcript = self.transcript[i];
137-
// only change is set the token to be KEY_TOKEN
137+
// The only difference between old_transcript and new_transcript is that the token is set to KEY_TOKEN
138138
let new_transcript = TranscriptEntry::to_field(
139139
TranscriptEntry {
140140
token: KEY_TOKEN as Field,
@@ -228,40 +228,6 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
228228
assert(depth == 0, "validate_tokens: unclosed objects or arrays");
229229
}
230230

231-
/// Parses [`Self::transcript`] to populate [`Self::json_entries_packed`] and [`Self::key_data`]
232-
/// Given a processed transcript of json tokens, compute a list of json entries that describes the values within the JSON blob
233-
///
234-
/// [`Self::json_entries_packed`] is a [JSONEntry] struct whose members have been packed into a single Field element.
235-
///
236-
/// A 'value' here is either an Object, Array, String, Numeric or Literal.
237-
/// e.g. "[ 1, 2, 3 ]" contains 4 values (3 Numeric types and the Array that contains them)
238-
///
239-
/// To avoid branches and if statements, we construct a state transition function out of the lookup table TOKEN_FLAGS_TABLE
240-
/// This table takes as an input the following:
241-
/// 1. The token value of an element in the transcript
242-
/// 2. The layer type the previous token is located in (i.e. are we in an array or an object?)
243-
/// The table outputs the following data:
244-
/// 1. Should we create a new json entry? (i.e. is the token a STRING_TOKEN, LITERAL_TOKEN, NUMERIC_TOKEN, END_ARRAY_TOKEN, END_OBJECT_TOKEN)
245-
/// 2. Is the token `}` or `]`?
246-
/// 3. Is the token `{` or `[`?
247-
/// 4. Given the current layer type and the token being queried, what should the new layer type be?
248-
/// 5. Is the token `KEY_TOKEN`?
249-
/// 6. Is the token a `STRING_TOKEN`, `NUMERIC_TOKEN` OR `VALUE_TOKEN`?
250-
/// 7. Is the token one that we should skip over? `,` or `:`
251-
///
252-
/// ## explanation of `parent_context_stack`
253-
/// When recording a JSONEntry, we need to understand how many children (if any) a JSONEntry has,
254-
/// as well as a way of accessing children given the parent JSONEntry object
255-
/// Note: OBJECT_TOKEN and ARRAY_TOKEN have children. single values (NUMERIC_TOKEN, LITERAL_TOKEN, STRING_TOKEN) do not.
256-
/// We define a "context stack" via `parent_context_stack` to track this data.
257-
/// The front of `parent_context_stack` contains a JSONContextStackEntry (packed into a single Field for the purposes of efficient lookups) for the current parent
258-
/// If we parse a token that creates a new parent (BEGIN_OBJECT_TOKEN or BEGIN_ARRAY_TOKEN), we push a new parent onto the stack
259-
/// If we reach the end of an object or array (END_OBJECT_TOKEN, END_ARRAY_TOKEN) we pop the current parent off of the stack
260-
/// Note: "stack" is used loosely here. We have a fixed-size array of packed JSONContextStackEntry vals and a pointer to the head of the stack.
261-
/// (the array size defines the maximum number of entities the stack can contain, currently set at 32)
262-
/// Note: the size param `32` is a magic number we should replace with a defined const global variable
263-
/// To push: we increment the pointer by 1 and write a new entry at the pointer value
264-
/// To pop: we decrement the pointer by 1 (we don't need to delete data because new data is written every time the pointer is incremented)
265231
/// Parses [`Self::transcript`] to populate [`Self::json_entries_packed`] and [`Self::key_data`]
266232
/// Given a processed transcript of json tokens, compute a list of json entries that describes the values within the JSON blob
267233
///
@@ -303,11 +269,8 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
303269
// Note: parent_context_stack[0] = the root JSON object
304270
let mut depth: Field = 1;
305271
// how many children does the current parent have?
306-
// how many children does the current parent have?
307272
let mut num_entries_at_current_depth: Field = 0;
308273

309-
// current_identity_value = unique identifier for all JSON objects/arrays we create
310-
311274
// current_identity_value = unique identifier for all JSON objects/arrays we create
312275
let mut current_identity_value: Field = 0;
313276
// next_identity_value = smallest integer that we've not yet assigned as a unique identifier to the JSON objects/arrays we create
@@ -316,11 +279,6 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
316279
// context: 0 for object, 1 for array
317280
let mut context = OBJECT_LAYER;
318281

319-
// current_key_index_and_length encodes 2 bits of data in a single Field element (to save some gates)
320-
// note: would be more readable if we had a custom struct that wrapped a Field element with defined update methods
321-
// 1. what is the key index? (index = unique identifier, starts at 0)
322-
// 2. what is the size of the key in bytes?
323-
// current_key_index_and_length = index + length * 0x10000 (assumes index does not exceed 2^16. I don't think we check for this, there is an assumption that the size of the circuit would be too large to compile/run if the JSON blob has over 2^16 unique keys)
324282
// current_key_index_and_length encodes 2 pieces of data in a single Field element (to save some gates)
325283
// note: would be more readable if we had a custom struct that wrapped a Field element with defined update methods
326284
// 1. what is the key index? (index = unique identifier, starts at 0)
@@ -352,33 +310,19 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
352310
// - `is_end_of_object_or_array`, `is_start_of_object_or_array`, `is_key_token`, `is_value_token`
353311
// See comments in token_flags.nr for more details
354312

355-
// The TOKEN_FLAGS_TABLE table encodes the following information:
356-
// Given the current token and the context (whether the parent is an object or array),
357-
// we can extract the following information from TOKEN_FLAGS_TABLE via a simple lookup:
358-
// 1. Should we create a new JSONEntry object?
359-
// - i.e is the token END_ARRAY_TOKEN, END_OBJECT_TOKEN, STRING_TOKEN, NUMERIC_TOKEN, LITERAL_TOKEN
360-
// 2. If the token creates a new parent (BEGIN_OBJECT_TOKEN or BEGIN_ARRAY_TOKEN), what is the context value? (0 = object, 1 = array)
361-
// 3. Has the parent entity changed?
362-
// - `preserve_num_entries` = 1 if the parent entity changes, not the best variable name
363-
// 5. Various bools that describe the token, which are cheaper to acquire this way than via comparison operators
364-
// - `is_end_of_object_or_array, `is_start_of_object_or_array`, `is_key_token`, `is_value_token`
365-
// See comments in token_flags.nr for more details
366313
// 13 gates
367314
let TokenFlags {
368-
create_json_entry,
369-
is_end_of_object_or_array,
370-
is_start_of_object_or_array,
371-
new_context,
372-
is_key_token,
373-
is_value_token,
374-
preserve_num_entries,} = TokenFlags::from_field(
315+
create_json_entry,
316+
is_end_of_object_or_array,
317+
is_start_of_object_or_array,
318+
new_context,
319+
is_key_token,
320+
is_value_token,
321+
preserve_num_entries,
322+
} = TokenFlags::from_field(
375323
TOKEN_FLAGS_TABLE[cast_num_to_u32(token) + context * NUM_TOKENS],
376324
);
377325

378-
// Determine what the current key index is and the key size in bytes
379-
// (key index = byte location in the original JSON of the key)
380-
// Pseudocode equivalent:
381-
// current_key_index_and_length = update_key ? (index + length * 0x10000) : current_key_index_and_length
382326
// We convert these booleans into Fields so that we can use them in arithmetic operations.
383327
let create_json_entry = create_json_entry as Field;
384328
let is_end_of_object_or_array = is_end_of_object_or_array as Field;
@@ -401,13 +345,6 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
401345
diff * is_key_token as Field + current_key_index_and_length;
402346
std::as_witness(current_key_index_and_length);
403347

404-
// If the current token is BEGIN_OBJECT_TOKEN or BEGIN_ARRAY_TOKEN,
405-
// we need to push a new parent object into `parent_context_stack`.
406-
// We apply a trick here to avoid branching: regardless of the token type,
407-
// we *always* write a new stack entry into `parent_context_stack[depth]`.
408-
// If the current token is not BEGIN_OBJECT_TOKEN or BEGIN_ARRAY_TOKEN,
409-
// the data we write never gets read.
410-
// Note: we only update the value of `depth` if token == BEGIN_OBJECT_TOKEN or BEGIN_ARRAY_TOKEN,
411348
// If the current token is BEGIN_OBJECT_TOKEN or BEGIN_ARRAY_TOKEN,
412349
// we need to push a new parent object into `parent_context_stack`.
413350
// We apply a trick here to avoid branching: regardless of the token type,
@@ -434,8 +371,6 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
434371
// 9.5 gates
435372
let depth_index: Field = (depth - 1);
436373
let previous_stack_entry_packed = parent_context_stack[cast_num_to_u32(depth_index)];
437-
let depth_index: Field = (depth - 1);
438-
let previous_stack_entry_packed = parent_context_stack[cast_num_to_u32(depth_index)];
439374
let previous_stack_entry =
440375
JSONContextStackEntry::from_field(previous_stack_entry_packed);
441376

@@ -542,7 +477,7 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
542477
// num_entries_at_current_depth = previous_stack_entry.num_entries + 1;
543478
// }
544479
// 2 gates
545-
// If we ses a value token (string/number/literal), we add 1 to count. If we see , or :, no change.
480+
// If we see a value token (string/number/literal), we add 1 to count. If we see , or :, no change.
546481
// If preserve_num_entries is 0 (i.e. start or end of object or array) then we reset variable to 0.
547482
num_entries_at_current_depth =
548483
num_entries_at_current_depth * preserve_num_entries + is_value_token;
@@ -560,16 +495,7 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
560495
// } else {
561496
// context = new_context
562497
// }
563-
// Set the value of `context` (badly named: are we in an object or array? context == 0 => object, context == 1 => array)
564-
// If current token is END_OBJECT_TOKEN or END_ARRAY_TOKEN, set context to the context value in previous_stack_entry
565-
// (i.e. restore the context to whatever the parent of the object/array is)
566-
// Pseudocode:
567-
// if (is_end_of_object_or_array) {
568-
// context = previous_stack_entry.context
569-
// } else {
570-
// context = new_context
571-
// }
572-
// 1 gate
498+
573499
// if `is_end_of_object_or_array == 1`, `new_context = 0` so we can do something cheaper than a conditional select:
574500
// If is_end_of_object_or_array is 1, then new_context is 0, so set context = previous_stack_entry.context
575501
// If is_end_of_object_or_array is 0, then set context = new_context
@@ -585,7 +511,7 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
585511
// * the key length (length of the key in bytes)
586512
// * current_identity_value (unique identifier for the key's JSON object. starts at 0)
587513
// * in the current parent object/array, how many JSON entries deep is the key's associated JSON object?
588-
// TODO: would be much more readable if we have a custom struct `KeyData` that wrapped a Field elemenet with sensible helper methods
514+
// TODO: would be much more readable if we have a custom struct `KeyData` that wrapped a Field element with sensible helper methods
589515
// Pseudocode:
590516
// if (create_json_entry) {
591517
// let mut new_key_data;
@@ -597,24 +523,6 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
597523
// self.key_data[entry_ptr] = new_key_data;
598524
// }
599525

600-
// Update data that describes the key for the current token.
601-
// If we are creating a JSON entry, we also populate `self.key_data` with info that describes the current entry's key
602-
// key_data contains 3 members that are packed into a Field:
603-
// * the key index (where in the original JSON blob does the key start?)
604-
// * the key length (length of the key in bytes)
605-
// * current_identity_value (unique identifier for the key's JSON object. starts at 0)
606-
// * in the current parent object/array, how many JSON entries deep is the key's associated JSON object?
607-
// TODO: would be much more readable if we have a custom struct `KeyData` that wrapped a Field elemenet with sensible helper methods
608-
// Pseudocode:
609-
// if (create_json_entry) {
610-
// let mut new_key_data;
611-
// if (is_value_token) {
612-
// new_key_data = make_key(current_key_index_and_length, current_identity_value, num_entries_at_current_depth - 1);
613-
// } else if (is_end_of_object_or_array) {
614-
// new_key_data = make_key(previous_stack_entry.current_key_index_and_length, current_identity_value, num_entries_at_current_depth - 1);
615-
// }
616-
// self.key_data[entry_ptr] = new_key_data;
617-
// }
618526
// 3 gates
619527
// If context is 0 (object context), then don't take the num_entries_at_current_depth term into account
620528
// because searching for a key depends only on the key name, not position, as opposed to array context where we need to look up by position/index.
@@ -632,7 +540,6 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
632540
// 3.5 gates
633541
self.key_data[entry_ptr] = new_key_data * create_json_entry as Field;
634542

635-
// Update `entry_ptr` (points to the head of self.key_data and self.json_entries_packed)
636543
// Update `entry_ptr` (points to the head of self.key_data and self.json_entries_packed)
637544
// 1 gate
638545
entry_ptr += create_json_entry as u32;
@@ -675,7 +582,7 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
675582
let raw_transcript = unsafe { __build_transcript::<NumBytes, MaxNumTokens>(self.json) };
676583

677584
// steps to verify the transcript is correct
678-
// 14 gates per iteration, plus fixed cost for initing 2,048 size lookup table (4,096 gates)
585+
// 14 gates per iteration, plus fixed cost for initializing a 2,048 size lookup table (4,096 gates)
679586
let mut previous_was_potential_escape_sequence: bool = false;
680587
for i in 0..NumBytes {
681588
let ascii = self.json[i];
@@ -699,8 +606,7 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
699606
// 2 gates
700607
let raw: Field = raw_transcript[cast_num_to_u32(transcript_ptr)];
701608

702-
// TODO: document this
703-
// TODO: why are we comparing a derived quantity against `raw_transcript` instead of constructing `raw_transcript` directly (faster)
609+
// Compare a derived quantity against `raw_transcript` instead of constructing `raw_transcript` directly to avoid writing to arrays in a constrained function
704610
// 1 gate
705611
let diff: Field = raw
706612
- RawTranscriptEntry::to_field(

0 commit comments

Comments
 (0)