Skip to content

Commit 0ecd29c

Browse files
committed
Changed unmatched to use instructions field instead of having its own special property that destroys usfm
1 parent 6d46990 commit 0ecd29c

File tree

3 files changed

+165
-46
lines changed

3 files changed

+165
-46
lines changed

lib/platform-bible-utils/dist/index.d.ts

Lines changed: 159 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1564,6 +1564,7 @@ export declare function areUsjContentsEqualExceptWhitespace(a: Usj | undefined,
15641564
* The equivalent in USX would be:
15651565
*
15661566
* ```xml
1567+
* <!-- prettier-ignore -->
15671568
* <chapter number="1" style="c" altnumber="2" pubnumber="A" sid="GEN 1" />
15681569
* <para style="s1">This is a section header</para>
15691570
* ```
@@ -1691,9 +1692,10 @@ export type NormalMarkerInfo = {
16911692
* The equivalent in USX would be:
16921693
*
16931694
* ```xml
1694-
* <periph alt="Title Page" id="title">
1695+
* <!-- prettier-ignore -->
1696+
* <periph alt="Example Peripheral" id="x-example">
16951697
* <para style="p">Some contents of the example peripheral</para>
1696-
* </periph>;
1698+
* </periph>
16971699
* ```
16981700
*/
16991701
textContentAttribute?: string;
@@ -1716,9 +1718,8 @@ export type NormalMarkerInfo = {
17161718
* The equivalent in USX would be:
17171719
*
17181720
* ```xml
1719-
* <book code="MAT" style="id">
1720-
* 41MATEX.SFM, Example Translation, September 2025
1721-
* </book>;
1721+
* <!-- prettier-ignore -->
1722+
* <book code="MAT" style="id">41MATEX.SFM, Example Translation, September 2025</book>
17221723
* ```
17231724
*/
17241725
leadingAttributes?: string[];
@@ -1794,6 +1795,22 @@ export type NormalMarkerInfo = {
17941795
* `ex2`.
17951796
*/
17961797
isIndependentClosingMarkerForRegExp?: string[];
1798+
/**
1799+
* Marker to use when operating on the USFM representation of this marker. For example, when
1800+
* outputting to USFM, the marker info for the marker listed here in `markerUsfm` should be used
1801+
* instead of the marker info for the marker as listed in USX or USJ.
1802+
*/
1803+
markerUsfm?: string;
1804+
/**
1805+
* Instructions regarding special handling required for this marker when transforming from USFM to
1806+
* USX or USJ. These instructions are an explanation of what needs to be done to this marker to
1807+
* properly transform it to USX or USJ.
1808+
*
1809+
* This property is generally only included when it is exceptionally difficult to parse a marker
1810+
* properly from USFM; the markers map attempts to use this property as little as possible,
1811+
* favoring encoding information in other properties for more automatic transformation instead.
1812+
*/
1813+
parseUsfmInstructions?: string;
17971814
};
17981815
/** Information about a USFM/USX/USJ marker that is essential for proper translation between formats */
17991816
export type MarkerInfo = NormalMarkerInfo | AttributeMarkerInfo;
@@ -1881,6 +1898,8 @@ export type NonCloseableMarkerTypeInfo = MarkerTypeInfoBase & {
18811898
* {@link MarkerTypeInfo} for various kinds of marker types.
18821899
*/
18831900
export type MarkerTypeInfoBase = {
1901+
/** Explanation of the meaning of this marker type */
1902+
description?: string;
18841903
/**
18851904
* Whether markers of this type should have a `style` attribute in USX/USJ.
18861905
*
@@ -1906,8 +1925,8 @@ export type MarkerTypeInfoBase = {
19061925
* inside the marker (if present) should not be skipped.
19071926
*
19081927
* This is used for certain markers that sometimes are normal markers but sometimes are derived
1909-
* metadata and are not present in USFM. These derived metadata markers are all identified by
1910-
* whether they have specific attributes on them.
1928+
* metadata and are not present in USFM. These derived metadata markers are identified by whether
1929+
* they have specific attributes on them.
19111930
*
19121931
* For example, if the `verse` marker has an `eid` attribute, it indicates it is a marker denoting
19131932
* the end of the verse that is derived metadata in USX/USJ and is not present in USFM. Note that
@@ -1917,11 +1936,10 @@ export type MarkerTypeInfoBase = {
19171936
* Following is an example of a derived metadata `verse` marker in USX:
19181937
*
19191938
* ```xml
1939+
* <!-- prettier-ignore -->
19201940
* <para style="p">
1921-
* <verse number="21" style="v" sid="2SA 1:21" />
1922-
* This is verse 21.
1923-
* <verse eid="2SA 1:21" />
1924-
* </para>;
1941+
* <verse number="21" style="v" sid="2SA 1:21" />This is verse 21.<verse eid="2SA 1:21" />
1942+
* </para>
19251943
* ```
19261944
*
19271945
* The equivalent in USFM would be:
@@ -1939,12 +1957,8 @@ export type MarkerTypeInfoBase = {
19391957
* Following is an example of a generated `ref` in USX:
19401958
*
19411959
* ```xml
1942-
* <char style="xt">
1943-
* <ref loc="2SA 1:1" gen="true">
1944-
* 2Sam 1:1
1945-
* </ref>
1946-
* ; <ref loc="2SA 1:2-3">2Sam 1:2-3</ref>.
1947-
* </char>;
1960+
* <!-- prettier-ignore -->
1961+
* <char style="xt"><ref loc="2SA 1:1" gen="true">2Sam 1:1</ref>; <ref loc="2SA 1:2-3">2Sam 1:2-3</ref>.</char>
19481962
* ```
19491963
*
19501964
* The equivalent in USFM would be:
@@ -1956,6 +1970,54 @@ export type MarkerTypeInfoBase = {
19561970
* This property is not used when converting to USX or USJ.
19571971
*/
19581972
skipOutputMarkerToUsfmIfAttributeIsPresent?: string[];
1973+
/**
1974+
* Whether to always skip outputting this marker when converting to USFM. Only the opening and
1975+
* closing marker representations are skipped, though; the content inside the marker (if
1976+
* present) should not be skipped.
1977+
*
1978+
* This is used for marker types that have no representation in USFM in a given version, likely
1979+
* meaning they are derived metadata and are not present in USFM.
1980+
*
1981+
* For example, in USFM 3.1, the `table` marker type is generated while transforming USFM into
1982+
* USX/USJ and is not preserved when transforming from USX/USJ to USFM.
1983+
*
1984+
* Following is an example of a derived metadata `table` marker in USX:
1985+
*
1986+
* ```xml
1987+
* <!-- prettier-ignore -->
1988+
* <table>
1989+
* <row style="tr">
1990+
* <cell style="th1" align="start">Header 1</cell>
1991+
* <cell style="th2" align="start">Header 2 space after </cell>
1992+
* <cell style="thc3" align="center" colspan="2">Header 3-4 centered</cell>
1993+
* <cell style="thr5" align="end">Header 5 right</cell>
1994+
* </row>
1995+
* <row style="tr">
1996+
* <cell style="tc1" align="start">Row 1 cell 1</cell>
1997+
* <cell style="tc2" align="start">Row 1 cell 2 space after </cell>
1998+
* <cell style="thc3" align="center">Row 1 cell 3 centered</cell>
1999+
* <cell style="thr4" align="end" colspan="2">Row 1 cell 4-5 right</cell>
2000+
* </row>
2001+
* <row style="tr">
2002+
* <cell style="tcr1" align="end" colspan="4">Row 2 cell 1-4 right</cell>
2003+
* <cell style="tc5" align="start">Row 2 cell 5</cell>
2004+
* </row>
2005+
* </table>
2006+
* ```
2007+
*
2008+
* The equivalent in USFM would be:
2009+
*
2010+
* ```usfm
2011+
* \tr \th1 Header 1\th2 Header 2 space after \thc3-4 Header 3-4 centered\thr5 Header 5 right
2012+
* \tr \tc1 Row 1 cell 1\tc2 Row 1 cell 2 space after \thc3 Row 1 cell 3 centered\thr4-5 Row 1 cell 4-5 right
2013+
* \tr \tcr1-4 Row 2 cell 1-4 right\tc5 Row 2 cell 5
2014+
* ```
2015+
*
2016+
* This property is not used when converting to USX or USJ.
2017+
*
2018+
* If not present, defaults to `false`
2019+
*/
2020+
skipOutputMarkerToUsfm?: boolean;
19592021
/**
19602022
* Whether markers of this type should have a newline before them in USFM.
19612023
*
@@ -1973,6 +2035,55 @@ export type MarkerTypeInfoBase = {
19732035
* If not present, defaults to `false`
19742036
*/
19752037
hasNewlineBefore?: boolean;
2038+
/**
2039+
* Marker type to use when operating on the USFM representation of markers of this type. For
2040+
* example, when outputting to USFM, the marker type listed here in `markerTypeUsfm` should be
2041+
* used instead of the marker's type as listed in USX or USJ.
2042+
*/
2043+
markerTypeUsfm?: string;
2044+
/**
2045+
* Marker type to use when operating on the USX representation of markers of this type. For
2046+
* example, when outputting to USX, the marker type listed here in `markerTypeUsx` should be used
2047+
* instead of the marker's type as listed in USFM or USJ.
2048+
*/
2049+
markerTypeUsx?: string;
2050+
/**
2051+
* Marker type to use when operating on the USJ representation of markers of this type. For
2052+
* example, when outputting to USJ, the marker type listed here in `markerTypeUsj` should be used
2053+
* instead of the marker's type as listed in USFM or USX.
2054+
*/
2055+
markerTypeUsj?: string;
2056+
/**
2057+
* Prefix to add to the opening and closing marker before the marker name if a marker of this type
2058+
* occurs within another marker of this type when outputting to USFM.
2059+
*
2060+
* Following is an example of `nd` inside `wj` (both are `char`-type markers) in USFM:
2061+
*
2062+
* ```usfm
2063+
* \p \wj This is \+nd nested\+nd*!\wj*
2064+
* ```
2065+
*/
2066+
nestedPrefix?: string;
2067+
/**
2068+
* Instructions regarding special handling required for this marker type when transforming to
2069+
* USFM. These instructions are an explanation of what needs to be done to markers of this type to
2070+
* properly transform the marker to USFM.
2071+
*
2072+
* This property is generally only included when it is exceptionally difficult to output a marker
2073+
* properly to USFM; the markers map attempts to use this property as little as possible, favoring
2074+
* encoding information in other properties for more automatic transformation instead.
2075+
*/
2076+
outputToUsfmInstructions?: string;
2077+
/**
2078+
* Instructions regarding special handling required for this marker type when transforming from
2079+
* USFM to USX or USJ. These instructions are an explanation of what needs to be done to markers
2080+
* of this type to properly transform the marker to USX or USJ.
2081+
*
2082+
* This property is generally only included when it is exceptionally difficult to parse a marker
2083+
* properly from USFM; the markers map attempts to use this property as little as possible,
2084+
* favoring encoding information in other properties for more automatic transformation instead.
2085+
*/
2086+
parseUsfmInstructions?: string;
19762087
};
19772088
/**
19782089
* Information about a USFM/USX/USJ marker type that is essential for proper translation between
@@ -1989,6 +2100,11 @@ export type MarkersMap = {
19892100
* the schema file.
19902101
*/
19912102
commit: string;
2103+
/**
2104+
* Which version of the markers map types this markers map conforms to. Follows [Semantic
2105+
* versioning](https://semver.org/); the same major version contains no breaking changes.
2106+
*/
2107+
markersMapVersion: `1.${number}.${number}${string}`;
19922108
/**
19932109
* Which tag or commit of `usfm-tools` repo this map is generated from.
19942110
*
@@ -4288,6 +4404,20 @@ export declare function getStylesheetForTheme(theme: ThemeDefinitionExpanded): s
42884404
* @returns
42894405
*/
42904406
export declare function applyThemeStylesheet(this: Window, theme: ThemeDefinitionExpanded, previousStyleElement?: HTMLStyleElement, styleElementIdSuffix?: string): HTMLStyleElement;
4407+
/**
4408+
* Represents information about where a USJ node resides in the `content` array of its parent.
4409+
* `parent` is a reference to the node's parent, and `index` represents the numeric index inside of
4410+
* `parent`'s content array.
4411+
*/
4412+
export type StackItem = {
4413+
parent: MarkerObject | Usj;
4414+
index: number;
4415+
};
4416+
/**
4417+
* Stack of levels inside a USJ tree relative to a specific node. The top of the stack should always
4418+
* be the root Usj object.
4419+
*/
4420+
export type WorkingStack = StackItem[];
42914421
/** Represents USJ formatted scripture with helpful utilities for working with it */
42924422
export declare class UsjReaderWriter implements IUsjReaderWriter {
42934423
private readonly usj;
@@ -4302,12 +4432,16 @@ export declare class UsjReaderWriter implements IUsjReaderWriter {
43024432
findSingleValue<T>(jsonPathQuery: string): T | undefined;
43034433
findParent<T>(jsonPathQuery: string): T | undefined;
43044434
/**
4305-
* Determine if the passed in marker is the USJ marker (should be the top-level marker)
4435+
* Determine if the passed in marker is the top-level USJ marker.
4436+
*
4437+
* Note that USJ markers that are not the top-level USJ markers technically should not occur, but
4438+
* they can occur. We should treat them like any other marker. They conform to
4439+
* {@link MarkerObject}, so it's not hard to do.
43064440
*
43074441
* @param marker Marker to test whether it is the top-level USJ marker
43084442
* @returns `true` if it is the top-level USJ marker; `false` otherwise
43094443
*/
4310-
static isUsjMarker(marker: Usj | MarkerContent): marker is Usj;
4444+
static isTopLevelUsjMarker(marker: Usj | MarkerContent, workingStack: WorkingStack): marker is Usj;
43114445
/**
43124446
* Determine if a fragment is a marker, not a text content string or some kind of position
43134447
* fragment that isn't actually a marker e.g. closing marker fragment
@@ -4427,8 +4561,13 @@ export declare class UsjReaderWriter implements IUsjReaderWriter {
44274561
* Gathers various pieces of information about a marker that are helpful for transforming the
44284562
* marker to USFM
44294563
*
4564+
* WARNING: this only has the ability to return the info for the marker to be used in USFM. If you
4565+
* need to use info for the marker in USX or USJ, this method needs to be modified.
4566+
*
44304567
* @param marker A USJ marker (can be USJ type) or a string which is the marker name
4431-
* @param markersMap The markers map from which to gather info
4568+
* @param scriptureFormat The Scripture format to get the marker information for. For example, if
4569+
* you are using this marker info to transform the marker into USFM, this should be `usfm`.
4570+
* Defaults to `usfm`
44324571
* @returns Various pieces of info about the marker
44334572
*/
44344573
private getInfoForMarker;

lib/platform-bible-utils/src/scripture/markers-map-3.1.model.tsx

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -524,30 +524,6 @@ export type MarkerTypeInfoBase = {
524524
* ```
525525
*/
526526
nestedPrefix?: string;
527-
/**
528-
* Whether markers of this type do not have a structural space after the opening marker in USFM.
529-
* All standard marker types have a structural space after the opening marker; this property is
530-
* expected to be `true` only with the `unmatched` marker type, which is a non-standard type that
531-
* Paratext generates for closing markers it cannot find matching opening markers for. If this
532-
* property is `true`, the marker needs some way to indicate when it is over; `unmatched` markers
533-
* always have an asterisk at the end.
534-
*
535-
* For example, `para` marker types such as `p` have a structural space after the opening marker,
536-
* but `unmatched` marker types such as `nd*` markers without a matching opening `nd` should not:
537-
*
538-
* ```usfm
539-
* \p Paragraph marker with a structural space at the start.
540-
* \p This unmatched closing nd marker \nd*has no structural space after it.
541-
* ```
542-
*
543-
* WARNING: There is no expectation that any standard marker should have no space after opening.
544-
* This property is expected to be `true` _only_ for `unmatched` as it could cause serious issues
545-
* with USFM syntax otherwise. Every opening marker in USFM should have a space after it except
546-
* `unmatched`.
547-
*
548-
* If not present, defaults to `false`
549-
*/
550-
noSpaceAfterOpening?: boolean;
551527
/**
552528
* Instructions regarding special handling required for this marker type when transforming to
553529
* USFM. These instructions are an explanation of what needs to be done to markers of this type to
@@ -1814,7 +1790,8 @@ export const USFM_MARKERS_MAP: MarkersMap = deepFreeze({
18141790
unmatched: {
18151791
description:
18161792
'Paratext uses this type for closing markers that it cannot find opening markers for. They are treated like char markers but have no contents, no closing markers, and no space after the marker.',
1817-
noSpaceAfterOpening: true,
1793+
outputToUsfmInstructions:
1794+
'Do not output a structural space after the opening marker for markers with unmatched type.',
18181795
parseUsfmInstructions:
18191796
'If a closing marker occurs but does not seem to have a matching opening marker, create an unmatched-type marker. There is no structural space after the unmatched-type marker; its end is determined by the asterisk at the end of the marker.',
18201797
},

lib/platform-bible-utils/src/scripture/usj-reader-writer.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1555,7 +1555,10 @@ export class UsjReaderWriter implements IUsjReaderWriter {
15551555
fragment: { isAttributeValueForKey: 'marker', forMarker: marker },
15561556
indexInUsfm: usfm.length,
15571557
});
1558-
usfm += `${markerName}${markerTypeInfo.noSpaceAfterOpening ? '' : ' '}`;
1558+
1559+
// According to `unmatched`'s `outputToUsfmInstructions`, no space after the marker name
1560+
// because it is basically a closing marker
1561+
usfm += `${markerName}${markerType === 'unmatched' ? '' : ' '}`;
15591562
}
15601563

15611564
// Add leading attributes in listed order

0 commit comments

Comments
 (0)