diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index b7202ccb8ad6..b291065b3064 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -14,6 +14,7 @@ #include "unicode/messageformat2_formattable.h" #include "unicode/messageformat2.h" #include "unicode/normalizer2.h" +#include "unicode/ubidi.h" #include "unicode/unistr.h" #include "messageformat2_allocation.h" #include "messageformat2_checker.h" @@ -31,17 +32,36 @@ using namespace data_model; // ------------------------------------------------------ // Formatting -// The result of formatting a literal is just itself. -static Formattable evalLiteral(const Literal& lit) { - return Formattable(lit.unquoted()); + +// Arguments and literals +//----------------------- + +static UnicodeString varFallback(const VariableName& var) { + UnicodeString str(DOLLAR); + str += var; + return str; +} + +static UnicodeString functionFallback(const InternalValue& operand, + const FunctionName& functionName) { + UnicodeString fallbackStr; + // Create the fallback string for this function call + if (operand.isNullOperand()) { + fallbackStr = UnicodeString(COLON); + fallbackStr += functionName; + } else { + fallbackStr = operand.asFallback(); + } + return fallbackStr; } // Assumes that `var` is a message argument; returns the argument's value. 
-[[nodiscard]] FormattedPlaceholder MessageFormatter::evalArgument(const UnicodeString& fallback, - const VariableName& var, - MessageContext& context, - UErrorCode& errorCode) const { +[[nodiscard]] InternalValue MessageFormatter::evalArgument(const UnicodeString& fallback, + const VariableName& var, + MessageContext& context, + UErrorCode& errorCode) const { if (U_SUCCESS(errorCode)) { + // Look up the variable in the global environment const Formattable* val = context.getGlobal(var, errorCode); if (U_SUCCESS(errorCode)) { // Note: the fallback string has to be passed in because in a declaration like: @@ -52,7 +72,12 @@ static Formattable evalLiteral(const Literal& lit) { fallbackToUse += DOLLAR; fallbackToUse += var; } - return (FormattedPlaceholder(*val, fallbackToUse)); + // If it exists, create a BaseValue (FunctionValue) for it + LocalPointer result(BaseValue::create(locale, fallbackToUse, *val, false, errorCode)); + // Add fallback and return an InternalValue + if (U_SUCCESS(errorCode)) { + return InternalValue(result.orphan(), fallbackToUse); + } } } return {}; @@ -80,75 +105,258 @@ static UnicodeString reserialize(const UnicodeString& s) { } // Returns the contents of the literal -[[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const UnicodeString& fallback, - const Literal& lit) const { - // The fallback for a literal is itself, unless another fallback is passed in - // (same reasoning as evalArgument()) - UnicodeString fallbackToUse = fallback.isEmpty() ? reserialize(lit.unquoted()) : fallback; - return FormattedPlaceholder(evalLiteral(lit), fallbackToUse); +[[nodiscard]] InternalValue MessageFormatter::evalLiteral(const UnicodeString& fallback, + const Literal& lit, + UErrorCode& errorCode) const { + // The fallback for a literal is itself, unless another fallback is passed + // in (same reasoning as evalArgument()) + UnicodeString fallbackToUse = !fallback.isEmpty() ? 
fallback : reserialize(lit.unquoted()); + + // Create a BaseValue (FunctionValue) that wraps the literal + LocalPointer val(BaseValue::create(locale, + fallbackToUse, + Formattable(lit.unquoted()), + true, + errorCode)); + if (U_SUCCESS(errorCode)) { + return InternalValue(val.orphan(), fallbackToUse); + } + return {}; +} + +// Operands +// -------- + +[[nodiscard]] InternalValue& MessageFormatter::evalVariableReference(const UnicodeString& fallback, + Environment& env, + const VariableName& var, + MessageContext& context, + UErrorCode &status) const { + // Check if it's local or global + // Note: there is no name shadowing; this is enforced by the parser + + // This code implements lazy call-by-need evaluation of locals. + // That is, the environment binds names to a closure, not a resolved value. + // The spec does not require either eager or lazy evaluation. + + // NFC-normalize the variable name. See + // https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md#names-and-identifiers + const VariableName normalized = StandardFunctions::normalizeNFC(var); + + // Look up the variable in the environment + if (env.has(normalized)) { + // `var` is a local -- look it up + InternalValue& rhs = env.lookup(normalized); + // Evaluate the expression using the environment from the closure + // The name of this local variable is the fallback for its RHS. + UnicodeString newFallback(DOLLAR); + newFallback += var; + + if (!rhs.isEvaluated()) { + Closure& c = rhs.asClosure(); + InternalValue& result = evalExpression(newFallback, + c.getEnv(), + c.getExpr(), + context, + status); + // Overwrite the closure with the result of evaluation + if (result.isFallback()) { + rhs.update(result.asFallback()); + } else { + U_ASSERT(result.isEvaluated()); + + // The FunctionValue representing the right-hand side of this declaration + // might have a wasSetFromLiteral() method that returns true (i.e. 
if it's a BaseValue); + // But that value is being assigned to a variable here, so we need to + // ensure that wasSetFromLiteral() returns false. + // We accomplish this by wrapping it in a VariableValue. + const FunctionValue* inner = result.getValue(status); + U_ASSERT(U_SUCCESS(status)); // Already checked that result is evaluated + LocalPointer variableValue(static_cast(VariableValue::create(inner, status))); + if (U_FAILURE(status) || !variableValue.isValid()) { + return result; + } + + InternalValue wrappedResult(variableValue.orphan(), result.asFallback()); + InternalValue& ref = env.createUnnamed(std::move(wrappedResult), status); + if (U_FAILURE(status)) { + return result; + } + // Create an indirection to the result returned + // by evalExpression() + rhs.update(ref); + } + return rhs; + } + // If it's already evaluated, just return the value + return rhs; + } + // Variable wasn't found in locals -- check if it's global + InternalValue result = evalArgument(fallback, normalized, context, status); + if (status == U_ILLEGAL_ARGUMENT_ERROR) { + status = U_ZERO_ERROR; + // Unbound variable -- set a resolution error + context.getErrors().setUnresolvedVariable(var, status); + // Use fallback per + // https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution + return env.createFallback(varFallback(var), status); + } + // Looking up the global variable succeeded; return it + return env.createUnnamed(std::move(result), status); } -[[nodiscard]] InternalValue* MessageFormatter::formatOperand(const UnicodeString& fallback, - const Environment& env, - const Operand& rand, - MessageContext& context, - UErrorCode &status) const { +// InternalValues are passed as references into a global environment object +// that is live for the duration of one formatter call. 
+// They are mutable references so that they can be updated with a new value +// (when a closure is overwritten with the result of evaluating it), +// which can be shared across different references to the corresponding MF2 +// variable. +[[nodiscard]] InternalValue& MessageFormatter::evalOperand(const UnicodeString& fallback, + Environment& env, + const Operand& rand, + MessageContext& context, + UErrorCode &status) const { if (U_FAILURE(status)) { - return {}; + return env.bogus(); } + // Three cases: absent operand; variable; or literal + + // Absent (null) operand if (rand.isNull()) { - return create(InternalValue(FormattedPlaceholder()), status); + return env.createNull(status); } + // Variable reference if (rand.isVariable()) { - // Check if it's local or global - // Note: there is no name shadowing; this is enforced by the parser - const VariableName& var = rand.asVariable(); - // TODO: Currently, this code implements lazy evaluation of locals. - // That is, the environment binds names to a closure, not a resolved value. - // Eager vs. lazy evaluation is an open issue: - // see https://github.com/unicode-org/message-format-wg/issues/299 - - // NFC-normalize the variable name. See - // https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md#names-and-identifiers - const VariableName normalized = StandardFunctions::normalizeNFC(var); - - // Look up the variable in the environment - if (env.has(normalized)) { - // `var` is a local -- look it up - const Closure& rhs = env.lookup(normalized); - // Format the expression using the environment from the closure - // The name of this local variable is the fallback for its RHS. 
- UnicodeString newFallback(DOLLAR); - newFallback += var; - return formatExpression(newFallback, rhs.getEnv(), rhs.getExpr(), context, status); - } - // Variable wasn't found in locals -- check if it's global - FormattedPlaceholder result = evalArgument(fallback, normalized, context, status); - if (status == U_ILLEGAL_ARGUMENT_ERROR) { - status = U_ZERO_ERROR; - // Unbound variable -- set a resolution error - context.getErrors().setUnresolvedVariable(var, status); - // Use fallback per - // https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution - UnicodeString str(DOLLAR); - str += var; - return create(InternalValue(FormattedPlaceholder(str)), status); - } - return create(InternalValue(std::move(result)), status); - } else { + return evalVariableReference(fallback, env, rand.asVariable(), context, status); + } + // Literal + else { U_ASSERT(rand.isLiteral()); - return create(InternalValue(formatLiteral(fallback, rand.asLiteral())), status); + return env.createUnnamed(evalLiteral(fallback, rand.asLiteral(), status), status); + } +} + +// Function calls +// -------------- + +// Looks up `functionName` and applies it to an operand and options, +// handling errors if the function is unbound +[[nodiscard]] InternalValue& MessageFormatter::apply(Environment& env, + const FunctionName& functionName, + InternalValue& rand, + FunctionOptions&& options, + MessageContext& context, + UErrorCode& status) const { + if (U_FAILURE(status)) + return env.bogus(); + + // Create the fallback string to use in case of an error + // calling the function + UnicodeString fallbackStr = functionFallback(rand, functionName); + + // Look up the function name + Function* function = lookupFunction(functionName, status); + + if (U_FAILURE(status)) { // Handle unknown function + // Set error and use the fallback value + status = U_ZERO_ERROR; + context.getErrors().setUnknownFunction(functionName, status); + return env.createFallback(fallbackStr, status); 
+ } // `function` is now known to be non-null + + // Value is not a fallback (checked by the caller), + // so we can safely call getValue() + const FunctionValue* functionArg(rand.getValue(status)); + U_ASSERT(U_SUCCESS(status)); + // Call the function + LocalPointer + functionResult(function->call(makeFunctionContext(options), + *functionArg, + std::move(options), + status)); + // Handle any errors signaled by the function + // (and use the fallback value) + UErrorCode savedStatus = status; + status = U_ZERO_ERROR; + bool recover = false; + // Three types of errors are recoverable: + if (savedStatus == U_MF_OPERAND_MISMATCH_ERROR) { + recover = true; + context.getErrors().setOperandMismatchError(functionName, status); + } // 1. Operand mismatch error + if (savedStatus == U_MF_FORMATTING_ERROR) { + recover = true; + context.getErrors().setFormattingError(functionName, status); + } // 2. Formatting error + if (savedStatus == U_MF_BAD_OPTION) { + recover = true; + context.getErrors().setBadOption(functionName, status); + } // 3. 
Bad option error + if (recover) { + return env.createFallback(fallbackStr, status); + } // Anything else is non-recoverable + if (U_FAILURE(savedStatus)) { + status = savedStatus; + return env.bogus(); + } // Success; return the result + return env.createUnnamed(InternalValue(functionResult.orphan(), fallbackStr), status); +} + +// Function options and context +// ---------------------------- +static UMFBidiOption getBidiOption(const UnicodeString& s) { + if (s == options::LTR) { + return U_MF_BIDI_OPTION_LTR; + } + if (s == options::RTL) { + return U_MF_BIDI_OPTION_RTL; } + if (s == options::AUTO) { + return U_MF_BIDI_OPTION_AUTO; + } + return U_MF_BIDI_OPTION_INHERIT; // inherit is default +} + +FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& options) const { + // Look up "u:locale", "u:dir", and "u:id" in the options + UnicodeString localeStr = options.getStringFunctionOption(options::U_LOCALE); + + // Use default locale from context, unless "u:locale" is provided + Locale localeToUse; + if (localeStr.isEmpty()) { + localeToUse = locale; + } else { + UErrorCode localStatus = U_ZERO_ERROR; + int32_t len = localeStr.length(); + char* buf = static_cast(uprv_malloc(len + 1)); + localeStr.extract(0, len, buf, len); + Locale l = Locale::forLanguageTag(StringPiece(buf, len), localStatus); + uprv_free(buf); + if (U_SUCCESS(localStatus)) { + localeToUse = l; + } else { + localeToUse = locale; + } + } + UMFBidiOption dir = getBidiOption(options.getStringFunctionOption(options::U_DIR)); + UnicodeString id = options.getStringFunctionOption(options::U_ID); + + return FunctionContext(localeToUse, dir, id); } // Resolves a function's options -FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const OptionMap& options, MessageContext& context, UErrorCode& status) const { +FunctionOptions MessageFormatter::resolveOptions(Environment& env, + const OptionMap& options, + MessageContext& context, + UErrorCode& status) const { + // 
Create a vector of options LocalPointer optionsVector(createUVector(status)); if (U_FAILURE(status)) { return {}; } LocalPointer resolvedOpt; + // For each option... for (int i = 0; i < options.size(); i++) { const Option& opt = options.getOption(i, status); if (U_FAILURE(status)) { @@ -157,158 +365,169 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O const UnicodeString& k = opt.getName(); const Operand& v = opt.getValue(); - // Options are fully evaluated before calling the function - // Format the operand - LocalPointer rhsVal(formatOperand({}, env, v, context, status)); - if (U_FAILURE(status)) { - return {}; - } - // Note: this means option values are "eagerly" evaluated. - // Currently, options don't have options. This will be addressed by the - // full FormattedPlaceholder redesign. - FormattedPlaceholder optValue = rhsVal->forceFormatting(context.getErrors(), status); - resolvedOpt.adoptInstead(create - (ResolvedFunctionOption(k, - optValue.asFormattable(), - v.isLiteral()), - status)); - if (U_FAILURE(status)) { - return {}; - } - optionsVector->adoptElement(resolvedOpt.orphan(), status); + // ...evaluate its right-hand side... + InternalValue& rhsVal = evalOperand({}, env, v, context, status); + // ...giving a FunctionValue. + const FunctionValue* optVal = rhsVal.getValue(status); + if (U_FAILURE(status)) { // Ignore fallback values + status = U_ZERO_ERROR; + continue; + } // The list of resolved options omits any fallback values + + // The option is resolved; add it to the vector + ResolvedFunctionOption resolvedOpt(k, *optVal, false); + LocalPointer + p(create(std::move(resolvedOpt), status)); + EMPTY_ON_ERROR(status); + optionsVector->adoptElement(p.orphan(), status); } + // Return a new FunctionOptions constructed from the vector of options return FunctionOptions(std::move(*optionsVector), status); } -// Overload that dispatches on argument type. Syntax doesn't provide for options in this case. 
-[[nodiscard]] InternalValue* MessageFormatter::evalFunctionCall(FormattedPlaceholder&& argument, - MessageContext& context, - UErrorCode& status) const { - if (U_FAILURE(status)) { - return nullptr; - } - - // These cases should have been checked for already - U_ASSERT(!argument.isFallback() && !argument.isNullOperand()); - - const Formattable& toFormat = argument.asFormattable(); - switch (toFormat.getType()) { - case UFMT_OBJECT: { - const FormattableObject* obj = toFormat.getObject(status); - U_ASSERT(U_SUCCESS(status)); - U_ASSERT(obj != nullptr); - const UnicodeString& type = obj->tag(); - FunctionName functionName; - if (!getDefaultFormatterNameByType(type, functionName)) { - // No formatter for this type -- follow default behavior - break; - } - return evalFunctionCall(functionName, - create(std::move(argument), status), - FunctionOptions(), - context, - status); - } - default: { - // TODO: The array case isn't handled yet; not sure whether it's desirable - // to have a default list formatter - break; - } - } - // No formatter for this type, or it's a primitive type (which will be formatted later) - // -- just return the argument itself - return create(std::move(argument), status); -} - -// Overload that dispatches on function name -// Adopts `arg` -[[nodiscard]] InternalValue* MessageFormatter::evalFunctionCall(const FunctionName& functionName, - InternalValue* arg_, - FunctionOptions&& options, - MessageContext& context, - UErrorCode& status) const { - if (U_FAILURE(status)) { - return {}; - } - - LocalPointer arg(arg_); - - // Look up the formatter or selector - LocalPointer formatterImpl(nullptr); - LocalPointer selectorImpl(nullptr); - if (isFormatter(functionName)) { - formatterImpl.adoptInstead(getFormatter(functionName, status)); - U_ASSERT(U_SUCCESS(status)); - } - if (isSelector(functionName)) { - selectorImpl.adoptInstead(getSelector(context, functionName, status)); - U_ASSERT(U_SUCCESS(status)); - } - if (formatterImpl == nullptr && selectorImpl 
== nullptr) { - // Unknown function error - context.getErrors().setUnknownFunction(functionName, status); - - if (arg->hasNullOperand()) { - // Non-selector used as selector; an error would have been recorded earlier - UnicodeString fallback(COLON); - fallback += functionName; - return new InternalValue(FormattedPlaceholder(fallback)); - } else { - return new InternalValue(FormattedPlaceholder(arg->getFallback())); - } - } - return new InternalValue(arg.orphan(), - std::move(options), - functionName, - formatterImpl.isValid() ? formatterImpl.orphan() : nullptr, - selectorImpl.isValid() ? selectorImpl.orphan() : nullptr); +// BiDi isolation +// -------------- +// `uDirOption` is the directionality from a u:dir annotation on the expression +// that produced this formatted value, if present. +// `dir` is the directionality of `fmt`. This is determined from the resolved +// value that `fmt` is part of; that is, each function can set the directionality +// of the resolved value of its result. +UnicodeString& MessageFormatter::bidiIsolate(UMFBidiOption uDirOption, + UMFDirectionality dir, + UnicodeString& fmt) const { + // See "The Default Bidi Strategy" at: + // https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#handling-bidirectional-text + + // If strategy is 'none', just return the string + if (bidiIsolationStrategy == U_MF_BIDI_OFF) + return fmt; + + /* 1. Let msgdir be the directionality of the whole message, one of « 'LTR', 'RTL', 'unknown' ». These correspond to the message having left-to-right directionality, right-to-left directionality, and to the message's directionality not being known. */ + + // 2i Let fmt be the formatted string representation of the resolved value of exp. + // (Passed as argument) + + // 2ii Let dir be the directionality of fmt, one of « 'LTR', 'RTL', 'unknown' », with the same meanings as for msgdir + // (Passed as argument) + + // 2iii. 
Let the boolean value isolate be True if the u:dir option of the resolved value of exp has a value other than 'inherit', or False otherwise. + bool isolate = uDirOption != U_MF_BIDI_OPTION_INHERIT; + + UnicodeString bdiOpen(""); + UnicodeString bdiClose(""); + + // 2iv. If dir is 'LTR' + switch (dir) { + case U_MF_DIRECTIONALITY_LTR: + if (msgdir == U_MF_DIRECTIONALITY_LTR && !isolate) { + // 2iv(a). If msgdir is 'LTR' in the formatted output, let fmt be itself + return fmt; + } + // 2iii(b) Else, in the formatted output, prefix fmt with U+2066 LEFT-TO-RIGHT ISOLATE and postfix it with U+2069 POP DIRECTIONAL ISOLATE. + if (bidiIsolationStyle == U_MF_BIDI_STYLE_CONTROL) { + fmt.insert(0, LRI_CHAR); + fmt.insert(fmt.length(), PDI_CHAR); + } else { + fmt.insert(0, bdiOpen); + fmt.insert(fmt.length(), bdiClose); + } + break; // End of 2iii + // 2iv. Else, if dir is 'RTL': + case U_MF_DIRECTIONALITY_RTL: + // 2iv(a). In the formatted output, prefix fmt with U+2067 RIGHT-TO-LEFT ISOLATE and postfix it with U+2069 POP DIRECTIONAL ISOLATE. + if (bidiIsolationStyle == U_MF_BIDI_STYLE_CONTROL) { + fmt.insert(0, RLI_CHAR); + fmt.insert(fmt.length(), PDI_CHAR); + } else { + fmt.insert(0, bdiOpen); + fmt.insert(fmt.length(), bdiClose); + } + break; // End of 2iv. + // 2v. Else: + default: + // 2v(a). In the formatted output, prefix fmt with U+2068 FIRST STRONG ISOLATE and postfix it with U+2069 POP DIRECTIONAL ISOLATE. 
+ if (bidiIsolationStyle == U_MF_BIDI_STYLE_CONTROL) { + fmt.insert(0, FSI_CHAR); + fmt.insert(fmt.length(), PDI_CHAR); + } else { + fmt.insert(0, bdiOpen); + fmt.insert(fmt.length(), bdiClose); + } + break; // End of 2v + } // `fmt` now contains the isolated string + return fmt; } -// Formats an expression using `globalEnv` for the values of variables -[[nodiscard]] InternalValue* MessageFormatter::formatExpression(const UnicodeString& fallback, - const Environment& globalEnv, - const Expression& expr, - MessageContext& context, - UErrorCode &status) const { - if (U_FAILURE(status)) { - return {}; - } - - const Operand& rand = expr.getOperand(); - // Format the operand (formatOperand handles the case of a null operand) - LocalPointer randVal(formatOperand(fallback, globalEnv, rand, context, status)); - - FormattedPlaceholder maybeRand = randVal->takeArgument(status); - - if (!expr.isFunctionCall() && U_SUCCESS(status)) { - // Dispatch based on type of `randVal` - if (maybeRand.isFallback()) { - return randVal.orphan(); - } - return evalFunctionCall(std::move(maybeRand), context, status); - } else if (expr.isFunctionCall()) { - status = U_ZERO_ERROR; - const Operator* rator = expr.getOperator(status); - U_ASSERT(U_SUCCESS(status)); - const FunctionName& functionName = rator->getFunctionName(); - const OptionMap& options = rator->getOptionsInternal(); - // Resolve the options - FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status); - - // Call the formatter function - return evalFunctionCall(functionName, - randVal.orphan(), - std::move(resolvedOptions), - context, - status); +// Expressions +// ----------- +// Evaluates an expression using `globalEnv` for the values of variables +[[nodiscard]] InternalValue& MessageFormatter::evalExpression(const UnicodeString& fallback, + Environment& globalEnv, + const Expression& expr, + MessageContext& context, + UErrorCode &status) const { + if (U_FAILURE(status)) + return globalEnv.bogus(); + + 
// Evaluate the operand (evalOperand handles the case of a null operand) + InternalValue& randVal = evalOperand(fallback, globalEnv, expr.getOperand(), context, status); + + // If there's no function, we check for an implicit formatter + if (!expr.isFunctionCall()) { + const FunctionValue* contained = randVal.getValue(status); + if (U_FAILURE(status)) { + // Fallback or null -- no implicit formatter + status = U_ZERO_ERROR; + return randVal; + } // There might be an implicit formatter + const Formattable& toFormat = contained->unwrap(); + // If it has an object type, there might be an implicit formatter for it... + switch (toFormat.getType()) { + case UFMT_OBJECT: { + const FormattableObject* obj = toFormat.getObject(status); + U_ASSERT(U_SUCCESS(status)); + U_ASSERT(obj != nullptr); + const UnicodeString& type = obj->tag(); + FunctionName functionName; + if (!getDefaultFormatterNameByType(type, functionName)) { + // No formatter for this type -- follow default behavior + return randVal; + } + // ... 
apply the implicit formatter + return apply(globalEnv, + functionName, + randVal, + FunctionOptions(), + context, + status); + } // No formatters for other types, so just return the evaluated operand + default: + return randVal; + } // End of non-function-call case } else { - status = U_ZERO_ERROR; - return randVal.orphan(); + // Don't call the function on error values + if (randVal.isFallback()) + return randVal; + const Operator* rator = expr.getOperator(status); // Get the operator from the expression + U_ASSERT(U_SUCCESS(status)); // This must succeed since we checked that it's a function call + const FunctionName& functionName = rator->getFunctionName(); // Get function name + const OptionMap& options = rator->getOptionsInternal(); // Get options + FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status); // Resolve options + + // Call the function with the operand and arguments + return apply(globalEnv, functionName, + randVal, std::move(resolvedOptions), context, status); } } +// Patterns +// -------- + // Formats each text and expression part of a pattern, appending the results to `result` -void MessageFormatter::formatPattern(MessageContext& context, const Environment& globalEnv, const Pattern& pat, UErrorCode &status, UnicodeString& result) const { +void MessageFormatter::formatPattern(MessageContext& context, + Environment& globalEnv, + const Pattern& pat, + UErrorCode &status, UnicodeString& result) const { CHECK_ERROR(status); for (int32_t i = 0; i < pat.numParts(); i++) { @@ -316,24 +535,44 @@ void MessageFormatter::formatPattern(MessageContext& context, const Environment& if (part.isText()) { result += part.asText(); } else if (part.isMarkup()) { - // Markup is ignored + validateUOptionsOnMarkup(context, globalEnv, part.asMarkup(), status); } else { // Format the expression - LocalPointer partVal( - formatExpression({}, globalEnv, part.contents(), context, status)); - FormattedPlaceholder partResult = 
partVal->forceFormatting(context.getErrors(), - status); - // Force full evaluation, e.g. applying default formatters to - // unformatted input (or formatting numbers as strings) - result += partResult.formatToString(locale, status); - // Handle formatting errors. `formatToString()` can't take a context and thus can't - // register an error directly - if (status == U_MF_FORMATTING_ERROR) { - status = U_ZERO_ERROR; - // TODO: The name of the formatter that failed is unavailable. - // Not ideal, but it's hard for `formatToString()` - // to pass along more detailed diagnostics - context.getErrors().setFormattingError(status); + InternalValue& partVal = evalExpression({}, globalEnv, part.contents(), context, status); + if (partVal.isFallback()) { + result += LEFT_CURLY_BRACE; + result += partVal.asFallback(); + result += RIGHT_CURLY_BRACE; + } else { + // Get the `FunctionValue` corresponding to this part + const FunctionValue* val = partVal.getValue(status); + // It shouldn't be null or a fallback + U_ASSERT(U_SUCCESS(status)); + + // See comment in matchSelectorKeys() + bool badSelectOption = !checkSelectOption(*val); + + // Format the `FunctionValue` to a string + UnicodeString fmt = val->formatToString(status); + + // Apply bidi isolation to the formatted result + UMFDirectionality dir = val->getDirection(); + result += bidiIsolate(val->getDirectionAnnotation(), dir, fmt); + + if (badSelectOption) { + context.getErrors().setBadOption(val->getFunctionName(), status); + CHECK_ERROR(status); + } + + // Handle formatting errors. `formatToString()` can't take a context and thus can't + // register an error directly + if (status == U_MF_FORMATTING_ERROR) { + status = U_ZERO_ERROR; + // TODO: The name of the formatter that failed is unavailable. 
+ // Not ideal, but it's hard for `formatToString()` + // to pass along more detailed diagnostics + context.getErrors().setFormattingError(status); + } } } } @@ -344,7 +583,7 @@ void MessageFormatter::formatPattern(MessageContext& context, const Environment& // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-selectors // `res` is a vector of ResolvedSelectors -void MessageFormatter::resolveSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UVector& res) const { +void MessageFormatter::resolveSelectors(MessageContext& context, Environment& env, UErrorCode &status, UVector& res) const { CHECK_ERROR(status); U_ASSERT(!dataModel.hasPattern()); @@ -354,8 +593,8 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme // 2. For each expression exp of the message's selectors for (int32_t i = 0; i < dataModel.numSelectors(); i++) { // 2i. Let rv be the resolved value of exp. - LocalPointer rv(formatOperand({}, env, Operand(selectors[i]), context, status)); - if (rv->canSelect()) { + InternalValue& rv = evalVariableReference({}, env, selectors[i], context, status); + if (rv.isSelectable()) { // 2ii. If selection is supported for rv: // (True if this code has been reached) } else { @@ -364,32 +603,50 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme // Append nomatch as the last element of the list res. // Emit a Selection Error. // (Note: in this case, rv, being a fallback, serves as `nomatch`) - DynamicErrors& err = context.getErrors(); - err.setSelectorError(rv->getFunctionName(), status); - rv.adoptInstead(new InternalValue(FormattedPlaceholder(rv->getFallback()))); - if (!rv.isValid()) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } + context.getErrors().setSelectorError({}, status); } // 2ii(a). Append rv as the last element of the list res. 
// (Also fulfills 2iii) - res.adoptElement(rv.orphan(), status); + LocalPointer v(create(std::move(rv), status)); + CHECK_ERROR(status); + res.adoptElement(v.orphan(), status); + } +} + +bool MessageFormatter::checkSelectOption(const FunctionValue& val) const { + const UnicodeString& name = val.getFunctionName(); + + if (name != UnicodeString("number") && name != UnicodeString("integer")) { + return true; } + + // Per the spec, if the "select" option is present, it must have been + // set from a literal + + // Returns false if the `select` option is present and it was not set from a literal + + const FunctionOptions& opts = val.getResolvedOptions(); + + // OK if the option wasn't present + UErrorCode localErrorCode = U_ZERO_ERROR; + opts.getFunctionOption(options::SELECT, localErrorCode); + if (U_FAILURE(localErrorCode)) { + return true; + } + // Otherwise, return true if the option was set from a literal + return opts.wasSetFromLiteral(options::SELECT); } // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences // `keys` and `matches` are vectors of strings void MessageFormatter::matchSelectorKeys(const UVector& keys, MessageContext& context, - InternalValue* rv, // Does not adopt `rv` + InternalValue&& rv, UVector& keysOut, UErrorCode& status) const { CHECK_ERROR(status); - if (U_FAILURE(status)) { - // Return an empty list of matches - status = U_ZERO_ERROR; + if (!rv.isSelectable()) { return; } @@ -410,26 +667,37 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, LocalArray adoptedKeys(keysArr); // Create an array to hold the output - UnicodeString* prefsArr = new UnicodeString[keysLen]; + int32_t* prefsArr = static_cast(uprv_malloc(keysLen * sizeof(int32_t))); if (prefsArr == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; } - LocalArray adoptedPrefs(prefsArr); + int32_t prefsLen = 0; // Call the selector - FunctionName name = rv->getFunctionName(); - rv->forceSelection(context.getErrors(), - 
adoptedKeys.getAlias(), keysLen, - adoptedPrefs.getAlias(), prefsLen, - status); + // Caller checked for fallback, so it's safe to call getValue() + const FunctionValue* rvVal = rv.getValue(status); + + // This condition can't be checked in the selector. + bool badSelectOption = !checkSelectOption(*rvVal); + + U_ASSERT(U_SUCCESS(status)); + rvVal->selectKeys(adoptedKeys.getAlias(), keysLen, prefsArr, prefsLen, + status); + + if (badSelectOption) { + context.getErrors().setBadOption(rvVal->getFunctionName(), status); + CHECK_ERROR(status); + // In this case, only the `*` variant should match + prefsLen = 0; + } // Update errors if (savedStatus != status) { if (U_FAILURE(status)) { status = U_ZERO_ERROR; - context.getErrors().setSelectorError(name, status); + context.getErrors().setSelectorError({}, status); } else { // Ignore warnings status = savedStatus; @@ -441,7 +709,8 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, // Copy the resulting keys (if there was no error) keysOut.removeAllElements(); for (int32_t i = 0; i < prefsLen; i++) { - UnicodeString* k = message2::create(std::move(prefsArr[i]), status); + UnicodeString* k = + message2::create(std::move(keysArr[prefsArr[i]]), status); if (k == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; @@ -449,12 +718,17 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, keysOut.adoptElement(k, status); CHECK_ERROR(status); } + + uprv_free(prefsArr); } // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences // `res` is a vector of FormattedPlaceholders; // `pref` is a vector of vectors of strings -void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res, UVector& pref, UErrorCode &status) const { +void MessageFormatter::resolvePreferences(MessageContext& context, + UVector& res, + UVector& pref, + UErrorCode &status) const { CHECK_ERROR(status); // 1. Let pref be a new empty list of lists of strings. 
@@ -492,7 +766,7 @@ void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res, } // 2iii. Let `rv` be the resolved value at index `i` of `res`. U_ASSERT(i < res.size()); - InternalValue* rv = static_cast(res[i]); + InternalValue rv = std::move(*(static_cast(res[i]))); // 2iv. Let matches be the result of calling the method MatchSelectorKeys(rv, keys) LocalPointer matches(createUVector(status)); matchSelectorKeys(*keys, context, std::move(rv), *matches, status); @@ -625,7 +899,10 @@ void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCo // 7. Select the pattern of `var` } -void MessageFormatter::formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const { +void MessageFormatter::formatSelectors(MessageContext& context, + Environment& env, + UErrorCode &status, + UnicodeString& result) const { CHECK_ERROR(status); // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection @@ -665,6 +942,9 @@ void MessageFormatter::formatSelectors(MessageContext& context, const Environmen formatPattern(context, env, pat, status, result); } +// Formatting to string +// -------------------- + // Note: this is non-const due to the function registry being non-const, which is in turn // due to the values (`FormatterFactory` objects in the map) having mutable state. 
// In other words, formatting a message can mutate the underlying `MessageFormatter` by changing @@ -672,21 +952,20 @@ void MessageFormatter::formatSelectors(MessageContext& context, const Environmen UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments, UErrorCode &status) { EMPTY_ON_ERROR(status); + // Create a new environment that will store closures for all local variables + Environment* env = Environment::create(status); + // Create a new context with the given arguments and the `errors` structure MessageContext context(arguments, *errors, status); - UnicodeString result; - if (!(errors->hasSyntaxError() || errors->hasDataModelError())) { - // Create a new environment that will store closures for all local variables - // Check for unresolved variable errors - // checkDeclarations needs a reference to the pointer to the environment - // since it uses its `env` argument as an out-parameter. So it needs to be - // temporarily not a LocalPointer... - Environment* env(Environment::create(status)); - checkDeclarations(context, env, status); - // ...and then it's adopted to avoid leaks - LocalPointer globalEnv(env); + // Check for unresolved variable errors + checkDeclarations(context, env, status); + LocalPointer globalEnv(env); + + DynamicErrors& err = context.getErrors(); + UnicodeString result; + if (!(err.hasSyntaxError() || err.hasDataModelError())) { if (dataModel.hasPattern()) { formatPattern(context, *globalEnv, dataModel.getPattern(), status, result); } else { @@ -711,6 +990,60 @@ UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments return result; } +// Markup +// ------ + +// Evaluates `rand` and requires the value to be a string, setting `result` to it +// if so, and setting a bad option error if not +bool MessageFormatter::operandToStringWithBadOptionError(MessageContext& context, + Environment& globalEnv, + const Operand& rand, + UnicodeString& result, + UErrorCode& status) const { + 
EMPTY_ON_ERROR(status); + + InternalValue& iVal = evalOperand({}, globalEnv, rand, context, status); + EMPTY_ON_ERROR(status); + const FunctionValue* val = iVal.getValue(status); + U_ASSERT(U_SUCCESS(status)); + + result = val->unwrap().getString(status); + if (U_FAILURE(status)) { + status = U_ZERO_ERROR; + context.getErrors().setBadOption({}, status); + return false; + } + return true; +} + +// Validates u: options on markup parts -- see +// https://github.com/unicode-org/message-format-wg/blob/main/spec/u-namespace.md +void MessageFormatter::validateUOptionsOnMarkup(MessageContext& context, + Environment& globalEnv, + const Markup& markupPart, + UErrorCode& status) const { + CHECK_ERROR(status); + + const OptionMap& opts = markupPart.getOptionsInternal(); + for (int32_t i = 0; i < opts.len; i++) { + const Option& opt = opts.options[i]; + const UnicodeString& optionName = opt.getName(); + const Operand& optionValue = opt.getValue(); + + if (optionName == options::U_ID) { + UnicodeString ignore; + operandToStringWithBadOptionError(context, globalEnv, optionValue, ignore, status); + } else if (optionName == options::U_LOCALE) { + // Can't be set on markup + context.getErrors().setBadOption({}, status); + } else if (optionName == options::U_DIR) { + // Can't be set on markup + context.getErrors().setBadOption({}, status); + } + // Any other options are ignored + } +} + // ---------------------------------------- // Checking for resolution errors @@ -775,8 +1108,10 @@ void MessageFormatter::checkDeclarations(MessageContext& context, Environment*& // memoizing the value of localEnv up to this point // Add the LHS to the environment for checking the next declaration - env = Environment::create(StandardFunctions::normalizeNFC(decl.getVariable()), - Closure(rhs, *env), + const VariableName& lhs = decl.getVariable(); + env = Environment::create(StandardFunctions::normalizeNFC(lhs), + Closure::create(rhs, *env, status), + varFallback(lhs), env, status); 
CHECK_ERROR(status); diff --git a/icu4c/source/i18n/messageformat2_allocation.h b/icu4c/source/i18n/messageformat2_allocation.h index 5b06d0851296..394d1e4b6dba 100644 --- a/icu4c/source/i18n/messageformat2_allocation.h +++ b/icu4c/source/i18n/messageformat2_allocation.h @@ -133,6 +133,18 @@ namespace message2 { return result; } + template + inline T* create(const T& node, UErrorCode& status) { + if (U_FAILURE(status)) { + return nullptr; + } + T* result = new T(node); + if (result == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return result; + } + } // namespace message2 U_NAMESPACE_END diff --git a/icu4c/source/i18n/messageformat2_errors.cpp b/icu4c/source/i18n/messageformat2_errors.cpp index c4a96544fe68..1a502ac87915 100644 --- a/icu4c/source/i18n/messageformat2_errors.cpp +++ b/icu4c/source/i18n/messageformat2_errors.cpp @@ -33,10 +33,6 @@ namespace message2 { addError(DynamicError(DynamicErrorType::BadOptionError, formatterName), status); } - void DynamicErrors::setRecoverableBadOption(const FunctionName& formatterName, UErrorCode& status) { - addError(DynamicError(DynamicErrorType::RecoverableBadOptionError, formatterName), status); - } - void DynamicErrors::setOperandMismatchError(const FunctionName& formatterName, UErrorCode& status) { addError(DynamicError(DynamicErrorType::OperandMismatchError, formatterName), status); } @@ -145,8 +141,7 @@ namespace message2 { status = U_MF_FORMATTING_ERROR; break; } - case DynamicErrorType::BadOptionError: - case DynamicErrorType::RecoverableBadOptionError: { + case DynamicErrorType::BadOptionError: { status = U_MF_BAD_OPTION; break; } @@ -246,10 +241,6 @@ namespace message2 { resolutionAndFormattingErrors->adoptElement(errorP, status); break; } - case DynamicErrorType::RecoverableBadOptionError: { - resolutionAndFormattingErrors->adoptElement(errorP, status); - break; - } } } diff --git a/icu4c/source/i18n/messageformat2_errors.h b/icu4c/source/i18n/messageformat2_errors.h index 42cea5b2b123..1145bb36c432 
100644 --- a/icu4c/source/i18n/messageformat2_errors.h +++ b/icu4c/source/i18n/messageformat2_errors.h @@ -67,16 +67,6 @@ namespace message2 { UnresolvedVariable, FormattingError, BadOptionError, - /** - This is used to signal errors from :number and :integer when a - bad `select` option is passed. In this case, fallback output - is not used, so it must be distinguished from a regular bad - option error (but it maps to a bad option error in the final - error code). - See https://github.com/unicode-org/message-format-wg/blob/main/spec/functions/number.md#number-selection - "The formatting of the _resolved value_ is not affected by the `select` option.") - */ - RecoverableBadOptionError, OperandMismatchError, SelectorError, UnknownFunction, @@ -141,7 +131,6 @@ namespace message2 { // Used when the name of the offending formatter is unknown void setFormattingError(UErrorCode&); void setBadOption(const FunctionName&, UErrorCode&); - void setRecoverableBadOption(const FunctionName&, UErrorCode&); void setOperandMismatchError(const FunctionName&, UErrorCode&); bool hasDataModelError() const { return staticErrors.hasDataModelError(); } bool hasFormattingError() const { return formattingError; } diff --git a/icu4c/source/i18n/messageformat2_evaluation.cpp b/icu4c/source/i18n/messageformat2_evaluation.cpp index 1a88ece4724f..8d37978a71b6 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.cpp +++ b/icu4c/source/i18n/messageformat2_evaluation.cpp @@ -9,6 +9,7 @@ #if !UCONFIG_NO_MF2 +#include "unicode/ubidi.h" #include "messageformat2_allocation.h" #include "messageformat2_evaluation.h" #include "messageformat2_function_registry_internal.h" @@ -23,16 +24,85 @@ namespace message2 { using namespace data_model; +// BaseValue +// --------- + +BaseValue::BaseValue(const Locale& loc, const UnicodeString& fb, const Formattable& source, bool wasCreatedFromLiteral) + : locale(loc), fromLiteral(wasCreatedFromLiteral) { + innerValue = source; + fallback += LEFT_CURLY_BRACE; + 
fallback += fb; + fallback += RIGHT_CURLY_BRACE; +} + +/* static */ BaseValue* BaseValue::create(const Locale& locale, + const UnicodeString& fallback, + const Formattable& source, + bool wasCreatedFromLiteral, + UErrorCode& errorCode) { + return message2::create(BaseValue(locale, fallback, source, wasCreatedFromLiteral), errorCode); +} + +extern UnicodeString formattableToString(const Locale&, const Formattable&, UErrorCode&); + +UnicodeString BaseValue::formatToString(UErrorCode& errorCode) const { + return formattableToString(locale, innerValue, errorCode); +} + +BaseValue& BaseValue::operator=(BaseValue&& other) noexcept { + innerValue = std::move(other.innerValue); + opts = std::move(other.opts); + dir = other.dir; + inputDir = other.inputDir; + locale = other.locale; + fallback = other.fallback; + fromLiteral = other.fromLiteral; + + return *this; +} + +BaseValue::BaseValue(BaseValue&& other) { + *this = std::move(other); +} + +// VariableValue +// ------------- + +VariableValue::VariableValue(const FunctionValue* v) : underlyingValue(std::move(v)) {} + +/* static */ VariableValue* VariableValue::create(const FunctionValue* v, + UErrorCode& errorCode) { + return message2::create(VariableValue(std::move(v)), errorCode); +} + +VariableValue& VariableValue::operator=(VariableValue&& other) noexcept { + underlyingValue = other.underlyingValue; + + return *this; +} + +VariableValue::VariableValue(VariableValue&& other) { + *this = std::move(other); +} + +VariableValue::~VariableValue() { + underlyingValue = nullptr; // not owned +} + // Functions // ------------- ResolvedFunctionOption::ResolvedFunctionOption(ResolvedFunctionOption&& other) { - name = std::move(other.name); - value = std::move(other.value); - sourceIsLiteral = other.sourceIsLiteral; + *this = std::move(other); } -ResolvedFunctionOption::~ResolvedFunctionOption() {} +ResolvedFunctionOption::ResolvedFunctionOption(const UnicodeString& n, + const FunctionValue& f, + bool b) : name(n), value(&f), 
thisWasMerged(b) {} + +ResolvedFunctionOption::~ResolvedFunctionOption() { + value = nullptr; // value is not owned +} const ResolvedFunctionOption* FunctionOptions::getResolvedFunctionOptions(int32_t& len) const { @@ -49,59 +119,77 @@ FunctionOptions::FunctionOptions(UVector&& optionsVector, UErrorCode& status) { } // Returns false if option doesn't exist -UBool FunctionOptions::wasSetFromLiteral(const UnicodeString& key) const { +UBool FunctionOptions::wasSetFromLiteral(const std::u16string_view key) const { if (options == nullptr) { U_ASSERT(functionOptionsLen == 0); } for (int32_t i = 0; i < functionOptionsLen; i++) { const ResolvedFunctionOption& opt = options[i]; if (opt.getName() == key) { - return opt.isLiteral(); + // Require both: - opt's value was created from a literal; + // - opt does not originate from merging a previous options map + // with this one + return opt.getValue().wasCreatedFromLiteral() + && (!opt.wasMerged()); } } return false; } -UBool FunctionOptions::getFunctionOption(std::u16string_view key, Formattable& option) const { +const FunctionValue* +FunctionOptions::getFunctionOption(std::u16string_view key, + UErrorCode& status) const { if (options == nullptr) { U_ASSERT(functionOptionsLen == 0); } for (int32_t i = 0; i < functionOptionsLen; i++) { const ResolvedFunctionOption& opt = options[i]; if (opt.getName() == key) { - option = opt.getValue(); - return true; + return &opt.getValue(); } } - return false; + status = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; } -UnicodeString FunctionOptions::getStringFunctionOption(std::u16string_view key) const { - Formattable option; - if (getFunctionOption(key, option)) { - if (option.getType() == UFMT_STRING) { - UErrorCode localErrorCode = U_ZERO_ERROR; - UnicodeString val = option.getString(localErrorCode); - U_ASSERT(U_SUCCESS(localErrorCode)); - return val; +UnicodeString +FunctionOptions::getStringFunctionOption(std::u16string_view k, UErrorCode& errorCode) const { + const FunctionValue* 
option = getFunctionOption(k, errorCode); + if (U_SUCCESS(errorCode)) { + UnicodeString result = option->formatToString(errorCode); + if (U_SUCCESS(errorCode)) { + return result; } } - // For anything else, including non-string values, return "". - // Alternately, could try to stringify the non-string option. - // (Currently, no tests require that.) return {}; } -FunctionOptions& FunctionOptions::operator=(FunctionOptions&& other) noexcept { - functionOptionsLen = other.functionOptionsLen; - options = other.options; - other.functionOptionsLen = 0; - other.options = nullptr; +UnicodeString FunctionOptions::getStringFunctionOption(std::u16string_view key) const { + UErrorCode localStatus = U_ZERO_ERROR; + + UnicodeString result = getStringFunctionOption(key, localStatus); + if (U_FAILURE(localStatus)) { + return {}; + } + return result; +} + +FunctionOptions& FunctionOptions::operator=(FunctionOptions other) noexcept { + swap(*this, other); return *this; } -FunctionOptions::FunctionOptions(FunctionOptions&& other) { - *this = std::move(other); +FunctionOptions::FunctionOptions(const FunctionOptions& other) { + U_ASSERT(!other.bogus); + functionOptionsLen = other.functionOptionsLen; + options = nullptr; + if (functionOptionsLen != 0) { + UErrorCode localStatus = U_ZERO_ERROR; + options = copyArray(other.options, functionOptionsLen, localStatus); + if (U_FAILURE(localStatus)) { + bogus = true; + } + } } FunctionOptions::~FunctionOptions() { @@ -122,19 +210,22 @@ static bool containsOption(const UVector& opts, const ResolvedFunctionOption& op } // Options in `this` take precedence -// `this` can't be used after mergeOptions is called -FunctionOptions FunctionOptions::mergeOptions(FunctionOptions&& other, - UErrorCode& status) { +FunctionOptions FunctionOptions::mergeOptions(const FunctionOptions& other, + UErrorCode& status) const { UVector mergedOptions(status); mergedOptions.setDeleter(uprv_deleteUObject); if (U_FAILURE(status)) { return {}; } + if (bogus || 
other.bogus) { + status = U_MEMORY_ALLOCATION_ERROR; + return {}; + } // Create a new vector consisting of the options from this `FunctionOptions` for (int32_t i = 0; i < functionOptionsLen; i++) { - mergedOptions.adoptElement(create(std::move(options[i]), status), + mergedOptions.adoptElement(create(options[i], status), status); } @@ -142,19 +233,140 @@ FunctionOptions FunctionOptions::mergeOptions(FunctionOptions&& other, for (int i = 0; i < other.functionOptionsLen; i++) { // Note: this is quadratic in the length of `options` if (!containsOption(mergedOptions, other.options[i])) { - mergedOptions.adoptElement(create(std::move(other.options[i]), - status), - status); + const ResolvedFunctionOption& oldOpt = other.options[i]; + ResolvedFunctionOption newOpt = ResolvedFunctionOption(oldOpt.name, *oldOpt.value, true); + mergedOptions.adoptElement(create(newOpt, status), + status); } } - delete[] options; - options = nullptr; - functionOptionsLen = 0; - return FunctionOptions(std::move(mergedOptions), status); } +// InternalValue +// ------------- + + +InternalValue::~InternalValue() {} + +InternalValue& InternalValue::operator=(InternalValue&& other) { + fallbackString = other.fallbackString; + val = std::move(other.val); + return *this; +} + +InternalValue::InternalValue(InternalValue&& other) { + *this = std::move(other); +} + +InternalValue::InternalValue(UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer nv(new NullValue()); + if (!nv.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + val = std::move(nv); +} + +InternalValue::InternalValue(FunctionValue* v, const UnicodeString& fb) + : fallbackString(fb) { + U_ASSERT(v != nullptr); + val = LocalPointer(v); +} + +const FunctionValue* InternalValue::getValue(UErrorCode& status) const { + if (U_FAILURE(status)) { + return nullptr; + } + // If this is a closure or fallback, error out + if (!isEvaluated()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + // Follow the 
indirection to get the value + if (isIndirection()) { + const InternalValue* other = *std::get_if(&val); + U_ASSERT(other != nullptr); + return other->getValue(status); + } + // Otherwise, return the contained FunctionValue + const LocalPointer* result = std::get_if>(&val); + U_ASSERT(result->isValid()); + return (*result).getAlias(); +} + +bool InternalValue::isSelectable() const { + UErrorCode localStatus = U_ZERO_ERROR; + const FunctionValue* val = getValue(localStatus); + if (U_FAILURE(localStatus)) { + return false; + } + return val->isSelectable(); +} + +/* static */ LocalPointer InternalValue::null(UErrorCode& status) { + if (U_SUCCESS(status)) { + InternalValue* result = new InternalValue(status); + if (U_SUCCESS(status)) { + return LocalPointer(result); + } + } + return LocalPointer(); +} + +/* static */ LocalPointer InternalValue::fallback(const UnicodeString& s, + UErrorCode& status) { + if (U_SUCCESS(status)) { + InternalValue* result = new InternalValue(s); + if (U_SUCCESS(status)) { + return LocalPointer(result); + } + } + return LocalPointer(); +} + +/* static */ InternalValue InternalValue::closure(Closure* c, const UnicodeString& fb) { + U_ASSERT(c != nullptr); + return InternalValue(c, fb); +} + +bool InternalValue::isClosure() const { + return std::holds_alternative>(val); +} + +bool InternalValue::isEvaluated() const { + return std::holds_alternative>(val) || isIndirection(); +} + +bool InternalValue::isIndirection() const { + return std::holds_alternative(val); +} + +bool InternalValue::isNullOperand() const { + UErrorCode localStatus = U_ZERO_ERROR; + const FunctionValue* val = getValue(localStatus); + if (U_FAILURE(localStatus)) { + return false; + } + return val->isNullOperand(); +} + +void InternalValue::update(InternalValue& newVal) { + fallbackString = newVal.fallbackString; + val = &newVal; +} + +void InternalValue::update(LocalPointer newVal) { + val = std::move(newVal); +} + +void InternalValue::update(const UnicodeString& fb) { + 
fallbackString = fb; + val = fb; +} + // PrioritizedVariant // ------------------ @@ -169,9 +381,11 @@ PrioritizedVariant::~PrioritizedVariant() {} // ---------------- Environments and closures - Environment* Environment::create(const VariableName& var, Closure&& c, Environment* parent, UErrorCode& errorCode) { + Environment* Environment::create(const VariableName& var, Closure* c, + const UnicodeString& fallbackStr, + Environment* parent, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - Environment* result = new NonEmptyEnvironment(var, std::move(c), parent); + Environment* result = new NonEmptyEnvironment(var, InternalValue::closure(c, fallbackStr), parent); if (result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; return nullptr; @@ -181,21 +395,20 @@ PrioritizedVariant::~PrioritizedVariant() {} Environment* Environment::create(UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - Environment* result = new EmptyEnvironment(); - if (result == nullptr) { + Environment* result = new EmptyEnvironment(errorCode); + if (U_SUCCESS(errorCode) && result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; return nullptr; } return result; } - const Closure& EmptyEnvironment::lookup(const VariableName& v) const { - (void) v; + InternalValue& EmptyEnvironment::lookup(const VariableName&) { U_ASSERT(false); UPRV_UNREACHABLE_EXIT; } - const Closure& NonEmptyEnvironment::lookup(const VariableName& v) const { + InternalValue& NonEmptyEnvironment::lookup(const VariableName& v) { if (v == var) { return rhs; } @@ -214,290 +427,95 @@ PrioritizedVariant::~PrioritizedVariant() {} return parent->has(v); } - Environment::~Environment() {} - NonEmptyEnvironment::~NonEmptyEnvironment() {} - EmptyEnvironment::~EmptyEnvironment() {} - - Closure::~Closure() {} - - // MessageContext methods - - void MessageContext::checkErrors(UErrorCode& status) const { - CHECK_ERROR(status); - errors.checkErrors(status); - } - - const Formattable* MessageContext::getGlobal(const VariableName& v, - 
UErrorCode& errorCode) const { - return arguments.getArgument(v, errorCode); - } - - MessageContext::MessageContext(const MessageArguments& args, - const StaticErrors& e, - UErrorCode& status) : arguments(args), errors(e, status) {} - - MessageContext::~MessageContext() {} - - // InternalValue - // ------------- - - bool InternalValue::isFallback() const { - return std::holds_alternative(argument) - && std::get_if(&argument)->isFallback(); - } - - bool InternalValue::hasNullOperand() const { - return std::holds_alternative(argument) - && std::get_if(&argument)->isNullOperand(); - } - - FormattedPlaceholder InternalValue::takeArgument(UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return {}; + InternalValue& EmptyEnvironment::createNull(UErrorCode& status) { + if (U_FAILURE(status)) { + return bogus(); } - - if (std::holds_alternative(argument)) { - return std::move(*std::get_if(&argument)); - } - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return {}; + LocalPointer val(InternalValue::null(status)); + return addUnnamedValue(std::move(val), status); } - const UnicodeString& InternalValue::getFallback() const { - if (std::holds_alternative(argument)) { - return std::get_if(&argument)->getFallback(); + InternalValue& EmptyEnvironment::createFallback(const UnicodeString& s, UErrorCode& status) { + if (U_FAILURE(status)) { + return bogus(); } - return (*std::get_if(&argument))->getFallback(); + LocalPointer val(InternalValue::fallback(s, status)); + return addUnnamedValue(std::move(val), status); } - const Selector* InternalValue::getSelector(UErrorCode& errorCode) const { - if (U_FAILURE(errorCode)) { - return nullptr; - } - - if (selector == nullptr) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - } - return selector; - } - - InternalValue::InternalValue(FormattedPlaceholder&& arg) { - argument = std::move(arg); - selector = nullptr; - formatter = nullptr; - } - - InternalValue::InternalValue(InternalValue* operand, - FunctionOptions&& opts, - const FunctionName& 
functionName, - const Formatter* f, - const Selector* s) { - argument = operand; - options = std::move(opts); - name = functionName; - selector = s; - formatter = f; - U_ASSERT(selector != nullptr || formatter != nullptr); - } - - // `this` cannot be used after calling this method - void InternalValue::forceSelection(DynamicErrors& errs, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return; - } - - if (!canSelect()) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - // Find the argument and complete set of options by traversing `argument` - FunctionOptions opts; - InternalValue* p = this; - FunctionName selectorName = name; - - bool operandSelect = false; - while (std::holds_alternative(p->argument)) { - if (p->name != selectorName) { - // Can only compose calls to the same selector - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - // Very special case to detect something like: - // .local $sel = {1 :integer select=exact} .local $bad = {$sel :integer} .match $bad 1 {{ONE}} * {{operand select {$bad}}} - // This can be done better once function composition is fully implemented. - if (p != this && - !p->options.getStringFunctionOption(options::SELECT).isEmpty() - && (selectorName == functions::NUMBER || selectorName == functions::INTEGER)) { - // In this case, we want to call the selector normally but emit a - // `bad-option` error, possibly with the outcome of normal-looking output (with relaxed - // error handling) and an error (with strict error handling). 
- operandSelect = true; - } - // First argument to mergeOptions takes precedence - opts = opts.mergeOptions(std::move(p->options), errorCode); - if (U_FAILURE(errorCode)) { - return; - } - InternalValue* next = *std::get_if(&p->argument); - p = next; + InternalValue& EmptyEnvironment::createUnnamed(InternalValue&& v, UErrorCode& status) { + if (U_FAILURE(status)) { + return bogus(); } - FormattedPlaceholder arg = std::move(*std::get_if(&p->argument)); - - // This condition can't be checked in the selector. - // Effectively, there are two different kinds of "bad option" errors: - // one that can be recovered from (used for select=$var) and one that - // can't (used for bad digit size options and other cases). - // The checking of the recoverable error has to be done here; otherwise, - // the "bad option" signaled by the selector implementation would cause - // fallback output to be used when formatting the `*` pattern. - bool badSelectOption = !checkSelectOption(); - - selector->selectKey(std::move(arg), std::move(opts), - keys, keysLen, - prefs, prefsLen, errorCode); - if (errorCode == U_MF_SELECTOR_ERROR) { - errorCode = U_ZERO_ERROR; - errs.setSelectorError(selectorName, errorCode); - } else if (errorCode == U_MF_BAD_OPTION) { - errorCode = U_ZERO_ERROR; - errs.setBadOption(selectorName, errorCode); - } else if (operandSelect || badSelectOption) { - errs.setRecoverableBadOption(selectorName, errorCode); - // In this case, only the `*` variant should match - prefsLen = 0; + LocalPointer val(new InternalValue(std::move(v))); + if (!val.isValid()) { + return bogus(); } + return addUnnamedValue(std::move(val), status); } - bool InternalValue::checkSelectOption() const { - if (name != UnicodeString("number") && name != UnicodeString("integer")) { - return true; - } - - // Per the spec, if the "select" option is present, it must have been - // set from a literal - - Formattable opt; - // Returns false if the `select` option is present and it was not set from a literal 
- - // OK if the option wasn't present - if (!options.getFunctionOption(UnicodeString("select"), opt)) { - return true; - } - // Otherwise, return true if the option was set from a literal - return options.wasSetFromLiteral(UnicodeString("select")); + InternalValue& NonEmptyEnvironment::createNull(UErrorCode& status) { + return parent->createNull(status); } - FormattedPlaceholder InternalValue::forceFormatting(DynamicErrors& errs, UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return {}; - } - - if (formatter == nullptr && selector == nullptr) { - U_ASSERT(std::holds_alternative(argument)); - return std::move(*std::get_if(&argument)); - } - if (formatter == nullptr) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return {}; - } + InternalValue& NonEmptyEnvironment::createFallback(const UnicodeString& s, UErrorCode& status) { + return parent->createFallback(s, status); + } - FormattedPlaceholder arg; + InternalValue& NonEmptyEnvironment::createUnnamed(InternalValue&& v, UErrorCode& status) { + return parent->createUnnamed(std::move(v), status); + } - if (std::holds_alternative(argument)) { - arg = std::move(*std::get_if(&argument)); - } else { - arg = (*std::get_if(&argument))->forceFormatting(errs, - errorCode); + InternalValue& EmptyEnvironment::addUnnamedValue(LocalPointer val, + UErrorCode& status) { + if (U_FAILURE(status)) { + return bogus(); } + U_ASSERT(val.isValid()); + InternalValue* v = val.orphan(); + unnamedValues.adoptElement(v, status); + return *v; + } - if (U_FAILURE(errorCode)) { - return {}; - } + EmptyEnvironment::EmptyEnvironment(UErrorCode& status) : unnamedValues(UVector(status)) { + unnamedValues.setDeleter(uprv_deleteUObject); + } - if (arg.isFallback()) { - return arg; - } + Environment::~Environment() {} + NonEmptyEnvironment::~NonEmptyEnvironment() {} + EmptyEnvironment::~EmptyEnvironment() {} - // The fallback for a nullary function call is the function name - UnicodeString fallback; - if (arg.isNullOperand()) { - fallback = u":"; 
- fallback += name; - } else { - fallback = arg.getFallback(); - } + /* static */ Closure* Closure::create(const Expression& expr, Environment& env, + UErrorCode& status) { + NULL_ON_ERROR(status); - // Very special case for :number select=foo and :integer select=foo - // This check can't be done inside the function implementation because - // it doesn't have a way to both signal an error and return usable output, - // and the spec stipulates that fallback output shouldn't be used in the - // case of a bad `select` option to a formatting call. - bool badSelect = !checkSelectOption(); - - // Call the function with the argument - FormattedPlaceholder result = formatter->format(std::move(arg), std::move(options), errorCode); - if (U_SUCCESS(errorCode) && errorCode == U_USING_DEFAULT_WARNING) { - // Ignore this warning - errorCode = U_ZERO_ERROR; - } - if (U_FAILURE(errorCode)) { - if (errorCode == U_MF_OPERAND_MISMATCH_ERROR) { - errorCode = U_ZERO_ERROR; - errs.setOperandMismatchError(name, errorCode); - } else if (errorCode == U_MF_BAD_OPTION) { - errorCode = U_ZERO_ERROR; - errs.setBadOption(name, errorCode); - } else { - errorCode = U_ZERO_ERROR; - // Convey any other error generated by the formatter - // as a formatting error - errs.setFormattingError(name, errorCode); - } - } - // Ignore the output if any error occurred - // We don't ignore the output in the case of a Bad Option Error, - // because of the select=bad case where we want both an error - // and non-fallback output. 
- if (errs.hasFormattingError() || errs.hasBadOptionError()) { - return FormattedPlaceholder(fallback); - } - if (badSelect) { - // In this case, we want to set an error but not replace - // the output with a fallback - errs.setRecoverableBadOption(name, errorCode); + Closure* result = new Closure(expr, env); + if (result == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; } return result; } - InternalValue& InternalValue::operator=(InternalValue&& other) noexcept { - argument = std::move(other.argument); - other.argument = nullptr; - options = std::move(other.options); - name = other.name; - selector = other.selector; - formatter = other.formatter; - other.selector = nullptr; - other.formatter = nullptr; + Closure::~Closure() {} + + // MessageContext methods - return *this; + void MessageContext::checkErrors(UErrorCode& status) const { + CHECK_ERROR(status); + errors.checkErrors(status); } - InternalValue::~InternalValue() { - delete selector; - selector = nullptr; - delete formatter; - formatter = nullptr; - if (std::holds_alternative(argument)) { - delete *std::get_if(&argument); - argument = nullptr; - } + const Formattable* MessageContext::getGlobal(const VariableName& v, + UErrorCode& errorCode) const { + return arguments.getArgument(v, errorCode); } + MessageContext::MessageContext(const MessageArguments& args, + const StaticErrors& e, + UErrorCode& status) : arguments(args), errors(e, status) {} + + MessageContext::~MessageContext() {} + } // namespace message2 U_NAMESPACE_END diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index f73b444229f5..50216eeeb307 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -45,6 +45,148 @@ namespace message2 { using namespace data_model; + class Closure; + class Environment; + + // InternalValue represents an intermediate value in the message + // formatter. + // It has four possible states: + // 1. 
Fallback Value. A fallback value + // is a string that serves as a replacement for expressions whose evaluation + // caused an error. Fallback values are not passed to functions. + // 2. Closure, representing the unevaluated right-hand side of a declaration. + // 3. Evaluated Value (FunctionValue), representing an evaluated declaration. + // 4. Indirection (const InternalValue*), representing a shared reference to another + // InternalValue. Note that all InternalValues are owned by the global + // environment. + /* + Example: + + .local $x = {$y} + .local $z = {1 :number} + .local $a = {$z} + {{ {$x} {$z} {$a} }} + + If this message is formatted with no arguments, + initially, x, z and a are all bound to Closures. + When the value of x is demanded by the pattern, the contents of x's value + are updated to a Fallback Value (because its RHS contains an unbound variable). + When the value of z is demanded, the contents of z's value are updated to + an Evaluated Value representing the result of :number on the operand. + When the value of a is demanded, the contents of a's value are updated to + an Indirection, pointing to z's value. + + Indirections are used so that a FunctionValue can be uniquely owned by an + InternalValue. Since all InternalValues are owned by the global Environment, + it's safe to use these non-owned pointers. 
+ */ + class InternalValue : public UObject { + public: + bool isFallback() const { return std::holds_alternative(val); } + bool isNullOperand() const; + bool isEvaluated() const; + bool isClosure() const; + bool isSelectable() const; + + Closure& asClosure() { + U_ASSERT(isClosure()); + return **std::get_if>(&val); + } + const FunctionValue* getValue(UErrorCode& status) const; + + UnicodeString asFallback() const { return fallbackString; } + + static LocalPointer null(UErrorCode& status); + static LocalPointer fallback(const UnicodeString& s, UErrorCode& status); + // Adopts `c` + static InternalValue closure(Closure* c, const UnicodeString& s); + + // Updates the mutable contents of this InternalValue + void update(InternalValue&); + void update(LocalPointer); + void update(const UnicodeString&); + + InternalValue() : val(UnicodeString()) {} + explicit InternalValue(FunctionValue* v, const UnicodeString& fb); + InternalValue& operator=(InternalValue&&); + InternalValue(InternalValue&&); + virtual ~InternalValue(); + private: + UnicodeString fallbackString; + std::variant, // Unevaluated thunk + LocalPointer, // Evaluated value + const InternalValue*> val; // Indirection to another value -- Not owned + // Null operand constructor + explicit InternalValue(UErrorCode& status); + // Fallback constructor + explicit InternalValue(const UnicodeString& fb) + : fallbackString(fb), val(fb) {} + // Closure (unevaluated) constructor + explicit InternalValue(Closure* c, UnicodeString fallbackStr) + : fallbackString(fallbackStr), val(LocalPointer(c)) {} + bool isIndirection() const; + }; // class InternalValue + + + // A BaseValue wraps a literal value or argument value so it can be used + // in a context that expects a FunctionValue. 
+ class BaseValue : public FunctionValue { + public: + static BaseValue* create(const Locale&, const UnicodeString&, const Formattable&, bool, UErrorCode&); + // Apply default formatters to the argument value + UnicodeString formatToString(UErrorCode&) const override; + UBool isSelectable() const override { return true; } + UBool wasCreatedFromLiteral() const override { return fromLiteral; } + BaseValue() {} + BaseValue(BaseValue&&); + BaseValue& operator=(BaseValue&&) noexcept; + private: + Locale locale; + bool fromLiteral = false; + + BaseValue(const Locale&, const UnicodeString&, const Formattable&, bool); + }; // class BaseValue + + // A NullValue represents the absence of an argument. + class NullValue : public FunctionValue { + public: + virtual UBool isNullOperand() const { return true; } + }; // class NullValue + + // A VariableValue wraps another FunctionValue and its sole purpose + // is to override the wasCreatedFromLiteral() method to always return false. + // This makes it easy to implement .local $foo = {exact}: the RHS returns a BaseValue + // such that wasCreatedFromLiteral() is true, but then we can wrap it in a VariableValue, + // which will always return false for this method. 
+ class VariableValue : public FunctionValue { + public: + static VariableValue* create(const FunctionValue*, UErrorCode&); + UBool wasCreatedFromLiteral() const override { return false; } + UnicodeString formatToString(UErrorCode& status) const override { return underlyingValue->formatToString(status); } + const Formattable& unwrap() const override { return underlyingValue->unwrap(); } + const FunctionOptions& getResolvedOptions() const override { return underlyingValue->getResolvedOptions(); } + UMFDirectionality getDirection() const override { return underlyingValue->getDirection(); } + UMFBidiOption getDirectionAnnotation() const override { return underlyingValue->getDirectionAnnotation(); } + UBool isSelectable() const override { return underlyingValue->isSelectable(); } + UBool isNullOperand() const override { return underlyingValue->isNullOperand(); } + void selectKeys(const UnicodeString* keys, + int32_t keysLen, + int32_t* prefs, + int32_t& prefsLen, + UErrorCode& status) const override { return underlyingValue->selectKeys(keys, keysLen, prefs, prefsLen, status); } + const UnicodeString& getFunctionName() const override { return underlyingValue->getFunctionName(); } + const UnicodeString& getFallback() const override { return underlyingValue->getFallback(); } + VariableValue() {} + virtual ~VariableValue(); + VariableValue(VariableValue&&); + VariableValue& operator=(VariableValue&&) noexcept; + private: + const FunctionValue* underlyingValue; + + VariableValue(const FunctionValue*); + }; // class VariableValue + // PrioritizedVariant // For how this class is used, see the references to (integer, variant) tuples @@ -89,66 +231,106 @@ namespace message2 { const Expression& getExpr() const { return expr; } - const Environment& getEnv() const { + Environment& getEnv() const { return env; } - Closure(const Expression& expression, const Environment& environment) : expr(expression), env(environment) {} Closure(Closure&&) = default; + static Closure* create(const 
Expression&, Environment&, UErrorCode&); virtual ~Closure(); private: + Closure(const Expression& expression, Environment& environment) : expr(expression), env(environment) {} + // An unevaluated expression const Expression& expr; // The environment mapping names used in this // expression to other expressions - const Environment& env; + Environment& env; }; + class NonEmptyEnvironment; + // An environment is represented as a linked chain of // non-empty environments, terminating at an empty environment. // It's searched using linear search. class Environment : public UMemory { - public: - virtual bool has(const VariableName&) const = 0; - virtual const Closure& lookup(const VariableName&) const = 0; - static Environment* create(UErrorCode&); - static Environment* create(const VariableName&, Closure&&, Environment*, UErrorCode&); - virtual ~Environment(); + public: + virtual bool has(const VariableName&) const = 0; + virtual InternalValue& lookup(const VariableName&) = 0; + virtual InternalValue& bogus() = 0; + // For convenience so that InternalValue::getValue() can return a reference + // in error cases + FunctionValue& bogusFunctionValue() { return bogusFunctionVal; } + virtual InternalValue& createFallback(const UnicodeString&, UErrorCode&) = 0; + virtual InternalValue& createNull(UErrorCode&) = 0; + virtual InternalValue& createUnnamed(InternalValue&&, UErrorCode&) = 0; + static Environment* create(UErrorCode&); + static Environment* create(const VariableName&, Closure*, const UnicodeString&, + Environment*, UErrorCode&); + virtual ~Environment(); + + private: + FunctionValue bogusFunctionVal; }; - class NonEmptyEnvironment; + // The empty environment includes a "bogus" value to use when an + // InternalValue& is needed (e.g. error conditions), + // and a vector of "unnamed" values, so that the environment can + // own all InternalValues (even those arising from expressions + // that appear directly in a pattern and are not named). 
class EmptyEnvironment : public Environment { public: - EmptyEnvironment() = default; + EmptyEnvironment(UErrorCode& status); virtual ~EmptyEnvironment(); private: friend class Environment; bool has(const VariableName&) const override; - const Closure& lookup(const VariableName&) const override; + InternalValue& lookup(const VariableName&) override; + InternalValue& bogus() override { return bogusValue; } static EmptyEnvironment* create(UErrorCode&); - static NonEmptyEnvironment* create(const VariableName&, Closure&&, Environment*, UErrorCode&); + static NonEmptyEnvironment* create(const VariableName&, InternalValue, + Environment*, UErrorCode&); + + // Creates a fallback value owned by this Environment + InternalValue& createFallback(const UnicodeString&, UErrorCode&) override; + // Creates a null operand owned by this Environment + InternalValue& createNull(UErrorCode&) override; + // Creates an arbitrary value owned by this Environment + InternalValue& createUnnamed(InternalValue&&, UErrorCode&) override; + + InternalValue& addUnnamedValue(LocalPointer, UErrorCode&); + + InternalValue bogusValue; // Used in place of `nullptr` in error conditions + UVector unnamedValues; }; class NonEmptyEnvironment : public Environment { + public: + InternalValue* update(const VariableName&, InternalValue&&); private: friend class Environment; bool has(const VariableName&) const override; - const Closure& lookup(const VariableName&) const override; + InternalValue& lookup(const VariableName&) override; + InternalValue& bogus() override { return parent->bogus(); } static NonEmptyEnvironment* create(const VariableName&, Closure&&, const Environment*, UErrorCode&); virtual ~NonEmptyEnvironment(); private: friend class Environment; - NonEmptyEnvironment(const VariableName& v, Closure&& c, Environment* e) : var(v), rhs(std::move(c)), parent(e) {} + NonEmptyEnvironment(const VariableName& v, InternalValue c, Environment* e) : var(v), rhs(std::move(c)), parent(e) {} + + InternalValue& 
createFallback(const UnicodeString&, UErrorCode&) override; + InternalValue& createNull(UErrorCode&) override; + InternalValue& createUnnamed(InternalValue&&, UErrorCode&) override; // Maps VariableName onto Closure* // Chain of linked environments VariableName var; - Closure rhs; + InternalValue rhs; const LocalPointer parent; }; @@ -177,45 +359,6 @@ namespace message2 { }; // class MessageContext - // InternalValue - // ---------------- - - class InternalValue : public UObject { - public: - const FunctionName& getFunctionName() const { return name; } - bool canSelect() const { return selector != nullptr; } - const Selector* getSelector(UErrorCode&) const; - FormattedPlaceholder forceFormatting(DynamicErrors& errs, - UErrorCode& errorCode); - void forceSelection(DynamicErrors& errs, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& errorCode); - // Needs to be deep-copyable and movable - virtual ~InternalValue(); - InternalValue(FormattedPlaceholder&&); - // Formatter and selector may be null - InternalValue(InternalValue*, FunctionOptions&&, const FunctionName&, const Formatter*, - const Selector*); - const UnicodeString& getFallback() const; - bool isFallback() const; - bool hasNullOperand() const; - // Can't be used anymore after calling this - FormattedPlaceholder takeArgument(UErrorCode& errorCode); - InternalValue(InternalValue&& other) { *this = std::move(other); } - InternalValue& operator=(InternalValue&& other) noexcept; - private: - // InternalValue is owned (if present) - std::variant argument; - FunctionOptions options; - FunctionName name; - const Selector* selector; // May be null - const Formatter* formatter; // May be null, but one or the other should be non-null unless argument is a FormattedPlaceholder - bool checkSelectOption() const; - }; // class InternalValue - } // namespace message2 U_NAMESPACE_END diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp 
b/icu4c/source/i18n/messageformat2_formattable.cpp index 0bbecaf25d77..58f12bc0e45a 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -10,10 +10,12 @@ #if !UCONFIG_NO_MF2 #include "unicode/messageformat2_formattable.h" +#include "unicode/messageformat2.h" #include "unicode/smpdtfmt.h" +#include "unicode/ubidi.h" #include "messageformat2_allocation.h" -#include "messageformat2_function_registry_internal.h" #include "messageformat2_macros.h" +#include "ubidiimp.h" #include "limits.h" @@ -21,17 +23,6 @@ U_NAMESPACE_BEGIN namespace message2 { - // Fallback values are enclosed in curly braces; - // see https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#formatting-fallback-values - - static UnicodeString fallbackToString(const UnicodeString& s) { - UnicodeString result; - result += LEFT_CURLY_BRACE; - result += s; - result += RIGHT_CURLY_BRACE; - return result; - } - Formattable& Formattable::operator=(Formattable other) noexcept { swap(*this, other); return *this; @@ -163,43 +154,6 @@ namespace message2 { FormattedMessage::~FormattedMessage() {} - FormattedValue::FormattedValue(const UnicodeString& s) { - type = kString; - stringOutput = std::move(s); - } - - FormattedValue::FormattedValue(number::FormattedNumber&& n) { - type = kNumber; - numberOutput = std::move(n); - } - - FormattedValue& FormattedValue::operator=(FormattedValue&& other) noexcept { - type = other.type; - if (type == kString) { - stringOutput = std::move(other.stringOutput); - } else { - numberOutput = std::move(other.numberOutput); - } - return *this; - } - - FormattedValue::~FormattedValue() {} - - FormattedPlaceholder& FormattedPlaceholder::operator=(FormattedPlaceholder&& other) noexcept { - type = other.type; - source = other.source; - if (type == kEvaluated) { - formatted = std::move(other.formatted); - previousOptions = std::move(other.previousOptions); - } - fallback = other.fallback; - return 
*this; - } - - const Formattable& FormattedPlaceholder::asFormattable() const { - return source; - } - // Default formatters // ------------------ @@ -219,7 +173,7 @@ namespace message2 { return number::NumberFormatter::withLocale(locale).formatDecimal(toFormat, errorCode); } - DateFormat* defaultDateTimeInstance(const Locale& locale, UErrorCode& errorCode) { + static DateFormat* defaultDateTimeInstance(const Locale& locale, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); LocalPointer df(DateFormat::createDateTimeInstance(DateFormat::SHORT, DateFormat::SHORT, locale)); if (!df.isValid()) { @@ -229,15 +183,41 @@ namespace message2 { return df.orphan(); } - // Called when output is required and the contents are an unevaluated `Formattable`; - // formats the source `Formattable` to a string with defaults, if it can be - // formatted with a default formatter - static FormattedPlaceholder formatWithDefaults(const Locale& locale, const FormattedPlaceholder& input, UErrorCode& status) { - if (U_FAILURE(status)) { - return {}; + TimeZone* createTimeZone(const DateInfo& dateInfo, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + TimeZone* tz; + if (dateInfo.zoneId.isEmpty()) { + // Floating time value -- use default time zone + tz = TimeZone::createDefault(); + } else { + tz = TimeZone::createTimeZone(dateInfo.zoneId); } + if (tz == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return tz; + } + + void formatDateWithDefaults(const Locale& locale, + const DateInfo& dateInfo, + UnicodeString& result, + UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer df(defaultDateTimeInstance(locale, errorCode)); + CHECK_ERROR(errorCode); + + df->adoptTimeZone(createTimeZone(dateInfo, errorCode)); + CHECK_ERROR(errorCode); + df->format(dateInfo.date, result, nullptr, errorCode); + } + + UnicodeString formattableToString(const Locale& locale, + const Formattable& toFormat, + UErrorCode& status) { + EMPTY_ON_ERROR(status); - const Formattable& toFormat = 
input.asFormattable(); // Try as decimal number first if (toFormat.isNumeric()) { // Note: the ICU Formattable has to be created here since the StringPiece @@ -249,38 +229,42 @@ namespace message2 { return {}; } if (asDecimal != nullptr) { - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, asDecimal, status))); + return formatNumberWithDefaults(locale, asDecimal, status).toString(status); } } UFormattableType type = toFormat.getType(); + UnicodeString result; + switch (type) { case UFMT_DATE: { - UnicodeString result; const DateInfo* dateInfo = toFormat.getDate(status); U_ASSERT(U_SUCCESS(status)); formatDateWithDefaults(locale, *dateInfo, result, status); - return FormattedPlaceholder(input, FormattedValue(std::move(result))); + break; } case UFMT_DOUBLE: { double d = toFormat.getDouble(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, d, status))); + result = formatNumberWithDefaults(locale, d, status).toString(status); + break; } case UFMT_LONG: { int32_t l = toFormat.getLong(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, l, status))); + result = formatNumberWithDefaults(locale, l, status).toString(status); + break; } case UFMT_INT64: { int64_t i = toFormat.getInt64Value(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, i, status))); + result = formatNumberWithDefaults(locale, i, status).toString(status); + break; } case UFMT_STRING: { - const UnicodeString& s = toFormat.getString(status); + result = toFormat.getString(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(UnicodeString(s))); + break; } default: { // No default formatters for other types; use fallback @@ -288,43 +272,11 @@ namespace message2 { // Note: it would be better to set an internal formatting error so 
that a string // (e.g. the type tag) can be provided. However, this method is called by the // public method formatToString() and thus can't take a MessageContext - return FormattedPlaceholder(input.getFallback()); - } - } - } - - // Called when string output is required; forces output to be produced - // if none is present (including formatting number output as a string) - UnicodeString FormattedPlaceholder::formatToString(const Locale& locale, - UErrorCode& status) const { - if (U_FAILURE(status)) { return {}; } - if (isFallback() || isNullOperand()) { - return fallbackToString(fallback); } - // Evaluated value: either just return the string, or format the number - // as a string and return it - if (isEvaluated()) { - if (formatted.isString()) { - return formatted.getString(); - } else { - return formatted.getNumber().toString(status); - } - } - // Unevaluated value: first evaluate it fully, then format - UErrorCode savedStatus = status; - FormattedPlaceholder evaluated = formatWithDefaults(locale, *this, status); - if (status == U_MF_FORMATTING_ERROR) { - U_ASSERT(evaluated.isFallback()); - return evaluated.getFallback(); - } - // Ignore U_USING_DEFAULT_WARNING - if (status == U_USING_DEFAULT_WARNING) { - status = savedStatus; - } - return evaluated.formatToString(locale, status); + return result; } } // namespace message2 diff --git a/icu4c/source/i18n/messageformat2_formatter.cpp b/icu4c/source/i18n/messageformat2_formatter.cpp index 6c555ecbc0e2..9ce00c621d00 100644 --- a/icu4c/source/i18n/messageformat2_formatter.cpp +++ b/icu4c/source/i18n/messageformat2_formatter.cpp @@ -92,6 +92,26 @@ namespace message2 { return *this; } + MessageFormatter::Builder& + MessageFormatter::Builder::setBidiIsolationStrategy( + MessageFormatter::UMFBidiIsolationStrategy strategy) { + bidiIsolationStrategy = strategy; + return *this; + } + + MessageFormatter::Builder& + MessageFormatter::Builder::setBidiContext(MessageFormatter::UMFBidiContext dir) { + msgdir = dir; + return 
*this; + } + + MessageFormatter::Builder& + MessageFormatter::Builder::setBidiIsolationStyle( + MessageFormatter::UMFBidiIsolationStyle style) { + bidiStyle = style; + return *this; + } + /* This build() method is non-destructive, which entails the risk that its borrowed MFFunctionRegistry and (if the setDataModel() method was called) @@ -117,6 +137,21 @@ namespace message2 { } } + + static UMFDirectionality + resolveDirectionality(MessageFormatter::UMFBidiContext setting, const Locale& locale) { + switch (setting) { + case MessageFormatter::U_MF_BIDI_CONTEXT_LTR: + return U_MF_DIRECTIONALITY_LTR; + case MessageFormatter::U_MF_BIDI_CONTEXT_RTL: + return U_MF_DIRECTIONALITY_RTL; + default: + return locale.isRightToLeft() + ? U_MF_DIRECTIONALITY_RTL + : U_MF_DIRECTIONALITY_LTR; + } + } + // MessageFormatter MessageFormatter::MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &success) : locale(builder.locale), customMFFunctionRegistry(builder.customMFFunctionRegistry) { @@ -125,23 +160,32 @@ namespace message2 { // Set up the standard function registry MFFunctionRegistry::Builder standardFunctionsBuilder(success); - FormatterFactory* dateTime = StandardFunctions::DateTimeFactory::dateTime(success); - FormatterFactory* date = StandardFunctions::DateTimeFactory::date(success); - FormatterFactory* time = StandardFunctions::DateTimeFactory::time(success); - FormatterFactory* number = new StandardFunctions::NumberFactory(); - FormatterFactory* integer = new StandardFunctions::IntegerFactory(); - standardFunctionsBuilder.adoptFormatter(FunctionName(functions::DATETIME), dateTime, success) - .adoptFormatter(FunctionName(functions::DATE), date, success) - .adoptFormatter(FunctionName(functions::TIME), time, success) - .adoptFormatter(FunctionName(functions::NUMBER), number, success) - .adoptFormatter(FunctionName(functions::INTEGER), integer, success) - .adoptFormatter(FunctionName(functions::TEST_FUNCTION), new StandardFunctions::TestFormatFactory(), 
success) - .adoptFormatter(FunctionName(functions::TEST_FORMAT), new StandardFunctions::TestFormatFactory(), success) - .adoptSelector(FunctionName(functions::NUMBER), new StandardFunctions::PluralFactory(UPLURAL_TYPE_CARDINAL), success) - .adoptSelector(FunctionName(functions::INTEGER), new StandardFunctions::PluralFactory(StandardFunctions::PluralFactory::integer()), success) - .adoptSelector(FunctionName(functions::STRING), new StandardFunctions::TextFactory(), success) - .adoptSelector(FunctionName(functions::TEST_FUNCTION), new StandardFunctions::TestSelectFactory(), success) - .adoptSelector(FunctionName(functions::TEST_SELECT), new StandardFunctions::TestSelectFactory(), success); + LocalPointer dateTime(StandardFunctions::DateTime::dateTime(success)); + LocalPointer date(StandardFunctions::DateTime::date(success)); + LocalPointer time(StandardFunctions::DateTime::time(success)); + LocalPointer number(StandardFunctions::Number::number(success)); + LocalPointer integer(StandardFunctions::Number::integer(success)); + LocalPointer string(StandardFunctions::String::string(success)); + LocalPointer testFunction(StandardFunctions::TestFunction::testFunction(success)); + LocalPointer testFormat(StandardFunctions::TestFunction::testFormat(success)); + LocalPointer testSelect(StandardFunctions::TestFunction::testSelect(success)); + CHECK_ERROR(success); + standardFunctionsBuilder.adoptFunction(FunctionName(functions::DATETIME), + dateTime.orphan(), success) + .adoptFunction(FunctionName(functions::DATE), date.orphan(), success) + .adoptFunction(FunctionName(functions::TIME), time.orphan(), success) + .adoptFunction(FunctionName(functions::NUMBER), + number.orphan(), success) + .adoptFunction(FunctionName(functions::INTEGER), + integer.orphan(), success) + .adoptFunction(FunctionName(functions::STRING), + string.orphan(), success) + .adoptFunction(FunctionName(functions::TEST_FUNCTION), + testFunction.orphan(), success) + 
.adoptFunction(FunctionName(functions::TEST_FORMAT), + testFormat.orphan(), success) + .adoptFunction(FunctionName(functions::TEST_SELECT), + testSelect.orphan(), success); CHECK_ERROR(success); standardMFFunctionRegistry = standardFunctionsBuilder.build(); CHECK_ERROR(success); @@ -149,6 +193,9 @@ namespace message2 { normalizedInput = builder.normalizedInput; signalErrors = builder.signalErrors; + bidiIsolationStrategy = builder.bidiIsolationStrategy; + bidiIsolationStyle = builder.bidiStyle; + msgdir = resolveDirectionality(builder.msgdir, locale); // Build data model // First, check that there is a data model @@ -196,6 +243,9 @@ namespace message2 { dataModel = std::move(other.dataModel); normalizedInput = std::move(other.normalizedInput); signalErrors = other.signalErrors; + bidiIsolationStrategy = other.bidiIsolationStrategy; + msgdir = other.msgdir; + bidiIsolationStyle = other.bidiIsolationStyle; errors = other.errors; other.errors = nullptr; return *this; @@ -221,138 +271,47 @@ namespace message2 { cleanup(); } - // Selector and formatter lookup - // ----------------------------- - - // Postcondition: selector != nullptr || U_FAILURE(status) - Selector* MessageFormatter::getSelector(MessageContext& context, const FunctionName& functionName, UErrorCode& status) const { - NULL_ON_ERROR(status); - U_ASSERT(isSelector(functionName)); - - const SelectorFactory* selectorFactory = lookupSelectorFactory(context, functionName, status); - NULL_ON_ERROR(status); - if (selectorFactory == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - // Create a specific instance of the selector - auto result = selectorFactory->createSelector(getLocale(), status); - NULL_ON_ERROR(status); - return result; - } - - // Returns an owned pointer - Formatter* MessageFormatter::getFormatter(const FunctionName& functionName, UErrorCode& status) const { - NULL_ON_ERROR(status); - - // Create the formatter - - // First, look up the formatter factory for this function - 
FormatterFactory* formatterFactory = lookupFormatterFactory(functionName, status); - NULL_ON_ERROR(status); - - U_ASSERT(formatterFactory != nullptr); - - // Create a specific instance of the formatter - Formatter* formatter = formatterFactory->createFormatter(locale, status); - NULL_ON_ERROR(status); - if (formatter == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - return formatter; - } - - bool MessageFormatter::getDefaultFormatterNameByType(const UnicodeString& type, - FunctionName& name) const { - if (!hasCustomMFFunctionRegistry()) { - return false; - } - const MFFunctionRegistry& reg = getCustomMFFunctionRegistry(); - return reg.getDefaultFormatterNameByType(type, name); - } - // --------------------------------------------------- // Function registry - bool MessageFormatter::isBuiltInSelector(const FunctionName& functionName) const { - return standardMFFunctionRegistry.hasSelector(functionName); + bool MessageFormatter::isBuiltInFunction(const FunctionName& functionName) const { + return standardMFFunctionRegistry.hasFunction(functionName); } - bool MessageFormatter::isBuiltInFormatter(const FunctionName& functionName) const { - return standardMFFunctionRegistry.hasFormatter(functionName); - } - - // https://github.com/unicode-org/message-format-wg/issues/409 - // Unknown function = unknown function error - // Formatter used as selector = selector error - // Selector used as formatter = formatting error - const SelectorFactory* MessageFormatter::lookupSelectorFactory(MessageContext& context, const FunctionName& functionName, UErrorCode& status) const { - DynamicErrors& err = context.getErrors(); + Function* + MessageFormatter::lookupFunction(const FunctionName& functionName, + UErrorCode& status) const { + NULL_ON_ERROR(status); - if (isBuiltInSelector(functionName)) { - return standardMFFunctionRegistry.getSelector(functionName); - } - if (isBuiltInFormatter(functionName)) { - err.setSelectorError(functionName, status); - return 
nullptr; + if (isBuiltInFunction(functionName)) { + return standardMFFunctionRegistry.getFunction(functionName); } if (hasCustomMFFunctionRegistry()) { const MFFunctionRegistry& customMFFunctionRegistry = getCustomMFFunctionRegistry(); - const SelectorFactory* selectorFactory = customMFFunctionRegistry.getSelector(functionName); - if (selectorFactory != nullptr) { - return selectorFactory; - } - if (customMFFunctionRegistry.getFormatter(functionName) != nullptr) { - err.setSelectorError(functionName, status); - return nullptr; + Function* function = customMFFunctionRegistry.getFunction(functionName); + if (function != nullptr) { + return function; } } // Either there is no custom function registry and the function // isn't built-in, or the function doesn't exist in either the built-in // or custom registry. // Unknown function error - err.setUnknownFunction(functionName, status); + status = U_MF_UNKNOWN_FUNCTION_ERROR; return nullptr; } - FormatterFactory* MessageFormatter::lookupFormatterFactory(const FunctionName& functionName, - UErrorCode& status) const { - NULL_ON_ERROR(status); - - if (isBuiltInFormatter(functionName)) { - return standardMFFunctionRegistry.getFormatter(functionName); - } - if (isBuiltInSelector(functionName)) { - status = U_MF_FORMATTING_ERROR; - return nullptr; - } + bool MessageFormatter::getDefaultFormatterNameByType(const UnicodeString& tag, + FunctionName& result) const { if (hasCustomMFFunctionRegistry()) { const MFFunctionRegistry& customMFFunctionRegistry = getCustomMFFunctionRegistry(); - FormatterFactory* formatterFactory = customMFFunctionRegistry.getFormatter(functionName); - if (formatterFactory != nullptr) { - return formatterFactory; - } - if (customMFFunctionRegistry.getSelector(functionName) != nullptr) { - status = U_MF_FORMATTING_ERROR; - return nullptr; - } + return customMFFunctionRegistry.getDefaultFormatterNameByType(tag, result); } - // Either there is no custom function registry and the function - // isn't built-in, 
or the function doesn't exist in either the built-in - // or custom registry. - // Unknown function error - status = U_MF_UNKNOWN_FUNCTION_ERROR; - return nullptr; + return false; } - bool MessageFormatter::isCustomFormatter(const FunctionName& fn) const { - return hasCustomMFFunctionRegistry() && getCustomMFFunctionRegistry().getFormatter(fn) != nullptr; - } - - - bool MessageFormatter::isCustomSelector(const FunctionName& fn) const { - return hasCustomMFFunctionRegistry() && getCustomMFFunctionRegistry().getSelector(fn) != nullptr; + bool MessageFormatter::isCustomFunction(const FunctionName& fn) const { + return hasCustomMFFunctionRegistry() && getCustomMFFunctionRegistry().getFunction(fn) != nullptr; } } // namespace message2 diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 3e924ce97804..be97a68785cd 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -22,6 +22,7 @@ #include "charstr.h" #include "double-conversion.h" #include "messageformat2_allocation.h" +#include "messageformat2_evaluation.h" #include "messageformat2_function_registry_internal.h" #include "messageformat2_macros.h" #include "hash.h" @@ -46,83 +47,74 @@ namespace message2 { // Function registry implementation -Formatter::~Formatter() {} -Selector::~Selector() {} -FormatterFactory::~FormatterFactory() {} -SelectorFactory::~SelectorFactory() {} +Function::~Function() {} +FunctionValue::~FunctionValue() {} MFFunctionRegistry MFFunctionRegistry::Builder::build() { - U_ASSERT(formatters != nullptr && selectors != nullptr && formattersByType != nullptr); - MFFunctionRegistry result = MFFunctionRegistry(formatters, selectors, formattersByType); - formatters = nullptr; - selectors = nullptr; + U_ASSERT(functions != nullptr); + U_ASSERT(formattersByType != nullptr); + MFFunctionRegistry result = MFFunctionRegistry(functions, formattersByType); 
+ functions = nullptr; formattersByType = nullptr; return result; } -MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::adoptSelector(const FunctionName& selectorName, SelectorFactory* selectorFactory, UErrorCode& errorCode) { +MFFunctionRegistry::Builder& +MFFunctionRegistry::Builder::adoptFunction(const FunctionName& functionName, + Function* function, + UErrorCode& errorCode) { if (U_SUCCESS(errorCode)) { - U_ASSERT(selectors != nullptr); - selectors->put(selectorName, selectorFactory, errorCode); + U_ASSERT(functions != nullptr); + functions->put(functionName, function, errorCode); } return *this; } -MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::adoptFormatter(const FunctionName& formatterName, FormatterFactory* formatterFactory, UErrorCode& errorCode) { - if (U_SUCCESS(errorCode)) { - U_ASSERT(formatters != nullptr); - formatters->put(formatterName, formatterFactory, errorCode); - } - return *this; -} - -MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::setDefaultFormatterNameByType(const UnicodeString& type, const FunctionName& functionName, UErrorCode& errorCode) { +MFFunctionRegistry::Builder& +MFFunctionRegistry::Builder::setDefaultFormatterNameByType(const UnicodeString& type, + const FunctionName& functionName, + UErrorCode& errorCode) { if (U_SUCCESS(errorCode)) { U_ASSERT(formattersByType != nullptr); FunctionName* f = create(FunctionName(functionName), errorCode); formattersByType->put(type, f, errorCode); - } - return *this; -} + } + return *this; + } MFFunctionRegistry::Builder::Builder(UErrorCode& errorCode) { CHECK_ERROR(errorCode); - formatters = new Hashtable(); - selectors = new Hashtable(); + functions = new Hashtable(); formattersByType = new Hashtable(); - if (!(formatters != nullptr && selectors != nullptr && formattersByType != nullptr)) { + if (functions == nullptr || formattersByType == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; - } else { - formatters->setValueDeleter(uprv_deleteUObject); - 
selectors->setValueDeleter(uprv_deleteUObject); - formattersByType->setValueDeleter(uprv_deleteUObject); } + + functions->setValueDeleter(uprv_deleteUObject); + formattersByType->setValueDeleter(uprv_deleteUObject); } MFFunctionRegistry::Builder::~Builder() { - if (formatters != nullptr) { - delete formatters; - } - if (selectors != nullptr) { - delete selectors; + if (functions != nullptr) { + delete functions; + functions = nullptr; } if (formattersByType != nullptr) { delete formattersByType; + formattersByType = nullptr; } } // Returns non-owned pointer. Returns pointer rather than reference because it can fail. -// Returns non-const because FormatterFactory is mutable. -// TODO: This is unsafe because of the cached-formatters map -// (the caller could delete the resulting pointer) -FormatterFactory* MFFunctionRegistry::getFormatter(const FunctionName& formatterName) const { - U_ASSERT(formatters != nullptr); - return static_cast(formatters->get(formatterName)); +// Returns non-const because Function is mutable. 
+Function* MFFunctionRegistry::getFunction(const FunctionName& functionName) const { + U_ASSERT(functions != nullptr); + return static_cast(functions->get(functionName)); } UBool MFFunctionRegistry::getDefaultFormatterNameByType(const UnicodeString& type, FunctionName& name) const { - U_ASSERT(formatters != nullptr); + U_ASSERT(formattersByType != nullptr); const FunctionName* f = static_cast(formattersByType->get(type)); if (f != nullptr) { name = *f; @@ -131,52 +123,32 @@ UBool MFFunctionRegistry::getDefaultFormatterNameByType(const UnicodeString& typ return false; } -const SelectorFactory* MFFunctionRegistry::getSelector(const FunctionName& selectorName) const { - U_ASSERT(selectors != nullptr); - return static_cast(selectors->get(selectorName)); -} - -bool MFFunctionRegistry::hasFormatter(const FunctionName& f) const { - return getFormatter(f) != nullptr; -} - -bool MFFunctionRegistry::hasSelector(const FunctionName& s) const { - return getSelector(s) != nullptr; +bool MFFunctionRegistry::hasFunction(const FunctionName& f) const { + return getFunction(f) != nullptr; } -void MFFunctionRegistry::checkFormatter(const char* s) const { +void MFFunctionRegistry::checkFunction(const char* s) const { #if U_DEBUG - U_ASSERT(hasFormatter(FunctionName(UnicodeString(s)))); + U_ASSERT(hasFunction(FunctionName(UnicodeString(s)))); #else (void) s; #endif } -void MFFunctionRegistry::checkSelector(const char* s) const { -#if U_DEBUG - U_ASSERT(hasSelector(FunctionName(UnicodeString(s)))); -#else - (void) s; -#endif -} - // Debugging void MFFunctionRegistry::checkStandard() const { - checkFormatter("datetime"); - checkFormatter("date"); - checkFormatter("time"); - checkFormatter("number"); - checkFormatter("integer"); - checkFormatter("test:function"); - checkFormatter("test:format"); - checkSelector("number"); - checkSelector("integer"); - checkSelector("string"); - checkSelector("test:function"); - checkSelector("test:select"); -} - -// Formatter/selector helpers + 
checkFunction("datetime"); + checkFunction("date"); + checkFunction("time"); + checkFunction("number"); + checkFunction("integer"); + checkFunction("string"); + checkFunction("test:function"); + checkFunction("test:format"); + checkFunction("test:select"); +} + +// Function/selector helpers // Returns the NFC-normalized version of s, returning s itself // if it's already normalized. @@ -250,33 +222,32 @@ static int64_t getInt64Value(const Locale& locale, const Formattable& value, UEr return 0; } -// Adopts its arguments -MFFunctionRegistry::MFFunctionRegistry(FormatterMap* f, SelectorMap* s, Hashtable* byType) : formatters(f), selectors(s), formattersByType(byType) { - U_ASSERT(f != nullptr && s != nullptr && byType != nullptr); +// Adopts its argument +MFFunctionRegistry::MFFunctionRegistry(FunctionMap* f, Hashtable* byType) + : functions(f), formattersByType(byType) { + U_ASSERT(f != nullptr); + U_ASSERT(byType != nullptr); } MFFunctionRegistry& MFFunctionRegistry::operator=(MFFunctionRegistry&& other) noexcept { cleanup(); - formatters = other.formatters; - selectors = other.selectors; + functions = other.functions; + other.functions = nullptr; formattersByType = other.formattersByType; - other.formatters = nullptr; - other.selectors = nullptr; other.formattersByType = nullptr; return *this; } void MFFunctionRegistry::cleanup() noexcept { - if (formatters != nullptr) { - delete formatters; - } - if (selectors != nullptr) { - delete selectors; + if (functions != nullptr) { + delete functions; + functions = nullptr; } if (formattersByType != nullptr) { delete formattersByType; + formattersByType = nullptr; } } @@ -285,7 +256,24 @@ MFFunctionRegistry::~MFFunctionRegistry() { cleanup(); } -// Specific formatter implementations +// Specific function implementations + +UMFDirectionality +outputDirectionalityFromUDir(UMFBidiOption uDir, const Locale& locale) { + switch (uDir) { + case U_MF_BIDI_OPTION_LTR: + return U_MF_DIRECTIONALITY_LTR; + case U_MF_BIDI_OPTION_RTL: 
+ return U_MF_DIRECTIONALITY_RTL; + case U_MF_BIDI_OPTION_AUTO: + case U_MF_BIDI_OPTION_INHERIT: + if (locale.isRightToLeft()) { + return U_MF_DIRECTIONALITY_RTL; + } + return U_MF_DIRECTIONALITY_LTR; + } + return U_MF_DIRECTIONALITY_LTR; +} // --------- Number @@ -406,17 +394,54 @@ bool isDigitSizeOption(const UnicodeString& s) { /* static */ void StandardFunctions::validateDigitSizeOptions(const FunctionOptions& opts, UErrorCode& status) { CHECK_ERROR(status); - for (int32_t i = 0; i < opts.optionsCount(); i++) { const ResolvedFunctionOption& opt = opts.options[i]; - if (isDigitSizeOption(opt.getName()) && !isInteger(opt.getValue())) { + if (isDigitSizeOption(opt.getName()) && !isInteger(opt.getValue().unwrap())) { status = U_MF_BAD_OPTION; return; } } } +/* static */ StandardFunctions::Number* +StandardFunctions::Number::integer(UErrorCode& success) { + return create(true, success); +} + +/* static */ StandardFunctions::Number* +StandardFunctions::Number::number(UErrorCode& success) { + return create(false, success); +} + +/* static */ StandardFunctions::Number* +StandardFunctions::Number::create(bool isInteger, UErrorCode& success) { + NULL_ON_ERROR(success); + + LocalPointer result(new Number(isInteger)); + if (!result.isValid()) { + success = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result.orphan(); +} + +LocalPointer StandardFunctions::Number::call(const FunctionContext& context, + const FunctionValue& operand, + const FunctionOptions& options, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + LocalPointer + val(new NumberValue(*this, context, operand, options, errorCode)); + if (!val.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return val; +} + /* static */ number::LocalizedNumberFormatter StandardFunctions::formatterForOptions(const Number& number, + const Locale& locale, const FunctionOptions& opts, UErrorCode& status) { number::UnlocalizedNumberFormatter nf; @@ -511,7 +536,9 @@ bool 
isDigitSizeOption(const UnicodeString& s) { // All other options apply to both `:number` and `:integer` int32_t minIntegerDigits = number.minimumIntegerDigits(opts); - nf = nf.integerWidth(IntegerWidth::zeroFillTo(minIntegerDigits)); + if (minIntegerDigits != -1) { + nf = nf.integerWidth(IntegerWidth::zeroFillTo(minIntegerDigits)); + } // signDisplay UnicodeString sd = opts.getStringFunctionOption(options::SIGN_DISPLAY); @@ -562,44 +589,10 @@ bool isDigitSizeOption(const UnicodeString& s) { } } } - return nf.locale(number.locale); -} - -Formatter* StandardFunctions::NumberFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - Formatter* result = new Number(locale); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - -Formatter* StandardFunctions::IntegerFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - Formatter* result = new Number(Number::integer(locale)); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; + return nf.locale(locale); } -StandardFunctions::IntegerFactory::~IntegerFactory() {} - -static FormattedPlaceholder notANumber(const FormattedPlaceholder& input) { - return FormattedPlaceholder(input, FormattedValue(UnicodeString("NaN"))); -} - -static double parseNumberLiteral(const Formattable& input, UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return {}; - } - - // Copying string to avoid GCC dangling-reference warning - // (although the reference is safe) - UnicodeString inputStr = input.getString(errorCode); - // Precondition: `input`'s source Formattable has type string +static double parseNumberLiteral(const UnicodeString& inputStr, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return {}; } @@ -659,19 +652,27 @@ static UChar32 digitToChar(int32_t val, UErrorCode errorCode) { errorCode = U_ILLEGAL_ARGUMENT_ERROR; return '0'; } + return '0'; } -int32_t 
StandardFunctions::Number::maximumFractionDigits(const FunctionOptions& opts) const { - Formattable opt; - - if (isInteger) { - return 0; - } - - if (opts.getFunctionOption(options::MAXIMUM_FRACTION_DIGITS, opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - int64_t val = getInt64Value(locale, opt, localErrorCode); - if (U_SUCCESS(localErrorCode)) { +int32_t StandardFunctions::Number::digitSizeOption(const FunctionOptions& opts, + const UnicodeString& k) const { + UErrorCode localStatus = U_ZERO_ERROR; + const FunctionValue* opt = opts.getFunctionOption(k, + localStatus); + if (U_SUCCESS(localStatus)) { + // First try the formatted value + UnicodeString formatted = opt->formatToString(localStatus); + int64_t val = 0; + if (U_SUCCESS(localStatus)) { + val = getInt64Value(Locale("en-US"), Formattable(formatted), localStatus); + } + if (U_FAILURE(localStatus)) { + localStatus = U_ZERO_ERROR; + } + // Next try the operand + val = getInt64Value(Locale("en-US"), opt->unwrap(), localStatus); + if (U_SUCCESS(localStatus)) { return static_cast(val); } } @@ -681,222 +682,161 @@ int32_t StandardFunctions::Number::maximumFractionDigits(const FunctionOptions& return -1; } +int32_t StandardFunctions::Number::maximumFractionDigits(const FunctionOptions& opts) const { + if (isInteger) { + return 0; + } + + return digitSizeOption(opts, UnicodeString("maximumFractionDigits")); +} + int32_t StandardFunctions::Number::minimumFractionDigits(const FunctionOptions& opts) const { Formattable opt; - if (!isInteger) { - if (opts.getFunctionOption(options::MINIMUM_FRACTION_DIGITS, opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - int64_t val = getInt64Value(locale, opt, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return static_cast(val); - } - } + if (isInteger) { + return -1; } - // Returning -1 indicates that the option wasn't provided or was a non-integer. - // The caller needs to check for that case, since passing -1 to Precision::minFraction() - // is an error. 
- return -1; + return digitSizeOption(opts, UnicodeString("minimumFractionDigits")); } int32_t StandardFunctions::Number::minimumIntegerDigits(const FunctionOptions& opts) const { - Formattable opt; - - if (opts.getFunctionOption(options::MINIMUM_INTEGER_DIGITS, opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - int64_t val = getInt64Value(locale, opt, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return static_cast(val); - } - } - return 1; + return digitSizeOption(opts, UnicodeString("minimumIntegerDigits")); } int32_t StandardFunctions::Number::minimumSignificantDigits(const FunctionOptions& opts) const { - Formattable opt; - - if (!isInteger) { - if (opts.getFunctionOption(options::MINIMUM_SIGNIFICANT_DIGITS, opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - int64_t val = getInt64Value(locale, opt, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return static_cast(val); - } - } + if (isInteger) { + return -1; } - // Returning -1 indicates that the option wasn't provided or was a non-integer. - // The caller needs to check for that case, since passing -1 to Precision::minSignificantDigits() - // is an error. - return -1; + return digitSizeOption(opts, UnicodeString("minimumSignificantDigits")); } int32_t StandardFunctions::Number::maximumSignificantDigits(const FunctionOptions& opts) const { - Formattable opt; - - if (opts.getFunctionOption(options::MAXIMUM_SIGNIFICANT_DIGITS, opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - int64_t val = getInt64Value(locale, opt, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return static_cast(val); - } - } - // Returning -1 indicates that the option wasn't provided or was a non-integer. - // The caller needs to check for that case, since passing -1 to Precision::maxSignificantDigits() - // is an error. 
- return -1; // Not a valid value for Precision; has to be checked + return digitSizeOption(opts, UnicodeString("maximumSignificantDigits")); } bool StandardFunctions::Number::usePercent(const FunctionOptions& opts) const { - Formattable opt; - if (isInteger - || !opts.getFunctionOption(options::STYLE, opt) - || opt.getType() != UFMT_STRING) { + const UnicodeString& style = opts.getStringFunctionOption(UnicodeString("style")); + if (isInteger || style.length() == 0) { return false; } - UErrorCode localErrorCode = U_ZERO_ERROR; - const UnicodeString& style = opt.getString(localErrorCode); - U_ASSERT(U_SUCCESS(localErrorCode)); - return (style == options::PERCENT_STRING); + return (style == UnicodeString("percent")); } -/* static */ StandardFunctions::Number StandardFunctions::Number::integer(const Locale& loc) { - return StandardFunctions::Number(loc, true); -} - -FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& arg, FunctionOptions&& opts, UErrorCode& errorCode) const { - if (U_FAILURE(errorCode)) { - return {}; - } - - // No argument => return "NaN" - if (!arg.canFormat()) { +StandardFunctions::NumberValue::NumberValue(const Number& parent, + const FunctionContext& context, + const FunctionValue& arg, + const FunctionOptions& options, + UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + // Must have an argument + if (arg.isNullOperand()) { errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return notANumber(arg); + return; } + locale = context.getLocale(); + opts = options.mergeOptions(arg.getResolvedOptions(), errorCode); + innerValue = arg.unwrap(); + functionName = UnicodeString(parent.isInteger ? 
"integer" : "number"); + inputDir = context.getDirection(); + dir = outputDirectionalityFromUDir(inputDir, locale); + number::LocalizedNumberFormatter realFormatter; - realFormatter = formatterForOptions(*this, opts, errorCode); + realFormatter = formatterForOptions(parent, locale, opts, errorCode); - number::FormattedNumber numberResult; int64_t integerValue = 0; if (U_SUCCESS(errorCode)) { - // Already checked that contents can be formatted - const Formattable& toFormat = arg.asFormattable(); - switch (toFormat.getType()) { + switch (innerValue.getType()) { case UFMT_DOUBLE: { - double d = toFormat.getDouble(errorCode); + double d = innerValue.getDouble(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - numberResult = realFormatter.formatDouble(d, errorCode); + formattedNumber = realFormatter.formatDouble(d, errorCode); integerValue = static_cast(std::round(d)); break; } case UFMT_LONG: { - int32_t l = toFormat.getLong(errorCode); + int32_t l = innerValue.getLong(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - numberResult = realFormatter.formatInt(l, errorCode); + formattedNumber = realFormatter.formatInt(l, errorCode); integerValue = l; break; } case UFMT_INT64: { - int64_t i = toFormat.getInt64(errorCode); + int64_t i = innerValue.getInt64(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - numberResult = realFormatter.formatInt(i, errorCode); + formattedNumber = realFormatter.formatInt(i, errorCode); integerValue = i; break; } case UFMT_STRING: { // Try to parse the string as a number - double d = parseNumberLiteral(toFormat, errorCode); + const UnicodeString& s = innerValue.getString(errorCode); + U_ASSERT(U_SUCCESS(errorCode)); + double d = parseNumberLiteral(s, errorCode); if (U_FAILURE(errorCode)) - return {}; - numberResult = realFormatter.formatDouble(d, errorCode); + return; + formattedNumber = realFormatter.formatDouble(d, errorCode); integerValue = static_cast(std::round(d)); break; } default: { // Other types can't be parsed as a number errorCode = 
U_MF_OPERAND_MISMATCH_ERROR; - return notANumber(arg); + break; } } } - // Need to return the integer value if invoked as :integer - if (isInteger) { - return FormattedPlaceholder(FormattedPlaceholder(Formattable(integerValue), arg.getFallback()), - std::move(opts), - FormattedValue(std::move(numberResult))); + // Ignore U_USING_DEFAULT_WARNING + if (errorCode == U_USING_DEFAULT_WARNING) { + errorCode = U_ZERO_ERROR; + } + + // Need to set the integer value if invoked as :integer + if (parent.isInteger) { + innerValue = Formattable(integerValue); } - return FormattedPlaceholder(arg, std::move(opts), FormattedValue(std::move(numberResult))); } -StandardFunctions::Number::~Number() {} -StandardFunctions::NumberFactory::~NumberFactory() {} +UnicodeString StandardFunctions::NumberValue::formatToString(UErrorCode& errorCode) const { + if (U_FAILURE(errorCode)) { + return {}; + } -// --------- PluralFactory + return formattedNumber.toString(errorCode); +} -StandardFunctions::Plural::PluralType StandardFunctions::Plural::pluralType(const FunctionOptions& opts) const { - Formattable opt; +StandardFunctions::Number::~Number() {} +StandardFunctions::NumberValue::~NumberValue() {} - if (opts.getFunctionOption(options::SELECT, opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - UnicodeString val = opt.getString(localErrorCode); - if (U_SUCCESS(localErrorCode)) { - if (val == options::ORDINAL) { - return PluralType::PLURAL_ORDINAL; - } - if (val == options::EXACT) { - return PluralType::PLURAL_EXACT; - } +/* static */ StandardFunctions::Number::PluralType +StandardFunctions::Number::pluralType(const FunctionOptions& opts) { + UnicodeString val = opts.getStringFunctionOption(options::SELECT); + if (!val.isEmpty()) { + if (val == options::ORDINAL) { + return PluralType::PLURAL_ORDINAL; + } + if (val == options::EXACT) { + return PluralType::PLURAL_EXACT; } } return PluralType::PLURAL_CARDINAL; } -Selector* StandardFunctions::PluralFactory::createSelector(const Locale& locale, 
UErrorCode& errorCode) const { - NULL_ON_ERROR(errorCode); - - Selector* result; - if (isInteger) { - result = new Plural(Plural::integer(locale, errorCode)); - } else { - result = new Plural(locale, errorCode); - } - NULL_ON_ERROR(errorCode); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - -void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, - FunctionOptions&& opts, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& errorCode) const { +void StandardFunctions::NumberValue::selectKeys(const UnicodeString* keys, + int32_t keysLen, + int32_t* prefs, + int32_t& prefsLen, + UErrorCode& errorCode) const { CHECK_ERROR(errorCode); - // No argument => return "NaN" - if (!toFormat.canFormat()) { - errorCode = U_MF_SELECTOR_ERROR; - return; - } - - // Handle any formatting options - PluralType type = pluralType(opts); - FormattedPlaceholder resolvedSelector = numberFormatter->format(std::move(toFormat), - std::move(opts), - errorCode); - CHECK_ERROR(errorCode); + Number::PluralType type = Number::pluralType(opts); - U_ASSERT(resolvedSelector.isEvaluated() && resolvedSelector.output().isNumber()); + // (resolvedSelector is `this`) // See https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#number-selection // 1. Let exact be the JSON string representation of the numeric value of resolvedSelector - const number::FormattedNumber& formattedNumber = resolvedSelector.output().getNumber(); UnicodeString exact = formattedNumber.toString(errorCode); if (U_FAILURE(errorCode)) { @@ -908,8 +848,8 @@ void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, // Step 2. Let keyword be a string which is the result of rule selection on resolvedSelector. // If the option select is set to exact, rule-based selection is not used. Return the empty string. 
UnicodeString keyword; - if (type != PluralType::PLURAL_EXACT) { - UPluralType t = type == PluralType::PLURAL_ORDINAL ? UPLURAL_TYPE_ORDINAL : UPLURAL_TYPE_CARDINAL; + if (type != Number::PluralType::PLURAL_EXACT) { + UPluralType t = type == Number::PluralType::PLURAL_ORDINAL ? UPLURAL_TYPE_ORDINAL : UPLURAL_TYPE_CARDINAL; // Look up plural rules by locale and type LocalPointer rules(PluralRules::forLocale(locale, t, errorCode)); CHECK_ERROR(errorCode); @@ -936,7 +876,7 @@ void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, // 5i(a). If key and exact consist of the same sequence of Unicode code points, then if (exact == keys[i]) { // 5i(a)(a) Append key as the last element of the list resultExact. - prefs[prefsLen] = keys[i]; + prefs[prefsLen] = i; prefsLen++; break; } @@ -944,7 +884,7 @@ void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, } // Return immediately if exact matching was requested - if (prefsLen == keysLen || type == PluralType::PLURAL_EXACT) { + if (prefsLen == keysLen || type == Number::PluralType::PLURAL_EXACT) { return; } @@ -957,7 +897,7 @@ void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, // 5ii(a). If key and keyword consist of the same sequence of Unicode code points, then if (keyword == keys[i]) { // 5ii(a)(a) Append key as the last element of the list resultKeyword. 
- prefs[prefsLen] = keys[i]; + prefs[prefsLen] = i; prefsLen++; } } @@ -969,116 +909,58 @@ void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, // (Implicit, since `prefs` is an out-parameter) } -StandardFunctions::Plural::Plural(const Locale& loc, UErrorCode& status) : locale(loc) { - CHECK_ERROR(status); +// --------- DateTime - numberFormatter.adoptInstead(new StandardFunctions::Number(loc)); - if (!numberFormatter.isValid()) { - status = U_MEMORY_ALLOCATION_ERROR; - } +/* static */ StandardFunctions::DateTime* +StandardFunctions::DateTime::date(UErrorCode& success) { + return DateTime::create(DateTimeType::kDate, success); } -StandardFunctions::Plural::Plural(const Locale& loc, bool isInt, UErrorCode& status) : locale(loc), isInteger(isInt) { - CHECK_ERROR(status); - - if (isInteger) { - numberFormatter.adoptInstead(new StandardFunctions::Number(loc, true)); - } else { - numberFormatter.adoptInstead(new StandardFunctions::Number(loc)); - } - - if (!numberFormatter.isValid()) { - status = U_MEMORY_ALLOCATION_ERROR; - } +/* static */ StandardFunctions::DateTime* +StandardFunctions::DateTime::time(UErrorCode& success) { + return DateTime::create(DateTimeType::kTime, success); } -StandardFunctions::Plural::~Plural() {} - -StandardFunctions::PluralFactory::~PluralFactory() {} - -// --------- DateTimeFactory - -/* static */ UnicodeString StandardFunctions::getStringOption(const FunctionOptions& opts, - std::u16string_view optionName, - UErrorCode& errorCode) { - if (U_SUCCESS(errorCode)) { - Formattable opt; - if (opts.getFunctionOption(optionName, opt)) { - return opt.getString(errorCode); // In case it's not a string, error code will be set - } else { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - } - } - // Default is empty string - return {}; +/* static */ StandardFunctions::DateTime* +StandardFunctions::DateTime::dateTime(UErrorCode& success) { + return DateTime::create(DateTimeType::kDateTime, success); } -// Date/time options only -static 
UnicodeString defaultForOption(std::u16string_view optionName) { - if (optionName == options::DATE_STYLE - || optionName == options::TIME_STYLE - || optionName == options::STYLE) { - return UnicodeString(options::SHORT); - } - return {}; // Empty string is default -} +/* static */ StandardFunctions::DateTime* +StandardFunctions::DateTime::create(DateTime::DateTimeType type, + UErrorCode& success) { + NULL_ON_ERROR(success); -// TODO -// Only DateTime currently uses the function options stored in the placeholder. -// It also doesn't use them very consistently (it looks at the previous set of options, -// and others aren't preserved). This needs to be generalized, -// but that depends on https://github.com/unicode-org/message-format-wg/issues/515 -// Finally, the option value is assumed to be a string, -// which works for datetime options but not necessarily in general. -UnicodeString StandardFunctions::DateTime::getFunctionOption(const FormattedPlaceholder& toFormat, - const FunctionOptions& opts, - std::u16string_view optionName) const { - // Options passed to the current function invocation take priority - Formattable opt; - UnicodeString s; - UErrorCode localErrorCode = U_ZERO_ERROR; - s = getStringOption(opts, optionName, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return s; - } - // Next try the set of options used to construct `toFormat` - localErrorCode = U_ZERO_ERROR; - s = getStringOption(toFormat.options(), optionName, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return s; + LocalPointer result(new DateTime(type)); + if (!result.isValid()) { + success = U_MEMORY_ALLOCATION_ERROR; + return nullptr; } - // Finally, use default - return defaultForOption(optionName); + return result.orphan(); } -// Used for options that don't have defaults -UnicodeString StandardFunctions::DateTime::getFunctionOption(const FormattedPlaceholder& toFormat, - const FunctionOptions& opts, - std::u16string_view optionName, - UErrorCode& errorCode) const { - if 
(U_SUCCESS(errorCode)) { - // Options passed to the current function invocation take priority - Formattable opt; - UnicodeString s; - UErrorCode localErrorCode = U_ZERO_ERROR; - s = getStringOption(opts, optionName, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return s; - } - // Next try the set of options used to construct `toFormat` - localErrorCode = U_ZERO_ERROR; - s = getStringOption(toFormat.options(), optionName, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return s; - } - errorCode = U_ILLEGAL_ARGUMENT_ERROR; +LocalPointer +StandardFunctions::DateTime::call(const FunctionContext& context, + const FunctionValue& val, + const FunctionOptions& opts, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + LocalPointer + result(new DateTimeValue(type, context, val, opts, errorCode)); + if (!result.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; } - return {}; + return result; } static DateFormat::EStyle stringToStyle(UnicodeString option, UErrorCode& errorCode) { if (U_SUCCESS(errorCode)) { UnicodeString upper = option.toUpper(); + if (upper.isEmpty()) { + return DateFormat::EStyle::kShort; + } if (upper == options::FULL_UPPER) { return DateFormat::EStyle::kFull; } @@ -1099,45 +981,6 @@ static DateFormat::EStyle stringToStyle(UnicodeString option, UErrorCode& errorC return DateFormat::EStyle::kNone; } -/* static */ StandardFunctions::DateTimeFactory* StandardFunctions::DateTimeFactory::dateTime(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - DateTimeFactory* result = new StandardFunctions::DateTimeFactory(DateTimeType::DateTime); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - -/* static */ StandardFunctions::DateTimeFactory* StandardFunctions::DateTimeFactory::date(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - DateTimeFactory* result = new DateTimeFactory(DateTimeType::Date); - if (result == nullptr) { - errorCode = 
U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - -/* static */ StandardFunctions::DateTimeFactory* StandardFunctions::DateTimeFactory::time(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - DateTimeFactory* result = new DateTimeFactory(DateTimeType::Time); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - -Formatter* StandardFunctions::DateTimeFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - Formatter* result = new StandardFunctions::DateTime(locale, type); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} // DateFormat parsers that are shared across threads static DateFormat* dateParser = nullptr; @@ -1191,173 +1034,52 @@ static void initDateParsersOnce(UErrorCode& errorCode) { ucln_i18n_registerCleanup(UCLN_I18N_MF2_DATE_PARSERS, mf2_date_parsers_cleanup); } -// Lazily initialize DateFormat objects used for parsing date literals static void initDateParsers(UErrorCode& errorCode) { CHECK_ERROR(errorCode); umtx_initOnce(gMF2DateParsersInitOnce, &initDateParsersOnce, errorCode); } -// From https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#date-and-time-operands : -// "A date/time literal value is a non-empty string consisting of an ISO 8601 date, or -// an ISO 8601 datetime optionally followed by a timezone offset." 
-UDate StandardFunctions::DateTime::tryPatterns(const UnicodeString& sourceStr, - UErrorCode& errorCode) const { - if (U_FAILURE(errorCode)) { - return 0; - } - // Handle ISO 8601 datetime (tryTimeZonePatterns() handles the case - // where a timezone offset follows) - if (sourceStr.length() > 10) { - return dateTimeParser->parse(sourceStr, errorCode); - } - // Handle ISO 8601 date - return dateParser->parse(sourceStr, errorCode); -} +UnicodeString StandardFunctions::DateTimeValue::formatToString(UErrorCode& status) const { + (void) status; -// See comment on tryPatterns() for spec reference -UDate StandardFunctions::DateTime::tryTimeZonePatterns(const UnicodeString& sourceStr, - UErrorCode& errorCode) const { - if (U_FAILURE(errorCode)) { - return 0; - } - int32_t len = sourceStr.length(); - if (len > 0 && sourceStr[len] == 'Z') { - return dateTimeUTCParser->parse(sourceStr, errorCode); - } - return dateTimeZoneParser->parse(sourceStr, errorCode); + return formattedDate; } -static TimeZone* createTimeZone(const DateInfo& dateInfo, UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); +extern TimeZone* createTimeZone(const DateInfo&, UErrorCode&); - TimeZone* tz; - if (dateInfo.zoneId.isEmpty()) { - // Floating time value -- use default time zone - tz = TimeZone::createDefault(); - } else { - tz = TimeZone::createTimeZone(dateInfo.zoneId); +StandardFunctions::DateTimeValue::DateTimeValue(DateTime::DateTimeType type, + const FunctionContext& context, + const FunctionValue& arg, + const FunctionOptions& options, + UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + using DateTimeType = DateTime::DateTimeType; + + // Function requires an operand + if (arg.isNullOperand()) { + errorCode = U_MF_OPERAND_MISMATCH_ERROR; + return; } - if (tz == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; + + locale = context.getLocale(); + opts = options.mergeOptions(arg.getResolvedOptions(), errorCode); + innerValue = arg.unwrap(); + switch (type) { + case DateTimeType::kDate: + 
functionName = functions::DATE; + break; + case DateTimeType::kTime: + functionName = functions::TIME; + break; + case DateTimeType::kDateTime: + functionName = functions::DATETIME; + break; } - return tz; -} + inputDir = context.getDirection(); + dir = outputDirectionalityFromUDir(inputDir, locale); -// Returns true iff `sourceStr` ends in an offset like +03:30 or -06:00 -// (This function is just used to determine whether to call tryPatterns() -// or tryTimeZonePatterns(); tryTimeZonePatterns() checks fully that the -// string matches the expected format) -static bool hasTzOffset(const UnicodeString& sourceStr) { - int32_t len = sourceStr.length(); - - if (len <= 6) { - return false; - } - return ((sourceStr[len - 6] == PLUS || sourceStr[len - 6] == HYPHEN) - && sourceStr[len - 3] == COLON); -} - -// Note: `calendar` option to :datetime not implemented yet; -// Gregorian calendar is assumed -DateInfo StandardFunctions::DateTime::createDateInfoFromString(const UnicodeString& sourceStr, - UErrorCode& errorCode) const { - if (U_FAILURE(errorCode)) { - return {}; - } - - UDate absoluteDate; - - // Check if the string has a time zone part - int32_t indexOfZ = sourceStr.indexOf('Z'); - int32_t indexOfPlus = sourceStr.lastIndexOf('+'); - int32_t indexOfMinus = sourceStr.lastIndexOf('-'); - int32_t indexOfSign = indexOfPlus > -1 ? 
indexOfPlus : indexOfMinus; - bool isTzOffset = hasTzOffset(sourceStr); - bool isGMT = indexOfZ > 0; - UnicodeString offsetPart; - bool hasTimeZone = isTzOffset || isGMT; - - if (!hasTimeZone) { - // No time zone; parse the date and time - absoluteDate = tryPatterns(sourceStr, errorCode); - if (U_FAILURE(errorCode)) { - return {}; - } - } else { - // Try to split into time zone and non-time-zone parts - UnicodeString dateTimePart; - if (isGMT) { - dateTimePart = sourceStr.tempSubString(0, indexOfZ); - } else { - dateTimePart = sourceStr.tempSubString(0, indexOfSign); - } - // Parse the date from the date/time part - tryPatterns(dateTimePart, errorCode); - // Failure -- can't parse this string - if (U_FAILURE(errorCode)) { - return {}; - } - // Success -- now parse the time zone part - if (isGMT) { - dateTimePart += UnicodeString("GMT"); - absoluteDate = tryTimeZonePatterns(dateTimePart, errorCode); - if (U_FAILURE(errorCode)) { - return {}; - } - } else { - // Try to parse time zone in offset format: [+-]nn:nn - absoluteDate = tryTimeZonePatterns(sourceStr, errorCode); - if (U_FAILURE(errorCode)) { - return {}; - } - offsetPart = sourceStr.tempSubString(indexOfSign, sourceStr.length()); - } - } - - // If the time zone was provided, get its canonical ID, - // in order to return it in the DateInfo - UnicodeString canonicalID; - if (hasTimeZone) { - UnicodeString tzID("GMT"); - if (!isGMT) { - tzID += offsetPart; - } - TimeZone::getCanonicalID(tzID, canonicalID, errorCode); - if (U_FAILURE(errorCode)) { - return {}; - } - } - - return { absoluteDate, canonicalID }; -} - -void formatDateWithDefaults(const Locale& locale, - const DateInfo& dateInfo, - UnicodeString& result, - UErrorCode& errorCode) { - CHECK_ERROR(errorCode); - - LocalPointer df(defaultDateTimeInstance(locale, errorCode)); - CHECK_ERROR(errorCode); - - df->adoptTimeZone(createTimeZone(dateInfo, errorCode)); - CHECK_ERROR(errorCode); - df->format(dateInfo.date, result, nullptr, errorCode); -} - 
-FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& toFormat, - FunctionOptions&& opts, - UErrorCode& errorCode) const { - if (U_FAILURE(errorCode)) { - return {}; - } - - // Argument must be present - if (!toFormat.canFormat()) { - errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return std::move(toFormat); - } + const Formattable* source = &innerValue; LocalPointer df; Formattable opt; @@ -1369,30 +1091,32 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& UnicodeString timeStyleName("timeStyle"); UnicodeString styleName("style"); - bool hasDateStyleOption = opts.getFunctionOption(dateStyleName, opt); - bool hasTimeStyleOption = opts.getFunctionOption(timeStyleName, opt); + UnicodeString dateStyleOption = opts.getStringFunctionOption(dateStyleName); + UnicodeString timeStyleOption = opts.getStringFunctionOption(timeStyleName); + bool hasDateStyleOption = dateStyleOption.length() > 0; + bool hasTimeStyleOption = timeStyleOption.length() > 0; bool noOptions = opts.optionsCount() == 0; - bool useStyle = (type == DateTimeFactory::DateTimeType::DateTime + bool useStyle = (type == DateTimeType::kDateTime && (hasDateStyleOption || hasTimeStyleOption || noOptions)) - || (type != DateTimeFactory::DateTimeType::DateTime); + || (type != DateTimeType::kDateTime); - bool useDate = type == DateTimeFactory::DateTimeType::Date - || (type == DateTimeFactory::DateTimeType::DateTime + bool useDate = type == DateTimeType::kDate + || (type == DateTimeType::kDateTime && hasDateStyleOption); - bool useTime = type == DateTimeFactory::DateTimeType::Time - || (type == DateTimeFactory::DateTimeType::DateTime + bool useTime = type == DateTimeType::kTime + || (type == DateTimeType::kDateTime && hasTimeStyleOption); if (useStyle) { // Extract style options - if (type == DateTimeFactory::DateTimeType::DateTime) { + if (type == DateTimeType::kDateTime) { // Note that the options-getting has to be repeated across the three cases, // since 
`:datetime` uses "dateStyle"/"timeStyle" and `:date` and `:time` // use "style" - dateStyle = stringToStyle(getFunctionOption(toFormat, opts, dateStyleName), errorCode); - timeStyle = stringToStyle(getFunctionOption(toFormat, opts, timeStyleName), errorCode); + dateStyle = stringToStyle(opts.getStringFunctionOption(dateStyleName), errorCode); + timeStyle = stringToStyle(opts.getStringFunctionOption(timeStyleName), errorCode); if (useDate && !useTime) { df.adoptInstead(DateFormat::createDateInstance(dateStyle, locale)); @@ -1401,12 +1125,12 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } else { df.adoptInstead(DateFormat::createDateTimeInstance(dateStyle, timeStyle, locale)); } - } else if (type == DateTimeFactory::DateTimeType::Date) { - dateStyle = stringToStyle(getFunctionOption(toFormat, opts, styleName), errorCode); + } else if (type == DateTimeType::kDate) { + dateStyle = stringToStyle(opts.getStringFunctionOption(styleName), errorCode); df.adoptInstead(DateFormat::createDateInstance(dateStyle, locale)); } else { // :time - timeStyle = stringToStyle(getFunctionOption(toFormat, opts, styleName), errorCode); + timeStyle = stringToStyle(opts.getStringFunctionOption(styleName), errorCode); df.adoptInstead(DateFormat::createTimeInstance(timeStyle, locale)); } } else { @@ -1417,7 +1141,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& #define ADD_PATTERN(s) skeleton += UnicodeString(s) if (U_SUCCESS(errorCode)) { // Year - UnicodeString year = getFunctionOption(toFormat, opts, options::YEAR, errorCode); + UnicodeString year = opts.getStringFunctionOption(options::YEAR, errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1429,7 +1153,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Month - UnicodeString month = getFunctionOption(toFormat, opts, options::MONTH, errorCode); + UnicodeString month = 
opts.getStringFunctionOption(options::MONTH, errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1448,7 +1172,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Weekday - UnicodeString weekday = getFunctionOption(toFormat, opts, options::WEEKDAY, errorCode); + UnicodeString weekday = opts.getStringFunctionOption(options::WEEKDAY, errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1462,7 +1186,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Day - UnicodeString day = getFunctionOption(toFormat, opts, options::DAY, errorCode); + UnicodeString day = opts.getStringFunctionOption(options::DAY, errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1474,7 +1198,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Hour - UnicodeString hour = getFunctionOption(toFormat, opts, options::HOUR, errorCode); + UnicodeString hour = opts.getStringFunctionOption(options::HOUR, errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1486,7 +1210,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Minute - UnicodeString minute = getFunctionOption(toFormat, opts, options::MINUTE, errorCode); + UnicodeString minute = opts.getStringFunctionOption(options::MINUTE, errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1498,7 +1222,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Second - UnicodeString second = getFunctionOption(toFormat, opts, options::SECOND, errorCode); + UnicodeString second = opts.getStringFunctionOption(options::SECOND, errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1521,43 +1245,35 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } if (U_FAILURE(errorCode)) { - return {}; + 
return; } if (!df.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; - return {}; + return; } UnicodeString result; - const Formattable& source = toFormat.asFormattable(); - switch (source.getType()) { + switch (source->getType()) { case UFMT_STRING: { // Lazily initialize date parsers used for parsing date literals initDateParsers(errorCode); - if (U_FAILURE(errorCode)) { - return {}; - } + CHECK_ERROR(errorCode); - const UnicodeString& sourceStr = source.getString(errorCode); + const UnicodeString& sourceStr = source->getString(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - DateInfo dateInfo = createDateInfoFromString(sourceStr, errorCode); + DateInfo dateInfo = StandardFunctions::DateTime::createDateInfoFromString(sourceStr, errorCode); if (U_FAILURE(errorCode)) { errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return {}; + return; } df->adoptTimeZone(createTimeZone(dateInfo, errorCode)); - - // Use the parsed date as the source value - // in the returned FormattedPlaceholder; this is necessary - // so the date can be re-formatted df->format(dateInfo.date, result, 0, errorCode); - toFormat = FormattedPlaceholder(message2::Formattable(std::move(dateInfo)), - toFormat.getFallback()); + innerValue = message2::Formattable(std::move(dateInfo)); break; } case UFMT_DATE: { - const DateInfo* dateInfo = source.getDate(errorCode); + const DateInfo* dateInfo = source->getDate(errorCode); if (U_SUCCESS(errorCode)) { // If U_SUCCESS(errorCode), then source.getDate() returned // a non-null pointer @@ -1577,86 +1293,256 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& break; } } + if (U_FAILURE(errorCode)) { + return; + } + // Ignore U_USING_DEFAULT_WARNING + if (errorCode == U_USING_DEFAULT_WARNING) { + errorCode = U_ZERO_ERROR; + } + formattedDate = result; +} + +// From https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#date-and-time-operands : +// "A date/time literal value is a non-empty string consisting of an ISO 
8601 date, or +// an ISO 8601 datetime optionally followed by a timezone offset." +UDate StandardFunctions::DateTime::tryPatterns(const UnicodeString& sourceStr, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return 0; + } + // Handle ISO 8601 datetime (tryTimeZonePatterns() handles the case + // where a timezone offset follows) + if (sourceStr.length() > 10) { + return dateTimeParser->parse(sourceStr, errorCode); + } + // Handle ISO 8601 date + return dateParser->parse(sourceStr, errorCode); +} + +// See comment on tryPatterns() for spec reference +UDate StandardFunctions::DateTime::tryTimeZonePatterns(const UnicodeString& sourceStr, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return 0; + } + int32_t len = sourceStr.length(); + if (len > 0 && sourceStr[len - 1] == 'Z') { + return dateTimeUTCParser->parse(sourceStr, errorCode); + } + return dateTimeZoneParser->parse(sourceStr, errorCode); +} + +// Returns true iff `sourceStr` ends in an offset like +03:30 or -06:00 +// (This function is just used to determine whether to call tryPatterns() +// or tryTimeZonePatterns(); tryTimeZonePatterns() checks fully that the +// string matches the expected format) +static bool hasTzOffset(const UnicodeString& sourceStr) { + int32_t len = sourceStr.length(); + + if (len <= 6) { + return false; + } + return ((sourceStr[len - 6] == PLUS || sourceStr[len - 6] == HYPHEN) + && sourceStr[len - 3] == COLON); +} + +// Note: `calendar` option to :datetime not implemented yet; +// Gregorian calendar is assumed +DateInfo StandardFunctions::DateTime::createDateInfoFromString(const UnicodeString& sourceStr, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return {}; } - return FormattedPlaceholder(toFormat, std::move(opts), FormattedValue(std::move(result))); + + UDate absoluteDate; + + // Check if the string has a time zone part + int32_t indexOfZ = sourceStr.indexOf('Z'); + int32_t indexOfPlus = sourceStr.lastIndexOf('+'); + int32_t indexOfMinus = 
sourceStr.lastIndexOf('-'); + int32_t indexOfSign = indexOfPlus > -1 ? indexOfPlus : indexOfMinus; + bool isTzOffset = hasTzOffset(sourceStr); + bool isGMT = indexOfZ > 0; + UnicodeString offsetPart; + bool hasTimeZone = isTzOffset || isGMT; + + if (!hasTimeZone) { + // No time zone; parse the date and time + absoluteDate = tryPatterns(sourceStr, errorCode); + if (U_FAILURE(errorCode)) { + return {}; + } + } else { + // Try to split into time zone and non-time-zone parts + UnicodeString dateTimePart; + if (isGMT) { + dateTimePart = sourceStr.tempSubString(0, indexOfZ); + } else { + dateTimePart = sourceStr.tempSubString(0, indexOfSign); + } + // Parse the date from the date/time part + tryPatterns(dateTimePart, errorCode); + // Failure -- can't parse this string + if (U_FAILURE(errorCode)) { + return {}; + } + // Success -- now parse the time zone part + if (isGMT) { + dateTimePart += UnicodeString("GMT"); + absoluteDate = tryTimeZonePatterns(dateTimePart, errorCode); + if (U_FAILURE(errorCode)) { + return {}; + } + } else { + // Try to parse time zone in offset format: [+-]nn:nn + absoluteDate = tryTimeZonePatterns(sourceStr, errorCode); + if (U_FAILURE(errorCode)) { + return {}; + } + offsetPart = sourceStr.tempSubString(indexOfSign, sourceStr.length()); + } + } + + // If the time zone was provided, get its canonical ID, + // in order to return it in the DateInfo + UnicodeString canonicalID; + if (hasTimeZone) { + UnicodeString tzID("GMT"); + if (!isGMT) { + tzID += offsetPart; + } + TimeZone::getCanonicalID(tzID, canonicalID, errorCode); + if (U_FAILURE(errorCode)) { + return {}; + } + } + + return { absoluteDate, canonicalID }; } -StandardFunctions::DateTimeFactory::~DateTimeFactory() {} StandardFunctions::DateTime::~DateTime() {} +StandardFunctions::DateTimeValue::~DateTimeValue() {} -// --------- TextFactory +// --------- String -Selector* StandardFunctions::TextFactory::createSelector(const Locale& locale, UErrorCode& errorCode) const { - Selector* result = 
new TextSelector(locale); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; +/* static */ StandardFunctions::String* +StandardFunctions::String::string(UErrorCode& success) { + NULL_ON_ERROR(success); + + LocalPointer result(new String()); + if (!result.isValid()) { + success = U_MEMORY_ALLOCATION_ERROR; return nullptr; } + return result.orphan(); +} + +extern UnicodeString formattableToString(const Locale&, + const Formattable&, + UErrorCode&); + +LocalPointer +StandardFunctions::String::call(const FunctionContext& context, + const FunctionValue& val, + const FunctionOptions& opts, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + LocalPointer + result(new StringValue(context, val, opts, errorCode)); + if (!result.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } return result; } -void StandardFunctions::TextSelector::selectKey(FormattedPlaceholder&& toFormat, - FunctionOptions&& opts, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& errorCode) const { +UnicodeString StandardFunctions::StringValue::formatToString(UErrorCode& errorCode) const { + (void) errorCode; + + return formattedString; +} + +UMFDirectionality stringOutputDirection(UMFBidiOption inputDir) { + switch (inputDir) { + case U_MF_BIDI_OPTION_INHERIT: + case U_MF_BIDI_OPTION_AUTO: + return U_MF_DIRECTIONALITY_UNKNOWN; + case U_MF_BIDI_OPTION_LTR: + return U_MF_DIRECTIONALITY_LTR; + case U_MF_BIDI_OPTION_RTL: + return U_MF_DIRECTIONALITY_RTL; + } + + return U_MF_DIRECTIONALITY_LTR; +} + +StandardFunctions::StringValue::StringValue(const FunctionContext& context, + const FunctionValue& val, + const FunctionOptions&, + UErrorCode& status) { + CHECK_ERROR(status); + locale = context.getLocale(); + innerValue = val.unwrap(); + functionName = UnicodeString("string"); + inputDir = context.getDirection(); + dir = stringOutputDirection(inputDir); // No options - (void) opts; + // Convert 
to string + formattedString = formattableToString(context.getLocale(), innerValue, status); +} +void StandardFunctions::StringValue::selectKeys(const UnicodeString* keys, + int32_t keysLen, + int32_t* prefs, + int32_t& prefsLen, + UErrorCode& errorCode) const { CHECK_ERROR(errorCode); // Just compares the key and value as strings - // Argument must be present - if (!toFormat.canFormat()) { - errorCode = U_MF_SELECTOR_ERROR; - return; - } - prefsLen = 0; - // Convert to string - const UnicodeString& formattedValue = toFormat.formatToString(locale, errorCode); if (U_FAILURE(errorCode)) { return; } // Normalize result - UnicodeString normalized = normalizeNFC(formattedValue); + UnicodeString normalized = normalizeNFC(formattedString); for (int32_t i = 0; i < keysLen; i++) { if (keys[i] == normalized) { - prefs[0] = keys[i]; + prefs[0] = i; prefsLen = 1; break; } } } -StandardFunctions::TextFactory::~TextFactory() {} -StandardFunctions::TextSelector::~TextSelector() {} +StandardFunctions::String::~String() {} +StandardFunctions::StringValue::~StringValue() {} -// ------------ TestFormatFactory +// ------------ TestFunction -Formatter* StandardFunctions::TestFormatFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); +StandardFunctions::TestFunction::~TestFunction() {} +StandardFunctions::TestFunctionValue::~TestFunctionValue() {} - // Results are not locale-dependent - (void) locale; - Formatter* result = new TestFormat(); - if (result == nullptr) { +LocalPointer StandardFunctions::TestFunction::call(const FunctionContext& context, + const FunctionValue& operand, + const FunctionOptions& options, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + LocalPointer + val(new TestFunctionValue(*this, context, operand, options, errorCode)); + if (!val.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } - return result; + return val; } -StandardFunctions::TestFormatFactory::~TestFormatFactory() 
{} -StandardFunctions::TestFormat::~TestFormat() {} - // Extract numeric value from a Formattable or, if it's a string, // parse it as a number according to the MF2 `number-literal` grammar production double formattableToNumber(const Formattable& arg, UErrorCode& status) { @@ -1684,7 +1570,9 @@ double formattableToNumber(const Formattable& arg, UErrorCode& status) { } case UFMT_STRING: { // Try to parse the string as a number - result = parseNumberLiteral(arg, status); + const UnicodeString& s = arg.getString(status); + U_ASSERT(U_SUCCESS(status)); + result = parseNumberLiteral(s, status); if (U_FAILURE(status)) { status = U_MF_OPERAND_MISMATCH_ERROR; } @@ -1699,14 +1587,53 @@ double formattableToNumber(const Formattable& arg, UErrorCode& status) { return result; } +static bool isTestFunction(const UnicodeString& s) { + return (s == u"test:format" + || s == u"test:select" + || s == u"test:function"); +} -/* static */ void StandardFunctions::TestFormat::testFunctionParameters(const FormattedPlaceholder& arg, - const FunctionOptions& options, - int32_t& decimalPlaces, - bool& failsFormat, - bool& failsSelect, - double& input, - UErrorCode& status) { +static void setFailsFromFunctionValue(const FunctionValue& optionValue, + bool& failsFormat, + bool& failsSelect, + UErrorCode& status) { + UnicodeString failsString = optionValue.unwrap().getString(status); + if (U_SUCCESS(status)) { + // 9i. If its value resolves to the string 'always', then + if (failsString == u"always") { + // 9ia. Set FailsFormat to be true + failsFormat = true; + // 9ib. Set FailsSelect to be true. + failsSelect = true; + } + // 9ii. Else if its value resolves to the string "format", then + else if (failsString == u"format") { + // 9ia. Set FailsFormat to be true + failsFormat = true; + } + // 9iii. Else if its value resolves to the string "select", then + else if (failsString == u"select") { + // 9iiia. Set FailsSelect to be true. + failsSelect = true; + } + // 9iv. 
Else if its value does not resolve to the string "never", then + else if (failsString != u"never") { + // 9iv(a). Emit "bad-option" Resolution Error. + status = U_MF_BAD_OPTION; + } + } else { + // 9iv. again + status = U_MF_BAD_OPTION; + } +} + +/* static */ void StandardFunctions::TestFunction::testFunctionParameters(const FunctionValue& arg, + const FunctionOptions& options, + int32_t& decimalPlaces, + bool& failsFormat, + bool& failsSelect, + double& input, + UErrorCode& status) const { CHECK_ERROR(status); // 1. Let DecimalPlaces be 0. @@ -1721,96 +1648,121 @@ double formattableToNumber(const Formattable& arg, UErrorCode& status) { // 4. Let arg be the resolved value of the expression operand. // (already true) - // Step 5 omitted because composition isn't fully implemented yet - // 6. Else if arg is a numerical value or a string matching the number-literal production, then - input = formattableToNumber(arg.asFormattable(), status); - if (U_FAILURE(status)) { - // 7. Else, - // 7i. Emit "bad-input" Resolution Error. - status = U_MF_OPERAND_MISMATCH_ERROR; - // 7ii. Use a fallback value as the resolved value of the expression. - // Further steps of this algorithm are not followed. + // 5. If arg is the resolved value of an expression with a :test:function, :test:select, or :test:format annotation for which resolution has succeeded, then + if (isTestFunction(arg.getFunctionName())) { + // 5i. Let Input be the Input value of arg. + input = formattableToNumber(arg.unwrap(), status); + if (U_FAILURE(status)) { + status = U_MF_OPERAND_MISMATCH_ERROR; + return; + } + const FunctionOptions& opts = arg.getResolvedOptions(); + // 5ii. Set DecimalPlaces to be DecimalPlaces value of arg. 
+ const FunctionValue* decimalPlacesFunctionValue = opts.getFunctionOption(UnicodeString("decimalPlaces"), status); + if (U_SUCCESS(status)) { + decimalPlaces = formattableToNumber(decimalPlacesFunctionValue->unwrap(), status); + if (U_FAILURE(status)) { + status = U_MF_OPERAND_MISMATCH_ERROR; + return; + } + } else { + // Option was not provided -- not an error + status = U_ZERO_ERROR; + } + // 5iii. Set FailsFormat to be FailsFormat value of arg. + const FunctionValue* failsFormatFunctionValue = opts.getFunctionOption(UnicodeString("fails"), status); + if (U_SUCCESS(status)) { + setFailsFromFunctionValue(*failsFormatFunctionValue, failsFormat, failsSelect, status); + if (U_FAILURE(status)) { + status = U_MF_BAD_OPTION; + return; + } + } else { + // Option was not provided -- not an error + status = U_ZERO_ERROR; + } + // 5iv. Set FailsSelect to be FailsSelect value of arg. + // (Done in previous step) + } else { + // 6. Else if arg is a numerical value or a string matching the number-literal production, then + input = formattableToNumber(arg.unwrap(), status); + if (U_FAILURE(status)) { + // 7. Else, + // 7i. Emit "bad-input" Resolution Error. + status = U_MF_OPERAND_MISMATCH_ERROR; + // 7ii. Use a fallback value as the resolved value of the expression. + return; // Further steps of this algorithm are not followed. + } } + + const FunctionValue* decimalPlacesOpt = options.getFunctionOption(options::DECIMAL_PLACES, status); // 8. If the decimalPlaces option is set, then - Formattable opt; - if (options.getFunctionOption(options::DECIMAL_PLACES, opt)) { + if (U_SUCCESS(status)) { // 8i. If its value resolves to a numerical integer value 0 or 1 // or their corresponding string representations '0' or '1', then - double decimalPlacesInput = formattableToNumber(opt, status); + double decimalPlacesInput = formattableToNumber(decimalPlacesOpt->unwrap(), status); if (U_SUCCESS(status)) { if (decimalPlacesInput == 0 || decimalPlacesInput == 1) { // 8ia. 
Set DecimalPlaces to be the numerical value of the option. decimalPlaces = decimalPlacesInput; } - } - // 8ii. Else if its value is not an unresolved value set by option resolution, - else { - // 8iia. Emit "bad-option" Resolution Error. - status = U_MF_BAD_OPTION; - // 8iib. Use a fallback value as the resolved value of the expression. - } - } - // 9. If the fails option is set, then - Formattable failsOpt; - if (options.getFunctionOption(options::FAILS, failsOpt)) { - UnicodeString failsString = failsOpt.getString(status); - if (U_SUCCESS(status)) { - // 9i. If its value resolves to the string 'always', then - if (failsString == u"always") { - // 9ia. Set FailsFormat to be true - failsFormat = true; - // 9ib. Set FailsSelect to be true. - failsSelect = true; - } - // 9ii. Else if its value resolves to the string "format", then - else if (failsString == u"format") { - // 9ia. Set FailsFormat to be true - failsFormat = true; - } - // 9iii. Else if its value resolves to the string "select", then - else if (failsString == u"select") { - // 9iiia. Set FailsSelect to be true. - failsSelect = true; - } - // 9iv. Else if its value does not resolve to the string "never", then - else if (failsString != u"never") { - // 9iv(a). Emit "bad-option" Resolution Error. + // 8ii. Else if its value is not an unresolved value set by option resolution, + else { + // 8iia. Emit "bad-option" Resolution Error. status = U_MF_BAD_OPTION; + return; + // 8iib. Use a fallback value as the resolved value of the expression. } } else { - // 9iv. again status = U_MF_BAD_OPTION; + return; } + } else { + // Option was not provided -- not an error + status = U_ZERO_ERROR; } -} -FormattedPlaceholder StandardFunctions::TestFormat::format(FormattedPlaceholder&& arg, - FunctionOptions&& options, - UErrorCode& status) const{ + const FunctionValue* failsOpt = options.getFunctionOption(UnicodeString("fails"), status); + // 9. 
If the fails option is set, then + if (U_SUCCESS(status)) { + setFailsFromFunctionValue(*failsOpt, failsFormat, failsSelect, status); + } else { + // Option was not provided -- not an error + status = U_ZERO_ERROR; + } +} - int32_t decimalPlaces; - bool failsFormat; - bool failsSelect; - double input; +StandardFunctions::TestFunctionValue::TestFunctionValue(const TestFunction& parent, + const FunctionContext&, + const FunctionValue& arg, + const FunctionOptions& options, + UErrorCode& status) { + parent.testFunctionParameters(arg, options, decimalPlaces, + failsFormat, failsSelect, input, status); + CHECK_ERROR(status); + opts = options.mergeOptions(arg.getResolvedOptions(), status); + innerValue = arg.unwrap(); + canFormat = parent.canFormat; + canSelect = parent.canSelect; + functionName = UnicodeString(canFormat && canSelect ? + "test:function" + : canFormat ? "test:format" + : "test:select"); - testFunctionParameters(arg, options, decimalPlaces, - failsFormat, failsSelect, input, status); - if (U_FAILURE(status)) { - return FormattedPlaceholder(arg.getFallback()); - } + CHECK_ERROR(status); // If FailsFormat is true, attempting to format the placeholder to any // formatting target will fail. if (failsFormat) { - status = U_MF_FORMATTING_ERROR; - return FormattedPlaceholder(arg.getFallback()); + formattedString = arg.getFallback(); + return; } - UnicodeString result; + // When :test:function is used as a formatter, a placeholder resolving to a value // with a :test:function expression is formatted as a concatenation of the following parts: // 1. If Input is less than 0, the character - U+002D Hyphen-Minus. if (input < 0) { - result += HYPHEN; + formattedString += HYPHEN; } // 2. The truncated absolute integer value of Input, i.e. floor(abs(Input)), formatted as a // sequence of decimal digit characters (U+0030...U+0039). 
@@ -1826,86 +1778,91 @@ FormattedPlaceholder StandardFunctions::TestFormat::format(FormattedPlaceholder& &ignore, &ignoreLen, &ignorePoint); - result += UnicodeString(buffer); + formattedString += UnicodeString(buffer); // 3. If DecimalPlaces is 1, then if (decimalPlaces == 1) { // 3i. The character . U+002E Full Stop. - result += u"."; + formattedString += u"."; // 3ii. The single decimal digit character representing the value // floor((abs(Input) - floor(abs(Input))) * 10) int32_t val = floor((abs(input) - floor(abs(input)) * 10)); - result += digitToChar(val, status); + formattedString += digitToChar(val, status); U_ASSERT(U_SUCCESS(status)); } - return FormattedPlaceholder(result); } -// ------------ TestSelectFactory - -StandardFunctions::TestSelectFactory::~TestSelectFactory() {} -StandardFunctions::TestSelect::~TestSelect() {} - -Selector* StandardFunctions::TestSelectFactory::createSelector(const Locale& locale, - UErrorCode& errorCode) const { - NULL_ON_ERROR(errorCode); - - // Results are not locale-dependent - (void) locale; - - Selector* result = new TestSelect(); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; +UnicodeString StandardFunctions::TestFunctionValue::formatToString(UErrorCode& status) const { + if (U_FAILURE(status)) { + return {}; } - return result; + if (!canFormat || failsFormat) { + status = U_MF_FORMATTING_ERROR; + } + if (!canFormat) { + return {}; + } + return formattedString; } -void StandardFunctions::TestSelect::selectKey(FormattedPlaceholder&& val, - FunctionOptions&& options, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& status) const { - int32_t decimalPlaces; - bool failsFormat; - bool failsSelect; - double input; - - TestFormat::testFunctionParameters(val, options, decimalPlaces, - failsFormat, failsSelect, input, status); - +void StandardFunctions::TestFunctionValue::selectKeys(const UnicodeString* keys, + int32_t keysLen, + int32_t* prefs, + 
int32_t& prefsLen, + UErrorCode& status) const { if (U_FAILURE(status)) { return; } - if (failsSelect) { + if (!canSelect || failsSelect) { status = U_MF_SELECTOR_ERROR; return; } + prefsLen = 0; + + if (input != 1) { + return; + } + // If the Input is 1 and DecimalPlaces is 1, the method will return some slice // of the list « '1.0', '1' », depending on whether those values are included in keys. - bool include1point0 = false; - bool include1 = false; if (input == 1 && decimalPlaces == 1) { - include1point0 = true; - include1 = true; - } else if (input == 1 && decimalPlaces == 0) { - include1 = true; + // 1.0 must come first, so search the keys for 1.0 and then 1 + for (int32_t i = 0; i < keysLen; i++) { + if (keys[i] == u"1.0") { + prefs[0] = i; + prefsLen++; + } + } } // If the Input is 1 and DecimalPlaces is 0, the method will return the list « '1' » if // keys includes '1', or an empty list otherwise. // If the Input is any other value, the method will return an empty list. for (int32_t i = 0; i < keysLen; i++) { - if ((keys[i] == u"1" && include1) - || (keys[i] == u"1.0" && include1point0)) { - prefs[prefsLen] = keys[i]; + if (keys[i] == u"1") { + prefs[prefsLen] = i; prefsLen++; } } } +StandardFunctions::TestFunction::TestFunction(bool format, bool select) : canFormat(format), canSelect(select) { + U_ASSERT(format || select); +} + +/* static */ StandardFunctions::TestFunction* StandardFunctions::TestFunction::testFunction(UErrorCode& status) { + return create(TestFunction(true, true), status); +} + +/* static */ StandardFunctions::TestFunction* StandardFunctions::TestFunction::testFormat(UErrorCode& status) { + return create(TestFunction(true, false), status); +} + +/* static */ StandardFunctions::TestFunction* StandardFunctions::TestFunction::testSelect(UErrorCode& status) { + return create(TestFunction(false, true), status); +} + } // namespace message2 U_NAMESPACE_END diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h 
b/icu4c/source/i18n/messageformat2_function_registry_internal.h index 25d2a467502d..a7b5678b4aff 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -26,6 +26,7 @@ namespace message2 { // Constants for option names namespace options { static constexpr std::u16string_view ALWAYS = u"always"; +static constexpr std::u16string_view AUTO = u"auto"; static constexpr std::u16string_view COMPACT = u"compact"; static constexpr std::u16string_view COMPACT_DISPLAY = u"compactDisplay"; static constexpr std::u16string_view DATE_STYLE = u"dateStyle"; @@ -38,8 +39,10 @@ static constexpr std::u16string_view EXCEPT_ZERO = u"exceptZero"; static constexpr std::u16string_view FAILS = u"fails"; static constexpr std::u16string_view FULL_UPPER = u"FULL"; static constexpr std::u16string_view HOUR = u"hour"; +static constexpr std::u16string_view INHERIT = u"inherit"; static constexpr std::u16string_view LONG = u"long"; static constexpr std::u16string_view LONG_UPPER = u"LONG"; +static constexpr std::u16string_view LTR = u"ltr"; static constexpr std::u16string_view MAXIMUM_FRACTION_DIGITS = u"maximumFractionDigits"; static constexpr std::u16string_view MAXIMUM_SIGNIFICANT_DIGITS = u"maximumSignificantDigits"; static constexpr std::u16string_view MEDIUM_UPPER = u"MEDIUM"; @@ -57,6 +60,7 @@ static constexpr std::u16string_view NUMBERING_SYSTEM = u"numberingSystem"; static constexpr std::u16string_view NUMERIC = u"numeric"; static constexpr std::u16string_view ORDINAL = u"ordinal"; static constexpr std::u16string_view PERCENT_STRING = u"percent"; +static constexpr std::u16string_view RTL = u"rtl"; static constexpr std::u16string_view SCIENTIFIC = u"scientific"; static constexpr std::u16string_view SECOND = u"second"; static constexpr std::u16string_view SELECT = u"select"; @@ -66,6 +70,9 @@ static constexpr std::u16string_view SIGN_DISPLAY = u"signDisplay"; static constexpr std::u16string_view STYLE = 
u"style"; static constexpr std::u16string_view TIME_STYLE = u"timeStyle"; static constexpr std::u16string_view TWO_DIGIT = u"2-digit"; +static constexpr std::u16string_view U_DIR = u"u:dir"; +static constexpr std::u16string_view U_ID = u"u:id"; +static constexpr std::u16string_view U_LOCALE = u"u:locale"; static constexpr std::u16string_view USE_GROUPING = u"useGrouping"; static constexpr std::u16string_view WEEKDAY = u"weekday"; static constexpr std::u16string_view YEAR = u"year"; @@ -93,98 +100,69 @@ static constexpr std::u16string_view YEAR = u"year"; UErrorCode& errorCode); class DateTime; + class DateTimeValue; - class DateTimeFactory : public FormatterFactory { + class DateTime : public Function { public: - Formatter* createFormatter(const Locale& locale, UErrorCode& status) override; - static DateTimeFactory* date(UErrorCode&); - static DateTimeFactory* time(UErrorCode&); - static DateTimeFactory* dateTime(UErrorCode&); - DateTimeFactory() = delete; - virtual ~DateTimeFactory(); - - private: - friend class DateTime; - - typedef enum DateTimeType { - Date, - Time, - DateTime - } DateTimeType; - - DateTimeType type; - DateTimeFactory(DateTimeType t) : type(t) {} - }; - - class DateTime : public Formatter { - public: - FormattedPlaceholder format(FormattedPlaceholder&& toFormat, FunctionOptions&& options, UErrorCode& status) const override; + static DateTime* date(UErrorCode&); + static DateTime* time(UErrorCode&); + static DateTime* dateTime(UErrorCode&); + + LocalPointer call(const FunctionContext& context, + const FunctionValue& operand, + const FunctionOptions& options, + UErrorCode& errorCode) override; virtual ~DateTime(); private: - const Locale& locale; - const DateTimeFactory::DateTimeType type; friend class DateTimeFactory; - DateTime(const Locale& l, DateTimeFactory::DateTimeType t) - : locale(l), type(t) {} - const LocalPointer icuFormatter; + friend class DateTimeValue; // Methods for parsing date literals - UDate tryPatterns(const UnicodeString&, 
UErrorCode&) const; - UDate tryTimeZonePatterns(const UnicodeString&, UErrorCode&) const; - DateInfo createDateInfoFromString(const UnicodeString&, UErrorCode&) const; - - /* - Looks up an option by name, first checking `opts`, then the cached options - in `toFormat` if applicable, and finally using a default - - Ignores any options with non-string values - */ - UnicodeString getFunctionOption(const FormattedPlaceholder& toFormat, - const FunctionOptions& opts, - std::u16string_view optionName) const; - // Version for options that don't have defaults; sets the error - // code instead of returning a default value - UnicodeString getFunctionOption(const FormattedPlaceholder& toFormat, - const FunctionOptions& opts, - std::u16string_view optionName, - UErrorCode& errorCode) const; + static UDate tryPatterns(const UnicodeString&, UErrorCode&); + static UDate tryTimeZonePatterns(const UnicodeString&, UErrorCode&); + static DateInfo createDateInfoFromString(const UnicodeString&, UErrorCode&); - }; + typedef enum DateTimeType { + kDate, + kTime, + kDateTime + } DateTimeType; - // Note: IntegerFactory doesn't implement SelectorFactory; - // instead, an instance of PluralFactory is registered to the integer - // selector - // TODO - class IntegerFactory : public FormatterFactory { - public: - Formatter* createFormatter(const Locale& locale, UErrorCode& status) override; - virtual ~IntegerFactory(); + const DateTimeType type; + static DateTime* create(DateTimeType, UErrorCode&); + DateTime(DateTimeType t) : type(t) {} + const LocalPointer icuFormatter; }; - class NumberFactory : public FormatterFactory { - public: - Formatter* createFormatter(const Locale& locale, UErrorCode& status) override; - virtual ~NumberFactory(); - private: - friend class IntegerFactory; - static NumberFactory integer(const Locale& locale, UErrorCode& status); - }; + class NumberValue; - class Number : public Formatter { + class Number : public Function { public: - FormattedPlaceholder 
format(FormattedPlaceholder&& toFormat, FunctionOptions&& options, UErrorCode& status) const override; + static Number* integer(UErrorCode& success); + static Number* number( UErrorCode& success); + + LocalPointer call(const FunctionContext& context, + const FunctionValue& operand, + const FunctionOptions& options, + UErrorCode& errorCode) override; virtual ~Number(); private: - friend class NumberFactory; + friend class NumberValue; friend class StandardFunctions; - Number(const Locale& loc) : locale(loc), icuFormatter(number::NumberFormatter::withLocale(loc)) {} - Number(const Locale& loc, bool isInt) : locale(loc), isInteger(isInt), icuFormatter(number::NumberFormatter::withLocale(loc)) {} - static Number integer(const Locale& loc); + typedef enum PluralType { + PLURAL_ORDINAL, + PLURAL_CARDINAL, + PLURAL_EXACT + } PluralType; + + static Number* create(bool, UErrorCode&); + Number(bool isInt) : isInteger(isInt) /*, icuFormatter(number::NumberFormatter::withLocale(loc))*/ {} // These options have their own accessor methods, since they have different default values. 
+ int32_t digitSizeOption(const FunctionOptions&, const UnicodeString&) const; int32_t maximumFractionDigits(const FunctionOptions& options) const; int32_t minimumFractionDigits(const FunctionOptions& options) const; int32_t minimumSignificantDigits(const FunctionOptions& options) const; @@ -192,139 +170,145 @@ static constexpr std::u16string_view YEAR = u"year"; int32_t minimumIntegerDigits(const FunctionOptions& options) const; bool usePercent(const FunctionOptions& options) const; - const Locale& locale; const bool isInteger = false; const number::LocalizedNumberFormatter icuFormatter; + + static PluralType pluralType(const FunctionOptions& opts); }; static number::LocalizedNumberFormatter formatterForOptions(const Number& number, + const Locale& locale, const FunctionOptions& opts, UErrorCode& status); - class PluralFactory : public SelectorFactory { - public: - Selector* createSelector(const Locale& locale, UErrorCode& status) const override; - virtual ~PluralFactory(); + class NumberValue : public FunctionValue { + public: + UnicodeString formatToString(UErrorCode&) const override; + void selectKeys(const UnicodeString* keys, + int32_t keysLen, + int32_t* prefs, + int32_t& prefsLen, + UErrorCode& status) const override; + UBool isSelectable() const override { return true; } + NumberValue(); + const UnicodeString& getFunctionName() const override { return functionName; } + virtual ~NumberValue(); private: - friend class IntegerFactory; - friend class MessageFormatter; + friend class Number; - PluralFactory() {} - PluralFactory(bool isInt) : isInteger(isInt) {} - static PluralFactory integer() { return PluralFactory(true);} - const bool isInteger = false; - }; + number::FormattedNumber formattedNumber; + NumberValue(const Number&, + const FunctionContext&, + const FunctionValue&, + const FunctionOptions&, + UErrorCode&); + }; // class NumberValue - class Plural : public Selector { + class DateTimeValue : public FunctionValue { public: - void 
selectKey(FormattedPlaceholder&& val, - FunctionOptions&& options, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& status) const override; - virtual ~Plural(); - + UnicodeString formatToString(UErrorCode&) const override; + DateTimeValue(); + const UnicodeString& getFunctionName() const override { return functionName; } + virtual ~DateTimeValue(); private: - friend class IntegerFactory; - friend class PluralFactory; + friend class DateTime; - // Can't use UPluralType for this since we want to include - // exact matching as an option - typedef enum PluralType { - PLURAL_ORDINAL, - PLURAL_CARDINAL, - PLURAL_EXACT - } PluralType; - Plural(const Locale& loc, UErrorCode& errorCode); - Plural(const Locale& loc, bool isInt, UErrorCode& errorCode); - static Plural integer(const Locale& loc, UErrorCode& errorCode) { return Plural(loc, true, errorCode); } - PluralType pluralType(const FunctionOptions& opts) const; - const Locale& locale; - const bool isInteger = false; - LocalPointer numberFormatter; - }; + UnicodeString formattedDate; + DateTimeValue(DateTime::DateTimeType type, const FunctionContext& context, + const FunctionValue&, const FunctionOptions&, UErrorCode&); + }; // class DateTimeValue - class TextFactory : public SelectorFactory { + class String : public Function { public: - Selector* createSelector(const Locale& locale, UErrorCode& status) const override; - virtual ~TextFactory(); - }; - - class TextSelector : public Selector { - public: - void selectKey(FormattedPlaceholder&& val, - FunctionOptions&& options, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& status) const override; - virtual ~TextSelector(); + LocalPointer call(const FunctionContext& context, + const FunctionValue& val, + const FunctionOptions& opts, + UErrorCode& errorCode) override; + static String* string(UErrorCode& status); + virtual ~String(); private: - friend class 
TextFactory; + friend class StringFactory; - // Formatting `value` to a string might require the locale - const Locale& locale; - - TextSelector(const Locale& l) : locale(l) {} + String() {} }; // See https://github.com/unicode-org/message-format-wg/blob/main/test/README.md - class TestFormatFactory : public FormatterFactory { - public: - Formatter* createFormatter(const Locale& locale, UErrorCode& status) override; - TestFormatFactory() {} - virtual ~TestFormatFactory(); - }; - - class TestSelect; + class TestFunctionValue; - class TestFormat : public Formatter { + class TestFunction : public Function { public: - FormattedPlaceholder format(FormattedPlaceholder&& toFormat, FunctionOptions&& options, UErrorCode& status) const override; - virtual ~TestFormat(); - + static TestFunction* testFunction(UErrorCode&); + static TestFunction* testFormat(UErrorCode&); + static TestFunction* testSelect(UErrorCode&); + + LocalPointer call(const FunctionContext& context, + const FunctionValue& operand, + const FunctionOptions& options, + UErrorCode& errorCode) override; + virtual ~TestFunction(); private: - friend class TestFormatFactory; - friend class TestSelect; - TestFormat() {} - static void testFunctionParameters(const FormattedPlaceholder& arg, - const FunctionOptions& options, - int32_t& decimalPlaces, - bool& failsFormat, - bool& failsSelect, - double& input, - UErrorCode& status); - + friend class TestFunctionValue; + + TestFunction(bool, bool); + void testFunctionParameters(const FunctionValue&, + const FunctionOptions&, + int32_t&, + bool&, + bool&, + double&, + UErrorCode&) const; + bool canFormat; // True iff this was invoked as test:function or test:format + bool canSelect; // True iff this was invoked as test:function or test:select }; - // See https://github.com/unicode-org/message-format-wg/blob/main/test/README.md - class TestSelectFactory : public SelectorFactory { + class TestFunctionValue : public FunctionValue { public: - Selector* createSelector(const
Locale& locale, UErrorCode& status) const override; - TestSelectFactory() {} - virtual ~TestSelectFactory(); + UnicodeString formatToString(UErrorCode&) const override; + void selectKeys(const UnicodeString*, + int32_t, + int32_t*, + int32_t&, + UErrorCode&) const override; + UBool isSelectable() const override { return canSelect; } + TestFunctionValue(); + const UnicodeString& getFunctionName() const override { return functionName; } + virtual ~TestFunctionValue(); + private: + friend class TestFunction; + + TestFunctionValue(const TestFunction&, + const FunctionContext&, + const FunctionValue&, + const FunctionOptions&, + UErrorCode&); + + UnicodeString formattedString; + bool canFormat; + bool canSelect; + int32_t decimalPlaces; + bool failsFormat; // Different from "canFormat" -- derived from "fails" option + bool failsSelect; // Different from "canSelect" -- derived from "fails" option + double input; }; - class TestSelect : public Selector { + class StringValue : public FunctionValue { public: - void selectKey(FormattedPlaceholder&& val, - FunctionOptions&& options, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& status) const override; - virtual ~TestSelect(); - + UnicodeString formatToString(UErrorCode&) const override; + void selectKeys(const UnicodeString* keys, + int32_t keysLen, + int32_t* prefs, + int32_t& prefsLen, + UErrorCode& status) const override; + UBool isSelectable() const override { return true; } + virtual ~StringValue(); private: - friend class TestSelectFactory; - TestSelect() {} - }; + friend class String; + + UnicodeString formattedString; + StringValue(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&); + }; // class StringValue }; diff --git a/icu4c/source/i18n/messageformat2_macros.h b/icu4c/source/i18n/messageformat2_macros.h index 20e81377d4d5..eafddcd4fd9e 100644 --- a/icu4c/source/i18n/messageformat2_macros.h +++ 
b/icu4c/source/i18n/messageformat2_macros.h @@ -19,6 +19,7 @@ #include "unicode/format.h" #include "unicode/unistr.h" #include "plurrule_impl.h" +#include "ubidiimp.h" U_NAMESPACE_BEGIN @@ -32,8 +33,6 @@ using namespace pluralimpl; #define LEFT_CURLY_BRACE ((UChar32)0x007B) #define RIGHT_CURLY_BRACE ((UChar32)0x007D) #define HTAB ((UChar32)0x0009) -#define CR ((UChar32)0x000D) -#define LF ((UChar32)0x000A) #define IDEOGRAPHIC_SPACE ((UChar32)0x3000) #define PIPE ((UChar32)0x007C) diff --git a/icu4c/source/i18n/messageformat2_parser.cpp b/icu4c/source/i18n/messageformat2_parser.cpp index 879c7024fc9c..0315724a80f1 100644 --- a/icu4c/source/i18n/messageformat2_parser.cpp +++ b/icu4c/source/i18n/messageformat2_parser.cpp @@ -1973,7 +1973,7 @@ void Parser::parseSelectors(UErrorCode& status) { // Parse selectors // "Backtracking" is required here. It's not clear if whitespace is // (`[s]` selector) or (`[s]` variant) - while (isWhitespace(peek()) || peek() == DOLLAR) { + while (isWhitespace(peek()) || isBidiControl(peek()) || peek() == DOLLAR) { int32_t whitespaceStart = index; parseRequiredWhitespace(status); // Restore precondition @@ -2006,25 +2006,14 @@ void Parser::parseSelectors(UErrorCode& status) { break; \ } \ - // Parse variants - // matcher = match-statement s variant *(o variant) - - // Parse first variant + // Parse required whitespace before first variant parseRequiredWhitespace(status); - if (!inBounds()) { - ERROR(status); - return; - } - parseVariant(status); - if (!inBounds()) { - // Not an error; there might be only one variant - return; - } + + // Parse variants while (isWhitespace(peek()) || isBidiControl(peek()) || isKeyStart(peek())) { - parseOptionalWhitespace(); - // Restore the precondition. // Trailing whitespace is allowed. 
+ parseOptionalWhitespace(); if (!inBounds()) { return; } diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index f98e591fad3f..e93528047b36 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -35,6 +35,7 @@ namespace message2 { class MessageContext; class StaticErrors; class InternalValue; + class BaseValue; /** *

MessageFormatter is a Technical Preview API implementing MessageFormat 2.0. @@ -167,6 +168,90 @@ namespace message2 { U_MF_STRICT } UMFErrorHandlingBehavior; + /** + * Used in conjunction with the + * MessageFormatter::Builder::setBidiIsolationStrategy() method. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + typedef enum UMFBidiIsolationStrategy { + /** + * Do not perform bidi isolation. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_MF_BIDI_OFF = 0, + /** + * Perform bidi isolation using the "default" strategy + * described in the MF2 specification (default). + * https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#handling-bidirectional-text + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_MF_BIDI_AUTO, + U_MF_BIDI_DEFAULT = U_MF_BIDI_AUTO + } UMFBidiIsolationStrategy; + + /** + * Used in conjunction with the + * MessageFormatter::Builder::setBidiIsolationStyle() method. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + typedef enum UMFBidiIsolationStyle { + /** + * Insert bidi control characters for isolation. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_MF_BIDI_STYLE_CONTROL = 0, + /** + * Insert HTML markup tags for isolation. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_MF_BIDI_STYLE_HTML, + U_MF_BIDI_STYLE_DEFAULT = U_MF_BIDI_STYLE_CONTROL + } UMFBidiIsolationStyle; + + /** + * Used in conjunction with the + * MessageFormatter::Builder::setBidiContext() method. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + typedef enum UMFBidiContext { + /** + * Denotes a left-to-right message. 
+ * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_MF_BIDI_CONTEXT_LTR = 0, + /** + * Denotes a right-to-left message. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_MF_BIDI_CONTEXT_RTL, + /** + * Indicates that the message directionality should be + * inferred from the locale. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_MF_BIDI_CONTEXT_AUTO, + U_MF_BIDI_CONTEXT_DEFAULT = U_MF_BIDI_CONTEXT_AUTO + } UMFBidiContext; /** * The mutable Builder class allows each part of the MessageFormatter to be initialized * separately; calling its `build()` method yields an immutable MessageFormatter. @@ -196,6 +281,14 @@ namespace message2 { const MFFunctionRegistry* customMFFunctionRegistry; // Error behavior; see comment in `MessageFormatter` class bool signalErrors = false; + // Bidi isolation strategy + MessageFormatter::UMFBidiIsolationStrategy + bidiIsolationStrategy = U_MF_BIDI_DEFAULT; + // Message directionality + MessageFormatter::UMFBidiContext msgdir = U_MF_BIDI_CONTEXT_DEFAULT; + // Bidi isolation style + MessageFormatter::UMFBidiIsolationStyle + bidiStyle = U_MF_BIDI_STYLE_DEFAULT; void clearState(); public: @@ -281,6 +374,62 @@ namespace message2 { * @deprecated This API is for technology preview only. */ U_I18N_API Builder& setErrorHandlingBehavior(UMFErrorHandlingBehavior type); + /** + * Set the bidi isolation behavior for this formatter. + * + * "OFF" means that no bidi isolation will be performed. + * "AUTO" means that the default bidi isolation strategy + * as described in the MF2 specification + * ( https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#handling-bidirectional-text ) + * will be applied. 
+ * + * @param strategy An enum with type UMFBidiIsolationStrategy + * that specifies how bidi isolation marks are inserted into + * the formatting result. The default is U_MF_BIDI_AUTO. + * + * @return A reference to the builder. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_I18N_API Builder& setBidiIsolationStrategy(UMFBidiIsolationStrategy strategy); + /** + * Set the bidi isolation style for this formatter. + * + * "CONTROL" means that bidi control characters will be inserted into + * the formatted result. + * "HTML" means that HTML markup will be inserted into + * the formatted result. + * + * @param style An enum with type UMFBidiIsolationStyle + * that specifies how bidi isolation is applied to + * the formatting result. The default is + * U_MF_BIDI_STYLE_CONTROL. + * + * @return A reference to the builder. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_I18N_API Builder& setBidiIsolationStyle(UMFBidiIsolationStyle style); + /** + * Set the directionality context of the input message. + * + * "LTR" means left-to-right and "RTL" means right-to-left. + * "AUTO" means to infer the context from the locale + * (either what was set with setLocale(), or the default locale + * if setLocale() was never called on the builder.) + * + * @param dir An enum with type UMFBidiContext + * that specifies the directionality of the message. + * The default is U_MF_BIDI_CONTEXT_AUTO. + * + * @return A reference to the builder. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only.
+ */ + U_I18N_API Builder& setBidiContext(UMFBidiContext dir); /** * Constructs a new immutable MessageFormatter using the pattern or data model * that was previously set, and the locale (if it was previously set) @@ -344,49 +493,34 @@ namespace message2 { // Selection methods // Takes a vector of FormattedPlaceholders - void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const; + void resolveSelectors(MessageContext&, Environment& env, UErrorCode&, UVector&) const; // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output) void filterVariants(const UVector&, UVector&, UErrorCode&) const; // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output) void sortVariants(const UVector&, UVector&, UErrorCode&) const; // Takes a vector of strings (input) and a vector of strings (output) - void matchSelectorKeys(const UVector&, MessageContext&, InternalValue* rv, UVector&, UErrorCode&) const; + void matchSelectorKeys(const UVector&, MessageContext&, InternalValue&& rv, UVector&, UErrorCode&) const; // Takes a vector of FormattedPlaceholders (input), // and a vector of vectors of strings (output) void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const; - // Formatting methods + bool checkSelectOption(const FunctionValue&) const; - [[nodiscard]] FormattedPlaceholder formatLiteral(const UnicodeString&, const data_model::Literal&) const; - void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; - // Evaluates a function call - // Dispatches on argument type - [[nodiscard]] InternalValue* evalFunctionCall(FormattedPlaceholder&& argument, - MessageContext& context, - UErrorCode& status) const; - // Dispatches on function name - [[nodiscard]] InternalValue* evalFunctionCall(const FunctionName& functionName, - InternalValue* argument, - FunctionOptions&& options, - MessageContext& context, 
- UErrorCode& status) const; - // Formats an expression that appears in a pattern or as the definition of a local variable - [[nodiscard]] InternalValue* formatExpression(const UnicodeString&, - const Environment&, - const data_model::Expression&, - MessageContext&, - UErrorCode&) const; - [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const; - [[nodiscard]] InternalValue* formatOperand(const UnicodeString&, - const Environment&, - const data_model::Operand&, - MessageContext&, - UErrorCode&) const; - [[nodiscard]] FormattedPlaceholder evalArgument(const UnicodeString&, - const data_model::VariableName&, - MessageContext&, - UErrorCode&) const; - void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const; + // Formatting methods + [[nodiscard]] InternalValue evalLiteral(const UnicodeString&, const data_model::Literal&, UErrorCode&) const; + [[nodiscard]] UnicodeString& bidiIsolate(UMFBidiOption, UMFDirectionality, UnicodeString&) const; + void formatPattern(MessageContext&, Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; + FunctionContext makeFunctionContext(const FunctionOptions&) const; + [[nodiscard]] InternalValue& apply(Environment&, const FunctionName&, InternalValue&, FunctionOptions&&, + MessageContext&, UErrorCode&) const; + [[nodiscard]] InternalValue& evalExpression(const UnicodeString&, Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const; + [[nodiscard]] FunctionOptions resolveOptions(Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const; + [[nodiscard]] InternalValue& evalOperand(const UnicodeString&, Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const; + bool operandToStringWithBadOptionError(MessageContext&, Environment&, const Operand&, UnicodeString&, UErrorCode&) const; + void validateUOptionsOnMarkup(MessageContext&, 
Environment&, const Markup&, UErrorCode&) const; + [[nodiscard]] InternalValue& evalVariableReference(const UnicodeString&, Environment&, const data_model::VariableName&, MessageContext&, UErrorCode&) const; + [[nodiscard]] InternalValue evalArgument(const UnicodeString&, const data_model::VariableName&, MessageContext&, UErrorCode&) const; + void formatSelectors(MessageContext& context, Environment& env, UErrorCode &status, UnicodeString& result) const; // Function registry methods bool hasCustomMFFunctionRegistry() const { @@ -398,18 +532,12 @@ namespace message2 { // (a FormatterFactory can have mutable state) const MFFunctionRegistry& getCustomMFFunctionRegistry() const; - bool isCustomFormatter(const FunctionName&) const; - FormatterFactory* lookupFormatterFactory(const FunctionName&, UErrorCode& status) const; - bool isBuiltInSelector(const FunctionName&) const; - bool isBuiltInFormatter(const FunctionName&) const; - bool isCustomSelector(const FunctionName&) const; - const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const; - bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); } - bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); } - const Formatter* lookupFormatter(const FunctionName&, UErrorCode&) const; - - Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const; - Formatter* getFormatter(const FunctionName&, UErrorCode&) const; + bool isCustomFunction(const FunctionName&) const; + bool isBuiltInFunction(const FunctionName&) const; + bool isFunction(const FunctionName& fn) const { return isBuiltInFunction(fn) || isCustomFunction(fn); } + void setNotSelectableError(MessageContext&, const InternalValue&, UErrorCode&) const; + // Result is not adopted + Function* lookupFunction(const FunctionName&, UErrorCode&) const; bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) 
const; // Checking for resolution errors @@ -460,6 +588,16 @@ namespace message2 { // The default is false. bool signalErrors = false; + // Bidi isolation strategy. + UMFBidiIsolationStrategy bidiIsolationStrategy = U_MF_BIDI_DEFAULT; + + // Message directionality + // Inferred from locale by default + UMFDirectionality msgdir = U_MF_DIRECTIONALITY_DEFAULT; + + // Bidi isolation style + UMFBidiIsolationStyle bidiIsolationStyle = U_MF_BIDI_STYLE_DEFAULT; + }; // class MessageFormatter } // namespace message2 diff --git a/icu4c/source/i18n/unicode/messageformat2_data_model.h b/icu4c/source/i18n/unicode/messageformat2_data_model.h index fd9b6432a5d1..42a4a77251e6 100644 --- a/icu4c/source/i18n/unicode/messageformat2_data_model.h +++ b/icu4c/source/i18n/unicode/messageformat2_data_model.h @@ -821,6 +821,7 @@ namespace message2 { virtual ~Builder(); }; // class OptionMap::Builder private: + friend class message2::MessageFormatter; friend class message2::Serializer; bool bogus = false; @@ -1240,6 +1241,7 @@ namespace message2 { private: friend class Builder; + friend class message2::MessageFormatter; friend class message2::Serializer; UMarkupType type; diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index 679798a4e81a..b9dd09b37ad7 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -31,10 +31,6 @@ class UVector; namespace message2 { - class Formatter; - class MessageContext; - class Selector; - // Formattable // ---------- @@ -475,36 +471,35 @@ namespace message2 { * a single named function option. It pairs the given name with the `Formattable` * value resulting from evaluating the option's value. * - * `ResolvedFunctionOption` is immutable and is not copyable or movable. + * `ResolvedFunctionOption` is immutable, movable, and copyable. 
* * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ #ifndef U_IN_DOXYGEN +class FunctionValue; class U_I18N_API ResolvedFunctionOption : public UObject { private: + friend class FunctionOptions; /* const */ UnicodeString name; - /* const */ Formattable value; - // True iff this option was represented in the syntax by a literal value. - // This is necessary in order to implement the spec for the `select` option - // of `:number` and `:integer`. - /* const */ bool sourceIsLiteral; + // owned by the global environment; nullptr when default-constructed + const FunctionValue* value = nullptr; + // True if this option is the product of merging two + // option maps together, and this option came from the + // first argument (the "older" options map). + bool thisWasMerged = false; public: const UnicodeString& getName() const { return name; } - const Formattable& getValue() const { return value; } - bool isLiteral() const { return sourceIsLiteral; } - ResolvedFunctionOption(const UnicodeString& n, const Formattable& f, bool s) - : name(n), value(f), sourceIsLiteral(s) {} + const FunctionValue& getValue() const { return *value; } + bool wasMerged() const { return thisWasMerged; } + ResolvedFunctionOption(const UnicodeString& n, const FunctionValue& f, bool b); ResolvedFunctionOption() {} ResolvedFunctionOption(ResolvedFunctionOption&&); - ResolvedFunctionOption& operator=(ResolvedFunctionOption&& other) noexcept { - name = std::move(other.name); - value = std::move(other.value); - sourceIsLiteral = other.sourceIsLiteral; - return *this; - } + ResolvedFunctionOption& operator=(ResolvedFunctionOption&& other) = default; + ResolvedFunctionOption& operator=(const ResolvedFunctionOption& other) = default; + ResolvedFunctionOption(const ResolvedFunctionOption&) = default; virtual ~ResolvedFunctionOption(); }; // class ResolvedFunctionOption #endif @@ -516,11 +511,13 @@ class U_I18N_API ResolvedFunctionOption : public UObject { * @internal ICU 75 technology preview * @deprecated 
This API is for technology preview only. */ -using FunctionOptionsMap = std::map; +using FunctionOptionsMap = std::map; /** * Structure encapsulating named options passed to a custom selector or formatter. * + * This class is immutable, movable and copyable. + * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ @@ -531,24 +528,31 @@ class U_I18N_API FunctionOptions : public UObject { * The syntactic order of options is not guaranteed to * be preserved. * - * This class is immutable and movable but not copyable. - * - * @return A map from strings to `message2::Formattable` objects representing + * @return A map from strings to FunctionValue objects representing * the results of resolving each option value. * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ FunctionOptionsMap getOptions() const { - int32_t len; - const ResolvedFunctionOption* resolvedOptions = getResolvedFunctionOptions(len); FunctionOptionsMap result; - for (int32_t i = 0; i < len; i++) { - const ResolvedFunctionOption& opt = resolvedOptions[i]; - result[opt.getName()] = opt.getValue(); + for (int32_t i = 0; i < functionOptionsLen; i++) { + ResolvedFunctionOption& opt = options[i]; + result[opt.getName()] = &opt.getValue(); } return result; } + /** + * Returns a new FunctionOptions object containing all the key-value + * pairs from `this` and `other`. When `this` and `other` define options with + * the same name, `this` takes preference. + * + * @return The result of merging `this` and `other`. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + FunctionOptions mergeOptions(const FunctionOptions& other, UErrorCode&) const; /** * Default constructor. * Returns an empty mapping. 
@@ -565,351 +569,61 @@ class U_I18N_API FunctionOptions : public UObject { */ virtual ~FunctionOptions(); /** - * Move assignment operator: - * The source FunctionOptions will be left in a valid but undefined state. + * Non-member swap function. If either argument is bogus, both are marked bogus and nothing is exchanged. + * @param f1 will get f2's contents + * @param f2 will get f1's contents * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - FunctionOptions& operator=(FunctionOptions&&) noexcept; + friend inline void swap(FunctionOptions& f1, FunctionOptions& f2) noexcept { + using std::swap; + + if (f1.bogus || f2.bogus) { + f1.bogus = f2.bogus = true; + return; + } + swap(f1.options, f2.options); + swap(f1.functionOptionsLen, f2.functionOptionsLen); + } /** - * Move constructor: - * The source FunctionOptions will be left in a valid but undefined state. + * Assignment operator. The argument is passed by value. * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - FunctionOptions(FunctionOptions&&); + FunctionOptions& operator=(FunctionOptions) noexcept; /** * Copy constructor. * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. 
*/ - FunctionOptions& operator=(const FunctionOptions&) = delete; + FunctionOptions(const FunctionOptions&); private: - friend class InternalValue; friend class MessageFormatter; friend class StandardFunctions; explicit FunctionOptions(UVector&&, UErrorCode&); const ResolvedFunctionOption* getResolvedFunctionOptions(int32_t& len) const; - UBool getFunctionOption(std::u16string_view, Formattable&) const; - UBool wasSetFromLiteral(const UnicodeString&) const; + const FunctionValue* getFunctionOption(const std::u16string_view, UErrorCode&) const; // Returns empty string if option doesn't exist - UnicodeString getStringFunctionOption(std::u16string_view) const; + UnicodeString getStringFunctionOption(const std::u16string_view) const; + UBool wasSetFromLiteral(const std::u16string_view) const; + // Sets error code if option doesn't exist + UnicodeString getStringFunctionOption(const std::u16string_view, UErrorCode&) const; int32_t optionsCount() const { return functionOptionsLen; } + bool bogus = false; // Used in case a copy fails // Named options passed to functions // This is not a Hashtable in order to make it possible for code in a public header file // to construct a std::map from it, on-the-fly. Otherwise, it would be impossible to put // that code in the header because it would have to call internal Hashtable methods. ResolvedFunctionOption* options; int32_t functionOptionsLen = 0; - - /** - * The original FunctionOptions isn't usable after this call. - * @returns A new, merged FunctionOptions. - */ - FunctionOptions mergeOptions(FunctionOptions&& other, UErrorCode&); }; // class FunctionOptions - /** - * A `FormattedValue` represents the result of formatting a `message2::Formattable`. - * It contains either a string or a formatted number. (More types could be added - * in the future.) - * - * `FormattedValue` is immutable and movable. It is not copyable. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - class U_I18N_API FormattedValue : public UObject { - public: - /** - * Formatted string constructor. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - explicit FormattedValue(const UnicodeString&); - /** - * Formatted number constructor. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - explicit FormattedValue(number::FormattedNumber&&); - /** - * Default constructor. Leaves the FormattedValue in - * a valid but undefined state. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedValue() : type(kString) {} - /** - * Returns true iff this is a formatted string. - * - * @return True if and only if this value is a formatted string. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - bool isString() const { return type == kString; } - /** - * Returns true iff this is a formatted number. - * - * @return True if and only if this value is a formatted number. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - bool isNumber() const { return type == kNumber; } - /** - * Gets the string contents of this value. If !isString(), then - * the result is undefined. - * @return A reference to a formatted string. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const UnicodeString& getString() const { return stringOutput; } - /** - * Gets the number contents of this value. If !isNumber(), then - * the result is undefined. - * @return A reference to a formatted number. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - const number::FormattedNumber& getNumber() const { return numberOutput; } - /** - * Move assignment operator: - * The source FormattedValue will be left in a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedValue& operator=(FormattedValue&&) noexcept; - /** - * Move constructor: - * The source FormattedValue will be left in a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedValue(FormattedValue&& other) { *this = std::move(other); } - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~FormattedValue(); - private: - enum Type { - kString, - kNumber - }; - Type type; - UnicodeString stringOutput; - number::FormattedNumber numberOutput; - }; // class FormattedValue - - /** - * A `FormattablePlaceholder` encapsulates an input value (a `message2::Formattable`) - * together with an optional output value (a `message2::FormattedValue`). - * More information, such as source line/column numbers, could be added to the class - * in the future. - * - * `FormattablePlaceholder` is immutable (not deeply immutable) and movable. - * It is not copyable. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - class U_I18N_API FormattedPlaceholder : public UObject { - public: - /** - * Fallback constructor. Constructs a value that represents a formatting error, - * without recording an input `Formattable` as the source. - * - * @param s An error string. (See the MessageFormat specification for details - * on fallback strings.) - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - explicit FormattedPlaceholder(const UnicodeString& s) : fallback(s), type(kFallback) {} - /** - * Constructor for fully formatted placeholders. - * - * @param input A `FormattedPlaceholder` containing the fallback string and source - * `Formattable` used to construct the formatted value. - * @param output A `FormattedValue` representing the formatted output of `input`. - * Passed by move. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder(const FormattedPlaceholder& input, FormattedValue&& output) - : fallback(input.fallback), source(input.source), - formatted(std::move(output)), previousOptions(FunctionOptions()), type(kEvaluated) {} - /** - * Constructor for fully formatted placeholders with options. - * - * @param input A `FormattedPlaceholder` containing the fallback string and source - * `Formattable` used to construct the formatted value. - * @param opts Function options that were used to construct `output`. May be the empty map. - * @param output A `FormattedValue` representing the formatted output of `input`. - * Passed by move. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder(const FormattedPlaceholder& input, FunctionOptions&& opts, FormattedValue&& output) - : fallback(input.fallback), source(input.source), - formatted(std::move(output)), previousOptions(std::move(opts)), type(kEvaluated) {} - /** - * Constructor for unformatted placeholders. - * - * @param input A `Formattable` object. - * @param fb Fallback string to use if an error occurs while formatting the input. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder(const Formattable& input, const UnicodeString& fb) - : fallback(fb), source(input), type(kUnevaluated) {} - /** - * Default constructor. 
Leaves the FormattedPlaceholder in a - * valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder() : type(kNull) {} - /** - * Returns the source `Formattable` value for this placeholder. - * The result is undefined if this is a null operand. - * - * @return A message2::Formattable value. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const message2::Formattable& asFormattable() const; - /** - * Returns true iff this is a fallback placeholder. - * - * @return True if and only if this placeholder was constructed from a fallback string, - * with no `Formattable` source or formatting output. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - bool isFallback() const { return type == kFallback; } - /** - * Returns true iff this is a null placeholder. - * - * @return True if and only if this placeholder represents the absent argument to a formatter - * that was invoked without an argument. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - bool isNullOperand() const { return type == kNull; } - /** - * Returns true iff this has formatting output. - * - * @return True if and only if this was constructed from both an input `Formattable` and - * output `FormattedValue`. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - bool isEvaluated() const { return (type == kEvaluated); } - /** - * Returns true iff this represents a valid argument to the formatter. - * - * @return True if and only if this is neither the null argument nor a fallback placeholder. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - bool canFormat() const { return !(isFallback() || isNullOperand()); } - /** - * Gets the fallback value of this placeholder, to be used in its place if an error occurs while - * formatting it. - * @return A reference to this placeholder's fallback string. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const UnicodeString& getFallback() const { return fallback; } - /** - * Returns the options of this placeholder. The result is the empty map if !isEvaluated(). - * @return A reference to an option map, capturing the options that were used - * in producing the output of this `FormattedPlaceholder` - * (or empty if there is no output) - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const FunctionOptions& options() const { return previousOptions; } - - /** - * Returns the formatted output of this placeholder. The result is undefined if !isEvaluated(). - * @return A fully formatted `FormattedPlaceholder`. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const FormattedValue& output() const { return formatted; } - /** - * Move assignment operator: - * The source FormattedPlaceholder will be left in a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder& operator=(FormattedPlaceholder&&) noexcept; - /** - * Move constructor: - * The source FormattedPlaceholder will be left in a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder(FormattedPlaceholder&& other) { *this = std::move(other); } - /** - * Formats this as a string, using defaults. 
If this is - * either the null operand or is a fallback value, the return value is the result of formatting the - * fallback value (which is the default fallback string if this is the null operand). - * If there is no formatted output and the input is object- or array-typed, - * then the argument is treated as a fallback value, since there is no default formatter - * for objects or arrays. - * - * @param locale The locale to use for formatting numbers or dates - * @param status Input/output error code - * @return The result of formatting this placeholder. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - UnicodeString formatToString(const Locale& locale, - UErrorCode& status) const; - - private: - friend class MessageFormatter; - - enum Type { - kFallback, // Represents the result of formatting that encountered an error - kNull, // Represents the absence of both an output and an input (not necessarily an error) - kUnevaluated, // `source` should be valid, but there's no result yet - kEvaluated, // `formatted` exists - }; - UnicodeString fallback; - Formattable source; - FormattedValue formatted; - FunctionOptions previousOptions; // Ignored unless type is kEvaluated - Type type; - }; // class FormattedPlaceholder - /** * Not yet implemented: The result of a message formatting operation. Based on * ICU4J's FormattedMessage.java. 
diff --git a/icu4c/source/i18n/unicode/messageformat2_function_registry.h b/icu4c/source/i18n/unicode/messageformat2_function_registry.h index 37690d5e04a1..c9ed16d1cc43 100644 --- a/icu4c/source/i18n/unicode/messageformat2_function_registry.h +++ b/icu4c/source/i18n/unicode/messageformat2_function_registry.h @@ -16,6 +16,7 @@ #include "unicode/messageformat2_data_model_names.h" #include "unicode/messageformat2_formattable.h" +#include "unicode/ubidi.h" #ifndef U_HIDE_DEPRECATED_API @@ -30,81 +31,39 @@ namespace message2 { using namespace data_model; - /** - * Interface that factory classes for creating formatters must implement. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - class U_I18N_API FormatterFactory : public UObject { - // TODO: the coding guidelines say that interface classes - // shouldn't inherit from UObject, but if I change it so these - // classes don't, and the individual formatter factory classes - // inherit from public FormatterFactory, public UObject, then - // memory leaks ensue - public: - /** - * Constructs a new formatter object. This method is not const; - * formatter factories with local state may be defined. - * - * @param locale Locale to be used by the formatter. - * @param status Input/output error code. - * @return The new Formatter, which is non-null if U_SUCCESS(status). - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual Formatter* createFormatter(const Locale& locale, UErrorCode& status) = 0; - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~FormatterFactory(); - /** - * Copy constructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - FormatterFactory& operator=(const FormatterFactory&) = delete; - }; // class FormatterFactory + class Function; /** - * Interface that factory classes for creating selectors must implement. + * Used to represent the directionality of a message, where + * the AUTO setting has been resolved based on locale. * - * @internal ICU 75 technology preview + * @internal ICU 78 technology preview * @deprecated This API is for technology preview only. */ - class U_I18N_API SelectorFactory : public UObject { - public: - /** - * Constructs a new selector object. - * - * @param locale Locale to be used by the selector. - * @param status Input/output error code. - * @return The new selector, which is non-null if U_SUCCESS(status). - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual Selector* createSelector(const Locale& locale, UErrorCode& status) const = 0; - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~SelectorFactory(); - /** - * Copy constructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - SelectorFactory& operator=(const SelectorFactory&) = delete; - }; // class SelectorFactory + typedef enum UMFDirectionality { + /** + * Denotes a left-to-right message. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_MF_DIRECTIONALITY_LTR = 0, + /** + * Denotes a right-to-left message. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_MF_DIRECTIONALITY_RTL, + /** + * Denotes a message with unknown directionality. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. 
+ */ + U_MF_DIRECTIONALITY_UNKNOWN, + U_MF_DIRECTIONALITY_DEFAULT = U_MF_DIRECTIONALITY_UNKNOWN + } UMFDirectionality; /** * Defines mappings from names of formatters and selectors to functions implementing them. @@ -119,38 +78,25 @@ namespace message2 { class U_I18N_API MFFunctionRegistry : public UObject { private: - using FormatterMap = Hashtable; // Map from stringified function names to FormatterFactory* - using SelectorMap = Hashtable; // Map from stringified function names to SelectorFactory* + using FunctionMap = Hashtable; // Map from function names to Function* public: /** - * Looks up a formatter factory by the name of the formatter. The result is non-const, - * since formatter factories may have local state. Returns the result by pointer + * Looks up a function by the name of the function. The result is non-const, + * since functions may have local state. Returns the result by pointer * rather than by reference since it can fail. * - * @param formatterName Name of the desired formatter. - * @return A pointer to the `FormatterFactory` registered under `formatterName`, or null - * if no formatter was registered under that name. The pointer is not owned + * @param functionName Name of the desired function. + * @return A pointer to the function registered under `functionName`, or null + * if no function was registered under that name. The pointer is not owned * by the caller. * - * @internal ICU 75 technology preview + * @internal ICU 78 technology preview * @deprecated This API is for technology preview only. */ - FormatterFactory* getFormatter(const FunctionName& formatterName) const; + Function* getFunction(const FunctionName& functionName) const; /** - * Looks up a selector factory by the name of the selector. (This returns the result by pointer - * rather than by reference since `FormatterFactory` is an abstract class.) - * - * @param selectorName Name of the desired selector. 
- * @return A pointer to the `SelectorFactory` registered under `selectorName`, or null - * if no formatter was registered under that name. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const SelectorFactory* getSelector(const FunctionName& selectorName) const; - /** - * Looks up a formatter factory by a type tag. This method gets the name of the default formatter registered + * Looks up a function by a type tag. This method gets the name of the default formatter registered * for that type. If no formatter was explicitly registered for this type, it returns false. * * @param formatterType Type tag for the desired `FormattableObject` type to be formatted. @@ -176,9 +122,9 @@ namespace message2 { class U_I18N_API Builder : public UObject { private: // Must use raw pointers to avoid instantiating `LocalPointer` on an internal type - FormatterMap* formatters; - SelectorMap* selectors; - Hashtable* formattersByType; + FunctionMap* functions; + // Mapping from strings (type tags) to FunctionNames + Hashtable* formattersByType = nullptr; // Do not define copy constructor/assignment operator Builder& operator=(const Builder&) = delete; @@ -202,18 +148,20 @@ namespace message2 { be re-thought. */ /** - * Registers a formatter factory to a given formatter name. + * Registers a function to a given name. * - * @param formatterName Name of the formatter being registered. - * @param formatterFactory A pointer to a FormatterFactory object to use - * for creating `formatterName` formatters. This argument is adopted. + * @param functionName Name of the formatter being registered. + * @param function A pointer to a Function object. + * This argument is adopted. * @param errorCode Input/output error code * @return A reference to the builder. * - * @internal ICU 75 technology preview + * @internal ICU 78 technology preview * @deprecated This API is for technology preview only. 
*/ - Builder& adoptFormatter(const data_model::FunctionName& formatterName, FormatterFactory* formatterFactory, UErrorCode& errorCode); + Builder& adoptFunction(const data_model::FunctionName& functionName, + Function* function, + UErrorCode& errorCode); /** * Registers a formatter factory to a given type tag. * (See `FormattableObject` for details on type tags.) @@ -227,21 +175,9 @@ namespace message2 { * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - Builder& setDefaultFormatterNameByType(const UnicodeString& type, const data_model::FunctionName& functionName, UErrorCode& errorCode); - - /** - * Registers a selector factory to a given selector name. Adopts `selectorFactory`. - * - * @param selectorName Name of the selector being registered. - * @param selectorFactory A SelectorFactory object to use for creating `selectorName` - * selectors. - * @param errorCode Input/output error code - * @return A reference to the builder. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - Builder& adoptSelector(const data_model::FunctionName& selectorName, SelectorFactory* selectorFactory, UErrorCode& errorCode); + Builder& setDefaultFormatterNameByType(const UnicodeString& type, + const data_model::FunctionName& functionName, + UErrorCode& errorCode); /** * Creates an immutable `MFFunctionRegistry` object with the selectors and formatters * that were previously registered. The builder cannot be used after this call. 
@@ -307,112 +243,395 @@ namespace message2 { MFFunctionRegistry& operator=(const MFFunctionRegistry&) = delete; MFFunctionRegistry(const MFFunctionRegistry&) = delete; - MFFunctionRegistry(FormatterMap* f, SelectorMap* s, Hashtable* byType); + MFFunctionRegistry(FunctionMap*, Hashtable*); MFFunctionRegistry() {} // Debugging; should only be called on a function registry with // all the standard functions registered - void checkFormatter(const char*) const; - void checkSelector(const char*) const; + void checkFunction(const char*) const; void checkStandard() const; - bool hasFormatter(const data_model::FunctionName& f) const; - bool hasSelector(const data_model::FunctionName& s) const; + bool hasFunction(const data_model::FunctionName& f) const; void cleanup() noexcept; // Must use raw pointers to avoid instantiating `LocalPointer` on an internal type - FormatterMap* formatters = nullptr; - SelectorMap* selectors = nullptr; + FunctionMap* functions = nullptr; // Mapping from strings (type tags) to FunctionNames Hashtable* formattersByType = nullptr; }; // class MFFunctionRegistry - /** - * Interface that formatter classes must implement. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - class U_I18N_API Formatter : public UObject { - public: /** - * Formats the input passed in `context` by setting an output using one of the - * `FormattingContext` methods or indicating an error. + * Used to denote the directionality of the input to a function. * - * @param toFormat Placeholder, including a source formattable value and possibly - * the output of a previous formatter applied to it; see - * `message2::FormattedPlaceholder` for details. Passed by move. - * @param options The named function options. Passed by move - * @param status Input/output error code. Should not be set directly by the - * custom formatter, which should use `FormattingContext::setFormattingWarning()` - * to signal errors. 
The custom formatter may pass `status` to other ICU functions - * that can signal errors using this mechanism. + * See https://github.com/unicode-org/message-format-wg/blob/main/spec/u-namespace.md#udir * - * @return The formatted value. - * - * @internal ICU 75 technology preview + * @internal ICU 78 technology preview * @deprecated This API is for technology preview only. */ - virtual FormattedPlaceholder format(FormattedPlaceholder&& toFormat, - FunctionOptions&& options, - UErrorCode& status) const = 0; - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~Formatter(); - }; // class Formatter + typedef enum UMFBidiOption { + /** + * Left-to-right directionality. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_MF_BIDI_OPTION_LTR = 0, + /** + * Right-to-left directionality. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_MF_BIDI_OPTION_RTL, + /** + * Directionality determined from expression contents. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_MF_BIDI_OPTION_AUTO, + /** + * Directionality inherited from the message without + * requiring isolation of the expression value. + * (Default when no u:dir option is present.) + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_MF_BIDI_OPTION_INHERIT + } UMFBidiOption; /** - * Interface that selector classes must implement. + * Class implementing data from contextual options. + * See https://github.com/unicode-org/message-format-wg/pull/846 * - * @internal ICU 75 technology preview + * @internal ICU 78 technology preview * @deprecated This API is for technology preview only. 
*/ - class U_I18N_API Selector : public UObject { - public: - /** - * Compares the input to an array of keys, and returns an array of matching - * keys sorted by preference. - * - * @param toFormat The unnamed function argument; passed by move. - * @param options A reference to the named function options. - * @param keys An array of strings that are compared to the input - * (`context.getFormattableInput()`) in an implementation-specific way. - * @param keysLen The length of `keys`. - * @param prefs An array of strings with length `keysLen`. The contents of - * the array is undefined. `selectKey()` should set the contents - * of `prefs` to a subset of `keys`, with the best match placed at the lowest index. - * @param prefsLen A reference that `selectKey()` should set to the length of `prefs`, - * which must be less than or equal to `keysLen`. - * @param status Input/output error code. Should not be set directly by the - * custom selector, which should use `FormattingContext::setSelectorError()` - * to signal errors. The custom selector may pass `status` to other ICU functions - * that can signal errors using this mechanism. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual void selectKey(FormattedPlaceholder&& toFormat, - FunctionOptions&& options, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& status) const = 0; - // Note: This takes array arguments because the internal MessageFormat code has to - // call this method, and can't include any code that constructs std::vectors. - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~Selector(); - }; // class Selector + class U_I18N_API_CLASS FunctionContext : public UObject { + public: + /** + * Returns the locale from this context. + * + * @return Locale the context was created with. 
+ * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_I18N_API const Locale& getLocale() const { return locale; } + /** + * Returns the text direction from this context. + * + * @return A UMFBidiOption indicating the text direction. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_I18N_API UMFBidiOption getDirection() const { return dir; } + /** + * Returns the ID from this context. + * + * @return A string to be used in formatting to parts. + * (Formatting to parts is not yet implemented.) + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_I18N_API const UnicodeString& getID() const { return id; } + private: + friend class MessageFormatter; + + Locale locale; + UMFBidiOption dir; + UnicodeString id; + + FunctionContext(const Locale& loc, UMFBidiOption d, UnicodeString i) + : locale(loc), dir(d), id(i) {} + }; // class FunctionContext + + class FunctionValue; + + /** + * Interface that function handler classes must implement. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Function : public UObject { + public: + /** + * Calls this Function on a FunctionValue operand and its FunctionOptions options, + * returning a LocalPointer to a FunctionValue. + * + * @param context The context of this function, based on its contextual options + * @param operand The unnamed argument to the function. + * @param options Resolved options for this function. + * @param status Input/output error code + * @return The function value that is the result of calling this function on + * the arguments. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. 
+ */ + virtual LocalPointer call(const FunctionContext& context, + const FunctionValue& operand, + const FunctionOptions& options, + UErrorCode& status) = 0; + /** + * Destructor. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~Function(); + }; // class Function + + /** + * Type representing argument and return values for custom functions. + * It encapsulates an operand and resolved options, and can be extended with + * additional state. + * Adding a new custom function requires adding a new class that + * implements this interface. + * + * FunctionValues are assumed to be immutable (the call() method on + * Function takes a const FunctionValue&, and the formatToString() + * and selectKeys() methods are const.) Feedback on whether internal + * mutable state is needed in classes implementing FunctionValue is welcome + * during the Technology Preview period. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API_CLASS FunctionValue : public UObject { + public: + /** + * Returns the string representation of this value. The default + * method signals an error. Must be overridden by classes + * implementing values that support formatting. + * + * @param status Input/output error code + * @return A string. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_I18N_API virtual UnicodeString formatToString(UErrorCode& status) const { + if (U_SUCCESS(status)) { + status = U_MF_FORMATTING_ERROR; + } + return {}; + } + /** + * Returns the Formattable operand that was used to construct + * this value. The operand may be obtained from calling unwrap() + * on the input FunctionValue, or it may be constructed separately. + * + * @return A reference to a message2::Formattable object. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. 
+ */ + U_I18N_API virtual const Formattable& unwrap() const { return innerValue; } + /** + * Returns a reference to the resolved options for this value. + * + * @return A reference to the resolved options for this value. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_I18N_API virtual const FunctionOptions& getResolvedOptions() const { return opts; } + /** + * Returns the directionality of this value, i.e. the directionality + * that its formatted result should have. + * + * @return A UMFDirectionality indicating the directionality that + * the formatted result of this value should have. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_I18N_API virtual UMFDirectionality getDirection() const { return dir; } + /** + * Returns the directionality that this value was annotated with. + * + * This is distinct from the directionality of the formatted text. + * See the description of the "Default Bidi Strategy", + * https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#handling-bidirectional-text + * for further context. + * + * @return A UMFBidiOption indicating the directionality that + * this value was annotated with. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_I18N_API virtual UMFBidiOption getDirectionAnnotation() const { return inputDir; } + /** + * Returns true if this value supports selection. The default method + * returns false. The method must be overridden for values that support + * selection. + * + * @return True iff this value supports selection. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_I18N_API virtual UBool isSelectable() const { + // In the future, this function could return a capability + // indicating whether this function can format, select, or both. 
+ return false; + } + /** + * Returns true if this value represents a null operand, that is, + * the absence of an argument. This method should not be overridden. + * It can be called in order to check whether the argument is present. + * Some functions may be nullary (they may work with no arguments). + * + * @return True iff this value represents an absent operand. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_I18N_API virtual UBool isNullOperand() const { return false; } + /** + * Compares this value to an array of keys, and returns an array of matching + * keys sorted by preference. The default implementation of this method + * signals an error. It should be overridden for value classes that support + * selection. + * + * @param keys An array of strings to compare to the input. + * @param keysLen The length of `keys`. + * @param prefs An array of indices into `keys`. + * The initial contents of + * the array are undefined. `selectKeys()` should set the contents + * of `prefs` to a subset of the indices in `keys`, + * with the best match placed at the lowest index in `prefs`. + * @param prefsLen A reference that `selectKeys()` should set to the length of `prefs`, + * which must be less than or equal to `keysLen`. + * @param status Input/output error code. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_I18N_API virtual void selectKeys(const UnicodeString* keys, + int32_t keysLen, + int32_t* prefs, + int32_t& prefsLen, + UErrorCode& status) const { + (void) keys; + (void) keysLen; + (void) prefs; + (void) prefsLen; + if (U_SUCCESS(status)) { + status = U_MF_SELECTOR_ERROR; + } + } + /** + * Returns the name of the function that constructed this value. + * + * @returns A string representing a function name. The string does + * not include a leading ':'. 
+ * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_I18N_API virtual const UnicodeString& getFunctionName() const { return functionName; } + /** + * Returns a fallback string that can be used as output + * if processing this function results in an error. + * + * @returns A string determined by the creator of this FunctionValue. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_I18N_API virtual const UnicodeString& getFallback() const { return fallback; } + /** + * Destructor. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + U_I18N_API virtual ~FunctionValue(); + protected: + /** + * Computed result of the function invocation that + * returned this FunctionValue. This may simply be the + * operand, or may be a value computed from the operand. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + Formattable innerValue; + /** + * Resolved options attached to this value. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + FunctionOptions opts; + /** + * The name of the function that constructed this FunctionValue. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + UnicodeString functionName; + /** + * Fallback string that can be used if a later function encounters + * an error when processing this FunctionValue. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + UnicodeString fallback; + /** + * Locale from u:locale option. + * Must be set from function context. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + Locale locale; + /** + * Directionality of formatted result. 
+ * Defaults to U_MF_DIRECTIONALITY_UNKNOWN if not set + * by the subclass's constructor. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + UMFDirectionality dir = U_MF_DIRECTIONALITY_UNKNOWN; + /** + * Input directionality from u:dir option. + * Defaults to U_MF_BIDI_OPTION_INHERIT if not set + * by the subclass's constructor. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. + */ + UMFBidiOption inputDir = U_MF_BIDI_OPTION_INHERIT; + private: + friend class FunctionOptions; + + // Should only be overridden by BaseValue + /** + * Returns true iff this FunctionValue was created directly or indirectly + * from a literal. + * This method should not be overridden. It is overridden by an internal class + * in the message formatter. + * It is used to implement the MessageFormat specification for the `select` + * option of `:number` and `:integer`. + * + * @returns A boolean. + * + * @internal ICU 78 technology preview + * @deprecated This API is for technology preview only. 
+ */ + virtual UBool wasCreatedFromLiteral() const { return false; } + }; // class FunctionValue } // namespace message2 diff --git a/icu4c/source/test/intltest/messageformat2test.cpp b/icu4c/source/test/intltest/messageformat2test.cpp index 609ccebd0004..e5c77bbade2b 100644 --- a/icu4c/source/test/intltest/messageformat2test.cpp +++ b/icu4c/source/test/intltest/messageformat2test.cpp @@ -24,6 +24,7 @@ TestMessageFormat2::runIndexedTest(int32_t index, UBool exec, TESTCASE_AUTO(testAPISimple); TESTCASE_AUTO(testDataModelAPI); TESTCASE_AUTO(testFormatterAPI); + TESTCASE_AUTO(testBidiAPI); TESTCASE_AUTO(testHighLoneSurrogate); TESTCASE_AUTO(testLowLoneSurrogate); TESTCASE_AUTO(testLoneSurrogateInQuotedLiteral); @@ -132,6 +133,86 @@ void TestMessageFormat2::testFormatterAPI() { result, "hello"); } +void TestMessageFormat2::testBidiAPI() { + + IcuTestErrorCode errorCode(*this, "testBidiAPI"); + UParseError parseError; + UnicodeString result; + + UnicodeString pattern = u"{{{1 :number u:dir=ltr}{2 :number u:dir=rtl}{3 :number}}}"; + MessageFormatter::Builder mfBuilder(errorCode); + mfBuilder.setPattern(pattern, parseError, errorCode); + errorCode.errIfFailureAndReset("testBidiAPI: expected success setting pattern"); + + // Bidi off, directionality LTR => no controls + mfBuilder.setBidiIsolationStrategy(MessageFormatter::U_MF_BIDI_OFF); + mfBuilder.setBidiContext(MessageFormatter::U_MF_BIDI_CONTEXT_LTR); + MessageFormatter mf = mfBuilder.build(errorCode); + result = mf.formatToString(MessageArguments(), errorCode); + errorCode.errIfFailureAndReset("testBidiAPI: expected success from builder, bidi off and LTR directionality"); + assertEquals("testBidiAPI: bidi off and LTR directionality", u"123", result); + + // Bidi off, directionality RTL => no controls + mfBuilder.setBidiContext(MessageFormatter::U_MF_BIDI_CONTEXT_RTL); + mf = mfBuilder.build(errorCode); + result = mf.formatToString(MessageArguments(), errorCode); + errorCode.errIfFailureAndReset("testBidiAPI: expected 
success from builder, bidi off and RTL directionality"); + assertEquals("testBidiAPI: bidi off and RTL directionality", u"123", result); + + // Bidi off, directionality auto => no controls + mfBuilder.setBidiContext(MessageFormatter::U_MF_BIDI_CONTEXT_AUTO); + mf = mfBuilder.build(errorCode); + result = mf.formatToString(MessageArguments(), errorCode); + errorCode.errIfFailureAndReset("testBidiAPI: expected success from builder, bidi off and auto directionality"); + assertEquals("testBidiAPI: bidi off and auto directionality", u"123", result); + + // Bidi auto, directionality LTR, style CONTROL => controls + mfBuilder.setBidiContext(MessageFormatter::U_MF_BIDI_CONTEXT_LTR); + mfBuilder.setBidiIsolationStrategy(MessageFormatter::U_MF_BIDI_AUTO); + mf = mfBuilder.build(errorCode); + result = mf.formatToString(MessageArguments(), errorCode); + errorCode.errIfFailureAndReset("testBidiAPI: expected success from builder, bidi auto and LTR directionality"); + assertEquals("testBidiAPI: bidi auto and LTR directionality", u"\u20661\u2069\u20672\u20693", result); + + // Bidi auto, directionality LTR, style HTML => html + mfBuilder.setBidiIsolationStyle(MessageFormatter::U_MF_BIDI_STYLE_HTML); + mf = mfBuilder.build(errorCode); + result = mf.formatToString(MessageArguments(), errorCode); + errorCode.errIfFailureAndReset("testBidiAPI: expected success from builder, bidi auto, LTR directionality and style HTML"); + assertEquals("testBidiAPI: bidi auto, LTR directionality and style HTML", u"123", result); + + // Bidi auto, directionality RTL => controls + mfBuilder.setBidiIsolationStyle(MessageFormatter::U_MF_BIDI_STYLE_CONTROL); + mfBuilder.setBidiContext(MessageFormatter::U_MF_BIDI_CONTEXT_RTL); + mf = mfBuilder.build(errorCode); + result = mf.formatToString(MessageArguments(), errorCode); + errorCode.errIfFailureAndReset("testBidiAPI: expected success from builder, bidi auto, RTL directionality"); + assertEquals("testBidiAPI: bidi auto, RTL directionality", 
u"\u20661\u2069\u20672\u2069\u20663\u2069", result); + + // Bidi auto, directionality auto, RTL locale + mfBuilder.setBidiContext(MessageFormatter::U_MF_BIDI_CONTEXT_AUTO); + mfBuilder.setLocale(Locale("ar")); + mf = mfBuilder.build(errorCode); + result = mf.formatToString(MessageArguments(), errorCode); + errorCode.errIfFailureAndReset("testBidiAPI: expected success from builder, bidi auto, auto directionality, RTL locale"); + assertEquals("testBidiAPI: bidi auto, auto directionality, RTL locale", u"\u20661\u2069\u20672\u2069\u20673\u2069", result); + + // Bidi auto, directionality LTR => controls + mfBuilder.setBidiContext(MessageFormatter::U_MF_BIDI_CONTEXT_LTR); + mfBuilder.setLocale(Locale("en-US")); + mf = mfBuilder.build(errorCode); + result = mf.formatToString(MessageArguments(), errorCode); + errorCode.errIfFailureAndReset("testBidiAPI: expected success from builder, bidi auto, LTR directionality"); + assertEquals("testBidiAPI: bidi auto, LTR directionality", u"\u20661\u2069\u20672\u20693", result); + + // Bidi auto, directionality auto, LTR locale -- same as above + mfBuilder.setBidiContext(MessageFormatter::U_MF_BIDI_CONTEXT_AUTO); + mf = mfBuilder.build(errorCode); + result = mf.formatToString(MessageArguments(), errorCode); + errorCode.errIfFailureAndReset("testBidiAPI: expected success from builder, bidi auto, auto directionality, LTR locale"); + assertEquals("testBidiAPI: bidi auto, auto directionality, LTR locale", u"\u20661\u2069\u20672\u20693", result); +} + // Example for design doc -- version without null and error checks void TestMessageFormat2::testAPISimple() { IcuTestErrorCode errorCode1(*this, "testAPI"); @@ -144,6 +225,8 @@ void TestMessageFormat2::testAPISimple() { // To be used in the test suite, it should include those checks // Null checks and error checks elided MessageFormatter::Builder builder(errorCode); + builder.setBidiIsolationStrategy(MessageFormatter::U_MF_BIDI_OFF); + MessageFormatter mf = builder.setPattern(u"Hello, 
{$userName}!", parseError, errorCode) .build(errorCode); @@ -202,6 +285,7 @@ void TestMessageFormat2::testAPI() { .setArgument("userName", "John") .setExpected("Hello, John!") .setLocale("en_US") + .setBidiIsolationStrategy(MessageFormatter::U_MF_BIDI_OFF) .build()); TestUtils::runTestCase(*this, test, errorCode); @@ -274,7 +358,7 @@ void TestMessageFormat2::testAPICustomFunctions() { // Set up custom function registry MFFunctionRegistry::Builder builder(errorCode); MFFunctionRegistry functionRegistry = - builder.adoptFormatter(data_model::FunctionName("person"), new PersonNameFormatterFactory(), errorCode) + builder.adoptFunction(data_model::FunctionName("person"), new PersonNameFunction(), errorCode) .build(); Person* person = new Person(UnicodeString("Mr."), UnicodeString("John"), UnicodeString("Doe")); @@ -290,6 +374,7 @@ void TestMessageFormat2::testAPICustomFunctions() { .setPattern("Hello {$name :person formality=informal}", parseError, errorCode) .setLocale(locale) + .setBidiIsolationStrategy(MessageFormatter::U_MF_BIDI_OFF) .build(errorCode); result = mf.formatToString(arguments, errorCode); assertEquals("testAPICustomFunctions", U_MF_UNKNOWN_FUNCTION_ERROR, errorCode); @@ -314,13 +399,13 @@ void TestMessageFormat2::testAPICustomFunctions() { // By type MFFunctionRegistry::Builder builderByType(errorCode); - FunctionName personFormatterName("person"); + FunctionName personFunctionName("person"); MFFunctionRegistry functionRegistryByType = - builderByType.adoptFormatter(personFormatterName, - new PersonNameFormatterFactory(), - errorCode) + builderByType.adoptFunction(personFunctionName, + new PersonNameFunction(), + errorCode) .setDefaultFormatterNameByType("person", - personFormatterName, + personFunctionName, errorCode) .build(); mfBuilder.setFunctionRegistry(functionRegistryByType); @@ -332,9 +417,12 @@ void TestMessageFormat2::testAPICustomFunctions() { // Expect "Hello John" because in the custom function we registered, // "informal" is the default 
formality and "length" is the default length assertEquals("testAPICustomFunctions", "Hello John", result); + delete person; } +PersonNameFunction::~PersonNameFunction() {} + // ICU-22890 lone surrogate cause infinity loop void TestMessageFormat2::testHighLoneSurrogate() { IcuTestErrorCode errorCode(*this, "testHighLoneSurrogate"); @@ -375,6 +463,7 @@ void TestMessageFormat2::testLoneSurrogateInQuotedLiteral() { UnicodeString expectedResult({0xdc02, 0}); icu::message2::MessageFormatter msgfmt2 = icu::message2::MessageFormatter::Builder(errorCode) + .setBidiIsolationStrategy(MessageFormatter::U_MF_BIDI_OFF) .setPattern(literal, pe, errorCode) .build(errorCode); UnicodeString result = msgfmt2.formatToString({}, errorCode); diff --git a/icu4c/source/test/intltest/messageformat2test.h b/icu4c/source/test/intltest/messageformat2test.h index 9fd5d253831d..69ca49345f5f 100644 --- a/icu4c/source/test/intltest/messageformat2test.h +++ b/icu4c/source/test/intltest/messageformat2test.h @@ -49,6 +49,8 @@ class TestMessageFormat2: public IntlTest { void testDataModelAPI(void); // Test the formatting API void testFormatterAPI(void); + // Test API functions for setting bidi preferences + void testBidiAPI(void); void testAPI(void); void testAPISimple(void); @@ -64,6 +66,8 @@ class TestMessageFormat2: public IntlTest { void testGrammarCasesFormatter(IcuTestErrorCode&); void testListFormatter(IcuTestErrorCode&); void testMessageRefFormatter(IcuTestErrorCode&); + void testComplexOptions(IcuTestErrorCode&); + void testSingleEvaluation(IcuTestErrorCode&); // Feature tests void testEmptyMessage(message2::TestCase::Builder&, IcuTestErrorCode&); @@ -101,11 +105,6 @@ U_NAMESPACE_BEGIN namespace message2 { // Custom function classes -class PersonNameFormatterFactory : public FormatterFactory { - - public: - Formatter* createFormatter(const Locale&, UErrorCode&) override; -}; class Person : public FormattableObject { public: @@ -119,11 +118,28 @@ class Person : public FormattableObject { 
const UnicodeString tagName; }; -class PersonNameFormatter : public Formatter { +class PersonNameFunction : public Function { public: - FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override; + LocalPointer call(const FunctionContext&, + const FunctionValue&, + const FunctionOptions&, + UErrorCode&) override; + virtual ~PersonNameFunction(); + PersonNameFunction() {} }; +class PersonNameValue : public FunctionValue { + public: + UnicodeString formatToString(UErrorCode&) const override; + PersonNameValue(); + virtual ~PersonNameValue(); + private: + friend class PersonNameFunction; + + UnicodeString formattedString; + PersonNameValue(const FunctionValue&, const FunctionOptions&, UErrorCode&); +}; // class PersonNameValue + class FormattableProperties : public FormattableObject { public: const UnicodeString& tag() const override { return tagName; } @@ -136,53 +152,139 @@ class FormattableProperties : public FormattableObject { const UnicodeString tagName; }; -class GrammarCasesFormatterFactory : public FormatterFactory { +class GrammarCasesFunction : public Function { public: - Formatter* createFormatter(const Locale&, UErrorCode&) override; + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; + static MFFunctionRegistry customRegistry(UErrorCode&); }; -class GrammarCasesFormatter : public Formatter { +class GrammarCasesValue : public FunctionValue { public: - FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override; - static MFFunctionRegistry customRegistry(UErrorCode&); + UnicodeString formatToString(UErrorCode&) const override; + GrammarCasesValue(); + virtual ~GrammarCasesValue(); private: + friend class GrammarCasesFunction; + + UnicodeString formattedString; + GrammarCasesValue(const FunctionValue&, const FunctionOptions&, UErrorCode&); void getDativeAndGenitive(const UnicodeString&, 
UnicodeString& result) const; -}; +}; // class GrammarCasesValue -class ListFormatterFactory : public FormatterFactory { +class ListFunction : public Function { public: - Formatter* createFormatter(const Locale&, UErrorCode&) override; + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; + static MFFunctionRegistry customRegistry(UErrorCode&); + ListFunction() {} + virtual ~ListFunction(); }; -class ListFormatter : public Formatter { +class ListValue : public FunctionValue { public: - FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override; - static MFFunctionRegistry customRegistry(UErrorCode&); + UnicodeString formatToString(UErrorCode&) const override; + virtual ~ListValue(); private: - friend class ListFormatterFactory; - const Locale& locale; - ListFormatter(const Locale& loc) : locale(loc) {} -}; + friend class ListFunction; + + UnicodeString formattedString; + ListValue(const Locale&, + const FunctionValue&, + const FunctionOptions&, + UErrorCode&); +}; // class ListValue -class ResourceManagerFactory : public FormatterFactory { +class NounValue : public FunctionValue { public: - Formatter* createFormatter(const Locale&, UErrorCode&) override; -}; + UnicodeString formatToString(UErrorCode&) const override; + NounValue(); + virtual ~NounValue(); + private: + friend class NounFunction; + + UnicodeString formattedString; + NounValue(const FunctionValue&, + const FunctionOptions&, + UErrorCode&); +}; // class NounValue + +class AdjectiveValue : public FunctionValue { + public: + UnicodeString formatToString(UErrorCode&) const override; + AdjectiveValue(); + virtual ~AdjectiveValue(); + private: + friend class AdjectiveFunction; + + UnicodeString formattedString; + AdjectiveValue(const FunctionValue&, + const FunctionOptions&, + UErrorCode&); +}; // class AdjectiveValue -class ResourceManager : public Formatter { + +class ResourceManager : public 
Function { public: - FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override; + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; static MFFunctionRegistry customRegistry(UErrorCode&); static Hashtable* properties(UErrorCode&); static UnicodeString propertiesAsString(const Hashtable&); static Hashtable* parseProperties(const UnicodeString&, UErrorCode&); + ResourceManager() {} + virtual ~ResourceManager(); +}; +class ResourceManagerValue : public FunctionValue { + public: + UnicodeString formatToString(UErrorCode&) const override; + ResourceManagerValue(); + virtual ~ResourceManagerValue(); private: - friend class ResourceManagerFactory; - ResourceManager(const Locale& loc) : locale(loc) {} - const Locale& locale; + friend class ResourceManager; + + UnicodeString formattedString; + ResourceManagerValue(const FunctionValue&, + const FunctionOptions&, + UErrorCode&); +}; // class ResourceManagerValue + +class NounFunction : public Function { + public: + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; + NounFunction() { } + virtual ~NounFunction(); }; +class AdjectiveFunction : public Function { + public: + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; + AdjectiveFunction() { } + virtual ~AdjectiveFunction(); +}; + +class CounterFunction : public Function { + public: + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; + CounterFunction() { } + virtual ~CounterFunction(); + private: + int32_t count = 0; // Number of times the function was called +}; + +class CounterFunctionValue : public FunctionValue { + public: + UnicodeString formatToString(UErrorCode&) const override; + CounterFunctionValue(); + virtual ~CounterFunctionValue(); + private: + friend class 
CounterFunction; + int32_t& count; + + CounterFunctionValue(int32_t&, + const FunctionValue&, + const FunctionOptions&, + UErrorCode&); +}; // class CounterFunctionValue + } // namespace message2 U_NAMESPACE_END diff --git a/icu4c/source/test/intltest/messageformat2test_custom.cpp b/icu4c/source/test/intltest/messageformat2test_custom.cpp index da3d38eaa09d..b81c61992ad3 100644 --- a/icu4c/source/test/intltest/messageformat2test_custom.cpp +++ b/icu4c/source/test/intltest/messageformat2test_custom.cpp @@ -12,6 +12,7 @@ #include "plurrule_impl.h" #include "unicode/listformatter.h" +#include "unicode/numberformatter.h" #include "messageformat2test.h" #include "hash.h" #include "intltest.h" @@ -35,13 +36,16 @@ void TestMessageFormat2::testPersonFormatter(IcuTestErrorCode& errorCode) { CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) - .adoptFormatter(FunctionName("person"), new PersonNameFormatterFactory(), errorCode) + .adoptFunction(FunctionName("person"), + new PersonNameFunction(), + errorCode) .build()); UnicodeString name = "name"; LocalPointer person(new Person(UnicodeString("Mr."), UnicodeString("John"), UnicodeString("Doe"))); TestCase::Builder testBuilder; testBuilder.setName("testPersonFormatter"); testBuilder.setLocale(Locale("en")); + testBuilder.setBidiIsolationStrategy(MessageFormatter::U_MF_BIDI_OFF); TestCase test = testBuilder.setPattern("Hello {$name :person formality=formal}") .setArgument(name, person.getAlias()) @@ -92,14 +96,18 @@ void TestMessageFormat2::testPersonFormatter(IcuTestErrorCode& errorCode) { .setExpected("Hello Mr. 
Doe") .setExpectSuccess() .build(); + TestUtils::runTestCase(*this, test, errorCode); + } void TestMessageFormat2::testCustomFunctionsComplexMessage(IcuTestErrorCode& errorCode) { CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) - .adoptFormatter(FunctionName("person"), new PersonNameFormatterFactory(), errorCode) + .adoptFunction(FunctionName("person"), + new PersonNameFunction(), + errorCode) .build()); UnicodeString host = "host"; UnicodeString hostGender = "hostGender"; @@ -139,6 +147,7 @@ void TestMessageFormat2::testCustomFunctionsComplexMessage(IcuTestErrorCode& err testBuilder.setLocale(Locale("en")); testBuilder.setPattern(message); testBuilder.setFunctionRegistry(&customRegistry); + testBuilder.setBidiIsolationStrategy(MessageFormatter::U_MF_BIDI_OFF); TestCase test = testBuilder.setArgument(host, jane.getAlias()) .setArgument(hostGender, "female") @@ -186,6 +195,52 @@ void TestMessageFormat2::testCustomFunctionsComplexMessage(IcuTestErrorCode& err TestUtils::runTestCase(*this, test, errorCode); } +void TestMessageFormat2::testComplexOptions(IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) + .adoptFunction(FunctionName("noun"), + new NounFunction(), + errorCode) + .adoptFunction(FunctionName("adjective"), + new AdjectiveFunction(), + errorCode) + .build()); + UnicodeString name = "name"; + TestCase::Builder testBuilder; + testBuilder.setName("testComplexOptions"); + testBuilder.setLocale(Locale("en")); + testBuilder.setFunctionRegistry(&customRegistry); + testBuilder.setBidiIsolationStrategy(MessageFormatter::U_MF_BIDI_OFF); + + // Test that options can be values with their own resolved + // options attached + TestCase test = testBuilder.setPattern(".input {$item :noun case=accusative count=1} \ + .local $colorMatchingGrammaticalNumberGenderCase = {$color :adjective accord=$item} \ + 
{{{$colorMatchingGrammaticalNumberGenderCase}}}") + + .setArgument(UnicodeString("color"), UnicodeString("red")) + .setArgument(UnicodeString("item"), UnicodeString("balloon")) + .setExpected("red balloon (accusative, singular adjective)") + .build(); + TestUtils::runTestCase(*this, test, errorCode); + + // Test that the same noun can be used multiple times + test = testBuilder.setPattern(".input {$item :noun case=accusative count=1} \ + .local $colorMatchingGrammaticalNumberGenderCase = {$color :adjective accord=$item} \ + .local $sizeMatchingGrammaticalNumberGenderCase = {$size :adjective accord=$item} \ + {{{$colorMatchingGrammaticalNumberGenderCase}, {$sizeMatchingGrammaticalNumberGenderCase}}}") + + .setArgument(UnicodeString("color"), UnicodeString("red")) + .setArgument(UnicodeString("item"), UnicodeString("balloon")) + .setArgument(UnicodeString("size"), UnicodeString("huge")) + .setExpected("red balloon (accusative, singular adjective), \ +huge balloon (accusative, singular adjective)") + .build(); + TestUtils::runTestCase(*this, test, errorCode); + +} + void TestMessageFormat2::testCustomFunctions() { IcuTestErrorCode errorCode(*this, "testCustomFunctions"); @@ -194,50 +249,92 @@ void TestMessageFormat2::testCustomFunctions() { testGrammarCasesFormatter(errorCode); testListFormatter(errorCode); testMessageRefFormatter(errorCode); + testComplexOptions(errorCode); + testSingleEvaluation(errorCode); } // -------------- Custom function implementations -Formatter* PersonNameFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return nullptr; +static UnicodeString getStringOption(const FunctionOptionsMap& opt, + const UnicodeString& k) { + if (opt.count(k) == 0) { + return {}; + } + UErrorCode localErrorCode = U_ZERO_ERROR; + const message2::FunctionValue* optVal = opt.at(k); + if (optVal == nullptr) { + return {}; + } + const UnicodeString& formatted = optVal->formatToString(localErrorCode); + if 
(U_SUCCESS(localErrorCode)) { + return formatted; + } + const UnicodeString& original = optVal->unwrap().getString(localErrorCode); + if (U_SUCCESS(localErrorCode)) { + return original; } + return {}; +} + +static bool hasStringOption(const FunctionOptionsMap& opt, + const UnicodeString& k, const UnicodeString& v) { + return getStringOption(opt, k) == v; +} - // Locale not used - (void) locale; +LocalPointer PersonNameFunction::call(const FunctionContext& context, + const FunctionValue& arg, + const FunctionOptions& opts, + UErrorCode& errorCode) { + (void) context; - Formatter* result = new PersonNameFormatter(); - if (result == nullptr) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + LocalPointer v(new PersonNameValue(arg, std::move(opts), errorCode)); + if (!v.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } - return result; + return v; } -message2::FormattedPlaceholder PersonNameFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { +UnicodeString PersonNameValue::formatToString(UErrorCode& status) const { + (void) status; + return formattedString; +} + +PersonNameValue::PersonNameValue(const FunctionValue& arg, + const FunctionOptions& options, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - return {}; + return; } + innerValue = arg.unwrap(); + opts = options; - message2::FormattedPlaceholder errorVal = message2::FormattedPlaceholder("not a person"); - - if (!arg.canFormat() || arg.asFormattable().getType() != UFMT_OBJECT) { - return errorVal; + const Formattable* toFormat = &innerValue; + if (U_FAILURE(errorCode)) { + errorCode = U_MF_OPERAND_MISMATCH_ERROR; + return; } - const Formattable& toFormat = arg.asFormattable(); - FunctionOptionsMap opt = options.getOptions(); - bool hasFormality = opt.count("formality") > 0 && opt["formality"].getType() == UFMT_STRING; - bool hasLength = opt.count("length") > 0 && opt["length"].getType() == UFMT_STRING; + FunctionOptionsMap opt = 
opts.getOptions(); - bool useFormal = hasFormality && opt["formality"].getString(errorCode) == "formal"; - UnicodeString length = hasLength ? opt["length"].getString(errorCode) : "short"; + bool useFormal = hasStringOption(opt, "formality", "formal"); + UnicodeString length = getStringOption(opt, "length"); + if (length.length() == 0) { + length = "short"; + } - const FormattableObject* fp = toFormat.getObject(errorCode); - U_ASSERT(U_SUCCESS(errorCode)); + const FormattableObject* fp = toFormat->getObject(errorCode); + if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { + errorCode = U_MF_FORMATTING_ERROR; + return; + } if (fp == nullptr || fp->tag() != u"person") { - return errorVal; + errorCode = U_MF_FORMATTING_ERROR; + return; } const Person* p = static_cast(fp); @@ -245,58 +342,41 @@ message2::FormattedPlaceholder PersonNameFormatter::format(FormattedPlaceholder& UnicodeString firstName = p->firstName; UnicodeString lastName = p->lastName; - UnicodeString result; if (length == "long") { - result += title; - result += " "; - result += firstName; - result += " "; - result += lastName; + formattedString += title; + formattedString += " "; + formattedString += firstName; + formattedString += " "; + formattedString += lastName; } else if (length == "medium") { if (useFormal) { - result += firstName; - result += " "; - result += lastName; + formattedString += firstName; + formattedString += " "; + formattedString += lastName; } else { - result += title; - result += " "; - result += firstName; + formattedString += title; + formattedString += " "; + formattedString += firstName; } } else if (useFormal) { // Default to "short" length - result += title; - result += " "; - result += lastName; + formattedString += title; + formattedString += " "; + formattedString += lastName; } else { - result += firstName; + formattedString += firstName; } - - return FormattedPlaceholder(arg, FormattedValue(std::move(result))); } FormattableProperties::~FormattableProperties() {} 
Person::~Person() {} +PersonNameValue::~PersonNameValue() {} /* See ICU4J: CustomFormatterGrammarCaseTest.java */ -Formatter* GrammarCasesFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return nullptr; - } - - // Locale not used - (void) locale; - Formatter* result = new GrammarCasesFormatter(); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - - -/* static */ void GrammarCasesFormatter::getDativeAndGenitive(const UnicodeString& value, UnicodeString& result) const { +/* static */ void GrammarCasesValue::getDativeAndGenitive(const UnicodeString& value, UnicodeString& result) const { UnicodeString postfix; if (value.endsWith("ana")) { value.extract(0, value.length() - 3, postfix); @@ -320,49 +400,78 @@ Formatter* GrammarCasesFormatterFactory::createFormatter(const Locale& locale, U result += postfix; } -message2::FormattedPlaceholder GrammarCasesFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { +LocalPointer +GrammarCasesFunction::call(const FunctionContext& context, + const FunctionValue& arg, + const FunctionOptions& opts, + UErrorCode& errorCode) { + (void) context; + if (U_FAILURE(errorCode)) { - return {}; + return LocalPointer(); } - // Argument must be present - if (!arg.canFormat()) { - errorCode = U_MF_FORMATTING_ERROR; - return message2::FormattedPlaceholder("grammarBB"); + LocalPointer v(new GrammarCasesValue(arg, std::move(opts), errorCode)); + if (!v.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; } + return v; +} - // Assumes the argument is not-yet-formatted - const Formattable& toFormat = arg.asFormattable(); - UnicodeString result; +UnicodeString GrammarCasesValue::formatToString(UErrorCode& status) const { + (void) status; + return formattedString; +} - FunctionOptionsMap opt = options.getOptions(); - switch (toFormat.getType()) { +GrammarCasesValue::GrammarCasesValue(const 
FunctionValue& val, + const FunctionOptions& opts, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return; + } + + innerValue = val.unwrap(); + // Tests don't cover composition, so no need to merge options + const Formattable* toFormat = &innerValue; + + UnicodeString result; + const FunctionOptionsMap opt = opts.getOptions(); + switch (toFormat->getType()) { case UFMT_STRING: { - const UnicodeString& in = toFormat.getString(errorCode); + const UnicodeString& in = toFormat->getString(errorCode); bool hasCase = opt.count("case") > 0; - bool caseIsString = opt["case"].getType() == UFMT_STRING; - if (hasCase && caseIsString && (opt["case"].getString(errorCode) == "dative" || opt["case"].getString(errorCode) == "genitive")) { - getDativeAndGenitive(in, result); - } else { - result += in; + const Formattable& caseAsFormattable = opt.at("case")->unwrap(); + if (U_FAILURE(errorCode)) { + errorCode = U_MF_FORMATTING_ERROR; + return; + } + bool caseIsString = caseAsFormattable.getType() == UFMT_STRING; + if (hasCase && caseIsString) { + const UnicodeString& caseOpt = caseAsFormattable.getString(errorCode); + if (caseOpt == "dative" || caseOpt == "genitive") { + getDativeAndGenitive(in, result); + } + else { + result += in; + } } U_ASSERT(U_SUCCESS(errorCode)); break; } default: { - result += toFormat.getString(errorCode); + result += toFormat->getString(errorCode); break; } } - return message2::FormattedPlaceholder(arg, FormattedValue(std::move(result))); + formattedString = result; } void TestMessageFormat2::testGrammarCasesFormatter(IcuTestErrorCode& errorCode) { CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry = MFFunctionRegistry::Builder(errorCode) - .adoptFormatter(FunctionName("grammarBB"), new GrammarCasesFormatterFactory(), errorCode) + .adoptFunction(FunctionName("grammarBB"), new GrammarCasesFunction(), errorCode) .build(); TestCase::Builder testBuilder; @@ -370,6 +479,8 @@ void TestMessageFormat2::testGrammarCasesFormatter(IcuTestErrorCode& 
errorCode) testBuilder.setFunctionRegistry(&customRegistry); testBuilder.setLocale(Locale("ro")); testBuilder.setPattern("Cartea {$owner :grammarBB case=genitive}"); + testBuilder.setBidiIsolationStrategy(MessageFormatter::U_MF_BIDI_OFF); + TestCase test = testBuilder.setArgument("owner", "Maria") .setExpected("Cartea Mariei") .build(); @@ -414,94 +525,103 @@ void TestMessageFormat2::testGrammarCasesFormatter(IcuTestErrorCode& errorCode) TestUtils::runTestCase(*this, test, errorCode); } +GrammarCasesValue::~GrammarCasesValue() {} + /* See ICU4J: CustomFormatterListTest.java */ -Formatter* ListFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { + +LocalPointer +ListFunction::call(const FunctionContext& context, + const FunctionValue& arg, + const FunctionOptions& opts, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - return nullptr; + return LocalPointer(); } - Formatter* result = new ListFormatter(locale); - if (result == nullptr) { + LocalPointer + v(new ListValue(context.getLocale(), arg, std::move(opts), errorCode)); + if (!v.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } - return result; + return v; } -message2::FormattedPlaceholder message2::ListFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { +UnicodeString ListValue::formatToString(UErrorCode& errorCode) const { + (void) errorCode; + + return formattedString; +} + +message2::ListValue::ListValue(const Locale& locale, + const FunctionValue& val, + const FunctionOptions& opts, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - return {}; + return; } - message2::FormattedPlaceholder errorVal = FormattedPlaceholder("listformat"); + innerValue = val.unwrap(); + // Tests don't cover composition, so no need to merge options - // Argument must be present - if (!arg.canFormat()) { - errorCode = U_MF_FORMATTING_ERROR; - return errorVal; + const Formattable* toFormat = &innerValue; + if (U_FAILURE(errorCode)) { + // 
Must have an argument + errorCode = U_MF_OPERAND_MISMATCH_ERROR; + return; } - // Assumes arg is not-yet-formatted - const Formattable& toFormat = arg.asFormattable(); - FunctionOptionsMap opt = options.getOptions(); - bool hasType = opt.count("type") > 0 && opt["type"].getType() == UFMT_STRING; + FunctionOptionsMap opt = opts.getOptions(); UListFormatterType type = UListFormatterType::ULISTFMT_TYPE_AND; - if (hasType) { - if (opt["type"].getString(errorCode) == "OR") { - type = UListFormatterType::ULISTFMT_TYPE_OR; - } else if (opt["type"].getString(errorCode) == "UNITS") { - type = UListFormatterType::ULISTFMT_TYPE_UNITS; - } + if (hasStringOption(opt, "type", "OR")) { + type = UListFormatterType::ULISTFMT_TYPE_OR; + } else if (hasStringOption(opt, "type", "UNITS")) { + type = UListFormatterType::ULISTFMT_TYPE_UNITS; } - bool hasWidth = opt.count("width") > 0 && opt["width"].getType() == UFMT_STRING; UListFormatterWidth width = UListFormatterWidth::ULISTFMT_WIDTH_WIDE; - if (hasWidth) { - if (opt["width"].getString(errorCode) == "SHORT") { - width = UListFormatterWidth::ULISTFMT_WIDTH_SHORT; - } else if (opt["width"].getString(errorCode) == "NARROW") { - width = UListFormatterWidth::ULISTFMT_WIDTH_NARROW; - } + if (hasStringOption(opt, "width", "SHORT")) { + width = UListFormatterWidth::ULISTFMT_WIDTH_SHORT; + } else if (hasStringOption(opt, "width", "NARROW")) { + width = UListFormatterWidth::ULISTFMT_WIDTH_NARROW; } - U_ASSERT(U_SUCCESS(errorCode)); LocalPointer lf(icu::ListFormatter::createInstance(locale, type, width, errorCode)); if (U_FAILURE(errorCode)) { - return {}; + return; } - UnicodeString result; - - switch (toFormat.getType()) { + switch (toFormat->getType()) { case UFMT_ARRAY: { int32_t n_items; - const Formattable* objs = toFormat.getArray(n_items, errorCode); + const Formattable* objs = toFormat->getArray(n_items, errorCode); if (U_FAILURE(errorCode)) { errorCode = U_MF_FORMATTING_ERROR; - return errorVal; + return; } UnicodeString* parts = new 
UnicodeString[n_items]; if (parts == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; - return {}; + return; } for (int32_t i = 0; i < n_items; i++) { parts[i] = objs[i].getString(errorCode); } U_ASSERT(U_SUCCESS(errorCode)); - lf->format(parts, n_items, result, errorCode); + lf->format(parts, n_items, formattedString, errorCode); delete[] parts; break; } default: { - result += toFormat.getString(errorCode); + formattedString += toFormat->getString(errorCode); U_ASSERT(U_SUCCESS(errorCode)); break; } } - - return FormattedPlaceholder(arg, FormattedValue(std::move(result))); } +ListValue::~ListValue() {} +ListFunction::~ListFunction() {} + void TestMessageFormat2::testListFormatter(IcuTestErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; @@ -515,12 +635,14 @@ void TestMessageFormat2::testListFormatter(IcuTestErrorCode& errorCode) { TestCase::Builder testBuilder; MFFunctionRegistry reg = MFFunctionRegistry::Builder(errorCode) - .adoptFormatter(FunctionName("listformat"), new ListFormatterFactory(), errorCode) + .adoptFunction(FunctionName("listformat"), new ListFunction(), errorCode) .build(); CHECK_ERROR(errorCode); + testBuilder.setLocale(Locale("en")); testBuilder.setFunctionRegistry(&reg); testBuilder.setArgument("languages", progLanguages, 3); + testBuilder.setBidiIsolationStrategy(MessageFormatter::U_MF_BIDI_OFF); TestCase test = testBuilder.setName("testListFormatter") .setPattern("I know {$languages :listformat type=AND}!") @@ -572,66 +694,84 @@ void TestMessageFormat2::testListFormatter(IcuTestErrorCode& errorCode) { return nullptr; } -Formatter* ResourceManagerFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { +using Arguments = MessageArguments; + +static Arguments localToGlobal(const FunctionOptionsMap& opts, UErrorCode& status) { + if (U_FAILURE(status)) { + return {}; + } + std::map result; + for (auto iter = opts.cbegin(); iter != opts.cend(); ++iter) { + result[iter->first] = iter->second->unwrap(); + } + return 
MessageArguments(result, status); +} + +LocalPointer +ResourceManager::call(const FunctionContext&, + const FunctionValue& arg, + const FunctionOptions& options, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - return nullptr; + return LocalPointer(); } - Formatter* result = new ResourceManager(locale); - if (result == nullptr) { + LocalPointer + result(new ResourceManagerValue(arg, std::move(options), errorCode)); + + if (!result.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } return result; } -using Arguments = MessageArguments; - -static Arguments localToGlobal(const FunctionOptionsMap& opts, UErrorCode& status) { - if (U_FAILURE(status)) { - return {}; - } - return MessageArguments(opts, status); +UnicodeString message2::ResourceManagerValue::formatToString(UErrorCode&) const { + return formattedString; } -message2::FormattedPlaceholder ResourceManager::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { +message2::ResourceManagerValue::ResourceManagerValue(const FunctionValue& arg, + const FunctionOptions& opts, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - return {}; + return; } - message2::FormattedPlaceholder errorVal = message2::FormattedPlaceholder("msgref"); + innerValue = arg.unwrap(); + // Tests don't cover composition, so no need to merge options - // Argument must be present - if (!arg.canFormat()) { + const Formattable* toFormat = &innerValue; + // Check for null or fallback + if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { errorCode = U_MF_FORMATTING_ERROR; - return errorVal; + return; } - - // Assumes arg is not-yet-formatted - const Formattable& toFormat = arg.asFormattable(); UnicodeString in; - switch (toFormat.getType()) { + switch (toFormat->getType()) { case UFMT_STRING: { - in = toFormat.getString(errorCode); + in = toFormat->getString(errorCode); break; } default: { // Ignore non-strings - return errorVal; + return; } } - FunctionOptionsMap opt = options.getOptions(); - bool 
hasProperties = opt.count("resbundle") > 0 && opt["resbundle"].getType() == UFMT_OBJECT && opt["resbundle"].getObject(errorCode)->tag() == u"properties"; + FunctionOptionsMap opt = opts.getOptions(); + bool hasProperties = opt.count("resbundle") > 0 + && opt["resbundle"]->unwrap().getType() == UFMT_OBJECT + && opt["resbundle"]->unwrap().getObject(errorCode)->tag() == u"properties"; // If properties were provided, look up the given string in the properties, // yielding a message if (hasProperties) { - const FormattableProperties* properties = reinterpret_cast(opt["resbundle"].getObject(errorCode)); + const FormattableProperties* properties = reinterpret_cast + (opt["resbundle"]->unwrap().getObject(errorCode)); U_ASSERT(U_SUCCESS(errorCode)); UnicodeString* msg = static_cast(properties->properties->get(in)); if (msg == nullptr) { // No message given for this key -- error out errorCode = U_MF_FORMATTING_ERROR; - return errorVal; + return; } MessageFormatter::Builder mfBuilder(errorCode); UParseError parseErr; @@ -639,7 +779,7 @@ message2::FormattedPlaceholder ResourceManager::format(FormattedPlaceholder&& ar MessageFormatter mf = mfBuilder.setPattern(*msg, parseErr, errorCode).build(errorCode); Arguments arguments = localToGlobal(opt, errorCode); if (U_FAILURE(errorCode)) { - return errorVal; + return; } UErrorCode savedStatus = errorCode; @@ -650,14 +790,16 @@ message2::FormattedPlaceholder ResourceManager::format(FormattedPlaceholder&& ar if (U_FAILURE(errorCode)) { errorCode = savedStatus; } - return FormattedPlaceholder(arg, FormattedValue(std::move(result))); + formattedString = result; } else { // Properties must be provided errorCode = U_MF_FORMATTING_ERROR; } - return errorVal; + return; } +ResourceManager::~ResourceManager() {} +ResourceManagerValue::~ResourceManagerValue() {} void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { CHECK_ERROR(errorCode); @@ -670,7 +812,7 @@ void 
TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { return; } MFFunctionRegistry reg = MFFunctionRegistry::Builder(errorCode) - .adoptFormatter(FunctionName("msgRef"), new ResourceManagerFactory(), errorCode) + .adoptFunction(FunctionName("msgRef"), new ResourceManager(), errorCode) .build(); CHECK_ERROR(errorCode); @@ -679,6 +821,7 @@ void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { testBuilder.setFunctionRegistry(&reg); testBuilder.setPattern(*static_cast(properties->get("firefox"))); testBuilder.setName("message-ref"); + testBuilder.setBidiIsolationStrategy(MessageFormatter::U_MF_BIDI_OFF); TestCase test = testBuilder.setArgument("gcase", "whatever") .setExpected("Firefox") @@ -731,6 +874,201 @@ void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { TestUtils::runTestCase(*this, test, errorCode); } +LocalPointer +NounFunction::call(const FunctionContext&, + const FunctionValue& arg, + const FunctionOptions& opts, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + + LocalPointer + v(new NounValue(arg, std::move(opts), errorCode)); + if (!v.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return v; +} + +UnicodeString NounValue::formatToString(UErrorCode& status) const { + (void) status; + + return formattedString; +} + +NounValue::NounValue(const FunctionValue& arg, + const FunctionOptions& options, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return; + } + + innerValue = arg.unwrap(); + opts = options.mergeOptions(arg.getResolvedOptions(), errorCode); + + const Formattable* toFormat = &innerValue; + FunctionOptionsMap opt = opts.getOptions(); + + // very simplified example + bool useAccusative = hasStringOption(opt, "case", "accusative"); + bool useSingular = hasStringOption(opt, "count", "1"); + const UnicodeString& noun = toFormat->getString(errorCode); + if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { + errorCode = 
U_MF_FORMATTING_ERROR; + return; + } + + if (useAccusative) { + if (useSingular) { + formattedString = noun + " accusative, singular noun"; + } else { + formattedString = noun + " accusative, plural noun"; + } + } else { + if (useSingular) { + formattedString = noun + " dative, singular noun"; + } else { + formattedString = noun + " dative, plural noun"; + } + } +} + +LocalPointer +AdjectiveFunction::call(const FunctionContext&, + const FunctionValue& arg, + const FunctionOptions& opts, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + + LocalPointer + v(new AdjectiveValue(arg, std::move(opts), errorCode)); + if (!v.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return v; +} + +UnicodeString AdjectiveValue::formatToString(UErrorCode& status) const { + (void) status; + + return formattedString; +} + +AdjectiveValue::AdjectiveValue(const FunctionValue& arg, + const FunctionOptions& options, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return; + } + + innerValue = arg.unwrap(); + opts = options.mergeOptions(arg.getResolvedOptions(), errorCode); + + const Formattable* toFormat = &innerValue; + + const FunctionOptionsMap opt = opts.getOptions(); + // Return empty string if no accord is provided + if (opt.count("accord") <= 0) { + return; + } + + const FunctionValue& accordOpt = *opt.at("accord"); + const Formattable& accordSrc = accordOpt.unwrap(); + UnicodeString accord = accordSrc.getString(errorCode); + const UnicodeString& adjective = toFormat->getString(errorCode); + if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { + errorCode = U_MF_FORMATTING_ERROR; + return; + } + + formattedString = adjective + " " + accord; + // very simplified example + FunctionOptionsMap accordOptionsMap = accordOpt.getResolvedOptions().getOptions(); + bool accordIsAccusative = hasStringOption(accordOptionsMap, "case", "accusative"); + bool accordIsSingular = hasStringOption(accordOptionsMap, "count", "1"); + if 
(accordIsAccusative) { + if (accordIsSingular) { + formattedString += " (accusative, singular adjective)"; + } else { + formattedString += " (accusative, plural adjective)"; + } + } else { + if (accordIsSingular) { + formattedString += " (dative, singular adjective)"; + } else { + formattedString += " (dative, plural adjective)"; + } + } +} + +NounFunction::~NounFunction() {} +AdjectiveFunction::~AdjectiveFunction() {} +NounValue::~NounValue() {} +AdjectiveValue::~AdjectiveValue() {} + +void TestMessageFormat2::testSingleEvaluation(IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) + .adoptFunction(FunctionName("counter"), + new CounterFunction(), + errorCode) + .build()); + UnicodeString name = "name"; + TestCase::Builder testBuilder; + testBuilder.setName("testSingleEvaluation"); + testBuilder.setLocale(Locale("en")); + testBuilder.setFunctionRegistry(&customRegistry); + testBuilder.setBidiIsolationStrategy(MessageFormatter::U_MF_BIDI_OFF); + + // Test that the RHS of each declaration is evaluated at most once + TestCase test = testBuilder.setPattern(".local $x = {:counter}\ + {{{$x} {$x}}}") + .setExpected("1 1") + .build(); + TestUtils::runTestCase(*this, test, errorCode); +} + +LocalPointer +CounterFunction::call(const FunctionContext&, + const FunctionValue& arg, + const FunctionOptions& opts, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + + LocalPointer + v(new CounterFunctionValue(count, arg, std::move(opts), errorCode)); + if (!v.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + count++; + return v; +} + +CounterFunctionValue::CounterFunctionValue(int32_t& c, + const FunctionValue&, + const FunctionOptions&, + UErrorCode&) : count(c) { + // No operand, no options +} + +UnicodeString CounterFunctionValue::formatToString(UErrorCode& status) const { + if (U_FAILURE(status)) { + return {}; + } + 
number::UnlocalizedNumberFormatter nf = number::NumberFormatter::with(); + number::FormattedNumber formattedNumber = nf.locale("en-US").formatInt(count, status); + return formattedNumber.toString(status); +} + +CounterFunction::~CounterFunction() {} +CounterFunctionValue::~CounterFunctionValue() {} + #endif /* #if !UCONFIG_NO_MF2 */ #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/messageformat2test_read_json.cpp b/icu4c/source/test/intltest/messageformat2test_read_json.cpp index 47a4fc29d401..8098d5ded585 100644 --- a/icu4c/source/test/intltest/messageformat2test_read_json.cpp +++ b/icu4c/source/test/intltest/messageformat2test_read_json.cpp @@ -141,6 +141,7 @@ static void runValidTest(TestMessageFormat2& icuTest, const std::string& testName, const std::string& defaultError, bool anyError, + const std::string& bidiIsolationStrategy, const json& j, IcuTestErrorCode& errorCode) { auto j_object = j.template get(); @@ -169,6 +170,12 @@ static void runValidTest(TestMessageFormat2& icuTest, return; } + if (bidiIsolationStrategy == "default") { + test.setBidiIsolationStrategy(MessageFormatter::U_MF_BIDI_AUTO); + } else { + test.setBidiIsolationStrategy(MessageFormatter::U_MF_BIDI_OFF); + } + if (!j_object["exp"].is_null()) { // Set expected result if it's present std::string expectedOutput = j["exp"].template get(); @@ -275,6 +282,15 @@ static void runTestsFromJsonFile(TestMessageFormat2& t, } } + std::string bidiIsolationStrategy; + if (!j_object["defaultTestProperties"].is_null() + && !j_object["defaultTestProperties"]["bidiIsolation"].is_null()) { + auto bidiIsolation = j_object["defaultTestProperties"]["bidiIsolation"]; + if (bidiIsolation.is_string()) { + bidiIsolationStrategy = bidiIsolation.template get(); + } + } + if (!j_object["tests"].is_null()) { auto tests = j_object["tests"].template get>(); for (auto iter = tests.begin(); iter != tests.end(); ++iter) { @@ -283,7 +299,7 @@ static void runTestsFromJsonFile(TestMessageFormat2& 
t, // Use error_handler_t::ignore because of the patch to allow lone surrogates t.logln(u_str(iter->dump(-1, ' ', false, nlohmann::detail::error_handler_t::ignore))); - runValidTest(t, testName, defaultError, anyError, *iter, errorCode); + runValidTest(t, testName, defaultError, anyError, bidiIsolationStrategy, *iter, errorCode); } } else { // Test doesn't follow schema -- probably an error @@ -312,10 +328,9 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) { // Do valid spec tests runTestsFromJsonFile(*this, "spec/syntax.json", errorCode); runTestsFromJsonFile(*this, "spec/fallback.json", errorCode); - - // Uncomment when test functions are implemented in the registry - // See https://unicode-org.atlassian.net/browse/ICU-22907 - // runTestsFromJsonFile(*this, "spec/pattern-selection.json", errorCode); + runTestsFromJsonFile(*this, "spec/u-options.json", errorCode); + runTestsFromJsonFile(*this, "spec/bidi.json", errorCode); + runTestsFromJsonFile(*this, "spec/pattern-selection.json", errorCode); // Do valid function tests runTestsFromJsonFile(*this, "spec/functions/date.json", errorCode); @@ -326,36 +341,16 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) { runTestsFromJsonFile(*this, "spec/functions/time.json", errorCode); // Other tests (non-spec) - // TODO: https://github.com/unicode-org/message-format-wg/pull/902 will - // move the bidi tests into the spec - runTestsFromJsonFile(*this, "bidi.json", errorCode); runTestsFromJsonFile(*this, "more-functions.json", errorCode); runTestsFromJsonFile(*this, "valid-tests.json", errorCode); runTestsFromJsonFile(*this, "resolution-errors.json", errorCode); runTestsFromJsonFile(*this, "matches-whitespace.json", errorCode); runTestsFromJsonFile(*this, "alias-selector-annotations.json", errorCode); - runTestsFromJsonFile(*this, "runtime-errors.json", errorCode); - runTestsFromJsonFile(*this, "more-syntax-errors.json", errorCode); - - // Re: the expected output for the 
first test in this file: - // Note: the more "correct" fallback output seems like it should be "1.000 3" (ignoring the - // overriding .input binding of $var2) but that's hard to achieve - // as so-called "implicit declarations" can only be detected after parsing, at which - // point the data model can't be modified. - // Probably this is going to change anyway so that any data model error gets replaced - // with a fallback for the whole message. - // The second test has a similar issue with the output. runTestsFromJsonFile(*this, "tricky-declarations.json", errorCode); // Markup is ignored when formatting to string runTestsFromJsonFile(*this, "markup.json", errorCode); - // TODO(duplicates): currently the expected output is based on using - // the last definition of the duplicate-declared variable; - // perhaps it's better to remove all declarations for $foo before formatting. - // however if https://github.com/unicode-org/message-format-wg/pull/704 lands, - // it'll be a moot point since the output will be expected to be the fallback string - // (This applies to the expected output for all the U_DUPLICATE_DECLARATION_ERROR tests) runTestsFromJsonFile(*this, "duplicate-declarations.json", errorCode); runTestsFromJsonFile(*this, "invalid-options.json", errorCode); @@ -369,6 +364,8 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) { runTestsFromJsonFile(*this, "icu-parser-tests.json", errorCode); runTestsFromJsonFile(*this, "icu-test-selectors.json", errorCode); runTestsFromJsonFile(*this, "icu-test-previous-release.json", errorCode); + + // TODO (not yet implemented): currency, math (but math might be removed?) 
} #endif /* #if !UCONFIG_NO_MF2 */ diff --git a/icu4c/source/test/intltest/messageformat2test_utils.h b/icu4c/source/test/intltest/messageformat2test_utils.h index 47e960228510..8348ec8256cf 100644 --- a/icu4c/source/test/intltest/messageformat2test_utils.h +++ b/icu4c/source/test/intltest/messageformat2test_utils.h @@ -36,6 +36,7 @@ class TestCase : public UMemory { /* const */ uint32_t lineNumber; /* const */ uint32_t offset; /* const */ bool ignoreError; + /* const */ MessageFormatter::UMFBidiIsolationStrategy bidiIsolationStrategy; // Function registry is not owned by the TestCase object const MFFunctionRegistry* functionRegistry = nullptr; @@ -43,6 +44,9 @@ class TestCase : public UMemory { public: const UnicodeString& getPattern() const { return pattern; } const Locale& getLocale() const { return locale; } + MessageFormatter::UMFBidiIsolationStrategy getBidiIsolationStrategy() const { + return bidiIsolationStrategy; + } std::map getArguments() const { return std::move(arguments); } const UnicodeString& getTestName() const { return testName; } bool expectSuccess() const { @@ -163,6 +167,10 @@ class TestCase : public UMemory { locale = loc; return *this; } + Builder& setBidiIsolationStrategy(MessageFormatter::UMFBidiIsolationStrategy s) { + bidiIsolationStrategy = s; + return *this; + } Builder& setExpectedLineNumberAndOffset(uint32_t line, uint32_t o) { hasLineNumberAndOffset = true; lineNumber = line; @@ -201,10 +209,20 @@ class TestCase : public UMemory { uint32_t lineNumber; uint32_t offset; bool ignoreError; + MessageFormatter::UMFBidiIsolationStrategy bidiIsolationStrategy; const MFFunctionRegistry* functionRegistry = nullptr; // Not owned public: - Builder() : pattern(""), locale(Locale::getDefault()), hasExpectedOutput(false), expected(""), expectedError(U_ZERO_ERROR), arbitraryError(false), expectNoSyntaxError(false), hasLineNumberAndOffset(false), ignoreError(false) {} + Builder() : pattern(""), + locale(Locale::getDefault()), + 
hasExpectedOutput(false), + expected(""), + expectedError(U_ZERO_ERROR), + arbitraryError(false), + expectNoSyntaxError(false), + hasLineNumberAndOffset(false), + ignoreError(false), + bidiIsolationStrategy(MessageFormatter::U_MF_BIDI_AUTO) {} }; private: @@ -222,6 +240,7 @@ class TestCase : public UMemory { lineNumber(builder.hasLineNumberAndOffset ? builder.lineNumber : 0), offset(builder.hasLineNumberAndOffset ? builder.offset : 0), ignoreError(builder.ignoreError), + bidiIsolationStrategy(builder.bidiIsolationStrategy), functionRegistry(builder.functionRegistry) { // If an error is not expected, then the expected // output should be present @@ -240,6 +259,7 @@ class TestUtils { UParseError parseError; MessageFormatter::Builder mfBuilder(errorCode); + mfBuilder.setBidiIsolationStrategy(testCase.getBidiIsolationStrategy()); mfBuilder.setPattern(testCase.getPattern(), parseError, errorCode).setLocale(testCase.getLocale()); if (testCase.hasCustomRegistry()) { @@ -295,6 +315,9 @@ class TestUtils { if (!testCase.lineNumberAndOffsetMatch(parseError.line, parseError.offset)) { failWrongOffset(tmsg, testCase, parseError.line, parseError.offset); } + if (testCase.expectSuccess() && !testCase.outputMatches(result)) { + failWrongOutput(tmsg, testCase, result); + } if (U_FAILURE(errorCode) && !testCase.expectSuccess() && testCase.expectedErrorCode() != U_MF_SYNTAX_ERROR) { // Re-run the formatter if there was an error, diff --git a/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json b/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json index 21f917da6e52..8cfd9d8ff7c4 100644 --- a/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json +++ b/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json @@ -127,7 +127,7 @@ ], "exp": "Hello John, you want '9:43 PM', 'August 3, 2024 at 9:43 PM', or '8/3/24, 9:43:57 PM Pacific Daylight Time' or even 'Saturday, August 3, 2024 at 9:43 PM'?", "params": {"exp": { "date": 1722746637000 }, 
"user": "John", "tsOver" : "long" }, - "ignoreTest": "ICU-22754 ICU4C doesn't implement this kind of function composition yet. See https://github.com/unicode-org/message-format-wg/issues/515" + "ignoreTest": "timeStyle=long should print 'PDT', not 'Pacific Daylight Time'?" }, { "srcs": [ diff --git a/testdata/message2/bidi.json b/testdata/message2/bidi.json deleted file mode 100644 index 0654befba25b..000000000000 --- a/testdata/message2/bidi.json +++ /dev/null @@ -1,160 +0,0 @@ -{ - "scenario": "Bidi support", - "description": "Tests for correct parsing of messages with bidirectional marks and isolates", - "defaultTestProperties": { - "locale": "en-US" - }, - "tests": [ - { - "comment": "simple-message = o [simple-start pattern]", - "src": " \u061C Hello world!", - "exp": " \u061C Hello world!" - }, - { - "comment": "complex-message = o *(declaration o) complex-body o", - "src": "\u200E .local $x = {1} {{ {$x}}}", - "exp": " 1" - }, - { - "comment": "complex-message = o *(declaration o) complex-body o", - "src": ".local $x = {1} \u200F {{ {$x}}}", - "exp": " 1" - }, - { - "comment": "complex-message = o *(declaration o) complex-body o", - "src": ".local $x = {1} {{ {$x}}} \u2066", - "exp": " 1" - }, - { - "comment": "input-declaration = input o variable-expression", - "src": ".input \u2067 {$x :number} {{hello}}", - "params": [{"name": "x", "value": "1"}], - "exp": "hello" - }, - { - "comment": "local s variable o \"=\" o expression", - "src": ".local $x \u2068 = \u2069 {1} {{hello}}", - "exp": "hello" - }, - { - "comment": "local s variable o \"=\" o expression", - "src": ".local \u2067 $x = {1} {{hello}}", - "exp": "hello" - }, - { - "comment": "local s variable o \"=\" o expression", - "src": ".local\u2067 $x = {1} {{hello}}", - "exp": "hello" - }, - { - "comment": "o \"{{\" pattern \"}}\"", - "src": "\u2067 {{hello}}", - "exp": "hello" - }, - { - "comment": "match-statement s variant *(o variant)", - "src": [".local $x = {1 :number}\n", - ".match $x\n", - "1 
{{one}}\n", - "\u061C * {{other}}"], - "exp": "one" - }, - { - "comment": "match-statement s variant *(o variant)", - "src": [".local $x = {1 :number}", - ".match $x \u061c", - "1 {{one}}", - "* {{other}}"], - "exp": "one" - }, - { - "comment": "match-statement s variant *(o variant)", - "src": [".local $x = {1 :number}", - ".match $x \u061c", - "1 {{one}}", - "* {{other}}"], - "exp": "one" - }, - { - "comment": "variant = key *(s key) quoted-pattern", - "src": [".local $x = {1 :number} .local $y = {$x :number}", - ".match $x $y\n", - "1 \u200E 1 {{one}}", - "* * {{other}}"], - "exp": "one" - }, - { - "comment": "variant = key *(s key) quoted-pattern", - "src": [".local $x = {1 :number} .local $y = {$x :number}", - ".match $x $y\n", - "1\u200E 1 {{one}}", - "* * {{other}}"], - "exp": "one" - }, - { - "comment": "literal-expression = \"{\" o literal [s function] *(s attribute) o \"}\"", - "src": "{\u200E hello \u200F}", - "exp": "hello" - }, - { - "comment": "variable-expression = \"{\" o variable [s function] *(s attribute) o \"}\"", - "src": ".local $x = {1} {{ {\u200E $x \u200F} }}", - "exp": " 1 " - }, - { - "comment": "function-expression = \"{\" o function *(s attribute) o \"}\"", - "src": "{1 \u200E :number \u200F}", - "exp": "1" - }, - { - "comment": "markup = \"{\" o \"#\" identifier *(s option) *(s attribute) o [\"/\"] \"}\"", - "src": "{\u200F #b \u200E }", - "exp": "" - }, - { - "comment": "markup = \"{\" o \"/\" identifier *(s option) *(s attribute) o \"}\"", - "src": "{\u200F /b \u200E }", - "exp": "" - }, - { - "comment": "option = identifier o \"=\" o (literal / variable)", - "src": "{1 :number minimumFractionDigits\u200F=\u200E1 }", - "exp": "1.0" - }, - { - "comment": "attribute = \"@\" identifier [o \"=\" o (literal / variable)]", - "src": "{1 :number @locale\u200F=\u200Een }", - "exp": "1" - }, - { - "comment": " name... 
excludes U+FFFD and U+061C -- this pases as name -> [bidi] name-start *name-char", - "src": ".local $\u061Cfoo = {1} {{ {$\u061Cfoo} }}", - "exp": " 1 " - }, - { - "comment": " name matches https://www.w3.org/TR/REC-xml-names/#NT-NCName but excludes U+FFFD and U+061C", - "src": ".local $foo\u061Cbar = {2} {{ }}", - "expErrors": [{"type": "syntax-error"}] - }, - { - "comment": "name = [bidi] name-start *name-char [bidi]", - "src": ".local $\u200Efoo\u200F = {3} {{{$\u200Efoo\u200F}}}", - "exp": "3" - }, - { - "comment": "name = [bidi] name-start *name-char [bidi]", - "src": ".local $foo = {4} {{{$\u200Efoo\u200F}}}", - "exp": "4" - }, - { - "comment": "name = [bidi] name-start *name-char [bidi]", - "src": ".local $\u200Efoo\u200F = {5} {{{$foo}}}", - "exp": "5" - }, - { - "comment": "name = [bidi] name-start *name-char [bidi]", - "src": ".local $foo\u200Ebar = {6} {{{$foo\u200Ebar}}}", - "expErrors": [{"type": "syntax-error"}] - } - ] -} diff --git a/testdata/message2/icu-test-functions.json b/testdata/message2/icu-test-functions.json index a97446addf0e..ef5440691129 100644 --- a/testdata/message2/icu-test-functions.json +++ b/testdata/message2/icu-test-functions.json @@ -2,7 +2,8 @@ "scenario": "Function tests", "description": "Tests for ICU-specific formatting behavior.", "defaultTestProperties": { - "locale": "en-US" + "locale": "en-US", + "bidiIsolation": "none" }, "tests": [ { @@ -111,13 +112,20 @@ }, { "src": "Expires at {|2024-07-02T19:23:45Z| :datetime timeStyle=long}", - "exp": "Expires at 7:23:45 PM GMT", - "ignoreCpp": "ICU-22754 Time zones not working yet (bug)" + "exp": "Expires at 7:23:45 PM GMT" }, { "src": "Expires at {|2024-07-02T19:23:45+03:30| :datetime timeStyle=full}", - "exp": "Expires at 7:23:45 PM GMT+03:30", - "ignoreCpp": "ICU-22754 Time zones not working yet (bug)" + "exp": "Expires at 7:23:45 PM GMT+03:30" + }, + { + "comment": "Horibly long, but I really wanted to test multiple declarations with overrides, and you can't join strings in 
JSON", + "src": [ + ".input {$exp :datetime timeStyle=short}\n", + "{{Hello John, or even '{$exp :datetime dateStyle=full}'?}}" + ], + "exp": "Hello John, or even 'Saturday, August 3, 2024 at 9:43 PM'?", + "params": [{"name": "exp", "value": { "date": 1722746637000 }}] }, { "comment": "Horibly long, but I really wanted to test multiple declarations with overrides, and you can't join strings in JSON", @@ -131,8 +139,7 @@ "exp": "Hello John, you want '9:43 PM', 'August 3, 2024 at 9:43 PM', or '8/3/24, 9:43:57 PM Pacific Daylight Time' or even 'Saturday, August 3, 2024 at 9:43 PM'?", "params": [{"name": "exp", "value": { "date": 1722746637000 }}, {"name": "user", "value": "John"}, - {"name": "tsOver", "value": "full" }], - "ignoreCpp": "ICU-22754 ICU4C doesn't implement this kind of function composition yet. See https://github.com/unicode-org/message-format-wg/issues/515" + {"name": "tsOver", "value": "full" }] }, { "src": [ diff --git a/testdata/message2/icu-test-previous-release.json b/testdata/message2/icu-test-previous-release.json index 5f16c8587666..b80f8ba815a9 100644 --- a/testdata/message2/icu-test-previous-release.json +++ b/testdata/message2/icu-test-previous-release.json @@ -2,7 +2,8 @@ "scenario": "Tests from original ICU4J release", "description": "Tests taken from the September 2022 MF2 ICU4J release", "defaultTestProperties": { - "locale": "en-US" + "locale": "en-US", + "bidiIsolation": "none" }, "tests": [ { diff --git a/testdata/message2/more-functions.json b/testdata/message2/more-functions.json index 6d074f8b2d80..4e79b9316630 100644 --- a/testdata/message2/more-functions.json +++ b/testdata/message2/more-functions.json @@ -2,7 +2,8 @@ "scenario": "Function tests 2", "description": "More tests for ICU-specific formatting behavior.", "defaultTestProperties": { - "locale": "en-US" + "locale": "en-US", + "bidiIsolation": "none" }, "tests": [ { @@ -112,6 +113,35 @@ "exp": "Default number: 1.234.567.890.123.456.789,987654!", "locale": "ro", "params": [{ 
"name": "val", "value": {"decimal": "1234567890123456789.987654321"} }] + }, + { + "src": ".local $x = {42 :number minimumFractionDigits=2} .local $y = {$x :number minimumFractionDigits=5} {{{$x} {$y}}}", + "exp": "42.00 42.00000", + "locale": "en" + }, + { + "src": ".local $x = {42 :number minimumFractionDigits=5} .local $y = {$x :number minimumFractionDigits=2} {{{$x} {$y}}}", + "exp": "42.00000 42.00", + "locale": "en" + }, + { + "src": ".local $x = {42 :number minimumFractionDigits=5} .local $y = {$x :number minimumIntegerDigits=3} {{{$x} {$y}}}", + "exp": "42.00000 042.00000", + "locale": "en" + }, + { + "comment": "Modified from icu4j test", + "src": [ + ".input {$exp :datetime timeStyle=short}\n", + ".input {$user :string}\n", + ".local $longExp = {$exp :datetime dateStyle=long}\n", + ".local $zooExp = {$exp :datetime dateStyle=short timeStyle=$tsOver}\n", + "{{Hello John, you want '{$exp}', '{$longExp}', or '{$zooExp}' or even '{$exp :datetime dateStyle=full}'?}}" + ], + "exp": "Hello John, you want '9:43 PM', 'August 3, 2024 at 9:43 PM', or '8/3/24, 9:43:57 PM Pacific Daylight Time' or even 'Saturday, August 3, 2024 at 9:43 PM'?", + "params": [{"name": "exp", "value": { "date": 1722746637000 }}, + {"name": "user", "value": "John"}, + {"name": "tsOver", "value" : "full" }] } ] } diff --git a/testdata/message2/runtime-errors.json b/testdata/message2/runtime-errors.json deleted file mode 100644 index ffeb081fab0d..000000000000 --- a/testdata/message2/runtime-errors.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "scenario": "Runtime errors", - "description": "Tests for bad-selector and bad-operand errors", - "defaultTestProperties": { - "locale": "en-US" - }, - "tests": [ - { - "src": ".local $h = {|horse| :date} .match $h\n 1 {{The value is one.}}\n * {{Formatter used as selector.}}", - "exp": "Formatter used as selector.", - "expErrors": [{"type": "bad-selector"}], - "ignoreJava": "ICU4J doesn't signal runtime errors?" 
- }, - { - "src": ".local $h = {|horse| :number} .match $h\n 1 {{The value is one.}}\n * {{horse is not a number.}}", - "exp": "horse is not a number.", - "expErrors": [{"type": "bad-selector"}], - "ignoreJava": "ICU4J doesn't signal runtime errors?" - }, - { - "src": ".local $sel = {|horse| :number}\n .match $sel\n 1 {{The value is one.}}\n * {{horse is not a number.}}", - "exp": "horse is not a number.", - "expErrors": [{"type": "bad-selector"}], - "ignoreJava": "ICU4J doesn't signal runtime errors?" - } - ] -} diff --git a/testdata/message2/spec/bidi.json b/testdata/message2/spec/bidi.json index 2d650a3e34d6..941448554036 100644 --- a/testdata/message2/spec/bidi.json +++ b/testdata/message2/spec/bidi.json @@ -1,4 +1,5 @@ { + "$schema": "../schemas/v0/tests.schema.json", "scenario": "Bidi support", "description": "Tests for correct parsing of messages with bidirectional marks and isolates", "defaultTestProperties": { @@ -113,12 +114,12 @@ "exp": "1" }, { - "description": " name... excludes U+FFFD and U+061C -- this pases as name -> [bidi] name-start *name-char", + "description": "name... 
excludes bidi formatting character U+061C -- this parses as name -> [bidi] name-start *name-char", "src": ".local $\u061Cfoo = {1} {{ {$\u061Cfoo} }}", "exp": " \u20681\u2069 " }, { - "description": " name matches https://www.w3.org/TR/REC-xml-names/#NT-NCName but excludes U+FFFD and U+061C", + "description": "name excludes bidi formatting character U+061C", "src": ".local $foo\u061Cbar = {2} {{ }}", "expErrors": [{"type": "syntax-error"}] }, diff --git a/testdata/message2/spec/data-model-errors.json b/testdata/message2/spec/data-model-errors.json index f1f54cabe7c2..c7ba4fb33cc1 100644 --- a/testdata/message2/spec/data-model-errors.json +++ b/testdata/message2/spec/data-model-errors.json @@ -1,5 +1,5 @@ { - "$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json", + "$schema": "../schemas/v0/tests.schema.json", "scenario": "Data model errors", "defaultTestProperties": { "locale": "en-US" diff --git a/testdata/message2/spec/fallback.json b/testdata/message2/spec/fallback.json index fd1429c9b664..abf062e1c355 100644 --- a/testdata/message2/spec/fallback.json +++ b/testdata/message2/spec/fallback.json @@ -1,5 +1,5 @@ { - "$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json", + "$schema": "../schemas/v0/tests.schema.json", "scenario": "Fallback", "description": "Test cases for fallback behaviour.", "defaultTestProperties": { @@ -11,7 +11,8 @@ { "description": "function with unquoted literal operand", "src": "{42 :test:function fails=format}", - "exp": "{|42|}" + "exp": "{|42|}", + "expParts": [{ "type": "fallback", "source": "|42|" }] }, { "description": "function with quoted literal operand", @@ -26,7 +27,8 @@ { "description": "annotated implicit input variable", "src": "{$var :number}", - "exp": "{$var}" + "exp": "{$var}", + "expParts": [{ "type": "fallback", "source": "$var" }] }, { "description": "local variable with unknown function in 
declaration", @@ -46,7 +48,8 @@ { "description": "function with no operand", "src": "{:test:undefined}", - "exp": "{:test:undefined}" + "exp": "{:test:undefined}", + "expParts": [{ "type": "fallback", "source": ":test:undefined" }] } ] } diff --git a/testdata/message2/spec/functions/date.json b/testdata/message2/spec/functions/date.json index 625eb9712e46..c20b69a1bf10 100644 --- a/testdata/message2/spec/functions/date.json +++ b/testdata/message2/spec/functions/date.json @@ -1,5 +1,5 @@ { - "$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json", + "$schema": "../../schemas/v0/tests.schema.json", "scenario": "Date function", "description": "The built-in formatter for dates.", "defaultTestProperties": { diff --git a/testdata/message2/spec/functions/datetime.json b/testdata/message2/spec/functions/datetime.json index d8e8b6dad9d3..1d4551829065 100644 --- a/testdata/message2/spec/functions/datetime.json +++ b/testdata/message2/spec/functions/datetime.json @@ -1,5 +1,5 @@ { - "$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json", + "$schema": "../../schemas/v0/tests.schema.json", "scenario": "Datetime function", "description": "The built-in formatter for datetimes.", "defaultTestProperties": { diff --git a/testdata/message2/spec/functions/integer.json b/testdata/message2/spec/functions/integer.json index f2d344c951f1..fa95511f8098 100644 --- a/testdata/message2/spec/functions/integer.json +++ b/testdata/message2/spec/functions/integer.json @@ -1,5 +1,5 @@ { - "$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json", + "$schema": "../../schemas/v0/tests.schema.json", "scenario": "Integer function", "description": "The built-in formatter for integers.", "defaultTestProperties": { diff --git a/testdata/message2/spec/functions/number.json b/testdata/message2/spec/functions/number.json index 
89f859164ec4..4c4c809c6582 100644 --- a/testdata/message2/spec/functions/number.json +++ b/testdata/message2/spec/functions/number.json @@ -1,5 +1,5 @@ { - "$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json", + "$schema": "../../schemas/v0/tests.schema.json", "scenario": "Number function", "description": "The built-in formatter for numbers.", "defaultTestProperties": { @@ -326,13 +326,7 @@ "expParts": [ { "type": "number", - "source": "|42|", - "parts": [ - { - "type": "integer", - "value": "42" - } - ] + "parts": [{ "type": "integer", "value": "42" }] } ] } diff --git a/testdata/message2/spec/functions/string.json b/testdata/message2/spec/functions/string.json index 06d0255ce538..67507cf645c9 100644 --- a/testdata/message2/spec/functions/string.json +++ b/testdata/message2/spec/functions/string.json @@ -1,5 +1,5 @@ { - "$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json", + "$schema": "../../schemas/v0/tests.schema.json", "scenario": "String function", "description": "The built-in formatter for strings.", "defaultTestProperties": { diff --git a/testdata/message2/spec/functions/time.json b/testdata/message2/spec/functions/time.json index 1f6cf2293132..56aab3e3fb75 100644 --- a/testdata/message2/spec/functions/time.json +++ b/testdata/message2/spec/functions/time.json @@ -1,5 +1,5 @@ { - "$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json", + "$schema": "../../schemas/v0/tests.schema.json", "scenario": "Time function", "description": "The built-in formatter for times.", "defaultTestProperties": { diff --git a/testdata/message2/spec/pattern-selection.json b/testdata/message2/spec/pattern-selection.json index 29dc146c1907..69d8cb063987 100644 --- a/testdata/message2/spec/pattern-selection.json +++ b/testdata/message2/spec/pattern-selection.json @@ -1,5 +1,5 @@ { - "$schema": 
"https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json", + "$schema": "../schemas/v0/tests.schema.json", "scenario": "Pattern selection", "description": "Tests for pattern selection", "defaultTestProperties": { diff --git a/testdata/message2/spec/syntax-errors.json b/testdata/message2/spec/syntax-errors.json index b2e5ffc6d422..7f840b3cf4ff 100644 --- a/testdata/message2/spec/syntax-errors.json +++ b/testdata/message2/spec/syntax-errors.json @@ -1,5 +1,5 @@ { - "$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json", + "$schema": "../schemas/v0/tests.schema.json", "scenario": "Syntax errors", "description": "Strings that produce syntax errors when parsed.", "defaultTestProperties": { @@ -192,7 +192,6 @@ { "src": "{^.}" }, { "src": "{^ .}" }, { "src": "{&}" }, - { "src": "{\ud800}" }, { "src": "{\ufdd0}" }, { "src": "{\ufffe}" }, { "src": "{!.\\{}" }, diff --git a/testdata/message2/spec/syntax.json b/testdata/message2/spec/syntax.json index b334c8f734de..9bc93cb5eae8 100644 --- a/testdata/message2/spec/syntax.json +++ b/testdata/message2/spec/syntax.json @@ -1,5 +1,5 @@ { - "$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json", + "$schema": "../schemas/v0/tests.schema.json", "scenario": "Syntax", "description": "Test cases that do not depend on any registry definitions.", "defaultTestProperties": { @@ -412,13 +412,7 @@ "description": "... attribute -> \"@\" identifier s \"=\" s quoted-literal ...", "src": "{42 @foo=|bar|}", "exp": "42", - "expParts": [ - { - "type": "string", - "source": "|42|", - "value": "42" - } - ] + "expParts": [{ "type": "string", "value": "42" }] }, { "description": "... 
quoted-literal", @@ -644,7 +638,7 @@ "name": "tag" }, { - "type": "literal", + "type": "text", "value": "content" } ] @@ -659,7 +653,7 @@ "name": "ns:tag" }, { - "type": "literal", + "type": "text", "value": "content" }, { @@ -679,7 +673,7 @@ "name": "tag" }, { - "type": "literal", + "type": "text", "value": "content" } ] @@ -722,13 +716,7 @@ { "src": "{42 @foo @bar=13}", "exp": "42", - "expParts": [ - { - "type": "string", - "source": "|42|", - "value": "42" - } - ] + "expParts": [{ "type": "string", "value": "42" }] }, { "src": "{{trailing whitespace}} \n", diff --git a/testdata/message2/spec/u-options.json b/testdata/message2/spec/u-options.json new file mode 100644 index 000000000000..80cbaa774815 --- /dev/null +++ b/testdata/message2/spec/u-options.json @@ -0,0 +1,120 @@ +{ + "$schema": "../schemas/v0/tests.schema.json", + "scenario": "u: Options", + "description": "Common options affecting the function context", + "defaultTestProperties": { + "bidiIsolation": "default", + "locale": "en-US" + }, + "tests": [ + { + "tags": ["u:id"], + "src": "{#tag u:id=x}content{/ns:tag u:id=x}", + "exp": "content", + "expParts": [ + { "type": "markup", "kind": "open", "id": "x", "name": "tag" }, + { "type": "text", "value": "content" }, + { "type": "markup", "kind": "close", "id": "x", "name": "ns:tag" } + ] + }, + { + "tags": ["u:dir"], + "src": "{#tag u:dir=rtl}content{/ns:tag}", + "exp": "content", + "expErrors": [{ "type": "bad-option" }], + "expParts": [ + { "type": "markup", "kind": "open", "name": "tag" }, + { "type": "text", "value": "content" }, + { "type": "markup", "kind": "close", "name": "ns:tag" } + ] + }, + { + "tags": ["u:locale"], + "src": "hello {4.2 :number u:locale=fr}", + "exp": "hello 4,2" + }, + { + "tags": ["u:dir", "u:locale"], + "src": "{#tag u:dir=rtl u:locale=ar}content{/ns:tag}", + "exp": "content", + "expErrors": [{ "type": "bad-option" }], + "expParts": [ + { "type": "markup", "kind": "open", "name": "tag" }, + { "type": "text", "value": 
"content" }, + { "type": "markup", "kind": "close", "name": "ns:tag" } + ] + }, + { + "tags": ["u:dir", "u:id"], + "src": "hello {world :string u:dir=ltr u:id=foo}", + "exp": "hello \u2066world\u2069", + "expParts": [ + { "type": "text", "value": "hello " }, + { "type": "bidiIsolation", "value": "\u2066" }, + { "type": "string", "dir": "ltr", "id": "foo", "value": "world" }, + { "type": "bidiIsolation", "value": "\u2069" } + + ] + }, + { + "tags": ["u:dir"], + "src": "hello {world :string u:dir=rtl}", + "exp": "hello \u2067world\u2069", + "expParts": [ + { "type": "text", "value": "hello " }, + { "type": "bidiIsolation", "value": "\u2067" }, + { "type": "string", "dir": "rtl", "locale": "en-US", "value": "world" }, + { "type": "bidiIsolation", "value": "\u2069" } + ] + }, + { + "tags": ["u:dir"], + "src": "hello {world :string u:dir=auto}", + "exp": "hello \u2068world\u2069", + "expParts": [ + { "type": "text", "value": "hello " }, + { "type": "bidiIsolation", "value": "\u2068" }, + { + "type": "string", + "locale": "en-US", + "value": "world" + }, + { "type": "bidiIsolation", "value": "\u2069" } + ] + }, + { + "tags": ["u:dir", "u:id"], + "src": ".local $world = {world :string u:dir=ltr u:id=foo} {{hello {$world}}}", + "exp": "hello \u2066world\u2069", + "expParts": [ + { "type": "text", "value": "hello " }, + { "type": "bidiIsolation", "value": "\u2066" }, + { "type": "string", "dir": "ltr", "id": "foo", "value": "world" }, + { "type": "bidiIsolation", "value": "\u2069" } + ] + }, + { + "tags": ["u:dir"], + "locale": "ar", + "src": "أهلاً {بالعالم :string u:dir=rtl}", + "exp": "أهلاً \u2067بالعالم\u2069" + }, + { + "tags": ["u:dir"], + "locale": "ar", + "src": "أهلاً {بالعالم :string u:dir=auto}", + "exp": "أهلاً \u2068بالعالم\u2069" + }, + { + "tags": ["u:dir"], + "locale": "ar", + "src": "أهلاً {world :string u:dir=ltr}", + "exp": "أهلاً \u2066world\u2069" + }, + { + "locale": "ar", + "src": "أهلاً {بالعالم :string}", + "exp": "أهلاً \u2068بالعالم\u2069" + } + 
] +} diff --git a/testdata/message2/valid-tests.json b/testdata/message2/valid-tests.json index 0f062116b733..1b9e9fa8257e 100644 --- a/testdata/message2/valid-tests.json +++ b/testdata/message2/valid-tests.json @@ -2,7 +2,8 @@ "scenario": "Valid tests", "description": "Additional valid tests", "defaultTestProperties": { - "locale": "en-US" + "locale": "en-US", + "bidiIsolation": "none" }, "tests": [ { "src": "hello {|4.2| :number minimumFractionDigits=2}", "exp": "hello 4.20"},