Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement String.prototype.normalize #4903

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions jerry-core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ set(JERRY_ERROR_MESSAGES OFF CACHE BOOL "Enable error mess
set(JERRY_EXTERNAL_CONTEXT OFF CACHE BOOL "Enable external context?")
set(JERRY_PARSER ON CACHE BOOL "Enable javascript-parser?")
set(JERRY_FUNCTION_TO_STRING OFF CACHE BOOL "Enable function toString operation?")
set(JERRY_ICU OFF CACHE BOOL "Enable ICU support?")
set(JERRY_LINE_INFO OFF CACHE BOOL "Enable line info?")
set(JERRY_LOGGING OFF CACHE BOOL "Enable logging?")
set(JERRY_MEM_STATS OFF CACHE BOOL "Enable memory statistics?")
Expand Down Expand Up @@ -78,13 +79,24 @@ if(JERRY_MEM_STATS OR JERRY_PARSER_DUMP_BYTE_CODE OR JERRY_REGEXP_DUMP_BYTE_CODE
set(JERRYRE_LOGGING_MESSAGE " (FORCED BY STATS OR DUMP)")
endif()

# ICU
# Look up the ICU "uc" component only when ICU support was requested.
# The lookup is intentionally not REQUIRED: a REQUIRED lookup aborts the
# configuration on failure, which would make the fallback below unreachable.
if(JERRY_ICU)
  find_package(ICU COMPONENTS uc)

  if(NOT ICU_FOUND)
    # Library is missing: switch the feature off and annotate the status line.
    set(JERRY_ICU OFF)
    set(JERRY_ICU_MESSAGE " (FORCED BY MISSING LIBRARY)")
  endif()
endif()

# Status messages
message(STATUS "JERRY_CPOINTER_32_BIT " ${JERRY_CPOINTER_32_BIT} ${JERRY_CPOINTER_32_BIT_MESSAGE})
message(STATUS "JERRY_DEBUGGER " ${JERRY_DEBUGGER})
message(STATUS "JERRY_ERROR_MESSAGES " ${JERRY_ERROR_MESSAGES})
message(STATUS "JERRY_EXTERNAL_CONTEXT " ${JERRY_EXTERNAL_CONTEXT})
message(STATUS "JERRY_PARSER " ${JERRY_PARSER})
message(STATUS "JERRY_FUNCTION_TO_STRING " ${JERRY_FUNCTION_TO_STRING})
message(STATUS "JERRY_ICU " ${JERRY_ICU} ${JERRY_ICU_MESSAGE})
message(STATUS "JERRY_LINE_INFO " ${JERRY_LINE_INFO})
message(STATUS "JERRY_LOGGING " ${JERRY_LOGGING} ${JERRY_LOGGING_MESSAGE})
message(STATUS "JERRY_MEM_STATS " ${JERRY_MEM_STATS})
Expand Down Expand Up @@ -641,6 +653,12 @@ if(JERRY_VALGRIND)
set(INCLUDE_CORE_PRIVATE ${INCLUDE_CORE_PRIVATE} ${INCLUDE_THIRD_PARTY_VALGRIND})
endif()

# ICU
# Pass the JERRY_ICU setting on through jerry_add_define01 and, when the
# feature is enabled, add the ICU header directories to the private include
# list of the core library.
jerry_add_define01(JERRY_ICU)
if(JERRY_ICU)
  list(APPEND INCLUDE_CORE_PRIVATE ${ICU_INCLUDE_DIRS})
endif()

# Enable VM execution stop callback
jerry_add_define01(JERRY_VM_HALT)

Expand Down Expand Up @@ -766,6 +784,10 @@ else()
endif()
endif()

# Link the ICU libraries into the core library when ICU support is enabled.
# The plain (keyword-less) signature is used because the external library
# loop on this target uses it as well, and the two forms cannot be mixed.
if(JERRY_ICU)
  target_link_libraries(${JERRY_CORE_NAME} ${ICU_LIBRARIES})
endif()

separate_arguments(EXTERNAL_LINK_LIBS)
foreach(EXT_LIB ${EXTERNAL_LINK_LIBS})
target_link_libraries(${JERRY_CORE_NAME} ${EXT_LIB})
Expand Down
11 changes: 6 additions & 5 deletions jerry-core/ecma/base/ecma-error-messages.inc.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ ECMA_ERROR_DEF (ECMA_ERR_INVALID_REGEXP_FLAGS, "Invalid RegExp flags")
#if JERRY_BUILTIN_JSON
ECMA_ERROR_DEF (ECMA_ERR_JSON_STRINGIFY_ERROR, "JSON stringify error")
#endif /* JERRY_BUILTIN_JSON */
#if JERRY_BUILTIN_STRING && JERRY_ESNEXT
ECMA_ERROR_DEF (ECMA_ERR_NORMALIZATION_FAILED, "Normalization failed")
#endif /* JERRY_BUILTIN_STRING && JERRY_ESNEXT */
#if JERRY_BUILTIN_REGEXP
ECMA_ERROR_DEF (ECMA_ERR_STACK_LIMIT_EXCEEDED, "Stack limit exceeded")
#endif /* JERRY_BUILTIN_REGEXP */
Expand Down Expand Up @@ -203,6 +206,9 @@ ECMA_ERROR_DEF (ECMA_ERR_EXPECTED_A_FUNCTION_OBJECT, "Expected a function object
#if JERRY_BUILTIN_TYPEDARRAY
ECMA_ERROR_DEF (ECMA_ERR_INVALID_ARRAYBUFFER_LENGTH, "Invalid ArrayBuffer length")
#endif /* JERRY_BUILTIN_TYPEDARRAY */
#if JERRY_BUILTIN_STRING && JERRY_ESNEXT
ECMA_ERROR_DEF (ECMA_ERR_INVALID_NORMALIZATION_FORM, "Invalid normalization form")
#endif /* JERRY_BUILTIN_STRING && JERRY_ESNEXT */
#if !(JERRY_MODULE_SYSTEM)
ECMA_ERROR_DEF (ECMA_ERR_MODULE_NOT_SUPPORTED, "Module support is disabled")
#endif /* !(JERRY_MODULE_SYSTEM) */
Expand Down Expand Up @@ -547,11 +553,6 @@ ECMA_ERROR_DEF (ECMA_ERR_CONSTRUCTOR_UINT32_ARRAY_REQUIRES_NEW, "Constructor Uin
#if JERRY_ESNEXT
ECMA_ERROR_DEF (ECMA_ERR_GENERATOR_IS_CURRENTLY_UNDER_EXECUTION, "Generator is currently under execution")
ECMA_ERROR_DEF (ECMA_ERR_ITERATOR_RETURN_RESULT_IS_NOT_OBJECT, "Iterator 'return' result is not object")
#endif /* JERRY_ESNEXT */
#if JERRY_BUILTIN_TYPEDARRAY
ECMA_ERROR_DEF (ECMA_ERR_RETURNED_ARRAYBUFFER_HAS_BEEN_DETACHED, "Returned ArrayBuffer has been detached")
#endif /* JERRY_BUILTIN_TYPEDARRAY */
#if JERRY_ESNEXT
ECMA_ERROR_DEF (ECMA_ERR_SEARCH_STRING_CANNOT_BE_OF_TYPE_REGEXP, "Search string can't be of type: RegExp")
ECMA_ERROR_DEF (ECMA_ERR_VALUE_RECEIVED_BY_YIELD_IS_NOT_OBJECT, "Value received by yield* is not object")
#endif /* JERRY_ESNEXT */
Expand Down
3 changes: 2 additions & 1 deletion jerry-core/ecma/base/ecma-error-messages.ini
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,6 @@ ECMA_ERR_RESOLVE_MUST_BE_UNDEFINED = "Resolve must be undefined"
ECMA_ERR_RESULT_OF_DEFAULTVALUE_IS_INVALID = "Result of [[DefaultValue]] is invalid"
ECMA_ERR_RETURN_VALUE_IS_NOT_AN_ARRAYBUFFER_OBJECT = "Return value is not an ArrayBuffer object"
ECMA_ERR_RETURN_VALUE_OF_EXEC_MUST_BE_AN_OBJECT_OR_NULL = "Return value of 'exec' must be an object or null"
ECMA_ERR_RETURNED_ARRAYBUFFER_HAS_BEEN_DETACHED = "Returned ArrayBuffer has been detached"
ECMA_ERR_RIGHT_VALUE_OF_IN_MUST_BE_AN_OBJECT = "Right value of 'in' must be an object"
ECMA_ERR_RIGHT_VALUE_OF_INSTANCEOF_MUST_BE_AN_OBJECT = "Right value of 'instanceof' must be an object"
ECMA_ERR_SEARCH_STRING_CANNOT_BE_OF_TYPE_REGEXP = "Search string can't be of type: RegExp"
Expand Down Expand Up @@ -333,3 +332,5 @@ ECMA_ERR_PRIVATE_METHOD_IS_NOT_WRITABLE = "Private method is not writable"
ECMA_ERR_PRIVATE_FIELD_WAS_DEFINED_WITHOUT_A_SETTER = "Private field was defined without a setter"
ECMA_ERR_CANNOT_READ_PRIVATE_MEMBER_TO_AN_OBJECT_WHOSE_CLASS_DID_NOT_DECLARE_IT = "Cannot read private member to an object whose class did not declare it"
ECMA_ERR_PRIVATE_FIELD_WAS_DEFINED_WITHOUT_A_GETTER = "Private field was defined without a getter"
ECMA_ERR_INVALID_NORMALIZATION_FORM = "Invalid normalization form"
ECMA_ERR_NORMALIZATION_FAILED = "Normalization failed"
52 changes: 52 additions & 0 deletions jerry-core/ecma/base/ecma-helpers-string.c
Original file line number Diff line number Diff line change
Expand Up @@ -2805,6 +2805,58 @@ ecma_op_advance_string_index (ecma_string_t *str_p, /**< input string */
} /* ecma_op_advance_string_index */
#endif /* JERRY_ESNEXT */

#if JERRY_ICU
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should have a default value in jerry-core/config.h

/**
 * Copy the string's data into a newly allocated UTF16 encoded buffer
 *
 * Note:
 *      The buffer is allocated with jmem_heap_alloc_block and holds *utf16_length_p code units,
 *      so it must be released with jmem_heap_free_block using *utf16_length_p * sizeof (uint16_t).
 *
 * @return pointer to the allocated buffer
 */
uint16_t *
ecma_string_cesu8_to_utf16 (ecma_string_t *str_p, /**< input string */
                            lit_utf8_size_t *utf16_length_p) /**< [out] utf16 buffer size */
{
  lit_utf8_size_t utf8_size;
  lit_utf8_size_t utf8_length;
  uint8_t flags = ECMA_STRING_FLAG_EMPTY;
  const lit_utf8_byte_t *utf8_buffer_p = ecma_string_get_chars (str_p, &utf8_size, &utf8_length, NULL, &flags);
  const lit_utf8_byte_t *utf8_buffer_end_p = utf8_buffer_p + utf8_size;

  /* Iterate with a separate cursor: the start pointer is still needed to release the source buffer. */
  const lit_utf8_byte_t *utf8_cursor_p = utf8_buffer_p;

  *utf16_length_p = utf8_length;
  uint16_t *utf16_buff_p = (uint16_t *) jmem_heap_alloc_block (*utf16_length_p * sizeof (uint16_t));
  uint16_t *utf16_buff_iter_p = utf16_buff_p;

  while (utf8_cursor_p < utf8_buffer_end_p)
  {
    /* Each read consumes one code unit from the source and moves the cursor forward. */
    *utf16_buff_iter_p++ = (uint16_t) lit_cesu8_read_next (&utf8_cursor_p);
  }

  if (flags & ECMA_STRING_FLAG_MUST_BE_FREED)
  {
    /* Free the buffer at its original start address, not at the advanced read position. */
    jmem_heap_free_block ((void *) utf8_buffer_p, utf8_size);
  }

  return utf16_buff_p;
} /* ecma_string_cesu8_to_utf16 */

/**
 * Build a new ecma-string out of a buffer of UTF16 code units
 *
 * Every code unit of the buffer is appended, in order, to a string builder
 * which is then finalized into the resulting string.
 *
 * @return pointer to the allocated string
 */
ecma_string_t *
ecma_new_ecma_string_from_utf16 (uint16_t *utf16_buff_p, /**< buffer of UTF16 code units */
                                 lit_utf8_size_t utf16_length) /**< number of code units in the buffer */
{
  ecma_stringbuilder_t builder = ecma_stringbuilder_create ();
  const uint16_t *const utf16_end_p = utf16_buff_p + utf16_length;

  for (const uint16_t *unit_p = utf16_buff_p; unit_p < utf16_end_p; unit_p++)
  {
    ecma_stringbuilder_append_codepoint (&builder, *unit_p);
  }

  return ecma_stringbuilder_finalize (&builder);
} /* ecma_new_ecma_string_from_utf16 */
#endif /* JERRY_ICU */

/**
* @}
* @}
Expand Down
4 changes: 4 additions & 0 deletions jerry-core/ecma/base/ecma-helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,10 @@ ecma_string_t *ecma_new_symbol_from_descriptor_string (ecma_value_t string_desc)
bool ecma_prop_name_is_symbol (ecma_string_t *string_p);
ecma_length_t ecma_op_advance_string_index (ecma_string_t *str_p, ecma_length_t index_num, bool is_unicode);
#endif /* JERRY_ESNEXT */
#if JERRY_ICU
/* Conversion helpers between ecma-strings and UTF16 buffers, declared only when JERRY_ICU is enabled. */
uint16_t *ecma_string_cesu8_to_utf16 (ecma_string_t *str_p, lit_utf8_size_t *utf16_length_p);
ecma_string_t *ecma_new_ecma_string_from_utf16 (uint16_t *utf16_buff_p, lit_utf8_size_t utf16_length);
#endif /* JERRY_ICU */
#if JERRY_BUILTIN_CONTAINER
ecma_string_t *ecma_new_map_key_string (ecma_value_t value);
bool ecma_prop_name_is_map_key (ecma_string_t *string_p);
Expand Down
154 changes: 154 additions & 0 deletions jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@
#include "ecma-regexp-object.h"
#endif /* JERRY_BUILTIN_REGEXP */

#if JERRY_ICU
#include "unicode/unorm2.h"
#endif /* JERRY_ICU */

#if JERRY_BUILTIN_STRING

#define ECMA_BUILTINS_INTERNAL
Expand Down Expand Up @@ -80,6 +84,7 @@ enum

ECMA_STRING_PROTOTYPE_SUBSTR,

ECMA_STRING_PROTOTYPE_NORMALIZE,
ECMA_STRING_PROTOTYPE_REPEAT,
ECMA_STRING_PROTOTYPE_CODE_POINT_AT,
ECMA_STRING_PROTOTYPE_PAD_START,
Expand Down Expand Up @@ -1225,6 +1230,150 @@ ecma_builtin_string_prototype_object_trim (ecma_string_t *original_string_p) /**
} /* ecma_builtin_string_prototype_object_trim */

#if JERRY_ESNEXT
#if JERRY_ICU
/**
 * ICU string normalizer instance callback
 */
typedef const UNormalizer2 *(*icu_string_normalizer_instance_cb_t) (UErrorCode *);

/**
 * Helper macro to register form normalizer entries: form id together with its instance getter
 */
#define FORM_ENTRY(form_id, getter) { form_id, getter }
#else /* !JERRY_ICU */
/**
 * Helper macro to register form normalizer entries: only the form id is kept, the getter is dropped
 */
#define FORM_ENTRY(form_id, getter) { form_id }
#endif /* JERRY_ICU */

/**
 * Normalization form descriptor
 */
typedef struct
{
  lit_magic_string_id_t kind; /**< magic string id of the form name */
#if JERRY_ICU
  icu_string_normalizer_instance_cb_t instance_cb; /**< normalizer instance callback */
#endif /* JERRY_ICU */
} icu_string_form_normalizer_t;

/**
 * List of normalization forms
 */
static const icu_string_form_normalizer_t icu_string_normalize_forms[] = {
  FORM_ENTRY (LIT_MAGIC_STRING_NFC_U, unorm2_getNFCInstance),
  FORM_ENTRY (LIT_MAGIC_STRING_NFD_U, unorm2_getNFDInstance),
  FORM_ENTRY (LIT_MAGIC_STRING_NFKC_U, unorm2_getNFKCInstance),
  FORM_ENTRY (LIT_MAGIC_STRING_NFKD_U, unorm2_getNFKDInstance)
};

#undef FORM_ENTRY

/**
 * The String.prototype object's 'normalize' routine
 *
 * See also:
 *          ECMA-262 v12, 22.1.3.13
 *
 * Note:
 *      When JERRY_ICU is disabled the form argument is still validated,
 *      but the original string is returned without any normalization.
 *
 * @return ecma value
 *         Returned value must be freed with ecma_free_value.
 */
static ecma_value_t
ecma_builtin_string_prototype_object_normalize (ecma_string_t *original_string_p, /**< this argument */
                                                ecma_value_t form_value) /**< normalization form */
{
#if JERRY_ICU
  /* NFC is the form used when the argument is undefined. */
  icu_string_normalizer_instance_cb_t normalizer_instance_cb = unorm2_getNFCInstance;
#endif /* JERRY_ICU */

  if (!ecma_is_value_undefined (form_value))
  {
    ecma_string_t *form_p = ecma_op_to_string (form_value);

    if (JERRY_UNLIKELY (form_p == NULL))
    {
      return ECMA_VALUE_ERROR;
    }

    size_t forms_size = sizeof (icu_string_normalize_forms) / sizeof (icu_string_normalize_forms[0]);
    uint32_t form_idx = 0;

    for (; form_idx < forms_size; form_idx++)
    {
      if (ecma_compare_ecma_string_to_magic_id (form_p, icu_string_normalize_forms[form_idx].kind))
      {
#if JERRY_ICU
        normalizer_instance_cb = icu_string_normalize_forms[form_idx].instance_cb;
#endif /* JERRY_ICU */
        break;
      }
    }

    ecma_deref_ecma_string (form_p);

    /* The loop ran to completion, so the name matched none of the known forms. */
    if (form_idx >= forms_size)
    {
      return ecma_raise_range_error (ECMA_ERR_INVALID_NORMALIZATION_FORM);
    }
  }

#if JERRY_ICU
  JERRY_ASSERT (normalizer_instance_cb != NULL);
  size_t string_size = ecma_string_get_size (original_string_p);

  if (string_size == 0)
  {
    ecma_ref_ecma_string (original_string_p);
    return ecma_make_string_value (original_string_p);
  }

  UErrorCode status = U_ZERO_ERROR;
  const UNormalizer2 *normalizer_cb = normalizer_instance_cb (&status);

  if (!U_FAILURE (status))
  {
    ecma_value_t result = ECMA_VALUE_ERROR;

    lit_utf8_size_t length;
    uint16_t *buffer_p = ecma_string_cesu8_to_utf16 (original_string_p, &length);

    /* First pass without a destination buffer: only the required output length is of interest. */
    int32_t norm_capacity = unorm2_normalize (normalizer_cb, buffer_p, (int32_t) length, NULL, 0, &status);

    if (!U_FAILURE (status) || status == U_BUFFER_OVERFLOW_ERROR)
    {
      /* Remember the allocated size: the block must be freed with exactly this value. */
      const size_t norm_buff_size = (uint32_t) norm_capacity * sizeof (uint16_t);
      uint16_t *norm_buff_p = (uint16_t *) jmem_heap_alloc_block (norm_buff_size);

      status = U_ZERO_ERROR;
      int32_t norm_length =
        unorm2_normalize (normalizer_cb, buffer_p, (int32_t) length, norm_buff_p, norm_capacity, &status);

      if (!U_FAILURE (status))
      {
        result = ecma_make_string_value (ecma_new_ecma_string_from_utf16 (norm_buff_p, (uint32_t) norm_length));
      }

      /* Do not reuse the second call's return value here, it may differ from the allocated size. */
      jmem_heap_free_block (norm_buff_p, norm_buff_size);
    }

    jmem_heap_free_block (buffer_p, length * sizeof (uint16_t));

    if (!ECMA_IS_VALUE_ERROR (result))
    {
      return result;
    }
  }

  return ecma_raise_type_error (ECMA_ERR_NORMALIZATION_FAILED);
#else /* !JERRY_ICU */
  ecma_ref_ecma_string (original_string_p);
  return ecma_make_string_value (original_string_p);
#endif /* JERRY_ICU */
} /* ecma_builtin_string_prototype_object_normalize */

/**
* The String.prototype object's 'repeat' routine
Expand Down Expand Up @@ -1570,6 +1719,11 @@ ecma_builtin_string_prototype_dispatch_routine (uint8_t builtin_routine_id, /**<
}
#endif /* JERRY_BUILTIN_ANNEXB */
#if JERRY_ESNEXT
case ECMA_STRING_PROTOTYPE_NORMALIZE:
{
ret_value = ecma_builtin_string_prototype_object_normalize (string_p, arg1);
break;
}
case ECMA_STRING_PROTOTYPE_REPEAT:
{
ret_value = ecma_builtin_string_prototype_object_repeat (string_p, arg1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ ROUTINE (LIT_MAGIC_STRING_SUBSTR, ECMA_STRING_PROTOTYPE_SUBSTR, 2, 2)
#endif /* JERRY_BUILTIN_ANNEXB */

#if JERRY_ESNEXT
ROUTINE (LIT_MAGIC_STRING_NORMALIZE, ECMA_STRING_PROTOTYPE_NORMALIZE, 1, 0)
ROUTINE (LIT_MAGIC_STRING_REPEAT, ECMA_STRING_PROTOTYPE_REPEAT, 1, 1)
ROUTINE (LIT_MAGIC_STRING_STARTS_WITH, ECMA_STRING_PROTOTYPE_STARTS_WITH, 2, 1)
ROUTINE (LIT_MAGIC_STRING_INCLUDES, ECMA_STRING_PROTOTYPE_INCLUDES, 2, 1)
Expand Down
Loading