From 8e77e46dcb3fb7616f4e5f42f5422c7112c6a154 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Thu, 9 Jan 2025 23:43:45 +0100 Subject: [PATCH 1/3] [Data Liberation] Recognize self-closing blocks in WP_Block_Markup_Processor Adds an is_self_closing_block() method to WP_Block_Markup_Processor to enable detection and rewriting of block comments such as ```html <!-- wp:spacer /--> ``` This will be needed in the markdown processor. A part of https://github.com/WordPress/wordpress-playground/issues/1894. ## Testing instructions CI. See the unit tests updated in this PR. --- .../WP_Block_Markup_Processor.php | 467 ++++++++++++++---- .../tests/WPBlockMarkupProcessorTests.php | 7 +- 2 files changed, 363 insertions(+), 111 deletions(-) diff --git a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php index 04785fe138..089dec1cbe 100644 --- a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php +++ b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php @@ -1,28 +1,84 @@ |null + */ protected $block_attributes; + + /** + * Whether the current block's attributes have been modified and need to be serialized + * + * @var bool + */ private $block_attributes_updated; + + /** + * Whether the current block token is a closing tag (e.g. ) + * + * @var bool + */ private $block_closer; /** + * Whether the current block is self-closing (e.g. 
) + * + * @var bool + */ + private $self_closing_flag; + + /** + * Stack tracking the names of currently open blocks for validation + * + * @var array + */ + private $stack_of_open_blocks = array(); + + /** + * The most recent error encountered while parsing blocks + * + * @var string|null + */ + private $last_block_error; + + /** + * Iterator for traversing nested block attributes * @var \RecursiveIteratorIterator */ private $block_attributes_iterator; + /** + * Gets the type of the current token, adding a special '#block-comment' type + * for WordPress block delimiters. + * + * @return string|null The token type or null if no token + */ public function get_token_type(): ?string { switch ( $this->parser_state ) { case self::STATE_COMMENT: @@ -37,10 +93,96 @@ public function get_token_type(): ?string { } } + /** + * Gets the most recent error encountered while parsing blocks + * + * @return string|null The error message or null if no error + */ + public function get_last_error(): ?string { + return $this->last_block_error; + } + + /** + * Advances past the block closer of the currently matched block and returns + * the HTML content found between the block's opener and closer. + * + * @return string|false The inner HTML content of the block or false if not a block opener. 
+ */ + public function skip_and_get_block_inner_html() { + if ( '#block-comment' !== $this->get_token_type() ) { + return false; + } + + if ( $this->is_block_closer() ) { + return false; + } + + if ( false === WP_HTML_Tag_Processor::set_bookmark( 'block-start' ) ) { + return false; + } + + $starting_block_depth = $this->get_block_depth(); + while ( $this->next_token() ) { + if ( + $this->get_token_type() === '#block-comment' && + $this->is_block_closer() && + $this->get_block_depth() === $starting_block_depth - 1 + ) { + break; + } + } + + if ( false === WP_HTML_Tag_Processor::set_bookmark( 'block-end' ) ) { + WP_HTML_Tag_Processor::release_bookmark( 'block-start' ); + return false; + } + + $inner_html_start = $this->bookmarks['block-start']->start + $this->bookmarks['block-start']->length; + $inner_html_end = $this->bookmarks['block-end']->start - $inner_html_start; + + WP_HTML_Tag_Processor::release_bookmark( 'block-start' ); + WP_HTML_Tag_Processor::release_bookmark( 'block-end' ); + + return substr( + $this->html, + $inner_html_start, + $inner_html_end + ); + } + + /** + * Gets the depth of the currently matched block on the block stack. It only + * considers the parent blocks and not HTML elements. + * + * For example, the paragraph block in the following markup has a depth of 1: + * + * + *
+ * + *

Hello, there

+ * + *
+ * + * + * @return int The number of ancestor blocks + */ + public function get_block_depth() { + return count( $this->stack_of_open_blocks ); + } + + /** + * Gets the names of all currently open blocks from outermost to innermost + * + * @return array List of block names in nesting order + */ + public function get_block_breadcrumbs() { + return $this->stack_of_open_blocks; + } + /** * Returns the name of the block if the current token is a block comment. * - * @return string|false + * @return string|false The block name (e.g. 'core/paragraph') or false if not at a block */ public function get_block_name() { if ( null === $this->block_name ) { @@ -50,6 +192,11 @@ public function get_block_name() { return $this->block_name; } + /** + * Gets all attributes of the current block + * + * @return array|false The block attributes or false if not at a block + */ public function get_block_attributes() { if ( null === $this->block_attributes ) { return false; @@ -58,12 +205,26 @@ public function get_block_attributes() { return $this->block_attributes; } + /** + * Gets a specific attribute value from the current block + * + * @param string $attribute_name The name of the attribute to get + * @return mixed|false The attribute value or false if not found + */ + public function get_block_attribute( $attribute_name ) { + if ( null === $this->block_attributes ) { + return false; + } + + return $this->block_attributes[ $attribute_name ] ?? false; + } + /** * Overwrites all the block attributes of the currently matched block * opener. * - * @param array $attributes The attributes to set. - * @return bool Whether the attributes were set. 
+ * @param array $attributes The new attributes to set + * @return bool Whether the attributes were successfully set */ public function set_block_attributes( $attributes ) { if ( '#block-comment' !== $this->get_token_type() ) { @@ -77,108 +238,179 @@ public function set_block_attributes( $attributes ) { return true; } + /** + * Checks if the currently matched token is a block closer, + * e.g. . + * + * @return bool True if at a block closer. + */ public function is_block_closer() { return $this->block_name !== null && $this->block_closer === true; } - public function next_token(): bool { - $this->get_updated_html(); - - $this->block_name = null; - $this->block_attributes = null; - $this->block_closer = false; - $this->block_attributes_updated = false; - - if ( parent::next_token() === false ) { - return false; - } - - if ( parent::get_token_type() !== '#comment' ) { - return true; - } - - $text = parent::get_modifiable_text(); - /** - * Try to parse as a block. The block parser won't cut it because - * while it can parse blocks, it has no semantics for rewriting the - * block markup. Let's do our best here: - */ - $at = strspn( $text, ' \t\f\r\n' ); // Whitespace. - - if ( $at >= strlen( $text ) ) { - // This is an empty comment. Not a block. - return true; - } - - // Blocks closers start with the solidus character (`/`). - if ( '/' === $text[ $at ] ) { - $this->block_closer = true; - ++$at; - } - - // Blocks start with wp. - if ( ! ( - $at + 3 < strlen( $text ) && - $text[ $at ] === 'w' && - $text[ $at + 1 ] === 'p' && - $text[ $at + 2 ] === ':' - ) ) { - return true; - } - - $name_starts_at = $at; - - // Skip wp. - $at += 3; - - // Parse the actual block name after wp. - $name_length = strspn( $text, 'abcdefghijklmnopqrstuwxvyzABCDEFGHIJKLMNOPRQSTUWXVYZ0123456789_-', $at ); - if ( $name_length === 0 ) { - // This wasn't a block after all, just a regular comment. 
- return true; - } - $name = substr( $text, $name_starts_at, $name_length + 3 ); - $at += $name_length; - - // Skip the whitespace that follows the block name. - $at += strspn( $text, ' \t\f\r\n', $at ); - if ( $at >= strlen( $text ) ) { - // It's a block without attributes. - $this->block_name = $name; - - return true; - } - - // It seems we may have block attributes here. - - // Block closers cannot have attributes. - if ( $this->block_closer ) { - return true; - } - - // Let's try to parse them as JSON. - $json_maybe = substr( $text, $at ); - $attributes = json_decode( $json_maybe, true ); - if ( null === $attributes || ! is_array( $attributes ) ) { - // This comment looked like a block comment, but the attributes didn't - // parse as a JSON array. This means it wasn't a block after all. - return true; - } - - // We have a block name and a valid attributes array. We may not find a block - // closer, but let's assume is a block and process it as such. - // @TODO: Confirm that WordPress block parser would have parsed this as a block. - $this->block_name = $name; - $this->block_attributes = $attributes; + /** + * Checks if the currently matched token is a self-closing block, + * e.g. . + * + * @return bool True if at a self-closing block. + */ + public function is_self_closing_block() { + return $this->block_name !== null && $this->self_closing_flag === true; + } - return true; + /** + * Advances to the next token in the HTML stream. Matches: + * - The regular HTML tokens + * - WordPress block openers + * - WordPress block closers + * - WordPress self-closing blocks + * + * @return bool Whether a token was parsed. 
+ */ + public function next_token(): bool { + $this->get_updated_html(); + + $this->block_name = null; + $this->block_attributes = null; + $this->block_attributes_iterator = null; + $this->block_closer = false; + $this->self_closing_flag = false; + $this->block_attributes_updated = false; + + while ( true ) { + if ( parent::next_token() === false ) { + return false; + } + + if ( + $this->get_token_type() === '#tag' && ( + $this->get_tag() === 'HTML' || + $this->get_tag() === 'HEAD' || + $this->get_tag() === 'BODY' + ) + ) { + continue; + } + + break; + } + + if ( parent::get_token_type() !== '#comment' ) { + return true; + } + + $text = parent::get_modifiable_text(); + /** + * Try to parse as a block. The block parser won't cut it because + * while it can parse blocks, it has no semantics for rewriting the + * block markup. Let's do our best here: + */ + $at = strspn( $text, ' \t\f\r\n' ); // Whitespace. + + if ( $at >= strlen( $text ) ) { + // This is an empty comment. Not a block. + return true; + } + + // Blocks closers start with the solidus character (`/`). + if ( '/' === $text[ $at ] ) { + $this->block_closer = true; + ++$at; + } + + // Blocks start with wp. + if ( ! ( + $at + 3 < strlen( $text ) && + $text[ $at ] === 'w' && + $text[ $at + 1 ] === 'p' && + $text[ $at + 2 ] === ':' + ) ) { + return true; + } + + $name_starts_at = $at; + + // Skip wp. + $at += 3; + + // Parse the actual block name after wp. + $name_length = strspn( $text, 'abcdefghijklmnopqrstuwxvyzABCDEFGHIJKLMNOPRQSTUWXVYZ0123456789_-', $at ); + if ( $name_length === 0 ) { + // This wasn't a block after all, just a regular comment. + return true; + } + $name = substr( $text, $name_starts_at, $name_length + 3 ); + $at += $name_length; + + // Assume no attributes by default. + $attributes = array(); + + // Skip the whitespace that follows the block name. + $at += strspn( $text, ' \t\f\r\n', $at ); + if ( $at < strlen( $text ) ) { + // It may be a self-closing block or a block with attributes. 
+ + // However, block closers can be neither – let's short-circuit. + if ( $this->block_closer ) { + return true; + } + + // The rest of the comment can only consist of block attributes + // and an optional solidus character. + $rest = ltrim( substr( $text, $at ) ); + $at = strlen( $text ); + + // Inspect our potential JSON for the self-closing solidus (`/`) character. + $json_maybe = $rest; + if ( substr( $json_maybe, -1 ) === '/' ) { + // Self-closing block () + $this->self_closing_flag = true; + $json_maybe = substr( $json_maybe, 0, -1 ); + } + + // Let's try to parse attributes as JSON. + if ( strlen( $json_maybe ) > 0 ) { + $attributes = json_decode( $json_maybe, true ); + if ( null === $attributes || ! is_array( $attributes ) ) { + // This comment looked like a block comment, but the attributes didn't + // parse as a JSON array. This means it wasn't a block after all. + return true; + } + } + } + + // We have a block name and a valid attributes array. We may not find a block + // closer, but let's assume is a block and process it as such. + // @TODO: Confirm that WordPress block parser would have parsed this as a block. + $this->block_name = $name; + $this->block_attributes = $attributes; + + if ( $this->block_closer ) { + $popped = array_pop( $this->stack_of_open_blocks ); + if ( $popped !== $name ) { + $this->last_block_error = sprintf( 'Block closer %s does not match the last opened block %s.', $name, $popped ); + return false; + } + } elseif ( ! $this->self_closing_flag ) { + array_push( $this->stack_of_open_blocks, $name ); + } + + return true; } + /** + * @inheritDoc + */ public function get_updated_html(): string { $this->block_attribute_updates_to_modifiable_text_updates(); return parent::get_updated_html(); } + /** + * Converts block attribute updates into lexical updates. + * + * @return bool Whether any lexical updates were created + */ private function block_attribute_updates_to_modifiable_text_updates() { // Apply block attribute updates, if any. 
if ( ! $this->block_attributes_updated ) { @@ -206,6 +438,11 @@ private function block_attribute_updates_to_modifiable_text_updates() { return true; } + /** + * Advances to the next block attribute when a block is matched. + * + * @return bool Whether we successfully advanced to the next attribute. + */ public function next_block_attribute() { if ( '#block-comment' !== $this->get_token_type() ) { return false; @@ -234,6 +471,11 @@ public function next_block_attribute() { return false; } + /** + * Gets the key of the currently matched block attribute. + * + * @return string|false The attribute key or false if no attribute was matched + */ public function get_block_attribute_key() { if ( null === $this->block_attributes_iterator || false === $this->block_attributes_iterator->valid() ) { return false; @@ -242,6 +484,11 @@ public function get_block_attribute_key() { return $this->block_attributes_iterator->key(); } + /** + * Gets the value of the currently matched block attribute. + * + * @return mixed|false The attribute value or false if no attribute was matched + */ public function get_block_attribute_value() { if ( null === $this->block_attributes_iterator || false === $this->block_attributes_iterator->valid() ) { return false; @@ -250,17 +497,23 @@ public function get_block_attribute_value() { return $this->block_attributes_iterator->current(); } + /** + * Sets the value of the currently matched block attribute. 
+ * + * @param mixed $new_value The new value to set + * @return bool Whether the value was successfully set + */ public function set_block_attribute_value( $new_value ) { if ( null === $this->block_attributes_iterator || false === $this->block_attributes_iterator->valid() ) { return false; } - $this->block_attributes_iterator->getSubIterator( - $this->block_attributes_iterator->getDepth() - )->offsetSet( - $this->get_block_attribute_key(), - $new_value - ); + $this->block_attributes_iterator + ->getSubIterator($this->block_attributes_iterator->getDepth()) + ->offsetSet( + $this->get_block_attribute_key(), + $new_value + ); $this->block_attributes_updated = true; return true; diff --git a/packages/playground/data-liberation/tests/WPBlockMarkupProcessorTests.php b/packages/playground/data-liberation/tests/WPBlockMarkupProcessorTests.php index 3817d63b4d..0abf1a159c 100644 --- a/packages/playground/data-liberation/tests/WPBlockMarkupProcessorTests.php +++ b/packages/playground/data-liberation/tests/WPBlockMarkupProcessorTests.php @@ -18,9 +18,9 @@ public function test_finds_block_openers( $markup, $block_name, $block_attribute static public function provider_test_finds_block_openers() { return [ - 'Opener without attributes' => [ '', 'wp:paragraph', null ], - 'Opener without the trailing whitespace' => [ '', 'wp:paragraph', null ], - 'Opener with a lot of trailing whitespace' => [ '', 'wp:paragraph', null ], + 'Opener without attributes' => [ '', 'wp:paragraph', [] ], + 'Opener without the trailing whitespace' => [ '', 'wp:paragraph', [] ], + 'Opener with a lot of trailing whitespace' => [ '', 'wp:paragraph', [] ], 'Opener with attributes' => [ '', 'wp:paragraph', @@ -100,7 +100,6 @@ static public function provider_test_treat_invalid_block_closers_as_comments() { 'Closer with a line break before whitespace' => [ "", ], 'Closer with attributes' => [ '', ], 'Closer with solidus at the end (before whitespace)' => [ '', ], - 'Closer with solidus at the end (after 
whitespace)' => [ '', ], ]; } From ef48f738342b22a42616a365f498ac850085cac0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Thu, 9 Jan 2025 23:46:51 +0100 Subject: [PATCH 2/3] Lint --- .../WP_Block_Markup_Processor.php | 320 +++++++++--------- 1 file changed, 160 insertions(+), 160 deletions(-) diff --git a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php index 089dec1cbe..bc6eb7a619 100644 --- a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php +++ b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php @@ -2,14 +2,14 @@ /** * A processor class capable of reading and rewriting block markup. - * + * * This class provides functionality to parse, traverse and modify WordPress block markup. * It extends WP_HTML_Tag_Processor to add block-specific capabilities like: * - Parsing block comments into name and attributes * - Tracking block nesting depth * - Modifying block attributes * - Validating block structure - * + * * Contrary to WP_HTML_Tag_Processor, this class does not support streaming. * It assumes block markup blobs are small enough to fit into memory, otherwise * WordPress won't be able to render them anyway. @@ -34,36 +34,36 @@ class WP_Block_Markup_Processor extends WP_HTML_Tag_Processor { /** * Whether the current block's attributes have been modified and need to be serialized - * - * @var bool + * + * @var bool */ private $block_attributes_updated; /** * Whether the current block token is a closing tag (e.g. ) - * - * @var bool + * + * @var bool */ private $block_closer; /** * Whether the current block is self-closing (e.g. 
) - * - * @var bool + * + * @var bool */ private $self_closing_flag; /** * Stack tracking the names of currently open blocks for validation - * - * @var array + * + * @var array */ private $stack_of_open_blocks = array(); /** * The most recent error encountered while parsing blocks - * - * @var string|null + * + * @var string|null */ private $last_block_error; @@ -103,8 +103,8 @@ public function get_last_error(): ?string { } /** - * Advances past the block closer of the currently matched block and returns - * the HTML content found between the block's opener and closer. + * Advances past the block closer of the currently matched block and returns + * the HTML content found between the block's opener and closer. * * @return string|false The inner HTML content of the block or false if not a block opener. */ @@ -152,17 +152,17 @@ public function skip_and_get_block_inner_html() { /** * Gets the depth of the currently matched block on the block stack. It only - * considers the parent blocks and not HTML elements. - * - * For example, the paragraph block in the following markup has a depth of 1: - * - * - *
- * - *

Hello, there

- * - *
- * + * considers the parent blocks and not HTML elements. + * + * For example, the paragraph block in the following markup has a depth of 1: + * + * + *
+ * + *

Hello, there

+ * + *
+ * * * @return int The number of ancestor blocks */ @@ -240,7 +240,7 @@ public function set_block_attributes( $attributes ) { /** * Checks if the currently matched token is a block closer, - * e.g. . + * e.g. . * * @return bool True if at a block closer. */ @@ -250,7 +250,7 @@ public function is_block_closer() { /** * Checks if the currently matched token is a self-closing block, - * e.g. . + * e.g. . * * @return bool True if at a self-closing block. */ @@ -268,134 +268,134 @@ public function is_self_closing_block() { * @return bool Whether a token was parsed. */ public function next_token(): bool { - $this->get_updated_html(); - - $this->block_name = null; - $this->block_attributes = null; - $this->block_attributes_iterator = null; - $this->block_closer = false; - $this->self_closing_flag = false; - $this->block_attributes_updated = false; - - while ( true ) { - if ( parent::next_token() === false ) { - return false; - } - - if ( - $this->get_token_type() === '#tag' && ( - $this->get_tag() === 'HTML' || - $this->get_tag() === 'HEAD' || - $this->get_tag() === 'BODY' - ) - ) { - continue; - } - - break; - } - - if ( parent::get_token_type() !== '#comment' ) { - return true; - } - - $text = parent::get_modifiable_text(); - /** - * Try to parse as a block. The block parser won't cut it because - * while it can parse blocks, it has no semantics for rewriting the - * block markup. Let's do our best here: - */ - $at = strspn( $text, ' \t\f\r\n' ); // Whitespace. - - if ( $at >= strlen( $text ) ) { - // This is an empty comment. Not a block. - return true; - } - - // Blocks closers start with the solidus character (`/`). - if ( '/' === $text[ $at ] ) { - $this->block_closer = true; - ++$at; - } - - // Blocks start with wp. - if ( ! ( - $at + 3 < strlen( $text ) && - $text[ $at ] === 'w' && - $text[ $at + 1 ] === 'p' && - $text[ $at + 2 ] === ':' - ) ) { - return true; - } - - $name_starts_at = $at; - - // Skip wp. - $at += 3; - - // Parse the actual block name after wp. 
- $name_length = strspn( $text, 'abcdefghijklmnopqrstuwxvyzABCDEFGHIJKLMNOPRQSTUWXVYZ0123456789_-', $at ); - if ( $name_length === 0 ) { - // This wasn't a block after all, just a regular comment. - return true; - } - $name = substr( $text, $name_starts_at, $name_length + 3 ); - $at += $name_length; - - // Assume no attributes by default. - $attributes = array(); - - // Skip the whitespace that follows the block name. - $at += strspn( $text, ' \t\f\r\n', $at ); - if ( $at < strlen( $text ) ) { - // It may be a self-closing block or a block with attributes. - - // However, block closers can be neither – let's short-circuit. - if ( $this->block_closer ) { - return true; - } - - // The rest of the comment can only consist of block attributes - // and an optional solidus character. - $rest = ltrim( substr( $text, $at ) ); - $at = strlen( $text ); - - // Inspect our potential JSON for the self-closing solidus (`/`) character. - $json_maybe = $rest; - if ( substr( $json_maybe, -1 ) === '/' ) { - // Self-closing block () - $this->self_closing_flag = true; - $json_maybe = substr( $json_maybe, 0, -1 ); - } - - // Let's try to parse attributes as JSON. - if ( strlen( $json_maybe ) > 0 ) { - $attributes = json_decode( $json_maybe, true ); - if ( null === $attributes || ! is_array( $attributes ) ) { - // This comment looked like a block comment, but the attributes didn't - // parse as a JSON array. This means it wasn't a block after all. - return true; - } - } - } - - // We have a block name and a valid attributes array. We may not find a block - // closer, but let's assume is a block and process it as such. - // @TODO: Confirm that WordPress block parser would have parsed this as a block. 
- $this->block_name = $name; - $this->block_attributes = $attributes; - - if ( $this->block_closer ) { - $popped = array_pop( $this->stack_of_open_blocks ); - if ( $popped !== $name ) { - $this->last_block_error = sprintf( 'Block closer %s does not match the last opened block %s.', $name, $popped ); - return false; - } - } elseif ( ! $this->self_closing_flag ) { - array_push( $this->stack_of_open_blocks, $name ); - } - - return true; + $this->get_updated_html(); + + $this->block_name = null; + $this->block_attributes = null; + $this->block_attributes_iterator = null; + $this->block_closer = false; + $this->self_closing_flag = false; + $this->block_attributes_updated = false; + + while ( true ) { + if ( parent::next_token() === false ) { + return false; + } + + if ( + $this->get_token_type() === '#tag' && ( + $this->get_tag() === 'HTML' || + $this->get_tag() === 'HEAD' || + $this->get_tag() === 'BODY' + ) + ) { + continue; + } + + break; + } + + if ( parent::get_token_type() !== '#comment' ) { + return true; + } + + $text = parent::get_modifiable_text(); + /** + * Try to parse as a block. The block parser won't cut it because + * while it can parse blocks, it has no semantics for rewriting the + * block markup. Let's do our best here: + */ + $at = strspn( $text, ' \t\f\r\n' ); // Whitespace. + + if ( $at >= strlen( $text ) ) { + // This is an empty comment. Not a block. + return true; + } + + // Blocks closers start with the solidus character (`/`). + if ( '/' === $text[ $at ] ) { + $this->block_closer = true; + ++$at; + } + + // Blocks start with wp. + if ( ! ( + $at + 3 < strlen( $text ) && + $text[ $at ] === 'w' && + $text[ $at + 1 ] === 'p' && + $text[ $at + 2 ] === ':' + ) ) { + return true; + } + + $name_starts_at = $at; + + // Skip wp. + $at += 3; + + // Parse the actual block name after wp. 
+ $name_length = strspn( $text, 'abcdefghijklmnopqrstuwxvyzABCDEFGHIJKLMNOPRQSTUWXVYZ0123456789_-', $at ); + if ( $name_length === 0 ) { + // This wasn't a block after all, just a regular comment. + return true; + } + $name = substr( $text, $name_starts_at, $name_length + 3 ); + $at += $name_length; + + // Assume no attributes by default. + $attributes = array(); + + // Skip the whitespace that follows the block name. + $at += strspn( $text, ' \t\f\r\n', $at ); + if ( $at < strlen( $text ) ) { + // It may be a self-closing block or a block with attributes. + + // However, block closers can be neither – let's short-circuit. + if ( $this->block_closer ) { + return true; + } + + // The rest of the comment can only consist of block attributes + // and an optional solidus character. + $rest = ltrim( substr( $text, $at ) ); + $at = strlen( $text ); + + // Inspect our potential JSON for the self-closing solidus (`/`) character. + $json_maybe = $rest; + if ( substr( $json_maybe, -1 ) === '/' ) { + // Self-closing block () + $this->self_closing_flag = true; + $json_maybe = substr( $json_maybe, 0, -1 ); + } + + // Let's try to parse attributes as JSON. + if ( strlen( $json_maybe ) > 0 ) { + $attributes = json_decode( $json_maybe, true ); + if ( null === $attributes || ! is_array( $attributes ) ) { + // This comment looked like a block comment, but the attributes didn't + // parse as a JSON array. This means it wasn't a block after all. + return true; + } + } + } + + // We have a block name and a valid attributes array. We may not find a block + // closer, but let's assume is a block and process it as such. + // @TODO: Confirm that WordPress block parser would have parsed this as a block. 
+ $this->block_name = $name; + $this->block_attributes = $attributes; + + if ( $this->block_closer ) { + $popped = array_pop( $this->stack_of_open_blocks ); + if ( $popped !== $name ) { + $this->last_block_error = sprintf( 'Block closer %s does not match the last opened block %s.', $name, $popped ); + return false; + } + } elseif ( ! $this->self_closing_flag ) { + array_push( $this->stack_of_open_blocks, $name ); + } + + return true; } /** @@ -509,11 +509,11 @@ public function set_block_attribute_value( $new_value ) { } $this->block_attributes_iterator - ->getSubIterator($this->block_attributes_iterator->getDepth()) - ->offsetSet( - $this->get_block_attribute_key(), - $new_value - ); + ->getSubIterator( $this->block_attributes_iterator->getDepth() ) + ->offsetSet( + $this->get_block_attribute_key(), + $new_value + ); $this->block_attributes_updated = true; return true; From a13c6d969615d8cb035ea8f92f3c78c95b00f6f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Thu, 9 Jan 2025 23:48:14 +0100 Subject: [PATCH 3/3] Add tests for self closing blocks --- .../tests/WPBlockMarkupProcessorTests.php | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/packages/playground/data-liberation/tests/WPBlockMarkupProcessorTests.php b/packages/playground/data-liberation/tests/WPBlockMarkupProcessorTests.php index 0abf1a159c..c4e559500d 100644 --- a/packages/playground/data-liberation/tests/WPBlockMarkupProcessorTests.php +++ b/packages/playground/data-liberation/tests/WPBlockMarkupProcessorTests.php @@ -40,6 +40,34 @@ static public function provider_test_finds_block_openers() { ]; } + /** + * + * @dataProvider provider_test_finds_self_closing_blocks + */ + public function test_finds_self_closing_blocks( $markup, $block_name, $block_attributes ) { + $p = new WP_Block_Markup_Processor( $markup ); + $p->next_token(); + $this->assertEquals( '#block-comment', $p->get_token_type(), 'Failed to identify the block comment' ); + $this->assertEquals( 
$block_name, $p->get_block_name(), 'Failed to identify the block name' ); + $this->assertEquals( $block_attributes, $p->get_block_attributes(), 'Failed to identify the block attributes' ); + $this->assertTrue( $p->is_self_closing_block(), 'Failed to identify the self-closing block status' ); + } + + static public function provider_test_finds_self_closing_blocks() { + return [ + 'Self-closing block without attributes' => [ + '', + 'wp:spacer', + [] + ], + 'Self-closing block with attributes' => [ + '', + 'wp:spacer', + [ 'height' => '20px' ] + ], + ]; + } + /** * * @dataProvider provider_test_finds_block_closers