Skip to content

Commit

Permalink
Use WP_HTML_Processor::create_fragment instead of new WP_HTML_Processor
Browse files (browse the repository at this point in the history)
  • Loading branch information
adamziel committed Dec 19, 2024
1 parent 440f8aa commit 2df1458
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class WP_HTML_To_Blocks implements WP_Block_Markup_Converter {
private $metadata = array();

public function __construct( $html ) {
- $this->html = new \WP_HTML_Processor( $html );
+ $this->html = WP_HTML_Processor::create_fragment( $html );
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1613,10 +1613,7 @@ private function step_in_head(): bool {
*/
$charset = $this->get_attribute( 'charset' );
if ( is_string( $charset ) && 'tentative' === $this->state->encoding_confidence ) {
- // Commenting this out for now. We're assuming UTF-8 in WP_HTML_To_Blocks and
- // we don't want to fail just because a document contained a meta tag with a UTF-8 charset.
- // @TODO: Bail on non-utf8 charsets.
- // $this->bail( 'Cannot yet process META tags with charset to determine encoding.' );
+ $this->bail( 'Cannot yet process META tags with charset to determine encoding.' );
}

/*
Expand All @@ -1635,10 +1632,7 @@ private function step_in_head(): bool {
0 === strcasecmp( $http_equiv, 'Content-Type' ) &&
'tentative' === $this->state->encoding_confidence
) {
- // Commenting this out for now. We're assuming UTF-8 in WP_HTML_To_Blocks and
- // we don't want to fail just because a document contained a meta tag with a UTF-8 charset.
- // @TODO: Bail on non-utf8 charsets.
- // $this->bail( 'Cannot yet process META tags with http-equiv Content-Type to determine encoding.' );
+ $this->bail( 'Cannot yet process META tags with http-equiv Content-Type to determine encoding.' );
}

return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,9 @@ private function normalize_markup( $markup ) {
$serialized = $processor->serialize();
$serialized = trim(
str_replace(
- [
- // Naively remove parts of the HTML that serialize()
- // adds that we don't want.
- '<html><head></head><body>',
- '</body></html>',
- // Even more naively, remove all the newlines.
- "\n"
- ],
+ // Naively remove all the newlines to prevent minor formatting differences
+ // from causing false negatives in $expected === $actual.
+ "\n",
'',
$serialized
)
Expand Down

0 comments on commit 2df1458

Please sign in to comment.