diff --git a/src/JSLikeHTMLElement.php b/src/JSLikeHTMLElement.php index 13b043a..798ac05 100644 --- a/src/JSLikeHTMLElement.php +++ b/src/JSLikeHTMLElement.php @@ -56,7 +56,8 @@ public function __set($name, $value) $f = $this->ownerDocument->createDocumentFragment(); // appendXML() expects well-formed markup (XHTML) - $result = @$f->appendXML($value); // @ to suppress PHP warnings + // @ to suppress PHP warnings + $result = @$f->appendXML($value); if ($result) { if ($f->hasChildNodes()) { $this->appendChild($f); @@ -75,6 +76,7 @@ public function __set($name, $value) if ($result) { $import = $f->getElementsByTagName('htmlfragment')->item(0); + foreach ($import->childNodes as $child) { $importedNode = $this->ownerDocument->importNode($child, true); $this->appendChild($importedNode); @@ -102,6 +104,7 @@ public function __get($name) { if ($name == 'innerHTML') { $inner = ''; + foreach ($this->childNodes as $child) { $inner .= $this->ownerDocument->saveXML($child); } diff --git a/src/Readability.php b/src/Readability.php index d658a97..b40a544 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -250,17 +250,6 @@ public function getContent() return $this->articleContent; } - /** - * Add pre filter for raw input HTML processing. - * - * @param string RegExp for replace - * @param string (optional) Replacer - */ - public function addPreFilter($filter, $replacer = '') - { - $this->pre_filters[$filter] = $replacer; - } - /** * Add post filter for raw output HTML processing. * @@ -302,7 +291,7 @@ public function init() } } - if ($bodyElems->length > 0 && $this->body == null) { + if ($bodyElems->length > 0 && $this->body === null) { $this->body = $bodyElems->item(0); } @@ -385,12 +374,11 @@ public function postProcessContent(\DOMElement $articleContent) */ protected function getArticleTitle() { - $origTitle = ''; - try { $curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0)); } catch (\Exception $e) { $curTitle = ''; + $origTitle = ''; } if (preg_match('/ [\|\-] /', $curTitle)) { @@ -431,7 +419,7 @@ protected function prepDocument() * In some cases a body element can't be found (if the HTML is totally hosed for example) * so we create a new body node and append it to the document. */ - if ($this->body == null) { + if ($this->body === null) { $this->body = $this->dom->createElement('body'); $this->dom->documentElement->appendChild($this->body); } @@ -571,7 +559,6 @@ public function prepArticle(\DOMElement $articleContent) $this->cleanConditionally($articleContent, 'form'); $this->cleanConditionally($articleContent, 'table'); $this->cleanConditionally($articleContent, 'ul'); - //if (!$this->lightClean) $this->cleanConditionally($articleContent, 'div'); // Remove extra paragraphs. diff --git a/tests/ReadabilityTest.php b/tests/ReadabilityTest.php index e9a0ef3..c74766f 100644 --- a/tests/ReadabilityTest.php +++ b/tests/ReadabilityTest.php @@ -215,6 +215,22 @@ public function testWithClasses() $this->assertNotContains('This text should be removed', $readability->getContent()->innerHTML); } + public function testWithClassesWithoutLightClean() + { + $readability = new ReadabilityTested('
'.str_repeat('

This is an awesome text with some links, here there are: the awesome

', 7).'
'.str_repeat('

This text should be removed

', 10).'
', 'http://0.0.0.0'); + $readability->debug = true; + $readability->lightClean = false; + $res = $readability->init(); + + $this->assertTrue($res); + $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); + $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); + $this->assertContains('alt="article"', $readability->getContent()->innerHTML); + $this->assertEmpty($readability->getTitle()->innerHTML); + $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML); + $this->assertNotContains('This text should be removed', $readability->getContent()->innerHTML); + } + public function testWithTd() { $readability = new ReadabilityTested(''.str_repeat('', 7).'

This is an awesome text with some links, here there are the awesome

', 'http://0.0.0.0'); @@ -429,7 +445,22 @@ public function testAppendIdAlreadyHere() $this->assertTrue($res); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); - // $this->assertContains('', $readability->getContent()->innerHTML); - // $this->assertContains('3D Touch', $readability->getTitle()->innerHTML); + } + + public function testPostFilters() + { + $readability = new ReadabilityTested('
'.str_repeat('

This is the awesome content :)

', 7).'
', 'http://0.0.0.0'); + $res = $readability->init(); + + $this->assertTrue($res); + $this->assertContains('This is the awesome content :)', $readability->getContent()->innerHTML); + + $readability = new ReadabilityTested('
'.str_repeat('

This is the awesome content :)

', 7).'
', 'http://0.0.0.0'); + $readability->addPostFilter('!]*>(.*?)!is', ''); + + $res = $readability->init(); + + $this->assertTrue($res); + $this->assertContains('This the awesome content :)', $readability->getContent()->innerHTML); } }