Skip to content

Commit

Permalink
Remove addPreFilter
Browse files Browse the repository at this point in the history
Pre-filters are applied in `__construct`, so adding more of them once the object has been instantiated has no effect.
  • Loading branch information
j0k3r committed Feb 29, 2016
1 parent 209c404 commit 149a333
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 19 deletions.
5 changes: 4 additions & 1 deletion src/JSLikeHTMLElement.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ public function __set($name, $value)
$f = $this->ownerDocument->createDocumentFragment();

// appendXML() expects well-formed markup (XHTML)
$result = @$f->appendXML($value); // @ to suppress PHP warnings
// @ to suppress PHP warnings
$result = @$f->appendXML($value);
if ($result) {
if ($f->hasChildNodes()) {
$this->appendChild($f);
Expand All @@ -75,6 +76,7 @@ public function __set($name, $value)

if ($result) {
$import = $f->getElementsByTagName('htmlfragment')->item(0);

foreach ($import->childNodes as $child) {
$importedNode = $this->ownerDocument->importNode($child, true);
$this->appendChild($importedNode);
Expand Down Expand Up @@ -102,6 +104,7 @@ public function __get($name)
{
if ($name == 'innerHTML') {
$inner = '';

foreach ($this->childNodes as $child) {
$inner .= $this->ownerDocument->saveXML($child);
}
Expand Down
19 changes: 3 additions & 16 deletions src/Readability.php
Original file line number Diff line number Diff line change
Expand Up @@ -250,17 +250,6 @@ public function getContent()
return $this->articleContent;
}

/**
 * Add pre filter for raw input HTML processing.
 *
 * The pair is stored in $this->pre_filters keyed by the pattern, so
 * registering the same pattern twice keeps only the latest replacer.
 *
 * NOTE(review): according to the commit description, pre filters are
 * consumed in __construct, so a filter added through this method after
 * instantiation is presumably never applied - confirm before relying on it.
 *
 * @param string $filter   RegExp for replace
 * @param string $replacer (optional) Replacer
 */
public function addPreFilter($filter, $replacer = '')
{
$this->pre_filters[$filter] = $replacer;
}

/**
* Add post filter for raw output HTML processing.
*
Expand Down Expand Up @@ -302,7 +291,7 @@ public function init()
}
}

if ($bodyElems->length > 0 && $this->body == null) {
if ($bodyElems->length > 0 && $this->body === null) {
$this->body = $bodyElems->item(0);
}

Expand Down Expand Up @@ -385,12 +374,11 @@ public function postProcessContent(\DOMElement $articleContent)
*/
protected function getArticleTitle()
{
$origTitle = '';

try {
$curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
} catch (\Exception $e) {
$curTitle = '';
$origTitle = '';
}

if (preg_match('/ [\|\-] /', $curTitle)) {
Expand Down Expand Up @@ -431,7 +419,7 @@ protected function prepDocument()
* In some cases a body element can't be found (if the HTML is totally hosed for example)
* so we create a new body node and append it to the document.
*/
if ($this->body == null) {
if ($this->body === null) {
$this->body = $this->dom->createElement('body');
$this->dom->documentElement->appendChild($this->body);
}
Expand Down Expand Up @@ -571,7 +559,6 @@ public function prepArticle(\DOMElement $articleContent)
$this->cleanConditionally($articleContent, 'form');
$this->cleanConditionally($articleContent, 'table');
$this->cleanConditionally($articleContent, 'ul');
//if (!$this->lightClean)
$this->cleanConditionally($articleContent, 'div');

// Remove extra paragraphs.
Expand Down
35 changes: 33 additions & 2 deletions tests/ReadabilityTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,22 @@ public function testWithClasses()
$this->assertNotContains('This text should be removed', $readability->getContent()->innerHTML);
}

/**
 * Parses an <article> made of seven linked paragraphs followed by a hidden
 * <div> holding ten "clock" paragraphs, with lightClean switched off, and
 * checks that the visible text is kept while the hidden text is dropped.
 */
public function testWithClassesWithoutLightClean()
{
    $visibleParagraphs = str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7);
    $hiddenParagraphs = str_repeat('<p class="clock">This text should be removed</p>', 10);
    $html = '<article>'.$visibleParagraphs.'<div style="display:none">'.$hiddenParagraphs.'</div></article>';

    $readability = new ReadabilityTested($html, 'http://0.0.0.0');
    $readability->debug = true;
    $readability->lightClean = false;

    $this->assertTrue($readability->init());

    // Both accessors are expected to hand back the JS-like DOM element wrapper.
    $content = $readability->getContent();
    $title = $readability->getTitle();
    $this->assertInstanceOf('Readability\JSLikeHTMLElement', $content);
    $this->assertInstanceOf('Readability\JSLikeHTMLElement', $title);

    $contentHtml = $content->innerHTML;
    $this->assertContains('alt="article"', $contentHtml);
    // The fixture has no <title>, so the extracted title must be empty.
    $this->assertEmpty($title->innerHTML);
    $this->assertContains('This is an awesome text with some links, here there are', $contentHtml);
    $this->assertNotContains('This text should be removed', $contentHtml);
}

public function testWithTd()
{
$readability = new ReadabilityTested('<table><tr>'.str_repeat('<td><p>This is an awesome text with some links, here there are the awesome</td>', 7).'</tr></table>', 'http://0.0.0.0');
Expand Down Expand Up @@ -429,7 +445,22 @@ public function testAppendIdAlreadyHere()
$this->assertTrue($res);
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
// $this->assertContains('<iframe src="https://www.youtube.com/embed/PUep6xNeKjA" width="560" height="315" frameborder="0" allowfullscreen="allowfullscreen"> </iframe>', $readability->getContent()->innerHTML);
// $this->assertContains('3D Touch', $readability->getTitle()->innerHTML);
}

/**
 * Runs the same fixture twice: once with the default configuration, where
 * the output is expected to carry <strong> in place of <b>, and once with
 * an extra post filter that replaces every <strong> element with an empty
 * string.
 */
public function testPostFilters()
{
    $html = '<div>'.str_repeat('<p>This <b>is</b> the awesome content :)</p>', 7).'</div>';

    // Baseline: no additional post filter registered.
    $default = new ReadabilityTested($html, 'http://0.0.0.0');
    $this->assertTrue($default->init());
    $this->assertContains('This <strong>is</strong> the awesome content :)', $default->getContent()->innerHTML);

    // The post filter has to be registered before init() produces the output.
    $filtered = new ReadabilityTested($html, 'http://0.0.0.0');
    $filtered->addPostFilter('!<strong[^>]*>(.*?)</strong>!is', '');
    $this->assertTrue($filtered->init());
    $this->assertContains('This the awesome content :)', $filtered->getContent()->innerHTML);
}
}

0 comments on commit 149a333

Please sign in to comment.