1
0
mirror of https://git.tt-rss.org/git/tt-rss.git synced 2025-12-25 08:21:30 +00:00

af_readability: add missing file

This commit is contained in:
Andrew Dolgov
2019-08-16 15:29:24 +03:00
parent 865c54abcb
commit 3e4701116d
11 changed files with 202 additions and 66 deletions

View File

@@ -0,0 +1,82 @@
<?php
namespace andreskrey\Readability\Nodes\DOM;
/**
 * Class DOMNodeList.
 *
 * A stand-in for PHP's native DOMNodeList that can have nodes appended to it. The native list is populated
 * by the DOM extension itself and offers no way to insert nodes; this class mirrors the parts of its API
 * implemented below (->length, item(), counting and iteration) and adds add() to append new nodes.
 *
 * It cannot extend the original DOMNodeList class because the functionality behind the property ->length is hidden
 * from the user and cannot be extended, changed, or tweaked.
 */
class DOMNodeList implements \Countable, \IteratorAggregate
{
    /**
     * Nodes held by the list, in insertion order.
     *
     * @var array
     */
    protected $items = [];

    /**
     * Number of nodes added through add(); exposed read-only as ->length via __get().
     *
     * @var int
     */
    protected $length = 0;

    /**
     * To allow access to length in the same way that DOMNodeList allows.
     *
     * Any other property name raises an "Undefined property" notice and evaluates to null.
     *
     * @param string $name
     *
     * @return int|null
     */
    public function __get($name)
    {
        switch ($name) {
            case 'length':
                return $this->length;
            default:
                trigger_error(sprintf('Undefined property: %s::%s', static::class, $name));

                return null;
        }
    }

    /**
     * Appends a node to the end of the list.
     *
     * @param DOMNode|DOMElement|DOMComment $node
     *
     * @return DOMNodeList The list itself, so calls can be chained
     */
    public function add($node)
    {
        $this->items[] = $node;
        $this->length++;

        return $this;
    }

    /**
     * Returns the node stored at the given position.
     *
     * Like the native DOMNodeList::item(), an offset that does not exist yields null instead of raising an
     * "undefined offset" error.
     *
     * @param int $offset
     *
     * @return DOMNode|DOMElement|DOMComment|null
     */
    public function item(int $offset)
    {
        return $this->items[$offset] ?? null;
    }

    /**
     * @return int Number of nodes in the list
     */
    public function count(): int
    {
        return $this->length;
    }

    /**
     * To make it compatible with iterator_to_array() function.
     *
     * @return \ArrayIterator Iterator over the stored nodes
     */
    public function getIterator(): \ArrayIterator
    {
        return new \ArrayIterator($this->items);
    }
}

View File

@@ -181,11 +181,11 @@ trait NodeTrait
/**
* Override for native hasAttribute.
*
* @see getAttribute
*
* @param $attributeName
*
* @return bool
*
* @see getAttribute
*/
public function hasAttribute($attributeName)
{
@@ -317,10 +317,14 @@ trait NodeTrait
*
* @param bool $filterEmptyDOMText Filter empty DOMText nodes?
*
* @deprecated Use NodeUtility::filterTextNodes, function will be removed in version 3.0
*
* @return array
*/
public function getChildren($filterEmptyDOMText = false)
{
@trigger_error('getChildren was replaced with NodeUtility::filterTextNodes and will be removed in version 3.0', E_USER_DEPRECATED);
$ret = iterator_to_array($this->childNodes);
if ($filterEmptyDOMText) {
// Array values is used to discard the key order. Needs to be 0 to whatever without skipping any number
@@ -418,12 +422,12 @@ trait NodeTrait
public function hasSingleTagInsideElement($tag)
{
// There should be exactly 1 element child with given tag
if (count($children = $this->getChildren(true)) !== 1 || $children[0]->nodeName !== $tag) {
if (count($children = NodeUtility::filterTextNodes($this->childNodes)) !== 1 || $children->item(0)->nodeName !== $tag) {
return false;
}
// And there should be no text nodes with real content
return array_reduce($children, function ($carry, $child) {
return array_reduce(iterator_to_array($children), function ($carry, $child) {
if (!$carry === false) {
return false;
}
@@ -443,7 +447,7 @@ trait NodeTrait
{
$result = false;
if ($this->hasChildNodes()) {
foreach ($this->getChildren() as $child) {
foreach ($this->childNodes as $child) {
if (in_array($child->nodeName, $this->divToPElements)) {
$result = true;
} else {
@@ -500,18 +504,22 @@ trait NodeTrait
);
}
/**
 * Tells whether the node is probably visible to the reader.
 *
 * In the original JS project they check if the node has the style display=none, which unfortunately
 * in our case we have no way of knowing that. So we just check for the attribute hidden or an inline
 * "display: none" declaration in the style attribute.
 *
 * Might be a good idea to check for classes or other attributes like 'aria-hidden'
 *
 * @return bool False when the inline style declares display:none or the hidden attribute is present
 */
public function isProbablyVisible()
{
    // CSS property names and keywords are case-insensitive and may be surrounded by any amount of
    // whitespace, so "DISPLAY:NONE", "display : none" and "display:   none" must all count as hidden.
    $hiddenByStyle = preg_match('/display\s*:\s*none/i', $this->getAttribute('style'));

    return !$hiddenByStyle && !$this->hasAttribute('hidden');
}
/**
* @return bool
*/
public function isWhitespace()
{
return ($this->nodeType === XML_TEXT_NODE && mb_strlen(trim($this->textContent)) === 0) ||
@@ -557,4 +565,23 @@ trait NodeTrait
$count -= ($count - $nodes->length);
}
}
/**
 * PHP counterpart of the JS firstElementChild property. The native firstChild may be any kind of DOMNode
 * (text, comment, ...), so this walks the children and hands back the first one that is a DOMElement.
 *
 * @return \DOMElement|null The first element child, or null when there is none
 */
public function getFirstElementChild()
{
    $children = $this->childNodes;

    // Nothing to walk when childNodes is not iterable.
    if (!($children instanceof \Traversable)) {
        return null;
    }

    foreach ($children as $candidate) {
        if ($candidate instanceof \DOMElement) {
            return $candidate;
        }
    }

    return null;
}
}

View File

@@ -5,6 +5,7 @@ namespace andreskrey\Readability\Nodes;
use andreskrey\Readability\Nodes\DOM\DOMDocument;
use andreskrey\Readability\Nodes\DOM\DOMElement;
use andreskrey\Readability\Nodes\DOM\DOMNode;
use andreskrey\Readability\Nodes\DOM\DOMNodeList;
/**
* Class NodeUtility.
@@ -157,4 +158,23 @@ class NodeUtility
return ($originalNode) ? $originalNode->nextSibling : $originalNode;
}
/**
 * Builds a new list without the text nodes that are empty or whitespace-only.
 *
 * Nodes of any other type, and text nodes with content left after trimming, are kept in their original order.
 *
 * @param \DOMNodeList $list
 *
 * @return DOMNodeList
 */
public static function filterTextNodes(\DOMNodeList $list)
{
    $filtered = new DOMNodeList();

    foreach ($list as $candidate) {
        // Only text nodes are ever discarded, and only when trimming leaves nothing behind.
        $isBlankText = $candidate->nodeType === XML_TEXT_NODE && !mb_strlen(trim($candidate->nodeValue));
        if ($isBlankText) {
            continue;
        }

        $filtered->add($candidate);
    }

    return $filtered;
}
}