1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
|
<?php
// Report every PHP error level (-1 sets all bits of the reporting mask), so that
// notices and deprecations raised by the code below are not hidden.
error_reporting(-1);
/**
 * Minimal whitelist-based HTML clean-up.
 *
 * Every tag found in the input is either dropped (unknown tag name) or re-emitted
 * in a normalised form: lower-cased name, only whitelisted attributes, void
 * elements written as self-closing. Tags left open at the end of the input are
 * closed automatically. Problems found along the way are collected in $errors.
 *
 * NOTE: attribute *values* are not inspected. A whitelisted attribute such as
 * href or src may still carry a "javascript:" URL; callers that need XSS safety
 * must validate URL schemes themselves.
 */
class MyHtmlTidy
{
    /** Regex fragment for one tag; quoted attribute values may contain ">". */
    const TAG = '<(?:"[^"]*"|\'[^\']*\'|[^\'">])*>';

    /** Regex fragment for one name=value pair (double-quoted, single-quoted or bare value). */
    const ATTR = '\w++\s*=\s*"[^"]++"|\w++\s*=\s*\'[^\']++\'|\w++\s*=\s*[^\s]++';

    /** Tag names that are allowed through; everything else is removed. */
    private $_goodTags = array('b', 'i', 'u', 's', 'p', 'a', 'img', 'br', 'hr');

    /** Tags that never have a closing tag and are always emitted as "<x/>". */
    private $_selfClose = array('img', 'br', 'hr');

    /** Allowed attribute names per tag; tags missing here may not have attributes. */
    private $_goodAttrs = array(
        'a' => array('href', 'title'),
        'img' => array('src', 'alt'),
    );

    /** Stack of currently open tag names (innermost last). */
    private $_nest = array();

    /** Human-readable messages produced by the most recent preparse() call. */
    public $errors = array();

    /**
     * Clean an HTML fragment.
     *
     * @param string $html Raw HTML; must be valid UTF-8 because the patterns use the "u" modifier.
     * @return string Cleaned HTML, or an empty string when the input could not be processed
     *                (an entry is added to $errors in that case).
     */
    public function preparse($html)
    {
        $this->_nest = array();
        $this->errors = array();

        $text = preg_replace_callback(
            '/(' . self::TAG . ')/Uus',
            array($this, '_replace'),
            (string) $html
        );

        if ($text === null) {
            // PCRE refused the subject (typically invalid UTF-8) or hit a limit.
            // Returning nothing is the only safe answer for a sanitizer.
            $this->_nest = array();
            $this->errors[] = 'Input could not be parsed (PCRE error code ' . preg_last_error() . ')';
            return '';
        }

        if (!empty($this->_nest)) {
            $this->errors[] = 'Unclosed tags ' . implode(', ', $this->_nest);
            // Close innermost first so the result stays properly nested.
            $text .= '</' . implode('></', array_reverse($this->_nest)) . '>';
        }

        return $text;
    }

    /**
     * preg_replace_callback handler: returns the replacement for one matched tag.
     *
     * @param array $matches Match data; index 1 holds the complete tag text.
     * @return string Normalised tag, or '' when the tag is removed.
     */
    private function _replace($matches)
    {
        $tag = $matches[1];

        if (!preg_match('/^<\/?(\w++)/', $tag, $m)) {
            // No tag name at all: "<>", comments, doctypes, processing instructions.
            // Reported with an empty name, exactly as an unknown tag would be.
            $this->errors[] = 'Tag ' . '' . ' is deprecated';
            return '';
        }

        $tagName = strtolower($m[1]);
        // A successful match guarantees at least "<x>", so both offsets below exist.
        $isSelfClosed = $tag[strlen($tag) - 2] === '/';
        $attrs = trim(substr($tag, strlen($m[0]), $isSelfClosed ? -2 : -1));

        if (!in_array($tagName, $this->_goodTags, true)) {
            $this->errors[] = 'Tag ' . $tagName . ' is deprecated';
            return '';
        }

        // Closing tag
        if ($tag[1] === '/') {
            if (empty($this->_nest) || end($this->_nest) !== $tagName) {
                // Does not match the innermost open tag: emit an empty, balanced
                // pair instead so the nesting stack stays untouched.
                $this->errors[] = 'Odd close tag ' . $tagName;
                return '<' . $tagName . '></' . $tagName . '>';
            }
            array_pop($this->_nest);
            return '</' . $tagName . '>';
        }

        // Open tag or self-closing tag
        $isSelfClosed = $isSelfClosed || in_array($tagName, $this->_selfClose, true);
        if (!$isSelfClosed) {
            $this->_nest[] = $tagName;
        }

        if (!isset($this->_goodAttrs[$tagName])) {
            // No attributes at all
            if ($attrs !== '') {
                $this->errors[] = 'Tag ' . $tagName . ' cannot have attributes';
            }
            $attrs = '';
        } else {
            $attrs = $this->_filterAttrs($tagName, $attrs);
        }

        return '<' . $tagName . $attrs . ($isSelfClosed ? '/>' : '>');
    }

    /**
     * Keep only the whitelisted attributes of a tag.
     *
     * @param string $tagName Lower-cased tag name that has an entry in $_goodAttrs.
     * @param string $attrs   Raw attribute text taken from the tag.
     * @return string Attribute string with a leading space, or '' when nothing is kept.
     */
    private function _filterAttrs($tagName, $attrs)
    {
        if (!preg_match_all('/' . self::ATTR . '/Uus', $attrs, $found)) {
            return '';
        }

        $kept = array();
        foreach ($found[0] as $attr) {
            $p = strpos($attr, '=');
            $attrName = strtolower(trim(substr($attr, 0, $p)));
            if (!in_array($attrName, $this->_goodAttrs[$tagName], true)) {
                $this->errors[] = 'Wrong ' . $tagName . ' attribute ' . $attrName;
                continue;
            }
            $kept[] = $attrName . '=' . $this->_quoteValue(trim(substr($attr, $p + 1)));
        }

        return $kept ? (' ' . implode(' ', $kept)) : '';
    }

    /**
     * Make sure an attribute value is safely quoted.
     *
     * A value already wrapped in one matching pair of quotes, with no further
     * occurrence of that quote inside, is returned unchanged. Anything else (a bare
     * value such as 0.gif, or text like ""onclick="x" that the bare-value pattern
     * swallowed) is wrapped in double quotes with inner double quotes escaped.
     * This keeps a following "/>" out of the value and prevents extra attributes
     * from being smuggled past the whitelist.
     *
     * @param string $value Raw value text as written in the source tag.
     * @return string Quoted value.
     */
    private function _quoteValue($value)
    {
        $len = strlen($value);
        if ($len >= 2) {
            $quote = $value[0];
            if (($quote === '"' || $quote === "'")
                && $value[$len - 1] === $quote
                && strpos(substr($value, 1, -1), $quote) === false
            ) {
                return $value;
            }
        }

        return '"' . str_replace('"', '&quot;', $value) . '"';
    }
}
// Demo: push a deliberately sloppy fragment through the sanitizer and print the
// input, the collected error messages (if any) and the cleaned result as plain text.
$t = new MyHtmlTidy();

// Sample input: a forbidden attribute on <p>, forbidden attributes on <a>,
// a bare attribute value on <img>, and <p> / <b> tags that are never closed.
$html = <<<HTML
<p class='blabla'>dslkldsldslsd<br>
kjksdjsdk<a href="http://thesite.name/path" target="_new" title="ololo" onclick="javascript:doit('xxx')">djdkjdk</a>
<img src=0.gif alt='pysh-pysh'>
ds;lsd;; <b>skjskjsk kjdkjdkd
HTML;

// Plain text so the browser shows the markup itself instead of rendering it.
header('Content-type: text/plain');

echo $html;
echo "\n===========================\n";

$preparsed = $t->preparse($html);

// The error section is printed only when the sanitizer reported something.
if (!empty($t->errors)) {
    echo implode("\n", $t->errors);
    echo "\n===========================\n";
}

echo $preparsed;
Комментариев нет:
Отправить комментарий