Код - пример #1 | |
1 |
<?php

error_reporting(-1);

/**
 * Minimal allowlist-based HTML cleaner.
 *
 * Every tag found in the input is either rewritten in a normalised form
 * (lower-case name, only allowed attributes, void elements written as
 * `<tag/>`) or dropped. Tags left open at the end of the input are closed
 * automatically. Problems found on the way are collected in {@see $errors}.
 *
 * Text between tags is passed through untouched.
 */
class MyHtmlTidy
{
    /** Regex fragment matching one tag, including quoted attribute values. */
    const TAG = '<(?:"[^"]*"|\'[^\']*\'|[^\'">])*>';

    /** Regex fragment matching one name=value attribute (double-quoted, single-quoted or unquoted). */
    const ATTR = '\w++\s*=\s*"[^"]++"|\w++\s*=\s*\'[^\']++\'|\w++\s*=\s*[^\s]++';

    /** @var string[] Tag names that are allowed in the output. */
    private $_goodTags = array('b', 'i', 'u', 's', 'p', 'a', 'img', 'br', 'hr');

    /** @var string[] Void elements: always emitted self-closed and never pushed on the nesting stack. */
    private $_selfClose = array('img', 'br', 'hr');

    /** @var array<string, string[]> Allowed attribute names per tag; tags not listed take no attributes. */
    private $_goodAttrs = array(
        'a'   => array('href', 'title'),
        'img' => array('src', 'alt'),
    );

    /** @var string[] Attributes whose value is a URL and must pass the scheme check. */
    private $_urlAttrs = array('href', 'src');

    /** @var string[] URL schemes accepted in URL attributes (scheme-less URLs are accepted too). */
    private $_goodSchemes = array('http', 'https', 'ftp', 'mailto');

    /** @var string[] Stack of currently open tag names. */
    private $_nest = array();

    /** @var string[] Human-readable problems found by the last preparse() call. */
    public $errors = array();

    /**
     * Cleans an HTML fragment.
     *
     * @param string $html Raw HTML.
     *
     * @return string Cleaned HTML; an empty string if the input could not be
     *                processed by PCRE (e.g. it is not valid UTF-8).
     */
    public function preparse($html)
    {
        $this->_nest = array();
        $this->errors = array();

        $text = preg_replace_callback(
            '/(' . self::TAG . ')/Uus',
            array($this, '_replace'),
            (string) $html
        );

        // preg_replace_callback() returns null on failure (invalid UTF-8 with
        // the /u flag, backtrack limit, ...). Fail closed instead of
        // continuing with a null "document".
        if ($text === null) {
            $this->_nest = array();
            $this->errors[] = 'Cannot parse HTML (PCRE error ' . preg_last_error() . ')';

            return '';
        }

        if (!empty($this->_nest)) {
            $this->errors[] = 'Unclosed tags ' . implode(', ', $this->_nest);
            // Close in reverse order of opening.
            $text .= '</' . implode('></', array_reverse($this->_nest)) . '>';
        }

        return $text;
    }

    /**
     * preg_replace_callback() handler: rewrites or drops a single tag.
     *
     * @param string[] $matches Regex match; index 1 holds the whole tag.
     *
     * @return string Replacement for the tag ('' to drop it).
     */
    private function _replace($matches)
    {
        $tag = $matches[1];

        // Comments, doctypes, "<>" and stray "< ... >" text have no tag name.
        if (!preg_match('/^<\/?(\w++)/', $tag, $m)) {
            $this->errors[] = 'Malformed tag';

            return '';
        }

        $tagName = strtolower($m[1]);
        $isSelfClosed = substr($tag, -2, 1) === '/';
        // Everything between the tag name and the closing ">" (or "/>").
        $attrs = trim((string) substr($tag, strlen($m[0]), ($isSelfClosed ? -2 : -1)));

        if (!in_array($tagName, $this->_goodTags, true)) {
            $this->errors[] = 'Tag ' . $tagName . ' is deprecated';

            return '';
        }

        // Closing tag
        if ($tag[1] === '/') {
            if (empty($this->_nest) || end($this->_nest) !== $tagName) {
                // Does not match the innermost open tag: emit an empty
                // element so the surrounding nesting stays balanced.
                $this->errors[] = 'Odd close tag ' . $tagName;

                return '<' . $tagName . '></' . $tagName . '>';
            }

            array_pop($this->_nest);

            return '</' . $tagName . '>';
        }

        // Open tag or self-closing tag
        $isSelfClosed = $isSelfClosed || in_array($tagName, $this->_selfClose, true);
        if (!$isSelfClosed) {
            $this->_nest[] = $tagName;
        }

        $lastUnquoted = false;

        if (!isset($this->_goodAttrs[$tagName])) {
            // No attributes at all
            if (strlen($attrs)) {
                $this->errors[] = 'Tag ' . $tagName . ' cannot have attributes';
            }
            $attrs = '';
        } else {
            // Check every attribute
            if (!preg_match_all('/' . self::ATTR . '/Uus', $attrs, $found)) {
                $found = array(array());
            }

            $kept = array();
            foreach ($found[0] as $attr) {
                $p = strpos($attr, '=');
                $attrName = strtolower(trim(substr($attr, 0, $p)));
                $value = trim(substr($attr, $p + 1));

                if (!in_array($attrName, $this->_goodAttrs[$tagName], true)) {
                    $this->errors[] = 'Wrong ' . $tagName . ' attribute ' . $attrName;
                    continue;
                }

                if (in_array($attrName, $this->_urlAttrs, true) && !$this->_isSafeUrl($value)) {
                    $this->errors[] = 'Unsafe ' . $tagName . ' attribute ' . $attrName;
                    continue;
                }

                $kept[] = $attrName . '=' . $value;
                $lastUnquoted = !$this->_isQuoted($value);
            }

            $attrs = count($kept) ? (' ' . implode(' ', $kept)) : '';
        }

        if (!$isSelfClosed) {
            return '<' . $tagName . $attrs . '>';
        }

        // An unquoted value directly followed by "/>" would swallow the slash
        // in an HTML parser (src=0.gif/> means src="0.gif/"), so separate them.
        return '<' . $tagName . $attrs . ($lastUnquoted ? ' />' : '/>');
    }

    /**
     * Tells whether a raw attribute value is wrapped in a matching pair of quotes.
     *
     * @param string $value Raw attribute value as written in the tag.
     *
     * @return bool
     */
    private function _isQuoted($value)
    {
        if (strlen($value) < 2) {
            return false;
        }

        $first = $value[0];

        return ($first === '"' || $first === "'") && substr($value, -1) === $first;
    }

    /**
     * Checks that a URL attribute value uses an allowed scheme or none at all.
     *
     * The value is normalised the way a browser would read it (character
     * references decoded, whitespace and control characters removed) before
     * the scheme is inspected, so obfuscated "javascript:" URLs are caught.
     *
     * @param string $value Raw attribute value, optionally quoted.
     *
     * @return bool True when the URL may be kept.
     */
    private function _isSafeUrl($value)
    {
        if ($this->_isQuoted($value)) {
            $value = substr($value, 1, -1);
        }

        // Browsers accept numeric character references without the trailing
        // ";", html_entity_decode() does not - add it before decoding.
        $value = preg_replace('/&#(x[0-9a-f]++|[0-9]++);?/i', '&#$1;', $value);
        $value = html_entity_decode((string) $value, ENT_QUOTES | ENT_HTML5, 'UTF-8');
        $value = (string) preg_replace('/[\x00-\x20]+/', '', $value);

        $colon = strpos($value, ':');
        if ($colon === false) {
            return true; // No scheme: relative URL.
        }

        $prefix = substr($value, 0, $colon);
        if (strpbrk($prefix, '/?') !== false) {
            return true; // The colon belongs to the path or query, not to a scheme.
        }

        return in_array(strtolower($prefix), $this->_goodSchemes, true);
    }
}

// Demo: clean a sample fragment and print the input, the problems found and the result.
$t = new MyHtmlTidy();

$html = <<<HTML
<p class='blabla'>dslkldsldslsd<br>
kjksdjsdk<a href="http://thesite.name/path" target="_new" title="ololo" onclick="javascript:doit('xxx')">djdkjdk</a>
<img src=0.gif alt='pysh-pysh'>
ds;lsd;;
<b>skjskjsk
kjdkjdkd
HTML;

if (!headers_sent()) {
    header('Content-type: text/plain');
}

echo $html;
echo "\n===========================\n";

$preparsed = $t->preparse($html);

if (!empty($t->errors)) {
    echo implode("\n", $t->errors);
    echo "\n===========================\n";
}

echo $preparsed;
Код - пример #1 |
<p class='blabla'>dslkldsldslsd<br> kjksdjsdk<a href="http://thesite.name/path" target="_new" title="ololo" onclick="javascript:doit('xxx')">djdkjdk</a> <img src=0.gif alt='pysh-pysh'> ds;lsd;; <b>skjskjsk kjdkjdkd =========================== Tag p cannot have attributes Wrong a attribute target Wrong a attribute onclick Unclosed tags p, b =========================== <p>dslkldsldslsd<br/> kjksdjsdk<a href="http://thesite.name/path" title="ololo">djdkjdk</a> <img src=0.gif alt='pysh-pysh'/> ds;lsd;; <b>skjskjsk kjdkjdkd</b></p> |