Initial checkin
This commit is contained in:
commit
d75eb444fc
4304 changed files with 369634 additions and 0 deletions
259
thirdparty/filter/filter.php
vendored
Normal file
259
thirdparty/filter/filter.php
vendored
Normal file
|
@ -0,0 +1,259 @@
|
|||
<?php
|
||||
|
||||
class Filter
|
||||
{
|
||||
private $allowed_protocols = array('http','https'), $allowed_tags = array('a', 'i', 'b', 'em', 'span', 'strong', 'ul', 'ol', 'li','sup','sub','table', 'tr', 'hr','blockquote','s','thead', 'th', 'tbody', 'br','h1','h2','h3');
|
||||
|
||||
public function addAllowedProtocols($protocols)
|
||||
{
|
||||
$this->allowed_protocols = (array)$protocols;
|
||||
}
|
||||
|
||||
public function addAllowedTags($tags)
|
||||
{
|
||||
$this->allowed_tags = (array)$tags;
|
||||
}
|
||||
|
||||
public function xss($string)
|
||||
{
|
||||
// Only operate on valid UTF-8 strings. This is necessary to prevent cross
|
||||
// site scripting issues on Internet Explorer 6.
|
||||
if (!$this->isUtf8($string)) {
|
||||
return '';
|
||||
}
|
||||
|
||||
// Remove NULL characters (ignored by some browsers).
|
||||
$string = str_replace(chr(0), '', $string);
|
||||
|
||||
// Remove Netscape 4 JS entities.
|
||||
$string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
|
||||
|
||||
// Defuse all HTML entities.
|
||||
$string = str_replace('&', '&', $string);
|
||||
|
||||
// Change back only well-formed entities in our whitelist:
|
||||
// Decimal numeric entities.
|
||||
$string = preg_replace('/&#([0-9]+;)/', '&#\1', $string);
|
||||
|
||||
// Hexadecimal numeric entities.
|
||||
$string = preg_replace('/&#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
|
||||
|
||||
// Named entities.
|
||||
$string = preg_replace('/&([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
|
||||
|
||||
return preg_replace_callback('%
|
||||
(
|
||||
<(?=[^a-zA-Z!/]) # a lone <
|
||||
| # or
|
||||
<!--.*?--> # a comment
|
||||
| # or
|
||||
<[^>]*(>|$) # a string that starts with a <, up until the > or the end of the string
|
||||
| # or
|
||||
> # just a >
|
||||
)%x', array($this, 'split'), $string);
|
||||
}
|
||||
|
||||
private function isUtf8($string)
|
||||
{
|
||||
if (strlen($string) == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return (preg_match('/^./us', $string) == 1);
|
||||
}
|
||||
|
||||
private function split($m)
|
||||
{
|
||||
$string = $m[1];
|
||||
|
||||
if (substr($string, 0, 1) != '<') {
|
||||
// We matched a lone ">" character.
|
||||
return '>';
|
||||
} elseif (strlen($string) == 1) {
|
||||
// We matched a lone "<" character.
|
||||
return '<';
|
||||
}
|
||||
|
||||
if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9\-]+)([^>]*)>?|(<!--.*?-->)$%', $string, $matches)) {
|
||||
// Seriously malformed.
|
||||
return '';
|
||||
}
|
||||
|
||||
$slash = trim($matches[1]);
|
||||
$elem = &$matches[2];
|
||||
$attrlist = &$matches[3];
|
||||
$comment = &$matches[4];
|
||||
|
||||
if ($comment) {
|
||||
$elem = '!--';
|
||||
}
|
||||
|
||||
if (!in_array(strtolower($elem), $this->allowed_tags, true)) {
|
||||
// Disallowed HTML element.
|
||||
return '';
|
||||
}
|
||||
|
||||
if ($comment) {
|
||||
return $comment;
|
||||
}
|
||||
|
||||
if ($slash != '') {
|
||||
return "</$elem>";
|
||||
}
|
||||
|
||||
// Is there a closing XHTML slash at the end of the attributes?
|
||||
$attrlist = preg_replace('%(\s?)/\s*$%', '\1', $attrlist, -1, $count);
|
||||
$xhtml_slash = $count ? ' /' : '';
|
||||
|
||||
// Clean up attributes.
|
||||
$attr2 = implode(' ', $this->attributes($attrlist));
|
||||
$attr2 = preg_replace('/[<>]/', '', $attr2);
|
||||
$attr2 = strlen($attr2) ? ' ' . $attr2 : '';
|
||||
|
||||
return "<$elem$attr2$xhtml_slash>";
|
||||
}
|
||||
|
||||
private function attributes($attr) {
|
||||
|
||||
$attrarr = array();
|
||||
$mode = 0;
|
||||
$attrname = '';
|
||||
|
||||
while (strlen($attr) != 0) {
|
||||
// Was the last operation successful?
|
||||
$working = 0;
|
||||
|
||||
switch ($mode) {
|
||||
case 0:
|
||||
// Attribute name, href for instance.
|
||||
if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
|
||||
$attrname = strtolower($match[1]);
|
||||
$skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
|
||||
$working = $mode = 1;
|
||||
$attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
// Equals sign or valueless ("selected").
|
||||
if (preg_match('/^\s*=\s*/', $attr)) {
|
||||
$working = 1;
|
||||
$mode = 2;
|
||||
$attr = preg_replace('/^\s*=\s*/', '', $attr);
|
||||
break;
|
||||
}
|
||||
|
||||
if (preg_match('/^\s+/', $attr)) {
|
||||
$working = 1;
|
||||
$mode = 0;
|
||||
|
||||
if (!$skip) {
|
||||
$attrarr[] = $attrname;
|
||||
}
|
||||
|
||||
$attr = preg_replace('/^\s+/', '', $attr);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
// Attribute value, a URL after href= for instance.
|
||||
if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
|
||||
$thisval = $this->badProtocol($match[1]);
|
||||
|
||||
if (!$skip) {
|
||||
$attrarr[] = "$attrname=\"$thisval\"";
|
||||
}
|
||||
|
||||
$working = 1;
|
||||
$mode = 0;
|
||||
$attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
|
||||
break;
|
||||
}
|
||||
|
||||
if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
|
||||
$thisval = $this->badProtocol($match[1]);
|
||||
|
||||
if (!$skip) {
|
||||
$attrarr[] = "$attrname='$thisval'";
|
||||
}
|
||||
|
||||
$working = 1;
|
||||
$mode = 0;
|
||||
$attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
|
||||
break;
|
||||
}
|
||||
|
||||
if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
|
||||
$thisval = $this->badProtocol($match[1]);
|
||||
|
||||
if (!$skip) {
|
||||
$attrarr[] = "$attrname=\"$thisval\"";
|
||||
}
|
||||
|
||||
$working = 1;
|
||||
$mode = 0;
|
||||
$attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if ($working == 0) {
|
||||
// Not well formed; remove and try again.
|
||||
$attr = preg_replace('/
|
||||
^
|
||||
(
|
||||
"[^"]*("|$) # - a string that starts with a double quote, up until the next double quote or the end of the string
|
||||
| # or
|
||||
\'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
|
||||
| # or
|
||||
\S # - a non-whitespace character
|
||||
)* # any number of the above three
|
||||
\s* # any number of whitespaces
|
||||
/x', '', $attr);
|
||||
|
||||
$mode = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// The attribute list ends with a valueless attribute like "selected".
|
||||
if ($mode == 1 && !$skip) {
|
||||
$attrarr[] = $attrname;
|
||||
}
|
||||
|
||||
return $attrarr;
|
||||
}
|
||||
|
||||
private function badProtocol($string) {
|
||||
|
||||
$string = html_entity_decode($string, ENT_QUOTES, 'UTF-8');
|
||||
return htmlspecialchars($this->stripDangerousProtocols($string), ENT_QUOTES, 'UTF-8');
|
||||
}
|
||||
|
||||
private function stripDangerousProtocols($uri)
|
||||
{
|
||||
|
||||
// Iteratively remove any invalid protocol found.
|
||||
do {
|
||||
$before = $uri;
|
||||
$colonpos = strpos($uri, ':');
|
||||
|
||||
if ($colonpos > 0) {
|
||||
// We found a colon, possibly a protocol. Verify.
|
||||
$protocol = substr($uri, 0, $colonpos);
|
||||
|
||||
// If a colon is preceded by a slash, question mark or hash, it cannot
|
||||
// possibly be part of the URL scheme. This must be a relative URL, which
|
||||
// inherits the (safe) protocol of the base document.
|
||||
if (preg_match('![/?#]!', $protocol)) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Check if this is a disallowed protocol. Per RFC2616, section 3.2.3
|
||||
// (URI Comparison) scheme comparison must be case-insensitive.
|
||||
if (!in_array(strtolower($protocol), $this->allowed_protocols, true)) {
|
||||
$uri = substr($uri, $colonpos + 1);
|
||||
}
|
||||
}
|
||||
} while ($before != $uri);
|
||||
|
||||
return $uri;
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue