[ Index ]

PHP Cross Reference of Wordpress 2.9.1

title

Body

[close]

/wp-includes/ -> kses.php (source)

   1  <?php
   2  /**
   3   * HTML/XHTML filter that only allows some elements and attributes
   4   *
   5   * Added wp_ prefix to avoid conflicts with existing kses users
   6   *
   7   * @version 0.2.2
   8   * @copyright (C) 2002, 2003, 2005
   9   * @author Ulf Harnhammar <metaur@users.sourceforge.net>
  10   *
  11   * @package External
  12   * @subpackage KSES
  13   *
  14   * @internal
  15   * *** CONTACT INFORMATION ***
  16   * E-mail:      metaur at users dot sourceforge dot net
  17   * Web page:    http://sourceforge.net/projects/kses
  18   * Paper mail:  Ulf Harnhammar
  19   *              Ymergatan 17 C
  20   *              753 25  Uppsala
  21   *              SWEDEN
  22   *
  23   * [kses strips evil scripts!]
  24   */
  25  
  26  /**
  27   * You can override this in your my-hacks.php file You can also override this
  28   * in a plugin file. The my-hacks.php is deprecated in its usage.
  29   *
  30   * @since 1.2.0
  31   */
  32  if (!defined('CUSTOM_TAGS'))
  33      define('CUSTOM_TAGS', false);
  34  
  35  if (!CUSTOM_TAGS) {
  36      /**
  37       * Kses global for default allowable HTML tags.
  38       *
  39       * Can be override by using CUSTOM_TAGS constant.
  40       *
  41       * @global array $allowedposttags
  42       * @since 2.0.0
  43       */
  44      $allowedposttags = array(
  45          'address' => array(),
  46          'a' => array(
  47              'class' => array (),
  48              'href' => array (),
  49              'id' => array (),
  50              'title' => array (),
  51              'rel' => array (),
  52              'rev' => array (),
  53              'name' => array (),
  54              'target' => array()),
  55          'abbr' => array(
  56              'class' => array (),
  57              'title' => array ()),
  58          'acronym' => array(
  59              'title' => array ()),
  60          'b' => array(),
  61          'big' => array(),
  62          'blockquote' => array(
  63              'id' => array (),
  64              'cite' => array (),
  65              'class' => array(),
  66              'lang' => array(),
  67              'xml:lang' => array()),
  68          'br' => array (
  69              'class' => array ()),
  70          'button' => array(
  71              'disabled' => array (),
  72              'name' => array (),
  73              'type' => array (),
  74              'value' => array ()),
  75          'caption' => array(
  76              'align' => array (),
  77              'class' => array ()),
  78          'cite' => array (
  79              'class' => array(),
  80              'dir' => array(),
  81              'lang' => array(),
  82              'title' => array ()),
  83          'code' => array (
  84              'style' => array()),
  85          'col' => array(
  86              'align' => array (),
  87              'char' => array (),
  88              'charoff' => array (),
  89              'span' => array (),
  90              'dir' => array(),
  91              'style' => array (),
  92              'valign' => array (),
  93              'width' => array ()),
  94          'del' => array(
  95              'datetime' => array ()),
  96          'dd' => array(),
  97          'div' => array(
  98              'align' => array (),
  99              'class' => array (),
 100              'dir' => array (),
 101              'lang' => array(),
 102              'style' => array (),
 103              'xml:lang' => array()),
 104          'dl' => array(),
 105          'dt' => array(),
 106          'em' => array(),
 107          'fieldset' => array(),
 108          'font' => array(
 109              'color' => array (),
 110              'face' => array (),
 111              'size' => array ()),
 112          'form' => array(
 113              'action' => array (),
 114              'accept' => array (),
 115              'accept-charset' => array (),
 116              'enctype' => array (),
 117              'method' => array (),
 118              'name' => array (),
 119              'target' => array ()),
 120          'h1' => array(
 121              'align' => array (),
 122              'class' => array (),
 123              'id'    => array (),
 124              'style' => array ()),
 125          'h2' => array (
 126              'align' => array (),
 127              'class' => array (),
 128              'id'    => array (),
 129              'style' => array ()),
 130          'h3' => array (
 131              'align' => array (),
 132              'class' => array (),
 133              'id'    => array (),
 134              'style' => array ()),
 135          'h4' => array (
 136              'align' => array (),
 137              'class' => array (),
 138              'id'    => array (),
 139              'style' => array ()),
 140          'h5' => array (
 141              'align' => array (),
 142              'class' => array (),
 143              'id'    => array (),
 144              'style' => array ()),
 145          'h6' => array (
 146              'align' => array (),
 147              'class' => array (),
 148              'id'    => array (),
 149              'style' => array ()),
 150          'hr' => array (
 151              'align' => array (),
 152              'class' => array (),
 153              'noshade' => array (),
 154              'size' => array (),
 155              'width' => array ()),
 156          'i' => array(),
 157          'img' => array(
 158              'alt' => array (),
 159              'align' => array (),
 160              'border' => array (),
 161              'class' => array (),
 162              'height' => array (),
 163              'hspace' => array (),
 164              'longdesc' => array (),
 165              'vspace' => array (),
 166              'src' => array (),
 167              'style' => array (),
 168              'width' => array ()),
 169          'ins' => array(
 170              'datetime' => array (),
 171              'cite' => array ()),
 172          'kbd' => array(),
 173          'label' => array(
 174              'for' => array ()),
 175          'legend' => array(
 176              'align' => array ()),
 177          'li' => array (
 178              'align' => array (),
 179              'class' => array ()),
 180          'p' => array(
 181              'class' => array (),
 182              'align' => array (),
 183              'dir' => array(),
 184              'lang' => array(),
 185              'style' => array (),
 186              'xml:lang' => array()),
 187          'pre' => array(
 188              'style' => array(),
 189              'width' => array ()),
 190          'q' => array(
 191              'cite' => array ()),
 192          's' => array(),
 193          'span' => array (
 194              'class' => array (),
 195              'dir' => array (),
 196              'align' => array (),
 197              'lang' => array (),
 198              'style' => array (),
 199              'title' => array (),
 200              'xml:lang' => array()),
 201          'strike' => array(),
 202          'strong' => array(),
 203          'sub' => array(),
 204          'sup' => array(),
 205          'table' => array(
 206              'align' => array (),
 207              'bgcolor' => array (),
 208              'border' => array (),
 209              'cellpadding' => array (),
 210              'cellspacing' => array (),
 211              'class' => array (),
 212              'dir' => array(),
 213              'id' => array(),
 214              'rules' => array (),
 215              'style' => array (),
 216              'summary' => array (),
 217              'width' => array ()),
 218          'tbody' => array(
 219              'align' => array (),
 220              'char' => array (),
 221              'charoff' => array (),
 222              'valign' => array ()),
 223          'td' => array(
 224              'abbr' => array (),
 225              'align' => array (),
 226              'axis' => array (),
 227              'bgcolor' => array (),
 228              'char' => array (),
 229              'charoff' => array (),
 230              'class' => array (),
 231              'colspan' => array (),
 232              'dir' => array(),
 233              'headers' => array (),
 234              'height' => array (),
 235              'nowrap' => array (),
 236              'rowspan' => array (),
 237              'scope' => array (),
 238              'style' => array (),
 239              'valign' => array (),
 240              'width' => array ()),
 241          'textarea' => array(
 242              'cols' => array (),
 243              'rows' => array (),
 244              'disabled' => array (),
 245              'name' => array (),
 246              'readonly' => array ()),
 247          'tfoot' => array(
 248              'align' => array (),
 249              'char' => array (),
 250              'class' => array (),
 251              'charoff' => array (),
 252              'valign' => array ()),
 253          'th' => array(
 254              'abbr' => array (),
 255              'align' => array (),
 256              'axis' => array (),
 257              'bgcolor' => array (),
 258              'char' => array (),
 259              'charoff' => array (),
 260              'class' => array (),
 261              'colspan' => array (),
 262              'headers' => array (),
 263              'height' => array (),
 264              'nowrap' => array (),
 265              'rowspan' => array (),
 266              'scope' => array (),
 267              'valign' => array (),
 268              'width' => array ()),
 269          'thead' => array(
 270              'align' => array (),
 271              'char' => array (),
 272              'charoff' => array (),
 273              'class' => array (),
 274              'valign' => array ()),
 275          'title' => array(),
 276          'tr' => array(
 277              'align' => array (),
 278              'bgcolor' => array (),
 279              'char' => array (),
 280              'charoff' => array (),
 281              'class' => array (),
 282              'style' => array (),
 283              'valign' => array ()),
 284          'tt' => array(),
 285          'u' => array(),
 286          'ul' => array (
 287              'class' => array (),
 288              'style' => array (),
 289              'type' => array ()),
 290          'ol' => array (
 291              'class' => array (),
 292              'start' => array (),
 293              'style' => array (),
 294              'type' => array ()),
 295          'var' => array ());
 296  
 297      /**
 298       * Kses allowed HTML elements.
 299       *
 300       * @global array $allowedtags
 301       * @since 1.0.0
 302       */
 303      $allowedtags = array(
 304          'a' => array(
 305              'href' => array (),
 306              'title' => array ()),
 307          'abbr' => array(
 308              'title' => array ()),
 309          'acronym' => array(
 310              'title' => array ()),
 311          'b' => array(),
 312          'blockquote' => array(
 313              'cite' => array ()),
 314          //    'br' => array(),
 315          'cite' => array (),
 316          'code' => array(),
 317          'del' => array(
 318              'datetime' => array ()),
 319          //    'dd' => array(),
 320          //    'dl' => array(),
 321          //    'dt' => array(),
 322          'em' => array (), 'i' => array (),
 323          //    'ins' => array('datetime' => array(), 'cite' => array()),
 324          //    'li' => array(),
 325          //    'ol' => array(),
 326          //    'p' => array(),
 327          'q' => array(
 328              'cite' => array ()),
 329          'strike' => array(),
 330          'strong' => array(),
 331          //    'sub' => array(),
 332          //    'sup' => array(),
 333          //    'u' => array(),
 334          //    'ul' => array(),
 335      );
 336  }
 337  
 338  /**
 339   * Filters content and keeps only allowable HTML elements.
 340   *
 341   * This function makes sure that only the allowed HTML element names, attribute
 342   * names and attribute values plus only sane HTML entities will occur in
 343   * $string. You have to remove any slashes from PHP's magic quotes before you
 344   * call this function.
 345   *
 346   * The default allowed protocols are 'http', 'https', 'ftp', 'mailto', 'news',
 347   * 'irc', 'gopher', 'nntp', 'feed', and finally 'telnet. This covers all common
 348   * link protocols, except for 'javascript' which should not be allowed for
 349   * untrusted users.
 350   *
 351   * @since 1.0.0
 352   *
 353   * @param string $string Content to filter through kses
 354   * @param array $allowed_html List of allowed HTML elements
 355   * @param array $allowed_protocols Optional. Allowed protocol in links.
 356   * @return string Filtered content with only allowed HTML elements
 357   */
 358  function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) {
 359      $string = wp_kses_no_null($string);
 360      $string = wp_kses_js_entities($string);
 361      $string = wp_kses_normalize_entities($string);
 362      $allowed_html_fixed = wp_kses_array_lc($allowed_html);
 363      $string = wp_kses_hook($string, $allowed_html_fixed, $allowed_protocols); // WP changed the order of these funcs and added args to wp_kses_hook
 364      return wp_kses_split($string, $allowed_html_fixed, $allowed_protocols);
 365  }
 366  
 367  /**
 368   * You add any kses hooks here.
 369   *
 370   * There is currently only one kses WordPress hook and it is called here. All
 371   * parameters are passed to the hooks and expected to recieve a string.
 372   *
 373   * @since 1.0.0
 374   *
 375   * @param string $string Content to filter through kses
 376   * @param array $allowed_html List of allowed HTML elements
 377   * @param array $allowed_protocols Allowed protocol in links
 378   * @return string Filtered content through 'pre_kses' hook
 379   */
 380  function wp_kses_hook($string, $allowed_html, $allowed_protocols) {
 381      $string = apply_filters('pre_kses', $string, $allowed_html, $allowed_protocols);
 382      return $string;
 383  }
 384  
 385  /**
 386   * This function returns kses' version number.
 387   *
 388   * @since 1.0.0
 389   *
 390   * @return string KSES Version Number
 391   */
 392  function wp_kses_version() {
 393      return '0.2.2';
 394  }
 395  
 396  /**
 397   * Searches for HTML tags, no matter how malformed.
 398   *
 399   * It also matches stray ">" characters.
 400   *
 401   * @since 1.0.0
 402   *
 403   * @param string $string Content to filter
 404   * @param array $allowed_html Allowed HTML elements
 405   * @param array $allowed_protocols Allowed protocols to keep
 406   * @return string Content with fixed HTML tags
 407   */
 408  function wp_kses_split($string, $allowed_html, $allowed_protocols) {
 409      global $pass_allowed_html, $pass_allowed_protocols;
 410      $pass_allowed_html = $allowed_html;
 411      $pass_allowed_protocols = $allowed_protocols;
 412      return preg_replace_callback('%((<!--.*?(-->|$))|(<[^>]*(>|$)|>))%',
 413          create_function('$match', 'global $pass_allowed_html, $pass_allowed_protocols; return wp_kses_split2($match[1], $pass_allowed_html, $pass_allowed_protocols);'), $string);
 414  }
 415  
 416  /**
 417   * Callback for wp_kses_split for fixing malformed HTML tags.
 418   *
 419   * This function does a lot of work. It rejects some very malformed things like
 420   * <:::>. It returns an empty string, if the element isn't allowed (look ma, no
 421   * strip_tags()!). Otherwise it splits the tag into an element and an attribute
 422   * list.
 423   *
 424   * After the tag is split into an element and an attribute list, it is run
 425   * through another filter which will remove illegal attributes and once that is
 426   * completed, will be returned.
 427   *
 428   * @access private
 429   * @since 1.0.0
 430   * @uses wp_kses_attr()
 431   *
 432   * @param string $string Content to filter
 433   * @param array $allowed_html Allowed HTML elements
 434   * @param array $allowed_protocols Allowed protocols to keep
 435   * @return string Fixed HTML element
 436   */
 437  function wp_kses_split2($string, $allowed_html, $allowed_protocols) {
 438      $string = wp_kses_stripslashes($string);
 439  
 440      if (substr($string, 0, 1) != '<')
 441          return '&gt;';
 442      # It matched a ">" character
 443  
 444      if (preg_match('%^<!--(.*?)(-->)?$%', $string, $matches)) {
 445          $string = str_replace(array('<!--', '-->'), '', $matches[1]);
 446          while ( $string != $newstring = wp_kses($string, $allowed_html, $allowed_protocols) )
 447              $string = $newstring;
 448          if ( $string == '' )
 449              return '';
 450          // prevent multiple dashes in comments
 451          $string = preg_replace('/--+/', '-', $string);
 452          // prevent three dashes closing a comment
 453          $string = preg_replace('/-$/', '', $string);
 454          return "<!--{$string}-->";
 455      }
 456      # Allow HTML comments
 457  
 458      if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches))
 459          return '';
 460      # It's seriously malformed
 461  
 462      $slash = trim($matches[1]);
 463      $elem = $matches[2];
 464      $attrlist = $matches[3];
 465  
 466      if (!@isset($allowed_html[strtolower($elem)]))
 467          return '';
 468      # They are using a not allowed HTML element
 469  
 470      if ($slash != '')
 471          return "<$slash$elem>";
 472      # No attributes are allowed for closing elements
 473  
 474      return wp_kses_attr("$slash$elem", $attrlist, $allowed_html, $allowed_protocols);
 475  }
 476  
 477  /**
 478   * Removes all attributes, if none are allowed for this element.
 479   *
 480   * If some are allowed it calls wp_kses_hair() to split them further, and then
 481   * it builds up new HTML code from the data that kses_hair() returns. It also
 482   * removes "<" and ">" characters, if there are any left. One more thing it does
 483   * is to check if the tag has a closing XHTML slash, and if it does, it puts one
 484   * in the returned code as well.
 485   *
 486   * @since 1.0.0
 487   *
 488   * @param string $element HTML element/tag
 489   * @param string $attr HTML attributes from HTML element to closing HTML element tag
 490   * @param array $allowed_html Allowed HTML elements
 491   * @param array $allowed_protocols Allowed protocols to keep
 492   * @return string Sanitized HTML element
 493   */
 494  function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) {
 495      # Is there a closing XHTML slash at the end of the attributes?
 496  
 497      $xhtml_slash = '';
 498      if (preg_match('%\s/\s*$%', $attr))
 499          $xhtml_slash = ' /';
 500  
 501      # Are any attributes allowed at all for this element?
 502  
 503      if (@ count($allowed_html[strtolower($element)]) == 0)
 504          return "<$element$xhtml_slash>";
 505  
 506      # Split it
 507  
 508      $attrarr = wp_kses_hair($attr, $allowed_protocols);
 509  
 510      # Go through $attrarr, and save the allowed attributes for this element
 511      # in $attr2
 512  
 513      $attr2 = '';
 514  
 515      foreach ($attrarr as $arreach) {
 516          if (!@ isset ($allowed_html[strtolower($element)][strtolower($arreach['name'])]))
 517              continue; # the attribute is not allowed
 518  
 519          $current = $allowed_html[strtolower($element)][strtolower($arreach['name'])];
 520          if ($current == '')
 521              continue; # the attribute is not allowed
 522  
 523          if (!is_array($current))
 524              $attr2 .= ' '.$arreach['whole'];
 525          # there are no checks
 526  
 527          else {
 528              # there are some checks
 529              $ok = true;
 530              foreach ($current as $currkey => $currval)
 531                  if (!wp_kses_check_attr_val($arreach['value'], $arreach['vless'], $currkey, $currval)) {
 532                      $ok = false;
 533                      break;
 534                  }
 535  
 536              if ( $arreach['name'] == 'style' ) {
 537                  $orig_value = $arreach['value'];
 538  
 539                  $value = safecss_filter_attr($orig_value);
 540  
 541                  if ( empty($value) )
 542                      continue;
 543  
 544                  $arreach['value'] = $value;
 545  
 546                  $arreach['whole'] = str_replace($orig_value, $value, $arreach['whole']);
 547              }
 548  
 549              if ($ok)
 550                  $attr2 .= ' '.$arreach['whole']; # it passed them
 551          } # if !is_array($current)
 552      } # foreach
 553  
 554      # Remove any "<" or ">" characters
 555  
 556      $attr2 = preg_replace('/[<>]/', '', $attr2);
 557  
 558      return "<$element$attr2$xhtml_slash>";
 559  }
 560  
 561  /**
 562   * Builds an attribute list from string containing attributes.
 563   *
 564   * This function does a lot of work. It parses an attribute list into an array
 565   * with attribute data, and tries to do the right thing even if it gets weird
 566   * input. It will add quotes around attribute values that don't have any quotes
 567   * or apostrophes around them, to make it easier to produce HTML code that will
 568   * conform to W3C's HTML specification. It will also remove bad URL protocols
 569   * from attribute values.  It also reduces duplicate attributes by using the
 570   * attribute defined first (foo='bar' foo='baz' will result in foo='bar').
 571   *
 572   * @since 1.0.0
 573   *
 574   * @param string $attr Attribute list from HTML element to closing HTML element tag
 575   * @param array $allowed_protocols Allowed protocols to keep
 576   * @return array List of attributes after parsing
 577   */
 578  function wp_kses_hair($attr, $allowed_protocols) {
 579      $attrarr = array ();
 580      $mode = 0;
 581      $attrname = '';
 582      $uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');
 583  
 584      # Loop through the whole attribute list
 585  
 586      while (strlen($attr) != 0) {
 587          $working = 0; # Was the last operation successful?
 588  
 589          switch ($mode) {
 590              case 0 : # attribute name, href for instance
 591  
 592                  if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
 593                      $attrname = $match[1];
 594                      $working = $mode = 1;
 595                      $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
 596                  }
 597  
 598                  break;
 599  
 600              case 1 : # equals sign or valueless ("selected")
 601  
 602                  if (preg_match('/^\s*=\s*/', $attr)) # equals sign
 603                      {
 604                      $working = 1;
 605                      $mode = 2;
 606                      $attr = preg_replace('/^\s*=\s*/', '', $attr);
 607                      break;
 608                  }
 609  
 610                  if (preg_match('/^\s+/', $attr)) # valueless
 611                      {
 612                      $working = 1;
 613                      $mode = 0;
 614                      if(FALSE === array_key_exists($attrname, $attrarr)) {
 615                          $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
 616                      }
 617                      $attr = preg_replace('/^\s+/', '', $attr);
 618                  }
 619  
 620                  break;
 621  
 622              case 2 : # attribute value, a URL after href= for instance
 623  
 624                  if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match))
 625                      # "value"
 626                      {
 627                      $thisval = $match[1];
 628                      if ( in_array($attrname, $uris) )
 629                          $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
 630  
 631                      if(FALSE === array_key_exists($attrname, $attrarr)) {
 632                          $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
 633                      }
 634                      $working = 1;
 635                      $mode = 0;
 636                      $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
 637                      break;
 638                  }
 639  
 640                  if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match))
 641                      # 'value'
 642                      {
 643                      $thisval = $match[1];
 644                      if ( in_array($attrname, $uris) )
 645                          $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
 646  
 647                      if(FALSE === array_key_exists($attrname, $attrarr)) {
 648                          $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname='$thisval'", 'vless' => 'n');
 649                      }
 650                      $working = 1;
 651                      $mode = 0;
 652                      $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
 653                      break;
 654                  }
 655  
 656                  if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match))
 657                      # value
 658                      {
 659                      $thisval = $match[1];
 660                      if ( in_array($attrname, $uris) )
 661                          $thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);
 662  
 663                      if(FALSE === array_key_exists($attrname, $attrarr)) {
 664                          $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
 665                      }
 666                      # We add quotes to conform to W3C's HTML spec.
 667                      $working = 1;
 668                      $mode = 0;
 669                      $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
 670                  }
 671  
 672                  break;
 673          } # switch
 674  
 675          if ($working == 0) # not well formed, remove and try again
 676          {
 677              $attr = wp_kses_html_error($attr);
 678              $mode = 0;
 679          }
 680      } # while
 681  
 682      if ($mode == 1 && FALSE === array_key_exists($attrname, $attrarr))
 683          # special case, for when the attribute list ends with a valueless
 684          # attribute like "selected"
 685          $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
 686  
 687      return $attrarr;
 688  }
 689  
 690  /**
 691   * Performs different checks for attribute values.
 692   *
 693   * The currently implemented checks are "maxlen", "minlen", "maxval", "minval"
 694   * and "valueless" with even more checks to come soon.
 695   *
 696   * @since 1.0.0
 697   *
 698   * @param string $value Attribute value
 699   * @param string $vless Whether the value is valueless or not. Use 'y' or 'n'
 700   * @param string $checkname What $checkvalue is checking for.
 701   * @param mixed $checkvalue What constraint the value should pass
 702   * @return bool Whether check passes (true) or not (false)
 703   */
 704  function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) {
 705      $ok = true;
 706  
 707      switch (strtolower($checkname)) {
 708          case 'maxlen' :
 709              # The maxlen check makes sure that the attribute value has a length not
 710              # greater than the given value. This can be used to avoid Buffer Overflows
 711              # in WWW clients and various Internet servers.
 712  
 713              if (strlen($value) > $checkvalue)
 714                  $ok = false;
 715              break;
 716  
 717          case 'minlen' :
 718              # The minlen check makes sure that the attribute value has a length not
 719              # smaller than the given value.
 720  
 721              if (strlen($value) < $checkvalue)
 722                  $ok = false;
 723              break;
 724  
 725          case 'maxval' :
 726              # The maxval check does two things: it checks that the attribute value is
 727              # an integer from 0 and up, without an excessive amount of zeroes or
 728              # whitespace (to avoid Buffer Overflows). It also checks that the attribute
 729              # value is not greater than the given value.
 730              # This check can be used to avoid Denial of Service attacks.
 731  
 732              if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
 733                  $ok = false;
 734              if ($value > $checkvalue)
 735                  $ok = false;
 736              break;
 737  
 738          case 'minval' :
 739              # The minval check checks that the attribute value is a positive integer,
 740              # and that it is not smaller than the given value.
 741  
 742              if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
 743                  $ok = false;
 744              if ($value < $checkvalue)
 745                  $ok = false;
 746              break;
 747  
 748          case 'valueless' :
 749              # The valueless check checks if the attribute has a value
 750              # (like <a href="blah">) or not (<option selected>). If the given value
 751              # is a "y" or a "Y", the attribute must not have a value.
 752              # If the given value is an "n" or an "N", the attribute must have one.
 753  
 754              if (strtolower($checkvalue) != $vless)
 755                  $ok = false;
 756              break;
 757      } # switch
 758  
 759      return $ok;
 760  }
 761  
 762  /**
 763   * Sanitize string from bad protocols.
 764   *
 765   * This function removes all non-allowed protocols from the beginning of
 766   * $string. It ignores whitespace and the case of the letters, and it does
 767   * understand HTML entities. It does its work in a while loop, so it won't be
 768   * fooled by a string like "javascript:javascript:alert(57)".
 769   *
 770   * @since 1.0.0
 771   *
 772   * @param string $string Content to filter bad protocols from
 773   * @param array $allowed_protocols Allowed protocols to keep
 774   * @return string Filtered content
 775   */
 776  function wp_kses_bad_protocol($string, $allowed_protocols) {
 777      $string = wp_kses_no_null($string);
 778      $string2 = $string.'a';
 779  
 780      while ($string != $string2) {
 781          $string2 = $string;
 782          $string = wp_kses_bad_protocol_once($string, $allowed_protocols);
 783      } # while
 784  
 785      return $string;
 786  }
 787  
 788  /**
 789   * Removes any NULL characters in $string.
 790   *
 791   * @since 1.0.0
 792   *
 793   * @param string $string
 794   * @return string
 795   */
 796  function wp_kses_no_null($string) {
 797      $string = preg_replace('/\0+/', '', $string);
 798      $string = preg_replace('/(\\\\0)+/', '', $string);
 799  
 800      return $string;
 801  }
 802  
 803  /**
 804   * Strips slashes from in front of quotes.
 805   *
 806   * This function changes the character sequence  \"  to just  ". It leaves all
 807   * other slashes alone. It's really weird, but the quoting from
 808   * preg_replace(//e) seems to require this.
 809   *
 810   * @since 1.0.0
 811   *
 812   * @param string $string String to strip slashes
 813   * @return string Fixed strings with quoted slashes
 814   */
 815  function wp_kses_stripslashes($string) {
 816      return preg_replace('%\\\\"%', '"', $string);
 817  }
 818  
 819  /**
 820   * Goes through an array and changes the keys to all lower case.
 821   *
 822   * @since 1.0.0
 823   *
 824   * @param array $inarray Unfiltered array
 825   * @return array Fixed array with all lowercase keys
 826   */
 827  function wp_kses_array_lc($inarray) {
 828      $outarray = array ();
 829  
 830      foreach ( (array) $inarray as $inkey => $inval) {
 831          $outkey = strtolower($inkey);
 832          $outarray[$outkey] = array ();
 833  
 834          foreach ( (array) $inval as $inkey2 => $inval2) {
 835              $outkey2 = strtolower($inkey2);
 836              $outarray[$outkey][$outkey2] = $inval2;
 837          } # foreach $inval
 838      } # foreach $inarray
 839  
 840      return $outarray;
 841  }
 842  
 843  /**
 844   * Removes the HTML JavaScript entities found in early versions of Netscape 4.
 845   *
 846   * @since 1.0.0
 847   *
 848   * @param string $string
 849   * @return string
 850   */
 851  function wp_kses_js_entities($string) {
 852      return preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
 853  }
 854  
 855  /**
 856   * Handles parsing errors in wp_kses_hair().
 857   *
 858   * The general plan is to remove everything to and including some whitespace,
 859   * but it deals with quotes and apostrophes as well.
 860   *
 861   * @since 1.0.0
 862   *
 863   * @param string $string
 864   * @return string
 865   */
 866  function wp_kses_html_error($string) {
 867      return preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string);
 868  }
 869  
 870  /**
 871   * Sanitizes content from bad protocols and other characters.
 872   *
 873   * This function searches for URL protocols at the beginning of $string, while
 874   * handling whitespace and HTML entities.
 875   *
 876   * @since 1.0.0
 877   *
 878   * @param string $string Content to check for bad protocols
 879   * @param string $allowed_protocols Allowed protocols
 880   * @return string Sanitized content
 881   */
 882  function wp_kses_bad_protocol_once($string, $allowed_protocols) {
 883      global $_kses_allowed_protocols;
 884      $_kses_allowed_protocols = $allowed_protocols;
 885  
 886      $string2 = preg_split('/:|&#58;|&#x3a;/i', $string, 2);
 887      if ( isset($string2[1]) && !preg_match('%/\?%', $string2[0]) )
 888          $string = wp_kses_bad_protocol_once2($string2[0]) . trim($string2[1]);
 889      else
 890          $string = preg_replace_callback('/^((&[^;]*;|[\sA-Za-z0-9])*)'.'(:|&#58;|&#[Xx]3[Aa];)\s*/', 'wp_kses_bad_protocol_once2', $string);
 891  
 892      return $string;
 893  }
 894  
 895  /**
 896   * Callback for wp_kses_bad_protocol_once() regular expression.
 897   *
 898   * This function processes URL protocols, checks to see if they're in the
 899   * white-list or not, and returns different data depending on the answer.
 900   *
 901   * @access private
 902   * @since 1.0.0
 903   *
 904   * @param mixed $matches string or preg_replace_callback() matches array to check for bad protocols
 905   * @return string Sanitized content
 906   */
 907  function wp_kses_bad_protocol_once2($matches) {
 908      global $_kses_allowed_protocols;
 909  
 910      if ( is_array($matches) ) {
 911          if ( ! isset($matches[1]) || empty($matches[1]) )
 912              return '';
 913  
 914          $string = $matches[1];
 915      } else {
 916          $string = $matches;
 917      }
 918  
 919      $string2 = wp_kses_decode_entities($string);
 920      $string2 = preg_replace('/\s/', '', $string2);
 921      $string2 = wp_kses_no_null($string2);
 922      $string2 = strtolower($string2);
 923  
 924      $allowed = false;
 925      foreach ( (array) $_kses_allowed_protocols as $one_protocol)
 926          if (strtolower($one_protocol) == $string2) {
 927              $allowed = true;
 928              break;
 929          }
 930  
 931      if ($allowed)
 932          return "$string2:";
 933      else
 934          return '';
 935  }
 936  
 937  /**
 938   * Converts and fixes HTML entities.
 939   *
 940   * This function normalizes HTML entities. It will convert "AT&T" to the correct
 941   * "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
 942   *
 943   * @since 1.0.0
 944   *
 945   * @param string $string Content to normalize entities
 946   * @return string Content with normalized entities
 947   */
 948  function wp_kses_normalize_entities($string) {
 949      # Disarm all entities by converting & to &amp;
 950  
 951      $string = str_replace('&', '&amp;', $string);
 952  
 953      # Change back the allowed entities in our entity whitelist
 954  
 955      $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $string);
 956      $string = preg_replace_callback('/&amp;#0*([0-9]{1,5});/', 'wp_kses_normalize_entities2', $string);
 957      $string = preg_replace_callback('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', 'wp_kses_normalize_entities3', $string);
 958  
 959      return $string;
 960  }
 961  
 962  /**
 963   * Callback for wp_kses_normalize_entities() regular expression.
 964   *
 965   * This function helps wp_kses_normalize_entities() to only accept 16 bit values
 966   * and nothing more for &#number; entities.
 967   *
 968   * @access private
 969   * @since 1.0.0
 970   *
 971   * @param array $matches preg_replace_callback() matches array
 972   * @return string Correctly encoded entity
 973   */
 974  function wp_kses_normalize_entities2($matches) {
 975      if ( ! isset($matches[1]) || empty($matches[1]) )
 976          return '';
 977  
 978      $i = $matches[1];
 979      return ( ( ! valid_unicode($i) ) || ($i > 65535) ? "&amp;#$i;" : "&#$i;" );
 980  }
 981  
 982  /**
 983   * Callback for wp_kses_normalize_entities() for regular expression.
 984   *
 985   * This function helps wp_kses_normalize_entities() to only accept valid Unicode
 986   * numeric entities in hex form.
 987   *
 988   * @access private
 989   *
 990   * @param array $matches preg_replace_callback() matches array
 991   * @return string Correctly encoded entity
 992   */
 993  function wp_kses_normalize_entities3($matches) {
 994      if ( ! isset($matches[2]) || empty($matches[2]) )
 995          return '';
 996  
 997      $hexchars = $matches[2];
 998      return ( ( ! valid_unicode(hexdec($hexchars)) ) ? "&amp;#x$hexchars;" : "&#x$hexchars;" );
 999  }
1000  
/**
 * Helper function to determine if a Unicode value is valid.
 *
 * Accepted values are 0x9, 0xa and 0xd, plus the ranges 0x20-0xd7ff,
 * 0xe000-0xfffd and 0x10000-0x10ffff. Everything else is rejected.
 *
 * @param int $i Unicode value
 * @return bool true if the value was a valid Unicode number
 */
function valid_unicode($i) {
    # Tab, line feed and carriage return
    if ( $i == 0x9 || $i == 0xa || $i == 0xd )
        return true;

    if ( $i >= 0x20 && $i <= 0xd7ff )
        return true;

    if ( $i >= 0xe000 && $i <= 0xfffd )
        return true;

    if ( $i >= 0x10000 && $i <= 0x10ffff )
        return true;

    return false;
}
1013  
/**
 * Decodes numeric HTML entities into characters.
 *
 * Decimal entities (&#65;) are decoded first, then hexadecimal ones (&#x41;).
 * Named entities like &auml; are left as they are; the URL protocol
 * whitelisting system has no use for them.
 *
 * @since 1.0.0
 *
 * @param string $string Content to change entities
 * @return string Content after decoded entities
 */
function wp_kses_decode_entities($string) {
    $decimal_decoded = preg_replace_callback('/&#([0-9]+);/', '_wp_kses_decode_entities_chr', $string);

    return preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/', '_wp_kses_decode_entities_chr_hexdec', $decimal_decoded);
}
1032  
/**
 * Regex callback for wp_kses_decode_entities()
 *
 * Converts the decimal number captured in the first sub-match into a
 * character with chr().
 *
 * @param array $match preg match
 * @return string
 */
function _wp_kses_decode_entities_chr( $match ) {
    $code = $match[1];

    return chr( $code );
}
1042  
/**
 * Regex callback for wp_kses_decode_entities()
 *
 * Converts the hexadecimal number captured in the first sub-match into a
 * character, using hexdec() followed by chr().
 *
 * @param array $match preg match
 * @return string
 */
function _wp_kses_decode_entities_chr_hexdec( $match ) {
    $code = hexdec( $match[1] );

    return chr( $code );
}
1052  
/**
 * Runs slashed content through Kses with the default allowed tags.
 *
 * The slashes are stripped, the content is filtered by wp_kses() against the
 * $allowedtags global, and the result is slashed again.
 *
 * @since 1.0.0
 * @uses $allowedtags
 *
 * @param string $data Content to filter, expected to be escaped with slashes
 * @return string Filtered content
 */
function wp_filter_kses($data) {
    global $allowedtags;

    $unslashed = stripslashes( $data );
    $filtered = wp_kses( $unslashed, $allowedtags );

    return addslashes( $filtered );
}
1066  
/**
 * Runs unslashed content through Kses with the default allowed tags.
 *
 * The content is handed to wp_kses() as-is, together with the $allowedtags
 * global.
 *
 * @since 2.9.0
 * @uses $allowedtags
 *
 * @param string $data Content to filter, expected to not be escaped
 * @return string Filtered content
 */
function wp_kses_data($data) {
    global $allowedtags;

    $filtered = wp_kses( $data, $allowedtags );

    return $filtered;
}
1080  
/**
 * Runs slashed post content through Kses with the post allowed tags.
 *
 * Post content means the page contents of the 'post' type, not $_POST data
 * from forms. The slashes are stripped, the content is filtered by wp_kses()
 * against the $allowedposttags global, and the result is slashed again.
 *
 * @since 2.0.0
 * @uses $allowedposttags
 *
 * @param string $data Post content to filter, expected to be escaped with slashes
 * @return string Filtered post content with allowed HTML tags and attributes intact.
 */
function wp_filter_post_kses($data) {
    global $allowedposttags;

    $unslashed = stripslashes( $data );
    $filtered = wp_kses( $unslashed, $allowedposttags );

    return addslashes( $filtered );
}
1097  
/**
 * Runs unslashed post content through Kses with the post allowed tags.
 *
 * Post content means the page contents of the 'post' type, not $_POST data
 * from forms. The content is handed to wp_kses() as-is, together with the
 * $allowedposttags global.
 *
 * @since 2.9.0
 * @uses $allowedposttags
 *
 * @param string $data Post content to filter
 * @return string Filtered post content with allowed HTML tags and attributes intact.
 */
function wp_kses_post($data) {
    global $allowedposttags;

    $filtered = wp_kses( $data, $allowedposttags );

    return $filtered;
}
1114  
/**
 * Removes every bit of HTML from slashed content.
 *
 * The slashes are stripped, the content is filtered by wp_kses() with an
 * empty list of allowed tags, and the result is slashed again.
 *
 * @since 2.1.0
 *
 * @param string $data Content to strip all HTML from
 * @return string Filtered content without any HTML
 */
function wp_filter_nohtml_kses($data) {
    $unslashed = stripslashes( $data );
    $filtered = wp_kses( $unslashed, array() );

    return addslashes( $filtered );
}
1126  
/**
 * Registers all Kses input form content filters.
 *
 * Every filter is added with the default priority. wp_filter_kses() is hooked
 * to 'pre_comment_content' and 'title_save_pre'.
 *
 * wp_filter_post_kses() is hooked to 'content_save_pre', 'excerpt_save_pre'
 * and 'content_filtered_save_pre'.
 *
 * @since 2.0.0
 * @uses add_filter() See description for what functions are added to what hooks.
 */
function kses_init_filters() {
    // Normal filtering.
    foreach ( array('pre_comment_content', 'title_save_pre') as $hook )
        add_filter($hook, 'wp_filter_kses');

    // Post filtering
    foreach ( array('content_save_pre', 'excerpt_save_pre', 'content_filtered_save_pre') as $hook )
        add_filter($hook, 'wp_filter_post_kses');
}
1149  
/**
 * Unregisters all Kses input form content filters.
 *
 * A quick procedural way of removing every filter that kses_init_filters()
 * adds: wp_filter_kses() from 'pre_comment_content' and 'title_save_pre', and
 * wp_filter_post_kses() from 'content_save_pre', 'excerpt_save_pre' and
 * 'content_filtered_save_pre'.
 *
 * The kses_init() function itself stays attached to the 'init' and
 * 'set_current_user' hooks (both at default priority).
 *
 * @since 2.0.6
 */
function kses_remove_filters() {
    // Normal filtering.
    foreach ( array('pre_comment_content', 'title_save_pre') as $hook )
        remove_filter($hook, 'wp_filter_kses');

    // Post filtering
    foreach ( array('content_save_pre', 'excerpt_save_pre', 'content_filtered_save_pre') as $hook )
        remove_filter($hook, 'wp_filter_post_kses');
}
1172  
/**
 * Sets up most of the Kses filters for input form content.
 *
 * All Kses filters are removed first, in case the current user does not need
 * Kses to filter the content. They are added back only when the current user
 * lacks the 'unfiltered_html' capability.
 *
 * If this function is removed from the 'init' and 'set_current_user' hooks
 * (priority is default), none of the Kses filter hooks will be added.
 *
 * @uses kses_remove_filters() Removes the Kses filters
 * @uses kses_init_filters() Adds the Kses filters back if the user
 *        does not have unfiltered HTML capability.
 * @since 2.0.0
 */
function kses_init() {
    kses_remove_filters();

    if ( ! current_user_can('unfiltered_html') )
        kses_init_filters();
}
1195  
// Run kses_init() on 'init' and again whenever the current user is set.
add_action( 'init', 'kses_init' );
add_action( 'set_current_user', 'kses_init' );
1198  
/**
 * Filters an inline style attribute, keeping only whitelisted CSS properties.
 *
 * NULL characters, newlines, carriage returns and tabs are stripped first. If
 * what remains contains a backslash, an opening parenthesis, an ampersand or
 * a comment opener, the whole value is rejected and an empty string is
 * returned. Otherwise the value is split on ';' and a declaration is kept
 * only when the property name in front of its first ':' is in the list
 * returned by the 'safe_style_css' filter. Fragments without any ':' are kept
 * as they are. When the filtered list is empty, the cleaned CSS is returned
 * without any property check.
 *
 * @uses apply_filters() Calls 'safe_style_css' with the default list of allowed properties.
 *
 * @param string $css Value of a style attribute
 * @param string $deprecated Not used
 * @return string Filtered CSS, with the kept declarations joined by ';'
 */
function safecss_filter_attr( $css, $deprecated = '' ) {
    $css = wp_kses_no_null($css);
    $css = str_replace(array("\n","\r","\t"), '', $css);

    // Remove any inline css containing \ ( & or comments. The backslash is
    // written as four backslashes so that the character class really holds
    // one; with only two, PCRE reads "\(" as an escaped parenthesis and
    // backslashes get through.
    if ( preg_match( '%[\\\\(&]|/\*%', $css ) )
        return '';

    // explode() instead of split(): split() is deprecated as of PHP 5.3 and
    // the separator is a plain character anyway.
    $css_array = explode( ';', trim( $css ) );
    $allowed_attr = apply_filters( 'safe_style_css', array( 'text-align', 'margin', 'color', 'float',
    'border', 'background', 'background-color', 'border-bottom', 'border-bottom-color',
    'border-bottom-style', 'border-bottom-width', 'border-collapse', 'border-color', 'border-left',
    'border-left-color', 'border-left-style', 'border-left-width', 'border-right', 'border-right-color',
    'border-right-style', 'border-right-width', 'border-spacing', 'border-style', 'border-top',
    'border-top-color', 'border-top-style', 'border-top-width', 'border-width', 'caption-side',
    'clear', 'cursor', 'direction', 'font', 'font-family', 'font-size', 'font-style',
    'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height', 'margin-bottom',
    'margin-left', 'margin-right', 'margin-top', 'overflow', 'padding', 'padding-bottom',
    'padding-left', 'padding-right', 'padding-top', 'text-decoration', 'text-indent', 'vertical-align',
    'width' ) );

    if ( empty($allowed_attr) )
        return $css;

    $css = '';
    foreach ( $css_array as $css_item ) {
        // Trim before the emptiness check so that whitespace-only items
        // (as in "a:b; ;c:d") do not leave a stray ';' in the output.
        $css_item = trim( $css_item );
        if ( $css_item == '' )
            continue;
        $found = false;
        if ( strpos( $css_item, ':' ) === false ) {
            $found = true;
        } else {
            $parts = explode( ':', $css_item );
            if ( in_array( trim( $parts[0] ), $allowed_attr ) )
                $found = true;
        }
        if ( $found ) {
            if( $css != '' )
                $css .= ';';
            $css .= $css_item;
        }
    }

    return $css;
}


Generated: Fri Jan 8 00:19:48 2010 Cross-referenced by PHPXref 0.7