[ Index ]

PHP Cross Reference of Drupal 6 (yi-drupal)

title

Body

[close]

/modules/filter/ -> filter.module (source)

   1  <?php
   2  
   3  /**
   4   * @file
   5   * Framework for handling filtering of content.
   6   */
   7  
// This is a special format ID which means "use the default format". This value
// can be passed to the filter APIs as a format ID: this is equivalent to not
// passing an explicit format at all.
define('FILTER_FORMAT_DEFAULT', 0);

// Modes of the HTML filter, stored per input format in the
// "filter_html_$format" variable: either strip the disallowed tags or escape
// all tags.
define('FILTER_HTML_STRIP', 1);
define('FILTER_HTML_ESCAPE', 2);
  15  
  16  /**
  17   * Implementation of hook_help().
  18   */
  19  function filter_help($path, $arg) {
  20    switch ($path) {
  21      case 'admin/help#filter':
  22        $output = '<p>'. t("The filter module allows administrators to configure text input formats for use on your site. An input format defines the HTML tags, codes, and other input allowed in both content and comments, and is a key feature in guarding against potentially damaging input from malicious users. Two input formats included by default are <em>Filtered HTML</em> (which allows only an administrator-approved subset of HTML tags) and <em>Full HTML</em> (which allows the full set of HTML tags). Additional input formats may be created by an administrator.") .'</p>';
  23        $output .= '<p>'. t('Each input format uses filters to manipulate text, and most input formats apply several different filters to text in a specific order. Each filter is designed for a specific purpose, and generally either adds, removes or transforms elements within user-entered text before it is displayed. A filter does not change the actual content of a post, but instead, modifies it temporarily before it is displayed. A filter may remove unapproved HTML tags, for instance, while another automatically adds HTML to make links referenced in text clickable.') .'</p>';
  24        $output .= '<p>'. t('Users can choose between the available input formats when creating or editing content. Administrators can configure which input formats are available to which user roles, as well as choose a default input format.') .'</p>';
  25        $output .= '<p>'. t('For more information, see the online handbook entry for <a href="@filter">Filter module</a>.', array('@filter' => 'http://drupal.org/handbook/modules/filter/')) .'</p>';
  26        return $output;
  27      case 'admin/settings/filters':
  28        $output = '<p>'. t('<em>Input formats</em> define a way of processing user-supplied text in Drupal. Each input format uses filters to manipulate text, and most input formats apply several different filters to text, in a specific order. Each filter is designed to accomplish a specific purpose, and generally either removes elements from or adds elements to text before it is displayed. Users can choose between the available input formats when submitting content.') .'</p>';
  29        $output .= '<p>'. t('Use the list below to configure which input formats are available to which roles, as well as choose a default input format (used for imported content, for example). The default format is always available to users. All input formats are available to users in a role with the "administer filters" permission.') .'</p>';
  30        return $output;
  31      case 'admin/settings/filters/%':
  32        return '<p>'. t('Every <em>filter</em> performs one particular change on the user input, for example stripping out malicious HTML or making URLs clickable. Choose which filters you want to apply to text in this input format. If you notice some filters are causing conflicts in the output, you can <a href="@rearrange">rearrange them</a>.', array('@rearrange' => url('admin/settings/filters/'. $arg[3] .'/order'))) .'</p>';
  33      case 'admin/settings/filters/%/configure':
  34        return '<p>'. t('If you cannot find the settings for a certain filter, make sure you have enabled it on the <a href="@url">view tab</a> first.', array('@url' => url('admin/settings/filters/'. $arg[3]))) .'</p>';
  35      case 'admin/settings/filters/%/order':
  36        $output = '<p>'. t('Because of the flexible filtering system, you might encounter a situation where one filter prevents another from doing its job. For example: a word in an URL gets converted into a glossary term, before the URL can be converted to a clickable link. When this happens, rearrange the order of the filters.') .'</p>';
  37        $output .= '<p>'. t("Filters are executed from top-to-bottom. To change the order of the filters, modify the values in the <em>Weight</em> column or grab a drag-and-drop handle under the <em>Name</em> column and drag filters to new locations in the list. (Grab a handle by clicking and holding the mouse while hovering over a handle icon.) Remember that your changes will not be saved until you click the <em>Save configuration</em> button at the bottom of the page.") .'</p>';
  38        return $output;
  39    }
  40  }
  41  
  42  /**
  43   * Implementation of hook_theme()
  44   */
  45  function filter_theme() {
  46    return array(
  47      'filter_admin_overview' => array(
  48        'arguments' => array('form' => NULL),
  49        'file' => 'filter.admin.inc',
  50      ),
  51      'filter_admin_order' => array(
  52        'arguments' => array('form' => NULL),
  53        'file' => 'filter.admin.inc',
  54      ),
  55      'filter_tips' => array(
  56        'arguments' => array('tips' => NULL, 'long' => FALSE, 'extra' => ''),
  57        'file' => 'filter.pages.inc',
  58      ),
  59      'filter_tips_more_info' => array(
  60        'arguments' => array(),
  61      ),
  62    );
  63  }
  64  
  65  /**
  66   * Implementation of hook_menu().
  67   */
  68  function filter_menu() {
  69    $items['admin/settings/filters'] = array(
  70      'title' => 'Input formats',
  71      'description' => 'Configure how content input by users is filtered, including allowed HTML tags. Also allows enabling of module-provided filters.',
  72      'page callback' => 'drupal_get_form',
  73      'page arguments' => array('filter_admin_overview'),
  74      'access arguments' => array('administer filters'),
  75      'file' => 'filter.admin.inc',
  76    );
  77    $items['admin/settings/filters/list'] = array(
  78      'title' => 'List',
  79      'type' => MENU_DEFAULT_LOCAL_TASK,
  80    );
  81    $items['admin/settings/filters/add'] = array(
  82      'title' => 'Add input format',
  83      'page callback' => 'filter_admin_format_page',
  84      'access arguments' => array('administer filters'),
  85      'type' => MENU_LOCAL_TASK,
  86      'weight' => 1,
  87      'file' => 'filter.admin.inc',
  88    );
  89    $items['admin/settings/filters/delete'] = array(
  90      'title' => 'Delete input format',
  91      'page callback' => 'drupal_get_form',
  92      'page arguments' => array('filter_admin_delete'),
  93      'access arguments' => array('administer filters'),
  94      'type' => MENU_CALLBACK,
  95      'file' => 'filter.admin.inc',
  96    );
  97    $items['filter/tips'] = array(
  98      'title' => 'Compose tips',
  99      'page callback' => 'filter_tips_long',
 100      'access callback' => TRUE,
 101      'type' => MENU_SUGGESTED_ITEM,
 102      'file' => 'filter.pages.inc',
 103    );
 104    $items['admin/settings/filters/%filter_format'] = array(
 105      'type' => MENU_CALLBACK,
 106      'title callback' => 'filter_admin_format_title',
 107      'title arguments' => array(3),
 108      'page callback' => 'filter_admin_format_page',
 109      'page arguments' => array(3),
 110      'access arguments' => array('administer filters'),
 111      'file' => 'filter.admin.inc',
 112    );
 113  
 114    $items['admin/settings/filters/%filter_format/edit'] = array(
 115      'title' => 'Edit',
 116      'type' => MENU_DEFAULT_LOCAL_TASK,
 117      'weight' => 0,
 118      'file' => 'filter.admin.inc',
 119    );
 120    $items['admin/settings/filters/%filter_format/configure'] = array(
 121      'title' => 'Configure',
 122      'page callback' => 'filter_admin_configure_page',
 123      'page arguments' => array(3),
 124      'access arguments' => array('administer filters'),
 125      'type' => MENU_LOCAL_TASK,
 126      'weight' => 1,
 127      'file' => 'filter.admin.inc',
 128    );
 129    $items['admin/settings/filters/%filter_format/order'] = array(
 130      'title' => 'Rearrange',
 131      'page callback' => 'filter_admin_order_page',
 132      'page arguments' => array(3),
 133      'access arguments' => array('administer filters'),
 134      'type' => MENU_LOCAL_TASK,
 135      'weight' => 2,
 136      'file' => 'filter.admin.inc',
 137    );
 138    return $items;
 139  }
 140  
 141  function filter_format_load($arg) {
 142    return filter_formats($arg);
 143  }
 144  
 145  /**
 146   * Display a filter format form title.
 147   */
 148  function filter_admin_format_title($format) {
 149    return $format->name;
 150  }
 151  
 152  /**
 153   * Implementation of hook_perm().
 154   */
 155  function filter_perm() {
 156    return array('administer filters');
 157  }
 158  
 159  /**
 160   * Implementation of hook_cron().
 161   *
 162   * Expire outdated filter cache entries
 163   */
 164  function filter_cron() {
 165    cache_clear_all(NULL, 'cache_filter');
 166  }
 167  
 168  /**
 169   * Implementation of hook_filter_tips().
 170   */
 171  function filter_filter_tips($delta, $format, $long = FALSE) {
 172    global $base_url;
 173    switch ($delta) {
 174      case 0:
 175        if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) {
 176          if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) {
 177            switch ($long) {
 178              case 0:
 179                return t('Allowed HTML tags: @tags', array('@tags' => $allowed_html));
 180              case 1:
 181                $output = '<p>'. t('Allowed HTML tags: @tags', array('@tags' => $allowed_html)) .'</p>';
 182                if (!variable_get("filter_html_help_$format", 1)) {
 183                  return $output;
 184                }
 185  
 186                $output .= t('
 187  <p>This site allows HTML content. While learning all of HTML may feel intimidating, learning how to use a very small number of the most basic HTML "tags" is very easy. This table provides examples for each tag that is enabled on this site.</p>
 188  <p>For more information see W3C\'s <a href="http://www.w3.org/TR/html/">HTML Specifications</a> or use your favorite search engine to find other sites that explain HTML.</p>');
 189                $tips = array(
 190                  'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. variable_get('site_name', 'Drupal') .'</a>'),
 191                  'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')),
 192                  'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'),
 193                  'strong' => array( t('Strong'), '<strong>'. t('Strong') .'</strong>'),
 194                  'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'),
 195                  'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'),
 196                  'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'),
 197                  'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'),
 198                  'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'),
 199                  'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'),
 200                  'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')),
 201                  'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')),
 202                  'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'),
 203                  'abbr' => array( t('Abbreviation'), t('<abbr title="Abbreviation">Abbrev.</abbr>')),
 204                  'acronym' => array( t('Acronym'), t('<acronym title="Three-Letter Acronym">TLA</acronym>')),
 205                  'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'),
 206                  'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'),
 207                  // Assumes and describes tr, td, th.
 208                  'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. t('Table cell') .'</td></tr> </table>'),
 209                  'tr' => NULL, 'td' => NULL, 'th' => NULL,
 210                  'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'),
 211                  'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'),
 212                   // Assumes and describes li.
 213                  'ol' => array( t('Ordered list - use the &lt;li&gt; to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ol>'),
 214                  'ul' => array( t('Unordered list - use the &lt;li&gt; to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'),
 215                  'li' => NULL,
 216                  // Assumes and describes dt and dd.
 217                  'dl' => array( t('Definition lists are similar to other HTML lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the definition term and &lt;dd&gt; begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'),
 218                  'dt' => NULL, 'dd' => NULL,
 219                  'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'),
 220                  'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'),
 221                  'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'),
 222                  'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'),
 223                  'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'),
 224                  'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>')
 225                );
 226                $header = array(t('Tag Description'), t('You Type'), t('You Get'));
 227                preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
 228                foreach ($out[1] as $tag) {
 229                  if (array_key_exists($tag, $tips)) {
 230                    if ($tips[$tag]) {
 231                      $rows[] = array(
 232                        array('data' => $tips[$tag][0], 'class' => 'description'),
 233                        array('data' => '<code>'. check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'),
 234                        array('data' => $tips[$tag][1], 'class' => 'get')
 235                      );
 236                    }
 237                  }
 238                  else {
 239                    $rows[] = array(
 240                      array('data' => t('No help provided for tag %tag.', array('%tag' => $tag)), 'class' => 'description', 'colspan' => 3),
 241                    );
 242                  }
 243                }
 244                $output .= theme('table', $header, $rows);
 245  
 246                $output .= t('
 247  <p>Most unusual characters can be directly entered without any problems.</p>
 248  <p>If you do encounter problems, try using HTML character entities. A common example looks like &amp;amp; for an ampersand &amp; character. For a full list of entities see HTML\'s <a href="http://www.w3.org/TR/html4/sgml/entities.html">entities</a> page. Some of the available characters include:</p>');
 249                $entities = array(
 250                  array( t('Ampersand'), '&amp;'),
 251                  array( t('Greater than'), '&gt;'),
 252                  array( t('Less than'), '&lt;'),
 253                  array( t('Quotation mark'), '&quot;'),
 254                );
 255                $header = array(t('Character Description'), t('You Type'), t('You Get'));
 256                unset($rows);
 257                foreach ($entities as $entity) {
 258                  $rows[] = array(
 259                    array('data' => $entity[0], 'class' => 'description'),
 260                    array('data' => '<code>'. check_plain($entity[1]) .'</code>', 'class' => 'type'),
 261                    array('data' => $entity[1], 'class' => 'get')
 262                  );
 263                }
 264                $output .= theme('table', $header, $rows);
 265                return $output;
 266            }
 267          }
 268          else {
 269            return t('No HTML tags allowed');
 270          }
 271        }
 272        break;
 273  
 274      case 1:
 275        switch ($long) {
 276          case 0:
 277            return t('Lines and paragraphs break automatically.');
 278          case 1:
 279            return t('Lines and paragraphs are automatically recognized. The &lt;br /&gt; line break, &lt;p&gt; paragraph and &lt;/p&gt; close paragraph tags are inserted automatically. If paragraphs are not recognized simply add a couple blank lines.');
 280        }
 281        break;
 282      case 2:
 283        return t('Web page addresses and e-mail addresses turn into links automatically.');
 284    }
 285  }
 286  
 287  /**
 288   * Retrieve a list of input formats.
 289   */
 290  function filter_formats($index = NULL) {
 291    global $user;
 292    static $formats;
 293  
 294    // Administrators can always use all input formats.
 295    $all = user_access('administer filters');
 296  
 297    if (!isset($formats)) {
 298      $formats = array();
 299  
 300      $query = 'SELECT * FROM {filter_formats}';
 301  
 302      // Build query for selecting the format(s) based on the user's roles.
 303      $args = array();
 304      if (!$all) {
 305        $where = array();
 306        foreach ($user->roles as $rid => $role) {
 307          $where[] = "roles LIKE '%%,%d,%%'";
 308          $args[] = $rid;
 309        }
 310        $query .= ' WHERE '. implode(' OR ', $where) .' OR format = %d';
 311        $args[] = variable_get('filter_default_format', 1);
 312      }
 313  
 314      $result = db_query($query, $args);
 315      while ($format = db_fetch_object($result)) {
 316        $formats[$format->format] = $format;
 317      }
 318    }
 319    if (isset($index)) {
 320      return isset($formats[$index]) ? $formats[$index] : FALSE;
 321    }
 322    return $formats;
 323  }
 324  
 325  /**
 326   * Build a list of all filters.
 327   */
 328  function filter_list_all() {
 329    $filters = array();
 330  
 331    foreach (module_list() as $module) {
 332      $list = module_invoke($module, 'filter', 'list');
 333      if (isset($list) && is_array($list)) {
 334        foreach ($list as $delta => $name) {
 335          $filters[$module .'/'. $delta] = (object)array('module' => $module, 'delta' => $delta, 'name' => $name);
 336        }
 337      }
 338    }
 339  
 340    uasort($filters, '_filter_list_cmp');
 341  
 342    return $filters;
 343  }
 344  
 345  /**
 346   * Helper function for sorting the filter list by filter name.
 347   */
 348  function _filter_list_cmp($a, $b) {
 349    return strcmp($a->name, $b->name);
 350  }
 351  
 352  /**
 353   * Resolve a format id, including the default format.
 354   */
 355  function filter_resolve_format($format) {
 356    return $format == FILTER_FORMAT_DEFAULT ? variable_get('filter_default_format', 1) : $format;
 357  }
 358  /**
 359   * Check if text in a certain input format is allowed to be cached.
 360   */
 361  function filter_format_allowcache($format) {
 362    static $cache = array();
 363    $format = filter_resolve_format($format);
 364    if (!isset($cache[$format])) {
 365      $cache[$format] = db_result(db_query('SELECT cache FROM {filter_formats} WHERE format = %d', $format));
 366    }
 367    return $cache[$format];
 368  }
 369  
 370  /**
 371   * Retrieve a list of filters for a certain format.
 372   */
 373  function filter_list_format($format) {
 374    static $filters = array();
 375  
 376    if (!isset($filters[$format])) {
 377      $result = db_query("SELECT * FROM {filters} WHERE format = %d ORDER BY weight, module, delta", $format);
 378      if (db_affected_rows($result) == 0 && !db_result(db_query("SELECT 1 FROM {filter_formats} WHERE format = %d", $format))) {
 379        // The format has no filters and does not exist, use the default input
 380        // format.
 381        $filters[$format] = filter_list_format(variable_get('filter_default_format', 1));
 382      }
 383      else {
 384        $filters[$format] = array();
 385        while ($filter = db_fetch_object($result)) {
 386          $list = module_invoke($filter->module, 'filter', 'list');
 387          if (isset($list) && is_array($list) && isset($list[$filter->delta])) {
 388            $filter->name = $list[$filter->delta];
 389            $filters[$format][$filter->module .'/'. $filter->delta] = $filter;
 390          }
 391        }
 392      }
 393    }
 394  
 395    return $filters[$format];
 396  }
 397  
 398  /**
 399   * @defgroup filtering_functions Filtering functions
 400   * @{
 401   * Functions for interacting with the content filtering system.
 402   *
 403   * For more info, see the hook_filter() documentation.
 404   *
 405   * Note: because filters can inject JavaScript or execute PHP code, security is
 406   * vital here. When a user supplies a $format, you should validate it with
 407   * filter_access($format) before accepting/using it. This is normally done in
 408   * the validation stage of the node system. You should for example never make a
 409   * preview of content in a disallowed format.
 410   */
 411  
 412  /**
 413   * Run all the enabled filters on a piece of text.
 414   *
 415   * @param $text
 416   *    The text to be filtered.
 417   * @param $format
 418   *    The format of the text to be filtered. Specify FILTER_FORMAT_DEFAULT for
 419   *    the default format.
 420   * @param $check
 421   *    Whether to check the $format with filter_access() first. Defaults to TRUE.
 422   *    Note that this will check the permissions of the current user, so you
 423   *    should specify $check = FALSE when viewing other people's content. When
 424   *    showing content that is not (yet) stored in the database (eg. upon preview),
 425   *    set to TRUE so the user's permissions are checked.
 426   */
 427  function check_markup($text, $format = FILTER_FORMAT_DEFAULT, $check = TRUE) {
 428    // When $check = TRUE, do an access check on $format.
 429    if (isset($text) && (!$check || filter_access($format))) {
 430      $format = filter_resolve_format($format);
 431  
 432      // Check for a cached version of this piece of text.
 433      $cache_id = $format .':'. md5($text);
 434      if ($cached = cache_get($cache_id, 'cache_filter')) {
 435        return $cached->data;
 436      }
 437  
 438      // See if caching is allowed for this format.
 439      $cache = filter_format_allowcache($format);
 440  
 441      // Convert all Windows and Mac newlines to a single newline,
 442      // so filters only need to deal with one possibility.
 443      $text = str_replace(array("\r\n", "\r"), "\n", $text);
 444  
 445      // Get a complete list of filters, ordered properly.
 446      $filters = filter_list_format($format);
 447  
 448      // Give filters the chance to escape HTML-like data such as code or formulas.
 449      foreach ($filters as $filter) {
 450        $text = module_invoke($filter->module, 'filter', 'prepare', $filter->delta, $format, $text, $cache_id);
 451      }
 452  
 453      // Perform filtering.
 454      foreach ($filters as $filter) {
 455        $text = module_invoke($filter->module, 'filter', 'process', $filter->delta, $format, $text, $cache_id);
 456      }
 457  
 458      // Store in cache with a minimum expiration time of 1 day.
 459      if ($cache) {
 460        cache_set($cache_id, $text, 'cache_filter', time() + (60 * 60 * 24));
 461      }
 462    }
 463    else {
 464      $text = t('n/a');
 465    }
 466  
 467    return $text;
 468  }
 469  
 470  /**
 471   * Generates a selector for choosing a format in a form.
 472   *
 473   * @param $value
 474   *   The ID of the format that is currently selected; uses the default format
 475   *   if not provided.
 476   * @param $weight
 477   *   The weight of the form element within the form.
 478   * @param $parents
 479   *   The parents array of the element. Required when defining multiple text
 480   *   formats on a single form or having a different parent than 'format'.
 481   *
 482   * @return
 483   *   Form API array for the form element.
 484   *
 485   * @see filter_form_validate()
 486   * @ingroup forms
 487   */
 488  function filter_form($value = FILTER_FORMAT_DEFAULT, $weight = NULL, $parents = array('format')) {
 489    $value = filter_resolve_format($value);
 490    $formats = filter_formats();
 491  
 492    $extra = theme('filter_tips_more_info');
 493  
 494    if (count($formats) > 1) {
 495      $form = array(
 496        '#type' => 'fieldset',
 497        '#title' => t('Input format'),
 498        '#collapsible' => TRUE,
 499        '#collapsed' => TRUE,
 500        '#weight' => $weight,
 501        '#element_validate' => array('filter_form_validate'),
 502      );
 503      // Multiple formats available: display radio buttons with tips.
 504      foreach ($formats as $format) {
 505        // Generate the parents as the autogenerator does, so we will have a
 506        // unique id for each radio button.
 507        $parents_for_id = array_merge($parents, array($format->format));
 508        $form[$format->format] = array(
 509          '#type' => 'radio',
 510          '#title' => $format->name,
 511          '#default_value' => $value,
 512          '#return_value' => $format->format,
 513          '#parents' => $parents,
 514          '#description' => theme('filter_tips', _filter_tips($format->format, FALSE)),
 515          '#id' => form_clean_id('edit-'. implode('-', $parents_for_id)),
 516        );
 517      }
 518    }
 519    else {
 520      // Only one format available: use a hidden form item and only show tips.
 521      $format = array_shift($formats);
 522      $form[$format->format] = array('#type' => 'value', '#value' => $format->format, '#parents' => $parents);
 523      $tips = _filter_tips(variable_get('filter_default_format', 1), FALSE);
 524      $form['format']['guidelines'] = array(
 525        '#title' => t('Formatting guidelines'),
 526        '#value' => theme('filter_tips', $tips, FALSE, $extra),
 527      );
 528    }
 529    $form[] = array('#value' => $extra);
 530    return $form;
 531  }
 532  
 533  /**
 534   * Validation callback for filter elements in a form.
 535   *
 536   * @see filter_form().
 537   */
 538  function filter_form_validate($form) {
 539    foreach (element_children($form) as $key) {
 540      if ($form[$key]['#value'] == $form[$key]['#return_value']) {
 541        return;
 542      }
 543    }
 544    form_error($form, t('An illegal choice has been detected. Please contact the site administrator.'));
 545    watchdog('form', 'Illegal choice %choice in %name element.', array('%choice' => $form[$key]['#value'], '%name' => empty($form['#title']) ? $form['#parents'][0] : $form['#title']), WATCHDOG_ERROR);
 546  }
 547  
 548  /**
 549   * Returns TRUE if the user is allowed to access this format.
 550   */
 551  function filter_access($format) {
 552    $format = filter_resolve_format($format);
 553    if (user_access('administer filters') || ($format == variable_get('filter_default_format', 1))) {
 554      return TRUE;
 555    }
 556    else {
 557      $formats = filter_formats();
 558      return isset($formats[$format]);
 559    }
 560  }
 561  
 562  /**
 563   * @} End of "Filtering functions".
 564   */
 565  
 566  
 567  /**
 568   * Helper function for fetching filter tips.
 569   */
 570  function _filter_tips($format, $long = FALSE) {
 571    if ($format == -1) {
 572      $formats = filter_formats();
 573    }
 574    else {
 575      $formats = array(db_fetch_object(db_query("SELECT * FROM {filter_formats} WHERE format = %d", $format)));
 576    }
 577  
 578    $tips = array();
 579  
 580    foreach ($formats as $format) {
 581      $filters = filter_list_format($format->format);
 582  
 583      $tips[$format->name] = array();
 584      foreach ($filters as $id => $filter) {
 585        if ($tip = module_invoke($filter->module, 'filter_tips', $filter->delta, $format->format, $long)) {
 586          $tips[$format->name][] = array('tip' => $tip, 'id' => $id);
 587        }
 588      }
 589    }
 590  
 591    return $tips;
 592  }
 593  
 594  
 595  /**
 596   * Format a link to the more extensive filter tips.
 597   *
 598   * @ingroup themeable
 599   */
 600  function theme_filter_tips_more_info() {
 601    return '<p>'. l(t('More information about formatting options'), 'filter/tips') .'</p>';
 602  }
 603  
 604  /**
 605   * @defgroup standard_filters Standard filters
 606   * @{
 607   * Filters implemented by the filter.module.
 608   */
 609  
 610  /**
 611   * Implementation of hook_filter().
 612   *
 613   * Sets up a basic set of essential filters.
 614   * - HTML filter: Restricts user-supplied HTML to certain tags, and removes
 615   *   dangerous components in allowed tags.
 616   * - Line break converter: Converts newlines into paragraph and break tags.
 617   * - URL filter: Converts URLs and e-mail addresses into links.
 618   * - HTML corrector: Fixes faulty HTML.
 619   */
 620  function filter_filter($op, $delta = 0, $format = -1, $text = '') {
 621    switch ($op) {
 622      case 'list':
 623        return array(0 => t('HTML filter'), 1 => t('Line break converter'), 2 => t('URL filter'), 3 => t('HTML corrector'));
 624  
 625      case 'description':
 626        switch ($delta) {
 627          case 0:
 628            return t('Allows you to restrict whether users can post HTML and which tags to filter out. It will also remove harmful content such as JavaScript events, JavaScript URLs and CSS styles from those tags that are not removed.');
 629          case 1:
 630            return t('Converts line breaks into HTML (i.e. &lt;br&gt; and &lt;p&gt; tags).');
 631          case 2:
 632            return t('Turns web and e-mail addresses into clickable links.');
 633          case 3:
 634            return t('Corrects faulty and chopped off HTML in postings.');
 635          default:
 636            return;
 637        }
 638  
 639      case 'process':
 640        switch ($delta) {
 641          case 0:
 642            return _filter_html($text, $format);
 643          case 1:
 644            return _filter_autop($text);
 645          case 2:
 646            return _filter_url($text, $format);
 647          case 3:
 648            return _filter_htmlcorrector($text);
 649          default:
 650            return $text;
 651        }
 652  
 653      case 'settings':
 654        switch ($delta) {
 655          case 0:
 656            return _filter_html_settings($format);
 657          case 2:
 658            return _filter_url_settings($format);
 659          default:
 660            return;
 661        }
 662  
 663      default:
 664        return $text;
 665    }
 666  }
 667  
 668  /**
 669   * Settings for the HTML filter.
 670   */
 671  function _filter_html_settings($format) {
 672    $form['filter_html'] = array(
 673      '#type' => 'fieldset',
 674      '#title' => t('HTML filter'),
 675      '#collapsible' => TRUE,
 676    );
 677    $form['filter_html']["filter_html_$format"] = array(
 678      '#type' => 'radios',
 679      '#title' => t('Filter HTML tags'),
 680      '#default_value' => variable_get("filter_html_$format", FILTER_HTML_STRIP),
 681      '#options' => array(FILTER_HTML_STRIP => t('Strip disallowed tags'), FILTER_HTML_ESCAPE => t('Escape all tags')),
 682      '#description' => t('How to deal with HTML tags in user-contributed content. If set to "Strip disallowed tags", dangerous tags are removed (see below). If set to "Escape tags", all HTML is escaped and presented as it was typed.'),
 683    );
 684    $form['filter_html']["allowed_html_$format"] = array(
 685      '#type' => 'textfield',
 686      '#title' => t('Allowed HTML tags'),
 687      '#default_value' => variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'),
 688      '#size' => 64,
 689      '#maxlength' => 1024,
 690      '#description' => t('If "Strip disallowed tags" is selected, optionally specify tags which should not be stripped. JavaScript event attributes are always stripped.'),
 691    );
 692    $form['filter_html']["filter_html_help_$format"] = array(
 693      '#type' => 'checkbox',
 694      '#title' => t('Display HTML help'),
 695      '#default_value' => variable_get("filter_html_help_$format", 1),
 696      '#description' => t('If enabled, Drupal will display some basic HTML help in the long filter tips.'),
 697    );
 698    $form['filter_html']["filter_html_nofollow_$format"] = array(
 699      '#type' => 'checkbox',
 700      '#title' => t('Spam link deterrent'),
 701      '#default_value' => variable_get("filter_html_nofollow_$format", FALSE),
 702      '#description' => t('If enabled, Drupal will add rel="nofollow" to all links, as a measure to reduce the effectiveness of spam links. Note: this will also prevent valid links from being followed by search engines, therefore it is likely most effective when enabled for anonymous users.'),
 703    );
 704    return $form;
 705  }
 706  
 707  /**
 708   * HTML filter. Provides filtering of input into accepted HTML.
 709   */
 710  function _filter_html($text, $format) {
 711    if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) {
 712      $allowed_tags = preg_split('/\s+|<|>/', variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), -1, PREG_SPLIT_NO_EMPTY);
 713      $text = filter_xss($text, $allowed_tags);
 714    }
 715  
 716    if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_ESCAPE) {
 717      // Escape HTML
 718      $text = check_plain($text);
 719    }
 720  
 721    if (variable_get("filter_html_nofollow_$format", FALSE)) {
 722      $text = preg_replace('/<a([^>]+)>/i', '<a\\1 rel="nofollow">', $text);
 723    }
 724  
 725    return trim($text);
 726  }
 727  
 728  /**
 729   * Settings for URL filter.
 730   */
 731  function _filter_url_settings($format) {
 732    $form['filter_urlfilter'] = array(
 733      '#type' => 'fieldset',
 734      '#title' => t('URL filter'),
 735      '#collapsible' => TRUE,
 736    );
 737    $form['filter_urlfilter']['filter_url_length_'. $format] = array(
 738      '#type' => 'textfield',
 739      '#title' => t('Maximum link text length'),
 740      '#default_value' => variable_get('filter_url_length_'. $format, 72),
 741      '#maxlength' => 4,
 742      '#description' => t('URLs longer than this number of characters will be truncated to prevent long strings that break formatting. The link itself will be retained; just the text portion of the link will be truncated.'),
 743    );
 744    return $form;
 745  }
 746  
 747  /**
 748   * URL filter. Automatically converts text web addresses (URLs, e-mail addresses,
 749   * ftp links, etc.) into hyperlinks.
 750   */
 751  function _filter_url($text, $format) {
 752    // Pass length to regexp callback
 753    _filter_url_trim(NULL, variable_get('filter_url_length_'. $format, 72));
 754  
 755    $text = ' '. $text .' ';
 756  
 757    // Match absolute URLs.
 758    $text = preg_replace_callback("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])((http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://)([a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]))([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_full_links', $text);
 759  
 760    // Match e-mail addresses.
 761    $text = preg_replace("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '\1<a href="mailto:\2">\2</a>\3', $text);
 762  
 763    // Match www domains/addresses.
 764    $text = preg_replace_callback("`(<p>|<li>|[ \n\r\t\(])(www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-])([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_partial_links', $text);
 765    $text = substr($text, 1, -1);
 766  
 767    return $text;
 768  }
 769  
 770  /**
 771   * Scan input and make sure that all HTML tags are properly closed and nested.
 772   */
 773  function _filter_htmlcorrector($text) {
 774    // Prepare tag lists.
 775    static $no_nesting, $single_use;
 776    if (!isset($no_nesting)) {
 777      // Tags which cannot be nested but are typically left unclosed.
 778      $no_nesting = drupal_map_assoc(array('li', 'p'));
 779  
 780      // Single use tags in HTML4
 781      $single_use = drupal_map_assoc(array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame'));
 782    }
 783  
 784    // Properly entify angles.
 785    $text = preg_replace('@<(?=[^a-zA-Z!/]|$)@', '&lt;', $text);
 786  
 787    // Split tags from text.
 788    $split = preg_split('/<(!--.*?--|[^>]+?)>/s', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
 789    // Note: PHP ensures the array consists of alternating delimiters and literals
 790    // and begins and ends with a literal (inserting $null as required).
 791  
 792    $tag = false; // Odd/even counter. Tag or no tag.
 793    $stack = array();
 794    $output = '';
 795    foreach ($split as $value) {
 796      // Process HTML tags.
 797      if ($tag) {
 798        // Passthrough comments.
 799        if (substr($value, 0, 3) == '!--') {
 800          $output .= '<'. $value .'>';
 801        }
 802        else {
 803          list($tagname) = preg_split('/\s/', strtolower($value), 2);
 804          // Closing tag
 805          if ($tagname{0} == '/') {
 806            $tagname = substr($tagname, 1);
 807            // Discard XHTML closing tags for single use tags.
 808            if (!isset($single_use[$tagname])) {
 809              // See if we possibly have a matching opening tag on the stack.
 810              if (in_array($tagname, $stack)) {
 811                // Close other tags lingering first.
 812                do {
 813                  $output .= '</'. $stack[0] .'>';
 814                } while (array_shift($stack) != $tagname);
 815              }
 816              // Otherwise, discard it.
 817            }
 818          }
 819          // Opening tag
 820          else {
 821            // See if we have an identical 'no nesting' tag already open and close it if found.
 822            if (count($stack) && ($stack[0] == $tagname) && isset($no_nesting[$stack[0]])) {
 823              $output .= '</'. array_shift($stack) .'>';
 824            }
 825            // Push non-single-use tags onto the stack
 826            if (!isset($single_use[$tagname])) {
 827              array_unshift($stack, $tagname);
 828            }
 829            // Add trailing slash to single-use tags as per X(HT)ML.
 830            else {
 831              $value = rtrim($value, ' /') .' /';
 832            }
 833            $output .= '<'. $value .'>';
 834          }
 835        }
 836      }
 837      else {
 838        // Passthrough all text.
 839        $output .= $value;
 840      }
 841      $tag = !$tag;
 842    }
 843    // Close remaining tags.
 844    while (count($stack) > 0) {
 845      $output .= '</'. array_shift($stack) .'>';
 846    }
 847    return $output;
 848  }
 849  
 850  /**
 851   * Make links out of absolute URLs.
 852   */
 853  function _filter_url_parse_full_links($match) {
 854    $match[2] = decode_entities($match[2]);
 855    $caption = check_plain(_filter_url_trim($match[2]));
 856    $match[2] = check_url($match[2]);
 857    return $match[1] .'<a href="'. $match[2] .'" title="'. $match[2] .'">'. $caption .'</a>'. $match[5];
 858  }
 859  
 860  /**
 861   * Make links out of domain names starting with "www."
 862   */
 863  function _filter_url_parse_partial_links($match) {
 864    $match[2] = decode_entities($match[2]);
 865    $caption = check_plain(_filter_url_trim($match[2]));
 866    $match[2] = check_plain($match[2]);
 867    return $match[1] .'<a href="http://'. $match[2] .'" title="'. $match[2] .'">'. $caption .'</a>'. $match[3];
 868  }
 869  
 870  /**
 871   * Shortens long URLs to http://www.example.com/long/url...
 872   */
 873  function _filter_url_trim($text, $length = NULL) {
 874    static $_length;
 875    if ($length !== NULL) {
 876      $_length = $length;
 877    }
 878  
 879    // Use +3 for '...' string length.
 880    if (strlen($text) > $_length + 3) {
 881      $text = substr($text, 0, $_length) .'...';
 882    }
 883  
 884    return $text;
 885  }
 886  
 887  /**
 888   * Convert line breaks into <p> and <br> in an intelligent fashion.
 889   * Based on: http://photomatt.net/scripts/autop
 890   */
 891  function _filter_autop($text) {
 892    // All block level tags
 893    $block = '(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|address|p|h[1-6]|hr)';
 894  
 895    // Split at <pre>, <script>, <style> and </pre>, </script>, </style> tags.
 896    // We don't apply any processing to the contents of these tags to avoid messing
 897    // up code. We look for matched pairs and allow basic nesting. For example:
 898    // "processed <pre> ignored <script> ignored </script> ignored </pre> processed"
 899    $chunks = preg_split('@(<(?:!--.*?--|/?(?:pre|script|style|object)[^>]*)>)@si', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
 900    // Note: PHP ensures the array consists of alternating delimiters and literals
 901    // and begins and ends with a literal (inserting NULL as required).
 902    $ignore = FALSE;
 903    $ignoretag = '';
 904    $output = '';
 905    foreach ($chunks as $i => $chunk) {
 906      if ($i % 2) {
 907        // Passthrough comments.
 908        if (substr($chunk, 1, 3) == '!--') {
 909          $output .= $chunk;
 910        }
 911        else {
 912          // Opening or closing tag?
 913          $open = ($chunk[1] != '/');
 914          list($tag) = split('[ >]', substr($chunk, 2 - $open), 2);
 915          if (!$ignore) {
 916            if ($open) {
 917              $ignore = TRUE;
 918              $ignoretag = $tag;
 919            }
 920          }
 921          // Only allow a matching tag to close it.
 922          else if (!$open && $ignoretag == $tag) {
 923            $ignore = FALSE;
 924            $ignoretag = '';
 925          }
 926        }
 927      }
 928      else if (!$ignore) {
 929        $chunk = preg_replace('|\n*$|', '', $chunk) ."\n\n"; // just to make things a little easier, pad the end
 930        $chunk = preg_replace('|<br />\s*<br />|', "\n\n", $chunk);
 931        $chunk = preg_replace('!(<'. $block .'[^>]*>)!', "\n$1", $chunk); // Space things out a little
 932        $chunk = preg_replace('!(</'. $block .'>)!', "$1\n\n", $chunk); // Space things out a little
 933        $chunk = preg_replace("/\n\n+/", "\n\n", $chunk); // take care of duplicates
 934        $chunk = preg_replace('/^\n|\n\s*\n$/', '', $chunk);
 935        $chunk = '<p>'. preg_replace('/\n\s*\n\n?(.)/', "</p>\n<p>$1", $chunk) ."</p>\n"; // make paragraphs, including one at the end
 936        $chunk = preg_replace("|<p>(<li.+?)</p>|", "$1", $chunk); // problem with nested lists
 937        $chunk = preg_replace('|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $chunk);
 938        $chunk = str_replace('</blockquote></p>', '</p></blockquote>', $chunk);
 939        $chunk = preg_replace('|<p>\s*</p>\n?|', '', $chunk); // under certain strange conditions it could create a P of entirely whitespace
 940        $chunk = preg_replace('!<p>\s*(</?'. $block .'[^>]*>)!', "$1", $chunk);
 941        $chunk = preg_replace('!(</?'. $block .'[^>]*>)\s*</p>!', "$1", $chunk);
 942        $chunk = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $chunk); // make line breaks
 943        $chunk = preg_replace('!(</?'. $block .'[^>]*>)\s*<br />!', "$1", $chunk);
 944        $chunk = preg_replace('!<br />(\s*</?(?:p|li|div|th|pre|td|ul|ol)>)!', '$1', $chunk);
 945        $chunk = preg_replace('/&([^#])(?![A-Za-z0-9]{1,8};)/', '&amp;$1', $chunk);
 946      }
 947      $output .= $chunk;
 948    }
 949    return $output;
 950  }
 951  
 952  /**
 953   * Very permissive XSS/HTML filter for admin-only use.
 954   *
 955   * Use only for fields where it is impractical to use the
 956   * whole filter system, but where some (mainly inline) mark-up
 957   * is desired (so check_plain() is not acceptable).
 958   *
 959   * Allows all tags that can be used inside an HTML body, save
 960   * for scripts and styles.
 961   */
 962  function filter_xss_admin($string) {
 963    return filter_xss($string, array('a', 'abbr', 'acronym', 'address', 'b', 'bdo', 'big', 'blockquote', 'br', 'caption', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'ins', 'kbd', 'li', 'ol', 'p', 'pre', 'q', 'samp', 'small', 'span', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'tt', 'ul', 'var'));
 964  }
 965  
 966  /**
 967   * Filters an HTML string to prevent cross-site-scripting (XSS) vulnerabilities.
 968   *
 969   * Based on kses by Ulf Harnhammar, see http://sourceforge.net/projects/kses.
 970   * For examples of various XSS attacks, see http://ha.ckers.org/xss.html.
 971   *
 972   * This code does four things:
 973   * - Removes characters and constructs that can trick browsers.
 974   * - Makes sure all HTML entities are well-formed.
 975   * - Makes sure all HTML tags and attributes are well-formed.
 976   * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g.
 977   *   javascript:).
 978   *
 979   * @param $string
 980   *   The string with raw HTML in it. It will be stripped of everything that can
 981   *   cause an XSS attack.
 982   * @param $allowed_tags
 983   *   An array of allowed tags.
 984   *
 985   * @return
 986   *   An XSS safe version of $string, or an empty string if $string is not
 987   *   valid UTF-8.
 988   *
 989   * @see drupal_validate_utf8()
 990   * @ingroup sanitization
 991   */
 992  function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
 993    // Only operate on valid UTF-8 strings. This is necessary to prevent cross
 994    // site scripting issues on Internet Explorer 6.
 995    if (!drupal_validate_utf8($string)) {
 996      return '';
 997    }
 998    // Store the input format
 999    _filter_xss_split($allowed_tags, TRUE);
1000    // Remove NUL characters (ignored by some browsers)
1001    $string = str_replace(chr(0), '', $string);
1002    // Remove Netscape 4 JS entities
1003    $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
1004  
1005    // Defuse all HTML entities
1006    $string = str_replace('&', '&amp;', $string);
1007    // Change back only well-formed entities in our whitelist
1008    // Decimal numeric entities
1009    $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
1010    // Hexadecimal numeric entities
1011    $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
1012    // Named entities
1013    $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
1014  
1015    return preg_replace_callback('%
1016      (
1017      <(?=[^a-zA-Z!/])  # a lone <
1018      |                 # or
1019      <!--.*?-->        # a comment
1020      |                 # or
1021      <[^>]*(>|$)       # a string that starts with a <, up until the > or the end of the string
1022      |                 # or
1023      >                 # just a >
1024      )%x', '_filter_xss_split', $string);
1025  }
1026  
/**
 * Processes an HTML tag.
 *
 * This function has two modes. Called with $store set to TRUE it only
 * remembers the allowed tags; called as a regexp callback it cleans up one
 * piece of markup against that remembered list.
 *
 * @param $m
 *   An array with various meaning depending on the value of $store.
 *   If $store is TRUE then the array contains the allowed tags.
 *   If $store is FALSE then element 1 of the array is the HTML tag to process.
 * @param $store
 *   Whether to store $m.
 * @return
 *   Nothing when storing. Otherwise: '&gt;' or '&lt;' for a lone angle
 *   bracket, an empty string if the element is malformed or not allowed, and
 *   else the cleaned up version of the HTML element.
 */
function _filter_xss_split($m, $store = FALSE) {
  static $allowed_html;

  if ($store) {
    // Flip the list so tag names become keys for fast isset() lookups.
    $allowed_html = array_flip($m);
    return;
  }

  $markup = $m[1];

  // We matched a lone ">" character
  if (substr($markup, 0, 1) != '<') {
    return '&gt;';
  }
  // We matched a lone "<" character
  if (strlen($markup) == 1) {
    return '&lt;';
  }

  // Either a tag (optional slash, name, attribute list) or a whole comment.
  if (!preg_match('%^(?:<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?|(<!--.*?-->))$%', $markup, $parts)) {
    // Seriously malformed
    return '';
  }

  // The comment group is only present in $parts when the comment branch of
  // the pattern matched.
  $is_comment = !empty($parts[4]);
  // Comments are looked up under the pseudo element name "!--".
  $element = $is_comment ? '!--' : $parts[2];

  if (!isset($allowed_html[strtolower($element)])) {
    // Disallowed HTML element
    return '';
  }

  if ($is_comment) {
    return $parts[4];
  }

  // Closing tags are rebuilt without any attributes.
  if (trim($parts[1]) != '') {
    return '</'. $element .'>';
  }

  // Is there a closing XHTML slash at the end of the attributes?
  // In PHP 5.1.0+ we could count the changes, currently we need a separate match
  $attributes = $parts[3];
  $closing_slash = preg_match('%\s?/\s*$%', $attributes) ? ' /' : '';
  $attributes = preg_replace('%(\s?)/\s*$%', '\1', $attributes);

  // Clean up attributes
  $clean = implode(' ', _filter_xss_attributes($attributes));
  $clean = preg_replace('/[<>]/', '', $clean);
  if (strlen($clean)) {
    $clean = ' '. $clean;
  }
  else {
    $clean = '';
  }

  return '<'. $element . $clean . $closing_slash .'>';
}
1098  
/**
 * Processes a string of HTML attributes.
 *
 * The string is consumed from the left by a small state machine:
 * - mode 0 expects an attribute name,
 * - mode 1 expects an equals sign, or whitespace after a valueless attribute,
 * - mode 2 expects a double-quoted, single-quoted or unquoted value.
 * Whenever the expected piece is not found, the malformed text up to and
 * including the next whitespace is dropped and parsing restarts in mode 0.
 *
 * "style" attributes and attributes whose name starts with "on" are skipped,
 * with or without a value. Every value is passed through
 * filter_xss_bad_protocol().
 *
 * @param $attr
 *   The attribute part of an HTML tag.
 *
 * @return
 *   Cleaned up version of the HTML attributes, as an array with one entry per
 *   attribute (either 'name' or 'name="value"').
 */
function _filter_xss_attributes($attr) {
  $attrarr = array();
  $mode = 0;
  $attrname = '';
  // Set for real in mode 0; initialised so it is always defined.
  $skip = FALSE;

  while (strlen($attr) != 0) {
    // Was the last operation successful?
    $working = 0;

    switch ($mode) {
      case 0:
        // Attribute name, href for instance
        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
          $attrname = strtolower($match[1]);
          $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
          $working = $mode = 1;
          $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
        }

        break;

      case 1:
        // Equals sign or valueless ("selected")
        if (preg_match('/^\s*=\s*/', $attr)) {
          $working = 1; $mode = 2;
          $attr = preg_replace('/^\s*=\s*/', '', $attr);
          break;
        }

        if (preg_match('/^\s+/', $attr)) {
          $working = 1; $mode = 0;
          if (!$skip) {
            $attrarr[] = $attrname;
          }
          $attr = preg_replace('/^\s+/', '', $attr);
        }

        break;

      case 2:
        // Attribute value, a URL after href= for instance
        if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attrarr[] = "$attrname=\"$thisval\"";
          }
          $working = 1;
          $mode = 0;
          $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
          break;
        }

        if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attrarr[] = "$attrname='$thisval'";
          }
          $working = 1; $mode = 0;
          $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
          break;
        }

        // Unquoted value; it is written back with double quotes.
        if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attrarr[] = "$attrname=\"$thisval\"";
          }
          $working = 1; $mode = 0;
          $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
        }

        break;
    }

    if ($working == 0) {
      // not well formed, remove and try again
      $attr = preg_replace('/
        ^
        (
        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
        |               # or
        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
        |               # or
        \S              # - a non-whitespace character
        )*              # any number of the above three
        \s*             # any number of whitespaces
        /x', '', $attr);
      $mode = 0;
    }
  }

  // The attribute list ends with a valueless attribute like "selected". It
  // must honour $skip like every other attribute; otherwise a trailing
  // "onclick" or "style" without a value would be kept.
  if ($mode == 1 && !$skip) {
    $attrarr[] = $attrname;
  }
  return $attrarr;
}
1205  
/**
 * Processes an HTML attribute value and ensures it does not contain an URL
 * with a disallowed protocol (e.g. javascript:)
 *
 * Anything up to and including the first colon is removed for as long as
 * that prefix is not an allowed protocol. The allowed protocols come from the
 * 'filter_allowed_protocols' variable and are read once per request.
 *
 * @param $string
 *   The string with the attribute value.
 * @param $decode
 *   Whether to decode entities in the $string. Set to FALSE if the $string
 *   is in plain text, TRUE otherwise. Defaults to TRUE.
 * @return
 *   Cleaned up and HTML-escaped version of $string.
 */
function filter_xss_bad_protocol($string, $decode = TRUE) {
  static $allowed_protocols;
  if (!isset($allowed_protocols)) {
    $default_protocols = array('http', 'https', 'ftp', 'news', 'nntp', 'tel', 'telnet', 'mailto', 'irc', 'ssh', 'sftp', 'webcal', 'rtsp');
    // Flipped so that protocol names become keys for isset() lookups.
    $allowed_protocols = array_flip(variable_get('filter_allowed_protocols', $default_protocols));
  }

  // Get the plain text representation of the attribute value (i.e. its meaning).
  if ($decode) {
    $string = decode_entities($string);
  }

  // Iteratively remove any invalid protocol found. Each pass either stops or
  // cuts off a non-empty prefix, so the loop always terminates.
  while (TRUE) {
    $colon = strpos($string, ':');
    // No colon, or a colon in first position: there is no protocol to check.
    if (!($colon > 0)) {
      break;
    }

    $scheme = substr($string, 0, $colon);
    // If a colon is preceded by a slash, question mark or hash, it cannot
    // possibly be part of the URL scheme. This must be a relative URL,
    // which inherits the (safe) protocol of the base document.
    if (preg_match('![/?#]!', $scheme)) {
      break;
    }

    // Per RFC2616, section 3.2.3 (URI Comparison) scheme comparison must be
    // case-insensitive. An allowed protocol ends the search.
    if (isset($allowed_protocols[strtolower($scheme)])) {
      break;
    }

    // Disallowed protocol: drop it together with its colon and look again.
    $string = substr($string, $colon + 1);
  }

  return check_plain($string);
}
1252  
1253  /**
1254   * @} End of "Standard filters".
1255   */


Generated: Mon Jul 9 18:01:44 2012 Cross-referenced by PHPXref 0.7