[ Index ]

PHP Cross Reference of Drupal 6 (gatewave)

title

Body

[close]

/modules/filter/ -> filter.module (source)

   1  <?php
   2  // $Id: filter.module,v 1.204.2.17 2010/11/03 14:11:43 goba Exp $
   3  
   4  /**
   5   * @file
   6   * Framework for handling filtering of content.
   7   */
   8  
   // Special format ID meaning "use the default format". Passing this value to
   // the filter APIs as a format ID is equivalent to passing no explicit format
   // at all.
   define('FILTER_FORMAT_DEFAULT', 0);

   // Modes of the HTML filter, stored per format in the "filter_html_$format"
   // variable: strip the disallowed tags, or escape all tags.
   define('FILTER_HTML_STRIP', 1);
   define('FILTER_HTML_ESCAPE', 2);
  16  
  /**
   * Implementation of hook_help().
   *
   * @param $path
   *   The path help is requested for.
   * @param $arg
   *   The path components; $arg[3] is used as the input format ID on the
   *   per-format pages.
   *
   * @return
   *   The help text as HTML paragraphs, or nothing for paths not handled here.
   */
  function filter_help($path, $arg) {
    // Every entry collected here becomes one <p>...</p> paragraph of the output.
    $paragraphs = array();

    switch ($path) {
      case 'admin/help#filter':
        $paragraphs[] = t("The filter module allows administrators to configure text input formats for use on your site. An input format defines the HTML tags, codes, and other input allowed in both content and comments, and is a key feature in guarding against potentially damaging input from malicious users. Two input formats included by default are <em>Filtered HTML</em> (which allows only an administrator-approved subset of HTML tags) and <em>Full HTML</em> (which allows the full set of HTML tags). Additional input formats may be created by an administrator.");
        $paragraphs[] = t('Each input format uses filters to manipulate text, and most input formats apply several different filters to text in a specific order. Each filter is designed for a specific purpose, and generally either adds, removes or transforms elements within user-entered text before it is displayed. A filter does not change the actual content of a post, but instead, modifies it temporarily before it is displayed. A filter may remove unapproved HTML tags, for instance, while another automatically adds HTML to make links referenced in text clickable.');
        $paragraphs[] = t('Users can choose between the available input formats when creating or editing content. Administrators can configure which input formats are available to which user roles, as well as choose a default input format.');
        $paragraphs[] = t('For more information, see the online handbook entry for <a href="@filter">Filter module</a>.', array('@filter' => 'http://drupal.org/handbook/modules/filter/'));
        break;

      case 'admin/settings/filters':
        $paragraphs[] = t('<em>Input formats</em> define a way of processing user-supplied text in Drupal. Each input format uses filters to manipulate text, and most input formats apply several different filters to text, in a specific order. Each filter is designed to accomplish a specific purpose, and generally either removes elements from or adds elements to text before it is displayed. Users can choose between the available input formats when submitting content.');
        $paragraphs[] = t('Use the list below to configure which input formats are available to which roles, as well as choose a default input format (used for imported content, for example). The default format is always available to users. All input formats are available to users in a role with the "administer filters" permission.');
        break;

      case 'admin/settings/filters/%':
        $rearrange_url = url('admin/settings/filters/'. $arg[3] .'/order');
        $paragraphs[] = t('Every <em>filter</em> performs one particular change on the user input, for example stripping out malicious HTML or making URLs clickable. Choose which filters you want to apply to text in this input format. If you notice some filters are causing conflicts in the output, you can <a href="@rearrange">rearrange them</a>.', array('@rearrange' => $rearrange_url));
        break;

      case 'admin/settings/filters/%/configure':
        $view_url = url('admin/settings/filters/'. $arg[3]);
        $paragraphs[] = t('If you cannot find the settings for a certain filter, make sure you have enabled it on the <a href="@url">view tab</a> first.', array('@url' => $view_url));
        break;

      case 'admin/settings/filters/%/order':
        $paragraphs[] = t('Because of the flexible filtering system, you might encounter a situation where one filter prevents another from doing its job. For example: a word in an URL gets converted into a glossary term, before the URL can be converted to a clickable link. When this happens, rearrange the order of the filters.');
        $paragraphs[] = t("Filters are executed from top-to-bottom. To change the order of the filters, modify the values in the <em>Weight</em> column or grab a drag-and-drop handle under the <em>Name</em> column and drag filters to new locations in the list. (Grab a handle by clicking and holding the mouse while hovering over a handle icon.) Remember that your changes will not be saved until you click the <em>Save configuration</em> button at the bottom of the page.");
        break;
    }

    // No help is provided for any other path.
    if (!$paragraphs) {
      return;
    }

    $output = '';
    foreach ($paragraphs as $paragraph) {
      $output .= '<p>'. $paragraph .'</p>';
    }
    return $output;
  }
  42  
  /**
   * Implementation of hook_theme().
   *
   * @return
   *   The theme hooks of this module, keyed by hook name.
   */
  function filter_theme() {
    // Both administration forms share the same registration.
    $admin_form = array(
      'arguments' => array('form' => NULL),
      'file' => 'filter.admin.inc',
    );

    $hooks = array();
    $hooks['filter_admin_overview'] = $admin_form;
    $hooks['filter_admin_order'] = $admin_form;
    $hooks['filter_tips'] = array(
      'arguments' => array('tips' => NULL, 'long' => FALSE, 'extra' => ''),
      'file' => 'filter.pages.inc',
    );
    $hooks['filter_tips_more_info'] = array(
      'arguments' => array(),
    );

    return $hooks;
  }
  65  
  /**
   * Implementation of hook_menu().
   *
   * Registers the input format administration pages and the compose tips page.
   *
   * @return
   *   The menu items of this module, keyed by path.
   */
  function filter_menu() {
    // Values shared by most of the administration items below.
    $root = 'admin/settings/filters';
    $per_format = $root .'/%filter_format';
    $admin_file = 'filter.admin.inc';
    $admin_access = array('administer filters');
    // Path component 3 is the %filter_format placeholder.
    $format_argument = array(3);

    $items = array();

    // Overview of all input formats, with "List" as its default tab.
    $items[$root] = array(
      'title' => 'Input formats',
      'description' => 'Configure how content input by users is filtered, including allowed HTML tags. Also allows enabling of module-provided filters.',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('filter_admin_overview'),
      'access arguments' => $admin_access,
      'file' => $admin_file,
    );
    $items[$root .'/list'] = array(
      'title' => 'List',
      'type' => MENU_DEFAULT_LOCAL_TASK,
    );

    // Adding and deleting input formats.
    $items[$root .'/add'] = array(
      'title' => 'Add input format',
      'page callback' => 'filter_admin_format_page',
      'access arguments' => $admin_access,
      'type' => MENU_LOCAL_TASK,
      'weight' => 1,
      'file' => $admin_file,
    );
    $items[$root .'/delete'] = array(
      'title' => 'Delete input format',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('filter_admin_delete'),
      'access arguments' => $admin_access,
      'type' => MENU_CALLBACK,
      'file' => $admin_file,
    );

    // Long compose tips; the access callback is TRUE, so no permission check.
    $items['filter/tips'] = array(
      'title' => 'Compose tips',
      'page callback' => 'filter_tips_long',
      'access callback' => TRUE,
      'type' => MENU_SUGGESTED_ITEM,
      'file' => 'filter.pages.inc',
    );

    // Pages of a single input format: edit (default tab), configure, rearrange.
    $items[$per_format] = array(
      'type' => MENU_CALLBACK,
      'title callback' => 'filter_admin_format_title',
      'title arguments' => $format_argument,
      'page callback' => 'filter_admin_format_page',
      'page arguments' => $format_argument,
      'access arguments' => $admin_access,
      'file' => $admin_file,
    );
    $items[$per_format .'/edit'] = array(
      'title' => 'Edit',
      'type' => MENU_DEFAULT_LOCAL_TASK,
      'weight' => 0,
      'file' => $admin_file,
    );
    $items[$per_format .'/configure'] = array(
      'title' => 'Configure',
      'page callback' => 'filter_admin_configure_page',
      'page arguments' => $format_argument,
      'access arguments' => $admin_access,
      'type' => MENU_LOCAL_TASK,
      'weight' => 1,
      'file' => $admin_file,
    );
    $items[$per_format .'/order'] = array(
      'title' => 'Rearrange',
      'page callback' => 'filter_admin_order_page',
      'page arguments' => $format_argument,
      'access arguments' => $admin_access,
      'type' => MENU_LOCAL_TASK,
      'weight' => 2,
      'file' => $admin_file,
    );

    return $items;
  }
 141  
 /**
  * Load a single input format by ID.
  *
  * Presumably the menu loader for the %filter_format path placeholder used in
  * filter_menu() -- confirm against the menu system's naming convention.
  *
  * @param $arg
  *   The format ID to look up.
  *
  * @return
  *   The result of filter_formats($arg): the format object, or FALSE when the
  *   ID is not among the formats returned for the current user.
  */
 function filter_format_load($arg) {
   $format = filter_formats($arg);
   return $format;
 }
 145  
 /**
  * Title callback for the pages of a single input format.
  *
  * @param $format
  *   An input format object.
  *
  * @return
  *   The name of the format.
  */
 function filter_admin_format_title($format) {
   $title = $format->name;
   return $title;
 }
 152  
 /**
  * Implementation of hook_perm().
  *
  * @return
  *   The list of permissions defined by this module.
  */
 function filter_perm() {
   $permissions = array();
   $permissions[] = 'administer filters';
   return $permissions;
 }
 159  
 /**
  * Implementation of hook_cron().
  *
  * Expires outdated entries of the filter cache.
  */
 function filter_cron() {
   $table = 'cache_filter';
   // No specific cache ID is given: the call applies to the table as a whole.
   cache_clear_all(NULL, $table);
 }
 168  
 /**
  * Implementation of hook_filter_tips().
  *
  * @param $delta
  *   Which filter of this module to describe: 0 for the HTML filter, 1 for the
  *   line break converter, 2 for the URL filter.
  * @param $format
  *   The input format ID; used to read the per-format HTML filter settings.
  * @param $long
  *   FALSE for a short one-line tip, TRUE for the long version.
  *
  * @return
  *   The tip as a translated string (HTML in the long versions), or nothing
  *   when no tip applies (for instance when the HTML filter is not in
  *   "strip" mode).
  */
 function filter_filter_tips($delta, $format, $long = FALSE) {
   global $base_url;
   switch ($delta) {
     case 0:
       if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) {
         if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) {
           switch ($long) {
             case 0:
               return t('Allowed HTML tags: @tags', array('@tags' => $allowed_html));
             case 1:
               $output = '<p>'. t('Allowed HTML tags: @tags', array('@tags' => $allowed_html)) .'</p>';
               // The detailed HTML help can be switched off per format.
               if (!variable_get("filter_html_help_$format", 1)) {
                 return $output;
               }

               $output .= t("\n<p>This site allows HTML content. While learning all of HTML may feel intimidating, learning how to use a very small number of the most basic HTML \"tags\" is very easy. This table provides examples for each tag that is enabled on this site.</p>\n<p>For more information see W3C's <a href=\"http://www.w3.org/TR/html/\">HTML Specifications</a> or use your favorite search engine to find other sites that explain HTML.</p>");
               // Per tag: array(description, example markup). A NULL entry
               // marks a tag that is already covered by another row.
               $tips = array(
                 'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. variable_get('site_name', 'Drupal') .'</a>'),
                 'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')),
                 'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'),
                 'strong' => array( t('Strong'), '<strong>'. t('Strong') .'</strong>'),
                 'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'),
                 'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'),
                 'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'),
                 'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'),
                 'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'),
                 'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'),
                 'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')),
                 'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')),
                 'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'),
                 'abbr' => array( t('Abbreviation'), t('<abbr title="Abbreviation">Abbrev.</abbr>')),
                 'acronym' => array( t('Acronym'), t('<acronym title="Three-Letter Acronym">TLA</acronym>')),
                 'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'),
                 'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'),
                 // Assumes and describes tr, td, th.
                 'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. t('Table cell') .'</td></tr> </table>'),
                 'tr' => NULL, 'td' => NULL, 'th' => NULL,
                 'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'),
                 'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'),
                 // Assumes and describes li.
                 'ol' => array( t('Ordered list - use the &lt;li&gt; to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ol>'),
                 'ul' => array( t('Unordered list - use the &lt;li&gt; to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'),
                 'li' => NULL,
                 // Assumes and describes dt and dd.
                 'dl' => array( t('Definition lists are similar to other HTML lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the definition term and &lt;dd&gt; begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'),
                 'dt' => NULL, 'dd' => NULL,
                 'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'),
                 'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'),
                 'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'),
                 'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'),
                 'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'),
                 'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>')
               );
               $header = array(t('Tag Description'), t('You Type'), t('You Get'));
               // Start from an empty list: when the allowed HTML setting
               // contains no parsable tag, the loop below adds no row and the
               // table must still receive an array.
               $rows = array();
               preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
               foreach ($out[1] as $tag) {
                 if (array_key_exists($tag, $tips)) {
                   if ($tips[$tag]) {
                     $rows[] = array(
                       array('data' => $tips[$tag][0], 'class' => 'description'),
                       array('data' => '<code>'. check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'),
                       array('data' => $tips[$tag][1], 'class' => 'get')
                     );
                   }
                 }
                 else {
                   $rows[] = array(
                     array('data' => t('No help provided for tag %tag.', array('%tag' => $tag)), 'class' => 'description', 'colspan' => 3),
                   );
                 }
               }
               $output .= theme('table', $header, $rows);

               $output .= t("\n<p>Most unusual characters can be directly entered without any problems.</p>\n<p>If you do encounter problems, try using HTML character entities. A common example looks like &amp;amp; for an ampersand &amp; character. For a full list of entities see HTML's <a href=\"http://www.w3.org/TR/html4/sgml/entities.html\">entities</a> page. Some of the available characters include:</p>");
               $entities = array(
                 array( t('Ampersand'), '&amp;'),
                 array( t('Greater than'), '&gt;'),
                 array( t('Less than'), '&lt;'),
                 array( t('Quotation mark'), '&quot;'),
               );
               $header = array(t('Character Description'), t('You Type'), t('You Get'));
               // Reset the rows for the second table.
               $rows = array();
               foreach ($entities as $entity) {
                 $rows[] = array(
                   array('data' => $entity[0], 'class' => 'description'),
                   array('data' => '<code>'. check_plain($entity[1]) .'</code>', 'class' => 'type'),
                   array('data' => $entity[1], 'class' => 'get')
                 );
               }
               $output .= theme('table', $header, $rows);
               return $output;
           }
         }
         else {
           return t('No HTML tags allowed');
         }
       }
       break;

     case 1:
       switch ($long) {
         case 0:
           return t('Lines and paragraphs break automatically.');
         case 1:
           return t('Lines and paragraphs are automatically recognized. The &lt;br /&gt; line break, &lt;p&gt; paragraph and &lt;/p&gt; close paragraph tags are inserted automatically. If paragraphs are not recognized simply add a couple blank lines.');
       }
       break;
     case 2:
       return t('Web page addresses and e-mail addresses turn into links automatically.');
   }
 }
 287  
 /**
  * Retrieve a list of input formats.
  *
  * The list is loaded once per request and kept in a static variable. Users
  * with the "administer filters" permission get every format; other users get
  * the formats assigned to one of their roles plus the default format.
  *
  * @param $index
  *   (optional) A format ID. When given, only that format is returned.
  *
  * @return
  *   Without $index: an array of format objects keyed by format ID. With
  *   $index: the matching format object, or FALSE when it is not in the list.
  */
 function filter_formats($index = NULL) {
   global $user;
   static $formats;

   // Administrators can always use all input formats.
   $all = user_access('administer filters');

   if (!isset($formats)) {
     $formats = array();

     $query = 'SELECT * FROM {filter_formats}';

     // Build query for selecting the format(s) based on the user's roles.
     $args = array();
     if (!$all) {
       $where = array();
       foreach ($user->roles as $rid => $role) {
         $where[] = "roles LIKE '%%,%d,%%'";
         $args[] = $rid;
       }
       // The default format is always selectable. Adding it to the same list
       // of conditions keeps the WHERE clause valid even when the user has no
       // roles at all (previously this produced "WHERE  OR format = %d").
       $where[] = 'format = %d';
       $args[] = variable_get('filter_default_format', 1);
       $query .= ' WHERE '. implode(' OR ', $where);
     }

     $result = db_query($query, $args);
     while ($format = db_fetch_object($result)) {
       $formats[$format->format] = $format;
     }
   }
   if (isset($index)) {
     return isset($formats[$index]) ? $formats[$index] : FALSE;
   }
   return $formats;
 }
 325  
 /**
  * Build a list of all filters.
  *
  * Asks every module in module_list() for its filters through the "list"
  * operation of hook_filter().
  *
  * @return
  *   An array of filter objects (module, delta, name) keyed by "module/delta"
  *   and sorted by filter name.
  */
 function filter_list_all() {
   $all_filters = array();

   foreach (module_list() as $module_name) {
     $provided = module_invoke($module_name, 'filter', 'list');
     // Skip modules that do not return a list of filters.
     if (!isset($provided) || !is_array($provided)) {
       continue;
     }
     foreach ($provided as $delta => $label) {
       $entry = new stdClass();
       $entry->module = $module_name;
       $entry->delta = $delta;
       $entry->name = $label;
       $all_filters[$module_name .'/'. $delta] = $entry;
     }
   }

   uasort($all_filters, '_filter_list_cmp');

   return $all_filters;
 }
 345  
 /**
  * Helper function for sorting the filter list by filter name.
  *
  * @param $a
  *   First filter object.
  * @param $b
  *   Second filter object.
  *
  * @return
  *   The strcmp() result for the two names: negative, zero or positive.
  */
 function _filter_list_cmp($a, $b) {
   $first_name = $a->name;
   $second_name = $b->name;
   return strcmp($first_name, $second_name);
 }
 352  
 /**
  * Resolve a format id, including the default format.
  *
  * @param $format
  *   A format ID, possibly FILTER_FORMAT_DEFAULT.
  *
  * @return
  *   The site's default format ID when FILTER_FORMAT_DEFAULT was passed,
  *   otherwise the given ID unchanged.
  */
 function filter_resolve_format($format) {
   if ($format == FILTER_FORMAT_DEFAULT) {
     return variable_get('filter_default_format', 1);
   }
   return $format;
 }
 /**
  * Check if text in a certain input format is allowed to be cached.
  *
  * @param $format
  *   A format ID; FILTER_FORMAT_DEFAULT is resolved to the default format.
  *
  * @return
  *   The value of the "cache" column of the format's {filter_formats} row.
  *   The value is remembered per format for the rest of the request.
  */
 function filter_format_allowcache($format) {
   static $cache = array();

   $format = filter_resolve_format($format);
   if (isset($cache[$format])) {
     return $cache[$format];
   }

   $result = db_query('SELECT cache FROM {filter_formats} WHERE format = %d', $format);
   $cache[$format] = db_result($result);
   return $cache[$format];
 }
 370  
 /**
  * Retrieve a list of filters for a certain format.
  *
  * Results are kept in a static variable, one entry per format.
  *
  * @param $format
  *   A format ID. A format that has no filters and no {filter_formats} row is
  *   treated as the default input format.
  *
  * @return
  *   An array of filter objects keyed by "module/delta", in execution order
  *   (weight, module, delta). Filters whose module no longer lists them are
  *   left out.
  */
 function filter_list_format($format) {
   static $filters = array();

   if (!isset($filters[$format])) {
     $result = db_query("SELECT * FROM {filters} WHERE format = %d ORDER BY weight, module, delta", $format);
     if (db_affected_rows($result) == 0 && !db_result(db_query("SELECT 1 FROM {filter_formats} WHERE format = %d", $format))) {
       // The format has no filters and does not exist, use the default input
       // format.
       $default = variable_get('filter_default_format', 1);
       if ((int) $format === (int) $default) {
         // The default format itself is missing: falling back to it again
         // would recurse forever, so report an empty filter list instead.
         $filters[$format] = array();
       }
       else {
         $filters[$format] = filter_list_format($default);
       }
     }
     else {
       $filters[$format] = array();
       while ($filter = db_fetch_object($result)) {
         $list = module_invoke($filter->module, 'filter', 'list');
         if (isset($list) && is_array($list) && isset($list[$filter->delta])) {
           $filter->name = $list[$filter->delta];
           $filters[$format][$filter->module .'/'. $filter->delta] = $filter;
         }
       }
     }
   }

   return $filters[$format];
 }
 398  
 399  /**
 400   * @name Filtering functions
 401   * @{
 402   * Modules which need to have content filtered can use these functions to
 403   * interact with the filter system.
 404   *
 405   * For more info, see the hook_filter() documentation.
 406   *
 407   * Note: because filters can inject JavaScript or execute PHP code, security is
 408   * vital here. When a user supplies a $format, you should validate it with
 409   * filter_access($format) before accepting/using it. This is normally done in
 410   * the validation stage of the node system. You should for example never make a
 411   * preview of content in a disallowed format.
 412   */
 413  
 /**
  * Run all the enabled filters on a piece of text.
  *
  * @param $text
  *    The text to be filtered.
  * @param $format
  *    The format of the text to be filtered. Specify FILTER_FORMAT_DEFAULT for
  *    the default format.
  * @param $check
  *    Whether to check the $format with filter_access() first. Defaults to TRUE.
  *    Note that this will check the permissions of the current user, so you
  *    should specify $check = FALSE when viewing other people's content. When
  *    showing content that is not (yet) stored in the database (eg. upon preview),
  *    set to TRUE so the user's permissions are checked.
  *
  * @return
  *    The filtered text, or the translated string 'n/a' when $text is not set
  *    or the access check fails.
  */
 function check_markup($text, $format = FILTER_FORMAT_DEFAULT, $check = TRUE) {
   // Nothing to filter, or (with $check = TRUE) a format the current user may
   // not use.
   if (!isset($text) || ($check && !filter_access($format))) {
     return t('n/a');
   }

   $format = filter_resolve_format($format);

   // Return the cached version of this piece of text when there is one.
   $cache_id = $format .':'. md5($text);
   $cached = cache_get($cache_id, 'cache_filter');
   if ($cached) {
     return $cached->data;
   }

   // See if caching is allowed for this format.
   $cacheable = filter_format_allowcache($format);

   // Convert all Windows and Mac newlines to a single newline, so filters only
   // need to deal with one possibility.
   $text = str_replace(array("\r\n", "\r"), "\n", $text);

   // Get a complete list of filters, ordered properly.
   $filters = filter_list_format($format);

   // First give every filter the chance to escape HTML-like data such as code
   // or formulas ("prepare"), then perform the actual filtering ("process").
   foreach (array('prepare', 'process') as $op) {
     foreach ($filters as $filter) {
       $text = module_invoke($filter->module, 'filter', $op, $filter->delta, $format, $text, $cache_id);
     }
   }

   // Store in cache with a minimum expiration time of 1 day.
   if ($cacheable) {
     cache_set($cache_id, $text, 'cache_filter', time() + (60 * 60 * 24));
   }

   return $text;
 }
 471  
 /**
  * Generates a selector for choosing a format in a form.
  *
  * With several formats available the result is a collapsed fieldset holding
  * one radio button per format; with a single format it is a hidden value
  * plus the formatting guidelines.
  *
  * @param $value
  *   The ID of the format that is currently selected; uses the default format
  *   if not provided.
  * @param $weight
  *   The weight of the form element within the form.
  * @param $parents
  *   The parents array of the element. Required when defining multiple text
  *   formats on a single form or having a different parent than 'format'.
  *
  * @return
  *   Form API array for the form element.
  *
  * @see filter_form_validate()
  * @ingroup forms
  */
 function filter_form($value = FILTER_FORMAT_DEFAULT, $weight = NULL, $parents = array('format')) {
   $selected = filter_resolve_format($value);
   $available = filter_formats();

   $more_info = theme('filter_tips_more_info');

   if (count($available) <= 1) {
     // Only one format available: use a hidden form item and only show tips.
     $only = array_shift($available);
     $form[$only->format] = array(
       '#type' => 'value',
       '#value' => $only->format,
       '#parents' => $parents,
     );
     $tips = _filter_tips(variable_get('filter_default_format', 1), FALSE);
     $form['format']['guidelines'] = array(
       '#title' => t('Formatting guidelines'),
       '#value' => theme('filter_tips', $tips, FALSE, $more_info),
     );
   }
   else {
     // Multiple formats available: display radio buttons with tips.
     $form = array(
       '#type' => 'fieldset',
       '#title' => t('Input format'),
       '#collapsible' => TRUE,
       '#collapsed' => TRUE,
       '#weight' => $weight,
       '#element_validate' => array('filter_form_validate'),
     );
     foreach ($available as $candidate) {
       $id = $candidate->format;
       // Generate the parents as the autogenerator does, so we will have a
       // unique id for each radio button.
       $id_parts = array_merge($parents, array($id));
       $form[$id] = array(
         '#type' => 'radio',
         '#title' => $candidate->name,
         '#default_value' => $selected,
         '#return_value' => $id,
         '#parents' => $parents,
         '#description' => theme('filter_tips', _filter_tips($id, FALSE)),
         '#id' => form_clean_id('edit-'. implode('-', $id_parts)),
       );
     }
   }

   // In both cases the element ends with the link to the long tips.
   $form[] = array('#value' => $more_info);
   return $form;
 }
 534  
 /**
  * Validation callback for filter elements in a form.
  *
  * Accepts the element as soon as one child's submitted value equals that
  * child's return value; otherwise flags a form error and logs the attempt.
  *
  * @param $form
  *   The format selector element built by filter_form().
  *
  * @see filter_form()
  */
 function filter_form_validate($form) {
   foreach (element_children($form) as $key) {
     $child = $form[$key];
     if ($child['#value'] == $child['#return_value']) {
       return;
     }
   }

   form_error($form, t('An illegal choice has been detected. Please contact the site administrator.'));

   // The element is named by its title, or by its first parent when it has
   // no title. $key still refers to the last child visited by the loop.
   $element_name = empty($form['#title']) ? $form['#parents'][0] : $form['#title'];
   $context = array(
     '%choice' => $form[$key]['#value'],
     '%name' => $element_name,
   );
   watchdog('form', 'Illegal choice %choice in %name element.', $context, WATCHDOG_ERROR);
 }
 549  
 /**
  * Returns TRUE if the user is allowed to access this format.
  *
  * @param $format
  *   A format ID; FILTER_FORMAT_DEFAULT is resolved to the default format.
  *
  * @return
  *   TRUE for users with the "administer filters" permission, for the default
  *   format, and for any format listed by filter_formats(); FALSE otherwise.
  */
 function filter_access($format) {
   $format = filter_resolve_format($format);

   if (user_access('administer filters')) {
     return TRUE;
   }
   if ($format == variable_get('filter_default_format', 1)) {
     return TRUE;
   }

   $formats = filter_formats();
   return isset($formats[$format]);
 }
 563  
 564  /**
 565   * @} End of "Filtering functions".
 566   */
 567  
 568  
 /**
  * Helper function for fetching filter tips.
  *
  * @param $format
  *   A format ID, or -1 to collect the tips of every format returned by
  *   filter_formats().
  * @param $long
  *   Passed on to hook_filter_tips(): FALSE for short tips, TRUE for long ones.
  *
  * @return
  *   An array keyed by format name; each value is a list of
  *   array('tip' => ..., 'id' => "module/delta") entries.
  */
 function _filter_tips($format, $long = FALSE) {
   if ($format == -1) {
     $formats = filter_formats();
   }
   else {
     $row = db_fetch_object(db_query("SELECT * FROM {filter_formats} WHERE format = %d", $format));
     $formats = array($row);
   }

   $tips = array();

   foreach ($formats as $current) {
     $filters = filter_list_format($current->format);
     $name = $current->name;

     $tips[$name] = array();
     foreach ($filters as $id => $filter) {
       $tip = module_invoke($filter->module, 'filter_tips', $filter->delta, $current->format, $long);
       // Filters without a tip are left out.
       if ($tip) {
         $tips[$name][] = array('tip' => $tip, 'id' => $id);
       }
     }
   }

   return $tips;
 }
 595  
 596  
 /**
  * Format a link to the more extensive filter tips.
  *
  * @return
  *   A paragraph containing a link to the filter/tips page.
  *
  * @ingroup themeable
  */
 function theme_filter_tips_more_info() {
   $link = l(t('More information about formatting options'), 'filter/tips');
   return '<p>'. $link .'</p>';
 }
 605  
 606  /**
 607   * @name Standard filters
 608   * @{
 609   * Filters implemented by the filter.module.
 610   */
 611  
 /**
  * Implementation of hook_filter(). Contains a basic set of essential filters.
  * - HTML filter (delta 0):
  *     Validates user-supplied HTML, transforming it as necessary.
  * - Line break converter (delta 1):
  *     Converts newlines into paragraph and break tags.
  * - URL filter (delta 2):
  *     Turns web and e-mail addresses into clickable links.
  * - HTML corrector (delta 3):
  *     Corrects faulty and chopped off HTML in postings.
  *
  * @param $op
  *   The operation: 'list', 'description', 'process' or 'settings'. Any other
  *   operation returns $text unchanged.
  * @param $delta
  *   Which of the filters above the operation applies to.
  * @param $format
  *   The input format ID the filter runs in.
  * @param $text
  *   The text to process.
  *
  * @return
  *   Depends on $op: the list of filter names, a description, the processed
  *   text, or a settings form.
  */
 function filter_filter($op, $delta = 0, $format = -1, $text = '') {
   if ($op == 'list') {
     return array(0 => t('HTML filter'), 1 => t('Line break converter'), 2 => t('URL filter'), 3 => t('HTML corrector'));
   }

   if ($op == 'description') {
     if ($delta == 0) {
       return t('Allows you to restrict whether users can post HTML and which tags to filter out. It will also remove harmful content such as JavaScript events, JavaScript URLs and CSS styles from those tags that are not removed.');
     }
     elseif ($delta == 1) {
       return t('Converts line breaks into HTML (i.e. &lt;br&gt; and &lt;p&gt; tags).');
     }
     elseif ($delta == 2) {
       return t('Turns web and e-mail addresses into clickable links.');
     }
     elseif ($delta == 3) {
       return t('Corrects faulty and chopped off HTML in postings.');
     }
     return;
   }

   if ($op == 'process') {
     if ($delta == 0) {
       return _filter_html($text, $format);
     }
     elseif ($delta == 1) {
       return _filter_autop($text);
     }
     elseif ($delta == 2) {
       return _filter_url($text, $format);
     }
     elseif ($delta == 3) {
       return _filter_htmlcorrector($text);
     }
     return $text;
   }

   if ($op == 'settings') {
     // Only the HTML filter and the URL filter have settings.
     if ($delta == 0) {
       return _filter_html_settings($format);
     }
     elseif ($delta == 2) {
       return _filter_url_settings($format);
     }
     return;
   }

   // Any other operation leaves the text untouched.
   return $text;
 }
 668  
 669  /**
 670   * Settings for the HTML filter.
 671   */
 672  function _filter_html_settings($format) {
 673    $form['filter_html'] = array(
 674      '#type' => 'fieldset',
 675      '#title' => t('HTML filter'),
 676      '#collapsible' => TRUE,
 677    );
 678    $form['filter_html']["filter_html_$format"] = array(
 679      '#type' => 'radios',
 680      '#title' => t('Filter HTML tags'),
 681      '#default_value' => variable_get("filter_html_$format", FILTER_HTML_STRIP),
 682      '#options' => array(FILTER_HTML_STRIP => t('Strip disallowed tags'), FILTER_HTML_ESCAPE => t('Escape all tags')),
 683      '#description' => t('How to deal with HTML tags in user-contributed content. If set to "Strip disallowed tags", dangerous tags are removed (see below). If set to "Escape tags", all HTML is escaped and presented as it was typed.'),
 684    );
 685    $form['filter_html']["allowed_html_$format"] = array(
 686      '#type' => 'textfield',
 687      '#title' => t('Allowed HTML tags'),
 688      '#default_value' => variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'),
 689      '#size' => 64,
 690      '#maxlength' => 1024,
 691      '#description' => t('If "Strip disallowed tags" is selected, optionally specify tags which should not be stripped. JavaScript event attributes are always stripped.'),
 692    );
 693    $form['filter_html']["filter_html_help_$format"] = array(
 694      '#type' => 'checkbox',
 695      '#title' => t('Display HTML help'),
 696      '#default_value' => variable_get("filter_html_help_$format", 1),
 697      '#description' => t('If enabled, Drupal will display some basic HTML help in the long filter tips.'),
 698    );
 699    $form['filter_html']["filter_html_nofollow_$format"] = array(
 700      '#type' => 'checkbox',
 701      '#title' => t('Spam link deterrent'),
 702      '#default_value' => variable_get("filter_html_nofollow_$format", FALSE),
 703      '#description' => t('If enabled, Drupal will add rel="nofollow" to all links, as a measure to reduce the effectiveness of spam links. Note: this will also prevent valid links from being followed by search engines, therefore it is likely most effective when enabled for anonymous users.'),
 704    );
 705    return $form;
 706  }
 707  
 708  /**
 709   * HTML filter. Provides filtering of input into accepted HTML.
 710   */
 711  function _filter_html($text, $format) {
 712    if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) {
 713      $allowed_tags = preg_split('/\s+|<|>/', variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), -1, PREG_SPLIT_NO_EMPTY);
 714      $text = filter_xss($text, $allowed_tags);
 715    }
 716  
 717    if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_ESCAPE) {
 718      // Escape HTML
 719      $text = check_plain($text);
 720    }
 721  
 722    if (variable_get("filter_html_nofollow_$format", FALSE)) {
 723      $text = preg_replace('/<a([^>]+)>/i', '<a\\1 rel="nofollow">', $text);
 724    }
 725  
 726    return trim($text);
 727  }
 728  
 729  /**
 730   * Settings for URL filter.
 731   */
 732  function _filter_url_settings($format) {
 733    $form['filter_urlfilter'] = array(
 734      '#type' => 'fieldset',
 735      '#title' => t('URL filter'),
 736      '#collapsible' => TRUE,
 737    );
 738    $form['filter_urlfilter']['filter_url_length_'. $format] = array(
 739      '#type' => 'textfield',
 740      '#title' => t('Maximum link text length'),
 741      '#default_value' => variable_get('filter_url_length_'. $format, 72),
 742      '#maxlength' => 4,
 743      '#description' => t('URLs longer than this number of characters will be truncated to prevent long strings that break formatting. The link itself will be retained; just the text portion of the link will be truncated.'),
 744    );
 745    return $form;
 746  }
 747  
 748  /**
 749   * URL filter. Automatically converts text web addresses (URLs, e-mail addresses,
 750   * ftp links, etc.) into hyperlinks.
 751   */
 752  function _filter_url($text, $format) {
 753    // Pass length to regexp callback
 754    _filter_url_trim(NULL, variable_get('filter_url_length_'. $format, 72));
 755  
 756    $text = ' '. $text .' ';
 757  
 758    // Match absolute URLs.
 759    $text = preg_replace_callback("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])((http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://)([a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]))([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_full_links', $text);
 760  
 761    // Match e-mail addresses.
 762    $text = preg_replace("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '\1<a href="mailto:\2">\2</a>\3', $text);
 763  
 764    // Match www domains/addresses.
 765    $text = preg_replace_callback("`(<p>|<li>|[ \n\r\t\(])(www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-])([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_partial_links', $text);
 766    $text = substr($text, 1, -1);
 767  
 768    return $text;
 769  }
 770  
 771  /**
 772   * Scan input and make sure that all HTML tags are properly closed and nested.
 773   */
 774  function _filter_htmlcorrector($text) {
 775    // Prepare tag lists.
 776    static $no_nesting, $single_use;
 777    if (!isset($no_nesting)) {
 778      // Tags which cannot be nested but are typically left unclosed.
 779      $no_nesting = drupal_map_assoc(array('li', 'p'));
 780  
 781      // Single use tags in HTML4
 782      $single_use = drupal_map_assoc(array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame'));
 783    }
 784  
 785    // Properly entify angles.
 786    $text = preg_replace('@<(?=[^a-zA-Z!/]|$)@', '&lt;', $text);
 787  
 788    // Split tags from text.
 789    $split = preg_split('/<(!--.*?--|[^>]+?)>/s', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
 790    // Note: PHP ensures the array consists of alternating delimiters and literals
 791    // and begins and ends with a literal (inserting $null as required).
 792  
 793    $tag = false; // Odd/even counter. Tag or no tag.
 794    $stack = array();
 795    $output = '';
 796    foreach ($split as $value) {
 797      // Process HTML tags.
 798      if ($tag) {
 799        // Passthrough comments.
 800        if (substr($value, 0, 3) == '!--') {
 801          $output .= '<'. $value .'>';
 802        }
 803        else {
 804          list($tagname) = preg_split('/\s/', strtolower($value), 2);
 805          // Closing tag
 806          if ($tagname{0} == '/') {
 807            $tagname = substr($tagname, 1);
 808            // Discard XHTML closing tags for single use tags.
 809            if (!isset($single_use[$tagname])) {
 810              // See if we possibly have a matching opening tag on the stack.
 811              if (in_array($tagname, $stack)) {
 812                // Close other tags lingering first.
 813                do {
 814                  $output .= '</'. $stack[0] .'>';
 815                } while (array_shift($stack) != $tagname);
 816              }
 817              // Otherwise, discard it.
 818            }
 819          }
 820          // Opening tag
 821          else {
 822            // See if we have an identical 'no nesting' tag already open and close it if found.
 823            if (count($stack) && ($stack[0] == $tagname) && isset($no_nesting[$stack[0]])) {
 824              $output .= '</'. array_shift($stack) .'>';
 825            }
 826            // Push non-single-use tags onto the stack
 827            if (!isset($single_use[$tagname])) {
 828              array_unshift($stack, $tagname);
 829            }
 830            // Add trailing slash to single-use tags as per X(HT)ML.
 831            else {
 832              $value = rtrim($value, ' /') .' /';
 833            }
 834            $output .= '<'. $value .'>';
 835          }
 836        }
 837      }
 838      else {
 839        // Passthrough all text.
 840        $output .= $value;
 841      }
 842      $tag = !$tag;
 843    }
 844    // Close remaining tags.
 845    while (count($stack) > 0) {
 846      $output .= '</'. array_shift($stack) .'>';
 847    }
 848    return $output;
 849  }
 850  
 851  /**
 852   * Make links out of absolute URLs.
 853   */
 854  function _filter_url_parse_full_links($match) {
 855    $match[2] = decode_entities($match[2]);
 856    $caption = check_plain(_filter_url_trim($match[2]));
 857    $match[2] = check_url($match[2]);
 858    return $match[1] .'<a href="'. $match[2] .'" title="'. $match[2] .'">'. $caption .'</a>'. $match[5];
 859  }
 860  
 861  /**
 862   * Make links out of domain names starting with "www."
 863   */
 864  function _filter_url_parse_partial_links($match) {
 865    $match[2] = decode_entities($match[2]);
 866    $caption = check_plain(_filter_url_trim($match[2]));
 867    $match[2] = check_plain($match[2]);
 868    return $match[1] .'<a href="http://'. $match[2] .'" title="'. $match[2] .'">'. $caption .'</a>'. $match[3];
 869  }
 870  
 871  /**
 872   * Shortens long URLs to http://www.example.com/long/url...
 873   */
 874  function _filter_url_trim($text, $length = NULL) {
 875    static $_length;
 876    if ($length !== NULL) {
 877      $_length = $length;
 878    }
 879  
 880    // Use +3 for '...' string length.
 881    if (strlen($text) > $_length + 3) {
 882      $text = substr($text, 0, $_length) .'...';
 883    }
 884  
 885    return $text;
 886  }
 887  
 888  /**
 889   * Convert line breaks into <p> and <br> in an intelligent fashion.
 890   * Based on: http://photomatt.net/scripts/autop
 891   */
 892  function _filter_autop($text) {
 893    // All block level tags
 894    $block = '(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|address|p|h[1-6]|hr)';
 895  
 896    // Split at <pre>, <script>, <style> and </pre>, </script>, </style> tags.
 897    // We don't apply any processing to the contents of these tags to avoid messing
 898    // up code. We look for matched pairs and allow basic nesting. For example:
 899    // "processed <pre> ignored <script> ignored </script> ignored </pre> processed"
 900    $chunks = preg_split('@(<(?:!--.*?--|/?(?:pre|script|style|object)[^>]*)>)@si', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
 901    // Note: PHP ensures the array consists of alternating delimiters and literals
 902    // and begins and ends with a literal (inserting NULL as required).
 903    $ignore = FALSE;
 904    $ignoretag = '';
 905    $output = '';
 906    foreach ($chunks as $i => $chunk) {
 907      if ($i % 2) {
 908        // Passthrough comments.
 909        if (substr($chunk, 1, 3) == '!--') {
 910          $output .= $chunk;
 911        }
 912        else {
 913          // Opening or closing tag?
 914          $open = ($chunk[1] != '/');
 915          list($tag) = split('[ >]', substr($chunk, 2 - $open), 2);
 916          if (!$ignore) {
 917            if ($open) {
 918              $ignore = TRUE;
 919              $ignoretag = $tag;
 920            }
 921          }
 922          // Only allow a matching tag to close it.
 923          else if (!$open && $ignoretag == $tag) {
 924            $ignore = FALSE;
 925            $ignoretag = '';
 926          }
 927        }
 928      }
 929      else if (!$ignore) {
 930        $chunk = preg_replace('|\n*$|', '', $chunk) ."\n\n"; // just to make things a little easier, pad the end
 931        $chunk = preg_replace('|<br />\s*<br />|', "\n\n", $chunk);
 932        $chunk = preg_replace('!(<'. $block .'[^>]*>)!', "\n$1", $chunk); // Space things out a little
 933        $chunk = preg_replace('!(</'. $block .'>)!', "$1\n\n", $chunk); // Space things out a little
 934        $chunk = preg_replace("/\n\n+/", "\n\n", $chunk); // take care of duplicates
 935        $chunk = preg_replace('/^\n|\n\s*\n$/', '', $chunk);
 936        $chunk = '<p>'. preg_replace('/\n\s*\n\n?(.)/', "</p>\n<p>$1", $chunk) ."</p>\n"; // make paragraphs, including one at the end
 937        $chunk = preg_replace("|<p>(<li.+?)</p>|", "$1", $chunk); // problem with nested lists
 938        $chunk = preg_replace('|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $chunk);
 939        $chunk = str_replace('</blockquote></p>', '</p></blockquote>', $chunk);
 940        $chunk = preg_replace('|<p>\s*</p>\n?|', '', $chunk); // under certain strange conditions it could create a P of entirely whitespace
 941        $chunk = preg_replace('!<p>\s*(</?'. $block .'[^>]*>)!', "$1", $chunk);
 942        $chunk = preg_replace('!(</?'. $block .'[^>]*>)\s*</p>!', "$1", $chunk);
 943        $chunk = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $chunk); // make line breaks
 944        $chunk = preg_replace('!(</?'. $block .'[^>]*>)\s*<br />!', "$1", $chunk);
 945        $chunk = preg_replace('!<br />(\s*</?(?:p|li|div|th|pre|td|ul|ol)>)!', '$1', $chunk);
 946        $chunk = preg_replace('/&([^#])(?![A-Za-z0-9]{1,8};)/', '&amp;$1', $chunk);
 947      }
 948      $output .= $chunk;
 949    }
 950    return $output;
 951  }
 952  
 953  /**
 954   * Very permissive XSS/HTML filter for admin-only use.
 955   *
 956   * Use only for fields where it is impractical to use the
 957   * whole filter system, but where some (mainly inline) mark-up
 958   * is desired (so check_plain() is not acceptable).
 959   *
 960   * Allows all tags that can be used inside an HTML body, save
 961   * for scripts and styles.
 962   */
 963  function filter_xss_admin($string) {
 964    return filter_xss($string, array('a', 'abbr', 'acronym', 'address', 'b', 'bdo', 'big', 'blockquote', 'br', 'caption', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'ins', 'kbd', 'li', 'ol', 'p', 'pre', 'q', 'samp', 'small', 'span', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'tt', 'ul', 'var'));
 965  }
 966  
 967  /**
 968   * Filters an HTML string to prevent cross-site-scripting (XSS) vulnerabilities.
 969   *
 970   * Based on kses by Ulf Harnhammar, see http://sourceforge.net/projects/kses.
 971   * For examples of various XSS attacks, see http://ha.ckers.org/xss.html.
 972   *
 973   * This code does four things:
 974   * - Removes characters and constructs that can trick browsers.
 975   * - Makes sure all HTML entities are well-formed.
 976   * - Makes sure all HTML tags and attributes are well-formed.
 977   * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g.
 978   *   javascript:).
 979   *
 980   * @param $string
 981   *   The string with raw HTML in it. It will be stripped of everything that can
 982   *   cause an XSS attack.
 983   * @param $allowed_tags
 984   *   An array of allowed tags.
 985   *
 986   * @return
 987   *   An XSS safe version of $string, or an empty string if $string is not
 988   *   valid UTF-8.
 989   *
 990   * @see drupal_validate_utf8()
 991   * @ingroup sanitization
 992   */
 993  function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
 994    // Only operate on valid UTF-8 strings. This is necessary to prevent cross
 995    // site scripting issues on Internet Explorer 6.
 996    if (!drupal_validate_utf8($string)) {
 997      return '';
 998    }
 999    // Store the input format
1000    _filter_xss_split($allowed_tags, TRUE);
1001    // Remove NUL characters (ignored by some browsers)
1002    $string = str_replace(chr(0), '', $string);
1003    // Remove Netscape 4 JS entities
1004    $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
1005  
1006    // Defuse all HTML entities
1007    $string = str_replace('&', '&amp;', $string);
1008    // Change back only well-formed entities in our whitelist
1009    // Decimal numeric entities
1010    $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
1011    // Hexadecimal numeric entities
1012    $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
1013    // Named entities
1014    $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
1015  
1016    return preg_replace_callback('%
1017      (
1018      <(?=[^a-zA-Z!/])  # a lone <
1019      |                 # or
1020      <!--.*?-->        # a comment
1021      |                 # or
1022      <[^>]*(>|$)       # a string that starts with a <, up until the > or the end of the string
1023      |                 # or
1024      >                 # just a >
1025      )%x', '_filter_xss_split', $string);
1026  }
1027  
/**
 * Processes an HTML tag.
 *
 * @param $m
 *   An array whose meaning depends on the value of $store.
 *   If $store is TRUE the array contains the allowed tags.
 *   If $store is FALSE the array is a regular expression match whose element
 *   at index 1 is the HTML fragment to process.
 * @param $store
 *   Whether to store $m as the list of allowed tags.
 * @return
 *   Nothing when $store is TRUE. Otherwise an empty string if the element
 *   isn't allowed, or the cleaned up version of the HTML element.
 */
function _filter_xss_split($m, $store = FALSE) {
  static $allowed_html;

  // Storage call: remember the whitelist keyed by tag name and stop here.
  if ($store) {
    $allowed_html = array_flip($m);
    return;
  }

  $fragment = $m[1];

  // Anything not starting with "<" is a lone ">" character.
  if (substr($fragment, 0, 1) != '<') {
    return '&gt;';
  }
  // A single character starting with "<" is a lone "<" character.
  if (strlen($fragment) == 1) {
    return '&lt;';
  }

  $parts = array();
  if (!preg_match('%^(?:<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?|(<!--.*?-->))$%', $fragment, $parts)) {
    // Seriously malformed
    return '';
  }

  // The comment group is only present when the comment alternative matched.
  $comment = isset($parts[4]) ? $parts[4] : NULL;
  $closing = trim($parts[1]);
  // Comments are looked up in the whitelist under the pseudo tag "!--".
  $tag = $comment ? '!--' : $parts[2];
  $attributes = $parts[3];

  if (!isset($allowed_html[strtolower($tag)])) {
    // Disallowed HTML element
    return '';
  }

  // An allowed comment is returned verbatim.
  if ($comment) {
    return $comment;
  }

  // Closing tags never carry attributes.
  if ($closing != '') {
    return "</$tag>";
  }

  // Remember a closing XHTML slash at the end of the attributes, then remove
  // it before the attributes are parsed.
  $self_closing = '';
  if (preg_match('%\s?/\s*$%', $attributes)) {
    $self_closing = ' /';
  }
  $attributes = preg_replace('%(\s?)/\s*$%', '\1', $attributes);

  // Clean up attributes
  $clean = implode(' ', _filter_xss_attributes($attributes));
  $clean = preg_replace('/[<>]/', '', $clean);
  if (strlen($clean)) {
    $clean = ' '. $clean;
  }
  else {
    $clean = '';
  }

  return "<$tag$clean$self_closing>";
}
1099  
/**
 * Processes a string of HTML attributes.
 *
 * The string is consumed from the left by a small state machine: mode 0
 * expects an attribute name, mode 1 an equals sign or the end of a valueless
 * attribute, and mode 2 an attribute value. "style" attributes and every
 * attribute whose name starts with "on" are dropped; all other values are
 * passed through filter_xss_bad_protocol().
 *
 * @param $attr
 *   The raw attribute string of a single tag.
 * @return
 *   An array of cleaned up attributes, each either 'name' or 'name="value"'.
 */
function _filter_xss_attributes($attr) {
  $attrarr = array();
  $mode = 0;
  $attrname = '';
  // Whether the attribute currently being parsed must be dropped.
  $skip = FALSE;

  while (strlen($attr) != 0) {
    // Was the last operation successful?
    $working = 0;

    switch ($mode) {
      case 0:
        // Attribute name, href for instance
        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
          $attrname = strtolower($match[1]);
          $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
          $working = $mode = 1;
          $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
        }

        break;

      case 1:
        // Equals sign or valueless ("selected")
        if (preg_match('/^\s*=\s*/', $attr)) {
          $working = 1;
          $mode = 2;
          $attr = preg_replace('/^\s*=\s*/', '', $attr);
          break;
        }

        if (preg_match('/^\s+/', $attr)) {
          $working = 1;
          $mode = 0;
          if (!$skip) {
            $attrarr[] = $attrname;
          }
          $attr = preg_replace('/^\s+/', '', $attr);
        }

        break;

      case 2:
        // Attribute value, a URL after href= for instance
        if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attrarr[] = "$attrname=\"$thisval\"";
          }
          $working = 1;
          $mode = 0;
          $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
          break;
        }

        if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attrarr[] = "$attrname='$thisval'";
          }
          $working = 1;
          $mode = 0;
          $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
          break;
        }

        // Unquoted value: emitted with double quotes added.
        if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attrarr[] = "$attrname=\"$thisval\"";
          }
          $working = 1;
          $mode = 0;
          $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
        }

        break;
    }

    if ($working == 0) {
      // not well formed, remove and try again
      // NOTE(review): the pattern below contains an empty alternative (two
      // consecutive "|"); it is kept exactly as found — confirm it is
      // intentional.
      $attr = preg_replace('/
        ^
        (
        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
        |               # or
        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
        |               # or
        \S              # - a non-whitespace character
        )*              # any number of the above three
        \s*             # any number of whitespaces
        /x', '', $attr);
      $mode = 0;
    }
  }

  // The attribute list ends with a valueless attribute like "selected".
  // Honor $skip here as well, so that a trailing bare "onclick" or "style"
  // is dropped just like it is everywhere else.
  if ($mode == 1 && !$skip) {
    $attrarr[] = $attrname;
  }
  return $attrarr;
}
1206  
/**
 * Processes an HTML attribute value and ensures it does not contain an URL
 * with a disallowed protocol (e.g. javascript:)
 *
 * Leading "protocol:" prefixes that are not in the allowed list are removed
 * one after another until the value starts with an allowed protocol, is a
 * relative URL, or has no protocol at all.
 *
 * @param $string
 *   The string with the attribute value.
 * @param $decode
 *   Whether to decode entities in the $string. Set to FALSE if the $string
 *   is in plain text, TRUE otherwise. Defaults to TRUE.
 * @return
 *   Cleaned up and HTML-escaped version of $string.
 */
function filter_xss_bad_protocol($string, $decode = TRUE) {
  static $allowed_protocols;
  if (!isset($allowed_protocols)) {
    // Keyed by protocol name so that isset() can be used for the lookup.
    $defaults = array('http', 'https', 'ftp', 'news', 'nntp', 'tel', 'telnet', 'mailto', 'irc', 'ssh', 'sftp', 'webcal', 'rtsp');
    $allowed_protocols = array_flip(variable_get('filter_allowed_protocols', $defaults));
  }

  // Get the plain text representation of the attribute value (i.e. its meaning).
  if ($decode) {
    $string = decode_entities($string);
  }

  // Iteratively remove any invalid protocol found.
  while (TRUE) {
    $colonpos = strpos($string, ':');
    if (!($colonpos > 0)) {
      // No colon, or a colon in first position: nothing to strip.
      break;
    }

    // We found a colon, possibly a protocol. Verify.
    $protocol = substr($string, 0, $colonpos);

    // If a colon is preceded by a slash, question mark or hash, it cannot
    // possibly be part of the URL scheme. This must be a relative URL,
    // which inherits the (safe) protocol of the base document.
    if (preg_match('![/?#]!', $protocol)) {
      break;
    }

    // Per RFC2616, section 3.2.3 (URI Comparison) scheme comparison must be
    // case-insensitive. An allowed protocol ends the search.
    if (isset($allowed_protocols[strtolower($protocol)])) {
      break;
    }

    // Disallowed protocol: drop it together with its colon and look again.
    $string = substr($string, $colonpos + 1);
  }

  return check_plain($string);
}
1253  
1254  /**
1255   * @} End of "Standard filters".
1256   */


Generated: Thu Mar 24 11:18:33 2011 Cross-referenced by PHPXref 0.7