[ Index ]

PHP Cross Reference of Drupal 6 (yi-drupal)

title

Body

[close]

/sites/all/modules/ctools/includes/ -> cleanstring.inc (source)

   1  <?php
   2  // $Id $
   3  
   4  /**
   5   * @file
   6   * Helper class to clean strings to make them URL safe and translatable.
   7   *
   8   * This was copied directly from pathauto and put here to be made available
   9   * to all, because more things than just pathauto want URL safe strings.
  10   *
  11   * To use, simply:
  12   * @code
  13   * ctools_include('cleanstring');
  14   * $output = ctools_cleanstring($string);
  15   * @endcode
  16   * You can add a variety of settings as an array in the second argument,
  17   * including words to ignore, how to deal with punctuation, length
  18   * limits, and more. See the function itself for options.
  19   */
  20  
  21  /**
  22   * Matches Unicode character classes.
  23   *
  24   * See: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
  25   *
  26   * The index only contains the following character classes:
  27   *   Lu  Letter, Uppercase
  28   *   Ll  Letter, Lowercase
  29   *   Lt  Letter, Titlecase
  30   *   Lo  Letter, Other
  31   *   Nd  Number, Decimal Digit
  32   *   No  Number, Other
  33   *
  34   * Copied from search.module's PREG_CLASS_SEARCH_EXCLUDE.
  35   */
  36  define('CTOOLS_PREG_CLASS_ALNUM',
  37  '\x{0}-\x{2f}\x{3a}-\x{40}\x{5b}-\x{60}\x{7b}-\x{bf}\x{d7}\x{f7}\x{2b0}-'.
  38  '\x{385}\x{387}\x{3f6}\x{482}-\x{489}\x{559}-\x{55f}\x{589}-\x{5c7}\x{5f3}-'.
  39  '\x{61f}\x{640}\x{64b}-\x{65e}\x{66a}-\x{66d}\x{670}\x{6d4}\x{6d6}-\x{6ed}'.
  40  '\x{6fd}\x{6fe}\x{700}-\x{70f}\x{711}\x{730}-\x{74a}\x{7a6}-\x{7b0}\x{901}-'.
  41  '\x{903}\x{93c}\x{93e}-\x{94d}\x{951}-\x{954}\x{962}-\x{965}\x{970}\x{981}-'.
  42  '\x{983}\x{9bc}\x{9be}-\x{9cd}\x{9d7}\x{9e2}\x{9e3}\x{9f2}-\x{a03}\x{a3c}-'.
  43  '\x{a4d}\x{a70}\x{a71}\x{a81}-\x{a83}\x{abc}\x{abe}-\x{acd}\x{ae2}\x{ae3}'.
  44  '\x{af1}-\x{b03}\x{b3c}\x{b3e}-\x{b57}\x{b70}\x{b82}\x{bbe}-\x{bd7}\x{bf0}-'.
  45  '\x{c03}\x{c3e}-\x{c56}\x{c82}\x{c83}\x{cbc}\x{cbe}-\x{cd6}\x{d02}\x{d03}'.
  46  '\x{d3e}-\x{d57}\x{d82}\x{d83}\x{dca}-\x{df4}\x{e31}\x{e34}-\x{e3f}\x{e46}-'.
  47  '\x{e4f}\x{e5a}\x{e5b}\x{eb1}\x{eb4}-\x{ebc}\x{ec6}-\x{ecd}\x{f01}-\x{f1f}'.
  48  '\x{f2a}-\x{f3f}\x{f71}-\x{f87}\x{f90}-\x{fd1}\x{102c}-\x{1039}\x{104a}-'.
  49  '\x{104f}\x{1056}-\x{1059}\x{10fb}\x{10fc}\x{135f}-\x{137c}\x{1390}-\x{1399}'.
  50  '\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16f0}\x{1712}-\x{1714}'.
  51  '\x{1732}-\x{1736}\x{1752}\x{1753}\x{1772}\x{1773}\x{17b4}-\x{17db}\x{17dd}'.
  52  '\x{17f0}-\x{180e}\x{1843}\x{18a9}\x{1920}-\x{1945}\x{19b0}-\x{19c0}\x{19c8}'.
  53  '\x{19c9}\x{19de}-\x{19ff}\x{1a17}-\x{1a1f}\x{1d2c}-\x{1d61}\x{1d78}\x{1d9b}-'.
  54  '\x{1dc3}\x{1fbd}\x{1fbf}-\x{1fc1}\x{1fcd}-\x{1fcf}\x{1fdd}-\x{1fdf}\x{1fed}-'.
  55  '\x{1fef}\x{1ffd}-\x{2070}\x{2074}-\x{207e}\x{2080}-\x{2101}\x{2103}-\x{2106}'.
  56  '\x{2108}\x{2109}\x{2114}\x{2116}-\x{2118}\x{211e}-\x{2123}\x{2125}\x{2127}'.
  57  '\x{2129}\x{212e}\x{2132}\x{213a}\x{213b}\x{2140}-\x{2144}\x{214a}-\x{2b13}'.
  58  '\x{2ce5}-\x{2cff}\x{2d6f}\x{2e00}-\x{3005}\x{3007}-\x{303b}\x{303d}-\x{303f}'.
  59  '\x{3099}-\x{309e}\x{30a0}\x{30fb}-\x{30fe}\x{3190}-\x{319f}\x{31c0}-\x{31cf}'.
  60  '\x{3200}-\x{33ff}\x{4dc0}-\x{4dff}\x{a015}\x{a490}-\x{a716}\x{a802}\x{a806}'.
  61  '\x{a80b}\x{a823}-\x{a82b}\x{d800}-\x{f8ff}\x{fb1e}\x{fb29}\x{fd3e}\x{fd3f}'.
  62  '\x{fdfc}-\x{fe6b}\x{feff}-\x{ff0f}\x{ff1a}-\x{ff20}\x{ff3b}-\x{ff40}\x{ff5b}-'.
  63  '\x{ff65}\x{ff70}\x{ff9e}\x{ff9f}\x{ffe0}-\x{fffd}');
  64  
  65  
  66  /**
  67   * Clean up a string value provided by a module.
  68   *
  69   * Resulting string contains only alphanumerics and separators.
  70   *
  71   * @param $string
  72   *   A string to clean.
  73   * @param $settings
  74   *   An optional array of settings to use.
  75   *   - 'clean slash': If set, slashes will be cleaned. Defaults to TRUE,
  76   *     so you have to explicitly set this to FALSE to not clean the
  77   *     slashes.
  78   *   - 'ignore words': Set to an array of words that will be removed
  79   *     rather than made safe. Defaults to an empty array.
  80   *   - 'separator': Change spaces and untranslatable characters to
  81   *     this character. Defaults to '-'.
  82   *   - 'replacements': An array of direct replacements to be made that will
  83   *     be implemented via strtr(). Defaults to an empty array.
  84   *   - 'transliterate': If set, use the transliteration replacements. If set
  85   *     to an array, use these replacements instead of the defaults in CTools.
  86   *     Defaults to FALSE.
  87   *   - 'reduce ascii': If set to TRUE further reduce to ASCII96 only. Defaults
  88   *      to TRUE.
  89   *   - 'max length': If set to a number, reduce the resulting string to this
  90   *      maximum length. Defaults to no maximum length.
  91   *   - 'lower case': If set to TRUE, convert the result to lower case.
  92   *     Defaults to false.
  93   *   These settings will be passed through drupal_alter.
  94   *
  95   * @return
  96   *   The cleaned string.
  97   */
  98  function ctools_cleanstring($string, $settings = array()) {
  99    $settings += array(
 100      'clean slash' => TRUE,
 101      'ignore words' => array(),
 102      'separator' => '-',
 103      'replacements' => array(),
 104      'transliterate' => FALSE,
 105      'reduce ascii' => TRUE,
 106      'max length' => FALSE,
 107      'lower case' => FALSE,
 108    );
 109  
 110    // Allow modules to make other changes to the settings.
 111    if (isset($settings['clean id'])) {
 112      drupal_alter('ctools_cleanstring_' . $settings['clean id'], $settings);
 113    }
 114  
 115    drupal_alter('ctools_cleanstring', $settings);
 116  
 117    $output = $string;
 118  
 119    // Do any replacements the user selected up front.
 120    if (!empty($settings['replacements'])) {
 121      $output = strtr($output, $settings['replacements']);
 122    }
 123  
 124    // Remove slashes if instructed to do so.
 125    if ($settings['clean slash']) {
 126      $output = str_replace('/', '', $output);
 127    }
 128  
 129    if (!empty($settings['transliterate']) && module_exists('transliteration')) {
 130      $output = transliteration_get($output);
 131    }
 132  
 133    // Reduce to the subset of ASCII96 letters and numbers
 134    if ($settings['reduce ascii']) {
 135      $pattern = '/[^a-zA-Z0-9\/]+/';
 136      $output = preg_replace($pattern, $settings['separator'], $output);
 137    }
 138  
 139    // Get rid of words that are on the ignore list
 140    if (!empty($settings['ignore words'])) {
 141      $ignore_re = '\b'. preg_replace('/,/', '\b|\b', $settings['ignore words']) .'\b';
 142  
 143      if (function_exists('mb_eregi_replace')) {
 144        $output = mb_eregi_replace($ignore_re, '', $output);
 145      }
 146      else {
 147        $output = preg_replace("/$ignore_re/i", '', $output);
 148      }
 149    }
 150  
 151    // Always replace whitespace with the separator.
 152    $output = preg_replace('/\s+/', $settings['separator'], $output);
 153  
 154    // In preparation for pattern matching,
 155    // escape the separator if and only if it is not alphanumeric.
 156    if (isset($settings['separator'])) {
 157      if (preg_match('/^[^'. CTOOLS_PREG_CLASS_ALNUM .']+$/uD', $settings['separator'])) {
 158        $seppattern = $settings['separator'];
 159      }
 160      else {
 161        $seppattern = '\\'. $settings['separator'];
 162      }
 163      // Trim any leading or trailing separators (note the need to
 164      $output = preg_replace("/^$seppattern+|$seppattern+$/", '', $output);
 165  
 166      // Replace multiple separators with a single one
 167      $output = preg_replace("/$seppattern+/", $settings['separator'], $output);
 168    }
 169  
 170    // Enforce the maximum component length
 171    if (!empty($settings['max length'])) {
 172      $output = ctools_cleanstring_truncate($output, $settings['max length'], $settings['separator']);
 173    }
 174  
 175    if (!empty($settings['lower case'])) {
 176      $output = drupal_strtolower($output);
 177    }
 178    return $output;
 179  }
 180  
 181  /**
 182   * A friendly version of truncate_utf8.
 183   *
 184   * @param $string
 185   *   The string to be truncated.
 186   * @param $length
 187   *   An integer for the maximum desired length.
 188   * @param $separator
 189   *   A string which contains the word boundary such as - or _.
 190   *
 191   * @return
 192   *  The string truncated below the maxlength.
 193   */
 194  function ctools_cleanstring_truncate($string, $length, $separator) {
 195    if (drupal_strlen($string) > $length) {
 196      $string = drupal_substr($string, 0, $length + 1); // leave one more character
 197      if ($last_break = strrpos($string, $separator)) { // space exists AND is not on position 0
 198        $string = substr($string, 0, $last_break);
 199      }
 200      else {
 201        $string = drupal_substr($string, 0, $length);
 202      }
 203    }
 204    return $string;
 205  }


Generated: Mon Jul 9 18:01:44 2012 Cross-referenced by PHPXref 0.7