[ Index ]

PHP Cross Reference of WordPress 2.9.1

title

Body

[close]

/wp-includes/ -> class-snoopy.php (source)

   1  <?php
   2  if ( !in_array('Snoopy', get_declared_classes() ) ) :
   3  /*************************************************
   4  
   5  Snoopy - the PHP net client
   6  Author: Monte Ohrt <monte@ispi.net>
   7  Copyright (c): 1999-2008 New Digital Group, all rights reserved
   8  Version: 1.2.4
   9  
  10   * This library is free software; you can redistribute it and/or
  11   * modify it under the terms of the GNU Lesser General Public
  12   * License as published by the Free Software Foundation; either
  13   * version 2.1 of the License, or (at your option) any later version.
  14   *
  15   * This library is distributed in the hope that it will be useful,
  16   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18   * Lesser General Public License for more details.
  19   *
  20   * You should have received a copy of the GNU Lesser General Public
  21   * License along with this library; if not, write to the Free Software
  22   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  23  
  24  You may contact the author of Snoopy by e-mail at:
  25  monte@ohrt.com
  26  
  27  The latest version of Snoopy can be obtained from:
  28  http://snoopy.sourceforge.net/
  29  
  30  *************************************************/
  31  
  32  class Snoopy
  33  {
  34      /**** Public variables ****/
  35  
  36      /* user definable vars */
  37  
  38      var $host            =    "www.php.net";        // host name we are connecting to
  39      var $port            =    80;                    // port we are connecting to
  40      var $proxy_host        =    "";                    // proxy host to use
  41      var $proxy_port        =    "";                    // proxy port to use
  42      var $proxy_user        =    "";                    // proxy user to use
  43      var $proxy_pass        =    "";                    // proxy password to use
  44  
  45      var $agent            =    "Snoopy v1.2.4";    // agent we masquerade as
  46      var    $referer        =    "";                    // referer info to pass
  47      var $cookies        =    array();            // array of cookies to pass
  48                                                  // $cookies["username"]="joe";
  49      var    $rawheaders        =    array();            // array of raw headers to send
  50                                                  // $rawheaders["Content-type"]="text/html";
  51  
  52      var $maxredirs        =    5;                    // http redirection depth maximum. 0 = disallow
  53      var $lastredirectaddr    =    "";                // contains address of last redirected address
  54      var    $offsiteok        =    true;                // allows redirection off-site
  55      var $maxframes        =    0;                    // frame content depth maximum. 0 = disallow
  56      var $expandlinks    =    true;                // expand links to fully qualified URLs.
  57                                                  // this only applies to fetchlinks()
  58                                                  // submitlinks(), and submittext()
  59      var $passcookies    =    true;                // pass set cookies back through redirects
  60                                                  // NOTE: this currently does not respect
  61                                                  // dates, domains or paths.
  62  
  63      var    $user            =    "";                    // user for http authentication
  64      var    $pass            =    "";                    // password for http authentication
  65  
  66      // http accept types
  67      var $accept            =    "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
  68  
  69      var $results        =    "";                    // where the content is put
  70  
  71      var $error            =    "";                    // error messages sent here
  72      var    $response_code    =    "";                    // response code returned from server
  73      var    $headers        =    array();            // headers returned from server sent here
  74      var    $maxlength        =    500000;                // max return data length (body)
  75      var $read_timeout    =    0;                    // timeout on read operations, in seconds
  76                                                  // supported only since PHP 4 Beta 4
  77                                                  // set to 0 to disallow timeouts
  78      var $timed_out        =    false;                // if a read operation timed out
  79      var    $status            =    0;                    // http request status
  80  
  81      var $temp_dir        =    "/tmp";                // temporary directory that the webserver
  82                                                  // has permission to write to.
  83                                                  // under Windows, this should be C:\temp
  84  
  85      var    $curl_path        =    "/usr/local/bin/curl";
  86                                                  // Snoopy will use cURL for fetching
  87                                                  // SSL content if a full system path to
  88                                                  // the cURL binary is supplied here.
  89                                                  // set to false if you do not have
  90                                                  // cURL installed. See http://curl.haxx.se
  91                                                  // for details on installing cURL.
  92                                                  // Snoopy does *not* use the cURL
  93                                                  // library functions built into php,
  94                                                  // as these functions are not stable
  95                                                  // as of this Snoopy release.
  96  
  97      /**** Private variables ****/
  98  
  99      var    $_maxlinelen    =    4096;                // max line length (headers)
 100  
 101      var $_httpmethod    =    "GET";                // default http request method
 102      var $_httpversion    =    "HTTP/1.0";            // default http request version
 103      var $_submit_method    =    "POST";                // default submit method
 104      var $_submit_type    =    "application/x-www-form-urlencoded";    // default submit type
 105      var $_mime_boundary    =   "";                    // MIME boundary for multipart/form-data submit type
 106      var $_redirectaddr    =    false;                // will be set if page fetched is a redirect
 107      var $_redirectdepth    =    0;                    // increments on an http redirect
 108      var $_frameurls        =     array();            // frame src urls
 109      var $_framedepth    =    0;                    // increments on frame depth
 110  
 111      var $_isproxy        =    false;                // set if using a proxy server
 112      var $_fp_timeout    =    30;                    // timeout for socket connection
 113  
 114  /*======================================================================*\
 115      Function:    fetch
 116      Purpose:    fetch the contents of a web page
 117                  (and possibly other protocols in the
 118                  future like ftp, nntp, gopher, etc.)
 119      Input:        $URI    the location of the page to fetch
 120      Output:        $this->results    the output text from the fetch
 121  \*======================================================================*/
 122  
	function fetch($URI)
    {
        // Fetch $URI over http (socket) or https (external cURL binary),
        // then follow a redirect and pull in frame documents as configured.
        // Returns true when the request was issued, false when the protocol
        // is unsupported, the connection failed or cURL is not usable.

        //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
        $URI_PARTS = parse_url($URI);
        // parse_url() yields false for seriously malformed input
        if (!is_array($URI_PARTS))
            $URI_PARTS = array();
        if (!empty($URI_PARTS["user"]))
            $this->user = $URI_PARTS["user"];
        if (!empty($URI_PARTS["pass"]))
            $this->pass = $URI_PARTS["pass"];
        if (empty($URI_PARTS["query"]))
            $URI_PARTS["query"] = '';
        if (empty($URI_PARTS["path"]))
            $URI_PARTS["path"] = '';

        // a URI such as "www.example.com/" has no scheme component at all;
        // treat that as an empty (and therefore invalid) protocol
        $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';

        switch(strtolower($scheme))
        {
            case "http":
                $this->host = $URI_PARTS["host"];
                if(!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];
                if(!$this->_connect($fp))
                    return false;

                if($this->_isproxy)
                {
                    // using proxy, send entire URI
                    $this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
                }
                else
                {
                    // no proxy, send only the path
                    $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                    $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
                }

                $this->_disconnect($fp);
                break;

            case "https":
                // https is delegated to the cURL binary; give up without one
                if(!$this->curl_path)
                    return false;
                if(function_exists("is_executable") && !is_executable($this->curl_path))
                    return false;
                $this->host = $URI_PARTS["host"];
                if(!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];

                if($this->_isproxy)
                {
                    // using proxy, send entire URI
                    $this->_httpsrequest($URI,$URI,$this->_httpmethod);
                }
                else
                {
                    // no proxy, send only the path
                    $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                    $this->_httpsrequest($path, $URI, $this->_httpmethod);
                }
                break;

            default:
                // not a valid protocol; the newline has to live in a
                // double-quoted string to be a real line break
                $this->error    =    'Invalid protocol "'.$scheme.'"'."\n";
                return false;
        }

        /* url was redirected, check if we've hit the max depth */
        if($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
        {
            // only follow redirect if it's on this site, or offsiteok is true
            // NOTE(review): the on-site test only recognises an http:// prefix,
            // so https redirects are followed only when offsiteok is set.
            if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
            {
                /* follow the redirect */
                $this->_redirectdepth++;
                $this->lastredirectaddr=$this->_redirectaddr;
                $this->fetch($this->_redirectaddr);
            }
        }

        if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
        {
            // work on a copy: the nested fetch() calls refill _frameurls
            $frameurls = $this->_frameurls;
            $this->_frameurls = array();

            foreach($frameurls as $frameurl)
            {
                if($this->_framedepth >= $this->maxframes)
                    break;
                $this->fetch($frameurl);
                $this->_framedepth++;
            }
        }

        return true;
    }
 261  
 262  /*======================================================================*\
 263      Function:    submit
 264      Purpose:    submit an http form
 265      Input:        $URI    the location to post the data
 266                  $formvars    the formvars to use.
 267                      format: $formvars["var"] = "val";
 268                  $formfiles  an array of files to submit
 269                      format: $formfiles["var"] = "/dir/filename.ext";
 270      Output:        $this->results    the text output from the post
 271  \*======================================================================*/
 272  
	function submit($URI, $formvars="", $formfiles="")
    {
        // Send $formvars / $formfiles to $URI using the configured submit
        // method and content type, over http (socket) or https (external
        // cURL binary), then follow a redirect and pull in frame documents
        // as configured.  Returns true when the request was issued, false
        // when the protocol is unsupported, the connection failed or cURL
        // is not usable.

        $postdata = $this->_prepare_post_body($formvars, $formfiles);

        $URI_PARTS = parse_url($URI);
        // parse_url() yields false for seriously malformed input
        if (!is_array($URI_PARTS))
            $URI_PARTS = array();
        if (!empty($URI_PARTS["user"]))
            $this->user = $URI_PARTS["user"];
        if (!empty($URI_PARTS["pass"]))
            $this->pass = $URI_PARTS["pass"];
        if (empty($URI_PARTS["query"]))
            $URI_PARTS["query"] = '';
        if (empty($URI_PARTS["path"]))
            $URI_PARTS["path"] = '';

        // a URI such as "www.example.com/" has no scheme component at all;
        // treat that as an empty (and therefore invalid) protocol
        $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';

        switch(strtolower($scheme))
        {
            case "http":
                $this->host = $URI_PARTS["host"];
                if(!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];
                if(!$this->_connect($fp))
                    return false;

                if($this->_isproxy)
                {
                    // using proxy, send entire URI
                    $this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
                }
                else
                {
                    // no proxy, send only the path
                    $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                    $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
                }

                $this->_disconnect($fp);
                break;

            case "https":
                // https is delegated to the cURL binary; give up without one
                if(!$this->curl_path)
                    return false;
                if(function_exists("is_executable") && !is_executable($this->curl_path))
                    return false;
                $this->host = $URI_PARTS["host"];
                if(!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];

                if($this->_isproxy)
                {
                    // using proxy, send entire URI
                    $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
                }
                else
                {
                    // no proxy, send only the path
                    $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                    $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
                }
                break;

            default:
                // not a valid protocol; the newline has to live in a
                // double-quoted string to be a real line break
                $this->error    =    'Invalid protocol "'.$scheme.'"'."\n";
                return false;
        }

        /* url was redirected, check if we've hit the max depth */
        if($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
        {
            // a redirect target that does not start with the request's own
            // scheme is expanded against scheme://host first
            if(!preg_match("|^".$scheme."://|", $this->_redirectaddr))
                $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$scheme."://".$URI_PARTS["host"]);

            // only follow redirect if it's on this site, or offsiteok is true
            // NOTE(review): the on-site test only recognises an http:// prefix,
            // so https redirects are followed only when offsiteok is set.
            if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
            {
                /* follow the redirect */
                $this->_redirectdepth++;
                $this->lastredirectaddr=$this->_redirectaddr;
                if( strpos( $this->_redirectaddr, "?" ) > 0 )
                    $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
                else
                    $this->submit($this->_redirectaddr,$formvars, $formfiles);
            }
        }

        if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
        {
            // work on a copy: the nested fetch() calls refill _frameurls
            $frameurls = $this->_frameurls;
            $this->_frameurls = array();

            foreach($frameurls as $frameurl)
            {
                if($this->_framedepth >= $this->maxframes)
                    break;
                $this->fetch($frameurl);
                $this->_framedepth++;
            }
        }

        return true;
    }
 427  
 428  /*======================================================================*\
 429      Function:    fetchlinks
 430      Purpose:    fetch the links from a web page
 431      Input:        $URI    where you are fetching from
 432      Output:        $this->results    an array of the URLs
 433  \*======================================================================*/
 434  
	function fetchlinks($URI)
    {
        // nothing to post-process when the fetch itself failed
        if (!$this->fetch($URI))
            return false;

        // prefer the last redirect address, when one was recorded, as the
        // URI handed to _expandlinks()
        $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

        if (!is_array($this->results))
        {
            $this->results = $this->_striplinks($this->results);
        }
        else
        {
            // several result documents: reduce each one to its links
            $i = 0;
            while ($i < count($this->results))
            {
                $this->results[$i] = $this->_striplinks($this->results[$i]);
                $i++;
            }
        }

        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $base);

        return true;
    }
 456  
 457  /*======================================================================*\
 458      Function:    fetchform
 459      Purpose:    fetch the form elements from a web page
 460      Input:        $URI    where you are fetching from
 461      Output:        $this->results    the resulting html form
 462  \*======================================================================*/
 463  
	function fetchform($URI)
    {
        // bail out early when the page could not be fetched
        if (!$this->fetch($URI))
            return false;

        if (!is_array($this->results))
        {
            $this->results = $this->_stripform($this->results);
            return true;
        }

        // several result documents: reduce each one to its form elements
        $i = 0;
        while ($i < count($this->results))
        {
            $this->results[$i] = $this->_stripform($this->results[$i]);
            $i++;
        }

        return true;
    }
 483  
 484  
 485  /*======================================================================*\
 486      Function:    fetchtext
 487      Purpose:    fetch the text from a web page, stripping the links
 488      Input:        $URI    where you are fetching from
 489      Output:        $this->results    the text from the web page
 490  \*======================================================================*/
 491  
	function fetchtext($URI)
    {
        // bail out early when the page could not be fetched
        if (!$this->fetch($URI))
            return false;

        if (!is_array($this->results))
        {
            $this->results = $this->_striptext($this->results);
            return true;
        }

        // several result documents: run each one through _striptext()
        $i = 0;
        while ($i < count($this->results))
        {
            $this->results[$i] = $this->_striptext($this->results[$i]);
            $i++;
        }

        return true;
    }
 508  
 509  /*======================================================================*\
 510      Function:    submitlinks
 511      Purpose:    grab links from a form submission
 512      Input:        $URI    where you are submitting from
 513      Output:        $this->results    an array of the links from the post
 514  \*======================================================================*/
 515  
	function submitlinks($URI, $formvars="", $formfiles="")
    {
        // nothing to post-process when the submission itself failed
        if (!$this->submit($URI, $formvars, $formfiles))
            return false;

        // prefer the last redirect address, when one was recorded, as the
        // URI handed to _expandlinks()
        $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

        if (!is_array($this->results))
        {
            $this->results = $this->_striplinks($this->results);
            if ($this->expandlinks)
                $this->results = $this->_expandlinks($this->results, $base);
            return true;
        }

        // several result documents: strip (and optionally expand) each one
        $i = 0;
        while ($i < count($this->results))
        {
            $this->results[$i] = $this->_striplinks($this->results[$i]);
            if ($this->expandlinks)
                $this->results[$i] = $this->_expandlinks($this->results[$i], $base);
            $i++;
        }

        return true;
    }
 542  
 543  /*======================================================================*\
 544      Function:    submittext
 545      Purpose:    grab text from a form submission
 546      Input:        $URI    where you are submitting from
 547      Output:        $this->results    the text from the web page
 548  \*======================================================================*/
 549  
	function submittext($URI, $formvars = "", $formfiles = "")
    {
        // nothing to post-process when the submission itself failed
        if (!$this->submit($URI, $formvars, $formfiles))
            return false;

        // prefer the last redirect address, when one was recorded, as the
        // URI handed to _expandlinks()
        $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

        if (!is_array($this->results))
        {
            $this->results = $this->_striptext($this->results);
            if ($this->expandlinks)
                $this->results = $this->_expandlinks($this->results, $base);
            return true;
        }

        // several result documents: strip (and optionally expand) each one
        $i = 0;
        while ($i < count($this->results))
        {
            $this->results[$i] = $this->_striptext($this->results[$i]);
            if ($this->expandlinks)
                $this->results[$i] = $this->_expandlinks($this->results[$i], $base);
            $i++;
        }

        return true;
    }
 576  
 577  
 578  
 579  /*======================================================================*\
 580      Function:    set_submit_multipart
 581      Purpose:    Set the form submission content type to
 582                  multipart/form-data
 583  \*======================================================================*/
	function set_submit_multipart()
    {
        // submit() hands _submit_type to the request as the content type
        $type = "multipart/form-data";
        $this->_submit_type = $type;
    }
 588  
 589  
 590  /*======================================================================*\
 591      Function:    set_submit_normal
 592      Purpose:    Set the form submission content type to
 593                  application/x-www-form-urlencoded
 594  \*======================================================================*/
	function set_submit_normal()
    {
        // submit() hands _submit_type to the request as the content type
        $type = "application/x-www-form-urlencoded";
        $this->_submit_type = $type;
    }
 599  
 600  
 601  
 602  
 603  /*======================================================================*\
 604      Private functions
 605  \*======================================================================*/
 606  
 607  
 608  /*======================================================================*\
 609      Function:    _striplinks
 610      Purpose:    strip the hyperlinks from an html document
 611      Input:        $document    document to strip.
 612      Output:        $match        an array of the links
 613  \*======================================================================*/
 614  
	function _striplinks($document)
    {
        // Collect the href values of the <a> tags in $document.
        // Group 2 holds quoted values, group 3 unquoted ones.
        preg_match_all("'<\s*a\s.*?href\s*=\s*            # find <a href=
                        ([\"\'])?                    # find single or double quote
                        (?(1) (.*?)\\1 | ([^\s\>]+))        # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                        'isx",$document,$links);

        // start from an empty list so a document without links yields
        // array() instead of an undefined variable
        $match = array();

        // catenate the non-empty matches from the conditional subpattern:
        // every quoted href first, then every unquoted one
        foreach(array(2, 3) as $group)
        {
            if(empty($links[$group]))
                continue;

            foreach($links[$group] as $val)
            {
                if(!empty($val))
                    $match[] = $val;
            }
        }

        // return the links
        return $match;
    }
 641  
 642  /*======================================================================*\
 643      Function:    _stripform
 644      Purpose:    strip the form elements from an html document
 645      Input:        $document    document to strip.
  646      Output:        $match        the matched form elements, joined by CRLF
 647  \*======================================================================*/
 648  
	function _stripform($document)
    {
        // matches FORM, INPUT, SELECT, TEXTAREA and OPTION tags (opening or
        // closing); for OPTION the text following the tag is captured too
        $pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

        preg_match_all($pattern, $document, $found);

        // one matched element per line, CRLF separated
        return implode("\r\n", $found[0]);
    }
 659  
 660  
 661  
 662  /*======================================================================*\
 663      Function:    _striptext
 664      Purpose:    strip the text from an html document
 665      Input:        $document    document to strip.
 666      Output:        $text        the resulting text
 667  \*======================================================================*/
 668  
 669  	function _striptext($document)
 670      {
 671  
 672          // I didn't use preg eval (//e) since that is only available in PHP 4.0.
 673          // so, list your entities one by one here. I included some of the
 674          // more common ones.
 675  
 676          $search = array("'<script[^>]*?>.*?</script>'si",    // strip out javascript
 677                          "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
 678                          "'([\r\n])[\s]+'",                    // strip out white space
 679                          "'&(quot|#34|#034|#x22);'i",        // replace html entities
 680                          "'&(amp|#38|#038|#x26);'i",            // added hexadecimal values
 681                          "'&(lt|#60|#060|#x3c);'i",
 682                          "'&(gt|#62|#062|#x3e);'i",
 683                          "'&(nbsp|#160|#xa0);'i",
 684                          "'&(iexcl|#161);'i",
 685                          "'&(cent|#162);'i",
 686                          "'&(pound|#163);'i",
 687                          "'&(copy|#169);'i",
 688                          "'&(reg|#174);'i",
 689                          "'&(deg|#176);'i",
 690                          "'&(#39|#039|#x27);'",
 691                          "'&(euro|#8364);'i",                // europe
 692                          "'&a(uml|UML);'",                    // german
 693                          "'&o(uml|UML);'",
 694                          "'&u(uml|UML);'",
 695                          "'&A(uml|UML);'",
 696                          "'&O(uml|UML);'",
 697                          "'&U(uml|UML);'",
 698                          "'&szlig;'i",
 699                          );
 700          $replace = array(    "",
 701                              "",
 702                              "\\1",
 703                              "\"",
 704                              "&",
 705                              "<",
 706                              ">",
 707                              " ",
 708                              chr(161),
 709                              chr(162),
 710                              chr(163),
 711                              chr(169),
 712                              chr(174),
 713                              chr(176),
 714                              chr(39),
 715                              chr(128),
 716                              "ä",
 717                              "ö",
 718                              "ü",
 719                              "Ä",
 720                              "Ö",
 721                              "Ü",
 722                              "ß",
 723                          );
 724  
 725          $text = preg_replace($search,$replace,$document);
 726  
 727          return $text;
 728      }
 729  
 730  /*======================================================================*\
 731      Function:    _expandlinks
 732      Purpose:    expand each link into a fully qualified URL
 733      Input:        $links            the links to qualify
 734                  $URI            the full URI to get the base from
 735      Output:        $expandedLinks    the expanded links
 736  \*======================================================================*/
 737  
 738  	function _expandlinks($links,$URI)
 739      {
 740  
 741          preg_match("/^[^\?]+/",$URI,$match);
 742  
 743          $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
 744          $match = preg_replace("|/$|","",$match);
 745          $match_part = parse_url($match);
 746          $match_root =
 747          $match_part["scheme"]."://".$match_part["host"];
 748  
 749          $search = array(     "|^http://".preg_quote($this->host)."|i",
 750                              "|^(\/)|i",
 751                              "|^(?!http://)(?!mailto:)|i",
 752                              "|/\./|",
 753                              "|/[^\/]+/\.\./|"
 754                          );
 755  
 756          $replace = array(    "",
 757                              $match_root."/",
 758                              $match."/",
 759                              "/",
 760                              "/"
 761                          );
 762  
 763          $expandedLinks = preg_replace($search,$replace,$links);
 764  
 765          return $expandedLinks;
 766      }
 767  
 768  /*======================================================================*\
 769      Function:    _httprequest
 770      Purpose:    go get the http data from the server
 771      Input:        $url        the url to fetch
 772                  $fp            the current open file pointer
 773                  $URI        the full URI
 774                  $body        body contents to send if any (POST)
 775      Output:        true on success, false if the read timed out (status is set to -100)
 776  \*======================================================================*/
 777  
 778  	function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
 779      {
 780          $cookie_headers = '';
 781          if($this->passcookies && $this->_redirectaddr)
 782              $this->setcookies();
 783  
 784          $URI_PARTS = parse_url($URI);
 785          if(empty($url))
 786              $url = "/";
 787          $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
 788          if(!empty($this->agent))
 789              $headers .= "User-Agent: ".$this->agent."\r\n";
 790          if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
 791              $headers .= "Host: ".$this->host;
 792              if(!empty($this->port) && $this->port != 80)
 793                  $headers .= ":".$this->port;
 794              $headers .= "\r\n";
 795          }
 796          if(!empty($this->accept))
 797              $headers .= "Accept: ".$this->accept."\r\n";
 798          if(!empty($this->referer))
 799              $headers .= "Referer: ".$this->referer."\r\n";
 800          if(!empty($this->cookies))
 801          {
 802              if(!is_array($this->cookies))
 803                  $this->cookies = (array)$this->cookies;
 804  
 805              reset($this->cookies);
 806              if ( count($this->cookies) > 0 ) {
 807                  $cookie_headers .= 'Cookie: ';
 808                  foreach ( $this->cookies as $cookieKey => $cookieVal ) {
 809                  $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
 810                  }
 811                  $headers .= substr($cookie_headers,0,-2) . "\r\n";
 812              }
 813          }
 814          if(!empty($this->rawheaders))
 815          {
 816              if(!is_array($this->rawheaders))
 817                  $this->rawheaders = (array)$this->rawheaders;
 818              while(list($headerKey,$headerVal) = each($this->rawheaders))
 819                  $headers .= $headerKey.": ".$headerVal."\r\n";
 820          }
 821          if(!empty($content_type)) {
 822              $headers .= "Content-type: $content_type";
 823              if ($content_type == "multipart/form-data")
 824                  $headers .= "; boundary=".$this->_mime_boundary;
 825              $headers .= "\r\n";
 826          }
 827          if(!empty($body))
 828              $headers .= "Content-length: ".strlen($body)."\r\n";
 829          if(!empty($this->user) || !empty($this->pass))
 830              $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";
 831  
 832          //add proxy auth headers
 833          if(!empty($this->proxy_user))
 834              $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";
 835  
 836  
 837          $headers .= "\r\n";
 838  
 839          // set the read timeout if needed
 840          if ($this->read_timeout > 0)
 841              socket_set_timeout($fp, $this->read_timeout);
 842          $this->timed_out = false;
 843  
 844          fwrite($fp,$headers.$body,strlen($headers.$body));
 845  
 846          $this->_redirectaddr = false;
 847          unset($this->headers);
 848  
 849          while($currentHeader = fgets($fp,$this->_maxlinelen))
 850          {
 851              if ($this->read_timeout > 0 && $this->_check_timeout($fp))
 852              {
 853                  $this->status=-100;
 854                  return false;
 855              }
 856  
 857              if($currentHeader == "\r\n")
 858                  break;
 859  
 860              // if a header begins with Location: or URI:, set the redirect
 861              if(preg_match("/^(Location:|URI:)/i",$currentHeader))
 862              {
 863                  // get URL portion of the redirect
 864                  preg_match("/^(Location:|URI:)[ ]+(.*)/i",chop($currentHeader),$matches);
 865                  // look for :// in the Location header to see if hostname is included
 866                  if(!preg_match("|\:\/\/|",$matches[2]))
 867                  {
 868                      // no host in the path, so prepend
 869                      $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
 870                      // eliminate double slash
 871                      if(!preg_match("|^/|",$matches[2]))
 872                              $this->_redirectaddr .= "/".$matches[2];
 873                      else
 874                              $this->_redirectaddr .= $matches[2];
 875                  }
 876                  else
 877                      $this->_redirectaddr = $matches[2];
 878              }
 879  
 880              if(preg_match("|^HTTP/|",$currentHeader))
 881              {
 882                  if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
 883                  {
 884                      $this->status= $status[1];
 885                  }
 886                  $this->response_code = $currentHeader;
 887              }
 888  
 889              $this->headers[] = $currentHeader;
 890          }
 891  
 892          $results = '';
 893          do {
 894              $_data = fread($fp, $this->maxlength);
 895              if (strlen($_data) == 0) {
 896                  break;
 897              }
 898              $results .= $_data;
 899          } while(true);
 900  
 901          if ($this->read_timeout > 0 && $this->_check_timeout($fp))
 902          {
 903              $this->status=-100;
 904              return false;
 905          }
 906  
 907          // check if there is a a redirect meta tag
 908  
 909          if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
 910  
 911          {
 912              $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
 913          }
 914  
 915          // have we hit our frame depth and is there frame src to fetch?
 916          if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
 917          {
 918              $this->results[] = $results;
 919              for($x=0; $x<count($match[1]); $x++)
 920                  $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
 921          }
 922          // have we already fetched framed content?
 923          elseif(is_array($this->results))
 924              $this->results[] = $results;
 925          // no framed content
 926          else
 927              $this->results = $results;
 928  
 929          return true;
 930      }
 931  
 932  /*======================================================================*\
 933      Function:    _httpsrequest
 934      Purpose:    go get the https data from the server using curl
 935      Input:        $url        the url to fetch
 936                  $URI        the full URI
 937                  $body        body contents to send if any (POST)
 938      Output:        true on success, false if cURL exits with a non-zero code (error is set)
 939  \*======================================================================*/
 940  
 941  	function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
 942      {
 943          if($this->passcookies && $this->_redirectaddr)
 944              $this->setcookies();
 945  
 946          $headers = array();
 947  
 948          $URI_PARTS = parse_url($URI);
 949          if(empty($url))
 950              $url = "/";
 951          // GET ... header not needed for curl
 952          //$headers[] = $http_method." ".$url." ".$this->_httpversion;
 953          if(!empty($this->agent))
 954              $headers[] = "User-Agent: ".$this->agent;
 955          if(!empty($this->host))
 956              if(!empty($this->port))
 957                  $headers[] = "Host: ".$this->host.":".$this->port;
 958              else
 959                  $headers[] = "Host: ".$this->host;
 960          if(!empty($this->accept))
 961              $headers[] = "Accept: ".$this->accept;
 962          if(!empty($this->referer))
 963              $headers[] = "Referer: ".$this->referer;
 964          if(!empty($this->cookies))
 965          {
 966              if(!is_array($this->cookies))
 967                  $this->cookies = (array)$this->cookies;
 968  
 969              reset($this->cookies);
 970              if ( count($this->cookies) > 0 ) {
 971                  $cookie_str = 'Cookie: ';
 972                  foreach ( $this->cookies as $cookieKey => $cookieVal ) {
 973                  $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
 974                  }
 975                  $headers[] = substr($cookie_str,0,-2);
 976              }
 977          }
 978          if(!empty($this->rawheaders))
 979          {
 980              if(!is_array($this->rawheaders))
 981                  $this->rawheaders = (array)$this->rawheaders;
 982              while(list($headerKey,$headerVal) = each($this->rawheaders))
 983                  $headers[] = $headerKey.": ".$headerVal;
 984          }
 985          if(!empty($content_type)) {
 986              if ($content_type == "multipart/form-data")
 987                  $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
 988              else
 989                  $headers[] = "Content-type: $content_type";
 990          }
 991          if(!empty($body))
 992              $headers[] = "Content-length: ".strlen($body);
 993          if(!empty($this->user) || !empty($this->pass))
 994              $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
 995  
 996          for($curr_header = 0; $curr_header < count($headers); $curr_header++) {
 997              $safer_header = strtr( $headers[$curr_header], "\"", " " );
 998              $cmdline_params .= " -H \"".$safer_header."\"";
 999          }
1000  
1001          if(!empty($body))
1002              $cmdline_params .= " -d \"$body\"";
1003  
1004          if($this->read_timeout > 0)
1005              $cmdline_params .= " -m ".$this->read_timeout;
1006  
1007          $headerfile = tempnam($temp_dir, "sno");
1008  
1009          exec($this->curl_path." -k -D \"$headerfile\"".$cmdline_params." \"".escapeshellcmd($URI)."\"",$results,$return);
1010  
1011          if($return)
1012          {
1013              $this->error = "Error: cURL could not retrieve the document, error $return.";
1014              return false;
1015          }
1016  
1017  
1018          $results = implode("\r\n",$results);
1019  
1020          $result_headers = file("$headerfile");
1021  
1022          $this->_redirectaddr = false;
1023          unset($this->headers);
1024  
1025          for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
1026          {
1027  
1028              // if a header begins with Location: or URI:, set the redirect
1029              if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
1030              {
1031                  // get URL portion of the redirect
1032                  preg_match("/^(Location: |URI:)\s+(.*)/",chop($result_headers[$currentHeader]),$matches);
1033                  // look for :// in the Location header to see if hostname is included
1034                  if(!preg_match("|\:\/\/|",$matches[2]))
1035                  {
1036                      // no host in the path, so prepend
1037                      $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
1038                      // eliminate double slash
1039                      if(!preg_match("|^/|",$matches[2]))
1040                              $this->_redirectaddr .= "/".$matches[2];
1041                      else
1042                              $this->_redirectaddr .= $matches[2];
1043                  }
1044                  else
1045                      $this->_redirectaddr = $matches[2];
1046              }
1047  
1048              if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
1049                  $this->response_code = $result_headers[$currentHeader];
1050  
1051              $this->headers[] = $result_headers[$currentHeader];
1052          }
1053  
1054          // check if there is a a redirect meta tag
1055  
1056          if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
1057          {
1058              $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
1059          }
1060  
1061          // have we hit our frame depth and is there frame src to fetch?
1062          if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
1063          {
1064              $this->results[] = $results;
1065              for($x=0; $x<count($match[1]); $x++)
1066                  $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
1067          }
1068          // have we already fetched framed content?
1069          elseif(is_array($this->results))
1070              $this->results[] = $results;
1071          // no framed content
1072          else
1073              $this->results = $results;
1074  
1075          unlink("$headerfile");
1076  
1077          return true;
1078      }
1079  
1080  /*======================================================================*\
1081      Function:    setcookies()
1082      Purpose:    set cookies for a redirection
1083  \*======================================================================*/
1084  
1085  	function setcookies()
1086      {
1087          for($x=0; $x<count($this->headers); $x++)
1088          {
1089          if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$x],$match))
1090              $this->cookies[$match[1]] = urldecode($match[2]);
1091          }
1092      }
1093  
1094  
1095  /*======================================================================*\
1096      Function:    _check_timeout
1097      Purpose:    checks whether timeout has occurred
1098      Input:        $fp    file pointer
1099  \*======================================================================*/
1100  
1101  	function _check_timeout($fp)
1102      {
1103          if ($this->read_timeout > 0) {
1104              $fp_status = socket_get_status($fp);
1105              if ($fp_status["timed_out"]) {
1106                  $this->timed_out = true;
1107                  return true;
1108              }
1109          }
1110          return false;
1111      }
1112  
1113  /*======================================================================*\
1114      Function:    _connect
1115      Purpose:    make a socket connection
1116      Input:        $fp    file pointer
1117  \*======================================================================*/
1118  
1119  	function _connect(&$fp)
1120      {
1121          if(!empty($this->proxy_host) && !empty($this->proxy_port))
1122              {
1123                  $this->_isproxy = true;
1124  
1125                  $host = $this->proxy_host;
1126                  $port = $this->proxy_port;
1127              }
1128          else
1129          {
1130              $host = $this->host;
1131              $port = $this->port;
1132          }
1133  
1134          $this->status = 0;
1135  
1136          if($fp = fsockopen(
1137                      $host,
1138                      $port,
1139                      $errno,
1140                      $errstr,
1141                      $this->_fp_timeout
1142                      ))
1143          {
1144              // socket connection succeeded
1145  
1146              return true;
1147          }
1148          else
1149          {
1150              // socket connection failed
1151              $this->status = $errno;
1152              switch($errno)
1153              {
1154                  case -3:
1155                      $this->error="socket creation failed (-3)";
1156                  case -4:
1157                      $this->error="dns lookup failure (-4)";
1158                  case -5:
1159                      $this->error="connection refused or timed out (-5)";
1160                  default:
1161                      $this->error="connection failed (".$errno.")";
1162              }
1163              return false;
1164          }
1165      }
1166  /*======================================================================*\
1167      Function:    _disconnect
1168      Purpose:    disconnect a socket connection
1169      Input:        $fp    file pointer
1170  \*======================================================================*/
1171  
1172  	function _disconnect($fp)
1173      {
1174          return(fclose($fp));
1175      }
1176  
1177  
1178  /*======================================================================*\
1179      Function:    _prepare_post_body
1180      Purpose:    Prepare post body according to encoding type
1181      Input:        $formvars  - form variables
1182                  $formfiles - form upload files
1183      Output:        post body
1184  \*======================================================================*/
1185  
1186  	function _prepare_post_body($formvars, $formfiles)
1187      {
1188          settype($formvars, "array");
1189          settype($formfiles, "array");
1190          $postdata = '';
1191  
1192          if (count($formvars) == 0 && count($formfiles) == 0)
1193              return;
1194  
1195          switch ($this->_submit_type) {
1196              case "application/x-www-form-urlencoded":
1197                  reset($formvars);
1198                  while(list($key,$val) = each($formvars)) {
1199                      if (is_array($val) || is_object($val)) {
1200                          while (list($cur_key, $cur_val) = each($val)) {
1201                              $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
1202                          }
1203                      } else
1204                          $postdata .= urlencode($key)."=".urlencode($val)."&";
1205                  }
1206                  break;
1207  
1208              case "multipart/form-data":
1209                  $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
1210  
1211                  reset($formvars);
1212                  while(list($key,$val) = each($formvars)) {
1213                      if (is_array($val) || is_object($val)) {
1214                          while (list($cur_key, $cur_val) = each($val)) {
1215                              $postdata .= "--".$this->_mime_boundary."\r\n";
1216                              $postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
1217                              $postdata .= "$cur_val\r\n";
1218                          }
1219                      } else {
1220                          $postdata .= "--".$this->_mime_boundary."\r\n";
1221                          $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
1222                          $postdata .= "$val\r\n";
1223                      }
1224                  }
1225  
1226                  reset($formfiles);
1227                  while (list($field_name, $file_names) = each($formfiles)) {
1228                      settype($file_names, "array");
1229                      while (list(, $file_name) = each($file_names)) {
1230                          if (!is_readable($file_name)) continue;
1231  
1232                          $fp = fopen($file_name, "r");
1233                          $file_content = fread($fp, filesize($file_name));
1234                          fclose($fp);
1235                          $base_name = basename($file_name);
1236  
1237                          $postdata .= "--".$this->_mime_boundary."\r\n";
1238                          $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
1239                          $postdata .= "$file_content\r\n";
1240                      }
1241                  }
1242                  $postdata .= "--".$this->_mime_boundary."--\r\n";
1243                  break;
1244          }
1245  
1246          return $postdata;
1247      }
1248  }
1249  endif;
1250  ?>


Generated: Fri Jan 8 00:19:48 2010 Cross-referenced by PHPXref 0.7