用PHP来模拟浏览器行为
用PHP内置的Socket功能来做web client模拟浏览器
<?
/***************************************************************************
Browser Emulating file functions v2.0.1
(c) Kai Blankenhorn
www.bitfolge.de/browseremulator
kaib@bitfolge.de
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
****************************************************************************
Changelog:
v2.0.1
fixed authentication bug
added global debug switch
v2.0 03-09-03
added a wrapper class; this has the advantage that you no longer need
to specify a lot of parameters, just call the methods to set
each option
added option to use a special port number, may be given by setPort or
as part of the URL (e.g. server.com:80)
added getLastResponseHeaders()
v1.5
added Basic HTTP user authorization
minor optimizations
v1.0
initial release
***************************************************************************/
/**
* BrowserEmulator class. Provides methods for opening urls and emulating
* a web browser request.
**/
class BrowserEmulator {
var $headerLines = Array();
var $postData = Array();
var $authUser = "";
var $authPass = "";
var $port;
var $lastResponse = Array();
var $debug = false;
function BrowserEmulator() {
$this->resetHeaderLines();
$this->resetPort();
}
/**
* Adds a single header field to the HTTP request header. The resulting header
* line will have the format
* $name: $value\n
**/
function addHeaderLine($name, $value) {
$this->headerLines[$name] = $value;
}
/**
* Deletes all custom header lines. This will not remove the User-Agent header field,
* which is necessary for correct operation.
**/
function resetHeaderLines() {
$this->headerLines = Array();
/*******************************************************************************/
/************** YOU MAX SET THE USER AGENT STRING HERE *******************/
/* */
/* default is "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)", */
/* which means Internet Explorer 6.0 on WinXP */
$this->headerLines["User-Agent"] = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)";
/*******************************************************************************/
/**
* Add a post parameter. Post parameters are sent in the body of an HTTP POST request.
**/
function addPostData($name, $value) {
$this->postData[$name] = $value;
}
/**
* Deletes all custom post parameters.
**/
function resetPostData() {
$this->postData = Array();
}
/**
* Sets an auth user and password to use for the request.
* Set both as empty strings to disable authentication.
**/
function setAuth($user, $pass) {
$this->authUser = $user;
$this->authPass = $pass;
}
/**
* Selects a custom port to use for the request.
**/
function setPort($portNumber) {
$this->port = $portNumber;
}
/**
* Resets the port used for request to the HTTP default (80).
**/
function resetPort() {
$this->port = 80;
}
/**
* Make an fopen call to $url with the parameters set by previous member
* method calls. Send all set headers, post data and user authentication data.
* Returns a file handle on success, or false on failure.
**/
function fopen($url) {
$this->lastResponse = Array();
preg_match("~([a-z]*://)?([^:^/]*)(:([0-9]{1,5}))?(/.*)?~i", $url, $matches);
$protocol = $matches[1];
$server = $matches[2];
$port = $matches[4];
$path = $matches[5];
if ($port!="") {
$this->setPort($port);
}
if ($path=="") $path = "/";
$socket = false;
$socket = fsockopen($server, $this->port);
if ($socket) {
$this->headerLines["Host"] = $server;
if ($this->authUser!="" && $this->authPass!="") {
$this->headerLines["Authorization"] = "Basic ".base64_encode($this->authUser.":".$this->authPass);
}
if (count($this->postData)==0) {
$request = "GET $path HTTP/1.0\r\n";
} else {
$request = "POST $path HTTP/1.0\r\n";
}
if ($this->debug) echo $request;
fputs ($socket, $request);
if (count($this->postData)>0) {
$PostStringArray = Array();
foreach ($this->postData AS $key=>$value) {
$PostStringArray[] = "$key=$value";
}
$PostString = join("&", $PostStringArray);
$this->headerLines["Content-Length"] = strlen($PostString);
}
foreach ($this->headerLines AS $key=>$value) {
if ($this->debug) echo "$key: $value\n";
fputs($socket, "$key: $value\r\n");
}
if ($this->debug) echo "\n";
fputs($socket, "\r\n");
if (count($this->postData)>0) {
if ($this->debug) echo "$PostString";
fputs($socket, $PostString."\r\n");
}
}
if ($this->debug) echo "\n";
if ($socket) {
$line = fgets($socket, 1000);
if ($this->debug) echo $line;
$this->lastResponse[] = $line;
$status = substr($line,9,3);
while (trim($line = fgets($socket, 1000)) != ""){
if ($this->debug) echo "$line";
$this->lastResponse[] = $line;
if ($status=="401" AND strpos($line,"WWW-Authenticate: Basic realm=\"")===0) {
fclose($socket);
return FALSE;
}
}
}
return $socket;
}
/**
* Make an file call to $url with the parameters set by previous member
* method calls. Send all set headers, post data and user authentication data.
* Returns the requested file as an array on success, or false on failure.
**/
function file($url) {
$file = Array();
$socket = $this->fopen($url);
if ($socket) {
$file = Array();
while (!feof($socket)) {
$file[] = fgets($socket, 10000);
}
} else {
return FALSE;
}
return $file;
}
function getLastResponseHeaders() {
return $this->lastResponse;
}
}
/*
// example code
$be = new BrowserEmulator();
$be->addHeaderLine("Referer", "http://previous.server.com/");
$be->addHeaderLine("Accept-Encoding", "x-compress; x-zip");
$be->addPostData("Submit", "OK");
$be->addPostData("item", "42");
$be->setAuth("admin", "secretpass");
// also possible:
// $be->setPort(10080);
$file = $be->fopen("http://restricted.server.com:10080/somepage.html");
$response = $be->getLastResponseHeaders();
while ($line = fgets($file, 1024)) {
// do something with the file
}
fclose($file);
*/
?>
==========================
#!/bin/perl
# data files, see samples below
$cookfile = "/tmp/lwpcookies.txt"; # where cookies live
$proxyfile= "proxy.dat"; # your proxylist
$browsers = "browsers"; # broser list
$proxyprog = "/home/sites/home/euro/progs/proxycheck"; # proggy to double check proxy (optional)
$referer1 = "http://www.yourname.com/friends/index.html"; # where you send traffic from
$friendslinks = "friends"; # list of out urls
use Getopt::Long qw(GetOptions);
use URI::URL;
use LWP::MediaTypes qw(media_suffix);
use HTML::Entities ();
use HTTP::Cookies;
use HTML::LinkExtor;
use HTTP::Headers;
use HTTP::Message;
require LWP::UserAgent;
open (DATA, $proxyfile);
@proxies = <DATA>;
push (@temp, splice (@proxies, rand (@proxies), 1))
while @proxies;
@proxies = @temp;
$found = 0;
foreach (@proxies) {
chop;
($ip, $port) = split (/:/, $_);
$i = system ("$proxyprog $ip $port");
$i += 0;
if ($i == 256) {
$found = 1;
last;
}
}
if ($found == 0) {
exit;
}
open (DATA, $browsers);
@browsers = <DATA>;
close (DATA);
chop(@browsers);
$agent = splice (@browsers, rand (@browsers), 1);
$proxy = "http://$ip:$port";
print "Using $proxy, \"$agent\" \n";
open (DATA, $friendslinks);
@urls = <DATA>;
close (DATA);
chop(@urls);
my $ua = new LWP::UserAgent;
$ua->agent($agent);
$ua->requests_redirectable (['GET']);
$ua->cookie_jar(HTTP::Cookies->new(file => $cookfile ,autosave => 1, ignore_discard => 1));
$ua->proxy (['http'], $proxy);
sub GetLinks {
($tag, %attr) = @_;
return if $tag ne 'a';
push(@links, values %attr);
}
$pp = HTML::LinkExtor->new(\&GetLinks);
foreach $url (@urls) {
@links = ();
$req->header ('Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/msword, */*');
$req->header ('Accept-Language: en');
$req->header ('Accept-Encoding: gzip, deflate');
$req->header ('Connection: Keep-Alive');
$req->referer(sprintf ("%s", $referer1);
$res = $ua->request($req, sub {$pp->parse($_[0])});
$base = $res->base;
$r = $res->request;
$referer = $r -> uri;
@links = map { $_ = url($_, $base)->abs; } @links;
foreach $url (@links) {
if ($url =~ 'cgi' || $$url =~ '\.pl' || $url =~ "\.php") {
$req = new HTTP::Request GET => $url;
$req->header ('Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/msword, */*');
$req->header ('Accept-Language: en');
$req->header ('Accept-Encoding: gzip, deflate');
$req->header ('Connection: Keep-Alive');
$req->referer(sprintf ("%s", $referer));
$res = $ua->request($req);
}
}
}
____________________________________________
作者:Kai Blankenhorn 更新日期:2004-12-23
来源:www.bitfolge.de
浏览次数:
相关文章
相关评论 发表评论
- No Comments