用Python/PHP编写验证码识别程序
1.了解验证码媒体格式,如png,gif,jpg,bmp,swf
2.模式识别
<?php
// Report every class of PHP error, warning and notice while this script runs.
error_reporting(E_ALL);
/**
 * Minimal PNG chunk reader combined with a column-fingerprint recogniser for
 * one fixed four-digit captcha layout.
 *
 * Usage: construct with the raw PNG bytes, call read() to parse the chunks,
 * call analyze_row() to run the recogniser, then read the digits from $code.
 *
 * Limitations visible in this code: scanline filters are not reversed and
 * interlaced images are not de-interlaced; the inflated stream is analysed
 * exactly as stored (filter byte followed by packed pixel bytes per row).
 *
 * Malformed input raises RuntimeException (messages "crc error" and
 * "invalid signature" are kept from the earlier exit()-based handling).
 */
class PNG_READER
{
    /** Raw bytes of the PNG file. */
    public $pngdata;
    /** Read cursor into $pngdata, in bytes. */
    public $offset;
    /** Image width in pixels, from IHDR. */
    public $width;
    /** Image height in pixels, from IHDR. */
    public $height;
    /** Bits per sample, from IHDR. */
    public $bitdepth;
    /** Bytes of pixel data per scanline (without the filter byte). */
    public $rowbytes;
    /** Bytes per scanline in the inflated stream (filter byte included). */
    public $irowbytes;
    /** PNG colour type, from IHDR (3 == indexed colour). */
    public $colortype;
    /** Inflated IDAT stream. */
    public $bmpdata;
    /** Recognised digits, appended by analyze_row(). */
    public $code = '';

    /**
     * @param string $data raw PNG file contents
     */
    public function __construct($data)
    {
        // A failed download hands us false; normalise to '' so read()
        // reports it as an invalid signature instead of a type error.
        $this->pngdata = (string) $data;
        $this->offset = 0;
    }

    /**
     * Returns up to $len bytes from the cursor and advances the cursor by $len.
     *
     * The result is shorter than $len (possibly empty) when the data runs out;
     * callers are responsible for checking the length.
     *
     * @param int $len number of bytes wanted
     * @return string
     */
    public function _read_raw($len)
    {
        $start = $this->offset;
        $this->offset += $len;
        $bytes = substr($this->pngdata, $start, $len);

        // substr() yields false past the end on PHP < 8.
        return ($bytes === false) ? '' : $bytes;
    }

    /**
     * Reads one chunk (length, type, payload, CRC) and verifies its CRC.
     *
     * @param string $tag receives the four-character chunk type
     * @return string chunk payload
     * @throws RuntimeException when the data is truncated or the CRC differs
     */
    public function _read_chunk(&$tag)
    {
        $lengthField = $this->_read_raw(4);
        if (strlen($lengthField) !== 4) {
            throw new RuntimeException('unexpected end of PNG data');
        }
        $header = unpack('Nlength', $lengthField);
        $length = $header['length'];
        if ($length < 0) {
            // 'N' can come back negative on 32-bit builds for oversized lengths.
            throw new RuntimeException('invalid chunk length');
        }

        // The CRC covers the 4-byte type plus the payload, so read them together.
        $data = $this->_read_raw($length + 4);
        $crcField = $this->_read_raw(4);
        if (strlen($data) !== $length + 4 || strlen($crcField) !== 4) {
            throw new RuntimeException('unexpected end of PNG data');
        }

        // Compare the packed bytes so the check does not depend on whether
        // crc32()/unpack() produce signed or unsigned integers on this build.
        if (pack('N', crc32($data)) !== $crcField) {
            throw new RuntimeException('crc error');
        }

        $tag = substr($data, 0, 4);

        return (string) substr($data, 4);
    }

    /**
     * Parses the PNG: validates the signature, reads IHDR into the public
     * properties and inflates the concatenated IDAT payloads into $bmpdata.
     * All other chunk types (PLTE included) are skipped.
     *
     * @return void
     * @throws RuntimeException on a bad signature, damaged chunk, truncated
     *                          file or undecodable image data
     */
    public function read()
    {
        $signature = $this->_read_raw(8);
        if ($signature !== "\x89PNG\r\n\x1a\n") {
            throw new RuntimeException('invalid signature');
        }

        // Samples per pixel for each PNG colour type; needed for the row size.
        $samplesPerPixel = array(0 => 1, 2 => 3, 3 => 1, 4 => 2, 6 => 4);

        $tag = '';
        $compressed = '';
        do {
            $data = $this->_read_chunk($tag);
            if ($tag === 'IHDR') {
                if (strlen($data) < 13) {
                    throw new RuntimeException('invalid IHDR chunk');
                }
                $fileheader = unpack(
                    'Nwidth/Nheight/Cdepth/Ctype/Ccompression/Cfilter/Cinterlace',
                    $data
                );
                $this->bitdepth = $fileheader['depth'];
                $this->colortype = $fileheader['type'];
                $this->width = $fileheader['width'];
                $this->height = $fileheader['height'];

                $type = $fileheader['type'];
                $channels = isset($samplesPerPixel[$type]) ? $samplesPerPixel[$type] : 1;
                // Round the bit count of one scanline up to whole bytes.
                $this->rowbytes = (($fileheader['depth'] * $channels * $fileheader['width']) + 7) >> 3;
                // Every stored scanline is preceded by one filter-type byte.
                $this->irowbytes = $this->rowbytes + 1;
            } elseif ($tag === 'IDAT') {
                // The zlib stream may be split over several IDAT chunks, so it
                // can only be inflated once all of them have been collected.
                $compressed .= $data;
            }
        } while ($tag !== 'IEND');

        if ($compressed !== '') {
            $pixels = gzuncompress($compressed);
            if ($pixels === false) {
                throw new RuntimeException('cannot decompress image data');
            }
            $this->bmpdata = $pixels;
        }
    }

    /**
     * Renders one hexadecimal digit as four characters, most significant bit
     * first, using '1' for a set bit and ' ' for a clear bit.
     *
     * @param string $hex a single hexadecimal digit (either case)
     * @return string exactly four characters
     * @throws InvalidArgumentException when $hex is not one hex digit
     */
    public function _convert_hex_binstr($hex)
    {
        $hex = (string) $hex;
        if (strlen($hex) !== 1 || strpos('0123456789abcdefABCDEF', $hex) === false) {
            throw new InvalidArgumentException('not a hexadecimal digit');
        }
        $bits = str_pad(decbin(hexdec($hex)), 4, '0', STR_PAD_LEFT);

        return strtr($bits, '0', ' ');
    }

    /**
     * Expands the inflated image data into a '1'/' ' bit string (one character
     * per bit, filter bytes included) and runs the digit recogniser on it.
     * The recognised digits are appended to $code.
     *
     * @return void
     * @throws RuntimeException when read() has not produced enough image data
     */
    public function analyze_row()
    {
        $needed = $this->irowbytes * $this->height;
        if (!is_string($this->bmpdata) || strlen($this->bmpdata) < $needed) {
            throw new RuntimeException('image data is missing or shorter than the header describes');
        }

        $hexstr = bin2hex(substr($this->bmpdata, 0, $needed));
        $total = strlen($hexstr);
        $binstr = '';
        for ($pos = 0; $pos < $total; $pos++) {
            $binstr .= $this->_convert_hex_binstr($hexstr[$pos]);
        }

        // Eight bit characters per stored byte; the last row is not sampled.
        $this->_reconstruct($binstr, $this->irowbytes * 8, $this->height - 1);
    }

    /**
     * Samples one vertical pixel column per digit and maps each column
     * signature to a digit, appending the four results to $code.
     *
     * @param string $binstr bit string with $column characters per row
     * @param int    $column characters per row in $binstr
     * @param int    $row    exclusive upper bound of the sampled rows
     * @return void
     * @throws RuntimeException when $binstr is too short for the layout
     */
    public function _reconstruct($binstr, $column, $row)
    {
        $firstProbe = 29; // bit column sampled for the first digit
        $pitch = 14;      // distance between the sampled columns of two digits
        $firstRow = 9;    // first sampled row
        $dotRow = 11;     // row of the extra pixel telling 6 apart
        $dotOffset = 8;   // that pixel's distance to the right of the probe

        for ($j = 0; $j < 4; $j++) {
            $probe = $firstProbe + $j * $pitch;

            $signature = '';
            for ($i = $firstRow; $i < $row; $i++) {
                $idx = $probe + $i * $column;
                if (!isset($binstr[$idx])) {
                    throw new RuntimeException('bitmap is too small for the expected captcha layout');
                }
                $signature .= $binstr[$idx];
            }

            $rcode = $this->_convert_signature_to_num($signature);

            // The table has no entry for 6: a result of 0 is re-examined via
            // one extra pixel. Unrecognised signatures (null) take the same
            // path, as they did under the former loose `0 == $rcode` test.
            if ($rcode === 0 || $rcode === null) {
                $dot = $dotRow * $column + $probe + $dotOffset;
                // Compare against ' ' explicitly: `' ' == 0` stopped being
                // true in PHP 8, which silently disabled this branch.
                if (isset($binstr[$dot]) && $binstr[$dot] === ' ') {
                    $rcode = 6;
                }
            }

            // An unrecognised digit contributes nothing to the code.
            $this->code .= (string) $rcode;
        }
    }

    /**
     * Looks up the digit for a column signature.
     *
     * @param string $str column signature made of '1' and ' '
     * @return int|null the digit, or null when the signature is unknown
     */
    public function _convert_signature_to_num($str)
    {
        // NOTE(review): these keys have different lengths, yet every signature
        // built by _reconstruct() for one image has the same length
        // ($row - 9). Runs of spaces were presumably collapsed when this
        // listing was published; re-derive the keys from real captcha images
        // before relying on the result.
        $fingermark = array(
            ' 111111 ' => 0,
            ' 1 1' => 1,
            ' 11 11' => 2,
            ' 1 1 ' => 3,
            ' 11 ' => 4,
            '1111 1 ' => 5,
            '1 ' => 7,
            ' 11 111 ' => 8,
            ' 111 1 ' => 9,
        );

        return isset($fingermark[$str]) ? $fingermark[$str] : null;
    }
}
// Download one captcha image, run the recogniser on it and print the digits.
$filedata = file_get_contents('http://mydns.xinnet.com/pagemydns/authimg.php');
if ($filedata === false) {
    // file_get_contents() returns false when the request fails.
    exit("cannot download captcha image\n");
}
try {
    // Plain `new`: the `& new` form is a parse error since PHP 7.
    $png = new PNG_READER($filedata);
    $png->read();
    $png->analyze_row();
} catch (Exception $e) {
    // Stop with the failure reason on its own line.
    exit($e->getMessage() . "\n");
}
print_r($png->code);
echo "\n";
?>
上面这段 PHP 代码以 xinnet 的 mydns 验证码为例进行识别。由于这个验证码非常有规律,所以程序很简单,主要部分是对 PNG 格式的解析。
作者:Gavin.Shaw 更新日期:2006-09-30
来源:http://www.upsdn.net/
浏览次数:
相关文章
相关评论 发表评论
- No Comments