用Python/PHP编写验证码识别程序

1. 了解验证码的媒体格式,如 PNG、GIF、JPG、BMP、SWF
2.模式识别

<?php
// Report every error level covered by E_ALL while the script runs.
error_reporting(E_ALL);

/**
 * Minimal PNG chunk reader plus a fixed-position digit recogniser for one
 * specific four-digit captcha layout.
 *
 * Usage: construct with the raw file bytes, call read() to parse the chunks,
 * then analyze_row() to append the recognised digits to $code.
 *
 * Limitations visible in the code: scanlines are treated as raw bits (the
 * per-row PNG filter byte is not undone and interlacing is not handled), the
 * row size assumes one sample per pixel, and palette data is ignored.
 *
 * Malformed input is reported by throwing RuntimeException.
 */
class PNG_READER {
    /** Raw bytes of the PNG file. */
    public $pngdata;
    /** Current read position (in bytes) inside $pngdata. */
    public $offset;

    /** Image width in pixels, from IHDR. */
    public $width;
    /** Image height in pixels, from IHDR. */
    public $height;
    /** Bits per sample, from IHDR. */
    public $bitdepth;
    /** Bytes of pixel data per scanline, assuming one sample per pixel. */
    public $rowbytes;
    /** Bytes per stored scanline: $rowbytes plus the leading filter byte. */
    public $irowbytes;
    /** PNG colour type, from IHDR (3 means indexed colour). */
    public $colortype;

    /** Inflated IDAT stream; set by read(), released by analyze_row(). */
    public $bmpdata;
    /** Recognised digits, appended by analyze_row(). */
    public $code = '';

    /**
     * @param string $data Raw contents of a PNG file.
     */
    public function __construct($data)
    {
        $this->pngdata = (string) $data;
        $this->offset  = 0;
    }

    /**
     * Consume and return the next $len bytes of the file.
     *
     * @param int $len Number of bytes to read.
     * @return string Exactly $len bytes.
     * @throws RuntimeException When fewer than $len bytes remain.
     */
    public function _read_raw($len)
    {
        if ($len < 0 || $this->offset + $len > strlen($this->pngdata)) {
            throw new RuntimeException('unexpected end of PNG data');
        }
        $start = $this->offset;
        $this->offset += $len;
        return (string) substr($this->pngdata, $start, $len);
    }

    /**
     * Read one chunk (length, type, payload, CRC) and verify its checksum.
     *
     * @param string $tag Receives the four-character chunk type.
     * @return string The chunk payload (may be empty).
     * @throws RuntimeException On truncated data or a CRC mismatch.
     */
    public function _read_chunk(&$tag)
    {
        $header = unpack('Nlength', $this->_read_raw(4));
        // Type and payload are read together because the CRC covers both.
        $data    = $this->_read_raw($header['length'] + 4);
        $trailer = unpack('Ncrc', $this->_read_raw(4));
        if ($trailer['crc'] !== crc32($data)) {
            throw new RuntimeException('crc error');
        }
        $tag = substr($data, 0, 4);
        return (string) substr($data, 4);
    }

    /**
     * Parse the file: check the signature, walk every chunk up to IEND,
     * record the IHDR fields and inflate the image data into $bmpdata.
     *
     * @throws RuntimeException On a bad signature, truncated or corrupt
     *                          chunks, missing IHDR/IDAT, or inflate failure.
     */
    public function read()
    {
        // Always start from the top so the method can be called again.
        $this->offset = 0;
        $signature = (string) substr($this->pngdata, 0, 8);
        if ($signature !== "\x89PNG\r\n\x1a\n") {
            throw new RuntimeException('invalid signature');
        }
        $this->offset = 8;

        $compressed = '';
        $sawheader  = false;
        $tag = '';
        do {
            $data = $this->_read_chunk($tag);
            switch ($tag) {
            case 'IHDR':
                $fileheader = unpack('Nwidth/Nheight/Cdepth/Ctype/Ccompression/Cfilter/Cinterlace', $data);
                $this->bitdepth  = $fileheader['depth'];
                $this->colortype = $fileheader['type'];
                $this->width     = $fileheader['width'];
                $this->height    = $fileheader['height'];
                // Round up to whole bytes; one sample per pixel is assumed.
                $this->rowbytes  = (($fileheader['depth'] * $fileheader['width']) + 7) >> 3;
                $this->irowbytes = $this->rowbytes + 1;
                $sawheader = true;
                break;
            case 'IDAT':
                // All IDAT payloads together form a single zlib stream, so
                // they must be concatenated before inflating.
                $compressed .= $data;
                break;
            default:
                // Every other chunk, including the palette (PLTE), is ignored.
                break;
            }
        } while ('IEND' !== $tag);

        if (!$sawheader) {
            throw new RuntimeException('missing IHDR chunk');
        }
        if ('' === $compressed) {
            throw new RuntimeException('missing IDAT chunk');
        }
        $inflated = gzuncompress($compressed);
        if (false === $inflated) {
            throw new RuntimeException('unable to inflate image data');
        }
        $this->bmpdata = $inflated;
    }

    /**
     * Map one hexadecimal digit to its four-character bit pattern, using
     * '1' for a set bit and a space for a clear bit.
     *
     * @param string $hex A single hex digit (either case).
     * @return string|null Four characters, or null for a non-hex input.
     */
    public function _convert_hex_binstr($hex)
    {
        static $bin = [
            '0' => '    ', '1' => '   1', '2' => '  1 ', '3' => '  11',
            '4' => ' 1  ', '5' => ' 1 1', '6' => ' 11 ', '7' => ' 111',
            '8' => '1   ', '9' => '1  1', 'a' => '1 1 ', 'b' => '1 11',
            'c' => '11  ', 'd' => '11 1', 'e' => '111 ', 'f' => '1111',
        ];
        $hex = strtolower((string) $hex);
        return isset($bin[$hex]) ? $bin[$hex] : null;
    }

    /**
     * Expand the inflated scanlines into one long string of '1'/' ' bit
     * characters and run the digit recogniser over it. $bmpdata is released
     * afterwards.
     *
     * @throws RuntimeException When read() has not supplied enough data.
     */
    public function analyze_row()
    {
        $needed = $this->irowbytes * $this->height;
        if (!is_string($this->bmpdata) || strlen($this->bmpdata) < $needed) {
            throw new RuntimeException('image data missing or truncated');
        }

        // Only the first height * irowbytes bytes are scanlines.
        $hexstr = bin2hex(substr($this->bmpdata, 0, $needed));
        $this->bmpdata = null;

        $binstr = '';
        $total  = strlen($hexstr);
        for ($pos = 0; $pos < $total; $pos++) {
            $binstr .= $this->_convert_hex_binstr($hexstr[$pos]);
        }

        // Each stored scanline is irowbytes * 8 bit characters wide,
        // including the 8 bits of the leading filter byte.
        $this->_reconstruct($binstr, $this->irowbytes * 8, $this->height - 1);
    }

    /**
     * Recognise four digits by sampling one bit column per digit.
     *
     * For digit j the column at bit offset 29 + 14 * j is read on rows 9 up
     * to (but excluding) $row; the resulting string is looked up in the
     * fingerprint table. A result of 0 or "unknown" is turned into 6 when the
     * bit 8 positions to the right on row 11 is clear. Each result is
     * appended to $code (an unknown digit appends nothing).
     *
     * @param string $binstr Bit characters for the whole image, row by row.
     * @param int    $column Number of bit characters per row.
     * @param int    $row    First row index that is NOT sampled.
     */
    public function _reconstruct($binstr, $column, $row)
    {
        $base = 29;
        for ($j = 0; $j < 4; $j++) {
            $sign = '';
            for ($i = 9; $i < $row; $i++) {
                $pos = $base + $i * $column;
                $sign .= isset($binstr[$pos]) ? $binstr[$pos] : '';
            }

            $rcode = $this->_convert_signature_to_num($sign);
            if (null === $rcode || 0 === $rcode) {
                // There is no fingerprint for 6, so a 0 or an unmatched
                // fingerprint is tested for it. The explicit comparison with
                // ' ' avoids the string-to-number comparison whose result
                // changed in PHP 8.
                $dot = 11 * $column + $base + 8;
                if (isset($binstr[$dot]) && ' ' === $binstr[$dot]) {
                    $rcode = 6;
                }
            }
            $this->code .= $rcode;
            $base += 14;
        }
    }

    /**
     * Look up the digit for an eight-character column fingerprint.
     *
     * @param string $str Fingerprint made of '1' and ' ' characters.
     * @return int|null The digit, or null when the fingerprint is unknown.
     */
    public function _convert_signature_to_num($str)
    {
        static $fingermark = [
            ' 111111 ' => 0,
            '  1    1' => 1,
            ' 11   11' => 2,
            ' 1    1 ' => 3,
            '    11  ' => 4,
            '1111  1 ' => 5,
            '1       ' => 7,
            ' 11 111 ' => 8,
            ' 111  1 ' => 9,
        ];

        return isset($fingermark[$str]) ? $fingermark[$str] : null;
    }
}




// Download the captcha image from the hard-coded endpoint, decode it and
// print the recognised digits followed by a newline.
$filedata = file_get_contents('http://mydns.xinnet.com/pagemydns/authimg.php');
if (false === $filedata) {
    exit("unable to download captcha image\n");
}

try {
    // Plain `new`: assigning `new` by reference (`=& new`) is a parse error
    // since PHP 7.
    $png = new PNG_READER($filedata);
    $png->read();
    $png->analyze_row();
} catch (Exception $e) {
    // Any exception raised while decoding ends the script with its message.
    exit($e->getMessage() . "\n");
}

echo $png->code, "\n";

?>

上面这段 PHP 代码以 xinnet 的 mydns 验证码为例进行识别。由于这个验证码非常有规律,所以程序很简单,主要部分是对 PNG 格式的解析。

作者:Gavin.Shaw   更新日期:2006-09-30
来源:http://www.upsdn.net/   浏览次数:

相关文章

相关评论   发表评论