发信人: jackyz() 
整理人: dalasthunder(2002-07-25 06:17:19), 站内信件
 | 
 
 
// utf8.php
 <?
 # Unicode class... be sure you use charset=utf-8 in your html header if you use this.
 #
 # You are free to use this code, change whatever and redistribute, just leave info about me and
 # don't forget to drop me a line if you think this code is useful ;)
 #
 # Romans (2000)
 # [email protected]
 
 class utf{
  # Loaded charset mappings, stored as $map[alias][source byte] => Unicode code point.
  # Mapping tables can be obtained at ftp://ftp.unicode.org/Public/MAPPINGS/
  var $map=array();

  # Note from the poster (jackyz), translated from Chinese:
  #  A GB2312 charset map file is available under ftp://ftp.unicode.org/Public/MAPPINGS/ :
  #   ftp://ftp.unicode.org/Public/MAPPINGS/GB/GB2312.TXT
  #  Help yourselves. :)
  #  It is 175K, though - a rather "grand" conversion table (we Chinese simply
  #  have more characters than everyone else). Hauling such a big table around
  #  for every conversion is a real efficiency concern, but anyway, it is a
  #  solution that "catches the mouse". It is certainly more "standard" than
  #  my own hack.
  # Reviewer note: cp2utf() below converts one source byte at a time and
  # loadmap() keeps only single-byte source codes, so double-byte tables such
  # as GB2312.TXT cannot be used with this class as written.

  # Load a mapping table into $this->map[$alias] for later use by cp2utf().
  #
  # Each data line is expected to look like "0xHH <whitespace> 0xHHHH ...".
  # Lines that do not match (comments, blank lines, entries with no Unicode
  # value) are ignored. Loading a second file under the same alias merges into
  # the existing table.
  #
  # $filename  path of the mapping table to read
  # $alias     name under which the table is stored; pass it to cp2utf()
  #
  # Terminates the script via die() when the file cannot be opened.
  function loadmap($filename,$alias){
   $f=fopen($filename,'r') or die('utf::loadmap: unable to open mapping file');
   if(!isset($this->map[$alias]))$this->map[$alias]=array();
   while(($line=fgets($f))!==false){
    # Parse both hex fields in full instead of assuming fixed widths.
    if(!preg_match('/^\s*0x([0-9A-F]+)\s+0x([0-9A-F]+)/i',$line,$m))continue;
    $byte=(int)hexdec($m[1]);
    # cp2utf() only ever looks up single bytes, so wider source codes are useless here.
    if($byte>0xFF)continue;
    $this->map[$alias][$byte]=(int)hexdec($m[2]);
   }
   fclose($f);
  }

  # Translate a single-byte-encoded string ($str) to UTF-8.
  #
  # $str    input bytes
  # $alias  alias of a table previously loaded with loadmap(); when it is
  #         empty or no table was loaded under that alias, the input is
  #         treated as ISO-8859-1 (byte value == code point)
  #
  # Returns the UTF-8 string ('' for empty input). A byte that the selected
  # table does not define is passed through when it is ASCII and replaced
  # by U+FFFD (REPLACEMENT CHARACTER) otherwise.
  function cp2utf($str,$alias=''){
   $str=(string)$str;
   $table=null;
   if($alias!=''&&isset($this->map[$alias]))$table=$this->map[$alias];

   $out='';
   $len=strlen($str);
   for($i=0;$i<$len;$i++){
    $byte=ord($str[$i]);
    if($table===null)$code=$byte;
    elseif(isset($table[$byte]))$code=$table[$byte];
    elseif($byte<0x80)$code=$byte;
    else $code=0xFFFD;
    $out.=$this->code2utf($code);
   }
   return $out;
  }

  # Encode one Unicode code point ($num) as its UTF-8 byte sequence.
  #
  # Returns '' for values that have no valid UTF-8 encoding: negative numbers,
  # UTF-16 surrogates (U+D800..U+DFFF) and anything above U+10FFFF.
  function code2utf($num){
   $num=(int)$num;
   if($num<0||$num>0x10FFFF)return '';
   if($num>=0xD800&&$num<=0xDFFF)return '';

   if($num<0x80)return chr($num); // 1 byte: ASCII
   if($num<0x800)return chr(0xC0|($num>>6)).chr(0x80|($num&0x3F)); // 2 bytes: up to U+07FF
   if($num<0x10000)return chr(0xE0|($num>>12)).chr(0x80|(($num>>6)&0x3F)).chr(0x80|($num&0x3F)); // 3 bytes: up to U+FFFF
   // 4 bytes: U+10000..U+10FFFF
   return chr(0xF0|($num>>18)).chr(0x80|(($num>>12)&0x3F)).chr(0x80|(($num>>6)&0x3F)).chr(0x80|($num&0x3F));
  }
 }
 # EOF
 ?>
 
 // cp1251.tab 也就是罗刹国老毛子文字的 Unicode 转换表
 #
 #    Name:     cp1251 to Unicode table
 #    Unicode version: 2.0
 #    Table version: 2.01
 #    Table format:  Format A
 #    Date:          04/15/98
 #
 #    Contact:       [email protected]
 #
 #    General notes: none
 #
 #    Format: Three tab-separated columns
 #        Column #1 is the cp1251 code (in hex)
 #        Column #2 is the Unicode (in hex as 0xXXXX)
 #        Column #3 is the Unicode name (follows a comment sign, '#')
 #
 #    The entries are in cp1251 order
 #
 0x00    0x0000  #NULL
 0x01    0x0001  #START OF HEADING
 0x02    0x0002  #START OF TEXT
 # ......
 # 反正不认识,该省的也就省了吧.哈哈.
 # ......
 0xFF    0x044F  #CYRILLIC SMALL LETTER YA
 
 // test.php
 <html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body>
 <?php
  # utf class example: load the cp1251 mapping table and print a CP1251
  # string converted to UTF-8.

 require 'utf8.php';
 $utf=new utf;
 $utf->loadmap('cp1251.tab','cp');
 # The sample text is written as explicit CP1251 byte escapes so that it
 # survives 7-bit transports: the archived copy of this line had the high bit
 # of every Cyrillic byte stripped, turning it into "Ophber, lhp".
 echo $utf->cp2utf("Russian(CP1251): \xCF\xF0\xE8\xE2\xE5\xF2, \xEC\xE8\xF0!!\n",'cp');
 ?>
 </body>
 </html>
  -- ※ 来源:.月光软件站 http://www.moon-soft.com.[FROM: 61.141.205.28]
  | 
 
 
 |