精华区 [关闭][返回]

当前位置:网易精华区>>讨论区精华>>编程开发>>● PHP>>功能代码>>罗刹国老毛子的 UTF-8 转换方案.

主题:罗刹国老毛子的 UTF-8 转换方案.
发信人: jackyz()
整理人: dalasthunder(2002-07-25 06:17:19), 站内信件
// utf8.php
<?
# Unicode class... be sure you use charset=utf-8 in your html header if you use this.
#
# You are free to use this code, change whatever and redistribute, just leave info about me and
# don't forget to drop me a line if you think this code is useful ;)
#
# Romans (2000)
# [email protected]

class utf{
    # Loaded charset mappings: $map[alias][source byte] = Unicode code point.
    # Mapping tables can be obtained at ftp://ftp.unicode.org/Public/MAPPINGS/
    #
    # Note from the original post (translated): a GB2312 table is published
    # there too (ftp://ftp.unicode.org/Public/MAPPINGS/GB/GB2312.TXT), but it
    # is about 175K, so hauling it in for every conversion is costly.
    # loadmap() and cp2utf() below only handle one-byte source codes, so a
    # two-byte table such as GB2312.TXT cannot be used with this class as is.
    var $map = array();

    function loadmap($filename, $alias){
        # Load a mapping table into $this->map[$alias] for later use.
        # Pass the same alias to cp2utf().
        #
        # $filename  path of a table whose data rows look like
        #            "0xHH <whitespace> 0xHHHH <rest ignored>"
        # $alias     name under which the table is stored
        #
        # Rows that do not start with such a pair (comments, blank lines,
        # undefined slots) are skipped. Terminates the script when the file
        # cannot be opened.
        $f = fopen($filename, 'r') or die();
        while (($line = fgets($f)) !== false) {
            # Exactly two hex digits for the source byte, then 4-6 hex digits
            # for the code point; the lookahead rejects longer digit runs.
            if (preg_match('/^\s*0x([0-9A-Fa-f]{2})\s+0x([0-9A-Fa-f]{4,6})(?![0-9A-Fa-f])/', $line, $m)) {
                $this->map[$alias][hexdec($m[1])] = hexdec($m[2]);
            }
        }
        fclose($f);
    }

    function cp2utf($str, $alias = ''){
        # Translate string $str to UTF-8 from the charset loaded under $alias.
        # If no alias is given, ISO-8859-1 is assumed (byte value == code point).
        #
        # Bytes without an entry in the table pass through unchanged when they
        # are ASCII and become U+FFFD (replacement character) otherwise.
        # Returns the UTF-8 string ('' for empty input).
        $str = (string)$str;
        $len = strlen($str);
        $xstr = '';

        if ($alias == '') {
            for ($x = 0; $x < $len; $x++) {
                $xstr .= $this->code2utf(ord($str[$x]));
            }
            return $xstr;
        }

        $table = (is_array($this->map) && isset($this->map[$alias])) ? $this->map[$alias] : array();
        for ($x = 0; $x < $len; $x++) {
            $byte = ord($str[$x]);
            if (isset($table[$byte])) {
                $code = $table[$byte];
            } elseif ($byte < 128) {
                $code = $byte;
            } else {
                $code = 0xFFFD;
            }
            $xstr .= $this->code2utf($code);
        }
        return $xstr;
    }

    function code2utf($num){
        # Translate a Unicode code point to its UTF-8 byte sequence.
        # Returns '' for values that are not Unicode scalar values
        # (negative, surrogates U+D800..U+DFFF, or above U+10FFFF).
        #
        # Range limits follow the UTF-8 definition: 1 byte below 0x80,
        # 2 bytes below 0x800, 3 bytes below 0x10000, 4 bytes up to 0x10FFFF.
        $num = (int)$num;
        if ($num < 0) {
            return '';
        }
        if ($num < 0x80) {
            return chr($num); // ASCII
        }
        if ($num < 0x800) {
            return chr(0xC0 | ($num >> 6))
                 . chr(0x80 | ($num & 0x3F));
        }
        if ($num < 0x10000) {
            if ($num >= 0xD800 && $num <= 0xDFFF) {
                return '';
            }
            return chr(0xE0 | ($num >> 12))
                 . chr(0x80 | (($num >> 6) & 0x3F))
                 . chr(0x80 | ($num & 0x3F));
        }
        if ($num <= 0x10FFFF) {
            # Every shift/mask is parenthesised: in PHP "+" and "|" would
            # otherwise combine with the operands in an unintended order.
            return chr(0xF0 | ($num >> 18))
                 . chr(0x80 | (($num >> 12) & 0x3F))
                 . chr(0x80 | (($num >> 6) & 0x3F))
                 . chr(0x80 | ($num & 0x3F));
        }
        return '';
    }
}
# EOF
?>

// cp1251.tab 也就是罗刹国老毛子文字的 Unicode 转换表
#
#    Name:     cp1251 to Unicode table
#    Unicode version: 2.0
#    Table version: 2.01
#    Table format:  Format A
#    Date:          04/15/98
#
#    Contact:       [email protected]
#
#    General notes: none
#
#    Format: Three tab-separated columns
#        Column #1 is the cp1251 code (in hex)
#        Column #2 is the Unicode (in hex as 0xXXXX)
#        Column #3 is the Unicode name (follows a comment sign, '#')
#
#    The entries are in cp1251 order
#
0x00    0x0000  #NULL
0x01    0x0001  #START OF HEADING
0x02    0x0002  #START OF TEXT
# ......
# 反正不认识,该省的也就省了吧.哈哈.
# ......
0xFF    0x044F  #CYRILLIC SMALL LETTER YA

// test.php
<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body>
<?php
# utf class example: load the cp1251 table under the alias 'cp' and print a
# CP1251-encoded string converted to UTF-8.

require 'utf8.php';
$utf = new utf;
$utf->loadmap('cp1251.tab', 'cp');
# The Cyrillic part is written with hex escapes so the bytes survive 7-bit
# transports. NOTE(review): the posted copy read "Ophber, lhp" and was flagged
# as possibly corrupted; the bytes below are those letters with the high bit
# set again (a Russian "hello, world") - a reconstruction, please verify.
echo $utf->cp2utf("Russian(CP1251): \xCF\xF0\xE8\xE2\xE5\xF2, \xEC\xE8\xF0!!\n", 'cp');
?>
</body></html>

--
※ 来源:.月光软件站 http://www.moon-soft.com.[FROM: 61.141.205.28]

[关闭][返回]