发信人: jackyz()
整理人: dalasthunder(2002-07-25 06:17:19), 站内信件
|
// utf8.php
<?
# Unicode class... be sure you use charset=utf-8 in your html header if you use this.
#
# You are free to use this code, change whatever and redistribute, just leave info about me and
# don't forget to drop me a line if you think this code is useful ;)
#
# Romans (2000)
# [email protected]
/**
 * Converts strings from a single-byte code page to UTF-8.
 *
 * The output is UTF-8, so any page that prints it should declare
 * charset=utf-8.
 *
 * Mapping tables can be obtained from ftp://ftp.unicode.org/Public/MAPPINGS/.
 *
 * Note from the original poster (translated): a GB2312 mapping file is
 * available there as well (ftp://ftp.unicode.org/Public/MAPPINGS/GB/GB2312.TXT),
 * but it is about 175K, and hauling a table that size around for every
 * conversion is an efficiency concern. Still, it is a workable and far more
 * "standard" approach than a hand-rolled converter.
 *
 * Limitation: cp2utf() translates one input byte at a time, so only
 * single-byte tables (such as cp1251) can be used with this class as written.
 */
class utf{
    /**
     * Loaded charset mappings, indexed as $map[alias][byte value] = code point.
     */
    var $map = array();

    /**
     * Loads a mapping table into memory for later use by cp2utf().
     *
     * Each useful line of the file has the form "0xXX <whitespace> 0xYYYY ...",
     * where XX is the byte value in the code page and YYYY is the Unicode code
     * point. Comment lines, blank lines, entries without a Unicode value and
     * entries whose source code is longer than one byte are ignored. Loading
     * several files under the same alias merges them.
     *
     * The script is terminated if the file cannot be opened.
     *
     * @param string $filename Path of the mapping table.
     * @param string $alias    Name under which the table is stored; pass the
     *                         same value to cp2utf().
     */
    function loadmap($filename,$alias){
        $handle = fopen($filename, 'r');
        if ($handle === false) {
            die('utf::loadmap: unable to open mapping file');
        }
        if (!isset($this->map[$alias])) {
            $this->map[$alias] = array();
        }
        while (($line = fgets($handle)) !== false) {
            // The source code must be at most two hex digits followed by
            // whitespace, which rejects multi-byte entries instead of
            // truncating them to their first byte.
            if (preg_match('/^\s*0x([0-9A-Fa-f]{1,2})\s+0x([0-9A-Fa-f]+)/', $line, $m)) {
                $this->map[$alias][hexdec($m[1])] = hexdec($m[2]);
            }
        }
        fclose($handle);
    }

    /**
     * Translates a string to UTF-8 from the given charset.
     *
     * When no alias is given the input is treated as ISO-8859-1, i.e. every
     * byte value is used directly as the code point. With an alias, each byte
     * is looked up in the table loaded by loadmap(); bytes that have no entry
     * are replaced with U+FFFD (REPLACEMENT CHARACTER).
     *
     * @param string $str   Input bytes in the source charset.
     * @param string $alias Alias previously passed to loadmap(), or '' for
     *                      ISO-8859-1.
     * @return string The UTF-8 encoded text ('' for empty input).
     */
    function cp2utf($str,$alias=''){
        $str = (string)$str;
        $length = strlen($str);
        $out = '';

        if ($alias == '') {
            for ($i = 0; $i < $length; $i++) {
                $out .= $this->code2utf(ord($str[$i]));
            }
            return $out;
        }

        $table = isset($this->map[$alias]) ? $this->map[$alias] : array();
        for ($i = 0; $i < $length; $i++) {
            $byte = ord($str[$i]);
            $out .= $this->code2utf(isset($table[$byte]) ? $table[$byte] : 0xFFFD);
        }
        return $out;
    }

    /**
     * Encodes a single Unicode code point as a UTF-8 byte sequence.
     *
     * Ranges follow the UTF-8 definition: up to U+007F one byte, up to U+07FF
     * two bytes, up to U+FFFF three bytes, up to U+10FFFF four bytes.
     *
     * @param int $num Unicode code point.
     * @return string The UTF-8 bytes, or '' when $num is not an encodable
     *                code point (negative, a surrogate, or above U+10FFFF).
     */
    function code2utf($num){
        $num = (int)$num;
        if ($num < 0 || $num > 0x10FFFF || ($num >= 0xD800 && $num <= 0xDFFF)) {
            return '';
        }
        if ($num < 0x80) {
            return chr($num); // ASCII
        }
        if ($num < 0x800) {
            return chr(0xC0 | ($num >> 6))
                . chr(0x80 | ($num & 0x3F));
        }
        if ($num < 0x10000) {
            return chr(0xE0 | ($num >> 12))
                . chr(0x80 | (($num >> 6) & 0x3F))
                . chr(0x80 | ($num & 0x3F));
        }
        return chr(0xF0 | ($num >> 18))
            . chr(0x80 | (($num >> 12) & 0x3F))
            . chr(0x80 | (($num >> 6) & 0x3F))
            . chr(0x80 | ($num & 0x3F));
    }
}
# EOF
?>
// cp1251.tab 也就是罗刹国老毛子文字的 Unicode 转换表
#
# Name: cp1251 to Unicode table
# Unicode version: 2.0
# Table version: 2.01
# Table format: Format A
# Date: 04/15/98
#
# Contact: [email protected]
#
# General notes: none
#
# Format: Three tab-separated columns
# Column #1 is the cp1251 code (in hex)
# Column #2 is the Unicode (in hex as 0xXXXX)
# Column #3 is the Unicode name (follows a comment sign, '#')
#
# The entries are in cp1251 order
#
0x00 0x0000 #NULL
0x01 0x0001 #START OF HEADING
0x02 0x0002 #START OF TEXT
# ......
# 反正不认识,该省的也就省了吧.哈哈.
# ......
0xFF 0x044F #CYRILLIC SMALL LETTER YA
// test.php
<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body>
<?
# Usage example for the utf class: load the cp1251 table and print a
# converted string.
require 'utf8.php';

$converter = new utf();
$converter->loadmap('cp1251.tab', 'cp');

# The Cyrillic bytes in the literal below may have been corrupted in transit.
$sample = "Russian(CP1251): Ophber, lhp!!\n";
echo $converter->cp2utf($sample, 'cp');
?>
</body>
-- ※ 来源:.月光软件站 http://www.moon-soft.com.[FROM: 61.141.205.28]
|
|