php汉字转Unicode编码的函数! [php]
post by 豆蔻 / 2009-10-30 15:27 Friday
<?php
/**
 * Encode every character of a string as its decimal Unicode code point,
 * wrapped in a prefix and a postfix. With the defaults this produces HTML
 * numeric character references such as "&#21704;".
 *
 * All characters are encoded, plain ASCII included.
 *
 * Original author's blog: http://www.dotcoo.com
 *
 * @param string $str      Input string.
 * @param string $encoding Character encoding of $str (default "GBK").
 * @param string $prefix   Text emitted before each code point (default "&#").
 * @param string $postfix  Text emitted after each code point (default ";").
 * @return string|false    Encoded string ("" for empty input), or false when
 *                         iconv() cannot convert $str from $encoding.
 */
function unicode_encode($str, $encoding = 'GBK', $prefix = '&#', $postfix = ';')
{
    // Nothing to encode; avoids emitting a spurious "&#0;" for empty input.
    if ((string) $str === '') {
        return '';
    }

    // Convert to UTF-32BE so each character is exactly four big-endian bytes.
    // The byte order is spelled out because a bare "UCS-2"/"UTF-32" target is
    // implementation-dependent, and a 32-bit unit also covers characters
    // above U+FFFF that UCS-2 cannot represent.
    $utf32 = iconv($encoding, 'UTF-32BE', $str);
    if ($utf32 === false) {
        return false;
    }

    $unistr = '';
    // 'N*' unpacks consecutive unsigned 32-bit big-endian integers,
    // i.e. one code point per character.
    foreach (unpack('N*', $utf32) as $codePoint) {
        $unistr .= $prefix . $codePoint . $postfix;
    }

    return $unistr;
}
/**
 * Decode a string produced by unicode_encode(): a sequence of decimal Unicode
 * code points, each introduced by $prefix and optionally terminated by
 * $postfix, back into text in the requested encoding.
 *
 * Only the code points are decoded. Text before the first prefix, and any
 * text between a code point and the next prefix, is discarded.
 *
 * Original author's blog: http://www.dotcoo.com
 *
 * @param string $unistr   Encoded string, e.g. "&#60;&#21704;".
 * @param string $encoding Encoding of the returned string (default "GBK").
 * @param string $prefix   Text that introduces each code point (default "&#");
 *                         must not be empty.
 * @param string $postfix  Text that terminates each code point (default ";");
 *                         may be empty.
 * @return string|false    Decoded string, or false when iconv() cannot convert
 *                         the code points to $encoding.
 */
function unicode_decode($unistr, $encoding = 'GBK', $prefix = '&#', $postfix = ';')
{
    $segments = explode($prefix, $unistr);
    $postfixLen = strlen($postfix);
    $utf32 = '';

    // Element 0 is whatever preceded the first prefix; it holds no code point.
    for ($i = 1, $len = count($segments); $i < $len; $i++) {
        $segment = $segments[$i];

        if ($postfixLen > 0) {
            if (substr($segment, -$postfixLen) === $postfix) {
                // Well-formed case: the segment ends with the postfix.
                $segment = substr($segment, 0, strlen($segment) - $postfixLen);
            } else {
                // Trailing text after the postfix, or no postfix at all:
                // cut at the postfix if present instead of blindly dropping
                // the last bytes (which would eat digits of the number).
                $cut = strpos($segment, $postfix);
                if ($cut !== false) {
                    $segment = substr($segment, 0, $cut);
                }
            }
        }

        // Only the leading run of decimal digits is the code point.
        $digitCount = strspn($segment, '0123456789');
        if ($digitCount === 0) {
            // No number after the prefix; skip rather than emit a NUL.
            continue;
        }

        // Append the code point as one unsigned 32-bit big-endian unit.
        $utf32 .= pack('N', (int) substr($segment, 0, $digitCount));
    }

    // Explicit big-endian source encoding matches the pack('N') layout above.
    return iconv('UTF-32BE', $encoding, $utf32);
}
// Demo / manual test. The literal below is taken byte-for-byte from this
// file, so the file is assumed to be saved as GBK for the default-encoding
// calls to work. Output is HTML; the comments show the raw text emitted.

// GBK string test
$str = '<b>哈哈</b>';
echo $str.'<br />';
$unistr = unicode_encode($str);
echo $unistr.'<br />'; // &#60;&#98;&#62;&#21704;&#21704;&#60;&#47;&#98;&#62; (a browser renders this as <b>哈哈</b>)
$str2 = unicode_decode($unistr);
echo $str2.'<br />'; // <b>哈哈</b>

// UTF-8 string test
$utf8_str = iconv('GBK', 'UTF-8', $str);
echo $utf8_str.'<br />'; // UTF-8 bytes: shown as mojibake while the page is viewed as GBK; switch the browser encoding to check
$utf8_unistr = unicode_encode($utf8_str, 'UTF-8');
echo $utf8_unistr.'<br />'; // same code points as the GBK test above
$utf8_str2 = unicode_decode($utf8_unistr, 'UTF-8');
echo $utf8_str2.'<br />'; // UTF-8 bytes again (mojibake under a GBK page)

// Custom prefix / postfix test
$prefix_unistr = unicode_encode($str, 'GBK', "\\u", '');
echo $prefix_unistr.'<br />'; // \u60\u98\u62\u21704\u21704\u60\u47\u98\u62
$prefix_unistr2 = unicode_decode($prefix_unistr, 'GBK', "\\u", '');
echo $prefix_unistr2.'<br />'; // <b>哈哈</b>
?>