Wednesday, August 31, 2011

php unicode string

Have you ever wanted to express Unicode code points in a PHP string, like this?

$string = "Hello World \u4f60\u597d\u4e16\u754c";
(which is meant to be equivalent to...)
$string = "Hello World 你好世界";
Try this:
// Run the string through ustring() (defined below); its return value also carries a trailing "\n".
$string = ustring("Hello World \u4f60\u597d\u4e16\u754c");
/**
 * Expands literal \uXXXX / \UXXXX escapes (backslash, "u" or "U", exactly
 * four hex digits) found in a string.
 *
 * Each escape is handed to matcheduchar() and replaced by whatever that
 * callback returns. Text that does not match the escape pattern is left
 * untouched.
 *
 * @param string $string Text that may contain \uXXXX escapes.
 * @return string The text with the escapes replaced, followed by a single
 *                "\n" (the newline is always appended).
 */
function ustring($string)
{
    // In a double-quoted PHP literal "\\\\" is the regex token \\, i.e. one
    // literal backslash in the subject.
    // Fixed: the subject used to be the undefined variable $str, so the
    // input was never processed.
    return preg_replace_callback("/\\\\[Uu]([0-9A-Fa-f]{4})/", 'matcheduchar', $string) . "\n";
}
/**
 * Callback for preg_replace_callback(): converts a captured hexadecimal
 * code point into its UTF-8 byte sequence.
 *
 * @param array $matches Match array; $matches[1] holds the hex digits of
 *                       the code point.
 * @return string The UTF-8 encoding of the code point, or '' when the value
 *                lies in the surrogate range U+D800..U+DFFF or is greater
 *                than U+10FFFF.
 */
function matcheduchar($matches)
{
    $num = hexdec($matches[1]);

    // U+0000..U+007F: one byte, identical to ASCII.
    if ($num <= 0x7F) {
        return chr($num);
    }

    // U+0080..U+07FF: two bytes, 110xxxxx 10xxxxxx.
    if ($num <= 0x7FF) {
        return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
    }

    // Surrogate code points cannot be encoded in UTF-8.
    if (0xd800 <= $num && $num <= 0xdfff) {
        return '';
    }

    // U+0800..U+FFFF: three bytes, 1110xxxx 10xxxxxx 10xxxxxx.
    // This branch used to be hidden behind a trailing line comment, which
    // sent these code points to the four-byte form below.
    if ($num <= 0xFFFF) {
        return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
    }

    // U+10000..U+10FFFF: four bytes, 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
    if ($num <= 0x10FFFF) {
        return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
    }

    // Beyond the Unicode range.
    return '';
}