php Chinese to Pinyin initial letter problem

The original code had some problems, so I made a few changes. The original is on GitHub at chenall/chenall/blob/master/php/tools/ZH-cn_TO_pinyin.php

<?php

/*

A simple PHP implementation for converting Chinese characters to their pinyin initials.

Requirement: only Chinese characters in the GB2312 code table are supported.

Each Chinese character in the string is replaced by the uppercase first letter of its pinyin.

Usage:

echo zh2py::conv('Chinese 中华人民共和国'); //Chinese ZHRMGHG

or

$py = new zh2py;

echo $py->conv('Chinese 中华人民共和国'); //Chinese ZHRMGHG

*/

class?zh2py

{

//The GB2312 zone/position (quwei) table shows that zones 16-55 are sorted by pinyin,

//so the zone-position code of a character in that range is enough to determine its pinyin initial.

//First part of the table: the level-1 characters, sorted by pinyin.

//Zones 16-55

/*

'A'=>0xB0A1, 'B'=>0xB0C5, 'C'=>0xB2C1, 'D'=>0xB4EE, 'E'=>0xB6EA, 'F'=>0xB7A2, 'G'=>0xB8C1, 'H'=>0xB9FE,

'J'=>0xBBF7, 'K'=>0xBFA6, 'L'=>0xC0AC, 'M'=>0xC2E8, 'N'=>0xC4C3, 'O'=>0xC5B6, 'P'=>0xC5BE, 'Q'=>0xC6DA,

'R'=>0xC8BB, 'S'=>0xC8F6, 'T'=>0xCBFA, 'W'=>0xCDDA, 'X'=>0xCEF4, 'Y'=>0xD1B9, 'Z'=>0xD4D1

*/

/**
 * Exclusive upper bounds of each pinyin-initial range in the level-1 hanzi area.
 *
 * Entry k is the first GB2312 code (lead byte << 8 | trail byte) that no longer
 * belongs to letter k of $FirstLetter: a code below 0xB0C5 maps to 'A', a code
 * below 0xB2C1 maps to 'B', and so on. The final bound 0xD7FA closes the 'Z' range.
 * The list must stay in ascending order and have the same length as $FirstLetter.
 */
private static $FirstTable = array(
    0xB0C5, 0xB2C1, 0xB4EE, 0xB6EA, 0xB7A2, 0xB8C1, 0xB9FE, 0xBBF7, 0xBFA6, 0xC0AC, 0xC2E8,
    0xC4C3, 0xC5B6, 0xC5BE, 0xC6DA, 0xC8BB, 0xC8F6, 0xCBFA, 0xCDDA, 0xCEF4, 0xD1B9, 0xD4D1, 0xD7FA
);

/**
 * Pinyin initials in table order; position k pairs with $FirstTable[k].
 * I, U and V are absent from this list, so the table never yields them.
 */
private static $FirstLetter = "ABCDEFGHJKLMNOPQRSTWXYZ";

// Second part of the table: the level-2 hanzi area, which is NOT ordered by pinyin.
// Each string is one row and each letter is the pinyin initial of the character at that
// position. conv() reads it as [lead byte - 0xD8][trail byte - 0xA1], with lead bytes up to
// 0xF7 and trail bytes 0xA1-0xFE, so a usable table needs 32 rows of 94 letters each.
// The letters were compiled from the Internet and may contain some errors.
// NOTE(review): several rows below contain embedded spaces, strings broken across lines or
// stray quote characters, and are not 94 letters long - the data looks damaged in transit.
// Restore it from the upstream file before relying on these rows.

//Zones 56-87

private?static?$SecondTable?=?array(

"CJWGNSPGCGNEGYPBTYYZDXYKYGTZJNMJQMBSGZSCYJSYYFPGKBZGYDY

WJKGKLJSWKPJQHYJWRDZLSYMRYPYWWCCKZNKYYG",

"TTNGJEYKKZYTCJNMCYLQLYPYSFQRPZSLWBTGKJFYXJWZLTBNCXJJJJTXDTTSQZYCDXXHGCKBPHFFSSTYBGMXLPBYLLBHLX",

"SMZMYJHSOJNGHDZQYK LGJHSGQZHXQGKXZZWYSCSCJXYEYXADZPMDSSMZJZQJYZCJJFWQJBDZBXGZNZCPWHWXHQKMWFBPBY",

"DTJZZKXHYLYGXFPTYJYYZPSZLFCHMQSHGMXXSXJYQDCSBBQBEFSJYHWWGZKPYLQBGLDLCDTNMAYDDK SSNGYCSGXLYZAYPN",

"PTSDKDYLHGYMYLCXPYCJNDQJWXQXFYYFJLEJPZRXCCQWQQSBZKYMGPLBMJRQCFLNYMYQMSQYRBCJTHZTQFRXQHXMQJCJLY",

"QGJMSHZKBSWYEMYLTXFSYDXWLYCJQXSJNQBSCTYHBFTDCYZDJWYGHQFRXWCKQKXEBPTLPXJZSRMEBWHJLBJSLYYSMDXLCL",

"QKXLHXJRZJMFQHXHWYWSBHTRXXGLHQHFNMGYKLDYXZPYLGGSMTCFBAJ JZYLJTYANJGBJPLQGSZYQYAXBKYSECJSZNSLYZH",

"ZXLZCGHPXZHZNYTDSBCJKDLZAYFFYDLEBBGQ YZKXGLDNDNYSKJSHDLYXBCGHXYPKDJMMZNGMMCLGWZSZXZJFZNMLZZTHCS",

"YDBDLLSCDDNLKJYKJ SYCJLKWHQASDKNHCSGAGHDAASHTCPLCPQYBSZMPJLPCJOQLCDHJJYSPRCHNWJNLHLYYQYYWZPTCZG",

"WWMZFFJQQQQYXACLBHKDJXDGMMYDJXZLLSYGXGKJRYWZWYCLZMSSJZLDBYDCFCXYHLXCHYZJQSQQAGMNYXPFR KSSBJLYXY ",

"SYGLNSCMHCWWMNZJJLXXHCHSYZSTTXRYCYXBYHCSMXJSZNPWGPXXTAYBGAJCXLYXDCCWZOCWKCCSBNHCPDYZNFCYYTYCKX",

"KYBSQKKYTQQXFCMCCHYKELZQBSQYJQCCLMTHSYWHMKTLKJLYCXWHEQQHTQKZPQS QSCFYMMDMGBWHWLGSLLYSDLMLXPTHMJ",

"HWLJZYHZJXKTXJLHXRSWLWZJCBXMHZQXSDZPSGFCSGLSXYMJSHXPJXWMYQKSMYPLRTHBXFTPMHYXLCHLHLZYLXGSSSSTCL",

"SLDCLRPBHZHXYYFHBMGDMYCN QQWLQHJJCYWJZYEJJDHPBLQXTQKWHLCHQXAGTLXLJXMSLJHTZKZJECXJCJNMFBYCSFYWYB",

"JZGNYSDZSQYRSLJPCLPWXSDWEJBJCBCNAYTWGMPAPCLYQPCLZXSBNMSGGFNZJJBZSFZYNTXHPLQKZCZWALSBCZJXSYZGWK",

"YPSGXFZFCDKHJGXTLQFSGDSLQWZKXTMHSBGZMJZRGLYJBPMLMSXLZJQQHZYJC ZYDJWFMJKLDDPMJEGXYHYLXHLQYQHKYCW",

"CJMYYXNATJHYCCXZPCQLBZWWYTWBQCMLPMYRJC CCXFPZNZZLJPLXXYZTZLGDLTCKLYRZZGQTTJHHHJLJAXFGFJZSLCFDQZ",

"LCLGJDJZSNZLLJP " GJQJJPMGWGJJJPKQSB",

"GBMMCJSSCLPQPDXCDYYKYPCJDDYYGYWRHJRTGZNYQLDKLJSZZGZQZJGDYKSHPZMTLCPWNJYFYZDJCNMWESCYGLBTZZGMSS",

"LLYXYSXXBSJSBBSGGHFJLYPMZJNLYYWDQSHZXTYYWHMCYHYWDBXBTLMSYYYFS " SWTFGGLYPLLJZHGJJGYPZLTCSMCNBTJBQFKDHBYZGKPBBYMTDSSXTBNPDKLEYCJNYCDYKZTDHQH",

" SYZSCTARLLTKZLGECLLKJLQJAQNBDKKGHPJTZQKSECSHALQFMMGJNLYJBBTMLYZXDXJPLDLPCQDHZYCBZSCZBZMSLJFLKR",

"ZJSNFRGJHXPDHYJYBZGDLQCSEZGXLBLGYXTWMABCHECMWYJYZLLJJYHLGNDJLSLYGK DZPZXJYYZLWCXSZFGWYYDLYHCLJS",

"CMBJHBLYZLYCBLYDPDQYSXQZBYTDKYXJYYCNRJMPDJGKLC LJBCTBJDDBBLBLCZQRPYXJCJLZCSHLTOLJNMDDDLNGKATHQH",

"JHYKHEZNMSHRPHQQJCHGMFPRXHJ GDYCHGHLYRZQLCYQJNZSQTKQJYMSZSWLCFQQQXYFGGYPTQWLMCRNFKKFSYYLQBMQAMM",

"MYXCTPSHCPTXXZZSMPHPSHMCLMLDQFYQXSZYJDJJZZHQPDS

ZGLSTJBCKBXYQZJSGPSXQZQZRQTBDKYXZKHHGFFLBCSMDLDG",

"DZDBLZYYCXNNCSYBZBFGLZZXSWMSCCMQNJQSBDQSJTXXMBLTXZCLZSHZCXRQJGJYLXZFJPHYMZQQYDFQJJLZZNZJCDGZYG",

"CTXMZYSCTLKPHTXHTLBJXJLXSCDQXCBBTJFQZFSLTJBTKQBXXJJLJCHCZDBZJDCZJDCPRNPQCJPFCZ LCLZXZDMXMPHJSGZ",

"GSZZQLYLWTJPFSYASMCJBTZYYCWMYTZSJJLJCQLWZMALBXYFBPNLSFHTG JWEJJXXGLLJSTGSHJQLZFKCGNNNSZFDEQFHBS",

"AQTGYLBXMMYGSZLDYDQMJJRGBJTKGDHGKBLQKBDMBYLXWCXYTTYBKMRTJZXQJBHLMHMJJZMQASLDCYXYQDLQCAFYWYXQHZ",

);

/**
 * Normalise a string to GB2312 bytes.
 *
 * Encoding detection is heuristic. Valid UTF-8 (which includes plain ASCII) is
 * treated as UTF-8 and converted, because some UTF-8 text also happens to be a
 * valid GB2312 byte sequence (for example UTF-8 "人人") and would otherwise be
 * returned unconverted. Input that is not valid UTF-8 but is valid GB2312 is
 * returned unchanged. Anything else is converted on a best-effort basis.
 *
 * @param string $string Text in UTF-8, or already in GB2312.
 * @return string The text as GB2312 bytes. With iconv, characters that cannot be
 *                represented are dropped (//IGNORE).
 */
public static function utf8_to_gbk($string)
{
    $string = (string) $string;

    // Only trust the GB2312 check when the bytes cannot be UTF-8.
    if (!mb_check_encoding($string, 'utf-8') && mb_check_encoding($string, 'gb2312')) {
        return $string;
    }

    if (function_exists('iconv')) {
        $converted = iconv("utf-8", "gb2312//IGNORE", $string);
        // iconv() returns false on failure; fall through to mbstring instead of
        // handing a boolean back to the caller.
        if ($converted !== false) {
            return $converted;
        }
    }

    return mb_convert_encoding($string, 'gb2312', 'utf-8');
}

/**
 * Replace every GB2312 hanzi in a string with the uppercase initial of its pinyin.
 *
 * The input is first normalised with utf8_to_gbk(). The result is then scanned
 * byte by byte: a lead byte in 0xB0-0xF7 followed by a trail byte in 0xA1-0xFE is
 * treated as one hanzi and replaced by a letter; every other byte is copied
 * through unchanged. Pairs for which no letter can be found (the gap at the end
 * of the level-1 area, or a position missing from $SecondTable) are also copied
 * through unchanged rather than dropped.
 *
 * @param string $str Text in UTF-8 or GB2312.
 * @return string The text with hanzi replaced by pinyin initials. Bytes that were
 *                copied through are in GB2312, not UTF-8.
 */
public static function conv($str)
{
    $str = (string) self::utf8_to_gbk($str);
    $len = strlen($str);
    $newStr = '';

    for ($i = 0; $i < $len; ++$i) {
        $H = ord($str[$i]);
        // A lead byte at the very end has no trail byte; 0 fails the range
        // check below, so the byte is copied instead of reading past the end.
        $L = ($i + 1 < $len) ? ord($str[$i + 1]) : 0;

        // Not a hanzi pair: copy this single byte and re-examine the next one.
        if ($H < 0xB0 || $H > 0xF7 || $L < 0xA1 || $L === 0xFF) {
            $newStr .= $str[$i];
            continue;
        }

        $letter = null;

        if ($H < 0xD8) {
            // Level-1 area: sorted by pinyin, so the first boundary above the
            // code identifies the initial.
            $W = ($H << 8) | $L;
            foreach (self::$FirstTable as $key => $value) {
                if ($W < $value) {
                    $letter = self::$FirstLetter[$key];
                    break;
                }
            }
        } else {
            // Level-2 area: unordered, one table letter per character position.
            $row = $H - 0xD8;
            $col = $L - 0xA1;
            if (isset(self::$SecondTable[$row][$col])) {
                $letter = self::$SecondTable[$row][$col];
            }
        }

        // No letter found: keep the original two bytes.
        $newStr .= ($letter === null) ? $str[$i] . $str[$i + 1] : $letter;

        // Skip the trail byte that was just consumed.
        ++$i;
    }

    return $newStr;
}

}

// Smoke test: 妃 is pronounced "fēi", so the expected output is F.
echo zh2py::conv('妃'); //F