gdoo/app/Support/Pinyin.php

990 lines
24 KiB
PHP

<?php namespace App\Support;
class Pinyin
{
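/**
 * Pinyin syllable table.
 *
 * Each entry is [syllable, code]. num2str() returns the syllable of the last
 * entry whose code is less than or equal to the looked-up value, so the table
 * must stay sorted by ascending code. output() derives the looked-up value from
 * a two-byte GBK character as (first byte * 256 + second byte - 65536).
 *
 * @var array
 */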
protected static $lib = [
['a',-20319],
['ai',-20317],
['an',-20304],
['ang',-20295],
['ao',-20292],
['ba',-20283],
['bai',-20265],
['ban',-20257],
['bang',-20242],
['bao',-20230],
['bei',-20051],
['ben',-20036],
['beng',-20032],
['bi',-20026],
['bian',-20002],
['biao',-19990],
['bie',-19986],
['bin',-19982],
['bing',-19976],
['bo',-19805],
['bu',-19784],
['ca',-19775],
['cai',-19774],
['can',-19763],
['cang',-19756],
['cao',-19751],
['ce',-19746],
['ceng',-19741],
['cha',-19739],
['chai',-19728],
['chan',-19725],
['chang',-19715],
['chao',-19540],
['che',-19531],
['chen',-19525],
['cheng',-19515],
['chi',-19500],
['chong',-19484],
['chou',-19479],
['chu',-19467],
['chuai',-19289],
['chuan',-19288],
['chuang',-19281],
['chui',-19275],
['chun',-19270],
['chuo',-19263],
['ci',-19261],
['cong',-19249],
['cou',-19243],
['cu',-19242],
['cuan',-19238],
['cui',-19235],
['cun',-19227],
['cuo',-19224],
['da',-19218],
['dai',-19212],
['dan',-19038],
['dang',-19023],
['dao',-19018],
['de',-19006],
['deng',-19003],
['di',-18996],
['dian',-18977],
['diao',-18961],
['die',-18952],
['ding',-18783],
['diu',-18774],
['dong',-18773],
['dou',-18763],
['du',-18756],
['duan',-18741],
['dui',-18735],
['dun',-18731],
['duo',-18722],
['e',-18710],
['en',-18697],
['er',-18696],
['fa',-18526],
['fan',-18518],
['fang',-18501],
['fei',-18490],
['fen',-18478],
['feng',-18463],
['fo',-18448],
['fou',-18447],
['fu',-18446],
['ga',-18239],
['gai',-18237],
['gan',-18231],
['gang',-18220],
['gao',-18211],
['ge',-18201],
['gei',-18184],
['gen',-18183],
['geng',-18181],
['gong',-18012],
['gou',-17997],
['gu',-17988],
['gua',-17970],
['guai',-17964],
['guan',-17961],
['guang',-17950],
['gui',-17947],
['gun',-17931],
['guo',-17928],
['ha',-17922],
['hai',-17759],
['han',-17752],
['hang',-17733],
['hao',-17730],
['he',-17721],
['hei',-17703],
['hen',-17701],
['heng',-17697],
['hong',-17692],
['hou',-17683],
['hu',-17676],
['hua',-17496],
['huai',-17487],
['huan',-17482],
['huang',-17468],
['hui',-17454],
['hun',-17433],
['huo',-17427],
['ji',-17417],
['jia',-17202],
['jian',-17185],
['jiang',-16983],
['jiao',-16970],
['jie',-16942],
['jin',-16915],
['jing',-16733],
['jiong',-16708],
['jiu',-16706],
['ju',-16689],
['juan',-16664],
['jue',-16657],
['jun',-16647],
['ka',-16474],
['kai',-16470],
['kan',-16465],
['kang',-16459],
['kao',-16452],
['ke',-16448],
['ken',-16433],
['keng',-16429],
['kong',-16427],
['kou',-16423],
['ku',-16419],
['kua',-16412],
['kuai',-16407],
['kuan',-16403],
['kuang',-16401],
['kui',-16393],
['kun',-16220],
['kuo',-16216],
['la',-16212],
['lai',-16205],
['lan',-16202],
['lang',-16187],
['lao',-16180],
['le',-16171],
['lei',-16169],
['leng',-16158],
['li',-16155],
['lia',-15959],
['lian',-15958],
['liang',-15944],
['liao',-15933],
['lie',-15920],
['lin',-15915],
['ling',-15903],
['liu',-15889],
['long',-15878],
['lou',-15707],
['lu',-15701],
['lv',-15681],
['luan',-15667],
['lue',-15661],
['lun',-15659],
['luo',-15652],
['ma',-15640],
['mai',-15631],
['man',-15625],
['mang',-15454],
['mao',-15448],
['me',-15436],
['mei',-15435],
['men',-15419],
['meng',-15416],
['mi',-15408],
['mian',-15394],
['miao',-15385],
['mie',-15377],
['min',-15375],
['ming',-15369],
['miu',-15363],
['mo',-15362],
['mou',-15183],
['mu',-15180],
['na',-15165],
['nai',-15158],
['nan',-15153],
['nang',-15150],
['nao',-15149],
['ne',-15144],
['nei',-15143],
['nen',-15141],
['neng',-15140],
['ni',-15139],
['nian',-15128],
['niang',-15121],
['niao',-15119],
['nie',-15117],
['nin',-15110],
['ning',-15109],
['niu',-14941],
['nong',-14937],
['nu',-14933],
['nv',-14930],
['nuan',-14929],
['nue',-14928],
['nuo',-14926],
['o',-14922],
['ou',-14921],
['pa',-14914],
['pai',-14908],
['pan',-14902],
['pang',-14894],
['pao',-14889],
['pei',-14882],
['pen',-14873],
['peng',-14871],
['pi',-14857],
['pian',-14678],
['piao',-14674],
['pie',-14670],
['pin',-14668],
['ping',-14663],
['po',-14654],
['pu',-14645],
['qi',-14630],
['qia',-14594],
['qian',-14429],
['qiang',-14407],
['qiao',-14399],
['qie',-14384],
['qin',-14379],
['qing',-14368],
['qiong',-14355],
['qiu',-14353],
['qu',-14345],
['quan',-14170],
['que',-14159],
['qun',-14151],
['ran',-14149],
['rang',-14145],
['rao',-14140],
['re',-14137],
['ren',-14135],
['reng',-14125],
['ri',-14123],
['rong',-14122],
['rou',-14112],
['ru',-14109],
['ruan',-14099],
['rui',-14097],
['run',-14094],
['ruo',-14092],
['sa',-14090],
['sai',-14087],
['san',-14083],
['sang',-13917],
['sao',-13914],
['se',-13910],
['sen',-13907],
['seng',-13906],
['sha',-13905],
['shai',-13896],
['shan',-13894],
['shang',-13878],
['shao',-13870],
['she',-13859],
['shen',-13847],
['sheng',-13831],
['shi',-13658],
['shou',-13611],
['shu',-13601],
['shua',-13406],
['shuai',-13404],
['shuan',-13400],
['shuang',-13398],
['shui',-13395],
['shun',-13391],
['shuo',-13387],
['si',-13383],
['song',-13367],
['sou',-13359],
['su',-13356],
['suan',-13343],
['sui',-13340],
['sun',-13329],
['suo',-13326],
['ta',-13318],
['tai',-13147],
['tan',-13138],
['tang',-13120],
['tao',-13107],
['te',-13096],
['teng',-13095],
['ti',-13091],
['tian',-13076],
['tiao',-13068],
['tie',-13063],
['ting',-13060],
['tong',-12888],
['tou',-12875],
['tu',-12871],
['tuan',-12860],
['tui',-12858],
['tun',-12852],
['tuo',-12849],
['wa',-12838],
['wai',-12831],
['wan',-12829],
['wang',-12812],
['wei',-12802],
['wen',-12607],
['weng',-12597],
['wo',-12594],
['wu',-12585],
['xi',-12556],
['xia',-12359],
['xian',-12346],
['xiang',-12320],
['xiao',-12300],
['xie',-12120],
['xin',-12099],
['xing',-12089],
['xiong',-12074],
['xiu',-12067],
['xu',-12058],
['xuan',-12039],
['xue',-11867],
['xun',-11861],
['ya',-11847],
['yan',-11831],
['yang',-11798],
['yao',-11781],
['ye',-11604],
['yi',-11589],
['yin',-11536],
['ying',-11358],
['yo',-11340],
['yo',-11340],
['yong',-11339],
['you',-11324],
['yu',-11303],
['yuan',-11097],
['yue',-11077],
['yun',-11067],
['za',-11055],
['zai',-11052],
['zan',-11045],
['zang',-11041],
['zao',-11038],
['ze',-11024],
['zei',-11020],
['zen',-11019],
['zeng',-11018],
['zha',-11014],
['zhai',-10838],
['zhan',-10832],
['zhang',-10815],
['zhao',-10800],
['zhe',-10790],
['zhen',-10780],
['zheng',-10764],
['zhi',-10587],
['zhong',-10544],
['zhou',-10533],
['zhu',-10519],
['zhua',-10331],
['zhuai',-10329],
['zhuan',-10328],
['zhuang',-10322],
['zhui',-10315],
['zhun',-10309],
['zhuo',-10307],
['zi',-10296],
['zong',-10281],
['zou',-10274],
['zu',-10270],
['zuan',-10262],
['zui',-10260],
['zun',-10256],
['zuo',-10254]
];
protected static $py_mult_list = [
'19969' => 'DZ',
'19975' => 'WM',
'19988' => 'QJ',
'20048' => 'YL',
'20056' => 'SC',
'20060' => 'NM',
'20094' => 'QG',
'20127' => 'QJ',
'20167' => 'QC',
'20193' => 'YG',
'20250' => 'KH',
'20256' => 'ZC',
'20282' => 'SC',
'20285' => 'QJG',
'20291' => 'TD',
'20314' => 'YD',
'20340' => 'NE',
'20375' => 'TD',
'20389' => 'YJ',
'20391' => 'CZ',
'20415' => 'PB',
'20446' => 'YS',
'20447' => 'SQ',
'20504' => 'TC',
'20608' => 'KG',
'20854' => 'QJ',
'20857' => 'ZC',
'20911' => 'PF',
'20504' => 'TC',
'20608' => 'KG',
'20854' => 'QJ',
'20857' => 'ZC',
'20911' => 'PF',
'20985' => 'AW',
'21032' => 'PB',
'21048' => 'XQ',
'21049' => 'SC',
'21089' => 'YS',
'21119' => 'JC',
'21242' => 'SB',
'21273' => 'SC',
'21305' => 'YP',
'21306' => 'QO',
'21330' => 'ZC',
'21333' => 'SDC',
'21345' => 'QK',
'21378' => 'CA',
'21397' => 'SC',
'21414' => 'XS',
'21442' => 'SC',
'21477' => 'JG',
'21480' => 'TD',
'21484' => 'ZS',
'21494' => 'YX',
'21505' => 'YX',
'21512' => 'HG',
'21523' => 'XH',
'21537' => 'PB',
'21542' => 'PF',
'21549' => 'KH',
'21571' => 'E',
'21574' => 'DA',
'21588' => 'TD',
'21589' => 'O',
'21618' => 'ZC',
'21621' => 'KHA',
'21632' => 'ZJ',
'21654' => 'KG',
'21679' => 'LKG',
'21683' => 'KH',
'21710' => 'A',
'21719' => 'YH',
'21734' => 'WOE',
'21769' => 'A',
'21780' => 'WN',
'21804' => 'XH',
'21834' => 'A',
'21899' => 'ZD',
'21903' => 'RN',
'21908' => 'WO',
'21939' => 'ZC',
'21956' => 'SA',
'21964' => 'YA',
'21970' => 'TD',
'22003' => 'A',
'22031' => 'JG',
'22040' => 'XS',
'22060' => 'ZC',
'22066' => 'ZC',
'22079' => 'MH',
'22129' => 'XJ',
'22179' => 'XA',
'22237' => 'NJ',
'22244' => 'TD',
'22280' => 'JQ',
'22300' => 'YH',
'22313' => 'XW',
'22331' => 'YQ',
'22343' => 'YJ',
'22351' => 'PH',
'22395' => 'DC',
'22412' => 'TD',
'22484' => 'PB',
'22500' => 'PB',
'22534' => 'ZD',
'22549' => 'DH',
'22561' => 'PB',
'22612' => 'TD',
'22771' => 'KQ',
'22831' => 'HB',
'22841' => 'JG',
'22855' => 'QJ',
'22865' => 'XQ',
'23013' => 'ML',
'23081' => 'WM',
'23487' => 'SX',
'23558' => 'QJ',
'23561' => 'YW',
'23586' => 'YW',
'23614' => 'YW',
'23615' => 'SN',
'23631' => 'PB',
'23646' => 'ZS',
'23663' => 'ZT',
'23673' => 'YG',
'23762' => 'TD',
'23769' => 'ZS',
'23780' => 'QJ',
'23884' => 'QK',
'24055' => 'XH',
'24113' => 'DC',
'24162' => 'ZC',
'24191' => 'GA',
'24273' => 'QJ',
'24324' => 'NL',
'24377' => 'TD',
'24378' => 'QJ',
'24439' => 'PF',
'24554' => 'ZS',
'24683' => 'TD',
'24694' => 'WE',
'24733' => 'LK',
'24925' => 'TN',
'25094' => 'ZG',
'25100' => 'XQ',
'25103' => 'XH',
'25153' => 'PB',
'25170' => 'PB',
'25179' => 'KG',
'25203' => 'PB',
'25240' => 'ZS',
'25282' => 'FB',
'25303' => 'NA',
'25324' => 'KG',
'25341' => 'ZY',
'25373' => 'WZ',
'25375' => 'XJ',
'25384' => 'A',
'25457' => 'A',
'25528' => 'SD',
'25530' => 'SC',
'25552' => 'TD',
'25774' => 'ZC',
'25874' => 'ZC',
'26044' => 'YW',
'26080' => 'WM',
'26292' => 'PB',
'26333' => 'PB',
'26355' => 'ZY',
'26366' => 'CZ',
'26397' => 'ZC',
'26399' => 'QJ',
'26415' => 'ZS',
'26451' => 'SB',
'26526' => 'ZC',
'26552' => 'JG',
'26561' => 'TD',
'26588' => 'JG',
'26597' => 'CZ',
'26629' => 'ZS',
'26638' => 'YL',
'26646' => 'XQ',
'26653' => 'KG',
'26657' => 'XJ',
'26727' => 'HG',
'26894' => 'ZC',
'26937' => 'ZS',
'26946' => 'ZC',
'26999' => 'KJ',
'27099' => 'KJ',
'27449' => 'YQ',
'27481' => 'XS',
'27542' => 'ZS',
'27663' => 'ZS',
'27748' => 'TS',
'27784' => 'SC',
'27788' => 'ZD',
'27795' => 'TD',
'27812' => 'O',
'27850' => 'PB',
'27852' => 'MB',
'27895' => 'SL',
'27898' => 'PL',
'27973' => 'QJ',
'27981' => 'KH',
'27986' => 'HX',
'27994' => 'XJ',
'28044' => 'YC',
'28065' => 'WG',
'28177' => 'SM',
'28267' => 'QJ',
'28291' => 'KH',
'28337' => 'ZQ',
'28463' => 'TL',
'28548' => 'DC',
'28601' => 'TD',
'28689' => 'PB',
'28805' => 'JG',
'28820' => 'QG',
'28846' => 'PB',
'28952' => 'TD',
'28975' => 'ZC',
'29100' => 'A',
'29325' => 'QJ',
'29575' => 'SL',
'29602' => 'FB',
'30010' => 'TD',
'30044' => 'CX',
'30058' => 'PF',
'30091' => 'YSP',
'30111' => 'YN',
'30229' => 'XJ',
'30427' => 'SC',
'30465' => 'SX',
'30631' => 'YQ',
'30655' => 'QJ',
'30684' => 'QJG',
'30707' => 'SD',
'30729' => 'XH',
'30796' => 'LG',
'30917' => 'PB',
'31074' => 'NM',
'31085' => 'JZ',
'31109' => 'SC',
'31181' => 'ZC',
'31192' => 'MLB',
'31293' => 'JQ',
'31400' => 'YX',
'31584' => 'YJ',
'31896' => 'ZN',
'31909' => 'ZY',
'31995' => 'XJ',
'32321' => 'PF',
'32327' => 'ZY',
'32418' => 'HG',
'32420' => 'XQ',
'32421' => 'HG',
'32438' => 'LG',
'32473' => 'GJ',
'32488' => 'TD',
'32521' => 'QJ',
'32527' => 'PB',
'32562' => 'ZSQ',
'32564' => 'JZ',
'32735' => 'ZD',
'32793' => 'PB',
'33071' => 'PF',
'33098' => 'XL',
'33100' => 'YA',
'33152' => 'PB',
'33261' => 'CX',
'33324' => 'BP',
'33333' => 'TD',
'33406' => 'YA',
'33426' => 'WM',
'33432' => 'PB',
'33445' => 'JG',
'33486' => 'ZN',
'33493' => 'TS',
'33507' => 'QJ',
'33540' => 'QJ',
'33544' => 'ZC',
'33564' => 'XQ',
'33617' => 'YT',
'33632' => 'QJ',
'33636' => 'XH',
'33637' => 'YX',
'33694' => 'WG',
'33705' => 'PF',
'33728' => 'YW',
'33882' => 'SR',
'34067' => 'WM',
'34074' => 'YW',
'34121' => 'QJ',
'34255' => 'ZC',
'34259' => 'XL',
'34425' => 'JH',
'34430' => 'XH',
'34485' => 'KH',
'34503' => 'YS',
'34532' => 'HG',
'34552' => 'XS',
'34558' => 'YE',
'34593' => 'ZL',
'34660' => 'YQ',
'34892' => 'XH',
'34928' => 'SC',
'34999' => 'QJ',
'35048' => 'PB',
'35059' => 'SC',
'35098' => 'ZC',
'35203' => 'TQ',
'35265' => 'JX',
'35299' => 'JX',
'35782' => 'SZ',
'35828' => 'YS',
'35830' => 'E',
'35843' => 'TD',
'35895' => 'YG',
'35977' => 'MH',
'36158' => 'JG',
'36228' => 'QJ',
'36426' => 'XQ',
'36466' => 'DC',
'36710' => 'JC',
'36711' => 'ZYG',
'36767' => 'PB',
'36866' => 'SK',
'36951' => 'YW',
'37034' => 'YX',
'37063' => 'XH',
'37218' => 'ZC',
'37325' => 'ZC',
'38063' => 'PB',
'38079' => 'TD',
'38085' => 'QY',
'38107' => 'DC',
'38116' => 'TD',
'38123' => 'YD',
'38224' => 'HG',
'38241' => 'XTC',
'38271' => 'ZC',
'38415' => 'YE',
'38426' => 'KH',
'38461' => 'YD',
'38463' => 'AE',
'38466' => 'PB',
'38477' => 'XJ',
'38518' => 'YT',
'38551' => 'WK',
'38585' => 'ZC',
'38704' => 'XS',
'38739' => 'LJ',
'38761' => 'GJ',
'38808' => 'SQ',
'39048' => 'JG',
'39049' => 'XJ',
'39052' => 'HG',
'39076' => 'CZ',
'39271' => 'XT',
'39534' => 'TD',
'39552' => 'TD',
'39584' => 'PB',
'39647' => 'SB',
'39730' => 'LG',
'39748' => 'TPB',
'40109' => 'ZQ',
'40479' => 'ND',
'40516' => 'HG',
'40536' => 'HG',
'40583' => 'QJ',
'40765' => 'YQ',
'40784' => 'QJ',
'40840' => 'YK',
'40863' => 'QJG'
];
/**
 * Character encoding that getstr() passes to mb_strlen(), mb_substr() and
 * utf8_unicode().
 *
 * @var string
 */
protected static $code = 'utf-8';
/**
 * Build the pinyin-initial abbreviation(s) of a string.
 *
 * Every character contributes one or more candidate initials: the letters
 * stored in $py_mult_list when its Unicode code point is listed there
 * (polyphonic characters), otherwise whatever getfirstchar() returns.
 * Characters that yield no initial are skipped. All combinations of the
 * candidates are generated and joined with '|', e.g. candidates "XY" then
 * "A" produce "XA|YA".
 *
 * @param string $str Text encoded in static::$code.
 * @return string '|'-separated list of abbreviations ('' for empty input).
 */
public static function getstr($str)
{
    $encoding = static::$code;
    $candidates = [];

    $length = mb_strlen($str, $encoding);
    for ($i = 0; $i < $length; $i++) {
        $char = mb_substr($str, $i, 1, $encoding);
        $codePoint = static::utf8_unicode($char, $encoding);

        // Most characters are not in the polyphone table; `??` performs the
        // lookup without raising an "undefined array key" warning for them.
        $candidates[] = static::$py_mult_list[$codePoint] ?? static::getfirstchar($char);
    }

    // Start from a single empty prefix and extend it character by character.
    $result = [''];
    foreach ($candidates as $letters) {
        if ($letters) {
            $result = static::makePY_list($letters, $result);
        }
    }

    return implode('|', $result);
}
/**
 * Append every letter of $str to every prefix in $arr.
 *
 * The result is ordered letter-major: all prefixes extended with the first
 * letter, then all prefixes extended with the second letter, and so on.
 *
 * @param string   $str Candidate letters, one byte each.
 * @param string[] $arr Prefixes built so far.
 * @return string[] The extended prefixes; an empty array when either input
 *                  is empty.
 */
public static function makePY_list($str, $arr)
{
    // Initialised up front so empty input yields [] rather than an
    // undefined variable.
    $combined = [];

    for ($i = 0, $len = strlen($str); $i < $len; $i++) {
        foreach ($arr as $prefix) {
            $combined[] = $prefix . $str[$i];
        }
    }

    return $combined;
}
/**
 * Decode one UTF-8 encoded character into its Unicode code point.
 *
 * @param string $c       A single character.
 * @param string $charset Encoding of $c; anything other than "utf-8" is
 *                        converted to UTF-8 with iconv() first.
 * @return int|null The code point, or null when the UTF-8 byte length is not
 *                  between 1 and 4.
 */
public static function utf8_unicode($c, $charset = "utf-8")
{
    if ($charset != "utf-8") {
        $c = iconv($charset, "utf-8", $c);
    }

    $byteCount = strlen($c);
    if ($byteCount === 1) {
        return ord($c);
    }

    // Mask applied to the lead byte, keyed by sequence length. Each mask is
    // one bit wider than the payload, but that extra bit is always 0 in a
    // well-formed lead byte, so the decoded value is unaffected.
    $leadMasks = [2 => 0x3f, 3 => 0x1f, 4 => 0x0f];
    if (!isset($leadMasks[$byteCount])) {
        return null;
    }

    $codePoint = ord($c[0]) & $leadMasks[$byteCount];
    for ($offset = 1; $offset < $byteCount; $offset++) {
        // Every continuation byte contributes its low six bits.
        $codePoint = ($codePoint << 6) + (ord($c[$offset]) & 0x3f);
    }

    return $codePoint;
}
/**
 * Return the upper-case pinyin initial of a single character.
 *
 * A character whose first byte lies between 'A' and 'z' (this byte range
 * also contains the six punctuation marks between 'Z' and 'a') is returned
 * upper-cased as-is. Otherwise the character is converted to GB2312 and its
 * two-byte code is looked up in a table of code ranges, one range per
 * initial. If the conversion does not round-trip, the input bytes themselves
 * are used, which lets input that is already GB2312-encoded work as well.
 *
 * @param string $s0 One character, UTF-8 or GB2312 encoded.
 * @return string|null The initial, or null when none can be determined
 *                     (empty input, digits, punctuation, characters outside
 *                     the covered code range).
 */
public static function getfirstchar($s0)
{
    $s0 = (string) $s0;
    if ($s0 === '') {
        return null;
    }

    $firstByte = ord($s0[0]);
    if ($firstByte >= ord('A') && $firstByte <= ord('z')) {
        return strtoupper($s0[0]);
    }

    // Use the GB2312 form only when the conversion round-trips; otherwise
    // assume the caller already passed GB2312 bytes.
    $gb = iconv('UTF-8', 'gb2312', $s0);
    $bytes = ($gb !== false && iconv('gb2312', 'UTF-8', $gb) === $s0) ? $gb : $s0;

    // A two-byte code is required; single-byte input has no pinyin.
    if (strlen($bytes) < 2) {
        return null;
    }

    $asc = ord($bytes[0]) * 256 + ord($bytes[1]) - 65536;
    if ($asc < -20319 || $asc > -10247) {
        return null;
    }

    // Lowest code of each initial's range, in ascending order. The ranges
    // are contiguous, so the initial is the last one whose start <= $asc.
    $rangeStarts = [
        'A' => -20319, 'B' => -20283, 'C' => -19775, 'D' => -19218,
        'E' => -18710, 'F' => -18526, 'G' => -18239, 'H' => -17922,
        'J' => -17417, 'K' => -16474, 'L' => -16212, 'M' => -15640,
        'N' => -15165, 'O' => -14922, 'P' => -14914, 'Q' => -14630,
        'R' => -14149, 'S' => -14090, 'T' => -13318, 'W' => -12838,
        'X' => -12556, 'Y' => -11847, 'Z' => -11055,
    ];

    $initial = null;
    foreach ($rangeStarts as $letter => $start) {
        if ($asc < $start) {
            break;
        }
        $initial = $letter;
    }

    return $initial;
}
/**
 * Translate a character code into text.
 *
 * Codes 1..159 are returned as the corresponding single byte. Codes in
 * -20319..-10247 are resolved through static::$lib to a pinyin syllable.
 * Every other code yields an empty string.
 *
 * @param integer $num
 * @return string
 */
protected static function num2str($num)
{
    // Single-byte values pass through unchanged.
    if ($num > 0 && $num < 160) {
        return chr($num);
    }

    // Outside the range covered by the syllable table.
    if ($num < -20319 || $num > -10247) {
        return '';
    }

    // Walk down from the highest entry to the first one whose code does not
    // exceed $num.
    $index = count(static::$lib) - 1;
    while ($index >= 0 && static::$lib[$index][1] > $num) {
        $index--;
    }

    return static::$lib[$index][0];
}
/**
 * Convert Chinese text to pinyin.
 *
 * The text is processed as GBK bytes: a byte above 160 is combined with the
 * byte that follows it into one code, any other byte is its own code, and
 * each code is translated by num2str(). The translations are concatenated
 * without separators.
 *
 * @param string  $str  Text to convert.
 * @param boolean $utf8 Whether $str is UTF-8; when true it is converted to
 *                      GBK first and the result is converted back to UTF-8.
 * @return string|false The pinyin text, or false when $str is empty.
 */
public static function output($str, $utf8 = true)
{
    if ($str == '') {
        return false;
    }

    $bytes = $utf8 ? iconv('utf-8', 'gbk', $str) : $str;
    $length = strlen($bytes);

    $pinyin = '';
    $pos = 0;
    while ($pos < $length) {
        $code = ord(substr($bytes, $pos, 1));
        $pos++;

        if ($code > 160) {
            // Lead byte of a double-byte character: fold in the next byte.
            $code = $code * 256 + ord(substr($bytes, $pos, 1)) - 65536;
            $pos++;
        }

        $pinyin .= static::num2str($code);
    }

    return $utf8 ? iconv('gbk', 'utf-8', $pinyin) : $pinyin;
}
}