作者 : Bun Wong
日期 : 2010年08月05日
标签 : PHP
浏览 : 1252 次
http://www.hdwong.com/article/translate-url.html

标题中文转换成英文文件名

技术交流 @ 2010年08月05日 收藏&分享

因为要考虑到 SEO,今天按需求给项目写了个简单的函数,把文章标题和商品标题转换成英文(中文部分转为拼音)文件名。

转换规则是:

中文中文 => Zhong-Wen-Zhong-Wen
中文chinese => Zhong-Wen-Chinese
chinese chinese => Chinese-Chinese

代码:

/**
 * Helper model that turns a mixed Chinese/Latin title into a hyphen-separated,
 * capitalised file-name slug (Chinese characters are replaced by their pinyin).
 *
 * Examples:
 *   "中文中文"         => "Zhong-Wen-Zhong-Wen"
 *   "中文chinese"      => "Zhong-Wen-Chinese"
 *   "chinese chinese"  => "Chinese-Chinese"
 */
class Common_Model extends Bl_Model
{
  /**
   * ASCII punctuation that is removed from the title before conversion
   * (each character maps to the empty string; used with strtr()).
   */
  public static $trReplace = array(
    '`' => '', '~' => '', '!' => '', '@' => '', '#' => '', '$' => '', '%' => '', '^' => '',
    '&' => '', '*' => '', '(' => '', ')' => '', '-' => '', '_' => '', '=' => '', '+' => '',
    '\\' => '', '|' => '', '[' => '', '{' => '', ']' => '', '}' => '', ';' => '', ':' => '',
    '\'' => '', '"' => '', ',' => '', '<' => '', '.' => '', '>' => '', '/' => '', '?' => '',
  );

  /**
   * Convert a UTF-8 title into a slug of capitalised words joined by "-".
   *
   * Runs of CJK ideographs (U+4E00..U+9FA5) are transliterated syllable by
   * syllable; everything else is split on whitespace. Empty words are dropped,
   * so the result never contains leading, trailing or doubled hyphens caused
   * by spaces or by characters that have no pinyin entry.
   *
   * @param string $string UTF-8 encoded title.
   * @return string The slug, or an empty string when nothing usable remains
   *                (including when the input is not valid UTF-8).
   */
  public function translate($string)
  {
    $string = strtr(trim((string) $string), self::$trReplace);
    $pieces = preg_split(
      '/([\x{4e00}-\x{9fa5}]+)/u',
      $string,
      -1,
      PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
    );
    if ($pieces === false) {
      // preg_split() fails on malformed UTF-8; there is nothing to convert.
      return '';
    }

    $words = array();
    foreach ($pieces as $piece) {
      if (preg_match('/[\x{4e00}-\x{9fa5}]+/u', $piece)) {
        $parts = $this->translateWord($piece);
      } else {
        // Split on any whitespace run so tabs/newlines/multiple spaces
        // cannot leak into the slug or produce "--".
        $parts = preg_split('/\s+/u', $piece, -1, PREG_SPLIT_NO_EMPTY);
        if ($parts === false) {
          $parts = array();
        }
      }
      foreach ($parts as $part) {
        if ($part !== '') {
          $words[] = $part;
        }
      }
    }

    return strtr(ucwords(implode(' ', $words)), ' ', '-');
  }

  /**
   * Transliterate a UTF-8 string of Chinese characters into pinyin syllables.
   *
   * The string is converted to GBK and each double-byte code is looked up in
   * a boundary table: keys are (GBK code - 65536) of the first character of
   * each syllable, in descending order, so the first key that is <= the code
   * gives the syllable. The table only covers the pinyin-ordered region
   * (lead byte 0xB0..0xD7, trail byte >= 0xA1); characters outside that
   * region have no entry and are skipped. Single-byte characters are passed
   * through unchanged.
   *
   * @param string $string UTF-8 encoded text.
   * @return array List of non-empty strings (pinyin syllables and/or
   *               pass-through single-byte characters), in input order.
   */
  private function translateWord($string)
  {
    static $map = array(
      -10254 => 'Zuo', -10256 => 'Zun', -10260 => 'Zui', -10262 => 'Zuan', -10270 => 'Zu', -10274 => 'Zou',
      -10281 => 'Zong', -10296 => 'Zi', -10307 => 'Zhuo', -10309 => 'Zhun', -10315 => 'Zhui', -10322 => 'Zhuang',
      -10328 => 'Zhuan', -10329 => 'Zhuai', -10331 => 'Zhua', -10519 => 'Zhu', -10533 => 'Zhou', -10544 => 'Zhong',
      -10587 => 'Zhi', -10764 => 'Zheng', -10780 => 'Zhen', -10790 => 'Zhe', -10800 => 'Zhao', -10815 => 'Zhang',
      -10832 => 'Zhan', -10838 => 'Zhai', -11014 => 'Zha', -11018 => 'Zeng', -11019 => 'Zen', -11020 => 'Zei',
      -11024 => 'Ze', -11038 => 'Zao', -11041 => 'Zang', -11045 => 'Zan', -11052 => 'Zai', -11055 => 'Za',
      -11067 => 'Yun', -11077 => 'Yue', -11097 => 'Yuan', -11303 => 'Yu', -11324 => 'You', -11339 => 'Yong',
      -11340 => 'Yo', -11358 => 'Ying', -11536 => 'Yin', -11589 => 'Yi', -11604 => 'Ye', -11781 => 'Yao',
      -11798 => 'Yang', -11831 => 'Yan', -11847 => 'Ya', -11861 => 'Xun', -11867 => 'Xue', -12039 => 'Xuan',
      -12058 => 'Xu', -12067 => 'Xiu', -12074 => 'Xiong', -12089 => 'Xing', -12099 => 'Xin', -12120 => 'Xie',
      -12300 => 'Xiao', -12320 => 'Xiang', -12346 => 'Xian', -12359 => 'Xia', -12556 => 'Xi', -12585 => 'Wu',
      -12594 => 'Wo', -12597 => 'Weng', -12607 => 'Wen', -12802 => 'Wei', -12812 => 'Wang', -12829 => 'Wan',
      -12831 => 'Wai', -12838 => 'Wa', -12849 => 'Tuo', -12852 => 'Tun', -12858 => 'Tui', -12860 => 'Tuan',
      -12871 => 'Tu', -12875 => 'Tou', -12888 => 'Tong', -13060 => 'Ting', -13063 => 'Tie', -13068 => 'Tiao',
      -13076 => 'Tian', -13091 => 'Ti', -13095 => 'Teng', -13096 => 'Te', -13107 => 'Tao', -13120 => 'Tang',
      -13138 => 'Tan', -13147 => 'Tai', -13318 => 'Ta', -13326 => 'Suo', -13329 => 'Sun', -13340 => 'Sui',
      -13343 => 'Suan', -13356 => 'Su', -13359 => 'Sou', -13367 => 'Song', -13383 => 'Si', -13387 => 'Shuo',
      -13391 => 'Shun', -13395 => 'Shui', -13398 => 'Shuang', -13400 => 'Shuan', -13404 => 'Shuai', -13406 => 'Shua',
      -13601 => 'Shu', -13611 => 'Shou', -13658 => 'Shi', -13831 => 'Sheng', -13847 => 'Shen', -13859 => 'She',
      -13870 => 'Shao', -13878 => 'Shang', -13894 => 'Shan', -13896 => 'Shai', -13905 => 'Sha', -13906 => 'Seng',
      -13907 => 'Sen', -13910 => 'Se', -13914 => 'Sao', -13917 => 'Sang', -14083 => 'San', -14087 => 'Sai',
      -14090 => 'Sa', -14092 => 'Ruo', -14094 => 'Run', -14097 => 'Rui', -14099 => 'Ruan', -14109 => 'Ru',
      -14112 => 'Rou', -14122 => 'Rong', -14123 => 'Ri', -14125 => 'Reng', -14135 => 'Ren', -14137 => 'Re',
      -14140 => 'Rao', -14145 => 'Rang', -14149 => 'Ran', -14151 => 'Qun', -14159 => 'Que', -14170 => 'Quan',
      -14345 => 'Qu', -14353 => 'Qiu', -14355 => 'Qiong', -14368 => 'Qing', -14379 => 'Qin', -14384 => 'Qie',
      -14399 => 'Qiao', -14407 => 'Qiang', -14429 => 'Qian', -14594 => 'Qia', -14630 => 'Qi', -14645 => 'Pu',
      -14654 => 'Po', -14663 => 'Ping', -14668 => 'Pin', -14670 => 'Pie', -14674 => 'Piao', -14678 => 'Pian',
      -14857 => 'Pi', -14871 => 'Peng', -14873 => 'Pen', -14882 => 'Pei', -14889 => 'Pao', -14894 => 'Pang',
      -14902 => 'Pan', -14908 => 'Pai', -14914 => 'Pa', -14921 => 'Ou', -14922 => 'O', -14926 => 'Nuo',
      -14928 => 'Nue', -14929 => 'Nuan', -14930 => 'Nv', -14933 => 'Nu', -14937 => 'Nong', -14941 => 'Niu',
      -15109 => 'Ning', -15110 => 'Nin', -15117 => 'Nie', -15119 => 'Niao', -15121 => 'Niang', -15128 => 'Nian',
      -15139 => 'Ni', -15140 => 'Neng', -15141 => 'Nen', -15143 => 'Nei', -15144 => 'Ne', -15149 => 'Nao',
      -15150 => 'Nang', -15153 => 'Nan', -15158 => 'Nai', -15165 => 'Na', -15180 => 'Mu', -15183 => 'Mou',
      -15362 => 'Mo', -15363 => 'Miu', -15369 => 'Ming', -15375 => 'Min', -15377 => 'Mie', -15385 => 'Miao',
      -15394 => 'Mian', -15408 => 'Mi', -15416 => 'Meng', -15419 => 'Men', -15435 => 'Mei', -15436 => 'Me',
      -15448 => 'Mao', -15454 => 'Mang', -15625 => 'Man', -15631 => 'Mai', -15640 => 'Ma', -15652 => 'Luo',
      -15659 => 'Lun', -15661 => 'Lue', -15667 => 'Luan', -15681 => 'Lv', -15701 => 'Lu', -15707 => 'Lou',
      -15878 => 'Long', -15889 => 'Liu', -15903 => 'Ling', -15915 => 'Lin', -15920 => 'Lie', -15933 => 'Liao',
      -15944 => 'Liang', -15958 => 'Lian', -15959 => 'Lia', -16155 => 'Li', -16158 => 'Leng', -16169 => 'Lei',
      -16171 => 'Le', -16180 => 'Lao', -16187 => 'Lang', -16202 => 'Lan', -16205 => 'Lai', -16212 => 'La',
      -16216 => 'Kuo', -16220 => 'Kun', -16393 => 'Kui', -16401 => 'Kuang', -16403 => 'Kuan', -16407 => 'Kuai',
      -16412 => 'Kua', -16419 => 'Ku', -16423 => 'Kou', -16427 => 'Kong', -16429 => 'Keng', -16433 => 'Ken',
      -16448 => 'Ke', -16452 => 'Kao', -16459 => 'Kang', -16465 => 'Kan', -16470 => 'Kai', -16474 => 'Ka',
      -16647 => 'Jun', -16657 => 'Jue', -16664 => 'Juan', -16689 => 'Ju', -16706 => 'Jiu', -16708 => 'Jiong',
      -16733 => 'Jing', -16915 => 'Jin', -16942 => 'Jie', -16970 => 'Jiao', -16983 => 'Jiang', -17185 => 'Jian',
      -17202 => 'Jia', -17417 => 'Ji', -17427 => 'Huo', -17433 => 'Hun', -17454 => 'Hui', -17468 => 'Huang',
      -17482 => 'Huan', -17487 => 'Huai', -17496 => 'Hua', -17676 => 'Hu', -17683 => 'Hou', -17692 => 'Hong',
      -17697 => 'Heng', -17701 => 'Hen', -17703 => 'Hei', -17721 => 'He', -17730 => 'Hao', -17733 => 'Hang',
      -17752 => 'Han', -17759 => 'Hai', -17922 => 'Ha', -17928 => 'Guo', -17931 => 'Gun', -17947 => 'Gui',
      -17950 => 'Guang', -17961 => 'Guan', -17964 => 'Guai', -17970 => 'Gua', -17988 => 'Gu', -17997 => 'Gou',
      -18012 => 'Gong', -18181 => 'Geng', -18183 => 'Gen', -18184 => 'Gei', -18201 => 'Ge', -18211 => 'Gao',
      -18220 => 'Gang', -18231 => 'Gan', -18237 => 'Gai', -18239 => 'Ga', -18446 => 'Fu', -18447 => 'Fou',
      -18448 => 'Fo', -18463 => 'Feng', -18478 => 'Fen', -18490 => 'Fei', -18501 => 'Fang', -18518 => 'Fan',
      -18526 => 'Fa', -18696 => 'Er', -18697 => 'En', -18710 => 'E', -18722 => 'Duo', -18731 => 'Dun',
      -18735 => 'Dui', -18741 => 'Duan', -18756 => 'Du', -18763 => 'Dou', -18773 => 'Dong', -18774 => 'Diu',
      -18783 => 'Ding', -18952 => 'Die', -18961 => 'Diao', -18977 => 'Dian', -18996 => 'Di', -19003 => 'Deng',
      -19006 => 'De', -19018 => 'Dao', -19023 => 'Dang', -19038 => 'Dan', -19212 => 'Dai', -19218 => 'Da',
      -19224 => 'Cuo', -19227 => 'Cun', -19235 => 'Cui', -19238 => 'Cuan', -19242 => 'Cu', -19243 => 'Cou',
      -19249 => 'Cong', -19261 => 'Ci', -19263 => 'Chuo', -19270 => 'Chun', -19275 => 'Chui', -19281 => 'Chuang',
      -19288 => 'Chuan', -19289 => 'Chuai', -19467 => 'Chu', -19479 => 'Chou', -19484 => 'Chong', -19500 => 'Chi',
      -19515 => 'Cheng', -19525 => 'Chen', -19531 => 'Che', -19540 => 'Chao', -19715 => 'Chang', -19725 => 'Chan',
      -19728 => 'Chai', -19739 => 'Cha', -19741 => 'Ceng', -19746 => 'Ce', -19751 => 'Cao', -19756 => 'Cang',
      -19763 => 'Can', -19774 => 'Cai', -19775 => 'Ca', -19784 => 'Bu', -19805 => 'Bo', -19976 => 'Bing',
      -19982 => 'Bin', -19986 => 'Bie', -19990 => 'Biao', -20002 => 'Bian', -20026 => 'Bi', -20032 => 'Beng',
      -20036 => 'Ben', -20051 => 'Bei', -20230 => 'Bao', -20242 => 'Bang', -20257 => 'Ban', -20265 => 'Bai',
      -20283 => 'Ba', -20292 => 'Ao', -20295 => 'Ang', -20304 => 'An', -20317 => 'Ai', -20319 => 'A',
    );

    $string = mb_convert_encoding($string, 'GBK', 'UTF-8');
    $return = array();
    $length = strlen($string);

    for ($i = 0; $i < $length; ++$i) {
      $lead = ord($string[$i]);

      if ($lead < 0x81) {
        // Single-byte character: pass through unchanged.
        $return[] = $string[$i];
        continue;
      }

      // GBK lead bytes start at 0x81, not 0xA1: always consume the trail
      // byte so it is never mistaken for a character of its own.
      if ($i + 1 >= $length) {
        break; // truncated double-byte sequence
      }
      $trail = ord($string[++$i]);

      if ($trail < 0xA1) {
        // GBK extension area: not ordered by pinyin, so the table would
        // return an unrelated syllable. Skip instead.
        continue;
      }

      $letter = ($lead << 8) + $trail - 65536;
      if ($letter < -20319 || $letter > -10247) {
        continue; // outside the pinyin-ordered region covered by $map
      }

      // Keys are in descending order: the first key <= $letter is the
      // syllable boundary this character belongs to.
      foreach ($map as $k => $v) {
        if ($letter >= $k) {
          $return[] = $v;
          break;
        }
      }
    }

    return $return;
  }
}

调用方式:

// Build the slug for a mixed Chinese/Latin title and print it.
// Prints: Zhong-Wen-Zhong-Wen-Chinese
$translator = new Common_Model();
$slug = $translator->translate('中文中文chinese');
echo $slug;

© 2011 Bun Wong

本博客基于 Bun PHP Framework 构建 • 粤ICP备07036370号