;;; CEDICT-style dictionary buffer -> HTML conversion.
;;
;; Numbered pinyin ("ni3 hao3") is converted to pinyin with tone marks
;; ("nǐ hǎo") by table lookup, and each dictionary line is printed as a
;; group of <div> elements.

(defconst htmlize-vowel-table
  '(("a" "ā" "á" "ǎ" "à" "a")
    ("e" "ē" "é" "ě" "è" "e")
    ("i" "ī" "í" "ǐ" "ì" "i")
    ("o" "ō" "ó" "ǒ" "ò" "o")
    ("u" "ū" "ú" "ǔ" "ù" "u")
    ("v" "ǖ" "ǘ" "ǚ" "ǜ" "ü"))
  "Tone-marked forms of each vowel.
Each element is a list whose first item is the lookup key and whose
items 1 to 5 are the forms used for tones 1 to 5, so that
\(nth TONE ROW) yields the vowel for TONE.  The key \"v\" stands
for u-umlaut.")

(defconst htmlize-syllable-table
  '(;; -i (after zh ch sh r z c s)
    ("zhi" ("zh." "i")) ("chi" ("ch." "i")) ("shi" ("sh." "i")) ("ri" ("r." "i"))
    ("zi" ("z." "i")) ("ci" ("c." "i")) ("si" ("s." "i"))
    ;; -a
    ("a" ("." "a")) ("ba" ("b." "a")) ("pa" ("p." "a")) ("ma" ("m." "a"))
    ("fa" ("f." "a")) ("da" ("d." "a")) ("ta" ("t." "a")) ("na" ("n." "a"))
    ("la" ("l." "a")) ("ga" ("g." "a")) ("ka" ("k." "a")) ("ha" ("h." "a"))
    ("zha" ("zh." "a")) ("cha" ("ch." "a")) ("sha" ("sh." "a"))
    ("za" ("z." "a")) ("ca" ("c." "a")) ("sa" ("s." "a"))
    ;; -o
    ("o" ("." "o")) ("bo" ("b." "o")) ("po" ("p." "o")) ("mo" ("m." "o"))
    ("fo" ("f." "o"))
    ;; -e
    ("e" ("." "e")) ("me" ("m." "e")) ("de" ("d." "e")) ("te" ("t." "e"))
    ("ne" ("n." "e")) ("le" ("l." "e")) ("ge" ("g." "e")) ("ke" ("k." "e"))
    ("he" ("h." "e")) ("zhe" ("zh." "e")) ("che" ("ch." "e")) ("she" ("sh." "e"))
    ("re" ("r." "e")) ("ze" ("z." "e")) ("ce" ("c." "e")) ("se" ("s." "e"))
    ;; -ai
    ("ai" (".i" "a")) ("bai" ("b.i" "a")) ("pai" ("p.i" "a")) ("mai" ("m.i" "a"))
    ("dai" ("d.i" "a")) ("tai" ("t.i" "a")) ("nai" ("n.i" "a")) ("lai" ("l.i" "a"))
    ("gai" ("g.i" "a")) ("kai" ("k.i" "a")) ("hai" ("h.i" "a"))
    ("zhai" ("zh.i" "a")) ("chai" ("ch.i" "a")) ("shai" ("sh.i" "a"))
    ("zai" ("z.i" "a")) ("cai" ("c.i" "a")) ("sai" ("s.i" "a"))
    ;; -ei
    ("ei" (".i" "e")) ("bei" ("b.i" "e")) ("pei" ("p.i" "e")) ("mei" ("m.i" "e"))
    ("fei" ("f.i" "e")) ("dei" ("d.i" "e")) ("nei" ("n.i" "e")) ("lei" ("l.i" "e"))
    ("gei" ("g.i" "e")) ("hei" ("h.i" "e")) ("zhei" ("zh.i" "e"))
    ("shei" ("sh.i" "e")) ("zei" ("z.i" "e"))
    ;; -ao
    ("ao" (".o" "a")) ("bao" ("b.o" "a")) ("pao" ("p.o" "a")) ("mao" ("m.o" "a"))
    ("dao" ("d.o" "a")) ("tao" ("t.o" "a")) ("nao" ("n.o" "a")) ("lao" ("l.o" "a"))
    ("gao" ("g.o" "a")) ("kao" ("k.o" "a")) ("hao" ("h.o" "a"))
    ("zhao" ("zh.o" "a")) ("chao" ("ch.o" "a")) ("shao" ("sh.o" "a"))
    ("rao" ("r.o" "a")) ("zao" ("z.o" "a")) ("cao" ("c.o" "a")) ("sao" ("s.o" "a"))
    ;; -ou
    ("ou" (".u" "o")) ("pou" ("p.u" "o")) ("mou" ("m.u" "o")) ("fou" ("f.u" "o"))
    ("dou" ("d.u" "o")) ("tou" ("t.u" "o")) ("nou" ("n.u" "o")) ("lou" ("l.u" "o"))
    ("gou" ("g.u" "o")) ("kou" ("k.u" "o")) ("hou" ("h.u" "o"))
    ("zhou" ("zh.u" "o")) ("chou" ("ch.u" "o")) ("shou" ("sh.u" "o"))
    ("rou" ("r.u" "o")) ("zou" ("z.u" "o")) ("cou" ("c.u" "o")) ("sou" ("s.u" "o"))
    ;; -an
    ("an" (".n" "a")) ("ban" ("b.n" "a")) ("pan" ("p.n" "a")) ("man" ("m.n" "a"))
    ("fan" ("f.n" "a")) ("dan" ("d.n" "a")) ("tan" ("t.n" "a")) ("nan" ("n.n" "a"))
    ("lan" ("l.n" "a")) ("gan" ("g.n" "a")) ("kan" ("k.n" "a")) ("han" ("h.n" "a"))
    ("zhan" ("zh.n" "a")) ("chan" ("ch.n" "a")) ("shan" ("sh.n" "a"))
    ("ran" ("r.n" "a")) ("zan" ("z.n" "a")) ("can" ("c.n" "a")) ("san" ("s.n" "a"))
    ;; -en
    ("en" (".n" "e")) ("ben" ("b.n" "e")) ("pen" ("p.n" "e")) ("men" ("m.n" "e"))
    ("fen" ("f.n" "e")) ("nen" ("n.n" "e")) ("gen" ("g.n" "e")) ("ken" ("k.n" "e"))
    ("hen" ("h.n" "e")) ("zhen" ("zh.n" "e")) ("chen" ("ch.n" "e"))
    ("shen" ("sh.n" "e")) ("ren" ("r.n" "e")) ("zen" ("z.n" "e"))
    ("cen" ("c.n" "e")) ("sen" ("s.n" "e"))
    ;; -ang
    ("ang" (".ng" "a")) ("bang" ("b.ng" "a")) ("pang" ("p.ng" "a"))
    ("mang" ("m.ng" "a")) ("fang" ("f.ng" "a")) ("dang" ("d.ng" "a"))
    ("tang" ("t.ng" "a")) ("nang" ("n.ng" "a")) ("lang" ("l.ng" "a"))
    ("gang" ("g.ng" "a")) ("kang" ("k.ng" "a")) ("hang" ("h.ng" "a"))
    ("zhang" ("zh.ng" "a")) ("chang" ("ch.ng" "a")) ("shang" ("sh.ng" "a"))
    ("rang" ("r.ng" "a")) ("zang" ("z.ng" "a")) ("cang" ("c.ng" "a"))
    ("sang" ("s.ng" "a"))
    ;; -eng
    ("eng" (".ng" "e")) ("beng" ("b.ng" "e")) ("peng" ("p.ng" "e"))
    ("meng" ("m.ng" "e")) ("feng" ("f.ng" "e")) ("deng" ("d.ng" "e"))
    ("teng" ("t.ng" "e")) ("neng" ("n.ng" "e")) ("leng" ("l.ng" "e"))
    ("geng" ("g.ng" "e")) ("keng" ("k.ng" "e")) ("heng" ("h.ng" "e"))
    ("zheng" ("zh.ng" "e")) ("cheng" ("ch.ng" "e")) ("sheng" ("sh.ng" "e"))
    ("reng" ("r.ng" "e")) ("zeng" ("z.ng" "e")) ("ceng" ("c.ng" "e"))
    ("seng" ("s.ng" "e"))
    ;; er
    ("er" (".r" "e"))
    ;; -i
    ("yi" ("y." "i")) ("bi" ("b." "i")) ("pi" ("p." "i")) ("mi" ("m." "i"))
    ("di" ("d." "i")) ("ti" ("t." "i")) ("ni" ("n." "i")) ("li" ("l." "i"))
    ("ji" ("j." "i")) ("qi" ("q." "i")) ("xi" ("x." "i"))
    ;; -ia
    ("ya" ("y." "a")) ("lia" ("li." "a")) ("jia" ("ji." "a")) ("qia" ("qi." "a"))
    ("xia" ("xi." "a"))
    ;; yo
    ("yo" ("y." "o"))
    ;; -ie
    ("ye" ("y." "e")) ("bie" ("bi." "e")) ("pie" ("pi." "e")) ("mie" ("mi." "e"))
    ("die" ("di." "e")) ("tie" ("ti." "e")) ("nie" ("ni." "e")) ("lie" ("li." "e"))
    ("jie" ("ji." "e")) ("qie" ("qi." "e")) ("xie" ("xi." "e"))
    ;; yai
    ("yai" ("y.i" "a"))
    ;; -iao
    ("yao" ("y.o" "a")) ("biao" ("bi.o" "a")) ("piao" ("pi.o" "a"))
    ("miao" ("mi.o" "a")) ("diao" ("di.o" "a")) ("tiao" ("ti.o" "a"))
    ("niao" ("ni.o" "a")) ("liao" ("li.o" "a")) ("jiao" ("ji.o" "a"))
    ("qiao" ("qi.o" "a")) ("xiao" ("xi.o" "a"))
    ;; -iu
    ("you" ("y.u" "o")) ("miu" ("mi." "u")) ("diu" ("di." "u")) ("niu" ("ni." "u"))
    ("liu" ("li." "u")) ("jiu" ("ji." "u")) ("qiu" ("qi." "u")) ("xiu" ("xi." "u"))
    ;; -ian
    ("yan" ("y.n" "a")) ("bian" ("bi.n" "a")) ("pian" ("pi.n" "a"))
    ("mian" ("mi.n" "a")) ("dian" ("di.n" "a")) ("tian" ("ti.n" "a"))
    ("nian" ("ni.n" "a")) ("lian" ("li.n" "a")) ("jian" ("ji.n" "a"))
    ("qian" ("qi.n" "a")) ("xian" ("xi.n" "a"))
    ;; -in
    ("yin" ("y.n" "i")) ("bin" ("b.n" "i")) ("pin" ("p.n" "i")) ("min" ("m.n" "i"))
    ("nin" ("n.n" "i")) ("lin" ("l.n" "i")) ("jin" ("j.n" "i")) ("qin" ("q.n" "i"))
    ("xin" ("x.n" "i"))
    ;; -iang
    ("yang" ("y.ng" "a")) ("niang" ("ni.ng" "a")) ("liang" ("li.ng" "a"))
    ("jiang" ("ji.ng" "a")) ("qiang" ("qi.ng" "a")) ("xiang" ("xi.ng" "a"))
    ;; -ing
    ("ying" ("y.ng" "i")) ("bing" ("b.ng" "i")) ("ping" ("p.ng" "i"))
    ("ming" ("m.ng" "i")) ("ding" ("d.ng" "i")) ("ting" ("t.ng" "i"))
    ("ning" ("n.ng" "i")) ("ling" ("l.ng" "i")) ("jing" ("j.ng" "i"))
    ("qing" ("q.ng" "i")) ("xing" ("x.ng" "i"))
    ;; -u
    ("wu" ("w." "u")) ("bu" ("b." "u")) ("pu" ("p." "u")) ("mu" ("m." "u"))
    ("fu" ("f." "u")) ("du" ("d." "u")) ("tu" ("t." "u")) ("nu" ("n." "u"))
    ("lu" ("l." "u")) ("gu" ("g." "u")) ("ku" ("k." "u")) ("hu" ("h." "u"))
    ("zhu" ("zh." "u")) ("chu" ("ch." "u")) ("shu" ("sh." "u")) ("ru" ("r." "u"))
    ("zu" ("z." "u")) ("cu" ("c." "u")) ("su" ("s." "u"))
    ;; -ua
    ("wa" ("w." "a")) ("gua" ("gu." "a")) ("kua" ("ku." "a")) ("hua" ("hu." "a"))
    ("zhua" ("zhu." "a")) ("chua" ("chu." "a")) ("shua" ("shu." "a"))
    ;; -uo
    ("wo" ("w." "o")) ("duo" ("du." "o")) ("tuo" ("tu." "o")) ("nuo" ("nu." "o"))
    ("luo" ("lu." "o")) ("guo" ("gu." "o")) ("kuo" ("ku." "o")) ("huo" ("hu." "o"))
    ("zhuo" ("zhu." "o")) ("chuo" ("chu." "o")) ("shuo" ("shu." "o"))
    ("ruo" ("ru." "o")) ("zuo" ("zu." "o")) ("cuo" ("cu." "o")) ("suo" ("su." "o"))
    ;; -uai
    ("wai" ("w.i" "a")) ("guai" ("gu.i" "a")) ("kuai" ("ku.i" "a"))
    ("huai" ("hu.i" "a")) ("zhuai" ("zhu.i" "a")) ("chuai" ("chu.i" "a"))
    ("shuai" ("shu.i" "a"))
    ;; -ui
    ("wei" ("w.i" "e")) ("dui" ("du." "i")) ("tui" ("tu." "i")) ("gui" ("gu." "i"))
    ("kui" ("ku." "i")) ("hui" ("hu." "i")) ("zhui" ("zhu." "i"))
    ("chui" ("chu." "i")) ("shui" ("shu." "i")) ("rui" ("ru." "i"))
    ("zui" ("zu." "i")) ("cui" ("cu." "i")) ("sui" ("su." "i"))
    ;; -uan
    ("wan" ("w.n" "a")) ("duan" ("du.n" "a")) ("tuan" ("tu.n" "a"))
    ("nuan" ("nu.n" "a")) ("luan" ("lu.n" "a")) ("guan" ("gu.n" "a"))
    ("kuan" ("ku.n" "a")) ("huan" ("hu.n" "a")) ("zhuan" ("zhu.n" "a"))
    ("chuan" ("chu.n" "a")) ("shuan" ("shu.n" "a")) ("ruan" ("ru.n" "a"))
    ("zuan" ("zu.n" "a")) ("cuan" ("cu.n" "a")) ("suan" ("su.n" "a"))
    ;; -un
    ("wen" ("w.n" "e")) ("dun" ("d.n" "u")) ("tun" ("t.n" "u")) ("lun" ("l.n" "u"))
    ("gun" ("g.n" "u")) ("kun" ("k.n" "u")) ("hun" ("h.n" "u"))
    ("zhun" ("zh.n" "u")) ("chun" ("ch.n" "u")) ("shun" ("sh.n" "u"))
    ("run" ("r.n" "u")) ("zun" ("z.n" "u")) ("cun" ("c.n" "u")) ("sun" ("s.n" "u"))
    ;; -uang
    ("wang" ("w.ng" "a")) ("guang" ("gu.ng" "a")) ("kuang" ("ku.ng" "a"))
    ("huang" ("hu.ng" "a")) ("zhuang" ("zhu.ng" "a")) ("chuang" ("chu.ng" "a"))
    ("shuang" ("shu.ng" "a"))
    ;; -ong
    ("weng" ("w.ng" "e")) ("dong" ("d.ng" "o")) ("tong" ("t.ng" "o"))
    ("nong" ("n.ng" "o")) ("long" ("l.ng" "o")) ("gong" ("g.ng" "o"))
    ("kong" ("k.ng" "o")) ("hong" ("h.ng" "o")) ("zhong" ("zh.ng" "o"))
    ("chong" ("ch.ng" "o")) ("rong" ("r.ng" "o")) ("zong" ("z.ng" "o"))
    ("cong" ("c.ng" "o")) ("song" ("s.ng" "o"))
    ;; -u: (written "u" after y j q x, "u:" after n l)
    ("yu" ("y." "u")) ("nu:" ("n." "v")) ("lu:" ("l." "v")) ("ju" ("j." "u"))
    ("qu" ("q." "u")) ("xu" ("x." "u"))
    ;; -u:e
    ("yue" ("yu." "e")) ("nu:e" ("nü." "e")) ("lu:e" ("lü." "e"))
    ("jue" ("ju." "e")) ("que" ("qu." "e")) ("xue" ("xu." "e"))
    ;; -u:an
    ("yuan" ("yu.n" "a")) ("lu:an" ("lü.n" "a")) ("juan" ("ju.n" "a"))
    ("quan" ("qu.n" "a")) ("xuan" ("xu.n" "a"))
    ;; -u:n
    ("yun" ("y.n" "u")) ("lu:n" ("l.n" "v")) ("jun" ("j.n" "u")) ("qun" ("q.n" "u"))
    ("xun" ("x.n" "u"))
    ;; -iong
    ("yong" ("y.ng" "o")) ("jiong" ("ji.ng" "o")) ("qiong" ("qi.ng" "o"))
    ("xiong" ("xi.ng" "o")))
  "Lookup table from toneless pinyin syllables to tone-mark templates.
Each element has the form (SYLLABLE (PATTERN VOWEL)).  PATTERN is the
syllable with a single \".\" standing in for the letter that carries
the tone mark, and VOWEL is the key into `htmlize-vowel-table' for
that letter.  U-umlaut is spelled \"u:\" in SYLLABLE.")

(defun split-string-in-two (string separator)
  "Split STRING into two parts at the first match of regexp SEPARATOR.
Return a list (BEFORE AFTER).  The matched text is included in neither
half, and either half may be the empty string.  SEPARATOR must match
somewhere in STRING; an error is signalled otherwise.

This function changes the match data."
  (let ((start (string-match separator string)))
    (unless start
      (error "Separator %S not found in %S" separator string))
    ;; Use the end of the match rather than START + 1 so that separators
    ;; matching more than one character are removed completely.
    (list (substring string 0 start)
          (substring string (match-end 0)))))

(defun htmlize-pinyin (str)
  "Convert the numbered pinyin syllable STR to pinyin with a tone mark.
STR is a syllable optionally followed by a tone digit from 1 to 5,
for example \"hao3\"; a missing digit is treated as tone 5, which adds
no mark.  The syllable is looked up in `htmlize-syllable-table'
ignoring case; when STR starts with an upper-case letter the first
letter of the result is upper-cased.  A syllable that is not in the
table is returned unchanged apart from having its tone digit removed.
An empty STR yields an empty string."
  (if (string= str "")
      ;; `substring' with a negative index signals an error on "".
      ""
    (let* ((last-char (substring str -1))
           (has-tone (member last-char '("1" "2" "3" "4" "5")))
           (base (if has-tone (substring str 0 -1) str))
           (tone (if has-tone (string-to-number last-char) 5))
           ;; The table keys are lower case.
           (entry (assoc (downcase base) htmlize-syllable-table)))
      (if (null entry)
          base
        (let* ((template (cadr entry))
               (halves (split-string-in-two (car template) "\\."))
               (vowel (nth tone (assoc (cadr template) htmlize-vowel-table)))
               (result (concat (car halves) vowel (cadr halves)))
               (initial (substring base 0 1)))
          (if (string= initial (downcase initial))
              result
            ;; Restore the capital letter of a capitalised syllable.
            (concat (upcase (substring result 0 1))
                    (substring result 1))))))))

(defun htmlize-pinyin-sequence (str)
  "Convert STR, a space-separated sequence of numbered pinyin syllables.
Each syllable is converted with `htmlize-pinyin' and the results are
joined with single spaces.  Leading, trailing and repeated spaces in
STR produce no empty syllables; an empty STR yields an empty string."
  (mapconcat #'htmlize-pinyin (split-string str " " t) " "))

(defun htmlize--escape-html (text)
  "Return TEXT with the characters &, < and > replaced by HTML entities."
  (replace-regexp-in-string
   "[&<>]"
   (lambda (ch)
     (cond ((string= ch "&") "&amp;")
           ((string= ch "<") "&lt;")
           (t "&gt;")))
   text t t))

(defun htmlize-next-line ()
  "Print the HTML representation of the next dictionary line after point.
Search forward from point for a line of the form

  HANZI [PINYIN] /DEFINITION/

and print three <div> elements (classes \"hanzi\", \"pinyin\" and
\"definition\") followed by <br> to `standard-output'.  PINYIN is
converted with `htmlize-pinyin-sequence', and all three fields are
HTML-escaped.  Point is left after the matched line.  Return t if a
line was found and printed, nil if no further line matches."
  (when (re-search-forward
         "^\\([^ ]*\\) *\\[\\([^]]*\\)\\] */\\(.*\\)/ *$" nil t)
    ;; Extract every group before calling helpers, which change the
    ;; match data.
    (let ((hanzi (match-string 1))
          (numbered-pinyin (match-string 2))
          (definition (match-string 3)))
      (princ (concat "<div class=\"hanzi\">"
                     (htmlize--escape-html hanzi)
                     "</div>\n"
                     "<div class=\"pinyin\">"
                     (htmlize--escape-html
                      (htmlize-pinyin-sequence numbered-pinyin))
                     "</div>\n"
                     "<div class=\"definition\">"
                     (htmlize--escape-html definition)
                     "</div>\n"
                     "<br>\n"))
      t)))

(defun htmlize-dictionary-buffer ()
  "Generate a CSS-driven HTML page from the current buffer.
The buffer is assumed to be in UTF-8 encoding and in CEDICT format.
The output goes to a temporary buffer named after the current buffer
with \".html\" appended; it links the stylesheet \"words.css\" and
contains one group of <div> elements per dictionary line, as printed
by `htmlize-next-line'.  Point in the current buffer is preserved."
  (interactive)
  (let ((name (buffer-name (current-buffer))))
    (with-output-to-temp-buffer (concat name ".html")
      ;; The charset declaration has to be inside <head>.
      (princ (concat "<html>\n"
                     "<head>\n"
                     "<meta http-equiv=\"Content-Type\""
                     " content=\"text/html;charset=utf-8\" />\n"
                     "<title>" (htmlize--escape-html name) "</title>\n"
                     "<link rel=\"stylesheet\" type=\"text/css\""
                     " href=\"words.css\" media=\"all\" />\n"
                     "</head>\n"
                     "<body>\n"))
      (save-excursion
        (goto-char (point-min))
        (while (htmlize-next-line)))
      (princ "</body>\n</html>"))))