(defconst +szotar-tones+ '(("~" . high) ("/" . rising) ("\\" . falling))) (defconst +szotar-diacritics+ '((high . "̄") (rising . "́") (falling . "̀"))) (defun tone-class (str) "Return NIL for no tone and for tones 3 and 6; return a list of \(consonants type first-vocal rest) otherwise. In the case of ng, g takes the role of the vocal." (let ((match (string-match (concat "^\\([^aeiou]*\\)\\([aeiou]\\)" "\\([a-z]*\\)\\([^a-z]\\)\\(.*\\)$") str))) (if match (let ((consonant (match-string 1 str)) (vocal (match-string 2 str)) (rest (match-string 3 str)) (type (match-string 4 str)) (post (match-string 5 str))) (let ((tone (cdr (assoc type +szotar-tones+)))) (when tone (list consonant tone vocal (concat rest post))))) (let ((match (string-match "^ng\\(h?\\)\\([~/\\]\\)\\(.*\\)$" str))) (when match (let ((rest (match-string 1 str)) (type (match-string 2 str)) (post (match-string 3 str))) (let ((tone (cdr (assoc type +szotar-tones+)))) (when tone (list "n" tone "g" (concat rest post)))))))))) (defun format-toned (consonants type first-vocal rest) (let ((diacritic (cdr (assoc type +szotar-diacritics+)))) (concat consonants first-vocal diacritic rest))) (defun parse-accents (str) "For every word, if it ends with ~, / or \\, mark it with high, rising and falling, respectively." (let ((words (split-string str))) (with-output-to-string (while words (let ((toned-word (tone-class (car words)))) (if toned-word (princ (apply #'format-toned toned-word)) (princ (car words))) (when (cdr words) (princ " "))) (setq words (cdr words)))))) (defun cantonese-htmlize (str) (interactive "syuht ping: ") (kill-new (parse-accents str))) (defun szotar-next-line () "Takes the next line and prints its HTML representation." (if (re-search-forward "^\\(.*\\) = \\(.*\\)$" (point-max) t) (let* ((definition (match-string 2)) (cantonese (parse-accents (match-string 1)))) (princ (concat "\n" "" cantonese "\n" "" definition "\n" "\n")) t) nil)) (defun szotar-htmlize () "Generates a CSS-driven HTML file from the current buffer. The buffer is assumed to be in UTF-8 encoding." (interactive) (with-output-to-temp-buffer (concat (buffer-name (current-buffer)) ".html") (princ (concat "\n" "\n" "" (buffer-name (current-buffer)) "\n" "\n" "\n" "\n" "\n" "\n")) (save-excursion (goto-char (point-min)) (while (szotar-next-line))) (princ "