読者です 読者をやめる 読者になる 読者になる

レガシーコード生産ガイド

私に教えられることなら

Common LispとClojureでパーサコンビネータを作る

clojure CommonLisp

HaskellのParsecがすごいらしい、ということは聞いていたんですが、JavaScriptでパーサコンビネータのコンセプトを理解する(「正規表現だけに頼ってはいけない」の続き) - id:anatooのブログという記事のおかげでようやく使い方と作り方がわかったので、作ってS式をパースしてみました。

ちょっと分かり辛かったのがLazyで、元々関数だからlazyに評価する必要は無いのでは?と思っていたんですが、落ち着いて考えると、パーサを変数に入れる時点でその変数を評価しても未定義でした。

user=> (def yo [1 2 3 yo])
#'user/yo
user=> yo
[1 2 3 #<Unbound Unbound: #'user/yo>]

clojureなら遅延シーケンスを使えばいいかな……と考えていましたが、遅延シーケンス作る時点で評価されてしまいます。引数の評価は正格なので[& parsers]的な定義もできません。

user=> (def hey (take 4 [1 2 3 hey]))
#'user/hey
user=> hey
(1 2 3 #<Unbound Unbound: #'user/hey>)

user=> (+ (do (println "hoge") 3) 4)
hoge
7

今回はlazyマクロを書きますが、引数の評価も遅延なHaskellではlazy使わずに書けるみたいです。

感想

パーサコンビネータだと、読み取り・構造化に加えてある程度の加工もできるということで、S式を読み込んでそのままCommon Lisp/Clojure内でのリスト/シーケンスの表現に直しました。

しかし、コードを見てもらえればわかるように、文法の定義とリスト/シーケンス処理が混ざって分かりづらくなってしまったので、構造の処理は後回しにして、トークンへのタグ付け程度に抑えておいた方がいいかもしれません。

先にCommon Lisp版を書いたのですが、新しい状態を生成して渡していくやり方だと、Clojureのスレッドマクロや(foo 状態 キー 関数や引数)という書き方が便利に感じたのでそのスタイルで書きました。Common Lispらしい書き方も学ぼうといくつかgithubで見てみたんですが、大きくてよくわかりませんでした……

何度も同じ事を書くと洗練されていくだろうし(Common Lisp版の迷いがあるコードもそのままにしました)、別の言語で書くことで理解も深まるかなと思ったんですが、似てるけど細部が違うものを同時に学ぶと干渉が起こりやすいということを忘れていました。特にClojureの方でdefun連発したり、Lisp-1なのを忘れていてstrをモロに上書きして悩んだりしました。似てるものは暫く片方だけ触ったほうが良さそうです。

Common Lisp

使用例

CL-USER> (funcall s-exp " (list 1 2 (list (list 1) 3 4) 5)" 0)

(T ("list" "1" "2" ("list" ("list" "1") "3" "4") "5") 33)

コード

(ql:quickload 'alexandria)

;; utilities

(eval-when (:compile-toplevel :load-toplevel :execute)
  (defun insert-second (xs x)
    (cons (car xs) (cons x (cdr xs))))

  (defun thread-second (fst rests)
    (reduce
     (lambda (inner outer)
       (insert-second outer inner))
     rests :initial-value fst)))

(defmacro -> (fst &rest rests)
  (thread-second fst rests))


;; (success? result next-pos)

(defun success-p (parsed)
  (first parsed))

(defun result (parsed)
  (second parsed))

(defun next-pos (parsed)
  (third parsed))

(defun update-result (parsed f &rest args)
  (parsed (success-p parsed)
          (apply f (result parsed) args)
          (next-pos parsed)))

(defun update-pos (parsed f &rest args)
  (parsed (success-p parsed)
          (result parsed)
          (apply f (next-pos parsed) args)))

(defun parsed (success? result next-pos)
  (list success? result next-pos))


;;; token
;;;-----------------------------------------------------------------------------
(defun substr (str pos len)
  (subseq str pos (+ pos len)))

(defun over-position-p (str pos token-length)
  (> (+ pos token-length) (length str)))

(defun parse-hoge (str pos)
  (if (> (+ pos (length "hoge")) (length str))
      (parsed nil "" pos)
      (if (equal "hoge" (substr str pos (length "hoge")))
          (parsed t "hoge" (+ pos (length "hoge")))
          (parsed nil "" pos))))

(defun token (token)
  (let ((token-len (length token)))
    (lambda (str pos)
      (cond
        ((over-position-p str pos token-len)
         (parsed nil "" pos))
        ((equal token (substr str pos token-len))
         (parsed t token (+ pos token-len)))
        (t (parsed nil "" pos))))))


;;; many
;;; -----------------------------------------------------------------------------
(defun consed (xs x) (cons x xs))

(defun many-sub (str parser parsed)
  (let ((next (funcall parser str (next-pos parsed))))
    (if (success-p next)
        (many-sub str parser (update-result next #'cons (result parsed)))
        parsed)))

(defun many (parser)
  (lambda (str pos)
    (-> (many-sub str parser (parsed t () pos))
        (update-result #'reverse))))

;;; choice
;;; ----------------------------------------------------------------------------
(defun choice (&rest parsers)
  (lambda (str pos)
    (labels ((choice-1 (parsers)
               (if (null parsers) (parsed nil () pos)
                   (let ((next (funcall (car parsers) str pos)))
                     (if (success-p next) next
                         (choice-1 (cdr parsers)))))))
      (choice-1 parsers))))


;;; seq
;;; -----------------------------------------------------------------------------
(defun seq (&rest parsers)
  (lambda (str pos)
    (labels ((seq-1 (parsers parsed)
               (if (null parsers) parsed
                   (let ((next (funcall (car parsers) str (next-pos parsed))))
                     (if (success-p next)
                         (seq-1 (cdr parsers) (update-result next #'cons (result parsed)))
                         (parsed nil nil pos))))))
      (update-result (seq-1 parsers (parsed t () pos))
                     #'reverse))))

;;; option
;;; -----------------------------------------------------------------------------
(defun option (parser)
  (lambda (str pos)
    (let ((next (funcall parser str pos)))
      (if (success-p next) next (parsed t nil pos)))))

;;; lazy
;;; ----------------------------------------------------------------------------
(eval-when (:compile-toplevel :load-toplevel :execute)
  (defun lazy-parse (callback)
    (let (parser)
      (lambda (str pos)
        (unless parser (setf parser (funcall callback)))
        (funcall parser str pos)))))

(defmacro lazy (&body body)
  `(lazy-parse (lambda () ,@body)))

(defvar test-many (option (seq (token "hoge") (lazy test-many))))
;; (funcall test-many "hogehogehogehoge" 0)

;;; result updater
;;; -----------------------------------------------------------------------------

;; append
(defun append-result (parsed)
  (update-result parsed #'append))

(defun append-parser (parser)
  (lambda (str pos)
    (append-result (funcall parser str pos))))

;; flatten
(defun flatten-result (parsed)
  (update-result parsed #'alexandria:flatten))

(defun flatten-parser (parser)
  (lambda (str pos)
    (flatten-result (funcall parser str pos))))

;; flatten-once
(defun flatten-once (xs)
  (reduce
   (lambda (acc x) (append acc (if (listp x) x (list x))))
   xs :initial-value ()))

(defun flatten-once-result (parsed)
  (update-result parsed #'flatten-once))

(defun flatten-once-parser (parser)
  (lambda (str pos)
    (flatten-once-result (funcall parser str pos))))


;; nil remover
(defun remove-nil (xs)
  (remove-if #'null xs))

(defun remove-nil-result (parsed)
  (update-result parsed #'remove-nil))

(defun nil-remover (parser)
  (lambda (str pos)
    (remove-nil-result (funcall parser str pos))))

;; mapcar
(defun mapcar-result (parsed f)
  (let ((r (mapcar f (result parsed))))
    (parsed (success-p parsed) r (next-pos parsed))))

;; concat
(defun str (&rest strs) (format nil "~{~A~}" strs))

(defun strs (strs) (apply #'str strs))

(defun concat-result (parsed)
  (-> (flatten-result parsed)
      (update-result #'strs)))

(defun concat-parser (parser)
  (lambda (str pos)
    (concat-result (funcall parser str pos))))

;;; map
;;; -----------------------------------------------------------------------------
(defun flatten-f (f parser)
  (lambda (str pos)
    (let ((next (funcall parser str pos)))
      (if (success-p next)
          (-> (flatten-result next)
              (update-result f))
          next))))

(defun flatten-map (f parser)
  (lambda (str pos)
    (let ((next (funcall parser str pos)))
      (if (success-p next)
          (-> (flatten-result next) (mapcar-result f))
          next))))

;;; combined
;;; -----------------------------------------------------------------------------
(defun chars (str)
  (let* ((chars  (map 'list #'string str))
         (tokens (mapcar #'token chars)))
    (apply #'choice tokens)))

(defun needless-token (str)
  (flatten-f (lambda (token) (declare (ignore token)) nil) (token str)))

;;; s-exp
;;; -----------------------------------------------------------------------------
(defvar minus (token "-"))
(defvar top-digit (chars "123456789"))
(defvar digit (chars "0123456789"))
(defvar numbers
  (lambda (str pos)
    (concat-result
     (funcall (seq (option minus) top-digit (many digit)) str pos))))

(defvar upper-case (chars "ABCDEFGHIJKLMNOPQRSTUVWXYZ"))
(defvar lower-case (chars "abcdefghijklmnopqrstuvwxyz"))
(defvar marks (chars "!#$%&'=-~^|\\`@{[}]*:+;<,>.?/_"))
(defvar characters (choice upper-case lower-case marks digit))
(defvar symbols
  (lambda (str pos)
    (concat-result
     (funcall (seq characters (many characters)) str pos))))

(defvar token-space (needless-token " "))
(defvar spaces (seq token-space (many token-space)))
(defvar sp? (option spaces))

(defvar l-paren (needless-token "("))
(defvar r-paren (needless-token ")"))

(defvar token-atom (choice numbers symbols))

(defvar s-exp
      (nil-remover
       (choice
        (concat-parser (seq sp? token-atom sp?))
        (flatten-once-parser
         (seq sp? l-paren sp?
              (many (lazy s-exp))
              sp? r-paren sp?)))))

Clojure

使用例

user=> (s-exp " (list 1 2 (list (list 1) 3 4) 5)" 0)
{:success? true, :result ("list" "1" "2" ("list" ("list" "1") "3" "4") "5"), :next-pos 33}

コード

;;; data
;;; ----------------------------------------------------------------------------
(defn parsed [success? result next-pos]
  {:success? success? :result result :next-pos next-pos})

(defn failed [pos] (parsed false () pos))

(defn conj-result [result next]
  (if (or (empty? (:result next)) (= (:result next) nil))
    result (conj result (:result next))))

;;; token
;;; ----------------------------------------------------------------------------
(defn substr [str pos len]
  (subs str pos (+ pos len)))

(defn over-position? [str pos len]
  (> (+ pos len) (count str)))

(defn token [tk]
  (let [len (count tk)]
    (fn [str pos]
      (cond
        (over-position? str pos len) (failed pos)
        (= tk (substr str pos len)) (parsed true tk (+ pos len))
        :else (failed pos)))))

;;; many
;;; ----------------------------------------------------------------------------
(defn many [parser]
  (fn [str pos]
    (loop [result [] pos pos next (parser str pos)]
      (if (:success? next)
        (recur (conj-result result next)
               (:next-pos next)
               (parser str (:next-pos next)))
        (parsed true result pos)))))

;;; choice
;;; ----------------------------------------------------------------------------
(defn choice [& parsers]
  (fn [str pos]
    (loop [parsers parsers]
      (if (empty? parsers) (parsed false [] pos)
          (let [next ((first parsers) str pos)]
            (if (:success? next) next
                (recur (rest parsers))))))))

;;; pseq
;;; ----------------------------------------------------------------------------
(defn pseq [& parsers]
  (fn [str pos]
    (loop [result [] next-pos pos parsers parsers]
      (if (empty? parsers) (parsed true result next-pos)
          (let [next ((first parsers) str next-pos)]
            (if (:success? next)
              (recur (conj-result result next)
                     (:next-pos next)
                     (rest parsers))
              (failed pos)))))))

;;; option
;;; ----------------------------------------------------------------------------
(defn option [parser]
  (fn [str pos]
    (let [next (parser str pos)]
      (if (:success? next) next (parsed true [] pos)))))

;;; lazy
;;; ----------------------------------------------------------------------------
(defmacro lazy [parser]
  `(fn [str# pos#] (~parser str# pos#)))

;; (def recur-many (option (pseq (token "hoge") (lazy recur-many))))
;; (recur-many "hogehogehogehoge" 0)
;; {:success? true, :result ["hoge" ["hoge" ["hoge" ["hoge" []]]]], :next-pos 16}

;;; map
;;; ----------------------------------------------------------------------------
(defn map-parser [f parser]
  (fn [str pos]
    (let [next (parser str pos)]
      (if (:success? next)
        (update-in next [:result] #(map f %))
        next))))

;;; result utilities
;;; ----------------------------------------------------------------------------
(defn updater [f]
  (fn [parser]
    (fn [str pos]
      (update-in (parser str pos) [:result] f))))

(def pflatten (updater flatten))

(def pconcat (updater concat))

(def pstr (updater #(apply str %)))

(defn pflatten-str [parser]
  (pstr (pflatten parser)))

(defn flatten-once [coll]
  (reduce
   (fn [acc x]
     (concat acc (if (vector? x) x [x])))
   [] coll ))

(defn pflatten-once [parser]
  (fn [str pos]
    (update-in (parser str pos) [:result] flatten-once)))

(defn unvector [[x]] x)
(def punvector (updater unvector))

;;; combined
;;; ----------------------------------------------------------------------------
(defn pchar [s] (apply choice (map token (clojure.string/split s #""))))

(defn needless-token [tk]
  (fn [str pos]
    (assoc ((token tk) str pos) :result nil)))


;;; s-exp
;;; ----------------------------------------------------------------------------
(def minus (token "-"))
(def top-digit (pchar "123456789"))
(def digit (pchar "0123456789"))
(def numbers (pseq (option minus) top-digit (many digit)))
(def s-number (pflatten-str numbers))

(def upper-case (pchar "ABCDEFGHIJKLMNOPQRSTUVWXYZ"))
(def lower-case (pchar "abcdefghijklmnopqrstuvwxyz"))
(def marks (pchar "!#$%&'=-~^|\\`@{[}]*:+;<,>.?/_"))
(def characters (choice upper-case lower-case marks digit))
(def sym (pseq characters (many characters)))
(def s-symbol (pflatten-str sym))

(def token-space (needless-token " "))
(def sp? (many token-space))

(def l-paren (needless-token "("))
(def r-paren (needless-token ")"))

(def s-atom (choice s-number s-symbol))

(def s-exp
  (choice (punvector (pseq sp? s-atom sp?))
          (pflatten-once
           (pseq sp? l-paren sp?
                 sp? (many (lazy s-exp)) sp?
                 sp? r-paren sp?))))
広告を非表示にする