naoya_t:parser.peg利用メモ

naoya_t:parser.peg利用メモ

検索文字列のパーサをPEGで書いた例

(green|black|apple) tea とか drink -coffee のような検索文字列をパースする

;;; query-string-parser.scm
(use parser.peg)

(unless (global-variable-bound? (find-module 'user) 'peg-parse-string)
  (define peg-parse-string parse-string)
  (define ($try p) p))

;;
;; query string parser
;;
(define (combine ca cd)
  (if (null? cd) ca
      (let loop ((rest cd) (op 'and) (result (list ca)))
        (if (null? rest)
            (cons op (reverse! result))
            (let1 top (car rest)
              (if (pair? top)
                  (let ([curr-op (car top)]
                        [arg (cadr top)])
                    (cond [(eq? 'not curr-op)
                           (loop (cdr rest) op (cons (list 'not arg) result))]
                          [(pair? arg)
                           (if (eq? op (car arg))
                               (loop (cdr rest) op (append (reverse! (cdr arg)) result))
                               (loop (cdr rest) curr-op (cons arg result)))]
                          [else
                           (loop (cdr rest) curr-op (cons arg result))]
                          ))
                  (loop (cdr rest) op (cons top result))
                  ))))))

(define query-string-parser
  (letrec ([%slash  ($char #\/)]
           [%dquote ($char #\")]

         ;; or --> { "|" | "||" }
         [%or ($do [  ($many ($char #\|) 1 2)]
                   ($return 'or))]

         ;; and --> { "&" | "&&" }
         [%and ($do [  ($many ($char #\&) 1 2)]
                    ($return 'and))]

         ;; escaped --> \ .
         [escaped ($do [   ($char #\\)]
                       [ch anychar]
                       ($return ch))]

         ;; escaped-slash --> \ /
         [escaped-slash ($do [   ($char #\\)]
                             [sl ($char #\/)]
                             ($return sl))]
         [escaped-dquote ($do [   ($char #\\)]
                              [dq ($char #\")]
                              ($return dq))]

         ;; regexp --> / { escaped-slash | escaped | [^/] }* /
         [regexp ($do [  %slash]
                      [x ($many ($or escaped-slash escaped ($none-of #[/])) 1)]
                      [  %slash]
                      ($return (string->regexp (list->string x))))]

         ;; quoted --> " [^"]* "
         [quoted ($do [  %dquote]
                      [x ($many ($or escaped-dquote ($none-of #[\"])) 1)]
                      [  %dquote]
                      ($return (list->string x)))]

         ;; word --> [^ &()|]+
         [word ($do [x  ($many ($none-of #[ \&\(\)\|]) 1)]
                    ($return (list->string x)))]

         ;; tr --> tr:
         [tr ($do [x  ($try ($seq ($char #\t) ($char #\r) ($char #\:)))]
                  ($return 'trans))]

         ;; field --> [-+]? { regexp | quoted | word }
         [field ($do [pm     ($optional ($one-of #[-+]))]
                     [target ($optional tr)]
                     [one    ($or regexp quoted word)]
                     ($return (let1 body (if target (list 'trans one) one)
                                (if (and pm (eq? pm #\-)) (list 'not body) body)))
                     )]


         ;; <space> --> " "
         [%space ($do [   ($char #\Space)]
                      ($return 'spc))]
         [%space* ($many %space 0)]
         [%space+ ($do [   ($many %space 1)]
                      ($return 'spc))]

         ;; neg --> [!¬]
         [%neg ($do [  ($one-of #[-!¬])]
                    [  %space*]
                    ($return 'not))]

         ;; lp --> <space>* ( <space>*
         [%lp ($char #\()]
         ;; rp --> <space>* ) <space>*
         [%rp ($char #\))]

         [operator ($or %and %or)]

         ;; neg? expr
         [one ($do [neg        ($optional %neg)]
                   [body-expr  ($or paren field)]
                   ($return (if neg (list 'not body-expr)
                                body-expr)))]
         
         ;; query-string --> <space>* { one }[<space>+]+ <space>*
         ;; __ one { __ op? __ one }* __
         [sequence ($do [ca  one]
                        [cd  ($many ($try ($do (   %space*)
                                               (op ($optional operator))
                                               (   %space*)
                                               (f  one)
                                               ($return (if op (list op f) f))
                                               ))
                                    0)]
                        ($return (combine ca cd)))]

         [paren ($do [    %lp]
                     [    %space*]
                     [seq sequence]
                     [    %space*]
                     [    %rp]
                     ($return seq))]

         [query-string ($do [    %space*]
                            [seq sequence]
                            [    %space*]
                            ($return seq))]
         )
    query-string))

(define (zspaces->hspace str) (regexp-replace-all #/ +/ str " "))

(define (parse-query-string qs)
  (guard (e (else qs))
    (peg-parse-string query-string-parser (zspaces->hspace qs))))

テスト

(use gauche.test)

(require "./query-string-parser")
(test-start "query-string converter, with PEG parser")

(define-macro (test-parser* qs expected)
  `(test* ,(format #f "~s" qs) ,expected (parse-query-string ,qs)))

(test-section "normal")
(test-parser* "apple" "apple")
(test-parser* " apple" "apple")
(test-parser* "this is a pen" '(and "this" "is" "a" "pen"))
(test-parser* "this  is  a  pen" '(and "this" "is" "a" "pen"))
(test-parser* "これはりんごです" "これはりんごです")
(test-parser* "これは りんご です" '(and "これは" "りんご" "です"))
(test-parser* "これは りんご です" '(and "これは" "りんご" "です"))
(test-parser* "  これは  りんご   です " '(and "これは" "りんご" "です"))
(test-parser* "this ¥"apple seed¥" is edible" '(and "this" "apple seed" "is" "edible"))
(test-parser* "¥"That means ...¥" ¥"I'm single again!¥"" '(and "That means ..." "I'm single again!"))
(test-parser* "/^abc/" #/^abc/)
(test-parser* "/^abc defg/" #/^abc defg/)
(test-parser* "/^abc/ /defg$/" '(and #/^abc/ #/defg$/))
(test-parser* "/^ab¥¥/c/" #/^ab¥/c/)

(test-parser* "+this -is a pen" '(and "this" (not "is") "a" "pen"))

(test-parser* "!xyz" '(not "xyz"))
(test-parser* "!¥"abc xyz¥"" '(not "abc xyz"))
(test-parser* "!/^[a-z]/" '(not #/^[a-z]/))
(test-parser* "!(abc|def)" '(not (or "abc" "def")))

(test-parser* "(abc|def)" '(or "abc" "def"))
(test-parser* "(abc|def|ghi)" '(or "abc" "def" "ghi"))
(test-parser* "( abc | def )" '(or "abc" "def"))
(test-parser* "( abc | /def/ )" '(or "abc" #/def/))
(test-parser* "( /abc/ | def )" '(or #/abc/ "def"))
(test-parser* "( abc | /def/ | ghi )" '(or "abc" #/def/ "ghi"))

(test-parser* "(abc&def)" '(and "abc" "def"))
(test-parser* "(abc&def&ghi)" '(and "abc" "def" "ghi"))
(test-parser* "( abc & def )" '(and "abc" "def"))
(test-parser* "( abc & /def/ )" '(and "abc" #/def/))
(test-parser* "( /abc/ & def )" '(and #/abc/ "def"))
(test-parser* "( abc & /def/ & ghi )" '(and "abc" #/def/ "ghi"))

;; 括弧なし
(test-parser* "abc|def" '(or "abc" "def"))
(test-parser* "abc|def|ghi" '(or "abc" "def" "ghi"))
(test-parser* "abc | def" '(or "abc" "def"))
(test-parser* "abc | /def/" '(or "abc" #/def/))
(test-parser* "/abc/ | def" '(or #/abc/ "def"))
(test-parser* "abc | /def/ | ghi" '(or "abc" #/def/ "ghi"))
(test-parser* "abc&def" '(and "abc" "def"))
(test-parser* "abc&def&ghi" '(and "abc" "def" "ghi"))
(test-parser* "abc & def" '(and "abc" "def"))
(test-parser* "abc & /def/" '(and "abc" #/def/))
(test-parser* "/abc/ & def" '(and #/abc/ "def"))
(test-parser* "abc & /def/ & ghi" '(and "abc" #/def/ "ghi"))

(test-parser* "abc || def" '(or "abc" "def"))
(test-parser* "abc && def" '(and "abc" "def"))

(test-parser* "(( abc | def ) & ghi & (jkl & mno))" '(and (or "abc" "def") "ghi" "jkl" "mno"))
(test-parser* "( abc | def ) & ghi & (jkl & mno)" '(and (or "abc" "def") "ghi" "jkl" "mno"))
(test-parser* "(( abc | def ) & ghi & (jkl | mno))" '(and (or "abc" "def") "ghi" (or "jkl" "mno")))
(test-parser* "( abc | def ) & ghi & (jkl | mno)" '(and (or "abc" "def") "ghi" (or "jkl" "mno")))

;;;;;
(test-parser* "abc && tr:def" '(and "abc" (trans "def")))
(test-parser* "tr:abc && def" '(and (trans "abc") "def"))
(test-parser* "abc | tr:def" '(or "abc" (trans "def")))
(test-parser* "tr:abc | def" '(or (trans "abc") "def"))

;;;;
(test-section "regexp")
(test-parser* "/^[a-z]/" #/^[a-z]/)
(test-parser* "/^[0-9]/" #/^[0-9]/)
(test-parser* "/^a.....g/" #/^a.....g/)
(test-parser* "/efg$/" #/efg$/)
(test-parser* "/...def./" #/...def./)

;;;;
(test-section "not supported")
(test-parser* "/zzz" "/zzz")
(test-parser* "/^zzz" "/^zzz")
(test-parser* "zzz$/" "zzz$/")
(test-parser* "¥"aa bb cc" "¥"aa bb cc")
(test-parser* "a¥"a b/b c/c" '(and "a¥"a" "b/b" "c/c"))

(test-end)

この query-string-parser を使って

(define (make-criteria-proc qs)
  (let1 criteria (parse-query-string qs)
    (lambda (haystack)
      (define (match-criteria expr)
        (cond [(pair? expr)
               (case (car expr)
                 [(and) (every identity (map match-criteria (cdr expr)))]
                 [(or) (any identity (map match-criteria (cdr expr)))]
                 [(not) (not (match-criteria (cadr expr)))]
                 [else (every identity (map match-criteria expr))]
                 )]
              [(string? expr) (string-scan haystack expr)]
              [(regexp? expr) (expr haystack)]
              [else #f]))
      (match-criteria criteria)
      )))

のように、検索文字列をフィルタ関数にコンパイルしています。(実際にはもう少し色々な条件が入りますが説明のため簡略化しました)


Last modified : 2008/08/23 10:09:43 UTC