Gauche:whitespace

Gauche:whitespace

whitespace


空白文字(スペース、タブ、改行)だけで書くプログラミング言語。 まぁ遊びってことで作りました。 インタプリタですが、-aオプションでVMのマシン語命令列を出力します。 それをそのままwsdisasmに食わせれば、ディスアセンブルしてwhitespaceのコードに戻します。 また、-dオプションでデバッグモードで起動するので、VM命令列のリスト、 スタック、コールスタック、ヒープの確認と ブレークポイントの行指定での設定と一括解除のみができる。 ステップ実行も可能です。 レジスタの内容を変更することはできませんが、ちょっとハックすれば可能。

今回は副作用を使わずに実装してます。ってところだけがこだわったところかな。 util.matchばんざーいって感じです。cut-sea:2006/05/08 00:16:34 PDT

使ってみて、wsの動作で以下の3点が気になったので報告します。 いわもとこういち:2009/02/24 05:03:52 PST

ws(インタプリタ、デバッガ、VM命令列ダンプ)

":"; exec gosh $0 "$@"
;; -*- coding: euc-jp; mode: scheme -*-
;; whitespace interpreter
;;
(use srfi-1)
(use file.util)
(use util.list)
(use util.match)
(use gauche.parseopt)
(use gauche.parameter)

(define (usage)
  (format #t "Usage: ws [-a|asm] [-d|debug] [-h|help] <script>~%")
  (exit 0))

;; for parser
;;
(define (parseNum n c)
  (define (makenum ns)
    (match ns
       ((sign . args)
        (* (expt -1 sign)
           (fold (lambda (c r)
                   (+ c (* r 2))) 0 args)))))
  (cond ((null? c) (values (makenum (reverse n)) c))
        (else (match c
                 ((#\lf . args) (values (makenum (reverse n)) args))
                 ((#\sp . args) (parseNum (cons 0 n) args))
                 ((#\tab . args) (parseNum (cons 1 n) args))))))

(define (parseStr s c)
  (define (bits->digits bits)
    (fold (lambda (c r)
            (+ c (* r 2))) 0 bits))
  (define (makestr ss)
    (apply string
           (reverse
            (fold (lambda (d r)
                    (cons (integer->char (bits->digits d)) r)) '() (slices ss 8)))))
  (cond ((null? c) (values (makestr (reverse s)) c))
        (else (match c
                 ((#\lf . args) (values (makestr (reverse s)) args))
                 ((#\sp . args) (parseStr (cons 0 s) args))
                 ((#\tab . args) (parseStr (cons 1 s) args))))))

(define (parse i c)
  (cond ((null? c) (reverse i))
        (else (match c
                ((#\sp #\sp . args) (receive (num rest) (parseNum '() args)
                                      (parse (cons (%push num) i) rest)))
                ((#\sp #\lf #\sp . args) (parse (cons (%dup) i) args))
                ((#\sp #\tab #\sp . args) (receive (num rest) (parseNum '() args)
                                            (parse (cons (%ref num) i) rest)))
                ((#\sp #\tab #\lf . args) (receive (num rest) (parseNum '() args)
                                            (parse (cons (%slide num) i) rest)))
                ((#\sp #\lf #\tab . args) (parse (cons (%swap) i) args))
                ((#\sp #\lf #\lf . args) (parse (cons (%discard) i) args))
                
                ((#\tab #\sp #\sp #\sp . args) (parse (cons (%math +) i) args))
                ((#\tab #\sp #\sp #\tab . args) (parse (cons (%math -) i) args))
                ((#\tab #\sp #\sp #\lf . args) (parse (cons (%math *) i) args))
                ((#\tab #\sp #\tab #\sp . args) (parse (cons (%math /) i) args))
                ((#\tab #\sp #\tab #\tab . args) (parse (cons (%math modulo) i) args))
                
                ((#\tab #\tab #\sp . args) (parse (cons (%store) i) args))
                ((#\tab #\tab #\tab . args) (parse (cons (%retrieve) i) args))
                
                ((#\lf #\sp #\sp . args) (receive (str rest) (parseStr '() args)
                                           (parse (cons (%label str) i) rest)))
                ((#\lf #\sp #\tab . args) (receive (str rest) (parseStr '() args)
                                            (parse (cons (%call str) i) rest)))
                ((#\lf #\sp #\lf . args) (receive (str rest) (parseStr '() args)
                                           (parse (cons (%jump str) i) rest)))
                ((#\lf #\tab #\sp . args) (receive (str rest) (parseStr '() args)
                                            (parse (cons (%if zero? str) i) rest)))
                ((#\lf #\tab #\tab . args) (receive (str rest) (parseStr '() args)
                                             (parse (cons (%if negative? str) i) rest)))
                
                ((#\lf #\tab #\lf . args) (parse (cons (%return) i) args))
                ((#\lf #\lf #\lf . args) (parse (cons (%end) i) args))
                
                ((#\tab #\lf #\sp #\sp . args) (parse (cons (%outputchar) i) args))
                ((#\tab #\lf #\sp #\tab . args) (parse (cons (%outputnum) i) args))
                ((#\tab #\lf #\tab #\sp . args) (parse (cons (%readchar) i) args))
                ((#\tab #\lf #\tab #\tab . args) (parse (cons (%readnum) i) args))
                
                ((ch . args) (parse i args))))))

(define-macro (define-ws-instr expr body . rests)
  (let ((instr (car expr))
        (ops (cdr expr)))
    `(define ,expr
       (lambda args
         (match args
            ((prog stack cs heap pc br bp) ,body ,@rests)
            ((cmd) `(,',instr ,@,(cons 'list ops))))))))

;; definition of WhiteSpace VM instructions
;;
(define-ws-instr (%push num)
  (vm prog (cons num stack) cs heap pc br bp))
(define-ws-instr (%dup)
  (vm prog (cons (car stack) stack) cs heap pc br bp))
(define-ws-instr (%ref num)
  (vm prog (cons (list-ref stack num) stack) cs heap pc br bp))
(define-ws-instr (%slide num)
  (vm prog (cons (car stack) (drop stack num)) cs heap pc br bp))
(define-ws-instr (%swap)
  (vm prog (cons (cadr stack) (cons (car stack) (cddr stack))) cs heap pc br bp))
(define-ws-instr (%discard)
  (vm prog (cdr stack) cs heap pc br bp))
(define-ws-instr (%math op)
  (vm prog (cons (op (cadr stack) (car stack)) (cddr stack)) cs heap pc br bp))
(define-ws-instr (%outputchar)
  (display (integer->char (car stack))) (flush) (vm prog (cdr stack) cs heap pc br bp))
(define-ws-instr (%outputnum)
  (display (car stack)) (flush) (vm prog (cdr stack) cs heap pc br bp))
(define-ws-instr (%readchar)
  (vm prog (cdr stack) cs (store (char->integer (read-char)) (car stack) heap) pc br bp))
(define-ws-instr (%readnum)
  (vm prog (cdr stack) cs (store (x->integer (read-line)) (car stack) heap) pc br bp))
(define-ws-instr (%label name)
  (vm prog stack cs heap pc br bp))
(define-ws-instr (%call name)
  (vm prog stack (cons pc cs) heap (findlabel name prog) br bp))
(define-ws-instr (%jump name)
  (vm prog stack cs heap (findlabel name prog) br bp))
(define-ws-instr (%if pred? name)
  (if (pred? (car stack))
      (vm prog (cdr stack) cs heap (findlabel name prog) br bp)
      (vm prog (cdr stack) cs heap pc br bp)))
(define-ws-instr (%return)
  (vm prog stack (cdr cs) heap (car cs) br bp))
(define-ws-instr (%store)
  (vm prog (cddr stack) cs (store (car stack) (cadr stack) heap) pc br bp))
(define-ws-instr (%retrieve)
  (vm prog (cons (retrieve (car stack) heap) (cdr stack)) cs heap pc br bp))
(define-ws-instr (%end)
  (vm prog stack cs heap '() br bp))

(define (%label-of inst)
  (match (inst 'label)
     (('%label name) name)
     (else #f)))

(define (retrieve n heap)
  (list-ref heap n))

(define (store . args)
  (match args
     ((x 0 '()) (cons x '()))
     ((x n '()) (cons 0 (store x (- n 1) '())))
     ((x 0 (h . hs)) (cons x hs))
     ((x n (h . hs)) (cons h (store x (- n 1) hs)))))

(define (findlabel name prog)
  (cond ((null? prog) (error "Undefined label"))
        ((equal? name (%label-of (car prog))) (cdr prog))
        (else (findlabel name (cdr prog)))))

;; special instruction for debugger
;;
(define (%break)
  (lambda (prog stack cs heap pc br bp)
    (define (bye)
      (read-line) (print "quit.") (exit 0))
    (define (code-list n)
      (define (write-line lno code)
        (format #t "~4d ~a" lno (if (eq? (car pc) code) #\+ #\sp))
        (write-code code))
      (read-line)
      (let* ((total (length prog))
             (rest (length pc))
             (now (- total rest))
             (from (max 0 (- now n)))
             (to (min total (+ now n))))
        (for-each write-line (iota 10 from) (drop* (take* prog to) from)))
      ((%break) prog stack cs heap pc br bp))
    (define (stack-dump)
      (read-line)
      (print stack)
      ((%break) prog stack cs heap pc br bp))
    (define (call-stack)
      (define (write-line lno code)
        (format #t "~4d " lno) (write-code code))
      (read-line)
      (let* ((total (length prog))
             (lnos (map (lambda (p) (- total (length p))) cs))
             (codes (map car cs)))
        (for-each write-line lnos codes))
      ((%break) prog stack cs heap pc br bp))
    (define (print-heap)
      (read-line)
      (print heap)
      ((%break) prog stack cs heap pc br bp))
    (define (show-break-points)
      (define (print-code code n bp?)
        (format #t "~4d ~a" n (if bp? #\+ #\sp))
        (write-code code))
      (read-line)
      (let lp ((pc prog) (n 0))
        (cond ((null? pc) 'done)
              ((memq pc bp)
               (print-code (car pc) n #t) (lp (cdr pc) (+ n 1)))
              (else
               (print-code (car pc) n #f) (lp (cdr pc) (+ n 1)))))
      ((%break) prog stack cs heap pc br bp))
    (define (set-break line)
      (let ((p (drop prog line)))
        (read-line)
        (write-code (car p))
        ((%break) prog stack cs heap pc br (cons p bp))))
    (define (clear-break)
      ((%break) prog stack cs heap pc br '()))
    (define (next-step)
      (read-line)
      (write-code (car pc))
      ((car pc) prog stack cs heap (cdr pc) #t bp))
    (define (continue)
      (read-line)
      ((car pc) prog stack cs heap (cdr pc) #f bp))
    (define (show-help)
      (print "[x|q|bye|exit|quit] - abort program and exit.")
      (print "[l|list]            - print 10 instructions around here.")
      (print "[s|st|stack]        - print stack frame like as S-Expr.")
      (print "[w|where]           - print call stack frame like as S-Expr.")
      (print "[m|mem|heap]        - print heap area like as S-Expr.")
      (print "[b|bp|break]        - print break points.")
      (print "[t|trap|set]        - set break point.")
      (print "[c|clear]           - clear all break points.")
      (print "[r|run|cont]        - continue this program's running.")
      (print "[h|help]            - print this help message."))
    (define (help&break)
      (read-line) (show-help)
      ((%break) prog stack cs heap pc br bp))
    (display "wsdb> ")
    (flush)
    (let lp ((expr (read)))
      (cond ((memq expr '(x q bye exit quit)) (bye))
            ((memq expr '(l list)) (code-list 5))
            ((memq expr '(s st stack)) (stack-dump))
            ((memq expr '(w where)) (call-stack))
            ((memq expr '(m mem heap)) (print-heap))
            ((memq expr '(b bp break)) (show-break-points))
            ((memq expr '(t trap set)) (set-break (read)))
            ((memq expr '(c clear)) (clear-break))
            ((memq expr '(n next step)) (next-step))
            ((memq expr '(r run cont)) (continue))
            ((memq expr '(h help)) (help&break))
            (else (format #t " ~a: No such debug command.~%" expr)
                  (help&break))))))

;; for VM main loop
;;
(define (vm prog stack cs heap pc br bp)
  (cond ((null? pc) 'done)
        ((or br (memq pc bp)) ((%break) prog stack cs heap pc br bp))
        (else ((car pc) prog stack cs heap (cdr pc) br bp))))

(define (code->sexpr code)
  (map (lambda (i)
         (if (procedure? i) (string->symbol (ref i 'info)) i))
       code))

(define (write-code instr)
  (write (code->sexpr (instr 'asm)))
  (newline))

(define (vm-instr-list prog)
  (print ";; === VM INSTRUCTION LIST START ===")
  (for-each write-code prog)
  (print ";; === VM INSTRUCTION LIST END ==="))

;; Main
;;
(define (main args)
  (let-args (cdr args)
      ((asm    "a|asm" #f)
       (debug  "d|debug" #f)
       (help   "h|help" => usage)
       . args)
    (when (null? args) (usage))
    (call-with-input-file (car args)
      (lambda (in)
        (let lp ((r '())
                 (c (read-char in)))
          (cond ((eof-object? c) (let ((insts (parse '() (reverse r))))
                                   (cond (asm (vm-instr-list insts))
                                         (debug (vm insts '() '() '() insts #t '()))
                                         (else (vm insts '() '() '() insts #f '())))))
                ((char-whitespace? c) (lp (cons c r) (read-char in)))
                (else (lp r (read-char in)))))))))

実行例

whitespaceからDLできるHaskellによる インタプリタに附属のサンプルコードを使って。

cut-sea@nkisi> ./ws hanoi.ws
Enter a number: 3
1 -> 3
1 -> 2
3 -> 2
1 -> 3
2 -> 1
2 -> 3
1 -> 3
cut-sea@nkisi> 

VM命令列のダンプ

cut-sea@nkisi> ./ws -a hanoi.ws
;; === VM INSTRUCTION LIST START ===
(%push 0)
(%push 69)
(%store)
(%push 1)
(%push 110)
(%store)
(%push 2)
(%push 116)
(%store)
(%push 3)
(%push 101)
(%store)
  :
  :
(%store)
(%return)
(%label "newline")
(%push 10)
(%push 13)
(%outputchar)
(%outputchar)
(%return)
;; === VM INSTRUCTION LIST END ===

デバッガの実行例

cut-sea@nkisi> ./ws -d hanoi.ws        <= -dオプション指定
wsdb> h  <= ヘルプ
[x|q|bye|exit|quit] - abort program and exit.
[l|list]            - print 10 instructions around here.
[s|st|stack]        - print stack frame like as S-Expr.
[w|where]           - print call stack frame like as S-Expr.
[m|mem|heap]        - print heap area like as S-Expr.
[b|bp|break]        - print break points.
[t|trap|set]        - set break point.
[c|clear]           - clear all break points.
[r|run|cont]        - continue this program's running.
[h|help]            - print this help message.
wsdb> l  <= リストすると現在いる行の前後5行くらいが出力される
   0 +(%push 0)
   1  (%push 69)
   2  (%store)
   3  (%push 1)
   4  (%push 110)
wsdb> b  <= ブレークポイントの確認。全VM命令列が見えるのでその目的でも使える
   0  (%push 0)
   1  (%push 69)
   2  (%store)
        :
        :
  72  (%push 1)
  73  (%push 3)
  74  (%push 2)
  75  (%call "hanoi")
  76  (%end)
  77  (%label "hanoi")
  78  (%push 103)
        :
        :
wsdb> t 75  <= ブレークポイントを75行目に設定
(%call "hanoi")
wsdb> b  <= 確認してみる
   0  (%push 0)
   1  (%push 69)
        :
        :
  73  (%push 3)
  74  (%push 2)
  75 +(%call "hanoi")  <= ブレークポイントが設定されてれば+がつく。複数個所に設定可能
  76  (%end)
  77  (%label "hanoi")
        :
        :
 205  (%outputchar)
 206  (%outputchar)
 207  (%return)
wsdb> r <= 引き続き実行すると次のブレークまで進む
Enter a number: 3  <= これはhanoi.wsプログラムへの入力
wsdb> l <= ブレークしたところでリストすると75行目にいる
  70  (%push 100)
  71  (%retrieve)
  72  (%push 1)
  73  (%push 3)
  74  (%push 2)
  75 +(%call "hanoi")
  76  (%end)
  77  (%label "hanoi")
  78  (%push 103)
  79  (%swap)
wsdb> n <= ステップ実行
(%call "hanoi")
wsdb> w <= コールスタックの情報出力
  76 (%end)
wsdb> s <= スタックの中身
(2 3 1 3)
wsdb> m <= ヒープの中身
(69 110 116 101 114 32 97 32 110 117 109 98 101 114 58 32 0 0 0 0 32 45 62 32 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3)
wsdb> r <= 再度続行させる
1 -> 3
1 -> 2
3 -> 2
1 -> 3
2 -> 1
2 -> 3
1 -> 3

wsdisasm(VM命令からwhitespaceへのディスアセンブラ)

ws -a foo.wsで出力したコードをそのまま喰えます。 逆にこいつを使えば少しは楽にwhitespaceのコードが書けるかも。 つっても書くのはアナタ。 ただ、生のwhitespaceはとても書けたもんじゃないので VMの命令で書いてwsdisasmでwhitespaceのコードにすればその分は有利かも。
なお、whitespaceに置いてある、 Haskell実装によるインタプリタに附属のサンプルコードに 対して実行すれば、ある程度書き方とかも分かるかもね。

":"; exec gosh $0 "$@"
;; -*- coding: euc-jp; mode: scheme -*-
;; whitespace assembler
;;
(use srfi-1)
(use gauche.parseopt)
(use gauche.collection)

(define (usage)
  (format #t "Usage: wsasm [-h|help] <script>~%")
  (exit 0))

(define (makeNum num)
  (define (digits->bits n)
    (cond ((zero? n) '(#\sp))
          (else (map (lambda (b)
                       (if (zero? b) #\sp #\tab))
                     (unfold-right
                      (cut > 1 <>) (cut modulo <> 2) (cut quotient <> 2) n)))))
  (cond ((negative? num) (append '(#\tab) (digits->bits (* -1 num)) '(#\lf)))
        (else (append '(#\sp) (digits->bits num) '(#\lf)))))

(define (makeStr name)
  (define (digits->bits n)
    (cond ((zero? n) (make-list 8 #\sp))
          (else (cons #\sp
                      (map (lambda (b)
                             (if (zero? b) #\sp #\tab))
                           (unfold-right
                            (cut > 1 <>) (cut modulo <> 2) (cut quotient <> 2) n))))))
  (let ((char->sptab (compose digits->bits char->integer)))
    (apply append
           (append (map char->sptab name) '((#\lf))))))

(define (%push num) (apply string #\sp #\sp (makeNum num)))
(define (%dup) (string #\sp #\lf #\sp))
(define (%ref num) (apply string #\sp #\tab #\sp (makeNum num)))
(define (%slide num) (apply string #\sp #\tab #\lf (makeNum num)))
(define (%swap) (string #\sp #\lf #\tab))
(define (%discard) (string #\sp #\lf #\lf))

(define (%math op)
  (cond ((eq? op +) (string #\tab #\sp #\sp #\sp))
        ((eq? op -) (string #\tab #\sp #\sp #\tab))
        ((eq? op *) (string #\tab #\sp #\sp #\lf))
        ((eq? op /) (string #\tab #\sp #\tab #\sp))
        ((eq? op %) (string #\tab #\sp #\tab #\tab))))

(define (%store) (string #\tab #\tab #\sp))
(define (%retrieve) (string #\tab #\tab #\tab))

(define (%label name) (apply string #\lf #\sp #\sp (makeStr name)))
(define (%call name) (apply string #\lf #\sp #\tab (makeStr name)))
(define (%jump name) (apply string #\lf #\sp #\lf (makeStr name)))
(define (%if pred? name)
  (cond ((eq? pred? zero?) (apply string #\lf #\tab #\sp (makeStr name)))
        ((eq? pred? negative?) (apply string #\lf #\tab #\tab (makeStr name)))))

(define (%return) (string #\lf #\tab #\lf))
(define (%end) (string #\lf #\lf #\lf))

(define (%outputchar) (string #\tab #\lf #\sp #\sp))
(define (%outputnum) (string #\tab #\lf #\sp #\tab))
(define (%readchar) (string #\tab #\lf #\tab #\sp))
(define (%readnum) (string #\tab #\lf #\tab #\tab))

(define (main args)
  (let-args (cdr args)
      ((help "h|help" => usage)
       . args)
    (when (null? args) (usage))
    (call-with-input-file (car args)
      (lambda (in)
        (let lp ((expr (read in)))
          (cond ((eof-object? expr) 'done)
                (else (display (eval expr (current-module)))
                      (lp (read in)))))))))

wslink

リンカってもリンカじゃない。とりあえず単にmixする。
ドキュメント(READMEとかCOPYRIGHTとか)をマージするならアリかも。

Schemeとmixする時にはコメントはあらかじめ無しにしておかないと、 よほど運が良くないと実行できない。 wsの方が短かい場合には意味を持たないコードをずらずら追加するだけ。 逆にwsの方が長い分には特に細工はいらないのでずらーって付けてる。

できるならもっと自然なコードに見えるようにしたいんだけど、 結構面倒な感じだし難しい気がしてる。

":"; exec gosh $0 "$@"
;; -*- coding: euc-jp; mode: scheme -*-
;; whitespace linkage editor
;;
(use srfi-1)
(use srfi-13)
(use file.util)
(use gauche.parseopt)

(define ++ string-append)

(define (usage)
  (format #t "Usage: wslink <whitespace-script> <another>~%")
  (exit 0))

(define (split-ws-chars file)
  (map x->string
       (filter (lambda (c)
                 (any (cut char=? c <>) '(#\sp #\tab #\nl)))
               (string->list (file->string file)))))

(define (collect-tokens file)
  (string-tokenize (file->string file)))

(define (collect-tokens file)
  (string-tokenize (file->string file)))


(define (linkage ws toks ret)
  (cond ((null? toks) (apply ++ ret ws))
        ((null? ws) (++ ret "  " (string-join toks " ") "\n"))
        (else (linkage (cdr ws) (cdr toks) (++ ret (car ws) (car toks))))))

(define (add-header scm?)
  (cond (scm? #`"\":\"; exec gosh $0    \"$@\"    \n \n\n")
        (else "")))


(define (main args)
  (let-args (cdr args)
      ((scm  "s|scheme" #f)
       (help "h|help" => usage)
       . args)
    (unless (= 2 (length args)) (usage))
    (let ((ws-src (split-ws-chars (car args)))
          (an-src (collect-tokens (cadr args))))
      (display (linkage ws-src an-src (add-header scm))))))

Last modified : 2012/02/23 03:37:38 UTC