Scheme:初心者の質問箱より移動
アルファベットと空白文字からなる文字列があります。空白文字をセパレータとして文字列を分割する関数partial-splitを作りたいです。ただし、分割された文字列が、引数で指定された単語と一致する場合、その部分での分割は行われません(その単語と空白をはさんだ右隣の単語とは分割されない)。partial-splitをどのように書いたらよいですか? -coze
gosh> (partial-split "long time ago" ())
("long" "time" "ago")
gosh> (partial-split " long time ago " ())
("long" "time" "ago")
gosh> (partial-split "long time ago" '("time"))
("long" "time ago")
gosh> (partial-split "long time ago" '("ago"))
("long" "time" "ago")
gosh> (partial-split "long time ago" '("long" "time"))
("long time ago")
gosh> (partial-split "long long time ago" '("long"))
("long long time" "ago")
gosh> (partial-split " long long long time ago" '("long"))
("long long long time" "ago")
(use srfi-1)
(define (partial-split str glue-list)
(let next ((left-str str))
;; 前方に空白があるなら除去する
(let1 true-left-str ((#/^ */ left-str) 'after)
(if (zero? (string-length true-left-str))
'() ; 最後までチェック終了
(let* ((matched (#/ +|$/ true-left-str)) ; 最初の空白か末尾で分割する
(current-word (matched 'before)) ; 単語一つ
(splitter (matched)) ; 間の空白
(after-word (matched 'after)) ; 残り文字列
(next-list (next after-word)) ; もうこの時点で残りも算出
)
(if (not (find (cut string=? <> current-word) glue-list))
;; current-wordがglue-listに載ってないなら、分割したまま次へ。
(cons current-word next-list)
;; そうでないなら、次のリストの先頭要素を再連結する
;; (無駄に再連結しているのがどうも美しくない/効率が良くない。要改善。)
;; 但し、例外として、次が空なら連結しない
(if (null? next-list)
(cons current-word '())
(cons (string-append current-word splitter (car next-list))
(cdr next-list)))))))))
(use srfi-1)
(use gauche.sequence)
(define (split-word str)
(define (same-type? ch1 ch2)
(or (and (char-whitespace? ch1)
(char-whitespace? ch2))
(and (not (char-whitespace? ch1))
(not (char-whitespace? ch2)))))
(map list->string
(group-sequence str :test same-type?)))
(define (partial-split str merge-list)
(receive (words acc _)
(fold3 (lambda (str words acc merge?)
(cond ((char-whitespace? (string-ref str 0))
(values (if merge?
(cons str words)
words)
acc
merge?))
((member str merge-list)
(values (cons str words) acc #t))
(else
(values '()
(cons (apply string-append (reverse (cons str words)))
acc)
#f))))
'() '() #f (split-word str))
(reverse (append (drop-while (lambda (str)
(char-whitespace? (string-ref str 0)))
words)
acc))))
(use srfi-1)
(use srfi-11)
(use srfi-13)
(use srfi-14)
(define (partial-split str words)
(define (take-word cs)
(let*-values (((wcs cs*) (span (cut char-set-contains? char-set:letter <>) cs))
((scs rest) (span (cut char-set-contains? char-set:whitespace <>) cs*)))
(values wcs scs rest)))
(let loop ((cs (string->list str))
(rs '()))
(if (null? cs)
(drop-while string-null? (reverse (map list->string rs)))
(let collect ((cs cs)
(css '()))
(receive (wcs spaces rest) (take-word cs)
(if (member (list->string wcs) words)
(collect rest (cons* spaces wcs css))
(loop rest (cons (concatenate (reverse (cons wcs css))) rs))))))))
(define (match-each regword phrase)
(let ((matchobj (rxmatch regword phrase)))
(if (not matchobj) '()
(cons
(matchobj 1)
(match-each regword (matchobj 'after))))))
(define (glue-list->regexp glue-list)
(if (null? glue-list)
#/^ *([^ ]+)/
(string->regexp
(string-append
"^ *((?:"
(string-join
(map (cut string-append "(?:" <> " +)")
glue-list)
"|")
")*[^ ]+)"))))
(define (partial-split str glue-list)
(let ((glue-reg (glue-list->regexp glue-list)))
(match-each glue-reg str)))