Scheme:初心者の質問箱より移動
アルファベットと空白文字からなる文字列があります。空白文字をセパレータとして文字列を分割する関数partial-splitを作りたいです。ただし、分割された文字列が、引数で指定された単語と一致する場合、その部分での分割は行われません(その単語と空白をはさんだ右隣の単語とは分割されない)。partial-splitをどのように書いたらよいですか? -coze
gosh> (partial-split "long time ago" ()) ("long" "time" "ago") gosh> (partial-split " long time ago " ()) ("long" "time" "ago") gosh> (partial-split "long time ago" '("time")) ("long" "time ago") gosh> (partial-split "long time ago" '("ago")) ("long" "time" "ago") gosh> (partial-split "long time ago" '("long" "time")) ("long time ago") gosh> (partial-split "long long time ago" '("long")) ("long long time" "ago") gosh> (partial-split " long long long time ago" '("long")) ("long long long time" "ago")
(use srfi-1) (define (partial-split str glue-list) (let next ((left-str str)) ;; 前方に空白があるなら除去する (let1 true-left-str ((#/^ */ left-str) 'after) (if (zero? (string-length true-left-str)) '() ; 最後までチェック終了 (let* ((matched (#/ +|$/ true-left-str)) ; 最初の空白か末尾で分割する (current-word (matched 'before)) ; 単語一つ (splitter (matched)) ; 間の空白 (after-word (matched 'after)) ; 残り文字列 (next-list (next after-word)) ; もうこの時点で残りも算出 ) (if (not (find (cut string=? <> current-word) glue-list)) ;; current-wordがglue-listに載ってないなら、分割したまま次へ。 (cons current-word next-list) ;; そうでないなら、次のリストの先頭要素を再連結する ;; (無駄に再連結しているのがどうも美しくない/効率が良くない。要改善。) ;; 但し、例外として、次が空なら連結しない (if (null? next-list) (cons current-word '()) (cons (string-append current-word splitter (car next-list)) (cdr next-list)))))))))
(use srfi-1) (use gauche.sequence) (define (split-word str) (define (same-type? ch1 ch2) (or (and (char-whitespace? ch1) (char-whitespace? ch2)) (and (not (char-whitespace? ch1)) (not (char-whitespace? ch2))))) (map list->string (group-sequence str :test same-type?))) (define (partial-split str merge-list) (receive (words acc _) (fold3 (lambda (str words acc merge?) (cond ((char-whitespace? (string-ref str 0)) (values (if merge? (cons str words) words) acc merge?)) ((member str merge-list) (values (cons str words) acc #t)) (else (values '() (cons (apply string-append (reverse (cons str words))) acc) #f)))) '() '() #f (split-word str)) (reverse (append (drop-while (lambda (str) (char-whitespace? (string-ref str 0))) words) acc))))
(use srfi-1) (use srfi-11) (use srfi-13) (use srfi-14) (define (partial-split str words) (define (take-word cs) (let*-values (((wcs cs*) (span (cut char-set-contains? char-set:letter <>) cs)) ((scs rest) (span (cut char-set-contains? char-set:whitespace <>) cs*))) (values wcs scs rest))) (let loop ((cs (string->list str)) (rs '())) (if (null? cs) (drop-while string-null? (reverse (map list->string rs))) (let collect ((cs cs) (css '())) (receive (wcs spaces rest) (take-word cs) (if (member (list->string wcs) words) (collect rest (cons* spaces wcs css)) (loop rest (cons (concatenate (reverse (cons wcs css))) rs))))))))
(define (match-each regword phrase) (let ((matchobj (rxmatch regword phrase))) (if (not matchobj) '() (cons (matchobj 1) (match-each regword (matchobj 'after)))))) (define (glue-list->regexp glue-list) (if (null? glue-list) #/^ *([^ ]+)/ (string->regexp (string-append "^ *((?:" (string-join (map (cut string-append "(?:" <> " +)") glue-list) "|") ")*[^ ]+)")))) (define (partial-split str glue-list) (let ((glue-reg (glue-list->regexp glue-list))) (match-each glue-reg str)))