Gaucheで変換フィルタをつくっています。
(中で使っているkomono,web-helperとかの小物については別にまとめます。)
(use htmlprag)
(use pretty-print)
(use web-helper)
(use komono)
(use srfi-1) ;list
(use srfi-13) ;string
(use sxml.sxpath)
(use sxml.serializer)
(use sxml.tools)
;(use gauche.charconv)
(use gauche.generator)
(use text.tree)
;; Text placed in the channel's <title> element by generate-rss.
(define news-title "History of Scintilla and SciTE")
;; URI placed in the channel's <link> element by generate-rss.
;; NOTE(review): presumably also the page whose HTML is fed on stdin -- confirm.
(define news-uri "http://www.scintilla.org/ScintillaHistory.html")
;; Separator ("kugiri") predicate handed to list-split.
;;   e : any node of the selected node list.
;; Returns #t when e is an SXML element whose name is h3, otherwise #f.
(define (kugiri-pred e)
  ;; `case` clauses take unquoted datum lists, so the former clause
  ;; ('h3 #t) was read as ((quote h3) #t) and also accepted an element
  ;; named `quote`.  A plain eq? test matches h3 and nothing else.
  (and (pair? e)
       (eq? (sxml:name e) 'h3)))
;; Convert a parsed HTML document (SXML) into a list of (heading body)
;; entries.
;;   doc : SXML tree of the page.
;; The element children of <body> are cut at every node accepted by
;; kugiri-pred; each separator is then paired with the segment that
;; follows it, and the segment before the first separator is discarded.
;; NOTE(review): list-split comes from komono; it is assumed to return
;; the segments and the separators as two values -- confirm.
(define (parse doc)
  (let1 nodes ((sxpath "//body/*") doc)
    (receive (segments headings) (list-split nodes kugiri-pred)
      (zip headings (cdr segments)))))
;; Serialize the entries produced by parse into an XML string.
;;   d : list of (heading body) entries.
;; The first entry supplies the channel description; every remaining
;; entry becomes one <item>.
(define (generate-rss d)
  ;; Render each node as HTML and join the pieces into a single string.
  (define (html-concat nodes)
    (string-concatenate (map srl:sxml->html nodes)))
  ;; Build one <item> from a (heading body) entry.
  (define (entry->item x)
    (let1 heading (car x)
      `(item
        (title ,(string-trim-both (sxml->string heading)))
        ;; A <link> is emitted only when the heading contains an href.
        ,@(cond-list
           [((if-car-sxpath "//a/@href/text()") heading)
            => (^[href] `(link ,href))])
        (description ,(html-concat (cdr x))))))
  (srl:sxml->xml
   `(rdf
     (channel
      (title ,news-title)
      (link ,news-uri)
      (description ,(html-concat (car d))))
     ,@(map entry->item (cdr d)))))
;; Script entry point: read HTML from standard input, convert it to the
;; feed and print the result.  Returns 0 as the exit status.
;; NOTE(review): port->sxml is not defined in this file (presumably
;; web-helper); it is called with the html->sxml parser and the port.
(define (main args)
  (let* ([doc  (port->sxml html->sxml (standard-input-port))]
         [feed (generate-rss (parse doc))])
    (print feed)
    0))
; $ sxml-get-http-uri news-uri
(use htmlprag)
(use web-helper)
(use pretty-print)
(use komono)
(use srfi-1) ;list
(use srfi-13) ;string
(use sxml.sxpath)
(use sxml.serializer)
(use sxml.tools)
;(use gauche.charconv)
(use gauche.generator)
(use text.tree)
;; Text placed in the channel's <title> element by generate-rss.
(define news-title "EasyTAG is a tag editor for MP3, Ogg Vorbis files and more")
;; URI placed in the channel's <link> element by generate-rss.
;; NOTE(review): presumably also the page whose HTML is fed on stdin -- confirm.
(define news-uri "http://projects.gnome.org/easytag/")
;; Separator predicate for the outer split in parse:
;; #t for an SXML element named h3, #f for anything else.
(define (kugiri-pred-h e)
  (if (pair? e)
      (eq? (sxml:name e) 'h3)
      #f))
;; Separator predicate for the inner (per-item) split in parse:
;; #t for an SXML element named h4, #f for anything else.
(define (kugiri-pred-i e)
  (if (pair? e)
      (eq? (sxml:name e) 'h4)
      #f))
;; Convert a parsed HTML document (SXML) into an alist with two keys:
;;   header : the first two h3-delimited segments plus the last one,
;;            concatenated into one node list;
;;   items  : (title body) entries taken from the third segment, which
;;            is split again at h4 elements.  Each title is the trimmed
;;            text of the h4 separator.
;; NOTE(review): list-split comes from komono; it is assumed to return
;; the segments and the separators as two values -- confirm.
(define (parse doc)
  (let1 nodes ((sxpath "//div[@id='content']/*") doc)
    (receive (spl-h brk-h) (list-split nodes kugiri-pred-h)
      (receive (spl-i brk-i) (list-split (caddr spl-h) kugiri-pred-i)
        (let* ([header (concatenate
                        (append (take spl-h 2) (list (last spl-h))))]
               [titles (map (^[node] (string-trim-both (sxml->string node)))
                            brk-i)]
               [items  (zip titles (cdr spl-i))])
          (list (cons 'header header)
                (cons 'items items)))))))
;; Serialize the alist produced by parse into an XML string.
;;   d : alist with the keys header (node list) and items
;;       (list of (title body) entries).
;; The header nodes become the channel description; each item entry
;; becomes one <item> with its title and HTML-rendered body.
(define (generate-rss d)
  ;; Render each node as HTML and join the pieces into a single string.
  (define (html-concat nodes)
    (string-concatenate (map srl:sxml->html nodes)))
  ;; Build one <item> from a (title body) entry.
  (define (entry->item x)
    `(item
      (title ,(car x))
      (description ,(html-concat (cdr x)))))
  (srl:sxml->xml
   `(rdf
     (channel
      (title ,news-title)
      (link ,news-uri)
      (description ,(html-concat (assoc-ref d 'header))))
     ,@(map entry->item (assoc-ref d 'items)))))
;; Script entry point: read HTML from standard input, convert it to the
;; feed and print the result.  Returns 0 as the exit status.
;; NOTE(review): port->sxml is not defined in this file (presumably
;; web-helper); it is called with the html->sxml parser and the port.
(define (main args)
  (let* ([doc  (port->sxml html->sxml (standard-input-port))]
         [feed (generate-rss (parse doc))])
    (print feed)
    0))
(use htmlprag)
(use pretty-print)
(use web-helper)
(use komono)
(use srfi-1) ;list
(use srfi-13) ;string
(use sxml.sxpath)
(use sxml.serializer)
(use sxml.tools)
;(use gauche.charconv)
(use gauche.generator)
(use text.tree)
;; Text placed in the channel's <title> element by generate-rss.
(define news-title "Clementine Music Player")
;; URI placed in the channel's <link> element by generate-rss.
;; NOTE(review): presumably also the page whose HTML is fed on stdin -- confirm.
(define news-uri "http://www.clementine-player.org/")
;; Separator predicate used by parse:
;; #t for an SXML element named h1, #f for anything else.
(define (kugiri-pred-h e)
  (cond [(pair? e) (eq? (sxml:name e) 'h1)]
        [else #f]))
;; Convert a parsed HTML document (SXML) into an alist with two keys:
;;   header : the first two segments after the leading one;
;;   items  : the div elements found in the last h1-delimited segment.
;; NOTE(review): list-split comes from komono; it is assumed to return
;; the segments and the separators as two values -- confirm.
(define (parse doc)
  (let1 nodes ((sxpath "//div[@id='node-2']/div[@class='content']/*") doc)
    (receive (spl-h brk-h) (list-split nodes kugiri-pred-h)
      (let* ([header (take (cdr spl-h) 2)]
             [items  (filter (^[node] (eq? (sxml:name node) 'div))
                             (last spl-h))])
        (list (cons 'header header)
              (cons 'items items))))))
;; Serialize the alist produced by parse into an XML string.
;;   d : alist with the keys header (node list) and items (list of
;;       div elements).
;; The header nodes become the channel description.  Each div becomes
;; one <item>: its title is the first text found by "/h3/text()" (or #f
;; when there is none) and its description is the div rendered as HTML.
(define (generate-rss d)
  ;; Build one <item> from a div element.
  (define (div->item x)
    `(item
      (title ,((if-car-sxpath "/h3/text()") x))
      (description ,(srl:sxml->html x))))
  (let1 feed
      `(rdf
        (channel
         (title ,news-title)
         (link ,news-uri)
         (description
          ,(string-concatenate
            (map srl:sxml->html (assoc-ref d 'header)))))
        ,@(map div->item (assoc-ref d 'items)))
    (srl:sxml->xml feed)))
;; Script entry point: read HTML from standard input, convert it to the
;; feed and print the result.  Returns 0 as the exit status.
;; NOTE(review): port->sxml is not defined in this file (presumably
;; web-helper); it is called with the html->sxml parser and the port.
(define (main args)
  (let* ([doc  (port->sxml html->sxml (standard-input-port))]
         [feed (generate-rss (parse doc))])
    (print feed)
    0))
; $ sxml-get-http-uri news-uri
;(use htmlprag)
;(use pretty-print)
;(use web-helper)
(use komono)
(use srfi-1) ;list
(use srfi-13) ;string
(use sxml.serializer)
(use gauche.generator)
;; Text placed in the channel's <title> element by generate-rss.
;; (Spelling corrected: the title previously read "Chenge Log".)
(define news-title "Clementine - Change Log")
;; URI placed in the channel's <link> element by generate-rss.
;; NOTE(review): presumably also the changelog whose text is fed on
;; stdin -- confirm.
(define news-uri "https://clementine-player.googlecode.com/git/Changelog")
;; Separator predicate for the changelog lines: true (a regmatch
;; object) when e is a string beginning with "Version", otherwise #f.
(define (kugiri-pred e)
  (if (string? e)
      (rxmatch #/^Version/ e)
      #f))
;; Group the changelog lines into (title body-lines) entries.
;;   s : list of lines.
;; The list is cut at every line accepted by kugiri-pred; each trimmed
;; separator line is paired with the segment that follows it, and the
;; segment before the first separator is discarded.
;; NOTE(review): list-split comes from komono; it is assumed to return
;; the segments and the separators as two values -- confirm.
(define (parse s)
  (call-with-values
      (cut list-split s kugiri-pred)
    (^[bodies headings]
      (zip (map string-trim-both headings)
           (cdr bodies)))))
;; Serialize the entries produced by parse into an XML string.
;;   d : list of (title body-lines) entries.
;; The channel carries only title and link.  Each entry becomes one
;; <item> whose description is the body lines, each terminated by a
;; newline, wrapped in a <pre> element and rendered as HTML.
(define (generate-rss d)
  ;; Build one <item> from a (title body-lines) entry.
  (define (entry->item x)
    `(item
      (title ,(car x))
      (description
       ,(let1 lines (map (^[line] (format #f "~a\n" line)) (cadr x))
          (srl:sxml->html (cons 'pre lines))))))
  (srl:sxml->xml
   `(rdf
     (channel
      (title ,news-title)
      (link ,news-uri))
     ,@(map entry->item d))))
;; Script entry point: read all lines from standard input, convert them
;; to the feed and print the result.  Returns 0 as the exit status.
(define (main args)
  (let* ([lines (generator->list
                 (port->line-generator (standard-input-port)))]
         [feed  (generate-rss (parse lines))])
    (print feed)
    0))
; $ sxml-get-http-uri news-uri
(use htmlprag)
(use pretty-print)
(use web-helper)
(use komono)
(use srfi-1) ;list
(use srfi-13) ;string
(use sxml.sxpath)
(use sxml.serializer)
(use sxml.tools)
;(use gauche.charconv)
(use gauche.generator)
(use text.tree)
;; Text placed in the channel's <title> element by generate-rss.
(define news-title "TEA News")
;; URI placed in the channel's <link> element by generate-rss.
;; NOTE(review): presumably also the page whose HTML is fed on stdin -- confirm.
(define news-uri "http://tea-editor.sourceforge.net/")
;; Separator predicate for the outer split in parse:
;; #t for an SXML element named h2, #f for anything else.
(define (kugiri-pred-h e)
  (cond [(pair? e) (eq? (sxml:name e) 'h2)]
        [else #f]))
;; Separator predicate for the inner (per-item) split in parse: true
;; when e is an SXML element named p for which the path "/b" selects
;; something; #f otherwise.  The true value is whatever if-sxpath
;; returns for the match.
(define (kugiri-pred-b e)
  (and (pair? e)
       (eq? (sxml:name e) 'p)
       ((if-sxpath "/b") e)))
;; Convert a parsed HTML document (SXML) into an alist with two keys:
;;   header : each h2 separator after the first, consed onto the
;;            segment that follows it;
;;   items  : for the segment after the first h2, one entry per
;;            kugiri-pred-b separator, shaped (title separator . body),
;;            where title is the first text found by "b/text()".
;; NOTE(review): list-split comes from komono; it is assumed to return
;; the segments and the separators as two values -- confirm.
(define (parse doc)
  (let1 nodes ((sxpath "//table[@class='table_content']//tr/td[@class='table_main_cell']/*") doc)
    (receive (spl-h brk-h) (list-split nodes kugiri-pred-h)
      (receive (spl-b brk-b) (list-split (cadr spl-h) kugiri-pred-b)
        (let* ([header (map cons (cdr brk-h) (cddr spl-h))]
               [items  (map (^[para body]
                              (cons* ((if-car-sxpath "b/text()") para)
                                     para
                                     body))
                            brk-b
                            (cdr spl-b))])
          (list (cons 'header header)
                (cons 'items items)))))))
;; Serialize the alist produced by parse into an XML string.
;;   d : alist with the keys header and items; every item is a list
;;       whose first element is the title and whose rest are nodes.
;; The header entries become the channel description; each item becomes
;; one <item> with its title and its remaining nodes rendered as HTML.
(define (generate-rss d)
  ;; Render each node as HTML and join the pieces into a single string.
  (define (render-all nodes)
    (string-concatenate (map srl:sxml->html nodes)))
  (let1 feed
      `(rdf
        (channel
         (title ,news-title)
         (link ,news-uri)
         (description ,(render-all (assoc-ref d 'header))))
        ,@(map (^[entry]
                 `(item
                   (title ,(car entry))
                   (description ,(render-all (cdr entry)))))
               (assoc-ref d 'items)))
    (srl:sxml->xml feed)))
;; Script entry point: read HTML from standard input, convert it to the
;; feed and print the result.  Returns 0 as the exit status.
;; NOTE(review): port->sxml is not defined in this file (presumably
;; web-helper); it is called with the html->sxml parser and the port.
(define (main args)
  (let* ([doc  (port->sxml html->sxml (standard-input-port))]
         [feed (generate-rss (parse doc))])
    (print feed)
    0))
; $ sxml-get-http-uri news-uri