[Plagger RSS 1.0 モジュール]の変更点 - superopac @Wiki

「Plagger RSS 1.0 モジュール」の編集履歴（バックアップ）一覧はこちら

「Plagger RSS 1.0 モジュール」(2006/10/12 (木) 14:04:09) の最新版変更点

追加された行は緑色になります。

削除された行は赤色になります。

*Plagger で RSS1.0 を出力するモジュール
　plaggerで新着図書一覧ページから、RSS1.0でdcモジュールを使って見たかったので、作って見ました。
　テスト環境が、ActivePerl＋XPなので、それ以外の環境ではディレクトリの指定など適宜修正してください。
**作成・修正が必要なモジュール
- Plagger::Plugin::Publish::Feed2;
- Plagger::Plugin::Filter::EntryFullText;
- Plagger::Entry;
- Plagger::Plugin::SmartFeed;
**yamlの例
- config.yamlの例
- /Plagger/assets/plugins/Filter-EntryFullTextのhit-u.yamlの例
**モジュールの修正箇所
- Plagger::Plugin::Publish::Feed2;
package Plagger::Plugin::Publish::Feed2;
use strict;
use warnings;
use base qw( Plagger::Plugin );

our $VERSION = 0.01;

use XML::Feed;
use XML::Feed::Entry;
use XML::RSS::LibXML;
use File::Spec;
use XML::RSS;

$XML::Feed::RSS::PREFERRED_PARSER = "XML::RSS::LibXML";

# Hook this plugin into the 'publish.feed' phase, then prepare the output
# directory and the configuration defaults.
sub register {
    my($self, $context) = @_;
    $context->register_hook(
        $self,
        'publish.feed' => \&publish_feed,
    );
    $self->init_feed($context);
}

# Create the configured output directory when it does not exist yet and
# default 'full_content' to 1 when the key is absent from the config.
sub init_feed {
    my($self, $context) = @_;

    # check dir
    my $dir = $self->conf->{dir};
    unless (-e $dir && -d _) {
        mkdir $dir, 0755 or $context->error("mkdir $dir: $!");
    }

    unless (exists $self->conf->{full_content}) {
        $self->conf->{full_content} = 1;
    }
}

# 'publish.feed' hook: serialize $args->{feed} and save it under conf->{dir}.
# conf->{format} 'RSS' produces RSS 1.0 through XML::RSS; any other value
# (default 'Atom') is handed to XML::Feed.  Only the feed object that is
# actually written gets built.
sub publish_feed {
    my($self, $context, $args) = @_;

    my $conf        = $self->conf;
    my $f           = $args->{feed};
    my $feed_format = $conf->{format} || 'Atom';
    my $is_rss      = $feed_format eq 'RSS';

    my $xml = $is_rss
        ? $self->_rss10_as_string($f)
        : $self->_xml_feed_as_string($f, $feed_format);
    utf8::decode($xml) unless utf8::is_utf8($xml);

    # generate file path
    my $filepath = File::Spec->catfile($conf->{dir}, $self->gen_filename($f));
    $context->log(info => "save feed for " . $f->link . " to $filepath ($feed_format)");

    # The RSS 1.0 output goes through a raw handle and everything else
    # through a :utf8 layer, exactly as before.
    # NOTE(review): the differing layers look deliberate (possibly to avoid
    # double-encoding the XML::RSS output) -- confirm before unifying them.
    my $mode = $is_rss ? ">" : ">:utf8";
    open my $output, $mode, $filepath or $context->error("$filepath: $!");
    print $output $xml;
    close $output or $context->error("$filepath: $!");
}

# Build the RSS 1.0 document (with dc/syn/taxo modules) for feed $f and
# return it as a string.
sub _rss10_as_string {
    my($self, $f) = @_;
    my $conf = $self->conf;

    # NOTE(review): the fallback subject/rights/language/taxo values below
    # appear to be sample values rather than data about this feed.  They are
    # kept as defaults for compatibility; override them with the optional
    # dc_subject / dc_rights / dc_language / taxo config keys.
    my $rss = XML::RSS->new(version => '1.0');
    $rss->channel(
        title       => $f->title,
        link        => $f->link,
        description => $f->description,
        dc => {
            date      => Plagger::Date->now,
            subject   => $conf->{dc_subject}  || "Linux Software",
            creator   => $f->author,
            publisher => "Plagger/$Plagger::VERSION",
            rights    => $conf->{dc_rights}   || 'Copyright 1999, Freshmeat.net',
            language  => $conf->{dc_language} || 'en-us',
        },
        syn => {
            updatePeriod    => "hourly",
            updateFrequency => "1",
            updateBase      => "1901-01-01T00:00+00:00",
        },
        taxo => $conf->{taxo} || [
            'http://dmoz.org/Computers/Internet',
            'http://dmoz.org/Computers/PC',
        ],
    );

    # create RSS 1.0 entries
    for my $e ($f->entries) {
        # Computed outside the hash constructor so that it always
        # contributes exactly one value.
        my $subject = _tags_as_text(scalar $e->tags);

        $rss->add_item(
            title       => $e->title,
            link        => $e->permalink,
            description => $e->body_text,
            dc => {
                publisher => $e->publisher,
                subject   => $subject,
                creator   => $e->author,
                language  => $e->language,
                date      => $e->date,
            },
        );
    }

    return $rss->as_string;
}

# Build the feed for $f through XML::Feed in $feed_format (normally 'Atom')
# and return it as XML.
sub _xml_feed_as_string {
    my($self, $f, $feed_format) = @_;

    # Must stay in effect until as_xml has run.
    local $XML::Atom::DefaultVersion = "1.0";

    my $feed = XML::Feed->new($feed_format);
    $feed->title($f->title);
    $feed->link($f->link);
    $feed->modified(Plagger::Date->now);
    $feed->generator("Plagger/$Plagger::VERSION");
    $feed->description($f->description || '');
    $feed->author($f->author) if $f->primary_author;

    if ($feed_format eq 'Atom') {
        $feed->{atom}->id("tag:plagger.org,2006:" . $f->id);
    }

    # add entry
    for my $e ($f->entries) {
        my $entry = XML::Feed::Entry->new($feed_format);
        $entry->title($e->title);
        $entry->link($e->permalink);
        $entry->summary($e->body_text) if defined $e->body;

        # hack to bypass XML::Feed Atom 0.3 crufts (type="text/html")
        if ($self->conf->{full_content} && defined $e->body) {
            $entry->{entry}->content($e->body);
        }

        my $category = _tags_as_text(scalar $e->tags);
        $entry->category($category) if defined $category;

        $entry->issued($e->date)   if $e->date;
        $entry->modified($e->date) if $e->date;

        $entry->id("tag:plagger.org,2006:" . $e->id);

        if ($e->has_enclosure) {
            for my $enclosure (grep { defined $_->url && !$_->is_inline } $e->enclosures) {
                $entry->add_enclosure({
                    url    => $enclosure->url,
                    length => $enclosure->length,
                    type   => $enclosure->type,
                });
            }
        }

        $feed->add_entry($entry);
    }

    return $feed->as_xml;
}

# Flatten an entry's tags into one space-separated string.  The accessor may
# hold an array reference or a plain scalar; an array reference handed
# straight to a feed element would be stringified as "ARRAY(0x...)".
# Returns undef (in scalar context) when there are no tags.
sub _tags_as_text {
    my($tags) = @_;
    my @tags = ref $tags eq 'ARRAY' ? @$tags
             : defined $tags        ? ($tags)
             :                        ();
    return unless @tags;
    return join ' ', @tags;
}

# Expansions available in conf->{filename}: %u (url), %l (link), %t (title)
# and %i (id) of the feed.
my %formats = (
    'u' => sub { my $s = $_[0]->url;  $s =~ s!^https?://!!; $s },
    'l' => sub { my $s = $_[0]->link; $s =~ s!^https?://!!; $s },
    't' => sub { $_[0]->title },
    'i' => sub { $_[0]->id },
);

my $format_re = qr/%(u|l|t|i)/;

# Expand conf->{filename} for $feed.  Without a configured filename the name
# is '%i.rss' or '%i.atom', using the same 'Atom' default as publish_feed.
sub gen_filename {
    my($self, $feed) = @_;

    my $format = $self->conf->{format} || 'Atom';
    my $file   = $self->conf->{filename}
        || '%i.' . ($format eq 'RSS' ? 'rss' : 'atom');
    $file =~ s{$format_re}{
        $self->safe_filename($formats{$1}->($feed))
    }egx;
    $file;
}

# Replace every run of characters other than word characters and whitespace,
# and then every run of whitespace, with a single underscore.
sub safe_filename {
    my($self, $path) = @_;
    $path =~ s![^\w\s]+!_!g;
    $path =~ s!\s+!_!g;
    $path;
}

# XXX okay, this is a hack until XML::Feed is updated
{
    # Installing subs by glob assignment would warn if XML::Feed already
    # provides add_enclosure.
    no warnings qw( redefine once );

    *XML::Feed::Entry::Atom::add_enclosure = sub {
        my($entry, $enclosure) = @_;
        my $link = XML::Atom::Link->new;
        $link->rel('enclosure');
        $link->type($enclosure->{type});
        $link->href($enclosure->{url});
        $link->length($enclosure->{length});
        $entry->{entry}->add_link($link);
    };

    *XML::Feed::Entry::RSS::add_enclosure = sub {
        my($entry, $enclosure) = @_;
        $entry->{entry}->{enclosure} = {
            url    => $enclosure->{url},
            type   => $enclosure->{type},
            length => $enclosure->{length},
        };
    };
}

1;
__END__
- Plagger::Plugin::Filter::EntryFullText;
14行目付近への追加
use Plagger::Tag;
149行目付近への追加
# Wrap the parsed tags in an array reference: a list handed to the accessor
# would be stored as a plain scalar whenever it holds exactly one tag.
$args->{entry}->tags([ Plagger::Tag->parse($data->{tags}) ]) if $data->{tags};
# $args->{entry}->tags($data->{tags}) if $data->{tags};
$args->{entry}->author($data->{author}) if $data->{author};
$args->{entry}->publisher($data->{publisher}) if $data->{publisher};
$args->{entry}->language($data->{language}) if $data->{language};
- Plagger::Entry;
　5行目付近
__PACKAGE__->mk_accessors(qw( title author tags link feed_link id summary body rate icon meta source publisher language));
　103行目付近(1;の手前)
# Despite the add_ prefix, each of these only returns the current value of
# the accessor with the matching name.
sub add_publisher {
    my $self = shift;
    $self->publisher;
}

sub add_language {
    my $self = shift;
    $self->language;
}

sub add_tags {
    my $self = shift;
    $self->tags;
}
- Plagger::Plugin::SmartFeed;
# Fall back to the feed title when no description / author is configured.
$feed->description( $self->conf->{description} || $feed->title );
$feed->author( $self->conf->{author} || $feed->title );
- config.yamlの例
global:
  plugin_path:
    - D:\Perl\site\lib\Plagger\Plugin
  assets_path: D:\Perl\site\lib\Plagger\assets
  timezone: Asia/Tokyo
  log:
    level: debug
  cache:
    base: c:\plagger

# Looks for the *.yaml that contains the URL given under feed, then fetches
# and filters the data.
# When the url is changed, update the EntryFullText (EFT) yaml as well.
plugins:
  - module: Subscription::Config
    config:
      feed:
        - url: http://opac.lib.hit-u.ac.jp/opac-new/book1/8.html
#        - url: http://opac.lib.hit-u.ac.jp/opac-new/book1/new.html
#        - url: http://opac.lib.hit-u.ac.jp/opac-new/book2/new.html
          meta:
            follow_link: /opac/books-query\?mode=2\&place=\&code=\d+
# Apparently ? and & have to be escaped.
# http://shakenbu.org/yanagi/d/20060923.html
# When custom_feed_handle and custom_feed_follow_link /
# custom_feed_follow_xpath are set for EntryFullText, CustomFeed::Simple is
# not needed.
#  - module: CustomFeed::Config
#  - module: CustomFeed::Simple
# Removes duplicate data
#  - module: Filter::Rule
#    rule:
#      module: Deduped
#      path: C:\plagger\dedupe-hit-u.db
  - module: SmartFeed
    rule:
      module: Fresh
      mtime:
        path: C:\plagger\tmp\foo.tmp
        autoupdate: 1
    config:
      title: 一橋大学附属図書館新着図書
      link: http://opac.lib.hit-u.ac.jp/opac-new/book1/8.html
      description: 一橋大学附属図書館の新着図書リストからRSS1.0を生成します。
      author: 自分のお名前
# Perhaps EFT and CustomFeed::Config cannot be used together?
# Whether to store the HTML in <content:encoded> when nothing matched
  - module: Filter::EntryFullText
    config:
      store_html_on_failure: 0
# Strips HTML tags from the fetched file
  - module: Filter::HTMLScrubber
  - module: Publish::Feed2
    config:
      format: RSS
      dir: c:\plagger
      filename: rss-hit3.xml
- /Plagger/assets/plugins/Filter-EntryFullText のhit-u.yaml
author: wono
custom_feed_handle: http://opac.lib.hit-u.ac.jp/opac-new/book1/8.html
custom_feed_follow_link: /opac/books-query\?mode=2\&place=\&code=\d+
handle: http://opac\.lib\.hit-u\.ac\.jp/opac/books-query\?mode=2\&place=\&code=\d+
extract: <H1 CLASS="TR">(.*?)</H1>.*?<SPAN CLASS="PUBL" ID="VALUE">(.*?)</SPAN>.+?<SPAN CLASS="AL" ID="VALUE"><A HREF="/opac/disp-query\?mode=2\&con1=3\&kywd1=[\%a-zA-Z0-9]+\&con2=3\&con3=4\&disp=1">(.*?)</A>.*?<SPAN CLASS="CLS" ID="VALUE">(.*?)</A>.*?<SPAN CLASS="TXTL" ID="VALUE">(.*?)</SPAN>
extract_capture: title publisher author tags language
extract_after_hook: |
  if ($data->{publisher} eq "") {$data->{publisher} = "test"};
  $data->{body} = $data->{publisher} . " author:" . $data->{author} . " lang:" . $data->{language};
  if (!$data->{tags}) {
    $data->{tags} = "ZZZZZZZZZZZZZZZZZZ";
  } else {
    $data->{tags} =~ s/\<[A-Za-z0-9 \%\/\?\-\&\=\"]+\>//ig;
  }

*Plagger で RSS1.0 を出力するモジュール
　plaggerで新着図書一覧ページから、RSS1.0でdcモジュールを使って見たかったので、作って見ました。
　テスト環境が、ActivePerl＋XPなので、それ以外の環境ではディレクトリの指定など適宜修正してください。
　コーディングとしては手抜きの箇所が多々ありますが、一応動くので、載せておきます。
**作成・修正が必要なモジュール
- Plagger::Plugin::Publish::Feed2;
- Plagger::Plugin::Filter::EntryFullText;
- Plagger::Entry;
- Plagger::Plugin::SmartFeed;
**yamlの例
- config.yamlの例
- /Plagger/assets/plugins/Filter-EntryFullTextのhit-u.yamlの例
**モジュールの修正箇所
- Plagger::Plugin::Publish::Feed2;
package Plagger::Plugin::Publish::Feed2;
use strict;
use warnings;
use base qw( Plagger::Plugin );

our $VERSION = 0.01;

use XML::Feed;
use XML::Feed::Entry;
use XML::RSS::LibXML;
use File::Spec;
use XML::RSS;

$XML::Feed::RSS::PREFERRED_PARSER = "XML::RSS::LibXML";

# Hook this plugin into the 'publish.feed' phase, then prepare the output
# directory and the configuration defaults.
sub register {
    my($self, $context) = @_;
    $context->register_hook(
        $self,
        'publish.feed' => \&publish_feed,
    );
    $self->init_feed($context);
}

# Create the configured output directory when it does not exist yet and
# default 'full_content' to 1 when the key is absent from the config.
sub init_feed {
    my($self, $context) = @_;

    # check dir
    my $dir = $self->conf->{dir};
    unless (-e $dir && -d _) {
        mkdir $dir, 0755 or $context->error("mkdir $dir: $!");
    }

    unless (exists $self->conf->{full_content}) {
        $self->conf->{full_content} = 1;
    }
}

# 'publish.feed' hook: serialize $args->{feed} and save it under conf->{dir}.
# conf->{format} 'RSS' produces RSS 1.0 through XML::RSS; any other value
# (default 'Atom') is handed to XML::Feed.  Only the feed object that is
# actually written gets built.
sub publish_feed {
    my($self, $context, $args) = @_;

    my $conf        = $self->conf;
    my $f           = $args->{feed};
    my $feed_format = $conf->{format} || 'Atom';
    my $is_rss      = $feed_format eq 'RSS';

    my $xml = $is_rss
        ? $self->_rss10_as_string($f)
        : $self->_xml_feed_as_string($f, $feed_format);
    utf8::decode($xml) unless utf8::is_utf8($xml);

    # generate file path
    my $filepath = File::Spec->catfile($conf->{dir}, $self->gen_filename($f));
    $context->log(info => "save feed for " . $f->link . " to $filepath ($feed_format)");

    # The RSS 1.0 output goes through a raw handle and everything else
    # through a :utf8 layer, exactly as before.
    # NOTE(review): the differing layers look deliberate (possibly to avoid
    # double-encoding the XML::RSS output) -- confirm before unifying them.
    my $mode = $is_rss ? ">" : ">:utf8";
    open my $output, $mode, $filepath or $context->error("$filepath: $!");
    print $output $xml;
    close $output or $context->error("$filepath: $!");
}

# Build the RSS 1.0 document (with dc/syn/taxo modules) for feed $f and
# return it as a string.
sub _rss10_as_string {
    my($self, $f) = @_;
    my $conf = $self->conf;

    # NOTE(review): the fallback subject/rights/language/taxo values below
    # appear to be sample values rather than data about this feed.  They are
    # kept as defaults for compatibility; override them with the optional
    # dc_subject / dc_rights / dc_language / taxo config keys.
    my $rss = XML::RSS->new(version => '1.0');
    $rss->channel(
        title       => $f->title,
        link        => $f->link,
        description => $f->description,
        dc => {
            date      => Plagger::Date->now,
            subject   => $conf->{dc_subject}  || "Linux Software",
            creator   => $f->author,
            publisher => "Plagger/$Plagger::VERSION",
            rights    => $conf->{dc_rights}   || 'Copyright 1999, Freshmeat.net',
            language  => $conf->{dc_language} || 'en-us',
        },
        syn => {
            updatePeriod    => "hourly",
            updateFrequency => "1",
            updateBase      => "1901-01-01T00:00+00:00",
        },
        taxo => $conf->{taxo} || [
            'http://dmoz.org/Computers/Internet',
            'http://dmoz.org/Computers/PC',
        ],
    );

    # create RSS 1.0 entries
    for my $e ($f->entries) {
        # Computed outside the hash constructor so that it always
        # contributes exactly one value.
        my $subject = _tags_as_text(scalar $e->tags);

        $rss->add_item(
            title       => $e->title,
            link        => $e->permalink,
            description => $e->body_text,
            dc => {
                publisher => $e->publisher,
                subject   => $subject,
                creator   => $e->author,
                language  => $e->language,
                date      => $e->date,
            },
        );
    }

    return $rss->as_string;
}

# Build the feed for $f through XML::Feed in $feed_format (normally 'Atom')
# and return it as XML.
sub _xml_feed_as_string {
    my($self, $f, $feed_format) = @_;

    # Must stay in effect until as_xml has run.
    local $XML::Atom::DefaultVersion = "1.0";

    my $feed = XML::Feed->new($feed_format);
    $feed->title($f->title);
    $feed->link($f->link);
    $feed->modified(Plagger::Date->now);
    $feed->generator("Plagger/$Plagger::VERSION");
    $feed->description($f->description || '');
    $feed->author($f->author) if $f->primary_author;

    if ($feed_format eq 'Atom') {
        $feed->{atom}->id("tag:plagger.org,2006:" . $f->id);
    }

    # add entry
    for my $e ($f->entries) {
        my $entry = XML::Feed::Entry->new($feed_format);
        $entry->title($e->title);
        $entry->link($e->permalink);
        $entry->summary($e->body_text) if defined $e->body;

        # hack to bypass XML::Feed Atom 0.3 crufts (type="text/html")
        if ($self->conf->{full_content} && defined $e->body) {
            $entry->{entry}->content($e->body);
        }

        my $category = _tags_as_text(scalar $e->tags);
        $entry->category($category) if defined $category;

        $entry->issued($e->date)   if $e->date;
        $entry->modified($e->date) if $e->date;

        $entry->id("tag:plagger.org,2006:" . $e->id);

        if ($e->has_enclosure) {
            for my $enclosure (grep { defined $_->url && !$_->is_inline } $e->enclosures) {
                $entry->add_enclosure({
                    url    => $enclosure->url,
                    length => $enclosure->length,
                    type   => $enclosure->type,
                });
            }
        }

        $feed->add_entry($entry);
    }

    return $feed->as_xml;
}

# Flatten an entry's tags into one space-separated string.  The accessor may
# hold an array reference or a plain scalar; an array reference handed
# straight to a feed element would be stringified as "ARRAY(0x...)".
# Returns undef (in scalar context) when there are no tags.
sub _tags_as_text {
    my($tags) = @_;
    my @tags = ref $tags eq 'ARRAY' ? @$tags
             : defined $tags        ? ($tags)
             :                        ();
    return unless @tags;
    return join ' ', @tags;
}

# Expansions available in conf->{filename}: %u (url), %l (link), %t (title)
# and %i (id) of the feed.
my %formats = (
    'u' => sub { my $s = $_[0]->url;  $s =~ s!^https?://!!; $s },
    'l' => sub { my $s = $_[0]->link; $s =~ s!^https?://!!; $s },
    't' => sub { $_[0]->title },
    'i' => sub { $_[0]->id },
);

my $format_re = qr/%(u|l|t|i)/;

# Expand conf->{filename} for $feed.  Without a configured filename the name
# is '%i.rss' or '%i.atom', using the same 'Atom' default as publish_feed.
sub gen_filename {
    my($self, $feed) = @_;

    my $format = $self->conf->{format} || 'Atom';
    my $file   = $self->conf->{filename}
        || '%i.' . ($format eq 'RSS' ? 'rss' : 'atom');
    $file =~ s{$format_re}{
        $self->safe_filename($formats{$1}->($feed))
    }egx;
    $file;
}

# Replace every run of characters other than word characters and whitespace,
# and then every run of whitespace, with a single underscore.
sub safe_filename {
    my($self, $path) = @_;
    $path =~ s![^\w\s]+!_!g;
    $path =~ s!\s+!_!g;
    $path;
}

# XXX okay, this is a hack until XML::Feed is updated
{
    # Installing subs by glob assignment would warn if XML::Feed already
    # provides add_enclosure.
    no warnings qw( redefine once );

    *XML::Feed::Entry::Atom::add_enclosure = sub {
        my($entry, $enclosure) = @_;
        my $link = XML::Atom::Link->new;
        $link->rel('enclosure');
        $link->type($enclosure->{type});
        $link->href($enclosure->{url});
        $link->length($enclosure->{length});
        $entry->{entry}->add_link($link);
    };

    *XML::Feed::Entry::RSS::add_enclosure = sub {
        my($entry, $enclosure) = @_;
        $entry->{entry}->{enclosure} = {
            url    => $enclosure->{url},
            type   => $enclosure->{type},
            length => $enclosure->{length},
        };
    };
}

1;
__END__
- Plagger::Plugin::Filter::EntryFullText;
14行目付近への追加
use Plagger::Tag;
149行目付近への追加
# Wrap the parsed tags in an array reference: a list handed to the accessor
# would be stored as a plain scalar whenever it holds exactly one tag.
$args->{entry}->tags([ Plagger::Tag->parse($data->{tags}) ]) if $data->{tags};
# $args->{entry}->tags($data->{tags}) if $data->{tags};
$args->{entry}->author($data->{author}) if $data->{author};
$args->{entry}->publisher($data->{publisher}) if $data->{publisher};
$args->{entry}->language($data->{language}) if $data->{language};
- Plagger::Entry;
　5行目付近
__PACKAGE__->mk_accessors(qw( title author tags link feed_link id summary body rate icon meta source publisher language));
　103行目付近(1;の手前)
# Despite the add_ prefix, each of these only returns the current value of
# the accessor with the matching name.
sub add_publisher {
    my $self = shift;
    $self->publisher;
}

sub add_language {
    my $self = shift;
    $self->language;
}

sub add_tags {
    my $self = shift;
    $self->tags;
}
- Plagger::Plugin::SmartFeed;
# Fall back to the feed title when no description / author is configured.
$feed->description( $self->conf->{description} || $feed->title );
$feed->author( $self->conf->{author} || $feed->title );
- config.yamlの例
global:
  plugin_path:
    - D:\Perl\site\lib\Plagger\Plugin
  assets_path: D:\Perl\site\lib\Plagger\assets
  timezone: Asia/Tokyo
  log:
    level: debug
  cache:
    base: c:\plagger

# Looks for the *.yaml that contains the URL given under feed, then fetches
# and filters the data.
# When the url is changed, update the EntryFullText (EFT) yaml as well.
plugins:
  - module: Subscription::Config
    config:
      feed:
        - url: http://opac.lib.hit-u.ac.jp/opac-new/book1/8.html
#        - url: http://opac.lib.hit-u.ac.jp/opac-new/book1/new.html
#        - url: http://opac.lib.hit-u.ac.jp/opac-new/book2/new.html
          meta:
            follow_link: /opac/books-query\?mode=2\&place=\&code=\d+
# Apparently ? and & have to be escaped.
# http://shakenbu.org/yanagi/d/20060923.html
# When custom_feed_handle and custom_feed_follow_link /
# custom_feed_follow_xpath are set for EntryFullText, CustomFeed::Simple is
# not needed.
#  - module: CustomFeed::Config
#  - module: CustomFeed::Simple
# Removes duplicate data
#  - module: Filter::Rule
#    rule:
#      module: Deduped
#      path: C:\plagger\dedupe-hit-u.db
  - module: SmartFeed
    rule:
      module: Fresh
      mtime:
        path: C:\plagger\tmp\foo.tmp
        autoupdate: 1
    config:
      title: 一橋大学附属図書館新着図書
      link: http://opac.lib.hit-u.ac.jp/opac-new/book1/8.html
      description: 一橋大学附属図書館の新着図書リストからRSS1.0を生成します。
      author: 自分のお名前
# Perhaps EFT and CustomFeed::Config cannot be used together?
# Whether to store the HTML in <content:encoded> when nothing matched
  - module: Filter::EntryFullText
    config:
      store_html_on_failure: 0
# Strips HTML tags from the fetched file
  - module: Filter::HTMLScrubber
  - module: Publish::Feed2
    config:
      format: RSS
      dir: c:\plagger
      filename: rss-hit3.xml
- /Plagger/assets/plugins/Filter-EntryFullText のhit-u.yaml
author: wono
custom_feed_handle: http://opac.lib.hit-u.ac.jp/opac-new/book1/8.html
custom_feed_follow_link: /opac/books-query\?mode=2\&place=\&code=\d+
handle: http://opac\.lib\.hit-u\.ac\.jp/opac/books-query\?mode=2\&place=\&code=\d+
extract: <H1 CLASS="TR">(.*?)</H1>.*?<SPAN CLASS="PUBL" ID="VALUE">(.*?)</SPAN>.+?<SPAN CLASS="AL" ID="VALUE"><A HREF="/opac/disp-query\?mode=2\&con1=3\&kywd1=[\%a-zA-Z0-9]+\&con2=3\&con3=4\&disp=1">(.*?)</A>.*?<SPAN CLASS="CLS" ID="VALUE">(.*?)</A>.*?<SPAN CLASS="TXTL" ID="VALUE">(.*?)</SPAN>
extract_capture: title publisher author tags language
extract_after_hook: |
  if ($data->{publisher} eq "") {$data->{publisher} = "test"};
  $data->{body} = $data->{publisher} . " author:" . $data->{author} . " lang:" . $data->{language};
  if (!$data->{tags}) {
    $data->{tags} = "ZZZZZZZZZZZZZZZZZZ";
  } else {
    $data->{tags} =~ s/\<[A-Za-z0-9 \%\/\?\-\&\=\"]+\>//ig;
  }

表示オプション

横に並べて表示：

変化行の前後のみ表示：

最近更新されたページ

人気Wikiランキング

atwikiでよく見られているWikiのランキングです。新しい情報を発見してみよう！

全体ページランキング

最近アクセスの多かったページランキングです。話題のページを見に行こう！

「Plagger RSS 1.0 モジュール」(2006/10/12 (木) 14:04:09) の最新版変更点

更新履歴