#!/usr/bin/ruby
# Copyright (c) 2005  akira yamada.
# You can redistribute it and/or modify it under the same terms as Ruby.

# Name this program was invoked as (basename of $0); used in the usage and
# lock-file messages and as the product name of the HTTP User-Agent.
PLANET_NAME = File.basename($0)
# Program version, reported in the HTTP User-Agent.
PLANET_VERSION = '0.3.3'

require 'open-uri'
require 'htree'
require 'erb'
require 'yaml'
require 'ostruct'
require 'logger'
require 'pstore'
require 'rexml/document'
require 'iconv'
require 'fcntl'
require 'gdbm'
require 'time'
require 'timeout'

require 'rss/0.9'
require 'rss/1.0'
require 'rss/2.0'
require 'rss/content'
require 'rss/image'
require 'rss/syndication'
require 'rss/parser'
require 'rss/maker'

# State files and fixed output names.  The state files are opened by bare
# name, i.e. relative to the current working directory.
FEED_CACHE_DB = 'feed_cache.db' # PStore
SORT_LIST_DB = 'sort_list.db' # PStore
ITEM_CACHE_DB = 'item_cache.dbm' # GDBM
LOCKFILE = '.planet.rb.lock'
# File names of the generated feeds inside the configured output directory.
RSS10_FILENAME = 'rss10.xml'
RSS20_FILENAME = 'rss20.xml'
# Only template names ending in ".erb" are accepted; the suffix is stripped
# to form the output file name.
TEMPLATE_SUFFIX = 'erb'
# Lower bound (seconds) applied to configured fetch intervals.
FETCH_INTERVAL_MIN = 60*5

# True when the running Ruby was released on or after 2005-04-09.  When
# false, pre_parse_filter merges repeated dc:subject elements and the archive
# reads the single dc_subject accessor instead of dc_subjects.
SUPPORTED_MULTIPLE_DC_SUBJECT = !(RUBY_RELEASE_DATE < '2005-04-09')

# Elements that filter_html treats as block-level.
HTML_BLOCK_TAGS = %w[
  blockquote div dl ol p pre table ul
  h1 h2 h3 h4 h5 h6
  address
]

# Elements that fixup_html treats as inline content.
HTML_INLINE_TAGS = %w[
  a abbr acronym area b bdo big br button caption
  center cite code col colgroup dd del dfn dir dt em
  fieldset form hr i img input ins kbd label
  legend li map menu optgroup option q s samp select
  small span strike strong sub sup
  tbody td textarea tfoot th thead title tr tt var
]

# Every element name that filter_html keeps as-is.
HTML_TAGS = HTML_BLOCK_TAGS + HTML_INLINE_TAGS

# Elements that are still replaced, but logged at info instead of warn level.
NOWARN_HTML_TAGS = %w[u script]

# Attribute names that cleanup_text keeps (href and src are handled
# separately there).
HTML_ATTRS = %w[
  id title lang xml:lang dir
  href hreflang type media charset
  src cite datetime alt longdesc
  height width usemap ismap name
  shape nohref action method enctype
  accept accept-charset value checked
  disabled readonly size maxlength
  multiple tabindex selected label
  rows cols summary border
  cellspacing cellpadding span abbr
  axis headers rowspan colspan
]

# Builds one feed entry from a hash found in the 'feeds' list.
# Returns [feed, valid]; valid is false when the 'uri' value could not be
# parsed.  Unknown keys are only warned about.
def parse_feed_config(fhash, log)
  valid = true
  feed = OpenStruct.new
  feed.uri = nil
  feed.keyword = nil
  feed.category = nil
  # nil means "no per-feed interval"; the caller falls back to the global
  # fetch-interval (config.fetch_interval).
  feed.interval = nil
  feed.ignore_date = false
  feed.assume_charsets = nil

  fhash.each do |fkey, fval|
    case fkey
    when /\Auri\z/i
      begin
        feed.uri = URI.parse(fval)
      rescue URI::Error
        valid = false
        log.error {"couldn't parse as URI: #{fval}"}
        feed.uri = nil
      end
    when /\Aname\z/i
      feed.name = fval.to_s
    when /\Akeyword\z/i
      feed.keyword = [fval].flatten.collect {|x| x.to_s}
      log.error {"not yet supported parameter: keyword"}
    when /\Acategory\z/i
      feed.category = [fval].flatten.collect {|x| x.to_s}
    when /\Ainterval\z/i
      feed.interval = fval.to_i
      if feed.interval < FETCH_INTERVAL_MIN
        feed.interval = FETCH_INTERVAL_MIN
      end
    when /\Aignore-date\z/i
      # The alternation is grouped so that both anchors apply to both words;
      # to_s keeps non-string values (numbers, nil) from raising in =~.
      if fval == true || /\A(?:true|yes)\z/i =~ fval.to_s
        feed.ignore_date = true
      end
    when /\Aface\z/i
      feed.face = fval.to_s
    when /\Aface-width\z/i
      feed.face_width = fval.to_i
    when /\Aface-height\z/i
      feed.face_height = fval.to_i
    when /\Aassume-charsets\z/i
      feed.assume_charsets = [fval].flatten
    else
      log.warn {"unknown feed parameter: #{fkey.to_s}"}
    end
  end

  [feed, valid]
end

# Translates the decoded configuration document into an OpenStruct.
#
# hash:: the decoded configuration.  Keys are matched case-insensitively:
#        title, owner, email, link, max-days, max-items, output-dir,
#        assume-charsets, templates, sort-by, fetch-interval, timezone,
#        log-level and feeds (an Array of Hash).
# log::  Logger-like object that receives the diagnostics.
#
# Returns the populated OpenStruct.  config.feeds maps each feed URI to its
# per-feed OpenStruct; feeds lacking a usable uri or a name are left out.
# Unknown keys only produce a warning.  Raises RuntimeError when +hash+ is
# not a Hash, or - after every key has been examined - when any value was
# invalid.
def parse_config(hash, log)
  unless hash.kind_of?(Hash)
    # A document that does not decode to a mapping cannot be iterated below.
    log.error {"invalid type of configuration: #{hash.class} (expected Hash)"}
    raise 'invalid configuration'
  end

  config = OpenStruct.new
  config.sort_by = :seen
  config.timezone = :utc
  config.log_level = Logger::WARN
  config.output_dir = 'output'
  config.templates = ['index.html.erb']
  config.days_per_page = nil
  config.items_per_page = 60
  config.assume_charsets = nil
  # Global default used for feeds that do not set their own interval.
  config.fetch_interval = FETCH_INTERVAL_MIN
  config.feeds = {}

  error = false
  hash.each do |key, value|
    case key
    when /\Atitle\z/i
      config.title = value.to_s

    when /\Aowner\z/i
      config.owner = value.to_s

    when /\Aemail\z/i
      config.email = value.to_s

    when /\Alink\z/i
      begin
        config.link = URI.parse(value.to_s)
      rescue URI::Error
        error = true
        log.error {"couldn't parse as URI: #{value}"}
      end

    when /\Amax-days\z/i
      # values below 1 disable the limit
      config.days_per_page = value.to_i
      config.days_per_page = nil if config.days_per_page < 1

    when /\Amax-items\z/i
      # values below 1 disable the limit
      config.items_per_page = value.to_i
      config.items_per_page = nil if config.items_per_page < 1

    when /\Aoutput-dir\z/i
      config.output_dir = value.to_s

    when /\Aassume-charsets\z/i
      config.assume_charsets = [value].flatten

    when /\Atemplates\z/i
      # names without the template suffix are silently dropped
      config.templates = []
      [value].flatten.each do |x|
        x = x.to_s
        config.templates << x if /\.#{TEMPLATE_SUFFIX}\z/o =~ x
      end

    when /\Asort-by\z/i
      case value
      when /\Aseen\z/i
        config.sort_by = :seen
      when /\Aupdate\z/i
        config.sort_by = :update
      else
        error = true
        log.error {"invalid value for 'sort-by': #{value} (expected 'seen' or 'update')"}
      end

    when /\Afetch-interval\z/i
      config.fetch_interval = value.to_i
      if config.fetch_interval < FETCH_INTERVAL_MIN
        config.fetch_interval = FETCH_INTERVAL_MIN
      end

    when /\Atimezone\z/i
      case value
      when /\Alocal\z/i
        config.timezone = :local
      when /\Autc\z/i
        config.timezone = :utc
      else
        error = true
        log.error {"invalid value for 'timezone': #{value} (expected 'utc' or 'local')"}
      end

    when /\Alog-level\z/i
      case value
      when /\Afatal\z/i
        config.log_level = Logger::FATAL
      when /\Aerror\z/i
        config.log_level = Logger::ERROR
      when /\Awarn\z/i
        config.log_level = Logger::WARN
      when /\Ainfo\z/i
        config.log_level = Logger::INFO
      when /\Adebug\z/i
        config.log_level = Logger::DEBUG
      else
        error = true
        log.error {"invalid value for 'log-level': #{value} (expected 'fatal', 'error', 'warn', 'info' or 'debug')"}
      end

    when /\Afeeds\z/i
      if value.kind_of?(Array)
        value.each do |fhash|
          if fhash.kind_of?(Hash)
            feed, valid = parse_feed_config(fhash, log)
            error = true unless valid

            if feed.uri && feed.name
              config.feeds[feed.uri] = feed
              if feed.ignore_date
                log.info {"ignore date feeld in RSS-items: #{feed.uri}"}
              end
            end
          else
            error = true
            log.error {"invalid type value for 'feeds': #{fhash.class} (expected Array of Hash)"}
          end
        end
      else
        error = true
        log.error {"invalid type value for 'feeds': #{value.class} (expected Array)"}
      end

    else
      log.warn {"unknown parameter: #{key.to_s}"}
    end
  end

  # Details were logged above; raise once so all problems get reported.
  raise 'invalid configuration' if error

  config
end

# Reads the configuration file at +path+, decodes it as YAML and hands the
# result to parse_config.
#
# Read and decode failures are logged at fatal level and re-raised
# unchanged; failures inside parse_config propagate without extra logging.
# Returns whatever parse_config returns.
def load_config(path, log)
  raw = nil
  begin
    File.open(path, 'r') {|io| raw = io.read}
    log.debug {"#{path} loaded"}
  rescue Exception => err
    log.fatal {"#{err}"}
    raise
  end

  decoded = nil
  begin
    decoded = YAML.load(raw)
    log.debug {"#{path} decoded"}
  rescue Exception => err
    log.fatal {"#{err} - #{path}"}
    raise
  end

  parsed = parse_config(decoded, log)
  log.debug {"#{path} parsed"}
  parsed
end

# Rewrites +text+ in place so that each /rdf:RDF/item carries at most one
# dc:subject element:
#   <dc:subject>foo</dc:subject><dc:subject>bar</dc:subject>...
#    -> <dc:subject>foo, bar...</dc:subject>
# Does nothing when SUPPORTED_MULTIPLE_DC_SUBJECT is true.  Always returns
# nil.
def pre_parse_filter(text)
  return nil if SUPPORTED_MULTIPLE_DC_SUBJECT

  # XXX: workaround for multiple dc:subject.
  doc = REXML::Document.new(text)
  REXML::XPath.match(doc, "/rdf:RDF/item").each do |item_elem|
    subjects = item_elem.get_elements("dc:subject")
    next if subjects.empty?

    values = subjects.collect do |subject|
      item_elem.delete_element(subject)
      subject.get_text.to_s
    end

    # re-attach the first element, now carrying the joined values
    merged = subjects.first
    merged.text = values.join(', ')
    item_elem.add_element merged
  end

  text.replace(doc.to_s)
  nil
end

# Retrieves feed documents and keeps the last body, the validators for
# conditional requests, and the failure history of every URI in a PStore.
class Fetcher
  # Raised by #fetch when a URI is skipped because its error back-off time
  # has not elapsed yet.
  class Passed < RuntimeError; end

  # Cache record layouts.  The Struct names are part of the marshalled data
  # in the PStore file and must not change.
  FEED_INFOv00 = Struct.new('FeedInfo',
      :body, :last_modified, :etag, :last_fetched)
  # error_status is nil or [time_of_error, description, consecutive_errors].
  FEED_INFOv01 = Struct.new('FeedInfo01',
      :body, :last_modified, :etag, :last_fetched, :error_status)

  private

  # name::   value sent as the User-Agent header
  # cache::  path of the PStore file
  # logger:: Logger-like object
  def initialize(name, cache, logger)
    @name = name
    @cache = PStore.new(cache)
    @logger = logger
  end

  # Returns +text+ validated against the first charset of +charsets+ it can
  # be converted in (Iconv from a charset to itself).  On an illegal
  # sequence the first offending byte is dropped and the conversion retried,
  # at most 7 times per charset.  Raises RuntimeError when no charset works.
  def normalize_text(text, charsets)
    ret = nil

    charsets.each do |code|
      tmp_text = text
      max_try = 7
      begin
        ret = Iconv.iconv(code, code, tmp_text).first
        break
      rescue Iconv::IllegalSequence
        if max_try > 0
          tmp_text = $!.success.first + $!.failed.first[1..-1]
          max_try -= 1
          retry
        end
      rescue StandardError
        # best effort: any other conversion failure means "try the next one"
      end
    end

    raise 'normalize text failed' if ret.nil?

    ret
  end

  # Fetches +uri+ unless the cached copy is younger than +interval+ seconds.
  #
  # uri::      feed location; also the cache key
  # interval:: minimum number of seconds between two fetches
  # charsets:: candidate charsets for normalize_text; nil or empty means the
  #            body is used unchanged
  #
  # Returns [body, last_modified, last_fetched, cached], where cached tells
  # whether a body was already in the cache before this call.
  #
  # Raises Passed while a previously failing URI is still inside its
  # back-off window (interval * 1.5 * consecutive errors, capped at
  # interval * 10).  Re-raises the network error of a failed fetch, except
  # that a timeout with a cached body returns the cached data.  The failure
  # bookkeeping is committed before the error is raised.
  def fetch(uri, interval, charsets = [])
    ret = nil
    cached = false
    fresh = false
    failure = nil
    opts = {'User-Agent' => @name}

    @cache.transaction do
      info = @cache[uri]
      if info
        if info.kind_of?(FEED_INFOv00)
          # upgrade records written with the old layout
          info = @cache[uri] =
            FEED_INFOv01.new(info.body, info.last_modified,
                             info.etag, info.last_fetched, nil)
        end

        if info.body
          cached = true
          @logger.debug {"cache found for #{uri}"}
        end

        if info.error_status
          etime, _err, n = info.error_status
          @logger.debug {"errored at #{etime.iso8601} (#{n}) -  #{uri}"}
          eint = interval*1.5*n
          eint = interval*10 if eint > interval*10
          next_try = etime + eint
          if next_try > Time.now
            @logger.info {"the time not reached for #{uri} (next is #{next_try.iso8601})"}
            # propagating out of the block aborts the transaction
            raise Passed
          end
        end

        if cached
          if info.last_fetched + interval > Time.now
            # young enough: answer from the cache without a request
            @logger.info {"the time not reached for #{uri}"}
            fresh = true
            ret = [info.body, info.last_modified, info.last_fetched, cached]
          else
            opts['If-None-Match'] = info.etag if info.etag
            if info.last_modified
              opts['If-Modified-Since'] = info.last_modified.httpdate
            elsif info.last_fetched
              opts['If-Modified-Since'] = info.last_fetched.httpdate
            end
          end
        end
      end

      unless fresh
        info = (@cache[uri] ||= FEED_INFOv01.new(nil, nil, nil, nil, nil))
        begin
          open(uri, opts) do |i|
            @logger.info {"opened #{uri}"}
            now = Time.now
            if charsets.nil? || charsets.empty?
              body = i.read
            else
              body = normalize_text(i.read, charsets)
            end

            info.last_modified = i.last_modified
            info.last_fetched = now
            info.etag = i.meta['etag']
            info.body = body
            # a successful fetch ends any error back-off
            info.error_status = nil

            ret = [body, info.last_modified, info.last_fetched, cached]
          end

        rescue OpenURI::HTTPError, Timeout::Error, SocketError, SystemCallError => e
          if e.kind_of?(OpenURI::HTTPError) && /\A304/ =~ e.message
            @logger.info {"#{uri} is not modified"}
            info.error_status = nil
            info.last_fetched = Time.now
            ret = [info.body, info.last_modified, info.last_fetched, cached]

          else
            n = 0
            n = info.error_status[2] || 0 if info.error_status
            # Only a description is stored: the exception object itself may
            # reference things Marshal cannot dump.
            info.error_status = [Time.now, "#{e.class}: #{e.message}", n + 1]

            if e.kind_of?(Timeout::Error) && cached
              @logger.info {"#{uri} timeouted (uses cache)"}
              ret = [info.body, info.last_modified, info.last_fetched, cached]
            else
              @logger.error {"#{e} - #{uri}"}
              # Raised after the transaction block so the bookkeeping above
              # is committed first (an exception leaving the block aborts
              # the transaction).
              failure = e
            end
          end
        end
      end
    end

    raise failure if failure

    ret
  end

  # Reads one member of the cached record for +uri+; nil when there is no
  # record.
  def cached_field(uri, field)
    @cache.transaction(true) do
      info = @cache[uri]
      info ? info[field] : nil
    end
  end

  # Cached body for +uri+, or nil.
  def body(uri)
    cached_field(uri, :body)
  end

  # Cached ETag for +uri+, or nil.
  def etag(uri)
    cached_field(uri, :etag)
  end

  # Cached Last-Modified time for +uri+, or nil.
  def last_modified(uri)
    cached_field(uri, :last_modified)
  end

  # Time of the last fetch recorded for +uri+, or nil.
  def last_fetched(uri)
    cached_field(uri, :last_fetched)
  end

  public :fetch, :body, :etag, :last_modified, :last_fetched
end # class Fetcher

# Item store: marshalled item records live in a GDBM file keyed by item
# link, and two link => time indexes ('seen' and 'update') live in a PStore.
class Archive
  IGNORE_LINK_PATT = /\D\d{8}(?:\.html?)?\#[ct]\d\d+\z/ # tDiary
  # Maximum number of links kept per index by reduce!.
  MAX_RECORDS = 1000

  # Record layouts.  The Struct names are part of the marshalled data and
  # must not change.
  ITEM_INFOv00 =				# for PLANET_VERSION 0.0
    Struct.new('ItemInfo',
               :link, :date, :author,
               :title, :category, :text,
               :copyright, :feed)
  ITEM_INFOv01 =				# for PLANET_VERSION 0.1
    Struct.new('ItemInfo01',
               :link, :date, :author,
               :title, :category, :text,
               :copyright, :base, :feed)

  private

  # cache::   path of the GDBM item file
  # sort_db:: path of the PStore holding the 'seen' and 'update' indexes
  # logger::  Logger-like object
  def initialize(cache, sort_db, logger)
    @cache = GDBM.open(cache, 0600)
    @sort_db = PStore.new(sort_db)
    @logger = logger

    @sort_db.transaction do
      @sort_db['seen'] ||= {}
      @sort_db['update'] ||= {}
    end
  end

  # Converts an old-layout record; the item link doubles as its base.
  def convert_00_to_01(info)
    ITEM_INFOv01.new(info.link, info.date, info.author,
                     info.title, info.category, info.text,
                     info.copyright, info.link, info.feed)
  end

  # Closes the item file and drops both store references.
  def close
    @cache.close
    @cache = nil
    @sort_db = nil
  end

  # Stores the items of +rss+, dispatching on rss.rss_version.
  # modified:: date used for items without one (or for every item when
  #            feed.ignore_date is set)
  # feed::     the feed's configuration entry
  # Raises RuntimeError for versions other than 0.9x, 1.0 and 2.0.
  def update(rss, modified, feed)
    case rss.rss_version
    when /\A0\.9/
      update_09(rss, modified, feed)
    when /\A1\.0/
      update_10(rss, modified, feed)
    when /\A2\.0/
      update_20(rss, modified, feed)
    else
      raise "unsupported version: #{rss.rss_version}"
    end
  end

  # Channel link of +rss+ as a URI, or nil when it is missing or unparsable.
  def channel_link(rss)
    URI.parse(rss.channel.link)
  rescue StandardError
    nil
  end

  def update_09(rss, modified, feed)
    rss_link = channel_link(rss)
    infos = rss.items.collect do |item|
      info = ITEM_INFOv01.new
      info.link = URI.parse(item.link)
      if feed.ignore_date
        info.date = modified
      else
        info.date = item.pubDate || item.date || modified
      end
      info.author = item.author
      info.title = item.title
      info.category = item.category
      info.text = item.description
      info.copyright = nil
      info.feed = feed.uri
      info.base = rss_link || info.link
      info
    end
    update_sort_db(infos, feed)
  end

  def update_10(rss, modified, feed)
    rss_link = channel_link(rss)
    infos = rss.items.collect do |item|
      info = ITEM_INFOv01.new
      info.link = URI.parse(item.about)
      if feed.ignore_date
        info.date = modified
      else
        info.date = item.dc_date || item.date || modified
      end
      info.author = item.dc_creator || item.dc_publisher
      info.title = item.title || item.dc_title
      if SUPPORTED_MULTIPLE_DC_SUBJECT
        info.category = item.dc_subjects.collect {|x| x.value}
      else
        info.category = item.dc_subject
      end
      info.text = item.content_encoded || item.description
      info.copyright = item.dc_rights
      info.feed = feed.uri
      info.base = rss_link || info.link
      info
    end
    update_sort_db(infos, feed)
  end

  # RSS 2.0 items are read through the same accessors as 0.9x items.
  def update_20(rss, modified, feed)
    update_09(rss, modified, feed)
  end

  # Writes +infos+ (oldest first) to the item file and the indexes.  A new
  # link enters both indexes; a link whose record changed only gets a new
  # 'update' time.  Links matching IGNORE_LINK_PATT are skipped.
  def update_sort_db(infos, feed)
    infos.sort {|a, b| a.date <=> b.date}.each do |info|
      link = info.link.to_s
      if IGNORE_LINK_PATT =~ link
        @logger.debug {"archive update: ignored by IGNORE_LINK_PATT: #{link}"}
        next
      end

      @sort_db.transaction do
        if @cache[link]
          prev_info = Marshal.load(@cache[link])
          if prev_info != info
            @cache[link] = Marshal.dump(info)
            @sort_db['update'][link] = info.date
          end
        else
          @cache[link] = Marshal.dump(info)
          @sort_db['seen'][link] = info.date
          @sort_db['update'][link] = info.date
        end
      end
    end
  end

  # Trims each index to its MAX_RECORDS newest links and deletes item
  # records that neither index references any more.
  def reduce!
    @sort_db.transaction do
      ['seen', 'update'].each do |type|
        db = @sort_db[type]
        if db.size > MAX_RECORDS
          db.keys.sort {|a, b| db[b] <=> db[a]}[MAX_RECORDS .. -1].each do |link|
            db.delete(link)
          end
        end
      end

      @cache.keys.each do |link|
        unless @sort_db['seen'].include?(link) ||
            @sort_db['update'].include?(link)
          @cache.delete(link)
        end
      end
    end
  end

  # Loads the record stored for +link+ (converting the old layout).  Returns
  # nil, after logging, when the record is missing or cannot be loaded.
  def load_item(link)
    info = Marshal.load(@cache[link])
    info = convert_00_to_01(info) if info.kind_of?(ITEM_INFOv00)
    info
  rescue StandardError
    @logger.error {"load error: #{$!} - #{link}"}
    nil
  end

  # Collects items, newest first according to the +type+ index ('seen' or
  # 'update').
  #
  # range:: tested with range === item date; nil disables the test
  #         NOTE(review): the main script passes the 'max-days' number here,
  #         which never === a date - confirm what was intended.
  # num::   maximum number of items taken by count; nil disables it
  # feeds:: feed URI => feed configuration; items of other feeds are skipped,
  #         as are items whose categories match none of feed.category
  #
  # Collection stops at the first item that satisfies neither +range+ nor
  # +num+.  The returned records are copies whose date is the index time.
  def items_range(type, range, num, feeds)
    ret = []
    @sort_db.transaction(true) do
      db = @sort_db[type]
      db.keys.sort {|a, b| [db[b], b] <=> [db[a], a]}.each do |link|
        info = load_item(link)
        # an unreadable record must not abort the whole listing
        next unless info

        feed = feeds[info.feed]
        next unless feed

        if info.category && feed.category
          fc = feed.category.collect {|x| Regexp.quote(x)}.join('|')
          ic = [info.category].flatten
          if !fc.empty? && !ic.empty? && ic.grep(%r!#{fc}!i).empty?
            @logger.debug {"archive items: ignored by category: #{ic} doesn't match /#{fc}/"}
            next
          end
        end

        if (range && range === info.date) ||
            (num && ret.size < num)
          new_info = info.dup
          new_info.date = db[link]
          ret << new_info
        else
          break
        end
      end
    end
    ret
  end

  # Items ordered by the time they were first seen.
  def items_seen(range, num, feeds)
    items_range('seen', range, num, feeds)
  end

  # Items ordered by the time they were last changed.
  def items_updated(range, num, feeds)
    items_range('update', range, num, feeds)
  end

  public :close, :update, :reduce!, :items_seen, :items_updated
end # class Archive

# Replaces every element whose name is not in HTML_TAGS, at any depth below
# +elem+, with a <div> (when it contains a block-level element) or a <span>
# (otherwise).  The children of a replaced element are kept; its attributes
# are not.
#
# item::   only item.link is used, in the log messages
# elem::   REXML::Document object or REXML::Element object
# logger:: receives one message per replacement (info level for names in
#          NOWARN_HTML_TAGS, warn level otherwise)
#
# Returns true when a direct child of +elem+ is, or was replaced by, a
# block-level element.
def filter_html(item, elem, logger)
  has_block_tag = false

  elem.children.each do |e|
    next unless e.kind_of?(REXML::Element)

    e_has_block_tag = filter_html(item, e, logger)

    unless HTML_TAGS.include?(e.name)
      if NOWARN_HTML_TAGS.include?(e.name)
        logger.info {"replaced <#{e.name}> - #{item.link}"}
      else
        logger.warn {"replaced <#{e.name}> - #{item.link}"}
      end

      n = REXML::Element.new(e_has_block_tag ? "div" : "span")
      # Iterate over a copy: adding a child to n removes it from e, and
      # walking e's live child list while it shrinks skips every other child.
      e.children.each {|x| n.add(x)}
      e.replace_with(n)

      # the <div> that took e's place is itself block-level
      has_block_tag = true if e_has_block_tag
    end

    has_block_tag = true if HTML_BLOCK_TAGS.include?(e.name)
  end

  has_block_tag
end # def filter_html

# Wraps raw text and inline elements found directly inside a <div> (or
# inside a parent whose name is empty) in <p> elements.  Consecutive
# text/inline siblings are collected into the same <p>.
#
# item, logger: not used here except to be passed on to the recursive call
# elem: REXML::Document object or REXML::Element object
#
# Modifies the tree in place and returns nil.
def fixup_html(item, elem, logger)
  # the <p> currently collecting siblings, or nil when none is open
  fixing = nil
  elem.children.each do |e|
    if e.kind_of?(REXML::Text) ||
        e.kind_of?(REXML::Element) && HTML_INLINE_TAGS.include?(e.name)
      if e.parent.name == "" || e.parent.name == "div"
        if fixing
          # a <p> is open: move this node into it
          if e.kind_of?(REXML::Text)
            fixing.add_text(e)
          else
            fixing.add_element(e)
          end
          elem.delete(e)

        elsif e.kind_of?(REXML::Element) || /\A\s*\z/ !~ e.to_s
          # an inline element or non-blank text opens a new <p>, which takes
          # the node's place in elem
          n = REXML::Element.new("p")
          elem.replace_child(e, n)
          n.add_text(e)
          fixing = n

        else
          # whitespace-only text while no <p> is open: left where it is
          fixing = nil
        end

      else
        fixing = nil
      end

    elsif e.kind_of?(REXML::Element)
      # any other element closes the open <p> and is processed recursively
      fixing = nil
      fixup_html(item, e, logger)
    end
  end

  nil
end # def fixup_html

# Turns item.text into the HTML fragment that gets published.
#
# The text is parsed (wrapped in a <div>) with HTree, the item title is
# removed from its head, headings are renamed, href/src values are resolved
# against item.base, attributes outside HTML_ATTRS are dropped, and the tree
# is passed through filter_html and fixup_html.
#
# Returns the serialized children of the wrapping <div> as a String.
def cleanup_text(item, logger)
  escaped_title = ERB::Util::html_escape(item.title)
  doc = HTree.parse('<div>' + item.text + '</div>').to_rexml

  # XXX: tDiary hack(?) - reject title from head of text
  lead = doc.children.first.children.first # lead = /div/*[1]
  if lead.kind_of?(REXML::Text)
    lead_str = lead.to_s
    if lead_str[0, escaped_title.size] == escaped_title &&
        lead_str[escaped_title.size] == ?<
      doc.replace_child(lead, REXML::Text.new(''))
    end
  end

  # XXX: tDiary hack - reject header which contains the title from head of text
  squeezed_title = escaped_title.gsub(/\s+/, '')
  REXML::XPath.match(doc, '//h3').each do |h3|
    heading = REXML::XPath.match(h3, './/text()').join()
    h3.parent.delete(h3) if heading.gsub(/\s+/, '') == squeezed_title
  end

  REXML::XPath.match(doc, '//*').each do |node|
    # XXX: tDiary hack
    lowered = node.name.downcase
    if ['h1', 'h2', 'h3', 'h4'].include?(lowered)
      node.name = 'h5'
    elsif lowered == 'h6'
      node.name = 'h7'
    end

    # reject unacceptable attrs
    next if node.attributes.empty?

    node.attributes.each_pair do |attr_name, attr_value|
      if attr_name == 'href' || attr_name == 'src'
        begin
          resolved = item.base + attr_value.to_s
          node.attributes[attr_name] =
            REXML::Attribute.new(attr_name, resolved, attr_value.element)
        rescue URI::Error
          # an unresolvable reference is left as it was
        end
      elsif !HTML_ATTRS.include?(attr_name)
        node.attributes.delete(attr_name)
      end
    end
  end

  filter_html(item, doc, logger)
  fixup_html(item, doc, logger)

  text = ''
  doc[0].children.each {|child| child.write(text)}
  text
end

# Pairs every item with a Hash of layout flags and renders the template at
# +path+ through expand_erb0.
#
# Flags (present and true only when they apply):
#   :new_date      - the item's day differs from the previous item's
#   :new_feed      - the item's feed differs from the previous item's
#                    (the comparison restarts on every new day)
#   :next_new_date - the following item falls on another day
#   :next_new_feed - the following item belongs to another feed
#   :last_date     - the item is the last one
#
# Returns the result of expand_erb0.
def expand_erb(path, items, feeds, config)
  day_of = lambda {|x| [x.date.year, x.date.mon, x.date.day]}

  last_day = [nil, nil, nil]
  last_feed = nil
  flagged = []

  items.each_with_index do |item, idx|
    flags = {}
    today = day_of.call(item)

    unless last_day == today
      flags[:new_date] = true
      last_day = today
      last_feed = nil
    end

    unless last_feed == item.feed
      flags[:new_feed] = true
      last_feed = item.feed
    end

    follower = items[idx + 1] if idx + 1 < items.size
    if idx + 1 < items.size
      flags[:next_new_date] = true if today != day_of.call(follower)
      flags[:next_new_feed] = true if item.feed != follower.feed
    else
      flags[:last_date] = true
    end

    flagged << [item, flags]
  end

  expand_erb0(path, flagged, feeds, config)
end

def expand_erb0(path, items, feeds, config)
  if config.timezone == :local
    now = Time.now
  else
    now = Time.now.gmtime
  end
  ERB.new('<% include ERB::Util %>' + 
      open(path, 'r') {|i| i.read}, 0, '>').result(binding)
end

# MAIN

# Log to stderr; start at DEBUG only when ruby runs with -d ($DEBUG), at
# WARN otherwise.  The configured level is applied once the config is read.
log = Logger.new($stderr)
level = $DEBUG ? Logger::DEBUG : Logger::WARN
log.level = level
log.debug {"initial log level is #{level}"}

# The first command line argument names the configuration file.
config_file = ARGV.shift
if !config_file
  log.fatal {"config file doesn't given"}
  log.fatal {"Usage: #{PLANET_NAME} config-file"}
  exit(1)
end

# Single-instance guard.  LOCKFILE holds the pid of the running instance.
# An existing lock file whose pid no longer names a process is taken over;
# an empty or unreadable pid is treated as "another instance is starting".
if FileTest.exist?(LOCKFILE)
  File.open(LOCKFILE, 'r+') do |i|
    i.flock(File::LOCK_EX)
    pid = i.read.to_i
    if pid != 0
      # Signal 0 only probes for the process.  EPERM means the process
      # exists but belongs to another user, so it counts as running.
      running =
        begin
          Process.kill(0, pid)
          true
        rescue Errno::ESRCH
          false
        rescue Errno::EPERM
          true
        end
      if running
        log.fatal {"another #{PLANET_NAME} is running (pid = #{pid})"}
        exit(1)
      end

      log.info {"lockfile found but no such process, ignored (pid = #{pid})"}
      i.rewind
      i.puts $$.to_s
      i.truncate(i.pos)

    else
      log.fatal {"another #{PLANET_NAME} is running"}
      exit(1)
    end
  end

else
  File.open(LOCKFILE, 'a') do |o|
    unless o.flock(File::LOCK_EX|File::LOCK_NB)
      log.fatal {"another #{PLANET_NAME} is running"}
      exit(1)
    end
    o.rewind
    o.puts $$.to_s
    o.truncate(o.pos)
  end
end

# Read the configuration; any failure ends the run with exit status 1.
config =
  begin
    load_config(config_file, log)
  rescue Exception => err
    log.fatal {"load config failed: #{err}"}
    exit(1)
  end

# Switch from the start-up log level to the configured one.
unless log.level == config.log_level
  log.level = config.log_level
  log.info {"log level is #{log.level}"}
end

fetcher = Fetcher.new(
    "#{PLANET_NAME}/#{PLANET_VERSION} (#{config.link})",
    FEED_CACHE_DB, log)
archive = Archive.new(ITEM_CACHE_DB, SORT_LIST_DB, log)

# Fetch, parse and archive every configured feed.  A failure at any step
# only skips the feed concerned.
log.info {"fetch started"}
config.feeds.each do |key, feed|
  log.info {"uri: #{feed.uri}"}
  begin
    # per-feed settings take precedence over the global ones
    body, lm, lf, cached = fetcher.fetch(feed.uri,
        feed.interval || config.fetch_interval,
        feed.assume_charsets || config.assume_charsets)
  rescue Fetcher::Passed
    next
  rescue Timeout::Error
    log.warn {"fetch timeouted #{feed.uri}"}
    next
  rescue Exception
    log.error {"fetch failed #{feed.uri}: #{$!}"}
    next
  end

  begin
    pre_parse_filter(body)
  rescue Exception
    # second attempt: let HTree re-serialize the document as XML first
    begin
      tmp = ''
      HTree.parse(body).display_xml(tmp)
      body = tmp
      pre_parse_filter(body)
    rescue Exception
      log.warn {"pre_parse_filter: #{$!} - #{feed.uri}"}
      next
    end
  end

  begin
    begin
      rss = RSS::Parser.parse(body, false)

    rescue
      # Salvage: re-assemble the document from the <item> elements that
      # parse on their own, dropping the ones that do not.
      e = $!
      tmp_items = nil
      if m = %r!(?:<item\s[^>]+>.*?</item>\s*)+!m.match(body)
        pre, tmp, post = m.pre_match, m.to_s, m.post_match
        pre = "" unless pre
        post = "" unless post
        tmp_items = tmp.scan(%r!<item\s[^>]+>.*?</item>\s*!m)
      end
      raise e if tmp_items.nil? || tmp_items.empty?

      kept = 0
      tmp_items.each do |tmp_item|
        begin
          RSS::Parser.parse(pre + tmp_item + post)
          # the item is fine: it becomes part of the prefix for the next try
          pre = pre + tmp_item
          kept += 1
        rescue
          if %r!<item\s[^>]+>!m =~ tmp_item
            tag = $&
            log.info {"ignored imparseable item: #{tag.gsub(/\n/, '')} - #{feed.uri}"}
          else
            log.warn {"ignored imparseable item: #{tmp_item} - #{feed.uri}"}
          end
        end
      end
      # Give up only when no item survived; a bad last item must not
      # discard the good ones collected before it.
      raise e if kept == 0
      rss = RSS::Parser.parse(pre + post, false)
    end

    # fill in title and link from the channel unless already set
    unless config.feeds[key].title
      config.feeds[key].title = rss.channel.title
    end
    unless config.feeds[key].link
      config.feeds[key].link = rss.channel.link
    end
  rescue Exception
    log.warn {"rss parse: #{$!} - #{feed.uri}"}
    next
  end

  begin
    if cached
      archive.update(rss, lm || lf, feed)
    else
      # new feed
      archive.update(rss, Time.at(0), feed)
    end
  rescue Exception
    log.error {"archive: #{$!} - #{feed.uri}"}
    next
  end
end
log.info {"fetch end"}


log.info {"output started"}

now = Time.now
if config.sort_by == :seen
  items = archive.items_seen(
      config.days_per_page, config.items_per_page, config.feeds)
else
  items = archive.items_updated(
      config.days_per_page, config.items_per_page, config.feeds)
end

# OUTPUT: HTML
# Clean every item's text; items that cannot be cleaned are left out.  A new
# list is built rather than deleting from the list being iterated, which
# would skip the item following each deleted one.
items = items.select do |item|
#  log.debug {"cleanup item: #{item.link}"}
  begin
    item.text = cleanup_text(item, log)
    true
  rescue Exception
    log.error {"cleanup error: #{item.link}: #{$!.message}"}
    false
  end
end
# The feeds carry the first 31 cleaned items; taken after the cleanup so
# that rejected items do not reach the RSS output either.
rss_items = items[0 .. 30]

config.templates.each do |template|
  log.debug {"expands template: #{template}"}
  begin
    ret = expand_erb(template, items, config.feeds, config)

    of = File.join(config.output_dir,
        File.basename(template.sub(/\.#{TEMPLATE_SUFFIX}\z/, '')))
    # write to a temporary name, then rename over the final file
    tf = of + '.new'
    log.info {"output to #{of}"}
    File.open(tf, 'w') {|o| o.print ret}
    File.rename(tf, of)
  rescue StandardError, ScriptError
    # ScriptError covers syntax errors inside a template
    log.error {"template error: #{$!.message}"}
  end
end

# OUTPUT: RSS 1.0
rss10 = File.join(config.output_dir, RSS10_FILENAME)
tmp_rss10 = rss10 + '.new'

log.debug {"build RSS 1.0"}
rss = RSS::Maker.make('1.0') do |maker|
  maker.channel.title = config.title
  maker.channel.description = config.title
  maker.channel.link = config.link + RSS10_FILENAME
  maker.channel.about = config.link + RSS10_FILENAME
  maker.channel.dc_date = now

  rss_items.each do |item|
    rss_item = maker.items.new_item
    rss_item.link = item.link
    # to_s: an item without a title must not abort the whole feed
    rss_item.title = config.feeds[item.feed].name + ': ' + item.title.to_s
    rss_item.content_encoded = item.text
    rss_item.dc_date = item.date
    rss_item.dc_creator = item.author
  end
end
log.info {"output to #{rss10}"}
begin
  # write to a temporary name, then rename over the final file
  File.open(tmp_rss10, 'w') {|o| o.print rss.to_s}
  File.rename(tmp_rss10, rss10)
rescue
  log.error {"output RSS 1.0 error: #{$!.message}"}
end

# OUTPUT: RSS 2.0
rss20 = File.join(config.output_dir, RSS20_FILENAME)
tmp_rss20 = rss20 + '.new'

log.debug {"build RSS 2.0"}
rss = RSS::Maker.make('2.0') do |maker|
  maker.channel.title = config.title
  maker.channel.description = config.title
  # this document is published as RSS20_FILENAME, not as the RSS 1.0 file
  maker.channel.link = config.link + RSS20_FILENAME
  maker.channel.about = config.link + RSS20_FILENAME
  maker.channel.dc_date = now

  rss_items.each do |item|
    rss_item = maker.items.new_item
    rss_item.link = item.link
    # to_s: an item without a title must not abort the whole feed
    rss_item.title = config.feeds[item.feed].name + ': ' + item.title.to_s
    rss_item.description = item.text
    rss_item.pubDate = item.date
    rss_item.author = item.author
  end
end

log.info {"output to #{rss20}"}
begin
  # write to a temporary name, then rename over the final file
  File.open(tmp_rss20, 'w') {|o| o.print rss.to_s}
  File.rename(tmp_rss20, rss20)
rescue
  log.error {"output RSS 2.0 error: #{$!.message}"}
end

log.info {"output end"}

# POST PROCESS
# Trim the archive indexes, drop item records no index refers to any more,
# then close the item store.
archive.reduce!
archive.close

# Release the single-instance lock.  This line is only reached when the run
# gets this far; the exit(1) paths above leave the lock file in place.
File.unlink(LOCKFILE)
exit
