#
# Linko is a simple filter to ease linking for common words.
#
# Linko::WORDS holds a Hash that maps words to URLs, feel free
# to +replace+ it if you want, or just add your common links.
#
# Each occurrence of myword_ will be translated into a
# footnote-style link.  Each occurrence of myword__ will
# be translated into a link with the word as text.
#
# Linko uses the standard *Cloth API, that is:
#
#   Linko.new(mytext).to_html
#
# Linko tries to be minimally invasive and will not do link replacement
# inside tags like +pre+, +code+ and +script+.
#
class Linko < String
  # Word -> URL mapping for Linko substitution.  Deliberately left
  # unfrozen: callers may replace it or add their own common links.
  WORDS = {
    'Ruby' => 'http://www.ruby-lang.org/',
    'Anarchaia' => 'http://chneukirchen.org/anarchaia/',
    'chris blogs' => 'http://chneukirchen.org/blog/',
    'Linko' => 'http://chneukirchen.org/blog/archive/2005/06/introducing-linko.html',
    'Darcs' => 'http://darcs.net'
  }

  # Substitute Linko tags in the string, return HTML with proper links.
  #
  # myword__ becomes an inline link with the word as link text;
  # myword_ becomes the plain word followed by a numbered,
  # footnote-style superscript link.
  def to_html
    result = ""
    apply = true                           # Suspended inside literal tags.
    footnotes = assign_footnotes
    tokenize { |kind, text|
      if kind == :tag
        result << text
        # Don't mess around in scripts and code.
        if text =~ %r!<(/?)(?:pre|code|kbd|script|math)[\s>]!
          apply = ($1 == "/")  # Opening or closing tag?
        end
      else
        apply && WORDS.each { |word, url|
          # Replace the double-underscore form first so the single-
          # underscore pattern below cannot eat half of it.
          text.gsub!(/#{Regexp.quote word}__/) {
            %Q{<a href="#{url}">#{word}</a>}
          }
          text.gsub!(/#{Regexp.quote word}_/) {
            %Q{#{word}<sup><a href="#{url}">#{footnotes[word]}</a></sup>}
          }
        }
        result << text
      end
    }
    result
  end

  private

  # Map every word that occurs in footnote form (single underscore) to a
  # sequential footnote number, ordered by first appearance in the text.
  # NOTE(review): this scans the raw string, so a word appearing only
  # inside pre/code still reserves a number — matches original behavior.
  def assign_footnotes
    footnotes = {}
    WORDS.keys.select  { |word| index_of word }. # What words do appear?
               sort_by { |word| index_of word }. # Where do they appear?
               each_with_index { |word, i|
                 footnotes[word] = i + 1         # Assign a sequential number.
               }
    footnotes
  end

  # Index of the first single-underscore occurrence of +word+, or nil.
  # The lookahead keeps myword__ (inline-link form) from matching.
  def index_of(word)
    index /#{Regexp.quote(word)}_(?!_)/
  end

  # One run of plain text followed by one tag, anchored to the previous
  # match so scan consumes the string contiguously.
  TAG_SOUP = /\G([^<]*)(<[^>]*>)/

  # Small, little and probably horrible tag_soup-style parser.
  # Yields (:text, chunk) and (:tag, chunk) pairs to the block.
  def tokenize(&block)
    prev_end = 0
    scan(TAG_SOUP) {
      block.call(:text, $1)  if $1 != ""
      block.call(:tag, $2)
      prev_end = $~.end(0)
    }
    if prev_end < size
      block.call(:text, self[prev_end..-1])       # Flush rest.
    end
    self
  end
end