#!/usr/bin/ruby -w
#
# rit - Ruby IMAP tool
#
# $Id: rit,v 1.29 2003/05/23 22:30:47 ianmacd Exp $
#
# Version : 0.5.1
# Author  : Ian Macdonald <ian@caliban.org>
# 
# Copyright (C) 2003 Ian Macdonald
# 
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2, or (at your option)
#   any later version.
# 
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
# 
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software Foundation,
#   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

=begin

= NAME
rit - Ruby IMAP tool
= SYNOPSIS
 rit [-d|--debug] [-D|--dump] [-f|--safe [char]]
     [-m|--mailbox <mailbox>] [-n|--no-copy] [-c|--no-create]
     [-r|--delete] [-b|--batch <size>] [-t|--translate foo/bar]
     -s|--src-server <source server> -u|--src-user <source user>
     [-S|--dst-server <dest. server>] [-U|--dst-user <dest. user>]
 rit -h|--help|-v|--version
= DESCRIPTION
rit is a tool for copying e-mail between mail servers and mailboxes, using the IMAP protocol.
= OPTIONS
: -b ((*batch*)) or --batch ((*batch*))
  Specifies the batch size to use for meta-data fetches. This defaults to
  100, but some speed gains can be made at the expense of memory usage by
  picking batch sizes up to 500.
: -c or --no-create
  No mailboxes will be created on the destination side. If you are not using
  -n|--no-copy and the mailboxes do not exist on the destination, an error
  will occur.
: -d or --debug
  Debugging messages will be printed. Use of this switch is recommended.
: -D or --dump
  The IMAP protocol exchange occurring over the wire will be displayed in
  its entirety. Note that the program will be very noisy when this option
  is used, so it is not recommended for following the program's progress.
  Use -d|--debug instead.
: -f or --safe
  Different IMAP server implementations use different namespaces. UW IMAP
  allows mailbox names to be comprised of characters that Cyrus IMAP does
  not, for example. Mirapoint mail appliances restrict the set of allowable
  characters even further.
  Use of this option will, by default, strip all suspicious characters
  from mailbox names when creating them on the destination server. If this
  option is passed a single character argument, that character will be
  used to replace suspicious characters in the mailbox name.
  If copying multiple mailboxes and stripping unsafe characters from a
  mailbox name results in a destination mailbox name the same as another
  mailbox to be copied, a warning will be printed and the mailbox will not
  be copied.
: -h or --help
  The usage message will be printed.
: -m ((*name*)) or --mailbox ((*name*))
  Only the named mailbox will be acted upon, rather than all the user's
  mailboxes.
: -n or --no-copy
  No messages will be copied.
: -r or --delete
  If any mailboxes on the source server also exist on the destination,
  they will be deleted prior to any other operation being carried out.
: -s ((*host*)) or --src-server ((*host*))
  Specify the source server. 
: -S ((*host*)) or --dst-server ((*host*))
  Specify the destination server. If no destination server is given, all
  operations will be carried out on the source server.
: -t ((*find/replace*)) or --translate ((*find/replace*))
  The string ((*find*)) in the source mailbox name will be replaced with
  the string ((*replace*)) in the destination mailbox name. The text to
  be replaced should be separated from the replacement text by a literal
  slash ('/').
: -u ((*user*)) or --src-user ((*user*))
  Specifies the user to authenticate as on the source server.
: -U ((*user*)) or --dst-user ((*user*))
  Specifies the user to authenticate as on the destination server. If no
  destination user is given, all operations will be carried out as the
  same user, whether or not a separate destination server is specified.
  Passwords are never given on the command line; rit prompts for each
  account's password interactively.
: -v or --version
  Display version information.
= EXAMPLES
: $ rit -df -s foo -S bar -b 500 -u ibiggun
  This command will copy mail for ibiggun from foo to bar, displaying
  debug messages, filtering unsafe characters from mailbox names, and
  batching mailbox meta-data fetches on foo into chunks of 500 for greater
  throughput.
: $ rit -dcnrs foo -u ibiggun
  After copying ibiggun's e-mail with the previous command, the above
  command will remove the old mailboxes and their contents from foo without
  recreating the mailboxes and copying the mail. Again, debugging messages
  will be displayed.
= AUTHOR
Written by Ian Macdonald <ian@caliban.org>
= COPYRIGHT
 Copyright (C) 2003 Ian Macdonald

 This is free software; see the source for copying conditions.
 There is NO warranty; not even for MERCHANTABILITY or FITNESS
 FOR A PARTICULAR PURPOSE.
= SEE ALSO
* ((<"rit home page - http://www.caliban.org/ruby/"|URL:http://www.caliban.org/ruby/>))
* ((<"INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1 - http://www.ietf.org/rfc/rfc2060.txt"|URL:http://www.ietf.org/rfc/rfc2060.txt>))
= BUGS
* when specifying -r and no destination is given, -c and -n should be assumed
* mailbox separator character ('/', '.', etc.) should be determined at run-time, rather than assumed


=end

require 'getoptlong'
require 'password'
require 'net/imap'

# a progress spinner class
# (this should really be put in a separate file to encourage code reuse)
#
# Creating a Spinner starts a background thread that animates a rotating
# baton on standard output, overwriting itself in place; calling stop ends
# the animation and blanks the baton.
#
class Spinner
  # the four frames of the animation: backslash, pipe, slash, dash
  Baton = '\|/-'

  # Start spinning.
  #
  # interval:: seconds to pause between frames (default 0.1). Without a
  #            pause the thread would spin flat out, hogging the CPU and
  #            flooding the terminal.
  def initialize(interval = 0.1)
    STDOUT.flush
    @t = Thread.new do
      rotation = 0
      loop do
        rotation = (rotation + 1) & 3   # cycle through frames 1, 2, 3, 0, ...
        # "\b" moves the cursor back so the next frame overwrites this one
        printf "%c\b", Baton[rotation]
        STDOUT.flush
        sleep interval
      end
    end
  end

  # Stop spinning and erase the baton.
  def stop
    @t.kill
    # wait for the thread to die, so that no frame can be drawn after the
    # baton has been blanked out
    @t.join
    print " \b"
  end
end

# extend Net::IMAP with some useful methods
#
module Net
  class IMAP

    # Characters considered safe in a mailbox name on every server.
    #
    # this list derived from mboxname.c in the Cyrus source code, but
    # some characters have been taken out, as the Mirapoint is more
    # conservative
    #
    # The ranges are expanded and joined explicitly: Array#to_s only
    # concatenates on Ruby 1.8, and on later versions would leak brackets,
    # quotes and commas into the set.
    GOOD_CHARS = (['A'..'Z', 'a'..'z', '0'..'9'].map { |range| range.to_a }.
                  flatten.join + " :=_~.-").freeze

    # Returns true if a LIST for +mbox+ yields at least one entry.
    #
    # NOTE(review): mbox is passed as the LIST reference name with a '%'
    # pattern; how servers combine the two may vary - confirm that this
    # cannot match a sibling mailbox whose name merely starts with mbox.
    def exists?(mbox)
      result = list(mbox, '%')
      # 1.6 returns [], but 1.8 returns nil
      !(result.nil? || result.empty?)
    end

    # Returns true if +mbox+ is listed and its first LIST entry does not
    # carry the \Noselect attribute; false if it is not listed at all.
    def selectable?(mbox)
      # a single LIST answers both "does it exist" and "can it be selected"
      result = list(mbox, '%')
      return false if result.nil? || result.empty?
      !result[0].attr.include?(:Noselect)
    end

    # Returns the number of messages in +mbox+ as reported by STATUS.
    # Any error is reported on standard error and counted as 0 messages.
    def msg_count(mbox)
      begin
        status(mbox, ["MESSAGES"])["MESSAGES"]
      rescue => e
        $stderr.puts("Unexpected error while retrieving message count " +
                     "for #{mbox}: #{e}")
        0
      end
    end

    # Returns the result of SEARCH ALL on the currently selected mailbox.
    # The +mbox+ argument is accepted but not used: the caller must have
    # selected the mailbox beforehand.
    def all_msgs(mbox)
      search('ALL')
    end

    # Removes every message from +mbox+, leaving the mailbox itself in place.
    def truncate(mbox)
      select(mbox)
      msgs = search('ALL')
      delete_msgs(msgs)
      close
    end

    # Flags the given messages as \Deleted and expunges them.
    # Does nothing when +msgs+ is nil or empty.
    def delete_msgs(msgs)
      return if msgs.nil? || msgs.empty?
      store(msgs, '+FLAGS', [:Deleted])
      expunge
    end
  end
end

# display usage
#
# Writes the synopsis and option summary to standard error, then ends the
# program with exit status +code+ (0 unless told otherwise).
#
def usage(code=0)
  help_text = <<EOF
Usage: #{PROG_NAME} [-d|--debug] [-D|--dump] [-f|--safe [char]]
	   [-m|--mailbox <mailbox>] [-n|--no-copy] [-c|--no-create]
	   [-r|--delete] [-b|--batch <size>] [-t|--translate foo/bar]
	   -s|--src-server <source server> -u|--src-user <source user>
	   [-S|--dst-server <dest. server>] [-U|--dst-user <dest. user>]
       #{PROG_NAME} -h|--help|-v|--version

-b, --batch         batch FETCHes of message meta-data (default is 100)
-c, --no-create	    don't create mailboxes on the destination server
-d, --debug	    display debugging messages
-D, --dump	    dump IMAP protocol wire exchange (very noisy)
-f, --safe	    filter out commonly unsupported characters in mailbox names
-h, --help	    display this usage message
-m, --mailbox	    process this mailbox only (default is ALL)
-n, --no-copy	    do not actually copy any messages
-r, --delete        delete destination mailboxes prior to copy
-s, --src-server    source host of messages
-S, --dst-server    destination host for messages
-t, --translate	    translate 'foo' in source mailbox to 'bar' in destination
-u, --src-user	    account name of message owner on source server
-U, --dst-user	    account name of message owner on destination server
-v, --version	    display version and copyright message, then exit
EOF
  $stderr.puts(help_text)
  exit(code)
end

# display version
#
# Writes the program name, version number and copyright notice to standard
# error, then ends the program with the default (successful) exit status.
#
def version
  banner = <<EOF
#{PROG_NAME} #{PROG_VERSION}

Copyright (C) 2003 Ian Macdonald <ian@caliban.org>
This is free software; see the source for copying conditions.
There is NO warranty; not even for MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE, to the extent permitted by law.
EOF
  $stderr.puts(banner)
  exit
end

# program identity, as shown by the usage and version messages
PROG_NAME    = File.basename($0)
PROG_VERSION = "0.5.1"

# connection end points; all unknown until the command line is parsed
src_server = nil
dst_server = nil
src_user   = nil
dst_user   = nil

# default mailbox (derived from the source user) and any explicit choice
mailbox  = nil
src_mbox = nil

# second argument for the LIST/LSUB calls made on the source server
pattern = '*'

# behaviour switches
copy   = true
create = true
debug  = false
safe   = false
delete = false

# number of messages whose data is fetched per round trip
batch_size = 100

# mailbox name translation (find text and replacement text)
translate_from = nil
translate_to   = nil

# Convert the argument of --batch into a batch size.
#
# Returns a positive Integer. Raises ArgumentError for anything else: a
# batch size of zero would make the copy loop end at once without copying
# a single message, and a negative one would make it fail outright.
def parse_batch_size(arg)
  size = (arg =~ /\A\s*\d+\s*\z/) ? arg.to_i : 0
  raise ArgumentError, "batch size must be a positive integer." unless size > 0
  size
end

# Split the argument of --translate into its two halves.
#
# Returns [find, replace], split at the last slash in +arg+. Raises
# ArgumentError if there is no slash or if either half is empty.
def parse_translation(arg)
  match = /(.*)\/(.*)/.match(arg)
  if match.nil?
    raise ArgumentError, "translation must be given as find/replace."
  end
  from, to = match[1, 2]
  if from.empty? || to.empty?
    raise ArgumentError, "null translation strings are not allowed."
  end
  [from, to]
end

# process the command line, overriding the defaults set above
begin
  opt = GetoptLong.new(
    [ "--batch",      "-b", GetoptLong::REQUIRED_ARGUMENT ],
    [ "--debug",      "-d", GetoptLong::NO_ARGUMENT ],
    [ "--delete",     "-r", GetoptLong::NO_ARGUMENT ],
    [ "--dst-server", "-S", GetoptLong::REQUIRED_ARGUMENT ],
    [ "--dst-user",   "-U", GetoptLong::REQUIRED_ARGUMENT ],
    [ "--dump",       "-D", GetoptLong::NO_ARGUMENT ],
    [ "--help",       "-h", GetoptLong::NO_ARGUMENT ],
    [ "--mailbox",    "-m", GetoptLong::REQUIRED_ARGUMENT ],
    [ "--no-copy",    "-n", GetoptLong::NO_ARGUMENT ],
    [ "--no-create",  "-c", GetoptLong::NO_ARGUMENT ],
    [ "--safe",       "-f", GetoptLong::OPTIONAL_ARGUMENT ],
    [ "--src-server", "-s", GetoptLong::REQUIRED_ARGUMENT ],
    [ "--translate",  "-t", GetoptLong::REQUIRED_ARGUMENT ],
    [ "--src-user",   "-u", GetoptLong::REQUIRED_ARGUMENT ],
    [ "--version",    "-v", GetoptLong::NO_ARGUMENT ]
  )
  opt.each_option do |name, arg|
    case name
    when '--batch'
      begin
        batch_size = parse_batch_size(arg)
      rescue ArgumentError => e
        $stderr.puts "Error: #{e.message}"
        exit 1
      end
    when '--debug'
      debug = true
    when '--delete'
      delete = true
    when '--dst-server'
      dst_server = arg
    when '--dst-user'
      dst_user = arg
    when '--dump'
      Net::IMAP::debug = true
    when '--help'
      usage
    when '--mailbox'
      # a single named mailbox: list it with '%' instead of '*'
      src_mbox = arg
      pattern = '%'
    when '--no-copy'
      copy = false
    when '--no-create'
      create = false
    when '--safe'
      # arg is the empty string when no replacement character was given;
      # an empty string is still true, so safe mode is switched on either way
      safe = arg
      unless safe.empty? || Net::IMAP::GOOD_CHARS.include?(safe[0])
        $stderr.puts "Error: '#{safe[0].chr}' is not a safe replacement " +
                     "character for mailbox names."
        exit 1
      end
    when '--src-server'
      src_server = arg
    when '--src-user'
      src_user = arg
      mailbox = "user." + src_user
    when '--translate'
      begin
        translate_from, translate_to = parse_translation(arg)
      rescue ArgumentError => e
        $stderr.puts "Error: #{e.message}"
        exit 1
      end
    when '--version'
      version
    end
  end
rescue GetoptLong::Error
  # covers invalid options and missing arguments as well as ambiguous
  # abbreviations and arguments given to options that take none
  usage 1
end

# with no explicit destination, work on the source server and/or account
dst_server = src_server unless dst_server
dst_user = src_user unless dst_user

# with no --mailbox, fall back on the default set by --src-user (user.<USER>)
src_mbox = mailbox unless src_mbox

# a source user, a source server and a mailbox must all be known by now
usage(1) unless [src_user, src_server, dst_server, src_mbox].all?

# ask for the source password, then open and authenticate the connection
src_prompt = "%s's password on %s" % [src_user, src_server]
src_passwd = Password.getc(src_prompt)
printf("Logging into %s as user %s...\n", src_server, src_user) if debug
src = Net::IMAP.new(src_server)
src.login(src_user, src_passwd)

same_account = (src_server == dst_server && src_user == dst_user)
if same_account
  # one connection serves as both ends
  dst = src
  if copy && debug
    printf("Source and destination are identical, so no copy will be done.\n")
  end
else
  # a second connection, with its own password, for the destination
  dst_prompt = "%s's password on %s" % [dst_user, dst_server]
  dst_passwd = Password.getc(dst_prompt)
  printf("Logging into %s as user %s...\n", dst_server, dst_user) if debug
  dst = Net::IMAP.new(dst_server)
  dst.login(dst_user, dst_passwd)
end

# build the list of source mailbox names to work on
msg_count = 0
printf("Reading mailboxes on %s...\n", src_server) if debug
# LIST can hand back nil rather than [] when nothing matches
src_mbox_list = (src.list(src_mbox, pattern) || []).collect { |mbox| mbox.name }

# subscriptions only need carrying over when the two ends differ
sub_mbox_list = []
unless src == dst
  printf("Reading mailbox subscriptions on %s...\n", src_server) if debug
  begin
    # LSUB likewise yields nil when there are no subscriptions
    sub_mbox_list = (src.lsub(src_mbox, pattern) || []).collect { |mbox| mbox.name }
  rescue => e
    # subscriptions are a nicety: say what went wrong, then carry on
    # without them rather than give up on the whole run
    $stderr.puts "Warning: unable to read mailbox subscriptions on " +
                 "#{src_server}: #{e}"
    sub_mbox_list = []
  end
end

# total up the messages over all of the source mailboxes
src_mbox_list.each { |mbox| msg_count += src.msg_count(mbox) }
mbox_count = src_mbox_list.size
printf("Found a total of %d message(s) in %d mailbox(es) on server %s.\n",
       msg_count, mbox_count, src_server) if debug

if delete
  # work through the names in descending order, so that a child such as
  # user.foo.bar is dealt with before its parent user.foo and the parent
  # can then be deleted without error
  src_mbox_list.sort.reverse.each do |mbox|
    next unless dst.exists?(mbox)

    printf("Mailbox %s exists on %s. Deleting...\n", mbox, dst_server) if debug
    begin
      dst.delete(mbox)
    rescue => delete_error
      # the folder may hold other mailboxes that are not on the source
      printf("Error while deleting %s: %s\n", mbox, delete_error)
      puts "Trying truncation instead..."
      begin
        dst.truncate(mbox)
      rescue => truncate_error
        # the folder may only be a container of other mailboxes,
        # with no messages of its own
        printf("Hmm, that failed, too: %s\n", truncate_error)
      end
    end
  end
end

# matches any single character outside the safe set; built once, with the
# set escaped so that none of its characters is read as regexp syntax
unsafe_char = /[^#{Regexp.escape(Net::IMAP::GOOD_CHARS)}]/

# For each source mailbox in name order: work out its destination name,
# create and subscribe to it on the destination as required, then copy
# the messages across in batches.
src_mbox_list.sort.each do |src_mbox|
  # count this mailbox as dealt with straight away, so that the running
  # total stays right even if the mailbox is skipped further down
  mbox_count -= 1
  mbox_name = src_mbox.dup

  if safe
    # nuke characters that are not allowed in mailbox names on some servers
    if safe.empty?
      # no optional argument supplied - strip bad characters
      mbox_name.delete!('^' + Net::IMAP::GOOD_CHARS)
    else
      # argument supplied - replace bad characters with placeholder
      mbox_name.gsub!(unsafe_char, safe[0].chr)
    end
  end

  # The find text is matched literally, not as a regexp: mailbox names are
  # full of dots, which would otherwise match any character. The block form
  # keeps backslashes in the replacement text literal, too.
  mbox_name.gsub!(translate_from) { translate_to } if translate_from

  if (safe || translate_from) && mbox_name != src_mbox
    printf("Mailbox %s will be translated to %s on %s...\n",
           src_mbox, mbox_name, dst_server) if debug
    if src_mbox_list.include?(mbox_name)
      # mailbox name has had bad characters removed, but now it has the
      # same name as some other mailbox
      $stderr.puts "Error: translated mailbox name #{mbox_name} already " +
                   "exists at source. Skipping copy..."
      next
    end
  end

  if create
    if dst.exists?(mbox_name) && dst.selectable?(mbox_name)
      printf("Mailbox %s exists on %s. Skipping creation...\n",
             mbox_name, dst_server) if debug
    else
      printf("Creating mailbox %s...\n", mbox_name) if debug
      dst.create(mbox_name)
    end
  end

  # carry the subscription over if the source mailbox was subscribed to
  if sub_mbox_list.include?(src_mbox) && dst.exists?(mbox_name)
    printf("Subscribing to mailbox %s on %s...\n", mbox_name, dst_server) if debug
    if dst.selectable?(mbox_name)
      dst.subscribe(mbox_name)
    else
      $stderr.puts "Error: #{mbox_name} is not selectable. Skipping subscription..."
    end
  end

  next if src == dst || ! copy

  begin
    src.select(src_mbox)
  rescue Net::IMAP::NoResponseError
    # typically a folder that only contains other mailboxes; there is
    # nothing to copy from it, and no reason to abandon the other mailboxes
    $stderr.puts "Error: unable to select #{src_mbox} on #{src_server}: " +
                 "#{$!}. Skipping copy..."
    next
  end

  printf("Checking for messages in %s on %s...\n", src_mbox, src_server) \
    if debug
  msgs = src.all_msgs(src_mbox)
  if msgs.size > 0
    printf("Copying %d message(s) from %s on %s to %s " \
           "(meta-data batch size: %d)... ",
           msgs.size, src_mbox, src_server, dst_server, batch_size) if debug
    printf("Renaming %s to %s on %s to play safe...", src_mbox, mbox_name,
           dst_server) if debug && safe && mbox_name != src_mbox
    STDOUT.flush

    # leave this off for now - there seems to be a risk of thread deadlock
    # spin = Spinner.new if debug

    # work through the messages batch_size at a time: bigger batches mean
    # fewer round trips, but more message data held in memory at once
    until (batch = msgs.slice!(0, batch_size)).empty?
      # PEEK avoids implicitly setting the \Seen flag on the source
      fetched = src.fetch(batch, ["BODY.PEEK[]", "FLAGS", "INTERNALDATE"])

      # nil comes back if none of the batch could be fetched
      (fetched || []).each do |data|
        # extract the data we need to copy the message while preserving
        # all of its metadata
        text, flags, i_date =
          ["BODY[]", "FLAGS", "INTERNALDATE"].collect { |i| data.attr[i] }

        begin
          flags.delete(:Recent)  # Mirapoint IMAP seems to dislike this flag
          dst.append(mbox_name, text, flags, i_date)
        rescue Net::IMAP::NoResponseError, Net::IMAP::BadResponseError
          # don't want an error to kill us, whether the server rejects
          # the message (NO) or the command itself (BAD)
          printf("\nError while appending to %s: %s\n", mbox_name, $!)
        end
      end
    end
    #(spin.stop; puts) if debug
    puts if debug
  end
  printf("%d mailbox(es) left to process.\n", mbox_count) if debug
end

# log out of and close each distinct connection, source first; when both
# ends share one connection it must only be closed once
connections = [src]
connections << dst unless src == dst
connections.each do |conn|
  conn.logout
  conn.disconnect
end
