#!/usr/bin/python2 -s
# -*- coding: ISO-8859-15 -*-
#
# digicamerge - (c) 2002-2006 Jerome Alet
#               (c) 2002-2006 C@LL - Conseil Internet & Logiciels Libres
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
#
# $Id: digicamerge,v 1.9 2006/02/19 14:03:03 jalet Exp $
#
# $Log: digicamerge,v $
# Revision 1.9  2006/02/19 14:03:03  jalet
# Added the --nodupes command line option to remove duplicates.
#
# Revision 1.8  2004/07/24 19:41:56  jalet
# Minor fix wrt Python warning
#
# Revision 1.7  2004/01/05 14:04:38  jalet
# Added the -C|--copy command line option.
# Allows more variables in the naming scheme.
# Both features were suggested by Patrick Install.
#
# Revision 1.6  2003/12/27 16:00:35  uid67467
# Savannah is back online
#
# Revision 1.5  2003/01/10 18:05:58  jalet
# Correct handling of unknown Exif tags in naming scheme.
#
# Revision 1.4  2003/01/10 11:11:23  jalet
# Small bug correction.
# Better error handling for unrecognized DateTime fields.
#
# Revision 1.3  2003/01/09 23:01:57  jalet
# Should be ready for 1.50
# Many additional variables added to the environment when running post-processing
# commands.
#
# Revision 1.2  2003/01/07 11:33:13  jalet
# Python 2 or higher needed
#
# Revision 1.1  2003/01/06 12:05:10  jalet
# Initial import into CVS
#
#
#

import sys

# Refuse to run on interpreters whose major version number, taken from the
# first character of sys.version, is lower than 2.
if int(sys.version[0]) <= 1 :
    sys.stderr.write("digicamerge : You need Python version 2.0 or higher.\n")
    sys.exit(-1)
    
import os
import time
import getopt
import fnmatch
import shutil
import errno
import md5

# Selection of the Exif backend.
#
# After this block :
#   pyexifpresent   is 1 if the py-exif module could be imported, else 0.
#   geneexifpresent is only defined when py-exif is missing : 1 if Gene Cash's
#                   EXIF module could be imported, else 0.
#   PyExif          is an object featuring a parse(filename) method, it is
#                   left undefined when no backend is available.
try :
    import exif as PyExif       # first we try to import the py-exif python module available from SourceForge
except ImportError :
    pyexifpresent = 0
    try :
        import EXIF             # If first one is not available, then we try with Gene Cash's one.
    except ImportError :
        geneexifpresent = 0
    else :
        geneexifpresent = 1
        class PyExifClass :
            """Adapter giving Gene Cash's EXIF module the parse() entry point used with py-exif."""
            def parse(self, filename) :
                """Returns the Exif tags found in the picture file as a mapping.

                   The tags' names are stripped of all their whitespace
                   characters, and a "DateTime" entry is added from the
                   "EXIF DateTimeOriginal" tag when this tag is present.
                """
                picture = open(filename, "rb")
                try :
                    parsed = EXIF.process_file(picture, 0)
                finally :
                    # never leak the file handle, even if the parsing fails
                    picture.close()
                original = parsed.get("EXIF DateTimeOriginal")
                if original is not None :
                    # only define DateTime when the tag really exists,
                    # otherwise we would store the "None" string as a date.
                    parsed["DateTime"] = "%s" % original
                # now delete all spaces chars in exif tags' names
                # (we loop over a copy because the mapping is modified)
                for (k, v) in list(parsed.items()) :
                    newk = "".join(k.split())
                    del parsed[k]
                    parsed[newk] = v
                return parsed
        PyExif = PyExifClass()
else :
    pyexifpresent = 1

# Version number, inserted at the top of the usage message below.
__version__ = "2.00"
# Author and contact address, inserted at the end of the usage message below.
__author__ = "Jerome Alet - <alet@librelogiciel.com>"

# Usage message. This text is passed through Python's % string formatting
# operator to insert __version__ and __author__, which is why each literal
# percent sign in it is written as %%.
__doc__ = """DigicaMerge v%s (C) 2002-2006 C@LL - Conseil Internet & Logiciels Libres
A command line tool to merge directories of pictures taken
with digital cameras.

If you've got a digital camera, your hard disk probably contains many
directories full of pictures all named with the same names. This utility
allows you to merge such directories' contents into a new directory,
and renames all the pictures on the fly, ensuring no filename clash
will occur.

command line usage :

  digicamerge [options] srcdir1 [srcdir2 ... srcdirN] destination

options :

  -v | --version     Prints DigicaMerge's version number then exits.
  -h | --help        Prints this message then exits.
  
  -q | --quiet       Doesn't print progress indicator.
  
  -m | --mode M      Does a chmod to mode M after each file move.
                     M is in octal but may not have a leading 0,
                     for example both 644 and 0644 work.
                          
  -n | --nodupes     Remove duplicates.                        
  
  -p | --pattern P   Uses pattern P to select the files to move to
                     the destination directory.
                     P default value is "*.jpg" which is convenient
                     for most cases.
  
  -s | --scheme S    Defines the naming scheme for the files created in
                     the destination directory. The default naming
                     scheme is "dscf%%(count1)04i.jpg" which matches my
                     digital camera default naming scheme. 
                     
                     You can currently use the following named formats,
                     which are all strings except count0 and count1 
                     which are integers :
                     
                       count0  : file number beginning at 0
                       count1  : file number beginning at 1
                       date    : date at which the picture was taken in
                                 the YYYYMMDD format.
                       isodate : same as above but in ISO8601 format,       
                                 i.e. YYYY-MM-DD
                       year    : year in YYYY format          
                       month   : month in MM format
                       day     : day in DD format
                       hour    : hour in hh format
                       minute  : minute in mm format 
                       second  : second in ss format
                       time    : time in hhmmss format
                       srcname   : Complete source filename
                       srcdir    : Source directory
                       srcbase   : Source basename
                       srcprefix : Source basename's prefix 
                       srcext    : Source extension
                       
                     You can also use all recognized Exif Tags, like  
                     ExposureProgram or ExifImageHeight for example.
                                 
                     A naming scheme MUST contain either %%(count0) or         
                     %%(count1) to avoid duplicate file names.
                     
                     The default naming scheme produces file names like :
                     
                       dscf0001.jpg
                       dscf0002.jpg
                       ...
                       
                     Look at the Python language string formatting 
                     capabilities to learn how to create your own 
                     naming scheme.
                     
  -c | --command C   Launches command C on each picture *after* each picture
                     move, but *before* the access and modification date and
                     time or access mode are modified. The C command can use 
                     environment variables to access to the predefined values 
                     discussed above, as well as to each Exif tag. All are 
                     available under the names listed above prefixed with DGCM,
                     like DGCMcount1 or DGCMExposureProgram.
                     Several additional variables are also defined :
                                DGCMsrcname : Complete source filename
                                DGCMdstname : Complete destination filename
                                 DGCMsrcdir : Source directory
                                 DGCMdstdir : Destination directory
                                DGCMsrcbase : Source basename
                                DGCMdstbase : Destination basename
                              DGCMsrcprefix : Source basename's prefix 
                              DGCMdstprefix : Destination basename's prefix
                                 DGCMsrcext : Source extension
                                 DGCMdstext : Destination extension
  
  -t | --touch       Modifies the file access and modification times according                     
                     to the date the picture was taken, as stated in the 
                     DateTime Exif tag.
                     
  -C | --copy        Copy files instead of moving/renaming them.                   
                              
examples :                              

  $ digicamerge --scheme "%%(date)s-%%(count0)i.jpeg" christmas newyear 2002end
  
  This will create a new directory named 2002end, and move all *.jpg pictures
  from the christmas and newyear directories into 2002end, renaming each file
  with the date at which the picture was taken followed by a number beginning 
  at 0, e.g. : 20021225-45.jpeg
  
  $ digicamerge pics renumbered
  
  This will put all the pictures present in the pics directory into 
  the renumbered directory. Each picture will be renumbered so that no 
  numbering hole will exist anymore (numbering holes might have been 
  created if you deleted some pictures from the pics directory) 
  
  $ digicamerge -c "jhead -st ~/thumbnails/\$DGCMdstname \$DGCMdstname" xmas xmas2
  
  This will do like the previous example, but thumbnails will be extracted 
  from each picture using the jhead command and will be put into the 
  ~/thumbnails/xmas2 directory under the same (possibly new) names.
  NB : Don't forget to escape the variable names with an antislash
       if you need to, otherwise they may come undefined !

  $ digicamerge --touch --scheme "%%(Model)s-%%(ExifImageWidth)sx%%(ExifImageHeight)s-%%(count1)04i.jpg" birthday mybirthday

  This will put all the pictures present in the birthday directory into
  the mybirthday directory. Each file will be renamed with a name which 
  looks like (for my digital camera) : FinePix S304-2048x1536-0001.jpg
  and the access and modification time of the picture files will be
  set to the date and time at which each picture was taken.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

Please e-mail bugs to: %s""" % (__version__, __author__)

class DigicaMergeError(Exception) :
    """Error raised by DigicaMerge for problems which must be reported to the user.

       The script's entry point catches this exception, writes its message
       to the standard error stream and exits with a failure status.
    """
    
def getFilenames(directory, pattern) :
    """Returns a list of all file names matching a pattern, recursively.

       directory : the directory in which the search begins.
       pattern   : an fnmatch style pattern, matched against each entry's
                   base name only.

       Each name in the returned list is prefixed with the path of the
       directory in which the file was found. Entries are returned in
       the order given by os.listdir().
    """
    filenames = []
    for entry in os.listdir(directory) :
        # os.listdir() only returns base names : they must be joined
        # to their parent directory before being tested or descended into,
        # otherwise they would be resolved against the current directory.
        fullname = os.path.join(directory, entry)
        if os.path.isdir(fullname) :
            filenames.extend(getFilenames(fullname, pattern))
        elif fnmatch.fnmatch(entry, pattern) :
            filenames.append(fullname)
    return filenames
    
def getExifData(fname) :
    """Returns a mapping of the picture file's Exif tags plus some date and time fields.

       The "DateTime" tag, when present and not empty, is cut into the
       year, month, day, hour, minute and second entries, from which the
       date, isodate and time entries are built. All these entries are
       empty strings otherwise.
       Whenever an Exif tag bears the same name as one of these entries,
       the Exif tag's value is the one which is kept.
    """
    exiftags = PyExif.parse(fname)
    stamp = exiftags.get("DateTime")
    if not stamp :
        year = month = day = ''
        hour = minute = second = ''
        date = isodate = hms = ''
    else :
        # fixed positions are used, the separators' values are ignored.
        (year, month, day) = (stamp[:4], stamp[5:7], stamp[8:10])
        (hour, minute, second) = (stamp[11:13], stamp[14:16], stamp[17:19])
        date = ''.join((year, month, day))
        isodate = '-'.join((year, month, day))
        hms = ''.join((hour, minute, second))

    values = {}
    values["isodate"] = isodate
    values["date"] = date
    values["year"] = year
    values["month"] = month
    values["day"] = day
    values["time"] = hms
    values["hour"] = hour
    values["minute"] = minute
    values["second"] = second
    # the Exif tags come last, so they win over the computed entries
    values.update(exiftags)
    return values
    
def computeMD5Hash(fname) :
    """Computes the MD5 hash for a particular file.

       fname : the name of the file to read.

       Returns the hash as a string of hexadecimal digits.
       The hash is only used to detect identical files.
    """
    try :
        import hashlib
    except ImportError :
        # very old Python versions only feature the md5 module
        digest = md5.new()
    else :
        digest = hashlib.md5()
    # pictures are binary files : text mode could alter or truncate
    # their contents on some platforms, giving incorrect hashes.
    infile = open(fname, "rb")
    try :
        # read by chunks to not load big files entirely in memory
        while 1 :
            chunk = infile.read(65536)
            if not chunk :
                break
            digest.update(chunk)
    finally :
        infile.close()
    return digest.hexdigest()
    
def main(*args, **kw) :
    """Does all the work.

       Positional arguments are the source directories, followed by the
       destination directory, which must not already exist.

       Keyword arguments, all mandatory :

         quiet   : if true, no progress message is printed.
         scheme  : the naming scheme, a format string applied to the
                   mapping of each picture's values.
         pattern : fnmatch style pattern used to select the source files.
         mode    : None, or a string containing the octal access mode to
                   set on each destination file.
         touch   : if true, each destination file's access and modification
                   times are set from the picture's DateTime Exif tag.
         command : None, or a command launched through os.system() for
                   each picture, with all values exported as DGCMxxx
                   environment variables.
         copy    : if true, pictures are copied instead of being moved.
         nodupes : if true, a picture whose MD5 hash was already seen is
                   skipped, and also deleted from its source directory
                   unless copy is set.

       Raises DigicaMergeError for each problem the user can fix : missing
       arguments, incorrect mode or naming scheme, no Exif module, nothing
       to process, already existing destination directory or file.
    """
    if len(args) < 2 :
        # without this test, args[-1] below would end in a traceback
        raise DigicaMergeError("You must specify at least one source directory and a destination directory.")
    destination = os.path.expanduser(args[-1])
    quiet = kw["quiet"]
    scheme = kw["scheme"]
    pattern = kw["pattern"]
    mode = kw["mode"]
    touch = kw["touch"]
    command = kw["command"]
    copyinstead = kw["copy"]
    nodupes = kw["nodupes"]
    if copyinstead :
        verb = "copied to"
    else :
        verb = "moved to"

    # the access mode is checked before any file is touched, to not
    # fail after some pictures were already moved.
    octalmode = None
    if mode is not None :
        try :
            octalmode = int(mode, 8)
        except ValueError :
            raise DigicaMergeError("Invalid access mode %s, it must be an octal number like 644 or 0644." % mode)

    # a scheme without any % char can't contain %(count0) nor %(count1),
    # so this single test is sufficient.
    if scheme.find("%") == -1 :
        raise DigicaMergeError("Invalid naming scheme %s, it must contain either %%(count0) or %%(count1) or another variable" % scheme)
    if (not pyexifpresent) and (not geneexifpresent) :
        raise DigicaMergeError("An Exif Python module is needed to extract informations from picture files.\nPlease download one from either :\n\thttp://home.cfl.rr.com/genecash/digital_camera.html\n  or\n\thttp://sourceforge.net/projects/pyexif/\n and install it on your system. Both are supported, but they may provide the same informations under different names.")

    sources = []
    for source in [os.path.expanduser(s) for s in args[:-1]] :
        sources.extend(getFilenames(source, pattern))
    if not sources :
        raise DigicaMergeError("No picture to process, please use the --pattern option.")

    try :
        # we want a non-existent destination directory to be sure
        # we don't overwrite people's pictures by mistake, so
        # we just try to create a new directory with this name.
        os.mkdir(destination)
    except OSError as err :
        if err.errno == errno.EEXIST :
            raise DigicaMergeError("Directory %s already exists, too dangerous !\nAction aborted." % destination)
        raise

    seenhashes = {}
    for (count0, srcname) in enumerate(sources) :
        count1 = count0 + 1
        isdupe = 0
        if nodupes :
            digest = computeMD5Hash(srcname)
            if digest in seenhashes :
                # it's a duplicate
                isdupe = 1
            else :
                # First time we see it
                seenhashes[digest] = None

        if isdupe :
            if copyinstead :
                msg = "is a duplicate, will be kept."
            else :
                msg = "is a duplicate, will be deleted."
                os.unlink(srcname)
            if not quiet :
                print("%s %s" % (srcname, msg))
            continue

        (srcdir, srcbase) = os.path.split(srcname)
        (srcprefix, srcext) = os.path.splitext(srcbase)
        exifdata = getExifData(srcname)
        exifdata.update({"count0": count0,
                         "count1": count1,
                         "srcname" : srcname,
                         "srcdir"  : srcdir,
                         "srcbase"  : srcbase,
                         "srcprefix"  : srcprefix,
                         "srcext"  : srcext})
        try :
            dstname = os.path.join(destination, scheme % exifdata)
        except KeyError :
            raise DigicaMergeError("Incorrect naming scheme %s for picture %s, maybe some Exif tags are not supported by your digital camera : %s" % (scheme, srcname, exifdata))
        if os.path.exists(dstname) :
            # the naming scheme gave the same name to two pictures :
            # going further would silently destroy the first one.
            raise DigicaMergeError("Destination file %s already exists, picture %s was left untouched.\nPlease use either %%(count0) or %%(count1) in the naming scheme to avoid duplicate file names." % (dstname, srcname))

        if not copyinstead :
            try :
                os.rename(srcname, dstname)
            except OSError as err :
                if err.errno != errno.EXDEV :
                    raise
                # cross device rename, we must do it the old way...
                shutil.copy2(srcname, dstname)
                os.unlink(srcname)
        else :
            shutil.copy2(srcname, dstname)

        if command is not None :
            (dstdir, dstbase) = os.path.split(dstname)
            (dstprefix, dstext) = os.path.splitext(dstbase)
            exifdata.update({ "dstname" : dstname,
                              "dstdir"  : dstdir,
                              "dstbase"  : dstbase,
                              "dstprefix"  : dstprefix,
                              "dstext"  : dstext })
            # all values are exported to the command's environment
            for k in exifdata.keys() :
                os.putenv("DGCM%s" % k, str(exifdata[k]))
            os.system(command)

        if octalmode is not None :
            os.chmod(dstname, octalmode)

        if touch :
            dt = exifdata.get("DateTime")
            if dt :
                try :
                    amtime = time.mktime(time.strptime(dt, "%Y:%m:%d %H:%M:%S"))
                except ValueError :
                    sys.stderr.write("Unrecognized DateTime info %s for %s, access and modification times were not changed. Please send this message to %s\n" % (dt, dstname, __author__))
                else :
                    try :
                        os.utime(dstname, (amtime, amtime))
                    except OSError as msg :
                        sys.stderr.write("ERROR '%s' when modifying file dates and times\n" % msg)
            else :
                sys.stderr.write("No DateTime info for %s, access and modification times were not changed.\n" % dstname)

        if not quiet :
            print("%s %s %s" % (srcname, verb, dstname))

def display_version_and_quit() :
    """Prints the version number on the standard output, then exits with status 0."""
    sys.stdout.write("%s\n" % __version__)
    sys.exit(0)

def display_usage_and_quit() :
    """Prints the command line usage on the standard output, then exits with status 0."""
    sys.stdout.write("%s\n" % __doc__)
    sys.exit(0)

def parse_commandline(argv, short, long) :
    """Parses the command line, controlling options.

       argv  : the list of command line arguments, without the program's name.
       short : the short options, in getopt's format : each letter which
               needs an argument is followed with a ':' character.
       long  : the list of long options, in getopt's format : each name
               which needs an argument ends with a '=' character.

       Returns a (parsed, args) tuple : parsed maps each known option's
       name, without leading dashes, to None if the option is unset, to 1
       if it is a set option without argument, or else to the option's
       argument. args is the list of the remaining arguments.

       If the command line is incorrect, the error message is written to
       the standard error stream, the usage message is printed, and the
       program exits with a failure status.
       If there's neither option nor argument and the standard input is
       a tty, the usage message is printed and the program exits.
    """
    # split options in two lists: those which need an argument, those which don't need any
    withoutarg = []
    witharg = []
    position = 0
    while position < len(short) :
        letter = short[position]
        position = position + 1
        if short[position:position + 1] == ':' :
            # needs an argument
            witharg.append(letter)
            position = position + 1 # skip the ':'
        else :
            # doesn't need an argument
            withoutarg.append(letter)

    for option in long :
        if option[-1] == '=' :
            # needs an argument
            witharg.append(option[:-1])
        else :
            # doesn't need an argument
            withoutarg.append(option)

    # we begin with all possible options unset
    parsed = {}
    for option in withoutarg + witharg :
        parsed[option] = None

    # then we parse the command line
    try :
        options, args = getopt.getopt(argv, short, long)
    except getopt.error as msg :
        sys.stderr.write("%s\n" % msg)
        print(__doc__)
        # an incorrect command line is an error : the caller
        # must not receive a successful exit status.
        sys.exit(-1)

    if options :
        for (name, value) in options :
            # we skip the '-' chars
            name = name.lstrip('-')
            if name in witharg :
                # needs an argument : set it
                parsed[name] = value
            elif name in withoutarg :
                # doesn't need an argument : boolean
                parsed[name] = 1
            else :
                # should never occur
                raise DigicaMergeError("Unexpected problem when parsing command line")
    elif (not args) and sys.stdin.isatty() : # no option and no argument, we display help if we are a tty
        display_usage_and_quit()
    return (parsed, args)
        
# Script entry point : parses the command line, merges each short option
# with its long equivalent, then either displays the help, the version
# number, or does the real work.
if __name__ == "__main__" :
    try :
        defaults = { "scheme"  : "dscf%(count1)04i.jpg",
                     "pattern" : "*.jpg" }
        short_options = "Cc:s:hm:p:qvtn"
        long_options = ["nodupes", "copy", "command=", "scheme=", "help", "mode=", "pattern=", "quiet", "touch", "version"]
        (options, args) = parse_commandline(sys.argv[1:], short_options, long_options)
        # the short option is preferred over the long one, and the
        # default value, if there's one, is only used as a last resort.
        for (shortname, longname) in (("h", "help"),
                                      ("v", "version"),
                                      ("q", "quiet"),
                                      ("s", "scheme"),
                                      ("p", "pattern"),
                                      ("m", "mode"),
                                      ("t", "touch"),
                                      ("c", "command"),
                                      ("C", "copy"),
                                      ("n", "nodupes")) :
            merged = options[shortname] or options[longname]
            if longname in defaults :
                merged = merged or defaults[longname]
            options[longname] = merged
        if options["help"] :
            display_usage_and_quit()
        elif options["version"] :
            display_version_and_quit()
        else :
            main(*args, **options)
    except DigicaMergeError as msg :
        sys.stderr.write("%s\n" % msg)
        sys.stderr.flush()
        sys.exit(-1)
    else :
        sys.exit(0)
