#!/usr/bin/python2
#
# Copyright 2004 Michael Kefeder (h.t.d@gmx.de)
#
# This program renames files
# It converts filenames from ANY supported encoding to UTF-8, this enables
# a pretty good co-existence of netatalk 2.0 and samba 3.0
# If character decoding fails then the name is quoted in URL-style
# (special-chars become %xx codes)
# It converts CAP-sequences from ANY supported encoding to UTF-8
# It removes characters illegal for windows: */\?<>:|",
#   whitespace from beginning and end of filenames,
#   dots from beginning and end of filenames
#   unless disabled by cmd-line parameter
# Filenames that would be truncated to a zero length string will be renamed
# to a quote from the Movie "Finding Nemo"
#   e.g. ' ...' -=> 'Shark bait, oo ha ha!'
#
# TODO:
#  * check if filename-length exceeds 32 characters (mac os9, afp2.x limit)
#      --max-namelength (default 32)
#      --check-namelength "only_duplicates|always"
#      if filename too long: - 1 and iterate from 0-9 to find a new name
#      no name found: - 2 and iterate from 0-99 to find a new name
#      no name found: this is insane, stop processing!
#  * documentation (manpage?)
#
# FUTURE:
#  * cmd-line switch "--newfs-encoding=" to allow
#    output in other encodings than utf-8
#  * cmd-line switch to list available encodings
#    (not implemented in python, point user at python2 documentation:
#    http://docs.python.org/lib/node127.html and simply check if the
#    passed argument is an existing codec before running.)
#  * cmd-line switch to set what to do if de-/en-coding of characters fails (abort, ignore,
#    replace,...)
# * cmd-line switches for VETO_FILES and VETO_DIRS # * cmd-line switch for invalid characters import os import sys import string, urllib import re import getopt # presets # INVALID_CHARS = unicode ('*/\?<>:|"') VETO_DIRS = (".AppleDB", ".AppleDouble", ".AppleDesktop") VETO_FILES = ("Icon\r", ".htaccess", ":2eDS_Store", ".DS_Store") def usage(): print "USAGE: %s [-r|--recursive] " % (sys.argv[0]) print "\t -v|--verbose displays 'real-filename' ('CAPdecoded') -=> 'new-filename'" print "\t --notest ACTUALLY moves the files!" print "\t --no-capdecode disable decoding of CAP encoded sequences" print "\t --no-winvalid disable creation of windows-compatible names" print "\t --no-db-update disable updating of CNID-database" print "\t --cap-encoding= Character Encoding of CAP-sequences\n\t in filenames" print "\t default: MAC-ROMAN" print "\t --fs-encoding= Character Encoding of filenames\n\t as they are in filesystem" print "\t default: UTF-8" print "\t --cnid-db-path= Path to .AppleDB directory" print "\t default: /.AppleDB" print "\t -h|--help for help (this message)" print "NOTE: all output is UTF-8 encoded!" 
print "for a list of supported encodings see:" print "http://docs.python.org/lib/node127.html" sys.exit(1) #if len (sys.argv) < 2: # usage() try: opts, args = getopt.getopt(sys.argv[1:], "rvh", ["recursive", "verbose", "help", "notest", "no-capdecode", "no-winvalid", "no-db-update", "cap-encoding=", "fs-encoding=", "cnid-db-path="]) except getopt.GetoptError: usage() pRecursive = 0 pVerbose = 0 pNoTest = 0 pNoCAP = 0 pNoWIN = 0 pNoDB = 0 pCAPenc = 'mac-roman' pFSenc = 'utf-8' pDB_PATH = '' for o, a in opts: if o in ("-r", "--recursive"): pRecursive = 1 if o in ("-v", "--verbose"): pVerbose = 1 if o == "--notest": pNoTest = 1 if o == "--no-capdecode": pNoCAP = 1 if o == "--no-winvalid": pNoWIN = 1 if o == "--no-db-update": pNoDB = 1 if o == "--cap-encoding": pCAPenc = a if o == "--fs-encoding": pFSenc = a if o == "--cnid-db-path": pDB_PATH = a if o in ("-h", "--help"): usage() if len(args) == 0: usage() # python 2.2 workarounds try: x = UnicodeDecodeError except NameError: UnicodeDecodeError = UnicodeError else: del x start_path = args[0] if pNoDB == 0: from cnid2db import cnid_db cnid2 = cnid_db() if pDB_PATH is '': cnid2.open (start_path) else: cnid2.open (pDB_PATH) if pNoTest == 1: recursive = '' if pRecursive == 1: recursive = ' RECURSIVELY' sys.stderr.write ("This will rename all files in '%s'%s. Are you sure? (Y/n): " % (start_path, recursive)) sure = sys.stdin.readline() if string.strip(sure) != 'Y': sys.exit(1) sys.stderr.write ("Are you abso-fuckin'-lutely sure? 
(Y/n): ") sure = sys.stdin.readline() if string.strip(sure) != 'Y': sys.exit(1) else: sys.stderr.write ("To enable actual renaming use the --notest cmd-line option, see --help for more info.\n") def isValidUTF8 (s): return re.compile ("^(([\0-\x7F])|([\xC2-\xDF][\x80-\xBF])|((([\xE0][\xA0-\xBF])|([\xE1-\xEC\xEE-\xEF][\x80-\xBF])|([\xED][\x80-\x9F]))[\x80-\xBF])|((([\xF0][\x90-\xBF])|([\xF1-\xF3][\x80-\xBF])|([\xF4][\x80-\x8F]))[\x80-\xBF][\x80-\xBF]))*$").match(s) def stripChar (c, s): while len(s) > 0 and s[0] == c: s = s[1:] while len(s) > 0 and s[-1] == c: s = s[:-1] return s def invalidencoding2unicode (filename, fs_encoding): retval = unicode ('') for character in filename: try: retval += unicode (character, fs_encoding) except UnicodeDecodeError: retval += urllib.quote (character) return retval def filename2unicode (filename, fs_encoding): """ converts filename from current encoding to unicode if the filename is not valid UTF-8 this is useful to not convert existing utf-8 encoded names again, and again, and... 
if decoding fails the bad characters are quoted URL conform (%xx encoded) """ if not isValidUTF8 (filename): try: return unicode (filename, fs_encoding) except UnicodeDecodeError: return invalidencoding2unicode(filename, fs_encoding)#unicode (urllib.quote (filename)) else: return unicode (filename, 'utf-8') def cap2unicode (cap_encoded, cap_encoding = "mac-roman"): """ decodes ':xx' encoded characters from cap_encoding to unicode """ retval = unicode ('') c = 0 p = string.find (cap_encoded, ":", 0) if p is not -1: while c < len(cap_encoded): if cap_encoded[c] is unicode (':'): hexcode = cap_encoded[c+1:c+3] if hexcode[0] in string.hexdigits and hexcode[1] in string.hexdigits: value = int(hexcode, 16) try: retval += unicode ("%c" % value, cap_encoding) except UnicodeDecodeError: retval += urllib.quote ("%c" % value) c += 2 else: retval += ':' else: retval += cap_encoded[c] c += 1 else: retval = cap_encoded return retval # unicode (retval, cap_encoding) def createValidFilename (brokenFilename, fixedFilename = ""): """ replaces invalid characters for windows with '_' strips whitespace from beg./end of names strips dots from beg./end of names runs at least twice per name to check if output of 'createValidFilename' is really a valid name it recursively continues until filename has a length of 0 or stays the same after validation. example filename why recursion is necessary: '... file.extension ...' when the dots are stripped invalid spaces are left """ brokenFilename = string.strip (brokenFilename) # remove whitespace from start and end of filename retval = unicode("") for character in brokenFilename[:]: if character in INVALID_CHARS: retval += unicode ("_") else: retval += character retval = stripChar (unicode('.'), retval) if len(retval) == 0: return unicode("Shark bait, oo ha ha!") if retval == fixedFilename: return retval else: return createValidFilename (retval, retval) # run recursively because invalid filenames like ' ... ... ... .... ..' 
result in yet again invalid names, and again, and again.... def createUniqueFilename (newfilename, newfiles, files): # rename only if newfilename does NOT already exist if newfilename in newfiles or newfilename.encode ('utf-8') in files: sys.stdout.write (" WARNING: '%s' exists!" % (newfilename.encode ('utf-8'))) i = 0 maxlen = 30 # NOTE: maximum length for atalk v2 is 31 chars, for a new name it needs to append new chars & stay within limit tmpfilename = newfilename[:] while newfilename in newfiles or newfilename.encode ('utf-8') in files: newfilename = "%s%d" % (tmpfilename[:maxlen], i) i += 1 if i == 10: maxlen -= 1 if i > 99: # protection against crazy circumstances sys.stderr.write ("OMG, do something about that!") sys.exit(1) return newfilename # statistics ntotalf = 0 ntotald = 0 nrenamedf = 0 nrenamedd = 0 maxdepth = 0 def scanDirectory (path, recursive = 0, depth = 0): global ntotalf, ntotald, nrenamedf, nrenamedd, maxdepth if maxdepth < depth: maxdepth = depth os.chdir(path) print "%2dCWD: '%s'" % (depth, os.getcwd()) path = '.' files = os.listdir (path) files.sort() newfiles = [] # list of filenames which have been processed. Is needed to check if a file exists before moving oldfiles = [] # list of filenames already processed while len (files) > 0: # emulate .pop() for lists, inefficient! 
needed to check for filenames which have been truncated at the beginning curfilename = files[0] stat_data = os.stat (curfilename) cur_inode = stat_data.st_ino cur_device = stat_data.st_dev files = files [1:] if os.path.isfile (curfilename): ntotalf += 1 if curfilename in VETO_FILES: continue newfilename = filename2unicode(curfilename, pFSenc) sys.stdout.write ("%2dFILE: '%s'" % (depth, newfilename.encode('utf-8'))) if pNoCAP == 0: newfilename = cap2unicode (newfilename, pCAPenc) deCAPfilename = newfilename[:] if pNoWIN == 0: newfilename = createValidFilename (newfilename) newfilename = createUniqueFilename (newfilename, newfiles, files) newfiles.append (newfilename) newfilename = newfilename.encode ('utf-8') extras = "" if curfilename != newfilename: nrenamedf += 1 if pNoTest == 1: os.rename(curfilename, newfilename) if os.path.isdir ("./.AppleDouble") and os.path.isfile (os.path.join ("./.AppleDouble", curfilename)): os.rename(os.path.join ("./.AppleDouble", curfilename), os.path.join ("./.AppleDouble", newfilename)) extras += "R" if pNoDB == 0: cnid2.renameFromDEVINO (cur_device, cur_inode, newfilename) extras += "D" else: if pNoDB == 0: if cnid2.DEVINOexists (cur_device, cur_inode) is not 0: sys.stdout.write (" WARNING dev:%d ino:%d not in DB!" 
% (cur_device, cur_inode)) if pVerbose == 0: sys.stdout.write (" -=> '%s' %s\n" % (newfilename, extras)) else: sys.stdout.write (" ('%s') -=> '%s' %s\n" % (deCAPfilename.encode ('utf-8'), newfilename, extras)) else: sys.stdout.write("\n") # newline to separate unprocessed filenames sys.stdout.flush() elif os.path.isdir (curfilename): ntotald += 1 if curfilename in VETO_DIRS: continue newfilename = filename2unicode(curfilename, pFSenc) sys.stdout.write ("%2dDIRECTORY: '%s'" % (depth, newfilename.encode('utf-8'))) if pNoCAP == 0: newfilename = cap2unicode (newfilename, pCAPenc) deCAPfilename = newfilename[:] if pNoWIN == 0: newfilename = createValidFilename (newfilename) newfilename = createUniqueFilename (newfilename, newfiles, files) newfiles.append (newfilename) newfilename = newfilename.encode ('utf-8') extras = "" if curfilename != newfilename: nrenamedd += 1 if pNoTest == 1: os.rename(curfilename, newfilename) if os.path.isdir ("./.AppleDouble") and os.path.isfile (os.path.join ("./.AppleDouble", curfilename)): os.rename(os.path.join ("./.AppleDouble", curfilename), os.path.join ("./.AppleDouble", newfilename)) extras += "R" if pNoDB == 0: cnid2.renameFromDEVINO (cur_device, cur_inode, newfilename) extras += "D" else: if pNoDB == 0: if cnid2.DEVINOexists (cur_device, cur_inode) is not 0: sys.stdout.write (" WARNING dev:%d ino:%d not in DB!" 
% (cur_device, cur_inode)) if pVerbose == 0: sys.stdout.write (" -=> '%s' %s\n" % (newfilename, extras)) else: sys.stdout.write (" ('%s') -=> '%s' %s\n" % (deCAPfilename.encode ('utf-8'), newfilename, extras)) else: sys.stdout.write("\n") # newline to separate unprocessed filenames sys.stdout.flush() if recursive == 1: if pNoTest == 0: newfilename = curfilename scanDirectory(newfilename, 1, depth + 1) os.chdir('..') scanDirectory (start_path, pRecursive) sys.stderr.write ("Depth of deepest directory tree: %d\n" % maxdepth) sys.stderr.write ("Total # of Files processed: %d\n" % ntotalf) sys.stderr.write ("Total # of Directories processed: %d\n" % ntotald) sys.stderr.write ("Total # of Files renamed: %d\n" % nrenamedf) sys.stderr.write ("Total # of Directories renamed: %d\n" % nrenamedd) # close Database if pNoDB == 0: cnid2.close()