"""strip_attachments - rip through a user's Exchange mailbox and replace attachments by links to a copy on a network disk. """ import os, sys import md5 import win32api import win32com.client DEBUG = 0 LOG_FILENAME = "c:/temp/errors.log" try: os.remove (LOG_FILENAME) except OSError: pass SIZE_THRESHOLD = 10000 USERNAME = win32api.GetUserName () ATTACHMENTS_ROOT = os.path.normpath (r"\\tdi_nt4a\user\%s\attachments" % USERNAME) IGNORE_INFO_STORES = ["public folders"] IGNORE_FOLDERS = ["deleted items", "sent items"] IGNORE_ATTACHMENTS = ["outlook.bmp", "v156k.jpg"] def log (text): open ("c:/temp/errors.log", "a").write (text) def process_attachment (attachment, route): """For this attachment, attempt to save it to its message's directory. If the same filename already exists in that directory, keep adding numbers to it until you hit a spare one. This is because any message may have several files of the same name. """ name = attachment.Name.encode ("utf-8", "ignore").lower () if name in IGNORE_ATTACHMENTS: return 0 print print 2 * len (route) * " ", "Attachment", name basename = name offset = 0 while os.path.isfile (os.path.join (ATTACHMENTS_ROOT, os.sep.join (route), basename)): root, ext = os.path.splitext (basename) offset += 1 basename = "%s.%d%s" % (root, offset, ext) filename = os.path.join (ATTACHMENTS_ROOT, os.sep.join (route), basename) try: attachment.WriteToFile (filename) except: log ("Unable to write %s: %s - %s\n" % (folder.Name, message_name, name)) return 0 else: attachments = attachment.Parent new_attachment = attachments.Add () new_attachment.Position = 0 new_attachment.Type = win32com.client.constants.CdoFileLink new_attachment.Source = filename new_attachment.Name = attachment.Name attachment.Delete () return 1 def process_message (message, route): """For this message, use a shortened form of its ID as an identifier, as the subject may not be unique or even present! Create a directory to hold any attachments and then, if the message is over a certain size, store all its attachments away in that directory and replace them by a link to their new location. NB There doesn't seem to be any way of getting the size of an individual attachment (short of saving it anyway and calculating the size of the resulting file). So any big message may have even trivial attachments stored away. """ name = md5.md5 (message.ID).hexdigest () subject = message.Subject.encode ("utf-8", "ignore") print 2 * len (route) * " ", (subject or name).ljust (96), "\r", message_path = os.path.join (ATTACHMENTS_ROOT, os.sep.join (route), name) # # For debug purposes, consider only messages whose subject # is CDO... something. # if DEBUG and not subject.startswith ("CDO"): return # # Only consider messages over a certain size; this # is a fudge, because there's no easy way to get # the size of individual attachments. # if message.Size > SIZE_THRESHOLD: attachments = message.Attachments n_attachments = attachments.Count if n_attachments > 0: if not os.path.isdir (message_path): os.mkdir (message_path) # # Don't bother trying to save OLE or other # unusual attachments - only straightforward # files. This will also prevent any attempts # to link to links to attachments previously # removed! # for n_attachment in range (1, n_attachments + 1): attachment = attachments.Item (n_attachment) if attachment.Type == win32com.client.constants.CdoFileData: if process_attachment (attachment, route + [name]): message.Update () def process_folder (folder, route): """For the folder in question, create its directory if necessary and then process all messages within it, followed by any folders within it. """ name = folder.Name.encode ("utf-8", "ignore").lower () if name in IGNORE_FOLDERS: return print 2 * len (route) * " ", "Checking folder", name.ljust (80) # # Attempt to use the Exchange folder's name as the # folder name on the disk. If this isn't possible, # perhaps because of duplication or invalid chars, # then resort to an MD5 digest instead. # try: folder_path = os.path.join (ATTACHMENTS_ROOT, os.sep.join (route), name) if not os.path.isdir (folder_path): os.mkdir (folder_path) except (IOError, OSError): name = md5.md5 (folder.ID).hexdigest () folder_path = os.path.join (ATTACHMENTS_ROOT, os.sep.join (route), name) if not os.path.isdir (folder_path): os.mkdir (folder_path) messages = folder.Messages n_messages = messages.Count for n_message in range (1, n_messages + 1): message = messages[n_message] process_message (message, route + [name]) folders = folder.Folders n_folders = folders.Count for n_folder in range (1, n_folders + 1): folder = folders[n_folder] process_folder (folder, route + [name]) def process_info_store (info_store, route): """For the info store in question, create a directory for it if needs be and process its root folder, which may have messages of its own, and then (by recursion) any subsidiary folders """ name = info_store.Name.encode ("UTF-8", "ignore") print 2 * len (route) * " ", "Checking info store", name info_store_path = os.path.join (ATTACHMENTS_ROOT, os.sep.join (route), name) if not os.path.isdir (info_store_path): os.mkdir (info_store_path) process_folder (info_store.RootFolder, route + [name]) def main (args): """Attach to the user's Exchange session and produce a list of Information Stores. By default these will be the user's standard mailbox and the Public Folders. These last will usually be ignored as you don't want everyone running processes to delete attachments from them. There may also be personal folders and other mailboxes. The user may either go through them and select one at a time, or may simply opt to rip through the whole lot. """ if not os.path.isdir (ATTACHMENTS_ROOT): try: os.mkdir (ATTACHMENTS_ROOT) except: raise RuntimeError, "Attachments directory %s does not exist and cannot be created" % ATTACHMENTS_ROOT session = win32com.client.gencache.EnsureDispatch ("MAPI.Session") session.Logon () _info_stores = session.InfoStores info_stores = [None] for n in range (1, _info_stores.Count + 1): info_store = _info_stores[n] if info_store.Name.lower () not in IGNORE_INFO_STORES: info_stores.append (info_store) while 1: print for n in range (1, len (info_stores)): info_store = info_stores[n] print "%d) %s" % (n, info_store.Name) print try: n_to_check = raw_input ("Which info store (0 - all, blank - exit): ") except KeyboardInterrupt: n_to_check = "" if n_to_check == "": return else: n_to_check = int (n_to_check) info_store = info_stores[n_to_check] if info_store: process_info_store (info_store, []) else: for info_store in info_stores[1:]: process_info_store (info_store, []) if __name__ == '__main__': main (sys.argv[1:])