""" checking how many duplicate files are in source folder""" import os import os.path from pprint import pprint import ImporterConst as const exceptions = ['.ann', '.db'] if __name__ == '__main__': files = {} for dirpath, dirnames, filenames in os.walk(const.ImageSourceDir): for filename in filenames: ext = os.path.splitext(filename)[1] if str.lower(ext) in exceptions: continue tmp = files.get(filename, []) tmp.append(dirpath) files[filename] = tmp duplicates = {} for key in sorted(files.keys()): if len(files[key]) > 1: duplicates[key] = files[key] with open('test01.txt', 'w') as f: print >>f, "Total number of duplicates: {}".format(len(duplicates)) pprint(duplicates, f)