""" dump duplicate files from source folder into SQL query""" import os import os.path import ImporterConst as const exceptions = ['.ann', '.db'] if __name__ == '__main__': files = {} for dirpath, dirnames, filenames in os.walk(const.ImageSourceDir): for filename in filenames: ext = os.path.splitext(filename)[1] if str.lower(ext) in exceptions: continue tmp = files.get(filename, []) tmp.append(dirpath) files[filename] = tmp #duplicates = {} #for key, v in files.iteritems(): # if len(v) > 1: # duplicates[key] = v # queryTmpl = "insert filemapping (fname, fpath, duplicate) values ('{filename}', '{filepath}', '{duplicate}')" # for filename, paths in duplicates.iteritems(): for filename, paths in files.iteritems(): duplicate = 1 if len(paths) > 1 else 0 for path in paths: filepath = '\\'.join(path.split('\\')[-2:]) query = queryTmpl.format(filename=filename, filepath=filepath, duplicate=duplicate) print query